SW 개발
[Android] 구글STT, TTS 사용하기 (android.speech)
minkyung
2020. 5. 26. 13:50
● Android Speech Recognizer
안드로이드에서는 구글 SpeechToText, TextToSpeech 기술인 stt, tts를 내장 API로 지원하고 있습니다.
별도의 라이브러리 설치나 gradle 설정 없이, manifest에 몇 가지 권한만 선언하면 바로 불러와서 사용할 수 있습니다.
Android Developers SpeechRecognizer 공식문서
○ android.speech 패키지를 사용해 STT 기능 구현하기
1. AndroidManifest.xml
<!-- Network access declared by the tutorial for speech recognition (presumably for online recognition; confirm for your recognizer). -->
<uses-permission android:name="android.permission.INTERNET" />
<!-- Microphone access used to capture speech; MainActivity also requests it at runtime on API 23+. -->
<uses-permission android:name="android.permission.RECORD_AUDIO" />
2. activity_main.xml
화면은 음성인식 시작 버튼과 음성인식 결과 텍스트뷰만 생성하였습니다.
<?xml version="1.0" encoding="utf-8"?>
<!-- STT demo screen: a result label stacked above the button that starts speech recognition. -->
<LinearLayout
    xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:gravity="center"
    android:orientation="vertical"
    tools:context=".MainActivity">

    <!-- Displays the recognized text; MainActivity looks it up as R.id.sttResult. -->
    <TextView
        android:id="@+id/sttResult"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:text="음성인식 결과가 여기에 표시됩니다." />

    <!-- Starts a recognition session; MainActivity looks it up as R.id.sttStart. -->
    <Button
        android:id="@+id/sttStart"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:text="음성인식 시작" />

</LinearLayout>
3. MainActivity.java
// android.speech 클래스에서 필요한 객체 임포트
import android.speech.RecognitionListener;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;
/**
 * Builds the STT demo screen: requests the microphone permission, binds the views,
 * prepares the recognition intent and wires the start button.
 *
 * @param savedInstanceState state passed through to {@code super.onCreate}
 */
@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_main);

    // Only RECORD_AUDIO needs a runtime request (API 23+). INTERNET is a normal
    // permission that is granted from the manifest at install time, so asking for it
    // here has no effect and is omitted.
    if (Build.VERSION.SDK_INT >= 23) {
        ActivityCompat.requestPermissions(this,
                new String[]{Manifest.permission.RECORD_AUDIO}, PERMISSION);
    }

    // Bind the result label and the start button declared in activity_main.xml.
    textView = (TextView) findViewById(R.id.sttResult);
    sttBtn = (Button) findViewById(R.id.sttStart);

    // Recognition request: Korean speech, identified by this app's package name.
    intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
    intent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE, getPackageName());
    intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, "ko-KR");

    // Every click starts a fresh recognition session.
    sttBtn.setOnClickListener(v -> {
        // Release the recognizer left over from the previous click; otherwise each
        // click would leave another recognizer instance behind.
        if (mRecognizer != null) {
            mRecognizer.destroy();
        }
        mRecognizer = SpeechRecognizer.createSpeechRecognizer(this);
        mRecognizer.setRecognitionListener(listener);
        mRecognizer.startListening(intent);
    });
}
// RecognizerIntent 객체에 할당할 listener 생성
/**
 * Callbacks for the recognizer started from the button: shows a toast when listening
 * begins, a toast describing any error, and writes the recognized text into
 * {@code textView}.
 */
private final RecognitionListener listener = new RecognitionListener() {
    @Override
    public void onReadyForSpeech(Bundle params) {
        Toast.makeText(getApplicationContext(), "음성인식을 시작합니다.", Toast.LENGTH_SHORT).show();
    }

    @Override
    public void onBeginningOfSpeech() {}

    @Override
    public void onRmsChanged(float rmsdB) {}

    @Override
    public void onBufferReceived(byte[] buffer) {}

    @Override
    public void onEndOfSpeech() {}

    /**
     * Maps the recognizer error code to a short description and shows it in a toast.
     *
     * @param error one of the {@code SpeechRecognizer.ERROR_*} codes
     */
    @Override
    public void onError(int error) {
        String message;
        switch (error) {
            case SpeechRecognizer.ERROR_AUDIO:
                message = "오디오 에러";
                break;
            case SpeechRecognizer.ERROR_CLIENT:
                message = "클라이언트 에러";
                break;
            case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
                message = "퍼미션 없음";
                break;
            case SpeechRecognizer.ERROR_NETWORK:
                message = "네트워크 에러";
                break;
            case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
                message = "네트웍 타임아웃";
                break;
            case SpeechRecognizer.ERROR_NO_MATCH:
                message = "찾을 수 없음";
                break;
            case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
                message = "RECOGNIZER가 바쁨";
                break;
            case SpeechRecognizer.ERROR_SERVER:
                message = "서버가 이상함";
                break;
            case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
                message = "말하는 시간초과";
                break;
            default:
                message = "알 수 없는 오류임";
                break;
        }
        Toast.makeText(getApplicationContext(), "에러가 발생하였습니다. : " + message, Toast.LENGTH_SHORT).show();
    }

    /**
     * Shows the best recognition candidate in {@code textView}.
     *
     * @param results bundle holding the candidates under
     *                {@code SpeechRecognizer.RESULTS_RECOGNITION}
     */
    @Override
    public void onResults(Bundle results) {
        if (results == null) {
            return;
        }
        ArrayList<String> matches =
                results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
        // The list can be missing or empty; previously this crashed with an NPE.
        if (matches == null || matches.isEmpty()) {
            return;
        }
        // Per the RESULTS_RECOGNITION documentation the candidates are ordered by
        // descending confidence, so index 0 is the best one. The old loop overwrote the
        // label with every candidate and ended up showing the least likely one.
        textView.setText(matches.get(0));
    }

    @Override
    public void onPartialResults(Bundle partialResults) {}

    @Override
    public void onEvent(int eventType, Bundle params) {}
};
전체코드
package com.example.stttest;
import androidx.appcompat.app.AppCompatActivity;
import androidx.core.app.ActivityCompat;
import android.Manifest;
import android.content.Intent;
import android.os.Build;
import android.os.Bundle;
import android.speech.RecognitionListener;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;
import android.widget.Button;
import android.widget.TextView;
import android.widget.Toast;
import java.util.ArrayList;
/**
 * Speech-to-text demo activity.
 *
 * <p>Pressing the start button creates a {@link SpeechRecognizer}, listens for Korean
 * speech and shows the best recognition candidate in a {@link TextView}. Errors are
 * reported through toasts.
 */
public class MainActivity extends AppCompatActivity {
    Intent intent;
    SpeechRecognizer mRecognizer;
    Button sttBtn;
    TextView textView;
    // Request code passed to ActivityCompat.requestPermissions.
    final int PERMISSION = 1;

    /**
     * Requests the microphone permission, binds the views, prepares the recognition
     * intent and wires the start button.
     *
     * @param savedInstanceState state passed through to {@code super.onCreate}
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        // Only RECORD_AUDIO needs a runtime request (API 23+). INTERNET is a normal
        // permission granted from the manifest at install time, so it is not requested.
        if (Build.VERSION.SDK_INT >= 23) {
            ActivityCompat.requestPermissions(this,
                    new String[]{Manifest.permission.RECORD_AUDIO}, PERMISSION);
        }

        textView = (TextView) findViewById(R.id.sttResult);
        sttBtn = (Button) findViewById(R.id.sttStart);

        // Recognition request: Korean speech, identified by this app's package name.
        intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        intent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE, getPackageName());
        intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, "ko-KR");

        sttBtn.setOnClickListener(v -> {
            // Release the recognizer from the previous click before creating a new one;
            // otherwise every click would leave another recognizer instance behind.
            if (mRecognizer != null) {
                mRecognizer.destroy();
            }
            mRecognizer = SpeechRecognizer.createSpeechRecognizer(this);
            mRecognizer.setRecognitionListener(listener);
            mRecognizer.startListening(intent);
        });
    }

    /** Releases the recognizer (if one was created) when the activity goes away. */
    @Override
    protected void onDestroy() {
        if (mRecognizer != null) {
            mRecognizer.destroy();
            mRecognizer = null;
        }
        super.onDestroy();
    }

    /**
     * Translates a recognizer error code into the short description shown to the user.
     *
     * @param error one of the {@code SpeechRecognizer.ERROR_*} codes
     * @return the description; a generic text for codes not listed here
     */
    private static String describeError(int error) {
        switch (error) {
            case SpeechRecognizer.ERROR_AUDIO:
                return "오디오 에러";
            case SpeechRecognizer.ERROR_CLIENT:
                return "클라이언트 에러";
            case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
                return "퍼미션 없음";
            case SpeechRecognizer.ERROR_NETWORK:
                return "네트워크 에러";
            case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
                return "네트웍 타임아웃";
            case SpeechRecognizer.ERROR_NO_MATCH:
                return "찾을 수 없음";
            case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
                return "RECOGNIZER가 바쁨";
            case SpeechRecognizer.ERROR_SERVER:
                return "서버가 이상함";
            case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
                return "말하는 시간초과";
            default:
                return "알 수 없는 오류임";
        }
    }

    /** Recognition callbacks: start toast, error toast and result display. */
    private final RecognitionListener listener = new RecognitionListener() {
        @Override
        public void onReadyForSpeech(Bundle params) {
            Toast.makeText(getApplicationContext(), "음성인식을 시작합니다.", Toast.LENGTH_SHORT).show();
        }

        @Override
        public void onBeginningOfSpeech() {}

        @Override
        public void onRmsChanged(float rmsdB) {}

        @Override
        public void onBufferReceived(byte[] buffer) {}

        @Override
        public void onEndOfSpeech() {}

        @Override
        public void onError(int error) {
            Toast.makeText(getApplicationContext(),
                    "에러가 발생하였습니다. : " + describeError(error), Toast.LENGTH_SHORT).show();
        }

        @Override
        public void onResults(Bundle results) {
            if (results == null) {
                return;
            }
            ArrayList<String> matches =
                    results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
            // The list can be missing or empty; previously this crashed with an NPE.
            if (matches == null || matches.isEmpty()) {
                return;
            }
            // Per the RESULTS_RECOGNITION documentation the candidates are ordered by
            // descending confidence, so index 0 is the best one. The old loop overwrote
            // the label with every candidate and ended up showing the least likely one.
            textView.setText(matches.get(0));
        }

        @Override
        public void onPartialResults(Bundle partialResults) {}

        @Override
        public void onEvent(int eventType, Bundle params) {}
    };
}
○ android.speech 패키지를 사용해 TTS 기능 구현하기
테스트할 기기(휴대폰이나 태블릿)에 Android TTS 엔진이 설치되어 있어야 합니다.
1. activity_main.xml
<?xml version="1.0" encoding="utf-8"?>
<!-- TTS demo screen: a text input and the button that speaks its content. -->
<LinearLayout
    xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <!-- Sentence to be spoken; MainActivity looks it up as R.id.txtText. -->
    <EditText
        android:id="@+id/txtText"
        android:layout_width="300dp"
        android:layout_height="wrap_content"
        android:inputType="text" />

    <!-- Triggers speech output; MainActivity looks it up as R.id.btnSpeak. -->
    <Button
        android:id="@+id/btnSpeak"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:text="Speak Out" />

</LinearLayout>
EditText에 문장을 입력하고 버튼을 클릭하면 입력한 문장이 음성으로 출력됩니다.
2. MainActivity.java
TextToSpeech 임포트
import android.speech.tts.TextToSpeech;
package com.example.text2speech;
import android.app.Activity;
import android.os.Build;
import android.speech.tts.TextToSpeech;
import android.support.annotation.RequiresApi;
import android.os.Bundle;
import android.util.Log;
import android.view.View;
import android.widget.Button;
import android.widget.EditText;
import java.util.Locale;
/**
 * Text-to-speech demo activity: speaks whatever is typed into the input field when
 * the button is pressed, and once right after the engine has been initialised.
 */
public class MainActivity extends Activity implements TextToSpeech.OnInitListener {
    private TextToSpeech tts;
    private Button speakButton;
    private EditText inputText;

    /**
     * Creates the TTS engine (this activity receives the init callback), binds the
     * views and hooks the button up to {@link #speakOut()}.
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        tts = new TextToSpeech(this, this);
        speakButton = findViewById(R.id.btnSpeak);
        inputText = findViewById(R.id.txtText);

        speakButton.setOnClickListener(new View.OnClickListener() {
            @RequiresApi(api = Build.VERSION_CODES.LOLLIPOP)
            @Override
            public void onClick(View v) {
                speakOut();
            }
        });
    }

    /**
     * Speaks the current content of the input field with pitch 0.6 and speech rate 0.1,
     * replacing anything still queued in the engine.
     */
    @RequiresApi(api = Build.VERSION_CODES.LOLLIPOP)
    private void speakOut() {
        final float pitch = 0.6f;
        final float rate = 0.1f;
        CharSequence sentence = inputText.getText();

        tts.setPitch(pitch);
        tts.setSpeechRate(rate);
        tts.speak(sentence, TextToSpeech.QUEUE_FLUSH, null, "id1");
    }

    /** Stops and shuts down the TTS engine before the activity is destroyed. */
    @Override
    public void onDestroy() {
        if (tts != null) {
            tts.stop();
            tts.shutdown();
        }
        super.onDestroy();
    }

    /**
     * Init callback of the TTS engine. On success it selects Korean, enables the button
     * and speaks the current input; failures are only logged.
     *
     * @param status {@code TextToSpeech.SUCCESS} when the engine is ready
     */
    @RequiresApi(api = Build.VERSION_CODES.LOLLIPOP)
    @Override
    public void onInit(int status) {
        if (status != TextToSpeech.SUCCESS) {
            Log.e("TTS", "Initilization Failed!");
            return;
        }

        int languageStatus = tts.setLanguage(Locale.KOREA);
        boolean languageUnavailable = languageStatus == TextToSpeech.LANG_MISSING_DATA
                || languageStatus == TextToSpeech.LANG_NOT_SUPPORTED;
        if (languageUnavailable) {
            Log.e("TTS", "This Language is not supported");
            return;
        }

        speakButton.setEnabled(true);
        speakOut();
    }
}
onDestroy()는 해당 액티비티가 종료될 때 호출되며, 여기서 tts.stop()과 tts.shutdown()을 호출해 TTS 리소스를 해제합니다.
○ 번외
STT+TTS 같이 구현,
음성으로 안내하고 싶은 메시지가 있으면 String 변수에 담아 funcVoiceOut()의 인자로 넘겨주기만 하면 됩니다.
번외 예제는 버튼을 눌러 음성인식을 실행하고 음성에서 "액티비티 투"라는 단어가 포함되어 있으면 다음 액티비티로 넘어갑니다.
import android.Manifest;
import android.content.Context;
import android.content.Intent;
import android.os.Build;
import android.os.Bundle;
import android.speech.RecognitionListener;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;
import android.speech.tts.TextToSpeech;
import android.util.Log;
import android.view.View;
import android.widget.Button;
import android.widget.TextView;
import android.widget.Toast;
import androidx.appcompat.app.AppCompatActivity;
import androidx.core.app.ActivityCompat;
import java.util.ArrayList;
import java.util.Locale;
/**
 * Combined STT + TTS demo activity.
 *
 * <p>Pressing the button starts speech recognition. When the recognized text contains
 * the phrase "액티비티 투", the activity announces the navigation by voice and opens
 * {@code NextActivity}. Any message can be spoken through {@link #funcVoiceOut(String)}.
 */
public class MainActivity extends AppCompatActivity implements TextToSpeech.OnInitListener {
    /**
     * Trigger phrase "액티비티 투" stored without spaces; the transcript is compared
     * with spaces removed as well, so spacing differences do not matter.
     */
    private static final String NEXT_ACTIVITY_KEYWORD = "액티비티투";

    Button btn1;
    // NOTE(review): never bound to a view in this example; bind it with findViewById
    // to the result TextView of your layout. All uses below are null-guarded.
    TextView txtInMsg;
    // Kept for callers that read MainActivity.mContext; cleared again in onDestroy so
    // the destroyed activity is not kept reachable through this static field.
    public static Context mContext;
    Intent sttIntent;
    SpeechRecognizer mRecognizer;
    TextToSpeech tts;
    // Request code passed to ActivityCompat.requestPermissions.
    final int PERMISSION = 1;

    /**
     * Binds the button, requests the microphone permission, prepares STT/TTS and wires
     * the button to {@link #speechStart()}.
     *
     * @param savedInstanceState state passed through to {@code super.onCreate}
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        mContext = this;
        btn1 = (Button) findViewById(R.id.btn1);

        // Only RECORD_AUDIO needs a runtime request (API 23+). INTERNET is a normal
        // permission granted from the manifest at install time, so it is not requested.
        if (Build.VERSION.SDK_INT >= 23) {
            ActivityCompat.requestPermissions(this,
                    new String[]{Manifest.permission.RECORD_AUDIO}, PERMISSION);
        }

        // Prepare the recognition intent and the TTS engine.
        speechInit();

        btn1.setOnClickListener(new View.OnClickListener() {
            @Override
            public void onClick(View v) {
                speechStart();
            }
        });
    }

    /** Builds the Korean recognition intent and creates the TTS engine. */
    private void speechInit() {
        sttIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        sttIntent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE, getPackageName());
        sttIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, "ko-KR");

        // This activity receives the engine's init callback (onInit).
        tts = new TextToSpeech(MainActivity.this, this);
    }

    /** Starts a new recognition session, releasing the recognizer of the previous one. */
    public void speechStart() {
        // Without this, every click would leave another recognizer instance behind.
        if (mRecognizer != null) {
            mRecognizer.destroy();
        }
        mRecognizer = SpeechRecognizer.createSpeechRecognizer(this);
        mRecognizer.setRecognitionListener(listener);
        mRecognizer.startListening(sttIntent);
    }

    /**
     * Translates a recognizer error code into the short description shown to the user.
     *
     * @param error one of the {@code SpeechRecognizer.ERROR_*} codes
     * @return the description; a generic text for codes not listed here
     */
    private static String describeError(int error) {
        switch (error) {
            case SpeechRecognizer.ERROR_AUDIO:
                return "오디오 에러";
            case SpeechRecognizer.ERROR_CLIENT:
                return "클라이언트 에러";
            case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
                return "퍼미션 없음";
            case SpeechRecognizer.ERROR_NETWORK:
                return "네트워크 에러";
            case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
                return "네트웍 타임아웃";
            case SpeechRecognizer.ERROR_NO_MATCH:
                return "찾을 수 없음";
            case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
                return "RECOGNIZER가 바쁨";
            case SpeechRecognizer.ERROR_SERVER:
                return "서버가 이상함";
            case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
                return "말하는 시간초과";
            default:
                return "알 수 없는 오류임";
        }
    }

    /** Recognition callbacks: start toast, spoken error notice and result handling. */
    private final RecognitionListener listener = new RecognitionListener() {
        @Override
        public void onReadyForSpeech(Bundle params) {
            Toast.makeText(getApplicationContext(), "음성인식을 시작합니다.", Toast.LENGTH_SHORT).show();
        }

        @Override
        public void onBeginningOfSpeech() {
        }

        @Override
        public void onRmsChanged(float rmsdB) {
        }

        @Override
        public void onBufferReceived(byte[] buffer) {
        }

        @Override
        public void onEndOfSpeech() {
        }

        @Override
        public void onError(int error) {
            String guideStr = "에러가 발생하였습니다.";
            // Separator added so the toast reads like the one in the STT-only example
            // instead of running the two texts together.
            Toast.makeText(getApplicationContext(),
                    guideStr + " : " + describeError(error), Toast.LENGTH_SHORT).show();
            funcVoiceOut(guideStr);
        }

        @Override
        public void onResults(Bundle results) {
            if (results == null) {
                return;
            }
            ArrayList<String> matches =
                    results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
            // The list can be missing or empty; previously this crashed with an NPE.
            if (matches == null || matches.isEmpty()) {
                return;
            }

            // Per the RESULTS_RECOGNITION documentation the candidates are ordered by
            // descending confidence, so index 0 is the one worth displaying.
            if (txtInMsg != null) {
                txtInMsg.setText(matches.get(0));
            }

            // Search every candidate for the trigger phrase, as the original did.
            StringBuilder allCandidates = new StringBuilder();
            for (String candidate : matches) {
                allCandidates.append(candidate);
            }
            if (allCandidates.length() < 1) {
                return;
            }
            moveActivity(allCandidates.toString());
        }

        @Override
        public void onPartialResults(Bundle partialResults) {
        }

        @Override
        public void onEvent(int eventType, Bundle params) {
        }
    };

    /**
     * Opens {@code NextActivity} when the recognized text contains the trigger phrase.
     *
     * <p>Spaces are ignored on both sides of the comparison. Previously the caller
     * removed the spaces from the transcript while this method searched for the phrase
     * with a space in it, so the navigation could never trigger.
     *
     * @param resultStr recognized text; {@code null} is ignored
     */
    public void moveActivity(String resultStr) {
        if (resultStr == null) {
            return;
        }
        String normalized = resultStr.replace(" ", "");
        if (normalized.contains(NEXT_ACTIVITY_KEYWORD)) {
            String guideStr = "액티비티를 넘어갑니다.";
            Toast.makeText(getApplicationContext(), guideStr, Toast.LENGTH_SHORT).show();
            funcVoiceOut(guideStr);
            Intent intent = new Intent(getApplicationContext(), NextActivity.class);
            startActivity(intent);
        }
    }

    /**
     * Speaks the given message unless the engine is already speaking.
     *
     * @param OutMsg text to speak; {@code null} or empty text is ignored
     */
    public void funcVoiceOut(String OutMsg) {
        if (OutMsg == null || OutMsg.length() < 1 || tts == null) {
            return;
        }
        if (!tts.isSpeaking()) {
            // NOTE(review): this three-argument speak() is deprecated since API 21; the
            // replacement takes an utterance id but requires API 21+. Kept because the
            // project's minSdk is not visible here.
            tts.speak(OutMsg, TextToSpeech.QUEUE_FLUSH, null);
        }
    }

    /**
     * Init callback of the TTS engine: selects Korean and pitch 1 on success, logs on
     * failure.
     *
     * @param status {@code TextToSpeech.SUCCESS} when the engine is ready
     */
    @Override
    public void onInit(int status) {
        if (status == TextToSpeech.SUCCESS) {
            int result = tts.setLanguage(Locale.KOREAN);
            if (result == TextToSpeech.LANG_MISSING_DATA
                    || result == TextToSpeech.LANG_NOT_SUPPORTED) {
                Log.e("TTS", "한국어를 지원하지 않습니다.");
            }
            tts.setPitch(1);
        } else {
            Log.e("TTS", "초기화 실패");
        }
    }

    /** Releases the TTS engine, the recognizer and the static activity reference. */
    @Override
    protected void onDestroy() {
        if (tts != null) {
            tts.stop();
            tts.shutdown();
        }
        if (mRecognizer != null) {
            // destroy() is the last call allowed on a recognizer; the original called
            // cancel() on the already destroyed instance afterwards.
            mRecognizer.destroy();
            mRecognizer = null;
        }
        if (mContext == this) {
            mContext = null;
        }
        super.onDestroy();
    }
}