eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

TtsService.java 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. /*
  2. * Copyright (C) 2022 Beka Gozalishvili
  3. * Copyright (C) 2012-2015 Reece H. Dunn
  4. * Copyright (C) 2011 Google Inc.
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the "License");
  7. * you may not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. /*
  19. * This file implements the Android Text-to-Speech engine for eSpeak.
  20. *
  21. * Android Version: 4.0 (Ice Cream Sandwich)
  22. * API Version: 14
  23. */
  24. package com.reecedunn.espeak;
  25. import android.annotation.SuppressLint;
  26. import android.content.BroadcastReceiver;
  27. import android.content.Context;
  28. import android.content.Intent;
  29. import android.content.IntentFilter;
  30. import android.media.AudioTrack;
  31. import android.os.Build;
  32. import android.os.Bundle;
  33. import android.preference.PreferenceManager;
  34. import android.speech.tts.SynthesisCallback;
  35. import android.speech.tts.SynthesisRequest;
  36. import android.speech.tts.TextToSpeech;
  37. import android.speech.tts.TextToSpeechService;
  38. import android.util.Log;
  39. import android.util.Pair;
  40. import com.reecedunn.espeak.SpeechSynthesis.SynthReadyCallback;
  41. import java.util.ArrayList;
  42. import java.util.HashMap;
  43. import java.util.HashSet;
  44. import java.util.List;
  45. import java.util.Locale;
  46. import java.util.Map;
  47. import java.util.Set;
  48. /**
  49. * Implements the eSpeak engine as a {@link TextToSpeechService}.
  50. *
  51. * @author [email protected] (Reece H. Dunn)
  52. * @author [email protected] (Alan Viverette)
  53. */
  54. @SuppressLint("NewApi")
  55. public class TtsService extends TextToSpeechService {
  56. public static final String ESPEAK_INITIALIZED = "com.reecedunn.espeak.ESPEAK_INITIALIZED";
  57. private static final String TAG = TtsService.class.getSimpleName();
  58. private static Context storageContext;
  59. private static final boolean DEBUG = BuildConfig.DEBUG;
  60. private SpeechSynthesis mEngine;
  61. private SynthesisCallback mCallback;
  62. private final Map<String, Voice> mAvailableVoices = new HashMap<String, Voice>();
  63. protected Voice mMatchingVoice = null;
  64. private BroadcastReceiver mOnLanguagesDownloaded = null;
  65. @Override
  66. public void onCreate() {
  67. storageContext = EspeakApp.getStorageContext();
  68. if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.N)
  69. storageContext.moveSharedPreferencesFrom(this, this.getPackageName() + "_preferences");
  70. initializeTtsEngine();
  71. super.onCreate();
  72. }
  73. @Override
  74. public void onDestroy() {
  75. super.onDestroy();
  76. if (mOnLanguagesDownloaded != null) {
  77. unregisterReceiver(mOnLanguagesDownloaded);
  78. }
  79. }
  80. /**
  81. * Sets up the native eSpeak engine.
  82. */
  83. private void initializeTtsEngine() {
  84. if (mEngine != null) {
  85. mEngine.stop();
  86. mEngine = null;
  87. }
  88. mEngine = new SpeechSynthesis(storageContext, mSynthCallback);
  89. mAvailableVoices.clear();
  90. for (Voice voice : mEngine.getAvailableVoices()) {
  91. mAvailableVoices.put(voice.name, voice);
  92. }
  93. final Intent intent = new Intent(ESPEAK_INITIALIZED);
  94. sendBroadcast(intent);
  95. }
  96. @Override
  97. protected String[] onGetLanguage() {
  98. // This is used to specify the language requested from GetSampleText.
  99. if (mMatchingVoice == null) {
  100. return new String[] { "eng", "GBR", "" };
  101. }
  102. return new String[] {
  103. mMatchingVoice.locale.getISO3Language(),
  104. mMatchingVoice.locale.getISO3Country(),
  105. mMatchingVoice.locale.getVariant()
  106. };
  107. }
  108. private Pair<Voice, Integer> findVoice(String language, String country, String variant) {
  109. if (!CheckVoiceData.hasBaseResources(storageContext) || CheckVoiceData.canUpgradeResources(storageContext)) {
  110. if (mOnLanguagesDownloaded == null) {
  111. mOnLanguagesDownloaded = new BroadcastReceiver() {
  112. @Override
  113. public void onReceive(Context context, Intent intent) {
  114. initializeTtsEngine();
  115. }
  116. };
  117. final IntentFilter filter = new IntentFilter(DownloadVoiceData.BROADCAST_LANGUAGES_UPDATED);
  118. registerReceiver(mOnLanguagesDownloaded, filter);
  119. }
  120. final Intent intent = new Intent(storageContext, DownloadVoiceData.class);
  121. intent.addFlags(Intent.FLAG_ACTIVITY_NEW_TASK);
  122. startActivity(intent);
  123. return new Pair<>(null, TextToSpeech.LANG_MISSING_DATA);
  124. }
  125. final Locale query = new Locale(language, country, variant);
  126. Voice languageVoice = null;
  127. Voice countryVoice = null;
  128. synchronized (mAvailableVoices) {
  129. for (Voice voice : mAvailableVoices.values()) {
  130. switch (voice.match(query)) {
  131. case TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE:
  132. return new Pair<>(voice, TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE);
  133. case TextToSpeech.LANG_COUNTRY_AVAILABLE:
  134. countryVoice = voice;
  135. case TextToSpeech.LANG_AVAILABLE:
  136. languageVoice = voice;
  137. break;
  138. }
  139. }
  140. }
  141. if (languageVoice == null) {
  142. return new Pair<>(null, TextToSpeech.LANG_NOT_SUPPORTED);
  143. } else if (countryVoice == null) {
  144. return new Pair<>(languageVoice, TextToSpeech.LANG_AVAILABLE);
  145. } else {
  146. return new Pair<>(countryVoice, TextToSpeech.LANG_COUNTRY_AVAILABLE);
  147. }
  148. }
  149. private Pair<Voice, Integer> getDefaultVoiceFor(String language, String country, String variant) {
  150. final Pair<Voice, Integer> match = findVoice(language, country, variant);
  151. switch (match.second) {
  152. case TextToSpeech.LANG_AVAILABLE:
  153. if (language.equals("fr") || language.equals("fra")) {
  154. return new Pair<>(findVoice(language, "FRA", "").first, match.second);
  155. }
  156. if (language.equals("pt") || language.equals("por")) {
  157. return new Pair<>(findVoice(language, "PRT", "").first, match.second);
  158. }
  159. return new Pair<>(findVoice(language, "", "").first, match.second);
  160. case TextToSpeech.LANG_COUNTRY_AVAILABLE:
  161. if ((language.equals("vi") || language.equals("vie")) && (country.equals("VN") || country.equals("VNM"))) {
  162. return new Pair<>(findVoice(language, country, "hue").first, match.second);
  163. }
  164. return new Pair<>(findVoice(language, country, "").first, match.second);
  165. default:
  166. return match;
  167. }
  168. }
  169. @Override
  170. protected int onIsLanguageAvailable(String language, String country, String variant) {
  171. return findVoice(language, country, variant).second;
  172. }
  173. @Override
  174. protected int onLoadLanguage(String language, String country, String variant) {
  175. final Pair<Voice, Integer> match = getDefaultVoiceFor(language, country, variant);
  176. if (match.first != null) {
  177. mMatchingVoice = match.first;
  178. }
  179. return match.second;
  180. }
  181. @Override
  182. protected Set<String> onGetFeaturesForLanguage(String lang, String country, String variant) {
  183. return new HashSet<String>();
  184. }
  185. @Override
  186. public String onGetDefaultVoiceNameFor(String language, String country, String variant) {
  187. final Voice match = getDefaultVoiceFor(language, country, variant).first;
  188. return (match == null) ? null : match.name;
  189. }
  190. @Override
  191. public List<android.speech.tts.Voice> onGetVoices() {
  192. List<android.speech.tts.Voice> voices = new ArrayList<android.speech.tts.Voice>();
  193. for (Voice voice : mAvailableVoices.values()) {
  194. int quality = android.speech.tts.Voice.QUALITY_NORMAL;
  195. int latency = android.speech.tts.Voice.LATENCY_VERY_LOW;
  196. Locale locale = new Locale(voice.locale.getISO3Language(), voice.locale.getISO3Country(), voice.locale.getVariant());
  197. Set<String> features = onGetFeaturesForLanguage(locale.getLanguage(), locale.getCountry(), locale.getVariant());
  198. voices.add(new android.speech.tts.Voice(voice.name, voice.locale, quality, latency, false, features));
  199. }
  200. return voices;
  201. }
  202. @Override
  203. public int onIsValidVoiceName(String name) {
  204. Voice voice = mAvailableVoices.get(name);
  205. return (voice == null) ? TextToSpeech.ERROR : TextToSpeech.SUCCESS;
  206. }
  207. @Override
  208. public int onLoadVoice(String name) {
  209. Voice voice = mAvailableVoices.get(name);
  210. if (voice == null) {
  211. return TextToSpeech.ERROR;
  212. }
  213. mMatchingVoice = voice;
  214. return TextToSpeech.SUCCESS;
  215. }
  216. @Override
  217. protected void onStop() {
  218. Log.i(TAG, "Received stop request.");
  219. mEngine.stop();
  220. }
  221. @SuppressWarnings("deprecation")
  222. private String getRequestString(SynthesisRequest request) {
  223. if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.LOLLIPOP) {
  224. return request.getCharSequenceText().toString();
  225. } else {
  226. return request.getText();
  227. }
  228. }
  229. private int selectVoice(SynthesisRequest request) {
  230. if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.LOLLIPOP) {
  231. final String name = request.getVoiceName();
  232. if (name != null && !name.isEmpty()) {
  233. return onLoadVoice(name);
  234. }
  235. }
  236. final int result = onLoadLanguage(request.getLanguage(), request.getCountry(), request.getVariant());
  237. switch (result) {
  238. case TextToSpeech.LANG_MISSING_DATA:
  239. case TextToSpeech.LANG_NOT_SUPPORTED:
  240. return TextToSpeech.ERROR;
  241. }
  242. return TextToSpeech.SUCCESS;
  243. }
  244. @Override
  245. protected synchronized void onSynthesizeText(SynthesisRequest request, SynthesisCallback callback) {
  246. if (mMatchingVoice == null)
  247. return;
  248. String text = getRequestString(request);
  249. if (text == null)
  250. return;
  251. if (DEBUG) {
  252. Log.i(TAG, "Received synthesis request: {language=\"" + mMatchingVoice.name + "\"}");
  253. final Bundle params = request.getParams();
  254. for (String key : params.keySet()) {
  255. Log.v(TAG,
  256. "Synthesis request contained param {" + key + ", " + params.get(key) + "}");
  257. }
  258. }
  259. if (text.startsWith("<?xml"))
  260. {
  261. // eSpeak does not recognise/skip "<?...?>" preprocessing tags,
  262. // so need to remove these before passing to synthesize.
  263. text = text.substring(text.indexOf("?>") + 2).trim();
  264. }
  265. mCallback = callback;
  266. mCallback.start(mEngine.getSampleRate(), mEngine.getAudioFormat(), mEngine.getChannelCount());
  267. final VoiceSettings settings = new VoiceSettings(PreferenceManager.getDefaultSharedPreferences(storageContext), mEngine);
  268. mEngine.setVoice(mMatchingVoice, settings.getVoiceVariant());
  269. mEngine.Rate.setValue(settings.getRate(), request.getSpeechRate());
  270. mEngine.Pitch.setValue(settings.getPitch(), request.getPitch());
  271. mEngine.PitchRange.setValue(settings.getPitchRange());
  272. mEngine.Volume.setValue(settings.getVolume());
  273. mEngine.Punctuation.setValue(settings.getPunctuationLevel());
  274. mEngine.setPunctuationCharacters(settings.getPunctuationCharacters());
  275. mEngine.synthesize(text, text.startsWith("<speak"));
  276. }
  277. /**
  278. * Pipes synthesizer output from native eSpeak to an {@link AudioTrack}.
  279. */
  280. private final SpeechSynthesis.SynthReadyCallback mSynthCallback = new SynthReadyCallback() {
  281. @Override
  282. public void onSynthDataReady(byte[] audioData) {
  283. if ((audioData == null) || (audioData.length == 0)) {
  284. onSynthDataComplete();
  285. return;
  286. }
  287. final int maxBytesToCopy = mCallback.getMaxBufferSize();
  288. int offset = 0;
  289. while (offset < audioData.length) {
  290. final int bytesToWrite = Math.min(maxBytesToCopy, (audioData.length - offset));
  291. mCallback.audioAvailable(audioData, offset, bytesToWrite);
  292. offset += bytesToWrite;
  293. }
  294. }
  295. @Override
  296. public void onSynthDataComplete() {
  297. mCallback.done();
  298. }
  299. };
  300. }