eSpeak NG is an open-source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

SpeechSynthesis.java 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. /*
  2. * Copyright (C) 2012-2015 Reece H. Dunn
  3. * Copyright (C) 2011 Google Inc.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /*
  18. * This file implements the Java API to eSpeak using the JNI bindings.
  19. *
  20. * Android Version: 4.0 (Ice Cream Sandwich)
  21. * API Version: 14
  22. */
  23. package com.reecedunn.espeak;
  24. import android.content.Context;
  25. import android.content.res.Configuration;
  26. import android.content.res.Resources;
  27. import android.util.DisplayMetrics;
  28. import android.util.Log;
  29. import java.io.File;
  30. import java.util.HashMap;
  31. import java.util.LinkedList;
  32. import java.util.List;
  33. import java.util.Locale;
  34. import java.util.Map;
  35. import java.util.MissingResourceException;
  36. public class SpeechSynthesis {
  37. private static final String TAG = SpeechSynthesis.class.getSimpleName();
  38. public static final int GENDER_UNSPECIFIED = 0;
  39. public static final int GENDER_MALE = 1;
  40. public static final int GENDER_FEMALE = 2;
  41. public static final int AGE_ANY = 0;
  42. public static final int AGE_YOUNG = 12;
  43. public static final int AGE_OLD = 60;
  44. public static final int CHANNEL_COUNT_MONO = 1;
  45. public static final int FORMAT_PCM_S16 = 2;
  // Runs once, when this class is first initialized: load the JNI library, then let the native
  // side perform its class-level setup.
  static {
    System.loadLibrary("ttsespeak");
    nativeClassInit();
  }
  50. private final Context mContext;
  51. private final SynthReadyCallback mCallback;
  52. private final String mDatapath;
  53. private boolean mInitialized = false;
  54. private static int mVoiceCount = 0;
  55. private int mSampleRate = 0;
  /**
   * Creates a synthesizer bound to the given context and attempts to initialize the native
   * library straight away.
   *
   * <p>If the voice data directory is missing it is created first. A failure to create it is
   * logged rather than thrown, so construction still completes and initialization is attempted.
   *
   * @param context context used to locate the voice data and check the base resources
   * @param callback receiver for synthesized audio; may be null
   */
  public SpeechSynthesis(Context context, SynthReadyCallback callback) {
    // First, ensure the data directory exists, otherwise init will crash.
    final File dataPath = CheckVoiceData.getDataPath(context);
    if (!dataPath.exists()) {
      Log.e(TAG, "Missing voice data");
      // mkdirs() signals failure through its return value only. The isDirectory() re-check
      // covers the case where something else created the directory in the meantime.
      if (!dataPath.mkdirs() && !dataPath.isDirectory()) {
        Log.e(TAG, "Unable to create voice data directory: " + dataPath.getPath());
      }
    }

    mContext = context;
    mCallback = callback;
    // The native library is given the parent of the data directory, not the directory itself.
    mDatapath = dataPath.getParentFile().getPath();

    attemptInit();
  }
  68. public static String getVersion() {
  69. return nativeGetVersion();
  70. }
  71. public static int getVoiceCount() {
  72. return mVoiceCount;
  73. }
  74. public int getSampleRate() {
  75. return mSampleRate;
  76. }
  77. public int getChannelCount() {
  78. return CHANNEL_COUNT_MONO;
  79. }
  80. public int getAudioFormat() {
  81. return FORMAT_PCM_S16;
  82. }
  83. private Locale getLocaleFromLanguageName(String name) {
  84. if (mLocaleFixes.containsKey(name)) {
  85. return mLocaleFixes.get(name);
  86. }
  87. String[] parts = name.split("-");
  88. switch (parts.length) {
  89. case 1: // language
  90. return new Locale(parts[0]);
  91. case 2: // language-country
  92. return new Locale(parts[0], parts[1]);
  93. case 3: // language-country-variant
  94. return new Locale(parts[0], parts[1], parts[2]);
  95. case 4: // language-country-x-privateuse
  96. return new Locale(parts[0], parts[1], parts[3]);
  97. default:
  98. return null;
  99. }
  100. }
  101. public List<Voice> getAvailableVoices() {
  102. final List<Voice> voices = new LinkedList<Voice>();
  103. final String[] results = nativeGetAvailableVoices();
  104. mVoiceCount = results.length / 4;
  105. for (int i = 0; i < results.length; i += 4) {
  106. final String name = results[i];
  107. final String identifier = results[i + 1];
  108. final int gender = Integer.parseInt(results[i + 2]);
  109. final int age = Integer.parseInt(results[i + 3]);
  110. try {
  111. final Locale locale;
  112. if (identifier.equals("asia/fa-en-us")) {
  113. throw new IllegalArgumentException("Voice '" + identifier + "' is a duplicate voice.");
  114. } else {
  115. locale = getLocaleFromLanguageName(name);
  116. if (locale == null) {
  117. throw new IllegalArgumentException("Locale not supported.");
  118. }
  119. }
  120. String language = locale.getISO3Language();
  121. if (language.equals("")) {
  122. throw new IllegalArgumentException("Language '" + locale.getLanguage() + "' not supported.");
  123. }
  124. String country = locale.getISO3Country();
  125. if (country.equals("") && !locale.getCountry().equals("")) {
  126. throw new IllegalArgumentException("Country '" + locale.getCountry() + "' not supported.");
  127. }
  128. final Voice voice = new Voice(name, identifier, gender, age, locale);
  129. voices.add(voice);
  130. } catch (MissingResourceException e) {
  131. // Android 4.3 throws this exception if the 3-letter language
  132. // (e.g. nci) or country (e.g. 021) code is missing for a locale.
  133. // Earlier versions return an empty string (handled above).
  134. Log.d(TAG, "getAvailableResources: skipping " + name + " => " + e.getMessage());
  135. } catch (IllegalArgumentException e) {
  136. Log.d(TAG, "getAvailableResources: skipping " + name + " => " + e.getMessage());
  137. }
  138. }
  139. return voices;
  140. }
  141. public void setVoice(Voice voice, VoiceVariant variant) {
  142. // NOTE: espeak_SetVoiceByProperties does not support specifying the
  143. // voice variant (e.g. klatt), but espeak_SetVoiceByName does.
  144. if (variant.variant == null) {
  145. nativeSetVoiceByProperties(voice.name, variant.gender, variant.age);
  146. } else {
  147. nativeSetVoiceByName(voice.identifier + "+" + variant.variant);
  148. }
  149. }
  150. public void setPunctuationCharacters(String characters) {
  151. nativeSetPunctuationCharacters(characters);
  152. }
  153. /** Don't announce any punctuation characters. */
  154. public static final int PUNCT_NONE = 0;
  155. /** Announce every punctuation character. */
  156. public static final int PUNCT_ALL = 1;
  157. /** Announce some of the punctuation characters. */
  158. public static final int PUNCT_SOME = 2;
  /** The unit in which a {@link Parameter} value is expressed. */
  public enum UnitType {
    /** The value is a percentage. */
    Percentage,

    /** The value is a rate in words per minute. */
    WordsPerMinute,

    /** The value is one of the PUNCT_* constants. */
    Punctuation,
  }
  165. public class Parameter {
  166. private final int id;
  167. private final int min;
  168. private final int max;
  169. private final UnitType unitType;
  170. private Parameter(int id, int min, int max, UnitType unitType) {
  171. this.id = id;
  172. this.min = min;
  173. this.max = max;
  174. this.unitType = unitType;
  175. }
  176. public int getMinValue() {
  177. return min;
  178. }
  179. public int getMaxValue() {
  180. return max;
  181. }
  182. public int getDefaultValue() {
  183. return nativeGetParameter(id, 0);
  184. }
  185. public int getValue() {
  186. return nativeGetParameter(id, 1);
  187. }
  188. public void setValue(int value, int scale) {
  189. setValue((value * scale) / 100);
  190. }
  191. public void setValue(int value) {
  192. nativeSetParameter(id, value);
  193. }
  194. public UnitType getUnitType() {
  195. return unitType;
  196. }
  197. }
  198. /** Speech rate. */
  199. public final Parameter Rate = new Parameter(1, 80, 450, UnitType.WordsPerMinute);
  200. /** Audio volume. */
  201. public final Parameter Volume = new Parameter(2, 0, 200, UnitType.Percentage);
  202. /** Base pitch. */
  203. public final Parameter Pitch = new Parameter(3, 0, 100, UnitType.Percentage);
  204. /** Pitch range (monotone = 0). */
  205. public final Parameter PitchRange = new Parameter(4, 0, 100, UnitType.Percentage);
  206. /** Which punctuation characters to announce. */
  207. public final Parameter Punctuation = new Parameter(5, 0, 2, UnitType.Punctuation);
  208. public void synthesize(String text, boolean isSsml) {
  209. nativeSynthesize(text, isSsml);
  210. }
  211. public void stop() {
  212. nativeStop();
  213. }
  214. private void nativeSynthCallback(byte[] audioData) {
  215. if (mCallback == null)
  216. return;
  217. if (audioData == null) {
  218. mCallback.onSynthDataComplete();
  219. } else {
  220. mCallback.onSynthDataReady(audioData);
  221. }
  222. }
  223. private void attemptInit() {
  224. if (mInitialized) {
  225. return;
  226. }
  227. if (!CheckVoiceData.hasBaseResources(mContext)) {
  228. Log.e(TAG, "Missing base resources");
  229. return;
  230. }
  231. mSampleRate = nativeCreate(mDatapath);
  232. if (mSampleRate == 0) {
  233. Log.e(TAG, "Failed to initialize speech synthesis library");
  234. return;
  235. }
  236. Log.i(TAG, "Initialized synthesis library with sample rate = " + getSampleRate());
  237. mInitialized = true;
  238. }
  239. public static String getSampleText(Context context, Locale locale) {
  240. final DisplayMetrics metrics = context.getResources().getDisplayMetrics();
  241. final Configuration config = context.getResources().getConfiguration();
  242. final String language = getIanaLanguageCode(locale.getLanguage());
  243. final String country = getIanaCountryCode(locale.getCountry());
  244. config.locale = new Locale(language, country, locale.getVariant());
  245. Resources res = new Resources(context.getAssets(), metrics, config);
  246. return res.getString(R.string.sample_text, config.locale.getDisplayName(config.locale));
  247. }
  248. private static native final boolean nativeClassInit();
  249. private native final int nativeCreate(String path);
  250. private native final static String nativeGetVersion();
  251. private native final String[] nativeGetAvailableVoices();
  252. private native final boolean nativeSetVoiceByName(String name);
  253. private native final boolean nativeSetVoiceByProperties(String language, int gender, int age);
  254. private native final boolean nativeSetParameter(int parameter, int value);
  255. private native final int nativeGetParameter(int parameter, int current);
  256. private native final boolean nativeSetPunctuationCharacters(String characters);
  257. private native final boolean nativeSynthesize(String text, boolean isSsml);
  258. private native final boolean nativeStop();
  /** Receives the audio produced by a {@link SpeechSynthesis} instance. */
  public interface SynthReadyCallback {
    /**
     * Called with each buffer of synthesized audio.
     *
     * @param audioData the audio buffer; never null when called by this class
     */
    void onSynthDataReady(byte[] audioData);

    /** Called when the end of synthesis is signalled by a null audio buffer. */
    void onSynthDataComplete();
  }
  263. public static String getIanaLanguageCode(String code) {
  264. return getIanaLocaleCode(code, mJavaToIanaLanguageCode);
  265. }
  266. public static String getIanaCountryCode(String code) {
  267. return getIanaLocaleCode(code, mJavaToIanaCountryCode);
  268. }
  269. private static String getIanaLocaleCode(String code, final Map<String, String> javaToIana) {
  270. final String iana = javaToIana.get(code);
  271. if (iana != null) {
  272. return iana;
  273. }
  274. return code;
  275. }
  /** Three-letter language codes mapped to the two-letter codes used for lookups. */
  private static final Map<String, String> mJavaToIanaLanguageCode = new HashMap<String, String>();

  /** Three-letter country codes mapped to their two-letter equivalents. */
  private static final Map<String, String> mJavaToIanaCountryCode = new HashMap<String, String>();

  /** Language names that bypass the generic parsing in getLocaleFromLanguageName. */
  private static final HashMap<String, Locale> mLocaleFixes = new HashMap<String, Locale>();

  static {
    // Each row is { three-letter code, two-letter code }; rows are inserted in listed order.
    final String[][] languageCodes = {
      {"afr", "af"}, {"amh", "am"}, {"ara", "ar"}, {"arg", "an"},
      {"asm", "as"}, {"aze", "az"}, {"bul", "bg"}, {"ben", "bn"},
      {"bos", "bs"}, {"cat", "ca"}, {"ces", "cs"}, {"cym", "cy"},
      {"dan", "da"}, {"deu", "de"}, {"ell", "el"}, {"eng", "en"},
      {"epo", "eo"}, {"spa", "es"}, {"est", "et"}, {"eus", "eu"},
      {"fas", "fa"}, {"fin", "fi"}, {"fra", "fr"}, {"gle", "ga"},
      {"gla", "gd"}, {"grn", "gn"}, {"guj", "gu"}, {"hin", "hi"},
      {"hrv", "hr"}, {"hun", "hu"}, {"hye", "hy"}, {"ina", "ia"},
      {"ind", "in"}, // NOTE: The deprecated 'in' code is used by Java/Android.
      {"isl", "is"}, {"ita", "it"}, {"jpn", "ja"}, {"kat", "ka"},
      {"kal", "kl"}, {"kan", "kn"}, {"kir", "ky"}, {"kor", "ko"},
      {"kur", "ku"}, {"lat", "la"}, {"lit", "lt"}, {"lav", "lv"},
      {"mkd", "mk"}, {"mal", "ml"}, {"mar", "mr"}, {"mlt", "mt"},
      {"mri", "mi"}, {"msa", "ms"}, {"mya", "my"}, {"nep", "ne"},
      {"nld", "nl"}, {"nob", "nb"}, {"nor", "no"}, {"ori", "or"},
      {"orm", "om"}, {"pan", "pa"}, {"pol", "pl"}, {"por", "pt"},
      {"ron", "ro"}, {"rus", "ru"}, {"sin", "si"}, {"slk", "sk"},
      {"slv", "sl"}, {"snd", "sd"}, {"sqi", "sq"}, {"srp", "sr"},
      {"swe", "sv"}, {"swa", "sw"}, {"tam", "ta"}, {"tel", "te"},
      {"tat", "tt"}, {"tsn", "tn"}, {"tur", "tr"}, {"urd", "ur"},
      {"vie", "vi"}, {"zho", "zh"},
    };
    for (int i = 0; i < languageCodes.length; i++) {
      mJavaToIanaLanguageCode.put(languageCodes[i][0], languageCodes[i][1]);
    }

    final String[][] countryCodes = {
      {"ARM", "AM"}, {"BEL", "BE"}, {"BRA", "BR"}, {"CHE", "CH"},
      {"FRA", "FR"}, {"GBR", "GB"}, {"HKG", "HK"}, {"JAM", "JM"},
      {"MEX", "MX"}, {"PRT", "PT"}, {"USA", "US"}, {"VNM", "VN"},
    };
    for (int i = 0; i < countryCodes.length; i++) {
      mJavaToIanaCountryCode.put(countryCodes[i][0], countryCodes[i][1]);
    }

    // Fix up BCP47 locales not handled correctly by Android:
    mLocaleFixes.put("cmn", new Locale("zh"));
    mLocaleFixes.put("en-029", new Locale("en", "JM"));
    mLocaleFixes.put("es-419", new Locale("es", "MX"));
    // hy-arevmda crashes on Android 5.0
    mLocaleFixes.put("hy-arevmda", new Locale("hy", "AM", "arevmda"));
    mLocaleFixes.put("yue", new Locale("zh", "HK"));
  }
  378. }