eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

SpeechSynthesis.java 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535
  1. /*
  2. * Copyright (C) 2012-2013 Reece H. Dunn
  3. * Copyright (C) 2011 Google Inc.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /*
  18. * This file implements the Java API to eSpeak using the JNI bindings.
  19. *
  20. * Android Version: 4.0 (Ice Cream Sandwich)
  21. * API Version: 14
  22. */
  23. package com.reecedunn.espeak;
import android.content.Context;
import android.content.res.Configuration;
import android.content.res.Resources;
import android.speech.tts.TextToSpeech;
import android.util.DisplayMetrics;
import android.util.Log;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.MissingResourceException;
  36. public class SpeechSynthesis {
  37. private static final String TAG = SpeechSynthesis.class.getSimpleName();
  38. public static final int GENDER_UNSPECIFIED = 0;
  39. public static final int GENDER_MALE = 1;
  40. public static final int GENDER_FEMALE = 2;
  41. public static final int AGE_ANY = 0;
  42. public static final int AGE_YOUNG = 12;
  43. public static final int AGE_OLD = 60;
  44. static {
  45. System.loadLibrary("ttsespeak");
  46. nativeClassInit();
  47. }
  48. private final Context mContext;
  49. private final SynthReadyCallback mCallback;
  50. private final String mDatapath;
  51. private boolean mInitialized = false;
  52. public SpeechSynthesis(Context context, SynthReadyCallback callback) {
  53. // First, ensure the data directory exists, otherwise init will crash.
  54. final File dataPath = CheckVoiceData.getDataPath(context);
  55. if (!dataPath.exists()) {
  56. Log.e(TAG, "Missing voice data");
  57. dataPath.mkdirs();
  58. }
  59. mContext = context;
  60. mCallback = callback;
  61. mDatapath = dataPath.getParentFile().getPath();
  62. attemptInit();
  63. }
  64. @Override
  65. protected void finalize() {
  66. nativeDestroy();
  67. }
  68. public static String getVersion() {
  69. return nativeGetVersion();
  70. }
  71. public int getSampleRate() {
  72. return nativeGetSampleRate();
  73. }
  74. public int getChannelCount() {
  75. return nativeGetChannelCount();
  76. }
  77. public int getAudioFormat() {
  78. return nativeGetAudioFormat();
  79. }
  80. public int getBufferSizeInBytes() {
  81. final int bufferSizeInMillis = nativeGetBufferSizeInMillis();
  82. final int sampleRate = nativeGetSampleRate();
  83. return (bufferSizeInMillis * sampleRate) / 1000;
  84. }
  85. public List<Voice> getAvailableVoices() {
  86. final List<Voice> voices = new LinkedList<Voice>();
  87. final String[] results = nativeGetAvailableVoices();
  88. for (int i = 0; i < results.length; i += 4) {
  89. final String name = results[i];
  90. final String identifier = results[i + 1];
  91. final int gender = Integer.parseInt(results[i + 2]);
  92. final int age = Integer.parseInt(results[i + 3]);
  93. final Locale locale;
  94. if (name.equals("fa-pin")) {
  95. // Android locales do not support scripts, so fa-Latn is not possible for Farsi Pinglish:
  96. locale = null;
  97. }
  98. else if (name.equals("en-sc")) {
  99. // 'SC' is not a country code.
  100. locale = new Locale("en", "GB", "scotland");
  101. } else if (name.equals("en-wi")) {
  102. // 'WI' is not a country code.
  103. locale = new Locale("en", "029");
  104. } else if (name.equals("es-la")) {
  105. // 'LA' is the country code for Laos, not Latin America.
  106. locale = new Locale("es", "419");
  107. } else if (name.equals("hy-west")) {
  108. // 'west' is not a country code.
  109. locale = new Locale("hy", "", "arevmda");
  110. } else if (name.equals("vi-hue")) {
  111. // 'hue' is for the Hue Province accent/dialect (Central Vietnamese).
  112. locale = new Locale("vi", "", "hue");
  113. } else if (name.equals("vi-sgn")) {
  114. // 'sgn' is for the Saigon accent/dialect (South Vietnamese).
  115. locale = new Locale("vi", "", "saigon");
  116. } else if (name.equals("zh-yue")) {
  117. // Android/Java does not support macrolanguages.
  118. locale = new Locale("zh", "HK");
  119. } else {
  120. String[] parts = name.split("-");
  121. switch (parts.length) {
  122. case 1: // language
  123. locale = new Locale(parts[0]);
  124. break;
  125. case 2: // language-country
  126. if (parts[1].equals("uk")) {
  127. // 'uk' is the language code for Ukranian, not Great Britain.
  128. parts[1] = "GB";
  129. }
  130. locale = new Locale(parts[0], parts[1]);
  131. break;
  132. case 3: // language-country-variant
  133. if (parts[1].equals("uk")) {
  134. // 'uk' is the language code for Ukranian, not Great Britain.
  135. parts[1] = "GB";
  136. }
  137. locale = new Locale(parts[0], parts[1], parts[2]);
  138. break;
  139. default:
  140. locale = null;
  141. }
  142. }
  143. if (locale != null && !locale.getISO3Language().equals("")) {
  144. final Voice voice = new Voice(name, identifier, gender, age, locale);
  145. voices.add(voice);
  146. }
  147. }
  148. return voices;
  149. }
  150. public void setVoice(Voice voice, VoiceVariant variant) {
  151. // NOTE: espeak_SetVoiceByProperties does not support specifying the
  152. // voice variant (e.g. klatt), but espeak_SetVoiceByName does.
  153. if (variant.variant == null) {
  154. nativeSetVoiceByProperties(voice.name, variant.gender, variant.age);
  155. } else {
  156. nativeSetVoiceByName(voice.identifier + "+" + variant.variant);
  157. }
  158. }
  159. /** Don't announce any punctuation characters. */
  160. public static int PUNCT_NONE = 0;
  161. /** Announce every punctuation character. */
  162. public static int PUNCT_ALL = 1;
  163. /** Announce some of the punctuation characters. */
  164. public static int PUNCT_SOME = 2;
  165. public enum UnitType {
  166. Percentage,
  167. WordsPerMinute,
  168. /** One of the PUNCT_* constants. */
  169. Punctuation,
  170. }
  171. public class Parameter {
  172. private final int id;
  173. private final int min;
  174. private final int max;
  175. private final UnitType unitType;
  176. private Parameter(int id, int min, int max, UnitType unitType) {
  177. this.id = id;
  178. this.min = min;
  179. this.max = max;
  180. this.unitType = unitType;
  181. }
  182. public int getMinValue() {
  183. return min;
  184. }
  185. public int getMaxValue() {
  186. return max;
  187. }
  188. public int getDefaultValue() {
  189. return nativeGetParameter(id, 0);
  190. }
  191. public int getValue() {
  192. return nativeGetParameter(id, 1);
  193. }
  194. public void setValue(int value, int scale) {
  195. setValue((value * scale) / 100);
  196. }
  197. public void setValue(int value) {
  198. nativeSetParameter(id, value);
  199. }
  200. public UnitType getUnitType() {
  201. return unitType;
  202. }
  203. }
  204. /** Speech rate. */
  205. public final Parameter Rate = new Parameter(1, 80, 450, UnitType.WordsPerMinute);
  206. /** Audio volume. */
  207. public final Parameter Volume = new Parameter(2, 0, 200, UnitType.Percentage);
  208. /** Base pitch. */
  209. public final Parameter Pitch = new Parameter(3, 0, 100, UnitType.Percentage);
  210. /** Pitch range (monotone = 0). */
  211. public final Parameter PitchRange = new Parameter(4, 0, 100, UnitType.Percentage);
  212. /** Which punctuation characters to announce. */
  213. public final Parameter Punctuation = new Parameter(5, 0, 2, UnitType.Punctuation);
  214. public void synthesize(String text, boolean isSsml) {
  215. nativeSynthesize(text, isSsml);
  216. }
  217. public void stop() {
  218. nativeStop();
  219. }
  220. private void nativeSynthCallback(byte[] audioData) {
  221. if (mCallback == null)
  222. return;
  223. if (audioData == null) {
  224. mCallback.onSynthDataComplete();
  225. } else {
  226. mCallback.onSynthDataReady(audioData);
  227. }
  228. }
  229. private void attemptInit() {
  230. if (mInitialized) {
  231. return;
  232. }
  233. if (!CheckVoiceData.hasBaseResources(mContext)) {
  234. Log.e(TAG, "Missing base resources");
  235. return;
  236. }
  237. if (!nativeCreate(mDatapath)) {
  238. Log.e(TAG, "Failed to initialize speech synthesis library");
  239. return;
  240. }
  241. Log.i(TAG, "Initialized synthesis library with sample rate = " + getSampleRate());
  242. mInitialized = true;
  243. }
  244. public static String getSampleText(Context context, Locale locale) {
  245. final DisplayMetrics metrics = context.getResources().getDisplayMetrics();
  246. final Configuration config = context.getResources().getConfiguration();
  247. final String language = getIanaLocaleCode(locale.getLanguage(), mJavaToIanaLanguageCode);
  248. final String country = getIanaLocaleCode(locale.getCountry(), mJavaToIanaCountryCode);
  249. config.locale = new Locale(language, country, locale.getVariant());
  250. Resources res = new Resources(context.getAssets(), metrics, config);
  251. return res.getString(R.string.sample_text, config.locale.getDisplayName(config.locale));
  252. }
  253. private int mNativeData;
  254. private static native final boolean nativeClassInit();
  255. private native final boolean nativeCreate(String path);
  256. private native final boolean nativeDestroy();
  257. private native final static String nativeGetVersion();
  258. private native final int nativeGetSampleRate();
  259. private native final int nativeGetChannelCount();
  260. private native final int nativeGetAudioFormat();
  261. private native final int nativeGetBufferSizeInMillis();
  262. private native final String[] nativeGetAvailableVoices();
  263. private native final boolean nativeSetVoiceByName(String name);
  264. private native final boolean nativeSetVoiceByProperties(String language, int gender, int age);
  265. private native final boolean nativeSetParameter(int parameter, int value);
  266. private native final int nativeGetParameter(int parameter, int current);
  267. private native final boolean nativeSynthesize(String text, boolean isSsml);
  268. private native final boolean nativeStop();
  269. public interface SynthReadyCallback {
  270. void onSynthDataReady(byte[] audioData);
  271. void onSynthDataComplete();
  272. }
  273. public static class VoiceVariant {
  274. public final String variant;
  275. public final int gender;
  276. public final int age;
  277. protected VoiceVariant(String variant, int age) {
  278. if (variant.equals("male")) {
  279. this.variant = null;
  280. this.gender = GENDER_MALE;
  281. } else if (variant.equals("female")) {
  282. this.variant = null;
  283. this.gender = GENDER_FEMALE;
  284. } else {
  285. this.variant = variant;
  286. this.gender = GENDER_UNSPECIFIED;
  287. }
  288. this.age = age;
  289. }
  290. @Override
  291. public String toString() {
  292. final String ret;
  293. if (gender == GENDER_MALE) {
  294. ret = "male";
  295. } else if (gender == GENDER_FEMALE) {
  296. ret = "female";
  297. } else {
  298. ret = variant;
  299. }
  300. if (age == AGE_YOUNG) {
  301. return ret + "-young";
  302. } else if (age == AGE_OLD) {
  303. return ret + "-old";
  304. }
  305. return ret;
  306. }
  307. }
  308. public static VoiceVariant parseVoiceVariant(String value) {
  309. String[] parts = value.split("-");
  310. int age = AGE_ANY;
  311. switch (parts.length) {
  312. case 1: // variant
  313. break;
  314. case 2: // variant-age
  315. age = parts[1].equals("young") ? AGE_YOUNG : AGE_OLD;
  316. break;
  317. default:
  318. return null;
  319. }
  320. return new VoiceVariant(parts[0], age);
  321. }
  322. public class Voice {
  323. public final String name;
  324. public final String identifier;
  325. public final int gender;
  326. public final int age;
  327. public final Locale locale;
  328. public Voice(String name, String identifier, int gender, int age, Locale locale) {
  329. this.name = name;
  330. this.identifier = identifier;
  331. this.gender = gender;
  332. this.age = age;
  333. this.locale = locale;
  334. }
  335. /**
  336. * Attempts a partial match against a query locale.
  337. *
  338. * @param query The locale to match.
  339. * @return A text-to-speech availability code. One of:
  340. * <ul>
  341. * <li>{@link TextToSpeech#LANG_NOT_SUPPORTED}
  342. * <li>{@link TextToSpeech#LANG_AVAILABLE}
  343. * <li>{@link TextToSpeech#LANG_COUNTRY_AVAILABLE}
  344. * <li>{@link TextToSpeech#LANG_COUNTRY_VAR_AVAILABLE}
  345. * </ul>
  346. */
  347. public int match(Locale query) {
  348. if (!locale.getISO3Language().equals(query.getISO3Language())) {
  349. return TextToSpeech.LANG_NOT_SUPPORTED;
  350. } else if (!locale.getISO3Country().equals(query.getISO3Country())) {
  351. return TextToSpeech.LANG_AVAILABLE;
  352. } else if (!locale.getVariant().equals(query.getVariant())) {
  353. return TextToSpeech.LANG_COUNTRY_AVAILABLE;
  354. } else {
  355. return TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE;
  356. }
  357. }
  358. @Override
  359. public String toString() {
  360. return locale.toString().replace('_', '-');
  361. }
  362. }
  363. private static String getIanaLocaleCode(String code, final Map<String, String> javaToIana) {
  364. final String iana = javaToIana.get(code);
  365. if (iana != null) {
  366. return iana;
  367. }
  368. return code;
  369. }
  370. private static final Map<String, String> mJavaToIanaLanguageCode = new HashMap<String, String>();
  371. private static final Map<String, String> mJavaToIanaCountryCode = new HashMap<String, String>();
  372. static {
  373. mJavaToIanaLanguageCode.put("afr", "af");
  374. mJavaToIanaLanguageCode.put("aka", "ak");
  375. mJavaToIanaLanguageCode.put("amh", "am");
  376. mJavaToIanaLanguageCode.put("arg", "an");
  377. mJavaToIanaLanguageCode.put("aze", "az");
  378. mJavaToIanaLanguageCode.put("bul", "bg");
  379. mJavaToIanaLanguageCode.put("bos", "bs");
  380. mJavaToIanaLanguageCode.put("cat", "ca");
  381. mJavaToIanaLanguageCode.put("ces", "cs");
  382. mJavaToIanaLanguageCode.put("cym", "cy");
  383. mJavaToIanaLanguageCode.put("dan", "da");
  384. mJavaToIanaLanguageCode.put("deu", "de");
  385. mJavaToIanaLanguageCode.put("div", "dv");
  386. mJavaToIanaLanguageCode.put("ell", "el");
  387. mJavaToIanaLanguageCode.put("eng", "en");
  388. mJavaToIanaLanguageCode.put("epo", "eo");
  389. mJavaToIanaLanguageCode.put("spa", "es");
  390. mJavaToIanaLanguageCode.put("est", "et");
  391. mJavaToIanaLanguageCode.put("fas", "fa");
  392. mJavaToIanaLanguageCode.put("fin", "fi");
  393. mJavaToIanaLanguageCode.put("fra", "fr");
  394. mJavaToIanaLanguageCode.put("gle", "ga");
  395. mJavaToIanaLanguageCode.put("hin", "hi");
  396. mJavaToIanaLanguageCode.put("hrv", "hr");
  397. mJavaToIanaLanguageCode.put("hat", "ht");
  398. mJavaToIanaLanguageCode.put("hun", "hu");
  399. mJavaToIanaLanguageCode.put("hye", "hy");
  400. mJavaToIanaLanguageCode.put("ind", "in"); // NOTE: The deprecated 'in' code is used by Java/Android.
  401. mJavaToIanaLanguageCode.put("isl", "is");
  402. mJavaToIanaLanguageCode.put("ita", "it");
  403. mJavaToIanaLanguageCode.put("kat", "ka");
  404. mJavaToIanaLanguageCode.put("kaz", "kk");
  405. mJavaToIanaLanguageCode.put("kal", "kl");
  406. mJavaToIanaLanguageCode.put("kan", "kn");
  407. mJavaToIanaLanguageCode.put("kor", "ko");
  408. mJavaToIanaLanguageCode.put("kur", "ku");
  409. mJavaToIanaLanguageCode.put("lat", "la");
  410. mJavaToIanaLanguageCode.put("lit", "lt");
  411. mJavaToIanaLanguageCode.put("lav", "lv");
  412. mJavaToIanaLanguageCode.put("mkd", "mk");
  413. mJavaToIanaLanguageCode.put("mal", "ml");
  414. mJavaToIanaLanguageCode.put("mlt", "mt");
  415. mJavaToIanaLanguageCode.put("nep", "ne");
  416. mJavaToIanaLanguageCode.put("nld", "nl");
  417. mJavaToIanaLanguageCode.put("nor", "no");
  418. mJavaToIanaLanguageCode.put("pan", "pa");
  419. mJavaToIanaLanguageCode.put("pol", "pl");
  420. mJavaToIanaLanguageCode.put("por", "pt");
  421. mJavaToIanaLanguageCode.put("ron", "ro");
  422. mJavaToIanaLanguageCode.put("rus", "ru");
  423. mJavaToIanaLanguageCode.put("kin", "rw");
  424. mJavaToIanaLanguageCode.put("sin", "si");
  425. mJavaToIanaLanguageCode.put("slk", "sk");
  426. mJavaToIanaLanguageCode.put("slv", "sl");
  427. mJavaToIanaLanguageCode.put("sqi", "sq");
  428. mJavaToIanaLanguageCode.put("srp", "sr");
  429. mJavaToIanaLanguageCode.put("swe", "sv");
  430. mJavaToIanaLanguageCode.put("swa", "sw");
  431. mJavaToIanaLanguageCode.put("tam", "ta");
  432. mJavaToIanaLanguageCode.put("tel", "te");
  433. mJavaToIanaLanguageCode.put("tsn", "tn");
  434. mJavaToIanaLanguageCode.put("tur", "tr");
  435. mJavaToIanaLanguageCode.put("tat", "tt");
  436. mJavaToIanaLanguageCode.put("urd", "ur");
  437. mJavaToIanaLanguageCode.put("vie", "vi");
  438. mJavaToIanaLanguageCode.put("wol", "wo");
  439. mJavaToIanaLanguageCode.put("zho", "zh");
  440. mJavaToIanaLanguageCode.put("yue", "zh");
  441. mJavaToIanaCountryCode.put("029", ""); // Locale.getCountry() does not map numeric country codes.
  442. mJavaToIanaCountryCode.put("419", ""); // Locale.getCountry() does not map numeric country codes.
  443. mJavaToIanaCountryCode.put("BEL", "BE");
  444. mJavaToIanaCountryCode.put("BRA", "BR");
  445. mJavaToIanaCountryCode.put("FRA", "FR");
  446. mJavaToIanaCountryCode.put("GBR", "GB");
  447. mJavaToIanaCountryCode.put("PRT", "PT");
  448. mJavaToIanaCountryCode.put("USA", "US");
  449. }
  450. }