eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

eSpeakService.c 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. /*
  2. * Copyright (C) 2012-2017 Reece H. Dunn
  3. * Copyright (C) 2011 Google Inc.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /*
  18. * This file contains the JNI bindings to eSpeak used by SpeechSynthesis.java.
  19. *
  20. * Android Version: 4.0 (Ice Cream Sandwich)
  21. * API Version: 14
  22. */
  23. #include <stdio.h>
  24. #include <unistd.h>
  25. #include <stdlib.h>
  26. #include <stdbool.h>
  27. #include <jni.h>
  28. #include <espeak/speak_lib.h>
  29. #include <Log.h>
  30. #define BUFFER_SIZE_IN_MILLISECONDS 1000
  31. /* These are helpers for converting a jstring to wchar_t*.
  32. *
  33. * This assumes that wchar_t is a 32-bit (UTF-32) value.
  34. */
  35. //@{
  /*
   * Decode a single UTF-8 sequence starting at `in`.
   *
   * in: points at the first byte of the sequence to decode.
   * c:  receives the decoded code point.
   *
   * Returns a pointer to the first byte after the consumed sequence. At least
   * one byte is always consumed.
   *
   * A lead byte announces how many continuation bytes (10xxxxxx) follow. If a
   * continuation byte is missing -- including when the NUL terminator is
   * reached in the middle of a sequence -- or if the sequence starts with a
   * stray continuation byte, U+FFFD (REPLACEMENT CHARACTER) is stored and
   * decoding stops in front of the offending byte. This guarantees that the
   * terminator of a NUL-terminated string is never stepped over.
   */
  static const char *utf8_read(const char *in, wchar_t *c)
  {
    enum { REPLACEMENT_CHARACTER = 0xFFFD };

    const uint8_t lead = (uint8_t)*in++;
    wchar_t value;
    int trailing;

    /* Single byte (ASCII) sequence. */
    if (lead < 0x80) {
      *c = lead;
      return in;
    }

    /* A continuation byte cannot start a sequence. */
    if (lead < 0xC0) {
      *c = REPLACEMENT_CHARACTER;
      return in;
    }

    switch (lead & 0xF0) {
    case 0xF0: /* 4 byte sequence */
      value = lead & 0x07;
      trailing = 3;
      break;
    case 0xE0: /* 3 byte sequence */
      value = lead & 0x0F;
      trailing = 2;
      break;
    default: /* 2 byte sequence (lead byte 0xC0..0xDF) */
      value = lead & 0x1F;
      trailing = 1;
      break;
    }

    while (trailing-- > 0) {
      const uint8_t next = (uint8_t)*in;
      if ((next & 0xC0) != 0x80) {
        /* Truncated or malformed: leave `next` for the following call. */
        *c = REPLACEMENT_CHARACTER;
        return in;
      }
      value = (value << 6) + (next & 0x3F);
      ++in;
    }

    *c = value;
    return in;
  }
  60. static wchar_t *unicode_string(JNIEnv *env, jstring str)
  61. {
  62. if (str == NULL) return NULL;
  63. const char *utf8 = (*env)->GetStringUTFChars(env, str, NULL);
  64. wchar_t *utf32 = (wchar_t *)malloc((strlen(utf8) + 1) * sizeof(wchar_t));
  65. const char *utf8_current = utf8;
  66. wchar_t *utf32_current = utf32;
  67. while (*utf8_current)
  68. {
  69. utf8_current = utf8_read(utf8_current, utf32_current);
  70. ++utf32_current;
  71. }
  72. *utf32_current = 0;
  73. (*env)->ReleaseStringUTFChars(env, str, utf8);
  74. return utf32;
  75. }
  76. //@}
  /* Name identifying this file; presumably picked up by the LOG* macros
   * from Log.h -- confirm there. */
  #define LOG_TAG "eSpeakService"

  /* When true, the JNI entry points trace their invocation with LOGV. */
  #define DEBUG true

  /* Values returned by SynthCallback. */
  enum synthesis_result {
    SYNTH_CONTINUE = 0, /* returned after a buffer of samples was delivered */
    SYNTH_ABORT = 1     /* returned when no samples were supplied */
  };
  83. static JavaVM *jvm = NULL;
  84. jmethodID METHOD_nativeSynthCallback;
  85. static JNIEnv *getJniEnv() {
  86. JNIEnv *env = NULL;
  87. (*jvm)->AttachCurrentThread(jvm, &env, NULL);
  88. return env;
  89. }
  90. /* Callback from espeak. Should call back to the TTS API */
  91. static int SynthCallback(short *audioData, int numSamples,
  92. espeak_EVENT *events) {
  93. JNIEnv *env = getJniEnv();
  94. jobject object = (jobject)events->user_data;
  95. if (numSamples < 1) {
  96. (*env)->CallVoidMethod(env, object, METHOD_nativeSynthCallback, NULL);
  97. return SYNTH_ABORT;
  98. } else {
  99. jbyteArray arrayAudioData = (*env)->NewByteArray(env, numSamples * 2);
  100. (*env)->SetByteArrayRegion(env, arrayAudioData, 0, (numSamples * 2), (jbyte *) audioData);
  101. (*env)->CallVoidMethod(env, object, METHOD_nativeSynthCallback, arrayAudioData);
  102. return SYNTH_CONTINUE;
  103. }
  104. }
  105. #ifdef __cplusplus
  106. extern "C" {
  107. #endif /* __cplusplus */
  108. JNIEXPORT jint
  109. JNICALL JNI_OnLoad(JavaVM *vm, void *reserved) {
  110. jvm = vm;
  111. JNIEnv *env;
  112. if ((*vm)->GetEnv(vm, (void **) &env, JNI_VERSION_1_6) != JNI_OK) {
  113. LOGE("Failed to get the environment using GetEnv()");
  114. return -1;
  115. }
  116. return JNI_VERSION_1_6;
  117. }
  118. JNIEXPORT jboolean
  119. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeClassInit(
  120. JNIEnv* env, jclass clazz) {
  121. if (DEBUG) LOGV("%s", __FUNCTION__);
  122. METHOD_nativeSynthCallback = (*env)->GetMethodID(env, clazz, "nativeSynthCallback", "([B)V");
  123. return JNI_TRUE;
  124. }
  125. JNIEXPORT jint
  126. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeCreate(
  127. JNIEnv *env, jobject object, jstring path) {
  128. if (DEBUG) LOGV("%s [env=%p, object=%p]", __FUNCTION__, env, object);
  129. const char *c_path = path ? (*env)->GetStringUTFChars(env, path, NULL) : NULL;
  130. if (DEBUG) LOGV("Initializing with path %s", c_path);
  131. int sampleRate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, BUFFER_SIZE_IN_MILLISECONDS, c_path, 0);
  132. if (c_path) (*env)->ReleaseStringUTFChars(env, path, c_path);
  133. return sampleRate;
  134. }
  135. JNIEXPORT jobject
  136. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeGetVersion(
  137. JNIEnv *env, jclass clazz) {
  138. if (DEBUG) LOGV("%s", __FUNCTION__);
  139. return (*env)->NewStringUTF(env, espeak_Info(NULL));
  140. }
  141. JNIEXPORT jobjectArray
  142. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeGetAvailableVoices(
  143. JNIEnv *env, jobject object) {
  144. if (DEBUG) LOGV("%s", __FUNCTION__);
  145. const espeak_VOICE **voices = espeak_ListVoices(NULL);
  146. int count;
  147. // First, count the number of voices returned.
  148. for (count = 0; voices[count] != NULL; count++);
  149. // Next, create a Java String array.
  150. jobjectArray voicesArray = (jobjectArray) (*env)->NewObjectArray(
  151. env, count * 4, (*env)->FindClass(env, "java/lang/String"), NULL);
  152. const espeak_VOICE *v;
  153. char gender_buf[12];
  154. char age_buf[12];
  155. // Finally, populate the array.
  156. for (int i = 0, voicesIndex = 0; (v = voices[i]) != NULL; i++) {
  157. const char *lang_name = v->languages + 1;
  158. const char *identifier = v->identifier;
  159. sprintf(gender_buf, "%d", v->gender);
  160. sprintf(age_buf, "%d", v->age);
  161. jstring lang = (*env)->NewStringUTF(env, lang_name);
  162. (*env)->SetObjectArrayElement(env, voicesArray, voicesIndex++, lang);
  163. (*env)->DeleteLocalRef(env, lang);
  164. jstring ident = (*env)->NewStringUTF(env, identifier);
  165. (*env)->SetObjectArrayElement(env, voicesArray, voicesIndex++, ident);
  166. (*env)->DeleteLocalRef(env, ident);
  167. jstring gender = (*env)->NewStringUTF(env, gender_buf);
  168. (*env)->SetObjectArrayElement(env, voicesArray, voicesIndex++, gender);
  169. (*env)->DeleteLocalRef(env, gender);
  170. jstring age = (*env)->NewStringUTF(env, age_buf);
  171. (*env)->SetObjectArrayElement(env, voicesArray, voicesIndex++, age);
  172. (*env)->DeleteLocalRef(env, age);
  173. }
  174. return voicesArray;
  175. }
  176. JNIEXPORT jboolean
  177. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeSetVoiceByName(
  178. JNIEnv *env, jobject object, jstring name) {
  179. const char *c_name = name ? (*env)->GetStringUTFChars(env, name, NULL) : NULL;
  180. if (DEBUG) LOGV("%s(name=%s)", __FUNCTION__, c_name);
  181. const espeak_ERROR result = espeak_SetVoiceByName(c_name);
  182. if (c_name) (*env)->ReleaseStringUTFChars(env, name, c_name);
  183. switch (result) {
  184. case EE_OK: return JNI_TRUE;
  185. case EE_INTERNAL_ERROR: LOGE("espeak_SetVoiceByName: internal error."); break;
  186. case EE_BUFFER_FULL: LOGE("espeak_SetVoiceByName: buffer full."); break;
  187. case EE_NOT_FOUND: LOGE("espeak_SetVoiceByName: not found."); break;
  188. }
  189. return JNI_FALSE;
  190. }
  191. JNIEXPORT jboolean
  192. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeSetVoiceByProperties(
  193. JNIEnv *env, jobject object, jstring language, jint gender, jint age) {
  194. const char *c_language = language ? (*env)->GetStringUTFChars(env, language, NULL) : NULL;
  195. if (DEBUG) LOGV("%s(language=%s, gender=%d, age=%d)", __FUNCTION__, c_language, gender, age);
  196. espeak_VOICE voice_select;
  197. memset(&voice_select, 0, sizeof(espeak_VOICE));
  198. voice_select.languages = c_language;
  199. voice_select.gender = (int) gender;
  200. voice_select.age = (int) age;
  201. const espeak_ERROR result = espeak_SetVoiceByProperties(&voice_select);
  202. if (c_language) (*env)->ReleaseStringUTFChars(env, language, c_language);
  203. switch (result) {
  204. case EE_OK: return JNI_TRUE;
  205. case EE_INTERNAL_ERROR: LOGE("espeak_SetVoiceByProperties: internal error."); break;
  206. case EE_BUFFER_FULL: LOGE("espeak_SetVoiceByProperties: buffer full."); break;
  207. case EE_NOT_FOUND: LOGE("espeak_SetVoiceByProperties: not found."); break;
  208. }
  209. return JNI_FALSE;
  210. }
  211. JNIEXPORT jboolean
  212. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeSetParameter(
  213. JNIEnv *env, jobject object, jint parameter, jint value) {
  214. if (DEBUG) LOGV("%s(parameter=%d, value=%d)", __FUNCTION__, parameter, value);
  215. const espeak_ERROR result = espeak_SetParameter((espeak_PARAMETER)parameter, (int)value, 0);
  216. switch (result) {
  217. case EE_OK: return JNI_TRUE;
  218. case EE_INTERNAL_ERROR: LOGE("espeak_SetParameter: internal error."); break;
  219. case EE_BUFFER_FULL: LOGE("espeak_SetParameter: buffer full."); break;
  220. case EE_NOT_FOUND: LOGE("espeak_SetParameter: not found."); break;
  221. }
  222. return JNI_FALSE;
  223. }
  224. JNIEXPORT jint
  225. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeGetParameter(
  226. JNIEnv *env, jobject object, jint parameter, jint current) {
  227. if (DEBUG) LOGV("%s(parameter=%d, pitch=%d)", __FUNCTION__, parameter, current);
  228. return espeak_GetParameter((espeak_PARAMETER)parameter, (int)current);
  229. }
  230. JNIEXPORT jboolean
  231. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeSetPunctuationCharacters(
  232. JNIEnv *env, jobject object, jstring characters) {
  233. if (DEBUG) LOGV("%s)", __FUNCTION__);
  234. wchar_t *list = unicode_string(env, characters);
  235. const espeak_ERROR result = espeak_SetPunctuationList(list);
  236. free(list);
  237. switch (result) {
  238. case EE_OK: return JNI_TRUE;
  239. case EE_INTERNAL_ERROR: LOGE("espeak_SetPunctuationList: internal error."); break;
  240. case EE_BUFFER_FULL: LOGE("espeak_SetPunctuationList: buffer full."); break;
  241. case EE_NOT_FOUND: LOGE("espeak_SetPunctuationList: not found."); break;
  242. }
  243. return JNI_FALSE;
  244. }
  245. JNIEXPORT jboolean
  246. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeSynthesize(
  247. JNIEnv *env, jobject object, jstring text, jboolean isSsml) {
  248. if (DEBUG) LOGV("%s", __FUNCTION__);
  249. const char *c_text = text ? (*env)->GetStringUTFChars(env, text, NULL) : NULL;
  250. unsigned int unique_identifier;
  251. espeak_SetSynthCallback(SynthCallback);
  252. const espeak_ERROR result = espeak_Synth(c_text, strlen(c_text), 0, // position
  253. POS_CHARACTER, 0, // end position (0 means no end position)
  254. isSsml ? espeakCHARS_UTF8 | espeakSSML // UTF-8 encoded SSML
  255. : espeakCHARS_UTF8, // UTF-8 encoded text
  256. &unique_identifier, object);
  257. espeak_Synchronize();
  258. if (c_text) (*env)->ReleaseStringUTFChars(env, text, c_text);
  259. switch (result) {
  260. case EE_OK: return JNI_TRUE;
  261. case EE_INTERNAL_ERROR: LOGE("espeak_Synth: internal error."); break;
  262. case EE_BUFFER_FULL: LOGE("espeak_Synth: buffer full."); break;
  263. case EE_NOT_FOUND: LOGE("espeak_Synth: not found."); break;
  264. }
  265. return JNI_TRUE;
  266. }
  267. JNIEXPORT jboolean
  268. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeStop(
  269. JNIEnv *env, jobject object) {
  270. if (DEBUG) LOGV("%s", __FUNCTION__);
  271. espeak_Cancel();
  272. return JNI_TRUE;
  273. }
  274. #ifdef __cplusplus
  275. }
  276. #endif /* __cplusplus */