eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

eSpeakService.cpp 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. /*
  2. * Copyright (C) 2012-2013 Reece H. Dunn
  3. * Copyright (C) 2011 Google Inc.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /*
  18. * This file contains the JNI bindings to eSpeak used by SpeechSynthesis.java.
  19. *
  20. * Android Version: 4.0 (Ice Cream Sandwich)
  21. * API Version: 14
  22. */
  23. #include <stdio.h>
  24. #include <unistd.h>
  25. #include <stdlib.h>
  26. #include <jni.h>
  27. #include <speak_lib.h>
  28. #include <TtsEngine.h>
  29. #include <Log.h>
  /* Tag string for this file's log output (presumably consumed by the macros in Log.h). */
  #define LOG_TAG    "eSpeakService"
  /* When true, each JNI entry point logs its name (and arguments) via LOGV. */
  #define DEBUG      true
  /* Supported audio channel counts. */
  enum audio_channel_count {
      CHANNEL_COUNT_MONO   = 1,  /* single channel */
      CHANNEL_COUNT_STEREO = 2   /* two channels */
  };
  /*
   * Audio sample encodings.
   * NOTE(review): the values look like they mirror the Java-side audio
   * format constants -- confirm before changing any of them.
   */
  enum audio_encoding {
      ENCODING_INVALID   = 0,
      ENCODING_DEFAULT   = 1,
      ENCODING_PCM_16BIT = 2,
      ENCODING_PCM_8BIT  = 3
  };
  /* Values returned by SynthCallback to eSpeak. */
  enum synthesis_result {
      SYNTH_CONTINUE = 0,  /* returned after a buffer of audio was delivered */
      SYNTH_ABORT    = 1   /* returned when the callback received no samples */
  };
  46. const int DEFAULT_CHANNEL_COUNT = CHANNEL_COUNT_MONO;
  47. const int DEFAULT_AUDIO_FORMAT = ENCODING_PCM_16BIT;
  48. const int DEFAULT_BUFFER_SIZE = 1000;
  49. struct native_data_t {
  50. JNIEnv *env;
  51. jobject object;
  52. int sampleRate;
  53. int channelCount;
  54. int audioFormat;
  55. int bufferSizeInMillis;
  56. native_data_t() {
  57. env = NULL;
  58. object = NULL;
  59. sampleRate = 0;
  60. channelCount = DEFAULT_CHANNEL_COUNT;
  61. audioFormat = DEFAULT_AUDIO_FORMAT;
  62. bufferSizeInMillis = DEFAULT_BUFFER_SIZE;
  63. }
  64. };
  65. jmethodID METHOD_nativeSynthCallback;
  66. jfieldID FIELD_mNativeData;
  67. static inline native_data_t *getNativeData(JNIEnv *env, jobject object) {
  68. return (native_data_t *) (env->GetIntField(object, FIELD_mNativeData));
  69. }
  70. /* Callback from espeak. Should call back to the TTS API */
  71. static int SynthCallback(short *audioData, int numSamples,
  72. espeak_EVENT *events) {
  73. native_data_t *nat = (native_data_t *) events->user_data;
  74. JNIEnv *env = nat->env;
  75. jobject object = nat->object;
  76. if (numSamples < 1) {
  77. env->CallVoidMethod(object, METHOD_nativeSynthCallback, NULL);
  78. return SYNTH_ABORT;
  79. } else {
  80. jbyteArray arrayAudioData = env->NewByteArray(numSamples * 2);
  81. env->SetByteArrayRegion(arrayAudioData, 0, (numSamples * 2), (jbyte *) audioData);
  82. env->CallVoidMethod(object, METHOD_nativeSynthCallback, arrayAudioData);
  83. return SYNTH_CONTINUE;
  84. }
  85. }
  86. #ifdef __cplusplus
  87. extern "C" {
  88. #endif /* __cplusplus */
  89. JNIEXPORT jint
  90. JNICALL JNI_OnLoad(JavaVM *vm, void *reserved) {
  91. JNIEnv *env;
  92. if (vm->GetEnv((void **) &env, JNI_VERSION_1_6) != JNI_OK) {
  93. LOGE("Failed to get the environment using GetEnv()");
  94. return -1;
  95. }
  96. return JNI_VERSION_1_6;
  97. }
  98. JNIEXPORT jboolean
  99. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeClassInit(
  100. JNIEnv* env, jclass clazz) {
  101. if (DEBUG) LOGV("%s", __FUNCTION__);
  102. METHOD_nativeSynthCallback = env->GetMethodID(clazz, "nativeSynthCallback", "([B)V");
  103. FIELD_mNativeData = env->GetFieldID(clazz, "mNativeData", "I");
  104. return JNI_TRUE;
  105. }
  106. JNIEXPORT jboolean
  107. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeCreate(
  108. JNIEnv *env, jobject object, jstring path) {
  109. if (DEBUG) LOGV("%s", __FUNCTION__);
  110. native_data_t *nat = new native_data_t;
  111. if (nat == NULL) {
  112. LOGE("%s: out of memory!", __FUNCTION__);
  113. return JNI_FALSE;
  114. }
  115. env->SetIntField(object, FIELD_mNativeData, (jint) nat);
  116. const char *c_path = path ? env->GetStringUTFChars(path, NULL) : NULL;
  117. nat->object = env->NewWeakGlobalRef(object);
  118. if (DEBUG) LOGV("Initializing with path %s", c_path);
  119. nat->sampleRate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, nat->bufferSizeInMillis, c_path, 0);
  120. if (c_path) env->ReleaseStringUTFChars(path, c_path);
  121. return (nat->sampleRate > 0) ? JNI_TRUE : JNI_FALSE;
  122. }
  123. JNIEXPORT jboolean
  124. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeDestroy(
  125. JNIEnv *env, jobject object) {
  126. if (DEBUG) LOGV("%s", __FUNCTION__);
  127. native_data_t *nat = getNativeData(env, object);
  128. env->DeleteWeakGlobalRef(nat->object);
  129. delete nat;
  130. return JNI_TRUE;
  131. }
  132. JNIEXPORT jobject
  133. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeGetVersion(
  134. JNIEnv *env, jclass clazz) {
  135. if (DEBUG) LOGV("%s", __FUNCTION__);
  136. return env->NewStringUTF(espeak_Info(NULL));
  137. }
  138. JNIEXPORT jint
  139. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeGetSampleRate(
  140. JNIEnv *env, jobject object) {
  141. if (DEBUG) LOGV("%s", __FUNCTION__);
  142. const native_data_t *nat = getNativeData(env, object);
  143. return (jint) nat->sampleRate;
  144. }
  145. JNIEXPORT jint
  146. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeGetChannelCount(
  147. JNIEnv *env, jobject object) {
  148. if (DEBUG) LOGV("%s", __FUNCTION__);
  149. const native_data_t *nat = getNativeData(env, object);
  150. return (jint) nat->channelCount;
  151. }
  152. JNIEXPORT jint
  153. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeGetAudioFormat(
  154. JNIEnv *env, jobject object) {
  155. if (DEBUG) LOGV("%s", __FUNCTION__);
  156. const native_data_t *nat = getNativeData(env, object);
  157. return (jint) nat->audioFormat;
  158. }
  159. JNIEXPORT jint
  160. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeGetBufferSizeInMillis(
  161. JNIEnv *env, jobject object) {
  162. if (DEBUG) LOGV("%s", __FUNCTION__);
  163. const native_data_t *nat = getNativeData(env, object);
  164. return (jint) nat->bufferSizeInMillis;
  165. }
  166. JNIEXPORT jobjectArray
  167. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeGetAvailableVoices(
  168. JNIEnv *env, jobject object) {
  169. if (DEBUG) LOGV("%s", __FUNCTION__);
  170. const espeak_VOICE **voices = espeak_ListVoices(NULL);
  171. int count;
  172. // First, count the number of voices returned.
  173. for (count = 0; voices[count] != NULL; count++);
  174. // Next, create a Java String array.
  175. jobjectArray voicesArray = (jobjectArray) env->NewObjectArray(
  176. count * 4, env->FindClass("java/lang/String"), NULL);
  177. const espeak_VOICE *v;
  178. char gender_buf[12];
  179. char age_buf[12];
  180. // Finally, populate the array.
  181. for (int i = 0, voicesIndex = 0; (v = voices[i]) != NULL; i++) {
  182. const char *lang_name = v->languages + 1;
  183. const char *identifier = v->identifier;
  184. sprintf(gender_buf, "%d", v->gender);
  185. sprintf(age_buf, "%d", v->age);
  186. env->SetObjectArrayElement(
  187. voicesArray, voicesIndex++, env->NewStringUTF(lang_name));
  188. env->SetObjectArrayElement(
  189. voicesArray, voicesIndex++, env->NewStringUTF(identifier));
  190. env->SetObjectArrayElement(
  191. voicesArray, voicesIndex++, env->NewStringUTF(gender_buf));
  192. env->SetObjectArrayElement(
  193. voicesArray, voicesIndex++, env->NewStringUTF(age_buf));
  194. }
  195. return voicesArray;
  196. }
  197. JNIEXPORT jboolean
  198. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeSetVoiceByName(
  199. JNIEnv *env, jobject object, jstring name) {
  200. const char *c_name = name ? env->GetStringUTFChars(name, NULL) : NULL;
  201. if (DEBUG) LOGV("%s(name=%s)", __FUNCTION__, c_name);
  202. const espeak_ERROR result = espeak_SetVoiceByName(c_name);
  203. if (c_name) env->ReleaseStringUTFChars(name, c_name);
  204. switch (result) {
  205. case EE_OK: return JNI_TRUE;
  206. case EE_INTERNAL_ERROR: LOGE("espeak_SetVoiceByName: internal error."); break;
  207. case EE_BUFFER_FULL: LOGE("espeak_SetVoiceByName: buffer full."); break;
  208. case EE_NOT_FOUND: LOGE("espeak_SetVoiceByName: not found."); break;
  209. }
  210. return JNI_FALSE;
  211. }
  212. JNIEXPORT jboolean
  213. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeSetVoiceByProperties(
  214. JNIEnv *env, jobject object, jstring language, jint gender, jint age) {
  215. const char *c_language = language ? env->GetStringUTFChars(language, NULL) : NULL;
  216. if (DEBUG) LOGV("%s(language=%s, gender=%d, age=%d)", __FUNCTION__, c_language, gender, age);
  217. espeak_VOICE voice_select;
  218. memset(&voice_select, 0, sizeof(espeak_VOICE));
  219. voice_select.languages = c_language;
  220. voice_select.gender = (int) gender;
  221. voice_select.age = (int) age;
  222. const espeak_ERROR result = espeak_SetVoiceByProperties(&voice_select);
  223. if (c_language) env->ReleaseStringUTFChars(language, c_language);
  224. switch (result) {
  225. case EE_OK: return JNI_TRUE;
  226. case EE_INTERNAL_ERROR: LOGE("espeak_SetVoiceByProperties: internal error."); break;
  227. case EE_BUFFER_FULL: LOGE("espeak_SetVoiceByProperties: buffer full."); break;
  228. case EE_NOT_FOUND: LOGE("espeak_SetVoiceByProperties: not found."); break;
  229. }
  230. return JNI_FALSE;
  231. }
  232. JNIEXPORT jboolean
  233. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeSetParameter(
  234. JNIEnv *env, jobject object, jint parameter, jint value) {
  235. if (DEBUG) LOGV("%s(parameter=%d, value=%d)", __FUNCTION__, parameter, value);
  236. const espeak_ERROR result = espeak_SetParameter((espeak_PARAMETER)parameter, (int)value, 0);
  237. switch (result) {
  238. case EE_OK: return JNI_TRUE;
  239. case EE_INTERNAL_ERROR: LOGE("espeak_SetParameter: internal error."); break;
  240. case EE_BUFFER_FULL: LOGE("espeak_SetParameter: buffer full."); break;
  241. case EE_NOT_FOUND: LOGE("espeak_SetParameter: not found."); break;
  242. }
  243. return JNI_FALSE;
  244. }
  245. JNIEXPORT jint
  246. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeGetParameter(
  247. JNIEnv *env, jobject object, jint parameter, jint current) {
  248. if (DEBUG) LOGV("%s(parameter=%d, pitch=%d)", __FUNCTION__, parameter, current);
  249. return espeak_GetParameter((espeak_PARAMETER)parameter, (int)current);
  250. }
  251. JNIEXPORT jboolean
  252. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeSynthesize(
  253. JNIEnv *env, jobject object, jstring text, jboolean isSsml) {
  254. if (DEBUG) LOGV("%s", __FUNCTION__);
  255. native_data_t *nat = getNativeData(env, object);
  256. const char *c_text = text ? env->GetStringUTFChars(text, NULL) : NULL;
  257. unsigned int unique_identifier;
  258. nat->env = env;
  259. espeak_SetSynthCallback(SynthCallback);
  260. const espeak_ERROR result = espeak_Synth(c_text, strlen(c_text), 0, // position
  261. POS_CHARACTER, 0, // end position (0 means no end position)
  262. isSsml ? espeakCHARS_UTF8 | espeakSSML // UTF-8 encoded SSML
  263. : espeakCHARS_UTF8, // UTF-8 encoded text
  264. &unique_identifier, nat);
  265. espeak_Synchronize();
  266. if (c_text) env->ReleaseStringUTFChars(text, c_text);
  267. switch (result) {
  268. case EE_OK: return JNI_TRUE;
  269. case EE_INTERNAL_ERROR: LOGE("espeak_Synth: internal error."); break;
  270. case EE_BUFFER_FULL: LOGE("espeak_Synth: buffer full."); break;
  271. case EE_NOT_FOUND: LOGE("espeak_Synth: not found."); break;
  272. }
  273. return JNI_TRUE;
  274. }
  275. JNIEXPORT jboolean
  276. JNICALL Java_com_reecedunn_espeak_SpeechSynthesis_nativeStop(
  277. JNIEnv *env, jobject object) {
  278. if (DEBUG) LOGV("%s", __FUNCTION__);
  279. espeak_Cancel();
  280. return JNI_TRUE;
  281. }
  282. #ifdef __cplusplus
  283. }
  284. #endif /* __cplusplus */