eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

TtsEngine.h 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. /*
  2. * Copyright (C) 2009 Google Inc.
  3. * Copyright (C) 2012 Reece H. Dunn
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /*
  18. * This file contains the TtsEngine interface used by Android to implement
  19. * Text-to-Speech services.
  20. *
  21. * Android Version: 2.2 (Froyo)
  22. * API Version: 8
  23. */
  24. #ifndef TTS_ENGINE_H_
  25. #define TTS_ENGINE_H_
  26. namespace android {
  27. #define ANDROID_TTS_ENGINE_PROPERTY_CONFIG "engineConfig" // Property-name strings (presumably keys for setProperty()/getProperty(); TODO confirm)
  28. #define ANDROID_TTS_ENGINE_PROPERTY_PITCH "pitch"
  29. #define ANDROID_TTS_ENGINE_PROPERTY_RATE "rate"
  30. #define ANDROID_TTS_ENGINE_PROPERTY_VOLUME "volume"
  31. enum tts_synth_status { // passed to the synthesis callback (synthDoneCB_t)
  32. TTS_SYNTH_DONE = 0, // the synthesis is done
  33. TTS_SYNTH_PENDING = 1 // more data is to be synthesized
  34. };
  35. enum tts_callback_status { // returned by the synthesis callback (synthDoneCB_t)
  36. TTS_CALLBACK_HALT = 0, // the synthesis must stop
  37. TTS_CALLBACK_CONTINUE = 1 // the synthesis must continue if there is more data to produce
  38. };
  39. // Audio sample formats. NOTE: This enum is duplicated in compat/jni/tts.h; when
  40. // changing it here, please make the same change there as well.
  41. enum tts_audio_format {
  42. TTS_AUDIO_FORMAT_INVALID = -1,
  43. TTS_AUDIO_FORMAT_DEFAULT = 0,
  44. TTS_AUDIO_FORMAT_PCM_16_BIT = 1,
  45. TTS_AUDIO_FORMAT_PCM_8_BIT = 2,
  46. };
  47. // The callback is used by the implementation of this interface to notify its
  48. // client, the Android TTS service, about the progress of the last requested
  49. // synthesis: audio data is passed in a buffer, and the tts_synth_status argument
  50. // says whether the synthesis is done. The callback takes, in order:
  51. // @param [inout] void *& - The userdata pointer set in the original
  52. // synth call
  53. // @param [in] uint32_t - Track sampling rate in Hz
  54. // @param [in] tts_audio_format - The audio format
  55. // @param [in] int - The number of channels
  56. // @param [inout] int8_t *& - A buffer of audio data only valid during the
  57. // execution of the callback
  58. // @param [inout] size_t & - The size of the buffer
  59. // @param [in] tts_synth_status - indicate whether the synthesis is done, or
  60. // if more data is to be synthesized.
  61. // @return TTS_CALLBACK_HALT to indicate the synthesis must stop,
  62. // TTS_CALLBACK_CONTINUE to indicate the synthesis must continue if
  63. // there is more data to produce.
      // synthDoneCB_t names a function type (not a pointer type); a parameter declared
      // with it, as in TtsEngine::init(), is adjusted by the language to a function pointer.
      // NOTE(review): uint32_t, int8_t and size_t are used below, but this header shows no
      // #include that declares them - presumably the including file must provide
      // <stdint.h>/<stddef.h> first; confirm.
  64. typedef tts_callback_status (synthDoneCB_t)(void *&, uint32_t,
  65. tts_audio_format, int, int8_t *&, size_t&, tts_synth_status);
  66. class TtsEngine; // forward declaration; the class is defined further down in this header
  67. extern "C" TtsEngine* getTtsEngine(); // C-linkage function returning a TtsEngine pointer (presumably the entry point an engine library exports; TODO confirm)
  68. enum tts_result { // status codes returned by the TtsEngine methods: 0 is success, the other values are negative
  69. TTS_SUCCESS = 0,
  70. TTS_FAILURE = -1,
  71. TTS_FEATURE_UNSUPPORTED = -2,
  72. TTS_VALUE_INVALID = -3, // listed as a possible result of setProperty()
  73. TTS_PROPERTY_UNSUPPORTED = -4, // listed as a possible result of setProperty() and getProperty()
  74. TTS_PROPERTY_SIZE_TOO_SMALL = -5, // getProperty(): the size available cannot hold the property value
  75. TTS_MISSING_RESOURCES = -6
  76. };
  77. enum tts_support_result { // level of language support, returned by TtsEngine::isLanguageAvailable()
  78. TTS_LANG_COUNTRY_VAR_AVAILABLE = 2, // language, country and variant are supported
  79. TTS_LANG_COUNTRY_AVAILABLE = 1, // language and country are supported; no match for the variant
  80. TTS_LANG_AVAILABLE = 0, // language is supported; no match for the country and variant
  81. TTS_LANG_MISSING_DATA = -1, // required resources for the language are not correctly installed
  82. TTS_LANG_NOT_SUPPORTED = -2 // language is not supported by the TTS engine
  83. };
  84. class TtsEngine // The TtsEngine interface used by Android to implement Text-to-Speech services.
  85. {
  86. public:
  87. virtual ~TtsEngine() {}
      // NOTE(review): the methods below are declared virtual but not pure (no "= 0"), and
      // no definitions are visible in this header - presumably each engine supplies them; confirm.
  88. // Initializes the TTS engine and returns whether initialization succeeded.
  89. // @param synthDoneCBPtr synthesis callback function pointer
      // @param engineConfig engine configuration string (format not specified here - TODO confirm)
  90. // @return TTS_SUCCESS, or TTS_FAILURE
  91. virtual tts_result init(synthDoneCB_t synthDoneCBPtr, const char *engineConfig);
  92. // Shuts down the TTS engine and releases all associated resources.
  93. // @return TTS_SUCCESS, or TTS_FAILURE
  94. virtual tts_result shutdown();
  95. // Interrupts synthesis and flushes any synthesized data that hasn't been
  96. // output yet. This will block until callbacks underway are completed.
  97. // @return TTS_SUCCESS, or TTS_FAILURE
  98. virtual tts_result stop();
  99. // Returns the level of support for the language, country and variant.
      // The arguments are presumably coded as described for loadLanguage() - TODO confirm.
  100. // @return TTS_LANG_COUNTRY_VAR_AVAILABLE if the language, country and variant are supported,
  101. // and the corresponding resources are correctly installed
  102. // TTS_LANG_COUNTRY_AVAILABLE if the language and country are supported and the
  103. // corresponding resources are correctly installed, but there is no match for
  104. // the specified variant
  105. // TTS_LANG_AVAILABLE if the language is supported and the
  106. // corresponding resources are correctly installed, but there is no match for
  107. // the specified country and variant
  108. // TTS_LANG_MISSING_DATA if the required resources to provide any level of support
  109. // for the language are not correctly installed
  110. // TTS_LANG_NOT_SUPPORTED if the language is not supported by the TTS engine.
  111. virtual tts_support_result isLanguageAvailable(const char *lang, const char *country,
  112. const char *variant);
  113. // Load the resources associated with the specified language. The loaded
  114. // language will only be used once a call to setLanguage() with the same
  115. // language value is issued. Language and country values are coded according to the ISO three
  116. // letter codes for languages and countries, as can be retrieved from a java.util.Locale
  117. // instance. The variant value is encoded as the variant string retrieved from a
  118. // java.util.Locale instance built with that variant data.
  119. // @param lang pointer to the ISO three letter code for the language
  120. // @param country pointer to the ISO three letter code for the country
  121. // @param variant pointer to the variant code
  122. // @return TTS_SUCCESS, or TTS_FAILURE
  123. virtual tts_result loadLanguage(const char *lang, const char *country, const char *variant);
  124. // Set the language, country and Locale variant to use. As described for loadLanguage(),
  125. // a loaded language is only used once setLanguage() is called with the same language value.
      // NOTE(review): this comment previously duplicated the loadLanguage() text and referred to
      // setLanguageFromLocale(), which is not declared in this interface; whether this call also
      // loads the resources itself is not stated here - confirm.
  126. // Language and country values are coded according to the ISO three
  127. // letter codes for languages and countries, as can be retrieved from a java.util.Locale
  128. // instance. The variant value is encoded as the variant string retrieved from a
  129. // java.util.Locale instance built with that variant data.
  130. // @param lang pointer to the ISO three letter code for the language
  131. // @param country pointer to the ISO three letter code for the country
  132. // @param variant pointer to the variant code
  133. // @return TTS_SUCCESS, or TTS_FAILURE
  134. virtual tts_result setLanguage(const char *lang, const char *country, const char *variant);
  135. // Retrieve the currently set language, country and variant, or empty strings if none of
  136. // the parameters have been set. Language and country are represented by their 3-letter ISO code
  137. // @param[out] language pointer to the retrieved 3-letter code language value
  138. // @param[out] country pointer to the retrieved 3-letter code country value
  139. // @param[out] variant pointer to the retrieved variant value
      // NOTE(review): no buffer sizes are passed - presumably the caller must supply buffers
      // large enough for each value; confirm the required sizes.
  140. // @return TTS_SUCCESS, or TTS_FAILURE
  141. virtual tts_result getLanguage(char *language, char *country, char *variant);
  142. // Notifies the engine what audio parameters should be used for the synthesis.
  143. // This is meant to be used as a hint, the engine implementation will set the output values
  144. // to those of the synthesis format, based on a given hint.
  145. // @param[inout] encoding in: the desired audio sample format
  146. // out: the format used by the TTS engine
  147. // @param[inout] rate in: the desired audio sample rate
  148. // out: the sample rate used by the TTS engine
  149. // @param[inout] channels in: the desired number of audio channels
  150. // out: the number of channels used by the TTS engine
  151. // @return TTS_SUCCESS, or TTS_FAILURE
  152. virtual tts_result setAudioFormat(tts_audio_format& encoding, uint32_t& rate,
  153. int& channels);
  154. // Set a property for the TTS engine
  155. // "size" is the maximum size of "value" for the property named by "property"
  156. // @param property pointer to the property name
  157. // @param value pointer to the property value
  158. // @param size maximum size required to store this type of property
  159. // @return TTS_PROPERTY_UNSUPPORTED, or TTS_SUCCESS, or TTS_FAILURE,
  160. // or TTS_VALUE_INVALID
  161. virtual tts_result setProperty(const char *property, const char *value,
  162. const size_t size);
  163. // Retrieve a property from the TTS engine
  164. // @param property pointer to the property name
  165. // @param[out] value pointer to the retrieved property value
  166. // @param[inout] iosize in: stores the size available to store the
  167. // property value.
  168. // out: stores the size required to hold the property
  169. // value if getProperty() returned
  170. // TTS_PROPERTY_SIZE_TOO_SMALL, unchanged otherwise
  171. // @return TTS_PROPERTY_UNSUPPORTED, or TTS_SUCCESS,
  172. // or TTS_PROPERTY_SIZE_TOO_SMALL
  173. virtual tts_result getProperty(const char *property, char *value,
  174. size_t *iosize);
  175. // Synthesize the text.
  176. // As the synthesis is performed, the engine invokes the callback to notify
  177. // the TTS framework that it has filled the given buffer, and indicates how
  178. // many bytes it wrote. The callback is called repeatedly until the engine
  179. // has generated all the audio data corresponding to the text.
  180. // Note about the format of the input: the text parameter may use the
  181. // following elements
  182. // and their respective attributes as defined in the SSML 1.0 specification:
  183. // * lang
  184. // * say-as:
  185. // o interpret-as
  186. // * phoneme
  187. // * voice:
  188. // o gender,
  189. // o age,
  190. // o variant,
  191. // o name
  192. // * emphasis
  193. // * break:
  194. // o strength,
  195. // o time
  196. // * prosody:
  197. // o pitch,
  198. // o contour,
  199. // o range,
  200. // o rate,
  201. // o duration,
  202. // o volume
  203. // * mark
  204. // Differences between this text format and SSML are:
  205. // * full SSML documents are not supported
  206. // * namespaces are not supported
  207. // Text is coded in UTF-8.
  208. // @param text the UTF-8 text to synthesize
  209. // @param buffer the location where the synthesized data must be written
  210. // @param bufferSize the number of bytes that can be written in buffer
  211. // @param userdata pointer to be returned when the callback is invoked
  212. // @return TTS_SUCCESS or TTS_FAILURE
  213. virtual tts_result synthesizeText(const char *text, int8_t *buffer,
  214. size_t bufferSize, void *userdata);
  215. };
  216. } // namespace android
  217. #endif /* TTS_ENGINE_H_ */