eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

ttsengine.cpp 6.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. /*
  2. * Copyright (C) 2016 Reece H. Dunn
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation; either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, see: <http://www.gnu.org/licenses/>.
  16. */
  17. #include "config.h"
  18. #include <windows.h>
  19. #include <sapiddk.h>
  20. #include <sperror.h>
  21. #include <espeak-ng/espeak_ng.h>
  22. #include <espeak-ng/speak_lib.h>
  23. #include <new>
  24. #include <errno.h>
  25. extern "C" ULONG ObjectCount;
  26. static HRESULT espeak_status_to_hresult(espeak_ng_STATUS status)
  27. {
  28. switch (status)
  29. {
  30. case ENS_OK: return S_OK;
  31. case EACCES: return E_ACCESSDENIED;
  32. case EINVAL: return E_INVALIDARG;
  33. case ENOENT: return HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND);
  34. case ENOMEM: return E_OUTOFMEMORY;
  35. default: return E_FAIL;
  36. }
  37. }
  38. struct TtsEngine
  39. : public ISpObjectWithToken
  40. , public ISpTTSEngine
  41. {
  42. TtsEngine();
  43. ~TtsEngine();
  44. // IUnknown
  45. ULONG __stdcall AddRef();
  46. ULONG __stdcall Release();
  47. HRESULT __stdcall QueryInterface(REFIID iid, void **object);
  48. // ISpObjectWithToken
  49. HRESULT __stdcall GetObjectToken(ISpObjectToken **token);
  50. HRESULT __stdcall SetObjectToken(ISpObjectToken *token);
  51. // ISpTTSEngine
  52. HRESULT __stdcall
  53. Speak(DWORD flags,
  54. REFGUID formatId,
  55. const WAVEFORMATEX *format,
  56. const SPVTEXTFRAG *textFragList,
  57. ISpTTSEngineSite *site);
  58. HRESULT __stdcall
  59. GetOutputFormat(const GUID *targetFormatId,
  60. const WAVEFORMATEX *targetFormat,
  61. GUID *formatId,
  62. WAVEFORMATEX **format);
  63. int OnEvent(short *data, int samples, espeak_EVENT *events);
  64. private:
  65. HRESULT GetStringValue(LPCWSTR key, char *&value);
  66. ULONG refCount;
  67. ISpObjectToken *objectToken;
  68. ISpTTSEngineSite *site;
  69. };
  70. static int
  71. espeak_callback(short *data, int samples, espeak_EVENT *events)
  72. {
  73. TtsEngine *engine = (TtsEngine *)events->user_data;
  74. return engine->OnEvent(data, samples, events);
  75. }
  76. TtsEngine::TtsEngine()
  77. : refCount(1)
  78. , objectToken(NULL)
  79. , site(NULL)
  80. {
  81. InterlockedIncrement(&ObjectCount);
  82. }
  83. TtsEngine::~TtsEngine()
  84. {
  85. InterlockedDecrement(&ObjectCount);
  86. if (objectToken)
  87. objectToken->Release();
  88. }
  89. ULONG __stdcall TtsEngine::AddRef()
  90. {
  91. return InterlockedIncrement(&refCount);
  92. }
  93. ULONG __stdcall TtsEngine::Release()
  94. {
  95. ULONG ret = InterlockedDecrement(&refCount);
  96. if (ret == 0)
  97. delete this;
  98. return ret;
  99. }
  100. HRESULT __stdcall TtsEngine::QueryInterface(REFIID iid, void **object)
  101. {
  102. *object = NULL;
  103. if (IsEqualIID(iid, IID_IUnknown) || IsEqualIID(iid, IID_ISpTTSEngine))
  104. *object = (ISpTTSEngine *)this;
  105. else if (IsEqualIID(iid, IID_ISpObjectWithToken))
  106. *object = (ISpObjectWithToken *)this;
  107. else
  108. return E_NOINTERFACE;
  109. this->AddRef();
  110. return S_OK;
  111. }
  112. HRESULT __stdcall TtsEngine::GetObjectToken(ISpObjectToken **token)
  113. {
  114. if (!token)
  115. return E_POINTER;
  116. *token = objectToken;
  117. if (objectToken) {
  118. objectToken->AddRef();
  119. return S_OK;
  120. }
  121. return S_FALSE;
  122. }
  123. HRESULT __stdcall TtsEngine::SetObjectToken(ISpObjectToken *token)
  124. {
  125. if (!token)
  126. return E_INVALIDARG;
  127. if (objectToken)
  128. return SPERR_ALREADY_INITIALIZED;
  129. objectToken = token;
  130. objectToken->AddRef();
  131. char *path = NULL;
  132. GetStringValue(L"Path", path);
  133. espeak_ng_InitializePath(path);
  134. if (path)
  135. free(path);
  136. espeak_ng_STATUS status;
  137. status = espeak_ng_Initialize(NULL);
  138. if (status == ENS_OK)
  139. status = espeak_ng_InitializeOutput(ENOUTPUT_MODE_SYNCHRONOUS, 100, NULL);
  140. espeak_SetSynthCallback(espeak_callback);
  141. char *voiceName = NULL;
  142. if (SUCCEEDED(GetStringValue(L"VoiceName", voiceName))) {
  143. if (status == ENS_OK)
  144. status = espeak_ng_SetVoiceByName(voiceName);
  145. free(voiceName);
  146. }
  147. return espeak_status_to_hresult(status);
  148. }
  149. HRESULT __stdcall
  150. TtsEngine::Speak(DWORD flags,
  151. REFGUID formatId,
  152. const WAVEFORMATEX *format,
  153. const SPVTEXTFRAG *textFragList,
  154. ISpTTSEngineSite *site)
  155. {
  156. if (!site || !textFragList)
  157. return E_INVALIDARG;
  158. this->site = site;
  159. while (textFragList != NULL)
  160. {
  161. DWORD actions = site->GetActions();
  162. if (actions & SPVES_ABORT)
  163. return S_OK;
  164. switch (textFragList->State.eAction)
  165. {
  166. case SPVA_Speak:
  167. espeak_ng_Synthesize(textFragList->pTextStart, 0, 0, POS_CHARACTER, 0, espeakCHARS_WCHAR, NULL, this);
  168. break;
  169. }
  170. textFragList = textFragList->pNext;
  171. }
  172. return E_NOTIMPL;
  173. }
  174. HRESULT __stdcall
  175. TtsEngine::GetOutputFormat(const GUID *targetFormatId,
  176. const WAVEFORMATEX *targetFormat,
  177. GUID *formatId,
  178. WAVEFORMATEX **format)
  179. {
  180. *format = (WAVEFORMATEX *)CoTaskMemAlloc(sizeof(WAVEFORMATEX));
  181. if (!*format)
  182. return E_OUTOFMEMORY;
  183. (*format)->wFormatTag = WAVE_FORMAT_PCM;
  184. (*format)->nChannels = 1;
  185. (*format)->nBlockAlign = 2;
  186. (*format)->nSamplesPerSec = 22050;
  187. (*format)->wBitsPerSample = 16;
  188. (*format)->nAvgBytesPerSec = (*format)->nSamplesPerSec * (*format)->nBlockAlign;
  189. (*format)->cbSize = 0;
  190. *formatId = SPDFID_WaveFormatEx;
  191. return S_OK;
  192. }
  193. int
  194. TtsEngine::OnEvent(short *data, int samples, espeak_EVENT *events)
  195. {
  196. DWORD actions = site->GetActions();
  197. if (actions & SPVES_ABORT)
  198. return 1;
  199. if (data)
  200. site->Write(data, samples * 2, NULL);
  201. return 0;
  202. }
  203. HRESULT TtsEngine::GetStringValue(LPCWSTR key, char *&value)
  204. {
  205. if (!objectToken)
  206. return E_FAIL;
  207. LPWSTR wvalue = NULL;
  208. HRESULT hr = objectToken->GetStringValue(key, &wvalue);
  209. if (FAILED(hr))
  210. return hr;
  211. size_t len = wcslen(wvalue);
  212. value = (char *)malloc(len + 1);
  213. if (!value) {
  214. CoTaskMemFree(wvalue);
  215. return E_OUTOFMEMORY;
  216. }
  217. wcstombs(value, wvalue, len + 1);
  218. CoTaskMemFree(wvalue);
  219. return S_OK;
  220. }
  221. extern "C" HRESULT __stdcall TtsEngine_CreateInstance(IClassFactory *iface, IUnknown *outer, REFIID iid, void **object)
  222. {
  223. if (outer != NULL)
  224. return CLASS_E_NOAGGREGATION;
  225. TtsEngine *engine = new (std::nothrow) TtsEngine();
  226. if (!engine)
  227. return E_OUTOFMEMORY;
  228. HRESULT ret = engine->QueryInterface(iid, object);
  229. engine->Release();
  230. return ret;
  231. }