eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

wavegen.c 45KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697
  1. /*
  2. * Copyright (C) 2005 to 2013 by Jonathan Duddington
  3. * email: [email protected]
  4. * Copyright (C) 2015-2016 Reece H. Dunn
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, see: <http://www.gnu.org/licenses/>.
  18. */
  19. // this version keeps wavemult window as a constant fraction
  20. // of the cycle length - but that spreads out the HF peaks too much
  21. #include "config.h"
  22. #include <math.h>
  23. #include <stdint.h>
  24. #include <stdio.h>
  25. #include <stdlib.h>
  26. #include <string.h>
  27. #include <espeak-ng/espeak_ng.h>
  28. #include <espeak/speak_lib.h>
  29. #include "speech.h"
  30. #include "phoneme.h"
  31. #include "synthesize.h"
  32. #include "voice.h"
  33. #if HAVE_SONIC_H
  34. #include "sonic.h"
  35. #endif
  36. #ifdef USE_PORTAUDIO
  37. #include "portaudio.h"
  38. #undef USE_PORTAUDIO
  39. // determine portaudio version by looking for a #define which is not in V18
  40. #ifdef paNeverDropInput
  41. #define USE_PORTAUDIO 19
  42. #else
  43. #define USE_PORTAUDIO 18
  44. #endif
  45. #endif
  46. #include "sintab.h"
  // File-scope state of the wave generator: voice and output options,
  // echo buffer, harmonic spectrum work areas, per-segment counters,
  // the output buffer pointers and the command queue filled by synthesize.
  47. #define N_WAV_BUF 10
  48. voice_t *wvoice;
  49. FILE *f_log = NULL;
  50. int option_waveout = 0;
  51. static int option_harmonic1 = 10;
  52. static int flutter_amp = 64;
  53. static int general_amplitude = 60;
  54. static int consonant_amp = 26;
  55. int embedded_value[N_EMBEDDED_VALUES];
  56. static int PHASE_INC_FACTOR;
  57. int samplerate = 0; // this is set by WavegenInit()
  58. int samplerate_native = 0;
  59. extern int option_device_number;
  60. extern int option_quiet;
  61. static wavegen_peaks_t peaks[N_PEAKS];
  62. static int peak_harmonic[N_PEAKS];
  63. static int peak_height[N_PEAKS];
  64. int echo_head;
  65. int echo_tail;
  66. int echo_amp = 0;
  67. short echo_buf[N_ECHO_BUF];
  68. static int echo_length = 0; // period (in samples) to ensure completion of echo at the end of speech, set in WavegenSetEcho()
  69. static int voicing;
  70. static RESONATOR rbreath[N_PEAKS];
  71. static int harm_sqrt_n = 0;
  72. #define N_LOWHARM 30
  73. static int harm_inc[N_LOWHARM]; // only for these harmonics do we interpolate amplitude between steps
  74. static int *harmspect;
  75. static int hswitch = 0;
  76. static int hspect[2][MAX_HARMONIC]; // 2 copies, we interpolate between them
  77. static int max_hval = 0;
  78. static int nsamples = 0; // number to do
  79. static int modulation_type = 0;
  80. static int glottal_flag = 0;
  81. static int glottal_reduce = 0;
  82. WGEN_DATA wdata;
  83. static int amp_ix;
  84. static int amp_inc;
  85. static unsigned char *amplitude_env = NULL;
  86. static int samplecount = 0; // number done
  87. static int samplecount_start = 0; // count at start of this segment
  88. static int end_wave = 0; // continue to end of wave cycle
  89. static int wavephase;
  90. static int phaseinc;
  91. static int cycle_samples; // number of samples in a cycle at current pitch
  92. static int cbytes;
  93. static int hf_factor;
  94. static double minus_pi_t;
  95. static double two_pi_t;
  96. unsigned char *out_ptr;
  97. unsigned char *out_start;
  98. unsigned char *out_end;
  99. int outbuf_size = 0;
  100. // the queue of operations passed to wavegen from synthesize
  101. intptr_t wcmdq[N_WCMDQ][4];
  102. int wcmdq_head = 0;
  103. int wcmdq_tail = 0;
  104. // pitch,speed,
  105. int embedded_default[N_EMBEDDED_VALUES] = { 0, 50, 175, 100, 50, 0, 0, 0, 175, 0, 0, 0, 0, 0, 0 };
  106. static int embedded_max[N_EMBEDDED_VALUES] = { 0, 0x7fff, 750, 300, 99, 99, 99, 0, 750, 0, 0, 0, 0, 4, 0 };
  107. int current_source_index = 0;
  108. extern FILE *f_wave;
  // the PortAudio stream handle type differs between V18 and V19
  109. #if (USE_PORTAUDIO == 18)
  110. static PortAudioStream *pa_stream = NULL;
  111. #endif
  112. #if (USE_PORTAUDIO == 19)
  113. static PaStream *pa_stream = NULL;
  114. #endif
  115. #if HAVE_SONIC_H
  116. static sonicStream sonicSpeedupStream = NULL;
  117. double sonicSpeed = 1.0;
  118. #endif
  // Amplitude modulation settings, one row per roughness level
  // (N_ROUGHNESS rows) and one column per modulation type (8 columns).
  119. // 1st index=roughness
  120. // 2nd index=modulation_type
  121. // value: bits 0-3 amplitude (16ths), bits 4-7 every n cycles
  122. #define N_ROUGHNESS 8
  123. static unsigned char modulation_tab[N_ROUGHNESS][8] = {
  124. { 0, 0x00, 0x00, 0x00, 0, 0x46, 0xf2, 0x29 },
  125. { 0, 0x2f, 0x00, 0x2f, 0, 0x45, 0xf2, 0x29 },
  126. { 0, 0x2f, 0x00, 0x2e, 0, 0x45, 0xf2, 0x28 },
  127. { 0, 0x2e, 0x00, 0x2d, 0, 0x34, 0xf2, 0x28 },
  128. { 0, 0x2d, 0x2d, 0x2c, 0, 0x34, 0xf2, 0x28 },
  129. { 0, 0x2b, 0x2b, 0x2b, 0, 0x34, 0xf2, 0x28 },
  130. { 0, 0x2a, 0x2a, 0x2a, 0, 0x34, 0xf2, 0x28 },
  131. { 0, 0x29, 0x29, 0x29, 0, 0x34, 0xf2, 0x28 },
  132. };
  133. // Flutter table, to add natural variations to the pitch
  // N_FLUTTER (0x170) unsigned bytes. AdvanceParameters() reads the entry at
  // Flutter_ix >> 6, subtracts 0x80 to make it signed, scales it by
  // flutter_amp and adds it to the pitch; Flutter_ix advances by Flutter_inc.
  134. #define N_FLUTTER 0x170
  135. static int Flutter_inc;
  136. static const unsigned char Flutter_tab[N_FLUTTER] = {
  137. 0x80, 0x9b, 0xb5, 0xcb, 0xdc, 0xe8, 0xed, 0xec,
  138. 0xe6, 0xdc, 0xce, 0xbf, 0xb0, 0xa3, 0x98, 0x90,
  139. 0x8c, 0x8b, 0x8c, 0x8f, 0x92, 0x94, 0x95, 0x92,
  140. 0x8c, 0x83, 0x78, 0x69, 0x59, 0x49, 0x3c, 0x31,
  141. 0x2a, 0x29, 0x2d, 0x36, 0x44, 0x56, 0x69, 0x7d,
  142. 0x8f, 0x9f, 0xaa, 0xb1, 0xb2, 0xad, 0xa4, 0x96,
  143. 0x87, 0x78, 0x69, 0x5c, 0x53, 0x4f, 0x4f, 0x55,
  144. 0x5e, 0x6b, 0x7a, 0x88, 0x96, 0xa2, 0xab, 0xb0,
  145. 0xb1, 0xae, 0xa8, 0xa0, 0x98, 0x91, 0x8b, 0x88,
  146. 0x89, 0x8d, 0x94, 0x9d, 0xa8, 0xb2, 0xbb, 0xc0,
  147. 0xc1, 0xbd, 0xb4, 0xa5, 0x92, 0x7c, 0x63, 0x4a,
  148. 0x32, 0x1e, 0x0e, 0x05, 0x02, 0x05, 0x0f, 0x1e,
  149. 0x30, 0x44, 0x59, 0x6d, 0x7f, 0x8c, 0x96, 0x9c,
  150. 0x9f, 0x9f, 0x9d, 0x9b, 0x99, 0x99, 0x9c, 0xa1,
  151. 0xa9, 0xb3, 0xbf, 0xca, 0xd5, 0xdc, 0xe0, 0xde,
  152. 0xd8, 0xcc, 0xbb, 0xa6, 0x8f, 0x77, 0x60, 0x4b,
  153. 0x3a, 0x2e, 0x28, 0x29, 0x2f, 0x3a, 0x48, 0x59,
  154. 0x6a, 0x7a, 0x86, 0x90, 0x94, 0x95, 0x91, 0x89,
  155. 0x80, 0x75, 0x6b, 0x62, 0x5c, 0x5a, 0x5c, 0x61,
  156. 0x69, 0x74, 0x80, 0x8a, 0x94, 0x9a, 0x9e, 0x9d,
  157. 0x98, 0x90, 0x86, 0x7c, 0x71, 0x68, 0x62, 0x60,
  158. 0x63, 0x6b, 0x78, 0x88, 0x9b, 0xaf, 0xc2, 0xd2,
  159. 0xdf, 0xe6, 0xe7, 0xe2, 0xd7, 0xc6, 0xb2, 0x9c,
  160. 0x84, 0x6f, 0x5b, 0x4b, 0x40, 0x39, 0x37, 0x38,
  161. 0x3d, 0x43, 0x4a, 0x50, 0x54, 0x56, 0x55, 0x52,
  162. 0x4d, 0x48, 0x42, 0x3f, 0x3e, 0x41, 0x49, 0x56,
  163. 0x67, 0x7c, 0x93, 0xab, 0xc3, 0xd9, 0xea, 0xf6,
  164. 0xfc, 0xfb, 0xf4, 0xe7, 0xd5, 0xc0, 0xaa, 0x94,
  165. 0x80, 0x71, 0x64, 0x5d, 0x5a, 0x5c, 0x61, 0x68,
  166. 0x70, 0x77, 0x7d, 0x7f, 0x7f, 0x7b, 0x74, 0x6b,
  167. 0x61, 0x57, 0x4e, 0x48, 0x46, 0x48, 0x4e, 0x59,
  168. 0x66, 0x75, 0x84, 0x93, 0x9f, 0xa7, 0xab, 0xaa,
  169. 0xa4, 0x99, 0x8b, 0x7b, 0x6a, 0x5b, 0x4e, 0x46,
  170. 0x43, 0x45, 0x4d, 0x5a, 0x6b, 0x7f, 0x92, 0xa6,
  171. 0xb8, 0xc5, 0xcf, 0xd3, 0xd2, 0xcd, 0xc4, 0xb9,
  172. 0xad, 0xa1, 0x96, 0x8e, 0x89, 0x87, 0x87, 0x8a,
  173. 0x8d, 0x91, 0x92, 0x91, 0x8c, 0x84, 0x78, 0x68,
  174. 0x55, 0x41, 0x2e, 0x1c, 0x0e, 0x05, 0x01, 0x05,
  175. 0x0f, 0x1f, 0x34, 0x4d, 0x68, 0x81, 0x9a, 0xb0,
  176. 0xc1, 0xcd, 0xd3, 0xd3, 0xd0, 0xc8, 0xbf, 0xb5,
  177. 0xab, 0xa4, 0x9f, 0x9c, 0x9d, 0xa0, 0xa5, 0xaa,
  178. 0xae, 0xb1, 0xb0, 0xab, 0xa3, 0x96, 0x87, 0x76,
  179. 0x63, 0x51, 0x42, 0x36, 0x2f, 0x2d, 0x31, 0x3a,
  180. 0x48, 0x59, 0x6b, 0x7e, 0x8e, 0x9c, 0xa6, 0xaa,
  181. 0xa9, 0xa3, 0x98, 0x8a, 0x7b, 0x6c, 0x5d, 0x52,
  182. 0x4a, 0x48, 0x4a, 0x50, 0x5a, 0x67, 0x75, 0x82
  183. };
  184. // waveform shape table for HF peaks, formants 6,7,8
  185. #define N_WAVEMULT 128
  186. static int wavemult_offset = 0;
  187. static int wavemult_max = 0;
  188. // the presets are for 22050 Hz sample rate.
  189. // A different rate will need to recalculate the presets in WavegenInit()
  // Not const: WavegenInit() overwrites the first wavemult_max entries with
  // 127*(1 - cos(2*pi*ix/wavemult_max)) when the sample rate is not 22050.
  190. static unsigned char wavemult[N_WAVEMULT] = {
  191. 0, 0, 0, 2, 3, 5, 8, 11, 14, 18, 22, 27, 32, 37, 43, 49,
  192. 55, 62, 69, 76, 83, 90, 98, 105, 113, 121, 128, 136, 144, 152, 159, 166,
  193. 174, 181, 188, 194, 201, 207, 213, 218, 224, 228, 233, 237, 240, 244, 246, 249,
  194. 251, 252, 253, 253, 253, 253, 252, 251, 249, 246, 244, 240, 237, 233, 228, 224,
  195. 218, 213, 207, 201, 194, 188, 181, 174, 166, 159, 152, 144, 136, 128, 121, 113,
  196. 105, 98, 90, 83, 76, 69, 62, 55, 49, 43, 37, 32, 27, 22, 18, 14,
  197. 11, 8, 5, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  198. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  199. };
  // Pitch multiplier lookup with MAX_PITCH_VALUE+1 entries.
  200. // set from y = pow(2,x) * 128, x=-1 to 1
  // so 64 stands for a factor of 0.5 and 128 for a factor of 1.0
  201. unsigned char pitch_adjust_tab[MAX_PITCH_VALUE+1] = {
  202. 64, 65, 66, 67, 68, 69, 70, 71,
  203. 72, 73, 74, 75, 76, 77, 78, 79,
  204. 80, 81, 82, 83, 84, 86, 87, 88,
  205. 89, 91, 92, 93, 94, 96, 97, 98,
  206. 100, 101, 103, 104, 105, 107, 108, 110,
  207. 111, 113, 115, 116, 118, 119, 121, 123,
  208. 124, 126, 128, 130, 132, 133, 135, 137,
  209. 139, 141, 143, 145, 147, 149, 151, 153,
  210. 155, 158, 160, 162, 164, 167, 169, 171,
  211. 174, 176, 179, 181, 184, 186, 189, 191,
  212. 194, 197, 199, 202, 205, 208, 211, 214,
  213. 217, 220, 223, 226, 229, 232, 236, 239,
  214. 242, 246, 249, 252, 254, 255
  215. };
  216. void WcmdqStop()
  217. {
  218. wcmdq_head = 0;
  219. wcmdq_tail = 0;
  220. #if HAVE_SONIC_H
  221. if (sonicSpeedupStream != NULL) {
  222. sonicDestroyStream(sonicSpeedupStream);
  223. sonicSpeedupStream = NULL;
  224. }
  225. #endif
  226. #ifdef USE_PORTAUDIO
  227. Pa_AbortStream(pa_stream);
  228. #endif
  229. if (mbrola_name[0] != 0)
  230. MbrolaReset();
  231. }
  232. int WcmdqFree()
  233. {
  234. int i;
  235. i = wcmdq_head - wcmdq_tail;
  236. if (i <= 0) i += N_WCMDQ;
  237. return i;
  238. }
  239. int WcmdqUsed()
  240. {
  241. return N_WCMDQ - WcmdqFree();
  242. }
  243. void WcmdqInc()
  244. {
  245. wcmdq_tail++;
  246. if (wcmdq_tail >= N_WCMDQ) wcmdq_tail = 0;
  247. }
  248. static void WcmdqIncHead()
  249. {
  250. wcmdq_head++;
  251. if (wcmdq_head >= N_WCMDQ) wcmdq_head = 0;
  252. }
  // Width of the formant peak shape tables; each table has PEAKSHAPEW+1 entries.
  253. #define PEAKSHAPEW 256
  // Peak shape falling from 255 to 0. WavegenInit() selects pk_shape2, so
  // this table is not referenced in the visible part of this file.
  254. unsigned char pk_shape1[PEAKSHAPEW+1] = {
  255. 255, 254, 254, 254, 254, 254, 253, 253, 252, 251, 251, 250, 249, 248, 247, 246,
  256. 245, 244, 242, 241, 239, 238, 236, 234, 233, 231, 229, 227, 225, 223, 220, 218,
  257. 216, 213, 211, 209, 207, 205, 203, 201, 199, 197, 195, 193, 191, 189, 187, 185,
  258. 183, 180, 178, 176, 173, 171, 169, 166, 164, 161, 159, 156, 154, 151, 148, 146,
  259. 143, 140, 138, 135, 132, 129, 126, 123, 120, 118, 115, 112, 108, 105, 102, 99,
  260. 96, 95, 93, 91, 90, 88, 86, 85, 83, 82, 80, 79, 77, 76, 74, 73,
  261. 72, 70, 69, 68, 67, 66, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55,
  262. 55, 54, 53, 52, 52, 51, 50, 50, 49, 48, 48, 47, 47, 46, 46, 46,
  263. 45, 45, 45, 44, 44, 44, 44, 44, 44, 44, 43, 43, 43, 43, 44, 43,
  264. 42, 42, 41, 40, 40, 39, 38, 38, 37, 36, 36, 35, 35, 34, 33, 33,
  265. 32, 32, 31, 30, 30, 29, 29, 28, 28, 27, 26, 26, 25, 25, 24, 24,
  266. 23, 23, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 16,
  267. 16, 15, 15, 15, 14, 14, 13, 13, 13, 12, 12, 11, 11, 11, 10, 10,
  268. 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 5, 5,
  269. 5, 5, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2,
  270. 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  271. 0
  272. };
  // Peak shape that reaches 0 half way through the table. WavegenInit()
  // points pk_shape at it, and PeaksToHarmspect() multiplies the looked-up
  // value by the peak height to weight each harmonic near a formant.
  273. static unsigned char pk_shape2[PEAKSHAPEW+1] = {
  274. 255, 254, 254, 254, 254, 254, 254, 254, 254, 254, 253, 253, 253, 253, 252, 252,
  275. 252, 251, 251, 251, 250, 250, 249, 249, 248, 248, 247, 247, 246, 245, 245, 244,
  276. 243, 243, 242, 241, 239, 237, 235, 233, 231, 229, 227, 225, 223, 221, 218, 216,
  277. 213, 211, 208, 205, 203, 200, 197, 194, 191, 187, 184, 181, 178, 174, 171, 167,
  278. 163, 160, 156, 152, 148, 144, 140, 136, 132, 127, 123, 119, 114, 110, 105, 100,
  279. 96, 94, 91, 88, 86, 83, 81, 78, 76, 74, 71, 69, 66, 64, 62, 60,
  280. 57, 55, 53, 51, 49, 47, 44, 42, 40, 38, 36, 34, 32, 30, 29, 27,
  281. 25, 23, 21, 19, 18, 16, 14, 12, 11, 9, 7, 6, 4, 3, 1, 0,
  282. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  283. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  284. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  285. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  286. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  287. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  288. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  289. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  290. 0
  291. };
  // the peak shape table currently in use (set in WavegenInit())
  292. static unsigned char *pk_shape;
  293. #ifdef USE_PORTAUDIO
  294. // PortAudio interface
  // passed as the userData argument when the stream is opened; WaveCallback() ignores it
  295. static int userdata[4];
  // result of Pa_Initialize(), stored by WavegenInitSound()
  296. static PaError pa_init_err = 0;
  // 1 = mono stream, 2 = stereo stream (WaveCallback() then duplicates each sample)
  297. static int out_channels = 1;
  // staging buffer between WavegenFill() and the PortAudio buffer; grown by WaveCallback()
  298. unsigned char *outbuffer = NULL;
  299. int outbuffer_size = 0;
  300. #if USE_PORTAUDIO == 18
  301. static int WaveCallback(void *inputBuffer, void *outputBuffer,
  302. unsigned long framesPerBuffer, PaTimestamp outTime, void *userData)
  303. #else
  304. static int WaveCallback(const void *inputBuffer, void *outputBuffer,
  305. long unsigned int framesPerBuffer, const PaStreamCallbackTimeInfo *outTime,
  306. PaStreamCallbackFlags flags, void *userData)
  307. #endif
  308. {
  309. (void)inputBuffer; // unused
  310. (void)outTime; // unused
  311. (void)userData; // unused
  312. int ix;
  313. int result;
  314. unsigned char *p;
  315. unsigned char *out_buf;
  316. unsigned char *out_end2;
  317. int pa_size;
  318. pa_size = framesPerBuffer*2;
  319. // make a buffer 3x size of the portaudio output
  320. ix = pa_size*3;
  321. if (ix > outbuffer_size) {
  322. unsigned char *new_outbuffer = (unsigned char *)realloc(outbuffer, ix);
  323. if (new_outbuffer == NULL)
  324. fprintf(stderr, "espeak: out of memory\n");
  325. else
  326. outbuffer = new_outbuffer;
  327. outbuffer_size = ix;
  328. out_ptr = NULL;
  329. }
  330. if (out_ptr == NULL) {
  331. out_ptr = out_start = outbuffer;
  332. out_end = out_start + outbuffer_size;
  333. }
  334. out_end2 = &outbuffer[pa_size]; // top of data needed for the portaudio buffer
  335. event_list_ix = 0;
  336. result = WavegenFill();
  337. // copy from the outbut buffer into the portaudio buffer
  338. if (result && (out_ptr > out_end2))
  339. result = 0; // don't end yet, there is more data in the buffer than can fit in portaudio
  340. while (out_ptr < out_end2)
  341. *out_ptr++ = 0; // fill with zeros up to the size of the portaudio buffer
  342. memcpy(outputBuffer, outbuffer, pa_size);
  343. // move the remaining contents of the start of the output buffer
  344. for (p = out_end2; p < out_end; p++)
  345. p[-pa_size] = p[0];
  346. out_ptr -= pa_size;
  347. count_samples += framesPerBuffer;
  348. if (synth_callback) {
  349. // synchronous-playback mode, allow the calling process to abort the speech
  350. event_list[event_list_ix].type = espeakEVENT_LIST_TERMINATED; // indicates end of event list
  351. event_list[event_list_ix].user_data = 0;
  352. if (synth_callback(NULL, 0, event_list) == 1) {
  353. SpeakNextClause(NULL, NULL, 2); // stop speaking
  354. result = 1;
  355. }
  356. }
  357. #ifdef ARCH_BIG
  358. // swap the order of bytes in each sound sample in the portaudio buffer
  359. int c;
  360. unsigned char *buf_end;
  361. out_buf = (unsigned char *)outputBuffer;
  362. buf_end = out_buf + framesPerBuffer*2;
  363. while (out_buf < buf_end) {
  364. c = out_buf[0];
  365. out_buf[0] = out_buf[1];
  366. out_buf[1] = c;
  367. out_buf += 2;
  368. }
  369. #endif
  370. if (out_channels == 2) {
  371. // sound output can only do stereo, not mono. Duplicate each sound sample to
  372. // produce 2 channels.
  373. out_buf = (unsigned char *)outputBuffer;
  374. for (ix = framesPerBuffer-1; ix >= 0; ix--) {
  375. p = &out_buf[ix*4];
  376. p[3] = p[1] = out_buf[ix*2 + 1];
  377. p[2] = p[0] = out_buf[ix*2];
  378. }
  379. }
  380. #if USE_PORTAUDIO == 18
  381. #ifdef PLATFORM_WINDOWS
  382. return result;
  383. #endif
  384. if (result != 0) {
  385. static int end_timer = 0;
  386. if (end_timer == 0)
  387. end_timer = 4;
  388. if (end_timer > 0) {
  389. end_timer--;
  390. if (end_timer == 0)
  391. return 1;
  392. }
  393. }
  394. return 0;
  395. #else
  396. return result;
  397. #endif
  398. }
  #if USE_PORTAUDIO == 19
  /* This is a fixed version of Pa_OpenDefaultStream() for use if the version in portaudio V19
     is broken.

     Opens an output-only stream on device option_device_number when that is
     >= 0, otherwise on the default output device. inputChannelCount is
     accepted for signature compatibility but not used.

     Returns paDeviceUnavailable when there is no output device,
     paInvalidDevice when PortAudio has no information for the selected
     device (e.g. an out-of-range option_device_number), otherwise the result
     of Pa_OpenStream(). */
  static PaError Pa_OpenDefaultStream2(PaStream **stream,
                                       int inputChannelCount,
                                       int outputChannelCount,
                                       PaSampleFormat sampleFormat,
                                       double sampleRate,
                                       unsigned long framesPerBuffer,
                                       PaStreamCallback *streamCallback,
                                       void *userData)
  {
      (void)inputChannelCount; // unused, output only

      PaStreamParameters hostApiOutputParameters;
      const PaDeviceInfo *deviceInfo;

      if (option_device_number >= 0)
          hostApiOutputParameters.device = option_device_number;
      else
          hostApiOutputParameters.device = Pa_GetDefaultOutputDevice();

      if (hostApiOutputParameters.device == paNoDevice)
          return paDeviceUnavailable;

      // Pa_GetDeviceInfo() returns NULL for an invalid device index
      deviceInfo = Pa_GetDeviceInfo(hostApiOutputParameters.device);
      if (deviceInfo == NULL)
          return paInvalidDevice;

      hostApiOutputParameters.channelCount = outputChannelCount;
      hostApiOutputParameters.sampleFormat = sampleFormat;
      /* defaultHighOutputLatency is used below instead of
         defaultLowOutputLatency because it is more important for the default
         stream to work reliably than it is for it to work with the lowest
         latency.
       */
      hostApiOutputParameters.suggestedLatency = deviceInfo->defaultHighOutputLatency;
      hostApiOutputParameters.hostApiSpecificStreamInfo = NULL;

      return Pa_OpenStream(
          stream, NULL, &hostApiOutputParameters, sampleRate, framesPerBuffer, paNoFlag, streamCallback, userData);
  }
  #endif
  434. int WavegenOpenSound()
  435. {
  436. PaError err, err2;
  437. PaError active;
  438. if (option_waveout || option_quiet) {
  439. // writing to WAV file, not to portaudio
  440. return 0;
  441. }
  442. #if USE_PORTAUDIO == 18
  443. active = Pa_StreamActive(pa_stream);
  444. #else
  445. active = Pa_IsStreamActive(pa_stream);
  446. #endif
  447. if (active == 1)
  448. return 0;
  449. if (active < 0) {
  450. out_channels = 1;
  451. #if USE_PORTAUDIO == 18
  452. err2 = Pa_OpenDefaultStream(&pa_stream, 0, 1, paInt16, samplerate, 512, N_WAV_BUF, WaveCallback, (void *)userdata);
  453. if (err2 == paInvalidChannelCount) {
  454. // failed to open with mono, try stereo
  455. out_channels = 2;
  456. err2 = Pa_OpenDefaultStream(&pa_stream, 0, 2, paInt16, samplerate, 512, N_WAV_BUF, WaveCallback, (void *)userdata);
  457. }
  458. #else
  459. err2 = Pa_OpenDefaultStream2(&pa_stream, 0, 1, paInt16, (double)samplerate, 512, WaveCallback, (void *)userdata);
  460. if (err2 == paInvalidChannelCount) {
  461. // failed to open with mono, try stereo
  462. out_channels = 2;
  463. err2 = Pa_OpenDefaultStream(&pa_stream, 0, 2, paInt16, (double)samplerate, 512, WaveCallback, (void *)userdata);
  464. }
  465. #endif
  466. }
  467. err = Pa_StartStream(pa_stream);
  468. #if USE_PORTAUDIO == 19
  469. if (err == paStreamIsNotStopped) {
  470. // not sure why we need this, but PA v19 seems to need it
  471. err = Pa_StopStream(pa_stream);
  472. err = Pa_StartStream(pa_stream);
  473. }
  474. #endif
  475. if (err != paNoError) {
  476. // exit speak if we can't open the sound device - this is OK if speak is being run for each utterance
  477. exit(2);
  478. }
  479. return 0;
  480. }
  481. int WavegenCloseSound()
  482. {
  483. PaError active;
  484. // check whether speaking has finished, and close the stream
  485. if (pa_stream != NULL) {
  486. #if USE_PORTAUDIO == 18
  487. active = Pa_StreamActive(pa_stream);
  488. #else
  489. active = Pa_IsStreamActive(pa_stream);
  490. #endif
  491. if (WcmdqUsed() == 0) { // also check that the queue is empty
  492. if (active == 0) {
  493. Pa_CloseStream(pa_stream);
  494. pa_stream = NULL;
  495. return 1;
  496. }
  497. } else
  498. WavegenOpenSound(); // still items in the queue, shouldn't be closed
  499. }
  500. return 0;
  501. }
  502. int WavegenInitSound()
  503. {
  504. PaError err;
  505. if (option_quiet)
  506. return 0;
  507. // PortAudio sound output library
  508. err = Pa_Initialize();
  509. pa_init_err = err;
  510. if (err != paNoError) {
  511. fprintf(stderr, "Failed to initialise the PortAudio sound\n");
  512. return 1;
  513. }
  514. return 0;
  515. }
  516. #else
  /* Built without PortAudio: there is no sound device to open. Always returns 0. */
  int WavegenOpenSound(void)
  {
      return 0;
  }
  /* Built without PortAudio: there is no stream to close. Always returns 0. */
  int WavegenCloseSound(void)
  {
      return 0;
  }
  /* Built without PortAudio: nothing to initialise. Always returns 0. */
  int WavegenInitSound(void)
  {
      return 0;
  }
  529. #endif
  530. void WavegenInit(int rate, int wavemult_fact)
  531. {
  532. int ix;
  533. double x;
  534. if (wavemult_fact == 0)
  535. wavemult_fact = 60; // default
  536. wvoice = NULL;
  537. samplerate = samplerate_native = rate;
  538. PHASE_INC_FACTOR = 0x8000000 / samplerate; // assumes pitch is Hz*32
  539. Flutter_inc = (64 * samplerate)/rate;
  540. samplecount = 0;
  541. nsamples = 0;
  542. wavephase = 0x7fffffff;
  543. max_hval = 0;
  544. wdata.amplitude = 32;
  545. wdata.amplitude_fmt = 100;
  546. for (ix = 0; ix < N_EMBEDDED_VALUES; ix++)
  547. embedded_value[ix] = embedded_default[ix];
  548. // set up window to generate a spread of harmonics from a
  549. // single peak for HF peaks
  550. wavemult_max = (samplerate * wavemult_fact)/(256 * 50);
  551. if (wavemult_max > N_WAVEMULT) wavemult_max = N_WAVEMULT;
  552. wavemult_offset = wavemult_max/2;
  553. if (samplerate != 22050) {
  554. // wavemult table has preset values for 22050 Hz, we only need to
  555. // recalculate them if we have a different sample rate
  556. for (ix = 0; ix < wavemult_max; ix++) {
  557. x = 127*(1.0 - cos((M_PI*2)*ix/wavemult_max));
  558. wavemult[ix] = (int)x;
  559. }
  560. }
  561. pk_shape = pk_shape2;
  562. #ifdef INCLUDE_KLATT
  563. KlattInit();
  564. #endif
  565. }
  566. int GetAmplitude(void)
  567. {
  568. int amp;
  569. // normal, none, reduced, moderate, strong
  570. static const unsigned char amp_emphasis[5] = { 16, 16, 10, 16, 22 };
  571. amp = (embedded_value[EMBED_A])*55/100;
  572. general_amplitude = amp * amp_emphasis[embedded_value[EMBED_F]] / 16;
  573. return general_amplitude;
  574. }
  575. static void WavegenSetEcho(void)
  576. {
  577. int delay;
  578. int amp;
  579. voicing = wvoice->voicing;
  580. delay = wvoice->echo_delay;
  581. amp = wvoice->echo_amp;
  582. if (delay >= N_ECHO_BUF)
  583. delay = N_ECHO_BUF-1;
  584. if (amp > 100)
  585. amp = 100;
  586. memset(echo_buf, 0, sizeof(echo_buf));
  587. echo_tail = 0;
  588. if (embedded_value[EMBED_H] > 0) {
  589. // set echo from an embedded command in the text
  590. amp = embedded_value[EMBED_H];
  591. delay = 130;
  592. }
  593. if (delay == 0)
  594. amp = 0;
  595. echo_head = (delay * samplerate)/1000;
  596. echo_length = echo_head; // ensure completion of echo at the end of speech. Use 1 delay period?
  597. if (amp == 0)
  598. echo_length = 0;
  599. if (amp > 20)
  600. echo_length = echo_head * 2; // perhaps allow 2 echo periods if the echo is loud.
  601. // echo_amp units are 1/256ths of the amplitude of the original sound.
  602. echo_amp = amp;
  603. // compensate (partially) for increase in amplitude due to echo
  604. general_amplitude = GetAmplitude();
  605. general_amplitude = ((general_amplitude * (500-amp))/500);
  606. }
  607. int PeaksToHarmspect(wavegen_peaks_t *peaks, int pitch, int *htab, int control)
  608. {
  609. // Calculate the amplitude of each harmonics from the formants
  610. // Only for formants 0 to 5
  611. // control 0=initial call, 1=every 64 cycles
  612. // pitch and freqs are Hz<<16
  613. int f;
  614. wavegen_peaks_t *p;
  615. int fp; // centre freq of peak
  616. int fhi; // high freq of peak
  617. int h; // harmonic number
  618. int pk;
  619. int hmax;
  620. int hmax_samplerate; // highest harmonic allowed for the samplerate
  621. int x;
  622. int ix;
  623. int h1;
  624. // initialise as much of *out as we will need
  625. if (wvoice == NULL)
  626. return 1;
  627. hmax = (peaks[wvoice->n_harmonic_peaks].freq + peaks[wvoice->n_harmonic_peaks].right)/pitch;
  628. if (hmax >= MAX_HARMONIC)
  629. hmax = MAX_HARMONIC-1;
  630. // restrict highest harmonic to half the samplerate
  631. hmax_samplerate = (((samplerate * 19)/40) << 16)/pitch; // only 95% of Nyquist freq
  632. if (hmax > hmax_samplerate)
  633. hmax = hmax_samplerate;
  634. for (h = 0; h <= hmax; h++)
  635. htab[h] = 0;
  636. for (pk = 0; pk <= wvoice->n_harmonic_peaks; pk++) {
  637. p = &peaks[pk];
  638. if ((p->height == 0) || (fp = p->freq) == 0)
  639. continue;
  640. fhi = p->freq + p->right;
  641. h = ((p->freq - p->left) / pitch) + 1;
  642. if (h <= 0) h = 1;
  643. for (f = pitch*h; f < fp; f += pitch)
  644. htab[h++] += pk_shape[(fp-f)/(p->left>>8)] * p->height;
  645. for (; f < fhi; f += pitch)
  646. htab[h++] += pk_shape[(f-fp)/(p->right>>8)] * p->height;
  647. }
  648. int y;
  649. int h2;
  650. // increase bass
  651. y = peaks[1].height * 10; // addition as a multiple of 1/256s
  652. h2 = (1000<<16)/pitch; // decrease until 1000Hz
  653. if (h2 > 0) {
  654. x = y/h2;
  655. h = 1;
  656. while (y > 0) {
  657. htab[h++] += y;
  658. y -= x;
  659. }
  660. }
  661. // find the nearest harmonic for HF peaks where we don't use shape
  662. for (; pk < N_PEAKS; pk++) {
  663. x = peaks[pk].height >> 14;
  664. peak_height[pk] = (x * x * 5)/2;
  665. // find the nearest harmonic for HF peaks where we don't use shape
  666. if (control == 0) {
  667. // set this initially, but make changes only at the quiet point
  668. peak_harmonic[pk] = peaks[pk].freq / pitch;
  669. }
  670. // only use harmonics up to half the samplerate
  671. if (peak_harmonic[pk] >= hmax_samplerate)
  672. peak_height[pk] = 0;
  673. }
  674. // convert from the square-rooted values
  675. f = 0;
  676. for (h = 0; h <= hmax; h++, f += pitch) {
  677. x = htab[h] >> 15;
  678. htab[h] = (x * x) >> 8;
  679. if ((ix = (f >> 19)) < N_TONE_ADJUST)
  680. htab[h] = (htab[h] * wvoice->tone_adjust[ix]) >> 13; // index tone_adjust with Hz/8
  681. }
  682. // adjust the amplitude of the first harmonic, affects tonal quality
  683. h1 = htab[1] * option_harmonic1;
  684. htab[1] = h1/8;
  685. // calc intermediate increments of LF harmonics
  686. if (control & 1) {
  687. for (h = 1; h < N_LOWHARM; h++)
  688. harm_inc[h] = (htab[h] - harmspect[h]) >> 3;
  689. }
  690. return hmax; // highest harmonic number
  691. }
  692. static void AdvanceParameters()
  693. {
  694. // Called every 64 samples to increment the formant freq, height, and widths
  695. int x;
  696. int ix;
  697. static int Flutter_ix = 0;
  698. // advance the pitch
  699. wdata.pitch_ix += wdata.pitch_inc;
  700. if ((ix = wdata.pitch_ix>>8) > 127) ix = 127;
  701. x = wdata.pitch_env[ix] * wdata.pitch_range;
  702. wdata.pitch = (x>>8) + wdata.pitch_base;
  703. amp_ix += amp_inc;
  704. /* add pitch flutter */
  705. if (Flutter_ix >= (N_FLUTTER*64))
  706. Flutter_ix = 0;
  707. x = ((int)(Flutter_tab[Flutter_ix >> 6])-0x80) * flutter_amp;
  708. Flutter_ix += Flutter_inc;
  709. wdata.pitch += x;
  710. if (wdata.pitch < 102400)
  711. wdata.pitch = 102400; // min pitch, 25 Hz (25 << 12)
  712. if (samplecount == samplecount_start)
  713. return;
  714. for (ix = 0; ix <= wvoice->n_harmonic_peaks; ix++) {
  715. peaks[ix].freq1 += peaks[ix].freq_inc;
  716. peaks[ix].freq = (int)peaks[ix].freq1;
  717. peaks[ix].height1 += peaks[ix].height_inc;
  718. if ((peaks[ix].height = (int)peaks[ix].height1) < 0)
  719. peaks[ix].height = 0;
  720. peaks[ix].left1 += peaks[ix].left_inc;
  721. peaks[ix].left = (int)peaks[ix].left1;
  722. if (ix < 3) {
  723. peaks[ix].right1 += peaks[ix].right_inc;
  724. peaks[ix].right = (int)peaks[ix].right1;
  725. } else
  726. peaks[ix].right = peaks[ix].left;
  727. }
  728. for (; ix < 8; ix++) {
  729. // formants 6,7,8 don't have a width parameter
  730. if (ix < 7) {
  731. peaks[ix].freq1 += peaks[ix].freq_inc;
  732. peaks[ix].freq = (int)peaks[ix].freq1;
  733. }
  734. peaks[ix].height1 += peaks[ix].height_inc;
  735. if ((peaks[ix].height = (int)peaks[ix].height1) < 0)
  736. peaks[ix].height = 0;
  737. }
  738. }
  739. static double resonator(RESONATOR *r, double input)
  740. {
  741. double x;
  742. x = r->a * input + r->b * r->x1 + r->c * r->x2;
  743. r->x2 = r->x1;
  744. r->x1 = x;
  745. return x;
  746. }
  747. static void setresonator(RESONATOR *rp, int freq, int bwidth, int init)
  748. {
  749. // freq Frequency of resonator in Hz
  750. // bwidth Bandwidth of resonator in Hz
  751. // init Initialize internal data
  752. double x;
  753. double arg;
  754. if (init) {
  755. rp->x1 = 0;
  756. rp->x2 = 0;
  757. }
  758. arg = minus_pi_t * bwidth;
  759. x = exp(arg);
  760. rp->c = -(x * x);
  761. arg = two_pi_t * freq;
  762. rp->b = x * cos(arg) * 2.0;
  763. rp->a = 1.0 - rp->b - rp->c;
  764. }
  765. void InitBreath(void)
  766. {
  767. int ix;
  768. minus_pi_t = -M_PI / samplerate;
  769. two_pi_t = -2.0 * minus_pi_t;
  770. for (ix = 0; ix < N_PEAKS; ix++)
  771. setresonator(&rbreath[ix], 2000, 200, 1);
  772. }
  773. static void SetBreath()
  774. {
  775. int pk;
  776. if (wvoice->breath[0] == 0)
  777. return;
  778. for (pk = 1; pk < N_PEAKS; pk++) {
  779. if (wvoice->breath[pk] != 0) {
  780. // breath[0] indicates that some breath formants are needed
  781. // set the freq from the current ynthesis formant and the width from the voice data
  782. setresonator(&rbreath[pk], peaks[pk].freq >> 16, wvoice->breathw[pk], 0);
  783. }
  784. }
  785. }
  786. static int ApplyBreath(void)
  787. {
  788. int value = 0;
  789. int noise;
  790. int ix;
  791. int amp;
  792. // use two random numbers, for alternate formants
  793. noise = (rand() & 0x3fff) - 0x2000;
  794. for (ix = 1; ix < N_PEAKS; ix++) {
  795. if ((amp = wvoice->breath[ix]) != 0) {
  796. amp *= (peaks[ix].height >> 14);
  797. value += (int)resonator(&rbreath[ix], noise) * amp;
  798. }
  799. }
  800. return value;
  801. }
  802. int Wavegen()
  803. {
  804. unsigned short waveph;
  805. unsigned short theta;
  806. int total;
  807. int h;
  808. int ix;
  809. int z, z1, z2;
  810. int echo;
  811. int ov;
  812. static int maxh, maxh2;
  813. int pk;
  814. signed char c;
  815. int sample;
  816. int amp;
  817. int modn_amp = 1, modn_period;
  818. static int agc = 256;
  819. static int h_switch_sign = 0;
  820. static int cycle_count = 0;
  821. static int amplitude2 = 0; // adjusted for pitch
  822. // continue until the output buffer is full, or
  823. // the required number of samples have been produced
  824. for (;;) {
  825. if ((end_wave == 0) && (samplecount == nsamples))
  826. return 0;
  827. if ((samplecount & 0x3f) == 0) {
  828. // every 64 samples, adjust the parameters
  829. if (samplecount == 0) {
  830. hswitch = 0;
  831. harmspect = hspect[0];
  832. maxh2 = PeaksToHarmspect(peaks, wdata.pitch<<4, hspect[0], 0);
  833. // adjust amplitude to compensate for fewer harmonics at higher pitch
  834. amplitude2 = (wdata.amplitude * (wdata.pitch >> 8) * wdata.amplitude_fmt)/(10000 << 3);
  835. // switch sign of harmonics above about 900Hz, to reduce max peak amplitude
  836. h_switch_sign = 890 / (wdata.pitch >> 12);
  837. } else
  838. AdvanceParameters();
  839. // pitch is Hz<<12
  840. phaseinc = (wdata.pitch>>7) * PHASE_INC_FACTOR;
  841. cycle_samples = samplerate/(wdata.pitch >> 12); // sr/(pitch*2)
  842. hf_factor = wdata.pitch >> 11;
  843. maxh = maxh2;
  844. harmspect = hspect[hswitch];
  845. hswitch ^= 1;
  846. maxh2 = PeaksToHarmspect(peaks, wdata.pitch<<4, hspect[hswitch], 1);
  847. SetBreath();
  848. } else if ((samplecount & 0x07) == 0) {
  849. for (h = 1; h < N_LOWHARM && h <= maxh2 && h <= maxh; h++)
  850. harmspect[h] += harm_inc[h];
  851. // bring automctic gain control back towards unity
  852. if (agc < 256) agc++;
  853. }
  854. samplecount++;
  855. if (wavephase > 0) {
  856. wavephase += phaseinc;
  857. if (wavephase < 0) {
  858. // sign has changed, reached a quiet point in the waveform
  859. cbytes = wavemult_offset - (cycle_samples)/2;
  860. if (samplecount > nsamples)
  861. return 0;
  862. cycle_count++;
  863. for (pk = wvoice->n_harmonic_peaks+1; pk < N_PEAKS; pk++) {
  864. // find the nearest harmonic for HF peaks where we don't use shape
  865. peak_harmonic[pk] = ((peaks[pk].freq / (wdata.pitch*8)) + 1) / 2;
  866. }
  867. // adjust amplitude to compensate for fewer harmonics at higher pitch
  868. amplitude2 = (wdata.amplitude * (wdata.pitch >> 8) * wdata.amplitude_fmt)/(10000 << 3);
  869. if (glottal_flag > 0) {
  870. if (glottal_flag == 3) {
  871. if ((nsamples-samplecount) < (cycle_samples*2)) {
  872. // Vowel before glottal-stop.
  873. // This is the start of the penultimate cycle, reduce its amplitude
  874. glottal_flag = 2;
  875. amplitude2 = (amplitude2 * glottal_reduce)/256;
  876. }
  877. } else if (glottal_flag == 4) {
  878. // Vowel following a glottal-stop.
  879. // This is the start of the second cycle, reduce its amplitude
  880. glottal_flag = 2;
  881. amplitude2 = (amplitude2 * glottal_reduce)/256;
  882. } else
  883. glottal_flag--;
  884. }
  885. if (amplitude_env != NULL) {
  886. // amplitude envelope is only used for creaky voice effect on certain vowels/tones
  887. if ((ix = amp_ix>>8) > 127) ix = 127;
  888. amp = amplitude_env[ix];
  889. amplitude2 = (amplitude2 * amp)/128;
  890. }
  891. // introduce roughness into the sound by reducing the amplitude of
  892. modn_period = 0;
  893. if (voice->roughness < N_ROUGHNESS) {
  894. modn_period = modulation_tab[voice->roughness][modulation_type];
  895. modn_amp = modn_period & 0xf;
  896. modn_period = modn_period >> 4;
  897. }
  898. if (modn_period != 0) {
  899. if (modn_period == 0xf) {
  900. // just once */
  901. amplitude2 = (amplitude2 * modn_amp)/16;
  902. modulation_type = 0;
  903. } else {
  904. // reduce amplitude every [modn_period} cycles
  905. if ((cycle_count % modn_period) == 0)
  906. amplitude2 = (amplitude2 * modn_amp)/16;
  907. }
  908. }
  909. }
  910. } else
  911. wavephase += phaseinc;
  912. waveph = (unsigned short)(wavephase >> 16);
  913. total = 0;
  914. // apply HF peaks, formants 6,7,8
  915. // add a single harmonic and then spread this my multiplying by a
  916. // window. This is to reduce the processing power needed to add the
  917. // higher frequence harmonics.
  918. cbytes++;
  919. if (cbytes >= 0 && cbytes < wavemult_max) {
  920. for (pk = wvoice->n_harmonic_peaks+1; pk < N_PEAKS; pk++) {
  921. theta = peak_harmonic[pk] * waveph;
  922. total += (long)sin_tab[theta >> 5] * peak_height[pk];
  923. }
  924. // spread the peaks by multiplying by a window
  925. total = (long)(total / hf_factor) * wavemult[cbytes];
  926. }
  927. // apply main peaks, formants 0 to 5
  928. #ifdef USE_ASSEMBLER_1
  929. // use an optimised routine for this loop, if available
  930. total += AddSineWaves(waveph, h_switch_sign, maxh, harmspect); // call an assembler code routine
  931. #else
  932. theta = waveph;
  933. for (h = 1; h <= h_switch_sign; h++) {
  934. total += ((int)sin_tab[theta >> 5] * harmspect[h]);
  935. theta += waveph;
  936. }
  937. while (h <= maxh) {
  938. total -= ((int)sin_tab[theta >> 5] * harmspect[h]);
  939. theta += waveph;
  940. h++;
  941. }
  942. #endif
  943. if (voicing != 64)
  944. total = (total >> 6) * voicing;
  945. if (wvoice->breath[0])
  946. total += ApplyBreath();
  947. // mix with sampled wave if required
  948. z2 = 0;
  949. if (wdata.mix_wavefile_ix < wdata.n_mix_wavefile) {
  950. if (wdata.mix_wave_scale == 0) {
  951. // a 16 bit sample
  952. c = wdata.mix_wavefile[wdata.mix_wavefile_ix+wdata.mix_wavefile_offset+1];
  953. sample = wdata.mix_wavefile[wdata.mix_wavefile_ix+wdata.mix_wavefile_offset] + (c * 256);
  954. wdata.mix_wavefile_ix += 2;
  955. } else {
  956. // a 8 bit sample, scaled
  957. sample = (signed char)wdata.mix_wavefile[wdata.mix_wavefile_offset+wdata.mix_wavefile_ix++] * wdata.mix_wave_scale;
  958. }
  959. z2 = (sample * wdata.amplitude_v) >> 10;
  960. z2 = (z2 * wdata.mix_wave_amp)/32;
  961. if ((wdata.mix_wavefile_ix + wdata.mix_wavefile_offset) >= wdata.mix_wavefile_max) // reached the end of available WAV data
  962. wdata.mix_wavefile_offset -= (wdata.mix_wavefile_max*3)/4;
  963. }
  964. z1 = z2 + (((total>>8) * amplitude2) >> 13);
  965. echo = (echo_buf[echo_tail++] * echo_amp);
  966. z1 += echo >> 8;
  967. if (echo_tail >= N_ECHO_BUF)
  968. echo_tail = 0;
  969. z = (z1 * agc) >> 8;
  970. // check for overflow, 16bit signed samples
  971. if (z >= 32768) {
  972. ov = 8388608/z1 - 1; // 8388608 is 2^23, i.e. max value * 256
  973. if (ov < agc) agc = ov; // set agc to number of 1/256ths to multiply the sample by
  974. z = (z1 * agc) >> 8; // reduce sample by agc value to prevent overflow
  975. } else if (z <= -32768) {
  976. ov = -8388608/z1 - 1;
  977. if (ov < agc) agc = ov;
  978. z = (z1 * agc) >> 8;
  979. }
  980. *out_ptr++ = z;
  981. *out_ptr++ = z >> 8;
  982. echo_buf[echo_head++] = z;
  983. if (echo_head >= N_ECHO_BUF)
  984. echo_head = 0;
  985. if (out_ptr >= out_end)
  986. return 1;
  987. }
  988. }
  989. static int PlaySilence(int length, int resume)
  990. {
  991. static int n_samples;
  992. int value = 0;
  993. nsamples = 0;
  994. samplecount = 0;
  995. wavephase = 0x7fffffff;
  996. if (length == 0)
  997. return 0;
  998. if (resume == 0)
  999. n_samples = length;
  1000. while (n_samples-- > 0) {
  1001. value = (echo_buf[echo_tail++] * echo_amp) >> 8;
  1002. if (echo_tail >= N_ECHO_BUF)
  1003. echo_tail = 0;
  1004. *out_ptr++ = value;
  1005. *out_ptr++ = value >> 8;
  1006. echo_buf[echo_head++] = value;
  1007. if (echo_head >= N_ECHO_BUF)
  1008. echo_head = 0;
  1009. if (out_ptr >= out_end)
  1010. return 1;
  1011. }
  1012. return 0;
  1013. }
  1014. static int PlayWave(int length, int resume, unsigned char *data, int scale, int amp)
  1015. {
  1016. static int n_samples;
  1017. static int ix = 0;
  1018. int value;
  1019. signed char c;
  1020. if (resume == 0) {
  1021. n_samples = length;
  1022. ix = 0;
  1023. }
  1024. nsamples = 0;
  1025. samplecount = 0;
  1026. while (n_samples-- > 0) {
  1027. if (scale == 0) {
  1028. // 16 bits data
  1029. c = data[ix+1];
  1030. value = data[ix] + (c * 256);
  1031. ix += 2;
  1032. } else {
  1033. // 8 bit data, shift by the specified scale factor
  1034. value = (signed char)data[ix++] * scale;
  1035. }
  1036. value *= (consonant_amp * general_amplitude); // reduce strength of consonant
  1037. value = value >> 10;
  1038. value = (value * amp)/32;
  1039. value += ((echo_buf[echo_tail++] * echo_amp) >> 8);
  1040. if (value > 32767)
  1041. value = 32768;
  1042. else if (value < -32768)
  1043. value = -32768;
  1044. if (echo_tail >= N_ECHO_BUF)
  1045. echo_tail = 0;
  1046. out_ptr[0] = value;
  1047. out_ptr[1] = value >> 8;
  1048. out_ptr += 2;
  1049. echo_buf[echo_head++] = (value*3)/4;
  1050. if (echo_head >= N_ECHO_BUF)
  1051. echo_head = 0;
  1052. if (out_ptr >= out_end)
  1053. return 1;
  1054. }
  1055. return 0;
  1056. }
  static int SetWithRange0(int value, int max)
  {
      // Limits value to the range 0..max. The lower bound is tested first,
      // so any negative value yields 0 regardless of max.
      return (value < 0) ? 0
           : (value > max) ? max
           : value;
  }
  1065. static void SetPitchFormants()
  1066. {
  1067. int ix;
  1068. int factor = 256;
  1069. int pitch_value;
  1070. // adjust formants to give better results for a different voice pitch
  1071. if ((pitch_value = embedded_value[EMBED_P]) > MAX_PITCH_VALUE)
  1072. pitch_value = MAX_PITCH_VALUE;
  1073. if (pitch_value > 50) {
  1074. // only adjust if the pitch is higher than normal
  1075. factor = 256 + (25 * (pitch_value - 50))/50;
  1076. }
  1077. for (ix = 0; ix <= 5; ix++)
  1078. wvoice->freq[ix] = (wvoice->freq2[ix] * factor)/256;
  1079. factor = embedded_value[EMBED_T]*3;
  1080. wvoice->height[0] = (wvoice->height2[0] * (256 - factor*2))/256;
  1081. wvoice->height[1] = (wvoice->height2[1] * (256 - factor))/256;
  1082. }
  1083. void SetEmbedded(int control, int value)
  1084. {
  1085. // there was an embedded command in the text at this point
  1086. int sign = 0;
  1087. int command;
  1088. command = control & 0x1f;
  1089. if ((control & 0x60) == 0x60)
  1090. sign = -1;
  1091. else if ((control & 0x60) == 0x40)
  1092. sign = 1;
  1093. if (command < N_EMBEDDED_VALUES) {
  1094. if (sign == 0)
  1095. embedded_value[command] = value;
  1096. else
  1097. embedded_value[command] += (value * sign);
  1098. embedded_value[command] = SetWithRange0(embedded_value[command], embedded_max[command]);
  1099. }
  1100. switch (command)
  1101. {
  1102. case EMBED_T:
  1103. WavegenSetEcho(); // and drop through to case P
  1104. case EMBED_P:
  1105. SetPitchFormants();
  1106. break;
  1107. case EMBED_A: // amplitude
  1108. general_amplitude = GetAmplitude();
  1109. break;
  1110. case EMBED_F: // emphasis
  1111. general_amplitude = GetAmplitude();
  1112. break;
  1113. case EMBED_H:
  1114. WavegenSetEcho();
  1115. break;
  1116. }
  1117. }
  1118. void WavegenSetVoice(voice_t *v)
  1119. {
  1120. static voice_t v2;
  1121. memcpy(&v2, v, sizeof(v2));
  1122. wvoice = &v2;
  1123. if (v->peak_shape == 0)
  1124. pk_shape = pk_shape1;
  1125. else
  1126. pk_shape = pk_shape2;
  1127. consonant_amp = (v->consonant_amp * 26) /100;
  1128. if (samplerate <= 11000) {
  1129. consonant_amp = consonant_amp*2; // emphasize consonants at low sample rates
  1130. option_harmonic1 = 6;
  1131. }
  1132. WavegenSetEcho();
  1133. SetPitchFormants();
  1134. MarkerEvent(espeakEVENT_SAMPLERATE, 0, wvoice->samplerate, 0, out_ptr);
  1135. }
  1136. static void SetAmplitude(int length, unsigned char *amp_env, int value)
  1137. {
  1138. amp_ix = 0;
  1139. if (length == 0)
  1140. amp_inc = 0;
  1141. else
  1142. amp_inc = (256 * ENV_LEN * STEPSIZE)/length;
  1143. wdata.amplitude = (value * general_amplitude)/16;
  1144. wdata.amplitude_v = (wdata.amplitude * wvoice->consonant_ampv * 15)/100; // for wave mixed with voiced sounds
  1145. amplitude_env = amp_env;
  1146. }
  1147. void SetPitch2(voice_t *voice, int pitch1, int pitch2, int *pitch_base, int *pitch_range)
  1148. {
  1149. int x;
  1150. int base;
  1151. int range;
  1152. int pitch_value;
  1153. if (pitch1 > pitch2) {
  1154. x = pitch1; // swap values
  1155. pitch1 = pitch2;
  1156. pitch2 = x;
  1157. }
  1158. if ((pitch_value = embedded_value[EMBED_P]) > MAX_PITCH_VALUE)
  1159. pitch_value = MAX_PITCH_VALUE;
  1160. pitch_value -= embedded_value[EMBED_T]; // adjust tone for announcing punctuation
  1161. if (pitch_value < 0)
  1162. pitch_value = 0;
  1163. base = (voice->pitch_base * pitch_adjust_tab[pitch_value])/128;
  1164. range = (voice->pitch_range * embedded_value[EMBED_R])/50;
  1165. // compensate for change in pitch when the range is narrowed or widened
  1166. base -= (range - voice->pitch_range)*18;
  1167. *pitch_base = base + (pitch1 * range)/2;
  1168. *pitch_range = base + (pitch2 * range)/2 - *pitch_base;
  1169. }
  1170. void SetPitch(int length, unsigned char *env, int pitch1, int pitch2)
  1171. {
  1172. // length in samples
  1173. if ((wdata.pitch_env = env) == NULL)
  1174. wdata.pitch_env = env_fall; // default
  1175. wdata.pitch_ix = 0;
  1176. if (length == 0)
  1177. wdata.pitch_inc = 0;
  1178. else
  1179. wdata.pitch_inc = (256 * ENV_LEN * STEPSIZE)/length;
  1180. SetPitch2(wvoice, pitch1, pitch2, &wdata.pitch_base, &wdata.pitch_range);
  1181. // set initial pitch
  1182. wdata.pitch = ((wdata.pitch_env[0] * wdata.pitch_range) >>8) + wdata.pitch_base; // Hz << 12
  1183. flutter_amp = wvoice->flutter;
  1184. }
  1185. void SetSynth(int length, int modn, frame_t *fr1, frame_t *fr2, voice_t *v)
  1186. {
  1187. int ix;
  1188. DOUBLEX next;
  1189. int length2;
  1190. int length4;
  1191. int qix;
  1192. int cmd;
  1193. static int glottal_reduce_tab1[4] = { 0x30, 0x30, 0x40, 0x50 }; // vowel before [?], amp * 1/256
  1194. static int glottal_reduce_tab2[4] = { 0x90, 0xa0, 0xb0, 0xc0 }; // vowel after [?], amp * 1/256
  1195. harm_sqrt_n = 0;
  1196. end_wave = 1;
  1197. // any additional information in the param1 ?
  1198. modulation_type = modn & 0xff;
  1199. glottal_flag = 0;
  1200. if (modn & 0x400) {
  1201. glottal_flag = 3; // before a glottal stop
  1202. glottal_reduce = glottal_reduce_tab1[(modn >> 8) & 3];
  1203. }
  1204. if (modn & 0x800) {
  1205. glottal_flag = 4; // after a glottal stop
  1206. glottal_reduce = glottal_reduce_tab2[(modn >> 8) & 3];
  1207. }
  1208. for (qix = wcmdq_head+1;; qix++) {
  1209. if (qix >= N_WCMDQ) qix = 0;
  1210. if (qix == wcmdq_tail) break;
  1211. cmd = wcmdq[qix][0];
  1212. if (cmd == WCMD_SPECT) {
  1213. end_wave = 0; // next wave generation is from another spectrum
  1214. break;
  1215. }
  1216. if ((cmd == WCMD_WAVE) || (cmd == WCMD_PAUSE))
  1217. break; // next is not from spectrum, so continue until end of wave cycle
  1218. }
  1219. // round the length to a multiple of the stepsize
  1220. length2 = (length + STEPSIZE/2) & ~0x3f;
  1221. if (length2 == 0)
  1222. length2 = STEPSIZE;
  1223. // add this length to any left over from the previous synth
  1224. samplecount_start = samplecount;
  1225. nsamples += length2;
  1226. length4 = length2/4;
  1227. peaks[7].freq = (7800 * v->freq[7] + v->freqadd[7]*256) << 8;
  1228. peaks[8].freq = (9000 * v->freq[8] + v->freqadd[8]*256) << 8;
  1229. for (ix = 0; ix < 8; ix++) {
  1230. if (ix < 7) {
  1231. peaks[ix].freq1 = (fr1->ffreq[ix] * v->freq[ix] + v->freqadd[ix]*256) << 8;
  1232. peaks[ix].freq = (int)peaks[ix].freq1;
  1233. next = (fr2->ffreq[ix] * v->freq[ix] + v->freqadd[ix]*256) << 8;
  1234. peaks[ix].freq_inc = ((next - peaks[ix].freq1) * (STEPSIZE/4)) / length4; // lower headroom for fixed point math
  1235. }
  1236. peaks[ix].height1 = (fr1->fheight[ix] * v->height[ix]) << 6;
  1237. peaks[ix].height = (int)peaks[ix].height1;
  1238. next = (fr2->fheight[ix] * v->height[ix]) << 6;
  1239. peaks[ix].height_inc = ((next - peaks[ix].height1) * STEPSIZE) / length2;
  1240. if ((ix <= 5) && (ix <= wvoice->n_harmonic_peaks)) {
  1241. peaks[ix].left1 = (fr1->fwidth[ix] * v->width[ix]) << 10;
  1242. peaks[ix].left = (int)peaks[ix].left1;
  1243. next = (fr2->fwidth[ix] * v->width[ix]) << 10;
  1244. peaks[ix].left_inc = ((next - peaks[ix].left1) * STEPSIZE) / length2;
  1245. if (ix < 3) {
  1246. peaks[ix].right1 = (fr1->fright[ix] * v->width[ix]) << 10;
  1247. peaks[ix].right = (int)peaks[ix].right1;
  1248. next = (fr2->fright[ix] * v->width[ix]) << 10;
  1249. peaks[ix].right_inc = ((next - peaks[ix].right1) * STEPSIZE) / length2;
  1250. } else
  1251. peaks[ix].right = peaks[ix].left;
  1252. }
  1253. }
  1254. }
  1255. static int Wavegen2(int length, int modulation, int resume, frame_t *fr1, frame_t *fr2)
  1256. {
  1257. if (resume == 0)
  1258. SetSynth(length, modulation, fr1, fr2, wvoice);
  1259. return Wavegen();
  1260. }
  void Write4Bytes(FILE *f, int value)
  {
      // Write 4 bytes to a file, least significant first
      int bytes_left = 4;

      while (bytes_left-- > 0) {
          fputc(value & 0xff, f);
          value = value >> 8;
      }
  }
  1270. int WavegenFill2()
  1271. {
  1272. // Pick up next wavegen commands from the queue
  1273. // return: 0 output buffer has been filled
  1274. // return: 1 input command queue is now empty
  1275. intptr_t *q;
  1276. int length;
  1277. int result;
  1278. int marker_type;
  1279. static int resume = 0;
  1280. static int echo_complete = 0;
  1281. while (out_ptr < out_end) {
  1282. if (WcmdqUsed() <= 0) {
  1283. if (echo_complete > 0) {
  1284. // continue to play silence until echo is completed
  1285. resume = PlaySilence(echo_complete, resume);
  1286. if (resume == 1)
  1287. return 0; // not yet finished
  1288. }
  1289. return 1; // queue empty, close sound channel
  1290. }
  1291. result = 0;
  1292. q = wcmdq[wcmdq_head];
  1293. length = q[1];
  1294. switch (q[0] & 0xff)
  1295. {
  1296. case WCMD_PITCH:
  1297. SetPitch(length, (unsigned char *)q[2], q[3] >> 16, q[3] & 0xffff);
  1298. break;
  1299. case WCMD_PAUSE:
  1300. if (resume == 0)
  1301. echo_complete -= length;
  1302. wdata.n_mix_wavefile = 0;
  1303. wdata.amplitude_fmt = 100;
  1304. #ifdef INCLUDE_KLATT
  1305. KlattReset(1);
  1306. #endif
  1307. result = PlaySilence(length, resume);
  1308. break;
  1309. case WCMD_WAVE:
  1310. echo_complete = echo_length;
  1311. wdata.n_mix_wavefile = 0;
  1312. #ifdef INCLUDE_KLATT
  1313. KlattReset(1);
  1314. #endif
  1315. result = PlayWave(length, resume, (unsigned char *)q[2], q[3] & 0xff, q[3] >> 8);
  1316. break;
  1317. case WCMD_WAVE2:
  1318. // wave file to be played at the same time as synthesis
  1319. wdata.mix_wave_amp = q[3] >> 8;
  1320. wdata.mix_wave_scale = q[3] & 0xff;
  1321. wdata.n_mix_wavefile = (length & 0xffff);
  1322. wdata.mix_wavefile_max = (length >> 16) & 0xffff;
  1323. if (wdata.mix_wave_scale == 0) {
  1324. wdata.n_mix_wavefile *= 2;
  1325. wdata.mix_wavefile_max *= 2;
  1326. }
  1327. wdata.mix_wavefile_ix = 0;
  1328. wdata.mix_wavefile_offset = 0;
  1329. wdata.mix_wavefile = (unsigned char *)q[2];
  1330. break;
  1331. case WCMD_SPECT2: // as WCMD_SPECT but stop any concurrent wave file
  1332. wdata.n_mix_wavefile = 0; // ... and drop through to WCMD_SPECT case
  1333. case WCMD_SPECT:
  1334. echo_complete = echo_length;
  1335. result = Wavegen2(length & 0xffff, q[1] >> 16, resume, (frame_t *)q[2], (frame_t *)q[3]);
  1336. break;
  1337. #ifdef INCLUDE_KLATT
  1338. case WCMD_KLATT2: // as WCMD_SPECT but stop any concurrent wave file
  1339. wdata.n_mix_wavefile = 0; // ... and drop through to WCMD_SPECT case
  1340. case WCMD_KLATT:
  1341. echo_complete = echo_length;
  1342. result = Wavegen_Klatt2(length & 0xffff, resume, (frame_t *)q[2], (frame_t *)q[3]);
  1343. break;
  1344. #endif
  1345. case WCMD_MARKER:
  1346. marker_type = q[0] >> 8;
  1347. MarkerEvent(marker_type, q[1], q[2], q[3], out_ptr);
  1348. if (marker_type == 1) // word marker
  1349. current_source_index = q[1] & 0xffffff;
  1350. break;
  1351. case WCMD_AMPLITUDE:
  1352. SetAmplitude(length, (unsigned char *)q[2], q[3]);
  1353. break;
  1354. case WCMD_VOICE:
  1355. WavegenSetVoice((voice_t *)q[2]);
  1356. free((voice_t *)q[2]);
  1357. break;
  1358. case WCMD_EMBEDDED:
  1359. SetEmbedded(q[1], q[2]);
  1360. break;
  1361. case WCMD_MBROLA_DATA:
  1362. result = MbrolaFill(length, resume, (general_amplitude * wvoice->voicing)/64);
  1363. break;
  1364. case WCMD_FMT_AMPLITUDE:
  1365. if ((wdata.amplitude_fmt = q[1]) == 0)
  1366. wdata.amplitude_fmt = 100; // percentage, but value=0 means 100%
  1367. break;
  1368. #if HAVE_SONIC_H
  1369. case WCMD_SONIC_SPEED:
  1370. sonicSpeed = (double)q[1] / 1024;
  1371. break;
  1372. #endif
  1373. }
  1374. if (result == 0) {
  1375. WcmdqIncHead();
  1376. resume = 0;
  1377. } else
  1378. resume = 1;
  1379. }
  1380. return 0;
  1381. }
  #if HAVE_SONIC_H
  // Speed up the audio samples with libsonic.
  //   outbuf       samples to feed in; also receives the sped-up samples
  //   length_in    number of input samples in outbuf
  //   length_out   maximum number of samples to read back into outbuf
  //   end_of_text  non-zero: flush whatever the stream still holds
  // Returns the number of samples read back, or 0 if no stream exists yet.
  static int SpeedUp(short *outbuf, int length_in, int length_out, int end_of_text)
  {
      if (length_in > 0) {
          // the stream is created on first use
          if (sonicSpeedupStream == NULL)
              sonicSpeedupStream = sonicCreateStream(22050, 1);

          if (sonicGetSpeed(sonicSpeedupStream) != sonicSpeed)
              sonicSetSpeed(sonicSpeedupStream, sonicSpeed);

          sonicWriteShortToStream(sonicSpeedupStream, outbuf, length_in);
      }

      if (sonicSpeedupStream == NULL)
          return 0;

      if (end_of_text)
          sonicFlushStream(sonicSpeedupStream);

      return sonicReadShortFromStream(sonicSpeedupStream, outbuf, length_out);
  }
  #endif
  1400. // Call WavegenFill2, and then speed up the output samples.
  1401. int WavegenFill()
  1402. {
  1403. int finished;
  1404. unsigned char *p_start;
  1405. p_start = out_ptr;
  1406. finished = WavegenFill2();
  1407. #if HAVE_SONIC_H
  1408. if (sonicSpeed > 1.0) {
  1409. int length;
  1410. int max_length;
  1411. max_length = (out_end - p_start);
  1412. length = 2*SpeedUp((short *)p_start, (out_ptr-p_start)/2, max_length/2, finished);
  1413. out_ptr = p_start + length;
  1414. if (length >= max_length)
  1415. finished = 0; // there may be more data to flush
  1416. }
  1417. #endif
  1418. return finished;
  1419. }