eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

synthesize.cpp 36KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657
  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2007 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include "StdAfx.h"
  20. #include <stdio.h>
  21. #include <ctype.h>
  22. #include <wctype.h>
  23. #include <stdlib.h>
  24. #include <string.h>
  25. #include <math.h>
  26. #include "speak_lib.h"
  27. #include "speech.h"
  28. #include "phoneme.h"
  29. #include "synthesize.h"
  30. #include "voice.h"
  31. #include "translate.h"
  32. extern FILE *f_log;
  33. static void SmoothSpect(void);
  34. // list of phonemes in a clause
  35. int n_phoneme_list=0;
  36. PHONEME_LIST phoneme_list[N_PHONEME_LIST];
  37. int mbrola_delay;
  38. char mbrola_name[20];
  39. int speed_factor1;
  40. int speed_factor2;
  41. int speed_min_sample_len;
  42. static int last_pitch_cmd;
  43. static int last_amp_cmd;
  44. static frame_t *last_frame;
  45. static int last_wcmdq;
  46. static int pitch_length;
  47. static int amp_length;
  48. static int modn_flags;
  49. static int syllable_start;
  50. static int syllable_end;
  51. static int syllable_centre;
  52. static voice_t *new_voice=NULL;
  53. int n_soundicon_tab=N_SOUNDICON_SLOTS;
  54. SOUND_ICON soundicon_tab[N_SOUNDICON_TAB];
  55. #define RMS_GLOTTAL1 35 // vowel before glottal stop
  56. #define RMS_START 28 // 28
  57. #define VOWEL_FRONT_LENGTH 50
  58. // a dummy phoneme_list entry which looks like a pause
  59. static PHONEME_LIST next_pause;
  // Convert a phoneme mnemonic, packed as up to four characters in an
  // unsigned int (least significant byte first), into a C string.
  //
  // Returns a pointer to a static buffer which is overwritten by the next
  // call, so the result must be used or copied before calling again.
  const char *WordToString(unsigned int word)
  {//========================================
      static char buf[5];
      int ix;

      // Unpack all four bytes: stopping after three would silently drop the
      // last character of a four-character mnemonic.
      for(ix=0; ix<4; ix++)
          buf[ix] = (char)(word >> (ix*8));
      buf[4] = 0;
      return(buf);
  }
  70. void SynthesizeInit()
  71. {//==================
  72. last_pitch_cmd = 0;
  73. last_amp_cmd = 0;
  74. last_frame = NULL;
  75. syllable_centre = -1;
  76. // initialise next_pause, a dummy phoneme_list entry
  77. // next_pause.ph = phoneme_tab[phonPAUSE]; // this must be done after voice selection
  78. next_pause.type = phPAUSE;
  79. next_pause.newword = 0;
  80. }
  81. static void EndAmplitude(void)
  82. {//===========================
  83. if(amp_length > 0)
  84. {
  85. if(wcmdq[last_amp_cmd][1] == 0)
  86. wcmdq[last_amp_cmd][1] = amp_length;
  87. amp_length = 0;
  88. }
  89. }
  90. static void EndPitch(int voice_break)
  91. {//==================================
  92. // posssible end of pitch envelope, fill in the length
  93. if((pitch_length > 0) && (last_pitch_cmd >= 0))
  94. {
  95. if(wcmdq[last_pitch_cmd][1] == 0)
  96. wcmdq[last_pitch_cmd][1] = pitch_length;
  97. pitch_length = 0;
  98. }
  99. if(voice_break)
  100. {
  101. last_wcmdq = -1;
  102. last_frame = NULL;
  103. syllable_end = wcmdq_tail;
  104. SmoothSpect();
  105. syllable_centre = -1;
  106. memset(vowel_transition,0,sizeof(vowel_transition));
  107. }
  108. } // end of Synthesize::EndPitch
  109. static void DoAmplitude(int amp, unsigned char *amp_env)
  110. {//=====================================================
  111. long *q;
  112. last_amp_cmd = wcmdq_tail;
  113. amp_length = 0; // total length of vowel with this amplitude envelope
  114. q = wcmdq[wcmdq_tail];
  115. q[0] = WCMD_AMPLITUDE;
  116. q[1] = 0; // fill in later from amp_length
  117. q[2] = (long)amp_env;
  118. q[3] = amp;
  119. WcmdqInc();
  120. } // end of Synthesize::DoAmplitude
  121. static void DoPitch(unsigned char *env, int pitch1, int pitch2)
  122. {//============================================================
  123. long *q;
  124. EndPitch(0);
  125. if(pitch1 == 1024)
  126. {
  127. // pitch was not set
  128. pitch1 = 24;
  129. pitch2 = 33;
  130. env = envelope_data[PITCHfall];
  131. }
  132. last_pitch_cmd = wcmdq_tail;
  133. pitch_length = 0; // total length of spect with this pitch envelope
  134. if(pitch2 < 0)
  135. pitch2 = 0;
  136. q = wcmdq[wcmdq_tail];
  137. q[0] = WCMD_PITCH;
  138. q[1] = 0; // length, fill in later from pitch_length
  139. q[2] = (long)env;
  140. q[3] = (pitch1 << 16) + pitch2;
  141. WcmdqInc();
  142. } // end of Synthesize::DoPitch
  143. int PauseLength(int pause, int control)
  144. {//====================================
  145. int len;
  146. if(control == 0)
  147. len = (pause * speed_factor1)/256;
  148. else
  149. len = (pause * speed_factor2)/256;
  150. if(len < 5) len = 5; // mS, limit the amount to which pauses can be shortened
  151. return(len);
  152. }
  153. static void DoPause(int length, int control)
  154. {//=========================================
  155. // control = 1, less shortening at fast speeds
  156. int len;
  157. len = PauseLength(length, control);
  158. len = (len * samplerate) / 1000; // convert from mS to number of samples
  159. EndPitch(1);
  160. wcmdq[wcmdq_tail][0] = WCMD_PAUSE;
  161. wcmdq[wcmdq_tail][1] = len;
  162. WcmdqInc();
  163. last_frame = NULL;
  164. } // end of Synthesize::DoPause
  165. extern int seq_len_adjust; // temporary fix to advance the start point for playing the wav sample
  166. static int DoSample2(int index, int which, int length_mod, int amp)
  167. {//================================================================
  168. int length;
  169. int length1;
  170. int format;
  171. int min_length;
  172. int start=0;
  173. long *q;
  174. unsigned char *p;
  175. index = index & 0x7fffff;
  176. p = &wavefile_data[index];
  177. format = p[2];
  178. length1 = (p[1] * 256);
  179. length1 += p[0]; // length in bytes
  180. if(seq_len_adjust > 0)
  181. {
  182. start = (seq_len_adjust * samplerate)/1000;
  183. if(format == 0)
  184. start *= 2;
  185. length1 -= start;
  186. index += start;
  187. }
  188. if(length_mod > 0)
  189. length = (length1 * length_mod) / 256;
  190. else
  191. length = length1;
  192. length = (length * speed_factor2)/256;
  193. min_length = speed_min_sample_len;
  194. if(format==0)
  195. min_length *= 2;
  196. if(length < min_length)
  197. length = min_length;
  198. if(length > length1)
  199. length = length1; // don't exceed wavefile length
  200. if(format==0)
  201. length /= 2; // 2 byte samples
  202. index += 4;
  203. if(amp >= 0)
  204. {
  205. last_wcmdq = wcmdq_tail;
  206. q = wcmdq[wcmdq_tail];
  207. if(which & 0x100)
  208. q[0] = WCMD_WAVE2; // mix this with synthesised wave
  209. else
  210. q[0] = WCMD_WAVE;
  211. q[1] = length; // length in samples
  212. q[2] = long(&wavefile_data[index]);
  213. q[3] = format + (amp << 8);
  214. WcmdqInc();
  215. }
  216. return(length);
  217. } // end of Synthesize::DoSample2
  218. int DoSample(PHONEME_TAB *ph1, PHONEME_TAB *ph2, int which, int length_mod, int amp)
  219. {//====================== ==========================================================
  220. int index;
  221. int match_level;
  222. int amp2;
  223. int result;
  224. EndPitch(1);
  225. index = LookupSound(ph1,ph2,which & 0xff,&match_level,0);
  226. if((index & 0x800000) == 0)
  227. return(0); // not wavefile data
  228. amp2 = wavefile_amp;
  229. if(amp != 0)
  230. amp2 = (amp * wavefile_amp)/20;
  231. if(amp == -1)
  232. amp2 = amp;
  233. result = DoSample2(index,which,length_mod,amp2);
  234. last_frame = NULL;
  235. return(result);
  236. } // end of Synthesize::DoSample
  237. static frame_t *AllocFrame()
  238. {//=========================
  239. // Allocate a temporary spectrum frame for the wavegen queue. Use a pool which is big
  240. // enough to use a round-robin without checks.
  241. // Only needed for modifying spectra for blending to consonants
  242. #define N_FRAME_POOL N_WCMDQ
  243. static int ix=0;
  244. static frame_t frame_pool[N_FRAME_POOL];
  245. ix++;
  246. if(ix >= N_FRAME_POOL)
  247. ix = 0;
  248. return(&frame_pool[ix]);
  249. }
  250. static void set_frame_rms(frame_t *fr, int new_rms)
  251. {//=================================================
  252. // Each frame includes its RMS amplitude value, so to set a new
  253. // RMS just adjust the formant amplitudes by the appropriate ratio
  254. int x;
  255. int h;
  256. int ix;
  257. static const short sqrt_tab[200] = {
  258. 0, 64, 90,110,128,143,156,169,181,192,202,212,221,230,239,247,
  259. 256,263,271,278,286,293,300,306,313,320,326,332,338,344,350,356,
  260. 362,367,373,378,384,389,394,399,404,409,414,419,424,429,434,438,
  261. 443,448,452,457,461,465,470,474,478,483,487,491,495,499,503,507,
  262. 512,515,519,523,527,531,535,539,543,546,550,554,557,561,565,568,
  263. 572,576,579,583,586,590,593,596,600,603,607,610,613,617,620,623,
  264. 627,630,633,636,640,643,646,649,652,655,658,662,665,668,671,674,
  265. 677,680,683,686,689,692,695,698,701,704,706,709,712,715,718,721,
  266. 724,726,729,732,735,738,740,743,746,749,751,754,757,759,762,765,
  267. 768,770,773,775,778,781,783,786,789,791,794,796,799,801,804,807,
  268. 809,812,814,817,819,822,824,827,829,832,834,836,839,841,844,846,
  269. 849,851,853,856,858,861,863,865,868,870,872,875,877,879,882,884,
  270. 886,889,891,893,896,898,900,902};
  271. if(fr->frflags & FRFLAG_KLATT)
  272. {
  273. if(new_rms == -1)
  274. {
  275. fr->klattp[KLATT_AV] = 50;
  276. }
  277. return;
  278. }
  279. if(fr->rms == 0) return; // check for divide by zero
  280. x = (new_rms * 64)/fr->rms;
  281. if(x >= 200) x = 199;
  282. x = sqrt_tab[x]; // sqrt(new_rms/fr->rms)*0x200;
  283. for(ix=0; ix<N_PEAKS; ix++)
  284. {
  285. h = fr->fheight[ix] * x;
  286. fr->fheight[ix] = h/0x200;
  287. }
  288. } /* end of set_frame_rms */
  289. static void formants_reduce_hf(frame_t *fr, int level)
  290. {//====================================================
  291. // change height of peaks 2 to 8, percentage
  292. int ix;
  293. int x;
  294. if(fr->frflags & FRFLAG_KLATT)
  295. return;
  296. for(ix=2; ix<N_PEAKS; ix++)
  297. {
  298. x = fr->fheight[ix] * level;
  299. fr->fheight[ix] = x/100;
  300. }
  301. }
  302. static frame_t *CopyFrame(frame_t *frame1, int copy)
  303. {//=================================================
  304. // create a copy of the specified frame in temporary buffer
  305. frame_t *frame2;
  306. if((copy==0) && (frame1->frflags & FRFLAG_COPIED))
  307. {
  308. // this frame has already been copied in temporary rw memory
  309. return(frame1);
  310. }
  311. frame2 = AllocFrame();
  312. if(frame2 != NULL)
  313. {
  314. memcpy(frame2,frame1,sizeof(frame_t));
  315. frame2->length = 0;
  316. frame2->frflags |= FRFLAG_COPIED;
  317. }
  318. return(frame2);
  319. }
  320. static frame_t *DuplicateLastFrame(frameref_t *seq, int n_frames, int length)
  321. {//==========================================================================
  322. frame_t *fr;
  323. seq[n_frames-1].length = length;
  324. fr = CopyFrame(seq[n_frames-1].frame,1);
  325. seq[n_frames].frame = fr;
  326. seq[n_frames].length = 0;
  327. return fr;
  328. }
  329. static void AdjustFormants(frame_t *fr, int target, int min, int max, int f1_adj, int f3_adj, int hf_reduce, int flags)
  330. {//====================================================================================================================
  331. int x;
  332. //hf_reduce = 70; // ?? using fixed amount rather than the parameter??
  333. target = (target * voice->formant_factor)/256;
  334. x = (target - fr->ffreq[2]) / 2;
  335. if(x > max) x = max;
  336. if(x < min) x = min;
  337. fr->ffreq[2] += x;
  338. fr->ffreq[3] += f3_adj;
  339. if(flags & 0x20)
  340. {
  341. f3_adj = -f3_adj; //. reverse direction for f4,f5 change
  342. }
  343. fr->ffreq[4] += f3_adj;
  344. fr->ffreq[5] += f3_adj;
  345. if(f1_adj==1)
  346. {
  347. x = (235 - fr->ffreq[1]);
  348. if(x < -100) x = -100;
  349. if(x > -60) x = -60;
  350. fr->ffreq[1] += x;
  351. }
  352. if(f1_adj==2)
  353. {
  354. x = (235 - fr->ffreq[1]);
  355. if(x < -300) x = -300;
  356. if(x > -150) x = -150;
  357. fr->ffreq[1] += x;
  358. fr->ffreq[0] += x;
  359. }
  360. if(f1_adj==3)
  361. {
  362. x = (100 - fr->ffreq[1]);
  363. if(x < -400) x = -400;
  364. if(x > -300) x = -400;
  365. fr->ffreq[1] += x;
  366. fr->ffreq[0] += x;
  367. }
  368. formants_reduce_hf(fr,hf_reduce);
  369. }
  370. static int VowelCloseness(frame_t *fr)
  371. {//===================================
  372. // return a value 0-3 depending on the vowel's f1
  373. int f1;
  374. if((f1 = fr->ffreq[1]) < 300)
  375. return(3);
  376. if(f1 < 400)
  377. return(2);
  378. if(f1 < 500)
  379. return(1);
  380. return(0);
  381. }
  382. int FormantTransition2(frameref_t *seq, int &n_frames, unsigned int data1, unsigned int data2, PHONEME_TAB *other_ph, int which)
  383. {//==============================================================================================================================
  384. int ix;
  385. int formant;
  386. int next_rms;
  387. int len;
  388. int rms;
  389. int f1;
  390. int f2;
  391. int f2_min;
  392. int f2_max;
  393. int f3_adj;
  394. int f3_amp;
  395. int flags;
  396. int vcolour;
  397. #define N_VCOLOUR 2
  398. // percentage change for each formant in 256ths
  399. static short vcolouring[N_VCOLOUR][5] = {
  400. {243,272,256,256,256}, // palatal consonant follows
  401. {256,256,240,240,240}, // retroflex
  402. };
  403. frame_t *fr = NULL;
  404. if(n_frames < 2)
  405. return(0);
  406. len = (data1 & 0x3f) * 2;
  407. rms = (data1 >> 6) & 0x3f;
  408. flags = (data1 >> 12);
  409. f2 = (data2 & 0x3f) * 50;
  410. f2_min = (((data2 >> 6) & 0x1f) - 15) * 50;
  411. f2_max = (((data2 >> 11) & 0x1f) - 15) * 50;
  412. f3_adj = (((data2 >> 16) & 0x1f) - 15) * 50;
  413. f3_amp = ((data2 >> 21) & 0x1f) * 8;
  414. f1 = ((data2 >> 26) & 0x7);
  415. vcolour = (data2 >> 29);
  416. // fprintf(stderr,"FMT%d %3s %3d-%3d f1=%d f2=%4d %4d %4d f3=%4d %3d\n",
  417. // which,WordToString(other_ph->mnemonic),len,rms,f1,f2,f2_min,f2_max,f3_adj,f3_amp);
  418. if(other_ph->mnemonic == '?')
  419. flags |= 8;
  420. if(which == 1)
  421. {
  422. /* entry to vowel */
  423. fr = CopyFrame(seq[0].frame,0);
  424. seq[0].frame = fr;
  425. seq[0].length = VOWEL_FRONT_LENGTH;
  426. if(len > 0)
  427. seq[0].length = len;
  428. seq[0].frflags |= FRFLAG_LEN_MOD; // reduce length modification
  429. fr->frflags |= FRFLAG_LEN_MOD;
  430. next_rms = seq[1].frame->rms;
  431. if(fr->frflags & FRFLAG_KLATT)
  432. {
  433. fr->klattp[KLATT_AV] = 53; // reduce the amplituide of the start of a vowel
  434. }
  435. if(f2 != 0)
  436. {
  437. if(rms & 0x20)
  438. {
  439. set_frame_rms(fr,(next_rms * (rms & 0x1f))/30);
  440. }
  441. AdjustFormants(fr, f2, f2_min, f2_max, f1, f3_adj, f3_amp, flags);
  442. if((rms & 0x20) == 0)
  443. {
  444. set_frame_rms(fr,rms*2);
  445. }
  446. }
  447. else
  448. {
  449. if(flags & 8)
  450. set_frame_rms(fr,(next_rms*24)/32);
  451. else
  452. set_frame_rms(fr,RMS_START);
  453. }
  454. if(flags & 8)
  455. {
  456. // set_frame_rms(fr,next_rms - 5);
  457. modn_flags = 0x800 + (VowelCloseness(fr) << 8);
  458. }
  459. }
  460. else
  461. {
  462. // exit from vowel
  463. rms = rms*2;
  464. if((f2 != 0) || (flags != 0))
  465. {
  466. if(flags & 8)
  467. {
  468. fr = CopyFrame(seq[n_frames-1].frame,0);
  469. seq[n_frames-1].frame = fr;
  470. rms = RMS_GLOTTAL1;
  471. // degree of glottal-stop effect depends on closeness of vowel (indicated by f1 freq)
  472. modn_flags = 0x400 + (VowelCloseness(fr) << 8);
  473. }
  474. else
  475. {
  476. fr = DuplicateLastFrame(seq,n_frames++,len);
  477. if(len > 36)
  478. seq_len_adjust += (len - 36);
  479. if(f2 != 0)
  480. {
  481. AdjustFormants(fr, f2, f2_min, f2_max, f1, f3_adj, f3_amp, flags);
  482. }
  483. }
  484. set_frame_rms(fr,rms);
  485. if((vcolour > 0) && (vcolour <= N_VCOLOUR))
  486. {
  487. for(ix=0; ix<n_frames; ix++)
  488. {
  489. fr = CopyFrame(seq[ix].frame,0);
  490. seq[ix].frame = fr;
  491. for(formant=1; formant<=5; formant++)
  492. {
  493. int x;
  494. x = fr->ffreq[formant] * vcolouring[vcolour-1][formant-1];
  495. fr->ffreq[formant] = x / 256;
  496. }
  497. }
  498. }
  499. }
  500. }
  501. if(fr != NULL)
  502. {
  503. if(flags & 4)
  504. fr->frflags |= FRFLAG_FORMANT_RATE;
  505. if(flags & 2)
  506. fr->frflags |= FRFLAG_BREAK; // don't merge with next frame
  507. }
  508. if(flags & 0x40)
  509. DoPause(12,0); // add a short pause after the consonant
  510. if(flags & 16)
  511. return(len);
  512. return(0);
  513. } // end of FormantTransition2
  514. static void SmoothSpect(void)
  515. {//==========================
  516. // Limit the rate of frequence change of formants, to reduce chirping
  517. long *q;
  518. frame_t *frame;
  519. frame_t *frame2;
  520. frame_t *frame1;
  521. frame_t *frame_centre;
  522. int ix;
  523. int len;
  524. int pk;
  525. int modified;
  526. int allowed;
  527. int diff;
  528. if(syllable_start == syllable_end)
  529. return;
  530. if((syllable_centre < 0) || (syllable_centre == syllable_start))
  531. {
  532. syllable_start = syllable_end;
  533. return;
  534. }
  535. q = wcmdq[syllable_centre];
  536. frame_centre = (frame_t *)q[2];
  537. //if(frame_centre->frflags & FRFLAG_KLATT)
  538. // return; // TESTING
  539. // backwards
  540. ix = syllable_centre -1;
  541. frame = frame2 = frame_centre;
  542. for(;;)
  543. {
  544. if(ix < 0) ix = N_WCMDQ-1;
  545. q = wcmdq[ix];
  546. if(q[0] == WCMD_PAUSE || q[0] == WCMD_WAVE)
  547. break;
  548. if(q[0] <= WCMD_SPECT2)
  549. {
  550. len = q[1] & 0xffff;
  551. frame1 = (frame_t *)q[3];
  552. if(frame1 == frame)
  553. {
  554. q[3] = (long)frame2;
  555. frame1 = frame2;
  556. }
  557. else
  558. break; // doesn't follow on from previous frame
  559. frame = frame2 = (frame_t *)q[2];
  560. modified = 0;
  561. if(frame->frflags & FRFLAG_BREAK)
  562. break;
  563. if(frame->frflags & FRFLAG_FORMANT_RATE)
  564. len = (len * 12)/10; // allow slightly greater rate of change for this frame (was 12/10)
  565. for(pk=0; pk<6; pk++)
  566. {
  567. int f1, f2;
  568. if((frame->frflags & FRFLAG_BREAK_LF) && (pk < 3))
  569. continue;
  570. f1 = frame1->ffreq[pk];
  571. f2 = frame->ffreq[pk];
  572. // backwards
  573. if((diff = f2 - f1) > 0)
  574. {
  575. allowed = f1*2 + f2;
  576. }
  577. else
  578. {
  579. allowed = f1 + f2*2;
  580. }
  581. // the allowed change is specified as percentage (%*10) of the frequency
  582. // take "frequency" as 1/3 from the lower freq
  583. allowed = (allowed * formant_rate[pk])/3000;
  584. allowed = (allowed * len)/256;
  585. if(diff > allowed)
  586. {
  587. if(modified == 0)
  588. {
  589. frame2 = CopyFrame(frame,0);
  590. modified = 1;
  591. }
  592. frame2->ffreq[pk] = frame1->ffreq[pk] + allowed;
  593. q[2] = (long)frame2;
  594. }
  595. else
  596. if(diff < -allowed)
  597. {
  598. if(modified == 0)
  599. {
  600. frame2 = CopyFrame(frame,0);
  601. modified = 1;
  602. }
  603. frame2->ffreq[pk] = frame1->ffreq[pk] - allowed;
  604. q[2] = (long)frame2;
  605. }
  606. }
  607. }
  608. if(ix == syllable_start)
  609. break;
  610. ix--;
  611. }
  612. // forwards
  613. ix = syllable_centre;
  614. frame = NULL;
  615. for(;;)
  616. {
  617. q = wcmdq[ix];
  618. if(q[0] == WCMD_PAUSE || q[0] == WCMD_WAVE)
  619. break;
  620. if(q[0] <= WCMD_SPECT2)
  621. {
  622. len = q[1] & 0xffff;
  623. frame1 = (frame_t *)q[2];
  624. if(frame != NULL)
  625. {
  626. if(frame1 == frame)
  627. {
  628. q[2] = (long)frame2;
  629. frame1 = frame2;
  630. }
  631. else
  632. break; // doesn't follow on from previous frame
  633. }
  634. frame = frame2 = (frame_t *)q[3];
  635. modified = 0;
  636. if(frame1->frflags & FRFLAG_BREAK)
  637. break;
  638. if(frame1->frflags & FRFLAG_FORMANT_RATE)
  639. len = (len *6)/5; // allow slightly greater rate of change for this frame
  640. for(pk=0; pk<6; pk++)
  641. {
  642. int f1, f2;
  643. f1 = frame1->ffreq[pk];
  644. f2 = frame->ffreq[pk];
  645. // forwards
  646. if((diff = f2 - f1) > 0)
  647. {
  648. allowed = f1*2 + f2;
  649. }
  650. else
  651. {
  652. allowed = f1 + f2*2;
  653. }
  654. allowed = (allowed * formant_rate[pk])/3000;
  655. allowed = (allowed * len)/256;
  656. if(diff > allowed)
  657. {
  658. if(modified == 0)
  659. {
  660. frame2 = CopyFrame(frame,0);
  661. modified = 1;
  662. }
  663. frame2->ffreq[pk] = frame1->ffreq[pk] + allowed;
  664. q[3] = (long)frame2;
  665. }
  666. else
  667. if(diff < -allowed)
  668. {
  669. if(modified == 0)
  670. {
  671. frame2 = CopyFrame(frame,0);
  672. modified = 1;
  673. }
  674. frame2->ffreq[pk] = frame1->ffreq[pk] - allowed;
  675. q[3] = (long)frame2;
  676. }
  677. }
  678. }
  679. ix++;
  680. if(ix >= N_WCMDQ) ix = 0;
  681. if(ix == syllable_end)
  682. break;
  683. }
  684. syllable_start = syllable_end;
  685. } // end of SmoothSpect
  686. static void StartSyllable(void)
  687. {//============================
  688. // start of syllable, if not already started
  689. if(syllable_end == syllable_start)
  690. syllable_end = wcmdq_tail;
  691. }
  692. int DoSpect(PHONEME_TAB *this_ph, PHONEME_TAB *prev_ph, PHONEME_TAB *next_ph,
  693. int which, PHONEME_LIST *plist, int modulation)
  694. {//===================================================================================
  695. // which 1 start of phoneme, 2 body and end
  696. // length_mod: 256 = 100%
  697. // modulation: -1 = don't write to wcmdq
  698. int n_frames;
  699. frameref_t *frames;
  700. int frameix;
  701. frame_t *frame1;
  702. frame_t *frame2;
  703. frame_t *fr;
  704. int ix;
  705. long *q;
  706. int len;
  707. int match_level;
  708. int frame_length;
  709. int frame1_length;
  710. int frame2_length;
  711. int length_factor;
  712. int length_mod;
  713. int total_len = 0;
  714. static int wave_flag = 0;
  715. int wcmd_spect = WCMD_SPECT;
  716. length_mod = plist->length;
  717. if(length_mod==0) length_mod=256;
  718. if(which==1)
  719. {
  720. // limit the shortening of sonorants before shortened (eg. unstressed vowels)
  721. if((this_ph->type==phLIQUID) || (prev_ph->type==phLIQUID) || (prev_ph->type==phNASAL))
  722. {
  723. if(length_mod < (len = translator->langopts.param[LOPT_SONORANT_MIN]))
  724. {
  725. length_mod = len;
  726. }
  727. }
  728. }
  729. modn_flags = 0;
  730. frames = LookupSpect(this_ph,prev_ph,next_ph,which,&match_level,&n_frames, plist);
  731. if(frames == NULL)
  732. return(0); // not found
  733. frame1 = frames[0].frame;
  734. frame1_length = frames[0].length;
  735. if(frame1->frflags & FRFLAG_KLATT)
  736. wcmd_spect = WCMD_KLATT;
  737. if(wavefile_ix == 0)
  738. {
  739. if(wave_flag)
  740. {
  741. // cancel any wavefile that was playing previously
  742. wcmd_spect = WCMD_SPECT2;
  743. if(frame1->frflags & FRFLAG_KLATT)
  744. wcmd_spect = WCMD_KLATT2;
  745. wave_flag = 0;
  746. }
  747. else
  748. {
  749. wcmd_spect = WCMD_SPECT;
  750. if(frame1->frflags & FRFLAG_KLATT)
  751. wcmd_spect = WCMD_KLATT;
  752. }
  753. }
  754. if(last_frame != NULL)
  755. {
  756. if(((last_frame->length < 2) || (last_frame->frflags & FRFLAG_VOWEL_CENTRE))
  757. && !(last_frame->frflags & FRFLAG_BREAK))
  758. {
  759. // last frame of previous sequence was zero-length, replace with first of this sequence
  760. wcmdq[last_wcmdq][3] = (long)frame1;
  761. if(last_frame->frflags & FRFLAG_BREAK_LF)
  762. {
  763. // but flag indicates keep HF peaks in last segment
  764. fr = CopyFrame(frame1,1);
  765. for(ix=3; ix<N_PEAKS; ix++)
  766. {
  767. fr->ffreq[ix] = last_frame->ffreq[ix];
  768. fr->fheight[ix] = last_frame->fheight[ix];
  769. }
  770. wcmdq[last_wcmdq][3] = (long)fr;
  771. }
  772. }
  773. }
  774. if((this_ph->type == phVOWEL) && (which == 2))
  775. {
  776. SmoothSpect(); // process previous syllable
  777. // remember the point in the output queue of the centre of the vowel
  778. syllable_centre = wcmdq_tail;
  779. }
  780. frame_length = frame1_length;
  781. for(frameix=1; frameix<n_frames; frameix++)
  782. {
  783. frame2 = frames[frameix].frame;
  784. frame2_length = frames[frameix].length;
  785. if((wavefile_ix != 0) && ((frame1->frflags & FRFLAG_DEFER_WAV)==0))
  786. {
  787. // there is a wave file to play along with this synthesis
  788. seq_len_adjust = 0;
  789. DoSample2(wavefile_ix,which+0x100,0,wavefile_amp);
  790. wave_flag = 1;
  791. wavefile_ix = 0;
  792. }
  793. length_factor = length_mod;
  794. if(frame1->frflags & FRFLAG_LEN_MOD) // reduce effect of length mod
  795. {
  796. length_factor = (length_mod*4 + 256*3)/7;
  797. }
  798. len = (frame_length * samplerate)/1000;
  799. len = (len * length_factor)/256;
  800. if(modulation >= 0)
  801. {
  802. if(frame1->frflags & FRFLAG_MODULATE)
  803. {
  804. modulation = 6;
  805. }
  806. if((frameix == n_frames-1) && (modn_flags & 0xf00))
  807. modulation |= modn_flags; // before or after a glottal stop
  808. }
  809. pitch_length += len;
  810. amp_length += len;
  811. if(frame_length < 2)
  812. {
  813. last_frame = NULL;
  814. frame_length = frame2_length;
  815. frame1 = frame2;
  816. }
  817. else
  818. {
  819. last_wcmdq = wcmdq_tail;
  820. if(modulation >= 0)
  821. {
  822. q = wcmdq[wcmdq_tail];
  823. q[0] = wcmd_spect;
  824. q[1] = len + (modulation << 16);
  825. q[2] = long(frame1);
  826. q[3] = long(frame2);
  827. WcmdqInc();
  828. }
  829. last_frame = frame1 = frame2;
  830. frame_length = frame2_length;
  831. total_len += len;
  832. }
  833. }
  834. return(total_len);
  835. } // end of Synthesize::DoSpect
  836. static void DoMarker(int type, int char_posn, int length, int value)
  837. {//=================================================================
  838. // This could be used to return an index to the word currently being spoken
  839. // Type 1=word, 2=sentence, 3=named marker, 4=play audio, 5=end
  840. wcmdq[wcmdq_tail][0] = WCMD_MARKER;
  841. wcmdq[wcmdq_tail][1] = type;
  842. wcmdq[wcmdq_tail][2] = (char_posn & 0xffffff) | (length << 24);
  843. wcmdq[wcmdq_tail][3] = value;
  844. WcmdqInc();
  845. } // end of Synthesize::DoMarker
  846. void DoVoiceChange(voice_t *v)
  847. {//===========================
  848. // allocate memory for a copy of the voice data, and free it in wavegenfill()
  849. voice_t *v2;
  850. v2 = (voice_t *)malloc(sizeof(voice_t));
  851. memcpy(v2,v,sizeof(voice_t));
  852. wcmdq[wcmdq_tail][0] = WCMD_VOICE;
  853. wcmdq[wcmdq_tail][1] = (long)(v2);
  854. WcmdqInc();
  855. }
  856. static void DoEmbedded(int &embix, int sourceix)
  857. {//=============================================
  858. // There were embedded commands in the text at this point
  859. unsigned int word; // bit 7=last command for this word, bits 5,6 sign, bits 0-4 command
  860. unsigned int value;
  861. int command;
  862. do {
  863. word = embedded_list[embix++];
  864. value = word >> 8;
  865. command = word & 0x7f;
  866. switch(command & 0x1f)
  867. {
  868. case EMBED_S: // speed
  869. SetEmbedded((command & 0x60) + EMBED_S2,value); // adjusts embedded_value[EMBED_S2]
  870. SetSpeed(2);
  871. break;
  872. case EMBED_I: // play dynamically loaded wav data (sound icon)
  873. if((int)value < n_soundicon_tab)
  874. {
  875. if(soundicon_tab[value].length != 0)
  876. {
  877. DoPause(10,0); // ensure a break in the speech
  878. wcmdq[wcmdq_tail][0] = WCMD_WAVE;
  879. wcmdq[wcmdq_tail][1] = soundicon_tab[value].length;
  880. wcmdq[wcmdq_tail][2] = (long)soundicon_tab[value].data + 44; // skip WAV header
  881. wcmdq[wcmdq_tail][3] = 0x1500; // 16 bit data, amp=21
  882. WcmdqInc();
  883. }
  884. }
  885. break;
  886. case EMBED_M: // named marker
  887. DoMarker(espeakEVENT_MARK, (sourceix & 0x7ff) + clause_start_char, 0, value);
  888. break;
  889. case EMBED_U: // play sound
  890. DoMarker(espeakEVENT_PLAY, count_characters+1, 0, value); // always occurs at end of clause
  891. break;
  892. default:
  893. DoPause(10,0); // ensure a break in the speech
  894. wcmdq[wcmdq_tail][0] = WCMD_EMBEDDED;
  895. wcmdq[wcmdq_tail][1] = command;
  896. wcmdq[wcmdq_tail][2] = value;
  897. WcmdqInc();
  898. break;
  899. }
  900. } while ((word & 0x80) == 0);
  901. }
  902. int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume)
  903. {//============================================================
  904. static int ix;
  905. static int embedded_ix;
  906. static int word_count;
  907. PHONEME_LIST *prev;
  908. PHONEME_LIST *next;
  909. PHONEME_LIST *next2;
  910. PHONEME_LIST *p;
  911. int released;
  912. int stress;
  913. int modulation;
  914. int pre_voiced;
  915. int free_min;
  916. unsigned char *pitch_env=NULL;
  917. unsigned char *amp_env;
  918. PHONEME_TAB *ph;
  919. PHONEME_TAB *prev_ph;
  920. static int sourceix=0;
  921. #ifdef TEST_MBROLA
  922. if(mbrola_name[0] != 0)
  923. return(MbrolaGenerate(phoneme_list,n_ph,resume));
  924. #endif
  925. if(option_quiet)
  926. return(0);
  927. if(resume == 0)
  928. {
  929. ix = 1;
  930. embedded_ix=0;
  931. word_count = 0;
  932. pitch_length = 0;
  933. amp_length = 0;
  934. last_frame = NULL;
  935. last_wcmdq = -1;
  936. syllable_start = wcmdq_tail;
  937. syllable_end = wcmdq_tail;
  938. syllable_centre = -1;
  939. last_pitch_cmd = -1;
  940. memset(vowel_transition,0,sizeof(vowel_transition));
  941. }
  942. while(ix < (*n_ph))
  943. {
  944. p = &phoneme_list[ix];
  945. if(p->type == phPAUSE)
  946. free_min = 5;
  947. else
  948. if(p->type != phVOWEL)
  949. free_min = 10; // we need less Q space for non-vowels, and we need to generate phonemes after a vowel so that the pitch_length is filled in
  950. else
  951. free_min = MIN_WCMDQ; // 22
  952. if(WcmdqFree() <= free_min)
  953. return(1); // wait
  954. prev = &phoneme_list[ix-1];
  955. next = &phoneme_list[ix+1];
  956. next2 = &phoneme_list[ix+2];
  957. if(p->synthflags & SFLAG_EMBEDDED)
  958. {
  959. DoEmbedded(embedded_ix, p->sourceix);
  960. }
  961. if(p->newword)
  962. {
  963. if(translator->langopts.param[LOPT_WORD_MERGE] == 0)
  964. last_frame = NULL;
  965. sourceix = (p->sourceix & 0x7ff) + clause_start_char;
  966. if(p->newword & 4)
  967. DoMarker(espeakEVENT_SENTENCE, sourceix, 0, count_sentences); // start of sentence
  968. // if(p->newword & 2)
  969. // DoMarker(espeakEVENT_END, count_characters, 0, count_sentences); // end of clause
  970. if(p->newword & 1)
  971. DoMarker(espeakEVENT_WORD, sourceix, p->sourceix >> 11, clause_start_word + word_count++);
  972. }
  973. EndAmplitude();
  974. if(p->prepause > 0)
  975. DoPause(p->prepause,1);
  976. if(option_phoneme_events && (p->type != phVOWEL))
  977. {
  978. // Note, for vowels, do the phoneme event after the vowel-start
  979. DoMarker(espeakEVENT_PHONEME, sourceix, 0, p->ph->mnemonic);
  980. }
  981. switch(p->type)
  982. {
  983. case phPAUSE:
  984. DoPause(p->length,0);
  985. break;
  986. case phSTOP:
  987. released = 0;
  988. if(next->type==phVOWEL) released = 1;
  989. if(next->type==phLIQUID && !next->newword) released = 1;
  990. if(released)
  991. DoSample(p->ph,next->ph,2,0,0);
  992. else
  993. DoSample(p->ph,phoneme_tab[phonPAUSE],2,0,0);
  994. break;
  995. case phFRICATIVE:
  996. if(p->synthflags & SFLAG_LENGTHEN)
  997. DoSample(p->ph,next->ph,2,p->length,0); // play it twice for [s:] etc.
  998. DoSample(p->ph,next->ph,2,p->length,0);
  999. break;
  1000. case phVSTOP:
  1001. pre_voiced = 0;
  1002. if(next->type==phVOWEL)
  1003. {
  1004. DoAmplitude(p->amp,NULL);
  1005. DoPitch(envelope_data[p->env],p->pitch1,p->pitch2);
  1006. pre_voiced = 1;
  1007. }
  1008. else
  1009. if((next->type==phLIQUID) && !next->newword)
  1010. {
  1011. DoAmplitude(next->amp,NULL);
  1012. DoPitch(envelope_data[next->env],next->pitch1,next->pitch2);
  1013. pre_voiced = 1;
  1014. }
  1015. else
  1016. {
  1017. if(last_pitch_cmd < 0)
  1018. {
  1019. DoAmplitude(next->amp,NULL);
  1020. DoPitch(envelope_data[p->env],p->pitch1,p->pitch2);
  1021. }
  1022. }
  1023. if((prev->type==phVOWEL) || (prev->ph->phflags & phVOWEL2))
  1024. {
  1025. // a period of voicing before the release
  1026. DoSpect(p->ph,phoneme_tab[phonSCHWA],next->ph,1,p,0);
  1027. if(p->synthflags & SFLAG_LENGTHEN)
  1028. {
  1029. DoPause(20,0);
  1030. DoSpect(p->ph,phoneme_tab[phonSCHWA],next->ph,1,p,0);
  1031. }
  1032. }
  1033. else
  1034. {
  1035. if(p->synthflags & SFLAG_LENGTHEN)
  1036. {
  1037. DoPause(50,0);
  1038. }
  1039. }
  1040. if(pre_voiced)
  1041. {
  1042. // followed by a vowel, or liquid + vowel
  1043. StartSyllable();
  1044. DoSpect(p->ph,prev->ph,next->ph,2,p,0);
  1045. }
  1046. else
  1047. {
  1048. // if((prev->type != phVOWEL) && ((prev->ph->phflags & phVOICED)==0) && ((next->ph->phflags & phVOICED)==0))
  1049. // DoSpect(p->ph,prev->ph,phoneme_tab[phonPAUSE_SHORT],2,p,0);
  1050. // else
  1051. DoSpect(p->ph,prev->ph,phoneme_tab[phonPAUSE],2,p,0);
  1052. // DoSpect(p->ph,prev->ph,next->ph,2,p,0);
  1053. }
  1054. break;
  1055. case phVFRICATIVE:
  1056. if(next->type==phVOWEL)
  1057. {
  1058. DoAmplitude(p->amp,NULL);
  1059. DoPitch(envelope_data[p->env],p->pitch1,p->pitch2);
  1060. }
  1061. else
  1062. if(next->type==phLIQUID)
  1063. {
  1064. DoAmplitude(next->amp,NULL);
  1065. DoPitch(envelope_data[next->env],next->pitch1,next->pitch2);
  1066. }
  1067. else
  1068. {
  1069. if(last_pitch_cmd < 0)
  1070. {
  1071. DoAmplitude(p->amp,NULL);
  1072. DoPitch(envelope_data[p->env],p->pitch1,p->pitch2);
  1073. }
  1074. }
  1075. if((next->type==phVOWEL) || ((next->type==phLIQUID)) && (next->newword==0)) // ?? test 14.Aug.2007
  1076. {
  1077. StartSyllable();
  1078. if(p->synthflags & SFLAG_LENGTHEN)
  1079. DoSpect(p->ph,prev->ph,next->ph,2,p,0);
  1080. DoSpect(p->ph,prev->ph,next->ph,2,p,0);
  1081. }
  1082. else
  1083. {
  1084. if(p->synthflags & SFLAG_LENGTHEN)
  1085. DoSpect(p->ph,prev->ph,phoneme_tab[phonPAUSE],2,p,0);
  1086. DoSpect(p->ph,prev->ph,phoneme_tab[phonPAUSE],2,p,0);
  1087. }
  1088. break;
  1089. case phNASAL:
  1090. if(!(p->synthflags & SFLAG_SEQCONTINUE))
  1091. {
  1092. DoAmplitude(p->amp,NULL);
  1093. DoPitch(envelope_data[p->env],p->pitch1,p->pitch2);
  1094. }
  1095. if(prev->type==phNASAL)
  1096. {
  1097. last_frame = NULL;
  1098. }
  1099. if(next->type==phVOWEL)
  1100. {
  1101. StartSyllable();
  1102. DoSpect(p->ph,prev->ph,next->ph,1,p,0);
  1103. }
  1104. else
  1105. if(prev->type==phVOWEL && (p->synthflags & SFLAG_SEQCONTINUE))
  1106. {
  1107. DoSpect(p->ph,prev->ph,phoneme_tab[phonPAUSE],2,p,0);
  1108. }
  1109. else
  1110. {
  1111. last_frame = NULL; // only for nasal ?
  1112. if(next->type == phLIQUID)
  1113. DoSpect(p->ph,prev->ph,phoneme_tab[phonSONORANT],2,p,0);
  1114. else
  1115. DoSpect(p->ph,prev->ph,phoneme_tab[phonPAUSE],2,p,0);
  1116. last_frame = NULL;
  1117. }
  1118. break;
  1119. case phLIQUID:
  1120. modulation = 0;
  1121. if(p->ph->phflags & phTRILL)
  1122. modulation = 5;
  1123. prev_ph = prev->ph;
  1124. // if(p->newword)
  1125. // prev_ph = phoneme_tab[phonPAUSE]; // pronounce fully at the start of a word
  1126. if(!(p->synthflags & SFLAG_SEQCONTINUE))
  1127. {
  1128. DoAmplitude(p->amp,NULL);
  1129. DoPitch(envelope_data[p->env],p->pitch1,p->pitch2);
  1130. }
  1131. if(prev->type==phNASAL)
  1132. {
  1133. last_frame = NULL;
  1134. }
  1135. if(next->type==phVOWEL)
  1136. {
  1137. StartSyllable();
  1138. DoSpect(p->ph,prev_ph,next->ph,1,p,modulation); // (,)r
  1139. }
  1140. else
  1141. if(prev->type==phVOWEL && (p->synthflags & SFLAG_SEQCONTINUE))
  1142. {
  1143. DoSpect(p->ph,prev_ph,next->ph,1,p,modulation);
  1144. }
  1145. else
  1146. {
  1147. DoSpect(p->ph,prev_ph,next->ph,1,p,modulation);
  1148. }
  1149. break;
  1150. case phVOWEL:
  1151. ph = p->ph;
  1152. stress = p->tone & 0xf;
  1153. // vowel transition from the preceding phoneme
  1154. vowel_transition0 = vowel_transition[0];
  1155. vowel_transition1 = vowel_transition[1];
  1156. pitch_env = envelope_data[p->env];
  1157. amp_env = NULL;
  1158. if(p->tone_ph != 0)
  1159. {
  1160. pitch_env = LookupEnvelope(phoneme_tab[p->tone_ph]->spect);
  1161. amp_env = LookupEnvelope(phoneme_tab[p->tone_ph]->after);
  1162. }
  1163. StartSyllable();
  1164. modulation = 2;
  1165. if(stress <= 1)
  1166. modulation = 1; // 16ths
  1167. else
  1168. if(stress >= 7)
  1169. modulation = 3;
  1170. if(prev->type == phVSTOP || prev->type == phVFRICATIVE)
  1171. {
  1172. DoAmplitude(p->amp,amp_env);
  1173. DoPitch(pitch_env,p->pitch1,p->pitch2); // don't use prevocalic rising tone
  1174. DoSpect(ph,prev->ph,next->ph,1,p,modulation);
  1175. }
  1176. else
  1177. if(prev->type==phLIQUID || prev->type==phNASAL)
  1178. {
  1179. DoAmplitude(p->amp,amp_env);
  1180. DoSpect(ph,prev->ph,next->ph,1,p,modulation); // continue with pre-vocalic rising tone
  1181. DoPitch(pitch_env,p->pitch1,p->pitch2);
  1182. }
  1183. else
  1184. {
  1185. if(!(p->synthflags & SFLAG_SEQCONTINUE))
  1186. {
  1187. DoAmplitude(p->amp,amp_env);
  1188. DoPitch(pitch_env,p->pitch1,p->pitch2);
  1189. }
  1190. DoSpect(ph,prev->ph,next->ph,1,p,modulation);
  1191. }
  1192. if(option_phoneme_events)
  1193. {
  1194. DoMarker(espeakEVENT_PHONEME, sourceix, 0, p->ph->mnemonic);
  1195. }
  1196. DoSpect(p->ph,prev->ph,next->ph,2,p,modulation);
  1197. memset(vowel_transition,0,sizeof(vowel_transition));
  1198. break;
  1199. }
  1200. ix++;
  1201. }
  1202. EndPitch(1);
  1203. if(*n_ph > 0)
  1204. {
  1205. DoMarker(espeakEVENT_END, count_characters, 0, count_sentences); // end of clause
  1206. *n_ph = 0;
  1207. }
  1208. return(0); // finished the phoneme list
  1209. } // end of Generate
  // Non-zero while synthesis is running: set to 1 by SpeakNextClause() when
  // new text is supplied or a pause is released, and cleared on stop, on
  // pause and when the input is exhausted.
  static int timer_on = 0;

  // 0 when not paused; set to 2 by SpeakNextClause(control 3) while paused.
  // SynthStatus() ORs this with timer_on.
  static int paused = 0;
  1212. int SynthOnTimer()
  1213. {//===============
  1214. if(!timer_on)
  1215. {
  1216. return(WavegenCloseSound());
  1217. }
  1218. do {
  1219. if(Generate(phoneme_list,&n_phoneme_list,1)==0)
  1220. {
  1221. SpeakNextClause(NULL,NULL,1);
  1222. }
  1223. } while(skipping_text);
  1224. return(0);
  1225. }
  1226. int SynthStatus()
  1227. {//==============
  1228. return(timer_on | paused);
  1229. }
  1230. int SpeakNextClause(FILE *f_in, const void *text_in, int control)
  1231. {//==============================================================
  1232. // Speak text from file (f_in) or memory (text_in)
  1233. // control 0: start
  1234. // either f_in or text_in is set, the other must be NULL
  1235. // The other calls have f_in and text_in = NULL
  1236. // control 1: speak next text
  1237. // 2: stop
  1238. // 3: pause (toggle)
  1239. // 4: is file being read (0=no, 1=yes)
  1240. // 5: interrupt and flush current text.
  1241. int clause_tone;
  1242. char *voice_change;
  1243. FILE *f_mbrola;
  1244. static FILE *f_text=NULL;
  1245. static const void *p_text=NULL;
  1246. if(control == 4)
  1247. {
  1248. if((f_text == NULL) && (p_text == NULL))
  1249. return(0);
  1250. else
  1251. return(1);
  1252. }
  1253. if(control == 2)
  1254. {
  1255. // stop speaking
  1256. timer_on = 0;
  1257. p_text = NULL;
  1258. if(f_text != NULL)
  1259. {
  1260. fclose(f_text);
  1261. f_text=NULL;
  1262. }
  1263. n_phoneme_list = 0;
  1264. WcmdqStop();
  1265. embedded_value[EMBED_T] = 0;
  1266. return(0);
  1267. }
  1268. if(control == 3)
  1269. {
  1270. // toggle pause
  1271. if(paused == 0)
  1272. {
  1273. timer_on = 0;
  1274. paused = 2;
  1275. }
  1276. else
  1277. {
  1278. WavegenOpenSound();
  1279. timer_on = 1;
  1280. paused = 0;
  1281. Generate(phoneme_list,&n_phoneme_list,0); // re-start from beginning of clause
  1282. }
  1283. return(0);
  1284. }
  1285. if(control == 5)
  1286. {
  1287. // stop speaking, but continue looking for text
  1288. n_phoneme_list = 0;
  1289. WcmdqStop();
  1290. return(0);
  1291. }
  1292. if((f_in != NULL) || (text_in != NULL))
  1293. {
  1294. f_text = f_in;
  1295. p_text = text_in;
  1296. timer_on = 1;
  1297. paused = 0;
  1298. }
  1299. if((f_text==NULL) && (p_text==NULL))
  1300. {
  1301. skipping_text = 0;
  1302. timer_on = 0;
  1303. return(0);
  1304. }
  1305. if((f_text != NULL) && feof(f_text))
  1306. {
  1307. timer_on = 0;
  1308. fclose(f_text);
  1309. f_text=NULL;
  1310. return(0);
  1311. }
  1312. if(current_phoneme_table != voice->phoneme_tab_ix)
  1313. {
  1314. SelectPhonemeTable(voice->phoneme_tab_ix);
  1315. }
  1316. // read the next clause from the input text file, translate it, and generate
  1317. // entries in the wavegen command queue
  1318. p_text = TranslateClause(translator, f_text, p_text, &clause_tone, &voice_change);
  1319. CalcPitches(translator, clause_tone);
  1320. CalcLengths(translator);
  1321. GetTranslatedPhonemeString(translator->phon_out,sizeof(translator->phon_out));
  1322. if(option_phonemes > 0)
  1323. {
  1324. fprintf(f_trans,"%s\n",translator->phon_out);
  1325. if(!iswalpha(0x010d))
  1326. {
  1327. // check that c-caron is recognized as an alphabetic character
  1328. fprintf(stderr,"Warning: Accented letters are not recognized, eg: U+010D\nSet LC_CTYPE to a UTF-8 locale\n");
  1329. }
  1330. }
  1331. if(phoneme_callback != NULL)
  1332. {
  1333. phoneme_callback(translator->phon_out);
  1334. }
  1335. if(skipping_text)
  1336. {
  1337. n_phoneme_list = 0;
  1338. return(1);
  1339. }
  1340. if(mbrola_name[0] != 0)
  1341. {
  1342. #ifdef USE_MBROLA_LIB
  1343. MbrolaTranslate(phoneme_list,n_phoneme_list,NULL);
  1344. #else
  1345. if((f_mbrola = f_trans) == stderr)
  1346. f_mbrola = stdout;
  1347. MbrolaTranslate(phoneme_list,n_phoneme_list,f_mbrola);
  1348. #endif
  1349. }
  1350. Generate(phoneme_list,&n_phoneme_list,0);
  1351. WavegenOpenSound();
  1352. if(voice_change != NULL)
  1353. {
  1354. // voice change at the end of the clause (i.e. clause was terminated by a voice change)
  1355. new_voice = LoadVoiceVariant(voice_change,0); // add a Voice instruction to wavegen at the end of the clause
  1356. }
  1357. if(new_voice)
  1358. {
  1359. // finished the current clause, now change the voice if there was an embedded
  1360. // change voice command at the end of it (i.e. clause was broken at the change voice command)
  1361. DoVoiceChange(voice);
  1362. new_voice = NULL;
  1363. }
  1364. return(1);
  1365. } // end of SpeakNextClause