eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

synthesize.cpp 41KB


  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2010 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include "StdAfx.h"
  20. #include <stdio.h>
  21. #include <ctype.h>
  22. #include <wctype.h>
  23. #include <stdlib.h>
  24. #include <string.h>
  25. #include <math.h>
  26. #include "speak_lib.h"
  27. #include "speech.h"
  28. #include "phoneme.h"
  29. #include "synthesize.h"
  30. #include "voice.h"
  31. #include "translate.h"
  extern FILE *f_log;

  // Forward declaration: SmoothSpect() is defined further down but is called
  // earlier in the file (from EndPitch).
  static void SmoothSpect(void);

  // list of phonemes in a clause
  int n_phoneme_list=0;
  PHONEME_LIST phoneme_list[N_PHONEME_LIST];

  int mbrola_delay;
  char mbrola_name[20];   // Generate() tests mbrola_name[0] != 0 when TEST_MBROLA is defined

  SPEED_FACTORS speed;    // speed_factor1/2/3 and min_sample_len are read when scaling pauses, samples and frames

  // Bookkeeping for the wavegen command queue (wcmdq).
  static int last_pitch_cmd;     // wcmdq index of the latest WCMD_PITCH entry (set in DoPitch); Generate() resets it to -1
  static int last_amp_cmd;       // wcmdq index of the latest WCMD_AMPLITUDE entry (set in DoAmplitude)
  static frame_t *last_frame;    // last frame queued by DoSpect2, NULL after a pause/sample/voice break
  static int last_wcmdq;         // wcmdq index of the latest wave/spectrum entry, -1 after a voice break
  static int pitch_length;       // length accumulated since the last pitch command; written back by EndPitch
  static int amp_length;         // length accumulated since the last amplitude command; written back by EndAmplitude
  static int modn_flags;         // glottal-stop modulation flags set by FormantTransition2, OR-ed into modulation by DoSpect2

  // wcmdq indices delimiting the syllable currently being built; consumed by SmoothSpect
  static int syllable_start;
  static int syllable_end;
  static int syllable_centre;    // -1 when no vowel centre has been recorded

  static voice_t *new_voice=NULL;   // not referenced in the portion of the file visible to this review

  // Sound icons; DoEmbedded indexes soundicon_tab after checking against n_soundicon_tab
  int n_soundicon_tab=N_SOUNDICON_SLOTS;
  SOUND_ICON soundicon_tab[N_SOUNDICON_TAB];

  #define RMS_GLOTTAL1 35 // vowel before glottal stop
  #define RMS_START 28 // 28
  #define VOWEL_FRONT_LENGTH 50   // default length given to the first frame on entry to a vowel (FormantTransition2)

  // a dummy phoneme_list entry which looks like a pause
  static PHONEME_LIST next_pause;
  const char *WordToString(unsigned int word)
  {//========================================
      // Convert a phoneme mnemonic word into a string.
      //
      // The mnemonic packs up to four characters into one word, one character
      // per byte with the first character in the least significant byte.
      // All four bytes are copied (the previous loop stopped after three, so
      // four-character mnemonics lost their last character).
      //
      // Returns a pointer to a static buffer which is overwritten by the next
      // call; the result is always NUL terminated.
      static char buf[5];
      int ix;

      for(ix=0; ix<4; ix++)
          buf[ix] = (char)((word >> (ix*8)) & 0xff);
      buf[4] = 0;
      return(buf);
  }
  68. void SynthesizeInit()
  69. {//==================
  70. last_pitch_cmd = 0;
  71. last_amp_cmd = 0;
  72. last_frame = NULL;
  73. syllable_centre = -1;
  74. // initialise next_pause, a dummy phoneme_list entry
  75. // next_pause.ph = phoneme_tab[phonPAUSE]; // this must be done after voice selection
  76. next_pause.type = phPAUSE;
  77. next_pause.newword = 0;
  78. }
  79. static void EndAmplitude(void)
  80. {//===========================
  81. if(amp_length > 0)
  82. {
  83. if(wcmdq[last_amp_cmd][1] == 0)
  84. wcmdq[last_amp_cmd][1] = amp_length;
  85. amp_length = 0;
  86. }
  87. }
  88. static void EndPitch(int voice_break)
  89. {//==================================
  90. // posssible end of pitch envelope, fill in the length
  91. if((pitch_length > 0) && (last_pitch_cmd >= 0))
  92. {
  93. if(wcmdq[last_pitch_cmd][1] == 0)
  94. wcmdq[last_pitch_cmd][1] = pitch_length;
  95. pitch_length = 0;
  96. }
  97. if(voice_break)
  98. {
  99. last_wcmdq = -1;
  100. last_frame = NULL;
  101. syllable_end = wcmdq_tail;
  102. SmoothSpect();
  103. syllable_centre = -1;
  104. memset(vowel_transition,0,sizeof(vowel_transition));
  105. }
  106. } // end of Synthesize::EndPitch
  107. static void DoAmplitude(int amp, unsigned char *amp_env)
  108. {//=====================================================
  109. long *q;
  110. last_amp_cmd = wcmdq_tail;
  111. amp_length = 0; // total length of vowel with this amplitude envelope
  112. q = wcmdq[wcmdq_tail];
  113. q[0] = WCMD_AMPLITUDE;
  114. q[1] = 0; // fill in later from amp_length
  115. q[2] = (long)amp_env;
  116. q[3] = amp;
  117. WcmdqInc();
  118. } // end of Synthesize::DoAmplitude
  119. static void DoPitch(unsigned char *env, int pitch1, int pitch2)
  120. {//============================================================
  121. long *q;
  122. EndPitch(0);
  123. if(pitch1 == 1024)
  124. {
  125. // pitch was not set
  126. pitch1 = 24;
  127. pitch2 = 33;
  128. env = envelope_data[PITCHfall];
  129. }
  130. last_pitch_cmd = wcmdq_tail;
  131. pitch_length = 0; // total length of spect with this pitch envelope
  132. if(pitch2 < 0)
  133. pitch2 = 0;
  134. q = wcmdq[wcmdq_tail];
  135. q[0] = WCMD_PITCH;
  136. q[1] = 0; // length, fill in later from pitch_length
  137. q[2] = (long)env;
  138. q[3] = (pitch1 << 16) + pitch2;
  139. WcmdqInc();
  140. } // end of Synthesize::DoPitch
  141. int PauseLength(int pause, int control)
  142. {//====================================
  143. int len;
  144. if(control == 0)
  145. len = (pause * speed.speed_factor1)/256;
  146. else
  147. len = (pause * speed.speed_factor2)/256;
  148. if(len < 5) len = 5; // mS, limit the amount to which pauses can be shortened
  149. return(len);
  150. }
  151. static void DoPause(int length, int control)
  152. {//=========================================
  153. // control = 1, less shortening at fast speeds
  154. int len;
  155. len = PauseLength(length, control);
  156. len = (len * samplerate) / 1000; // convert from mS to number of samples
  157. EndPitch(1);
  158. wcmdq[wcmdq_tail][0] = WCMD_PAUSE;
  159. wcmdq[wcmdq_tail][1] = len;
  160. WcmdqInc();
  161. last_frame = NULL;
  162. } // end of Synthesize::DoPause
  163. extern int seq_len_adjust; // temporary fix to advance the start point for playing the wav sample
  164. static int DoSample2(int index, int which, int std_length, int control, int length_mod, int amp)
  165. {//=============================================================================================
  166. int length;
  167. int wav_length;
  168. int format;
  169. int min_length;
  170. int x;
  171. int len4;
  172. long *q;
  173. unsigned char *p;
  174. index = index & 0x7fffff;
  175. p = &wavefile_data[index];
  176. format = p[2];
  177. wav_length = (p[1] * 256);
  178. wav_length += p[0]; // length in bytes
  179. min_length = speed.min_sample_len;
  180. if(format==0)
  181. min_length *= 2;
  182. if(std_length > 0)
  183. {
  184. std_length = (std_length * samplerate)/1000;
  185. if(format == 0)
  186. std_length *= 2;
  187. x = (min_length * std_length)/wav_length;
  188. if(x > min_length)
  189. min_length = x;
  190. }
  191. else
  192. {
  193. // no length specified, use the length of the stored sound
  194. std_length = wav_length;
  195. }
  196. if(length_mod > 0)
  197. {
  198. std_length = (std_length * length_mod)/256;
  199. }
  200. length = (std_length * speed.speed_factor2)/256;
  201. if(control & pd_DONTLENGTHEN)
  202. {
  203. // this option is used for Stops, with short noise bursts.
  204. // Don't change their length much.
  205. if(length > std_length)
  206. {
  207. // don't let length exceed std_length
  208. length = std_length;
  209. }
  210. else
  211. {
  212. // reduce the reduction in length
  213. // length = (length + std_length)/2;
  214. }
  215. }
  216. if(length < min_length)
  217. length = min_length;
  218. if(format == 0)
  219. {
  220. // 16 bit samples
  221. length /= 2;
  222. wav_length /= 2;
  223. }
  224. if(amp < 0)
  225. return(length);
  226. len4 = wav_length / 4;
  227. index += 4;
  228. if(which & 0x100)
  229. {
  230. // mix this with synthesised wave
  231. last_wcmdq = wcmdq_tail;
  232. q = wcmdq[wcmdq_tail];
  233. q[0] = WCMD_WAVE2;
  234. q[1] = length | (wav_length << 16); // length in samples
  235. q[2] = long(&wavefile_data[index]);
  236. q[3] = format + (amp << 8);
  237. WcmdqInc();
  238. return(length);
  239. }
  240. if(length > wav_length)
  241. {
  242. x = len4*3;
  243. length -= x;
  244. }
  245. else
  246. {
  247. x = length;
  248. length = 0;
  249. }
  250. last_wcmdq = wcmdq_tail;
  251. q = wcmdq[wcmdq_tail];
  252. q[0] = WCMD_WAVE;
  253. q[1] = x; // length in samples
  254. q[2] = long(&wavefile_data[index]);
  255. q[3] = format + (amp << 8);
  256. WcmdqInc();
  257. while(length > len4*3)
  258. {
  259. x = len4;
  260. if(format == 0)
  261. x *= 2;
  262. last_wcmdq = wcmdq_tail;
  263. q = wcmdq[wcmdq_tail];
  264. q[0] = WCMD_WAVE;
  265. q[1] = len4*2; // length in samples
  266. q[2] = long(&wavefile_data[index+x]);
  267. q[3] = format + (amp << 8);
  268. WcmdqInc();
  269. length -= len4*2;
  270. }
  271. if(length > 0)
  272. {
  273. x = wav_length - length;
  274. if(format == 0)
  275. x *= 2;
  276. last_wcmdq = wcmdq_tail;
  277. q = wcmdq[wcmdq_tail];
  278. q[0] = WCMD_WAVE;
  279. q[1] = length; // length in samples
  280. q[2] = long(&wavefile_data[index+x]);
  281. q[3] = format + (amp << 8);
  282. WcmdqInc();
  283. }
  284. return(length);
  285. } // end of DoSample2
  #ifdef deleted
  // NOTE: this function is only compiled when the macro 'deleted' is defined.
  // It is stale: the call to DoSample2 below passes five arguments, whereas
  // DoSample2 in this file takes six, so it would not compile if enabled.
  int DoSample(PHONEME_TAB *ph1, PHONEME_TAB *ph2, int which, int length_mod, int amp)
  {//====================== ==========================================================
      // Look up the sound for the phoneme pair and, if it is wavefile data,
      // queue it through DoSample2.
      // Returns 0 if the sound is not wavefile data, otherwise the result of DoSample2.
      int index;
      int match_level;
      int amp2;
      int result;

      EndPitch(1);
      index = LookupSound(ph1,ph2,which & 0xff,&match_level,0);
      if((index & 0x800000) == 0)
          return(0); // not wavefile data

      // amp == 0: use wavefile_amp unchanged; amp == -1: passed through unchanged;
      // otherwise scale wavefile_amp by amp/20
      amp2 = wavefile_amp;
      if(amp != 0)
          amp2 = (amp * wavefile_amp)/20;
      if(amp == -1)
          amp2 = amp;

      result = DoSample2(index,which,length_mod,0,amp2);
      last_frame = NULL;
      return(result);
  } // end of DoSample
  #endif
  307. int DoSample3(PHONEME_DATA *phdata, int length_mod, int amp)
  308. {//=========================================================
  309. int amp2;
  310. int len;
  311. EndPitch(1);
  312. if(amp == -1)
  313. {
  314. // just get the length, don't produce sound
  315. amp2 = amp;
  316. }
  317. else
  318. {
  319. amp2 = phdata->sound_param[pd_WAV];
  320. if(amp2 == 0)
  321. amp2 = 100;
  322. amp2 = (amp2 * 32)/100;
  323. }
  324. seq_len_adjust=0;
  325. len = DoSample2(phdata->sound_addr[pd_WAV], 2, phdata->pd_param[pd_LENGTHMOD]*2, phdata->pd_control, length_mod, amp2);
  326. last_frame = NULL;
  327. return(len);
  328. } // end of DoSample3
  329. static frame_t *AllocFrame()
  330. {//=========================
  331. // Allocate a temporary spectrum frame for the wavegen queue. Use a pool which is big
  332. // enough to use a round-robin without checks.
  333. // Only needed for modifying spectra for blending to consonants
  334. #define N_FRAME_POOL N_WCMDQ
  335. static int ix=0;
  336. static frame_t frame_pool[N_FRAME_POOL];
  337. ix++;
  338. if(ix >= N_FRAME_POOL)
  339. ix = 0;
  340. return(&frame_pool[ix]);
  341. }
  342. static void set_frame_rms(frame_t *fr, int new_rms)
  343. {//=================================================
  344. // Each frame includes its RMS amplitude value, so to set a new
  345. // RMS just adjust the formant amplitudes by the appropriate ratio
  346. int x;
  347. int h;
  348. int ix;
  349. static const short sqrt_tab[200] = {
  350. 0, 64, 90,110,128,143,156,169,181,192,202,212,221,230,239,247,
  351. 256,263,271,278,286,293,300,306,313,320,326,332,338,344,350,356,
  352. 362,367,373,378,384,389,394,399,404,409,414,419,424,429,434,438,
  353. 443,448,452,457,461,465,470,474,478,483,487,491,495,499,503,507,
  354. 512,515,519,523,527,531,535,539,543,546,550,554,557,561,565,568,
  355. 572,576,579,583,586,590,593,596,600,603,607,610,613,617,620,623,
  356. 627,630,633,636,640,643,646,649,652,655,658,662,665,668,671,674,
  357. 677,680,683,686,689,692,695,698,701,704,706,709,712,715,718,721,
  358. 724,726,729,732,735,738,740,743,746,749,751,754,757,759,762,765,
  359. 768,770,773,775,778,781,783,786,789,791,794,796,799,801,804,807,
  360. 809,812,814,817,819,822,824,827,829,832,834,836,839,841,844,846,
  361. 849,851,853,856,858,861,863,865,868,870,872,875,877,879,882,884,
  362. 886,889,891,893,896,898,900,902};
  363. if(voice->klattv[0])
  364. {
  365. if(new_rms == -1)
  366. {
  367. fr->klattp[KLATT_AV] = 50;
  368. }
  369. return;
  370. }
  371. if(fr->rms == 0) return; // check for divide by zero
  372. x = (new_rms * 64)/fr->rms;
  373. if(x >= 200) x = 199;
  374. x = sqrt_tab[x]; // sqrt(new_rms/fr->rms)*0x200;
  375. for(ix=0; ix < 8; ix++)
  376. {
  377. h = fr->fheight[ix] * x;
  378. fr->fheight[ix] = h/0x200;
  379. }
  380. } /* end of set_frame_rms */
  381. static void formants_reduce_hf(frame_t *fr, int level)
  382. {//====================================================
  383. // change height of peaks 2 to 8, percentage
  384. int ix;
  385. int x;
  386. if(voice->klattv[0])
  387. return;
  388. for(ix=2; ix < 8; ix++)
  389. {
  390. x = fr->fheight[ix] * level;
  391. fr->fheight[ix] = x/100;
  392. }
  393. }
  394. static frame_t *CopyFrame(frame_t *frame1, int copy)
  395. {//=================================================
  396. // create a copy of the specified frame in temporary buffer
  397. frame_t *frame2;
  398. if((copy==0) && (frame1->frflags & FRFLAG_COPIED))
  399. {
  400. // this frame has already been copied in temporary rw memory
  401. return(frame1);
  402. }
  403. frame2 = AllocFrame();
  404. if(frame2 != NULL)
  405. {
  406. memcpy(frame2,frame1,sizeof(frame_t));
  407. frame2->length = 0;
  408. frame2->frflags |= FRFLAG_COPIED;
  409. }
  410. return(frame2);
  411. }
  412. static frame_t *DuplicateLastFrame(frameref_t *seq, int n_frames, int length)
  413. {//==========================================================================
  414. frame_t *fr;
  415. seq[n_frames-1].length = length;
  416. fr = CopyFrame(seq[n_frames-1].frame,1);
  417. seq[n_frames].frame = fr;
  418. seq[n_frames].length = 0;
  419. return fr;
  420. }
  421. static void AdjustFormants(frame_t *fr, int target, int min, int max, int f1_adj, int f3_adj, int hf_reduce, int flags)
  422. {//====================================================================================================================
  423. int x;
  424. //hf_reduce = 70; // ?? using fixed amount rather than the parameter??
  425. target = (target * voice->formant_factor)/256;
  426. x = (target - fr->ffreq[2]) / 2;
  427. if(x > max) x = max;
  428. if(x < min) x = min;
  429. fr->ffreq[2] += x;
  430. fr->ffreq[3] += f3_adj;
  431. if(flags & 0x20)
  432. {
  433. f3_adj = -f3_adj; //. reverse direction for f4,f5 change
  434. }
  435. fr->ffreq[4] += f3_adj;
  436. fr->ffreq[5] += f3_adj;
  437. if(f1_adj==1)
  438. {
  439. x = (235 - fr->ffreq[1]);
  440. if(x < -100) x = -100;
  441. if(x > -60) x = -60;
  442. fr->ffreq[1] += x;
  443. }
  444. if(f1_adj==2)
  445. {
  446. x = (235 - fr->ffreq[1]);
  447. if(x < -300) x = -300;
  448. if(x > -150) x = -150;
  449. fr->ffreq[1] += x;
  450. fr->ffreq[0] += x;
  451. }
  452. if(f1_adj==3)
  453. {
  454. x = (100 - fr->ffreq[1]);
  455. if(x < -400) x = -400;
  456. if(x > -300) x = -400;
  457. fr->ffreq[1] += x;
  458. fr->ffreq[0] += x;
  459. }
  460. formants_reduce_hf(fr,hf_reduce);
  461. }
  462. static int VowelCloseness(frame_t *fr)
  463. {//===================================
  464. // return a value 0-3 depending on the vowel's f1
  465. int f1;
  466. if((f1 = fr->ffreq[1]) < 300)
  467. return(3);
  468. if(f1 < 400)
  469. return(2);
  470. if(f1 < 500)
  471. return(1);
  472. return(0);
  473. }
  int FormantTransition2(frameref_t *seq, int &n_frames, unsigned int data1, unsigned int data2, PHONEME_TAB *other_ph, int which)
  {//==============================================================================================================================
      // Modify the first (entry) or last (exit) frame of a vowel's frame
      // sequence to form a transition to the adjacent phoneme.
      //
      // seq       the frame sequence; entries may be replaced by writable copies
      // n_frames  number of frames in seq; incremented when a frame is appended on exit
      // data1     packed: bits 0-5 length/2, bits 6-11 rms, bits 12+ flags
      // data2     packed: bits 0-5 f2/50, 6-10 f2_min, 11-15 f2_max, 16-20 f3_adj
      //           (each (value-15)*50), 21-25 f3_amp/8, 26-28 f1 adjust type, 29-31 vowel colouring
      // other_ph  the adjacent phoneme, may be NULL; a '?' mnemonic sets flag 8 (glottal stop)
      // which     1 = entry to vowel, anything else = exit from vowel
      //
      // Flags used here: 2 = FRFLAG_BREAK, 4 = FRFLAG_FORMANT_RATE, 8 = glottal stop,
      // 16 = return the length, 0x20 = passed on to AdjustFormants, 0x40 = add a short pause.
      //
      // Returns len if flag 16 is set, otherwise 0.  Returns 0 without doing
      // anything if there are fewer than 2 frames.
      int ix;
      int formant;
      int next_rms;
      int len;
      int rms;
      int f1;
      int f2;
      int f2_min;
      int f2_max;
      int f3_adj;
      int f3_amp;
      int flags;
      int vcolour;

  #define N_VCOLOUR 2
      // percentage change for each formant in 256ths
      static short vcolouring[N_VCOLOUR][5] = {
          {243,272,256,256,256}, // palatal consonant follows
          {256,256,240,240,240}, // retroflex
      };

      frame_t *fr = NULL;

      if(n_frames < 2)
          return(0);

      // unpack the transition parameters
      len = (data1 & 0x3f) * 2;
      rms = (data1 >> 6) & 0x3f;
      flags = (data1 >> 12);

      f2 = (data2 & 0x3f) * 50;
      f2_min = (((data2 >> 6) & 0x1f) - 15) * 50;
      f2_max = (((data2 >> 11) & 0x1f) - 15) * 50;
      f3_adj = (((data2 >> 16) & 0x1f) - 15) * 50;
      f3_amp = ((data2 >> 21) & 0x1f) * 8;
      f1 = ((data2 >> 26) & 0x7);
      vcolour = (data2 >> 29);

      // fprintf(stderr,"FMT%d %3s %3d-%3d f1=%d f2=%4d %4d %4d f3=%4d %3d\n",
      // which,WordToString(other_ph->mnemonic),len,rms,f1,f2,f2_min,f2_max,f3_adj,f3_amp);

      if((other_ph != NULL) && (other_ph->mnemonic == '?'))
          flags |= 8;

      if(which == 1)
      {
          /* entry to vowel */
          // work on a writable copy of the first frame
          fr = CopyFrame(seq[0].frame,0);
          seq[0].frame = fr;
          seq[0].length = VOWEL_FRONT_LENGTH;
          if(len > 0)
              seq[0].length = len;
          seq[0].frflags |= FRFLAG_LEN_MOD; // reduce length modification
          fr->frflags |= FRFLAG_LEN_MOD;

          next_rms = seq[1].frame->rms;

          if(voice->klattv[0])
          {
              // fr->klattp[KLATT_AV] = 53; // reduce the amplituide of the start of a vowel
              fr->klattp[KLATT_AV] = seq[1].frame->klattp[KLATT_AV] - 4;
          }
          if(f2 != 0)
          {
              // rms bit 0x20: the low 5 bits are a proportion (in 30ths) of the
              // next frame's rms, applied before the formants are adjusted;
              // otherwise rms*2 is an absolute value applied afterwards
              if(rms & 0x20)
              {
                  set_frame_rms(fr,(next_rms * (rms & 0x1f))/30);
              }
              AdjustFormants(fr, f2, f2_min, f2_max, f1, f3_adj, f3_amp, flags);

              if((rms & 0x20) == 0)
              {
                  set_frame_rms(fr,rms*2);
              }
          }
          else
          {
              // no formant target given, only set the starting amplitude
              if(flags & 8)
                  set_frame_rms(fr,(next_rms*24)/32);
              else
                  set_frame_rms(fr,RMS_START);
          }

          if(flags & 8)
          {
              // set_frame_rms(fr,next_rms - 5);
              modn_flags = 0x800 + (VowelCloseness(fr) << 8);
          }
      }
      else
      {
          // exit from vowel
          rms = rms*2;
          if((f2 != 0) || (flags != 0))
          {

              if(flags & 8)
              {
                  // glottal stop: modify the existing last frame, no frame is added
                  fr = CopyFrame(seq[n_frames-1].frame,0);
                  seq[n_frames-1].frame = fr;
                  rms = RMS_GLOTTAL1;

                  // degree of glottal-stop effect depends on closeness of vowel (indicated by f1 freq)
                  modn_flags = 0x400 + (VowelCloseness(fr) << 8);
              }
              else
              {
                  // append a copy of the last frame (n_frames grows by one) and adjust that
                  fr = DuplicateLastFrame(seq,n_frames++,len);
                  if(len > 36)
                      seq_len_adjust += (len - 36);

                  if(f2 != 0)
                  {
                      AdjustFormants(fr, f2, f2_min, f2_max, f1, f3_adj, f3_amp, flags);
                  }
              }

              set_frame_rms(fr,rms);

              if((vcolour > 0) && (vcolour <= N_VCOLOUR))
              {
                  // vowel colouring: scale ffreq[1]..ffreq[5] of every frame in the sequence
                  for(ix=0; ix<n_frames; ix++)
                  {
                      fr = CopyFrame(seq[ix].frame,0);
                      seq[ix].frame = fr;

                      for(formant=1; formant<=5; formant++)
                      {
                          int x;
                          x = fr->ffreq[formant] * vcolouring[vcolour-1][formant-1];
                          fr->ffreq[formant] = x / 256;
                      }
                  }
              }
          }
      }

      // fr is the frame modified last (after colouring, the last frame of the sequence)
      if(fr != NULL)
      {
          if(flags & 4)
              fr->frflags |= FRFLAG_FORMANT_RATE;
          if(flags & 2)
              fr->frflags |= FRFLAG_BREAK; // don't merge with next frame
      }

      if(flags & 0x40)
          DoPause(12,0); // add a short pause after the consonant

      if(flags & 16)
          return(len);
      return(0);
  } // end of FormantTransition2
  static void SmoothSpect(void)
  {//==========================
      // Limit the rate of frequency change of formants, to reduce chirping.
      //
      // Works on the spectrum commands queued in wcmdq for the current
      // syllable (syllable_start .. syllable_end).  Starting at syllable_centre
      // it walks backwards and then forwards through the queue.  For each
      // command the change in each of the first six formant frequencies
      // between its two frames (q[2] and q[3]) is limited to an amount which
      // depends on formant_rate[] and the command length; a frame which needs
      // changing is replaced by a writable copy.
      // On return syllable_start is set to syllable_end.
      long *q;
      frame_t *frame;
      frame_t *frame2;
      frame_t *frame1;
      frame_t *frame_centre;
      int ix;
      int len;
      int pk;
      int modified;
      int allowed;
      int diff;

      if(syllable_start == syllable_end)
          return;   // nothing queued for this syllable

      if((syllable_centre < 0) || (syllable_centre == syllable_start))
      {
          // no vowel centre recorded, or nothing before it: just close the syllable
          syllable_start = syllable_end;
          return;
      }

      q = wcmdq[syllable_centre];
      frame_centre = (frame_t *)q[2];

      // backwards
      // 'frame' is the original first frame of the following command,
      // 'frame2' is its (possibly modified) replacement
      ix = syllable_centre -1;
      frame = frame2 = frame_centre;
      for(;;)
      {
          if(ix < 0) ix = N_WCMDQ-1;   // the queue is circular
          q = wcmdq[ix];

          if(q[0] == WCMD_PAUSE || q[0] == WCMD_WAVE)
              break;

          if(q[0] <= WCMD_SPECT2)
          {
              len = q[1] & 0xffff;   // low 16 bits hold the length (upper bits hold modulation, see DoSpect2)

              frame1 = (frame_t *)q[3];
              if(frame1 == frame)
              {
                  // this command ends where the following one starts:
                  // keep them joined by using the replacement frame
                  q[3] = (long)frame2;
                  frame1 = frame2;
              }
              else
                  break; // doesn't follow on from previous frame

              frame = frame2 = (frame_t *)q[2];
              modified = 0;

              if(frame->frflags & FRFLAG_BREAK)
                  break;

              if(frame->frflags & FRFLAG_FORMANT_RATE)
                  len = (len * 12)/10; // allow slightly greater rate of change for this frame (was 12/10)

              for(pk=0; pk<6; pk++)
              {
                  int f1, f2;

                  if((frame->frflags & FRFLAG_BREAK_LF) && (pk < 3))
                      continue;

                  f1 = frame1->ffreq[pk];
                  f2 = frame->ffreq[pk];

                  // backwards
                  if((diff = f2 - f1) > 0)
                  {
                      allowed = f1*2 + f2;
                  }
                  else
                  {
                      allowed = f1 + f2*2;
                  }

                  // the allowed change is specified as percentage (%*10) of the frequency
                  // take "frequency" as 1/3 from the lower freq
                  allowed = (allowed * formant_rate[pk])/3000;
                  allowed = (allowed * len)/256;

                  if(diff > allowed)
                  {
                      // rising too fast: copy the frame once, then limit this formant
                      if(modified == 0)
                      {
                          frame2 = CopyFrame(frame,0);
                          modified = 1;
                      }
                      frame2->ffreq[pk] = frame1->ffreq[pk] + allowed;
                      q[2] = (long)frame2;
                  }
                  else
                  if(diff < -allowed)
                  {
                      // falling too fast
                      if(modified == 0)
                      {
                          frame2 = CopyFrame(frame,0);
                          modified = 1;
                      }
                      frame2->ffreq[pk] = frame1->ffreq[pk] - allowed;
                      q[2] = (long)frame2;
                  }
              }
          }

          if(ix == syllable_start)
              break;
          ix--;
      }

      // forwards
      // 'frame' is the original end frame of the preceding command (NULL for
      // the first command), 'frame2' is its replacement
      ix = syllable_centre;

      frame = NULL;
      for(;;)
      {
          q = wcmdq[ix];

          if(q[0] == WCMD_PAUSE || q[0] == WCMD_WAVE)
              break;

          if(q[0] <= WCMD_SPECT2)
          {

              len = q[1] & 0xffff;

              frame1 = (frame_t *)q[2];
              if(frame != NULL)
              {
                  if(frame1 == frame)
                  {
                      q[2] = (long)frame2;
                      frame1 = frame2;
                  }
                  else
                      break; // doesn't follow on from previous frame
              }

              frame = frame2 = (frame_t *)q[3];
              modified = 0;

              if(frame1->frflags & FRFLAG_BREAK)
                  break;

              if(frame1->frflags & FRFLAG_FORMANT_RATE)
                  len = (len *6)/5; // allow slightly greater rate of change for this frame

              for(pk=0; pk<6; pk++)
              {
                  int f1, f2;
                  f1 = frame1->ffreq[pk];
                  f2 = frame->ffreq[pk];

                  // forwards
                  if((diff = f2 - f1) > 0)
                  {
                      allowed = f1*2 + f2;
                  }
                  else
                  {
                      allowed = f1 + f2*2;
                  }
                  allowed = (allowed * formant_rate[pk])/3000;
                  allowed = (allowed * len)/256;

                  if(diff > allowed)
                  {
                      if(modified == 0)
                      {
                          frame2 = CopyFrame(frame,0);
                          modified = 1;
                      }
                      frame2->ffreq[pk] = frame1->ffreq[pk] + allowed;
                      q[3] = (long)frame2;
                  }
                  else
                  if(diff < -allowed)
                  {
                      if(modified == 0)
                      {
                          frame2 = CopyFrame(frame,0);
                          modified = 1;
                      }
                      frame2->ffreq[pk] = frame1->ffreq[pk] - allowed;
                      q[3] = (long)frame2;
                  }
              }
          }

          ix++;
          if(ix >= N_WCMDQ) ix = 0;   // the queue is circular
          if(ix == syllable_end)
              break;
      }

      syllable_start = syllable_end;
  } // end of SmoothSpect
  777. static void StartSyllable(void)
  778. {//============================
  779. // start of syllable, if not already started
  780. if(syllable_end == syllable_start)
  781. syllable_end = wcmdq_tail;
  782. }
  int DoSpect2(PHONEME_TAB *this_ph, int which, FMT_PARAMS *fmt_params, PHONEME_LIST *plist, int modulation)
  {//========================================================================================================
      // Queue the sequence of spectrum frames for a phoneme.
      //
      // which 0 not a vowel, 1 start of vowel, 2 body and end of vowel
      // length_mod: 256 = 100%  (taken from plist->length, 0 is treated as 256)
      // modulation: -1 = don't write to wcmdq
      //
      // fmt_params supplies the address of the frame sequence (fmt_addr) and of
      // an optional wave sample to be mixed with it (wav_addr, wav_amp).
      // fmt_params->wav_addr is cleared once the sample has been queued.
      // plist[-1] is read when which==1, so plist must not be the first entry of its array.
      //
      // Returns the total length, in samples, of the frames processed
      // (frames shorter than 2 are not counted).  Returns 0 if there is
      // no frame data.
      int n_frames;
      frameref_t *frames;
      int frameix;
      frame_t *frame1;
      frame_t *frame2;
      frame_t *fr;
      int ix;
      long *q;
      int len;
      int frame_length;
      int frame1_length;
      int frame2_length;
      int length_factor;
      int length_mod;
      int total_len = 0;
      static int wave_flag = 0;   // set when a mixed-in wave sample was queued by a previous frame or call
      int wcmd_spect = WCMD_SPECT;

      if(fmt_params->fmt_addr == 0)
          return(0);

      length_mod = plist->length;
      if(length_mod==0) length_mod=256;

      if(which==1)
      {
          // limit the shortening of sonorants before shortened (eg. unstressed vowels)
          if((this_ph->type==phLIQUID) || (plist[-1].type==phLIQUID) || (plist[-1].type==phNASAL))
          {
              if(length_mod < (len = translator->langopts.param[LOPT_SONORANT_MIN]))
              {
                  length_mod = len;
              }
          }
      }

      modn_flags = 0;
      frames = LookupSpect(this_ph, which, fmt_params, &n_frames, plist);
      if(frames == NULL)
          return(0); // not found

      frame1 = frames[0].frame;
      frame1_length = frames[0].length;

      if(voice->klattv[0])
          wcmd_spect = WCMD_KLATT;

      wavefile_ix = fmt_params->wav_addr;
      // wav_amp is a percentage; convert to a 0..32 scale, 0 meaning full amplitude
      wavefile_amp = (fmt_params->wav_amp * 32)/100;
      if(wavefile_amp == 0)
          wavefile_amp = 32;

      if(wavefile_ix == 0)
      {
          if(wave_flag)
          {
              // cancel any wavefile that was playing previously
              wcmd_spect = WCMD_SPECT2;
              if(voice->klattv[0])
                  wcmd_spect = WCMD_KLATT2;
              wave_flag = 0;
          }
          else
          {
              wcmd_spect = WCMD_SPECT;
              if(voice->klattv[0])
                  wcmd_spect = WCMD_KLATT;
          }
      }

      if(last_frame != NULL)
      {
          if(((last_frame->length < 2) || (last_frame->frflags & FRFLAG_VOWEL_CENTRE))
              && !(last_frame->frflags & FRFLAG_BREAK))
          {
              // last frame of previous sequence was zero-length, replace with first of this sequence
              wcmdq[last_wcmdq][3] = (long)frame1;

              if(last_frame->frflags & FRFLAG_BREAK_LF)
              {
                  // but flag indicates keep HF peaks in last segment
                  fr = CopyFrame(frame1,1);
                  for(ix=3; ix < 8; ix++)
                  {
                      // only ffreq[3..6] are copied, but fheight[3..7]
                      if(ix < 7)
                          fr->ffreq[ix] = last_frame->ffreq[ix];
                      fr->fheight[ix] = last_frame->fheight[ix];
                  }
                  wcmdq[last_wcmdq][3] = (long)fr;
              }
          }
      }

      if((this_ph->type == phVOWEL) && (which == 2))
      {
          SmoothSpect(); // process previous syllable

          // remember the point in the output queue of the centre of the vowel
          syllable_centre = wcmdq_tail;
      }

      // Queue one command for each pair of adjacent frames (frame1 -> frame2)
      frame_length = frame1_length;
      for(frameix=1; frameix<n_frames; frameix++)
      {
          frame2 = frames[frameix].frame;
          frame2_length = frames[frameix].length;

          if((fmt_params->wav_addr != 0) && ((frame1->frflags & FRFLAG_DEFER_WAV)==0))
          {
              // there is a wave file to play along with this synthesis
              seq_len_adjust = 0;
              DoSample2(fmt_params->wav_addr, which+0x100, 0, fmt_params->fmt_control, 0, wavefile_amp);
              wave_flag = 1;
              wavefile_ix = 0;
              fmt_params->wav_addr = 0;   // so that the sample is only queued once
          }

          length_factor = length_mod;
          if(frame1->frflags & FRFLAG_LEN_MOD) // reduce effect of length mod
          {
              // blend length_mod towards 256 (100%) according to speed.speed_factor3
              length_factor = (length_mod*(256-speed.speed_factor3) + 256*speed.speed_factor3)/256;
          }
          len = (frame_length * samplerate)/1000;   // mS to samples
          len = (len * length_factor)/256;

          if(modulation >= 0)
          {
              if(frame1->frflags & FRFLAG_MODULATE)
              {
                  modulation = 6;
              }
              if((frameix == n_frames-1) && (modn_flags & 0xf00))
                  modulation |= modn_flags; // before or after a glottal stop
          }

          // these totals are written into the pending pitch/amplitude commands later
          pitch_length += len;
          amp_length += len;

          if(frame_length < 2)
          {
              // too short to queue, just move on to the next frame
              last_frame = NULL;
              frame_length = frame2_length;
              frame1 = frame2;
          }
          else
          {
              last_wcmdq = wcmdq_tail;

              if(modulation >= 0)
              {
                  q = wcmdq[wcmdq_tail];
                  q[0] = wcmd_spect;
                  q[1] = len + (modulation << 16);   // length in the low 16 bits, modulation above
                  q[2] = long(frame1);
                  q[3] = long(frame2);

                  WcmdqInc();
              }
              last_frame = frame1 = frame2;
              frame_length = frame2_length;
              total_len += len;
          }
      }
      return(total_len);
  } // end of DoSpect2
  933. static void DoMarker(int type, int char_posn, int length, int value)
  934. {//=================================================================
  935. // This could be used to return an index to the word currently being spoken
  936. // Type 1=word, 2=sentence, 3=named marker, 4=play audio, 5=end
  937. wcmdq[wcmdq_tail][0] = WCMD_MARKER;
  938. wcmdq[wcmdq_tail][1] = type;
  939. wcmdq[wcmdq_tail][2] = (char_posn & 0xffffff) | (length << 24);
  940. wcmdq[wcmdq_tail][3] = value;
  941. WcmdqInc();
  942. } // end of Synthesize::DoMarker
  943. void DoVoiceChange(voice_t *v)
  944. {//===========================
  945. // allocate memory for a copy of the voice data, and free it in wavegenfill()
  946. voice_t *v2;
  947. v2 = (voice_t *)malloc(sizeof(voice_t));
  948. memcpy(v2,v,sizeof(voice_t));
  949. wcmdq[wcmdq_tail][0] = WCMD_VOICE;
  950. wcmdq[wcmdq_tail][1] = (long)(v2);
  951. WcmdqInc();
  952. }
  953. static void DoEmbedded(int &embix, int sourceix)
  954. {//=============================================
  955. // There were embedded commands in the text at this point
  956. unsigned int word; // bit 7=last command for this word, bits 5,6 sign, bits 0-4 command
  957. unsigned int value;
  958. int command;
  959. do {
  960. word = embedded_list[embix++];
  961. value = word >> 8;
  962. command = word & 0x7f;
  963. switch(command & 0x1f)
  964. {
  965. case EMBED_S: // speed
  966. SetEmbedded((command & 0x60) + EMBED_S2,value); // adjusts embedded_value[EMBED_S2]
  967. SetSpeed(2);
  968. break;
  969. case EMBED_I: // play dynamically loaded wav data (sound icon)
  970. if((int)value < n_soundicon_tab)
  971. {
  972. if(soundicon_tab[value].length != 0)
  973. {
  974. DoPause(10,0); // ensure a break in the speech
  975. wcmdq[wcmdq_tail][0] = WCMD_WAVE;
  976. wcmdq[wcmdq_tail][1] = soundicon_tab[value].length;
  977. wcmdq[wcmdq_tail][2] = (long)soundicon_tab[value].data + 44; // skip WAV header
  978. wcmdq[wcmdq_tail][3] = 0x1500; // 16 bit data, amp=21
  979. WcmdqInc();
  980. }
  981. }
  982. break;
  983. case EMBED_M: // named marker
  984. DoMarker(espeakEVENT_MARK, (sourceix & 0x7ff) + clause_start_char, 0, value);
  985. break;
  986. case EMBED_U: // play sound
  987. DoMarker(espeakEVENT_PLAY, count_characters+1, 0, value); // always occurs at end of clause
  988. break;
  989. default:
  990. DoPause(10,0); // ensure a break in the speech
  991. wcmdq[wcmdq_tail][0] = WCMD_EMBEDDED;
  992. wcmdq[wcmdq_tail][1] = command;
  993. wcmdq[wcmdq_tail][2] = value;
  994. WcmdqInc();
  995. break;
  996. }
  997. } while ((word & 0x80) == 0);
  998. }
  // Work through the phonemes of a clause, calling the Do...() helpers (DoPause,
  // DoSample3, DoSpect2, DoAmplitude, DoPitch, DoMarker, ...) for each one.
  //   phoneme_list  the phonemes of the clause (this parameter hides the file-level array of the same name)
  //   n_ph          points to the number of entries to process; set to 0 once the whole list is done
  //   resume        0: start again from entry 1 and reset the static state
  //                 non-zero: carry on from the phoneme reached by the previous call
  // Returns 1 ("wait") if it stopped because WcmdqFree() reported too little free
  // space, in which case it should be called again with resume non-zero.
  // Returns 0 when the list is finished, or immediately if option_quiet is set.
  // If TEST_MBROLA is defined and mbrola_name is set, the result of MbrolaGenerate() is returned instead.
  999. int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume)
  1000. {//============================================================
  // ix, embedded_ix, word_count and sourceix are static so that a call with
  // resume != 0 continues where the previous call left off.
  1001. static int ix;
  1002. static int embedded_ix;
  1003. static int word_count;
  1004. PHONEME_LIST *prev;
  1005. PHONEME_LIST *next;
  1006. PHONEME_LIST *next2;
  1007. PHONEME_LIST *p;
  1008. int released;
  1009. int stress;
  1010. int modulation;
  1011. int pre_voiced;
  1012. int free_min;
  1013. unsigned char *pitch_env=NULL;
  // amp_env is only assigned and read inside the phVOWEL case
  1014. unsigned char *amp_env;
  1015. PHONEME_TAB *ph;
  1016. PHONEME_TAB *prev_ph;
  // NOTE(review): sourceix is not reset when resume == 0, so it keeps its value from the previous clause until the first newword
  1017. static int sourceix=0;
  1018. PHONEME_DATA phdata;
  1019. PHONEME_DATA phdata_prev;
  1020. PHONEME_DATA phdata_next;
  1021. PHONEME_DATA phdata_tone;
  1022. FMT_PARAMS fmtp;
  1023. #ifdef TEST_MBROLA
  1024. if(mbrola_name[0] != 0)
  1025. return(MbrolaGenerate(phoneme_list,n_ph,resume));
  1026. #endif
  1027. if(option_quiet)
  1028. return(0);
  1029. if(resume == 0)
  1030. {
  // start of a new clause: begin at entry 1 (entry 0 is only ever read as 'prev') and reset the synthesis state
  1031. ix = 1;
  1032. embedded_ix=0;
  1033. word_count = 0;
  1034. pitch_length = 0;
  1035. amp_length = 0;
  1036. last_frame = NULL;
  1037. last_wcmdq = -1;
  1038. syllable_start = wcmdq_tail;
  1039. syllable_end = wcmdq_tail;
  1040. syllable_centre = -1;
  1041. last_pitch_cmd = -1;
  1042. memset(vowel_transition,0,sizeof(vowel_transition));
  1043. }
  1044. while(ix < (*n_ph))
  1045. {
  1046. p = &phoneme_list[ix];
  // decide how much free queue space is required before this phoneme is processed
  1047. if(p->type == phPAUSE)
  1048. free_min = 5;
  1049. else
  1050. if(p->type != phVOWEL)
  1051. free_min = 10; // we need less Q space for non-vowels, and we need to generate phonemes after a vowel so that the pitch_length is filled in
  1052. else
  1053. free_min = MIN_WCMDQ; // 22
  1054. if(WcmdqFree() <= free_min)
  1055. return(1); // wait
  // NOTE(review): next and next2 can refer to entries at or beyond index *n_ph;
  // presumably the list always has valid entries after the end - confirm against the code which builds it
  1056. prev = &phoneme_list[ix-1];
  1057. next = &phoneme_list[ix+1];
  1058. next2 = &phoneme_list[ix+2];
  1059. if(p->synthflags & SFLAG_EMBEDDED)
  1060. {
  1061. DoEmbedded(embedded_ix, p->sourceix);
  1062. }
  1063. if(p->newword)
  1064. {
  // p->newword bits used here: 1 = emit a WORD event, 4 = emit a SENTENCE event
  1065. if(translator->langopts.param[LOPT_WORD_MERGE] == 0)
  1066. last_frame = NULL;
  // low 11 bits of p->sourceix are the offset within the clause; the bits above are passed as the third argument of the WORD event
  1067. sourceix = (p->sourceix & 0x7ff) + clause_start_char;
  1068. if(p->newword & 4)
  1069. DoMarker(espeakEVENT_SENTENCE, sourceix, 0, count_sentences); // start of sentence
  1070. // if(p->newword & 2)
  1071. // DoMarker(espeakEVENT_END, count_characters, 0, count_sentences); // end of clause
  1072. if(p->newword & 1)
  1073. DoMarker(espeakEVENT_WORD, sourceix, p->sourceix >> 11, clause_start_word + word_count++);
  1074. }
  1075. EndAmplitude();
  1076. if(p->prepause > 0)
  1077. DoPause(p->prepause,1);
  1078. if(option_phoneme_events && (p->type != phVOWEL))
  1079. {
  1080. // Note, for vowels, do the phoneme event after the vowel-start
  1081. DoMarker(espeakEVENT_PHONEME, sourceix, 0, p->ph->mnemonic);
  1082. }
  1083. switch(p->type)
  1084. {
  1085. case phPAUSE:
  1086. DoPause(p->length,0);
  1087. break;
  1088. case phSTOP:
  // the stop counts as released if it is followed by a vowel, or by a liquid within the same word;
  // otherwise SFLAG_NEXT_PAUSE is set on it
  1089. released = 0;
  1090. if(next->type==phVOWEL)
  1091. {
  1092. released = 1;
  1093. }
  1094. else
  1095. if(!next->newword)
  1096. {
  1097. if(next->type==phLIQUID) released = 1;
  1098. // if(((p->ph->phflags & phPLACE) == phPLACE_blb) && (next->ph->phflags & phSIBILANT)) released = 1;
  1099. }
  1100. if(released == 0)
  1101. p->synthflags |= SFLAG_NEXT_PAUSE;
  1102. InterpretPhoneme(NULL, 0, p, &phdata);
  1103. phdata.pd_control |= pd_DONTLENGTHEN;
  1104. DoSample3(&phdata, 0, 0);
  1105. break;
  1106. case phFRICATIVE:
  1107. InterpretPhoneme(NULL, 0, p, &phdata);
  1108. if(p->synthflags & SFLAG_LENGTHEN)
  1109. {
  1110. DoSample3(&phdata, p->length, 0); // play it twice for [s:] etc.
  1111. }
  1112. DoSample3(&phdata, p->length, 0);
  1113. break;
  1114. case phVSTOP:
  1115. ph = p->ph;
  1116. memset(&fmtp, 0, sizeof(fmtp));
  1117. fmtp.fmt_control = pd_DONTLENGTHEN;
  // choose the amplitude and pitch: from this phoneme if a vowel follows, from a following
  // liquid in the same word, otherwise only if no pitch command has been issued yet
  1118. pre_voiced = 0;
  1119. if(next->type==phVOWEL)
  1120. {
  1121. DoAmplitude(p->amp,NULL);
  1122. DoPitch(envelope_data[p->env],p->pitch1,p->pitch2);
  1123. pre_voiced = 1;
  1124. }
  1125. else
  1126. if((next->type==phLIQUID) && !next->newword)
  1127. {
  1128. DoAmplitude(next->amp,NULL);
  1129. DoPitch(envelope_data[next->env],next->pitch1,next->pitch2);
  1130. pre_voiced = 1;
  1131. }
  1132. else
  1133. {
  1134. if(last_pitch_cmd < 0)
  1135. {
  // NOTE(review): amplitude is taken from 'next' but pitch from 'p' here; the phVFRICATIVE case uses 'p' for both - confirm this is intended
  1136. DoAmplitude(next->amp,NULL);
  1137. DoPitch(envelope_data[p->env],p->pitch1,p->pitch2);
  1138. }
  1139. }
  1140. if((prev->type==phVOWEL) || (prev->ph->phflags & phVOWEL2))
  1141. {
  1142. // a period of voicing before the release
  1143. InterpretPhoneme(NULL, 0x01, p, &phdata);
  1144. fmtp.fmt_addr = phdata.sound_addr[pd_FMT];
  1145. fmtp.fmt_length = phdata.sound_param[pd_FMT];
  1146. DoSpect2(ph, 0, &fmtp, p, 0);
  1147. if(p->synthflags & SFLAG_LENGTHEN)
  1148. {
  1149. DoPause(20,0);
  1150. DoSpect2(ph, 0, &fmtp, p, 0);
  1151. }
  1152. }
  1153. else
  1154. {
  1155. if(p->synthflags & SFLAG_LENGTHEN)
  1156. {
  1157. DoPause(50,0);
  1158. }
  1159. }
  1160. if(pre_voiced)
  1161. {
  1162. // followed by a vowel, or liquid + vowel
  1163. StartSyllable();
  1164. }
  1165. else
  1166. {
  1167. p->synthflags |= SFLAG_NEXT_PAUSE;
  1168. }
  // the release itself: interpret the phoneme again (second argument 0 this time) and add any ADDWAV sound
  1169. InterpretPhoneme(NULL,0, p, &phdata);
  1170. fmtp.fmt_addr = phdata.sound_addr[pd_FMT];
  1171. fmtp.fmt_length = phdata.sound_param[pd_FMT];
  1172. fmtp.wav_addr = phdata.sound_addr[pd_ADDWAV];
  1173. fmtp.wav_amp = phdata.sound_param[pd_ADDWAV];
  1174. DoSpect2(ph, 0, &fmtp, p, 0);
  // within a word, add a short pause before a following fricative
  1175. if((p->newword == 0) && (next2->newword == 0))
  1176. {
  1177. if(next->type == phVFRICATIVE)
  1178. DoPause(20,0);
  1179. if(next->type == phFRICATIVE)
  1180. DoPause(12,0);
  1181. }
  1182. break;
  1183. case phVFRICATIVE:
  1184. if(next->type==phVOWEL)
  1185. {
  1186. DoAmplitude(p->amp,NULL);
  1187. DoPitch(envelope_data[p->env],p->pitch1,p->pitch2);
  1188. }
  1189. else
  1190. if(next->type==phLIQUID)
  1191. {
  1192. DoAmplitude(next->amp,NULL);
  1193. DoPitch(envelope_data[next->env],next->pitch1,next->pitch2);
  1194. }
  1195. else
  1196. {
  1197. if(last_pitch_cmd < 0)
  1198. {
  1199. DoAmplitude(p->amp,NULL);
  1200. DoPitch(envelope_data[p->env],p->pitch1,p->pitch2);
  1201. }
  1202. }
  1203. if((next->type==phVOWEL) || ((next->type==phLIQUID) && (next->newword==0))) // ?? test 14.Aug.2007
  1204. {
  1205. StartSyllable();
  1206. }
  1207. else
  1208. {
  1209. p->synthflags |= SFLAG_NEXT_PAUSE;
  1210. }
  1211. InterpretPhoneme(NULL,0, p, &phdata);
  1212. memset(&fmtp, 0, sizeof(fmtp));
  1213. fmtp.std_length = phdata.pd_param[i_SET_LENGTH]*2;
  1214. fmtp.fmt_addr = phdata.sound_addr[pd_FMT];
  1215. fmtp.fmt_length = phdata.sound_param[pd_FMT];
  1216. fmtp.wav_addr = phdata.sound_addr[pd_ADDWAV];
  1217. fmtp.wav_amp = phdata.sound_param[pd_ADDWAV];
  // a lengthened phoneme is generated twice
  1218. if(p->synthflags & SFLAG_LENGTHEN)
  1219. DoSpect2(p->ph, 0, &fmtp, p, 0);
  1220. DoSpect2(p->ph, 0, &fmtp, p, 0);
  1221. break;
  1222. case phNASAL:
  1223. memset(&fmtp, 0, sizeof(fmtp));
  1224. if(!(p->synthflags & SFLAG_SEQCONTINUE))
  1225. {
  1226. DoAmplitude(p->amp,NULL);
  1227. DoPitch(envelope_data[p->env],p->pitch1,p->pitch2);
  1228. }
  1229. if(prev->type==phNASAL)
  1230. {
  1231. last_frame = NULL;
  1232. }
  1233. InterpretPhoneme(NULL,0, p, &phdata);
  1234. fmtp.std_length = phdata.pd_param[i_SET_LENGTH]*2;
  1235. fmtp.fmt_addr = phdata.sound_addr[pd_FMT];
  1236. fmtp.fmt_length = phdata.sound_param[pd_FMT];
  1237. if(next->type==phVOWEL)
  1238. {
  1239. StartSyllable();
  1240. DoSpect2(p->ph, 0, &fmtp, p, 0);
  1241. }
  1242. else
  1243. if(prev->type==phVOWEL && (p->synthflags & SFLAG_SEQCONTINUE))
  1244. {
  1245. DoSpect2(p->ph, 0, &fmtp, p, 0);
  1246. }
  1247. else
  1248. {
  1249. last_frame = NULL; // only for nasal ?
  1250. DoSpect2(p->ph, 0, &fmtp, p, 0);
  1251. last_frame = NULL;
  1252. }
  1253. break;
  1254. case phLIQUID:
  1255. memset(&fmtp, 0, sizeof(fmtp));
  // modulation is passed as the last argument of DoSpect2; 5 is used for a trill
  1256. modulation = 0;
  1257. if(p->ph->phflags & phTRILL)
  1258. modulation = 5;
  // prev_ph is assigned but not read anywhere else in this function
  1259. prev_ph = prev->ph;
  1260. // if(p->newword)
  1261. // prev_ph = phoneme_tab[phonPAUSE]; // pronounce fully at the start of a word
  1262. if(!(p->synthflags & SFLAG_SEQCONTINUE))
  1263. {
  1264. DoAmplitude(p->amp,NULL);
  1265. DoPitch(envelope_data[p->env],p->pitch1,p->pitch2);
  1266. }
  1267. if(prev->type==phNASAL)
  1268. {
  1269. last_frame = NULL;
  1270. }
  1271. if(next->type==phVOWEL)
  1272. {
  1273. StartSyllable();
  1274. }
  1275. InterpretPhoneme(NULL, 0, p, &phdata);
  1276. fmtp.std_length = phdata.pd_param[i_SET_LENGTH]*2;
  1277. fmtp.fmt_addr = phdata.sound_addr[pd_FMT];
  1278. fmtp.fmt_length = phdata.sound_param[pd_FMT];
  1279. fmtp.wav_addr = phdata.sound_addr[pd_ADDWAV];
  1280. fmtp.wav_amp = phdata.sound_param[pd_ADDWAV];
  1281. DoSpect2(p->ph, 0, &fmtp, p, modulation);
  1282. break;
  1283. case phVOWEL:
  // A vowel is generated with two DoSpect2 calls, with second argument 1 and then 2
  // (presumably the start and the end of the vowel - confirm against DoSpect2).
  1284. ph = p->ph;
  1285. stress = p->stresslevel & 0xf;
  1286. memset(&fmtp, 0, sizeof(fmtp));
  1287. InterpretPhoneme(NULL, 0, p, &phdata);
  1288. fmtp.std_length = phdata.pd_param[i_SET_LENGTH] * 2;
  // choose the data for the start of the vowel: from this vowel's own VWLSTART,
  // else from the previous phoneme's VWLSTART, else (below) the vowel's default FMT data
  1289. if(((fmtp.fmt_addr = phdata.sound_addr[pd_VWLSTART]) != 0) && ((phdata.pd_control & pd_FORNEXTPH) == 0))
  1290. {
  1291. // a vowel start has been specified by the Vowel program
  1292. fmtp.fmt_length = phdata.sound_param[pd_VWLSTART];
  1293. }
  1294. else
  1295. if(prev->type != phPAUSE)
  1296. {
  1297. // check the previous phoneme
  1298. InterpretPhoneme(NULL, 0, prev, &phdata_prev);
  1299. if((fmtp.fmt_addr = phdata_prev.sound_addr[pd_VWLSTART]) != 0)
  1300. {
  1301. // a vowel start has been specified by the Vowel program
  1302. fmtp.fmt2_lenadj = phdata_prev.sound_param[pd_VWLSTART];
  1303. }
  1304. fmtp.transition0 = phdata_prev.vowel_transition[0];
  1305. fmtp.transition1 = phdata_prev.vowel_transition[1];
  1306. }
  1307. if(fmtp.fmt_addr == 0)
  1308. {
  1309. // use the default start for this vowel
  1310. fmtp.use_vowelin = 1;
  1311. fmtp.fmt_control = 1;
  1312. fmtp.fmt_addr = phdata.sound_addr[pd_FMT];
  1313. // fmtp.fmt_length = phdata.sound_param[pd_FMT];
  1314. }
  // pitch and amplitude envelopes: the defaults are replaced by those of the tone phoneme, if there is one
  1315. pitch_env = envelope_data[p->env];
  1316. amp_env = NULL;
  1317. if(p->tone_ph != 0)
  1318. {
  1319. InterpretPhoneme2(p->tone_ph, &phdata_tone);
  1320. pitch_env = LookupEnvelope(phdata_tone.pitch_env);
  1321. amp_env = LookupEnvelope(phdata_tone.amp_env);
  1322. }
  1323. StartSyllable();
  // modulation (last argument of DoSpect2) depends on the stress level: 1 for stress <= 1, 3 for stress >= 7, otherwise 2
  1324. modulation = 2;
  1325. if(stress <= 1)
  1326. modulation = 1; // 16ths
  1327. else
  1328. if(stress >= 7)
  1329. modulation = 3;
  // the order of DoAmplitude, DoPitch and the first DoSpect2 depends on the type of the previous phoneme
  1330. if(prev->type == phVSTOP || prev->type == phVFRICATIVE)
  1331. {
  1332. DoAmplitude(p->amp,amp_env);
  1333. DoPitch(pitch_env,p->pitch1,p->pitch2); // don't use prevocalic rising tone
  1334. DoSpect2(ph, 1, &fmtp, p, modulation);
  1335. }
  1336. else
  1337. if(prev->type==phLIQUID || prev->type==phNASAL)
  1338. {
  1339. DoAmplitude(p->amp,amp_env);
  1340. DoSpect2(ph, 1, &fmtp, p, modulation); // continue with pre-vocalic rising tone
  1341. DoPitch(pitch_env,p->pitch1,p->pitch2);
  1342. }
  1343. else
  1344. {
  1345. if(!(p->synthflags & SFLAG_SEQCONTINUE))
  1346. {
  1347. DoAmplitude(p->amp,amp_env);
  1348. DoPitch(pitch_env,p->pitch1,p->pitch2);
  1349. }
  1350. DoSpect2(ph, 1, &fmtp, p, modulation);
  1351. }
  // for vowels the phoneme event is issued here, after the first DoSpect2 call
  1352. if(option_phoneme_events)
  1353. {
  1354. DoMarker(espeakEVENT_PHONEME, sourceix, 0, p->ph->mnemonic);
  1355. }
  // set up the second DoSpect2 call: the vowel's FMT data, with the ending taken from
  // this vowel's VWLEND, or else from the next phoneme unless that is a pause
  1356. fmtp.fmt_addr = phdata.sound_addr[pd_FMT];
  1357. fmtp.fmt_length = phdata.sound_param[pd_FMT];
  1358. fmtp.transition0 = 0;
  1359. fmtp.transition1 = 0;
  1360. if((fmtp.fmt2_addr = phdata.sound_addr[pd_VWLEND]) != 0)
  1361. {
  1362. fmtp.fmt2_lenadj = phdata.sound_param[pd_VWLEND];
  1363. }
  1364. else
  1365. if(next->type != phPAUSE)
  1366. {
  1367. fmtp.fmt2_lenadj = 0;
  1368. InterpretPhoneme(NULL, 0, next, &phdata_next);
  1369. fmtp.use_vowelin = 1;
  1370. fmtp.transition0 = phdata_next.vowel_transition[2]; // always do vowel_transition, even if ph_VWLEND ?? consider [N]
  1371. fmtp.transition1 = phdata_next.vowel_transition[3];
  1372. if((fmtp.fmt2_addr = phdata_next.sound_addr[pd_VWLEND]) != 0)
  1373. {
  1374. fmtp.fmt2_lenadj = phdata_next.sound_param[pd_VWLEND];
  1375. }
  1376. }
  1377. DoSpect2(ph, 2, &fmtp, p, modulation);
  1378. break;
  1379. }
  1380. ix++;
  1381. }
  // every phoneme has been processed: finish the pitch, issue the END event and mark the list as consumed
  1382. EndPitch(1);
  1383. if(*n_ph > 0)
  1384. {
  1385. DoMarker(espeakEVENT_END, count_characters, 0, count_sentences); // end of clause
  1386. *n_ph = 0;
  1387. }
  1388. return(0); // finished the phoneme list
  1389. } // end of Generate
  // timer_on: set to 1 by SpeakNextClause() when new text is supplied or when speech is un-paused;
  // set to 0 on stop, on pause, and when there is no more text to read.
  // While it is 0, SynthOnTimer() only calls WavegenCloseSound().
  1390. static int timer_on = 0;
  // paused: 0 normally; SpeakNextClause() sets it to 2 when speech is paused (control 3)
  // and back to 0 when speech is un-paused or new text is supplied.
  1391. static int paused = 0;
  1392. int SynthOnTimer()
  1393. {//===============
  1394. if(!timer_on)
  1395. {
  1396. return(WavegenCloseSound());
  1397. }
  1398. do {
  1399. if(WcmdqUsed() > 0)
  1400. WavegenOpenSound();
  1401. if(Generate(phoneme_list,&n_phoneme_list,1)==0)
  1402. {
  1403. SpeakNextClause(NULL,NULL,1);
  1404. }
  1405. } while(skipping_text);
  1406. return(0);
  1407. }
  1408. int SynthStatus()
  1409. {//==============
  1410. return(timer_on | paused);
  1411. }
  1412. int SpeakNextClause(FILE *f_in, const void *text_in, int control)
  1413. {//==============================================================
  1414. // Speak text from file (f_in) or memory (text_in)
  1415. // control 0: start
  1416. // either f_in or text_in is set, the other must be NULL
  1417. // The other calls have f_in and text_in = NULL
  1418. // control 1: speak next text
  1419. // 2: stop
  1420. // 3: pause (toggle)
  1421. // 4: is file being read (0=no, 1=yes)
  1422. // 5: interrupt and flush current text.
  1423. int clause_tone;
  1424. char *voice_change;
  1425. static FILE *f_text=NULL;
  1426. static const void *p_text=NULL;
  1427. if(control == 4)
  1428. {
  1429. if((f_text == NULL) && (p_text == NULL))
  1430. return(0);
  1431. else
  1432. return(1);
  1433. }
  1434. if(control == 2)
  1435. {
  1436. // stop speaking
  1437. timer_on = 0;
  1438. p_text = NULL;
  1439. if(f_text != NULL)
  1440. {
  1441. fclose(f_text);
  1442. f_text=NULL;
  1443. }
  1444. n_phoneme_list = 0;
  1445. WcmdqStop();
  1446. embedded_value[EMBED_T] = 0;
  1447. return(0);
  1448. }
  1449. if(control == 3)
  1450. {
  1451. // toggle pause
  1452. if(paused == 0)
  1453. {
  1454. timer_on = 0;
  1455. paused = 2;
  1456. }
  1457. else
  1458. {
  1459. WavegenOpenSound();
  1460. timer_on = 1;
  1461. paused = 0;
  1462. Generate(phoneme_list,&n_phoneme_list,0); // re-start from beginning of clause
  1463. }
  1464. return(0);
  1465. }
  1466. if(control == 5)
  1467. {
  1468. // stop speaking, but continue looking for text
  1469. n_phoneme_list = 0;
  1470. WcmdqStop();
  1471. return(0);
  1472. }
  1473. if((f_in != NULL) || (text_in != NULL))
  1474. {
  1475. f_text = f_in;
  1476. p_text = text_in;
  1477. timer_on = 1;
  1478. paused = 0;
  1479. }
  1480. if((f_text==NULL) && (p_text==NULL))
  1481. {
  1482. skipping_text = 0;
  1483. timer_on = 0;
  1484. return(0);
  1485. }
  1486. if((f_text != NULL) && feof(f_text))
  1487. {
  1488. timer_on = 0;
  1489. fclose(f_text);
  1490. f_text=NULL;
  1491. return(0);
  1492. }
  1493. if(current_phoneme_table != voice->phoneme_tab_ix)
  1494. {
  1495. SelectPhonemeTable(voice->phoneme_tab_ix);
  1496. }
  1497. // read the next clause from the input text file, translate it, and generate
  1498. // entries in the wavegen command queue
  1499. p_text = TranslateClause(translator, f_text, p_text, &clause_tone, &voice_change);
  1500. CalcPitches(translator, clause_tone);
  1501. CalcLengths(translator);
  1502. GetTranslatedPhonemeString(translator->phon_out,sizeof(translator->phon_out));
  1503. if(option_phonemes > 0)
  1504. {
  1505. fprintf(f_trans,"%s\n",translator->phon_out);
  1506. if(!iswalpha(0x010d))
  1507. {
  1508. // check that c-caron is recognized as an alphabetic character
  1509. fprintf(stderr,"Warning: Accented letters are not recognized, eg: U+010D\nSet LC_CTYPE to a UTF-8 locale\n");
  1510. }
  1511. }
  1512. if(phoneme_callback != NULL)
  1513. {
  1514. phoneme_callback(translator->phon_out);
  1515. }
  1516. if(skipping_text)
  1517. {
  1518. n_phoneme_list = 0;
  1519. return(1);
  1520. }
  1521. if(mbrola_name[0] != 0)
  1522. {
  1523. #ifdef USE_MBROLA_LIB
  1524. MbrolaTranslate(phoneme_list,n_phoneme_list,NULL);
  1525. #else
  1526. {
  1527. FILE *f_mbrola;
  1528. if((f_mbrola = f_trans) == stderr)
  1529. f_mbrola = stdout;
  1530. MbrolaTranslate(phoneme_list,n_phoneme_list,f_mbrola);
  1531. }
  1532. #endif
  1533. }
  1534. Generate(phoneme_list,&n_phoneme_list,0);
  1535. WavegenOpenSound();
  1536. if(voice_change != NULL)
  1537. {
  1538. // voice change at the end of the clause (i.e. clause was terminated by a voice change)
  1539. new_voice = LoadVoiceVariant(voice_change,0); // add a Voice instruction to wavegen at the end of the clause
  1540. }
  1541. if(new_voice)
  1542. {
  1543. // finished the current clause, now change the voice if there was an embedded
  1544. // change voice command at the end of it (i.e. clause was broken at the change voice command)
  1545. DoVoiceChange(voice);
  1546. new_voice = NULL;
  1547. }
  1548. return(1);
  1549. } // end of SpeakNextClause