eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

setlengths.cpp 23KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004
/***************************************************************************
 *   Copyright (C) 2005 to 2011 by Jonathan Duddington                     *
 *   email: [email protected]                                              *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 3 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, see:                                 *
 *   <http://www.gnu.org/licenses/>.                                       *
 ***************************************************************************/
  19. #include "StdAfx.h"
  20. #include <stdlib.h>
  21. #include <stdio.h>
  22. #include <wctype.h>
  23. #include "speak_lib.h"
  24. #include "speech.h"
  25. #include "phoneme.h"
  26. #include "synthesize.h"
  27. #include "voice.h"
  28. #include "translate.h"
  29. extern int GetAmplitude(void);
  30. extern void DoSonicSpeed(int value);
  31. extern int saved_parameters[];
  32. // convert from words-per-minute to internal speed factor
  33. // Use this to calibrate speed for wpm 80-350
  34. static unsigned char speed_lookup[] = {
  35. 255, 255, 255, 255, 255, // 80
  36. 253, 249, 245, 242, 238, // 85
  37. 235, 232, 228, 225, 222, // 90
  38. 218, 216, 213, 210, 207, // 95
  39. 204, 201, 198, 196, 193, // 100
  40. 191, 188, 186, 183, 181, // 105
  41. 179, 176, 174, 172, 169, // 110
  42. 168, 165, 163, 161, 159, // 115
  43. 158, 155, 153, 152, 150, // 120
  44. 148, 146, 145, 143, 141, // 125
  45. 139, 137, 136, 135, 133, // 130
  46. 131, 130, 129, 127, 126, // 135
  47. 124, 123, 122, 120, 119, // 140
  48. 118, 117, 115, 114, 113, // 145
  49. 112, 111, 110, 109, 107, // 150
  50. 106, 105, 104, 103, 102, // 155
  51. 101, 100, 99, 98, 97, // 160
  52. 96, 95, 94, 93, 92, // 165
  53. 91, 90, 89, 89, 88, // 170
  54. 87, 86, 85, 84, 83, // 175
  55. 82, 82, 81, 80, 80, // 180
  56. 79, 78, 77, 76, 76, // 185
  57. 75, 75, 74, 73, 72, // 190
  58. 71, 71, 70, 69, 69, // 195
  59. 68, 67, 67, 66, 66, // 200
  60. 65, 64, 64, 63, 62, // 205
  61. 62, 61, 61, 60, 59, // 210
  62. 59, 58, 58, 57, 57, // 215
  63. 56, 56, 55, 54, 54, // 220
  64. 53, 53, 52, 52, 52, // 225
  65. 51, 50, 50, 49, 49, // 230
  66. 48, 48, 47, 47, 46, // 235
  67. 46, 46, 45, 45, 44, // 240
  68. 44, 44, 43, 43, 42, // 245
  69. 41, 40, 40, 40, 39, // 250
  70. 39, 39, 38, 38, 38, // 255
  71. 37, 37, 37, 36, 36, // 260
  72. 35, 35, 35, 35, 34, // 265
  73. 34, 34, 33, 33, 33, // 270
  74. 32, 32, 31, 31, 31, // 275
  75. 30, 30, 30, 29, 29, // 280
  76. 29, 29, 28, 28, 27, // 285
  77. 27, 27, 27, 26, 26, // 290
  78. 26, 26, 25, 25, 25, // 295
  79. 24, 24, 24, 24, 23, // 300
  80. 23, 23, 23, 22, 22, // 305
  81. 22, 21, 21, 21, 21, // 310
  82. 20, 20, 20, 20, 19, // 315
  83. 19, 19, 18, 18, 17, // 320
  84. 17, 17, 16, 16, 16, // 325
  85. 16, 16, 16, 15, 15, // 330
  86. 15, 15, 14, 14, 14, // 335
  87. 13, 13, 13, 12, 12, // 340
  88. 12, 12, 11, 11, 11, // 345
  89. 11, 10, 10, 10, 9, // 350
  90. 9, 9, 8, 8, 8, // 355
  91. };
  92. // speed_factor1 adjustments for speeds 350 to 374: pauses
  93. static unsigned char pause_factor_350[] = {
  94. 22,22,22,22,22,22,22,21,21,21, // 350
  95. 21,20,20,19,19,18,17,16,15,15, // 360
  96. 15,15,15,15,15}; // 370
  97. // wav_factor adjustments for speeds 350 to 450
  98. // Use this to calibrate speed for wpm 350-450
  99. static unsigned char wav_factor_350[] = {
  100. 120, 121, 120, 119, 119, // 350
  101. 118, 118, 117, 116, 116, // 355
  102. 115, 114, 113, 112, 112, // 360
  103. 111, 111, 110, 109, 108, // 365
  104. 107, 106, 106, 104, 103, // 370
  105. 103, 102, 102, 102, 101, // 375
  106. 101, 99, 98, 98, 97, // 380
  107. 96, 96, 95, 94, 93, // 385
  108. 91, 90, 91, 90, 89, // 390
  109. 88, 86, 85, 86, 85, // 395
  110. 85, 84, 82, 81, 80, // 400
  111. 79, 77, 78, 78, 76, // 405
  112. 77, 75, 75, 74, 73, // 410
  113. 71, 72, 70, 69, 69, // 415
  114. 69, 67, 65, 64, 63, // 420
  115. 63, 63, 61, 61, 59, // 425
  116. 59, 59, 58, 56, 57, // 430
  117. 58, 56, 54, 53, 52, // 435
  118. 52, 53, 52, 52, 50, // 440
  119. 48, 47, 47, 45, 46, // 445
  120. 45}; // 450
  121. static int speed1 = 130;
  122. static int speed2 = 121;
  123. static int speed3 = 118;
  124. //#define TEST_SPEED
  125. #ifdef INCLUDE_SONIC
  126. void SetSpeed(int control)
  127. {//=======================
  128. int x;
  129. int s1;
  130. int wpm;
  131. int wpm2;
  132. int wpm_value;
  133. double sonic;
  134. speed.loud_consonants = 0;
  135. speed.min_sample_len = 450;
  136. speed.lenmod_factor = 110; // controls the effect of FRFLAG_LEN_MOD reduce length change
  137. speed.lenmod2_factor = 100;
  138. speed.min_pause = 5;
  139. wpm = embedded_value[EMBED_S];
  140. if(control == 2)
  141. wpm = embedded_value[EMBED_S2];
  142. wpm_value = wpm;
  143. if(voice->speed_percent > 0)
  144. {
  145. wpm = (wpm * voice->speed_percent)/100;
  146. }
  147. if(control & 2)
  148. {
  149. DoSonicSpeed(1 * 1024);
  150. }
  151. if((wpm_value > 450) || ((wpm_value > speed.fast_settings[0]) && (wpm > 350)))
  152. {
  153. wpm2 = wpm;
  154. wpm = 175;
  155. // set special eSpeak speed parameters for Sonic use
  156. // The eSpeak output will be speeded up by at least x2
  157. x = 73;
  158. if(control & 1)
  159. {
  160. speed1 = (x * voice->speedf1)/256;
  161. speed2 = (x * voice->speedf2)/256;
  162. speed3 = (x * voice->speedf3)/256;
  163. }
  164. if(control & 2)
  165. {
  166. sonic = ((double)wpm2)/wpm;
  167. DoSonicSpeed((int)(sonic * 1024));
  168. speed.pause_factor = 85;
  169. speed.clause_pause_factor = 80;
  170. speed.min_pause = 22;
  171. speed.min_sample_len = 450*2;
  172. speed.wav_factor = 211;
  173. speed.lenmod_factor = 210;
  174. speed.lenmod2_factor = 170;
  175. }
  176. return;
  177. }
  178. #ifdef TEST_SPEED
  179. if(wpm > 1000)
  180. {
  181. // TESTING
  182. // test = wpm / 1000;
  183. wpm = wpm % 1000;
  184. }
  185. #endif
  186. if(wpm > 450)
  187. wpm = 450;
  188. if(wpm > 360)
  189. {
  190. speed.loud_consonants = (wpm - 360) / 8;
  191. }
  192. wpm2 = wpm;
  193. if(wpm > 359) wpm2 = 359;
  194. if(wpm < 80) wpm2 = 80;
  195. x = speed_lookup[wpm2-80];
  196. if(wpm >= 380)
  197. x = 7;
  198. if(wpm >= 400)
  199. x = 6;
  200. if(control & 1)
  201. {
  202. // set speed factors for different syllable positions within a word
  203. // these are used in CalcLengths()
  204. speed1 = (x * voice->speedf1)/256;
  205. speed2 = (x * voice->speedf2)/256;
  206. speed3 = (x * voice->speedf3)/256;
  207. if(x <= 7)
  208. {
  209. speed1 = x;
  210. speed2 = speed3 = x - 1;
  211. }
  212. }
  213. if(control & 2)
  214. {
  215. // these are used in synthesis file
  216. if(wpm > 350)
  217. {
  218. speed.lenmod_factor = 85 - (wpm - 350) / 3;
  219. speed.lenmod2_factor = 60 - (wpm - 350) / 8;
  220. }
  221. else
  222. if(wpm > 250)
  223. {
  224. speed.lenmod_factor = 110 - (wpm - 250)/4;
  225. speed.lenmod2_factor = 110 - (wpm - 250)/2;
  226. }
  227. s1 = (x * voice->speedf1)/256;
  228. if(wpm >= 170)
  229. speed.wav_factor = 110 + (150*s1)/128; // reduced speed adjustment, used for playing recorded sounds
  230. else
  231. speed.wav_factor = 128 + (128*s1)/130; // = 215 at 170 wpm
  232. if(wpm >= 350)
  233. {
  234. speed.wav_factor = wav_factor_350[wpm-350];
  235. }
  236. if(wpm >= 390)
  237. {
  238. speed.min_sample_len = 450 - (wpm - 400)/2;
  239. if(wpm > 440)
  240. speed.min_sample_len = 420 - (wpm - 440);
  241. }
  242. // adjust for different sample rates
  243. speed.min_sample_len = (speed.min_sample_len * samplerate_native) / 22050;
  244. speed.pause_factor = (256 * s1)/115; // full speed adjustment, used for pause length
  245. speed.clause_pause_factor = 0;
  246. if(wpm > 430)
  247. {
  248. speed.pause_factor = 12;
  249. // speed.clause_pause_factor = 15;
  250. }
  251. else
  252. if(wpm > 400)
  253. {
  254. speed.pause_factor = 13;
  255. // speed.clause_pause_factor = 15;
  256. }
  257. else
  258. if(wpm > 374)
  259. {
  260. speed.pause_factor = 14;
  261. }
  262. else
  263. if(wpm > 350)
  264. {
  265. speed.pause_factor = pause_factor_350[wpm - 350];
  266. }
  267. if(speed.clause_pause_factor == 0)
  268. {
  269. // restrict the reduction of pauses between clauses
  270. if((speed.clause_pause_factor = speed.pause_factor) < 16)
  271. speed.clause_pause_factor = 16;
  272. }
  273. }
  274. #ifdef TEST_SPEED
  275. //if(control==3)
  276. printf("%3d: speedf %d %d %d pause=%d %d wav=%d lenmod=%d %d\n",wpm,speed1,speed2,speed3, speed.pause_factor,speed.clause_pause_factor, speed.wav_factor,speed.lenmod_factor,speed.lenmod2_factor);
  277. #endif
  278. } // end of SetSpeed
  279. #else // not using sonic speed-up
  280. void SetSpeed(int control)
  281. {//=======================
  282. // This is the earlier version of SetSpeed() before sonic speed-up was added
  283. int x;
  284. int s1;
  285. int wpm;
  286. int wpm2;
  287. speed.loud_consonants = 0;
  288. speed.min_sample_len = 450;
  289. speed.lenmod_factor = 110; // controls the effect of FRFLAG_LEN_MOD reduce length change
  290. speed.lenmod2_factor = 100;
  291. wpm = embedded_value[EMBED_S];
  292. if(control == 2)
  293. wpm = embedded_value[EMBED_S2];
  294. #ifdef TEST_SPEED
  295. if(wpm > 1000)
  296. {
  297. // TESTING
  298. test = wpm / 1000;
  299. wpm = wpm % 1000;
  300. }
  301. #endif
  302. if(voice->speed_percent > 0)
  303. {
  304. wpm = (wpm * voice->speed_percent)/100;
  305. }
  306. if(wpm > 450)
  307. wpm = 450;
  308. if(wpm > 360)
  309. {
  310. speed.loud_consonants = (wpm - 360) / 8;
  311. }
  312. wpm2 = wpm;
  313. if(wpm > 359) wpm2 = 359;
  314. if(wpm < 80) wpm2 = 80;
  315. x = speed_lookup[wpm2-80];
  316. if(wpm >= 380)
  317. x = 7;
  318. if(wpm >= 400)
  319. x = 6;
  320. if(control & 1)
  321. {
  322. // set speed factors for different syllable positions within a word
  323. // these are used in CalcLengths()
  324. speed1 = (x * voice->speedf1)/256;
  325. speed2 = (x * voice->speedf2)/256;
  326. speed3 = (x * voice->speedf3)/256;
  327. if(x <= 7)
  328. {
  329. speed1 = x;
  330. speed2 = speed3 = x - 1;
  331. }
  332. }
  333. if(control & 2)
  334. {
  335. // these are used in synthesis file
  336. if(wpm > 350)
  337. {
  338. speed.lenmod_factor = 85 - (wpm - 350) / 3;
  339. speed.lenmod2_factor = 60 - (wpm - 350) / 8;
  340. }
  341. else
  342. if(wpm > 250)
  343. {
  344. speed.lenmod_factor = 110 - (wpm - 250)/4;
  345. speed.lenmod2_factor = 110 - (wpm - 250)/2;
  346. }
  347. s1 = (x * voice->speedf1)/256;
  348. if(wpm >= 170)
  349. speed.wav_factor = 110 + (150*s1)/128; // reduced speed adjustment, used for playing recorded sounds
  350. else
  351. speed.wav_factor = 128 + (128*s1)/130; // = 215 at 170 wpm
  352. if(wpm >= 350)
  353. {
  354. speed.wav_factor = wav_factor_350[wpm-350];
  355. }
  356. if(wpm >= 390)
  357. {
  358. speed.min_sample_len = 450 - (wpm - 400)/2;
  359. if(wpm > 440)
  360. speed.min_sample_len = 420 - (wpm - 440);
  361. }
  362. speed.pause_factor = (256 * s1)/115; // full speed adjustment, used for pause length
  363. speed.clause_pause_factor = 0;
  364. if(wpm > 430)
  365. {
  366. speed.pause_factor = 12;
  367. // speed.clause_pause_factor = 15;
  368. }
  369. else
  370. if(wpm > 400)
  371. {
  372. speed.pause_factor = 13;
  373. // speed.clause_pause_factor = 15;
  374. }
  375. else
  376. if(wpm > 374)
  377. {
  378. speed.pause_factor = 14;
  379. }
  380. else
  381. if(wpm > 350)
  382. {
  383. speed.pause_factor = pause_factor_350[wpm - 350];
  384. }
  385. if(speed.clause_pause_factor == 0)
  386. {
  387. // restrict the reduction of pauses between clauses
  388. if((speed.clause_pause_factor = speed.pause_factor) < 16)
  389. speed.clause_pause_factor = 16;
  390. }
  391. }
  392. #ifdef TEST_SPEED
  393. //if(control==3)
  394. printf("%3d: speedf %d %d %d pause=%d %d wav=%d lenmod=%d %d\n",wpm,speed1,speed2,speed3, speed.pause_factor,speed.clause_pause_factor, speed.wav_factor,speed.lenmod_factor,speed.lenmod2_factor);
  395. #endif
  396. } // end of SetSpeed
  397. #endif // of INCLUDE_SONIC
  398. #ifdef deleted
  399. void SetAmplitude(int amp)
  400. {//=======================
  401. static unsigned char amplitude_factor[] = {0,5,6,7,9,11,14,17,21,26, 32, 38,44,50,56,63,70,77,84,91,100 };
  402. if((amp >= 0) && (amp <= 20))
  403. {
  404. option_amplitude = (amplitude_factor[amp] * 480)/256;
  405. }
  406. }
  407. #endif
  408. void SetParameter(int parameter, int value, int relative)
  409. {//======================================================
  410. // parameter: reset-all, amp, pitch, speed, linelength, expression, capitals, number grouping
  411. // relative 0=absolute 1=relative
  412. int new_value = value;
  413. int default_value;
  414. if(relative)
  415. {
  416. if(parameter < 5)
  417. {
  418. default_value = param_defaults[parameter];
  419. new_value = default_value + (default_value * value)/100;
  420. }
  421. }
  422. param_stack[0].parameter[parameter] = new_value;
  423. saved_parameters[parameter] = new_value;
  424. switch(parameter)
  425. {
  426. case espeakRATE:
  427. embedded_value[EMBED_S] = new_value;
  428. embedded_value[EMBED_S2] = new_value;
  429. SetSpeed(3);
  430. break;
  431. case espeakVOLUME:
  432. embedded_value[EMBED_A] = new_value;
  433. GetAmplitude();
  434. break;
  435. case espeakPITCH:
  436. if(new_value > 99) new_value = 99;
  437. if(new_value < 0) new_value = 0;
  438. embedded_value[EMBED_P] = new_value;
  439. break;
  440. case espeakRANGE:
  441. if(new_value > 99) new_value = 99;
  442. embedded_value[EMBED_R] = new_value;
  443. break;
  444. case espeakLINELENGTH:
  445. option_linelength = new_value;
  446. break;
  447. case espeakWORDGAP:
  448. option_wordgap = new_value;
  449. break;
  450. case espeakINTONATION:
  451. if((new_value & 0xff) != 0)
  452. translator->langopts.intonation_group = new_value & 0xff;
  453. option_tone_flags = new_value;
  454. break;
  455. default:
  456. break;
  457. }
  458. } // end of SetParameter
  459. static void DoEmbedded2(int *embix)
  460. {//================================
  461. // There were embedded commands in the text at this point
  462. unsigned int word;
  463. do {
  464. word = embedded_list[(*embix)++];
  465. if((word & 0x1f) == EMBED_S)
  466. {
  467. // speed
  468. SetEmbedded(word & 0x7f, word >> 8); // adjusts embedded_value[EMBED_S]
  469. SetSpeed(1);
  470. }
  471. } while((word & 0x80) == 0);
  472. }
  473. void CalcLengths(Translator *tr)
  474. {//==============================
  475. int ix;
  476. int ix2;
  477. PHONEME_LIST *prev;
  478. PHONEME_LIST *next;
  479. PHONEME_LIST *next2;
  480. PHONEME_LIST *next3;
  481. PHONEME_LIST *p;
  482. PHONEME_LIST *p2;
  483. int stress;
  484. int type;
  485. static int more_syllables=0;
  486. int pre_sonorant=0;
  487. int pre_voiced=0;
  488. int last_pitch = 0;
  489. int pitch_start;
  490. int length_mod;
  491. int len;
  492. int env2;
  493. int end_of_clause;
  494. int embedded_ix = 0;
  495. int min_drop;
  496. int pitch1;
  497. int emphasized;
  498. int tone_mod;
  499. unsigned char *pitch_env=NULL;
  500. PHONEME_DATA phdata_tone;
  501. for(ix=1; ix<n_phoneme_list; ix++)
  502. {
  503. prev = &phoneme_list[ix-1];
  504. p = &phoneme_list[ix];
  505. stress = p->stresslevel & 0x7;
  506. emphasized = p->stresslevel & 0x8;
  507. next = &phoneme_list[ix+1];
  508. if(p->synthflags & SFLAG_EMBEDDED)
  509. {
  510. DoEmbedded2(&embedded_ix);
  511. }
  512. type = p->type;
  513. if(p->synthflags & SFLAG_SYLLABLE)
  514. type = phVOWEL;
  515. switch(type)
  516. {
  517. case phPAUSE:
  518. last_pitch = 0;
  519. break;
  520. case phSTOP:
  521. last_pitch = 0;
  522. if(prev->type == phFRICATIVE)
  523. p->prepause = 25;
  524. else
  525. if((more_syllables > 0) || (stress < 4))
  526. p->prepause = 48;
  527. else
  528. p->prepause = 60;
  529. if(prev->type == phSTOP)
  530. p->prepause = 60;
  531. if((tr->langopts.word_gap & 0x10) && (p->newword))
  532. p->prepause = 60;
  533. if(p->ph->phflags & phLENGTHENSTOP)
  534. p->prepause += 30;
  535. if(p->synthflags & SFLAG_LENGTHEN)
  536. p->prepause += tr->langopts.long_stop;
  537. break;
  538. case phVFRICATIVE:
  539. case phFRICATIVE:
  540. if(p->newword)
  541. {
  542. if((prev->type == phVOWEL) && (p->ph->phflags & phNOPAUSE))
  543. {
  544. }
  545. else
  546. {
  547. p->prepause = 15;
  548. }
  549. }
  550. if(next->type==phPAUSE && prev->type==phNASAL && !(p->ph->phflags&phFORTIS))
  551. p->prepause = 25;
  552. if(prev->ph->phflags & phBRKAFTER)
  553. p->prepause = 30;
  554. if((tr->langopts.word_gap & 0x10) && (p->newword))
  555. p->prepause = 30;
  556. if((p->ph->phflags & phSIBILANT) && next->type==phSTOP && !next->newword)
  557. {
  558. if(prev->type == phVOWEL)
  559. p->length = 200; // ?? should do this if it's from a prefix
  560. else
  561. p->length = 150;
  562. }
  563. else
  564. p->length = 256;
  565. if(type == phVFRICATIVE)
  566. {
  567. if(next->type==phVOWEL)
  568. {
  569. pre_voiced = 1;
  570. }
  571. if((prev->type==phVOWEL) || (prev->type == phLIQUID))
  572. {
  573. p->length = (255 + prev->length)/2;
  574. }
  575. }
  576. break;
  577. case phVSTOP:
  578. if(prev->type==phVFRICATIVE || prev->type==phFRICATIVE || (prev->ph->phflags & phSIBILANT) || (prev->type == phLIQUID))
  579. p->prepause = 30;
  580. if(next->type==phVOWEL || next->type==phLIQUID)
  581. {
  582. if((next->type==phVOWEL) || !next->newword)
  583. pre_voiced = 1;
  584. p->prepause = 40;
  585. if((prev->type == phPAUSE) || (prev->type == phVOWEL)) // || (prev->ph->mnemonic == ('/'*256+'r')))
  586. p->prepause = 0;
  587. else
  588. if(p->newword==0)
  589. {
  590. if(prev->type==phLIQUID)
  591. p->prepause = 20;
  592. if(prev->type==phNASAL)
  593. p->prepause = 12;
  594. if(prev->type==phSTOP && !(prev->ph->phflags & phFORTIS))
  595. p->prepause = 0;
  596. }
  597. }
  598. if((tr->langopts.word_gap & 0x10) && (p->newword) && (p->prepause < 20))
  599. p->prepause = 20;
  600. break;
  601. case phLIQUID:
  602. case phNASAL:
  603. p->amp = tr->stress_amps[0]; // unless changed later
  604. p->length = 256; // TEMPORARY
  605. min_drop = 0;
  606. if(p->newword)
  607. {
  608. if(prev->type==phLIQUID)
  609. p->prepause = 25;
  610. if(prev->type==phVOWEL)
  611. {
  612. if(!(p->ph->phflags & phNOPAUSE))
  613. p->prepause = 12;
  614. }
  615. }
  616. if(next->type==phVOWEL)
  617. {
  618. pre_sonorant = 1;
  619. }
  620. else
  621. {
  622. p->pitch2 = last_pitch;
  623. if((prev->type==phVOWEL) || (prev->type == phLIQUID))
  624. {
  625. p->length = prev->length;
  626. if(p->type == phLIQUID)
  627. {
  628. p->length = speed1;
  629. }
  630. if(next->type == phVSTOP)
  631. {
  632. p->length = (p->length * 160)/100;
  633. }
  634. if(next->type == phVFRICATIVE)
  635. {
  636. p->length = (p->length * 120)/100;
  637. }
  638. }
  639. else
  640. {
  641. for(ix2=ix; ix2<n_phoneme_list; ix2++)
  642. {
  643. if(phoneme_list[ix2].type == phVOWEL)
  644. {
  645. p->pitch2 = phoneme_list[ix2].pitch2;
  646. break;
  647. }
  648. }
  649. }
  650. p->pitch1 = p->pitch2-16;
  651. if(p->pitch2 < 16)
  652. {
  653. p->pitch1 = 0;
  654. }
  655. p->env = PITCHfall;
  656. pre_voiced = 0;
  657. }
  658. break;
  659. case phVOWEL:
  660. min_drop = 0;
  661. next2 = &phoneme_list[ix+2];
  662. next3 = &phoneme_list[ix+3];
  663. if(stress > 7) stress = 7;
  664. if(stress <= 1)
  665. {
  666. stress = stress ^ 1; // swap diminished and unstressed (until we swap stress_amps,stress_lengths in tr_languages)
  667. }
  668. if(pre_sonorant)
  669. p->amp = tr->stress_amps[stress]-1;
  670. else
  671. p->amp = tr->stress_amps[stress];
  672. if(emphasized)
  673. p->amp = 25;
  674. if(ix >= (n_phoneme_list-3))
  675. {
  676. // last phoneme of a clause, limit its amplitude
  677. if(p->amp > tr->langopts.param[LOPT_MAXAMP_EOC])
  678. p->amp = tr->langopts.param[LOPT_MAXAMP_EOC];
  679. }
  680. // is the last syllable of a word ?
  681. more_syllables=0;
  682. end_of_clause = 0;
  683. for(p2 = p+1; p2->newword== 0; p2++)
  684. {
  685. if((p2->type == phVOWEL) && !(p2->ph->phflags & phNONSYLLABIC))
  686. more_syllables++;
  687. if(p2->ph->code == phonPAUSE_CLAUSE)
  688. end_of_clause = 2;
  689. }
  690. if(p2->ph->code == phonPAUSE_CLAUSE)
  691. end_of_clause = 2;
  692. if((p2->newword & 2) && (more_syllables==0))
  693. {
  694. end_of_clause = 2;
  695. }
  696. // calc length modifier
  697. if((next->ph->code == phonPAUSE_VSHORT) && (next2->type == phPAUSE))
  698. {
  699. // if PAUSE_VSHORT is followed by a pause, then use that
  700. next = next2;
  701. next2 = next3;
  702. next3 = &phoneme_list[ix+4];
  703. }
  704. if(more_syllables==0)
  705. {
  706. len = tr->langopts.length_mods0[next2->ph->length_mod *10+ next->ph->length_mod];
  707. if((next->newword) && (tr->langopts.word_gap & 0x20))
  708. {
  709. // consider as a pause + first phoneme of the next word
  710. length_mod = (len + tr->langopts.length_mods0[next->ph->length_mod *10+ 1])/2;
  711. }
  712. else
  713. length_mod = len;
  714. }
  715. else
  716. {
  717. length_mod = tr->langopts.length_mods[next2->ph->length_mod *10+ next->ph->length_mod];
  718. if((next->type == phNASAL) && (next2->type == phSTOP || next2->type == phVSTOP) && (next3->ph->phflags & phFORTIS))
  719. length_mod -= 15;
  720. }
  721. if(more_syllables==0)
  722. length_mod *= speed1;
  723. else
  724. if(more_syllables==1)
  725. length_mod *= speed2;
  726. else
  727. length_mod *= speed3;
  728. length_mod = length_mod / 128;
  729. if(length_mod < 8)
  730. length_mod = 8; // restrict how much lengths can be reduced
  731. if(stress >= 7)
  732. {
  733. // tonic syllable, include a constant component so it doesn't decrease directly with speed
  734. length_mod += tr->langopts.lengthen_tonic;
  735. if(emphasized)
  736. length_mod += (tr->langopts.lengthen_tonic/2);
  737. }
  738. else
  739. if(emphasized)
  740. {
  741. length_mod += tr->langopts.lengthen_tonic;
  742. }
  743. if((len = tr->stress_lengths[stress]) == 0)
  744. len = tr->stress_lengths[6];
  745. length_mod = length_mod * len;
  746. if(p->tone_ph != 0)
  747. {
  748. if((tone_mod = phoneme_tab[p->tone_ph]->std_length) > 0)
  749. {
  750. // a tone phoneme specifies a percentage change to the length
  751. length_mod = (length_mod * tone_mod) / 100;
  752. }
  753. }
  754. if((end_of_clause == 2) && !(tr->langopts.stress_flags & S_NO_EOC_LENGTHEN))
  755. {
  756. // this is the last syllable in the clause, lengthen it - more for short vowels
  757. len = (p->ph->std_length * 2);
  758. if(tr->langopts.stress_flags & S_EO_CLAUSE1)
  759. len=200; // don't lengthen short vowels more than long vowels at end-of-clause
  760. length_mod = length_mod * (256 + (280 - len)/3)/256;
  761. }
  762. if(length_mod > tr->langopts.max_lengthmod*speed1)
  763. {
  764. //limit the vowel length adjustment for some languages
  765. length_mod = (tr->langopts.max_lengthmod*speed1);
  766. }
  767. length_mod = length_mod / 128;
  768. if(p->type != phVOWEL)
  769. {
  770. length_mod = 256; // syllabic consonant
  771. min_drop = 16;
  772. }
  773. p->length = length_mod;
  774. if(p->env >= (N_ENVELOPE_DATA-1))
  775. {
  776. fprintf(stderr,"espeak: Bad intonation data\n");
  777. p->env = 0;
  778. }
  779. // pre-vocalic part
  780. // set last-pitch
  781. env2 = p->env + 1; // version for use with preceding semi-vowel
  782. if(p->tone_ph != 0)
  783. {
  784. InterpretPhoneme2(p->tone_ph, &phdata_tone);
  785. pitch_env = GetEnvelope(phdata_tone.pitch_env);
  786. }
  787. else
  788. {
  789. pitch_env = envelope_data[env2];
  790. }
  791. pitch_start = p->pitch1 + ((p->pitch2-p->pitch1)*pitch_env[0])/256;
  792. if(pre_sonorant || pre_voiced)
  793. {
  794. // set pitch for pre-vocalic part
  795. if(pitch_start == 255)
  796. last_pitch = pitch_start; // pitch is not set
  797. if(pitch_start - last_pitch > 16)
  798. last_pitch = pitch_start - 16;
  799. prev->pitch1 = last_pitch;
  800. prev->pitch2 = pitch_start;
  801. if(last_pitch < pitch_start)
  802. {
  803. prev->env = PITCHrise;
  804. p->env = env2;
  805. }
  806. else
  807. {
  808. prev->env = PITCHfall;
  809. }
  810. prev->length = length_mod;
  811. prev->amp = p->amp;
  812. if((prev->type != phLIQUID) && (prev->amp > 18))
  813. prev->amp = 18;
  814. }
  815. // vowel & post-vocalic part
  816. next->synthflags &= ~SFLAG_SEQCONTINUE;
  817. if(next->type == phNASAL && next2->type != phVOWEL)
  818. next->synthflags |= SFLAG_SEQCONTINUE;
  819. if(next->type == phLIQUID)
  820. {
  821. next->synthflags |= SFLAG_SEQCONTINUE;
  822. if(next2->type == phVOWEL)
  823. {
  824. next->synthflags &= ~SFLAG_SEQCONTINUE;
  825. }
  826. if(next2->type != phVOWEL)
  827. {
  828. if(next->ph->mnemonic == ('/'*256+'r'))
  829. {
  830. next->synthflags &= ~SFLAG_SEQCONTINUE;
  831. // min_drop = 15;
  832. }
  833. }
  834. }
  835. if((min_drop > 0) && ((p->pitch2 - p->pitch1) < min_drop))
  836. {
  837. pitch1 = p->pitch2 - min_drop;
  838. if(pitch1 < 0)
  839. pitch1 = 0;
  840. p->pitch1 = pitch1;
  841. }
  842. last_pitch = p->pitch1 + ((p->pitch2-p->pitch1)*envelope_data[p->env][127])/256;
  843. pre_sonorant = 0;
  844. pre_voiced = 0;
  845. break;
  846. }
  847. }
  848. } // end of CalcLengths