eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

setlengths.c 24KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875
  1. /*
  2. * Copyright (C) 2005 to 2011 by Jonathan Duddington
  3. * email: [email protected]
  4. * Copyright (C) 2015-2016 Reece H. Dunn
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, see: <http://www.gnu.org/licenses/>.
  18. */
  19. #include "config.h"
  20. #include <errno.h>
  21. #include <stdint.h>
  22. #include <stdio.h>
  23. #include <stdlib.h>
  24. #include <espeak-ng/espeak_ng.h>
  25. #include <espeak-ng/speak_lib.h>
  26. #include <espeak-ng/encoding.h>
  27. #include "readclause.h"
  28. #include "setlengths.h"
  29. #include "synthdata.h"
  30. #include "wavegen.h"
  31. #include "phoneme.h"
  32. #include "voice.h"
  33. #include "synthesize.h"
  34. #include "translate.h"
  35. extern int saved_parameters[];
  // Convert from words-per-minute to the internal speed factor.
  //
  // One entry per wpm value, starting at 80 wpm (index 0); each row below holds
  // five consecutive rates and is labelled with the first of them.  The table
  // has 280 entries, i.e. it covers 80..359 wpm only.  SetSpeed() clamps its
  // index into that range and uses fixed factors for the fastest rates, so the
  // rates above 359 wpm are NOT calibrated here (see wav_factor_350 for those).
  //
  // The table is only ever read, so it is const.
  static const unsigned char speed_lookup[] = {
      255, 255, 255, 255, 255, // 80
      253, 249, 245, 242, 238, // 85
      235, 232, 228, 225, 222, // 90
      218, 216, 213, 210, 207, // 95
      204, 201, 198, 196, 193, // 100
      191, 188, 186, 183, 181, // 105
      179, 176, 174, 172, 169, // 110
      168, 165, 163, 161, 159, // 115
      158, 155, 153, 152, 150, // 120
      148, 146, 145, 143, 141, // 125
      139, 137, 136, 135, 133, // 130
      131, 130, 129, 127, 126, // 135
      124, 123, 122, 120, 119, // 140
      118, 117, 115, 114, 113, // 145
      112, 111, 110, 109, 107, // 150
      106, 105, 104, 103, 102, // 155
      101, 100,  99,  98,  97, // 160
       96,  95,  94,  93,  92, // 165
       91,  90,  89,  89,  88, // 170
       87,  86,  85,  84,  83, // 175
       82,  82,  81,  80,  80, // 180
       79,  78,  77,  76,  76, // 185
       75,  75,  74,  73,  72, // 190
       71,  71,  70,  69,  69, // 195
       68,  67,  67,  66,  66, // 200
       65,  64,  64,  63,  62, // 205
       62,  61,  61,  60,  59, // 210
       59,  58,  58,  57,  57, // 215
       56,  56,  55,  54,  54, // 220
       53,  53,  52,  52,  52, // 225
       51,  50,  50,  49,  49, // 230
       48,  48,  47,  47,  46, // 235
       46,  46,  45,  45,  44, // 240
       44,  44,  43,  43,  42, // 245
       41,  40,  40,  40,  39, // 250
       39,  39,  38,  38,  38, // 255
       37,  37,  37,  36,  36, // 260
       35,  35,  35,  35,  34, // 265
       34,  34,  33,  33,  33, // 270
       32,  32,  31,  31,  31, // 275
       30,  30,  30,  29,  29, // 280
       29,  29,  28,  28,  27, // 285
       27,  27,  27,  26,  26, // 290
       26,  26,  25,  25,  25, // 295
       24,  24,  24,  24,  23, // 300
       23,  23,  23,  22,  22, // 305
       22,  21,  21,  21,  21, // 310
       20,  20,  20,  20,  19, // 315
       19,  19,  18,  18,  17, // 320
       17,  17,  16,  16,  16, // 325
       16,  16,  16,  15,  15, // 330
       15,  15,  14,  14,  14, // 335
       13,  13,  13,  12,  12, // 340
       12,  12,  11,  11,  11, // 345
       11,  10,  10,  10,   9, // 350
        9,   9,   8,   8,   8, // 355
  };
  // Pause-length factors for 350..374 wpm, one entry per wpm (index = wpm - 350).
  // SetSpeed() only reads this table when 350 < wpm <= 374, so entry 0 is
  // never actually used.  Read-only, hence const.
  static const unsigned char pause_factor_350[] = {
      22, 22, 22, 22, 22, 22, 22, 21, 21, 21, // 350
      21, 20, 20, 19, 19, 18, 17, 16, 15, 15, // 360
      15, 15, 15, 15, 15                      // 370
  };
  // wav_factor values for 350..450 wpm, one entry per wpm (index = wpm - 350,
  // 101 entries).  Use this to calibrate speed for wpm 350-450.
  // Each row holds five consecutive rates and is labelled with the first.
  // Read-only, hence const.
  static const unsigned char wav_factor_350[] = {
      120, 121, 120, 119, 119, // 350
      118, 118, 117, 116, 116, // 355
      115, 114, 113, 112, 112, // 360
      111, 111, 110, 109, 108, // 365
      107, 106, 106, 104, 103, // 370
      103, 102, 102, 102, 101, // 375
      101,  99,  98,  98,  97, // 380
       96,  96,  95,  94,  93, // 385
       91,  90,  91,  90,  89, // 390
       88,  86,  85,  86,  85, // 395
       85,  84,  82,  81,  80, // 400
       79,  77,  78,  78,  76, // 405
       77,  75,  75,  74,  73, // 410
       71,  72,  70,  69,  69, // 415
       69,  67,  65,  64,  63, // 420
       63,  63,  61,  61,  59, // 425
       59,  59,  58,  56,  57, // 430
       58,  56,  54,  53,  52, // 435
       52,  53,  52,  52,  50, // 440
       48,  47,  47,  45,  46, // 445
       45                      // 450
  };
  // Vowel-length speed factors, written by SetSpeed() when (control & 1) and
  // read by CalcLengths(), which picks one according to how many syllables
  // still follow the vowel within its word.
  static int speed1 = 130; // no further syllables in the word
  static int speed2 = 121; // exactly one further syllable
  static int speed3 = 118; // two or more further syllables
  130. #if HAVE_SONIC_H
  131. void SetSpeed(int control)
  132. {
  133. int x;
  134. int s1;
  135. int wpm;
  136. int wpm2;
  137. int wpm_value;
  138. double sonic;
  139. speed.min_sample_len = espeakRATE_MAXIMUM;
  140. speed.lenmod_factor = 110; // controls the effect of FRFLAG_LEN_MOD reduce length change
  141. speed.lenmod2_factor = 100;
  142. speed.min_pause = 5;
  143. wpm = embedded_value[EMBED_S];
  144. if (control == 2)
  145. wpm = embedded_value[EMBED_S2];
  146. wpm_value = wpm;
  147. if (voice->speed_percent > 0)
  148. wpm = (wpm * voice->speed_percent)/100;
  149. if (control & 2)
  150. DoSonicSpeed(1 * 1024);
  151. if ((wpm_value >= espeakRATE_MAXIMUM) || ((wpm_value > speed.fast_settings) && (wpm > 350))) {
  152. wpm2 = wpm;
  153. wpm = espeakRATE_NORMAL;
  154. // set special eSpeak speed parameters for Sonic use
  155. // The eSpeak output will be speeded up by at least x2
  156. x = 73;
  157. if (control & 1) {
  158. speed1 = (x * voice->speedf1)/256;
  159. speed2 = (x * voice->speedf2)/256;
  160. speed3 = (x * voice->speedf3)/256;
  161. }
  162. if (control & 2) {
  163. sonic = ((double)wpm2)/wpm;
  164. DoSonicSpeed((int)(sonic * 1024));
  165. speed.pause_factor = 85;
  166. speed.clause_pause_factor = espeakRATE_MINIMUM;
  167. speed.min_pause = 22;
  168. speed.min_sample_len = espeakRATE_MAXIMUM*2;
  169. speed.wav_factor = 211;
  170. speed.lenmod_factor = 210;
  171. speed.lenmod2_factor = 170;
  172. }
  173. return;
  174. }
  175. if (wpm > espeakRATE_MAXIMUM)
  176. wpm = espeakRATE_MAXIMUM;
  177. wpm2 = wpm;
  178. if (wpm > 359) wpm2 = 359;
  179. if (wpm < espeakRATE_MINIMUM) wpm2 = espeakRATE_MINIMUM;
  180. x = speed_lookup[wpm2-espeakRATE_MINIMUM];
  181. if (wpm >= 380)
  182. x = 7;
  183. if (wpm >= 400)
  184. x = 6;
  185. if (control & 1) {
  186. // set speed factors for different syllable positions within a word
  187. // these are used in CalcLengths()
  188. speed1 = (x * voice->speedf1)/256;
  189. speed2 = (x * voice->speedf2)/256;
  190. speed3 = (x * voice->speedf3)/256;
  191. if (x <= 7) {
  192. speed1 = x;
  193. speed2 = speed3 = x - 1;
  194. }
  195. }
  196. if (control & 2) {
  197. // these are used in synthesis file
  198. if (wpm > 350) {
  199. speed.lenmod_factor = 85 - (wpm - 350) / 3;
  200. speed.lenmod2_factor = 60 - (wpm - 350) / 8;
  201. } else if (wpm > 250) {
  202. speed.lenmod_factor = 110 - (wpm - 250)/4;
  203. speed.lenmod2_factor = 110 - (wpm - 250)/2;
  204. }
  205. s1 = (x * voice->speedf1)/256;
  206. if (wpm >= 170)
  207. speed.wav_factor = 110 + (150*s1)/128; // reduced speed adjustment, used for playing recorded sounds
  208. else
  209. speed.wav_factor = 128 + (128*s1)/130; // = 215 at 170 wpm
  210. if (wpm >= 350)
  211. speed.wav_factor = wav_factor_350[wpm-350];
  212. if (wpm >= 390) {
  213. speed.min_sample_len = espeakRATE_MAXIMUM - (wpm - 400)/2;
  214. if (wpm > 440)
  215. speed.min_sample_len = 420 - (wpm - 440);
  216. }
  217. // adjust for different sample rates
  218. speed.min_sample_len = (speed.min_sample_len * samplerate_native) / 22050;
  219. speed.pause_factor = (256 * s1)/115; // full speed adjustment, used for pause length
  220. speed.clause_pause_factor = 0;
  221. if (wpm > 430)
  222. speed.pause_factor = 12;
  223. else if (wpm > 400)
  224. speed.pause_factor = 13;
  225. else if (wpm > 374)
  226. speed.pause_factor = 14;
  227. else if (wpm > 350)
  228. speed.pause_factor = pause_factor_350[wpm - 350];
  229. if (speed.clause_pause_factor == 0) {
  230. // restrict the reduction of pauses between clauses
  231. if ((speed.clause_pause_factor = speed.pause_factor) < 16)
  232. speed.clause_pause_factor = 16;
  233. }
  234. }
  235. }
  236. #else
  237. void SetSpeed(int control)
  238. {
  239. // This is the earlier version of SetSpeed() before sonic speed-up was added
  240. int x;
  241. int s1;
  242. int wpm;
  243. int wpm2;
  244. speed.min_sample_len = espeakRATE_MAXIMUM;
  245. speed.lenmod_factor = 110; // controls the effect of FRFLAG_LEN_MOD reduce length change
  246. speed.lenmod2_factor = 100;
  247. wpm = embedded_value[EMBED_S];
  248. if (control == 2)
  249. wpm = embedded_value[EMBED_S2];
  250. if (voice->speed_percent > 0)
  251. wpm = (wpm * voice->speed_percent)/100;
  252. if (wpm > espeakRATE_MAXIMUM)
  253. wpm = espeakRATE_MAXIMUM;
  254. wpm2 = wpm;
  255. if (wpm > 359) wpm2 = 359;
  256. if (wpm < espeakRATE_MINIMUM) wpm2 = espeakRATE_MINIMUM;
  257. x = speed_lookup[wpm2-espeakRATE_MINIMUM];
  258. if (wpm >= 380)
  259. x = 7;
  260. if (wpm >= 400)
  261. x = 6;
  262. if (control & 1) {
  263. // set speed factors for different syllable positions within a word
  264. // these are used in CalcLengths()
  265. speed1 = (x * voice->speedf1)/256;
  266. speed2 = (x * voice->speedf2)/256;
  267. speed3 = (x * voice->speedf3)/256;
  268. if (x <= 7) {
  269. speed1 = x;
  270. speed2 = speed3 = x - 1;
  271. }
  272. }
  273. if (control & 2) {
  274. // these are used in synthesis file
  275. if (wpm > 350) {
  276. speed.lenmod_factor = 85 - (wpm - 350) / 3;
  277. speed.lenmod2_factor = 60 - (wpm - 350) / 8;
  278. } else if (wpm > 250) {
  279. speed.lenmod_factor = 110 - (wpm - 250)/4;
  280. speed.lenmod2_factor = 110 - (wpm - 250)/2;
  281. }
  282. s1 = (x * voice->speedf1)/256;
  283. if (wpm >= 170)
  284. speed.wav_factor = 110 + (150*s1)/128; // reduced speed adjustment, used for playing recorded sounds
  285. else
  286. speed.wav_factor = 128 + (128*s1)/130; // = 215 at 170 wpm
  287. if (wpm >= 350)
  288. speed.wav_factor = wav_factor_350[wpm-350];
  289. if (wpm >= 390) {
  290. speed.min_sample_len = espeakRATE_MAXIMUM - (wpm - 400)/2;
  291. if (wpm > 440)
  292. speed.min_sample_len = 420 - (wpm - 440);
  293. }
  294. speed.pause_factor = (256 * s1)/115; // full speed adjustment, used for pause length
  295. speed.clause_pause_factor = 0;
  296. if (wpm > 430)
  297. speed.pause_factor = 12;
  298. else if (wpm > 400)
  299. speed.pause_factor = 13;
  300. else if (wpm > 374)
  301. speed.pause_factor = 14;
  302. else if (wpm > 350)
  303. speed.pause_factor = pause_factor_350[wpm - 350];
  304. if (speed.clause_pause_factor == 0) {
  305. // restrict the reduction of pauses between clauses
  306. if ((speed.clause_pause_factor = speed.pause_factor) < 16)
  307. speed.clause_pause_factor = 16;
  308. }
  309. }
  310. }
  311. #endif
  312. espeak_ng_STATUS SetParameter(int parameter, int value, int relative)
  313. {
  314. // parameter: reset-all, amp, pitch, speed, linelength, expression, capitals, number grouping
  315. // relative 0=absolute 1=relative
  316. int new_value = value;
  317. int default_value;
  318. extern const int param_defaults[N_SPEECH_PARAM];
  319. if (relative) {
  320. if (parameter < 5) {
  321. default_value = param_defaults[parameter];
  322. new_value = default_value + (default_value * value)/100;
  323. }
  324. }
  325. param_stack[0].parameter[parameter] = new_value;
  326. saved_parameters[parameter] = new_value;
  327. switch (parameter)
  328. {
  329. case espeakRATE:
  330. embedded_value[EMBED_S] = new_value;
  331. embedded_value[EMBED_S2] = new_value;
  332. SetSpeed(3);
  333. break;
  334. case espeakVOLUME:
  335. embedded_value[EMBED_A] = new_value;
  336. GetAmplitude();
  337. break;
  338. case espeakPITCH:
  339. if (new_value > 99) new_value = 99;
  340. if (new_value < 0) new_value = 0;
  341. embedded_value[EMBED_P] = new_value;
  342. break;
  343. case espeakRANGE:
  344. if (new_value > 99) new_value = 99;
  345. embedded_value[EMBED_R] = new_value;
  346. break;
  347. case espeakLINELENGTH:
  348. option_linelength = new_value;
  349. break;
  350. case espeakWORDGAP:
  351. option_wordgap = new_value;
  352. break;
  353. case espeakINTONATION:
  354. if ((new_value & 0xff) != 0)
  355. translator->langopts.intonation_group = new_value & 0xff;
  356. option_tone_flags = new_value;
  357. break;
  358. default:
  359. return EINVAL;
  360. }
  361. return ENS_OK;
  362. }
  363. static void DoEmbedded2(int *embix)
  364. {
  365. // There were embedded commands in the text at this point
  366. unsigned int word;
  367. do {
  368. word = embedded_list[(*embix)++];
  369. if ((word & 0x1f) == EMBED_S) {
  370. // speed
  371. SetEmbedded(word & 0x7f, word >> 8); // adjusts embedded_value[EMBED_S]
  372. SetSpeed(1);
  373. }
  374. } while ((word & 0x80) == 0);
  375. }
  // Walk phoneme_list[1 .. n_phoneme_list-1] and fill in, depending on each
  // phoneme's type and its neighbours, the fields prepause, length, amp,
  // pitch1, pitch2 and env, plus the SFLAG_SEQCONTINUE flag of the phoneme
  // following a vowel.
  //
  // tr: the translator; its langopts (word_gap, long_stop, length_mods,
  //     length_mods0, lengthen_tonic, max_lengthmod, stress_flags, param[])
  //     and its stress_amps / stress_lengths tables are read.
  //
  // Embedded speed commands attached to a phoneme (SFLAG_EMBEDDED) are applied
  // through DoEmbedded2() before that phoneme is processed, so speed1..speed3
  // may change part-way through the list.
  //
  // NOTE(review): the loop reads phoneme_list[ix+1] .. [ix+4] and the
  // end-of-word scan runs forward until it finds newword != 0; presumably the
  // list carries terminating entries beyond n_phoneme_list - confirm.
  376. void CalcLengths(Translator *tr)
  377. {
  378. int ix;
  379. int ix2;
  380. PHONEME_LIST *prev;
  381. PHONEME_LIST *next;
  382. PHONEME_LIST *next2;
  383. PHONEME_LIST *next3;
  384. PHONEME_LIST *p;
  385. PHONEME_LIST *p2;
  386. int stress;
  387. int type;
  // static: the value persists between calls.  It is only assigned in the
  // vowel case, yet the phSTOP case reads it, so a stop can see the count left
  // by the most recent vowel (possibly from an earlier call).
  388. static int more_syllables = 0;
  // pre_sonorant / pre_voiced: set by a consonant case when the next phoneme is
  // a vowel (or, for pre_voiced, a liquid in some cases); consumed and cleared
  // by the vowel case.
  389. bool pre_sonorant = false;
  390. bool pre_voiced = false;
  391. int last_pitch = 0;
  392. int pitch_start;
  393. int length_mod;
  394. int next2type;
  395. int len;
  396. int env2;
  397. int end_of_clause;
  398. int embedded_ix = 0;
  399. int min_drop;
  400. int pitch1;
  401. int emphasized;
  402. int tone_mod;
  403. unsigned char *pitch_env = NULL;
  404. PHONEME_DATA phdata_tone;
  405. for (ix = 1; ix < n_phoneme_list; ix++) {
  406. prev = &phoneme_list[ix-1];
  407. p = &phoneme_list[ix];
  // low 3 bits of stresslevel = stress level, bit 3 = emphasized
  408. stress = p->stresslevel & 0x7;
  409. emphasized = p->stresslevel & 0x8;
  410. next = &phoneme_list[ix+1];
  411. if (p->synthflags & SFLAG_EMBEDDED)
  412. DoEmbedded2(&embedded_ix);
  // a phoneme flagged SFLAG_SYLLABLE is handled by the vowel case whatever its type
  413. type = p->type;
  414. if (p->synthflags & SFLAG_SYLLABLE)
  415. type = phVOWEL;
  416. switch (type)
  417. {
  418. case phPAUSE:
  419. last_pitch = 0;
  420. break;
  421. case phSTOP:
  422. last_pitch = 0;
  423. if (prev->type == phFRICATIVE)
  424. p->prepause = 25;
  425. else if ((more_syllables > 0) || (stress < 4))
  426. p->prepause = 48;
  427. else
  428. p->prepause = 60;
  429. if (prev->type == phSTOP)
  430. p->prepause = 60;
  431. if ((tr->langopts.word_gap & 0x10) && (p->newword))
  432. p->prepause = 60;
  433. if (p->ph->phflags & phLENGTHENSTOP)
  434. p->prepause += 30;
  435. if (p->synthflags & SFLAG_LENGTHEN)
  436. p->prepause += tr->langopts.long_stop;
  437. break;
  438. case phVFRICATIVE:
  439. case phFRICATIVE:
  440. if (p->newword) {
  // word-initial: prepause 15, except for a phNOPAUSE phoneme after a vowel
  441. if ((prev->type == phVOWEL) && (p->ph->phflags & phNOPAUSE)) {
  442. } else
  443. p->prepause = 15;
  444. }
  445. if (next->type == phPAUSE && prev->type == phNASAL && !(p->ph->phflags&phVOICELESS))
  446. p->prepause = 25;
  447. if (prev->ph->phflags & phBRKAFTER)
  448. p->prepause = 30;
  449. if ((tr->langopts.word_gap & 0x10) && (p->newword))
  450. p->prepause = 30;
  451. if ((p->ph->phflags & phSIBILANT) && next->type == phSTOP && !next->newword) {
  452. if (prev->type == phVOWEL)
  453. p->length = 200; // ?? should do this if it's from a prefix
  454. else
  455. p->length = 150;
  456. } else
  457. p->length = 256;
  458. if (type == phVFRICATIVE) {
  459. if (next->type == phVOWEL)
  460. pre_voiced = true;
  461. if ((prev->type == phVOWEL) || (prev->type == phLIQUID))
  462. p->length = (255 + prev->length)/2;
  463. }
  464. break;
  465. case phVSTOP:
  466. if (prev->type == phVFRICATIVE || prev->type == phFRICATIVE || (prev->ph->phflags & phSIBILANT) || (prev->type == phLIQUID))
  467. p->prepause = 30;
  468. if (next->type == phVOWEL || next->type == phLIQUID) {
  469. if ((next->type == phVOWEL) || !next->newword)
  470. pre_voiced = true;
  471. p->prepause = 40;
  472. if (prev->type == phVOWEL) {
  473. p->prepause = 0; // use murmur instead to link from the preceding vowel
  474. } else if (prev->type == phPAUSE) {
  475. // reduce by the length of the preceding pause
  476. if (prev->length < p->prepause)
  477. p->prepause -= prev->length;
  478. else
  479. p->prepause = 0;
  480. } else if (p->newword == 0) {
  481. if (prev->type == phLIQUID)
  482. p->prepause = 20;
  483. if (prev->type == phNASAL)
  484. p->prepause = 12;
  485. if (prev->type == phSTOP && !(prev->ph->phflags & phVOICELESS))
  486. p->prepause = 0;
  487. }
  488. }
  489. if ((tr->langopts.word_gap & 0x10) && (p->newword) && (p->prepause < 20))
  490. p->prepause = 20;
  491. break;
  492. case phLIQUID:
  493. case phNASAL:
  494. p->amp = tr->stress_amps[0]; // unless changed later
  495. p->length = 256; // TEMPORARY
  496. if (p->newword) {
  497. if (prev->type == phLIQUID)
  498. p->prepause = 25;
  499. if (prev->type == phVOWEL) {
  500. if (!(p->ph->phflags & phNOPAUSE))
  501. p->prepause = 12;
  502. }
  503. }
  // before a vowel: leave length/pitch to the vowel case (it fills in 'prev')
  504. if (next->type == phVOWEL)
  505. pre_sonorant = true;
  506. else {
  507. p->pitch2 = last_pitch;
  508. if ((prev->type == phVOWEL) || (prev->type == phLIQUID)) {
  509. p->length = prev->length;
  510. if (p->type == phLIQUID)
  511. p->length = speed1;
  512. if (next->type == phVSTOP)
  513. p->length = (p->length * 160)/100;
  514. if (next->type == phVFRICATIVE)
  515. p->length = (p->length * 120)/100;
  516. } else {
  // take pitch2 from the next vowel found in the list, if there is one
  517. for (ix2 = ix; ix2 < n_phoneme_list; ix2++) {
  518. if (phoneme_list[ix2].type == phVOWEL) {
  519. p->pitch2 = phoneme_list[ix2].pitch2;
  520. break;
  521. }
  522. }
  523. }
  // falling pitch over 16 units, floored at 0
  524. p->pitch1 = p->pitch2-16;
  525. if (p->pitch2 < 16)
  526. p->pitch1 = 0;
  527. p->env = PITCHfall;
  528. pre_voiced = false;
  529. }
  530. break;
  531. case phVOWEL:
  532. min_drop = 0;
  533. next2 = &phoneme_list[ix+2];
  534. next3 = &phoneme_list[ix+3];
  // NOTE(review): stress was masked with 0x7 above, so this test can never be true
  535. if (stress > 7) stress = 7;
  536. if (stress <= 1)
  537. stress = stress ^ 1; // swap diminished and unstressed (until we swap stress_amps,stress_lengths in tr_languages)
  538. if (pre_sonorant)
  539. p->amp = tr->stress_amps[stress]-1;
  540. else
  541. p->amp = tr->stress_amps[stress];
  542. if (emphasized)
  543. p->amp = 25;
  544. if (ix >= (n_phoneme_list-3)) {
  545. // last phoneme of a clause, limit its amplitude
  546. if (p->amp > tr->langopts.param[LOPT_MAXAMP_EOC])
  547. p->amp = tr->langopts.param[LOPT_MAXAMP_EOC];
  548. }
  549. // is the last syllable of a word ?
  // count the syllabic vowels between this one and the next entry with newword set
  550. more_syllables = 0;
  551. end_of_clause = 0;
  552. for (p2 = p+1; p2->newword == 0; p2++) {
  553. if ((p2->type == phVOWEL) && !(p2->ph->phflags & phNONSYLLABIC))
  554. more_syllables++;
  555. if (p2->ph->code == phonPAUSE_CLAUSE)
  556. end_of_clause = 2;
  557. }
  558. if (p2->ph->code == phonPAUSE_CLAUSE)
  559. end_of_clause = 2;
  560. if ((p2->newword & PHLIST_END_OF_CLAUSE) && (more_syllables == 0))
  561. end_of_clause = 2;
  562. // calc length modifier
  563. if ((next->ph->code == phonPAUSE_VSHORT) && (next2->type == phPAUSE)) {
  564. // if PAUSE_VSHORT is followed by a pause, then use that
  565. next = next2;
  566. next2 = next3;
  567. next3 = &phoneme_list[ix+4];
  568. }
  // table index = (length_mod of next2) * 10 + (length_mod of next)
  569. next2type = next2->ph->length_mod;
  570. if (more_syllables == 0) {
  571. if (next->newword || next2->newword) {
  572. // don't use 2nd phoneme over a word boundary, unless it's a pause
  573. if (next2type != 1)
  574. next2type = 0;
  575. }
  576. len = tr->langopts.length_mods0[next2type *10+ next->ph->length_mod];
  577. if ((next->newword) && (tr->langopts.word_gap & 0x20)) {
  578. // consider as a pause + first phoneme of the next word
  579. length_mod = (len + tr->langopts.length_mods0[next->ph->length_mod *10+ 1])/2;
  580. } else
  581. length_mod = len;
  582. } else {
  583. length_mod = tr->langopts.length_mods[next2type *10+ next->ph->length_mod];
  584. if ((next->type == phNASAL) && (next2->type == phSTOP || next2->type == phVSTOP) && (next3->ph->phflags & phVOICELESS))
  585. length_mod -= 15;
  586. }
  // scale by the speed factor selected by the number of syllables still to come in this word
  587. if (more_syllables == 0)
  588. length_mod *= speed1;
  589. else if (more_syllables == 1)
  590. length_mod *= speed2;
  591. else
  592. length_mod *= speed3;
  593. length_mod = length_mod / 128;
  594. if (length_mod < 8)
  595. length_mod = 8; // restrict how much lengths can be reduced
  596. if (stress >= 7) {
  597. // tonic syllable, include a constant component so it doesn't decrease directly with speed
  598. length_mod += tr->langopts.lengthen_tonic;
  599. if (emphasized)
  600. length_mod += (tr->langopts.lengthen_tonic/2);
  601. } else if (emphasized)
  602. length_mod += tr->langopts.lengthen_tonic;
  // a zero stress_lengths entry falls back to the entry for stress level 6
  603. if ((len = tr->stress_lengths[stress]) == 0)
  604. len = tr->stress_lengths[6];
  605. length_mod = length_mod * len;
  606. if (p->tone_ph != 0) {
  607. if ((tone_mod = phoneme_tab[p->tone_ph]->std_length) > 0) {
  608. // a tone phoneme specifies a percentage change to the length
  609. length_mod = (length_mod * tone_mod) / 100;
  610. }
  611. }
  612. if ((end_of_clause == 2) && !(tr->langopts.stress_flags & S_NO_EOC_LENGTHEN)) {
  613. // this is the last syllable in the clause, lengthen it - more for short vowels
  614. len = (p->ph->std_length * 2);
  615. if (tr->langopts.stress_flags & S_EO_CLAUSE1)
  616. len = 200; // don't lengthen short vowels more than long vowels at end-of-clause
  617. length_mod = length_mod * (256 + (280 - len)/3)/256;
  618. }
  619. if (length_mod > tr->langopts.max_lengthmod*speed1) {
  620. // limit the vowel length adjustment for some languages
  621. length_mod = (tr->langopts.max_lengthmod*speed1);
  622. }
  623. length_mod = length_mod / 128;
  // p->type differs from phVOWEL only when this case was entered through SFLAG_SYLLABLE
  624. if (p->type != phVOWEL) {
  625. length_mod = 256; // syllabic consonant
  626. min_drop = 16;
  627. }
  628. p->length = length_mod;
  629. if (p->env >= (N_ENVELOPE_DATA-1)) {
  630. fprintf(stderr, "espeak: Bad intonation data\n");
  631. p->env = 0;
  632. }
  633. // pre-vocalic part
  634. // set last-pitch
  635. env2 = p->env + 1; // version for use with preceding semi-vowel
  636. if (p->tone_ph != 0) {
  637. InterpretPhoneme2(p->tone_ph, &phdata_tone);
  638. pitch_env = GetEnvelope(phdata_tone.pitch_env);
  639. } else
  640. pitch_env = envelope_data[env2];
  // pitch at the start of the envelope (pitch_env[0] is a fraction of 256)
  641. pitch_start = p->pitch1 + ((p->pitch2-p->pitch1)*pitch_env[0])/256;
  642. if (pre_sonorant || pre_voiced) {
  643. // set pitch for pre-vocalic part
  644. if (pitch_start == 255)
  645. last_pitch = pitch_start; // pitch is not set
  646. if (pitch_start - last_pitch > 16)
  647. last_pitch = pitch_start - 16;
  // the preceding consonant gets a pitch movement from last_pitch to this
  // vowel's starting pitch, and takes over the vowel's length and amplitude
  648. prev->pitch1 = last_pitch;
  649. prev->pitch2 = pitch_start;
  650. if (last_pitch < pitch_start) {
  651. prev->env = PITCHrise;
  652. p->env = env2;
  653. } else
  654. prev->env = PITCHfall;
  655. prev->length = length_mod;
  656. prev->amp = p->amp;
  657. if ((prev->type != phLIQUID) && (prev->amp > 18))
  658. prev->amp = 18;
  659. }
  660. // vowel & post-vocalic part
  661. next->synthflags &= ~SFLAG_SEQCONTINUE;
  662. if (next->type == phNASAL && next2->type != phVOWEL)
  663. next->synthflags |= SFLAG_SEQCONTINUE;
  664. if (next->type == phLIQUID) {
  665. next->synthflags |= SFLAG_SEQCONTINUE;
  666. if (next2->type == phVOWEL)
  667. next->synthflags &= ~SFLAG_SEQCONTINUE;
  668. if (next2->type != phVOWEL) {
  669. if (next->ph->mnemonic == ('/'*256+'r'))
  670. next->synthflags &= ~SFLAG_SEQCONTINUE;
  671. }
  672. }
  // enforce a minimum pitch1-to-pitch2 difference (min_drop is only non-zero for syllabic consonants)
  673. if ((min_drop > 0) && ((p->pitch2 - p->pitch1) < min_drop)) {
  674. pitch1 = p->pitch2 - min_drop;
  675. if (pitch1 < 0)
  676. pitch1 = 0;
  677. p->pitch1 = pitch1;
  678. }
  // remember the pitch at the end of this vowel's envelope for the following phonemes
  679. last_pitch = p->pitch1 + ((p->pitch2-p->pitch1)*envelope_data[p->env][127])/256;
  680. pre_sonorant = false;
  681. pre_voiced = false;
  682. break;
  683. }
  684. }
  685. }
  // Tables of the relative lengths of vowels, depending on the type of the
  // two phonemes that follow.  A table is indexed with
  //     (length_mod of next2) * 10 + (length_mod of next)
  // i.e. each row is one "next2" value and each column one "next" value.
  //
  // Use this table if the vowel is not the last in the word.
  static unsigned char length_mods_en[100] = {
  //    a    ,    t    s    n    d    z    r    N         <- next
      100, 120, 100, 105, 100, 110, 110, 100,  95, 100, // 0: a  <- next2
      105, 120, 105, 110, 125, 130, 135, 115, 125, 100, // 1: ,
      105, 120,  75, 100,  75, 105, 120,  85,  75, 100, // 2: t
      105, 120,  85, 105,  95, 115, 120, 100,  95, 100, // 3: s
      110, 120,  95, 105, 100, 115, 120, 100, 100, 100, // 4: n
      105, 120, 100, 105,  95, 115, 120, 110,  95, 100, // 5: d
      105, 120, 100, 105, 105, 122, 125, 110, 105, 100, // 6: z
      105, 120, 100, 105, 105, 122, 125, 110, 105, 100, // 7: r
      105, 120,  95, 105, 100, 115, 120, 110, 100, 100, // 8: N
      100, 120, 100, 100, 100, 100, 100, 100, 100, 100  // 9
  };
  // Relative vowel lengths for the last syllable in a word.
  // Indexed with (length_mod of next2) * 10 + (length_mod of next):
  // one row per "next2" value, one column per "next" value.
  static unsigned char length_mods_en0[100] = {
  //    a    ,    t    s    n    d    z    r    N         <- next
      100, 150, 100, 105, 110, 115, 110, 110, 110, 100, // 0: a  <- next2
      105, 150, 105, 110, 125, 135, 140, 115, 135, 100, // 1: ,
      105, 150,  90, 105,  90, 122, 135, 100,  90, 100, // 2: t
      105, 150, 100, 105, 100, 122, 135, 100, 100, 100, // 3: s
      105, 150, 100, 105, 105, 115, 135, 110, 105, 100, // 4: n
      105, 150, 100, 105, 105, 122, 130, 120, 125, 100, // 5: d
      105, 150, 100, 105, 110, 122, 125, 115, 110, 100, // 6: z
      105, 150, 100, 105, 105, 122, 135, 120, 105, 100, // 7: r
      105, 150, 100, 105, 105, 115, 135, 110, 105, 100, // 8: N
      100, 100, 100, 100, 100, 100, 100, 100, 100, 100  // 9
  };
  // Vowel length table in which nearly every context gets the same value.
  // Indexed with (length_mod of next2) * 10 + (length_mod of next):
  // one row per "next2" value, one column per "next" value.
  static unsigned char length_mods_equal[100] = {
  //    a    ,    t    s    n    d    z    r    N         <- next
      110, 120, 100, 110, 110, 110, 110, 110, 110, 110, // 0: a  <- next2
      110, 120, 100, 110, 110, 110, 110, 110, 110, 110, // 1: ,
      110, 120, 100, 110, 100, 110, 110, 110, 100, 110, // 2: t
      110, 120, 100, 110, 110, 110, 110, 110, 110, 110, // 3: s
      110, 120, 100, 110, 110, 110, 110, 110, 110, 110, // 4: n
      110, 120, 100, 110, 110, 110, 110, 110, 110, 110, // 5: d
      110, 120, 100, 110, 110, 110, 110, 110, 110, 110, // 6: z
      110, 120, 100, 110, 110, 110, 110, 110, 110, 110, // 7: r
      110, 120, 100, 110, 110, 110, 110, 110, 110, 110, // 8: N
      110, 120, 100, 110, 110, 110, 110, 110, 110, 110  // 9
  };
  730. static unsigned char *length_mod_tabs[6] = {
  731. length_mods_en,
  732. length_mods_en, // 1
  733. length_mods_en0, // 2
  734. length_mods_equal, // 3
  735. length_mods_equal, // 4
  736. length_mods_equal // 5
  737. };
  738. void SetLengthMods(Translator *tr, int value)
  739. {
  740. int value2;
  741. tr->langopts.length_mods0 = tr->langopts.length_mods = length_mod_tabs[value % 100];
  742. if ((value2 = value / 100) != 0)
  743. tr->langopts.length_mods0 = length_mod_tabs[value2];
  744. }