eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

phonemelist.cpp 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662
  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2013 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include "StdAfx.h"
  20. #include <stdio.h>
  21. #include <stdlib.h>
  22. #include <string.h>
  23. #include "speak_lib.h"
  24. #include "speech.h"
  25. #include "phoneme.h"
  26. #include "synthesize.h"
  27. #include "translate.h"
  // Table of pause phoneme codes. MakePhonemeList() indexes it with
  // (tr->langopts.word_gap & 0x7) to choose the pause inserted between words.
  28. const unsigned char pause_phonemes[8] = {0, phonPAUSE_VSHORT, phonPAUSE_SHORT, phonPAUSE, phonPAUSE_LONG, phonGLOTTALSTOP, phonPAUSE_LONG, phonPAUSE_LONG};
  // Number of entries currently held in ph_list2[]. Declared here only; the
  // definition is in another file. MakePhonemeList() reduces it when it deletes
  // redundant phonSWITCH entries.
  29. extern int n_ph_list2;
  // Input phoneme list for this file; read and modified in place by both
  // SubstitutePhonemes() and MakePhonemeList().
  30. extern PHONEME_LIST2 ph_list2[N_PHONEME_LIST]; // first stage of text->phonemes
  31. static int SubstitutePhonemes(Translator *tr, PHONEME_LIST *plist_out)
  32. {//===================================================================
  33. // Copy the phonemes list and perform any substitutions that are required for the
  34. // current voice
  35. int ix;
  36. int k;
  37. int replace_flags;
  38. int n_plist_out = 0;
  39. int word_end;
  40. PHONEME_LIST2 *plist2;
  41. PHONEME_TAB *next=NULL;
  42. for(ix=0; (ix < n_ph_list2) && (n_plist_out < N_PHONEME_LIST); ix++)
  43. {
  44. plist2 = &ph_list2[ix];
  45. // don't do any substitution if the language has been temporarily changed
  46. if(!(plist2->synthflags & SFLAG_SWITCHED_LANG))
  47. {
  48. if(ix < (n_ph_list2 -1))
  49. next = phoneme_tab[ph_list2[ix+1].phcode];
  50. word_end = 0;
  51. if((plist2+1)->sourceix || ((next != 0) && (next->type == phPAUSE)))
  52. word_end = 1; // this phoneme is the end of a word
  53. // check whether a Voice has specified that we should replace this phoneme
  54. for(k=0; k<n_replace_phonemes; k++)
  55. {
  56. if(plist2->phcode == replace_phonemes[k].old_ph)
  57. {
  58. replace_flags = replace_phonemes[k].type;
  59. if((replace_flags & 1) && (word_end == 0))
  60. continue; // this replacement only occurs at the end of a word
  61. if((replace_flags & 2) && ((plist2->stresslevel & 0x7) > 3))
  62. continue; // this replacement doesn't occur in stressed syllables
  63. if((replace_flags & 4) && (plist2->sourceix == 0))
  64. continue; // this replacement only occurs at the start of a word
  65. // substitute the replacement phoneme
  66. plist2->phcode = replace_phonemes[k].new_ph;
  67. if((plist2->stresslevel > 1) && (phoneme_tab[plist2->phcode]->phflags & phUNSTRESSED))
  68. plist2->stresslevel = 0; // the replacement must be unstressed
  69. break;
  70. }
  71. }
  72. if(plist2->phcode == 0)
  73. {
  74. continue; // phoneme has been replaced by NULL, so don't copy it
  75. }
  76. }
  77. // copy phoneme into the output list
  78. memcpy(&plist_out[n_plist_out],plist2,sizeof(PHONEME_LIST2));
  79. plist_out[n_plist_out].ph = phoneme_tab[plist2->phcode];
  80. plist_out[n_plist_out].type = plist_out[n_plist_out].ph->type;
  81. n_plist_out++;
  82. }
  83. return(n_plist_out);
  84. } // end of SubstitutePhonemes
  // MakePhonemeList
  //
  // Builds the final phoneme_list[] for one clause from ph_list2[], and sets
  // n_phoneme_list.  The steps, in order, are:
  //   1. If the last word of the clause is unstressed, promote the stress of an
  //      earlier phoneme flagged SFLAG_PROMOTE_STRESS.
  //   2. Flag phonemes that follow a phoneme-table switch with SFLAG_SWITCHED_LANG
  //      and delete redundant phonSWITCH entries from ph_list2[].
  //   3. If langopts.param[LOPT_REGRESSIVE_VOICING] is set, apply regressive
  //      voicing to consonant clusters (working backwards through the list).
  //   4. Copy ph_list2[] into the local ph_list3[] via SubstitutePhonemes().
  //   5. Store the highest stress level of each word in its phonemes' wordstress.
  //   6. Interpret each phoneme (InterpretPhoneme), handling inserted, changed,
  //      appended and deleted phonemes and pauses at word boundaries, and write
  //      the result into phoneme_list[].
  //   7. Terminate the list with two pause entries.
  //
  // Parameters:
  //   tr              translator; its langopts and phoneme_tab_ix are read
  //   post_pause      stored as the length of the first terminating pause
  //   start_sentence  if non-zero, the first word-start phoneme written gets
  //                   newword = 5 instead of 1
  //
  // ph_list2[] and n_ph_list2 are modified in place by steps 1-4.
  85. void MakePhonemeList(Translator *tr, int post_pause, int start_sentence)
  86. {//=====================================================================
  87. int ix=0;
  88. int j;
  89. int insert_ph = 0;
  90. PHONEME_LIST *phlist;
  91. PHONEME_TAB *ph;
  92. PHONEME_TAB *next, *next2;
  93. int unstress_count = 0;
  94. int word_stress = 0;
  95. int current_phoneme_tab;
  96. int max_stress;
  97. int voicing;
  98. int regression;
  99. int end_sourceix;
  100. int alternative;
  101. int delete_count;
  102. int word_start;
  103. int inserted;
  104. int deleted;
  105. PHONEME_DATA phdata;
  106. int n_ph_list3;
  107. PHONEME_LIST *plist3;
  108. PHONEME_LIST *plist3_inserted = NULL;
  109. PHONEME_LIST ph_list3[N_PHONEME_LIST];
  110. PHONEME_LIST2 *plist2;
  111. WORD_PH_DATA worddata;
  112. memset(&worddata, 0, sizeof(worddata));
  113. plist2 = ph_list2;
  114. phlist = phoneme_list;
  // NOTE(review): this indexes plist2[-1] if n_ph_list2 is 0 -- confirm that
  // callers never pass an empty list.
  115. end_sourceix = plist2[n_ph_list2-1].sourceix;
  116. // is the last word of the clause unstressed ?
  117. max_stress = 0;
  118. for(j = n_ph_list2-3; j>=0; j--)
  119. {
  120. // start with the last phoneme (before the terminating pauses) and move backwards
  121. if((plist2[j].stresslevel & 0x7f) > max_stress)
  122. max_stress = plist2[j].stresslevel & 0x7f;
  123. if(plist2[j].sourceix != 0)
  124. break;
  125. }
  126. if(max_stress < 4)
  127. {
  128. // the last word is unstressed, look for a previous word that can be stressed
  // j continues backwards from where the loop above stopped
  129. while(--j >= 0)
  130. {
  131. if(plist2[j].synthflags & SFLAG_PROMOTE_STRESS) // dictionary flags indicated that this stress can be promoted
  132. {
  133. plist2[j].stresslevel = 4; // promote to stressed
  134. break;
  135. }
  136. if(plist2[j].stresslevel >= 4)
  137. {
  138. // found a stressed syllable, so stop looking
  139. break;
  140. }
  141. }
  142. }
  143. // look for switch of phoneme tables
  // Entries are compacted in place: once delete_count > 0 each entry is copied
  // down by delete_count positions, so a deleted phonSWITCH is overwritten by
  // the entries that follow it.
  144. delete_count = 0;
  145. current_phoneme_tab = tr->phoneme_tab_ix;
  146. for(j = 0; j < n_ph_list2; j++)
  147. {
  148. if(current_phoneme_tab != tr->phoneme_tab_ix)
  149. {
  150. plist2[j].synthflags |= SFLAG_SWITCHED_LANG;
  151. }
  152. if(delete_count > 0)
  153. {
  154. memcpy(&plist2[j-delete_count], &plist2[j], sizeof(plist2[0]));
  155. }
  156. if(plist2[j].phcode == phonSWITCH)
  157. {
  // NOTE(review): plist2[j+1] and plist2[j+2] can lie beyond the n_ph_list2
  // valid entries when the phonSWITCH is near the end of the list -- confirm
  // that the list always ends with entries that make this safe.
  158. if((!(plist2[j].synthflags & SFLAG_EMBEDDED)) && (
  159. (plist2[j].tone_ph == current_phoneme_tab) ||
  160. (plist2[j+1].phcode == phonSWITCH) ||
  161. ((plist2[j+1].phcode == phonPAUSE) && (plist2[j+2].phcode == phonSWITCH))
  162. ))
  163. {
  164. // delete this phonSWITCH if it's switching to the current phoneme table, or
  165. // delete this phonSWITCH if it's followed by another phonSWITCH
  // (directly, or with a single phonPAUSE in between)
  166. delete_count++;
  167. }
  168. else
  169. {
  // for a phonSWITCH entry, tone_ph holds the phoneme table to switch to
  170. current_phoneme_tab = plist2[j].tone_ph;
  171. }
  172. }
  173. }
  174. n_ph_list2 -= delete_count;
  175. if((regression = tr->langopts.param[LOPT_REGRESSIVE_VOICING]) != 0)
  176. {
  177. // set consonant clusters to all voiced or all unvoiced
  178. // Regressive
  // 'voicing' state while scanning backwards: 0 = nothing to propagate,
  // 1 = an unvoiced stop/fricative was seen (devoice preceding voiced ones),
  // 2 = a voiced stop/fricative was seen (voice preceding unvoiced ones).
  179. int type;
  180. int stop_propagation = 0;
  181. voicing = 0;
  182. for(j=n_ph_list2-1; j>=0; j--)
  183. {
  184. ph = phoneme_tab[plist2[j].phcode];
  185. if(ph == NULL)
  186. continue;
  187. if(plist2[j].synthflags & SFLAG_SWITCHED_LANG)
  188. {
  // phonemes from a switched language are not changed; reset the state
  189. stop_propagation = 0;
  190. voicing = 0;
  191. if(regression & 0x100)
  192. voicing = 1; // word-end devoicing
  193. continue;
  194. }
  195. type = ph->type;
  196. if(regression & 0x2)
  197. {
  198. // [v] and [v;] don't cause regression, or [R^]
  // tested on the first character of the phoneme mnemonic
  199. if(((ph->mnemonic & 0xff) == 'v') || ((ph->mnemonic & 0xff)== 'R'))
  200. {
  201. stop_propagation = 1;
  202. if(regression & 0x10)
  203. voicing = 0;
  204. }
  205. }
  206. if((type==phSTOP) || type==(phFRICATIVE))
  207. {
  208. if((voicing==0) && (regression & 0xf))
  209. {
  210. voicing = 1;
  211. }
  212. else if((voicing==2) && (ph->end_type != 0)) // use end_type field for voicing_switch for consonants
  213. {
  214. plist2[j].phcode = ph->end_type; // change to voiced equivalent
  215. }
  216. }
  217. else if((type==phVSTOP) || type==(phVFRICATIVE))
  218. {
  219. if((voicing==0) && (regression & 0xf))
  220. {
  221. voicing = 2;
  222. }
  223. else if((voicing==1) && (ph->end_type != 0))
  224. {
  225. plist2[j].phcode = ph->end_type; // change to unvoiced equivalent
  226. }
  227. }
  228. else
  229. {
  230. if(regression & 0x8)
  231. {
  232. // LANG=Polish, propagate through liquids and nasals
  233. if((type == phPAUSE) || (type == phVOWEL))
  234. voicing = 0;
  235. }
  236. else
  237. {
  238. voicing = 0;
  239. }
  240. }
  241. if(stop_propagation)
  242. {
  243. voicing = 0;
  244. stop_propagation = 0;
  245. }
  246. if(plist2[j].sourceix)
  247. {
  // this phoneme starts a word, so the next one examined (j-1) ends the previous word
  248. if(regression & 0x04)
  249. {
  250. // stop propagation at a word boundary
  251. voicing = 0;
  252. }
  253. if(regression & 0x100)
  254. {
  255. // devoice word-final consonants, unless propagating voiced
  256. if(voicing == 0)
  257. {
  258. voicing = 1;
  259. }
  260. }
  261. }
  262. }
  263. }
  // Apply voice-specific substitutions and copy into ph_list3[].  The count is
  // reduced by 2 so the loops below do not process the last two entries
  // (presumably the terminating pauses mentioned above -- TODO confirm).
  264. n_ph_list3 = SubstitutePhonemes(tr,ph_list3) - 2;
  // Set wordstress for every phoneme of each word.  ix is not modified in this
  // loop (it is still 0 here).
  265. for(j=0; (j < n_ph_list3) && (ix < N_PHONEME_LIST-3);)
  266. {
  267. if(ph_list3[j].sourceix)
  268. {
  269. // start of a word
  270. int k;
  271. int nextw;
  272. word_stress = 0;
  273. // find the highest stress level in this word
  274. for(nextw=j; nextw < n_ph_list3;)
  275. {
  276. if(ph_list3[nextw].stresslevel > word_stress)
  277. word_stress = ph_list3[nextw].stresslevel;
  278. nextw++;
  279. if(ph_list3[nextw].sourceix)
  280. break; // start of the next word
  281. }
  282. for(k=j; k<nextw; k++)
  283. {
  284. ph_list3[k].wordstress = word_stress;
  285. }
  286. j = nextw;
  287. }
  288. else
  289. {
  290. j++;
  291. }
  292. }
  293. // transfer all the phonemes of the clause into phoneme_list
  294. ph = phoneme_tab[phonPAUSE];
  295. ph_list3[0].ph = ph;
  296. word_start = 1;
  // j indexes the input (ph_list3), ix the output (phoneme_list).  The loop also
  // runs one more time whenever insert_ph is pending, even if the limits are reached.
  297. for(j=0; insert_ph || ((j < n_ph_list3) && (ix < N_PHONEME_LIST-3)); j++)
  298. {
  299. plist3 = &ph_list3[j];
  300. inserted = 0;
  301. deleted = 0;
  302. if(insert_ph != 0)
  303. {
  304. // we have a (linking) phoneme which we need to insert here
  305. next = phoneme_tab[plist3->phcode]; // this phoneme, i.e. after the insert
  306. // re-use the previous entry for the inserted phoneme.
  307. // That's OK because we don't look backwards from plist3 *** but CountVowelPosition() and isAfterStress does !!!
  308. j--;
  309. plist3 = plist3_inserted = &ph_list3[j];
  310. if(j > 0)
  311. {
  312. // move all previous phonemes in the word back one place
  313. int k;
  314. if(word_start > 0)
  315. {
  316. k = word_start;
  317. word_start--;
  318. }
  319. else
  320. {
  321. k = 2; // No more space, don't lose the start of word mark at ph_list2[word_start]
  322. }
  323. for(; k<=j; k++)
  324. memcpy(&ph_list3[k-1], &ph_list3[k], sizeof(*plist3));
  325. }
  // the freed slot becomes the inserted phoneme, with all other fields zeroed
  326. memset(&plist3[0], 0, sizeof(*plist3));
  327. plist3->phcode = insert_ph;
  328. ph = phoneme_tab[insert_ph];
  329. plist3->ph = ph;
  330. insert_ph = 0;
  331. inserted = 1; // don't insert the same phoneme repeatedly
  332. }
  333. else
  334. {
  335. // otherwise get the next phoneme from the list
  336. if(plist3->sourceix != 0)
  337. word_start = j;
  338. ph = phoneme_tab[plist3->phcode];
  339. plist3[0].ph = ph;
  340. if(plist3->phcode == phonSWITCH)
  341. {
  342. // change phoneme table
  343. SelectPhonemeTable(plist3->tone_ph);
  344. }
  345. next = phoneme_tab[plist3[1].phcode]; // the phoneme after this one
  346. plist3[1].ph = next;
  347. }
  348. if(ph == NULL) continue;
  349. InterpretPhoneme(tr, 0x100, plist3, &phdata, &worddata);
  350. if(((alternative = phdata.pd_param[pd_INSERTPHONEME]) > 0) && (inserted == 0))
  351. {
  352. // PROBLEM: if we insert a phoneme before a vowel then we lose the stress.
  // The current entry becomes the phoneme to insert ('alternative'); the original
  // phoneme is queued in insert_ph and is handled on the next loop iteration.
  353. PHONEME_TAB *ph2;
  354. ph2 = ph;
  355. insert_ph = plist3->phcode;
  356. ph = phoneme_tab[alternative];
  357. plist3->ph = ph;
  358. plist3->phcode = alternative;
  359. if(ph->type == phVOWEL)
  360. {
  361. plist3->synthflags |= SFLAG_SYLLABLE;
  362. if(ph2->type != phVOWEL)
  363. plist3->stresslevel = 0; // change from non-vowel to vowel, make sure it's unstressed
  364. }
  365. else
  366. plist3->synthflags &= ~SFLAG_SYLLABLE;
  367. // re-interpret the changed phoneme
  368. // But it doesn't obey a second ChangePhoneme()
  369. InterpretPhoneme(tr, 0x100, plist3, &phdata, &worddata);
  370. }
  371. if((alternative = phdata.pd_param[pd_CHANGEPHONEME]) > 0)
  372. {
  // replace this phoneme by 'alternative'
  373. PHONEME_TAB *ph2;
  374. ph2 = ph;
  375. ph = phoneme_tab[alternative];
  376. plist3->ph = ph;
  377. plist3->phcode = alternative;
  378. if(alternative == 1)
  379. {
  380. deleted = 1; // NULL phoneme, discard
  381. }
  382. else
  383. {
  384. if(ph->type == phVOWEL)
  385. {
  386. plist3->synthflags |= SFLAG_SYLLABLE;
  387. if(ph2->type != phVOWEL)
  388. plist3->stresslevel = 0; // change from non-vowel to vowel, make sure it's unstressed
  389. }
  390. else
  391. plist3->synthflags &= ~SFLAG_SYLLABLE;
  392. // re-interpret the changed phoneme
  393. // But it doesn't obey a second ChangePhoneme()
  394. InterpretPhoneme(tr, 0x100, plist3, &phdata, &worddata);
  395. }
  396. }
  397. if((ph->type == phVOWEL) && (deleted == 0))
  398. {
  399. PHONEME_LIST *p;
  400. // Check for consecutive unstressed syllables, even across word boundaries.
  401. // Do this after changing phonemes according to stress level.
  402. if(plist3->stresslevel <= 1)
  403. {
  404. // an unstressed vowel
  405. unstress_count++;
  406. if(tr->langopts.stress_flags & 0x08)
  407. {
  408. // change sequences of consecutive unstressed vowels in unstressed words to diminished stress (TEST)
  // scans forward to the next vowel, stopping at the first pause
  409. for(p=plist3+1; p->type != phPAUSE; p++)
  410. {
  411. if(p->type == phVOWEL)
  412. {
  413. if(p->stresslevel <= 1)
  414. {
  415. if(plist3->wordstress < 4)
  416. plist3->stresslevel = 0;
  417. if(p->wordstress < 4)
  418. p->stresslevel = 0;
  419. }
  420. break;
  421. }
  422. }
  423. }
  424. else
  425. {
  426. if((unstress_count > 1) && ((unstress_count & 1)==0))
  427. {
  428. // in a sequence of unstressed syllables, reduce alternate syllables to 'diminished'
  429. // stress. But not for the last phoneme of a stressed word
  430. if((tr->langopts.stress_flags & S_NO_DIM) || ((word_stress > 3) && ((plist3+1)->sourceix!=0)))
  431. {
  432. // An unstressed final vowel of a stressed word
  433. unstress_count=1; // try again for next syllable
  434. }
  435. else
  436. {
  437. plist3->stresslevel = 0; // change stress to 'diminished'
  438. }
  439. }
  440. }
  441. }
  442. else
  443. {
  444. unstress_count = 0;
  445. }
  446. }
  447. if((plist3+1)->synthflags & SFLAG_LENGTHEN)
  448. {
  // phoneme types which can be lengthened by doubling; the list is
  // zero-terminated so that it can be searched with strchr()
  449. static char types_double[] = {phFRICATIVE,phVFRICATIVE,phNASAL,phLIQUID,0};
  450. if(strchr(types_double,next->type))
  451. {
  452. // lengthen this consonant by doubling it
  // the flag is toggled off so that the doubling happens only once
  453. insert_ph = next->code;
  454. (plist3+1)->synthflags ^= SFLAG_LENGTHEN;
  455. }
  456. }
  457. if((plist3+1)->sourceix != 0)
  458. {
  // the next phoneme starts a new word: decide whether to insert a pause.
  // Later assignments to insert_ph below override earlier ones.
  459. int x;
  460. if(tr->langopts.vowel_pause && (ph->type != phPAUSE))
  461. {
  462. if((ph->type != phVOWEL) && (tr->langopts.vowel_pause & 0x200))
  463. {
  464. // add a pause after a word which ends in a consonant
  465. insert_ph = phonPAUSE_NOLINK;
  466. }
  467. if(next->type == phVOWEL)
  468. {
  469. if((x = tr->langopts.vowel_pause & 0x0c) != 0)
  470. {
  471. // break before a word which starts with a vowel
  472. if(x == 0xc)
  473. insert_ph = phonPAUSE_NOLINK;
  474. else
  475. insert_ph = phonPAUSE_VSHORT;
  476. }
  477. if((ph->type == phVOWEL) && ((x = tr->langopts.vowel_pause & 0x03) != 0))
  478. {
  479. // adjacent vowels over a word boundary
  480. if(x == 2)
  481. insert_ph = phonPAUSE_SHORT;
  482. else
  483. insert_ph = phonPAUSE_VSHORT;
  484. }
  485. if(((plist3+1)->stresslevel >= 4) && (tr->langopts.vowel_pause & 0x100))
  486. {
  487. // pause before a word which starts with a stressed vowel
  488. insert_ph = phonPAUSE_SHORT;
  489. }
  490. }
  491. }
  // word gaps are not added after an inserted phoneme, nor before anything has
  // been written to the output list
  492. if((plist3 != plist3_inserted) && (ix > 0))
  493. {
  494. if((x = (tr->langopts.word_gap & 0x7)) != 0)
  495. {
  496. if((x > 1) || ((insert_ph != phonPAUSE_SHORT) && (insert_ph != phonPAUSE_NOLINK)))
  497. {
  498. // don't reduce the pause
  499. insert_ph = pause_phonemes[x];
  500. }
  501. }
  502. if(option_wordgap > 0)
  503. {
  504. insert_ph = phonPAUSE_LONG;
  505. }
  506. }
  507. }
  // fill in the ph pointer two entries ahead of the current one
  508. next2 = phoneme_tab[plist3[2].phcode];
  509. plist3[2].ph = next2;
  510. if((insert_ph == 0) && (phdata.pd_param[pd_APPENDPHONEME] != 0))
  511. {
  512. insert_ph = phdata.pd_param[pd_APPENDPHONEME];
  513. }
  514. if(ph->phflags & phVOICED)
  515. {
  516. // check that a voiced consonant is preceded or followed by a vowel or liquid
  517. // and if not, add a short schwa
  518. // not yet implemented
  519. }
  520. if(deleted == 0)
  521. {
  // write this phoneme to the output list
  522. phlist[ix].ph = ph;
  523. phlist[ix].type = ph->type;
  524. phlist[ix].env = PITCHfall; // default, can be changed in the "intonation" module
  525. phlist[ix].synthflags = plist3->synthflags;
  526. phlist[ix].stresslevel = plist3->stresslevel & 0xf;
  527. phlist[ix].wordstress = plist3->wordstress;
  528. phlist[ix].tone_ph = plist3->tone_ph;
  529. phlist[ix].sourceix = 0;
  530. phlist[ix].phcode = ph->code;
  531. if(plist3->sourceix != 0)
  532. {
  533. phlist[ix].sourceix = plist3->sourceix;
  534. phlist[ix].newword = 1; // this phoneme is the start of a word
  535. if(start_sentence)
  536. {
  537. phlist[ix].newword = 5; // start of sentence + start of word
  538. start_sentence = 0;
  539. }
  540. }
  541. else
  542. {
  543. phlist[ix].newword = 0;
  544. }
  545. // phlist[ix].length = ph->std_length;
  546. phlist[ix].length = phdata.pd_param[i_SET_LENGTH]*2;
  547. if((ph->code == phonPAUSE_LONG) && (option_wordgap > 0) && (plist3[1].sourceix != 0))
  548. {
  // a long pause before a new word, when option_wordgap is set: use the short
  // pause phoneme with a length derived from option_wordgap
  549. phlist[ix].ph = phoneme_tab[phonPAUSE_SHORT];
  550. phlist[ix].length = option_wordgap*14; // 10mS per unit at the default speed
  551. }
  552. if(ph->type==phVOWEL || ph->type==phLIQUID || ph->type==phNASAL || ph->type==phVSTOP || ph->type==phVFRICATIVE || (ph->phflags & phPREVOICE))
  553. {
  // for these types the length set above is overwritten
  554. phlist[ix].length = 128; // length_mod
  555. phlist[ix].env = PITCHfall;
  556. }
  557. phlist[ix].prepause = 0;
  558. phlist[ix].amp = 20; // default, will be changed later
  559. phlist[ix].pitch1 = 255;
  560. phlist[ix].pitch2 = 255;
  561. ix++;
  562. }
  563. }
  // Two terminating pause entries follow.  The first carries the clause-end
  // marker, the post_pause length and the sourceix saved at the start.
  564. phlist[ix].newword = 2; // end of clause
  565. phlist[ix].phcode = phonPAUSE;
  566. phlist[ix].type = phPAUSE; // terminate with 2 Pause phonemes
  567. phlist[ix].length = post_pause; // length of the pause, depends on the punctuation
  568. phlist[ix].sourceix = end_sourceix;
  569. phlist[ix].synthflags = 0;
  570. phlist[ix++].ph = phoneme_tab[phonPAUSE];
  // second terminating entry: phcode is phonPAUSE but ph points at the
  // phonPAUSE_SHORT table entry
  571. phlist[ix].phcode = phonPAUSE;
  572. phlist[ix].type = phPAUSE;
  573. phlist[ix].length = 0;
  574. phlist[ix].sourceix=0;
  575. phlist[ix].synthflags = 0;
  576. phlist[ix++].ph = phoneme_tab[phonPAUSE_SHORT];
  577. n_phoneme_list = ix;
  578. } // end of MakePhonemeList