eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

phonemelist.cpp 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646
  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2007 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include "StdAfx.h"
  20. #include <stdio.h>
  21. #include <stdlib.h>
  22. #include <string.h>
  23. #include "speak_lib.h"
  24. #include "speech.h"
  25. #include "phoneme.h"
  26. #include "synthesize.h"
  27. #include "translate.h"
  28. const unsigned char pause_phonemes[8] = {0, phonPAUSE_VSHORT, phonPAUSE_SHORT, phonPAUSE, phonPAUSE_LONG, phonGLOTTALSTOP, phonPAUSE_LONG, phonPAUSE_LONG};
  29. extern int n_ph_list2;
  30. extern PHONEME_LIST2 ph_list2[N_PHONEME_LIST]; // first stage of text->phonemes
  31. static int SubstitutePhonemes(Translator *tr, PHONEME_LIST *plist_out)
  32. {//===================================================================
  33. // Copy the phonemes list and perform any substitutions that are required for the
  34. // current voice
  35. int ix;
  36. int k;
  37. int replace_flags;
  38. int n_plist_out = 0;
  39. int word_end;
  40. int switched_language = 0;
  41. PHONEME_LIST2 *plist2;
  42. PHONEME_TAB *next=NULL;
  43. for(ix=0; (ix < n_ph_list2) && (n_plist_out < N_PHONEME_LIST); ix++)
  44. {
  45. plist2 = &ph_list2[ix];
  46. if(plist2->phcode == phonSWITCH)
  47. switched_language ^= 1;
  48. // don't do any substitution if the language has been temporarily changed
  49. if(switched_language == 0)
  50. {
  51. if(ix < (n_ph_list2 -1))
  52. next = phoneme_tab[ph_list2[ix+1].phcode];
  53. word_end = 0;
  54. if((plist2+1)->sourceix || ((next != 0) && (next->type == phPAUSE)))
  55. word_end = 1; // this phoneme is the end of a word
  56. // check whether a Voice has specified that we should replace this phoneme
  57. for(k=0; k<n_replace_phonemes; k++)
  58. {
  59. if(plist2->phcode == replace_phonemes[k].old_ph)
  60. {
  61. replace_flags = replace_phonemes[k].type;
  62. if((replace_flags & 1) && (word_end == 0))
  63. continue; // this replacement only occurs at the end of a word
  64. if((replace_flags & 2) && ((plist2->stresslevel & 0x7) > 3))
  65. continue; // this replacement doesn't occur in stressed syllables
  66. // substitute the replacement phoneme
  67. plist2->phcode = replace_phonemes[k].new_ph;
  68. if((plist2->stresslevel > 1) && (phoneme_tab[plist2->phcode]->phflags & phUNSTRESSED))
  69. plist2->stresslevel = 0; // the replacement must be unstressed
  70. break;
  71. }
  72. }
  73. if(plist2->phcode == 0)
  74. {
  75. continue; // phoneme has been replaced by NULL, so don't copy it
  76. }
  77. }
  78. // copy phoneme into the output list
  79. memcpy(&plist_out[n_plist_out],plist2,sizeof(PHONEME_LIST2));
  80. plist_out[n_plist_out].ph = phoneme_tab[plist2->phcode];
  81. plist_out[n_plist_out].type = plist_out[n_plist_out].ph->type;
  82. n_plist_out++;
  83. }
  84. return(n_plist_out);
  85. } // end of SubstitutePhonemes
  86. void MakePhonemeList(Translator *tr, int post_pause, int start_sentence)
  87. {//=====================================================================
  88. int ix=0;
  89. int j;
  90. int insert_ph = 0;
  91. PHONEME_LIST *phlist;
  92. PHONEME_TAB *ph;
  93. PHONEME_TAB *prev, *next, *next2;
  94. int unstress_count = 0;
  95. int word_stress = 0;
  96. int switched_language = 0;
  97. int max_stress;
  98. int voicing;
  99. int regression;
  100. int end_sourceix;
  101. int alternative;
  102. int first_vowel=0; // first vowel in a word
  103. PHONEME_DATA phdata;
  104. int n_ph_list3;
  105. PHONEME_LIST *plist3;
  106. PHONEME_LIST *plist3_inserted = NULL;
  107. PHONEME_LIST ph_list3[N_PHONEME_LIST];
  108. static PHONEME_LIST2 ph_list2_null = {0,0,0,0,0,0};
  109. PHONEME_LIST2 *plist2 = &ph_list2_null;
  110. plist2 = ph_list2;
  111. phlist = phoneme_list;
  112. end_sourceix = plist2[n_ph_list2-1].sourceix;
  113. // is the last word of the clause unstressed ?
  114. max_stress = 0;
  115. for(j = n_ph_list2-3; j>=0; j--)
  116. {
  117. // start with the last phoneme (before the terminating pauses) and move forwards
  118. if((plist2[j].stresslevel & 0x7f) > max_stress)
  119. max_stress = plist2[j].stresslevel & 0x7f;
  120. if(plist2[j].sourceix != 0)
  121. break;
  122. }
  123. if(max_stress < 4)
  124. {
  125. // the last word is unstressed, look for a previous word that can be stressed
  126. while(--j >= 0)
  127. {
  128. if(plist2[j].synthflags & SFLAG_PROMOTE_STRESS) // dictionary flags indicated that this stress can be promoted
  129. {
  130. plist2[j].stresslevel = 4; // promote to stressed
  131. break;
  132. }
  133. if(plist2[j].stresslevel >= 4)
  134. {
  135. // found a stressed syllable, so stop looking
  136. break;
  137. }
  138. }
  139. }
  140. if((regression = tr->langopts.param[LOPT_REGRESSIVE_VOICING]) != 0)
  141. {
  142. // set consonant clusters to all voiced or all unvoiced
  143. // Regressive
  144. int type;
  145. int word_end_devoice = 0;
  146. voicing = 0;
  147. for(j=n_ph_list2-1; j>=0; j--)
  148. {
  149. ph = phoneme_tab[plist2[j].phcode];
  150. if(ph == NULL)
  151. continue;
  152. if(ph->code == phonSWITCH)
  153. switched_language ^= 1;
  154. if(switched_language)
  155. continue;
  156. type = ph->type;
  157. if(regression & 0x2)
  158. {
  159. // LANG=Russian, [v] amd [v;] don't cause regression, or [R^]
  160. if((ph->mnemonic == 'v') || (ph->mnemonic == ((';'<<8)+'v')) || ((ph->mnemonic & 0xff)== 'R'))
  161. {
  162. if(word_end_devoice == 1)
  163. voicing = 0;
  164. else
  165. type = phLIQUID;
  166. }
  167. }
  168. if((type==phSTOP) || type==(phFRICATIVE))
  169. {
  170. if((voicing==0) && (regression & 0xf))
  171. {
  172. voicing = 1;
  173. }
  174. else
  175. if((voicing==2) && (ph->end_type != 0)) // use end_type field for voicing_switch for consonants
  176. {
  177. plist2[j].phcode = ph->end_type; // change to voiced equivalent
  178. }
  179. }
  180. else
  181. if((type==phVSTOP) || type==(phVFRICATIVE))
  182. {
  183. if((voicing==0) && (regression & 0xf))
  184. {
  185. voicing = 2;
  186. }
  187. else
  188. if((voicing==1) && (ph->end_type != 0))
  189. {
  190. plist2[j].phcode = ph->end_type; // change to unvoiced equivalent
  191. }
  192. }
  193. else
  194. {
  195. if(regression & 0x8)
  196. {
  197. // LANG=Polish, propagate through liquids and nasals
  198. if((type == phPAUSE) || (type == phVOWEL))
  199. voicing = 0;
  200. }
  201. else
  202. {
  203. voicing = 0;
  204. }
  205. }
  206. word_end_devoice = 0;
  207. if(plist2[j].sourceix)
  208. {
  209. if(regression & 0x04)
  210. {
  211. // stop propagation at a word boundary
  212. voicing = 0;
  213. }
  214. if(regression & 0x10)
  215. {
  216. // devoice word-final consonants, unless propagating voiced
  217. if(voicing == 0)
  218. {
  219. voicing = 1;
  220. word_end_devoice = 1;
  221. }
  222. }
  223. }
  224. }
  225. }
  226. n_ph_list3 = SubstitutePhonemes(tr,ph_list3) - 2;
  227. for(j=0; (j < n_ph_list3) && (ix < N_PHONEME_LIST-3);)
  228. {
  229. if(ph_list3[j].sourceix)
  230. {
  231. // start of a word
  232. int k;
  233. int nextw;
  234. word_stress = 0;
  235. // find the highest stress level in this word
  236. for(nextw=j; nextw < n_ph_list3;)
  237. {
  238. if(ph_list3[nextw].stresslevel > word_stress)
  239. word_stress = ph_list3[nextw].stresslevel;
  240. nextw++;
  241. if(ph_list3[nextw].sourceix)
  242. break; // start of the next word
  243. }
  244. for(k=j; k<nextw; k++)
  245. {
  246. ph_list3[k].wordstress = word_stress;
  247. }
  248. j = nextw;
  249. }
  250. else
  251. {
  252. j++;
  253. }
  254. }
  255. // transfer all the phonemes of the clause into phoneme_list
  256. ph = phoneme_tab[phonPAUSE];
  257. ph_list3[0].ph = ph;
  258. switched_language = 0;
  259. for(j=0; insert_ph || ((j < n_ph_list3) && (ix < N_PHONEME_LIST-3)); j++)
  260. {
  261. prev = ph;
  262. plist3 = &ph_list3[j];
  263. if(insert_ph != 0)
  264. {
  265. // we have a (linking) phoneme which we need to insert here
  266. next = phoneme_tab[plist3->phcode]; // this phoneme, i.e. after the insert
  267. // re-use the previous entry for the inserted phoneme.
  268. // That's OK because we don't look backwards from plist3
  269. j--;
  270. plist3 = plist3_inserted = &ph_list3[j];
  271. if(j > 0)
  272. {
  273. memcpy(&plist3[-1], &plist3[0], sizeof(*plist3));
  274. }
  275. memset(&plist3[0], 0, sizeof(*plist3));
  276. plist3->phcode = insert_ph;
  277. ph = phoneme_tab[insert_ph];
  278. plist3->ph = ph;
  279. insert_ph = 0;
  280. }
  281. else
  282. {
  283. // otherwise get the next phoneme from the list
  284. ph = phoneme_tab[plist3->phcode];
  285. plist3[0].ph = ph;
  286. if(plist3->phcode == phonSWITCH)
  287. {
  288. // change phoneme table
  289. SelectPhonemeTable(plist3->tone_ph);
  290. switched_language ^= SFLAG_SWITCHED_LANG;
  291. }
  292. next = phoneme_tab[plist3[1].phcode]; // the phoneme after this one
  293. plist3[1].ph = next;
  294. }
  295. if(ph == NULL) continue;
  296. InterpretPhoneme(tr, 0x100, plist3, &phdata);
  297. if((alternative = phdata.pd_param[pd_INSERTPHONEME]) > 0)
  298. {
  299. // PROBLEM: if we insert a phoneme before a vowel then we loose the stress.
  300. PHONEME_TAB *ph2;
  301. ph2 = ph;
  302. insert_ph = plist3->phcode;
  303. ph = phoneme_tab[alternative];
  304. plist3->ph = ph;
  305. plist3->phcode = alternative;
  306. if(ph->type == phVOWEL)
  307. {
  308. plist3->synthflags |= SFLAG_SYLLABLE;
  309. if(ph2->type != phVOWEL)
  310. plist3->stresslevel = 0; // change from non-vowel to vowel, make sure it's unstressed
  311. }
  312. else
  313. plist3->synthflags &= ~SFLAG_SYLLABLE;
  314. // re-interpret the changed phoneme
  315. // But it doesn't obey a second ChangePhoneme()
  316. InterpretPhoneme(tr, 0x100, plist3, &phdata);
  317. }
  318. if((alternative = phdata.pd_param[pd_CHANGEPHONEME]) > 0)
  319. {
  320. PHONEME_TAB *ph2;
  321. ph2 = ph;
  322. ph = phoneme_tab[alternative];
  323. plist3->ph = ph;
  324. plist3->phcode = alternative;
  325. if(alternative == 1)
  326. continue; // NULL phoneme, discard
  327. if(ph->type == phVOWEL)
  328. {
  329. plist3->synthflags |= SFLAG_SYLLABLE;
  330. if(ph2->type != phVOWEL)
  331. plist3->stresslevel = 0; // change from non-vowel to vowel, make sure it's unstressed
  332. }
  333. else
  334. plist3->synthflags &= ~SFLAG_SYLLABLE;
  335. // re-interpret the changed phoneme
  336. // But it doesn't obey a second ChangePhoneme()
  337. InterpretPhoneme(tr, 0x100, plist3, &phdata);
  338. }
  339. if(ph->type == phVOWEL)
  340. {
  341. PHONEME_LIST *p;
  342. // Check for consecutive unstressed syllables, even across word boundaries.
  343. // Do this after changing phonemes according to stress level.
  344. if(plist3->stresslevel <= 1)
  345. {
  346. // an unstressed vowel
  347. unstress_count++;
  348. if(tr->langopts.stress_flags & 0x08)
  349. {
  350. // change sequences of consecutive unstressed vowels in unstressed words to diminished stress (TEST)
  351. for(p=plist3+1; p->type != phPAUSE; p++)
  352. {
  353. if(p->type == phVOWEL)
  354. {
  355. if(p->stresslevel <= 1)
  356. {
  357. if(plist3->wordstress < 4)
  358. plist3->stresslevel = 0;
  359. if(p->wordstress < 4)
  360. p->stresslevel = 0;
  361. }
  362. break;
  363. }
  364. }
  365. }
  366. else
  367. {
  368. if((unstress_count > 1) && ((unstress_count & 1)==0))
  369. {
  370. // in a sequence of unstressed syllables, reduce alternate syllables to 'diminished'
  371. // stress. But not for the last phoneme of a stressed word
  372. if((tr->langopts.stress_flags & 0x2) || ((word_stress > 3) && ((plist3+1)->sourceix!=0)))
  373. {
  374. // An unstressed final vowel of a stressed word
  375. unstress_count=1; // try again for next syllable
  376. }
  377. else
  378. {
  379. plist3->stresslevel = 0; // change stress to 'diminished'
  380. }
  381. }
  382. }
  383. }
  384. else
  385. {
  386. unstress_count = 0;
  387. }
  388. }
  389. #ifdef deleted
  390. while((ph->reduce_to != 0) && (!(plist3->synthflags & SFLAG_DICTIONARY) || (tr->langopts.param[LOPT_REDUCE] & 1)))
  391. {
  392. int reduce_level;
  393. int stress_level;
  394. int reduce = 0;
  395. reduce_level = (ph->phflags >> 28) & 7;
  396. if(ph->type == phVOWEL)
  397. {
  398. stress_level = plist3->stress;
  399. }
  400. else
  401. {
  402. // consonant, get stress from the following vowel
  403. if(next->type == phVOWEL)
  404. stress_level = (plist3+1)->stress;
  405. else
  406. break;
  407. }
  408. if((stress_level == 1) && (first_vowel))
  409. stress_level = 0; // ignore 'dimished' stress on first syllable
  410. if(stress_level == 1)
  411. reduce = 1; // stress = 'reduced'
  412. if(stress_level < reduce_level)
  413. reduce =1;
  414. if((word_stress < 4) && (tr->langopts.param[LOPT_REDUCE] & 0x2) && (stress_level >= word_stress))
  415. {
  416. // don't reduce the most stressed syllable in an unstressed word
  417. reduce = 0;
  418. }
  419. if(reduce)
  420. ph = phoneme_tab[ph->reduce_to];
  421. else
  422. break;
  423. }
  424. #endif
  425. if(ph->type == phVOWEL)
  426. first_vowel = 0;
  427. if((plist3+1)->synthflags & SFLAG_LENGTHEN)
  428. {
  429. static char types_double[] = {phFRICATIVE,phVFRICATIVE,phNASAL,phLIQUID,0};
  430. if(strchr(types_double,next->type))
  431. {
  432. // lengthen this consonant by doubling it
  433. insert_ph = next->code;
  434. (plist3+1)->synthflags ^= SFLAG_LENGTHEN;
  435. }
  436. }
  437. if((plist3+1)->sourceix != 0)
  438. {
  439. int x;
  440. if(tr->langopts.vowel_pause && (ph->type != phPAUSE))
  441. {
  442. if((ph->type != phVOWEL) && (tr->langopts.vowel_pause & 0x200))
  443. {
  444. // add a pause after a word which ends in a consonant
  445. insert_ph = phonPAUSE_NOLINK;
  446. }
  447. if(next->type == phVOWEL)
  448. {
  449. if((x = tr->langopts.vowel_pause & 0x0c) != 0)
  450. {
  451. // break before a word which starts with a vowel
  452. if(x == 0xc)
  453. insert_ph = phonPAUSE_NOLINK;
  454. else
  455. insert_ph = phonPAUSE_VSHORT;
  456. }
  457. if((ph->type == phVOWEL) && ((x = tr->langopts.vowel_pause & 0x03) != 0))
  458. {
  459. // adjacent vowels over a word boundary
  460. if(x == 2)
  461. insert_ph = phonPAUSE_SHORT;
  462. else
  463. insert_ph = phonPAUSE_VSHORT;
  464. }
  465. if(((plist3+1)->stresslevel >= 4) && (tr->langopts.vowel_pause & 0x100))
  466. {
  467. // pause before a words which starts with a stressed vowel
  468. insert_ph = phonPAUSE_SHORT;
  469. }
  470. }
  471. }
  472. if(plist3 != plist3_inserted)
  473. {
  474. if((x = (tr->langopts.word_gap & 0x7)) != 0)
  475. {
  476. if((x > 1) || ((insert_ph != phonPAUSE_SHORT) && (insert_ph != phonPAUSE_NOLINK)))
  477. {
  478. // don't reduce the pause
  479. insert_ph = pause_phonemes[x];
  480. }
  481. }
  482. if(option_wordgap > 0)
  483. {
  484. insert_ph = phonPAUSE_LONG;
  485. }
  486. }
  487. }
  488. next2 = phoneme_tab[plist3[2].phcode];
  489. plist3[2].ph = next2;
  490. if((insert_ph == 0) && (phdata.pd_param[pd_APPENDPHONEME] != 0))
  491. {
  492. insert_ph = phdata.pd_param[pd_APPENDPHONEME];
  493. }
  494. if(ph->phflags & phVOICED)
  495. {
  496. // check that a voiced consonant is preceded or followed by a vowel or liquid
  497. // and if not, add a short schwa
  498. // not yet implemented
  499. }
  500. phlist[ix].ph = ph;
  501. phlist[ix].type = ph->type;
  502. phlist[ix].env = PITCHfall; // default, can be changed in the "intonation" module
  503. phlist[ix].synthflags = plist3->synthflags | switched_language;
  504. phlist[ix].stresslevel = plist3->stresslevel & 0xf;
  505. phlist[ix].wordstress = plist3->wordstress;
  506. phlist[ix].tone_ph = plist3->tone_ph;
  507. phlist[ix].sourceix = 0;
  508. phlist[ix].phcode = ph->code;
  509. if(plist3->sourceix != 0)
  510. {
  511. phlist[ix].sourceix = plist3->sourceix;
  512. phlist[ix].newword = 1; // this phoneme is the start of a word
  513. if(start_sentence)
  514. {
  515. phlist[ix].newword = 5; // start of sentence + start of word
  516. start_sentence = 0;
  517. }
  518. }
  519. else
  520. {
  521. phlist[ix].newword = 0;
  522. }
  523. // phlist[ix].length = ph->std_length;
  524. phlist[ix].length = phdata.pd_param[i_SET_LENGTH]*2;
  525. if((ph->code == phonPAUSE_LONG) && (option_wordgap > 0))
  526. {
  527. phlist[ix].ph = phoneme_tab[phonPAUSE_SHORT];
  528. phlist[ix].length = option_wordgap*14; // 10mS per unit at the default speed
  529. }
  530. if(ph->type==phVOWEL || ph->type==phLIQUID || ph->type==phNASAL || ph->type==phVSTOP || ph->type==phVFRICATIVE)
  531. {
  532. phlist[ix].length = 128; // length_mod
  533. phlist[ix].env = PITCHfall;
  534. }
  535. phlist[ix].prepause = 0;
  536. phlist[ix].amp = 20; // default, will be changed later
  537. phlist[ix].pitch1 = 255;
  538. phlist[ix].pitch2 = 255;
  539. ix++;
  540. }
  541. phlist[ix].newword = 2; // end of clause
  542. phlist[ix].phcode = phonPAUSE;
  543. phlist[ix].type = phPAUSE; // terminate with 2 Pause phonemes
  544. phlist[ix].length = post_pause; // length of the pause, depends on the punctuation
  545. phlist[ix].sourceix = end_sourceix;
  546. phlist[ix].synthflags = 0;
  547. phlist[ix++].ph = phoneme_tab[phonPAUSE];
  548. phlist[ix].phcode = phonPAUSE;
  549. phlist[ix].type = phPAUSE;
  550. phlist[ix].length = 0;
  551. phlist[ix].sourceix=0;
  552. phlist[ix].synthflags = 0;
  553. phlist[ix++].ph = phoneme_tab[phonPAUSE_SHORT];
  554. n_phoneme_list = ix;
  555. } // end of MakePhonemeList