eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

phonemelist.cpp 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2007 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include "StdAfx.h"
  20. #include <stdio.h>
  21. #include <stdlib.h>
  22. #include <string.h>
  23. #include "speak_lib.h"
  24. #include "speech.h"
  25. #include "phoneme.h"
  26. #include "synthesize.h"
  27. #include "translate.h"
  28. int Translator::ChangePhonemes(PHONEME_LIST2 *phlist, int n_ph, int index, PHONEME_TAB *ph, CHANGEPH *ch)
  29. {//======================================================================================================
  30. // Called for each phoneme in the phoneme list, to allow a language to make changes
  31. // ph The current phoneme
  32. return(0);
  33. }
  34. int Translator::SubstitutePhonemes(PHONEME_LIST2 *plist_out)
  35. {//=========================================================
  36. // Copy the phonemes list and perform any substitutions that are required for the
  37. // current voice
  38. int ix;
  39. int k;
  40. int replace_flags;
  41. int n_plist_out = 0;
  42. int word_end;
  43. int max_stress = -1;
  44. int switched_language = 0;
  45. int max_stress_posn=0;
  46. int n_syllables = 0;
  47. int syllable = 0;
  48. int syllable_stressed = 0;
  49. PHONEME_LIST2 *plist2;
  50. PHONEME_LIST2 *pl;
  51. PHONEME_TAB *next=NULL;
  52. for(ix=0; (ix < n_ph_list2) && (n_plist_out < N_PHONEME_LIST); ix++)
  53. {
  54. plist2 = &ph_list2[ix];
  55. if(plist2->phcode == phonSWITCH)
  56. switched_language ^= 1;
  57. // don't do any substitution if the language has been temporarily changed
  58. if(switched_language == 0)
  59. {
  60. if(ix < (n_ph_list2 -1))
  61. next = phoneme_tab[ph_list2[ix+1].phcode];
  62. word_end = 0;
  63. if((plist2+1)->sourceix || ((next != 0) && (next->type == phPAUSE)))
  64. word_end = 1; // this phoneme is the end of a word
  65. if(langopts.phoneme_change != 0)
  66. {
  67. // this language does changes to phonemes after translation
  68. if(plist2->sourceix)
  69. {
  70. // start of a word, find the stressed vowel
  71. syllable = 0;
  72. syllable_stressed = 0;
  73. n_syllables = 0;
  74. max_stress = -1;
  75. max_stress_posn = ix;
  76. for(k=ix; k < n_ph_list2; k++)
  77. {
  78. if(((pl = &ph_list2[k])->sourceix != 0) && (k > ix))
  79. break;
  80. pl->stress &= 0xf;
  81. if(phoneme_tab[pl->phcode]->type == phVOWEL)
  82. {
  83. n_syllables++;
  84. if(pl->stress > max_stress)
  85. {
  86. syllable_stressed = n_syllables;
  87. max_stress = pl->stress;
  88. max_stress_posn = k;
  89. }
  90. }
  91. }
  92. }
  93. if(phoneme_tab[plist2->phcode]->type == phVOWEL)
  94. {
  95. syllable++;
  96. }
  97. // make any language specific changes
  98. int flags;
  99. CHANGEPH ch;
  100. flags = 0;
  101. if(ix == max_stress_posn)
  102. flags |= 2;
  103. if(ix > max_stress_posn)
  104. flags |= 4;
  105. if(ph_list2[ix].synthflags & SFLAG_DICTIONARY)
  106. flags |= 8;
  107. ch.flags = flags | word_end;
  108. ch.stress = plist2->stress;
  109. ch.stress_highest = max_stress;
  110. ch.n_vowels = n_syllables;
  111. ch.vowel_this = syllable;
  112. ch.vowel_stressed = syllable_stressed;
  113. ChangePhonemes(ph_list2, n_ph_list2, ix, phoneme_tab[ph_list2[ix].phcode], &ch);
  114. }
  115. // check whether a Voice has specified that we should replace this phoneme
  116. for(k=0; k<n_replace_phonemes; k++)
  117. {
  118. if(plist2->phcode == replace_phonemes[k].old_ph)
  119. {
  120. replace_flags = replace_phonemes[k].type;
  121. if((replace_flags & 1) && (word_end == 0))
  122. continue; // this replacement only occurs at the end of a word
  123. if((replace_flags & 2) && ((plist2->stress & 0x7) > 3))
  124. continue; // this replacement doesn't occur in stressed syllables
  125. // substitute the replacement phoneme
  126. plist2->phcode = replace_phonemes[k].new_ph;
  127. break;
  128. }
  129. }
  130. if(plist2->phcode == 0)
  131. {
  132. continue; // phoneme has been replaced by NULL, so don't copy it
  133. }
  134. }
  135. // copy phoneme into the output list
  136. memcpy(&plist_out[n_plist_out++],plist2,sizeof(PHONEME_LIST2));
  137. }
  138. return(n_plist_out);
  139. } // end of SubstitutePhonemes
  140. void Translator::MakePhonemeList(int post_pause, int start_sentence)
  141. {//============================================================================================
  142. int ix=0;
  143. int j;
  144. int insert_ph = 0;
  145. int insert_synthflags = 0;
  146. PHONEME_LIST *phlist;
  147. PHONEME_TAB *ph;
  148. PHONEME_TAB *prev, *next, *next2;
  149. int unstress_count = 0;
  150. int word_stress = 0;
  151. int switched_language = 0;
  152. int max_stress;
  153. int voicing;
  154. int regression;
  155. int end_sourceix;
  156. int alternative;
  157. PHONEME_LIST2 ph_list3[N_PHONEME_LIST];
  158. static PHONEME_LIST2 ph_list2_null = {0,0,0,0,0};
  159. PHONEME_LIST2 *plist2 = &ph_list2_null;
  160. phlist = phoneme_list;
  161. end_sourceix = ph_list2[n_ph_list2-1].sourceix;
  162. // is the last word of the clause unstressed ?
  163. max_stress = 0;
  164. for(j=n_ph_list2-3; j>=0; j--)
  165. {
  166. // start with the last phoneme (before the terminating pauses) and move forwards
  167. if((ph_list2[j].stress & 0x7f) > max_stress)
  168. max_stress = ph_list2[j].stress & 0x7f;
  169. if(ph_list2[j].sourceix != 0)
  170. break;
  171. }
  172. if(max_stress < 4)
  173. {
  174. // the last word is unstressed, look for a previous word that can be stressed
  175. while(--j >= 0)
  176. {
  177. if(ph_list2[j].stress & 0x80) // dictionary flags indicated that this stress can be promoted
  178. {
  179. ph_list2[j].stress = 4; // promote to stressed
  180. break;
  181. }
  182. if((ph_list2[j].stress & 0x7f) >= 4)
  183. {
  184. // found a stressed syllable, so stop looking
  185. break;
  186. }
  187. }
  188. }
  189. if((regression = langopts.param[LOPT_REGRESSIVE_VOICING]) != 0)
  190. {
  191. // set consonant clusters to all voiced or all unvoiced
  192. // Regressive
  193. int type;
  194. voicing = 0;
  195. for(j=n_ph_list2-1; j>=0; j--)
  196. {
  197. ph = phoneme_tab[ph_list2[j].phcode];
  198. if(ph == NULL)
  199. continue;
  200. if(ph->code == phonSWITCH)
  201. switched_language ^= 1;
  202. if(switched_language)
  203. continue;
  204. type = ph->type;
  205. if(regression & 0x2)
  206. {
  207. // LANG=Russian, [v] amd [v;] don't cause regression, or [R^]
  208. if((ph->mnemonic == 'v') || (ph->mnemonic == ((';'<<8)+'v')) || ((ph->mnemonic & 0xff)== 'R'))
  209. type = phLIQUID;
  210. }
  211. if((type==phSTOP) || type==(phFRICATIVE))
  212. {
  213. if(voicing==0)
  214. {
  215. voicing = 1;
  216. }
  217. else
  218. if((voicing==2) && ((ph->phflags & phALTERNATIVE)==phSWITCHVOICING))
  219. {
  220. ph_list2[j].phcode = ph->alternative_ph; // change to voiced equivalent
  221. }
  222. }
  223. else
  224. if((type==phVSTOP) || type==(phVFRICATIVE))
  225. {
  226. if(voicing==0)
  227. {
  228. voicing = 2;
  229. }
  230. else
  231. if((voicing==1) && ((ph->phflags & phALTERNATIVE)==phSWITCHVOICING))
  232. {
  233. ph_list2[j].phcode = ph->alternative_ph; // change to unvoiced equivalent
  234. }
  235. }
  236. else
  237. {
  238. if(regression & 0x8)
  239. {
  240. // LANG=Polish, propagate through liquids and nasals
  241. if((type == phPAUSE) || (type == phVOWEL))
  242. voicing = 0;
  243. }
  244. else
  245. {
  246. voicing = 0;
  247. }
  248. }
  249. if((regression & 0x4) && (ph_list2[j].sourceix))
  250. {
  251. // stop propagation at a word boundary
  252. voicing = 0;
  253. }
  254. }
  255. }
  256. n_ph_list2 = SubstitutePhonemes(ph_list3) - 2;
  257. // transfer all the phonemes of the clause into phoneme_list
  258. ph = phoneme_tab[phonPAUSE];
  259. switched_language = 0;
  260. for(j=0; insert_ph || ((j<n_ph_list2) && (ix < N_PHONEME_LIST-3)); j++)
  261. {
  262. prev = ph;
  263. plist2 = &ph_list3[j];
  264. if(insert_ph != 0)
  265. {
  266. // we have a (linking) phoneme which we need to insert here
  267. j--;
  268. next = phoneme_tab[plist2->phcode]; // this phoneme, i.e. after the insert
  269. plist2 = &ph_list2_null;
  270. ph = phoneme_tab[insert_ph];
  271. plist2->synthflags = insert_synthflags;
  272. insert_ph = 0;
  273. insert_synthflags = 0;
  274. }
  275. else
  276. {
  277. // otherwise get the next phoneme from the list
  278. ph = phoneme_tab[plist2->phcode];
  279. if(plist2->phcode == phonSWITCH)
  280. {
  281. // change phoneme table
  282. SelectPhonemeTable(plist2->tone_number);
  283. switched_language ^= SFLAG_SWITCHED_LANG;
  284. }
  285. next = phoneme_tab[(plist2+1)->phcode]; // the phoneme after this one
  286. }
  287. if(plist2->sourceix)
  288. {
  289. // start of a word
  290. int k;
  291. word_stress = 0;
  292. // find the highest stress level in this word
  293. for(k=j+1; k < n_ph_list2; k++)
  294. {
  295. if(ph_list3[k].sourceix)
  296. break; // start of the next word
  297. if(ph_list3[k].stress > word_stress)
  298. word_stress = ph_list3[k].stress;
  299. }
  300. }
  301. if(ph == NULL) continue;
  302. if(ph->type == phVOWEL)
  303. {
  304. // check for consecutive unstressed syllables
  305. if(plist2->stress == 0)
  306. {
  307. // an unstressed vowel
  308. unstress_count++;
  309. if((unstress_count > 1) && ((unstress_count & 1)==0))
  310. {
  311. // in a sequence of unstressed syllables, reduce alternate syllables to 'diminished'
  312. // stress. But not for the last phoneme of a stressed word
  313. if((langopts.stress_flags & 0x2) || ((word_stress > 3) && ((plist2+1)->sourceix!=0)))
  314. {
  315. // An unstressed final vowel of a stressed word
  316. unstress_count=1; // try again for next syllable
  317. }
  318. else
  319. {
  320. plist2->stress = 1; // change stress to 'diminished'
  321. }
  322. }
  323. }
  324. else
  325. {
  326. unstress_count = 0;
  327. }
  328. }
  329. if(ph->alternative_ph > 0)
  330. {
  331. alternative = 0;
  332. switch(ph->phflags & phALTERNATIVE)
  333. {
  334. // This phoneme changes if vowel follows, or doesn't follow, depending on its phNOTFOLLOWS flag
  335. case phBEFORENOTVOWEL:
  336. if(next->type != phVOWEL)
  337. alternative = ph->alternative_ph;
  338. break;
  339. case phBEFOREVOWELPAUSE:
  340. if((next->type == phVOWEL) || (next->type == phPAUSE))
  341. alternative = ph->alternative_ph;
  342. break;
  343. case phBEFOREVOWEL:
  344. if(next->type == phVOWEL)
  345. alternative = ph->alternative_ph;
  346. break;
  347. }
  348. if(alternative == 1)
  349. continue; // NULL phoneme, discard
  350. if(alternative > 1)
  351. {
  352. PHONEME_TAB *ph2;
  353. ph2 = ph;
  354. ph = phoneme_tab[alternative];
  355. if(ph->type == phVOWEL)
  356. {
  357. plist2->synthflags |= SFLAG_SYLLABLE;
  358. if(ph2->type != phVOWEL)
  359. plist2->stress = 0; // change from non-vowel to vowel, make sure it's unstressed
  360. }
  361. else
  362. plist2->synthflags &= ~SFLAG_SYLLABLE;
  363. }
  364. }
  365. if(langopts.param[LOPT_REDUCE_T])
  366. {
  367. if((ph->mnemonic == 't') && (plist2->sourceix == 0) && ((prev->type == phVOWEL) || (prev->mnemonic == 'n')))
  368. {
  369. if(((plist2+1)->sourceix == 0) && ((plist2+1)->stress < 3) && (next->type == phVOWEL))
  370. {
  371. ph = phoneme_tab[phonT_REDUCED];
  372. }
  373. }
  374. }
  375. while((ph->reduce_to != 0) && (!(plist2->synthflags & SFLAG_DICTIONARY) || (langopts.param[LOPT_REDUCE] & 1)))
  376. {
  377. int reduce_level;
  378. int stress_level;
  379. int reduce = 0;
  380. reduce_level = (ph->phflags >> 28) & 7;
  381. if(ph->type == phVOWEL)
  382. {
  383. stress_level = plist2->stress;
  384. }
  385. else
  386. {
  387. // consonant, get stress from the following vowel
  388. if(next->type == phVOWEL)
  389. stress_level = (plist2+1)->stress;
  390. else
  391. break;
  392. }
  393. if(stress_level == 1)
  394. reduce = 1; // stress = 'reduced'
  395. if(stress_level < reduce_level)
  396. reduce =1;
  397. if((word_stress < 4) && (langopts.param[LOPT_REDUCE] & 0x2) && (stress_level >= word_stress))
  398. {
  399. // don't reduce the most stressed syllable in an unstressed word
  400. reduce = 0;
  401. }
  402. if(reduce)
  403. ph = phoneme_tab[ph->reduce_to];
  404. else
  405. break;
  406. }
  407. if((plist2+1)->synthflags & SFLAG_LENGTHEN)
  408. {
  409. static char types_double[] = {phFRICATIVE,phVFRICATIVE,phNASAL,phLIQUID,0};
  410. if(strchr(types_double,next->type))
  411. {
  412. // lengthen this consonant by doubling it
  413. insert_ph = next->code;
  414. (plist2+1)->synthflags ^= SFLAG_LENGTHEN;
  415. }
  416. }
  417. if((plist2+1)->sourceix != 0)
  418. {
  419. int x;
  420. if(langopts.word_gap & 1)
  421. {
  422. insert_ph = phonPAUSE_VSHORT;
  423. }
  424. if(langopts.vowel_pause && (ph->type != phPAUSE) && (next->type == phVOWEL))
  425. {
  426. if(langopts.vowel_pause & 0x04)
  427. {
  428. // break before a word which starts with a vowel
  429. insert_ph = phonPAUSE_VSHORT;
  430. }
  431. if((ph->type == phVOWEL) && ((x = langopts.vowel_pause & 0x03) != 0))
  432. {
  433. // adjacent vowels over a word boundary
  434. if(x == 2)
  435. insert_ph = phonPAUSE_SHORT;
  436. else
  437. insert_ph = phonPAUSE_VSHORT;
  438. }
  439. if(((plist2+1)->stress >= 4) && (langopts.vowel_pause & 0x08))
  440. {
  441. // pause before a words which starts with a stressed vowel
  442. insert_ph = phonPAUSE_SHORT;
  443. }
  444. }
  445. }
  446. next2 = phoneme_tab[(plist2+2)->phcode];
  447. #ifdef deleted
  448. if((ph->type != phVOWEL) && (ph->type != phPAUSE) //&& ((ph->phflags & phVOICED)==0)
  449. && (next->type == phLIQUID) && (next->mnemonic != ';') && (next2->type != phVOWEL))
  450. {
  451. // semi-vowel surrounded by consonants. precede by a short schwa
  452. insert_ph = phonSCHWA_SHORT;
  453. insert_synthflags = SFLAG_SYLLABLE;
  454. }
  455. if((ph->type == phLIQUID) && (prev->type != phVOWEL) && (next->type != phVOWEL))
  456. {
  457. // semi-vowel surrounded by consonants. precede by a short schwa
  458. insert_ph = phonSCHWA_SHORT;
  459. insert_synthflags = SFLAG_SYLLABLE;
  460. }
  461. #endif
  462. if((insert_ph == 0) && (ph->link_out != 0) && (((plist2+1)->synthflags & SFLAG_EMBEDDED)==0))
  463. {
  464. if(ph->phflags & phAPPENDPH)
  465. {
  466. // always append the specified phoneme, unless it already is the next phoneme
  467. if((ph->link_out != (plist2+1)->phcode) && (next->type == phVOWEL))
  468. // if(ph->link_out != (plist2+1)->phcode)
  469. {
  470. insert_ph = ph->link_out;
  471. }
  472. }
  473. else
  474. if(((langopts.word_gap & 2)==0) || ((plist2+1)->sourceix == 0))
  475. {
  476. // This phoneme can be linked to a following vowel by inserting a linking phoneme
  477. if(next->type == phVOWEL)
  478. insert_ph = ph->link_out;
  479. else
  480. if(next->code == phonPAUSE_SHORT)
  481. {
  482. // Pause followed by Vowel, replace the Short Pause with the linking phoneme,
  483. if(next2->type == phVOWEL)
  484. (plist2+1)->phcode = ph->link_out; // replace pause by linking phoneme
  485. }
  486. }
  487. }
  488. if(ph->phflags & phVOICED)
  489. {
  490. // check that a voiced consonant is preceded or followed by a vowel or liquid
  491. // and if not, add a short schwa
  492. // not yet implemented
  493. }
  494. phlist[ix].ph = ph;
  495. phlist[ix].type = ph->type;
  496. phlist[ix].env = PITCHfall; // default, can be changed in the "intonation" module
  497. phlist[ix].synthflags = plist2->synthflags | switched_language;
  498. phlist[ix].tone = plist2->stress & 0xf;
  499. phlist[ix].tone_ph = plist2->tone_number;
  500. phlist[ix].sourceix = 0;
  501. if(plist2->sourceix != 0)
  502. {
  503. phlist[ix].sourceix = plist2->sourceix;
  504. phlist[ix].newword = 1; // this phoneme is the start of a word
  505. if(start_sentence)
  506. {
  507. phlist[ix].newword = 5; // start of sentence + start of word
  508. start_sentence = 0;
  509. }
  510. }
  511. else
  512. {
  513. phlist[ix].newword = 0;
  514. }
  515. phlist[ix].length = ph->std_length;
  516. if(ph->type==phVOWEL || ph->type==phLIQUID || ph->type==phNASAL || ph->type==phVSTOP || ph->type==phVFRICATIVE)
  517. {
  518. phlist[ix].length = 128; // length_mod
  519. phlist[ix].env = PITCHfall;
  520. }
  521. phlist[ix].prepause = 0;
  522. phlist[ix].amp = 20; // default, will be changed later
  523. phlist[ix].pitch1 = 0x400;
  524. phlist[ix].pitch2 = 0x400;
  525. ix++;
  526. }
  527. phlist[ix].newword = 2; // end of clause
  528. phlist[ix].type = phPAUSE; // terminate with 2 Pause phonemes
  529. phlist[ix].length = post_pause; // length of the pause, depends on the punctuation
  530. phlist[ix].sourceix = end_sourceix;
  531. phlist[ix].synthflags = 0;
  532. phlist[ix++].ph = phoneme_tab[phonPAUSE];
  533. phlist[ix].type = phPAUSE;
  534. phlist[ix].length = 0;
  535. phlist[ix].sourceix=0;
  536. phlist[ix].synthflags = 0;
  537. phlist[ix++].ph = phoneme_tab[phonPAUSE_SHORT];
  538. n_phoneme_list = ix;
  539. } // end of MakePhonemeList