eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

phonemelist.cpp 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591
  1. /***************************************************************************
  2. * Copyright (C) 2007 by Jonathan Duddington *
  3. * [email protected] *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 2 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, write to the *
  17. * Free Software Foundation, Inc., *
  18. * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
  19. ***************************************************************************/
  20. #include "StdAfx.h"
  21. #include <stdio.h>
  22. #include <stdlib.h>
  23. #include <string.h>
  24. #include "speak_lib.h"
  25. #include "speech.h"
  26. #include "phoneme.h"
  27. #include "synthesize.h"
  28. #include "translate.h"
  29. int Translator::ChangePhonemes(PHONEME_LIST2 *phlist, int n_ph, int index, PHONEME_TAB *ph, CHANGEPH *ch)
  30. {//======================================================================================================
  31. // Called for each phoneme in the phoneme list, to allow a language to make changes
  32. // flags: bit 0=1 last phoneme in a word
  33. // bit 1=1 this is the highest stressed vowel in the current word
  34. // bit 2=1 after the highest stressed vowel in the current word
  35. // bit 3=1 the phonemes were specified explicitly, or found from an entry in the xx_list dictionary
  36. // ph The current phoneme
  37. return(0);
  38. }
  39. int Translator::SubstitutePhonemes(PHONEME_LIST2 *plist_out)
  40. {//=========================================================
  41. // Copy the phonemes list and perform any substitutions that are required for the
  42. // current voice
  43. int ix;
  44. int k;
  45. int replace_flags;
  46. int n_plist_out = 0;
  47. int word_end;
  48. int max_stress = -1;
  49. int switched_language = 0;
  50. int max_stress_posn=0;
  51. int n_syllables = 0;
  52. int syllable = 0;
  53. int syllable_stressed = 0;
  54. PHONEME_LIST2 *plist2;
  55. PHONEME_LIST2 *pl;
  56. PHONEME_TAB *next=NULL;
  57. for(ix=0; (ix < n_ph_list2) && (n_plist_out < N_PHONEME_LIST); ix++)
  58. {
  59. plist2 = &ph_list2[ix];
  60. if(plist2->phcode == phonSWITCH)
  61. switched_language ^= 1;
  62. // don't do any substitution if the language has been temporarily changed
  63. if(switched_language == 0)
  64. {
  65. if(ix < (n_ph_list2 -1))
  66. next = phoneme_tab[ph_list2[ix+1].phcode];
  67. word_end = 0;
  68. if((plist2+1)->sourceix || ((next != 0) && (next->type == phPAUSE)))
  69. word_end = 1; // this phoneme is the end of a word
  70. if(langopts.phoneme_change != 0)
  71. {
  72. // this language does changes to phonemes after translation
  73. if(plist2->sourceix)
  74. {
  75. // start of a word, find the stressed vowel
  76. syllable = 0;
  77. syllable_stressed = 0;
  78. n_syllables = 0;
  79. max_stress = -1;
  80. max_stress_posn = ix;
  81. for(k=ix; k < n_ph_list2; k++)
  82. {
  83. if(((pl = &ph_list2[k])->sourceix != 0) && (k > ix))
  84. break;
  85. pl->stress &= 0xf;
  86. if(phoneme_tab[pl->phcode]->type == phVOWEL)
  87. {
  88. n_syllables++;
  89. if(pl->stress > max_stress)
  90. {
  91. syllable_stressed = n_syllables;
  92. max_stress = pl->stress;
  93. max_stress_posn = k;
  94. }
  95. }
  96. }
  97. }
  98. if(phoneme_tab[plist2->phcode]->type == phVOWEL)
  99. {
  100. syllable++;
  101. }
  102. // make any language specific changes
  103. int flags;
  104. CHANGEPH ch;
  105. flags = 0;
  106. if(ix == max_stress_posn)
  107. flags |= 2;
  108. if(ix > max_stress_posn)
  109. flags |= 4;
  110. if(ph_list2[ix].synthflags & SFLAG_DICTIONARY)
  111. flags |= 8;
  112. ch.flags = flags | word_end;
  113. ch.stress = plist2->stress;
  114. ch.stress_highest = max_stress;
  115. ch.n_vowels = n_syllables;
  116. ch.vowel_this = syllable;
  117. ch.vowel_stressed = syllable_stressed;
  118. ChangePhonemes(ph_list2, n_ph_list2, ix, phoneme_tab[ph_list2[ix].phcode], &ch);
  119. }
  120. // check whether a Voice has specified that we should replace this phoneme
  121. for(k=0; k<n_replace_phonemes; k++)
  122. {
  123. if(plist2->phcode == replace_phonemes[k].old_ph)
  124. {
  125. replace_flags = replace_phonemes[k].type;
  126. if((replace_flags & 1) && (word_end == 0))
  127. continue; // this replacement only occurs at the end of a word
  128. if((replace_flags & 2) && ((plist2->stress & 0x7) > 3))
  129. continue; // this replacement doesn't occur in stressed syllables
  130. // substitute the replacement phoneme
  131. plist2->phcode = replace_phonemes[k].new_ph;
  132. break;
  133. }
  134. }
  135. if(plist2->phcode == 0)
  136. {
  137. continue; // phoneme has been replaced by NULL, so don't copy it
  138. }
  139. }
  140. // copy phoneme into the output list
  141. memcpy(&plist_out[n_plist_out++],plist2,sizeof(PHONEME_LIST2));
  142. }
  143. return(n_plist_out);
  144. } // end of SubstitutePhonemes
  145. void Translator::MakePhonemeList(int post_pause, int start_sentence)
  146. {//============================================================================================
  147. int ix=0;
  148. int j;
  149. int insert_ph = 0;
  150. int insert_synthflags = 0;
  151. PHONEME_LIST *phlist;
  152. PHONEME_TAB *ph;
  153. PHONEME_TAB *prev, *next, *next2;
  154. int unstress_count = 0;
  155. int word_has_stress = 0;
  156. int switched_language = 0;
  157. int max_stress;
  158. int voicing;
  159. int regression;
  160. int end_sourceix;
  161. int alternative;
  162. PHONEME_LIST2 ph_list3[N_PHONEME_LIST];
  163. static PHONEME_LIST2 ph_list2_null = {0,0,0,0,0};
  164. PHONEME_LIST2 *plist2 = &ph_list2_null;
  165. phlist = phoneme_list;
  166. end_sourceix = ph_list2[n_ph_list2-1].sourceix;
  167. // is the last word of the clause unstressed ?
  168. max_stress = 0;
  169. for(j=n_ph_list2-3; j>=0; j--)
  170. {
  171. // start with the last phoneme (before the terminating pauses) and move forwards
  172. if((ph_list2[j].stress & 0x7f) > max_stress)
  173. max_stress = ph_list2[j].stress & 0x7f;
  174. if(ph_list2[j].sourceix != 0)
  175. break;
  176. }
  177. if(max_stress < 4)
  178. {
  179. // the last word is unstressed, look for a previous word that can be stressed
  180. while(--j >= 0)
  181. {
  182. if(ph_list2[j].stress & 0x80) // dictionary flags indicated that this stress can be promoted
  183. {
  184. ph_list2[j].stress = 4; // promote to stressed
  185. break;
  186. }
  187. if((ph_list2[j].stress & 0x7f) >= 4)
  188. {
  189. // found a stressed syllable, so stop looking
  190. break;
  191. }
  192. }
  193. }
  194. if((regression = langopts.param[LOPT_REGRESSIVE_VOICING]) != 0)
  195. {
  196. // set consonant clusters to all voiced or all unvoiced
  197. // Regressive
  198. int type;
  199. voicing = 0;
  200. for(j=n_ph_list2-1; j>=0; j--)
  201. {
  202. ph = phoneme_tab[ph_list2[j].phcode];
  203. if(ph == NULL)
  204. continue;
  205. if(ph->code == phonSWITCH)
  206. switched_language ^= 1;
  207. if(switched_language)
  208. continue;
  209. type = ph->type;
  210. if(regression & 0x2)
  211. {
  212. // LANG=Russian, [v] amd [v;] don't cause regression, or [R^]
  213. if((ph->mnemonic == 'v') || (ph->mnemonic == ((';'<<8)+'v')) || ((ph->mnemonic & 0xff)== 'R'))
  214. type = phLIQUID;
  215. }
  216. if((type==phSTOP) || type==(phFRICATIVE))
  217. {
  218. if(voicing==0)
  219. {
  220. voicing = 1;
  221. }
  222. else
  223. if((voicing==2) && ((ph->phflags & phALTERNATIVE)==phSWITCHVOICING))
  224. {
  225. ph_list2[j].phcode = ph->alternative_ph; // change to voiced equivalent
  226. }
  227. }
  228. else
  229. if((type==phVSTOP) || type==(phVFRICATIVE))
  230. {
  231. if(voicing==0)
  232. {
  233. voicing = 2;
  234. }
  235. else
  236. if((voicing==1) && ((ph->phflags & phALTERNATIVE)==phSWITCHVOICING))
  237. {
  238. ph_list2[j].phcode = ph->alternative_ph; // change to unvoiced equivalent
  239. }
  240. }
  241. else
  242. {
  243. if(regression & 0x8)
  244. {
  245. // LANG=Polish, propagate through liquids and nasals
  246. if((type == phPAUSE) || (type == phVOWEL))
  247. voicing = 0;
  248. }
  249. else
  250. {
  251. voicing = 0;
  252. }
  253. }
  254. if((regression & 0x4) && (ph_list2[j].sourceix))
  255. {
  256. // stop propagation at a word boundary
  257. voicing = 0;
  258. }
  259. }
  260. }
  261. n_ph_list2 = SubstitutePhonemes(ph_list3) - 2;
  262. // transfer all the phonemes of the clause into phoneme_list
  263. ph = phoneme_tab[phonPAUSE];
  264. switched_language = 0;
  265. for(j=0; insert_ph || ((j<n_ph_list2) && (ix < N_PHONEME_LIST-3)); j++)
  266. {
  267. prev = ph;
  268. plist2 = &ph_list3[j];
  269. if(insert_ph != 0)
  270. {
  271. // we have a (linking) phoneme which we need to insert here
  272. j--;
  273. next = phoneme_tab[plist2->phcode]; // this phoneme, i.e. after the insert
  274. plist2 = &ph_list2_null;
  275. ph = phoneme_tab[insert_ph];
  276. plist2->synthflags = insert_synthflags;
  277. insert_ph = 0;
  278. insert_synthflags = 0;
  279. }
  280. else
  281. {
  282. // otherwise get the next phoneme from the list
  283. ph = phoneme_tab[plist2->phcode];
  284. if(plist2->phcode == phonSWITCH)
  285. {
  286. // change phoneme table
  287. SelectPhonemeTable(plist2->tone_number);
  288. switched_language ^= SFLAG_SWITCHED_LANG;
  289. }
  290. next = phoneme_tab[(plist2+1)->phcode]; // the phoneme after this one
  291. }
  292. if(plist2->sourceix)
  293. word_has_stress = 0; // start of a word
  294. if(ph == NULL) continue;
  295. if(ph->type == phVOWEL)
  296. {
  297. // check for consecutive unstressed syllables
  298. if(plist2->stress == 0)
  299. {
  300. // an unstressed vowel
  301. unstress_count++;
  302. if((unstress_count > 1) && ((unstress_count & 1)==0))
  303. {
  304. // in a sequence of unstressed syllables, reduce alternate syllables to 'diminished'
  305. // stress. But not for the last phoneme of a stressed word
  306. if((langopts.stress_flags & 0x2) || (word_has_stress && ((plist2+1)->sourceix!=0)))
  307. {
  308. // An unstressed final vowel of a stressed word
  309. unstress_count=1; // try again for next syllable
  310. }
  311. else
  312. {
  313. plist2->stress = 1; // change stress to 'diminished'
  314. }
  315. }
  316. }
  317. else
  318. {
  319. unstress_count = 0;
  320. if(plist2->stress > 3)
  321. word_has_stress = 1; // word has a primary or a secondary stress
  322. }
  323. }
  324. if(ph->alternative_ph > 0)
  325. {
  326. alternative = 0;
  327. switch(ph->phflags & phALTERNATIVE)
  328. {
  329. // This phoneme changes if vowel follows, or doesn't follow, depending on its phNOTFOLLOWS flag
  330. case phBEFORENOTVOWEL:
  331. if(next->type != phVOWEL)
  332. alternative = ph->alternative_ph;
  333. break;
  334. case phBEFOREVOWELPAUSE:
  335. if((next->type == phVOWEL) || (next->type == phPAUSE))
  336. alternative = ph->alternative_ph;
  337. break;
  338. case phBEFOREVOWEL:
  339. if(next->type == phVOWEL)
  340. alternative = ph->alternative_ph;
  341. break;
  342. }
  343. if(alternative == 1)
  344. continue; // NULL phoneme, discard
  345. if(alternative > 1)
  346. {
  347. ph = phoneme_tab[alternative];
  348. if(ph->type == phVOWEL)
  349. plist2->synthflags |= SFLAG_SYLLABLE;
  350. else
  351. plist2->synthflags &= ~SFLAG_SYLLABLE;
  352. }
  353. }
  354. if(langopts.param[LOPT_REDUCE_T])
  355. {
  356. if((ph->mnemonic == 't') && (plist2->sourceix == 0) && ((prev->type == phVOWEL) || (prev->mnemonic == 'n')))
  357. {
  358. if(((plist2+1)->sourceix == 0) && ((plist2+1)->stress < 3) && (next->type == phVOWEL))
  359. {
  360. ph = phoneme_tab[phonT_REDUCED];
  361. }
  362. }
  363. }
  364. if((ph->reduce_to != 0) && (ph->type != phVOWEL) && !(plist2->synthflags & SFLAG_DICTIONARY))
  365. {
  366. // reduction for vowels has already been done in SetWordStress
  367. int reduce_level;
  368. if(next->type == phVOWEL)
  369. {
  370. reduce_level = (ph->phflags >> 28) & 7;
  371. if((&plist2[1])->stress < reduce_level)
  372. {
  373. // look at the stress of the following vowel
  374. ph = phoneme_tab[ph->reduce_to];
  375. }
  376. }
  377. }
  378. if((plist2+1)->synthflags & SFLAG_LENGTHEN)
  379. {
  380. static char types_double[] = {phFRICATIVE,phVFRICATIVE,phNASAL,phLIQUID,0};
  381. if(strchr(types_double,next->type))
  382. {
  383. // lengthen this consonant by doubling it
  384. insert_ph = next->code;
  385. (plist2+1)->synthflags ^= SFLAG_LENGTHEN;
  386. }
  387. }
  388. if((plist2+1)->sourceix != 0)
  389. {
  390. int x;
  391. if(langopts.word_gap & 1)
  392. {
  393. insert_ph = phonPAUSE_VSHORT;
  394. }
  395. if(langopts.vowel_pause && (ph->type != phPAUSE) && (next->type == phVOWEL))
  396. {
  397. if(langopts.vowel_pause & 0x04)
  398. {
  399. // break before a word which starts with a vowel
  400. insert_ph = phonPAUSE_VSHORT;
  401. }
  402. if((ph->type == phVOWEL) && ((x = langopts.vowel_pause & 0x03) != 0))
  403. {
  404. // adjacent vowels over a word boundary
  405. if(x == 2)
  406. insert_ph = phonPAUSE_SHORT;
  407. else
  408. insert_ph = phonPAUSE_VSHORT;
  409. }
  410. if(((plist2+1)->stress >= 4) && (langopts.vowel_pause & 0x08))
  411. {
  412. // pause before a words which starts with a stressed vowel
  413. insert_ph = phonPAUSE_SHORT;
  414. }
  415. }
  416. }
  417. next2 = phoneme_tab[(plist2+2)->phcode];
  418. #ifdef deleted
  419. if((ph->type != phVOWEL) && (ph->type != phPAUSE) //&& ((ph->phflags & phVOICED)==0)
  420. && (next->type == phLIQUID) && (next->mnemonic != ';') && (next2->type != phVOWEL))
  421. {
  422. // semi-vowel surrounded by consonants. precede by a short schwa
  423. insert_ph = phonSCHWA_SHORT;
  424. insert_synthflags = SFLAG_SYLLABLE;
  425. }
  426. if((ph->type == phLIQUID) && (prev->type != phVOWEL) && (next->type != phVOWEL))
  427. {
  428. // semi-vowel surrounded by consonants. precede by a short schwa
  429. insert_ph = phonSCHWA_SHORT;
  430. insert_synthflags = SFLAG_SYLLABLE;
  431. }
  432. #endif
  433. if((insert_ph == 0) && (ph->link_out != 0) && (((plist2+1)->synthflags & SFLAG_EMBEDDED)==0))
  434. {
  435. if(ph->phflags & phAPPENDPH)
  436. {
  437. // always append the specified phoneme, unless it already is the next phoneme
  438. if((ph->link_out != (plist2+1)->phcode) && (next->type == phVOWEL))
  439. // if(ph->link_out != (plist2+1)->phcode)
  440. {
  441. insert_ph = ph->link_out;
  442. }
  443. }
  444. else
  445. if(((langopts.word_gap & 2)==0) || ((plist2+1)->sourceix == 0))
  446. {
  447. // This phoneme can be linked to a following vowel by inserting a linking phoneme
  448. if(next->type == phVOWEL)
  449. insert_ph = ph->link_out;
  450. else
  451. if(next->code == phonPAUSE_SHORT)
  452. {
  453. // Pause followed by Vowel, replace the Short Pause with the linking phoneme,
  454. if(next2->type == phVOWEL)
  455. (plist2+1)->phcode = ph->link_out; // replace pause by linking phoneme
  456. }
  457. }
  458. }
  459. if(ph->phflags & phVOICED)
  460. {
  461. // check that a voiced consonant is preceded or followed by a vowel or liquid
  462. // and if not, add a short schwa
  463. // not yet implemented
  464. }
  465. phlist[ix].ph = ph;
  466. phlist[ix].type = ph->type;
  467. phlist[ix].env = PITCHfall; // default, can be changed in the "intonation" module
  468. phlist[ix].synthflags = plist2->synthflags | switched_language;
  469. phlist[ix].tone = plist2->stress & 0xf;
  470. phlist[ix].tone_ph = plist2->tone_number;
  471. phlist[ix].sourceix = 0;
  472. if(plist2->sourceix != 0)
  473. {
  474. phlist[ix].sourceix = plist2->sourceix;
  475. phlist[ix].newword = 1; // this phoneme is the start of a word
  476. if(start_sentence)
  477. {
  478. phlist[ix].newword = 5; // start of sentence + start of word
  479. start_sentence = 0;
  480. }
  481. }
  482. else
  483. {
  484. phlist[ix].newword = 0;
  485. }
  486. phlist[ix].length = ph->std_length;
  487. if(ph->type==phVOWEL || ph->type==phLIQUID || ph->type==phNASAL || ph->type==phVSTOP || ph->type==phVFRICATIVE)
  488. {
  489. phlist[ix].length = 128; // length_mod
  490. phlist[ix].env = PITCHfall;
  491. }
  492. phlist[ix].prepause = 0;
  493. phlist[ix].amp = 20; // default, will be changed later
  494. phlist[ix].pitch1 = 0x400;
  495. phlist[ix].pitch2 = 0x400;
  496. ix++;
  497. }
  498. phlist[ix].newword = 2; // end of clause
  499. phlist[ix].type = phPAUSE; // terminate with 2 Pause phonemes
  500. phlist[ix].length = post_pause; // length of the pause, depends on the punctuation
  501. phlist[ix].sourceix = end_sourceix;
  502. phlist[ix].synthflags = 0;
  503. phlist[ix++].ph = phoneme_tab[phonPAUSE];
  504. phlist[ix].type = phPAUSE;
  505. phlist[ix].length = 0;
  506. phlist[ix].sourceix=0;
  507. phlist[ix].synthflags = 0;
  508. phlist[ix++].ph = phoneme_tab[phonPAUSE_VSHORT];
  509. n_phoneme_list = ix;
  510. } // end of MakePhonemeList