eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

phonemelist.c 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
  1. /*
  2. * Copyright (C) 2005 to 2014 by Jonathan Duddington
  3. * email: [email protected]
  4. * Copyright (C) 2015-2016 Reece H. Dunn
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, see: <http://www.gnu.org/licenses/>.
  18. */
  19. #include "config.h"
  20. #include <stdint.h>
  21. #include <stdio.h>
  22. #include <stdlib.h>
  23. #include <string.h>
  24. #include <espeak-ng/espeak_ng.h>
  25. #include <espeak-ng/speak_lib.h>
  26. #include "encoding.h"
  27. #include "speech.h"
  28. #include "phoneme.h"
  29. #include "synthesize.h"
  30. #include "translate.h"
  31. const unsigned char pause_phonemes[8] = {
  32. 0, phonPAUSE_VSHORT, phonPAUSE_SHORT, phonPAUSE, phonPAUSE_LONG, phonGLOTTALSTOP, phonPAUSE_LONG, phonPAUSE_LONG
  33. };
  34. extern int n_ph_list2;
  35. extern PHONEME_LIST2 ph_list2[N_PHONEME_LIST]; // first stage of text->phonemes
  36. static int SubstitutePhonemes(PHONEME_LIST *plist_out)
  37. {
  38. // Copy the phonemes list and perform any substitutions that are required for the
  39. // current voice
  40. int ix;
  41. int k;
  42. int replace_flags;
  43. int n_plist_out = 0;
  44. int word_end;
  45. PHONEME_LIST2 *plist2;
  46. PHONEME_TAB *next = NULL;
  47. for (ix = 0; (ix < n_ph_list2) && (n_plist_out < N_PHONEME_LIST); ix++) {
  48. plist2 = &ph_list2[ix];
  49. // don't do any substitution if the language has been temporarily changed
  50. if (!(plist2->synthflags & SFLAG_SWITCHED_LANG)) {
  51. if (ix < (n_ph_list2 -1))
  52. next = phoneme_tab[ph_list2[ix+1].phcode];
  53. word_end = 0;
  54. if ((plist2+1)->sourceix || ((next != 0) && (next->type == phPAUSE)))
  55. word_end = 1; // this phoneme is the end of a word
  56. // check whether a Voice has specified that we should replace this phoneme
  57. for (k = 0; k < n_replace_phonemes; k++) {
  58. if (plist2->phcode == replace_phonemes[k].old_ph) {
  59. replace_flags = replace_phonemes[k].type;
  60. if ((replace_flags & 1) && (word_end == 0))
  61. continue; // this replacement only occurs at the end of a word
  62. if ((replace_flags & 2) && ((plist2->stresslevel & 0x7) > 3))
  63. continue; // this replacement doesn't occur in stressed syllables
  64. if ((replace_flags & 4) && (plist2->sourceix == 0))
  65. continue; // this replacement only occurs at the start of a word
  66. // substitute the replacement phoneme
  67. plist2->phcode = replace_phonemes[k].new_ph;
  68. if ((plist2->stresslevel > 1) && (phoneme_tab[plist2->phcode]->phflags & phUNSTRESSED))
  69. plist2->stresslevel = 0; // the replacement must be unstressed
  70. break;
  71. }
  72. }
  73. if (plist2->phcode == 0)
  74. continue; // phoneme has been replaced by NULL, so don't copy it
  75. }
  76. // copy phoneme into the output list
  77. memcpy(&plist_out[n_plist_out], plist2, sizeof(PHONEME_LIST2));
  78. plist_out[n_plist_out].ph = phoneme_tab[plist2->phcode];
  79. plist_out[n_plist_out].type = plist_out[n_plist_out].ph->type;
  80. n_plist_out++;
  81. }
  82. return n_plist_out;
  83. }
  84. void MakePhonemeList(Translator *tr, int post_pause, int start_sentence)
  85. {
  86. int ix = 0;
  87. int j;
  88. int insert_ph = 0;
  89. PHONEME_LIST *phlist;
  90. PHONEME_TAB *ph;
  91. PHONEME_TAB *next, *next2;
  92. int unstress_count = 0;
  93. int word_stress = 0;
  94. int current_phoneme_tab;
  95. int max_stress;
  96. int voicing;
  97. int regression;
  98. int end_sourceix;
  99. int alternative;
  100. int delete_count;
  101. int word_start;
  102. int inserted;
  103. int deleted;
  104. PHONEME_DATA phdata;
  105. int n_ph_list3;
  106. PHONEME_LIST *plist3;
  107. PHONEME_LIST *plist3_inserted = NULL;
  108. PHONEME_LIST ph_list3[N_PHONEME_LIST];
  109. PHONEME_LIST2 *plist2;
  110. WORD_PH_DATA worddata;
  111. memset(&worddata, 0, sizeof(worddata));
  112. plist2 = ph_list2;
  113. phlist = phoneme_list;
  114. end_sourceix = plist2[n_ph_list2-1].sourceix;
  115. // is the last word of the clause unstressed ?
  116. max_stress = 0;
  117. for (j = n_ph_list2-3; j >= 0; j--) {
  118. // start with the last phoneme (before the terminating pauses) and move backwards
  119. if ((plist2[j].stresslevel & 0x7f) > max_stress)
  120. max_stress = plist2[j].stresslevel & 0x7f;
  121. if (plist2[j].sourceix != 0)
  122. break;
  123. }
  124. if (max_stress < 4) {
  125. // the last word is unstressed, look for a previous word that can be stressed
  126. while (--j >= 0) {
  127. if (plist2[j].synthflags & SFLAG_PROMOTE_STRESS) { // dictionary flags indicated that this stress can be promoted
  128. plist2[j].stresslevel = 4; // promote to stressed
  129. break;
  130. }
  131. if (plist2[j].stresslevel >= 4) {
  132. // found a stressed syllable, so stop looking
  133. break;
  134. }
  135. }
  136. }
  137. // look for switch of phoneme tables
  138. delete_count = 0;
  139. current_phoneme_tab = tr->phoneme_tab_ix;
  140. for (j = 0; j < n_ph_list2; j++) {
  141. if (current_phoneme_tab != tr->phoneme_tab_ix)
  142. plist2[j].synthflags |= SFLAG_SWITCHED_LANG;
  143. if (delete_count > 0)
  144. memcpy(&plist2[j-delete_count], &plist2[j], sizeof(plist2[0]));
  145. if (plist2[j].phcode == phonSWITCH) {
  146. if ((!(plist2[j].synthflags & SFLAG_EMBEDDED)) && (
  147. (plist2[j].tone_ph == current_phoneme_tab) ||
  148. (plist2[j+1].phcode == phonSWITCH) ||
  149. ((plist2[j+1].phcode == phonPAUSE) && (plist2[j+2].phcode == phonSWITCH))
  150. )) {
  151. // delete this phonSWITCH if it's switching to the current phoneme table, or
  152. // delete this phonSWITCH if its followed by another phonSWITCH
  153. delete_count++;
  154. } else
  155. current_phoneme_tab = plist2[j].tone_ph;
  156. }
  157. }
  158. n_ph_list2 -= delete_count;
  159. if ((regression = tr->langopts.param[LOPT_REGRESSIVE_VOICING]) != 0) {
  160. // set consonant clusters to all voiced or all unvoiced
  161. // Regressive
  162. int type;
  163. int stop_propagation = 0;
  164. voicing = 0;
  165. for (j = n_ph_list2-1; j >= 0; j--) {
  166. ph = phoneme_tab[plist2[j].phcode];
  167. if (ph == NULL)
  168. continue;
  169. if (plist2[j].synthflags & SFLAG_SWITCHED_LANG) {
  170. stop_propagation = 0;
  171. voicing = 0;
  172. if (regression & 0x100)
  173. voicing = 1; // word-end devoicing
  174. continue;
  175. }
  176. type = ph->type;
  177. if (regression & 0x2) {
  178. // [v] amd [v;] don't cause regression, or [R^]
  179. if (((ph->mnemonic & 0xff) == 'v') || ((ph->mnemonic & 0xff) == 'R')) {
  180. stop_propagation = 1;
  181. if (regression & 0x10)
  182. voicing = 0;
  183. }
  184. }
  185. if ((type == phSTOP) || type == (phFRICATIVE)) {
  186. if ((voicing == 0) && (regression & 0xf))
  187. voicing = 1;
  188. else if ((voicing == 2) && (ph->end_type != 0)) // use end_type field for voicing_switch for consonants
  189. plist2[j].phcode = ph->end_type; // change to voiced equivalent
  190. } else if ((type == phVSTOP) || type == (phVFRICATIVE)) {
  191. if ((voicing == 0) && (regression & 0xf))
  192. voicing = 2;
  193. else if ((voicing == 1) && (ph->end_type != 0))
  194. plist2[j].phcode = ph->end_type; // change to unvoiced equivalent
  195. } else {
  196. if (regression & 0x8) {
  197. // LANG=Polish, propagate through liquids and nasals
  198. if ((type == phPAUSE) || (type == phVOWEL))
  199. voicing = 0;
  200. } else
  201. voicing = 0;
  202. }
  203. if (stop_propagation) {
  204. voicing = 0;
  205. stop_propagation = 0;
  206. }
  207. if (plist2[j].sourceix) {
  208. if (regression & 0x04) {
  209. // stop propagation at a word boundary
  210. voicing = 0;
  211. }
  212. if (regression & 0x100) {
  213. // devoice word-final consonants, unless propagating voiced
  214. if (voicing == 0)
  215. voicing = 1;
  216. }
  217. }
  218. }
  219. }
  220. n_ph_list3 = SubstitutePhonemes(ph_list3) - 2;
  221. for (j = 0; (j < n_ph_list3) && (ix < N_PHONEME_LIST-3);) {
  222. if (ph_list3[j].sourceix) {
  223. // start of a word
  224. int k;
  225. int nextw;
  226. word_stress = 0;
  227. // find the highest stress level in this word
  228. for (nextw = j; nextw < n_ph_list3;) {
  229. if (ph_list3[nextw].stresslevel > word_stress)
  230. word_stress = ph_list3[nextw].stresslevel;
  231. nextw++;
  232. if (ph_list3[nextw].sourceix)
  233. break; // start of the next word
  234. }
  235. for (k = j; k < nextw; k++)
  236. ph_list3[k].wordstress = word_stress;
  237. j = nextw;
  238. } else
  239. j++;
  240. }
  241. // transfer all the phonemes of the clause into phoneme_list
  242. ph = phoneme_tab[phonPAUSE];
  243. ph_list3[0].ph = ph;
  244. word_start = 1;
  245. for (j = 0; insert_ph || ((j < n_ph_list3) && (ix < N_PHONEME_LIST-3)); j++) {
  246. plist3 = &ph_list3[j];
  247. inserted = 0;
  248. deleted = 0;
  249. if (insert_ph != 0) {
  250. // we have a (linking) phoneme which we need to insert here
  251. next = phoneme_tab[plist3->phcode]; // this phoneme, i.e. after the insert
  252. // re-use the previous entry for the inserted phoneme.
  253. // That's OK because we don't look backwards from plist3 *** but CountVowelPosition() and isAfterStress does !!!
  254. j--;
  255. plist3 = plist3_inserted = &ph_list3[j];
  256. if (j > 0) {
  257. // move all previous phonemes in the word back one place
  258. int k;
  259. if (word_start > 0) {
  260. k = word_start;
  261. word_start--;
  262. } else
  263. k = 2; // No more space, don't loose the start of word mark at ph_list2[word_start]
  264. for (; k <= j; k++)
  265. memcpy(&ph_list3[k-1], &ph_list3[k], sizeof(*plist3));
  266. }
  267. memset(&plist3[0], 0, sizeof(*plist3));
  268. plist3->phcode = insert_ph;
  269. ph = phoneme_tab[insert_ph];
  270. plist3->ph = ph;
  271. insert_ph = 0;
  272. inserted = 1; // don't insert the same phoneme repeatedly
  273. } else {
  274. // otherwise get the next phoneme from the list
  275. if (plist3->sourceix != 0)
  276. word_start = j;
  277. ph = phoneme_tab[plist3->phcode];
  278. plist3[0].ph = ph;
  279. if (plist3->phcode == phonSWITCH) {
  280. // change phoneme table
  281. SelectPhonemeTable(plist3->tone_ph);
  282. }
  283. next = phoneme_tab[plist3[1].phcode]; // the phoneme after this one
  284. plist3[1].ph = next;
  285. }
  286. if (ph == NULL) continue;
  287. InterpretPhoneme(tr, 0x100, plist3, &phdata, &worddata);
  288. if ((alternative = phdata.pd_param[pd_CHANGE_NEXTPHONEME]) > 0) {
  289. ph_list3[j+1].ph = phoneme_tab[alternative];
  290. ph_list3[j+1].phcode = alternative;
  291. ph_list3[j+1].type = phoneme_tab[alternative]->type;
  292. next = phoneme_tab[alternative];
  293. }
  294. if (((alternative = phdata.pd_param[pd_INSERTPHONEME]) > 0) && (inserted == 0)) {
  295. // PROBLEM: if we insert a phoneme before a vowel then we loose the stress.
  296. PHONEME_TAB *ph2;
  297. ph2 = ph;
  298. insert_ph = plist3->phcode;
  299. ph = phoneme_tab[alternative];
  300. plist3->ph = ph;
  301. plist3->phcode = alternative;
  302. if (ph->type == phVOWEL) {
  303. plist3->synthflags |= SFLAG_SYLLABLE;
  304. if (ph2->type != phVOWEL)
  305. plist3->stresslevel = 0; // change from non-vowel to vowel, make sure it's unstressed
  306. } else
  307. plist3->synthflags &= ~SFLAG_SYLLABLE;
  308. // re-interpret the changed phoneme
  309. // But it doesn't obey a second ChangePhoneme()
  310. InterpretPhoneme(tr, 0x100, plist3, &phdata, &worddata);
  311. }
  312. if ((alternative = phdata.pd_param[pd_CHANGEPHONEME]) > 0) {
  313. PHONEME_TAB *ph2;
  314. ph2 = ph;
  315. ph = phoneme_tab[alternative];
  316. plist3->ph = ph;
  317. plist3->phcode = alternative;
  318. if (alternative == 1)
  319. deleted = 1; // NULL phoneme, discard
  320. else {
  321. if (ph->type == phVOWEL) {
  322. plist3->synthflags |= SFLAG_SYLLABLE;
  323. if (ph2->type != phVOWEL)
  324. plist3->stresslevel = 0; // change from non-vowel to vowel, make sure it's unstressed
  325. } else
  326. plist3->synthflags &= ~SFLAG_SYLLABLE;
  327. // re-interpret the changed phoneme
  328. // But it doesn't obey a second ChangePhoneme()
  329. InterpretPhoneme(tr, 0x100, plist3, &phdata, &worddata);
  330. }
  331. }
  332. if ((ph->type == phVOWEL) && (deleted == 0)) {
  333. PHONEME_LIST *p;
  334. // Check for consecutive unstressed syllables, even across word boundaries.
  335. // Do this after changing phonemes according to stress level.
  336. if (plist3->stresslevel <= 1) {
  337. // an unstressed vowel
  338. unstress_count++;
  339. if (tr->langopts.stress_flags & 0x08) {
  340. // change sequences of consecutive unstressed vowels in unstressed words to diminished stress (TEST)
  341. for (p = plist3+1; p->type != phPAUSE; p++) {
  342. if (p->type == phVOWEL) {
  343. if (p->stresslevel <= 1) {
  344. if (plist3->wordstress < 4)
  345. plist3->stresslevel = 0;
  346. if (p->wordstress < 4)
  347. p->stresslevel = 0;
  348. }
  349. break;
  350. }
  351. }
  352. } else {
  353. if ((unstress_count > 1) && ((unstress_count & 1) == 0)) {
  354. // in a sequence of unstressed syllables, reduce alternate syllables to 'diminished'
  355. // stress. But not for the last phoneme of a stressed word
  356. if ((tr->langopts.stress_flags & S_NO_DIM) || ((word_stress > 3) && ((plist3+1)->sourceix != 0))) {
  357. // An unstressed final vowel of a stressed word
  358. unstress_count = 1; // try again for next syllable
  359. } else
  360. plist3->stresslevel = 0; // change stress to 'diminished'
  361. }
  362. }
  363. } else
  364. unstress_count = 0;
  365. }
  366. if ((plist3+1)->synthflags & SFLAG_LENGTHEN) {
  367. static char types_double[] = { phFRICATIVE, phVFRICATIVE, phNASAL, phLIQUID, 0 };
  368. if ((j > 0) && (strchr(types_double, next->type))) {
  369. // lengthen this consonant by doubling it
  370. // BUT, can't insert a phoneme at position plist3[0] because it crashes PrevPh()
  371. insert_ph = next->code;
  372. (plist3+1)->synthflags ^= SFLAG_LENGTHEN;
  373. }
  374. }
  375. if ((plist3+1)->sourceix != 0) {
  376. int x;
  377. if (tr->langopts.vowel_pause && (ph->type != phPAUSE)) {
  378. if ((ph->type != phVOWEL) && (tr->langopts.vowel_pause & 0x200)) {
  379. // add a pause after a word which ends in a consonant
  380. insert_ph = phonPAUSE_NOLINK;
  381. }
  382. if (next->type == phVOWEL) {
  383. if ((x = tr->langopts.vowel_pause & 0x0c) != 0) {
  384. // break before a word which starts with a vowel
  385. if (x == 0xc)
  386. insert_ph = phonPAUSE_NOLINK;
  387. else
  388. insert_ph = phonPAUSE_VSHORT;
  389. }
  390. if ((ph->type == phVOWEL) && ((x = tr->langopts.vowel_pause & 0x03) != 0)) {
  391. // adjacent vowels over a word boundary
  392. if (x == 2)
  393. insert_ph = phonPAUSE_SHORT;
  394. else
  395. insert_ph = phonPAUSE_VSHORT;
  396. }
  397. if (((plist3+1)->stresslevel >= 4) && (tr->langopts.vowel_pause & 0x100)) {
  398. // pause before a words which starts with a stressed vowel
  399. insert_ph = phonPAUSE_SHORT;
  400. }
  401. }
  402. }
  403. if ((plist3 != plist3_inserted) && (ix > 0)) {
  404. if ((x = (tr->langopts.word_gap & 0x7)) != 0) {
  405. if ((x > 1) || ((insert_ph != phonPAUSE_SHORT) && (insert_ph != phonPAUSE_NOLINK))) {
  406. // don't reduce the pause
  407. insert_ph = pause_phonemes[x];
  408. }
  409. }
  410. if (option_wordgap > 0)
  411. insert_ph = phonPAUSE_LONG;
  412. }
  413. }
  414. next2 = phoneme_tab[plist3[2].phcode];
  415. plist3[2].ph = next2;
  416. if ((insert_ph == 0) && (phdata.pd_param[pd_APPENDPHONEME] != 0))
  417. insert_ph = phdata.pd_param[pd_APPENDPHONEME];
  418. if (deleted == 0) {
  419. phlist[ix].ph = ph;
  420. phlist[ix].type = ph->type;
  421. phlist[ix].env = PITCHfall; // default, can be changed in the "intonation" module
  422. phlist[ix].synthflags = plist3->synthflags;
  423. phlist[ix].stresslevel = plist3->stresslevel & 0xf;
  424. phlist[ix].wordstress = plist3->wordstress;
  425. phlist[ix].tone_ph = plist3->tone_ph;
  426. phlist[ix].sourceix = 0;
  427. phlist[ix].phcode = ph->code;
  428. if (plist3->sourceix != 0) {
  429. phlist[ix].sourceix = plist3->sourceix;
  430. phlist[ix].newword = 1; // this phoneme is the start of a word
  431. if (start_sentence) {
  432. phlist[ix].newword = 5; // start of sentence + start of word
  433. start_sentence = 0;
  434. }
  435. } else
  436. phlist[ix].newword = 0;
  437. phlist[ix].length = phdata.pd_param[i_SET_LENGTH]*2;
  438. if ((ph->code == phonPAUSE_LONG) && (option_wordgap > 0) && (plist3[1].sourceix != 0)) {
  439. phlist[ix].ph = phoneme_tab[phonPAUSE_SHORT];
  440. phlist[ix].length = option_wordgap*14; // 10mS per unit at the default speed
  441. }
  442. if (ph->type == phVOWEL || ph->type == phLIQUID || ph->type == phNASAL || ph->type == phVSTOP || ph->type == phVFRICATIVE || (ph->phflags & phPREVOICE)) {
  443. phlist[ix].length = 128; // length_mod
  444. phlist[ix].env = PITCHfall;
  445. }
  446. phlist[ix].prepause = 0;
  447. phlist[ix].amp = 20; // default, will be changed later
  448. phlist[ix].pitch1 = 255;
  449. phlist[ix].pitch2 = 255;
  450. ix++;
  451. }
  452. }
  453. phlist[ix].newword = 2; // end of clause
  454. phlist[ix].phcode = phonPAUSE;
  455. phlist[ix].type = phPAUSE; // terminate with 2 Pause phonemes
  456. phlist[ix].length = post_pause; // length of the pause, depends on the punctuation
  457. phlist[ix].sourceix = end_sourceix;
  458. phlist[ix].synthflags = 0;
  459. phlist[ix++].ph = phoneme_tab[phonPAUSE];
  460. phlist[ix].phcode = phonPAUSE;
  461. phlist[ix].type = phPAUSE;
  462. phlist[ix].length = 0;
  463. phlist[ix].sourceix = 0;
  464. phlist[ix].synthflags = 0;
  465. phlist[ix++].ph = phoneme_tab[phonPAUSE_SHORT];
  466. n_phoneme_list = ix;
  467. }