eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

synthdata.cpp 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657
  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2007 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include "StdAfx.h"
  20. #include <stdio.h>
  21. #include <stdlib.h>
  22. #include <ctype.h>
  23. #include <wctype.h>
  24. #include <string.h>
  25. #include "speak_lib.h"
  26. #include "speech.h"
  27. #include "phoneme.h"
  28. #include "synthesize.h"
  29. #include "voice.h"
  30. #include "translate.h"
  31. #include "wave.h"
  32. const char *version_string = "1.29.06 11.Sep.07";
  33. const int version_phdata = 0x012901;
  34. int option_device_number = -1;
  35. // copy the current phoneme table into here
  36. int n_phoneme_tab;
  37. PHONEME_TAB *phoneme_tab[N_PHONEME_TAB];
  38. unsigned char phoneme_tab_flags[N_PHONEME_TAB]; // bit 0: not inherited
  39. unsigned int *phoneme_index=NULL;
  40. char *spects_data=NULL;
  41. unsigned char *wavefile_data=NULL;
  42. static unsigned char *phoneme_tab_data = NULL;
  43. int n_phoneme_tables;
  44. PHONEME_TAB_LIST phoneme_tab_list[N_PHONEME_TABS];
  45. static int phoneme_tab_number = 0;
  46. int wavefile_ix; // a wavefile to play along with the synthesis
  47. int wavefile_amp;
  48. int wavefile_ix2;
  49. int wavefile_amp2;
  50. int seq_len_adjust;
  51. int vowel_transition[4];
  52. int vowel_transition0;
  53. int vowel_transition1;
  54. void FormantTransitions(frameref_t *seq, int &n_frames, PHONEME_TAB *this_ph, PHONEME_TAB *other_ph, int which);
  55. int FormantTransition2(frameref_t *seq, int &n_frames, unsigned int data1, unsigned int data2, PHONEME_TAB *other_ph, int which);
  56. const char *PhonemeTabName(void)
  57. {//=============================
  58. return(phoneme_tab_list[phoneme_tab_number].name);
  59. }
  60. static int ReadPhFile(char **ptr, const char *fname)
  61. {//=================================================
  62. FILE *f_in;
  63. char *p;
  64. unsigned int length;
  65. char buf[sizeof(path_home)+40];
  66. sprintf(buf,"%s%c%s",path_home,PATHSEP,fname);
  67. length = GetFileLength(buf);
  68. if((f_in = fopen(buf,"rb")) == NULL)
  69. {
  70. fprintf(stderr,"Can't read data file: '%s'\n",buf);
  71. return(1);
  72. }
  73. if(*ptr != NULL)
  74. Free(*ptr);
  75. if((p = Alloc(length)) == NULL)
  76. {
  77. fclose(f_in);
  78. return(-1);
  79. }
  80. if(fread(p,1,length,f_in) != length)
  81. {
  82. fclose(f_in);
  83. return(-1);
  84. }
  85. *ptr = p;
  86. fclose(f_in);
  87. return(0);
  88. } // end of ReadPhFile
  89. int LoadPhData()
  90. {//=============
  91. int ix;
  92. int n_phonemes;
  93. int version;
  94. int result = 1;
  95. unsigned char *p;
  96. if(ReadPhFile((char **)(&phoneme_tab_data),"phontab") != 0)
  97. return(-1);
  98. if(ReadPhFile((char **)(&phoneme_index),"phonindex") != 0)
  99. return(-1);
  100. if(ReadPhFile((char **)(&spects_data),"phondata") != 0)
  101. return(-1);
  102. wavefile_data = (unsigned char *)spects_data;
  103. // read the version number from the first 4 bytes of phondata
  104. version = 0;
  105. for(ix=0; ix<4; ix++)
  106. {
  107. version += (wavefile_data[ix] << (ix*8));
  108. }
  109. if(version != version_phdata)
  110. {
  111. result = version;
  112. }
  113. // set up phoneme tables
  114. p = phoneme_tab_data;
  115. n_phoneme_tables = p[0];
  116. p+=4;
  117. for(ix=0; ix<n_phoneme_tables; ix++)
  118. {
  119. n_phonemes = p[0];
  120. phoneme_tab_list[ix].n_phonemes = p[0];
  121. phoneme_tab_list[ix].includes = p[1];
  122. p += 4;
  123. memcpy(phoneme_tab_list[ix].name,p,N_PHONEME_TAB_NAME);
  124. p += N_PHONEME_TAB_NAME;
  125. phoneme_tab_list[ix].phoneme_tab_ptr = (PHONEME_TAB *)p;
  126. p += (n_phonemes * sizeof(PHONEME_TAB));
  127. }
  128. if(phoneme_tab_number >= n_phoneme_tables)
  129. phoneme_tab_number = 0;
  130. return(result);
  131. } // end of LoadPhData
  132. void FreePhData(void)
  133. {//==================
  134. Free(phoneme_tab_data);
  135. Free(phoneme_index);
  136. Free(spects_data);
  137. phoneme_tab_data=NULL;
  138. phoneme_index=NULL;
  139. spects_data=NULL;
  140. }
  141. int LookupPh(const char *string)
  142. {//=============================
  143. int ix;
  144. unsigned char c;
  145. unsigned int mnem;
  146. // Pack up to 4 characters into a word
  147. mnem = 0;
  148. for(ix=0; ix<4; ix++)
  149. {
  150. if(string[ix]==0) break;
  151. c = string[ix];
  152. mnem |= (c << (ix*8));
  153. }
  154. for(ix=0; ix<n_phoneme_tab; ix++)
  155. {
  156. if(phoneme_tab[ix] == NULL)
  157. continue;
  158. if(phoneme_tab[ix]->mnemonic == mnem)
  159. return(ix);
  160. }
  161. return(0);
  162. }
  163. static unsigned int LookupSound2(int index, unsigned int other_phcode, int control)
  164. {//================================================================================
  165. // control=1 get formant transition data only
  166. unsigned int code;
  167. unsigned int value, value2;
  168. while((value = phoneme_index[index++]) != 0)
  169. {
  170. if((code = (value & 0xff)) == other_phcode)
  171. {
  172. while(((value2 = phoneme_index[index]) != 0) && ((value2 & 0xff) < 8))
  173. {
  174. switch(value2 & 0xff)
  175. {
  176. case 0:
  177. // next entry is a wavefile to be played along with the synthesis
  178. if(control==0)
  179. {
  180. wavefile_ix = value2 >> 8;
  181. }
  182. break;
  183. case 1:
  184. if(control==0)
  185. {
  186. seq_len_adjust = value2 >> 8;
  187. }
  188. break;
  189. case 2:
  190. if(control==0)
  191. {
  192. seq_len_adjust = value2 >> 8;
  193. seq_len_adjust = -seq_len_adjust;
  194. }
  195. break;
  196. case 3:
  197. if(control==0)
  198. {
  199. wavefile_amp = value2 >> 8;
  200. }
  201. break;
  202. case 4:
  203. // formant transition data, 2 words
  204. vowel_transition[0] = value2 >> 8;
  205. vowel_transition[1] = phoneme_index[index++ + 1];
  206. break;
  207. case 5:
  208. // formant transition data, 2 words
  209. vowel_transition[2] = value2 >> 8;
  210. vowel_transition[3] = phoneme_index[index++ + 1];
  211. break;
  212. }
  213. index++;
  214. }
  215. return(value >> 8);
  216. }
  217. else
  218. if((code == 4) || (code == 5))
  219. {
  220. // formant transition data, ignore next word of data
  221. index++;
  222. }
  223. }
  224. return(3); // not found
  225. } // end of LookupSound2
  226. unsigned int LookupSound(PHONEME_TAB *this_ph, PHONEME_TAB *other_ph, int which, int *match_level, int control)
  227. {//============================================================================================================
  228. // follows, 1 other_ph preceeds this_ph, 2 other_ph follows this_ph
  229. // control: 1= get formant transition data only
  230. int spect_list;
  231. int spect_list2;
  232. int s_list;
  233. unsigned char virtual_ph;
  234. int result;
  235. int level=0;
  236. unsigned int other_code;
  237. unsigned int other_virtual;
  238. if(control==0)
  239. {
  240. wavefile_ix = 0;
  241. wavefile_amp = 32;
  242. seq_len_adjust = 0;
  243. }
  244. memset(vowel_transition,0,sizeof(vowel_transition));
  245. other_code = other_ph->code;
  246. if(phoneme_tab[other_code]->type == phPAUSE)
  247. other_code = phonPAUSE_SHORT; // use this version of Pause for matching
  248. if(which==1)
  249. {
  250. spect_list = this_ph->after;
  251. virtual_ph = this_ph->start_type;
  252. spect_list2 = phoneme_tab[virtual_ph]->after;
  253. other_virtual = other_ph->end_type;
  254. }
  255. else
  256. {
  257. spect_list = this_ph->before;
  258. virtual_ph = this_ph->end_type;
  259. spect_list2 = phoneme_tab[virtual_ph]->before;
  260. other_virtual = other_ph->start_type;
  261. }
  262. result = 3;
  263. // look for ph1-ph2 combination
  264. if((s_list = spect_list) != 0)
  265. {
  266. if((result = LookupSound2(s_list,other_code,control)) != 3)
  267. {
  268. level = 2;
  269. }
  270. else
  271. if(other_virtual != 0)
  272. {
  273. if((result = LookupSound2(spect_list,other_virtual,control)) != 3)
  274. {
  275. level = 1;
  276. }
  277. }
  278. }
  279. // not found, look in a virtual phoneme if one is given for this phoneme
  280. if((result==3) && (virtual_ph != 0) && ((s_list = spect_list2) != 0))
  281. {
  282. if((result = LookupSound2(s_list,other_code,control)) != 3)
  283. {
  284. level = 1;
  285. }
  286. else
  287. if(other_virtual != 0)
  288. {
  289. if((result = LookupSound2(spect_list2,other_virtual,control)) != 3)
  290. {
  291. level = 1;
  292. }
  293. }
  294. }
  295. if(match_level != NULL)
  296. *match_level = level;
  297. if(result==0)
  298. return(0); // NULL was given in the phoneme source
  299. // note: values = 1 indicates use the default for this phoneme, even though we found a match
  300. // which set a secondary reference
  301. if(result >= 4)
  302. {
  303. // values 1-3 can be used for special codes
  304. // 1 = DFT from the phoneme source file
  305. return(result);
  306. }
  307. // no match found for other_ph, return the default
  308. return(LookupSound2(this_ph->spect,phonPAUSE,control));
  309. } // end of LookupSound
  310. frameref_t *LookupSpect(PHONEME_TAB *this_ph, PHONEME_TAB *prev_ph, PHONEME_TAB *next_ph,
  311. int which, int *match_level, int *n_frames, PHONEME_LIST *plist)
  312. {//=========================================================================================================
  313. int ix;
  314. int nf;
  315. int nf1;
  316. int seq_break;
  317. frameref_t *frames;
  318. int length1;
  319. int length_std;
  320. int length_factor;
  321. SPECT_SEQ *seq;
  322. SPECT_SEQ *seq2;
  323. PHONEME_TAB *next2_ph;
  324. static frameref_t frames_buf[N_SEQ_FRAMES];
  325. PHONEME_TAB *other_ph;
  326. if(which == 1)
  327. other_ph = prev_ph;
  328. else
  329. other_ph = next_ph;
  330. if((ix = LookupSound(this_ph,other_ph,which,match_level,0)) < 4)
  331. return(NULL);
  332. seq = (SPECT_SEQ *)(&spects_data[ix]);
  333. nf = seq->n_frames;
  334. if(nf >= N_SEQ_FRAMES)
  335. nf = N_SEQ_FRAMES - 1;
  336. seq_break = 0;
  337. length1 = 0;
  338. for(ix=0; ix<nf; ix++)
  339. {
  340. frames_buf[ix].frame = &seq->frame[ix];
  341. frames_buf[ix].frflags = seq->frame[ix].frflags;
  342. frames_buf[ix].length = seq->frame[ix].length;
  343. if(seq->frame[ix].frflags & FRFLAG_VOWEL_CENTRE)
  344. seq_break = ix;
  345. }
  346. frames = &frames_buf[0];
  347. if(seq_break > 0)
  348. {
  349. if(which==1)
  350. {
  351. nf = seq_break + 1;
  352. }
  353. else
  354. {
  355. frames = &frames_buf[seq_break]; // body of vowel, skip past initial frames
  356. nf -= seq_break;
  357. }
  358. }
  359. // do we need to modify a frame for blending with a consonant?
  360. if(this_ph->type == phVOWEL)
  361. {
  362. if((which==2) && ((frames[nf-1].frflags & FRFLAG_BREAK) == 0))
  363. {
  364. // lookup formant transition for the following phoneme
  365. if(*match_level == 0)
  366. {
  367. LookupSound(next_ph,this_ph,1,NULL,1);
  368. seq_len_adjust += FormantTransition2(frames,nf,vowel_transition[2],vowel_transition[3],next_ph,which);
  369. }
  370. else
  371. if(next_ph->phflags == phVOWEL2)
  372. {
  373. // not really a consonant, rather a coloured vowel
  374. if(LookupSound(next_ph,this_ph,1,NULL,1) == 0)
  375. {
  376. next2_ph = plist[2].ph;
  377. LookupSound(next2_ph,next_ph,1,NULL,1);
  378. seq_len_adjust += FormantTransition2(frames,nf,vowel_transition[2],vowel_transition[3],next2_ph,which);
  379. }
  380. }
  381. }
  382. else
  383. {
  384. if(*match_level == 0)
  385. seq_len_adjust = FormantTransition2(frames,nf,vowel_transition0,vowel_transition1,prev_ph,which);
  386. }
  387. // FormantTransitions(frames,nf,this_ph,other_ph,which);
  388. }
  389. nf1 = nf - 1;
  390. for(ix=0; ix<nf1; ix++)
  391. length1 += frames[ix].length;
  392. if((wavefile_ix != 0) && ((wavefile_ix & 0x800000)==0))
  393. {
  394. // a secondary reference has been returned, which is not a wavefile
  395. // add these spectra to the main sequence
  396. seq2 = (SPECT_SEQ *)(&spects_data[wavefile_ix]);
  397. // first frame of the addition just sets the length of the last frame of the main seq
  398. nf--;
  399. for(ix=0; ix<seq2->n_frames; ix++)
  400. {
  401. frames[nf].length = seq2->frame[ix].length;
  402. if(ix > 0)
  403. frames[nf].frame = &seq2->frame[ix];
  404. nf++;
  405. }
  406. wavefile_ix = 0;
  407. }
  408. if((this_ph->type == phVOWEL) && (length1 > 0))
  409. {
  410. if(which==2)
  411. {
  412. // adjust the length of the main part to match the standard length specified for the vowel
  413. // less the front part of the vowel and any added suffix
  414. length_std = this_ph->std_length + seq_len_adjust - 45;
  415. if(length_std < 10)
  416. length_std = 10;
  417. if(plist->synthflags & SFLAG_LENGTHEN)
  418. length_std += phoneme_tab[phonLENGTHEN]->std_length; // phoneme was followed by an extra : symbol
  419. // can adjust vowel length for stressed syllables here
  420. length_factor = (length_std * 256)/ length1;
  421. for(ix=0; ix<nf1; ix++)
  422. {
  423. frames[ix].length = (frames[ix].length * length_factor)/256;
  424. }
  425. }
  426. else
  427. {
  428. // front of a vowel
  429. if(*match_level == 0)
  430. {
  431. // allow very short vowels to have shorter front parts
  432. if(this_ph->std_length < 130)
  433. frames[0].length = (frames[0].length * this_ph->std_length)/130;
  434. }
  435. if(seq_len_adjust != 0)
  436. {
  437. length_std = 0;
  438. for(ix=0; ix<nf1; ix++)
  439. {
  440. length_std += frames[ix].length;
  441. }
  442. length_factor = ((length_std + seq_len_adjust) * 256)/length_std;
  443. for(ix=0; ix<nf1; ix++)
  444. {
  445. frames[ix].length = (frames[ix].length * length_factor)/256;
  446. }
  447. }
  448. }
  449. }
  450. *n_frames = nf;
  451. return(frames);
  452. } // end of LookupSpect
  453. unsigned char *LookupEnvelope(int ix)
  454. {//================================
  455. if(ix==0)
  456. return(NULL);
  457. return((unsigned char *)&spects_data[phoneme_index[ix]]);
  458. }
  459. static void SetUpPhonemeTable(int number, int recursing)
  460. {//=====================================================
  461. int ix;
  462. int includes;
  463. int ph_code;
  464. PHONEME_TAB *phtab;
  465. if(recursing==0)
  466. {
  467. memset(phoneme_tab_flags,0,sizeof(phoneme_tab_flags));
  468. }
  469. if((includes = phoneme_tab_list[number].includes) > 0)
  470. {
  471. // recursively include base phoneme tables
  472. SetUpPhonemeTable(includes-1,1);
  473. }
  474. // now add the phonemes from this table
  475. phtab = phoneme_tab_list[number].phoneme_tab_ptr;
  476. for(ix=0; ix<phoneme_tab_list[number].n_phonemes; ix++)
  477. {
  478. ph_code = phtab[ix].code;
  479. phoneme_tab[ph_code] = &phtab[ix];
  480. if(ph_code > n_phoneme_tab)
  481. n_phoneme_tab = ph_code;
  482. if(recursing == 0)
  483. phoneme_tab_flags[ph_code] |= 1; // not inherited
  484. }
  485. } // end of SetUpPhonemeTable
  486. void SelectPhonemeTable(int number)
  487. {//================================
  488. n_phoneme_tab = 0;
  489. SetUpPhonemeTable(number,0); // recursively for included phoneme tables
  490. n_phoneme_tab++;
  491. } // end of SelectPhonemeTable
  492. int LookupPhonemeTable(const char *name)
  493. {//=====================================
  494. int ix;
  495. for(ix=0; ix<n_phoneme_tables; ix++)
  496. {
  497. if(strcmp(name,phoneme_tab_list[ix].name)==0)
  498. {
  499. phoneme_tab_number = ix;
  500. break;
  501. }
  502. }
  503. if(ix == n_phoneme_tables)
  504. return(-1);
  505. return(ix);
  506. }
  507. int SelectPhonemeTableName(const char *name)
  508. {//=========================================
  509. // Look up a phoneme set by name, and select it if it exists
  510. // Returns the phoneme table number
  511. int ix;
  512. if((ix = LookupPhonemeTable(name)) == -1)
  513. return(-1);
  514. SelectPhonemeTable(ix);
  515. return(ix);
  516. } // end of DelectPhonemeTableName
  517. void LoadConfig(void)
  518. {//==================
  519. // Load configuration file, if one exists
  520. char buf[sizeof(path_home)+10];
  521. FILE *f;
  522. int ix;
  523. char c1;
  524. char *p;
  525. char string[120];
  526. sprintf(buf,"%s%c%s",path_home,PATHSEP,"config");
  527. if((f = fopen(buf,"r"))==NULL)
  528. {
  529. return;
  530. }
  531. while(fgets(buf,sizeof(buf),f)!=NULL)
  532. {
  533. if(memcmp(buf,"tone",4)==0)
  534. {
  535. ReadTonePoints(&buf[5],tone_points);
  536. }
  537. else
  538. if(memcmp(buf,"pa_device",9)==0)
  539. {
  540. sscanf(&buf[7],"%d",&option_device_number);
  541. }
  542. else
  543. if(memcmp(buf,"soundicon",9)==0)
  544. {
  545. ix = sscanf(&buf[10],"_%c %s",&c1,string);
  546. if(ix==2)
  547. {
  548. soundicon_tab[n_soundicon_tab].name = c1;
  549. p = Alloc(strlen(string)+1);
  550. strcpy(p,string);
  551. soundicon_tab[n_soundicon_tab].filename = p;
  552. soundicon_tab[n_soundicon_tab++].length = 0;
  553. }
  554. }
  555. }
  556. } // end of LoadConfig