eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

synthdata.cpp 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643
  1. /***************************************************************************
  2. * Copyright (C) 2005,2006 by Jonathan Duddington *
  3. * [email protected] *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 2 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, write to the *
  17. * Free Software Foundation, Inc., *
  18. * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
  19. ***************************************************************************/
  20. #include "StdAfx.h"
  21. #include <stdio.h>
  22. #include <stdlib.h>
  23. #include <ctype.h>
  24. #include <wctype.h>
  25. #include <string.h>
  26. #include "speak_lib.h"
  27. #include "speech.h"
  28. #include "voice.h"
  29. #include "phoneme.h"
  30. #include "synthesize.h"
  31. #include "translate.h"
  32. #include "wave.h"
  33. const char *version_string = "1.24.07 11.May.07";
  34. const int version_phdata = 0x012201;
  35. int option_device_number = -1;
  36. // copy the current phoneme table into here
  37. int n_phoneme_tab;
  38. PHONEME_TAB *phoneme_tab[N_PHONEME_TAB];
  39. unsigned char phoneme_tab_flags[N_PHONEME_TAB]; // bit 0: not inherited
  40. unsigned int *phoneme_index=NULL;
  41. char *spects_data=NULL;
  42. unsigned char *wavefile_data=NULL;
  43. static unsigned char *phoneme_tab_data = NULL;
  44. int n_phoneme_tables;
  45. PHONEME_TAB_LIST phoneme_tab_list[N_PHONEME_TABS];
  46. static int phoneme_tab_number = 0;
  47. int wavefile_ix; // a wavefile to play along with the synthesis
  48. int wavefile_amp;
  49. int wavefile_ix2;
  50. int wavefile_amp2;
  51. int seq_len_adjust;
  52. int vowel_transition[4];
  53. int vowel_transition0;
  54. int vowel_transition1;
  55. void FormantTransitions(frameref_t *seq, int &n_frames, PHONEME_TAB *this_ph, PHONEME_TAB *other_ph, int which);
  56. int FormantTransition2(frameref_t *seq, int &n_frames, unsigned int data1, unsigned int data2, PHONEME_TAB *other_ph, int which);
  57. const char *PhonemeTabName(void)
  58. {//=============================
  59. return(phoneme_tab_list[phoneme_tab_number].name);
  60. }
  61. static int ReadPhFile(char **ptr, const char *fname)
  62. {//=================================================
  63. FILE *f_in;
  64. char *p;
  65. unsigned int length;
  66. char buf[200];
  67. sprintf(buf,"%s%c%s",path_home,PATHSEP,fname);
  68. length = GetFileLength(buf);
  69. if((f_in = fopen(buf,"rb")) == NULL)
  70. {
  71. fprintf(stderr,"Can't read data file: '%s'\n",buf);
  72. return(1);
  73. }
  74. if(*ptr != NULL)
  75. Free(*ptr);
  76. if((p = Alloc(length)) == NULL)
  77. {
  78. fclose(f_in);
  79. return(-1);
  80. }
  81. if(fread(p,1,length,f_in) != length)
  82. {
  83. fclose(f_in);
  84. return(-1);
  85. }
  86. *ptr = p;
  87. fclose(f_in);
  88. return(0);
  89. } // end of ReadPhFile
  90. int LoadPhData()
  91. {//=============
  92. int ix;
  93. int n_phonemes;
  94. int version;
  95. int result = 1;
  96. unsigned char *p;
  97. if(ReadPhFile((char **)(&phoneme_tab_data),"phontab") != 0)
  98. return(-1);
  99. if(ReadPhFile((char **)(&phoneme_index),"phonindex") != 0)
  100. return(-1);
  101. if(ReadPhFile((char **)(&spects_data),"phondata") != 0)
  102. return(-1);
  103. wavefile_data = (unsigned char *)spects_data;
  104. version = *((unsigned int *)spects_data);
  105. if(version != version_phdata)
  106. {
  107. result = version;
  108. }
  109. // set up phoneme tables
  110. p = phoneme_tab_data;
  111. n_phoneme_tables = p[0];
  112. p+=4;
  113. for(ix=0; ix<n_phoneme_tables; ix++)
  114. {
  115. n_phonemes = p[0];
  116. phoneme_tab_list[ix].n_phonemes = p[0];
  117. phoneme_tab_list[ix].includes = p[1];
  118. p += 4;
  119. memcpy(phoneme_tab_list[ix].name,p,N_PHONEME_TAB_NAME);
  120. p += N_PHONEME_TAB_NAME;
  121. phoneme_tab_list[ix].phoneme_tab_ptr = (PHONEME_TAB *)p;
  122. p += (n_phonemes * sizeof(PHONEME_TAB));
  123. }
  124. if(phoneme_tab_number >= n_phoneme_tables)
  125. phoneme_tab_number = 0;
  126. return(result);
  127. } // end of LoadPhData
  128. void FreePhData(void)
  129. {//==================
  130. Free(phoneme_tab_data);
  131. Free(phoneme_index);
  132. Free(spects_data);
  133. }
  134. int LookupPh(const char *string)
  135. {//=============================
  136. int ix;
  137. unsigned char c;
  138. unsigned int mnem;
  139. // Pack up to 4 characters into a word
  140. mnem = 0;
  141. for(ix=0; ix<4; ix++)
  142. {
  143. if(string[ix]==0) break;
  144. c = string[ix];
  145. mnem |= (c << (ix*8));
  146. }
  147. for(ix=0; ix<n_phoneme_tab; ix++)
  148. {
  149. if(phoneme_tab[ix] == NULL)
  150. continue;
  151. if(phoneme_tab[ix]->mnemonic == mnem)
  152. return(ix);
  153. }
  154. return(0);
  155. }
  156. static unsigned int LookupSound2(int index, unsigned int other_phcode, int control)
  157. {//================================================================================
  158. // control=1 get formant transition data only
  159. int code;
  160. unsigned int value, value2;
  161. while((value = phoneme_index[index++]) != 0)
  162. {
  163. if((code = (value & 0xff)) == other_phcode)
  164. {
  165. while(((value2 = phoneme_index[index]) != 0) && ((value2 & 0xff) < 8))
  166. {
  167. switch(value2 & 0xff)
  168. {
  169. case 0:
  170. // next entry is a wavefile to be played along with the synthesis
  171. if(control==0)
  172. {
  173. wavefile_ix = value2 >> 8;
  174. }
  175. break;
  176. case 1:
  177. if(control==0)
  178. seq_len_adjust = value2 >> 8;
  179. break;
  180. case 2:
  181. if(control==0)
  182. seq_len_adjust = -(value2 >> 8);
  183. break;
  184. case 3:
  185. if(control==0)
  186. {
  187. wavefile_amp = value2 >> 8;
  188. }
  189. break;
  190. case 4:
  191. // formant transition data, 2 words
  192. vowel_transition[0] = value2 >> 8;
  193. vowel_transition[1] = phoneme_index[index++ + 1];
  194. break;
  195. case 5:
  196. // formant transition data, 2 words
  197. vowel_transition[2] = value2 >> 8;
  198. vowel_transition[3] = phoneme_index[index++ + 1];
  199. break;
  200. }
  201. index++;
  202. }
  203. return(value >> 8);
  204. }
  205. else
  206. if((code == 4) || (code == 5))
  207. {
  208. // formant transition data, ignore next word of data
  209. index++;
  210. }
  211. }
  212. return(3); // not found
  213. } // end of LookupSound2
  214. unsigned int LookupSound(PHONEME_TAB *this_ph, PHONEME_TAB *other_ph, int which, int *match_level, int control)
  215. {//============================================================================================================
  216. // follows, 1 other_ph preceeds this_ph, 2 other_ph follows this_ph
  217. // control: 1= get formant transition data only
  218. int spect_list;
  219. int spect_list2;
  220. int s_list;
  221. unsigned char virtual_ph;
  222. int result;
  223. int level=0;
  224. unsigned int other_code;
  225. unsigned int other_virtual;
  226. if(control==0)
  227. {
  228. wavefile_ix = 0;
  229. wavefile_amp = 32;
  230. seq_len_adjust = 0;
  231. }
  232. memset(vowel_transition,0,sizeof(vowel_transition));
  233. other_code = other_ph->code;
  234. if(phoneme_tab[other_code]->type == phPAUSE)
  235. other_code = phonPAUSE_SHORT; // use this version of Pause for matching
  236. if(which==1)
  237. {
  238. spect_list = this_ph->after;
  239. virtual_ph = this_ph->start_type;
  240. spect_list2 = phoneme_tab[virtual_ph]->after;
  241. other_virtual = other_ph->end_type;
  242. }
  243. else
  244. {
  245. spect_list = this_ph->before;
  246. virtual_ph = this_ph->end_type;
  247. spect_list2 = phoneme_tab[virtual_ph]->before;
  248. other_virtual = other_ph->start_type;
  249. }
  250. result = 3;
  251. // look for ph1-ph2 combination
  252. if((s_list = spect_list) != 0)
  253. {
  254. if((result = LookupSound2(s_list,other_code,control)) != 3)
  255. {
  256. level = 2;
  257. }
  258. else
  259. if(other_virtual != 0)
  260. {
  261. if((result = LookupSound2(spect_list,other_virtual,control)) != 3)
  262. {
  263. level = 1;
  264. }
  265. }
  266. }
  267. // not found, look in a virtual phoneme if one is given for this phoneme
  268. if((result==3) && (virtual_ph != 0) && ((s_list = spect_list2) != 0))
  269. {
  270. if((result = LookupSound2(s_list,other_code,control)) != 3)
  271. {
  272. level = 1;
  273. }
  274. else
  275. if(other_virtual != 0)
  276. {
  277. if((result = LookupSound2(spect_list2,other_virtual,control)) != 3)
  278. {
  279. level = 1;
  280. }
  281. }
  282. }
  283. if(match_level != NULL)
  284. *match_level = level;
  285. if(result==0)
  286. return(0); // NULL was given in the phoneme source
  287. // note: values = 1 indicates use the default for this phoneme, even though we found a match
  288. // which set a secondary reference
  289. if(result >= 4)
  290. {
  291. // values 1-3 can be used for special codes
  292. // 1 = DFT from the phoneme source file
  293. return(result);
  294. }
  295. // no match found for other_ph, return the default
  296. return(LookupSound2(this_ph->spect,phonPAUSE,control));
  297. } // end of LookupSound
  298. frameref_t *LookupSpect(PHONEME_TAB *this_ph, PHONEME_TAB *prev_ph, PHONEME_TAB *next_ph,
  299. int which, int *match_level, int *n_frames, PHONEME_LIST *plist)
  300. {//=========================================================================================================
  301. int ix;
  302. int nf;
  303. int nf1;
  304. int seq_break;
  305. frameref_t *frames;
  306. int length1;
  307. int length_std;
  308. int length_factor;
  309. SPECT_SEQ *seq;
  310. SPECT_SEQ *seq2;
  311. PHONEME_TAB *next2_ph;
  312. static frameref_t frames_buf[N_SEQ_FRAMES];
  313. PHONEME_TAB *other_ph;
  314. if(which == 1)
  315. other_ph = prev_ph;
  316. else
  317. other_ph = next_ph;
  318. if((ix = LookupSound(this_ph,other_ph,which,match_level,0)) < 4)
  319. return(NULL);
  320. seq = (SPECT_SEQ *)(&spects_data[ix]);
  321. nf = seq->n_frames;
  322. if(nf >= N_SEQ_FRAMES)
  323. nf = N_SEQ_FRAMES - 1;
  324. seq_break = 0;
  325. length1 = 0;
  326. for(ix=0; ix<nf; ix++)
  327. {
  328. frames_buf[ix].frame = &seq->frame[ix];
  329. frames_buf[ix].frflags = seq->frame[ix].frflags;
  330. frames_buf[ix].length = seq->frame[ix].length;
  331. if(seq->frame[ix].frflags & FRFLAG_VOWEL_CENTRE)
  332. seq_break = ix;
  333. }
  334. frames = &frames_buf[0];
  335. if(seq_break > 0)
  336. {
  337. if(which==1)
  338. {
  339. nf = seq_break + 1;
  340. }
  341. else
  342. {
  343. frames = &frames_buf[seq_break]; // body of vowel, skip past initial frames
  344. nf -= seq_break;
  345. }
  346. }
  347. // do we need to modify a frame for blending with a consonant?
  348. if(this_ph->type == phVOWEL)
  349. {
  350. if((which==2) && ((frames[nf-1].frflags & FRFLAG_BREAK) == 0))
  351. {
  352. // lookup formant transition for the following phoneme
  353. if(*match_level == 0)
  354. {
  355. LookupSound(next_ph,this_ph,1,NULL,1);
  356. seq_len_adjust += FormantTransition2(frames,nf,vowel_transition[2],vowel_transition[3],next_ph,which);
  357. }
  358. else
  359. if(next_ph->phflags == phVOWEL2)
  360. {
  361. // not really a consonant, rather a coloured vowel
  362. if(LookupSound(next_ph,this_ph,1,NULL,1) == 0)
  363. {
  364. next2_ph = plist[2].ph;
  365. LookupSound(next2_ph,next_ph,1,NULL,1);
  366. seq_len_adjust += FormantTransition2(frames,nf,vowel_transition[2],vowel_transition[3],next2_ph,which);
  367. }
  368. }
  369. }
  370. else
  371. {
  372. if(*match_level == 0)
  373. seq_len_adjust = FormantTransition2(frames,nf,vowel_transition0,vowel_transition1,prev_ph,which);
  374. }
  375. // FormantTransitions(frames,nf,this_ph,other_ph,which);
  376. }
  377. nf1 = nf - 1;
  378. for(ix=0; ix<nf1; ix++)
  379. length1 += frames[ix].length;
  380. if((wavefile_ix != 0) && ((wavefile_ix & 0x800000)==0))
  381. {
  382. // a secondary reference has been returned, which is not a wavefile
  383. // add these spectra to the main sequence
  384. seq2 = (SPECT_SEQ *)(&spects_data[wavefile_ix]);
  385. // first frame of the addition just sets the length of the last frame of the main seq
  386. nf--;
  387. for(ix=0; ix<seq2->n_frames; ix++)
  388. {
  389. frames[nf].length = seq2->frame[ix].length;
  390. if(ix > 0)
  391. frames[nf].frame = &seq2->frame[ix];
  392. nf++;
  393. }
  394. wavefile_ix = 0;
  395. }
  396. if((this_ph->type == phVOWEL) && (length1 > 0))
  397. {
  398. if(which==2)
  399. {
  400. // adjust the length of the main part to match the standard length specified for the vowel
  401. // less the front part of the vowel and any added suffix
  402. length_std = this_ph->std_length + seq_len_adjust - 45;
  403. if(length_std < 10)
  404. length_std = 10;
  405. if(plist->synthflags & SFLAG_LENGTHEN)
  406. length_std += phoneme_tab[phonLENGTHEN]->std_length; // phoneme was followed by an extra : symbol
  407. // can adjust vowel length for stressed syllables here
  408. length_factor = (length_std * 256)/ length1;
  409. for(ix=0; ix<nf1; ix++)
  410. {
  411. frames[ix].length = (frames[ix].length * length_factor)/256;
  412. }
  413. }
  414. else
  415. {
  416. // front of a vowel
  417. if(*match_level == 0)
  418. {
  419. // allow very short vowels to have shorter front parts
  420. if(this_ph->std_length < 130)
  421. frames[0].length = (frames[0].length * this_ph->std_length)/130;
  422. }
  423. if(seq_len_adjust != 0)
  424. {
  425. length_std = 0;
  426. for(ix=0; ix<nf1; ix++)
  427. {
  428. length_std += frames[ix].length;
  429. }
  430. length_factor = ((length_std + seq_len_adjust) * 256)/length_std;
  431. for(ix=0; ix<nf1; ix++)
  432. {
  433. frames[ix].length = (frames[ix].length * length_factor)/256;
  434. }
  435. }
  436. }
  437. }
  438. *n_frames = nf;
  439. return(frames);
  440. } // end of LookupSpect
  441. unsigned char *LookupEnvelope(int ix)
  442. {//================================
  443. if(ix==0)
  444. return(NULL);
  445. return((unsigned char *)&spects_data[phoneme_index[ix]]);
  446. }
  447. static void SetUpPhonemeTable(int number, int recursing)
  448. {//=====================================================
  449. int ix;
  450. int includes;
  451. int ph_code;
  452. PHONEME_TAB *phtab;
  453. if(recursing==0)
  454. {
  455. memset(phoneme_tab_flags,0,sizeof(phoneme_tab_flags));
  456. }
  457. if((includes = phoneme_tab_list[number].includes) > 0)
  458. {
  459. // recursively include base phoneme tables
  460. SetUpPhonemeTable(includes-1,1);
  461. }
  462. // now add the phonemes from this table
  463. phtab = phoneme_tab_list[number].phoneme_tab_ptr;
  464. for(ix=0; ix<phoneme_tab_list[number].n_phonemes; ix++)
  465. {
  466. ph_code = phtab[ix].code;
  467. phoneme_tab[ph_code] = &phtab[ix];
  468. if(ph_code > n_phoneme_tab)
  469. n_phoneme_tab = ph_code;
  470. if(recursing == 0)
  471. phoneme_tab_flags[ph_code] |= 1; // not inherited
  472. }
  473. } // end of SetUpPhonemeTable
  474. void SelectPhonemeTable(int number)
  475. {//================================
  476. n_phoneme_tab = 0;
  477. SetUpPhonemeTable(number,0); // recursively for included phoneme tables
  478. n_phoneme_tab++;
  479. } // end of SelectPhonemeTable
  480. int LookupPhonemeTable(const char *name)
  481. {//=====================================
  482. int ix;
  483. for(ix=0; ix<n_phoneme_tables; ix++)
  484. {
  485. if(strcmp(name,phoneme_tab_list[ix].name)==0)
  486. {
  487. phoneme_tab_number = ix;
  488. break;
  489. }
  490. }
  491. if(ix == n_phoneme_tables)
  492. return(-1);
  493. return(ix);
  494. }
  495. int SelectPhonemeTableName(const char *name)
  496. {//=========================================
  497. // Look up a phoneme set by name, and select it if it exists
  498. // Returns the phoneme table number
  499. int ix;
  500. if((ix = LookupPhonemeTable(name)) == -1)
  501. return(-1);
  502. SelectPhonemeTable(ix);
  503. return(ix);
  504. } // end of DelectPhonemeTableName
  505. void LoadConfig(void)
  506. {//==================
  507. // Load configuration file, if one exists
  508. char buf[130];
  509. FILE *f;
  510. int ix;
  511. char c1;
  512. char *p;
  513. char string[120];
  514. sprintf(buf,"%s%c%s",path_home,PATHSEP,"config");
  515. if((f = fopen(buf,"r"))==NULL)
  516. {
  517. return;
  518. }
  519. while(fgets(buf,sizeof(buf),f)!=NULL)
  520. {
  521. if(memcmp(buf,"tone",4)==0)
  522. {
  523. ReadTonePoints(&buf[5],tone_points);
  524. }
  525. else
  526. if(memcmp(buf,"pa_device",9)==0)
  527. {
  528. sscanf(&buf[7],"%d",&option_device_number);
  529. }
  530. else
  531. if(memcmp(buf,"soundicon",9)==0)
  532. {
  533. ix = sscanf(&buf[10],"_%c %s",&c1,string);
  534. if(ix==2)
  535. {
  536. soundicon_tab[n_soundicon_tab].name = c1;
  537. p = Alloc(strlen(string+1));
  538. strcpy(p,string);
  539. soundicon_tab[n_soundicon_tab].filename = p;
  540. soundicon_tab[n_soundicon_tab++].length = 0;
  541. }
  542. }
  543. }
  544. } // end of LoadConfig