eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

synthdata.cpp 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681
  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2007 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include "StdAfx.h"
  20. #include <stdio.h>
  21. #include <stdlib.h>
  22. #include <ctype.h>
  23. #include <wctype.h>
  24. #include <string.h>
  25. #include "speak_lib.h"
  26. #include "speech.h"
  27. #include "phoneme.h"
  28. #include "synthesize.h"
  29. #include "voice.h"
  30. #include "translate.h"
  31. #include "wave.h"
  32. const char *version_string = "1.39.48 19.Dec.08";
  33. const int version_phdata = 0x013900;
  34. int option_device_number = -1;
  35. // copy the current phoneme table into here
  36. int n_phoneme_tab;
  37. int current_phoneme_table;
  38. PHONEME_TAB *phoneme_tab[N_PHONEME_TAB];
  39. unsigned char phoneme_tab_flags[N_PHONEME_TAB]; // bit 0: not inherited
  40. unsigned int *phoneme_index=NULL;
  41. char *spects_data=NULL;
  42. unsigned char *wavefile_data=NULL;
  43. static unsigned char *phoneme_tab_data = NULL;
  44. int n_phoneme_tables;
  45. PHONEME_TAB_LIST phoneme_tab_list[N_PHONEME_TABS];
  46. int phoneme_tab_number = 0;
  47. int wavefile_ix; // a wavefile to play along with the synthesis
  48. int wavefile_amp;
  49. int wavefile_ix2;
  50. int wavefile_amp2;
  51. int seq_len_adjust;
  52. int vowel_transition[4];
  53. int vowel_transition0;
  54. int vowel_transition1;
  55. int FormantTransition2(frameref_t *seq, int &n_frames, unsigned int data1, unsigned int data2, PHONEME_TAB *other_ph, int which);
  56. static char *ReadPhFile(void *ptr, const char *fname)
  57. {//==================================================
  58. FILE *f_in;
  59. char *p;
  60. unsigned int length;
  61. char buf[sizeof(path_home)+40];
  62. sprintf(buf,"%s%c%s",path_home,PATHSEP,fname);
  63. length = GetFileLength(buf);
  64. if((f_in = fopen(buf,"rb")) == NULL)
  65. {
  66. fprintf(stderr,"Can't read data file: '%s'\n",buf);
  67. return(NULL);
  68. }
  69. if(ptr != NULL)
  70. Free(ptr);
  71. if((p = Alloc(length)) == NULL)
  72. {
  73. fclose(f_in);
  74. return(NULL);
  75. }
  76. if(fread(p,1,length,f_in) != length)
  77. {
  78. fclose(f_in);
  79. return(NULL);
  80. }
  81. fclose(f_in);
  82. return(p);
  83. } // end of ReadPhFile
  84. int LoadPhData()
  85. {//=============
  86. int ix;
  87. int n_phonemes;
  88. int version;
  89. int result = 1;
  90. unsigned char *p;
  91. if((phoneme_tab_data = (unsigned char *)ReadPhFile((void *)(phoneme_tab_data),"phontab")) == NULL)
  92. return(-1);
  93. if((phoneme_index = (unsigned int *)ReadPhFile((void *)(phoneme_index),"phonindex")) == NULL)
  94. return(-1);
  95. if((spects_data = ReadPhFile((void *)(spects_data),"phondata")) == NULL)
  96. return(-1);
  97. wavefile_data = (unsigned char *)spects_data;
  98. // read the version number from the first 4 bytes of phondata
  99. version = 0;
  100. for(ix=0; ix<4; ix++)
  101. {
  102. version += (wavefile_data[ix] << (ix*8));
  103. }
  104. if(version != version_phdata)
  105. {
  106. result = version;
  107. }
  108. // set up phoneme tables
  109. p = phoneme_tab_data;
  110. n_phoneme_tables = p[0];
  111. p+=4;
  112. for(ix=0; ix<n_phoneme_tables; ix++)
  113. {
  114. n_phonemes = p[0];
  115. phoneme_tab_list[ix].n_phonemes = p[0];
  116. phoneme_tab_list[ix].includes = p[1];
  117. p += 4;
  118. memcpy(phoneme_tab_list[ix].name,p,N_PHONEME_TAB_NAME);
  119. p += N_PHONEME_TAB_NAME;
  120. phoneme_tab_list[ix].phoneme_tab_ptr = (PHONEME_TAB *)p;
  121. p += (n_phonemes * sizeof(PHONEME_TAB));
  122. }
  123. if(phoneme_tab_number >= n_phoneme_tables)
  124. phoneme_tab_number = 0;
  125. return(result);
  126. } // end of LoadPhData
  127. void FreePhData(void)
  128. {//==================
  129. Free(phoneme_tab_data);
  130. Free(phoneme_index);
  131. Free(spects_data);
  132. phoneme_tab_data=NULL;
  133. phoneme_index=NULL;
  134. spects_data=NULL;
  135. }
  136. int PhonemeCode(unsigned int mnem)
  137. {//===============================
  138. int ix;
  139. for(ix=0; ix<n_phoneme_tab; ix++)
  140. {
  141. if(phoneme_tab[ix] == NULL)
  142. continue;
  143. if(phoneme_tab[ix]->mnemonic == mnem)
  144. return(phoneme_tab[ix]->code);
  145. }
  146. return(0);
  147. }
  148. int LookupPhonemeString(const char *string)
  149. {//========================================
  150. int ix;
  151. unsigned char c;
  152. unsigned int mnem;
  153. // Pack up to 4 characters into a word
  154. mnem = 0;
  155. for(ix=0; ix<4; ix++)
  156. {
  157. if(string[ix]==0) break;
  158. c = string[ix];
  159. mnem |= (c << (ix*8));
  160. }
  161. return(PhonemeCode(mnem));
  162. }
  163. static unsigned int LookupSound2(int index, unsigned int other_phcode, int control)
  164. {//================================================================================
  165. // control=1 get formant transition data only
  166. unsigned int code;
  167. unsigned int value, value2;
  168. while((value = phoneme_index[index++]) != 0)
  169. {
  170. if((code = (value & 0xff)) == other_phcode)
  171. {
  172. while(((value2 = phoneme_index[index]) != 0) && ((value2 & 0xff) < 8))
  173. {
  174. switch(value2 & 0xff)
  175. {
  176. case 0:
  177. // next entry is a wavefile to be played along with the synthesis
  178. if(control==0)
  179. {
  180. wavefile_ix = value2 >> 8;
  181. }
  182. break;
  183. case 1:
  184. if(control==0)
  185. {
  186. seq_len_adjust = value2 >> 8;
  187. }
  188. break;
  189. case 2:
  190. if(control==0)
  191. {
  192. seq_len_adjust = value2 >> 8;
  193. seq_len_adjust = -seq_len_adjust;
  194. }
  195. break;
  196. case 3:
  197. if(control==0)
  198. {
  199. wavefile_amp = value2 >> 8;
  200. }
  201. break;
  202. case 4:
  203. // formant transition data, 2 words
  204. vowel_transition[0] = value2 >> 8;
  205. vowel_transition[1] = phoneme_index[index++ + 1];
  206. break;
  207. case 5:
  208. // formant transition data, 2 words
  209. vowel_transition[2] = value2 >> 8;
  210. vowel_transition[3] = phoneme_index[index++ + 1];
  211. break;
  212. }
  213. index++;
  214. }
  215. return(value >> 8);
  216. }
  217. else
  218. if((code == 4) || (code == 5))
  219. {
  220. // formant transition data, ignore next word of data
  221. index++;
  222. }
  223. }
  224. return(3); // not found
  225. } // end of LookupSound2
  226. unsigned int LookupSound(PHONEME_TAB *this_ph, PHONEME_TAB *other_ph, int which, int *match_level, int control)
  227. {//============================================================================================================
  228. // follows, 1 other_ph preceeds this_ph, 2 other_ph follows this_ph
  229. // control: 1= get formant transition data only
  230. int spect_list;
  231. int spect_list2;
  232. int s_list;
  233. unsigned char virtual_ph;
  234. int result;
  235. int level=0;
  236. unsigned int other_code;
  237. unsigned int other_virtual;
  238. if(control==0)
  239. {
  240. wavefile_ix = 0;
  241. wavefile_amp = 32;
  242. seq_len_adjust = 0;
  243. }
  244. memset(vowel_transition,0,sizeof(vowel_transition));
  245. other_code = other_ph->code;
  246. if(phoneme_tab[other_code]->type == phPAUSE)
  247. other_code = phonPAUSE_SHORT; // use this version of Pause for matching
  248. if(which==1)
  249. {
  250. spect_list = this_ph->after;
  251. virtual_ph = this_ph->start_type;
  252. spect_list2 = phoneme_tab[virtual_ph]->after;
  253. other_virtual = other_ph->end_type;
  254. }
  255. else
  256. {
  257. spect_list = this_ph->before;
  258. virtual_ph = this_ph->end_type;
  259. spect_list2 = phoneme_tab[virtual_ph]->before;
  260. other_virtual = other_ph->start_type;
  261. }
  262. result = 3;
  263. // look for ph1-ph2 combination
  264. if((s_list = spect_list) != 0)
  265. {
  266. if((result = LookupSound2(s_list,other_code,control)) != 3)
  267. {
  268. level = 2;
  269. }
  270. else
  271. if(other_virtual != 0)
  272. {
  273. if((result = LookupSound2(spect_list,other_virtual,control)) != 3)
  274. {
  275. level = 1;
  276. }
  277. }
  278. }
  279. // not found, look in a virtual phoneme if one is given for this phoneme
  280. if((result==3) && (virtual_ph != 0) && ((s_list = spect_list2) != 0))
  281. {
  282. if((result = LookupSound2(s_list,other_code,control)) != 3)
  283. {
  284. level = 1;
  285. }
  286. else
  287. if(other_virtual != 0)
  288. {
  289. if((result = LookupSound2(spect_list2,other_virtual,control)) != 3)
  290. {
  291. level = 1;
  292. }
  293. }
  294. }
  295. if(match_level != NULL)
  296. *match_level = level;
  297. if(result==0)
  298. return(0); // NULL was given in the phoneme source
  299. // note: values = 1 indicates use the default for this phoneme, even though we found a match
  300. // which set a secondary reference
  301. if(result >= 4)
  302. {
  303. // values 1-3 can be used for special codes
  304. // 1 = DFT from the phoneme source file
  305. return(result);
  306. }
  307. // no match found for other_ph, return the default
  308. return(LookupSound2(this_ph->spect,phonPAUSE,control));
  309. } // end of LookupSound
  310. frameref_t *LookupSpect(PHONEME_TAB *this_ph, PHONEME_TAB *prev_ph, PHONEME_TAB *next_ph,
  311. int which, int *match_level, int *n_frames, PHONEME_LIST *plist)
  312. {//=========================================================================================================
  313. int ix;
  314. int nf;
  315. int nf1;
  316. int seq_break;
  317. frameref_t *frames;
  318. int length1;
  319. int length_std;
  320. int length_factor;
  321. SPECT_SEQ *seq, *seq2;
  322. SPECT_SEQK *seqk, *seqk2;
  323. PHONEME_TAB *next2_ph;
  324. frame_t *frame;
  325. static frameref_t frames_buf[N_SEQ_FRAMES];
  326. PHONEME_TAB *other_ph;
  327. if(which == 1)
  328. other_ph = prev_ph;
  329. else
  330. other_ph = next_ph;
  331. if((ix = LookupSound(this_ph,other_ph,which,match_level,0)) < 4)
  332. return(NULL);
  333. seq = (SPECT_SEQ *)(&spects_data[ix]);
  334. seqk = (SPECT_SEQK *)seq;
  335. nf = seq->n_frames;
  336. if(nf >= N_SEQ_FRAMES)
  337. nf = N_SEQ_FRAMES - 1;
  338. seq_break = 0;
  339. length1 = 0;
  340. for(ix=0; ix<nf; ix++)
  341. {
  342. if(seq->frame[0].frflags & FRFLAG_KLATT)
  343. frame = &seqk->frame[ix];
  344. else
  345. frame = (frame_t *)&seq->frame[ix];
  346. frames_buf[ix].frame = frame;
  347. frames_buf[ix].frflags = frame->frflags;
  348. frames_buf[ix].length = frame->length;
  349. if(frame->frflags & FRFLAG_VOWEL_CENTRE)
  350. seq_break = ix;
  351. }
  352. frames = &frames_buf[0];
  353. if(seq_break > 0)
  354. {
  355. if(which==1)
  356. {
  357. nf = seq_break + 1;
  358. }
  359. else
  360. {
  361. frames = &frames_buf[seq_break]; // body of vowel, skip past initial frames
  362. nf -= seq_break;
  363. }
  364. }
  365. // do we need to modify a frame for blending with a consonant?
  366. if(this_ph->type == phVOWEL)
  367. {
  368. if((which==2) && ((frames[nf-1].frflags & FRFLAG_BREAK) == 0))
  369. {
  370. // lookup formant transition for the following phoneme
  371. if((*match_level == 0) || (next_ph->type == phNASAL))
  372. {
  373. LookupSound(next_ph,this_ph,1,NULL,1);
  374. seq_len_adjust += FormantTransition2(frames,nf,vowel_transition[2],vowel_transition[3],next_ph,which);
  375. }
  376. else
  377. if(next_ph->phflags == phVOWEL2)
  378. {
  379. // not really a consonant, rather a coloured vowel
  380. if(LookupSound(next_ph,this_ph,1,NULL,1) == 0)
  381. {
  382. next2_ph = plist[2].ph;
  383. LookupSound(next2_ph,next_ph,1,NULL,1);
  384. seq_len_adjust += FormantTransition2(frames,nf,vowel_transition[2],vowel_transition[3],next2_ph,which);
  385. }
  386. }
  387. }
  388. else
  389. {
  390. if(*match_level == 0)
  391. seq_len_adjust = FormantTransition2(frames,nf,vowel_transition0,vowel_transition1,prev_ph,which);
  392. }
  393. }
  394. nf1 = nf - 1;
  395. for(ix=0; ix<nf1; ix++)
  396. length1 += frames[ix].length;
  397. if((wavefile_ix != 0) && ((wavefile_ix & 0x800000)==0))
  398. {
  399. // a secondary reference has been returned, which is not a wavefile
  400. // add these spectra to the main sequence
  401. seq2 = (SPECT_SEQ *)(&spects_data[wavefile_ix]);
  402. seqk2 = (SPECT_SEQK *)seq2;
  403. // first frame of the addition just sets the length of the last frame of the main seq
  404. nf--;
  405. for(ix=0; ix<seq2->n_frames; ix++)
  406. {
  407. if(seq2->frame[0].frflags & FRFLAG_KLATT)
  408. frame = &seqk2->frame[ix];
  409. else
  410. frame = (frame_t *)&seq2->frame[ix];
  411. frames[nf].length = frame->length;
  412. if(ix > 0)
  413. {
  414. frames[nf].frame = frame;
  415. frames[nf].frflags = frame->frflags;
  416. }
  417. nf++;
  418. }
  419. wavefile_ix = 0;
  420. }
  421. if((this_ph->type == phVOWEL) && (length1 > 0))
  422. {
  423. if(which==2)
  424. {
  425. // adjust the length of the main part to match the standard length specified for the vowel
  426. // less the front part of the vowel and any added suffix
  427. length_std = this_ph->std_length + seq_len_adjust - 45;
  428. if(length_std < 10)
  429. length_std = 10;
  430. if(plist->synthflags & SFLAG_LENGTHEN)
  431. length_std += phoneme_tab[phonLENGTHEN]->std_length; // phoneme was followed by an extra : symbol
  432. // can adjust vowel length for stressed syllables here
  433. length_factor = (length_std * 256)/ length1;
  434. for(ix=0; ix<nf1; ix++)
  435. {
  436. frames[ix].length = (frames[ix].length * length_factor)/256;
  437. }
  438. }
  439. else
  440. {
  441. // front of a vowel
  442. if(*match_level == 0)
  443. {
  444. // allow very short vowels to have shorter front parts
  445. if(this_ph->std_length < 130)
  446. frames[0].length = (frames[0].length * this_ph->std_length)/130;
  447. }
  448. if(seq_len_adjust != 0)
  449. {
  450. length_std = 0;
  451. for(ix=0; ix<nf1; ix++)
  452. {
  453. length_std += frames[ix].length;
  454. }
  455. length_factor = ((length_std + seq_len_adjust) * 256)/length_std;
  456. for(ix=0; ix<nf1; ix++)
  457. {
  458. frames[ix].length = (frames[ix].length * length_factor)/256;
  459. }
  460. }
  461. }
  462. }
  463. *n_frames = nf;
  464. return(frames);
  465. } // end of LookupSpect
  466. unsigned char *LookupEnvelope(int ix)
  467. {//================================
  468. if(ix==0)
  469. return(NULL);
  470. return((unsigned char *)&spects_data[phoneme_index[ix]]);
  471. }
  472. static void SetUpPhonemeTable(int number, int recursing)
  473. {//=====================================================
  474. int ix;
  475. int includes;
  476. int ph_code;
  477. PHONEME_TAB *phtab;
  478. if(recursing==0)
  479. {
  480. memset(phoneme_tab_flags,0,sizeof(phoneme_tab_flags));
  481. }
  482. if((includes = phoneme_tab_list[number].includes) > 0)
  483. {
  484. // recursively include base phoneme tables
  485. SetUpPhonemeTable(includes-1,1);
  486. }
  487. // now add the phonemes from this table
  488. phtab = phoneme_tab_list[number].phoneme_tab_ptr;
  489. for(ix=0; ix<phoneme_tab_list[number].n_phonemes; ix++)
  490. {
  491. ph_code = phtab[ix].code;
  492. phoneme_tab[ph_code] = &phtab[ix];
  493. if(ph_code > n_phoneme_tab)
  494. n_phoneme_tab = ph_code;
  495. if(recursing == 0)
  496. phoneme_tab_flags[ph_code] |= 1; // not inherited
  497. }
  498. } // end of SetUpPhonemeTable
  499. void SelectPhonemeTable(int number)
  500. {//================================
  501. n_phoneme_tab = 0;
  502. SetUpPhonemeTable(number,0); // recursively for included phoneme tables
  503. n_phoneme_tab++;
  504. current_phoneme_table = number;
  505. } // end of SelectPhonemeTable
  506. int LookupPhonemeTable(const char *name)
  507. {//=====================================
  508. int ix;
  509. for(ix=0; ix<n_phoneme_tables; ix++)
  510. {
  511. if(strcmp(name,phoneme_tab_list[ix].name)==0)
  512. {
  513. phoneme_tab_number = ix;
  514. break;
  515. }
  516. }
  517. if(ix == n_phoneme_tables)
  518. return(-1);
  519. return(ix);
  520. }
  521. int SelectPhonemeTableName(const char *name)
  522. {//=========================================
  523. // Look up a phoneme set by name, and select it if it exists
  524. // Returns the phoneme table number
  525. int ix;
  526. if((ix = LookupPhonemeTable(name)) == -1)
  527. return(-1);
  528. SelectPhonemeTable(ix);
  529. return(ix);
  530. } // end of DelectPhonemeTableName
  531. void LoadConfig(void)
  532. {//==================
  533. // Load configuration file, if one exists
  534. char buf[sizeof(path_home)+10];
  535. FILE *f;
  536. int ix;
  537. char c1;
  538. char *p;
  539. char string[200];
  540. for(ix=0; ix<N_SOUNDICON_SLOTS; ix++)
  541. {
  542. soundicon_tab[ix].filename = NULL;
  543. soundicon_tab[ix].data = NULL;
  544. }
  545. sprintf(buf,"%s%c%s",path_home,PATHSEP,"config");
  546. if((f = fopen(buf,"r"))==NULL)
  547. {
  548. return;
  549. }
  550. while(fgets(buf,sizeof(buf),f)!=NULL)
  551. {
  552. if(memcmp(buf,"tone",4)==0)
  553. {
  554. ReadTonePoints(&buf[5],tone_points);
  555. }
  556. else
  557. if(memcmp(buf,"pa_device",9)==0)
  558. {
  559. sscanf(&buf[7],"%d",&option_device_number);
  560. }
  561. else
  562. if(memcmp(buf,"soundicon",9)==0)
  563. {
  564. ix = sscanf(&buf[10],"_%c %s",&c1,string);
  565. if(ix==2)
  566. {
  567. soundicon_tab[n_soundicon_tab].name = c1;
  568. p = Alloc(strlen(string)+1);
  569. strcpy(p,string);
  570. soundicon_tab[n_soundicon_tab].filename = p;
  571. soundicon_tab[n_soundicon_tab++].length = 0;
  572. }
  573. }
  574. }
  575. } // end of LoadConfig