eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

extras.cpp 25KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221
  1. /***************************************************************************
  2. * Copyright (C) 2006 to 2007 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, write see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include "wx/wx.h"
  20. #include <wx/dirdlg.h>
  21. #include "wx/filename.h"
  22. #include "wx/sound.h"
  23. #include "wx/dir.h"
  24. #include <sys/stat.h>
  25. #include "speak_lib.h"
  26. #include "main.h"
  27. #include "speech.h"
  28. #include "phoneme.h"
  29. #include "synthesize.h"
  30. #include "voice.h"
  31. #include "spect.h"
  32. #include "translate.h"
  33. #include "options.h"
  34. extern char word_phonemes[N_WORD_PHONEMES]; // a word translated into phoneme codes
  35. //******************************************************************************************************
  36. FILE *f_wavtest = NULL;
  37. FILE *f_events = NULL;
  38. FILE *OpenWaveFile3(const char *path)
  39. /***********************************/
  40. {
  41. int *p;
  42. FILE *f;
  43. static unsigned char wave_hdr[44] = {
  44. 'R','I','F','F',0,0,0,0,'W','A','V','E','f','m','t',' ',
  45. 0x10,0,0,0,1,0,1,0, 9,0x3d,0,0,0x12,0x7a,0,0,
  46. 2,0,0x10,0,'d','a','t','a', 0,0,0,0 };
  47. if(path == NULL)
  48. return(NULL);
  49. // set the sample rate in the header
  50. p = (int *)(&wave_hdr[24]);
  51. p[0] = samplerate;
  52. p[1] = samplerate * 2;
  53. f = fopen(path,"wb");
  54. if(f != NULL)
  55. {
  56. fwrite(wave_hdr,1,sizeof(wave_hdr),f);
  57. }
  58. return(f);
  59. } // end of OpenWaveFile
  60. void CloseWaveFile3(FILE *f)
  61. /*************************/
  62. {
  63. unsigned int pos;
  64. static int value;
  65. if(f == NULL)
  66. return;
  67. fflush(f);
  68. pos = ftell(f);
  69. value = pos - 8;
  70. fseek(f,4,SEEK_SET);
  71. fwrite(&value,4,1,f);
  72. value = samplerate;
  73. fseek(f,24,SEEK_SET);
  74. fwrite(&value,4,1,f);
  75. value = samplerate*2;
  76. fseek(f,28,SEEK_SET);
  77. fwrite(&value,4,1,f);
  78. value = pos - 44;
  79. fseek(f,40,SEEK_SET);
  80. fwrite(&value,4,1,f);
  81. fclose(f);
  82. } // end of CloseWaveFile3
  83. int TestUriCallback(int type, const char *uri, const char *base)
  84. {//=============================================================
  85. if(strcmp(uri,"hello")==0)
  86. return(1);
  87. return(0);
  88. }
  89. int TestSynthCallback(short *wav, int numsamples, espeak_EVENT *events)
  90. {//====================================================================
  91. int type;
  92. f_events = fopen("/home/jsd1/speechdata/text/events","a");
  93. fprintf(f_events,"--\n");
  94. while((type = events->type) != 0)
  95. {
  96. fprintf(f_events,"%2d (%4d %4ld) %5d %5d (%3d) ",type,events->unique_identifier,(long)events->user_data,events->audio_position,events->text_position,events->length);
  97. if((type==3) || (type==4))
  98. fprintf(f_events,"'%s'\n",events->id.name);
  99. else
  100. if(type==espeakEVENT_PHONEME)
  101. fprintf(f_events,"[%s]\n",WordToString(events->id.number));
  102. else
  103. fprintf(f_events,"%d\n",events->id.number);
  104. events++;
  105. }
  106. if((wav == NULL) && (f_wavtest != NULL))
  107. {
  108. fprintf(f_events,"Finished\n");
  109. CloseWaveFile3(f_wavtest);
  110. f_wavtest = NULL;
  111. }
  112. fclose(f_events);
  113. if(f_wavtest == NULL) return(0);
  114. fwrite(wav,numsamples*2,1,f_wavtest);
  115. return(0);
  116. }
  117. //******************************************************************************************************
  118. #ifdef deleted
  119. static int RuLex_sorter(char **a, char **b)
  120. {//=======================================
  121. char *pa, *pb;
  122. int xa, xb;
  123. int ix;
  124. pa = *a;
  125. pb = *b;
  126. xa = strlen(pa)-1;
  127. xb = strlen(pb)-1;
  128. while((xa >= 0) && (xb >= 0))
  129. {
  130. if((ix = (pa[xa] - pb[xb])) != 0)
  131. return(ix);
  132. xa--;
  133. xb--;
  134. }
  135. return(pa - pb);
  136. } /* end of strcmp2 */
  137. #endif
  138. static const unsigned short KOI8_R[0x60] = {
  139. 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, // a0
  140. 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e, // a8
  141. 0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, // b0
  142. 0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9, // b8
  143. 0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, // c0
  144. 0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, // c8
  145. 0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, // d0
  146. 0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a, // d8
  147. 0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, // e0
  148. 0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, // e8
  149. 0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, // f0
  150. 0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a, // f8
  151. };
  152. #define N_CHARS 34
  153. int *p_unicode;
  154. int unicode[80];
  155. #define PH(c1,c2) (c2<<8)+c1 // combine two characters into an integer for phoneme name
  156. static void DecodePhonemes2(const char *inptr, char *outptr)
  157. //===================================================
  158. // Translate from internal phoneme codes into phoneme mnemonics
  159. // This version is for Lexicon_De()
  160. {
  161. unsigned char phcode;
  162. unsigned char c;
  163. unsigned int mnem;
  164. PHONEME_TAB *ph;
  165. const char *p;
  166. int ix;
  167. int j;
  168. int start;
  169. static const char *stress_chars = "==,,'* ";
  170. unsigned int replace_ph[] = {',',PH('@','-'),'W','3','y','A',PH('A',':'),'*',PH('_','!'),PH('_','|'),PH('O','I'),PH('Y',':'),PH('p','F'),PH('E','2'),0};
  171. const char *replace_ph2[] = {NULL,NULL,"9","@r","Y","a:", "a:", "r", "?", "?", "OY", "2:", "pf" ,"E",NULL};
  172. start = 1;
  173. for(ix=0; (phcode = inptr[ix]) != 0; ix++)
  174. {
  175. if(phcode == 255)
  176. continue; /* indicates unrecognised phoneme */
  177. if((ph = phoneme_tab[phcode]) == NULL)
  178. continue;
  179. if((ph->type == phSTRESS) && (ph->std_length <= 4) && (ph->spect == 0))
  180. {
  181. if(ph->std_length > 2)
  182. *outptr++ = stress_chars[ph->std_length];
  183. }
  184. else
  185. {
  186. mnem = ph->mnemonic;
  187. if(ph->type == phPAUSE)
  188. {
  189. if(start)
  190. continue; // omit initial [?]
  191. if(inptr[ix+1] == phonSCHWA_SHORT)
  192. continue; // omit [?] before [@-*]
  193. }
  194. start = 0;
  195. p = NULL;
  196. for(j=0;;j++)
  197. {
  198. if(replace_ph[j] == 0)
  199. break;
  200. if(mnem == replace_ph[j])
  201. {
  202. p = replace_ph2[j];
  203. if(p == NULL)
  204. mnem = 0;
  205. break;
  206. }
  207. }
  208. if(p != NULL)
  209. {
  210. while((c = *p++) != 0)
  211. {
  212. *outptr++ = c;
  213. }
  214. }
  215. else
  216. if(mnem != 0)
  217. {
  218. while((c = (mnem & 0xff)) != 0)
  219. {
  220. *outptr++ = c;
  221. mnem = mnem >> 8;
  222. }
  223. }
  224. }
  225. }
  226. *outptr = 0; /* string terminator */
  227. } // end of DecodePhonemes2
  228. void Lexicon_De()
  229. {//==============
  230. // Compare eSpeak's translation of German words with a pronunciation lexicon
  231. FILE *f_in;
  232. FILE *f_out;
  233. int ix;
  234. int c;
  235. int c2;
  236. char *p;
  237. int stress;
  238. int count=0;
  239. int start;
  240. int matched=0;
  241. int defer_stress = 0;
  242. char buf[200];
  243. char word[80];
  244. char word2[80];
  245. char type[80];
  246. char pronounce[80];
  247. char pronounce2[80];
  248. char phonemes[80];
  249. char phonemes2[80];
  250. WORD_TAB winfo;
  251. static const char *vowels = "aeiouyAEIOUY29@";
  252. wxString fname = wxFileSelector(_T("German Lexicon"),path_dir1,_T(""),_T(""),_T("*"),wxOPEN);
  253. strcpy(buf,fname.mb_str(wxConvLocal));
  254. if((f_in = fopen(buf,"r")) == NULL)
  255. {
  256. wxLogError(_T("Can't read file ")+fname);
  257. return;
  258. }
  259. path_dir1 = wxFileName(fname).GetPath();
  260. if((f_out = fopen("compare_de","w")) == NULL)
  261. {
  262. wxLogError(_T("Can't write file "));
  263. return;
  264. }
  265. LoadVoice("de",0);
  266. word2[0] = ' ';
  267. while(!feof(f_in))
  268. {
  269. count++;
  270. if(fgets(buf,sizeof(buf),f_in) == NULL)
  271. break;
  272. sscanf(buf,"%s %s %s",word,type,pronounce);
  273. // convert word to lower-case
  274. for(ix=0, p=&word2[1];;)
  275. {
  276. ix += utf8_in(&c,&word[ix]);
  277. c = towlower(c);
  278. p += utf8_out(c,p);
  279. if(c == 0)
  280. break;
  281. }
  282. strcpy(word,&word2[1]);
  283. strcat(&word2[1]," ");
  284. // remove | syllable boundaries
  285. stress=0;
  286. start=1;
  287. for(ix=0, p=pronounce2;;ix++)
  288. {
  289. c = pronounce[ix];
  290. if(c == '\'')
  291. {
  292. stress=4;
  293. continue;
  294. }
  295. if(c == ',')
  296. {
  297. stress=3;
  298. continue;
  299. }
  300. if(c == '|')
  301. continue;
  302. if((c == '?') && start)
  303. continue; // omit initial [?]
  304. if(c == '<')
  305. {
  306. if((c2 = pronounce[ix+1]) == 'i')
  307. {
  308. defer_stress =1;
  309. #ifdef deleted
  310. if(stress == 4)
  311. {
  312. *p++ = 'i';
  313. c =':';
  314. }
  315. else
  316. #endif
  317. {
  318. c = 'I';
  319. }
  320. ix++;
  321. }
  322. }
  323. start =0;
  324. if(defer_stress)
  325. {
  326. defer_stress = 0;
  327. }
  328. else
  329. if(stress && (strchr(vowels,c) != NULL))
  330. {
  331. if(stress == 4)
  332. *p++ = '\'';
  333. if(stress == 3)
  334. *p++ = ',';
  335. stress = 0;
  336. }
  337. *p++ = c;
  338. if(c == 0)
  339. break;
  340. if(strchr("eiouy",c) && pronounce[ix+1] != ':')
  341. *p++ = ':'; // ensure [;] after these vowels
  342. }
  343. // translate
  344. memset(&winfo,0,sizeof(winfo));
  345. TranslateWord(translator,&word2[1],0,&winfo);
  346. DecodePhonemes2(word_phonemes,phonemes); // also need to change some phoneme names
  347. if(strcmp(phonemes,pronounce2) == 0)
  348. {
  349. matched++;
  350. }
  351. else
  352. {
  353. // remove secondary stress
  354. strcpy(phonemes2,phonemes);
  355. p = phonemes;
  356. for(ix=0; ;ix++)
  357. {
  358. if((c = phonemes2[ix]) != ',')
  359. *p++ = c;
  360. if(c == 0)
  361. break;
  362. }
  363. if(strcmp(phonemes,pronounce2) == 0)
  364. {
  365. matched++;
  366. }
  367. else
  368. {
  369. if(strlen(word) < 8)
  370. strcat(word,"\t");
  371. fprintf(f_out,"%s\t%s\t%s\n",word,phonemes,pronounce2);
  372. }
  373. }
  374. }
  375. fclose(f_in);
  376. fclose(f_out);
  377. wxLogStatus(_T("Completed, equal=%d different=%d"),matched,count-matched);
  378. }
  379. void Lexicon_Ru()
  380. {//==============
  381. // compare stress markings in Russian RuLex file with lookup in ru_rules
  382. int ix;
  383. char *p;
  384. int c;
  385. FILE *f_in;
  386. FILE *f_out;
  387. FILE *f_log;
  388. FILE *f_roots;
  389. PHONEME_TAB *ph;
  390. int vcount;
  391. int ru_stress;
  392. int max_stress;
  393. int max_stress_posn;
  394. int n_words=0;
  395. int n_wrong=0;
  396. int wlength;
  397. int input_length;
  398. int sfx;
  399. const char *suffix;
  400. int wlen;
  401. int len;
  402. int check_root;
  403. WORD_TAB winfo;
  404. char word[80];
  405. char word2[80];
  406. int counts[20][20][10];
  407. char phonemes[N_WORD_PHONEMES];
  408. char buf[200];
  409. char fname[sizeof(path_dsource)+20];
  410. // KOI8-R codes for Russian vowels
  411. static unsigned char vowels[] = {0xa3,0xc0,0xc1,0xc5,0xc9,0xcf,0xd1,0xd5,0xd9,0xdc,0};
  412. typedef struct {
  413. const char *suffix;
  414. int syllables;
  415. } SUFFIX;
  416. static SUFFIX suffixes[] = {
  417. {NULL,0},
  418. {"ичу",2},
  419. {"ского",2},
  420. {"ская",2},
  421. {"ски",1},
  422. {"ские",2},
  423. {"ский",1},
  424. {"ским",1},
  425. {"ское",2},
  426. {"ской",1},
  427. {"ском",1},
  428. {"скую",2},
  429. {"а",1},
  430. {"е",1},
  431. {"и",1},
  432. {NULL,0}};
  433. memset(counts,0,sizeof(counts));
  434. if(gui_flag)
  435. {
  436. wxString fname = wxFileSelector(_T("Read lexicon.dict"),path_dictsource,
  437. _T(""),_T(""),_T("*"),wxOPEN);
  438. if(fname.IsEmpty())
  439. return;
  440. strcpy(buf,fname.mb_str(wxConvLocal));
  441. }
  442. else
  443. {
  444. strcpy(buf,"lexicon.dict");
  445. }
  446. if((f_in = fopen(buf,"r")) == NULL)
  447. {
  448. if(gui_flag)
  449. wxLogError(_T("Can't read file: ") + wxString(buf,wxConvLocal));
  450. else
  451. fprintf(stderr,"Can't read file: %s\n",buf);
  452. return;
  453. }
  454. input_length = GetFileLength(buf);
  455. sprintf(fname,"%s%c%s",path_dsource,PATHSEP,"ru_listx_1");
  456. if((f_out = fopen(fname,"w")) == NULL)
  457. {
  458. wxLogError(_T("Can't write to: ")+wxString(fname,wxConvLocal));
  459. fclose(f_in);
  460. return;
  461. }
  462. sprintf(fname,"%s%c%s",path_dsource,PATHSEP,"ru_log");
  463. f_log = fopen(fname,"w");
  464. sprintf(fname,"%s%c%s",path_dsource,PATHSEP,"ru_roots_1");
  465. f_roots = fopen(fname,"w");
  466. LoadVoice("ru",0);
  467. if(gui_flag)
  468. progress = new wxProgressDialog(_T("Lexicon"),_T(""),input_length);
  469. else
  470. fprintf(stderr,"Processing lexicon.dict\n");
  471. for(;;)
  472. {
  473. if(((n_words & 0x3ff) == 0) && gui_flag)
  474. {
  475. progress->Update(ftell(f_in));
  476. }
  477. if(fgets(buf,sizeof(buf),f_in) == NULL)
  478. break;
  479. if(isspace2(buf[0]))
  480. continue;
  481. // convert word from KOI8-R to UTF8
  482. p = buf;
  483. ix = 0;
  484. wlength = 0;
  485. p_unicode = unicode;
  486. while(!isspace2(c = (*p++ & 0xff)))
  487. {
  488. if(c >= 0xa0)
  489. {
  490. c = KOI8_R[c-0xa0];
  491. *p_unicode++ = c;
  492. }
  493. wlength++;
  494. ix += utf8_out(c,&word[ix]);
  495. }
  496. word[ix] = 0;
  497. *p_unicode=0;
  498. sprintf(word2," %s ",word);
  499. // find the marked stress position
  500. vcount = 0;
  501. ru_stress = 0;
  502. while(*p == ' ') p++;
  503. while((c = (*p++ & 0xff)) != '\n')
  504. {
  505. if(c == '+')
  506. {
  507. ru_stress = vcount;
  508. break;
  509. }
  510. if(strchr((char *)vowels,c) != NULL)
  511. {
  512. vcount++;
  513. }
  514. }
  515. // translate
  516. memset(&winfo,0,sizeof(winfo));
  517. TranslateWord(translator, &word2[1],0,&winfo);
  518. DecodePhonemes(word_phonemes,phonemes);
  519. // find the stress position in the translation
  520. max_stress = 0;
  521. max_stress_posn = -1;
  522. vcount = 0;
  523. check_root = 0;
  524. ph = phoneme_tab[phonPAUSE];
  525. for(p=word_phonemes; *p != 0; p++)
  526. {
  527. ph = phoneme_tab[(unsigned int)*p];
  528. if(ph == NULL)
  529. continue;
  530. if(ph->type == phVOWEL)
  531. vcount++;
  532. if(ph->type == phSTRESS)
  533. {
  534. if(ph->std_length > max_stress)
  535. {
  536. max_stress = ph->std_length;
  537. max_stress_posn = vcount+1;
  538. }
  539. }
  540. }
  541. n_words++;
  542. if(ru_stress > vcount)
  543. {
  544. if(f_log != NULL)
  545. {
  546. fprintf(f_log,"%s\t $%d\t // %s\n",word,ru_stress,phonemes);
  547. }
  548. }
  549. else
  550. {
  551. counts[vcount][ru_stress][ph->type]++;
  552. if((vcount > 1) && (ru_stress != max_stress_posn))
  553. {
  554. n_wrong++;
  555. if((ru_stress==0) || (ru_stress > 7))
  556. fprintf(f_out,"// "); // we only have $1 to $7 to indicate stress position
  557. else
  558. check_root = 1;
  559. #define X_COMPACT
  560. fprintf(f_out,"%s",word);
  561. #ifdef X_COMPACT
  562. if(wlength < 8) fputc('\t',f_out);
  563. if(wlength < 16) fputc('\t',f_out);
  564. fprintf(f_out," $%d\n",ru_stress);
  565. #else
  566. while(wlength++ < 20)
  567. fputc(' ',f_out);
  568. fprintf(f_out," $%d //%d %s\n",ru_stress,max_stress_posn,phonemes);
  569. #endif
  570. //CharStats();
  571. }
  572. }
  573. if(check_root)
  574. {
  575. // does this word match any suffixes ?
  576. wlen = strlen(word);
  577. for(sfx=0;(suffix = suffixes[sfx].suffix) != NULL; sfx++)
  578. {
  579. len = strlen(suffix);
  580. if(len >= (wlen-2))
  581. continue;
  582. if(ru_stress > (vcount - suffixes[sfx].syllables))
  583. continue;
  584. if(strcmp(suffix,&word[wlen-len])==0)
  585. {
  586. strcpy(word2,word);
  587. word2[wlen-len] = 0;
  588. // fprintf(f_roots,"%s\t $%d\t\\ %s\n",word2,ru_stress,suffix);
  589. fprintf(f_roots,"%s\t $%d\n",word2,ru_stress);
  590. }
  591. }
  592. }
  593. }
  594. fclose(f_in);
  595. fclose(f_out);
  596. fclose(f_roots);
  597. sprintf(buf,"Lexicon: Total %d OK %d wrong %d",n_words,n_words-n_wrong,n_wrong);
  598. if(gui_flag)
  599. {
  600. delete progress;
  601. wxLogStatus(wxString(buf,wxConvLocal));
  602. }
  603. else
  604. {
  605. fprintf(stderr,"%s\n",buf);
  606. }
  607. if(f_log != NULL)
  608. {
  609. #ifdef deleted
  610. // list tables of frequency of stress position for words of different syllable lengths
  611. int j,k;
  612. for(ix=0; ix<12; ix++)
  613. {
  614. fprintf(f_log,"%2d syl: ",ix);
  615. for(k=0; k<10; k++)
  616. {
  617. fprintf(f_log," %2d :",k);
  618. for(j=0; j<10; j++)
  619. {
  620. fprintf(f_log,"%6d ",counts[ix][j][k]);
  621. }
  622. fprintf(f_log,"\n");
  623. }
  624. fprintf(f_log,"\n\n");
  625. }
  626. #endif
  627. fclose(f_log);
  628. }
  629. } // end of Lexicon_Ru
  630. void CompareLexicon(int id)
  631. {//========================
  632. switch(id)
  633. {
  634. case MENU_LEXICON_RU:
  635. Lexicon_Ru();
  636. break;
  637. case MENU_LEXICON_DE:
  638. Lexicon_De();
  639. break;
  640. }
  641. } // end of CompareLexicon
  642. //******************************************************************************************************
  643. extern int HashDictionary(const char *string);
  644. static int n_words;
  645. struct wcount {
  646. struct wcount *link;
  647. int count;
  648. char *word;
  649. };
  650. static int wfreq_sorter(wcount **p1, wcount **p2)
  651. {//==============================================
  652. int x;
  653. wcount *a, *b;
  654. a = *p1;
  655. b = *p2;
  656. if((x = b->count - a->count) != 0)
  657. return(x);
  658. return(strcmp(a->word,b->word));
  659. }
  660. static void wfreq_add(const char *word, wcount **hashtab)
  661. {//======================================================
  662. wcount *p;
  663. wcount **p2;
  664. int len;
  665. int hash;
  666. hash = HashDictionary(word);
  667. p2 = &hashtab[hash];
  668. p = *p2;
  669. while(p != NULL)
  670. {
  671. if(strcmp(p->word,word)==0)
  672. {
  673. p->count++;
  674. return;
  675. }
  676. p2 = &p->link;
  677. p = *p2;
  678. }
  679. // word not found, add it to the list
  680. len = strlen(word) + 1;
  681. if((p = (wcount *)malloc(sizeof(wcount)+len)) == NULL)
  682. return;
  683. p->count = 1;
  684. p->link = NULL;
  685. p->word = (char *)p + sizeof(wcount);
  686. strcpy(p->word,word);
  687. *p2 = p;
  688. n_words++;
  689. }
  690. void CountWordFreq(wxString path, wcount **hashtab)
  691. {//================================================
  692. // Count the occurances of words in this file
  693. FILE *f_in;
  694. unsigned char c;
  695. int wc;
  696. unsigned int ix, j, k;
  697. int n_chars;
  698. char buf[80];
  699. char wbuf[80];
  700. if((f_in = fopen(path.mb_str(wxConvLocal),"rb")) == NULL)
  701. return;
  702. while(!feof(f_in))
  703. {
  704. while((c = fgetc(f_in)) < 'A')
  705. {
  706. // skip leading spaces, numbers, etc
  707. if(feof(f_in)) break;
  708. }
  709. // read utf8 bytes until a space, number or punctuation
  710. ix = 0;
  711. while(!feof(f_in) && (c >= 'A') && (ix < sizeof(buf)-1))
  712. {
  713. buf[ix++] = c;
  714. c = fgetc(f_in);
  715. }
  716. buf[ix++] = 0;
  717. buf[ix] = 0;
  718. // the buf may contain non-alphabetic characters
  719. j = 0;
  720. n_chars = 0;
  721. for(k=0; k<ix; )
  722. {
  723. k += utf8_in(&wc,&buf[k]);
  724. wc = towlower(wc); // convert to lower case
  725. if(iswalpha(wc))
  726. {
  727. j += utf8_out(wc,&wbuf[j]);
  728. n_chars++;
  729. }
  730. else
  731. {
  732. wbuf[j] = 0;
  733. if(n_chars > 2)
  734. {
  735. wfreq_add(wbuf,hashtab);
  736. }
  737. j = 0;
  738. n_chars = 0;
  739. }
  740. }
  741. }
  742. fclose(f_in);
  743. } // end of CountWordFreq
  744. void MakeWordFreqList()
  745. {//====================
  746. // Read text files from a specified directory and make a list of the most frequently occuring words.
  747. struct wcount *whashtab[N_HASH_DICT];
  748. wcount **w_list;
  749. int ix;
  750. int j;
  751. int hash;
  752. wcount *p;
  753. FILE *f_out;
  754. char buf[200];
  755. char buf2[200];
  756. wxString dir = wxDirSelector(_T("Directory of text files"),path_speaktext);
  757. if(dir.IsEmpty()) return;
  758. memset(whashtab,0,sizeof(whashtab));
  759. wxString path = wxFindFirstFile(dir+_T("/*"),wxFILE);
  760. while (!path.empty())
  761. {
  762. if(path.AfterLast(PATHSEP) != _T("!wordcounts"))
  763. {
  764. CountWordFreq(path,whashtab);
  765. path = wxFindNextFile();
  766. }
  767. }
  768. // put all the words into a list and then sort it
  769. w_list = (wcount **)malloc(sizeof(wcount *) * n_words);
  770. ix = 0;
  771. for(hash=0; hash < N_HASH_DICT; hash++)
  772. {
  773. p = whashtab[hash];
  774. while((p != NULL) && (ix < n_words))
  775. {
  776. w_list[ix++] = p;
  777. p = p->link;
  778. }
  779. }
  780. qsort((void *)w_list,ix,sizeof(wcount *),(int(*)(const void *,const void *))wfreq_sorter);
  781. // write out the sorted list
  782. strcpy(buf,dir.mb_str(wxConvLocal));
  783. sprintf(buf2,"%s/!wordcounts",buf);
  784. if((f_out = fopen(buf2,"w")) == NULL)
  785. return;
  786. for(j=0; j<ix; j++)
  787. {
  788. p = w_list[j];
  789. fprintf(f_out,"%5d %s\n",p->count,p->word);
  790. free(p);
  791. }
  792. fclose(f_out);
  793. } // end of Make WorkFreqList
  794. //******************************************************************************************************
  795. void ConvertToUtf8()
  796. {//=================
  797. // Convert a file from 8bit to UTF8, according to the current voice
  798. unsigned int c;
  799. int ix;
  800. FILE *f_in;
  801. FILE *f_out;
  802. char buf[200];
  803. wxString fname = wxFileSelector(_T("Convert file to UTF8"),wxString(path_home,wxConvLocal),
  804. _T(""),_T(""),_T("*"),wxOPEN);
  805. if(fname.IsEmpty())
  806. return;
  807. strcpy(buf,fname.mb_str(wxConvLocal));
  808. f_in = fopen(buf,"r");
  809. if(f_in == NULL)
  810. {
  811. wxLogError(_T("Can't read file: ")+fname);
  812. return;
  813. }
  814. strcat(buf,"_1");
  815. f_out = fopen(buf,"w");
  816. if(f_out == NULL)
  817. {
  818. wxLogError(_T("Can't create file: ")+wxString(buf,wxConvLocal));
  819. fclose(f_in);
  820. return;
  821. }
  822. while(!feof(f_in))
  823. {
  824. c = fgetc(f_in);
  825. if(c >= 0xa0)
  826. c = translator->charset_a0[c-0xa0];
  827. ix = utf8_out(c,buf);
  828. fwrite(buf,ix,1,f_out);
  829. }
  830. fclose(f_in);
  831. fclose(f_out);
  832. wxLogStatus(_T("Written to: ")+fname+_T("_1"));
  833. } // end of ConvertToItf8
  834. //******************************************************************************************************
  835. //#define calcspeedtab
  836. #ifdef calcspeedtab
  837. // used to set up the presets in the speed_lookup table
  838. // interpolate between a set of measured wpm values
  839. void SetSpeedTab(void)
  840. {//===================
  841. #define N_WPM 19
  842. // Interpolation table to translate from words-per-minute to internal speed
  843. // words-per-minute values (measured)
  844. static float wpm1[N_WPM] =
  845. {0, 82, 96, 108, 124, 134, 147, 162, 174, 189, 224, 259, 273, 289, 307, 326, 346, 361, 370 };
  846. // corresponding internal speed values
  847. static float wpm2[N_WPM] =
  848. {0,253,200, 170, 140, 125, 110, 95, 85, 75, 55, 40, 35, 30, 25, 20, 15, 10, 5 };
  849. unsigned char speed_lookup[290];
  850. unsigned int ix;
  851. float x;
  852. int speed_wpm;
  853. FILE *f;
  854. // convert from word-per-minute to internal speed code
  855. for(speed_wpm=80; speed_wpm<370; speed_wpm++)
  856. {
  857. for(ix=2; ix<N_WPM-2; ix++)
  858. {
  859. if(speed_wpm < wpm1[ix])
  860. break;
  861. }
  862. x = polint(&wpm1[ix-1], &wpm2[ix-1], 3, speed_wpm);
  863. speed_lookup[speed_wpm-80] = (unsigned char)x;
  864. }
  865. f = fopen("speed_lookup","w");
  866. if(f == NULL) return;
  867. for(ix=0; ix<sizeof(speed_lookup); ix++)
  868. {
  869. fprintf(f,"%4d,",speed_lookup[ix]);
  870. if((ix % 5) == 4)
  871. fprintf(f,"\t//%4d\n\t",(ix / 5)*5 + 80);
  872. }
  873. fclose(f);
  874. } // end of SetSpeedTab
  875. #endif
  876. //#define xcharset
  877. #ifdef xcharset
  878. #include "iconv.h"
  879. void CharsetToUnicode(const char *charset)
  880. {//=======================================
  881. // write a 8bit charset to unicode translation table to file
  882. // charset: eg. "ISO-8859-1"
  883. iconv_t cd;
  884. unsigned char inbuf[4];
  885. size_t n_inbuf;
  886. unsigned char outbuf[12];
  887. size_t n_outbuf;
  888. int n;
  889. int ix;
  890. int x, y;
  891. FILE *f;
  892. char *p_inbuf;
  893. char *p_outbuf;
  894. f = fopen("/home/jsd1/tmp1/unicode1","a");
  895. cd = iconv_open("WCHAR_T",charset);
  896. if (cd == (iconv_t) -1)
  897. {
  898. fprintf(stderr,"Error - iconv_open\n");
  899. return;
  900. }
  901. fprintf(f,"towlower_tab\n ");
  902. for(ix=0x80; ix<=0x241; ix++)
  903. {
  904. y = 0;
  905. if(iswalpha(ix))
  906. {
  907. x = towlower(ix);
  908. if(x == ix)
  909. y = 0xff;
  910. else
  911. y = x - ix;
  912. }
  913. if((y == 0xff) || (y < 0))
  914. fprintf(f,"0xff,"); // ignore the 5 obscure cases where uc > lc
  915. else
  916. {
  917. fprintf(f,"%4d,",y);
  918. }
  919. if((ix&15)==15)
  920. fprintf(f," // %x\n ",ix & ~15);
  921. }
  922. fprintf(f,"\n%s\n ",charset);
  923. for(ix=0x80; ix<0x100; ix++)
  924. {
  925. inbuf[0] = ix;
  926. inbuf[1] = 0;
  927. inbuf[2] = 0;
  928. outbuf[0] = 0;
  929. outbuf[1] = 0;
  930. n_inbuf = 2;
  931. n_outbuf = sizeof(outbuf);
  932. p_inbuf = (char *)inbuf;
  933. p_outbuf = (char *)outbuf;
  934. n = iconv(cd, &p_inbuf, &n_inbuf, &p_outbuf, &n_outbuf);
  935. fprintf(f,"0x%.2x%.2x, ",outbuf[1],outbuf[0]);
  936. if((ix&7)==7)
  937. fprintf(f,"// %.2x\n ",ix & ~7);
  938. }
  939. fclose(f);
  940. iconv_close(cd);
  941. }
  942. #endif
  943. #ifdef deleted
  944. void Test2()
  945. {
  946. //
  947. char buf[120];
  948. FILE *f;
  949. FILE *f_out;
  950. unsigned char *p;
  951. f = fopen("/home/jsd1/tmp1/list","r");
  952. if(f == NULL) return;
  953. f_out = fopen("/home/jsd1/tmp1/list_out","w");
  954. if(f_out == NULL) return;
  955. while(!feof(f))
  956. {
  957. if(fgets(buf,sizeof(buf),f) == NULL)
  958. break;
  959. p = (unsigned char *)buf;
  960. while(*p > ' ') p++;
  961. *p = 0;
  962. fprintf(f_out,"%s . . .\n",buf);
  963. }
  964. fclose(f);
  965. fclose(f_out);
  966. }
  967. #endif
  968. const char* text1 = "Hello world. Testing.";
  969. void TestTest(int control)
  970. {//=======================
  971. FILE *f;
  972. unsigned int c;
  973. unsigned int ix=0;
  974. char textbuf[2000];
  975. espeak_VOICE voice;
  976. static unsigned int unique_identifier= 123;
  977. static int user_data = 456;
  978. //CharsetToUnicode("ISO-8859-4");
  979. //CharsetToUnicode("ISCII");
  980. return;
  981. if(control==2)
  982. {
  983. return;
  984. }
  985. memset(&voice,0,sizeof(voice));
  986. f = fopen("/home/jsd1/speechdata/text/test.txt","r");
  987. if(f==NULL)
  988. return;
  989. while(!feof(f) && (ix < sizeof(textbuf)-2))
  990. {
  991. c = fgetc(f);
  992. if(!feof(f))
  993. textbuf[ix++] = c;
  994. }
  995. textbuf[ix] = 0;
  996. fclose(f);
  997. f_wavtest = OpenWaveFile3("/home/jsd1/speechdata/text/test.wav");
  998. f_events = fopen("/home/jsd1/speechdata/text/events","w");
  999. fprintf(f_events,"Type Audio Text Length Id\n");
  1000. fclose(f_events);
  1001. espeak_Initialize(AUDIO_OUTPUT_RETRIEVAL,1000,NULL,1);
  1002. espeak_SetSynthCallback(TestSynthCallback);
  1003. espeak_SetUriCallback(TestUriCallback);
  1004. espeak_Synth(text1, strlen(text1)+1, 0, POS_CHARACTER, 0, espeakSSML|espeakCHARS_UTF8, &unique_identifier, (void *)user_data);
  1005. espeak_Synth(text1, strlen(text1)+1, 0, POS_CHARACTER, 0, espeakSSML|espeakCHARS_UTF8, &unique_identifier, (void *)(user_data+1));
  1006. espeak_SetParameter(espeakPUNCTUATION, 1, 0);
  1007. espeak_Synchronize();
  1008. // espeak_Cancel();
  1009. espeak_SetParameter(espeakPUNCTUATION, 1, 0);
  1010. }