eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

vowelchart.cpp 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640
  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2007 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include <math.h>
  20. #include "wx/wx.h"
  21. #include <wx/dcmemory.h>
  22. #include <wx/dc.h>
  23. #include <wx/bitmap.h>
  24. #include <wx/dirdlg.h>
  25. #include "wx/filename.h"
  26. #include "wx/wfstream.h"
  27. #include "speak_lib.h"
  28. #include "main.h"
  29. #include "speech.h"
  30. #include "phoneme.h"
  31. #include "synthesize.h"
  32. #include "voice.h"
  33. #include "spect.h"
  34. #include "translate.h"
  35. #include "options.h"
  36. /* Read a file of vowel symbols and f1,f2 formants, and produce a vowel diagram
  37. */
  38. extern wxString path_phsource;
  39. extern char *spects_data;
  40. extern int n_phoneme_tables;
  41. // size of the vowelchart png
  42. #define WIDTH 1580
  43. #define HEIGHT 800
  44. #define ROUND(x) ((int) ((x) + 0.5))
  // Convert one channel of an HSL colour to an 8-bit level.
  //   n1, n2 : the two intermediate levels that HslToRgb() derives from
  //            lightness and saturation
  //   hue    : position on a 0..255 hue wheel (HslToRgb() passes h, h+85, h-85)
  // Returns the channel level scaled by 255 and rounded to the nearest integer.
  static int HslValue (double n1, double n2, double hue)
  {//===================================================
      // a hue outside 0..255 is wrapped once
      if (hue > 255)
          hue -= 255;
      else if (hue < 0)
          hue += 255;

      double level = n1;      // flat low section: hue >= 170
      if (hue < 42.5)
          level = n1 + (n2 - n1) * (hue / 42.5);            // rising ramp
      else if (hue < 127.5)
          level = n2;                                       // flat high section
      else if (hue < 170)
          level = n1 + (n2 - n1) * ((170 - hue) / 42.5);    // falling ramp

      // identical to ROUND(level * 255.0)
      return (int) ((level * 255.0) + 0.5);
  }
  62. /**
  63. * @hue: Hue channel, returns Red channel
  64. * @saturation: Saturation channel, returns Green channel
  65. * @lightness: Lightness channel, returns Blue channel
  66. *
  67. * The arguments are pointers to int, with the values pointed to in the
  68. * following ranges: H [0, 360], L [0, 255], S [0, 255].
  69. *
  70. * The function changes the arguments to point to the RGB value
  71. * corresponding, with the returned values all in the range [0, 255].
  72. **/
  73. void HslToRgb (int *hue, int *saturation, int *lightness)
  74. {//======================================================
  75. double h, s, l;
  76. h = (*hue * 256)/360;
  77. s = *saturation;
  78. l = *lightness;
  79. if (s == 0)
  80. {
  81. /* achromatic case */
  82. *hue = (int)l;
  83. *lightness = (int)l;
  84. *saturation = (int)l;
  85. }
  86. else
  87. {
  88. double m1, m2;
  89. if (l < 128)
  90. m2 = (l * (255 + s)) / 65025.0;
  91. else
  92. m2 = (l + s - (l * s) / 255.0) / 255.0;
  93. m1 = (l / 127.5) - m2;
  94. /* chromatic case */
  95. *hue = HslValue (m1, m2, h + 85);
  96. *saturation = HslValue (m1, m2, h);
  97. *lightness = HslValue (m1, m2, h - 85);
  98. }
  99. }
  100. static int vowel_posn[N_PHONEME_TAB];
  101. static int vowel_posn_ix;
  // Base-2 logarithm, computed as log(x) / log(2).
  // The divisor is ln(2) written to six decimal places.
  static double log2a(double x)
  {//========================
      const double ln2 = 0.693147;
      return(log(x) / ln2);
  }
  107. static int VowelX(int f2)
  108. {//======================
  109. return(WIDTH - int((log2a(f2) - 9.40)*WIDTH/1.9));
  110. // return(WIDTH - int((log2a(f2) - 9.49)*WIDTH/1.8));
  111. }
  112. static int VowelY(int f1)
  113. {//======================
  114. return(int((log2a(f1) - 7.85)*HEIGHT/2.15));
  115. }
  116. static int VowelZ(int f3)
  117. {//======================
  118. int z;
  119. // range 2000-3000Hz, log2= 10.96 to 11.55
  120. z = int((log2a(f3) - 11.05)*256/0.50);
  121. if(z < 0) z = 0;
  122. if(z > 255) z = 255;
  123. return(z);
  124. }
  125. static void DrawVowel(wxDC *dc, wxString name, int f1, int f2, int f3, int g1, int g2)
  126. {//==================================================================================
  127. int ix;
  128. int posn;
  129. int collisions;
  130. int x,y,z,x2,y2;
  131. int r,g,b;
  132. wxBrush brush;
  133. y = VowelY(f1);
  134. x = VowelX(f2);
  135. z = VowelZ(f3);
  136. if(y < 0) y = 0;
  137. if(y > (HEIGHT-4)) y= (HEIGHT-4);
  138. if(x < 0) x = 0;
  139. if(x > (WIDTH-12)) x = (WIDTH-12);
  140. r = z;
  141. g = 255;
  142. b = 100 + z/2;
  143. HslToRgb(&r,&g,&b);
  144. brush.SetColour(r,g,b);
  145. dc->SetBrush(brush);
  146. dc->DrawCircle(x,y,4);
  147. // check for a label already at this position
  148. collisions = 0;
  149. posn = (x/8)*WIDTH + (y/8);
  150. for(ix=0; ix<vowel_posn_ix; ix++)
  151. {
  152. if(posn == vowel_posn[ix])
  153. collisions++;
  154. }
  155. vowel_posn[vowel_posn_ix++] = posn;
  156. dc->DrawText(name,x+4,y+(collisions*10));
  157. if(g2 != 0xffff)
  158. {
  159. y2 = VowelY(g1);
  160. x2 = VowelX(g2);
  161. dc->DrawLine(x,y,x2,y2);
  162. }
  163. }
  164. static int VowelChartDir(wxDC *dc, wxBitmap *bitmap)
  165. {//=================================================
  166. int ix;
  167. int nf;
  168. int count = 0;
  169. SpectSeq *spectseq;
  170. SpectFrame *frame1;
  171. SpectFrame *frame2=NULL;
  172. wxFileName filename;
  173. wxString dir = wxDirSelector(_T("Directory of vowel files"),path_phsource);
  174. if(dir.IsEmpty()) return(0);
  175. wxString path = wxFindFirstFile(dir+_T("/*"),wxFILE);
  176. while (!path.empty())
  177. {
  178. if((spectseq = new SpectSeq) == NULL) break;
  179. filename = wxFileName(path);
  180. wxFileInputStream stream(path);
  181. if(stream.Ok() == FALSE)
  182. {
  183. path = wxFindNextFile();
  184. continue;
  185. }
  186. spectseq->Load(stream);
  187. nf = 0;
  188. frame1 = NULL;
  189. if(spectseq->numframes > 0)
  190. {
  191. frame2 = spectseq->frames[0];
  192. }
  193. for(ix=0; ix<spectseq->numframes; ix++)
  194. {
  195. if(spectseq->frames[ix]->keyframe)
  196. {
  197. nf++;
  198. frame2 = spectseq->frames[ix];
  199. if(frame2->markers & FRFLAG_VOWEL_CENTRE)
  200. frame1 = frame2;
  201. }
  202. }
  203. if((nf >= 3) && (frame1 != NULL))
  204. {
  205. DrawVowel(dc,wxString(filename.GetName()),
  206. frame1->peaks[1].pkfreq, frame1->peaks[2].pkfreq, frame1->peaks[3].pkfreq,
  207. frame2->peaks[1].pkfreq, frame2->peaks[2].pkfreq);
  208. count++;
  209. }
  210. delete spectseq;
  211. path = wxFindNextFile();
  212. }
  213. filename.SetPath(dir);
  214. filename.SetFullName(_T("vowelchart.png"));
  215. bitmap->SaveFile(filename.GetFullPath(),wxBITMAP_TYPE_PNG);
  216. return(count);
  217. }
  218. static int VowelChartList(wxDC *dc, wxBitmap *bitmap, char *fname)
  219. {//===============================================================
  220. // Plot a graph of vowel formants.
  221. // y-axis is decreasing f1 (closeness)
  222. // x-axis is decreasing f2 (backness)
  223. FILE *f_in;
  224. int ix;
  225. int f1,f2,f3,g1,g2;
  226. int count=0;
  227. wxFileName filename;
  228. char name[40];
  229. char buf[200];
  230. wxString path;
  231. if(fname != NULL)
  232. {
  233. path = wxString(fname,wxConvLocal);
  234. }
  235. else
  236. {
  237. path = wxFileSelector(_T("Read file of vowel formants"),path_phsource,
  238. _T(""),_T(""),_T("*"),wxOPEN);
  239. }
  240. if(path.IsEmpty())
  241. {
  242. return(0);
  243. }
  244. filename = wxFileName(path);
  245. strcpy(buf,path.mb_str(wxConvLocal));
  246. f_in = fopen(buf,"r");
  247. if(f_in == NULL)
  248. {
  249. wxLogError(_T("Can't read file: %s"),buf);
  250. return(0);
  251. }
  252. while(fgets(buf,sizeof(buf),f_in) != NULL)
  253. {
  254. g2 = 0xffff;
  255. ix = sscanf(buf,"%s %d %d %d %d %d",name,&f1,&f2,&f3,&g1,&g2);
  256. if(ix >= 3)
  257. {
  258. DrawVowel(dc,wxString(name,wxConvLocal),
  259. f1,f2,f3,g1,g2);
  260. count++;
  261. }
  262. }
  263. filename.SetExt(_T("png"));
  264. bitmap->SaveFile(filename.GetFullPath(),wxBITMAP_TYPE_PNG);
  265. return(count);
  266. }
  267. void VowelChart(int control, char *fname)
  268. {//======================================
  269. // Plot a graph of vowel formants.
  270. // y-axis is decreasing f1 (closeness)
  271. // x-axis is decreasing f2 (backness)
  272. // control=1 from directory of lists
  273. // control=2 from single list
  274. // control=3 from directory of phoneme source data files
  275. int ix;
  276. int x,y;
  277. int count;
  278. wxFileName filename;
  279. wxBitmap bitmap(WIDTH,HEIGHT);
  280. // Create a memory DC
  281. wxMemoryDC dc;
  282. dc.SelectObject(bitmap);
  283. dc.SetBrush(*wxWHITE_BRUSH);
  284. dc.SetFont(*wxSWISS_FONT);
  285. dc.Clear();
  286. // draw grid
  287. dc.SetPen(*wxLIGHT_GREY_PEN);
  288. for(ix=200; ix<=1000; ix+=50)
  289. {
  290. y = VowelY(ix);
  291. dc.DrawLine(0,y,WIDTH,y);
  292. if((ix % 100) == 0)
  293. dc.DrawText(wxString::Format(_T("%d"),ix),1,y);
  294. }
  295. for(ix=700; ix<=2400; ix+=100)
  296. {
  297. x = VowelX(ix);
  298. dc.DrawLine(x,0,x,HEIGHT);
  299. if((ix % 200)==0)
  300. dc.DrawText(wxString::Format(_T("%d"),ix),x+1,0);
  301. }
  302. dc.SetPen(*wxBLACK_PEN);
  303. vowel_posn_ix = 0;
  304. if(control==3)
  305. count = VowelChartDir(&dc, &bitmap);
  306. else
  307. count = VowelChartList(&dc, &bitmap, fname);
  308. if(control != 1)
  309. wxLogStatus(_T("Plotted %d vowels"),count);
  310. }
  311. void FindPhonemesUsed(void)
  312. {//========================
  313. int hash;
  314. char *p;
  315. unsigned int *pw;
  316. char *start;
  317. char *next;
  318. unsigned char c;
  319. int count = 0;
  320. int ignore;
  321. char phonetic[N_WORD_PHONEMES];
  322. // look through all the phoneme strings in the **_rules data
  323. // and mark these phoneme codes as used.
  324. p = translator->data_dictrules;
  325. while(*p != 0)
  326. {
  327. if(*p == RULE_CONDITION)
  328. p+=2;
  329. if(*p == RULE_LINENUM)
  330. p+=3;
  331. if(*p == RULE_GROUP_END)
  332. {
  333. p++;
  334. if(*p == 0) break;
  335. }
  336. if(*p == RULE_GROUP_START)
  337. {
  338. if(p[1] == RULE_REPLACEMENTS)
  339. {
  340. p++;
  341. pw = (unsigned int *)(((long)p+4) & ~3); // advance to next word boundary
  342. while(pw[0] != 0)
  343. {
  344. pw += 2; // find the end of the replacement list, each entry is 2 words.
  345. }
  346. p = (char *)(pw+1);
  347. continue;
  348. }
  349. if(p[1] == RULE_LETTERGP2)
  350. {
  351. while(*p != RULE_GROUP_END) p++;
  352. continue;
  353. }
  354. p += (strlen(p)+1);
  355. }
  356. while((c = *p) != 0)
  357. {
  358. if(c == RULE_CONDITION)
  359. p++; // next byte is the condition number, which may be 3 (= RULE_PHONEMES)
  360. if(c == RULE_PHONEMES)
  361. break;
  362. p++;
  363. }
  364. count++;
  365. if(c == RULE_PHONEMES)
  366. {
  367. ignore = 0;
  368. start = p;
  369. p++;
  370. while((c = *p) != 0)
  371. {
  372. if(c == phonSWITCH)
  373. ignore = 1;
  374. if(ignore == 0)
  375. phoneme_tab_flags[c] |= 2;
  376. p++;
  377. }
  378. }
  379. p++;
  380. }
  381. // NOTE, we should recognise langopts.textmode and ignore the *_list file (lang=zh)
  382. for(hash=0; hash<N_HASH_DICT; hash++)
  383. {
  384. p = translator->dict_hashtab[hash];
  385. if(p == NULL)
  386. continue;
  387. while(*p != 0)
  388. {
  389. next = p + p[0];
  390. if((p[1] & 0x80) == 0)
  391. {
  392. p += ((p[1] & 0x3f) + 2);
  393. strcpy(phonetic,p);
  394. p += strlen(phonetic) +1;
  395. // examine flags
  396. ignore = 0;
  397. while(p < next)
  398. {
  399. if(*p == BITNUM_FLAG_TEXTMODE)
  400. {
  401. ignore = 1;
  402. break;
  403. }
  404. p++;
  405. }
  406. if(ignore == 0)
  407. {
  408. p = phonetic;
  409. while((c = *p) != 0)
  410. {
  411. if(c == phonSWITCH)
  412. break;
  413. phoneme_tab_flags[c] |= 2;
  414. p++;
  415. }
  416. }
  417. }
  418. p = next;
  419. }
  420. }
  421. } // end of FindPhonemesUsed
  422. void MakeVowelLists(void)
  423. {//======================
  424. // For each phoneme table, make a list of its vowels and their
  425. // formant frequencies (f1,f2,f3) for use by VowelChart()
  426. int table;
  427. int ix;
  428. int phcode;
  429. PHONEME_TAB *ph;
  430. FILE *f;
  431. SPECT_SEQ *seq;
  432. SPECT_SEQK *seqk;
  433. frame_t *frame;
  434. int match_level;
  435. char dirname[sizeof(path_source)+20];
  436. char fname[sizeof(dirname)+40];
  437. progress = new wxProgressDialog(_T("Vowel charts"),_T(""),n_phoneme_tables);
  438. sprintf(dirname,"%s%s",path_source,"vowelcharts");
  439. mkdir(dirname,S_IRWXU | S_IRGRP | S_IROTH);
  440. for(table=0; table<n_phoneme_tables; table++)
  441. {
  442. sprintf(fname,"%s/%s",dirname,phoneme_tab_list[table].name);
  443. if((f = fopen(fname,"w"))==NULL) continue;
  444. progress->Update(table);
  445. // select the phoneme table by name
  446. // if(SetVoiceByName(phoneme_tab_list[table].name) != 0) continue;
  447. if(SelectPhonemeTableName(phoneme_tab_list[table].name) < 0) continue;
  448. FindPhonemesUsed();
  449. // phoneme table is terminated by a phoneme with no name (=0)
  450. for(phcode=1; phcode < n_phoneme_tab; phcode++)
  451. {
  452. //if((phoneme_tab_flags[phcode] & 3) == 0)
  453. // continue; // inherited, and not used
  454. ph = phoneme_tab[phcode];
  455. if(ph->type != phVOWEL)
  456. continue;
  457. if((ix = LookupSound(ph, phoneme_tab[phonPAUSE], 1, &match_level, 0)) == 0)
  458. continue;
  459. seq = (SPECT_SEQ *)(&spects_data[ix]);
  460. seqk = (SPECT_SEQK *)seq;
  461. if(seq->frame[0].frflags & FRFLAG_KLATT)
  462. frame = &seqk->frame[1];
  463. else
  464. frame = (frame_t *)&seq->frame[1];
  465. fprintf(f,"%s\t %3d %4d %4d",WordToString(ph->mnemonic),
  466. frame->ffreq[1],frame->ffreq[2],frame->ffreq[3]);
  467. if(seq->frame[0].frflags & FRFLAG_KLATT)
  468. frame = &seqk->frame[seqk->n_frames-1];
  469. else
  470. frame = (frame_t *)&seq->frame[seq->n_frames-1];
  471. fprintf(f," %3d %4d %4d\n",frame->ffreq[1],frame->ffreq[2],frame->ffreq[3]);
  472. }
  473. fclose(f);
  474. VowelChart(1,fname); // draw the vowel chart
  475. }
  476. LoadVoice(voice_name2,0); // reset the original phoneme table
  477. delete progress;
  478. }
  479. extern int n_envelopes;
  480. extern char envelope_paths[][80];
  481. extern unsigned char envelope_dat[][128];
  482. #define HT_ENV 140
  483. #define WD_ENV 128*2
  484. void DrawEnvelopes()
  485. {//================
  486. int ix_env;
  487. int y_base;
  488. int x;
  489. FILE *f_txt=NULL;
  490. unsigned char *env;
  491. char name[200];
  492. wxBitmap bitmap(WD_ENV,HT_ENV*n_envelopes);
  493. // Create a memory DC
  494. wxMemoryDC dc;
  495. dc.SelectObject(bitmap);
  496. dc.SetBrush(*wxWHITE_BRUSH);
  497. dc.SetFont(*wxSWISS_FONT);
  498. dc.Clear();
  499. sprintf(name,"%s%s",path_source,"envelopes.txt");
  500. // f_txt = fopen(name,"w");
  501. for(ix_env=0; ix_env<n_envelopes; ix_env++)
  502. {
  503. y_base = HT_ENV * ix_env;
  504. dc.SetPen(*wxLIGHT_GREY_PEN);
  505. dc.DrawLine(0,y_base+0,256,y_base+0);
  506. dc.DrawLine(0,y_base+64,256,y_base+64);
  507. dc.DrawLine(0,y_base+128,256,y_base+128);
  508. dc.DrawLine(128,y_base+0,128,y_base+128);
  509. dc.SetPen(*wxBLACK_PEN);
  510. strncpy0(name,envelope_paths[ix_env],sizeof(name));
  511. dc.DrawText(wxString(name,wxConvLocal),1,y_base);
  512. env = envelope_dat[ix_env];
  513. y_base = y_base+128;
  514. for(x=0; x<127; x++)
  515. {
  516. dc.DrawLine(x*2, y_base-env[x]/2, (x+1)*2, y_base-env[x+1]/2);
  517. }
  518. if(f_txt != NULL)
  519. {
  520. fprintf(f_txt,"%s\n",name);
  521. for(x=0; x<128; x++)
  522. {
  523. fprintf(f_txt," 0x%.2x,",env[x]);
  524. if((x & 0xf) == 0xf)
  525. fputc('\n',f_txt);
  526. }
  527. fputc('\n',f_txt);
  528. }
  529. }
  530. bitmap.SaveFile(path_phsource+_T("/envelopes.png"),wxBITMAP_TYPE_PNG);
  531. if(f_txt != NULL)
  532. fclose(f_txt);
  533. }