eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

vowelchart.cpp 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2007 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, write see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include <math.h>
  20. #include "wx/wx.h"
  21. #include <wx/dcmemory.h>
  22. #include <wx/dc.h>
  23. #include <wx/bitmap.h>
  24. #include <wx/dirdlg.h>
  25. #include "wx/filename.h"
  26. #include "wx/wfstream.h"
  27. #include "speak_lib.h"
  28. #include "main.h"
  29. #include "speech.h"
  30. #include "phoneme.h"
  31. #include "synthesize.h"
  32. #include "voice.h"
  33. #include "spect.h"
  34. #include "translate.h"
  35. #include "options.h"
  36. /* Read a file of vowel symbols and f1,f2 formants, and produce a vowel diagram
  37. */
  // Globals declared here and defined outside this file.
  // path_phsource: default directory offered by the file/directory dialogs below,
  // and the directory DrawEnvelopes() writes envelopes.png into.
  extern wxString path_phsource;
  // spects_data: byte buffer; MakeVowelLists() indexes it with the value returned by LookupSound().
  extern char *spects_data;
  // n_phoneme_tables: number of entries of phoneme_tab_list that MakeVowelLists() iterates over.
  extern int n_phoneme_tables;
  extern PHONEME_TAB_LIST phoneme_tab_list[N_PHONEME_TABS];
  // size of the vowelchart png
  #define WIDTH 1580
  #define HEIGHT 800
  // Convert to int by adding 0.5 and truncating; this rounds to nearest only for non-negative x.
  #define ROUND(x) ((int) ((x) + 0.5))
  46. static int HslValue (double n1, double n2, double hue)
  47. {//===================================================
  48. double value;
  49. if (hue > 255)
  50. hue -= 255;
  51. else if (hue < 0)
  52. hue += 255;
  53. if (hue < 42.5)
  54. value = n1 + (n2 - n1) * (hue / 42.5);
  55. else if (hue < 127.5)
  56. value = n2;
  57. else if (hue < 170)
  58. value = n1 + (n2 - n1) * ((170 - hue) / 42.5);
  59. else
  60. value = n1;
  61. return ROUND (value * 255.0);
  62. }
  63. /**
  64. * @hue: Hue channel, returns Red channel
  65. * @saturation: Saturation channel, returns Green channel
  66. * @lightness: Lightness channel, returns Blue channel
  67. *
  68. * The arguments are pointers to int, with the values pointed to in the
  69. * following ranges: H [0, 360], L [0, 255], S [0, 255].
  70. *
  71. * The function changes the arguments to point to the RGB value
  72. * corresponding, with the returned values all in the range [0, 255].
  73. **/
  74. void HslToRgb (int *hue, int *saturation, int *lightness)
  75. {//======================================================
  76. double h, s, l;
  77. h = (*hue * 256)/360;
  78. s = *saturation;
  79. l = *lightness;
  80. if (s == 0)
  81. {
  82. /* achromatic case */
  83. *hue = (int)l;
  84. *lightness = (int)l;
  85. *saturation = (int)l;
  86. }
  87. else
  88. {
  89. double m1, m2;
  90. if (l < 128)
  91. m2 = (l * (255 + s)) / 65025.0;
  92. else
  93. m2 = (l + s - (l * s) / 255.0) / 255.0;
  94. m1 = (l / 127.5) - m2;
  95. /* chromatic case */
  96. *hue = HslValue (m1, m2, h + 85);
  97. *saturation = HslValue (m1, m2, h);
  98. *lightness = HslValue (m1, m2, h - 85);
  99. }
  100. }
  // Packed (x/8, y/8) grid cells of the vowel labels drawn so far on the current chart;
  // DrawVowel() appends one entry per vowel and uses the list to offset overlapping labels.
  static int vowel_posn[N_PHONEME_TAB];
  // Number of entries of vowel_posn in use; VowelChart() resets it to 0 before plotting.
  static int vowel_posn_ix;
  // Base-2 logarithm of x, computed as ln(x) divided by a six-digit value of ln(2).
  static double log2a(double x)
  {//========================
    // log2(x) = log(x) / log(2); the divisor is the literal the chart scaling was tuned with.
    const double ln_of_2 = 0.693147;
    return log(x) / ln_of_2;
  }
  108. static int VowelX(int f2)
  109. {//======================
  110. return(WIDTH - int((log2a(f2) - 9.40)*WIDTH/1.9));
  111. // return(WIDTH - int((log2a(f2) - 9.49)*WIDTH/1.8));
  112. }
  113. static int VowelY(int f1)
  114. {//======================
  115. return(int((log2a(f1) - 7.85)*HEIGHT/2.15));
  116. }
  117. static int VowelZ(int f3)
  118. {//======================
  119. int z;
  120. // range 2000-3000Hz, log2= 10.96 to 11.55
  121. z = int((log2a(f3) - 11.05)*256/0.50);
  122. if(z < 0) z = 0;
  123. if(z > 255) z = 255;
  124. return(z);
  125. }
  126. static void DrawVowel(wxDC *dc, wxString name, int f1, int f2, int f3, int g1, int g2)
  127. {//==================================================================================
  128. int ix;
  129. int posn;
  130. int collisions;
  131. int x,y,z,x2,y2;
  132. int r,g,b;
  133. wxBrush brush;
  134. y = VowelY(f1);
  135. x = VowelX(f2);
  136. z = VowelZ(f3);
  137. if(y < 0) y = 0;
  138. if(y > (HEIGHT-4)) y= (HEIGHT-4);
  139. if(x < 0) x = 0;
  140. if(x > (WIDTH-12)) x = (WIDTH-12);
  141. r = z;
  142. g = 255;
  143. b = 100 + z/2;
  144. HslToRgb(&r,&g,&b);
  145. brush.SetColour(r,g,b);
  146. dc->SetBrush(brush);
  147. dc->DrawCircle(x,y,4);
  148. // check for a label already at this position
  149. collisions = 0;
  150. posn = (x/8)*WIDTH + (y/8);
  151. for(ix=0; ix<vowel_posn_ix; ix++)
  152. {
  153. if(posn == vowel_posn[ix])
  154. collisions++;
  155. }
  156. vowel_posn[vowel_posn_ix++] = posn;
  157. dc->DrawText(name,x+4,y+(collisions*10));
  158. if(g2 != 0xffff)
  159. {
  160. y2 = VowelY(g1);
  161. x2 = VowelX(g2);
  162. dc->DrawLine(x,y,x2,y2);
  163. }
  164. }
  165. static int VowelChartDir(wxDC *dc, wxBitmap *bitmap)
  166. {//=================================================
  167. int ix;
  168. int nf;
  169. int count = 0;
  170. SpectSeq *spectseq;
  171. SpectFrame *frame1;
  172. SpectFrame *frame2=NULL;
  173. wxFileName filename;
  174. wxString dir = wxDirSelector(_T("Directory of vowel files"),path_phsource);
  175. if(dir.IsEmpty()) return(0);
  176. wxString path = wxFindFirstFile(dir+_T("/*"),wxFILE);
  177. while (!path.empty())
  178. {
  179. if((spectseq = new SpectSeq) == NULL) break;
  180. filename = wxFileName(path);
  181. wxFileInputStream stream(path);
  182. if(stream.Ok() == FALSE)
  183. {
  184. path = wxFindNextFile();
  185. continue;
  186. }
  187. spectseq->Load(stream);
  188. nf = 0;
  189. frame1 = NULL;
  190. if(spectseq->numframes > 0)
  191. {
  192. frame2 = spectseq->frames[0];
  193. }
  194. for(ix=0; ix<spectseq->numframes; ix++)
  195. {
  196. if(spectseq->frames[ix]->keyframe)
  197. {
  198. nf++;
  199. frame2 = spectseq->frames[ix];
  200. if(frame2->markers & FRFLAG_VOWEL_CENTRE)
  201. frame1 = frame2;
  202. }
  203. }
  204. if((nf >= 3) && (frame1 != NULL))
  205. {
  206. DrawVowel(dc,wxString(filename.GetName()),
  207. frame1->peaks[1].pkfreq, frame1->peaks[2].pkfreq, frame1->peaks[3].pkfreq,
  208. frame2->peaks[1].pkfreq, frame2->peaks[2].pkfreq);
  209. count++;
  210. }
  211. delete spectseq;
  212. path = wxFindNextFile();
  213. }
  214. filename.SetPath(dir);
  215. filename.SetFullName(_T("vowelchart.png"));
  216. bitmap->SaveFile(filename.GetFullPath(),wxBITMAP_TYPE_PNG);
  217. return(count);
  218. }
  219. static int VowelChartList(wxDC *dc, wxBitmap *bitmap, char *fname)
  220. {//===============================================================
  221. // Plot a graph of vowel formants.
  222. // y-axis is decreasing f1 (closeness)
  223. // x-axis is decreasing f2 (backness)
  224. FILE *f_in;
  225. int ix;
  226. int f1,f2,f3,g1,g2;
  227. int count=0;
  228. wxFileName filename;
  229. char name[40];
  230. char buf[200];
  231. wxString path;
  232. if(fname != NULL)
  233. {
  234. path = wxString(fname,wxConvLocal);
  235. }
  236. else
  237. {
  238. path = wxFileSelector(_T("Read file of vowel formants"),path_phsource,
  239. _T(""),_T(""),_T("*"),wxOPEN);
  240. }
  241. if(path.IsEmpty())
  242. {
  243. return(0);
  244. }
  245. filename = wxFileName(path);
  246. strcpy(buf,path.mb_str(wxConvLocal));
  247. f_in = fopen(buf,"r");
  248. if(f_in == NULL)
  249. {
  250. wxLogError(_T("Can't read file: %s"),buf);
  251. return(0);
  252. }
  253. while(fgets(buf,sizeof(buf),f_in) != NULL)
  254. {
  255. g2 = 0xffff;
  256. ix = sscanf(buf,"%s %d %d %d %d %d",name,&f1,&f2,&f3,&g1,&g2);
  257. if(ix >= 3)
  258. {
  259. DrawVowel(dc,wxString(name,wxConvLocal),
  260. f1,f2,f3,g1,g2);
  261. count++;
  262. }
  263. }
  264. filename.SetExt(_T("png"));
  265. bitmap->SaveFile(filename.GetFullPath(),wxBITMAP_TYPE_PNG);
  266. return(count);
  267. }
  268. void VowelChart(int control, char *fname)
  269. {//======================================
  270. // Plot a graph of vowel formants.
  271. // y-axis is decreasing f1 (closeness)
  272. // x-axis is decreasing f2 (backness)
  273. // control=1 from directory of lists
  274. // control=2 from single list
  275. // control=3 from directory of phoneme source data files
  276. int ix;
  277. int x,y;
  278. int count;
  279. wxFileName filename;
  280. wxBitmap bitmap(WIDTH,HEIGHT);
  281. // Create a memory DC
  282. wxMemoryDC dc;
  283. dc.SelectObject(bitmap);
  284. dc.SetBrush(*wxWHITE_BRUSH);
  285. dc.SetFont(*wxSWISS_FONT);
  286. dc.Clear();
  287. // draw grid
  288. dc.SetPen(*wxLIGHT_GREY_PEN);
  289. for(ix=200; ix<=1000; ix+=50)
  290. {
  291. y = VowelY(ix);
  292. dc.DrawLine(0,y,WIDTH,y);
  293. if((ix % 100) == 0)
  294. dc.DrawText(wxString::Format(_T("%d"),ix),1,y);
  295. }
  296. for(ix=700; ix<=2400; ix+=100)
  297. {
  298. x = VowelX(ix);
  299. dc.DrawLine(x,0,x,HEIGHT);
  300. if((ix % 200)==0)
  301. dc.DrawText(wxString::Format(_T("%d"),ix),x+1,0);
  302. }
  303. dc.SetPen(*wxBLACK_PEN);
  304. vowel_posn_ix = 0;
  305. if(control==3)
  306. count = VowelChartDir(&dc, &bitmap);
  307. else
  308. count = VowelChartList(&dc, &bitmap, fname);
  309. if(control != 1)
  310. wxLogStatus(_T("Plotted %d vowels"),count);
  311. }
  312. void FindPhonemesUsed(void)
  313. {//========================
  314. int hash;
  315. char *p;
  316. char *start;
  317. char *next;
  318. unsigned char c;
  319. int count = 0;
  320. // look through all the phoneme strings in the **_rules data
  321. // and mark these phoneme codes as used.
  322. p = translator->data_dictrules;
  323. while(*p != 0)
  324. {
  325. if(*p == RULE_CONDITION)
  326. p+=2;
  327. if(*p == RULE_LINENUM)
  328. p+=3;
  329. if(*p == RULE_GROUP_END)
  330. {
  331. p++;
  332. if(*p == 0) break;
  333. }
  334. if(*p == RULE_GROUP_START)
  335. {
  336. if(p[1] == RULE_LETTERGP2)
  337. {
  338. while(*p != RULE_GROUP_END) p++;
  339. continue;
  340. }
  341. p += (strlen(p)+1);
  342. }
  343. while((((c = *p) != RULE_PHONEMES)) && (c != 0)) p++;
  344. count++;
  345. if(c == RULE_PHONEMES)
  346. {
  347. start = p;
  348. p++;
  349. while(*p != 0)
  350. {
  351. phoneme_tab_flags[*p & 0xff] |= 2;
  352. p++;
  353. }
  354. }
  355. p++;
  356. }
  357. for(hash=0; hash<N_HASH_DICT; hash++)
  358. {
  359. p = translator->dict_hashtab[hash];
  360. if(p == NULL)
  361. continue;
  362. while(*p != 0)
  363. {
  364. next = p + p[0];
  365. if((p[1] & 0x80) == 0)
  366. {
  367. p += ((p[1] & 0x3f) + 2);
  368. while(*p != 0)
  369. {
  370. phoneme_tab_flags[*p & 0xff] |= 2;
  371. p++;
  372. }
  373. }
  374. p = next;
  375. }
  376. }
  377. } // end of FindPhonemesUsed
  378. void MakeVowelLists(void)
  379. {//======================
  380. // For each phoneme table, make a list of its vowels and their
  381. // formant frequencies (f1,f2,f3) for use by VowelChart()
  382. int table;
  383. int ix;
  384. int phcode;
  385. PHONEME_TAB *ph;
  386. FILE *f;
  387. SPECT_SEQ *seq;
  388. frame_t *frame;
  389. int match_level;
  390. char dirname[sizeof(path_source)+20];
  391. char fname[sizeof(dirname)+40];
  392. progress = new wxProgressDialog(_T("Vowel charts"),_T(""),n_phoneme_tables);
  393. sprintf(dirname,"%s%s",path_source,"vowelcharts");
  394. mkdir(dirname,S_IRWXU | S_IRGRP | S_IROTH);
  395. for(table=0; table<n_phoneme_tables; table++)
  396. {
  397. sprintf(fname,"%s/%s",dirname,phoneme_tab_list[table].name);
  398. if((f = fopen(fname,"w"))==NULL) continue;
  399. progress->Update(table);
  400. // select the phoneme table by name
  401. // if(SetVoiceByName(phoneme_tab_list[table].name) != 0) continue;
  402. if(SelectPhonemeTableName(phoneme_tab_list[table].name) < 0) continue;
  403. FindPhonemesUsed();
  404. // phoneme table is terminated by a phoneme with no name (=0)
  405. for(phcode=1; phcode < n_phoneme_tab; phcode++)
  406. {
  407. //if((phoneme_tab_flags[phcode] & 3) == 0)
  408. // continue; // inherited, and not used
  409. ph = phoneme_tab[phcode];
  410. if(ph->type != phVOWEL)
  411. continue;
  412. if((ix = LookupSound(ph, phoneme_tab[phonPAUSE], 1, &match_level, 0)) == 0)
  413. continue;
  414. seq = (SPECT_SEQ *)(&spects_data[ix]);
  415. frame = &seq->frame[1];
  416. fprintf(f,"%s\t %3d %4d %4d",WordToString(ph->mnemonic),
  417. frame->ffreq[1],frame->ffreq[2],frame->ffreq[3]);
  418. frame = &seq->frame[seq->n_frames-1];
  419. fprintf(f," %3d %4d %4d\n",frame->ffreq[1],frame->ffreq[2],frame->ffreq[3]);
  420. }
  421. fclose(f);
  422. VowelChart(1,fname); // draw the vowel chart
  423. }
  424. LoadVoice(voice_name2,0); // reset the original phoneme table
  425. delete progress;
  426. }
  // Envelope tables declared here and defined outside this file; DrawEnvelopes()
  // reads n_envelopes entries from each array.
  extern int n_envelopes;
  extern char envelope_paths[][80];
  extern unsigned char envelope_dat[][128];
  // Height in pixels of the strip of the envelopes bitmap given to each envelope.
  #define HT_ENV 140
  // Width in pixels of the envelopes bitmap. Parenthesised so that the macro keeps
  // its value inside larger expressions (e.g. n / WD_ENV).
  #define WD_ENV (128*2)
  432. void DrawEnvelopes()
  433. {//================
  434. int ix_env;
  435. int y_base;
  436. int x;
  437. unsigned char *env;
  438. char name[80];
  439. wxBitmap bitmap(WD_ENV,HT_ENV*n_envelopes);
  440. // Create a memory DC
  441. wxMemoryDC dc;
  442. dc.SelectObject(bitmap);
  443. dc.SetBrush(*wxWHITE_BRUSH);
  444. dc.SetFont(*wxSWISS_FONT);
  445. dc.Clear();
  446. for(ix_env=0; ix_env<n_envelopes; ix_env++)
  447. {
  448. y_base = HT_ENV * ix_env;
  449. dc.SetPen(*wxLIGHT_GREY_PEN);
  450. dc.DrawLine(0,y_base+0,256,y_base+0);
  451. dc.DrawLine(0,y_base+64,256,y_base+64);
  452. dc.DrawLine(0,y_base+128,256,y_base+128);
  453. dc.DrawLine(128,y_base+0,128,y_base+128);
  454. dc.SetPen(*wxBLACK_PEN);
  455. strncpy0(name,envelope_paths[ix_env],sizeof(name));
  456. dc.DrawText(wxString(name,wxConvLocal),1,y_base);
  457. env = envelope_dat[ix_env];
  458. y_base = y_base+128;
  459. for(x=0; x<127; x++)
  460. {
  461. dc.DrawLine(x*2, y_base-env[x]/2, (x+1)*2, y_base-env[x+1]/2);
  462. }
  463. }
  464. bitmap.SaveFile(path_phsource+_T("/envelopes.png"),wxBITMAP_TYPE_PNG);
  465. }