eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

spect.cpp 8.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2007 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * Copyright (C) 2013-2015 by Reece H. Dunn *
  5. * *
  6. * This program is free software; you can redistribute it and/or modify *
  7. * it under the terms of the GNU General Public License as published by *
  8. * the Free Software Foundation; either version 3 of the License, or *
  9. * (at your option) any later version. *
  10. * *
  11. * This program is distributed in the hope that it will be useful, *
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  14. * GNU General Public License for more details. *
  15. * *
  16. * You should have received a copy of the GNU General Public License *
  17. * along with this program; if not, see: *
  18. * <http://www.gnu.org/licenses/>. *
  19. ***************************************************************************/
  20. #include "wx/wx.h"
  21. #include "speak_lib.h"
  22. #include "speech.h"
  23. #include "phoneme.h"
  24. #include "synthesize.h"
  25. #include "voice.h"
  26. #include "spect.h"
  27. #include "wx/txtstrm.h"
  28. #include "wx/datstrm.h"
  29. extern "C" int PeaksToHarmspect(wavegen_peaks_t *peaks, int pitch, int *htab, int control);
  30. extern unsigned char pk_shape1[];
  31. extern int pk_select;
  32. extern char voice_name[];
  33. static int frame_width;
  34. int pk_select;
  35. #define DRAWPEAKWIDTH 2000
  36. #define PEAKSHAPEW 256
  37. #include <math.h>
  38. static int default_freq[N_PEAKS] =
  39. {200,500,1200,3000,3500,4000,6900,7800,9000};
  40. static int default_width[N_PEAKS] =
  41. {750,500,550,550,600,700,700,700,700};
  42. static int default_klt_bw[N_PEAKS] =
  43. {89,90,140,260,260,260,500,500,500};
  // General polynomial interpolation through n tabulated points.
  //
  // The arrays are 1-based: the points are (xa[1], ya[1]) ... (xa[n], ya[n]);
  // element 0 of each array is never read.
  //
  //   xa, ya  abscissas and ordinates of the tabulated points
  //   n       number of points; at most 8 are supported because the scratch
  //           arrays below are fixed-size
  //   x       position at which the interpolating polynomial is evaluated
  //
  // Returns the interpolated value.  Degenerate input is handled as follows:
  //   * n < 1            -> 0 (no data to interpolate)
  //   * n > 8            -> ya[2], the same fallback used for duplicate
  //                         abscissas, instead of overrunning the scratch
  //                         arrays
  //   * two equal xa[]   -> ya[2]
  float polint(float xa[], float ya[], int n, float x)
  {//==============================================
      enum { POLINT_MAX_POINTS = 8 };

      // Scratch tableau columns, indexed 1..n.
      float c[POLINT_MAX_POINTS + 1];
      float d[POLINT_MAX_POINTS + 1];

      if (n < 1)
          return 0.0f;
      if (n > POLINT_MAX_POINTS)
          return ya[2];   // would overflow c[]/d[]; reuse the error fallback

      // Find the tabulated point nearest to x and seed the tableau.
      int ns = 1;
      float dif = fabs(x - xa[1]);
      for (int i = 1; i <= n; i++) {
          const float dift = fabs(x - xa[i]);
          if (dift < dif) {
              ns = i;
              dif = dift;
          }
          c[i] = ya[i];
          d[i] = ya[i];
      }

      // Start from the nearest ordinate, then add successive corrections.
      float y = ya[ns--];

      for (int m = 1; m < n; m++) {
          for (int i = 1; i <= n - m; i++) {
              const float ho = xa[i] - x;
              const float hp = xa[i + m] - x;
              const float w = c[i + 1] - d[i];
              float den = ho - hp;
              if (den == 0.0) {
                  // Two input abscissas are identical: the polynomial is
                  // undefined, so fall back to a fixed tabulated value.
                  return ya[2];
              }
              den = w / den;
              d[i] = hp * den;
              c[i] = ho * den;
          }
          // Pick whichever branch of the tableau keeps us closest to x.
          y += ((2 * ns < (n - m)) ? c[ns + 1] : d[ns--]);
      }
      return y;
  } // end of polint
  79. static void PeaksZero(peak_t *sp, peak_t *zero)
  80. {//=====================================
  81. int pk;
  82. memcpy(zero,sp,sizeof(peak_t)*N_PEAKS);
  83. for(pk=0; pk<N_PEAKS; pk++)
  84. zero[pk].pkheight = 0;
  85. } // end of PeaksZero
  86. SpectFrame::SpectFrame(SpectFrame *copy)
  87. {//=====================================
  88. int ix;
  89. keyframe = 0;
  90. spect = NULL;
  91. markers = 0;
  92. pitch = 0;
  93. nx = 0;
  94. time = 0;
  95. length = 0;
  96. amp_adjust = 100;
  97. length_adjust = 0;
  98. for(ix=0; ix<N_PEAKS; ix++)
  99. {
  100. formants[ix].freq = 0;
  101. peaks[ix].pkfreq = default_freq[ix];
  102. peaks[ix].pkheight = 0;
  103. peaks[ix].pkwidth = default_width[ix];
  104. peaks[ix].pkright = default_width[ix];
  105. peaks[ix].klt_bw = default_klt_bw[ix];
  106. peaks[ix].klt_ap = 0;
  107. peaks[ix].klt_bp = default_klt_bw[ix];
  108. }
  109. memset(klatt_param, 0, sizeof(klatt_param));
  110. klatt_param[KLATT_AV] = 59;
  111. klatt_param[KLATT_Kopen] = 40;
  112. if(copy != NULL)
  113. {
  114. *this = *copy;
  115. spect = new USHORT[nx];
  116. memcpy(spect,copy->spect,sizeof(USHORT)*nx);
  117. }
  118. }
  119. SpectFrame::~SpectFrame()
  120. {//=======================
  121. if(spect != NULL)
  122. delete spect;
  123. }
  124. int SpectFrame::Load(wxInputStream& stream, int file_format_type)
  125. {//==============================================================
  126. int ix;
  127. int x;
  128. unsigned short *spect_data;
  129. wxDataInputStream s(stream);
  130. time = s.ReadDouble();
  131. pitch = s.ReadDouble();
  132. length = s.ReadDouble();
  133. dx = s.ReadDouble();
  134. nx = s.Read16();
  135. markers = s.Read16();
  136. amp_adjust = s.Read16();
  137. if(file_format_type == 2)
  138. {
  139. ix = s.Read16(); // spare
  140. ix = s.Read16(); // spare
  141. }
  142. for(ix=0; ix<N_PEAKS; ix++)
  143. {
  144. formants[ix].freq = s.Read16();
  145. formants[ix].bandw = s.Read16();
  146. peaks[ix].pkfreq = s.Read16();
  147. if((peaks[ix].pkheight = s.Read16()) > 0)
  148. keyframe = 1;
  149. peaks[ix].pkwidth = s.Read16();
  150. peaks[ix].pkright = s.Read16();
  151. if(file_format_type == 2)
  152. {
  153. peaks[ix].klt_bw = s.Read16();
  154. peaks[ix].klt_ap = s.Read16();
  155. peaks[ix].klt_bp = s.Read16();
  156. }
  157. }
  158. if(file_format_type > 0)
  159. {
  160. for(ix=0; ix<N_KLATTP2; ix++)
  161. {
  162. klatt_param[ix] = s.Read16();
  163. }
  164. }
  165. spect_data = new USHORT[nx];
  166. if(spect_data == NULL)
  167. {
  168. wxLogError(_T("Failed to allocate memory"));
  169. return(1);
  170. }
  171. max_y = 0;
  172. for(ix=0; ix<nx; ix++)
  173. {
  174. x = spect_data[ix] = s.Read16();
  175. if(x > max_y) max_y = x;
  176. }
  177. spect = spect_data;
  178. return(0);
  179. } // End of SpectFrame::Load
  180. double SpectFrame::GetRms(int seq_amplitude)
  181. {//=========================================
  182. int h;
  183. float total=0;
  184. int maxh;
  185. int height;
  186. int htab[400];
  187. wavegen_peaks_t wpeaks[9];
  188. for(h=0; h<9; h++)
  189. {
  190. height = (peaks[h].pkheight * seq_amplitude * amp_adjust)/10000;
  191. wpeaks[h].height = height << 8;
  192. wpeaks[h].freq = peaks[h].pkfreq << 16;
  193. wpeaks[h].left = peaks[h].pkwidth << 16;
  194. wpeaks[h].right = peaks[h].pkright << 16;
  195. }
  196. maxh = PeaksToHarmspect(wpeaks,90<<16,htab,0);
  197. for(h=1; h<maxh; h++)
  198. {
  199. total += ((htab[h] * htab[h]) >> 10);
  200. }
  201. rms = sqrt(total) / 7.25;
  202. return(rms);
  203. }
  204. SpectSeq::SpectSeq(int n)
  205. {//======================
  206. numframes = n;
  207. if(n > 0)
  208. frames = new SpectFrame* [n];
  209. else
  210. frames = NULL;
  211. pk_select = 1;
  212. grid = 1;
  213. duration = 0;
  214. pitch1 = 0;
  215. pitch2 = 0;
  216. bass_reduction = 0;
  217. max_x = 3000;
  218. max_y = 1;
  219. file_format = 0;
  220. }
  221. SpectSeq::~SpectSeq()
  222. {//==================
  223. int ix;
  224. if(frames != NULL)
  225. {
  226. for(ix=0; ix<numframes; ix++)
  227. {
  228. if(frames[ix] != NULL)
  229. delete frames[ix];
  230. }
  231. delete frames;
  232. }
  233. }
  234. static float GetFrameLength(SpectSeq &spect, int frame)
  235. {//===============================================================
  236. int ix;
  237. float adjust=0;
  238. if(frame >= spect.numframes-1) return(0);
  239. for(ix=frame+1; ix<spect.numframes-1; ix++)
  240. {
  241. if(spect.frames[ix]->keyframe) break; // reached next keyframe
  242. adjust += spect.frames[ix]->length_adjust;
  243. }
  244. return ((spect.frames[ix]->time - spect.frames[frame]->time) * 1000.0 + adjust);
  245. }
  246. int SpectSeq::Load(wxInputStream & stream)
  247. {//=======================================
  248. int n;
  249. int ix;
  250. unsigned int id1, id2;
  251. int set_max_y=0;
  252. float time_offset;
  253. wxDataInputStream s(stream);
  254. id1 = s.Read32();
  255. id2 = s.Read32();
  256. if((id1 == FILEID1_SPECTSEQ) && (id2 == FILEID2_SPECTSEQ))
  257. {
  258. file_format = 0; // eSpeak formants
  259. }
  260. else
  261. if((id1 == FILEID1_SPECTSEQ) && (id2 == FILEID2_SPECTSEK))
  262. {
  263. file_format = 1; // formants for Klatt synthesizer
  264. }
  265. else
  266. if((id1 == FILEID1_SPECTSEQ) && (id2 == FILEID2_SPECTSQ2))
  267. {
  268. file_format = 2; // formants for Klatt synthesizer
  269. }
  270. else
  271. {
  272. fprintf(stderr, "Unsupported spectral file format.\n");
  273. return(1);
  274. }
  275. name = s.ReadString();
  276. n = s.Read16();
  277. amplitude = s.Read16();
  278. max_y = s.Read16();
  279. s.Read16();
  280. if(n==0) return(0);
  281. if(frames != NULL) delete frames;
  282. frames = new SpectFrame* [n];
  283. numframes = 0;
  284. max_x = 3000;
  285. if(max_y == 0)
  286. {
  287. set_max_y = 1;
  288. max_y = 1;
  289. }
  290. for(ix = 0; ix < n; ix++)
  291. {
  292. SpectFrame *frame = new SpectFrame;
  293. if(frame->Load(stream, file_format) != 0)
  294. {
  295. delete frame;
  296. break;
  297. }
  298. frames[numframes++] = frame;
  299. if(set_max_y && (frame->max_y > max_y))
  300. max_y = frame->max_y;
  301. if(frame->nx * frame->dx > max_x) max_x = int(frame->nx * frame->dx);
  302. }
  303. max_x = 9000; // disable auto-xscaling
  304. frame_width = int((FRAME_WIDTH*max_x)/MAX_DISPLAY_FREQ);
  305. if(frame_width > FRAME_WIDTH) frame_width = FRAME_WIDTH;
  306. // start times from zero
  307. time_offset = frames[0]->time;
  308. for(ix=0; ix<numframes; ix++)
  309. frames[ix]->time -= time_offset;
  310. pitch1 = pitchenv.pitch1;
  311. pitch2 = pitchenv.pitch2;
  312. duration = int(frames[numframes-1]->time * 1000);
  313. if(max_y < 400)
  314. max_y = 200;
  315. else
  316. max_y = 29000; // disable auto height scaling
  317. for(ix=0; ix<numframes; ix++)
  318. {
  319. if(frames[ix]->keyframe)
  320. frames[ix]->length_adjust = frames[ix]->length - GetFrameLength(*this,ix);
  321. }
  322. return(0);
  323. } // end of SpectSeq::Load