eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

spect.cpp 8.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2007 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * Copyright (C) 2013-2015 by Reece H. Dunn *
  5. * *
  6. * This program is free software; you can redistribute it and/or modify *
  7. * it under the terms of the GNU General Public License as published by *
  8. * the Free Software Foundation; either version 3 of the License, or *
  9. * (at your option) any later version. *
  10. * *
  11. * This program is distributed in the hope that it will be useful, *
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  14. * GNU General Public License for more details. *
  15. * *
  16. * You should have received a copy of the GNU General Public License *
  17. * along with this program; if not, see: *
  18. * <http://www.gnu.org/licenses/>. *
  19. ***************************************************************************/
  20. #include "wx/wx.h"
  21. #include "speak_lib.h"
  22. #include "speech.h"
  23. #include "phoneme.h"
  24. #include "synthesize.h"
  25. #include "voice.h"
  26. #include "spect.h"
  27. #include "wx/txtstrm.h"
  28. #include "wx/datstrm.h"
  29. extern "C" int PeaksToHarmspect(wavegen_peaks_t *peaks, int pitch, int *htab, int control);
  30. extern unsigned char pk_shape1[];
  31. extern int pk_select;
  32. extern char voice_name[];
  33. static int frame_width;
  34. int pk_select;
  35. #define DRAWPEAKWIDTH 2000
  36. #define PEAKSHAPEW 256
  37. #include <math.h>
  38. static int default_freq[N_PEAKS] =
  39. {200,500,1200,3000,3500,4000,6900,7800,9000};
  40. static int default_width[N_PEAKS] =
  41. {750,500,550,550,600,700,700,700,700};
  42. static int default_klt_bw[N_PEAKS] =
  43. {89,90,140,260,260,260,500,500,500};
  float polint(float xa[],float ya[],int n,float x)
  {//==============================================
  // General polynomial interpolation routine (Neville's algorithm).
  //
  // xa, ya : tabulated points, indexed 1..n (element 0 is never read).
  // n      : number of points to interpolate through.
  // x      : position at which the interpolating polynomial is evaluated.
  //
  // Returns the interpolated value at x. Two special cases:
  //  - if two of the xa values involved are identical the polynomial is
  //    undefined and ya[2] is returned;
  //  - the scratch tables hold at most POLINT_MAX_POINTS entries, so for a
  //    larger n the tabulated value nearest to x is returned instead of
  //    writing past the end of those tables.
      enum { POLINT_MAX_POINTS = 8 };

      int i, m;
      int ns = 1;   // index of the table entry closest to x
      float den, ho, hp, w;
      float y;      // result
      float c[POLINT_MAX_POINTS+1], d[POLINT_MAX_POINTS+1];

      // find the tabulated point nearest to x
      float dif = fabs(x - xa[1]);
      for(i=2; i<=n; i++)
      {
          float dift = fabs(x - xa[i]);
          if(dift < dif)
          {
              ns = i;
              dif = dift;
          }
      }

      if(n > POLINT_MAX_POINTS)
      {
          // too many points for c[] and d[]; fall back to the nearest sample
          return(ya[ns]);
      }

      // both correction tables start out as the tabulated values
      for(i=1; i<=n; i++)
      {
          c[i] = ya[i];
          d[i] = ya[i];
      }

      // initial approximation is the nearest sample; ns now addresses the
      // entry just below it in the tableau
      y = ya[ns--];

      for(m=1; m<n; m++)
      {
          // update the corrections for this column of the tableau
          for(i=1; i<=n-m; i++)
          {
              ho = xa[i] - x;
              hp = xa[i+m] - x;
              w = c[i+1] - d[i];
              if((den = ho - hp) == 0.0)
              {
                  return(ya[2]);  // two input xa are identical
              }
              den = w / den;
              d[i] = hp * den;
              c[i] = ho * den;
          }

          // take the branch through the tableau that stays closest to x
          if(2*ns < (n-m))
              y += c[ns+1];
          else
              y += d[ns--];
      }
      return(y);
  }  // end of polint
  79. static void PeaksZero(peak_t *sp, peak_t *zero)
  80. {//=====================================
  81. int pk;
  82. memcpy(zero,sp,sizeof(peak_t)*N_PEAKS);
  83. for(pk=0; pk<N_PEAKS; pk++)
  84. zero[pk].pkheight = 0;
  85. } // end of PeaksZero
  86. SpectFrame::SpectFrame(SpectFrame *copy)
  87. {//=====================================
  88. int ix;
  89. keyframe = 0;
  90. spect = NULL;
  91. markers = 0;
  92. pitch = 0;
  93. nx = 0;
  94. time = 0;
  95. length = 0;
  96. amp_adjust = 100;
  97. length_adjust = 0;
  98. for(ix=0; ix<N_PEAKS; ix++)
  99. {
  100. formants[ix].freq = 0;
  101. peaks[ix].pkfreq = default_freq[ix];
  102. peaks[ix].pkheight = 0;
  103. peaks[ix].pkwidth = default_width[ix];
  104. peaks[ix].pkright = default_width[ix];
  105. peaks[ix].klt_bw = default_klt_bw[ix];
  106. peaks[ix].klt_ap = 0;
  107. peaks[ix].klt_bp = default_klt_bw[ix];
  108. }
  109. memset(klatt_param, 0, sizeof(klatt_param));
  110. klatt_param[KLATT_AV] = 59;
  111. klatt_param[KLATT_Kopen] = 40;
  112. if(copy != NULL)
  113. {
  114. *this = *copy;
  115. spect = new USHORT[nx];
  116. memcpy(spect,copy->spect,sizeof(USHORT)*nx);
  117. }
  118. }
  119. SpectFrame::~SpectFrame()
  120. {//=======================
  121. if(spect != NULL)
  122. delete spect;
  123. }
  124. int SpectFrame::Load(wxInputStream& stream, int file_format_type)
  125. {//==============================================================
  126. int ix;
  127. int x;
  128. unsigned short *spect_data;
  129. wxDataInputStream s(stream);
  130. time = s.ReadDouble();
  131. pitch = s.ReadDouble();
  132. length = s.ReadDouble();
  133. dx = s.ReadDouble();
  134. nx = s.Read16();
  135. markers = s.Read16();
  136. amp_adjust = s.Read16();
  137. if(file_format_type == 2)
  138. {
  139. ix = s.Read16(); // spare
  140. ix = s.Read16(); // spare
  141. }
  142. for(ix=0; ix<N_PEAKS; ix++)
  143. {
  144. formants[ix].freq = s.Read16();
  145. formants[ix].bandw = s.Read16();
  146. peaks[ix].pkfreq = s.Read16();
  147. if((peaks[ix].pkheight = s.Read16()) > 0)
  148. keyframe = 1;
  149. peaks[ix].pkwidth = s.Read16();
  150. peaks[ix].pkright = s.Read16();
  151. if(file_format_type == 2)
  152. {
  153. peaks[ix].klt_bw = s.Read16();
  154. peaks[ix].klt_ap = s.Read16();
  155. peaks[ix].klt_bp = s.Read16();
  156. }
  157. }
  158. if(file_format_type > 0)
  159. {
  160. for(ix=0; ix<N_KLATTP2; ix++)
  161. {
  162. klatt_param[ix] = s.Read16();
  163. }
  164. }
  165. spect_data = new USHORT[nx];
  166. if(spect_data == NULL)
  167. {
  168. wxLogError(_T("Failed to allocate memory"));
  169. return(1);
  170. }
  171. max_y = 0;
  172. for(ix=0; ix<nx; ix++)
  173. {
  174. x = spect_data[ix] = s.Read16();
  175. if(x > max_y) max_y = x;
  176. }
  177. spect = spect_data;
  178. return(0);
  179. } // End of SpectFrame::Load
  180. double SpectFrame::GetRms(int seq_amplitude)
  181. {//=========================================
  182. int h;
  183. float total=0;
  184. int maxh;
  185. int height;
  186. int htab[400];
  187. wavegen_peaks_t wpeaks[9];
  188. for(h=0; h<9; h++)
  189. {
  190. height = (peaks[h].pkheight * seq_amplitude * amp_adjust)/10000;
  191. wpeaks[h].height = height << 8;
  192. wpeaks[h].freq = peaks[h].pkfreq << 16;
  193. wpeaks[h].left = peaks[h].pkwidth << 16;
  194. wpeaks[h].right = peaks[h].pkright << 16;
  195. }
  196. maxh = PeaksToHarmspect(wpeaks,90<<16,htab,0);
  197. for(h=1; h<maxh; h++)
  198. {
  199. total += ((htab[h] * htab[h]) >> 10);
  200. }
  201. rms = sqrt(total) / 7.25;
  202. return(rms);
  203. }
  204. SpectSeq::SpectSeq(int n)
  205. {//======================
  206. numframes = n;
  207. if(n > 0)
  208. frames = new SpectFrame* [n];
  209. else
  210. frames = NULL;
  211. pk_select = 1;
  212. grid = 1;
  213. duration = 0;
  214. pitch1 = 0;
  215. pitch2 = 0;
  216. bass_reduction = 0;
  217. max_x = 3000;
  218. max_y = 1;
  219. file_format = 0;
  220. }
  221. SpectSeq::~SpectSeq()
  222. {//==================
  223. int ix;
  224. if(frames != NULL)
  225. {
  226. for(ix=0; ix<numframes; ix++)
  227. {
  228. if(frames[ix] != NULL)
  229. delete frames[ix];
  230. }
  231. delete frames;
  232. }
  233. }
  234. float SpectSeq::GetFrameLength(int frame, int plus, int *original)
  235. {//===============================================================
  236. int ix;
  237. float adjust=0;
  238. if(frame >= numframes-1) return(0);
  239. // include the adjustment for this frame ?
  240. if(plus) adjust = frames[frame]->length_adjust;
  241. for(ix=frame+1; ix<numframes-1; ix++)
  242. {
  243. if(frames[ix]->keyframe) break; // reached next keyframe
  244. adjust += frames[ix]->length_adjust;
  245. }
  246. if(original != NULL)
  247. *original = int((frames[ix]->time - frames[frame]->time) * 1000.0 + 0.5);
  248. return ((frames[ix]->time - frames[frame]->time) * 1000.0 + adjust);
  249. }
  250. int SpectSeq::Load(wxInputStream & stream)
  251. {//=======================================
  252. int n;
  253. int ix;
  254. unsigned int id1, id2;
  255. int set_max_y=0;
  256. float time_offset;
  257. wxDataInputStream s(stream);
  258. id1 = s.Read32();
  259. id2 = s.Read32();
  260. if((id1 == FILEID1_SPECTSEQ) && (id2 == FILEID2_SPECTSEQ))
  261. {
  262. file_format = 0; // eSpeak formants
  263. }
  264. else
  265. if((id1 == FILEID1_SPECTSEQ) && (id2 == FILEID2_SPECTSEK))
  266. {
  267. file_format = 1; // formants for Klatt synthesizer
  268. }
  269. else
  270. if((id1 == FILEID1_SPECTSEQ) && (id2 == FILEID2_SPECTSQ2))
  271. {
  272. file_format = 2; // formants for Klatt synthesizer
  273. }
  274. else
  275. {
  276. fprintf(stderr, "Unsupported spectral file format.\n");
  277. return(1);
  278. }
  279. name = s.ReadString();
  280. n = s.Read16();
  281. amplitude = s.Read16();
  282. max_y = s.Read16();
  283. s.Read16();
  284. if(n==0) return(0);
  285. if(frames != NULL) delete frames;
  286. frames = new SpectFrame* [n];
  287. numframes = 0;
  288. max_x = 3000;
  289. if(max_y == 0)
  290. {
  291. set_max_y = 1;
  292. max_y = 1;
  293. }
  294. for(ix = 0; ix < n; ix++)
  295. {
  296. SpectFrame *frame = new SpectFrame;
  297. if(frame->Load(stream, file_format) != 0)
  298. {
  299. delete frame;
  300. break;
  301. }
  302. frames[numframes++] = frame;
  303. if(set_max_y && (frame->max_y > max_y))
  304. max_y = frame->max_y;
  305. if(frame->nx * frame->dx > max_x) max_x = int(frame->nx * frame->dx);
  306. }
  307. max_x = 9000; // disable auto-xscaling
  308. frame_width = int((FRAME_WIDTH*max_x)/MAX_DISPLAY_FREQ);
  309. if(frame_width > FRAME_WIDTH) frame_width = FRAME_WIDTH;
  310. // start times from zero
  311. time_offset = frames[0]->time;
  312. for(ix=0; ix<numframes; ix++)
  313. frames[ix]->time -= time_offset;
  314. pitch1 = pitchenv.pitch1;
  315. pitch2 = pitchenv.pitch2;
  316. duration = int(frames[numframes-1]->time * 1000);
  317. if(max_y < 400)
  318. max_y = 200;
  319. else
  320. max_y = 29000; // disable auto height scaling
  321. for(ix=0; ix<numframes; ix++)
  322. {
  323. if(frames[ix]->keyframe)
  324. frames[ix]->length_adjust = frames[ix]->length - GetFrameLength(ix,0,NULL);
  325. }
  326. return(0);
  327. } // end of SpectSeq::Load