eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

spect.c 9.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
  1. /*
  2. * Copyright (C) 2005 to 2007 by Jonathan Duddington
  3. * email: [email protected]
  4. * Copyright (C) 2013-2015 Reece H. Dunn
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, see: <http://www.gnu.org/licenses/>.
  18. */
  19. #include "config.h"
  20. #if HAVE_STDINT_H
  21. #include <stdint.h>
  22. #endif
  23. #include "speak_lib.h"
  24. #include "speech.h"
  25. #include "phoneme.h"
  26. #include "synthesize.h"
  27. #include "voice.h"
  28. #include "spect.h"
  29. #include <math.h>
  30. #include <stdlib.h>
  31. #include <string.h>
  32. extern double ConvertFromIeeeExtended(unsigned char *bytes);
  33. extern int PeaksToHarmspect(wavegen_peaks_t *peaks, int pitch, int *htab, int control);
  34. extern unsigned char pk_shape1[];
  35. extern int pk_select;
  36. extern char voice_name[];
  37. static int frame_width;
  38. int pk_select;
  39. #define DRAWPEAKWIDTH 2000
  40. #define PEAKSHAPEW 256
  41. static int default_freq[N_PEAKS] =
  42. {200,500,1200,3000,3500,4000,6900,7800,9000};
  43. static int default_width[N_PEAKS] =
  44. {750,500,550,550,600,700,700,700,700};
  45. static int default_klt_bw[N_PEAKS] =
  46. {89,90,140,260,260,260,500,500,500};
  /*
   * Read a 10-byte floating point field from `stream` and convert it with
   * ConvertFromIeeeExtended().
   *
   * Returns the converted value, or 0.0 when fewer than 10 bytes could be
   * read.  In that case fread() has left the stream's end-of-file or error
   * indicator set, so a caller can detect the failure with feof()/ferror().
   */
  static double read_double(FILE *stream)
  {
      unsigned char bytes[10] = { 0 };

      // Never hand a partially filled buffer to the converter.
      if (fread(bytes, 1, sizeof bytes, stream) != sizeof bytes)
          return 0.0;

      return ConvertFromIeeeExtended(bytes);
  }
  /*
   * General polynomial interpolation routine.
   *
   * Evaluates, at `x`, the polynomial passing through the `n` points
   * (xa[1], ya[1]) ... (xa[n], ya[n]).  The arrays are 1-based: element 0 of
   * `xa` and `ya` is never accessed.
   *
   * n must be in the range 1..8, because the work arrays are fixed-size.
   * For any other n the function returns 0.0f instead of indexing outside
   * the work arrays or the documented input range.
   *
   * If two of the xa values used in a step are identical the interpolation
   * is undefined and ya[2] is returned as a fallback.
   */
  float polint(float xa[], float ya[], int n, float x)
  {
      enum { POLINT_MAX_POINTS = 8 };

      float c[POLINT_MAX_POINTS + 1];
      float d[POLINT_MAX_POINTS + 1];
      float den, dif, dift, ho, hp, w;
      float y; // result
      int i, m;
      int ns = 1;

      if (n < 1 || n > POLINT_MAX_POINTS)
          return 0.0f;

      // Find the table entry closest to x and initialise the correction tables.
      dif = fabsf(x - xa[1]);
      for (i = 1; i <= n; i++) {
          dift = fabsf(x - xa[i]);
          if (dift < dif) {
              ns = i;
              dif = dift;
          }
          c[i] = ya[i];
          d[i] = ya[i];
      }

      // Start from the nearest table value, then add successive corrections.
      y = ya[ns--];
      for (m = 1; m < n; m++) {
          for (i = 1; i <= n - m; i++) {
              ho = xa[i] - x;
              hp = xa[i + m] - x;
              w = c[i + 1] - d[i];
              den = ho - hp;
              if (den == 0.0f)
                  return ya[2]; // two input xa are identical
              den = w / den;
              d[i] = hp * den;
              c[i] = ho * den;
          }
          // Choose the c or d correction depending on where ns sits in the table.
          y += (2 * ns < (n - m)) ? c[ns + 1] : d[ns--];
      }
      return y;
  }
  87. static SpectFrame *SpectFrameCreate()
  88. {
  89. int ix;
  90. SpectFrame *frame;
  91. frame = malloc(sizeof(SpectFrame));
  92. frame->keyframe = 0;
  93. frame->spect = NULL;
  94. frame->markers = 0;
  95. frame->pitch = 0;
  96. frame->nx = 0;
  97. frame->time = 0;
  98. frame->length = 0;
  99. frame->amp_adjust = 100;
  100. frame->length_adjust = 0;
  101. for(ix=0; ix<N_PEAKS; ix++)
  102. {
  103. frame->formants[ix].freq = 0;
  104. frame->peaks[ix].pkfreq = default_freq[ix];
  105. frame->peaks[ix].pkheight = 0;
  106. frame->peaks[ix].pkwidth = default_width[ix];
  107. frame->peaks[ix].pkright = default_width[ix];
  108. frame->peaks[ix].klt_bw = default_klt_bw[ix];
  109. frame->peaks[ix].klt_ap = 0;
  110. frame->peaks[ix].klt_bp = default_klt_bw[ix];
  111. }
  112. memset(frame->klatt_param, 0, sizeof(frame->klatt_param));
  113. frame->klatt_param[KLATT_AV] = 59;
  114. frame->klatt_param[KLATT_Kopen] = 40;
  115. return frame;
  116. }
  117. static void SpectFrameDestroy(SpectFrame *frame)
  118. {
  119. if(frame->spect != NULL)
  120. free(frame->spect);
  121. free(frame);
  122. }
  123. int LoadFrame(SpectFrame *frame, FILE *stream, int file_format_type)
  124. {
  125. short ix;
  126. short x;
  127. unsigned short *spect_data;
  128. frame->time = read_double(stream);
  129. frame->pitch = read_double(stream);
  130. frame->length = read_double(stream);
  131. frame->dx = read_double(stream);
  132. fread(&frame->nx,sizeof(short),1,stream);
  133. fread(&frame->markers,sizeof(short),1,stream);
  134. fread(&frame->amp_adjust,sizeof(short),1,stream);
  135. if(file_format_type == 2)
  136. {
  137. fread(&ix,sizeof(short),1,stream); // spare
  138. fread(&ix,sizeof(short),1,stream); // spare
  139. }
  140. for(ix=0; ix<N_PEAKS; ix++)
  141. {
  142. fread(&frame->formants[ix].freq,sizeof(short),1,stream);
  143. fread(&frame->formants[ix].bandw,sizeof(short),1,stream);
  144. fread(&frame->peaks[ix].pkfreq,sizeof(short),1,stream);
  145. fread(&frame->peaks[ix].pkheight,sizeof(short),1,stream);
  146. fread(&frame->peaks[ix].pkwidth,sizeof(short),1,stream);
  147. fread(&frame->peaks[ix].pkright,sizeof(short),1,stream);
  148. if(frame->peaks[ix].pkheight > 0)
  149. frame->keyframe = 1;
  150. if(file_format_type == 2)
  151. {
  152. fread(&frame->peaks[ix].klt_bw,sizeof(short),1,stream);
  153. fread(&frame->peaks[ix].klt_ap,sizeof(short),1,stream);
  154. fread(&frame->peaks[ix].klt_bp,sizeof(short),1,stream);
  155. }
  156. }
  157. if(file_format_type > 0)
  158. {
  159. for(ix=0; ix<N_KLATTP2; ix++)
  160. {
  161. fread(frame->klatt_param + ix,sizeof(short),1,stream);
  162. }
  163. }
  164. spect_data = malloc(sizeof(USHORT) * frame->nx);
  165. if(spect_data == NULL)
  166. {
  167. fprintf(stderr,"Failed to allocate memory\n");
  168. return(1);
  169. }
  170. frame->max_y = 0;
  171. for(ix=0; ix<frame->nx; ix++)
  172. {
  173. fread(&x,sizeof(short),1,stream);
  174. spect_data[ix] = x;
  175. if(x > frame->max_y) frame->max_y = x;
  176. }
  177. frame->spect = spect_data;
  178. return(0);
  179. }
  180. double GetFrameRms(SpectFrame *frame, int seq_amplitude)
  181. {
  182. int h;
  183. float total=0;
  184. int maxh;
  185. int height;
  186. int htab[400];
  187. wavegen_peaks_t wpeaks[9];
  188. for(h=0; h<9; h++)
  189. {
  190. height = (frame->peaks[h].pkheight * seq_amplitude * frame->amp_adjust)/10000;
  191. wpeaks[h].height = height << 8;
  192. wpeaks[h].freq = frame->peaks[h].pkfreq << 16;
  193. wpeaks[h].left = frame->peaks[h].pkwidth << 16;
  194. wpeaks[h].right = frame->peaks[h].pkright << 16;
  195. }
  196. maxh = PeaksToHarmspect(wpeaks,90<<16,htab,0);
  197. for(h=1; h<maxh; h++)
  198. {
  199. total += ((htab[h] * htab[h]) >> 10);
  200. }
  201. frame->rms = sqrt(total) / 7.25;
  202. return(frame->rms);
  203. }
  204. SpectSeq *SpectSeqCreate()
  205. {
  206. SpectSeq *spect = malloc(sizeof(SpectSeq));
  207. spect->numframes = 0;
  208. spect->frames = NULL;
  209. spect->name = NULL;
  210. pk_select = 1;
  211. spect->grid = 1;
  212. spect->duration = 0;
  213. spect->pitch1 = 0;
  214. spect->pitch2 = 0;
  215. spect->bass_reduction = 0;
  216. spect->max_x = 3000;
  217. spect->max_y = 1;
  218. spect->file_format = 0;
  219. return spect;
  220. }
  221. void SpectSeqDestroy(SpectSeq *spect)
  222. {
  223. int ix;
  224. if(spect->frames != NULL)
  225. {
  226. for(ix=0; ix<spect->numframes; ix++)
  227. {
  228. if(spect->frames[ix] != NULL)
  229. SpectFrameDestroy(spect->frames[ix]);
  230. }
  231. free(spect->frames);
  232. }
  233. free(spect->name);
  234. free(spect);
  235. }
  236. static float GetFrameLength(SpectSeq *spect, int frame)
  237. {
  238. int ix;
  239. float adjust=0;
  240. if(frame >= spect->numframes-1) return(0);
  241. for(ix=frame+1; ix<spect->numframes-1; ix++)
  242. {
  243. if(spect->frames[ix]->keyframe) break; // reached next keyframe
  244. adjust += spect->frames[ix]->length_adjust;
  245. }
  246. return ((spect->frames[ix]->time - spect->frames[frame]->time) * 1000.0 + adjust);
  247. }
  248. int LoadSpectSeq(SpectSeq *spect, const char *filename)
  249. {
  250. short n, temp;
  251. int ix;
  252. uint32_t id1, id2, name_len;
  253. int set_max_y=0;
  254. float time_offset;
  255. FILE *stream = fopen(filename, "rb");
  256. if(stream == NULL)
  257. {
  258. fprintf(stderr, "Failed to open: '%s'", filename);
  259. return(0);
  260. }
  261. fread(&id1,sizeof(uint32_t),1,stream);
  262. fread(&id2,sizeof(uint32_t),1,stream);
  263. if((id1 == FILEID1_SPECTSEQ) && (id2 == FILEID2_SPECTSEQ))
  264. {
  265. spect->file_format = 0; // eSpeak formants
  266. }
  267. else
  268. if((id1 == FILEID1_SPECTSEQ) && (id2 == FILEID2_SPECTSEK))
  269. {
  270. spect->file_format = 1; // formants for Klatt synthesizer
  271. }
  272. else
  273. if((id1 == FILEID1_SPECTSEQ) && (id2 == FILEID2_SPECTSQ2))
  274. {
  275. spect->file_format = 2; // formants for Klatt synthesizer
  276. }
  277. else
  278. {
  279. fprintf(stderr, "Unsupported spectral file format.\n");
  280. fclose(stream);
  281. return(1);
  282. }
  283. fread(&name_len,sizeof(uint32_t),1,stream);
  284. if (name_len > 0)
  285. {
  286. spect->name = (char *)malloc(name_len);
  287. fread(spect->name,sizeof(char),name_len,stream);
  288. }
  289. else
  290. spect->name = NULL;
  291. fread(&n,sizeof(short),1,stream);
  292. fread(&spect->amplitude,sizeof(short),1,stream);
  293. fread(&spect->max_y,sizeof(short),1,stream);
  294. fread(&temp,sizeof(short),1,stream); // unused
  295. if(n==0)
  296. {
  297. fclose(stream);
  298. return(0);
  299. }
  300. if(spect->frames != NULL)
  301. {
  302. for(ix=0; ix<spect->numframes; ix++)
  303. {
  304. if(spect->frames[ix] != NULL)
  305. SpectFrameDestroy(spect->frames[ix]);
  306. }
  307. free(spect->frames);
  308. }
  309. spect->frames = malloc(sizeof(SpectFrame) * n);
  310. spect->numframes = 0;
  311. spect->max_x = 3000;
  312. if(spect->max_y == 0)
  313. {
  314. set_max_y = 1;
  315. spect->max_y = 1;
  316. }
  317. for(ix = 0; ix < n; ix++)
  318. {
  319. SpectFrame *frame = SpectFrameCreate();
  320. if(LoadFrame(frame, stream, spect->file_format) != 0)
  321. {
  322. free(frame);
  323. break;
  324. }
  325. spect->frames[spect->numframes++] = frame;
  326. if(set_max_y && (frame->max_y > spect->max_y))
  327. spect->max_y = frame->max_y;
  328. if(frame->nx * frame->dx > spect->max_x) spect->max_x = (int)(frame->nx * frame->dx);
  329. }
  330. spect->max_x = 9000; // disable auto-xscaling
  331. frame_width = (int)((FRAME_WIDTH*spect->max_x)/MAX_DISPLAY_FREQ);
  332. if(frame_width > FRAME_WIDTH) frame_width = FRAME_WIDTH;
  333. // start times from zero
  334. time_offset = spect->frames[0]->time;
  335. for(ix=0; ix<spect->numframes; ix++)
  336. spect->frames[ix]->time -= time_offset;
  337. spect->pitch1 = spect->pitchenv.pitch1;
  338. spect->pitch2 = spect->pitchenv.pitch2;
  339. spect->duration = (int)(spect->frames[spect->numframes-1]->time * 1000);
  340. if(spect->max_y < 400)
  341. spect->max_y = 200;
  342. else
  343. spect->max_y = 29000; // disable auto height scaling
  344. for(ix=0; ix<spect->numframes; ix++)
  345. {
  346. if(spect->frames[ix]->keyframe)
  347. spect->frames[ix]->length_adjust = spect->frames[ix]->length - GetFrameLength(spect,ix);
  348. }
  349. fclose(stream);
  350. return(0);
  351. }