eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

spect.c 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. /*
  2. * Copyright (C) 2005 to 2007 by Jonathan Duddington
  3. * email: [email protected]
  4. * Copyright (C) 2013-2016 Reece H. Dunn
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, see: <http://www.gnu.org/licenses/>.
  18. */
  19. #include "config.h"
  20. #include <errno.h>
  21. #include <math.h>
  22. #include <stdint.h>
  23. #include <stdio.h>
  24. #include <stdlib.h>
  25. #include <string.h>
  26. #include <endian.h>
  27. #include <espeak-ng/espeak_ng.h>
  28. #include <espeak-ng/speak_lib.h>
  29. #include "spect.h"
  30. #include "ieee80.h" // for ConvertFromIeeeExtended
  31. #include "wavegen.h" // for wavegen_peaks_t, PeaksToHarmspect
  32. #include "synthesize.h" // for KLATT_AV, KLATT_Kopen, N_KLATTP2
  33. #include "voice.h" // for N_PEAKS
  34. static int frame_width;
  35. static int default_freq[N_PEAKS] =
  36. { 200, 500, 1200, 3000, 3500, 4000, 6900, 7800, 9000 };
  37. static int default_width[N_PEAKS] =
  38. { 750, 500, 550, 550, 600, 700, 700, 700, 700 };
  39. static int default_klt_bw[N_PEAKS] =
  40. { 89, 90, 140, 260, 260, 260, 500, 500, 500 };
  // Read a 10-byte extended-format number from 'stream' and convert it with
  // ieee_extended_to_double().
  //
  // Returns the converted value, or 0.0 when fewer than 10 bytes could be
  // read, so a truncated file never feeds uninitialised stack bytes to the
  // converter.
  static double read_double(FILE *stream)
  {
      unsigned char bytes[10] = { 0 };

      if (fread(bytes, sizeof bytes[0], sizeof bytes, stream) != sizeof bytes)
          return 0.0;

      return ieee_extended_to_double(bytes);
  }
  // General polynomial interpolation routine (Neville's algorithm).
  //
  // xa, ya : the n sample points (xa[0..n-1], ya[0..n-1])
  // n      : number of sample points; must be in 1..8 because the work
  //          arrays below are indexed 1..n and have a fixed size
  // x      : abscissa at which to evaluate the interpolating polynomial
  //
  // Returns the interpolated value. Returns ya[1] if two of the xa values
  // involved in a step are identical, and 0 if n is outside the supported
  // range (previously that case overran the work arrays).
  float polint(float xa[], float ya[], int n, float x)
  {
      enum { POLINT_MAX_POINTS = 8 };

      int i, m, ns = 1;
      float den, dif, dift, ho, hp, w;
      float y; // result
      float c[POLINT_MAX_POINTS + 1], d[POLINT_MAX_POINTS + 1]; // 1-based

      if (n < 1 || n > POLINT_MAX_POINTS)
          return 0.0f;

      // Seed the tableau and find the sample closest to x (1-based in ns).
      dif = fabsf(x - xa[0]);
      for (i = 1; i <= n; i++) {
          dift = fabsf(x - xa[i-1]);
          if (dift < dif) {
              ns = i;
              dif = dift;
          }
          c[i] = ya[i-1];
          d[i] = ya[i-1];
      }

      // Initial approximation is the nearest sample; ns becomes 0-based.
      y = ya[--ns];

      for (m = 1; m < n; m++) {
          for (i = 1; i <= n-m; i++) {
              ho = xa[i-1] - x;
              hp = xa[i+m-1] - x;
              w = c[i+1] - d[i];
              den = ho - hp;
              if (den == 0.0)
                  return ya[1]; // two input xa are identical
              den = w / den;
              d[i] = hp * den;
              c[i] = ho * den;
          }
          // Choose the correction that keeps the path through the tableau
          // as central as possible.
          y += ((2*ns < (n-m) ? c[ns+1] : d[ns--]));
      }
      return y;
  }
  79. static SpectFrame *SpectFrameCreate()
  80. {
  81. int ix;
  82. SpectFrame *frame;
  83. frame = malloc(sizeof(SpectFrame));
  84. if (!frame)
  85. return NULL;
  86. frame->keyframe = 0;
  87. frame->spect = NULL;
  88. frame->markers = 0;
  89. frame->pitch = 0;
  90. frame->nx = 0;
  91. frame->time = 0;
  92. frame->length = 0;
  93. frame->amp_adjust = 100;
  94. frame->length_adjust = 0;
  95. for (ix = 0; ix < N_PEAKS; ix++) {
  96. frame->formants[ix].freq = 0;
  97. frame->peaks[ix].pkfreq = default_freq[ix];
  98. frame->peaks[ix].pkheight = 0;
  99. frame->peaks[ix].pkwidth = default_width[ix];
  100. frame->peaks[ix].pkright = default_width[ix];
  101. frame->peaks[ix].klt_bw = default_klt_bw[ix];
  102. frame->peaks[ix].klt_ap = 0;
  103. frame->peaks[ix].klt_bp = default_klt_bw[ix];
  104. }
  105. memset(frame->klatt_param, 0, sizeof(frame->klatt_param));
  106. frame->klatt_param[KLATT_AV] = 59;
  107. frame->klatt_param[KLATT_Kopen] = 40;
  108. return frame;
  109. }
  110. static void SpectFrameDestroy(SpectFrame *frame)
  111. {
  112. if (frame->spect != NULL)
  113. free(frame->spect);
  114. free(frame);
  115. }
  116. static espeak_ng_STATUS LoadFrame(SpectFrame *frame, FILE *stream, int file_format_type)
  117. {
  118. short ix;
  119. short x;
  120. unsigned short *spect_data;
  121. frame->time = read_double(stream);
  122. frame->pitch = read_double(stream);
  123. frame->length = read_double(stream);
  124. frame->dx = read_double(stream);
  125. fread(&frame->nx, sizeof(short), 1, stream);
  126. fread(&frame->markers, sizeof(short), 1, stream);
  127. fread(&frame->amp_adjust, sizeof(short), 1, stream);
  128. frame->nx = le16toh(frame->nx);
  129. frame->markers = le16toh(frame->markers);
  130. frame->amp_adjust = le16toh(frame->amp_adjust);
  131. if (file_format_type == 2) {
  132. fread(&ix, sizeof(short), 1, stream); // spare
  133. fread(&ix, sizeof(short), 1, stream); // spare
  134. }
  135. for (ix = 0; ix < N_PEAKS; ix++) {
  136. fread(&frame->formants[ix].freq, sizeof(short), 1, stream);
  137. fread(&frame->formants[ix].bandw, sizeof(short), 1, stream);
  138. fread(&frame->peaks[ix].pkfreq, sizeof(short), 1, stream);
  139. fread(&frame->peaks[ix].pkheight, sizeof(short), 1, stream);
  140. fread(&frame->peaks[ix].pkwidth, sizeof(short), 1, stream);
  141. fread(&frame->peaks[ix].pkright, sizeof(short), 1, stream);
  142. frame->formants[ix].freq = le16toh(frame->formants[ix].freq);
  143. frame->formants[ix].bandw = le16toh(frame->formants[ix].bandw);
  144. frame->peaks[ix].pkfreq = le16toh(frame->peaks[ix].pkfreq);
  145. frame->peaks[ix].pkheight = le16toh(frame->peaks[ix].pkheight);
  146. frame->peaks[ix].pkwidth = le16toh(frame->peaks[ix].pkwidth);
  147. frame->peaks[ix].pkright = le16toh(frame->peaks[ix].pkright);
  148. if (frame->peaks[ix].pkheight > 0)
  149. frame->keyframe = 1;
  150. if (file_format_type == 2) {
  151. fread(&frame->peaks[ix].klt_bw, sizeof(short), 1, stream);
  152. fread(&frame->peaks[ix].klt_ap, sizeof(short), 1, stream);
  153. fread(&frame->peaks[ix].klt_bp, sizeof(short), 1, stream);
  154. frame->peaks[ix].klt_bw = le16toh(frame->peaks[ix].klt_bw);
  155. frame->peaks[ix].klt_ap = le16toh(frame->peaks[ix].klt_ap);
  156. frame->peaks[ix].klt_bp = le16toh(frame->peaks[ix].klt_bp);
  157. }
  158. }
  159. if (file_format_type > 0) {
  160. for (ix = 0; ix < N_KLATTP2; ix++)
  161. {
  162. fread(frame->klatt_param + ix, sizeof(short), 1, stream);
  163. frame->klatt_param[ix] = le16toh(frame->klatt_param[ix]);
  164. }
  165. }
  166. spect_data = malloc(sizeof(unsigned short) * frame->nx);
  167. if (spect_data == NULL)
  168. return ENOMEM;
  169. frame->max_y = 0;
  170. for (ix = 0; ix < frame->nx; ix++) {
  171. fread(&x, sizeof(short), 1, stream);
  172. x = le16toh(x);
  173. spect_data[ix] = x;
  174. if (x > frame->max_y) frame->max_y = x;
  175. }
  176. frame->spect = spect_data;
  177. return ENS_OK;
  178. }
  179. double GetFrameRms(SpectFrame *frame, int seq_amplitude)
  180. {
  181. int h;
  182. float total = 0;
  183. int maxh;
  184. int height;
  185. int htab[400];
  186. wavegen_peaks_t wpeaks[9];
  187. for (h = 0; h < 9; h++) {
  188. height = (frame->peaks[h].pkheight * seq_amplitude * frame->amp_adjust)/10000;
  189. wpeaks[h].height = height << 8;
  190. wpeaks[h].freq = frame->peaks[h].pkfreq << 16;
  191. wpeaks[h].left = frame->peaks[h].pkwidth << 16;
  192. wpeaks[h].right = frame->peaks[h].pkright << 16;
  193. }
  194. maxh = PeaksToHarmspect(wpeaks, 90<<16, htab, 0);
  195. for (h = 1; h < maxh; h++)
  196. total += ((htab[h] * htab[h]) >> 10);
  197. frame->rms = sqrt(total) / 7.25;
  198. return frame->rms;
  199. }
  200. #pragma GCC visibility push(default)
  201. SpectSeq *SpectSeqCreate()
  202. {
  203. SpectSeq *spect = malloc(sizeof(SpectSeq));
  204. if (!spect)
  205. return NULL;
  206. spect->numframes = 0;
  207. spect->frames = NULL;
  208. spect->name = NULL;
  209. spect->grid = 1;
  210. spect->duration = 0;
  211. spect->pitch1 = 0;
  212. spect->pitch2 = 0;
  213. spect->bass_reduction = 0;
  214. spect->max_x = 3000;
  215. spect->max_y = 1;
  216. spect->file_format = 0;
  217. return spect;
  218. }
  219. void SpectSeqDestroy(SpectSeq *spect)
  220. {
  221. int ix;
  222. if (spect->frames != NULL) {
  223. for (ix = 0; ix < spect->numframes; ix++) {
  224. if (spect->frames[ix] != NULL)
  225. SpectFrameDestroy(spect->frames[ix]);
  226. }
  227. free(spect->frames);
  228. }
  229. free(spect->name);
  230. free(spect);
  231. }
  232. #pragma GCC visibility pop
  233. static float GetFrameLength(SpectSeq *spect, int frame)
  234. {
  235. int ix;
  236. float adjust = 0;
  237. if (frame >= spect->numframes-1) return 0;
  238. for (ix = frame+1; ix < spect->numframes-1; ix++) {
  239. if (spect->frames[ix]->keyframe)
  240. break; // reached next keyframe
  241. adjust += spect->frames[ix]->length_adjust;
  242. }
  243. return (spect->frames[ix]->time - spect->frames[frame]->time) * 1000.0 + adjust;
  244. }
  245. #pragma GCC visibility push(default)
  246. espeak_ng_STATUS LoadSpectSeq(SpectSeq *spect, const char *filename)
  247. {
  248. short n, temp;
  249. int ix;
  250. uint32_t id1, id2, name_len;
  251. int set_max_y = 0;
  252. float time_offset;
  253. FILE *stream = fopen(filename, "rb");
  254. if (stream == NULL) {
  255. fprintf(stderr, "Failed to open: '%s'", filename);
  256. return errno;
  257. }
  258. fread(&id1, sizeof(uint32_t), 1, stream);
  259. id1 = le32toh(id1);
  260. fread(&id2, sizeof(uint32_t), 1, stream);
  261. id2 = le32toh(id2);
  262. if ((id1 == FILEID1_SPECTSEQ) && (id2 == FILEID2_SPECTSEQ))
  263. spect->file_format = 0; // eSpeak formants
  264. else if ((id1 == FILEID1_SPECTSEQ) && (id2 == FILEID2_SPECTSEK))
  265. spect->file_format = 1; // formants for Klatt synthesizer
  266. else if ((id1 == FILEID1_SPECTSEQ) && (id2 == FILEID2_SPECTSQ2))
  267. spect->file_format = 2; // formants for Klatt synthesizer
  268. else {
  269. fprintf(stderr, "Unsupported spectral file format.\n");
  270. fclose(stream);
  271. return ENS_UNSUPPORTED_PHON_FORMAT;
  272. }
  273. fread(&name_len, sizeof(uint32_t), 1, stream);
  274. name_len = le32toh(name_len);
  275. if (name_len > 0) {
  276. if ((spect->name = (char *)malloc(name_len)) == NULL) {
  277. fclose(stream);
  278. return ENOMEM;
  279. }
  280. fread(spect->name, sizeof(char), name_len, stream);
  281. } else
  282. spect->name = NULL;
  283. fread(&n, sizeof(short), 1, stream);
  284. fread(&spect->amplitude, sizeof(short), 1, stream);
  285. fread(&spect->max_y, sizeof(short), 1, stream);
  286. fread(&temp, sizeof(short), 1, stream); // unused
  287. n = le16toh(n);
  288. spect->amplitude = le16toh(spect->amplitude);
  289. spect->max_y = le16toh(spect->max_y);
  290. temp = le16toh(temp);
  291. if (n == 0) {
  292. fclose(stream);
  293. return ENS_NO_SPECT_FRAMES;
  294. }
  295. if (spect->frames != NULL) {
  296. for (ix = 0; ix < spect->numframes; ix++) {
  297. if (spect->frames[ix] != NULL)
  298. SpectFrameDestroy(spect->frames[ix]);
  299. }
  300. free(spect->frames);
  301. }
  302. spect->frames = calloc(n, sizeof(SpectFrame *));
  303. spect->numframes = 0;
  304. spect->max_x = 3000;
  305. if (spect->max_y == 0) {
  306. set_max_y = 1;
  307. spect->max_y = 1;
  308. }
  309. for (ix = 0; ix < n; ix++) {
  310. SpectFrame *frame = SpectFrameCreate();
  311. if (!frame) {
  312. fclose(stream);
  313. return ENOMEM;
  314. }
  315. espeak_ng_STATUS status = LoadFrame(frame, stream, spect->file_format);
  316. if (status != ENS_OK) {
  317. free(frame);
  318. fclose(stream);
  319. return status;
  320. }
  321. spect->frames[spect->numframes++] = frame;
  322. if (set_max_y && (frame->max_y > spect->max_y))
  323. spect->max_y = frame->max_y;
  324. if (frame->nx * frame->dx > spect->max_x) spect->max_x = (int)(frame->nx * frame->dx);
  325. }
  326. spect->max_x = 9000; // disable auto-xscaling
  327. frame_width = (int)((FRAME_WIDTH*spect->max_x)/MAX_DISPLAY_FREQ);
  328. if (frame_width > FRAME_WIDTH) frame_width = FRAME_WIDTH;
  329. // start times from zero
  330. time_offset = spect->frames[0]->time;
  331. for (ix = 0; ix < spect->numframes; ix++)
  332. spect->frames[ix]->time -= time_offset;
  333. spect->pitch1 = spect->pitchenv.pitch1;
  334. spect->pitch2 = spect->pitchenv.pitch2;
  335. spect->duration = (int)(spect->frames[spect->numframes-1]->time * 1000);
  336. if (spect->max_y < 400)
  337. spect->max_y = 200;
  338. else
  339. spect->max_y = 29000; // disable auto height scaling
  340. for (ix = 0; ix < spect->numframes; ix++) {
  341. if (spect->frames[ix]->keyframe)
  342. spect->frames[ix]->length_adjust = spect->frames[ix]->length - GetFrameLength(spect, ix);
  343. }
  344. fclose(stream);
  345. return ENS_OK;
  346. }
  347. #pragma GCC visibility pop