eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

wavegen.c 40KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486
  1. /*
  2. * Copyright (C) 2005 to 2013 by Jonathan Duddington
  3. * email: [email protected]
  4. * Copyright (C) 2015-2016 Reece H. Dunn
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, see: <http://www.gnu.org/licenses/>.
  18. */
  19. // this version keeps wavemult window as a constant fraction
  20. // of the cycle length - but that spreads out the HF peaks too much
  21. #include "config.h"
  22. #include <math.h>
  23. #include <stdbool.h>
  24. #include <stdint.h>
  25. #include <stdio.h>
  26. #include <stdlib.h>
  27. #include <string.h>
  28. #include <espeak-ng/espeak_ng.h>
  29. #include <espeak-ng/speak_lib.h>
  30. #include "wavegen.h"
  31. #include "common.h" // for espeak_rand
  32. #include "synthesize.h" // for WGEN_DATA, RESONATOR, frame_t
  33. #include "mbrola.h" // for MbrolaFill, MbrolaReset, mbrola...
  34. #if USE_KLATT
  35. #include "klatt.h"
  36. #endif
  37. #if USE_LIBSONIC
  38. #include "sonic.h"
  39. #endif
  40. #include "sintab.h"
  41. #include "speech.h"
  // File-scope synthesis state shared by the wavegen functions in this file.
  42. static void SetSynth(int length, int modn, frame_t *fr1, frame_t *fr2, voice_t *v);
  43. static voice_t *wvoice = NULL; // voice in use; several functions below return early while it is NULL
  44. static int option_harmonic1 = 10; // first-harmonic multiplier, applied as (x * option_harmonic1)/8 in PeaksToHarmspect()
  45. static int flutter_amp = 64; // scale applied to the Flutter_tab offset in AdvanceParameters()
  46. static int general_amplitude = 60; // recomputed by GetAmplitude() and WavegenSetEcho()
  47. static int consonant_amp = 26;
  48. int embedded_value[N_EMBEDDED_VALUES]; // reset to embedded_default[] in WavegenInit()
  49. static int PHASE_INC_FACTOR; // 0x8000000 / samplerate, set in WavegenInit()
  50. int samplerate = 0; // this is set by WavegenInit()
  51. static wavegen_peaks_t peaks[N_PEAKS];
  52. static int peak_harmonic[N_PEAKS]; // harmonic number used for each HF peak
  53. static int peak_height[N_PEAKS]; // amplitude used for each HF peak, set in PeaksToHarmspect()
  54. int echo_head; // set to the echo delay in samples by WavegenSetEcho()
  55. int echo_tail; // reset to 0 by WavegenSetEcho()
  56. int echo_amp = 0; // units are 1/256ths of the amplitude of the original sound
  57. short echo_buf[N_ECHO_BUF]; // cleared by WavegenSetEcho()
  58. static int echo_length = 0; // period (in samples) to ensure completion of echo at the end of speech, set in WavegenSetEcho()
  59. static int voicing; // copied from wvoice->voicing in WavegenSetEcho()
  60. static RESONATOR rbreath[N_PEAKS]; // one breath resonator per peak, see InitBreath()/SetBreath()
  61. #define N_LOWHARM 30
  62. #define MAX_HARMONIC 400 // 400 * 50Hz = 20 kHz, more than enough
  63. static int harm_inc[N_LOWHARM]; // only for these harmonics do we interpolate amplitude between steps
  64. static int *harmspect; // points at the hspect[] copy currently being played
  65. static int hswitch = 0; // index of the hspect[] copy selected by Wavegen()
  66. static int hspect[2][MAX_HARMONIC]; // 2 copies, we interpolate between them
  67. static int nsamples = 0; // number to do
  68. static int modulation_type = 0; // second index into modulation_tab[][]
  69. static int glottal_flag = 0;
  70. static int glottal_reduce = 0;
  71. static WGEN_DATA wdata;
  72. static int amp_ix; // position in amplitude_env, in 1/256ths of an entry
  73. static int amp_inc; // added to amp_ix by AdvanceParameters()
  74. static unsigned char *amplitude_env = NULL;
  75. static int samplecount = 0; // number done
  76. static int samplecount_start = 0; // count at start of this segment
  77. static int end_wave = 0; // continue to end of wave cycle
  78. static int wavephase;
  79. static int phaseinc;
  80. static int cycle_samples; // number of samples in a cycle at current pitch
  81. static int cbytes;
  82. static int hf_factor;
  83. static double minus_pi_t; // -pi / samplerate, set in InitBreath()
  84. static double two_pi_t; // 2*pi / samplerate, set in InitBreath()
  85. unsigned char *out_ptr;
  86. unsigned char *out_end;
  87. espeak_ng_OUTPUT_HOOKS* output_hooks = NULL;
  88. static int const_f0 = 0; // when non-zero, AdvanceParameters() forces the pitch to const_f0<<12
  89. // the queue of operations passed to wavegen from synthesize
  90. intptr_t wcmdq[N_WCMDQ][4];
  91. int wcmdq_head = 0;
  92. int wcmdq_tail = 0;
  // default and maximum values for the embedded commands; entries 1 and 2 are
  93. // pitch,speed,
  94. const int embedded_default[N_EMBEDDED_VALUES] = { 0, 50, espeakRATE_NORMAL, 100, 50, 0, 0, 0, espeakRATE_NORMAL, 0, 0, 0, 0, 0, 0 };
  95. static const int embedded_max[N_EMBEDDED_VALUES] = { 0, 0x7fff, 2000, 300, 99, 99, 99, 0, 2000, 0, 0, 0, 0, 4, 0 };
  96. #if USE_LIBSONIC
  97. static sonicStream sonicSpeedupStream = NULL; // destroyed and reset to NULL by WcmdqStop()
  98. static double sonicSpeed = 1.0;
  99. #endif
  // Amplitude modulation table used by Wavegen() to add roughness.
  100. // 1st index=roughness
  101. // 2nd index=modulation_type
  102. // value: bits 0-3 amplitude (16ths), bits 4-7 every n cycles
  // A period field (bits 4-7) of 0 means no modulation; 0xf means the
  // reduction is applied once and modulation_type is then reset to 0.
  103. #define N_ROUGHNESS 8
  104. static const unsigned char modulation_tab[N_ROUGHNESS][8] = {
  105. { 0, 0x00, 0x00, 0x00, 0, 0x46, 0xf2, 0x29 },
  106. { 0, 0x2f, 0x00, 0x2f, 0, 0x45, 0xf2, 0x29 },
  107. { 0, 0x2f, 0x00, 0x2e, 0, 0x45, 0xf2, 0x28 },
  108. { 0, 0x2e, 0x00, 0x2d, 0, 0x34, 0xf2, 0x28 },
  109. { 0, 0x2d, 0x2d, 0x2c, 0, 0x34, 0xf2, 0x28 },
  110. { 0, 0x2b, 0x2b, 0x2b, 0, 0x34, 0xf2, 0x28 },
  111. { 0, 0x2a, 0x2a, 0x2a, 0, 0x34, 0xf2, 0x28 },
  112. { 0, 0x29, 0x29, 0x29, 0, 0x34, 0xf2, 0x28 },
  113. };
  114. // Flutter table, to add natural variations to the pitch
  // AdvanceParameters() reads entry (Flutter_ix >> 6), subtracts 0x80 so the
  // value is centred on zero, and scales it by flutter_amp. Flutter_ix advances
  // by Flutter_inc per call and wraps at N_FLUTTER*64.
  115. #define N_FLUTTER 0x170
  116. static int Flutter_inc; // set in WavegenInit()
  117. static const unsigned char Flutter_tab[N_FLUTTER] = {
  118. 0x80, 0x9b, 0xb5, 0xcb, 0xdc, 0xe8, 0xed, 0xec,
  119. 0xe6, 0xdc, 0xce, 0xbf, 0xb0, 0xa3, 0x98, 0x90,
  120. 0x8c, 0x8b, 0x8c, 0x8f, 0x92, 0x94, 0x95, 0x92,
  121. 0x8c, 0x83, 0x78, 0x69, 0x59, 0x49, 0x3c, 0x31,
  122. 0x2a, 0x29, 0x2d, 0x36, 0x44, 0x56, 0x69, 0x7d,
  123. 0x8f, 0x9f, 0xaa, 0xb1, 0xb2, 0xad, 0xa4, 0x96,
  124. 0x87, 0x78, 0x69, 0x5c, 0x53, 0x4f, 0x4f, 0x55,
  125. 0x5e, 0x6b, 0x7a, 0x88, 0x96, 0xa2, 0xab, 0xb0,
  126. 0xb1, 0xae, 0xa8, 0xa0, 0x98, 0x91, 0x8b, 0x88,
  127. 0x89, 0x8d, 0x94, 0x9d, 0xa8, 0xb2, 0xbb, 0xc0,
  128. 0xc1, 0xbd, 0xb4, 0xa5, 0x92, 0x7c, 0x63, 0x4a,
  129. 0x32, 0x1e, 0x0e, 0x05, 0x02, 0x05, 0x0f, 0x1e,
  130. 0x30, 0x44, 0x59, 0x6d, 0x7f, 0x8c, 0x96, 0x9c,
  131. 0x9f, 0x9f, 0x9d, 0x9b, 0x99, 0x99, 0x9c, 0xa1,
  132. 0xa9, 0xb3, 0xbf, 0xca, 0xd5, 0xdc, 0xe0, 0xde,
  133. 0xd8, 0xcc, 0xbb, 0xa6, 0x8f, 0x77, 0x60, 0x4b,
  134. 0x3a, 0x2e, 0x28, 0x29, 0x2f, 0x3a, 0x48, 0x59,
  135. 0x6a, 0x7a, 0x86, 0x90, 0x94, 0x95, 0x91, 0x89,
  136. 0x80, 0x75, 0x6b, 0x62, 0x5c, 0x5a, 0x5c, 0x61,
  137. 0x69, 0x74, 0x80, 0x8a, 0x94, 0x9a, 0x9e, 0x9d,
  138. 0x98, 0x90, 0x86, 0x7c, 0x71, 0x68, 0x62, 0x60,
  139. 0x63, 0x6b, 0x78, 0x88, 0x9b, 0xaf, 0xc2, 0xd2,
  140. 0xdf, 0xe6, 0xe7, 0xe2, 0xd7, 0xc6, 0xb2, 0x9c,
  141. 0x84, 0x6f, 0x5b, 0x4b, 0x40, 0x39, 0x37, 0x38,
  142. 0x3d, 0x43, 0x4a, 0x50, 0x54, 0x56, 0x55, 0x52,
  143. 0x4d, 0x48, 0x42, 0x3f, 0x3e, 0x41, 0x49, 0x56,
  144. 0x67, 0x7c, 0x93, 0xab, 0xc3, 0xd9, 0xea, 0xf6,
  145. 0xfc, 0xfb, 0xf4, 0xe7, 0xd5, 0xc0, 0xaa, 0x94,
  146. 0x80, 0x71, 0x64, 0x5d, 0x5a, 0x5c, 0x61, 0x68,
  147. 0x70, 0x77, 0x7d, 0x7f, 0x7f, 0x7b, 0x74, 0x6b,
  148. 0x61, 0x57, 0x4e, 0x48, 0x46, 0x48, 0x4e, 0x59,
  149. 0x66, 0x75, 0x84, 0x93, 0x9f, 0xa7, 0xab, 0xaa,
  150. 0xa4, 0x99, 0x8b, 0x7b, 0x6a, 0x5b, 0x4e, 0x46,
  151. 0x43, 0x45, 0x4d, 0x5a, 0x6b, 0x7f, 0x92, 0xa6,
  152. 0xb8, 0xc5, 0xcf, 0xd3, 0xd2, 0xcd, 0xc4, 0xb9,
  153. 0xad, 0xa1, 0x96, 0x8e, 0x89, 0x87, 0x87, 0x8a,
  154. 0x8d, 0x91, 0x92, 0x91, 0x8c, 0x84, 0x78, 0x68,
  155. 0x55, 0x41, 0x2e, 0x1c, 0x0e, 0x05, 0x01, 0x05,
  156. 0x0f, 0x1f, 0x34, 0x4d, 0x68, 0x81, 0x9a, 0xb0,
  157. 0xc1, 0xcd, 0xd3, 0xd3, 0xd0, 0xc8, 0xbf, 0xb5,
  158. 0xab, 0xa4, 0x9f, 0x9c, 0x9d, 0xa0, 0xa5, 0xaa,
  159. 0xae, 0xb1, 0xb0, 0xab, 0xa3, 0x96, 0x87, 0x76,
  160. 0x63, 0x51, 0x42, 0x36, 0x2f, 0x2d, 0x31, 0x3a,
  161. 0x48, 0x59, 0x6b, 0x7e, 0x8e, 0x9c, 0xa6, 0xaa,
  162. 0xa9, 0xa3, 0x98, 0x8a, 0x7b, 0x6c, 0x5d, 0x52,
  163. 0x4a, 0x48, 0x4a, 0x50, 0x5a, 0x67, 0x75, 0x82
  164. };
  165. // waveform shape table for HF peaks, formants 6,7,8
  // Wavegen() multiplies the HF-peak sum by wavemult[cbytes] while
  // 0 <= cbytes < wavemult_max.
  166. #define N_WAVEMULT 128
  167. static int wavemult_offset = 0; // wavemult_max/2, set in WavegenInit()
  168. static int wavemult_max = 0; // number of table entries in use, at most N_WAVEMULT
  169. // the presets are for 22050 Hz sample rate.
  170. // A different rate will need to recalculate the presets in WavegenInit()
  // (not const for that reason: WavegenInit() overwrites the first wavemult_max entries)
  171. static unsigned char wavemult[N_WAVEMULT] = {
  172. 0, 0, 0, 2, 3, 5, 8, 11, 14, 18, 22, 27, 32, 37, 43, 49,
  173. 55, 62, 69, 76, 83, 90, 98, 105, 113, 121, 128, 136, 144, 152, 159, 166,
  174. 174, 181, 188, 194, 201, 207, 213, 218, 224, 228, 233, 237, 240, 244, 246, 249,
  175. 251, 252, 253, 253, 253, 253, 252, 251, 249, 246, 244, 240, 237, 233, 228, 224,
  176. 218, 213, 207, 201, 194, 188, 181, 174, 166, 159, 152, 144, 136, 128, 121, 113,
  177. 105, 98, 90, 83, 76, 69, 62, 55, 49, 43, 37, 32, 27, 22, 18, 14,
  178. 11, 8, 5, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  179. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  180. };
  // Pitch adjustment multipliers, MAX_PITCH_VALUE+1 entries rising from 64 to 255.
  181. // set from y = pow(2,x) * 128, x=-1 to 1
  // NOTE(review): no use of this table is visible in this part of the file.
  182. #define MAX_PITCH_VALUE 101
  183. static const unsigned char pitch_adjust_tab[MAX_PITCH_VALUE+1] = {
  184. 64, 65, 66, 67, 68, 69, 70, 71,
  185. 72, 73, 74, 75, 76, 77, 78, 79,
  186. 80, 81, 82, 83, 84, 86, 87, 88,
  187. 89, 91, 92, 93, 94, 96, 97, 98,
  188. 100, 101, 103, 104, 105, 107, 108, 110,
  189. 111, 113, 115, 116, 118, 119, 121, 123,
  190. 124, 126, 128, 130, 132, 133, 135, 137,
  191. 139, 141, 143, 145, 147, 149, 151, 153,
  192. 155, 158, 160, 162, 164, 167, 169, 171,
  193. 174, 176, 179, 181, 184, 186, 189, 191,
  194. 194, 197, 199, 202, 205, 208, 211, 214,
  195. 217, 220, 223, 226, 229, 232, 236, 239,
  196. 242, 246, 249, 252, 254, 255
  197. };
  198. void WcmdqStop(void)
  199. {
  200. wcmdq_head = 0;
  201. wcmdq_tail = 0;
  202. #if USE_LIBSONIC
  203. if (sonicSpeedupStream != NULL) {
  204. sonicDestroyStream(sonicSpeedupStream);
  205. sonicSpeedupStream = NULL;
  206. }
  207. #endif
  208. #if USE_MBROLA
  209. if (mbrola_name[0] != 0)
  210. MbrolaReset();
  211. #endif
  212. }
  213. int WcmdqFree(void)
  214. {
  215. int i;
  216. i = wcmdq_head - wcmdq_tail;
  217. if (i <= 0) i += N_WCMDQ;
  218. return i;
  219. }
  // Return the number of command queue entries in use: N_WCMDQ minus WcmdqFree().
  220. int WcmdqUsed(void)
  221. {
  222. return N_WCMDQ - WcmdqFree();
  223. }
  224. void WcmdqInc(void)
  225. {
  226. wcmdq_tail++;
  227. if (wcmdq_tail >= N_WCMDQ) wcmdq_tail = 0;
  228. }
  229. static void WcmdqIncHead(void)
  230. {
  231. MAKE_MEM_UNDEFINED(&wcmdq[wcmdq_head], sizeof(wcmdq[wcmdq_head]));
  232. wcmdq_head++;
  233. if (wcmdq_head >= N_WCMDQ) wcmdq_head = 0;
  234. }
  // Peak shape tables, PEAKSHAPEW+1 entries each, falling from 255 to 0.
  // PeaksToHarmspect() reads the active table through pk_shape, indexing it by
  // a harmonic's distance from the peak centre divided by (width >> 8).
  235. #define PEAKSHAPEW 256
  236. static const unsigned char pk_shape1[PEAKSHAPEW+1] = {
  237. 255, 254, 254, 254, 254, 254, 253, 253, 252, 251, 251, 250, 249, 248, 247, 246,
  238. 245, 244, 242, 241, 239, 238, 236, 234, 233, 231, 229, 227, 225, 223, 220, 218,
  239. 216, 213, 211, 209, 207, 205, 203, 201, 199, 197, 195, 193, 191, 189, 187, 185,
  240. 183, 180, 178, 176, 173, 171, 169, 166, 164, 161, 159, 156, 154, 151, 148, 146,
  241. 143, 140, 138, 135, 132, 129, 126, 123, 120, 118, 115, 112, 108, 105, 102, 99,
  242. 96, 95, 93, 91, 90, 88, 86, 85, 83, 82, 80, 79, 77, 76, 74, 73,
  243. 72, 70, 69, 68, 67, 66, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55,
  244. 55, 54, 53, 52, 52, 51, 50, 50, 49, 48, 48, 47, 47, 46, 46, 46,
  245. 45, 45, 45, 44, 44, 44, 44, 44, 44, 44, 43, 43, 43, 43, 44, 43,
  246. 42, 42, 41, 40, 40, 39, 38, 38, 37, 36, 36, 35, 35, 34, 33, 33,
  247. 32, 32, 31, 30, 30, 29, 29, 28, 28, 27, 26, 26, 25, 25, 24, 24,
  248. 23, 23, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 16,
  249. 16, 15, 15, 15, 14, 14, 13, 13, 13, 12, 12, 11, 11, 11, 10, 10,
  250. 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 5, 5,
  251. 5, 5, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2,
  252. 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  253. 0
  254. };
  // Narrower shape: reaches 0 by the middle of the table.
  255. static const unsigned char pk_shape2[PEAKSHAPEW+1] = {
  256. 255, 254, 254, 254, 254, 254, 254, 254, 254, 254, 253, 253, 253, 253, 252, 252,
  257. 252, 251, 251, 251, 250, 250, 249, 249, 248, 248, 247, 247, 246, 245, 245, 244,
  258. 243, 243, 242, 241, 239, 237, 235, 233, 231, 229, 227, 225, 223, 221, 218, 216,
  259. 213, 211, 208, 205, 203, 200, 197, 194, 191, 187, 184, 181, 178, 174, 171, 167,
  260. 163, 160, 156, 152, 148, 144, 140, 136, 132, 127, 123, 119, 114, 110, 105, 100,
  261. 96, 94, 91, 88, 86, 83, 81, 78, 76, 74, 71, 69, 66, 64, 62, 60,
  262. 57, 55, 53, 51, 49, 47, 44, 42, 40, 38, 36, 34, 32, 30, 29, 27,
  263. 25, 23, 21, 19, 18, 16, 14, 12, 11, 9, 7, 6, 4, 3, 1, 0,
  264. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  265. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  266. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  267. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  268. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  269. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  270. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  271. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  272. 0
  273. };
  // Active peak shape table; WavegenInit() points it at pk_shape2.
  274. static const unsigned char *pk_shape;
  275. void WavegenInit(int rate, int wavemult_fact)
  276. {
  277. int ix;
  278. double x;
  279. if (wavemult_fact == 0)
  280. wavemult_fact = 60; // default
  281. wvoice = NULL;
  282. samplerate = rate;
  283. PHASE_INC_FACTOR = 0x8000000 / samplerate; // assumes pitch is Hz*32
  284. Flutter_inc = (64 * samplerate)/rate;
  285. samplecount = 0;
  286. nsamples = 0;
  287. wavephase = 0x7fffffff;
  288. wdata.amplitude = 32;
  289. wdata.amplitude_fmt = 100;
  290. for (ix = 0; ix < N_EMBEDDED_VALUES; ix++)
  291. embedded_value[ix] = embedded_default[ix];
  292. // set up window to generate a spread of harmonics from a
  293. // single peak for HF peaks
  294. wavemult_max = (samplerate * wavemult_fact)/(256 * 50);
  295. if (wavemult_max > N_WAVEMULT) wavemult_max = N_WAVEMULT;
  296. wavemult_offset = wavemult_max/2;
  297. if (samplerate != 22050) {
  298. // wavemult table has preset values for 22050 Hz, we only need to
  299. // recalculate them if we have a different sample rate
  300. for (ix = 0; ix < wavemult_max; ix++) {
  301. x = 127*(1.0 - cos((M_PI*2)*ix/wavemult_max));
  302. wavemult[ix] = (int)x;
  303. }
  304. }
  305. pk_shape = pk_shape2;
  306. #if USE_KLATT
  307. KlattInit();
  308. #endif
  309. }
  // Shut down the wave generator: releases the Klatt synthesizer when built
  // with USE_KLATT, otherwise does nothing.
  310. void WavegenFini(void)
  311. {
  312. #if USE_KLATT
  313. KlattFini();
  314. #endif
  315. }
  316. int GetAmplitude(void)
  317. {
  318. int amp;
  319. // normal, none, reduced, moderate, strong
  320. static const unsigned char amp_emphasis[5] = { 16, 16, 10, 16, 22 };
  321. amp = (embedded_value[EMBED_A])*55/100;
  322. general_amplitude = amp * amp_emphasis[embedded_value[EMBED_F]] / 16;
  323. return general_amplitude;
  324. }
  325. static void WavegenSetEcho(void)
  326. {
  327. if (wvoice == NULL)
  328. return;
  329. int delay;
  330. int amp;
  331. voicing = wvoice->voicing;
  332. delay = wvoice->echo_delay;
  333. amp = wvoice->echo_amp;
  334. if (delay >= N_ECHO_BUF)
  335. delay = N_ECHO_BUF-1;
  336. if (amp > 100)
  337. amp = 100;
  338. memset(echo_buf, 0, sizeof(echo_buf));
  339. echo_tail = 0;
  340. if (embedded_value[EMBED_H] > 0) {
  341. // set echo from an embedded command in the text
  342. amp = embedded_value[EMBED_H];
  343. delay = 130;
  344. }
  345. if (delay == 0)
  346. amp = 0;
  347. echo_head = (delay * samplerate)/1000;
  348. echo_length = echo_head; // ensure completion of echo at the end of speech. Use 1 delay period?
  349. if (amp == 0)
  350. echo_length = 0;
  351. if (amp > 20)
  352. echo_length = echo_head * 2; // perhaps allow 2 echo periods if the echo is loud.
  353. // echo_amp units are 1/256ths of the amplitude of the original sound.
  354. echo_amp = amp;
  355. // compensate (partially) for increase in amplitude due to echo
  356. general_amplitude = GetAmplitude();
  357. general_amplitude = ((general_amplitude * (500-amp))/500);
  358. }
  359. int PeaksToHarmspect(wavegen_peaks_t *peaks, int pitch, int *htab, int control)
  360. {
  361. if (wvoice == NULL)
  362. return 1;
  363. // Calculate the amplitude of each harmonics from the formants
  364. // Only for formants 0 to 5
  365. // control 0=initial call, 1=every 64 cycles
  366. // pitch and freqs are Hz<<16
  367. int f;
  368. wavegen_peaks_t *p;
  369. int fp; // centre freq of peak
  370. int fhi; // high freq of peak
  371. int h; // harmonic number
  372. int pk;
  373. int hmax;
  374. int hmax_samplerate; // highest harmonic allowed for the samplerate
  375. int x;
  376. int h1;
  377. // initialise as much of *out as we will need
  378. hmax = (peaks[wvoice->n_harmonic_peaks].freq + peaks[wvoice->n_harmonic_peaks].right)/pitch;
  379. if (hmax >= MAX_HARMONIC)
  380. hmax = MAX_HARMONIC-1;
  381. // restrict highest harmonic to half the samplerate
  382. hmax_samplerate = (((samplerate * 19)/40) << 16)/pitch; // only 95% of Nyquist freq
  383. if (hmax > hmax_samplerate)
  384. hmax = hmax_samplerate;
  385. for (h = 0; h <= hmax; h++)
  386. htab[h] = 0;
  387. for (pk = 0; pk <= wvoice->n_harmonic_peaks; pk++) {
  388. p = &peaks[pk];
  389. if ((p->height == 0) || (fp = p->freq) == 0)
  390. continue;
  391. fhi = p->freq + p->right;
  392. h = ((p->freq - p->left) / pitch) + 1;
  393. if (h <= 0) h = 1;
  394. for (f = pitch*h; f < fp; f += pitch)
  395. htab[h++] += pk_shape[(fp-f)/(p->left>>8)] * p->height;
  396. for (; f < fhi; f += pitch)
  397. htab[h++] += pk_shape[(f-fp)/(p->right>>8)] * p->height;
  398. }
  399. int y;
  400. int h2;
  401. // increase bass
  402. y = peaks[1].height * 10; // addition as a multiple of 1/256s
  403. h2 = (1000<<16)/pitch; // decrease until 1000Hz
  404. if (h2 > 0) {
  405. x = y/h2;
  406. h = 1;
  407. while (y > 0) {
  408. htab[h++] += y;
  409. y -= x;
  410. }
  411. }
  412. // find the nearest harmonic for HF peaks where we don't use shape
  413. for (; pk < N_PEAKS; pk++) {
  414. x = peaks[pk].height >> 14;
  415. peak_height[pk] = (x * x * 5)/2;
  416. // find the nearest harmonic for HF peaks where we don't use shape
  417. if (control == 0) {
  418. // set this initially, but make changes only at the quiet point
  419. peak_harmonic[pk] = peaks[pk].freq / pitch;
  420. }
  421. // only use harmonics up to half the samplerate
  422. if (peak_harmonic[pk] >= hmax_samplerate)
  423. peak_height[pk] = 0;
  424. }
  425. // convert from the square-rooted values
  426. f = 0;
  427. for (h = 0; h <= hmax; h++, f += pitch) {
  428. x = htab[h] >> 15;
  429. htab[h] = (x * x) >> 8;
  430. int ix;
  431. if ((ix = (f >> 19)) < N_TONE_ADJUST)
  432. htab[h] = (htab[h] * wvoice->tone_adjust[ix]) >> 13; // index tone_adjust with Hz/8
  433. }
  434. // adjust the amplitude of the first harmonic, affects tonal quality
  435. h1 = htab[1] * option_harmonic1;
  436. htab[1] = h1/8;
  437. // calc intermediate increments of LF harmonics
  438. if (control & 1) {
  439. for (h = 1; h < N_LOWHARM; h++)
  440. harm_inc[h] = (htab[h] - harmspect[h]) >> 3;
  441. }
  442. return hmax; // highest harmonic number
  443. }
  // Step the pitch and the formant peak parameters forward by one increment.
  // Updates wdata.pitch (envelope, flutter, optional constant f0, minimum
  // clamp) and amp_ix, then advances the peaks[] values by their per-step
  // increments. Does nothing when no voice is set.
  444. static void AdvanceParameters(void)
  445. {
  446. // Called every 64 samples to increment the formant freq, height, and widths
  447. if (wvoice == NULL)
  448. return;
  449. int x = 0;
  450. int ix;
  451. static int Flutter_ix = 0; // position in Flutter_tab, in 1/64ths of an entry; persists across calls
  452. // advance the pitch
  453. wdata.pitch_ix += wdata.pitch_inc;
  // the envelope index is pitch_ix/256, limited to 127
  454. if ((ix = wdata.pitch_ix>>8) > 127) ix = 127;
  // x stays 0 when there is no pitch envelope, leaving pitch at pitch_base
  455. if (wdata.pitch_env) x = wdata.pitch_env[ix] * wdata.pitch_range;
  456. wdata.pitch = (x>>8) + wdata.pitch_base;
  457. amp_ix += amp_inc;
  458. /* add pitch flutter */
  459. if (Flutter_ix >= (N_FLUTTER*64))
  460. Flutter_ix = 0;
  // table values are centred on 0x80
  461. x = ((int)(Flutter_tab[Flutter_ix >> 6])-0x80) * flutter_amp;
  462. Flutter_ix += Flutter_inc;
  463. wdata.pitch += x;
  // a non-zero const_f0 overrides the envelope and flutter
  464. if(const_f0)
  465. wdata.pitch = (const_f0<<12);
  466. if (wdata.pitch < 102400)
  467. wdata.pitch = 102400; // min pitch, 25 Hz (25 << 12)
  // at the first sample of a segment only the pitch is updated; the peaks keep their starting values
  468. if (samplecount == samplecount_start)
  469. return;
  // harmonic peaks: advance freq, height and widths; negative heights are clamped to 0
  470. for (ix = 0; ix <= wvoice->n_harmonic_peaks; ix++) {
  471. peaks[ix].freq1 += peaks[ix].freq_inc;
  472. peaks[ix].freq = (int)peaks[ix].freq1;
  473. peaks[ix].height1 += peaks[ix].height_inc;
  474. if ((peaks[ix].height = (int)peaks[ix].height1) < 0)
  475. peaks[ix].height = 0;
  476. peaks[ix].left1 += peaks[ix].left_inc;
  477. peaks[ix].left = (int)peaks[ix].left1;
  // only the first three peaks have an independent right width; the others mirror the left
  478. if (ix < 3) {
  479. peaks[ix].right1 += peaks[ix].right_inc;
  480. peaks[ix].right = (int)peaks[ix].right1;
  481. } else
  482. peaks[ix].right = peaks[ix].left;
  483. }
  // remaining peaks up to index 7 (ix continues from the loop above): freq is advanced only for ix < 7
  484. for (; ix < 8; ix++) {
  485. // formants 6,7,8 don't have a width parameter
  486. if (ix < 7) {
  487. peaks[ix].freq1 += peaks[ix].freq_inc;
  488. peaks[ix].freq = (int)peaks[ix].freq1;
  489. }
  490. peaks[ix].height1 += peaks[ix].height_inc;
  491. if ((peaks[ix].height = (int)peaks[ix].height1) < 0)
  492. peaks[ix].height = 0;
  493. }
  494. }
  495. static double resonator(RESONATOR *r, double input)
  496. {
  497. double x;
  498. x = r->a * input + r->b * r->x1 + r->c * r->x2;
  499. r->x2 = r->x1;
  500. r->x1 = x;
  501. return x;
  502. }
  503. static void setresonator(RESONATOR *rp, int freq, int bwidth, int init)
  504. {
  505. // freq Frequency of resonator in Hz
  506. // bwidth Bandwidth of resonator in Hz
  507. // init Initialize internal data
  508. double x;
  509. double arg;
  510. if (init) {
  511. rp->x1 = 0;
  512. rp->x2 = 0;
  513. }
  514. arg = minus_pi_t * bwidth;
  515. x = exp(arg);
  516. rp->c = -(x * x);
  517. arg = two_pi_t * freq;
  518. rp->b = x * cos(arg) * 2.0;
  519. rp->a = 1.0 - rp->b - rp->c;
  520. }
  521. void InitBreath(void)
  522. {
  523. int ix;
  524. minus_pi_t = -M_PI / samplerate;
  525. two_pi_t = -2.0 * minus_pi_t;
  526. for (ix = 0; ix < N_PEAKS; ix++)
  527. setresonator(&rbreath[ix], 2000, 200, 1);
  528. }
  529. static void SetBreath(void)
  530. {
  531. int pk;
  532. if (wvoice == NULL || wvoice->breath[0] == 0)
  533. return;
  534. for (pk = 1; pk < N_PEAKS; pk++) {
  535. if (wvoice->breath[pk] != 0) {
  536. // breath[0] indicates that some breath formants are needed
  537. // set the freq from the current synthesis formant and the width from the voice data
  538. setresonator(&rbreath[pk], peaks[pk].freq >> 16, wvoice->breathw[pk], 0);
  539. }
  540. }
  541. }
  542. static int ApplyBreath(void)
  543. {
  544. if (wvoice == NULL)
  545. return 0;
  546. int value = 0;
  547. int noise;
  548. int ix;
  549. // use two random numbers, for alternate formants
  550. noise = espeak_rand(-0x2000, 0x1fff);
  551. for (ix = 1; ix < N_PEAKS; ix++) {
  552. int amp;
  553. if ((amp = wvoice->breath[ix]) != 0) {
  554. amp *= (peaks[ix].height >> 14);
  555. value += (int)resonator(&rbreath[ix], noise) * amp;
  556. }
  557. }
  558. return value;
  559. }
  560. static int Wavegen(int length, int modulation, bool resume, frame_t *fr1, frame_t *fr2, voice_t *wvoice)
  561. {
  562. if (resume == false)
  563. SetSynth(length, modulation, fr1, fr2, wvoice);
  564. if (wvoice == NULL)
  565. return 0;
  566. unsigned short waveph;
  567. unsigned short theta;
  568. int total;
  569. int h;
  570. int ix;
  571. int z, z1, z2;
  572. int echo;
  573. int ov;
  574. static int maxh, maxh2;
  575. int pk;
  576. signed char c;
  577. int sample;
  578. int amp;
  579. int modn_amp = 1, modn_period;
  580. static int agc = 256;
  581. static int h_switch_sign = 0;
  582. static int cycle_count = 0;
  583. static int amplitude2 = 0; // adjusted for pitch
  584. // continue until the output buffer is full, or
  585. // the required number of samples have been produced
  586. for (;;) {
  587. if ((end_wave == 0) && (samplecount == nsamples))
  588. return 0;
  589. if ((samplecount & 0x3f) == 0) {
  590. // every 64 samples, adjust the parameters
  591. if (samplecount == 0) {
  592. hswitch = 0;
  593. harmspect = hspect[0];
  594. maxh2 = PeaksToHarmspect(peaks, wdata.pitch<<4, hspect[0], 0);
  595. // adjust amplitude to compensate for fewer harmonics at higher pitch
  596. amplitude2 = (wdata.amplitude * (wdata.pitch >> 8) * wdata.amplitude_fmt)/(10000 << 3);
  597. // switch sign of harmonics above about 900Hz, to reduce max peak amplitude
  598. h_switch_sign = 890 / (wdata.pitch >> 12);
  599. } else
  600. AdvanceParameters();
  601. // pitch is Hz<<12
  602. phaseinc = (wdata.pitch>>7) * PHASE_INC_FACTOR;
  603. cycle_samples = samplerate/(wdata.pitch >> 12); // sr/(pitch*2)
  604. hf_factor = wdata.pitch >> 11;
  605. maxh = maxh2;
  606. harmspect = hspect[hswitch];
  607. hswitch ^= 1;
  608. maxh2 = PeaksToHarmspect(peaks, wdata.pitch<<4, hspect[hswitch], 1);
  609. SetBreath();
  610. } else if ((samplecount & 0x07) == 0) {
  611. for (h = 1; h < N_LOWHARM && h <= maxh2 && h <= maxh; h++)
  612. harmspect[h] += harm_inc[h];
  613. // bring automatic gain control back towards unity
  614. if (agc < 256) agc++;
  615. }
  616. samplecount++;
  617. if (wavephase > 0) {
  618. wavephase += phaseinc;
  619. if (wavephase < 0) {
  620. // sign has changed, reached a quiet point in the waveform
  621. cbytes = wavemult_offset - (cycle_samples)/2;
  622. if (samplecount > nsamples)
  623. return 0;
  624. cycle_count++;
  625. for (pk = wvoice->n_harmonic_peaks+1; pk < N_PEAKS; pk++) {
  626. // find the nearest harmonic for HF peaks where we don't use shape
  627. peak_harmonic[pk] = ((peaks[pk].freq / (wdata.pitch*8)) + 1) / 2;
  628. }
  629. // adjust amplitude to compensate for fewer harmonics at higher pitch
  630. amplitude2 = (wdata.amplitude * (wdata.pitch >> 8) * wdata.amplitude_fmt)/(10000 << 3);
  631. if (glottal_flag > 0) {
  632. if (glottal_flag == 3) {
  633. if ((nsamples-samplecount) < (cycle_samples*2)) {
  634. // Vowel before glottal-stop.
  635. // This is the start of the penultimate cycle, reduce its amplitude
  636. glottal_flag = 2;
  637. amplitude2 = (amplitude2 * glottal_reduce)/256;
  638. }
  639. } else if (glottal_flag == 4) {
  640. // Vowel following a glottal-stop.
  641. // This is the start of the second cycle, reduce its amplitude
  642. glottal_flag = 2;
  643. amplitude2 = (amplitude2 * glottal_reduce)/256;
  644. } else
  645. glottal_flag--;
  646. }
  647. if (amplitude_env != NULL) {
  648. // amplitude envelope is only used for creaky voice effect on certain vowels/tones
  649. if ((ix = amp_ix>>8) > 127) ix = 127;
  650. amp = amplitude_env[ix];
  651. amplitude2 = (amplitude2 * amp)/128;
  652. }
  653. // introduce roughness into the sound by reducing the amplitude of
  654. modn_period = 0;
  655. if (voice->roughness < N_ROUGHNESS) {
  656. modn_period = modulation_tab[voice->roughness][modulation_type];
  657. modn_amp = modn_period & 0xf;
  658. modn_period = modn_period >> 4;
  659. }
  660. if (modn_period != 0) {
  661. if (modn_period == 0xf) {
  662. // just once */
  663. amplitude2 = (amplitude2 * modn_amp)/16;
  664. modulation_type = 0;
  665. } else {
  666. // reduce amplitude every [modn_period} cycles
  667. if ((cycle_count % modn_period) == 0)
  668. amplitude2 = (amplitude2 * modn_amp)/16;
  669. }
  670. }
  671. }
  672. } else
  673. wavephase += phaseinc;
  674. waveph = (unsigned short)(wavephase >> 16);
  675. total = 0;
  676. // apply HF peaks, formants 6,7,8
  677. // add a single harmonic and then spread this my multiplying by a
  678. // window. This is to reduce the processing power needed to add the
  679. // higher frequence harmonics.
  680. cbytes++;
  681. if (cbytes >= 0 && cbytes < wavemult_max) {
  682. for (pk = wvoice->n_harmonic_peaks+1; pk < N_PEAKS; pk++) {
  683. theta = peak_harmonic[pk] * waveph;
  684. total += (long)sin_tab[theta >> 5] * peak_height[pk];
  685. }
  686. // spread the peaks by multiplying by a window
  687. total = (long)(total / hf_factor) * wavemult[cbytes];
  688. }
  689. // apply main peaks, formants 0 to 5
  690. theta = waveph;
  691. for (h = 1; h <= h_switch_sign; h++) {
  692. total += ((int)sin_tab[theta >> 5] * harmspect[h]);
  693. theta += waveph;
  694. }
  695. while (h <= maxh) {
  696. total -= ((int)sin_tab[theta >> 5] * harmspect[h]);
  697. theta += waveph;
  698. h++;
  699. }
  700. if (voicing != 64)
  701. total = (total >> 6) * voicing;
  702. if (wvoice->breath[0])
  703. total += ApplyBreath();
  704. // mix with sampled wave if required
  705. z2 = 0;
  706. if (wdata.mix_wavefile_ix < wdata.n_mix_wavefile) {
  707. if (wdata.mix_wave_scale == 0) {
  708. // a 16 bit sample
  709. c = wdata.mix_wavefile[wdata.mix_wavefile_ix+wdata.mix_wavefile_offset+1];
  710. sample = wdata.mix_wavefile[wdata.mix_wavefile_ix+wdata.mix_wavefile_offset] + (c * 256);
  711. wdata.mix_wavefile_ix += 2;
  712. } else {
  713. // a 8 bit sample, scaled
  714. sample = (signed char)wdata.mix_wavefile[wdata.mix_wavefile_offset+wdata.mix_wavefile_ix++] * wdata.mix_wave_scale;
  715. }
  716. z2 = (sample * wdata.amplitude_v) >> 10;
  717. z2 = (z2 * wdata.mix_wave_amp)/32;
  718. if ((wdata.mix_wavefile_ix + wdata.mix_wavefile_offset) >= wdata.mix_wavefile_max) // reached the end of available WAV data
  719. wdata.mix_wavefile_offset -= (wdata.mix_wavefile_max*3)/4;
  720. }
  721. z1 = z2 + (((total>>8) * amplitude2) >> 13);
  722. echo = (echo_buf[echo_tail++] * echo_amp);
  723. z1 += echo >> 8;
  724. if (echo_tail >= N_ECHO_BUF)
  725. echo_tail = 0;
  726. z = (z1 * agc) >> 8;
  727. // check for overflow, 16bit signed samples
  728. if (z >= 32768) {
  729. ov = 8388608/z1 - 1; // 8388608 is 2^23, i.e. max value * 256
  730. if (ov < agc) agc = ov; // set agc to number of 1/256ths to multiply the sample by
  731. z = (z1 * agc) >> 8; // reduce sample by agc value to prevent overflow
  732. } else if (z <= -32768) {
  733. ov = -8388608/z1 - 1;
  734. if (ov < agc) agc = ov;
  735. z = (z1 * agc) >> 8;
  736. }
  737. *out_ptr++ = z;
  738. *out_ptr++ = z >> 8;
  739. if(output_hooks && output_hooks->outputVoiced) output_hooks->outputVoiced(z);
  740. echo_buf[echo_head++] = z;
  741. if (echo_head >= N_ECHO_BUF)
  742. echo_head = 0;
  743. if (out_ptr + 2 > out_end)
  744. return 1;
  745. }
  746. }
  747. static int PlaySilence(int length, bool resume)
  748. {
  749. static int n_samples;
  750. nsamples = 0;
  751. samplecount = 0;
  752. wavephase = 0x7fffffff;
  753. if (length == 0)
  754. return 0;
  755. if (resume == false)
  756. n_samples = length;
  757. int value = 0;
  758. while (n_samples-- > 0) {
  759. value = (echo_buf[echo_tail++] * echo_amp) >> 8;
  760. if (echo_tail >= N_ECHO_BUF)
  761. echo_tail = 0;
  762. *out_ptr++ = value;
  763. *out_ptr++ = value >> 8;
  764. if(output_hooks && output_hooks->outputSilence) output_hooks->outputSilence(value);
  765. echo_buf[echo_head++] = value;
  766. if (echo_head >= N_ECHO_BUF)
  767. echo_head = 0;
  768. if (out_ptr + 2 > out_end)
  769. return 1;
  770. }
  771. return 0;
  772. }
  773. static int PlayWave(int length, bool resume, unsigned char *data, int scale, int amp)
  774. {
  775. static int n_samples;
  776. static int ix = 0;
  777. int value;
  778. signed char c;
  779. if (resume == false) {
  780. n_samples = length;
  781. ix = 0;
  782. }
  783. nsamples = 0;
  784. samplecount = 0;
  785. while (n_samples-- > 0) {
  786. if (scale == 0) {
  787. // 16 bits data
  788. c = data[ix+1];
  789. value = data[ix] + (c * 256);
  790. ix += 2;
  791. } else {
  792. // 8 bit data, shift by the specified scale factor
  793. value = (signed char)data[ix++] * scale;
  794. }
  795. value *= (consonant_amp * general_amplitude); // reduce strength of consonant
  796. value = value >> 10;
  797. value = (value * amp)/32;
  798. value += ((echo_buf[echo_tail++] * echo_amp) >> 8);
  799. if (value > 32767)
  800. value = 32767;
  801. else if (value < -32768)
  802. value = -32768;
  803. if (echo_tail >= N_ECHO_BUF)
  804. echo_tail = 0;
  805. out_ptr[0] = value;
  806. out_ptr[1] = value >> 8;
  807. if(output_hooks && output_hooks->outputUnvoiced) output_hooks->outputUnvoiced(value);
  808. out_ptr += 2;
  809. echo_buf[echo_head++] = (value*3)/4;
  810. if (echo_head >= N_ECHO_BUF)
  811. echo_head = 0;
  812. if (out_ptr + 2 > out_end)
  813. return 1;
  814. }
  815. return 0;
  816. }
  /*
   * Limit value to the range 0..max.
   *
   * Negative values give 0 (checked first, so this holds even for a negative
   * max); values above max give max; anything else is returned unchanged.
   */
  static int SetWithRange0(int value, int max)
  {
      if (value < 0)
          return 0;

      return (value > max) ? max : value;
  }
  825. static void SetPitchFormants(void)
  826. {
  827. if (wvoice == NULL)
  828. return;
  829. int ix;
  830. int factor = 256;
  831. int pitch_value;
  832. // adjust formants to give better results for a different voice pitch
  833. if ((pitch_value = embedded_value[EMBED_P]) > MAX_PITCH_VALUE)
  834. pitch_value = MAX_PITCH_VALUE;
  835. if (pitch_value > 50) {
  836. // only adjust if the pitch is higher than normal
  837. factor = 256 + (25 * (pitch_value - 50))/50;
  838. }
  839. for (ix = 0; ix <= 5; ix++)
  840. wvoice->freq[ix] = (wvoice->freq2[ix] * factor)/256;
  841. factor = embedded_value[EMBED_T]*3;
  842. wvoice->height[0] = (wvoice->height2[0] * (256 - factor*2))/256;
  843. wvoice->height[1] = (wvoice->height2[1] * (256 - factor))/256;
  844. }
  845. void SetEmbedded(int control, int value)
  846. {
  847. // there was an embedded command in the text at this point
  848. int sign = 0;
  849. int command;
  850. command = control & 0x1f;
  851. if ((control & 0x60) == 0x60)
  852. sign = -1;
  853. else if ((control & 0x60) == 0x40)
  854. sign = 1;
  855. if (command < N_EMBEDDED_VALUES) {
  856. if (sign == 0)
  857. embedded_value[command] = value;
  858. else
  859. embedded_value[command] += (value * sign);
  860. embedded_value[command] = SetWithRange0(embedded_value[command], embedded_max[command]);
  861. }
  862. switch (command)
  863. {
  864. case EMBED_T:
  865. WavegenSetEcho(); // and drop through to case P
  866. case EMBED_P:
  867. SetPitchFormants();
  868. break;
  869. case EMBED_A: // amplitude
  870. general_amplitude = GetAmplitude();
  871. break;
  872. case EMBED_F: // emphasis
  873. general_amplitude = GetAmplitude();
  874. break;
  875. case EMBED_H:
  876. WavegenSetEcho();
  877. break;
  878. }
  879. }
  880. void WavegenSetVoice(voice_t *v)
  881. {
  882. static voice_t v2;
  883. memcpy(&v2, v, sizeof(v2));
  884. wvoice = &v2;
  885. if (v->peak_shape == 0)
  886. pk_shape = pk_shape1;
  887. else
  888. pk_shape = pk_shape2;
  889. consonant_amp = (v->consonant_amp * 26) /100;
  890. if (samplerate <= 11000) {
  891. consonant_amp = consonant_amp*2; // emphasize consonants at low sample rates
  892. option_harmonic1 = 6;
  893. }
  894. WavegenSetEcho();
  895. SetPitchFormants();
  896. MarkerEvent(espeakEVENT_SAMPLERATE, 0, wvoice->samplerate, 0, out_ptr);
  897. }
  898. static void SetAmplitude(int length, unsigned char *amp_env, int value)
  899. {
  900. if (wvoice == NULL)
  901. return;
  902. amp_ix = 0;
  903. if (length == 0)
  904. amp_inc = 0;
  905. else
  906. amp_inc = (256 * ENV_LEN * STEPSIZE)/length;
  907. wdata.amplitude = (value * general_amplitude)/16;
  908. wdata.amplitude_v = (wdata.amplitude * wvoice->consonant_ampv * 15)/100; // for wave mixed with voiced sounds
  909. amplitude_env = amp_env;
  910. }
  911. void SetPitch2(voice_t *voice, int pitch1, int pitch2, int *pitch_base, int *pitch_range)
  912. {
  913. int base;
  914. int range;
  915. int pitch_value;
  916. if (pitch1 > pitch2) {
  917. int x;
  918. x = pitch1; // swap values
  919. pitch1 = pitch2;
  920. pitch2 = x;
  921. }
  922. if ((pitch_value = embedded_value[EMBED_P]) > MAX_PITCH_VALUE)
  923. pitch_value = MAX_PITCH_VALUE;
  924. pitch_value -= embedded_value[EMBED_T]; // adjust tone for announcing punctuation
  925. if (pitch_value < 0)
  926. pitch_value = 0;
  927. base = (voice->pitch_base * pitch_adjust_tab[pitch_value])/128;
  928. range = (voice->pitch_range * embedded_value[EMBED_R])/50;
  929. // compensate for change in pitch when the range is narrowed or widened
  930. base -= (range - voice->pitch_range)*18;
  931. *pitch_base = base + (pitch1 * range)/2;
  932. *pitch_range = base + (pitch2 * range)/2 - *pitch_base;
  933. }
  934. static void SetPitch(int length, unsigned char *env, int pitch1, int pitch2)
  935. {
  936. if (wvoice == NULL)
  937. return;
  938. // length in samples
  939. if ((wdata.pitch_env = env) == NULL)
  940. wdata.pitch_env = env_fall; // default
  941. wdata.pitch_ix = 0;
  942. if (length == 0)
  943. wdata.pitch_inc = 0;
  944. else
  945. wdata.pitch_inc = (256 * ENV_LEN * STEPSIZE)/length;
  946. SetPitch2(wvoice, pitch1, pitch2, &wdata.pitch_base, &wdata.pitch_range);
  947. // set initial pitch
  948. wdata.pitch = ((wdata.pitch_env[0] * wdata.pitch_range) >>8) + wdata.pitch_base; // Hz << 12
  949. flutter_amp = wvoice->flutter;
  950. }
  951. static void SetSynth(int length, int modn, frame_t *fr1, frame_t *fr2, voice_t *v)
  952. {
  953. if (wvoice == NULL || v == NULL)
  954. return;
  955. int ix;
  956. double next;
  957. int length2;
  958. int length4;
  959. int qix;
  960. static const int glottal_reduce_tab1[4] = { 0x30, 0x30, 0x40, 0x50 }; // vowel before [?], amp * 1/256
  961. static const int glottal_reduce_tab2[4] = { 0x90, 0xa0, 0xb0, 0xc0 }; // vowel after [?], amp * 1/256
  962. end_wave = 1;
  963. // any additional information in the param1 ?
  964. modulation_type = modn & 0xff;
  965. glottal_flag = 0;
  966. if (modn & 0x400) {
  967. glottal_flag = 3; // before a glottal stop
  968. glottal_reduce = glottal_reduce_tab1[(modn >> 8) & 3];
  969. }
  970. if (modn & 0x800) {
  971. glottal_flag = 4; // after a glottal stop
  972. glottal_reduce = glottal_reduce_tab2[(modn >> 8) & 3];
  973. }
  974. for (qix = wcmdq_head+1;; qix++) {
  975. if (qix >= N_WCMDQ) qix = 0;
  976. if (qix == wcmdq_tail) break;
  977. int cmd = wcmdq[qix][0];
  978. if (cmd == WCMD_SPECT) {
  979. end_wave = 0; // next wave generation is from another spectrum
  980. break;
  981. }
  982. if ((cmd == WCMD_WAVE) || (cmd == WCMD_PAUSE))
  983. break; // next is not from spectrum, so continue until end of wave cycle
  984. }
  985. // round the length to a multiple of the stepsize
  986. length2 = (length + STEPSIZE/2) & ~0x3f;
  987. if (length2 == 0)
  988. length2 = STEPSIZE;
  989. // add this length to any left over from the previous synth
  990. samplecount_start = samplecount;
  991. nsamples += length2;
  992. length4 = length2/4;
  993. peaks[7].freq = (7800 * v->freq[7] + v->freqadd[7]*256) << 8;
  994. peaks[8].freq = (9000 * v->freq[8] + v->freqadd[8]*256) << 8;
  995. for (ix = 0; ix < 8; ix++) {
  996. if (ix < 7) {
  997. peaks[ix].freq1 = (fr1->ffreq[ix] * v->freq[ix] + v->freqadd[ix]*256) << 8;
  998. peaks[ix].freq = (int)peaks[ix].freq1;
  999. next = (fr2->ffreq[ix] * v->freq[ix] + v->freqadd[ix]*256) << 8;
  1000. peaks[ix].freq_inc = ((next - peaks[ix].freq1) * (STEPSIZE/4)) / length4; // lower headroom for fixed point math
  1001. }
  1002. peaks[ix].height1 = (fr1->fheight[ix] * v->height[ix]) << 6;
  1003. peaks[ix].height = (int)peaks[ix].height1;
  1004. next = (fr2->fheight[ix] * v->height[ix]) << 6;
  1005. peaks[ix].height_inc = ((next - peaks[ix].height1) * STEPSIZE) / length2;
  1006. if ((ix <= 5) && (ix <= wvoice->n_harmonic_peaks)) {
  1007. peaks[ix].left1 = (fr1->fwidth[ix] * v->width[ix]) << 10;
  1008. peaks[ix].left = (int)peaks[ix].left1;
  1009. next = (fr2->fwidth[ix] * v->width[ix]) << 10;
  1010. peaks[ix].left_inc = ((next - peaks[ix].left1) * STEPSIZE) / length2;
  1011. if (ix < 3) {
  1012. peaks[ix].right1 = (fr1->fright[ix] * v->width[ix]) << 10;
  1013. peaks[ix].right = (int)peaks[ix].right1;
  1014. next = (fr2->fright[ix] * v->width[ix]) << 10;
  1015. peaks[ix].right_inc = ((next - peaks[ix].right1) * STEPSIZE) / length2;
  1016. } else
  1017. peaks[ix].right = peaks[ix].left;
  1018. }
  1019. }
  1020. }
  1021. void Write4Bytes(FILE *f, int value)
  1022. {
  1023. // Write 4 bytes to a file, least significant first
  1024. int ix;
  1025. for (ix = 0; ix < 4; ix++) {
  1026. fputc(value & 0xff, f);
  1027. value = value >> 8;
  1028. }
  1029. }
  1030. static int WavegenFill2(void)
  1031. {
  1032. // Pick up next wavegen commands from the queue
  1033. // return: 0 output buffer has been filled
  1034. // return: 1 input command queue is now empty
  1035. intptr_t *q;
  1036. int length;
  1037. int result;
  1038. int marker_type;
  1039. static bool resume = false;
  1040. static int echo_complete = 0;
  1041. if (wdata.pitch < 102400)
  1042. wdata.pitch = 102400; // min pitch, 25 Hz (25 << 12)
  1043. while (out_ptr < out_end) {
  1044. if (WcmdqUsed() <= 0) {
  1045. if (echo_complete > 0) {
  1046. // continue to play silence until echo is completed
  1047. resume = PlaySilence(echo_complete, resume);
  1048. if (resume == true)
  1049. return 0; // not yet finished
  1050. }
  1051. return 1; // queue empty, close sound channel
  1052. }
  1053. result = 0;
  1054. q = wcmdq[wcmdq_head];
  1055. length = q[1];
  1056. switch (q[0] & 0xff)
  1057. {
  1058. case WCMD_PITCH:
  1059. SetPitch(length, (unsigned char *)q[2], q[3] >> 16, q[3] & 0xffff);
  1060. break;
  1061. case WCMD_PHONEME_ALIGNMENT:
  1062. {
  1063. char* data = (char*)q[1];
  1064. output_hooks->outputPhoSymbol(data,q[2]);
  1065. free(data);
  1066. }
  1067. break;
  1068. case WCMD_PAUSE:
  1069. if (resume == false)
  1070. echo_complete -= length;
  1071. wdata.n_mix_wavefile = 0;
  1072. wdata.amplitude_fmt = 100;
  1073. #if USE_KLATT
  1074. KlattReset(1);
  1075. #endif
  1076. result = PlaySilence(length, resume);
  1077. break;
  1078. case WCMD_WAVE:
  1079. echo_complete = echo_length;
  1080. wdata.n_mix_wavefile = 0;
  1081. #if USE_KLATT
  1082. KlattReset(1);
  1083. #endif
  1084. result = PlayWave(length, resume, (unsigned char *)q[2], q[3] & 0xff, q[3] >> 8);
  1085. break;
  1086. case WCMD_WAVE2:
  1087. // wave file to be played at the same time as synthesis
  1088. wdata.mix_wave_amp = q[3] >> 8;
  1089. wdata.mix_wave_scale = q[3] & 0xff;
  1090. wdata.n_mix_wavefile = (length & 0xffff);
  1091. wdata.mix_wavefile_max = (length >> 16) & 0xffff;
  1092. if (wdata.mix_wave_scale == 0) {
  1093. wdata.n_mix_wavefile *= 2;
  1094. wdata.mix_wavefile_max *= 2;
  1095. }
  1096. wdata.mix_wavefile_ix = 0;
  1097. wdata.mix_wavefile_offset = 0;
  1098. wdata.mix_wavefile = (unsigned char *)q[2];
  1099. break;
  1100. case WCMD_SPECT2: // as WCMD_SPECT but stop any concurrent wave file
  1101. wdata.n_mix_wavefile = 0; // ... and drop through to WCMD_SPECT case
  1102. case WCMD_SPECT:
  1103. echo_complete = echo_length;
  1104. result = Wavegen(length & 0xffff, q[1] >> 16, resume, (frame_t *)q[2], (frame_t *)q[3], wvoice);
  1105. break;
  1106. #if USE_KLATT
  1107. case WCMD_KLATT2: // as WCMD_SPECT but stop any concurrent wave file
  1108. wdata.n_mix_wavefile = 0; // ... and drop through to WCMD_SPECT case
  1109. case WCMD_KLATT:
  1110. echo_complete = echo_length;
  1111. result = Wavegen_Klatt(length & 0xffff, resume, (frame_t *)q[2], (frame_t *)q[3], &wdata, wvoice);
  1112. break;
  1113. #endif
  1114. case WCMD_MARKER:
  1115. marker_type = q[0] >> 8;
  1116. MarkerEvent(marker_type, q[1], * (int *) & q[2], * ((int *) & q[2] + 1), out_ptr);
  1117. break;
  1118. case WCMD_AMPLITUDE:
  1119. SetAmplitude(length, (unsigned char *)q[2], q[3]);
  1120. break;
  1121. case WCMD_VOICE:
  1122. WavegenSetVoice((voice_t *)q[2]);
  1123. free((voice_t *)q[2]);
  1124. break;
  1125. case WCMD_EMBEDDED:
  1126. SetEmbedded(q[1], q[2]);
  1127. break;
  1128. #if USE_MBROLA
  1129. case WCMD_MBROLA_DATA:
  1130. if (wvoice != NULL)
  1131. result = MbrolaFill(length, resume, (general_amplitude * wvoice->voicing)/64);
  1132. break;
  1133. #endif
  1134. case WCMD_FMT_AMPLITUDE:
  1135. if ((wdata.amplitude_fmt = q[1]) == 0)
  1136. wdata.amplitude_fmt = 100; // percentage, but value=0 means 100%
  1137. break;
  1138. #if USE_LIBSONIC
  1139. case WCMD_SONIC_SPEED:
  1140. sonicSpeed = (double)q[1] / 1024;
  1141. if (sonicSpeedupStream && (sonicSpeed <= 1.0)) {
  1142. sonicFlushStream(sonicSpeedupStream);
  1143. int length = (out_end - out_ptr);
  1144. length = sonicReadShortFromStream(sonicSpeedupStream, (short*)out_ptr, length/2);
  1145. #ifdef ARCH_BIG
  1146. {
  1147. unsigned i;
  1148. for (i = 0; i < length/2; i++) {
  1149. unsigned short v = ((unsigned short *) out_ptr)[i];
  1150. out_ptr[i*2] = v & 0xff;
  1151. out_ptr[i*2+1] = v >> 8;
  1152. }
  1153. }
  1154. #endif
  1155. out_ptr += length * 2;
  1156. }
  1157. break;
  1158. #endif
  1159. }
  1160. if (result == 0) {
  1161. WcmdqIncHead();
  1162. resume = false;
  1163. } else
  1164. resume = true;
  1165. }
  1166. return 0;
  1167. }
  1168. #if USE_LIBSONIC
  1169. // Speed up the audio samples with libsonic.
  1170. static int SpeedUp(short *outbuf, int length_in, int length_out, int end_of_text)
  1171. {
  1172. #ifdef ARCH_BIG
  1173. unsigned i;
  1174. #endif
  1175. if (length_in > 0) {
  1176. if (sonicSpeedupStream == NULL)
  1177. sonicSpeedupStream = sonicCreateStream(22050, 1);
  1178. if (sonicGetSpeed(sonicSpeedupStream) != sonicSpeed)
  1179. sonicSetSpeed(sonicSpeedupStream, sonicSpeed);
  1180. #ifdef ARCH_BIG
  1181. for (i = 0; i < length_in; i++) {
  1182. unsigned short v = ((unsigned char*) outbuf)[i*2] | (((unsigned char *)outbuf)[i*2+1] << 8);
  1183. ((unsigned short *) outbuf)[i] = v;
  1184. }
  1185. #endif
  1186. sonicWriteShortToStream(sonicSpeedupStream, outbuf, length_in);
  1187. }
  1188. if (sonicSpeedupStream == NULL)
  1189. return 0;
  1190. if (end_of_text)
  1191. sonicFlushStream(sonicSpeedupStream);
  1192. int ret = sonicReadShortFromStream(sonicSpeedupStream, outbuf, length_out);
  1193. #ifdef ARCH_BIG
  1194. for (i = 0; i < length_out; i++) {
  1195. unsigned short v = ((unsigned short *) outbuf)[i];
  1196. ((unsigned char *)outbuf)[i*2] = v & 0xff;
  1197. ((unsigned char *)outbuf)[i*2+1] = v >> 8;
  1198. }
  1199. #endif
  1200. return ret;
  1201. }
  1202. #endif
  1203. // Call WavegenFill2, and then speed up the output samples.
  1204. int WavegenFill(void)
  1205. {
  1206. int finished;
  1207. #if USE_LIBSONIC
  1208. unsigned char *p_start;
  1209. p_start = out_ptr;
  1210. #endif
  1211. finished = WavegenFill2();
  1212. #if USE_LIBSONIC
  1213. if (sonicSpeed > 1.0) {
  1214. int length;
  1215. int max_length;
  1216. max_length = (out_end - p_start);
  1217. length = 2*SpeedUp((short *)p_start, (out_ptr-p_start)/2, max_length/2, finished);
  1218. out_ptr = p_start + length;
  1219. if (length >= max_length)
  1220. finished = 0; // there may be more data to flush
  1221. }
  1222. #endif
  1223. return finished;
  1224. }
  1225. #pragma GCC visibility push(default)
  1226. ESPEAK_NG_API espeak_ng_STATUS
  1227. espeak_ng_SetOutputHooks(espeak_ng_OUTPUT_HOOKS* hooks)
  1228. {
  1229. output_hooks = hooks;
  1230. return 0;
  1231. }
  1232. ESPEAK_NG_API espeak_ng_STATUS
  1233. espeak_ng_SetConstF0(int f0)
  1234. {
  1235. const_f0 = f0;
  1236. return ENS_OK;
  1237. }
  1238. #pragma GCC visibility pop