eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

klatt.h 6.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  /*
   * Copyright (C) 2008 by Jonathan Duddington
   * email: [email protected]
   * Copyright (C) 2015-2017 Reece H. Dunn
   *
   * Based on a re-implementation by:
   * (c) 1993,94 Jon Iles and Nick Ing-Simmons
   * of the Klatt cascade-parallel formant synthesizer
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 3 of the License, or
   * (at your option) any later version.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   * GNU General Public License for more details.
   *
   * You should have received a copy of the GNU General Public License
   * along with this program; if not, see: <http://www.gnu.org/licenses/>.
   */
  23. #ifndef ESPEAK_NG_KLATT_H
  24. #define ESPEAK_NG_KLATT_H
  25. #include "voice.h" // for voice_t
  26. #include "synthesize.h" // for frame_t, WGEN_DATA
  27. #ifdef __cplusplus
  28. extern "C"
  29. {
  30. #endif
  /*
   * Type of synthesis model.
   * Kept as macros (not an enum) so they stay usable in #if expressions.
   */
  #define CASCADE_PARALLEL 1
  #define ALL_PARALLEL 2

  /* Type of voicing source. */
  #define IMPULSIVE 1
  #define NATURAL 2
  #define SAMPLED 3
  #define SAMPLED2 4
  /* typedefs that need to be exported */

  /*
   * Integer type used for the selector fields of the Klatt globals
   * (synthesis_model, outsl, glsource). It is a plain signed long.
   */
  typedef long flag;
  /*
   * Resonator Structure
   *
   * Holds the three resonator parameters (a, b, c), two further state
   * values, and a per-parameter increment for each of a, b and c.
   * All members are doubles; the member order is part of the layout and
   * must not be changed.
   */
  typedef struct {
      double a;     /* resonator parameter a */
      double b;     /* resonator parameter b */
      double c;     /* resonator parameter c */
      double p1;    /* NOTE(review): presumably the previous output sample - confirm in klatt.c */
      double p2;    /* NOTE(review): presumably the output before p1 - confirm in klatt.c */
      double a_inc; /* increment applied to a */
      double b_inc; /* increment applied to b */
      double c_inc; /* increment applied to c */
  } resonator_t, *resonator_ptr;
  50. /* Structure for Klatt Globals */
  51. typedef struct {
  52. flag synthesis_model; /* cascade-parallel or all-parallel */
  53. flag outsl; /* Output waveform selector */
  54. long samrate; /* Number of output samples per second */
  55. long FLPhz; /* Frequeny of glottal downsample low-pass filter */
  56. long BLPhz; /* Bandwidth of glottal downsample low-pass filter */
  57. flag glsource; /* Type of glottal source */
  58. int f0_flutter; /* Percentage of f0 flutter 0-100 */
  59. long nspfr; /* number of samples per frame */
  60. long nper; /* Counter for number of samples in a pitch period */
  61. long ns;
  62. long T0; /* Fundamental period in output samples times 4 */
  63. long nopen; /* Number of samples in open phase of period */
  64. long nmod; /* Position in period to begin noise amp. modul */
  65. long nrand; /* Variable used by random number generator */
  66. double pulse_shape_a; /* Makes waveshape of glottal pulse when open */
  67. double pulse_shape_b; /* Makes waveshape of glottal pulse when open */
  68. double minus_pi_t;
  69. double two_pi_t;
  70. double onemd;
  71. double decay;
  72. double amp_bypas; /* AB converted to linear gain */
  73. double amp_voice; /* AVdb converted to linear gain */
  74. double par_amp_voice; /* AVpdb converted to linear gain */
  75. double amp_aspir; /* AP converted to linear gain */
  76. double amp_frica; /* AF converted to linear gain */
  77. double amp_breth; /* ATURB converted to linear gain */
  78. double amp_gain0; /* G0 converted to linear gain */
  79. int num_samples; /* number of glottal samples */
  80. double sample_factor; /* multiplication factor for glottal samples */
  81. short *natural_samples; /* pointer to an array of glottal samples */
  82. long original_f0; /* original value of f0 not modified by flutter */
  83. int fadein;
  84. int fadeout; // set to 64 to cause fadeout over 64 samples
  85. int scale_wav; // depends on the voicing source
  86. #define N_RSN 20
  87. #define Rnz 0 // nasal zero, anti-resonator
  88. #define R1c 1
  89. #define R2c 2
  90. #define R3c 3
  91. #define R4c 4
  92. #define R5c 5
  93. #define R6c 6
  94. #define R7c 7
  95. #define R8c 8
  96. #define Rnpc 9 // nasal pole
  97. #define Rparallel 10
  98. #define Rnpp 10
  99. #define R1p 11
  100. #define R2p 12
  101. #define R3p 13
  102. #define R4p 14
  103. #define R5p 15
  104. #define R6p 16
  105. #define RGL 17
  106. #define RLP 18
  107. #define Rout 19
  108. resonator_t rsn[N_RSN]; // internal storage for resonators
  109. resonator_t rsn_next[N_RSN];
  110. } klatt_global_t, *klatt_global_ptr;
  /*
   * Formant slot numbers for the per-formant arrays of klatt_frame_t
   * (each of those arrays has 10 elements, so valid indices are 0..9).
   */
  #define F_NZ 0 // nasal zero formant
  #define F1 1
  #define F2 2
  #define F3 3
  #define F4 4
  #define F5 5
  #define F6 6
  #define F_NP 9 // nasal pole formant

  /*
   * Structure for Klatt Parameters
   *
   * One set of synthesizer control parameters. The stated value ranges
   * are documentation only; nothing in this header enforces them.
   */
  typedef struct {
      /* Voicing fund freq in Hz.
       * NOTE(review): the name suggests units of 0.1 Hz - confirm against the users of this field. */
      int F0hz10;
      int AVdb;     /* Amp of voicing in dB, 0 to 70 */
      int Fhz[10];  // formant Hz, F_NZ to F6 to F_NP
      int Bhz[10];
      int Ap[10];   /* Amp of parallel formants in dB, 0 to 80 */
      int Bphz[10]; /* Parallel formants bw in Hz, 40 to 1000 */

      int ASP;      /* Amp of aspiration in dB, 0 to 70 */
      int Kopen;    /* # of samples in open period, 10 to 65 */
      int Aturb;    /* Breathiness in voicing, 0 to 80 */
      int TLTdb;    /* Voicing spectral tilt in dB, 0 to 24 */
      int AF;       /* Amp of frication in dB, 0 to 80 */
      int Kskew;    /* Skewness of alternate periods, 0 to 40 in sample#/2 */

      int AB;       /* Amp of bypass fric. in dB, 0 to 80 */
      int AVpdb;    /* Amp of voicing, par in dB, 0 to 70 */
      int Gain0;    /* Overall gain, 60 dB is unity, 0 to 60 */

      int AVdb_tmp;     // copy of AVdb, which is changed within parwave()
      int Fhz_next[10]; // Fhz for the next chunk, so we can do interpolation of resonator (a,b,c) parameters
      int Bhz_next[10];
  } klatt_frame_t, *klatt_frame_ptr;
  /*
   * Per-peak values: integer frequency, bandwidth and parallel
   * amplitude/bandwidth, floating point versions of the same four values,
   * and an increment for each.
   */
  typedef struct {
      int freq; // Hz
      int bw;   // klatt bandwidth
      int ap;   // parallel amplitude
      int bp;   // parallel bandwidth

      double freq1; // floating point versions of the above
      double bw1;
      double ap1;
      double bp1;

      double freq_inc; // increment by this every 64 samples
      double bw_inc;
      double ap_inc;
      double bp_inc;
  } klatt_peaks_t;
  154. void KlattInit(void);
  155. void KlattReset(int control);
  156. int Wavegen_Klatt(int length, int resume, frame_t *fr1, frame_t *fr2, WGEN_DATA *wdata, voice_t *wvoice);
  157. #ifdef __cplusplus
  158. }
  159. #endif
  160. #endif