eSpeak NG is an open-source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

phoneme.h 9.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. /*
  2. * Copyright (C) 2005 to 2010 by Jonathan Duddington
  3. * email: [email protected]
  4. * Copyright (C) 2015-2017 Reece H. Dunn
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, see: <http://www.gnu.org/licenses/>.
  18. */
  19. #ifndef ESPEAK_NG_PHONEME_H
  20. #define ESPEAK_NG_PHONEME_H
  21. #include <espeak-ng/espeak_ng.h>
  22. #ifdef __cplusplus
  23. extern "C"
  24. {
  25. #endif
  // Phoneme feature identifiers.
  // See docs/phonemes.md for the list of supported features.
  //
  // Each enumerator's value packs its three-letter name into one integer:
  // first letter in bits 16-23, second letter in bits 8-15, third in bits 0-7.
  typedef enum {
  // Arguments and the whole expansion are parenthesized so that the macro
  // stays correct for any expression passed to it.
  # define FEATURE_T(a, b, c) (((a) << 16) | ((b) << 8) | (c))
      // invalid phoneme feature name
      inv = 0,
      // manner of articulation
      nas = FEATURE_T('n', 'a', 's'),
      stp = FEATURE_T('s', 't', 'p'),
      afr = FEATURE_T('a', 'f', 'r'),
      frc = FEATURE_T('f', 'r', 'c'),
      flp = FEATURE_T('f', 'l', 'p'),
      trl = FEATURE_T('t', 'r', 'l'),
      apr = FEATURE_T('a', 'p', 'r'),
      clk = FEATURE_T('c', 'l', 'k'),
      ejc = FEATURE_T('e', 'j', 'c'),
      imp = FEATURE_T('i', 'm', 'p'),
      vwl = FEATURE_T('v', 'w', 'l'),
      lat = FEATURE_T('l', 'a', 't'),
      sib = FEATURE_T('s', 'i', 'b'),
      // place of articulation
      blb = FEATURE_T('b', 'l', 'b'),
      lbd = FEATURE_T('l', 'b', 'd'),
      bld = FEATURE_T('b', 'l', 'd'),
      dnt = FEATURE_T('d', 'n', 't'),
      alv = FEATURE_T('a', 'l', 'v'),
      pla = FEATURE_T('p', 'l', 'a'),
      rfx = FEATURE_T('r', 'f', 'x'),
      alp = FEATURE_T('a', 'l', 'p'),
      pal = FEATURE_T('p', 'a', 'l'),
      vel = FEATURE_T('v', 'e', 'l'),
      lbv = FEATURE_T('l', 'b', 'v'),
      uvl = FEATURE_T('u', 'v', 'l'),
      phr = FEATURE_T('p', 'h', 'r'),
      glt = FEATURE_T('g', 'l', 't'),
      // voice
      vcd = FEATURE_T('v', 'c', 'd'),
      vls = FEATURE_T('v', 'l', 's'),
      // vowel height
      hgh = FEATURE_T('h', 'g', 'h'),
      smh = FEATURE_T('s', 'm', 'h'),
      umd = FEATURE_T('u', 'm', 'd'),
      mid = FEATURE_T('m', 'i', 'd'),
      lmd = FEATURE_T('l', 'm', 'd'),
      sml = FEATURE_T('s', 'm', 'l'),
      low = FEATURE_T('l', 'o', 'w'),
      // vowel backness
      fnt = FEATURE_T('f', 'n', 't'),
      cnt = FEATURE_T('c', 'n', 't'),
      bck = FEATURE_T('b', 'c', 'k'),
      // rounding
      unr = FEATURE_T('u', 'n', 'r'),
      rnd = FEATURE_T('r', 'n', 'd'),
      // articulation
      lgl = FEATURE_T('l', 'g', 'l'),
      idt = FEATURE_T('i', 'd', 't'),
      apc = FEATURE_T('a', 'p', 'c'),
      lmn = FEATURE_T('l', 'm', 'n'),
      // air flow
      egs = FEATURE_T('e', 'g', 's'),
      igs = FEATURE_T('i', 'g', 's'),
      // phonation
      brv = FEATURE_T('b', 'r', 'v'),
      slv = FEATURE_T('s', 'l', 'v'),
      stv = FEATURE_T('s', 't', 'v'),
      crv = FEATURE_T('c', 'r', 'v'),
      glc = FEATURE_T('g', 'l', 'c'),
      // rounding and labialization
      ptr = FEATURE_T('p', 't', 'r'),
      cmp = FEATURE_T('c', 'm', 'p'),
      mrd = FEATURE_T('m', 'r', 'd'),
      lrd = FEATURE_T('l', 'r', 'd'),
      // syllabicity
      syl = FEATURE_T('s', 'y', 'l'),
      nsy = FEATURE_T('n', 's', 'y'),
      // consonant release
      asp = FEATURE_T('a', 's', 'p'),
      nrs = FEATURE_T('n', 'r', 's'),
      lrs = FEATURE_T('l', 'r', 's'),
      unx = FEATURE_T('u', 'n', 'x'),
      // coarticulation
      pzd = FEATURE_T('p', 'z', 'd'),
      vzd = FEATURE_T('v', 'z', 'd'),
      fzd = FEATURE_T('f', 'z', 'd'),
      nzd = FEATURE_T('n', 'z', 'd'),
      rzd = FEATURE_T('r', 'z', 'd'),
      // tongue root
      atr = FEATURE_T('a', 't', 'r'),
      rtr = FEATURE_T('r', 't', 'r'),
      // fortis and lenis
      fts = FEATURE_T('f', 't', 's'),
      lns = FEATURE_T('l', 'n', 's'),
      // length
      est = FEATURE_T('e', 's', 't'),
      hlg = FEATURE_T('h', 'l', 'g'),
      lng = FEATURE_T('l', 'n', 'g'),
      elg = FEATURE_T('e', 'l', 'g'),
  // FEATURE_T is only needed while building the enumerators above.
  # undef FEATURE_T
  } phoneme_feature_t;

  // Maps a feature name string to its phoneme_feature_t value.
  // NOTE(review): presumably yields `inv` for an unrecognised name; confirm
  // against the implementation, which is not part of this header.
  phoneme_feature_t phoneme_feature_from_string(const char *feature);
  // Phoneme type codes (see the `type` member of PHONEME_TAB).
  // Values 10-13 are not assigned here.
  #define phPAUSE       0
  #define phSTRESS      1
  #define phVOWEL       2
  #define phLIQUID      3
  #define phSTOP        4
  #define phVSTOP       5
  #define phFRICATIVE   6
  #define phVFRICATIVE  7
  #define phNASAL       8
  #define phVIRTUAL     9
  #define phDELETED     14
  #define phINVALID     15
  // Place-of-articulation codes; these are the values held in the
  // phARTICULATION field of the phoneme flags.
  #define phPLACE_BILABIAL         1
  #define phPLACE_LABIODENTAL      2
  #define phPLACE_DENTAL           3
  #define phPLACE_ALVEOLAR         4
  #define phPLACE_RETROFLEX        5
  #define phPLACE_PALATO_ALVEOLAR  6
  #define phPLACE_PALATAL          7
  #define phPLACE_VELAR            8
  #define phPLACE_LABIO_VELAR      9
  #define phPLACE_UVULAR           10
  #define phPLACE_PHARYNGEAL       11
  #define phPLACE_GLOTTAL          12
  // phflags: bit numbers of the individual phoneme flags.
  #define phFLAGBIT_UNSTRESSED 1
  #define phFLAGBIT_VOICELESS 3
  #define phFLAGBIT_VOICED 4
  #define phFLAGBIT_SIBILANT 5
  #define phFLAGBIT_NOLINK 6
  #define phFLAGBIT_TRILL 7
  #define phFLAGBIT_PALATAL 9
  #define phFLAGBIT_BRKAFTER 14 // [*] add a post-pause
  #define phARTICULATION 0xf0000 // bits 16-19 (mask, not a bit number)
  #define phFLAGBIT_NONSYLLABIC 20 // don't count this vowel as a syllable when finding the stress position
  #define phFLAGBIT_LONG 21
  #define phFLAGBIT_LENGTHENSTOP 22 // make the pre-pause slightly longer
  #define phFLAGBIT_RHOTIC 23
  #define phFLAGBIT_NOPAUSE 24
  #define phFLAGBIT_PREVOICE 25 // for voiced stops
  #define phFLAGBIT_FLAG1 28
  #define phFLAGBIT_FLAG2 29
  #define phFLAGBIT_LOCAL 31 // used during compilation

  // phoneme properties: single-bit masks built from the bit numbers above.
  #define phUNSTRESSED (1 << phFLAGBIT_UNSTRESSED)
  #define phVOICELESS (1 << phFLAGBIT_VOICELESS)
  #define phVOICED (1 << phFLAGBIT_VOICED)
  #define phSIBILANT (1 << phFLAGBIT_SIBILANT)
  #define phNOLINK (1 << phFLAGBIT_NOLINK)
  #define phTRILL (1 << phFLAGBIT_TRILL)
  #define phPALATAL (1 << phFLAGBIT_PALATAL)
  #define phBRKAFTER (1 << phFLAGBIT_BRKAFTER)
  #define phNONSYLLABIC (1 << phFLAGBIT_NONSYLLABIC)
  #define phLONG (1 << phFLAGBIT_LONG)
  #define phLENGTHENSTOP (1 << phFLAGBIT_LENGTHENSTOP)
  #define phRHOTIC (1 << phFLAGBIT_RHOTIC)
  #define phNOPAUSE (1 << phFLAGBIT_NOPAUSE)
  #define phPREVOICE (1 << phFLAGBIT_PREVOICE)
  #define phFLAG1 (1 << phFLAGBIT_FLAG1)
  #define phFLAG2 (1 << phFLAGBIT_FLAG2)
  // Bit 31 is the sign bit of a 32-bit int, so `1 << 31` is undefined
  // behaviour; shift an unsigned 1 instead to get a well-defined mask.
  #define phLOCAL (1U << phFLAGBIT_LOCAL)
  // Phoneme code numbers with fixed values, so that program code can refer
  // to these phonemes directly. Code 16 is not assigned here.
  #define phonCONTROL       1
  #define phonSTRESS_U      2
  #define phonSTRESS_D      3
  #define phonSTRESS_2      4
  #define phonSTRESS_3      5
  #define phonSTRESS_P      6
  #define phonSTRESS_P2     7   // priority stress within a word
  #define phonSTRESS_PREV   8
  #define phonPAUSE         9
  #define phonPAUSE_SHORT   10
  #define phonPAUSE_NOLINK  11
  #define phonLENGTHEN      12
  #define phonSCHWA         13
  #define phonSCHWA_SHORT   14
  #define phonEND_WORD      15
  #define phonDEFAULTTONE   17
  #define phonCAPITAL       18
  #define phonGLOTTALSTOP   19
  #define phonSYLLABIC      20
  #define phonSWITCH        21
  #define phonX1            22  // a language specific action
  #define phonPAUSE_VSHORT  23
  #define phonPAUSE_LONG    24
  #define phonT_REDUCED     25
  #define phonSTRESS_TONIC  26
  #define phonPAUSE_CLAUSE  27
  #define phonVOWELTYPES    28  // 28 to 33
  // Pause phoneme codes, in the order: 0, vshort, short, pause, long, glottalstop.
  // The array is defined elsewhere; only its declaration lives here.
  extern const unsigned char pause_phonemes[8];

  // Sizing limits for the phoneme tables.
  #define N_PHONEME_TABS      150  // number of phoneme tables
  #define N_PHONEME_TAB       256  // max phonemes in a phoneme table
  #define N_PHONEME_TAB_NAME  32   // must be multiple of 4
  // One entry of the main phoneme table, which is indexed by phoneme
  // number (1-254).
  typedef struct {
      // Phoneme name, up to 4 characters; the first character sits in the
      // least significant byte.
      unsigned mnemonic;
      // Flag bits; bits 16-19 hold the place of articulation.
      unsigned phflags;
      // Index into the phondata file.
      unsigned short program;
      // The phoneme number.
      unsigned char code;
      // phVOWEL, phPAUSE, phSTOP etc.
      unsigned char type;
      unsigned char start_type;
      // Vowels: endtype; consonants: voicing switch.
      unsigned char end_type;
      // Vowels: length in mS/2; phSTRESS phonemes: the stress/tone type.
      unsigned char std_length;
      // A length_mod group number, used to access length_mod_tab.
      unsigned char length_mod;
  } PHONEME_TAB;
  232. espeak_ng_STATUS
  233. phoneme_add_feature(PHONEME_TAB *phoneme,
  234. phoneme_feature_t feature);
  235. // Several phoneme tables may be loaded into memory. phoneme_tab points to
  236. // one for the current voice
  237. extern int n_phoneme_tab;
  238. extern int current_phoneme_table;
  239. extern PHONEME_TAB *phoneme_tab[N_PHONEME_TAB];
  240. extern unsigned char phoneme_tab_flags[N_PHONEME_TAB]; // bit 0: not inherited
  241. typedef struct {
  242. char name[N_PHONEME_TAB_NAME];
  243. PHONEME_TAB *phoneme_tab_ptr;
  244. int n_phonemes;
  245. int includes; // also include the phonemes from this other phoneme table
  246. } PHONEME_TAB_LIST;
  // Phoneme replacements for the current voice: each entry names a phoneme
  // and the different phoneme that replaces it.
  #define N_REPLACE_PHONEMES 60

  typedef struct {
      unsigned char old_ph;
      unsigned char new_ph;
      // 0 = always replace, 1 = only at end of word
      char type;
  } REPLACE_PHONEMES;

  extern int n_replace_phonemes;
  extern REPLACE_PHONEMES replace_phonemes[N_REPLACE_PHONEMES];
  // Entry in the table of phoneme programs and their lengths.
  // Used by MakeVowelLists.
  typedef struct {
      unsigned addr;
      unsigned length;
  } PHONEME_PROG_LOG;
  // Pack the characters of a phoneme name into one integer, with the first
  // character in the least significant byte.
  //
  // Every argument and each complete expansion is parenthesized, so the
  // macros give the intended value inside larger expressions
  // (e.g. `PH(a, b) * 2`) and for compound arguments (e.g. `PH(0, x | y)`).
  #define PH(c1, c2) (((c2) << 8) + (c1)) // combine two characters into an integer for phoneme name
  #define PH3(c1, c2, c3) (((c3) << 16) + ((c2) << 8) + (c1))
  // Looks up the phoneme code for a two-character name via PhonemeCode().
  #define PhonemeCode2(c1, c2) PhonemeCode(((c2) << 8) + (c1))
  264. int LookupPhonemeString(const char *string);
  265. int PhonemeCode(unsigned int mnem);
  266. const char *EncodePhonemes(const char *p, char *outptr, int *bad_phoneme);
  267. void DecodePhonemes(const char *inptr, char *outptr);
  268. extern const char *WordToString(unsigned int word);
  269. extern PHONEME_TAB_LIST phoneme_tab_list[N_PHONEME_TABS];
  270. extern int phoneme_tab_number;
  271. #ifdef __cplusplus
  272. }
  273. #endif
  274. #endif