eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

phoneme.h 9.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. /*
  2. * Copyright (C) 2005 to 2010 by Jonathan Duddington
  3. * email: [email protected]
  4. * Copyright (C) 2015-2017 Reece H. Dunn
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, see: <http://www.gnu.org/licenses/>.
  18. */
  19. #ifdef __cplusplus
  20. extern "C"
  21. {
  22. #endif
  // Phoneme feature identifiers. See docs/phonemes.md for the list of
  // supported features.
  //
  // Every enumerator except `inv` has a value that packs its own three-letter
  // name into an integer: first letter in bits 16-23, second letter in bits
  // 8-15, third letter in bits 0-7.
  typedef enum {
  // Helper that builds the packed value; only defined while the enum is being
  // declared. Arguments are parenthesized so any expression can be passed.
  #define FEATURE_T(a, b, c) (((a) << 16) | ((b) << 8) | (c))
      // invalid phoneme feature name
      inv = 0,
      // manner of articulation
      nas = FEATURE_T('n', 'a', 's'),
      stp = FEATURE_T('s', 't', 'p'),
      afr = FEATURE_T('a', 'f', 'r'),
      frc = FEATURE_T('f', 'r', 'c'),
      flp = FEATURE_T('f', 'l', 'p'),
      trl = FEATURE_T('t', 'r', 'l'),
      apr = FEATURE_T('a', 'p', 'r'),
      clk = FEATURE_T('c', 'l', 'k'),
      ejc = FEATURE_T('e', 'j', 'c'),
      imp = FEATURE_T('i', 'm', 'p'),
      vwl = FEATURE_T('v', 'w', 'l'),
      lat = FEATURE_T('l', 'a', 't'),
      sib = FEATURE_T('s', 'i', 'b'),
      // place of articulation
      blb = FEATURE_T('b', 'l', 'b'),
      lbd = FEATURE_T('l', 'b', 'd'),
      bld = FEATURE_T('b', 'l', 'd'),
      dnt = FEATURE_T('d', 'n', 't'),
      alv = FEATURE_T('a', 'l', 'v'),
      pla = FEATURE_T('p', 'l', 'a'),
      rfx = FEATURE_T('r', 'f', 'x'),
      alp = FEATURE_T('a', 'l', 'p'),
      pal = FEATURE_T('p', 'a', 'l'),
      vel = FEATURE_T('v', 'e', 'l'),
      lbv = FEATURE_T('l', 'b', 'v'),
      uvl = FEATURE_T('u', 'v', 'l'),
      phr = FEATURE_T('p', 'h', 'r'),
      glt = FEATURE_T('g', 'l', 't'),
      // voice
      vcd = FEATURE_T('v', 'c', 'd'),
      vls = FEATURE_T('v', 'l', 's'),
      // vowel height
      hgh = FEATURE_T('h', 'g', 'h'),
      smh = FEATURE_T('s', 'm', 'h'),
      umd = FEATURE_T('u', 'm', 'd'),
      mid = FEATURE_T('m', 'i', 'd'),
      lmd = FEATURE_T('l', 'm', 'd'),
      sml = FEATURE_T('s', 'm', 'l'),
      low = FEATURE_T('l', 'o', 'w'),
      // vowel backness
      fnt = FEATURE_T('f', 'n', 't'),
      cnt = FEATURE_T('c', 'n', 't'),
      bck = FEATURE_T('b', 'c', 'k'),
      // rounding
      unr = FEATURE_T('u', 'n', 'r'),
      rnd = FEATURE_T('r', 'n', 'd'),
      // articulation
      lgl = FEATURE_T('l', 'g', 'l'),
      idt = FEATURE_T('i', 'd', 't'),
      apc = FEATURE_T('a', 'p', 'c'),
      lmn = FEATURE_T('l', 'm', 'n'),
      // air flow
      egs = FEATURE_T('e', 'g', 's'),
      igs = FEATURE_T('i', 'g', 's'),
      // phonation
      brv = FEATURE_T('b', 'r', 'v'),
      slv = FEATURE_T('s', 'l', 'v'),
      stv = FEATURE_T('s', 't', 'v'),
      crv = FEATURE_T('c', 'r', 'v'),
      glc = FEATURE_T('g', 'l', 'c'),
      // rounding and labialization
      ptr = FEATURE_T('p', 't', 'r'),
      cmp = FEATURE_T('c', 'm', 'p'),
      mrd = FEATURE_T('m', 'r', 'd'),
      lrd = FEATURE_T('l', 'r', 'd'),
      // syllabicity
      syl = FEATURE_T('s', 'y', 'l'),
      nsy = FEATURE_T('n', 's', 'y'),
      // consonant release
      asp = FEATURE_T('a', 's', 'p'),
      nrs = FEATURE_T('n', 'r', 's'),
      lrs = FEATURE_T('l', 'r', 's'),
      unx = FEATURE_T('u', 'n', 'x'),
      // coarticulation
      pzd = FEATURE_T('p', 'z', 'd'),
      vzd = FEATURE_T('v', 'z', 'd'),
      fzd = FEATURE_T('f', 'z', 'd'),
      nzd = FEATURE_T('n', 'z', 'd'),
      rzd = FEATURE_T('r', 'z', 'd'),
      // tongue root
      atr = FEATURE_T('a', 't', 'r'),
      rtr = FEATURE_T('r', 't', 'r'),
      // fortis and lenis
      fts = FEATURE_T('f', 't', 's'),
      lns = FEATURE_T('l', 'n', 's'),
      // length
      est = FEATURE_T('e', 's', 't'),
      hlg = FEATURE_T('h', 'l', 'g'),
      lng = FEATURE_T('l', 'n', 'g'),
      elg = FEATURE_T('e', 'l', 'g'),
  #undef FEATURE_T
  } phoneme_feature_t;

  // Convert a feature name string to its phoneme_feature_t value.
  // NOTE(review): the implementation is not in this header; presumably an
  // unrecognised name yields `inv` -- confirm against the definition.
  phoneme_feature_t phoneme_feature_from_string(const char *feature);
  // Phoneme type codes (the kind of value held in PHONEME_TAB.type).
  #define phPAUSE        0
  #define phSTRESS       1
  #define phVOWEL        2
  #define phLIQUID       3
  #define phSTOP         4
  #define phVSTOP        5
  #define phFRICATIVE    6
  #define phVFRICATIVE   7
  #define phNASAL        8
  #define phVIRTUAL      9
  // 10 to 13 have no name in this header
  #define phDELETED      14
  #define phINVALID      15
  // Place-of-articulation codes. These are the values carried in the
  // phARTICULATION field of the phoneme flags.
  #define phPLACE_BILABIAL          1
  #define phPLACE_LABIODENTAL       2
  #define phPLACE_DENTAL            3
  #define phPLACE_ALVEOLAR          4
  #define phPLACE_RETROFLEX         5
  #define phPLACE_PALATO_ALVEOLAR   6
  #define phPLACE_PALATAL           7
  #define phPLACE_VELAR             8
  #define phPLACE_LABIO_VELAR       9
  #define phPLACE_UVULAR            10
  #define phPLACE_PHARYNGEAL        11
  #define phPLACE_GLOTTAL           12
  // phflags: bit positions within the phoneme flags word (PHONEME_TAB.phflags).
  // Each phFLAGBIT_xxx is a bit NUMBER; the matching phXXX mask is defined in
  // the "phoneme properties" list that follows.
  #define phFLAGBIT_UNSTRESSED 1
  #define phFLAGBIT_VOICELESS 3
  #define phFLAGBIT_VOICED 4
  #define phFLAGBIT_SIBILANT 5
  #define phFLAGBIT_NOLINK 6
  #define phFLAGBIT_TRILL 7
  #define phFLAGBIT_PALATAL 9
  #define phFLAGBIT_BRKAFTER 14 // [*] add a post-pause
  #define phARTICULATION 0xf0000 // bits 16-19
  #define phFLAGBIT_NONSYLLABIC 20 // don't count this vowel as a syllable when finding the stress position
  #define phFLAGBIT_LONG 21
  #define phFLAGBIT_LENGTHENSTOP 22 // make the pre-pause slightly longer
  #define phFLAGBIT_RHOTIC 23
  #define phFLAGBIT_NOPAUSE 24
  #define phFLAGBIT_PREVOICE 25 // for voiced stops
  #define phFLAGBIT_FLAG1 28
  #define phFLAGBIT_FLAG2 29
  #define phFLAGBIT_LOCAL 31 // used during compilation

  // phoneme properties: single-bit masks built from the bit positions above
  #define phUNSTRESSED (1 << phFLAGBIT_UNSTRESSED)
  #define phVOICELESS (1 << phFLAGBIT_VOICELESS)
  #define phVOICED (1 << phFLAGBIT_VOICED)
  #define phSIBILANT (1 << phFLAGBIT_SIBILANT)
  #define phNOLINK (1 << phFLAGBIT_NOLINK)
  #define phTRILL (1 << phFLAGBIT_TRILL)
  #define phPALATAL (1 << phFLAGBIT_PALATAL)
  #define phBRKAFTER (1 << phFLAGBIT_BRKAFTER)
  #define phNONSYLLABIC (1 << phFLAGBIT_NONSYLLABIC)
  #define phLONG (1 << phFLAGBIT_LONG)
  #define phLENGTHENSTOP (1 << phFLAGBIT_LENGTHENSTOP)
  #define phRHOTIC (1 << phFLAGBIT_RHOTIC)
  #define phNOPAUSE (1 << phFLAGBIT_NOPAUSE)
  #define phPREVOICE (1 << phFLAGBIT_PREVOICE)
  #define phFLAG1 (1 << phFLAGBIT_FLAG1)
  #define phFLAG2 (1 << phFLAGBIT_FLAG2)
  // Bit 31 needs an unsigned operand: shifting a signed 1 into the sign bit
  // of a 32-bit int is undefined behaviour.
  #define phLOCAL (1U << phFLAGBIT_LOCAL)
  // Phoneme numbers with fixed, well-known values; program code may refer to
  // them directly by these names.
  #define phonCONTROL        1
  #define phonSTRESS_U       2
  #define phonSTRESS_D       3
  #define phonSTRESS_2       4
  #define phonSTRESS_3       5
  #define phonSTRESS_P       6
  #define phonSTRESS_P2      7   // priority stress within a word
  #define phonSTRESS_PREV    8
  #define phonPAUSE          9
  #define phonPAUSE_SHORT    10
  #define phonPAUSE_NOLINK   11
  #define phonLENGTHEN       12
  #define phonSCHWA          13
  #define phonSCHWA_SHORT    14
  #define phonEND_WORD       15
  // 16 has no name in this header
  #define phonDEFAULTTONE    17
  #define phonCAPITAL        18
  #define phonGLOTTALSTOP    19
  #define phonSYLLABIC       20
  #define phonSWITCH         21
  #define phonX1             22  // a language specific action
  #define phonPAUSE_VSHORT   23
  #define phonPAUSE_LONG     24
  #define phonT_REDUCED      25
  #define phonSTRESS_TONIC   26
  #define phonPAUSE_CLAUSE   27
  #define phonVOWELTYPES     28  // 28 to 33
  // Pause phoneme lookup table, defined elsewhere.
  // Entries: 0, vshort, short, pause, long, glottalstop
  extern const unsigned char pause_phonemes[8];

  // number of phoneme tables
  #define N_PHONEME_TABS       150
  // max phonemes in a phoneme table
  #define N_PHONEME_TAB        256
  // size of a phoneme table name; must be multiple of 4
  #define N_PHONEME_TAB_NAME   32
  // One entry of the main phoneme table; the table is indexed by phoneme
  // number (1-254).
  typedef struct {
      // Up to 4 characters. The first char is in the l.s.byte
      unsigned int mnemonic;
      // flag bits; bits 16-19 hold the place of articulation
      unsigned int phflags;
      // index into phondata file
      unsigned short program;
      // the phoneme number
      unsigned char code;
      // phVOWEL, phPAUSE, phSTOP etc
      unsigned char type;
      unsigned char start_type;
      // vowels: endtype; consonant: voicing switch
      unsigned char end_type;
      // for vowels, in mS/2; for phSTRESS phonemes, this is the stress/tone type
      unsigned char std_length;
      // a length_mod group number, used to access length_mod_tab
      unsigned char length_mod;
  } PHONEME_TAB;
  229. espeak_ng_STATUS
  230. phoneme_add_feature(PHONEME_TAB *phoneme,
  231. phoneme_feature_t feature);
  232. // Several phoneme tables may be loaded into memory. phoneme_tab points to
  233. // one for the current voice
  234. extern int n_phoneme_tab;
  235. extern int current_phoneme_table;
  236. extern PHONEME_TAB *phoneme_tab[N_PHONEME_TAB];
  237. extern unsigned char phoneme_tab_flags[N_PHONEME_TAB]; // bit 0: not inherited
  238. typedef struct {
  239. char name[N_PHONEME_TAB_NAME];
  240. PHONEME_TAB *phoneme_tab_ptr;
  241. int n_phonemes;
  242. int includes; // also include the phonemes from this other phoneme table
  243. } PHONEME_TAB_LIST;
  // Phoneme substitutions for the current voice: each entry names a phoneme
  // and the phoneme that takes its place.
  #define N_REPLACE_PHONEMES   60

  typedef struct {
      unsigned char old_ph;
      unsigned char new_ph;
      // 0=always replace, 1=only at end of word
      char type;
  } REPLACE_PHONEMES;

  extern int n_replace_phonemes;
  extern REPLACE_PHONEMES replace_phonemes[N_REPLACE_PHONEMES];
  // Entry in the table of phoneme programs and their lengths.
  // Used by MakeVowelLists
  typedef struct {
      unsigned int addr;
      unsigned int length;
  } PHONEME_PROG_LOG;
  // Pack characters into an integer phoneme name, first character in the
  // least significant byte. Arguments and the whole expansion are
  // parenthesized so the macros can be used inside larger expressions and
  // with expression arguments.
  #define PH(c1, c2) (((c2) << 8) + (c1)) // combine two characters into an integer for phoneme name
  #define PH3(c1, c2, c3) (((c3) << 16) + ((c2) << 8) + (c1))
  // Call PhonemeCode() with the packed two-character name.
  #define PhonemeCode2(c1, c2) PhonemeCode(((c2) << 8) + (c1))
  // Conversions between phoneme strings, mnemonics and phoneme codes. The
  // implementations live outside this header.
  // NOTE(review): the descriptions below are inferred from the names and
  // signatures only -- confirm against the definitions.

  // Presumably looks up the phoneme whose mnemonic is `string`.
  int LookupPhonemeString(const char *string);

  // Presumably returns the phoneme code for the packed mnemonic `mnem`.
  int PhonemeCode(unsigned int mnem);

  // Presumably encodes the phoneme text at `p` into `outptr`; `bad_phoneme`
  // is an output parameter.
  const char *EncodePhonemes(const char *p, char *outptr, int *bad_phoneme);

  // Presumably the inverse of EncodePhonemes: `inptr` decoded into `outptr`.
  void DecodePhonemes(const char *inptr, char *outptr);

  // Presumably renders a packed mnemonic word as a string.
  const char *WordToString(unsigned int word);
  266. extern PHONEME_TAB_LIST phoneme_tab_list[N_PHONEME_TABS];
  267. extern int phoneme_tab_number;
  268. #ifdef __cplusplus
  269. }
  270. #endif