eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

numbers.cpp 44KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881
  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2010 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include "StdAfx.h"
  20. #include <stdio.h>
  21. #include <ctype.h>
  22. #include <stdlib.h>
  23. #include <string.h>
  24. #include <wctype.h>
  25. #include <wchar.h>
  26. #include "speak_lib.h"
  27. #include "speech.h"
  28. #include "phoneme.h"
  29. #include "synthesize.h"
  30. #include "voice.h"
  31. #include "translate.h"
  // Accent / modifier codes.  Each value is the index of the matching entry in
  // accents_tab[] and is the number that LETTER()/LIGATURE() pack into the
  // letter_accents_* tables.  Code 0 (M_NAME) means "no accent": the letter
  // name alone is spoken by LookupAccentedLetter().
  32. #define M_NAME 0
  33. #define M_SMALLCAP 1
  34. #define M_TURNED 2
  35. #define M_REVERSED 3
  36. #define M_CURL 4
  37. #define M_ACUTE 5
  38. #define M_BREVE 6
  39. #define M_CARON 7
  40. #define M_CEDILLA 8
  41. #define M_CIRCUMFLEX 9
  42. #define M_DIAERESIS 10
  43. #define M_DOUBLE_ACUTE 11
  44. #define M_DOT_ABOVE 12
  45. #define M_GRAVE 13
  46. #define M_MACRON 14
  47. #define M_OGONEK 15
  48. #define M_RING 16
  49. #define M_STROKE 17
  50. #define M_TILDE 18
  51. #define M_BAR 19
  52. #define M_RETROFLEX 20
  53. #define M_HOOK 21
  54. #define M_MIDDLE_DOT M_DOT_ABOVE // duplicate of M_DOT_ABOVE
  55. #define M_IMPLOSIVE M_HOOK // alias: implosive letters reuse the "hook" entry
  // File-scope state.
  // NOTE(review): none of these four variables is referenced in the part of
  // the file visible here; presumably they belong to the number-translation
  // code further down -- confirm before changing them.
  56. static int n_digit_lookup;
  57. static char *digit_lookup;
  58. static int speak_missing_thousands;
  59. static int number_control;
  // One entry per accent/modifier code (M_NAME .. M_HOOK).
  //   name  : token that is looked up in the language's *_list dictionary
  //   flags : bit 0 set => the accent name is always spoken BEFORE the letter
  //           name (see LookupAccentedLetter)
  typedef struct {
      const char *name;
      int flags;
  } ACCENTS;

  // These are tokens to look up in the *_list file.  The array index must
  // match the numeric value of the corresponding M_* code.  The table is only
  // ever read, so it is declared const.
  static const ACCENTS accents_tab[] = {
      {"_lig", 1},   //  0  M_NAME (also used as the ligature token)
      {"_smc", 1},   //  1  smallcap
      {"_tur", 1},   //  2  turned
      {"_rev", 1},   //  3  reversed
      {"_crl", 0},   //  4  curl
      {"_acu", 0},   //  5  acute
      {"_brv", 0},   //  6  breve
      {"_hac", 0},   //  7  caron/hacek
      {"_ced", 0},   //  8  cedilla
      {"_cir", 0},   //  9  circumflex
      {"_dia", 0},   // 10  diaeresis
      {"_ac2", 0},   // 11  double acute
      {"_dot", 0},   // 12  dot
      {"_grv", 0},   // 13  grave
      {"_mcn", 0},   // 14  macron
      {"_ogo", 0},   // 15  ogonek
      {"_rng", 0},   // 16  ring
      {"_stk", 0},   // 17  stroke
      {"_tld", 0},   // 18  tilde
      {"_bar", 0},   // 19  bar
      {"_rfx", 0},   // 20  retroflex
      {"_hok", 0},   // 21  hook
  };
  // Packed 16-bit encoding used by the letter_accents_* tables.
  //
  //   LETTER:    bits 0-5   base letter code (character - 59)
  //              bits 6-10  first accent  (M_* code)
  //              bits 11-14 second accent (M_* code)
  //              bit  15    clear
  //   LIGATURE:  bits 0-5   first letter code
  //              bits 6-11  second letter code
  //              bits 12-14 accent (M_* code)
  //              bit  15    set (0x8000)
  //
  // CAPITAL (0) marks a table slot that carries no accent data.
  // Every macro argument and the whole expansion are parenthesized so the
  // macros stay correct with compound arguments and inside larger expressions.
  #define CAPITAL 0
  #define LETTER(ch,mod1,mod2) (((ch)-59) + ((mod1) << 6) + ((mod2) << 11))
  #define LIGATURE(ch1,ch2,mod1) (((ch1)-59) + (((ch2)-59) << 6) + ((mod1) << 12) + 0x8000)

  // Base-letter codes below 'a' stand for non-ASCII base letters; the real
  // code point is non_ascii_tab[code - 59].
  #define L_ALPHA 60    // U+3B1
  #define L_SCHWA 61    // U+259
  #define L_OPEN_E 62   // U+25B
  #define L_GAMMA 63    // U+3B3
  #define L_IOTA 64     // U+3B9
  #define L_OE 65       // U+153
  #define L_OMEGA 66    // U+3C9
  #define L_PHI 67      // U+3C6
  #define L_ESH 68      // U+283
  #define L_UPSILON 69  // U+3C5
  #define L_EZH 70      // U+292
  #define L_GLOTTAL 71  // U+294
  #define L_RTAP 72     // U+27E

  // Indexed by (L_* code - 59); slot 0 is unused.
  static const short non_ascii_tab[] = {
      0, 0x3b1, 0x259, 0x25b, 0x3b3, 0x3b9, 0x153, 0x3c9,
      0x3c6, 0x283, 0x3c5, 0x292, 0x294, 0x27e
  };
  // Accent data for the code points U+00E0 .. U+017F, indexed by
  // (code point - 0xe0).  Each entry is a LETTER()/LIGATURE() code; an entry
  // of 0 (written as CAPITAL for upper-case slots, or a bare 0) carries no
  // accent data and LookupAccentedLetter() produces nothing for it.
  // TranslateLetter() lower-cases the character before the lookup, which is
  // why the upper-case slots can be left empty.
  108. // characters U+00e0 to U+017f
  109. static const unsigned short letter_accents_0e0[] = {
  110. LETTER('a',M_GRAVE,0), // U+00e0
  111. LETTER('a',M_ACUTE,0),
  112. LETTER('a',M_CIRCUMFLEX,0),
  113. LETTER('a',M_TILDE,0),
  114. LETTER('a',M_DIAERESIS,0),
  115. LETTER('a',M_RING,0),
  116. LIGATURE('a','e',0),
  117. LETTER('c',M_CEDILLA,0),
  118. LETTER('e',M_GRAVE,0),
  119. LETTER('e',M_ACUTE,0),
  120. LETTER('e',M_CIRCUMFLEX,0),
  121. LETTER('e',M_DIAERESIS,0),
  122. LETTER('i',M_GRAVE,0),
  123. LETTER('i',M_ACUTE,0),
  124. LETTER('i',M_CIRCUMFLEX,0),
  125. LETTER('i',M_DIAERESIS,0),
  126. LETTER('d',M_NAME,0), // eth // U+00f0
  127. LETTER('n',M_TILDE,0),
  128. LETTER('o',M_GRAVE,0),
  129. LETTER('o',M_ACUTE,0),
  130. LETTER('o',M_CIRCUMFLEX,0),
  131. LETTER('o',M_TILDE,0),
  132. LETTER('o',M_DIAERESIS,0),
  133. 0, // division sign
  134. LETTER('o',M_STROKE,0),
  135. LETTER('u',M_GRAVE,0),
  136. LETTER('u',M_ACUTE,0),
  137. LETTER('u',M_CIRCUMFLEX,0),
  138. LETTER('u',M_DIAERESIS,0),
  139. LETTER('y',M_ACUTE,0),
  140. LETTER('t',M_NAME,0), // thorn
  141. LETTER('y',M_DIAERESIS,0),
  142. CAPITAL, // U+0100
  143. LETTER('a',M_MACRON,0),
  144. CAPITAL,
  145. LETTER('a',M_BREVE,0),
  146. CAPITAL,
  147. LETTER('a',M_OGONEK,0),
  148. CAPITAL,
  149. LETTER('c',M_ACUTE,0),
  150. CAPITAL,
  151. LETTER('c',M_CIRCUMFLEX,0),
  152. CAPITAL,
  153. LETTER('c',M_DOT_ABOVE,0),
  154. CAPITAL,
  155. LETTER('c',M_CARON,0),
  156. CAPITAL,
  157. LETTER('d',M_CARON,0),
  158. CAPITAL, // U+0110
  159. LETTER('d',M_STROKE,0),
  160. CAPITAL,
  161. LETTER('e',M_MACRON,0),
  162. CAPITAL,
  163. LETTER('e',M_BREVE,0),
  164. CAPITAL,
  165. LETTER('e',M_DOT_ABOVE,0),
  166. CAPITAL,
  167. LETTER('e',M_OGONEK,0),
  168. CAPITAL,
  169. LETTER('e',M_CARON,0),
  170. CAPITAL,
  171. LETTER('g',M_CIRCUMFLEX,0),
  172. CAPITAL,
  173. LETTER('g',M_BREVE,0),
  174. CAPITAL, // U+0120
  175. LETTER('g',M_DOT_ABOVE,0),
  176. CAPITAL,
  177. LETTER('g',M_CEDILLA,0),
  178. CAPITAL,
  179. LETTER('h',M_CIRCUMFLEX,0),
  180. CAPITAL,
  181. LETTER('h',M_STROKE,0),
  182. CAPITAL,
  183. LETTER('i',M_TILDE,0),
  184. CAPITAL,
  185. LETTER('i',M_MACRON,0),
  186. CAPITAL,
  187. LETTER('i',M_BREVE,0),
  188. CAPITAL,
  189. LETTER('i',M_OGONEK,0),
  190. CAPITAL, // U+0130
  191. LETTER('i',M_NAME,0), // dotless i
  192. CAPITAL,
  193. LIGATURE('i','j',0),
  194. CAPITAL,
  195. LETTER('j',M_CIRCUMFLEX,0),
  196. CAPITAL,
  197. LETTER('k',M_CEDILLA,0),
  198. LETTER('k',M_NAME,0), // kra
  199. CAPITAL,
  200. LETTER('l',M_ACUTE,0),
  201. CAPITAL,
  202. LETTER('l',M_CEDILLA,0),
  203. CAPITAL,
  204. LETTER('l',M_CARON,0),
  205. CAPITAL,
  206. LETTER('l',M_MIDDLE_DOT,0), // U+0140
  207. CAPITAL,
  208. LETTER('l',M_STROKE,0),
  209. CAPITAL,
  210. LETTER('n',M_ACUTE,0),
  211. CAPITAL,
  212. LETTER('n',M_CEDILLA,0),
  213. CAPITAL,
  214. LETTER('n',M_CARON,0),
  215. LETTER('n',M_NAME,0), // apostrophe n
  216. CAPITAL,
  217. LETTER('n',M_NAME,0), // eng
  218. CAPITAL,
  219. LETTER('o',M_MACRON,0),
  220. CAPITAL,
  221. LETTER('o',M_BREVE,0),
  222. CAPITAL, // U+0150
  223. LETTER('o',M_DOUBLE_ACUTE,0),
  224. CAPITAL,
  225. LIGATURE('o','e',0),
  226. CAPITAL,
  227. LETTER('r',M_ACUTE,0),
  228. CAPITAL,
  229. LETTER('r',M_CEDILLA,0),
  230. CAPITAL,
  231. LETTER('r',M_CARON,0),
  232. CAPITAL,
  233. LETTER('s',M_ACUTE,0),
  234. CAPITAL,
  235. LETTER('s',M_CIRCUMFLEX,0),
  236. CAPITAL,
  237. LETTER('s',M_CEDILLA,0),
  238. CAPITAL, // U+0160
  239. LETTER('s',M_CARON,0),
  240. CAPITAL,
  241. LETTER('t',M_CEDILLA,0),
  242. CAPITAL,
  243. LETTER('t',M_CARON,0),
  244. CAPITAL,
  245. LETTER('t',M_STROKE,0),
  246. CAPITAL,
  247. LETTER('u',M_TILDE,0),
  248. CAPITAL,
  249. LETTER('u',M_MACRON,0),
  250. CAPITAL,
  251. LETTER('u',M_BREVE,0),
  252. CAPITAL,
  253. LETTER('u',M_RING,0),
  254. CAPITAL, // U+0170
  255. LETTER('u',M_DOUBLE_ACUTE,0),
  256. CAPITAL,
  257. LETTER('u',M_OGONEK,0),
  258. CAPITAL,
  259. LETTER('w',M_CIRCUMFLEX,0),
  260. CAPITAL,
  261. LETTER('y',M_CIRCUMFLEX,0),
  262. CAPITAL, // Y-DIAERESIS
  263. CAPITAL,
  264. LETTER('z',M_ACUTE,0),
  265. CAPITAL,
  266. LETTER('z',M_DOT_ABOVE,0),
  267. CAPITAL,
  268. LETTER('z',M_CARON,0),
  269. LETTER('s',M_NAME,0), // long-s // U+17f
  270. };
  // Accent data for the IPA-extension code points, indexed by
  // (code point - 0x250).  The table holds 89 entries, so it covers
  // U+0250 .. U+02A8 (the same range LookupAccentedLetter() checks).
  // An entry of 0 means no accent data is available for that character;
  // the commented-out LETTER(...) forms use modifiers that are not defined.
  271. // characters U+0250 to U+02A8
  272. static const unsigned short letter_accents_250[] = {
  273. LETTER('a',M_TURNED,0), // U+250
  274. LETTER(L_ALPHA,0,0),
  275. LETTER(L_ALPHA,M_TURNED,0),
  276. LETTER('b',M_IMPLOSIVE,0),
  277. 0, // open-o
  278. LETTER('c',M_CURL,0),
  279. LETTER('d',M_RETROFLEX,0),
  280. LETTER('d',M_IMPLOSIVE,0),
  281. LETTER('e',M_REVERSED,0), // U+258
  282. 0, // schwa
  283. LETTER(L_SCHWA,M_HOOK,0),
  284. 0, // open-e
  285. LETTER(L_OPEN_E,M_REVERSED,0),
  286. LETTER(L_OPEN_E,M_HOOK,M_REVERSED),
  287. 0,//LETTER(L_OPEN_E,M_CLOSED,M_REVERSED),
  288. LETTER('j',M_BAR,0),
  289. LETTER('g',M_IMPLOSIVE,0), // U+260
  290. LETTER('g',0,0),
  291. LETTER('g',M_SMALLCAP,0),
  292. LETTER(L_GAMMA,0,0),
  293. 0, // ramshorn
  294. LETTER('h',M_TURNED,0),
  295. LETTER('h',M_HOOK,0),
  296. 0,//LETTER(L_HENG,M_HOOK,0),
  297. LETTER('i',M_BAR,0), // U+268
  298. LETTER(L_IOTA,0,0),
  299. LETTER('i',M_SMALLCAP,0),
  300. LETTER('l',M_TILDE,0),
  301. LETTER('l',M_BAR,0),
  302. LETTER('l',M_RETROFLEX,0),
  303. LIGATURE('l','z',0),
  304. LETTER('m',M_TURNED,0),
  305. 0,//LETTER('m',M_TURNED,M_LEG), // U+270
  306. LETTER('m',M_HOOK,0),
  307. 0,//LETTER('n',M_LEFTHOOK,0),
  308. LETTER('n',M_RETROFLEX,0),
  309. LETTER('n',M_SMALLCAP,0),
  310. LETTER('o',M_BAR,0),
  311. LIGATURE('o','e',M_SMALLCAP),
  312. 0,//LETTER(L_OMEGA,M_CLOSED,0),
  313. LETTER(L_PHI,0,0), // U+278
  314. LETTER('r',M_TURNED,0),
  315. 0,//LETTER('r',M_TURNED,M_LEG),
  316. LETTER('r',M_RETROFLEX,M_TURNED),
  317. 0,//LETTER('r',M_LEG,0),
  318. LETTER('r',M_RETROFLEX,0),
  319. 0, // r-tap
  320. LETTER(L_RTAP,M_REVERSED,0),
  321. LETTER('r',M_SMALLCAP,0), // U+280
  322. LETTER('r',M_TURNED,M_SMALLCAP),
  323. LETTER('s',M_RETROFLEX,0),
  324. 0, // esh
  325. 0,//LETTER('j',M_BAR,L_IMPLOSIVE),
  326. LETTER(L_ESH,M_REVERSED,0),
  327. LETTER(L_ESH,M_CURL,0),
  328. LETTER('t',M_TURNED,0),
  329. LETTER('t',M_RETROFLEX,0), // U+288
  330. LETTER('u',M_BAR,0),
  331. LETTER(L_UPSILON,0,0),
  332. LETTER('v',M_HOOK,0),
  333. LETTER('v',M_TURNED,0),
  334. LETTER('w',M_TURNED,0),
  335. LETTER('y',M_TURNED,0),
  336. LETTER('y',M_SMALLCAP,0),
  337. LETTER('z',M_RETROFLEX,0), // U+290
  338. LETTER('z',M_CURL,0),
  339. 0, // ezh
  340. LETTER(L_EZH,M_CURL,0),
  341. 0, // glottal stop
  342. LETTER(L_GLOTTAL,M_REVERSED,0),
  343. LETTER(L_GLOTTAL,M_TURNED,0),
  344. 0,//LETTER('c',M_LONG,0),
  345. 0, // bilabial click // U+298
  346. LETTER('b',M_SMALLCAP,0),
  347. 0,//LETTER(L_OPEN_E,M_CLOSED,0),
  348. LETTER('g',M_IMPLOSIVE,M_SMALLCAP),
  349. LETTER('h',M_SMALLCAP,0),
  350. LETTER('j',M_CURL,0),
  351. LETTER('k',M_TURNED,0),
  352. LETTER('l',M_SMALLCAP,0),
  353. LETTER('q',M_HOOK,0), // U+2a0
  354. LETTER(L_GLOTTAL,M_STROKE,0),
  355. LETTER(L_GLOTTAL,M_STROKE,M_REVERSED),
  356. LIGATURE('d','z',0),
  357. 0, // dezh
  358. LIGATURE('d','z',M_CURL),
  359. LIGATURE('t','s',0),
  360. 0, // tesh
  361. LIGATURE('t','s',M_CURL),
  362. };
  363. static int LookupLetter2(Translator *tr, unsigned int letter, char *ph_buf)
  364. {//========================================================================
  365. int len;
  366. char single_letter[10];
  367. single_letter[0] = 0;
  368. single_letter[1] = '_';
  369. len = utf8_out(letter, &single_letter[2]);
  370. single_letter[len+2] = ' ';
  371. single_letter[len+3] = 0;
  372. if(Lookup(tr, &single_letter[1], ph_buf) == 0)
  373. {
  374. single_letter[1] = ' ';
  375. if(Lookup(tr, &single_letter[2], ph_buf) == 0)
  376. {
  377. TranslateRules(tr, &single_letter[2], ph_buf, 20, NULL,0,NULL);
  378. }
  379. }
  380. return(ph_buf[0]);
  381. }
  382. void LookupAccentedLetter(Translator *tr, unsigned int letter, char *ph_buf)
  383. {//=========================================================================
  384. // lookup the character in the accents table
  385. int accent_data = 0;
  386. int accent1 = 0;
  387. int accent2 = 0;
  388. int basic_letter;
  389. int letter2=0;
  390. char ph_letter1[30];
  391. char ph_letter2[30];
  392. char ph_accent1[30];
  393. char ph_accent2[30];
  394. ph_accent2[0] = 0;
  395. if((letter >= 0xe0) && (letter < 0x17f))
  396. {
  397. accent_data = letter_accents_0e0[letter - 0xe0];
  398. }
  399. else
  400. if((letter >= 0x250) && (letter <= 0x2a8))
  401. {
  402. accent_data = letter_accents_250[letter - 0x250];
  403. }
  404. if(accent_data != 0)
  405. {
  406. basic_letter = (accent_data & 0x3f) + 59;
  407. if(basic_letter < 'a')
  408. basic_letter = non_ascii_tab[basic_letter-59];
  409. if(accent_data & 0x8000)
  410. {
  411. letter2 = (accent_data >> 6) & 0x3f;
  412. letter2 += 59;
  413. accent2 = (accent_data >> 12) & 0x7;
  414. }
  415. else
  416. {
  417. accent1 = (accent_data >> 6) & 0x1f;
  418. accent2 = (accent_data >> 11) & 0xf;
  419. }
  420. if(Lookup(tr, accents_tab[accent1].name, ph_accent1) != 0)
  421. {
  422. if(LookupLetter2(tr, basic_letter, ph_letter1) != 0)
  423. {
  424. if(accent2 != 0)
  425. {
  426. if(Lookup(tr, accents_tab[accent2].name, ph_accent2) == 0)
  427. {
  428. // break;
  429. }
  430. if(accents_tab[accent2].flags & 1)
  431. {
  432. strcpy(ph_buf,ph_accent2);
  433. ph_buf += strlen(ph_buf);
  434. ph_accent2[0] = 0;
  435. }
  436. }
  437. if(letter2 != 0)
  438. {
  439. //ligature
  440. LookupLetter2(tr, letter2, ph_letter2);
  441. sprintf(ph_buf,"%s%c%s%c%s%s",ph_accent1, phonPAUSE_VSHORT, ph_letter1, phonSTRESS_P, ph_letter2, ph_accent2);
  442. }
  443. else
  444. {
  445. if(accent1 == 0)
  446. strcpy(ph_buf, ph_letter1);
  447. else
  448. if((tr->langopts.accents & 1) || (accents_tab[accent1].flags & 1))
  449. sprintf(ph_buf,"%s%c%c%s", ph_accent1, phonPAUSE_VSHORT, phonSTRESS_P, ph_letter1);
  450. else
  451. sprintf(ph_buf,"%c%s%c%s%c", phonSTRESS_2, ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT);
  452. }
  453. }
  454. }
  455. }
  456. } // end of LookupAccentedLetter
  457. void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_buf1, int control)
  458. {//==============================================================================================
  459. // control, bit 0: not the first letter of a word
  460. int len;
  461. static char single_letter[10] = {0,0};
  462. unsigned int dict_flags[2];
  463. char ph_buf3[40];
  464. char *ptr;
  465. ph_buf1[0] = 0;
  466. len = utf8_out(letter,&single_letter[2]);
  467. single_letter[len+2] = ' ';
  468. if(next_byte == -1)
  469. {
  470. // speaking normal text, not individual characters
  471. if(Lookup(tr, &single_letter[2], ph_buf1) != 0)
  472. return;
  473. single_letter[1] = '_';
  474. if(Lookup(tr, &single_letter[1], ph_buf3) != 0)
  475. return; // the character is specified as _* so ignore it when speaking normal text
  476. // check whether this character is specified for English
  477. if(tr->translator_name == L('e','n'))
  478. return; // we are already using English
  479. SetTranslator2("en");
  480. if(Lookup(translator2, &single_letter[2], ph_buf3) != 0)
  481. {
  482. // yes, switch to English and re-translate the word
  483. sprintf(ph_buf1,"%c",phonSWITCH);
  484. }
  485. SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
  486. return;
  487. }
  488. if((letter <= 32) || iswspace(letter))
  489. {
  490. // lookup space as _&32 etc.
  491. sprintf(&single_letter[1],"_#%d ",letter);
  492. Lookup(tr, &single_letter[1], ph_buf1);
  493. return;
  494. }
  495. if(next_byte != ' ')
  496. next_byte = RULE_SPELLING;
  497. single_letter[3+len] = next_byte; // follow by space-space if the end of the word, or space-31
  498. single_letter[1] = '_';
  499. // if the $accent flag is set for this letter, use the accents table (below)
  500. dict_flags[1] = 0;
  501. ptr = &single_letter[1];
  502. if(Lookup(tr, &single_letter[1], ph_buf3) == 0)
  503. {
  504. single_letter[1] = ' ';
  505. if(Lookup(tr, &single_letter[2], ph_buf3) == 0)
  506. {
  507. TranslateRules(tr, &single_letter[2], ph_buf3, sizeof(ph_buf3), NULL,FLAG_NO_TRACE,NULL);
  508. }
  509. }
  510. if(ph_buf3[0] == 0)
  511. {
  512. LookupAccentedLetter(tr, letter, ph_buf3);
  513. }
  514. strcpy(ph_buf1, ph_buf3);
  515. if((ph_buf1[0] == 0) || (ph_buf1[0] == phonSWITCH))
  516. {
  517. return;
  518. }
  519. dict_flags[0] = 0;
  520. dict_flags[1] = 0;
  521. SetWordStress(tr, ph_buf1, dict_flags, -1, control & 1);
  522. } // end of LookupLetter
  523. int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)
  524. {//=========================================================================
  525. // get pronunciation for an isolated letter
  526. // return number of bytes used by the letter
  527. // control bit 0: a non-initial letter in a word
  528. // bit 1: say 'capital'
  529. int n_bytes;
  530. int letter;
  531. int len;
  532. int save_option_phonemes;
  533. char *p2;
  534. char *pbuf;
  535. char capital[20];
  536. char ph_buf[60];
  537. char ph_buf2[60];
  538. char hexbuf[6];
  539. ph_buf[0] = 0;
  540. capital[0] = 0;
  541. n_bytes = utf8_in(&letter,word);
  542. if((letter & 0xfff00) == 0x0e000)
  543. {
  544. letter &= 0xff; // uncode private usage area
  545. }
  546. if(control & 2)
  547. {
  548. // include CAPITAL information
  549. if(iswupper(letter))
  550. {
  551. Lookup(tr, "_cap", capital);
  552. }
  553. }
  554. letter = towlower2(letter);
  555. LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
  556. if(ph_buf[0] == phonSWITCH)
  557. {
  558. strcpy(phonemes,ph_buf);
  559. return(0);
  560. }
  561. if((ph_buf[0] == 0) && (tr->translator_name != L('e','n')))
  562. {
  563. // speak as English, check whether there is a translation for this character
  564. SetTranslator2("en");
  565. save_option_phonemes = option_phonemes;
  566. option_phonemes = 0;
  567. LookupLetter(translator2, letter, word[n_bytes], ph_buf, control & 1);
  568. SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
  569. option_phonemes = save_option_phonemes;
  570. if(ph_buf[0] != 0)
  571. {
  572. sprintf(phonemes,"%cen",phonSWITCH);
  573. return(0);
  574. }
  575. }
  576. if(ph_buf[0] == 0)
  577. {
  578. // character name not found
  579. if(iswalpha(letter))
  580. Lookup(tr, "_?A", ph_buf);
  581. if((ph_buf[0]==0) && !iswspace(letter))
  582. Lookup(tr, "_??", ph_buf);
  583. if(ph_buf[0] != 0)
  584. {
  585. // speak the hexadecimal number of the character code
  586. sprintf(hexbuf,"%x",letter);
  587. pbuf = ph_buf;
  588. for(p2 = hexbuf; *p2 != 0; p2++)
  589. {
  590. pbuf += strlen(pbuf);
  591. *pbuf++ = phonPAUSE_VSHORT;
  592. LookupLetter(tr, *p2, 0, pbuf, 1);
  593. }
  594. }
  595. }
  596. len = strlen(phonemes);
  597. if(tr->langopts.accents & 2)
  598. sprintf(ph_buf2,"%c%s%s",0xff,ph_buf,capital);
  599. else
  600. sprintf(ph_buf2,"%c%s%s",0xff,capital,ph_buf); // the 0xff marker will be removed or replaced in SetSpellingStress()
  601. if((len + strlen(ph_buf2)) < N_WORD_PHONEMES)
  602. {
  603. strcpy(&phonemes[len],ph_buf2);
  604. }
  605. return(n_bytes);
  606. } // end of TranslateLetter
  607. void SetSpellingStress(Translator *tr, char *phonemes, int control, int n_chars)
  608. {//=============================================================================
  609. // Individual letter names, reduce the stress of some.
  610. int ix;
  611. unsigned int c;
  612. int n_stress=0;
  613. int count;
  614. unsigned char buf[N_WORD_PHONEMES];
  615. for(ix=0; (c = phonemes[ix]) != 0; ix++)
  616. {
  617. if(c == phonSTRESS_P)
  618. {
  619. n_stress++;
  620. }
  621. buf[ix] = c;
  622. }
  623. buf[ix] = 0;
  624. count = 0;
  625. for(ix=0; (c = buf[ix]) != 0; ix++)
  626. {
  627. if((c == phonSTRESS_P) && (n_chars > 1))
  628. {
  629. count++;
  630. if(tr->langopts.spelling_stress == 1)
  631. {
  632. // stress on initial letter when spelling
  633. if(count > 1)
  634. c = phonSTRESS_3;
  635. }
  636. else
  637. {
  638. if(count != n_stress)
  639. {
  640. if(((count % 3) != 0) || (count == n_stress-1))
  641. c = phonSTRESS_3; // reduce to secondary stress
  642. }
  643. }
  644. }
  645. else
  646. if(c == 0xff)
  647. {
  648. if((control < 2) || (ix==0))
  649. continue; // don't insert pauses
  650. if(control == 4)
  651. c = phonPAUSE; // pause after each character
  652. if(((count % 3) == 0) || (control > 2))
  653. c = phonPAUSE_NOLINK; // pause following a primary stress
  654. else
  655. c = phonPAUSE_VSHORT;
  656. // else
  657. // continue; // remove marker
  658. }
  659. *phonemes++ = c;
  660. }
  661. if(control >= 2)
  662. *phonemes++ = phonPAUSE_NOLINK;
  663. *phonemes = 0;
  664. } // end of SetSpellingStress
  665. // Numbers
  // NOTE(review): ph_ordinal2 is not referenced in the part of the file
  // visible here; presumably it is filled by the number code further down.
  666. static char ph_ordinal2[12];
  // Decide whether a number that is followed by a dot is an ordinal number.
  //
  //   tr        translator; only languages with NUM_ORDINAL_DOT are considered
  //   word      start of the number
  //   word_end  first character after the number
  //   wtab      word table entry of the number (wtab[1] is the next word)
  //   roman     non-zero when called for a Roman numeral
  //
  // Returns 0 (not an ordinal), 2 (ordinal), or 0x22 (lang=hu special case).
  // Side effect: when the number is accepted as an ordinal, a '.' at
  // word_end[0] is overwritten with a space.
  667. static int CheckDotOrdinal(Translator *tr, char *word, char *word_end, WORD_TAB *wtab, int roman)
  668. {//==============================================================================================
  669. int ordinal = 0;
  670. int c2;
  671. int nextflags;
  672. if((tr->langopts.numbers & NUM_ORDINAL_DOT) && ((word_end[0] == '.') || (wtab[0].flags & FLAG_HAS_DOT)) && !(wtab[1].flags & FLAG_NOSPACE))
  673. {
  674. if(roman || !(wtab[1].flags & FLAG_FIRST_UPPER))
  675. {
  // c2 = first character of the next word (skipping the dot if it is still there)
  676. if(word_end[0] == '.')
  677. utf8_in(&c2, &word_end[2]);
  678. else
  679. utf8_in(&c2, &word_end[1]);
  680. if((word_end[1] != 0) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || IsAlpha(c2)))
  681. {
  682. // ordinal number is indicated by dot after the number
  683. // but not if the next word starts with an upper-case letter
  684. // (c2 == 0) is for cases such as, "2.,"
  685. ordinal = 2;
  686. if(word_end[0] == '.')
  687. word_end[0] = ' ';
  688. if((roman==0) && (tr->translator_name == L('h','u')))
  689. {
  690. // lang=hu don't treat dot as ordinal indicator if the next word is a month name ($alt). It may have a suffix.
  691. nextflags = 0;
  692. if(IsAlpha(c2))
  693. {
  // translate the following word only to obtain its dictionary flags
  694. nextflags = TranslateWord(tr, &word_end[2], 0, NULL);
  695. }
  696. if((tr->prev_dict_flags & FLAG_ALT_TRANS) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || iswdigit(c2)))
  697. ordinal = 0; // TEST 09.02.10
  698. if(nextflags & FLAG_ALT_TRANS)
  699. ordinal = 0;
  700. if(nextflags & FLAG_ALT3_TRANS)
  701. {
  702. if(word[-2] == '-')
  703. ordinal = 0; // eg. december 2-5. között
  704. if(tr->prev_dict_flags & (FLAG_ALT_TRANS | FLAG_ALT3_TRANS))
  705. ordinal = 0x22;
  706. }
  707. }
  708. }
  709. }
  710. }
  711. return(ordinal);
  712. } // end of CheckDotOrdinal
  // lang=hu: tells whether the variant ('e') form of a number is needed,
  // i.e. the number is followed by a hyphen and a suffix that starts with
  // 'a' or 'e' -- but not one of: a, e, az, ez, azt, ezt, att, ett.
  //
  //   word  the text following the number
  // Returns 1 if the variant form applies, otherwise 0.
  static int hu_number_e(const char *word)
  {
      const char first = word[0];

      if ((first != 'a') && (first != 'e'))
          return (0);

      switch (word[1])
      {
      case ' ':   // "a", "e"
      case 'z':   // "az", "ez", "azt", "ezt"
          return (0);
      case 't':   // "att", "ett" are excluded, other "at..", "et.." are not
          return ((word[2] == 't') ? 0 : 1);
      default:
          return (1);
      }
  } // end of hu_number_e
  724. int TranslateRoman(Translator *tr, char *word, char *ph_out, WORD_TAB *wtab)
  725. {//=========================================================================
  726. int c;
  727. char *p;
  728. const char *p2;
  729. int acc;
  730. int prev;
  731. int value;
  732. int subtract;
  733. int repeat = 0;
  734. int n_digits = 0;
  735. char *word_start;
  736. int num_control = 0;
  737. unsigned int flags[2];
  738. char ph_roman[30];
  739. char number_chars[N_WORD_BYTES];
  740. static const char *roman_numbers = "ixcmvld";
  741. static int roman_values[] = {1,10,100,1000,5,50,500};
  742. acc = 0;
  743. prev = 0;
  744. subtract = 0x7fff;
  745. ph_out[0] = 0;
  746. flags[0] = 0;
  747. flags[1] = 0;
  748. if((tr->langopts.numbers & NUM_ROMAN_CAPITALS) && !(wtab[0].flags & FLAG_ALL_UPPER))
  749. return(0);
  750. word_start = word;
  751. while((c = *word++) != ' ')
  752. {
  753. if((p2 = strchr(roman_numbers,c)) == NULL)
  754. return(0);
  755. value = roman_values[p2 - roman_numbers];
  756. if(value == prev)
  757. {
  758. repeat++;
  759. if(repeat >= 3)
  760. return(0);
  761. }
  762. else
  763. repeat = 0;
  764. if((prev > 1) && (prev != 10) && (prev != 100))
  765. {
  766. if(value >= prev)
  767. return(0);
  768. }
  769. if((prev != 0) && (prev < value))
  770. {
  771. if(((acc % 10) != 0) || ((prev*10) < value))
  772. return(0);
  773. subtract = prev;
  774. value -= subtract;
  775. }
  776. else
  777. if(value >= subtract)
  778. return(0);
  779. else
  780. acc += prev;
  781. prev = value;
  782. n_digits++;
  783. }
  784. acc += prev;
  785. if(acc < tr->langopts.min_roman)
  786. return(0);
  787. if(acc > tr->langopts.max_roman)
  788. return(0);
  789. Lookup(tr, "_roman",ph_roman); // precede by "roman" if _rom is defined in *_list
  790. p = &ph_out[0];
  791. if((tr->langopts.numbers & NUM_ROMAN_AFTER) == 0)
  792. {
  793. strcpy(ph_out,ph_roman);
  794. p = &ph_out[strlen(ph_roman)];
  795. }
  796. sprintf(number_chars," %d ",acc);
  797. if(word[0] == '.')
  798. {
  799. // dot has not been removed. This implies that there was no space after it
  800. return(0);
  801. }
  802. if(CheckDotOrdinal(tr, word_start, word, wtab, 1))
  803. wtab[0].flags |= FLAG_ORDINAL;
  804. if(tr->langopts.numbers & NUM_ROMAN_ORDINAL)
  805. {
  806. if(tr->translator_name == L('h','u'))
  807. {
  808. if(!(wtab[0].flags & FLAG_ORDINAL))
  809. {
  810. if((wtab[0].flags & FLAG_HYPHEN_AFTER) && hu_number_e(word))
  811. {
  812. // should use the 'e' form of the number
  813. num_control |= 1;
  814. }
  815. else
  816. return(0);
  817. }
  818. }
  819. }
  820. tr->prev_dict_flags = 0;
  821. TranslateNumber(tr, &number_chars[2], p, flags, wtab, num_control);
  822. if(tr->langopts.numbers & NUM_ROMAN_AFTER)
  823. strcat(ph_out,ph_roman);
  824. return(1);
  825. } // end of TranslateRoman
  826. static const char *M_Variant(int value)
  827. {//====================================
  828. // returns M, or perhaps MA for some cases
  829. if((translator->langopts.numbers2 & 0x100) && (value >= 2) && (value <= 4))
  830. return("0MA"); // Czech, Slovak
  831. else
  832. if(((value % 100) < 10) || ((value % 100) > 20)) // but not teens, 10 to 19
  833. {
  834. if ((translator->langopts.numbers2 & 0x40) &&
  835. ((value % 10)>=2) &&
  836. ((value % 10)<=4))
  837. {
  838. // for Polish language - two forms of plural!
  839. return("0MA");
  840. }
  841. if((translator->langopts.numbers2 & 0x80) &&
  842. ((value % 10)==1))
  843. {
  844. return("1MA");
  845. }
  846. }
  847. return("0M");
  848. }
  849. static int LookupThousands(Translator *tr, int value, int thousandplex, int thousands_exact, char *ph_out)
  850. {//=======================================================================================================
  851. // thousands_exact: bit 0 no hundreds,tens,or units, bit 1 ordinal numberr
  852. int found;
  853. int found_value=0;
  854. char string[12];
  855. char ph_of[12];
  856. char ph_thousands[40];
  857. char ph_buf[40];
  858. ph_of[0] = 0;
  859. // first look for a match with the exact value of thousands
  860. if(value > 0)
  861. {
  862. if(thousands_exact & 1)
  863. {
  864. if(thousands_exact & 2)
  865. {
  866. // ordinal number
  867. sprintf(string,"_%dM%do",value,thousandplex);
  868. found_value = Lookup(tr, string, ph_thousands);
  869. }
  870. if(!found_value & (number_control & 1))
  871. {
  872. // look for the 'e' variant
  873. sprintf(string,"_%dM%de",value,thousandplex);
  874. found_value = Lookup(tr, string, ph_thousands);
  875. }
  876. if(!found_value)
  877. {
  878. // is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta)
  879. sprintf(string,"_%dM%dx",value,thousandplex);
  880. found_value = Lookup(tr, string, ph_thousands);
  881. }
  882. }
  883. if(found_value == 0)
  884. {
  885. sprintf(string,"_%dM%d",value,thousandplex);
  886. found_value = Lookup(tr, string, ph_thousands);
  887. }
  888. }
  889. if(found_value == 0)
  890. {
  891. if((value % 100) >= 20)
  892. {
  893. Lookup(tr, "_0of", ph_of);
  894. }
  895. found = 0;
  896. if(thousands_exact & 1)
  897. {
  898. if(thousands_exact & 2)
  899. {
  900. // ordinal number
  901. sprintf(string,"_%s%do",M_Variant(value), thousandplex);
  902. found = Lookup(tr, string, ph_thousands);
  903. }
  904. if(!found && (number_control & 1))
  905. {
  906. // look for the 'e' variant
  907. sprintf(string,"_%s%de",M_Variant(value), thousandplex);
  908. found = Lookup(tr, string, ph_thousands);
  909. }
  910. if(!found)
  911. {
  912. // is there a different pronunciation if there are no hundreds,tens,or units ?
  913. sprintf(string,"_%s%dx",M_Variant(value), thousandplex);
  914. found = Lookup(tr, string, ph_thousands);
  915. }
  916. }
  917. if(found == 0)
  918. {
  919. sprintf(string,"_%s%d",M_Variant(value), thousandplex);
  920. if(Lookup(tr, string, ph_thousands) == 0)
  921. {
  922. if(thousandplex > 3)
  923. {
  924. sprintf(string,"_0M%d", thousandplex-1);
  925. if(Lookup(tr, string, ph_buf) == 0)
  926. {
  927. // say "millions" if this name is not available and neither is the next lower
  928. Lookup(tr, "_0M2", ph_thousands);
  929. speak_missing_thousands = 3;
  930. }
  931. }
  932. if(ph_thousands[0] == 0)
  933. {
  934. // repeat "thousand" if higher order names are not available
  935. sprintf(string,"_%dM1",value);
  936. if((found_value = Lookup(tr, string, ph_thousands)) == 0)
  937. Lookup(tr, "_0M1", ph_thousands);
  938. speak_missing_thousands = 2;
  939. }
  940. }
  941. }
  942. }
  943. sprintf(ph_out,"%s%s",ph_of,ph_thousands);
  944. return(found_value);
  945. }
  946. static int LookupNum2(Translator *tr, int value, int control, char *ph_out)
  947. {//========================================================================
  948. // Lookup a 2 digit number
  949. // control bit 0: ordinal number
  950. // control bit 1: final tens and units (not number of thousands) (use special form of '1', LANG=de "eins")
  951. // control bit 2: tens and units only, no higher digits
  952. // control bit 3: use feminine form of '2' (for thousands
  953. // control bit 4: speak zero tens
  954. // control bit 5: variant of ordinal number (lang=hu)
  955. int found;
  956. int ix;
  957. int units;
  958. int tens;
  959. int used_and=0;
  960. int found_ordinal = 0;
  961. int next_phtype;
  962. int ord_type = 'o';
  963. char string[12]; // for looking up entries in *_list
  964. char ph_ordinal[20];
  965. char ph_tens[50];
  966. char ph_digits[50];
  967. char ph_and[12];
  968. units = value % 10;
  969. tens = value / 10;
  970. found = 0;
  971. ph_ordinal[0] = 0;
  972. ph_tens[0] = 0;
  973. ph_digits[0] = 0;
  974. ph_and[0] = 0;
  975. if(control & 0x20)
  976. {
  977. ord_type = 'q';
  978. }
  979. if((control & 2) && (n_digit_lookup == 2))
  980. {
  981. // pronunciation of the final 2 digits has already been found
  982. strcpy(ph_out, digit_lookup);
  983. }
  984. else
  985. {
  986. if(digit_lookup[0] == 0)
  987. {
  988. // is there a special pronunciation for this 2-digit number
  989. if(control & 8)
  990. {
  991. sprintf(string,"_%df",value);
  992. found = Lookup(tr, string, ph_digits);
  993. }
  994. else
  995. if(control & 1)
  996. {
  997. strcpy(ph_ordinal, ph_ordinal2);
  998. if(control & 4)
  999. {
  1000. sprintf(string,"_%d%cx",value,ord_type); // LANG=hu, special word for 1. 2. when there are no higher digits
  1001. found = Lookup(tr, string, ph_digits);
  1002. }
  1003. if(found == 0)
  1004. {
  1005. sprintf(string,"_%d%c",value,ord_type);
  1006. found = Lookup(tr, string, ph_digits);
  1007. }
  1008. found_ordinal = found;
  1009. }
  1010. if(found == 0)
  1011. {
  1012. if(control & 2)
  1013. {
  1014. // the final tens and units of a number
  1015. if(number_control & 1)
  1016. {
  1017. // look for 'e' variant
  1018. sprintf(string,"_%de",value);
  1019. found = Lookup(tr, string, ph_digits);
  1020. }
  1021. }
  1022. else
  1023. {
  1024. // followed by hundreds or thousands etc
  1025. sprintf(string,"_%da",value);
  1026. found = Lookup(tr, string, ph_digits);
  1027. }
  1028. if(!found)
  1029. {
  1030. sprintf(string,"_%d",value);
  1031. found = Lookup(tr, string, ph_digits);
  1032. }
  1033. }
  1034. }
  1035. // no, speak as tens+units
  1036. if((control & 0x10) && (value < 10))
  1037. {
  1038. // speak leading zero
  1039. Lookup(tr, "_0", ph_tens);
  1040. }
  1041. else
  1042. {
  1043. if(found)
  1044. {
  1045. ph_tens[0] = 0;
  1046. }
  1047. else
  1048. {
  1049. if((control & 1) && ((units == 0) || (tr->langopts.numbers & NUM_SWAP_TENS)))
  1050. {
  1051. sprintf(string,"_%dX%c", tens, ord_type);
  1052. if(Lookup(tr, string, ph_tens) != 0)
  1053. {
  1054. found_ordinal = 1;
  1055. }
  1056. }
  1057. if(found_ordinal == 0)
  1058. {
  1059. sprintf(string,"_%dX", tens);
  1060. Lookup(tr, string, ph_tens);
  1061. }
  1062. if((ph_tens[0] == 0) && (tr->langopts.numbers & NUM_VIGESIMAL))
  1063. {
  1064. // tens not found, (for example) 73 is 60+13
  1065. units = (value % 20);
  1066. sprintf(string,"_%dX", tens & 0xfe);
  1067. Lookup(tr, string, ph_tens);
  1068. }
  1069. ph_digits[0] = 0;
  1070. if(units > 0)
  1071. {
  1072. found = 0;
  1073. if((control & 2) && (digit_lookup[0] != 0))
  1074. {
  1075. // we have an entry for this digit (possibly together with the next word)
  1076. strcpy(ph_digits, digit_lookup);
  1077. found_ordinal = 1;
  1078. ph_ordinal[0] = 0;
  1079. }
  1080. else
  1081. {
  1082. if(control & 8)
  1083. {
  1084. // is there a variant form of this number?
  1085. sprintf(string,"_%df",units);
  1086. found = Lookup(tr, string, ph_digits);
  1087. }
  1088. if((control & 1) && ((tr->langopts.numbers & NUM_SWAP_TENS) == 0))
  1089. {
  1090. // ordinal
  1091. sprintf(string,"_%d%c",units,ord_type);
  1092. if((found = Lookup(tr, string, ph_digits)) != 0)
  1093. {
  1094. found_ordinal = 1;
  1095. }
  1096. }
  1097. if(found == 0)
  1098. {
  1099. if((number_control & 1) && (control & 2))
  1100. {
  1101. // look for 'e' variant
  1102. sprintf(string,"_%de",units);
  1103. found = Lookup(tr, string, ph_digits);
  1104. }
  1105. else
  1106. if(((control & 2) == 0) || ((tr->langopts.numbers & NUM_SWAP_TENS) != 0))
  1107. {
  1108. // followed by hundreds or thousands (or tens)
  1109. sprintf(string,"_%da",units);
  1110. found = Lookup(tr, string, ph_digits);
  1111. }
  1112. }
  1113. if(found == 0)
  1114. {
  1115. sprintf(string,"_%d",units);
  1116. Lookup(tr, string, ph_digits);
  1117. }
  1118. }
  1119. }
  1120. }
  1121. }
  1122. if((control & 1) && (found_ordinal == 0) && (ph_ordinal[0] == 0))
  1123. {
  1124. if((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & NUM_SWAP_TENS)))
  1125. Lookup(tr, "_ord20", ph_ordinal);
  1126. if(ph_ordinal[0] == 0)
  1127. Lookup(tr, "_ord", ph_ordinal);
  1128. }
  1129. if((tr->langopts.numbers & (NUM_SWAP_TENS | NUM_AND_UNITS)) && (ph_tens[0] != 0) && (ph_digits[0] != 0))
  1130. {
  1131. Lookup(tr, "_0and", ph_and);
  1132. if(tr->langopts.numbers & NUM_SWAP_TENS)
  1133. sprintf(ph_out,"%s%s%s%s",ph_digits, ph_and, ph_tens, ph_ordinal);
  1134. else
  1135. sprintf(ph_out,"%s%s%s%s",ph_tens, ph_and, ph_digits, ph_ordinal);
  1136. used_and = 1;
  1137. }
  1138. else
  1139. {
  1140. if(tr->langopts.numbers & NUM_SINGLE_VOWEL)
  1141. {
  1142. // remove vowel from the end of tens if units starts with a vowel (LANG=Italian)
  1143. if(((ix = strlen(ph_tens)-1) >= 0) && (ph_digits[0] != 0))
  1144. {
  1145. if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS)
  1146. next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type;
  1147. if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
  1148. ph_tens[ix] = 0;
  1149. }
  1150. }
  1151. sprintf(ph_out,"%s%s%s",ph_tens, ph_digits, ph_ordinal);
  1152. }
  1153. }
  1154. if(tr->langopts.numbers & NUM_SINGLE_STRESS)
  1155. {
  1156. // only one primary stress
  1157. found = 0;
  1158. for(ix=strlen(ph_out)-1; ix>=0; ix--)
  1159. {
  1160. if(ph_out[ix] == phonSTRESS_P)
  1161. {
  1162. if(found)
  1163. ph_out[ix] = phonSTRESS_3;
  1164. else
  1165. found = 1;
  1166. }
  1167. }
  1168. }
  1169. return(used_and);
  1170. } // end of LookupNum2
  1171. static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null, int thousandplex, int control)
  1172. {//=============================================================================================================
  1173. // Translate a 3 digit number
  1174. // control bit 0, previous thousands
  1175. // bit 1, ordinal number
  1176. // bit 5 variant form of ordinal number
  1177. int found;
  1178. int hundreds;
  1179. int tensunits;
  1180. int x;
  1181. int exact;
  1182. int ordinal;
  1183. char string[12]; // for looking up entries in **_list
  1184. char buf1[100];
  1185. char buf2[100];
  1186. char ph_100[20];
  1187. char ph_10T[20];
  1188. char ph_digits[50];
  1189. char ph_thousands[50];
  1190. char ph_hundred_and[12];
  1191. char ph_thousand_and[12];
  1192. ordinal = control & 0x22;
  1193. hundreds = value / 100;
  1194. tensunits = value % 100;
  1195. buf1[0] = 0;
  1196. ph_thousands[0] = 0;
  1197. ph_thousand_and[0] = 0;
  1198. if(hundreds > 0)
  1199. {
  1200. found = 0;
  1201. if(ordinal && (tensunits == 0))
  1202. {
  1203. // ordinal number, with no tens or units
  1204. found = Lookup(tr, "_0Co", ph_100);
  1205. }
  1206. if(found == 0)
  1207. {
  1208. Lookup(tr, "_0C", ph_100);
  1209. }
  1210. if(((tr->langopts.numbers & NUM_1900) != 0) && (hundreds == 19))
  1211. {
  1212. // speak numbers such as 1984 as years: nineteen-eighty-four
  1213. // ph_100[0] = 0; // don't say "hundred", we also need to surpess "and"
  1214. }
  1215. else
  1216. if(hundreds >= 10)
  1217. {
  1218. ph_digits[0] = 0;
  1219. exact = 0;
  1220. if ((value % 1000) == 0)
  1221. exact = 1;
  1222. if(LookupThousands(tr, hundreds / 10, thousandplex+1, exact | ordinal, ph_10T) == 0)
  1223. {
  1224. x = 0;
  1225. if(tr->langopts.numbers2 & (1 << (thousandplex+1)))
  1226. x = 8; // use variant (feminine) for before thousands and millions
  1227. LookupNum2(tr, hundreds/10, x, ph_digits);
  1228. }
  1229. if(tr->langopts.numbers2 & 0x200)
  1230. sprintf(ph_thousands,"%s%s",ph_10T,ph_digits); // say "thousands" before its number, not after
  1231. else
  1232. sprintf(ph_thousands,"%s%s",ph_digits,ph_10T);
  1233. hundreds %= 10;
  1234. if(hundreds == 0)
  1235. ph_100[0] = 0;
  1236. suppress_null = 1;
  1237. }
  1238. ph_digits[0] = 0;
  1239. if(hundreds > 0)
  1240. {
  1241. if((tr->langopts.numbers & NUM_AND_HUNDRED) && ((control & 1) || (ph_thousands[0] != 0)))
  1242. {
  1243. Lookup(tr, "_0and", ph_thousand_and);
  1244. }
  1245. suppress_null = 1;
  1246. found = 0;
  1247. if(tensunits == 0)
  1248. {
  1249. // is there a special pronunciation for exactly n00 ?
  1250. if(ordinal)
  1251. {
  1252. // ordinal number
  1253. sprintf(string, "_%dCo", hundreds);
  1254. found = Lookup(tr, string, ph_digits);
  1255. }
  1256. if(!found)
  1257. {
  1258. sprintf(string,"_%dC0",hundreds);
  1259. found = Lookup(tr, string, ph_digits);
  1260. }
  1261. }
  1262. if(!found)
  1263. {
  1264. sprintf(string,"_%dC",hundreds);
  1265. found = Lookup(tr, string, ph_digits); // is there a specific pronunciation for n-hundred ?
  1266. }
  1267. if(found)
  1268. {
  1269. ph_100[0] = 0;
  1270. }
  1271. else
  1272. {
  1273. if((hundreds > 1) || ((tr->langopts.numbers & NUM_OMIT_1_HUNDRED) == 0))
  1274. {
  1275. LookupNum2(tr, hundreds, 0, ph_digits);
  1276. }
  1277. }
  1278. }
  1279. sprintf(buf1,"%s%s%s%s",ph_thousands,ph_thousand_and,ph_digits,ph_100);
  1280. }
  1281. ph_hundred_and[0] = 0;
  1282. if(tensunits > 0)
  1283. {
  1284. if((tr->langopts.numbers & NUM_HUNDRED_AND) && ((value > 100) || ((control & 1) && (thousandplex==0))))
  1285. {
  1286. Lookup(tr, "_0and", ph_hundred_and);
  1287. }
  1288. if((tr->langopts.numbers & NUM_THOUSAND_AND) && (hundreds == 0) && ((control & 1) || (ph_thousands[0] != 0)))
  1289. {
  1290. Lookup(tr, "_0and", ph_hundred_and);
  1291. }
  1292. }
  1293. buf2[0] = 0;
  1294. if((tensunits != 0) || (suppress_null == 0))
  1295. {
  1296. x = 0;
  1297. if(thousandplex==0)
  1298. {
  1299. x = 2; // allow "eins" for 1 rather than "ein"
  1300. if(ordinal)
  1301. x = 3; // ordinal number
  1302. if((value < 100) && !(control & 1))
  1303. x |= 4; // tens and units only, no higher digits
  1304. if(ordinal & 0x20)
  1305. x |= 0x20; // variant form of ordinal number
  1306. }
  1307. else
  1308. {
  1309. if(tr->langopts.numbers2 & (1 << thousandplex))
  1310. x = 8; // use variant (feminine) for before thousands and millions
  1311. }
  1312. if(LookupNum2(tr, tensunits, x, buf2) != 0)
  1313. {
  1314. if(tr->langopts.numbers & NUM_SINGLE_AND)
  1315. ph_hundred_and[0] = 0; // don't put 'and' after 'hundred' if there's 'and' between tens and units
  1316. }
  1317. }
  1318. sprintf(ph_out,"%s%s%s",buf1,ph_hundred_and,buf2);
  1319. return(0);
  1320. } // end of LookupNum3
  1321. static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
  1322. {//=====================================================================================================================
  1323. // Number translation with various options
  1324. // the "word" may be up to 4 digits
  1325. // "words" of 3 digits may be preceded by another number "word" for thousands or millions
  1326. int n_digits;
  1327. int value;
  1328. int ix;
  1329. int digix;
  1330. unsigned char c;
  1331. int suppress_null = 0;
  1332. int decimal_point = 0;
  1333. int thousandplex = 0;
  1334. int thousands_exact = 1;
  1335. int thousands_inc = 0;
  1336. int prev_thousands = 0;
  1337. int ordinal = 0;
  1338. int this_value;
  1339. int decimal_count;
  1340. int max_decimal_count;
  1341. int decimal_mode;
  1342. int hyphen;
  1343. int suffix_ix;
  1344. int skipwords = 0;
  1345. char *p;
  1346. char string[20]; // for looking up entries in **_list
  1347. char buf1[100];
  1348. char ph_append[50];
  1349. char ph_buf[200];
  1350. char ph_buf2[50];
  1351. char ph_zeros[50];
  1352. char suffix[20];
  1353. char buf_digit_lookup[50];
  1354. static const char str_pause[2] = {phonPAUSE_NOLINK,0};
  1355. *flags = 0;
  1356. n_digit_lookup = 0;
  1357. buf_digit_lookup[0] = 0;
  1358. digit_lookup = buf_digit_lookup;
  1359. number_control = control;
  1360. for(ix=0; isdigit(word[ix]); ix++) ;
  1361. n_digits = ix;
  1362. value = this_value = atoi(word);
  1363. // is there a previous thousands part (as a previous "word") ?
  1364. if((n_digits == 3) && (word[-2] == tr->langopts.thousands_sep) && isdigit(word[-3]))
  1365. {
  1366. prev_thousands = 1;
  1367. }
  1368. else
  1369. if((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & NUM_ALLOW_SPACE))
  1370. {
  1371. // thousands groups can be separated by spaces
  1372. if((n_digits == 3) && !(wtab->flags & FLAG_MULTIPLE_SPACES) && isdigit(word[-2]))
  1373. {
  1374. prev_thousands = 1;
  1375. }
  1376. }
  1377. if(prev_thousands == 0)
  1378. {
  1379. speak_missing_thousands = 0;
  1380. }
  1381. ph_ordinal2[0] = 0;
  1382. ph_zeros[0] = 0;
  1383. if(prev_thousands || (word[0] != '0'))
  1384. {
  1385. // don't check for ordinal if the number has a leading zero
  1386. ordinal = CheckDotOrdinal(tr, word, &word[ix], wtab, 0);
  1387. }
  1388. if((word[ix] == '.') && !isdigit(word[ix+1]) && !isdigit(word[ix+2]) && !(wtab[1].flags & FLAG_NOSPACE))
  1389. {
  1390. // remove dot unless followed by another number
  1391. word[ix] = 0;
  1392. }
  1393. if(ordinal == 0)
  1394. {
  1395. // look for an ordinal number suffix after the number
  1396. ix++;
  1397. hyphen = 0;
  1398. p = suffix;
  1399. if(wtab[0].flags & FLAG_HYPHEN_AFTER)
  1400. {
  1401. *p++ = '-';
  1402. hyphen = 1;
  1403. ix++;
  1404. }
  1405. while((word[ix] != 0) && (word[ix] != ' ') && (ix < (int)(sizeof(suffix)-1)))
  1406. {
  1407. *p++ = word[ix++];
  1408. }
  1409. *p = 0;
  1410. if(suffix[0] != 0)
  1411. {
  1412. if((tr->langopts.ordinal_indicator != NULL) && (strcmp(suffix, tr->langopts.ordinal_indicator) == 0))
  1413. {
  1414. ordinal = 2;
  1415. }
  1416. else
  1417. if(!isdigit(suffix[0])) // not _#9 (tab)
  1418. {
  1419. sprintf(string,"_#%s",suffix);
  1420. if(Lookup(tr, string, ph_ordinal2))
  1421. {
  1422. // this is an ordinal suffix
  1423. ordinal = 2;
  1424. }
  1425. }
  1426. if(ordinal)
  1427. {
  1428. flags[0] |= FLAG_SKIPWORDS;
  1429. skipwords = 1;
  1430. }
  1431. }
  1432. }
  1433. if(wtab[0].flags & FLAG_ORDINAL)
  1434. ordinal = 2;
  1435. ph_append[0] = 0;
  1436. ph_buf2[0] = 0;
  1437. if((word[0] == '0') && (prev_thousands == 0) && (word[1] != ' ') && (word[1] != tr->langopts.decimal_sep))
  1438. {
  1439. if((n_digits == 2) && (word[3] == ':') && isdigit(word[5]) && isspace(word[7]))
  1440. {
  1441. // looks like a time 02:30, omit the leading zero
  1442. }
  1443. else
  1444. {
  1445. if(n_digits > 3)
  1446. {
  1447. flags[0] &= ~FLAG_SKIPWORDS;
  1448. return(0); // long number string with leading zero, speak as individual digits
  1449. }
  1450. // speak leading zeros
  1451. for(ix=0; (word[ix] == '0') && (ix < (n_digits-1)); ix++)
  1452. {
  1453. Lookup(tr, "_0", &ph_zeros[strlen(ph_zeros)]);
  1454. }
  1455. }
  1456. }
  1457. if((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[n_digits] == ' '))
  1458. thousands_inc = 1;
  1459. else
  1460. if(word[n_digits] == tr->langopts.thousands_sep)
  1461. thousands_inc = 2;
  1462. suffix_ix = n_digits+2;
  1463. if(thousands_inc > 0)
  1464. {
  1465. // if the following "words" are three-digit groups, count them and add
  1466. // a "thousand"/"million" suffix to this one
  1467. digix = n_digits + thousands_inc;
  1468. while(((wtab[thousandplex+1].flags & FLAG_MULTIPLE_SPACES) == 0) &&
  1469. isdigit(word[digix]) && isdigit(word[digix+1]) && isdigit(word[digix+2]) && !isdigit(word[digix+3]) && !isdigit(word[digix-1]))
  1470. {
  1471. if((word[digix] != '0') || (word[digix+1] != '0') || (word[digix+2] != '0'))
  1472. thousands_exact = 0;
  1473. thousandplex++;
  1474. digix += 3;
  1475. if((word[digix] == tr->langopts.thousands_sep) || ((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[digix] == ' ')))
  1476. {
  1477. suffix_ix = digix+2;
  1478. digix += thousands_inc;
  1479. }
  1480. else
  1481. break;
  1482. }
  1483. }
  1484. if((value == 0) && prev_thousands)
  1485. {
  1486. suppress_null = 1;
  1487. }
  1488. if(tr->translator_name == L('h','u'))
  1489. {
  1490. // variant form of numbers when followed by hyphen and a suffix starting with 'a' or 'e' (but not a, e, az, ez, azt, ezt
  1491. if((wtab[thousandplex].flags & FLAG_HYPHEN_AFTER) && (thousands_exact==1) && hu_number_e(&word[suffix_ix]))
  1492. {
  1493. number_control |= 1; // use _1e variant of number
  1494. }
  1495. }
  1496. if((word[n_digits] == tr->langopts.decimal_sep) && isdigit(word[n_digits+1]))
  1497. {
  1498. // this "word" ends with a decimal point
  1499. Lookup(tr, "_dpt", ph_append);
  1500. decimal_point = 1;
  1501. }
  1502. else
  1503. if(suppress_null == 0)
  1504. {
  1505. if(thousands_inc > 0)
  1506. {
  1507. if((thousandplex > 0) && (value < 1000))
  1508. {
  1509. if((suppress_null == 0) && (LookupThousands(tr,value,thousandplex, thousands_exact, ph_append)))
  1510. {
  1511. // found an exact match for N thousand
  1512. value = 0;
  1513. suppress_null = 1;
  1514. }
  1515. }
  1516. }
  1517. }
  1518. else
  1519. if(speak_missing_thousands == 1)
  1520. {
  1521. // speak this thousandplex if there was no word for the previous thousandplex
  1522. sprintf(string,"_0M%d",thousandplex+1);
  1523. if(Lookup(tr, string, buf1)==0)
  1524. {
  1525. sprintf(string,"_0M%d",thousandplex);
  1526. Lookup(tr, string, ph_append);
  1527. }
  1528. }
  1529. if((ph_append[0] == 0) && (word[n_digits] == '.') && (thousandplex == 0))
  1530. {
  1531. Lookup(tr, "_.", ph_append);
  1532. }
  1533. if(thousandplex == 0)
  1534. {
  1535. char *p2;
  1536. // look for combinations of the number with the next word
  1537. p = word;
  1538. while(isdigit(p[1])) p++; // just use the last digit
  1539. if(isdigit(p[-1]))
  1540. {
  1541. p2 = p - 1;
  1542. if(LookupDictList(tr, &p2, buf_digit_lookup, flags, FLAG_SUFX, wtab)) // lookup 2 digits
  1543. {
  1544. n_digit_lookup = 2;
  1545. }
  1546. }
  1547. if((buf_digit_lookup[0] == 0) && (*p != '0'))
  1548. {
  1549. // not found, lookup only the last digit
  1550. if(LookupDictList(tr, &p, buf_digit_lookup, flags, FLAG_SUFX, wtab)) // don't match '0', or entries with $only
  1551. {
  1552. n_digit_lookup = 1;
  1553. }
  1554. }
  1555. }
  1556. LookupNum3(tr, value, ph_buf, suppress_null, thousandplex, prev_thousands | ordinal);
  1557. if((thousandplex > 0) && (tr->langopts.numbers2 & 0x200))
  1558. sprintf(ph_out,"%s%s%s%s",ph_zeros,ph_append,ph_buf2,ph_buf); // say "thousands" before its number
  1559. else
  1560. sprintf(ph_out,"%s%s%s%s",ph_zeros,ph_buf2,ph_buf,ph_append);
  1561. while(decimal_point)
  1562. {
  1563. n_digits++;
  1564. decimal_count = 0;
  1565. while(isdigit(word[n_digits+decimal_count]))
  1566. decimal_count++;
  1567. // if(decimal_count > 1)
  1568. {
  1569. max_decimal_count = 2;
  1570. switch(decimal_mode = (tr->langopts.numbers & 0xe000))
  1571. {
  1572. case NUM_DFRACTION_4:
  1573. max_decimal_count = 5;
  1574. case NUM_DFRACTION_2:
  1575. // French/Polish decimal fraction
  1576. while(word[n_digits] == '0')
  1577. {
  1578. Lookup(tr, "_0", buf1);
  1579. strcat(ph_out,buf1);
  1580. decimal_count--;
  1581. n_digits++;
  1582. }
  1583. if((decimal_count <= max_decimal_count) && isdigit(word[n_digits]))
  1584. {
  1585. LookupNum3(tr, atoi(&word[n_digits]), buf1, 0,0,0);
  1586. strcat(ph_out,buf1);
  1587. n_digits += decimal_count;
  1588. }
  1589. break;
  1590. case NUM_DFRACTION_1: // italian, say "hundredths" if leading zero
  1591. case NUM_DFRACTION_5: // hungarian, always say "tenths" etc.
  1592. LookupNum3(tr, atoi(&word[n_digits]), ph_buf, 0,0,0);
  1593. if((word[n_digits]=='0') || (decimal_mode == NUM_DFRACTION_5))
  1594. {
  1595. // decimal part has leading zeros, so add a "hundredths" or "thousandths" suffix
  1596. sprintf(string,"_0Z%d",decimal_count);
  1597. if(Lookup(tr, string, buf1) == 0)
  1598. break; // revert to speaking single digits
  1599. strcat(ph_buf,buf1);
  1600. }
  1601. strcat(ph_out,ph_buf);
  1602. n_digits += decimal_count;
  1603. break;
  1604. case NUM_DFRACTION_3:
  1605. // Romanian decimal fractions
  1606. if((decimal_count <= 4) && (word[n_digits] != '0'))
  1607. {
  1608. LookupNum3(tr, atoi(&word[n_digits]), buf1, 0,0,0);
  1609. strcat(ph_out,buf1);
  1610. n_digits += decimal_count;
  1611. }
  1612. break;
  1613. }
  1614. }
  1615. while(isdigit(c = word[n_digits]) && (strlen(ph_out) < (N_WORD_PHONEMES - 10)))
  1616. {
  1617. // speak any remaining decimal fraction digits individually
  1618. value = word[n_digits++] - '0';
  1619. LookupNum2(tr, value, 2, buf1);
  1620. strcat(ph_out,buf1);
  1621. }
  1622. // something after the decimal part ?
  1623. if(Lookup(tr, "_dpt2", buf1))
  1624. strcat(ph_out,buf1);
  1625. if((c == tr->langopts.decimal_sep) && isdigit(word[n_digits+1]))
  1626. {
  1627. Lookup(tr, "_dpt", buf1);
  1628. strcat(ph_out,buf1);
  1629. }
  1630. else
  1631. {
  1632. decimal_point = 0;
  1633. }
  1634. }
  1635. if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH))
  1636. {
  1637. int next_char;
  1638. char *p;
  1639. p = &word[n_digits+1];
  1640. p += utf8_in(&next_char,p);
  1641. if((tr->langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
  1642. utf8_in(&next_char,p);
  1643. if(!iswalpha(next_char) && !((wtab[thousandplex].flags & FLAG_HYPHEN_AFTER) && (thousands_exact != 0)))
  1644. strcat(ph_out,str_pause); // don't add pause for 100s, 6th, etc.
  1645. }
  1646. *flags |= FLAG_FOUND;
  1647. speak_missing_thousands--;
  1648. if(skipwords)
  1649. dictionary_skipwords = skipwords;
  1650. return(1);
  1651. } // end of TranslateNumber_1
  1652. int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
  1653. {//=============================================================================================================
  1654. if(option_sayas == SAYAS_DIGITS1)
  1655. return(0); // speak digits individually
  1656. if(tr->langopts.numbers != 0)
  1657. return(TranslateNumber_1(tr, word1, ph_out, flags, wtab, control));
  1658. return(0);
  1659. } // end of TranslateNumber