eSpeak NG is an open-source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

numbers.c 58KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454
  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2015 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * Copyright (C) 2015 Reece H. Dunn *
  5. * *
  6. * This program is free software; you can redistribute it and/or modify *
  7. * it under the terms of the GNU General Public License as published by *
  8. * the Free Software Foundation; either version 3 of the License, or *
  9. * (at your option) any later version. *
  10. * *
  11. * This program is distributed in the hope that it will be useful, *
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  14. * GNU General Public License for more details. *
  15. * *
  16. * You should have received a copy of the GNU General Public License *
  17. * along with this program; if not, see: *
  18. * <http://www.gnu.org/licenses/>. *
  19. ***************************************************************************/
  20. #include <stdbool.h>
  21. #include <stdio.h>
  22. #include <ctype.h>
  23. #include <stdlib.h>
  24. #include <string.h>
  25. #include <wctype.h>
  26. #include <wchar.h>
  27. #include "speak_lib.h"
  28. #include "speech.h"
  29. #include "phoneme.h"
  30. #include "synthesize.h"
  31. #include "voice.h"
  32. #include "translate.h"
  // Accent / modifier codes packed into the letter_accents_* tables by the
  // LETTER() and LIGATURE() macros.  Apart from M_LIGATURE, each value is
  // used as an index into accents_tab[].
  enum {
      M_NAME         = 0,
      M_SMALLCAP     = 1,
      M_TURNED       = 2,
      M_REVERSED     = 3,
      M_CURL         = 4,
      M_ACUTE        = 5,
      M_BREVE        = 6,
      M_CARON        = 7,
      M_CEDILLA      = 8,
      M_CIRCUMFLEX   = 9,
      M_DIAERESIS    = 10,
      M_DOUBLE_ACUTE = 11,
      M_DOT_ABOVE    = 12,
      M_GRAVE        = 13,
      M_MACRON       = 14,
      M_OGONEK       = 15,
      M_RING         = 16,
      M_STROKE       = 17,
      M_TILDE        = 18,
      M_BAR          = 19,
      M_RETROFLEX    = 20,
      M_HOOK         = 21,

      // aliases
      M_MIDDLE_DOT   = M_DOT_ABOVE, // duplicate of M_DOT_ABOVE
      M_IMPLOSIVE    = M_HOOK,

      // flag bit: the table entry describes a ligature of two letters
      M_LIGATURE     = 0x8000
  };
  // File-scope state.  Nothing in the part of the file reviewed here reads or
  // writes these, so their exact meaning must be taken from their users
  // further down.  The explicit initialisers only spell out the zero value
  // that static storage already guarantees.
  static int n_digit_lookup = 0;
  static char *digit_lookup = NULL;
  static int speak_missing_thousands = 0;
  static int number_control = 0;
  // One entry per accent/modifier code.
  typedef struct {
      const char *name;   // token to look up in the *_list file
      int accent_flags;   // bit 0, say before the letter name
  } ACCENTS;

  // these are tokens to look up in the *_list file.
  // The position of each row is the accent code extracted from the
  // letter_accents_* tables.
  static ACCENTS accents_tab[] = {
      { .name = "_lig", .accent_flags = 1 }, //  0  ligature
      { .name = "_smc", .accent_flags = 0 }, //  1  smallcap
      { .name = "_tur", .accent_flags = 0 }, //  2  turned
      { .name = "_rev", .accent_flags = 0 }, //  3  reversed
      { .name = "_crl", .accent_flags = 0 }, //  4  curl
      { .name = "_acu", .accent_flags = 0 }, //  5  acute
      { .name = "_brv", .accent_flags = 0 }, //  6  breve
      { .name = "_hac", .accent_flags = 0 }, //  7  caron/hacek
      { .name = "_ced", .accent_flags = 0 }, //  8  cedilla
      { .name = "_cir", .accent_flags = 0 }, //  9  circumflex
      { .name = "_dia", .accent_flags = 0 }, // 10  diaeresis
      { .name = "_ac2", .accent_flags = 0 }, // 11  double acute
      { .name = "_dot", .accent_flags = 0 }, // 12  dot
      { .name = "_grv", .accent_flags = 0 }, // 13  grave
      { .name = "_mcn", .accent_flags = 0 }, // 14  macron
      { .name = "_ogo", .accent_flags = 0 }, // 15  ogonek
      { .name = "_rng", .accent_flags = 0 }, // 16  ring
      { .name = "_stk", .accent_flags = 0 }, // 17  stroke
      { .name = "_tld", .accent_flags = 0 }, // 18  tilde
      { .name = "_bar", .accent_flags = 0 }, // 19  bar
      { .name = "_rfx", .accent_flags = 0 }, // 20  retroflex
      { .name = "_hok", .accent_flags = 0 }, // 21  hook
  };
  // Table slot for an upper-case code point: no accent data is stored (a zero
  // entry is ignored by LookupAccentedLetter()).
  #define CAPITAL 0

  // Pack a base letter and up to two modifiers into one table entry:
  //   bits 0-5   base letter code (ch - 59)
  //   bits 6-10  first modifier  (M_* code)
  //   bits 11-14 second modifier (M_* code)
  // Arguments and the whole expansion are parenthesised so the macro is safe
  // inside larger expressions and with compound arguments.
  #define LETTER(ch,mod1,mod2) (((ch)-59)+((mod1) << 6)+((mod2) << 11))

  // Pack a two-letter ligature into one table entry:
  //   bits 0-5   first letter code  (ch1 - 59)
  //   bits 6-11  second letter code (ch2 - 59)
  //   bits 12-14 modifier (M_* code)
  //   bit  15    M_LIGATURE flag
  #define LIGATURE(ch1,ch2,mod1) (((ch1)-59)+(((ch2)-59) << 6)+((mod1) << 12)+M_LIGATURE)

  // Base-letter codes below 'a' for non-ASCII base letters.  (code - 59) is
  // the index of the real code point in non_ascii_tab[].
  #define L_ALPHA 60 // U+3B1
  #define L_SCHWA 61 // U+259
  #define L_OPEN_E 62 // U+25B
  #define L_GAMMA 63 // U+3B3
  #define L_IOTA 64 // U+3B9
  #define L_OE 65 // U+153
  #define L_OMEGA 66 // U+3C9
  #define L_PHI 67 // U+3C6
  #define L_ESH 68 // U+283
  #define L_UPSILON 69 // U+3C5
  #define L_EZH 70 // U+292
  #define L_GLOTTAL 71 // U+294
  #define L_RTAP 72 // U+27E
  #define L_RLONG 73 // U+27C
  // Real code points for the L_* base-letter codes, indexed by (code - 59).
  static const short non_ascii_tab[] = {
      0,     //  0  (unused)
      0x3b1, //  1  L_ALPHA
      0x259, //  2  L_SCHWA
      0x25b, //  3  L_OPEN_E
      0x3b3, //  4  L_GAMMA
      0x3b9, //  5  L_IOTA
      0x153, //  6  L_OE
      0x3c9, //  7  L_OMEGA
      0x3c6, //  8  L_PHI
      0x283, //  9  L_ESH
      0x3c5, // 10  L_UPSILON
      0x292, // 11  L_EZH
      0x294, // 12  L_GLOTTAL
      0x27e, // 13  L_RTAP
      0x27c, // 14  L_RLONG
  };
  112. // characters U+00e0 to U+017f
  113. static const unsigned short letter_accents_0e0[] = {
  114. LETTER('a',M_GRAVE,0), // U+00e0
  115. LETTER('a',M_ACUTE,0),
  116. LETTER('a',M_CIRCUMFLEX,0),
  117. LETTER('a',M_TILDE,0),
  118. LETTER('a',M_DIAERESIS,0),
  119. LETTER('a',M_RING,0),
  120. LIGATURE('a','e',0),
  121. LETTER('c',M_CEDILLA,0),
  122. LETTER('e',M_GRAVE,0),
  123. LETTER('e',M_ACUTE,0),
  124. LETTER('e',M_CIRCUMFLEX,0),
  125. LETTER('e',M_DIAERESIS,0),
  126. LETTER('i',M_GRAVE,0),
  127. LETTER('i',M_ACUTE,0),
  128. LETTER('i',M_CIRCUMFLEX,0),
  129. LETTER('i',M_DIAERESIS,0),
  130. LETTER('d',M_NAME,0), // eth // U+00f0
  131. LETTER('n',M_TILDE,0),
  132. LETTER('o',M_GRAVE,0),
  133. LETTER('o',M_ACUTE,0),
  134. LETTER('o',M_CIRCUMFLEX,0),
  135. LETTER('o',M_TILDE,0),
  136. LETTER('o',M_DIAERESIS,0),
  137. 0, // division sign
  138. LETTER('o',M_STROKE,0),
  139. LETTER('u',M_GRAVE,0),
  140. LETTER('u',M_ACUTE,0),
  141. LETTER('u',M_CIRCUMFLEX,0),
  142. LETTER('u',M_DIAERESIS,0),
  143. LETTER('y',M_ACUTE,0),
  144. LETTER('t',M_NAME,0), // thorn
  145. LETTER('y',M_DIAERESIS,0),
  146. CAPITAL, // U+0100
  147. LETTER('a',M_MACRON,0),
  148. CAPITAL,
  149. LETTER('a',M_BREVE,0),
  150. CAPITAL,
  151. LETTER('a',M_OGONEK,0),
  152. CAPITAL,
  153. LETTER('c',M_ACUTE,0),
  154. CAPITAL,
  155. LETTER('c',M_CIRCUMFLEX,0),
  156. CAPITAL,
  157. LETTER('c',M_DOT_ABOVE,0),
  158. CAPITAL,
  159. LETTER('c',M_CARON,0),
  160. CAPITAL,
  161. LETTER('d',M_CARON,0),
  162. CAPITAL, // U+0110
  163. LETTER('d',M_STROKE,0),
  164. CAPITAL,
  165. LETTER('e',M_MACRON,0),
  166. CAPITAL,
  167. LETTER('e',M_BREVE,0),
  168. CAPITAL,
  169. LETTER('e',M_DOT_ABOVE,0),
  170. CAPITAL,
  171. LETTER('e',M_OGONEK,0),
  172. CAPITAL,
  173. LETTER('e',M_CARON,0),
  174. CAPITAL,
  175. LETTER('g',M_CIRCUMFLEX,0),
  176. CAPITAL,
  177. LETTER('g',M_BREVE,0),
  178. CAPITAL, // U+0120
  179. LETTER('g',M_DOT_ABOVE,0),
  180. CAPITAL,
  181. LETTER('g',M_CEDILLA,0),
  182. CAPITAL,
  183. LETTER('h',M_CIRCUMFLEX,0),
  184. CAPITAL,
  185. LETTER('h',M_STROKE,0),
  186. CAPITAL,
  187. LETTER('i',M_TILDE,0),
  188. CAPITAL,
  189. LETTER('i',M_MACRON,0),
  190. CAPITAL,
  191. LETTER('i',M_BREVE,0),
  192. CAPITAL,
  193. LETTER('i',M_OGONEK,0),
  194. CAPITAL, // U+0130
  195. LETTER('i',M_NAME,0), // dotless i
  196. CAPITAL,
  197. LIGATURE('i','j',0),
  198. CAPITAL,
  199. LETTER('j',M_CIRCUMFLEX,0),
  200. CAPITAL,
  201. LETTER('k',M_CEDILLA,0),
  202. LETTER('k',M_NAME,0), // kra
  203. CAPITAL,
  204. LETTER('l',M_ACUTE,0),
  205. CAPITAL,
  206. LETTER('l',M_CEDILLA,0),
  207. CAPITAL,
  208. LETTER('l',M_CARON,0),
  209. CAPITAL,
  210. LETTER('l',M_MIDDLE_DOT,0), // U+0140
  211. CAPITAL,
  212. LETTER('l',M_STROKE,0),
  213. CAPITAL,
  214. LETTER('n',M_ACUTE,0),
  215. CAPITAL,
  216. LETTER('n',M_CEDILLA,0),
  217. CAPITAL,
  218. LETTER('n',M_CARON,0),
  219. LETTER('n',M_NAME,0), // apostrophe n
  220. CAPITAL,
  221. LETTER('n',M_NAME,0), // eng
  222. CAPITAL,
  223. LETTER('o',M_MACRON,0),
  224. CAPITAL,
  225. LETTER('o',M_BREVE,0),
  226. CAPITAL, // U+0150
  227. LETTER('o',M_DOUBLE_ACUTE,0),
  228. CAPITAL,
  229. LIGATURE('o','e',0),
  230. CAPITAL,
  231. LETTER('r',M_ACUTE,0),
  232. CAPITAL,
  233. LETTER('r',M_CEDILLA,0),
  234. CAPITAL,
  235. LETTER('r',M_CARON,0),
  236. CAPITAL,
  237. LETTER('s',M_ACUTE,0),
  238. CAPITAL,
  239. LETTER('s',M_CIRCUMFLEX,0),
  240. CAPITAL,
  241. LETTER('s',M_CEDILLA,0),
  242. CAPITAL, // U+0160
  243. LETTER('s',M_CARON,0),
  244. CAPITAL,
  245. LETTER('t',M_CEDILLA,0),
  246. CAPITAL,
  247. LETTER('t',M_CARON,0),
  248. CAPITAL,
  249. LETTER('t',M_STROKE,0),
  250. CAPITAL,
  251. LETTER('u',M_TILDE,0),
  252. CAPITAL,
  253. LETTER('u',M_MACRON,0),
  254. CAPITAL,
  255. LETTER('u',M_BREVE,0),
  256. CAPITAL,
  257. LETTER('u',M_RING,0),
  258. CAPITAL, // U+0170
  259. LETTER('u',M_DOUBLE_ACUTE,0),
  260. CAPITAL,
  261. LETTER('u',M_OGONEK,0),
  262. CAPITAL,
  263. LETTER('w',M_CIRCUMFLEX,0),
  264. CAPITAL,
  265. LETTER('y',M_CIRCUMFLEX,0),
  266. CAPITAL, // Y-DIAERESIS
  267. CAPITAL,
  268. LETTER('z',M_ACUTE,0),
  269. CAPITAL,
  270. LETTER('z',M_DOT_ABOVE,0),
  271. CAPITAL,
  272. LETTER('z',M_CARON,0),
  273. LETTER('s',M_NAME,0), // long-s // U+17f
  274. };
  275. // characters U+0250 to U+029F
  276. static const unsigned short letter_accents_250[] = {
  277. LETTER('a',M_TURNED,0), // U+250
  278. LETTER(L_ALPHA,0,0),
  279. LETTER(L_ALPHA,M_TURNED,0),
  280. LETTER('b',M_IMPLOSIVE,0),
  281. 0, // open-o
  282. LETTER('c',M_CURL,0),
  283. LETTER('d',M_RETROFLEX,0),
  284. LETTER('d',M_IMPLOSIVE,0),
  285. LETTER('e',M_REVERSED,0), // U+258
  286. 0, // schwa
  287. LETTER(L_SCHWA,M_HOOK,0),
  288. 0, // open-e
  289. LETTER(L_OPEN_E,M_REVERSED,0),
  290. LETTER(L_OPEN_E,M_HOOK,M_REVERSED),
  291. 0,
  292. LETTER('j',M_BAR,0),
  293. LETTER('g',M_IMPLOSIVE,0), // U+260
  294. LETTER('g',0,0),
  295. LETTER('g',M_SMALLCAP,0),
  296. LETTER(L_GAMMA,0,0),
  297. 0, // ramshorn
  298. LETTER('h',M_TURNED,0),
  299. LETTER('h',M_HOOK,0),
  300. 0,
  301. LETTER('i',M_BAR,0), // U+268
  302. LETTER(L_IOTA,0,0),
  303. LETTER('i',M_SMALLCAP,0),
  304. LETTER('l',M_TILDE,0),
  305. LETTER('l',M_BAR,0),
  306. LETTER('l',M_RETROFLEX,0),
  307. LIGATURE('l','z',0),
  308. LETTER('m',M_TURNED,0),
  309. 0,
  310. LETTER('m',M_HOOK,0),
  311. 0,
  312. LETTER('n',M_RETROFLEX,0),
  313. LETTER('n',M_SMALLCAP,0),
  314. LETTER('o',M_BAR,0),
  315. LIGATURE('o','e',M_SMALLCAP),
  316. 0,
  317. LETTER(L_PHI,0,0), // U+278
  318. LETTER('r',M_TURNED,0),
  319. LETTER(L_RLONG,M_TURNED,0),
  320. LETTER('r',M_RETROFLEX,M_TURNED),
  321. 0,
  322. LETTER('r',M_RETROFLEX,0),
  323. 0, // r-tap
  324. LETTER(L_RTAP,M_REVERSED,0),
  325. LETTER('r',M_SMALLCAP,0), // U+280
  326. LETTER('r',M_TURNED,M_SMALLCAP),
  327. LETTER('s',M_RETROFLEX,0),
  328. 0, // esh
  329. LETTER('j',M_HOOK,0),
  330. LETTER(L_ESH,M_REVERSED,0),
  331. LETTER(L_ESH,M_CURL,0),
  332. LETTER('t',M_TURNED,0),
  333. LETTER('t',M_RETROFLEX,0), // U+288
  334. LETTER('u',M_BAR,0),
  335. LETTER(L_UPSILON,0,0),
  336. LETTER('v',M_HOOK,0),
  337. LETTER('v',M_TURNED,0),
  338. LETTER('w',M_TURNED,0),
  339. LETTER('y',M_TURNED,0),
  340. LETTER('y',M_SMALLCAP,0),
  341. LETTER('z',M_RETROFLEX,0), // U+290
  342. LETTER('z',M_CURL,0),
  343. 0, // ezh
  344. LETTER(L_EZH,M_CURL,0),
  345. 0, // glottal stop
  346. LETTER(L_GLOTTAL,M_REVERSED,0),
  347. LETTER(L_GLOTTAL,M_TURNED,0),
  348. 0,
  349. 0, // bilabial click // U+298
  350. LETTER('b',M_SMALLCAP,0),
  351. 0,
  352. LETTER('g',M_IMPLOSIVE,M_SMALLCAP),
  353. LETTER('h',M_SMALLCAP,0),
  354. LETTER('j',M_CURL,0),
  355. LETTER('k',M_TURNED,0),
  356. LETTER('l',M_SMALLCAP,0),
  357. LETTER('q',M_HOOK,0), // U+2a0
  358. LETTER(L_GLOTTAL,M_STROKE,0),
  359. LETTER(L_GLOTTAL,M_STROKE,M_REVERSED),
  360. LIGATURE('d','z',0),
  361. 0, // dezh
  362. LIGATURE('d','z',M_CURL),
  363. LIGATURE('t','s',0),
  364. 0, // tesh
  365. LIGATURE('t','s',M_CURL),
  366. };
  367. static int LookupLetter2(Translator *tr, unsigned int letter, char *ph_buf)
  368. {
  369. int len;
  370. char single_letter[10];
  371. single_letter[0] = 0;
  372. single_letter[1] = '_';
  373. len = utf8_out(letter, &single_letter[2]);
  374. single_letter[len+2] = ' ';
  375. single_letter[len+3] = 0;
  376. if(Lookup(tr, &single_letter[1], ph_buf) == 0)
  377. {
  378. single_letter[1] = ' ';
  379. if(Lookup(tr, &single_letter[2], ph_buf) == 0)
  380. {
  381. TranslateRules(tr, &single_letter[2], ph_buf, 20, NULL,0,NULL);
  382. }
  383. }
  384. return(ph_buf[0]);
  385. }
  386. void LookupAccentedLetter(Translator *tr, unsigned int letter, char *ph_buf)
  387. {
  388. // lookup the character in the accents table
  389. int accent_data = 0;
  390. int accent1 = 0;
  391. int accent2 = 0;
  392. int flags1, flags2;
  393. int basic_letter;
  394. int letter2=0;
  395. char ph_letter1[30];
  396. char ph_letter2[30];
  397. char ph_accent1[30];
  398. char ph_accent2[30];
  399. ph_accent2[0] = 0;
  400. if((letter >= 0xe0) && (letter < 0x17f))
  401. {
  402. accent_data = letter_accents_0e0[letter - 0xe0];
  403. }
  404. else if((letter >= 0x250) && (letter <= 0x2a8))
  405. {
  406. accent_data = letter_accents_250[letter - 0x250];
  407. }
  408. if(accent_data != 0)
  409. {
  410. basic_letter = (accent_data & 0x3f) + 59;
  411. if(basic_letter < 'a')
  412. basic_letter = non_ascii_tab[basic_letter-59];
  413. if(accent_data & M_LIGATURE)
  414. {
  415. letter2 = (accent_data >> 6) & 0x3f;
  416. letter2 += 59;
  417. accent2 = (accent_data >> 12) & 0x7;
  418. }
  419. else
  420. {
  421. accent1 = (accent_data >> 6) & 0x1f;
  422. accent2 = (accent_data >> 11) & 0xf;
  423. }
  424. if((accent1==0) && !(accent_data & M_LIGATURE))
  425. {
  426. // just a letter name, not an accented character or ligature
  427. return;
  428. }
  429. if((flags1 = Lookup(tr, accents_tab[accent1].name, ph_accent1)) != 0)
  430. {
  431. if(LookupLetter2(tr, basic_letter, ph_letter1) != 0)
  432. {
  433. if(accent2 != 0)
  434. {
  435. flags2 = Lookup(tr, accents_tab[accent2].name, ph_accent2);
  436. if(flags2 & FLAG_ACCENT_BEFORE)
  437. {
  438. strcpy(ph_buf,ph_accent2);
  439. ph_buf += strlen(ph_buf);
  440. ph_accent2[0] = 0;
  441. }
  442. }
  443. if(letter2 != 0)
  444. {
  445. //ligature
  446. LookupLetter2(tr, letter2, ph_letter2);
  447. sprintf(ph_buf,"%s%c%s%c%s%s",ph_accent1, phonPAUSE_VSHORT, ph_letter1, phonSTRESS_P, ph_letter2, ph_accent2);
  448. }
  449. else
  450. {
  451. if(accent1 == 0)
  452. strcpy(ph_buf, ph_letter1);
  453. else if((tr->langopts.accents & 1) || (flags1 & FLAG_ACCENT_BEFORE) || (accents_tab[accent1].accent_flags & 1))
  454. sprintf(ph_buf,"%s%c%c%s", ph_accent1, phonPAUSE_VSHORT, phonSTRESS_P, ph_letter1);
  455. else
  456. sprintf(ph_buf,"%c%s%c%s%c", phonSTRESS_2, ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT);
  457. }
  458. }
  459. }
  460. }
  461. }
  462. void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_buf1, int control)
  463. {
  464. // control, bit 0: not the first letter of a word
  465. int len;
  466. static char single_letter[10] = {0,0};
  467. unsigned int dict_flags[2];
  468. char ph_buf3[40];
  469. ph_buf1[0] = 0;
  470. len = utf8_out(letter,&single_letter[2]);
  471. single_letter[len+2] = ' ';
  472. if(next_byte == -1)
  473. {
  474. // speaking normal text, not individual characters
  475. if(Lookup(tr, &single_letter[2], ph_buf1) != 0)
  476. return;
  477. single_letter[1] = '_';
  478. if(Lookup(tr, &single_letter[1], ph_buf3) != 0)
  479. return; // the character is specified as _* so ignore it when speaking normal text
  480. // check whether this character is specified for English
  481. if(tr->translator_name == L('e','n'))
  482. return; // we are already using English
  483. SetTranslator2("en");
  484. if(Lookup(translator2, &single_letter[2], ph_buf3) != 0)
  485. {
  486. // yes, switch to English and re-translate the word
  487. sprintf(ph_buf1,"%c",phonSWITCH);
  488. }
  489. SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
  490. return;
  491. }
  492. if((letter <= 32) || iswspace(letter))
  493. {
  494. // lookup space as _&32 etc.
  495. sprintf(&single_letter[1],"_#%d ",letter);
  496. Lookup(tr, &single_letter[1], ph_buf1);
  497. return;
  498. }
  499. if(next_byte != ' ')
  500. next_byte = RULE_SPELLING;
  501. single_letter[3+len] = next_byte; // follow by space-space if the end of the word, or space-31
  502. single_letter[1] = '_';
  503. // if the $accent flag is set for this letter, use the accents table (below)
  504. dict_flags[1] = 0;
  505. if(Lookup(tr, &single_letter[1], ph_buf3) == 0)
  506. {
  507. single_letter[1] = ' ';
  508. if(Lookup(tr, &single_letter[2], ph_buf3) == 0)
  509. {
  510. TranslateRules(tr, &single_letter[2], ph_buf3, sizeof(ph_buf3), NULL,FLAG_NO_TRACE,NULL);
  511. }
  512. }
  513. if(ph_buf3[0] == 0)
  514. {
  515. LookupAccentedLetter(tr, letter, ph_buf3);
  516. }
  517. strcpy(ph_buf1, ph_buf3);
  518. if((ph_buf1[0] == 0) || (ph_buf1[0] == phonSWITCH))
  519. {
  520. return;
  521. }
  522. dict_flags[0] = 0;
  523. dict_flags[1] = 0;
  524. SetWordStress(tr, ph_buf1, dict_flags, -1, control & 1);
  525. }
  // unicode ranges for non-ascii digits 0-9
  // Each value is the code point of the digit zero of one block; the list is
  // terminated by 0.
  static const int number_ranges[] = {
      0x660, 0x6f0, // arabic
      0x966, 0x9e6, 0xa66, 0xae6, 0xb66, 0xbe6, 0xc66, 0xce6, 0xd66, // indic
      0xe50, 0xed0, 0xf20, 0x1040, 0x1090,
      0
  }; // these must be in ascending order

  // Change non-ascii digit into ascii digit '0' to '9', (or -1 if not)
  int NonAsciiNumber(int letter)
  {
      int ix;

      for (ix = 0; number_ranges[ix] != 0; ix++) {
          int zero = number_ranges[ix];

          if (letter < zero)
              return -1; // the list is ascending, so no later block can match

          if ((letter - zero) < 10)
              return (letter - zero) + '0';
      }
      return -1;
  }
  // Flag bits added to the base character in derived_letters[]
  #define L_SUB 0x4000 // subscript
  #define L_SUP 0x8000 // superscript

  // Dictionary tokens for the modifiers, indexed by (flags >> 14)
  static const char *modifiers[4] = {
      [L_SUB >> 14] = "_sub",
      [L_SUP >> 14] = "_sup",
  };
  550. // this list must be in ascending order
  551. static unsigned short derived_letters[] = {
  552. 0x00aa, 'a'+L_SUP,
  553. 0x00b2, '2'+L_SUP,
  554. 0x00b3, '3'+L_SUP,
  555. 0x00b9, '1'+L_SUP,
  556. 0x00ba, 'o'+L_SUP,
  557. 0x02b0, 'h'+L_SUP,
  558. 0x02b1, 0x266+L_SUP,
  559. 0x02b2, 'j'+L_SUP,
  560. 0x02b3, 'r'+L_SUP,
  561. 0x02b4, 0x279+L_SUP,
  562. 0x02b5, 0x27b+L_SUP,
  563. 0x02b6, 0x281+L_SUP,
  564. 0x02b7, 'w'+L_SUP,
  565. 0x02b8, 'y'+L_SUP,
  566. 0x02c0, 0x294+L_SUP,
  567. 0x02c1, 0x295+L_SUP,
  568. 0x02e0, 0x263+L_SUP,
  569. 0x02e1, 'l'+L_SUP,
  570. 0x02e2, 's'+L_SUP,
  571. 0x02e3, 'x'+L_SUP,
  572. 0x2070, '0'+L_SUP,
  573. 0x2071, 'i'+L_SUP,
  574. 0x2074, '4'+L_SUP,
  575. 0x2075, '5'+L_SUP,
  576. 0x2076, '6'+L_SUP,
  577. 0x2077, '7'+L_SUP,
  578. 0x2078, '8'+L_SUP,
  579. 0x2079, '9'+L_SUP,
  580. 0x207a, '+'+L_SUP,
  581. 0x207b, '-'+L_SUP,
  582. 0x207c, '='+L_SUP,
  583. 0x207d, '('+L_SUP,
  584. 0x207e, ')'+L_SUP,
  585. 0x207f, 'n'+L_SUP,
  586. 0x2080, '0'+L_SUB,
  587. 0x2081, '1'+L_SUB,
  588. 0x2082, '2'+L_SUB,
  589. 0x2083, '3'+L_SUB,
  590. 0x2084, '4'+L_SUB,
  591. 0x2085, '5'+L_SUB,
  592. 0x2086, '6'+L_SUB,
  593. 0x2087, '7'+L_SUB,
  594. 0x2088, '8'+L_SUB,
  595. 0x2089, '9'+L_SUB,
  596. 0x208a, '+'+L_SUB,
  597. 0x208b, '-'+L_SUB,
  598. 0x208c, '='+L_SUB,
  599. 0x208d, '('+L_SUB,
  600. 0x208e, ')'+L_SUB,
  601. 0x2090, 'a'+L_SUB,
  602. 0x2091, 'e'+L_SUB,
  603. 0x2092, 'o'+L_SUB,
  604. 0x2093, 'x'+L_SUB,
  605. 0x2094, 0x259+L_SUB,
  606. 0x2095, 'h'+L_SUB,
  607. 0x2096, 'k'+L_SUB,
  608. 0x2097, 'l'+L_SUB,
  609. 0x2098, 'm'+L_SUB,
  610. 0x2099, 'n'+L_SUB,
  611. 0x209a, 'p'+L_SUB,
  612. 0x209b, 's'+L_SUB,
  613. 0x209c, 't'+L_SUB,
  614. 0,0
  615. };
  // names, using phonemes available to all languages
  // (presumably the names of the hexadecimal digits A to F, in that order;
  // confirm against the code that uses this table)
  static const char *hex_letters[] = {
      "'e:j",
      "b'i:",
      "s'i:",
      "d'i:",
      "'i:",
      "'ef"
  };
  617. int IsSuperscript(int letter)
  618. {
  619. // is this a subscript or superscript letter ?
  620. int ix;
  621. int c;
  622. for(ix=0; (c = derived_letters[ix]) != 0; ix+=2)
  623. {
  624. if(c > letter)
  625. break;
  626. if(c == letter)
  627. return(derived_letters[ix+1]);
  628. }
  629. return(0);
  630. }
  631. int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)
  632. {
  633. // get pronunciation for an isolated letter
  634. // return number of bytes used by the letter
  635. // control bit 0: a non-initial letter in a word
  636. // bit 1: say 'capital'
  637. // bit 2: say character code for unknown letters
  638. int n_bytes;
  639. int letter;
  640. int len;
  641. int ix;
  642. int c;
  643. char *p2;
  644. char *pbuf;
  645. const char *modifier;
  646. ALPHABET *alphabet;
  647. int al_offset;
  648. int al_flags;
  649. int language;
  650. int number;
  651. int phontab_1;
  652. int speak_letter_number;
  653. char capital[30];
  654. char ph_buf[80];
  655. char ph_buf2[80];
  656. char ph_alphabet[80];
  657. char hexbuf[12];
  658. static char pause_string[] = {phonPAUSE, 0};
  659. ph_buf[0] = 0;
  660. ph_alphabet[0] = 0;
  661. capital[0] = 0;
  662. phontab_1 = translator->phoneme_tab_ix;
  663. n_bytes = utf8_in(&letter,word);
  664. if((letter & 0xfff00) == 0x0e000)
  665. {
  666. letter &= 0xff; // uncode private usage area
  667. }
  668. if(control & 2)
  669. {
  670. // include CAPITAL information
  671. if(iswupper2(letter))
  672. {
  673. Lookup(tr, "_cap", capital);
  674. }
  675. }
  676. letter = towlower2(letter);
  677. LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
  678. if(ph_buf[0] == 0)
  679. {
  680. // is this a subscript or superscript letter ?
  681. if((c = IsSuperscript(letter)) != 0)
  682. {
  683. letter = c & 0x3fff;
  684. if((control & 4 ) && ((modifier = modifiers[c >> 14]) != NULL))
  685. {
  686. // don't say "superscript" during normal text reading
  687. Lookup(tr, modifier, capital);
  688. if(capital[0] == 0)
  689. {
  690. capital[2] = SetTranslator2("en"); // overwrites previous contents of translator2
  691. Lookup(translator2, modifier, &capital[3]);
  692. if(capital[3] != 0)
  693. {
  694. capital[0] = phonPAUSE;
  695. capital[1] = phonSWITCH;
  696. len = strlen(&capital[3]);
  697. capital[len+3] = phonSWITCH;
  698. capital[len+4] = phontab_1;
  699. capital[len+5] = 0;
  700. }
  701. }
  702. }
  703. }
  704. LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
  705. }
  706. if(ph_buf[0] == phonSWITCH)
  707. {
  708. strcpy(phonemes,ph_buf);
  709. return(0);
  710. }
  711. if((ph_buf[0] == 0) && ((number = NonAsciiNumber(letter)) > 0))
  712. {
  713. // convert a non-ascii number to 0-9
  714. LookupLetter(tr, number, 0, ph_buf, control & 1);
  715. }
  716. al_offset = 0;
  717. al_flags = 0;
  718. if((alphabet = AlphabetFromChar(letter)) != NULL)
  719. {
  720. al_offset = alphabet->offset;
  721. al_flags = alphabet->flags;
  722. }
  723. if(alphabet != current_alphabet)
  724. {
  725. // speak the name of the alphabet
  726. current_alphabet = alphabet;
  727. if((alphabet != NULL) && !(al_flags & AL_DONT_NAME) && (al_offset != translator->letter_bits_offset))
  728. {
  729. if((al_flags & AL_DONT_NAME) || (al_offset == translator->langopts.alt_alphabet) || (al_offset == translator->langopts.our_alphabet))
  730. {
  731. // don't say the alphabet name
  732. }
  733. else
  734. {
  735. ph_buf2[0] = 0;
  736. if(Lookup(translator, alphabet->name, ph_alphabet) == 0) // the original language for the current voice
  737. {
  738. // Can't find the local name for this alphabet, use the English name
  739. ph_alphabet[2] = SetTranslator2("en"); // overwrites previous contents of translator2
  740. Lookup(translator2, alphabet->name, ph_buf2);
  741. }
  742. else if(translator != tr)
  743. {
  744. phontab_1 = tr->phoneme_tab_ix;
  745. strcpy(ph_buf2, ph_alphabet);
  746. ph_alphabet[2] = translator->phoneme_tab_ix;
  747. }
  748. if(ph_buf2[0] != 0)
  749. {
  750. // we used a different language for the alphabet name (now in ph_buf2)
  751. ph_alphabet[0] = phonPAUSE;
  752. ph_alphabet[1] = phonSWITCH;
  753. strcpy(&ph_alphabet[3], ph_buf2);
  754. len = strlen(ph_buf2) + 3;
  755. ph_alphabet[len] = phonSWITCH;
  756. ph_alphabet[len+1] = phontab_1;
  757. ph_alphabet[len+2] = 0;
  758. }
  759. }
  760. }
  761. }
  762. // caution: SetWordStress() etc don't expect phonSWITCH + phoneme table number
  763. if(ph_buf[0] == 0)
  764. {
  765. if((al_offset != 0) && (al_offset == translator->langopts.alt_alphabet))
  766. language = translator->langopts.alt_alphabet_lang;
  767. else
  768. if((alphabet != NULL) && (alphabet->language != 0) && !(al_flags & AL_NOT_LETTERS))
  769. language = alphabet->language;
  770. else
  771. language = L('e','n');
  772. if((language != tr->translator_name) || (language == L('k','o')))
  773. {
  774. char *p3;
  775. int initial, code;
  776. char hangul_buf[12];
  777. // speak in the language for this alphabet (or English)
  778. ph_buf[2] = SetTranslator2(WordToString2(language));
  779. if(translator2 != NULL)
  780. {
  781. if(((code = letter - 0xac00) >= 0) && (letter <= 0xd7af))
  782. {
  783. // Special case for Korean letters.
  784. // break a syllable hangul into 2 or 3 individual jamo
  785. hangul_buf[0] = ' ';
  786. p3 = &hangul_buf[1];
  787. if((initial = (code/28)/21) != 11)
  788. {
  789. p3 += utf8_out(initial + 0x1100, p3);
  790. }
  791. utf8_out(((code/28) % 21) + 0x1161, p3); // medial
  792. utf8_out((code % 28) + 0x11a7, &p3[3]); // final
  793. p3[6] = ' ';
  794. p3[7] = 0;
  795. ph_buf[3] = 0;
  796. TranslateRules(translator2, &hangul_buf[1], &ph_buf[3], sizeof(ph_buf)-3, NULL, 0, NULL);
  797. SetWordStress(translator2, &ph_buf[3], NULL, -1, 0);
  798. }
  799. else
  800. {
  801. LookupLetter(translator2, letter, word[n_bytes], &ph_buf[3], control & 1);
  802. }
  803. if(ph_buf[3] == phonSWITCH)
  804. {
  805. // another level of language change
  806. ph_buf[2] = SetTranslator2(&ph_buf[4]);
  807. LookupLetter(translator2, letter, word[n_bytes], &ph_buf[3], control & 1);
  808. }
  809. SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
  810. if(ph_buf[3] != 0)
  811. {
  812. ph_buf[0] = phonPAUSE;
  813. ph_buf[1] = phonSWITCH;
  814. len = strlen(&ph_buf[3]) + 3;
  815. ph_buf[len] = phonSWITCH; // switch back
  816. ph_buf[len+1] = tr->phoneme_tab_ix;
  817. ph_buf[len+2] = 0;
  818. }
  819. }
  820. }
  821. }
  822. if(ph_buf[0] == 0)
  823. {
  824. // character name not found
  825. if(ph_buf[0]== 0)
  826. {
  827. speak_letter_number = 1;
  828. if(!(al_flags & AL_NO_SYMBOL))
  829. {
  830. if(iswalpha2(letter))
  831. Lookup(translator, "_?A", ph_buf);
  832. if((ph_buf[0]==0) && !iswspace(letter))
  833. Lookup(translator, "_??", ph_buf);
  834. if(ph_buf[0] == 0)
  835. {
  836. EncodePhonemes("l'et@", ph_buf, NULL);
  837. }
  838. }
  839. if(!(control & 4) && (al_flags & AL_NOT_CODE))
  840. {
  841. // don't speak the character code number, unless we want full details of this character
  842. speak_letter_number = 0;
  843. }
  844. if(speak_letter_number)
  845. {
  846. if(al_offset == 0x2800)
  847. {
  848. // braille dots symbol, list the numbered dots
  849. p2 = hexbuf;
  850. for(ix=0; ix<8; ix++)
  851. {
  852. if(letter & (1 << ix))
  853. {
  854. *p2++ = '1'+ix;
  855. }
  856. }
  857. *p2 = 0;
  858. }
  859. else
  860. {
  861. // speak the hexadecimal number of the character code
  862. sprintf(hexbuf,"%x",letter);
  863. }
  864. pbuf = ph_buf;
  865. for(p2 = hexbuf; *p2 != 0; p2++)
  866. {
  867. pbuf += strlen(pbuf);
  868. *pbuf++ = phonPAUSE_VSHORT;
  869. LookupLetter(translator, *p2, 0, pbuf, 1);
  870. if(((pbuf[0] == 0) || (pbuf[0]==phonSWITCH)) && (*p2 >= 'a'))
  871. {
  872. // This language has no translation for 'a' to 'f', speak English names using base phonemes
  873. EncodePhonemes(hex_letters[*p2 - 'a'], pbuf, NULL);
  874. }
  875. }
  876. strcat(pbuf, pause_string);
  877. }
  878. }
  879. }
  880. len = strlen(phonemes);
  881. if(tr->langopts.accents & 2) // 'capital' before or after the word ?
  882. sprintf(ph_buf2,"%c%s%s%s",0xff,ph_alphabet,ph_buf,capital);
  883. else
  884. sprintf(ph_buf2,"%c%s%s%s",0xff,ph_alphabet,capital,ph_buf); // the 0xff marker will be removed or replaced in SetSpellingStress()
  885. if((len + strlen(ph_buf2)) < N_WORD_PHONEMES)
  886. {
  887. strcpy(&phonemes[len],ph_buf2);
  888. }
  889. return(n_bytes);
  890. }
  891. void SetSpellingStress(Translator *tr, char *phonemes, int control, int n_chars)
  892. {
  893. // Individual letter names, reduce the stress of some.
  894. int ix;
  895. unsigned int c;
  896. int n_stress=0;
  897. int prev = 0;
  898. int count;
  899. unsigned char buf[N_WORD_PHONEMES];
  900. for(ix=0; (c = phonemes[ix]) != 0; ix++)
  901. {
  902. if((c == phonSTRESS_P) && (prev != phonSWITCH))
  903. {
  904. n_stress++;
  905. }
  906. buf[ix] = prev = c;
  907. }
  908. buf[ix] = 0;
  909. count = 0;
  910. prev = 0;
  911. for(ix=0; (c = buf[ix]) != 0; ix++)
  912. {
  913. if((c == phonSTRESS_P) && (n_chars > 1) && (prev != phonSWITCH))
  914. {
  915. count++;
  916. if(tr->langopts.spelling_stress == 1)
  917. {
  918. // stress on initial letter when spelling
  919. if(count > 1)
  920. c = phonSTRESS_3;
  921. }
  922. else
  923. {
  924. if(count != n_stress)
  925. {
  926. if(((count % 3) != 0) || (count == n_stress-1))
  927. c = phonSTRESS_3; // reduce to secondary stress
  928. }
  929. }
  930. }
  931. else if(c == 0xff)
  932. {
  933. if((control < 2) || (ix==0))
  934. continue; // don't insert pauses
  935. if(control == 4)
  936. c = phonPAUSE; // pause after each character
  937. if(((count % 3) == 0) || (control > 2))
  938. c = phonPAUSE_NOLINK; // pause following a primary stress
  939. else
  940. c = phonPAUSE_VSHORT;
  941. }
  942. *phonemes++ = prev = c;
  943. }
  944. if(control >= 2)
  945. *phonemes++ = phonPAUSE_NOLINK;
  946. *phonemes = 0;
  947. }
// Numbers

// Phonemes of the ordinal ending. LookupNum2() copies it as the ordinal suffix of
// a number or appends it to the tens; LookupNum3() appends it to the hundreds or
// uses it alone when there are no tens/units. It is filled in outside this section.
static char ph_ordinal2[12];
// Alternate ordinal ending: LookupNum2() uses it instead of ph_ordinal2 when a
// "_<n>ox"-style entry is found and this string is not empty.
static char ph_ordinal2x[12];
  951. static int CheckDotOrdinal(Translator *tr, char *word, char *word_end, WORD_TAB *wtab, int roman)
  952. {
  953. int ordinal = 0;
  954. int c2;
  955. int nextflags;
  956. if((tr->langopts.numbers & NUM_ORDINAL_DOT) && ((word_end[0] == '.') || (wtab[0].flags & FLAG_HAS_DOT)) && !(wtab[1].flags & FLAG_NOSPACE))
  957. {
  958. if(roman || !(wtab[1].flags & FLAG_FIRST_UPPER))
  959. {
  960. if(word_end[0] == '.')
  961. utf8_in(&c2, &word_end[2]);
  962. else
  963. utf8_in(&c2, &word_end[0]);
  964. if((word_end[0] != 0) && (word_end[1] != 0) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || IsAlpha(c2)))
  965. {
  966. // ordinal number is indicated by dot after the number
  967. // but not if the next word starts with an upper-case letter
  968. // (c2 == 0) is for cases such as, "2.,"
  969. ordinal = 2;
  970. if(word_end[0] == '.')
  971. word_end[0] = ' ';
  972. if((roman==0) && (tr->translator_name == L('h','u')))
  973. {
  974. // lang=hu don't treat dot as ordinal indicator if the next word is a month name ($alt). It may have a suffix.
  975. nextflags = 0;
  976. if(IsAlpha(c2))
  977. {
  978. nextflags = TranslateWord(tr, &word_end[2], 0, NULL, NULL);
  979. }
  980. if((tr->prev_dict_flags[0] & FLAG_ALT_TRANS) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || iswdigit(c2)))
  981. ordinal = 0; // TEST 09.02.10
  982. if(nextflags & FLAG_ALT_TRANS)
  983. ordinal = 0;
  984. if(nextflags & FLAG_ALT3_TRANS)
  985. {
  986. if(word[-2] == '-')
  987. ordinal = 0; // eg. december 2-5. között
  988. if(tr->prev_dict_flags[0] & (FLAG_ALT_TRANS | FLAG_ALT3_TRANS))
  989. ordinal = 0x22;
  990. }
  991. }
  992. }
  993. }
  994. }
  995. return(ordinal);
  996. }
  997. static int hu_number_e(const char *word, int thousandplex, int value)
  998. {
  999. // lang-hu: variant form of numbers when followed by hyphen and a suffix starting with 'a' or 'e' (but not a, e, az, ez, azt, ezt, att. ett
  1000. if((word[0] == 'a') || (word[0] == 'e'))
  1001. {
  1002. if((word[1] == ' ') || (word[1] == 'z') || ((word[1] == 't') && (word[2] == 't')))
  1003. return(0);
  1004. if(((thousandplex==1) || ((value % 1000) == 0)) && (word[1] == 'l'))
  1005. return(0); // 1000-el
  1006. return(1);
  1007. }
  1008. return(0);
  1009. }
  1010. int TranslateRoman(Translator *tr, char *word, char *ph_out, WORD_TAB *wtab)
  1011. {
  1012. int c;
  1013. char *p;
  1014. const char *p2;
  1015. int acc;
  1016. int prev;
  1017. int value;
  1018. int subtract;
  1019. int repeat = 0;
  1020. int n_digits = 0;
  1021. char *word_start;
  1022. int num_control = 0;
  1023. unsigned int flags[2];
  1024. char ph_roman[30];
  1025. char number_chars[N_WORD_BYTES];
  1026. static const char *roman_numbers = "ixcmvld";
  1027. static int roman_values[] = {1,10,100,1000,5,50,500};
  1028. acc = 0;
  1029. prev = 0;
  1030. subtract = 0x7fff;
  1031. ph_out[0] = 0;
  1032. flags[0] = 0;
  1033. flags[1] = 0;
  1034. if(((tr->langopts.numbers & NUM_ROMAN_CAPITALS) && !(wtab[0].flags & FLAG_ALL_UPPER)) || IsDigit09(word[-2]))
  1035. return(0); // not '2xx'
  1036. if(word[1] == ' ')
  1037. {
  1038. if((tr->langopts.numbers & (NUM_ROMAN_CAPITALS | NUM_ROMAN_ORDINAL | NUM_ORDINAL_DOT)) && (wtab[0].flags & FLAG_HAS_DOT))
  1039. {
  1040. // allow single letter Roman ordinal followed by dot.
  1041. }
  1042. else
  1043. return(0); // only one letter, don't speak as a Roman Number
  1044. }
  1045. word_start = word;
  1046. while((c = *word++) != ' ')
  1047. {
  1048. if((p2 = strchr(roman_numbers,c)) == NULL)
  1049. return(0);
  1050. value = roman_values[p2 - roman_numbers];
  1051. if(value == prev)
  1052. {
  1053. repeat++;
  1054. if(repeat >= 3)
  1055. return(0);
  1056. }
  1057. else
  1058. repeat = 0;
  1059. if((prev > 1) && (prev != 10) && (prev != 100))
  1060. {
  1061. if(value >= prev)
  1062. return(0);
  1063. }
  1064. if((prev != 0) && (prev < value))
  1065. {
  1066. if(((acc % 10) != 0) || ((prev*10) < value))
  1067. return(0);
  1068. subtract = prev;
  1069. value -= subtract;
  1070. }
  1071. else if(value >= subtract)
  1072. return(0);
  1073. else
  1074. acc += prev;
  1075. prev = value;
  1076. n_digits++;
  1077. }
  1078. if(IsDigit09(word[0]))
  1079. return(0); // eg. 'xx2'
  1080. acc += prev;
  1081. if(acc < tr->langopts.min_roman)
  1082. return(0);
  1083. if(acc > tr->langopts.max_roman)
  1084. return(0);
  1085. Lookup(tr, "_roman",ph_roman); // precede by "roman" if _rom is defined in *_list
  1086. p = &ph_out[0];
  1087. if((tr->langopts.numbers & NUM_ROMAN_AFTER) == 0)
  1088. {
  1089. strcpy(ph_out,ph_roman);
  1090. p = &ph_out[strlen(ph_roman)];
  1091. }
  1092. sprintf(number_chars," %d %s ",acc, tr->langopts.roman_suffix);
  1093. if(word[0] == '.')
  1094. {
  1095. // dot has not been removed. This implies that there was no space after it
  1096. return(0);
  1097. }
  1098. if(CheckDotOrdinal(tr, word_start, word, wtab, 1))
  1099. wtab[0].flags |= FLAG_ORDINAL;
  1100. if(tr->langopts.numbers & NUM_ROMAN_ORDINAL)
  1101. {
  1102. if(tr->translator_name == L('h','u'))
  1103. {
  1104. if(!(wtab[0].flags & FLAG_ORDINAL))
  1105. {
  1106. if((wtab[0].flags & FLAG_HYPHEN_AFTER) && hu_number_e(word, 0, acc))
  1107. {
  1108. // should use the 'e' form of the number
  1109. num_control |= 1;
  1110. }
  1111. else
  1112. return(0);
  1113. }
  1114. }
  1115. else
  1116. {
  1117. wtab[0].flags |= FLAG_ORDINAL;
  1118. }
  1119. }
  1120. tr->prev_dict_flags[0] = 0;
  1121. tr->prev_dict_flags[1] = 0;
  1122. TranslateNumber(tr, &number_chars[2], p, flags, wtab, num_control);
  1123. if(tr->langopts.numbers & NUM_ROMAN_AFTER)
  1124. strcat(ph_out,ph_roman);
  1125. return(1);
  1126. }
  1127. static const char *M_Variant(int value)
  1128. {
  1129. // returns M, or perhaps MA or MB for some cases
  1130. int teens = 0;
  1131. if(((value % 100) > 10) && ((value % 100) < 20))
  1132. teens = 1;
  1133. switch((translator->langopts.numbers2 >> 6) & 0x7)
  1134. {
  1135. case 1: // lang=ru use singular for xx1 except for x11
  1136. if((teens == 0) && ((value % 10) == 1))
  1137. return("1M");
  1138. break;
  1139. case 2: // lang=cs,sk
  1140. if((value >= 2) && (value <= 4))
  1141. return("0MA");
  1142. break;
  1143. case 3: // lang=pl
  1144. if((teens == 0) && (((value % 10) >= 2) && ((value % 10) <= 4)))
  1145. return("0MA");
  1146. break;
  1147. case 4: // lang=lt
  1148. if((teens == 1) || ((value % 10) == 0))
  1149. return("0MB");
  1150. if((value % 10) == 1)
  1151. return("0MA");
  1152. break;
  1153. case 5: // lang=bs,hr,sr
  1154. if(teens == 0)
  1155. {
  1156. if((value % 10) == 1)
  1157. return("1M");
  1158. if(((value % 10) >= 2) && ((value % 10) <= 4))
  1159. return("0MA");
  1160. }
  1161. break;
  1162. }
  1163. return("0M");
  1164. }
  1165. static int LookupThousands(Translator *tr, int value, int thousandplex, int thousands_exact, char *ph_out)
  1166. {
  1167. // thousands_exact: bit 0 no hundreds,tens,or units, bit 1 ordinal numberr
  1168. int found;
  1169. int found_value=0;
  1170. char string[12];
  1171. char ph_of[12];
  1172. char ph_thousands[40];
  1173. char ph_buf[40];
  1174. ph_of[0] = 0;
  1175. // first look for a match with the exact value of thousands
  1176. if(value > 0)
  1177. {
  1178. if(thousands_exact & 1)
  1179. {
  1180. if(thousands_exact & 2)
  1181. {
  1182. // ordinal number
  1183. sprintf(string,"_%dM%do",value,thousandplex);
  1184. found_value = Lookup(tr, string, ph_thousands);
  1185. }
  1186. if(!found_value & (number_control & 1))
  1187. {
  1188. // look for the 'e' variant
  1189. sprintf(string,"_%dM%de",value,thousandplex);
  1190. found_value = Lookup(tr, string, ph_thousands);
  1191. }
  1192. if(!found_value)
  1193. {
  1194. // is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta)
  1195. sprintf(string,"_%dM%dx",value,thousandplex);
  1196. found_value = Lookup(tr, string, ph_thousands);
  1197. }
  1198. }
  1199. if(found_value == 0)
  1200. {
  1201. sprintf(string,"_%dM%d",value,thousandplex);
  1202. found_value = Lookup(tr, string, ph_thousands);
  1203. }
  1204. }
  1205. if(found_value == 0)
  1206. {
  1207. if((value % 100) >= 20)
  1208. {
  1209. Lookup(tr, "_0of", ph_of);
  1210. }
  1211. found = 0;
  1212. if(thousands_exact & 1)
  1213. {
  1214. if(thousands_exact & 2)
  1215. {
  1216. // ordinal number
  1217. sprintf(string,"_%s%do",M_Variant(value), thousandplex);
  1218. found = Lookup(tr, string, ph_thousands);
  1219. }
  1220. if(!found && (number_control & 1))
  1221. {
  1222. // look for the 'e' variant
  1223. sprintf(string,"_%s%de",M_Variant(value), thousandplex);
  1224. found = Lookup(tr, string, ph_thousands);
  1225. }
  1226. if(!found)
  1227. {
  1228. // is there a different pronunciation if there are no hundreds,tens,or units ?
  1229. sprintf(string,"_%s%dx",M_Variant(value), thousandplex);
  1230. found = Lookup(tr, string, ph_thousands);
  1231. }
  1232. }
  1233. if(found == 0)
  1234. {
  1235. sprintf(string,"_%s%d",M_Variant(value), thousandplex);
  1236. if(Lookup(tr, string, ph_thousands) == 0)
  1237. {
  1238. if(thousandplex > 3)
  1239. {
  1240. sprintf(string,"_0M%d", thousandplex-1);
  1241. if(Lookup(tr, string, ph_buf) == 0)
  1242. {
  1243. // say "millions" if this name is not available and neither is the next lower
  1244. Lookup(tr, "_0M2", ph_thousands);
  1245. speak_missing_thousands = 3;
  1246. }
  1247. }
  1248. if(ph_thousands[0] == 0)
  1249. {
  1250. // repeat "thousand" if higher order names are not available
  1251. sprintf(string,"_%dM1",value);
  1252. if((found_value = Lookup(tr, string, ph_thousands)) == 0)
  1253. Lookup(tr, "_0M1", ph_thousands);
  1254. speak_missing_thousands = 2;
  1255. }
  1256. }
  1257. }
  1258. }
  1259. sprintf(ph_out,"%s%s",ph_of,ph_thousands);
  1260. if((value == 1) && (thousandplex == 1) && (tr->langopts.numbers & NUM_OMIT_1_THOUSAND))
  1261. return(1);
  1262. return(found_value);
  1263. }
  1264. static int LookupNum2(Translator *tr, int value, int thousandplex, const int control, char *ph_out)
  1265. {
  1266. // Lookup a 2 digit number
  1267. // control bit 0: ordinal number
  1268. // control bit 1: final tens and units (not number of thousands) (use special form of '1', LANG=de "eins")
  1269. // control bit 2: tens and units only, no higher digits
  1270. // control bit 3: use feminine form of '2' (for thousands
  1271. // control bit 4: speak zero tens
  1272. // control bit 5: variant of ordinal number (lang=hu)
  1273. // bit 8 followed by decimal fraction
  1274. // bit 9: use #f form for both tens and units (lang=ml)
  1275. int found;
  1276. int ix;
  1277. int units;
  1278. int tens;
  1279. int is_ordinal;
  1280. int used_and=0;
  1281. int found_ordinal = 0;
  1282. int next_phtype;
  1283. int ord_type = 'o';
  1284. char string[12]; // for looking up entries in *_list
  1285. char ph_ordinal[20];
  1286. char ph_tens[50];
  1287. char ph_digits[50];
  1288. char ph_and[12];
  1289. units = value % 10;
  1290. tens = value / 10;
  1291. found = 0;
  1292. ph_ordinal[0] = 0;
  1293. ph_tens[0] = 0;
  1294. ph_digits[0] = 0;
  1295. ph_and[0] = 0;
  1296. if(control & 0x20)
  1297. {
  1298. ord_type = 'q';
  1299. }
  1300. is_ordinal = control & 1;
  1301. if((control & 2) && (n_digit_lookup == 2))
  1302. {
  1303. // pronunciation of the final 2 digits has already been found
  1304. strcpy(ph_out, digit_lookup);
  1305. }
  1306. else
  1307. {
  1308. if(digit_lookup[0] == 0)
  1309. {
  1310. // is there a special pronunciation for this 2-digit number
  1311. if(control & 8)
  1312. {
  1313. // is there a feminine or thousands-variant form?
  1314. sprintf(string,"_%dfx",value);
  1315. if((found = Lookup(tr, string, ph_digits)) == 0)
  1316. {
  1317. sprintf(string,"_%df",value);
  1318. found = Lookup(tr, string, ph_digits);
  1319. }
  1320. }
  1321. else if(is_ordinal)
  1322. {
  1323. strcpy(ph_ordinal, ph_ordinal2);
  1324. if(control & 4)
  1325. {
  1326. sprintf(string,"_%d%cx",value,ord_type); // LANG=hu, special word for 1. 2. when there are no higher digits
  1327. if((found = Lookup(tr, string, ph_digits)) != 0)
  1328. {
  1329. if(ph_ordinal2x[0] != 0)
  1330. strcpy(ph_ordinal, ph_ordinal2x); // alternate pronunciation (lang=an)
  1331. }
  1332. }
  1333. if(found == 0)
  1334. {
  1335. sprintf(string,"_%d%c",value,ord_type);
  1336. found = Lookup(tr, string, ph_digits);
  1337. }
  1338. found_ordinal = found;
  1339. }
  1340. if(found == 0)
  1341. {
  1342. if(control & 2)
  1343. {
  1344. // the final tens and units of a number
  1345. if(number_control & 1)
  1346. {
  1347. // look for 'e' variant
  1348. sprintf(string,"_%de",value);
  1349. found = Lookup(tr, string, ph_digits);
  1350. }
  1351. }
  1352. else
  1353. {
  1354. // followed by hundreds or thousands etc
  1355. if((tr->langopts.numbers2 & NUM2_ORDINAL_AND_THOUSANDS) && (thousandplex <= 1))
  1356. sprintf(string, "_%do", value); // LANG=TA
  1357. else
  1358. sprintf(string, "_%da", value);
  1359. found = Lookup(tr, string, ph_digits);
  1360. }
  1361. if(!found)
  1362. {
  1363. if((is_ordinal) && (tr->langopts.numbers2 & NUM2_NO_TEEN_ORDINALS))
  1364. {
  1365. // don't use numbers 10-99 to make ordinals, always use _1Xo etc (lang=pt)
  1366. }
  1367. else
  1368. {
  1369. sprintf(string,"_%d",value);
  1370. found = Lookup(tr, string, ph_digits);
  1371. }
  1372. }
  1373. }
  1374. }
  1375. // no, speak as tens+units
  1376. if((value < 10) && (control & 0x10))
  1377. {
  1378. // speak leading zero
  1379. Lookup(tr, "_0", ph_tens);
  1380. }
  1381. else
  1382. {
  1383. if(found)
  1384. {
  1385. ph_tens[0] = 0;
  1386. }
  1387. else
  1388. {
  1389. if(is_ordinal)
  1390. {
  1391. sprintf(string,"_%dX%c", tens, ord_type);
  1392. if(Lookup(tr, string, ph_tens) != 0)
  1393. {
  1394. found_ordinal = 1;
  1395. if((units != 0) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))
  1396. {
  1397. // Use the ordinal form of tens as well as units. Add the ordinal ending
  1398. strcat(ph_tens, ph_ordinal2);
  1399. }
  1400. }
  1401. }
  1402. if(found_ordinal == 0)
  1403. {
  1404. if(control & 0x200)
  1405. sprintf(string, "_%dXf", tens);
  1406. else
  1407. sprintf(string,"_%dX", tens);
  1408. Lookup(tr, string, ph_tens);
  1409. }
  1410. if((ph_tens[0] == 0) && (tr->langopts.numbers & NUM_VIGESIMAL))
  1411. {
  1412. // tens not found, (for example) 73 is 60+13
  1413. units = (value % 20);
  1414. sprintf(string,"_%dX", tens & 0xfe);
  1415. Lookup(tr, string, ph_tens);
  1416. }
  1417. ph_digits[0] = 0;
  1418. if(units > 0)
  1419. {
  1420. found = 0;
  1421. if((control & 2) && (digit_lookup[0] != 0))
  1422. {
  1423. // we have an entry for this digit (possibly together with the next word)
  1424. strcpy(ph_digits, digit_lookup);
  1425. found_ordinal = 1;
  1426. ph_ordinal[0] = 0;
  1427. }
  1428. else
  1429. {
  1430. if(control & 8)
  1431. {
  1432. // is there a variant form of this number?
  1433. sprintf(string,"_%df",units);
  1434. found = Lookup(tr, string, ph_digits);
  1435. }
  1436. if((is_ordinal) && ((tr->langopts.numbers & NUM_SWAP_TENS) == 0))
  1437. {
  1438. // ordinal
  1439. sprintf(string,"_%d%c",units,ord_type);
  1440. if((found = Lookup(tr, string, ph_digits)) != 0)
  1441. {
  1442. found_ordinal = 1;
  1443. }
  1444. }
  1445. if(found == 0)
  1446. {
  1447. if((number_control & 1) && (control & 2))
  1448. {
  1449. // look for 'e' variant
  1450. sprintf(string,"_%de",units);
  1451. found = Lookup(tr, string, ph_digits);
  1452. }
  1453. else if(((control & 2) == 0) || ((tr->langopts.numbers & NUM_SWAP_TENS) != 0))
  1454. {
  1455. // followed by hundreds or thousands (or tens)
  1456. if((tr->langopts.numbers2 & NUM2_ORDINAL_AND_THOUSANDS) && (thousandplex <= 1))
  1457. sprintf(string, "_%do", units); // LANG=TA, only for 100s, 1000s
  1458. else
  1459. sprintf(string, "_%da", units);
  1460. found = Lookup(tr, string, ph_digits);
  1461. }
  1462. }
  1463. if(found == 0)
  1464. {
  1465. sprintf(string,"_%d",units);
  1466. Lookup(tr, string, ph_digits);
  1467. }
  1468. }
  1469. }
  1470. }
  1471. }
  1472. if((is_ordinal) && (found_ordinal == 0) && (ph_ordinal[0] == 0))
  1473. {
  1474. if((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & NUM_SWAP_TENS)))
  1475. Lookup(tr, "_ord20", ph_ordinal);
  1476. if(ph_ordinal[0] == 0)
  1477. Lookup(tr, "_ord", ph_ordinal);
  1478. }
  1479. if((tr->langopts.numbers & (NUM_SWAP_TENS | NUM_AND_UNITS)) && (ph_tens[0] != 0) && (ph_digits[0] != 0))
  1480. {
  1481. Lookup(tr, "_0and", ph_and);
  1482. if((is_ordinal) && (tr->langopts.numbers2 & NUM2_ORDINAL_NO_AND))
  1483. ph_and[0] = 0;
  1484. if(tr->langopts.numbers & NUM_SWAP_TENS)
  1485. sprintf(ph_out,"%s%s%s%s",ph_digits, ph_and, ph_tens, ph_ordinal);
  1486. else
  1487. sprintf(ph_out,"%s%s%s%s",ph_tens, ph_and, ph_digits, ph_ordinal);
  1488. used_and = 1;
  1489. }
  1490. else
  1491. {
  1492. if(tr->langopts.numbers & NUM_SINGLE_VOWEL)
  1493. {
  1494. // remove vowel from the end of tens if units starts with a vowel (LANG=Italian)
  1495. if(((ix = strlen(ph_tens)-1) >= 0) && (ph_digits[0] != 0))
  1496. {
  1497. if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS)
  1498. next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type;
  1499. if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
  1500. ph_tens[ix] = 0;
  1501. }
  1502. }
  1503. if((tr->langopts.numbers2 & NUM2_ORDINAL_DROP_VOWEL) && (ph_ordinal[0] != 0))
  1504. {
  1505. ix = sprintf(ph_out,"%s%s", ph_tens, ph_digits);
  1506. if((ix > 0) && (phoneme_tab[(unsigned char)(ph_out[ix-1])]->type == phVOWEL))
  1507. ix--;
  1508. sprintf(&ph_out[ix], "%s", ph_ordinal);
  1509. }
  1510. else
  1511. {
  1512. sprintf(ph_out,"%s%s%s",ph_tens, ph_digits, ph_ordinal);
  1513. }
  1514. }
  1515. }
  1516. if(tr->langopts.numbers & NUM_SINGLE_STRESS_L)
  1517. {
  1518. // only one primary stress, on the first part (tens)
  1519. found = 0;
  1520. for(ix=0; ix < (signed)strlen(ph_out); ix++)
  1521. {
  1522. if(ph_out[ix] == phonSTRESS_P)
  1523. {
  1524. if(found)
  1525. ph_out[ix] = phonSTRESS_3;
  1526. else
  1527. found = 1;
  1528. }
  1529. }
  1530. }
  1531. else if(tr->langopts.numbers & NUM_SINGLE_STRESS)
  1532. {
  1533. // only one primary stress
  1534. found = 0;
  1535. for(ix=strlen(ph_out)-1; ix>=0; ix--)
  1536. {
  1537. if(ph_out[ix] == phonSTRESS_P)
  1538. {
  1539. if(found)
  1540. ph_out[ix] = phonSTRESS_3;
  1541. else
  1542. found = 1;
  1543. }
  1544. }
  1545. }
  1546. return(used_and);
  1547. }
  1548. static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null, int thousandplex, int control)
  1549. {
  1550. // Translate a 3 digit number
  1551. // control bit 0, previous thousands
  1552. // bit 1, ordinal number
  1553. // bit 5 variant form of ordinal number
  1554. // bit 8 followed by decimal fraction
  1555. int found;
  1556. int hundreds;
  1557. int tensunits;
  1558. int x;
  1559. int ix;
  1560. int exact;
  1561. int ordinal;
  1562. int tplex;
  1563. int say_zero_hundred=0;
  1564. int say_one_hundred;
  1565. char string[12]; // for looking up entries in **_list
  1566. char buf1[100];
  1567. char buf2[100];
  1568. char ph_100[20];
  1569. char ph_10T[20];
  1570. char ph_digits[50];
  1571. char ph_thousands[50];
  1572. char ph_hundred_and[12];
  1573. char ph_thousand_and[12];
  1574. ordinal = control & 0x22;
  1575. hundreds = value / 100;
  1576. tensunits = value % 100;
  1577. buf1[0] = 0;
  1578. ph_thousands[0] = 0;
  1579. ph_thousand_and[0] = 0;
  1580. if((tr->langopts.numbers & NUM_ZERO_HUNDRED) && ((control & 1) || (hundreds >= 10)))
  1581. {
  1582. say_zero_hundred = 1; // lang=vi
  1583. }
  1584. if((hundreds > 0) || say_zero_hundred)
  1585. {
  1586. found = 0;
  1587. if(ordinal && (tensunits == 0))
  1588. {
  1589. // ordinal number, with no tens or units
  1590. found = Lookup(tr, "_0Co", ph_100);
  1591. }
  1592. if(found == 0)
  1593. {
  1594. if(tensunits==0)
  1595. {
  1596. // special form for exact hundreds?
  1597. found = Lookup(tr, "_0C0", ph_100);
  1598. }
  1599. if(!found)
  1600. {
  1601. Lookup(tr, "_0C", ph_100);
  1602. }
  1603. }
  1604. if(((tr->langopts.numbers & NUM_1900) != 0) && (hundreds == 19))
  1605. {
  1606. // speak numbers such as 1984 as years: nineteen-eighty-four
  1607. }
  1608. else if(hundreds >= 10)
  1609. {
  1610. ph_digits[0] = 0;
  1611. exact = 0;
  1612. if ((value % 1000) == 0)
  1613. exact = 1;
  1614. tplex = thousandplex+1;
  1615. if(tr->langopts.numbers2 & NUM2_MYRIADS)
  1616. {
  1617. tplex = 0;
  1618. }
  1619. if(LookupThousands(tr, hundreds / 10, tplex, exact | ordinal, ph_10T) == 0)
  1620. {
  1621. x = 0;
  1622. if(tr->langopts.numbers2 & (1 << tplex))
  1623. x = 8; // use variant (feminine) for before thousands and millions
  1624. if(tr->translator_name == L('m','l'))
  1625. x = 0x208;
  1626. LookupNum2(tr, hundreds/10, thousandplex, x, ph_digits);
  1627. }
  1628. if(tr->langopts.numbers2 & 0x200)
  1629. sprintf(ph_thousands,"%s%c%s%c",ph_10T,phonEND_WORD,ph_digits,phonEND_WORD); // say "thousands" before its number, not after
  1630. else
  1631. sprintf(ph_thousands,"%s%c%s%c",ph_digits,phonEND_WORD,ph_10T,phonEND_WORD);
  1632. hundreds %= 10;
  1633. if((hundreds == 0) && (say_zero_hundred == 0))
  1634. ph_100[0] = 0;
  1635. suppress_null = 1;
  1636. control |= 1;
  1637. }
  1638. ph_digits[0] = 0;
  1639. if((hundreds > 0) || say_zero_hundred)
  1640. {
  1641. if((tr->langopts.numbers & NUM_AND_HUNDRED) && ((control & 1) || (ph_thousands[0] != 0)))
  1642. {
  1643. Lookup(tr, "_0and", ph_thousand_and);
  1644. }
  1645. suppress_null = 1;
  1646. found = 0;
  1647. if((ordinal)
  1648. && ((tensunits == 0) || (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL)))
  1649. {
  1650. // ordinal number
  1651. sprintf(string, "_%dCo", hundreds);
  1652. found = Lookup(tr, string, ph_digits);
  1653. if((tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL) && (tensunits > 0))
  1654. {
  1655. // Use ordinal form of hundreds, as well as for tens and units
  1656. // Add ordinal suffix to the hundreds
  1657. strcat(ph_digits, ph_ordinal2);
  1658. }
  1659. }
  1660. if((hundreds == 0) && say_zero_hundred)
  1661. {
  1662. Lookup(tr, "_0", ph_digits);
  1663. }
  1664. else
  1665. {
  1666. if((hundreds==1) && (tr->langopts.numbers2 & NUM2_OMIT_1_HUNDRED_ONLY) && ((control & 1)==0))
  1667. {
  1668. // only look for special 100 if there are previous thousands
  1669. }
  1670. else
  1671. {
  1672. if((!found) && (tensunits == 0))
  1673. {
  1674. // is there a special pronunciation for exactly n00 ?
  1675. sprintf(string,"_%dC0",hundreds);
  1676. found = Lookup(tr, string, ph_digits);
  1677. }
  1678. if(!found)
  1679. {
  1680. sprintf(string,"_%dC",hundreds);
  1681. found = Lookup(tr, string, ph_digits); // is there a specific pronunciation for n-hundred ?
  1682. }
  1683. }
  1684. if(found)
  1685. {
  1686. ph_100[0] = 0;
  1687. }
  1688. else
  1689. {
  1690. say_one_hundred = 1;
  1691. if(hundreds == 1)
  1692. {
  1693. if((tr->langopts.numbers & NUM_OMIT_1_HUNDRED) != 0)
  1694. say_one_hundred = 0;
  1695. }
  1696. if(say_one_hundred != 0)
  1697. {
  1698. LookupNum2(tr, hundreds, thousandplex, 0, ph_digits);
  1699. }
  1700. }
  1701. }
  1702. }
  1703. sprintf(buf1,"%s%s%s%s",ph_thousands,ph_thousand_and,ph_digits,ph_100);
  1704. }
  1705. ph_hundred_and[0] = 0;
  1706. if(tensunits > 0)
  1707. {
  1708. if((control & 2) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))
  1709. {
  1710. // Don't use "and" if we apply ordinal to both hundreds and units
  1711. }
  1712. else
  1713. {
  1714. if((value > 100) || ((control & 1) && (thousandplex==0)))
  1715. {
  1716. if((tr->langopts.numbers & NUM_HUNDRED_AND) || ((tr->langopts.numbers & NUM_HUNDRED_AND_DIGIT) && (tensunits < 10)))
  1717. {
  1718. Lookup(tr, "_0and", ph_hundred_and);
  1719. }
  1720. }
  1721. if((tr->langopts.numbers & NUM_THOUSAND_AND) && (hundreds == 0) && ((control & 1) || (ph_thousands[0] != 0)))
  1722. {
  1723. Lookup(tr, "_0and", ph_hundred_and);
  1724. }
  1725. }
  1726. }
  1727. buf2[0] = 0;
  1728. if((tensunits != 0) || (suppress_null == 0))
  1729. {
  1730. x = 0;
  1731. if(thousandplex==0)
  1732. {
  1733. x = 2; // allow "eins" for 1 rather than "ein"
  1734. if(ordinal)
  1735. x = 3; // ordinal number
  1736. if((value < 100) && !(control & 1))
  1737. x |= 4; // tens and units only, no higher digits
  1738. if(ordinal & 0x20)
  1739. x |= 0x20; // variant form of ordinal number
  1740. }
  1741. else
  1742. {
  1743. if(tr->langopts.numbers2 & (1 << thousandplex))
  1744. x = 8; // use variant (feminine) for before thousands and millions
  1745. }
  1746. if((tr->translator_name == L('m','l')) && (thousandplex == 1))
  1747. {
  1748. x |= 0x208; // use #f form for both tens and units
  1749. }
  1750. if((tr->langopts.numbers2 & NUM2_ZERO_TENS) && ((control & 1) || (hundreds > 0)))
  1751. {
  1752. // LANG=zh,
  1753. x |= 0x10;
  1754. }
  1755. if(LookupNum2(tr, tensunits, thousandplex, x | (control & 0x100), buf2) != 0)
  1756. {
  1757. if(tr->langopts.numbers & NUM_SINGLE_AND)
  1758. ph_hundred_and[0] = 0; // don't put 'and' after 'hundred' if there's 'and' between tens and units
  1759. }
  1760. }
  1761. else
  1762. {
  1763. if(ph_ordinal2[0] != 0)
  1764. {
  1765. ix = strlen(buf1);
  1766. if((ix > 0) && (buf1[ix-1] == phonPAUSE_SHORT))
  1767. buf1[ix-1] = 0; // remove pause before addding ordinal suffix
  1768. strcpy(buf2, ph_ordinal2);
  1769. }
  1770. }
  1771. sprintf(ph_out,"%s%s%c%s",buf1,ph_hundred_and,phonEND_WORD,buf2);
  1772. return(0);
  1773. }
  /*
   * Test whether the text at 'word' is one complete digit group (for example
   * the "000" of "1,000"): exactly group_len digits, with no digit directly
   * before or directly after them.
   *
   *   word       first character of the candidate group.  word[-1] is read,
   *              so 'word' must not point at the first byte of its buffer.
   *   group_len  number of digits that make up one group.
   *
   * Returns true for a well-formed group, false otherwise.
   */
  bool CheckThousandsGroup(char *word, int group_len)
  {
    int ix;

    // Every position inside the group must hold a digit.  Doing this first
    // means a string that ends early is rejected at its terminator (assuming
    // IsDigit09 rejects NUL) before word[group_len] is ever looked at.
    for (ix = 0; ix < group_len; ix++) {
      if (!IsDigit09(word[ix]))
        return false;
    }

    // A digit on either side means the group is part of a longer digit run.
    // The preceding character is inspected here; an earlier form passed the
    // constant -1, which never examined the input at all.
    if (IsDigit09(word[-1]) || IsDigit09(word[group_len]))
      return false;

    return true;
  }
  // Translate the number that starts at 'word' into phonemes written to ph_out.
  //
  // Handles, in this order: detection of a preceding thousands group, ordinal
  // markers (dot, ordinal indicator, "_#suffix" dictionary entries), leading
  // zeros, following thousands groups, the integer part (via LookupNum3) and
  // any decimal fraction.
  //
  //   tr      translator whose langopts select the number rules
  //   word    text of the number; characters before word[0] (word[-1]..word[-3])
  //           and after the digits are also read, and word may be modified
  //           (a trailing '.' or a following '%' can be overwritten)
  //   ph_out  receives the phoneme string
  //   flags   cleared on entry; FLAG_SKIPWORDS and FLAG_FOUND may be set
  //   wtab    word table entry for this word; following entries are also read
  //   control copied into the non-local number_control
  //
  // Returns 0 if a long number with a leading zero should be spoken as
  // individual digits, otherwise 1.
  1787. static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
  1788. {
  1789. // Number translation with various options
  1790. // the "word" may be up to 4 digits
  1791. // "words" of 3 digits may be preceded by another number "word" for thousands or millions
  1792. int n_digits;
  1793. int value;
  1794. int ix;
  1795. int digix;
  1796. unsigned char c;
  1797. int suppress_null = 0;
  1798. int decimal_point = 0;
  1799. int thousandplex = 0;
  1800. int thousands_exact = 1;
  1801. int thousands_inc = 0;
  1802. int prev_thousands = 0;
  1803. int ordinal = 0;
  1804. int this_value;
  1805. int decimal_count;
  1806. int max_decimal_count;
  1807. int decimal_mode;
  1808. int suffix_ix;
  1809. int skipwords = 0;
  1810. int group_len;
  1811. int len;
  1812. char *p;
  1813. char string[32]; // for looking up entries in **_list
  1814. char buf1[100];
  1815. char ph_append[50];
  1816. char ph_buf[200];
  1817. char ph_buf2[50];
  1818. char ph_zeros[50];
  1819. char suffix[30]; // string[] must be long enough for sizeof(suffix)+2
  1820. char buf_digit_lookup[50];
  1821. static const char str_pause[2] = {phonPAUSE_NOLINK,0};
  // Reset the caller's flags and the non-local number state.
  // NOTE(review): digit_lookup is pointed at the local buf_digit_lookup and is
  // not reset before returning - confirm nothing reads it after this call.
  1822. *flags = 0;
  1823. n_digit_lookup = 0;
  1824. buf_digit_lookup[0] = 0;
  1825. digit_lookup = buf_digit_lookup;
  1826. number_control = control;
  // count the leading digits of the word (empty loop body)
  1827. for(ix=0; IsDigit09(word[ix]); ix++);
  1828. n_digits = ix;
  // this_value is assigned here but is not read again in this function
  1829. value = this_value = atoi(word);
  // digits per group: 3, or 4 when the language sets NUM2_MYRIADS
  1830. group_len = 3;
  1831. if(tr->langopts.numbers2 & NUM2_MYRIADS)
  1832. group_len = 4;
  1833. // is there a previous thousands part (as a previous "word") ?
  // word[-2] and word[-3] are read: the caller's buffer must have valid
  // characters before 'word'
  1834. if((n_digits == group_len) && (word[-2] == tr->langopts.thousands_sep) && IsDigit09(word[-3]))
  1835. {
  1836. prev_thousands = 1;
  1837. }
  1838. else if((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & NUM_ALLOW_SPACE))
  1839. {
  1840. // thousands groups can be separated by spaces
  // note: this branch tests for exactly 3 digits, not group_len
  1841. if((n_digits == 3) && !(wtab->flags & FLAG_MULTIPLE_SPACES) && IsDigit09(word[-2]))
  1842. {
  1843. prev_thousands = 1;
  1844. }
  1845. }
  1846. if(prev_thousands == 0)
  1847. {
  1848. speak_missing_thousands = 0;
  1849. }
  1850. ph_ordinal2[0] = 0;
  1851. ph_zeros[0] = 0;
  1852. if(prev_thousands || (word[0] != '0'))
  1853. {
  1854. // don't check for ordinal if the number has a leading zero
  1855. ordinal = CheckDotOrdinal(tr, word, &word[ix], wtab, 0);
  1856. }
  1857. if((word[ix] == '.') && !IsDigit09(word[ix+1]) && !IsDigit09(word[ix+2]) && !(wtab[1].flags & FLAG_NOSPACE))
  1858. {
  1859. // remove dot unless followed by another number
  1860. word[ix] = 0;
  1861. }
  1862. if((ordinal == 0) || (tr->translator_name == L('h','u')))
  1863. {
  1864. // NOTE lang=hu, allow both dot and ordinal suffix, eg. "december 21.-én"
  1865. // look for an ordinal number suffix after the number
  // ix still equals n_digits here; step past the character after the digits
  1866. ix++;
  1867. p = suffix;
  1868. if(wtab[0].flags & FLAG_HYPHEN_AFTER)
  1869. {
  1870. *p++ = '-';
  1871. ix++;
  1872. }
  // copy the suffix text up to the next space or end of string; the bound
  // is applied to ix (an index into word), not to the number of bytes copied
  1873. while((word[ix] != 0) && (word[ix] != ' ') && (ix < (int)(sizeof(suffix)-1)))
  1874. {
  1875. *p++ = word[ix++];
  1876. }
  1877. *p = 0;
  1878. if(suffix[0] != 0)
  1879. {
  1880. if((tr->langopts.ordinal_indicator != NULL) && (strcmp(suffix, tr->langopts.ordinal_indicator) == 0))
  1881. {
  1882. ordinal = 2;
  1883. }
  1884. else if(!IsDigit09(suffix[0])) // not _#9 (tab)
  1885. {
  // look up "_#<suffix>"; a match marks the suffix as an ordinal ending
  1886. sprintf(string,"_#%s",suffix);
  1887. if(Lookup(tr, string, ph_ordinal2))
  1888. {
  1889. // this is an ordinal suffix
  1890. ordinal = 2;
  1891. flags[0] |= FLAG_SKIPWORDS;
  1892. skipwords = 1;
  1893. sprintf(string,"_x#%s",suffix);
  1894. Lookup(tr, string, ph_ordinal2x); // is there an alternate pronunciation?
  1895. }
  1896. }
  1897. }
  1898. }
  1899. if(wtab[0].flags & FLAG_ORDINAL)
  1900. ordinal = 2;
  1901. ph_append[0] = 0;
  // ph_buf2 is emptied here and never written again in this function
  1902. ph_buf2[0] = 0;
  1903. if((word[0] == '0') && (prev_thousands == 0) && (word[1] != ' ') && (word[1] != tr->langopts.decimal_sep))
  1904. {
  // NOTE(review): the offsets 3, 5 and 7 imply extra characters between the
  // two digits and the ':' in the word buffer - confirm the buffer layout
  1905. if((n_digits == 2) && (word[3] == ':') && IsDigit09(word[5]) && isspace(word[7]))
  1906. {
  1907. // looks like a time 02:30, omit the leading zero
  1908. }
  1909. else
  1910. {
  1911. if(n_digits > 3)
  1912. {
  1913. flags[0] &= ~FLAG_SKIPWORDS;
  1914. return(0); // long number string with leading zero, speak as individual digits
  1915. }
  1916. // speak leading zeros
  // one "_0" entry is appended per leading zero; the final digit is never
  // counted as a leading zero
  1917. for(ix=0; (word[ix] == '0') && (ix < (n_digits-1)); ix++)
  1918. {
  1919. Lookup(tr, "_0", &ph_zeros[strlen(ph_zeros)]);
  1920. }
  1921. }
  1922. }
  // thousands_inc is the number of characters to step over to reach a
  // following group: 1 for a space, 2 when thousands_sep follows the digits
  1923. if((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[n_digits] == ' '))
  1924. thousands_inc = 1;
  1925. else if(word[n_digits] == tr->langopts.thousands_sep)
  1926. thousands_inc = 2;
  1927. suffix_ix = n_digits+2;
  1928. if(thousands_inc > 0)
  1929. {
  1930. // if the following "words" are three-digit groups, count them and add
  1931. // a "thousand"/"million" suffix to this one
  1932. digix = n_digits + thousands_inc;
  1933. while(((wtab[thousandplex+1].flags & FLAG_MULTIPLE_SPACES) == 0) && CheckThousandsGroup(&word[digix], group_len))
  1934. {
  // any non-zero digit in a following group means this is not an exact
  // multiple of the thousands unit
  1935. for(ix=0; ix<group_len; ix++)
  1936. {
  1937. if(word[digix+ix] != '0')
  1938. {
  1939. thousands_exact = 0;
  1940. break;
  1941. }
  1942. }
  1943. thousandplex++;
  1944. digix += group_len;
  1945. if((word[digix] == tr->langopts.thousands_sep) || ((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[digix] == ' ')))
  1946. {
  1947. suffix_ix = digix+2;
  1948. digix += thousands_inc;
  1949. }
  1950. else
  1951. break;
  1952. }
  1953. }
  // an all-zero group that follows an earlier group is not spoken
  1954. if((value == 0) && prev_thousands)
  1955. {
  1956. suppress_null = 1;
  1957. }
  1958. if(tr->translator_name == L('h','u'))
  1959. {
  1960. // variant form of numbers when followed by hyphen and a suffix starting with 'a' or 'e' (but not a, e, az, ez, azt, ezt)
  1961. if((wtab[thousandplex].flags & FLAG_HYPHEN_AFTER) && (thousands_exact==1) && hu_number_e(&word[suffix_ix], thousandplex, value))
  1962. {
  1963. number_control |= 1; // use _1e variant of number
  1964. }
  1965. }
  1966. if((word[n_digits] == tr->langopts.decimal_sep) && IsDigit09(word[n_digits+1]))
  1967. {
  1968. // this "word" ends with a decimal point
  1969. Lookup(tr, "_dpt", ph_append);
  // 0x100 is OR-ed into the control argument passed to LookupNum3 below and
  // also keeps the decimal-fraction loop running
  1970. decimal_point = 0x100;
  1971. }
  1972. else if(suppress_null == 0)
  1973. {
  1974. if(thousands_inc > 0)
  1975. {
  1976. if(thousandplex > 0)
  1977. {
  // (suppress_null is already known to be 0 in this branch)
  1978. if((suppress_null == 0) && (LookupThousands(tr,value,thousandplex, thousands_exact, ph_append)))
  1979. {
  1980. // found an exact match for N thousand
  1981. value = 0;
  1982. suppress_null = 1;
  1983. }
  1984. }
  1985. }
  1986. }
  // this else pairs with "else if(suppress_null == 0)": it is reached only
  // when there is no decimal point and suppress_null is set
  1987. else
  1988. if(speak_missing_thousands == 1)
  1989. {
  1990. // speak this thousandplex if there was no word for the previous thousandplex
  // buf1 is only scratch space here; the looked-up text is discarded
  1991. sprintf(string,"_0M%d",thousandplex+1);
  1992. if(Lookup(tr, string, buf1)==0)
  1993. {
  1994. sprintf(string,"_0M%d",thousandplex);
  1995. Lookup(tr, string, ph_append);
  1996. }
  1997. }
  1998. if((ph_append[0] == 0) && (word[n_digits] == '.') && (thousandplex == 0))
  1999. {
  2000. Lookup(tr, "_.", ph_append);
  2001. }
  2002. if(thousandplex == 0)
  2003. {
  2004. char *p2;
  2005. // look for combinations of the number with the next word
  2006. p = word;
  2007. while(IsDigit09(p[1])) p++; // just use the last digit
  // for a single-digit number p == word, so this reads word[-1]
  2008. if(IsDigit09(p[-1]))
  2009. {
  2010. p2 = p - 1;
  2011. if(LookupDictList(tr, &p2, buf_digit_lookup, flags, FLAG_SUFX, wtab)) // lookup 2 digits
  2012. {
  2013. n_digit_lookup = 2;
  2014. }
  2015. }
  2016. if((buf_digit_lookup[0] == 0) && (*p != '0'))
  2017. {
  2018. // LANG=hu ?
  2019. // not found, lookup only the last digit (?? but not if dot-ordinal has been found)
  2020. if(LookupDictList(tr, &p, buf_digit_lookup, flags, FLAG_SUFX, wtab)) // don't match '0', or entries with $only
  2021. {
  2022. n_digit_lookup = 1;
  2023. }
  2024. }
  2025. if(prev_thousands == 0)
  2026. {
  2027. if((decimal_point == 0) && (ordinal == 0))
  2028. {
  2029. // Look for special pronunciation for this number in isolation (LANG=kl)
  // on a match the function returns at once: FLAG_FOUND is not set and
  // speak_missing_thousands is not decremented on this path
  2030. sprintf(string, "_%dn", value);
  2031. if(Lookup(tr, string, ph_out))
  2032. {
  2033. return(1);
  2034. }
  2035. }
  2036. if(tr->langopts.numbers2 & NUM2_PERCENT_BEFORE)
  2037. {
  2038. // LANG=si, say "percent" before the number
  2039. p2 = word;
  2040. while((*p2 != ' ') && (*p2 != 0))
  2041. {
  2042. p2++;
  2043. }
  2044. if(p2[1] == '%')
  2045. {
  // write the "%" phonemes first and advance ph_out so the number is
  // written after them; then blank out the '%' in the input text
  2046. Lookup(tr, "%", ph_out);
  2047. ph_out += strlen(ph_out);
  2048. p2[1] = ' ';
  2049. }
  2050. }
  2051. }
  2052. }
  // translate the integer part; the flag values of prev_thousands, ordinal and
  // decimal_point are combined into LookupNum3's control argument
  2053. LookupNum3(tr, value, ph_buf, suppress_null, thousandplex, prev_thousands | ordinal | decimal_point);
  2054. if((thousandplex > 0) && (tr->langopts.numbers2 & 0x200))
  2055. sprintf(ph_out,"%s%s%c%s%s",ph_zeros,ph_append,phonEND_WORD,ph_buf2,ph_buf); // say "thousands" before its number
  2056. else
  2057. sprintf(ph_out,"%s%s%s%c%s",ph_zeros,ph_buf2,ph_buf,phonEND_WORD,ph_append);
  // Decimal fraction.  Each pass handles the digits after one decimal
  // separator; the loop repeats if another separator plus digit follows.
  2058. while(decimal_point)
  2059. {
  // step over the decimal separator, then count the fraction digits
  2060. n_digits++;
  2061. decimal_count = 0;
  2062. while(IsDigit09(word[n_digits+decimal_count]))
  2063. decimal_count++;
  2064. max_decimal_count = 2;
  // bits 0xe000 of langopts.numbers select the decimal fraction style
  2065. switch(decimal_mode = (tr->langopts.numbers & 0xe000))
  2066. {
  2067. case NUM_DFRACTION_4:
  2068. max_decimal_count = 5;
  // falls through into NUM_DFRACTION_2 with the larger limit
  2069. case NUM_DFRACTION_2:
  2070. // French/Polish decimal fraction
  // speak each leading zero of the fraction separately
  2071. while(word[n_digits] == '0')
  2072. {
  2073. Lookup(tr, "_0", buf1);
  2074. strcat(ph_out,buf1);
  2075. decimal_count--;
  2076. n_digits++;
  2077. }
  // speak the remaining digits as one number if there are few enough of them
  2078. if((decimal_count <= max_decimal_count) && IsDigit09(word[n_digits]))
  2079. {
  2080. LookupNum3(tr, atoi(&word[n_digits]), buf1, 0,0,0);
  2081. strcat(ph_out,buf1);
  2082. n_digits += decimal_count;
  2083. }
  2084. break;
  2085. case NUM_DFRACTION_1: // italian, say "hundredths" if leading zero
  2086. case NUM_DFRACTION_5: // hungarian, always say "tenths" etc.
  2087. case NUM_DFRACTION_6: // kazakh, always say "tenths" etc, before the decimal fraction
  2088. LookupNum3(tr, atoi(&word[n_digits]), ph_buf, 0,0,0);
  2089. if((word[n_digits]=='0') || (decimal_mode != NUM_DFRACTION_1))
  2090. {
  2091. // decimal part has leading zeros, so add a "hundredths" or "thousandths" suffix
  2092. sprintf(string,"_0Z%d",decimal_count);
  2093. if(Lookup(tr, string, buf1) == 0)
  2094. break; // revert to speaking single digits
  // NUM_DFRACTION_6 puts the unit word before the fraction, the others after
  2095. if(decimal_mode == NUM_DFRACTION_6)
  2096. strcat(ph_out, buf1);
  2097. else
  2098. strcat(ph_buf, buf1);
  2099. }
  2100. strcat(ph_out,ph_buf);
  2101. n_digits += decimal_count;
  2102. break;
  2103. case NUM_DFRACTION_3:
  2104. // Romanian decimal fractions
  2105. if((decimal_count <= 4) && (word[n_digits] != '0'))
  2106. {
  2107. LookupNum3(tr, atoi(&word[n_digits]), buf1, 0,0,0);
  2108. strcat(ph_out,buf1);
  2109. n_digits += decimal_count;
  2110. }
  2111. break;
  2112. case NUM_DFRACTION_7:
  2113. // alternative form of decimal fraction digits, except the final digit
  // looks up "_<digit>d" for each digit; stops at the first missing entry
  2114. while(decimal_count-- > 1)
  2115. {
  2116. sprintf(string,"_%cd", word[n_digits]);
  2117. if(Lookup(tr, string, buf1) == 0)
  2118. break;
  2119. n_digits++;
  2120. strcat(ph_out, buf1);
  2121. }
  2122. }
  // c ends up holding the first character that is not a digit (or the digit
  // at which the length limit stopped the loop); it is tested below
  2123. while(IsDigit09(c = word[n_digits]) && (strlen(ph_out) < (N_WORD_PHONEMES - 10)))
  2124. {
  2125. // speak any remaining decimal fraction digits individually
  2126. value = word[n_digits++] - '0';
  2127. LookupNum2(tr, value, 0, 2, buf1);
  2128. len = strlen(ph_out);
  2129. sprintf(&ph_out[len],"%c%s", phonEND_WORD, buf1);
  2130. }
  2131. // something after the decimal part ?
  2132. if(Lookup(tr, "_dpt2", buf1))
  2133. strcat(ph_out,buf1);
  // another separator followed by a digit: say "_dpt" again and loop;
  // otherwise clear decimal_point to leave the loop
  2134. if((c == tr->langopts.decimal_sep) && IsDigit09(word[n_digits+1]))
  2135. {
  2136. Lookup(tr, "_dpt", buf1);
  2137. strcat(ph_out,buf1);
  2138. }
  2139. else
  2140. {
  2141. decimal_point = 0;
  2142. }
  2143. }
  2144. if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH))
  2145. {
  2146. int next_char;
  // this p shadows the function-level p declared above
  2147. char *p;
  // decode the character after the one that follows the number; with
  // NUM_NOPAUSE, a space there is skipped and the next character is used
  2148. p = &word[n_digits+1];
  2149. p += utf8_in(&next_char,p);
  2150. if((tr->langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
  2151. utf8_in(&next_char,p);
  2152. if(!iswalpha2(next_char) && (thousands_exact==0))
  2153. strcat(ph_out,str_pause); // don't add pause for 100s, 6th, etc.
  2154. }
  2155. *flags |= FLAG_FOUND;
  2156. speak_missing_thousands--;
  // publish the skip count only when an ordinal suffix word was consumed
  2157. if(skipwords)
  2158. dictionary_skipwords = skipwords;
  2159. return(1);
  2160. }
  2161. int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
  2162. {
  2163. if((option_sayas == SAYAS_DIGITS1) || (wtab[0].flags & FLAG_INDIVIDUAL_DIGITS))
  2164. return(0); // speak digits individually
  2165. if(tr->langopts.numbers != 0)
  2166. {
  2167. return(TranslateNumber_1(tr, word1, ph_out, flags, wtab, control));
  2168. }
  2169. return(0);
  2170. }