eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

numbers.cpp 50KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116
  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2011 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include "StdAfx.h"
  20. #include <stdio.h>
  21. #include <ctype.h>
  22. #include <stdlib.h>
  23. #include <string.h>
  24. #ifdef ANDROID
  25. #include "android_wchar.h"
  26. #else
  27. #include <wctype.h>
  28. #include <wchar.h>
  29. #endif
  30. #include "speak_lib.h"
  31. #include "speech.h"
  32. #include "phoneme.h"
  33. #include "synthesize.h"
  34. #include "voice.h"
  35. #include "translate.h"
  /* Accent / modifier codes.  Each value is stored in the mod1/mod2 fields of
   * the LETTER() and LIGATURE() table entries and is used by
   * LookupAccentedLetter() as an index into accents_tab[]. */
  #define M_NAME          0
  #define M_SMALLCAP      1
  #define M_TURNED        2
  #define M_REVERSED      3
  #define M_CURL          4

  #define M_ACUTE         5
  #define M_BREVE         6
  #define M_CARON         7
  #define M_CEDILLA       8
  #define M_CIRCUMFLEX    9
  #define M_DIAERESIS     10
  #define M_DOUBLE_ACUTE  11
  #define M_DOT_ABOVE     12
  #define M_GRAVE         13
  #define M_MACRON        14
  #define M_OGONEK        15
  #define M_RING          16
  #define M_STROKE        17
  #define M_TILDE         18

  #define M_BAR           19
  #define M_RETROFLEX     20
  #define M_HOOK          21

  /* Aliases: these share the code (and therefore the spoken name) of another modifier. */
  #define M_MIDDLE_DOT    M_DOT_ABOVE
  #define M_IMPLOSIVE     M_HOOK
  /* File-scope state for number translation.  All of it starts out zeroed;
   * none of it is read or written by the code visible in this part of the file. */
  static int   n_digit_lookup = 0;
  static char *digit_lookup = NULL;
  static int   speak_missing_thousands = 0;
  static int   number_control = 0;
  /* Describes one accent / modifier. */
  typedef struct {
      const char *name;   /* token to look up in the *_list file */
      int flags;          /* bit 0: LookupAccentedLetter() puts this name before the letter name */
  } ACCENTS;

  /* Indexed by the modifier code decoded from a letter table entry
   * (index 0 doubles as the "ligature" name). */
  static ACCENTS accents_tab[] = {
      {"_lig", 1},   /*  0: ligature / plain name */
      {"_smc", 1},   /*  1: smallcap */
      {"_tur", 1},   /*  2: turned */
      {"_rev", 1},   /*  3: reversed */
      {"_crl", 0},   /*  4: curl */

      {"_acu", 0},   /*  5: acute */
      {"_brv", 0},   /*  6: breve */
      {"_hac", 0},   /*  7: caron/hacek */
      {"_ced", 0},   /*  8: cedilla */
      {"_cir", 0},   /*  9: circumflex */
      {"_dia", 0},   /* 10: diaeresis */
      {"_ac2", 0},   /* 11: double acute */
      {"_dot", 0},   /* 12: dot */
      {"_grv", 0},   /* 13: grave */
      {"_mcn", 0},   /* 14: macron */
      {"_ogo", 0},   /* 15: ogonek */
      {"_rng", 0},   /* 16: ring */
      {"_stk", 0},   /* 17: stroke */
      {"_tld", 0},   /* 18: tilde */

      {"_bar", 0},   /* 19: bar */
      {"_rfx", 0},   /* 20: retroflex */
      {"_hok", 0},   /* 21: hook */
  };
  /* Table entry for a capital letter: no accent data of its own. */
  #define CAPITAL 0

  /* Pack a base letter plus up to two modifiers into one 16-bit table entry:
   *   bits 0-5   base letter code (ch - 59)
   *   bits 6-10  first modifier
   *   bits 11-14 second modifier
   * Arguments and the whole expansion are parenthesised so the macro is safe
   * inside larger expressions and with compound arguments. */
  #define LETTER(ch,mod1,mod2) (((ch)-59)+((mod1) << 6)+((mod2) << 11))

  /* Pack a two-letter ligature; bit 15 (0x8000) marks the entry as a ligature:
   *   bits 0-5   first letter code (ch1 - 59)
   *   bits 6-11  second letter code (ch2 - 59)
   *   bits 12-14 modifier */
  #define LIGATURE(ch1,ch2,mod1) (((ch1)-59)+(((ch2)-59) << 6)+((mod1) << 12)+0x8000)

  /* Base-letter codes for non-ASCII letters.  They are below 'a', and
   * (code - 59) is the index of the real code point in non_ascii_tab[]. */
  #define L_ALPHA    60   // U+3B1
  #define L_SCHWA    61   // U+259
  #define L_OPEN_E   62   // U+25B
  #define L_GAMMA    63   // U+3B3
  #define L_IOTA     64   // U+3B9
  #define L_OE       65   // U+153
  #define L_OMEGA    66   // U+3C9
  #define L_PHI      67   // U+3C6
  #define L_ESH      68   // U+283
  #define L_UPSILON  69   // U+3C5
  #define L_EZH      70   // U+292
  #define L_GLOTTAL  71   // U+294
  #define L_RTAP     72   // U+27E
  /* Code points of the non-ASCII base letters.  LookupAccentedLetter() indexes
   * this with (base letter code - 59), so slot 0 is unused. */
  static const short non_ascii_tab[] = {
      0,
      0x3b1,   /* alpha */
      0x259,   /* schwa */
      0x25b,   /* open e */
      0x3b3,   /* gamma */
      0x3b9,   /* iota */
      0x153,   /* oe */
      0x3c9,   /* omega */
      0x3c6,   /* phi */
      0x283,   /* esh */
      0x3c5,   /* upsilon */
      0x292,   /* ezh */
      0x294,   /* glottal stop */
      0x27e    /* r-tap */
  };
  112. // characters U+00e0 to U+017f
  113. static const unsigned short letter_accents_0e0[] = {
  114. LETTER('a',M_GRAVE,0), // U+00e0
  115. LETTER('a',M_ACUTE,0),
  116. LETTER('a',M_CIRCUMFLEX,0),
  117. LETTER('a',M_TILDE,0),
  118. LETTER('a',M_DIAERESIS,0),
  119. LETTER('a',M_RING,0),
  120. LIGATURE('a','e',0),
  121. LETTER('c',M_CEDILLA,0),
  122. LETTER('e',M_GRAVE,0),
  123. LETTER('e',M_ACUTE,0),
  124. LETTER('e',M_CIRCUMFLEX,0),
  125. LETTER('e',M_DIAERESIS,0),
  126. LETTER('i',M_GRAVE,0),
  127. LETTER('i',M_ACUTE,0),
  128. LETTER('i',M_CIRCUMFLEX,0),
  129. LETTER('i',M_DIAERESIS,0),
  130. LETTER('d',M_NAME,0), // eth // U+00f0
  131. LETTER('n',M_TILDE,0),
  132. LETTER('o',M_GRAVE,0),
  133. LETTER('o',M_ACUTE,0),
  134. LETTER('o',M_CIRCUMFLEX,0),
  135. LETTER('o',M_TILDE,0),
  136. LETTER('o',M_DIAERESIS,0),
  137. 0, // division sign
  138. LETTER('o',M_STROKE,0),
  139. LETTER('u',M_GRAVE,0),
  140. LETTER('u',M_ACUTE,0),
  141. LETTER('u',M_CIRCUMFLEX,0),
  142. LETTER('u',M_DIAERESIS,0),
  143. LETTER('y',M_ACUTE,0),
  144. LETTER('t',M_NAME,0), // thorn
  145. LETTER('y',M_DIAERESIS,0),
  146. CAPITAL, // U+0100
  147. LETTER('a',M_MACRON,0),
  148. CAPITAL,
  149. LETTER('a',M_BREVE,0),
  150. CAPITAL,
  151. LETTER('a',M_OGONEK,0),
  152. CAPITAL,
  153. LETTER('c',M_ACUTE,0),
  154. CAPITAL,
  155. LETTER('c',M_CIRCUMFLEX,0),
  156. CAPITAL,
  157. LETTER('c',M_DOT_ABOVE,0),
  158. CAPITAL,
  159. LETTER('c',M_CARON,0),
  160. CAPITAL,
  161. LETTER('d',M_CARON,0),
  162. CAPITAL, // U+0110
  163. LETTER('d',M_STROKE,0),
  164. CAPITAL,
  165. LETTER('e',M_MACRON,0),
  166. CAPITAL,
  167. LETTER('e',M_BREVE,0),
  168. CAPITAL,
  169. LETTER('e',M_DOT_ABOVE,0),
  170. CAPITAL,
  171. LETTER('e',M_OGONEK,0),
  172. CAPITAL,
  173. LETTER('e',M_CARON,0),
  174. CAPITAL,
  175. LETTER('g',M_CIRCUMFLEX,0),
  176. CAPITAL,
  177. LETTER('g',M_BREVE,0),
  178. CAPITAL, // U+0120
  179. LETTER('g',M_DOT_ABOVE,0),
  180. CAPITAL,
  181. LETTER('g',M_CEDILLA,0),
  182. CAPITAL,
  183. LETTER('h',M_CIRCUMFLEX,0),
  184. CAPITAL,
  185. LETTER('h',M_STROKE,0),
  186. CAPITAL,
  187. LETTER('i',M_TILDE,0),
  188. CAPITAL,
  189. LETTER('i',M_MACRON,0),
  190. CAPITAL,
  191. LETTER('i',M_BREVE,0),
  192. CAPITAL,
  193. LETTER('i',M_OGONEK,0),
  194. CAPITAL, // U+0130
  195. LETTER('i',M_NAME,0), // dotless i
  196. CAPITAL,
  197. LIGATURE('i','j',0),
  198. CAPITAL,
  199. LETTER('j',M_CIRCUMFLEX,0),
  200. CAPITAL,
  201. LETTER('k',M_CEDILLA,0),
  202. LETTER('k',M_NAME,0), // kra
  203. CAPITAL,
  204. LETTER('l',M_ACUTE,0),
  205. CAPITAL,
  206. LETTER('l',M_CEDILLA,0),
  207. CAPITAL,
  208. LETTER('l',M_CARON,0),
  209. CAPITAL,
  210. LETTER('l',M_MIDDLE_DOT,0), // U+0140
  211. CAPITAL,
  212. LETTER('l',M_STROKE,0),
  213. CAPITAL,
  214. LETTER('n',M_ACUTE,0),
  215. CAPITAL,
  216. LETTER('n',M_CEDILLA,0),
  217. CAPITAL,
  218. LETTER('n',M_CARON,0),
  219. LETTER('n',M_NAME,0), // apostrophe n
  220. CAPITAL,
  221. LETTER('n',M_NAME,0), // eng
  222. CAPITAL,
  223. LETTER('o',M_MACRON,0),
  224. CAPITAL,
  225. LETTER('o',M_BREVE,0),
  226. CAPITAL, // U+0150
  227. LETTER('o',M_DOUBLE_ACUTE,0),
  228. CAPITAL,
  229. LIGATURE('o','e',0),
  230. CAPITAL,
  231. LETTER('r',M_ACUTE,0),
  232. CAPITAL,
  233. LETTER('r',M_CEDILLA,0),
  234. CAPITAL,
  235. LETTER('r',M_CARON,0),
  236. CAPITAL,
  237. LETTER('s',M_ACUTE,0),
  238. CAPITAL,
  239. LETTER('s',M_CIRCUMFLEX,0),
  240. CAPITAL,
  241. LETTER('s',M_CEDILLA,0),
  242. CAPITAL, // U+0160
  243. LETTER('s',M_CARON,0),
  244. CAPITAL,
  245. LETTER('t',M_CEDILLA,0),
  246. CAPITAL,
  247. LETTER('t',M_CARON,0),
  248. CAPITAL,
  249. LETTER('t',M_STROKE,0),
  250. CAPITAL,
  251. LETTER('u',M_TILDE,0),
  252. CAPITAL,
  253. LETTER('u',M_MACRON,0),
  254. CAPITAL,
  255. LETTER('u',M_BREVE,0),
  256. CAPITAL,
  257. LETTER('u',M_RING,0),
  258. CAPITAL, // U+0170
  259. LETTER('u',M_DOUBLE_ACUTE,0),
  260. CAPITAL,
  261. LETTER('u',M_OGONEK,0),
  262. CAPITAL,
  263. LETTER('w',M_CIRCUMFLEX,0),
  264. CAPITAL,
  265. LETTER('y',M_CIRCUMFLEX,0),
  266. CAPITAL, // Y-DIAERESIS
  267. CAPITAL,
  268. LETTER('z',M_ACUTE,0),
  269. CAPITAL,
  270. LETTER('z',M_DOT_ABOVE,0),
  271. CAPITAL,
  272. LETTER('z',M_CARON,0),
  273. LETTER('s',M_NAME,0), // long-s // U+17f
  274. };
  275. // characters U+0250 to U+029F
  276. static const unsigned short letter_accents_250[] = {
  277. LETTER('a',M_TURNED,0), // U+250
  278. LETTER(L_ALPHA,0,0),
  279. LETTER(L_ALPHA,M_TURNED,0),
  280. LETTER('b',M_IMPLOSIVE,0),
  281. 0, // open-o
  282. LETTER('c',M_CURL,0),
  283. LETTER('d',M_RETROFLEX,0),
  284. LETTER('d',M_IMPLOSIVE,0),
  285. LETTER('e',M_REVERSED,0), // U+258
  286. 0, // schwa
  287. LETTER(L_SCHWA,M_HOOK,0),
  288. 0, // open-e
  289. LETTER(L_OPEN_E,M_REVERSED,0),
  290. LETTER(L_OPEN_E,M_HOOK,M_REVERSED),
  291. 0,//LETTER(L_OPEN_E,M_CLOSED,M_REVERSED),
  292. LETTER('j',M_BAR,0),
  293. LETTER('g',M_IMPLOSIVE,0), // U+260
  294. LETTER('g',0,0),
  295. LETTER('g',M_SMALLCAP,0),
  296. LETTER(L_GAMMA,0,0),
  297. 0, // ramshorn
  298. LETTER('h',M_TURNED,0),
  299. LETTER('h',M_HOOK,0),
  300. 0,//LETTER(L_HENG,M_HOOK,0),
  301. LETTER('i',M_BAR,0), // U+268
  302. LETTER(L_IOTA,0,0),
  303. LETTER('i',M_SMALLCAP,0),
  304. LETTER('l',M_TILDE,0),
  305. LETTER('l',M_BAR,0),
  306. LETTER('l',M_RETROFLEX,0),
  307. LIGATURE('l','z',0),
  308. LETTER('m',M_TURNED,0),
  309. 0,//LETTER('m',M_TURNED,M_LEG), // U+270
  310. LETTER('m',M_HOOK,0),
  311. 0,//LETTER('n',M_LEFTHOOK,0),
  312. LETTER('n',M_RETROFLEX,0),
  313. LETTER('n',M_SMALLCAP,0),
  314. LETTER('o',M_BAR,0),
  315. LIGATURE('o','e',M_SMALLCAP),
  316. 0,//LETTER(L_OMEGA,M_CLOSED,0),
  317. LETTER(L_PHI,0,0), // U+278
  318. LETTER('r',M_TURNED,0),
  319. 0,//LETTER('r',M_TURNED,M_LEG),
  320. LETTER('r',M_RETROFLEX,M_TURNED),
  321. 0,//LETTER('r',M_LEG,0),
  322. LETTER('r',M_RETROFLEX,0),
  323. 0, // r-tap
  324. LETTER(L_RTAP,M_REVERSED,0),
  325. LETTER('r',M_SMALLCAP,0), // U+280
  326. LETTER('r',M_TURNED,M_SMALLCAP),
  327. LETTER('s',M_RETROFLEX,0),
  328. 0, // esh
  329. 0,//LETTER('j',M_BAR,L_IMPLOSIVE),
  330. LETTER(L_ESH,M_REVERSED,0),
  331. LETTER(L_ESH,M_CURL,0),
  332. LETTER('t',M_TURNED,0),
  333. LETTER('t',M_RETROFLEX,0), // U+288
  334. LETTER('u',M_BAR,0),
  335. LETTER(L_UPSILON,0,0),
  336. LETTER('v',M_HOOK,0),
  337. LETTER('v',M_TURNED,0),
  338. LETTER('w',M_TURNED,0),
  339. LETTER('y',M_TURNED,0),
  340. LETTER('y',M_SMALLCAP,0),
  341. LETTER('z',M_RETROFLEX,0), // U+290
  342. LETTER('z',M_CURL,0),
  343. 0, // ezh
  344. LETTER(L_EZH,M_CURL,0),
  345. 0, // glottal stop
  346. LETTER(L_GLOTTAL,M_REVERSED,0),
  347. LETTER(L_GLOTTAL,M_TURNED,0),
  348. 0,//LETTER('c',M_LONG,0),
  349. 0, // bilabial click // U+298
  350. LETTER('b',M_SMALLCAP,0),
  351. 0,//LETTER(L_OPEN_E,M_CLOSED,0),
  352. LETTER('g',M_IMPLOSIVE,M_SMALLCAP),
  353. LETTER('h',M_SMALLCAP,0),
  354. LETTER('j',M_CURL,0),
  355. LETTER('k',M_TURNED,0),
  356. LETTER('l',M_SMALLCAP,0),
  357. LETTER('q',M_HOOK,0), // U+2a0
  358. LETTER(L_GLOTTAL,M_STROKE,0),
  359. LETTER(L_GLOTTAL,M_STROKE,M_REVERSED),
  360. LIGATURE('d','z',0),
  361. 0, // dezh
  362. LIGATURE('d','z',M_CURL),
  363. LIGATURE('t','s',0),
  364. 0, // tesh
  365. LIGATURE('t','s',M_CURL),
  366. };
  367. static int LookupLetter2(Translator *tr, unsigned int letter, char *ph_buf)
  368. {//========================================================================
  369. int len;
  370. char single_letter[10];
  371. single_letter[0] = 0;
  372. single_letter[1] = '_';
  373. len = utf8_out(letter, &single_letter[2]);
  374. single_letter[len+2] = ' ';
  375. single_letter[len+3] = 0;
  376. if(Lookup(tr, &single_letter[1], ph_buf) == 0)
  377. {
  378. single_letter[1] = ' ';
  379. if(Lookup(tr, &single_letter[2], ph_buf) == 0)
  380. {
  381. TranslateRules(tr, &single_letter[2], ph_buf, 20, NULL,0,NULL);
  382. }
  383. }
  384. return(ph_buf[0]);
  385. }
  386. void LookupAccentedLetter(Translator *tr, unsigned int letter, char *ph_buf)
  387. {//=========================================================================
  388. // lookup the character in the accents table
  389. int accent_data = 0;
  390. int accent1 = 0;
  391. int accent2 = 0;
  392. int basic_letter;
  393. int letter2=0;
  394. char ph_letter1[30];
  395. char ph_letter2[30];
  396. char ph_accent1[30];
  397. char ph_accent2[30];
  398. ph_accent2[0] = 0;
  399. if((letter >= 0xe0) && (letter < 0x17f))
  400. {
  401. accent_data = letter_accents_0e0[letter - 0xe0];
  402. }
  403. else
  404. if((letter >= 0x250) && (letter <= 0x2a8))
  405. {
  406. accent_data = letter_accents_250[letter - 0x250];
  407. }
  408. if(accent_data != 0)
  409. {
  410. basic_letter = (accent_data & 0x3f) + 59;
  411. if(basic_letter < 'a')
  412. basic_letter = non_ascii_tab[basic_letter-59];
  413. if(accent_data & 0x8000)
  414. {
  415. letter2 = (accent_data >> 6) & 0x3f;
  416. letter2 += 59;
  417. accent2 = (accent_data >> 12) & 0x7;
  418. }
  419. else
  420. {
  421. accent1 = (accent_data >> 6) & 0x1f;
  422. accent2 = (accent_data >> 11) & 0xf;
  423. }
  424. if(Lookup(tr, accents_tab[accent1].name, ph_accent1) != 0)
  425. {
  426. if(LookupLetter2(tr, basic_letter, ph_letter1) != 0)
  427. {
  428. if(accent2 != 0)
  429. {
  430. if(Lookup(tr, accents_tab[accent2].name, ph_accent2) == 0)
  431. {
  432. // break;
  433. }
  434. if(accents_tab[accent2].flags & 1)
  435. {
  436. strcpy(ph_buf,ph_accent2);
  437. ph_buf += strlen(ph_buf);
  438. ph_accent2[0] = 0;
  439. }
  440. }
  441. if(letter2 != 0)
  442. {
  443. //ligature
  444. LookupLetter2(tr, letter2, ph_letter2);
  445. sprintf(ph_buf,"%s%c%s%c%s%s",ph_accent1, phonPAUSE_VSHORT, ph_letter1, phonSTRESS_P, ph_letter2, ph_accent2);
  446. }
  447. else
  448. {
  449. if(accent1 == 0)
  450. strcpy(ph_buf, ph_letter1);
  451. else
  452. if((tr->langopts.accents & 1) || (accents_tab[accent1].flags & 1))
  453. sprintf(ph_buf,"%s%c%c%s", ph_accent1, phonPAUSE_VSHORT, phonSTRESS_P, ph_letter1);
  454. else
  455. sprintf(ph_buf,"%c%s%c%s%c", phonSTRESS_2, ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT);
  456. }
  457. }
  458. }
  459. }
  460. } // end of LookupAccentedLetter
  461. void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_buf1, int control)
  462. {//==============================================================================================
  463. // control, bit 0: not the first letter of a word
  464. int len;
  465. static char single_letter[10] = {0,0};
  466. unsigned int dict_flags[2];
  467. char ph_buf3[40];
  468. ph_buf1[0] = 0;
  469. len = utf8_out(letter,&single_letter[2]);
  470. single_letter[len+2] = ' ';
  471. if(next_byte == -1)
  472. {
  473. // speaking normal text, not individual characters
  474. if(Lookup(tr, &single_letter[2], ph_buf1) != 0)
  475. return;
  476. single_letter[1] = '_';
  477. if(Lookup(tr, &single_letter[1], ph_buf3) != 0)
  478. return; // the character is specified as _* so ignore it when speaking normal text
  479. // check whether this character is specified for English
  480. if(tr->translator_name == L('e','n'))
  481. return; // we are already using English
  482. SetTranslator2("en");
  483. if(Lookup(translator2, &single_letter[2], ph_buf3) != 0)
  484. {
  485. // yes, switch to English and re-translate the word
  486. sprintf(ph_buf1,"%c",phonSWITCH);
  487. }
  488. SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
  489. return;
  490. }
  491. if((letter <= 32) || iswspace(letter))
  492. {
  493. // lookup space as _&32 etc.
  494. sprintf(&single_letter[1],"_#%d ",letter);
  495. Lookup(tr, &single_letter[1], ph_buf1);
  496. return;
  497. }
  498. if(next_byte != ' ')
  499. next_byte = RULE_SPELLING;
  500. single_letter[3+len] = next_byte; // follow by space-space if the end of the word, or space-31
  501. single_letter[1] = '_';
  502. // if the $accent flag is set for this letter, use the accents table (below)
  503. dict_flags[1] = 0;
  504. if(Lookup(tr, &single_letter[1], ph_buf3) == 0)
  505. {
  506. single_letter[1] = ' ';
  507. if(Lookup(tr, &single_letter[2], ph_buf3) == 0)
  508. {
  509. TranslateRules(tr, &single_letter[2], ph_buf3, sizeof(ph_buf3), NULL,FLAG_NO_TRACE,NULL);
  510. }
  511. }
  512. if(ph_buf3[0] == 0)
  513. {
  514. LookupAccentedLetter(tr, letter, ph_buf3);
  515. }
  516. strcpy(ph_buf1, ph_buf3);
  517. if((ph_buf1[0] == 0) || (ph_buf1[0] == phonSWITCH))
  518. {
  519. return;
  520. }
  521. dict_flags[0] = 0;
  522. dict_flags[1] = 0;
  523. SetWordStress(tr, ph_buf1, dict_flags, -1, control & 1);
  524. } // end of LookupLetter
  525. int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)
  526. {//=========================================================================
  527. // get pronunciation for an isolated letter
  528. // return number of bytes used by the letter
  529. // control bit 0: a non-initial letter in a word
  530. // bit 1: say 'capital'
  531. int n_bytes;
  532. int letter;
  533. int len;
  534. int ix;
  535. int save_option_phonemes;
  536. char *p2;
  537. char *pbuf;
  538. char capital[20];
  539. char ph_buf[80];
  540. char ph_buf2[80];
  541. char hexbuf[6];
  542. ph_buf[0] = 0;
  543. capital[0] = 0;
  544. n_bytes = utf8_in(&letter,word);
  545. if((letter & 0xfff00) == 0x0e000)
  546. {
  547. letter &= 0xff; // uncode private usage area
  548. }
  549. if(control & 2)
  550. {
  551. // include CAPITAL information
  552. if(iswupper(letter))
  553. {
  554. Lookup(tr, "_cap", capital);
  555. }
  556. }
  557. letter = towlower2(letter);
  558. LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
  559. if(ph_buf[0] == phonSWITCH)
  560. {
  561. strcpy(phonemes,ph_buf);
  562. return(0);
  563. }
  564. if((ph_buf[0] == 0) && (tr->translator_name != L('e','n')))
  565. {
  566. // speak as English, check whether there is a translation for this character
  567. SetTranslator2("en");
  568. save_option_phonemes = option_phonemes;
  569. option_phonemes = 0;
  570. LookupLetter(translator2, letter, word[n_bytes], ph_buf, control & 1);
  571. SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
  572. option_phonemes = save_option_phonemes;
  573. if(ph_buf[0] != 0)
  574. {
  575. sprintf(phonemes,"%cen",phonSWITCH);
  576. return(0);
  577. }
  578. }
  579. if(ph_buf[0] == 0)
  580. {
  581. // character name not found
  582. if((letter >= 0x2800) && (letter <= 0x28ff))
  583. {
  584. // braille dots symbol
  585. Lookup(tr, "_braille", ph_buf);
  586. if(ph_buf[0] == 0)
  587. {
  588. EncodePhonemes("br'e:l", ph_buf, NULL);
  589. }
  590. if(ph_buf[0] != 0)
  591. {
  592. pbuf = ph_buf + strlen(ph_buf);
  593. for(ix=0; ix<8; ix++)
  594. {
  595. if(letter & (1 << ix))
  596. {
  597. *pbuf++ = phonPAUSE_VSHORT;
  598. LookupLetter(tr, '1'+ix, 0, pbuf, 1);
  599. pbuf += strlen(pbuf);
  600. }
  601. }
  602. }
  603. }
  604. if(ph_buf[0]== 0)
  605. {
  606. if(iswalpha(letter))
  607. Lookup(tr, "_?A", ph_buf);
  608. if((ph_buf[0]==0) && !iswspace(letter))
  609. Lookup(tr, "_??", ph_buf);
  610. if(ph_buf[0] != 0)
  611. {
  612. // speak the hexadecimal number of the character code
  613. sprintf(hexbuf,"%x",letter);
  614. pbuf = ph_buf;
  615. for(p2 = hexbuf; *p2 != 0; p2++)
  616. {
  617. pbuf += strlen(pbuf);
  618. *pbuf++ = phonPAUSE_VSHORT;
  619. LookupLetter(tr, *p2, 0, pbuf, 1);
  620. }
  621. }
  622. }
  623. }
  624. len = strlen(phonemes);
  625. if(tr->langopts.accents & 2)
  626. sprintf(ph_buf2,"%c%s%s",0xff,ph_buf,capital);
  627. else
  628. sprintf(ph_buf2,"%c%s%s",0xff,capital,ph_buf); // the 0xff marker will be removed or replaced in SetSpellingStress()
  629. if((len + strlen(ph_buf2)) < N_WORD_PHONEMES)
  630. {
  631. strcpy(&phonemes[len],ph_buf2);
  632. }
  633. return(n_bytes);
  634. } // end of TranslateLetter
  635. void SetSpellingStress(Translator *tr, char *phonemes, int control, int n_chars)
  636. {//=============================================================================
  637. // Individual letter names, reduce the stress of some.
  638. int ix;
  639. unsigned int c;
  640. int n_stress=0;
  641. int count;
  642. unsigned char buf[N_WORD_PHONEMES];
  643. for(ix=0; (c = phonemes[ix]) != 0; ix++)
  644. {
  645. if(c == phonSTRESS_P)
  646. {
  647. n_stress++;
  648. }
  649. buf[ix] = c;
  650. }
  651. buf[ix] = 0;
  652. count = 0;
  653. for(ix=0; (c = buf[ix]) != 0; ix++)
  654. {
  655. if((c == phonSTRESS_P) && (n_chars > 1))
  656. {
  657. count++;
  658. if(tr->langopts.spelling_stress == 1)
  659. {
  660. // stress on initial letter when spelling
  661. if(count > 1)
  662. c = phonSTRESS_3;
  663. }
  664. else
  665. {
  666. if(count != n_stress)
  667. {
  668. if(((count % 3) != 0) || (count == n_stress-1))
  669. c = phonSTRESS_3; // reduce to secondary stress
  670. }
  671. }
  672. }
  673. else
  674. if(c == 0xff)
  675. {
  676. if((control < 2) || (ix==0))
  677. continue; // don't insert pauses
  678. if(control == 4)
  679. c = phonPAUSE; // pause after each character
  680. if(((count % 3) == 0) || (control > 2))
  681. c = phonPAUSE_NOLINK; // pause following a primary stress
  682. else
  683. c = phonPAUSE_VSHORT;
  684. // else
  685. // continue; // remove marker
  686. }
  687. *phonemes++ = c;
  688. }
  689. if(control >= 2)
  690. *phonemes++ = phonPAUSE_NOLINK;
  691. *phonemes = 0;
  692. } // end of SetSpellingStress
  693. // Numbers
  /* Phoneme buffer for an ordinal-number ending; starts out as an empty string. */
  static char ph_ordinal2[12] = {0};
  695. static int CheckDotOrdinal(Translator *tr, char *word, char *word_end, WORD_TAB *wtab, int roman)
  696. {//==============================================================================================
  697. int ordinal = 0;
  698. int c2;
  699. int nextflags;
  700. if((tr->langopts.numbers & NUM_ORDINAL_DOT) && ((word_end[0] == '.') || (wtab[0].flags & FLAG_HAS_DOT)) && !(wtab[1].flags & FLAG_NOSPACE))
  701. {
  702. if(roman || !(wtab[1].flags & FLAG_FIRST_UPPER))
  703. {
  704. if(word_end[0] == '.')
  705. utf8_in(&c2, &word_end[2]);
  706. else
  707. utf8_in(&c2, &word_end[0]);
  708. if((word_end[0] != 0) && (word_end[1] != 0) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || IsAlpha(c2)))
  709. {
  710. // ordinal number is indicated by dot after the number
  711. // but not if the next word starts with an upper-case letter
  712. // (c2 == 0) is for cases such as, "2.,"
  713. ordinal = 2;
  714. if(word_end[0] == '.')
  715. word_end[0] = ' ';
  716. if((roman==0) && (tr->translator_name == L('h','u')))
  717. {
  718. // lang=hu don't treat dot as ordinal indicator if the next word is a month name ($alt). It may have a suffix.
  719. nextflags = 0;
  720. if(IsAlpha(c2))
  721. {
  722. nextflags = TranslateWord(tr, &word_end[2], 0, NULL);
  723. }
  724. if((tr->prev_dict_flags & FLAG_ALT_TRANS) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || iswdigit(c2)))
  725. ordinal = 0; // TEST 09.02.10
  726. if(nextflags & FLAG_ALT_TRANS)
  727. ordinal = 0;
  728. if(nextflags & FLAG_ALT3_TRANS)
  729. {
  730. if(word[-2] == '-')
  731. ordinal = 0; // eg. december 2-5. között
  732. if(tr->prev_dict_flags & (FLAG_ALT_TRANS | FLAG_ALT3_TRANS))
  733. ordinal = 0x22;
  734. }
  735. }
  736. }
  737. }
  738. }
  739. return(ordinal);
  740. } // end of CheckDotOrdinal
  /* lang=hu: decide whether a number takes its variant form because it is
   * followed by a hyphen and a suffix starting with 'a' or 'e'.
   *
   * word          the text following the number (the suffix)
   * thousandplex  which group of three digits is being spoken
   * value         the numeric value
   *
   * Returns 1 for the variant form, 0 otherwise.  The variant is not used when
   * the suffix is a, e, az, ez, azt, ezt, att, ett, nor for an "al"/"el"
   * suffix when thousandplex is 1 or value is a multiple of 1000 (eg. 1000-el).
   */
  static int hu_number_e(const char *word, int thousandplex, int value)
  {
      if((word[0] != 'a') && (word[0] != 'e'))
          return(0);

      switch(word[1])
      {
      case ' ':   /* a, e */
      case 'z':   /* az, ez, azt, ezt */
          return(0);

      case 't':   /* att, ett */
          return((word[2] == 't') ? 0 : 1);

      case 'l':   /* 1000-el */
          return(((thousandplex == 1) || ((value % 1000) == 0)) ? 0 : 1);

      default:
          return(1);
      }
  } // end of hu_number_e
  754. int TranslateRoman(Translator *tr, char *word, char *ph_out, WORD_TAB *wtab)
  755. {//=========================================================================
  756. int c;
  757. char *p;
  758. const char *p2;
  759. int acc;
  760. int prev;
  761. int value;
  762. int subtract;
  763. int repeat = 0;
  764. int n_digits = 0;
  765. char *word_start;
  766. int num_control = 0;
  767. unsigned int flags[2];
  768. char ph_roman[30];
  769. char number_chars[N_WORD_BYTES];
  770. static const char *roman_numbers = "ixcmvld";
  771. static int roman_values[] = {1,10,100,1000,5,50,500};
  772. acc = 0;
  773. prev = 0;
  774. subtract = 0x7fff;
  775. ph_out[0] = 0;
  776. flags[0] = 0;
  777. flags[1] = 0;
  778. if(((tr->langopts.numbers & NUM_ROMAN_CAPITALS) && !(wtab[0].flags & FLAG_ALL_UPPER)) || isdigit(word[-2]))
  779. return(0); // not '2xx'
  780. word_start = word;
  781. while((c = *word++) != ' ')
  782. {
  783. if((p2 = strchr(roman_numbers,c)) == NULL)
  784. return(0);
  785. value = roman_values[p2 - roman_numbers];
  786. if(value == prev)
  787. {
  788. repeat++;
  789. if(repeat >= 3)
  790. return(0);
  791. }
  792. else
  793. repeat = 0;
  794. if((prev > 1) && (prev != 10) && (prev != 100))
  795. {
  796. if(value >= prev)
  797. return(0);
  798. }
  799. if((prev != 0) && (prev < value))
  800. {
  801. if(((acc % 10) != 0) || ((prev*10) < value))
  802. return(0);
  803. subtract = prev;
  804. value -= subtract;
  805. }
  806. else
  807. if(value >= subtract)
  808. return(0);
  809. else
  810. acc += prev;
  811. prev = value;
  812. n_digits++;
  813. }
  814. if(isdigit(word[0]))
  815. return(0); // eg. 'xx2'
  816. acc += prev;
  817. if(acc < tr->langopts.min_roman)
  818. return(0);
  819. if(acc > tr->langopts.max_roman)
  820. return(0);
  821. Lookup(tr, "_roman",ph_roman); // precede by "roman" if _rom is defined in *_list
  822. p = &ph_out[0];
  823. if((tr->langopts.numbers & NUM_ROMAN_AFTER) == 0)
  824. {
  825. strcpy(ph_out,ph_roman);
  826. p = &ph_out[strlen(ph_roman)];
  827. }
  828. sprintf(number_chars," %d ",acc);
  829. if(word[0] == '.')
  830. {
  831. // dot has not been removed. This implies that there was no space after it
  832. return(0);
  833. }
  834. if(CheckDotOrdinal(tr, word_start, word, wtab, 1))
  835. wtab[0].flags |= FLAG_ORDINAL;
  836. if(tr->langopts.numbers & NUM_ROMAN_ORDINAL)
  837. {
  838. if(tr->translator_name == L('h','u'))
  839. {
  840. if(!(wtab[0].flags & FLAG_ORDINAL))
  841. {
  842. if((wtab[0].flags & FLAG_HYPHEN_AFTER) && hu_number_e(word, 0, acc))
  843. {
  844. // should use the 'e' form of the number
  845. num_control |= 1;
  846. }
  847. else
  848. return(0);
  849. }
  850. }
  851. else
  852. {
  853. wtab[0].flags |= FLAG_ORDINAL;
  854. }
  855. }
  856. tr->prev_dict_flags = 0;
  857. TranslateNumber(tr, &number_chars[2], p, flags, wtab, num_control);
  858. if(tr->langopts.numbers & NUM_ROMAN_AFTER)
  859. strcat(ph_out,ph_roman);
  860. return(1);
  861. } // end of TranslateRoman
  862. static const char *M_Variant(int value)
  863. {//====================================
  864. // returns M, or perhaps MA or MB for some cases
  865. int teens = 0;
  866. if(((value % 100) > 10) && ((value % 100) < 20))
  867. teens = 1;
  868. switch((translator->langopts.numbers2 >> 6) & 0x7)
  869. {
  870. case 1: // lang=ru use singular for xx1 except for x11
  871. if((teens == 0) && ((value % 10) == 1))
  872. return("1M");
  873. break;
  874. case 2: // lang=cs,sk
  875. if((value >= 2) && (value <= 4))
  876. return("0MA");
  877. break;
  878. case 3: // lang=pl
  879. if((teens == 0) && (((value % 10) >= 2) && ((value % 10) <= 4)))
  880. return("0MA");
  881. break;
  882. case 4: // lang=lt
  883. if((teens == 1) || ((value % 10) == 0))
  884. return("0MB");
  885. if((value % 10) == 1)
  886. return("0MA");
  887. break;
  888. case 5: // lang=bs,hr,sr
  889. if(teens == 0)
  890. {
  891. if((value % 10) == 1)
  892. return("1M");
  893. if(((value % 10) >= 2) && ((value % 10) <= 4))
  894. return("0MA");
  895. }
  896. break;
  897. }
  898. return("0M");
  899. }
  900. static int LookupThousands(Translator *tr, int value, int thousandplex, int thousands_exact, char *ph_out)
  901. {//=======================================================================================================
  902. // thousands_exact: bit 0 no hundreds,tens,or units, bit 1 ordinal numberr
  903. int found;
  904. int found_value=0;
  905. char string[12];
  906. char ph_of[12];
  907. char ph_thousands[40];
  908. char ph_buf[40];
  909. ph_of[0] = 0;
  910. // first look for a match with the exact value of thousands
  911. if(value > 0)
  912. {
  913. if(thousands_exact & 1)
  914. {
  915. if(thousands_exact & 2)
  916. {
  917. // ordinal number
  918. sprintf(string,"_%dM%do",value,thousandplex);
  919. found_value = Lookup(tr, string, ph_thousands);
  920. }
  921. if(!found_value & (number_control & 1))
  922. {
  923. // look for the 'e' variant
  924. sprintf(string,"_%dM%de",value,thousandplex);
  925. found_value = Lookup(tr, string, ph_thousands);
  926. }
  927. if(!found_value)
  928. {
  929. // is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta)
  930. sprintf(string,"_%dM%dx",value,thousandplex);
  931. found_value = Lookup(tr, string, ph_thousands);
  932. }
  933. }
  934. if(found_value == 0)
  935. {
  936. sprintf(string,"_%dM%d",value,thousandplex);
  937. found_value = Lookup(tr, string, ph_thousands);
  938. }
  939. }
  940. if(found_value == 0)
  941. {
  942. if((value % 100) >= 20)
  943. {
  944. Lookup(tr, "_0of", ph_of);
  945. }
  946. found = 0;
  947. if(thousands_exact & 1)
  948. {
  949. if(thousands_exact & 2)
  950. {
  951. // ordinal number
  952. sprintf(string,"_%s%do",M_Variant(value), thousandplex);
  953. found = Lookup(tr, string, ph_thousands);
  954. }
  955. if(!found && (number_control & 1))
  956. {
  957. // look for the 'e' variant
  958. sprintf(string,"_%s%de",M_Variant(value), thousandplex);
  959. found = Lookup(tr, string, ph_thousands);
  960. }
  961. if(!found)
  962. {
  963. // is there a different pronunciation if there are no hundreds,tens,or units ?
  964. sprintf(string,"_%s%dx",M_Variant(value), thousandplex);
  965. found = Lookup(tr, string, ph_thousands);
  966. }
  967. }
  968. if(found == 0)
  969. {
  970. sprintf(string,"_%s%d",M_Variant(value), thousandplex);
  971. if(Lookup(tr, string, ph_thousands) == 0)
  972. {
  973. if(thousandplex > 3)
  974. {
  975. sprintf(string,"_0M%d", thousandplex-1);
  976. if(Lookup(tr, string, ph_buf) == 0)
  977. {
  978. // say "millions" if this name is not available and neither is the next lower
  979. Lookup(tr, "_0M2", ph_thousands);
  980. speak_missing_thousands = 3;
  981. }
  982. }
  983. if(ph_thousands[0] == 0)
  984. {
  985. // repeat "thousand" if higher order names are not available
  986. sprintf(string,"_%dM1",value);
  987. if((found_value = Lookup(tr, string, ph_thousands)) == 0)
  988. Lookup(tr, "_0M1", ph_thousands);
  989. speak_missing_thousands = 2;
  990. }
  991. }
  992. }
  993. }
  994. sprintf(ph_out,"%s%s",ph_of,ph_thousands);
  995. if((value == 1) && (thousandplex == 1) && (tr->langopts.numbers & NUM_OMIT_1_THOUSAND))
  996. return(1);
  997. return(found_value);
  998. } // end f LookupThousands
  999. static int LookupNum2(Translator *tr, int value, const int control, char *ph_out)
  1000. {//=============================================================================
  1001. // Lookup a 2 digit number
  1002. // control bit 0: ordinal number
  1003. // control bit 1: final tens and units (not number of thousands) (use special form of '1', LANG=de "eins")
  1004. // control bit 2: tens and units only, no higher digits
  1005. // control bit 3: use feminine form of '2' (for thousands
  1006. // control bit 4: speak zero tens
  1007. // control bit 5: variant of ordinal number (lang=hu)
  1008. // bit 8 followed by decimal fraction
  1009. int found;
  1010. int ix;
  1011. int units;
  1012. int tens;
  1013. int is_ordinal;
  1014. int used_and=0;
  1015. int found_ordinal = 0;
  1016. int next_phtype;
  1017. int ord_type = 'o';
  1018. char string[12]; // for looking up entries in *_list
  1019. char ph_ordinal[20];
  1020. char ph_tens[50];
  1021. char ph_digits[50];
  1022. char ph_and[12];
  1023. units = value % 10;
  1024. tens = value / 10;
  1025. found = 0;
  1026. ph_ordinal[0] = 0;
  1027. ph_tens[0] = 0;
  1028. ph_digits[0] = 0;
  1029. ph_and[0] = 0;
  1030. if(control & 0x20)
  1031. {
  1032. ord_type = 'q';
  1033. }
  1034. is_ordinal = control & 1;
  1035. if((control & 2) && (n_digit_lookup == 2))
  1036. {
  1037. // pronunciation of the final 2 digits has already been found
  1038. strcpy(ph_out, digit_lookup);
  1039. }
  1040. else
  1041. {
  1042. if(digit_lookup[0] == 0)
  1043. {
  1044. // is there a special pronunciation for this 2-digit number
  1045. if(control & 8)
  1046. {
  1047. // is there a feminine form?
  1048. sprintf(string,"_%df",value);
  1049. found = Lookup(tr, string, ph_digits);
  1050. }
  1051. else
  1052. if(is_ordinal)
  1053. {
  1054. strcpy(ph_ordinal, ph_ordinal2);
  1055. if(control & 4)
  1056. {
  1057. sprintf(string,"_%d%cx",value,ord_type); // LANG=hu, special word for 1. 2. when there are no higher digits
  1058. found = Lookup(tr, string, ph_digits);
  1059. }
  1060. if(found == 0)
  1061. {
  1062. sprintf(string,"_%d%c",value,ord_type);
  1063. found = Lookup(tr, string, ph_digits);
  1064. }
  1065. found_ordinal = found;
  1066. }
  1067. if(found == 0)
  1068. {
  1069. if(control & 2)
  1070. {
  1071. // the final tens and units of a number
  1072. if(number_control & 1)
  1073. {
  1074. // look for 'e' variant
  1075. sprintf(string,"_%de",value);
  1076. found = Lookup(tr, string, ph_digits);
  1077. }
  1078. }
  1079. else
  1080. {
  1081. // followed by hundreds or thousands etc
  1082. sprintf(string,"_%da",value);
  1083. found = Lookup(tr, string, ph_digits);
  1084. }
  1085. if(!found)
  1086. {
  1087. if((is_ordinal) && (tr->langopts.numbers2 & NUM2_NO_TEEN_ORDINALS))
  1088. {
  1089. // don't use numbers 10-99 to make ordinals, always use _1Xo etc (lang=pt)
  1090. }
  1091. else
  1092. {
  1093. sprintf(string,"_%d",value);
  1094. found = Lookup(tr, string, ph_digits);
  1095. }
  1096. }
  1097. }
  1098. }
  1099. // no, speak as tens+units
  1100. if((control & 0x10) && (value < 10))
  1101. {
  1102. // speak leading zero
  1103. Lookup(tr, "_0", ph_tens);
  1104. }
  1105. else
  1106. {
  1107. if(found)
  1108. {
  1109. ph_tens[0] = 0;
  1110. }
  1111. else
  1112. {
  1113. if((is_ordinal) &&
  1114. ((units == 0) || (tr->langopts.numbers & NUM_SWAP_TENS) || (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL)))
  1115. {
  1116. sprintf(string,"_%dX%c", tens, ord_type);
  1117. if(Lookup(tr, string, ph_tens) != 0)
  1118. {
  1119. found_ordinal = 1;
  1120. if((units != 0) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))
  1121. {
  1122. // Use the ordinal form of tens as well as units. Add the ordinal ending
  1123. strcat(ph_tens, ph_ordinal2);
  1124. }
  1125. }
  1126. }
  1127. if(found_ordinal == 0)
  1128. {
  1129. sprintf(string,"_%dX", tens);
  1130. Lookup(tr, string, ph_tens);
  1131. }
  1132. if((ph_tens[0] == 0) && (tr->langopts.numbers & NUM_VIGESIMAL))
  1133. {
  1134. // tens not found, (for example) 73 is 60+13
  1135. units = (value % 20);
  1136. sprintf(string,"_%dX", tens & 0xfe);
  1137. Lookup(tr, string, ph_tens);
  1138. }
  1139. ph_digits[0] = 0;
  1140. if(units > 0)
  1141. {
  1142. found = 0;
  1143. if((control & 2) && (digit_lookup[0] != 0))
  1144. {
  1145. // we have an entry for this digit (possibly together with the next word)
  1146. strcpy(ph_digits, digit_lookup);
  1147. found_ordinal = 1;
  1148. ph_ordinal[0] = 0;
  1149. }
  1150. else
  1151. {
  1152. if(control & 8)
  1153. {
  1154. // is there a variant form of this number?
  1155. sprintf(string,"_%df",units);
  1156. found = Lookup(tr, string, ph_digits);
  1157. }
  1158. if((is_ordinal) && ((tr->langopts.numbers & NUM_SWAP_TENS) == 0))
  1159. {
  1160. // ordinal
  1161. sprintf(string,"_%d%c",units,ord_type);
  1162. if((found = Lookup(tr, string, ph_digits)) != 0)
  1163. {
  1164. found_ordinal = 1;
  1165. }
  1166. }
  1167. if(found == 0)
  1168. {
  1169. if((number_control & 1) && (control & 2))
  1170. {
  1171. // look for 'e' variant
  1172. sprintf(string,"_%de",units);
  1173. found = Lookup(tr, string, ph_digits);
  1174. }
  1175. else
  1176. if(((control & 2) == 0) || ((tr->langopts.numbers & NUM_SWAP_TENS) != 0))
  1177. {
  1178. // followed by hundreds or thousands (or tens)
  1179. sprintf(string,"_%da",units);
  1180. found = Lookup(tr, string, ph_digits);
  1181. }
  1182. }
  1183. if(found == 0)
  1184. {
  1185. sprintf(string,"_%d",units);
  1186. Lookup(tr, string, ph_digits);
  1187. }
  1188. }
  1189. }
  1190. }
  1191. }
  1192. if((is_ordinal) && (found_ordinal == 0) && (ph_ordinal[0] == 0))
  1193. {
  1194. if((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & NUM_SWAP_TENS)))
  1195. Lookup(tr, "_ord20", ph_ordinal);
  1196. if(ph_ordinal[0] == 0)
  1197. Lookup(tr, "_ord", ph_ordinal);
  1198. }
  1199. if((tr->langopts.numbers & (NUM_SWAP_TENS | NUM_AND_UNITS)) && (ph_tens[0] != 0) && (ph_digits[0] != 0))
  1200. {
  1201. Lookup(tr, "_0and", ph_and);
  1202. if((is_ordinal) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))
  1203. ph_and[0] = 0;
  1204. if(tr->langopts.numbers & NUM_SWAP_TENS)
  1205. sprintf(ph_out,"%s%s%s%s",ph_digits, ph_and, ph_tens, ph_ordinal);
  1206. else
  1207. sprintf(ph_out,"%s%s%s%s",ph_tens, ph_and, ph_digits, ph_ordinal);
  1208. used_and = 1;
  1209. }
  1210. else
  1211. {
  1212. if(tr->langopts.numbers & NUM_SINGLE_VOWEL)
  1213. {
  1214. // remove vowel from the end of tens if units starts with a vowel (LANG=Italian)
  1215. if(((ix = strlen(ph_tens)-1) >= 0) && (ph_digits[0] != 0))
  1216. {
  1217. if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS)
  1218. next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type;
  1219. if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
  1220. ph_tens[ix] = 0;
  1221. }
  1222. }
  1223. sprintf(ph_out,"%s%s%s",ph_tens, ph_digits, ph_ordinal);
  1224. }
  1225. }
  1226. if(tr->langopts.numbers & NUM_SINGLE_STRESS_L)
  1227. {
  1228. // only one primary stress, on the first part (tens)
  1229. found = 0;
  1230. for(ix=0; ix < (signed)strlen(ph_out); ix++)
  1231. {
  1232. if(ph_out[ix] == phonSTRESS_P)
  1233. {
  1234. if(found)
  1235. ph_out[ix] = phonSTRESS_3;
  1236. else
  1237. found = 1;
  1238. }
  1239. }
  1240. }
  1241. else
  1242. if(tr->langopts.numbers & NUM_SINGLE_STRESS)
  1243. {
  1244. // only one primary stress
  1245. found = 0;
  1246. for(ix=strlen(ph_out)-1; ix>=0; ix--)
  1247. {
  1248. if(ph_out[ix] == phonSTRESS_P)
  1249. {
  1250. if(found)
  1251. ph_out[ix] = phonSTRESS_3;
  1252. else
  1253. found = 1;
  1254. }
  1255. }
  1256. }
  1257. return(used_and);
  1258. } // end of LookupNum2
  1259. static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null, int thousandplex, int control)
  1260. {//=============================================================================================================
  1261. // Translate a 3 digit number
  1262. // control bit 0, previous thousands
  1263. // bit 1, ordinal number
  1264. // bit 5 variant form of ordinal number
  1265. // bit 8 followed by decimal fraction
  1266. int found;
  1267. int hundreds;
  1268. int tensunits;
  1269. int x;
  1270. int ix;
  1271. int exact;
  1272. int ordinal;
  1273. int tplex;
  1274. int say_zero_hundred=0;
  1275. char string[12]; // for looking up entries in **_list
  1276. char buf1[100];
  1277. char buf2[100];
  1278. char ph_100[20];
  1279. char ph_10T[20];
  1280. char ph_digits[50];
  1281. char ph_thousands[50];
  1282. char ph_hundred_and[12];
  1283. char ph_thousand_and[12];
  1284. ordinal = control & 0x22;
  1285. hundreds = value / 100;
  1286. tensunits = value % 100;
  1287. buf1[0] = 0;
  1288. ph_thousands[0] = 0;
  1289. ph_thousand_and[0] = 0;
  1290. if((tr->langopts.numbers & NUM_ZERO_HUNDRED) && ((control & 1) || (hundreds >= 10)))
  1291. {
  1292. say_zero_hundred = 1; // lang=vi
  1293. }
  1294. if((hundreds > 0) || say_zero_hundred)
  1295. {
  1296. found = 0;
  1297. if(ordinal && (tensunits == 0))
  1298. {
  1299. // ordinal number, with no tens or units
  1300. found = Lookup(tr, "_0Co", ph_100);
  1301. }
  1302. if(found == 0)
  1303. {
  1304. if(tensunits==0)
  1305. {
  1306. // special form for exact hundreds?
  1307. found = Lookup(tr, "_0C0", ph_100);
  1308. }
  1309. if(!found)
  1310. {
  1311. Lookup(tr, "_0C", ph_100);
  1312. }
  1313. }
  1314. if(((tr->langopts.numbers & NUM_1900) != 0) && (hundreds == 19))
  1315. {
  1316. // speak numbers such as 1984 as years: nineteen-eighty-four
  1317. // ph_100[0] = 0; // don't say "hundred", we also need to surpess "and"
  1318. }
  1319. else
  1320. if(hundreds >= 10)
  1321. {
  1322. ph_digits[0] = 0;
  1323. exact = 0;
  1324. if ((value % 1000) == 0)
  1325. exact = 1;
  1326. tplex = thousandplex+1;
  1327. if(tr->langopts.numbers2 & NUM2_MYRIADS)
  1328. {
  1329. tplex = 0;
  1330. }
  1331. if(LookupThousands(tr, hundreds / 10, tplex, exact | ordinal, ph_10T) == 0)
  1332. {
  1333. x = 0;
  1334. if(tr->langopts.numbers2 & (1 << tplex))
  1335. x = 8; // use variant (feminine) for before thousands and millions
  1336. LookupNum2(tr, hundreds/10, x, ph_digits);
  1337. }
  1338. if(tr->langopts.numbers2 & 0x200)
  1339. sprintf(ph_thousands,"%s%s",ph_10T,ph_digits); // say "thousands" before its number, not after
  1340. else
  1341. sprintf(ph_thousands,"%s%s",ph_digits,ph_10T);
  1342. hundreds %= 10;
  1343. if((hundreds == 0) && (say_zero_hundred == 0))
  1344. ph_100[0] = 0;
  1345. suppress_null = 1;
  1346. }
  1347. ph_digits[0] = 0;
  1348. if((hundreds > 0) || say_zero_hundred)
  1349. {
  1350. if((tr->langopts.numbers & NUM_AND_HUNDRED) && ((control & 1) || (ph_thousands[0] != 0)))
  1351. {
  1352. Lookup(tr, "_0and", ph_thousand_and);
  1353. }
  1354. suppress_null = 1;
  1355. found = 0;
  1356. if((ordinal)
  1357. && ((tensunits == 0) || (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL)))
  1358. {
  1359. // ordinal number
  1360. sprintf(string, "_%dCo", hundreds);
  1361. found = Lookup(tr, string, ph_digits);
  1362. if((tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL) && (tensunits > 0))
  1363. {
  1364. // Use ordinal form of hundreds, as well as for tens and units
  1365. // Add ordinal suffix to the hundreds
  1366. strcat(ph_digits, ph_ordinal2);
  1367. }
  1368. }
  1369. if((hundreds == 0) && say_zero_hundred)
  1370. {
  1371. Lookup(tr, "_0", ph_digits);
  1372. }
  1373. else
  1374. {
  1375. if((!found) && (tensunits == 0))
  1376. {
  1377. // is there a special pronunciation for exactly n00 ?
  1378. sprintf(string,"_%dC0",hundreds);
  1379. found = Lookup(tr, string, ph_digits);
  1380. }
  1381. if(!found)
  1382. {
  1383. sprintf(string,"_%dC",hundreds);
  1384. found = Lookup(tr, string, ph_digits); // is there a specific pronunciation for n-hundred ?
  1385. }
  1386. if(found)
  1387. {
  1388. ph_100[0] = 0;
  1389. }
  1390. else
  1391. {
  1392. if((hundreds > 1) || ((tr->langopts.numbers & NUM_OMIT_1_HUNDRED) == 0))
  1393. {
  1394. LookupNum2(tr, hundreds, 0, ph_digits);
  1395. }
  1396. }
  1397. }
  1398. }
  1399. sprintf(buf1,"%s%s%s%s",ph_thousands,ph_thousand_and,ph_digits,ph_100);
  1400. }
  1401. ph_hundred_and[0] = 0;
  1402. if(tensunits > 0)
  1403. {
  1404. if((control & 2) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))
  1405. {
  1406. // Don't use "and" if we apply ordinal to both hundreds and units
  1407. }
  1408. else
  1409. {
  1410. if((value > 100) || ((control & 1) && (thousandplex==0)))
  1411. {
  1412. if((tr->langopts.numbers & NUM_HUNDRED_AND) || ((tr->langopts.numbers & NUM_HUNDRED_AND_DIGIT) && (tensunits < 10)))
  1413. {
  1414. Lookup(tr, "_0and", ph_hundred_and);
  1415. }
  1416. }
  1417. if((tr->langopts.numbers & NUM_THOUSAND_AND) && (hundreds == 0) && ((control & 1) || (ph_thousands[0] != 0)))
  1418. {
  1419. Lookup(tr, "_0and", ph_hundred_and);
  1420. }
  1421. }
  1422. }
  1423. buf2[0] = 0;
  1424. if((tensunits != 0) || (suppress_null == 0))
  1425. {
  1426. x = 0;
  1427. if(thousandplex==0)
  1428. {
  1429. x = 2; // allow "eins" for 1 rather than "ein"
  1430. if(ordinal)
  1431. x = 3; // ordinal number
  1432. if((value < 100) && !(control & 1))
  1433. x |= 4; // tens and units only, no higher digits
  1434. if(ordinal & 0x20)
  1435. x |= 0x20; // variant form of ordinal number
  1436. }
  1437. else
  1438. {
  1439. if(tr->langopts.numbers2 & (1 << thousandplex))
  1440. x = 8; // use variant (feminine) for before thousands and millions
  1441. }
  1442. if(LookupNum2(tr, tensunits, x | control & 0x100, buf2) != 0)
  1443. {
  1444. if(tr->langopts.numbers & NUM_SINGLE_AND)
  1445. ph_hundred_and[0] = 0; // don't put 'and' after 'hundred' if there's 'and' between tens and units
  1446. }
  1447. }
  1448. else
  1449. {
  1450. if(ph_ordinal2[0] != 0)
  1451. {
  1452. ix = strlen(buf1);
  1453. if((ix > 0) && (buf1[ix-1] == phonPAUSE_SHORT))
  1454. buf1[ix-1] = 0; // remove pause before addding ordinal suffix
  1455. strcpy(buf2, ph_ordinal2);
  1456. }
  1457. }
  1458. sprintf(ph_out,"%s%s%s",buf1,ph_hundred_and,buf2);
  1459. return(0);
  1460. } // end of LookupNum3
  1461. bool CheckThousandsGroup(char *word, int group_len)
  1462. {//================================================
  1463. // Is this a group of 3 digits which looks like a thousands group?
  1464. int ix;
  1465. if(isdigit(word[group_len]) || isdigit(-1))
  1466. return(false);
  1467. for(ix=0; ix < group_len; ix++)
  1468. {
  1469. if(!isdigit(word[ix]))
  1470. return(false);
  1471. }
  1472. return(true);
  1473. }
  1474. static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
  1475. {//=====================================================================================================================
  1476. // Number translation with various options
  1477. // the "word" may be up to 4 digits
  1478. // "words" of 3 digits may be preceded by another number "word" for thousands or millions
  1479. int n_digits;
  1480. int value;
  1481. int ix;
  1482. int digix;
  1483. unsigned char c;
  1484. int suppress_null = 0;
  1485. int decimal_point = 0;
  1486. int thousandplex = 0;
  1487. int thousands_exact = 1;
  1488. int thousands_inc = 0;
  1489. int prev_thousands = 0;
  1490. int ordinal = 0;
  1491. int dot_ordinal;
  1492. int this_value;
  1493. int decimal_count;
  1494. int max_decimal_count;
  1495. int decimal_mode;
  1496. int suffix_ix;
  1497. int skipwords = 0;
  1498. int group_len;
  1499. char *p;
  1500. char string[32]; // for looking up entries in **_list
  1501. char buf1[100];
  1502. char ph_append[50];
  1503. char ph_buf[200];
  1504. char ph_buf2[50];
  1505. char ph_zeros[50];
  1506. char suffix[30]; // string[] must be long enough for sizeof(suffix)+2
  1507. char buf_digit_lookup[50];
  1508. static const char str_pause[2] = {phonPAUSE_NOLINK,0};
  1509. *flags = 0;
  1510. n_digit_lookup = 0;
  1511. buf_digit_lookup[0] = 0;
  1512. digit_lookup = buf_digit_lookup;
  1513. number_control = control;
  1514. for(ix=0; isdigit(word[ix]); ix++) ;
  1515. n_digits = ix;
  1516. value = this_value = atoi(word);
  1517. group_len = 3;
  1518. if(tr->langopts.numbers2 & NUM2_MYRIADS)
  1519. group_len = 4;
  1520. // is there a previous thousands part (as a previous "word") ?
  1521. if((n_digits == group_len) && (word[-2] == tr->langopts.thousands_sep) && isdigit(word[-3]))
  1522. {
  1523. prev_thousands = 1;
  1524. }
  1525. else
  1526. if((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & NUM_ALLOW_SPACE))
  1527. {
  1528. // thousands groups can be separated by spaces
  1529. if((n_digits == 3) && !(wtab->flags & FLAG_MULTIPLE_SPACES) && isdigit(word[-2]))
  1530. {
  1531. prev_thousands = 1;
  1532. }
  1533. }
  1534. if(prev_thousands == 0)
  1535. {
  1536. speak_missing_thousands = 0;
  1537. }
  1538. ph_ordinal2[0] = 0;
  1539. ph_zeros[0] = 0;
  1540. if(prev_thousands || (word[0] != '0'))
  1541. {
  1542. // don't check for ordinal if the number has a leading zero
  1543. if((ordinal = CheckDotOrdinal(tr, word, &word[ix], wtab, 0)) != 0)
  1544. dot_ordinal = 1;
  1545. }
  1546. if((word[ix] == '.') && !isdigit(word[ix+1]) && !isdigit(word[ix+2]) && !(wtab[1].flags & FLAG_NOSPACE))
  1547. {
  1548. // remove dot unless followed by another number
  1549. word[ix] = 0;
  1550. }
  1551. if((ordinal == 0) || (tr->translator_name == L('h','u')))
  1552. {
  1553. // NOTE lang=hu, allow both dot and ordinal suffix, eg. "december 21.-én"
  1554. // look for an ordinal number suffix after the number
  1555. ix++;
  1556. p = suffix;
  1557. if(wtab[0].flags & FLAG_HYPHEN_AFTER)
  1558. {
  1559. *p++ = '-';
  1560. ix++;
  1561. }
  1562. while((word[ix] != 0) && (word[ix] != ' ') && (ix < (int)(sizeof(suffix)-1)))
  1563. {
  1564. *p++ = word[ix++];
  1565. }
  1566. *p = 0;
  1567. if(suffix[0] != 0)
  1568. {
  1569. if((tr->langopts.ordinal_indicator != NULL) && (strcmp(suffix, tr->langopts.ordinal_indicator) == 0))
  1570. {
  1571. ordinal = 2;
  1572. }
  1573. else
  1574. if(!isdigit(suffix[0])) // not _#9 (tab)
  1575. {
  1576. sprintf(string,"_#%s",suffix);
  1577. if(Lookup(tr, string, ph_ordinal2))
  1578. {
  1579. // this is an ordinal suffix
  1580. ordinal = 2;
  1581. flags[0] |= FLAG_SKIPWORDS;
  1582. skipwords = 1;
  1583. }
  1584. }
  1585. }
  1586. }
  1587. if(wtab[0].flags & FLAG_ORDINAL)
  1588. ordinal = 2;
  1589. ph_append[0] = 0;
  1590. ph_buf2[0] = 0;
  1591. if((word[0] == '0') && (prev_thousands == 0) && (word[1] != ' ') && (word[1] != tr->langopts.decimal_sep))
  1592. {
  1593. if((n_digits == 2) && (word[3] == ':') && isdigit(word[5]) && isspace(word[7]))
  1594. {
  1595. // looks like a time 02:30, omit the leading zero
  1596. }
  1597. else
  1598. {
  1599. if(n_digits > 3)
  1600. {
  1601. flags[0] &= ~FLAG_SKIPWORDS;
  1602. return(0); // long number string with leading zero, speak as individual digits
  1603. }
  1604. // speak leading zeros
  1605. for(ix=0; (word[ix] == '0') && (ix < (n_digits-1)); ix++)
  1606. {
  1607. Lookup(tr, "_0", &ph_zeros[strlen(ph_zeros)]);
  1608. }
  1609. }
  1610. }
  1611. if((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[n_digits] == ' '))
  1612. thousands_inc = 1;
  1613. else
  1614. if(word[n_digits] == tr->langopts.thousands_sep)
  1615. thousands_inc = 2;
  1616. suffix_ix = n_digits+2;
  1617. if(thousands_inc > 0)
  1618. {
  1619. // if the following "words" are three-digit groups, count them and add
  1620. // a "thousand"/"million" suffix to this one
  1621. digix = n_digits + thousands_inc;
  1622. while(((wtab[thousandplex+1].flags & FLAG_MULTIPLE_SPACES) == 0) && CheckThousandsGroup(&word[digix], group_len))
  1623. {
  1624. for(ix=0; ix<group_len; ix++)
  1625. {
  1626. if(word[digix+ix] != '0')
  1627. {
  1628. thousands_exact = 0;
  1629. break;
  1630. }
  1631. }
  1632. thousandplex++;
  1633. digix += group_len;
  1634. if((word[digix] == tr->langopts.thousands_sep) || ((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[digix] == ' ')))
  1635. {
  1636. suffix_ix = digix+2;
  1637. digix += thousands_inc;
  1638. }
  1639. else
  1640. break;
  1641. }
  1642. }
  1643. if((value == 0) && prev_thousands)
  1644. {
  1645. suppress_null = 1;
  1646. }
  1647. if(tr->translator_name == L('h','u'))
  1648. {
  1649. // variant form of numbers when followed by hyphen and a suffix starting with 'a' or 'e' (but not a, e, az, ez, azt, ezt
  1650. if((wtab[thousandplex].flags & FLAG_HYPHEN_AFTER) && (thousands_exact==1) && hu_number_e(&word[suffix_ix], thousandplex, value))
  1651. {
  1652. number_control |= 1; // use _1e variant of number
  1653. }
  1654. }
  1655. if((word[n_digits] == tr->langopts.decimal_sep) && isdigit(word[n_digits+1]))
  1656. {
  1657. // this "word" ends with a decimal point
  1658. Lookup(tr, "_dpt", ph_append);
  1659. decimal_point = 0x100;
  1660. }
  1661. else
  1662. if(suppress_null == 0)
  1663. {
  1664. if(thousands_inc > 0)
  1665. {
  1666. if(thousandplex > 0)
  1667. // if((thousandplex > 0) && (value < 1000))
  1668. {
  1669. if((suppress_null == 0) && (LookupThousands(tr,value,thousandplex, thousands_exact, ph_append)))
  1670. {
  1671. // found an exact match for N thousand
  1672. value = 0;
  1673. suppress_null = 1;
  1674. }
  1675. }
  1676. }
  1677. }
  1678. else
  1679. if(speak_missing_thousands == 1)
  1680. {
  1681. // speak this thousandplex if there was no word for the previous thousandplex
  1682. sprintf(string,"_0M%d",thousandplex+1);
  1683. if(Lookup(tr, string, buf1)==0)
  1684. {
  1685. sprintf(string,"_0M%d",thousandplex);
  1686. Lookup(tr, string, ph_append);
  1687. }
  1688. }
  1689. if((ph_append[0] == 0) && (word[n_digits] == '.') && (thousandplex == 0))
  1690. {
  1691. Lookup(tr, "_.", ph_append);
  1692. }
  1693. if(thousandplex == 0)
  1694. {
  1695. char *p2;
  1696. // look for combinations of the number with the next word
  1697. p = word;
  1698. while(isdigit(p[1])) p++; // just use the last digit
  1699. if(isdigit(p[-1]))
  1700. {
  1701. p2 = p - 1;
  1702. if(LookupDictList(tr, &p2, buf_digit_lookup, flags, FLAG_SUFX, wtab)) // lookup 2 digits
  1703. {
  1704. n_digit_lookup = 2;
  1705. }
  1706. }
  1707. // if((buf_digit_lookup[0] == 0) && (*p != '0') && (dot_ordinal==0))
  1708. if((buf_digit_lookup[0] == 0) && (*p != '0'))
  1709. {
  1710. // LANG=hu ?
  1711. // not found, lookup only the last digit (?? but not if dot-ordinal has been found)
  1712. if(LookupDictList(tr, &p, buf_digit_lookup, flags, FLAG_SUFX, wtab)) // don't match '0', or entries with $only
  1713. {
  1714. n_digit_lookup = 1;
  1715. }
  1716. }
  1717. if(prev_thousands == 0)
  1718. {
  1719. if((decimal_point == 0) && (ordinal == 0))
  1720. {
  1721. // Look for special pronunciation for this number in isolation (LANG=kl)
  1722. sprintf(string, "_%dn", value);
  1723. if(Lookup(tr, string, ph_out))
  1724. {
  1725. return(1);
  1726. }
  1727. }
  1728. if(tr->langopts.numbers2 & NUM2_PERCENT_BEFORE)
  1729. {
  1730. // LANG=si, say "percent" before the number
  1731. p2 = word;
  1732. while((*p2 != ' ') && (*p2 != 0))
  1733. {
  1734. p2++;
  1735. }
  1736. if(p2[1] == '%')
  1737. {
  1738. Lookup(tr, "%", ph_out);
  1739. ph_out += strlen(ph_out);
  1740. p2[1] = ' ';
  1741. }
  1742. }
  1743. }
  1744. }
  1745. LookupNum3(tr, value, ph_buf, suppress_null, thousandplex, prev_thousands | ordinal | decimal_point);
  1746. if((thousandplex > 0) && (tr->langopts.numbers2 & 0x200))
  1747. sprintf(ph_out,"%s%s%s%s",ph_zeros,ph_append,ph_buf2,ph_buf); // say "thousands" before its number
  1748. else
  1749. sprintf(ph_out,"%s%s%s%s",ph_zeros,ph_buf2,ph_buf,ph_append);
  1750. while(decimal_point)
  1751. {
  1752. n_digits++;
  1753. decimal_count = 0;
  1754. while(isdigit(word[n_digits+decimal_count]))
  1755. decimal_count++;
  1756. // if(decimal_count > 1)
  1757. {
  1758. max_decimal_count = 2;
  1759. switch(decimal_mode = (tr->langopts.numbers & 0xe000))
  1760. {
  1761. case NUM_DFRACTION_4:
  1762. max_decimal_count = 5;
  1763. case NUM_DFRACTION_2:
  1764. // French/Polish decimal fraction
  1765. while(word[n_digits] == '0')
  1766. {
  1767. Lookup(tr, "_0", buf1);
  1768. strcat(ph_out,buf1);
  1769. decimal_count--;
  1770. n_digits++;
  1771. }
  1772. if((decimal_count <= max_decimal_count) && isdigit(word[n_digits]))
  1773. {
  1774. LookupNum3(tr, atoi(&word[n_digits]), buf1, 0,0,0);
  1775. strcat(ph_out,buf1);
  1776. n_digits += decimal_count;
  1777. }
  1778. break;
  1779. case NUM_DFRACTION_1: // italian, say "hundredths" if leading zero
  1780. case NUM_DFRACTION_5: // hungarian, always say "tenths" etc.
  1781. case NUM_DFRACTION_6: // kazakh, always say "tenths" etc, before the decimal fraction
  1782. LookupNum3(tr, atoi(&word[n_digits]), ph_buf, 0,0,0);
  1783. if((word[n_digits]=='0') || (decimal_mode != NUM_DFRACTION_1))
  1784. {
  1785. // decimal part has leading zeros, so add a "hundredths" or "thousandths" suffix
  1786. sprintf(string,"_0Z%d",decimal_count);
  1787. if(Lookup(tr, string, buf1) == 0)
  1788. break; // revert to speaking single digits
  1789. if(decimal_mode == NUM_DFRACTION_6)
  1790. strcat(ph_out, buf1);
  1791. else
  1792. strcat(ph_buf, buf1);
  1793. }
  1794. strcat(ph_out,ph_buf);
  1795. n_digits += decimal_count;
  1796. break;
  1797. case NUM_DFRACTION_3:
  1798. // Romanian decimal fractions
  1799. if((decimal_count <= 4) && (word[n_digits] != '0'))
  1800. {
  1801. LookupNum3(tr, atoi(&word[n_digits]), buf1, 0,0,0);
  1802. strcat(ph_out,buf1);
  1803. n_digits += decimal_count;
  1804. }
  1805. break;
  1806. case NUM_DFRACTION_7:
  1807. // alternative form of decimal fraction digits, except the final digit
  1808. while(decimal_count-- > 1)
  1809. {
  1810. sprintf(string,"_%cd", word[n_digits]);
  1811. if(Lookup(tr, string, buf1) == 0)
  1812. break;
  1813. n_digits++;
  1814. strcat(ph_out, buf1);
  1815. }
  1816. }
  1817. }
  1818. while(isdigit(c = word[n_digits]) && (strlen(ph_out) < (N_WORD_PHONEMES - 10)))
  1819. {
  1820. // speak any remaining decimal fraction digits individually
  1821. value = word[n_digits++] - '0';
  1822. LookupNum2(tr, value, 2, buf1);
  1823. strcat(ph_out,buf1);
  1824. }
  1825. // something after the decimal part ?
  1826. if(Lookup(tr, "_dpt2", buf1))
  1827. strcat(ph_out,buf1);
  1828. if((c == tr->langopts.decimal_sep) && isdigit(word[n_digits+1]))
  1829. {
  1830. Lookup(tr, "_dpt", buf1);
  1831. strcat(ph_out,buf1);
  1832. }
  1833. else
  1834. {
  1835. decimal_point = 0;
  1836. }
  1837. }
  1838. if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH))
  1839. {
  1840. int next_char;
  1841. char *p;
  1842. p = &word[n_digits+1];
  1843. p += utf8_in(&next_char,p);
  1844. if((tr->langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
  1845. utf8_in(&next_char,p);
  1846. if(!iswalpha(next_char) && !((wtab[thousandplex].flags & FLAG_HYPHEN_AFTER) && (thousands_exact != 0)))
  1847. strcat(ph_out,str_pause); // don't add pause for 100s, 6th, etc.
  1848. }
  1849. *flags |= FLAG_FOUND;
  1850. speak_missing_thousands--;
  1851. if(skipwords)
  1852. dictionary_skipwords = skipwords;
  1853. return(1);
  1854. } // end of TranslateNumber_1
  1855. int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
  1856. {//=============================================================================================================
  1857. if((option_sayas == SAYAS_DIGITS1) || (wtab[0].flags & FLAG_INDIVIDUAL_DIGITS))
  1858. return(0); // speak digits individually
  1859. if(tr->langopts.numbers != 0)
  1860. {
  1861. return(TranslateNumber_1(tr, word1, ph_out, flags, wtab, control));
  1862. }
  1863. return(0);
  1864. } // end of TranslateNumber