eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

numbers.cpp 59KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449
  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2014 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include "StdAfx.h"
  20. #include <stdio.h>
  21. #include <ctype.h>
  22. #include <stdlib.h>
  23. #include <string.h>
  24. #include <wctype.h>
  25. #include <wchar.h>
  26. #include "speak_lib.h"
  27. #include "speech.h"
  28. #include "phoneme.h"
  29. #include "synthesize.h"
  30. #include "voice.h"
  31. #include "translate.h"
  // Modifier (accent) codes.  Each value is an index into accents_tab[]
  // and is the number that LETTER()/LIGATURE() pack into the
  // letter_accents_* tables.
  enum {
      M_NAME         = 0,
      M_SMALLCAP     = 1,
      M_TURNED       = 2,
      M_REVERSED     = 3,
      M_CURL         = 4,
      M_ACUTE        = 5,
      M_BREVE        = 6,
      M_CARON        = 7,
      M_CEDILLA      = 8,
      M_CIRCUMFLEX   = 9,
      M_DIAERESIS    = 10,
      M_DOUBLE_ACUTE = 11,
      M_DOT_ABOVE    = 12,
      M_GRAVE        = 13,
      M_MACRON       = 14,
      M_OGONEK       = 15,
      M_RING         = 16,
      M_STROKE       = 17,
      M_TILDE        = 18,
      M_BAR          = 19,
      M_RETROFLEX    = 20,
      M_HOOK         = 21,

      // aliases: these share an accents_tab[] entry with another code
      M_MIDDLE_DOT   = M_DOT_ABOVE, // duplicate of M_DOT_ABOVE
      M_IMPLOSIVE    = M_HOOK
  };
  // File-scope state.  Static storage already starts at zero / NULL; the
  // initialisers just make that explicit.  None of these is read by the
  // letter-lookup routines visible here -- presumably they belong to the
  // number-speaking code later in the file (TODO confirm).
  static int n_digit_lookup = 0;
  static char *digit_lookup = NULL;
  static int speak_missing_thousands = 0;
  static int number_control = 0;
  // One row of the accent-name table.
  typedef struct {
      const char *name;   // token to look up in the *_list file
      int accent_flags;   // bit 0, say before the letter name
  } ACCENTS;

  // these are tokens to look up in the *_list file.
  // Rows are indexed by the M_* modifier codes (row 5 is M_ACUTE, and so
  // on).  Row 0 ("_lig") is the one LookupAccentedLetter() uses when no
  // first accent is encoded, which includes every ligature entry.
  // The table is only ever read, so it is const.
  static const ACCENTS accents_tab[] = {
      {"_lig", 1}, // index 0 (M_NAME)
      {"_smc", 0}, // smallcap
      {"_tur", 0}, // turned
      {"_rev", 0}, // reversed
      {"_crl", 0}, // curl
      {"_acu", 0}, // acute
      {"_brv", 0}, // breve
      {"_hac", 0}, // caron/hacek
      {"_ced", 0}, // cedilla
      {"_cir", 0}, // circumflex
      {"_dia", 0}, // diaeresis
      {"_ac2", 0}, // double acute
      {"_dot", 0}, // dot
      {"_grv", 0}, // grave
      {"_mcn", 0}, // macron
      {"_ogo", 0}, // ogonek
      {"_rng", 0}, // ring
      {"_stk", 0}, // stroke
      {"_tld", 0}, // tilde
      {"_bar", 0}, // bar
      {"_rfx", 0}, // retroflex
      {"_hok", 0}, // hook
  };
  // Table value for a code point that carries no accent data (used for
  // the upper-case slots of the letter_accents_* tables).
  #define CAPITAL 0

  // Pack a base letter and up to two modifier codes into 16 bits:
  //   bits 0-5   base letter minus 59 ('a'..'z', or one of the L_* codes)
  //   bits 6-10  first modifier  (M_* code)
  //   bits 11-14 second modifier (M_* code)
  // Every argument and the whole expansion are parenthesised so the
  // macros stay correct inside larger expressions and with expression
  // arguments.
  #define LETTER(ch,mod1,mod2) (((ch)-59)+((mod1) << 6)+((mod2) << 11))

  // Pack a two-letter ligature:
  //   bits 0-5   first letter minus 59
  //   bits 6-11  second letter minus 59
  //   bits 12-14 modifier (M_* code)
  //   bit 15     set, marks the entry as a ligature
  #define LIGATURE(ch1,ch2,mod1) (((ch1)-59)+(((ch2)-59) << 6)+((mod1) << 12)+0x8000)

  // Pseudo-letters for non-ASCII base letters.  They sit just below 'a'
  // and are turned back into code points through non_ascii_tab[code-59].
  #define L_ALPHA 60 // U+3B1
  #define L_SCHWA 61 // U+259
  #define L_OPEN_E 62 // U+25B
  #define L_GAMMA 63 // U+3B3
  #define L_IOTA 64 // U+3B9
  #define L_OE 65 // U+153
  #define L_OMEGA 66 // U+3C9
  #define L_PHI 67 // U+3C6
  #define L_ESH 68 // U+283
  #define L_UPSILON 69 // U+3C5
  #define L_EZH 70 // U+292
  #define L_GLOTTAL 71 // U+294
  #define L_RTAP 72 // U+27E
  // Code points of the L_* pseudo-letters, indexed by (L_xxx - 59).
  // Slot 0 is a placeholder; no L_* code maps to it.
  static const short non_ascii_tab[] = {
      0,
      0x3b1,  // L_ALPHA
      0x259,  // L_SCHWA
      0x25b,  // L_OPEN_E
      0x3b3,  // L_GAMMA
      0x3b9,  // L_IOTA
      0x153,  // L_OE
      0x3c9,  // L_OMEGA
      0x3c6,  // L_PHI
      0x283,  // L_ESH
      0x3c5,  // L_UPSILON
      0x292,  // L_EZH
      0x294,  // L_GLOTTAL
      0x27e   // L_RTAP
  };
  109. // characters U+00e0 to U+017f
  110. static const unsigned short letter_accents_0e0[] = {
  111. LETTER('a',M_GRAVE,0), // U+00e0
  112. LETTER('a',M_ACUTE,0),
  113. LETTER('a',M_CIRCUMFLEX,0),
  114. LETTER('a',M_TILDE,0),
  115. LETTER('a',M_DIAERESIS,0),
  116. LETTER('a',M_RING,0),
  117. LIGATURE('a','e',0),
  118. LETTER('c',M_CEDILLA,0),
  119. LETTER('e',M_GRAVE,0),
  120. LETTER('e',M_ACUTE,0),
  121. LETTER('e',M_CIRCUMFLEX,0),
  122. LETTER('e',M_DIAERESIS,0),
  123. LETTER('i',M_GRAVE,0),
  124. LETTER('i',M_ACUTE,0),
  125. LETTER('i',M_CIRCUMFLEX,0),
  126. LETTER('i',M_DIAERESIS,0),
  127. LETTER('d',M_NAME,0), // eth // U+00f0
  128. LETTER('n',M_TILDE,0),
  129. LETTER('o',M_GRAVE,0),
  130. LETTER('o',M_ACUTE,0),
  131. LETTER('o',M_CIRCUMFLEX,0),
  132. LETTER('o',M_TILDE,0),
  133. LETTER('o',M_DIAERESIS,0),
  134. 0, // division sign
  135. LETTER('o',M_STROKE,0),
  136. LETTER('u',M_GRAVE,0),
  137. LETTER('u',M_ACUTE,0),
  138. LETTER('u',M_CIRCUMFLEX,0),
  139. LETTER('u',M_DIAERESIS,0),
  140. LETTER('y',M_ACUTE,0),
  141. LETTER('t',M_NAME,0), // thorn
  142. LETTER('y',M_DIAERESIS,0),
  143. CAPITAL, // U+0100
  144. LETTER('a',M_MACRON,0),
  145. CAPITAL,
  146. LETTER('a',M_BREVE,0),
  147. CAPITAL,
  148. LETTER('a',M_OGONEK,0),
  149. CAPITAL,
  150. LETTER('c',M_ACUTE,0),
  151. CAPITAL,
  152. LETTER('c',M_CIRCUMFLEX,0),
  153. CAPITAL,
  154. LETTER('c',M_DOT_ABOVE,0),
  155. CAPITAL,
  156. LETTER('c',M_CARON,0),
  157. CAPITAL,
  158. LETTER('d',M_CARON,0),
  159. CAPITAL, // U+0110
  160. LETTER('d',M_STROKE,0),
  161. CAPITAL,
  162. LETTER('e',M_MACRON,0),
  163. CAPITAL,
  164. LETTER('e',M_BREVE,0),
  165. CAPITAL,
  166. LETTER('e',M_DOT_ABOVE,0),
  167. CAPITAL,
  168. LETTER('e',M_OGONEK,0),
  169. CAPITAL,
  170. LETTER('e',M_CARON,0),
  171. CAPITAL,
  172. LETTER('g',M_CIRCUMFLEX,0),
  173. CAPITAL,
  174. LETTER('g',M_BREVE,0),
  175. CAPITAL, // U+0120
  176. LETTER('g',M_DOT_ABOVE,0),
  177. CAPITAL,
  178. LETTER('g',M_CEDILLA,0),
  179. CAPITAL,
  180. LETTER('h',M_CIRCUMFLEX,0),
  181. CAPITAL,
  182. LETTER('h',M_STROKE,0),
  183. CAPITAL,
  184. LETTER('i',M_TILDE,0),
  185. CAPITAL,
  186. LETTER('i',M_MACRON,0),
  187. CAPITAL,
  188. LETTER('i',M_BREVE,0),
  189. CAPITAL,
  190. LETTER('i',M_OGONEK,0),
  191. CAPITAL, // U+0130
  192. LETTER('i',M_NAME,0), // dotless i
  193. CAPITAL,
  194. LIGATURE('i','j',0),
  195. CAPITAL,
  196. LETTER('j',M_CIRCUMFLEX,0),
  197. CAPITAL,
  198. LETTER('k',M_CEDILLA,0),
  199. LETTER('k',M_NAME,0), // kra
  200. CAPITAL,
  201. LETTER('l',M_ACUTE,0),
  202. CAPITAL,
  203. LETTER('l',M_CEDILLA,0),
  204. CAPITAL,
  205. LETTER('l',M_CARON,0),
  206. CAPITAL,
  207. LETTER('l',M_MIDDLE_DOT,0), // U+0140
  208. CAPITAL,
  209. LETTER('l',M_STROKE,0),
  210. CAPITAL,
  211. LETTER('n',M_ACUTE,0),
  212. CAPITAL,
  213. LETTER('n',M_CEDILLA,0),
  214. CAPITAL,
  215. LETTER('n',M_CARON,0),
  216. LETTER('n',M_NAME,0), // apostrophe n
  217. CAPITAL,
  218. LETTER('n',M_NAME,0), // eng
  219. CAPITAL,
  220. LETTER('o',M_MACRON,0),
  221. CAPITAL,
  222. LETTER('o',M_BREVE,0),
  223. CAPITAL, // U+0150
  224. LETTER('o',M_DOUBLE_ACUTE,0),
  225. CAPITAL,
  226. LIGATURE('o','e',0),
  227. CAPITAL,
  228. LETTER('r',M_ACUTE,0),
  229. CAPITAL,
  230. LETTER('r',M_CEDILLA,0),
  231. CAPITAL,
  232. LETTER('r',M_CARON,0),
  233. CAPITAL,
  234. LETTER('s',M_ACUTE,0),
  235. CAPITAL,
  236. LETTER('s',M_CIRCUMFLEX,0),
  237. CAPITAL,
  238. LETTER('s',M_CEDILLA,0),
  239. CAPITAL, // U+0160
  240. LETTER('s',M_CARON,0),
  241. CAPITAL,
  242. LETTER('t',M_CEDILLA,0),
  243. CAPITAL,
  244. LETTER('t',M_CARON,0),
  245. CAPITAL,
  246. LETTER('t',M_STROKE,0),
  247. CAPITAL,
  248. LETTER('u',M_TILDE,0),
  249. CAPITAL,
  250. LETTER('u',M_MACRON,0),
  251. CAPITAL,
  252. LETTER('u',M_BREVE,0),
  253. CAPITAL,
  254. LETTER('u',M_RING,0),
  255. CAPITAL, // U+0170
  256. LETTER('u',M_DOUBLE_ACUTE,0),
  257. CAPITAL,
  258. LETTER('u',M_OGONEK,0),
  259. CAPITAL,
  260. LETTER('w',M_CIRCUMFLEX,0),
  261. CAPITAL,
  262. LETTER('y',M_CIRCUMFLEX,0),
  263. CAPITAL, // Y-DIAERESIS
  264. CAPITAL,
  265. LETTER('z',M_ACUTE,0),
  266. CAPITAL,
  267. LETTER('z',M_DOT_ABOVE,0),
  268. CAPITAL,
  269. LETTER('z',M_CARON,0),
  270. LETTER('s',M_NAME,0), // long-s // U+17f
  271. };
  272. // characters U+0250 to U+029F
  273. static const unsigned short letter_accents_250[] = {
  274. LETTER('a',M_TURNED,0), // U+250
  275. LETTER(L_ALPHA,0,0),
  276. LETTER(L_ALPHA,M_TURNED,0),
  277. LETTER('b',M_IMPLOSIVE,0),
  278. 0, // open-o
  279. LETTER('c',M_CURL,0),
  280. LETTER('d',M_RETROFLEX,0),
  281. LETTER('d',M_IMPLOSIVE,0),
  282. LETTER('e',M_REVERSED,0), // U+258
  283. 0, // schwa
  284. LETTER(L_SCHWA,M_HOOK,0),
  285. 0, // open-e
  286. LETTER(L_OPEN_E,M_REVERSED,0),
  287. LETTER(L_OPEN_E,M_HOOK,M_REVERSED),
  288. 0,//LETTER(L_OPEN_E,M_CLOSED,M_REVERSED),
  289. LETTER('j',M_BAR,0),
  290. LETTER('g',M_IMPLOSIVE,0), // U+260
  291. LETTER('g',0,0),
  292. LETTER('g',M_SMALLCAP,0),
  293. LETTER(L_GAMMA,0,0),
  294. 0, // ramshorn
  295. LETTER('h',M_TURNED,0),
  296. LETTER('h',M_HOOK,0),
  297. 0,//LETTER(L_HENG,M_HOOK,0),
  298. LETTER('i',M_BAR,0), // U+268
  299. LETTER(L_IOTA,0,0),
  300. LETTER('i',M_SMALLCAP,0),
  301. LETTER('l',M_TILDE,0),
  302. LETTER('l',M_BAR,0),
  303. LETTER('l',M_RETROFLEX,0),
  304. LIGATURE('l','z',0),
  305. LETTER('m',M_TURNED,0),
  306. 0,//LETTER('m',M_TURNED,M_LEG), // U+270
  307. LETTER('m',M_HOOK,0),
  308. 0,//LETTER('n',M_LEFTHOOK,0),
  309. LETTER('n',M_RETROFLEX,0),
  310. LETTER('n',M_SMALLCAP,0),
  311. LETTER('o',M_BAR,0),
  312. LIGATURE('o','e',M_SMALLCAP),
  313. 0,//LETTER(L_OMEGA,M_CLOSED,0),
  314. LETTER(L_PHI,0,0), // U+278
  315. LETTER('r',M_TURNED,0),
  316. 0,//LETTER('r',M_TURNED,M_LEG),
  317. LETTER('r',M_RETROFLEX,M_TURNED),
  318. 0,//LETTER('r',M_LEG,0),
  319. LETTER('r',M_RETROFLEX,0),
  320. 0, // r-tap
  321. LETTER(L_RTAP,M_REVERSED,0),
  322. LETTER('r',M_SMALLCAP,0), // U+280
  323. LETTER('r',M_TURNED,M_SMALLCAP),
  324. LETTER('s',M_RETROFLEX,0),
  325. 0, // esh
  326. 0,//LETTER('j',M_BAR,L_IMPLOSIVE),
  327. LETTER(L_ESH,M_REVERSED,0),
  328. LETTER(L_ESH,M_CURL,0),
  329. LETTER('t',M_TURNED,0),
  330. LETTER('t',M_RETROFLEX,0), // U+288
  331. LETTER('u',M_BAR,0),
  332. LETTER(L_UPSILON,0,0),
  333. LETTER('v',M_HOOK,0),
  334. LETTER('v',M_TURNED,0),
  335. LETTER('w',M_TURNED,0),
  336. LETTER('y',M_TURNED,0),
  337. LETTER('y',M_SMALLCAP,0),
  338. LETTER('z',M_RETROFLEX,0), // U+290
  339. LETTER('z',M_CURL,0),
  340. 0, // ezh
  341. LETTER(L_EZH,M_CURL,0),
  342. 0, // glottal stop
  343. LETTER(L_GLOTTAL,M_REVERSED,0),
  344. LETTER(L_GLOTTAL,M_TURNED,0),
  345. 0,//LETTER('c',M_LONG,0),
  346. 0, // bilabial click // U+298
  347. LETTER('b',M_SMALLCAP,0),
  348. 0,//LETTER(L_OPEN_E,M_CLOSED,0),
  349. LETTER('g',M_IMPLOSIVE,M_SMALLCAP),
  350. LETTER('h',M_SMALLCAP,0),
  351. LETTER('j',M_CURL,0),
  352. LETTER('k',M_TURNED,0),
  353. LETTER('l',M_SMALLCAP,0),
  354. LETTER('q',M_HOOK,0), // U+2a0
  355. LETTER(L_GLOTTAL,M_STROKE,0),
  356. LETTER(L_GLOTTAL,M_STROKE,M_REVERSED),
  357. LIGATURE('d','z',0),
  358. 0, // dezh
  359. LIGATURE('d','z',M_CURL),
  360. LIGATURE('t','s',0),
  361. 0, // tesh
  362. LIGATURE('t','s',M_CURL),
  363. };
  364. static int LookupLetter2(Translator *tr, unsigned int letter, char *ph_buf)
  365. { //========================================================================
  366. int len;
  367. char single_letter[10];
  368. single_letter[0] = 0;
  369. single_letter[1] = '_';
  370. len = utf8_out(letter, &single_letter[2]);
  371. single_letter[len+2] = ' ';
  372. single_letter[len+3] = 0;
  373. if(Lookup(tr, &single_letter[1], ph_buf) == 0)
  374. {
  375. single_letter[1] = ' ';
  376. if(Lookup(tr, &single_letter[2], ph_buf) == 0)
  377. {
  378. TranslateRules(tr, &single_letter[2], ph_buf, 20, NULL,0,NULL);
  379. }
  380. }
  381. return(ph_buf[0]);
  382. }
  383. void LookupAccentedLetter(Translator *tr, unsigned int letter, char *ph_buf)
  384. {//=========================================================================
  385. // lookup the character in the accents table
  386. int accent_data = 0;
  387. int accent1 = 0;
  388. int accent2 = 0;
  389. int flags1, flags2;
  390. int basic_letter;
  391. int letter2=0;
  392. char ph_letter1[30];
  393. char ph_letter2[30];
  394. char ph_accent1[30];
  395. char ph_accent2[30];
  396. ph_accent2[0] = 0;
  397. if((letter >= 0xe0) && (letter < 0x17f))
  398. {
  399. accent_data = letter_accents_0e0[letter - 0xe0];
  400. }
  401. else if((letter >= 0x250) && (letter <= 0x2a8))
  402. {
  403. accent_data = letter_accents_250[letter - 0x250];
  404. }
  405. if(accent_data != 0)
  406. {
  407. basic_letter = (accent_data & 0x3f) + 59;
  408. if(basic_letter < 'a')
  409. basic_letter = non_ascii_tab[basic_letter-59];
  410. if(accent_data & 0x8000)
  411. {
  412. letter2 = (accent_data >> 6) & 0x3f;
  413. letter2 += 59;
  414. accent2 = (accent_data >> 12) & 0x7;
  415. }
  416. else
  417. {
  418. accent1 = (accent_data >> 6) & 0x1f;
  419. accent2 = (accent_data >> 11) & 0xf;
  420. }
  421. if((flags1 = Lookup(tr, accents_tab[accent1].name, ph_accent1)) != 0)
  422. {
  423. if(LookupLetter2(tr, basic_letter, ph_letter1) != 0)
  424. {
  425. if(accent2 != 0)
  426. {
  427. if((flags2 = Lookup(tr, accents_tab[accent2].name, ph_accent2)) == 0)
  428. {
  429. // break;
  430. }
  431. if(flags2 & FLAG_ACCENT_BEFORE)
  432. {
  433. strcpy(ph_buf,ph_accent2);
  434. ph_buf += strlen(ph_buf);
  435. ph_accent2[0] = 0;
  436. }
  437. }
  438. if(letter2 != 0)
  439. {
  440. //ligature
  441. LookupLetter2(tr, letter2, ph_letter2);
  442. sprintf(ph_buf,"%s%c%s%c%s%s",ph_accent1, phonPAUSE_VSHORT, ph_letter1, phonSTRESS_P, ph_letter2, ph_accent2);
  443. }
  444. else
  445. {
  446. if(accent1 == 0)
  447. strcpy(ph_buf, ph_letter1);
  448. else if((tr->langopts.accents & 1) || (flags1 & FLAG_ACCENT_BEFORE) || (accents_tab[accent1].accent_flags & 1))
  449. sprintf(ph_buf,"%s%c%c%s", ph_accent1, phonPAUSE_VSHORT, phonSTRESS_P, ph_letter1);
  450. else
  451. sprintf(ph_buf,"%c%s%c%s%c", phonSTRESS_2, ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT);
  452. }
  453. }
  454. }
  455. }
  456. } // end of LookupAccentedLetter
  457. void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_buf1, int control)
  458. {//==============================================================================================
  459. // control, bit 0: not the first letter of a word
  460. int len;
  461. static char single_letter[10] = {0,0};
  462. unsigned int dict_flags[2];
  463. char ph_buf3[40];
  464. ph_buf1[0] = 0;
  465. len = utf8_out(letter,&single_letter[2]);
  466. single_letter[len+2] = ' ';
  467. if(next_byte == -1)
  468. {
  469. // speaking normal text, not individual characters
  470. if(Lookup(tr, &single_letter[2], ph_buf1) != 0)
  471. return;
  472. single_letter[1] = '_';
  473. if(Lookup(tr, &single_letter[1], ph_buf3) != 0)
  474. return; // the character is specified as _* so ignore it when speaking normal text
  475. // check whether this character is specified for English
  476. if(tr->translator_name == L('e','n'))
  477. return; // we are already using English
  478. SetTranslator2("en");
  479. if(Lookup(translator2, &single_letter[2], ph_buf3) != 0)
  480. {
  481. // yes, switch to English and re-translate the word
  482. sprintf(ph_buf1,"%c",phonSWITCH);
  483. }
  484. SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
  485. return;
  486. }
  487. if((letter <= 32) || iswspace(letter))
  488. {
  489. // lookup space as _&32 etc.
  490. sprintf(&single_letter[1],"_#%d ",letter);
  491. Lookup(tr, &single_letter[1], ph_buf1);
  492. return;
  493. }
  494. if(next_byte != ' ')
  495. next_byte = RULE_SPELLING;
  496. single_letter[3+len] = next_byte; // follow by space-space if the end of the word, or space-31
  497. single_letter[1] = '_';
  498. // if the $accent flag is set for this letter, use the accents table (below)
  499. dict_flags[1] = 0;
  500. if(Lookup(tr, &single_letter[1], ph_buf3) == 0)
  501. {
  502. single_letter[1] = ' ';
  503. if(Lookup(tr, &single_letter[2], ph_buf3) == 0)
  504. {
  505. TranslateRules(tr, &single_letter[2], ph_buf3, sizeof(ph_buf3), NULL,FLAG_NO_TRACE,NULL);
  506. }
  507. }
  508. if(ph_buf3[0] == 0)
  509. {
  510. LookupAccentedLetter(tr, letter, ph_buf3);
  511. }
  512. strcpy(ph_buf1, ph_buf3);
  513. if((ph_buf1[0] == 0) || (ph_buf1[0] == phonSWITCH))
  514. {
  515. return;
  516. }
  517. dict_flags[0] = 0;
  518. dict_flags[1] = 0;
  519. SetWordStress(tr, ph_buf1, dict_flags, -1, control & 1);
  520. } // end of LookupLetter
  // unicode ranges for non-ascii digits 0-9
  // Each value is the code point of a digit zero; the nine code points
  // after it are that script's digits 1 to 9.  A 0 ends the list.
  static const int number_ranges[] = {
      0x660, 0x6f0,                                                   // arabic
      0x966, 0x9e6, 0xa66, 0xae6, 0xb66, 0xbe6, 0xc66, 0xce6, 0xd66,  // indic
      0xe50, 0xed0, 0xf20, 0x1040, 0x1090,
      0
  }; // these must be in ascending order

  // Change non-ascii digit into ascii digit '0' to '9', (or -1 if not)
  int NonAsciiNumber(int letter)
  {
      int ix = 0;

      while (number_ranges[ix] != 0) {
          int first_digit = number_ranges[ix];

          // the list is ascending, so nothing further on can match
          if (letter < first_digit)
              return -1;

          if (letter < (first_digit + 10))
              return letter - first_digit + '0';

          ix++;
      }
      return -1;
  }
  // Flag bits added to the base letter in derived_letters[].
  #define L_SUB 0x4000 // subscript
  #define L_SUP 0x8000 // superscript

  // Dictionary tokens for the modifier, indexed by (value >> 14):
  // 1 = subscript, 2 = superscript; 0 and 3 have no name.
  static const char *modifiers[] = {NULL, "_sub", "_sup", NULL};

  // Subscript and superscript characters, as pairs of
  //   code point, base letter + L_SUB or L_SUP
  // ended by a 0,0 pair.  The base letter is recovered with
  // (value & 0x3fff).  The table is only read, so it is const like the
  // other lookup tables in this file.
  // this list must be in ascending order
  static const unsigned short derived_letters[] = {
      0x00aa, 'a'+L_SUP,
      0x00b2, '2'+L_SUP,
      0x00b3, '3'+L_SUP,
      0x00b9, '1'+L_SUP,
      0x00ba, 'o'+L_SUP,
      0x02b0, 'h'+L_SUP,
      0x02b1, 0x266+L_SUP,
      0x02b2, 'j'+L_SUP,
      0x02b3, 'r'+L_SUP,
      0x02b4, 0x279+L_SUP,
      0x02b5, 0x27b+L_SUP,
      0x02b6, 0x281+L_SUP,
      0x02b7, 'w'+L_SUP,
      0x02b8, 'y'+L_SUP,
      0x02c0, 0x294+L_SUP,
      0x02c1, 0x295+L_SUP,
      0x02e0, 0x263+L_SUP,
      0x02e1, 'l'+L_SUP,
      0x02e2, 's'+L_SUP,
      0x02e3, 'x'+L_SUP,
      0x2070, '0'+L_SUP,
      0x2071, 'i'+L_SUP,
      0x2074, '4'+L_SUP,
      0x2075, '5'+L_SUP,
      0x2076, '6'+L_SUP,
      0x2077, '7'+L_SUP,
      0x2078, '8'+L_SUP,
      0x2079, '9'+L_SUP,
      0x207a, '+'+L_SUP,
      0x207b, '-'+L_SUP,
      0x207c, '='+L_SUP,
      0x207d, '('+L_SUP,
      0x207e, ')'+L_SUP,
      0x207f, 'n'+L_SUP,
      0x2080, '0'+L_SUB,
      0x2081, '1'+L_SUB,
      0x2082, '2'+L_SUB,
      0x2083, '3'+L_SUB,
      0x2084, '4'+L_SUB,
      0x2085, '5'+L_SUB,
      0x2086, '6'+L_SUB,
      0x2087, '7'+L_SUB,
      0x2088, '8'+L_SUB,
      0x2089, '9'+L_SUB,
      0x208a, '+'+L_SUB,
      0x208b, '-'+L_SUB,
      0x208c, '='+L_SUB,
      0x208d, '('+L_SUB,
      0x208e, ')'+L_SUB,
      0x2090, 'a'+L_SUB,
      0x2091, 'e'+L_SUB,
      0x2092, 'o'+L_SUB,
      0x2093, 'x'+L_SUB,
      0x2094, 0x259+L_SUB,
      0x2095, 'h'+L_SUB,
      0x2096, 'k'+L_SUB,
      0x2097, 'l'+L_SUB,
      0x2098, 'm'+L_SUB,
      0x2099, 'n'+L_SUB,
      0x209a, 'p'+L_SUB,
      0x209b, 's'+L_SUB,
      0x209c, 't'+L_SUB,
      0,0};
  // names, using phonemes available to all languages
  // Six entries, presumably for the hex digits A to F in that order
  // (verify against the code that uses them).
  static const char *hex_letters[] = {
      "'e:j",
      "b'i:",
      "s'i:",
      "d'i:",
      "'i:",
      "'ef"
  };
  610. int IsSuperscript(int letter)
  611. {//===========================
  612. // is this a subscript or superscript letter ?
  613. int ix;
  614. int c;
  615. for(ix=0; (c = derived_letters[ix]) != 0; ix+=2)
  616. {
  617. if(c > letter)
  618. break;
  619. if(c == letter)
  620. return(derived_letters[ix+1]);
  621. }
  622. return(0);
  623. }
  624. int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)
  625. {//=========================================================================
  626. // get pronunciation for an isolated letter
  627. // return number of bytes used by the letter
  628. // control bit 0: a non-initial letter in a word
  629. // bit 1: say 'capital'
  630. // bit 2: say character code for unknown letters
  631. int n_bytes;
  632. int letter;
  633. int len;
  634. int ix;
  635. int c;
  636. char *p2;
  637. char *pbuf;
  638. const char *modifier;
  639. ALPHABET *alphabet;
  640. int al_offset;
  641. int al_flags;
  642. int language;
  643. int number;
  644. int phontab_1;
  645. int speak_letter_number;
  646. char capital[30];
  647. char ph_buf[80];
  648. char ph_buf2[80];
  649. char ph_alphabet[80];
  650. char hexbuf[12];
  651. static char pause_string[] = {phonPAUSE, 0};
  652. ph_buf[0] = 0;
  653. ph_alphabet[0] = 0;
  654. capital[0] = 0;
  655. phontab_1 = translator->phoneme_tab_ix;
  656. n_bytes = utf8_in(&letter,word);
  657. if((letter & 0xfff00) == 0x0e000)
  658. {
  659. letter &= 0xff; // uncode private usage area
  660. }
  661. if(control & 2)
  662. {
  663. // include CAPITAL information
  664. if(iswupper2(letter))
  665. {
  666. Lookup(tr, "_cap", capital);
  667. }
  668. }
  669. letter = towlower2(letter);
  670. LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
  671. if(ph_buf[0] == 0)
  672. {
  673. // is this a subscript or superscript letter ?
  674. if((c = IsSuperscript(letter)) != 0)
  675. {
  676. letter = c & 0x3fff;
  677. if((control & 4 ) && ((modifier = modifiers[c >> 14]) != NULL))
  678. {
  679. // don't say "superscript" during normal text reading
  680. Lookup(tr, modifier, capital);
  681. if(capital[0] == 0)
  682. {
  683. capital[2] = SetTranslator2("en"); // overwrites previous contents of translator2
  684. Lookup(translator2, modifier, &capital[3]);
  685. if(capital[3] != 0)
  686. {
  687. capital[0] = phonPAUSE;
  688. capital[1] = phonSWITCH;
  689. len = strlen(&capital[3]);
  690. capital[len+3] = phonSWITCH;
  691. capital[len+4] = phontab_1;
  692. capital[len+5] = 0;
  693. }
  694. }
  695. }
  696. }
  697. LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
  698. }
  699. if(ph_buf[0] == phonSWITCH)
  700. {
  701. strcpy(phonemes,ph_buf);
  702. return(0);
  703. }
  704. if((ph_buf[0] == 0) && ((number = NonAsciiNumber(letter)) > 0))
  705. {
  706. // convert a non-ascii number to 0-9
  707. LookupLetter(tr, number, 0, ph_buf, control & 1);
  708. }
  709. al_offset = 0;
  710. al_flags = 0;
  711. if((alphabet = AlphabetFromChar(letter)) != NULL)
  712. {
  713. al_offset = alphabet->offset;
  714. al_flags = alphabet->flags;
  715. }
  716. if(alphabet != current_alphabet)
  717. {
  718. // speak the name of the alphabet
  719. current_alphabet = alphabet;
  720. if((alphabet != NULL) && !(al_flags & AL_DONT_NAME) && (al_offset != translator->letter_bits_offset))
  721. {
  722. if((al_flags & AL_DONT_NAME) || (al_offset == translator->langopts.alt_alphabet) || (al_offset == translator->langopts.our_alphabet))
  723. {
  724. // don't say the alphabet name
  725. }
  726. else
  727. {
  728. ph_buf2[0] = 0;
  729. if(Lookup(translator, alphabet->name, ph_alphabet) == 0) // the original language for the current voice
  730. {
  731. // Can't find the local name for this alphabet, use the English name
  732. ph_alphabet[2] = SetTranslator2("en"); // overwrites previous contents of translator2
  733. Lookup(translator2, alphabet->name, ph_buf2);
  734. }
  735. else if(translator != tr)
  736. {
  737. phontab_1 = tr->phoneme_tab_ix;
  738. strcpy(ph_buf2, ph_alphabet);
  739. ph_alphabet[2] = translator->phoneme_tab_ix;
  740. }
  741. if(ph_buf2[0] != 0)
  742. {
  743. // we used a different language for the alphabet name (now in ph_buf2)
  744. ph_alphabet[0] = phonPAUSE;
  745. ph_alphabet[1] = phonSWITCH;
  746. strcpy(&ph_alphabet[3], ph_buf2);
  747. len = strlen(ph_buf2) + 3;
  748. ph_alphabet[len] = phonSWITCH;
  749. ph_alphabet[len+1] = phontab_1;
  750. ph_alphabet[len+2] = 0;
  751. }
  752. }
  753. }
  754. }
  755. // caution: SetWordStress() etc don't expect phonSWITCH + phoneme table number
  756. if(ph_buf[0] == 0)
  757. {
  758. if((al_offset != 0) && (al_offset == translator->langopts.alt_alphabet))
  759. language = translator->langopts.alt_alphabet_lang;
  760. else
  761. if((alphabet != NULL) && (alphabet->language != 0) && !(al_flags & AL_NOT_LETTERS))
  762. language = alphabet->language;
  763. else
  764. language = L('e','n');
  765. if((language != tr->translator_name) || (language == L('k','o')))
  766. {
  767. char *p3;
  768. int initial, code;
  769. char hangul_buf[12];
  770. // speak in the language for this alphabet (or English)
  771. ph_buf[2] = SetTranslator2(WordToString2(language));
  772. if(translator2 != NULL)
  773. {
  774. if(((code = letter - 0xac00) >= 0) && (letter <= 0xd7af))
  775. {
  776. // Special case for Korean letters.
  777. // break a syllable hangul into 2 or 3 individual jamo
  778. hangul_buf[0] = ' ';
  779. p3 = &hangul_buf[1];
  780. if((initial = (code/28)/21) != 11)
  781. {
  782. p3 += utf8_out(initial + 0x1100, p3);
  783. }
  784. utf8_out(((code/28) % 21) + 0x1161, p3); // medial
  785. utf8_out((code % 28) + 0x11a7, &p3[3]); // final
  786. p3[6] = ' ';
  787. p3[7] = 0;
  788. ph_buf[3] = 0;
  789. TranslateRules(translator2, &hangul_buf[1], &ph_buf[3], sizeof(ph_buf)-3, NULL, 0, NULL);
  790. SetWordStress(translator2, &ph_buf[3], NULL, -1, 0);
  791. }
  792. else
  793. {
  794. LookupLetter(translator2, letter, word[n_bytes], &ph_buf[3], control & 1);
  795. }
  796. if(ph_buf[3] == phonSWITCH)
  797. {
  798. // another level of language change
  799. ph_buf[2] = SetTranslator2(&ph_buf[4]);
  800. LookupLetter(translator2, letter, word[n_bytes], &ph_buf[3], control & 1);
  801. }
  802. SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
  803. if(ph_buf[3] != 0)
  804. {
  805. ph_buf[0] = phonPAUSE;
  806. ph_buf[1] = phonSWITCH;
  807. len = strlen(&ph_buf[3]) + 3;
  808. ph_buf[len] = phonSWITCH; // switch back
  809. ph_buf[len+1] = tr->phoneme_tab_ix;
  810. ph_buf[len+2] = 0;
  811. }
  812. }
  813. }
  814. }
  815. if(ph_buf[0] == 0)
  816. {
  817. // character name not found
  818. if(ph_buf[0]== 0)
  819. {
  820. speak_letter_number = 1;
  821. if(!(al_flags & AL_NO_SYMBOL))
  822. {
  823. if(iswalpha2(letter))
  824. Lookup(translator, "_?A", ph_buf);
  825. if((ph_buf[0]==0) && !iswspace(letter))
  826. Lookup(translator, "_??", ph_buf);
  827. if(ph_buf[0] == 0)
  828. {
  829. EncodePhonemes("l'et@", ph_buf, NULL);
  830. }
  831. }
  832. if(!(control & 4) && (al_flags & AL_NOT_CODE))
  833. {
  834. // don't speak the character code number, unless we want full details of this character
  835. speak_letter_number = 0;
  836. }
  837. // if((ph_alphabet[0] != 0) && speak_letter_number)
  838. // ph_buf[0] = 0; // don't speak "letter" if we speak alphabet name
  839. if(speak_letter_number)
  840. {
  841. if(al_offset == 0x2800)
  842. {
  843. // braille dots symbol, list the numbered dots
  844. p2 = hexbuf;
  845. for(ix=0; ix<8; ix++)
  846. {
  847. if(letter & (1 << ix))
  848. {
  849. *p2++ = '1'+ix;
  850. }
  851. }
  852. *p2 = 0;
  853. }
  854. else
  855. {
  856. // speak the hexadecimal number of the character code
  857. sprintf(hexbuf,"%x",letter);
  858. }
  859. pbuf = ph_buf;
  860. for(p2 = hexbuf; *p2 != 0; p2++)
  861. {
  862. pbuf += strlen(pbuf);
  863. *pbuf++ = phonPAUSE_VSHORT;
  864. LookupLetter(translator, *p2, 0, pbuf, 1);
  865. if(((pbuf[0] == 0) || (pbuf[0]==phonSWITCH)) && (*p2 >= 'a'))
  866. {
  867. // This language has no translation for 'a' to 'f', speak English names using base phonemes
  868. EncodePhonemes(hex_letters[*p2 - 'a'], pbuf, NULL);
  869. }
  870. }
  871. strcat(pbuf, pause_string);
  872. }
  873. }
  874. }
  875. len = strlen(phonemes);
  876. if(tr->langopts.accents & 2) // 'capital' before or after the word ?
  877. sprintf(ph_buf2,"%c%s%s%s",0xff,ph_alphabet,ph_buf,capital);
  878. else
  879. sprintf(ph_buf2,"%c%s%s%s",0xff,ph_alphabet,capital,ph_buf); // the 0xff marker will be removed or replaced in SetSpellingStress()
  880. if((len + strlen(ph_buf2)) < N_WORD_PHONEMES)
  881. {
  882. strcpy(&phonemes[len],ph_buf2);
  883. }
  884. return(n_bytes);
  885. } // end of TranslateLetter
  886. void SetSpellingStress(Translator *tr, char *phonemes, int control, int n_chars)
  887. {//=============================================================================
  888. // Individual letter names, reduce the stress of some.
  889. int ix;
  890. unsigned int c;
  891. int n_stress=0;
  892. int prev = 0;
  893. int count;
  894. unsigned char buf[N_WORD_PHONEMES];
  895. for(ix=0; (c = phonemes[ix]) != 0; ix++)
  896. {
  897. if((c == phonSTRESS_P) && (prev != phonSWITCH))
  898. {
  899. n_stress++;
  900. }
  901. buf[ix] = prev = c;
  902. }
  903. buf[ix] = 0;
  904. count = 0;
  905. prev = 0;
  906. for(ix=0; (c = buf[ix]) != 0; ix++)
  907. {
  908. if((c == phonSTRESS_P) && (n_chars > 1) && (prev != phonSWITCH))
  909. {
  910. count++;
  911. if(tr->langopts.spelling_stress == 1)
  912. {
  913. // stress on initial letter when spelling
  914. if(count > 1)
  915. c = phonSTRESS_3;
  916. }
  917. else
  918. {
  919. if(count != n_stress)
  920. {
  921. if(((count % 3) != 0) || (count == n_stress-1))
  922. c = phonSTRESS_3; // reduce to secondary stress
  923. }
  924. }
  925. }
  926. else if(c == 0xff)
  927. {
  928. if((control < 2) || (ix==0))
  929. continue; // don't insert pauses
  930. if(control == 4)
  931. c = phonPAUSE; // pause after each character
  932. if(((count % 3) == 0) || (control > 2))
  933. c = phonPAUSE_NOLINK; // pause following a primary stress
  934. else
  935. c = phonPAUSE_VSHORT;
  936. }
  937. *phonemes++ = prev = c;
  938. }
  939. if(control >= 2)
  940. *phonemes++ = phonPAUSE_NOLINK;
  941. *phonemes = 0;
  942. } // end of SetSpellingStress
  943. // Numbers
  944. static char ph_ordinal2[12];
  945. static char ph_ordinal2x[12];
  946. static int CheckDotOrdinal(Translator *tr, char *word, char *word_end, WORD_TAB *wtab, int roman)
  947. {//==============================================================================================
  948. int ordinal = 0;
  949. int c2;
  950. int nextflags;
  951. if((tr->langopts.numbers & NUM_ORDINAL_DOT) && ((word_end[0] == '.') || (wtab[0].flags & FLAG_HAS_DOT)) && !(wtab[1].flags & FLAG_NOSPACE))
  952. {
  953. if(roman || !(wtab[1].flags & FLAG_FIRST_UPPER))
  954. {
  955. if(word_end[0] == '.')
  956. utf8_in(&c2, &word_end[2]);
  957. else
  958. utf8_in(&c2, &word_end[0]);
  959. if((word_end[0] != 0) && (word_end[1] != 0) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || IsAlpha(c2)))
  960. {
  961. // ordinal number is indicated by dot after the number
  962. // but not if the next word starts with an upper-case letter
  963. // (c2 == 0) is for cases such as, "2.,"
  964. ordinal = 2;
  965. if(word_end[0] == '.')
  966. word_end[0] = ' ';
  967. if((roman==0) && (tr->translator_name == L('h','u')))
  968. {
  969. // lang=hu don't treat dot as ordinal indicator if the next word is a month name ($alt). It may have a suffix.
  970. nextflags = 0;
  971. if(IsAlpha(c2))
  972. {
  973. nextflags = TranslateWord(tr, &word_end[2], 0, NULL, NULL);
  974. }
  975. if((tr->prev_dict_flags[0] & FLAG_ALT_TRANS) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || iswdigit(c2)))
  976. ordinal = 0; // TEST 09.02.10
  977. if(nextflags & FLAG_ALT_TRANS)
  978. ordinal = 0;
  979. if(nextflags & FLAG_ALT3_TRANS)
  980. {
  981. if(word[-2] == '-')
  982. ordinal = 0; // eg. december 2-5. között
  983. if(tr->prev_dict_flags[0] & (FLAG_ALT_TRANS | FLAG_ALT3_TRANS))
  984. ordinal = 0x22;
  985. }
  986. }
  987. }
  988. }
  989. }
  990. return(ordinal);
  991. } // end of CheckDotOrdinal
  992. static int hu_number_e(const char *word, int thousandplex, int value)
  993. {//==================================================================
  994. // lang-hu: variant form of numbers when followed by hyphen and a suffix starting with 'a' or 'e' (but not a, e, az, ez, azt, ezt, att. ett
  995. if((word[0] == 'a') || (word[0] == 'e'))
  996. {
  997. if((word[1] == ' ') || (word[1] == 'z') || ((word[1] == 't') && (word[2] == 't')))
  998. return(0);
  999. if(((thousandplex==1) || ((value % 1000) == 0)) && (word[1] == 'l'))
  1000. return(0); // 1000-el
  1001. return(1);
  1002. }
  1003. return(0);
  1004. } // end of hu_numnber_e
  1005. int TranslateRoman(Translator *tr, char *word, char *ph_out, WORD_TAB *wtab)
  1006. {//=========================================================================
  1007. int c;
  1008. char *p;
  1009. const char *p2;
  1010. int acc;
  1011. int prev;
  1012. int value;
  1013. int subtract;
  1014. int repeat = 0;
  1015. int n_digits = 0;
  1016. char *word_start;
  1017. int num_control = 0;
  1018. unsigned int flags[2];
  1019. char ph_roman[30];
  1020. char number_chars[N_WORD_BYTES];
  1021. static const char *roman_numbers = "ixcmvld";
  1022. static int roman_values[] = {1,10,100,1000,5,50,500};
  1023. acc = 0;
  1024. prev = 0;
  1025. subtract = 0x7fff;
  1026. ph_out[0] = 0;
  1027. flags[0] = 0;
  1028. flags[1] = 0;
  1029. if(((tr->langopts.numbers & NUM_ROMAN_CAPITALS) && !(wtab[0].flags & FLAG_ALL_UPPER)) || IsDigit09(word[-2]))
  1030. return(0); // not '2xx'
  1031. if(word[1] == ' ')
  1032. return(0); // only one letter, don't speak as a Roman Number
  1033. word_start = word;
  1034. while((c = *word++) != ' ')
  1035. {
  1036. if((p2 = strchr(roman_numbers,c)) == NULL)
  1037. return(0);
  1038. value = roman_values[p2 - roman_numbers];
  1039. if(value == prev)
  1040. {
  1041. repeat++;
  1042. if(repeat >= 3)
  1043. return(0);
  1044. }
  1045. else
  1046. repeat = 0;
  1047. if((prev > 1) && (prev != 10) && (prev != 100))
  1048. {
  1049. if(value >= prev)
  1050. return(0);
  1051. }
  1052. if((prev != 0) && (prev < value))
  1053. {
  1054. if(((acc % 10) != 0) || ((prev*10) < value))
  1055. return(0);
  1056. subtract = prev;
  1057. value -= subtract;
  1058. }
  1059. else if(value >= subtract)
  1060. return(0);
  1061. else
  1062. acc += prev;
  1063. prev = value;
  1064. n_digits++;
  1065. }
  1066. if(IsDigit09(word[0]))
  1067. return(0); // eg. 'xx2'
  1068. acc += prev;
  1069. if(acc < tr->langopts.min_roman)
  1070. return(0);
  1071. if(acc > tr->langopts.max_roman)
  1072. return(0);
  1073. Lookup(tr, "_roman",ph_roman); // precede by "roman" if _rom is defined in *_list
  1074. p = &ph_out[0];
  1075. if((tr->langopts.numbers & NUM_ROMAN_AFTER) == 0)
  1076. {
  1077. strcpy(ph_out,ph_roman);
  1078. p = &ph_out[strlen(ph_roman)];
  1079. }
  1080. sprintf(number_chars," %d %s ",acc, tr->langopts.roman_suffix);
  1081. if(word[0] == '.')
  1082. {
  1083. // dot has not been removed. This implies that there was no space after it
  1084. return(0);
  1085. }
  1086. if(CheckDotOrdinal(tr, word_start, word, wtab, 1))
  1087. wtab[0].flags |= FLAG_ORDINAL;
  1088. if(tr->langopts.numbers & NUM_ROMAN_ORDINAL)
  1089. {
  1090. if(tr->translator_name == L('h','u'))
  1091. {
  1092. if(!(wtab[0].flags & FLAG_ORDINAL))
  1093. {
  1094. if((wtab[0].flags & FLAG_HYPHEN_AFTER) && hu_number_e(word, 0, acc))
  1095. {
  1096. // should use the 'e' form of the number
  1097. num_control |= 1;
  1098. }
  1099. else
  1100. return(0);
  1101. }
  1102. }
  1103. else
  1104. {
  1105. wtab[0].flags |= FLAG_ORDINAL;
  1106. }
  1107. }
  1108. tr->prev_dict_flags[0] = 0;
  1109. tr->prev_dict_flags[1] = 0;
  1110. TranslateNumber(tr, &number_chars[2], p, flags, wtab, num_control);
  1111. if(tr->langopts.numbers & NUM_ROMAN_AFTER)
  1112. strcat(ph_out,ph_roman);
  1113. return(1);
  1114. } // end of TranslateRoman
  1115. static const char *M_Variant(int value)
  1116. {//====================================
  1117. // returns M, or perhaps MA or MB for some cases
  1118. int teens = 0;
  1119. if(((value % 100) > 10) && ((value % 100) < 20))
  1120. teens = 1;
  1121. switch((translator->langopts.numbers2 >> 6) & 0x7)
  1122. {
  1123. case 1: // lang=ru use singular for xx1 except for x11
  1124. if((teens == 0) && ((value % 10) == 1))
  1125. return("1M");
  1126. break;
  1127. case 2: // lang=cs,sk
  1128. if((value >= 2) && (value <= 4))
  1129. return("0MA");
  1130. break;
  1131. case 3: // lang=pl
  1132. if((teens == 0) && (((value % 10) >= 2) && ((value % 10) <= 4)))
  1133. return("0MA");
  1134. break;
  1135. case 4: // lang=lt
  1136. if((teens == 1) || ((value % 10) == 0))
  1137. return("0MB");
  1138. if((value % 10) == 1)
  1139. return("0MA");
  1140. break;
  1141. case 5: // lang=bs,hr,sr
  1142. if(teens == 0)
  1143. {
  1144. if((value % 10) == 1)
  1145. return("1M");
  1146. if(((value % 10) >= 2) && ((value % 10) <= 4))
  1147. return("0MA");
  1148. }
  1149. break;
  1150. }
  1151. return("0M");
  1152. }
  1153. static int LookupThousands(Translator *tr, int value, int thousandplex, int thousands_exact, char *ph_out)
  1154. {//=======================================================================================================
  1155. // thousands_exact: bit 0 no hundreds,tens,or units, bit 1 ordinal numberr
  1156. int found;
  1157. int found_value=0;
  1158. char string[12];
  1159. char ph_of[12];
  1160. char ph_thousands[40];
  1161. char ph_buf[40];
  1162. ph_of[0] = 0;
  1163. // first look for a match with the exact value of thousands
  1164. if(value > 0)
  1165. {
  1166. if(thousands_exact & 1)
  1167. {
  1168. if(thousands_exact & 2)
  1169. {
  1170. // ordinal number
  1171. sprintf(string,"_%dM%do",value,thousandplex);
  1172. found_value = Lookup(tr, string, ph_thousands);
  1173. }
  1174. if(!found_value & (number_control & 1))
  1175. {
  1176. // look for the 'e' variant
  1177. sprintf(string,"_%dM%de",value,thousandplex);
  1178. found_value = Lookup(tr, string, ph_thousands);
  1179. }
  1180. if(!found_value)
  1181. {
  1182. // is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta)
  1183. sprintf(string,"_%dM%dx",value,thousandplex);
  1184. found_value = Lookup(tr, string, ph_thousands);
  1185. }
  1186. }
  1187. if(found_value == 0)
  1188. {
  1189. sprintf(string,"_%dM%d",value,thousandplex);
  1190. found_value = Lookup(tr, string, ph_thousands);
  1191. }
  1192. }
  1193. if(found_value == 0)
  1194. {
  1195. if((value % 100) >= 20)
  1196. {
  1197. Lookup(tr, "_0of", ph_of);
  1198. }
  1199. found = 0;
  1200. if(thousands_exact & 1)
  1201. {
  1202. if(thousands_exact & 2)
  1203. {
  1204. // ordinal number
  1205. sprintf(string,"_%s%do",M_Variant(value), thousandplex);
  1206. found = Lookup(tr, string, ph_thousands);
  1207. }
  1208. if(!found && (number_control & 1))
  1209. {
  1210. // look for the 'e' variant
  1211. sprintf(string,"_%s%de",M_Variant(value), thousandplex);
  1212. found = Lookup(tr, string, ph_thousands);
  1213. }
  1214. if(!found)
  1215. {
  1216. // is there a different pronunciation if there are no hundreds,tens,or units ?
  1217. sprintf(string,"_%s%dx",M_Variant(value), thousandplex);
  1218. found = Lookup(tr, string, ph_thousands);
  1219. }
  1220. }
  1221. if(found == 0)
  1222. {
  1223. sprintf(string,"_%s%d",M_Variant(value), thousandplex);
  1224. if(Lookup(tr, string, ph_thousands) == 0)
  1225. {
  1226. if(thousandplex > 3)
  1227. {
  1228. sprintf(string,"_0M%d", thousandplex-1);
  1229. if(Lookup(tr, string, ph_buf) == 0)
  1230. {
  1231. // say "millions" if this name is not available and neither is the next lower
  1232. Lookup(tr, "_0M2", ph_thousands);
  1233. speak_missing_thousands = 3;
  1234. }
  1235. }
  1236. if(ph_thousands[0] == 0)
  1237. {
  1238. // repeat "thousand" if higher order names are not available
  1239. sprintf(string,"_%dM1",value);
  1240. if((found_value = Lookup(tr, string, ph_thousands)) == 0)
  1241. Lookup(tr, "_0M1", ph_thousands);
  1242. speak_missing_thousands = 2;
  1243. }
  1244. }
  1245. }
  1246. }
  1247. sprintf(ph_out,"%s%s",ph_of,ph_thousands);
  1248. if((value == 1) && (thousandplex == 1) && (tr->langopts.numbers & NUM_OMIT_1_THOUSAND))
  1249. return(1);
  1250. return(found_value);
  1251. } // end f LookupThousands
  1252. static int LookupNum2(Translator *tr, int value, int thousandplex, const int control, char *ph_out)
  1253. {//=============================================================================
  1254. // Lookup a 2 digit number
  1255. // control bit 0: ordinal number
  1256. // control bit 1: final tens and units (not number of thousands) (use special form of '1', LANG=de "eins")
  1257. // control bit 2: tens and units only, no higher digits
  1258. // control bit 3: use feminine form of '2' (for thousands
  1259. // control bit 4: speak zero tens
  1260. // control bit 5: variant of ordinal number (lang=hu)
  1261. // bit 8 followed by decimal fraction
  1262. // bit 9: use #f form for both tens and units (lang=ml)
  1263. int found;
  1264. int ix;
  1265. int units;
  1266. int tens;
  1267. int is_ordinal;
  1268. int used_and=0;
  1269. int found_ordinal = 0;
  1270. int next_phtype;
  1271. int ord_type = 'o';
  1272. char string[12]; // for looking up entries in *_list
  1273. char ph_ordinal[20];
  1274. char ph_tens[50];
  1275. char ph_digits[50];
  1276. char ph_and[12];
  1277. units = value % 10;
  1278. tens = value / 10;
  1279. found = 0;
  1280. ph_ordinal[0] = 0;
  1281. ph_tens[0] = 0;
  1282. ph_digits[0] = 0;
  1283. ph_and[0] = 0;
  1284. if(control & 0x20)
  1285. {
  1286. ord_type = 'q';
  1287. }
  1288. is_ordinal = control & 1;
  1289. if((control & 2) && (n_digit_lookup == 2))
  1290. {
  1291. // pronunciation of the final 2 digits has already been found
  1292. strcpy(ph_out, digit_lookup);
  1293. }
  1294. else
  1295. {
  1296. if(digit_lookup[0] == 0)
  1297. {
  1298. // is there a special pronunciation for this 2-digit number
  1299. if(control & 8)
  1300. {
  1301. // is there a feminine or thousands-variant form?
  1302. sprintf(string,"_%dfx",value);
  1303. if((found = Lookup(tr, string, ph_digits)) == 0)
  1304. {
  1305. sprintf(string,"_%df",value);
  1306. found = Lookup(tr, string, ph_digits);
  1307. }
  1308. }
  1309. else if(is_ordinal)
  1310. {
  1311. strcpy(ph_ordinal, ph_ordinal2);
  1312. if(control & 4)
  1313. {
  1314. sprintf(string,"_%d%cx",value,ord_type); // LANG=hu, special word for 1. 2. when there are no higher digits
  1315. if((found = Lookup(tr, string, ph_digits)) != 0)
  1316. {
  1317. if(ph_ordinal2x[0] != 0)
  1318. strcpy(ph_ordinal, ph_ordinal2x); // alternate pronunciation (lang=an)
  1319. }
  1320. }
  1321. if(found == 0)
  1322. {
  1323. sprintf(string,"_%d%c",value,ord_type);
  1324. found = Lookup(tr, string, ph_digits);
  1325. }
  1326. found_ordinal = found;
  1327. }
  1328. if(found == 0)
  1329. {
  1330. if(control & 2)
  1331. {
  1332. // the final tens and units of a number
  1333. if(number_control & 1)
  1334. {
  1335. // look for 'e' variant
  1336. sprintf(string,"_%de",value);
  1337. found = Lookup(tr, string, ph_digits);
  1338. }
  1339. }
  1340. else
  1341. {
  1342. // followed by hundreds or thousands etc
  1343. if((tr->langopts.numbers2 & NUM2_ORDINAL_AND_THOUSANDS) && (thousandplex <= 1))
  1344. sprintf(string, "_%do", value); // LANG=TA
  1345. else
  1346. sprintf(string, "_%da", value);
  1347. found = Lookup(tr, string, ph_digits);
  1348. }
  1349. if(!found)
  1350. {
  1351. if((is_ordinal) && (tr->langopts.numbers2 & NUM2_NO_TEEN_ORDINALS))
  1352. {
  1353. // don't use numbers 10-99 to make ordinals, always use _1Xo etc (lang=pt)
  1354. }
  1355. else
  1356. {
  1357. sprintf(string,"_%d",value);
  1358. found = Lookup(tr, string, ph_digits);
  1359. }
  1360. }
  1361. }
  1362. }
  1363. // no, speak as tens+units
  1364. if((control & 0x10) && (value < 10))
  1365. {
  1366. // speak leading zero
  1367. Lookup(tr, "_0", ph_tens);
  1368. }
  1369. else
  1370. {
  1371. if(found)
  1372. {
  1373. ph_tens[0] = 0;
  1374. }
  1375. else
  1376. {
  1377. if(is_ordinal)
  1378. {
  1379. sprintf(string,"_%dX%c", tens, ord_type);
  1380. if(Lookup(tr, string, ph_tens) != 0)
  1381. {
  1382. found_ordinal = 1;
  1383. if((units != 0) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))
  1384. {
  1385. // Use the ordinal form of tens as well as units. Add the ordinal ending
  1386. strcat(ph_tens, ph_ordinal2);
  1387. }
  1388. }
  1389. }
  1390. if(found_ordinal == 0)
  1391. {
  1392. if(control & 0x200)
  1393. sprintf(string, "_%dXf", tens);
  1394. else
  1395. sprintf(string,"_%dX", tens);
  1396. Lookup(tr, string, ph_tens);
  1397. }
  1398. if((ph_tens[0] == 0) && (tr->langopts.numbers & NUM_VIGESIMAL))
  1399. {
  1400. // tens not found, (for example) 73 is 60+13
  1401. units = (value % 20);
  1402. sprintf(string,"_%dX", tens & 0xfe);
  1403. Lookup(tr, string, ph_tens);
  1404. }
  1405. ph_digits[0] = 0;
  1406. if(units > 0)
  1407. {
  1408. found = 0;
  1409. if((control & 2) && (digit_lookup[0] != 0))
  1410. {
  1411. // we have an entry for this digit (possibly together with the next word)
  1412. strcpy(ph_digits, digit_lookup);
  1413. found_ordinal = 1;
  1414. ph_ordinal[0] = 0;
  1415. }
  1416. else
  1417. {
  1418. if(control & 8)
  1419. {
  1420. // is there a variant form of this number?
  1421. sprintf(string,"_%df",units);
  1422. found = Lookup(tr, string, ph_digits);
  1423. }
  1424. if((is_ordinal) && ((tr->langopts.numbers & NUM_SWAP_TENS) == 0))
  1425. {
  1426. // ordinal
  1427. sprintf(string,"_%d%c",units,ord_type);
  1428. if((found = Lookup(tr, string, ph_digits)) != 0)
  1429. {
  1430. found_ordinal = 1;
  1431. }
  1432. }
  1433. if(found == 0)
  1434. {
  1435. if((number_control & 1) && (control & 2))
  1436. {
  1437. // look for 'e' variant
  1438. sprintf(string,"_%de",units);
  1439. found = Lookup(tr, string, ph_digits);
  1440. }
  1441. else if(((control & 2) == 0) || ((tr->langopts.numbers & NUM_SWAP_TENS) != 0))
  1442. {
  1443. // followed by hundreds or thousands (or tens)
  1444. if((tr->langopts.numbers2 & NUM2_ORDINAL_AND_THOUSANDS) && (thousandplex <= 1))
  1445. sprintf(string, "_%do", units); // LANG=TA, only for 100s, 1000s
  1446. else
  1447. sprintf(string, "_%da", units);
  1448. found = Lookup(tr, string, ph_digits);
  1449. }
  1450. }
  1451. if(found == 0)
  1452. {
  1453. sprintf(string,"_%d",units);
  1454. Lookup(tr, string, ph_digits);
  1455. }
  1456. }
  1457. }
  1458. }
  1459. }
  1460. if((is_ordinal) && (found_ordinal == 0) && (ph_ordinal[0] == 0))
  1461. {
  1462. if((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & NUM_SWAP_TENS)))
  1463. Lookup(tr, "_ord20", ph_ordinal);
  1464. if(ph_ordinal[0] == 0)
  1465. Lookup(tr, "_ord", ph_ordinal);
  1466. }
  1467. if((tr->langopts.numbers & (NUM_SWAP_TENS | NUM_AND_UNITS)) && (ph_tens[0] != 0) && (ph_digits[0] != 0))
  1468. {
  1469. Lookup(tr, "_0and", ph_and);
  1470. if((is_ordinal) && (tr->langopts.numbers2 & NUM2_ORDINAL_NO_AND))
  1471. ph_and[0] = 0;
  1472. if(tr->langopts.numbers & NUM_SWAP_TENS)
  1473. sprintf(ph_out,"%s%s%s%s",ph_digits, ph_and, ph_tens, ph_ordinal);
  1474. else
  1475. sprintf(ph_out,"%s%s%s%s",ph_tens, ph_and, ph_digits, ph_ordinal);
  1476. used_and = 1;
  1477. }
  1478. else
  1479. {
  1480. if(tr->langopts.numbers & NUM_SINGLE_VOWEL)
  1481. {
  1482. // remove vowel from the end of tens if units starts with a vowel (LANG=Italian)
  1483. if(((ix = strlen(ph_tens)-1) >= 0) && (ph_digits[0] != 0))
  1484. {
  1485. if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS)
  1486. next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type;
  1487. if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
  1488. ph_tens[ix] = 0;
  1489. }
  1490. }
  1491. if((tr->langopts.numbers2 & NUM2_ORDINAL_DROP_VOWEL) && (ph_ordinal[0] != 0))
  1492. {
  1493. ix = sprintf(ph_out,"%s%s", ph_tens, ph_digits);
  1494. if((ix > 0) && (phoneme_tab[(unsigned char)(ph_out[ix-1])]->type == phVOWEL))
  1495. ix--;
  1496. sprintf(&ph_out[ix], "%s", ph_ordinal);
  1497. }
  1498. else
  1499. {
  1500. sprintf(ph_out,"%s%s%s",ph_tens, ph_digits, ph_ordinal);
  1501. }
  1502. }
  1503. }
  1504. if(tr->langopts.numbers & NUM_SINGLE_STRESS_L)
  1505. {
  1506. // only one primary stress, on the first part (tens)
  1507. found = 0;
  1508. for(ix=0; ix < (signed)strlen(ph_out); ix++)
  1509. {
  1510. if(ph_out[ix] == phonSTRESS_P)
  1511. {
  1512. if(found)
  1513. ph_out[ix] = phonSTRESS_3;
  1514. else
  1515. found = 1;
  1516. }
  1517. }
  1518. }
  1519. else if(tr->langopts.numbers & NUM_SINGLE_STRESS)
  1520. {
  1521. // only one primary stress
  1522. found = 0;
  1523. for(ix=strlen(ph_out)-1; ix>=0; ix--)
  1524. {
  1525. if(ph_out[ix] == phonSTRESS_P)
  1526. {
  1527. if(found)
  1528. ph_out[ix] = phonSTRESS_3;
  1529. else
  1530. found = 1;
  1531. }
  1532. }
  1533. }
  1534. return(used_and);
  1535. } // end of LookupNum2
  1536. static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null, int thousandplex, int control)
  1537. {//=============================================================================================================
  1538. // Translate a 3 digit number
  1539. // control bit 0, previous thousands
  1540. // bit 1, ordinal number
  1541. // bit 5 variant form of ordinal number
  1542. // bit 8 followed by decimal fraction
  1543. int found;
  1544. int hundreds;
  1545. int tensunits;
  1546. int x;
  1547. int ix;
  1548. int exact;
  1549. int ordinal;
  1550. int tplex;
  1551. int say_zero_hundred=0;
  1552. int say_one_hundred;
  1553. char string[12]; // for looking up entries in **_list
  1554. char buf1[100];
  1555. char buf2[100];
  1556. char ph_100[20];
  1557. char ph_10T[20];
  1558. char ph_digits[50];
  1559. char ph_thousands[50];
  1560. char ph_hundred_and[12];
  1561. char ph_thousand_and[12];
  1562. ordinal = control & 0x22;
  1563. hundreds = value / 100;
  1564. tensunits = value % 100;
  1565. buf1[0] = 0;
  1566. ph_thousands[0] = 0;
  1567. ph_thousand_and[0] = 0;
  1568. if((tr->langopts.numbers & NUM_ZERO_HUNDRED) && ((control & 1) || (hundreds >= 10)))
  1569. {
  1570. say_zero_hundred = 1; // lang=vi
  1571. }
  1572. if((hundreds > 0) || say_zero_hundred)
  1573. {
  1574. found = 0;
  1575. if(ordinal && (tensunits == 0))
  1576. {
  1577. // ordinal number, with no tens or units
  1578. found = Lookup(tr, "_0Co", ph_100);
  1579. }
  1580. if(found == 0)
  1581. {
  1582. if(tensunits==0)
  1583. {
  1584. // special form for exact hundreds?
  1585. found = Lookup(tr, "_0C0", ph_100);
  1586. }
  1587. if(!found)
  1588. {
  1589. Lookup(tr, "_0C", ph_100);
  1590. }
  1591. }
  1592. if(((tr->langopts.numbers & NUM_1900) != 0) && (hundreds == 19))
  1593. {
  1594. // speak numbers such as 1984 as years: nineteen-eighty-four
  1595. // ph_100[0] = 0; // don't say "hundred", we also need to surpess "and"
  1596. }
  1597. else if(hundreds >= 10)
  1598. {
  1599. ph_digits[0] = 0;
  1600. exact = 0;
  1601. if ((value % 1000) == 0)
  1602. exact = 1;
  1603. tplex = thousandplex+1;
  1604. if(tr->langopts.numbers2 & NUM2_MYRIADS)
  1605. {
  1606. tplex = 0;
  1607. }
  1608. if(LookupThousands(tr, hundreds / 10, tplex, exact | ordinal, ph_10T) == 0)
  1609. {
  1610. x = 0;
  1611. if(tr->langopts.numbers2 & (1 << tplex))
  1612. x = 8; // use variant (feminine) for before thousands and millions
  1613. if(tr->translator_name == L('m','l'))
  1614. x = 0x208;
  1615. LookupNum2(tr, hundreds/10, thousandplex, x, ph_digits);
  1616. }
  1617. if(tr->langopts.numbers2 & 0x200)
  1618. sprintf(ph_thousands,"%s%c%s%c",ph_10T,phonEND_WORD,ph_digits,phonEND_WORD); // say "thousands" before its number, not after
  1619. else
  1620. sprintf(ph_thousands,"%s%c%s%c",ph_digits,phonEND_WORD,ph_10T,phonEND_WORD);
  1621. hundreds %= 10;
  1622. if((hundreds == 0) && (say_zero_hundred == 0))
  1623. ph_100[0] = 0;
  1624. suppress_null = 1;
  1625. control |= 1;
  1626. }
  1627. ph_digits[0] = 0;
  1628. if((hundreds > 0) || say_zero_hundred)
  1629. {
  1630. if((tr->langopts.numbers & NUM_AND_HUNDRED) && ((control & 1) || (ph_thousands[0] != 0)))
  1631. {
  1632. Lookup(tr, "_0and", ph_thousand_and);
  1633. }
  1634. suppress_null = 1;
  1635. found = 0;
  1636. if((ordinal)
  1637. && ((tensunits == 0) || (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL)))
  1638. {
  1639. // ordinal number
  1640. sprintf(string, "_%dCo", hundreds);
  1641. found = Lookup(tr, string, ph_digits);
  1642. if((tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL) && (tensunits > 0))
  1643. {
  1644. // Use ordinal form of hundreds, as well as for tens and units
  1645. // Add ordinal suffix to the hundreds
  1646. strcat(ph_digits, ph_ordinal2);
  1647. }
  1648. }
  1649. if((hundreds == 0) && say_zero_hundred)
  1650. {
  1651. Lookup(tr, "_0", ph_digits);
  1652. }
  1653. else
  1654. {
  1655. if((hundreds==1) && (tr->langopts.numbers2 & NUM2_OMIT_1_HUNDRED_ONLY) && ((control & 1)==0))
  1656. {
  1657. // only look for special 100 if there are previous thousands
  1658. }
  1659. else
  1660. {
  1661. if((!found) && (tensunits == 0))
  1662. {
  1663. // is there a special pronunciation for exactly n00 ?
  1664. sprintf(string,"_%dC0",hundreds);
  1665. found = Lookup(tr, string, ph_digits);
  1666. }
  1667. if(!found)
  1668. {
  1669. sprintf(string,"_%dC",hundreds);
  1670. found = Lookup(tr, string, ph_digits); // is there a specific pronunciation for n-hundred ?
  1671. }
  1672. }
  1673. if(found)
  1674. {
  1675. ph_100[0] = 0;
  1676. }
  1677. else
  1678. {
  1679. say_one_hundred = 1;
  1680. if(hundreds == 1)
  1681. {
  1682. if((tr->langopts.numbers & NUM_OMIT_1_HUNDRED) != 0)
  1683. say_one_hundred = 0;
  1684. }
  1685. if(say_one_hundred != 0)
  1686. {
  1687. LookupNum2(tr, hundreds, thousandplex, 0, ph_digits);
  1688. }
  1689. }
  1690. }
  1691. }
  1692. sprintf(buf1,"%s%s%s%s",ph_thousands,ph_thousand_and,ph_digits,ph_100);
  1693. }
  1694. ph_hundred_and[0] = 0;
  1695. if(tensunits > 0)
  1696. {
  1697. if((control & 2) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))
  1698. {
  1699. // Don't use "and" if we apply ordinal to both hundreds and units
  1700. }
  1701. else
  1702. {
  1703. if((value > 100) || ((control & 1) && (thousandplex==0)))
  1704. {
  1705. if((tr->langopts.numbers & NUM_HUNDRED_AND) || ((tr->langopts.numbers & NUM_HUNDRED_AND_DIGIT) && (tensunits < 10)))
  1706. {
  1707. Lookup(tr, "_0and", ph_hundred_and);
  1708. }
  1709. }
  1710. if((tr->langopts.numbers & NUM_THOUSAND_AND) && (hundreds == 0) && ((control & 1) || (ph_thousands[0] != 0)))
  1711. {
  1712. Lookup(tr, "_0and", ph_hundred_and);
  1713. }
  1714. }
  1715. }
  1716. buf2[0] = 0;
  1717. if((tensunits != 0) || (suppress_null == 0))
  1718. {
  1719. x = 0;
  1720. if(thousandplex==0)
  1721. {
  1722. x = 2; // allow "eins" for 1 rather than "ein"
  1723. if(ordinal)
  1724. x = 3; // ordinal number
  1725. if((value < 100) && !(control & 1))
  1726. x |= 4; // tens and units only, no higher digits
  1727. if(ordinal & 0x20)
  1728. x |= 0x20; // variant form of ordinal number
  1729. }
  1730. else
  1731. {
  1732. if(tr->langopts.numbers2 & (1 << thousandplex))
  1733. x = 8; // use variant (feminine) for before thousands and millions
  1734. }
  1735. if((tr->translator_name == L('m','l')) && (thousandplex == 1))
  1736. {
  1737. x |= 0x208; // use #f form for both tens and units
  1738. }
  1739. if(LookupNum2(tr, tensunits, thousandplex, x | (control & 0x100), buf2) != 0)
  1740. {
  1741. if(tr->langopts.numbers & NUM_SINGLE_AND)
  1742. ph_hundred_and[0] = 0; // don't put 'and' after 'hundred' if there's 'and' between tens and units
  1743. }
  1744. }
  1745. else
  1746. {
  1747. if(ph_ordinal2[0] != 0)
  1748. {
  1749. ix = strlen(buf1);
  1750. if((ix > 0) && (buf1[ix-1] == phonPAUSE_SHORT))
  1751. buf1[ix-1] = 0; // remove pause before addding ordinal suffix
  1752. strcpy(buf2, ph_ordinal2);
  1753. }
  1754. }
  1755. sprintf(ph_out,"%s%s%c%s",buf1,ph_hundred_and,phonEND_WORD,buf2);
  1756. return(0);
  1757. } // end of LookupNum3
  1758. bool CheckThousandsGroup(char *word, int group_len)
  1759. {//================================================
  1760. // Is this a group of 3 digits which looks like a thousands group?
  1761. int ix;
  1762. if(IsDigit09(word[group_len]) || IsDigit09(-1))
  1763. return(false);
  1764. for(ix=0; ix < group_len; ix++)
  1765. {
  1766. if(!IsDigit09(word[ix]))
  1767. return(false);
  1768. }
  1769. return(true);
  1770. }
  1771. static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
  1772. {//=====================================================================================================================
  1773. // Number translation with various options
  1774. // the "word" may be up to 4 digits
  1775. // "words" of 3 digits may be preceded by another number "word" for thousands or millions
  1776. int n_digits;
  1777. int value;
  1778. int ix;
  1779. int digix;
  1780. unsigned char c;
  1781. int suppress_null = 0;
  1782. int decimal_point = 0;
  1783. int thousandplex = 0;
  1784. int thousands_exact = 1;
  1785. int thousands_inc = 0;
  1786. int prev_thousands = 0;
  1787. int ordinal = 0;
  1788. int this_value;
  1789. int decimal_count;
  1790. int max_decimal_count;
  1791. int decimal_mode;
  1792. int suffix_ix;
  1793. int skipwords = 0;
  1794. int group_len;
  1795. int len;
  1796. char *p;
  1797. char string[32]; // for looking up entries in **_list
  1798. char buf1[100];
  1799. char ph_append[50];
  1800. char ph_buf[200];
  1801. char ph_buf2[50];
  1802. char ph_zeros[50];
  1803. char suffix[30]; // string[] must be long enough for sizeof(suffix)+2
  1804. char buf_digit_lookup[50];
  1805. static const char str_pause[2] = {phonPAUSE_NOLINK,0};
  1806. *flags = 0;
  1807. n_digit_lookup = 0;
  1808. buf_digit_lookup[0] = 0;
  1809. digit_lookup = buf_digit_lookup;
  1810. number_control = control;
  1811. for(ix=0; IsDigit09(word[ix]); ix++) ;
  1812. n_digits = ix;
  1813. value = this_value = atoi(word);
  1814. group_len = 3;
  1815. if(tr->langopts.numbers2 & NUM2_MYRIADS)
  1816. group_len = 4;
  1817. // is there a previous thousands part (as a previous "word") ?
  1818. if((n_digits == group_len) && (word[-2] == tr->langopts.thousands_sep) && IsDigit09(word[-3]))
  1819. {
  1820. prev_thousands = 1;
  1821. }
  1822. else if((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & NUM_ALLOW_SPACE))
  1823. {
  1824. // thousands groups can be separated by spaces
  1825. if((n_digits == 3) && !(wtab->flags & FLAG_MULTIPLE_SPACES) && IsDigit09(word[-2]))
  1826. {
  1827. prev_thousands = 1;
  1828. }
  1829. }
  1830. if(prev_thousands == 0)
  1831. {
  1832. speak_missing_thousands = 0;
  1833. }
  1834. ph_ordinal2[0] = 0;
  1835. ph_zeros[0] = 0;
  1836. if(prev_thousands || (word[0] != '0'))
  1837. {
  1838. // don't check for ordinal if the number has a leading zero
  1839. if((ordinal = CheckDotOrdinal(tr, word, &word[ix], wtab, 0)) != 0)
  1840. {
  1841. // dot_ordinal = 1;
  1842. }
  1843. }
  1844. if((word[ix] == '.') && !IsDigit09(word[ix+1]) && !IsDigit09(word[ix+2]) && !(wtab[1].flags & FLAG_NOSPACE))
  1845. {
  1846. // remove dot unless followed by another number
  1847. word[ix] = 0;
  1848. }
  1849. if((ordinal == 0) || (tr->translator_name == L('h','u')))
  1850. {
  1851. // NOTE lang=hu, allow both dot and ordinal suffix, eg. "december 21.-én"
  1852. // look for an ordinal number suffix after the number
  1853. ix++;
  1854. p = suffix;
  1855. if(wtab[0].flags & FLAG_HYPHEN_AFTER)
  1856. {
  1857. *p++ = '-';
  1858. ix++;
  1859. }
  1860. while((word[ix] != 0) && (word[ix] != ' ') && (ix < (int)(sizeof(suffix)-1)))
  1861. {
  1862. *p++ = word[ix++];
  1863. }
  1864. *p = 0;
  1865. if(suffix[0] != 0)
  1866. {
  1867. if((tr->langopts.ordinal_indicator != NULL) && (strcmp(suffix, tr->langopts.ordinal_indicator) == 0))
  1868. {
  1869. ordinal = 2;
  1870. }
  1871. else if(!IsDigit09(suffix[0])) // not _#9 (tab)
  1872. {
  1873. sprintf(string,"_#%s",suffix);
  1874. if(Lookup(tr, string, ph_ordinal2))
  1875. {
  1876. // this is an ordinal suffix
  1877. ordinal = 2;
  1878. flags[0] |= FLAG_SKIPWORDS;
  1879. skipwords = 1;
  1880. sprintf(string,"_x#%s",suffix);
  1881. Lookup(tr, string, ph_ordinal2x); // is there an alternate pronunciation?
  1882. }
  1883. }
  1884. }
  1885. }
  1886. if(wtab[0].flags & FLAG_ORDINAL)
  1887. ordinal = 2;
  1888. ph_append[0] = 0;
  1889. ph_buf2[0] = 0;
  1890. if((word[0] == '0') && (prev_thousands == 0) && (word[1] != ' ') && (word[1] != tr->langopts.decimal_sep))
  1891. {
  1892. if((n_digits == 2) && (word[3] == ':') && IsDigit09(word[5]) && isspace(word[7]))
  1893. {
  1894. // looks like a time 02:30, omit the leading zero
  1895. }
  1896. else
  1897. {
  1898. if(n_digits > 3)
  1899. {
  1900. flags[0] &= ~FLAG_SKIPWORDS;
  1901. return(0); // long number string with leading zero, speak as individual digits
  1902. }
  1903. // speak leading zeros
  1904. for(ix=0; (word[ix] == '0') && (ix < (n_digits-1)); ix++)
  1905. {
  1906. Lookup(tr, "_0", &ph_zeros[strlen(ph_zeros)]);
  1907. }
  1908. }
  1909. }
  1910. if((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[n_digits] == ' '))
  1911. thousands_inc = 1;
  1912. else if(word[n_digits] == tr->langopts.thousands_sep)
  1913. thousands_inc = 2;
  1914. suffix_ix = n_digits+2;
  1915. if(thousands_inc > 0)
  1916. {
  1917. // if the following "words" are three-digit groups, count them and add
  1918. // a "thousand"/"million" suffix to this one
  1919. digix = n_digits + thousands_inc;
  1920. while(((wtab[thousandplex+1].flags & FLAG_MULTIPLE_SPACES) == 0) && CheckThousandsGroup(&word[digix], group_len))
  1921. {
  1922. for(ix=0; ix<group_len; ix++)
  1923. {
  1924. if(word[digix+ix] != '0')
  1925. {
  1926. thousands_exact = 0;
  1927. break;
  1928. }
  1929. }
  1930. thousandplex++;
  1931. digix += group_len;
  1932. if((word[digix] == tr->langopts.thousands_sep) || ((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[digix] == ' ')))
  1933. {
  1934. suffix_ix = digix+2;
  1935. digix += thousands_inc;
  1936. }
  1937. else
  1938. break;
  1939. }
  1940. }
  1941. if((value == 0) && prev_thousands)
  1942. {
  1943. suppress_null = 1;
  1944. }
  1945. if(tr->translator_name == L('h','u'))
  1946. {
  1947. // variant form of numbers when followed by hyphen and a suffix starting with 'a' or 'e' (but not a, e, az, ez, azt, ezt
  1948. if((wtab[thousandplex].flags & FLAG_HYPHEN_AFTER) && (thousands_exact==1) && hu_number_e(&word[suffix_ix], thousandplex, value))
  1949. {
  1950. number_control |= 1; // use _1e variant of number
  1951. }
  1952. }
  1953. if((word[n_digits] == tr->langopts.decimal_sep) && IsDigit09(word[n_digits+1]))
  1954. {
  1955. // this "word" ends with a decimal point
  1956. Lookup(tr, "_dpt", ph_append);
  1957. decimal_point = 0x100;
  1958. }
  1959. else if(suppress_null == 0)
  1960. {
  1961. if(thousands_inc > 0)
  1962. {
  1963. if(thousandplex > 0)
  1964. // if((thousandplex > 0) && (value < 1000))
  1965. {
  1966. if((suppress_null == 0) && (LookupThousands(tr,value,thousandplex, thousands_exact, ph_append)))
  1967. {
  1968. // found an exact match for N thousand
  1969. value = 0;
  1970. suppress_null = 1;
  1971. }
  1972. }
  1973. }
  1974. }
  1975. else
  1976. if(speak_missing_thousands == 1)
  1977. {
  1978. // speak this thousandplex if there was no word for the previous thousandplex
  1979. sprintf(string,"_0M%d",thousandplex+1);
  1980. if(Lookup(tr, string, buf1)==0)
  1981. {
  1982. sprintf(string,"_0M%d",thousandplex);
  1983. Lookup(tr, string, ph_append);
  1984. }
  1985. }
  1986. if((ph_append[0] == 0) && (word[n_digits] == '.') && (thousandplex == 0))
  1987. {
  1988. Lookup(tr, "_.", ph_append);
  1989. }
  1990. if(thousandplex == 0)
  1991. {
  1992. char *p2;
  1993. // look for combinations of the number with the next word
  1994. p = word;
  1995. while(IsDigit09(p[1])) p++; // just use the last digit
  1996. if(IsDigit09(p[-1]))
  1997. {
  1998. p2 = p - 1;
  1999. if(LookupDictList(tr, &p2, buf_digit_lookup, flags, FLAG_SUFX, wtab)) // lookup 2 digits
  2000. {
  2001. n_digit_lookup = 2;
  2002. }
  2003. }
  2004. // if((buf_digit_lookup[0] == 0) && (*p != '0') && (dot_ordinal==0))
  2005. if((buf_digit_lookup[0] == 0) && (*p != '0'))
  2006. {
  2007. // LANG=hu ?
  2008. // not found, lookup only the last digit (?? but not if dot-ordinal has been found)
  2009. if(LookupDictList(tr, &p, buf_digit_lookup, flags, FLAG_SUFX, wtab)) // don't match '0', or entries with $only
  2010. {
  2011. n_digit_lookup = 1;
  2012. }
  2013. }
  2014. if(prev_thousands == 0)
  2015. {
  2016. if((decimal_point == 0) && (ordinal == 0))
  2017. {
  2018. // Look for special pronunciation for this number in isolation (LANG=kl)
  2019. sprintf(string, "_%dn", value);
  2020. if(Lookup(tr, string, ph_out))
  2021. {
  2022. return(1);
  2023. }
  2024. }
  2025. if(tr->langopts.numbers2 & NUM2_PERCENT_BEFORE)
  2026. {
  2027. // LANG=si, say "percent" before the number
  2028. p2 = word;
  2029. while((*p2 != ' ') && (*p2 != 0))
  2030. {
  2031. p2++;
  2032. }
  2033. if(p2[1] == '%')
  2034. {
  2035. Lookup(tr, "%", ph_out);
  2036. ph_out += strlen(ph_out);
  2037. p2[1] = ' ';
  2038. }
  2039. }
  2040. }
  2041. }
  2042. LookupNum3(tr, value, ph_buf, suppress_null, thousandplex, prev_thousands | ordinal | decimal_point);
  2043. if((thousandplex > 0) && (tr->langopts.numbers2 & 0x200))
  2044. sprintf(ph_out,"%s%s%c%s%s",ph_zeros,ph_append,phonEND_WORD,ph_buf2,ph_buf); // say "thousands" before its number
  2045. else
  2046. sprintf(ph_out,"%s%s%s%c%s",ph_zeros,ph_buf2,ph_buf,phonEND_WORD,ph_append);
  2047. while(decimal_point)
  2048. {
  2049. n_digits++;
  2050. decimal_count = 0;
  2051. while(IsDigit09(word[n_digits+decimal_count]))
  2052. decimal_count++;
  2053. // if(decimal_count > 1)
  2054. {
  2055. max_decimal_count = 2;
  2056. switch(decimal_mode = (tr->langopts.numbers & 0xe000))
  2057. {
  2058. case NUM_DFRACTION_4:
  2059. max_decimal_count = 5;
  2060. case NUM_DFRACTION_2:
  2061. // French/Polish decimal fraction
  2062. while(word[n_digits] == '0')
  2063. {
  2064. Lookup(tr, "_0", buf1);
  2065. strcat(ph_out,buf1);
  2066. decimal_count--;
  2067. n_digits++;
  2068. }
  2069. if((decimal_count <= max_decimal_count) && IsDigit09(word[n_digits]))
  2070. {
  2071. LookupNum3(tr, atoi(&word[n_digits]), buf1, 0,0,0);
  2072. strcat(ph_out,buf1);
  2073. n_digits += decimal_count;
  2074. }
  2075. break;
  2076. case NUM_DFRACTION_1: // italian, say "hundredths" if leading zero
  2077. case NUM_DFRACTION_5: // hungarian, always say "tenths" etc.
  2078. case NUM_DFRACTION_6: // kazakh, always say "tenths" etc, before the decimal fraction
  2079. LookupNum3(tr, atoi(&word[n_digits]), ph_buf, 0,0,0);
  2080. if((word[n_digits]=='0') || (decimal_mode != NUM_DFRACTION_1))
  2081. {
  2082. // decimal part has leading zeros, so add a "hundredths" or "thousandths" suffix
  2083. sprintf(string,"_0Z%d",decimal_count);
  2084. if(Lookup(tr, string, buf1) == 0)
  2085. break; // revert to speaking single digits
  2086. if(decimal_mode == NUM_DFRACTION_6)
  2087. strcat(ph_out, buf1);
  2088. else
  2089. strcat(ph_buf, buf1);
  2090. }
  2091. strcat(ph_out,ph_buf);
  2092. n_digits += decimal_count;
  2093. break;
  2094. case NUM_DFRACTION_3:
  2095. // Romanian decimal fractions
  2096. if((decimal_count <= 4) && (word[n_digits] != '0'))
  2097. {
  2098. LookupNum3(tr, atoi(&word[n_digits]), buf1, 0,0,0);
  2099. strcat(ph_out,buf1);
  2100. n_digits += decimal_count;
  2101. }
  2102. break;
  2103. case NUM_DFRACTION_7:
  2104. // alternative form of decimal fraction digits, except the final digit
  2105. while(decimal_count-- > 1)
  2106. {
  2107. sprintf(string,"_%cd", word[n_digits]);
  2108. if(Lookup(tr, string, buf1) == 0)
  2109. break;
  2110. n_digits++;
  2111. strcat(ph_out, buf1);
  2112. }
  2113. }
  2114. }
  2115. while(IsDigit09(c = word[n_digits]) && (strlen(ph_out) < (N_WORD_PHONEMES - 10)))
  2116. {
  2117. // speak any remaining decimal fraction digits individually
  2118. value = word[n_digits++] - '0';
  2119. LookupNum2(tr, value, 0, 2, buf1);
  2120. len = strlen(ph_out);
  2121. sprintf(&ph_out[len],"%c%s", phonEND_WORD, buf1);
  2122. }
  2123. // something after the decimal part ?
  2124. if(Lookup(tr, "_dpt2", buf1))
  2125. strcat(ph_out,buf1);
  2126. if((c == tr->langopts.decimal_sep) && IsDigit09(word[n_digits+1]))
  2127. {
  2128. Lookup(tr, "_dpt", buf1);
  2129. strcat(ph_out,buf1);
  2130. }
  2131. else
  2132. {
  2133. decimal_point = 0;
  2134. }
  2135. }
  2136. if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH))
  2137. {
  2138. int next_char;
  2139. char *p;
  2140. p = &word[n_digits+1];
  2141. p += utf8_in(&next_char,p);
  2142. if((tr->langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
  2143. utf8_in(&next_char,p);
  2144. if(!iswalpha2(next_char) && (thousands_exact==0))
  2145. // if(!iswalpha2(next_char) && !((wtab[thousandplex].flags & FLAG_HYPHEN_AFTER) && (thousands_exact != 0)))
  2146. strcat(ph_out,str_pause); // don't add pause for 100s, 6th, etc.
  2147. }
  2148. *flags |= FLAG_FOUND;
  2149. speak_missing_thousands--;
  2150. if(skipwords)
  2151. dictionary_skipwords = skipwords;
  2152. return(1);
  2153. } // end of TranslateNumber_1
  2154. int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
  2155. {//=============================================================================================================
  2156. if((option_sayas == SAYAS_DIGITS1) || (wtab[0].flags & FLAG_INDIVIDUAL_DIGITS))
  2157. return(0); // speak digits individually
  2158. if(tr->langopts.numbers != 0)
  2159. {
  2160. return(TranslateNumber_1(tr, word1, ph_out, flags, wtab, control));
  2161. }
  2162. return(0);
  2163. } // end of TranslateNumber