eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

numbers.cpp 57KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384
  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2013 by Jonathan Duddington *
  3. * email: [email protected] *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include "StdAfx.h"
  20. #include <stdio.h>
  21. #include <ctype.h>
  22. #include <stdlib.h>
  23. #include <string.h>
  24. #ifdef ANDROID
  25. #include "android_wchar.h"
  26. #else
  27. #include <wctype.h>
  28. #include <wchar.h>
  29. #endif
  30. #include "speak_lib.h"
  31. #include "speech.h"
  32. #include "phoneme.h"
  33. #include "synthesize.h"
  34. #include "voice.h"
  35. #include "translate.h"
  // Accent / modifier codes.  They are packed into the letter tables by the
  // LETTER() and LIGATURE() macros and, once decoded, index accents_tab[].
  enum {
      M_NAME         = 0,
      M_SMALLCAP     = 1,
      M_TURNED       = 2,
      M_REVERSED     = 3,
      M_CURL         = 4,
      M_ACUTE        = 5,
      M_BREVE        = 6,
      M_CARON        = 7,
      M_CEDILLA      = 8,
      M_CIRCUMFLEX   = 9,
      M_DIAERESIS    = 10,
      M_DOUBLE_ACUTE = 11,
      M_DOT_ABOVE    = 12,
      M_GRAVE        = 13,
      M_MACRON       = 14,
      M_OGONEK       = 15,
      M_RING         = 16,
      M_STROKE       = 17,
      M_TILDE        = 18,
      M_BAR          = 19,
      M_RETROFLEX    = 20,
      M_HOOK         = 21,

      // aliases: these share the spoken name of an existing code
      M_MIDDLE_DOT   = M_DOT_ABOVE,
      M_IMPLOSIVE    = M_HOOK
  };
  // File-scope working state.  None of it is read or written in the part of
  // the file visible here -- presumably it belongs to the number-translation
  // routines further down (TODO confirm).
  static int   n_digit_lookup          = 0;
  static char *digit_lookup            = NULL;
  static int   speak_missing_thousands = 0;
  static int   number_control          = 0;
  // One accent/modifier that can be spoken together with a base letter.
  typedef struct {
      const char *name;   // token to look up in the *_list file
      int flags;          // bit 0: the accent name is placed before the letter
  } ACCENTS;

  // Indexed by the M_* accent code (the position in this table is the code).
  static ACCENTS accents_tab[] = {
      { "_lig", 1 },   //  0: ligature (also the slot used by M_NAME)
      { "_smc", 1 },   //  1: smallcap
      { "_tur", 1 },   //  2: turned
      { "_rev", 1 },   //  3: reversed
      { "_crl", 0 },   //  4: curl
      { "_acu", 0 },   //  5: acute
      { "_brv", 0 },   //  6: breve
      { "_hac", 0 },   //  7: caron/hacek
      { "_ced", 0 },   //  8: cedilla
      { "_cir", 0 },   //  9: circumflex
      { "_dia", 0 },   // 10: diaeresis
      { "_ac2", 0 },   // 11: double acute
      { "_dot", 0 },   // 12: dot
      { "_grv", 0 },   // 13: grave
      { "_mcn", 0 },   // 14: macron
      { "_ogo", 0 },   // 15: ogonek
      { "_rng", 0 },   // 16: ring
      { "_stk", 0 },   // 17: stroke
      { "_tld", 0 },   // 18: tilde
      { "_bar", 0 },   // 19: bar
      { "_rfx", 0 },   // 20: retroflex
      { "_hok", 0 },   // 21: hook
  };
  // Table entry used for upper-case code points: 0 means "no accent data".
  #define CAPITAL 0

  // Packed 16-bit description of a derived letter.
  //
  //   LETTER:    bits 0-5   base letter code minus 59
  //              bits 6-10  first accent  (M_* code)
  //              bits 11-14 second accent (M_* code)
  //              bit 15     clear
  //   LIGATURE:  bits 0-5   first letter minus 59
  //              bits 6-11  second letter minus 59
  //              bits 12-14 accent (M_* code, 0..7 only)
  //              bit 15     set
  //
  // The offset 59 maps 'a'..'z' onto 38..63 and the L_* codes below onto 1..13.
  // Arguments and expansions are fully parenthesized so the macros stay correct
  // inside larger expressions and with compound arguments.
  #define LETTER(ch,mod1,mod2)   (((ch) - 59) + ((mod1) << 6) + ((mod2) << 11))
  #define LIGATURE(ch1,ch2,mod1) (((ch1) - 59) + (((ch2) - 59) << 6) + ((mod1) << 12) + 0x8000)

  // Base-letter codes for non-ASCII letters; (code - 59) indexes non_ascii_tab[].
  #define L_ALPHA   60 // U+3B1
  #define L_SCHWA   61 // U+259
  #define L_OPEN_E  62 // U+25B
  #define L_GAMMA   63 // U+3B3
  #define L_IOTA    64 // U+3B9
  #define L_OE      65 // U+153
  #define L_OMEGA   66 // U+3C9
  #define L_PHI     67 // U+3C6
  #define L_ESH     68 // U+283
  #define L_UPSILON 69 // U+3C5
  #define L_EZH     70 // U+292
  #define L_GLOTTAL 71 // U+294
  #define L_RTAP    72 // U+27E

  // Unicode code point for each L_* code; slot 0 is unused.
  static const short non_ascii_tab[] = {
      0,     0x3b1, 0x259, 0x25b, 0x3b3, 0x3b9, 0x153, 0x3c9,
      0x3c6, 0x283, 0x3c5, 0x292, 0x294, 0x27e
  };
  113. // characters U+00e0 to U+017f
  114. static const unsigned short letter_accents_0e0[] = {
  115. LETTER('a',M_GRAVE,0), // U+00e0
  116. LETTER('a',M_ACUTE,0),
  117. LETTER('a',M_CIRCUMFLEX,0),
  118. LETTER('a',M_TILDE,0),
  119. LETTER('a',M_DIAERESIS,0),
  120. LETTER('a',M_RING,0),
  121. LIGATURE('a','e',0),
  122. LETTER('c',M_CEDILLA,0),
  123. LETTER('e',M_GRAVE,0),
  124. LETTER('e',M_ACUTE,0),
  125. LETTER('e',M_CIRCUMFLEX,0),
  126. LETTER('e',M_DIAERESIS,0),
  127. LETTER('i',M_GRAVE,0),
  128. LETTER('i',M_ACUTE,0),
  129. LETTER('i',M_CIRCUMFLEX,0),
  130. LETTER('i',M_DIAERESIS,0),
  131. LETTER('d',M_NAME,0), // eth // U+00f0
  132. LETTER('n',M_TILDE,0),
  133. LETTER('o',M_GRAVE,0),
  134. LETTER('o',M_ACUTE,0),
  135. LETTER('o',M_CIRCUMFLEX,0),
  136. LETTER('o',M_TILDE,0),
  137. LETTER('o',M_DIAERESIS,0),
  138. 0, // division sign
  139. LETTER('o',M_STROKE,0),
  140. LETTER('u',M_GRAVE,0),
  141. LETTER('u',M_ACUTE,0),
  142. LETTER('u',M_CIRCUMFLEX,0),
  143. LETTER('u',M_DIAERESIS,0),
  144. LETTER('y',M_ACUTE,0),
  145. LETTER('t',M_NAME,0), // thorn
  146. LETTER('y',M_DIAERESIS,0),
  147. CAPITAL, // U+0100
  148. LETTER('a',M_MACRON,0),
  149. CAPITAL,
  150. LETTER('a',M_BREVE,0),
  151. CAPITAL,
  152. LETTER('a',M_OGONEK,0),
  153. CAPITAL,
  154. LETTER('c',M_ACUTE,0),
  155. CAPITAL,
  156. LETTER('c',M_CIRCUMFLEX,0),
  157. CAPITAL,
  158. LETTER('c',M_DOT_ABOVE,0),
  159. CAPITAL,
  160. LETTER('c',M_CARON,0),
  161. CAPITAL,
  162. LETTER('d',M_CARON,0),
  163. CAPITAL, // U+0110
  164. LETTER('d',M_STROKE,0),
  165. CAPITAL,
  166. LETTER('e',M_MACRON,0),
  167. CAPITAL,
  168. LETTER('e',M_BREVE,0),
  169. CAPITAL,
  170. LETTER('e',M_DOT_ABOVE,0),
  171. CAPITAL,
  172. LETTER('e',M_OGONEK,0),
  173. CAPITAL,
  174. LETTER('e',M_CARON,0),
  175. CAPITAL,
  176. LETTER('g',M_CIRCUMFLEX,0),
  177. CAPITAL,
  178. LETTER('g',M_BREVE,0),
  179. CAPITAL, // U+0120
  180. LETTER('g',M_DOT_ABOVE,0),
  181. CAPITAL,
  182. LETTER('g',M_CEDILLA,0),
  183. CAPITAL,
  184. LETTER('h',M_CIRCUMFLEX,0),
  185. CAPITAL,
  186. LETTER('h',M_STROKE,0),
  187. CAPITAL,
  188. LETTER('i',M_TILDE,0),
  189. CAPITAL,
  190. LETTER('i',M_MACRON,0),
  191. CAPITAL,
  192. LETTER('i',M_BREVE,0),
  193. CAPITAL,
  194. LETTER('i',M_OGONEK,0),
  195. CAPITAL, // U+0130
  196. LETTER('i',M_NAME,0), // dotless i
  197. CAPITAL,
  198. LIGATURE('i','j',0),
  199. CAPITAL,
  200. LETTER('j',M_CIRCUMFLEX,0),
  201. CAPITAL,
  202. LETTER('k',M_CEDILLA,0),
  203. LETTER('k',M_NAME,0), // kra
  204. CAPITAL,
  205. LETTER('l',M_ACUTE,0),
  206. CAPITAL,
  207. LETTER('l',M_CEDILLA,0),
  208. CAPITAL,
  209. LETTER('l',M_CARON,0),
  210. CAPITAL,
  211. LETTER('l',M_MIDDLE_DOT,0), // U+0140
  212. CAPITAL,
  213. LETTER('l',M_STROKE,0),
  214. CAPITAL,
  215. LETTER('n',M_ACUTE,0),
  216. CAPITAL,
  217. LETTER('n',M_CEDILLA,0),
  218. CAPITAL,
  219. LETTER('n',M_CARON,0),
  220. LETTER('n',M_NAME,0), // apostrophe n
  221. CAPITAL,
  222. LETTER('n',M_NAME,0), // eng
  223. CAPITAL,
  224. LETTER('o',M_MACRON,0),
  225. CAPITAL,
  226. LETTER('o',M_BREVE,0),
  227. CAPITAL, // U+0150
  228. LETTER('o',M_DOUBLE_ACUTE,0),
  229. CAPITAL,
  230. LIGATURE('o','e',0),
  231. CAPITAL,
  232. LETTER('r',M_ACUTE,0),
  233. CAPITAL,
  234. LETTER('r',M_CEDILLA,0),
  235. CAPITAL,
  236. LETTER('r',M_CARON,0),
  237. CAPITAL,
  238. LETTER('s',M_ACUTE,0),
  239. CAPITAL,
  240. LETTER('s',M_CIRCUMFLEX,0),
  241. CAPITAL,
  242. LETTER('s',M_CEDILLA,0),
  243. CAPITAL, // U+0160
  244. LETTER('s',M_CARON,0),
  245. CAPITAL,
  246. LETTER('t',M_CEDILLA,0),
  247. CAPITAL,
  248. LETTER('t',M_CARON,0),
  249. CAPITAL,
  250. LETTER('t',M_STROKE,0),
  251. CAPITAL,
  252. LETTER('u',M_TILDE,0),
  253. CAPITAL,
  254. LETTER('u',M_MACRON,0),
  255. CAPITAL,
  256. LETTER('u',M_BREVE,0),
  257. CAPITAL,
  258. LETTER('u',M_RING,0),
  259. CAPITAL, // U+0170
  260. LETTER('u',M_DOUBLE_ACUTE,0),
  261. CAPITAL,
  262. LETTER('u',M_OGONEK,0),
  263. CAPITAL,
  264. LETTER('w',M_CIRCUMFLEX,0),
  265. CAPITAL,
  266. LETTER('y',M_CIRCUMFLEX,0),
  267. CAPITAL, // Y-DIAERESIS
  268. CAPITAL,
  269. LETTER('z',M_ACUTE,0),
  270. CAPITAL,
  271. LETTER('z',M_DOT_ABOVE,0),
  272. CAPITAL,
  273. LETTER('z',M_CARON,0),
  274. LETTER('s',M_NAME,0), // long-s // U+17f
  275. };
  276. // characters U+0250 to U+029F
  277. static const unsigned short letter_accents_250[] = {
  278. LETTER('a',M_TURNED,0), // U+250
  279. LETTER(L_ALPHA,0,0),
  280. LETTER(L_ALPHA,M_TURNED,0),
  281. LETTER('b',M_IMPLOSIVE,0),
  282. 0, // open-o
  283. LETTER('c',M_CURL,0),
  284. LETTER('d',M_RETROFLEX,0),
  285. LETTER('d',M_IMPLOSIVE,0),
  286. LETTER('e',M_REVERSED,0), // U+258
  287. 0, // schwa
  288. LETTER(L_SCHWA,M_HOOK,0),
  289. 0, // open-e
  290. LETTER(L_OPEN_E,M_REVERSED,0),
  291. LETTER(L_OPEN_E,M_HOOK,M_REVERSED),
  292. 0,//LETTER(L_OPEN_E,M_CLOSED,M_REVERSED),
  293. LETTER('j',M_BAR,0),
  294. LETTER('g',M_IMPLOSIVE,0), // U+260
  295. LETTER('g',0,0),
  296. LETTER('g',M_SMALLCAP,0),
  297. LETTER(L_GAMMA,0,0),
  298. 0, // ramshorn
  299. LETTER('h',M_TURNED,0),
  300. LETTER('h',M_HOOK,0),
  301. 0,//LETTER(L_HENG,M_HOOK,0),
  302. LETTER('i',M_BAR,0), // U+268
  303. LETTER(L_IOTA,0,0),
  304. LETTER('i',M_SMALLCAP,0),
  305. LETTER('l',M_TILDE,0),
  306. LETTER('l',M_BAR,0),
  307. LETTER('l',M_RETROFLEX,0),
  308. LIGATURE('l','z',0),
  309. LETTER('m',M_TURNED,0),
  310. 0,//LETTER('m',M_TURNED,M_LEG), // U+270
  311. LETTER('m',M_HOOK,0),
  312. 0,//LETTER('n',M_LEFTHOOK,0),
  313. LETTER('n',M_RETROFLEX,0),
  314. LETTER('n',M_SMALLCAP,0),
  315. LETTER('o',M_BAR,0),
  316. LIGATURE('o','e',M_SMALLCAP),
  317. 0,//LETTER(L_OMEGA,M_CLOSED,0),
  318. LETTER(L_PHI,0,0), // U+278
  319. LETTER('r',M_TURNED,0),
  320. 0,//LETTER('r',M_TURNED,M_LEG),
  321. LETTER('r',M_RETROFLEX,M_TURNED),
  322. 0,//LETTER('r',M_LEG,0),
  323. LETTER('r',M_RETROFLEX,0),
  324. 0, // r-tap
  325. LETTER(L_RTAP,M_REVERSED,0),
  326. LETTER('r',M_SMALLCAP,0), // U+280
  327. LETTER('r',M_TURNED,M_SMALLCAP),
  328. LETTER('s',M_RETROFLEX,0),
  329. 0, // esh
  330. 0,//LETTER('j',M_BAR,L_IMPLOSIVE),
  331. LETTER(L_ESH,M_REVERSED,0),
  332. LETTER(L_ESH,M_CURL,0),
  333. LETTER('t',M_TURNED,0),
  334. LETTER('t',M_RETROFLEX,0), // U+288
  335. LETTER('u',M_BAR,0),
  336. LETTER(L_UPSILON,0,0),
  337. LETTER('v',M_HOOK,0),
  338. LETTER('v',M_TURNED,0),
  339. LETTER('w',M_TURNED,0),
  340. LETTER('y',M_TURNED,0),
  341. LETTER('y',M_SMALLCAP,0),
  342. LETTER('z',M_RETROFLEX,0), // U+290
  343. LETTER('z',M_CURL,0),
  344. 0, // ezh
  345. LETTER(L_EZH,M_CURL,0),
  346. 0, // glottal stop
  347. LETTER(L_GLOTTAL,M_REVERSED,0),
  348. LETTER(L_GLOTTAL,M_TURNED,0),
  349. 0,//LETTER('c',M_LONG,0),
  350. 0, // bilabial click // U+298
  351. LETTER('b',M_SMALLCAP,0),
  352. 0,//LETTER(L_OPEN_E,M_CLOSED,0),
  353. LETTER('g',M_IMPLOSIVE,M_SMALLCAP),
  354. LETTER('h',M_SMALLCAP,0),
  355. LETTER('j',M_CURL,0),
  356. LETTER('k',M_TURNED,0),
  357. LETTER('l',M_SMALLCAP,0),
  358. LETTER('q',M_HOOK,0), // U+2a0
  359. LETTER(L_GLOTTAL,M_STROKE,0),
  360. LETTER(L_GLOTTAL,M_STROKE,M_REVERSED),
  361. LIGATURE('d','z',0),
  362. 0, // dezh
  363. LIGATURE('d','z',M_CURL),
  364. LIGATURE('t','s',0),
  365. 0, // tesh
  366. LIGATURE('t','s',M_CURL),
  367. };
  368. static int LookupLetter2(Translator *tr, unsigned int letter, char *ph_buf)
  369. { //========================================================================
  370. int len;
  371. char single_letter[10];
  372. single_letter[0] = 0;
  373. single_letter[1] = '_';
  374. len = utf8_out(letter, &single_letter[2]);
  375. single_letter[len+2] = ' ';
  376. single_letter[len+3] = 0;
  377. if(Lookup(tr, &single_letter[1], ph_buf) == 0)
  378. {
  379. single_letter[1] = ' ';
  380. if(Lookup(tr, &single_letter[2], ph_buf) == 0)
  381. {
  382. TranslateRules(tr, &single_letter[2], ph_buf, 20, NULL,0,NULL);
  383. }
  384. }
  385. return(ph_buf[0]);
  386. }
  387. void LookupAccentedLetter(Translator *tr, unsigned int letter, char *ph_buf)
  388. {//=========================================================================
  389. // lookup the character in the accents table
  390. int accent_data = 0;
  391. int accent1 = 0;
  392. int accent2 = 0;
  393. int basic_letter;
  394. int letter2=0;
  395. char ph_letter1[30];
  396. char ph_letter2[30];
  397. char ph_accent1[30];
  398. char ph_accent2[30];
  399. ph_accent2[0] = 0;
  400. if((letter >= 0xe0) && (letter < 0x17f))
  401. {
  402. accent_data = letter_accents_0e0[letter - 0xe0];
  403. }
  404. else if((letter >= 0x250) && (letter <= 0x2a8))
  405. {
  406. accent_data = letter_accents_250[letter - 0x250];
  407. }
  408. if(accent_data != 0)
  409. {
  410. basic_letter = (accent_data & 0x3f) + 59;
  411. if(basic_letter < 'a')
  412. basic_letter = non_ascii_tab[basic_letter-59];
  413. if(accent_data & 0x8000)
  414. {
  415. letter2 = (accent_data >> 6) & 0x3f;
  416. letter2 += 59;
  417. accent2 = (accent_data >> 12) & 0x7;
  418. }
  419. else
  420. {
  421. accent1 = (accent_data >> 6) & 0x1f;
  422. accent2 = (accent_data >> 11) & 0xf;
  423. }
  424. if(Lookup(tr, accents_tab[accent1].name, ph_accent1) != 0)
  425. {
  426. if(LookupLetter2(tr, basic_letter, ph_letter1) != 0)
  427. {
  428. if(accent2 != 0)
  429. {
  430. if(Lookup(tr, accents_tab[accent2].name, ph_accent2) == 0)
  431. {
  432. // break;
  433. }
  434. if(accents_tab[accent2].flags & 1)
  435. {
  436. strcpy(ph_buf,ph_accent2);
  437. ph_buf += strlen(ph_buf);
  438. ph_accent2[0] = 0;
  439. }
  440. }
  441. if(letter2 != 0)
  442. {
  443. //ligature
  444. LookupLetter2(tr, letter2, ph_letter2);
  445. sprintf(ph_buf,"%s%c%s%c%s%s",ph_accent1, phonPAUSE_VSHORT, ph_letter1, phonSTRESS_P, ph_letter2, ph_accent2);
  446. }
  447. else
  448. {
  449. if(accent1 == 0)
  450. strcpy(ph_buf, ph_letter1);
  451. else if((tr->langopts.accents & 1) || (accents_tab[accent1].flags & 1))
  452. sprintf(ph_buf,"%s%c%c%s", ph_accent1, phonPAUSE_VSHORT, phonSTRESS_P, ph_letter1);
  453. else
  454. sprintf(ph_buf,"%c%s%c%s%c", phonSTRESS_2, ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT);
  455. }
  456. }
  457. }
  458. }
  459. } // end of LookupAccentedLetter
  460. void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_buf1, int control)
  461. {//==============================================================================================
  462. // control, bit 0: not the first letter of a word
  463. int len;
  464. static char single_letter[10] = {0,0};
  465. unsigned int dict_flags[2];
  466. char ph_buf3[40];
  467. ph_buf1[0] = 0;
  468. len = utf8_out(letter,&single_letter[2]);
  469. single_letter[len+2] = ' ';
  470. if(next_byte == -1)
  471. {
  472. // speaking normal text, not individual characters
  473. if(Lookup(tr, &single_letter[2], ph_buf1) != 0)
  474. return;
  475. single_letter[1] = '_';
  476. if(Lookup(tr, &single_letter[1], ph_buf3) != 0)
  477. return; // the character is specified as _* so ignore it when speaking normal text
  478. // check whether this character is specified for English
  479. if(tr->translator_name == L('e','n'))
  480. return; // we are already using English
  481. SetTranslator2("en");
  482. if(Lookup(translator2, &single_letter[2], ph_buf3) != 0)
  483. {
  484. // yes, switch to English and re-translate the word
  485. sprintf(ph_buf1,"%c",phonSWITCH);
  486. }
  487. SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
  488. return;
  489. }
  490. if((letter <= 32) || iswspace(letter))
  491. {
  492. // lookup space as _&32 etc.
  493. sprintf(&single_letter[1],"_#%d ",letter);
  494. Lookup(tr, &single_letter[1], ph_buf1);
  495. return;
  496. }
  497. if(next_byte != ' ')
  498. next_byte = RULE_SPELLING;
  499. single_letter[3+len] = next_byte; // follow by space-space if the end of the word, or space-31
  500. single_letter[1] = '_';
  501. // if the $accent flag is set for this letter, use the accents table (below)
  502. dict_flags[1] = 0;
  503. if(Lookup(tr, &single_letter[1], ph_buf3) == 0)
  504. {
  505. single_letter[1] = ' ';
  506. if(Lookup(tr, &single_letter[2], ph_buf3) == 0)
  507. {
  508. TranslateRules(tr, &single_letter[2], ph_buf3, sizeof(ph_buf3), NULL,FLAG_NO_TRACE,NULL);
  509. }
  510. }
  511. if(ph_buf3[0] == 0)
  512. {
  513. LookupAccentedLetter(tr, letter, ph_buf3);
  514. }
  515. strcpy(ph_buf1, ph_buf3);
  516. if((ph_buf1[0] == 0) || (ph_buf1[0] == phonSWITCH))
  517. {
  518. return;
  519. }
  520. dict_flags[0] = 0;
  521. dict_flags[1] = 0;
  522. SetWordStress(tr, ph_buf1, dict_flags, -1, control & 1);
  523. } // end of LookupLetter
  // Code point of the digit zero in each supported non-ASCII digit set; the
  // nine following code points are the digits 1-9.
  // Must stay in ascending order and end with 0.
  static const int number_ranges[] = {
      0x660, 0x6f0,                                                    // arabic
      0x966, 0x9e6, 0xa66, 0xae6, 0xb66, 0xbe6, 0xc66, 0xce6, 0xd66,   // indic
      0xe50, 0xed0, 0xf20, 0x1040, 0x1090,
      0
  };

  // Map a non-ASCII digit to its ASCII equivalent '0'..'9'.
  // Returns -1 when the code point is not in any of the ranges above.
  int NonAsciiNumber(int letter)
  {
      int ix;

      for (ix = 0; number_ranges[ix] != 0; ix++) {
          int zero = number_ranges[ix];

          if (letter < zero)
              return -1;   // ranges are ascending, so no later range can match
          if (letter < zero + 10)
              return letter - zero + '0';
      }
      return -1;
  }
  // Flag bits added to the base character in derived_letters[].
  #define L_SUB 0x4000 // subscript
  #define L_SUP 0x8000 // superscript

  // Dictionary token for each value of (entry >> 14): 1 = subscript, 2 = superscript.
  static const char *modifiers[] = { NULL, "_sub", "_sup", NULL };

  // Pairs of (derived code point, base character + L_SUB or L_SUP).
  // The list must be in ascending order of the first value and ends with 0,0.
  static unsigned short derived_letters[] = {
      // Latin-1 ordinal indicators and superscript digits
      0x00aa, L_SUP + 'a',
      0x00b2, L_SUP + '2',
      0x00b3, L_SUP + '3',
      0x00b9, L_SUP + '1',
      0x00ba, L_SUP + 'o',

      // U+02B0 .. U+02E3: modifier letters
      0x02b0, L_SUP + 'h',
      0x02b1, L_SUP + 0x266,
      0x02b2, L_SUP + 'j',
      0x02b3, L_SUP + 'r',
      0x02b4, L_SUP + 0x279,
      0x02b5, L_SUP + 0x27b,
      0x02b6, L_SUP + 0x281,
      0x02b7, L_SUP + 'w',
      0x02b8, L_SUP + 'y',
      0x02c0, L_SUP + 0x294,
      0x02c1, L_SUP + 0x295,
      0x02e0, L_SUP + 0x263,
      0x02e1, L_SUP + 'l',
      0x02e2, L_SUP + 's',
      0x02e3, L_SUP + 'x',

      // U+2070 .. U+207F: superscripts
      0x2070, L_SUP + '0',
      0x2071, L_SUP + 'i',
      0x2074, L_SUP + '4',
      0x2075, L_SUP + '5',
      0x2076, L_SUP + '6',
      0x2077, L_SUP + '7',
      0x2078, L_SUP + '8',
      0x2079, L_SUP + '9',
      0x207a, L_SUP + '+',
      0x207b, L_SUP + '-',
      0x207c, L_SUP + '=',
      0x207d, L_SUP + '(',
      0x207e, L_SUP + ')',
      0x207f, L_SUP + 'n',

      // U+2080 .. U+208E: subscript digits and operators
      0x2080, L_SUB + '0',
      0x2081, L_SUB + '1',
      0x2082, L_SUB + '2',
      0x2083, L_SUB + '3',
      0x2084, L_SUB + '4',
      0x2085, L_SUB + '5',
      0x2086, L_SUB + '6',
      0x2087, L_SUB + '7',
      0x2088, L_SUB + '8',
      0x2089, L_SUB + '9',
      0x208a, L_SUB + '+',
      0x208b, L_SUB + '-',
      0x208c, L_SUB + '=',
      0x208d, L_SUB + '(',
      0x208e, L_SUB + ')',

      // U+2090 .. U+209C: subscript letters
      0x2090, L_SUB + 'a',
      0x2091, L_SUB + 'e',
      0x2092, L_SUB + 'o',
      0x2093, L_SUB + 'x',
      0x2094, L_SUB + 0x259,
      0x2095, L_SUB + 'h',
      0x2096, L_SUB + 'k',
      0x2097, L_SUB + 'l',
      0x2098, L_SUB + 'm',
      0x2099, L_SUB + 'n',
      0x209a, L_SUB + 'p',
      0x209b, L_SUB + 's',
      0x209c, L_SUB + 't',

      0, 0   // terminator
  };
  // Letter names written with phonemes available to all languages.
  // Six entries, presumably for the hex digits a..f in that order; the code
  // that reads this table is outside the visible part of the file (TODO confirm).
  static const char *hex_letters[] = {
      "'e:j",
      "b'i:",
      "s'i:",
      "d'i:",
      "'i:",
      "'ef"
  };
  613. int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)
  614. {//=========================================================================
  615. // get pronunciation for an isolated letter
  616. // return number of bytes used by the letter
  617. // control bit 0: a non-initial letter in a word
  618. // bit 1: say 'capital'
  619. // bit 2: say character code for unknown letters
  620. int n_bytes;
  621. int letter;
  622. int len;
  623. int ix;
  624. int c;
  625. char *p2;
  626. char *pbuf;
  627. const char *modifier;
  628. ALPHABET *alphabet;
  629. int al_offset;
  630. int al_flags;
  631. int language;
  632. int number;
  633. int phontab_1;
  634. int speak_letter_number;
  635. char capital[30];
  636. char ph_buf[80];
  637. char ph_buf2[80];
  638. char ph_alphabet[80];
  639. char hexbuf[12];
  640. static char pause_string[] = {phonPAUSE, 0};
  641. ph_buf[0] = 0;
  642. ph_alphabet[0] = 0;
  643. capital[0] = 0;
  644. phontab_1 = translator->phoneme_tab_ix;
  645. n_bytes = utf8_in(&letter,word);
  646. if((letter & 0xfff00) == 0x0e000)
  647. {
  648. letter &= 0xff; // uncode private usage area
  649. }
  650. if(control & 2)
  651. {
  652. // include CAPITAL information
  653. if(iswupper2(letter))
  654. {
  655. Lookup(tr, "_cap", capital);
  656. }
  657. }
  658. letter = towlower2(letter);
  659. LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
  660. if(ph_buf[0] == 0)
  661. {
  662. // is this a subscript or superscript letter ?
  663. for(ix=0; (c = derived_letters[ix]) != 0; ix+=2)
  664. {
  665. if(c > letter)
  666. break;
  667. if(c == letter)
  668. {
  669. c = derived_letters[ix+1];
  670. letter = c & 0x3fff;
  671. if((modifier = modifiers[c >> 14]) != NULL)
  672. {
  673. Lookup(tr, modifier, capital);
  674. if(capital[0] == 0)
  675. {
  676. capital[2] = SetTranslator2("en"); // overwrites previous contents of translator2
  677. Lookup(translator2, modifier, &capital[3]);
  678. if(capital[3] != 0)
  679. {
  680. capital[0] = phonPAUSE;
  681. capital[1] = phonSWITCH;
  682. len = strlen(&capital[3]);
  683. capital[len+3] = phonSWITCH;
  684. capital[len+4] = phontab_1;
  685. capital[len+5] = 0;
  686. }
  687. }
  688. }
  689. }
  690. }
  691. LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
  692. }
  693. if(ph_buf[0] == phonSWITCH)
  694. {
  695. strcpy(phonemes,ph_buf);
  696. return(0);
  697. }
  698. if((ph_buf[0] == 0) && ((number = NonAsciiNumber(letter)) > 0))
  699. {
  700. // convert a non-ascii number to 0-9
  701. LookupLetter(tr, number, 0, ph_buf, control & 1);
  702. }
  703. al_offset = 0;
  704. al_flags = 0;
  705. if((alphabet = AlphabetFromChar(letter)) != NULL)
  706. {
  707. al_offset = alphabet->offset;
  708. al_flags = alphabet->flags;
  709. }
  710. if(alphabet != current_alphabet)
  711. {
  712. // speak the name of the alphabet
  713. current_alphabet = alphabet;
  714. if((alphabet != NULL) && !(al_flags & AL_DONT_NAME) && (al_offset != translator->letter_bits_offset))
  715. {
  716. if((al_flags & AL_DONT_NAME) || (al_offset == translator->langopts.alt_alphabet) || (al_offset == translator->langopts.our_alphabet))
  717. {
  718. // don't say the alphabet name
  719. }
  720. else
  721. {
  722. ph_buf2[0] = 0;
  723. if(Lookup(translator, alphabet->name, ph_alphabet) == 0) // the original language for the current voice
  724. {
  725. // Can't find the local name for this alphabet, use the English name
  726. ph_alphabet[2] = SetTranslator2("en"); // overwrites previous contents of translator2
  727. Lookup(translator2, alphabet->name, ph_buf2);
  728. }
  729. else if(translator != tr)
  730. {
  731. phontab_1 = tr->phoneme_tab_ix;
  732. strcpy(ph_buf2, ph_alphabet);
  733. ph_alphabet[2] = translator->phoneme_tab_ix;
  734. }
  735. if(ph_buf2[0] != 0)
  736. {
  737. // we used a different language for the alphabet name (now in ph_buf2)
  738. ph_alphabet[0] = phonPAUSE;
  739. ph_alphabet[1] = phonSWITCH;
  740. strcpy(&ph_alphabet[3], ph_buf2);
  741. len = strlen(ph_buf2) + 3;
  742. ph_alphabet[len] = phonSWITCH;
  743. ph_alphabet[len+1] = phontab_1;
  744. ph_alphabet[len+2] = 0;
  745. }
  746. }
  747. }
  748. }
  749. // caution: SetWordStress() etc don't expect phonSWITCH + phoneme table number
  750. if(ph_buf[0] == 0)
  751. {
  752. if((al_offset != 0) && (al_offset == translator->langopts.alt_alphabet))
  753. language = translator->langopts.alt_alphabet_lang;
  754. else
  755. if((alphabet != NULL) && (alphabet->language != 0) && !(al_flags & AL_NOT_LETTERS))
  756. language = alphabet->language;
  757. else
  758. language = L('e','n');
  759. if((language != tr->translator_name) || (language == L('k','o')))
  760. {
  761. char *p3;
  762. int initial, code;
  763. char hangul_buf[12];
  764. // speak in the language for this alphabet (or English)
  765. ph_buf[2] = SetTranslator2(WordToString2(language));
  766. if(((code = letter - 0xac00) >= 0) && (letter <= 0xd7af))
  767. {
  768. // Special case for Korean letters.
  769. // break a syllable hangul into 2 or 3 individual jamo
  770. hangul_buf[0] = ' ';
  771. p3 = &hangul_buf[1];
  772. if((initial = (code/28)/21) != 11)
  773. {
  774. p3 += utf8_out(initial + 0x1100, p3);
  775. }
  776. utf8_out(((code/28) % 21) + 0x1161, p3); // medial
  777. utf8_out((code % 28) + 0x11a7, &p3[3]); // final
  778. p3[6] = ' ';
  779. p3[7] = 0;
  780. ph_buf[3] = 0;
  781. TranslateRules(translator2, &hangul_buf[1], &ph_buf[3], sizeof(ph_buf)-3, NULL, 0, NULL);
  782. SetWordStress(translator2, &ph_buf[3], NULL, -1, 0);
  783. }
  784. else
  785. {
  786. LookupLetter(translator2, letter, word[n_bytes], &ph_buf[3], control & 1);
  787. }
  788. if(ph_buf[3] == phonSWITCH)
  789. {
  790. // another level of language change
  791. ph_buf[2] = SetTranslator2(&ph_buf[4]);
  792. LookupLetter(translator2, letter, word[n_bytes], &ph_buf[3], control & 1);
  793. }
  794. SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
  795. if(ph_buf[3] != 0)
  796. {
  797. ph_buf[0] = phonPAUSE;
  798. ph_buf[1] = phonSWITCH;
  799. len = strlen(&ph_buf[3]) + 3;
  800. ph_buf[len] = phonSWITCH; // switch back
  801. ph_buf[len+1] = tr->phoneme_tab_ix;
  802. ph_buf[len+2] = 0;
  803. }
  804. }
  805. }
  806. if(ph_buf[0] == 0)
  807. {
  808. // character name not found
  809. if(ph_buf[0]== 0)
  810. {
  811. speak_letter_number = 1;
  812. if(!(al_flags & AL_NO_SYMBOL))
  813. {
  814. if(iswalpha2(letter))
  815. Lookup(translator, "_?A", ph_buf);
  816. if((ph_buf[0]==0) && !iswspace(letter))
  817. Lookup(translator, "_??", ph_buf);
  818. if(ph_buf[0] == 0)
  819. {
  820. EncodePhonemes("l'et@", ph_buf, NULL);
  821. }
  822. }
  823. if(!(control & 4) && (al_flags & AL_NOT_CODE))
  824. {
  825. // don't speak the character code number, unless we want full details of this character
  826. speak_letter_number = 0;
  827. }
  828. // if((ph_alphabet[0] != 0) && speak_letter_number)
  829. // ph_buf[0] = 0; // don't speak "letter" if we speak alphabet name
  830. if(speak_letter_number)
  831. {
  832. if(al_offset == 0x2800)
  833. {
  834. // braille dots symbol, list the numbered dots
  835. p2 = hexbuf;
  836. for(ix=0; ix<8; ix++)
  837. {
  838. if(letter & (1 << ix))
  839. {
  840. *p2++ = '1'+ix;
  841. }
  842. }
  843. *p2 = 0;
  844. }
  845. else
  846. {
  847. // speak the hexadecimal number of the character code
  848. sprintf(hexbuf,"%x",letter);
  849. }
  850. pbuf = ph_buf;
  851. for(p2 = hexbuf; *p2 != 0; p2++)
  852. {
  853. pbuf += strlen(pbuf);
  854. *pbuf++ = phonPAUSE_VSHORT;
  855. LookupLetter(translator, *p2, 0, pbuf, 1);
  856. if(((pbuf[0] == 0) || (pbuf[0]==phonSWITCH)) && (*p2 >= 'a'))
  857. {
  858. // This language has no translation for 'a' to 'f', speak English names using base phonemes
  859. EncodePhonemes(hex_letters[*p2 - 'a'], pbuf, NULL);
  860. }
  861. }
  862. strcat(pbuf, pause_string);
  863. }
  864. }
  865. }
  866. len = strlen(phonemes);
  867. if(tr->langopts.accents & 2) // 'capital' before or after the word ?
  868. sprintf(ph_buf2,"%c%s%s%s",0xff,ph_alphabet,ph_buf,capital);
  869. else
  870. sprintf(ph_buf2,"%c%s%s%s",0xff,ph_alphabet,capital,ph_buf); // the 0xff marker will be removed or replaced in SetSpellingStress()
  871. if((len + strlen(ph_buf2)) < N_WORD_PHONEMES)
  872. {
  873. strcpy(&phonemes[len],ph_buf2);
  874. }
  875. return(n_bytes);
  876. } // end of TranslateLetter
  877. void SetSpellingStress(Translator *tr, char *phonemes, int control, int n_chars)
  878. {//=============================================================================
  879. // Individual letter names, reduce the stress of some.
  880. int ix;
  881. unsigned int c;
  882. int n_stress=0;
  883. int prev = 0;
  884. int count;
  885. unsigned char buf[N_WORD_PHONEMES];
  886. for(ix=0; (c = phonemes[ix]) != 0; ix++)
  887. {
  888. if((c == phonSTRESS_P) && (prev != phonSWITCH))
  889. {
  890. n_stress++;
  891. }
  892. buf[ix] = prev = c;
  893. }
  894. buf[ix] = 0;
  895. count = 0;
  896. prev = 0;
  897. for(ix=0; (c = buf[ix]) != 0; ix++)
  898. {
  899. if((c == phonSTRESS_P) && (n_chars > 1) && (prev != phonSWITCH))
  900. {
  901. count++;
  902. if(tr->langopts.spelling_stress == 1)
  903. {
  904. // stress on initial letter when spelling
  905. if(count > 1)
  906. c = phonSTRESS_3;
  907. }
  908. else
  909. {
  910. if(count != n_stress)
  911. {
  912. if(((count % 3) != 0) || (count == n_stress-1))
  913. c = phonSTRESS_3; // reduce to secondary stress
  914. }
  915. }
  916. }
  917. else if(c == 0xff)
  918. {
  919. if((control < 2) || (ix==0))
  920. continue; // don't insert pauses
  921. if(control == 4)
  922. c = phonPAUSE; // pause after each character
  923. if(((count % 3) == 0) || (control > 2))
  924. c = phonPAUSE_NOLINK; // pause following a primary stress
  925. else
  926. c = phonPAUSE_VSHORT;
  927. }
  928. *phonemes++ = prev = c;
  929. }
  930. if(control >= 2)
  931. *phonemes++ = phonPAUSE_NOLINK;
  932. *phonemes = 0;
  933. } // end of SetSpellingStress
  934. // Numbers
  // Ordinal-suffix phonemes shared by the number lookup routines in this file.
  // ph_ordinal2 is copied or appended by LookupNum2() and LookupNum3();
  // ph_ordinal2x is the alternate form which LookupNum2() prefers when a "_<n>ox"
  // style entry is found.
  static char ph_ordinal2[12] = {0};
  static char ph_ordinal2x[12] = {0};
  937. static int CheckDotOrdinal(Translator *tr, char *word, char *word_end, WORD_TAB *wtab, int roman)
  938. {//==============================================================================================
  939. int ordinal = 0;
  940. int c2;
  941. int nextflags;
  942. if((tr->langopts.numbers & NUM_ORDINAL_DOT) && ((word_end[0] == '.') || (wtab[0].flags & FLAG_HAS_DOT)) && !(wtab[1].flags & FLAG_NOSPACE))
  943. {
  944. if(roman || !(wtab[1].flags & FLAG_FIRST_UPPER))
  945. {
  946. if(word_end[0] == '.')
  947. utf8_in(&c2, &word_end[2]);
  948. else
  949. utf8_in(&c2, &word_end[0]);
  950. if((word_end[0] != 0) && (word_end[1] != 0) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || IsAlpha(c2)))
  951. {
  952. // ordinal number is indicated by dot after the number
  953. // but not if the next word starts with an upper-case letter
  954. // (c2 == 0) is for cases such as, "2.,"
  955. ordinal = 2;
  956. if(word_end[0] == '.')
  957. word_end[0] = ' ';
  958. if((roman==0) && (tr->translator_name == L('h','u')))
  959. {
  960. // lang=hu don't treat dot as ordinal indicator if the next word is a month name ($alt). It may have a suffix.
  961. nextflags = 0;
  962. if(IsAlpha(c2))
  963. {
  964. nextflags = TranslateWord(tr, &word_end[2], 0, NULL, NULL);
  965. }
  966. if((tr->prev_dict_flags[0] & FLAG_ALT_TRANS) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || iswdigit(c2)))
  967. ordinal = 0; // TEST 09.02.10
  968. if(nextflags & FLAG_ALT_TRANS)
  969. ordinal = 0;
  970. if(nextflags & FLAG_ALT3_TRANS)
  971. {
  972. if(word[-2] == '-')
  973. ordinal = 0; // eg. december 2-5. között
  974. if(tr->prev_dict_flags[0] & (FLAG_ALT_TRANS | FLAG_ALT3_TRANS))
  975. ordinal = 0x22;
  976. }
  977. }
  978. }
  979. }
  980. }
  981. return(ordinal);
  982. } // end of CheckDotOrdinal
  // lang=hu: should the variant ('e') form of a number be used?
  //
  // The variant is used when the number is followed by a hyphen and a suffix which
  // starts with 'a' or 'e', except for the words a, e, az, ez, azt, ezt, att, ett,
  // and except for an 'l' suffix after an exact thousand (eg. 1000-el).
  //
  // word:         the suffix text which follows the hyphen
  // thousandplex: 1 if the number is a thousands group
  // value:        the value of the number
  //
  // Returns 1 to use the variant form, 0 otherwise.
  static int hu_number_e(const char *word, int thousandplex, int value)
  {
      const char first = word[0];

      if((first != 'a') && (first != 'e'))
          return(0);

      switch(word[1])
      {
      case ' ':
      case 'z':
          return(0);   // a, e, az, ez, azt, ezt

      case 't':
          if(word[2] == 't')
              return(0);   // att, ett
          break;

      case 'l':
          if((thousandplex == 1) || ((value % 1000) == 0))
              return(0);   // 1000-el
          break;

      default:
          break;
      }
      return(1);
  } // end of hu_number_e
  996. int TranslateRoman(Translator *tr, char *word, char *ph_out, WORD_TAB *wtab)
  997. {//=========================================================================
  998. int c;
  999. char *p;
  1000. const char *p2;
  1001. int acc;
  1002. int prev;
  1003. int value;
  1004. int subtract;
  1005. int repeat = 0;
  1006. int n_digits = 0;
  1007. char *word_start;
  1008. int num_control = 0;
  1009. unsigned int flags[2];
  1010. char ph_roman[30];
  1011. char number_chars[N_WORD_BYTES];
  1012. static const char *roman_numbers = "ixcmvld";
  1013. static int roman_values[] = {1,10,100,1000,5,50,500};
  1014. acc = 0;
  1015. prev = 0;
  1016. subtract = 0x7fff;
  1017. ph_out[0] = 0;
  1018. flags[0] = 0;
  1019. flags[1] = 0;
  1020. if(((tr->langopts.numbers & NUM_ROMAN_CAPITALS) && !(wtab[0].flags & FLAG_ALL_UPPER)) || IsDigit09(word[-2]))
  1021. return(0); // not '2xx'
  1022. word_start = word;
  1023. while((c = *word++) != ' ')
  1024. {
  1025. if((p2 = strchr(roman_numbers,c)) == NULL)
  1026. return(0);
  1027. value = roman_values[p2 - roman_numbers];
  1028. if(value == prev)
  1029. {
  1030. repeat++;
  1031. if(repeat >= 3)
  1032. return(0);
  1033. }
  1034. else
  1035. repeat = 0;
  1036. if((prev > 1) && (prev != 10) && (prev != 100))
  1037. {
  1038. if(value >= prev)
  1039. return(0);
  1040. }
  1041. if((prev != 0) && (prev < value))
  1042. {
  1043. if(((acc % 10) != 0) || ((prev*10) < value))
  1044. return(0);
  1045. subtract = prev;
  1046. value -= subtract;
  1047. }
  1048. else if(value >= subtract)
  1049. return(0);
  1050. else
  1051. acc += prev;
  1052. prev = value;
  1053. n_digits++;
  1054. }
  1055. if(IsDigit09(word[0]))
  1056. return(0); // eg. 'xx2'
  1057. acc += prev;
  1058. if(acc < tr->langopts.min_roman)
  1059. return(0);
  1060. if(acc > tr->langopts.max_roman)
  1061. return(0);
  1062. Lookup(tr, "_roman",ph_roman); // precede by "roman" if _rom is defined in *_list
  1063. p = &ph_out[0];
  1064. if((tr->langopts.numbers & NUM_ROMAN_AFTER) == 0)
  1065. {
  1066. strcpy(ph_out,ph_roman);
  1067. p = &ph_out[strlen(ph_roman)];
  1068. }
  1069. sprintf(number_chars," %d ",acc);
  1070. if(word[0] == '.')
  1071. {
  1072. // dot has not been removed. This implies that there was no space after it
  1073. return(0);
  1074. }
  1075. if(CheckDotOrdinal(tr, word_start, word, wtab, 1))
  1076. wtab[0].flags |= FLAG_ORDINAL;
  1077. if(tr->langopts.numbers & NUM_ROMAN_ORDINAL)
  1078. {
  1079. if(tr->translator_name == L('h','u'))
  1080. {
  1081. if(!(wtab[0].flags & FLAG_ORDINAL))
  1082. {
  1083. if((wtab[0].flags & FLAG_HYPHEN_AFTER) && hu_number_e(word, 0, acc))
  1084. {
  1085. // should use the 'e' form of the number
  1086. num_control |= 1;
  1087. }
  1088. else
  1089. return(0);
  1090. }
  1091. }
  1092. else
  1093. {
  1094. wtab[0].flags |= FLAG_ORDINAL;
  1095. }
  1096. }
  1097. tr->prev_dict_flags[0] = 0;
  1098. tr->prev_dict_flags[1] = 0;
  1099. TranslateNumber(tr, &number_chars[2], p, flags, wtab, num_control);
  1100. if(tr->langopts.numbers & NUM_ROMAN_AFTER)
  1101. strcat(ph_out,ph_roman);
  1102. return(1);
  1103. } // end of TranslateRoman
  1104. static const char *M_Variant(int value)
  1105. {//====================================
  1106. // returns M, or perhaps MA or MB for some cases
  1107. int teens = 0;
  1108. if(((value % 100) > 10) && ((value % 100) < 20))
  1109. teens = 1;
  1110. switch((translator->langopts.numbers2 >> 6) & 0x7)
  1111. {
  1112. case 1: // lang=ru use singular for xx1 except for x11
  1113. if((teens == 0) && ((value % 10) == 1))
  1114. return("1M");
  1115. break;
  1116. case 2: // lang=cs,sk
  1117. if((value >= 2) && (value <= 4))
  1118. return("0MA");
  1119. break;
  1120. case 3: // lang=pl
  1121. if((teens == 0) && (((value % 10) >= 2) && ((value % 10) <= 4)))
  1122. return("0MA");
  1123. break;
  1124. case 4: // lang=lt
  1125. if((teens == 1) || ((value % 10) == 0))
  1126. return("0MB");
  1127. if((value % 10) == 1)
  1128. return("0MA");
  1129. break;
  1130. case 5: // lang=bs,hr,sr
  1131. if(teens == 0)
  1132. {
  1133. if((value % 10) == 1)
  1134. return("1M");
  1135. if(((value % 10) >= 2) && ((value % 10) <= 4))
  1136. return("0MA");
  1137. }
  1138. break;
  1139. }
  1140. return("0M");
  1141. }
  1142. static int LookupThousands(Translator *tr, int value, int thousandplex, int thousands_exact, char *ph_out)
  1143. {//=======================================================================================================
  1144. // thousands_exact: bit 0 no hundreds,tens,or units, bit 1 ordinal numberr
  1145. int found;
  1146. int found_value=0;
  1147. char string[12];
  1148. char ph_of[12];
  1149. char ph_thousands[40];
  1150. char ph_buf[40];
  1151. ph_of[0] = 0;
  1152. // first look for a match with the exact value of thousands
  1153. if(value > 0)
  1154. {
  1155. if(thousands_exact & 1)
  1156. {
  1157. if(thousands_exact & 2)
  1158. {
  1159. // ordinal number
  1160. sprintf(string,"_%dM%do",value,thousandplex);
  1161. found_value = Lookup(tr, string, ph_thousands);
  1162. }
  1163. if(!found_value & (number_control & 1))
  1164. {
  1165. // look for the 'e' variant
  1166. sprintf(string,"_%dM%de",value,thousandplex);
  1167. found_value = Lookup(tr, string, ph_thousands);
  1168. }
  1169. if(!found_value)
  1170. {
  1171. // is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta)
  1172. sprintf(string,"_%dM%dx",value,thousandplex);
  1173. found_value = Lookup(tr, string, ph_thousands);
  1174. }
  1175. }
  1176. if(found_value == 0)
  1177. {
  1178. sprintf(string,"_%dM%d",value,thousandplex);
  1179. found_value = Lookup(tr, string, ph_thousands);
  1180. }
  1181. }
  1182. if(found_value == 0)
  1183. {
  1184. if((value % 100) >= 20)
  1185. {
  1186. Lookup(tr, "_0of", ph_of);
  1187. }
  1188. found = 0;
  1189. if(thousands_exact & 1)
  1190. {
  1191. if(thousands_exact & 2)
  1192. {
  1193. // ordinal number
  1194. sprintf(string,"_%s%do",M_Variant(value), thousandplex);
  1195. found = Lookup(tr, string, ph_thousands);
  1196. }
  1197. if(!found && (number_control & 1))
  1198. {
  1199. // look for the 'e' variant
  1200. sprintf(string,"_%s%de",M_Variant(value), thousandplex);
  1201. found = Lookup(tr, string, ph_thousands);
  1202. }
  1203. if(!found)
  1204. {
  1205. // is there a different pronunciation if there are no hundreds,tens,or units ?
  1206. sprintf(string,"_%s%dx",M_Variant(value), thousandplex);
  1207. found = Lookup(tr, string, ph_thousands);
  1208. }
  1209. }
  1210. if(found == 0)
  1211. {
  1212. sprintf(string,"_%s%d",M_Variant(value), thousandplex);
  1213. if(Lookup(tr, string, ph_thousands) == 0)
  1214. {
  1215. if(thousandplex > 3)
  1216. {
  1217. sprintf(string,"_0M%d", thousandplex-1);
  1218. if(Lookup(tr, string, ph_buf) == 0)
  1219. {
  1220. // say "millions" if this name is not available and neither is the next lower
  1221. Lookup(tr, "_0M2", ph_thousands);
  1222. speak_missing_thousands = 3;
  1223. }
  1224. }
  1225. if(ph_thousands[0] == 0)
  1226. {
  1227. // repeat "thousand" if higher order names are not available
  1228. sprintf(string,"_%dM1",value);
  1229. if((found_value = Lookup(tr, string, ph_thousands)) == 0)
  1230. Lookup(tr, "_0M1", ph_thousands);
  1231. speak_missing_thousands = 2;
  1232. }
  1233. }
  1234. }
  1235. }
  1236. sprintf(ph_out,"%s%s",ph_of,ph_thousands);
  1237. if((value == 1) && (thousandplex == 1) && (tr->langopts.numbers & NUM_OMIT_1_THOUSAND))
  1238. return(1);
  1239. return(found_value);
  1240. } // end f LookupThousands
  1241. static int LookupNum2(Translator *tr, int value, const int control, char *ph_out)
  1242. {//=============================================================================
  1243. // Lookup a 2 digit number
  1244. // control bit 0: ordinal number
  1245. // control bit 1: final tens and units (not number of thousands) (use special form of '1', LANG=de "eins")
  1246. // control bit 2: tens and units only, no higher digits
  1247. // control bit 3: use feminine form of '2' (for thousands
  1248. // control bit 4: speak zero tens
  1249. // control bit 5: variant of ordinal number (lang=hu)
  1250. // bit 8 followed by decimal fraction
  1251. int found;
  1252. int ix;
  1253. int units;
  1254. int tens;
  1255. int is_ordinal;
  1256. int used_and=0;
  1257. int found_ordinal = 0;
  1258. int next_phtype;
  1259. int ord_type = 'o';
  1260. char string[12]; // for looking up entries in *_list
  1261. char ph_ordinal[20];
  1262. char ph_tens[50];
  1263. char ph_digits[50];
  1264. char ph_and[12];
  1265. units = value % 10;
  1266. tens = value / 10;
  1267. found = 0;
  1268. ph_ordinal[0] = 0;
  1269. ph_tens[0] = 0;
  1270. ph_digits[0] = 0;
  1271. ph_and[0] = 0;
  1272. if(control & 0x20)
  1273. {
  1274. ord_type = 'q';
  1275. }
  1276. is_ordinal = control & 1;
  1277. if((control & 2) && (n_digit_lookup == 2))
  1278. {
  1279. // pronunciation of the final 2 digits has already been found
  1280. strcpy(ph_out, digit_lookup);
  1281. }
  1282. else
  1283. {
  1284. if(digit_lookup[0] == 0)
  1285. {
  1286. // is there a special pronunciation for this 2-digit number
  1287. if(control & 8)
  1288. {
  1289. // is there a feminine form?
  1290. sprintf(string,"_%df",value);
  1291. found = Lookup(tr, string, ph_digits);
  1292. }
  1293. else if(is_ordinal)
  1294. {
  1295. strcpy(ph_ordinal, ph_ordinal2);
  1296. if(control & 4)
  1297. {
  1298. sprintf(string,"_%d%cx",value,ord_type); // LANG=hu, special word for 1. 2. when there are no higher digits
  1299. if((found = Lookup(tr, string, ph_digits)) != 0)
  1300. {
  1301. if(ph_ordinal2x[0] != 0)
  1302. strcpy(ph_ordinal, ph_ordinal2x); // alternate pronunciation (lang=an)
  1303. }
  1304. }
  1305. if(found == 0)
  1306. {
  1307. sprintf(string,"_%d%c",value,ord_type);
  1308. found = Lookup(tr, string, ph_digits);
  1309. }
  1310. found_ordinal = found;
  1311. }
  1312. if(found == 0)
  1313. {
  1314. if(control & 2)
  1315. {
  1316. // the final tens and units of a number
  1317. if(number_control & 1)
  1318. {
  1319. // look for 'e' variant
  1320. sprintf(string,"_%de",value);
  1321. found = Lookup(tr, string, ph_digits);
  1322. }
  1323. }
  1324. else
  1325. {
  1326. // followed by hundreds or thousands etc
  1327. sprintf(string,"_%da",value);
  1328. found = Lookup(tr, string, ph_digits);
  1329. }
  1330. if(!found)
  1331. {
  1332. if((is_ordinal) && (tr->langopts.numbers2 & NUM2_NO_TEEN_ORDINALS))
  1333. {
  1334. // don't use numbers 10-99 to make ordinals, always use _1Xo etc (lang=pt)
  1335. }
  1336. else
  1337. {
  1338. sprintf(string,"_%d",value);
  1339. found = Lookup(tr, string, ph_digits);
  1340. }
  1341. }
  1342. }
  1343. }
  1344. // no, speak as tens+units
  1345. if((control & 0x10) && (value < 10))
  1346. {
  1347. // speak leading zero
  1348. Lookup(tr, "_0", ph_tens);
  1349. }
  1350. else
  1351. {
  1352. if(found)
  1353. {
  1354. ph_tens[0] = 0;
  1355. }
  1356. else
  1357. {
  1358. if(is_ordinal)
  1359. {
  1360. sprintf(string,"_%dX%c", tens, ord_type);
  1361. if(Lookup(tr, string, ph_tens) != 0)
  1362. {
  1363. found_ordinal = 1;
  1364. if((units != 0) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))
  1365. {
  1366. // Use the ordinal form of tens as well as units. Add the ordinal ending
  1367. strcat(ph_tens, ph_ordinal2);
  1368. }
  1369. }
  1370. }
  1371. if(found_ordinal == 0)
  1372. {
  1373. sprintf(string,"_%dX", tens);
  1374. Lookup(tr, string, ph_tens);
  1375. }
  1376. if((ph_tens[0] == 0) && (tr->langopts.numbers & NUM_VIGESIMAL))
  1377. {
  1378. // tens not found, (for example) 73 is 60+13
  1379. units = (value % 20);
  1380. sprintf(string,"_%dX", tens & 0xfe);
  1381. Lookup(tr, string, ph_tens);
  1382. }
  1383. ph_digits[0] = 0;
  1384. if(units > 0)
  1385. {
  1386. found = 0;
  1387. if((control & 2) && (digit_lookup[0] != 0))
  1388. {
  1389. // we have an entry for this digit (possibly together with the next word)
  1390. strcpy(ph_digits, digit_lookup);
  1391. found_ordinal = 1;
  1392. ph_ordinal[0] = 0;
  1393. }
  1394. else
  1395. {
  1396. if(control & 8)
  1397. {
  1398. // is there a variant form of this number?
  1399. sprintf(string,"_%df",units);
  1400. found = Lookup(tr, string, ph_digits);
  1401. }
  1402. if((is_ordinal) && ((tr->langopts.numbers & NUM_SWAP_TENS) == 0))
  1403. {
  1404. // ordinal
  1405. sprintf(string,"_%d%c",units,ord_type);
  1406. if((found = Lookup(tr, string, ph_digits)) != 0)
  1407. {
  1408. found_ordinal = 1;
  1409. }
  1410. }
  1411. if(found == 0)
  1412. {
  1413. if((number_control & 1) && (control & 2))
  1414. {
  1415. // look for 'e' variant
  1416. sprintf(string,"_%de",units);
  1417. found = Lookup(tr, string, ph_digits);
  1418. }
  1419. else if(((control & 2) == 0) || ((tr->langopts.numbers & NUM_SWAP_TENS) != 0))
  1420. {
  1421. // followed by hundreds or thousands (or tens)
  1422. sprintf(string,"_%da",units);
  1423. found = Lookup(tr, string, ph_digits);
  1424. }
  1425. }
  1426. if(found == 0)
  1427. {
  1428. sprintf(string,"_%d",units);
  1429. Lookup(tr, string, ph_digits);
  1430. }
  1431. }
  1432. }
  1433. }
  1434. }
  1435. if((is_ordinal) && (found_ordinal == 0) && (ph_ordinal[0] == 0))
  1436. {
  1437. if((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & NUM_SWAP_TENS)))
  1438. Lookup(tr, "_ord20", ph_ordinal);
  1439. if(ph_ordinal[0] == 0)
  1440. Lookup(tr, "_ord", ph_ordinal);
  1441. }
  1442. if((tr->langopts.numbers & (NUM_SWAP_TENS | NUM_AND_UNITS)) && (ph_tens[0] != 0) && (ph_digits[0] != 0))
  1443. {
  1444. Lookup(tr, "_0and", ph_and);
  1445. if((is_ordinal) && (tr->langopts.numbers2 & NUM2_ORDINAL_NO_AND))
  1446. ph_and[0] = 0;
  1447. if(tr->langopts.numbers & NUM_SWAP_TENS)
  1448. sprintf(ph_out,"%s%s%s%s",ph_digits, ph_and, ph_tens, ph_ordinal);
  1449. else
  1450. sprintf(ph_out,"%s%s%s%s",ph_tens, ph_and, ph_digits, ph_ordinal);
  1451. used_and = 1;
  1452. }
  1453. else
  1454. {
  1455. if(tr->langopts.numbers & NUM_SINGLE_VOWEL)
  1456. {
  1457. // remove vowel from the end of tens if units starts with a vowel (LANG=Italian)
  1458. if(((ix = strlen(ph_tens)-1) >= 0) && (ph_digits[0] != 0))
  1459. {
  1460. if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS)
  1461. next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type;
  1462. if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
  1463. ph_tens[ix] = 0;
  1464. }
  1465. }
  1466. sprintf(ph_out,"%s%s%s",ph_tens, ph_digits, ph_ordinal);
  1467. }
  1468. }
  1469. if(tr->langopts.numbers & NUM_SINGLE_STRESS_L)
  1470. {
  1471. // only one primary stress, on the first part (tens)
  1472. found = 0;
  1473. for(ix=0; ix < (signed)strlen(ph_out); ix++)
  1474. {
  1475. if(ph_out[ix] == phonSTRESS_P)
  1476. {
  1477. if(found)
  1478. ph_out[ix] = phonSTRESS_3;
  1479. else
  1480. found = 1;
  1481. }
  1482. }
  1483. }
  1484. else if(tr->langopts.numbers & NUM_SINGLE_STRESS)
  1485. {
  1486. // only one primary stress
  1487. found = 0;
  1488. for(ix=strlen(ph_out)-1; ix>=0; ix--)
  1489. {
  1490. if(ph_out[ix] == phonSTRESS_P)
  1491. {
  1492. if(found)
  1493. ph_out[ix] = phonSTRESS_3;
  1494. else
  1495. found = 1;
  1496. }
  1497. }
  1498. }
  1499. return(used_and);
  1500. } // end of LookupNum2
  1501. static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null, int thousandplex, int control)
  1502. {//=============================================================================================================
  1503. // Translate a 3 digit number
  1504. // control bit 0, previous thousands
  1505. // bit 1, ordinal number
  1506. // bit 5 variant form of ordinal number
  1507. // bit 8 followed by decimal fraction
  1508. int found;
  1509. int hundreds;
  1510. int tensunits;
  1511. int x;
  1512. int ix;
  1513. int exact;
  1514. int ordinal;
  1515. int tplex;
  1516. int say_zero_hundred=0;
  1517. char string[12]; // for looking up entries in **_list
  1518. char buf1[100];
  1519. char buf2[100];
  1520. char ph_100[20];
  1521. char ph_10T[20];
  1522. char ph_digits[50];
  1523. char ph_thousands[50];
  1524. char ph_hundred_and[12];
  1525. char ph_thousand_and[12];
  1526. ordinal = control & 0x22;
  1527. hundreds = value / 100;
  1528. tensunits = value % 100;
  1529. buf1[0] = 0;
  1530. ph_thousands[0] = 0;
  1531. ph_thousand_and[0] = 0;
  1532. if((tr->langopts.numbers & NUM_ZERO_HUNDRED) && ((control & 1) || (hundreds >= 10)))
  1533. {
  1534. say_zero_hundred = 1; // lang=vi
  1535. }
  1536. if((hundreds > 0) || say_zero_hundred)
  1537. {
  1538. found = 0;
  1539. if(ordinal && (tensunits == 0))
  1540. {
  1541. // ordinal number, with no tens or units
  1542. found = Lookup(tr, "_0Co", ph_100);
  1543. }
  1544. if(found == 0)
  1545. {
  1546. if(tensunits==0)
  1547. {
  1548. // special form for exact hundreds?
  1549. found = Lookup(tr, "_0C0", ph_100);
  1550. }
  1551. if(!found)
  1552. {
  1553. Lookup(tr, "_0C", ph_100);
  1554. }
  1555. }
  1556. if(((tr->langopts.numbers & NUM_1900) != 0) && (hundreds == 19))
  1557. {
  1558. // speak numbers such as 1984 as years: nineteen-eighty-four
  1559. // ph_100[0] = 0; // don't say "hundred", we also need to surpess "and"
  1560. }
  1561. else if(hundreds >= 10)
  1562. {
  1563. ph_digits[0] = 0;
  1564. exact = 0;
  1565. if ((value % 1000) == 0)
  1566. exact = 1;
  1567. tplex = thousandplex+1;
  1568. if(tr->langopts.numbers2 & NUM2_MYRIADS)
  1569. {
  1570. tplex = 0;
  1571. }
  1572. if(LookupThousands(tr, hundreds / 10, tplex, exact | ordinal, ph_10T) == 0)
  1573. {
  1574. x = 0;
  1575. if(tr->langopts.numbers2 & (1 << tplex))
  1576. x = 8; // use variant (feminine) for before thousands and millions
  1577. LookupNum2(tr, hundreds/10, x, ph_digits);
  1578. }
  1579. if(tr->langopts.numbers2 & 0x200)
  1580. sprintf(ph_thousands,"%s%c%s%c",ph_10T,phonEND_WORD,ph_digits,phonEND_WORD); // say "thousands" before its number, not after
  1581. else
  1582. sprintf(ph_thousands,"%s%c%s%c",ph_digits,phonEND_WORD,ph_10T,phonEND_WORD);
  1583. hundreds %= 10;
  1584. if((hundreds == 0) && (say_zero_hundred == 0))
  1585. ph_100[0] = 0;
  1586. suppress_null = 1;
  1587. }
  1588. ph_digits[0] = 0;
  1589. if((hundreds > 0) || say_zero_hundred)
  1590. {
  1591. if((tr->langopts.numbers & NUM_AND_HUNDRED) && ((control & 1) || (ph_thousands[0] != 0)))
  1592. {
  1593. Lookup(tr, "_0and", ph_thousand_and);
  1594. }
  1595. suppress_null = 1;
  1596. found = 0;
  1597. if((ordinal)
  1598. && ((tensunits == 0) || (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL)))
  1599. {
  1600. // ordinal number
  1601. sprintf(string, "_%dCo", hundreds);
  1602. found = Lookup(tr, string, ph_digits);
  1603. if((tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL) && (tensunits > 0))
  1604. {
  1605. // Use ordinal form of hundreds, as well as for tens and units
  1606. // Add ordinal suffix to the hundreds
  1607. strcat(ph_digits, ph_ordinal2);
  1608. }
  1609. }
  1610. if((hundreds == 0) && say_zero_hundred)
  1611. {
  1612. Lookup(tr, "_0", ph_digits);
  1613. }
  1614. else
  1615. {
  1616. if((!found) && (tensunits == 0))
  1617. {
  1618. // is there a special pronunciation for exactly n00 ?
  1619. sprintf(string,"_%dC0",hundreds);
  1620. found = Lookup(tr, string, ph_digits);
  1621. }
  1622. if(!found)
  1623. {
  1624. sprintf(string,"_%dC",hundreds);
  1625. found = Lookup(tr, string, ph_digits); // is there a specific pronunciation for n-hundred ?
  1626. }
  1627. if(found)
  1628. {
  1629. ph_100[0] = 0;
  1630. }
  1631. else
  1632. {
  1633. if((hundreds > 1) || ((tr->langopts.numbers & NUM_OMIT_1_HUNDRED) == 0))
  1634. {
  1635. LookupNum2(tr, hundreds, 0, ph_digits);
  1636. }
  1637. }
  1638. }
  1639. }
  1640. sprintf(buf1,"%s%s%s%s",ph_thousands,ph_thousand_and,ph_digits,ph_100);
  1641. }
  1642. ph_hundred_and[0] = 0;
  1643. if(tensunits > 0)
  1644. {
  1645. if((control & 2) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))
  1646. {
  1647. // Don't use "and" if we apply ordinal to both hundreds and units
  1648. }
  1649. else
  1650. {
  1651. if((value > 100) || ((control & 1) && (thousandplex==0)))
  1652. {
  1653. if((tr->langopts.numbers & NUM_HUNDRED_AND) || ((tr->langopts.numbers & NUM_HUNDRED_AND_DIGIT) && (tensunits < 10)))
  1654. {
  1655. Lookup(tr, "_0and", ph_hundred_and);
  1656. }
  1657. }
  1658. if((tr->langopts.numbers & NUM_THOUSAND_AND) && (hundreds == 0) && ((control & 1) || (ph_thousands[0] != 0)))
  1659. {
  1660. Lookup(tr, "_0and", ph_hundred_and);
  1661. }
  1662. }
  1663. }
  1664. buf2[0] = 0;
  1665. if((tensunits != 0) || (suppress_null == 0))
  1666. {
  1667. x = 0;
  1668. if(thousandplex==0)
  1669. {
  1670. x = 2; // allow "eins" for 1 rather than "ein"
  1671. if(ordinal)
  1672. x = 3; // ordinal number
  1673. if((value < 100) && !(control & 1))
  1674. x |= 4; // tens and units only, no higher digits
  1675. if(ordinal & 0x20)
  1676. x |= 0x20; // variant form of ordinal number
  1677. }
  1678. else
  1679. {
  1680. if(tr->langopts.numbers2 & (1 << thousandplex))
  1681. x = 8; // use variant (feminine) for before thousands and millions
  1682. }
  1683. if(LookupNum2(tr, tensunits, x | (control & 0x100), buf2) != 0)
  1684. {
  1685. if(tr->langopts.numbers & NUM_SINGLE_AND)
  1686. ph_hundred_and[0] = 0; // don't put 'and' after 'hundred' if there's 'and' between tens and units
  1687. }
  1688. }
  1689. else
  1690. {
  1691. if(ph_ordinal2[0] != 0)
  1692. {
  1693. ix = strlen(buf1);
  1694. if((ix > 0) && (buf1[ix-1] == phonPAUSE_SHORT))
  1695. buf1[ix-1] = 0; // remove pause before addding ordinal suffix
  1696. strcpy(buf2, ph_ordinal2);
  1697. }
  1698. }
  1699. sprintf(ph_out,"%s%s%c%s",buf1,ph_hundred_and,phonEND_WORD,buf2);
  1700. return(0);
  1701. } // end of LookupNum3
  1702. bool CheckThousandsGroup(char *word, int group_len)
  1703. {//================================================
  1704. // Is this a group of 3 digits which looks like a thousands group?
  1705. int ix;
  1706. if(IsDigit09(word[group_len]) || IsDigit09(-1))
  1707. return(false);
  1708. for(ix=0; ix < group_len; ix++)
  1709. {
  1710. if(!IsDigit09(word[ix]))
  1711. return(false);
  1712. }
  1713. return(true);
  1714. }
  1715. static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
  1716. {//=====================================================================================================================
  1717. // Number translation with various options
  1718. // the "word" may be up to 4 digits
  1719. // "words" of 3 digits may be preceded by another number "word" for thousands or millions
  1720. int n_digits;
  1721. int value;
  1722. int ix;
  1723. int digix;
  1724. unsigned char c;
  1725. int suppress_null = 0;
  1726. int decimal_point = 0;
  1727. int thousandplex = 0;
  1728. int thousands_exact = 1;
  1729. int thousands_inc = 0;
  1730. int prev_thousands = 0;
  1731. int ordinal = 0;
  1732. int this_value;
  1733. int decimal_count;
  1734. int max_decimal_count;
  1735. int decimal_mode;
  1736. int suffix_ix;
  1737. int skipwords = 0;
  1738. int group_len;
  1739. int len;
  1740. char *p;
  1741. char string[32]; // for looking up entries in **_list
  1742. char buf1[100];
  1743. char ph_append[50];
  1744. char ph_buf[200];
  1745. char ph_buf2[50];
  1746. char ph_zeros[50];
  1747. char suffix[30]; // string[] must be long enough for sizeof(suffix)+2
  1748. char buf_digit_lookup[50];
  1749. static const char str_pause[2] = {phonPAUSE_NOLINK,0};
  1750. *flags = 0;
  1751. n_digit_lookup = 0;
  1752. buf_digit_lookup[0] = 0;
  1753. digit_lookup = buf_digit_lookup;
  1754. number_control = control;
  1755. for(ix=0; IsDigit09(word[ix]); ix++) ;
  1756. n_digits = ix;
  1757. value = this_value = atoi(word);
  1758. group_len = 3;
  1759. if(tr->langopts.numbers2 & NUM2_MYRIADS)
  1760. group_len = 4;
  1761. // is there a previous thousands part (as a previous "word") ?
  1762. if((n_digits == group_len) && (word[-2] == tr->langopts.thousands_sep) && IsDigit09(word[-3]))
  1763. {
  1764. prev_thousands = 1;
  1765. }
  1766. else if((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & NUM_ALLOW_SPACE))
  1767. {
  1768. // thousands groups can be separated by spaces
  1769. if((n_digits == 3) && !(wtab->flags & FLAG_MULTIPLE_SPACES) && IsDigit09(word[-2]))
  1770. {
  1771. prev_thousands = 1;
  1772. }
  1773. }
  1774. if(prev_thousands == 0)
  1775. {
  1776. speak_missing_thousands = 0;
  1777. }
  1778. ph_ordinal2[0] = 0;
  1779. ph_zeros[0] = 0;
  1780. if(prev_thousands || (word[0] != '0'))
  1781. {
  1782. // don't check for ordinal if the number has a leading zero
  1783. if((ordinal = CheckDotOrdinal(tr, word, &word[ix], wtab, 0)) != 0)
  1784. {
  1785. // dot_ordinal = 1;
  1786. }
  1787. }
  1788. if((word[ix] == '.') && !IsDigit09(word[ix+1]) && !IsDigit09(word[ix+2]) && !(wtab[1].flags & FLAG_NOSPACE))
  1789. {
  1790. // remove dot unless followed by another number
  1791. word[ix] = 0;
  1792. }
  1793. if((ordinal == 0) || (tr->translator_name == L('h','u')))
  1794. {
  1795. // NOTE lang=hu, allow both dot and ordinal suffix, eg. "december 21.-én"
  1796. // look for an ordinal number suffix after the number
  1797. ix++;
  1798. p = suffix;
  1799. if(wtab[0].flags & FLAG_HYPHEN_AFTER)
  1800. {
  1801. *p++ = '-';
  1802. ix++;
  1803. }
  1804. while((word[ix] != 0) && (word[ix] != ' ') && (ix < (int)(sizeof(suffix)-1)))
  1805. {
  1806. *p++ = word[ix++];
  1807. }
  1808. *p = 0;
  1809. if(suffix[0] != 0)
  1810. {
  1811. if((tr->langopts.ordinal_indicator != NULL) && (strcmp(suffix, tr->langopts.ordinal_indicator) == 0))
  1812. {
  1813. ordinal = 2;
  1814. }
  1815. else if(!IsDigit09(suffix[0])) // not _#9 (tab)
  1816. {
  1817. sprintf(string,"_#%s",suffix);
  1818. if(Lookup(tr, string, ph_ordinal2))
  1819. {
  1820. // this is an ordinal suffix
  1821. ordinal = 2;
  1822. flags[0] |= FLAG_SKIPWORDS;
  1823. skipwords = 1;
  1824. sprintf(string,"_x#%s",suffix);
  1825. Lookup(tr, string, ph_ordinal2x); // is there an alternate pronunciation?
  1826. }
  1827. }
  1828. }
  1829. }
  1830. if(wtab[0].flags & FLAG_ORDINAL)
  1831. ordinal = 2;
  1832. ph_append[0] = 0;
  1833. ph_buf2[0] = 0;
  1834. if((word[0] == '0') && (prev_thousands == 0) && (word[1] != ' ') && (word[1] != tr->langopts.decimal_sep))
  1835. {
  1836. if((n_digits == 2) && (word[3] == ':') && IsDigit09(word[5]) && isspace(word[7]))
  1837. {
  1838. // looks like a time 02:30, omit the leading zero
  1839. }
  1840. else
  1841. {
  1842. if(n_digits > 3)
  1843. {
  1844. flags[0] &= ~FLAG_SKIPWORDS;
  1845. return(0); // long number string with leading zero, speak as individual digits
  1846. }
  1847. // speak leading zeros
  1848. for(ix=0; (word[ix] == '0') && (ix < (n_digits-1)); ix++)
  1849. {
  1850. Lookup(tr, "_0", &ph_zeros[strlen(ph_zeros)]);
  1851. }
  1852. }
  1853. }
  1854. if((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[n_digits] == ' '))
  1855. thousands_inc = 1;
  1856. else if(word[n_digits] == tr->langopts.thousands_sep)
  1857. thousands_inc = 2;
  1858. suffix_ix = n_digits+2;
  1859. if(thousands_inc > 0)
  1860. {
  1861. // if the following "words" are three-digit groups, count them and add
  1862. // a "thousand"/"million" suffix to this one
  1863. digix = n_digits + thousands_inc;
  1864. while(((wtab[thousandplex+1].flags & FLAG_MULTIPLE_SPACES) == 0) && CheckThousandsGroup(&word[digix], group_len))
  1865. {
  1866. for(ix=0; ix<group_len; ix++)
  1867. {
  1868. if(word[digix+ix] != '0')
  1869. {
  1870. thousands_exact = 0;
  1871. break;
  1872. }
  1873. }
  1874. thousandplex++;
  1875. digix += group_len;
  1876. if((word[digix] == tr->langopts.thousands_sep) || ((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[digix] == ' ')))
  1877. {
  1878. suffix_ix = digix+2;
  1879. digix += thousands_inc;
  1880. }
  1881. else
  1882. break;
  1883. }
  1884. }
  1885. if((value == 0) && prev_thousands)
  1886. {
  1887. suppress_null = 1;
  1888. }
  1889. if(tr->translator_name == L('h','u'))
  1890. {
  1891. // variant form of numbers when followed by hyphen and a suffix starting with 'a' or 'e' (but not a, e, az, ez, azt, ezt
  1892. if((wtab[thousandplex].flags & FLAG_HYPHEN_AFTER) && (thousands_exact==1) && hu_number_e(&word[suffix_ix], thousandplex, value))
  1893. {
  1894. number_control |= 1; // use _1e variant of number
  1895. }
  1896. }
  1897. if((word[n_digits] == tr->langopts.decimal_sep) && IsDigit09(word[n_digits+1]))
  1898. {
  1899. // this "word" ends with a decimal point
  1900. Lookup(tr, "_dpt", ph_append);
  1901. decimal_point = 0x100;
  1902. }
  1903. else if(suppress_null == 0)
  1904. {
  1905. if(thousands_inc > 0)
  1906. {
  1907. if(thousandplex > 0)
  1908. // if((thousandplex > 0) && (value < 1000))
  1909. {
  1910. if((suppress_null == 0) && (LookupThousands(tr,value,thousandplex, thousands_exact, ph_append)))
  1911. {
  1912. // found an exact match for N thousand
  1913. value = 0;
  1914. suppress_null = 1;
  1915. }
  1916. }
  1917. }
  1918. }
  1919. else
  1920. if(speak_missing_thousands == 1)
  1921. {
  1922. // speak this thousandplex if there was no word for the previous thousandplex
  1923. sprintf(string,"_0M%d",thousandplex+1);
  1924. if(Lookup(tr, string, buf1)==0)
  1925. {
  1926. sprintf(string,"_0M%d",thousandplex);
  1927. Lookup(tr, string, ph_append);
  1928. }
  1929. }
  1930. if((ph_append[0] == 0) && (word[n_digits] == '.') && (thousandplex == 0))
  1931. {
  1932. Lookup(tr, "_.", ph_append);
  1933. }
  1934. if(thousandplex == 0)
  1935. {
  1936. char *p2;
  1937. // look for combinations of the number with the next word
  1938. p = word;
  1939. while(IsDigit09(p[1])) p++; // just use the last digit
  1940. if(IsDigit09(p[-1]))
  1941. {
  1942. p2 = p - 1;
  1943. if(LookupDictList(tr, &p2, buf_digit_lookup, flags, FLAG_SUFX, wtab)) // lookup 2 digits
  1944. {
  1945. n_digit_lookup = 2;
  1946. }
  1947. }
  1948. // if((buf_digit_lookup[0] == 0) && (*p != '0') && (dot_ordinal==0))
  1949. if((buf_digit_lookup[0] == 0) && (*p != '0'))
  1950. {
  1951. // LANG=hu ?
  1952. // not found, lookup only the last digit (?? but not if dot-ordinal has been found)
  1953. if(LookupDictList(tr, &p, buf_digit_lookup, flags, FLAG_SUFX, wtab)) // don't match '0', or entries with $only
  1954. {
  1955. n_digit_lookup = 1;
  1956. }
  1957. }
  1958. if(prev_thousands == 0)
  1959. {
  1960. if((decimal_point == 0) && (ordinal == 0))
  1961. {
  1962. // Look for special pronunciation for this number in isolation (LANG=kl)
  1963. sprintf(string, "_%dn", value);
  1964. if(Lookup(tr, string, ph_out))
  1965. {
  1966. return(1);
  1967. }
  1968. }
  1969. if(tr->langopts.numbers2 & NUM2_PERCENT_BEFORE)
  1970. {
  1971. // LANG=si, say "percent" before the number
  1972. p2 = word;
  1973. while((*p2 != ' ') && (*p2 != 0))
  1974. {
  1975. p2++;
  1976. }
  1977. if(p2[1] == '%')
  1978. {
  1979. Lookup(tr, "%", ph_out);
  1980. ph_out += strlen(ph_out);
  1981. p2[1] = ' ';
  1982. }
  1983. }
  1984. }
  1985. }
  1986. LookupNum3(tr, value, ph_buf, suppress_null, thousandplex, prev_thousands | ordinal | decimal_point);
  1987. if((thousandplex > 0) && (tr->langopts.numbers2 & 0x200))
  1988. sprintf(ph_out,"%s%s%c%s%s",ph_zeros,ph_append,phonEND_WORD,ph_buf2,ph_buf); // say "thousands" before its number
  1989. else
  1990. sprintf(ph_out,"%s%s%s%c%s",ph_zeros,ph_buf2,ph_buf,phonEND_WORD,ph_append);
  1991. while(decimal_point)
  1992. {
  1993. n_digits++;
  1994. decimal_count = 0;
  1995. while(IsDigit09(word[n_digits+decimal_count]))
  1996. decimal_count++;
  1997. // if(decimal_count > 1)
  1998. {
  1999. max_decimal_count = 2;
  2000. switch(decimal_mode = (tr->langopts.numbers & 0xe000))
  2001. {
  2002. case NUM_DFRACTION_4:
  2003. max_decimal_count = 5;
  2004. case NUM_DFRACTION_2:
  2005. // French/Polish decimal fraction
  2006. while(word[n_digits] == '0')
  2007. {
  2008. Lookup(tr, "_0", buf1);
  2009. strcat(ph_out,buf1);
  2010. decimal_count--;
  2011. n_digits++;
  2012. }
  2013. if((decimal_count <= max_decimal_count) && IsDigit09(word[n_digits]))
  2014. {
  2015. LookupNum3(tr, atoi(&word[n_digits]), buf1, 0,0,0);
  2016. strcat(ph_out,buf1);
  2017. n_digits += decimal_count;
  2018. }
  2019. break;
  2020. case NUM_DFRACTION_1: // italian, say "hundredths" if leading zero
  2021. case NUM_DFRACTION_5: // hungarian, always say "tenths" etc.
  2022. case NUM_DFRACTION_6: // kazakh, always say "tenths" etc, before the decimal fraction
  2023. LookupNum3(tr, atoi(&word[n_digits]), ph_buf, 0,0,0);
  2024. if((word[n_digits]=='0') || (decimal_mode != NUM_DFRACTION_1))
  2025. {
  2026. // decimal part has leading zeros, so add a "hundredths" or "thousandths" suffix
  2027. sprintf(string,"_0Z%d",decimal_count);
  2028. if(Lookup(tr, string, buf1) == 0)
  2029. break; // revert to speaking single digits
  2030. if(decimal_mode == NUM_DFRACTION_6)
  2031. strcat(ph_out, buf1);
  2032. else
  2033. strcat(ph_buf, buf1);
  2034. }
  2035. strcat(ph_out,ph_buf);
  2036. n_digits += decimal_count;
  2037. break;
  2038. case NUM_DFRACTION_3:
  2039. // Romanian decimal fractions
  2040. if((decimal_count <= 4) && (word[n_digits] != '0'))
  2041. {
  2042. LookupNum3(tr, atoi(&word[n_digits]), buf1, 0,0,0);
  2043. strcat(ph_out,buf1);
  2044. n_digits += decimal_count;
  2045. }
  2046. break;
  2047. case NUM_DFRACTION_7:
  2048. // alternative form of decimal fraction digits, except the final digit
  2049. while(decimal_count-- > 1)
  2050. {
  2051. sprintf(string,"_%cd", word[n_digits]);
  2052. if(Lookup(tr, string, buf1) == 0)
  2053. break;
  2054. n_digits++;
  2055. strcat(ph_out, buf1);
  2056. }
  2057. }
  2058. }
  2059. while(IsDigit09(c = word[n_digits]) && (strlen(ph_out) < (N_WORD_PHONEMES - 10)))
  2060. {
  2061. // speak any remaining decimal fraction digits individually
  2062. value = word[n_digits++] - '0';
  2063. LookupNum2(tr, value, 2, buf1);
  2064. len = strlen(ph_out);
  2065. sprintf(&ph_out[len],"%c%s", phonEND_WORD, buf1);
  2066. }
  2067. // something after the decimal part ?
  2068. if(Lookup(tr, "_dpt2", buf1))
  2069. strcat(ph_out,buf1);
  2070. if((c == tr->langopts.decimal_sep) && IsDigit09(word[n_digits+1]))
  2071. {
  2072. Lookup(tr, "_dpt", buf1);
  2073. strcat(ph_out,buf1);
  2074. }
  2075. else
  2076. {
  2077. decimal_point = 0;
  2078. }
  2079. }
  2080. if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH))
  2081. {
  2082. int next_char;
  2083. char *p;
  2084. p = &word[n_digits+1];
  2085. p += utf8_in(&next_char,p);
  2086. if((tr->langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
  2087. utf8_in(&next_char,p);
  2088. if(!iswalpha2(next_char) && (thousands_exact==0))
  2089. // if(!iswalpha2(next_char) && !((wtab[thousandplex].flags & FLAG_HYPHEN_AFTER) && (thousands_exact != 0)))
  2090. strcat(ph_out,str_pause); // don't add pause for 100s, 6th, etc.
  2091. }
  2092. *flags |= FLAG_FOUND;
  2093. speak_missing_thousands--;
  2094. if(skipwords)
  2095. dictionary_skipwords = skipwords;
  2096. return(1);
  2097. } // end of TranslateNumber_1
  2098. int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
  2099. {//=============================================================================================================
  2100. if((option_sayas == SAYAS_DIGITS1) || (wtab[0].flags & FLAG_INDIVIDUAL_DIGITS))
  2101. return(0); // speak digits individually
  2102. if(tr->langopts.numbers != 0)
  2103. {
  2104. return(TranslateNumber_1(tr, word1, ph_out, flags, wtab, control));
  2105. }
  2106. return(0);
  2107. } // end of TranslateNumber