eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

numbers.c 56KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084
  1. /*
  2. * Copyright (C) 2005 to 2015 by Jonathan Duddington
  3. * email: [email protected]
  4. * Copyright (C) 2015-2016 Reece H. Dunn
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, see: <http://www.gnu.org/licenses/>.
  18. */
  19. #include "config.h"
  20. #include <stdbool.h>
  21. #include <stdio.h>
  22. #include <ctype.h>
  23. #include <stdlib.h>
  24. #include <string.h>
  25. #if HAVE_STDINT_H
  26. #include <stdint.h>
  27. #endif
  28. #include <wctype.h>
  29. #include <wchar.h>
  30. #include "espeak_ng.h"
  31. #include "speak_lib.h"
  32. #include "speech.h"
  33. #include "phoneme.h"
  34. #include "synthesize.h"
  35. #include "voice.h"
  36. #include "translate.h"
  37. #define M_LIGATURE 0x8000
  38. #define M_NAME 0
  39. #define M_SMALLCAP 1
  40. #define M_TURNED 2
  41. #define M_REVERSED 3
  42. #define M_CURL 4
  43. #define M_ACUTE 5
  44. #define M_BREVE 6
  45. #define M_CARON 7
  46. #define M_CEDILLA 8
  47. #define M_CIRCUMFLEX 9
  48. #define M_DIAERESIS 10
  49. #define M_DOUBLE_ACUTE 11
  50. #define M_DOT_ABOVE 12
  51. #define M_GRAVE 13
  52. #define M_MACRON 14
  53. #define M_OGONEK 15
  54. #define M_RING 16
  55. #define M_STROKE 17
  56. #define M_TILDE 18
  57. #define M_BAR 19
  58. #define M_RETROFLEX 20
  59. #define M_HOOK 21
  60. #define M_MIDDLE_DOT M_DOT_ABOVE // duplicate of M_DOT_ABOVE
  61. #define M_IMPLOSIVE M_HOOK
// File-scope state for this translation unit.
// NOTE(review): none of these four variables is touched by the letter-lookup
// code that immediately follows; presumably they belong to the number
// translation code further down the file - confirm against their users.
static int n_digit_lookup;
static char *digit_lookup;
static int speak_missing_thousands;
static int number_control;
  66. typedef struct {
  67. const char *name;
  68. int accent_flags; // bit 0, say before the letter name
  69. } ACCENTS;
  70. // these are tokens to look up in the *_list file.
  71. static ACCENTS accents_tab[] = {
  72. { "_lig", 1 },
  73. { "_smc", 0 }, // smallcap
  74. { "_tur", 0 }, // turned
  75. { "_rev", 0 }, // reversed
  76. { "_crl", 0 }, // curl
  77. { "_acu", 0 }, // acute
  78. { "_brv", 0 }, // breve
  79. { "_hac", 0 }, // caron/hacek
  80. { "_ced", 0 }, // cedilla
  81. { "_cir", 0 }, // circumflex
  82. { "_dia", 0 }, // diaeresis
  83. { "_ac2", 0 }, // double acute
  84. { "_dot", 0 }, // dot
  85. { "_grv", 0 }, // grave
  86. { "_mcn", 0 }, // macron
  87. { "_ogo", 0 }, // ogonek
  88. { "_rng", 0 }, // ring
  89. { "_stk", 0 }, // stroke
  90. { "_tld", 0 }, // tilde
  91. { "_bar", 0 }, // bar
  92. { "_rfx", 0 }, // retroflex
  93. { "_hok", 0 }, // hook
  94. };
  95. #define CAPITAL 0
  96. #define LETTER(ch, mod1, mod2) (ch-59)+(mod1 << 6)+(mod2 << 11)
  97. #define LIGATURE(ch1, ch2, mod1) (ch1-59)+((ch2-59) << 6)+(mod1 << 12)+M_LIGATURE
  98. #define L_ALPHA 60 // U+3B1
  99. #define L_SCHWA 61 // U+259
  100. #define L_OPEN_E 62 // U+25B
  101. #define L_GAMMA 63 // U+3B3
  102. #define L_IOTA 64 // U+3B9
  103. #define L_PHI 67 // U+3C6
  104. #define L_ESH 68 // U+283
  105. #define L_UPSILON 69 // U+3C5
  106. #define L_EZH 70 // U+292
  107. #define L_GLOTTAL 71 // U+294
  108. #define L_RTAP 72 // U+27E
  109. #define L_RLONG 73 // U+27C
  110. static const short non_ascii_tab[] = {
  111. 0,
  112. 0x3b1, 0x259, 0x25b, 0x3b3, 0x3b9, 0x153, 0x3c9,
  113. 0x3c6, 0x283, 0x3c5, 0x292, 0x294, 0x27e, 0x27c
  114. };
// characters U+00e0 to U+017f
// Indexed by (code point - 0xe0); one entry per code point, 160 in total.
// Each non-zero entry is built with LETTER() or LIGATURE() and is unpacked in
// LookupAccentedLetter(). A zero entry (written either as 0 or as CAPITAL)
// means there is no accent data for that code point.
// NOTE(review): the CAPITAL slots line up with the upper-case code points of
// each pair; presumably callers lower-case the letter before lookup - confirm.
static const unsigned short letter_accents_0e0[] = {
LETTER('a', M_GRAVE, 0), // U+00e0
LETTER('a', M_ACUTE, 0),
LETTER('a', M_CIRCUMFLEX, 0),
LETTER('a', M_TILDE, 0),
LETTER('a', M_DIAERESIS, 0),
LETTER('a', M_RING, 0),
LIGATURE('a', 'e', 0),
LETTER('c', M_CEDILLA, 0),
LETTER('e', M_GRAVE, 0),
LETTER('e', M_ACUTE, 0),
LETTER('e', M_CIRCUMFLEX, 0),
LETTER('e', M_DIAERESIS, 0),
LETTER('i', M_GRAVE, 0),
LETTER('i', M_ACUTE, 0),
LETTER('i', M_CIRCUMFLEX, 0),
LETTER('i', M_DIAERESIS, 0),
LETTER('d', M_NAME, 0), // eth U+00f0
LETTER('n', M_TILDE, 0),
LETTER('o', M_GRAVE, 0),
LETTER('o', M_ACUTE, 0),
LETTER('o', M_CIRCUMFLEX, 0),
LETTER('o', M_TILDE, 0),
LETTER('o', M_DIAERESIS, 0),
0, // division sign
LETTER('o', M_STROKE, 0),
LETTER('u', M_GRAVE, 0),
LETTER('u', M_ACUTE, 0),
LETTER('u', M_CIRCUMFLEX, 0),
LETTER('u', M_DIAERESIS, 0),
LETTER('y', M_ACUTE, 0),
LETTER('t', M_NAME, 0), // thorn
LETTER('y', M_DIAERESIS, 0),
CAPITAL, // U+0100
LETTER('a', M_MACRON, 0),
CAPITAL,
LETTER('a', M_BREVE, 0),
CAPITAL,
LETTER('a', M_OGONEK, 0),
CAPITAL,
LETTER('c', M_ACUTE, 0),
CAPITAL,
LETTER('c', M_CIRCUMFLEX, 0),
CAPITAL,
LETTER('c', M_DOT_ABOVE, 0),
CAPITAL,
LETTER('c', M_CARON, 0),
CAPITAL,
LETTER('d', M_CARON, 0),
CAPITAL, // U+0110
LETTER('d', M_STROKE, 0),
CAPITAL,
LETTER('e', M_MACRON, 0),
CAPITAL,
LETTER('e', M_BREVE, 0),
CAPITAL,
LETTER('e', M_DOT_ABOVE, 0),
CAPITAL,
LETTER('e', M_OGONEK, 0),
CAPITAL,
LETTER('e', M_CARON, 0),
CAPITAL,
LETTER('g', M_CIRCUMFLEX, 0),
CAPITAL,
LETTER('g', M_BREVE, 0),
CAPITAL, // U+0120
LETTER('g', M_DOT_ABOVE, 0),
CAPITAL,
LETTER('g', M_CEDILLA, 0),
CAPITAL,
LETTER('h', M_CIRCUMFLEX, 0),
CAPITAL,
LETTER('h', M_STROKE, 0),
CAPITAL,
LETTER('i', M_TILDE, 0),
CAPITAL,
LETTER('i', M_MACRON, 0),
CAPITAL,
LETTER('i', M_BREVE, 0),
CAPITAL,
LETTER('i', M_OGONEK, 0),
CAPITAL, // U+0130
LETTER('i', M_NAME, 0), // dotless i
CAPITAL,
LIGATURE('i', 'j', 0),
CAPITAL,
LETTER('j', M_CIRCUMFLEX, 0),
CAPITAL,
LETTER('k', M_CEDILLA, 0),
LETTER('k', M_NAME, 0), // kra
CAPITAL,
LETTER('l', M_ACUTE, 0),
CAPITAL,
LETTER('l', M_CEDILLA, 0),
CAPITAL,
LETTER('l', M_CARON, 0),
CAPITAL,
LETTER('l', M_MIDDLE_DOT, 0), // U+0140
CAPITAL,
LETTER('l', M_STROKE, 0),
CAPITAL,
LETTER('n', M_ACUTE, 0),
CAPITAL,
LETTER('n', M_CEDILLA, 0),
CAPITAL,
LETTER('n', M_CARON, 0),
LETTER('n', M_NAME, 0), // apostrophe n
CAPITAL,
LETTER('n', M_NAME, 0), // eng
CAPITAL,
LETTER('o', M_MACRON, 0),
CAPITAL,
LETTER('o', M_BREVE, 0),
CAPITAL, // U+0150
LETTER('o', M_DOUBLE_ACUTE, 0),
CAPITAL,
LIGATURE('o', 'e', 0),
CAPITAL,
LETTER('r', M_ACUTE, 0),
CAPITAL,
LETTER('r', M_CEDILLA, 0),
CAPITAL,
LETTER('r', M_CARON, 0),
CAPITAL,
LETTER('s', M_ACUTE, 0),
CAPITAL,
LETTER('s', M_CIRCUMFLEX, 0),
CAPITAL,
LETTER('s', M_CEDILLA, 0),
CAPITAL, // U+0160
LETTER('s', M_CARON, 0),
CAPITAL,
LETTER('t', M_CEDILLA, 0),
CAPITAL,
LETTER('t', M_CARON, 0),
CAPITAL,
LETTER('t', M_STROKE, 0),
CAPITAL,
LETTER('u', M_TILDE, 0),
CAPITAL,
LETTER('u', M_MACRON, 0),
CAPITAL,
LETTER('u', M_BREVE, 0),
CAPITAL,
LETTER('u', M_RING, 0),
CAPITAL, // U+0170
LETTER('u', M_DOUBLE_ACUTE, 0),
CAPITAL,
LETTER('u', M_OGONEK, 0),
CAPITAL,
LETTER('w', M_CIRCUMFLEX, 0),
CAPITAL,
LETTER('y', M_CIRCUMFLEX, 0),
CAPITAL, // Y-DIAERESIS
CAPITAL,
LETTER('z', M_ACUTE, 0),
CAPITAL,
LETTER('z', M_DOT_ABOVE, 0),
CAPITAL,
LETTER('z', M_CARON, 0),
LETTER('s', M_NAME, 0), // long-s U+17f
};
// characters U+0250 to U+02A8
// Indexed by (code point - 0x250); one entry per code point, 89 in total,
// matching the range check in LookupAccentedLetter().
// Each non-zero entry is built with LETTER() or LIGATURE(); a zero entry means
// there is no accent data for that code point. Base letters given as L_*
// codes are mapped back to code points through non_ascii_tab.
static const unsigned short letter_accents_250[] = {
LETTER('a', M_TURNED, 0), // U+250
LETTER(L_ALPHA, 0, 0),
LETTER(L_ALPHA, M_TURNED, 0),
LETTER('b', M_IMPLOSIVE, 0),
0, // open-o
LETTER('c', M_CURL, 0),
LETTER('d', M_RETROFLEX, 0),
LETTER('d', M_IMPLOSIVE, 0),
LETTER('e', M_REVERSED, 0), // U+258
0, // schwa
LETTER(L_SCHWA, M_HOOK, 0),
0, // open-e
LETTER(L_OPEN_E, M_REVERSED, 0),
LETTER(L_OPEN_E, M_HOOK, M_REVERSED),
0,
LETTER('j', M_BAR, 0),
LETTER('g', M_IMPLOSIVE, 0), // U+260
LETTER('g', 0, 0),
LETTER('g', M_SMALLCAP, 0),
LETTER(L_GAMMA, 0, 0),
0, // ramshorn
LETTER('h', M_TURNED, 0),
LETTER('h', M_HOOK, 0),
0,
LETTER('i', M_BAR, 0), // U+268
LETTER(L_IOTA, 0, 0),
LETTER('i', M_SMALLCAP, 0),
LETTER('l', M_TILDE, 0),
LETTER('l', M_BAR, 0),
LETTER('l', M_RETROFLEX, 0),
LIGATURE('l', 'z', 0),
LETTER('m', M_TURNED, 0),
0,
LETTER('m', M_HOOK, 0),
0,
LETTER('n', M_RETROFLEX, 0),
LETTER('n', M_SMALLCAP, 0),
LETTER('o', M_BAR, 0),
LIGATURE('o', 'e', M_SMALLCAP),
0,
LETTER(L_PHI, 0, 0), // U+278
LETTER('r', M_TURNED, 0),
LETTER(L_RLONG, M_TURNED, 0),
LETTER('r', M_RETROFLEX, M_TURNED),
0,
LETTER('r', M_RETROFLEX, 0),
0, // r-tap
LETTER(L_RTAP, M_REVERSED, 0),
LETTER('r', M_SMALLCAP, 0), // U+280
LETTER('r', M_TURNED, M_SMALLCAP),
LETTER('s', M_RETROFLEX, 0),
0, // esh
LETTER('j', M_HOOK, 0),
LETTER(L_ESH, M_REVERSED, 0),
LETTER(L_ESH, M_CURL, 0),
LETTER('t', M_TURNED, 0),
LETTER('t', M_RETROFLEX, 0), // U+288
LETTER('u', M_BAR, 0),
LETTER(L_UPSILON, 0, 0),
LETTER('v', M_HOOK, 0),
LETTER('v', M_TURNED, 0),
LETTER('w', M_TURNED, 0),
LETTER('y', M_TURNED, 0),
LETTER('y', M_SMALLCAP, 0),
LETTER('z', M_RETROFLEX, 0), // U+290
LETTER('z', M_CURL, 0),
0, // ezh
LETTER(L_EZH, M_CURL, 0),
0, // glottal stop
LETTER(L_GLOTTAL, M_REVERSED, 0),
LETTER(L_GLOTTAL, M_TURNED, 0),
0,
0, // bilabial click U+298
LETTER('b', M_SMALLCAP, 0),
0,
LETTER('g', M_IMPLOSIVE, M_SMALLCAP),
LETTER('h', M_SMALLCAP, 0),
LETTER('j', M_CURL, 0),
LETTER('k', M_TURNED, 0),
LETTER('l', M_SMALLCAP, 0),
LETTER('q', M_HOOK, 0), // U+2a0
LETTER(L_GLOTTAL, M_STROKE, 0),
LETTER(L_GLOTTAL, M_STROKE, M_REVERSED),
LIGATURE('d', 'z', 0),
0, // dezh
LIGATURE('d', 'z', M_CURL),
LIGATURE('t', 's', 0),
0, // tesh
LIGATURE('t', 's', M_CURL),
};
  370. static int LookupLetter2(Translator *tr, unsigned int letter, char *ph_buf)
  371. {
  372. int len;
  373. char single_letter[10];
  374. single_letter[0] = 0;
  375. single_letter[1] = '_';
  376. len = utf8_out(letter, &single_letter[2]);
  377. single_letter[len+2] = ' ';
  378. single_letter[len+3] = 0;
  379. if (Lookup(tr, &single_letter[1], ph_buf) == 0) {
  380. single_letter[1] = ' ';
  381. if (Lookup(tr, &single_letter[2], ph_buf) == 0)
  382. TranslateRules(tr, &single_letter[2], ph_buf, 20, NULL, 0, NULL);
  383. }
  384. return ph_buf[0];
  385. }
  386. void LookupAccentedLetter(Translator *tr, unsigned int letter, char *ph_buf)
  387. {
  388. // lookup the character in the accents table
  389. int accent_data = 0;
  390. int accent1 = 0;
  391. int accent2 = 0;
  392. int flags1, flags2;
  393. int basic_letter;
  394. int letter2 = 0;
  395. char ph_letter1[30];
  396. char ph_letter2[30];
  397. char ph_accent1[30];
  398. char ph_accent2[30];
  399. ph_accent2[0] = 0;
  400. if ((letter >= 0xe0) && (letter < 0x17f))
  401. accent_data = letter_accents_0e0[letter - 0xe0];
  402. else if ((letter >= 0x250) && (letter <= 0x2a8))
  403. accent_data = letter_accents_250[letter - 0x250];
  404. if (accent_data != 0) {
  405. basic_letter = (accent_data & 0x3f) + 59;
  406. if (basic_letter < 'a')
  407. basic_letter = non_ascii_tab[basic_letter-59];
  408. if (accent_data & M_LIGATURE) {
  409. letter2 = (accent_data >> 6) & 0x3f;
  410. letter2 += 59;
  411. accent2 = (accent_data >> 12) & 0x7;
  412. } else {
  413. accent1 = (accent_data >> 6) & 0x1f;
  414. accent2 = (accent_data >> 11) & 0xf;
  415. }
  416. if ((accent1 == 0) && !(accent_data & M_LIGATURE)) {
  417. // just a letter name, not an accented character or ligature
  418. return;
  419. }
  420. if ((flags1 = Lookup(tr, accents_tab[accent1].name, ph_accent1)) != 0) {
  421. if (LookupLetter2(tr, basic_letter, ph_letter1) != 0) {
  422. if (accent2 != 0) {
  423. flags2 = Lookup(tr, accents_tab[accent2].name, ph_accent2);
  424. if (flags2 & FLAG_ACCENT_BEFORE) {
  425. strcpy(ph_buf, ph_accent2);
  426. ph_buf += strlen(ph_buf);
  427. ph_accent2[0] = 0;
  428. }
  429. }
  430. if (letter2 != 0) {
  431. // ligature
  432. LookupLetter2(tr, letter2, ph_letter2);
  433. sprintf(ph_buf, "%s%c%s%c%s%s", ph_accent1, phonPAUSE_VSHORT, ph_letter1, phonSTRESS_P, ph_letter2, ph_accent2);
  434. } else {
  435. if (accent1 == 0)
  436. strcpy(ph_buf, ph_letter1);
  437. else if ((tr->langopts.accents & 1) || (flags1 & FLAG_ACCENT_BEFORE) || (accents_tab[accent1].accent_flags & 1))
  438. sprintf(ph_buf, "%s%c%c%s", ph_accent1, phonPAUSE_VSHORT, phonSTRESS_P, ph_letter1);
  439. else
  440. sprintf(ph_buf, "%c%s%c%s%c", phonSTRESS_2, ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT);
  441. }
  442. }
  443. }
  444. }
  445. }
  446. void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_buf1, int control)
  447. {
  448. // control, bit 0: not the first letter of a word
  449. int len;
  450. static char single_letter[10] = { 0, 0 };
  451. unsigned int dict_flags[2];
  452. char ph_buf3[40];
  453. ph_buf1[0] = 0;
  454. len = utf8_out(letter, &single_letter[2]);
  455. single_letter[len+2] = ' ';
  456. if (next_byte == -1) {
  457. // speaking normal text, not individual characters
  458. if (Lookup(tr, &single_letter[2], ph_buf1) != 0)
  459. return;
  460. single_letter[1] = '_';
  461. if (Lookup(tr, &single_letter[1], ph_buf3) != 0)
  462. return; // the character is specified as _* so ignore it when speaking normal text
  463. // check whether this character is specified for English
  464. if (tr->translator_name == L('e', 'n'))
  465. return; // we are already using English
  466. SetTranslator2("en");
  467. if (Lookup(translator2, &single_letter[2], ph_buf3) != 0) {
  468. // yes, switch to English and re-translate the word
  469. sprintf(ph_buf1, "%c", phonSWITCH);
  470. }
  471. SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
  472. return;
  473. }
  474. if ((letter <= 32) || iswspace(letter)) {
  475. // lookup space as _&32 etc.
  476. sprintf(&single_letter[1], "_#%d ", letter);
  477. Lookup(tr, &single_letter[1], ph_buf1);
  478. return;
  479. }
  480. if (next_byte != ' ')
  481. next_byte = RULE_SPELLING;
  482. single_letter[3+len] = next_byte; // follow by space-space if the end of the word, or space-31
  483. single_letter[1] = '_';
  484. // if the $accent flag is set for this letter, use the accents table (below)
  485. dict_flags[1] = 0;
  486. if (Lookup(tr, &single_letter[1], ph_buf3) == 0) {
  487. single_letter[1] = ' ';
  488. if (Lookup(tr, &single_letter[2], ph_buf3) == 0)
  489. TranslateRules(tr, &single_letter[2], ph_buf3, sizeof(ph_buf3), NULL, FLAG_NO_TRACE, NULL);
  490. }
  491. if (ph_buf3[0] == 0)
  492. LookupAccentedLetter(tr, letter, ph_buf3);
  493. strcpy(ph_buf1, ph_buf3);
  494. if ((ph_buf1[0] == 0) || (ph_buf1[0] == phonSWITCH))
  495. return;
  496. dict_flags[0] = 0;
  497. dict_flags[1] = 0;
  498. SetWordStress(tr, ph_buf1, dict_flags, -1, control & 1);
  499. }
  500. // unicode ranges for non-ascii digits 0-9 (these must be in ascending order)
  501. static const int number_ranges[] = {
  502. 0x660, 0x6f0, // arabic
  503. 0x966, 0x9e6, 0xa66, 0xae6, 0xb66, 0xbe6, 0xc66, 0xce6, 0xd66, // indic
  504. 0xe50, 0xed0, 0xf20, 0x1040, 0x1090,
  505. 0
  506. };
  507. int NonAsciiNumber(int letter)
  508. {
  509. // Change non-ascii digit into ascii digit '0' to '9', (or -1 if not)
  510. const int *p;
  511. int base;
  512. for (p = number_ranges; (base = *p) != 0; p++) {
  513. if (letter < base)
  514. break; // not found
  515. if (letter < (base+10))
  516. return letter-base+'0';
  517. }
  518. return -1;
  519. }
  520. #define L_SUB 0x4000 // subscript
  521. #define L_SUP 0x8000 // superscript
  522. static const char *modifiers[] = { NULL, "_sub", "_sup", NULL };
  523. // this list must be in ascending order
  524. static unsigned short derived_letters[] = {
  525. 0x00aa, 'a'+L_SUP,
  526. 0x00b2, '2'+L_SUP,
  527. 0x00b3, '3'+L_SUP,
  528. 0x00b9, '1'+L_SUP,
  529. 0x00ba, 'o'+L_SUP,
  530. 0x02b0, 'h'+L_SUP,
  531. 0x02b1, 0x266+L_SUP,
  532. 0x02b2, 'j'+L_SUP,
  533. 0x02b3, 'r'+L_SUP,
  534. 0x02b4, 0x279+L_SUP,
  535. 0x02b5, 0x27b+L_SUP,
  536. 0x02b6, 0x281+L_SUP,
  537. 0x02b7, 'w'+L_SUP,
  538. 0x02b8, 'y'+L_SUP,
  539. 0x02c0, 0x294+L_SUP,
  540. 0x02c1, 0x295+L_SUP,
  541. 0x02e0, 0x263+L_SUP,
  542. 0x02e1, 'l'+L_SUP,
  543. 0x02e2, 's'+L_SUP,
  544. 0x02e3, 'x'+L_SUP,
  545. 0x2070, '0'+L_SUP,
  546. 0x2071, 'i'+L_SUP,
  547. 0x2074, '4'+L_SUP,
  548. 0x2075, '5'+L_SUP,
  549. 0x2076, '6'+L_SUP,
  550. 0x2077, '7'+L_SUP,
  551. 0x2078, '8'+L_SUP,
  552. 0x2079, '9'+L_SUP,
  553. 0x207a, '+'+L_SUP,
  554. 0x207b, '-'+L_SUP,
  555. 0x207c, '='+L_SUP,
  556. 0x207d, '('+L_SUP,
  557. 0x207e, ')'+L_SUP,
  558. 0x207f, 'n'+L_SUP,
  559. 0x2080, '0'+L_SUB,
  560. 0x2081, '1'+L_SUB,
  561. 0x2082, '2'+L_SUB,
  562. 0x2083, '3'+L_SUB,
  563. 0x2084, '4'+L_SUB,
  564. 0x2085, '5'+L_SUB,
  565. 0x2086, '6'+L_SUB,
  566. 0x2087, '7'+L_SUB,
  567. 0x2088, '8'+L_SUB,
  568. 0x2089, '9'+L_SUB,
  569. 0x208a, '+'+L_SUB,
  570. 0x208b, '-'+L_SUB,
  571. 0x208c, '='+L_SUB,
  572. 0x208d, '('+L_SUB,
  573. 0x208e, ')'+L_SUB,
  574. 0x2090, 'a'+L_SUB,
  575. 0x2091, 'e'+L_SUB,
  576. 0x2092, 'o'+L_SUB,
  577. 0x2093, 'x'+L_SUB,
  578. 0x2094, 0x259+L_SUB,
  579. 0x2095, 'h'+L_SUB,
  580. 0x2096, 'k'+L_SUB,
  581. 0x2097, 'l'+L_SUB,
  582. 0x2098, 'm'+L_SUB,
  583. 0x2099, 'n'+L_SUB,
  584. 0x209a, 'p'+L_SUB,
  585. 0x209b, 's'+L_SUB,
  586. 0x209c, 't'+L_SUB,
  587. 0, 0
  588. };
  589. // names, using phonemes available to all languages
  590. static const char *hex_letters[] = {
  591. "'e:j",
  592. "b'i:",
  593. "s'i:",
  594. "d'i:",
  595. "'i:",
  596. "'ef"
  597. };
  598. int IsSuperscript(int letter)
  599. {
  600. // is this a subscript or superscript letter ?
  601. int ix;
  602. int c;
  603. for (ix = 0; (c = derived_letters[ix]) != 0; ix += 2) {
  604. if (c > letter)
  605. break;
  606. if (c == letter)
  607. return derived_letters[ix+1];
  608. }
  609. return 0;
  610. }
  611. int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)
  612. {
  613. // get pronunciation for an isolated letter
  614. // return number of bytes used by the letter
  615. // control bit 0: a non-initial letter in a word
  616. // bit 1: say 'capital'
  617. // bit 2: say character code for unknown letters
  618. int n_bytes;
  619. int letter;
  620. int len;
  621. int ix;
  622. int c;
  623. char *p2;
  624. char *pbuf;
  625. const char *modifier;
  626. ALPHABET *alphabet;
  627. int al_offset;
  628. int al_flags;
  629. int language;
  630. int number;
  631. int phontab_1;
  632. int speak_letter_number;
  633. char capital[30];
  634. char ph_buf[80];
  635. char ph_buf2[80];
  636. char ph_alphabet[80];
  637. char hexbuf[12];
  638. static char pause_string[] = { phonPAUSE, 0 };
  639. ph_buf[0] = 0;
  640. ph_alphabet[0] = 0;
  641. capital[0] = 0;
  642. phontab_1 = translator->phoneme_tab_ix;
  643. n_bytes = utf8_in(&letter, word);
  644. if ((letter & 0xfff00) == 0x0e000)
  645. letter &= 0xff; // uncode private usage area
  646. if (control & 2) {
  647. // include CAPITAL information
  648. if (iswupper2(letter))
  649. Lookup(tr, "_cap", capital);
  650. }
  651. letter = towlower2(letter);
  652. LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
  653. if (ph_buf[0] == 0) {
  654. // is this a subscript or superscript letter ?
  655. if ((c = IsSuperscript(letter)) != 0) {
  656. letter = c & 0x3fff;
  657. if ((control & 4 ) && ((modifier = modifiers[c >> 14]) != NULL)) {
  658. // don't say "superscript" during normal text reading
  659. Lookup(tr, modifier, capital);
  660. if (capital[0] == 0) {
  661. capital[2] = SetTranslator2("en"); // overwrites previous contents of translator2
  662. Lookup(translator2, modifier, &capital[3]);
  663. if (capital[3] != 0) {
  664. capital[0] = phonPAUSE;
  665. capital[1] = phonSWITCH;
  666. len = strlen(&capital[3]);
  667. capital[len+3] = phonSWITCH;
  668. capital[len+4] = phontab_1;
  669. capital[len+5] = 0;
  670. }
  671. }
  672. }
  673. }
  674. LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
  675. }
  676. if (ph_buf[0] == phonSWITCH) {
  677. strcpy(phonemes, ph_buf);
  678. return 0;
  679. }
  680. if ((ph_buf[0] == 0) && ((number = NonAsciiNumber(letter)) > 0)) {
  681. // convert a non-ascii number to 0-9
  682. LookupLetter(tr, number, 0, ph_buf, control & 1);
  683. }
  684. al_offset = 0;
  685. al_flags = 0;
  686. if ((alphabet = AlphabetFromChar(letter)) != NULL) {
  687. al_offset = alphabet->offset;
  688. al_flags = alphabet->flags;
  689. }
  690. if (alphabet != current_alphabet) {
  691. // speak the name of the alphabet
  692. current_alphabet = alphabet;
  693. if ((alphabet != NULL) && !(al_flags & AL_DONT_NAME) && (al_offset != translator->letter_bits_offset)) {
  694. if ((al_flags & AL_DONT_NAME) || (al_offset == translator->langopts.alt_alphabet) || (al_offset == translator->langopts.our_alphabet)) {
  695. // don't say the alphabet name
  696. } else {
  697. ph_buf2[0] = 0;
  698. if (Lookup(translator, alphabet->name, ph_alphabet) == 0) { // the original language for the current voice
  699. // Can't find the local name for this alphabet, use the English name
  700. ph_alphabet[2] = SetTranslator2("en"); // overwrites previous contents of translator2
  701. Lookup(translator2, alphabet->name, ph_buf2);
  702. } else if (translator != tr) {
  703. phontab_1 = tr->phoneme_tab_ix;
  704. strcpy(ph_buf2, ph_alphabet);
  705. ph_alphabet[2] = translator->phoneme_tab_ix;
  706. }
  707. if (ph_buf2[0] != 0) {
  708. // we used a different language for the alphabet name (now in ph_buf2)
  709. ph_alphabet[0] = phonPAUSE;
  710. ph_alphabet[1] = phonSWITCH;
  711. strcpy(&ph_alphabet[3], ph_buf2);
  712. len = strlen(ph_buf2) + 3;
  713. ph_alphabet[len] = phonSWITCH;
  714. ph_alphabet[len+1] = phontab_1;
  715. ph_alphabet[len+2] = 0;
  716. }
  717. }
  718. }
  719. }
  720. // caution: SetWordStress() etc don't expect phonSWITCH + phoneme table number
  721. if (ph_buf[0] == 0) {
  722. if ((al_offset != 0) && (al_offset == translator->langopts.alt_alphabet))
  723. language = translator->langopts.alt_alphabet_lang;
  724. else if ((alphabet != NULL) && (alphabet->language != 0) && !(al_flags & AL_NOT_LETTERS))
  725. language = alphabet->language;
  726. else
  727. language = L('e', 'n');
  728. if ((language != tr->translator_name) || (language == L('k', 'o'))) {
  729. char *p3;
  730. int initial, code;
  731. char hangul_buf[12];
  732. // speak in the language for this alphabet (or English)
  733. ph_buf[2] = SetTranslator2(WordToString2(language));
  734. if (translator2 != NULL) {
  735. if (((code = letter - 0xac00) >= 0) && (letter <= 0xd7af)) {
  736. // Special case for Korean letters.
  737. // break a syllable hangul into 2 or 3 individual jamo
  738. hangul_buf[0] = ' ';
  739. p3 = &hangul_buf[1];
  740. if ((initial = (code/28)/21) != 11) {
  741. p3 += utf8_out(initial + 0x1100, p3);
  742. }
  743. utf8_out(((code/28) % 21) + 0x1161, p3); // medial
  744. utf8_out((code % 28) + 0x11a7, &p3[3]); // final
  745. p3[6] = ' ';
  746. p3[7] = 0;
  747. ph_buf[3] = 0;
  748. TranslateRules(translator2, &hangul_buf[1], &ph_buf[3], sizeof(ph_buf)-3, NULL, 0, NULL);
  749. SetWordStress(translator2, &ph_buf[3], NULL, -1, 0);
  750. } else
  751. LookupLetter(translator2, letter, word[n_bytes], &ph_buf[3], control & 1);
  752. if (ph_buf[3] == phonSWITCH) {
  753. // another level of language change
  754. ph_buf[2] = SetTranslator2(&ph_buf[4]);
  755. LookupLetter(translator2, letter, word[n_bytes], &ph_buf[3], control & 1);
  756. }
  757. SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
  758. if (ph_buf[3] != 0) {
  759. ph_buf[0] = phonPAUSE;
  760. ph_buf[1] = phonSWITCH;
  761. len = strlen(&ph_buf[3]) + 3;
  762. ph_buf[len] = phonSWITCH; // switch back
  763. ph_buf[len+1] = tr->phoneme_tab_ix;
  764. ph_buf[len+2] = 0;
  765. }
  766. }
  767. }
  768. }
  769. if (ph_buf[0] == 0) {
  770. // character name not found
  771. if (ph_buf[0] == 0) {
  772. speak_letter_number = 1;
  773. if (!(al_flags & AL_NO_SYMBOL)) {
  774. if (iswalpha2(letter))
  775. Lookup(translator, "_?A", ph_buf);
  776. if ((ph_buf[0] == 0) && !iswspace(letter))
  777. Lookup(translator, "_??", ph_buf);
  778. if (ph_buf[0] == 0)
  779. EncodePhonemes("l'et@", ph_buf, NULL);
  780. }
  781. if (!(control & 4) && (al_flags & AL_NOT_CODE)) {
  782. // don't speak the character code number, unless we want full details of this character
  783. speak_letter_number = 0;
  784. }
  785. if (speak_letter_number) {
  786. if (al_offset == 0x2800) {
  787. // braille dots symbol, list the numbered dots
  788. p2 = hexbuf;
  789. for (ix = 0; ix < 8; ix++) {
  790. if (letter & (1 << ix))
  791. *p2++ = '1'+ix;
  792. }
  793. *p2 = 0;
  794. } else {
  795. // speak the hexadecimal number of the character code
  796. sprintf(hexbuf, "%x", letter);
  797. }
  798. pbuf = ph_buf;
  799. for (p2 = hexbuf; *p2 != 0; p2++) {
  800. pbuf += strlen(pbuf);
  801. *pbuf++ = phonPAUSE_VSHORT;
  802. LookupLetter(translator, *p2, 0, pbuf, 1);
  803. if (((pbuf[0] == 0) || (pbuf[0] == phonSWITCH)) && (*p2 >= 'a')) {
  804. // This language has no translation for 'a' to 'f', speak English names using base phonemes
  805. EncodePhonemes(hex_letters[*p2 - 'a'], pbuf, NULL);
  806. }
  807. }
  808. strcat(pbuf, pause_string);
  809. }
  810. }
  811. }
  812. len = strlen(phonemes);
  813. if (tr->langopts.accents & 2) // 'capital' before or after the word ?
  814. sprintf(ph_buf2, "%c%s%s%s", 0xff, ph_alphabet, ph_buf, capital);
  815. else
  816. sprintf(ph_buf2, "%c%s%s%s", 0xff, ph_alphabet, capital, ph_buf); // the 0xff marker will be removed or replaced in SetSpellingStress()
  817. if ((len + strlen(ph_buf2)) < N_WORD_PHONEMES)
  818. strcpy(&phonemes[len], ph_buf2);
  819. return n_bytes;
  820. }
  821. void SetSpellingStress(Translator *tr, char *phonemes, int control, int n_chars)
  822. {
  823. // Individual letter names, reduce the stress of some.
  824. int ix;
  825. unsigned int c;
  826. int n_stress = 0;
  827. int prev = 0;
  828. int count;
  829. unsigned char buf[N_WORD_PHONEMES];
  830. for (ix = 0; (c = phonemes[ix]) != 0; ix++) {
  831. if ((c == phonSTRESS_P) && (prev != phonSWITCH))
  832. n_stress++;
  833. buf[ix] = prev = c;
  834. }
  835. buf[ix] = 0;
  836. count = 0;
  837. prev = 0;
  838. for (ix = 0; (c = buf[ix]) != 0; ix++) {
  839. if ((c == phonSTRESS_P) && (n_chars > 1) && (prev != phonSWITCH)) {
  840. count++;
  841. if (tr->langopts.spelling_stress == 1) {
  842. // stress on initial letter when spelling
  843. if (count > 1)
  844. c = phonSTRESS_3;
  845. } else {
  846. if (count != n_stress) {
  847. if (((count % 3) != 0) || (count == n_stress-1))
  848. c = phonSTRESS_3; // reduce to secondary stress
  849. }
  850. }
  851. } else if (c == 0xff) {
  852. if ((control < 2) || (ix == 0))
  853. continue; // don't insert pauses
  854. if (control == 4)
  855. c = phonPAUSE; // pause after each character
  856. if (((count % 3) == 0) || (control > 2))
  857. c = phonPAUSE_NOLINK; // pause following a primary stress
  858. else
  859. c = phonPAUSE_VSHORT;
  860. }
  861. *phonemes++ = prev = c;
  862. }
  863. if (control >= 2)
  864. *phonemes++ = phonPAUSE_NOLINK;
  865. *phonemes = 0;
  866. }
  // Numbers

  // Phonemes of the ordinal suffix that follows the number being translated.
  // TranslateNumber_1() clears ph_ordinal2 and fills it from the "_#<suffix>"
  // dictionary entry; LookupNum2() and LookupNum3() append it to ordinals.
  static char ph_ordinal2[12] = { 0 };
  // Alternate pronunciation of the same suffix, from the "_x#<suffix>" entry.
  static char ph_ordinal2x[12] = { 0 };
  870. static int CheckDotOrdinal(Translator *tr, char *word, char *word_end, WORD_TAB *wtab, int roman)
  871. {
  872. int ordinal = 0;
  873. int c2;
  874. int nextflags;
  875. if ((tr->langopts.numbers & NUM_ORDINAL_DOT) && ((word_end[0] == '.') || (wtab[0].flags & FLAG_HAS_DOT)) && !(wtab[1].flags & FLAG_NOSPACE)) {
  876. if (roman || !(wtab[1].flags & FLAG_FIRST_UPPER)) {
  877. if (word_end[0] == '.')
  878. utf8_in(&c2, &word_end[2]);
  879. else
  880. utf8_in(&c2, &word_end[0]);
  881. if ((word_end[0] != 0) && (word_end[1] != 0) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || IsAlpha(c2))) {
  882. // ordinal number is indicated by dot after the number
  883. // but not if the next word starts with an upper-case letter
  884. // (c2 == 0) is for cases such as, "2.,"
  885. ordinal = 2;
  886. if (word_end[0] == '.')
  887. word_end[0] = ' ';
  888. if ((roman == 0) && (tr->translator_name == L('h', 'u'))) {
  889. // lang=hu don't treat dot as ordinal indicator if the next word is a month name ($alt). It may have a suffix.
  890. nextflags = 0;
  891. if (IsAlpha(c2))
  892. nextflags = TranslateWord(tr, &word_end[2], NULL, NULL);
  893. if ((tr->prev_dict_flags[0] & FLAG_ALT_TRANS) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || iswdigit(c2)))
  894. ordinal = 0; // TEST 09.02.10
  895. if (nextflags & FLAG_ALT_TRANS)
  896. ordinal = 0;
  897. if (nextflags & FLAG_ALT3_TRANS) {
  898. if (word[-2] == '-')
  899. ordinal = 0; // e.g. december 2-5. között
  900. if (tr->prev_dict_flags[0] & (FLAG_ALT_TRANS | FLAG_ALT3_TRANS))
  901. ordinal = 0x22;
  902. }
  903. }
  904. }
  905. }
  906. }
  907. return ordinal;
  908. }
  // lang=hu: should the number use its variant ('e') form?
  // That form is used when the number is followed by a hyphen and a suffix
  // starting with 'a' or 'e', but not for the suffixes a, e, az, ez, azt, ezt,
  // att, ett, and not for an "-al"/"-el" suffix after a thousands group
  // (thousandplex == 1) or a multiple of 1000, e.g. 1000-el.
  //
  //   word          the suffix text following the number
  //   thousandplex  power of 1000 of the group being spoken
  //   value         the number's value
  //
  // Returns 1 when the variant form should be used, otherwise 0.
  static int hu_number_e(const char *word, int thousandplex, int value)
  {
      if ((word[0] != 'a') && (word[0] != 'e'))
          return 0;

      switch (word[1]) {
      case ' ': // a, e
      case 'z': // az, ez, azt, ezt
          return 0;
      case 't': // att, ett
          if (word[2] == 't')
              return 0;
          break;
      case 'l': // 1000-el
          if ((thousandplex == 1) || ((value % 1000) == 0))
              return 0;
          break;
      default:
          break;
      }
      return 1;
  }
  921. int TranslateRoman(Translator *tr, char *word, char *ph_out, WORD_TAB *wtab)
  922. {
  923. int c;
  924. char *p;
  925. const char *p2;
  926. int acc;
  927. int prev;
  928. int value;
  929. int subtract;
  930. int repeat = 0;
  931. int n_digits = 0;
  932. char *word_start;
  933. int num_control = 0;
  934. unsigned int flags[2];
  935. char ph_roman[30];
  936. char number_chars[N_WORD_BYTES];
  937. static const char *roman_numbers = "ixcmvld";
  938. static int roman_values[] = { 1, 10, 100, 1000, 5, 50, 500 };
  939. acc = 0;
  940. prev = 0;
  941. subtract = 0x7fff;
  942. ph_out[0] = 0;
  943. flags[0] = 0;
  944. flags[1] = 0;
  945. if (((tr->langopts.numbers & NUM_ROMAN_CAPITALS) && !(wtab[0].flags & FLAG_ALL_UPPER)) || IsDigit09(word[-2]))
  946. return 0; // not '2xx'
  947. if (word[1] == ' ') {
  948. if ((tr->langopts.numbers & (NUM_ROMAN_CAPITALS | NUM_ROMAN_ORDINAL | NUM_ORDINAL_DOT)) && (wtab[0].flags & FLAG_HAS_DOT)) {
  949. // allow single letter Roman ordinal followed by dot.
  950. } else
  951. return 0; // only one letter, don't speak as a Roman Number
  952. }
  953. word_start = word;
  954. while ((c = *word++) != ' ') {
  955. if ((p2 = strchr(roman_numbers, c)) == NULL)
  956. return 0;
  957. value = roman_values[p2 - roman_numbers];
  958. if (value == prev) {
  959. repeat++;
  960. if (repeat >= 3)
  961. return 0;
  962. } else
  963. repeat = 0;
  964. if ((prev > 1) && (prev != 10) && (prev != 100)) {
  965. if (value >= prev)
  966. return 0;
  967. }
  968. if ((prev != 0) && (prev < value)) {
  969. if (((acc % 10) != 0) || ((prev*10) < value))
  970. return 0;
  971. subtract = prev;
  972. value -= subtract;
  973. } else if (value >= subtract)
  974. return 0;
  975. else
  976. acc += prev;
  977. prev = value;
  978. n_digits++;
  979. }
  980. if (IsDigit09(word[0]))
  981. return 0; // e.g. 'xx2'
  982. acc += prev;
  983. if (acc < tr->langopts.min_roman)
  984. return 0;
  985. if (acc > tr->langopts.max_roman)
  986. return 0;
  987. Lookup(tr, "_roman", ph_roman); // precede by "roman" if _rom is defined in *_list
  988. p = &ph_out[0];
  989. if ((tr->langopts.numbers & NUM_ROMAN_AFTER) == 0) {
  990. strcpy(ph_out, ph_roman);
  991. p = &ph_out[strlen(ph_roman)];
  992. }
  993. sprintf(number_chars, " %d %s ", acc, tr->langopts.roman_suffix);
  994. if (word[0] == '.') {
  995. // dot has not been removed. This implies that there was no space after it
  996. return 0;
  997. }
  998. if (CheckDotOrdinal(tr, word_start, word, wtab, 1))
  999. wtab[0].flags |= FLAG_ORDINAL;
  1000. if (tr->langopts.numbers & NUM_ROMAN_ORDINAL) {
  1001. if (tr->translator_name == L('h', 'u')) {
  1002. if (!(wtab[0].flags & FLAG_ORDINAL)) {
  1003. if ((wtab[0].flags & FLAG_HYPHEN_AFTER) && hu_number_e(word, 0, acc)) {
  1004. // should use the 'e' form of the number
  1005. num_control |= 1;
  1006. } else
  1007. return 0;
  1008. }
  1009. } else
  1010. wtab[0].flags |= FLAG_ORDINAL;
  1011. }
  1012. tr->prev_dict_flags[0] = 0;
  1013. tr->prev_dict_flags[1] = 0;
  1014. TranslateNumber(tr, &number_chars[2], p, flags, wtab, num_control);
  1015. if (tr->langopts.numbers & NUM_ROMAN_AFTER)
  1016. strcat(ph_out, ph_roman);
  1017. return 1;
  1018. }
  1019. static const char *M_Variant(int value)
  1020. {
  1021. // returns M, or perhaps MA or MB for some cases
  1022. int teens = 0;
  1023. if (((value % 100) > 10) && ((value % 100) < 20))
  1024. teens = 1;
  1025. switch ((translator->langopts.numbers2 >> 6) & 0x7)
  1026. {
  1027. case 1: // lang=ru use singular for xx1 except for x11
  1028. if ((teens == 0) && ((value % 10) == 1))
  1029. return "1M";
  1030. break;
  1031. case 2: // lang=cs,sk
  1032. if ((value >= 2) && (value <= 4))
  1033. return "0MA";
  1034. break;
  1035. case 3: // lang=pl
  1036. if ((teens == 0) && (((value % 10) >= 2) && ((value % 10) <= 4)))
  1037. return "0MA";
  1038. break;
  1039. case 4: // lang=lt
  1040. if ((teens == 1) || ((value % 10) == 0))
  1041. return "0MB";
  1042. if ((value % 10) == 1)
  1043. return "0MA";
  1044. break;
  1045. case 5: // lang=bs,hr,sr
  1046. if (teens == 0) {
  1047. if ((value % 10) == 1)
  1048. return "1M";
  1049. if (((value % 10) >= 2) && ((value % 10) <= 4))
  1050. return "0MA";
  1051. }
  1052. break;
  1053. }
  1054. return "0M";
  1055. }
  1056. static int LookupThousands(Translator *tr, int value, int thousandplex, int thousands_exact, char *ph_out)
  1057. {
  1058. // thousands_exact: bit 0 no hundreds,tens,or units, bit 1 ordinal numberr
  1059. int found;
  1060. int found_value = 0;
  1061. char string[12];
  1062. char ph_of[12];
  1063. char ph_thousands[40];
  1064. char ph_buf[40];
  1065. ph_of[0] = 0;
  1066. // first look for a match with the exact value of thousands
  1067. if (value > 0) {
  1068. if (thousands_exact & 1) {
  1069. if (thousands_exact & 2) {
  1070. // ordinal number
  1071. sprintf(string, "_%dM%do", value, thousandplex);
  1072. found_value = Lookup(tr, string, ph_thousands);
  1073. }
  1074. if (!found_value && (number_control & 1)) {
  1075. // look for the 'e' variant
  1076. sprintf(string, "_%dM%de", value, thousandplex);
  1077. found_value = Lookup(tr, string, ph_thousands);
  1078. }
  1079. if (!found_value) {
  1080. // is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta)
  1081. sprintf(string, "_%dM%dx", value, thousandplex);
  1082. found_value = Lookup(tr, string, ph_thousands);
  1083. }
  1084. }
  1085. if (found_value == 0) {
  1086. sprintf(string, "_%dM%d", value, thousandplex);
  1087. found_value = Lookup(tr, string, ph_thousands);
  1088. }
  1089. }
  1090. if (found_value == 0) {
  1091. if ((value % 100) >= 20)
  1092. Lookup(tr, "_0of", ph_of);
  1093. found = 0;
  1094. if (thousands_exact & 1) {
  1095. if (thousands_exact & 2) {
  1096. // ordinal number
  1097. sprintf(string, "_%s%do", M_Variant(value), thousandplex);
  1098. found = Lookup(tr, string, ph_thousands);
  1099. }
  1100. if (!found && (number_control & 1)) {
  1101. // look for the 'e' variant
  1102. sprintf(string, "_%s%de", M_Variant(value), thousandplex);
  1103. found = Lookup(tr, string, ph_thousands);
  1104. }
  1105. if (!found) {
  1106. // is there a different pronunciation if there are no hundreds,tens,or units ?
  1107. sprintf(string, "_%s%dx", M_Variant(value), thousandplex);
  1108. found = Lookup(tr, string, ph_thousands);
  1109. }
  1110. }
  1111. if (found == 0) {
  1112. sprintf(string, "_%s%d", M_Variant(value), thousandplex);
  1113. if (Lookup(tr, string, ph_thousands) == 0) {
  1114. if (thousandplex > 3) {
  1115. sprintf(string, "_0M%d", thousandplex-1);
  1116. if (Lookup(tr, string, ph_buf) == 0) {
  1117. // say "millions" if this name is not available and neither is the next lower
  1118. Lookup(tr, "_0M2", ph_thousands);
  1119. speak_missing_thousands = 3;
  1120. }
  1121. }
  1122. if (ph_thousands[0] == 0) {
  1123. // repeat "thousand" if higher order names are not available
  1124. sprintf(string, "_%dM1", value);
  1125. if ((found_value = Lookup(tr, string, ph_thousands)) == 0)
  1126. Lookup(tr, "_0M1", ph_thousands);
  1127. speak_missing_thousands = 2;
  1128. }
  1129. }
  1130. }
  1131. }
  1132. sprintf(ph_out, "%s%s", ph_of, ph_thousands);
  1133. if ((value == 1) && (thousandplex == 1) && (tr->langopts.numbers & NUM_OMIT_1_THOUSAND))
  1134. return 1;
  1135. return found_value;
  1136. }
  1137. static int LookupNum2(Translator *tr, int value, int thousandplex, const int control, char *ph_out)
  1138. {
  1139. // Lookup a 2 digit number
  1140. // control bit 0: ordinal number
  1141. // control bit 1: final tens and units (not number of thousands) (use special form of '1', LANG=de "eins")
  1142. // control bit 2: tens and units only, no higher digits
  1143. // control bit 3: use feminine form of '2' (for thousands
  1144. // control bit 4: speak zero tens
  1145. // control bit 5: variant of ordinal number (lang=hu)
  1146. // bit 8 followed by decimal fraction
  1147. // bit 9: use #f form for both tens and units (lang=ml)
  1148. int found;
  1149. int ix;
  1150. int units;
  1151. int tens;
  1152. int is_ordinal;
  1153. int used_and = 0;
  1154. int found_ordinal = 0;
  1155. int next_phtype;
  1156. int ord_type = 'o';
  1157. char string[12]; // for looking up entries in *_list
  1158. char ph_ordinal[20];
  1159. char ph_tens[50];
  1160. char ph_digits[50];
  1161. char ph_and[12];
  1162. units = value % 10;
  1163. tens = value / 10;
  1164. found = 0;
  1165. ph_ordinal[0] = 0;
  1166. ph_tens[0] = 0;
  1167. ph_digits[0] = 0;
  1168. ph_and[0] = 0;
  1169. if (control & 0x20)
  1170. ord_type = 'q';
  1171. is_ordinal = control & 1;
  1172. if ((control & 2) && (n_digit_lookup == 2)) {
  1173. // pronunciation of the final 2 digits has already been found
  1174. strcpy(ph_out, digit_lookup);
  1175. } else {
  1176. if (digit_lookup[0] == 0) {
  1177. // is there a special pronunciation for this 2-digit number
  1178. if (control & 8) {
  1179. // is there a feminine or thousands-variant form?
  1180. sprintf(string, "_%dfx", value);
  1181. if ((found = Lookup(tr, string, ph_digits)) == 0) {
  1182. sprintf(string, "_%df", value);
  1183. found = Lookup(tr, string, ph_digits);
  1184. }
  1185. } else if (is_ordinal) {
  1186. strcpy(ph_ordinal, ph_ordinal2);
  1187. if (control & 4) {
  1188. sprintf(string, "_%d%cx", value, ord_type); // LANG=hu, special word for 1. 2. when there are no higher digits
  1189. if ((found = Lookup(tr, string, ph_digits)) != 0) {
  1190. if (ph_ordinal2x[0] != 0)
  1191. strcpy(ph_ordinal, ph_ordinal2x); // alternate pronunciation (lang=an)
  1192. }
  1193. }
  1194. if (found == 0) {
  1195. sprintf(string, "_%d%c", value, ord_type);
  1196. found = Lookup(tr, string, ph_digits);
  1197. }
  1198. found_ordinal = found;
  1199. }
  1200. if (found == 0) {
  1201. if (control & 2) {
  1202. // the final tens and units of a number
  1203. if (number_control & 1) {
  1204. // look for 'e' variant
  1205. sprintf(string, "_%de", value);
  1206. found = Lookup(tr, string, ph_digits);
  1207. }
  1208. } else {
  1209. // followed by hundreds or thousands etc
  1210. if ((tr->langopts.numbers2 & NUM2_ORDINAL_AND_THOUSANDS) && (thousandplex <= 1))
  1211. sprintf(string, "_%do", value); // LANG=TA
  1212. else
  1213. sprintf(string, "_%da", value);
  1214. found = Lookup(tr, string, ph_digits);
  1215. }
  1216. if (!found) {
  1217. if ((is_ordinal) && (tr->langopts.numbers2 & NUM2_NO_TEEN_ORDINALS)) {
  1218. // don't use numbers 10-99 to make ordinals, always use _1Xo etc (lang=pt)
  1219. } else {
  1220. sprintf(string, "_%d", value);
  1221. found = Lookup(tr, string, ph_digits);
  1222. }
  1223. }
  1224. }
  1225. }
  1226. // no, speak as tens+units
  1227. if ((value < 10) && (control & 0x10)) {
  1228. // speak leading zero
  1229. Lookup(tr, "_0", ph_tens);
  1230. } else {
  1231. if (found)
  1232. ph_tens[0] = 0;
  1233. else {
  1234. if (is_ordinal) {
  1235. sprintf(string, "_%dX%c", tens, ord_type);
  1236. if (Lookup(tr, string, ph_tens) != 0) {
  1237. found_ordinal = 1;
  1238. if ((units != 0) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL)) {
  1239. // Use the ordinal form of tens as well as units. Add the ordinal ending
  1240. strcat(ph_tens, ph_ordinal2);
  1241. }
  1242. }
  1243. }
  1244. if (found_ordinal == 0) {
  1245. if (control & 0x200)
  1246. sprintf(string, "_%dXf", tens);
  1247. else
  1248. sprintf(string, "_%dX", tens);
  1249. Lookup(tr, string, ph_tens);
  1250. }
  1251. if ((ph_tens[0] == 0) && (tr->langopts.numbers & NUM_VIGESIMAL)) {
  1252. // tens not found, (for example) 73 is 60+13
  1253. units = (value % 20);
  1254. sprintf(string, "_%dX", tens & 0xfe);
  1255. Lookup(tr, string, ph_tens);
  1256. }
  1257. ph_digits[0] = 0;
  1258. if (units > 0) {
  1259. found = 0;
  1260. if ((control & 2) && (digit_lookup[0] != 0)) {
  1261. // we have an entry for this digit (possibly together with the next word)
  1262. strcpy(ph_digits, digit_lookup);
  1263. found_ordinal = 1;
  1264. ph_ordinal[0] = 0;
  1265. } else {
  1266. if (control & 8) {
  1267. // is there a variant form of this number?
  1268. sprintf(string, "_%df", units);
  1269. found = Lookup(tr, string, ph_digits);
  1270. }
  1271. if ((is_ordinal) && ((tr->langopts.numbers & NUM_SWAP_TENS) == 0)) {
  1272. // ordinal
  1273. sprintf(string, "_%d%c", units, ord_type);
  1274. if ((found = Lookup(tr, string, ph_digits)) != 0)
  1275. found_ordinal = 1;
  1276. }
  1277. if (found == 0) {
  1278. if ((number_control & 1) && (control & 2)) {
  1279. // look for 'e' variant
  1280. sprintf(string, "_%de", units);
  1281. found = Lookup(tr, string, ph_digits);
  1282. } else if (((control & 2) == 0) || ((tr->langopts.numbers & NUM_SWAP_TENS) != 0)) {
  1283. // followed by hundreds or thousands (or tens)
  1284. if ((tr->langopts.numbers2 & NUM2_ORDINAL_AND_THOUSANDS) && (thousandplex <= 1))
  1285. sprintf(string, "_%do", units); // LANG=TA, only for 100s, 1000s
  1286. else
  1287. sprintf(string, "_%da", units);
  1288. found = Lookup(tr, string, ph_digits);
  1289. }
  1290. }
  1291. if (found == 0) {
  1292. sprintf(string, "_%d", units);
  1293. Lookup(tr, string, ph_digits);
  1294. }
  1295. }
  1296. }
  1297. }
  1298. }
  1299. if ((is_ordinal) && (found_ordinal == 0) && (ph_ordinal[0] == 0)) {
  1300. if ((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & NUM_SWAP_TENS)))
  1301. Lookup(tr, "_ord20", ph_ordinal);
  1302. if (ph_ordinal[0] == 0)
  1303. Lookup(tr, "_ord", ph_ordinal);
  1304. }
  1305. if ((tr->langopts.numbers & (NUM_SWAP_TENS | NUM_AND_UNITS)) && (ph_tens[0] != 0) && (ph_digits[0] != 0)) {
  1306. Lookup(tr, "_0and", ph_and);
  1307. if ((is_ordinal) && (tr->langopts.numbers2 & NUM2_ORDINAL_NO_AND))
  1308. ph_and[0] = 0;
  1309. if (tr->langopts.numbers & NUM_SWAP_TENS)
  1310. sprintf(ph_out, "%s%s%s%s", ph_digits, ph_and, ph_tens, ph_ordinal);
  1311. else
  1312. sprintf(ph_out, "%s%s%s%s", ph_tens, ph_and, ph_digits, ph_ordinal);
  1313. used_and = 1;
  1314. } else {
  1315. if (tr->langopts.numbers & NUM_SINGLE_VOWEL) {
  1316. // remove vowel from the end of tens if units starts with a vowel (LANG=Italian)
  1317. if (((ix = strlen(ph_tens)-1) >= 0) && (ph_digits[0] != 0)) {
  1318. if ((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS)
  1319. next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type;
  1320. if ((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
  1321. ph_tens[ix] = 0;
  1322. }
  1323. }
  1324. if ((tr->langopts.numbers2 & NUM2_ORDINAL_DROP_VOWEL) && (ph_ordinal[0] != 0)) {
  1325. ix = sprintf(ph_out, "%s%s", ph_tens, ph_digits);
  1326. if ((ix > 0) && (phoneme_tab[(unsigned char)(ph_out[ix-1])]->type == phVOWEL))
  1327. ix--;
  1328. sprintf(&ph_out[ix], "%s", ph_ordinal);
  1329. } else
  1330. sprintf(ph_out, "%s%s%s", ph_tens, ph_digits, ph_ordinal);
  1331. }
  1332. }
  1333. if (tr->langopts.numbers & NUM_SINGLE_STRESS_L) {
  1334. // only one primary stress, on the first part (tens)
  1335. found = 0;
  1336. for (ix = 0; ix < (signed)strlen(ph_out); ix++) {
  1337. if (ph_out[ix] == phonSTRESS_P) {
  1338. if (found)
  1339. ph_out[ix] = phonSTRESS_3;
  1340. else
  1341. found = 1;
  1342. }
  1343. }
  1344. } else if (tr->langopts.numbers & NUM_SINGLE_STRESS) {
  1345. // only one primary stress
  1346. found = 0;
  1347. for (ix = strlen(ph_out)-1; ix >= 0; ix--) {
  1348. if (ph_out[ix] == phonSTRESS_P) {
  1349. if (found)
  1350. ph_out[ix] = phonSTRESS_3;
  1351. else
  1352. found = 1;
  1353. }
  1354. }
  1355. }
  1356. return used_and;
  1357. }
  1358. static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null, int thousandplex, int control)
  1359. {
  1360. // Translate a 3 digit number
  1361. // control bit 0, previous thousands
  1362. // bit 1, ordinal number
  1363. // bit 5 variant form of ordinal number
  1364. // bit 8 followed by decimal fraction
  1365. int found;
  1366. int hundreds;
  1367. int tensunits;
  1368. int x;
  1369. int ix;
  1370. int exact;
  1371. int ordinal;
  1372. int tplex;
  1373. int say_zero_hundred = 0;
  1374. int say_one_hundred;
  1375. char string[12]; // for looking up entries in **_list
  1376. char buf1[100];
  1377. char buf2[100];
  1378. char ph_100[20];
  1379. char ph_10T[20];
  1380. char ph_digits[50];
  1381. char ph_thousands[50];
  1382. char ph_hundred_and[12];
  1383. char ph_thousand_and[12];
  1384. ordinal = control & 0x22;
  1385. hundreds = value / 100;
  1386. tensunits = value % 100;
  1387. buf1[0] = 0;
  1388. ph_thousands[0] = 0;
  1389. ph_thousand_and[0] = 0;
  1390. if ((tr->langopts.numbers & NUM_ZERO_HUNDRED) && ((control & 1) || (hundreds >= 10)))
  1391. say_zero_hundred = 1; // lang=vi
  1392. if ((hundreds > 0) || say_zero_hundred) {
  1393. found = 0;
  1394. if (ordinal && (tensunits == 0)) {
  1395. // ordinal number, with no tens or units
  1396. found = Lookup(tr, "_0Co", ph_100);
  1397. }
  1398. if (found == 0) {
  1399. if (tensunits == 0) {
  1400. // special form for exact hundreds?
  1401. found = Lookup(tr, "_0C0", ph_100);
  1402. }
  1403. if (!found)
  1404. Lookup(tr, "_0C", ph_100);
  1405. }
  1406. if (((tr->langopts.numbers & NUM_1900) != 0) && (hundreds == 19)) {
  1407. // speak numbers such as 1984 as years: nineteen-eighty-four
  1408. } else if (hundreds >= 10) {
  1409. ph_digits[0] = 0;
  1410. exact = 0;
  1411. if ((value % 1000) == 0)
  1412. exact = 1;
  1413. tplex = thousandplex+1;
  1414. if (tr->langopts.numbers2 & NUM2_MYRIADS)
  1415. tplex = 0;
  1416. if (LookupThousands(tr, hundreds / 10, tplex, exact | ordinal, ph_10T) == 0) {
  1417. x = 0;
  1418. if (tr->langopts.numbers2 & (1 << tplex))
  1419. x = 8; // use variant (feminine) for before thousands and millions
  1420. if (tr->translator_name == L('m', 'l'))
  1421. x = 0x208;
  1422. LookupNum2(tr, hundreds/10, thousandplex, x, ph_digits);
  1423. }
  1424. if (tr->langopts.numbers2 & 0x200)
  1425. sprintf(ph_thousands, "%s%c%s%c", ph_10T, phonEND_WORD, ph_digits, phonEND_WORD); // say "thousands" before its number, not after
  1426. else
  1427. sprintf(ph_thousands, "%s%c%s%c", ph_digits, phonEND_WORD, ph_10T, phonEND_WORD);
  1428. hundreds %= 10;
  1429. if ((hundreds == 0) && (say_zero_hundred == 0))
  1430. ph_100[0] = 0;
  1431. suppress_null = 1;
  1432. control |= 1;
  1433. }
  1434. ph_digits[0] = 0;
  1435. if ((hundreds > 0) || say_zero_hundred) {
  1436. if ((tr->langopts.numbers & NUM_AND_HUNDRED) && ((control & 1) || (ph_thousands[0] != 0)))
  1437. Lookup(tr, "_0and", ph_thousand_and);
  1438. suppress_null = 1;
  1439. found = 0;
  1440. if ((ordinal)
  1441. && ((tensunits == 0) || (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))) {
  1442. // ordinal number
  1443. sprintf(string, "_%dCo", hundreds);
  1444. found = Lookup(tr, string, ph_digits);
  1445. if ((tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL) && (tensunits > 0)) {
  1446. // Use ordinal form of hundreds, as well as for tens and units
  1447. // Add ordinal suffix to the hundreds
  1448. strcat(ph_digits, ph_ordinal2);
  1449. }
  1450. }
  1451. if ((hundreds == 0) && say_zero_hundred)
  1452. Lookup(tr, "_0", ph_digits);
  1453. else {
  1454. if ((hundreds == 1) && (tr->langopts.numbers2 & NUM2_OMIT_1_HUNDRED_ONLY) && ((control & 1) == 0)) {
  1455. // only look for special 100 if there are previous thousands
  1456. } else {
  1457. if ((!found) && (tensunits == 0)) {
  1458. // is there a special pronunciation for exactly n00 ?
  1459. sprintf(string, "_%dC0", hundreds);
  1460. found = Lookup(tr, string, ph_digits);
  1461. }
  1462. if (!found) {
  1463. sprintf(string, "_%dC", hundreds);
  1464. found = Lookup(tr, string, ph_digits); // is there a specific pronunciation for n-hundred ?
  1465. }
  1466. }
  1467. if (found)
  1468. ph_100[0] = 0;
  1469. else {
  1470. say_one_hundred = 1;
  1471. if (hundreds == 1) {
  1472. if ((tr->langopts.numbers & NUM_OMIT_1_HUNDRED) != 0)
  1473. say_one_hundred = 0;
  1474. }
  1475. if (say_one_hundred != 0)
  1476. LookupNum2(tr, hundreds, thousandplex, 0, ph_digits);
  1477. }
  1478. }
  1479. }
  1480. sprintf(buf1, "%s%s%s%s", ph_thousands, ph_thousand_and, ph_digits, ph_100);
  1481. }
  1482. ph_hundred_and[0] = 0;
  1483. if (tensunits > 0) {
  1484. if ((control & 2) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL)) {
  1485. // Don't use "and" if we apply ordinal to both hundreds and units
  1486. } else {
  1487. if ((value > 100) || ((control & 1) && (thousandplex == 0))) {
  1488. if ((tr->langopts.numbers & NUM_HUNDRED_AND) || ((tr->langopts.numbers & NUM_HUNDRED_AND_DIGIT) && (tensunits < 10)))
  1489. Lookup(tr, "_0and", ph_hundred_and);
  1490. }
  1491. if ((tr->langopts.numbers & NUM_THOUSAND_AND) && (hundreds == 0) && ((control & 1) || (ph_thousands[0] != 0)))
  1492. Lookup(tr, "_0and", ph_hundred_and);
  1493. }
  1494. }
  1495. buf2[0] = 0;
  1496. if ((tensunits != 0) || (suppress_null == 0)) {
  1497. x = 0;
  1498. if (thousandplex == 0) {
  1499. x = 2; // allow "eins" for 1 rather than "ein"
  1500. if (ordinal)
  1501. x = 3; // ordinal number
  1502. if ((value < 100) && !(control & 1))
  1503. x |= 4; // tens and units only, no higher digits
  1504. if (ordinal & 0x20)
  1505. x |= 0x20; // variant form of ordinal number
  1506. } else if (tr->langopts.numbers2 & (1 << thousandplex))
  1507. x = 8; // use variant (feminine) for before thousands and millions
  1508. if ((tr->translator_name == L('m', 'l')) && (thousandplex == 1))
  1509. x |= 0x208; // use #f form for both tens and units
  1510. if ((tr->langopts.numbers2 & NUM2_ZERO_TENS) && ((control & 1) || (hundreds > 0))) {
  1511. // LANG=zh,
  1512. x |= 0x10;
  1513. }
  1514. if (LookupNum2(tr, tensunits, thousandplex, x | (control & 0x100), buf2) != 0) {
  1515. if (tr->langopts.numbers & NUM_SINGLE_AND)
  1516. ph_hundred_and[0] = 0; // don't put 'and' after 'hundred' if there's 'and' between tens and units
  1517. }
  1518. } else {
  1519. if (ph_ordinal2[0] != 0) {
  1520. ix = strlen(buf1);
  1521. if ((ix > 0) && (buf1[ix-1] == phonPAUSE_SHORT))
  1522. buf1[ix-1] = 0; // remove pause before addding ordinal suffix
  1523. strcpy(buf2, ph_ordinal2);
  1524. }
  1525. }
  1526. sprintf(ph_out, "%s%s%c%s", buf1, ph_hundred_and, phonEND_WORD, buf2);
  1527. return 0;
  1528. }
  // Is this a group of digits which looks like a thousands group?
  //
  //   word       points at the first character of the candidate group
  //   group_len  number of digits in a group
  //
  // Returns true when word[0..group_len-1] are all digits and the group is not
  // part of a longer run of digits, i.e. neither the character before it nor
  // the character after it is a digit.
  //
  // NOTE(review): word[-1] is read, so word must not point at the very start of
  // its buffer. This assumes the group is always preceded by a separator or
  // space within the same buffer - confirm against the callers.
  bool CheckThousandsGroup(char *word, int group_len)
  {
      int ix;

      // Check the group's own digits first, so that a string shorter than
      // group_len is rejected before looking at word[group_len].
      for (ix = 0; ix < group_len; ix++) {
          if (!IsDigit09(word[ix]))
              return false;
      }

      // The test of the preceding character was previously written as
      // IsDigit09(-1), which tests a constant rather than word[-1].
      if (IsDigit09(word[group_len]) || IsDigit09(word[-1]))
          return false;

      return true;
  }
  1541. static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
  1542. {
  1543. // Number translation with various options
  1544. // the "word" may be up to 4 digits
  1545. // "words" of 3 digits may be preceded by another number "word" for thousands or millions
  1546. int n_digits;
  1547. int value;
  1548. int ix;
  1549. int digix;
  1550. unsigned char c;
  1551. int suppress_null = 0;
  1552. int decimal_point = 0;
  1553. int thousandplex = 0;
  1554. int thousands_exact = 1;
  1555. int thousands_inc = 0;
  1556. int prev_thousands = 0;
  1557. int ordinal = 0;
  1558. int this_value;
  1559. int decimal_count;
  1560. int max_decimal_count;
  1561. int decimal_mode;
  1562. int suffix_ix;
  1563. int skipwords = 0;
  1564. int group_len;
  1565. int len;
  1566. char *p;
  1567. char string[32]; // for looking up entries in **_list
  1568. char buf1[100];
  1569. char ph_append[50];
  1570. char ph_buf[200];
  1571. char ph_buf2[50];
  1572. char ph_zeros[50];
  1573. char suffix[30]; // string[] must be long enough for sizeof(suffix)+2
  1574. char buf_digit_lookup[50];
  1575. static const char str_pause[2] = { phonPAUSE_NOLINK, 0 };
  1576. *flags = 0;
  1577. n_digit_lookup = 0;
  1578. buf_digit_lookup[0] = 0;
  1579. digit_lookup = buf_digit_lookup;
  1580. number_control = control;
  1581. for (ix = 0; IsDigit09(word[ix]); ix++) ;
  1582. n_digits = ix;
  1583. value = this_value = atoi(word);
  1584. group_len = 3;
  1585. if (tr->langopts.numbers2 & NUM2_MYRIADS)
  1586. group_len = 4;
  1587. // is there a previous thousands part (as a previous "word") ?
  1588. if ((n_digits == group_len) && (word[-2] == tr->langopts.thousands_sep) && IsDigit09(word[-3]))
  1589. prev_thousands = 1;
  1590. else if ((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & NUM_ALLOW_SPACE)) {
  1591. // thousands groups can be separated by spaces
  1592. if ((n_digits == 3) && !(wtab->flags & FLAG_MULTIPLE_SPACES) && IsDigit09(word[-2]))
  1593. prev_thousands = 1;
  1594. }
  1595. if (prev_thousands == 0)
  1596. speak_missing_thousands = 0;
  1597. ph_ordinal2[0] = 0;
  1598. ph_zeros[0] = 0;
  1599. if (prev_thousands || (word[0] != '0')) {
  1600. // don't check for ordinal if the number has a leading zero
  1601. ordinal = CheckDotOrdinal(tr, word, &word[ix], wtab, 0);
  1602. }
  1603. if ((word[ix] == '.') && !IsDigit09(word[ix+1]) && !IsDigit09(word[ix+2]) && !(wtab[1].flags & FLAG_NOSPACE)) {
  1604. // remove dot unless followed by another number
  1605. word[ix] = 0;
  1606. }
  1607. if ((ordinal == 0) || (tr->translator_name == L('h', 'u'))) {
  1608. // NOTE lang=hu, allow both dot and ordinal suffix, eg. "december 21.-én"
  1609. // look for an ordinal number suffix after the number
  1610. ix++;
  1611. p = suffix;
  1612. if (wtab[0].flags & FLAG_HYPHEN_AFTER) {
  1613. *p++ = '-';
  1614. ix++;
  1615. }
  1616. while ((word[ix] != 0) && (word[ix] != ' ') && (ix < (int)(sizeof(suffix)-1)))
  1617. *p++ = word[ix++];
  1618. *p = 0;
  1619. if (suffix[0] != 0) {
  1620. if ((tr->langopts.ordinal_indicator != NULL) && (strcmp(suffix, tr->langopts.ordinal_indicator) == 0))
  1621. ordinal = 2;
  1622. else if (!IsDigit09(suffix[0])) { // not _#9 (tab)
  1623. sprintf(string, "_#%s", suffix);
  1624. if (Lookup(tr, string, ph_ordinal2)) {
  1625. // this is an ordinal suffix
  1626. ordinal = 2;
  1627. flags[0] |= FLAG_SKIPWORDS;
  1628. skipwords = 1;
  1629. sprintf(string, "_x#%s", suffix);
  1630. Lookup(tr, string, ph_ordinal2x); // is there an alternate pronunciation?
  1631. }
  1632. }
  1633. }
  1634. }
  1635. if (wtab[0].flags & FLAG_ORDINAL)
  1636. ordinal = 2;
  1637. ph_append[0] = 0;
  1638. ph_buf2[0] = 0;
  1639. if ((word[0] == '0') && (prev_thousands == 0) && (word[1] != ' ') && (word[1] != tr->langopts.decimal_sep)) {
  1640. if ((n_digits == 2) && (word[3] == ':') && IsDigit09(word[5]) && isspace(word[7])) {
  1641. // looks like a time 02:30, omit the leading zero
  1642. } else {
  1643. if (n_digits > 3) {
  1644. flags[0] &= ~FLAG_SKIPWORDS;
  1645. return 0; // long number string with leading zero, speak as individual digits
  1646. }
  1647. // speak leading zeros
  1648. for (ix = 0; (word[ix] == '0') && (ix < (n_digits-1)); ix++)
  1649. Lookup(tr, "_0", &ph_zeros[strlen(ph_zeros)]);
  1650. }
  1651. }
  1652. if ((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[n_digits] == ' '))
  1653. thousands_inc = 1;
  1654. else if (word[n_digits] == tr->langopts.thousands_sep)
  1655. thousands_inc = 2;
  1656. suffix_ix = n_digits+2;
  1657. if (thousands_inc > 0) {
  1658. // if the following "words" are three-digit groups, count them and add
  1659. // a "thousand"/"million" suffix to this one
  1660. digix = n_digits + thousands_inc;
  1661. while (((wtab[thousandplex+1].flags & FLAG_MULTIPLE_SPACES) == 0) && CheckThousandsGroup(&word[digix], group_len)) {
  1662. for (ix = 0; ix < group_len; ix++) {
  1663. if (word[digix+ix] != '0') {
  1664. thousands_exact = 0;
  1665. break;
  1666. }
  1667. }
  1668. thousandplex++;
  1669. digix += group_len;
  1670. if ((word[digix] == tr->langopts.thousands_sep) || ((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[digix] == ' '))) {
  1671. suffix_ix = digix+2;
  1672. digix += thousands_inc;
  1673. } else
  1674. break;
  1675. }
  1676. }
  1677. if ((value == 0) && prev_thousands)
  1678. suppress_null = 1;
  1679. if (tr->translator_name == L('h', 'u')) {
  1680. // variant form of numbers when followed by hyphen and a suffix starting with 'a' or 'e' (but not a, e, az, ez, azt, ezt
  1681. if ((wtab[thousandplex].flags & FLAG_HYPHEN_AFTER) && (thousands_exact == 1) && hu_number_e(&word[suffix_ix], thousandplex, value))
  1682. number_control |= 1; // use _1e variant of number
  1683. }
  1684. if ((word[n_digits] == tr->langopts.decimal_sep) && IsDigit09(word[n_digits+1])) {
  1685. // this "word" ends with a decimal point
  1686. Lookup(tr, "_dpt", ph_append);
  1687. decimal_point = 0x100;
  1688. } else if (suppress_null == 0) {
  1689. if (thousands_inc > 0) {
  1690. if (thousandplex > 0) {
  1691. if ((suppress_null == 0) && (LookupThousands(tr, value, thousandplex, thousands_exact, ph_append))) {
  1692. // found an exact match for N thousand
  1693. value = 0;
  1694. suppress_null = 1;
  1695. }
  1696. }
  1697. }
  1698. } else if (speak_missing_thousands == 1) {
  1699. // speak this thousandplex if there was no word for the previous thousandplex
  1700. sprintf(string, "_0M%d", thousandplex+1);
  1701. if (Lookup(tr, string, buf1) == 0) {
  1702. sprintf(string, "_0M%d", thousandplex);
  1703. Lookup(tr, string, ph_append);
  1704. }
  1705. }
  1706. if ((ph_append[0] == 0) && (word[n_digits] == '.') && (thousandplex == 0))
  1707. Lookup(tr, "_.", ph_append);
  1708. if (thousandplex == 0) {
  1709. char *p2;
  1710. // look for combinations of the number with the next word
  1711. p = word;
  1712. while (IsDigit09(p[1])) p++; // just use the last digit
  1713. if (IsDigit09(p[-1])) {
  1714. p2 = p - 1;
  1715. if (LookupDictList(tr, &p2, buf_digit_lookup, flags, FLAG_SUFX, wtab)) // lookup 2 digits
  1716. n_digit_lookup = 2;
  1717. }
  1718. if ((buf_digit_lookup[0] == 0) && (*p != '0')) {
  1719. // LANG=hu ?
  1720. // not found, lookup only the last digit (?? but not if dot-ordinal has been found)
  1721. if (LookupDictList(tr, &p, buf_digit_lookup, flags, FLAG_SUFX, wtab)) // don't match '0', or entries with $only
  1722. n_digit_lookup = 1;
  1723. }
  1724. if (prev_thousands == 0) {
  1725. if ((decimal_point == 0) && (ordinal == 0)) {
  1726. // Look for special pronunciation for this number in isolation (LANG=kl)
  1727. sprintf(string, "_%dn", value);
  1728. if (Lookup(tr, string, ph_out))
  1729. return 1;
  1730. }
  1731. if (tr->langopts.numbers2 & NUM2_PERCENT_BEFORE) {
  1732. // LANG=si, say "percent" before the number
  1733. p2 = word;
  1734. while ((*p2 != ' ') && (*p2 != 0))
  1735. p2++;
  1736. if (p2[1] == '%') {
  1737. Lookup(tr, "%", ph_out);
  1738. ph_out += strlen(ph_out);
  1739. p2[1] = ' ';
  1740. }
  1741. }
  1742. }
  1743. }
  1744. LookupNum3(tr, value, ph_buf, suppress_null, thousandplex, prev_thousands | ordinal | decimal_point);
  1745. if ((thousandplex > 0) && (tr->langopts.numbers2 & 0x200))
  1746. sprintf(ph_out, "%s%s%c%s%s", ph_zeros, ph_append, phonEND_WORD, ph_buf2, ph_buf); // say "thousands" before its number
  1747. else
  1748. sprintf(ph_out, "%s%s%s%c%s", ph_zeros, ph_buf2, ph_buf, phonEND_WORD, ph_append);
  1749. while (decimal_point) {
  1750. n_digits++;
  1751. decimal_count = 0;
  1752. while (IsDigit09(word[n_digits+decimal_count]))
  1753. decimal_count++;
  1754. max_decimal_count = 2;
  1755. switch (decimal_mode = (tr->langopts.numbers & 0xe000))
  1756. {
  1757. case NUM_DFRACTION_4:
  1758. max_decimal_count = 5;
  1759. // fallthrough:
  1760. case NUM_DFRACTION_2:
  1761. // French/Polish decimal fraction
  1762. while (word[n_digits] == '0') {
  1763. Lookup(tr, "_0", buf1);
  1764. strcat(ph_out, buf1);
  1765. decimal_count--;
  1766. n_digits++;
  1767. }
  1768. if ((decimal_count <= max_decimal_count) && IsDigit09(word[n_digits])) {
  1769. LookupNum3(tr, atoi(&word[n_digits]), buf1, 0, 0, 0);
  1770. strcat(ph_out, buf1);
  1771. n_digits += decimal_count;
  1772. }
  1773. break;
  1774. case NUM_DFRACTION_1: // italian, say "hundredths" if leading zero
  1775. case NUM_DFRACTION_5: // hungarian, always say "tenths" etc.
  1776. case NUM_DFRACTION_6: // kazakh, always say "tenths" etc, before the decimal fraction
  1777. LookupNum3(tr, atoi(&word[n_digits]), ph_buf, 0, 0, 0);
  1778. if ((word[n_digits] == '0') || (decimal_mode != NUM_DFRACTION_1)) {
  1779. // decimal part has leading zeros, so add a "hundredths" or "thousandths" suffix
  1780. sprintf(string, "_0Z%d", decimal_count);
  1781. if (Lookup(tr, string, buf1) == 0)
  1782. break; // revert to speaking single digits
  1783. if (decimal_mode == NUM_DFRACTION_6)
  1784. strcat(ph_out, buf1);
  1785. else
  1786. strcat(ph_buf, buf1);
  1787. }
  1788. strcat(ph_out, ph_buf);
  1789. n_digits += decimal_count;
  1790. break;
  1791. case NUM_DFRACTION_3:
  1792. // Romanian decimal fractions
  1793. if ((decimal_count <= 4) && (word[n_digits] != '0')) {
  1794. LookupNum3(tr, atoi(&word[n_digits]), buf1, 0, 0, 0);
  1795. strcat(ph_out, buf1);
  1796. n_digits += decimal_count;
  1797. }
  1798. break;
  1799. case NUM_DFRACTION_7:
  1800. // alternative form of decimal fraction digits, except the final digit
  1801. while (decimal_count-- > 1) {
  1802. sprintf(string, "_%cd", word[n_digits]);
  1803. if (Lookup(tr, string, buf1) == 0)
  1804. break;
  1805. n_digits++;
  1806. strcat(ph_out, buf1);
  1807. }
  1808. }
  1809. while (IsDigit09(c = word[n_digits]) && (strlen(ph_out) < (N_WORD_PHONEMES - 10))) {
  1810. // speak any remaining decimal fraction digits individually
  1811. value = word[n_digits++] - '0';
  1812. LookupNum2(tr, value, 0, 2, buf1);
  1813. len = strlen(ph_out);
  1814. sprintf(&ph_out[len], "%c%s", phonEND_WORD, buf1);
  1815. }
  1816. // something after the decimal part ?
  1817. if (Lookup(tr, "_dpt2", buf1))
  1818. strcat(ph_out, buf1);
  1819. if ((c == tr->langopts.decimal_sep) && IsDigit09(word[n_digits+1])) {
  1820. Lookup(tr, "_dpt", buf1);
  1821. strcat(ph_out, buf1);
  1822. } else
  1823. decimal_point = 0;
  1824. }
  1825. if ((ph_out[0] != 0) && (ph_out[0] != phonSWITCH)) {
  1826. int next_char;
  1827. char *p;
  1828. p = &word[n_digits+1];
  1829. p += utf8_in(&next_char, p);
  1830. if ((tr->langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
  1831. utf8_in(&next_char, p);
  1832. if (!iswalpha2(next_char) && (thousands_exact == 0))
  1833. strcat(ph_out, str_pause); // don't add pause for 100s, 6th, etc.
  1834. }
  1835. *flags |= FLAG_FOUND;
  1836. speak_missing_thousands--;
  1837. if (skipwords)
  1838. dictionary_skipwords = skipwords;
  1839. return 1;
  1840. }
  1841. int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
  1842. {
  1843. if ((option_sayas == SAYAS_DIGITS1) || (wtab[0].flags & FLAG_INDIVIDUAL_DIGITS))
  1844. return 0; // speak digits individually
  1845. if (tr->langopts.numbers != 0)
  1846. return TranslateNumber_1(tr, word1, ph_out, flags, wtab, control);
  1847. return 0;
  1848. }