eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

numbers.c 49KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824
  1. /*
  2. * Copyright (C) 2005 to 2015 by Jonathan Duddington
  3. * email: jonsd@users.sourceforge.net
  4. * Copyright (C) 2015-2016, 2020 Reece H. Dunn
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, see: <http://www.gnu.org/licenses/>.
  18. */
  19. #include "config.h"
  20. #include <ctype.h>
  21. #include <stdbool.h>
  22. #include <stdint.h>
  23. #include <stdio.h>
  24. #include <stdlib.h>
  25. #include <string.h>
  26. #include <wctype.h>
  27. #include <errno.h>
  28. #include <limits.h>
  29. #include <espeak-ng/espeak_ng.h>
  30. #include <espeak-ng/speak_lib.h>
  31. #include <espeak-ng/encoding.h>
  32. #include "numbers.h"
  33. #include "common.h"
  34. #include "dictionary.h" // for Lookup, TranslateRules, EncodePhonemes, Look...
  35. #include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonEND_WORD, phonP...
  36. #include "readclause.h" // for WordToString2
  37. #include "synthdata.h" // for SelectPhonemeTable
  38. #include "synthesize.h" // for phoneme_tab
  39. #include "translate.h" // for Translator, LANGUAGE_OPTIONS, WOR...
  40. #include "voice.h" // for voice, voice_t
  // Accent / modifier codes.
  // The LETTER() and LIGATURE() macros pack these into the letter_accents_*
  // tables, and LookupAccentedLetter() uses the unpacked code as an index
  // into accents_tab[].
  enum {
      M_NAME         = 0,
      M_SMALLCAP     = 1,
      M_TURNED       = 2,
      M_REVERSED     = 3,
      M_CURL         = 4,
      M_ACUTE        = 5,
      M_BREVE        = 6,
      M_CARON        = 7,
      M_CEDILLA      = 8,
      M_CIRCUMFLEX   = 9,
      M_DIAERESIS    = 10,
      M_DOUBLE_ACUTE = 11,
      M_DOT_ABOVE    = 12,
      M_GRAVE        = 13,
      M_MACRON       = 14,
      M_OGONEK       = 15,
      M_RING         = 16,
      M_STROKE       = 17,
      M_TILDE        = 18,
      M_BAR          = 19,
      M_RETROFLEX    = 20,
      M_HOOK         = 21,

      // aliases
      M_MIDDLE_DOT   = M_DOT_ABOVE, // duplicate of M_DOT_ABOVE
      M_IMPLOSIVE    = M_HOOK,

      // flag bit: the table entry was built with LIGATURE(), not LETTER()
      M_LIGATURE     = 0x8000
  };
  // File-scope state for number translation.
  // NOTE(review): none of these are referenced in the part of the file
  // reviewed here; their exact meaning should be confirmed against their users.
  static int n_digit_lookup = 0;
  static char *digit_lookup = NULL;
  static int speak_missing_thousands = 0;
  static int number_control = 0;
  // One accent name: the dictionary token and how it is spoken.
  typedef struct {
      const char *name;  // token passed to Lookup()
      int accent_flags;  // bit 0, say before the letter name
  } ACCENTS;

  // These are tokens to look up in the *_list file.
  // The position of each entry is the accent code that LookupAccentedLetter()
  // uses as the index; entry 0 is the one used for ligatures.
  static ACCENTS accents_tab[] = {
      { .name = "_lig", .accent_flags = 1 }, //  0  ligature
      { .name = "_smc", .accent_flags = 0 }, //  1  smallcap
      { .name = "_tur", .accent_flags = 0 }, //  2  turned
      { .name = "_rev", .accent_flags = 0 }, //  3  reversed
      { .name = "_crl", .accent_flags = 0 }, //  4  curl
      { .name = "_acu", .accent_flags = 0 }, //  5  acute
      { .name = "_brv", .accent_flags = 0 }, //  6  breve
      { .name = "_hac", .accent_flags = 0 }, //  7  caron/hacek
      { .name = "_ced", .accent_flags = 0 }, //  8  cedilla
      { .name = "_cir", .accent_flags = 0 }, //  9  circumflex
      { .name = "_dia", .accent_flags = 0 }, // 10  diaeresis
      { .name = "_ac2", .accent_flags = 0 }, // 11  double acute
      { .name = "_dot", .accent_flags = 0 }, // 12  dot
      { .name = "_grv", .accent_flags = 0 }, // 13  grave
      { .name = "_mcn", .accent_flags = 0 }, // 14  macron
      { .name = "_ogo", .accent_flags = 0 }, // 15  ogonek
      { .name = "_rng", .accent_flags = 0 }, // 16  ring
      { .name = "_stk", .accent_flags = 0 }, // 17  stroke
      { .name = "_tld", .accent_flags = 0 }, // 18  tilde
      { .name = "_bar", .accent_flags = 0 }, // 19  bar
      { .name = "_rfx", .accent_flags = 0 }, // 20  retroflex
      { .name = "_hok", .accent_flags = 0 }, // 21  hook
  };
  // Table entry for a code point that has no accent data of its own.
  #define CAPITAL 0

  // Pack a base letter and up to two accent codes into one table entry:
  //   bits 0-5   base letter, stored as (ch - 59)
  //   bits 6-10  first accent code (mod1)
  //   bits 11-14 second accent code (mod2)
  // Arguments and the whole expansion are parenthesised so the macros stay
  // correct inside larger expressions and with compound arguments.
  #define LETTER(ch, mod1, mod2) (((ch)-59)+((mod1) << 6)+((mod2) << 11))

  // Pack a two-letter ligature into one table entry:
  //   bits 0-5   first letter, stored as (ch1 - 59)
  //   bits 6-11  second letter, stored as (ch2 - 59)
  //   bits 12-14 accent code (mod1)
  //   bit  15    M_LIGATURE marker
  #define LIGATURE(ch1, ch2, mod1) (((ch1)-59)+(((ch2)-59) << 6)+((mod1) << 12)+M_LIGATURE)

  // Pseudo letter codes below 'a' for non-ASCII base letters.
  // (code - 59) is the index into non_ascii_tab[], which holds the real
  // Unicode code point.
  #define L_ALPHA 60 // U+3B1
  #define L_SCHWA 61 // U+259
  #define L_OPEN_E 62 // U+25B
  #define L_GAMMA 63 // U+3B3
  #define L_IOTA 64 // U+3B9
  #define L_PHI 67 // U+3C6
  #define L_ESH 68 // U+283
  #define L_UPSILON 69 // U+3C5
  #define L_EZH 70 // U+292
  #define L_GLOTTAL 71 // U+294
  #define L_RTAP 72 // U+27E
  #define L_RLONG 73 // U+27C

  // Unicode code points for the pseudo letter codes 59..73, indexed by
  // (code - 59). Indexes 6 and 7 (codes 65 and 66) have no L_* name here.
  static const short non_ascii_tab[] = {
      0,
      0x3b1, 0x259, 0x25b, 0x3b3, 0x3b9, 0x153, 0x3c9,
      0x3c6, 0x283, 0x3c5, 0x292, 0x294, 0x27e, 0x27c
  };
  119. // characters U+00e0 to U+017f
  120. static const unsigned short letter_accents_0e0[] = {
  121. LETTER('a', M_GRAVE, 0), // U+00e0
  122. LETTER('a', M_ACUTE, 0),
  123. LETTER('a', M_CIRCUMFLEX, 0),
  124. LETTER('a', M_TILDE, 0),
  125. LETTER('a', M_DIAERESIS, 0),
  126. LETTER('a', M_RING, 0),
  127. LIGATURE('a', 'e', 0),
  128. LETTER('c', M_CEDILLA, 0),
  129. LETTER('e', M_GRAVE, 0),
  130. LETTER('e', M_ACUTE, 0),
  131. LETTER('e', M_CIRCUMFLEX, 0),
  132. LETTER('e', M_DIAERESIS, 0),
  133. LETTER('i', M_GRAVE, 0),
  134. LETTER('i', M_ACUTE, 0),
  135. LETTER('i', M_CIRCUMFLEX, 0),
  136. LETTER('i', M_DIAERESIS, 0),
  137. LETTER('d', M_NAME, 0), // eth U+00f0
  138. LETTER('n', M_TILDE, 0),
  139. LETTER('o', M_GRAVE, 0),
  140. LETTER('o', M_ACUTE, 0),
  141. LETTER('o', M_CIRCUMFLEX, 0),
  142. LETTER('o', M_TILDE, 0),
  143. LETTER('o', M_DIAERESIS, 0),
  144. 0, // division sign
  145. LETTER('o', M_STROKE, 0),
  146. LETTER('u', M_GRAVE, 0),
  147. LETTER('u', M_ACUTE, 0),
  148. LETTER('u', M_CIRCUMFLEX, 0),
  149. LETTER('u', M_DIAERESIS, 0),
  150. LETTER('y', M_ACUTE, 0),
  151. LETTER('t', M_NAME, 0), // thorn
  152. LETTER('y', M_DIAERESIS, 0),
  153. CAPITAL, // U+0100
  154. LETTER('a', M_MACRON, 0),
  155. CAPITAL,
  156. LETTER('a', M_BREVE, 0),
  157. CAPITAL,
  158. LETTER('a', M_OGONEK, 0),
  159. CAPITAL,
  160. LETTER('c', M_ACUTE, 0),
  161. CAPITAL,
  162. LETTER('c', M_CIRCUMFLEX, 0),
  163. CAPITAL,
  164. LETTER('c', M_DOT_ABOVE, 0),
  165. CAPITAL,
  166. LETTER('c', M_CARON, 0),
  167. CAPITAL,
  168. LETTER('d', M_CARON, 0),
  169. CAPITAL, // U+0110
  170. LETTER('d', M_STROKE, 0),
  171. CAPITAL,
  172. LETTER('e', M_MACRON, 0),
  173. CAPITAL,
  174. LETTER('e', M_BREVE, 0),
  175. CAPITAL,
  176. LETTER('e', M_DOT_ABOVE, 0),
  177. CAPITAL,
  178. LETTER('e', M_OGONEK, 0),
  179. CAPITAL,
  180. LETTER('e', M_CARON, 0),
  181. CAPITAL,
  182. LETTER('g', M_CIRCUMFLEX, 0),
  183. CAPITAL,
  184. LETTER('g', M_BREVE, 0),
  185. CAPITAL, // U+0120
  186. LETTER('g', M_DOT_ABOVE, 0),
  187. CAPITAL,
  188. LETTER('g', M_CEDILLA, 0),
  189. CAPITAL,
  190. LETTER('h', M_CIRCUMFLEX, 0),
  191. CAPITAL,
  192. LETTER('h', M_STROKE, 0),
  193. CAPITAL,
  194. LETTER('i', M_TILDE, 0),
  195. CAPITAL,
  196. LETTER('i', M_MACRON, 0),
  197. CAPITAL,
  198. LETTER('i', M_BREVE, 0),
  199. CAPITAL,
  200. LETTER('i', M_OGONEK, 0),
  201. CAPITAL, // U+0130
  202. LETTER('i', M_NAME, 0), // dotless i
  203. CAPITAL,
  204. LIGATURE('i', 'j', 0),
  205. CAPITAL,
  206. LETTER('j', M_CIRCUMFLEX, 0),
  207. CAPITAL,
  208. LETTER('k', M_CEDILLA, 0),
  209. LETTER('k', M_NAME, 0), // kra
  210. CAPITAL,
  211. LETTER('l', M_ACUTE, 0),
  212. CAPITAL,
  213. LETTER('l', M_CEDILLA, 0),
  214. CAPITAL,
  215. LETTER('l', M_CARON, 0),
  216. CAPITAL,
  217. LETTER('l', M_MIDDLE_DOT, 0), // U+0140
  218. CAPITAL,
  219. LETTER('l', M_STROKE, 0),
  220. CAPITAL,
  221. LETTER('n', M_ACUTE, 0),
  222. CAPITAL,
  223. LETTER('n', M_CEDILLA, 0),
  224. CAPITAL,
  225. LETTER('n', M_CARON, 0),
  226. LETTER('n', M_NAME, 0), // apostrophe n
  227. CAPITAL,
  228. LETTER('n', M_NAME, 0), // eng
  229. CAPITAL,
  230. LETTER('o', M_MACRON, 0),
  231. CAPITAL,
  232. LETTER('o', M_BREVE, 0),
  233. CAPITAL, // U+0150
  234. LETTER('o', M_DOUBLE_ACUTE, 0),
  235. CAPITAL,
  236. LIGATURE('o', 'e', 0),
  237. CAPITAL,
  238. LETTER('r', M_ACUTE, 0),
  239. CAPITAL,
  240. LETTER('r', M_CEDILLA, 0),
  241. CAPITAL,
  242. LETTER('r', M_CARON, 0),
  243. CAPITAL,
  244. LETTER('s', M_ACUTE, 0),
  245. CAPITAL,
  246. LETTER('s', M_CIRCUMFLEX, 0),
  247. CAPITAL,
  248. LETTER('s', M_CEDILLA, 0),
  249. CAPITAL, // U+0160
  250. LETTER('s', M_CARON, 0),
  251. CAPITAL,
  252. LETTER('t', M_CEDILLA, 0),
  253. CAPITAL,
  254. LETTER('t', M_CARON, 0),
  255. CAPITAL,
  256. LETTER('t', M_STROKE, 0),
  257. CAPITAL,
  258. LETTER('u', M_TILDE, 0),
  259. CAPITAL,
  260. LETTER('u', M_MACRON, 0),
  261. CAPITAL,
  262. LETTER('u', M_BREVE, 0),
  263. CAPITAL,
  264. LETTER('u', M_RING, 0),
  265. CAPITAL, // U+0170
  266. LETTER('u', M_DOUBLE_ACUTE, 0),
  267. CAPITAL,
  268. LETTER('u', M_OGONEK, 0),
  269. CAPITAL,
  270. LETTER('w', M_CIRCUMFLEX, 0),
  271. CAPITAL,
  272. LETTER('y', M_CIRCUMFLEX, 0),
  273. CAPITAL, // Y-DIAERESIS
  274. CAPITAL,
  275. LETTER('z', M_ACUTE, 0),
  276. CAPITAL,
  277. LETTER('z', M_DOT_ABOVE, 0),
  278. CAPITAL,
  279. LETTER('z', M_CARON, 0),
  280. LETTER('s', M_NAME, 0), // long-s U+17f
  281. };
  282. // characters U+0250 to U+029F
  283. static const unsigned short letter_accents_250[] = {
  284. LETTER('a', M_TURNED, 0), // U+250
  285. LETTER(L_ALPHA, 0, 0),
  286. LETTER(L_ALPHA, M_TURNED, 0),
  287. LETTER('b', M_IMPLOSIVE, 0),
  288. 0, // open-o
  289. LETTER('c', M_CURL, 0),
  290. LETTER('d', M_RETROFLEX, 0),
  291. LETTER('d', M_IMPLOSIVE, 0),
  292. LETTER('e', M_REVERSED, 0), // U+258
  293. 0, // schwa
  294. LETTER(L_SCHWA, M_HOOK, 0),
  295. 0, // open-e
  296. LETTER(L_OPEN_E, M_REVERSED, 0),
  297. LETTER(L_OPEN_E, M_HOOK, M_REVERSED),
  298. 0,
  299. LETTER('j', M_BAR, 0),
  300. LETTER('g', M_IMPLOSIVE, 0), // U+260
  301. LETTER('g', 0, 0),
  302. LETTER('g', M_SMALLCAP, 0),
  303. LETTER(L_GAMMA, 0, 0),
  304. 0, // ramshorn
  305. LETTER('h', M_TURNED, 0),
  306. LETTER('h', M_HOOK, 0),
  307. 0,
  308. LETTER('i', M_BAR, 0), // U+268
  309. LETTER(L_IOTA, 0, 0),
  310. LETTER('i', M_SMALLCAP, 0),
  311. LETTER('l', M_TILDE, 0),
  312. LETTER('l', M_BAR, 0),
  313. LETTER('l', M_RETROFLEX, 0),
  314. LIGATURE('l', 'z', 0),
  315. LETTER('m', M_TURNED, 0),
  316. 0,
  317. LETTER('m', M_HOOK, 0),
  318. 0,
  319. LETTER('n', M_RETROFLEX, 0),
  320. LETTER('n', M_SMALLCAP, 0),
  321. LETTER('o', M_BAR, 0),
  322. LIGATURE('o', 'e', M_SMALLCAP),
  323. 0,
  324. LETTER(L_PHI, 0, 0), // U+278
  325. LETTER('r', M_TURNED, 0),
  326. LETTER(L_RLONG, M_TURNED, 0),
  327. LETTER('r', M_RETROFLEX, M_TURNED),
  328. 0,
  329. LETTER('r', M_RETROFLEX, 0),
  330. 0, // r-tap
  331. LETTER(L_RTAP, M_REVERSED, 0),
  332. LETTER('r', M_SMALLCAP, 0), // U+280
  333. LETTER('r', M_TURNED, M_SMALLCAP),
  334. LETTER('s', M_RETROFLEX, 0),
  335. 0, // esh
  336. LETTER('j', M_HOOK, 0),
  337. LETTER(L_ESH, M_REVERSED, 0),
  338. LETTER(L_ESH, M_CURL, 0),
  339. LETTER('t', M_TURNED, 0),
  340. LETTER('t', M_RETROFLEX, 0), // U+288
  341. LETTER('u', M_BAR, 0),
  342. LETTER(L_UPSILON, 0, 0),
  343. LETTER('v', M_HOOK, 0),
  344. LETTER('v', M_TURNED, 0),
  345. LETTER('w', M_TURNED, 0),
  346. LETTER('y', M_TURNED, 0),
  347. LETTER('y', M_SMALLCAP, 0),
  348. LETTER('z', M_RETROFLEX, 0), // U+290
  349. LETTER('z', M_CURL, 0),
  350. 0, // ezh
  351. LETTER(L_EZH, M_CURL, 0),
  352. 0, // glottal stop
  353. LETTER(L_GLOTTAL, M_REVERSED, 0),
  354. LETTER(L_GLOTTAL, M_TURNED, 0),
  355. 0,
  356. 0, // bilabial click U+298
  357. LETTER('b', M_SMALLCAP, 0),
  358. 0,
  359. LETTER('g', M_IMPLOSIVE, M_SMALLCAP),
  360. LETTER('h', M_SMALLCAP, 0),
  361. LETTER('j', M_CURL, 0),
  362. LETTER('k', M_TURNED, 0),
  363. LETTER('l', M_SMALLCAP, 0),
  364. LETTER('q', M_HOOK, 0), // U+2a0
  365. LETTER(L_GLOTTAL, M_STROKE, 0),
  366. LETTER(L_GLOTTAL, M_STROKE, M_REVERSED),
  367. LIGATURE('d', 'z', 0),
  368. 0, // dezh
  369. LIGATURE('d', 'z', M_CURL),
  370. LIGATURE('t', 's', 0),
  371. 0, // tesh
  372. LIGATURE('t', 's', M_CURL),
  373. };
  374. static int LookupLetter2(Translator *tr, unsigned int letter, char *ph_buf)
  375. {
  376. int len;
  377. char single_letter[10];
  378. single_letter[0] = 0;
  379. single_letter[1] = '_';
  380. len = utf8_out(letter, &single_letter[2]);
  381. single_letter[len+2] = ' ';
  382. single_letter[len+3] = 0;
  383. if (Lookup(tr, &single_letter[1], ph_buf) == 0) {
  384. single_letter[1] = ' ';
  385. if (Lookup(tr, &single_letter[2], ph_buf) == 0)
  386. TranslateRules(tr, &single_letter[2], ph_buf, 20, NULL, 0, NULL);
  387. }
  388. return ph_buf[0];
  389. }
  390. void LookupAccentedLetter(Translator *tr, unsigned int letter, char *ph_buf)
  391. {
  392. // lookup the character in the accents table
  393. int accent_data = 0;
  394. int accent1 = 0;
  395. int accent2 = 0;
  396. int flags1, flags2;
  397. int basic_letter;
  398. int letter2 = 0;
  399. char ph_letter1[30];
  400. char ph_letter2[30];
  401. char ph_accent1[30];
  402. char ph_accent2[30];
  403. ph_accent2[0] = 0;
  404. if ((letter >= 0xe0) && (letter < 0x17f))
  405. accent_data = letter_accents_0e0[letter - 0xe0];
  406. else if ((letter >= 0x250) && (letter <= 0x2a8))
  407. accent_data = letter_accents_250[letter - 0x250];
  408. if (accent_data != 0) {
  409. basic_letter = (accent_data & 0x3f) + 59;
  410. if (basic_letter < 'a')
  411. basic_letter = non_ascii_tab[basic_letter-59];
  412. if (accent_data & M_LIGATURE) {
  413. letter2 = (accent_data >> 6) & 0x3f;
  414. letter2 += 59;
  415. accent2 = (accent_data >> 12) & 0x7;
  416. } else {
  417. accent1 = (accent_data >> 6) & 0x1f;
  418. accent2 = (accent_data >> 11) & 0xf;
  419. }
  420. if ((accent1 == 0) && !(accent_data & M_LIGATURE)) {
  421. // just a letter name, not an accented character or ligature
  422. return;
  423. }
  424. if ((flags1 = Lookup(tr, accents_tab[accent1].name, ph_accent1)) != 0) {
  425. if (LookupLetter2(tr, basic_letter, ph_letter1) != 0) {
  426. if (accent2 != 0) {
  427. flags2 = Lookup(tr, accents_tab[accent2].name, ph_accent2);
  428. if (flags2 & FLAG_ACCENT_BEFORE) {
  429. strcpy(ph_buf, ph_accent2);
  430. ph_buf += strlen(ph_buf);
  431. ph_accent2[0] = 0;
  432. }
  433. }
  434. if (letter2 != 0) {
  435. // ligature
  436. LookupLetter2(tr, letter2, ph_letter2);
  437. sprintf(ph_buf, "%s%c%s%c%s%s", ph_accent1, phonPAUSE_VSHORT, ph_letter1, phonSTRESS_P, ph_letter2, ph_accent2);
  438. } else {
  439. if (accent1 == 0)
  440. strcpy(ph_buf, ph_letter1);
  441. else if ((tr->langopts.accents & 1) || (flags1 & FLAG_ACCENT_BEFORE) || (accents_tab[accent1].accent_flags & 1))
  442. sprintf(ph_buf, "%s%c%c%s", ph_accent1, phonPAUSE_VSHORT, phonSTRESS_P, ph_letter1);
  443. else
  444. sprintf(ph_buf, "%c%s%c%s%c", phonSTRESS_2, ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT);
  445. }
  446. }
  447. }
  448. }
  449. }
  450. void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_buf1, int control)
  451. {
  452. // control, bit 0: not the first letter of a word
  453. int len;
  454. char single_letter[10] = { 0, 0 };
  455. unsigned int dict_flags[2];
  456. char ph_buf3[40];
  457. ph_buf1[0] = 0;
  458. len = utf8_out(letter, &single_letter[2]);
  459. single_letter[len+2] = ' ';
  460. if (next_byte == -1) {
  461. // speaking normal text, not individual characters
  462. if (Lookup(tr, &single_letter[2], ph_buf1) != 0)
  463. return;
  464. single_letter[1] = '_';
  465. if (Lookup(tr, &single_letter[1], ph_buf3) != 0)
  466. return; // the character is specified as _* so ignore it when speaking normal text
  467. // check whether this character is specified for English
  468. if (tr->translator_name == L('e', 'n'))
  469. return; // we are already using English
  470. SetTranslator3(ESPEAKNG_DEFAULT_VOICE);
  471. if (Lookup(translator3, &single_letter[2], ph_buf3) != 0) {
  472. // yes, switch to English and re-translate the word
  473. sprintf(ph_buf1, "%c", phonSWITCH);
  474. }
  475. SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
  476. return;
  477. }
  478. if ((letter <= 32) || iswspace(letter)) {
  479. // lookup space as _&32 etc.
  480. sprintf(&single_letter[1], "_#%d ", letter);
  481. Lookup(tr, &single_letter[1], ph_buf1);
  482. return;
  483. }
  484. if (next_byte != ' ')
  485. next_byte = RULE_SPELLING;
  486. single_letter[3+len] = next_byte; // follow by space-space if the end of the word, or space-31
  487. single_letter[1] = '_';
  488. // if the $accent flag is set for this letter, use the accents table (below)
  489. dict_flags[1] = 0;
  490. if (Lookup(tr, &single_letter[1], ph_buf3) == 0) {
  491. single_letter[1] = ' ';
  492. if (Lookup(tr, &single_letter[2], ph_buf3) == 0)
  493. TranslateRules(tr, &single_letter[2], ph_buf3, sizeof(ph_buf3), NULL, FLAG_NO_TRACE, NULL);
  494. }
  495. if (ph_buf3[0] == 0)
  496. LookupAccentedLetter(tr, letter, ph_buf3);
  497. strcpy(ph_buf1, ph_buf3);
  498. if ((ph_buf1[0] == 0) || (ph_buf1[0] == phonSWITCH))
  499. return;
  500. dict_flags[0] = 0;
  501. dict_flags[1] = 0;
  502. SetWordStress(tr, ph_buf1, dict_flags, -1, control & 1);
  503. }
  // Flag bits added to the base character in derived_letters[].
  #define L_SUB 0x4000 // subscript
  #define L_SUP 0x8000 // superscript

  // Pairs of (code point, base character + L_SUB or L_SUP), ended by a 0, 0
  // pair. This list must be in ascending order of code point.
  static unsigned short derived_letters[] = {
      // superscripts, U+00aa - U+00ba
      0x00aa, L_SUP + 'a',
      0x00b2, L_SUP + '2',
      0x00b3, L_SUP + '3',
      0x00b9, L_SUP + '1',
      0x00ba, L_SUP + 'o',

      // superscripts, U+02b0 - U+02e3
      0x02b0, L_SUP + 'h',
      0x02b1, L_SUP + 0x266,
      0x02b2, L_SUP + 'j',
      0x02b3, L_SUP + 'r',
      0x02b4, L_SUP + 0x279,
      0x02b5, L_SUP + 0x27b,
      0x02b6, L_SUP + 0x281,
      0x02b7, L_SUP + 'w',
      0x02b8, L_SUP + 'y',
      0x02c0, L_SUP + 0x294,
      0x02c1, L_SUP + 0x295,
      0x02e0, L_SUP + 0x263,
      0x02e1, L_SUP + 'l',
      0x02e2, L_SUP + 's',
      0x02e3, L_SUP + 'x',

      // superscripts, U+2070 - U+207f
      0x2070, L_SUP + '0',
      0x2071, L_SUP + 'i',
      0x2074, L_SUP + '4',
      0x2075, L_SUP + '5',
      0x2076, L_SUP + '6',
      0x2077, L_SUP + '7',
      0x2078, L_SUP + '8',
      0x2079, L_SUP + '9',
      0x207a, L_SUP + '+',
      0x207b, L_SUP + '-',
      0x207c, L_SUP + '=',
      0x207d, L_SUP + '(',
      0x207e, L_SUP + ')',
      0x207f, L_SUP + 'n',

      // subscripts, U+2080 - U+208e
      0x2080, L_SUB + '0',
      0x2081, L_SUB + '1',
      0x2082, L_SUB + '2',
      0x2083, L_SUB + '3',
      0x2084, L_SUB + '4',
      0x2085, L_SUB + '5',
      0x2086, L_SUB + '6',
      0x2087, L_SUB + '7',
      0x2088, L_SUB + '8',
      0x2089, L_SUB + '9',
      0x208a, L_SUB + '+',
      0x208b, L_SUB + '-',
      0x208c, L_SUB + '=',
      0x208d, L_SUB + '(',
      0x208e, L_SUB + ')',

      // subscripts, U+2090 - U+209c
      0x2090, L_SUB + 'a',
      0x2091, L_SUB + 'e',
      0x2092, L_SUB + 'o',
      0x2093, L_SUB + 'x',
      0x2094, L_SUB + 0x259,
      0x2095, L_SUB + 'h',
      0x2096, L_SUB + 'k',
      0x2097, L_SUB + 'l',
      0x2098, L_SUB + 'm',
      0x2099, L_SUB + 'n',
      0x209a, L_SUB + 'p',
      0x209b, L_SUB + 's',
      0x209c, L_SUB + 't',

      // terminator
      0, 0
  };
  572. int IsSuperscript(int letter)
  573. {
  574. // is this a subscript or superscript letter ?
  575. int ix;
  576. int c;
  577. for (ix = 0; (c = derived_letters[ix]) != 0; ix += 2) {
  578. if (c > letter)
  579. break;
  580. if (c == letter)
  581. return derived_letters[ix+1];
  582. }
  583. return 0;
  584. }
  585. void SetSpellingStress(Translator *tr, char *phonemes, int control, int n_chars)
  586. {
  587. // Individual letter names, reduce the stress of some.
  588. int ix;
  589. unsigned int c;
  590. int n_stress = 0;
  591. int prev = 0;
  592. int count;
  593. unsigned char buf[N_WORD_PHONEMES];
  594. for (ix = 0; (c = phonemes[ix]) != 0; ix++) {
  595. if ((c == phonSTRESS_P) && (prev != phonSWITCH))
  596. n_stress++;
  597. buf[ix] = prev = c;
  598. }
  599. buf[ix] = 0;
  600. count = 0;
  601. prev = 0;
  602. for (ix = 0; (c = buf[ix]) != 0; ix++) {
  603. if ((c == phonSTRESS_P) && (n_chars > 1) && (prev != phonSWITCH)) {
  604. count++;
  605. if (tr->langopts.spelling_stress) {
  606. // stress on initial letter when spelling
  607. if (count > 1)
  608. c = phonSTRESS_3;
  609. } else {
  610. if (count != n_stress) {
  611. if (((count % 3) != 0) || (count == n_stress-1))
  612. c = phonSTRESS_3; // reduce to secondary stress
  613. }
  614. }
  615. } else if (c == 0xff) {
  616. if ((control < 2) || (ix == 0))
  617. continue; // don't insert pauses
  618. if (((count % 3) == 0) || (control > 2))
  619. c = phonPAUSE_NOLINK; // pause following a primary stress
  620. else
  621. c = phonPAUSE_VSHORT;
  622. }
  623. *phonemes++ = prev = c;
  624. }
  625. if (control >= 2)
  626. *phonemes++ = phonPAUSE_NOLINK;
  627. *phonemes = 0;
  628. }
  // Numbers

  // NOTE(review): presumably phoneme strings for ordinal suffixes; neither
  // buffer is referenced in the part of the file reviewed here.
  static char ph_ordinal2[12] = { 0 };
  static char ph_ordinal2x[12] = { 0 };
  632. static int CheckDotOrdinal(Translator *tr, char *word, char *word_end, WORD_TAB *wtab, int roman)
  633. {
  634. int ordinal = 0;
  635. int c2;
  636. int nextflags;
  637. if ((tr->langopts.numbers & NUM_ORDINAL_DOT) && ((word_end[0] == '.') || (wtab[0].flags & FLAG_HAS_DOT)) && !(wtab[1].flags & FLAG_NOSPACE)) {
  638. if (roman || !(wtab[1].flags & FLAG_FIRST_UPPER)) {
  639. if (word_end[0] == '.')
  640. utf8_in(&c2, &word_end[2]);
  641. else
  642. utf8_in(&c2, &word_end[0]);
  643. if ((word_end[0] != 0) && (word_end[1] != 0) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || IsAlpha(c2))) {
  644. // ordinal number is indicated by dot after the number
  645. // but not if the next word starts with an upper-case letter
  646. // (c2 == 0) is for cases such as, "2.,"
  647. ordinal = 2;
  648. if (word_end[0] == '.')
  649. word_end[0] = ' ';
  650. if ((roman == 0) && (tr->translator_name == L('h', 'u'))) {
  651. // lang=hu don't treat dot as ordinal indicator if the next word is a month name ($alt). It may have a suffix.
  652. nextflags = 0;
  653. if (IsAlpha(c2))
  654. nextflags = TranslateWord(tr, &word_end[2], NULL, NULL);
  655. if ((tr->prev_dict_flags[0] & FLAG_ALT_TRANS) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || iswdigit(c2)))
  656. ordinal = 0; // TEST 09.02.10
  657. if (nextflags & FLAG_ALT_TRANS)
  658. ordinal = 0;
  659. if (nextflags & FLAG_ALT3_TRANS) {
  660. if (word[-2] == '-')
  661. ordinal = 0; // e.g. december 2-5. között
  662. if (tr->prev_dict_flags[0] & (FLAG_ALT_TRANS | FLAG_ALT3_TRANS))
  663. ordinal = 0x22;
  664. }
  665. }
  666. }
  667. }
  668. }
  669. return ordinal;
  670. }
  // lang-hu: should the variant form of a number be used? It applies when the
  // number is followed by a hyphen and a suffix starting with 'a' or 'e'
  // (but not a, e, az, ez, azt, ezt, att, ett).
  //
  // word:         the suffix text that follows the number
  // thousandplex: with value 1, a suffix "al"/"el" does not select the variant
  // value:        the number; multiples of 1000 behave like thousandplex 1
  //
  // Returns 1 to use the variant form, 0 otherwise.
  static int hu_number_e(const char *word, int thousandplex, int value)
  {
      char second;

      if ((word[0] != 'a') && (word[0] != 'e'))
          return 0;

      second = word[1];
      if ((second == ' ') || (second == 'z'))
          return 0;
      if ((second == 't') && (word[2] == 't'))
          return 0;
      if ((second == 'l') && ((thousandplex == 1) || ((value % 1000) == 0)))
          return 0; // 1000-el

      return 1;
  }
  683. int TranslateRoman(Translator *tr, char *word, char *ph_out, WORD_TAB *wtab)
  684. {
  685. int c;
  686. char *p;
  687. const char *p2;
  688. int acc;
  689. int prev;
  690. int value;
  691. int subtract;
  692. int repeat = 0;
  693. int n_digits = 0;
  694. char *word_start;
  695. int num_control = 0;
  696. unsigned int flags[2];
  697. char ph_roman[30];
  698. char number_chars[N_WORD_BYTES];
  699. static const char *roman_numbers = "ixcmvld";
  700. static const int roman_values[] = { 1, 10, 100, 1000, 5, 50, 500 };
  701. acc = 0;
  702. prev = 0;
  703. subtract = 0x7fff;
  704. ph_out[0] = 0;
  705. flags[0] = 0;
  706. flags[1] = 0;
  707. if (((tr->langopts.numbers & NUM_ROMAN_CAPITALS) && !(wtab[0].flags & FLAG_ALL_UPPER)) || IsDigit09(word[-2]))
  708. return 0; // not '2xx'
  709. if (word[1] == ' ') {
  710. if ((tr->langopts.numbers & (NUM_ROMAN_CAPITALS | NUM_ROMAN_ORDINAL | NUM_ORDINAL_DOT)) && (wtab[0].flags & FLAG_HAS_DOT)) {
  711. // allow single letter Roman ordinal followed by dot.
  712. } else
  713. return 0; // only one letter, don't speak as a Roman Number
  714. }
  715. word_start = word;
  716. while ((c = *word++) != ' ') {
  717. if ((p2 = strchr(roman_numbers, c)) == NULL)
  718. return 0;
  719. value = roman_values[p2 - roman_numbers];
  720. if (value == prev) {
  721. repeat++;
  722. if (repeat >= 3)
  723. return 0;
  724. } else
  725. repeat = 0;
  726. if ((prev > 1) && (prev != 10) && (prev != 100)) {
  727. if (value >= prev)
  728. return 0;
  729. }
  730. if ((prev != 0) && (prev < value)) {
  731. if (((acc % 10) != 0) || ((prev*10) < value))
  732. return 0;
  733. subtract = prev;
  734. value -= subtract;
  735. } else if (value >= subtract)
  736. return 0;
  737. else
  738. acc += prev;
  739. prev = value;
  740. n_digits++;
  741. }
  742. if (IsDigit09(word[0]))
  743. return 0; // e.g. 'xx2'
  744. acc += prev;
  745. if (acc < tr->langopts.min_roman)
  746. return 0;
  747. if (acc > tr->langopts.max_roman)
  748. return 0;
  749. Lookup(tr, "_roman", ph_roman); // precede by "roman" if _rom is defined in *_list
  750. p = &ph_out[0];
  751. if ((tr->langopts.numbers & NUM_ROMAN_AFTER) == 0) {
  752. strcpy(ph_out, ph_roman);
  753. p = &ph_out[strlen(ph_roman)];
  754. }
  755. sprintf(number_chars, " %d %s ", acc, tr->langopts.roman_suffix);
  756. if (word[0] == '.') {
  757. // dot has not been removed. This implies that there was no space after it
  758. return 0;
  759. }
  760. if (CheckDotOrdinal(tr, word_start, word, wtab, 1))
  761. wtab[0].flags |= FLAG_ORDINAL;
  762. if (tr->langopts.numbers & NUM_ROMAN_ORDINAL) {
  763. if (tr->translator_name == L('h', 'u')) {
  764. if (!(wtab[0].flags & FLAG_ORDINAL)) {
  765. if ((wtab[0].flags & FLAG_HYPHEN_AFTER) && hu_number_e(word, 0, acc)) {
  766. // should use the 'e' form of the number
  767. num_control |= 1;
  768. } else
  769. return 0;
  770. }
  771. } else
  772. wtab[0].flags |= FLAG_ORDINAL;
  773. }
  774. tr->prev_dict_flags[0] = 0;
  775. tr->prev_dict_flags[1] = 0;
  776. TranslateNumber(tr, &number_chars[2], p, flags, wtab, num_control);
  777. if (tr->langopts.numbers & NUM_ROMAN_AFTER)
  778. strcat(ph_out, ph_roman);
  779. return 1;
  780. }
  781. static const char *M_Variant(int value)
  782. {
  783. // returns M, or perhaps MA or MB for some cases
  784. bool teens = false;
  785. if (((value % 100) > 10) && ((value % 100) < 20))
  786. teens = true;
  787. switch (translator->langopts.numbers2 & NUM2_THOUSANDS_VAR_BITS)
  788. {
  789. case NUM2_THOUSANDS_VAR1: // lang=ru
  790. if (teens == false) {
  791. if ((value % 10) == 1)
  792. return "1MA";
  793. if (((value % 10) >= 2) && ((value % 10) <= 4))
  794. return "0MA";
  795. }
  796. break;
  797. case NUM2_THOUSANDS_VAR2: // lang=cs,sk
  798. if ((value >= 2) && (value <= 4))
  799. return "0MA";
  800. break;
  801. case NUM2_THOUSANDS_VAR3: // lang=pl
  802. if ((teens == false) && (((value % 10) >= 2) && ((value % 10) <= 4)))
  803. return "0MA";
  804. break;
  805. case NUM2_THOUSANDS_VAR4: // lang=lt
  806. if ((teens == true) || ((value % 10) == 0))
  807. return "0MB";
  808. if ((value % 10) == 1)
  809. return "0MA";
  810. break;
  811. case NUM2_THOUSANDS_VAR5: // lang=bs,hr,sr
  812. if (teens == false) {
  813. if ((value % 10) == 1)
  814. return "1M";
  815. if (((value % 10) >= 2) && ((value % 10) <= 4))
  816. return "0MA";
  817. }
  818. break;
  819. }
  820. return "0M";
  821. }
  822. static int LookupThousands(Translator *tr, int value, int thousandplex, int thousands_exact, char *ph_out)
  823. {
  824. // thousands_exact: bit 0 no hundreds,tens,or units, bit 1 ordinal numberr
  825. int found;
  826. int found_value = 0;
  827. char string[14];
  828. char ph_of[12];
  829. char ph_thousands[40];
  830. char ph_buf[40];
  831. ph_of[0] = 0;
  832. // first look for a match with the exact value of thousands
  833. if (value > 0) {
  834. if (thousands_exact & 1) {
  835. if (thousands_exact & 2) {
  836. // ordinal number
  837. sprintf(string, "_%dM%do", value, thousandplex);
  838. found_value = Lookup(tr, string, ph_thousands);
  839. }
  840. if (!found_value && (number_control & 1)) {
  841. // look for the 'e' variant
  842. sprintf(string, "_%dM%de", value, thousandplex);
  843. found_value = Lookup(tr, string, ph_thousands);
  844. }
  845. if (!found_value) {
  846. // is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta)
  847. sprintf(string, "_%dM%dx", value, thousandplex);
  848. found_value = Lookup(tr, string, ph_thousands);
  849. }
  850. }
  851. if (found_value == 0) {
  852. sprintf(string, "_%dM%d", value, thousandplex);
  853. found_value = Lookup(tr, string, ph_thousands);
  854. }
  855. }
  856. if (found_value == 0) {
  857. if ((value % 100) >= 20)
  858. Lookup(tr, "_0of", ph_of);
  859. found = 0;
  860. if (thousands_exact & 1) {
  861. if (thousands_exact & 2) {
  862. // ordinal number
  863. sprintf(string, "_%s%do", M_Variant(value), thousandplex);
  864. found = Lookup(tr, string, ph_thousands);
  865. }
  866. if (!found && (number_control & 1)) {
  867. // look for the 'e' variant
  868. sprintf(string, "_%s%de", M_Variant(value), thousandplex);
  869. found = Lookup(tr, string, ph_thousands);
  870. }
  871. if (!found) {
  872. // is there a different pronunciation if there are no hundreds,tens,or units ?
  873. sprintf(string, "_%s%dx", M_Variant(value), thousandplex);
  874. found = Lookup(tr, string, ph_thousands);
  875. }
  876. }
  877. if (found == 0) {
  878. sprintf(string, "_%s%d", M_Variant(value), thousandplex);
  879. if (Lookup(tr, string, ph_thousands) == 0) {
  880. if (thousandplex > 3) {
  881. sprintf(string, "_0M%d", thousandplex-1);
  882. if (Lookup(tr, string, ph_buf) == 0) {
  883. // say "millions" if this name is not available and neither is the next lower
  884. Lookup(tr, "_0M2", ph_thousands);
  885. speak_missing_thousands = 3;
  886. }
  887. }
  888. if (ph_thousands[0] == 0) {
  889. // repeat "thousand" if higher order names are not available
  890. sprintf(string, "_%dM1", value);
  891. if ((found_value = Lookup(tr, string, ph_thousands)) == 0)
  892. Lookup(tr, "_0M1", ph_thousands);
  893. speak_missing_thousands = 2;
  894. }
  895. }
  896. }
  897. }
  898. sprintf(ph_out, "%s%s", ph_of, ph_thousands);
  899. if ((value == 1) && (thousandplex == 1) && (tr->langopts.numbers & NUM_OMIT_1_THOUSAND))
  900. return 1;
  901. return found_value;
  902. }
  903. static int LookupNum2(Translator *tr, int value, int thousandplex, const int control, char *ph_out)
  904. {
  905. // Lookup a 2 digit number
  906. // control bit 0: ordinal number
  907. // control bit 1: final tens and units (not number of thousands) (use special form of '1', LANG=de "eins")
  908. // control bit 2: tens and units only, no higher digits
  909. // control bit 3: use feminine form of '2' (for thousands
  910. // control bit 4: speak zero tens
  911. // control bit 5: variant of ordinal number (lang=hu)
  912. // bit 8 followed by decimal fraction
  913. // bit 9: use #f form for both tens and units (lang=ml)
  914. int found;
  915. int ix;
  916. int units;
  917. int tens;
  918. int is_ordinal;
  919. int used_and = 0;
  920. int found_ordinal = 0;
  921. int next_phtype;
  922. int ord_type = 'o';
  923. char string[12]; // for looking up entries in *_list
  924. char ph_ordinal[20];
  925. char ph_tens[50];
  926. char ph_digits[50];
  927. char ph_and[12];
  928. units = value % 10;
  929. tens = value / 10;
  930. found = 0;
  931. ph_ordinal[0] = 0;
  932. ph_tens[0] = 0;
  933. ph_digits[0] = 0;
  934. ph_and[0] = 0;
  935. if (control & 0x20)
  936. ord_type = 'q';
  937. is_ordinal = control & 1;
  938. if ((control & 2) && (n_digit_lookup == 2)) {
  939. // pronunciation of the final 2 digits has already been found
  940. strcpy(ph_out, digit_lookup);
  941. } else {
  942. if (digit_lookup[0] == 0) {
  943. // is there a special pronunciation for this 2-digit number
  944. if (control & 8) {
  945. // is there a feminine or thousands-variant form?
  946. sprintf(string, "_%dfx", value);
  947. if ((found = Lookup(tr, string, ph_digits)) == 0) {
  948. sprintf(string, "_%df", value);
  949. found = Lookup(tr, string, ph_digits);
  950. }
  951. } else if (is_ordinal) {
  952. strcpy(ph_ordinal, ph_ordinal2);
  953. if (control & 4) {
  954. sprintf(string, "_%d%cx", value, ord_type); // LANG=hu, special word for 1. 2. when there are no higher digits
  955. if ((found = Lookup(tr, string, ph_digits)) != 0) {
  956. if (ph_ordinal2x[0] != 0)
  957. strcpy(ph_ordinal, ph_ordinal2x); // alternate pronunciation (lang=an)
  958. }
  959. }
  960. if (found == 0) {
  961. sprintf(string, "_%d%c", value, ord_type);
  962. found = Lookup(tr, string, ph_digits);
  963. }
  964. found_ordinal = found;
  965. }
  966. if (found == 0) {
  967. if (control & 2) {
  968. // the final tens and units of a number
  969. if (number_control & 1) {
  970. // look for 'e' variant
  971. sprintf(string, "_%de", value);
  972. found = Lookup(tr, string, ph_digits);
  973. }
  974. } else {
  975. // followed by hundreds or thousands etc
  976. if ((tr->langopts.numbers2 & NUM2_ORDINAL_AND_THOUSANDS) && (thousandplex <= 1))
  977. sprintf(string, "_%do", value); // LANG=TA
  978. else
  979. sprintf(string, "_%da", value);
  980. found = Lookup(tr, string, ph_digits);
  981. }
  982. if (!found) {
  983. if ((is_ordinal) && (tr->langopts.numbers2 & NUM2_NO_TEEN_ORDINALS)) {
  984. // don't use numbers 10-99 to make ordinals, always use _1Xo etc (lang=pt)
  985. } else {
  986. sprintf(string, "_%d", value);
  987. found = Lookup(tr, string, ph_digits);
  988. }
  989. }
  990. }
  991. }
  992. // no, speak as tens+units
  993. if ((value < 10) && (control & 0x10)) {
  994. // speak leading zero
  995. Lookup(tr, "_0", ph_tens);
  996. } else {
  997. if (found)
  998. ph_tens[0] = 0;
  999. else {
  1000. if (is_ordinal) {
  1001. sprintf(string, "_%dX%c", tens, ord_type);
  1002. if (Lookup(tr, string, ph_tens) != 0) {
  1003. found_ordinal = 1;
  1004. if ((units != 0) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL)) {
  1005. // Use the ordinal form of tens as well as units. Add the ordinal ending
  1006. strcat(ph_tens, ph_ordinal2);
  1007. }
  1008. }
  1009. }
  1010. if (found_ordinal == 0) {
  1011. if (control & 0x200)
  1012. sprintf(string, "_%dXf", tens);
  1013. else
  1014. sprintf(string, "_%dX", tens);
  1015. Lookup(tr, string, ph_tens);
  1016. }
  1017. if ((ph_tens[0] == 0) && (tr->langopts.numbers & NUM_VIGESIMAL)) {
  1018. // tens not found, (for example) 73 is 60+13
  1019. units = (value % 20);
  1020. sprintf(string, "_%dX", tens & 0xfe);
  1021. Lookup(tr, string, ph_tens);
  1022. }
  1023. ph_digits[0] = 0;
  1024. if (units > 0) {
  1025. found = 0;
  1026. if ((control & 2) && (digit_lookup[0] != 0)) {
  1027. // we have an entry for this digit (possibly together with the next word)
  1028. strcpy(ph_digits, digit_lookup);
  1029. found_ordinal = 1;
  1030. ph_ordinal[0] = 0;
  1031. } else {
  1032. if (control & 8) {
  1033. // is there a variant form of this number?
  1034. sprintf(string, "_%df", units);
  1035. found = Lookup(tr, string, ph_digits);
  1036. }
  1037. if ((is_ordinal) && ((tr->langopts.numbers & NUM_SWAP_TENS) == 0)) {
  1038. // ordinal
  1039. sprintf(string, "_%d%c", units, ord_type);
  1040. if ((found = Lookup(tr, string, ph_digits)) != 0)
  1041. found_ordinal = 1;
  1042. }
  1043. if (found == 0) {
  1044. if ((number_control & 1) && (control & 2)) {
  1045. // look for 'e' variant
  1046. sprintf(string, "_%de", units);
  1047. found = Lookup(tr, string, ph_digits);
  1048. } else if (((control & 2) == 0) || ((tr->langopts.numbers & NUM_SWAP_TENS) != 0)) {
  1049. // followed by hundreds or thousands (or tens)
  1050. if ((tr->langopts.numbers2 & NUM2_ORDINAL_AND_THOUSANDS) && (thousandplex <= 1))
  1051. sprintf(string, "_%do", units); // LANG=TA, only for 100s, 1000s
  1052. else
  1053. sprintf(string, "_%da", units);
  1054. found = Lookup(tr, string, ph_digits);
  1055. }
  1056. }
  1057. if (found == 0) {
  1058. sprintf(string, "_%d", units);
  1059. Lookup(tr, string, ph_digits);
  1060. }
  1061. }
  1062. }
  1063. }
  1064. }
  1065. if ((is_ordinal) && (found_ordinal == 0) && (ph_ordinal[0] == 0)) {
  1066. if ((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & NUM_SWAP_TENS)))
  1067. Lookup(tr, "_ord20", ph_ordinal);
  1068. if (ph_ordinal[0] == 0)
  1069. Lookup(tr, "_ord", ph_ordinal);
  1070. }
  1071. if ((tr->langopts.numbers & (NUM_SWAP_TENS | NUM_AND_UNITS)) && (ph_tens[0] != 0) && (ph_digits[0] != 0)) {
  1072. Lookup(tr, "_0and", ph_and);
  1073. if ((is_ordinal) && (tr->langopts.numbers2 & NUM2_ORDINAL_NO_AND))
  1074. ph_and[0] = 0;
  1075. if (tr->langopts.numbers & NUM_SWAP_TENS)
  1076. sprintf(ph_out, "%s%s%s%s", ph_digits, ph_and, ph_tens, ph_ordinal);
  1077. else
  1078. sprintf(ph_out, "%s%s%s%s", ph_tens, ph_and, ph_digits, ph_ordinal);
  1079. used_and = 1;
  1080. } else {
  1081. if ((tr->langopts.numbers & NUM_SINGLE_VOWEL) && ph_digits[0] != 0) {
  1082. // remove vowel from the end of tens if units starts with a vowel (LANG=Italian)
  1083. ix = strlen(ph_tens) - 1;
  1084. if ((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS)
  1085. next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type;
  1086. if ((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
  1087. ph_tens[ix] = 0;
  1088. }
  1089. if ((tr->langopts.numbers2 & NUM2_ORDINAL_DROP_VOWEL) && (ph_ordinal[0] != 0)) {
  1090. ix = sprintf(ph_out, "%s%s", ph_tens, ph_digits);
  1091. if ((ix > 0) && (phoneme_tab[(unsigned char)(ph_out[ix-1])]->type == phVOWEL))
  1092. ix--;
  1093. sprintf(&ph_out[ix], "%s", ph_ordinal);
  1094. } else
  1095. sprintf(ph_out, "%s%s%s", ph_tens, ph_digits, ph_ordinal);
  1096. }
  1097. }
  1098. if (tr->langopts.numbers & NUM_SINGLE_STRESS_L) {
  1099. // only one primary stress, on the first part (tens)
  1100. found = 0;
  1101. for (ix = 0; ix < (signed)strlen(ph_out); ix++) {
  1102. if (ph_out[ix] == phonSTRESS_P) {
  1103. if (found)
  1104. ph_out[ix] = phonSTRESS_3;
  1105. else
  1106. found = 1;
  1107. }
  1108. }
  1109. } else if (tr->langopts.numbers & NUM_SINGLE_STRESS) {
  1110. // only one primary stress
  1111. found = 0;
  1112. for (ix = strlen(ph_out)-1; ix >= 0; ix--) {
  1113. if (ph_out[ix] == phonSTRESS_P) {
  1114. if (found)
  1115. ph_out[ix] = phonSTRESS_3;
  1116. else
  1117. found = 1;
  1118. }
  1119. }
  1120. }
  1121. return used_and;
  1122. }
  1123. static int LookupNum3(Translator *tr, int value, char *ph_out, bool suppress_null, int thousandplex, int control)
  1124. {
  1125. // Translate a 3 digit number
  1126. // control bit 0, previous thousands
  1127. // bit 1, ordinal number
  1128. // bit 5 variant form of ordinal number
  1129. // bit 8 followed by decimal fraction
  1130. int found;
  1131. int hundreds;
  1132. int tensunits;
  1133. int x;
  1134. int ix;
  1135. int exact;
  1136. int ordinal;
  1137. int tplex;
  1138. bool say_zero_hundred = false;
  1139. bool say_one_hundred;
  1140. char string[12]; // for looking up entries in **_list
  1141. char buf1[100];
  1142. char buf2[100];
  1143. char ph_100[20];
  1144. char ph_10T[20];
  1145. char ph_digits[50];
  1146. char ph_thousands[50];
  1147. char ph_hundred_and[12];
  1148. char ph_thousand_and[12];
  1149. ordinal = control & 0x22;
  1150. hundreds = value / 100;
  1151. tensunits = value % 100;
  1152. buf1[0] = 0;
  1153. ph_thousands[0] = 0;
  1154. ph_thousand_and[0] = 0;
  1155. if ((tr->langopts.numbers & NUM_ZERO_HUNDRED) && ((control & 1) || (hundreds >= 10)))
  1156. say_zero_hundred = true; // lang=vi
  1157. if ((hundreds > 0) || say_zero_hundred) {
  1158. found = 0;
  1159. if (ordinal && (tensunits == 0)) {
  1160. // ordinal number, with no tens or units
  1161. found = Lookup(tr, "_0Co", ph_100);
  1162. }
  1163. if (found == 0) {
  1164. if (tensunits == 0) {
  1165. // special form for exact hundreds?
  1166. found = Lookup(tr, "_0C0", ph_100);
  1167. }
  1168. if (!found)
  1169. Lookup(tr, "_0C", ph_100);
  1170. }
  1171. if (((tr->langopts.numbers & NUM_1900) != 0) && (hundreds == 19)) {
  1172. // speak numbers such as 1984 as years: nineteen-eighty-four
  1173. } else if (hundreds >= 10) {
  1174. ph_digits[0] = 0;
  1175. exact = 0;
  1176. if ((value % 1000) == 0)
  1177. exact = 1;
  1178. tplex = thousandplex+1;
  1179. if (tr->langopts.numbers2 & NUM2_MYRIADS)
  1180. tplex = 0;
  1181. if (LookupThousands(tr, hundreds / 10, tplex, exact | ordinal, ph_10T) == 0) {
  1182. x = 0;
  1183. if (tr->langopts.numbers2 & (1 << tplex) && tplex <= 3)
  1184. x = 8; // use variant (feminine) for before thousands and millions
  1185. if (tr->translator_name == L('m', 'l'))
  1186. x = 0x208;
  1187. LookupNum2(tr, hundreds/10, thousandplex, x, ph_digits);
  1188. }
  1189. if (tr->langopts.numbers2 & NUM2_SWAP_THOUSANDS)
  1190. sprintf(ph_thousands, "%s%c%s%c", ph_10T, phonEND_WORD, ph_digits, phonEND_WORD);
  1191. else
  1192. sprintf(ph_thousands, "%s%c%s%c", ph_digits, phonEND_WORD, ph_10T, phonEND_WORD);
  1193. hundreds %= 10;
  1194. if ((hundreds == 0) && (say_zero_hundred == false))
  1195. ph_100[0] = 0;
  1196. suppress_null = true;
  1197. control |= 1;
  1198. }
  1199. ph_digits[0] = 0;
  1200. if ((hundreds > 0) || say_zero_hundred) {
  1201. if ((tr->langopts.numbers & NUM_AND_HUNDRED) && ((control & 1) || (ph_thousands[0] != 0)))
  1202. Lookup(tr, "_0and", ph_thousand_and);
  1203. suppress_null = true;
  1204. found = 0;
  1205. if ((ordinal)
  1206. && ((tensunits == 0) || (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))) {
  1207. // ordinal number
  1208. sprintf(string, "_%dCo", hundreds);
  1209. found = Lookup(tr, string, ph_digits);
  1210. if ((tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL) && (tensunits > 0)) {
  1211. // Use ordinal form of hundreds, as well as for tens and units
  1212. // Add ordinal suffix to the hundreds
  1213. strcat(ph_digits, ph_ordinal2);
  1214. }
  1215. }
  1216. if ((hundreds == 0) && say_zero_hundred)
  1217. Lookup(tr, "_0", ph_digits);
  1218. else {
  1219. if ((hundreds == 1) && (tr->langopts.numbers2 & NUM2_OMIT_1_HUNDRED_ONLY) && ((control & 1) == 0)) {
  1220. // only look for special 100 if there are previous thousands
  1221. } else {
  1222. if ((!found) && (tensunits == 0)) {
  1223. // is there a special pronunciation for exactly n00 ?
  1224. sprintf(string, "_%dC0", hundreds);
  1225. found = Lookup(tr, string, ph_digits);
  1226. }
  1227. if (!found) {
  1228. sprintf(string, "_%dC", hundreds);
  1229. found = Lookup(tr, string, ph_digits); // is there a specific pronunciation for n-hundred ?
  1230. }
  1231. }
  1232. if (found)
  1233. ph_100[0] = 0;
  1234. else {
  1235. say_one_hundred = true;
  1236. if (hundreds == 1) {
  1237. if ((tr->langopts.numbers & NUM_OMIT_1_HUNDRED) != 0)
  1238. say_one_hundred = false;
  1239. }
  1240. if (say_one_hundred == true)
  1241. LookupNum2(tr, hundreds, thousandplex, 0, ph_digits);
  1242. }
  1243. }
  1244. }
  1245. sprintf(buf1, "%s%s%s%s", ph_thousands, ph_thousand_and, ph_digits, ph_100);
  1246. }
  1247. ph_hundred_and[0] = 0;
  1248. if (tensunits > 0) {
  1249. if ((control & 2) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL)) {
  1250. // Don't use "and" if we apply ordinal to both hundreds and units
  1251. } else {
  1252. if ((value > 100) || ((control & 1) && (thousandplex == 0))) {
  1253. if ((tr->langopts.numbers & NUM_HUNDRED_AND) || ((tr->langopts.numbers & NUM_HUNDRED_AND_DIGIT) && (tensunits < 10)))
  1254. Lookup(tr, "_0and", ph_hundred_and);
  1255. }
  1256. if ((tr->langopts.numbers & NUM_THOUSAND_AND) && (hundreds == 0) && ((control & 1) || (ph_thousands[0] != 0)))
  1257. Lookup(tr, "_0and", ph_hundred_and);
  1258. }
  1259. }
  1260. buf2[0] = 0;
  1261. if ((tensunits != 0) || (suppress_null == false)) {
  1262. x = 0;
  1263. if (thousandplex == 0) {
  1264. x = 2; // allow "eins" for 1 rather than "ein"
  1265. if (ordinal)
  1266. x = 3; // ordinal number
  1267. if ((value < 100) && !(control & 1))
  1268. x |= 4; // tens and units only, no higher digits
  1269. if (ordinal & 0x20)
  1270. x |= 0x20; // variant form of ordinal number
  1271. } else if (tr->langopts.numbers2 & (1 << thousandplex) && thousandplex <= 3)
  1272. x = 8; // use variant (feminine) for before thousands and millions
  1273. if ((tr->translator_name == L('m', 'l')) && (thousandplex == 1))
  1274. x |= 0x208; // use #f form for both tens and units
  1275. if ((tr->langopts.numbers2 & NUM2_ZERO_TENS) && ((control & 1) || (hundreds > 0))) {
  1276. // LANG=zh,
  1277. x |= 0x10;
  1278. }
  1279. if (LookupNum2(tr, tensunits, thousandplex, x | (control & 0x100), buf2) != 0) {
  1280. if (tr->langopts.numbers & NUM_SINGLE_AND)
  1281. ph_hundred_and[0] = 0; // don't put 'and' after 'hundred' if there's 'and' between tens and units
  1282. }
  1283. } else {
  1284. if (ph_ordinal2[0] != 0) {
  1285. ix = strlen(buf1);
  1286. if ((ix > 0) && (buf1[ix-1] == phonPAUSE_SHORT))
  1287. buf1[ix-1] = 0; // remove pause before adding ordinal suffix
  1288. strcpy(buf2, ph_ordinal2);
  1289. }
  1290. }
  1291. sprintf(ph_out, "%s%s%c%s", buf1, ph_hundred_and, phonEND_WORD, buf2);
  1292. return 0;
  1293. }
  static bool CheckThousandsGroup(char *word, int group_len)
  {
      // A thousands group is exactly group_len digits with no digit on either
      // side. Note that this reads word[-1], so 'word' must not point at the
      // first byte of its buffer.
      int pos = 0;

      while (pos < group_len) {
          if (!IsDigit09(word[pos]))
              return false;
          pos++;
      }

      // a longer run of digits is not a thousands group
      return !(IsDigit09(word[group_len]) || IsDigit09(word[-1]));
  }
  1306. static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
  1307. {
  1308. // Number translation with various options
  1309. // the "word" may be up to 4 digits
  1310. // "words" of 3 digits may be preceded by another number "word" for thousands or millions
  1311. int n_digits;
  1312. long value;
  1313. int ix;
  1314. int digix;
  1315. unsigned char c;
  1316. bool suppress_null = false;
  1317. int decimal_point = 0;
  1318. int thousandplex = 0;
  1319. int thousands_exact = 1;
  1320. int thousands_inc = 0;
  1321. int prev_thousands = 0;
  1322. int ordinal = 0;
  1323. long this_value;
  1324. int decimal_count;
  1325. int max_decimal_count;
  1326. int decimal_mode;
  1327. int suffix_ix;
  1328. int skipwords = 0;
  1329. int group_len;
  1330. int len;
  1331. char *p;
  1332. char string[32]; // for looking up entries in **_list
  1333. char buf1[100];
  1334. char ph_append[50];
  1335. char ph_buf[200];
  1336. char ph_buf2[50];
  1337. char ph_zeros[50];
  1338. char suffix[30]; // string[] must be long enough for sizeof(suffix)+2
  1339. char buf_digit_lookup[50];
  1340. static const char str_pause[2] = { phonPAUSE_NOLINK, 0 };
  1341. char *end;
  1342. *flags = 0;
  1343. n_digit_lookup = 0;
  1344. buf_digit_lookup[0] = 0;
  1345. digit_lookup = buf_digit_lookup;
  1346. number_control = control;
  1347. for (ix = 0; IsDigit09(word[ix]); ix++) ;
  1348. n_digits = ix;
  1349. errno = 0;
  1350. this_value = strtol(word, &end, 10);
  1351. if (errno || end == word || this_value > INT_MAX)
  1352. return 0; // long number, speak as individual digits
  1353. value = this_value;
  1354. group_len = 3;
  1355. if (tr->langopts.numbers2 & NUM2_MYRIADS)
  1356. group_len = 4;
  1357. // is there a previous thousands part (as a previous "word") ?
  1358. if ((n_digits == group_len) && (word[-2] == tr->langopts.thousands_sep) && IsDigit09(word[-3]))
  1359. prev_thousands = 1;
  1360. else if ((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & NUM_ALLOW_SPACE)) {
  1361. // thousands groups can be separated by spaces
  1362. if ((n_digits == 3) && !(wtab->flags & FLAG_MULTIPLE_SPACES) && IsDigit09(word[-2]))
  1363. prev_thousands = 1;
  1364. }
  1365. if (prev_thousands == 0)
  1366. speak_missing_thousands = 0;
  1367. ph_ordinal2[0] = 0;
  1368. ph_zeros[0] = 0;
  1369. if (prev_thousands || (word[0] != '0')) {
  1370. // don't check for ordinal if the number has a leading zero
  1371. ordinal = CheckDotOrdinal(tr, word, &word[ix], wtab, 0);
  1372. }
  1373. if ((word[ix] == '.') && !IsDigit09(word[ix+1]) && !IsDigit09(word[ix+2]) && !(wtab[1].flags & FLAG_NOSPACE)) {
  1374. // remove dot unless followed by another number
  1375. word[ix] = 0;
  1376. }
  1377. if ((ordinal == 0) || (tr->translator_name == L('h', 'u'))) {
  1378. // NOTE lang=hu, allow both dot and ordinal suffix, eg. "december 21.-én"
  1379. // look for an ordinal number suffix after the number
  1380. ix++;
  1381. p = suffix;
  1382. if (wtab[0].flags & FLAG_HYPHEN_AFTER) {
  1383. *p++ = '-';
  1384. ix++;
  1385. }
  1386. while ((word[ix] != 0) && (word[ix] != ' ') && (ix < (int)(sizeof(suffix)-1)))
  1387. *p++ = word[ix++];
  1388. *p = 0;
  1389. if (suffix[0] != 0) {
  1390. if ((tr->langopts.ordinal_indicator != NULL) && (strcmp(suffix, tr->langopts.ordinal_indicator) == 0))
  1391. ordinal = 2;
  1392. else if (!IsDigit09(suffix[0])) { // not _#9 (tab)
  1393. sprintf(string, "_#%s", suffix);
  1394. if (Lookup(tr, string, ph_ordinal2)) {
  1395. // this is an ordinal suffix
  1396. ordinal = 2;
  1397. flags[0] |= FLAG_SKIPWORDS;
  1398. skipwords = 1;
  1399. sprintf(string, "_x#%s", suffix);
  1400. Lookup(tr, string, ph_ordinal2x); // is there an alternate pronunciation?
  1401. }
  1402. }
  1403. }
  1404. }
  1405. if (wtab[0].flags & FLAG_ORDINAL)
  1406. ordinal = 2;
  1407. ph_append[0] = 0;
  1408. ph_buf2[0] = 0;
  1409. if ((word[0] == '0') && (prev_thousands == 0) && (word[1] != ' ') && (word[1] != tr->langopts.decimal_sep)) {
  1410. if ((n_digits == 2) && (word[3] == ':') && IsDigit09(word[5]) && isspace(word[7])) {
  1411. // looks like a time 02:30, omit the leading zero
  1412. } else {
  1413. if (n_digits > 3) {
  1414. flags[0] &= ~FLAG_SKIPWORDS;
  1415. return 0; // long number string with leading zero, speak as individual digits
  1416. }
  1417. // speak leading zeros
  1418. for (ix = 0; (word[ix] == '0') && (ix < (n_digits-1)); ix++)
  1419. Lookup(tr, "_0", &ph_zeros[strlen(ph_zeros)]);
  1420. }
  1421. }
  1422. if ((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[n_digits] == ' '))
  1423. thousands_inc = 1;
  1424. else if (word[n_digits] == tr->langopts.thousands_sep)
  1425. thousands_inc = 2;
  1426. suffix_ix = n_digits+2;
  1427. if (thousands_inc > 0) {
  1428. // if the following "words" are three-digit groups, count them and add
  1429. // a "thousand"/"million" suffix to this one
  1430. digix = n_digits + thousands_inc;
  1431. while (((wtab[thousandplex+1].flags & FLAG_MULTIPLE_SPACES) == 0) && CheckThousandsGroup(&word[digix], group_len)) {
  1432. for (ix = 0; ix < group_len; ix++) {
  1433. if (word[digix+ix] != '0') {
  1434. thousands_exact = 0;
  1435. break;
  1436. }
  1437. }
  1438. thousandplex++;
  1439. digix += group_len;
  1440. if ((word[digix] == tr->langopts.thousands_sep) || ((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[digix] == ' '))) {
  1441. suffix_ix = digix+2;
  1442. digix += thousands_inc;
  1443. } else
  1444. break;
  1445. }
  1446. }
  1447. if ((value == 0) && prev_thousands)
  1448. suppress_null = true;
  1449. if (tr->translator_name == L('h', 'u')) {
  1450. // variant form of numbers when followed by hyphen and a suffix starting with 'a' or 'e' (but not a, e, az, ez, azt, ezt
  1451. if ((wtab[thousandplex].flags & FLAG_HYPHEN_AFTER) && (thousands_exact == 1) && hu_number_e(&word[suffix_ix], thousandplex, value))
  1452. number_control |= 1; // use _1e variant of number
  1453. }
  1454. if ((word[n_digits] == tr->langopts.decimal_sep) && IsDigit09(word[n_digits+1])) {
  1455. // this "word" ends with a decimal point
  1456. Lookup(tr, "_dpt", ph_append);
  1457. decimal_point = 0x100;
  1458. } else if (suppress_null == false) {
  1459. if (thousands_inc > 0) {
  1460. if (thousandplex > 0) {
  1461. if ((suppress_null == false) && (LookupThousands(tr, value, thousandplex, thousands_exact, ph_append))) {
  1462. // found an exact match for N thousand
  1463. value = 0;
  1464. suppress_null = true;
  1465. }
  1466. }
  1467. }
  1468. } else if (speak_missing_thousands == 1) {
  1469. // speak this thousandplex if there was no word for the previous thousandplex
  1470. sprintf(string, "_0M%d", thousandplex+1);
  1471. if (Lookup(tr, string, buf1) == 0) {
  1472. sprintf(string, "_0M%d", thousandplex);
  1473. Lookup(tr, string, ph_append);
  1474. }
  1475. }
  1476. if ((ph_append[0] == 0) && (word[n_digits] == '.') && (thousandplex == 0))
  1477. Lookup(tr, "_.", ph_append);
  1478. if (thousandplex == 0) {
  1479. char *p2;
  1480. // look for combinations of the number with the next word
  1481. p = word;
  1482. while (IsDigit09(p[1])) p++; // just use the last digit
  1483. if (IsDigit09(p[-1])) {
  1484. p2 = p - 1;
  1485. if (LookupDictList(tr, &p2, buf_digit_lookup, flags, FLAG_SUFX, wtab)) // lookup 2 digits
  1486. n_digit_lookup = 2;
  1487. }
  1488. if ((buf_digit_lookup[0] == 0) && (*p != '0')) {
  1489. // LANG=hu ?
  1490. // not found, lookup only the last digit (?? but not if dot-ordinal has been found)
  1491. if (LookupDictList(tr, &p, buf_digit_lookup, flags, FLAG_SUFX, wtab)) // don't match '0', or entries with $only
  1492. n_digit_lookup = 1;
  1493. }
  1494. if (prev_thousands == 0) {
  1495. if ((decimal_point == 0) && (ordinal == 0)) {
  1496. // Look for special pronunciation for this number in isolation (LANG=kl)
  1497. sprintf(string, "_%dn", value);
  1498. if (Lookup(tr, string, ph_out))
  1499. return 1;
  1500. }
  1501. if (tr->langopts.numbers2 & NUM2_PERCENT_BEFORE) {
  1502. // LANG=si, say "percent" before the number
  1503. p2 = word;
  1504. while ((*p2 != ' ') && (*p2 != 0))
  1505. p2++;
  1506. if (p2[1] == '%') {
  1507. Lookup(tr, "%", ph_out);
  1508. ph_out += strlen(ph_out);
  1509. p2[1] = ' ';
  1510. }
  1511. }
  1512. }
  1513. }
  1514. LookupNum3(tr, value, ph_buf, suppress_null, thousandplex, prev_thousands | ordinal | decimal_point);
  1515. if ((thousandplex > 0) && (tr->langopts.numbers2 & NUM2_SWAP_THOUSANDS))
  1516. sprintf(ph_out, "%s%s%c%s%s", ph_zeros, ph_append, phonEND_WORD, ph_buf2, ph_buf);
  1517. else
  1518. sprintf(ph_out, "%s%s%s%c%s", ph_zeros, ph_buf2, ph_buf, phonEND_WORD, ph_append);
  1519. while (decimal_point) {
  1520. n_digits++;
  1521. decimal_count = 0;
  1522. while (IsDigit09(word[n_digits+decimal_count]))
  1523. decimal_count++;
  1524. max_decimal_count = 2;
  1525. switch (decimal_mode = (tr->langopts.numbers & NUM_DFRACTION_BITS))
  1526. {
  1527. case NUM_DFRACTION_4:
  1528. max_decimal_count = 5;
  1529. // fallthrough:
  1530. case NUM_DFRACTION_2:
  1531. // French/Polish decimal fraction
  1532. while (word[n_digits] == '0') {
  1533. Lookup(tr, "_0", buf1);
  1534. strcat(ph_out, buf1);
  1535. decimal_count--;
  1536. n_digits++;
  1537. }
  1538. if ((decimal_count <= max_decimal_count) && IsDigit09(word[n_digits])) {
  1539. LookupNum3(tr, atoi(&word[n_digits]), buf1, false, 0, 0);
  1540. strcat(ph_out, buf1);
  1541. n_digits += decimal_count;
  1542. }
  1543. break;
  1544. case NUM_DFRACTION_1: // italian, say "hundredths" if leading zero
  1545. case NUM_DFRACTION_5: // hungarian, always say "tenths" etc.
  1546. case NUM_DFRACTION_6: // kazakh, always say "tenths" etc, before the decimal fraction
  1547. LookupNum3(tr, atoi(&word[n_digits]), ph_buf, false, 0, 0);
  1548. if ((word[n_digits] == '0') || (decimal_mode != NUM_DFRACTION_1)) {
  1549. // decimal part has leading zeros, so add a "hundredths" or "thousandths" suffix
  1550. sprintf(string, "_0Z%d", decimal_count);
  1551. if (Lookup(tr, string, buf1) == 0)
  1552. break; // revert to speaking single digits
  1553. if (decimal_mode == NUM_DFRACTION_6)
  1554. strcat(ph_out, buf1);
  1555. else
  1556. strcat(ph_buf, buf1);
  1557. }
  1558. strcat(ph_out, ph_buf);
  1559. n_digits += decimal_count;
  1560. break;
  1561. case NUM_DFRACTION_3:
  1562. // Romanian decimal fractions
  1563. if ((decimal_count <= 4) && (word[n_digits] != '0')) {
  1564. LookupNum3(tr, atoi(&word[n_digits]), buf1, false, 0, 0);
  1565. strcat(ph_out, buf1);
  1566. n_digits += decimal_count;
  1567. }
  1568. break;
  1569. case NUM_DFRACTION_7:
  1570. // alternative form of decimal fraction digits, except the final digit
  1571. while (decimal_count-- > 1) {
  1572. sprintf(string, "_%cd", word[n_digits]);
  1573. if (Lookup(tr, string, buf1) == 0)
  1574. break;
  1575. n_digits++;
  1576. strcat(ph_out, buf1);
  1577. }
  1578. }
  1579. while (IsDigit09(c = word[n_digits]) && (strlen(ph_out) < (N_WORD_PHONEMES - 10))) {
  1580. // speak any remaining decimal fraction digits individually
  1581. value = word[n_digits++] - '0';
  1582. LookupNum2(tr, value, 0, 2, buf1);
  1583. len = strlen(ph_out);
  1584. sprintf(&ph_out[len], "%c%s", phonEND_WORD, buf1);
  1585. }
  1586. // something after the decimal part ?
  1587. if (Lookup(tr, "_dpt2", buf1))
  1588. strcat(ph_out, buf1);
  1589. if ((c == tr->langopts.decimal_sep) && IsDigit09(word[n_digits+1])) {
  1590. Lookup(tr, "_dpt", buf1);
  1591. strcat(ph_out, buf1);
  1592. } else
  1593. decimal_point = 0;
  1594. }
  1595. if ((ph_out[0] != 0) && (ph_out[0] != phonSWITCH)) {
  1596. int next_char;
  1597. char *p;
  1598. p = &word[n_digits+1];
  1599. p += utf8_in(&next_char, p);
  1600. if ((tr->langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
  1601. utf8_in(&next_char, p);
  1602. if (!iswalpha(next_char) && (thousands_exact == 0))
  1603. strcat(ph_out, str_pause); // don't add pause for 100s, 6th, etc.
  1604. }
  1605. *flags |= FLAG_FOUND;
  1606. speak_missing_thousands--;
  1607. if (skipwords)
  1608. dictionary_skipwords = skipwords;
  1609. return 1;
  1610. }
  1611. int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
  1612. {
  1613. if ((option_sayas == SAYAS_DIGITS1) || (wtab[0].flags & FLAG_INDIVIDUAL_DIGITS))
  1614. return 0; // speak digits individually
  1615. if (tr->langopts.numbers != 0)
  1616. return TranslateNumber_1(tr, word1, ph_out, flags, wtab, control);
  1617. return 0;
  1618. }