Fixes for ordinal numbers (lang=hu). git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@225 d46cf337-b52f-0410-862d-fd96e6ae7743master
| @@ -589,3 +589,16 @@ a e i o u | |||
| g h j J k l m n | |||
| N n^ p q R s S S; | |||
| t tS v w x z | |||
| Dictionary pa_dict | |||
| a a~ e E e~ E~ i I | |||
| i: i~ I~ o O o: O~ o~ | |||
| r- U u u~ U~ V V~ | |||
| : b bh c ch d d. d.h | |||
| dh f g gh H j J Jh | |||
| k kh l l. m n N n. | |||
| n^ p ph Q R s S t | |||
| t. t.h th v x z | |||
| @@ -262,8 +262,8 @@ U+32f n,0nsI2l'abI2k | |||
| // numeric | |||
| //_0 zero $text // TEST | |||
| _0 z'i@roU | |||
| _0 zero $text // TEST | |||
| //_0 z'i@roU | |||
| _1 w'0n | |||
| ?6 _1 w'Vn | |||
| _2 t'u: | |||
| @@ -323,6 +323,7 @@ _70o s'Ev@nti@ | |||
| _80o 'eIti@ | |||
| _90o n'aInti@ | |||
| _0Co h'Vndr@dT | |||
| _0M1o T'aUz@ndT | |||
| // ABBREVIATIONS | |||
| @@ -54,8 +54,9 @@ _0Z4 ti:zEzR2Ed | |||
| // ordinal numbers | |||
| _ord Edik | |||
| _0o n'ullAdik | |||
| _1ox ElSY: // number = '1' only | |||
| _2ox ma:Sodik // number = '2' only | |||
| _2ox ma:Sodik // number = '2' only | |||
| _1o EJ:Edik | |||
| _2o kEt:Edik | |||
| _3o hAR2_mAdik | |||
| @@ -72,6 +73,9 @@ _60o hAtvAnAdik | |||
| _80o n^oltsvAnAdik | |||
| _0Co sa:zAdik | |||
| _2Co ke:tsa:zAdik | |||
| _0M1o EzR2Edik | |||
| _1M1o EzR2Edik | |||
| _2M1o ke:tEzR2Edik | |||
| // When one of these words immediately follows a number and a dot, the dot does not indicate an ordinal number | |||
| január $alt | |||
| @@ -100,6 +104,15 @@ okt $alt | |||
| now $alt | |||
| dec $alt | |||
| // the following 7 lines are the names of the 7 Hungarian days of the week | |||
| hétfő $alt | |||
| kedd $alt | |||
| szerda $alt | |||
| csütörtök $alt | |||
| péntek $alt | |||
| szombat $alt | |||
| vasárnap $alt | |||
| // accent names | |||
| _lig ligAtu:R2A | |||
| _acu e:lES | |||
| @@ -129,7 +142,7 @@ mm milime:tER2 | |||
| a.m An^n^i||m'int $dot | |||
| dr doktoR2 $dot | |||
| gpu $abbrev | |||
| mvgyosz $abbrev | |||
| mvgyosz Emve:Je:oEs | |||
| OTP $abbrev | |||
| id idY:SEb: $dot | |||
| ill illEtvE $dot | |||
| @@ -21,7 +21,7 @@ _) alattvaló _!'AlAtvAlo: | |||
| .group b | |||
| b b | |||
| // bb b: | |||
| bb b: | |||
| biz (A b'iz, // bizalmas etc. | |||
| biedermeier bi:dER2ma:jER2 | |||
| @@ -32,7 +32,8 @@ _) alattvaló _!'AlAtvAlo: | |||
| ccs tS: | |||
| !) cz ts // in names which start with a capital letter | |||
| ar) csz (e ts | |||
| anar) ch (i C | |||
| harmin) c ts | |||
| anar) ch (i C | |||
| hierar) ch (i C | |||
| me) ch (a C | |||
| te) ch (n C | |||
| @@ -71,13 +72,16 @@ pá) c ts | |||
| .group d | |||
| d d | |||
| dd d: | |||
| dts tS: | |||
| dt t: | |||
| a) dsz ts: | |||
| cselé) d d | |||
| enge) dsz ts: | |||
| engedelmeske) dsz ts: | |||
| hazu) dsz ts: | |||
| mara) dsz ts: | |||
| tu) dsz ts: | |||
| kére) dz ts | |||
| // dd d: | |||
| dz dz | |||
| @@ -98,6 +102,7 @@ kére) dz ts | |||
| dj J: | |||
| min) dny n^ | |||
| .group e | |||
| e E | |||
| D_-_) es (_ %ES | |||
| @@ -115,7 +120,7 @@ _) egyezség _!'EJ:ESSe:g | |||
| .group g | |||
| g g | |||
| // gg g: | |||
| gg g: | |||
| gy J | |||
| ggy J: | |||
| @@ -125,7 +130,7 @@ _) egyezség _!'EJ:ESSe:g | |||
| A) gysz (A Js: | |||
| C) gyj J | |||
| A) gyj (A J: | |||
| ha) gyj J: | |||
| _e) gy (es J: | |||
| _e) gy (et_ J: | |||
| _e) gy (etlen J: | |||
| @@ -175,6 +180,7 @@ _) kétség ke:tS:e:g | |||
| .group l | |||
| l l | |||
| lj j | |||
| ly j | |||
| lly jj | |||
| @@ -182,9 +188,12 @@ _) kétség ke:tS:e:g | |||
| á) ll (j j | |||
| beszé) lj jj | |||
| bére) lj jj | |||
| fáj) l l | |||
| fájla) l (j jj | |||
| gondo) lj jjj | |||
| ke) lj jj | |||
| sajná) lj jj | |||
| sajná) lj jjj | |||
| llj jjj | |||
| C) ly (_ li | |||
| _kéth) ly li | |||
| szamue) lly lli | |||
| @@ -259,7 +268,7 @@ r R2 | |||
| sz s | |||
| ssz ss2 | |||
| szts stS: | |||
| föld) s (ánc S | |||
| s (színű S | |||
| hel) s (inki z | |||
| ki) s (ebb SS | |||
| @@ -296,18 +305,27 @@ pénze) s S | |||
| tc ts: | |||
| tt t: | |||
| tt (C tt | |||
| ttn t:n | |||
| ty c | |||
| // s) ty c: // | |||
| // z) ty c: // keztyű | |||
| ttj tc: | |||
| tty c: | |||
| apá) ts (ág tS: | |||
| becsüle) t t | |||
| cson) t t | |||
| cson) tj c | |||
| ezüs) t t | |||
| éle) t t | |||
| felej) ts tS | |||
| já) tsz (ani ts: | |||
| tsz ts: | |||
| szorí) ts (a tS: | |||
| ürí) ts (e tS: | |||
| mula) ts tS: | |||
| nemze) ts tS: | |||
| néme) ts (ég tS: | |||
| min) t t | |||
| tse tSE | |||
| tso tSo | |||
| @@ -315,6 +333,7 @@ tsa tSA | |||
| tsá tSa: | |||
| tsd tSd | |||
| tsé tSe: | |||
| t (cs t | |||
| ttsé tS:e: | |||
| C) tj c | |||
| A) tj (A c: | |||
| @@ -324,12 +343,14 @@ C) tj c | |||
| _ka) ty (n ti | |||
| vörösmar) ty ti | |||
| á) t (sza t | |||
| á) t (jár t | |||
| á) t (sző t | |||
| á) t (szú t | |||
| á) t (szű t | |||
| bizo) tts (ág tS: | |||
| állí) ts tS: | |||
| állapo) t (sor t | |||
| bará) ts tS: | |||
| dön) ts (ön tS | |||
| elhivato) tts (ág tS: | |||
| @@ -337,10 +358,16 @@ folyama) t (jel t | |||
| kiál) ts tS | |||
| köve) ts (ég tS: | |||
| kür) t t | |||
| kür) tj c | |||
| korlátozo) tts tS: | |||
| neve) ts (ég tS: | |||
| on) ts (u tS | |||
| szen)t (szék t | |||
| szé) t t | |||
| szöve) ts (ég tS: | |||
| pillan) ts tS | |||
| romlo) tts tS: | |||
| tar) ts tS | |||
| á) t (sor t | |||
| á) t (sé t | |||
| ne) t (c t | |||
| @@ -358,6 +385,7 @@ lé) t (szám t | |||
| ké) t (száz t | |||
| ö) t (száz t | |||
| ha) t (száz t | |||
| hé) t (száz t | |||
| vé) ts (ég tS: | |||
| @@ -399,6 +427,7 @@ befeje) z (te s | |||
| bi) z (tons s | |||
| bi) z (tos s | |||
| bron) z (sz z | |||
| csontvá) z z | |||
| e) z (t s | |||
| ho) z (ta s | |||
| ho) z (tá s | |||
| @@ -1,4 +1,5 @@ | |||
| //_xx விழுக்காடு $text // TESTING doesn't reduce vowels | |||
| // This file is UTF8 encoded | |||
| // Spelling to phoneme words and exceptions for Tamil | |||
| @@ -76,8 +77,8 @@ _! a:ctS:Vr,ijVkk,URi | |||
| U+bf9 ru:ba:j | |||
| // abbreviations | |||
| ரூ ru:ba:j $dot | |||
| Rs ru:ba:j $dot | |||
| ரூ ru:ba:j $dot | |||
| Rs ru:pi:z $dot | |||
| // numbers | |||
| _0 suz.ijVm // சுழியம் | |||
| @@ -139,20 +140,23 @@ _1M1x a:jirVm | |||
| _0M1 a:jirVttU | |||
| _1M1 a:jirVttU | |||
| _0M2x lVd.tSVm | |||
| _0M2x lVd.tSVm // 100,000 | |||
| _1M2x orUlVd.tSVm | |||
| _0M2 lVd.tSVttU | |||
| _1M2 orUlVd.tSVttU | |||
| _0M3x ko:d.i | |||
| _0M3x ko:d.i // 10,000,000 | |||
| _1M3x orUko:d.i | |||
| _0M3 ko:d.ie: | |||
| _1M3 orUko:d.ie: | |||
| _0M4 nu:RUko:d.i // not correct, but should be understandable | |||
| _1M4 nu:RUko:d.i | |||
| _0M5 patta:jiRUmko:d.i | |||
| _1M5 patta:jiRUmko:d.i | |||
| _0M4x a:jirVmko:d.i // 10,000,000,000 | |||
| _1M4x a:jirVmko:d.i | |||
| _0M4 a:jirVmko:d.ie: | |||
| _1M4 a:jirVmko:d.ie: | |||
| _0M5 a:jirVma:jirVmko:d.i // 10,000,000,000,000 | |||
| _1M5 a:jirVma:jirVmko:d.i | |||
| _dpt _pul.l.i | |||
| @@ -16,8 +16,11 @@ | |||
| ௭ 7 | |||
| ௮ 8 | |||
| ௯ 9 | |||
| ொ ொ | |||
| ோ ோ | |||
| ௌ ௌ | |||
| . | |||
| .group 0xe0ae // characters which start with UTF-8 bytes: [e0 ae] | |||
| ஂ // anusvara | |||
| @@ -39,7 +42,7 @@ | |||
| எ e | |||
| _) எ ;e // add a short [j] sound at start of word ? | |||
| ஏ e:: | |||
| ஏ e: | |||
| _) ஏ ;e:: // add a short [j] sound at start of word ? | |||
| ஐ aI | |||
| @@ -65,12 +68,14 @@ | |||
| ங NV | |||
| ங (B N | |||
| ச zV | |||
| ச (B z | |||
| ச sV // ?? [z] | |||
| ச (B s | |||
| _) ச sa | |||
| _) ச (B s | |||
| ச்ச tS:V | |||
| ச்ச (B tS: | |||
| ற்ச tS: | |||
| ற்ச (B tS: | |||
| ட்) ச tSV | |||
| ட்) ச (B tS | |||
| ஞ்) ச dZV | |||
| @@ -112,7 +117,7 @@ | |||
| ப்ப ppV | |||
| ப்ப (B pp | |||
| ட்) ப pV | |||
| ட்) ப (B pV | |||
| ட்) ப (B p | |||
| ற்) ப pV | |||
| ற்) ப (B p | |||
| ஃ) ப fV | |||
| @@ -184,6 +189,7 @@ | |||
| ௌ aU | |||
| ் // virama | |||
| ௗ : // aU length mark | |||
| @@ -1,4 +1,4 @@ | |||
| 60 phoneme tables | |||
| 61 phoneme tables | |||
| new total | |||
| base 103 103 | |||
| base2 26 124 | |||
| @@ -17,8 +17,8 @@ | |||
| fi 40 134 | |||
| fr 55 141 | |||
| fr_ca 11 141 | |||
| hi 60 149 | |||
| ta 20 152 | |||
| hi 62 151 | |||
| ta 20 154 | |||
| hu 23 119 | |||
| lv 29 126 | |||
| nl 28 126 | |||
| @@ -53,13 +53,14 @@ | |||
| hy 24 119 | |||
| da 21 118 | |||
| rw 15 131 | |||
| ml 13 151 | |||
| kn 15 151 | |||
| bn 59 155 | |||
| ne 18 157 | |||
| mr 12 149 | |||
| ml 13 153 | |||
| kn 15 153 | |||
| bn 59 157 | |||
| ne 18 159 | |||
| mr 12 151 | |||
| eu 6 125 | |||
| mn 15 114 | |||
| pa 12 152 | |||
| Data file Used by | |||
| b/b [b] base | |||
| @@ -1341,7 +1342,8 @@ vnasal/e_n [e~] af | |||
| vnasal/i_n [i~] pt | |||
| [i~] bn | |||
| [i:~] bn | |||
| vnasal/i_n2 [i~] hi | |||
| vnasal/i_n2 [I~] hi | |||
| [i~] hi | |||
| vnasal/m- [m-] sw | |||
| vnasal/n- [n-] sw | |||
| vnasal/nn- [N-] sw | |||
| @@ -1356,7 +1358,8 @@ vnasal/oo_n2 [O~] hi | |||
| [o] zh | |||
| [O~] bn | |||
| vnasal/oo_n3 [O~] pl | |||
| vnasal/u_n [u~] hi | |||
| vnasal/u_n [U~] hi | |||
| [u~] hi | |||
| [u~] pt | |||
| [u] zh | |||
| [u~] bn | |||
| @@ -1634,6 +1637,7 @@ vowel/aa_9 [a] fi | |||
| [a:] hi | |||
| [a:] bn | |||
| [a] ne | |||
| [a] pa | |||
| vowel/a_en [A] fr | |||
| vowel/@_bck [@] hi | |||
| [@/] hi | |||
| @@ -1641,6 +1645,7 @@ vowel/@_bck [@] hi | |||
| [@] bn | |||
| [V] ne | |||
| [@/] ne | |||
| [@] pa | |||
| vowel/e [e] base2 | |||
| [e:] en | |||
| [eI] en_n | |||
| @@ -1681,6 +1686,7 @@ vowel/e_2 [eI] en_sc | |||
| [e:] no | |||
| [e] bn | |||
| [e:] bn | |||
| [e] pa | |||
| vowel/e_3 [i] en_n | |||
| [e:] hu | |||
| [e] ku | |||
| @@ -1707,6 +1713,7 @@ vowel/ee_2 [E] en | |||
| [E] zh | |||
| [E#] ku | |||
| [&] da | |||
| [E] pa | |||
| vowel/ee#_2 [E-] sv | |||
| [E#] sq | |||
| vowel/ee_3 [&] af | |||
| @@ -1833,6 +1840,7 @@ vowel/i_fnt [i:] en_wi | |||
| [i] bn | |||
| [i:] bn | |||
| [i:] mr | |||
| [i] pa | |||
| vowel/ii [I] en_n | |||
| [I2] en_n | |||
| [I] en_rp | |||
| @@ -1864,6 +1872,7 @@ vowel/ii_3 [I] cy | |||
| [I] no | |||
| [I] tr | |||
| [I] bn | |||
| [I] pa | |||
| vowel/ii#_3 [I2] en_us | |||
| vowel/ii_4 [I] en | |||
| [I2] en | |||
| @@ -1901,6 +1910,8 @@ vowel/@_low [3] en_rp | |||
| [@/] ne | |||
| [@] mr | |||
| [V] mr | |||
| [@] pa | |||
| [V] pa | |||
| vowel/@_low2 [@/] en_us | |||
| [@2] en_us | |||
| vowel/o [o] base2 | |||
| @@ -1937,6 +1948,7 @@ vowel/o_2 [o:] cy | |||
| [o] vi | |||
| [o] da | |||
| [o:] bn | |||
| [o:] pa | |||
| vowel/o-_2 [V] en_n | |||
| [V] en_wm | |||
| vowel/o_3 [oU] en_sc | |||
| @@ -2005,6 +2017,7 @@ vowel/oo_4 [O] base2 | |||
| [O:] hi | |||
| [O] it | |||
| [O] bn | |||
| [O] pa | |||
| vowel/oo_5 [O] pl | |||
| [O] is | |||
| [O] sq | |||
| @@ -2095,6 +2108,7 @@ vowel/u_bck [u] base2 | |||
| [U] mr | |||
| [u:] mr | |||
| [u] mn | |||
| [u] pa | |||
| vowel/u_bck2 [u] fr | |||
| [u:] fr | |||
| [u:] la | |||
| @@ -2120,6 +2134,7 @@ vowel/uu_bck [U] en_wi | |||
| [u] zhy | |||
| [U] bn | |||
| [U] mn | |||
| [U] pa | |||
| vowel/V [3] en_sc | |||
| vowel/V_2 [V] en | |||
| [a] af | |||
| @@ -2130,6 +2145,7 @@ vowel/V_3 [V] en_rp | |||
| [V] hi | |||
| [V] ta | |||
| [V] bn | |||
| [V] pa | |||
| vowel/V_4 [V] en_sc | |||
| [V] da | |||
| vowel/V_6 [V] en_us | |||
| @@ -166,6 +166,12 @@ phoneme i~ | |||
| formants vnasal/i_n2 | |||
| endphoneme | |||
| phoneme I~ | |||
| vowel starttype (i) endtype (i) | |||
| length 170 | |||
| formants vnasal/i_n2 | |||
| endphoneme | |||
| phoneme e~ | |||
| vowel long starttype (e) endtype (e) | |||
| length 220 | |||
| @@ -173,7 +179,7 @@ phoneme e~ | |||
| endphoneme | |||
| phoneme E~ | |||
| vowel starttype (e) endtype (e) | |||
| vowel long starttype (e) endtype (e) | |||
| length 230 | |||
| formants vnasal/ee_n2 | |||
| endphoneme | |||
| @@ -208,6 +214,12 @@ phoneme u~ | |||
| formants vnasal/u_n | |||
| endphoneme | |||
| phoneme U~ | |||
| vowel starttype (u) endtype (u) | |||
| length 170 | |||
| formants vnasal/u_n | |||
| endphoneme | |||
| phoneme r- | |||
| vowel starttype (@) endtype (@) | |||
| @@ -39,7 +39,7 @@ endphoneme | |||
| phoneme e: | |||
| vowel starttype (e) endtype (e) | |||
| length 270 | |||
| length 250 | |||
| formants vowel/e | |||
| endphoneme | |||
| @@ -1369,3 +1369,5 @@ include ph_basque | |||
| phonemetable mn base | |||
| include ph_mongolian | |||
| phonemetable pa hi | |||
| include ph_punjabi | |||
| @@ -49,6 +49,7 @@ static int transpose_min; | |||
| static int transpose_max; | |||
| static int text_mode = 0; | |||
| static int debug_flag = 0; | |||
| static int error_need_dictionary = 0; | |||
| static int hash_counts[N_HASH_DICT]; | |||
| static char *hash_chains[N_HASH_DICT]; | |||
| @@ -223,7 +224,6 @@ static int compile_line(char *linebuf, char *dict_line, int *hash) | |||
| static char nullstring[] = {0}; | |||
| WORD_TAB winfo; | |||
| char decoded_phonemes[128]; | |||
| comment = NULL; | |||
| text_not_phonemes = 0; | |||
| @@ -432,23 +432,37 @@ step=1; // TEST | |||
| if(text_mode) | |||
| text_not_phonemes = 1; | |||
| if(text_not_phonemes != translator->langopts.textmode) | |||
| { | |||
| flag_codes[n_flag_codes++] = BITNUM_FLAG_TEXTMODE; | |||
| } | |||
| if(text_not_phonemes) | |||
| { | |||
| if(word[0] == '_') | |||
| { | |||
| // This is a special word, used by eSpeak. Translate this into phonemes now | |||
| // memset(&winfo,0,sizeof(winfo)); | |||
| // TranslateWord(translator,phonetic,0,&winfo); // but *_dict is not loaded ? | |||
| // DecodePhonemes(word_phonemes,decoded_phonemes); | |||
| memset(&winfo,0,sizeof(winfo)); | |||
| strcat(phonetic, " "); // need a space to indicate word-boundary | |||
| // PROBLEM vowel reductions are not applied to the translated phonemes | |||
| // condition rules are not applied | |||
| TranslateWord(translator,phonetic,0,&winfo); | |||
| text_not_phonemes = 0; | |||
| strncpy0(encoded_ph, word_phonemes, N_WORD_BYTES-4); | |||
| if((word_phonemes[0] == 0) && (error_need_dictionary < 3)) | |||
| { | |||
| // the dictionary was not loaded, we need a second attempt | |||
| error_need_dictionary++; | |||
| fprintf(f_log,"%5d: Need to compile dictionary again\n",linenum); | |||
| } | |||
| { | |||
| //char decoded_phonemes[128]; | |||
| //DecodePhonemes(word_phonemes,decoded_phonemes); | |||
| //printf("Translator %x %s [%s] [%s]\n",translator->translator_name,word,phonetic,decoded_phonemes); | |||
| } | |||
| } | |||
| else | |||
| { | |||
| // this is replacement text, so don't encode as phonemes. Restrict the length of the replacement word | |||
| strncpy0(encoded_ph,phonetic,N_WORD_BYTES-4); | |||
| } | |||
| // this is replacement text, so don't encode as phonemes. Restrict the length of the replacement word | |||
| strncpy0(encoded_ph,phonetic,N_WORD_BYTES-4); | |||
| } | |||
| else | |||
| { | |||
| @@ -473,6 +487,12 @@ step=1; // TEST | |||
| } | |||
| } | |||
| if(text_not_phonemes != translator->langopts.textmode) | |||
| { | |||
| flag_codes[n_flag_codes++] = BITNUM_FLAG_TEXTMODE; | |||
| } | |||
| if(sscanf(word,"U+%x",&wc) == 1) | |||
| { | |||
| // Character code | |||
| @@ -1599,6 +1619,7 @@ int CompileDictionary(const char *dsource, const char *dict_name, FILE *log, cha | |||
| char path[sizeof(path_home)+40]; // path_dsource+20 | |||
| error_count = 0; | |||
| error_need_dictionary = 0; | |||
| memset(letterGroupsDefined,0,sizeof(letterGroupsDefined)); | |||
| debug_flag = flags & 1; | |||
| @@ -241,9 +241,6 @@ int LoadDictionary(Translator *tr, const char *name, int no_error) | |||
| strcpy(dictionary_name,name); // currently loaded dictionary name | |||
| if(no_error) // don't load dictionary, just set the dictionary_name | |||
| return(1); | |||
| // Load a pronunciation data file into memory | |||
| // bytes 0-3: offset to rules data | |||
| // bytes 4-7: number of hash table entries | |||
| @@ -45,12 +45,12 @@ | |||
| #ifdef deleted | |||
| static const char *about_string = "espeakedit: %s\nAuthor: Jonathan Duddington (c) 2007\n\n" | |||
| static const char *about_string = "espeakedit: %s\nAuthor: Jonathan Duddington (c) 2009\n\n" | |||
| "Licensed under GNU General Public License version 3\n" | |||
| "http://espeak.sourceforge.net/"; | |||
| #endif | |||
| static const char *about_string = "<font size=0><b>espeakedit </b> %s<br>Author: Jonathan Duddington (c) 2007<br>" | |||
| static const char *about_string = "<font size=0><b>espeakedit </b> %s<br>Author: Jonathan Duddington (c) 2009<br>" | |||
| "<a href=\"http://espeak.sourceforge.net/\">http://espeak.sourceforge.net</a><br>" | |||
| "Licensed under <a href=\"http://espeak.sourceforge.net/license.html\">GNU General Public License version 3</a></font>"; | |||
| @@ -585,6 +585,7 @@ void MyFrame::OnTools(wxCommandEvent& event) | |||
| sprintf(fname_log,"%s%s",path_dsource,"dict_log"); | |||
| log = fopen(fname_log,"w"); | |||
| LoadDictionary(translator, dictionary_name, 0); | |||
| if((err = CompileDictionary(path_dsource,dictionary_name,log,err_fname,debug_flag)) < 0) | |||
| { | |||
| wxLogError(_T("Can't access file:\n")+wxString(err_fname,wxConvLocal)); | |||
| @@ -576,7 +576,7 @@ void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_b | |||
| ph_stress[0] = phonSTRESS_P; | |||
| ph_stress[1] = 0; | |||
| for(p=(unsigned char *)ph_buf3; *p != 0; p++) | |||
| for(p=(unsigned char *)ph_buf3; (*p != 0) && (phoneme_tab[*p] != NULL); p++) | |||
| { | |||
| if(phoneme_tab[*p]->type == phSTRESS) | |||
| ph_stress[0] = 0; // stress is already marked | |||
| @@ -865,6 +865,7 @@ static const char *M_Variant(int value) | |||
| static int LookupThousands(Translator *tr, int value, int thousandplex, int thousands_exact, char *ph_out) | |||
| {//======================================================================================================= | |||
| // thousands_exact: bit 0 no hundreds,tens,or units, bit 1 ordinal number | |||
| int found; | |||
| int found_value=0; | |||
| char string[12]; | |||
| @@ -874,11 +875,20 @@ static int LookupThousands(Translator *tr, int value, int thousandplex, int thou | |||
| ph_of[0] = 0; | |||
| // first look for a match with the exact value of thousands | |||
| if(thousands_exact) | |||
| if(thousands_exact & 1) | |||
| { | |||
| // is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta) | |||
| sprintf(string,"_%dM%dx",value,thousandplex); | |||
| found_value = Lookup(tr, string, ph_thousands); | |||
| if(thousands_exact & 2) | |||
| { | |||
| // ordinal number | |||
| sprintf(string,"_%dM%do",value,thousandplex); | |||
| found_value = Lookup(tr, string, ph_thousands); | |||
| } | |||
| if(!found_value) | |||
| { | |||
| // is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta) | |||
| sprintf(string,"_%dM%dx",value,thousandplex); | |||
| found_value = Lookup(tr, string, ph_thousands); | |||
| } | |||
| } | |||
| if(found_value == 0) | |||
| { | |||
| @@ -894,11 +904,20 @@ static int LookupThousands(Translator *tr, int value, int thousandplex, int thou | |||
| } | |||
| found = 0; | |||
| if(thousands_exact) | |||
| if(thousands_exact & 1) | |||
| { | |||
| // is there a different pronunciation if there are no hundreds,tens,or units ? | |||
| sprintf(string,"_%s%dx",M_Variant(value), thousandplex); | |||
| found = Lookup(tr, string, ph_thousands); | |||
| if(thousands_exact & 2) | |||
| { | |||
| // ordinal number | |||
| sprintf(string,"_%s%do",M_Variant(value), thousandplex); | |||
| found = Lookup(tr, string, ph_thousands); | |||
| } | |||
| if(!found) | |||
| { | |||
| // is there a different pronunciation if there are no hundreds,tens,or units ? | |||
| sprintf(string,"_%s%dx",M_Variant(value), thousandplex); | |||
| found = Lookup(tr, string, ph_thousands); | |||
| } | |||
| } | |||
| if(found == 0) | |||
| { | |||
| @@ -994,7 +1013,7 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out) | |||
| { | |||
| units = (value % 10); | |||
| if((control & 1) && ((units == 0) || (tr->langopts.numbers & 0x10))) | |||
| if((control & 1) && ((units == 0) || (tr->langopts.numbers & NUM_SWAP_TENS))) | |||
| { | |||
| sprintf(string,"_%dXo",value / 10); | |||
| if(Lookup(tr, string, ph_tens) != 0) | |||
| @@ -1026,7 +1045,7 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out) | |||
| sprintf(string,"_%df",units); | |||
| found = Lookup(tr, string, ph_digits); | |||
| } | |||
| if((control & 1) && ((tr->langopts.numbers & 0x10) == 0)) | |||
| if((control & 1) && ((tr->langopts.numbers & NUM_SWAP_TENS) == 0)) | |||
| { | |||
| // ordinal | |||
| sprintf(string,"_%do",units); | |||
| @@ -1046,16 +1065,16 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out) | |||
| if((control & 1) && (found_ordinal == 0) && (ph_ordinal[0] == 0)) | |||
| { | |||
| if((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & 0x10))) | |||
| if((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & NUM_SWAP_TENS))) | |||
| Lookup(tr, "_ord20", ph_ordinal); | |||
| if(ph_ordinal[0] == 0) | |||
| Lookup(tr, "_ord", ph_ordinal); | |||
| } | |||
| if((tr->langopts.numbers & 0x30) && (ph_tens[0] != 0) && (ph_digits[0] != 0)) | |||
| if((tr->langopts.numbers & (NUM_SWAP_TENS | NUM_AND_UNITS)) && (ph_tens[0] != 0) && (ph_digits[0] != 0)) | |||
| { | |||
| Lookup(tr, "_0and", ph_and); | |||
| if(tr->langopts.numbers & 0x10) | |||
| if(tr->langopts.numbers & NUM_SWAP_TENS) | |||
| sprintf(ph_out,"%s%s%s%s",ph_digits, ph_and, ph_tens, ph_ordinal); | |||
| else | |||
| sprintf(ph_out,"%s%s%s%s",ph_tens, ph_and, ph_digits, ph_ordinal); | |||
| @@ -1063,7 +1082,7 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out) | |||
| } | |||
| else | |||
| { | |||
| if(tr->langopts.numbers & 0x200) | |||
| if(tr->langopts.numbers & NUM_SINGLE_VOWEL) | |||
| { | |||
| // remove vowel from the end of tens if units starts with a vowel (LANG=Italian) | |||
| if(((ix = strlen(ph_tens)-1) >= 0) && (ph_digits[0] != 0)) | |||
| @@ -1078,7 +1097,7 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out) | |||
| sprintf(ph_out,"%s%s%s",ph_tens, ph_digits, ph_ordinal); | |||
| } | |||
| if(tr->langopts.numbers & 0x100) | |||
| if(tr->langopts.numbers & NUM_SINGLE_STRESS) | |||
| { | |||
| // only one primary stress | |||
| found = 0; | |||
| @@ -1107,6 +1126,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null | |||
| int tensunits; | |||
| int x; | |||
| int exact; | |||
| int ordinal; | |||
| char string[12]; // for looking up entries in **_list | |||
| char buf1[100]; | |||
| char buf2[100]; | |||
| @@ -1117,6 +1137,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null | |||
| char ph_hundred_and[12]; | |||
| char ph_thousand_and[12]; | |||
| ordinal = control & 2; | |||
| hundreds = value / 100; | |||
| tensunits = value % 100; | |||
| buf1[0] = 0; | |||
| @@ -1127,7 +1148,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null | |||
| ph_thousand_and[0] = 0; | |||
| found = 0; | |||
| if((control & 2) && (tensunits == 0)) | |||
| if(ordinal && (tensunits == 0)) | |||
| { | |||
| // ordinal number, with no tens or units | |||
| found = Lookup(tr, "_0Co", ph_100); | |||
| @@ -1137,7 +1158,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null | |||
| Lookup(tr, "_0C", ph_100); | |||
| } | |||
| if(((tr->langopts.numbers & 0x0800) != 0) && (hundreds == 19)) | |||
| if(((tr->langopts.numbers & NUM_1900) != 0) && (hundreds == 19)) | |||
| { | |||
| // speak numbers such as 1984 as years: nineteen-eighty-four | |||
| // ph_100[0] = 0; // don't say "hundred", we also need to suppress "and" | |||
| @@ -1151,7 +1172,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null | |||
| if ((value % 1000) == 0) | |||
| exact = 1; | |||
| if(LookupThousands(tr, hundreds / 10, thousandplex+1, exact, ph_10T) == 0) | |||
| if(LookupThousands(tr, hundreds / 10, thousandplex+1, exact | ordinal, ph_10T) == 0) | |||
| { | |||
| x = 0; | |||
| if(tr->langopts.numbers2 & (1 << (thousandplex+1))) | |||
| @@ -1173,7 +1194,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null | |||
| ph_digits[0] = 0; | |||
| if(hundreds > 0) | |||
| { | |||
| if((tr->langopts.numbers & 0x100000) && ((control & 1) || (ph_thousands[0] != 0))) | |||
| if((tr->langopts.numbers & NUM_AND_HUNDRED) && ((control & 1) || (ph_thousands[0] != 0))) | |||
| { | |||
| Lookup(tr, "_0and", ph_thousand_and); | |||
| } | |||
| @@ -1184,8 +1205,18 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null | |||
| if(tensunits == 0) | |||
| { | |||
| // is there a special pronunciation for exactly n00 ? | |||
| sprintf(string,"_%dC0",hundreds); | |||
| found = Lookup(tr, string, ph_digits); | |||
| if(ordinal) | |||
| { | |||
| // ordinal number | |||
| sprintf(string, "_%dCo", hundreds); | |||
| found = Lookup(tr, string, ph_digits); | |||
| } | |||
| if(!found) | |||
| { | |||
| sprintf(string,"_%dC0",hundreds); | |||
| found = Lookup(tr, string, ph_digits); | |||
| } | |||
| } | |||
| if(!found) | |||
| { | |||
| @@ -1210,7 +1241,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null | |||
| } | |||
| ph_hundred_and[0] = 0; | |||
| if((tr->langopts.numbers & 0x40) && (tensunits != 0)) | |||
| if((tr->langopts.numbers & NUM_HUNDRED_AND) && (tensunits != 0)) | |||
| { | |||
| if((value > 100) || ((control & 1) && (thousandplex==0))) | |||
| { | |||
| @@ -1227,7 +1258,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null | |||
| if(thousandplex==0) | |||
| { | |||
| x = 2; // allow "eins" for 1 rather than "ein" | |||
| if(control & 2) | |||
| if(ordinal) | |||
| x = 3; // ordinal number | |||
| if((value < 100) && !(control & 1)) | |||
| x |= 4; // tens and units only, no higher digits | |||
| @@ -1240,7 +1271,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null | |||
| if(LookupNum2(tr, tensunits, x, buf2) != 0) | |||
| { | |||
| if(tr->langopts.numbers & 0x80) | |||
| if(tr->langopts.numbers & NUM_SINGLE_AND) | |||
| ph_hundred_and[0] = 0; // don't put 'and' after 'hundred' if there's 'and' between tens and units | |||
| } | |||
| } | |||
| @@ -1293,7 +1324,7 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned | |||
| value = this_value = atoi(word); | |||
| ph_ordinal2[0] = 0; | |||
| if((tr->langopts.numbers & 0x10000) && (word[ix] == '.') && !isdigit(word[ix+2])) | |||
| if((tr->langopts.numbers & NUM_ORDINAL_DOT) && (word[ix] == '.') && !isdigit(word[ix+2])) | |||
| { | |||
| // ordinal number is indicated by dot after the number | |||
| ordinal = 2; | |||
| @@ -1352,7 +1383,7 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned | |||
| prev_thousands = 1; | |||
| } | |||
| else | |||
| if((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & 0x1000)) | |||
| if((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & NUM_ALLOW_SPACE)) | |||
| { | |||
| // thousands groups can be separated by spaces | |||
| if((n_digits == 3) && isdigit(word[-2])) | |||
| @@ -1373,7 +1404,7 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned | |||
| } | |||
| } | |||
| if((tr->langopts.numbers & 0x1000) && (word[n_digits] == ' ')) | |||
| if((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[n_digits] == ' ')) | |||
| thousands_inc = 1; | |||
| else | |||
| if(word[n_digits] == tr->langopts.thousands_sep) | |||
| @@ -1462,9 +1493,9 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned | |||
| max_decimal_count = 2; | |||
| switch(decimal_mode = (tr->langopts.numbers & 0xe000)) | |||
| { | |||
| case 0x8000: | |||
| case NUM_DFRACTION_4: | |||
| max_decimal_count = 5; | |||
| case 0x4000: | |||
| case NUM_DFRACTION_2: | |||
| // French/Polish decimal fraction | |||
| while(word[n_digits] == '0') | |||
| { | |||
| @@ -1481,8 +1512,8 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned | |||
| } | |||
| break; | |||
| case 0x2000: // italian, say "hundredths" is leading zero | |||
| case 0xa000: // hungarian, always say "tenths" etc. | |||
| case NUM_DFRACTION_1: // italian, say "hundredths" is leading zero | |||
| case NUM_DFRACTION_5: // hungarian, always say "tenths" etc. | |||
| if(decimal_count <= 4) | |||
| { | |||
| LookupNum3(tr, atoi(&word[n_digits]), ph_buf, 0,0,0); | |||
| @@ -1500,7 +1531,7 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned | |||
| } | |||
| break; | |||
| case 0x6000: | |||
| case NUM_DFRACTION_3: | |||
| // Romanian decimal fractions | |||
| if((decimal_count <= 4) && (word[n_digits] != '0')) | |||
| { | |||
| @@ -1560,7 +1591,7 @@ int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *fla | |||
| if(option_sayas == SAYAS_DIGITS1) | |||
| return(0); // speak digits individually | |||
| if((tr->langopts.numbers & 0x3) == 1) | |||
| if(tr->langopts.numbers != 0) | |||
| return(TranslateNumber_1(tr, word1, ph_out, flags, wflags)); | |||
| return(0); | |||
| @@ -67,8 +67,10 @@ static const char *punct_stop = ".:!?"; // pitch fall if followed by space | |||
| static const char *punct_close = ")]}>;'\""; // always pitch fall unless followed by alnum | |||
| // alter tone for announce punctuation or capitals | |||
| static const char *tone_punct_on = "\0016T"; // add reverberation, lower pitch | |||
| static const char *tone_punct_off = "\001T"; | |||
| //static const char *tone_punct_on = "\0016T"; // add reverberation, lower pitch | |||
| //static const char *tone_punct_off = "\001T\001P"; | |||
| static const char *tone_punct_on = ""; // add reverberation, lower pitch TEST apply no effect | |||
| static const char *tone_punct_off = ""; | |||
| // ignore these characters | |||
| static const unsigned short chars_ignore[] = { | |||
| @@ -1903,6 +1905,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix | |||
| int c1=' '; // current character | |||
| int c2; // next character | |||
| int cprev=' '; // previous character | |||
| int cprev2=' '; | |||
| int parag; | |||
| int ix = 0; | |||
| int j; | |||
| @@ -1916,6 +1919,8 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix | |||
| int any_alnum = 0; | |||
| int self_closing; | |||
| int punct_data; | |||
| int is_punctuation; | |||
| int save_c2; | |||
| int stressed_word = 0; | |||
| const char *p; | |||
| wchar_t xml_buf[N_XML_BUF+1]; | |||
| @@ -1975,6 +1980,7 @@ f_input = f_in; // for GetC etc | |||
| } | |||
| } | |||
| cprev2 = cprev; | |||
| cprev = c1; | |||
| c1 = c2; | |||
| @@ -2324,9 +2330,9 @@ if(option_ssml) parag=1; | |||
| if((iswspace(c2) || (punct_data & 0x8000) || IsBracket(c2) || (c2=='?') || (c2=='-') || Eof())) | |||
| { | |||
| // note: (c2='?') is for when a smart-quote has been replaced by '?' | |||
| buf[ix] = ' '; | |||
| buf[ix+1] = 0; | |||
| is_punctuation = 1; | |||
| if((c1 == '.') && (cprev == '.')) | |||
| { | |||
| c1 = 0x2026; | |||
| @@ -2334,45 +2340,59 @@ if(option_ssml) parag=1; | |||
| } | |||
| nl_count = 0; | |||
| while(!Eof() && iswspace(c2)) | |||
| { | |||
| if(c2 == '\n') | |||
| nl_count++; | |||
| c2 = GetC(); // skip past space(s) | |||
| } | |||
| if(!Eof()) | |||
| save_c2 = c2; | |||
| if(iswspace(c2)) | |||
| { | |||
| UngetC(c2); | |||
| while(!Eof() && iswspace(c2)) | |||
| { | |||
| if(c2 == '\n') | |||
| nl_count++; | |||
| c2 = GetC(); // skip past space(s) | |||
| } | |||
| if(!Eof()) | |||
| { | |||
| UngetC(c2); | |||
| } | |||
| } | |||
| if((nl_count==0) && (c1 == '.')) | |||
| { | |||
| if(iswdigit(cprev) && (tr->langopts.numbers & 0x10000) && islower(c2)) | |||
| // if(iswdigit(cprev) && (tr->langopts.numbers & NUM_ORDINAL_DOT) && islower(c2)) | |||
| if(iswdigit(cprev) && (tr->langopts.numbers & NUM_ORDINAL_DOT)) | |||
| { | |||
| // dot after a number indicates an ordinal number | |||
| c2 = '.'; | |||
| continue; | |||
| is_punctuation = 0; | |||
| } | |||
| else | |||
| if(iswlower(c2)) | |||
| { | |||
| c2 = ' '; | |||
| continue; // next word has no capital letter, this dot is probably from an abbreviation | |||
| // next word has no capital letter, this dot is probably from an abbreviation | |||
| c1 = ' '; | |||
| is_punctuation = 0; | |||
| } | |||
| if(any_alnum==0) | |||
| { | |||
| c2 = ' '; // no letters or digits yet, so probably not a sentence terminator | |||
| continue; | |||
| c1 = ' '; // no letters or digits yet, so probably not a sentence terminator | |||
| is_punctuation = 0; | |||
| } | |||
| } | |||
| punct_data = punct_attributes[punct]; | |||
| if(nl_count > 1) | |||
| c2 = save_c2; | |||
| if(is_punctuation) | |||
| { | |||
| if((punct_data == CLAUSE_QUESTION) || (punct_data == CLAUSE_EXCLAMATION)) | |||
| return(punct_data + 35); // with a longer pause | |||
| return(CLAUSE_PARAGRAPH); | |||
| buf[ix] = ' '; | |||
| buf[ix+1] = 0; | |||
| punct_data = punct_attributes[punct]; | |||
| if(nl_count > 1) | |||
| { | |||
| if((punct_data == CLAUSE_QUESTION) || (punct_data == CLAUSE_EXCLAMATION)) | |||
| return(punct_data + 35); // with a longer pause | |||
| return(CLAUSE_PARAGRAPH); | |||
| } | |||
| return(punct_data); // only recognise punctuation if followed by a blank or bracket/quote | |||
| } | |||
| return(punct_data); // only recognise punctuation if followed by a blank or bracket/quote | |||
| } | |||
| } | |||
| @@ -35,7 +35,7 @@ | |||
| #include "translate.h" | |||
| #include "wave.h" | |||
| const char *version_string = "1.41.08 04.Oct.09"; | |||
| const char *version_string = "1.41.11 09.Oct.09"; | |||
| const int version_phdata = 0x014100; | |||
| int option_device_number = -1; | |||
| @@ -48,6 +48,7 @@ | |||
| #define OFFSET_ARMENIAN 0x530 | |||
| #define OFFSET_DEVANAGARI 0x900 | |||
| #define OFFSET_BENGALI 0x980 | |||
| #define OFFSET_GURMUKHI 0xa00 | |||
| #define OFFSET_TAMIL 0xb80 | |||
| #define OFFSET_KANNADA 0xc80 | |||
| #define OFFSET_MALAYALAM 0xd00 | |||
| @@ -165,6 +166,7 @@ static Translator* NewTranslator(void) | |||
| tr->langopts.max_roman = 49; | |||
| tr->langopts.thousands_sep = ','; | |||
| tr->langopts.decimal_sep = '.'; | |||
| tr->langopts.break_numbers = BREAK_THOUSANDS; // 1000, 1000,000 1,000,000 etc | |||
| memcpy(tr->punct_to_tone, punctuation_to_tone, sizeof(tr->punct_to_tone)); | |||
| @@ -263,7 +265,7 @@ Translator *SelectTranslator(const char *name) | |||
| tr->langopts.param[LOPT_PREFIXES] = 1; | |||
| SetLetterVowel(tr,'y'); // add 'y' to vowels | |||
| tr->langopts.numbers = 0x8d1 + NUM_ROMAN; | |||
| tr->langopts.numbers = NUM_SWAP_TENS | NUM_HUNDRED_AND | NUM_SINGLE_AND | NUM_ROMAN | NUM_1900; | |||
| tr->langopts.accents = 1; | |||
| } | |||
| break; | |||
| @@ -283,7 +285,7 @@ Translator *SelectTranslator(const char *name) | |||
| SetLetterBitsRange(tr,LETTERGP_F,0x3e,0x4c); // vowel signs, but not virama | |||
| tr->langopts.numbers = 0x1; | |||
| tr->langopts.numbers2 = NUM2_100000; | |||
| tr->langopts.break_numbers = 0x24924aa8; // for languages which have numbers for 100,000 and 100,00,000, eg Hindi | |||
| } | |||
| break; | |||
| @@ -305,7 +307,7 @@ Translator *SelectTranslator(const char *name) | |||
| tr->langopts.unstressed_wd2 = 2; | |||
| tr->langopts.param[LOPT_SONORANT_MIN] = 120; // limit the shortening of sonorants before short vowels | |||
| tr->langopts.numbers = 0x401; | |||
| tr->langopts.numbers = NUM_OMIT_1_HUNDRED; | |||
| SetLetterVowel(tr,'w'); // add letter to vowels and remove from consonants | |||
| SetLetterVowel(tr,'y'); | |||
| @@ -319,7 +321,7 @@ Translator *SelectTranslator(const char *name) | |||
| tr->langopts.stress_rule = 0; | |||
| SetLetterVowel(tr,'y'); | |||
| tr->langopts.numbers = 0x10c59; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SWAP_TENS | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_ORDINAL_DOT | NUM_1900; | |||
| } | |||
| break; | |||
| @@ -333,7 +335,7 @@ Translator *SelectTranslator(const char *name) | |||
| tr->langopts.param[LOPT_PREFIXES] = 1; | |||
| memcpy(tr->stress_lengths,stress_lengths_de,sizeof(tr->stress_lengths)); | |||
| tr->langopts.numbers = 0x11419 + NUM_ROMAN; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SWAP_TENS | NUM_OMIT_1_HUNDRED | NUM_ALLOW_SPACE | NUM_ORDINAL_DOT | NUM_ROMAN; | |||
| SetLetterVowel(tr,'y'); | |||
| } | |||
| break; | |||
| @@ -344,7 +346,7 @@ Translator *SelectTranslator(const char *name) | |||
| SetupTranslator(tr,stress_lengths_en,NULL); | |||
| tr->langopts.stress_rule = 0; | |||
| tr->langopts.numbers = 0x841 + NUM_ROMAN; | |||
| tr->langopts.numbers = NUM_HUNDRED_AND | NUM_ROMAN | NUM_1900; | |||
| tr->langopts.param[LOPT_COMBINE_WORDS] = 2; // allow "mc" to cmbine with the following word | |||
| } | |||
| break; | |||
| @@ -381,7 +383,7 @@ Translator *SelectTranslator(const char *name) | |||
| tr->langopts.unstressed_wd2 = 2; | |||
| tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels | |||
| tr->langopts.numbers = 0x109; | |||
| tr->langopts.numbers = NUM_SINGLE_STRESS | NUM_DECIMAL_COMMA; | |||
| tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands | |||
| if(name2 == L_grc) | |||
| @@ -410,7 +412,7 @@ Translator *SelectTranslator(const char *name) | |||
| tr->langopts.unstressed_wd1 = 3; | |||
| tr->langopts.unstressed_wd2 = 2; | |||
| tr->langopts.numbers = 0x1409 + NUM_ROMAN; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED | NUM_ALLOW_SPACE | NUM_ROMAN; | |||
| } | |||
| break; | |||
| @@ -435,7 +437,7 @@ Translator *SelectTranslator(const char *name) | |||
| tr->langopts.unstressed_wd2 = 2; | |||
| tr->langopts.param[LOPT_SONORANT_MIN] = 120; // limit the shortening of sonorants before short vowels | |||
| tr->langopts.numbers = 0x529 + NUM_ROMAN + NUM_ROMAN_AFTER; | |||
| tr->langopts.numbers = NUM_SINGLE_STRESS | NUM_DECIMAL_COMMA | NUM_AND_UNITS | NUM_OMIT_1_HUNDRED | NUM_ROMAN | NUM_ROMAN_AFTER; | |||
| if(name2 == L('c','a')) | |||
| { | |||
| @@ -457,7 +459,7 @@ Translator *SelectTranslator(const char *name) | |||
| static const unsigned char stress_amps_eu[8] = {16,16, 18,18, 18,18, 18,18 }; | |||
| SetupTranslator(tr,stress_lengths_eu,stress_amps_eu); | |||
| tr->langopts.stress_rule = 1; // ?? second syllable ?? | |||
| tr->langopts.numbers = 0x569 + NUM_VIGESIMAL; | |||
| tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_DECIMAL_COMMA | NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_VIGESIMAL; | |||
| } | |||
| break; | |||
| @@ -474,7 +476,7 @@ Translator *SelectTranslator(const char *name) | |||
| tr->langopts.param[LOPT_IT_DOUBLING] = 1; | |||
| tr->langopts.long_stop = 130; | |||
| tr->langopts.numbers = 0x1009; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA + NUM_ALLOW_SPACE; | |||
| SetLetterVowel(tr,'y'); | |||
| // tr->langopts.max_initial_consonants = 2; // BUT foreign words may have 3 | |||
| tr->langopts.spelling_stress = 1; | |||
| @@ -493,7 +495,7 @@ Translator *SelectTranslator(const char *name) | |||
| tr->langopts.stress_flags = 0x0024; // don't use secondary stress | |||
| tr->langopts.param[LOPT_IT_LENGTHEN] = 1; // remove lengthen indicator from unstressed syllables | |||
| tr->langopts.numbers = 0x1509 + 0x8000 + NUM_NOPAUSE | NUM_ROMAN | NUM_VIGESIMAL; | |||
| tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_OMIT_1_HUNDRED | NUM_NOPAUSE | NUM_ROMAN | NUM_VIGESIMAL | NUM_DFRACTION_4; | |||
| SetLetterVowel(tr,'y'); | |||
| } | |||
| break; | |||
| @@ -508,6 +510,7 @@ Translator *SelectTranslator(const char *name) | |||
| case L('h','i'): // Hindi | |||
| case L('n','e'): // Nepali | |||
| case L('p','a'): // Punjabi | |||
| { | |||
| static const short stress_lengths_hi[8] = {190, 190, 210, 210, 0, 0, 230, 250}; | |||
| static const unsigned char stress_amps_hi[8] = {17,14, 20,19, 20,22, 22,21 }; | |||
| @@ -518,9 +521,15 @@ Translator *SelectTranslator(const char *name) | |||
| tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable | |||
| tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | |||
| tr->langopts.numbers = 0x011; | |||
| tr->langopts.numbers2 = NUM2_100000; | |||
| tr->langopts.numbers = NUM_SWAP_TENS; | |||
| tr->langopts.break_numbers = 0x24924aa8; // for languages which have numbers for 100,000 and 100,00,000, eg Hindi | |||
| tr->letter_bits_offset = OFFSET_DEVANAGARI; | |||
| if(name2 == L('p','a')) | |||
| { | |||
| tr->langopts.numbers = 0; // no number rules yet | |||
| tr->letter_bits_offset = OFFSET_GURMUKHI; | |||
| } | |||
| SetIndicLetters(tr); | |||
| } | |||
| break; | |||
| @@ -547,7 +556,7 @@ Translator *SelectTranslator(const char *name) | |||
| tr->langopts.spelling_stress = 1; | |||
| tr->langopts.accents = 1; | |||
| tr->langopts.numbers = 0x140d + 0x4000 + NUM_ROMAN_UC; | |||
| tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_HUNDRED_AND | NUM_DECIMAL_COMMA | NUM_THOUS_SPACE | NUM_DFRACTION_2 | NUM_ROMAN_UC; | |||
| tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards | |||
| tr->langopts.replace_chars = replace_cyrillic_latin; | |||
| @@ -573,7 +582,7 @@ Translator *SelectTranslator(const char *name) | |||
| tr->langopts.param[LOPT_IT_DOUBLING] = 1; | |||
| tr->langopts.param[LOPT_COMBINE_WORDS] = 99; // combine some prepositions with the following word | |||
| tr->langopts.numbers = 0x1009 + 0xa000 + NUM_ROMAN + NUM_ROMAN_ORDINAL + NUM_ORDINAL_DOT + NUM_OMIT_1_HUNDRED; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_5 | NUM_ROMAN | NUM_ROMAN_ORDINAL | NUM_ORDINAL_DOT | NUM_OMIT_1_HUNDRED; | |||
| SetLetterVowel(tr,'y'); | |||
| tr->langopts.spelling_stress = 1; | |||
| SetLengthMods(tr,3); // all equal | |||
| @@ -595,7 +604,7 @@ SetLengthMods(tr,3); // all equal | |||
| SetLetterBits(tr,LETTERGP_A,hy_vowels); | |||
| SetLetterBits(tr,LETTERGP_C,hy_consonants); | |||
| tr->langopts.max_initial_consonants = 6; | |||
| tr->langopts.numbers = 0x409; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED; | |||
| // tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words | |||
| } | |||
| break; | |||
| @@ -607,7 +616,7 @@ SetLengthMods(tr,3); // all equal | |||
| SetupTranslator(tr,stress_lengths_id,stress_amps_id); | |||
| tr->langopts.stress_rule = 2; | |||
| tr->langopts.numbers = 0x1009 + NUM_ROMAN; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_ROMAN; | |||
| tr->langopts.stress_flags = 0x6 | 0x10; | |||
| tr->langopts.accents = 2; // "capital" after letter name | |||
| } | |||
| @@ -629,7 +638,7 @@ SetLengthMods(tr,3); // all equal | |||
| SetLetterBits(tr,3,"jvr"); // Letter group H | |||
| tr->letter_groups[1] = is_lettergroup_B; | |||
| SetLetterVowel(tr,'y'); | |||
| tr->langopts.numbers = 0x8e9; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SINGLE_AND | NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_1900; | |||
| tr->langopts.numbers2 = 0x2; | |||
| } | |||
| break; | |||
| @@ -652,7 +661,7 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels | |||
| tr->langopts.param[LOPT_REDUCE] = 1; // reduce vowels even if phonemes are specified in it_list | |||
| tr->langopts.param[LOPT_ALT] = 2; // call ApplySpecialAttributes2() if a word has $alt or $alt2 | |||
| tr->langopts.numbers = 0x2709 + NUM_ROMAN; | |||
| tr->langopts.numbers = NUM_SINGLE_VOWEL | NUM_OMIT_1_HUNDRED |NUM_DECIMAL_COMMA | NUM_ROMAN | NUM_DFRACTION_1; | |||
| tr->langopts.accents = 2; // Say "Capital" after the letter. | |||
| SetLetterVowel(tr,'y'); | |||
| } | |||
| @@ -686,7 +695,7 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.stress_rule = 8; // ?? 1st syllable if it is heavy, else 2nd syllable | |||
| tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words | |||
| tr->langopts.numbers = 0x0401; | |||
| tr->langopts.numbers = NUM_OMIT_1_HUNDRED; | |||
| } | |||
| break; | |||
| @@ -700,7 +709,7 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable | |||
| tr->langopts.numbers = 0x100461; | |||
| tr->langopts.numbers = NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_OMIT_1_HUNDRED | NUM_AND_HUNDRED; | |||
| tr->langopts.max_initial_consonants = 2; | |||
| } | |||
| break; | |||
| @@ -713,7 +722,7 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.unstressed_wd1 = 0; | |||
| tr->langopts.unstressed_wd2 = 2; | |||
| tr->langopts.param[LOPT_DIERESES] = 1; | |||
| tr->langopts.numbers = 0x1 + NUM_ROMAN; | |||
| tr->langopts.numbers = NUM_ROMAN; | |||
| tr->langopts.max_roman = 5000; | |||
| } | |||
| break; | |||
| @@ -728,7 +737,7 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.stress_rule = 0; | |||
| tr->langopts.spelling_stress = 1; | |||
| tr->charset_a0 = charsets[4]; // ISO-8859-4 | |||
| tr->langopts.numbers = 0x409 + 0x8000 + 0x10000; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_4 | NUM_ORDINAL_DOT; | |||
| tr->langopts.stress_flags = 0x16 + 0x40000; | |||
| } | |||
| break; | |||
| @@ -745,7 +754,7 @@ SetLengthMods(tr,3); // all equal | |||
| tr->letter_groups[0] = vowels_cyrillic; | |||
| tr->langopts.stress_rule = 4; // antipenultimate | |||
| tr->langopts.numbers = 0x0429 + 0x4000; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_AND_UNITS | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_2; | |||
| tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards | |||
| } | |||
| break; | |||
| @@ -761,7 +770,7 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.param[LOPT_PREFIXES] = 1; | |||
| SetLetterVowel(tr,'y'); | |||
| tr->langopts.numbers = 0x11c19; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SWAP_TENS | NUM_OMIT_1_HUNDRED | NUM_ALLOW_SPACE | NUM_1900 | NUM_ORDINAL_DOT; | |||
| memcpy(tr->stress_lengths,stress_lengths_nl,sizeof(tr->stress_lengths)); | |||
| } | |||
| break; | |||
| @@ -773,7 +782,7 @@ SetLengthMods(tr,3); // all equal | |||
| SetupTranslator(tr,stress_lengths_no,NULL); | |||
| tr->langopts.stress_rule = 0; | |||
| SetLetterVowel(tr,'y'); | |||
| tr->langopts.numbers = 0x11849; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_HUNDRED_AND | NUM_ALLOW_SPACE | NUM_1900 + NUM_ORDINAL_DOT; | |||
| } | |||
| break; | |||
| @@ -784,7 +793,7 @@ SetLengthMods(tr,3); // all equal | |||
| SetupTranslator(tr,stress_lengths_om,stress_amps_om); | |||
| tr->langopts.stress_rule = 2; | |||
| tr->langopts.stress_flags = 0x16 + 0x80000; | |||
| tr->langopts.stress_flags = 2 + NUM_SWAP_TENS | NUM_THOUS_SPACE | NUM_NOPAUSE; //?? | |||
| } | |||
| break; | |||
| @@ -800,8 +809,8 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.stress_flags = 0x6; // mark unstressed final syllables as diminished | |||
| tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x8; | |||
| tr->langopts.max_initial_consonants = 7; // for example: wchrzczony :) | |||
| tr->langopts.numbers=0x1009 + 0x4000; | |||
| tr->langopts.numbers2=0x40; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_2; | |||
| tr->langopts.numbers2 = 0x40; | |||
| tr->langopts.param[LOPT_COMBINE_WORDS] = 4 + 0x100; // combine 'nie' (marked with $alt2) with some 1-syllable (and 2-syllable) words (marked with $alt) | |||
| SetLetterVowel(tr,'y'); | |||
| } | |||
| @@ -817,7 +826,7 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.stress_rule = 3; // stress on final syllable | |||
| tr->langopts.stress_flags = 0x6 | 0x10 | 0x20000; | |||
| tr->langopts.numbers = 0x069 + 0x4000 + NUM_ROMAN; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_DFRACTION_2 | NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_ROMAN; | |||
| SetLetterVowel(tr,'y'); | |||
| ResetLetterBits(tr,0x2); | |||
| SetLetterBits(tr,1,"bcdfgjkmnpqstvxz"); // B hard consonants, excluding h,l,r,w,y | |||
| @@ -835,7 +844,7 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.stress_flags = 0x100 + 0x6; | |||
| tr->charset_a0 = charsets[2]; // ISO-8859-2 | |||
| tr->langopts.numbers = 0x1029+0x6000 + NUM_ROMAN; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_3 | NUM_AND_UNITS | NUM_ROMAN; | |||
| tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex | |||
| } | |||
| break; | |||
| @@ -850,7 +859,7 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.stress_flags = 0x16; | |||
| tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable | |||
| tr->langopts.numbers = 0x61 + 0x100000 + 0x4000; | |||
| tr->langopts.numbers = NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_DFRACTION_2 | NUM_AND_HUNDRED; | |||
| tr->langopts.numbers2 = 0x200; // say "thousands" before its number | |||
| } | |||
| break; | |||
| @@ -870,7 +879,7 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.spelling_stress = 1; | |||
| tr->langopts.param[LOPT_COMBINE_WORDS] = 4; // combine some prepositions with the following word | |||
| tr->langopts.numbers = 0x0401 + 0x4000 + NUM_ROMAN; | |||
| tr->langopts.numbers = NUM_OMIT_1_HUNDRED | NUM_DFRACTION_2 | NUM_ROMAN; | |||
| tr->langopts.numbers2 = 0x100; | |||
| tr->langopts.thousands_sep = 0; //no thousands separator | |||
| tr->langopts.decimal_sep = ','; | |||
| @@ -897,7 +906,7 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.stress_rule = 2; | |||
| tr->langopts.stress_flags = 0x16 + 0x100; | |||
| SetLetterVowel(tr,'y'); | |||
| tr->langopts.numbers = 0x69 + 0x8000; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_DFRACTION_4; | |||
| tr->langopts.accents = 2; // "capital" after letter name | |||
| } | |||
| break; | |||
| @@ -911,7 +920,7 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.stress_rule = 0; | |||
| SetLetterVowel(tr,'y'); | |||
| tr->langopts.numbers = 0x1909; | |||
| tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_1900; | |||
| tr->langopts.accents = 1; | |||
| } | |||
| break; | |||
| @@ -928,8 +937,8 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.stress_rule = 2; | |||
| tr->langopts.stress_flags = 0x6 | 0x10; | |||
| tr->langopts.numbers = 0x4e1; | |||
| tr->langopts.numbers2 = NUM2_100000a; | |||
| tr->langopts.numbers = NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_SINGLE_AND | NUM_OMIT_1_HUNDRED; | |||
| tr->langopts.break_numbers = 0x49249268; // for languages which have numbers for 100,000 and 1,000,000 | |||
| } | |||
| break; | |||
| @@ -946,7 +955,7 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.stress_rule = 0; | |||
| tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | |||
| tr->langopts.numbers2 = NUM2_100000; | |||
| tr->langopts.break_numbers = 0x24a8; // 1000, 100,000 10,000,000 | |||
| if(name2 == L('t','a')) | |||
| { | |||
| @@ -1002,7 +1011,7 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable | |||
| tr->langopts.stress_flags = 0x20; //no automatic secondary stress | |||
| tr->langopts.numbers = 0x1509 + 0x4000; | |||
| tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_2; | |||
| tr->langopts.max_initial_consonants = 2; | |||
| } | |||
| break; | |||
| @@ -1034,7 +1043,7 @@ SetLengthMods(tr,3); // all equal | |||
| tr->letter_groups[0] = vowels_vi; | |||
| tr->langopts.tone_language = 1; // Tone language, use CalcPitches_Tone() rather than CalcPitches() | |||
| tr->langopts.unstressed_wd1 = 2; | |||
| tr->langopts.numbers = 0x0049 + 0x8000; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_HUNDRED_AND | NUM_DFRACTION_4; | |||
| } | |||
| break; | |||
| @@ -1070,13 +1079,13 @@ SetLengthMods(tr,3); // all equal | |||
| tr->translator_name = name2; | |||
| if(tr->langopts.numbers & 0x8) | |||
| if(tr->langopts.numbers & NUM_DECIMAL_COMMA) | |||
| { | |||
| // use . and , for thousands and decimal separators | |||
| tr->langopts.thousands_sep = '.'; | |||
| tr->langopts.decimal_sep = ','; | |||
| } | |||
| if(tr->langopts.numbers & 0x4) | |||
| if(tr->langopts.numbers & NUM_THOUS_SPACE) | |||
| { | |||
| tr->langopts.thousands_sep = 0; // don't allow thousands separator, except space | |||
| } | |||
| @@ -1128,7 +1137,7 @@ static void Translator_Russian(Translator *tr) | |||
| tr->langopts.stress_rule = 5; | |||
| tr->langopts.stress_flags = 0x0020; // waas 0x1010 | |||
| tr->langopts.numbers = 0x0409; | |||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED; | |||
| tr->langopts.numbers2 = 0xc2; // variant numbers before thousands | |||
| tr->langopts.phoneme_change = 1; | |||
| tr->langopts.testing = 2; | |||
| @@ -366,6 +366,10 @@ int IsAlpha(unsigned int c) | |||
| {//======================== | |||
| // Replacement for iswalpha() which also checks for some in-word symbols | |||
| const unsigned short extra_indic_alphas[] = { | |||
| 0xa70,0xa71, // Gurmukhi: tippi, addak | |||
| 0 }; | |||
| if(iswalpha(c)) | |||
| return(1); | |||
| @@ -374,6 +378,8 @@ int IsAlpha(unsigned int c) | |||
| // Indic scripts: Devanagari, Tamil, etc | |||
| if((c & 0x7f) < 0x64) | |||
| return(1); | |||
| if(lookupwchar(extra_indic_alphas, c) != 0) | |||
| return(1); | |||
| return(0); | |||
| } | |||
| @@ -2570,10 +2576,6 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre | |||
| int c_temp; | |||
| char *pn; | |||
| char *pw; | |||
| static unsigned int break_numbers1 = 0x49249248; | |||
| static unsigned int break_numbers2 = 0x24924aa8; // for languages which have numbers for 100,000 and 100,00,000, eg Hindi | |||
| static unsigned int break_numbers3 = 0x49249268; // for languages which have numbers for 100,000 and 1,000,000 | |||
| unsigned int break_numbers; | |||
| char number_buf[80]; | |||
| // start speaking at a specified word position in the text? | |||
| @@ -2591,7 +2593,7 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre | |||
| // digits should have been converted to Latin alphabet ('0' to '9') | |||
| word = pw = &sbuf[words[ix].start]; | |||
| if(iswdigit(word[0]) && (tr->langopts.numbers2 & NUM2_100000)) | |||
| if(iswdigit(word[0]) && (tr->langopts.break_numbers != BREAK_THOUSANDS)) | |||
| { | |||
| // Languages with 100000 numbers. Remove thousands separators so that we can insert them again later | |||
| pn = number_buf; | |||
| @@ -2628,34 +2630,26 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre | |||
| pn = &number_buf[1]; | |||
| nx = n_digits; | |||
| if((tr->langopts.numbers2 & NUM2_100000a) == NUM2_100000a) | |||
| break_numbers = break_numbers3; | |||
| else | |||
| if(tr->langopts.numbers2 & NUM2_100000) | |||
| break_numbers = break_numbers2; | |||
| else | |||
| break_numbers = break_numbers1; | |||
| while(pn < &number_buf[sizeof(number_buf)-3]) | |||
| { | |||
| if(!isdigit(c = *pw++) && (c != tr->langopts.decimal_sep)) | |||
| break; | |||
| *pn++ = c; | |||
| if((--nx > 0) && (break_numbers & (1 << nx))) | |||
| if((--nx > 0) && (tr->langopts.break_numbers & (1 << nx))) | |||
| { | |||
| if(tr->langopts.thousands_sep != ' ') | |||
| { | |||
| *pn++ = tr->langopts.thousands_sep; | |||
| } | |||
| *pn++ = ' '; | |||
| if(break_numbers & (1 << (nx-1))) | |||
| if(tr->langopts.break_numbers & (1 << (nx-1))) | |||
| { | |||
| // the next group only has 1 digit (i.e. NUM2_10000), make it three | |||
| *pn++ = '0'; | |||
| *pn++ = '0'; | |||
| } | |||
| if(break_numbers & (1 << (nx-2))) | |||
| if(tr->langopts.break_numbers & (1 << (nx-2))) | |||
| { | |||
| // the next group only has 2 digits (i.e. NUM2_10000), make it three | |||
| *pn++ = '0'; | |||
| @@ -328,12 +328,27 @@ typedef struct { | |||
| unsigned char *length_mods; | |||
| unsigned char *length_mods0; | |||
| #define NUM_THOUS_SPACE 0x4 | |||
| #define NUM_DECIMAL_COMMA 0x8 | |||
| #define NUM_SWAP_TENS 0x10 | |||
| #define NUM_AND_UNITS 0x20 | |||
| #define NUM_HUNDRED_AND 0x40 | |||
| #define NUM_SINGLE_AND 0x80 | |||
| #define NUM_SINGLE_STRESS 0x100 | |||
| #define NUM_SINGLE_VOWEL 0x200 | |||
| #define NUM_OMIT_1_HUNDRED 0x400 | |||
| #define NUM_19_HUNDRED 0x800 | |||
| #define NUM_1900 0x800 | |||
| #define NUM_ALLOW_SPACE 0x1000 | |||
| #define NUM_DFRACTION_1 0x2000 | |||
| #define NUM_DFRACTION_2 0x4000 | |||
| #define NUM_DFRACTION_3 0x6000 | |||
| #define NUM_DFRACTION_4 0x8000 | |||
| #define NUM_DFRACTION_5 0xa000 | |||
| #define NUM_ORDINAL_DOT 0x10000 | |||
| #define NUM_ROMAN 0x20000 | |||
| #define NUM_ROMAN_UC 0x40000 | |||
| #define NUM_NOPAUSE 0x80000 | |||
| #define NUM_AND_HUNDRED 0x100000 | |||
| #define NUM_ROMAN_AFTER 0x200000 | |||
| #define NUM_VIGESIMAL 0x400000 | |||
| #define NUM_ROMAN_ORDINAL 0x800000 | |||
| @@ -361,17 +376,15 @@ typedef struct { | |||
| // bit23=Roman numbers are ordinal numbers | |||
| int numbers; | |||
| #define NUM2_100000 0x800 // numbers for 100,000 and 10,000,000 | |||
| #define NUM2_100000a 0xc00 // numbers for 100,000 and 1,000,000 | |||
| // bits 1-4 use variant form of numbers before thousands,millions,etc. | |||
| // bit6=(LANG=pl) two forms of plural, M or MA | |||
| // bit7=(LANG-ru) use MB for 1 thousand, million, etc | |||
| // bit8=(LANG=cs,sk) two forms of plural, M or MA | |||
| // bit9=(LANG=rw) say "thousand" and "million" before its number, not after | |||
| // bit10=(LANG=sw) special word for 100,000 and 1,000,000 | |||
| // bit11=(LANG=hi) special word for 100,000 and 10,000,000 | |||
| int numbers2; | |||
| #define BREAK_THOUSANDS 0x49249248 | |||
| int break_numbers; // which digits to break the number into thousands, millions, etc (Hindi has 100,000 not 1,000,000) | |||
| int max_roman; | |||
| int thousands_sep; | |||
| int decimal_sep; | |||
| @@ -794,12 +794,15 @@ static void WavegenSetEcho(void) | |||
| amp = embedded_value[EMBED_H]; | |||
| delay = 130; | |||
| } | |||
| #ifdef deleted | |||
| if(embedded_value[EMBED_T] > 0) | |||
| { | |||
| // announcing punctuation | |||
| // announcing punctuation, add a small echo | |||
| // This seems unpopular | |||
| amp = embedded_value[EMBED_T] * 8; | |||
| delay = 60; | |||
| } | |||
| #endif | |||
| if(delay == 0) | |||
| amp = 0; | |||