Fixes for ordinal numbers (lang=hu). git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@225 d46cf337-b52f-0410-862d-fd96e6ae7743master
| g h j J k l m n | g h j J k l m n | ||||
| N n^ p q R s S S; | N n^ p q R s S S; | ||||
| t tS v w x z | t tS v w x z | ||||
| Dictionary pa_dict | |||||
| a a~ e E e~ E~ i I | |||||
| i: i~ I~ o O o: O~ o~ | |||||
| r- U u u~ U~ V V~ | |||||
| : b bh c ch d d. d.h | |||||
| dh f g gh H j J Jh | |||||
| k kh l l. m n N n. | |||||
| n^ p ph Q R s S t | |||||
| t. t.h th v x z |
| // numeric | // numeric | ||||
| //_0 zero $text // TEST | |||||
| _0 z'i@roU | |||||
| _0 zero $text // TEST | |||||
| //_0 z'i@roU | |||||
| _1 w'0n | _1 w'0n | ||||
| ?6 _1 w'Vn | ?6 _1 w'Vn | ||||
| _2 t'u: | _2 t'u: | ||||
| _80o 'eIti@ | _80o 'eIti@ | ||||
| _90o n'aInti@ | _90o n'aInti@ | ||||
| _0Co h'Vndr@dT | _0Co h'Vndr@dT | ||||
| _0M1o T'aUz@ndT | |||||
| // ABBREVIATIONS | // ABBREVIATIONS |
| // ordinal numbers | // ordinal numbers | ||||
| _ord Edik | _ord Edik | ||||
| _0o n'ullAdik | |||||
| _1ox ElSY: // number = '1' only | _1ox ElSY: // number = '1' only | ||||
| _2ox ma:Sodik // number = '2' only | |||||
| _2ox ma:Sodik // number = '2' only | |||||
| _1o EJ:Edik | _1o EJ:Edik | ||||
| _2o kEt:Edik | _2o kEt:Edik | ||||
| _3o hAR2_mAdik | _3o hAR2_mAdik | ||||
| _80o n^oltsvAnAdik | _80o n^oltsvAnAdik | ||||
| _0Co sa:zAdik | _0Co sa:zAdik | ||||
| _2Co ke:tsa:zAdik | _2Co ke:tsa:zAdik | ||||
| _0M1o EzR2Edik | |||||
| _1M1o EzR2Edik | |||||
| _2M1o ke:tEzR2Edik | |||||
| // These words mean that a dot after number, immediately preceding, does not mean an ordinal number | // These words mean that a dot after number, immediately preceding, does not mean an ordinal number | ||||
| január $alt | január $alt | ||||
| now $alt | now $alt | ||||
| dec $alt | dec $alt | ||||
| // the following 7 lines are the 7 Hungarian day names | |||||
| hétfő $alt | |||||
| kedd $alt | |||||
| szerda $alt | |||||
| csütörtök $alt | |||||
| péntek $alt | |||||
| szombat $alt | |||||
| vasárnap $alt | |||||
| // accent names | // accent names | ||||
| _lig ligAtu:R2A | _lig ligAtu:R2A | ||||
| _acu e:lES | _acu e:lES | ||||
| a.m An^n^i||m'int $dot | a.m An^n^i||m'int $dot | ||||
| dr doktoR2 $dot | dr doktoR2 $dot | ||||
| gpu $abbrev | gpu $abbrev | ||||
| mvgyosz $abbrev | |||||
| mvgyosz Emve:Je:oEs | |||||
| OTP $abbrev | OTP $abbrev | ||||
| id idY:SEb: $dot | id idY:SEb: $dot | ||||
| ill illEtvE $dot | ill illEtvE $dot |
| .group b | .group b | ||||
| b b | b b | ||||
| // bb b: | |||||
| bb b: | |||||
| biz (A b'iz, // bizalmas etc. | biz (A b'iz, // bizalmas etc. | ||||
| biedermeier bi:dER2ma:jER2 | biedermeier bi:dER2ma:jER2 | ||||
| ccs tS: | ccs tS: | ||||
| !) cz ts // in names which start with a capital letter | !) cz ts // in names which start with a capital letter | ||||
| ar) csz (e ts | ar) csz (e ts | ||||
| anar) ch (i C | |||||
| harmin) c ts | |||||
| anar) ch (i C | |||||
| hierar) ch (i C | hierar) ch (i C | ||||
| me) ch (a C | me) ch (a C | ||||
| te) ch (n C | te) ch (n C | ||||
| .group d | .group d | ||||
| d d | d d | ||||
| dd d: | |||||
| dts tS: | dts tS: | ||||
| dt t: | dt t: | ||||
| a) dsz ts: | a) dsz ts: | ||||
| cselé) d d | cselé) d d | ||||
| enge) dsz ts: | enge) dsz ts: | ||||
| engedelmeske) dsz ts: | engedelmeske) dsz ts: | ||||
| hazu) dsz ts: | |||||
| mara) dsz ts: | mara) dsz ts: | ||||
| tu) dsz ts: | |||||
| kére) dz ts | kére) dz ts | ||||
| // dd d: | // dd d: | ||||
| dz dz | dz dz | ||||
| dj J: | dj J: | ||||
| min) dny n^ | min) dny n^ | ||||
| .group e | .group e | ||||
| e E | e E | ||||
| D_-_) es (_ %ES | D_-_) es (_ %ES | ||||
| .group g | .group g | ||||
| g g | g g | ||||
| // gg g: | |||||
| gg g: | |||||
| gy J | gy J | ||||
| ggy J: | ggy J: | ||||
| A) gysz (A Js: | A) gysz (A Js: | ||||
| C) gyj J | C) gyj J | ||||
| A) gyj (A J: | A) gyj (A J: | ||||
| ha) gyj J: | |||||
| _e) gy (es J: | _e) gy (es J: | ||||
| _e) gy (et_ J: | _e) gy (et_ J: | ||||
| _e) gy (etlen J: | _e) gy (etlen J: | ||||
| .group l | .group l | ||||
| l l | l l | ||||
| lj j | |||||
| ly j | ly j | ||||
| lly jj | lly jj | ||||
| á) ll (j j | á) ll (j j | ||||
| beszé) lj jj | beszé) lj jj | ||||
| bére) lj jj | bére) lj jj | ||||
| fáj) l l | |||||
| fájla) l (j jj | |||||
| gondo) lj jjj | gondo) lj jjj | ||||
| ke) lj jj | ke) lj jj | ||||
| sajná) lj jj | |||||
| sajná) lj jjj | |||||
| llj jjj | |||||
| C) ly (_ li | C) ly (_ li | ||||
| _kéth) ly li | _kéth) ly li | ||||
| szamue) lly lli | szamue) lly lli | ||||
| sz s | sz s | ||||
| ssz ss2 | ssz ss2 | ||||
| szts stS: | szts stS: | ||||
| föld) s (ánc S | |||||
| s (színű S | s (színű S | ||||
| hel) s (inki z | hel) s (inki z | ||||
| ki) s (ebb SS | ki) s (ebb SS | ||||
| tc ts: | tc ts: | ||||
| tt t: | tt t: | ||||
| tt (C tt | tt (C tt | ||||
| ttn t:n | |||||
| ty c | ty c | ||||
| // s) ty c: // | // s) ty c: // | ||||
| // z) ty c: // keztyű | // z) ty c: // keztyű | ||||
| ttj tc: | ttj tc: | ||||
| tty c: | tty c: | ||||
| apá) ts (ág tS: | |||||
| becsüle) t t | |||||
| cson) t t | |||||
| cson) tj c | |||||
| ezüs) t t | |||||
| éle) t t | éle) t t | ||||
| felej) ts tS | |||||
| já) tsz (ani ts: | |||||
| tsz ts: | tsz ts: | ||||
| szorí) ts (a tS: | szorí) ts (a tS: | ||||
| ürí) ts (e tS: | ürí) ts (e tS: | ||||
| mula) ts tS: | mula) ts tS: | ||||
| nemze) ts tS: | nemze) ts tS: | ||||
| néme) ts (ég tS: | |||||
| min) t t | min) t t | ||||
| tse tSE | tse tSE | ||||
| tso tSo | tso tSo | ||||
| tsá tSa: | tsá tSa: | ||||
| tsd tSd | tsd tSd | ||||
| tsé tSe: | tsé tSe: | ||||
| t (cs t | |||||
| ttsé tS:e: | ttsé tS:e: | ||||
| C) tj c | C) tj c | ||||
| A) tj (A c: | A) tj (A c: | ||||
| _ka) ty (n ti | _ka) ty (n ti | ||||
| vörösmar) ty ti | vörösmar) ty ti | ||||
| á) t (sza t | |||||
| á) t (jár t | á) t (jár t | ||||
| á) t (sző t | á) t (sző t | ||||
| á) t (szú t | á) t (szú t | ||||
| á) t (szű t | á) t (szű t | ||||
| bizo) tts (ág tS: | bizo) tts (ág tS: | ||||
| állí) ts tS: | állí) ts tS: | ||||
| állapo) t (sor t | |||||
| bará) ts tS: | bará) ts tS: | ||||
| dön) ts (ön tS | dön) ts (ön tS | ||||
| elhivato) tts (ág tS: | elhivato) tts (ág tS: | ||||
| kiál) ts tS | kiál) ts tS | ||||
| köve) ts (ég tS: | köve) ts (ég tS: | ||||
| kür) t t | kür) t t | ||||
| kür) tj c | |||||
| korlátozo) tts tS: | |||||
| neve) ts (ég tS: | neve) ts (ég tS: | ||||
| on) ts (u tS | |||||
| szen)t (szék t | szen)t (szék t | ||||
| szé) t t | |||||
| szöve) ts (ég tS: | szöve) ts (ég tS: | ||||
| pillan) ts tS | |||||
| romlo) tts tS: | romlo) tts tS: | ||||
| tar) ts tS | |||||
| á) t (sor t | á) t (sor t | ||||
| á) t (sé t | á) t (sé t | ||||
| ne) t (c t | ne) t (c t | ||||
| ké) t (száz t | ké) t (száz t | ||||
| ö) t (száz t | ö) t (száz t | ||||
| ha) t (száz t | ha) t (száz t | ||||
| hé) t (száz t | |||||
| vé) ts (ég tS: | vé) ts (ég tS: | ||||
| bi) z (tons s | bi) z (tons s | ||||
| bi) z (tos s | bi) z (tos s | ||||
| bron) z (sz z | bron) z (sz z | ||||
| csontvá) z z | |||||
| e) z (t s | e) z (t s | ||||
| ho) z (ta s | ho) z (ta s | ||||
| ho) z (tá s | ho) z (tá s |
| //_xx விழுக்காடு $text // TESTING doesn't reduce vowels | |||||
| // This file is UTF8 encoded | // This file is UTF8 encoded | ||||
| // Spelling to phoneme words and exceptions for Tamil | // Spelling to phoneme words and exceptions for Tamil | ||||
| U+bf9 ru:ba:j | U+bf9 ru:ba:j | ||||
| // abbreviations | // abbreviations | ||||
| ரூ ru:ba:j $dot | |||||
| Rs ru:ba:j $dot | |||||
| ரூ ru:ba:j $dot | |||||
| Rs ru:pi:z $dot | |||||
| // numbers | // numbers | ||||
| _0 suz.ijVm // சுழியம் | _0 suz.ijVm // சுழியம் | ||||
| _0M1 a:jirVttU | _0M1 a:jirVttU | ||||
| _1M1 a:jirVttU | _1M1 a:jirVttU | ||||
| _0M2x lVd.tSVm | |||||
| _0M2x lVd.tSVm // 100,000 | |||||
| _1M2x orUlVd.tSVm | _1M2x orUlVd.tSVm | ||||
| _0M2 lVd.tSVttU | _0M2 lVd.tSVttU | ||||
| _1M2 orUlVd.tSVttU | _1M2 orUlVd.tSVttU | ||||
| _0M3x ko:d.i | |||||
| _0M3x ko:d.i // 10,000,000 | |||||
| _1M3x orUko:d.i | _1M3x orUko:d.i | ||||
| _0M3 ko:d.ie: | _0M3 ko:d.ie: | ||||
| _1M3 orUko:d.ie: | _1M3 orUko:d.ie: | ||||
| _0M4 nu:RUko:d.i // not correct, but should be understandable | |||||
| _1M4 nu:RUko:d.i | |||||
| _0M5 patta:jiRUmko:d.i | |||||
| _1M5 patta:jiRUmko:d.i | |||||
| _0M4x a:jirVmko:d.i // 10,000,000,000 | |||||
| _1M4x a:jirVmko:d.i | |||||
| _0M4 a:jirVmko:d.ie: | |||||
| _1M4 a:jirVmko:d.ie: | |||||
| _0M5 a:jirVma:jirVmko:d.i // 10,000,000,000,000 | |||||
| _1M5 a:jirVma:jirVmko:d.i | |||||
| _dpt _pul.l.i | _dpt _pul.l.i |
| ௭ 7 | ௭ 7 | ||||
| ௮ 8 | ௮ 8 | ||||
| ௯ 9 | ௯ 9 | ||||
| ொ ொ | |||||
| ோ ோ | |||||
| ௌ ௌ | |||||
| . | |||||
| .group 0xe0ae // characters which start with UTF-8 bytes: [e0 ae] | .group 0xe0ae // characters which start with UTF-8 bytes: [e0 ae] | ||||
| ஂ // anusvara | ஂ // anusvara | ||||
| எ e | எ e | ||||
| _) எ ;e // add a short [j] sound at start of word ? | _) எ ;e // add a short [j] sound at start of word ? | ||||
| ஏ e:: | |||||
| ஏ e: | |||||
| _) ஏ ;e:: // add a short [j] sound at start of word ? | _) ஏ ;e:: // add a short [j] sound at start of word ? | ||||
| ஐ aI | ஐ aI | ||||
| ங NV | ங NV | ||||
| ங (B N | ங (B N | ||||
| ச zV | |||||
| ச (B z | |||||
| ச sV // ?? [z] | |||||
| ச (B s | |||||
| _) ச sa | _) ச sa | ||||
| _) ச (B s | _) ச (B s | ||||
| ச்ச tS:V | ச்ச tS:V | ||||
| ச்ச (B tS: | ச்ச (B tS: | ||||
| ற்ச tS: | |||||
| ற்ச (B tS: | |||||
| ட்) ச tSV | ட்) ச tSV | ||||
| ட்) ச (B tS | ட்) ச (B tS | ||||
| ஞ்) ச dZV | ஞ்) ச dZV | ||||
| ப்ப ppV | ப்ப ppV | ||||
| ப்ப (B pp | ப்ப (B pp | ||||
| ட்) ப pV | ட்) ப pV | ||||
| ட்) ப (B pV | |||||
| ட்) ப (B p | |||||
| ற்) ப pV | ற்) ப pV | ||||
| ற்) ப (B p | ற்) ப (B p | ||||
| ஃ) ப fV | ஃ) ப fV | ||||
| ௌ aU | ௌ aU | ||||
| ் // virama | ் // virama | ||||
| ௗ : // aU length mark | ௗ : // aU length mark |
| 60 phoneme tables | |||||
| 61 phoneme tables | |||||
| new total | new total | ||||
| base 103 103 | base 103 103 | ||||
| base2 26 124 | base2 26 124 | ||||
| fi 40 134 | fi 40 134 | ||||
| fr 55 141 | fr 55 141 | ||||
| fr_ca 11 141 | fr_ca 11 141 | ||||
| hi 60 149 | |||||
| ta 20 152 | |||||
| hi 62 151 | |||||
| ta 20 154 | |||||
| hu 23 119 | hu 23 119 | ||||
| lv 29 126 | lv 29 126 | ||||
| nl 28 126 | nl 28 126 | ||||
| hy 24 119 | hy 24 119 | ||||
| da 21 118 | da 21 118 | ||||
| rw 15 131 | rw 15 131 | ||||
| ml 13 151 | |||||
| kn 15 151 | |||||
| bn 59 155 | |||||
| ne 18 157 | |||||
| mr 12 149 | |||||
| ml 13 153 | |||||
| kn 15 153 | |||||
| bn 59 157 | |||||
| ne 18 159 | |||||
| mr 12 151 | |||||
| eu 6 125 | eu 6 125 | ||||
| mn 15 114 | mn 15 114 | ||||
| pa 12 152 | |||||
| Data file Used by | Data file Used by | ||||
| b/b [b] base | b/b [b] base | ||||
| vnasal/i_n [i~] pt | vnasal/i_n [i~] pt | ||||
| [i~] bn | [i~] bn | ||||
| [i:~] bn | [i:~] bn | ||||
| vnasal/i_n2 [i~] hi | |||||
| vnasal/i_n2 [I~] hi | |||||
| [i~] hi | |||||
| vnasal/m- [m-] sw | vnasal/m- [m-] sw | ||||
| vnasal/n- [n-] sw | vnasal/n- [n-] sw | ||||
| vnasal/nn- [N-] sw | vnasal/nn- [N-] sw | ||||
| [o] zh | [o] zh | ||||
| [O~] bn | [O~] bn | ||||
| vnasal/oo_n3 [O~] pl | vnasal/oo_n3 [O~] pl | ||||
| vnasal/u_n [u~] hi | |||||
| vnasal/u_n [U~] hi | |||||
| [u~] hi | |||||
| [u~] pt | [u~] pt | ||||
| [u] zh | [u] zh | ||||
| [u~] bn | [u~] bn | ||||
| [a:] hi | [a:] hi | ||||
| [a:] bn | [a:] bn | ||||
| [a] ne | [a] ne | ||||
| [a] pa | |||||
| vowel/a_en [A] fr | vowel/a_en [A] fr | ||||
| vowel/@_bck [@] hi | vowel/@_bck [@] hi | ||||
| [@/] hi | [@/] hi | ||||
| [@] bn | [@] bn | ||||
| [V] ne | [V] ne | ||||
| [@/] ne | [@/] ne | ||||
| [@] pa | |||||
| vowel/e [e] base2 | vowel/e [e] base2 | ||||
| [e:] en | [e:] en | ||||
| [eI] en_n | [eI] en_n | ||||
| [e:] no | [e:] no | ||||
| [e] bn | [e] bn | ||||
| [e:] bn | [e:] bn | ||||
| [e] pa | |||||
| vowel/e_3 [i] en_n | vowel/e_3 [i] en_n | ||||
| [e:] hu | [e:] hu | ||||
| [e] ku | [e] ku | ||||
| [E] zh | [E] zh | ||||
| [E#] ku | [E#] ku | ||||
| [&] da | [&] da | ||||
| [E] pa | |||||
| vowel/ee#_2 [E-] sv | vowel/ee#_2 [E-] sv | ||||
| [E#] sq | [E#] sq | ||||
| vowel/ee_3 [&] af | vowel/ee_3 [&] af | ||||
| [i] bn | [i] bn | ||||
| [i:] bn | [i:] bn | ||||
| [i:] mr | [i:] mr | ||||
| [i] pa | |||||
| vowel/ii [I] en_n | vowel/ii [I] en_n | ||||
| [I2] en_n | [I2] en_n | ||||
| [I] en_rp | [I] en_rp | ||||
| [I] no | [I] no | ||||
| [I] tr | [I] tr | ||||
| [I] bn | [I] bn | ||||
| [I] pa | |||||
| vowel/ii#_3 [I2] en_us | vowel/ii#_3 [I2] en_us | ||||
| vowel/ii_4 [I] en | vowel/ii_4 [I] en | ||||
| [I2] en | [I2] en | ||||
| [@/] ne | [@/] ne | ||||
| [@] mr | [@] mr | ||||
| [V] mr | [V] mr | ||||
| [@] pa | |||||
| [V] pa | |||||
| vowel/@_low2 [@/] en_us | vowel/@_low2 [@/] en_us | ||||
| [@2] en_us | [@2] en_us | ||||
| vowel/o [o] base2 | vowel/o [o] base2 | ||||
| [o] vi | [o] vi | ||||
| [o] da | [o] da | ||||
| [o:] bn | [o:] bn | ||||
| [o:] pa | |||||
| vowel/o-_2 [V] en_n | vowel/o-_2 [V] en_n | ||||
| [V] en_wm | [V] en_wm | ||||
| vowel/o_3 [oU] en_sc | vowel/o_3 [oU] en_sc | ||||
| [O:] hi | [O:] hi | ||||
| [O] it | [O] it | ||||
| [O] bn | [O] bn | ||||
| [O] pa | |||||
| vowel/oo_5 [O] pl | vowel/oo_5 [O] pl | ||||
| [O] is | [O] is | ||||
| [O] sq | [O] sq | ||||
| [U] mr | [U] mr | ||||
| [u:] mr | [u:] mr | ||||
| [u] mn | [u] mn | ||||
| [u] pa | |||||
| vowel/u_bck2 [u] fr | vowel/u_bck2 [u] fr | ||||
| [u:] fr | [u:] fr | ||||
| [u:] la | [u:] la | ||||
| [u] zhy | [u] zhy | ||||
| [U] bn | [U] bn | ||||
| [U] mn | [U] mn | ||||
| [U] pa | |||||
| vowel/V [3] en_sc | vowel/V [3] en_sc | ||||
| vowel/V_2 [V] en | vowel/V_2 [V] en | ||||
| [a] af | [a] af | ||||
| [V] hi | [V] hi | ||||
| [V] ta | [V] ta | ||||
| [V] bn | [V] bn | ||||
| [V] pa | |||||
| vowel/V_4 [V] en_sc | vowel/V_4 [V] en_sc | ||||
| [V] da | [V] da | ||||
| vowel/V_6 [V] en_us | vowel/V_6 [V] en_us |
| formants vnasal/i_n2 | formants vnasal/i_n2 | ||||
| endphoneme | endphoneme | ||||
| phoneme I~ | |||||
| vowel starttype (i) endtype (i) | |||||
| length 170 | |||||
| formants vnasal/i_n2 | |||||
| endphoneme | |||||
| phoneme e~ | phoneme e~ | ||||
| vowel long starttype (e) endtype (e) | vowel long starttype (e) endtype (e) | ||||
| length 220 | length 220 | ||||
| endphoneme | endphoneme | ||||
| phoneme E~ | phoneme E~ | ||||
| vowel starttype (e) endtype (e) | |||||
| vowel long starttype (e) endtype (e) | |||||
| length 230 | length 230 | ||||
| formants vnasal/ee_n2 | formants vnasal/ee_n2 | ||||
| endphoneme | endphoneme | ||||
| formants vnasal/u_n | formants vnasal/u_n | ||||
| endphoneme | endphoneme | ||||
| phoneme U~ | |||||
| vowel starttype (u) endtype (u) | |||||
| length 170 | |||||
| formants vnasal/u_n | |||||
| endphoneme | |||||
| phoneme r- | phoneme r- | ||||
| vowel starttype (@) endtype (@) | vowel starttype (@) endtype (@) |
| phoneme e: | phoneme e: | ||||
| vowel starttype (e) endtype (e) | vowel starttype (e) endtype (e) | ||||
| length 270 | |||||
| length 250 | |||||
| formants vowel/e | formants vowel/e | ||||
| endphoneme | endphoneme | ||||
| phonemetable mn base | phonemetable mn base | ||||
| include ph_mongolian | include ph_mongolian | ||||
| phonemetable pa hi | |||||
| include ph_punjabi |
| static int transpose_max; | static int transpose_max; | ||||
| static int text_mode = 0; | static int text_mode = 0; | ||||
| static int debug_flag = 0; | static int debug_flag = 0; | ||||
| static int error_need_dictionary = 0; | |||||
| static int hash_counts[N_HASH_DICT]; | static int hash_counts[N_HASH_DICT]; | ||||
| static char *hash_chains[N_HASH_DICT]; | static char *hash_chains[N_HASH_DICT]; | ||||
| static char nullstring[] = {0}; | static char nullstring[] = {0}; | ||||
| WORD_TAB winfo; | WORD_TAB winfo; | ||||
| char decoded_phonemes[128]; | |||||
| comment = NULL; | comment = NULL; | ||||
| text_not_phonemes = 0; | text_not_phonemes = 0; | ||||
| if(text_mode) | if(text_mode) | ||||
| text_not_phonemes = 1; | text_not_phonemes = 1; | ||||
| if(text_not_phonemes != translator->langopts.textmode) | |||||
| { | |||||
| flag_codes[n_flag_codes++] = BITNUM_FLAG_TEXTMODE; | |||||
| } | |||||
| if(text_not_phonemes) | if(text_not_phonemes) | ||||
| { | { | ||||
| if(word[0] == '_') | if(word[0] == '_') | ||||
| { | { | ||||
| // This is a special word, used by eSpeak. Translate this into phonemes now | // This is a special word, used by eSpeak. Translate this into phonemes now | ||||
| // memset(&winfo,0,sizeof(winfo)); | |||||
| // TranslateWord(translator,phonetic,0,&winfo); // but *_dict is not loaded ? | |||||
| // DecodePhonemes(word_phonemes,decoded_phonemes); | |||||
| memset(&winfo,0,sizeof(winfo)); | |||||
| strcat(phonetic, " "); // need a space to indicate word-boundary | |||||
| // PROBLEM vowel reductions are not applied to the translated phonemes | |||||
| // condition rules are not applied | |||||
| TranslateWord(translator,phonetic,0,&winfo); | |||||
| text_not_phonemes = 0; | |||||
| strncpy0(encoded_ph, word_phonemes, N_WORD_BYTES-4); | |||||
| if((word_phonemes[0] == 0) && (error_need_dictionary < 3)) | |||||
| { | |||||
| // the dictionary was not loaded, we need a second attempt | |||||
| error_need_dictionary++; | |||||
| fprintf(f_log,"%5d: Need to compile dictionary again\n",linenum); | |||||
| } | |||||
| { | |||||
| //char decoded_phonemes[128]; | |||||
| //DecodePhonemes(word_phonemes,decoded_phonemes); | |||||
| //printf("Translator %x %s [%s] [%s]\n",translator->translator_name,word,phonetic,decoded_phonemes); | //printf("Translator %x %s [%s] [%s]\n",translator->translator_name,word,phonetic,decoded_phonemes); | ||||
| } | |||||
| } | |||||
| else | |||||
| { | |||||
| // this is replacement text, so don't encode as phonemes. Restrict the length of the replacement word | |||||
| strncpy0(encoded_ph,phonetic,N_WORD_BYTES-4); | |||||
| } | } | ||||
| // this is replacement text, so don't encode as phonemes. Restrict the length of the replacement word | |||||
| strncpy0(encoded_ph,phonetic,N_WORD_BYTES-4); | |||||
| } | } | ||||
| else | else | ||||
| { | { | ||||
| } | } | ||||
| } | } | ||||
| if(text_not_phonemes != translator->langopts.textmode) | |||||
| { | |||||
| flag_codes[n_flag_codes++] = BITNUM_FLAG_TEXTMODE; | |||||
| } | |||||
| if(sscanf(word,"U+%x",&wc) == 1) | if(sscanf(word,"U+%x",&wc) == 1) | ||||
| { | { | ||||
| // Character code | // Character code | ||||
| char path[sizeof(path_home)+40]; // path_dsource+20 | char path[sizeof(path_home)+40]; // path_dsource+20 | ||||
| error_count = 0; | error_count = 0; | ||||
| error_need_dictionary = 0; | |||||
| memset(letterGroupsDefined,0,sizeof(letterGroupsDefined)); | memset(letterGroupsDefined,0,sizeof(letterGroupsDefined)); | ||||
| debug_flag = flags & 1; | debug_flag = flags & 1; |
| strcpy(dictionary_name,name); // currently loaded dictionary name | strcpy(dictionary_name,name); // currently loaded dictionary name | ||||
| if(no_error) // don't load dictionary, just set the dictionary_name | |||||
| return(1); | |||||
| // Load a pronunciation data file into memory | // Load a pronunciation data file into memory | ||||
| // bytes 0-3: offset to rules data | // bytes 0-3: offset to rules data | ||||
| // bytes 4-7: number of hash table entries | // bytes 4-7: number of hash table entries |
| #ifdef deleted | #ifdef deleted | ||||
| static const char *about_string = "espeakedit: %s\nAuthor: Jonathan Duddington (c) 2007\n\n" | |||||
| static const char *about_string = "espeakedit: %s\nAuthor: Jonathan Duddington (c) 2009\n\n" | |||||
| "Licensed under GNU General Public License version 3\n" | "Licensed under GNU General Public License version 3\n" | ||||
| "http://espeak.sourceforge.net/"; | "http://espeak.sourceforge.net/"; | ||||
| #endif | #endif | ||||
| static const char *about_string = "<font size=0><b>espeakedit </b> %s<br>Author: Jonathan Duddington (c) 2007<br>" | |||||
| static const char *about_string = "<font size=0><b>espeakedit </b> %s<br>Author: Jonathan Duddington (c) 2009<br>" | |||||
| "<a href=\"http://espeak.sourceforge.net/\">http://espeak.sourceforge.net</a><br>" | "<a href=\"http://espeak.sourceforge.net/\">http://espeak.sourceforge.net</a><br>" | ||||
| "Licensed under <a href=\"http://espeak.sourceforge.net/license.html\">GNU General Public License version 3</a></font>"; | "Licensed under <a href=\"http://espeak.sourceforge.net/license.html\">GNU General Public License version 3</a></font>"; | ||||
| sprintf(fname_log,"%s%s",path_dsource,"dict_log"); | sprintf(fname_log,"%s%s",path_dsource,"dict_log"); | ||||
| log = fopen(fname_log,"w"); | log = fopen(fname_log,"w"); | ||||
| LoadDictionary(translator, dictionary_name, 0); | |||||
| if((err = CompileDictionary(path_dsource,dictionary_name,log,err_fname,debug_flag)) < 0) | if((err = CompileDictionary(path_dsource,dictionary_name,log,err_fname,debug_flag)) < 0) | ||||
| { | { | ||||
| wxLogError(_T("Can't access file:\n")+wxString(err_fname,wxConvLocal)); | wxLogError(_T("Can't access file:\n")+wxString(err_fname,wxConvLocal)); |
| ph_stress[0] = phonSTRESS_P; | ph_stress[0] = phonSTRESS_P; | ||||
| ph_stress[1] = 0; | ph_stress[1] = 0; | ||||
| for(p=(unsigned char *)ph_buf3; *p != 0; p++) | |||||
| for(p=(unsigned char *)ph_buf3; (*p != 0) && (phoneme_tab[*p] != NULL); p++) | |||||
| { | { | ||||
| if(phoneme_tab[*p]->type == phSTRESS) | if(phoneme_tab[*p]->type == phSTRESS) | ||||
| ph_stress[0] = 0; // stress is already marked | ph_stress[0] = 0; // stress is already marked | ||||
| static int LookupThousands(Translator *tr, int value, int thousandplex, int thousands_exact, char *ph_out) | static int LookupThousands(Translator *tr, int value, int thousandplex, int thousands_exact, char *ph_out) | ||||
| {//======================================================================================================= | {//======================================================================================================= | ||||
| // thousands_exact: bit 0 no hundreds,tens,or units, bit 1 ordinal number | |||||
| int found; | int found; | ||||
| int found_value=0; | int found_value=0; | ||||
| char string[12]; | char string[12]; | ||||
| ph_of[0] = 0; | ph_of[0] = 0; | ||||
| // first look for a match with the exact value of thousands | // first look for a match with the exact value of thousands | ||||
| if(thousands_exact) | |||||
| if(thousands_exact & 1) | |||||
| { | { | ||||
| // is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta) | |||||
| sprintf(string,"_%dM%dx",value,thousandplex); | |||||
| found_value = Lookup(tr, string, ph_thousands); | |||||
| if(thousands_exact & 2) | |||||
| { | |||||
| // ordinal number | |||||
| sprintf(string,"_%dM%do",value,thousandplex); | |||||
| found_value = Lookup(tr, string, ph_thousands); | |||||
| } | |||||
| if(!found_value) | |||||
| { | |||||
| // is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta) | |||||
| sprintf(string,"_%dM%dx",value,thousandplex); | |||||
| found_value = Lookup(tr, string, ph_thousands); | |||||
| } | |||||
| } | } | ||||
| if(found_value == 0) | if(found_value == 0) | ||||
| { | { | ||||
| } | } | ||||
| found = 0; | found = 0; | ||||
| if(thousands_exact) | |||||
| if(thousands_exact & 1) | |||||
| { | { | ||||
| // is there a different pronunciation if there are no hundreds,tens,or units ? | |||||
| sprintf(string,"_%s%dx",M_Variant(value), thousandplex); | |||||
| found = Lookup(tr, string, ph_thousands); | |||||
| if(thousands_exact & 2) | |||||
| { | |||||
| // ordinal number | |||||
| sprintf(string,"_%s%do",M_Variant(value), thousandplex); | |||||
| found = Lookup(tr, string, ph_thousands); | |||||
| } | |||||
| if(!found) | |||||
| { | |||||
| // is there a different pronunciation if there are no hundreds,tens,or units ? | |||||
| sprintf(string,"_%s%dx",M_Variant(value), thousandplex); | |||||
| found = Lookup(tr, string, ph_thousands); | |||||
| } | |||||
| } | } | ||||
| if(found == 0) | if(found == 0) | ||||
| { | { | ||||
| { | { | ||||
| units = (value % 10); | units = (value % 10); | ||||
| if((control & 1) && ((units == 0) || (tr->langopts.numbers & 0x10))) | |||||
| if((control & 1) && ((units == 0) || (tr->langopts.numbers & NUM_SWAP_TENS))) | |||||
| { | { | ||||
| sprintf(string,"_%dXo",value / 10); | sprintf(string,"_%dXo",value / 10); | ||||
| if(Lookup(tr, string, ph_tens) != 0) | if(Lookup(tr, string, ph_tens) != 0) | ||||
| sprintf(string,"_%df",units); | sprintf(string,"_%df",units); | ||||
| found = Lookup(tr, string, ph_digits); | found = Lookup(tr, string, ph_digits); | ||||
| } | } | ||||
| if((control & 1) && ((tr->langopts.numbers & 0x10) == 0)) | |||||
| if((control & 1) && ((tr->langopts.numbers & NUM_SWAP_TENS) == 0)) | |||||
| { | { | ||||
| // ordinal | // ordinal | ||||
| sprintf(string,"_%do",units); | sprintf(string,"_%do",units); | ||||
| if((control & 1) && (found_ordinal == 0) && (ph_ordinal[0] == 0)) | if((control & 1) && (found_ordinal == 0) && (ph_ordinal[0] == 0)) | ||||
| { | { | ||||
| if((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & 0x10))) | |||||
| if((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & NUM_SWAP_TENS))) | |||||
| Lookup(tr, "_ord20", ph_ordinal); | Lookup(tr, "_ord20", ph_ordinal); | ||||
| if(ph_ordinal[0] == 0) | if(ph_ordinal[0] == 0) | ||||
| Lookup(tr, "_ord", ph_ordinal); | Lookup(tr, "_ord", ph_ordinal); | ||||
| } | } | ||||
| if((tr->langopts.numbers & 0x30) && (ph_tens[0] != 0) && (ph_digits[0] != 0)) | |||||
| if((tr->langopts.numbers & (NUM_SWAP_TENS | NUM_AND_UNITS)) && (ph_tens[0] != 0) && (ph_digits[0] != 0)) | |||||
| { | { | ||||
| Lookup(tr, "_0and", ph_and); | Lookup(tr, "_0and", ph_and); | ||||
| if(tr->langopts.numbers & 0x10) | |||||
| if(tr->langopts.numbers & NUM_SWAP_TENS) | |||||
| sprintf(ph_out,"%s%s%s%s",ph_digits, ph_and, ph_tens, ph_ordinal); | sprintf(ph_out,"%s%s%s%s",ph_digits, ph_and, ph_tens, ph_ordinal); | ||||
| else | else | ||||
| sprintf(ph_out,"%s%s%s%s",ph_tens, ph_and, ph_digits, ph_ordinal); | sprintf(ph_out,"%s%s%s%s",ph_tens, ph_and, ph_digits, ph_ordinal); | ||||
| } | } | ||||
| else | else | ||||
| { | { | ||||
| if(tr->langopts.numbers & 0x200) | |||||
| if(tr->langopts.numbers & NUM_SINGLE_VOWEL) | |||||
| { | { | ||||
| // remove vowel from the end of tens if units starts with a vowel (LANG=Italian) | // remove vowel from the end of tens if units starts with a vowel (LANG=Italian) | ||||
| if(((ix = strlen(ph_tens)-1) >= 0) && (ph_digits[0] != 0)) | if(((ix = strlen(ph_tens)-1) >= 0) && (ph_digits[0] != 0)) | ||||
| sprintf(ph_out,"%s%s%s",ph_tens, ph_digits, ph_ordinal); | sprintf(ph_out,"%s%s%s",ph_tens, ph_digits, ph_ordinal); | ||||
| } | } | ||||
| if(tr->langopts.numbers & 0x100) | |||||
| if(tr->langopts.numbers & NUM_SINGLE_STRESS) | |||||
| { | { | ||||
| // only one primary stress | // only one primary stress | ||||
| found = 0; | found = 0; | ||||
| int tensunits; | int tensunits; | ||||
| int x; | int x; | ||||
| int exact; | int exact; | ||||
| int ordinal; | |||||
| char string[12]; // for looking up entries in **_list | char string[12]; // for looking up entries in **_list | ||||
| char buf1[100]; | char buf1[100]; | ||||
| char buf2[100]; | char buf2[100]; | ||||
| char ph_hundred_and[12]; | char ph_hundred_and[12]; | ||||
| char ph_thousand_and[12]; | char ph_thousand_and[12]; | ||||
| ordinal = control & 2; | |||||
| hundreds = value / 100; | hundreds = value / 100; | ||||
| tensunits = value % 100; | tensunits = value % 100; | ||||
| buf1[0] = 0; | buf1[0] = 0; | ||||
| ph_thousand_and[0] = 0; | ph_thousand_and[0] = 0; | ||||
| found = 0; | found = 0; | ||||
| if((control & 2) && (tensunits == 0)) | |||||
| if(ordinal && (tensunits == 0)) | |||||
| { | { | ||||
| // ordinal number, with no tens or units | // ordinal number, with no tens or units | ||||
| found = Lookup(tr, "_0Co", ph_100); | found = Lookup(tr, "_0Co", ph_100); | ||||
| Lookup(tr, "_0C", ph_100); | Lookup(tr, "_0C", ph_100); | ||||
| } | } | ||||
| if(((tr->langopts.numbers & 0x0800) != 0) && (hundreds == 19)) | |||||
| if(((tr->langopts.numbers & NUM_1900) != 0) && (hundreds == 19)) | |||||
| { | { | ||||
| // speak numbers such as 1984 as years: nineteen-eighty-four | // speak numbers such as 1984 as years: nineteen-eighty-four | ||||
| // ph_100[0] = 0; // don't say "hundred", we also need to suppress "and" | // ph_100[0] = 0; // don't say "hundred", we also need to suppress "and" | ||||
| if ((value % 1000) == 0) | if ((value % 1000) == 0) | ||||
| exact = 1; | exact = 1; | ||||
| if(LookupThousands(tr, hundreds / 10, thousandplex+1, exact, ph_10T) == 0) | |||||
| if(LookupThousands(tr, hundreds / 10, thousandplex+1, exact | ordinal, ph_10T) == 0) | |||||
| { | { | ||||
| x = 0; | x = 0; | ||||
| if(tr->langopts.numbers2 & (1 << (thousandplex+1))) | if(tr->langopts.numbers2 & (1 << (thousandplex+1))) | ||||
| ph_digits[0] = 0; | ph_digits[0] = 0; | ||||
| if(hundreds > 0) | if(hundreds > 0) | ||||
| { | { | ||||
| if((tr->langopts.numbers & 0x100000) && ((control & 1) || (ph_thousands[0] != 0))) | |||||
| if((tr->langopts.numbers & NUM_AND_HUNDRED) && ((control & 1) || (ph_thousands[0] != 0))) | |||||
| { | { | ||||
| Lookup(tr, "_0and", ph_thousand_and); | Lookup(tr, "_0and", ph_thousand_and); | ||||
| } | } | ||||
| if(tensunits == 0) | if(tensunits == 0) | ||||
| { | { | ||||
| // is there a special pronunciation for exactly n00 ? | // is there a special pronunciation for exactly n00 ? | ||||
| sprintf(string,"_%dC0",hundreds); | |||||
| found = Lookup(tr, string, ph_digits); | |||||
| if(ordinal) | |||||
| { | |||||
| // ordinal number | |||||
| sprintf(string, "_%dCo", hundreds); | |||||
| found = Lookup(tr, string, ph_digits); | |||||
| } | |||||
| if(!found) | |||||
| { | |||||
| sprintf(string,"_%dC0",hundreds); | |||||
| found = Lookup(tr, string, ph_digits); | |||||
| } | |||||
| } | } | ||||
| if(!found) | if(!found) | ||||
| { | { | ||||
| } | } | ||||
| ph_hundred_and[0] = 0; | ph_hundred_and[0] = 0; | ||||
| if((tr->langopts.numbers & 0x40) && (tensunits != 0)) | |||||
| if((tr->langopts.numbers & NUM_HUNDRED_AND) && (tensunits != 0)) | |||||
| { | { | ||||
| if((value > 100) || ((control & 1) && (thousandplex==0))) | if((value > 100) || ((control & 1) && (thousandplex==0))) | ||||
| { | { | ||||
| if(thousandplex==0) | if(thousandplex==0) | ||||
| { | { | ||||
| x = 2; // allow "eins" for 1 rather than "ein" | x = 2; // allow "eins" for 1 rather than "ein" | ||||
| if(control & 2) | |||||
| if(ordinal) | |||||
| x = 3; // ordinal number | x = 3; // ordinal number | ||||
| if((value < 100) && !(control & 1)) | if((value < 100) && !(control & 1)) | ||||
| x |= 4; // tens and units only, no higher digits | x |= 4; // tens and units only, no higher digits | ||||
| if(LookupNum2(tr, tensunits, x, buf2) != 0) | if(LookupNum2(tr, tensunits, x, buf2) != 0) | ||||
| { | { | ||||
| if(tr->langopts.numbers & 0x80) | |||||
| if(tr->langopts.numbers & NUM_SINGLE_AND) | |||||
| ph_hundred_and[0] = 0; // don't put 'and' after 'hundred' if there's 'and' between tens and units | ph_hundred_and[0] = 0; // don't put 'and' after 'hundred' if there's 'and' between tens and units | ||||
| } | } | ||||
| } | } | ||||
| value = this_value = atoi(word); | value = this_value = atoi(word); | ||||
| ph_ordinal2[0] = 0; | ph_ordinal2[0] = 0; | ||||
| if((tr->langopts.numbers & 0x10000) && (word[ix] == '.') && !isdigit(word[ix+2])) | |||||
| if((tr->langopts.numbers & NUM_ORDINAL_DOT) && (word[ix] == '.') && !isdigit(word[ix+2])) | |||||
| { | { | ||||
| // ordinal number is indicated by dot after the number | // ordinal number is indicated by dot after the number | ||||
| ordinal = 2; | ordinal = 2; | ||||
| prev_thousands = 1; | prev_thousands = 1; | ||||
| } | } | ||||
| else | else | ||||
| if((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & 0x1000)) | |||||
| if((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & NUM_ALLOW_SPACE)) | |||||
| { | { | ||||
| // thousands groups can be separated by spaces | // thousands groups can be separated by spaces | ||||
| if((n_digits == 3) && isdigit(word[-2])) | if((n_digits == 3) && isdigit(word[-2])) | ||||
| } | } | ||||
| } | } | ||||
| if((tr->langopts.numbers & 0x1000) && (word[n_digits] == ' ')) | |||||
| if((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[n_digits] == ' ')) | |||||
| thousands_inc = 1; | thousands_inc = 1; | ||||
| else | else | ||||
| if(word[n_digits] == tr->langopts.thousands_sep) | if(word[n_digits] == tr->langopts.thousands_sep) | ||||
| max_decimal_count = 2; | max_decimal_count = 2; | ||||
| switch(decimal_mode = (tr->langopts.numbers & 0xe000)) | switch(decimal_mode = (tr->langopts.numbers & 0xe000)) | ||||
| { | { | ||||
| case 0x8000: | |||||
| case NUM_DFRACTION_4: | |||||
| max_decimal_count = 5; | max_decimal_count = 5; | ||||
| case 0x4000: | |||||
| case NUM_DFRACTION_2: | |||||
| // French/Polish decimal fraction | // French/Polish decimal fraction | ||||
| while(word[n_digits] == '0') | while(word[n_digits] == '0') | ||||
| { | { | ||||
| } | } | ||||
| break; | break; | ||||
| case 0x2000: // italian, say "hundredths" if leading zero | |||||
| case 0xa000: // hungarian, always say "tenths" etc. | |||||
| case NUM_DFRACTION_1: // italian, say "hundredths" if leading zero | |||||
| case NUM_DFRACTION_5: // hungarian, always say "tenths" etc. | |||||
| if(decimal_count <= 4) | if(decimal_count <= 4) | ||||
| { | { | ||||
| LookupNum3(tr, atoi(&word[n_digits]), ph_buf, 0,0,0); | LookupNum3(tr, atoi(&word[n_digits]), ph_buf, 0,0,0); | ||||
| } | } | ||||
| break; | break; | ||||
| case 0x6000: | |||||
| case NUM_DFRACTION_3: | |||||
| // Romanian decimal fractions | // Romanian decimal fractions | ||||
| if((decimal_count <= 4) && (word[n_digits] != '0')) | if((decimal_count <= 4) && (word[n_digits] != '0')) | ||||
| { | { | ||||
| if(option_sayas == SAYAS_DIGITS1) | if(option_sayas == SAYAS_DIGITS1) | ||||
| return(0); // speak digits individually | return(0); // speak digits individually | ||||
| if((tr->langopts.numbers & 0x3) == 1) | |||||
| if(tr->langopts.numbers != 0) | |||||
| return(TranslateNumber_1(tr, word1, ph_out, flags, wflags)); | return(TranslateNumber_1(tr, word1, ph_out, flags, wflags)); | ||||
| return(0); | return(0); |
| static const char *punct_close = ")]}>;'\""; // always pitch fall unless followed by alnum | static const char *punct_close = ")]}>;'\""; // always pitch fall unless followed by alnum | ||||
| // alter tone for announce punctuation or capitals | // alter tone for announce punctuation or capitals | ||||
| static const char *tone_punct_on = "\0016T"; // add reverberation, lower pitch | |||||
| static const char *tone_punct_off = "\001T"; | |||||
| //static const char *tone_punct_on = "\0016T"; // add reverberation, lower pitch | |||||
| //static const char *tone_punct_off = "\001T\001P"; | |||||
| static const char *tone_punct_on = ""; // add reverberation, lower pitch TEST apply no effect | |||||
| static const char *tone_punct_off = ""; | |||||
| // ignore these characters | // ignore these characters | ||||
| static const unsigned short chars_ignore[] = { | static const unsigned short chars_ignore[] = { | ||||
| int c1=' '; // current character | int c1=' '; // current character | ||||
| int c2; // next character | int c2; // next character | ||||
| int cprev=' '; // previous character | int cprev=' '; // previous character | ||||
| int cprev2=' '; | |||||
| int parag; | int parag; | ||||
| int ix = 0; | int ix = 0; | ||||
| int j; | int j; | ||||
| int any_alnum = 0; | int any_alnum = 0; | ||||
| int self_closing; | int self_closing; | ||||
| int punct_data; | int punct_data; | ||||
| int is_punctuation; | |||||
| int save_c2; | |||||
| int stressed_word = 0; | int stressed_word = 0; | ||||
| const char *p; | const char *p; | ||||
| wchar_t xml_buf[N_XML_BUF+1]; | wchar_t xml_buf[N_XML_BUF+1]; | ||||
| } | } | ||||
| } | } | ||||
| cprev2 = cprev; | |||||
| cprev = c1; | cprev = c1; | ||||
| c1 = c2; | c1 = c2; | ||||
| if((iswspace(c2) || (punct_data & 0x8000) || IsBracket(c2) || (c2=='?') || (c2=='-') || Eof())) | if((iswspace(c2) || (punct_data & 0x8000) || IsBracket(c2) || (c2=='?') || (c2=='-') || Eof())) | ||||
| { | { | ||||
| // note: (c2='?') is for when a smart-quote has been replaced by '?' | // note: (c2='?') is for when a smart-quote has been replaced by '?' | ||||
| buf[ix] = ' '; | |||||
| buf[ix+1] = 0; | |||||
| is_punctuation = 1; | |||||
| if((c1 == '.') && (cprev == '.')) | if((c1 == '.') && (cprev == '.')) | ||||
| { | { | ||||
| c1 = 0x2026; | c1 = 0x2026; | ||||
| } | } | ||||
| nl_count = 0; | nl_count = 0; | ||||
| while(!Eof() && iswspace(c2)) | |||||
| { | |||||
| if(c2 == '\n') | |||||
| nl_count++; | |||||
| c2 = GetC(); // skip past space(s) | |||||
| } | |||||
| if(!Eof()) | |||||
| save_c2 = c2; | |||||
| if(iswspace(c2)) | |||||
| { | { | ||||
| UngetC(c2); | |||||
| while(!Eof() && iswspace(c2)) | |||||
| { | |||||
| if(c2 == '\n') | |||||
| nl_count++; | |||||
| c2 = GetC(); // skip past space(s) | |||||
| } | |||||
| if(!Eof()) | |||||
| { | |||||
| UngetC(c2); | |||||
| } | |||||
| } | } | ||||
| if((nl_count==0) && (c1 == '.')) | if((nl_count==0) && (c1 == '.')) | ||||
| { | { | ||||
| if(iswdigit(cprev) && (tr->langopts.numbers & 0x10000) && islower(c2)) | |||||
| // if(iswdigit(cprev) && (tr->langopts.numbers & NUM_ORDINAL_DOT) && islower(c2)) | |||||
| if(iswdigit(cprev) && (tr->langopts.numbers & NUM_ORDINAL_DOT)) | |||||
| { | { | ||||
| // dot after a number indicates an ordinal number | // dot after a number indicates an ordinal number | ||||
| c2 = '.'; | |||||
| continue; | |||||
| is_punctuation = 0; | |||||
| } | } | ||||
| else | |||||
| if(iswlower(c2)) | if(iswlower(c2)) | ||||
| { | { | ||||
| c2 = ' '; | |||||
| continue; // next word has no capital letter, this dot is probably from an abbreviation | |||||
| // next word has no capital letter, this dot is probably from an abbreviation | |||||
| c1 = ' '; | |||||
| is_punctuation = 0; | |||||
| } | } | ||||
| if(any_alnum==0) | if(any_alnum==0) | ||||
| { | { | ||||
| c2 = ' '; // no letters or digits yet, so probably not a sentence terminator | |||||
| continue; | |||||
| c1 = ' '; // no letters or digits yet, so probably not a sentence terminator | |||||
| is_punctuation = 0; | |||||
| } | } | ||||
| } | } | ||||
| punct_data = punct_attributes[punct]; | |||||
| if(nl_count > 1) | |||||
| c2 = save_c2; | |||||
| if(is_punctuation) | |||||
| { | { | ||||
| if((punct_data == CLAUSE_QUESTION) || (punct_data == CLAUSE_EXCLAMATION)) | |||||
| return(punct_data + 35); // with a longer pause | |||||
| return(CLAUSE_PARAGRAPH); | |||||
| buf[ix] = ' '; | |||||
| buf[ix+1] = 0; | |||||
| punct_data = punct_attributes[punct]; | |||||
| if(nl_count > 1) | |||||
| { | |||||
| if((punct_data == CLAUSE_QUESTION) || (punct_data == CLAUSE_EXCLAMATION)) | |||||
| return(punct_data + 35); // with a longer pause | |||||
| return(CLAUSE_PARAGRAPH); | |||||
| } | |||||
| return(punct_data); // only recognise punctuation if followed by a blank or bracket/quote | |||||
| } | } | ||||
| return(punct_data); // only recognise punctuation if followed by a blank or bracket/quote | |||||
| } | } | ||||
| } | } | ||||
| #include "translate.h" | #include "translate.h" | ||||
| #include "wave.h" | #include "wave.h" | ||||
| const char *version_string = "1.41.08 04.Oct.09"; | |||||
| const char *version_string = "1.41.11 09.Oct.09"; | |||||
| const int version_phdata = 0x014100; | const int version_phdata = 0x014100; | ||||
| int option_device_number = -1; | int option_device_number = -1; |
| #define OFFSET_ARMENIAN 0x530 | #define OFFSET_ARMENIAN 0x530 | ||||
| #define OFFSET_DEVANAGARI 0x900 | #define OFFSET_DEVANAGARI 0x900 | ||||
| #define OFFSET_BENGALI 0x980 | #define OFFSET_BENGALI 0x980 | ||||
| #define OFFSET_GURMUKHI 0xa00 | |||||
| #define OFFSET_TAMIL 0xb80 | #define OFFSET_TAMIL 0xb80 | ||||
| #define OFFSET_KANNADA 0xc80 | #define OFFSET_KANNADA 0xc80 | ||||
| #define OFFSET_MALAYALAM 0xd00 | #define OFFSET_MALAYALAM 0xd00 | ||||
| tr->langopts.max_roman = 49; | tr->langopts.max_roman = 49; | ||||
| tr->langopts.thousands_sep = ','; | tr->langopts.thousands_sep = ','; | ||||
| tr->langopts.decimal_sep = '.'; | tr->langopts.decimal_sep = '.'; | ||||
| tr->langopts.break_numbers = BREAK_THOUSANDS; // 1000, 1000,000 1,000,000 etc | |||||
| memcpy(tr->punct_to_tone, punctuation_to_tone, sizeof(tr->punct_to_tone)); | memcpy(tr->punct_to_tone, punctuation_to_tone, sizeof(tr->punct_to_tone)); | ||||
| tr->langopts.param[LOPT_PREFIXES] = 1; | tr->langopts.param[LOPT_PREFIXES] = 1; | ||||
| SetLetterVowel(tr,'y'); // add 'y' to vowels | SetLetterVowel(tr,'y'); // add 'y' to vowels | ||||
| tr->langopts.numbers = 0x8d1 + NUM_ROMAN; | |||||
| tr->langopts.numbers = NUM_SWAP_TENS | NUM_HUNDRED_AND | NUM_SINGLE_AND | NUM_ROMAN | NUM_1900; | |||||
| tr->langopts.accents = 1; | tr->langopts.accents = 1; | ||||
| } | } | ||||
| break; | break; | ||||
| SetLetterBitsRange(tr,LETTERGP_F,0x3e,0x4c); // vowel signs, but not virama | SetLetterBitsRange(tr,LETTERGP_F,0x3e,0x4c); // vowel signs, but not virama | ||||
| tr->langopts.numbers = 0x1; | tr->langopts.numbers = 0x1; | ||||
| tr->langopts.numbers2 = NUM2_100000; | |||||
| tr->langopts.break_numbers = 0x24924aa8; // for languages which have numbers for 100,000 and 100,00,000, eg Hindi | |||||
| } | } | ||||
| break; | break; | ||||
| tr->langopts.unstressed_wd2 = 2; | tr->langopts.unstressed_wd2 = 2; | ||||
| tr->langopts.param[LOPT_SONORANT_MIN] = 120; // limit the shortening of sonorants before short vowels | tr->langopts.param[LOPT_SONORANT_MIN] = 120; // limit the shortening of sonorants before short vowels | ||||
| tr->langopts.numbers = 0x401; | |||||
| tr->langopts.numbers = NUM_OMIT_1_HUNDRED; | |||||
| SetLetterVowel(tr,'w'); // add letter to vowels and remove from consonants | SetLetterVowel(tr,'w'); // add letter to vowels and remove from consonants | ||||
| SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
| tr->langopts.stress_rule = 0; | tr->langopts.stress_rule = 0; | ||||
| SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
| tr->langopts.numbers = 0x10c59; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SWAP_TENS | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_ORDINAL_DOT | NUM_1900; | |||||
| } | } | ||||
| break; | break; | ||||
| tr->langopts.param[LOPT_PREFIXES] = 1; | tr->langopts.param[LOPT_PREFIXES] = 1; | ||||
| memcpy(tr->stress_lengths,stress_lengths_de,sizeof(tr->stress_lengths)); | memcpy(tr->stress_lengths,stress_lengths_de,sizeof(tr->stress_lengths)); | ||||
| tr->langopts.numbers = 0x11419 + NUM_ROMAN; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SWAP_TENS | NUM_OMIT_1_HUNDRED | NUM_ALLOW_SPACE | NUM_ORDINAL_DOT | NUM_ROMAN; | |||||
| SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
| } | } | ||||
| break; | break; | ||||
| SetupTranslator(tr,stress_lengths_en,NULL); | SetupTranslator(tr,stress_lengths_en,NULL); | ||||
| tr->langopts.stress_rule = 0; | tr->langopts.stress_rule = 0; | ||||
| tr->langopts.numbers = 0x841 + NUM_ROMAN; | |||||
| tr->langopts.numbers = NUM_HUNDRED_AND | NUM_ROMAN | NUM_1900; | |||||
| tr->langopts.param[LOPT_COMBINE_WORDS] = 2; // allow "mc" to combine with the following word | tr->langopts.param[LOPT_COMBINE_WORDS] = 2; // allow "mc" to combine with the following word | ||||
| } | } | ||||
| break; | break; | ||||
| tr->langopts.unstressed_wd2 = 2; | tr->langopts.unstressed_wd2 = 2; | ||||
| tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels | tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels | ||||
| tr->langopts.numbers = 0x109; | |||||
| tr->langopts.numbers = NUM_SINGLE_STRESS | NUM_DECIMAL_COMMA; | |||||
| tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands | tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands | ||||
| if(name2 == L_grc) | if(name2 == L_grc) | ||||
| tr->langopts.unstressed_wd1 = 3; | tr->langopts.unstressed_wd1 = 3; | ||||
| tr->langopts.unstressed_wd2 = 2; | tr->langopts.unstressed_wd2 = 2; | ||||
| tr->langopts.numbers = 0x1409 + NUM_ROMAN; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED | NUM_ALLOW_SPACE | NUM_ROMAN; | |||||
| } | } | ||||
| break; | break; | ||||
| tr->langopts.unstressed_wd2 = 2; | tr->langopts.unstressed_wd2 = 2; | ||||
| tr->langopts.param[LOPT_SONORANT_MIN] = 120; // limit the shortening of sonorants before short vowels | tr->langopts.param[LOPT_SONORANT_MIN] = 120; // limit the shortening of sonorants before short vowels | ||||
| tr->langopts.numbers = 0x529 + NUM_ROMAN + NUM_ROMAN_AFTER; | |||||
| tr->langopts.numbers = NUM_SINGLE_STRESS | NUM_DECIMAL_COMMA | NUM_AND_UNITS | NUM_OMIT_1_HUNDRED | NUM_ROMAN | NUM_ROMAN_AFTER; | |||||
| if(name2 == L('c','a')) | if(name2 == L('c','a')) | ||||
| { | { | ||||
| static const unsigned char stress_amps_eu[8] = {16,16, 18,18, 18,18, 18,18 }; | static const unsigned char stress_amps_eu[8] = {16,16, 18,18, 18,18, 18,18 }; | ||||
| SetupTranslator(tr,stress_lengths_eu,stress_amps_eu); | SetupTranslator(tr,stress_lengths_eu,stress_amps_eu); | ||||
| tr->langopts.stress_rule = 1; // ?? second syllable ?? | tr->langopts.stress_rule = 1; // ?? second syllable ?? | ||||
| tr->langopts.numbers = 0x569 + NUM_VIGESIMAL; | |||||
| tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_DECIMAL_COMMA | NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_VIGESIMAL; | |||||
| } | } | ||||
| break; | break; | ||||
| tr->langopts.param[LOPT_IT_DOUBLING] = 1; | tr->langopts.param[LOPT_IT_DOUBLING] = 1; | ||||
| tr->langopts.long_stop = 130; | tr->langopts.long_stop = 130; | ||||
| tr->langopts.numbers = 0x1009; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA + NUM_ALLOW_SPACE; | |||||
| SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
| // tr->langopts.max_initial_consonants = 2; // BUT foreign words may have 3 | // tr->langopts.max_initial_consonants = 2; // BUT foreign words may have 3 | ||||
| tr->langopts.spelling_stress = 1; | tr->langopts.spelling_stress = 1; | ||||
| tr->langopts.stress_flags = 0x0024; // don't use secondary stress | tr->langopts.stress_flags = 0x0024; // don't use secondary stress | ||||
| tr->langopts.param[LOPT_IT_LENGTHEN] = 1; // remove lengthen indicator from unstressed syllables | tr->langopts.param[LOPT_IT_LENGTHEN] = 1; // remove lengthen indicator from unstressed syllables | ||||
| tr->langopts.numbers = 0x1509 + 0x8000 + NUM_NOPAUSE | NUM_ROMAN | NUM_VIGESIMAL; | |||||
| tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_OMIT_1_HUNDRED | NUM_NOPAUSE | NUM_ROMAN | NUM_VIGESIMAL | NUM_DFRACTION_4; | |||||
| SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
| } | } | ||||
| break; | break; | ||||
| case L('h','i'): // Hindi | case L('h','i'): // Hindi | ||||
| case L('n','e'): // Nepali | case L('n','e'): // Nepali | ||||
| case L('p','a'): // Punjabi | |||||
| { | { | ||||
| static const short stress_lengths_hi[8] = {190, 190, 210, 210, 0, 0, 230, 250}; | static const short stress_lengths_hi[8] = {190, 190, 210, 210, 0, 0, 230, 250}; | ||||
| static const unsigned char stress_amps_hi[8] = {17,14, 20,19, 20,22, 22,21 }; | static const unsigned char stress_amps_hi[8] = {17,14, 20,19, 20,22, 22,21 }; | ||||
| tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable | tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable | ||||
| tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | ||||
| tr->langopts.numbers = 0x011; | |||||
| tr->langopts.numbers2 = NUM2_100000; | |||||
| tr->langopts.numbers = NUM_SWAP_TENS; | |||||
| tr->langopts.break_numbers = 0x24924aa8; // for languages which have numbers for 100,000 and 100,00,000, eg Hindi | |||||
| tr->letter_bits_offset = OFFSET_DEVANAGARI; | tr->letter_bits_offset = OFFSET_DEVANAGARI; | ||||
| if(name2 == L('p','a')) | |||||
| { | |||||
| tr->langopts.numbers = 0; // no number rules yet | |||||
| tr->letter_bits_offset = OFFSET_GURMUKHI; | |||||
| } | |||||
| SetIndicLetters(tr); | SetIndicLetters(tr); | ||||
| } | } | ||||
| break; | break; | ||||
| tr->langopts.spelling_stress = 1; | tr->langopts.spelling_stress = 1; | ||||
| tr->langopts.accents = 1; | tr->langopts.accents = 1; | ||||
| tr->langopts.numbers = 0x140d + 0x4000 + NUM_ROMAN_UC; | |||||
| tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_HUNDRED_AND | NUM_DECIMAL_COMMA | NUM_THOUS_SPACE | NUM_DFRACTION_2 | NUM_ROMAN_UC; | |||||
| tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards | tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards | ||||
| tr->langopts.replace_chars = replace_cyrillic_latin; | tr->langopts.replace_chars = replace_cyrillic_latin; | ||||
| tr->langopts.param[LOPT_IT_DOUBLING] = 1; | tr->langopts.param[LOPT_IT_DOUBLING] = 1; | ||||
| tr->langopts.param[LOPT_COMBINE_WORDS] = 99; // combine some prepositions with the following word | tr->langopts.param[LOPT_COMBINE_WORDS] = 99; // combine some prepositions with the following word | ||||
| tr->langopts.numbers = 0x1009 + 0xa000 + NUM_ROMAN + NUM_ROMAN_ORDINAL + NUM_ORDINAL_DOT + NUM_OMIT_1_HUNDRED; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_5 | NUM_ROMAN | NUM_ROMAN_ORDINAL | NUM_ORDINAL_DOT | NUM_OMIT_1_HUNDRED; | |||||
| SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
| tr->langopts.spelling_stress = 1; | tr->langopts.spelling_stress = 1; | ||||
| SetLengthMods(tr,3); // all equal | SetLengthMods(tr,3); // all equal | ||||
| SetLetterBits(tr,LETTERGP_A,hy_vowels); | SetLetterBits(tr,LETTERGP_A,hy_vowels); | ||||
| SetLetterBits(tr,LETTERGP_C,hy_consonants); | SetLetterBits(tr,LETTERGP_C,hy_consonants); | ||||
| tr->langopts.max_initial_consonants = 6; | tr->langopts.max_initial_consonants = 6; | ||||
| tr->langopts.numbers = 0x409; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED; | |||||
| // tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words | // tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words | ||||
| } | } | ||||
| break; | break; | ||||
| SetupTranslator(tr,stress_lengths_id,stress_amps_id); | SetupTranslator(tr,stress_lengths_id,stress_amps_id); | ||||
| tr->langopts.stress_rule = 2; | tr->langopts.stress_rule = 2; | ||||
| tr->langopts.numbers = 0x1009 + NUM_ROMAN; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_ROMAN; | |||||
| tr->langopts.stress_flags = 0x6 | 0x10; | tr->langopts.stress_flags = 0x6 | 0x10; | ||||
| tr->langopts.accents = 2; // "capital" after letter name | tr->langopts.accents = 2; // "capital" after letter name | ||||
| } | } | ||||
| SetLetterBits(tr,3,"jvr"); // Letter group H | SetLetterBits(tr,3,"jvr"); // Letter group H | ||||
| tr->letter_groups[1] = is_lettergroup_B; | tr->letter_groups[1] = is_lettergroup_B; | ||||
| SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
| tr->langopts.numbers = 0x8e9; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SINGLE_AND | NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_1900; | |||||
| tr->langopts.numbers2 = 0x2; | tr->langopts.numbers2 = 0x2; | ||||
| } | } | ||||
| break; | break; | ||||
| tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels | tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels | ||||
| tr->langopts.param[LOPT_REDUCE] = 1; // reduce vowels even if phonemes are specified in it_list | tr->langopts.param[LOPT_REDUCE] = 1; // reduce vowels even if phonemes are specified in it_list | ||||
| tr->langopts.param[LOPT_ALT] = 2; // call ApplySpecialAttributes2() if a word has $alt or $alt2 | tr->langopts.param[LOPT_ALT] = 2; // call ApplySpecialAttributes2() if a word has $alt or $alt2 | ||||
| tr->langopts.numbers = 0x2709 + NUM_ROMAN; | |||||
| tr->langopts.numbers = NUM_SINGLE_VOWEL | NUM_OMIT_1_HUNDRED |NUM_DECIMAL_COMMA | NUM_ROMAN | NUM_DFRACTION_1; | |||||
| tr->langopts.accents = 2; // Say "Capital" after the letter. | tr->langopts.accents = 2; // Say "Capital" after the letter. | ||||
| SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
| } | } | ||||
| tr->langopts.stress_rule = 8; // ?? 1st syllable if it is heavy, else 2nd syllable | tr->langopts.stress_rule = 8; // ?? 1st syllable if it is heavy, else 2nd syllable | ||||
| tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words | tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words | ||||
| tr->langopts.numbers = 0x0401; | |||||
| tr->langopts.numbers = NUM_OMIT_1_HUNDRED; | |||||
| } | } | ||||
| break; | break; | ||||
| tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable | tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable | ||||
| tr->langopts.numbers = 0x100461; | |||||
| tr->langopts.numbers = NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_OMIT_1_HUNDRED | NUM_AND_HUNDRED; | |||||
| tr->langopts.max_initial_consonants = 2; | tr->langopts.max_initial_consonants = 2; | ||||
| } | } | ||||
| break; | break; | ||||
| tr->langopts.unstressed_wd1 = 0; | tr->langopts.unstressed_wd1 = 0; | ||||
| tr->langopts.unstressed_wd2 = 2; | tr->langopts.unstressed_wd2 = 2; | ||||
| tr->langopts.param[LOPT_DIERESES] = 1; | tr->langopts.param[LOPT_DIERESES] = 1; | ||||
| tr->langopts.numbers = 0x1 + NUM_ROMAN; | |||||
| tr->langopts.numbers = NUM_ROMAN; | |||||
| tr->langopts.max_roman = 5000; | tr->langopts.max_roman = 5000; | ||||
| } | } | ||||
| break; | break; | ||||
| tr->langopts.stress_rule = 0; | tr->langopts.stress_rule = 0; | ||||
| tr->langopts.spelling_stress = 1; | tr->langopts.spelling_stress = 1; | ||||
| tr->charset_a0 = charsets[4]; // ISO-8859-4 | tr->charset_a0 = charsets[4]; // ISO-8859-4 | ||||
| tr->langopts.numbers = 0x409 + 0x8000 + 0x10000; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_4 | NUM_ORDINAL_DOT; | |||||
| tr->langopts.stress_flags = 0x16 + 0x40000; | tr->langopts.stress_flags = 0x16 + 0x40000; | ||||
| } | } | ||||
| break; | break; | ||||
| tr->letter_groups[0] = vowels_cyrillic; | tr->letter_groups[0] = vowels_cyrillic; | ||||
| tr->langopts.stress_rule = 4; // antipenultimate | tr->langopts.stress_rule = 4; // antipenultimate | ||||
| tr->langopts.numbers = 0x0429 + 0x4000; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_AND_UNITS | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_2; | |||||
| tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards | tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards | ||||
| } | } | ||||
| break; | break; | ||||
| tr->langopts.param[LOPT_PREFIXES] = 1; | tr->langopts.param[LOPT_PREFIXES] = 1; | ||||
| SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
| tr->langopts.numbers = 0x11c19; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SWAP_TENS | NUM_OMIT_1_HUNDRED | NUM_ALLOW_SPACE | NUM_1900 | NUM_ORDINAL_DOT; | |||||
| memcpy(tr->stress_lengths,stress_lengths_nl,sizeof(tr->stress_lengths)); | memcpy(tr->stress_lengths,stress_lengths_nl,sizeof(tr->stress_lengths)); | ||||
| } | } | ||||
| break; | break; | ||||
| SetupTranslator(tr,stress_lengths_no,NULL); | SetupTranslator(tr,stress_lengths_no,NULL); | ||||
| tr->langopts.stress_rule = 0; | tr->langopts.stress_rule = 0; | ||||
| SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
| tr->langopts.numbers = 0x11849; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_HUNDRED_AND | NUM_ALLOW_SPACE | NUM_1900 + NUM_ORDINAL_DOT; | |||||
| } | } | ||||
| break; | break; | ||||
| SetupTranslator(tr,stress_lengths_om,stress_amps_om); | SetupTranslator(tr,stress_lengths_om,stress_amps_om); | ||||
| tr->langopts.stress_rule = 2; | tr->langopts.stress_rule = 2; | ||||
| tr->langopts.stress_flags = 0x16 + 0x80000; | |||||
| tr->langopts.stress_flags = 2 + NUM_SWAP_TENS | NUM_THOUS_SPACE | NUM_NOPAUSE; //?? | |||||
| } | } | ||||
| break; | break; | ||||
| tr->langopts.stress_flags = 0x6; // mark unstressed final syllables as diminished | tr->langopts.stress_flags = 0x6; // mark unstressed final syllables as diminished | ||||
| tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x8; | tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x8; | ||||
| tr->langopts.max_initial_consonants = 7; // for example: wchrzczony :) | tr->langopts.max_initial_consonants = 7; // for example: wchrzczony :) | ||||
| tr->langopts.numbers=0x1009 + 0x4000; | |||||
| tr->langopts.numbers2=0x40; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_2; | |||||
| tr->langopts.numbers2 = 0x40; | |||||
| tr->langopts.param[LOPT_COMBINE_WORDS] = 4 + 0x100; // combine 'nie' (marked with $alt2) with some 1-syllable (and 2-syllable) words (marked with $alt) | tr->langopts.param[LOPT_COMBINE_WORDS] = 4 + 0x100; // combine 'nie' (marked with $alt2) with some 1-syllable (and 2-syllable) words (marked with $alt) | ||||
| SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
| } | } | ||||
| tr->langopts.stress_rule = 3; // stress on final syllable | tr->langopts.stress_rule = 3; // stress on final syllable | ||||
| tr->langopts.stress_flags = 0x6 | 0x10 | 0x20000; | tr->langopts.stress_flags = 0x6 | 0x10 | 0x20000; | ||||
| tr->langopts.numbers = 0x069 + 0x4000 + NUM_ROMAN; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_DFRACTION_2 | NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_ROMAN; | |||||
| SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
| ResetLetterBits(tr,0x2); | ResetLetterBits(tr,0x2); | ||||
| SetLetterBits(tr,1,"bcdfgjkmnpqstvxz"); // B hard consonants, excluding h,l,r,w,y | SetLetterBits(tr,1,"bcdfgjkmnpqstvxz"); // B hard consonants, excluding h,l,r,w,y | ||||
| tr->langopts.stress_flags = 0x100 + 0x6; | tr->langopts.stress_flags = 0x100 + 0x6; | ||||
| tr->charset_a0 = charsets[2]; // ISO-8859-2 | tr->charset_a0 = charsets[2]; // ISO-8859-2 | ||||
| tr->langopts.numbers = 0x1029+0x6000 + NUM_ROMAN; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_3 | NUM_AND_UNITS | NUM_ROMAN; | |||||
| tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex | tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex | ||||
| } | } | ||||
| break; | break; | ||||
| tr->langopts.stress_flags = 0x16; | tr->langopts.stress_flags = 0x16; | ||||
| tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable | tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable | ||||
| tr->langopts.numbers = 0x61 + 0x100000 + 0x4000; | |||||
| tr->langopts.numbers = NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_DFRACTION_2 | NUM_AND_HUNDRED; | |||||
| tr->langopts.numbers2 = 0x200; // say "thousands" before its number | tr->langopts.numbers2 = 0x200; // say "thousands" before its number | ||||
| } | } | ||||
| break; | break; | ||||
| tr->langopts.spelling_stress = 1; | tr->langopts.spelling_stress = 1; | ||||
| tr->langopts.param[LOPT_COMBINE_WORDS] = 4; // combine some prepositions with the following word | tr->langopts.param[LOPT_COMBINE_WORDS] = 4; // combine some prepositions with the following word | ||||
| tr->langopts.numbers = 0x0401 + 0x4000 + NUM_ROMAN; | |||||
| tr->langopts.numbers = NUM_OMIT_1_HUNDRED | NUM_DFRACTION_2 | NUM_ROMAN; | |||||
| tr->langopts.numbers2 = 0x100; | tr->langopts.numbers2 = 0x100; | ||||
| tr->langopts.thousands_sep = 0; //no thousands separator | tr->langopts.thousands_sep = 0; //no thousands separator | ||||
| tr->langopts.decimal_sep = ','; | tr->langopts.decimal_sep = ','; | ||||
| tr->langopts.stress_rule = 2; | tr->langopts.stress_rule = 2; | ||||
| tr->langopts.stress_flags = 0x16 + 0x100; | tr->langopts.stress_flags = 0x16 + 0x100; | ||||
| SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
| tr->langopts.numbers = 0x69 + 0x8000; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_DFRACTION_4; | |||||
| tr->langopts.accents = 2; // "capital" after letter name | tr->langopts.accents = 2; // "capital" after letter name | ||||
| } | } | ||||
| break; | break; | ||||
| tr->langopts.stress_rule = 0; | tr->langopts.stress_rule = 0; | ||||
| SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
| tr->langopts.numbers = 0x1909; | |||||
| tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_1900; | |||||
| tr->langopts.accents = 1; | tr->langopts.accents = 1; | ||||
| } | } | ||||
| break; | break; | ||||
| tr->langopts.stress_rule = 2; | tr->langopts.stress_rule = 2; | ||||
| tr->langopts.stress_flags = 0x6 | 0x10; | tr->langopts.stress_flags = 0x6 | 0x10; | ||||
| tr->langopts.numbers = 0x4e1; | |||||
| tr->langopts.numbers2 = NUM2_100000a; | |||||
| tr->langopts.numbers = NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_SINGLE_AND | NUM_OMIT_1_HUNDRED; | |||||
| tr->langopts.break_numbers = 0x49249268; // for languages which have numbers for 100,000 and 1,000,000 | |||||
| } | } | ||||
| break; | break; | ||||
| tr->langopts.stress_rule = 0; | tr->langopts.stress_rule = 0; | ||||
| tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | ||||
| tr->langopts.numbers2 = NUM2_100000; | |||||
| tr->langopts.break_numbers = 0x24a8; // 1000, 100,000 10,000,000 | |||||
| if(name2 == L('t','a')) | if(name2 == L('t','a')) | ||||
| { | { | ||||
| tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable | tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable | ||||
| tr->langopts.stress_flags = 0x20; //no automatic secondary stress | tr->langopts.stress_flags = 0x20; //no automatic secondary stress | ||||
| tr->langopts.numbers = 0x1509 + 0x4000; | |||||
| tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_2; | |||||
| tr->langopts.max_initial_consonants = 2; | tr->langopts.max_initial_consonants = 2; | ||||
| } | } | ||||
| break; | break; | ||||
| tr->letter_groups[0] = vowels_vi; | tr->letter_groups[0] = vowels_vi; | ||||
| tr->langopts.tone_language = 1; // Tone language, use CalcPitches_Tone() rather than CalcPitches() | tr->langopts.tone_language = 1; // Tone language, use CalcPitches_Tone() rather than CalcPitches() | ||||
| tr->langopts.unstressed_wd1 = 2; | tr->langopts.unstressed_wd1 = 2; | ||||
| tr->langopts.numbers = 0x0049 + 0x8000; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_HUNDRED_AND | NUM_DFRACTION_4; | |||||
| } | } | ||||
| break; | break; | ||||
| tr->translator_name = name2; | tr->translator_name = name2; | ||||
| if(tr->langopts.numbers & 0x8) | |||||
| if(tr->langopts.numbers & NUM_DECIMAL_COMMA) | |||||
| { | { | ||||
| // use . and , for thousands and decimal separators | // use . and , for thousands and decimal separators | ||||
| tr->langopts.thousands_sep = '.'; | tr->langopts.thousands_sep = '.'; | ||||
| tr->langopts.decimal_sep = ','; | tr->langopts.decimal_sep = ','; | ||||
| } | } | ||||
| if(tr->langopts.numbers & 0x4) | |||||
| if(tr->langopts.numbers & NUM_THOUS_SPACE) | |||||
| { | { | ||||
| tr->langopts.thousands_sep = 0; // don't allow thousands separator, except space | tr->langopts.thousands_sep = 0; // don't allow thousands separator, except space | ||||
| } | } | ||||
| tr->langopts.stress_rule = 5; | tr->langopts.stress_rule = 5; | ||||
| tr->langopts.stress_flags = 0x0020; // was 0x1010 | tr->langopts.stress_flags = 0x0020; // was 0x1010 | ||||
| tr->langopts.numbers = 0x0409; | |||||
| tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED; | |||||
| tr->langopts.numbers2 = 0xc2; // variant numbers before thousands | tr->langopts.numbers2 = 0xc2; // variant numbers before thousands | ||||
| tr->langopts.phoneme_change = 1; | tr->langopts.phoneme_change = 1; | ||||
| tr->langopts.testing = 2; | tr->langopts.testing = 2; |
| {//======================== | {//======================== | ||||
| // Replacement for iswalpha() which also checks for some in-word symbols | // Replacement for iswalpha() which also checks for some in-word symbols | ||||
| const unsigned short extra_indic_alphas[] = { | |||||
| 0xa70,0xa71, // Gurmukhi: tippi, addak | |||||
| 0 }; | |||||
| if(iswalpha(c)) | if(iswalpha(c)) | ||||
| return(1); | return(1); | ||||
| // Indic scripts: Devanagari, Tamil, etc | // Indic scripts: Devanagari, Tamil, etc | ||||
| if((c & 0x7f) < 0x64) | if((c & 0x7f) < 0x64) | ||||
| return(1); | return(1); | ||||
| if(lookupwchar(extra_indic_alphas, c) != 0) | |||||
| return(1); | |||||
| return(0); | return(0); | ||||
| } | } | ||||
| int c_temp; | int c_temp; | ||||
| char *pn; | char *pn; | ||||
| char *pw; | char *pw; | ||||
| static unsigned int break_numbers1 = 0x49249248; | |||||
| static unsigned int break_numbers2 = 0x24924aa8; // for languages which have numbers for 100,000 and 100,00,000, eg Hindi | |||||
| static unsigned int break_numbers3 = 0x49249268; // for languages which have numbers for 100,000 and 1,000,000 | |||||
| unsigned int break_numbers; | |||||
| char number_buf[80]; | char number_buf[80]; | ||||
| // start speaking at a specified word position in the text? | // start speaking at a specified word position in the text? | ||||
| // digits should have been converted to Latin alphabet ('0' to '9') | // digits should have been converted to Latin alphabet ('0' to '9') | ||||
| word = pw = &sbuf[words[ix].start]; | word = pw = &sbuf[words[ix].start]; | ||||
| if(iswdigit(word[0]) && (tr->langopts.numbers2 & NUM2_100000)) | |||||
| if(iswdigit(word[0]) && (tr->langopts.break_numbers != BREAK_THOUSANDS)) | |||||
| { | { | ||||
| // Languages with 100000 numbers. Remove thousands separators so that we can insert them again later | // Languages with 100000 numbers. Remove thousands separators so that we can insert them again later | ||||
| pn = number_buf; | pn = number_buf; | ||||
| pn = &number_buf[1]; | pn = &number_buf[1]; | ||||
| nx = n_digits; | nx = n_digits; | ||||
| if((tr->langopts.numbers2 & NUM2_100000a) == NUM2_100000a) | |||||
| break_numbers = break_numbers3; | |||||
| else | |||||
| if(tr->langopts.numbers2 & NUM2_100000) | |||||
| break_numbers = break_numbers2; | |||||
| else | |||||
| break_numbers = break_numbers1; | |||||
| while(pn < &number_buf[sizeof(number_buf)-3]) | while(pn < &number_buf[sizeof(number_buf)-3]) | ||||
| { | { | ||||
| if(!isdigit(c = *pw++) && (c != tr->langopts.decimal_sep)) | if(!isdigit(c = *pw++) && (c != tr->langopts.decimal_sep)) | ||||
| break; | break; | ||||
| *pn++ = c; | *pn++ = c; | ||||
| if((--nx > 0) && (break_numbers & (1 << nx))) | |||||
| if((--nx > 0) && (tr->langopts.break_numbers & (1 << nx))) | |||||
| { | { | ||||
| if(tr->langopts.thousands_sep != ' ') | if(tr->langopts.thousands_sep != ' ') | ||||
| { | { | ||||
| *pn++ = tr->langopts.thousands_sep; | *pn++ = tr->langopts.thousands_sep; | ||||
| } | } | ||||
| *pn++ = ' '; | *pn++ = ' '; | ||||
| if(break_numbers & (1 << (nx-1))) | |||||
| if(tr->langopts.break_numbers & (1 << (nx-1))) | |||||
| { | { | ||||
| // the next group only has 1 digit (i.e. NUM2_10000), make it three | // the next group only has 1 digit (i.e. NUM2_10000), make it three | ||||
| *pn++ = '0'; | *pn++ = '0'; | ||||
| *pn++ = '0'; | *pn++ = '0'; | ||||
| } | } | ||||
| if(break_numbers & (1 << (nx-2))) | |||||
| if(tr->langopts.break_numbers & (1 << (nx-2))) | |||||
| { | { | ||||
| // the next group only has 2 digits (i.e. NUM2_10000), make it three | // the next group only has 2 digits (i.e. NUM2_10000), make it three | ||||
| *pn++ = '0'; | *pn++ = '0'; |
| unsigned char *length_mods; | unsigned char *length_mods; | ||||
| unsigned char *length_mods0; | unsigned char *length_mods0; | ||||
| #define NUM_THOUS_SPACE 0x4 | |||||
| #define NUM_DECIMAL_COMMA 0x8 | |||||
| #define NUM_SWAP_TENS 0x10 | |||||
| #define NUM_AND_UNITS 0x20 | |||||
| #define NUM_HUNDRED_AND 0x40 | |||||
| #define NUM_SINGLE_AND 0x80 | |||||
| #define NUM_SINGLE_STRESS 0x100 | |||||
| #define NUM_SINGLE_VOWEL 0x200 | |||||
| #define NUM_OMIT_1_HUNDRED 0x400 | #define NUM_OMIT_1_HUNDRED 0x400 | ||||
| #define NUM_19_HUNDRED 0x800 | |||||
| #define NUM_1900 0x800 | |||||
| #define NUM_ALLOW_SPACE 0x1000 | |||||
| #define NUM_DFRACTION_1 0x2000 | |||||
| #define NUM_DFRACTION_2 0x4000 | |||||
| #define NUM_DFRACTION_3 0x6000 | |||||
| #define NUM_DFRACTION_4 0x8000 | |||||
| #define NUM_DFRACTION_5 0xa000 | |||||
| #define NUM_ORDINAL_DOT 0x10000 | #define NUM_ORDINAL_DOT 0x10000 | ||||
| #define NUM_ROMAN 0x20000 | #define NUM_ROMAN 0x20000 | ||||
| #define NUM_ROMAN_UC 0x40000 | #define NUM_ROMAN_UC 0x40000 | ||||
| #define NUM_NOPAUSE 0x80000 | #define NUM_NOPAUSE 0x80000 | ||||
| #define NUM_AND_HUNDRED 0x100000 | |||||
| #define NUM_ROMAN_AFTER 0x200000 | #define NUM_ROMAN_AFTER 0x200000 | ||||
| #define NUM_VIGESIMAL 0x400000 | #define NUM_VIGESIMAL 0x400000 | ||||
| #define NUM_ROMAN_ORDINAL 0x800000 | #define NUM_ROMAN_ORDINAL 0x800000 | ||||
| // bit23=Roman numbers are ordinal numbers | // bit23=Roman numbers are ordinal numbers | ||||
| int numbers; | int numbers; | ||||
| #define NUM2_100000 0x800 // numbers for 100,000 and 10,000,000 | |||||
| #define NUM2_100000a 0xc00 // numbers for 100,000 and 1,000,000 | |||||
| // bits 1-4 use variant form of numbers before thousands,millions,etc. | // bits 1-4 use variant form of numbers before thousands,millions,etc. | ||||
| // bit6=(LANG=pl) two forms of plural, M or MA | // bit6=(LANG=pl) two forms of plural, M or MA | ||||
| // bit7=(LANG=ru) use MB for 1 thousand, million, etc | // bit7=(LANG=ru) use MB for 1 thousand, million, etc | ||||
| // bit8=(LANG=cs,sk) two forms of plural, M or MA | // bit8=(LANG=cs,sk) two forms of plural, M or MA | ||||
| // bit9=(LANG=rw) say "thousand" and "million" before its number, not after | // bit9=(LANG=rw) say "thousand" and "million" before its number, not after | ||||
| // bit10=(LANG=sw) special word for 100,000 and 1,000,000 | |||||
| // bit11=(LANG=hi) special word for 100,000 and 10,000,000 | |||||
| int numbers2; | int numbers2; | ||||
| #define BREAK_THOUSANDS 0x49249248 | |||||
| int break_numbers; // which digits to break the number into thousands, millions, etc (Hindi has 100,000 not 1,000,000) | |||||
| int max_roman; | int max_roman; | ||||
| int thousands_sep; | int thousands_sep; | ||||
| int decimal_sep; | int decimal_sep; |
| amp = embedded_value[EMBED_H]; | amp = embedded_value[EMBED_H]; | ||||
| delay = 130; | delay = 130; | ||||
| } | } | ||||
| #ifdef deleted | |||||
| if(embedded_value[EMBED_T] > 0) | if(embedded_value[EMBED_T] > 0) | ||||
| { | { | ||||
| // announcing punctuation | |||||
| // announcing punctuation, add a small echo | |||||
| // This seems unpopular | |||||
| amp = embedded_value[EMBED_T] * 8; | amp = embedded_value[EMBED_T] * 8; | ||||
| delay = 60; | delay = 60; | ||||
| } | } | ||||
| #endif | |||||
| if(delay == 0) | if(delay == 0) | ||||
| amp = 0; | amp = 0; |