Fixes for ordinal numbers (lang=hu). git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@225 d46cf337-b52f-0410-862d-fd96e6ae7743master
g h j J k l m n | g h j J k l m n | ||||
N n^ p q R s S S; | N n^ p q R s S S; | ||||
t tS v w x z | t tS v w x z | ||||
Dictionary pa_dict | |||||
a a~ e E e~ E~ i I | |||||
i: i~ I~ o O o: O~ o~ | |||||
r- U u u~ U~ V V~ | |||||
: b bh c ch d d. d.h | |||||
dh f g gh H j J Jh | |||||
k kh l l. m n N n. | |||||
n^ p ph Q R s S t | |||||
t. t.h th v x z |
// numeric | // numeric | ||||
//_0 zero $text // TEST | |||||
_0 z'i@roU | |||||
_0 zero $text // TEST | |||||
//_0 z'i@roU | |||||
_1 w'0n | _1 w'0n | ||||
?6 _1 w'Vn | ?6 _1 w'Vn | ||||
_2 t'u: | _2 t'u: | ||||
_80o 'eIti@ | _80o 'eIti@ | ||||
_90o n'aInti@ | _90o n'aInti@ | ||||
_0Co h'Vndr@dT | _0Co h'Vndr@dT | ||||
_0M1o T'aUz@ndT | |||||
// ABBREVIATIONS | // ABBREVIATIONS |
// ordinal numbers | // ordinal numbers | ||||
_ord Edik | _ord Edik | ||||
_0o n'ullAdik | |||||
_1ox ElSY: // number = '1' only | _1ox ElSY: // number = '1' only | ||||
_2ox ma:Sodik // number = '2' only | |||||
_2ox ma:Sodik // number = '2' only | |||||
_1o EJ:Edik | _1o EJ:Edik | ||||
_2o kEt:Edik | _2o kEt:Edik | ||||
_3o hAR2_mAdik | _3o hAR2_mAdik | ||||
_80o n^oltsvAnAdik | _80o n^oltsvAnAdik | ||||
_0Co sa:zAdik | _0Co sa:zAdik | ||||
_2Co ke:tsa:zAdik | _2Co ke:tsa:zAdik | ||||
_0M1o EzR2Edik | |||||
_1M1o EzR2Edik | |||||
_2M1o ke:tEzR2Edik | |||||
// These words mean that a dot after number, immediately preceding, does not mean an ordinal number | // These words mean that a dot after number, immediately preceding, does not mean an ordinal number | ||||
január $alt | január $alt | ||||
now $alt | now $alt | ||||
dec $alt | dec $alt | ||||
// the following 7 lines are the 7 Hungarian day names | |||||
hétfő $alt | |||||
kedd $alt | |||||
szerda $alt | |||||
csütörtök $alt | |||||
péntek $alt | |||||
szombat $alt | |||||
vasárnap $alt | |||||
// accent names | // accent names | ||||
_lig ligAtu:R2A | _lig ligAtu:R2A | ||||
_acu e:lES | _acu e:lES | ||||
a.m An^n^i||m'int $dot | a.m An^n^i||m'int $dot | ||||
dr doktoR2 $dot | dr doktoR2 $dot | ||||
gpu $abbrev | gpu $abbrev | ||||
mvgyosz $abbrev | |||||
mvgyosz Emve:Je:oEs | |||||
OTP $abbrev | OTP $abbrev | ||||
id idY:SEb: $dot | id idY:SEb: $dot | ||||
ill illEtvE $dot | ill illEtvE $dot |
.group b | .group b | ||||
b b | b b | ||||
// bb b: | |||||
bb b: | |||||
biz (A b'iz, // bizalmas etc. | biz (A b'iz, // bizalmas etc. | ||||
biedermeier bi:dER2ma:jER2 | biedermeier bi:dER2ma:jER2 | ||||
ccs tS: | ccs tS: | ||||
!) cz ts // in names which start with a capital letter | !) cz ts // in names which start with a capital letter | ||||
ar) csz (e ts | ar) csz (e ts | ||||
anar) ch (i C | |||||
harmin) c ts | |||||
anar) ch (i C | |||||
hierar) ch (i C | hierar) ch (i C | ||||
me) ch (a C | me) ch (a C | ||||
te) ch (n C | te) ch (n C | ||||
.group d | .group d | ||||
d d | d d | ||||
dd d: | |||||
dts tS: | dts tS: | ||||
dt t: | dt t: | ||||
a) dsz ts: | a) dsz ts: | ||||
cselé) d d | cselé) d d | ||||
enge) dsz ts: | enge) dsz ts: | ||||
engedelmeske) dsz ts: | engedelmeske) dsz ts: | ||||
hazu) dsz ts: | |||||
mara) dsz ts: | mara) dsz ts: | ||||
tu) dsz ts: | |||||
kére) dz ts | kére) dz ts | ||||
// dd d: | // dd d: | ||||
dz dz | dz dz | ||||
dj J: | dj J: | ||||
min) dny n^ | min) dny n^ | ||||
.group e | .group e | ||||
e E | e E | ||||
D_-_) es (_ %ES | D_-_) es (_ %ES | ||||
.group g | .group g | ||||
g g | g g | ||||
// gg g: | |||||
gg g: | |||||
gy J | gy J | ||||
ggy J: | ggy J: | ||||
A) gysz (A Js: | A) gysz (A Js: | ||||
C) gyj J | C) gyj J | ||||
A) gyj (A J: | A) gyj (A J: | ||||
ha) gyj J: | |||||
_e) gy (es J: | _e) gy (es J: | ||||
_e) gy (et_ J: | _e) gy (et_ J: | ||||
_e) gy (etlen J: | _e) gy (etlen J: | ||||
.group l | .group l | ||||
l l | l l | ||||
lj j | |||||
ly j | ly j | ||||
lly jj | lly jj | ||||
á) ll (j j | á) ll (j j | ||||
beszé) lj jj | beszé) lj jj | ||||
bére) lj jj | bére) lj jj | ||||
fáj) l l | |||||
fájla) l (j jj | |||||
gondo) lj jjj | gondo) lj jjj | ||||
ke) lj jj | ke) lj jj | ||||
sajná) lj jj | |||||
sajná) lj jjj | |||||
llj jjj | |||||
C) ly (_ li | C) ly (_ li | ||||
_kéth) ly li | _kéth) ly li | ||||
szamue) lly lli | szamue) lly lli | ||||
sz s | sz s | ||||
ssz ss2 | ssz ss2 | ||||
szts stS: | szts stS: | ||||
föld) s (ánc S | |||||
s (színű S | s (színű S | ||||
hel) s (inki z | hel) s (inki z | ||||
ki) s (ebb SS | ki) s (ebb SS | ||||
tc ts: | tc ts: | ||||
tt t: | tt t: | ||||
tt (C tt | tt (C tt | ||||
ttn t:n | |||||
ty c | ty c | ||||
// s) ty c: // | // s) ty c: // | ||||
// z) ty c: // keztyű | // z) ty c: // keztyű | ||||
ttj tc: | ttj tc: | ||||
tty c: | tty c: | ||||
apá) ts (ág tS: | |||||
becsüle) t t | |||||
cson) t t | |||||
cson) tj c | |||||
ezüs) t t | |||||
éle) t t | éle) t t | ||||
felej) ts tS | |||||
já) tsz (ani ts: | |||||
tsz ts: | tsz ts: | ||||
szorí) ts (a tS: | szorí) ts (a tS: | ||||
ürí) ts (e tS: | ürí) ts (e tS: | ||||
mula) ts tS: | mula) ts tS: | ||||
nemze) ts tS: | nemze) ts tS: | ||||
néme) ts (ég tS: | |||||
min) t t | min) t t | ||||
tse tSE | tse tSE | ||||
tso tSo | tso tSo | ||||
tsá tSa: | tsá tSa: | ||||
tsd tSd | tsd tSd | ||||
tsé tSe: | tsé tSe: | ||||
t (cs t | |||||
ttsé tS:e: | ttsé tS:e: | ||||
C) tj c | C) tj c | ||||
A) tj (A c: | A) tj (A c: | ||||
_ka) ty (n ti | _ka) ty (n ti | ||||
vörösmar) ty ti | vörösmar) ty ti | ||||
á) t (sza t | |||||
á) t (jár t | á) t (jár t | ||||
á) t (sző t | á) t (sző t | ||||
á) t (szú t | á) t (szú t | ||||
á) t (szű t | á) t (szű t | ||||
bizo) tts (ág tS: | bizo) tts (ág tS: | ||||
állí) ts tS: | állí) ts tS: | ||||
állapo) t (sor t | |||||
bará) ts tS: | bará) ts tS: | ||||
dön) ts (ön tS | dön) ts (ön tS | ||||
elhivato) tts (ág tS: | elhivato) tts (ág tS: | ||||
kiál) ts tS | kiál) ts tS | ||||
köve) ts (ég tS: | köve) ts (ég tS: | ||||
kür) t t | kür) t t | ||||
kür) tj c | |||||
korlátozo) tts tS: | |||||
neve) ts (ég tS: | neve) ts (ég tS: | ||||
on) ts (u tS | |||||
szen)t (szék t | szen)t (szék t | ||||
szé) t t | |||||
szöve) ts (ég tS: | szöve) ts (ég tS: | ||||
pillan) ts tS | |||||
romlo) tts tS: | romlo) tts tS: | ||||
tar) ts tS | |||||
á) t (sor t | á) t (sor t | ||||
á) t (sé t | á) t (sé t | ||||
ne) t (c t | ne) t (c t | ||||
ké) t (száz t | ké) t (száz t | ||||
ö) t (száz t | ö) t (száz t | ||||
ha) t (száz t | ha) t (száz t | ||||
hé) t (száz t | |||||
vé) ts (ég tS: | vé) ts (ég tS: | ||||
bi) z (tons s | bi) z (tons s | ||||
bi) z (tos s | bi) z (tos s | ||||
bron) z (sz z | bron) z (sz z | ||||
csontvá) z z | |||||
e) z (t s | e) z (t s | ||||
ho) z (ta s | ho) z (ta s | ||||
ho) z (tá s | ho) z (tá s |
//_xx விழுக்காடு $text // TESTING doesn't reduce vowels | |||||
// This file is UTF8 encoded | // This file is UTF8 encoded | ||||
// Spelling to phoneme words and exceptions for Tamil | // Spelling to phoneme words and exceptions for Tamil | ||||
U+bf9 ru:ba:j | U+bf9 ru:ba:j | ||||
// abbreviations | // abbreviations | ||||
ரூ ru:ba:j $dot | |||||
Rs ru:ba:j $dot | |||||
ரூ ru:ba:j $dot | |||||
Rs ru:pi:z $dot | |||||
// numbers | // numbers | ||||
_0 suz.ijVm // சுழியம் | _0 suz.ijVm // சுழியம் | ||||
_0M1 a:jirVttU | _0M1 a:jirVttU | ||||
_1M1 a:jirVttU | _1M1 a:jirVttU | ||||
_0M2x lVd.tSVm | |||||
_0M2x lVd.tSVm // 100,000 | |||||
_1M2x orUlVd.tSVm | _1M2x orUlVd.tSVm | ||||
_0M2 lVd.tSVttU | _0M2 lVd.tSVttU | ||||
_1M2 orUlVd.tSVttU | _1M2 orUlVd.tSVttU | ||||
_0M3x ko:d.i | |||||
_0M3x ko:d.i // 10,000,000 | |||||
_1M3x orUko:d.i | _1M3x orUko:d.i | ||||
_0M3 ko:d.ie: | _0M3 ko:d.ie: | ||||
_1M3 orUko:d.ie: | _1M3 orUko:d.ie: | ||||
_0M4 nu:RUko:d.i // not correct, but should be understandable | |||||
_1M4 nu:RUko:d.i | |||||
_0M5 patta:jiRUmko:d.i | |||||
_1M5 patta:jiRUmko:d.i | |||||
_0M4x a:jirVmko:d.i // 10,000,000,000 | |||||
_1M4x a:jirVmko:d.i | |||||
_0M4 a:jirVmko:d.ie: | |||||
_1M4 a:jirVmko:d.ie: | |||||
_0M5 a:jirVma:jirVmko:d.i // 10,000,000,000,000 | |||||
_1M5 a:jirVma:jirVmko:d.i | |||||
_dpt _pul.l.i | _dpt _pul.l.i |
௭ 7 | ௭ 7 | ||||
௮ 8 | ௮ 8 | ||||
௯ 9 | ௯ 9 | ||||
ொ ொ | |||||
ோ ோ | |||||
ௌ ௌ | |||||
. | |||||
.group 0xe0ae // characters which start with UTF-8 bytes: [e0 ae] | .group 0xe0ae // characters which start with UTF-8 bytes: [e0 ae] | ||||
ஂ // anusvara | ஂ // anusvara | ||||
எ e | எ e | ||||
_) எ ;e // add a short [j] sound at start of word ? | _) எ ;e // add a short [j] sound at start of word ? | ||||
ஏ e:: | |||||
ஏ e: | |||||
_) ஏ ;e:: // add a short [j] sound at start of word ? | _) ஏ ;e:: // add a short [j] sound at start of word ? | ||||
ஐ aI | ஐ aI | ||||
ங NV | ங NV | ||||
ங (B N | ங (B N | ||||
ச zV | |||||
ச (B z | |||||
ச sV // ?? [z] | |||||
ச (B s | |||||
_) ச sa | _) ச sa | ||||
_) ச (B s | _) ச (B s | ||||
ச்ச tS:V | ச்ச tS:V | ||||
ச்ச (B tS: | ச்ச (B tS: | ||||
ற்ச tS: | |||||
ற்ச (B tS: | |||||
ட்) ச tSV | ட்) ச tSV | ||||
ட்) ச (B tS | ட்) ச (B tS | ||||
ஞ்) ச dZV | ஞ்) ச dZV | ||||
ப்ப ppV | ப்ப ppV | ||||
ப்ப (B pp | ப்ப (B pp | ||||
ட்) ப pV | ட்) ப pV | ||||
ட்) ப (B pV | |||||
ட்) ப (B p | |||||
ற்) ப pV | ற்) ப pV | ||||
ற்) ப (B p | ற்) ப (B p | ||||
ஃ) ப fV | ஃ) ப fV | ||||
ௌ aU | ௌ aU | ||||
் // virama | ் // virama | ||||
ௗ : // aU length mark | ௗ : // aU length mark |
60 phoneme tables | |||||
61 phoneme tables | |||||
new total | new total | ||||
base 103 103 | base 103 103 | ||||
base2 26 124 | base2 26 124 | ||||
fi 40 134 | fi 40 134 | ||||
fr 55 141 | fr 55 141 | ||||
fr_ca 11 141 | fr_ca 11 141 | ||||
hi 60 149 | |||||
ta 20 152 | |||||
hi 62 151 | |||||
ta 20 154 | |||||
hu 23 119 | hu 23 119 | ||||
lv 29 126 | lv 29 126 | ||||
nl 28 126 | nl 28 126 | ||||
hy 24 119 | hy 24 119 | ||||
da 21 118 | da 21 118 | ||||
rw 15 131 | rw 15 131 | ||||
ml 13 151 | |||||
kn 15 151 | |||||
bn 59 155 | |||||
ne 18 157 | |||||
mr 12 149 | |||||
ml 13 153 | |||||
kn 15 153 | |||||
bn 59 157 | |||||
ne 18 159 | |||||
mr 12 151 | |||||
eu 6 125 | eu 6 125 | ||||
mn 15 114 | mn 15 114 | ||||
pa 12 152 | |||||
Data file Used by | Data file Used by | ||||
b/b [b] base | b/b [b] base | ||||
vnasal/i_n [i~] pt | vnasal/i_n [i~] pt | ||||
[i~] bn | [i~] bn | ||||
[i:~] bn | [i:~] bn | ||||
vnasal/i_n2 [i~] hi | |||||
vnasal/i_n2 [I~] hi | |||||
[i~] hi | |||||
vnasal/m- [m-] sw | vnasal/m- [m-] sw | ||||
vnasal/n- [n-] sw | vnasal/n- [n-] sw | ||||
vnasal/nn- [N-] sw | vnasal/nn- [N-] sw | ||||
[o] zh | [o] zh | ||||
[O~] bn | [O~] bn | ||||
vnasal/oo_n3 [O~] pl | vnasal/oo_n3 [O~] pl | ||||
vnasal/u_n [u~] hi | |||||
vnasal/u_n [U~] hi | |||||
[u~] hi | |||||
[u~] pt | [u~] pt | ||||
[u] zh | [u] zh | ||||
[u~] bn | [u~] bn | ||||
[a:] hi | [a:] hi | ||||
[a:] bn | [a:] bn | ||||
[a] ne | [a] ne | ||||
[a] pa | |||||
vowel/a_en [A] fr | vowel/a_en [A] fr | ||||
vowel/@_bck [@] hi | vowel/@_bck [@] hi | ||||
[@/] hi | [@/] hi | ||||
[@] bn | [@] bn | ||||
[V] ne | [V] ne | ||||
[@/] ne | [@/] ne | ||||
[@] pa | |||||
vowel/e [e] base2 | vowel/e [e] base2 | ||||
[e:] en | [e:] en | ||||
[eI] en_n | [eI] en_n | ||||
[e:] no | [e:] no | ||||
[e] bn | [e] bn | ||||
[e:] bn | [e:] bn | ||||
[e] pa | |||||
vowel/e_3 [i] en_n | vowel/e_3 [i] en_n | ||||
[e:] hu | [e:] hu | ||||
[e] ku | [e] ku | ||||
[E] zh | [E] zh | ||||
[E#] ku | [E#] ku | ||||
[&] da | [&] da | ||||
[E] pa | |||||
vowel/ee#_2 [E-] sv | vowel/ee#_2 [E-] sv | ||||
[E#] sq | [E#] sq | ||||
vowel/ee_3 [&] af | vowel/ee_3 [&] af | ||||
[i] bn | [i] bn | ||||
[i:] bn | [i:] bn | ||||
[i:] mr | [i:] mr | ||||
[i] pa | |||||
vowel/ii [I] en_n | vowel/ii [I] en_n | ||||
[I2] en_n | [I2] en_n | ||||
[I] en_rp | [I] en_rp | ||||
[I] no | [I] no | ||||
[I] tr | [I] tr | ||||
[I] bn | [I] bn | ||||
[I] pa | |||||
vowel/ii#_3 [I2] en_us | vowel/ii#_3 [I2] en_us | ||||
vowel/ii_4 [I] en | vowel/ii_4 [I] en | ||||
[I2] en | [I2] en | ||||
[@/] ne | [@/] ne | ||||
[@] mr | [@] mr | ||||
[V] mr | [V] mr | ||||
[@] pa | |||||
[V] pa | |||||
vowel/@_low2 [@/] en_us | vowel/@_low2 [@/] en_us | ||||
[@2] en_us | [@2] en_us | ||||
vowel/o [o] base2 | vowel/o [o] base2 | ||||
[o] vi | [o] vi | ||||
[o] da | [o] da | ||||
[o:] bn | [o:] bn | ||||
[o:] pa | |||||
vowel/o-_2 [V] en_n | vowel/o-_2 [V] en_n | ||||
[V] en_wm | [V] en_wm | ||||
vowel/o_3 [oU] en_sc | vowel/o_3 [oU] en_sc | ||||
[O:] hi | [O:] hi | ||||
[O] it | [O] it | ||||
[O] bn | [O] bn | ||||
[O] pa | |||||
vowel/oo_5 [O] pl | vowel/oo_5 [O] pl | ||||
[O] is | [O] is | ||||
[O] sq | [O] sq | ||||
[U] mr | [U] mr | ||||
[u:] mr | [u:] mr | ||||
[u] mn | [u] mn | ||||
[u] pa | |||||
vowel/u_bck2 [u] fr | vowel/u_bck2 [u] fr | ||||
[u:] fr | [u:] fr | ||||
[u:] la | [u:] la | ||||
[u] zhy | [u] zhy | ||||
[U] bn | [U] bn | ||||
[U] mn | [U] mn | ||||
[U] pa | |||||
vowel/V [3] en_sc | vowel/V [3] en_sc | ||||
vowel/V_2 [V] en | vowel/V_2 [V] en | ||||
[a] af | [a] af | ||||
[V] hi | [V] hi | ||||
[V] ta | [V] ta | ||||
[V] bn | [V] bn | ||||
[V] pa | |||||
vowel/V_4 [V] en_sc | vowel/V_4 [V] en_sc | ||||
[V] da | [V] da | ||||
vowel/V_6 [V] en_us | vowel/V_6 [V] en_us |
formants vnasal/i_n2 | formants vnasal/i_n2 | ||||
endphoneme | endphoneme | ||||
phoneme I~ | |||||
vowel starttype (i) endtype (i) | |||||
length 170 | |||||
formants vnasal/i_n2 | |||||
endphoneme | |||||
phoneme e~ | phoneme e~ | ||||
vowel long starttype (e) endtype (e) | vowel long starttype (e) endtype (e) | ||||
length 220 | length 220 | ||||
endphoneme | endphoneme | ||||
phoneme E~ | phoneme E~ | ||||
vowel starttype (e) endtype (e) | |||||
vowel long starttype (e) endtype (e) | |||||
length 230 | length 230 | ||||
formants vnasal/ee_n2 | formants vnasal/ee_n2 | ||||
endphoneme | endphoneme | ||||
formants vnasal/u_n | formants vnasal/u_n | ||||
endphoneme | endphoneme | ||||
phoneme U~ | |||||
vowel starttype (u) endtype (u) | |||||
length 170 | |||||
formants vnasal/u_n | |||||
endphoneme | |||||
phoneme r- | phoneme r- | ||||
vowel starttype (@) endtype (@) | vowel starttype (@) endtype (@) |
phoneme e: | phoneme e: | ||||
vowel starttype (e) endtype (e) | vowel starttype (e) endtype (e) | ||||
length 270 | |||||
length 250 | |||||
formants vowel/e | formants vowel/e | ||||
endphoneme | endphoneme | ||||
phonemetable mn base | phonemetable mn base | ||||
include ph_mongolian | include ph_mongolian | ||||
phonemetable pa hi | |||||
include ph_punjabi |
static int transpose_max; | static int transpose_max; | ||||
static int text_mode = 0; | static int text_mode = 0; | ||||
static int debug_flag = 0; | static int debug_flag = 0; | ||||
static int error_need_dictionary = 0; | |||||
static int hash_counts[N_HASH_DICT]; | static int hash_counts[N_HASH_DICT]; | ||||
static char *hash_chains[N_HASH_DICT]; | static char *hash_chains[N_HASH_DICT]; | ||||
static char nullstring[] = {0}; | static char nullstring[] = {0}; | ||||
WORD_TAB winfo; | WORD_TAB winfo; | ||||
char decoded_phonemes[128]; | |||||
comment = NULL; | comment = NULL; | ||||
text_not_phonemes = 0; | text_not_phonemes = 0; | ||||
if(text_mode) | if(text_mode) | ||||
text_not_phonemes = 1; | text_not_phonemes = 1; | ||||
if(text_not_phonemes != translator->langopts.textmode) | |||||
{ | |||||
flag_codes[n_flag_codes++] = BITNUM_FLAG_TEXTMODE; | |||||
} | |||||
if(text_not_phonemes) | if(text_not_phonemes) | ||||
{ | { | ||||
if(word[0] == '_') | if(word[0] == '_') | ||||
{ | { | ||||
// This is a special word, used by eSpeak. Translate this into phonemes now | // This is a special word, used by eSpeak. Translate this into phonemes now | ||||
// memset(&winfo,0,sizeof(winfo)); | |||||
// TranslateWord(translator,phonetic,0,&winfo); // but *_dict is not loaded ? | |||||
// DecodePhonemes(word_phonemes,decoded_phonemes); | |||||
memset(&winfo,0,sizeof(winfo)); | |||||
strcat(phonetic, " "); // need a space to indicate word-boundary | |||||
// PROBLEM vowel reductions are not applied to the translated phonemes | |||||
// condition rules are not applied | |||||
TranslateWord(translator,phonetic,0,&winfo); | |||||
text_not_phonemes = 0; | |||||
strncpy0(encoded_ph, word_phonemes, N_WORD_BYTES-4); | |||||
if((word_phonemes[0] == 0) && (error_need_dictionary < 3)) | |||||
{ | |||||
// the dictionary was not loaded, we need a second attempt | |||||
error_need_dictionary++; | |||||
fprintf(f_log,"%5d: Need to compile dictionary again\n",linenum); | |||||
} | |||||
{ | |||||
//char decoded_phonemes[128]; | |||||
//DecodePhonemes(word_phonemes,decoded_phonemes); | |||||
//printf("Translator %x %s [%s] [%s]\n",translator->translator_name,word,phonetic,decoded_phonemes); | //printf("Translator %x %s [%s] [%s]\n",translator->translator_name,word,phonetic,decoded_phonemes); | ||||
} | |||||
} | |||||
else | |||||
{ | |||||
// this is replacement text, so don't encode as phonemes. Restrict the length of the replacement word | |||||
strncpy0(encoded_ph,phonetic,N_WORD_BYTES-4); | |||||
} | } | ||||
// this is replacement text, so don't encode as phonemes. Restrict the length of the replacement word | |||||
strncpy0(encoded_ph,phonetic,N_WORD_BYTES-4); | |||||
} | } | ||||
else | else | ||||
{ | { | ||||
} | } | ||||
} | } | ||||
if(text_not_phonemes != translator->langopts.textmode) | |||||
{ | |||||
flag_codes[n_flag_codes++] = BITNUM_FLAG_TEXTMODE; | |||||
} | |||||
if(sscanf(word,"U+%x",&wc) == 1) | if(sscanf(word,"U+%x",&wc) == 1) | ||||
{ | { | ||||
// Character code | // Character code | ||||
char path[sizeof(path_home)+40]; // path_dsource+20 | char path[sizeof(path_home)+40]; // path_dsource+20 | ||||
error_count = 0; | error_count = 0; | ||||
error_need_dictionary = 0; | |||||
memset(letterGroupsDefined,0,sizeof(letterGroupsDefined)); | memset(letterGroupsDefined,0,sizeof(letterGroupsDefined)); | ||||
debug_flag = flags & 1; | debug_flag = flags & 1; |
strcpy(dictionary_name,name); // currently loaded dictionary name | strcpy(dictionary_name,name); // currently loaded dictionary name | ||||
if(no_error) // don't load dictionary, just set the dictionary_name | |||||
return(1); | |||||
// Load a pronunciation data file into memory | // Load a pronunciation data file into memory | ||||
// bytes 0-3: offset to rules data | // bytes 0-3: offset to rules data | ||||
// bytes 4-7: number of hash table entries | // bytes 4-7: number of hash table entries |
#ifdef deleted | #ifdef deleted | ||||
static const char *about_string = "espeakedit: %s\nAuthor: Jonathan Duddington (c) 2007\n\n" | |||||
static const char *about_string = "espeakedit: %s\nAuthor: Jonathan Duddington (c) 2009\n\n" | |||||
"Licensed under GNU General Public License version 3\n" | "Licensed under GNU General Public License version 3\n" | ||||
"http://espeak.sourceforge.net/"; | "http://espeak.sourceforge.net/"; | ||||
#endif | #endif | ||||
static const char *about_string = "<font size=0><b>espeakedit </b> %s<br>Author: Jonathan Duddington (c) 2007<br>" | |||||
static const char *about_string = "<font size=0><b>espeakedit </b> %s<br>Author: Jonathan Duddington (c) 2009<br>" | |||||
"<a href=\"http://espeak.sourceforge.net/\">http://espeak.sourceforge.net</a><br>" | "<a href=\"http://espeak.sourceforge.net/\">http://espeak.sourceforge.net</a><br>" | ||||
"Licensed under <a href=\"http://espeak.sourceforge.net/license.html\">GNU General Public License version 3</a></font>"; | "Licensed under <a href=\"http://espeak.sourceforge.net/license.html\">GNU General Public License version 3</a></font>"; | ||||
sprintf(fname_log,"%s%s",path_dsource,"dict_log"); | sprintf(fname_log,"%s%s",path_dsource,"dict_log"); | ||||
log = fopen(fname_log,"w"); | log = fopen(fname_log,"w"); | ||||
LoadDictionary(translator, dictionary_name, 0); | |||||
if((err = CompileDictionary(path_dsource,dictionary_name,log,err_fname,debug_flag)) < 0) | if((err = CompileDictionary(path_dsource,dictionary_name,log,err_fname,debug_flag)) < 0) | ||||
{ | { | ||||
wxLogError(_T("Can't access file:\n")+wxString(err_fname,wxConvLocal)); | wxLogError(_T("Can't access file:\n")+wxString(err_fname,wxConvLocal)); |
ph_stress[0] = phonSTRESS_P; | ph_stress[0] = phonSTRESS_P; | ||||
ph_stress[1] = 0; | ph_stress[1] = 0; | ||||
for(p=(unsigned char *)ph_buf3; *p != 0; p++) | |||||
for(p=(unsigned char *)ph_buf3; (*p != 0) && (phoneme_tab[*p] != NULL); p++) | |||||
{ | { | ||||
if(phoneme_tab[*p]->type == phSTRESS) | if(phoneme_tab[*p]->type == phSTRESS) | ||||
ph_stress[0] = 0; // stress is already marked | ph_stress[0] = 0; // stress is already marked | ||||
static int LookupThousands(Translator *tr, int value, int thousandplex, int thousands_exact, char *ph_out) | static int LookupThousands(Translator *tr, int value, int thousandplex, int thousands_exact, char *ph_out) | ||||
{//======================================================================================================= | {//======================================================================================================= | ||||
// thousands_exact: bit 0 no hundreds,tens,or units, bit 1 ordinal number | |||||
int found; | int found; | ||||
int found_value=0; | int found_value=0; | ||||
char string[12]; | char string[12]; | ||||
ph_of[0] = 0; | ph_of[0] = 0; | ||||
// first look for a match with the exact value of thousands | // first look for a match with the exact value of thousands | ||||
if(thousands_exact) | |||||
if(thousands_exact & 1) | |||||
{ | { | ||||
// is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta) | |||||
sprintf(string,"_%dM%dx",value,thousandplex); | |||||
found_value = Lookup(tr, string, ph_thousands); | |||||
if(thousands_exact & 2) | |||||
{ | |||||
// ordinal number | |||||
sprintf(string,"_%dM%do",value,thousandplex); | |||||
found_value = Lookup(tr, string, ph_thousands); | |||||
} | |||||
if(!found_value) | |||||
{ | |||||
// is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta) | |||||
sprintf(string,"_%dM%dx",value,thousandplex); | |||||
found_value = Lookup(tr, string, ph_thousands); | |||||
} | |||||
} | } | ||||
if(found_value == 0) | if(found_value == 0) | ||||
{ | { | ||||
} | } | ||||
found = 0; | found = 0; | ||||
if(thousands_exact) | |||||
if(thousands_exact & 1) | |||||
{ | { | ||||
// is there a different pronunciation if there are no hundreds,tens,or units ? | |||||
sprintf(string,"_%s%dx",M_Variant(value), thousandplex); | |||||
found = Lookup(tr, string, ph_thousands); | |||||
if(thousands_exact & 2) | |||||
{ | |||||
// ordinal number | |||||
sprintf(string,"_%s%do",M_Variant(value), thousandplex); | |||||
found = Lookup(tr, string, ph_thousands); | |||||
} | |||||
if(!found) | |||||
{ | |||||
// is there a different pronunciation if there are no hundreds,tens,or units ? | |||||
sprintf(string,"_%s%dx",M_Variant(value), thousandplex); | |||||
found = Lookup(tr, string, ph_thousands); | |||||
} | |||||
} | } | ||||
if(found == 0) | if(found == 0) | ||||
{ | { | ||||
{ | { | ||||
units = (value % 10); | units = (value % 10); | ||||
if((control & 1) && ((units == 0) || (tr->langopts.numbers & 0x10))) | |||||
if((control & 1) && ((units == 0) || (tr->langopts.numbers & NUM_SWAP_TENS))) | |||||
{ | { | ||||
sprintf(string,"_%dXo",value / 10); | sprintf(string,"_%dXo",value / 10); | ||||
if(Lookup(tr, string, ph_tens) != 0) | if(Lookup(tr, string, ph_tens) != 0) | ||||
sprintf(string,"_%df",units); | sprintf(string,"_%df",units); | ||||
found = Lookup(tr, string, ph_digits); | found = Lookup(tr, string, ph_digits); | ||||
} | } | ||||
if((control & 1) && ((tr->langopts.numbers & 0x10) == 0)) | |||||
if((control & 1) && ((tr->langopts.numbers & NUM_SWAP_TENS) == 0)) | |||||
{ | { | ||||
// ordinal | // ordinal | ||||
sprintf(string,"_%do",units); | sprintf(string,"_%do",units); | ||||
if((control & 1) && (found_ordinal == 0) && (ph_ordinal[0] == 0)) | if((control & 1) && (found_ordinal == 0) && (ph_ordinal[0] == 0)) | ||||
{ | { | ||||
if((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & 0x10))) | |||||
if((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & NUM_SWAP_TENS))) | |||||
Lookup(tr, "_ord20", ph_ordinal); | Lookup(tr, "_ord20", ph_ordinal); | ||||
if(ph_ordinal[0] == 0) | if(ph_ordinal[0] == 0) | ||||
Lookup(tr, "_ord", ph_ordinal); | Lookup(tr, "_ord", ph_ordinal); | ||||
} | } | ||||
if((tr->langopts.numbers & 0x30) && (ph_tens[0] != 0) && (ph_digits[0] != 0)) | |||||
if((tr->langopts.numbers & (NUM_SWAP_TENS | NUM_AND_UNITS)) && (ph_tens[0] != 0) && (ph_digits[0] != 0)) | |||||
{ | { | ||||
Lookup(tr, "_0and", ph_and); | Lookup(tr, "_0and", ph_and); | ||||
if(tr->langopts.numbers & 0x10) | |||||
if(tr->langopts.numbers & NUM_SWAP_TENS) | |||||
sprintf(ph_out,"%s%s%s%s",ph_digits, ph_and, ph_tens, ph_ordinal); | sprintf(ph_out,"%s%s%s%s",ph_digits, ph_and, ph_tens, ph_ordinal); | ||||
else | else | ||||
sprintf(ph_out,"%s%s%s%s",ph_tens, ph_and, ph_digits, ph_ordinal); | sprintf(ph_out,"%s%s%s%s",ph_tens, ph_and, ph_digits, ph_ordinal); | ||||
} | } | ||||
else | else | ||||
{ | { | ||||
if(tr->langopts.numbers & 0x200) | |||||
if(tr->langopts.numbers & NUM_SINGLE_VOWEL) | |||||
{ | { | ||||
// remove vowel from the end of tens if units starts with a vowel (LANG=Italian) | // remove vowel from the end of tens if units starts with a vowel (LANG=Italian) | ||||
if(((ix = strlen(ph_tens)-1) >= 0) && (ph_digits[0] != 0)) | if(((ix = strlen(ph_tens)-1) >= 0) && (ph_digits[0] != 0)) | ||||
sprintf(ph_out,"%s%s%s",ph_tens, ph_digits, ph_ordinal); | sprintf(ph_out,"%s%s%s",ph_tens, ph_digits, ph_ordinal); | ||||
} | } | ||||
if(tr->langopts.numbers & 0x100) | |||||
if(tr->langopts.numbers & NUM_SINGLE_STRESS) | |||||
{ | { | ||||
// only one primary stress | // only one primary stress | ||||
found = 0; | found = 0; | ||||
int tensunits; | int tensunits; | ||||
int x; | int x; | ||||
int exact; | int exact; | ||||
int ordinal; | |||||
char string[12]; // for looking up entries in **_list | char string[12]; // for looking up entries in **_list | ||||
char buf1[100]; | char buf1[100]; | ||||
char buf2[100]; | char buf2[100]; | ||||
char ph_hundred_and[12]; | char ph_hundred_and[12]; | ||||
char ph_thousand_and[12]; | char ph_thousand_and[12]; | ||||
ordinal = control & 2; | |||||
hundreds = value / 100; | hundreds = value / 100; | ||||
tensunits = value % 100; | tensunits = value % 100; | ||||
buf1[0] = 0; | buf1[0] = 0; | ||||
ph_thousand_and[0] = 0; | ph_thousand_and[0] = 0; | ||||
found = 0; | found = 0; | ||||
if((control & 2) && (tensunits == 0)) | |||||
if(ordinal && (tensunits == 0)) | |||||
{ | { | ||||
// ordinal number, with no tens or units | // ordinal number, with no tens or units | ||||
found = Lookup(tr, "_0Co", ph_100); | found = Lookup(tr, "_0Co", ph_100); | ||||
Lookup(tr, "_0C", ph_100); | Lookup(tr, "_0C", ph_100); | ||||
} | } | ||||
if(((tr->langopts.numbers & 0x0800) != 0) && (hundreds == 19)) | |||||
if(((tr->langopts.numbers & NUM_1900) != 0) && (hundreds == 19)) | |||||
{ | { | ||||
// speak numbers such as 1984 as years: nineteen-eighty-four | // speak numbers such as 1984 as years: nineteen-eighty-four | ||||
// ph_100[0] = 0; // don't say "hundred", we also need to suppress "and" | // ph_100[0] = 0; // don't say "hundred", we also need to suppress "and" | ||||
if ((value % 1000) == 0) | if ((value % 1000) == 0) | ||||
exact = 1; | exact = 1; | ||||
if(LookupThousands(tr, hundreds / 10, thousandplex+1, exact, ph_10T) == 0) | |||||
if(LookupThousands(tr, hundreds / 10, thousandplex+1, exact | ordinal, ph_10T) == 0) | |||||
{ | { | ||||
x = 0; | x = 0; | ||||
if(tr->langopts.numbers2 & (1 << (thousandplex+1))) | if(tr->langopts.numbers2 & (1 << (thousandplex+1))) | ||||
ph_digits[0] = 0; | ph_digits[0] = 0; | ||||
if(hundreds > 0) | if(hundreds > 0) | ||||
{ | { | ||||
if((tr->langopts.numbers & 0x100000) && ((control & 1) || (ph_thousands[0] != 0))) | |||||
if((tr->langopts.numbers & NUM_AND_HUNDRED) && ((control & 1) || (ph_thousands[0] != 0))) | |||||
{ | { | ||||
Lookup(tr, "_0and", ph_thousand_and); | Lookup(tr, "_0and", ph_thousand_and); | ||||
} | } | ||||
if(tensunits == 0) | if(tensunits == 0) | ||||
{ | { | ||||
// is there a special pronunciation for exactly n00 ? | // is there a special pronunciation for exactly n00 ? | ||||
sprintf(string,"_%dC0",hundreds); | |||||
found = Lookup(tr, string, ph_digits); | |||||
if(ordinal) | |||||
{ | |||||
// ordinal number | |||||
sprintf(string, "_%dCo", hundreds); | |||||
found = Lookup(tr, string, ph_digits); | |||||
} | |||||
if(!found) | |||||
{ | |||||
sprintf(string,"_%dC0",hundreds); | |||||
found = Lookup(tr, string, ph_digits); | |||||
} | |||||
} | } | ||||
if(!found) | if(!found) | ||||
{ | { | ||||
} | } | ||||
ph_hundred_and[0] = 0; | ph_hundred_and[0] = 0; | ||||
if((tr->langopts.numbers & 0x40) && (tensunits != 0)) | |||||
if((tr->langopts.numbers & NUM_HUNDRED_AND) && (tensunits != 0)) | |||||
{ | { | ||||
if((value > 100) || ((control & 1) && (thousandplex==0))) | if((value > 100) || ((control & 1) && (thousandplex==0))) | ||||
{ | { | ||||
if(thousandplex==0) | if(thousandplex==0) | ||||
{ | { | ||||
x = 2; // allow "eins" for 1 rather than "ein" | x = 2; // allow "eins" for 1 rather than "ein" | ||||
if(control & 2) | |||||
if(ordinal) | |||||
x = 3; // ordinal number | x = 3; // ordinal number | ||||
if((value < 100) && !(control & 1)) | if((value < 100) && !(control & 1)) | ||||
x |= 4; // tens and units only, no higher digits | x |= 4; // tens and units only, no higher digits | ||||
if(LookupNum2(tr, tensunits, x, buf2) != 0) | if(LookupNum2(tr, tensunits, x, buf2) != 0) | ||||
{ | { | ||||
if(tr->langopts.numbers & 0x80) | |||||
if(tr->langopts.numbers & NUM_SINGLE_AND) | |||||
ph_hundred_and[0] = 0; // don't put 'and' after 'hundred' if there's 'and' between tens and units | ph_hundred_and[0] = 0; // don't put 'and' after 'hundred' if there's 'and' between tens and units | ||||
} | } | ||||
} | } | ||||
value = this_value = atoi(word); | value = this_value = atoi(word); | ||||
ph_ordinal2[0] = 0; | ph_ordinal2[0] = 0; | ||||
if((tr->langopts.numbers & 0x10000) && (word[ix] == '.') && !isdigit(word[ix+2])) | |||||
if((tr->langopts.numbers & NUM_ORDINAL_DOT) && (word[ix] == '.') && !isdigit(word[ix+2])) | |||||
{ | { | ||||
// ordinal number is indicated by dot after the number | // ordinal number is indicated by dot after the number | ||||
ordinal = 2; | ordinal = 2; | ||||
prev_thousands = 1; | prev_thousands = 1; | ||||
} | } | ||||
else | else | ||||
if((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & 0x1000)) | |||||
if((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & NUM_ALLOW_SPACE)) | |||||
{ | { | ||||
// thousands groups can be separated by spaces | // thousands groups can be separated by spaces | ||||
if((n_digits == 3) && isdigit(word[-2])) | if((n_digits == 3) && isdigit(word[-2])) | ||||
} | } | ||||
} | } | ||||
if((tr->langopts.numbers & 0x1000) && (word[n_digits] == ' ')) | |||||
if((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[n_digits] == ' ')) | |||||
thousands_inc = 1; | thousands_inc = 1; | ||||
else | else | ||||
if(word[n_digits] == tr->langopts.thousands_sep) | if(word[n_digits] == tr->langopts.thousands_sep) | ||||
max_decimal_count = 2; | max_decimal_count = 2; | ||||
switch(decimal_mode = (tr->langopts.numbers & 0xe000)) | switch(decimal_mode = (tr->langopts.numbers & 0xe000)) | ||||
{ | { | ||||
case 0x8000: | |||||
case NUM_DFRACTION_4: | |||||
max_decimal_count = 5; | max_decimal_count = 5; | ||||
case 0x4000: | |||||
case NUM_DFRACTION_2: | |||||
// French/Polish decimal fraction | // French/Polish decimal fraction | ||||
while(word[n_digits] == '0') | while(word[n_digits] == '0') | ||||
{ | { | ||||
} | } | ||||
break; | break; | ||||
case 0x2000: // italian, say "hundredths" is leading zero | |||||
case 0xa000: // hungarian, always say "tenths" etc. | |||||
case NUM_DFRACTION_1: // italian, say "hundredths" is leading zero | |||||
case NUM_DFRACTION_5: // hungarian, always say "tenths" etc. | |||||
if(decimal_count <= 4) | if(decimal_count <= 4) | ||||
{ | { | ||||
LookupNum3(tr, atoi(&word[n_digits]), ph_buf, 0,0,0); | LookupNum3(tr, atoi(&word[n_digits]), ph_buf, 0,0,0); | ||||
} | } | ||||
break; | break; | ||||
case 0x6000: | |||||
case NUM_DFRACTION_3: | |||||
// Romanian decimal fractions | // Romanian decimal fractions | ||||
if((decimal_count <= 4) && (word[n_digits] != '0')) | if((decimal_count <= 4) && (word[n_digits] != '0')) | ||||
{ | { | ||||
if(option_sayas == SAYAS_DIGITS1) | if(option_sayas == SAYAS_DIGITS1) | ||||
return(0); // speak digits individually | return(0); // speak digits individually | ||||
if((tr->langopts.numbers & 0x3) == 1) | |||||
if(tr->langopts.numbers != 0) | |||||
return(TranslateNumber_1(tr, word1, ph_out, flags, wflags)); | return(TranslateNumber_1(tr, word1, ph_out, flags, wflags)); | ||||
return(0); | return(0); |
static const char *punct_close = ")]}>;'\""; // always pitch fall unless followed by alnum | static const char *punct_close = ")]}>;'\""; // always pitch fall unless followed by alnum | ||||
// alter tone for announce punctuation or capitals | // alter tone for announce punctuation or capitals | ||||
static const char *tone_punct_on = "\0016T"; // add reverberation, lower pitch | |||||
static const char *tone_punct_off = "\001T"; | |||||
//static const char *tone_punct_on = "\0016T"; // add reverberation, lower pitch | |||||
//static const char *tone_punct_off = "\001T\001P"; | |||||
static const char *tone_punct_on = ""; // add reverberation, lower pitch TEST apply no effect | |||||
static const char *tone_punct_off = ""; | |||||
// ignore these characters | // ignore these characters | ||||
static const unsigned short chars_ignore[] = { | static const unsigned short chars_ignore[] = { | ||||
int c1=' '; // current character | int c1=' '; // current character | ||||
int c2; // next character | int c2; // next character | ||||
int cprev=' '; // previous character | int cprev=' '; // previous character | ||||
int cprev2=' '; | |||||
int parag; | int parag; | ||||
int ix = 0; | int ix = 0; | ||||
int j; | int j; | ||||
int any_alnum = 0; | int any_alnum = 0; | ||||
int self_closing; | int self_closing; | ||||
int punct_data; | int punct_data; | ||||
int is_punctuation; | |||||
int save_c2; | |||||
int stressed_word = 0; | int stressed_word = 0; | ||||
const char *p; | const char *p; | ||||
wchar_t xml_buf[N_XML_BUF+1]; | wchar_t xml_buf[N_XML_BUF+1]; | ||||
} | } | ||||
} | } | ||||
cprev2 = cprev; | |||||
cprev = c1; | cprev = c1; | ||||
c1 = c2; | c1 = c2; | ||||
if((iswspace(c2) || (punct_data & 0x8000) || IsBracket(c2) || (c2=='?') || (c2=='-') || Eof())) | if((iswspace(c2) || (punct_data & 0x8000) || IsBracket(c2) || (c2=='?') || (c2=='-') || Eof())) | ||||
{ | { | ||||
// note: (c2=='?') is for when a smart-quote has been replaced by '?' | // note: (c2=='?') is for when a smart-quote has been replaced by '?' | ||||
buf[ix] = ' '; | |||||
buf[ix+1] = 0; | |||||
is_punctuation = 1; | |||||
if((c1 == '.') && (cprev == '.')) | if((c1 == '.') && (cprev == '.')) | ||||
{ | { | ||||
c1 = 0x2026; | c1 = 0x2026; | ||||
} | } | ||||
nl_count = 0; | nl_count = 0; | ||||
while(!Eof() && iswspace(c2)) | |||||
{ | |||||
if(c2 == '\n') | |||||
nl_count++; | |||||
c2 = GetC(); // skip past space(s) | |||||
} | |||||
if(!Eof()) | |||||
save_c2 = c2; | |||||
if(iswspace(c2)) | |||||
{ | { | ||||
UngetC(c2); | |||||
while(!Eof() && iswspace(c2)) | |||||
{ | |||||
if(c2 == '\n') | |||||
nl_count++; | |||||
c2 = GetC(); // skip past space(s) | |||||
} | |||||
if(!Eof()) | |||||
{ | |||||
UngetC(c2); | |||||
} | |||||
} | } | ||||
if((nl_count==0) && (c1 == '.')) | if((nl_count==0) && (c1 == '.')) | ||||
{ | { | ||||
if(iswdigit(cprev) && (tr->langopts.numbers & 0x10000) && islower(c2)) | |||||
// if(iswdigit(cprev) && (tr->langopts.numbers & NUM_ORDINAL_DOT) && islower(c2)) | |||||
if(iswdigit(cprev) && (tr->langopts.numbers & NUM_ORDINAL_DOT)) | |||||
{ | { | ||||
// dot after a number indicates an ordinal number | // dot after a number indicates an ordinal number | ||||
c2 = '.'; | |||||
continue; | |||||
is_punctuation = 0; | |||||
} | } | ||||
else | |||||
if(iswlower(c2)) | if(iswlower(c2)) | ||||
{ | { | ||||
c2 = ' '; | |||||
continue; // next word has no capital letter, this dot is probably from an abbreviation | |||||
// next word has no capital letter, this dot is probably from an abbreviation | |||||
c1 = ' '; | |||||
is_punctuation = 0; | |||||
} | } | ||||
if(any_alnum==0) | if(any_alnum==0) | ||||
{ | { | ||||
c2 = ' '; // no letters or digits yet, so probably not a sentence terminator | |||||
continue; | |||||
c1 = ' '; // no letters or digits yet, so probably not a sentence terminator | |||||
is_punctuation = 0; | |||||
} | } | ||||
} | } | ||||
punct_data = punct_attributes[punct]; | |||||
if(nl_count > 1) | |||||
c2 = save_c2; | |||||
if(is_punctuation) | |||||
{ | { | ||||
if((punct_data == CLAUSE_QUESTION) || (punct_data == CLAUSE_EXCLAMATION)) | |||||
return(punct_data + 35); // with a longer pause | |||||
return(CLAUSE_PARAGRAPH); | |||||
buf[ix] = ' '; | |||||
buf[ix+1] = 0; | |||||
punct_data = punct_attributes[punct]; | |||||
if(nl_count > 1) | |||||
{ | |||||
if((punct_data == CLAUSE_QUESTION) || (punct_data == CLAUSE_EXCLAMATION)) | |||||
return(punct_data + 35); // with a longer pause | |||||
return(CLAUSE_PARAGRAPH); | |||||
} | |||||
return(punct_data); // only recognise punctuation if followed by a blank or bracket/quote | |||||
} | } | ||||
return(punct_data); // only recognise punctuation if followed by a blank or bracket/quote | |||||
} | } | ||||
} | } | ||||
#include "translate.h" | #include "translate.h" | ||||
#include "wave.h" | #include "wave.h" | ||||
const char *version_string = "1.41.08 04.Oct.09"; | |||||
const char *version_string = "1.41.11 09.Oct.09"; | |||||
const int version_phdata = 0x014100; | const int version_phdata = 0x014100; | ||||
int option_device_number = -1; | int option_device_number = -1; |
#define OFFSET_ARMENIAN 0x530 | #define OFFSET_ARMENIAN 0x530 | ||||
#define OFFSET_DEVANAGARI 0x900 | #define OFFSET_DEVANAGARI 0x900 | ||||
#define OFFSET_BENGALI 0x980 | #define OFFSET_BENGALI 0x980 | ||||
#define OFFSET_GURMUKHI 0xa00 | |||||
#define OFFSET_TAMIL 0xb80 | #define OFFSET_TAMIL 0xb80 | ||||
#define OFFSET_KANNADA 0xc80 | #define OFFSET_KANNADA 0xc80 | ||||
#define OFFSET_MALAYALAM 0xd00 | #define OFFSET_MALAYALAM 0xd00 | ||||
tr->langopts.max_roman = 49; | tr->langopts.max_roman = 49; | ||||
tr->langopts.thousands_sep = ','; | tr->langopts.thousands_sep = ','; | ||||
tr->langopts.decimal_sep = '.'; | tr->langopts.decimal_sep = '.'; | ||||
tr->langopts.break_numbers = BREAK_THOUSANDS; // 1000, 1000,000 1,000,000 etc | |||||
memcpy(tr->punct_to_tone, punctuation_to_tone, sizeof(tr->punct_to_tone)); | memcpy(tr->punct_to_tone, punctuation_to_tone, sizeof(tr->punct_to_tone)); | ||||
tr->langopts.param[LOPT_PREFIXES] = 1; | tr->langopts.param[LOPT_PREFIXES] = 1; | ||||
SetLetterVowel(tr,'y'); // add 'y' to vowels | SetLetterVowel(tr,'y'); // add 'y' to vowels | ||||
tr->langopts.numbers = 0x8d1 + NUM_ROMAN; | |||||
tr->langopts.numbers = NUM_SWAP_TENS | NUM_HUNDRED_AND | NUM_SINGLE_AND | NUM_ROMAN | NUM_1900; | |||||
tr->langopts.accents = 1; | tr->langopts.accents = 1; | ||||
} | } | ||||
break; | break; | ||||
SetLetterBitsRange(tr,LETTERGP_F,0x3e,0x4c); // vowel signs, but not virama | SetLetterBitsRange(tr,LETTERGP_F,0x3e,0x4c); // vowel signs, but not virama | ||||
tr->langopts.numbers = 0x1; | tr->langopts.numbers = 0x1; | ||||
tr->langopts.numbers2 = NUM2_100000; | |||||
tr->langopts.break_numbers = 0x24924aa8; // for languages which have numbers for 100,000 and 100,00,000, eg Hindi | |||||
} | } | ||||
break; | break; | ||||
tr->langopts.unstressed_wd2 = 2; | tr->langopts.unstressed_wd2 = 2; | ||||
tr->langopts.param[LOPT_SONORANT_MIN] = 120; // limit the shortening of sonorants before short vowels | tr->langopts.param[LOPT_SONORANT_MIN] = 120; // limit the shortening of sonorants before short vowels | ||||
tr->langopts.numbers = 0x401; | |||||
tr->langopts.numbers = NUM_OMIT_1_HUNDRED; | |||||
SetLetterVowel(tr,'w'); // add letter to vowels and remove from consonants | SetLetterVowel(tr,'w'); // add letter to vowels and remove from consonants | ||||
SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
tr->langopts.stress_rule = 0; | tr->langopts.stress_rule = 0; | ||||
SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
tr->langopts.numbers = 0x10c59; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SWAP_TENS | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_ORDINAL_DOT | NUM_1900; | |||||
} | } | ||||
break; | break; | ||||
tr->langopts.param[LOPT_PREFIXES] = 1; | tr->langopts.param[LOPT_PREFIXES] = 1; | ||||
memcpy(tr->stress_lengths,stress_lengths_de,sizeof(tr->stress_lengths)); | memcpy(tr->stress_lengths,stress_lengths_de,sizeof(tr->stress_lengths)); | ||||
tr->langopts.numbers = 0x11419 + NUM_ROMAN; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SWAP_TENS | NUM_OMIT_1_HUNDRED | NUM_ALLOW_SPACE | NUM_ORDINAL_DOT | NUM_ROMAN; | |||||
SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
} | } | ||||
break; | break; | ||||
SetupTranslator(tr,stress_lengths_en,NULL); | SetupTranslator(tr,stress_lengths_en,NULL); | ||||
tr->langopts.stress_rule = 0; | tr->langopts.stress_rule = 0; | ||||
tr->langopts.numbers = 0x841 + NUM_ROMAN; | |||||
tr->langopts.numbers = NUM_HUNDRED_AND | NUM_ROMAN | NUM_1900; | |||||
tr->langopts.param[LOPT_COMBINE_WORDS] = 2; // allow "mc" to cmbine with the following word | tr->langopts.param[LOPT_COMBINE_WORDS] = 2; // allow "mc" to cmbine with the following word | ||||
} | } | ||||
break; | break; | ||||
tr->langopts.unstressed_wd2 = 2; | tr->langopts.unstressed_wd2 = 2; | ||||
tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels | tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels | ||||
tr->langopts.numbers = 0x109; | |||||
tr->langopts.numbers = NUM_SINGLE_STRESS | NUM_DECIMAL_COMMA; | |||||
tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands | tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands | ||||
if(name2 == L_grc) | if(name2 == L_grc) | ||||
tr->langopts.unstressed_wd1 = 3; | tr->langopts.unstressed_wd1 = 3; | ||||
tr->langopts.unstressed_wd2 = 2; | tr->langopts.unstressed_wd2 = 2; | ||||
tr->langopts.numbers = 0x1409 + NUM_ROMAN; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED | NUM_ALLOW_SPACE | NUM_ROMAN; | |||||
} | } | ||||
break; | break; | ||||
tr->langopts.unstressed_wd2 = 2; | tr->langopts.unstressed_wd2 = 2; | ||||
tr->langopts.param[LOPT_SONORANT_MIN] = 120; // limit the shortening of sonorants before short vowels | tr->langopts.param[LOPT_SONORANT_MIN] = 120; // limit the shortening of sonorants before short vowels | ||||
tr->langopts.numbers = 0x529 + NUM_ROMAN + NUM_ROMAN_AFTER; | |||||
tr->langopts.numbers = NUM_SINGLE_STRESS | NUM_DECIMAL_COMMA | NUM_AND_UNITS | NUM_OMIT_1_HUNDRED | NUM_ROMAN | NUM_ROMAN_AFTER; | |||||
if(name2 == L('c','a')) | if(name2 == L('c','a')) | ||||
{ | { | ||||
static const unsigned char stress_amps_eu[8] = {16,16, 18,18, 18,18, 18,18 }; | static const unsigned char stress_amps_eu[8] = {16,16, 18,18, 18,18, 18,18 }; | ||||
SetupTranslator(tr,stress_lengths_eu,stress_amps_eu); | SetupTranslator(tr,stress_lengths_eu,stress_amps_eu); | ||||
tr->langopts.stress_rule = 1; // ?? second syllable ?? | tr->langopts.stress_rule = 1; // ?? second syllable ?? | ||||
tr->langopts.numbers = 0x569 + NUM_VIGESIMAL; | |||||
tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_DECIMAL_COMMA | NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_VIGESIMAL; | |||||
} | } | ||||
break; | break; | ||||
tr->langopts.param[LOPT_IT_DOUBLING] = 1; | tr->langopts.param[LOPT_IT_DOUBLING] = 1; | ||||
tr->langopts.long_stop = 130; | tr->langopts.long_stop = 130; | ||||
tr->langopts.numbers = 0x1009; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA + NUM_ALLOW_SPACE; | |||||
SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
// tr->langopts.max_initial_consonants = 2; // BUT foreign words may have 3 | // tr->langopts.max_initial_consonants = 2; // BUT foreign words may have 3 | ||||
tr->langopts.spelling_stress = 1; | tr->langopts.spelling_stress = 1; | ||||
tr->langopts.stress_flags = 0x0024; // don't use secondary stress | tr->langopts.stress_flags = 0x0024; // don't use secondary stress | ||||
tr->langopts.param[LOPT_IT_LENGTHEN] = 1; // remove lengthen indicator from unstressed syllables | tr->langopts.param[LOPT_IT_LENGTHEN] = 1; // remove lengthen indicator from unstressed syllables | ||||
tr->langopts.numbers = 0x1509 + 0x8000 + NUM_NOPAUSE | NUM_ROMAN | NUM_VIGESIMAL; | |||||
tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_OMIT_1_HUNDRED | NUM_NOPAUSE | NUM_ROMAN | NUM_VIGESIMAL | NUM_DFRACTION_4; | |||||
SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
} | } | ||||
break; | break; | ||||
case L('h','i'): // Hindi | case L('h','i'): // Hindi | ||||
case L('n','e'): // Nepali | case L('n','e'): // Nepali | ||||
case L('p','a'): // Punjabi | |||||
{ | { | ||||
static const short stress_lengths_hi[8] = {190, 190, 210, 210, 0, 0, 230, 250}; | static const short stress_lengths_hi[8] = {190, 190, 210, 210, 0, 0, 230, 250}; | ||||
static const unsigned char stress_amps_hi[8] = {17,14, 20,19, 20,22, 22,21 }; | static const unsigned char stress_amps_hi[8] = {17,14, 20,19, 20,22, 22,21 }; | ||||
tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable | tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable | ||||
tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | ||||
tr->langopts.numbers = 0x011; | |||||
tr->langopts.numbers2 = NUM2_100000; | |||||
tr->langopts.numbers = NUM_SWAP_TENS; | |||||
tr->langopts.break_numbers = 0x24924aa8; // for languages which have numbers for 100,000 and 100,00,000, eg Hindi | |||||
tr->letter_bits_offset = OFFSET_DEVANAGARI; | tr->letter_bits_offset = OFFSET_DEVANAGARI; | ||||
if(name2 == L('p','a')) | |||||
{ | |||||
tr->langopts.numbers = 0; // no number rules yet | |||||
tr->letter_bits_offset = OFFSET_GURMUKHI; | |||||
} | |||||
SetIndicLetters(tr); | SetIndicLetters(tr); | ||||
} | } | ||||
break; | break; | ||||
tr->langopts.spelling_stress = 1; | tr->langopts.spelling_stress = 1; | ||||
tr->langopts.accents = 1; | tr->langopts.accents = 1; | ||||
tr->langopts.numbers = 0x140d + 0x4000 + NUM_ROMAN_UC; | |||||
tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_HUNDRED_AND | NUM_DECIMAL_COMMA | NUM_THOUS_SPACE | NUM_DFRACTION_2 | NUM_ROMAN_UC; | |||||
tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards | tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards | ||||
tr->langopts.replace_chars = replace_cyrillic_latin; | tr->langopts.replace_chars = replace_cyrillic_latin; | ||||
tr->langopts.param[LOPT_IT_DOUBLING] = 1; | tr->langopts.param[LOPT_IT_DOUBLING] = 1; | ||||
tr->langopts.param[LOPT_COMBINE_WORDS] = 99; // combine some prepositions with the following word | tr->langopts.param[LOPT_COMBINE_WORDS] = 99; // combine some prepositions with the following word | ||||
tr->langopts.numbers = 0x1009 + 0xa000 + NUM_ROMAN + NUM_ROMAN_ORDINAL + NUM_ORDINAL_DOT + NUM_OMIT_1_HUNDRED; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_5 | NUM_ROMAN | NUM_ROMAN_ORDINAL | NUM_ORDINAL_DOT | NUM_OMIT_1_HUNDRED; | |||||
SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
tr->langopts.spelling_stress = 1; | tr->langopts.spelling_stress = 1; | ||||
SetLengthMods(tr,3); // all equal | SetLengthMods(tr,3); // all equal | ||||
SetLetterBits(tr,LETTERGP_A,hy_vowels); | SetLetterBits(tr,LETTERGP_A,hy_vowels); | ||||
SetLetterBits(tr,LETTERGP_C,hy_consonants); | SetLetterBits(tr,LETTERGP_C,hy_consonants); | ||||
tr->langopts.max_initial_consonants = 6; | tr->langopts.max_initial_consonants = 6; | ||||
tr->langopts.numbers = 0x409; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED; | |||||
// tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words | // tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words | ||||
} | } | ||||
break; | break; | ||||
SetupTranslator(tr,stress_lengths_id,stress_amps_id); | SetupTranslator(tr,stress_lengths_id,stress_amps_id); | ||||
tr->langopts.stress_rule = 2; | tr->langopts.stress_rule = 2; | ||||
tr->langopts.numbers = 0x1009 + NUM_ROMAN; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_ROMAN; | |||||
tr->langopts.stress_flags = 0x6 | 0x10; | tr->langopts.stress_flags = 0x6 | 0x10; | ||||
tr->langopts.accents = 2; // "capital" after letter name | tr->langopts.accents = 2; // "capital" after letter name | ||||
} | } | ||||
SetLetterBits(tr,3,"jvr"); // Letter group H | SetLetterBits(tr,3,"jvr"); // Letter group H | ||||
tr->letter_groups[1] = is_lettergroup_B; | tr->letter_groups[1] = is_lettergroup_B; | ||||
SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
tr->langopts.numbers = 0x8e9; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SINGLE_AND | NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_1900; | |||||
tr->langopts.numbers2 = 0x2; | tr->langopts.numbers2 = 0x2; | ||||
} | } | ||||
break; | break; | ||||
tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels | tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels | ||||
tr->langopts.param[LOPT_REDUCE] = 1; // reduce vowels even if phonemes are specified in it_list | tr->langopts.param[LOPT_REDUCE] = 1; // reduce vowels even if phonemes are specified in it_list | ||||
tr->langopts.param[LOPT_ALT] = 2; // call ApplySpecialAttributes2() if a word has $alt or $alt2 | tr->langopts.param[LOPT_ALT] = 2; // call ApplySpecialAttributes2() if a word has $alt or $alt2 | ||||
tr->langopts.numbers = 0x2709 + NUM_ROMAN; | |||||
tr->langopts.numbers = NUM_SINGLE_VOWEL | NUM_OMIT_1_HUNDRED |NUM_DECIMAL_COMMA | NUM_ROMAN | NUM_DFRACTION_1; | |||||
tr->langopts.accents = 2; // Say "Capital" after the letter. | tr->langopts.accents = 2; // Say "Capital" after the letter. | ||||
SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
} | } | ||||
tr->langopts.stress_rule = 8; // ?? 1st syllable if it is heavy, else 2nd syllable | tr->langopts.stress_rule = 8; // ?? 1st syllable if it is heavy, else 2nd syllable | ||||
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words | tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words | ||||
tr->langopts.numbers = 0x0401; | |||||
tr->langopts.numbers = NUM_OMIT_1_HUNDRED; | |||||
} | } | ||||
break; | break; | ||||
tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable | tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable | ||||
tr->langopts.numbers = 0x100461; | |||||
tr->langopts.numbers = NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_OMIT_1_HUNDRED | NUM_AND_HUNDRED; | |||||
tr->langopts.max_initial_consonants = 2; | tr->langopts.max_initial_consonants = 2; | ||||
} | } | ||||
break; | break; | ||||
tr->langopts.unstressed_wd1 = 0; | tr->langopts.unstressed_wd1 = 0; | ||||
tr->langopts.unstressed_wd2 = 2; | tr->langopts.unstressed_wd2 = 2; | ||||
tr->langopts.param[LOPT_DIERESES] = 1; | tr->langopts.param[LOPT_DIERESES] = 1; | ||||
tr->langopts.numbers = 0x1 + NUM_ROMAN; | |||||
tr->langopts.numbers = NUM_ROMAN; | |||||
tr->langopts.max_roman = 5000; | tr->langopts.max_roman = 5000; | ||||
} | } | ||||
break; | break; | ||||
tr->langopts.stress_rule = 0; | tr->langopts.stress_rule = 0; | ||||
tr->langopts.spelling_stress = 1; | tr->langopts.spelling_stress = 1; | ||||
tr->charset_a0 = charsets[4]; // ISO-8859-4 | tr->charset_a0 = charsets[4]; // ISO-8859-4 | ||||
tr->langopts.numbers = 0x409 + 0x8000 + 0x10000; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_4 | NUM_ORDINAL_DOT; | |||||
tr->langopts.stress_flags = 0x16 + 0x40000; | tr->langopts.stress_flags = 0x16 + 0x40000; | ||||
} | } | ||||
break; | break; | ||||
tr->letter_groups[0] = vowels_cyrillic; | tr->letter_groups[0] = vowels_cyrillic; | ||||
tr->langopts.stress_rule = 4; // antipenultimate | tr->langopts.stress_rule = 4; // antipenultimate | ||||
tr->langopts.numbers = 0x0429 + 0x4000; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_AND_UNITS | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_2; | |||||
tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards | tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards | ||||
} | } | ||||
break; | break; | ||||
tr->langopts.param[LOPT_PREFIXES] = 1; | tr->langopts.param[LOPT_PREFIXES] = 1; | ||||
SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
tr->langopts.numbers = 0x11c19; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SWAP_TENS | NUM_OMIT_1_HUNDRED | NUM_ALLOW_SPACE | NUM_1900 | NUM_ORDINAL_DOT; | |||||
memcpy(tr->stress_lengths,stress_lengths_nl,sizeof(tr->stress_lengths)); | memcpy(tr->stress_lengths,stress_lengths_nl,sizeof(tr->stress_lengths)); | ||||
} | } | ||||
break; | break; | ||||
SetupTranslator(tr,stress_lengths_no,NULL); | SetupTranslator(tr,stress_lengths_no,NULL); | ||||
tr->langopts.stress_rule = 0; | tr->langopts.stress_rule = 0; | ||||
SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
tr->langopts.numbers = 0x11849; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_HUNDRED_AND | NUM_ALLOW_SPACE | NUM_1900 + NUM_ORDINAL_DOT; | |||||
} | } | ||||
break; | break; | ||||
SetupTranslator(tr,stress_lengths_om,stress_amps_om); | SetupTranslator(tr,stress_lengths_om,stress_amps_om); | ||||
tr->langopts.stress_rule = 2; | tr->langopts.stress_rule = 2; | ||||
tr->langopts.stress_flags = 0x16 + 0x80000; | |||||
tr->langopts.stress_flags = 2 + NUM_SWAP_TENS | NUM_THOUS_SPACE | NUM_NOPAUSE; //?? | |||||
} | } | ||||
break; | break; | ||||
tr->langopts.stress_flags = 0x6; // mark unstressed final syllables as diminished | tr->langopts.stress_flags = 0x6; // mark unstressed final syllables as diminished | ||||
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x8; | tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x8; | ||||
tr->langopts.max_initial_consonants = 7; // for example: wchrzczony :) | tr->langopts.max_initial_consonants = 7; // for example: wchrzczony :) | ||||
tr->langopts.numbers=0x1009 + 0x4000; | |||||
tr->langopts.numbers2=0x40; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_2; | |||||
tr->langopts.numbers2 = 0x40; | |||||
tr->langopts.param[LOPT_COMBINE_WORDS] = 4 + 0x100; // combine 'nie' (marked with $alt2) with some 1-syllable (and 2-syllable) words (marked with $alt) | tr->langopts.param[LOPT_COMBINE_WORDS] = 4 + 0x100; // combine 'nie' (marked with $alt2) with some 1-syllable (and 2-syllable) words (marked with $alt) | ||||
SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
} | } | ||||
tr->langopts.stress_rule = 3; // stress on final syllable | tr->langopts.stress_rule = 3; // stress on final syllable | ||||
tr->langopts.stress_flags = 0x6 | 0x10 | 0x20000; | tr->langopts.stress_flags = 0x6 | 0x10 | 0x20000; | ||||
tr->langopts.numbers = 0x069 + 0x4000 + NUM_ROMAN; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_DFRACTION_2 | NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_ROMAN; | |||||
SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
ResetLetterBits(tr,0x2); | ResetLetterBits(tr,0x2); | ||||
SetLetterBits(tr,1,"bcdfgjkmnpqstvxz"); // B hard consonants, excluding h,l,r,w,y | SetLetterBits(tr,1,"bcdfgjkmnpqstvxz"); // B hard consonants, excluding h,l,r,w,y | ||||
tr->langopts.stress_flags = 0x100 + 0x6; | tr->langopts.stress_flags = 0x100 + 0x6; | ||||
tr->charset_a0 = charsets[2]; // ISO-8859-2 | tr->charset_a0 = charsets[2]; // ISO-8859-2 | ||||
tr->langopts.numbers = 0x1029+0x6000 + NUM_ROMAN; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_3 | NUM_AND_UNITS | NUM_ROMAN; | |||||
tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex | tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex | ||||
} | } | ||||
break; | break; | ||||
tr->langopts.stress_flags = 0x16; | tr->langopts.stress_flags = 0x16; | ||||
tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable | tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable | ||||
tr->langopts.numbers = 0x61 + 0x100000 + 0x4000; | |||||
tr->langopts.numbers = NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_DFRACTION_2 | NUM_AND_HUNDRED; | |||||
tr->langopts.numbers2 = 0x200; // say "thousands" before its number | tr->langopts.numbers2 = 0x200; // say "thousands" before its number | ||||
} | } | ||||
break; | break; | ||||
tr->langopts.spelling_stress = 1; | tr->langopts.spelling_stress = 1; | ||||
tr->langopts.param[LOPT_COMBINE_WORDS] = 4; // combine some prepositions with the following word | tr->langopts.param[LOPT_COMBINE_WORDS] = 4; // combine some prepositions with the following word | ||||
tr->langopts.numbers = 0x0401 + 0x4000 + NUM_ROMAN; | |||||
tr->langopts.numbers = NUM_OMIT_1_HUNDRED | NUM_DFRACTION_2 | NUM_ROMAN; | |||||
tr->langopts.numbers2 = 0x100; | tr->langopts.numbers2 = 0x100; | ||||
tr->langopts.thousands_sep = 0; //no thousands separator | tr->langopts.thousands_sep = 0; //no thousands separator | ||||
tr->langopts.decimal_sep = ','; | tr->langopts.decimal_sep = ','; | ||||
tr->langopts.stress_rule = 2; | tr->langopts.stress_rule = 2; | ||||
tr->langopts.stress_flags = 0x16 + 0x100; | tr->langopts.stress_flags = 0x16 + 0x100; | ||||
SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
tr->langopts.numbers = 0x69 + 0x8000; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_DFRACTION_4; | |||||
tr->langopts.accents = 2; // "capital" after letter name | tr->langopts.accents = 2; // "capital" after letter name | ||||
} | } | ||||
break; | break; | ||||
tr->langopts.stress_rule = 0; | tr->langopts.stress_rule = 0; | ||||
SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
tr->langopts.numbers = 0x1909; | |||||
tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_1900; | |||||
tr->langopts.accents = 1; | tr->langopts.accents = 1; | ||||
} | } | ||||
break; | break; | ||||
tr->langopts.stress_rule = 2; | tr->langopts.stress_rule = 2; | ||||
tr->langopts.stress_flags = 0x6 | 0x10; | tr->langopts.stress_flags = 0x6 | 0x10; | ||||
tr->langopts.numbers = 0x4e1; | |||||
tr->langopts.numbers2 = NUM2_100000a; | |||||
tr->langopts.numbers = NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_SINGLE_AND | NUM_OMIT_1_HUNDRED; | |||||
tr->langopts.break_numbers = 0x49249268; // for languages which have numbers for 100,000 and 1,000,000 | |||||
} | } | ||||
break; | break; | ||||
tr->langopts.stress_rule = 0; | tr->langopts.stress_rule = 0; | ||||
tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | ||||
tr->langopts.numbers2 = NUM2_100000; | |||||
tr->langopts.break_numbers = 0x24a8; // 1000, 100,000 10,000,000 | |||||
if(name2 == L('t','a')) | if(name2 == L('t','a')) | ||||
{ | { | ||||
tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable | tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable | ||||
tr->langopts.stress_flags = 0x20; //no automatic secondary stress | tr->langopts.stress_flags = 0x20; //no automatic secondary stress | ||||
tr->langopts.numbers = 0x1509 + 0x4000; | |||||
tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_2; | |||||
tr->langopts.max_initial_consonants = 2; | tr->langopts.max_initial_consonants = 2; | ||||
} | } | ||||
break; | break; | ||||
tr->letter_groups[0] = vowels_vi; | tr->letter_groups[0] = vowels_vi; | ||||
tr->langopts.tone_language = 1; // Tone language, use CalcPitches_Tone() rather than CalcPitches() | tr->langopts.tone_language = 1; // Tone language, use CalcPitches_Tone() rather than CalcPitches() | ||||
tr->langopts.unstressed_wd1 = 2; | tr->langopts.unstressed_wd1 = 2; | ||||
tr->langopts.numbers = 0x0049 + 0x8000; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_HUNDRED_AND | NUM_DFRACTION_4; | |||||
} | } | ||||
break; | break; | ||||
tr->translator_name = name2; | tr->translator_name = name2; | ||||
if(tr->langopts.numbers & 0x8) | |||||
if(tr->langopts.numbers & NUM_DECIMAL_COMMA) | |||||
{ | { | ||||
// use . and , for thousands and decimal separators | // use . and , for thousands and decimal separators | ||||
tr->langopts.thousands_sep = '.'; | tr->langopts.thousands_sep = '.'; | ||||
tr->langopts.decimal_sep = ','; | tr->langopts.decimal_sep = ','; | ||||
} | } | ||||
if(tr->langopts.numbers & 0x4) | |||||
if(tr->langopts.numbers & NUM_THOUS_SPACE) | |||||
{ | { | ||||
tr->langopts.thousands_sep = 0; // don't allow thousands separator, except space | tr->langopts.thousands_sep = 0; // don't allow thousands separator, except space | ||||
} | } | ||||
tr->langopts.stress_rule = 5; | tr->langopts.stress_rule = 5; | ||||
tr->langopts.stress_flags = 0x0020; // waas 0x1010 | tr->langopts.stress_flags = 0x0020; // waas 0x1010 | ||||
tr->langopts.numbers = 0x0409; | |||||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED; | |||||
tr->langopts.numbers2 = 0xc2; // variant numbers before thousands | tr->langopts.numbers2 = 0xc2; // variant numbers before thousands | ||||
tr->langopts.phoneme_change = 1; | tr->langopts.phoneme_change = 1; | ||||
tr->langopts.testing = 2; | tr->langopts.testing = 2; |
{//======================== | {//======================== | ||||
// Replacement for iswalpha() which also checks for some in-word symbols | // Replacement for iswalpha() which also checks for some in-word symbols | ||||
const unsigned short extra_indic_alphas[] = { | |||||
0xa70,0xa71, // Gurmukhi: tippi, addak | |||||
0 }; | |||||
if(iswalpha(c)) | if(iswalpha(c)) | ||||
return(1); | return(1); | ||||
// Indic scripts: Devanagari, Tamil, etc | // Indic scripts: Devanagari, Tamil, etc | ||||
if((c & 0x7f) < 0x64) | if((c & 0x7f) < 0x64) | ||||
return(1); | return(1); | ||||
if(lookupwchar(extra_indic_alphas, c) != 0) | |||||
return(1); | |||||
return(0); | return(0); | ||||
} | } | ||||
int c_temp; | int c_temp; | ||||
char *pn; | char *pn; | ||||
char *pw; | char *pw; | ||||
static unsigned int break_numbers1 = 0x49249248; | |||||
static unsigned int break_numbers2 = 0x24924aa8; // for languages which have numbers for 100,000 and 100,00,000, eg Hindi | |||||
static unsigned int break_numbers3 = 0x49249268; // for languages which have numbers for 100,000 and 1,000,000 | |||||
unsigned int break_numbers; | |||||
char number_buf[80]; | char number_buf[80]; | ||||
// start speaking at a specified word position in the text? | // start speaking at a specified word position in the text? | ||||
// digits should have been converted to Latin alphabet ('0' to '9') | // digits should have been converted to Latin alphabet ('0' to '9') | ||||
word = pw = &sbuf[words[ix].start]; | word = pw = &sbuf[words[ix].start]; | ||||
if(iswdigit(word[0]) && (tr->langopts.numbers2 & NUM2_100000)) | |||||
if(iswdigit(word[0]) && (tr->langopts.break_numbers != BREAK_THOUSANDS)) | |||||
{ | { | ||||
// Languages with 100000 numbers. Remove thousands separators so that we can insert them again later | // Languages with 100000 numbers. Remove thousands separators so that we can insert them again later | ||||
pn = number_buf; | pn = number_buf; | ||||
pn = &number_buf[1]; | pn = &number_buf[1]; | ||||
nx = n_digits; | nx = n_digits; | ||||
if((tr->langopts.numbers2 & NUM2_100000a) == NUM2_100000a) | |||||
break_numbers = break_numbers3; | |||||
else | |||||
if(tr->langopts.numbers2 & NUM2_100000) | |||||
break_numbers = break_numbers2; | |||||
else | |||||
break_numbers = break_numbers1; | |||||
while(pn < &number_buf[sizeof(number_buf)-3]) | while(pn < &number_buf[sizeof(number_buf)-3]) | ||||
{ | { | ||||
if(!isdigit(c = *pw++) && (c != tr->langopts.decimal_sep)) | if(!isdigit(c = *pw++) && (c != tr->langopts.decimal_sep)) | ||||
break; | break; | ||||
*pn++ = c; | *pn++ = c; | ||||
if((--nx > 0) && (break_numbers & (1 << nx))) | |||||
if((--nx > 0) && (tr->langopts.break_numbers & (1 << nx))) | |||||
{ | { | ||||
if(tr->langopts.thousands_sep != ' ') | if(tr->langopts.thousands_sep != ' ') | ||||
{ | { | ||||
*pn++ = tr->langopts.thousands_sep; | *pn++ = tr->langopts.thousands_sep; | ||||
} | } | ||||
*pn++ = ' '; | *pn++ = ' '; | ||||
if(break_numbers & (1 << (nx-1))) | |||||
if(tr->langopts.break_numbers & (1 << (nx-1))) | |||||
{ | { | ||||
// the next group only has 1 digit (i.e. NUM2_10000), make it three | // the next group only has 1 digit (i.e. NUM2_10000), make it three | ||||
*pn++ = '0'; | *pn++ = '0'; | ||||
*pn++ = '0'; | *pn++ = '0'; | ||||
} | } | ||||
if(break_numbers & (1 << (nx-2))) | |||||
if(tr->langopts.break_numbers & (1 << (nx-2))) | |||||
{ | { | ||||
// the next group only has 2 digits (i.e. NUM2_10000), make it three | // the next group only has 2 digits (i.e. NUM2_10000), make it three | ||||
*pn++ = '0'; | *pn++ = '0'; |
unsigned char *length_mods; | unsigned char *length_mods; | ||||
unsigned char *length_mods0; | unsigned char *length_mods0; | ||||
#define NUM_THOUS_SPACE 0x4 | |||||
#define NUM_DECIMAL_COMMA 0x8 | |||||
#define NUM_SWAP_TENS 0x10 | |||||
#define NUM_AND_UNITS 0x20 | |||||
#define NUM_HUNDRED_AND 0x40 | |||||
#define NUM_SINGLE_AND 0x80 | |||||
#define NUM_SINGLE_STRESS 0x100 | |||||
#define NUM_SINGLE_VOWEL 0x200 | |||||
#define NUM_OMIT_1_HUNDRED 0x400 | #define NUM_OMIT_1_HUNDRED 0x400 | ||||
#define NUM_19_HUNDRED 0x800 | |||||
#define NUM_1900 0x800 | |||||
#define NUM_ALLOW_SPACE 0x1000 | |||||
#define NUM_DFRACTION_1 0x2000 | |||||
#define NUM_DFRACTION_2 0x4000 | |||||
#define NUM_DFRACTION_3 0x6000 | |||||
#define NUM_DFRACTION_4 0x8000 | |||||
#define NUM_DFRACTION_5 0xa000 | |||||
#define NUM_ORDINAL_DOT 0x10000 | #define NUM_ORDINAL_DOT 0x10000 | ||||
#define NUM_ROMAN 0x20000 | #define NUM_ROMAN 0x20000 | ||||
#define NUM_ROMAN_UC 0x40000 | #define NUM_ROMAN_UC 0x40000 | ||||
#define NUM_NOPAUSE 0x80000 | #define NUM_NOPAUSE 0x80000 | ||||
#define NUM_AND_HUNDRED 0x100000 | |||||
#define NUM_ROMAN_AFTER 0x200000 | #define NUM_ROMAN_AFTER 0x200000 | ||||
#define NUM_VIGESIMAL 0x400000 | #define NUM_VIGESIMAL 0x400000 | ||||
#define NUM_ROMAN_ORDINAL 0x800000 | #define NUM_ROMAN_ORDINAL 0x800000 | ||||
// bit23=Roman numbers are ordinal numbers | // bit23=Roman numbers are ordinal numbers | ||||
int numbers; | int numbers; | ||||
#define NUM2_100000 0x800 // numbers for 100,000 and 10,000,000 | |||||
#define NUM2_100000a 0xc00 // numbers for 100,000 and 1,000,000 | |||||
// bits 1-4 use variant form of numbers before thousands,millions,etc. | // bits 1-4 use variant form of numbers before thousands,millions,etc. | ||||
// bit6=(LANG=pl) two forms of plural, M or MA | // bit6=(LANG=pl) two forms of plural, M or MA | ||||
// bit7=(LANG=ru) use MB for 1 thousand, million, etc | // bit7=(LANG=ru) use MB for 1 thousand, million, etc | ||||
// bit8=(LANG=cs,sk) two forms of plural, M or MA | // bit8=(LANG=cs,sk) two forms of plural, M or MA | ||||
// bit9=(LANG=rw) say "thousand" and "million" before its number, not after | // bit9=(LANG=rw) say "thousand" and "million" before its number, not after | ||||
// bit10=(LANG=sw) special word for 100,000 and 1,000,000 | |||||
// bit11=(LANG=hi) special word for 100,000 and 10,000,000 | |||||
int numbers2; | int numbers2; | ||||
#define BREAK_THOUSANDS 0x49249248 | |||||
int break_numbers; // which digits to break the number into thousands, millions, etc (Hindi has 100,000 not 1,000,000) | |||||
int max_roman; | int max_roman; | ||||
int thousands_sep; | int thousands_sep; | ||||
int decimal_sep; | int decimal_sep; |
amp = embedded_value[EMBED_H]; | amp = embedded_value[EMBED_H]; | ||||
delay = 130; | delay = 130; | ||||
} | } | ||||
#ifdef deleted | |||||
if(embedded_value[EMBED_T] > 0) | if(embedded_value[EMBED_T] > 0) | ||||
{ | { | ||||
// announcing punctuation | |||||
// announcing punctuation, add a small echo | |||||
// This seems unpopular | |||||
amp = embedded_value[EMBED_T] * 8; | amp = embedded_value[EMBED_T] * 8; | ||||
delay = 60; | delay = 60; | ||||
} | } | ||||
#endif | |||||
if(delay == 0) | if(delay == 0) | ||||
amp = 0; | amp = 0; |