git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@93 d46cf337-b52f-0410-862d-fd96e6ae7743master
brittanje $2 | brittanje $2 | ||||
ceylon s@lOn | ceylon s@lOn | ||||
china S'ina | china S'ina | ||||
(graaff reinet) x2rA:f||r@n'Et | |||||
italië it'A:li;@ | |||||
jerusalem j@r'ysalEm | jerusalem j@r'ysalEm | ||||
kanada kanad%a | kanada kanad%a | ||||
knysna neIsna | |||||
kongo kONgu | kongo kONgu | ||||
mesopotamië mEsuput'A:mi@ | mesopotamië mEsuput'A:mi@ | ||||
mosambiek musamb'ik | mosambiek musamb'ik | ||||
potchefstroom pOtSIfstr'o@m | |||||
(thaba nchu) tab'A:||ntSu | (thaba nchu) tab'A:||ntSu | ||||
europa Y@r'o@pa | europa Y@r'o@pa | ||||
outeniekwa @Ut@n'ikwa | |||||
portugal $1 | portugal $1 | ||||
potchefstroom pOtSIfstr'o@m | |||||
pretoria prit'o@ria | pretoria prit'o@ria | ||||
stellenbosch st%&l@mbOs | stellenbosch st%&l@mbOs | ||||
tunisië $2 | |||||
turkye $2 | |||||
upington apiNt@n | upington apiNt@n | ||||
worcester v'ust@r | worcester v'ust@r | ||||
zimbabwe zI:mb'ab_wE | |||||
zimbabwe zI:mb'ab_wE | |||||
botha bo@ta | botha bo@ta | ||||
breytenbach breIt@nbax2 | breytenbach breIt@nbax2 | ||||
carl kA:r@L | carl kA:r@L | ||||
cecilia s@si:lia | |||||
charles _^_EN | charles _^_EN | ||||
charlie tSA:li | charlie tSA:li | ||||
chopin S%OpA~ | chopin S%OpA~ | ||||
chris krIs | chris krIs | ||||
christo krIstu | christo krIstu | ||||
christian krIstian | christian krIstian | ||||
christelle kr@st&l | |||||
christine kr@st'i:n | christine kr@st'i:n | ||||
coetzee kuts'e@ | coetzee kuts'e@ | ||||
cronjé krOnj'e@ | cronjé krOnj'e@ | ||||
debora d@bo@ra | debora d@bo@ra | ||||
debussy d@bus'i: | |||||
der d@r | der d@r | ||||
deventer d'e@v@nt@r | deventer d'e@v@nt@r | ||||
du $u | du $u | ||||
martin m'A:rt@n | martin m'A:rt@n | ||||
michelle miS'&l | michelle miS'&l | ||||
michiel $2 | michiel $2 | ||||
miriam mIri@m | |||||
mostert m'Ost@rt | mostert m'Ost@rt | ||||
mozart mo@tsart | mozart mo@tsart | ||||
naomi na_'o@mi | naomi na_'o@mi | ||||
paul p@Ul | paul p@Ul | ||||
paulus p@UlWs | paulus p@UlWs | ||||
philip fIl@p | philip fIl@p | ||||
phillips fIl@ps | |||||
pierre p'e@r | pierre p'e@r | ||||
petrus pe@trWs | petrus pe@trWs | ||||
phoebe fi:bi | phoebe fi:bi | ||||
rachmaninoff rax2m'aninOf | rachmaninoff rax2m'aninOf | ||||
rebekka r@bEka | rebekka r@bEka | ||||
renée r@neI | |||||
retief r@tif | |||||
ronel run'&l | ronel run'&l | ||||
rousseau r@s@U | rousseau r@s@U | ||||
roux r'u | roux r'u | ||||
viljoen $2 | viljoen $2 | ||||
villiers vIli@rs | villiers vIli@rs | ||||
violet _^_EN $capital | violet _^_EN $capital | ||||
william _^_EN | |||||
marais mar'E: | marais mar'E: | ||||
mandela mand'E:la | mandela mand'E:la | ||||
celliers sIlj'e@ | celliers sIlj'e@ | ||||
tensy $2 $pause | tensy $2 $pause | ||||
aangesien $pause | aangesien $pause | ||||
wie $pause | wie $pause | ||||
wanneer $pause | |||||
wanneer $1 $pause | |||||
waar $pause | waar $pause | ||||
waarom $pause | waarom $pause | ||||
waarheen $pause | waarheen $pause | ||||
// main word list | // main word list | ||||
aanbid $2 | |||||
aanstaande $2 | aanstaande $2 | ||||
adagio ad'A:dZi;%@U | adagio ad'A:dZi;%@U | ||||
algaande $2 | algaande $2 | ||||
hoofsaaklik $2 | hoofsaaklik $2 | ||||
idee $2 | idee $2 | ||||
ietwat itvat | |||||
ignoreer ix2no@r'e@r | ignoreer ix2no@r'e@r | ||||
inagneming @nax2ne@m@N | inagneming @nax2ne@m@N | ||||
inkluis $2 | inkluis $2 | ||||
intensiteit $4 | intensiteit $4 | ||||
jawoord jA:vo@rt | jawoord jA:vo@rt | ||||
jupiter jupit@r | |||||
kafee kaf'e@ | kafee kaf'e@ | ||||
kapitaal kapit'A:l | |||||
kaviaar kavi'A:r | |||||
komaan kOmA:n | komaan kOmA:n | ||||
komberse kOmb'&rs@ | komberse kOmb'&rs@ | ||||
kombers kOmb'&rs | kombers kOmb'&rs | ||||
kopiereg kup'ir&x2 | kopiereg kup'ir&x2 | ||||
kritiek krIt'ik | kritiek krIt'ik | ||||
kultuur $2 | |||||
kulture $2 | |||||
kunsmatige kWnsm'A:t@x2@ | kunsmatige kWnsm'A:t@x2@ | ||||
kwansuis $2 | kwansuis $2 | ||||
macaroni makar'o@ni | macaroni makar'o@ni | ||||
madame mad'A:m | madame mad'A:m | ||||
makabere m%ak'A:b@r@ | makabere m%ak'A:b@r@ | ||||
maskering mask'e@r@N $only | |||||
meegee me@x2e@ | meegee me@x2e@ | ||||
memoriam mEm'o@riam | memoriam mEm'o@riam | ||||
merlot m&rl'o@ | merlot m&rl'o@ | ||||
uiteraard Yyt@r_'A:rt | uiteraard Yyt@r_'A:rt | ||||
vaarwel fA:rv'&l | vaarwel fA:rv'&l | ||||
vakant f%ak'ant | |||||
vakante f%ak'ant@ | |||||
vanaf fanaf | vanaf fanaf | ||||
vandat $1 | vandat $1 | ||||
vanne fan@ | vanne fan@ | ||||
ver f'&:r | ver f'&:r | ||||
verg f&rx2 | verg f&rx2 | ||||
vergete f@rx2e@t@ | vergete f@rx2e@t@ | ||||
vergewe f@rx2e@v@ | |||||
vermy $2 | vermy $2 | ||||
verre f&:r@ | verre f&:r@ | ||||
verregaande f&r@x2'A:nd@ | verregaande f&r@x2'A:nd@ |
Dictionary hi_dict | Dictionary hi_dict | ||||
@ @- @2 @3 a a: aI aU | |||||
e E e: E: E~ i I i: | |||||
l- o O o: O: o~ O~ r- | |||||
U u: u~ V | |||||
@ @2 @3 a a: aI aU E | |||||
e: E: E~ I i: O o: O: | |||||
O~ r- U u: V | |||||
- : b c ch d d. f | |||||
g h H j J k kh l | |||||
m n N n. n^ p ph Q | |||||
q r s S s. t T t. | |||||
t.h th v w x z | |||||
: b c ch d d. f g | |||||
h H j J k kh l m | |||||
n N n. n^ p ph Q q | |||||
r s S s. t T t. t.h | |||||
th v w x z | |||||
Dictionary hr_dict | Dictionary hr_dict | ||||
Dictionary hu_dict | Dictionary hu_dict | ||||
A a: E e: i i: o o: | A a: E e: i i: o o: | ||||
u u: Y y y: Y: | |||||
u u: Y Y: | |||||
: b c C d dZ f g | |||||
h j J k l l^ m n | |||||
N n^ p R R2 s S s2 | |||||
t tS ts v z Z | |||||
: b c d dZ f g h | |||||
j J k l m n n^ p | |||||
R R2 s S s2 t tS ts | |||||
v z Z | |||||
Dictionary it_dict | Dictionary it_dict | ||||
& &/ &U~ &~ @ @- a A | & &/ &U~ &~ @ @- a A | ||||
aI aU e E eI EI eU EU | aI aU e E eI EI eU EU | ||||
i i/ iU i~ o O oI OI | |||||
e~ i i/ iU o O oI OI | |||||
o~ u U uI u~ y | o~ u U uI u~ y | ||||
* : ; b C d dZ f | * : ; b C d dZ f | ||||
Dictionary ro_dict | Dictionary ro_dict | ||||
@ @- @I @U a aI aU e | |||||
ea eI eo eU i i/ I^ iI | |||||
iU o O Oa oI oU u uI | |||||
y Y yI yU | |||||
@ @- @I a aI aU e ea | |||||
eI eU i I^ iI o Oa oI | |||||
oU u uI y | |||||
* *; b b; c C d d; | |||||
dZ f f; g h j k l | |||||
l; m m; n N n; p p; | |||||
r s S S; t T t; tS | |||||
ts ts; v v; w w2 x z | |||||
Z z; Z; | |||||
* *; b c d dZ f g | |||||
h j k l m m; n p | |||||
r s S S; t tS ts ts; | |||||
v w w2 x z Z | |||||
Dictionary is_dict | Dictionary is_dict | ||||
r R s t w z | r R s t w z | ||||
Dictionary grc_dict | |||||
Dictionary mk_dict | Dictionary mk_dict | ||||
& @ @- @2 a A a: E | |||||
e e: E~ i I i: l- o | |||||
o: oU r- u u: y | |||||
& @ @- @2 a E e i | |||||
I o r- u | |||||
* b d dZ dZ; f g h | |||||
j k k^ l l^ m n N | |||||
n^ p r R s S t tS | |||||
ts tS; v x z Z | |||||
* b d dZ dZ; f g j | |||||
k k^ l l^ m n n^ p | |||||
r R s S t tS ts v | |||||
x z Z |
// ?3 use diphthong for "au" | // ?3 use diphthong for "au" | ||||
.replace | |||||
० 0 | |||||
१ 1 | |||||
२ 2 | |||||
३ 3 | |||||
४ 4 | |||||
५ 5 | |||||
६ 6 | |||||
७ 7 | |||||
८ 8 | |||||
९ 9 | |||||
// Vowels | // Vowels | ||||
.group अ | .group अ |
// This file is UTF-8 encoded | // This file is UTF-8 encoded | ||||
.replace | |||||
// allow o,u-circumflex for o,u-double-acute | |||||
ô ő | |||||
û ű | |||||
.group a | .group a | ||||
a A | a A | ||||
_) a (_ %A | _) a (_ %A |
// translation rules for Macedonian | // translation rules for Macedonian | ||||
// This file is UTF-8 encoded | // This file is UTF-8 encoded | ||||
.replace | |||||
a а | |||||
b б | |||||
c ц | |||||
ć ћ | |||||
č ч | |||||
dž џ | |||||
dz ѕ | |||||
d д | |||||
đ ђ | |||||
e е | |||||
f ф | |||||
g г | |||||
h х | |||||
i и | |||||
j ј | |||||
k к | |||||
lj љ | |||||
l л | |||||
m м | |||||
nj њ | |||||
n н | |||||
o о | |||||
p п | |||||
r р | |||||
s с | |||||
š ш | |||||
t т | |||||
u у | |||||
v в | |||||
z з | |||||
ž ж | |||||
đ ѓ | |||||
ć ќ | |||||
.group а | .group а | ||||
а a | а a | ||||
// This file is UTF-8 encoded | // This file is UTF-8 encoded | ||||
// replace s-comma, t-comma by s-cedilla, t-cedilla | |||||
.replace | |||||
ș ş | |||||
ț ţ | |||||
.group a | .group a |
39 phoneme tables | |||||
40 phoneme tables | |||||
new total | new total | ||||
base 96 96 | base 96 96 | ||||
base2 23 114 | |||||
base2 24 115 | |||||
en 53 144 | en 53 144 | ||||
en_n 30 144 | en_n 30 144 | ||||
en_us 37 144 | en_us 37 144 | ||||
mk 3 130 | mk 3 130 | ||||
sr 2 129 | sr 2 129 | ||||
ru 38 126 | ru 38 126 | ||||
it 17 117 | |||||
it 17 118 | |||||
la 21 114 | la 21 114 | ||||
es 6 114 | |||||
pt 28 131 | |||||
es 6 115 | |||||
pt 27 131 | |||||
pt_pt 20 131 | pt_pt 20 131 | ||||
ro 36 138 | |||||
el 8 114 | |||||
ro 36 139 | |||||
el 8 115 | |||||
grc 7 120 | |||||
sv 25 118 | sv 25 118 | ||||
no 28 122 | no 28 122 | ||||
is 32 121 | is 32 121 | ||||
30 r/trr base af de fi nl ru ro sv sw | 30 r/trr base af de fi nl ru ro sv sw | ||||
11 r/xr base | 11 r/xr base | ||||
2 ufric/ch base de | 2 ufric/ch base de | ||||
3 ufric/f base de ro | |||||
4 ufric/f base de ro grc | |||||
2 ufric/f_ base ro | 2 ufric/f_ base ro | ||||
5 ufric/h_ base fi hi la | 5 ufric/h_ base fi hi la | ||||
6 ufric/h@ base fi hi la | 6 ufric/h@ base fi hi la | ||||
8 ustop/ts_pzd base hi ru | 8 ustop/ts_pzd base hi ru | ||||
2 ustop/ts_pzd_ hi hu | 2 ustop/ts_pzd_ hi hu | ||||
2 ustop/ts_pzd2 hi hu | 2 ustop/ts_pzd2 hi hu | ||||
2 vdiph/0i pt vi | |||||
3 vdiph/0i pt grc vi | |||||
3 vdiph/0i_2 en_sc no en_wi | 3 vdiph/0i_2 en_sc no en_wi | ||||
3 vdiph2/e@ en_sc en_wi | 3 vdiph2/e@ en_sc en_wi | ||||
1 vdiph2/ea ro | 1 vdiph2/ea ro | ||||
1 vdiph/au_3 en_rp | 1 vdiph/au_3 en_rp | ||||
6 vdiph/au_4 base2 cy eo sk it is | 6 vdiph/au_4 base2 cy eo sk it is | ||||
1 vdiph/ee-e hi | 1 vdiph/ee-e hi | ||||
5 vdiph/eei en de nl pt vi | |||||
5 vdiph/eei base2 en de nl vi | |||||
3 vdiph/eei_2 en_us eo fi | 3 vdiph/eei_2 en_us eo fi | ||||
2 vdiph/eei_3 en_rp sk | 2 vdiph/eei_3 en_rp sk | ||||
3 vdiph/eeu pt vi zhy | |||||
4 vdiph/eeu pt grc vi zhy | |||||
2 vdiph/eeu_2 la pt_pt | 2 vdiph/eeu_2 la pt_pt | ||||
2 vdiph/eeu_3 en_n en_wm | 2 vdiph/eeu_3 en_n en_wm | ||||
1 vdiph/eey fi | 1 vdiph/eey fi | ||||
9 vdiph/ooi en en_n en_us cy eo fi no zhy | 9 vdiph/ooi en en_n en_us cy eo fi no zhy | ||||
1 vdiph/ooi_2 af | 1 vdiph/ooi_2 af | ||||
2 vdiph/ooi_3 en_rp en_wm | 2 vdiph/ooi_3 en_rp en_wm | ||||
1 vdiph/oou cs | |||||
2 vdiph/oou cs grc | |||||
2 vdiph/ou fi zhy | 2 vdiph/ou fi zhy | ||||
2 vdiph/ou_2 sk ro | 2 vdiph/ou_2 sk ro | ||||
2 vdiph/ou_3 is | 2 vdiph/ou_3 is | ||||
1 vdiph/Vu_2 en_wm | 1 vdiph/Vu_2 en_wm | ||||
1 vdiph/Vu_3 nl | 1 vdiph/Vu_3 nl | ||||
2 vdiph/&y fi nl | 2 vdiph/&y fi nl | ||||
2 vdiph/yi fi no | |||||
3 vdiph/yi fi grc no | |||||
1 vdiph/y#i fi | 1 vdiph/y#i fi | ||||
1 vdiph/y#i_2 is | 1 vdiph/y#i_2 is | ||||
1 vdiph/yi_fr fr | 1 vdiph/yi_fr fr | ||||
4 vowel/& en_rp fi hi sv | 4 vowel/& en_rp fi hi sv | ||||
4 vowel/0 base2 en hi pt | 4 vowel/0 base2 en hi pt | ||||
3 vowel/0_2 en_n pt_pt sw | 3 vowel/0_2 en_n pt_pt sw | ||||
5 vowel/0_3 en_us en_sc en_rp en_wm hu | |||||
4 vowel/0_3 en_us en_sc en_rp hu | |||||
2 vowel/@_2 fr | 2 vowel/@_2 fr | ||||
2 vowel/&_2 en_us | 2 vowel/&_2 en_us | ||||
6 vowel/@_3 en_sc de hi | 6 vowel/@_3 en_sc de hi | ||||
2 vowel/ii_6 en_wm | 2 vowel/ii_6 en_wm | ||||
1 vowel/ii_en en | 1 vowel/ii_en en | ||||
10 vowel/@_low en_rp hi ro no | 10 vowel/@_low en_rp hi ro no | ||||
12 vowel/o base2 en en_wm de hi it la pt_pt sv en_wi | |||||
10 vowel/o base2 en de hi it la pt_pt sv en_wi | |||||
4 vowel/o_2 cy hi hu no | 4 vowel/o_2 cy hi hu no | ||||
2 vowel/o-_2 en_n en_wm | 2 vowel/o-_2 en_n en_wm | ||||
3 vowel/o_3 en_sc fr | 3 vowel/o_3 en_sc fr | ||||
1 vowel/oe_4 sv | 1 vowel/oe_4 sv | ||||
2 vowel/o_mid fr hu | 2 vowel/o_mid fr hu | ||||
12 vowel/oo en_sc de eo la es el sv no zhy en_wi | 12 vowel/oo en_sc de eo la es el sv no zhy en_wi | ||||
10 vowel/oo_1 en_n en_rp en_wm af fi sk hr vi | |||||
12 vowel/oo_1 en_n en_rp en_wm af fi sk hr vi | |||||
3 vowel/oo_2 en_sc cy cs | 3 vowel/oo_2 en_sc cy cs | ||||
1 vowel/oo_3 af | 1 vowel/oo_3 af | ||||
5 vowel/oo_4 hi pl it en_wi | |||||
6 vowel/oo_4 en_wm hi pl it en_wi | |||||
1 vowel/oo_5 is | 1 vowel/oo_5 is | ||||
6 vowel/oo_en en en_n en_rp | 6 vowel/oo_en en en_n en_rp | ||||
2 vowelr/aa_r en_sc | 2 vowelr/aa_r en_sc | ||||
1 vowel/yy fr_ca | 1 vowel/yy fr_ca | ||||
1 vowel/yy_2 no | 1 vowel/yy_2 no | ||||
1 vowel/yy_3 sv | 1 vowel/yy_3 sv | ||||
5 vowel/yy_4 de hu la is | |||||
6 vowel/yy_4 de hu la grc is | |||||
1 vwl_af/@ af | 1 vwl_af/@ af | ||||
1 vwl_af/I af | 1 vwl_af/I af | ||||
2 vwl_af/r@ af | 2 vwl_af/r@ af | ||||
6 vwl_en/aI@ en en_n en_us en_sc en_rp en_wm | |||||
5 vwl_en/aI@ en en_n en_us en_sc en_rp | |||||
2 vwl_en/aI@_2 en_sc | 2 vwl_en/aI@_2 en_sc | ||||
5 vwl_en/aU@ en en_n en_us en_sc en_wm | 5 vwl_en/aU@ en en_n en_us en_sc en_wm | ||||
12 vwl_en/@L en en_us en_sc en_rp en_wm af | 12 vwl_en/@L en en_us en_sc en_rp en_wm af | ||||
1 vwl_en_n/aa_5 en_n | 1 vwl_en_n/aa_5 en_n | ||||
2 vwl_en_n/O@ en_n | 2 vwl_en_n/O@ en_n | ||||
1 vwl_en_n/u_ en_n | 1 vwl_en_n/u_ en_n | ||||
1 vwl_en/ooi@ en_wm | |||||
3 vwl_en_rp/aa en_rp | 3 vwl_en_rp/aa en_rp | ||||
1 vwl_en_rp/aU@ en_rp | 1 vwl_en_rp/aU@ en_rp | ||||
1 vwl_en_rp/e@ en_rp | 1 vwl_en_rp/e@ en_rp |
0 @- NULL 0 NULL | 0 @- NULL 0 NULL | ||||
0 aI@ NULL 60 aI @ | 0 aI@ NULL 60 aI @ | ||||
0 aU@ NULL 75 aU @ | 0 aU@ NULL 75 aU @ | ||||
0 x NULL 0 k |
0 w/ NULL 0 w | 0 w/ NULL 0 w | ||||
0 ; NULL 0 NULL | 0 ; NULL 0 NULL | ||||
0 g- NULL 0 NULL | 0 g- NULL 0 NULL | ||||
0 x NULL 0 k_h | |||||
0 @- NULL 0 NULL | 0 @- NULL 0 NULL | ||||
0 aI@ NULL 60 AI r | 0 aI@ NULL 60 AI r | ||||
0 aU@ NULL 75 aU r | 0 aU@ NULL 75 aU r |
0 w/ NULL 0 w | 0 w/ NULL 0 w | ||||
0 ; NULL 0 NULL | 0 ; NULL 0 NULL | ||||
0 g- NULL 0 NULL | 0 g- NULL 0 NULL | ||||
0 x NULL 0 k | |||||
0 @- NULL 0 NULL | 0 @- NULL 0 NULL | ||||
0 aI@ NULL 60 AI r | 0 aI@ NULL 60 AI r | ||||
0 aU@ NULL 75 aU r | 0 aU@ NULL 75 aU r |
endphoneme | endphoneme | ||||
phoneme EI | |||||
vowel starttype (e) endtype (i) | |||||
length 230 | |||||
formants vdiph/eei | |||||
endphoneme | |||||
phoneme oI | phoneme oI | ||||
vowel starttype (o) endtype (i) | vowel starttype (o) endtype (i) | ||||
length 240 | length 240 |
phoneme I | phoneme I | ||||
vowel starttype (e) endtype (e) | |||||
vowel starttype (@) endtype (@) | |||||
length 110 | length 110 | ||||
formants vowel/e# | formants vowel/e# | ||||
before l/2 vowel/@_3-30+l/L2_@L | before l/2 vowel/@_3-30+l/L2_@L | ||||
endphoneme | endphoneme | ||||
phoneme I2 | phoneme I2 | ||||
vowel starttype (e) endtype (e) | |||||
vowel starttype (@) endtype (@) | |||||
unstressed | unstressed | ||||
length 110 | length 110 | ||||
formants vowel/e# | formants vowel/e# |
phoneme 0 | phoneme 0 | ||||
vowel starttype (o) endtype (o) | vowel starttype (o) endtype (o) | ||||
length 140 | length 140 | ||||
formants vowel/0_3 | |||||
formants vowel/oo_4 | |||||
reduceto @ 0 | reduceto @ 0 | ||||
endphoneme | endphoneme | ||||
phoneme O@ | phoneme O@ | ||||
vowel starttype (o) endtype (@) | vowel starttype (o) endtype (@) | ||||
length 240 | length 240 | ||||
formants vowel/o | |||||
formants vowel/oo_1 | |||||
linkout r- | linkout r- | ||||
endphoneme | endphoneme | ||||
phoneme O | phoneme O | ||||
vowel starttype (o) endtype (o) | vowel starttype (o) endtype (o) | ||||
length 150 | length 150 | ||||
formants vowel/o | |||||
formants vowel/oo_1 | |||||
reduceto @ 0 | reduceto @ 0 | ||||
endphoneme | endphoneme | ||||
phoneme eI | phoneme eI | ||||
vowel starttype (@) endtype (i) | vowel starttype (@) endtype (i) | ||||
length 210 | |||||
length 230 | |||||
formants vdiph/@i_3 | formants vdiph/@i_3 | ||||
endphoneme | endphoneme | ||||
phoneme aI@ | phoneme aI@ | ||||
vowel starttype (a) endtype (@) | vowel starttype (a) endtype (@) | ||||
length 270 | length 270 | ||||
formants vwl_en/aI@ | |||||
formants vwl_en/ooi@ | |||||
linkout r- | linkout r- | ||||
endphoneme | endphoneme | ||||
//==================================================== | |||||
// Ancient Greek - based on base2 | |||||
//==================================================== | |||||
phoneme y | |||||
vowel starttype (i) endtype (i) | |||||
length 160 | |||||
formants vowel/yy_4 | |||||
endphoneme | |||||
phoneme EU | |||||
vowel starttype (e) endtype (u) | |||||
length 230 | |||||
formants vdiph/eeu | |||||
endphoneme | |||||
phoneme OI | |||||
vowel starttype (o) endtype (i) | |||||
length 230 | |||||
formants vdiph/0i | |||||
endphoneme | |||||
phoneme OU | |||||
vowel starttype (o) endtype (u) | |||||
length 230 | |||||
formants vdiph/oou | |||||
endphoneme | |||||
phoneme yI | |||||
vowel starttype (i) endtype (i) | |||||
length 230 | |||||
formants vdiph/yi | |||||
endphoneme | |||||
phoneme f // consider this an affricate | |||||
vls blb afr | |||||
vowelout f1=0 f2=1000 -500 -350 f3=-200 80 | |||||
lengthmod 2 | |||||
wave ufric/f // could replace this with a [pf] wav file | |||||
endphoneme | |||||
endphoneme | endphoneme | ||||
phoneme EI | |||||
vowel starttype (e) endtype (i) | |||||
length 230 | |||||
formants vdiph/eei | |||||
endphoneme | |||||
phoneme OI | phoneme OI | ||||
vowel starttype (o) endtype (i) | vowel starttype (o) endtype (i) | ||||
length 230 | length 230 |
phonemetable el base2 | phonemetable el base2 | ||||
include ph_greek | include ph_greek | ||||
phonemetable grc base2 | |||||
include ph_greek_ancient | |||||
phonemetable sv base | phonemetable sv base | ||||
include ph_swedish | include ph_swedish | ||||
char encoded_ph[200]; | char encoded_ph[200]; | ||||
unsigned char bad_phoneme[4]; | unsigned char bad_phoneme[4]; | ||||
p = linebuf; | p = linebuf; | ||||
comment = NULL; | comment = NULL; | ||||
phonetic = word = ""; | phonetic = word = ""; | ||||
if((word[0] & 0x80)==0) // 7 bit ascii only | if((word[0] & 0x80)==0) // 7 bit ascii only | ||||
{ | { | ||||
// 1st letter - need to consider utf8 here | |||||
// If first letter is uppercase, convert to lower case. (Only if it's 7bit ascii) | |||||
// ??? need to consider utf8 here | |||||
word[0] = tolower(word[0]); | word[0] = tolower(word[0]); | ||||
} | } | ||||
for(ix=0; finish==0; ix++) | for(ix=0; finish==0; ix++) | ||||
{ | { | ||||
c = input[ix]; | c = input[ix]; | ||||
if((c=='/') && (input[ix+1]=='/')) | |||||
c = input[ix] = '\n'; // treat comment as end of line | |||||
switch(c = input[ix]) | switch(c = input[ix]) | ||||
{ | { | ||||
int different; | int different; | ||||
char *prev_rgroup_name; | char *prev_rgroup_name; | ||||
unsigned int char_code; | unsigned int char_code; | ||||
int compile_mode=0; | |||||
char *buf; | char *buf; | ||||
char buf1[120]; | char buf1[120]; | ||||
char *rules[N_RULES]; | char *rules[N_RULES]; | ||||
{ | { | ||||
linenum++; | linenum++; | ||||
buf = fgets(buf1,sizeof(buf1),f_in); | buf = fgets(buf1,sizeof(buf1),f_in); | ||||
if((buf != NULL) && (buf[0] == '\r')) buf++; // ignore extra \r in \r\n | |||||
if(buf != NULL) | |||||
{ | |||||
if((p = (unsigned char *)strstr(buf,"//")) != NULL) | |||||
*p = 0; | |||||
if(buf[0] == '\r') buf++; // ignore extra \r in \r\n | |||||
} | |||||
if((buf != NULL) && (memcmp(buf,".L",2)==0)) | if((buf != NULL) && (memcmp(buf,".L",2)==0)) | ||||
{ | { | ||||
continue; | continue; | ||||
} | } | ||||
if((buf == NULL) || (memcmp(buf,".group",6)==0)) | |||||
if((buf == NULL) || (buf[0] == '.')) | |||||
{ | { | ||||
// next .group or end of file, write out the previous group | // next .group or end of file, write out the previous group | ||||
} | } | ||||
n_rules = 0; | n_rules = 0; | ||||
if(buf == NULL) break; // end of file | |||||
if(compile_mode == 2) | |||||
{ | |||||
// end of the character replacements section | |||||
fwrite(&n_rules,1,4,f_out); // write a zero word to terminate the replacement list | |||||
} | |||||
p = (unsigned char *)&buf[6]; | |||||
while((p[0]==' ') || (p[0]=='\t')) p++; // Note: Windows isspace(0xe1) gives TRUE ! | |||||
ix = 0; | |||||
while((*p > ' ') && (ix<12)) | |||||
group_name[ix++] = *p++; | |||||
group_name[ix]=0; | |||||
if(buf == NULL) break; // end of file | |||||
if(sscanf(group_name,"0x%x",&char_code)==1) | |||||
if(memcmp(buf,".replace",8)==0) | |||||
{ | { | ||||
// group character is given as a character code (max 16 bits) | |||||
p = (unsigned char *)group_name; | |||||
compile_mode = 2; | |||||
fputc(RULE_GROUP_START,f_out); | |||||
fputc(RULE_REPLACEMENTS,f_out); | |||||
if(char_code > 0x100) | |||||
{ | |||||
*p++ = (char_code >> 8); | |||||
} | |||||
*p++ = char_code; | |||||
*p = 0; | |||||
// advance to next word boundary | |||||
while((ftell(f_out) & 3) != 0) | |||||
fputc(0,f_out); | |||||
} | } | ||||
if(strlen(group_name) > 2) | |||||
if(memcmp(buf,".group",6)==0) | |||||
{ | { | ||||
if(utf8_in(&c,group_name,0) < 2) | |||||
compile_mode = 1; | |||||
p = (unsigned char *)&buf[6]; | |||||
while((p[0]==' ') || (p[0]=='\t')) p++; // Note: Windows isspace(0xe1) gives TRUE ! | |||||
ix = 0; | |||||
while((*p > ' ') && (ix<12)) | |||||
group_name[ix++] = *p++; | |||||
group_name[ix]=0; | |||||
if(sscanf(group_name,"0x%x",&char_code)==1) | |||||
{ | { | ||||
fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum); | |||||
error_count++; | |||||
// group character is given as a character code (max 16 bits) | |||||
p = (unsigned char *)group_name; | |||||
if(char_code > 0x100) | |||||
{ | |||||
*p++ = (char_code >> 8); | |||||
} | |||||
*p++ = char_code; | |||||
*p = 0; | |||||
} | |||||
if(strlen(group_name) > 2) | |||||
{ | |||||
if(utf8_in(&c,group_name,0) < 2) | |||||
{ | |||||
fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum); | |||||
error_count++; | |||||
} | |||||
group_name[2] = 0; | |||||
} | } | ||||
group_name[2] = 0; | |||||
} | } | ||||
continue; | continue; | ||||
} | } | ||||
prule = compile_rule(buf); | |||||
if((prule != NULL) && (n_rules < N_RULES)) | |||||
switch(compile_mode) | |||||
{ | { | ||||
rules[n_rules++] = prule; | |||||
case 1: // .group | |||||
prule = compile_rule(buf); | |||||
if((prule != NULL) && (n_rules < N_RULES)) | |||||
{ | |||||
rules[n_rules++] = prule; | |||||
} | |||||
break; | |||||
case 2: // .replace | |||||
{ | |||||
int replace1; | |||||
int replace2; | |||||
char *p; | |||||
p = buf; | |||||
replace1 = 0; | |||||
replace2 = 0; | |||||
while(isspace2(*p)) p++; | |||||
ix = 0; | |||||
while((unsigned char)(*p) > 0x20) // not space or zero-byte | |||||
{ | |||||
p += utf8_in(&c,p,0); | |||||
replace1 += (c << ix); | |||||
ix += 16; | |||||
} | |||||
while(isspace2(*p)) p++; | |||||
ix = 0; | |||||
while((unsigned char)(*p) > 0x20) | |||||
{ | |||||
p += utf8_in(&c,p,0); | |||||
replace2 += (c << ix); | |||||
ix += 16; | |||||
} | |||||
if(replace1 != 0) | |||||
{ | |||||
fwrite(&replace1,1,4,f_out); | |||||
fwrite(&replace2,1,4,f_out); | |||||
} | |||||
} | |||||
break; | |||||
} | } | ||||
} | } | ||||
fclose(f_temp); | fclose(f_temp); |
int ix; | int ix; | ||||
char *p; | char *p; | ||||
char *p_name; | char *p_name; | ||||
unsigned int *pw; | |||||
unsigned char c, c2; | unsigned char c, c2; | ||||
int len; | int len; | ||||
int rule_count; | |||||
n_groups2 = 0; | n_groups2 = 0; | ||||
for(ix=0; ix<256; ix++) | for(ix=0; ix<256; ix++) | ||||
} | } | ||||
p++; | p++; | ||||
if(p[0] == RULE_REPLACEMENTS) | |||||
{ | |||||
pw = (unsigned int *)(((int)p+4) & ~3); // advance to next word boundary | |||||
langopts.replace_chars = pw; | |||||
while(pw[0] != 0) | |||||
{ | |||||
pw += 2; // find the end of the replacement list, each entry is 2 words. | |||||
} | |||||
p = (char *)(pw+1); | |||||
continue; | |||||
} | |||||
if(p[0] == RULE_LETTERGP2) | if(p[0] == RULE_LETTERGP2) | ||||
{ | { | ||||
ix = p[1] - 'A'; | ix = p[1] - 'A'; | ||||
} | } | ||||
// skip over all the rules in this group | // skip over all the rules in this group | ||||
rule_count = 0; | |||||
while(*p != RULE_GROUP_END) | while(*p != RULE_GROUP_END) | ||||
{ | { | ||||
p += (strlen(p) + 1); | p += (strlen(p) + 1); | ||||
rule_count++; | |||||
} | } | ||||
p++; | p++; | ||||
} | } |
} // end of ConvertToItf8 | } // end of ConvertToItf8 | ||||
//****************************************************************************************************** | |||||
//#define calcspeedtab | //#define calcspeedtab |
if(langopts.numbers & 0x200) | if(langopts.numbers & 0x200) | ||||
{ | { | ||||
// remove vowel from the end of tens if units starts with a vowel (LANG=Italian) | // remove vowel from the end of tens if units starts with a vowel (LANG=Italian) | ||||
ix = strlen(ph_tens)-1; | |||||
if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS) | |||||
next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type; | |||||
if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL)) | |||||
ph_tens[ix] = 0; | |||||
if((ix = strlen(ph_tens)-1) >= 0) | |||||
{ | |||||
if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS) | |||||
next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type; | |||||
if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL)) | |||||
ph_tens[ix] = 0; | |||||
} | |||||
} | } | ||||
sprintf(ph_out,"%s%s",ph_tens,ph_digits); | sprintf(ph_out,"%s%s",ph_tens,ph_digits); | ||||
} | } | ||||
decimal_point = 0; | decimal_point = 0; | ||||
} | } | ||||
} | } | ||||
if(ph_out[0] != 0) | |||||
if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH)) | |||||
{ | { | ||||
int next_char; | int next_char; | ||||
utf8_in(&next_char,&word[n_digits+1],0); | utf8_in(&next_char,&word[n_digits+1],0); |
#include "translate.h" | #include "translate.h" | ||||
#include "wave.h" | #include "wave.h" | ||||
const char *version_string = "1.29.10 16.Oct.07"; | |||||
const char *version_string = "1.29.11 23.Oct.07"; | |||||
const int version_phdata = 0x012901; | const int version_phdata = 0x012901; | ||||
int option_device_number = -1; | int option_device_number = -1; |
#define L_qa 0x716100 | #define L_qa 0x716100 | ||||
#define L_grc 0x677263 // grc Ancient Greek | |||||
#define OFFSET_GREEK 0x380 | #define OFFSET_GREEK 0x380 | ||||
#define OFFSET_DEVANAGARI 0x900 | #define OFFSET_DEVANAGARI 0x900 | ||||
static const unsigned int replace_cyrillic[] = | |||||
{0x430,0x431,0x446,0x45b,0x447,0x45f,0x455,0x434,0x452, | |||||
0x435,0x444,0x433,0x445,0x438,0x458,0x43a,0x459, | |||||
0x43b,0x43c,0x45a,0x43d,0x43e,0x43f,0x440,0x441, | |||||
0x448,0x442,0x443,0x432,0x437,0x436, | |||||
0x453,0x45c,0}; // ѓ ѕ ќ | |||||
static const unsigned int replace_cyrillic_latin[] = | |||||
{'a','b','c',0x107,0x10d,'d'+(0x17e<<16),'d'+('z'<<16),'d',0x111, | |||||
'e','f','g','h','i','j','k','l'+('j'<<16), | |||||
'l','m','n'+('j'<<16),'n','o','p','r','s', | |||||
0x161,'t','u','v','z',0x17e, | |||||
0x111,0x107,0}; | |||||
static const unsigned int replace_cyrillic_latin[] = | |||||
{0x430,'a', | |||||
0x431,'b', | |||||
0x446,'c', | |||||
0x45b,0x107, | |||||
0x447,0x10d, | |||||
0x45f,'d'+(0x17e<<16), | |||||
0x455,'d'+('z'<<16), | |||||
0x434,'d', | |||||
0x452,0x111, | |||||
0x435,'e', | |||||
0x444,'f', | |||||
0x433,'g', | |||||
0x445,'h', | |||||
0x438,'i', | |||||
0x458,'j', | |||||
0x43a,'k', | |||||
0x459,'l'+('j'<<16), | |||||
0x43b,'l', | |||||
0x43c,'m', | |||||
0x45a,'n'+('j'<<16), | |||||
0x43d,'n', | |||||
0x43e,'o', | |||||
0x43f,'p', | |||||
0x440,'r', | |||||
0x441,'s', | |||||
0x448,0x161, | |||||
0x442,'t', | |||||
0x443,'u', | |||||
0x432,'v', | |||||
0x437,'z', | |||||
0x436,0x17e, | |||||
0x453,0x111, | |||||
0x45c,0x107, | |||||
0}; // ѓ ѕ ќ | |||||
void SetupTranslator(Translator *tr, int *lengths, int *amps) | void SetupTranslator(Translator *tr, int *lengths, int *amps) | ||||
break; | break; | ||||
case L('e','l'): // Greek | case L('e','l'): // Greek | ||||
case L_grc: // Ancient Greek | |||||
{ | { | ||||
static int stress_lengths_el[8] = {155, 180, 210, 210, 0, 0, 270, 300}; | static int stress_lengths_el[8] = {155, 180, 210, 210, 0, 0, 270, 300}; | ||||
static int stress_amps_el[8] = {15,12, 20,20, 20,24, 24,22 }; // 'diminished' is used to mark a quieter, final unstressed syllable | static int stress_amps_el[8] = {15,12, 20,20, 20,24, 24,22 }; // 'diminished' is used to mark a quieter, final unstressed syllable | ||||
tr->langopts.numbers = 0xb09; | tr->langopts.numbers = 0xb09; | ||||
tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands | tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands | ||||
if(name2 == L_grc) | |||||
{ | |||||
// ancient greek | |||||
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; | |||||
} | |||||
} | } | ||||
break; | break; | ||||
case L('h','i'): | case L('h','i'): | ||||
{ | { | ||||
static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f}; | static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f}; | ||||
static const unsigned int replace_chars_hi[11] = {0x966,0x967,0x968,0x969,0x96a,0x96b,0x96c,0x96d,0x96e,0x96f,0}; // digits 0-9 | |||||
static const unsigned int replacement_chars_hi[11] = {0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0}; | |||||
static int stress_lengths_hi[8] = {190, 190, 210, 210, 0, 0, 230, 250}; | static int stress_lengths_hi[8] = {190, 190, 210, 210, 0, 0, 230, 250}; | ||||
static int stress_amps_hi[8] = {17,14, 20,19, 20,24, 24,22 }; | static int stress_amps_hi[8] = {17,14, 20,19, 20,24, 24,22 }; | ||||
tr->langopts.numbers = 0x811; | tr->langopts.numbers = 0x811; | ||||
tr->langopts.numbers2 = 0x100; | tr->langopts.numbers2 = 0x100; | ||||
tr->letter_bits_offset = OFFSET_DEVANAGARI; | tr->letter_bits_offset = OFFSET_DEVANAGARI; | ||||
tr->langopts.replace_chars = replace_chars_hi; | |||||
tr->langopts.replacement_chars = replacement_chars_hi; | |||||
memset(tr->letter_bits,0,sizeof(tr->letter_bits)); | memset(tr->letter_bits,0,sizeof(tr->letter_bits)); | ||||
SetLetterBitsRange(tr,LETTERGP_A,0x06,0x14); // vowel letters | SetLetterBitsRange(tr,LETTERGP_A,0x06,0x14); // vowel letters | ||||
tr->langopts.numbers = 0x1c0d + 0x4000 + NUM_ROMAN_UC; | tr->langopts.numbers = 0x1c0d + 0x4000 + NUM_ROMAN_UC; | ||||
tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards | tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards | ||||
tr->langopts.replace_chars = replace_cyrillic; | |||||
tr->langopts.replacement_chars = replace_cyrillic_latin; | |||||
tr->langopts.replace_chars = replace_cyrillic_latin; | |||||
SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
SetLetterVowel(tr,'r'); | SetLetterVowel(tr,'r'); | ||||
{ | { | ||||
static int stress_amps_hu[8] = {17,17, 19,19, 20,24, 24,22 }; | static int stress_amps_hu[8] = {17,17, 19,19, 20,24, 24,22 }; | ||||
static int stress_lengths_hu[8] = {185,195, 195,190, 0,0, 210,220}; | static int stress_lengths_hu[8] = {185,195, 195,190, 0,0, 210,220}; | ||||
static const unsigned int replace_chars_hu[] = {0xd4,0xf4,0xdb,0xfb,0}; | |||||
static const unsigned int replacement_chars_hu[] = {0x150,0x151,0x170,0x171,0}; // allow o,u-circumflex for o,u-double-acute | |||||
tr = new Translator(); | tr = new Translator(); | ||||
SetupTranslator(tr,stress_lengths_hu,stress_amps_hu); | SetupTranslator(tr,stress_lengths_hu,stress_amps_hu); | ||||
tr->charset_a0 = charsets[2]; // ISO-8859-2 | tr->charset_a0 = charsets[2]; // ISO-8859-2 | ||||
tr->langopts.replace_chars = replace_chars_hu; | |||||
tr->langopts.replacement_chars = replacement_chars_hu; | |||||
tr->langopts.vowel_pause = 0x20; | tr->langopts.vowel_pause = 0x20; | ||||
tr->langopts.stress_rule = 0; | tr->langopts.stress_rule = 0; | ||||
tr->langopts.stress_rule = 4; // antipenultimate | tr->langopts.stress_rule = 4; // antipenultimate | ||||
tr->langopts.numbers = 0x0c29 + 0x4000; | tr->langopts.numbers = 0x0c29 + 0x4000; | ||||
tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards | tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards | ||||
tr->langopts.replace_chars = replace_cyrillic_latin; | |||||
tr->langopts.replacement_chars = replace_cyrillic; | |||||
} | } | ||||
break; | break; | ||||
{ | { | ||||
static int stress_lengths_ro[8] = {170, 170, 180, 180, 0, 0, 240, 260}; | static int stress_lengths_ro[8] = {170, 170, 180, 180, 0, 0, 240, 260}; | ||||
static int stress_amps_ro[8] = {15,13, 18,18, 20,22, 22,22 }; | static int stress_amps_ro[8] = {15,13, 18,18, 20,22, 22,22 }; | ||||
static const unsigned int replace_chars_ro[5] = {0x218,0x219,0x21a,0x21b,0}; | |||||
static const unsigned int replacement_chars_ro[5] = {0x15e,0x15f,0x162,0x163,0}; // replace s-comma, t-comma by s-cedilla, t-cedilla | |||||
tr = new Translator(); | tr = new Translator(); | ||||
SetupTranslator(tr,stress_lengths_ro,stress_amps_ro); | SetupTranslator(tr,stress_lengths_ro,stress_amps_ro); | ||||
tr->langopts.stress_flags = 0x100 + 0x6; | tr->langopts.stress_flags = 0x100 + 0x6; | ||||
tr->charset_a0 = charsets[2]; // ISO-8859-2 | tr->charset_a0 = charsets[2]; // ISO-8859-2 | ||||
tr->langopts.replace_chars = replace_chars_ro; | |||||
tr->langopts.replacement_chars = replacement_chars_ro; | |||||
tr->langopts.numbers = 0x1829+0x6000 + NUM_ROMAN; | tr->langopts.numbers = 0x1829+0x6000 + NUM_ROMAN; | ||||
tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex | tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex | ||||
} | } |
if(!found && iswdigit(first_char)) | if(!found && iswdigit(first_char)) | ||||
{ | { | ||||
Lookup("_0lang",word_phonemes); | |||||
if(word_phonemes[0] == phonSWITCH) | |||||
return(0); | |||||
found = TranslateNumber(word,phonemes,&dictionary_flags,wflags); | found = TranslateNumber(word,phonemes,&dictionary_flags,wflags); | ||||
} | } | ||||
} // end of EmbeddedCommand | } // end of EmbeddedCommand | ||||
int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert) | |||||
{//===================================================================================================== | |||||
// To allow language specific examination and replacement of characters | |||||
int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert) | |||||
{//================================================================================== | |||||
int ix; | int ix; | ||||
unsigned int word; | unsigned int word; | ||||
unsigned int new_c, c2, c_lower; | unsigned int new_c, c2, c_lower; | ||||
int upper_case = 0; | int upper_case = 0; | ||||
static int ignore_next = 0; | static int ignore_next = 0; | ||||
const unsigned int *replace_chars; | |||||
if(ignore_next) | if(ignore_next) | ||||
{ | { | ||||
} | } | ||||
if(c == 0) return(0); | if(c == 0) return(0); | ||||
if(langopts.replace_chars == NULL) | |||||
if((replace_chars = tr->langopts.replace_chars) == NULL) | |||||
return(c); | return(c); | ||||
// there is a list of character codes to be substituted with alternative codes | // there is a list of character codes to be substituted with alternative codes | ||||
} | } | ||||
new_c = 0; | new_c = 0; | ||||
for(ix=0; (word = langopts.replace_chars[ix]) != 0; ix++) | |||||
for(ix=0; (word = replace_chars[ix]) != 0; ix+=2) | |||||
{ | { | ||||
if(c_lower == (word & 0xffff)) | if(c_lower == (word & 0xffff)) | ||||
{ | { | ||||
if((word >> 16) == 0) | if((word >> 16) == 0) | ||||
{ | { | ||||
new_c = langopts.replacement_chars[ix]; | |||||
new_c = replace_chars[ix+1]; | |||||
break; | break; | ||||
} | } | ||||
if((word >> 16) == (unsigned int)tolower(next_in)) | if((word >> 16) == (unsigned int)tolower(next_in)) | ||||
{ | { | ||||
new_c = langopts.replacement_chars[ix]; | |||||
new_c = replace_chars[ix+1]; | |||||
ignore_next = 1; | ignore_next = 1; | ||||
break; | break; | ||||
} | } | ||||
if(upper_case) | if(upper_case) | ||||
new_c = towupper(new_c); | new_c = towupper(new_c); | ||||
return(new_c); | return(new_c); | ||||
} | |||||
int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert) | |||||
{//===================================================================================================== | |||||
// To allow language specific examination and replacement of characters | |||||
return(SubstituteChar(this,c,next_in,insert)); | |||||
} | } | ||||
#define RULE_LETTERGP 17 // A B C H F G Y letter group number | #define RULE_LETTERGP 17 // A B C H F G Y letter group number | ||||
#define RULE_LETTERGP2 18 // L + letter group number | #define RULE_LETTERGP2 18 // L + letter group number | ||||
#define RULE_CAPITAL 19 // word starts with a capital letter | #define RULE_CAPITAL 19 // word starts with a capital letter | ||||
#define RULE_REPLACEMENTS 20 // section for character replacements | |||||
#define RULE_NO_SUFFIX 24 // N | #define RULE_NO_SUFFIX 24 // N | ||||
#define RULE_NOTVOWEL 25 // K | #define RULE_NOTVOWEL 25 // K | ||||
#define RULE_IFVERB 26 // V | #define RULE_IFVERB 26 // V | ||||
#define NUM_ROMAN 0x20000 | #define NUM_ROMAN 0x20000 | ||||
#define NUM_ROMAN_UC 0x40000 | #define NUM_ROMAN_UC 0x40000 | ||||
// bits0-1=which numbers routine to use. | // bits0-1=which numbers routine to use. | ||||
// bit2= thousands separator must be space | // bit2= thousands separator must be space | ||||
// bit3= , decimal separator, not . | // bit3= , decimal separator, not . | ||||
// bit16=dot after number indicates ordinal | // bit16=dot after number indicates ordinal | ||||
// bit17=recognize roman numbers | // bit17=recognize roman numbers | ||||
// bit18=Roman numbers only if upper case | // bit18=Roman numbers only if upper case | ||||
int numbers; | int numbers; | ||||
// bits 1-4 use variant form of numbers before thousands,millions,etc. | // bits 1-4 use variant form of numbers before thousands,millions,etc. | ||||
// bit7=(LANG-ru) use MB for 1 thousand, million, etc | // bit7=(LANG-ru) use MB for 1 thousand, million, etc | ||||
// bit8=(LANG=sw) special word for 100,000s | // bit8=(LANG=sw) special word for 100,000s | ||||
int numbers2; | int numbers2; | ||||
int max_roman; | int max_roman; | ||||
int thousands_sep; | int thousands_sep; | ||||
int decimal_sep; | int decimal_sep; | ||||
char ideographs; // treat as separate words | char ideographs; // treat as separate words | ||||
int testing; // testing options: bit 1= specify stressed syllable in the form: "outdoor/2" | int testing; // testing options: bit 1= specify stressed syllable in the form: "outdoor/2" | ||||
const unsigned int *replace_chars; // characters to be substitutes | const unsigned int *replace_chars; // characters to be substitutes | ||||
const unsigned int *replacement_chars; // substitutes for replace_chars | |||||
} LANGUAGE_OPTIONS; | } LANGUAGE_OPTIONS; | ||||