git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@93 d46cf337-b52f-0410-862d-fd96e6ae7743master
@@ -91,21 +91,26 @@ bethlehem bEt_liEm | |||
brittanje $2 | |||
ceylon s@lOn | |||
china S'ina | |||
(graaff reinet) x2rA:f||r@n'Et | |||
italië it'A:li;@ | |||
jerusalem j@r'ysalEm | |||
kanada kanad%a | |||
knysna neIsna | |||
kongo kONgu | |||
mesopotamië mEsuput'A:mi@ | |||
mosambiek musamb'ik | |||
potchefstroom pOtSIfstr'o@m | |||
(thaba nchu) tab'A:||ntSu | |||
europa Y@r'o@pa | |||
outeniekwa @Ut@n'ikwa | |||
portugal $1 | |||
potchefstroom pOtSIfstr'o@m | |||
pretoria prit'o@ria | |||
stellenbosch st%&l@mbOs | |||
tunisië $2 | |||
turkye $2 | |||
upington apiNt@n | |||
worcester v'ust@r | |||
zimbabwe zI:mb'ab_wE | |||
zimbabwe zI:mb'ab_wE | |||
@@ -124,16 +129,19 @@ bester b'Est@r | |||
botha bo@ta | |||
breytenbach breIt@nbax2 | |||
carl kA:r@L | |||
cecilia s@si:lia | |||
charles _^_EN | |||
charlie tSA:li | |||
chopin S%OpA~ | |||
chris krIs | |||
christo krIstu | |||
christian krIstian | |||
christelle kr@st&l | |||
christine kr@st'i:n | |||
coetzee kuts'e@ | |||
cronjé krOnj'e@ | |||
debora d@bo@ra | |||
debussy d@bus'i: | |||
der d@r | |||
deventer d'e@v@nt@r | |||
du $u | |||
@@ -187,6 +195,7 @@ martha marta | |||
martin m'A:rt@n | |||
michelle miS'&l | |||
michiel $2 | |||
miriam mIri@m | |||
mostert m'Ost@rt | |||
mozart mo@tsart | |||
naomi na_'o@mi | |||
@@ -195,11 +204,14 @@ naudé nOd'e@ | |||
paul p@Ul | |||
paulus p@UlWs | |||
philip fIl@p | |||
phillips fIl@ps | |||
pierre p'e@r | |||
petrus pe@trWs | |||
phoebe fi:bi | |||
rachmaninoff rax2m'aninOf | |||
rebekka r@bEka | |||
renée r@neI | |||
retief r@tif | |||
ronel run'&l | |||
rousseau r@s@U | |||
roux r'u | |||
@@ -216,6 +228,7 @@ theron tr'On | |||
viljoen $2 | |||
villiers vIli@rs | |||
violet _^_EN $capital | |||
william _^_EN | |||
marais mar'E: | |||
mandela mand'E:la | |||
celliers sIlj'e@ | |||
@@ -292,7 +305,7 @@ alhoewel alhuv'&l $pause | |||
tensy $2 $pause | |||
aangesien $pause | |||
wie $pause | |||
wanneer $pause | |||
wanneer $1 $pause | |||
waar $pause | |||
waarom $pause | |||
waarheen $pause | |||
@@ -376,7 +389,6 @@ bv beIf'o@rbe@lt $dot | |||
// main word list | |||
aanbid $2 | |||
aanstaande $2 | |||
adagio ad'A:dZi;%@U | |||
algaande $2 | |||
@@ -468,7 +480,6 @@ hokaai hOkAI | |||
hoofsaaklik $2 | |||
idee $2 | |||
ietwat itvat | |||
ignoreer ix2no@r'e@r | |||
inagneming @nax2ne@m@N | |||
inkluis $2 | |||
@@ -476,16 +487,16 @@ intens @nt'Ens | |||
intensiteit $4 | |||
jawoord jA:vo@rt | |||
jupiter jupit@r | |||
kafee kaf'e@ | |||
kapitaal kapit'A:l | |||
kaviaar kavi'A:r | |||
komaan kOmA:n | |||
komberse kOmb'&rs@ | |||
kombers kOmb'&rs | |||
kopiereg kup'ir&x2 | |||
kritiek krIt'ik | |||
kultuur $2 | |||
kulture $2 | |||
kunsmatige kWnsm'A:t@x2@ | |||
kwansuis $2 | |||
@@ -497,6 +508,7 @@ lekkerste l&k@rst@ | |||
macaroni makar'o@ni | |||
madame mad'A:m | |||
makabere m%ak'A:b@r@ | |||
maskering mask'e@r@N $only | |||
meegee me@x2e@ | |||
memoriam mEm'o@riam | |||
merlot m&rl'o@ | |||
@@ -608,6 +620,8 @@ uiteindelik $2 | |||
uiteraard Yyt@r_'A:rt | |||
vaarwel fA:rv'&l | |||
vakant f%ak'ant | |||
vakante f%ak'ant@ | |||
vanaf fanaf | |||
vandat $1 | |||
vanne fan@ | |||
@@ -619,7 +633,6 @@ vererger f@r&rg@r | |||
ver f'&:r | |||
verg f&rx2 | |||
vergete f@rx2e@t@ | |||
vergewe f@rx2e@v@ | |||
vermy $2 | |||
verre f&:r@ | |||
verregaande f&r@x2'A:nd@ |
@@ -127,16 +127,15 @@ tS v w z Z z2 | |||
Dictionary hi_dict | |||
@ @- @2 @3 a a: aI aU | |||
e E e: E: E~ i I i: | |||
l- o O o: O: o~ O~ r- | |||
U u: u~ V | |||
@ @2 @3 a a: aI aU E | |||
e: E: E~ I i: O o: O: | |||
O~ r- U u: V | |||
- : b c ch d d. f | |||
g h H j J k kh l | |||
m n N n. n^ p ph Q | |||
q r s S s. t T t. | |||
t.h th v w x z | |||
: b c ch d d. f g | |||
h H j J k kh l m | |||
n N n. n^ p ph Q q | |||
r s S s. t T t. t.h | |||
th v w x z | |||
Dictionary hr_dict | |||
@@ -155,12 +154,12 @@ tS; v x z Z | |||
Dictionary hu_dict | |||
A a: E e: i i: o o: | |||
u u: Y y y: Y: | |||
u u: Y Y: | |||
: b c C d dZ f g | |||
h j J k l l^ m n | |||
N n^ p R R2 s S s2 | |||
t tS ts v z Z | |||
: b c d dZ f g h | |||
j J k l m n n^ p | |||
R R2 s S s2 t tS ts | |||
v z Z | |||
Dictionary it_dict | |||
@@ -288,7 +287,7 @@ Dictionary pt_dict | |||
& &/ &U~ &~ @ @- a A | |||
aI aU e E eI EI eU EU | |||
i i/ iU i~ o O oI OI | |||
e~ i i/ iU o O oI OI | |||
o~ u U uI u~ y | |||
* : ; b C d dZ f | |||
@@ -300,17 +299,14 @@ w x z Z | |||
Dictionary ro_dict | |||
@ @- @I @U a aI aU e | |||
ea eI eo eU i i/ I^ iI | |||
iU o O Oa oI oU u uI | |||
y Y yI yU | |||
@ @- @I a aI aU e ea | |||
eI eU i I^ iI o Oa oI | |||
oU u uI y | |||
* *; b b; c C d d; | |||
dZ f f; g h j k l | |||
l; m m; n N n; p p; | |||
r s S S; t T t; tS | |||
ts ts; v v; w w2 x z | |||
Z z; Z; | |||
* *; b c d dZ f g | |||
h j k l m m; n p | |||
r s S S; t tS ts ts; | |||
v w w2 x z Z | |||
Dictionary is_dict | |||
@@ -337,13 +333,16 @@ h j k l m n N p | |||
r R s t w z | |||
Dictionary grc_dict | |||
Dictionary mk_dict | |||
& @ @- @2 a A a: E | |||
e e: E~ i I i: l- o | |||
o: oU r- u u: y | |||
& @ @- @2 a E e i | |||
I o r- u | |||
* b d dZ dZ; f g h | |||
j k k^ l l^ m n N | |||
n^ p r R s S t tS | |||
ts tS; v x z Z | |||
* b d dZ dZ; f g j | |||
k k^ l l^ m n n^ p | |||
r R s S t tS ts v | |||
x z Z |
@@ -10,6 +10,19 @@ | |||
// ?3 use diphthong for "au" | |||
.replace | |||
० 0 | |||
१ 1 | |||
२ 2 | |||
३ 3 | |||
४ 4 | |||
५ 5 | |||
६ 6 | |||
७ 7 | |||
८ 8 | |||
९ 9 | |||
// Vowels | |||
.group अ |
@@ -2,6 +2,12 @@ | |||
// This file is UTF-8 encoded | |||
.replace | |||
// allow o,u-circumflex for o,u-double-acute | |||
ô ő | |||
û ű | |||
.group a | |||
a A | |||
_) a (_ %A |
@@ -1,6 +1,42 @@ | |||
// translation rules for Macedonian | |||
// This file is UTF-8 encoded | |||
.replace | |||
a а | |||
b б | |||
c ц | |||
ć ћ | |||
č ч | |||
dž џ | |||
dz ѕ | |||
d д | |||
đ ђ | |||
e е | |||
f ф | |||
g г | |||
h х | |||
i и | |||
j ј | |||
k к | |||
lj љ | |||
l л | |||
m м | |||
nj њ | |||
n н | |||
o о | |||
p п | |||
r р | |||
s с | |||
š ш | |||
t т | |||
u у | |||
v в | |||
z з | |||
ž ж | |||
đ ѓ | |||
ć ќ | |||
.group а | |||
а a | |||
@@ -3,6 +3,10 @@ | |||
// This file is UTF-8 encoded | |||
// replace s-comma, t-comma by s-cedilla, t-cedilla | |||
.replace | |||
ș ş | |||
ț ţ | |||
.group a |
@@ -1,7 +1,7 @@ | |||
39 phoneme tables | |||
40 phoneme tables | |||
new total | |||
base 96 96 | |||
base2 23 114 | |||
base2 24 115 | |||
en 53 144 | |||
en_n 30 144 | |||
en_us 37 144 | |||
@@ -25,13 +25,14 @@ | |||
mk 3 130 | |||
sr 2 129 | |||
ru 38 126 | |||
it 17 117 | |||
it 17 118 | |||
la 21 114 | |||
es 6 114 | |||
pt 28 131 | |||
es 6 115 | |||
pt 27 131 | |||
pt_pt 20 131 | |||
ro 36 138 | |||
el 8 114 | |||
ro 36 139 | |||
el 8 115 | |||
grc 7 120 | |||
sv 25 118 | |||
no 28 122 | |||
is 32 121 | |||
@@ -215,7 +216,7 @@ | |||
30 r/trr base af de fi nl ru ro sv sw | |||
11 r/xr base | |||
2 ufric/ch base de | |||
3 ufric/f base de ro | |||
4 ufric/f base de ro grc | |||
2 ufric/f_ base ro | |||
5 ufric/h_ base fi hi la | |||
6 ufric/h@ base fi hi la | |||
@@ -272,7 +273,7 @@ | |||
8 ustop/ts_pzd base hi ru | |||
2 ustop/ts_pzd_ hi hu | |||
2 ustop/ts_pzd2 hi hu | |||
2 vdiph/0i pt vi | |||
3 vdiph/0i pt grc vi | |||
3 vdiph/0i_2 en_sc no en_wi | |||
3 vdiph2/e@ en_sc en_wi | |||
1 vdiph2/ea ro | |||
@@ -323,10 +324,10 @@ | |||
1 vdiph/au_3 en_rp | |||
6 vdiph/au_4 base2 cy eo sk it is | |||
1 vdiph/ee-e hi | |||
5 vdiph/eei en de nl pt vi | |||
5 vdiph/eei base2 en de nl vi | |||
3 vdiph/eei_2 en_us eo fi | |||
2 vdiph/eei_3 en_rp sk | |||
3 vdiph/eeu pt vi zhy | |||
4 vdiph/eeu pt grc vi zhy | |||
2 vdiph/eeu_2 la pt_pt | |||
2 vdiph/eeu_3 en_n en_wm | |||
1 vdiph/eey fi | |||
@@ -346,7 +347,7 @@ | |||
9 vdiph/ooi en en_n en_us cy eo fi no zhy | |||
1 vdiph/ooi_2 af | |||
2 vdiph/ooi_3 en_rp en_wm | |||
1 vdiph/oou cs | |||
2 vdiph/oou cs grc | |||
2 vdiph/ou fi zhy | |||
2 vdiph/ou_2 sk ro | |||
2 vdiph/ou_3 is | |||
@@ -363,7 +364,7 @@ | |||
1 vdiph/Vu_2 en_wm | |||
1 vdiph/Vu_3 nl | |||
2 vdiph/&y fi nl | |||
2 vdiph/yi fi no | |||
3 vdiph/yi fi grc no | |||
1 vdiph/y#i fi | |||
1 vdiph/y#i_2 is | |||
1 vdiph/yi_fr fr | |||
@@ -416,7 +417,7 @@ | |||
4 vowel/& en_rp fi hi sv | |||
4 vowel/0 base2 en hi pt | |||
3 vowel/0_2 en_n pt_pt sw | |||
5 vowel/0_3 en_us en_sc en_rp en_wm hu | |||
4 vowel/0_3 en_us en_sc en_rp hu | |||
2 vowel/@_2 fr | |||
2 vowel/&_2 en_us | |||
6 vowel/@_3 en_sc de hi | |||
@@ -480,7 +481,7 @@ | |||
2 vowel/ii_6 en_wm | |||
1 vowel/ii_en en | |||
10 vowel/@_low en_rp hi ro no | |||
12 vowel/o base2 en en_wm de hi it la pt_pt sv en_wi | |||
10 vowel/o base2 en de hi it la pt_pt sv en_wi | |||
4 vowel/o_2 cy hi hu no | |||
2 vowel/o-_2 en_n en_wm | |||
3 vowel/o_3 en_sc fr | |||
@@ -493,10 +494,10 @@ | |||
1 vowel/oe_4 sv | |||
2 vowel/o_mid fr hu | |||
12 vowel/oo en_sc de eo la es el sv no zhy en_wi | |||
10 vowel/oo_1 en_n en_rp en_wm af fi sk hr vi | |||
12 vowel/oo_1 en_n en_rp en_wm af fi sk hr vi | |||
3 vowel/oo_2 en_sc cy cs | |||
1 vowel/oo_3 af | |||
5 vowel/oo_4 hi pl it en_wi | |||
6 vowel/oo_4 en_wm hi pl it en_wi | |||
1 vowel/oo_5 is | |||
6 vowel/oo_en en en_n en_rp | |||
2 vowelr/aa_r en_sc | |||
@@ -538,11 +539,11 @@ | |||
1 vowel/yy fr_ca | |||
1 vowel/yy_2 no | |||
1 vowel/yy_3 sv | |||
5 vowel/yy_4 de hu la is | |||
6 vowel/yy_4 de hu la grc is | |||
1 vwl_af/@ af | |||
1 vwl_af/I af | |||
2 vwl_af/r@ af | |||
6 vwl_en/aI@ en en_n en_us en_sc en_rp en_wm | |||
5 vwl_en/aI@ en en_n en_us en_sc en_rp | |||
2 vwl_en/aI@_2 en_sc | |||
5 vwl_en/aU@ en en_n en_us en_sc en_wm | |||
12 vwl_en/@L en en_us en_sc en_rp en_wm af | |||
@@ -550,6 +551,7 @@ | |||
1 vwl_en_n/aa_5 en_n | |||
2 vwl_en_n/O@ en_n | |||
1 vwl_en_n/u_ en_n | |||
1 vwl_en/ooi@ en_wm | |||
3 vwl_en_rp/aa en_rp | |||
1 vwl_en_rp/aU@ en_rp | |||
1 vwl_en_rp/e@ en_rp |
@@ -27,3 +27,4 @@ | |||
0 @- NULL 0 NULL | |||
0 aI@ NULL 60 aI @ | |||
0 aU@ NULL 75 aU @ | |||
0 x NULL 0 k |
@@ -12,6 +12,7 @@ | |||
0 w/ NULL 0 w | |||
0 ; NULL 0 NULL | |||
0 g- NULL 0 NULL | |||
0 x NULL 0 k_h | |||
0 @- NULL 0 NULL | |||
0 aI@ NULL 60 AI r | |||
0 aU@ NULL 75 aU r |
@@ -12,6 +12,7 @@ | |||
0 w/ NULL 0 w | |||
0 ; NULL 0 NULL | |||
0 g- NULL 0 NULL | |||
0 x NULL 0 k | |||
0 @- NULL 0 NULL | |||
0 aI@ NULL 60 AI r | |||
0 aU@ NULL 75 aU r |
@@ -107,6 +107,13 @@ phoneme eI | |||
endphoneme | |||
phoneme EI | |||
vowel starttype (e) endtype (i) | |||
length 230 | |||
formants vdiph/eei | |||
endphoneme | |||
phoneme oI | |||
vowel starttype (o) endtype (i) | |||
length 240 |
@@ -92,14 +92,14 @@ endphoneme | |||
phoneme I | |||
vowel starttype (e) endtype (e) | |||
vowel starttype (@) endtype (@) | |||
length 110 | |||
formants vowel/e# | |||
before l/2 vowel/@_3-30+l/L2_@L | |||
endphoneme | |||
phoneme I2 | |||
vowel starttype (e) endtype (e) | |||
vowel starttype (@) endtype (@) | |||
unstressed | |||
length 110 | |||
formants vowel/e# |
@@ -62,7 +62,7 @@ endphoneme | |||
phoneme 0 | |||
vowel starttype (o) endtype (o) | |||
length 140 | |||
formants vowel/0_3 | |||
formants vowel/oo_4 | |||
reduceto @ 0 | |||
endphoneme | |||
@@ -146,7 +146,7 @@ endphoneme | |||
phoneme O@ | |||
vowel starttype (o) endtype (@) | |||
length 240 | |||
formants vowel/o | |||
formants vowel/oo_1 | |||
linkout r- | |||
endphoneme | |||
@@ -154,7 +154,7 @@ endphoneme | |||
phoneme O | |||
vowel starttype (o) endtype (o) | |||
length 150 | |||
formants vowel/o | |||
formants vowel/oo_1 | |||
reduceto @ 0 | |||
endphoneme | |||
@@ -189,7 +189,7 @@ endphoneme | |||
phoneme eI | |||
vowel starttype (@) endtype (i) | |||
length 210 | |||
length 230 | |||
formants vdiph/@i_3 | |||
endphoneme | |||
@@ -228,7 +228,7 @@ endphoneme | |||
phoneme aI@ | |||
vowel starttype (a) endtype (@) | |||
length 270 | |||
formants vwl_en/aI@ | |||
formants vwl_en/ooi@ | |||
linkout r- | |||
endphoneme | |||
@@ -0,0 +1,44 @@ | |||
//==================================================== | |||
// Ancient Greek - based on base2 | |||
//==================================================== | |||
phoneme y | |||
vowel starttype (i) endtype (i) | |||
length 160 | |||
formants vowel/yy_4 | |||
endphoneme | |||
phoneme EU | |||
vowel starttype (e) endtype (u) | |||
length 230 | |||
formants vdiph/eeu | |||
endphoneme | |||
phoneme OI | |||
vowel starttype (o) endtype (i) | |||
length 230 | |||
formants vdiph/0i | |||
endphoneme | |||
phoneme OU | |||
vowel starttype (o) endtype (u) | |||
length 230 | |||
formants vdiph/oou | |||
endphoneme | |||
phoneme yI | |||
vowel starttype (i) endtype (i) | |||
length 230 | |||
formants vdiph/yi | |||
endphoneme | |||
phoneme f // consider this an affrictive | |||
vls blb afr | |||
vowelout f1=0 f2=1000 -500 -350 f3=-200 80 | |||
lengthmod 2 | |||
wave ufric/f // could replace this with a [pf] wav file | |||
endphoneme | |||
@@ -138,13 +138,6 @@ phoneme eI | |||
endphoneme | |||
phoneme EI | |||
vowel starttype (e) endtype (i) | |||
length 230 | |||
formants vdiph/eei | |||
endphoneme | |||
phoneme OI | |||
vowel starttype (o) endtype (i) | |||
length 230 |
@@ -1218,6 +1218,9 @@ include ph_romanian | |||
phonemetable el base2 | |||
include ph_greek | |||
phonemetable grc base2 | |||
include ph_greek_ancient | |||
phonemetable sv base | |||
include ph_swedish | |||
@@ -176,7 +176,6 @@ int compile_line(char *linebuf, char *dict_line, int *hash) | |||
char encoded_ph[200]; | |||
unsigned char bad_phoneme[4]; | |||
p = linebuf; | |||
comment = NULL; | |||
phonetic = word = ""; | |||
@@ -347,7 +346,8 @@ int compile_line(char *linebuf, char *dict_line, int *hash) | |||
if((word[0] & 0x80)==0) // 7 bit ascii only | |||
{ | |||
// 1st letter - need to consider utf8 here | |||
// If first letter is uppercase, convert to lower case. (Only if it's 7bit ascii) | |||
// ??? need to consider utf8 here | |||
word[0] = tolower(word[0]); | |||
} | |||
@@ -789,8 +789,6 @@ char *compile_rule(char *input) | |||
for(ix=0; finish==0; ix++) | |||
{ | |||
c = input[ix]; | |||
if((c=='/') && (input[ix+1]=='/')) | |||
c = input[ix] = '\n'; // treat command as end of line | |||
switch(c = input[ix]) | |||
{ | |||
@@ -1167,6 +1165,7 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||
int different; | |||
char *prev_rgroup_name; | |||
unsigned int char_code; | |||
int compile_mode=0; | |||
char *buf; | |||
char buf1[120]; | |||
char *rules[N_RULES]; | |||
@@ -1184,7 +1183,13 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||
{ | |||
linenum++; | |||
buf = fgets(buf1,sizeof(buf1),f_in); | |||
if((buf != NULL) && (buf[0] == '\r')) buf++; // ignore extra \r in \r\n | |||
if(buf != NULL) | |||
{ | |||
if((p = (unsigned char *)strstr(buf,"//")) != NULL) | |||
*p = 0; | |||
if(buf[0] == '\r') buf++; // ignore extra \r in \r\n | |||
} | |||
if((buf != NULL) && (memcmp(buf,".L",2)==0)) | |||
{ | |||
@@ -1196,7 +1201,7 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||
continue; | |||
} | |||
if((buf == NULL) || (memcmp(buf,".group",6)==0)) | |||
if((buf == NULL) || (buf[0] == '.')) | |||
{ | |||
// next .group or end of file, write out the previous group | |||
@@ -1212,46 +1217,106 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||
} | |||
n_rules = 0; | |||
if(buf == NULL) break; // end of file | |||
if(compile_mode == 2) | |||
{ | |||
// end of the character replacements section | |||
fwrite(&n_rules,1,4,f_out); // write a zero word to terminate the replacemenmt list | |||
} | |||
p = (unsigned char *)&buf[6]; | |||
while((p[0]==' ') || (p[0]=='\t')) p++; // Note: Windows isspace(0xe1) gives TRUE ! | |||
ix = 0; | |||
while((*p > ' ') && (ix<12)) | |||
group_name[ix++] = *p++; | |||
group_name[ix]=0; | |||
if(buf == NULL) break; // end of file | |||
if(sscanf(group_name,"0x%x",&char_code)==1) | |||
if(memcmp(buf,".replace",8)==0) | |||
{ | |||
// group character is given as a character code (max 16 bits) | |||
p = (unsigned char *)group_name; | |||
compile_mode = 2; | |||
fputc(RULE_GROUP_START,f_out); | |||
fputc(RULE_REPLACEMENTS,f_out); | |||
if(char_code > 0x100) | |||
{ | |||
*p++ = (char_code >> 8); | |||
} | |||
*p++ = char_code; | |||
*p = 0; | |||
// advance to next word boundary | |||
while((ftell(f_out) & 3) != 0) | |||
fputc(0,f_out); | |||
} | |||
if(strlen(group_name) > 2) | |||
if(memcmp(buf,".group",6)==0) | |||
{ | |||
if(utf8_in(&c,group_name,0) < 2) | |||
compile_mode = 1; | |||
p = (unsigned char *)&buf[6]; | |||
while((p[0]==' ') || (p[0]=='\t')) p++; // Note: Windows isspace(0xe1) gives TRUE ! | |||
ix = 0; | |||
while((*p > ' ') && (ix<12)) | |||
group_name[ix++] = *p++; | |||
group_name[ix]=0; | |||
if(sscanf(group_name,"0x%x",&char_code)==1) | |||
{ | |||
fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum); | |||
error_count++; | |||
// group character is given as a character code (max 16 bits) | |||
p = (unsigned char *)group_name; | |||
if(char_code > 0x100) | |||
{ | |||
*p++ = (char_code >> 8); | |||
} | |||
*p++ = char_code; | |||
*p = 0; | |||
} | |||
if(strlen(group_name) > 2) | |||
{ | |||
if(utf8_in(&c,group_name,0) < 2) | |||
{ | |||
fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum); | |||
error_count++; | |||
} | |||
group_name[2] = 0; | |||
} | |||
group_name[2] = 0; | |||
} | |||
continue; | |||
} | |||
prule = compile_rule(buf); | |||
if((prule != NULL) && (n_rules < N_RULES)) | |||
switch(compile_mode) | |||
{ | |||
rules[n_rules++] = prule; | |||
case 1: // .group | |||
prule = compile_rule(buf); | |||
if((prule != NULL) && (n_rules < N_RULES)) | |||
{ | |||
rules[n_rules++] = prule; | |||
} | |||
break; | |||
case 2: // .replace | |||
{ | |||
int replace1; | |||
int replace2; | |||
char *p; | |||
p = buf; | |||
replace1 = 0; | |||
replace2 = 0; | |||
while(isspace2(*p)) p++; | |||
ix = 0; | |||
while((unsigned char)(*p) > 0x20) // not space or zero-byte | |||
{ | |||
p += utf8_in(&c,p,0); | |||
replace1 += (c << ix); | |||
ix += 16; | |||
} | |||
while(isspace2(*p)) p++; | |||
ix = 0; | |||
while((unsigned char)(*p) > 0x20) | |||
{ | |||
p += utf8_in(&c,p,0); | |||
replace2 += (c << ix); | |||
ix += 16; | |||
} | |||
if(replace1 != 0) | |||
{ | |||
fwrite(&replace1,1,4,f_out); | |||
fwrite(&replace2,1,4,f_out); | |||
} | |||
} | |||
break; | |||
} | |||
} | |||
fclose(f_temp); |
@@ -205,9 +205,9 @@ void Translator::InitGroups(void) | |||
int ix; | |||
char *p; | |||
char *p_name; | |||
unsigned int *pw; | |||
unsigned char c, c2; | |||
int len; | |||
int rule_count; | |||
n_groups2 = 0; | |||
for(ix=0; ix<256; ix++) | |||
@@ -228,6 +228,18 @@ void Translator::InitGroups(void) | |||
} | |||
p++; | |||
if(p[0] == RULE_REPLACEMENTS) | |||
{ | |||
pw = (unsigned int *)(((int)p+4) & ~3); // advance to next word boundary | |||
langopts.replace_chars = pw; | |||
while(pw[0] != 0) | |||
{ | |||
pw += 2; // find the end of the replacement list, each entry is 2 words. | |||
} | |||
p = (char *)(pw+1); | |||
continue; | |||
} | |||
if(p[0] == RULE_LETTERGP2) | |||
{ | |||
ix = p[1] - 'A'; | |||
@@ -266,11 +278,9 @@ void Translator::InitGroups(void) | |||
} | |||
// skip over all the rules in this group | |||
rule_count = 0; | |||
while(*p != RULE_GROUP_END) | |||
{ | |||
p += (strlen(p) + 1); | |||
rule_count++; | |||
} | |||
p++; | |||
} |
@@ -998,6 +998,8 @@ void ConvertToUtf8() | |||
} // end of ConvertToItf8 | |||
//****************************************************************************************************** | |||
//#define calcspeedtab |
@@ -354,12 +354,14 @@ int Translator::LookupNum2(int value, int control, char *ph_out) | |||
if(langopts.numbers & 0x200) | |||
{ | |||
// remove vowel from the end of tens if units starts with a vowel (LANG=Italian) | |||
ix = strlen(ph_tens)-1; | |||
if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS) | |||
next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type; | |||
if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL)) | |||
ph_tens[ix] = 0; | |||
if((ix = strlen(ph_tens)-1) >= 0) | |||
{ | |||
if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS) | |||
next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type; | |||
if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL)) | |||
ph_tens[ix] = 0; | |||
} | |||
} | |||
sprintf(ph_out,"%s%s",ph_tens,ph_digits); | |||
} | |||
@@ -793,7 +795,7 @@ int Translator::TranslateNumber_1(char *word, char *ph_out, unsigned int *flags, | |||
decimal_point = 0; | |||
} | |||
} | |||
if(ph_out[0] != 0) | |||
if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH)) | |||
{ | |||
int next_char; | |||
utf8_in(&next_char,&word[n_digits+1],0); |
@@ -35,7 +35,7 @@ | |||
#include "translate.h" | |||
#include "wave.h" | |||
const char *version_string = "1.29.10 16.Oct.07"; | |||
const char *version_string = "1.29.11 23.Oct.07"; | |||
const int version_phdata = 0x012901; | |||
int option_device_number = -1; |
@@ -38,6 +38,7 @@ | |||
#define L_qa 0x716100 | |||
#define L_grc 0x677263 // grc Ancient Greek | |||
#define OFFSET_GREEK 0x380 | |||
@@ -45,19 +46,41 @@ | |||
#define OFFSET_DEVANAGARI 0x900 | |||
static const unsigned int replace_cyrillic[] = | |||
{0x430,0x431,0x446,0x45b,0x447,0x45f,0x455,0x434,0x452, | |||
0x435,0x444,0x433,0x445,0x438,0x458,0x43a,0x459, | |||
0x43b,0x43c,0x45a,0x43d,0x43e,0x43f,0x440,0x441, | |||
0x448,0x442,0x443,0x432,0x437,0x436, | |||
0x453,0x45c,0}; // ѓ ѕ ќ | |||
static const unsigned int replace_cyrillic_latin[] = | |||
{'a','b','c',0x107,0x10d,'d'+(0x17e<<16),'d'+('z'<<16),'d',0x111, | |||
'e','f','g','h','i','j','k','l'+('j'<<16), | |||
'l','m','n'+('j'<<16),'n','o','p','r','s', | |||
0x161,'t','u','v','z',0x17e, | |||
0x111,0x107,0}; | |||
static const unsigned int replace_cyrillic_latin[] = | |||
{0x430,'a', | |||
0x431,'b', | |||
0x446,'c', | |||
0x45b,0x107, | |||
0x447,0x10d, | |||
0x45f,'d'+(0x17e<<16), | |||
0x455,'d'+('z'<<16), | |||
0x434,'d', | |||
0x452,0x111, | |||
0x435,'e', | |||
0x444,'f', | |||
0x433,'g', | |||
0x445,'h', | |||
0x438,'i', | |||
0x458,'j', | |||
0x43a,'k', | |||
0x459,'l'+('j'<<16), | |||
0x43b,'l', | |||
0x43c,'m', | |||
0x45a,'n'+('j'<<16), | |||
0x43d,'n', | |||
0x43e,'o', | |||
0x43f,'p', | |||
0x440,'r', | |||
0x441,'s', | |||
0x448,0x161, | |||
0x442,'t', | |||
0x443,'u', | |||
0x432,'v', | |||
0x437,'z', | |||
0x436,0x17e, | |||
0x453,0x111, | |||
0x45c,0x107, | |||
0}; // ѓ ѕ ќ | |||
void SetupTranslator(Translator *tr, int *lengths, int *amps) | |||
@@ -132,6 +155,7 @@ Translator *SelectTranslator(const char *name) | |||
break; | |||
case L('e','l'): // Greek | |||
case L_grc: // Ancient Greek | |||
{ | |||
static int stress_lengths_el[8] = {155, 180, 210, 210, 0, 0, 270, 300}; | |||
static int stress_amps_el[8] = {15,12, 20,20, 20,24, 24,22 }; // 'diminished' is used to mark a quieter, final unstressed syllable | |||
@@ -167,6 +191,12 @@ Translator *SelectTranslator(const char *name) | |||
tr->langopts.numbers = 0xb09; | |||
tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands | |||
if(name2 == L_grc) | |||
{ | |||
// ancient greek | |||
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; | |||
} | |||
} | |||
break; | |||
@@ -259,8 +289,6 @@ Translator *SelectTranslator(const char *name) | |||
case L('h','i'): | |||
{ | |||
static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f}; | |||
static const unsigned int replace_chars_hi[11] = {0x966,0x967,0x968,0x969,0x96a,0x96b,0x96c,0x96d,0x96e,0x96f,0}; // digits 0-9 | |||
static const unsigned int replacement_chars_hi[11] = {0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0}; | |||
static int stress_lengths_hi[8] = {190, 190, 210, 210, 0, 0, 230, 250}; | |||
static int stress_amps_hi[8] = {17,14, 20,19, 20,24, 24,22 }; | |||
@@ -274,8 +302,6 @@ Translator *SelectTranslator(const char *name) | |||
tr->langopts.numbers = 0x811; | |||
tr->langopts.numbers2 = 0x100; | |||
tr->letter_bits_offset = OFFSET_DEVANAGARI; | |||
tr->langopts.replace_chars = replace_chars_hi; | |||
tr->langopts.replacement_chars = replacement_chars_hi; | |||
memset(tr->letter_bits,0,sizeof(tr->letter_bits)); | |||
SetLetterBitsRange(tr,LETTERGP_A,0x06,0x14); // vowel letters | |||
@@ -306,8 +332,7 @@ Translator *SelectTranslator(const char *name) | |||
tr->langopts.numbers = 0x1c0d + 0x4000 + NUM_ROMAN_UC; | |||
tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards | |||
tr->langopts.replace_chars = replace_cyrillic; | |||
tr->langopts.replacement_chars = replace_cyrillic_latin; | |||
tr->langopts.replace_chars = replace_cyrillic_latin; | |||
SetLetterVowel(tr,'y'); | |||
SetLetterVowel(tr,'r'); | |||
@@ -319,14 +344,10 @@ Translator *SelectTranslator(const char *name) | |||
{ | |||
static int stress_amps_hu[8] = {17,17, 19,19, 20,24, 24,22 }; | |||
static int stress_lengths_hu[8] = {185,195, 195,190, 0,0, 210,220}; | |||
static const unsigned int replace_chars_hu[] = {0xd4,0xf4,0xdb,0xfb,0}; | |||
static const unsigned int replacement_chars_hu[] = {0x150,0x151,0x170,0x171,0}; // allow o,u-circumflex for o,u-double-acute | |||
tr = new Translator(); | |||
SetupTranslator(tr,stress_lengths_hu,stress_amps_hu); | |||
tr->charset_a0 = charsets[2]; // ISO-8859-2 | |||
tr->langopts.replace_chars = replace_chars_hu; | |||
tr->langopts.replacement_chars = replacement_chars_hu; | |||
tr->langopts.vowel_pause = 0x20; | |||
tr->langopts.stress_rule = 0; | |||
@@ -417,9 +438,6 @@ SetLengthMods(tr,3); // all equal | |||
tr->langopts.stress_rule = 4; // antipenultimate | |||
tr->langopts.numbers = 0x0c29 + 0x4000; | |||
tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards | |||
tr->langopts.replace_chars = replace_cyrillic_latin; | |||
tr->langopts.replacement_chars = replace_cyrillic; | |||
} | |||
break; | |||
@@ -495,8 +513,6 @@ SetLengthMods(tr,3); // all equal | |||
{ | |||
static int stress_lengths_ro[8] = {170, 170, 180, 180, 0, 0, 240, 260}; | |||
static int stress_amps_ro[8] = {15,13, 18,18, 20,22, 22,22 }; | |||
static const unsigned int replace_chars_ro[5] = {0x218,0x219,0x21a,0x21b,0}; | |||
static const unsigned int replacement_chars_ro[5] = {0x15e,0x15f,0x162,0x163,0}; // replace s-comma, t-comma by s-cedilla, t-cedilla | |||
tr = new Translator(); | |||
SetupTranslator(tr,stress_lengths_ro,stress_amps_ro); | |||
@@ -505,8 +521,6 @@ SetLengthMods(tr,3); // all equal | |||
tr->langopts.stress_flags = 0x100 + 0x6; | |||
tr->charset_a0 = charsets[2]; // ISO-8859-2 | |||
tr->langopts.replace_chars = replace_chars_ro; | |||
tr->langopts.replacement_chars = replacement_chars_ro; | |||
tr->langopts.numbers = 0x1829+0x6000 + NUM_ROMAN; | |||
tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex | |||
} |
@@ -665,6 +665,10 @@ if((wmark > 0) && (wmark < 8)) | |||
if(!found && iswdigit(first_char)) | |||
{ | |||
Lookup("_0lang",word_phonemes); | |||
if(word_phonemes[0] == phonSWITCH) | |||
return(0); | |||
found = TranslateNumber(word,phonemes,&dictionary_flags,wflags); | |||
} | |||
@@ -1560,16 +1564,14 @@ static int EmbeddedCommand(unsigned int &source_index) | |||
} // end of EmbeddedCommand | |||
int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert) | |||
{//===================================================================================================== | |||
// To allow language specific examination and replacement of characters | |||
int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert) | |||
{//================================================================================== | |||
int ix; | |||
unsigned int word; | |||
unsigned int new_c, c2, c_lower; | |||
int upper_case = 0; | |||
static int ignore_next = 0; | |||
const unsigned int *replace_chars; | |||
if(ignore_next) | |||
{ | |||
@@ -1578,7 +1580,7 @@ int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned i | |||
} | |||
if(c == 0) return(0); | |||
if(langopts.replace_chars == NULL) | |||
if((replace_chars = tr->langopts.replace_chars) == NULL) | |||
return(c); | |||
// there is a list of character codes to be substituted with alternative codes | |||
@@ -1590,18 +1592,18 @@ int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned i | |||
} | |||
new_c = 0; | |||
for(ix=0; (word = langopts.replace_chars[ix]) != 0; ix++) | |||
for(ix=0; (word = replace_chars[ix]) != 0; ix+=2) | |||
{ | |||
if(c_lower == (word & 0xffff)) | |||
{ | |||
if((word >> 16) == 0) | |||
{ | |||
new_c = langopts.replacement_chars[ix]; | |||
new_c = replace_chars[ix+1]; | |||
break; | |||
} | |||
if((word >> 16) == (unsigned int)tolower(next_in)) | |||
{ | |||
new_c = langopts.replacement_chars[ix]; | |||
new_c = replace_chars[ix+1]; | |||
ignore_next = 1; | |||
break; | |||
} | |||
@@ -1625,6 +1627,14 @@ int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned i | |||
if(upper_case) | |||
new_c = towupper(new_c); | |||
return(new_c); | |||
} | |||
int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert) | |||
{//===================================================================================================== | |||
// To allow language specific examination and replacement of characters | |||
return(SubstituteChar(this,c,next_in,insert)); | |||
} | |||
@@ -118,6 +118,7 @@ | |||
#define RULE_LETTERGP 17 // A B C H F G Y letter group number | |||
#define RULE_LETTERGP2 18 // L + letter group number | |||
#define RULE_CAPITAL 19 // word starts with a capital letter | |||
#define RULE_REPLACEMENTS 20 // section for character replacements | |||
#define RULE_NO_SUFFIX 24 // N | |||
#define RULE_NOTVOWEL 25 // K | |||
#define RULE_IFVERB 26 // V | |||
@@ -278,6 +279,7 @@ typedef struct { | |||
#define NUM_ROMAN 0x20000 | |||
#define NUM_ROMAN_UC 0x40000 | |||
// bits0-1=which numbers routine to use. | |||
// bit2= thousands separator must be space | |||
// bit3= , decimal separator, not . | |||
@@ -294,7 +296,6 @@ typedef struct { | |||
// bit16=dot after number indicates ordinal | |||
// bit17=recognize roman numbers | |||
// bit18=Roman numbers only if upper case | |||
int numbers; | |||
// bits 1-4 use variant form of numbers before thousands,millions,etc. | |||
@@ -302,6 +303,7 @@ typedef struct { | |||
// bit7=(LANG-ru) use MB for 1 thousand, million, etc | |||
// bit8=(LANG=sw) special word for 100,000s | |||
int numbers2; | |||
int max_roman; | |||
int thousands_sep; | |||
int decimal_sep; | |||
@@ -314,7 +316,6 @@ typedef struct { | |||
char ideographs; // treat as separate words | |||
int testing; // testing options: bit 1= specify stressed syllable in the form: "outdoor/2" | |||
const unsigned int *replace_chars; // characters to be substitutes | |||
const unsigned int *replacement_chars; // substitutes for replace_chars | |||
} LANGUAGE_OPTIONS; | |||