Browse Source

[1.29.11] Move character substitution feature to language *_rules files.


git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@93 d46cf337-b52f-0410-862d-fd96e6ae7743
master
jonsd 17 years ago
parent
commit
5bdf9d63ab

+ 23
- 10
dictsource/af_list View File

@@ -91,21 +91,26 @@ bethlehem bEt_liEm
brittanje $2
ceylon s@lOn
china S'ina
(graaff reinet) x2rA:f||r@n'Et
italië it'A:li;@
jerusalem j@r'ysalEm
kanada kanad%a
knysna neIsna
kongo kONgu
mesopotamië mEsuput'A:mi@
mosambiek musamb'ik
potchefstroom pOtSIfstr'o@m
(thaba nchu) tab'A:||ntSu
europa Y@r'o@pa
outeniekwa @Ut@n'ikwa
portugal $1
potchefstroom pOtSIfstr'o@m
pretoria prit'o@ria
stellenbosch st%&l@mbOs
tunisië $2
turkye $2
upington apiNt@n
worcester v'ust@r
zimbabwe zI:mb'ab_wE

zimbabwe zI:mb'ab_wE



@@ -124,16 +129,19 @@ bester b'Est@r
botha bo@ta
breytenbach breIt@nbax2
carl kA:r@L
cecilia s@si:lia
charles _^_EN
charlie tSA:li
chopin S%OpA~
chris krIs
christo krIstu
christian krIstian
christelle kr@st&l
christine kr@st'i:n
coetzee kuts'e@
cronjé krOnj'e@
debora d@bo@ra
debussy d@bus'i:
der d@r
deventer d'e@v@nt@r
du $u
@@ -187,6 +195,7 @@ martha marta
martin m'A:rt@n
michelle miS'&l
michiel $2
miriam mIri@m
mostert m'Ost@rt
mozart mo@tsart
naomi na_'o@mi
@@ -195,11 +204,14 @@ naudé nOd'e@
paul p@Ul
paulus p@UlWs
philip fIl@p
phillips fIl@ps
pierre p'e@r
petrus pe@trWs
phoebe fi:bi
rachmaninoff rax2m'aninOf
rebekka r@bEka
renée r@neI
retief r@tif
ronel run'&l
rousseau r@s@U
roux r'u
@@ -216,6 +228,7 @@ theron tr'On
viljoen $2
villiers vIli@rs
violet _^_EN $capital
william _^_EN
marais mar'E:
mandela mand'E:la
celliers sIlj'e@
@@ -292,7 +305,7 @@ alhoewel alhuv'&l $pause
tensy $2 $pause
aangesien $pause
wie $pause
wanneer $pause
wanneer $1 $pause
waar $pause
waarom $pause
waarheen $pause
@@ -376,7 +389,6 @@ bv beIf'o@rbe@lt $dot

// main word list

aanbid $2
aanstaande $2
adagio ad'A:dZi;%@U
algaande $2
@@ -468,7 +480,6 @@ hokaai hOkAI
hoofsaaklik $2

idee $2
ietwat itvat
ignoreer ix2no@r'e@r
inagneming @nax2ne@m@N
inkluis $2
@@ -476,16 +487,16 @@ intens @nt'Ens
intensiteit $4

jawoord jA:vo@rt
jupiter jupit@r

kafee kaf'e@
kapitaal kapit'A:l
kaviaar kavi'A:r
komaan kOmA:n
komberse kOmb'&rs@
kombers kOmb'&rs
kopiereg kup'ir&x2
kritiek krIt'ik
kultuur $2
kulture $2
kunsmatige kWnsm'A:t@x2@
kwansuis $2

@@ -497,6 +508,7 @@ lekkerste l&k@rst@
macaroni makar'o@ni
madame mad'A:m
makabere m%ak'A:b@r@
maskering mask'e@r@N $only
meegee me@x2e@
memoriam mEm'o@riam
merlot m&rl'o@
@@ -608,6 +620,8 @@ uiteindelik $2
uiteraard Yyt@r_'A:rt

vaarwel fA:rv'&l
vakant f%ak'ant
vakante f%ak'ant@
vanaf fanaf
vandat $1
vanne fan@
@@ -619,7 +633,6 @@ vererger f@r&rg@r
ver f'&:r
verg f&rx2
vergete f@rx2e@t@
vergewe f@rx2e@v@
vermy $2
verre f&:r@
verregaande f&r@x2'A:nd@

+ 193
- 49
dictsource/af_rules
File diff suppressed because it is too large
View File


+ 31
- 32
dictsource/dict_phonemes View File

@@ -127,16 +127,15 @@ tS v w z Z z2

Dictionary hi_dict

@ @- @2 @3 a a: aI aU
e E e: E: E~ i I i:
l- o O o: O: o~ O~ r-
U u: u~ V
@ @2 @3 a a: aI aU E
e: E: E~ I i: O o: O:
O~ r- U u: V

- : b c ch d d. f
g h H j J k kh l
m n N n. n^ p ph Q
q r s S s. t T t.
t.h th v w x z
: b c ch d d. f g
h H j J k kh l m
n N n. n^ p ph Q q
r s S s. t T t. t.h
th v w x z


Dictionary hr_dict
@@ -155,12 +154,12 @@ tS; v x z Z
Dictionary hu_dict

A a: E e: i i: o o:
u u: Y y y: Y:
u u: Y Y:

: b c C d dZ f g
h j J k l l^ m n
N n^ p R R2 s S s2
t tS ts v z Z
: b c d dZ f g h
j J k l m n n^ p
R R2 s S s2 t tS ts
v z Z


Dictionary it_dict
@@ -288,7 +287,7 @@ Dictionary pt_dict

& &/ &U~ &~ @ @- a A
aI aU e E eI EI eU EU
i i/ iU i~ o O oI OI
e~ i i/ iU o O oI OI
o~ u U uI u~ y

* : ; b C d dZ f
@@ -300,17 +299,14 @@ w x z Z

Dictionary ro_dict

@ @- @I @U a aI aU e
ea eI eo eU i i/ I^ iI
iU o O Oa oI oU u uI
y Y yI yU
@ @- @I a aI aU e ea
eI eU i I^ iI o Oa oI
oU u uI y

* *; b b; c C d d;
dZ f f; g h j k l
l; m m; n N n; p p;
r s S S; t T t; tS
ts ts; v v; w w2 x z
Z z; Z;
* *; b c d dZ f g
h j k l m m; n p
r s S S; t tS ts ts;
v w w2 x z Z


Dictionary is_dict
@@ -337,13 +333,16 @@ h j k l m n N p
r R s t w z


Dictionary grc_dict



Dictionary mk_dict

& @ @- @2 a A a: E
e e: E~ i I i: l- o
o: oU r- u u: y
& @ @- @2 a E e i
I o r- u

* b d dZ dZ; f g h
j k k^ l l^ m n N
n^ p r R s S t tS
ts tS; v x z Z
* b d dZ dZ; f g j
k k^ l l^ m n n^ p
r R s S t tS ts v
x z Z

+ 13
- 0
dictsource/hi_rules View File

@@ -10,6 +10,19 @@
// ?3 use diphthong for "au"


.replace
० 0
१ 1
२ 2
३ 3
४ 4
५ 5
६ 6
७ 7
८ 8
९ 9


// Vowels

.group अ

+ 6
- 0
dictsource/hu_rules View File

@@ -2,6 +2,12 @@
// This file is UTF-8 encoded


.replace
// allow o,u-circumflex for o,u-double-acute
ô ő
û ű


.group a
a A
_) a (_ %A

+ 36
- 0
dictsource/mk_rules View File

@@ -1,6 +1,42 @@
// translation rules for Macedonian
// This file is UTF-8 encoded

.replace
a а
b б
c ц
ć ћ
č ч
dž џ
dz ѕ
d д
đ ђ
e е
f ф
g г
h х
i и
j ј
k к
lj љ
l л
m м
nj њ
n н
o о
p п
r р
s с
š ш
t т
u у
v в
z з
ž ж
đ ѓ
ć ќ


.group а
а a


+ 4
- 0
dictsource/ro_rules View File

@@ -3,6 +3,10 @@
// This file is UTF-8 encoded


// replace s-comma, t-comma by s-cedilla, t-cedilla
.replace
ș ş
ț ţ


.group a

+ 21
- 19
phsource/compile_report View File

@@ -1,7 +1,7 @@
39 phoneme tables
40 phoneme tables
new total
base 96 96
base2 23 114
base2 24 115
en 53 144
en_n 30 144
en_us 37 144
@@ -25,13 +25,14 @@
mk 3 130
sr 2 129
ru 38 126
it 17 117
it 17 118
la 21 114
es 6 114
pt 28 131
es 6 115
pt 27 131
pt_pt 20 131
ro 36 138
el 8 114
ro 36 139
el 8 115
grc 7 120
sv 25 118
no 28 122
is 32 121
@@ -215,7 +216,7 @@
30 r/trr base af de fi nl ru ro sv sw
11 r/xr base
2 ufric/ch base de
3 ufric/f base de ro
4 ufric/f base de ro grc
2 ufric/f_ base ro
5 ufric/h_ base fi hi la
6 ufric/h@ base fi hi la
@@ -272,7 +273,7 @@
8 ustop/ts_pzd base hi ru
2 ustop/ts_pzd_ hi hu
2 ustop/ts_pzd2 hi hu
2 vdiph/0i pt vi
3 vdiph/0i pt grc vi
3 vdiph/0i_2 en_sc no en_wi
3 vdiph2/e@ en_sc en_wi
1 vdiph2/ea ro
@@ -323,10 +324,10 @@
1 vdiph/au_3 en_rp
6 vdiph/au_4 base2 cy eo sk it is
1 vdiph/ee-e hi
5 vdiph/eei en de nl pt vi
5 vdiph/eei base2 en de nl vi
3 vdiph/eei_2 en_us eo fi
2 vdiph/eei_3 en_rp sk
3 vdiph/eeu pt vi zhy
4 vdiph/eeu pt grc vi zhy
2 vdiph/eeu_2 la pt_pt
2 vdiph/eeu_3 en_n en_wm
1 vdiph/eey fi
@@ -346,7 +347,7 @@
9 vdiph/ooi en en_n en_us cy eo fi no zhy
1 vdiph/ooi_2 af
2 vdiph/ooi_3 en_rp en_wm
1 vdiph/oou cs
2 vdiph/oou cs grc
2 vdiph/ou fi zhy
2 vdiph/ou_2 sk ro
2 vdiph/ou_3 is
@@ -363,7 +364,7 @@
1 vdiph/Vu_2 en_wm
1 vdiph/Vu_3 nl
2 vdiph/&y fi nl
2 vdiph/yi fi no
3 vdiph/yi fi grc no
1 vdiph/y#i fi
1 vdiph/y#i_2 is
1 vdiph/yi_fr fr
@@ -416,7 +417,7 @@
4 vowel/& en_rp fi hi sv
4 vowel/0 base2 en hi pt
3 vowel/0_2 en_n pt_pt sw
5 vowel/0_3 en_us en_sc en_rp en_wm hu
4 vowel/0_3 en_us en_sc en_rp hu
2 vowel/@_2 fr
2 vowel/&_2 en_us
6 vowel/@_3 en_sc de hi
@@ -480,7 +481,7 @@
2 vowel/ii_6 en_wm
1 vowel/ii_en en
10 vowel/@_low en_rp hi ro no
12 vowel/o base2 en en_wm de hi it la pt_pt sv en_wi
10 vowel/o base2 en de hi it la pt_pt sv en_wi
4 vowel/o_2 cy hi hu no
2 vowel/o-_2 en_n en_wm
3 vowel/o_3 en_sc fr
@@ -493,10 +494,10 @@
1 vowel/oe_4 sv
2 vowel/o_mid fr hu
12 vowel/oo en_sc de eo la es el sv no zhy en_wi
10 vowel/oo_1 en_n en_rp en_wm af fi sk hr vi
12 vowel/oo_1 en_n en_rp en_wm af fi sk hr vi
3 vowel/oo_2 en_sc cy cs
1 vowel/oo_3 af
5 vowel/oo_4 hi pl it en_wi
6 vowel/oo_4 en_wm hi pl it en_wi
1 vowel/oo_5 is
6 vowel/oo_en en en_n en_rp
2 vowelr/aa_r en_sc
@@ -538,11 +539,11 @@
1 vowel/yy fr_ca
1 vowel/yy_2 no
1 vowel/yy_3 sv
5 vowel/yy_4 de hu la is
6 vowel/yy_4 de hu la grc is
1 vwl_af/@ af
1 vwl_af/I af
2 vwl_af/r@ af
6 vwl_en/aI@ en en_n en_us en_sc en_rp en_wm
5 vwl_en/aI@ en en_n en_us en_sc en_rp
2 vwl_en/aI@_2 en_sc
5 vwl_en/aU@ en en_n en_us en_sc en_wm
12 vwl_en/@L en en_us en_sc en_rp en_wm af
@@ -550,6 +551,7 @@
1 vwl_en_n/aa_5 en_n
2 vwl_en_n/O@ en_n
1 vwl_en_n/u_ en_n
1 vwl_en/ooi@ en_wm
3 vwl_en_rp/aa en_rp
1 vwl_en_rp/aU@ en_rp
1 vwl_en_rp/e@ en_rp

+ 1
- 0
phsource/mbrola/en1 View File

@@ -27,3 +27,4 @@
0 @- NULL 0 NULL
0 aI@ NULL 60 aI @
0 aU@ NULL 75 aU @
0 x NULL 0 k

+ 1
- 0
phsource/mbrola/us View File

@@ -12,6 +12,7 @@
0 w/ NULL 0 w
0 ; NULL 0 NULL
0 g- NULL 0 NULL
0 x NULL 0 k_h
0 @- NULL 0 NULL
0 aI@ NULL 60 AI r
0 aU@ NULL 75 aU r

+ 1
- 0
phsource/mbrola/us3 View File

@@ -12,6 +12,7 @@
0 w/ NULL 0 w
0 ; NULL 0 NULL
0 g- NULL 0 NULL
0 x NULL 0 k
0 @- NULL 0 NULL
0 aI@ NULL 60 AI r
0 aU@ NULL 75 aU r

+ 7
- 0
phsource/ph_base2 View File

@@ -107,6 +107,13 @@ phoneme eI
endphoneme


phoneme EI
vowel starttype (e) endtype (i)
length 230
formants vdiph/eei
endphoneme


phoneme oI
vowel starttype (o) endtype (i)
length 240

+ 2
- 2
phsource/ph_english_sc View File

@@ -92,14 +92,14 @@ endphoneme


phoneme I
vowel starttype (e) endtype (e)
vowel starttype (@) endtype (@)
length 110
formants vowel/e#
before l/2 vowel/@_3-30+l/L2_@L
endphoneme

phoneme I2
vowel starttype (e) endtype (e)
vowel starttype (@) endtype (@)
unstressed
length 110
formants vowel/e#

+ 5
- 5
phsource/ph_english_wm View File

@@ -62,7 +62,7 @@ endphoneme
phoneme 0
vowel starttype (o) endtype (o)
length 140
formants vowel/0_3
formants vowel/oo_4
reduceto @ 0
endphoneme

@@ -146,7 +146,7 @@ endphoneme
phoneme O@
vowel starttype (o) endtype (@)
length 240
formants vowel/o
formants vowel/oo_1
linkout r-
endphoneme

@@ -154,7 +154,7 @@ endphoneme
phoneme O
vowel starttype (o) endtype (o)
length 150
formants vowel/o
formants vowel/oo_1
reduceto @ 0
endphoneme

@@ -189,7 +189,7 @@ endphoneme

phoneme eI
vowel starttype (@) endtype (i)
length 210
length 230
formants vdiph/@i_3
endphoneme

@@ -228,7 +228,7 @@ endphoneme
phoneme aI@
vowel starttype (a) endtype (@)
length 270
formants vwl_en/aI@
formants vwl_en/ooi@
linkout r-
endphoneme


+ 44
- 0
phsource/ph_greek_ancient View File

@@ -0,0 +1,44 @@

//====================================================
// Ancient Greek - based on base2
//====================================================

// [y]: vowel whose start and end types are both (i); formant data from vowel/yy_4
phoneme y
vowel starttype (i) endtype (i)
length 160
formants vowel/yy_4
endphoneme

// [EU]: vowel starting as (e) and ending as (u); formant data from vdiph/eeu
phoneme EU
vowel starttype (e) endtype (u)
length 230
formants vdiph/eeu
endphoneme

// [OI]: vowel starting as (o) and ending as (i); formant data from vdiph/0i
phoneme OI
vowel starttype (o) endtype (i)
length 230
formants vdiph/0i
endphoneme

// [OU]: vowel starting as (o) and ending as (u); formant data from vdiph/oou
phoneme OU
vowel starttype (o) endtype (u)
length 230
formants vdiph/oou
endphoneme


// [yI]: formant data from vdiph/yi
// NOTE(review): starttype and endtype are both (i) even though the formant
// file is a diphthong (vdiph/yi) - confirm this is intended.
phoneme yI
vowel starttype (i) endtype (i)
length 230
formants vdiph/yi
endphoneme


// [f]: redefined for Ancient Greek with the "afr" attribute instead of being a plain fricative
phoneme f // consider this an affricate
vls blb afr
vowelout f1=0 f2=1000 -500 -350 f3=-200 80
lengthmod 2
wave ufric/f // could replace this with a [pf] wav file
endphoneme


+ 0
- 7
phsource/ph_pt_brazil View File

@@ -138,13 +138,6 @@ phoneme eI
endphoneme


phoneme EI
vowel starttype (e) endtype (i)
length 230
formants vdiph/eei
endphoneme


phoneme OI
vowel starttype (o) endtype (i)
length 230

+ 3
- 0
phsource/phonemes View File

@@ -1218,6 +1218,9 @@ include ph_romanian
phonemetable el base2
include ph_greek

phonemetable grc base2
include ph_greek_ancient

phonemetable sv base
include ph_swedish


BIN
phsource/vdiph2/i@_2 View File


BIN
phsource/vowel/e# View File


BIN
phsource/vowel/i_2 View File


BIN
phsource/vowelr/aa_r View File


BIN
phsource/vwl_en/ooi@ View File


+ 96
- 31
src/compiledict.cpp View File

@@ -176,7 +176,6 @@ int compile_line(char *linebuf, char *dict_line, int *hash)
char encoded_ph[200];
unsigned char bad_phoneme[4];
p = linebuf;
comment = NULL;
phonetic = word = "";
@@ -347,7 +346,8 @@ int compile_line(char *linebuf, char *dict_line, int *hash)
if((word[0] & 0x80)==0) // 7 bit ascii only
{
// 1st letter - need to consider utf8 here
// If first letter is uppercase, convert to lower case. (Only if it's 7bit ascii)
// ??? need to consider utf8 here
word[0] = tolower(word[0]);
}

@@ -789,8 +789,6 @@ char *compile_rule(char *input)
for(ix=0; finish==0; ix++)
{
c = input[ix];
if((c=='/') && (input[ix+1]=='/'))
c = input[ix] = '\n'; // treat command as end of line

switch(c = input[ix])
{
@@ -1167,6 +1165,7 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp)
int different;
char *prev_rgroup_name;
unsigned int char_code;
int compile_mode=0;
char *buf;
char buf1[120];
char *rules[N_RULES];
@@ -1184,7 +1183,13 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp)
{
linenum++;
buf = fgets(buf1,sizeof(buf1),f_in);
if((buf != NULL) && (buf[0] == '\r')) buf++; // ignore extra \r in \r\n
if(buf != NULL)
{
if((p = (unsigned char *)strstr(buf,"//")) != NULL)
*p = 0;

if(buf[0] == '\r') buf++; // ignore extra \r in \r\n
}

if((buf != NULL) && (memcmp(buf,".L",2)==0))
{
@@ -1196,7 +1201,7 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp)
continue;
}

if((buf == NULL) || (memcmp(buf,".group",6)==0))
if((buf == NULL) || (buf[0] == '.'))
{
// next .group or end of file, write out the previous group

@@ -1212,46 +1217,106 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp)
}
n_rules = 0;

if(buf == NULL) break; // end of file
if(compile_mode == 2)
{
// end of the character replacements section
fwrite(&n_rules,1,4,f_out); // write a zero word to terminate the replacemenmt list
}

p = (unsigned char *)&buf[6];
while((p[0]==' ') || (p[0]=='\t')) p++; // Note: Windows isspace(0xe1) gives TRUE !
ix = 0;
while((*p > ' ') && (ix<12))
group_name[ix++] = *p++;
group_name[ix]=0;
if(buf == NULL) break; // end of file

if(sscanf(group_name,"0x%x",&char_code)==1)
if(memcmp(buf,".replace",8)==0)
{
// group character is given as a character code (max 16 bits)
p = (unsigned char *)group_name;
compile_mode = 2;
fputc(RULE_GROUP_START,f_out);
fputc(RULE_REPLACEMENTS,f_out);

if(char_code > 0x100)
{
*p++ = (char_code >> 8);
}
*p++ = char_code;
*p = 0;
// advance to next word boundary
while((ftell(f_out) & 3) != 0)
fputc(0,f_out);
}

if(strlen(group_name) > 2)
if(memcmp(buf,".group",6)==0)
{
if(utf8_in(&c,group_name,0) < 2)
compile_mode = 1;

p = (unsigned char *)&buf[6];
while((p[0]==' ') || (p[0]=='\t')) p++; // Note: Windows isspace(0xe1) gives TRUE !
ix = 0;
while((*p > ' ') && (ix<12))
group_name[ix++] = *p++;
group_name[ix]=0;
if(sscanf(group_name,"0x%x",&char_code)==1)
{
fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum);
error_count++;
// group character is given as a character code (max 16 bits)
p = (unsigned char *)group_name;
if(char_code > 0x100)
{
*p++ = (char_code >> 8);
}
*p++ = char_code;
*p = 0;
}
if(strlen(group_name) > 2)
{
if(utf8_in(&c,group_name,0) < 2)
{
fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum);
error_count++;
}
group_name[2] = 0;
}

group_name[2] = 0;
}

continue;
}
prule = compile_rule(buf);
if((prule != NULL) && (n_rules < N_RULES))
switch(compile_mode)
{
rules[n_rules++] = prule;
case 1: // .group
prule = compile_rule(buf);
if((prule != NULL) && (n_rules < N_RULES))
{
rules[n_rules++] = prule;
}
break;

case 2: // .replace
{
int replace1;
int replace2;
char *p;

p = buf;
replace1 = 0;
replace2 = 0;
while(isspace2(*p)) p++;
ix = 0;
while((unsigned char)(*p) > 0x20) // not space or zero-byte
{
p += utf8_in(&c,p,0);
replace1 += (c << ix);
ix += 16;
}
while(isspace2(*p)) p++;
ix = 0;
while((unsigned char)(*p) > 0x20)
{
p += utf8_in(&c,p,0);
replace2 += (c << ix);
ix += 16;
}
if(replace1 != 0)
{
fwrite(&replace1,1,4,f_out);
fwrite(&replace2,1,4,f_out);
}
}
break;
}
}
fclose(f_temp);

+ 13
- 3
src/dictionary.cpp View File

@@ -205,9 +205,9 @@ void Translator::InitGroups(void)
int ix;
char *p;
char *p_name;
unsigned int *pw;
unsigned char c, c2;
int len;
int rule_count;

n_groups2 = 0;
for(ix=0; ix<256; ix++)
@@ -228,6 +228,18 @@ void Translator::InitGroups(void)
}
p++;

if(p[0] == RULE_REPLACEMENTS)
{
pw = (unsigned int *)(((int)p+4) & ~3); // advance to next word boundary
langopts.replace_chars = pw;
while(pw[0] != 0)
{
pw += 2; // find the end of the replacement list, each entry is 2 words.
}
p = (char *)(pw+1);
continue;
}

if(p[0] == RULE_LETTERGP2)
{
ix = p[1] - 'A';
@@ -266,11 +278,9 @@ void Translator::InitGroups(void)
}

// skip over all the rules in this group
rule_count = 0;
while(*p != RULE_GROUP_END)
{
p += (strlen(p) + 1);
rule_count++;
}
p++;
}

+ 2
- 0
src/extras.cpp View File

@@ -998,6 +998,8 @@ void ConvertToUtf8()

} // end of ConvertToItf8

//******************************************************************************************************



//#define calcspeedtab

+ 9
- 7
src/numbers.cpp View File

@@ -354,12 +354,14 @@ int Translator::LookupNum2(int value, int control, char *ph_out)
if(langopts.numbers & 0x200)
{
// remove vowel from the end of tens if units starts with a vowel (LANG=Italian)
ix = strlen(ph_tens)-1;
if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS)
next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type;

if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
ph_tens[ix] = 0;
if((ix = strlen(ph_tens)-1) >= 0)
{
if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS)
next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type;
if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
ph_tens[ix] = 0;
}
}
sprintf(ph_out,"%s%s",ph_tens,ph_digits);
}
@@ -793,7 +795,7 @@ int Translator::TranslateNumber_1(char *word, char *ph_out, unsigned int *flags,
decimal_point = 0;
}
}
if(ph_out[0] != 0)
if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH))
{
int next_char;
utf8_in(&next_char,&word[n_digits+1],0);

+ 1
- 1
src/synthdata.cpp View File

@@ -35,7 +35,7 @@
#include "translate.h"
#include "wave.h"

const char *version_string = "1.29.10 16.Oct.07";
const char *version_string = "1.29.11 23.Oct.07";
const int version_phdata = 0x012901;

int option_device_number = -1;

+ 44
- 30
src/tr_languages.cpp View File

@@ -38,6 +38,7 @@


#define L_qa 0x716100
#define L_grc 0x677263 // grc Ancient Greek


#define OFFSET_GREEK 0x380
@@ -45,19 +46,41 @@
#define OFFSET_DEVANAGARI 0x900


static const unsigned int replace_cyrillic[] =
{0x430,0x431,0x446,0x45b,0x447,0x45f,0x455,0x434,0x452,
0x435,0x444,0x433,0x445,0x438,0x458,0x43a,0x459,
0x43b,0x43c,0x45a,0x43d,0x43e,0x43f,0x440,0x441,
0x448,0x442,0x443,0x432,0x437,0x436,
0x453,0x45c,0}; // ѓ ѕ ќ

static const unsigned int replace_cyrillic_latin[] =
{'a','b','c',0x107,0x10d,'d'+(0x17e<<16),'d'+('z'<<16),'d',0x111,
'e','f','g','h','i','j','k','l'+('j'<<16),
'l','m','n'+('j'<<16),'n','o','p','r','s',
0x161,'t','u','v','z',0x17e,
0x111,0x107,0};
// Character substitution table: Cyrillic letters -> Latin equivalents.
// Layout is a flat list of (character, replacement) pairs ended by a single 0.
// SubstituteChar() steps through it two entries at a time, compares the low
// 16 bits of the first entry of each pair with the lower-cased input character,
// and takes the second entry as the replacement.
// Where the Latin equivalent is two letters, the first letter is in the low
// 16 bits of the replacement and the second letter is in bits 16 and above.
// Order matters: the lookup stops at the first matching pair.
static const unsigned int replace_cyrillic_latin[] =
{0x430,'a',
0x431,'b',
0x446,'c',
0x45b,0x107,
0x447,0x10d,
0x45f,'d'+(0x17e<<16),  // two-letter replacement: 'd' followed by 0x17e
0x455,'d'+('z'<<16),  // two-letter replacement: "dz"
0x434,'d',
0x452,0x111,
0x435,'e',
0x444,'f',
0x433,'g',
0x445,'h',
0x438,'i',
0x458,'j',
0x43a,'k',
0x459,'l'+('j'<<16),  // two-letter replacement: "lj"
0x43b,'l',
0x43c,'m',
0x45a,'n'+('j'<<16),  // two-letter replacement: "nj"
0x43d,'n',
0x43e,'o',
0x43f,'p',
0x440,'r',
0x441,'s',
0x448,0x161,
0x442,'t',
0x443,'u',
0x432,'v',
0x437,'z',
0x436,0x17e,
0x453,0x111,
0x45c,0x107,
0}; // list terminator; the two pairs just above (0x453, 0x45c) reuse the replacements given for 0x452 and 0x45b


void SetupTranslator(Translator *tr, int *lengths, int *amps)
@@ -132,6 +155,7 @@ Translator *SelectTranslator(const char *name)
break;

case L('e','l'): // Greek
case L_grc: // Ancient Greek
{
static int stress_lengths_el[8] = {155, 180, 210, 210, 0, 0, 270, 300};
static int stress_amps_el[8] = {15,12, 20,20, 20,24, 24,22 }; // 'diminished' is used to mark a quieter, final unstressed syllable
@@ -167,6 +191,12 @@ Translator *SelectTranslator(const char *name)

tr->langopts.numbers = 0xb09;
tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands

if(name2 == L_grc)
{
// ancient greek
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1;
}
}
break;

@@ -259,8 +289,6 @@ Translator *SelectTranslator(const char *name)
case L('h','i'):
{
static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f};
static const unsigned int replace_chars_hi[11] = {0x966,0x967,0x968,0x969,0x96a,0x96b,0x96c,0x96d,0x96e,0x96f,0}; // digits 0-9
static const unsigned int replacement_chars_hi[11] = {0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0};
static int stress_lengths_hi[8] = {190, 190, 210, 210, 0, 0, 230, 250};
static int stress_amps_hi[8] = {17,14, 20,19, 20,24, 24,22 };

@@ -274,8 +302,6 @@ Translator *SelectTranslator(const char *name)
tr->langopts.numbers = 0x811;
tr->langopts.numbers2 = 0x100;
tr->letter_bits_offset = OFFSET_DEVANAGARI;
tr->langopts.replace_chars = replace_chars_hi;
tr->langopts.replacement_chars = replacement_chars_hi;

memset(tr->letter_bits,0,sizeof(tr->letter_bits));
SetLetterBitsRange(tr,LETTERGP_A,0x06,0x14); // vowel letters
@@ -306,8 +332,7 @@ Translator *SelectTranslator(const char *name)

tr->langopts.numbers = 0x1c0d + 0x4000 + NUM_ROMAN_UC;
tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards
tr->langopts.replace_chars = replace_cyrillic;
tr->langopts.replacement_chars = replace_cyrillic_latin;
tr->langopts.replace_chars = replace_cyrillic_latin;

SetLetterVowel(tr,'y');
SetLetterVowel(tr,'r');
@@ -319,14 +344,10 @@ Translator *SelectTranslator(const char *name)
{
static int stress_amps_hu[8] = {17,17, 19,19, 20,24, 24,22 };
static int stress_lengths_hu[8] = {185,195, 195,190, 0,0, 210,220};
static const unsigned int replace_chars_hu[] = {0xd4,0xf4,0xdb,0xfb,0};
static const unsigned int replacement_chars_hu[] = {0x150,0x151,0x170,0x171,0}; // allow o,u-circumflex for o,u-double-acute

tr = new Translator();
SetupTranslator(tr,stress_lengths_hu,stress_amps_hu);
tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->langopts.replace_chars = replace_chars_hu;
tr->langopts.replacement_chars = replacement_chars_hu;

tr->langopts.vowel_pause = 0x20;
tr->langopts.stress_rule = 0;
@@ -417,9 +438,6 @@ SetLengthMods(tr,3); // all equal
tr->langopts.stress_rule = 4; // antipenultimate
tr->langopts.numbers = 0x0c29 + 0x4000;
tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards

tr->langopts.replace_chars = replace_cyrillic_latin;
tr->langopts.replacement_chars = replace_cyrillic;
}
break;

@@ -495,8 +513,6 @@ SetLengthMods(tr,3); // all equal
{
static int stress_lengths_ro[8] = {170, 170, 180, 180, 0, 0, 240, 260};
static int stress_amps_ro[8] = {15,13, 18,18, 20,22, 22,22 };
static const unsigned int replace_chars_ro[5] = {0x218,0x219,0x21a,0x21b,0};
static const unsigned int replacement_chars_ro[5] = {0x15e,0x15f,0x162,0x163,0}; // replace s-comma, t-comma by s-cedilla, t-cedilla

tr = new Translator();
SetupTranslator(tr,stress_lengths_ro,stress_amps_ro);
@@ -505,8 +521,6 @@ SetLengthMods(tr,3); // all equal
tr->langopts.stress_flags = 0x100 + 0x6;

tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->langopts.replace_chars = replace_chars_ro;
tr->langopts.replacement_chars = replacement_chars_ro;
tr->langopts.numbers = 0x1829+0x6000 + NUM_ROMAN;
tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex
}

+ 19
- 9
src/translate.cpp View File

@@ -665,6 +665,10 @@ if((wmark > 0) && (wmark < 8))
if(!found && iswdigit(first_char))
{
Lookup("_0lang",word_phonemes);
if(word_phonemes[0] == phonSWITCH)
return(0);

found = TranslateNumber(word,phonemes,&dictionary_flags,wflags);
}

@@ -1560,16 +1564,14 @@ static int EmbeddedCommand(unsigned int &source_index)
} // end of EmbeddedCommand



int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert)
{//=====================================================================================================
// To allow language specific examination and replacement of characters

int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert)
{//==================================================================================
int ix;
unsigned int word;
unsigned int new_c, c2, c_lower;
int upper_case = 0;
static int ignore_next = 0;
const unsigned int *replace_chars;

if(ignore_next)
{
@@ -1578,7 +1580,7 @@ int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned i
}
if(c == 0) return(0);

if(langopts.replace_chars == NULL)
if((replace_chars = tr->langopts.replace_chars) == NULL)
return(c);

// there is a list of character codes to be substituted with alternative codes
@@ -1590,18 +1592,18 @@ int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned i
}

new_c = 0;
for(ix=0; (word = langopts.replace_chars[ix]) != 0; ix++)
for(ix=0; (word = replace_chars[ix]) != 0; ix+=2)
{
if(c_lower == (word & 0xffff))
{
if((word >> 16) == 0)
{
new_c = langopts.replacement_chars[ix];
new_c = replace_chars[ix+1];
break;
}
if((word >> 16) == (unsigned int)tolower(next_in))
{
new_c = langopts.replacement_chars[ix];
new_c = replace_chars[ix+1];
ignore_next = 1;
break;
}
@@ -1625,6 +1627,14 @@ int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned i
if(upper_case)
new_c = towupper(new_c);
return(new_c);

}


int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert)
{//=====================================================================================================
// To allow language specific examination and replacement of characters
// This implementation only forwards to SubstituteChar() for this translator
// and returns its result unchanged.
// 'c' is the current character, 'next_in' the character after it, and
// 'insert' is passed straight through to SubstituteChar().
// 'ptr' and 'prev_in' are not used by this implementation.
return(SubstituteChar(this,c,next_in,insert));
}



+ 3
- 2
src/translate.h View File

@@ -118,6 +118,7 @@
#define RULE_LETTERGP 17 // A B C H F G Y letter group number
#define RULE_LETTERGP2 18 // L + letter group number
#define RULE_CAPITAL 19 // word starts with a capital letter
#define RULE_REPLACEMENTS 20 // section for character replacements
#define RULE_NO_SUFFIX 24 // N
#define RULE_NOTVOWEL 25 // K
#define RULE_IFVERB 26 // V
@@ -278,6 +279,7 @@ typedef struct {

#define NUM_ROMAN 0x20000
#define NUM_ROMAN_UC 0x40000

// bits0-1=which numbers routine to use.
// bit2= thousands separator must be space
// bit3= , decimal separator, not .
@@ -294,7 +296,6 @@ typedef struct {
// bit16=dot after number indicates ordinal
// bit17=recognize roman numbers
// bit18=Roman numbers only if upper case

int numbers;

// bits 1-4 use variant form of numbers before thousands,millions,etc.
@@ -302,6 +303,7 @@ typedef struct {
// bit7=(LANG-ru) use MB for 1 thousand, million, etc
// bit8=(LANG=sw) special word for 100,000s
int numbers2;

int max_roman;
int thousands_sep;
int decimal_sep;
@@ -314,7 +316,6 @@ typedef struct {
char ideographs; // treat as separate words
int testing; // testing options: bit 1= specify stressed syllable in the form: "outdoor/2"
const unsigned int *replace_chars; // characters to be substitutes
const unsigned int *replacement_chars; // substitutes for replace_chars
} LANGUAGE_OPTIONS;



Loading…
Cancel
Save