Browse Source

[1.29.11] Move character substitution feature to language *_rules files.


git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@93 d46cf337-b52f-0410-862d-fd96e6ae7743
master
jonsd 17 years ago
parent
commit
5bdf9d63ab

+ 23
- 10
dictsource/af_list View File

brittanje $2 brittanje $2
ceylon s@lOn ceylon s@lOn
china S'ina china S'ina
(graaff reinet) x2rA:f||r@n'Et
italië it'A:li;@
jerusalem j@r'ysalEm jerusalem j@r'ysalEm
kanada kanad%a kanada kanad%a
knysna neIsna
kongo kONgu kongo kONgu
mesopotamië mEsuput'A:mi@ mesopotamië mEsuput'A:mi@
mosambiek musamb'ik mosambiek musamb'ik
potchefstroom pOtSIfstr'o@m
(thaba nchu) tab'A:||ntSu (thaba nchu) tab'A:||ntSu
europa Y@r'o@pa europa Y@r'o@pa
outeniekwa @Ut@n'ikwa
portugal $1 portugal $1
potchefstroom pOtSIfstr'o@m
pretoria prit'o@ria pretoria prit'o@ria
stellenbosch st%&l@mbOs stellenbosch st%&l@mbOs
tunisië $2
turkye $2
upington apiNt@n upington apiNt@n
worcester v'ust@r worcester v'ust@r
zimbabwe zI:mb'ab_wE

zimbabwe zI:mb'ab_wE






botha bo@ta botha bo@ta
breytenbach breIt@nbax2 breytenbach breIt@nbax2
carl kA:r@L carl kA:r@L
cecilia s@si:lia
charles _^_EN charles _^_EN
charlie tSA:li charlie tSA:li
chopin S%OpA~ chopin S%OpA~
chris krIs chris krIs
christo krIstu christo krIstu
christian krIstian christian krIstian
christelle kr@st&l
christine kr@st'i:n christine kr@st'i:n
coetzee kuts'e@ coetzee kuts'e@
cronjé krOnj'e@ cronjé krOnj'e@
debora d@bo@ra debora d@bo@ra
debussy d@bus'i:
der d@r der d@r
deventer d'e@v@nt@r deventer d'e@v@nt@r
du $u du $u
martin m'A:rt@n martin m'A:rt@n
michelle miS'&l michelle miS'&l
michiel $2 michiel $2
miriam mIri@m
mostert m'Ost@rt mostert m'Ost@rt
mozart mo@tsart mozart mo@tsart
naomi na_'o@mi naomi na_'o@mi
paul p@Ul paul p@Ul
paulus p@UlWs paulus p@UlWs
philip fIl@p philip fIl@p
phillips fIl@ps
pierre p'e@r pierre p'e@r
petrus pe@trWs petrus pe@trWs
phoebe fi:bi phoebe fi:bi
rachmaninoff rax2m'aninOf rachmaninoff rax2m'aninOf
rebekka r@bEka rebekka r@bEka
renée r@neI
retief r@tif
ronel run'&l ronel run'&l
rousseau r@s@U rousseau r@s@U
roux r'u roux r'u
viljoen $2 viljoen $2
villiers vIli@rs villiers vIli@rs
violet _^_EN $capital violet _^_EN $capital
william _^_EN
marais mar'E: marais mar'E:
mandela mand'E:la mandela mand'E:la
celliers sIlj'e@ celliers sIlj'e@
tensy $2 $pause tensy $2 $pause
aangesien $pause aangesien $pause
wie $pause wie $pause
wanneer $pause
wanneer $1 $pause
waar $pause waar $pause
waarom $pause waarom $pause
waarheen $pause waarheen $pause


// main word list // main word list


aanbid $2
aanstaande $2 aanstaande $2
adagio ad'A:dZi;%@U adagio ad'A:dZi;%@U
algaande $2 algaande $2
hoofsaaklik $2 hoofsaaklik $2


idee $2 idee $2
ietwat itvat
ignoreer ix2no@r'e@r ignoreer ix2no@r'e@r
inagneming @nax2ne@m@N inagneming @nax2ne@m@N
inkluis $2 inkluis $2
intensiteit $4 intensiteit $4


jawoord jA:vo@rt jawoord jA:vo@rt
jupiter jupit@r


kafee kaf'e@ kafee kaf'e@
kapitaal kapit'A:l
kaviaar kavi'A:r
komaan kOmA:n komaan kOmA:n
komberse kOmb'&rs@ komberse kOmb'&rs@
kombers kOmb'&rs kombers kOmb'&rs
kopiereg kup'ir&x2 kopiereg kup'ir&x2
kritiek krIt'ik kritiek krIt'ik
kultuur $2
kulture $2
kunsmatige kWnsm'A:t@x2@ kunsmatige kWnsm'A:t@x2@
kwansuis $2 kwansuis $2


macaroni makar'o@ni macaroni makar'o@ni
madame mad'A:m madame mad'A:m
makabere m%ak'A:b@r@ makabere m%ak'A:b@r@
maskering mask'e@r@N $only
meegee me@x2e@ meegee me@x2e@
memoriam mEm'o@riam memoriam mEm'o@riam
merlot m&rl'o@ merlot m&rl'o@
uiteraard Yyt@r_'A:rt uiteraard Yyt@r_'A:rt


vaarwel fA:rv'&l vaarwel fA:rv'&l
vakant f%ak'ant
vakante f%ak'ant@
vanaf fanaf vanaf fanaf
vandat $1 vandat $1
vanne fan@ vanne fan@
ver f'&:r ver f'&:r
verg f&rx2 verg f&rx2
vergete f@rx2e@t@ vergete f@rx2e@t@
vergewe f@rx2e@v@
vermy $2 vermy $2
verre f&:r@ verre f&:r@
verregaande f&r@x2'A:nd@ verregaande f&r@x2'A:nd@

+ 193
- 49
dictsource/af_rules
File diff suppressed because it is too large
View File


+ 31
- 32
dictsource/dict_phonemes View File



Dictionary hi_dict Dictionary hi_dict


@ @- @2 @3 a a: aI aU
e E e: E: E~ i I i:
l- o O o: O: o~ O~ r-
U u: u~ V
@ @2 @3 a a: aI aU E
e: E: E~ I i: O o: O:
O~ r- U u: V


- : b c ch d d. f
g h H j J k kh l
m n N n. n^ p ph Q
q r s S s. t T t.
t.h th v w x z
: b c ch d d. f g
h H j J k kh l m
n N n. n^ p ph Q q
r s S s. t T t. t.h
th v w x z




Dictionary hr_dict Dictionary hr_dict
Dictionary hu_dict Dictionary hu_dict


A a: E e: i i: o o: A a: E e: i i: o o:
u u: Y y y: Y:
u u: Y Y:


: b c C d dZ f g
h j J k l l^ m n
N n^ p R R2 s S s2
t tS ts v z Z
: b c d dZ f g h
j J k l m n n^ p
R R2 s S s2 t tS ts
v z Z




Dictionary it_dict Dictionary it_dict


& &/ &U~ &~ @ @- a A & &/ &U~ &~ @ @- a A
aI aU e E eI EI eU EU aI aU e E eI EI eU EU
i i/ iU i~ o O oI OI
e~ i i/ iU o O oI OI
o~ u U uI u~ y o~ u U uI u~ y


* : ; b C d dZ f * : ; b C d dZ f


Dictionary ro_dict Dictionary ro_dict


@ @- @I @U a aI aU e
ea eI eo eU i i/ I^ iI
iU o O Oa oI oU u uI
y Y yI yU
@ @- @I a aI aU e ea
eI eU i I^ iI o Oa oI
oU u uI y


* *; b b; c C d d;
dZ f f; g h j k l
l; m m; n N n; p p;
r s S S; t T t; tS
ts ts; v v; w w2 x z
Z z; Z;
* *; b c d dZ f g
h j k l m m; n p
r s S S; t tS ts ts;
v w w2 x z Z




Dictionary is_dict Dictionary is_dict
r R s t w z r R s t w z




Dictionary grc_dict



Dictionary mk_dict Dictionary mk_dict


& @ @- @2 a A a: E
e e: E~ i I i: l- o
o: oU r- u u: y
& @ @- @2 a E e i
I o r- u


* b d dZ dZ; f g h
j k k^ l l^ m n N
n^ p r R s S t tS
ts tS; v x z Z
* b d dZ dZ; f g j
k k^ l l^ m n n^ p
r R s S t tS ts v
x z Z

+ 13
- 0
dictsource/hi_rules View File

// ?3 use diphthong for "au" // ?3 use diphthong for "au"




.replace
० 0
१ 1
२ 2
३ 3
४ 4
५ 5
६ 6
७ 7
८ 8
९ 9


// Vowels // Vowels


.group अ .group अ

+ 6
- 0
dictsource/hu_rules View File

// This file is UTF-8 encoded // This file is UTF-8 encoded




.replace
// allow o,u-circumflex for o,u-double-acute
ô ő
û ű


.group a .group a
a A a A
_) a (_ %A _) a (_ %A

+ 36
- 0
dictsource/mk_rules View File

// translation rules for Macedonian // translation rules for Macedonian
// This file is UTF-8 encoded // This file is UTF-8 encoded


.replace
a а
b б
c ц
ć ћ
č ч
dž џ
dz ѕ
d д
đ ђ
e е
f ф
g г
h х
i и
j ј
k к
lj љ
l л
m м
nj њ
n н
o о
p п
r р
s с
š ш
t т
u у
v в
z з
ž ж
đ ѓ
ć ќ


.group а .group а
а a а a



+ 4
- 0
dictsource/ro_rules View File

// This file is UTF-8 encoded // This file is UTF-8 encoded




// replace s-comma, t-comma by s-cedilla, t-cedilla
.replace
ș ş
ț ţ




.group a .group a

+ 21
- 19
phsource/compile_report View File

39 phoneme tables
40 phoneme tables
new total new total
base 96 96 base 96 96
base2 23 114
base2 24 115
en 53 144 en 53 144
en_n 30 144 en_n 30 144
en_us 37 144 en_us 37 144
mk 3 130 mk 3 130
sr 2 129 sr 2 129
ru 38 126 ru 38 126
it 17 117
it 17 118
la 21 114 la 21 114
es 6 114
pt 28 131
es 6 115
pt 27 131
pt_pt 20 131 pt_pt 20 131
ro 36 138
el 8 114
ro 36 139
el 8 115
grc 7 120
sv 25 118 sv 25 118
no 28 122 no 28 122
is 32 121 is 32 121
30 r/trr base af de fi nl ru ro sv sw 30 r/trr base af de fi nl ru ro sv sw
11 r/xr base 11 r/xr base
2 ufric/ch base de 2 ufric/ch base de
3 ufric/f base de ro
4 ufric/f base de ro grc
2 ufric/f_ base ro 2 ufric/f_ base ro
5 ufric/h_ base fi hi la 5 ufric/h_ base fi hi la
6 ufric/h@ base fi hi la 6 ufric/h@ base fi hi la
8 ustop/ts_pzd base hi ru 8 ustop/ts_pzd base hi ru
2 ustop/ts_pzd_ hi hu 2 ustop/ts_pzd_ hi hu
2 ustop/ts_pzd2 hi hu 2 ustop/ts_pzd2 hi hu
2 vdiph/0i pt vi
3 vdiph/0i pt grc vi
3 vdiph/0i_2 en_sc no en_wi 3 vdiph/0i_2 en_sc no en_wi
3 vdiph2/e@ en_sc en_wi 3 vdiph2/e@ en_sc en_wi
1 vdiph2/ea ro 1 vdiph2/ea ro
1 vdiph/au_3 en_rp 1 vdiph/au_3 en_rp
6 vdiph/au_4 base2 cy eo sk it is 6 vdiph/au_4 base2 cy eo sk it is
1 vdiph/ee-e hi 1 vdiph/ee-e hi
5 vdiph/eei en de nl pt vi
5 vdiph/eei base2 en de nl vi
3 vdiph/eei_2 en_us eo fi 3 vdiph/eei_2 en_us eo fi
2 vdiph/eei_3 en_rp sk 2 vdiph/eei_3 en_rp sk
3 vdiph/eeu pt vi zhy
4 vdiph/eeu pt grc vi zhy
2 vdiph/eeu_2 la pt_pt 2 vdiph/eeu_2 la pt_pt
2 vdiph/eeu_3 en_n en_wm 2 vdiph/eeu_3 en_n en_wm
1 vdiph/eey fi 1 vdiph/eey fi
9 vdiph/ooi en en_n en_us cy eo fi no zhy 9 vdiph/ooi en en_n en_us cy eo fi no zhy
1 vdiph/ooi_2 af 1 vdiph/ooi_2 af
2 vdiph/ooi_3 en_rp en_wm 2 vdiph/ooi_3 en_rp en_wm
1 vdiph/oou cs
2 vdiph/oou cs grc
2 vdiph/ou fi zhy 2 vdiph/ou fi zhy
2 vdiph/ou_2 sk ro 2 vdiph/ou_2 sk ro
2 vdiph/ou_3 is 2 vdiph/ou_3 is
1 vdiph/Vu_2 en_wm 1 vdiph/Vu_2 en_wm
1 vdiph/Vu_3 nl 1 vdiph/Vu_3 nl
2 vdiph/&y fi nl 2 vdiph/&y fi nl
2 vdiph/yi fi no
3 vdiph/yi fi grc no
1 vdiph/y#i fi 1 vdiph/y#i fi
1 vdiph/y#i_2 is 1 vdiph/y#i_2 is
1 vdiph/yi_fr fr 1 vdiph/yi_fr fr
4 vowel/& en_rp fi hi sv 4 vowel/& en_rp fi hi sv
4 vowel/0 base2 en hi pt 4 vowel/0 base2 en hi pt
3 vowel/0_2 en_n pt_pt sw 3 vowel/0_2 en_n pt_pt sw
5 vowel/0_3 en_us en_sc en_rp en_wm hu
4 vowel/0_3 en_us en_sc en_rp hu
2 vowel/@_2 fr 2 vowel/@_2 fr
2 vowel/&_2 en_us 2 vowel/&_2 en_us
6 vowel/@_3 en_sc de hi 6 vowel/@_3 en_sc de hi
2 vowel/ii_6 en_wm 2 vowel/ii_6 en_wm
1 vowel/ii_en en 1 vowel/ii_en en
10 vowel/@_low en_rp hi ro no 10 vowel/@_low en_rp hi ro no
12 vowel/o base2 en en_wm de hi it la pt_pt sv en_wi
10 vowel/o base2 en de hi it la pt_pt sv en_wi
4 vowel/o_2 cy hi hu no 4 vowel/o_2 cy hi hu no
2 vowel/o-_2 en_n en_wm 2 vowel/o-_2 en_n en_wm
3 vowel/o_3 en_sc fr 3 vowel/o_3 en_sc fr
1 vowel/oe_4 sv 1 vowel/oe_4 sv
2 vowel/o_mid fr hu 2 vowel/o_mid fr hu
12 vowel/oo en_sc de eo la es el sv no zhy en_wi 12 vowel/oo en_sc de eo la es el sv no zhy en_wi
10 vowel/oo_1 en_n en_rp en_wm af fi sk hr vi
12 vowel/oo_1 en_n en_rp en_wm af fi sk hr vi
3 vowel/oo_2 en_sc cy cs 3 vowel/oo_2 en_sc cy cs
1 vowel/oo_3 af 1 vowel/oo_3 af
5 vowel/oo_4 hi pl it en_wi
6 vowel/oo_4 en_wm hi pl it en_wi
1 vowel/oo_5 is 1 vowel/oo_5 is
6 vowel/oo_en en en_n en_rp 6 vowel/oo_en en en_n en_rp
2 vowelr/aa_r en_sc 2 vowelr/aa_r en_sc
1 vowel/yy fr_ca 1 vowel/yy fr_ca
1 vowel/yy_2 no 1 vowel/yy_2 no
1 vowel/yy_3 sv 1 vowel/yy_3 sv
5 vowel/yy_4 de hu la is
6 vowel/yy_4 de hu la grc is
1 vwl_af/@ af 1 vwl_af/@ af
1 vwl_af/I af 1 vwl_af/I af
2 vwl_af/r@ af 2 vwl_af/r@ af
6 vwl_en/aI@ en en_n en_us en_sc en_rp en_wm
5 vwl_en/aI@ en en_n en_us en_sc en_rp
2 vwl_en/aI@_2 en_sc 2 vwl_en/aI@_2 en_sc
5 vwl_en/aU@ en en_n en_us en_sc en_wm 5 vwl_en/aU@ en en_n en_us en_sc en_wm
12 vwl_en/@L en en_us en_sc en_rp en_wm af 12 vwl_en/@L en en_us en_sc en_rp en_wm af
1 vwl_en_n/aa_5 en_n 1 vwl_en_n/aa_5 en_n
2 vwl_en_n/O@ en_n 2 vwl_en_n/O@ en_n
1 vwl_en_n/u_ en_n 1 vwl_en_n/u_ en_n
1 vwl_en/ooi@ en_wm
3 vwl_en_rp/aa en_rp 3 vwl_en_rp/aa en_rp
1 vwl_en_rp/aU@ en_rp 1 vwl_en_rp/aU@ en_rp
1 vwl_en_rp/e@ en_rp 1 vwl_en_rp/e@ en_rp

+ 1
- 0
phsource/mbrola/en1 View File

0 @- NULL 0 NULL 0 @- NULL 0 NULL
0 aI@ NULL 60 aI @ 0 aI@ NULL 60 aI @
0 aU@ NULL 75 aU @ 0 aU@ NULL 75 aU @
0 x NULL 0 k

+ 1
- 0
phsource/mbrola/us View File

0 w/ NULL 0 w 0 w/ NULL 0 w
0 ; NULL 0 NULL 0 ; NULL 0 NULL
0 g- NULL 0 NULL 0 g- NULL 0 NULL
0 x NULL 0 k_h
0 @- NULL 0 NULL 0 @- NULL 0 NULL
0 aI@ NULL 60 AI r 0 aI@ NULL 60 AI r
0 aU@ NULL 75 aU r 0 aU@ NULL 75 aU r

+ 1
- 0
phsource/mbrola/us3 View File

0 w/ NULL 0 w 0 w/ NULL 0 w
0 ; NULL 0 NULL 0 ; NULL 0 NULL
0 g- NULL 0 NULL 0 g- NULL 0 NULL
0 x NULL 0 k
0 @- NULL 0 NULL 0 @- NULL 0 NULL
0 aI@ NULL 60 AI r 0 aI@ NULL 60 AI r
0 aU@ NULL 75 aU r 0 aU@ NULL 75 aU r

+ 7
- 0
phsource/ph_base2 View File

endphoneme endphoneme




phoneme EI
vowel starttype (e) endtype (i)
length 230
formants vdiph/eei
endphoneme


phoneme oI phoneme oI
vowel starttype (o) endtype (i) vowel starttype (o) endtype (i)
length 240 length 240

+ 2
- 2
phsource/ph_english_sc View File





phoneme I phoneme I
vowel starttype (e) endtype (e)
vowel starttype (@) endtype (@)
length 110 length 110
formants vowel/e# formants vowel/e#
before l/2 vowel/@_3-30+l/L2_@L before l/2 vowel/@_3-30+l/L2_@L
endphoneme endphoneme


phoneme I2 phoneme I2
vowel starttype (e) endtype (e)
vowel starttype (@) endtype (@)
unstressed unstressed
length 110 length 110
formants vowel/e# formants vowel/e#

+ 5
- 5
phsource/ph_english_wm View File

phoneme 0 phoneme 0
vowel starttype (o) endtype (o) vowel starttype (o) endtype (o)
length 140 length 140
formants vowel/0_3
formants vowel/oo_4
reduceto @ 0 reduceto @ 0
endphoneme endphoneme


phoneme O@ phoneme O@
vowel starttype (o) endtype (@) vowel starttype (o) endtype (@)
length 240 length 240
formants vowel/o
formants vowel/oo_1
linkout r- linkout r-
endphoneme endphoneme


phoneme O phoneme O
vowel starttype (o) endtype (o) vowel starttype (o) endtype (o)
length 150 length 150
formants vowel/o
formants vowel/oo_1
reduceto @ 0 reduceto @ 0
endphoneme endphoneme




phoneme eI phoneme eI
vowel starttype (@) endtype (i) vowel starttype (@) endtype (i)
length 210
length 230
formants vdiph/@i_3 formants vdiph/@i_3
endphoneme endphoneme


phoneme aI@ phoneme aI@
vowel starttype (a) endtype (@) vowel starttype (a) endtype (@)
length 270 length 270
formants vwl_en/aI@
formants vwl_en/ooi@
linkout r- linkout r-
endphoneme endphoneme



+ 44
- 0
phsource/ph_greek_ancient View File


//====================================================
// Ancient Greek - based on base2
//====================================================

phoneme y
vowel starttype (i) endtype (i)
length 160
formants vowel/yy_4
endphoneme

phoneme EU
vowel starttype (e) endtype (u)
length 230
formants vdiph/eeu
endphoneme

phoneme OI
vowel starttype (o) endtype (i)
length 230
formants vdiph/0i
endphoneme

phoneme OU
vowel starttype (o) endtype (u)
length 230
formants vdiph/oou
endphoneme


phoneme yI
vowel starttype (i) endtype (i)
length 230
formants vdiph/yi
endphoneme


phoneme f // consider this an affricate
vls blb afr
vowelout f1=0 f2=1000 -500 -350 f3=-200 80
lengthmod 2
wave ufric/f // could replace this with a [pf] wav file
endphoneme


+ 0
- 7
phsource/ph_pt_brazil View File

endphoneme endphoneme




phoneme EI
vowel starttype (e) endtype (i)
length 230
formants vdiph/eei
endphoneme


phoneme OI phoneme OI
vowel starttype (o) endtype (i) vowel starttype (o) endtype (i)
length 230 length 230

+ 3
- 0
phsource/phonemes View File

phonemetable el base2 phonemetable el base2
include ph_greek include ph_greek


phonemetable grc base2
include ph_greek_ancient

phonemetable sv base phonemetable sv base
include ph_swedish include ph_swedish



BIN
phsource/vdiph2/i@_2 View File


BIN
phsource/vowel/e# View File


BIN
phsource/vowel/i_2 View File


BIN
phsource/vowelr/aa_r View File


BIN
phsource/vwl_en/ooi@ View File


+ 96
- 31
src/compiledict.cpp View File

char encoded_ph[200]; char encoded_ph[200];
unsigned char bad_phoneme[4]; unsigned char bad_phoneme[4];
p = linebuf; p = linebuf;
comment = NULL; comment = NULL;
phonetic = word = ""; phonetic = word = "";
if((word[0] & 0x80)==0) // 7 bit ascii only if((word[0] & 0x80)==0) // 7 bit ascii only
{ {
// 1st letter - need to consider utf8 here
// If first letter is uppercase, convert to lower case. (Only if it's 7bit ascii)
// ??? need to consider utf8 here
word[0] = tolower(word[0]); word[0] = tolower(word[0]);
} }


for(ix=0; finish==0; ix++) for(ix=0; finish==0; ix++)
{ {
c = input[ix]; c = input[ix];
if((c=='/') && (input[ix+1]=='/'))
c = input[ix] = '\n'; // treat comment as end of line


switch(c = input[ix]) switch(c = input[ix])
{ {
int different; int different;
char *prev_rgroup_name; char *prev_rgroup_name;
unsigned int char_code; unsigned int char_code;
int compile_mode=0;
char *buf; char *buf;
char buf1[120]; char buf1[120];
char *rules[N_RULES]; char *rules[N_RULES];
{ {
linenum++; linenum++;
buf = fgets(buf1,sizeof(buf1),f_in); buf = fgets(buf1,sizeof(buf1),f_in);
if((buf != NULL) && (buf[0] == '\r')) buf++; // ignore extra \r in \r\n
if(buf != NULL)
{
if((p = (unsigned char *)strstr(buf,"//")) != NULL)
*p = 0;

if(buf[0] == '\r') buf++; // ignore extra \r in \r\n
}


if((buf != NULL) && (memcmp(buf,".L",2)==0)) if((buf != NULL) && (memcmp(buf,".L",2)==0))
{ {
continue; continue;
} }


if((buf == NULL) || (memcmp(buf,".group",6)==0))
if((buf == NULL) || (buf[0] == '.'))
{ {
// next .group or end of file, write out the previous group // next .group or end of file, write out the previous group


} }
n_rules = 0; n_rules = 0;


if(buf == NULL) break; // end of file
if(compile_mode == 2)
{
// end of the character replacements section
fwrite(&n_rules,1,4,f_out); // write a zero word to terminate the replacement list
}


p = (unsigned char *)&buf[6];
while((p[0]==' ') || (p[0]=='\t')) p++; // Note: Windows isspace(0xe1) gives TRUE !
ix = 0;
while((*p > ' ') && (ix<12))
group_name[ix++] = *p++;
group_name[ix]=0;
if(buf == NULL) break; // end of file


if(sscanf(group_name,"0x%x",&char_code)==1)
if(memcmp(buf,".replace",8)==0)
{ {
// group character is given as a character code (max 16 bits)
p = (unsigned char *)group_name;
compile_mode = 2;
fputc(RULE_GROUP_START,f_out);
fputc(RULE_REPLACEMENTS,f_out);


if(char_code > 0x100)
{
*p++ = (char_code >> 8);
}
*p++ = char_code;
*p = 0;
// advance to next word boundary
while((ftell(f_out) & 3) != 0)
fputc(0,f_out);
} }


if(strlen(group_name) > 2)
if(memcmp(buf,".group",6)==0)
{ {
if(utf8_in(&c,group_name,0) < 2)
compile_mode = 1;

p = (unsigned char *)&buf[6];
while((p[0]==' ') || (p[0]=='\t')) p++; // Note: Windows isspace(0xe1) gives TRUE !
ix = 0;
while((*p > ' ') && (ix<12))
group_name[ix++] = *p++;
group_name[ix]=0;
if(sscanf(group_name,"0x%x",&char_code)==1)
{ {
fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum);
error_count++;
// group character is given as a character code (max 16 bits)
p = (unsigned char *)group_name;
if(char_code > 0x100)
{
*p++ = (char_code >> 8);
}
*p++ = char_code;
*p = 0;
}
if(strlen(group_name) > 2)
{
if(utf8_in(&c,group_name,0) < 2)
{
fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum);
error_count++;
}
group_name[2] = 0;
} }

group_name[2] = 0;
} }


continue; continue;
} }
prule = compile_rule(buf);
if((prule != NULL) && (n_rules < N_RULES))
switch(compile_mode)
{ {
rules[n_rules++] = prule;
case 1: // .group
prule = compile_rule(buf);
if((prule != NULL) && (n_rules < N_RULES))
{
rules[n_rules++] = prule;
}
break;

case 2: // .replace
{
int replace1;
int replace2;
char *p;

p = buf;
replace1 = 0;
replace2 = 0;
while(isspace2(*p)) p++;
ix = 0;
while((unsigned char)(*p) > 0x20) // not space or zero-byte
{
p += utf8_in(&c,p,0);
replace1 += (c << ix);
ix += 16;
}
while(isspace2(*p)) p++;
ix = 0;
while((unsigned char)(*p) > 0x20)
{
p += utf8_in(&c,p,0);
replace2 += (c << ix);
ix += 16;
}
if(replace1 != 0)
{
fwrite(&replace1,1,4,f_out);
fwrite(&replace2,1,4,f_out);
}
}
break;
} }
} }
fclose(f_temp); fclose(f_temp);

+ 13
- 3
src/dictionary.cpp View File

int ix; int ix;
char *p; char *p;
char *p_name; char *p_name;
unsigned int *pw;
unsigned char c, c2; unsigned char c, c2;
int len; int len;
int rule_count;


n_groups2 = 0; n_groups2 = 0;
for(ix=0; ix<256; ix++) for(ix=0; ix<256; ix++)
} }
p++; p++;


if(p[0] == RULE_REPLACEMENTS)
{
pw = (unsigned int *)(((int)p+4) & ~3); // advance to next word boundary
langopts.replace_chars = pw;
while(pw[0] != 0)
{
pw += 2; // find the end of the replacement list, each entry is 2 words.
}
p = (char *)(pw+1);
continue;
}

if(p[0] == RULE_LETTERGP2) if(p[0] == RULE_LETTERGP2)
{ {
ix = p[1] - 'A'; ix = p[1] - 'A';
} }


// skip over all the rules in this group // skip over all the rules in this group
rule_count = 0;
while(*p != RULE_GROUP_END) while(*p != RULE_GROUP_END)
{ {
p += (strlen(p) + 1); p += (strlen(p) + 1);
rule_count++;
} }
p++; p++;
} }

+ 2
- 0
src/extras.cpp View File



} // end of ConvertToItf8 } // end of ConvertToItf8


//******************************************************************************************************





//#define calcspeedtab //#define calcspeedtab

+ 9
- 7
src/numbers.cpp View File

if(langopts.numbers & 0x200) if(langopts.numbers & 0x200)
{ {
// remove vowel from the end of tens if units starts with a vowel (LANG=Italian) // remove vowel from the end of tens if units starts with a vowel (LANG=Italian)
ix = strlen(ph_tens)-1;
if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS)
next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type;

if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
ph_tens[ix] = 0;
if((ix = strlen(ph_tens)-1) >= 0)
{
if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS)
next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type;
if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
ph_tens[ix] = 0;
}
} }
sprintf(ph_out,"%s%s",ph_tens,ph_digits); sprintf(ph_out,"%s%s",ph_tens,ph_digits);
} }
decimal_point = 0; decimal_point = 0;
} }
} }
if(ph_out[0] != 0)
if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH))
{ {
int next_char; int next_char;
utf8_in(&next_char,&word[n_digits+1],0); utf8_in(&next_char,&word[n_digits+1],0);

+ 1
- 1
src/synthdata.cpp View File

#include "translate.h" #include "translate.h"
#include "wave.h" #include "wave.h"


const char *version_string = "1.29.10 16.Oct.07";
const char *version_string = "1.29.11 23.Oct.07";
const int version_phdata = 0x012901; const int version_phdata = 0x012901;


int option_device_number = -1; int option_device_number = -1;

+ 44
- 30
src/tr_languages.cpp View File





#define L_qa 0x716100 #define L_qa 0x716100
#define L_grc 0x677263 // grc Ancient Greek




#define OFFSET_GREEK 0x380 #define OFFSET_GREEK 0x380
#define OFFSET_DEVANAGARI 0x900 #define OFFSET_DEVANAGARI 0x900




static const unsigned int replace_cyrillic[] =
{0x430,0x431,0x446,0x45b,0x447,0x45f,0x455,0x434,0x452,
0x435,0x444,0x433,0x445,0x438,0x458,0x43a,0x459,
0x43b,0x43c,0x45a,0x43d,0x43e,0x43f,0x440,0x441,
0x448,0x442,0x443,0x432,0x437,0x436,
0x453,0x45c,0}; // ѓ ѕ ќ

static const unsigned int replace_cyrillic_latin[] =
{'a','b','c',0x107,0x10d,'d'+(0x17e<<16),'d'+('z'<<16),'d',0x111,
'e','f','g','h','i','j','k','l'+('j'<<16),
'l','m','n'+('j'<<16),'n','o','p','r','s',
0x161,'t','u','v','z',0x17e,
0x111,0x107,0};
static const unsigned int replace_cyrillic_latin[] =
{0x430,'a',
0x431,'b',
0x446,'c',
0x45b,0x107,
0x447,0x10d,
0x45f,'d'+(0x17e<<16),
0x455,'d'+('z'<<16),
0x434,'d',
0x452,0x111,
0x435,'e',
0x444,'f',
0x433,'g',
0x445,'h',
0x438,'i',
0x458,'j',
0x43a,'k',
0x459,'l'+('j'<<16),
0x43b,'l',
0x43c,'m',
0x45a,'n'+('j'<<16),
0x43d,'n',
0x43e,'o',
0x43f,'p',
0x440,'r',
0x441,'s',
0x448,0x161,
0x442,'t',
0x443,'u',
0x432,'v',
0x437,'z',
0x436,0x17e,
0x453,0x111,
0x45c,0x107,
0}; // ѓ ѕ ќ




void SetupTranslator(Translator *tr, int *lengths, int *amps) void SetupTranslator(Translator *tr, int *lengths, int *amps)
break; break;


case L('e','l'): // Greek case L('e','l'): // Greek
case L_grc: // Ancient Greek
{ {
static int stress_lengths_el[8] = {155, 180, 210, 210, 0, 0, 270, 300}; static int stress_lengths_el[8] = {155, 180, 210, 210, 0, 0, 270, 300};
static int stress_amps_el[8] = {15,12, 20,20, 20,24, 24,22 }; // 'diminished' is used to mark a quieter, final unstressed syllable static int stress_amps_el[8] = {15,12, 20,20, 20,24, 24,22 }; // 'diminished' is used to mark a quieter, final unstressed syllable


tr->langopts.numbers = 0xb09; tr->langopts.numbers = 0xb09;
tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands

if(name2 == L_grc)
{
// ancient greek
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1;
}
} }
break; break;


case L('h','i'): case L('h','i'):
{ {
static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f}; static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f};
static const unsigned int replace_chars_hi[11] = {0x966,0x967,0x968,0x969,0x96a,0x96b,0x96c,0x96d,0x96e,0x96f,0}; // digits 0-9
static const unsigned int replacement_chars_hi[11] = {0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0};
static int stress_lengths_hi[8] = {190, 190, 210, 210, 0, 0, 230, 250}; static int stress_lengths_hi[8] = {190, 190, 210, 210, 0, 0, 230, 250};
static int stress_amps_hi[8] = {17,14, 20,19, 20,24, 24,22 }; static int stress_amps_hi[8] = {17,14, 20,19, 20,24, 24,22 };


tr->langopts.numbers = 0x811; tr->langopts.numbers = 0x811;
tr->langopts.numbers2 = 0x100; tr->langopts.numbers2 = 0x100;
tr->letter_bits_offset = OFFSET_DEVANAGARI; tr->letter_bits_offset = OFFSET_DEVANAGARI;
tr->langopts.replace_chars = replace_chars_hi;
tr->langopts.replacement_chars = replacement_chars_hi;


memset(tr->letter_bits,0,sizeof(tr->letter_bits)); memset(tr->letter_bits,0,sizeof(tr->letter_bits));
SetLetterBitsRange(tr,LETTERGP_A,0x06,0x14); // vowel letters SetLetterBitsRange(tr,LETTERGP_A,0x06,0x14); // vowel letters


tr->langopts.numbers = 0x1c0d + 0x4000 + NUM_ROMAN_UC; tr->langopts.numbers = 0x1c0d + 0x4000 + NUM_ROMAN_UC;
tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards
tr->langopts.replace_chars = replace_cyrillic;
tr->langopts.replacement_chars = replace_cyrillic_latin;
tr->langopts.replace_chars = replace_cyrillic_latin;


SetLetterVowel(tr,'y'); SetLetterVowel(tr,'y');
SetLetterVowel(tr,'r'); SetLetterVowel(tr,'r');
{ {
static int stress_amps_hu[8] = {17,17, 19,19, 20,24, 24,22 }; static int stress_amps_hu[8] = {17,17, 19,19, 20,24, 24,22 };
static int stress_lengths_hu[8] = {185,195, 195,190, 0,0, 210,220}; static int stress_lengths_hu[8] = {185,195, 195,190, 0,0, 210,220};
static const unsigned int replace_chars_hu[] = {0xd4,0xf4,0xdb,0xfb,0};
static const unsigned int replacement_chars_hu[] = {0x150,0x151,0x170,0x171,0}; // allow o,u-circumflex for o,u-double-acute


tr = new Translator(); tr = new Translator();
SetupTranslator(tr,stress_lengths_hu,stress_amps_hu); SetupTranslator(tr,stress_lengths_hu,stress_amps_hu);
tr->charset_a0 = charsets[2]; // ISO-8859-2 tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->langopts.replace_chars = replace_chars_hu;
tr->langopts.replacement_chars = replacement_chars_hu;


tr->langopts.vowel_pause = 0x20; tr->langopts.vowel_pause = 0x20;
tr->langopts.stress_rule = 0; tr->langopts.stress_rule = 0;
tr->langopts.stress_rule = 4; // antipenultimate tr->langopts.stress_rule = 4; // antipenultimate
tr->langopts.numbers = 0x0c29 + 0x4000; tr->langopts.numbers = 0x0c29 + 0x4000;
tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards

tr->langopts.replace_chars = replace_cyrillic_latin;
tr->langopts.replacement_chars = replace_cyrillic;
} }
break; break;


{ {
static int stress_lengths_ro[8] = {170, 170, 180, 180, 0, 0, 240, 260}; static int stress_lengths_ro[8] = {170, 170, 180, 180, 0, 0, 240, 260};
static int stress_amps_ro[8] = {15,13, 18,18, 20,22, 22,22 }; static int stress_amps_ro[8] = {15,13, 18,18, 20,22, 22,22 };
static const unsigned int replace_chars_ro[5] = {0x218,0x219,0x21a,0x21b,0};
static const unsigned int replacement_chars_ro[5] = {0x15e,0x15f,0x162,0x163,0}; // replace s-comma, t-comma by s-cedilla, t-cedilla


tr = new Translator(); tr = new Translator();
SetupTranslator(tr,stress_lengths_ro,stress_amps_ro); SetupTranslator(tr,stress_lengths_ro,stress_amps_ro);
tr->langopts.stress_flags = 0x100 + 0x6; tr->langopts.stress_flags = 0x100 + 0x6;


tr->charset_a0 = charsets[2]; // ISO-8859-2 tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->langopts.replace_chars = replace_chars_ro;
tr->langopts.replacement_chars = replacement_chars_ro;
tr->langopts.numbers = 0x1829+0x6000 + NUM_ROMAN; tr->langopts.numbers = 0x1829+0x6000 + NUM_ROMAN;
tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex
} }

+ 19
- 9
src/translate.cpp View File

if(!found && iswdigit(first_char)) if(!found && iswdigit(first_char))
{ {
Lookup("_0lang",word_phonemes);
if(word_phonemes[0] == phonSWITCH)
return(0);

found = TranslateNumber(word,phonemes,&dictionary_flags,wflags); found = TranslateNumber(word,phonemes,&dictionary_flags,wflags);
} }


} // end of EmbeddedCommand } // end of EmbeddedCommand





int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert)
{//=====================================================================================================
// To allow language specific examination and replacement of characters

int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert)
{//==================================================================================
int ix; int ix;
unsigned int word; unsigned int word;
unsigned int new_c, c2, c_lower; unsigned int new_c, c2, c_lower;
int upper_case = 0; int upper_case = 0;
static int ignore_next = 0; static int ignore_next = 0;
const unsigned int *replace_chars;


if(ignore_next) if(ignore_next)
{ {
} }
if(c == 0) return(0); if(c == 0) return(0);


if(langopts.replace_chars == NULL)
if((replace_chars = tr->langopts.replace_chars) == NULL)
return(c); return(c);


// there is a list of character codes to be substituted with alternative codes // there is a list of character codes to be substituted with alternative codes
} }


new_c = 0; new_c = 0;
for(ix=0; (word = langopts.replace_chars[ix]) != 0; ix++)
for(ix=0; (word = replace_chars[ix]) != 0; ix+=2)
{ {
if(c_lower == (word & 0xffff)) if(c_lower == (word & 0xffff))
{ {
if((word >> 16) == 0) if((word >> 16) == 0)
{ {
new_c = langopts.replacement_chars[ix];
new_c = replace_chars[ix+1];
break; break;
} }
if((word >> 16) == (unsigned int)tolower(next_in)) if((word >> 16) == (unsigned int)tolower(next_in))
{ {
new_c = langopts.replacement_chars[ix];
new_c = replace_chars[ix+1];
ignore_next = 1; ignore_next = 1;
break; break;
} }
if(upper_case) if(upper_case)
new_c = towupper(new_c); new_c = towupper(new_c);
return(new_c); return(new_c);

}


int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert)
{//=====================================================================================================
// To allow language specific examination and replacement of characters
return(SubstituteChar(this,c,next_in,insert));
} }





+ 3
- 2
src/translate.h View File

#define RULE_LETTERGP 17 // A B C H F G Y letter group number #define RULE_LETTERGP 17 // A B C H F G Y letter group number
#define RULE_LETTERGP2 18 // L + letter group number #define RULE_LETTERGP2 18 // L + letter group number
#define RULE_CAPITAL 19 // word starts with a capital letter #define RULE_CAPITAL 19 // word starts with a capital letter
#define RULE_REPLACEMENTS 20 // section for character replacements
#define RULE_NO_SUFFIX 24 // N #define RULE_NO_SUFFIX 24 // N
#define RULE_NOTVOWEL 25 // K #define RULE_NOTVOWEL 25 // K
#define RULE_IFVERB 26 // V #define RULE_IFVERB 26 // V


#define NUM_ROMAN 0x20000 #define NUM_ROMAN 0x20000
#define NUM_ROMAN_UC 0x40000 #define NUM_ROMAN_UC 0x40000

// bits0-1=which numbers routine to use. // bits0-1=which numbers routine to use.
// bit2= thousands separator must be space // bit2= thousands separator must be space
// bit3= , decimal separator, not . // bit3= , decimal separator, not .
// bit16=dot after number indicates ordinal // bit16=dot after number indicates ordinal
// bit17=recognize roman numbers // bit17=recognize roman numbers
// bit18=Roman numbers only if upper case // bit18=Roman numbers only if upper case

int numbers; int numbers;


// bits 1-4 use variant form of numbers before thousands,millions,etc. // bits 1-4 use variant form of numbers before thousands,millions,etc.
// bit7=(LANG-ru) use MB for 1 thousand, million, etc // bit7=(LANG-ru) use MB for 1 thousand, million, etc
// bit8=(LANG=sw) special word for 100,000s // bit8=(LANG=sw) special word for 100,000s
int numbers2; int numbers2;

int max_roman; int max_roman;
int thousands_sep; int thousands_sep;
int decimal_sep; int decimal_sep;
char ideographs; // treat as separate words char ideographs; // treat as separate words
int testing; // testing options: bit 1= specify stressed syllable in the form: "outdoor/2" int testing; // testing options: bit 1= specify stressed syllable in the form: "outdoor/2"
const unsigned int *replace_chars; // characters to be substitutes const unsigned int *replace_chars; // characters to be substitutes
const unsigned int *replacement_chars; // substitutes for replace_chars
} LANGUAGE_OPTIONS; } LANGUAGE_OPTIONS;





Loading…
Cancel
Save