*_rules: New option, (Pb to specify a character such as apostrophe which splits a word into two parts (used for lang-tr). git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@150 d46cf337-b52f-0410-862d-fd96e6ae7743 master
@@ -88,13 +88,13 @@ r R s t ts v z Z | |||
Dictionary es_dict | |||
@- a aI e E eI eU i | |||
o O oI u | |||
@- a aI aU e E eI eU | |||
i o O oI u | |||
* ** : b B d D f | |||
g j J J^ k l l^ m | |||
n n^ p Q r R s t | |||
T tS v v# w x z | |||
g j J J^ k l m n | |||
n^ p Q r R s t T | |||
tS v v# w x z | |||
Dictionary fi_dict | |||
@@ -397,10 +397,9 @@ Dictionary tr_dict | |||
a e E i I o O u | |||
u# W y Y | |||
: ; b d dZ f g h | |||
j k l m n p Q r | |||
R s S t tS v w z | |||
Z | |||
b c d dZ f g h j | |||
J k l m n p r R | |||
s S t tS v z Z | |||
Dictionary ku_dict |
@@ -10,63 +10,63 @@ _#32 Esp'aTjo | |||
// names of symbols | |||
_. punto | |||
_, kOma | |||
_; p,untOik'Oma | |||
_, koma | |||
_; p,untoik'oma | |||
_: d,Osp'untOs | |||
_! TE*R'aRaDmi**aTj'On | |||
_? TE*R'aRintE*R,OQaTj'On | |||
_? TE*R'aRintE*R,oQaTj'On | |||
_¡ aB@-*'iRaDmi**aTj'On | |||
_¿ aB@-*'iRintE*R,OQaTj'On | |||
_¿ aB@-*'iRintE*R,oQaTj'On | |||
_= iQw'al | |||
_< mEn'ORke | |||
_< men'ORke | |||
_> maJ^'ORke | |||
_' apOst@-*'Ofo | |||
_" kOm'iJ^as | |||
_' apOst@-*'ofo | |||
_" kom'iJ^as | |||
_+ s'iQnOdesum'aR | |||
_$ dOlar | |||
_# almOaD'il^a | |||
_* astE*'isko | |||
_$ dolar | |||
_# almoaD'iJ^a | |||
_* aste*'isko | |||
_- gJ^On | |||
__ suB@-*aJ^'aDo | |||
_/ ba*Ra | |||
_\ b'a*RaimbERt'iDa | |||
_` aT'EntOg@-*'aBe | |||
_( ,aB@-*epa**'EntEsis | |||
_) Tj,E*Rapa**'EntEsis | |||
_[ ,aB@-*ekORtS'Ete | |||
_] Tj,E*RakORtS'Ete | |||
_( ,aB@-*epa**'Entesis | |||
_) Tj,E*Rapa**'Entesis | |||
_[ ,aB@-*ekORtS'ete | |||
_] Tj,E*RakORtS'ete | |||
_{ ,aB@-*eJ^'aBe | |||
_} Tj,E*RaJ^'aBe | |||
_« kOm'iJ^as||iTkJ^'ERDas | |||
_» kOm'iJ^as||dE**'EtSas | |||
_« kom'iJ^as||iTkJ^'ERDas | |||
_» kom'iJ^as||dE**'EtSas | |||
^ TiRkumfl'Exo | |||
^ TiRkumfl'exo | |||
€ eU*o | |||
% pOrTj'Ento | |||
& ampERs'ant | |||
@ a*R'OBa | |||
@ a*R'oBa | |||
/ ba*Ra | |||
© kOpi*R'aIt | |||
© kopi*R'aIt | |||
£ liB@-*as | |||
¶ p'a*Rafo | |||
§ sEkTj'On | |||
// numbers | |||
_0 TE*o | |||
_0 Te*o | |||
_1 'uno | |||
_2 d'Os | |||
_3 t@-*'es | |||
_4 kw'at@-*o | |||
_5 T'inko | |||
_6 s'eIs | |||
_7 sj'Ete | |||
_7 sj'ete | |||
_8 'OtSo | |||
_9 nw'Eve | |||
_9 nw'eve | |||
_1X dj'ET | |||
_11 'OnTe | |||
_12 d'OTe | |||
_13 t@-*'ETe | |||
_12 d'oTe | |||
_13 t@-*'eTe | |||
_14 kat'ORTe | |||
_15 k'inTe | |||
_20 v'eInte | |||
@@ -78,21 +78,25 @@ _6X sEs'Enta | |||
_7X sEt'Enta | |||
_8X OtS'Enta | |||
_9X nOv'Enta | |||
_0C T'ientOs | |||
_0C Tj'EntOs | |||
_1C0 T'ien // exactly one hundred | |||
_1C T'iento | |||
_5C kinj'EntOs | |||
_7C s,EtETj'EntOs | |||
_9C n,OvETj'EntOs | |||
_7C s,eteTj'EntOs | |||
_9C n,OveTj'EntOs | |||
_0M1 m'il | |||
_1M1 m'il // no '1' before thousand | |||
_0M2 mil^'Ones | |||
_1M2 'unmil^'On | |||
_0M4 _bil^'Ones | |||
_1M4 'unbil^'On | |||
_0M2 miJ^'ones | |||
_1M2 'unmiJ^'On | |||
_0M4 _biJ^'onEs | |||
_1M4 'unbiJ^'On | |||
?2 _0M2 mij:'ones | |||
?2 _1M2 'unmij:'On | |||
?2 _0M4 _bij:'onEs | |||
?2 _1M4 'unbij:'On | |||
_0and i | |||
_dpt kOma | |||
//_roman ROm'ano | |||
_dpt koma | |||
//_roman Rom'ano | |||
vi vi // not a Roman number | |||
@@ -5,6 +5,7 @@ | |||
// ?1 Castilian | |||
// ?2 Latin America | |||
.L01 j w l r d g n m | |||
.group a | |||
_) a (_ a | |||
@@ -12,6 +13,8 @@ | |||
ai aI | |||
ay (K aI | |||
ay (_ 'aI | |||
au aU | |||
au (_ 'aU | |||
.group b | |||
@@ -20,7 +23,9 @@ | |||
_) b b | |||
m) b b | |||
n) b b | |||
b (L01 b | |||
b (iA b | |||
b (uA b | |||
.group c | |||
_) c (_ Te | |||
@@ -39,12 +44,13 @@ | |||
.group e | |||
_) e (_ e | |||
e E | |||
e (_ e | |||
e e | |||
e (CK E | |||
ei eI | |||
ey (K eI | |||
ey (_ 'eI | |||
_) eu eU | |||
eu eU | |||
eu (_ 'eU | |||
.group f | |||
@@ -89,6 +95,7 @@ | |||
_) l (_ Ele | |||
l l | |||
ll J^ | |||
?2 A) ll (A j: | |||
.group m | |||
@@ -109,8 +116,9 @@ | |||
.group o | |||
_) o (_ o | |||
o O | |||
o (_ o | |||
o o | |||
o (CK O | |||
oi oI | |||
oy (K oI | |||
oy (_ 'oI | |||
@@ -151,6 +159,7 @@ | |||
_) u (_ u | |||
u u | |||
u (A w | |||
u (y_ w | |||
l) u (A %u | |||
r) u (A %u | |||
@@ -163,8 +172,15 @@ | |||
_) v v# | |||
?1 m) v b | |||
?1 n) v b | |||
?1 v (L01 b | |||
?1 v (iA b | |||
?1 v (uA b | |||
?2 v v# | |||
?2 _) v v | |||
?2 v (L01 v | |||
?2 v (j v | |||
?2 v (iA v | |||
?2 v (uA v | |||
.group w | |||
@@ -188,6 +204,7 @@ | |||
n) y J | |||
_l) y J | |||
y (A J^ | |||
?2 A) y (A j: | |||
.group z | |||
@@ -211,7 +228,7 @@ | |||
_) ++ (_ masm'as | |||
\+\+\+) + // ignore + after the first 3 | |||
# almOaD'il^a | |||
# almOaD'iJ^a | |||
\#) # | |||
__) - (_D m'EnOs |
@@ -5,7 +5,7 @@ | |||
// 2006-11-18 Gilles Casse <[email protected]> | |||
// | |||
// Updated 2008-02-20 Michel Such <[email protected]> | |||
// Updated 2008-02-24 Michel Such <[email protected]> | |||
// | |||
// * Numbers, a few abbreviations and exceptions. | |||
// | |||
@@ -32,7 +32,7 @@ e @ | |||
f Ef | |||
g Ze | |||
h aS | |||
// i i | |||
i i | |||
ï i:tRema | |||
j Zi | |||
k ka | |||
@@ -269,11 +269,12 @@ nous $u+ $verbf | |||
vous $u+ $verbf | |||
elles $u+ $verbf | |||
ils $u+ $verbf | |||
on $u $verbf | |||
on O~n2 $u $verbf | |||
me $u $verbf | |||
te $u $verbf | |||
se $u $verbf | |||
lui $u $verbf | |||
ça $u $verbf | |||
ce $u $nounf | |||
cette $u $nounf | |||
@@ -368,44 +369,29 @@ tout t'ut2 $u | |||
// Letters which can be words | |||
//=========================== | |||
à a:aksA~gRav $atend | |||
i i $atend | |||
y i:gR'Ek $atend | |||
// pronunciation exceptions | |||
aspic aspik | |||
consent kO~s'A~t2 | |||
scient si | |||
bénéficient benefisi $verb | |||
coing kwE~ | |||
concurrent kO~kyR'A~ | |||
content kO~t@-t2 $verb | |||
convent kO~vA~ | |||
couvent k'uvt2 $verb | |||
(couvent couvent) kuvA~||k'uvt2 | |||
dessus d@sy | |||
dessous d@su | |||
divers divErz2 | |||
►évident evid'A~ | |||
évident evidt2 $verb | |||
ferment fErm'A~ | |||
ferment f'Ermt2 $verb | |||
firent f'irt2 | |||
parent paR'A~ | |||
parent p'art2 $verb | |||
pastis pastis | |||
poing pwE~ | |||
président pRezid'A~ | |||
président pRezidt2 $verb | |||
résident Rezid'A~ | |||
résident Rezidt2 $verb | |||
ressent r@s'A~t2 | |||
récurrent RekyR'A~ | |||
riz ri | |||
sergent sErZ'A~ | |||
serment sErm'A~ | |||
serpent sErp'A~ | |||
torrent tOR'A~ | |||
sphincter sfE~ktEr | |||
tunis tynis | |||
@@ -421,14 +407,10 @@ montréal mO~Real | |||
(cent une) s'A~||yn | |||
(cent onzième) s'A~||O~zj'Em | |||
(cent onze) s'A~||O~z | |||
(on habille) O~||nab'ij | |||
absent absA~ | |||
accident aksidA~ | |||
adéquat adekuat2 | |||
adéquate adekuat | |||
airbus Erbys | |||
anus anys | |||
ardent aRdA~ | |||
auvent ovA~ | |||
bissus bisys | |||
bonus bonys | |||
bus bys | |||
@@ -437,27 +419,22 @@ campus kA~pys | |||
casus kazys | |||
choeur k@r | |||
cocus coky | |||
discident disidA~ | |||
estomac Estoma | |||
fils fis | |||
imprudent E~pRydA~ | |||
iris iris | |||
juin ZyE~ | |||
laser lazEr | |||
malus malys | |||
mars maRs | |||
minus minys | |||
occident OksidA~ | |||
orient ORjA~ | |||
paravent paravA~ | |||
pays pEi | |||
phallus falys | |||
polder pOldEr | |||
prudent pRydA~ | |||
secret sYkRE | |||
souris suri | |||
sus sys | |||
sus sy $verb | |||
to tu | |||
vénus venys | |||
@@ -541,23 +518,16 @@ flirt fl@Rt | |||
// words from other languages | |||
about _^_en | |||
acer asEr | |||
alone _^_en | |||
also _^_en | |||
and _^_en | |||
amazon amaz'On | |||
apple _^_en | |||
april _^_en | |||
at _^_en | |||
(audible manager) odibl||manadZ@r | |||
be _^_en | |||
bit _^_en | |||
bluetooth blut'us | |||
both _^_en | |||
can _^_en | |||
computer _^_en | |||
(cyber link) _^_en | |||
debian dEbjAn | |||
don't _^_en | |||
driver _^_en | |||
emacs Emaks | |||
emacspeak Emakspi:k | |||
@@ -565,13 +535,10 @@ end _^_en | |||
epson EpsOn | |||
espeak @spi:k | |||
ethernet etERnEt | |||
ever _^_en | |||
exit egzit | |||
eye _^_en | |||
eyes _^_en | |||
false _^_en | |||
(file zilla) fajl||zija | |||
for _^_en | |||
get _^_en | |||
google gu:g@l | |||
gnome gnom | |||
@@ -580,19 +547,10 @@ hamburger _!A~b@rg@r | |||
help _^_en | |||
hot _^_en | |||
in _^_en | |||
into _^_en | |||
inside _^_en | |||
insight _^_en | |||
internet E~tERnEt | |||
(internet explorer) E~tErn'Et||EksplOr'@r | |||
is _^_en | |||
it _^_en | |||
its _^_en | |||
june _^_en | |||
july _^_en | |||
klaxon klaksOn | |||
later _^_en | |||
latest _^_en | |||
layer _^_en | |||
let _^_en | |||
liszt list | |||
@@ -601,46 +559,25 @@ login _^_en | |||
logon _^_en | |||
(mac os x) makoEsiks | |||
made _^_en | |||
mail _^_en | |||
march _^_en | |||
media medja | |||
messenger _^_en | |||
my _^_en | |||
name _^_en | |||
never _^_en | |||
not _^_en | |||
null _^_en | |||
october _^_en | |||
of _^_en | |||
often _^_en | |||
ok oke | |||
open Op'@n | |||
outlook autluk | |||
outside _^_en | |||
outsider _^_en | |||
paint _^_en | |||
paper _^_en | |||
player _^_en | |||
redhat REdat | |||
same _^_en | |||
sametime _^_en | |||
she _^_en | |||
schubert SubER | |||
since _^_en | |||
sun _^_en | |||
sure _^_en | |||
(text aloud) tEkst||@lawd | |||
their _^_en | |||
there _^_en | |||
these _^_en | |||
those _^_en | |||
they _^_en | |||
thus _^_en | |||
true _^_en | |||
ubuntu ubuntu | |||
up _^_en | |||
viking vikiN | |||
was _^_en | |||
won't _^_en | |||
yacht jot | |||
yes _^_en | |||
@@ -4,7 +4,7 @@ | |||
// 2006-11-18 Gilles Casse <[email protected]> | |||
// | |||
// Updated: 2008-02-20 Michel Such <[email protected]> | |||
// Updated: 2008-02-24 Michel Such <[email protected]> | |||
// | |||
// * The rules are based on Cicero TTS. | |||
// Y | |||
@@ -15,9 +15,6 @@ | |||
// Y front vowels: e i y é ê è î | |||
// K not a vowel (i.e. consonant, space, ) | |||
.L01 a b c d e f g h i l p q r t v | |||
.L02 a e i o u y | |||
.group a | |||
ae (_ e // reggae vitae | |||
@@ -53,19 +50,7 @@ | |||
a a // bateau | |||
// group a: English section | |||
_C) ad (_ _^_en // bad, had, sad | |||
_) again (X _^_en // again, against | |||
alk _^_en // talk, walk | |||
f) all (en_ _^_en // fallen | |||
sm) all _^_en // small, smaller | |||
ank _^_en // tank, blank | |||
ark _^_en // dark, park | |||
ainl _^_en | |||
ainm _^_en | |||
_C) ast (_ _^_en | |||
_C) ast (eX _^_en | |||
ather (_ _^_en // rather | |||
aunc _^_en | |||
a (wC _^_en | |||
@@ -78,14 +63,10 @@ | |||
back _^_en | |||
bbl _^_en | |||
bird _^_en // bird | |||
_) blue (X _^_en // blue | |||
bly (_ _^_en // probably | |||
board _^_en // board, keyboard | |||
box (_ _^_en // box, inbox, outbox | |||
bug (_ _^_en // bug, debug | |||
bug (g _^_en // debugger | |||
buil _^_en | |||
_) buy _^_en | |||
_) by _^_en // by, bye, bypass | |||
@@ -127,15 +108,14 @@ | |||
c k // recoin donc | |||
s) cien (t jA~ // conscient scientifique | |||
cien (t_ i // ils apprécient, remercient. | |||
i) cien (t_ sjA~ // coefficient | |||
cien (t_ si // ils apprécient, remercient. | |||
effi) cien (t_ sjA~ // coefficient | |||
défi) cien (t_ sjA~ // déficient | |||
// group c: English section | |||
cast (_ _^_en // broadcast | |||
chme _^_en | |||
cult (_ _^_en // difficult | |||
C) ch (_ _^_en // french | |||
cut (_ _^_en // cut, shortcut | |||
cy (_ _^_en // currency, frequency | |||
.group d | |||
@@ -162,18 +142,9 @@ | |||
d d // don bled | |||
// group d: English section | |||
') d (_ _^_en | |||
day (_ _^_en | |||
_) dec _^_en | |||
dece _^_en | |||
_) def _^_en | |||
dele _^_en | |||
deter _^_en | |||
_) devic _^_en | |||
diffe _^_en | |||
dle (X _^_en // bundle, handle | |||
dly (_ _^_en | |||
dy (_ _^_en | |||
.group e | |||
@@ -218,11 +189,58 @@ | |||
Ci) en (nA E // mienne | |||
éC) en (t_ A~ // récent différent élément | |||
étic) en (t_ A~ // réticent | |||
_C) en (t_ A~ // cent vent lent dent | |||
cc) en (t_ A~ // accent | |||
_jac) en (t_ A~ // sous-jacent | |||
Asc) en (t_ A~ // luminescent | |||
dol) en (t_ A~ // dolent | |||
imCoC) en (t_ A~ // impotent | |||
inCoC) en (t_ A~ // innocent, indolent | |||
XACcid) en (t_ A~ // accident, occident | |||
_ag) en (t_ A~ // agent | |||
_arC) en (t_ A~ // argent, ardent, arpent | |||
_urg) en (t_ A~ // urgent | |||
V_urg) en (t_ // urgent (verbe) | |||
_émin) en (t_ A~ // éminent | |||
immin) en (t_ A~ // imminent | |||
oémin) en (t_ A~ // proéminent | |||
jac) en (t_ A~ // sous-jacent, adjacent | |||
_lat) en (t_ A~ // latent | |||
lig) en (t_ A~ // intelligent | |||
man) en (t_ A~ // permanent | |||
ndig) en (t_ A~ // indigent | |||
_pat) en (t_ A~ // patent | |||
rmam) en (t_ A~ // firmament | |||
xig) en (t_ A~ // exigent | |||
Vxig) en (t_ // exigent | |||
éCerg) en (t_ A~ // détergent | |||
V_éCerg) en (t_ // émergent | |||
verg) en (t_ A~ // convergent, divergent | |||
Vverg) en (t_ // convergent, divergent (verbe) | |||
_serg) en (t_ A~ // sergent | |||
CACCim) en (t_ A~ // condiment, gentiment | |||
dim) en (t_ A~ // rudiment | |||
Agim) en (t_ A~ // régiment | |||
Alim) en (t_ A~ // poliment | |||
inim) en (t_ A~ // infiniment | |||
manim) en (t_ A~ // maniment | |||
onim) en (t_ A~ // boniment | |||
plim) en (t_ A~ // compliment | |||
Atim) en (t_ A~ // bâtiment | |||
_cim) en (t_ A~ // ciment | |||
_pim) en (t_ A~ // piment | |||
erm) en (t_ A~ // ferment, serment | |||
Vferm) en (t_ // ferment, referment (verbe) | |||
Arp) en (t_ A~ // arpent, serpent | |||
CArr) en (t_ A~ // concurrent, torrent | |||
mitt) en (t_ A~ // intermittent | |||
énit) en (t_ A~ // pénitent | |||
tourm) en (t_ A~ // tourment | |||
ti) en (t_ E~ // retient | |||
ati) en (t_ A~ // patient | |||
@@ -239,18 +257,22 @@ | |||
mom) en (t_ A~ // moment | |||
Aaim) en (t_ A~ | |||
Caim) en (t_ A~ | |||
cum) en (t_ A~ | |||
dum) en (t_ A~ | |||
gum) en (t_ A~ | |||
lum) en (t_ A~ | |||
rum) en (t_ A~ // prudemment | |||
oCum) en (t_ A~ | |||
rum) en (t_ A~ | |||
_cli) en (t A~ // client | |||
éCid) en (t_ A~ | |||
VéCid) en (t_ | |||
Xtrid) en (t_ A~ | |||
_laur) en (t_ A~ | |||
_mécont) en (t_ A~ | |||
_cont) en (t_ A~ | |||
mpét) en (t_ A~ // compétent | |||
prés) en (t_ A~ | |||
_Vcont) en (t_ t2 | |||
éC) en (t_ A~ // compétent | |||
_souv) en (t_ A~ // souvent | |||
Cud) en (t_ A~ | |||
s_couv) en (t_ // elles couvent | |||
qui_couv) en (t_ | |||
@@ -271,7 +293,6 @@ | |||
en (s_ A~ | |||
en (CA A~ // pentathlon | |||
en (CC A~ // entre | |||
en (ch_ En // french | |||
sp) ens (_ Ens // suspens | |||
éC) ens (_ A~z2 // dépens démens | |||
@@ -386,6 +407,8 @@ | |||
tr) e (CrA @ // entreprise | |||
e (CC E // infect pelle mettre | |||
e (CC E // infect pelle mettre | |||
_s) e (cr @ // secret | |||
e (C_ E | |||
es (_ z2 | |||
@@ -403,37 +426,21 @@ | |||
// group e: English section | |||
eac _^_en | |||
C) ead _^_en // read, head | |||
eaf _^_en // leaf | |||
eag _^_en // eagle | |||
eak _^_en // break, speak | |||
Cr) eam _^_en // stream | |||
Cl) ean _^_en // clean | |||
l) ean _^_en // lean | |||
m) ean _^_en // mean | |||
ear _^_en // ear, search | |||
eas _^_en // please | |||
eat _^_en // eat, seat | |||
eave _^_en // leave | |||
ed (_ _^_en | |||
_) edit _^_en | |||
ee _^_en // meeting | |||
eft (_ _^_en // left | |||
ehen _^_en // comprehensive | |||
eing _^_en // being, goeing | |||
eive _^_en // receive | |||
eith _^_en // either neither | |||
ej _^_en | |||
_) el (AC _^_en // element, eliminate | |||
eld (_ _^_en // field | |||
elf (_ _^_en // shelf | |||
elves (_ _^_en // shelves | |||
ember (_ _^_en | |||
enter (_ _^_en // enter, center | |||
ely (_ _^_en // lately | |||
_) enhan _^_en | |||
ooC) er (_ _^_en // scooter | |||
ern (_ _^_en // western | |||
_C) etter (_ _^_en // letter, better | |||
ack) et (_ _^_en // racket, packet | |||
ock) et (_ _^_en // rocket, pocket | |||
qu) est (_ _^_en // request | |||
@@ -454,16 +461,9 @@ | |||
_neu) f (_heures v | |||
// group f: English section | |||
fail (_ _^_en // fail | |||
fail (A _^_en // failure | |||
faith _^_en // faith | |||
_) fire _^_en | |||
fly _^_en | |||
ford (_ _^_en | |||
_) frame _^_en | |||
_) freq _^_en | |||
friend _^_en // friend, friendly | |||
fy (_ _^_en | |||
.group g | |||
@@ -512,7 +512,6 @@ | |||
@@) gate (X _^_en | |||
gh _^_en // high, higher | |||
girl _^_en // girl | |||
gly (_ _^_en | |||
give (X _^_en | |||
gy (_ _^_en | |||
@@ -599,7 +598,6 @@ _) h (umo // humour | |||
// group h: English section | |||
_) half _^_en | |||
_) h (As_ _^_en // has, his | |||
here (_ _^_en // here | |||
@@ -631,30 +629,12 @@ _) h (As_ _^_en // has, his | |||
ique (_ ik | |||
// group i: English section | |||
A_) i (_ _^_en | |||
C_) i (_ _^_en | |||
_) i (_A _^_en | |||
_) i (_C _^_en | |||
L02C) ic (_ _^_en | |||
L02CC) ic (_ _^_en | |||
@C) id (_ _^_en | |||
idd _^_en | |||
iev _^_en | |||
ife (_ _^_en | |||
igg _^_en | |||
ike (X _^_en // bike, like | |||
ild _^_en | |||
ilt _^_en | |||
ing (_ _^_en // parking | |||
ind (_ _^_en // find, mind | |||
_C) ind (er_ _^_en // finder, reminder | |||
_AC) ind (er_ _^_en // finder, reminder | |||
ink _^_en // link, pink | |||
ip (_ _^_en // chip, ship | |||
ious (_ _^_en // various | |||
iously (_ _^_en // previously | |||
ist (_ _^_en // tourist | |||
ism (_ _^_en // tourism | |||
ize _^_en // realize | |||
@@ -667,7 +647,8 @@ _) h (As_ _^_en // has, his | |||
ïn (C E~ // coïncider | |||
ïn (_ E~ | |||
ïs is // maïs, archaïsme | |||
ï (q i // archaïquee | |||
ï (q i // archaïque | |||
ï (c i // laïc | |||
a) ï j // aïeul | |||
ï i // ambiguïté | |||
@@ -675,12 +656,16 @@ _) h (As_ _^_en // has, his | |||
.group j | |||
j Z // adjoint joujoux | |||
// group j: English section | |||
ject (_ _^_en | |||
.group k | |||
k k // kafka | |||
// group k: English section | |||
AC) k _^_en // blank, black, dark | |||
ke (X _^_en // basket, make, take | |||
key _^_en | |||
ky (_ _^_en | |||
@@ -694,15 +679,8 @@ _) h (As_ _^_en // has, his | |||
ui) ll j // juillet | |||
// group l: English section | |||
') ld (_ _^_en | |||
') ll (_ _^_en | |||
less (_ _^_en // noiseless | |||
_) live _^_en | |||
ll (_ _^_en | |||
lly (_ _^_en | |||
lk (_ _^_en | |||
lessly (_ _^_en // endlessly | |||
lord (_ _^_en | |||
.group m | |||
@@ -710,9 +688,7 @@ _) h (As_ _^_en // has, his | |||
mm m // pomme | |||
// group m: English section | |||
') m (_ _^_en | |||
mail (A _^_en // mailer | |||
may _^_en | |||
mov (A _^_en // move, movy | |||
@@ -726,10 +702,6 @@ _) h (As_ _^_en // has, his | |||
A) ng (_ N // parking meeting | |||
nn n // panne | |||
// group n: English section | |||
n't (_ _^_en | |||
nunc _^_en | |||
ny (_ _^_en | |||
.group o | |||
@@ -808,22 +780,14 @@ _) h (As_ _^_en // has, his | |||
oa (X _^_en // approach, load | |||
oa (CeX _^_en | |||
oach _^_en | |||
oes (_ _^_en | |||
old (er_ _^_en // folder, older | |||
C) oing (_ _^_en // going | |||
_aC) ong (_ _^_en // along, among | |||
oo (Ce _^_en // boomer | |||
oo (X _^_en // pool | |||
C) oot _^_en // bootable football | |||
_) one _^_en | |||
_) onl _^_en | |||
orm (_ _^_en | |||
_sC) ot _^_en | |||
othe _^_en // other, mother | |||
C) ou (ld_ _^_en // could, should | |||
oun (C _^_en // bounce, found | |||
_C) ouse _^_en // mouse, house | |||
C) outh _^_en // mouth, south | |||
_) over _^_en // over | |||
ow _^_en // cow, town, down | |||
oy (X _^_en // boy, toy | |||
@@ -859,16 +823,8 @@ C) oing (_ _^_en // going | |||
// group p: English section | |||
pad (_ _^_en | |||
plug _^_en // plug | |||
pmen _^_en | |||
ply (_ _^_en // simply | |||
_) pre (L01 _^_en | |||
press (_ _^_en | |||
print (_ _^_en // print | |||
printer (_ _^_en // printer | |||
_) prove (X _^_en // prove | |||
_AC) prove (X _^_en // improve | |||
_AC) provem _^_en // improvement | |||
py (_ _^_en // copy | |||
.group q | |||
@@ -879,6 +835,7 @@ C) oing (_ _^_en // going | |||
_A) qu (ilat ky // équilatéral | |||
_C) qu (a kw // squale square | |||
_A) qu (a kw // équateur | |||
dA) qu (a kw // adequate | |||
_) qu (artz kw // quartz | |||
qu k // quatre | |||
_) que (_ k@ // que | |||
@@ -901,8 +858,6 @@ mou) rr RR // mourrai | |||
_) real _^_en | |||
rese _^_en | |||
rst (_ _^_en | |||
_) rule (X _^_en // rule | |||
ry (_ _^_en // theory | |||
.group s | |||
@@ -978,20 +933,13 @@ e) s (_h z2 | |||
s (v z | |||
// group s: English section | |||
') s (_ _^_en | |||
sh (_ _^_en | |||
she (C_ _^_en | |||
ship _^_en // friendship | |||
shop _^_en // shop | |||
sh (At _^_en // shut, shot | |||
shout _^_en | |||
sk _^_en | |||
_) smil _^_en // smile, smiley | |||
spy _^_en | |||
A) ss (_ _^_en // boss, cross | |||
_) state _^_en // state, statement | |||
C) s (ton _^_en // winston | |||
stone (_ _^_en | |||
sy (_ _^_en | |||
@@ -1057,8 +1005,6 @@ C) te (_ t@- // patte | |||
_ce) t (_ t | |||
// group t: English section | |||
tch (_ _^_en | |||
_) th (AX _^_en // that, this, then, than | |||
C) th (_ _^_en // month | |||
_) time _^_en | |||
tle (_ _^_en | |||
@@ -1067,10 +1013,7 @@ C) te (_ t@- // patte | |||
try (_ _^_en // try, country | |||
sCar) t (_ _^_en // smart, start | |||
sCar) t (er_ _^_en // starter | |||
tme _^_en | |||
tne _^_en | |||
_) tun (AX _^_en | |||
ty (_ _^_en // party, buty | |||
.group u | |||
@@ -1104,28 +1047,21 @@ C) te (_ t@- // patte | |||
g) u (ë y // ambiguë | |||
// group u: English section | |||
C) u (ch_ _^_en // much such | |||
C) ui (ce _^_en // produice, juice | |||
umber (_ _^_en | |||
ump (_ _^_en | |||
unch _^_en | |||
_C) unct _^_en // function, punctuation | |||
under _^_en // under, understand, thunder | |||
_) up (C _^_en // upper, update | |||
upt (_ _^_en | |||
up (_ _^_en // setup | |||
C) ur (ch _^_en // church | |||
urn (X _^_en // burn, turn | |||
ust (_ _^_en // just, trust | |||
rib) u (teX _^_en // tribute, attribute | |||
.group v | |||
v v | |||
// group v: English section | |||
') ve (_ _^_en | |||
vail (A _^_en | |||
voice (X _^_en | |||
void (_ _^_en | |||
vy (_ _^_en | |||
@@ -1142,11 +1078,9 @@ C) te (_ t@- // patte | |||
wr _^_en // write | |||
w (ACC _^_en // wash, wish, with | |||
way _^_en // way, away | |||
wh _^_en // what, which, who | |||
_) wi _^_en // wire | |||
win _^_en // winner, window | |||
wise _^_en | |||
_) wom _^_en // woman | |||
wor _^_en // word, world | |||
@@ -1177,9 +1111,6 @@ C) te (_ t@- // patte | |||
si) x (iè z // sixième | |||
deu) x (iè z // deuxième | |||
// group x: English section | |||
xamp _^_en | |||
.group y | |||
y i // cryogénique myope |
@@ -55,7 +55,7 @@ | |||
க (் g | |||
்) க ga | |||
்) க (B g | |||
_) க ka | |||
_) க kV | |||
_) க (B k | |||
க்க k:a | |||
க்க (B k: | |||
@@ -78,7 +78,7 @@ | |||
ட d.a | |||
ட (B d. | |||
_) ட t.a | |||
_) ட t.V | |||
_) ட (B t. | |||
ட்ட t.a | |||
ட்ட (B t. | |||
@@ -88,7 +88,7 @@ | |||
த da | |||
த (B d | |||
_) த ta | |||
_) த tV | |||
_) த (B t | |||
த்த t:a | |||
த்த (B t: | |||
@@ -101,10 +101,12 @@ | |||
ப ba | |||
ப (B b | |||
_) ப pa | |||
_) ப pV | |||
_) ப (B p | |||
ப்ப p:a | |||
ப்ப (B p: | |||
ஃ) ப fa | |||
ஃ) ப (B f | |||
ம ma | |||
ம (B m | |||
@@ -117,7 +119,7 @@ | |||
ற Ra | |||
ற (B R | |||
ற் (ற t // RR -> tR | |||
ற் (ற t. // RR -> t.R | |||
ல la | |||
ல (B l |
@@ -9,8 +9,8 @@ c dZE | |||
d dE | |||
e E | |||
f fE | |||
g g;E | |||
ğ jumuS'ak||g;'E | |||
g JE | |||
ğ jumuS'ak||J'E | |||
h hE | |||
ı u# | |||
i i | |||
@@ -19,7 +19,7 @@ k ka | |||
l lE | |||
m mE | |||
n nE | |||
o O | |||
_o O | |||
ö W | |||
p pE | |||
q kvE | |||
@@ -37,41 +37,79 @@ z zE | |||
_?? sEmb'Ol | |||
_0 su#fu#r | |||
_1 bir | |||
_2 iki | |||
_0 su#fu#R | |||
_1 biR | |||
_2 ici | |||
_3 YtS | |||
_4 dWrt | |||
_4 dWRt | |||
_5 beS | |||
_6 altu# | |||
_7 jedi | |||
_8 sekiz | |||
_8 seciz | |||
_9 dokuz | |||
_10 on | |||
_11 'onbir | |||
_12 'oniki | |||
_11 'onbiR | |||
_12 'onici | |||
_1X on | |||
_2X jirmi | |||
_2X jiRmi | |||
_3X otuz | |||
_4X ku#rk | |||
_4X ku#Rk | |||
_5X elli | |||
_6X altmu#S | |||
_7X jetmiS | |||
_8X seksEn | |||
_9X doksan | |||
_0C jyz | |||
_2C 'ikijyz | |||
_2C 'icijyz | |||
_0M1 bIn | |||
_1M1 bIn | |||
_0M2 miljon | |||
_1M2 miljon | |||
_0M3 miljar | |||
_1M3 miljar | |||
_dpt _virg,Yl_| | |||
_dpt _viRg,Yl_| | |||
// function words | |||
// exceptions | |||
// conjunctions | |||
ve $brk // and | |||
yoksa $brk // or | |||
veya $brk // or | |||
ama $brk $1 // but | |||
ki $brk // that | |||
mı $u // question | |||
mi $u | |||
mu $u | |||
mü $u | |||
// pronouns | |||
bu $u+ // this | |||
şu $u+ // that | |||
ben $u+ // I | |||
sen $u+ // you | |||
o $u+ // it | |||
siz $u+ // you (plural) | |||
biz $u+ // we | |||
bir $u+ // a (or one) | |||
da $u // also | |||
de $u | |||
ta $u | |||
te $u | |||
ile $u | |||
// EXCEPTIONS | |||
// Person Names | |||
// Place Names | |||
ankara $1 | |||
istanbul $2 | |||
@@ -2,11 +2,27 @@ | |||
// Turkish translation rules | |||
// This file is UTF-8 encoded | |||
// Stress rule: Right-most vowel, but stop before the vowel which is | |||
// marked as unstressed [%]. | |||
// unvoiced consonants | |||
.L01 ç f h k p q s ş t | |||
.replace | |||
` ' | |||
.group a | |||
a a | |||
avru (pa avr'u | |||
@) a (_S1 a | |||
acak (_S3 adZak | |||
acakmış (_S7 adZakm%u#S | |||
ayım (_S4 aju#m | |||
alım (_S4 alu#m | |||
asın (_S4 asu#n | |||
avru (pa avr'u | |||
.group b | |||
b b | |||
@@ -20,49 +36,149 @@ | |||
.group d | |||
d d | |||
da (_S2 da | |||
de (_S2 de | |||
dan (_S3 dan | |||
den (_S3 den | |||
// declare these suffixes so that preceding "ma/me" is unstressed | |||
dim (_S3 dim | |||
dik (_S3 dik | |||
din (_S3 din | |||
diniz (_S5 diniz | |||
di (_S2 di | |||
diler (_S5 dileR | |||
dım (_S3 du#m | |||
dık (_S3 du#k | |||
dın (_S3 du#n | |||
dınız (_S5 du#nu#z | |||
dı (_S2 du# | |||
dılar (_S5 du#laR | |||
dum (_S3 dum | |||
duk (_S3 duk | |||
dun (_S3 dun | |||
dunuz (_S5 dunuz | |||
du (_S2 du | |||
dular (_S5 dular | |||
düm (_S3 dym | |||
dük (_S3 dyk | |||
dün (_S3 dyn | |||
dünüz (_S5 dynyz | |||
dü (_S2 dy | |||
düler (_S5 dyler | |||
dir (_S3 %diR | |||
dır (_S3 %du#R | |||
dur (_S3 %duR | |||
dür (_S3 %dyR | |||
_) d (eğil 'd | |||
.group e | |||
e e | |||
@) e (_S1 e | |||
ecek (_S4 edZek | |||
ecekmiş (_S7 edZekm%iS | |||
eyim (_S4 ejim | |||
elim (_S4 elim | |||
esin (_S4 esin | |||
.group f | |||
f f | |||
.group g | |||
g g | |||
g (i J | |||
g (e J | |||
g (ü J | |||
g (ö J | |||
.group ğ | |||
ğ Q | |||
A) ğ : | |||
A) ğ :|| | |||
i) ğ j | |||
e) ğ j | |||
.group h | |||
h h | |||
.group i | |||
i i | |||
@) i (_S1 i | |||
im (_S2 %im | |||
iniz (_S4 %iniz | |||
iz (_S2 %iz | |||
iyor (_S4 ij%oR | |||
in (_S2 in | |||
.group ı | |||
ı u# | |||
@) ı (_S1 u# | |||
ım (_S2 %u#m | |||
ınız (_S2 %u#nu#z | |||
ız (_S2 %u#z | |||
ıyor (_S4 u#j%oR | |||
ın (_S2 u#n | |||
.group j | |||
j Z | |||
.group k | |||
k k | |||
k (i c | |||
k (e c | |||
k (ü c | |||
k (ö c | |||
ken (_S3 %cen | |||
.group l | |||
l l | |||
la (_S2 %la | |||
le (_S2 %le | |||
lar (_S3 laR | |||
ler (_S3 leR | |||
li (_S2 li | |||
lı (_S2 lu# | |||
lu (_S2 lu | |||
lü (_S2 ly | |||
lik (_S3 lik | |||
lık (_S3 lu#k | |||
luk (_S3 luk | |||
lük (_S3 lyk | |||
.group m | |||
m m | |||
ma (_S2 m%a | |||
me (_S2 m%e | |||
mı (_S2 m%u# | |||
ma (_S2 %ma | |||
me (_S2 %me | |||
.group n | |||
n n | |||
nin (_S3 nin | |||
nın (_S3 nu#n | |||
nun (_S3 nun | |||
nün (_S3 nyn | |||
.group o | |||
o o | |||
@@ -76,23 +192,76 @@ | |||
q k | |||
.group r | |||
r r | |||
r R | |||
A) r (A * | |||
ra (_S2 %Ra | |||
re (_S2 %Re | |||
ra (_N Ra // only an unstressed suffix if another suffix follows | |||
re (_N Re | |||
.group s | |||
s s | |||
sınız (_S5 %su#nu#z | |||
siniz (_S5 %siniz | |||
sunuz (_S5 %sunuz | |||
sünüz (_S5 %synyz | |||
sam (_S3 %sam // if | |||
sak (_S3 %sak | |||
san (_S3 %san | |||
sanız (_S5 %sanu#z | |||
sa (_S2 %sa | |||
sem (_S3 %sem | |||
sek (_S3 %sek | |||
sen (_S3 %sen | |||
seniz (_S5 %seniz | |||
se (_S2 %se | |||
.group ş | |||
ş S | |||
.group t | |||
t t | |||
ta (_S2 ta | |||
te (_S2 te | |||
tan (_S3 tan | |||
ten (_S3 ten | |||
L01) tir (_S3 %tiR | |||
L01) tır (_S3 %tu#R | |||
L01) tur (_S3 %tuR | |||
L01) tür (_S3 %tyR | |||
.group u | |||
u u | |||
@) u (_S1 u | |||
um (_S2 %um | |||
unuz (_S4 %unuz | |||
uz (_S2 %uz | |||
uyor (_S4 uj%oR | |||
un (_S2 un | |||
.group ü | |||
ü y | |||
@) ü (_S1 y | |||
üm (_S2 %ym | |||
ünüz (_S4 %ynyz | |||
üz (_S2 %yz | |||
üyor (_S4 yj%oR | |||
ün (_S2 yn | |||
.group v | |||
v v | |||
@@ -103,11 +272,55 @@ | |||
x ks | |||
K) x z | |||
.group y | |||
y j | |||
ya (_S2 ja | |||
ye (_S2 je | |||
yi (_S2 ji | |||
yı (_S2 ju# | |||
yu (_S2 ju | |||
yü (_S2 jy | |||
yim (_S3 %jim | |||
yım (_S3 %ju#m | |||
yum (_S3 %jum | |||
yüm (_S3 %jym | |||
yiz (_S3 %jiz | |||
yız (_S3 %ju#z | |||
yuz (_S3 %juz | |||
yüz (_S3 %jyz | |||
yiniz (_S5 %jiniz | |||
yınız (_S5 %ju#nu#z | |||
yunuz (_S5 %junuz | |||
yünüz (_S5 %jynyz | |||
yla (_S3 %jla | |||
yle (_S3 %jle | |||
yacak (_S5 jadZak | |||
yecek (_S5 jedZek | |||
yacakmış (_S8 jadZakm%u#S | |||
yecekmiş (_S8 jedZekm%iS | |||
yayım (_S5 jaju#m | |||
yalım (_S5 jalu#m | |||
yasın (_S5 jasu#n | |||
yeyim (_S5 jejim | |||
yelim (_S5 jelim | |||
yesin (_S5 jesin | |||
yken (_S4 jc%en | |||
.group z | |||
z z | |||
.group | |||
$ dolar | |||
' (Pb // split a word at ' and translate the first part separately. | |||
@@ -2,13 +2,13 @@ | |||
new total | |||
base 99 99 | |||
base2 24 118 | |||
en 54 148 | |||
en_n 30 148 | |||
en_us 34 148 | |||
en_sc 39 150 | |||
en_rp 34 148 | |||
en_wm 30 148 | |||
en_wi 30 148 | |||
en 53 147 | |||
en_n 30 147 | |||
en_us 34 147 | |||
en_sc 39 149 | |||
en_rp 34 147 | |||
en_wm 30 147 | |||
en_wi 30 147 | |||
af 38 131 | |||
cy 29 125 | |||
de 33 126 | |||
@@ -18,7 +18,7 @@ | |||
fr 44 125 | |||
fr_ca 11 125 | |||
hi 51 136 | |||
ta 15 138 | |||
ta 16 139 | |||
hu 23 115 | |||
nl 28 122 | |||
pl 15 110 | |||
@@ -46,7 +46,7 @@ | |||
sw 14 108 | |||
th 50 142 | |||
id 14 120 | |||
tr 14 123 | |||
tr 18 123 | |||
ku 13 120 | |||
ja 7 104 | |||
@@ -172,6 +172,7 @@ dzh/dzh_ [dZ] base | |||
[dZ;] ru | |||
dzh/dz_pzd [J] base | |||
[J2] hi | |||
[J] tr | |||
dzh/dz_pzd_ [J] base | |||
[J2] hi | |||
dzh/xdzh [dZ] base | |||
@@ -657,6 +658,8 @@ ustop/ki [c] base | |||
[c] is | |||
[k] zhy | |||
[k] sw | |||
[J] tr | |||
[c] tr | |||
ustop/kl [k] base | |||
[k] base2 | |||
[k] en | |||
@@ -1617,8 +1620,7 @@ vowel/u [u:] en_wi | |||
[u] th | |||
[u:] th | |||
[u] id | |||
vowel/u# [u-] en | |||
[u:] en_sc | |||
vowel/u# [u:] en_sc | |||
[Y] tr | |||
vowel/u_2 [u1] fi | |||
[u] sk | |||
@@ -1628,6 +1630,8 @@ vowel/u_2 [u1] fi | |||
vowel/u#_2 [u-] ta | |||
[u-] sv | |||
vowel/u_3 [yU] ro | |||
vowel/u#_3 [U] ta | |||
[U:] ta | |||
vowel/u_4 [u:] en_n | |||
vowel/u#_4 [U] en_sc | |||
[u:] en_sc | |||
@@ -1664,7 +1668,6 @@ vowel/uu [U] en | |||
vowel/uu# [U] ku | |||
vowel/uu_2 [U] base2 | |||
[U] de | |||
[U] ta | |||
[U] tr | |||
vowel/uu_3 [u] af | |||
[y] zh |
@@ -433,9 +433,3 @@ phoneme W | |||
endphoneme | |||
phoneme u- | |||
vowel starttype (u) endtype (u) | |||
length 200 | |||
formants vowel/u# | |||
endphoneme | |||
@@ -77,8 +77,8 @@ phoneme v# // a shorter [v], a little towards [b] | |||
vowelin f1=1 f2=1000 -300 -200 f3=-300 100 | |||
vowelout f1=0 f2=1000 -500 -300 f3=-300 60 len=10 | |||
lengthmod 6 | |||
formants voc/v2+vocw/v%75 | |||
before _ voc/v_+vocw/v%75 | |||
formants voc/v2+vocw/v%80 | |||
before _ voc/v_+vocw/v%80 | |||
switchvoicing f | |||
endphoneme | |||
@@ -77,17 +77,23 @@ endphoneme | |||
phoneme U | |||
vowel starttype (u) endtype (u) | |||
length 130 | |||
formants vowel/uu_2 | |||
length 150 | |||
formants vowel/u#_3 | |||
endphoneme | |||
phoneme u: | |||
vowel starttype (u) endtype (u) | |||
length 270 | |||
formants vowel/u | |||
endphoneme | |||
phoneme U: | |||
vowel starttype (u) endtype (u) | |||
length 270 | |||
formants vowel/u#_3 | |||
endphoneme | |||
phoneme u- | |||
vowel starttype (u) endtype (u) |
@@ -91,3 +91,36 @@ phoneme O | |||
formants vowel/oo | |||
endphoneme | |||
// Length-mark phoneme. It is declared "virtual" and carries only a length value (180);
// the block supplies no wave or formants data of its own.
phoneme : // Lengthen previous vowel by "length" | |||
virtual | |||
length 180 | |||
endphoneme | |||
// Phoneme [c], declared with the feature keywords "vls pal stop"
// (presumably voiceless palatal stop -- confirm against the phoneme compiler's keyword list).
phoneme c | |||
vls pal stop | |||
// vowelin/vowelout: presumably the formant-transition settings used next to a vowel
// (into and out of it) -- TODO confirm the meaning of the positional numbers.
vowelin f1=0 f2=2700 200 500 f3=400 80 len=60 | |||
vowelout f1=0 f2=2700 300 500 f3=300 80 | |||
lengthmod 2 | |||
// The sound comes from ustop/ki; the "before _" rule selects ustop/ki%80 instead.
wave ustop/ki | |||
before _ ustop/ki%80 | |||
endphoneme | |||
// Phoneme [J], declared with the feature keywords "vcd pla stop"
// (NOTE(review): "pla" differs from the "pal" used by [c] just above; it looks like a
// separate place-of-articulation keyword rather than a typo -- confirm).
phoneme J | |||
vcd pla stop | |||
// vowelin/vowelout: presumably the formant-transition settings used next to a vowel -- TODO confirm.
vowelin f1=2 f2=2700 400 600 f3=300 80 | |||
vowelout f1=2 f2=2700 400 600 f3=300 80 colr=1 | |||
// The sound combines the dzh/dz_pzd formant data with ustop/ki%60.
formants dzh/dz_pzd+ustop/ki%60 | |||
lengthmod 5 | |||
// Names [c] as the phoneme for a voicing switch (presumably its voiceless counterpart).
switchvoicing c | |||
endphoneme | |||
// Phoneme [l]: no data is defined locally; the definition is pulled in by import_phoneme
// with the arguments "l en" (presumably [l] from the "en" phoneme table -- confirm).
phoneme l | |||
import_phoneme l en | |||
endphoneme | |||
@@ -993,10 +993,10 @@ endphoneme | |||
phoneme q | |||
vls uvl stop | |||
vowelin f1=1 f2=1700 0 200 f3=-300 80 f4 paus rms=35 | |||
vowelout f1=1 f2=1700 0 200 f3=-300 80 f4 rms=30 | |||
vowelin f1=1 f2=1700 0 200 f3=-300 80 paus f4 rms=30 | |||
vowelout f1=1 f2=1700 -100 200 f3=-300 80 f4 rms=35 | |||
lengthmod 2 | |||
wave ustop/q%50 | |||
wave ustop/q%48 | |||
before _ ustop/q%40 | |||
endphoneme | |||
@@ -810,6 +810,9 @@ void copy_rule_string(char *string, int &state) | |||
case 't': | |||
sxflags |= SUFX_T; | |||
break; | |||
case 'b': | |||
sxflags |= SUFX_B; | |||
break; | |||
default: | |||
if(isdigit(c)) | |||
value = (value*10) + (c - '0'); |
@@ -2272,6 +2272,8 @@ int Translator::TranslateRules(char *p_start, char *phonemes, int ph_size, char | |||
{ | |||
c = p_start[ix]; | |||
word_copy[ix++] = c; | |||
if(c == 0) | |||
break; | |||
} | |||
word_copy[ix] = 0; | |||
@@ -3096,7 +3098,7 @@ int Translator::RemoveEnding(char *word, int end_type, char *word_copy) | |||
word_copy[i] = 0; | |||
// look for multibyte characters to increase the number of bytes to remove | |||
for(len_ending = i = (end_type & 0xf); i>0 ;i--) // num.of characters of the suffix | |||
for(len_ending = i = (end_type & 0x3f); i>0 ;i--) // num.of characters of the suffix | |||
{ | |||
word_end--; | |||
while((*word_end & 0xc0) == 0x80) |
@@ -466,7 +466,7 @@ int Translator::LookupNum3(int value, char *ph_out, int suppress_null, int thous | |||
Lookup("_0C",ph_100); | |||
if((hundreds >= 10) && ((langopts.numbers & 0x0800) || (hundreds != 19))) | |||
if((hundreds >= 10) && (((langopts.numbers & 0x0800) == 0) || (hundreds != 19))) | |||
{ | |||
ph_digits[0] = 0; | |||
@@ -35,7 +35,7 @@ | |||
#include "translate.h" | |||
#include "wave.h" | |||
const char *version_string = "1.31.14 21.Feb.08"; | |||
const char *version_string = "1.31.17 24.Feb.08"; | |||
const int version_phdata = 0x013105; | |||
int option_device_number = -1; |
@@ -42,7 +42,7 @@ Translator_English::Translator_English() : Translator() | |||
memcpy(stress_lengths,stress_lengths2,sizeof(stress_lengths)); | |||
langopts.stress_rule = 0; | |||
langopts.numbers = 0x41 + NUM_ROMAN; | |||
langopts.numbers = 0x841 + NUM_ROMAN; | |||
langopts.param[LOPT_COMBINE_WORDS] = 2; // allow "mc" to combine with the following word | |||
} | |||
@@ -147,7 +147,7 @@ Translator *SelectTranslator(const char *name) | |||
tr->langopts.param[LOPT_PREFIXES] = 1; | |||
memcpy(tr->stress_lengths,stress_lengths_de,sizeof(tr->stress_lengths)); | |||
tr->langopts.numbers = 0x11c19 + NUM_ROMAN; | |||
tr->langopts.numbers = 0x11419 + NUM_ROMAN; | |||
SetLetterVowel(tr,'y'); | |||
} | |||
break; | |||
@@ -189,7 +189,7 @@ Translator *SelectTranslator(const char *name) | |||
tr->langopts.unstressed_wd2 = 2; | |||
tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels | |||
tr->langopts.numbers = 0xb09; | |||
tr->langopts.numbers = 0x309; | |||
tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands | |||
if(name2 == L_grc) | |||
@@ -218,7 +218,7 @@ Translator *SelectTranslator(const char *name) | |||
tr->langopts.unstressed_wd1 = 1; | |||
tr->langopts.unstressed_wd2 = 2; | |||
tr->langopts.numbers = 0x1c09 + NUM_ROMAN; | |||
tr->langopts.numbers = 0x1409 + NUM_ROMAN; | |||
} | |||
break; | |||
@@ -258,7 +258,7 @@ Translator *SelectTranslator(const char *name) | |||
tr->langopts.param[LOPT_IT_DOUBLING] = 1; | |||
tr->langopts.long_stop = 140; | |||
tr->langopts.numbers = 0x1009; | |||
tr->langopts.numbers = 0x1809; | |||
SetLetterVowel(tr,'y'); | |||
tr->langopts.max_initial_consonants = 2; | |||
tr->langopts.spelling_stress = 1; | |||
@@ -295,7 +295,7 @@ Translator *SelectTranslator(const char *name) | |||
tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable | |||
tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | |||
tr->langopts.numbers = 0x811; | |||
tr->langopts.numbers = 0x011; | |||
tr->langopts.numbers2 = 0x100; | |||
tr->letter_bits_offset = OFFSET_DEVANAGARI; | |||
@@ -331,7 +331,7 @@ Translator *SelectTranslator(const char *name) | |||
tr->langopts.max_initial_consonants = 5; | |||
tr->langopts.spelling_stress = 1; | |||
tr->langopts.numbers = 0x1c0d + 0x4000 + NUM_ROMAN_UC; | |||
tr->langopts.numbers = 0x140d + 0x4000 + NUM_ROMAN_UC; | |||
tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards | |||
tr->langopts.replace_chars = replace_cyrillic_latin; | |||
@@ -358,7 +358,7 @@ Translator *SelectTranslator(const char *name) | |||
tr->langopts.param[LOPT_IT_DOUBLING] = 1; | |||
tr->langopts.param[LOPT_COMBINE_WORDS] = 99; // combine some prepositions with the following word | |||
tr->langopts.numbers = 0x1809 + NUM_ROMAN; | |||
tr->langopts.numbers = 0x1009 + NUM_ROMAN; | |||
SetLetterVowel(tr,'y'); | |||
tr->langopts.spelling_stress = 1; | |||
SetLengthMods(tr,3); // all equal | |||
@@ -372,7 +372,7 @@ SetLengthMods(tr,3); // all equal | |||
tr = new Translator(); | |||
SetupTranslator(tr,stress_lengths_id,stress_amps_id); | |||
tr->langopts.stress_rule = 2; | |||
tr->langopts.numbers = 0x1809 + NUM_ROMAN; | |||
tr->langopts.numbers = 0x1009 + NUM_ROMAN; | |||
tr->langopts.stress_flags = 0x6 | 0x10; | |||
} | |||
break; | |||
@@ -395,7 +395,7 @@ SetLengthMods(tr,3); // all equal | |||
SetLetterBits(tr,3,"jvr"); // Letter group H | |||
tr->letter_groups[1] = is_lettergroup_B; | |||
SetLetterVowel(tr,'y'); | |||
tr->langopts.numbers = 0xe9; | |||
tr->langopts.numbers = 0x8e9; | |||
tr->langopts.numbers2 = 0x2; | |||
} | |||
break; | |||
@@ -417,7 +417,7 @@ SetLengthMods(tr,3); // all equal | |||
tr->langopts.param[LOPT_IT_DOUBLING] = 2; // double the first consonant if the previous word ends in a stressed vowel | |||
tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels | |||
tr->langopts.param[LOPT_REDUCE] = 1; // reduce vowels even if phonemes are specified in it_list | |||
tr->langopts.numbers = 0x2709 + 0x800 + NUM_ROMAN; | |||
tr->langopts.numbers = 0x2709 + NUM_ROMAN; | |||
} | |||
break; | |||
@@ -451,7 +451,7 @@ SetLengthMods(tr,3); // all equal | |||
tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable | |||
tr->langopts.numbers = 0x100c69; | |||
tr->langopts.numbers = 0x100469; | |||
tr->langopts.max_initial_consonants = 2; | |||
} | |||
break; | |||
@@ -483,7 +483,7 @@ SetLengthMods(tr,3); // all equal | |||
tr->letter_groups[0] = vowels_cyrillic; | |||
tr->langopts.stress_rule = 4; // antipenultimate | |||
tr->langopts.numbers = 0x0c29 + 0x4000; | |||
tr->langopts.numbers = 0x0429 + 0x4000; | |||
tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards | |||
} | |||
break; | |||
@@ -500,7 +500,7 @@ SetLengthMods(tr,3); // all equal | |||
tr->langopts.param[LOPT_PREFIXES] = 1; | |||
SetLetterVowel(tr,'y'); | |||
tr->langopts.numbers = 0x11419; | |||
tr->langopts.numbers = 0x11c19; | |||
memcpy(tr->stress_lengths,stress_lengths_nl,sizeof(tr->stress_lengths)); | |||
} | |||
break; | |||
@@ -514,7 +514,7 @@ SetLengthMods(tr,3); // all equal | |||
tr->langopts.stress_rule = 0; | |||
SetLetterVowel(tr,'y'); | |||
tr->langopts.numbers = 0x11049; | |||
tr->langopts.numbers = 0x11849; | |||
} | |||
break; | |||
@@ -531,7 +531,7 @@ SetLengthMods(tr,3); // all equal | |||
tr->langopts.stress_flags = 0x6; // mark unstressed final syllables as diminished | |||
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x8; | |||
tr->langopts.max_initial_consonants = 7; // for example: wchrzczony :) | |||
tr->langopts.numbers=0x1809 + 0x4000; | |||
tr->langopts.numbers=0x1009 + 0x4000; | |||
tr->langopts.numbers2=0x40; | |||
tr->langopts.param[LOPT_COMBINE_WORDS] = 2 + 0x100; // combine 'nie' (marked with $alt2) with some 1-syllable words (marked with $alt) | |||
SetLetterVowel(tr,'y'); | |||
@@ -549,7 +549,7 @@ SetLengthMods(tr,3); // all equal | |||
// tr->langopts.vowel_pause = 1; | |||
tr->langopts.stress_rule = 3; // stress on final syllable | |||
tr->langopts.stress_flags = 0x6 | 0x10 | 0x20000; | |||
tr->langopts.numbers = 0xa69 + 0x2000 + NUM_ROMAN; | |||
tr->langopts.numbers = 0x269 + 0x2000 + NUM_ROMAN; | |||
SetLetterVowel(tr,'y'); | |||
} | |||
break; | |||
@@ -566,7 +566,7 @@ SetLengthMods(tr,3); // all equal | |||
tr->langopts.stress_flags = 0x100 + 0x6; | |||
tr->charset_a0 = charsets[2]; // ISO-8859-2 | |||
tr->langopts.numbers = 0x1829+0x6000 + NUM_ROMAN; | |||
tr->langopts.numbers = 0x1029+0x6000 + NUM_ROMAN; | |||
tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex | |||
} | |||
break; | |||
@@ -593,7 +593,7 @@ SetLengthMods(tr,3); // all equal | |||
tr->langopts.spelling_stress = 1; | |||
tr->langopts.param[LOPT_COMBINE_WORDS] = 4; // combine some prepositions with the following word | |||
tr->langopts.numbers = 0x0c01 + 0x4000 + NUM_ROMAN; | |||
tr->langopts.numbers = 0x0401 + 0x4000 + NUM_ROMAN; | |||
tr->langopts.numbers2 = 0x40; | |||
tr->langopts.thousands_sep = 0; //no thousands separator | |||
tr->langopts.decimal_sep = ','; | |||
@@ -620,7 +620,7 @@ SetLengthMods(tr,3); // all equal | |||
tr->langopts.stress_rule = 0; | |||
SetLetterVowel(tr,'y'); | |||
// SetLetterBits(tr,6,"eiyäö"); // soft vowels NOTE accented letters don't work in SetLetterBits | |||
tr->langopts.numbers = 0x1109; | |||
tr->langopts.numbers = 0x1909; | |||
} | |||
break; | |||
@@ -637,7 +637,7 @@ SetLengthMods(tr,3); // all equal | |||
tr->langopts.stress_rule = 2; | |||
tr->langopts.stress_flags = 0x6 | 0x10; | |||
tr->langopts.numbers = 0x8e1; | |||
tr->langopts.numbers = 0x4e1; | |||
tr->langopts.numbers2 = 0x100; | |||
} | |||
break; | |||
@@ -684,16 +684,17 @@ SetLengthMods(tr,3); // all equal | |||
case L('t','r'): // Turkish | |||
{ | |||
static const unsigned char stress_amps_tr[8] = {16,16, 20,20, 20,24, 24,22 }; | |||
static const short stress_lengths_tr[8] = {170,170, 190,170, 0,0, 250,270}; | |||
static const unsigned char stress_amps_tr[8] = {18,18, 20,20, 20,24, 24,22 }; | |||
static const short stress_lengths_tr[8] = {190,190, 190,190, 0,0, 250,270}; | |||
tr = new Translator(); | |||
SetupTranslator(tr,stress_lengths_tr,stress_amps_tr); | |||
tr->charset_a0 = charsets[9]; // ISO-8859-9 - Latin5 | |||
tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable | |||
tr->langopts.stress_flags = 0x20; //no automatic secondary stress | |||
tr->langopts.numbers = 0x1d09 + 0x4000; | |||
tr->langopts.numbers = 0x1509 + 0x4000; | |||
tr->langopts.max_initial_consonants = 2; | |||
} | |||
break; | |||
@@ -1044,7 +1045,7 @@ Translator_Afrikaans::Translator_Afrikaans() : Translator() | |||
langopts.param[LOPT_PREFIXES] = 1; | |||
SetLetterVowel(this,'y'); // add 'y' to vowels | |||
langopts.numbers = 0x0d1 + NUM_ROMAN; | |||
langopts.numbers = 0x8d1 + NUM_ROMAN; | |||
memcpy(stress_lengths,stress_lengths2,sizeof(stress_lengths)); | |||
} | |||
@@ -613,6 +613,8 @@ int Translator::TranslateWord(char *word1, int next_pause, WORD_TAB *wtab) | |||
int word_length; | |||
int ix; | |||
int posn; | |||
int pfix; | |||
int n_chars; | |||
unsigned int dictionary_flags[2]; | |||
unsigned int dictionary_flags2[2]; | |||
int end_type=0; | |||
@@ -624,6 +626,7 @@ int Translator::TranslateWord(char *word1, int next_pause, WORD_TAB *wtab) | |||
char prefix_phonemes[N_WORD_PHONEMES]; | |||
char end_phonemes[N_WORD_PHONEMES]; | |||
char word_copy[N_WORD_BYTES]; | |||
char prefix_chars[N_WORD_BYTES]; | |||
int found=0; | |||
int end_flags; | |||
char c_temp; // save a character byte while we temporarily replace it with space | |||
@@ -824,7 +827,7 @@ if((wmark > 0) && (wmark < 8)) | |||
{ | |||
// Found a standard prefix, remove it and retranslate | |||
if(confirm_prefix) | |||
if(confirm_prefix && !(end_type & SUFX_B)) | |||
{ | |||
int end2; | |||
char phonemes2[N_WORD_PHONEMES]; | |||
@@ -856,8 +859,6 @@ if((wmark > 0) && (wmark < 8)) | |||
} | |||
} | |||
strcat(prefix_phonemes,end_phonemes); | |||
end_phonemes[0] = 0; | |||
prefix_type = end_type; | |||
if(prefix_type & SUFX_V) | |||
@@ -866,15 +867,44 @@ if((wmark > 0) && (wmark < 8)) | |||
} | |||
wordx[-1] = c_temp; | |||
for(ix=(prefix_type & 0xf); ix>0; ix--) // num. of characters to remove | |||
pfix = 1; | |||
prefix_chars[0] = 0; | |||
n_chars = prefix_type & 0x3f; | |||
for(ix=0; ix < n_chars; ix++) // num. of characters to remove | |||
{ | |||
wordx++; | |||
while((*wordx & 0xc0) == 0x80) wordx++; // for multibyte characters | |||
prefix_chars[pfix++] = *wordx++; | |||
if((prefix_type & SUFX_B) && (ix == (n_chars-1))) | |||
{ | |||
prefix_chars[pfix-1] = 0; // discard the last character of the prefix, this is the separator character | |||
} | |||
while((*wordx & 0xc0) == 0x80) | |||
{ | |||
prefix_chars[pfix++] = *wordx++; // for multibyte characters | |||
} | |||
} | |||
prefix_chars[pfix] = 0; | |||
c_temp = wordx[-1]; | |||
wordx[-1] = ' '; | |||
confirm_prefix = 1; | |||
if(prefix_type & SUFX_B) | |||
{ | |||
// retranslate the prefix part | |||
char *wordpf; | |||
wordpf = &prefix_chars[1]; | |||
found = LookupDictList(&wordpf, phonemes, dictionary_flags, SUFX_P, wtab); // without prefix | |||
if(found == 0) | |||
{ | |||
end_type = TranslateRules(wordpf, phonemes, N_WORD_PHONEMES, end_phonemes, 0, dictionary_flags[0]); | |||
strcat(prefix_phonemes, phonemes); | |||
} | |||
} | |||
strcat(prefix_phonemes,end_phonemes); | |||
end_phonemes[0] = 0; | |||
end_type = 0; | |||
found = LookupDictList(&wordx, phonemes, dictionary_flags2, SUFX_P, wtab); // without prefix | |||
if(dictionary_flags[0]==0) |
@@ -106,6 +106,7 @@ | |||
#define SUFX_F 0x2000 // verb follows | |||
#define SUFX_Q 0x4000 // don't retranslate | |||
#define SUFX_T 0x10000 // don't affect the stress position in the stem | |||
#define SUFX_B 0x20000 // break, this character breaks the word into stem and suffix (used with SUFX_P) | |||
#define FLAG_ALLOW_TEXTMODE 0x02 // allow dictionary to translate to text rather than phonemes | |||
#define FLAG_SUFX 0x04 | |||
@@ -321,7 +322,7 @@ typedef struct { | |||
// bit8=only one primary stress in tens+units | |||
// bit9=only one vowel between tens and units | |||
// bit10=omit "one" before "hundred" | |||
// bit11=don't say 19** as nineteen hundred | |||
// bit11=say 19** as nineteen hundred | |||
// bit12=allow space as thousands separator (in addition to langopts.thousands_sep) | |||
// bits13-15 post-decimal-digits 0=single digits, 1=(LANG=it) 2=(LANG=pl) 3=(LANG=ro) | |||
// bit16=dot after number indicates ordinal |