Browse Source

[1.31.17]

*_rules: New option, (Pb to specify a character such as apostrophe which splits a word into two parts (used for lang-tr).
 

git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@150 d46cf337-b52f-0410-862d-fd96e6ae7743
master
jonsd 17 years ago
parent
commit
bf8ee6362e

+ 8
- 9
dictsource/dict_phonemes View File



Dictionary es_dict Dictionary es_dict


@- a aI e E eI eU i
o O oI u
@- a aI aU e E eI eU
i o O oI u


* ** : b B d D f * ** : b B d D f
g j J J^ k l l^ m
n n^ p Q r R s t
T tS v v# w x z
g j J J^ k l m n
n^ p Q r R s t T
tS v v# w x z




Dictionary fi_dict Dictionary fi_dict
a e E i I o O u a e E i I o O u
u# W y Y u# W y Y


: ; b d dZ f g h
j k l m n p Q r
R s S t tS v w z
Z
b c d dZ f g h j
J k l m n p r R
s S t tS v z Z




Dictionary ku_dict Dictionary ku_dict

+ 37
- 33
dictsource/es_list View File



// names of symbols // names of symbols
_. punto _. punto
_, kOma
_; p,untOik'Oma
_, koma
_; p,untoik'oma
_: d,Osp'untOs _: d,Osp'untOs
_! TE*R'aRaDmi**aTj'On _! TE*R'aRaDmi**aTj'On
_? TE*R'aRintE*R,OQaTj'On
_? TE*R'aRintE*R,oQaTj'On
_¡ aB@-*'iRaDmi**aTj'On _¡ aB@-*'iRaDmi**aTj'On
_¿ aB@-*'iRintE*R,OQaTj'On
_¿ aB@-*'iRintE*R,oQaTj'On
_= iQw'al _= iQw'al
_< mEn'ORke
_< men'ORke
_> maJ^'ORke _> maJ^'ORke
_' apOst@-*'Ofo
_" kOm'iJ^as
_' apOst@-*'ofo
_" kom'iJ^as
_+ s'iQnOdesum'aR _+ s'iQnOdesum'aR
_$ dOlar
_# almOaD'il^a
_* astE*'isko
_$ dolar
_# almoaD'iJ^a
_* aste*'isko
_- gJ^On _- gJ^On
__ suB@-*aJ^'aDo __ suB@-*aJ^'aDo
_/ ba*Ra _/ ba*Ra
_\ b'a*RaimbERt'iDa _\ b'a*RaimbERt'iDa
_` aT'EntOg@-*'aBe _` aT'EntOg@-*'aBe
_( ,aB@-*epa**'EntEsis
_) Tj,E*Rapa**'EntEsis
_[ ,aB@-*ekORtS'Ete
_] Tj,E*RakORtS'Ete
_( ,aB@-*epa**'Entesis
_) Tj,E*Rapa**'Entesis
_[ ,aB@-*ekORtS'ete
_] Tj,E*RakORtS'ete
_{ ,aB@-*eJ^'aBe _{ ,aB@-*eJ^'aBe
_} Tj,E*RaJ^'aBe _} Tj,E*RaJ^'aBe
_« kOm'iJ^as||iTkJ^'ERDas
_» kOm'iJ^as||dE**'EtSas
_« kom'iJ^as||iTkJ^'ERDas
_» kom'iJ^as||dE**'EtSas


^ TiRkumfl'Exo
^ TiRkumfl'exo
€ eU*o € eU*o
% pOrTj'Ento % pOrTj'Ento
& ampERs'ant & ampERs'ant
@ a*R'OBa
@ a*R'oBa
/ ba*Ra / ba*Ra
© kOpi*R'aIt
© kopi*R'aIt
£ liB@-*as £ liB@-*as
¶ p'a*Rafo ¶ p'a*Rafo
§ sEkTj'On § sEkTj'On




// numbers // numbers
_0 TE*o
_0 Te*o
_1 'uno _1 'uno
_2 d'Os _2 d'Os
_3 t@-*'es _3 t@-*'es
_4 kw'at@-*o _4 kw'at@-*o
_5 T'inko _5 T'inko
_6 s'eIs _6 s'eIs
_7 sj'Ete
_7 sj'ete
_8 'OtSo _8 'OtSo
_9 nw'Eve
_9 nw'eve
_1X dj'ET _1X dj'ET
_11 'OnTe _11 'OnTe
_12 d'OTe
_13 t@-*'ETe
_12 d'oTe
_13 t@-*'eTe
_14 kat'ORTe _14 kat'ORTe
_15 k'inTe _15 k'inTe
_20 v'eInte _20 v'eInte
_7X sEt'Enta _7X sEt'Enta
_8X OtS'Enta _8X OtS'Enta
_9X nOv'Enta _9X nOv'Enta
_0C T'ientOs
_0C Tj'EntOs
_1C0 T'ien // exactly one hundred _1C0 T'ien // exactly one hundred
_1C T'iento _1C T'iento
_5C kinj'EntOs _5C kinj'EntOs
_7C s,EtETj'EntOs
_9C n,OvETj'EntOs
_7C s,eteTj'EntOs
_9C n,OveTj'EntOs
_0M1 m'il _0M1 m'il
_1M1 m'il // no '1' before thousand _1M1 m'il // no '1' before thousand
_0M2 mil^'Ones
_1M2 'unmil^'On
_0M4 _bil^'Ones
_1M4 'unbil^'On
_0M2 miJ^'ones
_1M2 'unmiJ^'On
_0M4 _biJ^'onEs
_1M4 'unbiJ^'On
?2 _0M2 mij:'ones
?2 _1M2 'unmij:'On
?2 _0M4 _bij:'onEs
?2 _1M4 'unbij:'On
_0and i _0and i
_dpt kOma
//_roman ROm'ano
_dpt koma
//_roman Rom'ano


vi vi // not a Roman number vi vi // not a Roman number



+ 24
- 7
dictsource/es_rules View File

// ?1 Castilian // ?1 Castilian
// ?2 Latin America // ?2 Latin America


.L01 j w l r d g n m


.group a .group a
_) a (_ a _) a (_ a
ai aI ai aI
ay (K aI ay (K aI
ay (_ 'aI ay (_ 'aI
au aU
au (_ 'aU




.group b .group b
_) b b _) b b
m) b b m) b b
n) b b n) b b

b (L01 b
b (iA b
b (uA b


.group c .group c
_) c (_ Te _) c (_ Te


.group e .group e
_) e (_ e _) e (_ e
e E
e (_ e
e e
e (CK E
ei eI ei eI
ey (K eI ey (K eI
ey (_ 'eI ey (_ 'eI
_) eu eU
eu eU
eu (_ 'eU




.group f .group f
_) l (_ Ele _) l (_ Ele
l l l l
ll J^ ll J^
?2 A) ll (A j:




.group m .group m


.group o .group o
_) o (_ o _) o (_ o
o O
o (_ o
o o
o (CK O
oi oI
oy (K oI oy (K oI
oy (_ 'oI oy (_ 'oI


_) u (_ u _) u (_ u
u u u u
u (A w u (A w
u (y_ w
l) u (A %u l) u (A %u
r) u (A %u r) u (A %u


_) v v# _) v v#
?1 m) v b ?1 m) v b
?1 n) v b ?1 n) v b
?1 v (L01 b
?1 v (iA b
?1 v (uA b
?2 v v# ?2 v v#
?2 _) v v ?2 _) v v
?2 v (L01 v
?2 v (j v
?2 v (iA v
?2 v (uA v




.group w .group w
n) y J n) y J
_l) y J _l) y J
y (A J^ y (A J^
?2 A) y (A j:




.group z .group z
_) ++ (_ masm'as _) ++ (_ masm'as
\+\+\+) + // ignore + after the first 3 \+\+\+) + // ignore + after the first 3


# almOaD'il^a
# almOaD'iJ^a
\#) # \#) #


__) - (_D m'EnOs __) - (_D m'EnOs

+ 10
- 73
dictsource/fr_list View File



// 2006-11-18 Gilles Casse <[email protected]> // 2006-11-18 Gilles Casse <[email protected]>
// //
// Updated 2008-02-20 Michel Such <[email protected]>
// Updated 2008-02-24 Michel Such <[email protected]>
// //
// * Numbers, a few abbreviations and exceptions. // * Numbers, a few abbreviations and exceptions.
// //
f Ef f Ef
g Ze g Ze
h aS h aS
// i i
i i
ï i:tRema ï i:tRema
j Zi j Zi
k ka k ka
vous $u+ $verbf vous $u+ $verbf
elles $u+ $verbf elles $u+ $verbf
ils $u+ $verbf ils $u+ $verbf
on $u $verbf
on O~n2 $u $verbf
me $u $verbf me $u $verbf
te $u $verbf te $u $verbf
se $u $verbf se $u $verbf
lui $u $verbf lui $u $verbf
ça $u $verbf


ce $u $nounf ce $u $nounf
cette $u $nounf cette $u $nounf
// Letters which can be words // Letters which can be words
//=========================== //===========================
à a:aksA~gRav $atend à a:aksA~gRav $atend
i i $atend
y i:gR'Ek $atend y i:gR'Ek $atend






// pronunciation exceptions // pronunciation exceptions


aspic aspik
consent kO~s'A~t2 consent kO~s'A~t2
scient si scient si
bénéficient benefisi $verb
coing kwE~ coing kwE~
concurrent kO~kyR'A~
content kO~t@-t2 $verb
convent kO~vA~
couvent k'uvt2 $verb couvent k'uvt2 $verb
(couvent couvent) kuvA~||k'uvt2 (couvent couvent) kuvA~||k'uvt2
dessus d@sy dessus d@sy
dessous d@su dessous d@su
divers divErz2 divers divErz2
►évident evid'A~
évident evidt2 $verb
ferment fErm'A~
ferment f'Ermt2 $verb
firent f'irt2
parent paR'A~ parent paR'A~
parent p'art2 $verb parent p'art2 $verb
pastis pastis pastis pastis
poing pwE~ poing pwE~
président pRezid'A~
président pRezidt2 $verb
résident Rezid'A~
résident Rezidt2 $verb
ressent r@s'A~t2 ressent r@s'A~t2
récurrent RekyR'A~
riz ri riz ri
sergent sErZ'A~
serment sErm'A~
serpent sErp'A~
torrent tOR'A~
sphincter sfE~ktEr
tunis tynis tunis tynis




(cent une) s'A~||yn (cent une) s'A~||yn
(cent onzième) s'A~||O~zj'Em (cent onzième) s'A~||O~zj'Em
(cent onze) s'A~||O~z (cent onze) s'A~||O~z
(on habille) O~||nab'ij
absent absA~ absent absA~
accident aksidA~
adéquat adekuat2
adéquate adekuat
airbus Erbys airbus Erbys
anus anys anus anys
ardent aRdA~
auvent ovA~
bissus bisys bissus bisys
bonus bonys bonus bonys
bus bys bus bys
casus kazys casus kazys
choeur k@r choeur k@r
cocus coky cocus coky
discident disidA~
estomac Estoma estomac Estoma
fils fis fils fis
imprudent E~pRydA~
iris iris iris iris
juin ZyE~ juin ZyE~
laser lazEr laser lazEr
malus malys malus malys
mars maRs mars maRs
minus minys minus minys
occident OksidA~
orient ORjA~ orient ORjA~
paravent paravA~
pays pEi pays pEi
phallus falys phallus falys
polder pOldEr polder pOldEr
prudent pRydA~
secret sYkRE
souris suri souris suri
sus sys sus sys
sus sy $verb sus sy $verb
to tu
vénus venys vénus venys




// words from other languages // words from other languages
about _^_en about _^_en
acer asEr acer asEr
alone _^_en
also _^_en
and _^_en and _^_en
amazon amaz'On amazon amaz'On
apple _^_en apple _^_en
april _^_en
at _^_en at _^_en
(audible manager) odibl||manadZ@r (audible manager) odibl||manadZ@r
be _^_en
bit _^_en bit _^_en
bluetooth blut'us bluetooth blut'us
both _^_en
can _^_en
computer _^_en computer _^_en
(cyber link) _^_en (cyber link) _^_en
debian dEbjAn debian dEbjAn
don't _^_en
driver _^_en driver _^_en
emacs Emaks emacs Emaks
emacspeak Emakspi:k emacspeak Emakspi:k
epson EpsOn epson EpsOn
espeak @spi:k espeak @spi:k
ethernet etERnEt ethernet etERnEt
ever _^_en
exit egzit exit egzit
eye _^_en eye _^_en
eyes _^_en eyes _^_en
false _^_en
(file zilla) fajl||zija (file zilla) fajl||zija
for _^_en
get _^_en get _^_en
google gu:g@l google gu:g@l
gnome gnom gnome gnom
help _^_en help _^_en
hot _^_en hot _^_en
in _^_en in _^_en
into _^_en
inside _^_en inside _^_en
insight _^_en
internet E~tERnEt internet E~tERnEt
(internet explorer) E~tErn'Et||EksplOr'@r (internet explorer) E~tErn'Et||EksplOr'@r
is _^_en
it _^_en
its _^_en
june _^_en
july _^_en
klaxon klaksOn klaxon klaksOn
later _^_en
latest _^_en
layer _^_en layer _^_en
let _^_en let _^_en
liszt list liszt list
logon _^_en logon _^_en
(mac os x) makoEsiks (mac os x) makoEsiks
made _^_en made _^_en
mail _^_en
march _^_en
media medja media medja
messenger _^_en messenger _^_en
my _^_en my _^_en
name _^_en
never _^_en
not _^_en not _^_en
null _^_en null _^_en
october _^_en
of _^_en
often _^_en
ok oke ok oke
open Op'@n open Op'@n
outlook autluk outlook autluk
outside _^_en
outsider _^_en
paint _^_en paint _^_en
paper _^_en
player _^_en player _^_en
redhat REdat redhat REdat
same _^_en same _^_en
sametime _^_en sametime _^_en
she _^_en
schubert SubER schubert SubER
since _^_en
sun _^_en sun _^_en
sure _^_en
(text aloud) tEkst||@lawd (text aloud) tEkst||@lawd
their _^_en
there _^_en
these _^_en
those _^_en
they _^_en
thus _^_en
true _^_en
ubuntu ubuntu ubuntu ubuntu
up _^_en up _^_en
viking vikiN
was _^_en
won't _^_en
yacht jot yacht jot
yes _^_en yes _^_en



+ 72
- 141
dictsource/fr_rules View File



// 2006-11-18 Gilles Casse <[email protected]> // 2006-11-18 Gilles Casse <[email protected]>
// //
// Updated: 2008-02-20 Michel Such <[email protected]>
// Updated: 2008-02-24 Michel Such <[email protected]>
// //
// * The rules are based on Cicero TTS. // * The rules are based on Cicero TTS.
// Y // Y
// Y front vowels: e i y é ê è î // Y front vowels: e i y é ê è î
// K not a vowel (i.e. consonant, space, ) // K not a vowel (i.e. consonant, space, )


.L01 a b c d e f g h i l p q r t v
.L02 a e i o u y



.group a .group a
ae (_ e // reggae vitae ae (_ e // reggae vitae
a a // bateau a a // bateau


// group a: English section // group a: English section
_C) ad (_ _^_en // bad, had, sad
_) again (X _^_en // again, against
alk _^_en // talk, walk
f) all (en_ _^_en // fallen
sm) all _^_en // small, smaller sm) all _^_en // small, smaller
ank _^_en // tank, blank
ark _^_en // dark, park
ainl _^_en
ainm _^_en
_C) ast (_ _^_en
_C) ast (eX _^_en
ather (_ _^_en // rather
aunc _^_en
a (wC _^_en a (wC _^_en




back _^_en back _^_en
bbl _^_en bbl _^_en
bird _^_en // bird bird _^_en // bird
_) blue (X _^_en // blue
bly (_ _^_en // probably
board _^_en // board, keyboard board _^_en // board, keyboard
box (_ _^_en // box, inbox, outbox box (_ _^_en // box, inbox, outbox
bug (_ _^_en // bug, debug bug (_ _^_en // bug, debug
bug (g _^_en // debugger bug (g _^_en // debugger
buil _^_en
_) buy _^_en
_) by _^_en // by, bye, bypass _) by _^_en // by, bye, bypass




c k // recoin donc c k // recoin donc


s) cien (t jA~ // conscient scientifique s) cien (t jA~ // conscient scientifique
cien (t_ i // ils apprécient, remercient.
i) cien (t_ sjA~ // coefficient
cien (t_ si // ils apprécient, remercient.
effi) cien (t_ sjA~ // coefficient
défi) cien (t_ sjA~ // déficient


// group c: English section // group c: English section
cast (_ _^_en // broadcast cast (_ _^_en // broadcast
chme _^_en
cult (_ _^_en // difficult
C) ch (_ _^_en // french
cut (_ _^_en // cut, shortcut cut (_ _^_en // cut, shortcut
cy (_ _^_en // currency, frequency




.group d .group d
d d // don bled d d // don bled


// group d: English section // group d: English section
') d (_ _^_en
day (_ _^_en day (_ _^_en
_) dec _^_en
dece _^_en
_) def _^_en
dele _^_en
deter _^_en
_) devic _^_en _) devic _^_en
diffe _^_en
dle (X _^_en // bundle, handle dle (X _^_en // bundle, handle
dly (_ _^_en
dy (_ _^_en




.group e .group e
Ci) en (nA E // mienne Ci) en (nA E // mienne


éC) en (t_ A~ // récent différent élément éC) en (t_ A~ // récent différent élément
étic) en (t_ A~ // réticent


_C) en (t_ A~ // cent vent lent dent _C) en (t_ A~ // cent vent lent dent
cc) en (t_ A~ // accent cc) en (t_ A~ // accent

_jac) en (t_ A~ // sous-jacent
Asc) en (t_ A~ // luminescent
dol) en (t_ A~ // dolent
imCoC) en (t_ A~ // impotent
inCoC) en (t_ A~ // innocent, indolent

XACcid) en (t_ A~ // accident, occident

_ag) en (t_ A~ // agent
_arC) en (t_ A~ // argent, ardent, arpent
_urg) en (t_ A~ // urgent
V_urg) en (t_ // urgent (verbe)

_émin) en (t_ A~ // éminent
immin) en (t_ A~ // imminent
oémin) en (t_ A~ // proéminent
jac) en (t_ A~ // sous-jacent, adjacent
_lat) en (t_ A~ // latent
lig) en (t_ A~ // intelligent
man) en (t_ A~ // permanent
ndig) en (t_ A~ // indigent
_pat) en (t_ A~ // patent
rmam) en (t_ A~ // firmament
xig) en (t_ A~ // exigent
Vxig) en (t_ // exigent
éCerg) en (t_ A~ // détergent
V_éCerg) en (t_ // émergent
verg) en (t_ A~ // convergent, divergent
Vverg) en (t_ // convergent, divergent (verbe)

_serg) en (t_ A~ // sergent
CACCim) en (t_ A~ // condiment, gentiment
dim) en (t_ A~ // rudiment
Agim) en (t_ A~ // régiment
Alim) en (t_ A~ // poliment
inim) en (t_ A~ // infiniment
manim) en (t_ A~ // maniment
onim) en (t_ A~ // boniment
plim) en (t_ A~ // compliment
Atim) en (t_ A~ // bâtiment
_cim) en (t_ A~ // ciment
_pim) en (t_ A~ // piment
erm) en (t_ A~ // ferment, serment
Vferm) en (t_ // ferment, referment (verbe)
Arp) en (t_ A~ // arpent, serpent
CArr) en (t_ A~ // concurrent, torrent
mitt) en (t_ A~ // intermittent
énit) en (t_ A~ // pénitent
tourm) en (t_ A~ // tourment


ti) en (t_ E~ // retient ti) en (t_ E~ // retient
ati) en (t_ A~ // patient ati) en (t_ A~ // patient
mom) en (t_ A~ // moment mom) en (t_ A~ // moment
Aaim) en (t_ A~ Aaim) en (t_ A~
Caim) en (t_ A~ Caim) en (t_ A~
cum) en (t_ A~
dum) en (t_ A~
gum) en (t_ A~ gum) en (t_ A~
lum) en (t_ A~
rum) en (t_ A~ // prudemment
oCum) en (t_ A~
rum) en (t_ A~


_cli) en (t A~ // client _cli) en (t A~ // client
éCid) en (t_ A~
VéCid) en (t_
Xtrid) en (t_ A~
_laur) en (t_ A~ _laur) en (t_ A~
_mécont) en (t_ A~ _mécont) en (t_ A~
_cont) en (t_ A~ _cont) en (t_ A~
mpét) en (t_ A~ // compétent
prés) en (t_ A~
_Vcont) en (t_ t2
éC) en (t_ A~ // compétent
_souv) en (t_ A~ // souvent _souv) en (t_ A~ // souvent
Cud) en (t_ A~


s_couv) en (t_ // elles couvent s_couv) en (t_ // elles couvent
qui_couv) en (t_ qui_couv) en (t_
en (s_ A~ en (s_ A~
en (CA A~ // pentathlon en (CA A~ // pentathlon
en (CC A~ // entre en (CC A~ // entre
en (ch_ En // french
sp) ens (_ Ens // suspens sp) ens (_ Ens // suspens
éC) ens (_ A~z2 // dépens démens éC) ens (_ A~z2 // dépens démens


tr) e (CrA @ // entreprise tr) e (CrA @ // entreprise


e (CC E // infect pelle mettre e (CC E // infect pelle mettre
e (CC E // infect pelle mettre
_s) e (cr @ // secret
e (C_ E e (C_ E
es (_ z2 es (_ z2


// group e: English section // group e: English section
eac _^_en eac _^_en
C) ead _^_en // read, head C) ead _^_en // read, head
eaf _^_en // leaf
eag _^_en // eagle
eak _^_en // break, speak eak _^_en // break, speak
Cr) eam _^_en // stream Cr) eam _^_en // stream
Cl) ean _^_en // clean Cl) ean _^_en // clean
l) ean _^_en // lean
m) ean _^_en // mean
ear _^_en // ear, search ear _^_en // ear, search
eas _^_en // please eas _^_en // please
eat _^_en // eat, seat
eave _^_en // leave eave _^_en // leave
ed (_ _^_en ed (_ _^_en
_) edit _^_en _) edit _^_en
ee _^_en // meeting ee _^_en // meeting
eft (_ _^_en // left
ehen _^_en // comprehensive
eing _^_en // being, goeing eing _^_en // being, goeing
eive _^_en // receive eive _^_en // receive
eith _^_en // either neither
ej _^_en ej _^_en
_) el (AC _^_en // element, eliminate
eld (_ _^_en // field eld (_ _^_en // field
elf (_ _^_en // shelf
elves (_ _^_en // shelves
ember (_ _^_en
enter (_ _^_en // enter, center enter (_ _^_en // enter, center
ely (_ _^_en // lately
_) enhan _^_en _) enhan _^_en
ooC) er (_ _^_en // scooter
ern (_ _^_en // western
_C) etter (_ _^_en // letter, better
ack) et (_ _^_en // racket, packet ack) et (_ _^_en // racket, packet
ock) et (_ _^_en // rocket, pocket ock) et (_ _^_en // rocket, pocket
qu) est (_ _^_en // request qu) est (_ _^_en // request
_neu) f (_heures v _neu) f (_heures v


// group f: English section // group f: English section
fail (_ _^_en // fail
fail (A _^_en // failure
faith _^_en // faith
_) fire _^_en _) fire _^_en
fly _^_en fly _^_en
ford (_ _^_en
_) frame _^_en _) frame _^_en
_) freq _^_en
friend _^_en // friend, friendly
fy (_ _^_en




.group g .group g
@@) gate (X _^_en @@) gate (X _^_en
gh _^_en // high, higher gh _^_en // high, higher
girl _^_en // girl girl _^_en // girl
gly (_ _^_en
give (X _^_en give (X _^_en
gy (_ _^_en gy (_ _^_en




// group h: English section // group h: English section
_) half _^_en _) half _^_en
_) h (As_ _^_en // has, his
here (_ _^_en // here here (_ _^_en // here




ique (_ ik ique (_ ik


// group i: English section // group i: English section
A_) i (_ _^_en
C_) i (_ _^_en
_) i (_A _^_en
_) i (_C _^_en
L02C) ic (_ _^_en
L02CC) ic (_ _^_en
@C) id (_ _^_en
idd _^_en
iev _^_en iev _^_en
ife (_ _^_en ife (_ _^_en
igg _^_en
ike (X _^_en // bike, like
ild _^_en
ilt _^_en
ing (_ _^_en // parking ing (_ _^_en // parking
ind (_ _^_en // find, mind ind (_ _^_en // find, mind
_C) ind (er_ _^_en // finder, reminder _C) ind (er_ _^_en // finder, reminder
_AC) ind (er_ _^_en // finder, reminder _AC) ind (er_ _^_en // finder, reminder
ink _^_en // link, pink
ip (_ _^_en // chip, ship
ious (_ _^_en // various
iously (_ _^_en // previously
ist (_ _^_en // tourist
ism (_ _^_en // tourism
ize _^_en // realize ize _^_en // realize




ïn (C E~ // coïncider ïn (C E~ // coïncider
ïn (_ E~ ïn (_ E~
ïs is // maïs, archaïsme ïs is // maïs, archaïsme
ï (q i // archaïquee
ï (q i // archaïque
ï (c i // laïc
a) ï j // aïeul a) ï j // aïeul
ï i // ambiguïté ï i // ambiguïté


.group j .group j
j Z // adjoint joujoux j Z // adjoint joujoux


// group j: English section
ject (_ _^_en



.group k .group k
k k // kafka k k // kafka




// group k: English section // group k: English section
AC) k _^_en // blank, black, dark
ke (X _^_en // basket, make, take ke (X _^_en // basket, make, take
key _^_en key _^_en
ky (_ _^_en ky (_ _^_en
ui) ll j // juillet ui) ll j // juillet


// group l: English section // group l: English section
') ld (_ _^_en
') ll (_ _^_en
less (_ _^_en // noiseless less (_ _^_en // noiseless
_) live _^_en _) live _^_en
ll (_ _^_en
lly (_ _^_en
lk (_ _^_en
lessly (_ _^_en // endlessly
lord (_ _^_en




.group m .group m
mm m // pomme mm m // pomme


// group m: English section // group m: English section
') m (_ _^_en
mail (A _^_en // mailer mail (A _^_en // mailer
may _^_en
mov (A _^_en // move, movie mov (A _^_en // move, movie




A) ng (_ N // parking meeting A) ng (_ N // parking meeting
nn n // panne nn n // panne


// group n: English section
n't (_ _^_en
nunc _^_en
ny (_ _^_en




.group o .group o
oa (X _^_en // approach, load oa (X _^_en // approach, load
oa (CeX _^_en oa (CeX _^_en
oach _^_en oach _^_en
oes (_ _^_en
old (er_ _^_en // folder, older old (er_ _^_en // folder, older
C) oing (_ _^_en // going C) oing (_ _^_en // going
_aC) ong (_ _^_en // along, among
oo (Ce _^_en // boomer oo (Ce _^_en // boomer
oo (X _^_en // pool oo (X _^_en // pool
C) oot _^_en // bootable football C) oot _^_en // bootable football
_) one _^_en _) one _^_en
_) onl _^_en
orm (_ _^_en
_sC) ot _^_en _sC) ot _^_en
othe _^_en // other, mother
C) ou (ld_ _^_en // could, should
oun (C _^_en // bounce, found oun (C _^_en // bounce, found
_C) ouse _^_en // mouse, house
C) outh _^_en // mouth, south
_) over _^_en // over _) over _^_en // over
ow _^_en // cow, town, down ow _^_en // cow, town, down
oy (X _^_en // boy, toy oy (X _^_en // boy, toy
// group p: English section // group p: English section
pad (_ _^_en pad (_ _^_en
plug _^_en // plug plug _^_en // plug
pmen _^_en
ply (_ _^_en // simply
_) pre (L01 _^_en
press (_ _^_en
print (_ _^_en // print print (_ _^_en // print
printer (_ _^_en // printer printer (_ _^_en // printer
_) prove (X _^_en // prove
_AC) prove (X _^_en // improve
_AC) provem _^_en // improvement
py (_ _^_en // copy




.group q .group q
_A) qu (ilat ky // équilatéral _A) qu (ilat ky // équilatéral
_C) qu (a kw // squale square _C) qu (a kw // squale square
_A) qu (a kw // équateur _A) qu (a kw // équateur
dA) qu (a kw // adequate
_) qu (artz kw // quartz _) qu (artz kw // quartz
qu k // quatre qu k // quatre
_) que (_ k@ // que _) que (_ k@ // que
_) real _^_en _) real _^_en
rese _^_en rese _^_en
rst (_ _^_en rst (_ _^_en
_) rule (X _^_en // rule
ry (_ _^_en // theory




.group s .group s
s (v z s (v z


// group s: English section // group s: English section
') s (_ _^_en
sh (_ _^_en sh (_ _^_en
she (C_ _^_en
ship _^_en // friendship ship _^_en // friendship
shop _^_en // shop shop _^_en // shop
sh (At _^_en // shut, shot sh (At _^_en // shut, shot
shout _^_en
sk _^_en sk _^_en
_) smil _^_en // smile, smiley _) smil _^_en // smile, smiley
spy _^_en spy _^_en
A) ss (_ _^_en // boss, cross
_) state _^_en // state, statement
C) s (ton _^_en // winston
stone (_ _^_en
sy (_ _^_en sy (_ _^_en




_ce) t (_ t _ce) t (_ t


// group t: English section // group t: English section
tch (_ _^_en
_) th (AX _^_en // that, this, then, than
C) th (_ _^_en // month C) th (_ _^_en // month
_) time _^_en _) time _^_en
tle (_ _^_en tle (_ _^_en
try (_ _^_en // try, country try (_ _^_en // try, country
sCar) t (_ _^_en // smart, start sCar) t (_ _^_en // smart, start
sCar) t (er_ _^_en // starter sCar) t (er_ _^_en // starter
tme _^_en
tne _^_en
_) tun (AX _^_en _) tun (AX _^_en
ty (_ _^_en // party, buty




.group u .group u
g) u (ë y // ambiguë g) u (ë y // ambiguë


// group u: English section // group u: English section
C) u (ch_ _^_en // much such
C) ui (ce _^_en // produice, juice C) ui (ce _^_en // produice, juice
umber (_ _^_en umber (_ _^_en
ump (_ _^_en ump (_ _^_en
unch _^_en unch _^_en
_C) unct _^_en // function, punctuation
under _^_en // under, understand, thunder under _^_en // under, understand, thunder
_) up (C _^_en // upper, update _) up (C _^_en // upper, update
upt (_ _^_en
up (_ _^_en // setup up (_ _^_en // setup
C) ur (ch _^_en // church
urn (X _^_en // burn, turn urn (X _^_en // burn, turn
ust (_ _^_en // just, trust ust (_ _^_en // just, trust
rib) u (teX _^_en // tribute, attribute




.group v .group v
v v v v


// group v: English section // group v: English section
') ve (_ _^_en
vail (A _^_en
voice (X _^_en voice (X _^_en
void (_ _^_en void (_ _^_en
vy (_ _^_en vy (_ _^_en
wr _^_en // write wr _^_en // write
w (ACC _^_en // wash, wish, with w (ACC _^_en // wash, wish, with
way _^_en // way, away way _^_en // way, away
wh _^_en // what, which, who
_) wi _^_en // wire _) wi _^_en // wire
win _^_en // winner, window win _^_en // winner, window
wise _^_en wise _^_en
_) wom _^_en // woman
wor _^_en // word, world wor _^_en // word, world




si) x (iè z // sixième si) x (iè z // sixième
deu) x (iè z // deuxième deu) x (iè z // deuxième


// group x: English section
xamp _^_en



.group y .group y
y i // cryogénique myope y i // cryogénique myope

+ 7
- 5
dictsource/ta_rules View File

க (் g க (் g
்) க ga ்) க ga
்) க (B g ்) க (B g
_) க ka
_) க kV
_) க (B k _) க (B k
க்க k:a க்க k:a
க்க (B k: க்க (B k:


ட d.a ட d.a
ட (B d. ட (B d.
_) ட t.a
_) ட t.V
_) ட (B t. _) ட (B t.
ட்ட t.a ட்ட t.a
ட்ட (B t. ட்ட (B t.


த da த da
த (B d த (B d
_) த ta
_) த tV
_) த (B t _) த (B t
த்த t:a த்த t:a
த்த (B t: த்த (B t:


ப ba ப ba
ப (B b ப (B b
_) ப pa
_) ப pV
_) ப (B p _) ப (B p
ப்ப p:a ப்ப p:a
ப்ப (B p: ப்ப (B p:
ஃ) ப fa
ஃ) ப (B f


ம ma ம ma
ம (B m ம (B m


ற Ra ற Ra
ற (B R ற (B R
ற் (ற t // RR -> tR
ற் (ற t. // RR -> t.R


ல la ல la
ல (B l ல (B l

+ 55
- 17
dictsource/tr_list View File

d dE d dE
e E e E
f fE f fE
g g;E
ğ jumuS'ak||g;'E
g JE
ğ jumuS'ak||J'E
h hE h hE
ı u# ı u#
i i i i
l lE l lE
m mE m mE
n nE n nE
o O
_o O
ö W ö W
p pE p pE
q kvE q kvE


_?? sEmb'Ol _?? sEmb'Ol


_0 su#fu#r
_1 bir
_2 iki
_0 su#fu#R
_1 biR
_2 ici
_3 YtS _3 YtS
_4 dWrt
_4 dWRt
_5 beS _5 beS
_6 altu# _6 altu#
_7 jedi _7 jedi
_8 sekiz
_8 seciz
_9 dokuz _9 dokuz
_10 on _10 on
_11 'onbir
_12 'oniki
_11 'onbiR
_12 'onici
_1X on _1X on
_2X jirmi
_2X jiRmi
_3X otuz _3X otuz
_4X ku#rk
_4X ku#Rk
_5X elli _5X elli
_6X altmu#S _6X altmu#S
_7X jetmiS _7X jetmiS
_8X seksEn _8X seksEn
_9X doksan _9X doksan
_0C jyz _0C jyz
_2C 'ikijyz
_2C 'icijyz
_0M1 bIn _0M1 bIn
_1M1 bIn _1M1 bIn
_0M2 miljon _0M2 miljon
_1M2 miljon
_0M3 miljar _0M3 miljar
_1M3 miljar
_dpt _virg,Yl_|
_dpt _viRg,Yl_|




// function words


// exceptions
// conjunctions
ve $brk // and
yoksa $brk // or
veya $brk // or
ama $brk $1 // but
ki $brk // that


mı $u // question
mi $u
mu $u
mü $u


// pronouns
bu $u+ // this
şu $u+ // that

ben $u+ // I
sen $u+ // you
o $u+ // it
siz $u+ // you (plural/formal)
biz $u+ // we



bir $u+ // a (or one)

da $u // also
de $u
ta $u
te $u

ile $u


// EXCEPTIONS

// Person Names

// Place Names
ankara $1 ankara $1
istanbul $2



+ 219
- 6
dictsource/tr_rules View File

// Turkish translation rules // Turkish translation rules
// This file is UTF-8 encoded // This file is UTF-8 encoded


// Stress rule: Right-most vowel, but stop before the vowel which is
// marked as unstressed [%].

// unvoiced consonants
.L01 ç f h k p q s ş t

.replace
` '


.group a .group a
a a a a


avru (pa avr'u
@) a (_S1 a

acak (_S3 adZak
acakmış (_S7 adZakm%u#S
ayım (_S4 aju#m
alım (_S4 alu#m
asın (_S4 asu#n

avru (pa avr'u


.group b .group b
b b b b
.group d .group d
d d d d


da (_S2 da
de (_S2 de
dan (_S3 dan
den (_S3 den

// declare these suffixes so that preceding "ma/me" is unstressed
dim (_S3 dim
dik (_S3 dik
din (_S3 din
diniz (_S5 diniz
di (_S2 di
diler (_S5 dileR
dım (_S3 du#m
dık (_S3 du#k
dın (_S3 du#n
dınız (_S5 du#nu#z
dı (_S2 du#
dılar (_S5 du#laR
dum (_S3 dum
duk (_S3 duk
dun (_S3 dun
dunuz (_S5 dunuz
du (_S2 du
dular (_S5 dular
düm (_S3 dym
dük (_S3 dyk
dün (_S3 dyn
dünüz (_S5 dynyz
dü (_S2 dy
düler (_S5 dyler

dir (_S3 %diR
dır (_S3 %du#R
dur (_S3 %duR
dür (_S3 %dyR

_) d (eğil 'd _) d (eğil 'd



.group e .group e
e e e e


@) e (_S1 e

ecek (_S4 edZek
ecekmiş (_S7 edZekm%iS
eyim (_S4 ejim
elim (_S4 elim
esin (_S4 esin


.group f .group f
f f f f


.group g .group g
g g g g
g (i J
g (e J
g (ü J
g (ö J


.group ğ .group ğ
ğ Q ğ Q
A) ğ :
A) ğ :||
i) ğ j
e) ğ j


.group h .group h
h h h h



.group i .group i
i i i i


@) i (_S1 i

im (_S2 %im
iniz (_S4 %iniz
iz (_S2 %iz
iyor (_S4 ij%oR
in (_S2 in


.group ı .group ı
ı u# ı u#


@) ı (_S1 u#

ım (_S2 %u#m
ınız (_S2 %u#nu#z
ız (_S2 %u#z
ıyor (_S4 u#j%oR
ın (_S2 u#n


.group j .group j
j Z j Z


.group k .group k
k k k k
k (i c
k (e c
k (ü c
k (ö c

ken (_S3 %cen



.group l .group l
l l l l


la (_S2 %la
le (_S2 %le

lar (_S3 laR
ler (_S3 leR

li (_S2 li
lı (_S2 lu#
lu (_S2 lu
lü (_S2 ly

lik (_S3 lik
lık (_S3 lu#k
luk (_S3 luk
lük (_S3 lyk


.group m .group m
m m m m


ma (_S2 m%a
me (_S2 m%e
mı (_S2 m%u#
ma (_S2 %ma
me (_S2 %me


.group n .group n
n n n n


nin (_S3 nin
nın (_S3 nu#n
nun (_S3 nun
nün (_S3 nyn

.group o .group o
o o o o


q k q k


.group r .group r
r r
r R
A) r (A *

ra (_S2 %Ra
re (_S2 %Re
ra (_N Ra // only an unstressed suffix if another suffix follows
re (_N Re



.group s .group s
s s s s


sınız (_S5 %su#nu#z
siniz (_S5 %siniz
sunuz (_S5 %sunuz
sünüz (_S5 %synyz

sam (_S3 %sam // if
sak (_S3 %sak
san (_S3 %san
sanız (_S5 %sanu#z
sa (_S2 %sa
sem (_S3 %sem
sek (_S3 %sek
sen (_S3 %sen
seniz (_S5 %seniz
se (_S2 %se


.group ş .group ş
ş S ş S


.group t .group t
t t t t


ta (_S2 ta
te (_S2 te
tan (_S3 tan
ten (_S3 ten

L01) tir (_S3 %tiR
L01) tır (_S3 %tu#R
L01) tur (_S3 %tuR
L01) tür (_S3 %tyR


.group u .group u
u u u u


@) u (_S1 u

um (_S2 %um
unuz (_S4 %unuz
uz (_S2 %uz
uyor (_S4 uj%oR
un (_S2 un


.group ü .group ü
ü y ü y


@) ü (_S1 y

üm (_S2 %ym
ünüz (_S4 %ynyz
üz (_S2 %yz
üyor (_S4 yj%oR
ün (_S2 yn


.group v .group v
v v v v


x ks x ks
K) x z K) x z



.group y .group y
y j y j


ya (_S2 ja
ye (_S2 je
yi (_S2 ji
yı (_S2 ju#
yu (_S2 ju
yü (_S2 jy

yim (_S3 %jim
yım (_S3 %ju#m
yum (_S3 %jum
yüm (_S3 %jym

yiz (_S3 %jiz
yız (_S3 %ju#z
yuz (_S3 %juz
yüz (_S3 %jyz

yiniz (_S5 %jiniz
yınız (_S5 %ju#nu#z
yunuz (_S5 %junuz
yünüz (_S5 %jynyz

yla (_S3 %jla
yle (_S3 %jle

yacak (_S5 jadZak
yecek (_S5 jedZek
yacakmış (_S8 jadZakm%u#S
yecekmiş (_S8 jedZekm%iS

yayım (_S5 jaju#m
yalım (_S5 jalu#m
yasın (_S5 jasu#n
yeyim (_S5 jejim
yelim (_S5 jelim
yesin (_S5 jesin

yken (_S4 jc%en


.group z .group z
z z z z


.group .group
$ dolar $ dolar
' (Pb // split a word at ' and translate the first part separately.



+ 15
- 12
phsource/compile_report View File

new total new total
base 99 99 base 99 99
base2 24 118 base2 24 118
en 54 148
en_n 30 148
en_us 34 148
en_sc 39 150
en_rp 34 148
en_wm 30 148
en_wi 30 148
en 53 147
en_n 30 147
en_us 34 147
en_sc 39 149
en_rp 34 147
en_wm 30 147
en_wi 30 147
af 38 131 af 38 131
cy 29 125 cy 29 125
de 33 126 de 33 126
fr 44 125 fr 44 125
fr_ca 11 125 fr_ca 11 125
hi 51 136 hi 51 136
ta 15 138
ta 16 139
hu 23 115 hu 23 115
nl 28 122 nl 28 122
pl 15 110 pl 15 110
sw 14 108 sw 14 108
th 50 142 th 50 142
id 14 120 id 14 120
tr 14 123
tr 18 123
ku 13 120 ku 13 120
ja 7 104 ja 7 104


[dZ;] ru [dZ;] ru
dzh/dz_pzd [J] base dzh/dz_pzd [J] base
[J2] hi [J2] hi
[J] tr
dzh/dz_pzd_ [J] base dzh/dz_pzd_ [J] base
[J2] hi [J2] hi
dzh/xdzh [dZ] base dzh/xdzh [dZ] base
[c] is [c] is
[k] zhy [k] zhy
[k] sw [k] sw
[J] tr
[c] tr
ustop/kl [k] base ustop/kl [k] base
[k] base2 [k] base2
[k] en [k] en
[u] th [u] th
[u:] th [u:] th
[u] id [u] id
vowel/u# [u-] en
[u:] en_sc
vowel/u# [u:] en_sc
[Y] tr [Y] tr
vowel/u_2 [u1] fi vowel/u_2 [u1] fi
[u] sk [u] sk
vowel/u#_2 [u-] ta vowel/u#_2 [u-] ta
[u-] sv [u-] sv
vowel/u_3 [yU] ro vowel/u_3 [yU] ro
vowel/u#_3 [U] ta
[U:] ta
vowel/u_4 [u:] en_n vowel/u_4 [u:] en_n
vowel/u#_4 [U] en_sc vowel/u#_4 [U] en_sc
[u:] en_sc [u:] en_sc
vowel/uu# [U] ku vowel/uu# [U] ku
vowel/uu_2 [U] base2 vowel/uu_2 [U] base2
[U] de [U] de
[U] ta
[U] tr [U] tr
vowel/uu_3 [u] af vowel/uu_3 [u] af
[y] zh [y] zh

+ 0
- 6
phsource/ph_english View File

endphoneme endphoneme




phoneme u-
vowel starttype (u) endtype (u)
length 200
formants vowel/u#
endphoneme


+ 2
- 2
phsource/ph_spanish View File

vowelin f1=1 f2=1000 -300 -200 f3=-300 100 vowelin f1=1 f2=1000 -300 -200 f3=-300 100
vowelout f1=0 f2=1000 -500 -300 f3=-300 60 len=10 vowelout f1=0 f2=1000 -500 -300 f3=-300 60 len=10
lengthmod 6 lengthmod 6
formants voc/v2+vocw/v%75
before _ voc/v_+vocw/v%75
formants voc/v2+vocw/v%80
before _ voc/v_+vocw/v%80
switchvoicing f switchvoicing f
endphoneme endphoneme



+ 9
- 3
phsource/ph_tamil View File



phoneme U phoneme U
vowel starttype (u) endtype (u) vowel starttype (u) endtype (u)
length 130
formants vowel/uu_2
length 150
formants vowel/u#_3
endphoneme endphoneme



phoneme u: phoneme u:
vowel starttype (u) endtype (u) vowel starttype (u) endtype (u)
length 270 length 270
formants vowel/u formants vowel/u
endphoneme endphoneme


phoneme U:
vowel starttype (u) endtype (u)
length 270
formants vowel/u#_3
endphoneme




phoneme u- phoneme u-
vowel starttype (u) endtype (u) vowel starttype (u) endtype (u)

+ 33
- 0
phsource/ph_turkish View File

formants vowel/oo formants vowel/oo
endphoneme endphoneme




phoneme : // Lengthen previous vowel by "length"
virtual
length 180
endphoneme


phoneme c
vls pal stop
vowelin f1=0 f2=2700 200 500 f3=400 80 len=60
vowelout f1=0 f2=2700 300 500 f3=300 80
lengthmod 2
wave ustop/ki
before _ ustop/ki%80
endphoneme


phoneme J
vcd pla stop
vowelin f1=2 f2=2700 400 600 f3=300 80
vowelout f1=2 f2=2700 400 600 f3=300 80 colr=1
formants dzh/dz_pzd+ustop/ki%60
lengthmod 5
switchvoicing c
endphoneme


phoneme l
import_phoneme l en
endphoneme



+ 3
- 3
phsource/phonemes View File



phoneme q phoneme q
vls uvl stop vls uvl stop
vowelin f1=1 f2=1700 0 200 f3=-300 80 f4 paus rms=35
vowelout f1=1 f2=1700 0 200 f3=-300 80 f4 rms=30
vowelin f1=1 f2=1700 0 200 f3=-300 80 paus f4 rms=30
vowelout f1=1 f2=1700 -100 200 f3=-300 80 f4 rms=35
lengthmod 2 lengthmod 2
wave ustop/q%50
wave ustop/q%48
before _ ustop/q%40 before _ ustop/q%40
endphoneme endphoneme



BIN
phsource/voc/v2 View File


BIN
phsource/vowel/u#_3 View File


BIN
phsource/vowel/u#_7 View File


BIN
phsource/vowel/vowelchart.png View File


+ 3
- 0
src/compiledict.cpp View File

case 't': case 't':
sxflags |= SUFX_T; sxflags |= SUFX_T;
break; break;
case 'b':
sxflags |= SUFX_B;
break;
default: default:
if(isdigit(c)) if(isdigit(c))
value = (value*10) + (c - '0'); value = (value*10) + (c - '0');

+ 3
- 1
src/dictionary.cpp View File

{ {
c = p_start[ix]; c = p_start[ix];
word_copy[ix++] = c; word_copy[ix++] = c;
if(c == 0)
break;
} }
word_copy[ix] = 0; word_copy[ix] = 0;


word_copy[i] = 0; word_copy[i] = 0;


// look for multibyte characters to increase the number of bytes to remove // look for multibyte characters to increase the number of bytes to remove
for(len_ending = i = (end_type & 0xf); i>0 ;i--) // num.of characters of the suffix
for(len_ending = i = (end_type & 0x3f); i>0 ;i--) // num.of characters of the suffix
{ {
word_end--; word_end--;
while((*word_end & 0xc0) == 0x80) while((*word_end & 0xc0) == 0x80)

+ 1
- 1
src/numbers.cpp View File



Lookup("_0C",ph_100); Lookup("_0C",ph_100);


if((hundreds >= 10) && ((langopts.numbers & 0x0800) || (hundreds != 19)))
if((hundreds >= 10) && (((langopts.numbers & 0x0800) == 0) || (hundreds != 19)))
{ {
ph_digits[0] = 0; ph_digits[0] = 0;



+ 1
- 1
src/synthdata.cpp View File

#include "translate.h" #include "translate.h"
#include "wave.h" #include "wave.h"


const char *version_string = "1.31.14 21.Feb.08";
const char *version_string = "1.31.17 24.Feb.08";
const int version_phdata = 0x013105; const int version_phdata = 0x013105;


int option_device_number = -1; int option_device_number = -1;

+ 1
- 1
src/tr_english.cpp View File

memcpy(stress_lengths,stress_lengths2,sizeof(stress_lengths)); memcpy(stress_lengths,stress_lengths2,sizeof(stress_lengths));
langopts.stress_rule = 0; langopts.stress_rule = 0;


langopts.numbers = 0x41 + NUM_ROMAN;
langopts.numbers = 0x841 + NUM_ROMAN;
langopts.param[LOPT_COMBINE_WORDS] = 2; // allow "mc" to cmbine with the following word langopts.param[LOPT_COMBINE_WORDS] = 2; // allow "mc" to cmbine with the following word
} }



+ 25
- 24
src/tr_languages.cpp View File

tr->langopts.param[LOPT_PREFIXES] = 1; tr->langopts.param[LOPT_PREFIXES] = 1;
memcpy(tr->stress_lengths,stress_lengths_de,sizeof(tr->stress_lengths)); memcpy(tr->stress_lengths,stress_lengths_de,sizeof(tr->stress_lengths));
tr->langopts.numbers = 0x11c19 + NUM_ROMAN;
tr->langopts.numbers = 0x11419 + NUM_ROMAN;
SetLetterVowel(tr,'y'); SetLetterVowel(tr,'y');
} }
break; break;
tr->langopts.unstressed_wd2 = 2; tr->langopts.unstressed_wd2 = 2;
tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels


tr->langopts.numbers = 0xb09;
tr->langopts.numbers = 0x309;
tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands


if(name2 == L_grc) if(name2 == L_grc)
tr->langopts.unstressed_wd1 = 1; tr->langopts.unstressed_wd1 = 1;
tr->langopts.unstressed_wd2 = 2; tr->langopts.unstressed_wd2 = 2;


tr->langopts.numbers = 0x1c09 + NUM_ROMAN;
tr->langopts.numbers = 0x1409 + NUM_ROMAN;
} }
break; break;


tr->langopts.param[LOPT_IT_DOUBLING] = 1; tr->langopts.param[LOPT_IT_DOUBLING] = 1;
tr->langopts.long_stop = 140; tr->langopts.long_stop = 140;


tr->langopts.numbers = 0x1009;
tr->langopts.numbers = 0x1809;
SetLetterVowel(tr,'y'); SetLetterVowel(tr,'y');
tr->langopts.max_initial_consonants = 2; tr->langopts.max_initial_consonants = 2;
tr->langopts.spelling_stress = 1; tr->langopts.spelling_stress = 1;


tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable
tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable
tr->langopts.numbers = 0x811;
tr->langopts.numbers = 0x011;
tr->langopts.numbers2 = 0x100; tr->langopts.numbers2 = 0x100;
tr->letter_bits_offset = OFFSET_DEVANAGARI; tr->letter_bits_offset = OFFSET_DEVANAGARI;


tr->langopts.max_initial_consonants = 5; tr->langopts.max_initial_consonants = 5;
tr->langopts.spelling_stress = 1; tr->langopts.spelling_stress = 1;


tr->langopts.numbers = 0x1c0d + 0x4000 + NUM_ROMAN_UC;
tr->langopts.numbers = 0x140d + 0x4000 + NUM_ROMAN_UC;
tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards
tr->langopts.replace_chars = replace_cyrillic_latin; tr->langopts.replace_chars = replace_cyrillic_latin;


tr->langopts.param[LOPT_IT_DOUBLING] = 1; tr->langopts.param[LOPT_IT_DOUBLING] = 1;
tr->langopts.param[LOPT_COMBINE_WORDS] = 99; // combine some prepositions with the following word tr->langopts.param[LOPT_COMBINE_WORDS] = 99; // combine some prepositions with the following word


tr->langopts.numbers = 0x1809 + NUM_ROMAN;
tr->langopts.numbers = 0x1009 + NUM_ROMAN;
SetLetterVowel(tr,'y'); SetLetterVowel(tr,'y');
tr->langopts.spelling_stress = 1; tr->langopts.spelling_stress = 1;
SetLengthMods(tr,3); // all equal SetLengthMods(tr,3); // all equal
tr = new Translator(); tr = new Translator();
SetupTranslator(tr,stress_lengths_id,stress_amps_id); SetupTranslator(tr,stress_lengths_id,stress_amps_id);
tr->langopts.stress_rule = 2; tr->langopts.stress_rule = 2;
tr->langopts.numbers = 0x1809 + NUM_ROMAN;
tr->langopts.numbers = 0x1009 + NUM_ROMAN;
tr->langopts.stress_flags = 0x6 | 0x10; tr->langopts.stress_flags = 0x6 | 0x10;
} }
break; break;
SetLetterBits(tr,3,"jvr"); // Letter group H SetLetterBits(tr,3,"jvr"); // Letter group H
tr->letter_groups[1] = is_lettergroup_B; tr->letter_groups[1] = is_lettergroup_B;
SetLetterVowel(tr,'y'); SetLetterVowel(tr,'y');
tr->langopts.numbers = 0xe9;
tr->langopts.numbers = 0x8e9;
tr->langopts.numbers2 = 0x2; tr->langopts.numbers2 = 0x2;
} }
break; break;
tr->langopts.param[LOPT_IT_DOUBLING] = 2; // double the first consonant if the previous word ends in a stressed vowel tr->langopts.param[LOPT_IT_DOUBLING] = 2; // double the first consonant if the previous word ends in a stressed vowel
tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels
tr->langopts.param[LOPT_REDUCE] = 1; // reduce vowels even if phonemes are specified in it_list tr->langopts.param[LOPT_REDUCE] = 1; // reduce vowels even if phonemes are specified in it_list
tr->langopts.numbers = 0x2709 + 0x800 + NUM_ROMAN;
tr->langopts.numbers = 0x2709 + NUM_ROMAN;
} }
break; break;




tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable


tr->langopts.numbers = 0x100c69;
tr->langopts.numbers = 0x100469;
tr->langopts.max_initial_consonants = 2; tr->langopts.max_initial_consonants = 2;
} }
break; break;
tr->letter_groups[0] = vowels_cyrillic; tr->letter_groups[0] = vowels_cyrillic;


tr->langopts.stress_rule = 4; // antipenultimate tr->langopts.stress_rule = 4; // antipenultimate
tr->langopts.numbers = 0x0c29 + 0x4000;
tr->langopts.numbers = 0x0429 + 0x4000;
tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards
} }
break; break;
tr->langopts.param[LOPT_PREFIXES] = 1; tr->langopts.param[LOPT_PREFIXES] = 1;
SetLetterVowel(tr,'y'); SetLetterVowel(tr,'y');
tr->langopts.numbers = 0x11419;
tr->langopts.numbers = 0x11c19;
memcpy(tr->stress_lengths,stress_lengths_nl,sizeof(tr->stress_lengths)); memcpy(tr->stress_lengths,stress_lengths_nl,sizeof(tr->stress_lengths));
} }
break; break;


tr->langopts.stress_rule = 0; tr->langopts.stress_rule = 0;
SetLetterVowel(tr,'y'); SetLetterVowel(tr,'y');
tr->langopts.numbers = 0x11049;
tr->langopts.numbers = 0x11849;
} }
break; break;


tr->langopts.stress_flags = 0x6; // mark unstressed final syllables as diminished tr->langopts.stress_flags = 0x6; // mark unstressed final syllables as diminished
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x8; tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x8;
tr->langopts.max_initial_consonants = 7; // for example: wchrzczony :) tr->langopts.max_initial_consonants = 7; // for example: wchrzczony :)
tr->langopts.numbers=0x1809 + 0x4000;
tr->langopts.numbers=0x1009 + 0x4000;
tr->langopts.numbers2=0x40; tr->langopts.numbers2=0x40;
tr->langopts.param[LOPT_COMBINE_WORDS] = 2 + 0x100; // combine 'nie' (marked with $alt2) with some 1-syllable words (marked with $alt) tr->langopts.param[LOPT_COMBINE_WORDS] = 2 + 0x100; // combine 'nie' (marked with $alt2) with some 1-syllable words (marked with $alt)
SetLetterVowel(tr,'y'); SetLetterVowel(tr,'y');
// tr->langopts.vowel_pause = 1; // tr->langopts.vowel_pause = 1;
tr->langopts.stress_rule = 3; // stress on final syllable tr->langopts.stress_rule = 3; // stress on final syllable
tr->langopts.stress_flags = 0x6 | 0x10 | 0x20000; tr->langopts.stress_flags = 0x6 | 0x10 | 0x20000;
tr->langopts.numbers = 0xa69 + 0x2000 + NUM_ROMAN;
tr->langopts.numbers = 0x269 + 0x2000 + NUM_ROMAN;
SetLetterVowel(tr,'y'); SetLetterVowel(tr,'y');
} }
break; break;
tr->langopts.stress_flags = 0x100 + 0x6; tr->langopts.stress_flags = 0x100 + 0x6;


tr->charset_a0 = charsets[2]; // ISO-8859-2 tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->langopts.numbers = 0x1829+0x6000 + NUM_ROMAN;
tr->langopts.numbers = 0x1029+0x6000 + NUM_ROMAN;
tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex
} }
break; break;
tr->langopts.spelling_stress = 1; tr->langopts.spelling_stress = 1;
tr->langopts.param[LOPT_COMBINE_WORDS] = 4; // combine some prepositions with the following word tr->langopts.param[LOPT_COMBINE_WORDS] = 4; // combine some prepositions with the following word


tr->langopts.numbers = 0x0c01 + 0x4000 + NUM_ROMAN;
tr->langopts.numbers = 0x0401 + 0x4000 + NUM_ROMAN;
tr->langopts.numbers2 = 0x40; tr->langopts.numbers2 = 0x40;
tr->langopts.thousands_sep = 0; //no thousands separator tr->langopts.thousands_sep = 0; //no thousands separator
tr->langopts.decimal_sep = ','; tr->langopts.decimal_sep = ',';
tr->langopts.stress_rule = 0; tr->langopts.stress_rule = 0;
SetLetterVowel(tr,'y'); SetLetterVowel(tr,'y');
// SetLetterBits(tr,6,"eiyäö"); // soft vowels NOTE accented letters don't work in SetLetterBits // SetLetterBits(tr,6,"eiyäö"); // soft vowels NOTE accented letters don't work in SetLetterBits
tr->langopts.numbers = 0x1109;
tr->langopts.numbers = 0x1909;
} }
break; break;


tr->langopts.stress_rule = 2; tr->langopts.stress_rule = 2;
tr->langopts.stress_flags = 0x6 | 0x10; tr->langopts.stress_flags = 0x6 | 0x10;


tr->langopts.numbers = 0x8e1;
tr->langopts.numbers = 0x4e1;
tr->langopts.numbers2 = 0x100; tr->langopts.numbers2 = 0x100;
} }
break; break;


case L('t','r'): // Turkish case L('t','r'): // Turkish
{ {
static const unsigned char stress_amps_tr[8] = {16,16, 20,20, 20,24, 24,22 };
static const short stress_lengths_tr[8] = {170,170, 190,170, 0,0, 250,270};
static const unsigned char stress_amps_tr[8] = {18,18, 20,20, 20,24, 24,22 };
static const short stress_lengths_tr[8] = {190,190, 190,190, 0,0, 250,270};


tr = new Translator(); tr = new Translator();
SetupTranslator(tr,stress_lengths_tr,stress_amps_tr); SetupTranslator(tr,stress_lengths_tr,stress_amps_tr);
tr->charset_a0 = charsets[9]; // ISO-8859-9 - Latin5 tr->charset_a0 = charsets[9]; // ISO-8859-9 - Latin5


tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable
tr->langopts.stress_flags = 0x20; //no automatic secondary stress


tr->langopts.numbers = 0x1d09 + 0x4000;
tr->langopts.numbers = 0x1509 + 0x4000;
tr->langopts.max_initial_consonants = 2; tr->langopts.max_initial_consonants = 2;
} }
break; break;
langopts.param[LOPT_PREFIXES] = 1; langopts.param[LOPT_PREFIXES] = 1;
SetLetterVowel(this,'y'); // add 'y' to vowels SetLetterVowel(this,'y'); // add 'y' to vowels


langopts.numbers = 0x0d1 + NUM_ROMAN;
langopts.numbers = 0x8d1 + NUM_ROMAN;
memcpy(stress_lengths,stress_lengths2,sizeof(stress_lengths)); memcpy(stress_lengths,stress_lengths2,sizeof(stress_lengths));
} }



+ 36
- 6
src/translate.cpp View File

int word_length; int word_length;
int ix; int ix;
int posn; int posn;
int pfix;
int n_chars;
unsigned int dictionary_flags[2]; unsigned int dictionary_flags[2];
unsigned int dictionary_flags2[2]; unsigned int dictionary_flags2[2];
int end_type=0; int end_type=0;
char prefix_phonemes[N_WORD_PHONEMES]; char prefix_phonemes[N_WORD_PHONEMES];
char end_phonemes[N_WORD_PHONEMES]; char end_phonemes[N_WORD_PHONEMES];
char word_copy[N_WORD_BYTES]; char word_copy[N_WORD_BYTES];
char prefix_chars[N_WORD_BYTES];
int found=0; int found=0;
int end_flags; int end_flags;
char c_temp; // save a character byte while we temporarily replace it with space char c_temp; // save a character byte while we temporarily replace it with space
{ {
// Found a standard prefix, remove it and retranslate // Found a standard prefix, remove it and retranslate


if(confirm_prefix)
if(confirm_prefix && !(end_type & SUFX_B))
{ {
int end2; int end2;
char phonemes2[N_WORD_PHONEMES]; char phonemes2[N_WORD_PHONEMES];
} }
} }


strcat(prefix_phonemes,end_phonemes);
end_phonemes[0] = 0;
prefix_type = end_type; prefix_type = end_type;


if(prefix_type & SUFX_V) if(prefix_type & SUFX_V)
} }


wordx[-1] = c_temp; wordx[-1] = c_temp;
for(ix=(prefix_type & 0xf); ix>0; ix--) // num. of characters to remove
pfix = 1;
prefix_chars[0] = 0;
n_chars = prefix_type & 0x3f;

for(ix=0; ix < n_chars; ix++) // num. of characters to remove
{ {
wordx++;
while((*wordx & 0xc0) == 0x80) wordx++; // for multibyte characters
prefix_chars[pfix++] = *wordx++;

if((prefix_type & SUFX_B) && (ix == (n_chars-1)))
{
prefix_chars[pfix-1] = 0; // discard the last character of the prefix, this is the separator character
}

while((*wordx & 0xc0) == 0x80)
{
prefix_chars[pfix++] = *wordx++; // for multibyte characters
}
} }
prefix_chars[pfix] = 0;
c_temp = wordx[-1]; c_temp = wordx[-1];
wordx[-1] = ' '; wordx[-1] = ' ';
confirm_prefix = 1; confirm_prefix = 1;


if(prefix_type & SUFX_B)
{
// retranslate the prefix part
char *wordpf;
wordpf = &prefix_chars[1];
found = LookupDictList(&wordpf, phonemes, dictionary_flags, SUFX_P, wtab); // without prefix
if(found == 0)
{
end_type = TranslateRules(wordpf, phonemes, N_WORD_PHONEMES, end_phonemes, 0, dictionary_flags[0]);
strcat(prefix_phonemes, phonemes);
}
}
strcat(prefix_phonemes,end_phonemes);
end_phonemes[0] = 0;

end_type = 0; end_type = 0;
found = LookupDictList(&wordx, phonemes, dictionary_flags2, SUFX_P, wtab); // without prefix found = LookupDictList(&wordx, phonemes, dictionary_flags2, SUFX_P, wtab); // without prefix
if(dictionary_flags[0]==0) if(dictionary_flags[0]==0)

+ 2
- 1
src/translate.h View File

#define SUFX_F 0x2000 // verb follows #define SUFX_F 0x2000 // verb follows
#define SUFX_Q 0x4000 // don't retranslate #define SUFX_Q 0x4000 // don't retranslate
#define SUFX_T 0x10000 // don't affect the stress position in the stem #define SUFX_T 0x10000 // don't affect the stress position in the stem
#define SUFX_B 0x20000 // break, this character breaks the word into stem and suffix (used with SUFX_P)


#define FLAG_ALLOW_TEXTMODE 0x02 // allow dictionary to translate to text rather than phonemes #define FLAG_ALLOW_TEXTMODE 0x02 // allow dictionary to translate to text rather than phonemes
#define FLAG_SUFX 0x04 #define FLAG_SUFX 0x04
// bit8=only one primary stress in tens+units // bit8=only one primary stress in tens+units
// bit9=only one vowel between tens and units // bit9=only one vowel between tens and units
// bit10=omit "one" before "hundred" // bit10=omit "one" before "hundred"
// bit11=don't say 19** as nineteen hundred
// bit11=say 19** as nineteen hundred
// bit12=allow space as thousands separator (in addition to langopts.thousands_sep) // bit12=allow space as thousands separator (in addition to langopts.thousands_sep)
// bits13-15 post-decimal-digits 0=single digits, 1=(LANG=it) 2=(LANG=pl) 3=(LANG=ro) // bits13-15 post-decimal-digits 0=single digits, 1=(LANG=it) 2=(LANG=pl) 3=(LANG=ro)
// bit16=dot after number indicates ordinal // bit16=dot after number indicates ordinal

Loading…
Cancel
Save