Speak accented letter names as the base-letter name plus the accent name, using a table of Unicode characters up to U+17F. Added the $accent attribute for *_list files, meaning "speak as base-letter name plus accent name". Added accent names to the *_list files of several languages. Added a language option to allow vowels to merge between words (used for lang=TA). git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@153 d46cf337-b52f-0410-862d-fd96e6ae7743 (branch: master)
û kapi?y | û kapi?y | ||||
ü de@lte@k@n?y | ü de@lte@k@n?y | ||||
_cap h'o@flEt@r | |||||
_?? s@mb'o@l | |||||
_?A l'Et@r | |||||
// accent names | |||||
_lig d'Wb@llEt@r | |||||
_acu ak'yt | |||||
_ac2 d'Wb@lakyt | |||||
_brv br'i:v | |||||
_ced s'e@dIla | |||||
_cir k'api | |||||
_dia d'e@lte@k@n | |||||
_dot p'Wnt | |||||
_grv x2r'afIs | |||||
_hac k'A:rOn | |||||
_mcn m'akrOn | |||||
_ogo o@x2o@n,&k | |||||
_rng 'A:nb&l | |||||
_stk str'e@p | |||||
_tld t'Ild@ | |||||
// numeric | // numeric | ||||
_0 'nWl | _0 'nWl | ||||
_1 _'e@n | _1 _'e@n |
_#9 t'ap | _#9 t'ap | ||||
_#32 m'ezeRa // space | _#32 m'ezeRa // space | ||||
_?? simbol | |||||
_?A znak | |||||
_cap m'ajuskule | |||||
// accent names | |||||
_lig l'igat,uRa | |||||
_acu tS'a:Rka | |||||
_ac2 dvj'etS'a:Rki | |||||
_brv 'obloUtSek | |||||
_ced ts'eJila | |||||
_cir stR^'i:Ska | |||||
_dia tR'ema | |||||
_dot t'etSka | |||||
_grv c'eSki:;'aktsent | |||||
_hac h'a:tSek | |||||
_mcn m'akRon | |||||
_ogo 'ogonek | |||||
_rng kR'oUZek | |||||
_stk S'ikma:tS'a:Rka | |||||
_tld c'ilda | |||||
_$ dola:R | _$ dola:R | ||||
_' apostRof | _' apostRof | ||||
_< menSi: | _< menSi: | ||||
_> vjetSi: | _> vjetSi: | ||||
_| sv'isla:||tS'a:Ra | _| sv'isla:||tS'a:Ra | ||||
_?? simbol | |||||
_?A znak | |||||
// Numbers | // Numbers | ||||
// Abbreviations | // Abbreviations | ||||
// if a word has no vowels (or "r") then it will | // if a word has no vowels (or "r") then it will | ||||
// automatically be spoken as individual letters | // automatically be spoken as individual letters | ||||
e.g napR^ | |||||
arc $abbrev | arc $abbrev | ||||
arj $abbrev | arj $abbrev | ||||
atd $abbrev | atd $abbrev |
_#32 l'e:*ts'aIC@n | _#32 l'e:*ts'aIC@n | ||||
_?? zymb'o:l | _?? zymb'o:l | ||||
// accent names | |||||
_lig l'i:gat,u:* | |||||
_acu ak'u:t | |||||
_ac2 d'Op@l,aku:t | |||||
_brv b@-*'e:v@ | |||||
_ced tse:d'Il@ | |||||
_cir ts'I*kUmflEks | |||||
_dia 'UmlaUt | |||||
_dot p'Unkt | |||||
_grv g@-*'AvIs | |||||
_hac h'atSe:k | |||||
_mcn m'ak@-*o:n | |||||
_ogo 'o:go:n,e:k | |||||
_rng k@-*'u:Ze:k | |||||
_stk S@-*'Egst@-*IC | |||||
_tld t'Ild@ | |||||
// Not Roman numbers | // Not Roman numbers | ||||
v faU | v faU | ||||
x Iks | x Iks |
i i: l- o o: oU r- u | i i: l- o o: oU r- u | ||||
u: | u: | ||||
* b c d dZ f g h | |||||
j J k l l^ m n N | |||||
n^ p r R R^ R^/ s S | |||||
t tS ts v x z Z | |||||
* ; b c d dZ f g | |||||
h j J k l l^ m n | |||||
N n^ p r R R^ R^/ s | |||||
S t tS ts v x z Z | |||||
Dictionary cy_dict | Dictionary cy_dict | ||||
Dictionary it_dict | Dictionary it_dict | ||||
@- a a/ aI aU e E i | @- a a/ aI aU e E i | ||||
o O oI u | |||||
I o O oI u | |||||
* : b d dZ f g h | |||||
j k l l^ m n N n^ | |||||
p r R s S t tS ts | |||||
v w w2 z | |||||
* : ; b d dZ f g | |||||
h j k l l^ m n N | |||||
n^ p r R s S t tS | |||||
ts v w w2 z | |||||
Dictionary nl_dict | Dictionary nl_dict | ||||
l- l: o o: oU r- r: u | l- l: o o: oU r- r: u | ||||
u: | u: | ||||
* : b d d; dZ f g | |||||
h j k l l^ m n N | |||||
n^ p r R R^ R^/ s S | |||||
t t; tS ts v x z Z | |||||
* : ; b d d; dZ f | |||||
g h j k l l^ m n | |||||
N n^ p r R R^ R^/ s | |||||
S t t; tS ts v x z | |||||
Z | |||||
Dictionary sv_dict | Dictionary sv_dict | ||||
E e e: i I i: l- O | E e e: i I i: l- O | ||||
o o: oU r* r- u U u: | o o: oU r* r- u U u: | ||||
* b d dZ dZ; f g h | |||||
j k l L l^ m n N | |||||
n^ p r R s S t tS | |||||
ts tS; v x z Z | |||||
* ; b d dZ dZ; f g | |||||
h j k l L l^ m n | |||||
N n^ p r R s S t | |||||
tS ts tS; v x z Z | |||||
Dictionary id_dict | Dictionary id_dict |
z zEd | z zEd | ||||
?3 z zi: | ?3 z zi: | ||||
é i:a2k'ju:t | |||||
// sounds are specified for these accented characters | |||||
// use the $accent attribute here to force the use of the | |||||
// accent table when speaking the name of the character. | |||||
_é $accent | |||||
ê $accent | |||||
_ä $accent | |||||
_ë $accent | |||||
_ï $accent | |||||
_ö $accent | |||||
_ü $accent | |||||
_č $accent | |||||
_š $accent | |||||
_ş $accent | |||||
_ž $accent | |||||
_ñ $accent | |||||
// accent names | |||||
_lig l,Iga2tS@ | |||||
_acu a2kj'u:t | |||||
_ac2 dVb@la2kj'u:t | |||||
_brv br'i:v | |||||
_ced s@d'Ila2 | |||||
_cir s'3:kVmfl,Eks | |||||
_dia 'UmlaUt | |||||
_dot wI2Dd'0ta2bVv | |||||
_grv gr'A:v | |||||
_hac h'atS,Ek | |||||
_mcn m'akr,0n | |||||
_ogo '0g0n,Ek | |||||
_rng r'INg | |||||
_stk str'oUk | |||||
_tld t'Ild@ | |||||
// character names | // character names | ||||
_, k0m@ | _, k0m@ | ||||
_- h,aIf@n | _- h,aIf@n | ||||
_. d0t | _. d0t | ||||
_/ stroUk | |||||
_: koUl@n | _: koUl@n | ||||
_; sEmIk'oUl0n | _; sEmIk'oUl0n | ||||
_< ElaNg@L | _< ElaNg@L | ||||
_? kwEstS@n | _? kwEstS@n | ||||
_@ at|saIn | _@ at|saIn | ||||
_[ lEftskwe@ | _[ lEftskwe@ | ||||
_\ bakstroUk | |||||
_] raItskwe@ | _] raItskwe@ | ||||
_^ s3:kVmfl,Eks | _^ s3:kVmfl,Eks | ||||
?5 _^ sIRkVmfl,Eks | ?5 _^ sIRkVmfl,Eks | ||||
¬ n0t_|saIn | ¬ n0t_|saIn | ||||
ə SwA: | ə SwA: | ||||
æ eI'i: | æ eI'i: | ||||
ɛ oUp@n'i: | |||||
ɔ oUp@n'oU | |||||
þ TO@n | |||||
ŋ ENg | |||||
ð ED | ð ED | ||||
ʃ ES | ʃ ES | ||||
ʒ EZ | ʒ EZ | ||||
ĸ krA: | |||||
ɛ oUp@n'i: | |||||
ɔ oUp@n'oU | |||||
// numeric | // numeric | ||||
.group | .group | ||||
// non-ascii characters with specified pronunciations | // non-ascii characters with specified pronunciations | ||||
// List the accented characters in en_list with the $accent attribute | |||||
é eI | é eI | ||||
_) é I2 | _) é I2 | ||||
ê E: | ê E: |
_?? s'imbOlo | _?? s'imbOlo | ||||
_#32 Esp'aTjo | _#32 Esp'aTjo | ||||
// accent names | |||||
_lig liQaD'u**a | |||||
_acu aQ'uDo | |||||
_ac2 d'Oble||aQ'uDo | |||||
_brv b**'eBe | |||||
_ced TeD'iJ^a | |||||
_cir TiRkumfl'exo | |||||
_dia dj'E**esis | |||||
_dac d'Oble||aQ'uDo | |||||
_dot p'unto // ?? | |||||
_grv g**'aBe | |||||
_hac ka**'On | |||||
_mcn mak**'On | |||||
_ogo kol'ita | |||||
_rng an'iJ^o | |||||
_stk b'a*Ra // ?? | |||||
_tld t'ilde | |||||
// names of symbols | // names of symbols | ||||
_. punto | _. punto |
_) r R | _) r R | ||||
A) r (A ** | A) r (A ** | ||||
C) r (A ** | C) r (A ** | ||||
l) r (A R | |||||
m) r (A R | |||||
n) r (A R | |||||
rr *R | rr *R | ||||
// 2006-11-18 Gilles Casse <[email protected]> | // 2006-11-18 Gilles Casse <[email protected]> | ||||
// | // | ||||
// Updated 2008-02-24 Michel Such <[email protected]> | |||||
// Updated 2008-02-29 Michel Such <[email protected]> | |||||
// | // | ||||
// * Numbers, a few abbreviations and exceptions. | // * Numbers, a few abbreviations and exceptions. | ||||
// | // | ||||
// "letter" name, then include the letter name here, with the letter | // "letter" name, then include the letter name here, with the letter | ||||
// prefixed by a _ character. | // prefixed by a _ character. | ||||
_à a:aksA~gRav | |||||
â a:aksA~siRkO~flEks | |||||
ä a:tRema | |||||
_à $accent // speak as base-letter name + accent name | |||||
â $accent | |||||
ä $accent | |||||
b be | b be | ||||
c se | c se | ||||
ç sesedij | |||||
ç $accent | |||||
d de | d de | ||||
e @ | e @ | ||||
ë @:tRema | |||||
é @:aksA~Egy | |||||
è @:aksA~gRav | |||||
ê @:aksA~siRkO~flEks | |||||
ë @:tRema | |||||
ë $accent | |||||
é $accent | |||||
è $accent | |||||
ê $accent | |||||
ë $accent | |||||
f Ef | f Ef | ||||
g Ze | g Ze | ||||
h aS | h aS | ||||
i i | i i | ||||
ï i:tRema | |||||
ï $accent | |||||
j Zi | j Zi | ||||
k ka | k ka | ||||
l El | l El | ||||
m Em | m Em | ||||
n En | n En | ||||
ñ Entilde | |||||
ñ $accent | |||||
o o | o o | ||||
ô o:aksA~siRkO~flEks | |||||
ö o:tRema | |||||
ô $accent | |||||
ö $accent | |||||
p pe | p pe | ||||
q ky | q ky | ||||
r ER | r ER | ||||
s Es | s Es | ||||
t te | t te | ||||
u y | u y | ||||
ù y:aksA~gRav | |||||
û y:aksA~siRkO~flEks | |||||
ü y:tRema | |||||
ù $accent | |||||
û $accent | |||||
ü $accent | |||||
v ve | v ve | ||||
w dubl@v'e | w dubl@v'e | ||||
x iks | x iks | ||||
// character names | |||||
// accent names | |||||
_lig ligat'yr | |||||
_acu aksA~tEg'y | |||||
_ac2 dublaksA~tEg'y | |||||
_brv br'Ev | |||||
_ced sed'ij | |||||
_cir aksA~siRkO~fl'Eks | |||||
_dia tRem'a | |||||
_dot pw'E~syskr'i | |||||
_grv aksA~gR'av | |||||
_hac kar'O~ | |||||
_mcn makr'O~ | |||||
_ogo OgOn'Ek | |||||
_rng rO~t2A~S'Ef | |||||
_stk b'aR | |||||
_tld t'ild | |||||
// character names | |||||
//_cap k,ap@-t@L | |||||
_?A lEt@ | |||||
_cap maZysk'yl | |||||
_?A l'Etr | |||||
_?? sE~b'Oll | _?? sE~b'Oll | ||||
_#9 tabylasjO~ | _#9 tabylasjO~ | ||||
_#32 Espas | _#32 Espas | ||||
_" gijmE | _" gijmE | ||||
_# djEz | _# djEz | ||||
_' apOstROf | _' apOstROf | ||||
// Letters which can be words | // Letters which can be words | ||||
//=========================== | //=========================== | ||||
à a:aksA~gRav $atend | |||||
y i:gR'Ek $atend | |||||
à $atend $accent | |||||
y $atend $accent | |||||
z z@ | z z@ | ||||
ž Z@ | ž Z@ | ||||
// accent names | |||||
_lig l'ig&t,UR& | |||||
_acu 'akUt | |||||
_ac2 dv'ostr**UkI;'akUt | |||||
//_brv | |||||
_ced ts'EdIl& | |||||
_cir ts'iRkUmflEks | |||||
_dia 'uml&Ut | |||||
_dot t'otSk& | |||||
//_grv | |||||
_hac kv'atSIts& | |||||
_mcn m'akr**on | |||||
_ogo 'ogonEk | |||||
_rng r**'iNg | |||||
_stk kr**'oz | |||||
_tld t'ild& | |||||
// symbols | // symbols | ||||
_?? znak // unknown symbol | _?? znak // unknown symbol | ||||
_?A slovo // unknown letter | _?A slovo // unknown letter | ||||
_cap k'apIt&l // ?? use English until I find the correct word | |||||
© 'aUtoRsk&||pr*av& | © 'aUtoRsk&||pr*av& | ||||
% p'osto | % p'osto | ||||
+ plus | + plus |
_2M4 ke:tbillio: | _2M4 ke:tbillio: | ||||
_dpt ||_vEss2Y:_ | _dpt ||_vEss2Y:_ | ||||
// accent names | |||||
_lig ligAtu:R2A | |||||
_acu e:lES | |||||
_ac2 kEt:Y:S | |||||
_brv fe:lkYR2 | |||||
//_ced // cedilla | |||||
_cir tsiR2kumflEks | |||||
_dia tR2e:mA | |||||
//_dot // dot above | |||||
_grv tompA | |||||
_hac pipA | |||||
_mcn mAkR2on // ?? macron | |||||
_ogo hoR2gok | |||||
_rng kYR2 | |||||
_stk a:thu:za:S | |||||
_tld tildE | |||||
// Abbreviations | // Abbreviations | ||||
km kilo:me:tER2 | km kilo:me:tER2 |
// character names | // character names | ||||
_cap k'apital | |||||
_?? s'imbolo | |||||
_cap k'apital | |||||
_?? s'imbolo | |||||
_?A let:'e:Ra | |||||
// accent names | |||||
_lig l,egat'u:Ra | |||||
_acu atS:'ento_|ak'u:to | |||||
_ac2 d'op:i;o_|atS:'ento_|ak'u:to | |||||
_brv b@-*'e:ve | |||||
_ced tSed'il^a | |||||
_cir tSi;@-*konfl'esso | |||||
_dia djeR'e:zI | |||||
_dot p'unto||s,ov@-*ask@-*'it:o | |||||
_grv atS:'entog@-*'a:ve | |||||
_hac h'atSek | |||||
_mcn m'ak@-*on | |||||
_ogo og'o:nek | |||||
_rng an'ello | |||||
_stk b'aR*a | |||||
_tld t'ilde | |||||
_! p'untoesklamat'ivo | _! p'untoesklamat'ivo | ||||
_" viRgolet:e | _" viRgolet:e |
î ,ydin'i $atend | î ,ydin'i $atend | ||||
_. punkt | _. punkt | ||||
_?? ka*akt'er | |||||
_cap maZusk'ul@ | |||||
// accent names | |||||
_lig ligat'ur@ | |||||
_acu aktS'entaskuts'it | |||||
//_ac2 | |||||
_brv k@tS'ul@ | |||||
_ced sed'il@ | |||||
_cir tSirkumfl'eks | |||||
_dia t@-*'em@ | |||||
_dot punkt | |||||
_grv aktS'entg@-*'av | |||||
_hac hatS'ek | |||||
_mcn mak@-*'on | |||||
_ogo ogon'ek | |||||
//_rng | |||||
_stk sl'eS | |||||
_tld t'ild@ | |||||
// numeric | // numeric |
w dv'ojite:,ve: | w dv'ojite:,ve: | ||||
x iks | x iks | ||||
y ipsilon | y ipsilon | ||||
ý d,l-he:'i: | |||||
ý d,l-he:'ipsilon | |||||
_z zet | _z zet | ||||
ž Zet | ž Zet | ||||
// accent names | |||||
_lig l'igat,u:Rov,ane: | |||||
_acu s_d'l:Zn^om | |||||
//_ac2 | |||||
_brv bR'eve | |||||
_ced s_ts'ed;illoU | |||||
_cir s'ostRi;,eSkoU | |||||
_dia s_pR'ehla:skoU | |||||
_dot s_b'otkoU | |||||
_grv s_'obRa:t;,eni:md'l:Zn^om | |||||
_hac s_m'ektSen^om | |||||
_mcn s_m'akRonom | |||||
_ogo s_'ogon^ek | |||||
_rng s_kR'u:Skom | |||||
_stk S'ikmi: d'l:Zen^ | |||||
_tld s_v'l-novkoU | |||||
// character names | // character names | ||||
_cap vel^ke: | _cap vel^ke: | ||||
_?? simbol | _?? simbol |
எ e | எ e | ||||
ஏ e: | |||||
ஏ e:: | |||||
ஐ E: | ஐ E: | ||||
// consonants | // consonants | ||||
க ga // inter-vocalic, unless there is virama before or after | |||||
க gV // inter-vocalic, unless there is virama before or after | |||||
க (B g | க (B g | ||||
க (் g | க (் g | ||||
்) க ga | |||||
்) க gV | |||||
்) க (B g | ்) க (B g | ||||
_) க ka | |||||
_) க kV | |||||
_) க (B k | _) க (B k | ||||
க்க k:a | |||||
க்க k:V | |||||
க்க (B k: | க்க (B k: | ||||
ங Na | |||||
ங NV | |||||
ங (B N | ங (B N | ||||
ச dZa | |||||
ச dZV | |||||
ச (B dZ | ச (B dZ | ||||
_) ச sa | _) ச sa | ||||
_) ச (B s | _) ச (B s | ||||
ச்ச tS:a | |||||
ச்ச tS:V | |||||
ச்ச (B tS: | ச்ச (B tS: | ||||
ஜ dZa | |||||
ஜ dZV | |||||
ஜ (B dZ | ஜ (B dZ | ||||
ஞ n^a | |||||
ஞ n^V | |||||
ஞ (B n^ | ஞ (B n^ | ||||
ட d.a | |||||
ட d.V | |||||
ட (B d. | ட (B d. | ||||
_) ட t.a | |||||
_) ட t.V | |||||
_) ட (B t. | _) ட (B t. | ||||
ட்ட t.a | |||||
ட்ட t.V | |||||
ட்ட (B t. | ட்ட (B t. | ||||
ண n.a | |||||
ண n.V | |||||
ண (B n. | ண (B n. | ||||
த da | |||||
த dV | |||||
த (B d | த (B d | ||||
_) த ta | |||||
_) த tV | |||||
_) த (B t | _) த (B t | ||||
த்த t:a | |||||
த்த (B t: | |||||
த்த ttV | |||||
த்த (B tt | |||||
ந na | |||||
ந nV | |||||
ந (B n | ந (B n | ||||
ன na | |||||
ன nV | |||||
ன (B n | ன (B n | ||||
ப ba | |||||
ப bV | |||||
ப (B b | ப (B b | ||||
_) ப pa | |||||
_) ப pV | |||||
_) ப (B p | _) ப (B p | ||||
ப்ப p:a | |||||
ப்ப p:V | |||||
ப்ப (B p: | ப்ப (B p: | ||||
ஃ) ப fa | ஃ) ப fa | ||||
ஃ) ப (B f | ஃ) ப (B f | ||||
ம ma | |||||
ம mV | |||||
ம (B m | ம (B m | ||||
ய ja | |||||
ய jV | |||||
ய (B j | ய (B j | ||||
ர ra | |||||
ர rV | |||||
ர (B r | ர (B r | ||||
ற Ra | |||||
ற RV | |||||
ற (B R | ற (B R | ||||
ற் (ற t. // RR -> t.R | ற் (ற t. // RR -> t.R | ||||
ல la | |||||
ல lV | |||||
ல (B l | ல (B l | ||||
ள l.a | |||||
ள l.V | |||||
ள (B l. | ள (B l. | ||||
ழ z.a | |||||
ழ z.V | |||||
ழ (B z. | ழ (B z. | ||||
வ va | |||||
வ vV | |||||
வ (B v | வ (B v | ||||
ஶ Sa | |||||
ஶ SV | |||||
ஶ (B S | ஶ (B S | ||||
ஷ s.a | |||||
ஷ s.V | |||||
ஷ (B s. | ஷ (B s. | ||||
ஸ sa | |||||
ஸ sV | |||||
ஸ (B s | ஸ (B s | ||||
ஹ ha | |||||
ஹ hV | |||||
ஹ (B h | ஹ (B h | ||||
ௗ : // aU length mark | ௗ : // aU length mark | ||||
.group | .group | ||||
$ dola | |||||
$ dolV |
fr 44 124 | fr 44 124 | ||||
fr_ca 11 124 | fr_ca 11 124 | ||||
hi 51 135 | hi 51 135 | ||||
ta 16 138 | |||||
ta 17 138 | |||||
hu 23 114 | hu 23 114 | ||||
nl 28 121 | nl 28 121 | ||||
pl 15 109 | pl 15 109 |
phoneme U | phoneme U | ||||
vowel starttype (u) endtype (u) | vowel starttype (u) endtype (u) | ||||
length 150 | |||||
length 130 | |||||
formants vowel/u#_3 | formants vowel/u#_3 | ||||
endphoneme | endphoneme | ||||
phoneme U: | phoneme U: | ||||
vowel starttype (u) endtype (u) | vowel starttype (u) endtype (u) | ||||
length 270 | |||||
length 240 | |||||
formants vowel/u#_3 | formants vowel/u#_3 | ||||
endphoneme | endphoneme | ||||
switchvoicing f | switchvoicing f | ||||
endphoneme | endphoneme | ||||
phoneme : // Lengthen previous vowel by "length" | |||||
virtual | |||||
length 50 | |||||
endphoneme | |||||
{"$verbextend",0x28}, /* extend influence of 'verb follows' */ | {"$verbextend",0x28}, /* extend influence of 'verb follows' */ | ||||
{"$capital", 0x29}, /* use this pronunciation if initial letter is upper case */ | {"$capital", 0x29}, /* use this pronunciation if initial letter is upper case */ | ||||
{"$allcaps", 0x2a}, /* use this pronunciation if initial letter is upper case */ | {"$allcaps", 0x2a}, /* use this pronunciation if initial letter is upper case */ | ||||
{"$accent", 0x2b}, // character name is base-character name + accent name | |||||
// doesn't set dictionary_flags | // doesn't set dictionary_flags | ||||
{"$?", 100}, // conditional rule, followed by byte giving the condition number | {"$?", 100}, // conditional rule, followed by byte giving the condition number |
// no group for this letter, use default group | // no group for this letter, use default group | ||||
MatchRule(&p, "", groups1[0], &match1, word_flags, dict_flags); | MatchRule(&p, "", groups1[0], &match1, word_flags, dict_flags); | ||||
if(match1.points == 0) | |||||
if((match1.points == 0) && ((option_sayas & 0x10) == 0)) | |||||
{ | { | ||||
// no match, try removing the accent and re-translating the word | // no match, try removing the accent and re-translating the word | ||||
n = utf8_in(&letter,p-1,0)-1; | n = utf8_in(&letter,p-1,0)-1; | ||||
if((letter >= 0xc0) && (letter <= 0x241)) | |||||
if((letter >= 0xc0) && (letter <= 0x241) && ((ix = remove_accent[letter-0xc0]) != 0)) | |||||
{ | { | ||||
// within range of the remove_accent table | // within range of the remove_accent table | ||||
p2 = p-1; | p2 = p-1; | ||||
p[-1] = remove_accent[letter-0xc0]; | |||||
p[-1] = ix; | |||||
while((p[0] = p[n]) != ' ') p++; | while((p[0] = p[n]) != ' ') p++; | ||||
while(n-- > 0) *p++ = ' '; // replacement character must be no longer than original | while(n-- > 0) *p++ = ' '; // replacement character must be no longer than original | ||||
found = LookupDict2(word, word1, ph_out, flags, end_flags, wtab); | found = LookupDict2(word, word1, ph_out, flags, end_flags, wtab); | ||||
if((found == 0) && (flags[1] & FLAG_ACCENT)) | |||||
{ | |||||
int letter; | |||||
word2 = word; | |||||
if(*word2 == '_') word2++; | |||||
len = utf8_in(&letter, word2, 0); | |||||
LookupAccentedLetter(letter, ph_out); | |||||
found = word2 + len; | |||||
} | |||||
if(found == 0) | if(found == 0) | ||||
{ | { | ||||
ph_out[0] = 0; | ph_out[0] = 0; | ||||
int Translator::Lookup(const char *word, char *ph_out) | int Translator::Lookup(const char *word, char *ph_out) | ||||
{//=================================================== | {//=================================================== | ||||
unsigned int flags[2]; | unsigned int flags[2]; | ||||
flags[0] = flags[1] = 0; | |||||
char *word1 = (char *)word; | char *word1 = (char *)word; | ||||
return(LookupDictList(&word1, ph_out, flags, 0, NULL)); | return(LookupDictList(&word1, ph_out, flags, 0, NULL)); | ||||
} | } |
// Accent (modifier) numbers.  Each value is also the index of the matching
// entry in accents_tab[] below, so the two lists must be kept in step.
// M_NAME (0) means "no accent": the character has a name of its own.
#define M_NAME          0
#define M_ACUTE         1
#define M_BREVE         2
#define M_CARON         3
#define M_CEDILLA       4
#define M_CIRCUMFLEX    5
#define M_DIAERESIS     6
#define M_DOUBLE_ACUTE  7
#define M_DOT_ABOVE     8
#define M_GRAVE         9
#define M_MACRON       10
#define M_OGONEK       11
#define M_RING         12
#define M_STROKE       13
#define M_TILDE        14

#define M_MIDDLE_DOT    8   // duplicate of M_DOT_ABOVE

typedef struct {
    const char *name;   // token to look up in the *_list file
    int flags;          // only the ligature entry is non-zero; not read in this section
} ACCENTS;

// these are tokens to look up in the *_list file.
// Index 0 is used for ligatures; indices 1..14 correspond to the M_ values.
ACCENTS accents_tab[] = {
    {"_lig", 1},
    {"_acu", 0},  // acute
    {"_brv", 0},  // breve
    {"_hac", 0},  // caron/hacek
    {"_ced", 0},  // cedilla
    {"_cir", 0},  // circumflex
    {"_dia", 0},  // diaeresis
    {"_ac2", 0},  // double acute
    {"_dot", 0},  // dot
    {"_grv", 0},  // grave
    {"_mcn", 0},  // macron
    {"_ogo", 0},  // ogonek
    {"_rng", 0},  // ring
    {"_stk", 0},  // stroke
    {"_tld", 0},  // tilde
};

// Encoding of one table entry (16 bits):
//   bits 0-6   base letter
//   bits 8-14  accent number (M_ value), or the second letter of a ligature
//   bit 15     set for a ligature
// CAPITAL (0) is a placeholder for an upper-case code point.
// The mod2 argument of LETTER and the mod1 argument of LIGATURE are accepted
// but not encoded.
#define CAPITAL  0
#define LETTER(ch,mod1,mod2)    ((ch) + ((mod1) << 8))
// The explicit cast keeps the bit-15 flag in a 'short' without relying on an
// implicit out-of-range conversion in the initialiser list.
#define LIGATURE(ch1,ch2,mod1)  ((short)((ch1) + ((ch2) << 8) + 0x8000))

// characters U+00e0 to U+017f
const short letter_accents_0e0[] = {
    LETTER('a',M_GRAVE,0),   // U+00e0
    LETTER('a',M_ACUTE,0),
    LETTER('a',M_CIRCUMFLEX,0),
    LETTER('a',M_TILDE,0),
    LETTER('a',M_DIAERESIS,0),
    LETTER('a',M_RING,0),
    LIGATURE('a','e',0),
    LETTER('c',M_CEDILLA,0),
    LETTER('e',M_GRAVE,0),
    LETTER('e',M_ACUTE,0),
    LETTER('e',M_CIRCUMFLEX,0),
    LETTER('e',M_DIAERESIS,0),
    LETTER('i',M_GRAVE,0),
    LETTER('i',M_ACUTE,0),
    LETTER('i',M_CIRCUMFLEX,0),
    LETTER('i',M_DIAERESIS,0),
    LETTER('d',M_NAME,0),  // eth  // U+00f0
    LETTER('n',M_TILDE,0),
    LETTER('o',M_GRAVE,0),
    LETTER('o',M_ACUTE,0),
    LETTER('o',M_CIRCUMFLEX,0),
    LETTER('o',M_TILDE,0),
    LETTER('o',M_DIAERESIS,0),
    0,     // division sign
    LETTER('o',M_STROKE,0),
    LETTER('u',M_GRAVE,0),
    LETTER('u',M_ACUTE,0),
    LETTER('u',M_CIRCUMFLEX,0),
    LETTER('u',M_DIAERESIS,0),
    LETTER('y',M_ACUTE,0),
    LETTER('t',M_NAME,0),  // thorn
    LETTER('y',M_DIAERESIS,0),
    CAPITAL,                 // U+0100
    LETTER('a',M_MACRON,0),
    CAPITAL,
    LETTER('a',M_BREVE,0),
    CAPITAL,
    LETTER('a',M_OGONEK,0),
    CAPITAL,
    LETTER('c',M_ACUTE,0),
    CAPITAL,
    LETTER('c',M_CIRCUMFLEX,0),
    CAPITAL,
    LETTER('c',M_DOT_ABOVE,0),
    CAPITAL,
    LETTER('c',M_CARON,0),
    CAPITAL,
    LETTER('d',M_CARON,0),
    CAPITAL,                 // U+0110
    LETTER('d',M_STROKE,0),
    CAPITAL,
    LETTER('e',M_MACRON,0),
    CAPITAL,
    LETTER('e',M_BREVE,0),
    CAPITAL,
    LETTER('e',M_DOT_ABOVE,0),
    CAPITAL,
    LETTER('e',M_OGONEK,0),
    CAPITAL,
    LETTER('e',M_CARON,0),
    CAPITAL,
    LETTER('g',M_CIRCUMFLEX,0),
    CAPITAL,
    LETTER('g',M_BREVE,0),
    CAPITAL,                // U+0120
    LETTER('g',M_DOT_ABOVE,0),
    CAPITAL,
    LETTER('g',M_CEDILLA,0),
    CAPITAL,
    LETTER('h',M_CIRCUMFLEX,0),
    CAPITAL,
    LETTER('h',M_STROKE,0),
    CAPITAL,
    LETTER('i',M_TILDE,0),
    CAPITAL,
    LETTER('i',M_MACRON,0),
    CAPITAL,
    LETTER('i',M_BREVE,0),
    CAPITAL,
    LETTER('i',M_OGONEK,0),
    CAPITAL,                // U+0130
    LETTER('i',M_NAME,0), // dotless i
    CAPITAL,
    LIGATURE('i','j',0),
    CAPITAL,
    LETTER('j',M_CIRCUMFLEX,0),
    CAPITAL,
    LETTER('k',M_CEDILLA,0),
    LETTER('k',M_NAME,0),  // kra
    CAPITAL,
    LETTER('l',M_ACUTE,0),
    CAPITAL,
    LETTER('l',M_CEDILLA,0),
    CAPITAL,
    LETTER('l',M_CARON,0),        // U+013e  (base letter is 'l', not the digit '1')
    CAPITAL,
    LETTER('l',M_MIDDLE_DOT,0),   // U+0140
    CAPITAL,
    LETTER('l',M_STROKE,0),       // U+0142
    CAPITAL,
    LETTER('n',M_ACUTE,0),
    CAPITAL,
    LETTER('n',M_CEDILLA,0),
    CAPITAL,
    LETTER('n',M_CARON,0),
    LETTER('n',M_NAME,0),  // apostrophe n
    CAPITAL,
    LETTER('n',M_NAME,0),  // eng
    CAPITAL,
    LETTER('o',M_MACRON,0),
    CAPITAL,
    LETTER('o',M_BREVE,0),
    CAPITAL,               // U+0150
    LETTER('o',M_DOUBLE_ACUTE,0),
    CAPITAL,
    LIGATURE('o','e',0),
    CAPITAL,
    LETTER('r',M_ACUTE,0),
    CAPITAL,
    LETTER('r',M_CEDILLA,0),
    CAPITAL,
    LETTER('r',M_CARON,0),
    CAPITAL,
    LETTER('s',M_ACUTE,0),
    CAPITAL,
    LETTER('s',M_CIRCUMFLEX,0),
    CAPITAL,
    LETTER('s',M_CEDILLA,0),
    CAPITAL,              // U+0160
    LETTER('s',M_CARON,0),
    CAPITAL,
    LETTER('t',M_CEDILLA,0),
    CAPITAL,
    LETTER('t',M_CARON,0),
    CAPITAL,
    LETTER('t',M_STROKE,0),
    CAPITAL,
    LETTER('u',M_TILDE,0),
    CAPITAL,
    LETTER('u',M_MACRON,0),
    CAPITAL,
    LETTER('u',M_BREVE,0),
    CAPITAL,
    LETTER('u',M_RING,0),
    CAPITAL,              // U+0170
    LETTER('u',M_DOUBLE_ACUTE,0),
    CAPITAL,
    LETTER('u',M_OGONEK,0),
    CAPITAL,
    LETTER('w',M_CIRCUMFLEX,0),
    CAPITAL,
    LETTER('y',M_CIRCUMFLEX,0),
    CAPITAL,   // Y-DIAERESIS
    CAPITAL,
    LETTER('z',M_ACUTE,0),
    CAPITAL,
    LETTER('z',M_DOT_ABOVE,0),
    CAPITAL,
    LETTER('z',M_CARON,0),
    LETTER('s',M_NAME,0), // long-s  // U+17f
};
int Translator::LookupLetter2(unsigned int letter, char *ph_buf) | |||||
{//============================================================= | |||||
int len; | |||||
char single_letter[10]; | |||||
single_letter[0] = 0; | |||||
single_letter[1] = '_'; | |||||
len = utf8_out(letter, &single_letter[2]); | |||||
single_letter[2+len] = 0; | |||||
if(Lookup(&single_letter[1],ph_buf) == 0) | |||||
{ | |||||
single_letter[1] = ' '; | |||||
if(Lookup(&single_letter[2],ph_buf) == 0) | |||||
{ | |||||
TranslateRules(&single_letter[2], ph_buf, 20, NULL,0,0); | |||||
} | |||||
} | |||||
return(ph_buf[0]); | |||||
} | |||||
void Translator::LookupAccentedLetter(unsigned int letter, char *ph_buf) | |||||
{//===================================================================== | |||||
// lookup the character in the accents table | |||||
int accent_data; | |||||
int accent1; | |||||
int basic_letter; | |||||
int letter2=0; | |||||
char ph_letter1[30]; | |||||
char ph_letter2[30]; | |||||
char ph_accent1[30]; | |||||
if((letter >= 0xe0) && (letter < 0x17f)) | |||||
{ | |||||
accent_data = letter_accents_0e0[letter - 0xe0]; | |||||
basic_letter = accent_data & 0x7f; | |||||
if((accent1 = (accent_data >> 8) & 0x7f) != 0) | |||||
{ | |||||
if(accent_data & 0x8000) | |||||
{ | |||||
letter2 = accent1; | |||||
accent1 = 0; | |||||
} | |||||
if(Lookup(accents_tab[accent1].name, ph_accent1) != 0) | |||||
{ | |||||
if(LookupLetter2(basic_letter, ph_letter1) != 0) | |||||
{ | |||||
if(accent1 == 0) | |||||
{ | |||||
//ligature | |||||
LookupLetter2(letter2, ph_letter2); | |||||
sprintf(ph_buf,"%s%c%s%s",ph_accent1, phonPAUSE_VSHORT, ph_letter1, ph_letter2); | |||||
} | |||||
else | |||||
{ | |||||
if(langopts.accents & 1) | |||||
sprintf(ph_buf,"%s%c%s", ph_accent1, phonPAUSE_VSHORT, ph_letter1); | |||||
else | |||||
sprintf(ph_buf,"%s%c%s%c", ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT); | |||||
} | |||||
} | |||||
} | |||||
} | |||||
} | |||||
} // end of LookupAccentedLetter | |||||
void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1) | void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1) | ||||
{//============================================================================= | {//============================================================================= | ||||
unsigned char *p; | unsigned char *p; | ||||
static char single_letter[10] = {0,0}; | static char single_letter[10] = {0,0}; | ||||
char ph_stress[2]; | char ph_stress[2]; | ||||
char ph_buf3[30]; | |||||
unsigned int dict_flags[2]; | |||||
char ph_buf3[40]; | |||||
char *ptr; | |||||
ph_buf1[0] = 0; | ph_buf1[0] = 0; | ||||
len = utf8_out(letter,&single_letter[2]); | len = utf8_out(letter,&single_letter[2]); | ||||
single_letter[3+len] = next_byte; // follow by space-space if the end of the word, or space-0x31 | single_letter[3+len] = next_byte; // follow by space-space if the end of the word, or space-0x31 | ||||
single_letter[1] = '_'; | single_letter[1] = '_'; | ||||
// if the $accent flag is set for this letter, use the accents table (below) | |||||
dict_flags[1] = 0; | |||||
ptr = &single_letter[1]; | |||||
if(Lookup(&single_letter[1],ph_buf3) == 0) | if(Lookup(&single_letter[1],ph_buf3) == 0) | ||||
{ | { | ||||
single_letter[1] = ' '; | single_letter[1] = ' '; | ||||
} | } | ||||
} | } | ||||
if(ph_buf3[0] == 0) | |||||
{ | |||||
LookupAccentedLetter(letter, ph_buf3); | |||||
} | |||||
if(ph_buf3[0] == 0) | if(ph_buf3[0] == 0) | ||||
{ | { | ||||
ph_buf1[0] = 0; | ph_buf1[0] = 0; | ||||
for(p2 = hexbuf; *p2 != 0; p2++) | for(p2 = hexbuf; *p2 != 0; p2++) | ||||
{ | { | ||||
pbuf += strlen(pbuf); | pbuf += strlen(pbuf); | ||||
*pbuf++ = phonPAUSE_VSHORT; | |||||
LookupLetter(*p2, 0, pbuf); | LookupLetter(*p2, 0, pbuf); | ||||
} | } | ||||
} | } |
const char *Translator::LookupCharName(int c) | const char *Translator::LookupCharName(int c) | ||||
{//========================================== | {//========================================== | ||||
// Find the phoneme string (in ascii) to speak the name of character c | // Find the phoneme string (in ascii) to speak the name of character c | ||||
// Used for punctuation characters and symbols | |||||
int ix; | int ix; | ||||
unsigned int flags[2]; | unsigned int flags[2]; |
#include "translate.h" | #include "translate.h" | ||||
#include "wave.h" | #include "wave.h" | ||||
const char *version_string = "1.31.19 28.Feb.08"; | |||||
const char *version_string = "1.31.20 01.Mar.08"; | |||||
const int version_phdata = 0x013105; | const int version_phdata = 0x013105; | ||||
int option_device_number = -1; | int option_device_number = -1; |
if(p->newword) | if(p->newword) | ||||
{ | { | ||||
last_frame = NULL; | |||||
if(translator->langopts.param[LOPT_WORD_MERGE] == 0) | |||||
last_frame = NULL; | |||||
sourceix = (p->sourceix & 0x7ff) + clause_start_char; | sourceix = (p->sourceix & 0x7ff) + clause_start_char; | ||||
if(p->newword & 4) | if(p->newword & 4) |
tr->langopts.stress_rule = 1; | tr->langopts.stress_rule = 1; | ||||
tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | ||||
tr->letter_bits_offset = OFFSET_TAMIL; | tr->letter_bits_offset = OFFSET_TAMIL; | ||||
tr->langopts.param[LOPT_WORD_MERGE] = 1; // don't break vowels betwen words | |||||
memset(tr->letter_bits,0,sizeof(tr->letter_bits)); | memset(tr->letter_bits,0,sizeof(tr->letter_bits)); | ||||
SetLetterBitsRange(tr,LETTERGP_A,0x05,0x14); // vowel letters | SetLetterBitsRange(tr,LETTERGP_A,0x05,0x14); // vowel letters | ||||
SetLetterVowel(this,'y'); // add 'y' to vowels | SetLetterVowel(this,'y'); // add 'y' to vowels | ||||
langopts.numbers = 0x8d1 + NUM_ROMAN; | langopts.numbers = 0x8d1 + NUM_ROMAN; | ||||
langopts.accents = 1; | |||||
memcpy(stress_lengths,stress_lengths2,sizeof(stress_lengths)); | memcpy(stress_lengths,stress_lengths2,sizeof(stress_lengths)); | ||||
} | } | ||||
#define FLAG_VERB_EXT 0x100 /* extend the 'verb follows' */ | #define FLAG_VERB_EXT 0x100 /* extend the 'verb follows' */ | ||||
#define FLAG_CAPITAL 0x200 /* pronunciation if initial letter is upper case */ | #define FLAG_CAPITAL 0x200 /* pronunciation if initial letter is upper case */ | ||||
#define FLAG_ALLCAPS 0x400 // only if the word is all capitals | #define FLAG_ALLCAPS 0x400 // only if the word is all capitals | ||||
#define FLAG_ACCENT 0x800 // character name is base-character name + accent name | |||||
#define N_LOPTS 14 | |||||
#define N_LOPTS 15 | |||||
#define LOPT_DIERESES 1 | #define LOPT_DIERESES 1 | ||||
// 1=remove [:] from unstressed syllables, 2= remove from unstressed or non-penultimate syllables | // 1=remove [:] from unstressed syllables, 2= remove from unstressed or non-penultimate syllables | ||||
// bit 4=0, if stress < 4, bit 4=1, if not the highest stress in the word | // bit 4=0, if stress < 4, bit 4=1, if not the highest stress in the word | ||||
// increase this to prevent sonorants being shortened before shortened (eg. unstressed) vowels | // increase this to prevent sonorants being shortened before shortened (eg. unstressed) vowels | ||||
#define LOPT_SONORANT_MIN 7 | #define LOPT_SONORANT_MIN 7 | ||||
// bit 0=Italian "syntactic doubling" of consonants in the word after a word marked with $double attribute
// bit 1=also after a word which ends with a stressed vowel | |||||
#define LOPT_IT_DOUBLING 8 | |||||
// don't break vowels at word boundary | |||||
#define LOPT_WORD_MERGE 8 | |||||
// max. amplitude for vowel at the end of a clause | // max. amplitude for vowel at the end of a clause | ||||
#define LOPT_MAXAMP_EOC 9 | #define LOPT_MAXAMP_EOC 9 | ||||
// stressed syllable is indicated by capitals | // stressed syllable is indicated by capitals | ||||
#define LOPT_SYLLABLE_CAPS 13 | #define LOPT_SYLLABLE_CAPS 13 | ||||
// bit 0=Italian "syntactic doubling" of consonants in the word after a word marked with $double attribute
// bit 1=also after a word which ends with a stressed vowel | |||||
#define LOPT_IT_DOUBLING 14 | |||||
typedef struct { | typedef struct { | ||||
// bits0-2 separate words with (1=pause_vshort, 2=pause_short, 3=pause, 4=pause_long 5=[?] phonemme) | // bits0-2 separate words with (1=pause_vshort, 2=pause_short, 3=pause, 4=pause_long 5=[?] phonemme) | ||||
int max_roman; | int max_roman; | ||||
int thousands_sep; | int thousands_sep; | ||||
int decimal_sep; | int decimal_sep; | ||||
// bit 0, accent name before the letter name | |||||
int accents; | |||||
int tone_language; // 1=tone language | int tone_language; // 1=tone language | ||||
int intonation_group; | int intonation_group; | ||||
int long_stop; // extra mS pause for a lengthened stop | int long_stop; // extra mS pause for a lengthened stop | ||||
const char *LookupSpecial(const char *string, char *text_out); | const char *LookupSpecial(const char *string, char *text_out); | ||||
const char *LookupCharName(int c); | const char *LookupCharName(int c); | ||||
void LookupLetter(unsigned int letter, int next_byte, char *ph_buf); | void LookupLetter(unsigned int letter, int next_byte, char *ph_buf); | ||||
int LookupLetter2(unsigned int letter, char *ph_buf); | |||||
void LookupAccentedLetter(unsigned int letter, char *ph_buf); | |||||
int LookupNum2(int value, int control, char *ph_out); | int LookupNum2(int value, int control, char *ph_out); | ||||
int LookupNum3(int value, char *ph_out, int suppress_null, int thousandplex, int prev_thousands); | int LookupNum3(int value, char *ph_out, int suppress_null, int thousandplex, int prev_thousands); | ||||
int LookupThousands(int value, int thousandplex, char *ph_out); | int LookupThousands(int value, int thousandplex, char *ph_out); |