Speak accented letter names as base-letter name plus accent name, using a table of Unicode characters up to U+17F. Added $accent attribute for *_list files, meaning "speak as base-letter name plus accent name". Added accent names to several languages' *_list files. Added language option to allow vowels to merge between words (used for lang=TA). git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@153 d46cf337-b52f-0410-862d-fd96e6ae7743 master
@@ -60,6 +60,28 @@ z zEd | |||
û kapi?y | |||
ü de@lte@k@n?y | |||
_cap h'o@flEt@r | |||
_?? s@mb'o@l | |||
_?A l'Et@r | |||
// accent names | |||
_lig d'Wb@llEt@r | |||
_acu ak'yt | |||
_ac2 d'Wb@lakyt | |||
_brv br'i:v | |||
_ced s'e@dIla | |||
_cir k'api | |||
_dia d'e@lte@k@n | |||
_dot p'Wnt | |||
_grv x2r'afIs | |||
_hac k'A:rOn | |||
_mcn m'akrOn | |||
_ogo o@x2o@n,&k | |||
_rng 'A:nb&l | |||
_stk str'e@p | |||
_tld t'Ild@ | |||
// numeric | |||
_0 'nWl | |||
_1 _'e@n |
@@ -51,6 +51,28 @@ _z zet | |||
_#9 t'ap | |||
_#32 m'ezeRa // space | |||
_?? simbol | |||
_?A znak | |||
_cap m'ajuskule | |||
// accent names | |||
_lig l'igat,uRa | |||
_acu tS'a:Rka | |||
_ac2 dvj'etS'a:Rki | |||
_brv 'obloUtSek | |||
_ced ts'eJila | |||
_cir stR^'i:Ska | |||
_dia tR'ema | |||
_dot t'etSka | |||
_grv c'eSki:;'aktsent | |||
_hac h'a:tSek | |||
_mcn m'akRon | |||
_ogo 'ogonek | |||
_rng kR'oUZek | |||
_stk S'ikma:tS'a:Rka | |||
_tld c'ilda | |||
_$ dola:R | |||
_' apostRof | |||
@@ -72,8 +94,6 @@ _} sl'oZena:||z'avR^i:t | |||
_< menSi: | |||
_> vjetSi: | |||
_| sv'isla:||tS'a:Ra | |||
_?? simbol | |||
_?A znak | |||
// Numbers | |||
@@ -258,6 +278,9 @@ z zet $atend | |||
// Abbreviations | |||
// if a word has no vowels (or "r") then it will | |||
// automatically be spoken as individual letters | |||
e.g napR^ | |||
arc $abbrev | |||
arj $abbrev | |||
atd $abbrev |
@@ -44,6 +44,23 @@ _#9 t,abu:l'Ato:* | |||
_#32 l'e:*ts'aIC@n | |||
_?? zymb'o:l | |||
// accent names | |||
_lig l'i:gat,u:* | |||
_acu ak'u:t | |||
_ac2 d'Op@l,aku:t | |||
_brv b@-*'e:v@ | |||
_ced tse:d'Il@ | |||
_cir ts'I*kUmflEks | |||
_dia 'UmlaUt | |||
_dot p'Unkt | |||
_grv g@-*'AvIs | |||
_hac h'atSe:k | |||
_mcn m'ak@-*o:n | |||
_ogo 'o:go:n,e:k | |||
_rng k@-*'u:Ze:k | |||
_stk S@-*'Egst@-*IC | |||
_tld t'Ild@ | |||
// Not Roman numbers | |||
v faU | |||
x Iks |
@@ -21,10 +21,10 @@ Dictionary cs_dict | |||
i i: l- o o: oU r- u | |||
u: | |||
* b c d dZ f g h | |||
j J k l l^ m n N | |||
n^ p r R R^ R^/ s S | |||
t tS ts v x z Z | |||
* ; b c d dZ f g | |||
h j J k l l^ m n | |||
N n^ p r R R^ R^/ s | |||
S t tS ts v x z Z | |||
Dictionary cy_dict | |||
@@ -149,12 +149,12 @@ v z Z | |||
Dictionary it_dict | |||
@- a a/ aI aU e E i | |||
o O oI u | |||
I o O oI u | |||
* : b d dZ f g h | |||
j k l l^ m n N n^ | |||
p r R s S t tS ts | |||
v w w2 z | |||
* : ; b d dZ f g | |||
h j k l l^ m n N | |||
n^ p r R s S t tS | |||
ts v w w2 z | |||
Dictionary nl_dict | |||
@@ -213,10 +213,11 @@ Dictionary sk_dict | |||
l- l: o o: oU r- r: u | |||
u: | |||
* : b d d; dZ f g | |||
h j k l l^ m n N | |||
n^ p r R R^ R^/ s S | |||
t t; tS ts v x z Z | |||
* : ; b d d; dZ f | |||
g h j k l l^ m n | |||
N n^ p r R R^ R^/ s | |||
S t t; tS ts v x z | |||
Z | |||
Dictionary sv_dict | |||
@@ -371,10 +372,10 @@ Dictionary hbs_dict | |||
E e e: i I i: l- O | |||
o o: oU r* r- u U u: | |||
* b d dZ dZ; f g h | |||
j k l L l^ m n N | |||
n^ p r R s S t tS | |||
ts tS; v x z Z | |||
* ; b d dZ dZ; f g | |||
h j k l L l^ m n | |||
N n^ p r R s S t | |||
tS ts tS; v x z Z | |||
Dictionary id_dict |
@@ -47,7 +47,38 @@ y waI | |||
z zEd | |||
?3 z zi: | |||
é i:a2k'ju:t | |||
// sounds are specified for these accented characters | |||
// use the $accent attribute here to force the use of the | |||
// accent table when speaking the name of the character. | |||
_é $accent | |||
ê $accent | |||
_ä $accent | |||
_ë $accent | |||
_ï $accent | |||
_ö $accent | |||
_ü $accent | |||
_č $accent | |||
_š $accent | |||
_ş $accent | |||
_ž $accent | |||
_ñ $accent | |||
// accent names | |||
_lig l,Iga2tS@ | |||
_acu a2kj'u:t | |||
_ac2 dVb@la2kj'u:t | |||
_brv br'i:v | |||
_ced s@d'Ila2 | |||
_cir s'3:kVmfl,Eks | |||
_dia 'UmlaUt | |||
_dot wI2Dd'0ta2bVv | |||
_grv gr'A:v | |||
_hac h'atS,Ek | |||
_mcn m'akr,0n | |||
_ogo '0g0n,Ek | |||
_rng r'INg | |||
_stk str'oUk | |||
_tld t'Ild@ | |||
// character names | |||
@@ -73,7 +104,6 @@ _+ plVs | |||
_, k0m@ | |||
_- h,aIf@n | |||
_. d0t | |||
_/ stroUk | |||
_: koUl@n | |||
_; sEmIk'oUl0n | |||
_< ElaNg@L | |||
@@ -82,7 +112,6 @@ _> A@aNg@L | |||
_? kwEstS@n | |||
_@ at|saIn | |||
_[ lEftskwe@ | |||
_\ bakstroUk | |||
_] raItskwe@ | |||
_^ s3:kVmfl,Eks | |||
?5 _^ sIRkVmfl,Eks | |||
@@ -162,11 +191,14 @@ _₠ jU@roU | |||
¬ n0t_|saIn | |||
ə SwA: | |||
æ eI'i: | |||
ɛ oUp@n'i: | |||
ɔ oUp@n'oU | |||
þ TO@n | |||
ŋ ENg | |||
ð ED | |||
ʃ ES | |||
ʒ EZ | |||
ĸ krA: | |||
ɛ oUp@n'i: | |||
ɔ oUp@n'oU | |||
// numeric | |||
@@ -5154,6 +5154,7 @@ | |||
.group | |||
// non-ascii characters with specified pronunciations | |||
// List the accented characters in en_list with the $accent attribute | |||
é eI | |||
_) é I2 | |||
ê E: |
@@ -7,6 +7,24 @@ _cap m'aJ^us | |||
_?? s'imbOlo | |||
_#32 Esp'aTjo | |||
// accent names | |||
_lig liQaD'u**a | |||
_acu aQ'uDo | |||
_ac2 d'Oble||aQ'uDo | |||
_brv b**'eBe | |||
_ced TeD'iJ^a | |||
_cir TiRkumfl'exo | |||
_dia dj'E**esis | |||
_dac d'Oble||aQ'uDo | |||
_dot p'unto // ?? | |||
_grv g**'aBe | |||
_hac ka**'On | |||
_mcn mak**'On | |||
_ogo kol'ita | |||
_rng an'iJ^o | |||
_stk b'a*Ra // ?? | |||
_tld t'ilde | |||
// names of symbols | |||
_. punto |
@@ -145,6 +145,9 @@ | |||
_) r R | |||
A) r (A ** | |||
C) r (A ** | |||
l) r (A R | |||
m) r (A R | |||
n) r (A R | |||
rr *R | |||
@@ -5,7 +5,7 @@ | |||
// 2006-11-18 Gilles Casse <[email protected]> | |||
// | |||
// Updated 2008-02-24 Michel Such <[email protected]> | |||
// Updated 2008-02-29 Michel Such <[email protected]> | |||
// | |||
// * Numbers, a few abbreviations and exceptions. | |||
// | |||
@@ -16,42 +16,42 @@ | |||
// "letter" name, then include the letter name here, with the letter | |||
// prefixed by a _ character. | |||
_à a:aksA~gRav | |||
â a:aksA~siRkO~flEks | |||
ä a:tRema | |||
_à $accent // speak as base-letter name + accent name | |||
â $accent | |||
ä $accent | |||
b be | |||
c se | |||
ç sesedij | |||
ç $accent | |||
d de | |||
e @ | |||
ë @:tRema | |||
é @:aksA~Egy | |||
è @:aksA~gRav | |||
ê @:aksA~siRkO~flEks | |||
ë @:tRema | |||
ë $accent | |||
é $accent | |||
è $accent | |||
ê $accent | |||
ë $accent | |||
f Ef | |||
g Ze | |||
h aS | |||
i i | |||
ï i:tRema | |||
ï $accent | |||
j Zi | |||
k ka | |||
l El | |||
m Em | |||
n En | |||
ñ Entilde | |||
ñ $accent | |||
o o | |||
ô o:aksA~siRkO~flEks | |||
ö o:tRema | |||
ô $accent | |||
ö $accent | |||
p pe | |||
q ky | |||
r ER | |||
s Es | |||
t te | |||
u y | |||
ù y:aksA~gRav | |||
û y:aksA~siRkO~flEks | |||
ü y:tRema | |||
ù $accent | |||
û $accent | |||
ü $accent | |||
v ve | |||
w dubl@v'e | |||
x iks | |||
@@ -60,15 +60,33 @@ _y i:gR'Ek | |||
// character names | |||
// accent names | |||
_lig ligat'yr | |||
_acu aksA~tEg'y | |||
_ac2 dublaksA~tEg'y | |||
_brv br'Ev | |||
_ced sed'ij | |||
_cir aksA~siRkO~fl'Eks | |||
_dia tRem'a | |||
_dot pw'E~syskr'i | |||
_grv aksA~gR'av | |||
_hac kar'O~ | |||
_mcn makr'O~ | |||
_ogo OgOn'Ek | |||
_rng rO~t2A~S'Ef | |||
_stk b'aR | |||
_tld t'ild | |||
// character names | |||
//_cap k,ap@-t@L | |||
_?A lEt@ | |||
_cap maZysk'yl | |||
_?A l'Etr | |||
_?? sE~b'Oll | |||
_#9 tabylasjO~ | |||
_#32 Espas | |||
_" gijmE | |||
_# djEz | |||
_' apOstROf | |||
@@ -383,8 +401,8 @@ tout t'ut2 $u | |||
// Letters which can be words | |||
//=========================== | |||
à a:aksA~gRav $atend | |||
y i:gR'Ek $atend | |||
à $atend $accent | |||
y $atend $accent | |||
@@ -651,3 +669,4 @@ vincent vE~sA~ | |||
@@ -41,9 +41,29 @@ | |||
z z@ | |||
ž Z@ | |||
// accent names | |||
_lig l'ig&t,UR& | |||
_acu 'akUt | |||
_ac2 dv'ostr**UkI;'akUt | |||
//_brv | |||
_ced ts'EdIl& | |||
_cir ts'iRkUmflEks | |||
_dia 'uml&Ut | |||
_dot t'otSk& | |||
//_grv | |||
_hac kv'atSIts& | |||
_mcn m'akr**on | |||
_ogo 'ogonEk | |||
_rng r**'iNg | |||
_stk kr**'oz | |||
_tld t'ild& | |||
// symbols | |||
_?? znak // unknown symbol | |||
_?A slovo // unknown letter | |||
_cap k'apIt&l // ?? use English until I find the correct word | |||
© 'aUtoRsk&||pr*av& | |||
% p'osto | |||
+ plus |
@@ -45,6 +45,23 @@ _1M4 billio: | |||
_2M4 ke:tbillio: | |||
_dpt ||_vEss2Y:_ | |||
// accent names | |||
_lig ligAtu:R2A | |||
_acu e:lES | |||
_ac2 kEt:Y:S | |||
_brv fe:lkYR2 | |||
//_ced // cedilla | |||
_cir tsiR2kumflEks | |||
_dia tR2e:mA | |||
//_dot // dot above | |||
_grv tompA | |||
_hac pipA | |||
_mcn mAkR2on // ?? macron | |||
_ogo hoR2gok | |||
_rng kYR2 | |||
_stk a:thu:za:S | |||
_tld tildE | |||
// Abbreviations | |||
km kilo:me:tER2 |
@@ -4,8 +4,28 @@ | |||
// character names | |||
_cap k'apital | |||
_?? s'imbolo | |||
_cap k'apital | |||
_?? s'imbolo | |||
_?A let:'e:Ra | |||
// accent names | |||
_lig l,egat'u:Ra | |||
_acu atS:'ento_|ak'u:to | |||
_ac2 d'op:i;o_|atS:'ento_|ak'u:to | |||
_brv b@-*'e:ve | |||
_ced tSed'il^a | |||
_cir tSi;@-*konfl'esso | |||
_dia djeR'e:zI | |||
_dot p'unto||s,ov@-*ask@-*'it:o | |||
_grv atS:'entog@-*'a:ve | |||
_hac h'atSek | |||
_mcn m'ak@-*on | |||
_ogo og'o:nek | |||
_rng an'ello | |||
_stk b'aR*a | |||
_tld t'ilde | |||
_! p'untoesklamat'ivo | |||
_" viRgolet:e |
@@ -48,6 +48,26 @@ z zet | |||
î ,ydin'i $atend | |||
_. punkt | |||
_?? ka*akt'er | |||
_cap maZusk'ul@ | |||
// accent names | |||
_lig ligat'ur@ | |||
_acu aktS'entaskuts'it | |||
//_ac2 | |||
_brv k@tS'ul@ | |||
_ced sed'il@ | |||
_cir tSirkumfl'eks | |||
_dia t@-*'em@ | |||
_dot punkt | |||
_grv aktS'entg@-*'av | |||
_hac hatS'ek | |||
_mcn mak@-*'on | |||
_ogo ogon'ek | |||
//_rng | |||
_stk sl'eS | |||
_tld t'ild@ | |||
// numeric |
@@ -43,10 +43,28 @@ _v ve: | |||
w dv'ojite:,ve: | |||
x iks | |||
y ipsilon | |||
ý d,l-he:'i: | |||
ý d,l-he:'ipsilon | |||
_z zet | |||
ž Zet | |||
// accent names | |||
_lig l'igat,u:Rov,ane: | |||
_acu s_d'l:Zn^om | |||
//_ac2 | |||
_brv bR'eve | |||
_ced s_ts'ed;illoU | |||
_cir s'ostRi;,eSkoU | |||
_dia s_pR'ehla:skoU | |||
_dot s_b'otkoU | |||
_grv s_'obRa:t;,eni:md'l:Zn^om | |||
_hac s_m'ektSen^om | |||
_mcn s_m'akRonom | |||
_ogo s_'ogon^ek | |||
_rng s_kR'u:Skom | |||
_stk S'ikmi: d'l:Zen^ | |||
_tld s_v'l-novkoU | |||
// character names | |||
_cap vel^ke: | |||
_?? simbol |
@@ -38,7 +38,7 @@ | |||
எ e | |||
ஏ e: | |||
ஏ e:: | |||
ஐ E: | |||
@@ -50,99 +50,99 @@ | |||
// consonants | |||
க ga // inter-vocalic, unless there is virama before or after | |||
க gV // inter-vocalic, unless there is virama before or after | |||
க (B g | |||
க (் g | |||
்) க ga | |||
்) க gV | |||
்) க (B g | |||
_) க ka | |||
_) க kV | |||
_) க (B k | |||
க்க k:a | |||
க்க k:V | |||
க்க (B k: | |||
ங Na | |||
ங NV | |||
ங (B N | |||
ச dZa | |||
ச dZV | |||
ச (B dZ | |||
_) ச sa | |||
_) ச (B s | |||
ச்ச tS:a | |||
ச்ச tS:V | |||
ச்ச (B tS: | |||
ஜ dZa | |||
ஜ dZV | |||
ஜ (B dZ | |||
ஞ n^a | |||
ஞ n^V | |||
ஞ (B n^ | |||
ட d.a | |||
ட d.V | |||
ட (B d. | |||
_) ட t.a | |||
_) ட t.V | |||
_) ட (B t. | |||
ட்ட t.a | |||
ட்ட t.V | |||
ட்ட (B t. | |||
ண n.a | |||
ண n.V | |||
ண (B n. | |||
த da | |||
த dV | |||
த (B d | |||
_) த ta | |||
_) த tV | |||
_) த (B t | |||
த்த t:a | |||
த்த (B t: | |||
த்த ttV | |||
த்த (B tt | |||
ந na | |||
ந nV | |||
ந (B n | |||
ன na | |||
ன nV | |||
ன (B n | |||
ப ba | |||
ப bV | |||
ப (B b | |||
_) ப pa | |||
_) ப pV | |||
_) ப (B p | |||
ப்ப p:a | |||
ப்ப p:V | |||
ப்ப (B p: | |||
ஃ) ப fa | |||
ஃ) ப (B f | |||
ம ma | |||
ம mV | |||
ம (B m | |||
ய ja | |||
ய jV | |||
ய (B j | |||
ர ra | |||
ர rV | |||
ர (B r | |||
ற Ra | |||
ற RV | |||
ற (B R | |||
ற் (ற t. // RR -> t.R | |||
ல la | |||
ல lV | |||
ல (B l | |||
ள l.a | |||
ள l.V | |||
ள (B l. | |||
ழ z.a | |||
ழ z.V | |||
ழ (B z. | |||
வ va | |||
வ vV | |||
வ (B v | |||
ஶ Sa | |||
ஶ SV | |||
ஶ (B S | |||
ஷ s.a | |||
ஷ s.V | |||
ஷ (B s. | |||
ஸ sa | |||
ஸ sV | |||
ஸ (B s | |||
ஹ ha | |||
ஹ hV | |||
ஹ (B h | |||
@@ -176,4 +176,4 @@ | |||
ௗ : // aU length mark | |||
.group | |||
$ dola | |||
$ dolV |
@@ -18,7 +18,7 @@ | |||
fr 44 124 | |||
fr_ca 11 124 | |||
hi 51 135 | |||
ta 16 138 | |||
ta 17 138 | |||
hu 23 114 | |||
nl 28 121 | |||
pl 15 109 |
@@ -77,7 +77,7 @@ endphoneme | |||
phoneme U | |||
vowel starttype (u) endtype (u) | |||
length 150 | |||
length 130 | |||
formants vowel/u#_3 | |||
endphoneme | |||
@@ -89,7 +89,7 @@ endphoneme | |||
phoneme U: | |||
vowel starttype (u) endtype (u) | |||
length 270 | |||
length 240 | |||
formants vowel/u#_3 | |||
endphoneme | |||
@@ -113,3 +113,8 @@ phoneme v | |||
switchvoicing f | |||
endphoneme | |||
phoneme : // Lengthen previous vowel by "length" | |||
virtual | |||
length 50 | |||
endphoneme | |||
@@ -103,6 +103,7 @@ MNEM_TAB mnem_flags[] = { | |||
{"$verbextend",0x28}, /* extend influence of 'verb follows' */ | |||
{"$capital", 0x29}, /* use this pronunciation if initial letter is upper case */ | |||
{"$allcaps", 0x2a}, /* use this pronunciation if initial letter is upper case */ | |||
{"$accent", 0x2b}, // character name is base-character name + accent name | |||
// doesn't set dictionary_flags | |||
{"$?", 100}, // conditional rule, followed by byte giving the condition number |
@@ -2390,15 +2390,15 @@ int Translator::TranslateRules(char *p_start, char *phonemes, int ph_size, char | |||
// no group for this letter, use default group | |||
MatchRule(&p, "", groups1[0], &match1, word_flags, dict_flags); | |||
if(match1.points == 0) | |||
if((match1.points == 0) && ((option_sayas & 0x10) == 0)) | |||
{ | |||
// no match, try removing the accent and re-translating the word | |||
n = utf8_in(&letter,p-1,0)-1; | |||
if((letter >= 0xc0) && (letter <= 0x241)) | |||
if((letter >= 0xc0) && (letter <= 0x241) && ((ix = remove_accent[letter-0xc0]) != 0)) | |||
{ | |||
// within range of the remove_accent table | |||
p2 = p-1; | |||
p[-1] = remove_accent[letter-0xc0]; | |||
p[-1] = ix; | |||
while((p[0] = p[n]) != ' ') p++; | |||
while(n-- > 0) *p++ = ' '; // replacement character must be no longer than original | |||
@@ -3020,6 +3020,16 @@ int Translator::LookupDictList(char **wordptr, char *ph_out, unsigned int *flags | |||
found = LookupDict2(word, word1, ph_out, flags, end_flags, wtab); | |||
if((found == 0) && (flags[1] & FLAG_ACCENT)) | |||
{ | |||
int letter; | |||
word2 = word; | |||
if(*word2 == '_') word2++; | |||
len = utf8_in(&letter, word2, 0); | |||
LookupAccentedLetter(letter, ph_out); | |||
found = word2 + len; | |||
} | |||
if(found == 0) | |||
{ | |||
ph_out[0] = 0; | |||
@@ -3086,6 +3096,7 @@ int Translator::LookupDictList(char **wordptr, char *ph_out, unsigned int *flags | |||
int Translator::Lookup(const char *word, char *ph_out) | |||
{//=================================================== | |||
unsigned int flags[2]; | |||
flags[0] = flags[1] = 0; | |||
char *word1 = (char *)word; | |||
return(LookupDictList(&word1, ph_out, flags, 0, NULL)); | |||
} |
@@ -36,6 +36,290 @@ | |||
// Accent ("modifier") numbers.  Each value is also the index of the
// corresponding entry in accents_tab[], so the two lists must be kept in step.
// M_NAME (0) means "no accent": the character has its own name.
#define M_NAME          0
#define M_ACUTE         1
#define M_BREVE         2
#define M_CARON         3
#define M_CEDILLA       4
#define M_CIRCUMFLEX    5
#define M_DIAERESIS     6
#define M_DOUBLE_ACUTE  7
#define M_DOT_ABOVE     8
#define M_GRAVE         9
#define M_MACRON       10
#define M_OGONEK       11
#define M_RING         12
#define M_STROKE       13
#define M_TILDE        14
#define M_MIDDLE_DOT   M_DOT_ABOVE   // spoken with the same accent name as M_DOT_ABOVE
// One entry per accent: the dictionary token that names it.
typedef struct {
	const char *name;   // token to look up in the *_list file (points at a string literal)
	int flags;          // NOTE(review): only the "_lig" entry sets this (to 1); meaning not visible here
} ACCENTS;

// these are tokens to look up in the *_list file.
// The table is indexed by accent number (the M_xxx values); entry 0 is
// used for ligatures rather than for an accent.
ACCENTS accents_tab[] = {
	{"_lig", 1},   // 0  ligature
	{"_acu", 0},   // 1  acute
	{"_brv", 0},   // 2  breve
	{"_hac", 0},   // 3  caron/hacek
	{"_ced", 0},   // 4  cedilla
	{"_cir", 0},   // 5  circumflex
	{"_dia", 0},   // 6  diaeresis
	{"_ac2", 0},   // 7  double acute
	{"_dot", 0},   // 8  dot
	{"_grv", 0},   // 9  grave
	{"_mcn", 0},   // 10 macron
	{"_ogo", 0},   // 11 ogonek
	{"_rng", 0},   // 12 ring
	{"_stk", 0},   // 13 stroke
	{"_tld", 0},   // 14 tilde
};
// Encoding of one entry of the letter_accents table:
//   CAPITAL   - placeholder (0) for a code point that has no entry
//   LETTER    - bits 0-7: base letter, bits 8 upwards: accent number (mod2 is currently unused)
//   LIGATURE  - bits 0-7: first letter, bits 8 upwards: second letter, bit 15 set (mod1 is currently unused)
// Arguments and results are parenthesised so the macros are safe inside larger expressions.
#define CAPITAL  0
#define LETTER(ch,mod1,mod2)   ((ch)+((mod1) << 8))
#define LIGATURE(ch1,ch2,mod1) ((ch1)+((ch2) << 8)+0x8000)
// characters U+00e0 to U+017f | |||
const short letter_accents_0e0[] = { | |||
LETTER('a',M_GRAVE,0), // U+00e0 | |||
LETTER('a',M_ACUTE,0), | |||
LETTER('a',M_CIRCUMFLEX,0), | |||
LETTER('a',M_TILDE,0), | |||
LETTER('a',M_DIAERESIS,0), | |||
LETTER('a',M_RING,0), | |||
LIGATURE('a','e',0), | |||
LETTER('c',M_CEDILLA,0), | |||
LETTER('e',M_GRAVE,0), | |||
LETTER('e',M_ACUTE,0), | |||
LETTER('e',M_CIRCUMFLEX,0), | |||
LETTER('e',M_DIAERESIS,0), | |||
LETTER('i',M_GRAVE,0), | |||
LETTER('i',M_ACUTE,0), | |||
LETTER('i',M_CIRCUMFLEX,0), | |||
LETTER('i',M_DIAERESIS,0), | |||
LETTER('d',M_NAME,0), // eth // U+00f0 | |||
LETTER('n',M_TILDE,0), | |||
LETTER('o',M_GRAVE,0), | |||
LETTER('o',M_ACUTE,0), | |||
LETTER('o',M_CIRCUMFLEX,0), | |||
LETTER('o',M_TILDE,0), | |||
LETTER('o',M_DIAERESIS,0), | |||
0, // division sign | |||
LETTER('o',M_STROKE,0), | |||
LETTER('u',M_GRAVE,0), | |||
LETTER('u',M_ACUTE,0), | |||
LETTER('u',M_CIRCUMFLEX,0), | |||
LETTER('u',M_DIAERESIS,0), | |||
LETTER('y',M_ACUTE,0), | |||
LETTER('t',M_NAME,0), // thorn | |||
LETTER('y',M_DIAERESIS,0), | |||
CAPITAL, // U+0100 | |||
LETTER('a',M_MACRON,0), | |||
CAPITAL, | |||
LETTER('a',M_BREVE,0), | |||
CAPITAL, | |||
LETTER('a',M_OGONEK,0), | |||
CAPITAL, | |||
LETTER('c',M_ACUTE,0), | |||
CAPITAL, | |||
LETTER('c',M_CIRCUMFLEX,0), | |||
CAPITAL, | |||
LETTER('c',M_DOT_ABOVE,0), | |||
CAPITAL, | |||
LETTER('c',M_CARON,0), | |||
CAPITAL, | |||
LETTER('d',M_CARON,0), | |||
CAPITAL, // U+0110 | |||
LETTER('d',M_STROKE,0), | |||
CAPITAL, | |||
LETTER('e',M_MACRON,0), | |||
CAPITAL, | |||
LETTER('e',M_BREVE,0), | |||
CAPITAL, | |||
LETTER('e',M_DOT_ABOVE,0), | |||
CAPITAL, | |||
LETTER('e',M_OGONEK,0), | |||
CAPITAL, | |||
LETTER('e',M_CARON,0), | |||
CAPITAL, | |||
LETTER('g',M_CIRCUMFLEX,0), | |||
CAPITAL, | |||
LETTER('g',M_BREVE,0), | |||
CAPITAL, // U+0120 | |||
LETTER('g',M_DOT_ABOVE,0), | |||
CAPITAL, | |||
LETTER('g',M_CEDILLA,0), | |||
CAPITAL, | |||
LETTER('h',M_CIRCUMFLEX,0), | |||
CAPITAL, | |||
LETTER('h',M_STROKE,0), | |||
CAPITAL, | |||
LETTER('i',M_TILDE,0), | |||
CAPITAL, | |||
LETTER('i',M_MACRON,0), | |||
CAPITAL, | |||
LETTER('i',M_BREVE,0), | |||
CAPITAL, | |||
LETTER('i',M_OGONEK,0), | |||
CAPITAL, // U+0130 | |||
LETTER('i',M_NAME,0), // dotless i | |||
CAPITAL, | |||
LIGATURE('i','j',0), | |||
CAPITAL, | |||
LETTER('j',M_CIRCUMFLEX,0), | |||
CAPITAL, | |||
LETTER('k',M_CEDILLA,0), | |||
LETTER('k',M_NAME,0), // kra | |||
CAPITAL, | |||
LETTER('l',M_ACUTE,0), | |||
CAPITAL, | |||
LETTER('l',M_CEDILLA,0), | |||
CAPITAL, | |||
LETTER('1',M_CARON,0), | |||
CAPITAL, | |||
LETTER('1',M_MIDDLE_DOT,0), // U+0140 | |||
CAPITAL, | |||
LETTER('1',M_STROKE,0), | |||
CAPITAL, | |||
LETTER('n',M_ACUTE,0), | |||
CAPITAL, | |||
LETTER('n',M_CEDILLA,0), | |||
CAPITAL, | |||
LETTER('n',M_CARON,0), | |||
LETTER('n',M_NAME,0), // apostrophe n | |||
CAPITAL, | |||
LETTER('n',M_NAME,0), // eng | |||
CAPITAL, | |||
LETTER('o',M_MACRON,0), | |||
CAPITAL, | |||
LETTER('o',M_BREVE,0), | |||
CAPITAL, // U+0150 | |||
LETTER('o',M_DOUBLE_ACUTE,0), | |||
CAPITAL, | |||
LIGATURE('o','e',0), | |||
CAPITAL, | |||
LETTER('r',M_ACUTE,0), | |||
CAPITAL, | |||
LETTER('r',M_CEDILLA,0), | |||
CAPITAL, | |||
LETTER('r',M_CARON,0), | |||
CAPITAL, | |||
LETTER('s',M_ACUTE,0), | |||
CAPITAL, | |||
LETTER('s',M_CIRCUMFLEX,0), | |||
CAPITAL, | |||
LETTER('s',M_CEDILLA,0), | |||
CAPITAL, // U+0160 | |||
LETTER('s',M_CARON,0), | |||
CAPITAL, | |||
LETTER('t',M_CEDILLA,0), | |||
CAPITAL, | |||
LETTER('t',M_CARON,0), | |||
CAPITAL, | |||
LETTER('t',M_STROKE,0), | |||
CAPITAL, | |||
LETTER('u',M_TILDE,0), | |||
CAPITAL, | |||
LETTER('u',M_MACRON,0), | |||
CAPITAL, | |||
LETTER('u',M_BREVE,0), | |||
CAPITAL, | |||
LETTER('u',M_RING,0), | |||
CAPITAL, // U+0170 | |||
LETTER('u',M_DOUBLE_ACUTE,0), | |||
CAPITAL, | |||
LETTER('u',M_OGONEK,0), | |||
CAPITAL, | |||
LETTER('w',M_CIRCUMFLEX,0), | |||
CAPITAL, | |||
LETTER('y',M_CIRCUMFLEX,0), | |||
CAPITAL, // Y-DIAERESIS | |||
CAPITAL, | |||
LETTER('z',M_ACUTE,0), | |||
CAPITAL, | |||
LETTER('z',M_DOT_ABOVE,0), | |||
CAPITAL, | |||
LETTER('z',M_CARON,0), | |||
LETTER('s',M_NAME,0), // long-s // U+17f | |||
}; | |||
int Translator::LookupLetter2(unsigned int letter, char *ph_buf) | |||
{//============================================================= | |||
int len; | |||
char single_letter[10]; | |||
single_letter[0] = 0; | |||
single_letter[1] = '_'; | |||
len = utf8_out(letter, &single_letter[2]); | |||
single_letter[2+len] = 0; | |||
if(Lookup(&single_letter[1],ph_buf) == 0) | |||
{ | |||
single_letter[1] = ' '; | |||
if(Lookup(&single_letter[2],ph_buf) == 0) | |||
{ | |||
TranslateRules(&single_letter[2], ph_buf, 20, NULL,0,0); | |||
} | |||
} | |||
return(ph_buf[0]); | |||
} | |||
void Translator::LookupAccentedLetter(unsigned int letter, char *ph_buf) | |||
{//===================================================================== | |||
// lookup the character in the accents table | |||
int accent_data; | |||
int accent1; | |||
int basic_letter; | |||
int letter2=0; | |||
char ph_letter1[30]; | |||
char ph_letter2[30]; | |||
char ph_accent1[30]; | |||
if((letter >= 0xe0) && (letter < 0x17f)) | |||
{ | |||
accent_data = letter_accents_0e0[letter - 0xe0]; | |||
basic_letter = accent_data & 0x7f; | |||
if((accent1 = (accent_data >> 8) & 0x7f) != 0) | |||
{ | |||
if(accent_data & 0x8000) | |||
{ | |||
letter2 = accent1; | |||
accent1 = 0; | |||
} | |||
if(Lookup(accents_tab[accent1].name, ph_accent1) != 0) | |||
{ | |||
if(LookupLetter2(basic_letter, ph_letter1) != 0) | |||
{ | |||
if(accent1 == 0) | |||
{ | |||
//ligature | |||
LookupLetter2(letter2, ph_letter2); | |||
sprintf(ph_buf,"%s%c%s%s",ph_accent1, phonPAUSE_VSHORT, ph_letter1, ph_letter2); | |||
} | |||
else | |||
{ | |||
if(langopts.accents & 1) | |||
sprintf(ph_buf,"%s%c%s", ph_accent1, phonPAUSE_VSHORT, ph_letter1); | |||
else | |||
sprintf(ph_buf,"%s%c%s%c", ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT); | |||
} | |||
} | |||
} | |||
} | |||
} | |||
} // end of LookupAccentedLetter | |||
void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1) | |||
{//============================================================================= | |||
@@ -43,7 +327,9 @@ void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1) | |||
unsigned char *p; | |||
static char single_letter[10] = {0,0}; | |||
char ph_stress[2]; | |||
char ph_buf3[30]; | |||
unsigned int dict_flags[2]; | |||
char ph_buf3[40]; | |||
char *ptr; | |||
ph_buf1[0] = 0; | |||
len = utf8_out(letter,&single_letter[2]); | |||
@@ -83,6 +369,11 @@ void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1) | |||
single_letter[3+len] = next_byte; // follow by space-space if the end of the word, or space-0x31 | |||
single_letter[1] = '_'; | |||
// if the $accent flag is set for this letter, use the accents table (below) | |||
dict_flags[1] = 0; | |||
ptr = &single_letter[1]; | |||
if(Lookup(&single_letter[1],ph_buf3) == 0) | |||
{ | |||
single_letter[1] = ' '; | |||
@@ -92,6 +383,11 @@ void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1) | |||
} | |||
} | |||
if(ph_buf3[0] == 0) | |||
{ | |||
LookupAccentedLetter(letter, ph_buf3); | |||
} | |||
if(ph_buf3[0] == 0) | |||
{ | |||
ph_buf1[0] = 0; | |||
@@ -185,6 +481,7 @@ int Translator::TranslateLetter(char *word, char *phonemes, int control, int wor | |||
for(p2 = hexbuf; *p2 != 0; p2++) | |||
{ | |||
pbuf += strlen(pbuf); | |||
*pbuf++ = phonPAUSE_VSHORT; | |||
LookupLetter(*p2, 0, pbuf); | |||
} | |||
} |
@@ -517,6 +517,7 @@ const char *Translator::LookupSpecial(const char *string, char* text_out) | |||
const char *Translator::LookupCharName(int c) | |||
{//========================================== | |||
// Find the phoneme string (in ascii) to speak the name of character c | |||
// Used for punctuation characters and symbols | |||
int ix; | |||
unsigned int flags[2]; |
@@ -35,7 +35,7 @@ | |||
#include "translate.h" | |||
#include "wave.h" | |||
const char *version_string = "1.31.19 28.Feb.08"; | |||
const char *version_string = "1.31.20 01.Mar.08"; | |||
const int version_phdata = 0x013105; | |||
int option_device_number = -1; |
@@ -1146,7 +1146,9 @@ int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume) | |||
if(p->newword) | |||
{ | |||
last_frame = NULL; | |||
if(translator->langopts.param[LOPT_WORD_MERGE] == 0) | |||
last_frame = NULL; | |||
sourceix = (p->sourceix & 0x7ff) + clause_start_char; | |||
if(p->newword & 4) |
@@ -663,6 +663,7 @@ SetLengthMods(tr,3); // all equal | |||
tr->langopts.stress_rule = 1; | |||
tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | |||
tr->letter_bits_offset = OFFSET_TAMIL; | |||
tr->langopts.param[LOPT_WORD_MERGE] = 1; // don't break vowels betwen words | |||
memset(tr->letter_bits,0,sizeof(tr->letter_bits)); | |||
SetLetterBitsRange(tr,LETTERGP_A,0x05,0x14); // vowel letters | |||
@@ -1055,6 +1056,7 @@ Translator_Afrikaans::Translator_Afrikaans() : Translator() | |||
SetLetterVowel(this,'y'); // add 'y' to vowels | |||
langopts.numbers = 0x8d1 + NUM_ROMAN; | |||
langopts.accents = 1; | |||
memcpy(stress_lengths,stress_lengths2,sizeof(stress_lengths)); | |||
} | |||
@@ -74,6 +74,7 @@ | |||
#define FLAG_VERB_EXT 0x100 /* extend the 'verb follows' */ | |||
#define FLAG_CAPITAL 0x200 /* pronunciation if initial letter is upper case */ | |||
#define FLAG_ALLCAPS 0x400 // only if the word is all capitals | |||
#define FLAG_ACCENT 0x800 // character name is base-character name + accent name | |||
@@ -230,7 +231,7 @@ extern const int param_defaults[N_SPEECH_PARAM]; | |||
#define N_LOPTS 14 | |||
#define N_LOPTS 15 | |||
#define LOPT_DIERESES 1 | |||
// 1=remove [:] from unstressed syllables, 2= remove from unstressed or non-penultimate syllables | |||
// bit 4=0, if stress < 4, bit 4=1, if not the highest stress in the word | |||
@@ -254,9 +255,8 @@ extern const int param_defaults[N_SPEECH_PARAM]; | |||
// increase this to prevent sonorants being shortened before shortened (eg. unstressed) vowels | |||
#define LOPT_SONORANT_MIN 7 | |||
// bit 0=Italian "syntactic doubling" of consonants in the word after a word marked with $double attribute
// bit 1=also after a word which ends with a stressed vowel | |||
#define LOPT_IT_DOUBLING 8 | |||
// don't break vowels at word boundary | |||
#define LOPT_WORD_MERGE 8 | |||
// max. amplitude for vowel at the end of a clause | |||
#define LOPT_MAXAMP_EOC 9 | |||
@@ -277,6 +277,11 @@ extern const int param_defaults[N_SPEECH_PARAM]; | |||
// stressed syllable is indicated by capitals | |||
#define LOPT_SYLLABLE_CAPS 13 | |||
// bit 0=Italian "syntactic doubling" of consonants in the word after a word marked with $double attribute
// bit 1=also after a word which ends with a stressed vowel | |||
#define LOPT_IT_DOUBLING 14 | |||
typedef struct { | |||
// bits0-2  separate words with (1=pause_vshort, 2=pause_short, 3=pause, 4=pause_long 5=[?] phoneme)
@@ -341,6 +346,10 @@ typedef struct { | |||
int max_roman; | |||
int thousands_sep; | |||
int decimal_sep; | |||
// bit 0, accent name before the letter name | |||
int accents; | |||
int tone_language; // 1=tone language | |||
int intonation_group; | |||
int long_stop; // extra mS pause for a lengthened stop | |||
@@ -430,6 +439,8 @@ private: | |||
const char *LookupSpecial(const char *string, char *text_out); | |||
const char *LookupCharName(int c); | |||
void LookupLetter(unsigned int letter, int next_byte, char *ph_buf); | |||
int LookupLetter2(unsigned int letter, char *ph_buf); | |||
void LookupAccentedLetter(unsigned int letter, char *ph_buf); | |||
int LookupNum2(int value, int control, char *ph_out); | |||
int LookupNum3(int value, char *ph_out, int suppress_null, int thousandplex, int prev_thousands); | |||
int LookupThousands(int value, int thousandplex, char *ph_out); |