Speak accented letter names as base-letter name plus accent name, using a table of Unicode characters up to U+17F. Added $accent attribute for *_list files, meaning "speak as base-letter name plus accent name". Added accent names to several languages *_list files. Added language option to allow vowels to merge between words (used for lang=TA). git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@153 d46cf337-b52f-0410-862d-fd96e6ae7743master
| @@ -60,6 +60,28 @@ z zEd | |||
| û kapi?y | |||
| ü de@lte@k@n?y | |||
| _cap h'o@flEt@r | |||
| _?? s@mb'o@l | |||
| _?A l'Et@r | |||
| // accent names | |||
| _lig d'Wb@llEt@r | |||
| _acu ak'yt | |||
| _ac2 d'Wb@lakyt | |||
| _brv br'i:v | |||
| _ced s'e@dIla | |||
| _cir k'api | |||
| _dia d'e@lte@k@n | |||
| _dot p'Wnt | |||
| _grv x2r'afIs | |||
| _hac k'A:rOn | |||
| _mcn m'akrOn | |||
| _ogo o@x2o@n,&k | |||
| _rng 'A:nb&l | |||
| _stk str'e@p | |||
| _tld t'Ild@ | |||
| // numeric | |||
| _0 'nWl | |||
| _1 _'e@n | |||
| @@ -51,6 +51,28 @@ _z zet | |||
| _#9 t'ap | |||
| _#32 m'ezeRa // space | |||
| _?? simbol | |||
| _?A znak | |||
| _cap m'ajuskule | |||
| // accent names | |||
| _lig l'igat,uRa | |||
| _acu tS'a:Rka | |||
| _ac2 dvj'etS'a:Rki | |||
| _brv 'obloUtSek | |||
| _ced ts'eJila | |||
| _cir stR^'i:Ska | |||
| _dia tR'ema | |||
| _dot t'etSka | |||
| _grv c'eSki:;'aktsent | |||
| _hac h'a:tSek | |||
| _mcn m'akRon | |||
| _ogo 'ogonek | |||
| _rng kR'oUZek | |||
| _stk S'ikma:tS'a:Rka | |||
| _tld c'ilda | |||
| _$ dola:R | |||
| _' apostRof | |||
| @@ -72,8 +94,6 @@ _} sl'oZena:||z'avR^i:t | |||
| _< menSi: | |||
| _> vjetSi: | |||
| _| sv'isla:||tS'a:Ra | |||
| _?? simbol | |||
| _?A znak | |||
| // Numbers | |||
| @@ -258,6 +278,9 @@ z zet $atend | |||
| // Abbreviations | |||
| // if a word has no vowels (or "r") then it will | |||
| // automatically be spoken as individual letters | |||
| e.g napR^ | |||
| arc $abbrev | |||
| arj $abbrev | |||
| atd $abbrev | |||
| @@ -44,6 +44,23 @@ _#9 t,abu:l'Ato:* | |||
| _#32 l'e:*ts'aIC@n | |||
| _?? zymb'o:l | |||
| // accent names | |||
| _lig l'i:gat,u:* | |||
| _acu ak'u:t | |||
| _ac2 d'Op@l,aku:t | |||
| _brv b@-*'e:v@ | |||
| _ced tse:d'Il@ | |||
| _cir ts'I*kUmflEks | |||
| _dia 'UmlaUt | |||
| _dot p'Unkt | |||
| _grv g@-*'AvIs | |||
| _hac h'atSe:k | |||
| _mcn m'ak@-*o:n | |||
| _ogo 'o:go:n,e:k | |||
| _rng k@-*'u:Ze:k | |||
| _stk S@-*'Egst@-*IC | |||
| _tld t'Ild@ | |||
| // Not Roman numbers | |||
| v faU | |||
| x Iks | |||
| @@ -21,10 +21,10 @@ Dictionary cs_dict | |||
| i i: l- o o: oU r- u | |||
| u: | |||
| * b c d dZ f g h | |||
| j J k l l^ m n N | |||
| n^ p r R R^ R^/ s S | |||
| t tS ts v x z Z | |||
| * ; b c d dZ f g | |||
| h j J k l l^ m n | |||
| N n^ p r R R^ R^/ s | |||
| S t tS ts v x z Z | |||
| Dictionary cy_dict | |||
| @@ -149,12 +149,12 @@ v z Z | |||
| Dictionary it_dict | |||
| @- a a/ aI aU e E i | |||
| o O oI u | |||
| I o O oI u | |||
| * : b d dZ f g h | |||
| j k l l^ m n N n^ | |||
| p r R s S t tS ts | |||
| v w w2 z | |||
| * : ; b d dZ f g | |||
| h j k l l^ m n N | |||
| n^ p r R s S t tS | |||
| ts v w w2 z | |||
| Dictionary nl_dict | |||
| @@ -213,10 +213,11 @@ Dictionary sk_dict | |||
| l- l: o o: oU r- r: u | |||
| u: | |||
| * : b d d; dZ f g | |||
| h j k l l^ m n N | |||
| n^ p r R R^ R^/ s S | |||
| t t; tS ts v x z Z | |||
| * : ; b d d; dZ f | |||
| g h j k l l^ m n | |||
| N n^ p r R R^ R^/ s | |||
| S t t; tS ts v x z | |||
| Z | |||
| Dictionary sv_dict | |||
| @@ -371,10 +372,10 @@ Dictionary hbs_dict | |||
| E e e: i I i: l- O | |||
| o o: oU r* r- u U u: | |||
| * b d dZ dZ; f g h | |||
| j k l L l^ m n N | |||
| n^ p r R s S t tS | |||
| ts tS; v x z Z | |||
| * ; b d dZ dZ; f g | |||
| h j k l L l^ m n | |||
| N n^ p r R s S t | |||
| tS ts tS; v x z Z | |||
| Dictionary id_dict | |||
| @@ -47,7 +47,38 @@ y waI | |||
| z zEd | |||
| ?3 z zi: | |||
| é i:a2k'ju:t | |||
| // sounds are specified for these accented characters | |||
| // use the $accent attribute here to force the use of the | |||
| // accent table when speaking the name of the character. | |||
| _é $accent | |||
| ê $accent | |||
| _ä $accent | |||
| _ë $accent | |||
| _ï $accent | |||
| _ö $accent | |||
| _ü $accent | |||
| _č $accent | |||
| _š $accent | |||
| _ş $accent | |||
| _ž $accent | |||
| _ñ $accent | |||
| // accent names | |||
| _lig l,Iga2tS@ | |||
| _acu a2kj'u:t | |||
| _ac2 dVb@la2kj'u:t | |||
| _brv br'i:v | |||
| _ced s@d'Ila2 | |||
| _cir s'3:kVmfl,Eks | |||
| _dia 'UmlaUt | |||
| _dot wI2Dd'0ta2bVv | |||
| _grv gr'A:v | |||
| _hac h'atS,Ek | |||
| _mcn m'akr,0n | |||
| _ogo '0g0n,Ek | |||
| _rng r'INg | |||
| _stk str'oUk | |||
| _tld t'Ild@ | |||
| // character names | |||
| @@ -73,7 +104,6 @@ _+ plVs | |||
| _, k0m@ | |||
| _- h,aIf@n | |||
| _. d0t | |||
| _/ stroUk | |||
| _: koUl@n | |||
| _; sEmIk'oUl0n | |||
| _< ElaNg@L | |||
| @@ -82,7 +112,6 @@ _> A@aNg@L | |||
| _? kwEstS@n | |||
| _@ at|saIn | |||
| _[ lEftskwe@ | |||
| _\ bakstroUk | |||
| _] raItskwe@ | |||
| _^ s3:kVmfl,Eks | |||
| ?5 _^ sIRkVmfl,Eks | |||
| @@ -162,11 +191,14 @@ _₠ jU@roU | |||
| ¬ n0t_|saIn | |||
| ə SwA: | |||
| æ eI'i: | |||
| ɛ oUp@n'i: | |||
| ɔ oUp@n'oU | |||
| þ TO@n | |||
| ŋ ENg | |||
| ð ED | |||
| ʃ ES | |||
| ʒ EZ | |||
| ĸ krA: | |||
| ɛ oUp@n'i: | |||
| ɔ oUp@n'oU | |||
| // numeric | |||
| @@ -5154,6 +5154,7 @@ | |||
| .group | |||
| // non-ascii characters with specified pronunciations | |||
| // List the accented characters in en_list with the $accent attribute | |||
| é eI | |||
| _) é I2 | |||
| ê E: | |||
| @@ -7,6 +7,24 @@ _cap m'aJ^us | |||
| _?? s'imbOlo | |||
| _#32 Esp'aTjo | |||
| // accent names | |||
| _lig liQaD'u**a | |||
| _acu aQ'uDo | |||
| _ac2 d'Oble||aQ'uDo | |||
| _brv b**'eBe | |||
| _ced TeD'iJ^a | |||
| _cir TiRkumfl'exo | |||
| _dia dj'E**esis | |||
| _dac d'Oble||aQ'uDo | |||
| _dot p'unto // ?? | |||
| _grv g**'aBe | |||
| _hac ka**'On | |||
| _mcn mak**'On | |||
| _ogo kol'ita | |||
| _rng an'iJ^o | |||
| _stk b'a*Ra // ?? | |||
| _tld t'ilde | |||
| // names of symbols | |||
| _. punto | |||
| @@ -145,6 +145,9 @@ | |||
| _) r R | |||
| A) r (A ** | |||
| C) r (A ** | |||
| l) r (A R | |||
| m) r (A R | |||
| n) r (A R | |||
| rr *R | |||
| @@ -5,7 +5,7 @@ | |||
| // 2006-11-18 Gilles Casse <[email protected]> | |||
| // | |||
| // Updated 2008-02-24 Michel Such <[email protected]> | |||
| // Updated 2008-02-29 Michel Such <[email protected]> | |||
| // | |||
| // * Numbers, a few abbreviations and exceptions. | |||
| // | |||
| @@ -16,42 +16,42 @@ | |||
| // "letter" name, then include the letter name here, with the letter | |||
| // prefixed by a _ character. | |||
| _à a:aksA~gRav | |||
| â a:aksA~siRkO~flEks | |||
| ä a:tRema | |||
| _à $accent // speak as base-letter name + accent name | |||
| â $accent | |||
| ä $accent | |||
| b be | |||
| c se | |||
| ç sesedij | |||
| ç $accent | |||
| d de | |||
| e @ | |||
| ë @:tRema | |||
| é @:aksA~Egy | |||
| è @:aksA~gRav | |||
| ê @:aksA~siRkO~flEks | |||
| ë @:tRema | |||
| ë $accent | |||
| é $accent | |||
| è $accent | |||
| ê $accent | |||
| ë $accent | |||
| f Ef | |||
| g Ze | |||
| h aS | |||
| i i | |||
| ï i:tRema | |||
| ï $accent | |||
| j Zi | |||
| k ka | |||
| l El | |||
| m Em | |||
| n En | |||
| ñ Entilde | |||
| ñ $accent | |||
| o o | |||
| ô o:aksA~siRkO~flEks | |||
| ö o:tRema | |||
| ô $accent | |||
| ö $accent | |||
| p pe | |||
| q ky | |||
| r ER | |||
| s Es | |||
| t te | |||
| u y | |||
| ù y:aksA~gRav | |||
| û y:aksA~siRkO~flEks | |||
| ü y:tRema | |||
| ù $accent | |||
| û $accent | |||
| ü $accent | |||
| v ve | |||
| w dubl@v'e | |||
| x iks | |||
| @@ -60,15 +60,33 @@ _y i:gR'Ek | |||
| // character names | |||
| // accent names | |||
| _lig ligat'yr | |||
| _acu aksA~tEg'y | |||
| _ac2 dublaksA~tEg'y | |||
| _brv br'Ev | |||
| _ced sed'ij | |||
| _cir aksA~siRkO~fl'Eks | |||
| _dia tRem'a | |||
| _dot pw'E~syskr'i | |||
| _grv aksA~gR'av | |||
| _hac kar'O~ | |||
| _mcn makr'O~ | |||
| _ogo OgOn'Ek | |||
| _rng rO~t2A~S'Ef | |||
| _stk b'aR | |||
| _tld t'ild | |||
| // character names | |||
| //_cap k,ap@-t@L | |||
| _?A lEt@ | |||
| _cap maZysk'yl | |||
| _?A l'Etr | |||
| _?? sE~b'Oll | |||
| _#9 tabylasjO~ | |||
| _#32 Espas | |||
| _" gijmE | |||
| _# djEz | |||
| _' apOstROf | |||
| @@ -383,8 +401,8 @@ tout t'ut2 $u | |||
| // Letters which can be words | |||
| //=========================== | |||
| à a:aksA~gRav $atend | |||
| y i:gR'Ek $atend | |||
| à $atend $accent | |||
| y $atend $accent | |||
| @@ -651,3 +669,4 @@ vincent vE~sA~ | |||
| @@ -41,9 +41,29 @@ | |||
| z z@ | |||
| ž Z@ | |||
| // accent names | |||
| _lig l'ig&t,UR& | |||
| _acu 'akUt | |||
| _ac2 dv'ostr**UkI;'akUt | |||
| //_brv | |||
| _ced ts'EdIl& | |||
| _cir ts'iRkUmflEks | |||
| _dia 'uml&Ut | |||
| _dot t'otSk& | |||
| //_grv | |||
| _hac kv'atSIts& | |||
| _mcn m'akr**on | |||
| _ogo 'ogonEk | |||
| _rng r**'iNg | |||
| _stk kr**'oz | |||
| _tld t'ild& | |||
| // symbols | |||
| _?? znak // unknown symbol | |||
| _?A slovo // unknown letter | |||
| _cap k'apIt&l // ?? use English until I find the correct word | |||
| © 'aUtoRsk&||pr*av& | |||
| % p'osto | |||
| + plus | |||
| @@ -45,6 +45,23 @@ _1M4 billio: | |||
| _2M4 ke:tbillio: | |||
| _dpt ||_vEss2Y:_ | |||
| // accent names | |||
| _lig ligAtu:R2A | |||
| _acu e:lES | |||
| _ac2 kEt:Y:S | |||
| _brv fe:lkYR2 | |||
| //_ced // cedilla | |||
| _cir tsiR2kumflEks | |||
| _dia tR2e:mA | |||
| //_dot // dot above | |||
| _grv tompA | |||
| _hac pipA | |||
| _mcn mAkR2on // ?? macron | |||
| _ogo hoR2gok | |||
| _rng kYR2 | |||
| _stk a:thu:za:S | |||
| _tld tildE | |||
| // Abbreviations | |||
| km kilo:me:tER2 | |||
| @@ -4,8 +4,28 @@ | |||
| // character names | |||
| _cap k'apital | |||
| _?? s'imbolo | |||
| _cap k'apital | |||
| _?? s'imbolo | |||
| _?A let:'e:Ra | |||
| // accent names | |||
| _lig l,egat'u:Ra | |||
| _acu atS:'ento_|ak'u:to | |||
| _ac2 d'op:i;o_|atS:'ento_|ak'u:to | |||
| _brv b@-*'e:ve | |||
| _ced tSed'il^a | |||
| _cir tSi;@-*konfl'esso | |||
| _dia djeR'e:zI | |||
| _dot p'unto||s,ov@-*ask@-*'it:o | |||
| _grv atS:'entog@-*'a:ve | |||
| _hac h'atSek | |||
| _mcn m'ak@-*on | |||
| _ogo og'o:nek | |||
| _rng an'ello | |||
| _stk b'aR*a | |||
| _tld t'ilde | |||
| _! p'untoesklamat'ivo | |||
| _" viRgolet:e | |||
| @@ -48,6 +48,26 @@ z zet | |||
| î ,ydin'i $atend | |||
| _. punkt | |||
| _?? ka*akt'er | |||
| _cap maZusk'ul@ | |||
| // accent names | |||
| _lig ligat'ur@ | |||
| _acu aktS'entaskuts'it | |||
| //_ac2 | |||
| _brv k@tS'ul@ | |||
| _ced sed'il@ | |||
| _cir tSirkumfl'eks | |||
| _dia t@-*'em@ | |||
| _dot punkt | |||
| _grv aktS'entg@-*'av | |||
| _hac hatS'ek | |||
| _mcn mak@-*'on | |||
| _ogo ogon'ek | |||
| //_rng | |||
| _stk sl'eS | |||
| _tld t'ild@ | |||
| // numeric | |||
| @@ -43,10 +43,28 @@ _v ve: | |||
| w dv'ojite:,ve: | |||
| x iks | |||
| y ipsilon | |||
| ý d,l-he:'i: | |||
| ý d,l-he:'ipsilon | |||
| _z zet | |||
| ž Zet | |||
| // accent names | |||
| _lig l'igat,u:Rov,ane: | |||
| _acu s_d'l:Zn^om | |||
| //_ac2 | |||
| _brv bR'eve | |||
| _ced s_ts'ed;illoU | |||
| _cir s'ostRi;,eSkoU | |||
| _dia s_pR'ehla:skoU | |||
| _dot s_b'otkoU | |||
| _grv s_'obRa:t;,eni:md'l:Zn^om | |||
| _hac s_m'ektSen^om | |||
| _mcn s_m'akRonom | |||
| _ogo s_'ogon^ek | |||
| _rng s_kR'u:Skom | |||
| _stk S'ikmi: d'l:Zen^ | |||
| _tld s_v'l-novkoU | |||
| // character names | |||
| _cap vel^ke: | |||
| _?? simbol | |||
| @@ -38,7 +38,7 @@ | |||
| எ e | |||
| ஏ e: | |||
| ஏ e:: | |||
| ஐ E: | |||
| @@ -50,99 +50,99 @@ | |||
| // consonants | |||
| க ga // inter-vocalic, unless there is virama before or after | |||
| க gV // inter-vocalic, unless there is virama before or after | |||
| க (B g | |||
| க (் g | |||
| ்) க ga | |||
| ்) க gV | |||
| ்) க (B g | |||
| _) க ka | |||
| _) க kV | |||
| _) க (B k | |||
| க்க k:a | |||
| க்க k:V | |||
| க்க (B k: | |||
| ங Na | |||
| ங NV | |||
| ங (B N | |||
| ச dZa | |||
| ச dZV | |||
| ச (B dZ | |||
| _) ச sa | |||
| _) ச (B s | |||
| ச்ச tS:a | |||
| ச்ச tS:V | |||
| ச்ச (B tS: | |||
| ஜ dZa | |||
| ஜ dZV | |||
| ஜ (B dZ | |||
| ஞ n^a | |||
| ஞ n^V | |||
| ஞ (B n^ | |||
| ட d.a | |||
| ட d.V | |||
| ட (B d. | |||
| _) ட t.a | |||
| _) ட t.V | |||
| _) ட (B t. | |||
| ட்ட t.a | |||
| ட்ட t.V | |||
| ட்ட (B t. | |||
| ண n.a | |||
| ண n.V | |||
| ண (B n. | |||
| த da | |||
| த dV | |||
| த (B d | |||
| _) த ta | |||
| _) த tV | |||
| _) த (B t | |||
| த்த t:a | |||
| த்த (B t: | |||
| த்த ttV | |||
| த்த (B tt | |||
| ந na | |||
| ந nV | |||
| ந (B n | |||
| ன na | |||
| ன nV | |||
| ன (B n | |||
| ப ba | |||
| ப bV | |||
| ப (B b | |||
| _) ப pa | |||
| _) ப pV | |||
| _) ப (B p | |||
| ப்ப p:a | |||
| ப்ப p:V | |||
| ப்ப (B p: | |||
| ஃ) ப fa | |||
| ஃ) ப (B f | |||
| ம ma | |||
| ம mV | |||
| ம (B m | |||
| ய ja | |||
| ய jV | |||
| ய (B j | |||
| ர ra | |||
| ர rV | |||
| ர (B r | |||
| ற Ra | |||
| ற RV | |||
| ற (B R | |||
| ற் (ற t. // RR -> t.R | |||
| ல la | |||
| ல lV | |||
| ல (B l | |||
| ள l.a | |||
| ள l.V | |||
| ள (B l. | |||
| ழ z.a | |||
| ழ z.V | |||
| ழ (B z. | |||
| வ va | |||
| வ vV | |||
| வ (B v | |||
| ஶ Sa | |||
| ஶ SV | |||
| ஶ (B S | |||
| ஷ s.a | |||
| ஷ s.V | |||
| ஷ (B s. | |||
| ஸ sa | |||
| ஸ sV | |||
| ஸ (B s | |||
| ஹ ha | |||
| ஹ hV | |||
| ஹ (B h | |||
| @@ -176,4 +176,4 @@ | |||
| ௗ : // aU length mark | |||
| .group | |||
| $ dola | |||
| $ dolV | |||
| @@ -18,7 +18,7 @@ | |||
| fr 44 124 | |||
| fr_ca 11 124 | |||
| hi 51 135 | |||
| ta 16 138 | |||
| ta 17 138 | |||
| hu 23 114 | |||
| nl 28 121 | |||
| pl 15 109 | |||
| @@ -77,7 +77,7 @@ endphoneme | |||
| phoneme U | |||
| vowel starttype (u) endtype (u) | |||
| length 150 | |||
| length 130 | |||
| formants vowel/u#_3 | |||
| endphoneme | |||
| @@ -89,7 +89,7 @@ endphoneme | |||
| phoneme U: | |||
| vowel starttype (u) endtype (u) | |||
| length 270 | |||
| length 240 | |||
| formants vowel/u#_3 | |||
| endphoneme | |||
| @@ -113,3 +113,8 @@ phoneme v | |||
| switchvoicing f | |||
| endphoneme | |||
| phoneme : // Lengthen previous vowel by "length" | |||
| virtual | |||
| length 50 | |||
| endphoneme | |||
| @@ -103,6 +103,7 @@ MNEM_TAB mnem_flags[] = { | |||
| {"$verbextend",0x28}, /* extend influence of 'verb follows' */ | |||
| {"$capital", 0x29}, /* use this pronunciation if initial letter is upper case */ | |||
| {"$allcaps", 0x2a}, /* use this pronunciation if initial letter is upper case */ | |||
| {"$accent", 0x2b}, // character name is base-character name + accent name | |||
| // doesn't set dictionary_flags | |||
| {"$?", 100}, // conditional rule, followed by byte giving the condition number | |||
| @@ -2390,15 +2390,15 @@ int Translator::TranslateRules(char *p_start, char *phonemes, int ph_size, char | |||
| // no group for this letter, use default group | |||
| MatchRule(&p, "", groups1[0], &match1, word_flags, dict_flags); | |||
| if(match1.points == 0) | |||
| if((match1.points == 0) && ((option_sayas & 0x10) == 0)) | |||
| { | |||
| // no match, try removing the accent and re-translating the word | |||
| n = utf8_in(&letter,p-1,0)-1; | |||
| if((letter >= 0xc0) && (letter <= 0x241)) | |||
| if((letter >= 0xc0) && (letter <= 0x241) && ((ix = remove_accent[letter-0xc0]) != 0)) | |||
| { | |||
| // within range of the remove_accent table | |||
| p2 = p-1; | |||
| p[-1] = remove_accent[letter-0xc0]; | |||
| p[-1] = ix; | |||
| while((p[0] = p[n]) != ' ') p++; | |||
| while(n-- > 0) *p++ = ' '; // replacement character must be no longer than original | |||
| @@ -3020,6 +3020,16 @@ int Translator::LookupDictList(char **wordptr, char *ph_out, unsigned int *flags | |||
| found = LookupDict2(word, word1, ph_out, flags, end_flags, wtab); | |||
| if((found == 0) && (flags[1] & FLAG_ACCENT)) | |||
| { | |||
| int letter; | |||
| word2 = word; | |||
| if(*word2 == '_') word2++; | |||
| len = utf8_in(&letter, word2, 0); | |||
| LookupAccentedLetter(letter, ph_out); | |||
| found = word2 + len; | |||
| } | |||
| if(found == 0) | |||
| { | |||
| ph_out[0] = 0; | |||
| @@ -3086,6 +3096,7 @@ int Translator::LookupDictList(char **wordptr, char *ph_out, unsigned int *flags | |||
| int Translator::Lookup(const char *word, char *ph_out) | |||
| {//=================================================== | |||
| unsigned int flags[2]; | |||
| flags[0] = flags[1] = 0; | |||
| char *word1 = (char *)word; | |||
| return(LookupDictList(&word1, ph_out, flags, 0, NULL)); | |||
| } | |||
| @@ -36,6 +36,290 @@ | |||
// Accent (modifier) codes.  Each value is also the index into accents_tab[]
// of the token which names that accent.
// M_NAME (0) means "this letter has its own name, there is no accent to speak".
#define M_NAME          0
#define M_ACUTE         1
#define M_BREVE         2
#define M_CARON         3
#define M_CEDILLA       4
#define M_CIRCUMFLEX    5
#define M_DIAERESIS     6
#define M_DOUBLE_ACUTE  7
#define M_DOT_ABOVE     8
#define M_GRAVE         9
#define M_MACRON       10
#define M_OGONEK       11
#define M_RING         12
#define M_STROKE       13
#define M_TILDE        14
#define M_MIDDLE_DOT    8  // duplicate of M_DOT_ABOVE

typedef struct {
	const char *name;   // token to look up in the *_list file
	int  flags;
} ACCENTS;

// these are tokens to look up in the *_list file.
// The order must match the M_xxx values above; entry 0 is used for ligatures.
ACCENTS accents_tab[] = {
	{"_lig", 1},
	{"_acu", 0},  // acute
	{"_brv", 0},  // breve
	{"_hac", 0},  // caron/hacek
	{"_ced", 0},  // cedilla
	{"_cir", 0},  // circumflex
	{"_dia", 0},  // diaeresis
	{"_ac2", 0},  // double acute
	{"_dot", 0},  // dot
	{"_grv", 0},  // grave
	{"_mcn", 0},  // macron
	{"_ogo", 0},  // ogonek
	{"_rng", 0},  // ring
	{"_stk", 0},  // stroke
	{"_tld", 0},  // tilde
};

// Encoding of one table entry (16 bits):
//   CAPITAL            0: upper-case character (or not a letter), no data
//   LETTER(ch,m1,m2)   bits 0-6 = base letter, bits 8-14 = accent code (mod2 is currently unused)
//   LIGATURE(c1,c2,m)  bits 0-6 = first letter, bits 8-14 = second letter, bit 15 set
#define CAPITAL  0
#define LETTER(ch,mod1,mod2)    ((ch) + ((mod1) << 8))
#define LIGATURE(ch1,ch2,mod1)  ((ch1) + ((ch2) << 8) + 0x8000)

// characters U+00e0 to U+017f
// Unsigned, because LIGATURE() sets bit 15 which does not fit in a signed short.
const unsigned short letter_accents_0e0[] = {
	LETTER('a',M_GRAVE,0),   // U+00e0
	LETTER('a',M_ACUTE,0),
	LETTER('a',M_CIRCUMFLEX,0),
	LETTER('a',M_TILDE,0),
	LETTER('a',M_DIAERESIS,0),
	LETTER('a',M_RING,0),
	LIGATURE('a','e',0),
	LETTER('c',M_CEDILLA,0),
	LETTER('e',M_GRAVE,0),
	LETTER('e',M_ACUTE,0),
	LETTER('e',M_CIRCUMFLEX,0),
	LETTER('e',M_DIAERESIS,0),
	LETTER('i',M_GRAVE,0),
	LETTER('i',M_ACUTE,0),
	LETTER('i',M_CIRCUMFLEX,0),
	LETTER('i',M_DIAERESIS,0),
	LETTER('d',M_NAME,0),  // eth  // U+00f0
	LETTER('n',M_TILDE,0),
	LETTER('o',M_GRAVE,0),
	LETTER('o',M_ACUTE,0),
	LETTER('o',M_CIRCUMFLEX,0),
	LETTER('o',M_TILDE,0),
	LETTER('o',M_DIAERESIS,0),
	0,     // division sign
	LETTER('o',M_STROKE,0),
	LETTER('u',M_GRAVE,0),
	LETTER('u',M_ACUTE,0),
	LETTER('u',M_CIRCUMFLEX,0),
	LETTER('u',M_DIAERESIS,0),
	LETTER('y',M_ACUTE,0),
	LETTER('t',M_NAME,0),  // thorn
	LETTER('y',M_DIAERESIS,0),
	CAPITAL,                 // U+0100
	LETTER('a',M_MACRON,0),
	CAPITAL,
	LETTER('a',M_BREVE,0),
	CAPITAL,
	LETTER('a',M_OGONEK,0),
	CAPITAL,
	LETTER('c',M_ACUTE,0),
	CAPITAL,
	LETTER('c',M_CIRCUMFLEX,0),
	CAPITAL,
	LETTER('c',M_DOT_ABOVE,0),
	CAPITAL,
	LETTER('c',M_CARON,0),
	CAPITAL,
	LETTER('d',M_CARON,0),
	CAPITAL,                 // U+0110
	LETTER('d',M_STROKE,0),
	CAPITAL,
	LETTER('e',M_MACRON,0),
	CAPITAL,
	LETTER('e',M_BREVE,0),
	CAPITAL,
	LETTER('e',M_DOT_ABOVE,0),
	CAPITAL,
	LETTER('e',M_OGONEK,0),
	CAPITAL,
	LETTER('e',M_CARON,0),
	CAPITAL,
	LETTER('g',M_CIRCUMFLEX,0),
	CAPITAL,
	LETTER('g',M_BREVE,0),
	CAPITAL,                 // U+0120
	LETTER('g',M_DOT_ABOVE,0),
	CAPITAL,
	LETTER('g',M_CEDILLA,0),
	CAPITAL,
	LETTER('h',M_CIRCUMFLEX,0),
	CAPITAL,
	LETTER('h',M_STROKE,0),
	CAPITAL,
	LETTER('i',M_TILDE,0),
	CAPITAL,
	LETTER('i',M_MACRON,0),
	CAPITAL,
	LETTER('i',M_BREVE,0),
	CAPITAL,
	LETTER('i',M_OGONEK,0),
	CAPITAL,                 // U+0130
	LETTER('i',M_NAME,0),  // dotless i
	CAPITAL,
	LIGATURE('i','j',0),
	CAPITAL,
	LETTER('j',M_CIRCUMFLEX,0),
	CAPITAL,
	LETTER('k',M_CEDILLA,0),
	LETTER('k',M_NAME,0),  // kra
	CAPITAL,
	LETTER('l',M_ACUTE,0),
	CAPITAL,
	LETTER('l',M_CEDILLA,0),
	CAPITAL,
	LETTER('l',M_CARON,0),       // base letter is 'l' (ell), not the digit '1'
	CAPITAL,
	LETTER('l',M_MIDDLE_DOT,0),  // U+0140
	CAPITAL,
	LETTER('l',M_STROKE,0),
	CAPITAL,
	LETTER('n',M_ACUTE,0),
	CAPITAL,
	LETTER('n',M_CEDILLA,0),
	CAPITAL,
	LETTER('n',M_CARON,0),
	LETTER('n',M_NAME,0),  // apostrophe n
	CAPITAL,
	LETTER('n',M_NAME,0),  // eng
	CAPITAL,
	LETTER('o',M_MACRON,0),
	CAPITAL,
	LETTER('o',M_BREVE,0),
	CAPITAL,                 // U+0150
	LETTER('o',M_DOUBLE_ACUTE,0),
	CAPITAL,
	LIGATURE('o','e',0),
	CAPITAL,
	LETTER('r',M_ACUTE,0),
	CAPITAL,
	LETTER('r',M_CEDILLA,0),
	CAPITAL,
	LETTER('r',M_CARON,0),
	CAPITAL,
	LETTER('s',M_ACUTE,0),
	CAPITAL,
	LETTER('s',M_CIRCUMFLEX,0),
	CAPITAL,
	LETTER('s',M_CEDILLA,0),
	CAPITAL,                 // U+0160
	LETTER('s',M_CARON,0),
	CAPITAL,
	LETTER('t',M_CEDILLA,0),
	CAPITAL,
	LETTER('t',M_CARON,0),
	CAPITAL,
	LETTER('t',M_STROKE,0),
	CAPITAL,
	LETTER('u',M_TILDE,0),
	CAPITAL,
	LETTER('u',M_MACRON,0),
	CAPITAL,
	LETTER('u',M_BREVE,0),
	CAPITAL,
	LETTER('u',M_RING,0),
	CAPITAL,                 // U+0170
	LETTER('u',M_DOUBLE_ACUTE,0),
	CAPITAL,
	LETTER('u',M_OGONEK,0),
	CAPITAL,
	LETTER('w',M_CIRCUMFLEX,0),
	CAPITAL,
	LETTER('y',M_CIRCUMFLEX,0),
	CAPITAL,   // Y-DIAERESIS
	CAPITAL,
	LETTER('z',M_ACUTE,0),
	CAPITAL,
	LETTER('z',M_DOT_ABOVE,0),
	CAPITAL,
	LETTER('z',M_CARON,0),
	LETTER('s',M_NAME,0),  // long-s  // U+17f
};
| int Translator::LookupLetter2(unsigned int letter, char *ph_buf) | |||
| {//============================================================= | |||
| int len; | |||
| char single_letter[10]; | |||
| single_letter[0] = 0; | |||
| single_letter[1] = '_'; | |||
| len = utf8_out(letter, &single_letter[2]); | |||
| single_letter[2+len] = 0; | |||
| if(Lookup(&single_letter[1],ph_buf) == 0) | |||
| { | |||
| single_letter[1] = ' '; | |||
| if(Lookup(&single_letter[2],ph_buf) == 0) | |||
| { | |||
| TranslateRules(&single_letter[2], ph_buf, 20, NULL,0,0); | |||
| } | |||
| } | |||
| return(ph_buf[0]); | |||
| } | |||
| void Translator::LookupAccentedLetter(unsigned int letter, char *ph_buf) | |||
| {//===================================================================== | |||
| // lookup the character in the accents table | |||
| int accent_data; | |||
| int accent1; | |||
| int basic_letter; | |||
| int letter2=0; | |||
| char ph_letter1[30]; | |||
| char ph_letter2[30]; | |||
| char ph_accent1[30]; | |||
| if((letter >= 0xe0) && (letter < 0x17f)) | |||
| { | |||
| accent_data = letter_accents_0e0[letter - 0xe0]; | |||
| basic_letter = accent_data & 0x7f; | |||
| if((accent1 = (accent_data >> 8) & 0x7f) != 0) | |||
| { | |||
| if(accent_data & 0x8000) | |||
| { | |||
| letter2 = accent1; | |||
| accent1 = 0; | |||
| } | |||
| if(Lookup(accents_tab[accent1].name, ph_accent1) != 0) | |||
| { | |||
| if(LookupLetter2(basic_letter, ph_letter1) != 0) | |||
| { | |||
| if(accent1 == 0) | |||
| { | |||
| //ligature | |||
| LookupLetter2(letter2, ph_letter2); | |||
| sprintf(ph_buf,"%s%c%s%s",ph_accent1, phonPAUSE_VSHORT, ph_letter1, ph_letter2); | |||
| } | |||
| else | |||
| { | |||
| if(langopts.accents & 1) | |||
| sprintf(ph_buf,"%s%c%s", ph_accent1, phonPAUSE_VSHORT, ph_letter1); | |||
| else | |||
| sprintf(ph_buf,"%s%c%s%c", ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } // end of LookupAccentedLetter | |||
| void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1) | |||
| {//============================================================================= | |||
| @@ -43,7 +327,9 @@ void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1) | |||
| unsigned char *p; | |||
| static char single_letter[10] = {0,0}; | |||
| char ph_stress[2]; | |||
| char ph_buf3[30]; | |||
| unsigned int dict_flags[2]; | |||
| char ph_buf3[40]; | |||
| char *ptr; | |||
| ph_buf1[0] = 0; | |||
| len = utf8_out(letter,&single_letter[2]); | |||
| @@ -83,6 +369,11 @@ void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1) | |||
| single_letter[3+len] = next_byte; // follow by space-space if the end of the word, or space-0x31 | |||
| single_letter[1] = '_'; | |||
| // if the $accent flag is set for this letter, use the accents table (below) | |||
| dict_flags[1] = 0; | |||
| ptr = &single_letter[1]; | |||
| if(Lookup(&single_letter[1],ph_buf3) == 0) | |||
| { | |||
| single_letter[1] = ' '; | |||
| @@ -92,6 +383,11 @@ void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1) | |||
| } | |||
| } | |||
| if(ph_buf3[0] == 0) | |||
| { | |||
| LookupAccentedLetter(letter, ph_buf3); | |||
| } | |||
| if(ph_buf3[0] == 0) | |||
| { | |||
| ph_buf1[0] = 0; | |||
| @@ -185,6 +481,7 @@ int Translator::TranslateLetter(char *word, char *phonemes, int control, int wor | |||
| for(p2 = hexbuf; *p2 != 0; p2++) | |||
| { | |||
| pbuf += strlen(pbuf); | |||
| *pbuf++ = phonPAUSE_VSHORT; | |||
| LookupLetter(*p2, 0, pbuf); | |||
| } | |||
| } | |||
| @@ -517,6 +517,7 @@ const char *Translator::LookupSpecial(const char *string, char* text_out) | |||
| const char *Translator::LookupCharName(int c) | |||
| {//========================================== | |||
| // Find the phoneme string (in ascii) to speak the name of character c | |||
| // Used for punctuation characters and symbols | |||
| int ix; | |||
| unsigned int flags[2]; | |||
| @@ -35,7 +35,7 @@ | |||
| #include "translate.h" | |||
| #include "wave.h" | |||
| const char *version_string = "1.31.19 28.Feb.08"; | |||
| const char *version_string = "1.31.20 01.Mar.08"; | |||
| const int version_phdata = 0x013105; | |||
| int option_device_number = -1; | |||
| @@ -1146,7 +1146,9 @@ int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume) | |||
| if(p->newword) | |||
| { | |||
| last_frame = NULL; | |||
| if(translator->langopts.param[LOPT_WORD_MERGE] == 0) | |||
| last_frame = NULL; | |||
| sourceix = (p->sourceix & 0x7ff) + clause_start_char; | |||
| if(p->newword & 4) | |||
| @@ -663,6 +663,7 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.stress_rule = 1; | |||
| tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | |||
| tr->letter_bits_offset = OFFSET_TAMIL; | |||
| tr->langopts.param[LOPT_WORD_MERGE] = 1; // don't break vowels betwen words | |||
| memset(tr->letter_bits,0,sizeof(tr->letter_bits)); | |||
| SetLetterBitsRange(tr,LETTERGP_A,0x05,0x14); // vowel letters | |||
| @@ -1055,6 +1056,7 @@ Translator_Afrikaans::Translator_Afrikaans() : Translator() | |||
| SetLetterVowel(this,'y'); // add 'y' to vowels | |||
| langopts.numbers = 0x8d1 + NUM_ROMAN; | |||
| langopts.accents = 1; | |||
| memcpy(stress_lengths,stress_lengths2,sizeof(stress_lengths)); | |||
| } | |||
| @@ -74,6 +74,7 @@ | |||
| #define FLAG_VERB_EXT 0x100 /* extend the 'verb follows' */ | |||
| #define FLAG_CAPITAL 0x200 /* pronunciation if initial letter is upper case */ | |||
| #define FLAG_ALLCAPS 0x400 // only if the word is all capitals | |||
| #define FLAG_ACCENT 0x800 // character name is base-character name + accent name | |||
| @@ -230,7 +231,7 @@ extern const int param_defaults[N_SPEECH_PARAM]; | |||
| #define N_LOPTS 14 | |||
| #define N_LOPTS 15 | |||
| #define LOPT_DIERESES 1 | |||
| // 1=remove [:] from unstressed syllables, 2= remove from unstressed or non-penultimate syllables | |||
| // bit 4=0, if stress < 4, bit 4=1, if not the highest stress in the word | |||
| @@ -254,9 +255,8 @@ extern const int param_defaults[N_SPEECH_PARAM]; | |||
| // increase this to prevent sonorants being shortened before shortened (eg. unstressed) vowels | |||
| #define LOPT_SONORANT_MIN 7 | |||
| // bit 0=Italian "syntactic doubling" of consonants in the word after a word marked with $double attribute | |||
| // bit 1=also after a word which ends with a stressed vowel | |||
| #define LOPT_IT_DOUBLING 8 | |||
| // don't break vowels at word boundary | |||
| #define LOPT_WORD_MERGE 8 | |||
| // max. amplitude for vowel at the end of a clause | |||
| #define LOPT_MAXAMP_EOC 9 | |||
| @@ -277,6 +277,11 @@ extern const int param_defaults[N_SPEECH_PARAM]; | |||
| // stressed syllable is indicated by capitals | |||
| #define LOPT_SYLLABLE_CAPS 13 | |||
| // bit 0=Italian "syntactic doubling" of consonants in the word after a word marked with $double attribute | |||
| // bit 1=also after a word which ends with a stressed vowel | |||
| #define LOPT_IT_DOUBLING 14 | |||
| typedef struct { | |||
| // bits0-2 separate words with (1=pause_vshort, 2=pause_short, 3=pause, 4=pause_long 5=[?] phoneme) | |||
| @@ -341,6 +346,10 @@ typedef struct { | |||
| int max_roman; | |||
| int thousands_sep; | |||
| int decimal_sep; | |||
| // bit 0, accent name before the letter name | |||
| int accents; | |||
| int tone_language; // 1=tone language | |||
| int intonation_group; | |||
| int long_stop; // extra mS pause for a lengthened stop | |||
| @@ -430,6 +439,8 @@ private: | |||
| const char *LookupSpecial(const char *string, char *text_out); | |||
| const char *LookupCharName(int c); | |||
| void LookupLetter(unsigned int letter, int next_byte, char *ph_buf); | |||
| int LookupLetter2(unsigned int letter, char *ph_buf); | |||
| void LookupAccentedLetter(unsigned int letter, char *ph_buf); | |||
| int LookupNum2(int value, int control, char *ph_out); | |||
| int LookupNum3(int value, char *ph_out, int suppress_null, int thousandplex, int prev_thousands); | |||
| int LookupThousands(int value, int thousandplex, char *ph_out); | |||