Fixes to: announce punctuation (--punct option). git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@227 d46cf337-b52f-0410-862d-fd96e6ae7743 master
@@ -595,11 +595,18 @@ t tS v w x z | |||
Dictionary pa_dict | |||
@ a a~ e E e~ E~ i | |||
I i: i~ I~ o O o: O~ | |||
o~ r- U u u~ U~ V V~ | |||
I i~ I~ o O O~ o~ r- | |||
U u u~ U~ V V~ | |||
: ; b bh c ch d d. | |||
dh dh. f g gh H j J | |||
Jh k kh l l. m n N | |||
n. n^ p ph Q R s S | |||
t t. th th. v x z | |||
Dictionary prs_dict | |||
@ A i o | |||
b d h l t X |
@@ -262,8 +262,8 @@ U+32f n,0nsI2l'abI2k | |||
// numeric | |||
_0 zero $text // TEST | |||
//_0 z'i@roU | |||
//_0 zero $text // TEST | |||
_0 z'i@roU | |||
_1 w'0n | |||
?6 _1 w'Vn | |||
_2 t'u: | |||
@@ -1582,12 +1582,13 @@ offers 0f3z | |||
ogre oUg3 | |||
okay oUk'eI | |||
oled oUlEd // abbrev | |||
omeg oUm'eIg@ | |||
omega oUm'eIg@ | |||
omelet 0ml@t | |||
omelette 0ml@t | |||
omnibus $1 | |||
omniscience 0mn'IsI@ns | |||
opaque oUp'eIk | |||
oped 0p'Ed | |||
opine oUp'aIn | |||
opossum @p0s@m | |||
opponent @poUn@nt |
@@ -24,7 +24,7 @@ | |||
// 2006-11-18 Gilles Casse <[email protected]> | |||
// | |||
// Updated 2009-09-28 Michel Such <[email protected]> | |||
// Updated 2009-10-09 Michel Such <[email protected]> | |||
// | |||
// * Numbers, a few abbreviations and exceptions. | |||
// | |||
@@ -864,11 +864,13 @@ vincent vE~sA~ | |||
afrikaans afrikans | |||
bahamas baamas | |||
bangladesh bA~gladES | |||
bayeux bajY | |||
bélarus belarys | |||
belize beliz | |||
bissau bisa'o | |||
(burkina faso) byrkina||faso | |||
burundi burundi | |||
calvados calvadoss $text | |||
croatie kroasi | |||
esperanto E2sperA~to | |||
groenland groEnlA~d |
@@ -23,7 +23,7 @@ | |||
// 2006-11-18 Gilles Casse <[email protected]> | |||
// | |||
// Updated: 2009-09-25 Michel Such <[email protected]> | |||
// Updated: 2009-10-09 Michel Such <[email protected]> | |||
// | |||
// Letter classes: | |||
@@ -300,6 +300,7 @@ _) en (A A~n // enivrer | |||
ol) en (t_ A~ // somnolent, violent | |||
Vol) en (t_ // somnolent, violent | |||
Xol) ent (_ t2 // volent | |||
eul) ent (_ t2 // veulent | |||
ul) en (t_ A~ // turbulent | |||
Vul) en (t_ | |||
imCoC) en (t_ A~ // impotent | |||
@@ -878,6 +879,7 @@ C) i (es_ i | |||
_) n (_° nymero | |||
_) neuro (@@P5 nYro | |||
_) nvda (P4 Envedea | |||
.group o |
@@ -49,7 +49,8 @@ _0Z1 tizEd // tenths | |||
_0Z2 sa:zAd // hundredths | |||
_0Z3 EzR2Ed | |||
_0Z4 ti:zEzR2Ed | |||
_0Z5 sa:zEzR2Ed | |||
_0Z6 miliomod | |||
// ordinal numbers | |||
@@ -90,6 +91,8 @@ szeptember $alt | |||
október $alt | |||
november $alt | |||
december $alt | |||
// The following 12 rows contain shorter Hungarian month names | |||
jan $alt | |||
feb $alt |
@@ -16,12 +16,15 @@ _) alattvaló _!'AlAtvAlo: | |||
.group á | |||
á a: | |||
@) ában (_S4 a:bAn | |||
@) ának (_S4 a:nAk | |||
.group b | |||
b b | |||
bb b: | |||
@) ban (_S3 bAn | |||
@) ben (_S3 bEn | |||
biz (A b'iz, // bizalmas etc. | |||
biedermeier bi:dER2ma:jER2 | |||
@@ -75,14 +78,20 @@ pá) c (só ts | |||
dd d: | |||
dts tS: | |||
dt t: | |||
apró) d (sz d | |||
a) dsz ts: | |||
beszé) d (szin d | |||
cselé) d d | |||
enge) dsz ts: | |||
engedelmeske) dsz ts: | |||
fogó) dz (kod ts | |||
hazu) dsz ts: | |||
ha) d (járat d | |||
hor) d (szék d | |||
mara) dsz ts: | |||
tu) dsz ts: | |||
kére) dz ts | |||
mentegető) dz z | |||
// dd d: | |||
dz dz | |||
A) dz (A d:z | |||
@@ -105,11 +114,14 @@ min) dny n^ | |||
.group e | |||
e E | |||
D_-_) es (_ %ES | |||
_) egyezség _!'EJ:ESSe:g | |||
.group é | |||
é e: | |||
éi (é e::i: | |||
@) ében (_S4 e:bEn | |||
@) ének (_S4 e:nEk | |||
.group ë | |||
ë Y | |||
@@ -131,11 +143,14 @@ gg g: | |||
C) gyj J | |||
A) gyj (A J: | |||
ha) gyj J: | |||
_e) gy (es J: | |||
e) gy (jelentésű J | |||
_e) gy (es J: | |||
_e) gy (et_ J: | |||
_e) gy (etlen J: | |||
_e) gy (ik J: | |||
_e) gy (üt J: | |||
e) gyj (elentésű Jj | |||
gyön) gy (sor J | |||
_minde) gy (ik J: | |||
na) gysz (a Js | |||
@@ -155,17 +170,25 @@ le) g (gy g | |||
.group i | |||
i i | |||
@) i (_S1 i | |||
_) itthon ithon | |||
.group í | |||
í i: | |||
íts i:tS: | |||
fesz) ítsd i:tSd | |||
bizony) ítsd i:tSd | |||
.group j | |||
j j | |||
@) jában (_S5 ja:bAn | |||
@) jának (_S5 ja:nAk | |||
_) javíts jAvi:tS: | |||
_) javítsd jAvi:tSd | |||
.group k | |||
k k | |||
kk k: | |||
@@ -271,6 +294,7 @@ szts stS: | |||
föld) s (ánc S | |||
s (színű S | |||
hel) s (inki z | |||
ki) s (ebb SS | |||
_má) s (szó S | |||
_munká) s S | |||
@@ -313,12 +337,18 @@ ttn t:n | |||
tty c: | |||
apá) ts (ág tS: | |||
becsüle) t t | |||
csalódo) tts (ág tS: | |||
cson) t t | |||
cson) tj c | |||
ezüs) t t | |||
éle) t t | |||
felej) ts tS | |||
gyapo) t t | |||
gráni) t t | |||
já) tsz (ani ts: | |||
tsz ts: | |||
szorí) ts (a tS: | |||
@@ -350,6 +380,8 @@ vörösmar) ty ti | |||
á) t (sző t | |||
á) t (szú t | |||
á) t (szű t | |||
á) t (jut t | |||
á) t (jö t | |||
bizo) tts (ág tS: | |||
állí) ts tS: | |||
állapo) t (sor t | |||
@@ -361,12 +393,17 @@ kiál) ts tS | |||
köve) ts (ég tS: | |||
kür) t t | |||
kür) tj c | |||
küldö) ttj c: | |||
korlátozo) tts tS: | |||
neve) ts (ég tS: | |||
on) ts (u tS | |||
o) tt t: | |||
szen)t (szék t | |||
szé) t t | |||
szöve) ts (ég tS: | |||
söté) ts (ég tS: | |||
sajá) ts (ág tS: | |||
pillan) ts tS | |||
romlo) tts tS: | |||
tar) ts tS | |||
@@ -433,6 +470,7 @@ csontvá) z z | |||
e) z (t s | |||
ho) z (ta s | |||
ho) z (tá s | |||
helye) z (ked s | |||
inté) z (ked s | |||
költö) z (köd s | |||
@@ -1,4 +1,3 @@ | |||
//_xx விழுக்காடு $text // TESTING doesn't reduce vowels | |||
// This file is UTF8 encoded | |||
// Spelling to phoneme words and exceptions for Tamil | |||
@@ -157,6 +156,73 @@ _1M4 a:jirVmko:d.ie: | |||
_0M5 a:jirVma:jirVmko:d.i // 10,000,000,000,000 | |||
_1M5 a:jirVma:jirVmko:d.i | |||
_dpt _pul.l.i | |||
// exceptions | |||
பாபு $alt // initial ப as [b] | |||
பாலு $alt | |||
பவானி $alt | |||
பகவதி $alt | |||
பானம் $alt | |||
பாறம் $alt | |||
பாரதி $alt | |||
பாரதம் $alt | |||
பரதம் $alt | |||
பாப $alt | |||
பாகம் $alt | |||
பண்தம் $alt | |||
பவனி $alt | |||
தானம் $alt // initial த as [d] | |||
தேகம் $alt | |||
திரவியம் $alt | |||
தூபம் $alt | |||
தீபம் $alt | |||
தீபாவளி $alt | |||
தீரம் $alt | |||
தைரியம் $alt | |||
திடம் $alt | |||
திரவம் $alt | |||
திடீர் $alt | |||
திவசம் $alt | |||
தியானம் $alt | |||
தானியம் $alt | |||
தண்டனை $alt | |||
துவம்சம் $alt | |||
திருஷ்டி $alt | |||
தயாநிதி $alt | |||
தயாளு $alt | |||
துக்கம் $alt | |||
திகில் $alt | |||
துந்துவி $alt | |||
தயவு $alt | |||
தடால் $alt | |||
திக் $alt | |||
தடக் $alt | |||
தீக்ஷை $alt | |||
கக $alt // initial க as [g] | |||
கனம் $alt | |||
கணம் $alt | |||
கணபதி $alt | |||
கணீர் $alt | |||
கிலி $alt | |||
கில்லி $alt | |||
கோதுமை $alt | |||
கோபுரம் $alt | |||
காந்தம் $alt | |||
காந்தி $alt | |||
குண்டு $alt | |||
கோலி $alt | |||
குகை $alt | |||
கிரி $alt | |||
கோமதி $alt | |||
பப pVpV |
@@ -43,7 +43,7 @@ | |||
_) எ ;e // add a short [j] sound at start of word ? | |||
ஏ e: | |||
_) ஏ ;e:: // add a short [j] sound at start of word ? | |||
_) ஏ ;e: // add a short [j] sound at start of word ? | |||
ஐ aI | |||
@@ -62,12 +62,22 @@ | |||
்) க (B g | |||
_) க kV | |||
_) க (B k | |||
_) க (T gV // if word has $alt attribute in ta_list | |||
_) க (BT g // $alt | |||
க்க kkV | |||
க்க (B kk | |||
_) க (ௌ g | |||
க (ீத g | |||
க (ஜ gV | |||
_) க (ம்பீர gV | |||
_) க (ந்தர்வ gV | |||
_) க (ோவிந்த g | |||
ங NV | |||
ங (B N | |||
ச sV // ?? [z] | |||
ச (B s | |||
_) ச sa | |||
@@ -81,12 +91,15 @@ | |||
ஞ்) ச dZV | |||
ஞ்) ச (B dZ | |||
ஜ dZV | |||
ஜ (B dZ | |||
ஞ n^V | |||
ஞ (B n^ | |||
ட d.V | |||
ட (B d. | |||
_) ட t.V | |||
@@ -94,26 +107,43 @@ | |||
ட்ட t.t.V | |||
ட்ட (B t.t. | |||
ண n.V | |||
ண (B n. | |||
த dV | |||
த (B d | |||
_) த tV | |||
_) த (B t | |||
_) த (T dV // if word has $alt attribute in ta_list | |||
_) த (BT d // if word has $alt | |||
த்த ttV | |||
த்த (B tt | |||
// exceptions, த as [d] | |||
_) த (ன dV | |||
_) த (ர்ம dV | |||
_) த (ுர் d | |||
_) த (ூர d | |||
_) த (ெய்வ d | |||
_) த (ுஷ் d | |||
_) த (க்ஷிணா dV | |||
ந nV | |||
ந (B n // dental n | |||
ன nV | |||
ன (B n // alveolar n | |||
ப bV | |||
ப (B b | |||
_) ப pV | |||
_) ப (B p | |||
_) ப (T bV // if word has $alt attribute in ta_list | |||
_) ப (BT b // if word has $alt | |||
ப்ப ppV | |||
ப்ப (B pp | |||
ட்) ப pV | |||
@@ -123,43 +153,56 @@ | |||
ஃ) ப fV | |||
ஃ) ப (B f | |||
ம mV | |||
ம (B m | |||
ய jV | |||
ய (B j | |||
ர rV | |||
ர (B r | |||
ற RV | |||
ற (B R | |||
ற் (ற t. // RR -> t.R | |||
ல lV | |||
ல (B l | |||
ள l.V | |||
ள (B l. | |||
ழ z.V | |||
ழ (B z. | |||
வ vV | |||
வ (B v | |||
ஶ SV | |||
ஶ (B S | |||
ஷ s.V | |||
ஷ (B s. | |||
ஸ sV | |||
ஸ (B s | |||
ஹ hV | |||
ஹ (B h | |||
க்ஷ ks.V | |||
க்ஷ (B ks. | |||
@@ -1,4 +1,4 @@ | |||
61 phoneme tables | |||
62 phoneme tables | |||
new total | |||
base 104 104 | |||
base2 26 125 | |||
@@ -61,6 +61,7 @@ | |||
eu 6 126 | |||
mn 15 115 | |||
pa 13 153 | |||
prs 8 111 | |||
Data file Used by | |||
b/b [b] base | |||
@@ -833,7 +834,6 @@ ustop/kr [k] base | |||
[k] base2 | |||
[k] en | |||
[k] fi | |||
[k] fr | |||
[kh] hi | |||
[k] hu | |||
[k] lv | |||
@@ -1185,6 +1185,7 @@ vdiph/aoo [aU] en_us | |||
vdiph/au [aU] en | |||
[aU] af | |||
[a:U] vi | |||
[aU] prs | |||
vdiph/au# [aU] en_sc | |||
vdiph/au_2 [au] zhy | |||
vdiph/au_3 [aU] en_rp | |||
@@ -1204,6 +1205,7 @@ vdiph/eei [EI] base2 | |||
[EI] vi | |||
[eI] hy | |||
[eI] ne | |||
[eI] prs | |||
vdiph/eei_2 [eI] eo | |||
[ei] fi | |||
[ei] lv | |||
@@ -1339,11 +1341,11 @@ vnasal/e_n [e~] af | |||
[e~] hi | |||
[e~] pt | |||
[e~] bn | |||
vnasal/ii_n [I~] hi | |||
vnasal/i_n [i~] pt | |||
[i~] bn | |||
[i:~] bn | |||
vnasal/i_n2 [I~] hi | |||
[i~] hi | |||
vnasal/i_n2 [i~] hi | |||
vnasal/m- [m-] sw | |||
vnasal/n- [n-] sw | |||
vnasal/nn- [N-] sw | |||
@@ -1629,6 +1631,7 @@ vowel/aa_2 [A:] en | |||
[A] zh | |||
vowel/aa_3 [A:] af | |||
[A] sq | |||
[A] prs | |||
vowel/aa_4 [A:] sv | |||
[O] vi | |||
vowel/aa_5 [A:] en_n | |||
@@ -1755,6 +1758,7 @@ vowel/e_mid [E] en_rp | |||
[E2] id | |||
[e] hy | |||
[E] bn | |||
[E] prs | |||
vowel/e_mid2 [E] af | |||
[E] de | |||
[E2] de | |||
@@ -1803,6 +1807,7 @@ vowel/i [i] base2 | |||
[i:] kn | |||
[i] ne | |||
[i] mn | |||
[i] prs | |||
vowel/i# [y:] cy | |||
vowel/i_2 [i2] de | |||
[i:] de | |||
@@ -1925,6 +1930,7 @@ vowel/o [o] base2 | |||
[oU] en_n | |||
[oU] en_wi | |||
[o:] de | |||
[o2] fr | |||
[o] hi | |||
[o] ta | |||
[o] it | |||
@@ -1945,8 +1951,8 @@ vowel/o [o] base2 | |||
[o] bn | |||
[o] mr | |||
[o:] mn | |||
[o:] pa | |||
vowel/o_2 [o:] cy | |||
[o2] fr | |||
[o:] hi | |||
[o:] hu | |||
[o:] la | |||
@@ -1954,11 +1960,9 @@ vowel/o_2 [o:] cy | |||
[o] vi | |||
[o] da | |||
[o:] bn | |||
[o:] pa | |||
vowel/o-_2 [V] en_n | |||
[V] en_wm | |||
vowel/o_3 [oU] en_sc | |||
[o] fr | |||
[o:] ta | |||
vowel/o-_3 [U] en_rp | |||
vowel/o-_4 [o] ro | |||
@@ -1966,6 +1970,7 @@ vowel/o_5 [O] nl | |||
[O] da | |||
vowel/o_6 [O:] nl | |||
vowel/o_7 [o] ku | |||
vowel/o_8 [o] fr | |||
vowel/oe [W] en | |||
[W] af | |||
[W] de | |||
@@ -1996,6 +2001,7 @@ vowel/oo [O:] en_sc | |||
[o] zhy | |||
[O] tr | |||
[O] id | |||
[o] prs | |||
vowel/oo_1 [O:] en_n | |||
[O:] en_rp | |||
[O@] en_rp | |||
@@ -2064,6 +2070,7 @@ vowel/u [u:] en_wi | |||
[u] kn | |||
[u:] kn | |||
[u] bn | |||
[u] prs | |||
vowel/u# [u:] en_sc | |||
[Y] tr | |||
vowel/u_2 [u1] fi | |||
@@ -2250,8 +2257,7 @@ vwl_en_us/oor [0] en_us | |||
vwl_en_us/or [o@] en_us | |||
[O:] en_sc | |||
vwl_en_us/ur [U@] en_us | |||
vwl_fr/r [R] fr | |||
[r] fr | |||
vwl_fr/r [r] fr | |||
[r2] fr | |||
vwl_fr/_r [R] fr | |||
[r] fr | |||
@@ -2265,6 +2271,7 @@ vwl_fr/r@ [@] fr | |||
[E~] fr | |||
[W~] fr | |||
vwl_fr/@R [x] pt_pt | |||
vwl_fr/_r2 [R] fr | |||
vwl_fr/@R2 [R] fr_ca | |||
vwl_fr/@R3 [Q2] hy | |||
vwl_fr/@R4 [Q2] hy |
@@ -1,7 +1,7 @@ | |||
//==================================================== | |||
// French | |||
//==================================================== | |||
// Updated 2009-09-30 Michel Such <[email protected]> | |||
// Updated 2009-10-07 Michel Such <[email protected]> | |||
phoneme (l) virtual | |||
// Used for l and l/ | |||
@@ -96,7 +96,7 @@ endphoneme | |||
phoneme i- // shorter | |||
vowel starttype (i) endtype (i) | |||
length 130 | |||
length 110 | |||
formants vowel/i_2 | |||
after (l) l/li-15 | |||
after (r) vwl_fr/ri | |||
@@ -119,7 +119,7 @@ endphoneme | |||
phoneme o | |||
vowel starttype (o) endtype (o) | |||
length 170 | |||
formants vowel/o_3 | |||
formants vowel/o_8 | |||
after (l) l/lo-10 | |||
after (r) vwl_fr/ro | |||
before l/ DFT+l/l_o | |||
@@ -132,7 +132,7 @@ endphoneme | |||
phoneme o2 | |||
vowel starttype (o) endtype (o) | |||
length 170 | |||
formants vowel/o_2 | |||
formants vowel/o | |||
after (l) l/lo-10 | |||
after (r) vwl_fr/ro | |||
before l/ vowel/o_mid2+l/l_o | |||
@@ -237,10 +237,10 @@ endphoneme | |||
phoneme wA | |||
vowel starttype (o) endtype (a) | |||
length 230 | |||
length 220 | |||
formants vwl_fr/w_a | |||
after (l) l/lo-10 | |||
after (r) vwl_fr/ro-15 | |||
after (r) vwl_fr/ro | |||
before l/ DFT-10+l/l_a | |||
before j DFT | |||
before (r) DFT+vwl_fr/xr | |||
@@ -394,7 +394,7 @@ phoneme k | |||
lengthmod 2 | |||
wave ustop/k%40 // reduce strength of noise burst | |||
before _ ustop/k_ | |||
before (r) ustop/kr%45 | |||
before (r) ustop/k%50 | |||
before r/2 ustop/k%60 | |||
before (l) ustop/kl%45 | |||
before (i) ustop/ki%45 | |||
@@ -465,7 +465,7 @@ endphoneme | |||
phoneme r // between vowels in a word | |||
liquid uvl starttype (r) endtype (r) | |||
vowelin f1=0 f2=1600 -300 300 f3=-200 80 | |||
vowelin f1=0 f2=1600 -300 300 f3=-200 80 len=20 | |||
vowelout f1=2 f2=1600 -300 300 f3=-300 80 | |||
length 100 | |||
lengthmod 7 | |||
@@ -481,12 +481,12 @@ endphoneme | |||
phoneme R // First letter of a word | |||
liquid uvl starttype (r) endtype (r) | |||
vowelin f1=0 f2=1600 -300 300 f3=-200 80 | |||
vowelin f1=0 f2=1600 -300 300 f3=-200 80 len=20 | |||
vowelout f1=2 f2=1600 -300 300 f3=-300 80 | |||
length 100 | |||
lengthmod 0 | |||
beforenotvowel r/2 | |||
formants vwl_fr/r | |||
formants vwl_fr/_r2 | |||
after _ vwl_fr/_r | |||
after (a) vwl_fr/_r | |||
after (e) vwl_fr/_r | |||
@@ -513,7 +513,7 @@ endphoneme | |||
phoneme r/2 // variant of [r] when not preceding a vowel | |||
liquid uvl starttype r/2 endtype r/2 | |||
vowelout f1=2 f2=1600 -300 300 f3=-300 80 | |||
vowelout f1=2 f2=1600 -300 300 f3=-300 100 | |||
length 100 | |||
lengthmod 2 | |||
formants vwl_fr/r_+r3/rx%25 |
@@ -169,7 +169,7 @@ endphoneme | |||
phoneme I~ | |||
vowel starttype (i) endtype (i) | |||
length 170 | |||
formants vnasal/i_n2 | |||
formants vnasal/ii_n | |||
endphoneme | |||
phoneme e~ |
@@ -1382,3 +1382,6 @@ include ph_mongolian | |||
phonemetable pa hi | |||
include ph_punjabi | |||
phonemetable prs base | |||
include ph_dari |
@@ -223,8 +223,6 @@ static int compile_line(char *linebuf, char *dict_line, int *hash) | |||
unsigned char bad_phoneme[4]; | |||
static char nullstring[] = {0}; | |||
WORD_TAB winfo; | |||
comment = NULL; | |||
text_not_phonemes = 0; | |||
phonetic = word = nullstring; | |||
@@ -437,12 +435,11 @@ step=1; // TEST | |||
if(word[0] == '_') | |||
{ | |||
// This is a special word, used by eSpeak. Translate this into phonemes now | |||
memset(&winfo,0,sizeof(winfo)); | |||
strcat(phonetic, " "); // need a space to indicate word-boundary | |||
// PROBLEM vowel reductions are not applied to the translated phonemes | |||
// condition rules are not applied | |||
TranslateWord(translator,phonetic,0,&winfo); | |||
TranslateWord(translator,phonetic,0,NULL); | |||
text_not_phonemes = 0; | |||
strncpy0(encoded_ph, word_phonemes, N_WORD_BYTES-4); | |||
@@ -247,6 +247,12 @@ int LoadDictionary(Translator *tr, const char *name, int no_error) | |||
sprintf(fname,"%s%c%s_dict",path_home,PATHSEP,name); | |||
size = GetFileLength(fname); | |||
if(tr->data_dictlist != NULL) | |||
{ | |||
Free(tr->data_dictlist); | |||
tr->data_dictlist = NULL; | |||
} | |||
f = fopen(fname,"rb"); | |||
if((f == NULL) || (size <= 0)) | |||
{ | |||
@@ -257,9 +263,6 @@ int LoadDictionary(Translator *tr, const char *name, int no_error) | |||
return(1); | |||
} | |||
if(tr->data_dictlist != NULL) | |||
Free(tr->data_dictlist); | |||
tr->data_dictlist = Alloc(size); | |||
fread(tr->data_dictlist,size,1,f); | |||
fclose(f); |
@@ -317,7 +317,6 @@ void Lexicon_It(int pass) | |||
char phonemes[80]; | |||
char phonemes2[80]; | |||
char buf_out[120]; | |||
WORD_TAB winfo; | |||
static const char *vowels1 = "aeiou"; | |||
static const char *vowels2 = "aeou"; | |||
@@ -438,8 +437,7 @@ void Lexicon_It(int pass) | |||
} | |||
} | |||
// translate | |||
memset(&winfo,0,sizeof(winfo)); | |||
TranslateWord(translator,&word1[1],0,&winfo); | |||
TranslateWord(translator,&word1[1],0, NULL); | |||
DecodePhonemes(word_phonemes,phonemes); | |||
stress_posn1 = 0; | |||
@@ -462,8 +460,7 @@ void Lexicon_It(int pass) | |||
vowel_ix++; | |||
} | |||
memset(&winfo,0,sizeof(winfo)); | |||
TranslateWord(translator,&word2[1],0,&winfo); | |||
TranslateWord(translator,&word2[1],0, NULL); | |||
DecodePhonemes(word_phonemes,phonemes2); | |||
vowel_ix = 1; | |||
@@ -619,7 +616,6 @@ void Lexicon_De() | |||
char pronounce2[80]; | |||
char phonemes[80]; | |||
char phonemes2[80]; | |||
WORD_TAB winfo; | |||
static const char *vowels = "aeiouyAEIOUY29@"; | |||
@@ -728,8 +724,7 @@ void Lexicon_De() | |||
} | |||
// translate | |||
memset(&winfo,0,sizeof(winfo)); | |||
TranslateWord(translator,&word2[1],0,&winfo); | |||
TranslateWord(translator,&word2[1],0, NULL); | |||
DecodePhonemes2(word_phonemes,phonemes); // also need to change some phoneme names | |||
@@ -794,7 +789,6 @@ void Lexicon_Ru() | |||
int wlen; | |||
int len; | |||
int check_root; | |||
WORD_TAB winfo; | |||
char word[80]; | |||
char word2[80]; | |||
@@ -933,8 +927,7 @@ p_unicode = unicode; | |||
} | |||
// translate | |||
memset(&winfo,0,sizeof(winfo)); | |||
TranslateWord(translator, &word2[1],0,&winfo); | |||
TranslateWord(translator, &word2[1],0, NULL); | |||
DecodePhonemes(word_phonemes,phonemes); | |||
// find the stress position in the translation |
@@ -1314,7 +1314,7 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned | |||
char ph_buf2[50]; | |||
char suffix[20]; | |||
char *wordptr; | |||
unsigned int dictflags[2]; | |||
unsigned int dictflags; | |||
static const char str_pause[2] = {phonPAUSE_NOLINK,0}; | |||
@@ -1331,14 +1331,12 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned | |||
// but not if the next word starts with an upper-case letter | |||
ordinal = 2; | |||
word[ix] = ' '; | |||
if(tr->translator_name == L('h','u')) | |||
{ | |||
// lang=hu don't treat dot as ordinal indicator if the next word is a month name ($alt) | |||
dictflags[0] = 0; | |||
wordptr = &word[ix+2]; | |||
LookupDictList(tr, &wordptr, ph_buf, dictflags, 0, NULL); | |||
if(dictflags[0] & FLAG_ALT_TRANS) // TEST | |||
dictflags = TranslateWord(tr, &word[ix+2], 0, NULL); | |||
if(dictflags & FLAG_ALT_TRANS) | |||
ordinal = 0; | |||
} | |||
} | |||
@@ -1514,23 +1512,20 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned | |||
} | |||
break; | |||
case NUM_DFRACTION_1: // italian, say "hundredths" is leading zero | |||
case NUM_DFRACTION_1: // italian, say "hundredths" if leading zero | |||
case NUM_DFRACTION_5: // hungarian, always say "tenths" etc. | |||
if(decimal_count <= 4) | |||
LookupNum3(tr, atoi(&word[n_digits]), ph_buf, 0,0,0); | |||
if((word[n_digits]=='0') || (decimal_mode == NUM_DFRACTION_5)) | |||
{ | |||
LookupNum3(tr, atoi(&word[n_digits]), ph_buf, 0,0,0); | |||
if((word[n_digits]=='0') || (decimal_mode == 0xa000)) | |||
{ | |||
// decimal part has leading zeros, so add a "hundredths" or "thousandths" suffix | |||
sprintf(string,"_0Z%d",decimal_count); | |||
if(Lookup(tr, string, buf1) == 0) | |||
break; // revert to speaking single digits | |||
// decimal part has leading zeros, so add a "hundredths" or "thousandths" suffix | |||
sprintf(string,"_0Z%d",decimal_count); | |||
if(Lookup(tr, string, buf1) == 0) | |||
break; // revert to speaking single digits | |||
strcat(ph_buf,buf1); | |||
} | |||
strcat(ph_out,ph_buf); | |||
n_digits += decimal_count; | |||
strcat(ph_buf,buf1); | |||
} | |||
strcat(ph_out,ph_buf); | |||
n_digits += decimal_count; | |||
break; | |||
case NUM_DFRACTION_3: |
@@ -816,8 +816,8 @@ static int LoadSoundFile2(const char *fname) | |||
static int AnnouncePunctuation(Translator *tr, int c1, int c2, char *buf, int bufix) | |||
{//================================================================================= | |||
static int AnnouncePunctuation(Translator *tr, int c1, int *c2_ptr, char *output, int *bufix, int end_clause) | |||
{//========================================================================================================== | |||
// announce punctuation names | |||
// c1: the punctuation character | |||
// c2: the following character | |||
@@ -826,12 +826,19 @@ static int AnnouncePunctuation(Translator *tr, int c1, int c2, char *buf, int bu | |||
const char *punctname; | |||
int found = 0; | |||
int soundicon; | |||
char *p; | |||
int c2; | |||
int len; | |||
int bufix1; | |||
char buf[200]; | |||
char buf2[80]; | |||
c2 = *c2_ptr; | |||
buf[0] = 0; | |||
if((soundicon = LookupSoundicon(c1)) >= 0) | |||
{ | |||
// add an embedded command to play the soundicon | |||
sprintf(&buf[bufix],"\001%dI ",soundicon); | |||
sprintf(buf,"\001%dI ",soundicon); | |||
UngetC(c2); | |||
found = 1; | |||
} | |||
@@ -839,7 +846,7 @@ static int AnnouncePunctuation(Translator *tr, int c1, int c2, char *buf, int bu | |||
if((punctname = LookupCharName(tr, c1)) != NULL) | |||
{ | |||
found = 1; | |||
if(bufix==0) | |||
if((*bufix==0) || (end_clause==0)) | |||
{ | |||
punct_count=1; | |||
while(c2 == c1) | |||
@@ -847,26 +854,32 @@ static int AnnouncePunctuation(Translator *tr, int c1, int c2, char *buf, int bu | |||
punct_count++; | |||
c2 = GetC(); | |||
} | |||
UngetC(c2); | |||
*c2_ptr = c2; | |||
if(end_clause) | |||
{ | |||
UngetC(c2); | |||
} | |||
p = &buf[bufix]; | |||
if(punct_count==1) | |||
{ | |||
sprintf(p,"%s %s %s",tone_punct_on,punctname,tone_punct_off); | |||
sprintf(buf,"%s %s %s",tone_punct_on,punctname,tone_punct_off); | |||
} | |||
else | |||
if(punct_count < 4) | |||
{ | |||
sprintf(p,"\001+10S%s",tone_punct_on); | |||
sprintf(buf,"\001+10S%s",tone_punct_on); | |||
while(punct_count-- > 0) | |||
sprintf(buf,"%s %s",buf,punctname); | |||
sprintf(p,"%s %s\001-10S",buf,tone_punct_off); | |||
{ | |||
sprintf(buf2," %s",punctname); | |||
strcat(buf, buf2); | |||
} | |||
sprintf(buf2," %s\001-10S",tone_punct_off); | |||
strcat(buf, buf2); | |||
} | |||
else | |||
{ | |||
sprintf(p,"%s %s %d %s %s", | |||
sprintf(buf,"%s %s %d %s %s", | |||
tone_punct_on,punctname,punct_count,punctname,tone_punct_off); | |||
return(CLAUSE_COMMA); | |||
} | |||
} | |||
else | |||
@@ -879,17 +892,25 @@ static int AnnouncePunctuation(Translator *tr, int c1, int c2, char *buf, int bu | |||
ssml_ignore_l_angle = c1; // this was &lt; which was converted to <, don't pick it up again as < | |||
} | |||
ungot_char2 = c1; | |||
buf[bufix] = ' '; | |||
buf[bufix+1] = 0; | |||
buf[0] = ' '; | |||
buf[1] = 0; | |||
} | |||
} | |||
if(found == 0) | |||
return(-1); | |||
bufix1 = *bufix; | |||
len = strlen(buf); | |||
strcpy(&output[*bufix],buf); | |||
*bufix += len; | |||
if(end_clause==0) | |||
return(-1); | |||
if(c1 == '-') | |||
return(CLAUSE_NONE); // no pause | |||
if(bufix > 0) | |||
if(bufix1 > 0) | |||
return(CLAUSE_SHORTCOMMA); | |||
if((strchr_w(punct_close,c1) != NULL) && !iswalnum(c2)) | |||
return(CLAUSE_SHORTFALL+4); | |||
@@ -1919,8 +1940,8 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix | |||
int any_alnum = 0; | |||
int self_closing; | |||
int punct_data; | |||
int is_punctuation; | |||
int save_c2; | |||
int is_end_clause; | |||
int announced_punctuation; | |||
int stressed_word = 0; | |||
const char *p; | |||
wchar_t xml_buf[N_XML_BUF+1]; | |||
@@ -2304,35 +2325,48 @@ if(option_ssml) parag=1; | |||
linelength = 0; | |||
} | |||
if(option_punctuation && (phoneme_mode==0) && (sayas_mode==0) && iswpunct(c1)) | |||
{ | |||
// option is set to explicitly speak punctuation characters | |||
// if a list of allowed punctuation has been set up, check whether the character is in it | |||
if((option_punctuation == 1) || (wcschr(option_punctlist,c1) != NULL)) | |||
{ | |||
if((terminator = AnnouncePunctuation(tr, c1, c2, buf, ix)) >= 0) | |||
return(terminator); | |||
} | |||
} | |||
announced_punctuation = 0; | |||
if((phoneme_mode==0) && (sayas_mode==0) && ((punct = lookupwchar(punct_chars,c1)) != 0)) | |||
if((phoneme_mode==0) && (sayas_mode==0)) | |||
{ | |||
punct_data = punct_attributes[punct]; | |||
is_end_clause = 0; | |||
if(punct_data & PUNCT_IN_WORD) | |||
if((punct = lookupwchar(punct_chars,c1)) != 0) | |||
{ | |||
// Armenian punctuation inside a word | |||
stressed_word = 1; | |||
*tone_type = punct_data >> 12 & 0xf; // override the end-of-sentence type | |||
continue; | |||
punct_data = punct_attributes[punct]; | |||
if(punct_data & PUNCT_IN_WORD) | |||
{ | |||
// Armenian punctuation inside a word | |||
stressed_word = 1; | |||
*tone_type = punct_data >> 12 & 0xf; // override the end-of-sentence type | |||
continue; | |||
} | |||
if((iswspace(c2) || (punct_data & 0x8000) || IsBracket(c2) || (c2=='?') || (c2=='-') || Eof())) | |||
{ | |||
// note: (c2='?') is for when a smart-quote has been replaced by '?' | |||
is_end_clause = 1; | |||
} | |||
} | |||
if((iswspace(c2) || (punct_data & 0x8000) || IsBracket(c2) || (c2=='?') || (c2=='-') || Eof())) | |||
if(option_punctuation && iswpunct(c1)) | |||
{ | |||
// note: (c2='?') is for when a smart-quote has been replaced by '?' | |||
is_punctuation = 1; | |||
// option is set to explicitly speak punctuation characters | |||
// if a list of allowed punctuation has been set up, check whether the character is in it | |||
if((option_punctuation == 1) || (wcschr(option_punctlist,c1) != NULL)) | |||
{ | |||
if((terminator = AnnouncePunctuation(tr, c1, &c2, buf, &ix, is_end_clause)) >= 0) | |||
return(terminator); | |||
announced_punctuation = c1; | |||
} | |||
} | |||
if(is_end_clause) | |||
{ | |||
int c_next; | |||
if((c1 == '.') && (cprev == '.')) | |||
{ | |||
c1 = 0x2026; | |||
@@ -2340,47 +2374,43 @@ if(option_ssml) parag=1; | |||
} | |||
nl_count = 0; | |||
save_c2 = c2; | |||
c_next = c2; | |||
if(iswspace(c2)) | |||
if(iswspace(c_next)) | |||
{ | |||
while(!Eof() && iswspace(c2)) | |||
while(!Eof() && iswspace(c_next)) | |||
{ | |||
if(c2 == '\n') | |||
if(c_next == '\n') | |||
nl_count++; | |||
c2 = GetC(); // skip past space(s) | |||
} | |||
if(!Eof()) | |||
{ | |||
UngetC(c2); | |||
c_next = GetC(); // skip past space(s) | |||
} | |||
} | |||
if((nl_count==0) && (c1 == '.')) | |||
{ | |||
// if(iswdigit(cprev) && (tr->langopts.numbers & NUM_ORDINAL_DOT) && islower(c2)) | |||
if(iswdigit(cprev) && (tr->langopts.numbers & NUM_ORDINAL_DOT)) | |||
{ | |||
// dot after a number indicates an ordinal number | |||
is_punctuation = 0; | |||
if(islower(c_next) || (c_next == '<')) | |||
is_end_clause = 0; // only if followed by lower-case, (or if there is a XML tag) | |||
} | |||
else | |||
if(iswlower(c2)) | |||
if(iswlower(c_next)) | |||
{ | |||
// next word has no capital letter, this dot is probably from an abbreviation | |||
c1 = ' '; | |||
is_punctuation = 0; | |||
is_end_clause = 0; | |||
} | |||
if(any_alnum==0) | |||
{ | |||
c1 = ' '; // no letters or digits yet, so probably not a sentence terminator | |||
is_punctuation = 0; | |||
is_end_clause = 0; | |||
} | |||
} | |||
c2 = save_c2; | |||
if(is_punctuation) | |||
if(is_end_clause) | |||
{ | |||
UngetC(c_next); | |||
buf[ix] = ' '; | |||
buf[ix+1] = 0; | |||
@@ -2393,12 +2423,33 @@ if(option_ssml) parag=1; | |||
} | |||
return(punct_data); // only recognise punctuation if followed by a blank or bracket/quote | |||
} | |||
else | |||
{ | |||
if(!Eof()) | |||
{ | |||
if(iswspace(c2)) | |||
UngetC(c_next); | |||
} | |||
} | |||
} | |||
} | |||
if(speech_parameters[espeakSILENCE]==1) | |||
continue; | |||
if(c1 == announced_punctuation) | |||
{ | |||
// const unsigned short keep_punctuation[] = { | |||
// '\'', '-', 0x92, 0xb4, 0x2019, 0x2032, 0 }; | |||
// This character has already been announced, so delete it so that it isn't spoken a second time. | |||
// Unless it's a hyphen or apostrophe (which is used by TranslateClause() ) | |||
if(!IsBracket(c1)) | |||
{ | |||
c1 = ' '; | |||
} | |||
} | |||
j = ix+1; | |||
ix += utf8_out(c1,&buf[ix]); // buf[ix++] = c1; | |||
if(!iswspace(c1) && !IsBracket(c1)) |
@@ -35,7 +35,7 @@ | |||
#include "translate.h" | |||
#include "wave.h" | |||
const char *version_string = "1.41.13 10.Oct.09"; | |||
const char *version_string = "1.41.15 12.Oct.09"; | |||
const int version_phdata = 0x014100; | |||
int option_device_number = -1; |
@@ -535,7 +535,7 @@ static short vcolouring[N_VCOLOUR][5] = { | |||
// fprintf(stderr,"FMT%d %3s %3d-%3d f1=%d f2=%4d %4d %4d f3=%4d %3d\n", | |||
// which,WordToString(other_ph->mnemonic),len,rms,f1,f2,f2_min,f2_max,f3_adj,f3_amp); | |||
if(other_ph->mnemonic == '?') | |||
if((other_ph != NULL) && (other_ph->mnemonic == '?')) | |||
flags |= 8; | |||
if(which == 1) |
@@ -464,6 +464,13 @@ Translator *SelectTranslator(const char *name) | |||
break; | |||
case L('f','a'): // Farsi | |||
{ | |||
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words | |||
} | |||
break; | |||
case L('f','i'): // Finnish | |||
{ | |||
static const unsigned char stress_amps_fi[8] = {18,16, 22,22, 20,22, 22,22 }; |
@@ -601,8 +601,9 @@ int TranslateWord(Translator *tr, char *word1, int next_pause, WORD_TAB *wtab) | |||
int spell_word; | |||
int stress_bits; | |||
int emphasize_allcaps = 0; | |||
int wflags = wtab->flags; | |||
int wmark = wtab->wmark; | |||
int wflags; | |||
int wmark; | |||
WORD_TAB wtab_null[8]; | |||
// translate these to get pronunciations of plural 's' suffix (different forms depending on | |||
// the preceding letter) | |||
@@ -610,6 +611,14 @@ int TranslateWord(Translator *tr, char *word1, int next_pause, WORD_TAB *wtab) | |||
static char word_iz[4] = {0,'i','z',0}; | |||
static char word_ss[4] = {0,'s','s',0}; | |||
if(wtab == NULL) | |||
{ | |||
memset(wtab_null, 0, sizeof(wtab_null)); | |||
wtab = wtab_null; | |||
} | |||
wflags = wtab->flags; | |||
wmark = wtab->wmark; | |||
dictionary_flags[0] = 0; | |||
dictionary_flags[1] = 0; | |||
dictionary_flags2[0] = 0; | |||
@@ -620,6 +629,13 @@ int TranslateWord(Translator *tr, char *word1, int next_pause, WORD_TAB *wtab) | |||
end_phonemes[0] = 0; | |||
ph_limit = &phonemes[N_WORD_PHONEMES]; | |||
if(tr->data_dictlist == NULL) | |||
{ | |||
// dictionary is not loaded | |||
word_phonemes[0] = 0; | |||
return(0); | |||
} | |||
// count the length of the word | |||
if(*word1 == ' ') word1++; // possibly a dot was replaced by space: $dot | |||
wordx = word1; | |||
@@ -1053,6 +1069,7 @@ strcpy(phonemes2,phonemes); | |||
AppendPhonemes(tr,phonemes, N_WORD_PHONEMES, end_phonemes); | |||
end_phonemes[0] = 0; | |||
} | |||
memcpy(wordx,word_copy,strlen(word_copy)); | |||
} | |||
wordx[-1] = c_temp; | |||
} |
@@ -98,10 +98,10 @@ | |||
#define FLAG_DONT_SWITCH_TRANSLATOR 0x1000 | |||
#define FLAG_SUFFIX_REMOVED 0x2000 | |||
#define FLAG_HYPHEN_AFTER 0x4000 | |||
#define FLAG_ORDINAL 0x8000 // passed to TranslateNumber() to indicate an ordinal number | |||
#define FLAG_NO_TRACE 0x10000 // passed to TranslateRules() to suppress dictionary lookup printout | |||
#define FLAG_NO_PREFIX 0x20000 | |||
#define FLAG_ORDINAL 0x40000 // passed to TranslateNumber() to indicate an ordinal number | |||
// prefix/suffix flags (bits 8 to 14, bits 16 to 22) don't use 0x8000, 0x800000 | |||
#define SUFX_E 0x0100 // e may have been added |