lang=en: Don't treat "Mc" prefix as a separate word, combine it with the following word. git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@113 d46cf337-b52f-0410-862d-fd96e6ae7743master
@@ -262,6 +262,10 @@ l m n N p r s t | |||
tS v w z | |||
Dictionary ta_dict | |||
Dictionary pt_dict | |||
& &/ &U~ &~ @ @- a A |
@@ -169,7 +169,7 @@ i.e aIi:_! $pause | |||
irc $abbrev | |||
lbs paUndz | |||
ltd lImItId | |||
mc m@k | |||
mc m@k $alt2 // combine with the following word | |||
oem $abbrev | |||
ok $abbrev | |||
omg $abbrev |
@@ -3227,7 +3227,9 @@ | |||
li) mb (e m | |||
o) mb (e m | |||
du) mb (e m | |||
_) m (c m@ | |||
_) mc m@k | |||
_) mc (A mak | |||
_) mc (-A mak | |||
mn (_ m | |||
_) mn n | |||
&B) me (_ mI2 | |||
@@ -4965,6 +4967,7 @@ | |||
t) y (rann I | |||
ph) ys (i Iz | |||
yr (_ @ | |||
yre (_ aI3 | |||
c) y (ni I | |||
l) y (ri I | |||
p) y (ra I |
@@ -162,37 +162,7 @@ _1M4 bili'on | |||
_dpt _:z'api*k&_ | |||
_0and i | |||
// Roman numerals | |||
ii $abbrev | |||
iii $abbrev | |||
iv $abbrev | |||
vii $abbrev | |||
viii $abbrev | |||
ix $abbrev | |||
xi $abbrev | |||
xii $abbrev | |||
xiii $abbrev | |||
xiv $abbrev | |||
xvi $abbrev | |||
xvii $abbrev | |||
xviii $abbrev | |||
xix $abbrev | |||
xxi $abbrev | |||
xxii $abbrev | |||
xxiii $abbrev | |||
xxiv $abbrev | |||
xxvi $abbrev | |||
xxvii $abbrev | |||
xxviii $abbrev | |||
xxix $abbrev | |||
xxxi $abbrev | |||
xxxii $abbrev | |||
xxxiii $abbrev | |||
xxxiv $abbrev | |||
xxxvi $abbrev | |||
xxxvii $abbrev | |||
xxxviii $abbrev | |||
xxxix $abbrev | |||
// Abbreviations | |||
@@ -0,0 +1,4 @@ | |||
// This file is UTF8 encoded | |||
// Spelling to phoneme words and exceptions for Tamil | |||
@@ -0,0 +1,173 @@ | |||
// This file is UTF8 encoded | |||
// Spelling to phoneme rules for Tamil | |||
.replace | |||
௦ 0 // Tamil numbers | |||
௧ 1 | |||
௨ 2 | |||
௩ 3 | |||
௪ 4 | |||
௫ 5 | |||
௬ 6 | |||
௭ 7 | |||
௮ 8 | |||
௯ 9 | |||
.group 0xe0ae | |||
ஂ // anusvara | |||
ஃ // visarga | |||
அ a | |||
ஆ a: | |||
இ i | |||
ஈ i: | |||
உ u | |||
ஊ u: | |||
எ e | |||
ஏ e: | |||
ஐ E: | |||
ஒ o | |||
ஓ o: | |||
ஔ aU | |||
// consonants | |||
க ga // inter-vocalic, unless there is virama before or after | |||
க (B g | |||
க (் g | |||
்) க ga | |||
்) க (B g | |||
_) க ka | |||
_) க (B k | |||
க்க ka | |||
க்க (B k | |||
ங Na | |||
ங (B N | |||
ச dZa | |||
ச (B dZ | |||
_) ச sa | |||
_) ச (B s | |||
ச்ச tSa | |||
ச்ச (B tS | |||
ஜ dZa | |||
ஜ (B dZ | |||
ஞ n^a | |||
ஞ (B n^ | |||
ட d.a | |||
ட (B d. | |||
_) ட t.a | |||
_) ட (B t. | |||
ட்ட t.a | |||
ட்ட (B t. | |||
ண n.a | |||
ண (B n. | |||
த da | |||
த (B d | |||
_) த ta | |||
_) த (B t | |||
த்த ta | |||
த்த (B t | |||
ந na | |||
ந (B n | |||
ன na | |||
ன (B n | |||
ப ba | |||
ப (B b | |||
_) ப pa | |||
_) ப (B p | |||
ப்ப pa | |||
ப்ப (B p | |||
ம ma | |||
ம (B m | |||
ய ja | |||
ய (B j | |||
ர ra | |||
ர (B r | |||
ற Ra | |||
ற (B R | |||
ற் (ற t // RR -> tR | |||
ல la | |||
ல (B l | |||
ள l/2a // this should be [l.] not [l/2] | |||
ள (B l/2 | |||
ழ z.a | |||
ழ (B z. | |||
வ va | |||
வ (B v | |||
ஶ Sa | |||
ஶ (B S | |||
ஷ s.a | |||
ஷ (B s. | |||
ஸ sa | |||
ஸ (B s | |||
ஹ ha | |||
ஹ (B h | |||
// combining vowels | |||
ா a: | |||
ி i | |||
.group 0xe0af | |||
ீ i: | |||
ு u | |||
ூ u: | |||
ெ e | |||
ே e: | |||
ை E: | |||
ொ o | |||
ோ o: | |||
ௌ aU | |||
் // virama | |||
ௗ : // aU length mark | |||
.group | |||
$ dola |
@@ -4,5 +4,5 @@ gender male | |||
stressLength 170 170 190 190 0 0 230 240 | |||
dictrules 1 | |||
words 1 | |||
words 3 | |||
@@ -0,0 +1,4 @@ | |||
name tamil-test | |||
language ta | |||
gender male | |||
@@ -1,4 +1,4 @@ | |||
41 phoneme tables | |||
42 phoneme tables | |||
new total | |||
base 96 96 | |||
base2 24 115 | |||
@@ -16,6 +16,7 @@ | |||
fr 34 118 | |||
fr_ca 11 118 | |||
hi 50 132 | |||
ta 12 134 | |||
hu 23 112 | |||
nl 26 117 | |||
pl 15 107 | |||
@@ -23,7 +24,7 @@ | |||
cs 6 125 | |||
hr 20 130 | |||
mk 3 131 | |||
sr 11 130 | |||
sr 14 130 | |||
ru 38 126 | |||
it 17 118 | |||
la 21 114 | |||
@@ -427,6 +428,7 @@ r3/r_trill [R2] base | |||
[r] cy | |||
[r] nl | |||
[R] sk | |||
[r*] sr | |||
[x] pt | |||
r3/r_trill2 [R] base | |||
[R] hr | |||
@@ -438,6 +440,7 @@ r3/r_trill3.wav [R3] base | |||
[r] nl | |||
r3/r_trill.wav [R2] base | |||
[R] sk | |||
[r*] sr | |||
[x] pt | |||
r3/r_u [(u)] base | |||
r3/rx [*] base | |||
@@ -699,6 +702,7 @@ ustop/t_dnt [t] base | |||
[t] hu | |||
[t] pl | |||
[t] hr | |||
[t] sr | |||
[t] ru | |||
[t;] ro | |||
[t[] vi | |||
@@ -741,7 +745,6 @@ ustop/ts_pzd_ [c] hi | |||
ustop/ts_pzd2 [c] hi | |||
[c] hu | |||
ustop/t_sr [d] sr | |||
[t] sr | |||
ustop/ts_rfx [ts.] zh | |||
ustop/ts_rfx_unasp [ts.] zh | |||
ustop/ts_sr.wav [tS;] sr | |||
@@ -1106,6 +1109,7 @@ vowel/a [a] en_n | |||
[a] zh | |||
vowel/a# [a/] base2 | |||
[a2] en_sc | |||
[&] sr | |||
[&] pt | |||
[&/] pt | |||
[a2] en_wi | |||
@@ -1120,7 +1124,8 @@ vowel/a_2 [a] base2 | |||
[A:] en_wi | |||
[A@] en_wi | |||
[aa] en_wi | |||
vowel/a#_2 [&] hr | |||
vowel/a#_2 [a] ta | |||
[&] hr | |||
[a2] sv | |||
[&] is | |||
[a/] sw | |||
@@ -1150,6 +1155,7 @@ vowel/a_5 [a/] it | |||
[a] sw | |||
vowel/aa [a] fi | |||
[A] fr_ca | |||
[a:] ta | |||
[A] no | |||
[A:] no | |||
[aa] zhy | |||
@@ -1185,6 +1191,8 @@ vowel/e [e] base2 | |||
[e] eo | |||
[e] fr | |||
[E:] fr | |||
[e] ta | |||
[e:] ta | |||
[e] hr | |||
[e/] it | |||
[e:] la | |||
@@ -1267,6 +1275,7 @@ vowel/i [i] base2 | |||
[i:] cy | |||
[i] eo | |||
[I] fr | |||
[i:] ta | |||
[i:] hu | |||
[i] pl | |||
[i] it | |||
@@ -1301,6 +1310,7 @@ vowel/i#_5 [y] pt_pt | |||
[i/] pt_pt | |||
[y] ro | |||
vowel/i_6 [i:] en_us | |||
[i] ta | |||
[i] hr | |||
vowel/i_en [i:] en | |||
[i] vi | |||
@@ -1338,6 +1348,7 @@ vowel/ii_5 [I] en_wi | |||
[I2] en_wi | |||
vowel/ii_6 [I] en_wm | |||
[I2] en_wm | |||
[I] sr | |||
vowel/ii_en [i] en | |||
vowel/@_low [3] en_rp | |||
[@] en_rp | |||
@@ -1353,6 +1364,8 @@ vowel/o [o] base2 | |||
[o:] en | |||
[o:] de | |||
[o] hi | |||
[o] ta | |||
[o:] ta | |||
[o] it | |||
[o/] it | |||
[o:] la | |||
@@ -1450,7 +1463,8 @@ vowel/u_2 [u1] fi | |||
[u] sk | |||
[U] hr | |||
[u] ro | |||
vowel/u#_2 [u-] sv | |||
vowel/u#_2 [u-] ta | |||
[u-] sv | |||
vowel/u_3 [yU] ro | |||
vowel/u_4 [u:] en_n | |||
vowel/u#_4 [U] en_sc | |||
@@ -1465,6 +1479,7 @@ vowel/u_bck [u] base2 | |||
[u] fi | |||
[u] fr_ca | |||
[u:] hi | |||
[u:] ta | |||
[u:] hu | |||
[u] nl | |||
[u] pl | |||
@@ -1489,6 +1504,7 @@ vowel/uu_4 [U] fi | |||
vowel/uu_bck [U] en_n | |||
[U] fr_ca | |||
[U] hi | |||
[u] ta | |||
[u] hu | |||
[U] pt | |||
[U] no |
@@ -13,14 +13,39 @@ | |||
// Only phonemes which differ from the inherited phonemes need | |||
// to be included here. | |||
phoneme I | |||
vowel starttype (i) endtype (i) | |||
length 120 | |||
formants vowel/ii_6 | |||
linkout ; | |||
endphoneme | |||
phoneme & | |||
vowel starttype (a) endtype (a) | |||
length 140 | |||
formants vowel/a# | |||
endphoneme | |||
phoneme r* // this is [R] from Slovak/Czech | |||
liquid | |||
vowelin f1=0 f2=1700 -300 300 f3=-300 80 | |||
vowelout f1=2 f2=1700 -300 300 f3=-300 80 brk | |||
formants r3/r_trill+r3/r_trill.wav%50 | |||
trill | |||
lengthmod 6 | |||
endphoneme | |||
phoneme t // dental variant of /t/ | |||
vls dnt stop | |||
vowelin f1=0 f2=1600 -300 300 f3=-100 80 | |||
vowelout f1=0 f2=1600 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/t_sr%50 | |||
before _ ustop/t_sr%35 | |||
wave ustop/t_dnt%50 | |||
before _ ustop/t_dnt%50 | |||
switchvoicing d | |||
endphoneme | |||
@@ -97,8 +122,8 @@ phoneme tS | |||
vls pla afr sibilant | |||
vowelin f1=0 f2=2300 200 400 f3=-100 80 | |||
lengthmod 2 | |||
wave ustop/tsh_sr.wav | |||
before _ ustop/tsh_sr.wav | |||
wave ustop/tsh_sr.wav%50 | |||
before _ ustop/tsh_sr.wav%50 | |||
switchvoicing dZ | |||
endphoneme | |||
@@ -113,12 +138,14 @@ phoneme tS; | |||
endphoneme | |||
phoneme Z | |||
vcd pla frc sibilant | |||
vowelin f1=0 f2=2000 0 300 f3=-200 80 | |||
vowelout f1=2 f2=2000 0 300 f3=-200 80 brk | |||
formants voc/zh+ufric/sh_sr.wav | |||
before _ voc/zh+ufric/sh_sr.wav | |||
formants voc/zh+ufric/sh_sr.wav%60 | |||
before _ voc/zh+ufric/sh_sr.wav%50 | |||
lengthmod 6 | |||
switchvoicing S | |||
endphoneme | |||
@@ -130,8 +157,8 @@ phoneme S | |||
vowelout f1=0 f2=2100 100 300 f3=-100 80 | |||
length 100 | |||
lengthmod 3 | |||
wave ufric/sh_sr.wav | |||
before _ ufric/sh_sr.wav | |||
wave ufric/sh_sr.wav%70 | |||
before _ ufric/sh_sr.wav%60 | |||
switchvoicing Z | |||
endphoneme | |||
@@ -0,0 +1,84 @@ | |||
// Phonemes for Tamil | |||
// This inherits from ph_hindi | |||
phoneme a | |||
vowel starttype (@) endtype (@) | |||
length 130 | |||
formants vowel/a#_2 | |||
endphoneme | |||
phoneme a: | |||
vowel starttype (a) endtype (a) | |||
length 270 | |||
formants vowel/aa | |||
endphoneme | |||
phoneme e | |||
vowel starttype (e) endtype (e) | |||
length 130 | |||
formants vowel/e | |||
endphoneme | |||
phoneme e: | |||
vowel starttype (e) endtype (e) | |||
length 270 | |||
formants vowel/e | |||
endphoneme | |||
phoneme i | |||
vowel starttype (i) endtype (i) | |||
length 130 | |||
formants vowel/i_6 | |||
endphoneme | |||
phoneme i: | |||
vowel starttype (i) endtype (i) | |||
length 270 | |||
formants vowel/i | |||
endphoneme | |||
phoneme o | |||
vowel starttype (o) endtype (o) | |||
length 130 | |||
formants vowel/o | |||
endphoneme | |||
phoneme o: | |||
vowel starttype (o) endtype (o) | |||
length 270 | |||
formants vowel/o | |||
endphoneme | |||
phoneme u | |||
vowel starttype (u) endtype (u) | |||
length 130 | |||
formants vowel/uu_bck | |||
endphoneme | |||
phoneme u: | |||
vowel starttype (u) endtype (u) | |||
length 270 | |||
formants vowel/u_bck | |||
endphoneme | |||
phoneme u- | |||
vowel starttype (u) endtype (u) | |||
length 130 | |||
formants vowel/u#_2 | |||
endphoneme | |||
@@ -0,0 +1,407 @@ | |||
//based on "phonemes" | |||
//==================================================== | |||
// Tone Numbers | |||
//==================================================== | |||
phoneme 11 // tone: low level | |||
stress | |||
tone 15 10 envelope/p_fall NULL | |||
endphoneme | |||
phoneme 21 // tone: low fall | |||
stress | |||
tone 20 10 envelope/p_fall NULL | |||
endphoneme | |||
phoneme 214 // tone: fall rise | |||
stress | |||
tone 20 40 envelope/p_214 NULL | |||
endphoneme | |||
phoneme 22 // tone: mid-low level | |||
stress | |||
tone 21 19 envelope/p_fall NULL | |||
endphoneme | |||
phoneme 33 // tone: mid level | |||
stress | |||
tone 31 29 envelope/p_fall NULL | |||
endphoneme | |||
phoneme 35 // tone: mid rise | |||
stress | |||
tone 30 50 envelope/p_rise NULL | |||
endphoneme | |||
phoneme 44 // tone: mid-high level | |||
stress | |||
tone 41 39 envelope/p_level NULL | |||
endphoneme | |||
phoneme 51 // tone: high fall | |||
stress | |||
tone 50 10 envelope/p_fall NULL | |||
endphoneme | |||
phoneme 53 // tone: high fall | |||
stress | |||
tone 50 30 envelope/p_fall NULL | |||
endphoneme | |||
phoneme 55 // tone: high level | |||
stress | |||
tone 50 50 envelope/p_level NULL | |||
endphoneme | |||
//==================================================== | |||
// Consonants | |||
//==================================================== | |||
phoneme p | |||
vls blb stop | |||
vowelin f1=0 f2=1000 -50 -100 f3=-200 80 amp=11 | |||
vowelout f1=0 f2=1000 -500 -350 f3=-300 80 rms=22 | |||
lengthmod 2 | |||
wave ustop/p_unasp | |||
endphoneme | |||
phoneme ph | |||
vls blb stop | |||
vowelin f1=0 f2=1000 -50 -100 f3=-200 80 amp=11 | |||
vowelout f1=0 f2=1000 -500 -350 f3=-300 80 rms=22 | |||
lengthmod 2 | |||
wave ustop/p_ | |||
endphoneme | |||
phoneme m //not clear before some vowel | |||
vcd blb nasal | |||
vowelout f1=2 f2=1000 -500 -350 f3=-200 80 brk | |||
lengthmod 2 | |||
formants m/_m | |||
endphoneme | |||
phoneme f //not clear before some vowel | |||
vls lbd frc | |||
vowelout f1=0 f2=1000 -500 -350 f3=-200 80 | |||
lengthmod 2 | |||
wave ufric/f | |||
endphoneme | |||
phoneme t | |||
vls alv stop | |||
vowelin f1=0 f2=1700 -300 300 f3=-100 80 | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/t_unasp | |||
endphoneme | |||
phoneme th | |||
vls alv stop | |||
vowelin f1=0 f2=1700 -300 300 f3=-100 80 | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/t_ | |||
endphoneme | |||
phoneme n | |||
vcd alv nasal | |||
vowelout f1=2 f2=1700 -300 250 f3=-100 80 rms=20 brk | |||
length 30 | |||
lengthmod 2 | |||
formants n/_n | |||
before _ n/n_ | |||
before (i) n/ni | |||
endphoneme | |||
phoneme l | |||
liquid | |||
vowelin f1=0 f2=2700 400 600 f3=300 80 rate len=20 | |||
lengthmod 2 | |||
formants l/_l | |||
before (i) l/li | |||
endphoneme | |||
phoneme k | |||
vls uvl stop | |||
vowelin f1=1 f2=1700 0 200 f3=-300 80 f4 | |||
vowelout f1=1 f2=1700 0 200 f3=-300 80 f4 rms=30 | |||
lengthmod 2 | |||
wave ustop/k_unasp_ | |||
endphoneme | |||
phoneme kh | |||
vls uvl stop | |||
vowelin f1=1 f2=1700 0 200 f3=-300 80 f4 | |||
vowelout f1=1 f2=1700 0 200 f3=-300 80 f4 rms=30 | |||
lengthmod 2 | |||
wave ustop/k_ | |||
endphoneme | |||
phoneme tS; | |||
vls pal afr sibilant palatal | |||
vowelin f1=0 f2=2700 400 600 f3=300 80 | |||
lengthmod 2 | |||
wave ustop/tsh_pzd_unasp | |||
endphoneme | |||
phoneme tS;h | |||
vls pal afr sibilant palatal | |||
vowelin f1=0 f2=2700 400 600 f3=300 80 | |||
lengthmod 2 | |||
wave ustop/tsh_pzd | |||
endphoneme | |||
phoneme S; | |||
vls pla frc sibilant | |||
vowelin f1=0 f2=2700 400 600 f3=300 80 | |||
lengthmod 2 | |||
wave ufric/sh_pzd | |||
endphoneme | |||
phoneme ts | |||
vls alv afr sibilant | |||
vowelin f1=0 f2=1700 -300 300 f3=-100 80 | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/ts_unasp | |||
endphoneme | |||
phoneme tsh | |||
vls alv afr sibilant | |||
vowelin f1=0 f2=1700 -300 300 f3=-100 80 | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/ts | |||
endphoneme | |||
phoneme ts. | |||
vls pla frc sibilant | |||
vowelin f1=0 f2=1800 -100 300 f3=-300 80 | |||
vowelout f1=0 f2=1800 -100 300 f3=-300 80 | |||
length 100 | |||
lengthmod 3 | |||
wave ustop/ts_rfx_unasp | |||
endphoneme | |||
phoneme ts.h | |||
vls pla frc sibilant | |||
vowelin f1=0 f2=1800 -100 300 f3=-300 80 | |||
vowelout f1=0 f2=1800 -100 300 f3=-300 80 | |||
length 100 | |||
lengthmod 3 | |||
wave ustop/ts_rfx | |||
endphoneme | |||
phoneme j | |||
liquid | |||
palatal | |||
length 40 | |||
formants j/_j | |||
lengthmod 1 | |||
endphoneme | |||
phoneme w | |||
liquid | |||
length 40 | |||
lengthmod 1 | |||
formants w/_w | |||
endphoneme | |||
//==================================================== | |||
// Vowels and Diphthongs | |||
//==================================================== | |||
phoneme N //should be more different from n | |||
vcd vel nasal | |||
length 10 | |||
formants NULL | |||
before _ nn/nn_ | |||
after _ nn/_nn | |||
lengthmod 1 | |||
after (i) nn/inn | |||
endphoneme | |||
phoneme a | |||
vowel starttype (a) endtype (a) | |||
length 250 | |||
formants vowel/a | |||
endphoneme | |||
phoneme A | |||
vowel starttype (a) endtype (a) | |||
length 250 | |||
formants vowel/aa | |||
before N vowel/aa_2 | |||
endphoneme | |||
phoneme ai | |||
vowel starttype (a) endtype (i) | |||
length 250 | |||
formants vdiph/ae | |||
endphoneme | |||
phoneme Au | |||
vowel starttype (a) endtype (u) | |||
length 250 | |||
formants vwl_zh/aau | |||
endphoneme | |||
phoneme @ | |||
vowel starttype (@) endtype (@) | |||
length 230 | |||
formants vowel/3_2%120 | |||
before N vowel/@_bck | |||
endphoneme | |||
phoneme @r //tmp use V3_r | |||
vowel starttype (@) endtype (@) | |||
length 250 | |||
formants vowelr/V3_r | |||
endphoneme | |||
phoneme E | |||
vowel starttype (e) endtype (e) | |||
length 250 | |||
formants vowel/ee_2 | |||
endphoneme | |||
phoneme ei | |||
vowel starttype (e) endtype (i) | |||
length 250 | |||
formants vdiph/ei | |||
endphoneme | |||
phoneme i | |||
vowel starttype (i) endtype (i) | |||
length 250 | |||
formants vowel/i | |||
before N vowel/ii_2 | |||
endphoneme | |||
phoneme i[ //after ts tsh s | |||
vowel starttype (i) endtype (i) | |||
length 250 | |||
formants vwl_zh/i_dnt | |||
endphoneme | |||
phoneme i. //after ts. ts.h s. z. | |||
vowel starttype (i) endtype (i) | |||
length 250 | |||
formants vwl_zh/i_rfx | |||
endphoneme | |||
phoneme iA //try more before N | |||
vowel starttype (i) endtype (a) | |||
length 250 | |||
formants vwl_zh/iaa | |||
endphoneme | |||
phoneme iAu | |||
vowel starttype (i) endtype (u) | |||
length 250 | |||
formants vwl_zh/iaau | |||
endphoneme | |||
phoneme iE | |||
vowel starttype (i) endtype (e) | |||
length 250 | |||
formants vwl_zh/iee | |||
endphoneme | |||
phoneme io | |||
vowel starttype (i) endtype (o) | |||
length 250 | |||
formants vdiph2/iioo | |||
endphoneme | |||
phoneme iou | |||
vowel starttype (i) endtype (u) | |||
length 250 | |||
formants vwl_zh/iou | |||
endphoneme | |||
phoneme o | |||
vowel starttype (o) endtype (o) | |||
length 250 | |||
formants vowel/o | |||
endphoneme | |||
phoneme o- | |||
vowel starttype (o) endtype (o) | |||
length 250 | |||
formants vowel/o- | |||
endphoneme | |||
phoneme ou | |||
vowel starttype (o) endtype (u) | |||
length 250 | |||
formants vdiph/8u | |||
endphoneme | |||
phoneme u //try more before N | |||
vowel starttype (u) endtype (u) | |||
length 250 | |||
formants vowel/u | |||
before N vowel/o | |||
endphoneme | |||
phoneme uA | |||
vowel starttype (u) endtype (a) | |||
length 250 | |||
formants vwl_zh/uaa | |||
endphoneme | |||
phoneme ua | |||
vowel starttype (u) endtype (a) | |||
length 250 | |||
formants vdiph2/oa | |||
endphoneme | |||
phoneme uai | |||
vowel starttype (u) endtype (i) | |||
length 250 | |||
formants vwl_zh/uai | |||
endphoneme | |||
phoneme u@ | |||
vowel starttype (u) endtype (@) | |||
length 250 | |||
formants vdiph2/o@ | |||
endphoneme | |||
phoneme uei | |||
vowel starttype (u) endtype (i) | |||
length 250 | |||
formants vwl_zh/uei | |||
endphoneme | |||
phoneme uo | |||
vowel starttype (u) endtype (o) | |||
length 250 | |||
formants vwl_zh/uo | |||
endphoneme | |||
phoneme y //try more before N | |||
vowel starttype (i) endtype (u) | |||
length 250 | |||
formants vowel/y | |||
before N vdiph2/yu | |||
endphoneme | |||
phoneme y& | |||
vowel starttype (i) endtype (e) | |||
length 250 | |||
formants vwl_zh/y& | |||
endphoneme | |||
phoneme yE //try more so not similar to iE | |||
vowel starttype (i) endtype (e) | |||
length 250 | |||
formants vwl_zh/yee | |||
endphoneme | |||
phoneme y@ | |||
vowel starttype (i) endtype (@) | |||
length 250 | |||
formants vdiph2/y#@ | |||
endphoneme |
@@ -1172,6 +1172,9 @@ include ph_french_ca | |||
phonemetable hi base | |||
include ph_hindi | |||
phonemetable ta hi | |||
include ph_tamil | |||
phonemetable hu base | |||
include ph_hungarian | |||
@@ -976,7 +976,7 @@ void Translator::SetWordStress(char *output, unsigned int dictionary_flags, int | |||
int done; | |||
unsigned char vowel_stress[N_WORD_PHONEMES/2]; | |||
char syllable_type[N_WORD_PHONEMES/2]; | |||
char syllable_weight[N_WORD_PHONEMES/2]; | |||
unsigned char phonetic[N_WORD_PHONEMES]; | |||
static char consonant_types[16] = {0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0}; | |||
@@ -1040,7 +1040,7 @@ void Translator::SetWordStress(char *output, unsigned int dictionary_flags, int | |||
// followed by two consonants, a long consonant, or consonant and end-of-word | |||
weight++; | |||
} | |||
syllable_type[ix] = weight; | |||
syllable_weight[ix] = weight; | |||
ix++; | |||
} | |||
} | |||
@@ -1164,16 +1164,14 @@ void Translator::SetWordStress(char *output, unsigned int dictionary_flags, int | |||
{ | |||
int wt; | |||
int max_weight = -1; | |||
int min_weight = 10; | |||
int prev_stressed; | |||
for(ix = 1; ix < vowel_count; ix++) | |||
// find the heaviest syllable, excluding the final syllable | |||
for(ix = 1; ix < (vowel_count-1); ix++) | |||
{ | |||
if(vowel_stress[ix] == 0) | |||
{ | |||
if((wt = syllable_type[ix]) < min_weight) | |||
min_weight = wt; | |||
if(wt >= max_weight) | |||
if((wt = syllable_weight[ix]) >= max_weight) | |||
{ | |||
max_weight = wt; | |||
prev_stressed = stressed_syllable; | |||
@@ -1181,14 +1179,19 @@ void Translator::SetWordStress(char *output, unsigned int dictionary_flags, int | |||
} | |||
} | |||
} | |||
if(max_weight > min_weight) | |||
if((syllable_weight[vowel_count-1] == 2) && (max_weight< 2)) | |||
{ | |||
// different weights, don't stress the final syllable | |||
if((stressed_syllable == vowel_count-1) && (syllable_type[vowel_count-2] > 0)) | |||
{ | |||
stressed_syllable = vowel_count-2; | |||
} | |||
// the only double-heavy (weight 2) syllable is the final syllable, so stress it | |||
stressed_syllable = vowel_count-1; | |||
} | |||
else | |||
if(max_weight <= 0) | |||
{ | |||
// all syllables, excluding the last, are light. Stress the first syllable | |||
stressed_syllable = 1; | |||
} | |||
vowel_stress[stressed_syllable] = 4; | |||
max_stress = 4; | |||
} | |||
@@ -1253,7 +1256,7 @@ void Translator::SetWordStress(char *output, unsigned int dictionary_flags, int | |||
if((stress == 3) && (langopts.stress_flags & 0x20)) | |||
continue; // don't use secondary stress | |||
if((v > 1) && (langopts.stress_flags & 0x40) && (syllable_type[v]==0) && (syllable_type[v+1]>0)) | |||
if((v > 1) && (langopts.stress_flags & 0x40) && (syllable_weight[v]==0) && (syllable_weight[v+1]>0)) | |||
{ | |||
// don't put secondary stress on a light syllable which is followed by a heavy syllable | |||
continue; |
@@ -43,7 +43,7 @@ typedef struct { | |||
short pitch2; | |||
} SYLLABLE; | |||
SYLLABLE syllable_tab[N_PHONEME_LIST]; | |||
SYLLABLE *syllable_tab; | |||
static int tone_pitch_env; /* used to return pitch envelope */ | |||
@@ -242,7 +242,7 @@ static int drops_0[8] = {0x400,0x400,0x700,0x700,0x700,0xa00,0x1800,0x0e00}; | |||
static short oflow[] = {0, 20, 12, 4, 0}; | |||
static short oflow_emf[] = {5, 24, 15, 10, 5}; | |||
static short oflow_less[] = {1, 17, 10, 5, 1}; | |||
static short back_emf[] = {36, 32, 0}; | |||
static short back_emf[] = {35, 32, 0}; | |||
typedef struct { | |||
unsigned char pitch_env0; /* pitch envelope, tonic syllable at end */ | |||
@@ -285,7 +285,7 @@ static TONE_TABLE tone_table[N_TONE_TABLE] = { | |||
20, 25, 34, 20, drops_0, 3, 3, 5, oflow, NULL, 15, 29, 0}, | |||
{PITCHfall, 41, 4, PITCHfall, 41, 27, // exclamation | |||
20, 25, 34, 24, drops_0, 3, 4, 5, oflow_emf, back_emf, 18, 5, 0}, | |||
20, 25, 34, 24, drops_0, 3, 4, 5, oflow_emf, back_emf, 16, 5, 0}, | |||
{PITCHfall, 38, 2, PITCHfall, 42, 30, // statement, emphatic | |||
20, 25, 34, 22, drops_0, 3, 3, 5, oflow, NULL, 15, 5, 0}, | |||
@@ -376,7 +376,7 @@ static void count_pitch_vowels(int start, int end, int clause_end) | |||
if(no_tonic) | |||
{ | |||
tone_posn = tone_posn2 = end-1; | |||
tone_posn = tone_posn2 = end; // next position after the end of the truncated clause | |||
} | |||
else | |||
if(last_primary >= 0) | |||
@@ -880,6 +880,10 @@ void Translator::CalcPitches(int clause_type) | |||
int count_primary; | |||
int ph_end=n_phoneme_list; | |||
SYLLABLE syllable_tab2[N_PHONEME_LIST]; | |||
syllable_tab = syllable_tab2; // don't use permanent storage. it's only needed during the call of CalcPitches() | |||
if(langopts.intonation == 1) | |||
{ | |||
CalcPitches_Tone(clause_type); | |||
@@ -892,7 +896,7 @@ void Translator::CalcPitches(int clause_type) | |||
option = 0; | |||
group_tone_emph = group_tone = punct_to_tone[option][clause_type]; | |||
group_tone_emph = punct_to_tone[option][4]; // emphatic form of statement | |||
group_tone_emph = punct_to_tone[option][5]; // emphatic form of statement | |||
if(clause_type == 4) | |||
no_tonic = 1; /* incomplete clause, used for abbreviations such as Mr. Dr. Mrs. */ | |||
@@ -907,6 +911,7 @@ void Translator::CalcPitches(int clause_type) | |||
if(p->synthflags & SFLAG_SYLLABLE) | |||
{ | |||
syllable_tab[n_st].flags = 0; | |||
syllable_tab[n_st].env = PITCHfall; | |||
syllable_tab[n_st++].stress = p->tone; // stress level | |||
if(p->tone >= 4) |
@@ -1967,22 +1967,15 @@ if(option_ssml) parag=1; | |||
} | |||
ix += utf8_out(c1,&buf[ix]); // buf[ix++] = c1; | |||
if(!iswalnum(c1) && (ix > (n_buf-20))) | |||
if(((ix > (n_buf-20)) && !IsAlpha(c1) && !iswdigit(c1)) || (ix >= (n_buf-2))) | |||
{ | |||
// clause too long, getting near end of buffer, so break here | |||
// try to break at a word boundary (unless we actually reach the end of buffer). | |||
buf[ix] = ' '; | |||
buf[ix+1] = 0; | |||
UngetC(c2); | |||
return(CLAUSE_NONE); | |||
} | |||
if(ix >= (n_buf-2)) | |||
{ | |||
// reached end of buffer, must break now | |||
buf[n_buf-2] = ' '; | |||
buf[n_buf-1] = 0; | |||
UngetC(c2); | |||
return(CLAUSE_NONE); | |||
} | |||
} | |||
buf[ix] = ' '; | |||
buf[ix+1] = 0; |
@@ -35,7 +35,7 @@ | |||
#include "translate.h" | |||
#include "wave.h" | |||
const char *version_string = "1.29.26 18.Dec.07"; | |||
const char *version_string = "1.29.29 20.Dec.07"; | |||
const int version_phdata = 0x012924; | |||
int option_device_number = -1; |
@@ -33,9 +33,6 @@ | |||
#include "voice.h" | |||
#include "translate.h" | |||
#define PITCHfall 0 | |||
#define PITCHrise 1 | |||
extern FILE *f_log; | |||
static void SmoothSpect(void); |
@@ -18,7 +18,7 @@ | |||
***************************************************************************/ | |||
#define N_PHONEME_LIST 700 // enough for source[] full of text, else it will truncate | |||
#define N_PHONEME_LIST 1000 // enough for source[N_TR_SOURCE] full of text, else it will truncate | |||
#define MAX_HARMONIC 400 // 400 * 50Hz = 20 kHz, more than enough | |||
#define N_SEQ_FRAMES 25 // max frames in a spectrum sequence (real max is about 8) | |||
@@ -43,6 +43,7 @@ Translator_English::Translator_English() : Translator() | |||
langopts.stress_rule = 0; | |||
langopts.numbers = 0x41 + NUM_ROMAN; | |||
langopts.param[LOPT_COMBINE_WORDS] = 5; // allow "mc" to combine with the following word | |||
} | |||
@@ -52,7 +53,7 @@ static unsigned char initials_bitmap[86] = { | |||
0x20, 0x24, 0x20, 0x80, 0x10, 0x00, 0x00, 0x00, | |||
0x00, 0x28, 0x08, 0x00, 0x88, 0x22, 0x04, 0x00, // 16 | |||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |||
0x00, 0x88, 0x22, 0x04, 0x00, 0x02, 0x00, 0x00, // 32 | |||
0x00, 0x88, 0x22, 0x04, 0x00, 0x02, 0x00, 0x04, // 32 | |||
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |||
0x00, 0x28, 0x8a, 0x03, 0x00, 0x00, 0x40, 0x00, // 48 | |||
0x02, 0x00, 0x41, 0xca, 0x9b, 0x06, 0x20, 0x80, |
@@ -45,6 +45,7 @@ | |||
#define OFFSET_GREEK 0x380 | |||
#define OFFSET_CYRILLIC 0x420 | |||
#define OFFSET_DEVANAGARI 0x900 | |||
#define OFFSET_TAMIL 0xb80 | |||
static const unsigned int replace_cyrillic_latin[] = | |||
@@ -292,7 +293,7 @@ Translator *SelectTranslator(const char *name) | |||
tr->charset_a0 = charsets[19]; // ISCII | |||
tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable | |||
tr->langopts.stress_rule = 6; // stress on last heaviest syllable | |||
tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable | |||
tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | |||
tr->langopts.numbers = 0x811; | |||
tr->langopts.numbers2 = 0x100; | |||
@@ -314,9 +315,14 @@ Translator *SelectTranslator(const char *name) | |||
{ | |||
static const unsigned char stress_amps_hr[8] = {16,16, 20,20, 20,24, 24,22 }; | |||
static const short stress_lengths_hr[8] = {180,160, 200,200, 0,0, 220,230}; | |||
static const short stress_lengths_sr[8] = {160,150, 200,200, 0,0, 250,260}; | |||
tr = new Translator(); | |||
SetupTranslator(tr,stress_lengths_hr,stress_amps_hr); | |||
if(name2 == L('s','r')) | |||
SetupTranslator(tr,stress_lengths_sr,stress_amps_hr); | |||
else | |||
SetupTranslator(tr,stress_lengths_hr,stress_amps_hr); | |||
tr->charset_a0 = charsets[2]; // ISO-8859-2 | |||
tr->langopts.stress_rule = 0; | |||
@@ -590,6 +596,30 @@ SetLengthMods(tr,3); // all equal | |||
} | |||
break; | |||
case L('t','a'): | |||
{ | |||
static const short stress_lengths_ta[8] = {190, 190, 210, 210, 0, 0, 230, 250}; | |||
static const unsigned char stress_amps_ta[8] = {17,14, 20,19, 20,24, 24,22 }; | |||
tr = new Translator(); | |||
SetupTranslator(tr,stress_lengths_ta,stress_amps_ta); | |||
tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable | |||
tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable | |||
tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | |||
tr->langopts.numbers = 0x811; | |||
tr->langopts.numbers2 = 0x100; | |||
tr->letter_bits_offset = OFFSET_TAMIL; | |||
memset(tr->letter_bits,0,sizeof(tr->letter_bits)); | |||
SetLetterBitsRange(tr,LETTERGP_A,0x05,0x14); // vowel letters | |||
SetLetterBitsRange(tr,LETTERGP_A,0x3e,0x4c); // vowel signs | |||
SetLetterBitsRange(tr,LETTERGP_B,0x3e,0x4d); // vowel signs, and virama | |||
SetLetterBitsRange(tr,LETTERGP_C,0x15,0x39); // the main consonant range | |||
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words | |||
} | |||
break; | |||
case L('t','r'): // Turkish | |||
{ | |||
static const unsigned char stress_amps_tr[8] = {16,16, 20,20, 20,24, 24,22 }; |
@@ -88,8 +88,8 @@ static int embedded_ix; | |||
static int embedded_read; | |||
unsigned int embedded_list[N_EMBEDDED_LIST]; | |||
// the source text of a single clause | |||
#define N_TR_SOURCE 350 | |||
// the source text of a single clause (UTF8 bytes) | |||
#define N_TR_SOURCE 700 | |||
static char source[N_TR_SOURCE+40]; // extra space for embedded command & voice change info at end | |||
int n_replace_phonemes; | |||
@@ -339,12 +339,12 @@ unsigned char *length_mod_tabs[6] = { | |||
}; | |||
/* index by 0=. 1=, 2=?, 3=! 4=none */ | |||
static unsigned char punctuation_to_tone[4][5] = { | |||
{0,1,2,3,4}, | |||
{5,6,2,0,4}, | |||
{0,1,2,3,0}, | |||
{0,1,2,3,0} }; | |||
/* index by 0=. 1=, 2=?, 3=! 4=none, 5=emphasized */ | |||
static unsigned char punctuation_to_tone[4][6] = { | |||
{0,1,2,3,0,4}, | |||
{5,6,2,0,0,4}, | |||
{0,1,2,3,0,0}, | |||
{0,1,2,3,0,0} }; | |||
void SetLengthMods(Translator *tr, int value) | |||
@@ -369,6 +369,9 @@ int IsAlpha(unsigned int c) | |||
if((c >= 0x901) && (c <= 0x957)) | |||
return(1); // Devanagari vowel signs and other signs | |||
if((c >= 0xb81) && (c <= 0xbe5)) | |||
return(1); // Tamil letters, vowel signs and other signs | |||
if((c >= 0x300) && (c <= 0x36f)) | |||
return(1); // combining accents | |||
@@ -25,7 +25,7 @@ | |||
#define N_WORD_PHONEMES 160 // max phonemes in a word | |||
#define N_WORD_BYTES 160 // max bytes for the UTF8 characters in a word | |||
#define N_CLAUSE_WORDS 256 // max words in a clause | |||
#define N_CLAUSE_WORDS 300 // max words in a clause | |||
#define N_RULE_GROUP2 120 // max num of two-letter rule chains | |||
#define N_HASH_DICT 1024 | |||
#define N_CHARSETS 20 |