Added languages: ca (Catalan), lv (Latvian). Numbers: allow for languages which have numbers for 100,000 and (or not) 10,000,000 (e.g. hi, sw). Phoneme [ts]: new files in phsource/ustop. *_rules: fix crash for lettergroup numbers greater than 10, and better error reporting. Languages eo, jbo: adjust syllable lengths. Language en: fix "1st, 2nd, 3rd, 5th", but a full ordinal number feature is still needed. git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@202 d46cf337-b52f-0410-862d-fd96e6ae7743 master
@@ -107,7 +107,7 @@ x z Z | |||
Dictionary eo_dict | |||
@- a aI aU e eI eU i | |||
o OI u uI | |||
o oI u uI | |||
* b d dZ f g h j | |||
k l m n p r R s | |||
@@ -237,12 +237,13 @@ ts v w w2 z | |||
Dictionary jbo_dict | |||
@ a aI aU e eI i l- | |||
o OI r- u | |||
@ @- a aI aU e eI i | |||
l- o oI r- u | |||
b d dZ f g h j k | |||
l m n N p R s S | |||
t tS v w x z Z | |||
* - b d dZ f g h | |||
j k l m n N p R | |||
s S t tS v w x z | |||
Z | |||
Dictionary ko_dict | |||
@@ -515,8 +516,8 @@ tS v w z | |||
Dictionary lv_dict | |||
a a: ai au e e: ei i | |||
i: ie iu o o: oi u u: | |||
ui uo | |||
i: ie iu o o: u u: ui | |||
uo | |||
: b c d dZ f g h | |||
j J k l l^ m n n^ | |||
@@ -543,5 +544,28 @@ y | |||
: b c ch d d. dZ f | |||
g h j J k kh l l. | |||
m n N n. n^ p ph R | |||
R2 s s. S; t t. t.h th | |||
tS v w z | |||
R2 s S s. S; t t. t.h | |||
th tS v w z | |||
Dictionary om_dict | |||
@# a a: E e I I# i: | |||
O o: U u: | |||
** : a# b B c` d d` | |||
dZ f g h j k k` l | |||
m n n^ p p` R s S | |||
t t` tS v w z | |||
Dictionary ca_dict | |||
@ a a# aI e E i o | |||
O u U | |||
* ** : b B d D dZ; | |||
f g j J J^ k l l^ | |||
m n N n^ p Q r R | |||
R2 s S; t T tS ts tS; | |||
v v# w z Z Z; |
@@ -108,22 +108,23 @@ _% p3s'Ent | |||
?5 _% pVRs'Ent | |||
_& amp@sand | |||
_' kwoUt | |||
_( lEftbrakI2t | |||
_) raItbrakI2t | |||
_[ lEftbrakI2t | |||
_] raItbrakI2t | |||
_( lEftpa2rEn | |||
_) raItpa2rEn | |||
_* ast@rIsk | |||
_+ plVs | |||
_, k0m@ | |||
_- h,aIf@n | |||
?3 _- daS | |||
_. d0t | |||
_: koUl@n | |||
_; sEmIk'oUl@n | |||
_< ElaNg@L | |||
_< lEsDan | |||
_= i:kw@Lz | |||
_> A@aNg@L | |||
_> greIt@Dan | |||
_? kwEstS@n | |||
_@ at|saIn | |||
_[ lEftskwe@ | |||
_] raItskwe@ | |||
_^ s3:kVmfl,Eks | |||
?5 _^ sIRkVmfl,Eks | |||
__ 'Vnd3sk,o@ | |||
@@ -183,7 +184,8 @@ _₠ jU@roU | |||
₨ ru:pi: | |||
₩ w0n | |||
§ sEkS@n | |||
¶ par@graf // in en_rules | |||
¶ par@graf | |||
¶¶ par@grafs | |||
¤ kVr@nsIsaIn | |||
© k0pIraIt | |||
® rEdZIst3d | |||
@@ -280,6 +282,11 @@ _0M3 b'Ili@n | |||
_dpt pOInt | |||
_roman roUm@n_ | |||
(1 st) f3:st | |||
(2 nd) sEk@nd | |||
(3 rd) T3:d | |||
(5 th) fIT | |||
// ABBREVIATIONS | |||
//************** | |||
@@ -311,6 +318,7 @@ ie aIi:_! $pause $only | |||
i.e aIi:_! $pause | |||
irc $abbrev | |||
lbs paUndz | |||
LBS $abbrev | |||
ltd lImI2tI2d | |||
mc m@k $alt2 // combine with the following word | |||
oem $abbrev | |||
@@ -336,19 +344,19 @@ xxx $abbrev // not roman 30 | |||
xy $abbrev | |||
nd $only | |||
rd $only | |||
th $only | |||
st $only | |||
nd $only | |||
rd $only | |||
th $only | |||
st $only | |||
mr mIst3 $dot | |||
mrs mIsI2z $dot | |||
//ms mIz $dot | |||
dr d0kt3 $dot $capital | |||
lt $dot | |||
prof $dot | |||
rev $dot | |||
st $dot | |||
dr $dot | |||
lt $dot | |||
prof $dot | |||
rev $dot | |||
st $dot | |||
// not abbreviations when all-caps | |||
all O:l $allcaps |
@@ -5368,5 +5368,3 @@ | |||
%) % (% | |||
%%) % (_ _:: | |||
// extra symbols (mostly in en_list) | |||
¶¶ par@grafs |
@@ -29,7 +29,28 @@ _0M3 mili'ardo | |||
_0M4 duili'ono | |||
_dpt komo_ | |||
// accent names | |||
_acu dekst@-*akoRno | |||
_brv hoko | |||
_cir tsiRkumflekso | |||
_dia dupunktasupeRsigno | |||
_grv maldekst@-*akoRno | |||
_tld tildo | |||
_?? liteRo | |||
_. punkto | |||
_: dupunkto | |||
_; punktokomo | |||
_, komo | |||
_? demandosigno | |||
_- dividost@-*eko | |||
~ tildo | |||
° gradoI | |||
% p@-*otsento | |||
@ po | |||
& kaI | |||
v vo // not Roman numbers | |||
x ikso | |||
@@ -100,7 +121,8 @@ tra $u $pause | |||
// misc adverbs etc | |||
eĉ $brk | |||
(pli ol) pliol | |||
pli pl,i | |||
plej pl,eI | |||
// pronouns | |||
mi $u |
@@ -102,7 +102,9 @@ | |||
.group o | |||
_) o(_ o | |||
o o | |||
oj OI | |||
oj oI | |||
-) oj (_ %oI | |||
-) on (_ %on | |||
.group p | |||
_) p(_ po | |||
@@ -203,18 +205,13 @@ | |||
\.) . | |||
. (D _:k'omo_ | |||
* asteRisko | |||
pundo | |||
(D _pundOI_: | |||
£ pundo | |||
£ (D _pundoI_: | |||
! ,ekk@-*io, | |||
: dupunkto | |||
- _ | |||
- (_a | |||
$ dolaRo | |||
$ (D _dolaROI_: | |||
% p@-*otsento | |||
@ po | |||
& %kaI | |||
$ (D _dolaRoI_: | |||
@@ -61,7 +61,7 @@ _« kom'iJ^as||iTkJ^'ERDas | |||
_» kom'iJ^as||dE**'EtSas | |||
^ TiRkumfl'exo | |||
€ eU*o | |||
€ eU**o | |||
% pOrTj'Ento | |||
& ampERs'ant | |||
@ a*R'oBa |
@@ -113,11 +113,11 @@ _97 s@t:'a:n@we: | |||
_98 ath:'a:n@we: | |||
_99 nIn^j'a:n@we: | |||
_0C s'O: | |||
_0M1 h@z'a:r | |||
_0M2 d,Vsla:kh@_ // until we fix numbers > 100,000 | |||
_1M2 d,Vsla:kh@_ // until we fix numbers > 100,000 | |||
_0L l'a:kh | |||
_0C s'O: | |||
_0M1 h@z'a:r | |||
_0M2 l'a:kh | |||
_0M3 cr'ore | |||
_0M4 b'ilion // ?? English | |||
_dpt _d@s@ml'o:_ // ?? what is Hi for "." ? | |||
// Single consonants |
@@ -28,12 +28,12 @@ g g@ | |||
_i ibu | |||
j Z@ | |||
k k@ | |||
l l@ | |||
m m@ | |||
n n@ | |||
_l l@ | |||
_m m@ | |||
_n n@ | |||
_o obu | |||
p p@ | |||
r R@ | |||
_r R@ | |||
s s@ | |||
t t@ | |||
_u ubu | |||
@@ -67,11 +67,11 @@ zo'e $u+ // pronoun | |||
i i_: $u // sentence break (recognised by eSpeak program). Try a short pause after ".i" | |||
// end-of-clause [_;_] before these | |||
noi _;_nOI $u | |||
poi _;_pOI $u | |||
noi _;_noI $u | |||
poi _;_poI $u | |||
no'u _;_nohu $u | |||
po'u _;_pohu $u | |||
goi _;_gOI $u | |||
goi _;_goI $u | |||
ija _;_iZa // should this series be unstressed? | |||
ijanai _;_iZanaI | |||
@@ -86,7 +86,7 @@ to _::to $u // start parenthesis, pause but don't raise intonation | |||
// terminators (include a pause after) | |||
ku ku_:: $u | |||
boi bOI_:: $u | |||
boi boI_:: $u | |||
vau vaU_:: $u | |||
kei keI_:: $u | |||
toi tOI_:: $u | |||
toi toI_:: $u |
@@ -8,9 +8,9 @@ | |||
a a | |||
ai aI | |||
au aU | |||
X) a (_ %a // single-syllable cmavo are unstressed | |||
X) ai (_ %aI | |||
X) au (_ %aU | |||
X) a (_ ,a // single-syllable cmavo are unstressed | |||
X) ai (_ ,aI | |||
X) au (_ ,aU | |||
.group b | |||
b b | |||
@@ -25,8 +25,8 @@ | |||
.group e | |||
e e | |||
ei eI | |||
X) e (_ %e | |||
X) ei (_ %eI | |||
X) e (_ ,e | |||
X) ei (_ ,eI | |||
.group f | |||
f f | |||
@@ -42,8 +42,8 @@ | |||
.group i | |||
i i | |||
i (A j | |||
X) i (_ %i | |||
X) i (A_ %j | |||
X) i (_ ,i | |||
X) i (A_ ,j | |||
.group j | |||
j Z | |||
@@ -52,7 +52,7 @@ | |||
k k | |||
.group l | |||
l l- | |||
l l- // syllabic | |||
A) l l | |||
l (A l | |||
@@ -60,15 +60,17 @@ | |||
m m | |||
.group n | |||
n n | |||
n n- // syllabic | |||
A) n n | |||
n (A n | |||
n (g N | |||
n (k N | |||
.group o | |||
o o | |||
oi OI | |||
X) o (_ %o | |||
X) oi (_ %OI | |||
oi oI | |||
X) o (_ ,o | |||
X) oi (_ ,oI | |||
.group p | |||
p p | |||
@@ -77,8 +79,9 @@ | |||
q kw | |||
.group r | |||
r r- | |||
r r- // syllabic | |||
A) r R | |||
C) r (A @-* | |||
r (A R | |||
@@ -92,8 +95,8 @@ | |||
.group u | |||
u u | |||
u (A w | |||
X) u (_ %u | |||
X) u (A_ %w | |||
X) u (_ ,u | |||
X) u (A_ ,w | |||
.group v | |||
v v | |||
@@ -114,9 +117,9 @@ | |||
' h | |||
_) ' (_ '@h@ | |||
. _! | |||
. _! // dot | |||
_) . (@P1 _! // remove . prefix | |||
. (_S1 _! // remove . suffix | |||
ˈ ' // U+2c8 stress marker | |||
ˈ ' // U+2c8 stress marker (from syllable capitalisation) |
@@ -57,15 +57,6 @@ _6C m,ias'ita | |||
_7C m,ias'aba | |||
_8C m,ian'ane | |||
_9C m,iat'isa | |||
_1L l'akim'oJa | |||
_2L l'aki_|mb'ili | |||
_3L l'akit'atu | |||
_4L l'aki'n-ne | |||
_5L l'akit'ano | |||
_6L l'akis'ita | |||
_7L l'akis'aba | |||
_8L l'akin'ane | |||
_9L l'akit'isa | |||
_0M1 'elfu | |||
_1M1 ,elfum'oJa | |||
_2M1 ,elfu_|mb'ili | |||
@@ -76,7 +67,17 @@ _6M1 ,elfus'ita | |||
_7M1 ,elfus'aba | |||
_8M1 ,elfun'ane | |||
_9M1 ,elfut'isa | |||
_0M2 _|mili'oni | |||
_1M2 l'akim'oJa | |||
_2M2 l'aki_|mb'ili | |||
_3M2 l'akit'atu | |||
_4M2 l'aki'n-ne | |||
_5M2 l'akit'ano | |||
_6M2 l'akis'ita | |||
_7M2 l'akis'aba | |||
_8M2 l'akin'ane | |||
_9M2 l'akit'isa | |||
_0M3 _|mili'oni // 1,000,000 | |||
_0M4 _|bili'oni | |||
_dpt _nukta | |||
_0and _|na | |||
@@ -0,0 +1,4 @@ | |||
name catalan-test | |||
language ca | |||
gender male | |||
@@ -0,0 +1,6 @@ | |||
name latvian | |||
language lv | |||
gender male | |||
replace 03 o o: | |||
@@ -1,4 +1,4 @@ | |||
58 phoneme tables | |||
59 phoneme tables | |||
new total | |||
base 101 101 | |||
base2 24 120 | |||
@@ -20,6 +20,7 @@ | |||
hi 52 139 | |||
ta 17 142 | |||
hu 23 117 | |||
lv 28 123 | |||
nl 28 124 | |||
pl 18 114 | |||
sk 29 132 | |||
@@ -32,6 +33,7 @@ | |||
la 21 119 | |||
es 9 121 | |||
es_la 1 121 | |||
ca 9 122 | |||
pt 28 137 | |||
pt_pt 20 137 | |||
ro 36 144 | |||
@@ -55,9 +57,8 @@ | |||
kn 15 141 | |||
bn 59 147 | |||
ne 23 151 | |||
lv 28 123 | |||
hy 17 115 | |||
om 18 118 | |||
om 21 121 | |||
Data file Used by | |||
b/b [b] base | |||
@@ -292,6 +293,7 @@ h/ha [h] base | |||
[<h>] la | |||
[H] bn | |||
[h] bn | |||
[a#] om | |||
h/he [h] base | |||
[h] fi | |||
[H] hi | |||
@@ -360,6 +362,7 @@ l/l [l] base | |||
[l] sk | |||
[l] ru | |||
[l] la | |||
[l] ca | |||
[l] pt | |||
[l;] ro | |||
l/_l [l] base | |||
@@ -375,6 +378,7 @@ l/_l [l] base | |||
[l] sk | |||
[l] ru | |||
[l] la | |||
[l] ca | |||
[l] pt | |||
[l;] ro | |||
[l] zh | |||
@@ -471,6 +475,7 @@ l/tl [l] base | |||
[l] sk | |||
[l] ru | |||
[l] la | |||
[l] ca | |||
[l] pt | |||
[l;] ro | |||
[l] sq | |||
@@ -578,11 +583,11 @@ r3/r_sr [r.] bn | |||
r3/r_trill [R2] base | |||
[R3] base | |||
[r] af | |||
[R2] lv | |||
[r] nl | |||
[R] sk | |||
[r*] sr | |||
[x] pt | |||
[R2] lv | |||
r3/r_trill2 [R] base | |||
[r] cy | |||
[R] hr | |||
@@ -593,10 +598,10 @@ r3/r_trill3.wav [R3] base | |||
[r] af | |||
[r] nl | |||
r3/r_trill.wav [R2] base | |||
[R2] lv | |||
[R] sk | |||
[r*] sr | |||
[x] pt | |||
[R2] lv | |||
r3/r_u [(u)] base | |||
r3/r_ulv [r"] hy | |||
r3/r_uvl [r"] hy | |||
@@ -714,23 +719,23 @@ ufric/s_ [s] base | |||
[z2] fr | |||
[z3] fr | |||
[s2] hu | |||
[s2] lv | |||
[s] es | |||
[s#] pt | |||
[z;] ro | |||
[s2] is | |||
[s2] lv | |||
ufric/s! [s] base | |||
[s2] fi | |||
[s] fr | |||
[s2] hu | |||
[s2] lv | |||
[s] es | |||
[s#] pt | |||
[s2] is | |||
[s2] lv | |||
ufric/s_continue [s2] fi | |||
[s2] hu | |||
[s2] is | |||
[s2] lv | |||
[s2] is | |||
ufric/sh [S] base | |||
[S] fr | |||
[s#] pt_pt | |||
@@ -774,6 +779,7 @@ ufric/x_sr [h] sr | |||
ufric/xx3 [X] base | |||
[Q] hy | |||
ustop/c [c] base | |||
ustop/c_ejc [c`] om | |||
ustop/k [k] base | |||
[k] en | |||
[k] fr | |||
@@ -788,13 +794,14 @@ ustop/k_ [k] base | |||
[k] fr | |||
[kh] hi | |||
[k] hu | |||
[k] lv | |||
[k] sk | |||
[k] el | |||
[kh] zh | |||
[k] sw | |||
[k] sq | |||
[kh] bn | |||
[k] lv | |||
ustop/k_ejc [k`] om | |||
ustop/ki [c] base | |||
[k] base | |||
[k] base2 | |||
@@ -803,6 +810,7 @@ ustop/ki [c] base | |||
[k] fi | |||
[k] fr | |||
[k] hu | |||
[k] lv | |||
[k] sk | |||
[k^] mk | |||
[c] is | |||
@@ -812,7 +820,7 @@ ustop/ki [c] base | |||
[c] tr | |||
[J] ku | |||
[c] ku | |||
[k] lv | |||
ustop/ki_ejc [k`] om | |||
ustop/kl [k] base | |||
[k] base2 | |||
[k] en | |||
@@ -820,12 +828,12 @@ ustop/kl [k] base | |||
[k] fr | |||
[kh] hi | |||
[k] hu | |||
[k] lv | |||
[k] sk | |||
[k] el | |||
[k] zhy | |||
[k] sw | |||
[kh] bn | |||
[k] lv | |||
ustop/kr [k] base | |||
[k] base2 | |||
[k] en | |||
@@ -833,27 +841,27 @@ ustop/kr [k] base | |||
[k] fr | |||
[kh] hi | |||
[k] hu | |||
[k] lv | |||
[k] sk | |||
[k] el | |||
[k] zhy | |||
[k] sw | |||
[kh] bn | |||
[k] lv | |||
ustop/k_unasp [k] base | |||
[k] base2 | |||
[k] fi | |||
[k] hi | |||
[k] hu | |||
[k] lv | |||
[k] sk | |||
[k] el | |||
[k] zhy | |||
[k`] ko | |||
[k] sq | |||
[k] bn | |||
[k] lv | |||
[k`] om | |||
ustop/k_unasp_ [k] zh | |||
ustop/null [?] base | |||
[dh] om | |||
ustop/p [p] base | |||
[p] fr | |||
[p2] fr | |||
@@ -868,6 +876,7 @@ ustop/p_ [p] base | |||
[ph] hy | |||
ustop/p_asp [ph] hi | |||
[ph] hy | |||
ustop/p_ejc [p`] om | |||
ustop/percus10 [(X1] base | |||
ustop/pl [p] base | |||
[p] base2 | |||
@@ -876,13 +885,13 @@ ustop/pl [p] base | |||
[p] hi | |||
[ph] hi | |||
[p] hu | |||
[p] lv | |||
[p] sk | |||
[p] la | |||
[p;] ro | |||
[p] sq | |||
[p] bn | |||
[ph] bn | |||
[p] lv | |||
[ph] hy | |||
ustop/pr [p] base | |||
[p] base2 | |||
@@ -902,6 +911,7 @@ ustop/p_unasp [p] base | |||
[p] fr | |||
[p] hi | |||
[p] hu | |||
[p] lv | |||
[p] sk | |||
[p] la | |||
[p;] ro | |||
@@ -909,18 +919,16 @@ ustop/p_unasp [p] base | |||
[p`] ko | |||
[p] sq | |||
[p] bn | |||
[p] lv | |||
[p`] om | |||
ustop/p_unasp_ [p] base2 | |||
[p] fi | |||
[p] hi | |||
[p] hu | |||
[p] lv | |||
[p] sk | |||
[p] la | |||
[p;] ro | |||
[p] sq | |||
[p] bn | |||
[p] lv | |||
ustop/q [q] base | |||
ustop/q_u [q] base | |||
ustop/t [t] base | |||
@@ -935,13 +943,13 @@ ustop/t_ [t] base | |||
[t3] fr | |||
[t.] hi | |||
[t.h] hi | |||
[t] lv | |||
[t] sk | |||
[t] el | |||
[th] zh | |||
[t] sq | |||
[t.] bn | |||
[t.h] bn | |||
[t] lv | |||
ustop/t_dnt [t] base | |||
[t[] base | |||
[t] base2 | |||
@@ -958,8 +966,8 @@ ustop/t_dnt [t] base | |||
[t] sw | |||
[t`] ko | |||
[t] bn | |||
[t`] om | |||
ustop/t_dnt2 [t[] vi | |||
ustop/t_ejc [t`] om | |||
ustop/t_hi [t] hi | |||
[t] bn | |||
[th] bn | |||
@@ -980,8 +988,17 @@ ustop/ts [ts] base2 | |||
[ts;] ro | |||
[z] zhy | |||
[tsh] zh | |||
[ts] hy | |||
ustop/ts_ [ts] base2 | |||
[ts] de | |||
[ts] eo | |||
[ts] hu | |||
[ts] lv | |||
[ts] pl | |||
[ts] ru | |||
[ts;] ro | |||
[ts] hy | |||
ustop/ts2 [ts] lv | |||
ustop/tsh [tS] base | |||
[c] zhy | |||
ustop/tsh_ [tS] base | |||
@@ -992,11 +1009,11 @@ ustop/t_short [t] fr | |||
[t2] fr | |||
[t3] fr | |||
[t.] hi | |||
[t] lv | |||
[t] sk | |||
[t] el | |||
[t] sq | |||
[t.] bn | |||
[t] lv | |||
ustop/tsh_pzd [tS;] zh | |||
ustop/tsh_pzd_unasp [tS;] zh | |||
ustop/tsh_sr [tS] sr | |||
@@ -1013,7 +1030,6 @@ ustop/ts_pzd2 [c] hi | |||
[c] hu | |||
[c`] ko | |||
[c] bn | |||
[c`] om | |||
ustop/ts_pzd3 [tS;] hr | |||
[dZ;] hr | |||
ustop/t_sr [d] sr | |||
@@ -1090,13 +1106,14 @@ vdiph2/y-y# [yY] fi | |||
vdiph/8u [oU] en_us | |||
[ou] zh | |||
vdiph/aae [aI] en_us | |||
vdiph/aai [aai] zhy | |||
vdiph/aai [ai] lv | |||
[aai] zhy | |||
[aI] ne | |||
[ai] lv | |||
vdiph/aai_2 [AI] af | |||
[AY] cy | |||
vdiph/aai_3 [ai] fi | |||
vdiph/aau [au] fi | |||
[au] lv | |||
vdiph/aau_2 [aU] en_wi | |||
[aU] de | |||
[VU] nl | |||
@@ -1157,8 +1174,8 @@ vdiph/eei [EI] base2 | |||
[eI] hy | |||
vdiph/eei_2 [eI] eo | |||
[ei] fi | |||
[eI] id | |||
[ei] lv | |||
[eI] id | |||
vdiph/eei_3 [eI] sk | |||
[eI] ku | |||
vdiph/eeu [EU] pt | |||
@@ -1214,18 +1231,17 @@ vdiph/ooi [OI] en | |||
[OI] en_n | |||
[OI] cy | |||
[OY] cy | |||
[OI] eo | |||
[oI] eo | |||
[oi] fi | |||
[oi] lv | |||
[OI] no | |||
[oi] zhy | |||
[oi] lv | |||
vdiph/ooi_2 [OI] af | |||
vdiph/ooi_3 [OI] en_rp | |||
[aI] en_wm | |||
vdiph/ooi_4 [OI] en_us | |||
vdiph/oou [oU] cs | |||
[OU] grc | |||
[au] lv | |||
vdiph/ou [ou] fi | |||
[ou] zhy | |||
[oU] ne | |||
@@ -1241,10 +1257,10 @@ vdiph/@u_en [oU] en | |||
vdiph/ui [uI] base2 | |||
[uI] eo | |||
[ui] fi | |||
[ui] lv | |||
[uI] vi | |||
[ui] zhy | |||
[uI] ne | |||
[ui] lv | |||
vdiph/u-i [yI] vi | |||
vdiph/ui_2 [uI] af | |||
vdiph/ui_3 [uI] cy | |||
@@ -1322,6 +1338,7 @@ voc/dh [D] base | |||
[D] es | |||
voc/dh_ [D] base | |||
[D] es | |||
voc/dh_om [d`] om | |||
voc/j [J^] base | |||
voc/Q [Q] base | |||
[Q^] base | |||
@@ -1415,11 +1432,11 @@ vowel/@- [@-] base | |||
vowel/& [a] en_rp | |||
[&] fi | |||
[&:] hi | |||
[&] lv | |||
[&:] lv | |||
[&] sv | |||
[&] bn | |||
[&:] bn | |||
[&] lv | |||
[&:] lv | |||
vowel/0 [0] en | |||
[O] hi | |||
[O] pt | |||
@@ -1482,13 +1499,13 @@ vowel/a# [a/] base2 | |||
[&/] pt | |||
[a#] rw | |||
[a/] rw | |||
[a] om | |||
vowel/a_2 [a] base2 | |||
[a] en_wi | |||
[A:] en_wi | |||
[A@] en_wi | |||
[aa] en_wi | |||
[a] eo | |||
[a] ca | |||
[A] pt | |||
[a] pt_pt | |||
[a] ro | |||
@@ -1502,6 +1519,7 @@ vowel/a#_2 [a#] pl | |||
[a/] sw | |||
[a/] sq | |||
[a] kn | |||
[a] om | |||
vowel/a_3 [a] en_sc | |||
[a/] en_sc | |||
[A:] en_sc | |||
@@ -1511,7 +1529,6 @@ vowel/a_3 [a] en_sc | |||
[a] is | |||
[A] zh | |||
[A] da | |||
[a:] om | |||
vowel/a#_3 [a2] en | |||
[a2] en_n | |||
[a2] en_us | |||
@@ -1520,6 +1537,7 @@ vowel/a#_3 [a2] en | |||
[@] de | |||
[a] hi | |||
[a#] ru | |||
[a#] ca | |||
[&] pt_pt | |||
[&/] pt_pt | |||
[@] vi | |||
@@ -1528,6 +1546,7 @@ vowel/a#_3 [a2] en | |||
[a] bn | |||
vowel/a_4 [a/] el | |||
[a] vi | |||
[a:] om | |||
vowel/a_5 [a:] ta | |||
[a/] it | |||
[a] la | |||
@@ -1537,15 +1556,16 @@ vowel/a_5 [a:] ta | |||
[a] sq | |||
[a:] ml | |||
[a:] kn | |||
[A:] om | |||
vowel/a_6 [a] fr | |||
[a2] fr | |||
vowel/aa [a] fi | |||
[A] fr_ca | |||
[a] lv | |||
[a:] lv | |||
[A] no | |||
[A:] no | |||
[aa] zhy | |||
[a] lv | |||
[a:] lv | |||
[a] hy | |||
vowel/aa# [O] en_us | |||
[0] en_wi | |||
@@ -1598,6 +1618,7 @@ vowel/e [e] base2 | |||
[e] hr | |||
[e/] it | |||
[e:] la | |||
[e] ca | |||
[e] pt | |||
[e] pt_pt | |||
[e] grc | |||
@@ -1657,7 +1678,6 @@ vowel/ee_6 [&] sk | |||
[E3] sv | |||
[E] ku | |||
vowel/e_mid [E] en_rp | |||
[e] jbo | |||
[E] fr | |||
[E2] fr | |||
[E] fr_ca | |||
@@ -1669,6 +1689,7 @@ vowel/e_mid [E] en_rp | |||
[E] hr | |||
[E] it | |||
[E] es | |||
[E] ca | |||
[E] pt_pt | |||
[E] no | |||
[E] is | |||
@@ -1682,7 +1703,10 @@ vowel/e_mid2 [E] af | |||
[E] de | |||
[E2] de | |||
[E:] de | |||
[e] jbo | |||
[e] fi | |||
[e] lv | |||
[e:] lv | |||
[E] nl | |||
[e] sk | |||
[e] es | |||
@@ -1692,8 +1716,6 @@ vowel/e_mid2 [E] af | |||
[e] tr | |||
[E] ko | |||
[e] kn | |||
[e] lv | |||
[e:] lv | |||
[E] om | |||
vowel/@_fnt [@] en_wi | |||
[@2] en_wi | |||
@@ -1738,7 +1760,6 @@ vowel/i_3 [i] af | |||
[i] fr | |||
[i:] fr | |||
[i:] sk | |||
[i:] lv | |||
vowel/i_4 [i] fi | |||
[i] hu | |||
[I] it | |||
@@ -1749,13 +1770,14 @@ vowel/i#_5 [y] pt_pt | |||
[i/] pt_pt | |||
[y] ro | |||
vowel/i_6 [i] ta | |||
[i] lv | |||
[i:] lv | |||
[i] hr | |||
[i] tr | |||
[i] ku | |||
[i] id | |||
[i] ml | |||
[i] kn | |||
[i] lv | |||
vowel/i#_6 [i.] zh | |||
vowel/i_7 [i] pl | |||
vowel/i#_7 [i[] zh | |||
@@ -1845,6 +1867,7 @@ vowel/o [o] base2 | |||
[o] it | |||
[o/] it | |||
[O] la | |||
[o] ca | |||
[o] pt_pt | |||
[o] grc | |||
[o:] sv | |||
@@ -1898,6 +1921,7 @@ vowel/oo [O:] en_sc | |||
[O] de | |||
[o] eo | |||
[O] es | |||
[O] ca | |||
[o] el | |||
[O] sv | |||
[O] no | |||
@@ -1912,13 +1936,13 @@ vowel/oo_1 [O:] en_n | |||
[O@] en_wm | |||
[O:] af | |||
[o] fi | |||
[o] lv | |||
[o:] lv | |||
[o] sk | |||
[o:] sk | |||
[o] hr | |||
[o] vi | |||
[O/] sq | |||
[o] lv | |||
[o:] lv | |||
vowel/oo_2 [0] en_sc | |||
[O] cy | |||
[o] cs | |||
@@ -1987,7 +2011,9 @@ vowel/u#_3 [U] ta | |||
[U:] ta | |||
vowel/u#_4 [U] en_sc | |||
[u:] en_sc | |||
vowel/u_5 [u] sw | |||
vowel/u_5 [u] lv | |||
[u:] lv | |||
[u] sw | |||
vowel/u_6 [U] pt_pt | |||
[u] pt_pt | |||
[u] ku | |||
@@ -2026,9 +2052,8 @@ vowel/uu [U] en | |||
vowel/uu# [U] ku | |||
vowel/uu_2 [U] base2 | |||
[U] de | |||
[U] ca | |||
[U] tr | |||
[u] lv | |||
[u:] lv | |||
vowel/uu_3 [u] af | |||
[y] zh | |||
vowel/uu_4 [U] fi | |||
@@ -2101,7 +2126,6 @@ vowel/yy_4 [y] de | |||
[y] is | |||
[y] hy | |||
vweak/@ [@#] om | |||
vweak/a [a#] om | |||
vwl_af/@ [@] af | |||
vwl_af/I [I] af | |||
vwl_af/r@ [@] af | |||
@@ -2233,6 +2257,7 @@ vwl_zh/ung [ung] zhy | |||
vwl_zh/uo [uo] zh | |||
vwl_zh/y& [y&] zh | |||
vwl_zh/yee [yE] zh | |||
w/iw_ [w/] base | |||
w/w [w] base | |||
w/_w [w] base | |||
[w] zh | |||
@@ -2262,6 +2287,7 @@ w/wu [(u)] base | |||
w/xw [(@)] base | |||
[(a)] base | |||
[(e)] base | |||
[(i)] base | |||
[(o)] base | |||
[(u)] base | |||
x/b [b] base | |||
@@ -2281,6 +2307,7 @@ x/d [d] base | |||
[d] is | |||
[d] id | |||
[d.] bn | |||
[d`] om | |||
x/d_ [d] base | |||
[d[] base | |||
[d] base2 |
@@ -161,6 +161,7 @@ phoneme ts | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/ts | |||
before _ ustop/ts_ | |||
endphoneme | |||
@@ -0,0 +1,67 @@ | |||
//==================================================== | |||
// Catalan - based on Spanish and Base2 | |||
//==================================================== | |||
phoneme a | |||
vowel starttype (a) endtype (a) | |||
length 180 | |||
formants vowel/a_2 | |||
reduceto a# 2 | |||
endphoneme | |||
phoneme a# | |||
vowel starttype (@) endtype (@) | |||
length 170 | |||
formants vowel/a#_3 | |||
unstressed | |||
endphoneme | |||
phoneme e | |||
vowel starttype (e) endtype (e) | |||
length 170 | |||
formants vowel/e | |||
reduceto @ 2 | |||
endphoneme | |||
phoneme E | |||
vowel starttype (e) endtype (e) | |||
length 180 | |||
formants vowel/e_mid | |||
reduceto @ 2 | |||
endphoneme | |||
phoneme O | |||
vowel starttype (o) endtype (o) | |||
length 170 | |||
formants vowel/oo | |||
reduceto U 2 | |||
endphoneme | |||
phoneme o | |||
vowel starttype (o) endtype (o) | |||
length 170 | |||
formants vowel/o | |||
reduceto U 2 | |||
endphoneme | |||
phoneme U | |||
vowel starttype (u) endtype (u) | |||
length 160 | |||
formants vowel/uu_2 | |||
unstressed | |||
endphoneme | |||
phoneme l | |||
liquid | |||
length 100 | |||
lengthmod 7 | |||
beforenotvowel l/2 // use 'dark' [l] after a vowel | |||
formants l/l | |||
after _ l/_l t l/tl | |||
endphoneme | |||
@@ -42,42 +42,42 @@ endphoneme | |||
phoneme aU | |||
vowel starttype (a) endtype (u) | |||
length 230 | |||
length 250 | |||
formants vdiph/au_4 | |||
endphoneme | |||
phoneme eU | |||
vowel starttype (e) endtype (u) | |||
length 230 | |||
length 250 | |||
formants vdiph/eu | |||
endphoneme | |||
phoneme aI | |||
vowel starttype (a) endtype (i) | |||
length 240 | |||
length 250 | |||
formants vdiph/ai | |||
endphoneme | |||
phoneme eI | |||
vowel starttype (e) endtype (i) | |||
length 230 | |||
length 250 | |||
formants vdiph/eei_2 | |||
endphoneme | |||
phoneme OI | |||
phoneme oI | |||
vowel starttype (o) endtype (i) | |||
length 240 | |||
length 250 | |||
formants vdiph/ooi | |||
endphoneme | |||
phoneme uI | |||
vowel starttype (u) endtype (i) | |||
length 230 | |||
length 250 | |||
formants vdiph/ui | |||
endphoneme | |||
@@ -93,6 +93,7 @@ phoneme ts | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/ts | |||
before _ ustop/ts_ | |||
endphoneme | |||
@@ -218,6 +218,7 @@ phoneme ts | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/ts | |||
before _ ustop/ts_ | |||
endphoneme | |||
@@ -102,6 +102,7 @@ phoneme ts | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/ts | |||
before _ ustop/ts_ | |||
endphoneme | |||
@@ -0,0 +1,217 @@ | |||
phoneme @ | |||
vowel starttype (@) endtype (@) | |||
length 120 | |||
formants vowel/8_7 | |||
endphoneme | |||
phoneme i | |||
vowel starttype (i) endtype (i) | |||
length 110 | |||
formants vowel/i_6 | |||
linkout ; | |||
endphoneme | |||
phoneme i: | |||
vowel starttype (i) endtype (i) | |||
length 300 | |||
formants vowel/i_6 | |||
linkout ; | |||
endphoneme | |||
phoneme e | |||
vowel starttype (e) endtype (e) | |||
length 120 | |||
formants vowel/e_mid2 | |||
endphoneme | |||
phoneme e: | |||
vowel starttype (e) endtype (e) | |||
length 330 | |||
formants vowel/e_mid2 | |||
endphoneme | |||
phoneme & | |||
vowel starttype (a) endtype (a) | |||
length 120 | |||
formants vowel/& | |||
endphoneme | |||
phoneme &: | |||
vowel starttype (a) endtype (a) | |||
length 330 | |||
formants vowel/& | |||
endphoneme | |||
phoneme a | |||
vowel starttype (a) endtype (a) | |||
length 120 | |||
formants vowel/aa | |||
endphoneme | |||
phoneme a: | |||
vowel starttype (a) endtype (a) | |||
length 350 | |||
formants vowel/aa | |||
endphoneme | |||
phoneme o | |||
vowel starttype (o) endtype (o) | |||
length 120 | |||
formants vowel/oo_1 | |||
endphoneme | |||
phoneme o: | |||
vowel starttype (o) endtype (o) | |||
length 330 | |||
formants vowel/oo_1 | |||
endphoneme | |||
phoneme u | |||
vowel starttype (u) endtype (u) | |||
length 110 | |||
formants vowel/u_5 | |||
endphoneme | |||
phoneme u: | |||
vowel starttype (u) endtype (u) | |||
length 330 | |||
formants vowel/u_5 | |||
endphoneme | |||
phoneme ai | |||
vowel starttype (a) endtype (i) | |||
length 310 | |||
long | |||
formants vdiph/aai | |||
endphoneme | |||
phoneme ei | |||
vowel starttype (e) endtype (i) | |||
length 300 | |||
long | |||
formants vdiph/eei_2 | |||
endphoneme | |||
phoneme au | |||
vowel starttype (a) endtype (u) | |||
length 300 | |||
long | |||
formants vdiph/aau | |||
endphoneme | |||
phoneme uo | |||
vowel starttype (u) endtype (a) | |||
length 300 | |||
long | |||
// formants vdiph2/uo | |||
formants vdiph2/uaa | |||
endphoneme | |||
phoneme oi | |||
vowel starttype (o) endtype (i) | |||
length 290 | |||
long | |||
formants vdiph/ooi | |||
endphoneme | |||
phoneme ui | |||
vowel starttype (u) endtype (i) | |||
length 290 | |||
long | |||
formants vdiph/ui | |||
endphoneme | |||
phoneme ie | |||
vowel starttype (i) endtype (e) | |||
length 290 | |||
long | |||
formants vdiph2/ie_2 | |||
endphoneme | |||
phoneme iu | |||
vowel starttype (i) endtype (u) | |||
length 290 | |||
long | |||
formants vdiph2/iu_2 | |||
endphoneme | |||
phoneme R2 // this is [R] from Slovak/Czech | |||
liquid | |||
vowelin f1=0 f2=1700 -300 300 f3=-300 80 | |||
vowelout f1=2 f2=1700 -300 300 f3=-300 80 brk | |||
formants r3/r_trill+r3/r_trill.wav%50 | |||
trill | |||
lengthmod 6 | |||
endphoneme | |||
phoneme ts | |||
vls alv afr sibilant lengthenstop | |||
vowelin f1=0 f2=1700 -300 300 f3=-100 80 | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/ts2%90 | |||
before _ ustop/ts_ | |||
endphoneme | |||
phoneme t // try disable this and try using English [t] | |||
vls alv stop lengthenstop | |||
vowelin f1=0 f2=1700 -300 300 f3=-100 80 | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/t_short%115 | |||
before _ ustop/t_ | |||
switchvoicing d | |||
endphoneme | |||
phoneme p | |||
vls blb stop | |||
vowelin f1=0 f2=1000 -50 -100 f3=-200 80 | |||
vowelout f1=0 f2=1000 -500 -350 f3=-300 80 rms=30 | |||
lengthmod 2 | |||
wave ustop/p_unasp%120 | |||
before _ ustop/p_unasp_%80 | |||
before l ustop/pl | |||
switchvoicing b | |||
endphoneme | |||
phoneme k | |||
vls vel stop | |||
vowelin f1=0 f2=2300 200 400 f3=-100 80 | |||
vowelout f1=0 f2=2300 300 400 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/k_unasp%60 // weaker | |||
before _ ustop/k_ | |||
before r ustop/kr | |||
before l ustop/kl | |||
before (i) ustop/ki | |||
before l^ ustop/ki | |||
switchvoicing g | |||
endphoneme | |||
phoneme s2 // second part of long [s:] | |||
vls alv frc sibilant | |||
vowelin f1=0 f2=1700 -300 300 f3=-100 80 | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
lengthmod 3 | |||
wave ufric/s_continue | |||
before _ ufric/s_ | |||
before p ufric/s! | |||
before t ufric/s! | |||
before k ufric/s! | |||
switchvoicing z | |||
endphoneme | |||
@@ -17,7 +17,7 @@ endphoneme | |||
phoneme e | |||
vowel starttype (e) endtype (e) | |||
length 170 | |||
formants vowel/e_mid | |||
formants vowel/e_mid2 | |||
endphoneme | |||
@@ -135,6 +135,7 @@ phoneme ts | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/ts | |||
before _ ustop/ts_ | |||
endphoneme | |||
@@ -219,6 +219,7 @@ phoneme ts; | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 colr=1 | |||
lengthmod 2 | |||
wave ustop/ts | |||
before _ ustop/ts_ | |||
endphoneme | |||
@@ -40,6 +40,7 @@ phoneme ts | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/ts | |||
before _ ustop/ts_ | |||
endphoneme | |||
@@ -83,7 +83,7 @@ endphoneme | |||
phoneme u: | |||
vowel starttype (u) endtype (u) | |||
length 250 | |||
length 260 | |||
formants vowel/u | |||
endphoneme | |||
@@ -115,7 +115,7 @@ phoneme _| // Very short pause | |||
formants NULL | |||
starttype _ endtype _ | |||
length 1 | |||
lengthmod 1 | |||
lengthmod 5 | |||
endphoneme | |||
phoneme _: // Standard pause | |||
@@ -301,6 +301,7 @@ phoneme (i) virtual | |||
before N DFT-40+nn/inn | |||
before ; DFT-40+j2/xj2 | |||
before l DFT-40+l/xl | |||
before w DFT-30+w/xw | |||
before j DFT-30+j/xj | |||
before r- DFT-60+r/xr | |||
before r/ DFT+r3/r_i | |||
@@ -498,6 +499,7 @@ phoneme w/ // used for [w] when not before a vowel | |||
vowelout len=50 | |||
length 100 | |||
formants w/w_ | |||
after (i) w/iw_ | |||
lengthmod 7 | |||
beforevowel w | |||
endphoneme | |||
@@ -1224,6 +1226,9 @@ include ph_tamil | |||
phonemetable hu base | |||
include ph_hungarian | |||
phonemetable lv base | |||
include ph_latvian | |||
phonemetable nl base | |||
include ph_dutch | |||
@@ -1261,6 +1266,9 @@ include ph_spanish | |||
phonemetable es_la es | |||
include ph_spanish_la | |||
phonemetable ca es | |||
include ph_catalan | |||
phonemetable pt base2 | |||
include ph_pt_brazil | |||
@@ -1333,11 +1341,9 @@ include ph_bengali | |||
phonemetable ne hi | |||
include ph_nepali | |||
phonemetable lv base | |||
include ph_latvian | |||
phonemetable hy base | |||
include ph_armenian_west | |||
phonemetable om base | |||
include ph_oromo | |||
@@ -264,6 +264,7 @@ static keywtab_t keywords[] = { | |||
{"long", 0x2000000+phLONG}, | |||
{"brkafter", 0x2000000+phBRKAFTER}, | |||
{"nonsyllabic",0x2000000+phNONSYLLABIC}, | |||
{"lengthenstop",0x2000000+phLENGTHENSTOP}, | |||
// voiced / unvoiced | |||
{"vcd", 0x2000000+phVOICED}, | |||
@@ -602,7 +603,7 @@ int Compile::LoadSpect(const char *path, int control) | |||
float total; | |||
float pkheight; | |||
int marker1_set=0; | |||
int frame_vowelbreak=NULL; | |||
int frame_vowelbreak=0; | |||
SpectFrame *fr; | |||
wxString path_sep = _T("/"); | |||
@@ -50,6 +50,7 @@ static int debug_flag = 0; | |||
int hash_counts[N_HASH_DICT]; | |||
char *hash_chains[N_HASH_DICT]; | |||
char letterGroupsDefined[N_LETTER_GROUPS]; | |||
MNEM_TAB mnem_flags[] = { | |||
// these in the first group put a value in bits0-3 of dictionary_flags | |||
@@ -199,6 +200,10 @@ static char nullstring[] = {0}; | |||
text_not_phonemes = 0; | |||
phonetic = word = nullstring; | |||
if(memcmp(linebuf,"_-",2)==0) | |||
{ | |||
step=1; // TEST | |||
} | |||
p = linebuf; | |||
// while(isspace2(*p)) p++; | |||
@@ -313,7 +318,7 @@ static char nullstring[] = {0}; | |||
break; | |||
case 1: | |||
if(c == '-') | |||
if((c == '-') && (word[0] != '_')) | |||
{ | |||
flag_codes[n_flag_codes++] = BITNUM_FLAG_HYPHENATED; | |||
c = ' '; | |||
@@ -798,10 +803,16 @@ void copy_rule_string(char *string, int &state) | |||
c = *p++ - '0'; | |||
value = *p++ - '0'; | |||
c = c * 10 + value; | |||
if((value < 0) || (value > 9) || (c <= 0) || (c >= N_LETTER_GROUPS)) | |||
if((value < 0) || (value > 9)) | |||
{ | |||
c = 0; | |||
fprintf(f_log,"%5d: Expected 2 digits after 'L'",linenum); | |||
fprintf(f_log,"%5d: Expected 2 digits after 'L'\n",linenum); | |||
error_count++; | |||
} | |||
else | |||
if((c <= 0) || (c >= N_LETTER_GROUPS) || (letterGroupsDefined[(int)c] == 0)) | |||
{ | |||
fprintf(f_log,"%5d: Letter group L%.2d not defined\n",linenum,c); | |||
error_count++; | |||
} | |||
c += 'A'; | |||
@@ -1249,18 +1260,25 @@ static int compile_lettergroup(char *input, FILE *f_out) | |||
p = input; | |||
if(!isdigit(p[0]) || !isdigit(p[1])) | |||
{ | |||
fprintf(f_log,"%5d: Expected 2 digits after '.L'\n",linenum); | |||
error_count++; | |||
return(1); | |||
} | |||
group = atoi(&p[1]); | |||
group = atoi(&p[0]); | |||
if(group >= N_LETTER_GROUPS) | |||
{ | |||
fprintf(f_log,"%5d: lettergroup out of range (01-%.2d)\n",linenum,N_LETTER_GROUPS); | |||
error_count++; | |||
return(1); | |||
} | |||
while(!isspace2(*p)) p++; | |||
fputc(RULE_GROUP_START,f_out); | |||
fputc(RULE_LETTERGP2,f_out); | |||
fputc(group + 'A', f_out); | |||
letterGroupsDefined[group] = 1; | |||
for(;;) | |||
{ | |||
@@ -1346,11 +1364,7 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||
if(memcmp(buf,".L",2)==0) | |||
{ | |||
if(compile_lettergroup(&buf[2], f_out) != 0) | |||
{ | |||
fprintf(f_log,"%5d: Bad lettergroup\n",linenum); | |||
error_count++; | |||
} | |||
compile_lettergroup(&buf[2], f_out); | |||
continue; | |||
} | |||
@@ -1509,6 +1523,8 @@ int CompileDictionary(const char *dsource, const char *dict_name, FILE *log, cha | |||
char path[sizeof(path_home)+40]; // path_dsource+20 | |||
error_count = 0; | |||
memset(letterGroupsDefined,0,sizeof(letterGroupsDefined)); | |||
debug_flag = flags & 1; | |||
if(dsource == NULL) |
@@ -686,6 +686,8 @@ int Translator::IsLetterGroup(char *word, int group, int pre) | |||
char *w; | |||
p = letterGroups[group]; | |||
if(p == NULL) | |||
return(0); | |||
while(*p != RULE_GROUP_END) | |||
{ | |||
@@ -1012,7 +1014,9 @@ void Translator::SetWordStress(char *output, unsigned int dictionary_flags, int | |||
int unstressed_word = 0; | |||
char *max_output; | |||
int final_ph; | |||
int final_ph2; | |||
int mnem; | |||
int mnem2; | |||
int post_tonic; | |||
int opt_length; | |||
int done; | |||
@@ -1043,6 +1047,7 @@ void Translator::SetWordStress(char *output, unsigned int dictionary_flags, int | |||
} | |||
if(ix == 0) return; | |||
final_ph = phonetic[ix-1]; | |||
final_ph2 = phonetic[ix-2]; | |||
max_output = output + (N_WORD_PHONEMES-3); /* check for overrun */ | |||
@@ -1131,7 +1136,14 @@ void Translator::SetWordStress(char *output, unsigned int dictionary_flags, int | |||
else | |||
{ | |||
mnem = phoneme_tab[final_ph]->mnemonic; | |||
if((mnem != 'n') && (mnem != 's')) | |||
mnem2 = phoneme_tab[final_ph2]->mnemonic; | |||
if((mnem == 's') && (mnem2 == 'n')) | |||
{ | |||
// -ns stress remains on penultimate syllable | |||
} | |||
else | |||
if(((mnem != 'n') && (mnem != 's')) || (phoneme_tab[final_ph2]->type != phVOWEL)) | |||
{ | |||
stressed_syllable = vowel_count - 1; | |||
} |
@@ -756,6 +756,7 @@ int Translator::TranslateRoman(char *word, char *ph_out) | |||
int subtract; | |||
int repeat = 0; | |||
unsigned int flags; | |||
char ph_roman[30]; | |||
char number_chars[N_WORD_BYTES]; | |||
static const char *roman_numbers = "ixcmvld"; | |||
@@ -806,11 +807,20 @@ int Translator::TranslateRoman(char *word, char *ph_out) | |||
if(acc > langopts.max_roman) | |||
return(0); | |||
Lookup("_roman",ph_out); // precede by "roman" if _rom is defined in *_list | |||
p = &ph_out[strlen(ph_out)]; | |||
Lookup("_roman",ph_roman); // precede by "roman" if _rom is defined in *_list | |||
p = &ph_out[0]; | |||
if((langopts.numbers & NUM_ROMAN_AFTER) == 0) | |||
{ | |||
strcpy(ph_out,ph_roman); | |||
p = &ph_out[strlen(ph_out)]; | |||
} | |||
sprintf(number_chars," %d ",acc); | |||
TranslateNumber(&number_chars[1],p,&flags,0); | |||
if(langopts.numbers & NUM_ROMAN_AFTER) | |||
strcat(ph_out,ph_roman); | |||
return(1); | |||
} // end of TranslateRoman | |||
@@ -1236,24 +1246,6 @@ int Translator::TranslateNumber_1(char *word, char *ph_out, unsigned int *flags, | |||
{ | |||
if((thousandplex > 0) && (value < 1000)) | |||
{ | |||
if(langopts.numbers2 & 0x100) | |||
{ | |||
if((thousandplex == 1) && (value >= 100)) | |||
{ | |||
// special word for 100,000's | |||
char ph_buf3[20]; | |||
sprintf(string,"_%dL",value / 100); | |||
if(Lookup(string,ph_buf2) == 0) | |||
{ | |||
LookupNum2(value/100,0,ph_buf2); | |||
Lookup("_0L",ph_buf3); | |||
strcat(ph_buf2,ph_buf3); | |||
} | |||
value %= 100; | |||
if(value == 0) | |||
suppress_null = 1; | |||
} | |||
} | |||
if((suppress_null == 0) && (LookupThousands(value,thousandplex,ph_append))) | |||
{ | |||
// found an exact match for N thousand |
@@ -56,8 +56,9 @@ | |||
#define phBEFORENOTVOWEL2 0x1000 | |||
#define phSWITCHVOICING 0x0800 | |||
#define phNONSYLLABIC 0x100000 // don't count this vowel as a syllable when finding the stress position | |||
#define phLONG 0x200000 | |||
#define phNONSYLLABIC 0x100000 // don't count this vowel as a syllable when finding the stress position | |||
#define phLONG 0x200000 | |||
#define phLENGTHENSTOP 0x400000 // make the pre-pause slightly longer | |||
// fixed phoneme code numbers, these can be used from the program code | |||
#define phonCONTROL 1 |
@@ -557,7 +557,11 @@ void Translator::MakePhonemeList(int post_pause, int start_sentence) | |||
{ | |||
if((x = (langopts.word_gap & 0x7)) != 0) | |||
{ | |||
insert_ph = pause_phonemes[x]; | |||
if((x > 1) || ((insert_ph != phonPAUSE_SHORT) && (insert_ph != phonPAUSE_NOLINK))) | |||
{ | |||
// don't reduce the pause | |||
insert_ph = pause_phonemes[x]; | |||
} | |||
} | |||
if(option_wordgap > 0) | |||
{ |
@@ -320,6 +320,9 @@ void Translator::CalcLengths() | |||
if((langopts.word_gap & 0x10) && (p->newword)) | |||
p->prepause = 60; | |||
if(p->ph->phflags & phLENGTHENSTOP) | |||
p->prepause += 30; | |||
if(p->synthflags & SFLAG_LENGTHEN) | |||
p->prepause += langopts.long_stop; | |||
break; | |||
@@ -487,9 +490,9 @@ void Translator::CalcLengths() | |||
} | |||
// calc length modifier | |||
if(next->ph->code == phonPAUSE_VSHORT) | |||
if((next->ph->code == phonPAUSE_VSHORT) && (next2->type == phPAUSE)) | |||
{ | |||
// ignore very short pause | |||
// if PAUSE_VSHORT is followed by a pause, then use that | |||
next = next2; | |||
next2 = next3; | |||
next3 = &phoneme_list[ix+4]; |
@@ -35,7 +35,7 @@ | |||
#include "translate.h" | |||
#include "wave.h" | |||
const char *version_string = "1.39.22 10.Nov.08"; | |||
const char *version_string = "1.39.26 15.Nov.08"; | |||
const int version_phdata = 0x013900; | |||
int option_device_number = -1; |
@@ -275,7 +275,7 @@ Translator *SelectTranslator(const char *name) | |||
case L('e','o'): | |||
{ | |||
static const short stress_lengths_eo[8] = {145, 145, 200, 170, 0, 0, 320, 340}; | |||
static const short stress_lengths_eo[8] = {145, 145, 230, 170, 0, 0, 360, 370}; | |||
static const unsigned char stress_amps_eo[] = {16,14, 20,20, 20,24, 24,22 }; | |||
static const wchar_t eo_char_apostrophe[2] = {'l',0}; | |||
@@ -286,6 +286,7 @@ Translator *SelectTranslator(const char *name) | |||
tr->char_plus_apostrophe = eo_char_apostrophe; | |||
tr->langopts.word_gap = 1; | |||
tr->langopts.vowel_pause = 2; | |||
tr->langopts.stress_rule = 2; | |||
tr->langopts.stress_flags = 0x6 | 0x10; | |||
tr->langopts.unstressed_wd1 = 3; | |||
@@ -296,9 +297,12 @@ Translator *SelectTranslator(const char *name) | |||
break; | |||
case L('e','s'): // Spanish | |||
case L('c','a'): // Catalan | |||
{ | |||
static const short stress_lengths_es[8] = {170, 200, 180, 180, 0, 0, 220, 250}; | |||
static const short stress_lengths_es[8] = {180, 210, 190, 190, 0, 0, 230, 260}; | |||
// static const short stress_lengths_es[8] = {170, 200, 180, 180, 0, 0, 220, 250}; | |||
static const unsigned char stress_amps_es[8] = {16,12, 18,18, 20,20, 20,20 }; // 'diminished' is used to mark a quieter, final unstressed syllable | |||
static const wchar_t ca_punct_within_word[] = {'\'',0xb7,0}; // ca: allow middle-dot within word | |||
tr = new Translator(); | |||
SetupTranslator(tr,stress_lengths_es,stress_amps_es); | |||
@@ -313,7 +317,13 @@ Translator *SelectTranslator(const char *name) | |||
tr->langopts.unstressed_wd2 = 2; | |||
tr->langopts.param[LOPT_SONORANT_MIN] = 120; // limit the shortening of sonorants before short vowels | |||
tr->langopts.numbers = 0x529 + NUM_ROMAN; | |||
tr->langopts.numbers = 0x529 + NUM_ROMAN + NUM_ROMAN_AFTER; | |||
if(name2 == L('c','a')) | |||
{ | |||
tr->punct_within_word = ca_punct_within_word; | |||
tr->langopts.stress_flags = 0x200 | 0x6 | 0x30; // stress last syllable unless word ends with a vowel | |||
} | |||
} | |||
break; | |||
@@ -369,7 +379,7 @@ Translator *SelectTranslator(const char *name) | |||
tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable | |||
tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable | |||
tr->langopts.numbers = 0x011; | |||
tr->langopts.numbers2 = 0x100; | |||
tr->langopts.numbers2 = NUM2_100000; | |||
tr->letter_bits_offset = OFFSET_DEVANAGARI; | |||
SetIndicLetters(tr); | |||
} | |||
@@ -515,12 +525,13 @@ SetLengthMods(tr,3); // all equal | |||
case L_jbo: // Lojban | |||
{ | |||
static const short stress_lengths_jbo[8] = {185,170, 200,200, 0,0, 290,300}; | |||
static const short stress_lengths_jbo[8] = {145,145, 170,160, 0,0, 330,350}; | |||
static const wchar_t jbo_punct_within_word[] = {'.',',','\'',0x2c8,0}; // allow period and comma within a word, also stress marker (from LOPT_SYLLABLE_CAPS) | |||
tr = new Translator(); | |||
SetupTranslator(tr,stress_lengths_jbo,NULL); | |||
tr->langopts.stress_rule = 2; | |||
tr->langopts.vowel_pause = 0x20c; // pause before a word which starts with a vowel, or after a word which ends in a consonant | |||
// tr->langopts.word_gap = 1; | |||
tr->punct_within_word = jbo_punct_within_word; | |||
tr->langopts.param[LOPT_SYLLABLE_CAPS] = 1; // capitals indicate stressed syllables | |||
SetLetterVowel(tr,'y'); | |||
@@ -577,8 +588,10 @@ SetLengthMods(tr,3); // all equal | |||
case L('l','v'): // latvian | |||
{ | |||
static const unsigned char stress_amps_lv[8] = {17,14, 20,20, 20,22, 22,22 }; | |||
static const short stress_lengths_lv[8] = {180,140, 210,210, 0,0, 210,210}; | |||
tr = new Translator(); | |||
SetupTranslator(tr,stress_lengths_sk,stress_amps_sk); | |||
SetupTranslator(tr,stress_lengths_lv,stress_amps_lv); | |||
tr->langopts.stress_rule = 0; | |||
tr->langopts.spelling_stress = 1; | |||
@@ -636,6 +649,17 @@ SetLengthMods(tr,3); // all equal | |||
} | |||
break; | |||
case L('o','m'): | |||
{ | |||
static const unsigned char stress_amps_om[] = {16,16, 20,20, 20,24, 24,22 }; | |||
static const short stress_lengths_om[8] = {200,200, 200,200, 0,0, 200,200}; | |||
tr = new Translator(); | |||
SetupTranslator(tr,stress_lengths_om,stress_amps_om); | |||
tr->langopts.stress_rule = 3; | |||
} | |||
break; | |||
case L('p','l'): // Polish | |||
{ | |||
static const short stress_lengths_pl[8] = {160, 190, 175, 175, 0, 0, 200, 210}; | |||
@@ -780,7 +804,7 @@ SetLengthMods(tr,3); // all equal | |||
tr->langopts.stress_flags = 0x6 | 0x10; | |||
tr->langopts.numbers = 0x4e1; | |||
tr->langopts.numbers2 = 0x100; | |||
tr->langopts.numbers2 = NUM2_100000a; | |||
} | |||
break; | |||
@@ -807,13 +831,15 @@ SetLengthMods(tr,3); // all equal | |||
if(name2 == L('k','n')) | |||
{ | |||
tr->letter_bits_offset = OFFSET_KANNADA; | |||
tr->langopts.numbers = 0x1; | |||
tr->langopts.numbers2 = NUM2_100000; | |||
} | |||
tr->langopts.param[LOPT_WORD_MERGE] = 1; // don't break vowels between words | |||
SetIndicLetters(tr); // call this after setting OFFSET_ | |||
} | |||
break; | |||
#ifdef deleted | |||
case L('t','h'): // Thai | |||
{ | |||
static const short stress_lengths_th[8] = {230,150, 230,230, 230,0, 230,250}; | |||
@@ -830,6 +856,7 @@ SetLengthMods(tr,3); // all equal | |||
tr->langopts.word_gap = 0x21; // length of a final vowel is less dependent on the next consonant, don't merge consonant with next word | |||
} | |||
break; | |||
#endif | |||
case L('t','r'): // Turkish | |||
{ |
@@ -2597,10 +2597,14 @@ if((c == '/') && (langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(prev_ou | |||
for(ix=0; ix<word_count; ix++) | |||
{ | |||
int j; | |||
int nx; | |||
int c_temp; | |||
char *pn; | |||
char *pw; | |||
static unsigned int break_numbers1 = 0x49249248; | |||
static unsigned int break_numbers2 = 0x492492a8; // for languages which have numbers for 100,000 and 10,000,000 | |||
static unsigned int break_numbers3 = 0x49249268; // for languages which have numbers for 100,000 and 1,000,000 | |||
unsigned int break_numbers; | |||
char number_buf[80]; | |||
// start speaking at a specified word position in the text? | |||
@@ -2617,24 +2621,76 @@ if((c == '/') && (langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(prev_ou | |||
// digits should have been converted to Latin alphabet ('0' to '9') | |||
word = pw = &sbuf[words[ix].start]; | |||
if(iswdigit(word[0]) && (langopts.numbers2 & NUM2_100000)) | |||
{ | |||
// Languages with 100000 numbers. Remove thousands separators so that we can insert them again later | |||
pn = number_buf; | |||
while(pn < &number_buf[sizeof(number_buf)-3]) | |||
{ | |||
if(iswdigit(*pw)) | |||
{ | |||
*pn++ = *pw++; | |||
} | |||
else | |||
if((*pw == langopts.thousands_sep) && (pw[1] == ' ') && iswdigit(pw[2])) | |||
{ | |||
pw += 2; | |||
ix++; // skip "word" | |||
} | |||
else | |||
{ | |||
nx = pw - word; | |||
memset(word,' ',nx); | |||
nx = pn - number_buf; | |||
memcpy(word,number_buf,nx); | |||
break; | |||
} | |||
} | |||
pw = word; | |||
} | |||
for(n_digits=0; iswdigit(word[n_digits]); n_digits++); // count consecutive digits | |||
if((n_digits > 4) && (word[0] != '0')) | |||
{ | |||
// word is entirely digits, insert commas and break into 3 digit "words" | |||
number_buf[0] = ' '; | |||
pn = &number_buf[1]; | |||
j = n_digits; | |||
nx = n_digits; | |||
if(langopts.numbers2 & NUM2_100000a) | |||
break_numbers = break_numbers3; | |||
else | |||
if(langopts.numbers2 & NUM2_100000) | |||
break_numbers = break_numbers2; | |||
else | |||
break_numbers = break_numbers1; | |||
while(pn < &number_buf[sizeof(number_buf)-3]) | |||
{ | |||
if(!isdigit(c = *pw++) && (c != langopts.decimal_sep)) | |||
break; | |||
*pn++ = c; | |||
if((--j > 0) && (j % 3)==0) | |||
if((--nx > 0) && (break_numbers & (1 << nx))) | |||
{ | |||
if(langopts.thousands_sep != ' ') | |||
{ | |||
*pn++ = langopts.thousands_sep; | |||
} | |||
*pn++ = ' '; | |||
if(break_numbers & (1 << (nx-1))) | |||
{ | |||
// the next group only has 1 digit (i.e. NUM2_10000), make it three | |||
*pn++ = '0'; | |||
*pn++ = '0'; | |||
} | |||
if(break_numbers & (1 << (nx-2))) | |||
{ | |||
// the next group only has 2 digits (i.e. NUM2_10000), make it three | |||
*pn++ = '0'; | |||
} | |||
} | |||
} | |||
pn[0] = ' '; | |||
@@ -2659,10 +2715,10 @@ if((c == '/') && (langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(prev_ou | |||
for(pw = word; *pw != ' ';) | |||
{ | |||
memset(number_buf,' ',9); | |||
j = utf8_in(&c_temp, pw, 0); | |||
memcpy(&number_buf[2],pw,j); | |||
nx = utf8_in(&c_temp, pw, 0); | |||
memcpy(&number_buf[2],pw,nx); | |||
TranslateWord2(&number_buf[2], &words[ix], 0, 0 ); | |||
pw += j; | |||
pw += nx; | |||
} | |||
} | |||
@@ -29,7 +29,7 @@ | |||
#define N_RULE_GROUP2 120 // max num of two-letter rule chains | |||
#define N_HASH_DICT 1024 | |||
#define N_CHARSETS 20 | |||
#define N_LETTER_GROUPS 20 | |||
#define N_LETTER_GROUPS 25 | |||
/* dictionary flags, word 1 */ | |||
@@ -322,9 +322,10 @@ typedef struct { | |||
unsigned char *length_mods; | |||
unsigned char *length_mods0; | |||
#define NUM_ROMAN 0x20000 | |||
#define NUM_ROMAN_UC 0x40000 | |||
#define NUM_NOPAUSE 0x80000 | |||
#define NUM_ROMAN 0x20000 | |||
#define NUM_ROMAN_UC 0x40000 | |||
#define NUM_NOPAUSE 0x80000 | |||
#define NUM_ROMAN_AFTER 0x200000 | |||
// bits0-1=which numbers routine to use. | |||
// bit2= thousands separator must be space | |||
@@ -344,13 +345,17 @@ typedef struct { | |||
// bit18=Roman numbers only if upper case | |||
// bit19=don't add pause after a number | |||
// bit20='and' before hundreds | |||
// bit21= say "roman" after the number, not before | |||
int numbers; | |||
#define NUM2_100000 0x100 // numbers for 100,000 and 10,000,000 | |||
#define NUM2_100000a 0x500 // numbers for 100,000 and 1,000,000 | |||
// bits 1-4 use variant form of numbers before thousands,millions,etc. | |||
// bit6=(LANG=pl) two forms of plural, M or MA | |||
// bit7=(LANG=ru) use MB for 1 thousand, million, etc | |||
// bit8=(LANG=sw) special word for 100,000s | |||
// bit8=(LANG=hi) special word for 100,000 and 10,000,000 | |||
// bit9=(LANG=rw) say "thousand" and "million" before its number, not after | |||
// bit10=(LANG=sw) special word for 100,000 and 1,000,000 | |||
int numbers2; | |||
int max_roman; |