Indicate unrecognized letters within a word by clicks. lang-tr: Non-standard lower case conversion for "I". Tone languages: indicate emphasis by increasing the pitch range. git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@151 d46cf337-b52f-0410-862d-fd96e6ae7743 master
@@ -153,6 +153,7 @@ portugal $1 | |||
potchefstroom pOtSIfstr'o@m | |||
pretoria prit'o@ria | |||
stellenbosch st%&l@mbOs | |||
swellendam $3 | |||
tunisië $2 | |||
turkye $2 | |||
upington apiNt@n | |||
@@ -203,6 +204,7 @@ cronjé krOnj'e@ | |||
debora d@bo@ra | |||
debussy d@bus'i: | |||
der d@r | |||
derick dErik | |||
deventer d'e@v@nt@r | |||
du $u | |||
(du pisanie) du||pis'A:ni | |||
@@ -319,6 +321,8 @@ shakespeare _^_EN | |||
stephan ste@fan | |||
stephanus st@fA:nWs | |||
strauss straUs | |||
suzanne suz'A:n | |||
suzette suz'Et | |||
tania tanja | |||
telemann te@l@man | |||
terblanche t@rblA:nS | |||
@@ -560,6 +564,7 @@ ekself %&ks'&lf | |||
ekstra Ekstra | |||
elders &ld@rs | |||
elite il'i:t | |||
encore A~NkO:r | |||
enige e@nIx2@ | |||
eone $2 | |||
era e@ra | |||
@@ -575,8 +580,6 @@ finalis $3 | |||
finaliste $3 | |||
fort fOrt | |||
forte fOrt@ | |||
frontaal $2 | |||
frontale $2 | |||
g'n x2In | |||
gaandeweg x2A:nd@v&x2 | |||
@@ -711,6 +714,7 @@ onmag $1 | |||
onmin $1 | |||
oorleg $2 | |||
oorstroom $2 | |||
oorwaai $1 | |||
oplaas $2 | |||
onraad $1 | |||
onrus $1 |
@@ -47,6 +47,7 @@ | |||
ara (bi %arA: // fix 1st a sound and stress: Arabië/Arabies/-e | |||
ara (bier %ar%a // Stress and a sounds: Arabier/-e | |||
C) are (CA A:r@ // e sound: amptenaredom/blaredak/garebiltong/haredos/jarelange | |||
amarula %am%arul%a // fix stress and a sounds: amarula and compounds | |||
a (riA 'A: // akwarium/barium/estuarium/herbarium/seminaria | |||
a (ristokr %a // shorten 1st a: aristokrasie/-kraat/-krate/-kraties | |||
@C) as (_ as // shorten final a of words ending in -as: rigtingvas/jonkmanskas, etc. | |||
@@ -410,6 +411,7 @@ | |||
bota (nie_N b%o@t%a // stress on 3rd slb.: botanie | |||
_) bowe b''o@v@ // fix e sound in compounds: boweaards/bowemaans/bowetoon | |||
_) breda br@dA: // move default stress: (van )Breda/Bredasdorp | |||
bril (jant br@l // move default stress: briljant/-e/-heid/-ste | |||
bru (ta br%y // move default stress: brutaal/brutale/brutaliteit | |||
buffe (t b%WfE // fix stress and e sound: buffet/-te/buffetete, etc. | |||
buiten (ge b'Yyt@n // buitengewoon/s/buitengewone/buitengemeen | |||
@@ -615,9 +617,11 @@ | |||
di (ploma d@ // move default stress: diploma/-s and compounds/diplomaat | |||
direk d%irEk // direk/-sie/-te/-theid/indirek... | |||
dirig (e d@r@x2 // fix stress and i sounds: dirigeer/dirigent and compounds | |||
_) dis (inP3 d@s // disintegrasie/disinvestering and similar | |||
dis (koers d@s // move default stress: diskoers/-e | |||
diskre (s d@skrE // fix stress and e sound: diskresie/diskresionêre | |||
_) dis (kre d@s // move default stress: diskreet/-heid/-ste/diskrete | |||
_) dis (oP3 d@s // disorganisasie/disoriënteer | |||
divide (nd d@v@dE //dividende etc. | |||
d (_N t // | |||
dj c // palatal stop | |||
@@ -829,6 +833,7 @@ | |||
gel) eer e@r // words containing geleer take default stress: regsgeleerde | |||
@k) eer e@r // words containing keer take default stress, except for: | |||
ik) eer 'e@r // abdikeer/kommunikeer, etc. | |||
oriënt) eer e@r // default stress: beroepsgeoriënteerd and similar | |||
Cm) eer e@r // words containing meer preceded by consonant take default stress: bergmeer/strandmeer, etc. | |||
rm) eer 'e@r // but not words like: alarmeer/gereformeerd/transformeer | |||
sf) eer e@r // sfeer at end of compounds does not take stress: atmosfeer, etc. | |||
@@ -1063,7 +1068,7 @@ | |||
C) ensies 'Ensis // forensiese | |||
@) enswaar (dig @nsv'A:r // noemenswaardige/bejammerenswaardig | |||
@C) ent (K 'Ent // orent/student/presidentsrede | |||
@C) ent (e_ 'Ent // plurals of some of the above### | |||
@C) ent (e_ 'Ent // plurals of some of the above | |||
en) ent (_ Ent // restore default stress: koppenent/voetenent | |||
pr) ent (+ Ent // default stress for compounds of prent | |||
@s) ent (rA Ent // winkelsentrum/studentesentrum, etc. | |||
@@ -1161,6 +1166,8 @@ | |||
feite feIt@ // fix 2nd e sound: feitebepaling/-bevinding/-bron/-fout/-kennis/-lik | |||
@) fel (end f@l // betwyfelend/skuifelend/weifelend | |||
@) fel (ing f@l // betwyfeling/skuifeling/weifeling | |||
fenom (e f@n%Om // fix stress and vowel sounds: fenomeen/-ene | |||
fenomen (A@ f@n%Om@n // fix stress and vowel sounds: fenomenaal/-ale/fenomeneel/-ele | |||
fero (mo f%Er%u // fix stress and vowel sounds: feromoon/feromone | |||
fer (weel f@r // fix stress and 1st e sound: ferweel and many compounds | |||
fessor fEs@r // fix last o sound: professor and compounds like universiteitsprofessor | |||
@@ -1192,6 +1199,7 @@ | |||
foto fo@tu | |||
fo (togra fo@ // move default stress: fotograaf and similar | |||
_) fran (CisCA fr%an // fix stress and a sound: Fransiskus/Franciscus/Francisca | |||
front (A fr%Ont // fix stress and e sound: konfronterend/-e/konfrontering | |||
fru (str fr%W // move default stress: frustrerend/-e | |||
fung (e f%WNx2 // fungeer/fungerende/fungering | |||
fungus fWNgWs_ // fix g sound: fungus and 2nd us sound in compounds: fungusinfeksie | |||
@@ -1256,7 +1264,7 @@ | |||
gra (niet x2r%a // move default stress and fix a sound: graniet and compounds | |||
gra (sieu x2r%a // fix stress and shorten a sound: grasieus/-e | |||
_) gras (A x2ras_ // fix stress and a sound: graseter/grasoppervlakte/grasuie | |||
gra (sie x2rA: // but restore default stress and long a: grasie | |||
_) gra (sie x2rA: // but restore default stress and long a: grasie | |||
_) gri (mA x2r@ // grimas/grimeer/-middel and other compounds/grimering | |||
grotere x2ro@t@r@ // fix vowel sounds and stress: grotere/-s | |||
guerrilla g@r'Ila | |||
@@ -1445,6 +1453,7 @@ | |||
_) in (aCemP2 'In_ //inasem/inademing, etc. | |||
self) in (C@ _@n // selfingenome/-nheid/selfinkeer, etc. | |||
_) in (a@P2 In // inakkuraat/inaktief/inaktiwiteit | |||
invest (@ @nv%Est // fix stress, v and e sounds: investeer/-erende/-ering | |||
_) iese (C ,is@ // iesegrimmig and derivitives | |||
ieterma (g %it@rm%a // fix stress and a sound: ietermagô | |||
ieus i'Y@s // this ending always takes stress | |||
@@ -1503,7 +1512,6 @@ | |||
_em) i (r @ | |||
ië e@:@- // i followed by "deelteken e" | |||
iën i'En //i deelteken e n | |||
or) iën i;En // default stress: (beroeps)georiënteerd, etc. | |||
iëteit %i@t'eIt // fix stress: (imk)piëteit/variëteit | |||
iee (C i'e@ // distansieer/finansieer/finansieel/prieel | |||
@@ -2133,8 +2141,10 @@ | |||
_) meege (@P5 m'e@x2@ | |||
me (juf m@ // move default stress and shorten e sound | |||
me (laats m@ // fix stress and e sound: melaats/-e/-heid | |||
melancholie (_N m%El%aNk%o@li // fix stress and 1st e sound: melankolie | |||
melancholie (_N m%El%aNk%o@li // fix stress and 1st e sound: melancholie | |||
melanch m%El%aNk // stress and 1st e sound: melancholies/melancholiek/-e/melancholikus/-ci | |||
melankolie (_N m%El%aNk%o@li // fix stress and 1st e sound: melankolie | |||
melank m%El%aNk // stress and 1st e sound: melankolies/melankoliek/-e/melankolikus/-ci | |||
mem (bra m%Em // move default stress: membraan/membrane | |||
memo (ran m%Em%u // fix stress and vowel sounds: memorandum/memoranda and compounds | |||
meneer m@n'e@r // meneer/meneertjie | |||
@@ -2328,6 +2338,7 @@ | |||
orgi (de %Orx2@ // move default stress: orgidee/orgideë and compounds | |||
orie o@ri | |||
oriu 'o@riW | |||
oriënt (@ %o@r%ij%Ent // disoriëntering/(beroeps)georiënteerd | |||
_) or (ka %Or // move default stress: orkaan/orkane/orkaanwaarskuwing... | |||
orkes %OrkEs // orkes and compounds | |||
_) os (moC %Os // move default stress: osmose/osmoties/-e | |||
@@ -2598,6 +2609,7 @@ | |||
oor (reed %o@r // move default stress | |||
oorre (dA %o@re@ // move default stress: oorreding/-skrag/-vermoë | |||
oor (rompel %o@r // move default stress | |||
oorsaaklik %o@rsA:kl@k // fix stress: oorsaaklik/-e/-heid | |||
_) oor (skadu %o@r // move default stress: oorskadu/-wing | |||
_) oor (skat %o@r // move default stress: oorskat/-te/-ting | |||
oor (skrei %o@r // stress: oorskrei/-ding snd compounds | |||
@@ -2612,7 +2624,7 @@ | |||
oor (tuig %o@r // oortuig/oortuiging/oortuigende | |||
_) oor (vleuel %o@r // move default stress: oorvleuel/-ing | |||
_) oor (vloedig %o@r // move default stress: oorvloedig/-e vs. oorvloed | |||
_) oor (w o@r' // oorwin/oorweeg/oorweging/oorweldig/end... | |||
_) oor (w %o@r // oorwin/oorweeg/oorweging/oorweldig/end... | |||
oor (wig 'o@r // stress | |||
oot (moedig %o@t // move default stress: ootmoedig/-e/-heid | |||
@@ -2649,6 +2661,7 @@ | |||
parme (saan p%arm@ // move default stress: parmesaan(kaas) and compounds | |||
paro (di p%ar%u // fix stress and vowel sounds: parodie and compounds | |||
parti (tu p%art@ // fix stress and i sound: partituur/partiture and compounds | |||
pa (stel p%a // move default stress: pastel(kleur) and similar compounds | |||
pa (tat p@ // patat/warmpatat/wurgpatat | |||
patie (KN p%at'i //words ending in patie has stress at end + a sound/simpatiek | |||
@) paties p'A:tis // fix stress and a sound: simpaties/-e | |||
@@ -3387,6 +3400,7 @@ | |||
teleks t&lEks // fix stress and vowel sounds: teleks and compounds | |||
_) teler te@l@r // fix stress and vowel sounds: teler/-s/-y/-svereniging, etc. | |||
@C) te (nk t& ++ //tenk and friends | |||
_) te (no t@ //tenoor, tenore etc. | |||
@C) te (C t@ /// | |||
s) te (king te@ //ontsteking in compounds | |||
te (kkie+ tE // fix e sound: tekkie/-s/staptekkies and similar | |||
@@ -3480,7 +3494,6 @@ | |||
ek) terende (_ t'e@r@nd@ //reflekterende respekterende | |||
k) te (ring t'e@ //selektering, reflektering etc. | |||
n) terende (_ t'e@r@nd@ //konfronterende mensonterende | |||
pe) terende (_ t'e@r@nd@ //kompeterende | |||
i) terende (_ t'e@r@nd@ //presipiterende moniterende | |||
ul) terende (_ t'e@r@nd@ //konsulterende resulterende |
@@ -411,3 +411,13 @@ u U y | |||
k l m n p q r R | |||
s S t tS v w x z | |||
Z | |||
Dictionary rw_dict | |||
a e i o u | |||
* b B c d dZ f g | |||
h j J k l m n N | |||
n^ p q s S S; t tS | |||
w x z |
@@ -5197,7 +5197,6 @@ | |||
__) - (_D m'aIn@s | |||
A_) - (_D _ | |||
C_) - (_D _ | |||
--) - | |||
+ plVs | |||
_) ++ (_ plVspl'Vs |
@@ -46,6 +46,7 @@ | |||
_) e (_ e | |||
e e | |||
e (CK E | |||
e (Ch e | |||
ei eI | |||
ey (K eI | |||
ey (_ 'eI | |||
@@ -118,6 +119,7 @@ | |||
_) o (_ o | |||
o o | |||
o (CK O | |||
o (Ch o | |||
oi oI | |||
oy (K oI | |||
oy (_ 'oI | |||
@@ -136,7 +138,7 @@ | |||
.group r | |||
_) r (_ E*e | |||
_) r (_ E**e | |||
r R | |||
_) r R | |||
A) r (A ** |
@@ -321,6 +321,5 @@ | |||
.group | |||
$ dolar | |||
' (Pb // split a word at ' and translate the first part separately. | |||
' (Pb % // split a word at ' and translate the first part separately. | |||
@@ -3440,7 +3440,7 @@ $textmode | |||
举 ju3 | |||
乎 hu1 | |||
乐 le4 | |||
了 liao3 | |||
了 le5 // more common than liao3 | |||
事 shi4 | |||
亮 liang4 | |||
亲 qin1 | |||
@@ -3728,7 +3728,7 @@ $textmode | |||
當 dang1 | |||
發 fa1 | |||
白 bai2 | |||
的 di4 | |||
的 de5 // more common than di4 | |||
盖 gai4 | |||
盘 pan2 | |||
盛 sheng4 |
@@ -78,13 +78,16 @@ language). | |||
.group b | |||
b p | |||
@) b (K _^_EN | |||
.group c | |||
c tsh | |||
ch ts.h | |||
@) c (K _^_EN | |||
.group d | |||
d t | |||
@) d (K _^_EN | |||
.group e | |||
e o- | |||
@@ -110,12 +113,15 @@ language). | |||
.group f | |||
f f | |||
@) f (K _^_EN | |||
.group g | |||
g k | |||
@) g (K _^_EN | |||
.group h | |||
h x | |||
@) h (K _^_EN | |||
.group i | |||
i i //i in ing | |||
@@ -135,15 +141,19 @@ language). | |||
.group j | |||
j tS; | |||
@) j (K _^_EN | |||
.group k | |||
k kh | |||
@) k (K _^_EN | |||
.group l | |||
l l | |||
@) l (K _^_EN | |||
.group m | |||
m m | |||
@) m (K _^_EN | |||
.group n | |||
n n | |||
@@ -166,9 +176,11 @@ language). | |||
.group p | |||
p ph | |||
@) p (K _^_EN | |||
.group q | |||
q tS;h | |||
@) q (K _^_EN | |||
.group r | |||
r z. | |||
@@ -177,9 +189,11 @@ language). | |||
.group s | |||
s s | |||
sh s. | |||
@) s (K _^_EN | |||
.group t | |||
t th | |||
@) t (K _^_EN | |||
.group u | |||
u u | |||
@@ -225,6 +239,7 @@ language). | |||
n) ve yE //üe | |||
.group w | |||
// @) w (K _^_EN | |||
wa wA //wa wan wang | |||
wai wai | |||
wa1i wai55 | |||
@@ -242,6 +257,7 @@ language). | |||
.group x | |||
x S; | |||
@) x (K _^_EN | |||
.group y | |||
y j //before a o e i | |||
@@ -250,10 +266,12 @@ language). | |||
y (uDn ; | |||
yo (DngK yu | |||
y (K i // foreign words | |||
@) y (K _^_EN | |||
.group z | |||
z ts | |||
zh ts. | |||
@) z (K _^_EN | |||
//tone number | |||
.group |
@@ -1,4 +1,4 @@ | |||
49 phoneme tables | |||
50 phoneme tables | |||
new total | |||
base 99 99 | |||
base2 24 118 | |||
@@ -49,6 +49,7 @@ | |||
tr 18 123 | |||
ku 13 120 | |||
ja 7 104 | |||
rw 12 123 | |||
Data file Used by | |||
b/b [b] base | |||
@@ -1233,6 +1234,8 @@ vowel/a# [a/] base2 | |||
[&] sr | |||
[&] pt | |||
[&/] pt | |||
[a#] rw | |||
[a/] rw | |||
vowel/a_2 [a] base2 | |||
[a] en_wi | |||
[A:] en_wi | |||
@@ -1243,6 +1246,7 @@ vowel/a_2 [a] base2 | |||
[a] pt_pt | |||
[a] ro | |||
[a:] vi | |||
[a] rw | |||
vowel/a#_2 [&] hr | |||
[a2] sv | |||
[&] is | |||
@@ -1329,6 +1333,8 @@ vowel/e [e] base2 | |||
[e] th | |||
[e:] th | |||
[e] id | |||
[e] rw | |||
[e#] rw | |||
vowel/e# [I] en_sc | |||
[I2] en_sc | |||
vowel/e_2 [eI] en_sc | |||
@@ -1425,6 +1431,8 @@ vowel/i [i] base2 | |||
[i] th | |||
[i:] th | |||
[i] ja | |||
[i] rw | |||
[i#] rw | |||
vowel/i# [i] en_us | |||
[i] en_rp | |||
[y:] cy | |||
@@ -1525,6 +1533,8 @@ vowel/o [o] base2 | |||
[o] th | |||
[o:] th | |||
[o] id | |||
[o] rw | |||
[o#] rw | |||
vowel/o_2 [o:] cy | |||
[o:] hi | |||
[o:] hu | |||
@@ -1661,6 +1671,8 @@ vowel/u_bck [u] base2 | |||
[u] is | |||
[u] zhy | |||
[u] zh | |||
[u] rw | |||
[u#] rw | |||
vowel/u_bck2 [u:] la | |||
vowel/u_fnt [u:] en_rp | |||
vowel/uu [U] en |
@@ -1287,3 +1287,6 @@ include ph_kurdish | |||
phonemetable ja base | |||
include ph_japanese | |||
phonemetable rw base2 | |||
include ph_kinyarwanda |
@@ -1201,7 +1201,7 @@ void Translator::SetWordStress(char *output, unsigned int dictionary_flags, int | |||
if(stressed_syllable == 0) | |||
{ | |||
stressed_syllable = vowel_count - 1; | |||
for(ix=2; ix < vowel_count; ix++) | |||
for(ix=1; ix < vowel_count; ix++) | |||
{ | |||
if(vowel_stress[ix] == 1) | |||
{ | |||
@@ -2303,7 +2303,7 @@ int Translator::TranslateRules(char *p_start, char *phonemes, int ph_size, char | |||
while(((c = *p) != ' ') && (c != 0)) | |||
{ | |||
if(IsAlpha(wc)) | |||
any_alpha = wc; | |||
any_alpha++; | |||
wc_prev = wc; | |||
wc_bytes = utf8_in(&wc,p,0); | |||
@@ -2420,6 +2420,18 @@ int Translator::TranslateRules(char *p_start, char *phonemes, int ph_size, char | |||
} | |||
} | |||
} | |||
if(match1.points == 0) | |||
{ | |||
static const char str_unknown[4] = {phonCAPITAL,phonCAPITAL,phonCAPITAL,0}; | |||
if(((any_alpha > 0) || (p[wc_bytes-1] > ' ')) && !iswpunct(wc)) | |||
{ | |||
// an unrecognised character in a word, indicate with clicks | |||
match1.phonemes = str_unknown; | |||
match1.points = 1; | |||
} | |||
p += (wc_bytes-1); | |||
} | |||
} | |||
} | |||
@@ -99,15 +99,12 @@ int Translator::TranslateLetter(char *word, char *phonemes, int control) | |||
int n_bytes; | |||
int letter; | |||
int len; | |||
int phoneme_tab_en; | |||
char *p2; | |||
char *pbuf; | |||
char capital[20]; | |||
char ph_buf[60]; | |||
char ph_buf2[60]; | |||
char ph_buf_en[60]; | |||
char hexbuf[6]; | |||
static char single_letter[10] = {0,0}; | |||
ph_buf[0] = 0; | |||
capital[0] = 0; | |||
@@ -127,7 +124,7 @@ int Translator::TranslateLetter(char *word, char *phonemes, int control) | |||
Lookup("_cap",capital); | |||
} | |||
} | |||
letter = towlower(letter); | |||
letter = towlower2(letter); | |||
LookupLetter(letter, word[n_bytes], ph_buf); | |||
@@ -139,13 +136,9 @@ int Translator::TranslateLetter(char *word, char *phonemes, int control) | |||
if(ph_buf[0] == 0) | |||
{ | |||
phoneme_tab_en = SetTranslator2("en"); | |||
translator2->LookupLetter(letter, word[n_bytes], ph_buf_en); | |||
if(ph_buf_en[0] != 0) | |||
{ | |||
sprintf(ph_buf,"%c%c%s%c%c",phonSWITCH2, phoneme_tab_en + phonTOP, ph_buf_en, phonSWITCH2, voice->phoneme_tab_ix + phonTOP); | |||
} | |||
SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table | |||
// ?? speak as English ?? | |||
sprintf(phonemes,"%c",phonSWITCH); | |||
return(0); | |||
} | |||
if(ph_buf[0] == 0) |
@@ -87,8 +87,6 @@ | |||
#define phonT_REDUCED 25 | |||
#define phonSTRESS_TONIC 26 | |||
#define phonPAUSE_CLAUSE 27 | |||
#define phonSWITCH2 28 | |||
#define phonTOP 28 // highest pre-defined phoneme number | |||
extern const unsigned char pause_phonemes[8]; // 0, vshort, short, pause, long, glottalstop | |||
@@ -299,6 +299,18 @@ float wcstod(const wchar_t *str, wchar_t **tailptr) | |||
} | |||
#endif | |||
int towlower2(unsigned int c) | |||
{ | |||
// check for non-standard upper to lower case conversions | |||
if(c == 'I') | |||
{ | |||
if(translator->translator_name == L('t','r')) | |||
{ | |||
c = 0x131; // I -> ı | |||
} | |||
} | |||
return(towlower(c)); | |||
} | |||
static void GetC_unget(int c) | |||
{//========================== | |||
@@ -1456,7 +1468,19 @@ static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int &outix, int n_outb | |||
{ | |||
value = attrlookup(attr1,mnem_emphasis); | |||
} | |||
sp->parameter[espeakEMPHASIS] = value; | |||
if(translator->langopts.tone_language == 1) | |||
{ | |||
static unsigned char emphasis_to_pitch_range[] = {50,50,40,70,90,90}; | |||
static unsigned char emphasis_to_volume[] = {100,100,70,110,140,140}; | |||
// tone language (eg.Chinese) do emphasis by increasing the pitch range. | |||
sp->parameter[espeakRANGE] = emphasis_to_pitch_range[value]; | |||
sp->parameter[espeakVOLUME] = emphasis_to_volume[value]; | |||
} | |||
else | |||
{ | |||
sp->parameter[espeakEMPHASIS] = value; | |||
} | |||
ProcessParamStack(outbuf, outix); | |||
break; | |||
@@ -35,7 +35,7 @@ | |||
#include "translate.h" | |||
#include "wave.h" | |||
const char *version_string = "1.31.17 24.Feb.08"; | |||
const char *version_string = "1.31.18 25.Feb.08"; | |||
const int version_phdata = 0x013105; | |||
int option_device_number = -1; |
@@ -71,9 +71,10 @@ int Translator_English::Unpronouncable(char *word) | |||
This function is language specific. | |||
*/ | |||
unsigned char c; | |||
int c; | |||
int vowel_posn=9; | |||
int index; | |||
int count; | |||
int ix; | |||
int apostrophe=0; | |||
@@ -92,19 +93,26 @@ int Translator_English::Unpronouncable(char *word) | |||
} | |||
index=0; | |||
while(((c = word[index++]) != 0) && !isspace(c)) | |||
count=0; | |||
for(;;) | |||
{ | |||
index += utf8_in(&c,&word[index],0); | |||
count++; | |||
if((c==0) || (c==' ')) | |||
break; | |||
if(IsVowel(c) || (c == 'y')) | |||
{ | |||
vowel_posn = index; | |||
vowel_posn = count; | |||
break; | |||
} | |||
if(c == '\'') | |||
apostrophe = 1; | |||
else | |||
if((c < 'a') || (c > 'z')) | |||
return(0); // letter (not vowel) outside a-z range or apostrophe, abort test | |||
if((c < 'a') || (c > 0x241)) | |||
return(0); // letter (not vowel) outside Latin character range or apostrophe, abort test | |||
} | |||
if((vowel_posn > 5) || ((word[0]!='s') && (vowel_posn > 4))) | |||
return(1); // no vowel, or no vowel in first four letters |
@@ -575,6 +575,15 @@ SetLengthMods(tr,3); // all equal | |||
tr = new Translator_Russian(); | |||
break; | |||
case L('r','w'): // Kiryarwanda | |||
{ | |||
tr = new Translator(); | |||
tr->langopts.stress_rule = 2; | |||
tr->langopts.stress_flags = 0x16; | |||
tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable | |||
} | |||
break; | |||
case L('s','k'): // Slovak | |||
case L('c','s'): // Czech | |||
{ |
@@ -788,7 +788,7 @@ if((wmark > 0) && (wmark < 8)) | |||
return(0); | |||
} | |||
p = &wordx[word_length-3]; | |||
p = &wordx[word_length-3]; // this looks wrong. Doesn't consider multi-byte chars. | |||
if(memcmp(p,"'s ",3) == 0) | |||
{ | |||
// remove a 's suffix and pronounce this separately (not as an individual letter) | |||
@@ -819,6 +819,30 @@ if((wmark > 0) && (wmark < 8)) | |||
return(0); | |||
} | |||
if((phonemes[0] == 0) && (end_phonemes[0] == 0)) | |||
{ | |||
int wc; | |||
// characters not recognised, speak them individually | |||
utf8_in(&wc, wordx, 0); | |||
if(!iswpunct(wc)) | |||
{ | |||
posn = 0; | |||
while(*wordx != ' ') | |||
{ | |||
wordx += TranslateLetter(wordx, phonemes, 4); | |||
posn++; | |||
if(phonemes[0] == phonSWITCH) | |||
{ | |||
// change to another language in order to translate this word | |||
strcpy(word_phonemes,phonemes); | |||
return(0); | |||
} | |||
} | |||
SetSpellingStress(phonemes,spell_word,posn); | |||
} | |||
} | |||
c_temp = wordx[-1]; | |||
found = 0; | |||
@@ -871,7 +895,7 @@ if((wmark > 0) && (wmark < 8)) | |||
prefix_chars[0] = 0; | |||
n_chars = prefix_type & 0x3f; | |||
for(ix=0; ix < n_chars; ix++) // num. of characters to remove | |||
for(ix=0; ix < n_chars; ix++) // num. of bytes to remove | |||
{ | |||
prefix_chars[pfix++] = *wordx++; | |||
@@ -879,11 +903,6 @@ if((wmark > 0) && (wmark < 8)) | |||
{ | |||
prefix_chars[pfix-1] = 0; // discard the last character of the prefix, this is the separator character | |||
} | |||
while((*wordx & 0xc0) == 0x80) | |||
{ | |||
prefix_chars[pfix++] = *wordx++; // for multibyte characters | |||
} | |||
} | |||
prefix_chars[pfix] = 0; | |||
c_temp = wordx[-1]; | |||
@@ -894,15 +913,22 @@ if((wmark > 0) && (wmark < 8)) | |||
{ | |||
// retranslate the prefix part | |||
char *wordpf; | |||
char prefix_phonemes2[12]; | |||
strncpy0(prefix_phonemes2,end_phonemes,sizeof(prefix_phonemes2)); | |||
wordpf = &prefix_chars[1]; | |||
found = LookupDictList(&wordpf, phonemes, dictionary_flags, SUFX_P, wtab); // without prefix | |||
if(found == 0) | |||
{ | |||
end_type = TranslateRules(wordpf, phonemes, N_WORD_PHONEMES, end_phonemes, 0, dictionary_flags[0]); | |||
strcat(prefix_phonemes, phonemes); | |||
sprintf(prefix_phonemes,"%s%s%s",phonemes,end_phonemes,prefix_phonemes2); | |||
} | |||
prefix_flags = 1; | |||
} | |||
else | |||
{ | |||
strcat(prefix_phonemes,end_phonemes); | |||
} | |||
strcat(prefix_phonemes,end_phonemes); | |||
end_phonemes[0] = 0; | |||
end_type = 0; | |||
@@ -1539,12 +1565,6 @@ int Translator::TranslateWord2(char *word, WORD_TAB *wtab, int pre_pause, int ne | |||
srcix = source_ix+1; | |||
} | |||
else | |||
if(ph_code == phonSWITCH2) | |||
{ | |||
SetPlist2(&ph_list2[n_ph_list2],phonSWITCH); | |||
ph_list2[n_ph_list2++].tone_number = *p++ - phonTOP; // phoneme table number (phonTOP is added to avoid confusion with special phoneme numbers) | |||
} | |||
else | |||
if(ph_code == phonX1) | |||
{ | |||
// a language specific action | |||
@@ -1971,7 +1991,7 @@ void *Translator::TranslateClause(FILE *f_text, const void *vp_input, int *tone_ | |||
{ | |||
c = ' '; | |||
} | |||
c = towlower(c); | |||
c = towlower2(c); | |||
} | |||
if(phoneme_mode) | |||
@@ -2109,7 +2129,7 @@ if((c == '/') && (langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(prev_ou | |||
if(iswupper(c)) | |||
{ | |||
c = towlower(c); | |||
c = towlower2(c); | |||
if(langopts.param[LOPT_SYLLABLE_CAPS]) | |||
{ |
@@ -580,6 +580,7 @@ void InitText2(void); | |||
int IsDigit(unsigned int c); | |||
int IsAlpha(unsigned int c); | |||
int isspace2(unsigned int c); | |||
int towlower2(unsigned int c); | |||
void SetVoiceStack(espeak_VOICE *v); | |||
extern FILE *f_trans; // for logging |