Browse Source

[1.24.12] Produce dictsource/dict_phonemes, a list of which phonemes are used by each language's *_rules and *_list files.

Fix crash with -X option when spelling words (acronyms).
Rules files: added special character X meaning "no vowel until the word boundary". Used for lang=no.


git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@14 d46cf337-b52f-0410-862d-fd96e6ae7743
master
jonsd 18 years ago
parent
commit
f65bf2072b

+ 57
- 54
dictsource/en_list View File

@@ -141,52 +141,53 @@ _dpt pOInt
// ABBREVIATIONS
//**************

abc eIbi:s'i:
ac eI'si:
adfs eIdi:Ef'Es
a.k.a aka2_!
agm eIdZi:'Em
ai eI;'aI
api apI2
asap eIEseIpi:
awol eIw0l
cio si:aI'oU
ctrl k0ntr'oUl
dept dI2pA@tm@nt
diy di:aI'waI
eg fO@Egz'aamp@L
etc Et'sEtr@
eur jU@
hmm h@m
(http ://) eItSti:ti:'pi:_
ibm aIbi:Em
ie aIi:_! $pause
i.e aIi:_! $pause
irc aI;A@s'i:
lbs paUndz
ltd lImItId
mc m@k
oem oUi:'Em
ok oU'keI
os oUEs
riscos rIskoUEs
sae EseI'i:
seac si:ak
st s@nt
th T
thu T3: // Thursday
?5 thu TIR // Thursday
ufo ju:Ef'oU
ui ,ju:'aI
uk ju:k'eI
url ju:A@'El
usa ju:Es'eI
wwii dVb@Lju:dVb@Lju:t'u:
xy EkswaI

ii tu: $abbrev // roman numerals
iii Tri: $abbrev
iv fo@ $abbrev
abc $abbrev
ac $abbrev
adfs $abbrev
a.k.a aka2_!
agm $abbrev
ai $abbrev
api $abbrev
asap $abbrev
awol eIw0l
cio $abbrev
ctrl k0ntr'oUl
dept dI2pA@tm@nt
diy $abbrev
edt $abbrev
eg fO@Egz'aamp@L
est $abbrev
etc Et'sEtr@
eur jU@
hmm h@m
(http ://) eItSti:ti:'pi:_
ibm $abbrev
ie aIi:_! $pause
i.e aIi:_! $pause
irc $abbrev
lbs paUndz
ltd lImItId
mc m@k
oem $abbrev
ok $abbrev
os $abbrev
riscos rIskoUEs
sae $abbrev
st s@nt
th T
thu T3: // Thursday
?5 thu TIR // Thursday
ufo $abbrev
ui $abbrev
uk $abbrev
url $abbrev
usa $abbrev
wwii dVb@Lju:dVb@Lju:t'u:
xy $abbrev

ii tu: $abbrev // roman numerals
iii Tri: $abbrev
iv fo@ $abbrev


nd $only
@@ -194,14 +195,14 @@ rd $only
th $only
st $only

mr mIst3 $dot
mrs mIsIz $dot
mr mIst3 $dot
mrs mIsIz $dot
//ms mIz $dot
dr d0kt3 $dot $capital
lt $dot
prof $dot
rev $dot
st $dot
dr d0kt3 $dot $capital
lt $dot
prof $dot
rev $dot
st $dot



@@ -448,6 +449,7 @@ bastion basti@n
batman batman
belisha b@l'i:S@
bely bI2laI
beryl bEr@L
basal beIs@L
best bEst
beta bi:t@
@@ -866,7 +868,6 @@ foretell fo@t'El
forever $2
forewarn $2
formid fO@m'Id // formidable
forte fo@teI
foyer fOIeI
freelance fri:laans
frigate frIg@t
@@ -1327,6 +1328,7 @@ peculiar pI2kju:lI3
pedalo pEd@loU
pejorative p@dZ0r@tIv
penal pi:n@L
?4 penchant p0nS0n
peninsula p@n'InsjUl@
penis pi:nI2s
perfectly p3:fEktlI2
@@ -1672,6 +1674,7 @@ sundae sVndeI
sundial sVndaI@l
suite swi:t $onlys
superb su:p'3:b
superfluous su:p'3:flu:@s
superman su:p3man
supplier s@plaI3
suppose $2

+ 5
- 1
dictsource/en_rules View File

@@ -317,6 +317,7 @@
c) ad (enc eId
ad (eq ad
_) ad (i ad
gl) ad (i ad
adjec adZEk
ad (le eId
ad (junct_ ad
@@ -479,7 +480,7 @@
_) al (i al
_) al (ig a2l
_) ali (Be a2laI
&) al (isC_ @l
&) ali (sC_ @li
&) al (isCic @l
_) al (k al
_) all (@ a2l
@@ -2677,6 +2678,7 @@
pl) ia (nt 'aI@
ill) ia (nt I2@
al) ia (nt I2@
_p) ia (nA i@
iar aI@
@l) iar (_ i@
iara (_ I'A:r@
@@ -3754,6 +3756,7 @@
&) or (ous @
orough Vr@
eff) or (t 3
f) orte (_ 'o@teI
w) or (t_ o@
w) or (r V
w) or (n o@
@@ -4473,6 +4476,7 @@
_) tri (vi trI
tsch tS
_) two tu:
ttu (r t@
@) tur (A tS@r
@) tur (al_ tS=@r
@) tur (y tS@r

+ 2
- 1
dictsource/hr_rules View File

@@ -4,6 +4,7 @@
.group a
a a
aj (K aI
a (r A // don't reduce to [&]

.group b
b b
@@ -70,7 +71,7 @@
o o
ou oU
oj (K oI
o (r 8
&) o (r_ 8

.group p
p p

+ 8
- 6
dictsource/no_list View File

@@ -65,7 +65,7 @@ _9X n'It:i:
_0C h'u-:nd@-*e:d@2
_1C 'Et||h'u-:nd@-*e:d@2
_0M1 t'u-:s@n
_0M1 'Et||t'u-:s@n
_1M1 'Et||t'u-:s@n
_0M2 m'Illi:;,u:n@r
_1M2 'e:n||m'Illi:;,u:n

@@ -84,13 +84,13 @@ ei $u


// pronouns
jeg $u+
jeg jaI $u+
du $u+
han $u+
hun $u+
vi $u+
dere $u+
de $u+
de di: $u+

meg $u+
deg $u+
@@ -104,8 +104,7 @@ denne $u+
dette $u+
disse $u+
den $u+
det $u+
de $u+
det de: $u+


// possessive adjectives
@@ -135,7 +134,7 @@ i $u $brk // in
av $u $brk // of, off, by
bak $pause // behind
etter $u $pause // after
for $u $pause // for
for fOr: $u $pause // for
foran $pause // in front of
fra $u $pause // from
in $u $brk // in
@@ -216,6 +215,9 @@ _å o:
_i i:
i i: $atend



// MAIN WORD DICTIONARY
//*********************

kom kOm

+ 16
- 0
dictsource/no_rules View File

@@ -8,13 +8,17 @@
a A:
a (C% A
a (_ A
X) a (CCX A // single syllable with >= 2 final consonants
ai AI
au aU

aa o: // å
aa (C% O
X) aa (CCX O
ae a: // æ
ae (C% a
X) ae (CCX a


.group b
b b
@@ -36,10 +40,12 @@
d d
dd d:
r) d (_
&) det (_ d@2

.group e
e e:
e (C% E
X) e (CCX E
ei aI
e (rC a // ??
&) e (_ @2
@@ -70,6 +76,7 @@
.group i
i i:
i (C% I
X) i (CCX I
&) ig (_ I

.group j
@@ -102,10 +109,12 @@
.group o
o u:
o (C% O
X) o (CCX O
oi OI

oe Y: // ø
oe (C% W
X) oe (CCX W
oey Yy

o (nd U
@@ -155,6 +164,7 @@
.group u
u u-:
u (C% u-
X) u (CCX u-
ui u-I

.group v
@@ -171,6 +181,7 @@
.group y
y y:
y (C% y
X) y (CCX y

.group z
z s
@@ -180,24 +191,29 @@
.group å
å o:
å (C% O
X) å (CCX O

.group æ
æ E:
æ (r a:
æ (C% a
X) æ (CCX a

.group ø
ø Y:
ø (C% W
X) ø (CCX W
øy Yy

.group ä
ä E:
ä (C% E
X) ä (CCX E

.group ö
ö Y:
ö (C% W
X) ö (CCX W

.group
é 'e:

+ 6
- 7
phsource/compile_report View File

@@ -14,13 +14,13 @@
fi 40 123
fr 33 115
fr_ca 11 115
hi 50 128
hi 49 127
hu 24 109
nl 25 112
pl 17 103
sk 25 120
cs 5 120
hr 23 129
hr 24 130
ru 36 120
it 28 112
es 6 112
@@ -233,7 +233,6 @@
2 ufric/x base vi
4 ufric/x2 af nl pt
1 ufric/x_hr hr
1 ufric/xx hr
1 ustop/c base
5 ustop/k base en fr hi sw
9 ustop/k_ base en fi fr hi hu it el sw
@@ -346,6 +345,7 @@
1 vdiph/u-i vi
1 vdiph/ui_2 af
2 vdiph/ui_3 cy
1 vdiph/ui_4 hr
1 vdiph/Vi vi
1 vdiph/Vu af
2 vdiph/Vu_2 en_us en_wm
@@ -411,9 +411,9 @@
1 vowel/3_3 en_rp
3 vowel/3_en en en_wm af
2 vowel/@_4 nl hr
2 vowel/8_2 en_us sv
3 vowel/8_2 en_us hr sv
1 vowel/8_3 zh_yue
10 vowel/a en_n cy de hu nl pl sk hr
11 vowel/a en_n cy de hu nl pl sk hr
4 vowel/a# en_sc it pt
5 vowel/a_2 eo it pt_pt ro vi
4 vowel/a#_2 hr sv is sw
@@ -463,7 +463,6 @@
6 vowel/ii_en en en_n
5 vowel/@_low hi ro no
8 vowel/o en en_wm de hi it pt_pt sv
1 vowel/o- hr
4 vowel/o_2 cy hi hu no
2 vowel/o-_2 en_n en_wm
2 vowel/o_3 en_sc
@@ -487,7 +486,7 @@
3 vowelr/r-voc hi sk
2 vowelr/V3_r en en_sc
1 vowelr/V_r en
8 vowel/u en_n cy de eo fr hi cs
7 vowel/u en_n cy de eo fr cs
5 vowel/u# en en_sc
3 vowel/u_2 fi sk ro
1 vowel/u#_2 sv

BIN
phsource/l/_l View File


BIN
phsource/l/l@ View File


BIN
phsource/l/la View File


BIN
phsource/l/le View File


BIN
phsource/l/li View File


BIN
phsource/l/lo View File


BIN
phsource/l/lu View File


+ 18
- 14
phsource/ph_croatian View File

@@ -25,6 +25,13 @@ phoneme a
endphoneme


phoneme A // 'a' before 'r', doesn't reduce to [&]
vowel starttype (a) endtype (a)
length 160
formants vowel/a
endphoneme


phoneme &
vowel starttype (a) endtype (a)
length 140
@@ -72,7 +79,7 @@ endphoneme
phoneme 8
vowel starttype (o) endtype (o)
length 140
formants vowel/o-
formants vowel/8_2
endphoneme


@@ -92,14 +99,14 @@ endphoneme

phoneme aI
vowel starttype (a) endtype (i)
length 230
length 250
formants vdiph/ai
endphoneme


phoneme eI
vowel starttype (e) endtype (i)
length 220
length 250
formants vdiph/ei_2
linkout ;
endphoneme
@@ -115,12 +122,19 @@ endphoneme

phoneme oI
vowel starttype (o) endtype (i)
length 220
length 250
formants vdiph/ooi_3
linkout ;
endphoneme


phoneme uI
vowel starttype (u) endtype (i)
length 250
formants vdiph/ui_4
linkout ;
endphoneme


phoneme p
vls blb stop
@@ -170,14 +184,4 @@ phoneme x
switchvoicing Q
endphoneme

phoneme x2 // TEST
vls vel frc
vowelin f1=0 f2=2300 200 400 f3=-100 80
vowelout f1=0 f2=2300 300 400 f3=-100 80 rms=20
length 100
lengthmod 3
wave ufric/xx%120
switchvoicing Q
endphoneme



+ 10
- 16
phsource/ph_hindi View File

@@ -5,7 +5,7 @@

phoneme : // Lengthen the previous vowel by "length"
virtual
length 80
length 100
endphoneme

phoneme @
@@ -13,7 +13,7 @@ phoneme @
length 130
formants vowel/@_bck
before H vowel/@_low
reduceto NULL 0
reduceto @- 0
endphoneme

phoneme V
@@ -48,14 +48,14 @@ endphoneme

phoneme i:
vowel long starttype (i) endtype (i)
length 220
length 230
formants vowel/i_fnt
linkout ;
endphoneme

phoneme i // as [i:] but not marked as 'long'
vowel starttype (i) endtype (i)
length 220
length 230
formants vowel/i_fnt
linkout ;
endphoneme
@@ -69,7 +69,7 @@ endphoneme

phoneme e:
vowel long starttype (e) endtype (e)
length 210
length 220
formants vowel/e_2
endphoneme

@@ -81,7 +81,7 @@ endphoneme

phoneme E:
vowel long starttype (e) endtype (e)
length 210
length 220
formants vdiph/ee-e
endphoneme

@@ -100,7 +100,7 @@ endphoneme

phoneme &:
vowel long starttype (a) endtype (a)
length 210
length 220
formants vowel/&
endphoneme

@@ -120,7 +120,7 @@ endphoneme

phoneme o:
vowel long starttype (o) endtype (o)
length 210
length 220
formants vowel/o_2
endphoneme

@@ -132,7 +132,7 @@ endphoneme

phoneme O:
vowel long starttype (o) endtype (o)
length 200
length 210
formants vowel/oo_4
endphoneme

@@ -165,16 +165,10 @@ endphoneme

phoneme u:
vowel long starttype (u) endtype (u)
length 220
length 230
formants vowel/u_bck
endphoneme

phoneme u2:
vowel long starttype (u) endtype (u)
length 220
formants vowel/u
endphoneme



phoneme r-

BIN
phsource/vdiph/ei_2 View File


BIN
phsource/vdiph/ooi_3 View File


BIN
phsource/vowel/vowelchart.png View File


BIN
phsource/w/w2 View File


+ 88
- 0
src/compiledata.cpp View File

@@ -75,6 +75,7 @@

extern void Write4Bytes(FILE *f, int value);
extern void MakeVowelLists(void);
extern void FindPhonemesUsed(void);
extern int CompileDictionary(const char *dsource, const char *dict_name, FILE *log, char *fname);
extern char voice_name[];

@@ -1824,6 +1825,76 @@ void Compile::Report(void)
}



static int ph_sorter(char **a, char **b)
{//======================================
int ix;
int t1, t2;
char mnem1[6];

PHONEME_TAB *p1 = (PHONEME_TAB *)(*a);
PHONEME_TAB *p2 = (PHONEME_TAB *)(*b);

t1 = p1->type;
if(t1 > phVOWEL) t1 = phVOWEL+1;

t2 = p2->type;
if(t2 > phVOWEL) t2 = phVOWEL+1;

if((ix = t1 - t2) != 0)
return(ix);

strcpy(mnem1,WordToString(p1->mnemonic));
return(strcasecmp(mnem1,WordToString(p2->mnemonic)));
} /* end of ph_sorter */



void PrintPhonemesUsed(FILE *f, const char *dictname)
{//==================================================
int ix;
PHONEME_TAB *ph;
PHONEME_TAB *ph_tab[N_PHONEME_TAB];
int count = 0;
int n_ph = 0;
int section = 0;

fprintf(f,"\n\nDictionary %s_dict\n",dictname);
fflush(f);

for(ix=0; (ix<N_PHONEME_TAB) && (phoneme_tab[ix] != NULL); ix++)
{
if(phoneme_tab_flags[ix] & 2)
{
ph_tab[n_ph++] = phoneme_tab[ix];
}
}

qsort((void *)ph_tab,n_ph,sizeof(PHONEME_TAB *),(int (*)(const void *,const void *))ph_sorter);

for(ix=0; ix<n_ph; ix++)
{
ph = ph_tab[ix];

if(ph->type > 1)
{
if((ph->type > phVOWEL) && (section == 0))
{
section = 1;
count = 0;
fputc('\n',f);
}
if((count & 0x7) == 0)
fputc('\n',f);
fprintf(f,"%-4s ",WordToString(ph->mnemonic));
count++;
}
}
fputc('\n',f);
} // end of PrintPhonemesUsed



wxString CompileAllDictionaries()
{//==============================
wxString filename;
@@ -1834,6 +1905,7 @@ wxString CompileAllDictionaries()
int errors = 0;
int dict_count = 0;
FILE *log;
FILE *f_phused;
char dictname[80];
char fname_log[80];
char save_voice_name[80];
@@ -1866,6 +1938,13 @@ wxString CompileAllDictionaries()

sprintf(fname_log,"%s%s",path_dsource,"dict_log");
log = fopen(fname_log,"w");
sprintf(fname_log,"%s%s",path_dsource,"dict_phonemes");
f_phused = fopen(fname_log,"w");

if(f_phused)
{
fprintf(f_phused,"Phonemes which are used in the *_rules and *_list files\n");
}

bool cont = dir.GetFirst(&filename, _T("*_rules"), wxDIR_FILES);
while ( cont )
@@ -1883,10 +1962,19 @@ wxString CompileAllDictionaries()
errors += err;
}

if(f_phused != NULL)
{
memset(phoneme_tab_flags,0,sizeof(phoneme_tab_flags));
FindPhonemesUsed();
PrintPhonemesUsed(f_phused,dictname);
}

cont = dir.GetNext(&filename);
}
if(log != NULL)
fclose(log);
if(f_phused != NULL)
fclose(f_phused);

LoadVoice(save_voice_name,1);


+ 3
- 0
src/compiledict.cpp View File

@@ -657,6 +657,9 @@ void copy_rule_string(char *string, int &state)
case 'W':
c = RULE_SPELLING;
break;
case 'X':
c = RULE_NOVOWELS;
break;
case 'L':
// expect two digits
c = *p++ - '0';

+ 32
- 3
src/dictionary.cpp View File

@@ -1261,7 +1261,11 @@ void Translator::SetWordStress(char *output, unsigned int dictionary_flags, int
else
{
// unstressed syllable within a word
v_stress = 1; /* change from 0 (unstressed) to 1 (diminished stress) */
if((vowel_stress[v-1] != 1) || ((langopts.stress_flags & 0x10000) == 0))
{
v_stress = 1; /* change from 0 (unstressed) to 1 (diminished stress) */
vowel_stress[v] = v_stress;
}
}
}

@@ -1383,7 +1387,7 @@ char *Translator::DecodeRule(const char *group, char *rule)
static char output[60];

static char symbols[] = {' ',' ',' ',' ',' ',' ',' ',' ',' ',
'@','&','%','+','#','S','D','Z','A','B','C','H','F','G','Y','N','K','V','L','T'};
'@','&','%','+','#','S','D','Z','A','B','C','H','F','G','Y','N','K','V','L','T','X','?','W'};


match_type = 0;
@@ -1805,6 +1809,23 @@ void Translator::MatchRule(char *word[], const char *group, char *rule, MatchRec
}
break;

case RULE_NOVOWELS:
{
char *p = post_ptr + letter_xbytes;
while(letter_w != RULE_SPACE)
{
if(IsLetter(letter_w,LETTERGP_VOWEL2))
{
failed = 1;
break;
}
p += utf8_in(&letter_w,p,0);
}
if(!failed)
match.points += (19-distance_right);
}
break;

case RULE_INC_SCORE:
match.points += 20; // force an increase in points
break;
@@ -1932,6 +1953,13 @@ void Translator::MatchRule(char *word[], const char *group, char *rule, MatchRec
failed = 1;
break;

case RULE_NOVOWELS:
if(word_vowel_count== 0)
match.points += 19;
else
failed =1;
break;

case RULE_IFVERB:
if(expect_verb)
match.points += 1;
@@ -2710,7 +2738,8 @@ int Translator::LookupDictList(char *word1, char *ph_out, unsigned int *flags, i

int Translator::Lookup(char *word, char *ph_out)
{//=============================================
return(LookupDictList(word,ph_out,NULL,0));
unsigned int flags;
return(LookupDictList(word,ph_out,&flags,0));
}



+ 1
- 1
src/synthdata.cpp View File

@@ -35,7 +35,7 @@
#include "translate.h"
#include "wave.h"

const char *version_string = "1.24.11 17.May.07";
const char *version_string = "1.24.12 18.May.07";
const int version_phdata = 0x012201;

int option_device_number = -1;

+ 3
- 3
src/tr_languages.cpp View File

@@ -244,7 +244,7 @@ Translator *SelectTranslator(const char *name)
static const wchar_t replace_chars_hi[11] = {0x966,0x967,0x968,0x969,0x96a,0x96b,0x96c,0x96d,0x96e,0x96f,0}; // digits 0-9
static const unsigned int replacement_chars_hi[11] = {0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0};
static int stress_lengths_hi[8] = {190, 190, 210, 210, 0, 0, 230, 250};
static int stress_amps_hi[8] = {17,14, 20,20, 20,24, 24,22 };
static int stress_amps_hi[8] = {17,14, 20,19, 20,24, 24,22 };

tr = new Translator();
SetupTranslator(tr,stress_lengths_hi,stress_amps_hi);
@@ -252,7 +252,7 @@ Translator *SelectTranslator(const char *name)
tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable

tr->langopts.stress_rule = 6; // stress on last heaviest syllable
tr->langopts.stress_flags = 0x4; // use 'diminished' for unstressed final syllable
tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable
tr->langopts.numbers = 0x811 + 0x40000;
tr->letter_bits_offset = OFFSET_DEVANAGARI;
tr->langopts.replace_chars = replace_chars_hi;
@@ -392,7 +392,7 @@ Translator *SelectTranslator(const char *name)
case L('n','o'): // Norwegian
{
static int stress_amps_no[] = {16,16, 20,20, 20,24, 24,22 };
static int stress_lengths_no[8] = {160,140, 200,200, 0,0, 250,270};
static int stress_lengths_no[8] = {160,140, 200,190, 0,0, 220,240};
tr = new Translator();
SetupTranslator(tr,stress_lengths_no,stress_amps_no);


+ 5
- 3
src/translate.h View File

@@ -122,9 +122,10 @@
#define RULE_NOTVOWEL 25 // K
#define RULE_IFVERB 26 // V
#define RULE_LETTERGP 27 // L + letter group number
#define RULE_ALT1 28 // word has $alt attribute
#define RULE_SPELLING 31 // while spelling letter-by-letter
#define RULE_LAST_RULE 28
#define RULE_ALT1 28 // T word has $alt attribute
#define RULE_NOVOWELS 29 // X no vowels up to word boundary
#define RULE_SPELLING 31 // W while spelling letter-by-letter
#define RULE_LAST_RULE 31

#define LETTERGP_A 0
#define LETTERGP_B 1
@@ -258,6 +259,7 @@ typedef struct {
// bit9=stress last syllable if it doesn't end in vowel or "s" or "n" LANG=Spanish
// bit12= In a 2-syllable word, if one has primary stress then give the other secondary stress
// bit13= If there is only one syllable before the primary stress, give it a secondary stress
// bit16= Don't diminish consecutive syllables within a word.

int stress_flags;
int unstressed_wd1; // stress for $u word of 1 syllable

+ 13
- 2
src/vowelchart.cpp View File

@@ -373,10 +373,12 @@ void VowelChart(int control, char *fname)



static void FindPhonemesUsed(void)
{//===============================
void FindPhonemesUsed(void)
{//========================
int hash;
char *p;
char *start;
char *group;
char *next;
unsigned char c;
int count = 0;
@@ -386,15 +388,24 @@ static void FindPhonemesUsed(void)
p = translator->data_dictrules;
while(*p != 0)
{
if(*p == RULE_CONDITION)
p+=2;
if(*p == RULE_GROUP_END)
{
p++;
if(*p == 0) break;
}
if(*p == RULE_GROUP_START)
{
group = p;
p += (strlen(p)+1);
}

while((((c = *p) != RULE_PHONEMES)) && (c != 0)) p++;
count++;
if(c == RULE_PHONEMES)
{
start = p;
p++;
while(*p != 0)
{

Loading…
Cancel
Save