Browse Source

[1.25.14] Added feature to define collections of letter sequences in *_rules files for use in rules.

lang=pt  Reduce phoneme [&~] to [&] in unstressed syllables.


git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@30 d46cf337-b52f-0410-862d-fd96e6ae7743
master
jonsd 18 years ago
parent
commit
85cb0ec378

+ 1
- 0
dictsource/de_rules View File

@@ -421,6 +421,7 @@
&) schaft (_S6 Saft
schein (lich S'aIn
schwer Sve:*
_) selbst (@@P6 z'Elpst
shop _^_EN
_) sky _^_EN
soldat z%OldA:t

+ 3
- 3
dictsource/dict_phonemes View File

@@ -217,9 +217,9 @@ o~ U u uI u~ y

* : ; b C d dZ f
g h j k l l^ m n
N n^ p Q r R s S
s# s; t T tS ts v w
x z Z
N n^ p Q r R r- s
S s# s; t T tS ts v
w x z Z


Dictionary ru_dict

+ 6
- 6
dictsource/hu_list View File

@@ -46,7 +46,7 @@ mm milime:tER

ill illEdvE $dot
stb SAtYb:i $dot
vö vEZd _!'Yss2E
vö v'EZd||_'Yss2E
pl pe:lda:ul $dot
mta $abbrev
a.m An^n^i||m'int $dot
@@ -200,17 +200,17 @@ a A: $atend // letter "a"
b be:
c tse:
d de:
f Ef
f Eff
g ge:
h ha:
j je:
k ka:
l El
m Em
n En
l Ell
m Emm
n Enn
p pe:
q ku
r ER
r ERR
s S_
s ES $atend
_s ES

+ 12
- 12
dictsource/is_rules View File

@@ -1,7 +1,7 @@
// Spelling-to-phoneme rules for Icelandic
// This file is UTF8 encoded.

// letter group L08 voiceless consonants
// letter group B voiceless consonants c,f,h,k,p,t,x,þ
// letter group F p,t,k,s
// letter group H j,r,v,

@@ -43,7 +43,7 @@

.group ð
ð D
ð (L08 T
ð (B T
ð (__ T // before pause


@@ -154,23 +154,23 @@
.group l
l l
l (_ l#
l (L08X l#
l (BX l#
ll dl
ll (L08 tl#
ll (B8 tl#
ll (_ tl# // ??


.group m
m m
mm (K m
L08) m hm#
m (L08 m#
B) m hm#
m (B m#


.group n
n n
L08) n hn#
n (L08 n#
B) n hn#
n (B n#
n (g N
ng (_ Ng
ngt (K N#d
@@ -209,7 +209,7 @@
.group p
_) p p
p p
L08) p b
B) p b
s) p b
p (K b
p (s f
@@ -226,8 +226,8 @@
_) r R2
C) r @-*
A) r (A R
L08) r r#
r (L08 r#
B) r r#
r (B r#
r (_ r#
r R
rl dl
@@ -244,7 +244,7 @@
.group t
_) t t
t t
L08) t d
B) t d
s) t d
t (K d
tt hd

+ 6
- 6
dictsource/pt_rules View File

@@ -28,8 +28,8 @@
a (r_ 'a
?1 p) a (d_ =E
a (CC_ &
?1 al (_ 'Al // eg: Portugal, capital, etc.
?1 a (lK ,A // Algarve, alto, etc...
?1 a (l_ 'A // eg: Portugal, capital, etc.
?1 a (lK ,A // Algarve, Almerinda, etc...
?2 al (K aU
?2 alh alj

@@ -141,10 +141,10 @@
e (cem_ E
e (ces_ E

e (stA_ E
e (stA_ E
e (stAm_ E
e (stAs_ E
e (strA_ E
e (strA_ E

e (rnA_ E
e (rnAm_ E
@@ -157,7 +157,7 @@ e (stA_ E
e (xA_ E
e (xAm_ E
e (xAs_ E
?2 _n) e (t E
?2 _n) e (t E

em (C eIm
en (K eIN
@@ -475,7 +475,7 @@ e (stA_ E
l) r x
n) r x
s) r x
?1 A) r (_ r // this letter is imperfect.
?1 A) r (_ r- // [r-] is English linking-r


.group s

+ 0
- 2
dictsource/ru_rules View File

@@ -9,8 +9,6 @@
// G voiced: б в г д ж з
// H hard consonant: ъ ж ц ш
// Y iotated vowel, softsign: ь е ё и ю я
// L08 кпстфх
// L09 бгджзлмнр


.group а

+ 8
- 8
phsource/compile_report View File

@@ -24,8 +24,8 @@
ru 36 123
it 25 114
es 6 114
pt 34 137
pt_pt 20 137
pt 28 131
pt_pt 20 131
ro 36 138
el 8 114
sv 25 117
@@ -418,14 +418,14 @@
2 vowel/@_4 nl hr
3 vowel/8_2 en_us hr sv
1 vowel/8_3 zh_yue
12 vowel/a en_n cy de hu nl pl sk hr pt
11 vowel/a en_n cy de hu nl pl sk hr
4 vowel/a# en_sc it pt
6 vowel/a_2 eo it pt pt_pt ro vi
5 vowel/a#_2 hr pt sv is sw
7 vowel/a_3 en_sc cs pt is
4 vowel/a#_2 hr sv is sw
6 vowel/a_3 en_sc cs is
12 vowel/a#_3 en en_n en_us en_wm de hi ru pt_pt vi zh_yue
4 vowel/a_4 en_wm el vi
3 vowel/a_5 pt sv sw
2 vowel/a_5 sv sw
7 vowel/aa en_us fi fr_ca no zh_yue
1 vowel/aa# fi
3 vowel/aa_2 en cy
@@ -433,7 +433,7 @@
2 vowel/aa_4 sv vi
2 vowel/aa_5 en_n
1 vowel/aa_6 de
2 vowel/aa_7 nl pt
1 vowel/aa_7 nl
4 vowel/a_en en fr
1 vowel/@_bck hi
13 vowel/e en en_n af cy eo fr hu hr it pt pt_pt vi
@@ -513,7 +513,7 @@
2 vowel/V en en_sc
3 vowel/V_2 af ru
3 vowel/V_3 en_rp hi vi
2 vowel/V_4 en_sc pt
1 vowel/V_4 en_sc
6 vowel/y en de fi hu nl zh_yue
10 vowel/y# en en_wm de fi fr hu nl ro vi zh_yue
1 vowel/y## is

+ 1
- 1
phsource/ph_af View File

@@ -255,7 +255,7 @@ endphoneme


phoneme r
liquid
liquid starttype #r endtype #r
length 60
vowelin f1=2 f2=2700 -300 -200 f3=-1300 80
vowelout f1=2 f2=1700 -300 -200 f3=-1300 80

+ 1
- 1
phsource/ph_dutch View File

@@ -139,7 +139,7 @@ endphoneme


phoneme r // from Afrikaans
liquid
liquid starttype #r endtype #r
length 60
vowelin f1=2 f2=2700 -300 -200 f3=-1300 80
vowelout f1=2 f2=1700 -300 -200 f3=-1300 80

+ 2
- 37
phsource/ph_pt_brazil View File

@@ -32,6 +32,7 @@ phoneme &~
vowel starttype (@) endtype (@)
length 200
formants vnasal/a#_n
reduceto & 4
endphoneme

phoneme &U~
@@ -63,48 +64,12 @@ phoneme &/ // Used for final "a" when next word starts with "a"
endphoneme


phoneme A // for "al" in European Portuguese
vowel starttype (a) endtype (a)
length 200
formants vowel/a_3
endphoneme

phoneme A2 // for "al" in European Portuguese
vowel starttype (a) endtype (a)
length 200
formants vowel/aa_7
endphoneme

phoneme A3 // for "al" in European Portuguese
vowel starttype (a) endtype (a)
length 200
formants vowel/a_5
endphoneme

phoneme A4 // for "al" in European Portuguese
phoneme A // for "al" in European Portuguese, don't reduce to [&]
vowel starttype (a) endtype (a)
length 200
formants vowel/a_2
endphoneme

phoneme A5 // for "al" in European Portuguese
vowel starttype (a) endtype (a)
length 200
formants vowel/a
endphoneme

phoneme A6 // for "al" in European Portuguese
vowel starttype (a) endtype (a)
length 200
formants vowel/V_4
endphoneme

phoneme A7 // for "al" in European Portuguese
vowel starttype (a) endtype (a)
length 200
formants vowel/a#_2
endphoneme



phoneme E

+ 1
- 1
phsource/phonemes View File

@@ -402,7 +402,7 @@ phoneme R2 // this is [R] from Slovak/Czech
endphoneme

phoneme R3 // Afrikaans
liquid
liquid starttype #r endtype #r
length 60
vowelin f1=2 f2=2700 -300 -200 f3=-1300 80
vowelout f1=2 f2=1700 -300 -200 f3=-1300 80

BIN
phsource/vowel/a_3 View File


BIN
phsource/vowel/e# View File


BIN
phsource/vowel/oo_2 View File


+ 4
- 2
src/compiledata.cpp View File

@@ -532,13 +532,15 @@ int Compile::NextItem(int type)
{
item_string[ix++] = c;
c = fgetc(f_in);
if(feof(f_in))
break;
if(item_string[ix-1] == '=')
break;
}
ungetc(c,f_in);
item_string[ix] = 0;

if(feof(f_in)) return(-1);
if(!feof(f_in))
ungetc(c,f_in);

keyword = -1;


+ 54
- 5
src/compiledict.cpp View File

@@ -665,7 +665,7 @@ void copy_rule_string(char *string, int &state)
c = *p++ - '0';
value = *p++ - '0';
c = c * 10 + value;
if((value < 0) || (value > 9) || (c <= 0) || (c >= N_LETTER_TYPES))
if((value < 0) || (value > 9) || (c <= 0) || (c >= N_LETTER_GROUPS))
{
c = 0;
fprintf(f_log,"%5d: Expected 2 digits after 'L'",linenum);
@@ -676,11 +676,11 @@ void copy_rule_string(char *string, int &state)
{
// pre-rule, put the group number before the RULE_LETTERGP command
output[ix++] = c;
c = RULE_LETTERGP;
c = RULE_LETTERGP2;
}
else
{
output[ix++] = RULE_LETTERGP;
output[ix++] = RULE_LETTERGP2;
}
break;

@@ -1076,8 +1076,47 @@ void output_rule_group(FILE *f_out, int n_rules, char **rules, char *name)



int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp)
{//=============================================================
static int compile_lettergroup(char *input, FILE *f_out)
{//=====================================================
// Compile one letter-group definition line from a *_rules file.
//
// 'input' points just past the ".L" prefix of the line, so it is expected to
// start with a two-digit group number followed by whitespace-separated
// letter sequences, e.g. "08 c f h k p t x".
//
// Output written to f_out:
//   RULE_GROUP_START, RULE_LETTERGP2, (group + 'A'),
//   each letter sequence as a NUL-terminated string,
//   RULE_GROUP_END
//
// Returns 0 on success, 1 if the group number is missing, malformed or
// not less than N_LETTER_GROUPS (nothing is written in that case).
	char *p;
	int group;

	p = input;

	// <ctype.h> functions need a value representable as unsigned char
	if(!isdigit((unsigned char)p[0]) || !isdigit((unsigned char)p[1]))
	{
		return(1);
	}

	// Use BOTH digits. Parsing from p[1] only would store ".L12" as group 2,
	// while a rule that references "L12" is encoded as group 12.
	group = (p[0] - '0') * 10 + (p[1] - '0');
	if(group >= N_LETTER_GROUPS)
		return(1);

	// skip the rest of the group-number token; stop at the end of the string
	// so that a line with nothing after the number cannot overrun the buffer
	while((*p != 0) && !isspace2(*p)) p++;

	fputc(RULE_GROUP_START,f_out);
	fputc(RULE_LETTERGP2,f_out);
	fputc(group + 'A', f_out);

	for(;;)
	{
		while(isspace2(*p)) p++;
		if(*p == 0)
			break;

		// copy one letter sequence (bytes above ' ', which includes the
		// bytes of multi-byte utf-8 characters) and terminate it
		while((*p & 0xff) > ' ')
		{
			fputc(*p++, f_out);
		}
		fputc(0,f_out);
	}
	fputc(RULE_GROUP_END,f_out);

	return(0);
}


static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp)
{//====================================================================
char *prule;
unsigned char *p;
int ix;
@@ -1108,6 +1147,16 @@ int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp)
buf = fgets(buf1,sizeof(buf1),f_in);
if((buf != NULL) && (buf[0] == '\r')) buf++; // ignore extra \r in \r\n

if((buf != NULL) && (memcmp(buf,".L",2)==0))
{
if(compile_lettergroup(&buf[2], f_out) != 0)
{
fprintf(f_log,"%5d: Bad lettergroup\n",linenum);
error_count++;
}
continue;
}

if((buf == NULL) || (memcmp(buf,".group",6)==0))
{
// next .group or end of file, write out the previous group

+ 85
- 33
src/dictionary.cpp View File

@@ -213,6 +213,7 @@ void Translator::InitGroups(void)
groups2_count[ix]=0;
groups2_start[ix]=255; // indicates "not set"
}
memset(letterGroups,0,sizeof(letterGroups));

p = data_dictrules;
while(*p != 0)
@@ -223,30 +224,44 @@ void Translator::InitGroups(void)
break;
}
p++;
len = strlen(p);
p_name = p;
c = p_name[0];
p += (len+1);
if(len == 1)

if(p[0] == RULE_LETTERGP2)
{
groups1[c] = p;
ix = p[1] - 'A';
p += 2;
if((ix >= 0) && (ix < N_LETTER_GROUPS))
{
letterGroups[ix] = p;
}
}
else
if(len == 0)
{
groups1[0] = p;
len = strlen(p);
p_name = p;
c = p_name[0];
p += (len+1);
if(len == 1)
{
groups1[c] = p;
}
else
if(len == 0)
{
groups1[0] = p;
}
else
{
if(groups2_start[c] == 255)
groups2_start[c] = n_groups2;
groups2_count[c]++;
groups2[n_groups2] = p;
c2 = p_name[1];
groups2_name[n_groups2++] = (c + (c2 << 8));
}
}
else
{
if(groups2_start[c] == 255)
groups2_start[c] = n_groups2;

groups2_count[c]++;
groups2[n_groups2] = p;
c2 = p_name[1];
groups2_name[n_groups2++] = (c + (c2 << 8));
}
// skip over all the rules in this group
rule_count = 0;
while(*p != RULE_GROUP_END)
@@ -589,6 +604,31 @@ return(0);



int Translator::IsLetterGroup(char *word, int group)
{//=================================================
// match the word against a list of utf-8 strings
char *p;
char *w;

p = letterGroups[group];

while(*p != 0)
{
w = word;
while(*p == *w)
{
*w++;
*p++;
}
if(*p == 0)
return(w-word); // matched a complete string

while(*p++ != 0); // skip to end of string
}
return(0);
}


int Translator::IsLetter(int letter, int group)
{//============================================
if(letter_groups[group] != NULL)
@@ -1412,8 +1452,9 @@ char *Translator::DecodeRule(const char *group, char *rule)
static char output[60];

static char symbols[] = {' ',' ',' ',' ',' ',' ',' ',' ',' ',
'@','&','%','+','#','S','D','Z','A','B','C','H','F','G','Y','N','K','V','L','T','X','?','W'};
'@','&','%','+','#','S','D','Z','A','L',' ',' ',' ',' ',' ','N','K','V',' ','T','X','?','W'};

static char symbols_lg[] = {'A','B','C','H','F','G','Y'};

match_type = 0;
buf_pre[0] = 0;
@@ -1465,25 +1506,23 @@ char *Translator::DecodeRule(const char *group, char *rule)
}
else
if(rb == RULE_LETTERGP)
{
c = symbols_lg[*rule++ - 'A'];
}
else
if(rb == RULE_LETTERGP2)
{
value = *rule++ - 'A';
if(value >= 8)
{
p[0] = 'L';
p[1] = (value / 10) + '0';
c = (value % 10) + '0';
p[0] = 'L';
p[1] = (value / 10) + '0';
c = (value % 10) + '0';

if(match_type == RULE_PRE)
{
p[0] = c;
c = 'L';
}
p+=2;
}
else
if(match_type == RULE_PRE)
{
c = symbols[value + RULE_LETTER_GROUPS];
p[0] = c;
c = 'L';
}
p+=2;
}
else
if(rb <= RULE_LAST_RULE)
@@ -1611,6 +1650,7 @@ void Translator::MatchRule(char *word[], const char *group, char *rule, MatchRec
int distance_right;
int distance_left;
int lg_pts;
int n_bytes;

MatchRecord match;
static MatchRecord best;
@@ -1626,6 +1666,7 @@ void Translator::MatchRule(char *word[], const char *group, char *rule, MatchRec
if(rule == NULL)
{
match_out->points = 0;
(*word)++;
return;
}

@@ -1752,6 +1793,17 @@ void Translator::MatchRule(char *word[], const char *group, char *rule, MatchRec
failed = 1;
break;

case RULE_LETTERGP2: // match against a list of utf-8 strings
letter_group = *rule++ - 'A';
if((n_bytes = IsLetterGroup(post_ptr-1,letter_group)) >0)
{
match.points += (20-distance_right);
post_ptr += (n_bytes-1);
}
else
failed =1;
break;

case RULE_NOTVOWEL:
if(!IsLetter(letter_w,0))
{

+ 16
- 0
src/phonemelist.cpp View File

@@ -419,6 +419,22 @@ if((ph->mnemonic == 't') && ((prev->type == phVOWEL) || (prev->mnemonic == 'n'))
}
#endif

if((ph->reduce_to != 0) && (ph->type != phVOWEL) && !(plist2->synthflags & SFLAG_DICTIONARY))
{
// reduction for vowels has already been done in SetWordStress
int reduce_level;

if(next->type == phVOWEL)
{
reduce_level = (ph->phflags >> 28) & 7;
if((&plist2[1])->stress < reduce_level)
{
// look at the stress of the following vowel
ph = phoneme_tab[ph->reduce_to];
}
}
}

if((plist2+1)->synthflags & SFLAG_LENGTHEN)
{
static char types_double[] = {phFRICATIVE,phVFRICATIVE,phNASAL,phLIQUID,0};

+ 2
- 2
src/synthdata.cpp View File

@@ -35,8 +35,8 @@
#include "translate.h"
#include "wave.h"

const char *version_string = "1.25.13 30.May.07";
const int version_phdata = 0x012501;
const char *version_string = "1.25.14 31.May.07";
const int version_phdata = 0x012514;

int option_device_number = -1;


+ 2
- 9
src/tr_languages.cpp View File

@@ -344,7 +344,7 @@ Translator *SelectTranslator(const char *name)
{
static int stress_amps_is[] = {16,16, 20,20, 20,24, 24,22 };
static int stress_lengths_is[8] = {180,155, 200,200, 0,0, 240,250};
static const wchar_t is_L08[] = {'c','f','h','k','p','t','x',0xfe,0}; // voiceless conants, including 'þ' ?? 's'
static const wchar_t is_lettergroup_B[] = {'c','f','h','k','p','t','x',0xfe,0}; // voiceless consonants, including 'þ' ?? 's'

tr = new Translator();
SetupTranslator(tr,stress_lengths_is,stress_amps_is);
@@ -356,7 +356,7 @@ Translator *SelectTranslator(const char *name)
ResetLetterBits(tr,0x18);
SetLetterBits(tr,4,"kpst"); // Letter group F
SetLetterBits(tr,3,"jvr"); // Letter group H
tr->letter_groups[8] = is_L08;
tr->letter_groups[1] = is_lettergroup_B;
SetLetterVowel(tr,'y');
tr->langopts.numbers = 0xe9;
tr->langopts.numbers2 = 0x2;
@@ -656,10 +656,6 @@ Translator_Russian::Translator_Russian() : Translator()
static const char ru_voiced[] = {0x11,0x12,0x13,0x14,0x16,0x17,0}; // letter group G (voiced obstruents)
static const char ru_ivowels[] = {0x2c,0x15,0x31,0x18,0x2e,0x2f,0}; // letter group Y (iotated vowels & soft-sign)

// these are unicode character codes
static const wchar_t ru_L08[] = {0x43a,0x43f,0x441,0x442,0x444,0x445,0}; // кпстфх
static const wchar_t ru_L09[] = {0x431,0x433,0x434,0x436,0x43b,0x43c,0x43d,0x440,0}; // бгджзлмнр

SetupTranslator(this,stress_lengths_ru,stress_amps_ru);

charset_a0 = charsets[18]; // KOI8-R
@@ -678,9 +674,6 @@ Translator_Russian::Translator_Russian() : Translator()
SetLetterBits(this,6,ru_ivowels);
SetLetterBits(this,7,ru_vowels);

letter_groups[8] = ru_L08; // This is L08 in ru_rules
letter_groups[9] = ru_L09; // This is L09 in ru_rules

langopts.param[LOPT_UNPRONOUNCABLE] = 0x432; // [v] don't count this character at start of word
langopts.param[LOPT_REGRESSIVE_VOICING] = 1;
langopts.param[LOPT_KEEP_UNSTR_VOWEL] = 1;

+ 7
- 11
src/translate.h View File

@@ -29,6 +29,8 @@
#define N_RULE_GROUP2 120 // max num of two-letter rule chains
#define N_HASH_DICT 1024
#define N_CHARSETS 20
#define N_LETTER_GROUPS 20


/* flags from word dictionary */
// bits 0-3 stressed syllable, 7=unstressed
@@ -110,18 +112,11 @@
#define RULE_ENDING 14
#define RULE_DIGIT 15 // D digit
#define RULE_NONALPHA 16 // Z non-alpha
#define RULE_LETTER_GROUPS 17 // 17 to 23
#define RULE_LETTER1 17 // A vowels
#define RULE_LETTER2 18 // B 'hard' consonants
#define RULE_LETTER3 19 // C all consonants
#define RULE_LETTER4 20 // H letter group
#define RULE_LETTER5 21 // F letter group
#define RULE_LETTER6 22 // G letter group
#define RULE_LETTER7 23 // Y letter group
#define RULE_LETTERGP 17 // A B C H F G Y letter group number
#define RULE_LETTERGP2 18 // L + letter group number
#define RULE_NO_SUFFIX 24 // N
#define RULE_NOTVOWEL 25 // K
#define RULE_IFVERB 26 // V
#define RULE_LETTERGP 27 // L + letter group number
#define RULE_ALT1 28 // T word has $alt attribute
#define RULE_NOVOWELS 29 // X no vowels up to word boundary
#define RULE_SPELLING 31 // W while spelling letter-by-letter
@@ -355,8 +350,7 @@ public:
// holds properties of characters: vowel, consonant, etc for pronunciation rules
unsigned char letter_bits[256];
int letter_bits_offset;
#define N_LETTER_TYPES 20
const wchar_t *letter_groups[N_LETTER_TYPES];
const wchar_t *letter_groups[8];

/* index1=option, index2 by 0=. 1=, 2=?, 3=! 4=none */
unsigned char punct_to_tone[4][5];
@@ -394,6 +388,7 @@ private:
void ApplySpecialAttribute(char *phonemes, int dict_flags);

int IsLetter(int letter, int group);
int IsLetterGroup(char *word, int group);

void CalcPitches_Tone(int clause_tone);

@@ -420,6 +415,7 @@ protected:
unsigned char groups2_count[256]; // number of 2 letter groups for this initial letter
unsigned char groups2_start[256]; // index into groups2
char *letterGroups[N_LETTER_GROUPS];
int n_ph_list2;
PHONEME_LIST2 ph_list2[N_PHONEME_LIST]; // first stage of text->phonemes

+ 6
- 2
src/vowelchart.cpp View File

@@ -378,7 +378,6 @@ void FindPhonemesUsed(void)
int hash;
char *p;
char *start;
char *group;
char *next;
unsigned char c;
int count = 0;
@@ -397,7 +396,12 @@ void FindPhonemesUsed(void)
}
if(*p == RULE_GROUP_START)
{
group = p;
if(p[1] == RULE_LETTERGP2)
{
while(*p != RULE_GROUP_END) p++;
continue;
}

p += (strlen(p)+1);
}


Loading…
Cancel
Save