lang=pt Reduce phoneme [&~] to [&] in unstressed syllables. git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@30 d46cf337-b52f-0410-862d-fd96e6ae7743master
&) schaft (_S6 Saft | &) schaft (_S6 Saft | ||||
schein (lich S'aIn | schein (lich S'aIn | ||||
schwer Sve:* | schwer Sve:* | ||||
_) selbst (@@P6 z'Elpst | |||||
shop _^_EN | shop _^_EN | ||||
_) sky _^_EN | _) sky _^_EN | ||||
soldat z%OldA:t | soldat z%OldA:t |
* : ; b C d dZ f | * : ; b C d dZ f | ||||
g h j k l l^ m n | g h j k l l^ m n | ||||
N n^ p Q r R s S | |||||
s# s; t T tS ts v w | |||||
x z Z | |||||
N n^ p Q r R r- s | |||||
S s# s; t T tS ts v | |||||
w x z Z | |||||
Dictionary ru_dict | Dictionary ru_dict |
ill illEdvE $dot | ill illEdvE $dot | ||||
stb SAtYb:i $dot | stb SAtYb:i $dot | ||||
vö vEZd _!'Yss2E | |||||
vö v'EZd||_'Yss2E | |||||
pl pe:lda:ul $dot | pl pe:lda:ul $dot | ||||
mta $abbrev | mta $abbrev | ||||
a.m An^n^i||m'int $dot | a.m An^n^i||m'int $dot | ||||
b be: | b be: | ||||
c tse: | c tse: | ||||
d de: | d de: | ||||
f Ef | |||||
f Eff | |||||
g ge: | g ge: | ||||
h ha: | h ha: | ||||
j je: | j je: | ||||
k ka: | k ka: | ||||
l El | |||||
m Em | |||||
n En | |||||
l Ell | |||||
m Emm | |||||
n Enn | |||||
p pe: | p pe: | ||||
q ku | q ku | ||||
r ER | |||||
r ERR | |||||
s S_ | s S_ | ||||
s ES $atend | s ES $atend | ||||
_s ES | _s ES |
// Spelling-to-phoneme rules for Icelandic | // Spelling-to-phoneme rules for Icelandic | ||||
// This file is UTF8 encoded. | // This file is UTF8 encoded. | ||||
// letter group L08 voiceless consonants | |||||
// letter group B voiceless consonants c,f,h,k,p,t,x,þ | |||||
// letter group F p,t,k,s | // letter group F p,t,k,s | ||||
// letter group H j,r,v, | // letter group H j,r,v, | ||||
.group ð | .group ð | ||||
ð D | ð D | ||||
ð (L08 T | |||||
ð (B T | |||||
ð (__ T // before pause | ð (__ T // before pause | ||||
.group l | .group l | ||||
l l | l l | ||||
l (_ l# | l (_ l# | ||||
l (L08X l# | |||||
l (BX l# | |||||
ll dl | ll dl | ||||
ll (L08 tl# | |||||
ll (B8 tl# | |||||
ll (_ tl# // ?? | ll (_ tl# // ?? | ||||
.group m | .group m | ||||
m m | m m | ||||
mm (K m | mm (K m | ||||
L08) m hm# | |||||
m (L08 m# | |||||
B) m hm# | |||||
m (B m# | |||||
.group n | .group n | ||||
n n | n n | ||||
L08) n hn# | |||||
n (L08 n# | |||||
B) n hn# | |||||
n (B n# | |||||
n (g N | n (g N | ||||
ng (_ Ng | ng (_ Ng | ||||
ngt (K N#d | ngt (K N#d | ||||
.group p | .group p | ||||
_) p p | _) p p | ||||
p p | p p | ||||
L08) p b | |||||
B) p b | |||||
s) p b | s) p b | ||||
p (K b | p (K b | ||||
p (s f | p (s f | ||||
_) r R2 | _) r R2 | ||||
C) r @-* | C) r @-* | ||||
A) r (A R | A) r (A R | ||||
L08) r r# | |||||
r (L08 r# | |||||
B) r r# | |||||
r (B r# | |||||
r (_ r# | r (_ r# | ||||
r R | r R | ||||
rl dl | rl dl | ||||
.group t | .group t | ||||
_) t t | _) t t | ||||
t t | t t | ||||
L08) t d | |||||
B) t d | |||||
s) t d | s) t d | ||||
t (K d | t (K d | ||||
tt hd | tt hd |
a (r_ 'a | a (r_ 'a | ||||
?1 p) a (d_ =E | ?1 p) a (d_ =E | ||||
a (CC_ & | a (CC_ & | ||||
?1 al (_ 'Al // eg: Portugal, capital, etc. | |||||
?1 a (lK ,A // Algarve, alto, etc... | |||||
?1 a (l_ 'A // eg: Portugal, capital, etc. | |||||
?1 a (lK ,A // Algarve, Almerinda, etc... | |||||
?2 al (K aU | ?2 al (K aU | ||||
?2 alh alj | ?2 alh alj | ||||
e (cem_ E | e (cem_ E | ||||
e (ces_ E | e (ces_ E | ||||
e (stA_ E | |||||
e (stA_ E | |||||
e (stAm_ E | e (stAm_ E | ||||
e (stAs_ E | e (stAs_ E | ||||
e (strA_ E | |||||
e (strA_ E | |||||
e (rnA_ E | e (rnA_ E | ||||
e (rnAm_ E | e (rnAm_ E | ||||
e (xA_ E | e (xA_ E | ||||
e (xAm_ E | e (xAm_ E | ||||
e (xAs_ E | e (xAs_ E | ||||
?2 _n) e (t E | |||||
?2 _n) e (t E | |||||
em (C eIm | em (C eIm | ||||
en (K eIN | en (K eIN | ||||
l) r x | l) r x | ||||
n) r x | n) r x | ||||
s) r x | s) r x | ||||
?1 A) r (_ r // this letter is imperfect. | |||||
?1 A) r (_ r- // [r-] is English linking-r | |||||
.group s | .group s |
// G voiced: б в г д ж з | // G voiced: б в г д ж з | ||||
// H hard consonant: ъ ж ц ш | // H hard consonant: ъ ж ц ш | ||||
// Y iotated vowel, softsign: ь е ё и ю я | // Y iotated vowel, softsign: ь е ё и ю я | ||||
// L08 кпстфх | |||||
// L09 бгджзлмнр | |||||
.group а | .group а |
ru 36 123 | ru 36 123 | ||||
it 25 114 | it 25 114 | ||||
es 6 114 | es 6 114 | ||||
pt 34 137 | |||||
pt_pt 20 137 | |||||
pt 28 131 | |||||
pt_pt 20 131 | |||||
ro 36 138 | ro 36 138 | ||||
el 8 114 | el 8 114 | ||||
sv 25 117 | sv 25 117 | ||||
2 vowel/@_4 nl hr | 2 vowel/@_4 nl hr | ||||
3 vowel/8_2 en_us hr sv | 3 vowel/8_2 en_us hr sv | ||||
1 vowel/8_3 zh_yue | 1 vowel/8_3 zh_yue | ||||
12 vowel/a en_n cy de hu nl pl sk hr pt | |||||
11 vowel/a en_n cy de hu nl pl sk hr | |||||
4 vowel/a# en_sc it pt | 4 vowel/a# en_sc it pt | ||||
6 vowel/a_2 eo it pt pt_pt ro vi | 6 vowel/a_2 eo it pt pt_pt ro vi | ||||
5 vowel/a#_2 hr pt sv is sw | |||||
7 vowel/a_3 en_sc cs pt is | |||||
4 vowel/a#_2 hr sv is sw | |||||
6 vowel/a_3 en_sc cs is | |||||
12 vowel/a#_3 en en_n en_us en_wm de hi ru pt_pt vi zh_yue | 12 vowel/a#_3 en en_n en_us en_wm de hi ru pt_pt vi zh_yue | ||||
4 vowel/a_4 en_wm el vi | 4 vowel/a_4 en_wm el vi | ||||
3 vowel/a_5 pt sv sw | |||||
2 vowel/a_5 sv sw | |||||
7 vowel/aa en_us fi fr_ca no zh_yue | 7 vowel/aa en_us fi fr_ca no zh_yue | ||||
1 vowel/aa# fi | 1 vowel/aa# fi | ||||
3 vowel/aa_2 en cy | 3 vowel/aa_2 en cy | ||||
2 vowel/aa_4 sv vi | 2 vowel/aa_4 sv vi | ||||
2 vowel/aa_5 en_n | 2 vowel/aa_5 en_n | ||||
1 vowel/aa_6 de | 1 vowel/aa_6 de | ||||
2 vowel/aa_7 nl pt | |||||
1 vowel/aa_7 nl | |||||
4 vowel/a_en en fr | 4 vowel/a_en en fr | ||||
1 vowel/@_bck hi | 1 vowel/@_bck hi | ||||
13 vowel/e en en_n af cy eo fr hu hr it pt pt_pt vi | 13 vowel/e en en_n af cy eo fr hu hr it pt pt_pt vi | ||||
2 vowel/V en en_sc | 2 vowel/V en en_sc | ||||
3 vowel/V_2 af ru | 3 vowel/V_2 af ru | ||||
3 vowel/V_3 en_rp hi vi | 3 vowel/V_3 en_rp hi vi | ||||
2 vowel/V_4 en_sc pt | |||||
1 vowel/V_4 en_sc | |||||
6 vowel/y en de fi hu nl zh_yue | 6 vowel/y en de fi hu nl zh_yue | ||||
10 vowel/y# en en_wm de fi fr hu nl ro vi zh_yue | 10 vowel/y# en en_wm de fi fr hu nl ro vi zh_yue | ||||
1 vowel/y## is | 1 vowel/y## is |
phoneme r | phoneme r | ||||
liquid | |||||
liquid starttype #r endtype #r | |||||
length 60 | length 60 | ||||
vowelin f1=2 f2=2700 -300 -200 f3=-1300 80 | vowelin f1=2 f2=2700 -300 -200 f3=-1300 80 | ||||
vowelout f1=2 f2=1700 -300 -200 f3=-1300 80 | vowelout f1=2 f2=1700 -300 -200 f3=-1300 80 |
phoneme r // from Afrikaans | phoneme r // from Afrikaans | ||||
liquid | |||||
liquid starttype #r endtype #r | |||||
length 60 | length 60 | ||||
vowelin f1=2 f2=2700 -300 -200 f3=-1300 80 | vowelin f1=2 f2=2700 -300 -200 f3=-1300 80 | ||||
vowelout f1=2 f2=1700 -300 -200 f3=-1300 80 | vowelout f1=2 f2=1700 -300 -200 f3=-1300 80 |
vowel starttype (@) endtype (@) | vowel starttype (@) endtype (@) | ||||
length 200 | length 200 | ||||
formants vnasal/a#_n | formants vnasal/a#_n | ||||
reduceto & 4 | |||||
endphoneme | endphoneme | ||||
phoneme &U~ | phoneme &U~ | ||||
endphoneme | endphoneme | ||||
phoneme A // for "al" in European Portuguese | |||||
vowel starttype (a) endtype (a) | |||||
length 200 | |||||
formants vowel/a_3 | |||||
endphoneme | |||||
phoneme A2 // for "al" in European Portuguese | |||||
vowel starttype (a) endtype (a) | |||||
length 200 | |||||
formants vowel/aa_7 | |||||
endphoneme | |||||
phoneme A3 // for "al" in European Portuguese | |||||
vowel starttype (a) endtype (a) | |||||
length 200 | |||||
formants vowel/a_5 | |||||
endphoneme | |||||
phoneme A4 // for "al" in European Portuguese | |||||
phoneme A // for "al" in European Portuguese, don't reduce to [&] | |||||
vowel starttype (a) endtype (a) | vowel starttype (a) endtype (a) | ||||
length 200 | length 200 | ||||
formants vowel/a_2 | formants vowel/a_2 | ||||
endphoneme | endphoneme | ||||
phoneme A5 // for "al" in European Portuguese | |||||
vowel starttype (a) endtype (a) | |||||
length 200 | |||||
formants vowel/a | |||||
endphoneme | |||||
phoneme A6 // for "al" in European Portuguese | |||||
vowel starttype (a) endtype (a) | |||||
length 200 | |||||
formants vowel/V_4 | |||||
endphoneme | |||||
phoneme A7 // for "al" in European Portuguese | |||||
vowel starttype (a) endtype (a) | |||||
length 200 | |||||
formants vowel/a#_2 | |||||
endphoneme | |||||
phoneme E | phoneme E |
endphoneme | endphoneme | ||||
phoneme R3 // Afrikaans | phoneme R3 // Afrikaans | ||||
liquid | |||||
liquid starttype #r endtype #r | |||||
length 60 | length 60 | ||||
vowelin f1=2 f2=2700 -300 -200 f3=-1300 80 | vowelin f1=2 f2=2700 -300 -200 f3=-1300 80 | ||||
vowelout f1=2 f2=1700 -300 -200 f3=-1300 80 | vowelout f1=2 f2=1700 -300 -200 f3=-1300 80 |
{ | { | ||||
item_string[ix++] = c; | item_string[ix++] = c; | ||||
c = fgetc(f_in); | c = fgetc(f_in); | ||||
if(feof(f_in)) | |||||
break; | |||||
if(item_string[ix-1] == '=') | if(item_string[ix-1] == '=') | ||||
break; | break; | ||||
} | } | ||||
ungetc(c,f_in); | |||||
item_string[ix] = 0; | item_string[ix] = 0; | ||||
if(feof(f_in)) return(-1); | |||||
if(!feof(f_in)) | |||||
ungetc(c,f_in); | |||||
keyword = -1; | keyword = -1; | ||||
c = *p++ - '0'; | c = *p++ - '0'; | ||||
value = *p++ - '0'; | value = *p++ - '0'; | ||||
c = c * 10 + value; | c = c * 10 + value; | ||||
if((value < 0) || (value > 9) || (c <= 0) || (c >= N_LETTER_TYPES)) | |||||
if((value < 0) || (value > 9) || (c <= 0) || (c >= N_LETTER_GROUPS)) | |||||
{ | { | ||||
c = 0; | c = 0; | ||||
fprintf(f_log,"%5d: Expected 2 digits after 'L'",linenum); | fprintf(f_log,"%5d: Expected 2 digits after 'L'",linenum); | ||||
{ | { | ||||
// pre-rule, put the group number before the RULE_LETTERGP command | // pre-rule, put the group number before the RULE_LETTERGP command | ||||
output[ix++] = c; | output[ix++] = c; | ||||
c = RULE_LETTERGP; | |||||
c = RULE_LETTERGP2; | |||||
} | } | ||||
else | else | ||||
{ | { | ||||
output[ix++] = RULE_LETTERGP; | |||||
output[ix++] = RULE_LETTERGP2; | |||||
} | } | ||||
break; | break; | ||||
int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||||
{//============================================================= | |||||
static int compile_lettergroup(char *input, FILE *f_out) | |||||
{//===================================================== | |||||
char *p; | |||||
int group; | |||||
p = input; | |||||
if(!isdigit(p[0]) || !isdigit(p[1])) | |||||
{ | |||||
return(1); | |||||
} | |||||
group = atoi(&p[1]); | |||||
if(group >= N_LETTER_GROUPS) | |||||
return(1); | |||||
while(!isspace2(*p)) p++; | |||||
fputc(RULE_GROUP_START,f_out); | |||||
fputc(RULE_LETTERGP2,f_out); | |||||
fputc(group + 'A', f_out); | |||||
for(;;) | |||||
{ | |||||
while(isspace2(*p)) p++; | |||||
if(*p == 0) | |||||
break; | |||||
while((*p & 0xff) > ' ') | |||||
{ | |||||
fputc(*p++, f_out); | |||||
} | |||||
fputc(0,f_out); | |||||
} | |||||
fputc(RULE_GROUP_END,f_out); | |||||
return(0); | |||||
} | |||||
static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||||
{//==================================================================== | |||||
char *prule; | char *prule; | ||||
unsigned char *p; | unsigned char *p; | ||||
int ix; | int ix; | ||||
buf = fgets(buf1,sizeof(buf1),f_in); | buf = fgets(buf1,sizeof(buf1),f_in); | ||||
if((buf != NULL) && (buf[0] == '\r')) buf++; // ignore extra \r in \r\n | if((buf != NULL) && (buf[0] == '\r')) buf++; // ignore extra \r in \r\n | ||||
if((buf != NULL) && (memcmp(buf,".L",2)==0)) | |||||
{ | |||||
if(compile_lettergroup(&buf[2], f_out) != 0) | |||||
{ | |||||
fprintf(f_log,"%5d: Bad lettergroup\n",linenum); | |||||
error_count++; | |||||
} | |||||
continue; | |||||
} | |||||
if((buf == NULL) || (memcmp(buf,".group",6)==0)) | if((buf == NULL) || (memcmp(buf,".group",6)==0)) | ||||
{ | { | ||||
// next .group or end of file, write out the previous group | // next .group or end of file, write out the previous group |
groups2_count[ix]=0; | groups2_count[ix]=0; | ||||
groups2_start[ix]=255; // indicates "not set" | groups2_start[ix]=255; // indicates "not set" | ||||
} | } | ||||
memset(letterGroups,0,sizeof(letterGroups)); | |||||
p = data_dictrules; | p = data_dictrules; | ||||
while(*p != 0) | while(*p != 0) | ||||
break; | break; | ||||
} | } | ||||
p++; | p++; | ||||
len = strlen(p); | |||||
p_name = p; | |||||
c = p_name[0]; | |||||
p += (len+1); | |||||
if(len == 1) | |||||
if(p[0] == RULE_LETTERGP2) | |||||
{ | { | ||||
groups1[c] = p; | |||||
ix = p[1] - 'A'; | |||||
p += 2; | |||||
if((ix >= 0) && (ix < N_LETTER_GROUPS)) | |||||
{ | |||||
letterGroups[ix] = p; | |||||
} | |||||
} | } | ||||
else | else | ||||
if(len == 0) | |||||
{ | { | ||||
groups1[0] = p; | |||||
len = strlen(p); | |||||
p_name = p; | |||||
c = p_name[0]; | |||||
p += (len+1); | |||||
if(len == 1) | |||||
{ | |||||
groups1[c] = p; | |||||
} | |||||
else | |||||
if(len == 0) | |||||
{ | |||||
groups1[0] = p; | |||||
} | |||||
else | |||||
{ | |||||
if(groups2_start[c] == 255) | |||||
groups2_start[c] = n_groups2; | |||||
groups2_count[c]++; | |||||
groups2[n_groups2] = p; | |||||
c2 = p_name[1]; | |||||
groups2_name[n_groups2++] = (c + (c2 << 8)); | |||||
} | |||||
} | } | ||||
else | |||||
{ | |||||
if(groups2_start[c] == 255) | |||||
groups2_start[c] = n_groups2; | |||||
groups2_count[c]++; | |||||
groups2[n_groups2] = p; | |||||
c2 = p_name[1]; | |||||
groups2_name[n_groups2++] = (c + (c2 << 8)); | |||||
} | |||||
// skip over all the rules in this group | // skip over all the rules in this group | ||||
rule_count = 0; | rule_count = 0; | ||||
while(*p != RULE_GROUP_END) | while(*p != RULE_GROUP_END) | ||||
int Translator::IsLetterGroup(char *word, int group) | |||||
{//================================================= | |||||
// match the word against a list of utf-8 strings | |||||
char *p; | |||||
char *w; | |||||
p = letterGroups[group]; | |||||
while(*p != 0) | |||||
{ | |||||
w = word; | |||||
while(*p == *w) | |||||
{ | |||||
*w++; | |||||
*p++; | |||||
} | |||||
if(*p == 0) | |||||
return(w-word); // matched a complete string | |||||
while(*p++ != 0); // skip to end of string | |||||
} | |||||
return(0); | |||||
} | |||||
int Translator::IsLetter(int letter, int group) | int Translator::IsLetter(int letter, int group) | ||||
{//============================================ | {//============================================ | ||||
if(letter_groups[group] != NULL) | if(letter_groups[group] != NULL) | ||||
static char output[60]; | static char output[60]; | ||||
static char symbols[] = {' ',' ',' ',' ',' ',' ',' ',' ',' ', | static char symbols[] = {' ',' ',' ',' ',' ',' ',' ',' ',' ', | ||||
'@','&','%','+','#','S','D','Z','A','B','C','H','F','G','Y','N','K','V','L','T','X','?','W'}; | |||||
'@','&','%','+','#','S','D','Z','A','L',' ',' ',' ',' ',' ','N','K','V',' ','T','X','?','W'}; | |||||
static char symbols_lg[] = {'A','B','C','H','F','G','Y'}; | |||||
match_type = 0; | match_type = 0; | ||||
buf_pre[0] = 0; | buf_pre[0] = 0; | ||||
} | } | ||||
else | else | ||||
if(rb == RULE_LETTERGP) | if(rb == RULE_LETTERGP) | ||||
{ | |||||
c = symbols_lg[*rule++ - 'A']; | |||||
} | |||||
else | |||||
if(rb == RULE_LETTERGP2) | |||||
{ | { | ||||
value = *rule++ - 'A'; | value = *rule++ - 'A'; | ||||
if(value >= 8) | |||||
{ | |||||
p[0] = 'L'; | |||||
p[1] = (value / 10) + '0'; | |||||
c = (value % 10) + '0'; | |||||
p[0] = 'L'; | |||||
p[1] = (value / 10) + '0'; | |||||
c = (value % 10) + '0'; | |||||
if(match_type == RULE_PRE) | |||||
{ | |||||
p[0] = c; | |||||
c = 'L'; | |||||
} | |||||
p+=2; | |||||
} | |||||
else | |||||
if(match_type == RULE_PRE) | |||||
{ | { | ||||
c = symbols[value + RULE_LETTER_GROUPS]; | |||||
p[0] = c; | |||||
c = 'L'; | |||||
} | } | ||||
p+=2; | |||||
} | } | ||||
else | else | ||||
if(rb <= RULE_LAST_RULE) | if(rb <= RULE_LAST_RULE) | ||||
int distance_right; | int distance_right; | ||||
int distance_left; | int distance_left; | ||||
int lg_pts; | int lg_pts; | ||||
int n_bytes; | |||||
MatchRecord match; | MatchRecord match; | ||||
static MatchRecord best; | static MatchRecord best; | ||||
if(rule == NULL) | if(rule == NULL) | ||||
{ | { | ||||
match_out->points = 0; | match_out->points = 0; | ||||
(*word)++; | |||||
return; | return; | ||||
} | } | ||||
failed = 1; | failed = 1; | ||||
break; | break; | ||||
case RULE_LETTERGP2: // match against a list of utf-t strings | |||||
letter_group = *rule++ - 'A'; | |||||
if((n_bytes = IsLetterGroup(post_ptr-1,letter_group)) >0) | |||||
{ | |||||
match.points += (20-distance_right); | |||||
post_ptr += (n_bytes-1); | |||||
} | |||||
else | |||||
failed =1; | |||||
break; | |||||
case RULE_NOTVOWEL: | case RULE_NOTVOWEL: | ||||
if(!IsLetter(letter_w,0)) | if(!IsLetter(letter_w,0)) | ||||
{ | { |
} | } | ||||
#endif | #endif | ||||
if((ph->reduce_to != 0) && (ph->type != phVOWEL) && !(plist2->synthflags & SFLAG_DICTIONARY)) | |||||
{ | |||||
// reduction for vowels has already been done in SetWordStress | |||||
int reduce_level; | |||||
if(next->type == phVOWEL) | |||||
{ | |||||
reduce_level = (ph->phflags >> 28) & 7; | |||||
if((&plist2[1])->stress < reduce_level) | |||||
{ | |||||
// look at the stress of the following vowel | |||||
ph = phoneme_tab[ph->reduce_to]; | |||||
} | |||||
} | |||||
} | |||||
if((plist2+1)->synthflags & SFLAG_LENGTHEN) | if((plist2+1)->synthflags & SFLAG_LENGTHEN) | ||||
{ | { | ||||
static char types_double[] = {phFRICATIVE,phVFRICATIVE,phNASAL,phLIQUID,0}; | static char types_double[] = {phFRICATIVE,phVFRICATIVE,phNASAL,phLIQUID,0}; |
#include "translate.h" | #include "translate.h" | ||||
#include "wave.h" | #include "wave.h" | ||||
const char *version_string = "1.25.13 30.May.07"; | |||||
const int version_phdata = 0x012501; | |||||
const char *version_string = "1.25.14 31.May.07"; | |||||
const int version_phdata = 0x012514; | |||||
int option_device_number = -1; | int option_device_number = -1; | ||||
{ | { | ||||
static int stress_amps_is[] = {16,16, 20,20, 20,24, 24,22 }; | static int stress_amps_is[] = {16,16, 20,20, 20,24, 24,22 }; | ||||
static int stress_lengths_is[8] = {180,155, 200,200, 0,0, 240,250}; | static int stress_lengths_is[8] = {180,155, 200,200, 0,0, 240,250}; | ||||
static const wchar_t is_L08[] = {'c','f','h','k','p','t','x',0xfe,0}; // voiceless conants, including 'þ' ?? 's' | |||||
static const wchar_t is_lettergroup_B[] = {'c','f','h','k','p','t','x',0xfe,0}; // voiceless conants, including 'þ' ?? 's' | |||||
tr = new Translator(); | tr = new Translator(); | ||||
SetupTranslator(tr,stress_lengths_is,stress_amps_is); | SetupTranslator(tr,stress_lengths_is,stress_amps_is); | ||||
ResetLetterBits(tr,0x18); | ResetLetterBits(tr,0x18); | ||||
SetLetterBits(tr,4,"kpst"); // Letter group F | SetLetterBits(tr,4,"kpst"); // Letter group F | ||||
SetLetterBits(tr,3,"jvr"); // Letter group H | SetLetterBits(tr,3,"jvr"); // Letter group H | ||||
tr->letter_groups[8] = is_L08; | |||||
tr->letter_groups[1] = is_lettergroup_B; | |||||
SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
tr->langopts.numbers = 0xe9; | tr->langopts.numbers = 0xe9; | ||||
tr->langopts.numbers2 = 0x2; | tr->langopts.numbers2 = 0x2; | ||||
static const char ru_voiced[] = {0x11,0x12,0x13,0x14,0x16,0x17,0}; // letter group G (voiced obstruents) | static const char ru_voiced[] = {0x11,0x12,0x13,0x14,0x16,0x17,0}; // letter group G (voiced obstruents) | ||||
static const char ru_ivowels[] = {0x2c,0x15,0x31,0x18,0x2e,0x2f,0}; // letter group Y (iotated vowels & soft-sign) | static const char ru_ivowels[] = {0x2c,0x15,0x31,0x18,0x2e,0x2f,0}; // letter group Y (iotated vowels & soft-sign) | ||||
// these are unicode character codes | |||||
static const wchar_t ru_L08[] = {0x43a,0x43f,0x441,0x442,0x444,0x445,0}; // кпстфх | |||||
static const wchar_t ru_L09[] = {0x431,0x433,0x434,0x436,0x43b,0x43c,0x43d,0x440,0}; // бгджзлмнр | |||||
SetupTranslator(this,stress_lengths_ru,stress_amps_ru); | SetupTranslator(this,stress_lengths_ru,stress_amps_ru); | ||||
charset_a0 = charsets[18]; // KOI8-R | charset_a0 = charsets[18]; // KOI8-R | ||||
SetLetterBits(this,6,ru_ivowels); | SetLetterBits(this,6,ru_ivowels); | ||||
SetLetterBits(this,7,ru_vowels); | SetLetterBits(this,7,ru_vowels); | ||||
letter_groups[8] = ru_L08; // This is L08 in ru_rules | |||||
letter_groups[9] = ru_L09; // This is L09 in ru_rules | |||||
langopts.param[LOPT_UNPRONOUNCABLE] = 0x432; // [v] don't count this character at start of word | langopts.param[LOPT_UNPRONOUNCABLE] = 0x432; // [v] don't count this character at start of word | ||||
langopts.param[LOPT_REGRESSIVE_VOICING] = 1; | langopts.param[LOPT_REGRESSIVE_VOICING] = 1; | ||||
langopts.param[LOPT_KEEP_UNSTR_VOWEL] = 1; | langopts.param[LOPT_KEEP_UNSTR_VOWEL] = 1; |
#define N_RULE_GROUP2 120 // max num of two-letter rule chains | #define N_RULE_GROUP2 120 // max num of two-letter rule chains | ||||
#define N_HASH_DICT 1024 | #define N_HASH_DICT 1024 | ||||
#define N_CHARSETS 20 | #define N_CHARSETS 20 | ||||
#define N_LETTER_GROUPS 20 | |||||
/* flags from word dictionary */ | /* flags from word dictionary */ | ||||
// bits 0-3 stressed syllable, 7=unstressed | // bits 0-3 stressed syllable, 7=unstressed | ||||
#define RULE_ENDING 14 | #define RULE_ENDING 14 | ||||
#define RULE_DIGIT 15 // D digit | #define RULE_DIGIT 15 // D digit | ||||
#define RULE_NONALPHA 16 // Z non-alpha | #define RULE_NONALPHA 16 // Z non-alpha | ||||
#define RULE_LETTER_GROUPS 17 // 17 to 23 | |||||
#define RULE_LETTER1 17 // A vowels | |||||
#define RULE_LETTER2 18 // B 'hard' consonants | |||||
#define RULE_LETTER3 19 // C all consonants | |||||
#define RULE_LETTER4 20 // H letter group | |||||
#define RULE_LETTER5 21 // F letter group | |||||
#define RULE_LETTER6 22 // G letter group | |||||
#define RULE_LETTER7 23 // Y letter group | |||||
#define RULE_LETTERGP 17 // A B C H F G Y letter group number | |||||
#define RULE_LETTERGP2 18 // L + letter group number | |||||
#define RULE_NO_SUFFIX 24 // N | #define RULE_NO_SUFFIX 24 // N | ||||
#define RULE_NOTVOWEL 25 // K | #define RULE_NOTVOWEL 25 // K | ||||
#define RULE_IFVERB 26 // V | #define RULE_IFVERB 26 // V | ||||
#define RULE_LETTERGP 27 // L + letter group number | |||||
#define RULE_ALT1 28 // T word has $alt attribute | #define RULE_ALT1 28 // T word has $alt attribute | ||||
#define RULE_NOVOWELS 29 // X no vowels up to word boundary | #define RULE_NOVOWELS 29 // X no vowels up to word boundary | ||||
#define RULE_SPELLING 31 // W while spelling letter-by-letter | #define RULE_SPELLING 31 // W while spelling letter-by-letter | ||||
// holds properties of characters: vowel, consonant, etc for pronunciation rules | // holds properties of characters: vowel, consonant, etc for pronunciation rules | ||||
unsigned char letter_bits[256]; | unsigned char letter_bits[256]; | ||||
int letter_bits_offset; | int letter_bits_offset; | ||||
#define N_LETTER_TYPES 20 | |||||
const wchar_t *letter_groups[N_LETTER_TYPES]; | |||||
const wchar_t *letter_groups[8]; | |||||
/* index1=option, index2 by 0=. 1=, 2=?, 3=! 4=none */ | /* index1=option, index2 by 0=. 1=, 2=?, 3=! 4=none */ | ||||
unsigned char punct_to_tone[4][5]; | unsigned char punct_to_tone[4][5]; | ||||
void ApplySpecialAttribute(char *phonemes, int dict_flags); | void ApplySpecialAttribute(char *phonemes, int dict_flags); | ||||
int IsLetter(int letter, int group); | int IsLetter(int letter, int group); | ||||
int IsLetterGroup(char *word, int group); | |||||
void CalcPitches_Tone(int clause_tone); | void CalcPitches_Tone(int clause_tone); | ||||
unsigned char groups2_count[256]; // number of 2 letter groups for this initial letter | unsigned char groups2_count[256]; // number of 2 letter groups for this initial letter | ||||
unsigned char groups2_start[256]; // index into groups2 | unsigned char groups2_start[256]; // index into groups2 | ||||
char *letterGroups[N_LETTER_GROUPS]; | |||||
int n_ph_list2; | int n_ph_list2; | ||||
PHONEME_LIST2 ph_list2[N_PHONEME_LIST]; // first stage of text->phonemes | PHONEME_LIST2 ph_list2[N_PHONEME_LIST]; // first stage of text->phonemes |
int hash; | int hash; | ||||
char *p; | char *p; | ||||
char *start; | char *start; | ||||
char *group; | |||||
char *next; | char *next; | ||||
unsigned char c; | unsigned char c; | ||||
int count = 0; | int count = 0; | ||||
} | } | ||||
if(*p == RULE_GROUP_START) | if(*p == RULE_GROUP_START) | ||||
{ | { | ||||
group = p; | |||||
if(p[1] == RULE_LETTERGP2) | |||||
{ | |||||
while(*p != RULE_GROUP_END) p++; | |||||
continue; | |||||
} | |||||
p += (strlen(p)+1); | p += (strlen(p)+1); | ||||
} | } | ||||