Language changes: it, pt. Added an experimental suffix option 'm' to allow multiple suffixes to be removed from a word (e.g. `(_S2m`). git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@295 d46cf337-b52f-0410-862d-fd96e6ae7743 master
@@ -1078,11 +1078,10 @@ cortes $2 | |||
cortile $2 | |||
cortisol $3 | |||
cortocircuito kO@-*totSi@-*k'uito | |||
cosa $alt | |||
cos $alt | |||
cosc $alt | |||
coscia $alt | |||
cosen $alt2 | |||
cosi $2 | |||
cosm $alt | |||
cosmic $alt | |||
cosm $alt | |||
@@ -3274,7 +3273,6 @@ rore $alt | |||
ros $alt | |||
rosalia $3 | |||
roseo $1 $alt | |||
rose $2 | |||
rosolia $3 | |||
rospigliosi $alt2 | |||
rosp $alt |
@@ -695,6 +695,7 @@ caractere $alt | |||
careta $alt2 | |||
carreta $alt2 | |||
casebre $alt | |||
cateto $alt2 | |||
catorze $alt2 | |||
cefaleia $alt | |||
cerca $alt $verb | |||
@@ -741,6 +742,7 @@ desfecho $alt2 | |||
desprezo $alt $verb | |||
deva $alt2 | |||
devo $alt2 | |||
deveras $alt | |||
dez $alt | |||
diarreia $alt | |||
discordo $alt | |||
@@ -772,14 +774,17 @@ estiverdes $alt | |||
estiveres $alt | |||
estrofe $alt | |||
etiqueta $alt2 | |||
europa $alt | |||
exagero $alt $verb | |||
expeça $alt2 | |||
expeço $alt2 | |||
farofa $alt | |||
febre $alt | |||
feitora $alt2 | |||
fera $alt | |||
fere $alt | |||
ferem $alt | |||
ferozes $alt | |||
fezes $alt | |||
folga $alt | |||
fogos $alt | |||
@@ -822,6 +827,8 @@ marreta $alt2 | |||
martelo $alt | |||
merda $alt | |||
megera $alt | |||
mentora $alt2 | |||
metro $alt | |||
mexa m'eS& | |||
mexi meS'i | |||
mexo m'eSU | |||
@@ -846,18 +853,20 @@ mutreta $alt2 | |||
naquele $alt2 | |||
negro $alt2 | |||
novos $alt | |||
obstera $alt | |||
obstetra $alt | |||
obsoleta $alt2 | |||
obsoleto $alt2 | |||
odisseia $alt | |||
onu $1 | |||
opereta $alt2 | |||
ordens $alt | |||
osso $alt2 | |||
ovos $alt | |||
padeceste $alt2 | |||
paexa paeSa | |||
pangeia $alt | |||
palheta $alt2 | |||
paralelo $alt | |||
parede $alt2 | |||
pastora $alt2 | |||
pedra $alt | |||
@@ -908,6 +917,7 @@ selo $alt $verb | |||
selvagem seUv'aZeIN | |||
sincera $alt | |||
sincero $alt | |||
sinopse $alt | |||
sobe $alt | |||
sobem $alt | |||
soco $alt2 | |||
@@ -938,14 +948,17 @@ traqueia $alt | |||
travesso $alt2 | |||
trevo $alt2 | |||
troco $alt2 $noun | |||
tropa $alt | |||
tropeço $alt2 $noun | |||
trombeta $alt2 | |||
valeta $alt2 | |||
vanessa $alt2 | |||
velozes $alt | |||
verbo $alt | |||
verme $alt | |||
vierem $alt | |||
violeta $alt2 | |||
voga $alt | |||
vozes $alt | |||
zelo $alt $verb | |||
zero $alt |
@@ -20,6 +20,7 @@ | |||
.L03 a am o | |||
.L04 a am e em o ue uem | |||
.L05 r ra ram re rem rdes | |||
.L06 ra ram ste re rem | |||
.group a | |||
@@ -337,6 +338,9 @@ | |||
?1 v) e (l_ ,E // eg: possível, amovível, disponível, etc... | |||
?1 l) e (ta 'E // eg: bicicleta, atleta. | |||
sf) e (ra_ E // esfera, biosfera | |||
//sort | |||
qu) e (brL04_ E | |||
_hosp) e (dL03_ E | |||
@@ -362,7 +366,7 @@ _comprom) e (tL03_ e | |||
_atr) e (vL03_ e | |||
_escr) e (vL03_ e | |||
_descr) e (vL03_ e | |||
_embel) e (zL01_ e | |||
_embel) e (zL04_ E | |||
_pr) e (zL04_ E | |||
_acont) e (çL01_ e | |||
_ado) e (çL03_ e | |||
@@ -383,6 +387,10 @@ _envaid) e (çL03_ e | |||
_esclar) e (çL03_ e | |||
_reapar) e (çL03_ e | |||
_reconh) e (çL03_ e | |||
_coop) e (rL04_ E | |||
_sup) e (rL04_ E | |||
_imp) e (rL03_ E | |||
_temp) e (rL04_ E | |||
//endsort | |||
_exag) e (L05_ E | |||
@@ -390,6 +398,12 @@ _reconh) e (çL03_ e | |||
_houv) e (L05_ E | |||
_soub) e (L05_ E | |||
_compreend) e (L06_ e | |||
_correspond) e (L06_ e | |||
_entend) e (L06_ e | |||
_estend) e (L06_ e | |||
_respond)e (L06_ e | |||
_vend) e (L06_ e | |||
.group é | |||
é ''E | |||
@@ -687,6 +701,7 @@ _reconh) e (çL03_ e | |||
c) o (rrL01_ o // escorra, incorra, corra, recorra | |||
_m) o (rrL01_ o | |||
p) ostos (_ Ost=Us# // dispostos etc. | |||
_esn) o (bL04_ O | |||
//endsort | |||
.group ô | |||
@@ -824,6 +839,7 @@ _reconh) e (çL03_ e | |||
r) u (_A u | |||
u (A_ 'u | |||
u (em_ 'u | |||
ui uI | |||
u (iu w | |||
u (iCK u |
@@ -1831,7 +1831,7 @@ _அகாலி_) த (ள dV | |||
_) த (ீரரை d | |||
_) த (ீர்க d | |||
_) த (ீர்க்க t | |||
_) தீர்க்க (_சுமங்கலி dirggV | |||
_) தீர்க்க (_சுமங்கலி di:rggV | |||
_) தீர்க்க (தரிச di:rggV | |||
_) தீர்க்க (த்துடன di:rggV# | |||
_) த (ீர்க்கமா d |
@@ -266,7 +266,7 @@ char *DecodeRule(const char *group_chars, int group_length, char *rule, int cont | |||
} | |||
continue; | |||
} | |||
if(rb == RULE_DOLLAR) | |||
{ | |||
value = *rule++ & 0xff; | |||
@@ -380,13 +380,13 @@ static int compile_line(char *linebuf, char *dict_line, int *hash) | |||
int multiple_numeric_hyphen = 0; | |||
char *multiple_string = NULL; | |||
char *multiple_string_end = NULL; | |||
int len_word; | |||
int len_phonetic; | |||
int text_not_phonemes; // this word specifies replacement text, not phonemes | |||
unsigned int wc; | |||
int all_upper_case; | |||
char *mnemptr; | |||
unsigned char flag_codes[100]; | |||
char encoded_ph[200]; | |||
@@ -416,12 +416,12 @@ static char nullstring[] = {0}; | |||
#endif | |||
step = 0; | |||
c = 0; | |||
while(c != '\n') | |||
{ | |||
c = *p; | |||
if((c == '?') && (step==0)) | |||
{ | |||
// conditional rule, allow only if the numbered condition is set for the voice | |||
@@ -449,14 +449,14 @@ static char nullstring[] = {0}; | |||
flag_codes[n_flag_codes++] = ix + flag_offset; | |||
c = *p; | |||
} | |||
if((c == '$') && isalnum(p[1])) | |||
{ | |||
/* read keyword parameter */ | |||
mnemptr = p; | |||
while(!isspace2(c = *p)) p++; | |||
*p = 0; | |||
flagnum = LookupMnem(mnem_flags,mnemptr); | |||
if(flagnum > 0) | |||
{ | |||
@@ -485,12 +485,12 @@ static char nullstring[] = {0}; | |||
error_count++; | |||
} | |||
} | |||
if((c == '/') && (p[1] == '/') && (multiple_words==0)) | |||
{ | |||
c = '\n'; /* "//" treat comment as end of line */ | |||
} | |||
switch(step) | |||
{ | |||
case 0: | |||
@@ -507,7 +507,7 @@ static char nullstring[] = {0}; | |||
step = 1; | |||
} | |||
break; | |||
case 1: | |||
if((c == '-') && multiple_words) | |||
{ | |||
@@ -567,7 +567,7 @@ static char nullstring[] = {0}; | |||
step = 3; | |||
} | |||
break; | |||
case 3: | |||
if(!isspace2(c)) | |||
{ | |||
@@ -575,7 +575,7 @@ static char nullstring[] = {0}; | |||
step = 4; | |||
} | |||
break; | |||
case 4: | |||
if(isspace2(c)) | |||
{ | |||
@@ -583,13 +583,13 @@ static char nullstring[] = {0}; | |||
step = 5; | |||
} | |||
break; | |||
case 5: | |||
break; | |||
} | |||
p++; | |||
} | |||
if(word[0] == 0) | |||
{ | |||
return(0); /* blank line */ | |||
@@ -698,7 +698,7 @@ static char nullstring[] = {0}; | |||
*hash = HashDictionary(word); | |||
len_phonetic = strlen(encoded_ph); | |||
dict_line[1] = len_word; // bit 6 indicates whether the word has been compressed | |||
len_word &= 0x3f; | |||
@@ -715,7 +715,7 @@ static char nullstring[] = {0}; | |||
length = len_word + len_phonetic + 3; | |||
strcpy(&dict_line[(len_word)+2],encoded_ph); | |||
} | |||
for(ix=0; ix<n_flag_codes; ix++) | |||
{ | |||
dict_line[ix+length] = flag_codes[ix]; | |||
@@ -790,12 +790,12 @@ static void compile_dictlist_end(FILE *f_out) | |||
fflush(f_log); | |||
#endif | |||
} | |||
for(hash=0; hash<N_HASH_DICT; hash++) | |||
{ | |||
p = hash_chains[hash]; | |||
hash_counts[hash] = (int)ftell(f_out); | |||
while(p != NULL) | |||
{ | |||
length = *(p+sizeof(char *)); | |||
@@ -818,7 +818,7 @@ static int compile_dictlist_file(const char *path, const char* filename) | |||
char buf[200]; | |||
char fname[sizeof(path_home)+45]; | |||
char dict_line[128]; | |||
text_mode = 0; | |||
// try with and without '.txt' extension | |||
@@ -833,7 +833,7 @@ static int compile_dictlist_file(const char *path, const char* filename) | |||
fprintf(f_log,"Compiling: '%s'\n",fname); | |||
linenum=0; | |||
while(fgets(buf,sizeof(buf),f_in) != NULL) | |||
{ | |||
linenum++; | |||
@@ -842,7 +842,7 @@ static int compile_dictlist_file(const char *path, const char* filename) | |||
if(length == 0) continue; /* blank line */ | |||
hash_counts[hash]++; | |||
p = (char *)malloc(length+sizeof(char *)); | |||
if(p == NULL) | |||
{ | |||
@@ -853,13 +853,13 @@ static int compile_dictlist_file(const char *path, const char* filename) | |||
} | |||
break; | |||
} | |||
memcpy(p,&hash_chains[hash],sizeof(char *)); | |||
hash_chains[hash] = p; | |||
memcpy(p+sizeof(char *),dict_line,length); | |||
count++; | |||
} | |||
fprintf(f_log,"\t%d entries\n",count); | |||
fclose(f_in); | |||
return(0); | |||
@@ -920,7 +920,7 @@ static void copy_rule_string(char *string, int &state) | |||
output = &rule_phonemes[len]; | |||
} | |||
sxflags = 0x808000; // to ensure non-zero bytes | |||
for(p=string,ix=0;;) | |||
{ | |||
literal = 0; | |||
@@ -1126,6 +1126,9 @@ static void copy_rule_string(char *string, int &state) | |||
case 'a': | |||
sxflags |= SUFX_A; | |||
break; | |||
case 'm': | |||
sxflags |= SUFX_M; | |||
break; | |||
default: | |||
if(isdigit(c)) | |||
value = (value*10) + (c - '0'); | |||
@@ -1173,7 +1176,7 @@ static char *compile_rule(char *input) | |||
rule_phonemes[0]=0; | |||
p = buf; | |||
for(ix=0; finish==0; ix++) | |||
{ | |||
c = input[ix]; | |||
@@ -1186,7 +1189,7 @@ static char *compile_rule(char *input) | |||
copy_rule_string(buf,state); | |||
p = buf; | |||
break; | |||
case '(': // start of suffix section | |||
*p = 0; | |||
state = 2; | |||
@@ -1199,7 +1202,7 @@ static char *compile_rule(char *input) | |||
error_count++; | |||
} | |||
break; | |||
case '\n': // end of line | |||
case '\r': | |||
case 0: // end of line | |||
@@ -1207,14 +1210,14 @@ static char *compile_rule(char *input) | |||
copy_rule_string(buf,state); | |||
finish=1; | |||
break; | |||
case '\t': // end of section section | |||
case ' ': | |||
*p = 0; | |||
copy_rule_string(buf,state); | |||
p = buf; | |||
break; | |||
case '?': | |||
if(state==2) | |||
state=0; | |||
@@ -1227,7 +1230,7 @@ static char *compile_rule(char *input) | |||
break; | |||
} | |||
} | |||
if(strcmp(rule_match,"$group")==0) | |||
strcpy(rule_match,group_name); | |||
@@ -1249,7 +1252,7 @@ static char *compile_rule(char *input) | |||
} | |||
strcpy(output,buf); | |||
len = strlen(buf)+1; | |||
len_name = strlen(group_name); | |||
if((len_name > 0) && (memcmp(rule_match,group_name,len_name) != 0)) | |||
{ | |||
@@ -1382,7 +1385,7 @@ static void print_rule_group(FILE *f_out, int n_rules, char **rules, char *name) | |||
len1 = strlen(p) + 1; | |||
p = &p[len1]; | |||
len2 = strlen(p); | |||
rule_match[0]=0; | |||
rule_pre[0]=0; | |||
rule_post[0]=0; | |||
@@ -1420,7 +1423,7 @@ static void print_rule_group(FILE *f_out, int n_rules, char **rules, char *name) | |||
} | |||
} | |||
*pout = 0; | |||
spaces = 12; | |||
if(condition > 0) | |||
{ | |||
@@ -1441,10 +1444,10 @@ static void print_rule_group(FILE *f_out, int n_rules, char **rules, char *name) | |||
fprintf(f_out,"%s",buf); | |||
spaces = 0; | |||
} | |||
for(ix=0; ix<spaces; ix++) | |||
fputc(' ',f_out); | |||
spaces = 14; | |||
sprintf(buf," %s ",rule_match); | |||
if(rule_post[0] != 0) | |||
@@ -1633,7 +1636,7 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||
int n_rgroups = 0; | |||
int n_groups3 = 0; | |||
RGROUP rgroup[N_RULE_GROUP2]; | |||
linenum = 0; | |||
group_name[0] = 0; | |||
@@ -1649,7 +1652,7 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||
if((p = (unsigned char *)strstr(buf,"//")) != NULL) | |||
*p = 0; | |||
if(buf[0] == '\r') buf++; // ignore extra \r in \r\n | |||
if(buf[0] == '\r') buf++; // ignore extra \r in \r\n | |||
} | |||
if((buf == NULL) || (buf[0] == '.')) | |||
@@ -1711,7 +1714,7 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||
{ | |||
// group character is given as a character code (max 16 bits) | |||
p = (unsigned char *)group_name; | |||
if(char_code > 0x100) | |||
{ | |||
*p++ = (char_code >> 8); | |||
@@ -1730,7 +1733,7 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||
} | |||
} | |||
} | |||
if((group3_ix == 0) && (strlen(group_name) > 2)) | |||
{ | |||
if(utf8_in(&c,group_name) < 2) | |||
@@ -1738,14 +1741,14 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||
fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum); | |||
error_count++; | |||
} | |||
group_name[2] = 0; | |||
} | |||
} | |||
continue; | |||
} | |||
switch(compile_mode) | |||
{ | |||
case 1: // .group | |||
@@ -1915,10 +1918,10 @@ int CompileDictionary(const char *dsource, const char *dict_name, FILE *log, cha | |||
compile_dictlist_file(path,"list"); | |||
} | |||
compile_dictlist_file(path,"extra"); | |||
compile_dictlist_end(f_out); | |||
offset_rules = ftell(f_out); | |||
fprintf(f_log,"Compiling: '%s'\n",fname_in); | |||
compile_dictrules(f_in,f_out,fname_temp); |
@@ -1452,14 +1452,14 @@ void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags, | |||
{ | |||
if(vowel_stress[ix] == 4) | |||
vowel_stress[ix] = 3; // change marked stress (consonant clusters) to secondary (except the last) | |||
if(vowel_length[ix] > 0) | |||
{ | |||
long_vowel = ix; | |||
vowel_stress[ix] = 3; // give secondary stress to all long vowels | |||
} | |||
} | |||
// 'stressed_syllable' gives the last marked stress | |||
if(stressed_syllable == 0) | |||
{ | |||
@@ -3497,8 +3497,12 @@ int RemoveEnding(Translator *tr, char *word, int end_type, char *word_copy) | |||
*word_end = 'e'; | |||
} | |||
i = word_end - word; | |||
memcpy(word_copy,word,i); | |||
word_copy[i] = 0; | |||
if(word_copy != NULL) | |||
{ | |||
memcpy(word_copy,word,i); | |||
word_copy[i] = 0; | |||
} | |||
// look for multibyte characters to increase the number of bytes to remove | |||
for(len_ending = i = (end_type & 0x3f); i>0 ;i--) // num.of characters of the suffix |
@@ -548,7 +548,7 @@ int utf8_nbytes(const char *buf) | |||
int utf8_in2(int *c, const char *buf, int backwards) | |||
{//================================================= | |||
// Read a unicode character from a UTF8 string | |||
// Read a unicode character from a UTF8 string | |||
// Returns the number of UTF8 bytes used. | |||
// backwards: set if we are moving backwards through the UTF8 string | |||
int c1; | |||
@@ -592,7 +592,7 @@ int utf8_in2(int *c, const char *buf, int backwards) | |||
int utf8_in(int *c, const char *buf) | |||
{//================================= | |||
// Read a unicode character from a UTF8 string | |||
// Read a unicode character from a UTF8 string | |||
// Returns the number of UTF8 bytes used. | |||
return(utf8_in2(c,buf,0)); | |||
} | |||
@@ -819,24 +819,28 @@ int TranslateWord(Translator *tr, char *word_start, int next_pause, WORD_TAB *wt | |||
unsigned int dictionary_flags[2]; | |||
unsigned int dictionary_flags2[2]; | |||
int end_type=0; | |||
int end_type1=0; | |||
int prefix_type=0; | |||
int prefix_stress; | |||
char *wordx; | |||
char phonemes[N_WORD_PHONEMES]; | |||
char phonemes2[N_WORD_PHONEMES]; | |||
char prefix_phonemes[N_WORD_PHONEMES]; | |||
char unpron_phonemes[N_WORD_PHONEMES]; | |||
char end_phonemes[N_WORD_PHONEMES]; | |||
char end_phonemes2[N_WORD_PHONEMES]; | |||
char word_copy[N_WORD_BYTES]; | |||
char word_copy2[N_WORD_BYTES]; | |||
int word_copy_length; | |||
char prefix_chars[0x3f + 2]; | |||
char prefix_chars[0x3f + 2]; | |||
int found=0; | |||
int end_flags; | |||
int end_flags; | |||
char c_temp; // save a character byte while we temporarily replace it with space | |||
int first_char; | |||
int last_char = 0; | |||
int add_plural_suffix = 0; | |||
int prefix_flags = 0; | |||
int more_suffixes; | |||
int confirm_prefix; | |||
int spell_word; | |||
int stress_bits; | |||
@@ -996,7 +1000,7 @@ if((wmark > 0) && (wmark < 8)) | |||
// the word has $abbrev flag, but no pronunciation specified. Speak as individual letters | |||
spell_word = 1; | |||
} | |||
if(!found && iswdigit(first_char)) | |||
{ | |||
Lookup(tr,"_0lang",word_phonemes); | |||
@@ -1157,7 +1161,6 @@ if((wmark > 0) && (wmark < 8)) | |||
if(confirm_prefix && !(end_type & SUFX_B)) | |||
{ | |||
int end2; | |||
char phonemes2[N_WORD_PHONEMES]; | |||
char end_phonemes2[N_WORD_PHONEMES]; | |||
// remove any standard suffix and confirm that the prefix is still recognised | |||
@@ -1212,7 +1215,7 @@ if((wmark > 0) && (wmark < 8)) | |||
for(ix=0; ix < n_chars; ix++) // num. of bytes to remove | |||
{ | |||
prefix_chars[pfix++] = *wordx++; | |||
if((prefix_type & SUFX_B) && (ix == (n_chars-1))) | |||
{ | |||
prefix_chars[pfix-1] = 0; // discard the last character of the prefix, this is the separator character | |||
@@ -1276,93 +1279,121 @@ if((wmark > 0) && (wmark < 8)) | |||
} | |||
} | |||
if((end_type != 0) && !(end_type & SUFX_P)) | |||
{ | |||
char phonemes2[N_WORD_PHONEMES]; | |||
strcpy(phonemes2,phonemes); | |||
end_type1 = end_type; | |||
strcpy(phonemes2,phonemes); | |||
// The word has a standard ending, re-translate without this ending | |||
end_flags = RemoveEnding(tr, wordx, end_type, word_copy); | |||
phonemes[0] = 0; | |||
if(prefix_phonemes[0] != 0) | |||
{ | |||
// lookup the stem without the prefix removed | |||
wordx[-1] = c_temp; | |||
found = LookupDictList(tr, &word1, phonemes, dictionary_flags2, end_flags, wtab); // include prefix, but not suffix | |||
wordx[-1] = ' '; | |||
if(phonemes[0] == phonSWITCH) | |||
{ | |||
// change to another language in order to translate this word | |||
memcpy(wordx,word_copy,strlen(word_copy)); | |||
strcpy(word_phonemes,phonemes); | |||
return(0); | |||
} | |||
if(dictionary_flags[0]==0) | |||
{ | |||
dictionary_flags[0] = dictionary_flags2[0]; | |||
dictionary_flags[1] = dictionary_flags2[1]; | |||
} | |||
if(found) | |||
prefix_phonemes[0] = 0; // matched whole word, don't need prefix now | |||
if((found==0) && (dictionary_flags2[0] != 0)) | |||
prefix_flags = 1; | |||
} | |||
if(found == 0) | |||
{ | |||
found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix | |||
if(phonemes[0] == phonSWITCH) | |||
{ | |||
// change to another language in order to translate this word | |||
memcpy(wordx,word_copy,strlen(word_copy)); | |||
strcpy(word_phonemes,phonemes); | |||
return(0); | |||
} | |||
if(dictionary_flags2[0] & FLAG_ABBREV) | |||
{ | |||
// Removing the suffix leaves a word which should be spoken as individual letters | |||
// Not yet implemented | |||
} | |||
if(dictionary_flags[0]==0) | |||
{ | |||
dictionary_flags[0] = dictionary_flags2[0]; | |||
dictionary_flags[1] = dictionary_flags2[1]; | |||
} | |||
} | |||
if(found == 0) | |||
{ | |||
if(end_type & SUFX_Q) | |||
{ | |||
// don't retranslate, use the original lookup result | |||
strcpy(phonemes,phonemes2); | |||
// language specific changes | |||
ApplySpecialAttribute(tr,phonemes,dictionary_flags[0]); | |||
} | |||
else | |||
{ | |||
if(end_flags & FLAG_SUFX) | |||
wflags |= FLAG_SUFFIX_REMOVED; | |||
if(end_type & SUFX_A) | |||
wflags |= FLAG_SUFFIX_VOWEL; | |||
TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, NULL, wflags, dictionary_flags); | |||
if(phonemes[0] == phonSWITCH) | |||
{ | |||
// change to another language in order to translate this word | |||
strcpy(word_phonemes,phonemes); | |||
memcpy(wordx,word_copy,strlen(word_copy)); | |||
wordx[-1] = c_temp; | |||
return(0); | |||
} | |||
} | |||
} | |||
if((end_type & SUFX_T) == 0) | |||
more_suffixes = 1; | |||
while(more_suffixes) | |||
{ | |||
more_suffixes = 0; | |||
phonemes[0] = 0; | |||
if(prefix_phonemes[0] != 0) | |||
{ | |||
// lookup the stem without the prefix removed | |||
wordx[-1] = c_temp; | |||
found = LookupDictList(tr, &word1, phonemes, dictionary_flags2, end_flags, wtab); // include prefix, but not suffix | |||
wordx[-1] = ' '; | |||
if(phonemes[0] == phonSWITCH) | |||
{ | |||
// change to another language in order to translate this word | |||
memcpy(wordx,word_copy,strlen(word_copy)); | |||
strcpy(word_phonemes,phonemes); | |||
return(0); | |||
} | |||
if(dictionary_flags[0]==0) | |||
{ | |||
dictionary_flags[0] = dictionary_flags2[0]; | |||
dictionary_flags[1] = dictionary_flags2[1]; | |||
} | |||
if(found) | |||
prefix_phonemes[0] = 0; // matched whole word, don't need prefix now | |||
if((found==0) && (dictionary_flags2[0] != 0)) | |||
prefix_flags = 1; | |||
} | |||
if(found == 0) | |||
{ | |||
found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix | |||
if(phonemes[0] == phonSWITCH) | |||
{ | |||
// change to another language in order to translate this word | |||
memcpy(wordx,word_copy,strlen(word_copy)); | |||
strcpy(word_phonemes,phonemes); | |||
return(0); | |||
} | |||
if(dictionary_flags2[0] & FLAG_ABBREV) | |||
{ | |||
// Removing the suffix leaves a word which should be spoken as individual letters | |||
// Not yet implemented | |||
} | |||
if(dictionary_flags[0]==0) | |||
{ | |||
dictionary_flags[0] = dictionary_flags2[0]; | |||
dictionary_flags[1] = dictionary_flags2[1]; | |||
} | |||
} | |||
if(found == 0) | |||
{ | |||
if(end_type & SUFX_Q) | |||
{ | |||
// don't retranslate, use the original lookup result | |||
strcpy(phonemes,phonemes2); | |||
// language specific changes | |||
ApplySpecialAttribute(tr,phonemes,dictionary_flags[0]); | |||
} | |||
else | |||
{ | |||
if(end_flags & FLAG_SUFX) | |||
wflags |= FLAG_SUFFIX_REMOVED; | |||
if(end_type & SUFX_A) | |||
wflags |= FLAG_SUFFIX_VOWEL; | |||
if(end_type & SUFX_M) | |||
{ | |||
// allow more suffixes before this suffix | |||
strcpy(end_phonemes2, end_phonemes); | |||
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags); | |||
strcat(end_phonemes, end_phonemes2); // add the phonemes for the previous suffixes after this one | |||
if((end_type != 0) && !(end_type & SUFX_P)) | |||
{ | |||
// there is another suffix | |||
end_flags = RemoveEnding(tr, wordx, end_type, NULL); | |||
more_suffixes = 1; | |||
} | |||
} | |||
else | |||
{ | |||
// don't remove any previous suffix | |||
TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, NULL, wflags, dictionary_flags); | |||
end_type = 0; | |||
} | |||
if(phonemes[0] == phonSWITCH) | |||
{ | |||
// change to another language in order to translate this word | |||
strcpy(word_phonemes,phonemes); | |||
memcpy(wordx,word_copy,strlen(word_copy)); | |||
wordx[-1] = c_temp; | |||
return(0); | |||
} | |||
} | |||
} | |||
} | |||
if((end_type1 & SUFX_T) == 0) | |||
{ | |||
// the default is to add the suffix and then determine the word's stress pattern | |||
AppendPhonemes(tr,phonemes, N_WORD_PHONEMES, end_phonemes); | |||
@@ -1370,6 +1401,10 @@ if(dictionary_flags2[0] & FLAG_ABBREV) | |||
} | |||
memcpy(wordx,word_copy,strlen(word_copy)); | |||
} | |||
wordx[-1] = c_temp; | |||
} | |||
} | |||
@@ -1511,7 +1546,7 @@ if(dictionary_flags2[0] & FLAG_ABBREV) | |||
// dictionary flags for this word give a clue about which alternative pronunciations of | |||
// following words to use. | |||
if(end_type & SUFX_F) | |||
if(end_type1 & SUFX_F) | |||
{ | |||
// expect a verb form, with or without -s suffix | |||
tr->expect_verb = 2; | |||
@@ -1859,7 +1894,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa | |||
ok = 0; | |||
} | |||
} | |||
if((sylimit & 0x200) && ((wtab+1)->flags & FLAG_LAST_WORD)) | |||
{ | |||
// not if the next word is end-of-sentence | |||
@@ -2079,7 +2114,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa | |||
else | |||
if(ph_code == phonX1) | |||
{ | |||
// a language specific action | |||
// a language specific action | |||
if(tr->langopts.param[LOPT_IT_DOUBLING]) | |||
{ | |||
flags |= FLAG_DOUBLING; | |||
@@ -2125,7 +2160,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa | |||
{ | |||
if(first_phoneme && tr->langopts.param[LOPT_IT_DOUBLING]) | |||
{ | |||
if(((tr->prev_dict_flags & FLAG_DOUBLING) && (tr->langopts.param[LOPT_IT_DOUBLING] & 1)) || | |||
if(((tr->prev_dict_flags & FLAG_DOUBLING) && (tr->langopts.param[LOPT_IT_DOUBLING] & 1)) || | |||
(tr->end_stressed_vowel && (tr->langopts.param[LOPT_IT_DOUBLING] & 2))) | |||
{ | |||
// italian, double the initial consonant if the previous word ends with a | |||
@@ -2690,7 +2725,7 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre | |||
} | |||
#endif | |||
if((c == 0x92) || (c == 0xb4) || (c == 0x2019) || (c == 0x2032)) | |||
c = '\''; // 'microsoft' quote or sexed closing single quote, or prime - possibly used as apostrophe | |||
c = '\''; // 'microsoft' quote or sexed closing single quote, or prime - possibly used as apostrophe | |||
if(((c == 0x2018) || (c == '?')) && IsAlpha(prev_out) && IsAlpha(next_in)) | |||
{ | |||
@@ -2755,7 +2790,7 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre | |||
{ | |||
if((next_in == '\002') || ((next_in == '[') && option_phoneme_input)) | |||
{ | |||
// "[\002" is used internally to start phoneme mode | |||
// "[\002" is used internally to start phoneme mode | |||
phoneme_mode = FLAG_PHONEMES; | |||
source_index++; | |||
continue; | |||
@@ -2874,7 +2909,7 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre | |||
if(!IsSpace(prev_in) && IsAlpha(next_in)) | |||
{ | |||
if(prev_out != ' ') | |||
{ | |||
{ | |||
// previous 'word' not yet ended (not alpha or numeric), start new word now. | |||
c = ' '; | |||
space_inserted = 1; | |||
@@ -3110,7 +3145,7 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre | |||
if((word_count==0) && (embedded_count > 0)) | |||
{ | |||
// add a null 'word' to carry the embedded command flag | |||
embedded_list[embedded_ix-1] |= 0x80; | |||
embedded_list[embedded_ix-1] |= 0x80; | |||
words[word_count].flags |= FLAG_EMBEDDED; | |||
word_count = 1; | |||
} | |||
@@ -3126,7 +3161,7 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre | |||
ix--; // the last word is a bracket, mark the previous word as last | |||
words[ix].flags |= FLAG_LAST_WORD; | |||
// FLAG_NOSPACE check to avoid recognizing .mr -mr | |||
// FLAG_NOSPACE check to avoid recognizing .mr -mr | |||
if((terminator & CLAUSE_DOT) && !(words[word_count-1].flags & FLAG_NOSPACE)) | |||
words[word_count-1].flags |= FLAG_HAS_DOT; | |||
} |
@@ -129,6 +129,7 @@ | |||
#define SUFX_T 0x10000 // don't affect the stress position in the stem | |||
#define SUFX_B 0x20000 // break, this character breaks the word into stem and suffix (used with SUFX_P) | |||
#define SUFX_A 0x40000 // remember that the suffix starts with a vowel | |||
#define SUFX_M 0x80000 // bit 19, allow multiple suffixes | |||
#define SUFX_UNPRON 0x8000 // used to return $unpron flag from *_rules | |||