Browse Source

[1.46.29]

Language changes: it, pt.

Added an experimental suffix option, "m", which allows multiple suffixes to be removed from a word (e.g. the rule suffix "(_S2m").


git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@295 d46cf337-b52f-0410-862d-fd96e6ae7743
master
jonsd 12 years ago
parent
commit
de97085d1e
8 changed files with 221 additions and 151 deletions
  1. 1
    3
      dictsource/it_listx
  2. 14
    1
      dictsource/pt_list
  3. 17
    1
      dictsource/pt_rules
  4. 1
    1
      dictsource/ta_rules
  5. 46
    43
      src/compiledict.cpp
  6. 8
    4
      src/dictionary.cpp
  7. 133
    98
      src/translate.cpp
  8. 1
    0
      src/translate.h

+ 1
- 3
dictsource/it_listx View File

cortile $2 cortile $2
cortisol $3 cortisol $3
cortocircuito kO@-*totSi@-*k'uito cortocircuito kO@-*totSi@-*k'uito
cosa $alt
cos $alt
cosc $alt cosc $alt
coscia $alt coscia $alt
cosen $alt2 cosen $alt2
cosi $2
cosm $alt cosm $alt
cosmic $alt cosmic $alt
cosm $alt cosm $alt
ros $alt ros $alt
rosalia $3 rosalia $3
roseo $1 $alt roseo $1 $alt
rose $2
rosolia $3 rosolia $3
rospigliosi $alt2 rospigliosi $alt2
rosp $alt rosp $alt

+ 14
- 1
dictsource/pt_list View File

careta $alt2 careta $alt2
carreta $alt2 carreta $alt2
casebre $alt casebre $alt
cateto $alt2
catorze $alt2 catorze $alt2
cefaleia $alt cefaleia $alt
cerca $alt $verb cerca $alt $verb
desprezo $alt $verb desprezo $alt $verb
deva $alt2 deva $alt2
devo $alt2 devo $alt2
deveras $alt
dez $alt dez $alt
diarreia $alt diarreia $alt
discordo $alt discordo $alt
estiveres $alt estiveres $alt
estrofe $alt estrofe $alt
etiqueta $alt2 etiqueta $alt2
europa $alt
exagero $alt $verb exagero $alt $verb
expeça $alt2 expeça $alt2
expeço $alt2 expeço $alt2
farofa $alt farofa $alt
febre $alt febre $alt
feitora $alt2
fera $alt fera $alt
fere $alt fere $alt
ferem $alt ferem $alt
ferozes $alt
fezes $alt fezes $alt
folga $alt folga $alt
fogos $alt fogos $alt
martelo $alt martelo $alt
merda $alt merda $alt
megera $alt megera $alt
mentora $alt2
metro $alt
mexa m'eS& mexa m'eS&
mexi meS'i mexi meS'i
mexo m'eSU mexo m'eSU
naquele $alt2 naquele $alt2
negro $alt2 negro $alt2
novos $alt novos $alt
obstera $alt
obstetra $alt
obsoleta $alt2 obsoleta $alt2
obsoleto $alt2 obsoleto $alt2
odisseia $alt odisseia $alt
onu $1 onu $1
opereta $alt2 opereta $alt2
ordens $alt
osso $alt2 osso $alt2
ovos $alt ovos $alt
padeceste $alt2 padeceste $alt2
paexa paeSa paexa paeSa
pangeia $alt pangeia $alt
palheta $alt2 palheta $alt2
paralelo $alt
parede $alt2 parede $alt2
pastora $alt2 pastora $alt2
pedra $alt pedra $alt
selvagem seUv'aZeIN selvagem seUv'aZeIN
sincera $alt sincera $alt
sincero $alt sincero $alt
sinopse $alt
sobe $alt sobe $alt
sobem $alt sobem $alt
soco $alt2 soco $alt2
travesso $alt2 travesso $alt2
trevo $alt2 trevo $alt2
troco $alt2 $noun troco $alt2 $noun
tropa $alt
tropeço $alt2 $noun tropeço $alt2 $noun
trombeta $alt2 trombeta $alt2
valeta $alt2 valeta $alt2
vanessa $alt2 vanessa $alt2
velozes $alt
verbo $alt verbo $alt
verme $alt verme $alt
vierem $alt vierem $alt
violeta $alt2 violeta $alt2
voga $alt
vozes $alt vozes $alt
zelo $alt $verb zelo $alt $verb
zero $alt zero $alt

+ 17
- 1
dictsource/pt_rules View File

.L03 a am o .L03 a am o
.L04 a am e em o ue uem .L04 a am e em o ue uem
.L05 r ra ram re rem rdes .L05 r ra ram re rem rdes
.L06 ra ram ste re rem




.group a .group a
?1 v) e (l_ ,E // eg: possível, amovível, disponível, etc... ?1 v) e (l_ ,E // eg: possível, amovível, disponível, etc...
?1 l) e (ta 'E // eg: bicicleta, atleta. ?1 l) e (ta 'E // eg: bicicleta, atleta.



sf) e (ra_ E // esfera, biosfera
//sort //sort
qu) e (brL04_ E qu) e (brL04_ E
_hosp) e (dL03_ E _hosp) e (dL03_ E
_atr) e (vL03_ e _atr) e (vL03_ e
_escr) e (vL03_ e _escr) e (vL03_ e
_descr) e (vL03_ e _descr) e (vL03_ e
_embel) e (zL01_ e
_embel) e (zL04_ E
_pr) e (zL04_ E _pr) e (zL04_ E
_acont) e (çL01_ e _acont) e (çL01_ e
_ado) e (çL03_ e _ado) e (çL03_ e
_esclar) e (çL03_ e _esclar) e (çL03_ e
_reapar) e (çL03_ e _reapar) e (çL03_ e
_reconh) e (çL03_ e _reconh) e (çL03_ e
_coop) e (rL04_ E
_sup) e (rL04_ E
_imp) e (rL03_ E
_temp) e (rL04_ E
//endsort //endsort


_exag) e (L05_ E _exag) e (L05_ E
_houv) e (L05_ E _houv) e (L05_ E
_soub) e (L05_ E _soub) e (L05_ E


_compreend) e (L06_ e
_correspond) e (L06_ e
_entend) e (L06_ e
_estend) e (L06_ e
_respond)e (L06_ e
_vend) e (L06_ e


.group é .group é
é ''E é ''E
c) o (rrL01_ o // escorra, incorra, corra, recorra c) o (rrL01_ o // escorra, incorra, corra, recorra
_m) o (rrL01_ o _m) o (rrL01_ o
p) ostos (_ Ost=Us# // dispostos etc. p) ostos (_ Ost=Us# // dispostos etc.
_esn) o (bL04_ O
//endsort //endsort


.group ô .group ô
r) u (_A u r) u (_A u
u (A_ 'u u (A_ 'u


u (em_ 'u
ui uI ui uI
u (iu w u (iu w
u (iCK u u (iCK u

+ 1
- 1
dictsource/ta_rules View File

_) த (ீரரை d _) த (ீரரை d
_) த (ீர்க d _) த (ீர்க d
_) த (ீர்க்க t _) த (ீர்க்க t
_) தீர்க்க (_சுமங்கலி dirggV
_) தீர்க்க (_சுமங்கலி di:rggV
_) தீர்க்க (தரிச di:rggV _) தீர்க்க (தரிச di:rggV
_) தீர்க்க (த்துடன di:rggV# _) தீர்க்க (த்துடன di:rggV#
_) த (ீர்க்கமா d _) த (ீர்க்கமா d

+ 46
- 43
src/compiledict.cpp View File

} }
continue; continue;
} }
if(rb == RULE_DOLLAR) if(rb == RULE_DOLLAR)
{ {
value = *rule++ & 0xff; value = *rule++ & 0xff;
int multiple_numeric_hyphen = 0; int multiple_numeric_hyphen = 0;
char *multiple_string = NULL; char *multiple_string = NULL;
char *multiple_string_end = NULL; char *multiple_string_end = NULL;
int len_word; int len_word;
int len_phonetic; int len_phonetic;
int text_not_phonemes; // this word specifies replacement text, not phonemes int text_not_phonemes; // this word specifies replacement text, not phonemes
unsigned int wc; unsigned int wc;
int all_upper_case; int all_upper_case;
char *mnemptr; char *mnemptr;
unsigned char flag_codes[100]; unsigned char flag_codes[100];
char encoded_ph[200]; char encoded_ph[200];
#endif #endif


step = 0; step = 0;
c = 0; c = 0;
while(c != '\n') while(c != '\n')
{ {
c = *p; c = *p;
if((c == '?') && (step==0)) if((c == '?') && (step==0))
{ {
// conditional rule, allow only if the numbered condition is set for the voice // conditional rule, allow only if the numbered condition is set for the voice
flag_codes[n_flag_codes++] = ix + flag_offset; flag_codes[n_flag_codes++] = ix + flag_offset;
c = *p; c = *p;
} }
if((c == '$') && isalnum(p[1])) if((c == '$') && isalnum(p[1]))
{ {
/* read keyword parameter */ /* read keyword parameter */
mnemptr = p; mnemptr = p;
while(!isspace2(c = *p)) p++; while(!isspace2(c = *p)) p++;
*p = 0; *p = 0;
flagnum = LookupMnem(mnem_flags,mnemptr); flagnum = LookupMnem(mnem_flags,mnemptr);
if(flagnum > 0) if(flagnum > 0)
{ {
error_count++; error_count++;
} }
} }
if((c == '/') && (p[1] == '/') && (multiple_words==0)) if((c == '/') && (p[1] == '/') && (multiple_words==0))
{ {
c = '\n'; /* "//" treat comment as end of line */ c = '\n'; /* "//" treat comment as end of line */
} }
switch(step) switch(step)
{ {
case 0: case 0:
step = 1; step = 1;
} }
break; break;
case 1: case 1:
if((c == '-') && multiple_words) if((c == '-') && multiple_words)
{ {
step = 3; step = 3;
} }
break; break;
case 3: case 3:
if(!isspace2(c)) if(!isspace2(c))
{ {
step = 4; step = 4;
} }
break; break;
case 4: case 4:
if(isspace2(c)) if(isspace2(c))
{ {
step = 5; step = 5;
} }
break; break;
case 5: case 5:
break; break;
} }
p++; p++;
} }
if(word[0] == 0) if(word[0] == 0)
{ {
return(0); /* blank line */ return(0); /* blank line */


*hash = HashDictionary(word); *hash = HashDictionary(word);
len_phonetic = strlen(encoded_ph); len_phonetic = strlen(encoded_ph);
dict_line[1] = len_word; // bit 6 indicates whether the word has been compressed dict_line[1] = len_word; // bit 6 indicates whether the word has been compressed
len_word &= 0x3f; len_word &= 0x3f;


length = len_word + len_phonetic + 3; length = len_word + len_phonetic + 3;
strcpy(&dict_line[(len_word)+2],encoded_ph); strcpy(&dict_line[(len_word)+2],encoded_ph);
} }
for(ix=0; ix<n_flag_codes; ix++) for(ix=0; ix<n_flag_codes; ix++)
{ {
dict_line[ix+length] = flag_codes[ix]; dict_line[ix+length] = flag_codes[ix];
fflush(f_log); fflush(f_log);
#endif #endif
} }
for(hash=0; hash<N_HASH_DICT; hash++) for(hash=0; hash<N_HASH_DICT; hash++)
{ {
p = hash_chains[hash]; p = hash_chains[hash];
hash_counts[hash] = (int)ftell(f_out); hash_counts[hash] = (int)ftell(f_out);
while(p != NULL) while(p != NULL)
{ {
length = *(p+sizeof(char *)); length = *(p+sizeof(char *));
char buf[200]; char buf[200];
char fname[sizeof(path_home)+45]; char fname[sizeof(path_home)+45];
char dict_line[128]; char dict_line[128];
text_mode = 0; text_mode = 0;


// try with and without '.txt' extension // try with and without '.txt' extension
fprintf(f_log,"Compiling: '%s'\n",fname); fprintf(f_log,"Compiling: '%s'\n",fname);


linenum=0; linenum=0;
while(fgets(buf,sizeof(buf),f_in) != NULL) while(fgets(buf,sizeof(buf),f_in) != NULL)
{ {
linenum++; linenum++;
if(length == 0) continue; /* blank line */ if(length == 0) continue; /* blank line */


hash_counts[hash]++; hash_counts[hash]++;
p = (char *)malloc(length+sizeof(char *)); p = (char *)malloc(length+sizeof(char *));
if(p == NULL) if(p == NULL)
{ {
} }
break; break;
} }
memcpy(p,&hash_chains[hash],sizeof(char *)); memcpy(p,&hash_chains[hash],sizeof(char *));
hash_chains[hash] = p; hash_chains[hash] = p;
memcpy(p+sizeof(char *),dict_line,length); memcpy(p+sizeof(char *),dict_line,length);
count++; count++;
} }
fprintf(f_log,"\t%d entries\n",count); fprintf(f_log,"\t%d entries\n",count);
fclose(f_in); fclose(f_in);
return(0); return(0);
output = &rule_phonemes[len]; output = &rule_phonemes[len];
} }
sxflags = 0x808000; // to ensure non-zero bytes sxflags = 0x808000; // to ensure non-zero bytes
for(p=string,ix=0;;) for(p=string,ix=0;;)
{ {
literal = 0; literal = 0;
case 'a': case 'a':
sxflags |= SUFX_A; sxflags |= SUFX_A;
break; break;
case 'm':
sxflags |= SUFX_M;
break;
default: default:
if(isdigit(c)) if(isdigit(c))
value = (value*10) + (c - '0'); value = (value*10) + (c - '0');
rule_phonemes[0]=0; rule_phonemes[0]=0;


p = buf; p = buf;
for(ix=0; finish==0; ix++) for(ix=0; finish==0; ix++)
{ {
c = input[ix]; c = input[ix];
copy_rule_string(buf,state); copy_rule_string(buf,state);
p = buf; p = buf;
break; break;
case '(': // start of suffix section case '(': // start of suffix section
*p = 0; *p = 0;
state = 2; state = 2;
error_count++; error_count++;
} }
break; break;
case '\n': // end of line case '\n': // end of line
case '\r': case '\r':
case 0: // end of line case 0: // end of line
copy_rule_string(buf,state); copy_rule_string(buf,state);
finish=1; finish=1;
break; break;
case '\t': // end of section section case '\t': // end of section section
case ' ': case ' ':
*p = 0; *p = 0;
copy_rule_string(buf,state); copy_rule_string(buf,state);
p = buf; p = buf;
break; break;
case '?': case '?':
if(state==2) if(state==2)
state=0; state=0;
break; break;
} }
} }
if(strcmp(rule_match,"$group")==0) if(strcmp(rule_match,"$group")==0)
strcpy(rule_match,group_name); strcpy(rule_match,group_name);


} }
strcpy(output,buf); strcpy(output,buf);
len = strlen(buf)+1; len = strlen(buf)+1;
len_name = strlen(group_name); len_name = strlen(group_name);
if((len_name > 0) && (memcmp(rule_match,group_name,len_name) != 0)) if((len_name > 0) && (memcmp(rule_match,group_name,len_name) != 0))
{ {
len1 = strlen(p) + 1; len1 = strlen(p) + 1;
p = &p[len1]; p = &p[len1];
len2 = strlen(p); len2 = strlen(p);
rule_match[0]=0; rule_match[0]=0;
rule_pre[0]=0; rule_pre[0]=0;
rule_post[0]=0; rule_post[0]=0;
} }
} }
*pout = 0; *pout = 0;
spaces = 12; spaces = 12;
if(condition > 0) if(condition > 0)
{ {
fprintf(f_out,"%s",buf); fprintf(f_out,"%s",buf);
spaces = 0; spaces = 0;
} }
for(ix=0; ix<spaces; ix++) for(ix=0; ix<spaces; ix++)
fputc(' ',f_out); fputc(' ',f_out);
spaces = 14; spaces = 14;
sprintf(buf," %s ",rule_match); sprintf(buf," %s ",rule_match);
if(rule_post[0] != 0) if(rule_post[0] != 0)
int n_rgroups = 0; int n_rgroups = 0;
int n_groups3 = 0; int n_groups3 = 0;
RGROUP rgroup[N_RULE_GROUP2]; RGROUP rgroup[N_RULE_GROUP2];
linenum = 0; linenum = 0;
group_name[0] = 0; group_name[0] = 0;


if((p = (unsigned char *)strstr(buf,"//")) != NULL) if((p = (unsigned char *)strstr(buf,"//")) != NULL)
*p = 0; *p = 0;


if(buf[0] == '\r') buf++; // ignore extra \r in \r\n
if(buf[0] == '\r') buf++; // ignore extra \r in \r\n
} }


if((buf == NULL) || (buf[0] == '.')) if((buf == NULL) || (buf[0] == '.'))
{ {
// group character is given as a character code (max 16 bits) // group character is given as a character code (max 16 bits)
p = (unsigned char *)group_name; p = (unsigned char *)group_name;
if(char_code > 0x100) if(char_code > 0x100)
{ {
*p++ = (char_code >> 8); *p++ = (char_code >> 8);
} }
} }
} }
if((group3_ix == 0) && (strlen(group_name) > 2)) if((group3_ix == 0) && (strlen(group_name) > 2))
{ {
if(utf8_in(&c,group_name) < 2) if(utf8_in(&c,group_name) < 2)
fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum); fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum);
error_count++; error_count++;
} }
group_name[2] = 0; group_name[2] = 0;
} }
} }


continue; continue;
} }
switch(compile_mode) switch(compile_mode)
{ {
case 1: // .group case 1: // .group
compile_dictlist_file(path,"list"); compile_dictlist_file(path,"list");
} }
compile_dictlist_file(path,"extra"); compile_dictlist_file(path,"extra");
compile_dictlist_end(f_out); compile_dictlist_end(f_out);
offset_rules = ftell(f_out); offset_rules = ftell(f_out);
fprintf(f_log,"Compiling: '%s'\n",fname_in); fprintf(f_log,"Compiling: '%s'\n",fname_in);


compile_dictrules(f_in,f_out,fname_temp); compile_dictrules(f_in,f_out,fname_temp);

+ 8
- 4
src/dictionary.cpp View File

{ {
if(vowel_stress[ix] == 4) if(vowel_stress[ix] == 4)
vowel_stress[ix] = 3; // change marked stress (consonant clusters) to secondary (except the last) vowel_stress[ix] = 3; // change marked stress (consonant clusters) to secondary (except the last)
if(vowel_length[ix] > 0) if(vowel_length[ix] > 0)
{ {
long_vowel = ix; long_vowel = ix;
vowel_stress[ix] = 3; // give secondary stress to all long vowels vowel_stress[ix] = 3; // give secondary stress to all long vowels
} }
} }
// 'stressed_syllable' gives the last marked stress // 'stressed_syllable' gives the last marked stress
if(stressed_syllable == 0) if(stressed_syllable == 0)
{ {
*word_end = 'e'; *word_end = 'e';
} }
i = word_end - word; i = word_end - word;
memcpy(word_copy,word,i);
word_copy[i] = 0;

if(word_copy != NULL)
{
memcpy(word_copy,word,i);
word_copy[i] = 0;
}


// look for multibyte characters to increase the number of bytes to remove // look for multibyte characters to increase the number of bytes to remove
for(len_ending = i = (end_type & 0x3f); i>0 ;i--) // num.of characters of the suffix for(len_ending = i = (end_type & 0x3f); i>0 ;i--) // num.of characters of the suffix

+ 133
- 98
src/translate.cpp View File



int utf8_in2(int *c, const char *buf, int backwards) int utf8_in2(int *c, const char *buf, int backwards)
{//================================================= {//=================================================
// Read a unicode characater from a UTF8 string
// Read a unicode characater from a UTF8 string
// Returns the number of UTF8 bytes used. // Returns the number of UTF8 bytes used.
// backwards: set if we are moving backwards through the UTF8 string // backwards: set if we are moving backwards through the UTF8 string
int c1; int c1;


int utf8_in(int *c, const char *buf) int utf8_in(int *c, const char *buf)
{//================================= {//=================================
// Read a unicode characater from a UTF8 string
// Read a unicode characater from a UTF8 string
// Returns the number of UTF8 bytes used. // Returns the number of UTF8 bytes used.
return(utf8_in2(c,buf,0)); return(utf8_in2(c,buf,0));
} }
unsigned int dictionary_flags[2]; unsigned int dictionary_flags[2];
unsigned int dictionary_flags2[2]; unsigned int dictionary_flags2[2];
int end_type=0; int end_type=0;
int end_type1=0;
int prefix_type=0; int prefix_type=0;
int prefix_stress; int prefix_stress;
char *wordx; char *wordx;
char phonemes[N_WORD_PHONEMES]; char phonemes[N_WORD_PHONEMES];
char phonemes2[N_WORD_PHONEMES];
char prefix_phonemes[N_WORD_PHONEMES]; char prefix_phonemes[N_WORD_PHONEMES];
char unpron_phonemes[N_WORD_PHONEMES]; char unpron_phonemes[N_WORD_PHONEMES];
char end_phonemes[N_WORD_PHONEMES]; char end_phonemes[N_WORD_PHONEMES];
char end_phonemes2[N_WORD_PHONEMES];
char word_copy[N_WORD_BYTES]; char word_copy[N_WORD_BYTES];
char word_copy2[N_WORD_BYTES]; char word_copy2[N_WORD_BYTES];
int word_copy_length; int word_copy_length;
char prefix_chars[0x3f + 2];
char prefix_chars[0x3f + 2];
int found=0; int found=0;
int end_flags;
int end_flags;
char c_temp; // save a character byte while we temporarily replace it with space char c_temp; // save a character byte while we temporarily replace it with space
int first_char; int first_char;
int last_char = 0; int last_char = 0;
int add_plural_suffix = 0; int add_plural_suffix = 0;
int prefix_flags = 0; int prefix_flags = 0;
int more_suffixes;
int confirm_prefix; int confirm_prefix;
int spell_word; int spell_word;
int stress_bits; int stress_bits;
// the word has $abbrev flag, but no pronunciation specified. Speak as individual letters // the word has $abbrev flag, but no pronunciation specified. Speak as individual letters
spell_word = 1; spell_word = 1;
} }
if(!found && iswdigit(first_char)) if(!found && iswdigit(first_char))
{ {
Lookup(tr,"_0lang",word_phonemes); Lookup(tr,"_0lang",word_phonemes);
if(confirm_prefix && !(end_type & SUFX_B)) if(confirm_prefix && !(end_type & SUFX_B))
{ {
int end2; int end2;
char phonemes2[N_WORD_PHONEMES];
char end_phonemes2[N_WORD_PHONEMES]; char end_phonemes2[N_WORD_PHONEMES];


// remove any standard suffix and confirm that the prefix is still recognised // remove any standard suffix and confirm that the prefix is still recognised
for(ix=0; ix < n_chars; ix++) // num. of bytes to remove for(ix=0; ix < n_chars; ix++) // num. of bytes to remove
{ {
prefix_chars[pfix++] = *wordx++; prefix_chars[pfix++] = *wordx++;
if((prefix_type & SUFX_B) && (ix == (n_chars-1))) if((prefix_type & SUFX_B) && (ix == (n_chars-1)))
{ {
prefix_chars[pfix-1] = 0; // discard the last character of the prefix, this is the separator character prefix_chars[pfix-1] = 0; // discard the last character of the prefix, this is the separator character
} }
} }





if((end_type != 0) && !(end_type & SUFX_P)) if((end_type != 0) && !(end_type & SUFX_P))
{ {
char phonemes2[N_WORD_PHONEMES];
strcpy(phonemes2,phonemes);
end_type1 = end_type;
strcpy(phonemes2,phonemes);


// The word has a standard ending, re-translate without this ending // The word has a standard ending, re-translate without this ending
end_flags = RemoveEnding(tr, wordx, end_type, word_copy); end_flags = RemoveEnding(tr, wordx, end_type, word_copy);

phonemes[0] = 0;

if(prefix_phonemes[0] != 0)
{
// lookup the stem without the prefix removed
wordx[-1] = c_temp;
found = LookupDictList(tr, &word1, phonemes, dictionary_flags2, end_flags, wtab); // include prefix, but not suffix
wordx[-1] = ' ';
if(phonemes[0] == phonSWITCH)
{
// change to another language in order to translate this word
memcpy(wordx,word_copy,strlen(word_copy));
strcpy(word_phonemes,phonemes);
return(0);
}
if(dictionary_flags[0]==0)
{
dictionary_flags[0] = dictionary_flags2[0];
dictionary_flags[1] = dictionary_flags2[1];
}
if(found)
prefix_phonemes[0] = 0; // matched whole word, don't need prefix now

if((found==0) && (dictionary_flags2[0] != 0))
prefix_flags = 1;
}
if(found == 0)
{
found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix
if(phonemes[0] == phonSWITCH)
{
// change to another language in order to translate this word
memcpy(wordx,word_copy,strlen(word_copy));
strcpy(word_phonemes,phonemes);
return(0);
}

if(dictionary_flags2[0] & FLAG_ABBREV)
{
// Removing the suffix leaves a word which should be spoken as individual letters
// Not yet implemented
}
if(dictionary_flags[0]==0)
{
dictionary_flags[0] = dictionary_flags2[0];
dictionary_flags[1] = dictionary_flags2[1];
}
}
if(found == 0)
{
if(end_type & SUFX_Q)
{
// don't retranslate, use the original lookup result
strcpy(phonemes,phonemes2);

// language specific changes
ApplySpecialAttribute(tr,phonemes,dictionary_flags[0]);
}
else
{
if(end_flags & FLAG_SUFX)
wflags |= FLAG_SUFFIX_REMOVED;
if(end_type & SUFX_A)
wflags |= FLAG_SUFFIX_VOWEL;

TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, NULL, wflags, dictionary_flags);

if(phonemes[0] == phonSWITCH)
{
// change to another language in order to translate this word
strcpy(word_phonemes,phonemes);
memcpy(wordx,word_copy,strlen(word_copy));
wordx[-1] = c_temp;
return(0);
}
}
}

if((end_type & SUFX_T) == 0)
more_suffixes = 1;

while(more_suffixes)
{
more_suffixes = 0;
phonemes[0] = 0;

if(prefix_phonemes[0] != 0)
{
// lookup the stem without the prefix removed
wordx[-1] = c_temp;
found = LookupDictList(tr, &word1, phonemes, dictionary_flags2, end_flags, wtab); // include prefix, but not suffix
wordx[-1] = ' ';
if(phonemes[0] == phonSWITCH)
{
// change to another language in order to translate this word
memcpy(wordx,word_copy,strlen(word_copy));
strcpy(word_phonemes,phonemes);
return(0);
}
if(dictionary_flags[0]==0)
{
dictionary_flags[0] = dictionary_flags2[0];
dictionary_flags[1] = dictionary_flags2[1];
}
if(found)
prefix_phonemes[0] = 0; // matched whole word, don't need prefix now

if((found==0) && (dictionary_flags2[0] != 0))
prefix_flags = 1;
}
if(found == 0)
{
found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix
if(phonemes[0] == phonSWITCH)
{
// change to another language in order to translate this word
memcpy(wordx,word_copy,strlen(word_copy));
strcpy(word_phonemes,phonemes);
return(0);
}

if(dictionary_flags2[0] & FLAG_ABBREV)
{
// Removing the suffix leaves a word which should be spoken as individual letters
// Not yet implemented
}
if(dictionary_flags[0]==0)
{
dictionary_flags[0] = dictionary_flags2[0];
dictionary_flags[1] = dictionary_flags2[1];
}
}
if(found == 0)
{
if(end_type & SUFX_Q)
{
// don't retranslate, use the original lookup result
strcpy(phonemes,phonemes2);

// language specific changes
ApplySpecialAttribute(tr,phonemes,dictionary_flags[0]);
}
else
{
if(end_flags & FLAG_SUFX)
wflags |= FLAG_SUFFIX_REMOVED;
if(end_type & SUFX_A)
wflags |= FLAG_SUFFIX_VOWEL;

if(end_type & SUFX_M)
{
// allow more suffixes before this suffix
strcpy(end_phonemes2, end_phonemes);
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags);
strcat(end_phonemes, end_phonemes2); // add the phonemes for the previous suffixes after this one

if((end_type != 0) && !(end_type & SUFX_P))
{
// there is another suffix
end_flags = RemoveEnding(tr, wordx, end_type, NULL);
more_suffixes = 1;
}
}
else
{
// don't remove any previous suffix
TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, NULL, wflags, dictionary_flags);
end_type = 0;
}

if(phonemes[0] == phonSWITCH)
{
// change to another language in order to translate this word
strcpy(word_phonemes,phonemes);
memcpy(wordx,word_copy,strlen(word_copy));
wordx[-1] = c_temp;
return(0);
}
}
}
}


if((end_type1 & SUFX_T) == 0)
{ {
// the default is to add the suffix and then determine the word's stress pattern // the default is to add the suffix and then determine the word's stress pattern
AppendPhonemes(tr,phonemes, N_WORD_PHONEMES, end_phonemes); AppendPhonemes(tr,phonemes, N_WORD_PHONEMES, end_phonemes);
} }
memcpy(wordx,word_copy,strlen(word_copy)); memcpy(wordx,word_copy,strlen(word_copy));
} }




wordx[-1] = c_temp; wordx[-1] = c_temp;
} }
} }


// dictionary flags for this word give a clue about which alternative pronunciations of // dictionary flags for this word give a clue about which alternative pronunciations of
// following words to use. // following words to use.
if(end_type & SUFX_F)
if(end_type1 & SUFX_F)
{ {
// expect a verb form, with or without -s suffix // expect a verb form, with or without -s suffix
tr->expect_verb = 2; tr->expect_verb = 2;
ok = 0; ok = 0;
} }
} }
if((sylimit & 0x200) && ((wtab+1)->flags & FLAG_LAST_WORD)) if((sylimit & 0x200) && ((wtab+1)->flags & FLAG_LAST_WORD))
{ {
// not if the next word is end-of-sentence // not if the next word is end-of-sentence
else else
if(ph_code == phonX1) if(ph_code == phonX1)
{ {
// a language specific action
// a language specific action
if(tr->langopts.param[LOPT_IT_DOUBLING]) if(tr->langopts.param[LOPT_IT_DOUBLING])
{ {
flags |= FLAG_DOUBLING; flags |= FLAG_DOUBLING;
{ {
if(first_phoneme && tr->langopts.param[LOPT_IT_DOUBLING]) if(first_phoneme && tr->langopts.param[LOPT_IT_DOUBLING])
{ {
if(((tr->prev_dict_flags & FLAG_DOUBLING) && (tr->langopts.param[LOPT_IT_DOUBLING] & 1)) ||
if(((tr->prev_dict_flags & FLAG_DOUBLING) && (tr->langopts.param[LOPT_IT_DOUBLING] & 1)) ||
(tr->end_stressed_vowel && (tr->langopts.param[LOPT_IT_DOUBLING] & 2))) (tr->end_stressed_vowel && (tr->langopts.param[LOPT_IT_DOUBLING] & 2)))
{ {
// italian, double the initial consonant if the previous word ends with a // italian, double the initial consonant if the previous word ends with a
} }
#endif #endif
if((c == 0x92) || (c == 0xb4) || (c == 0x2019) || (c == 0x2032)) if((c == 0x92) || (c == 0xb4) || (c == 0x2019) || (c == 0x2032))
c = '\''; // 'microsoft' quote or sexed closing single quote, or prime - possibly used as apostrophe
c = '\''; // 'microsoft' quote or sexed closing single quote, or prime - possibly used as apostrophe


if(((c == 0x2018) || (c == '?')) && IsAlpha(prev_out) && IsAlpha(next_in)) if(((c == 0x2018) || (c == '?')) && IsAlpha(prev_out) && IsAlpha(next_in))
{ {
{ {
if((next_in == '\002') || ((next_in == '[') && option_phoneme_input)) if((next_in == '\002') || ((next_in == '[') && option_phoneme_input))
{ {
// "[\002" is used internally to start phoneme mode
// "[\002" is used internally to start phoneme mode
phoneme_mode = FLAG_PHONEMES; phoneme_mode = FLAG_PHONEMES;
source_index++; source_index++;
continue; continue;
if(!IsSpace(prev_in) && IsAlpha(next_in)) if(!IsSpace(prev_in) && IsAlpha(next_in))
{ {
if(prev_out != ' ') if(prev_out != ' ')
{
{
// previous 'word' not yet ended (not alpha or numeric), start new word now. // previous 'word' not yet ended (not alpha or numeric), start new word now.
c = ' '; c = ' ';
space_inserted = 1; space_inserted = 1;
if((word_count==0) && (embedded_count > 0)) if((word_count==0) && (embedded_count > 0))
{ {
// add a null 'word' to carry the embedded command flag // add a null 'word' to carry the embedded command flag
embedded_list[embedded_ix-1] |= 0x80;
embedded_list[embedded_ix-1] |= 0x80;
words[word_count].flags |= FLAG_EMBEDDED; words[word_count].flags |= FLAG_EMBEDDED;
word_count = 1; word_count = 1;
} }
ix--; // the last word is a bracket, mark the previous word as last ix--; // the last word is a bracket, mark the previous word as last
words[ix].flags |= FLAG_LAST_WORD; words[ix].flags |= FLAG_LAST_WORD;


// FLAG_NOSPACE check to avoid recognizing .mr -mr
// FLAG_NOSPACE check to avoid recognizing .mr -mr
if((terminator & CLAUSE_DOT) && !(words[word_count-1].flags & FLAG_NOSPACE)) if((terminator & CLAUSE_DOT) && !(words[word_count-1].flags & FLAG_NOSPACE))
words[word_count-1].flags |= FLAG_HAS_DOT; words[word_count-1].flags |= FLAG_HAS_DOT;
} }

+ 1
- 0
src/translate.h View File

#define SUFX_T 0x10000 // don't affect the stress position in the stem #define SUFX_T 0x10000 // don't affect the stress position in the stem
#define SUFX_B 0x20000 // break, this character breaks the word into stem and suffix (used with SUFX_P) #define SUFX_B 0x20000 // break, this character breaks the word into stem and suffix (used with SUFX_P)
#define SUFX_A 0x40000 // remember that the suffix starts with a vowel #define SUFX_A 0x40000 // remember that the suffix starts with a vowel
#define SUFX_M 0x80000 // bit 19, allow multiple suffixes


#define SUFX_UNPRON 0x8000 // used to return $unpron flag from *_rules #define SUFX_UNPRON 0x8000 // used to return $unpron flag from *_rules



Loading…
Cancel
Save