Browse Source

[1.46.29]

Language changes: it, pt.

Added an experimental suffix option to allow multiple suffixes to be removed from a word (e.g. _S2m).


git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@295 d46cf337-b52f-0410-862d-fd96e6ae7743
master
jonsd 12 years ago
parent
commit
de97085d1e
8 changed files with 221 additions and 151 deletions
  1. 1
    3
      dictsource/it_listx
  2. 14
    1
      dictsource/pt_list
  3. 17
    1
      dictsource/pt_rules
  4. 1
    1
      dictsource/ta_rules
  5. 46
    43
      src/compiledict.cpp
  6. 8
    4
      src/dictionary.cpp
  7. 133
    98
      src/translate.cpp
  8. 1
    0
      src/translate.h

+ 1
- 3
dictsource/it_listx View File

@@ -1078,11 +1078,10 @@ cortes $2
cortile $2
cortisol $3
cortocircuito kO@-*totSi@-*k'uito
cosa $alt
cos $alt
cosc $alt
coscia $alt
cosen $alt2
cosi $2
cosm $alt
cosmic $alt
cosm $alt
@@ -3274,7 +3273,6 @@ rore $alt
ros $alt
rosalia $3
roseo $1 $alt
rose $2
rosolia $3
rospigliosi $alt2
rosp $alt

+ 14
- 1
dictsource/pt_list View File

@@ -695,6 +695,7 @@ caractere $alt
careta $alt2
carreta $alt2
casebre $alt
cateto $alt2
catorze $alt2
cefaleia $alt
cerca $alt $verb
@@ -741,6 +742,7 @@ desfecho $alt2
desprezo $alt $verb
deva $alt2
devo $alt2
deveras $alt
dez $alt
diarreia $alt
discordo $alt
@@ -772,14 +774,17 @@ estiverdes $alt
estiveres $alt
estrofe $alt
etiqueta $alt2
europa $alt
exagero $alt $verb
expeça $alt2
expeço $alt2
farofa $alt
febre $alt
feitora $alt2
fera $alt
fere $alt
ferem $alt
ferozes $alt
fezes $alt
folga $alt
fogos $alt
@@ -822,6 +827,8 @@ marreta $alt2
martelo $alt
merda $alt
megera $alt
mentora $alt2
metro $alt
mexa m'eS&
mexi meS'i
mexo m'eSU
@@ -846,18 +853,20 @@ mutreta $alt2
naquele $alt2
negro $alt2
novos $alt
obstera $alt
obstetra $alt
obsoleta $alt2
obsoleto $alt2
odisseia $alt
onu $1
opereta $alt2
ordens $alt
osso $alt2
ovos $alt
padeceste $alt2
paexa paeSa
pangeia $alt
palheta $alt2
paralelo $alt
parede $alt2
pastora $alt2
pedra $alt
@@ -908,6 +917,7 @@ selo $alt $verb
selvagem seUv'aZeIN
sincera $alt
sincero $alt
sinopse $alt
sobe $alt
sobem $alt
soco $alt2
@@ -938,14 +948,17 @@ traqueia $alt
travesso $alt2
trevo $alt2
troco $alt2 $noun
tropa $alt
tropeço $alt2 $noun
trombeta $alt2
valeta $alt2
vanessa $alt2
velozes $alt
verbo $alt
verme $alt
vierem $alt
violeta $alt2
voga $alt
vozes $alt
zelo $alt $verb
zero $alt

+ 17
- 1
dictsource/pt_rules View File

@@ -20,6 +20,7 @@
.L03 a am o
.L04 a am e em o ue uem
.L05 r ra ram re rem rdes
.L06 ra ram ste re rem


.group a
@@ -337,6 +338,9 @@
?1 v) e (l_ ,E // eg: possível, amovível, disponível, etc...
?1 l) e (ta 'E // eg: bicicleta, atleta.


sf) e (ra_ E // esfera, biosfera
//sort
qu) e (brL04_ E
_hosp) e (dL03_ E
@@ -362,7 +366,7 @@ _comprom) e (tL03_ e
_atr) e (vL03_ e
_escr) e (vL03_ e
_descr) e (vL03_ e
_embel) e (zL01_ e
_embel) e (zL04_ E
_pr) e (zL04_ E
_acont) e (çL01_ e
_ado) e (çL03_ e
@@ -383,6 +387,10 @@ _envaid) e (çL03_ e
_esclar) e (çL03_ e
_reapar) e (çL03_ e
_reconh) e (çL03_ e
_coop) e (rL04_ E
_sup) e (rL04_ E
_imp) e (rL03_ E
_temp) e (rL04_ E
//endsort

_exag) e (L05_ E
@@ -390,6 +398,12 @@ _reconh) e (çL03_ e
_houv) e (L05_ E
_soub) e (L05_ E

_compreend) e (L06_ e
_correspond) e (L06_ e
_entend) e (L06_ e
_estend) e (L06_ e
_respond)e (L06_ e
_vend) e (L06_ e

.group é
é ''E
@@ -687,6 +701,7 @@ _reconh) e (çL03_ e
c) o (rrL01_ o // escorra, incorra, corra, recorra
_m) o (rrL01_ o
p) ostos (_ Ost=Us# // dispostos etc.
_esn) o (bL04_ O
//endsort

.group ô
@@ -824,6 +839,7 @@ _reconh) e (çL03_ e
r) u (_A u
u (A_ 'u

u (em_ 'u
ui uI
u (iu w
u (iCK u

+ 1
- 1
dictsource/ta_rules View File

@@ -1831,7 +1831,7 @@ _அகாலி_) த (ள dV
_) த (ீரரை d
_) த (ீர்க d
_) த (ீர்க்க t
_) தீர்க்க (_சுமங்கலி dirggV
_) தீர்க்க (_சுமங்கலி di:rggV
_) தீர்க்க (தரிச di:rggV
_) தீர்க்க (த்துடன di:rggV#
_) த (ீர்க்கமா d

+ 46
- 43
src/compiledict.cpp View File

@@ -266,7 +266,7 @@ char *DecodeRule(const char *group_chars, int group_length, char *rule, int cont
}
continue;
}
if(rb == RULE_DOLLAR)
{
value = *rule++ & 0xff;
@@ -380,13 +380,13 @@ static int compile_line(char *linebuf, char *dict_line, int *hash)
int multiple_numeric_hyphen = 0;
char *multiple_string = NULL;
char *multiple_string_end = NULL;
int len_word;
int len_phonetic;
int text_not_phonemes; // this word specifies replacement text, not phonemes
unsigned int wc;
int all_upper_case;
char *mnemptr;
unsigned char flag_codes[100];
char encoded_ph[200];
@@ -416,12 +416,12 @@ static char nullstring[] = {0};
#endif

step = 0;
c = 0;
while(c != '\n')
{
c = *p;
if((c == '?') && (step==0))
{
// conditional rule, allow only if the numbered condition is set for the voice
@@ -449,14 +449,14 @@ static char nullstring[] = {0};
flag_codes[n_flag_codes++] = ix + flag_offset;
c = *p;
}
if((c == '$') && isalnum(p[1]))
{
/* read keyword parameter */
mnemptr = p;
while(!isspace2(c = *p)) p++;
*p = 0;
flagnum = LookupMnem(mnem_flags,mnemptr);
if(flagnum > 0)
{
@@ -485,12 +485,12 @@ static char nullstring[] = {0};
error_count++;
}
}
if((c == '/') && (p[1] == '/') && (multiple_words==0))
{
c = '\n'; /* "//" treat comment as end of line */
}
switch(step)
{
case 0:
@@ -507,7 +507,7 @@ static char nullstring[] = {0};
step = 1;
}
break;
case 1:
if((c == '-') && multiple_words)
{
@@ -567,7 +567,7 @@ static char nullstring[] = {0};
step = 3;
}
break;
case 3:
if(!isspace2(c))
{
@@ -575,7 +575,7 @@ static char nullstring[] = {0};
step = 4;
}
break;
case 4:
if(isspace2(c))
{
@@ -583,13 +583,13 @@ static char nullstring[] = {0};
step = 5;
}
break;
case 5:
break;
}
p++;
}
if(word[0] == 0)
{
return(0); /* blank line */
@@ -698,7 +698,7 @@ static char nullstring[] = {0};

*hash = HashDictionary(word);
len_phonetic = strlen(encoded_ph);
dict_line[1] = len_word; // bit 6 indicates whether the word has been compressed
len_word &= 0x3f;

@@ -715,7 +715,7 @@ static char nullstring[] = {0};
length = len_word + len_phonetic + 3;
strcpy(&dict_line[(len_word)+2],encoded_ph);
}
for(ix=0; ix<n_flag_codes; ix++)
{
dict_line[ix+length] = flag_codes[ix];
@@ -790,12 +790,12 @@ static void compile_dictlist_end(FILE *f_out)
fflush(f_log);
#endif
}
for(hash=0; hash<N_HASH_DICT; hash++)
{
p = hash_chains[hash];
hash_counts[hash] = (int)ftell(f_out);
while(p != NULL)
{
length = *(p+sizeof(char *));
@@ -818,7 +818,7 @@ static int compile_dictlist_file(const char *path, const char* filename)
char buf[200];
char fname[sizeof(path_home)+45];
char dict_line[128];
text_mode = 0;

// try with and without '.txt' extension
@@ -833,7 +833,7 @@ static int compile_dictlist_file(const char *path, const char* filename)
fprintf(f_log,"Compiling: '%s'\n",fname);

linenum=0;
while(fgets(buf,sizeof(buf),f_in) != NULL)
{
linenum++;
@@ -842,7 +842,7 @@ static int compile_dictlist_file(const char *path, const char* filename)
if(length == 0) continue; /* blank line */

hash_counts[hash]++;
p = (char *)malloc(length+sizeof(char *));
if(p == NULL)
{
@@ -853,13 +853,13 @@ static int compile_dictlist_file(const char *path, const char* filename)
}
break;
}
memcpy(p,&hash_chains[hash],sizeof(char *));
hash_chains[hash] = p;
memcpy(p+sizeof(char *),dict_line,length);
count++;
}
fprintf(f_log,"\t%d entries\n",count);
fclose(f_in);
return(0);
@@ -920,7 +920,7 @@ static void copy_rule_string(char *string, int &state)
output = &rule_phonemes[len];
}
sxflags = 0x808000; // to ensure non-zero bytes
for(p=string,ix=0;;)
{
literal = 0;
@@ -1126,6 +1126,9 @@ static void copy_rule_string(char *string, int &state)
case 'a':
sxflags |= SUFX_A;
break;
case 'm':
sxflags |= SUFX_M;
break;
default:
if(isdigit(c))
value = (value*10) + (c - '0');
@@ -1173,7 +1176,7 @@ static char *compile_rule(char *input)
rule_phonemes[0]=0;

p = buf;
for(ix=0; finish==0; ix++)
{
c = input[ix];
@@ -1186,7 +1189,7 @@ static char *compile_rule(char *input)
copy_rule_string(buf,state);
p = buf;
break;
case '(': // start of suffix section
*p = 0;
state = 2;
@@ -1199,7 +1202,7 @@ static char *compile_rule(char *input)
error_count++;
}
break;
case '\n': // end of line
case '\r':
case 0: // end of line
@@ -1207,14 +1210,14 @@ static char *compile_rule(char *input)
copy_rule_string(buf,state);
finish=1;
break;
case '\t': // end of section section
case ' ':
*p = 0;
copy_rule_string(buf,state);
p = buf;
break;
case '?':
if(state==2)
state=0;
@@ -1227,7 +1230,7 @@ static char *compile_rule(char *input)
break;
}
}
if(strcmp(rule_match,"$group")==0)
strcpy(rule_match,group_name);

@@ -1249,7 +1252,7 @@ static char *compile_rule(char *input)
}
strcpy(output,buf);
len = strlen(buf)+1;
len_name = strlen(group_name);
if((len_name > 0) && (memcmp(rule_match,group_name,len_name) != 0))
{
@@ -1382,7 +1385,7 @@ static void print_rule_group(FILE *f_out, int n_rules, char **rules, char *name)
len1 = strlen(p) + 1;
p = &p[len1];
len2 = strlen(p);
rule_match[0]=0;
rule_pre[0]=0;
rule_post[0]=0;
@@ -1420,7 +1423,7 @@ static void print_rule_group(FILE *f_out, int n_rules, char **rules, char *name)
}
}
*pout = 0;
spaces = 12;
if(condition > 0)
{
@@ -1441,10 +1444,10 @@ static void print_rule_group(FILE *f_out, int n_rules, char **rules, char *name)
fprintf(f_out,"%s",buf);
spaces = 0;
}
for(ix=0; ix<spaces; ix++)
fputc(' ',f_out);
spaces = 14;
sprintf(buf," %s ",rule_match);
if(rule_post[0] != 0)
@@ -1633,7 +1636,7 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp)
int n_rgroups = 0;
int n_groups3 = 0;
RGROUP rgroup[N_RULE_GROUP2];
linenum = 0;
group_name[0] = 0;

@@ -1649,7 +1652,7 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp)
if((p = (unsigned char *)strstr(buf,"//")) != NULL)
*p = 0;

if(buf[0] == '\r') buf++; // ignore extra \r in \r\n
if(buf[0] == '\r') buf++; // ignore extra \r in \r\n
}

if((buf == NULL) || (buf[0] == '.'))
@@ -1711,7 +1714,7 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp)
{
// group character is given as a character code (max 16 bits)
p = (unsigned char *)group_name;
if(char_code > 0x100)
{
*p++ = (char_code >> 8);
@@ -1730,7 +1733,7 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp)
}
}
}
if((group3_ix == 0) && (strlen(group_name) > 2))
{
if(utf8_in(&c,group_name) < 2)
@@ -1738,14 +1741,14 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp)
fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum);
error_count++;
}
group_name[2] = 0;
}
}

continue;
}
switch(compile_mode)
{
case 1: // .group
@@ -1915,10 +1918,10 @@ int CompileDictionary(const char *dsource, const char *dict_name, FILE *log, cha
compile_dictlist_file(path,"list");
}
compile_dictlist_file(path,"extra");
compile_dictlist_end(f_out);
offset_rules = ftell(f_out);
fprintf(f_log,"Compiling: '%s'\n",fname_in);

compile_dictrules(f_in,f_out,fname_temp);

+ 8
- 4
src/dictionary.cpp View File

@@ -1452,14 +1452,14 @@ void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags,
{
if(vowel_stress[ix] == 4)
vowel_stress[ix] = 3; // change marked stress (consonant clusters) to secondary (except the last)
if(vowel_length[ix] > 0)
{
long_vowel = ix;
vowel_stress[ix] = 3; // give secondary stress to all long vowels
}
}
// 'stressed_syllable' gives the last marked stress
if(stressed_syllable == 0)
{
@@ -3497,8 +3497,12 @@ int RemoveEnding(Translator *tr, char *word, int end_type, char *word_copy)
*word_end = 'e';
}
i = word_end - word;
memcpy(word_copy,word,i);
word_copy[i] = 0;

if(word_copy != NULL)
{
memcpy(word_copy,word,i);
word_copy[i] = 0;
}

// look for multibyte characters to increase the number of bytes to remove
for(len_ending = i = (end_type & 0x3f); i>0 ;i--) // num.of characters of the suffix

+ 133
- 98
src/translate.cpp View File

@@ -548,7 +548,7 @@ int utf8_nbytes(const char *buf)

int utf8_in2(int *c, const char *buf, int backwards)
{//=================================================
// Read a unicode characater from a UTF8 string
// Read a unicode characater from a UTF8 string
// Returns the number of UTF8 bytes used.
// backwards: set if we are moving backwards through the UTF8 string
int c1;
@@ -592,7 +592,7 @@ int utf8_in2(int *c, const char *buf, int backwards)

int utf8_in(int *c, const char *buf)
{//=================================
// Read a unicode character from a UTF8 string, moving forwards through it.
// Thin wrapper: passes c and buf on to utf8_in2() with its 'backwards' argument set to 0.
// Returns the number of UTF8 bytes used.
return(utf8_in2(c,buf,0));
}
@@ -819,24 +819,28 @@ int TranslateWord(Translator *tr, char *word_start, int next_pause, WORD_TAB *wt
unsigned int dictionary_flags[2];
unsigned int dictionary_flags2[2];
int end_type=0;
int end_type1=0;
int prefix_type=0;
int prefix_stress;
char *wordx;
char phonemes[N_WORD_PHONEMES];
char phonemes2[N_WORD_PHONEMES];
char prefix_phonemes[N_WORD_PHONEMES];
char unpron_phonemes[N_WORD_PHONEMES];
char end_phonemes[N_WORD_PHONEMES];
char end_phonemes2[N_WORD_PHONEMES];
char word_copy[N_WORD_BYTES];
char word_copy2[N_WORD_BYTES];
int word_copy_length;
char prefix_chars[0x3f + 2];
char prefix_chars[0x3f + 2];
int found=0;
int end_flags;
int end_flags;
char c_temp; // save a character byte while we temporarily replace it with space
int first_char;
int last_char = 0;
int add_plural_suffix = 0;
int prefix_flags = 0;
int more_suffixes;
int confirm_prefix;
int spell_word;
int stress_bits;
@@ -996,7 +1000,7 @@ if((wmark > 0) && (wmark < 8))
// the word has $abbrev flag, but no pronunciation specified. Speak as individual letters
spell_word = 1;
}
if(!found && iswdigit(first_char))
{
Lookup(tr,"_0lang",word_phonemes);
@@ -1157,7 +1161,6 @@ if((wmark > 0) && (wmark < 8))
if(confirm_prefix && !(end_type & SUFX_B))
{
int end2;
char phonemes2[N_WORD_PHONEMES];
char end_phonemes2[N_WORD_PHONEMES];

// remove any standard suffix and confirm that the prefix is still recognised
@@ -1212,7 +1215,7 @@ if((wmark > 0) && (wmark < 8))
for(ix=0; ix < n_chars; ix++) // num. of bytes to remove
{
prefix_chars[pfix++] = *wordx++;
if((prefix_type & SUFX_B) && (ix == (n_chars-1)))
{
prefix_chars[pfix-1] = 0; // discard the last character of the prefix, this is the separator character
@@ -1276,93 +1279,121 @@ if((wmark > 0) && (wmark < 8))
}
}




if((end_type != 0) && !(end_type & SUFX_P))
{
char phonemes2[N_WORD_PHONEMES];
strcpy(phonemes2,phonemes);
end_type1 = end_type;
strcpy(phonemes2,phonemes);

// The word has a standard ending, re-translate without this ending
end_flags = RemoveEnding(tr, wordx, end_type, word_copy);

phonemes[0] = 0;

if(prefix_phonemes[0] != 0)
{
// lookup the stem without the prefix removed
wordx[-1] = c_temp;
found = LookupDictList(tr, &word1, phonemes, dictionary_flags2, end_flags, wtab); // include prefix, but not suffix
wordx[-1] = ' ';
if(phonemes[0] == phonSWITCH)
{
// change to another language in order to translate this word
memcpy(wordx,word_copy,strlen(word_copy));
strcpy(word_phonemes,phonemes);
return(0);
}
if(dictionary_flags[0]==0)
{
dictionary_flags[0] = dictionary_flags2[0];
dictionary_flags[1] = dictionary_flags2[1];
}
if(found)
prefix_phonemes[0] = 0; // matched whole word, don't need prefix now

if((found==0) && (dictionary_flags2[0] != 0))
prefix_flags = 1;
}
if(found == 0)
{
found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix
if(phonemes[0] == phonSWITCH)
{
// change to another language in order to translate this word
memcpy(wordx,word_copy,strlen(word_copy));
strcpy(word_phonemes,phonemes);
return(0);
}

if(dictionary_flags2[0] & FLAG_ABBREV)
{
// Removing the suffix leaves a word which should be spoken as individual letters
// Not yet implemented
}
if(dictionary_flags[0]==0)
{
dictionary_flags[0] = dictionary_flags2[0];
dictionary_flags[1] = dictionary_flags2[1];
}
}
if(found == 0)
{
if(end_type & SUFX_Q)
{
// don't retranslate, use the original lookup result
strcpy(phonemes,phonemes2);

// language specific changes
ApplySpecialAttribute(tr,phonemes,dictionary_flags[0]);
}
else
{
if(end_flags & FLAG_SUFX)
wflags |= FLAG_SUFFIX_REMOVED;
if(end_type & SUFX_A)
wflags |= FLAG_SUFFIX_VOWEL;

TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, NULL, wflags, dictionary_flags);

if(phonemes[0] == phonSWITCH)
{
// change to another language in order to translate this word
strcpy(word_phonemes,phonemes);
memcpy(wordx,word_copy,strlen(word_copy));
wordx[-1] = c_temp;
return(0);
}
}
}

if((end_type & SUFX_T) == 0)
more_suffixes = 1;

while(more_suffixes)
{
more_suffixes = 0;
phonemes[0] = 0;

if(prefix_phonemes[0] != 0)
{
// lookup the stem without the prefix removed
wordx[-1] = c_temp;
found = LookupDictList(tr, &word1, phonemes, dictionary_flags2, end_flags, wtab); // include prefix, but not suffix
wordx[-1] = ' ';
if(phonemes[0] == phonSWITCH)
{
// change to another language in order to translate this word
memcpy(wordx,word_copy,strlen(word_copy));
strcpy(word_phonemes,phonemes);
return(0);
}
if(dictionary_flags[0]==0)
{
dictionary_flags[0] = dictionary_flags2[0];
dictionary_flags[1] = dictionary_flags2[1];
}
if(found)
prefix_phonemes[0] = 0; // matched whole word, don't need prefix now

if((found==0) && (dictionary_flags2[0] != 0))
prefix_flags = 1;
}
if(found == 0)
{
found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix
if(phonemes[0] == phonSWITCH)
{
// change to another language in order to translate this word
memcpy(wordx,word_copy,strlen(word_copy));
strcpy(word_phonemes,phonemes);
return(0);
}

if(dictionary_flags2[0] & FLAG_ABBREV)
{
// Removing the suffix leaves a word which should be spoken as individual letters
// Not yet implemented
}
if(dictionary_flags[0]==0)
{
dictionary_flags[0] = dictionary_flags2[0];
dictionary_flags[1] = dictionary_flags2[1];
}
}
if(found == 0)
{
if(end_type & SUFX_Q)
{
// don't retranslate, use the original lookup result
strcpy(phonemes,phonemes2);

// language specific changes
ApplySpecialAttribute(tr,phonemes,dictionary_flags[0]);
}
else
{
if(end_flags & FLAG_SUFX)
wflags |= FLAG_SUFFIX_REMOVED;
if(end_type & SUFX_A)
wflags |= FLAG_SUFFIX_VOWEL;

if(end_type & SUFX_M)
{
// allow more suffixes before this suffix
strcpy(end_phonemes2, end_phonemes);
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags);
strcat(end_phonemes, end_phonemes2); // add the phonemes for the previous suffixes after this one

if((end_type != 0) && !(end_type & SUFX_P))
{
// there is another suffix
end_flags = RemoveEnding(tr, wordx, end_type, NULL);
more_suffixes = 1;
}
}
else
{
// don't remove any previous suffix
TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, NULL, wflags, dictionary_flags);
end_type = 0;
}

if(phonemes[0] == phonSWITCH)
{
// change to another language in order to translate this word
strcpy(word_phonemes,phonemes);
memcpy(wordx,word_copy,strlen(word_copy));
wordx[-1] = c_temp;
return(0);
}
}
}
}


if((end_type1 & SUFX_T) == 0)
{
// the default is to add the suffix and then determine the word's stress pattern
AppendPhonemes(tr,phonemes, N_WORD_PHONEMES, end_phonemes);
@@ -1370,6 +1401,10 @@ if(dictionary_flags2[0] & FLAG_ABBREV)
}
memcpy(wordx,word_copy,strlen(word_copy));
}




wordx[-1] = c_temp;
}
}
@@ -1511,7 +1546,7 @@ if(dictionary_flags2[0] & FLAG_ABBREV)

// dictionary flags for this word give a clue about which alternative pronunciations of
// following words to use.
if(end_type & SUFX_F)
if(end_type1 & SUFX_F)
{
// expect a verb form, with or without -s suffix
tr->expect_verb = 2;
@@ -1859,7 +1894,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
ok = 0;
}
}
if((sylimit & 0x200) && ((wtab+1)->flags & FLAG_LAST_WORD))
{
// not if the next word is end-of-sentence
@@ -2079,7 +2114,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
else
if(ph_code == phonX1)
{
// a language specific action
// a language specific action
if(tr->langopts.param[LOPT_IT_DOUBLING])
{
flags |= FLAG_DOUBLING;
@@ -2125,7 +2160,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
{
if(first_phoneme && tr->langopts.param[LOPT_IT_DOUBLING])
{
if(((tr->prev_dict_flags & FLAG_DOUBLING) && (tr->langopts.param[LOPT_IT_DOUBLING] & 1)) ||
if(((tr->prev_dict_flags & FLAG_DOUBLING) && (tr->langopts.param[LOPT_IT_DOUBLING] & 1)) ||
(tr->end_stressed_vowel && (tr->langopts.param[LOPT_IT_DOUBLING] & 2)))
{
// italian, double the initial consonant if the previous word ends with a
@@ -2690,7 +2725,7 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre
}
#endif
if((c == 0x92) || (c == 0xb4) || (c == 0x2019) || (c == 0x2032))
c = '\''; // 'microsoft' quote or sexed closing single quote, or prime - possibly used as apostrophe
c = '\''; // 'microsoft' quote or sexed closing single quote, or prime - possibly used as apostrophe

if(((c == 0x2018) || (c == '?')) && IsAlpha(prev_out) && IsAlpha(next_in))
{
@@ -2755,7 +2790,7 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre
{
if((next_in == '\002') || ((next_in == '[') && option_phoneme_input))
{
// "[\002" is used internally to start phoneme mode
// "[\002" is used internally to start phoneme mode
phoneme_mode = FLAG_PHONEMES;
source_index++;
continue;
@@ -2874,7 +2909,7 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre
if(!IsSpace(prev_in) && IsAlpha(next_in))
{
if(prev_out != ' ')
{
{
// previous 'word' not yet ended (not alpha or numeric), start new word now.
c = ' ';
space_inserted = 1;
@@ -3110,7 +3145,7 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre
if((word_count==0) && (embedded_count > 0))
{
// add a null 'word' to carry the embedded command flag
embedded_list[embedded_ix-1] |= 0x80;
embedded_list[embedded_ix-1] |= 0x80;
words[word_count].flags |= FLAG_EMBEDDED;
word_count = 1;
}
@@ -3126,7 +3161,7 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre
ix--; // the last word is a bracket, mark the previous word as last
words[ix].flags |= FLAG_LAST_WORD;

// FLAG_NOSPACE check to avoid recognizing .mr -mr
// FLAG_NOSPACE check to avoid recognizing .mr -mr
if((terminator & CLAUSE_DOT) && !(words[word_count-1].flags & FLAG_NOSPACE))
words[word_count-1].flags |= FLAG_HAS_DOT;
}

+ 1
- 0
src/translate.h View File

@@ -129,6 +129,7 @@
#define SUFX_T 0x10000 // don't affect the stress position in the stem
#define SUFX_B 0x20000 // break, this character breaks the word into stem and suffix (used with SUFX_P)
#define SUFX_A 0x40000 // remember that the suffix starts with a vowel
#define SUFX_M 0x80000 // bit 19, allow multiple suffixes

#define SUFX_UNPRON 0x8000 // used to return $unpron flag from *_rules


Loading…
Cancel
Save