@@ -1369,6 +1369,7 @@ static espeak_ng_STATUS compile_dictrules(FILE *f_in, FILE *f_out, char *fname_t | |||
if (compile_mode == 2) { | |||
// end of the character replacements section | |||
fwrite(&n_rules, 1, 4, f_out); // write a zero word to terminate the replacemenmt list | |||
fputc(RULE_GROUP_END, f_out); | |||
compile_mode = 0; | |||
} | |||
@@ -1447,33 +1448,23 @@ static espeak_ng_STATUS compile_dictrules(FILE *f_in, FILE *f_out, char *fname_t | |||
} | |||
break; | |||
case 2: // .replace | |||
{ | |||
int replace1; | |||
int replace2; | |||
char *p; | |||
p = (unsigned char *)buf; | |||
p = buf; | |||
replace1 = 0; | |||
replace2 = 0; | |||
while (isspace2(*p)) p++; | |||
ix = 0; | |||
while ((unsigned char)(*p) > 0x20) { // not space or zero-byte | |||
p += utf8_in(&c, p); | |||
replace1 += (c << ix); | |||
ix += 16; | |||
} | |||
while (isspace2(*p)) p++; | |||
ix = 0; | |||
while ((unsigned char)(*p) > 0x20) { | |||
p += utf8_in(&c, p); | |||
replace2 += (c << ix); | |||
ix += 16; | |||
} | |||
if (replace1 != 0) { | |||
Write4Bytes(f_out, replace1); // write as little-endian | |||
Write4Bytes(f_out, replace2); // if big-endian, reverse the bytes in LoadDictionary() | |||
if ((unsigned char)(*p) > 0x20) { | |||
while ((unsigned char)(*p) > 0x20) { // not space or zero-byte | |||
fputc(*p, f_out); | |||
p++; | |||
} | |||
fputc(0, f_out); | |||
while (isspace2(*p)) p++; | |||
while ((unsigned char)(*p) > 0x20) { | |||
fputc(*p, f_out); | |||
p++; | |||
} | |||
fputc(0, f_out); | |||
} | |||
} | |||
break; | |||
} | |||
} |
@@ -149,20 +149,11 @@ static void InitGroups(Translator *tr) | |||
if (p[0] == RULE_REPLACEMENTS) { | |||
p = (char *)(((intptr_t)p+4) & ~3); // advance to next word boundary | |||
tr->langopts.replace_chars = (unsigned int *)p; | |||
tr->langopts.replace_chars = (unsigned char *)p; | |||
while (*(unsigned int *)p != 0) | |||
p += 8; // find the end of the replacement list, each entry is 2 words. | |||
p += 4; | |||
#ifdef ARCH_BIG | |||
pw = (unsigned int *)(tr->langopts.replace_chars); | |||
while (*pw != 0) { | |||
*pw = Reverse4Bytes(*pw); | |||
pw++; | |||
*pw = Reverse4Bytes(*pw); | |||
pw++; | |||
} | |||
#endif | |||
p++; | |||
while (*p != RULE_GROUP_END) p++; | |||
p++; | |||
continue; | |||
} | |||
@@ -1790,15 +1790,38 @@ static int EmbeddedCommand(unsigned int *source_index_out) | |||
return 1; | |||
} | |||
static const char * | |||
FindReplacementChars(Translator *tr, unsigned int c, unsigned int nextc, bool *ignore_next) { | |||
unsigned int uc = 0; | |||
const char *from = (const char *)tr->langopts.replace_chars; | |||
while (*(unsigned int *)from != 0) { | |||
from += utf8_in((int *)&uc, from); | |||
if (c == uc) { | |||
if (*from == 0) return from + 1; | |||
from += utf8_in((int *)&uc, from); | |||
if (*from == 0 && uc == (unsigned int)towlower2(nextc, tr)) { | |||
*ignore_next = true; | |||
return from + 1; | |||
} | |||
} | |||
// replacement 'from' string (skip the remaining part, if any) | |||
while (*from != '\0') from++; | |||
from++; | |||
// replacement 'to' string | |||
while (*from != '\0') from++; | |||
from++; | |||
} | |||
return NULL; | |||
} | |||
// handle .replace rule in xx_rules file | |||
static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert, int *wordflags) | |||
{ | |||
int ix; | |||
unsigned int word; | |||
unsigned int new_c, c2 = ' ', c_lower; | |||
int upper_case = 0; | |||
static bool ignore_next = false; | |||
const unsigned int *replace_chars; | |||
if (ignore_next) { | |||
ignore_next = false; | |||
@@ -1806,7 +1829,7 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, | |||
} | |||
if (c == 0) return 0; | |||
if ((replace_chars = tr->langopts.replace_chars) == NULL) | |||
if (tr->langopts.replace_chars == NULL) | |||
return c; | |||
// there is a list of character codes to be substituted with alternative codes | |||
@@ -1816,32 +1839,18 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, | |||
upper_case = 1; | |||
} | |||
new_c = 0; | |||
for (ix = 0; (word = replace_chars[ix]) != 0; ix += 2) { | |||
if (c_lower == (word & 0xffff)) { | |||
if ((word >> 16) == 0) { | |||
new_c = replace_chars[ix+1]; | |||
break; | |||
} | |||
if ((word >> 16) == (unsigned int)towlower2(next_in, tr)) { | |||
new_c = replace_chars[ix+1]; | |||
ignore_next = true; | |||
break; | |||
} | |||
} | |||
} | |||
if (new_c == 0) | |||
const char *to = FindReplacementChars(tr, c_lower, next_in, &ignore_next); | |||
if (to == NULL) | |||
return c; // no substitution | |||
if (new_c & 0xffe00000) { | |||
to += utf8_in((int *)&new_c, to); | |||
if (*to != 0) { | |||
// there is a second character to be inserted | |||
// don't convert the case of the second character unless the next letter is also upper case | |||
c2 = new_c >> 16; | |||
to += utf8_in((int *)&c2, to); | |||
if (upper_case && iswupper(next_in)) | |||
c2 = ucd_toupper(c2); | |||
*insert = c2; | |||
new_c &= 0xffff; | |||
} | |||
if (upper_case) |
@@ -556,7 +556,7 @@ typedef struct { | |||
bool textmode; // the meaning of FLAG_TEXTMODE is reversed (to save data when *_list file is compiled) | |||
char dotless_i; // uses letter U+0131 | |||
int listx; // compile *_listx after *list | |||
const unsigned int *replace_chars; // characters to be substitutes | |||
const unsigned char *replace_chars; // characters to be substitutes | |||
int our_alphabet; // offset for main alphabet (if not set in letter_bits_offset) | |||
int alt_alphabet; // offset for another language to recognize | |||
int alt_alphabet_lang; // language for the alt_alphabet |