| @@ -1369,6 +1369,7 @@ static espeak_ng_STATUS compile_dictrules(FILE *f_in, FILE *f_out, char *fname_t | |||
| if (compile_mode == 2) { | |||
| // end of the character replacements section | |||
| fwrite(&n_rules, 1, 4, f_out); // write a zero word to terminate the replacement list | |||
| fputc(RULE_GROUP_END, f_out); | |||
| compile_mode = 0; | |||
| } | |||
| @@ -1447,33 +1448,23 @@ static espeak_ng_STATUS compile_dictrules(FILE *f_in, FILE *f_out, char *fname_t | |||
| } | |||
| break; | |||
| case 2: // .replace | |||
| { | |||
| int replace1; | |||
| int replace2; | |||
| char *p; | |||
| p = (unsigned char *)buf; | |||
| p = buf; | |||
| replace1 = 0; | |||
| replace2 = 0; | |||
| while (isspace2(*p)) p++; | |||
| ix = 0; | |||
| while ((unsigned char)(*p) > 0x20) { // not space or zero-byte | |||
| p += utf8_in(&c, p); | |||
| replace1 += (c << ix); | |||
| ix += 16; | |||
| } | |||
| while (isspace2(*p)) p++; | |||
| ix = 0; | |||
| while ((unsigned char)(*p) > 0x20) { | |||
| p += utf8_in(&c, p); | |||
| replace2 += (c << ix); | |||
| ix += 16; | |||
| } | |||
| if (replace1 != 0) { | |||
| Write4Bytes(f_out, replace1); // write as little-endian | |||
| Write4Bytes(f_out, replace2); // if big-endian, reverse the bytes in LoadDictionary() | |||
| if ((unsigned char)(*p) > 0x20) { | |||
| while ((unsigned char)(*p) > 0x20) { // not space or zero-byte | |||
| fputc(*p, f_out); | |||
| p++; | |||
| } | |||
| fputc(0, f_out); | |||
| while (isspace2(*p)) p++; | |||
| while ((unsigned char)(*p) > 0x20) { | |||
| fputc(*p, f_out); | |||
| p++; | |||
| } | |||
| fputc(0, f_out); | |||
| } | |||
| } | |||
| break; | |||
| } | |||
| } | |||
| @@ -149,20 +149,11 @@ static void InitGroups(Translator *tr) | |||
| if (p[0] == RULE_REPLACEMENTS) { | |||
| p = (char *)(((intptr_t)p+4) & ~3); // advance to next word boundary | |||
| tr->langopts.replace_chars = (unsigned int *)p; | |||
| tr->langopts.replace_chars = (unsigned char *)p; | |||
| while (*(unsigned int *)p != 0) | |||
| p += 8; // find the end of the replacement list, each entry is 2 words. | |||
| p += 4; | |||
| #ifdef ARCH_BIG | |||
| pw = (unsigned int *)(tr->langopts.replace_chars); | |||
| while (*pw != 0) { | |||
| *pw = Reverse4Bytes(*pw); | |||
| pw++; | |||
| *pw = Reverse4Bytes(*pw); | |||
| pw++; | |||
| } | |||
| #endif | |||
| p++; | |||
| while (*p != RULE_GROUP_END) p++; | |||
| p++; | |||
| continue; | |||
| } | |||
| @@ -1790,15 +1790,38 @@ static int EmbeddedCommand(unsigned int *source_index_out) | |||
| return 1; | |||
| } | |||
| static const char * | |||
| FindReplacementChars(Translator *tr, unsigned int c, unsigned int nextc, bool *ignore_next) { | |||
| unsigned int uc = 0; | |||
| const char *from = (const char *)tr->langopts.replace_chars; | |||
| while (*(unsigned int *)from != 0) { | |||
| from += utf8_in((int *)&uc, from); | |||
| if (c == uc) { | |||
| if (*from == 0) return from + 1; | |||
| from += utf8_in((int *)&uc, from); | |||
| if (*from == 0 && uc == (unsigned int)towlower2(nextc, tr)) { | |||
| *ignore_next = true; | |||
| return from + 1; | |||
| } | |||
| } | |||
| // replacement 'from' string (skip the remaining part, if any) | |||
| while (*from != '\0') from++; | |||
| from++; | |||
| // replacement 'to' string | |||
| while (*from != '\0') from++; | |||
| from++; | |||
| } | |||
| return NULL; | |||
| } | |||
| // handle .replace rule in xx_rules file | |||
| static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert, int *wordflags) | |||
| { | |||
| int ix; | |||
| unsigned int word; | |||
| unsigned int new_c, c2 = ' ', c_lower; | |||
| int upper_case = 0; | |||
| static bool ignore_next = false; | |||
| const unsigned int *replace_chars; | |||
| if (ignore_next) { | |||
| ignore_next = false; | |||
| @@ -1806,7 +1829,7 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, | |||
| } | |||
| if (c == 0) return 0; | |||
| if ((replace_chars = tr->langopts.replace_chars) == NULL) | |||
| if (tr->langopts.replace_chars == NULL) | |||
| return c; | |||
| // there is a list of character codes to be substituted with alternative codes | |||
| @@ -1816,32 +1839,18 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, | |||
| upper_case = 1; | |||
| } | |||
| new_c = 0; | |||
| for (ix = 0; (word = replace_chars[ix]) != 0; ix += 2) { | |||
| if (c_lower == (word & 0xffff)) { | |||
| if ((word >> 16) == 0) { | |||
| new_c = replace_chars[ix+1]; | |||
| break; | |||
| } | |||
| if ((word >> 16) == (unsigned int)towlower2(next_in, tr)) { | |||
| new_c = replace_chars[ix+1]; | |||
| ignore_next = true; | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| if (new_c == 0) | |||
| const char *to = FindReplacementChars(tr, c_lower, next_in, &ignore_next); | |||
| if (to == NULL) | |||
| return c; // no substitution | |||
| if (new_c & 0xffe00000) { | |||
| to += utf8_in((int *)&new_c, to); | |||
| if (*to != 0) { | |||
| // there is a second character to be inserted | |||
| // don't convert the case of the second character unless the next letter is also upper case | |||
| c2 = new_c >> 16; | |||
| to += utf8_in((int *)&c2, to); | |||
| if (upper_case && iswupper(next_in)) | |||
| c2 = ucd_toupper(c2); | |||
| *insert = c2; | |||
| new_c &= 0xffff; | |||
| } | |||
| if (upper_case) | |||
| @@ -556,7 +556,7 @@ typedef struct { | |||
| bool textmode; // the meaning of FLAG_TEXTMODE is reversed (to save data when *_list file is compiled) | |||
| char dotless_i; // uses letter U+0131 | |||
| int listx; // compile *_listx after *list | |||
| const unsigned int *replace_chars; // characters to be substituted | |||
| const unsigned char *replace_chars; // characters to be substituted | |||
| int our_alphabet; // offset for main alphabet (if not set in letter_bits_offset) | |||
| int alt_alphabet; // offset for another language to recognize | |||
| int alt_alphabet_lang; // language for the alt_alphabet | |||