| if (compile_mode == 2) { | if (compile_mode == 2) { | ||||
| // end of the character replacements section | // end of the character replacements section | ||||
| fwrite(&n_rules, 1, 4, f_out); // write a zero word to terminate the replacemenmt list | fwrite(&n_rules, 1, 4, f_out); // write a zero word to terminate the replacemenmt list | ||||
| fputc(RULE_GROUP_END, f_out); | |||||
| compile_mode = 0; | compile_mode = 0; | ||||
| } | } | ||||
| } | } | ||||
| break; | break; | ||||
| case 2: // .replace | case 2: // .replace | ||||
| { | |||||
| int replace1; | |||||
| int replace2; | |||||
| char *p; | |||||
| p = (unsigned char *)buf; | |||||
| p = buf; | |||||
| replace1 = 0; | |||||
| replace2 = 0; | |||||
| while (isspace2(*p)) p++; | |||||
| ix = 0; | |||||
| while ((unsigned char)(*p) > 0x20) { // not space or zero-byte | |||||
| p += utf8_in(&c, p); | |||||
| replace1 += (c << ix); | |||||
| ix += 16; | |||||
| } | |||||
| while (isspace2(*p)) p++; | while (isspace2(*p)) p++; | ||||
| ix = 0; | |||||
| while ((unsigned char)(*p) > 0x20) { | |||||
| p += utf8_in(&c, p); | |||||
| replace2 += (c << ix); | |||||
| ix += 16; | |||||
| } | |||||
| if (replace1 != 0) { | |||||
| Write4Bytes(f_out, replace1); // write as little-endian | |||||
| Write4Bytes(f_out, replace2); // if big-endian, reverse the bytes in LoadDictionary() | |||||
| if ((unsigned char)(*p) > 0x20) { | |||||
| while ((unsigned char)(*p) > 0x20) { // not space or zero-byte | |||||
| fputc(*p, f_out); | |||||
| p++; | |||||
| } | |||||
| fputc(0, f_out); | |||||
| while (isspace2(*p)) p++; | |||||
| while ((unsigned char)(*p) > 0x20) { | |||||
| fputc(*p, f_out); | |||||
| p++; | |||||
| } | |||||
| fputc(0, f_out); | |||||
| } | } | ||||
| } | |||||
| break; | break; | ||||
| } | } | ||||
| } | } |
| if (p[0] == RULE_REPLACEMENTS) { | if (p[0] == RULE_REPLACEMENTS) { | ||||
| p = (char *)(((intptr_t)p+4) & ~3); // advance to next word boundary | p = (char *)(((intptr_t)p+4) & ~3); // advance to next word boundary | ||||
| tr->langopts.replace_chars = (unsigned int *)p; | |||||
| tr->langopts.replace_chars = (unsigned char *)p; | |||||
| while (*(unsigned int *)p != 0) | while (*(unsigned int *)p != 0) | ||||
| p += 8; // find the end of the replacement list, each entry is 2 words. | |||||
| p += 4; | |||||
| #ifdef ARCH_BIG | |||||
| pw = (unsigned int *)(tr->langopts.replace_chars); | |||||
| while (*pw != 0) { | |||||
| *pw = Reverse4Bytes(*pw); | |||||
| pw++; | |||||
| *pw = Reverse4Bytes(*pw); | |||||
| pw++; | |||||
| } | |||||
| #endif | |||||
| p++; | |||||
| while (*p != RULE_GROUP_END) p++; | |||||
| p++; | |||||
| continue; | continue; | ||||
| } | } | ||||
| return 1; | return 1; | ||||
| } | } | ||||
| static const char * | |||||
| FindReplacementChars(Translator *tr, unsigned int c, unsigned int nextc, bool *ignore_next) { | |||||
| unsigned int uc = 0; | |||||
| const char *from = (const char *)tr->langopts.replace_chars; | |||||
| while (*(unsigned int *)from != 0) { | |||||
| from += utf8_in((int *)&uc, from); | |||||
| if (c == uc) { | |||||
| if (*from == 0) return from + 1; | |||||
| from += utf8_in((int *)&uc, from); | |||||
| if (*from == 0 && uc == (unsigned int)towlower2(nextc, tr)) { | |||||
| *ignore_next = true; | |||||
| return from + 1; | |||||
| } | |||||
| } | |||||
| // replacement 'from' string (skip the remaining part, if any) | |||||
| while (*from != '\0') from++; | |||||
| from++; | |||||
| // replacement 'to' string | |||||
| while (*from != '\0') from++; | |||||
| from++; | |||||
| } | |||||
| return NULL; | |||||
| } | |||||
| // handle .replace rule in xx_rules file | // handle .replace rule in xx_rules file | ||||
| static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert, int *wordflags) | static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert, int *wordflags) | ||||
| { | { | ||||
| int ix; | |||||
| unsigned int word; | |||||
| unsigned int new_c, c2 = ' ', c_lower; | unsigned int new_c, c2 = ' ', c_lower; | ||||
| int upper_case = 0; | int upper_case = 0; | ||||
| static bool ignore_next = false; | static bool ignore_next = false; | ||||
| const unsigned int *replace_chars; | |||||
| if (ignore_next) { | if (ignore_next) { | ||||
| ignore_next = false; | ignore_next = false; | ||||
| } | } | ||||
| if (c == 0) return 0; | if (c == 0) return 0; | ||||
| if ((replace_chars = tr->langopts.replace_chars) == NULL) | |||||
| if (tr->langopts.replace_chars == NULL) | |||||
| return c; | return c; | ||||
| // there is a list of character codes to be substituted with alternative codes | // there is a list of character codes to be substituted with alternative codes | ||||
| upper_case = 1; | upper_case = 1; | ||||
| } | } | ||||
| new_c = 0; | |||||
| for (ix = 0; (word = replace_chars[ix]) != 0; ix += 2) { | |||||
| if (c_lower == (word & 0xffff)) { | |||||
| if ((word >> 16) == 0) { | |||||
| new_c = replace_chars[ix+1]; | |||||
| break; | |||||
| } | |||||
| if ((word >> 16) == (unsigned int)towlower2(next_in, tr)) { | |||||
| new_c = replace_chars[ix+1]; | |||||
| ignore_next = true; | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| if (new_c == 0) | |||||
| const char *to = FindReplacementChars(tr, c_lower, next_in, &ignore_next); | |||||
| if (to == NULL) | |||||
| return c; // no substitution | return c; // no substitution | ||||
| if (new_c & 0xffe00000) { | |||||
| to += utf8_in((int *)&new_c, to); | |||||
| if (*to != 0) { | |||||
| // there is a second character to be inserted | // there is a second character to be inserted | ||||
| // don't convert the case of the second character unless the next letter is also upper case | // don't convert the case of the second character unless the next letter is also upper case | ||||
| c2 = new_c >> 16; | |||||
| to += utf8_in((int *)&c2, to); | |||||
| if (upper_case && iswupper(next_in)) | if (upper_case && iswupper(next_in)) | ||||
| c2 = ucd_toupper(c2); | c2 = ucd_toupper(c2); | ||||
| *insert = c2; | *insert = c2; | ||||
| new_c &= 0xffff; | |||||
| } | } | ||||
| if (upper_case) | if (upper_case) |
| bool textmode; // the meaning of FLAG_TEXTMODE is reversed (to save data when *_list file is compiled) | bool textmode; // the meaning of FLAG_TEXTMODE is reversed (to save data when *_list file is compiled) | ||||
| char dotless_i; // uses letter U+0131 | char dotless_i; // uses letter U+0131 | ||||
| int listx; // compile *_listx after *list | int listx; // compile *_listx after *list | ||||
| const unsigned int *replace_chars; // characters to be substitutes | |||||
| const unsigned char *replace_chars; // characters to be substitutes | |||||
| int our_alphabet; // offset for main alphabet (if not set in letter_bits_offset) | int our_alphabet; // offset for main alphabet (if not set in letter_bits_offset) | ||||
| int alt_alphabet; // offset for another language to recognize | int alt_alphabet; // offset for another language to recognize | ||||
| int alt_alphabet_lang; // language for the alt_alphabet | int alt_alphabet_lang; // language for the alt_alphabet |