| int ix; | int ix; | ||||
| int match_type; // left, right, or consume | int match_type; // left, right, or consume | ||||
| int failed; | |||||
| int unpron_ignore; | |||||
| int consumed; // number of letters consumed from input | |||||
| int syllable_count; | int syllable_count; | ||||
| int vowel; | int vowel; | ||||
| int letter_group; | int letter_group; | ||||
| int distance_right; | |||||
| int distance_left; | |||||
| int lg_pts; | int lg_pts; | ||||
| int n_bytes; | int n_bytes; | ||||
| int add_points; | int add_points; | ||||
| int command; | int command; | ||||
| bool check_atstart; | |||||
| unsigned int *flags; | unsigned int *flags; | ||||
| MatchRecord match; | MatchRecord match; | ||||
| // search through dictionary rules | // search through dictionary rules | ||||
| while (rule[0] != RULE_GROUP_END) { | while (rule[0] != RULE_GROUP_END) { | ||||
| unpron_ignore = word_flags & FLAG_UNPRON_TEST; | |||||
| bool check_atstart = false; | |||||
| int consumed = 0; // number of letters consumed from input | |||||
| int distance_left = -2; | |||||
| int distance_right = -6; // used to reduce points for matches further away the current letter | |||||
| int failed = 0; | |||||
| int unpron_ignore = word_flags & FLAG_UNPRON_TEST; | |||||
| match_type = 0; | match_type = 0; | ||||
| consumed = 0; | |||||
| letter_w = 0; | letter_w = 0; | ||||
| distance_right = -6; // used to reduce points for matches further away the current letter | |||||
| distance_left = -2; | |||||
| check_atstart = false; | |||||
| match.points = 1; | match.points = 1; | ||||
| match.end_type = 0; | match.end_type = 0; | ||||
| // work through next rule until end, or until no-match proved | // work through next rule until end, or until no-match proved | ||||
| rule_start = rule; | rule_start = rule; | ||||
| failed = 0; | |||||
| while (!failed) { | while (!failed) { | ||||
| rb = *rule++; | rb = *rule++; | ||||
| add_points = 0; | |||||
| if (rb <= RULE_LINENUM) { | if (rb <= RULE_LINENUM) { | ||||
| switch (rb) | switch (rb) | ||||
| continue; | continue; | ||||
| } | } | ||||
| add_points = 0; | |||||
| switch (match_type) | switch (match_type) | ||||
| { | { | ||||
| case 0: | case 0: | ||||
| unsigned char c, c2; | unsigned char c, c2; | ||||
| unsigned int c12; | unsigned int c12; | ||||
| int wc = 0; | int wc = 0; | ||||
| int wc_bytes; | |||||
| char *p2; // copy of p for use in double letter chain match | char *p2; // copy of p for use in double letter chain match | ||||
| int found; | int found; | ||||
| int g; // group chain number | int g; // group chain number | ||||
| int g1; // first group for this letter | int g1; // first group for this letter | ||||
| int n; | |||||
| int letter; | int letter; | ||||
| int any_alpha = 0; | int any_alpha = 0; | ||||
| int ix; | int ix; | ||||
| end_phonemes[0] = 0; | end_phonemes[0] = 0; | ||||
| while (((c = *p) != ' ') && (c != 0)) { | while (((c = *p) != ' ') && (c != 0)) { | ||||
| wc_bytes = utf8_in(&wc, p); | |||||
| int wc_bytes = utf8_in(&wc, p); | |||||
| if (IsAlpha(wc)) | if (IsAlpha(wc)) | ||||
| any_alpha++; | any_alpha++; | ||||
| n = tr->groups2_count[c]; | |||||
| int n = tr->groups2_count[c]; | |||||
| if (IsDigit(wc) && ((tr->langopts.tone_numbers == 0) || !any_alpha)) { | if (IsDigit(wc) && ((tr->langopts.tone_numbers == 0) || !any_alpha)) { | ||||
| // lookup the number in *_list not *_rules | // lookup the number in *_list not *_rules | ||||
| char string[8]; | char string[8]; | ||||
| // return: number of bytes, bit 6: 1=used compression | // return: number of bytes, bit 6: 1=used compression | ||||
| int c; | int c; | ||||
| int c2; | |||||
| int ix; | |||||
| int offset; | int offset; | ||||
| int min; | int min; | ||||
| int max; | int max; | ||||
| char *p = text; | char *p = text; | ||||
| char *p2; | char *p2; | ||||
| bool all_alpha = true; | bool all_alpha = true; | ||||
| int bits; | |||||
| int acc; | |||||
| int pairs_start; | int pairs_start; | ||||
| const short *pairs_list; | |||||
| int bufix; | int bufix; | ||||
| char buf[N_WORD_BYTES+1]; | char buf[N_WORD_BYTES+1]; | ||||
| if (all_alpha) { | if (all_alpha) { | ||||
| // compress to 6 bits per character | // compress to 6 bits per character | ||||
| acc = 0; | |||||
| bits = 0; | |||||
| int ix; | |||||
| int acc = 0; | |||||
| int bits = 0; | |||||
| p = buf; | p = buf; | ||||
| p2 = buf; | p2 = buf; | ||||
| while ((c = *p++) != 0) { | while ((c = *p++) != 0) { | ||||
| const short *pairs_list; | |||||
| if ((pairs_list = tr->frequent_pairs) != NULL) { | if ((pairs_list = tr->frequent_pairs) != NULL) { | ||||
| c2 = c + (*p << 8); | |||||
| int c2 = c + (*p << 8); | |||||
| for (ix = 0; c2 >= pairs_list[ix]; ix++) { | for (ix = 0; c2 >= pairs_list[ix]; ix++) { | ||||
| if (c2 == pairs_list[ix]) { | if (c2 == pairs_list[ix]) { | ||||
| // found an encoding for a 2-character pair | // found an encoding for a 2-character pair | ||||
| int flags0; | int flags0; | ||||
| unsigned int flags[2]; | unsigned int flags[2]; | ||||
| int say_as; | |||||
| char *word1 = (char *)word; | char *word1 = (char *)word; | ||||
| char text[80]; | |||||
| flags[0] = 0; | flags[0] = 0; | ||||
| flags[1] = FLAG_LOOKUP_SYMBOL; | flags[1] = FLAG_LOOKUP_SYMBOL; | ||||
| flags0 = flags[0]; | flags0 = flags[0]; | ||||
| if (flags[0] & FLAG_TEXTMODE) { | if (flags[0] & FLAG_TEXTMODE) { | ||||
| say_as = option_sayas; | |||||
| int say_as = option_sayas; | |||||
| option_sayas = 0; // don't speak replacement word as letter names | option_sayas = 0; // don't speak replacement word as letter names | ||||
| // NOTE: TranslateRoman checks text[-2] and IsLetterGroup looks | // NOTE: TranslateRoman checks text[-2] and IsLetterGroup looks | ||||
| // for a heading \0, so pad the start of text to prevent | // for a heading \0, so pad the start of text to prevent | ||||
| // it reading data on the stack. | // it reading data on the stack. | ||||
| char text[80]; | |||||
| text[0] = 0; | text[0] = 0; | ||||
| text[1] = ' '; | text[1] = ' '; | ||||
| text[2] = ' '; | text[2] = ' '; | ||||
| char *word_end; | char *word_end; | ||||
| int len_ending; | int len_ending; | ||||
| int end_flags; | int end_flags; | ||||
| const char *p; | |||||
| int len; | |||||
| char ending[50] = {0}; | char ending[50] = {0}; | ||||
| // these lists are language specific, but are only relevant if the 'e' suffix flag is used | // these lists are language specific, but are only relevant if the 'e' suffix flag is used | ||||
| if (IsLetter(tr, word_end[-1], LETTERGP_VOWEL2) && IsLetter(tr, word_end[0], 1)) { | if (IsLetter(tr, word_end[-1], LETTERGP_VOWEL2) && IsLetter(tr, word_end[0], 1)) { | ||||
| // vowel(incl.'y') + hard.consonant | // vowel(incl.'y') + hard.consonant | ||||
| const char *p; | |||||
| for (i = 0; (p = add_e_exceptions[i]) != NULL; i++) { | for (i = 0; (p = add_e_exceptions[i]) != NULL; i++) { | ||||
| len = strlen(p); | |||||
| int len = strlen(p); | |||||
| if (memcmp(p, &word_end[1-len], len) == 0) | if (memcmp(p, &word_end[1-len], len) == 0) | ||||
| break; | break; | ||||
| } | } | ||||
| if (p == NULL) | if (p == NULL) | ||||
| end_flags |= FLAG_SUFX_E_ADDED; // no exception found | end_flags |= FLAG_SUFX_E_ADDED; // no exception found | ||||
| } else { | } else { | ||||
| const char *p; | |||||
| for (i = 0; (p = add_e_additions[i]) != NULL; i++) { | for (i = 0; (p = add_e_additions[i]) != NULL; i++) { | ||||
| len = strlen(p); | |||||
| int len = strlen(p); | |||||
| if (memcmp(p, &word_end[1-len], len) == 0) { | if (memcmp(p, &word_end[1-len], len) == 0) { | ||||
| end_flags |= FLAG_SUFX_E_ADDED; | end_flags |= FLAG_SUFX_E_ADDED; | ||||
| break; | break; |