| return phon_out_buf; | return phon_out_buf; | ||||
| } | } | ||||
| static int LetterGroupNo(char *rule) | |||||
| { | |||||
| /* | |||||
| * Returns number of letter group | |||||
| */ | |||||
| int groupNo = *rule; | |||||
| groupNo = groupNo - 'A'; // substracting 'A' makes letter_group equal to number in .Lxx definition | |||||
| if (groupNo < 0) // fix sign if necessary | |||||
| groupNo += 256; | |||||
| return groupNo; | |||||
| } | |||||
| static int IsLetterGroup(Translator *tr, char *word, int group, int pre) | static int IsLetterGroup(Translator *tr, char *word, int group, int pre) | ||||
| { | { | ||||
| /* Match the word against a list of utf-8 strings. | /* Match the word against a list of utf-8 strings. | ||||
| switch (rb) | switch (rb) | ||||
| { | { | ||||
| case RULE_LETTERGP: | case RULE_LETTERGP: | ||||
| letter_group = *rule++ - 'A'; | |||||
| letter_group = LetterGroupNo(rule++); | |||||
| if (IsLetter(tr, letter_w, letter_group)) { | if (IsLetter(tr, letter_w, letter_group)) { | ||||
| lg_pts = 20; | lg_pts = 20; | ||||
| if (letter_group == 2) | if (letter_group == 2) | ||||
| failed = 1; | failed = 1; | ||||
| break; | break; | ||||
| case RULE_LETTERGP2: // match against a list of utf-8 strings | case RULE_LETTERGP2: // match against a list of utf-8 strings | ||||
| letter_group = *rule++ - 'A'; | |||||
| if (letter_group < 0) | |||||
| letter_group += 256; | |||||
| letter_group = LetterGroupNo(rule++); | |||||
| if ((n_bytes = IsLetterGroup(tr, post_ptr-1, letter_group, 0)) > 0) { | if ((n_bytes = IsLetterGroup(tr, post_ptr-1, letter_group, 0)) > 0) { | ||||
| add_points = (20-distance_right); | add_points = (20-distance_right); | ||||
| post_ptr += (n_bytes-1); | post_ptr += (n_bytes-1); | ||||
| { | { | ||||
| // '(Jxy' means 'skip characters until xy' | // '(Jxy' means 'skip characters until xy' | ||||
| char *p = post_ptr + letter_xbytes; | char *p = post_ptr + letter_xbytes; | ||||
| char *p2 = p; // pointer to the previous character in the word | |||||
| int rule_w; // first wide character of skip rule | |||||
| char *p2 = p; // pointer to the previous character in the word | |||||
| int rule_w; // first wide character of skip rule | |||||
| utf8_in(&rule_w, rule); | utf8_in(&rule_w, rule); | ||||
| while ((letter_w != rule_w) && (letter_w != RULE_SPACE) && (letter_w != 0)) { | |||||
| int g_bytes = 0; // bytes of successfully found character group | |||||
| while ((letter_w != rule_w) && (letter_w != RULE_SPACE) && (letter_w != 0) && (g_bytes == 0)) { | |||||
| p2 = p; | p2 = p; | ||||
| p += utf8_in(&letter_w, p); | p += utf8_in(&letter_w, p); | ||||
| if (rule_w == RULE_LETTERGP2) | |||||
| g_bytes = IsLetterGroup(tr, p2, LetterGroupNo(rule + 1), 0); | |||||
| } | } | ||||
| if (letter_w == rule_w) | |||||
| if ((letter_w == rule_w) || (g_bytes > 0)) | |||||
| post_ptr = p2; | post_ptr = p2; | ||||
| } | } | ||||
| break; | break; | ||||
| switch (rb) | switch (rb) | ||||
| { | { | ||||
| case RULE_LETTERGP: | case RULE_LETTERGP: | ||||
| letter_group = *rule++ - 'A'; | |||||
| letter_group = LetterGroupNo(rule++); | |||||
| if (IsLetter(tr, letter_w, letter_group)) { | if (IsLetter(tr, letter_w, letter_group)) { | ||||
| lg_pts = 20; | lg_pts = 20; | ||||
| if (letter_group == 2) | if (letter_group == 2) | ||||
| failed = 1; | failed = 1; | ||||
| break; | break; | ||||
| case RULE_LETTERGP2: // match against a list of utf-8 strings | case RULE_LETTERGP2: // match against a list of utf-8 strings | ||||
| letter_group = *rule++ - 'A'; // substracting 'A' makes letter_group equal to number in .Lxx definition | |||||
| if(letter_group<0) | |||||
| letter_group += 256; | |||||
| letter_group = LetterGroupNo(rule++); | |||||
| if ((n_bytes = IsLetterGroup(tr, pre_ptr, letter_group, 1)) > 0) { | if ((n_bytes = IsLetterGroup(tr, pre_ptr, letter_group, 1)) > 0) { | ||||
| add_points = (20-distance_right); | add_points = (20-distance_right); | ||||
| pre_ptr -= (n_bytes-1); | pre_ptr -= (n_bytes-1); | ||||
| // 'xyJ)' means 'skip characters backwards until xy' | // 'xyJ)' means 'skip characters backwards until xy' | ||||
| char *p = pre_ptr; // pointer to current character in word | char *p = pre_ptr; // pointer to current character in word | ||||
| char *p2 = p; // pointer to previous character in word | char *p2 = p; // pointer to previous character in word | ||||
| int g_bytes = 0; // bytes of successfully found character group | |||||
| while ((*p != *rule) && (*p != RULE_SPACE) && (*p != 0)) { | |||||
| while ((*p != *rule) && (*p != RULE_SPACE) && (*p != 0) && (g_bytes == 0)) { | |||||
| p2 = p; | p2 = p; | ||||
| p--; | p--; | ||||
| if (*rule == RULE_LETTERGP2) | |||||
| g_bytes = IsLetterGroup(tr, p2, LetterGroupNo(rule + 1), 1); | |||||
| } | } | ||||
| // if succeed, set pre_ptr to next character after 'xy' and remaining | // if succeed, set pre_ptr to next character after 'xy' and remaining | ||||
| // 'xy' part is checked as usual in following cycles of PRE rule characters | // 'xy' part is checked as usual in following cycles of PRE rule characters | ||||
| if (*p == *rule) | if (*p == *rule) | ||||
| pre_ptr = p2; | pre_ptr = p2; | ||||
| if (g_bytes > 0) | |||||
| pre_ptr = p2 + 1; | |||||
| } | } | ||||
| break; | break; | ||||