| int cprev = ' '; // previous character | int cprev = ' '; // previous character | ||||
| int cprev2 = ' '; | int cprev2 = ' '; | ||||
| int c_next; | int c_next; | ||||
| int c_next_2; | |||||
| int parag; | int parag; | ||||
| int ix = 0; | int ix = 0; | ||||
| int j; | int j; | ||||
| is_end_clause = false; // Roman number followed by dot | is_end_clause = false; // Roman number followed by dot | ||||
| else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal) | else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal) | ||||
| is_end_clause = false; // only if followed by lower-case, (or if there is a XML tag) | is_end_clause = false; // only if followed by lower-case, (or if there is a XML tag) | ||||
| } else if (c_next == '\'') | |||||
| is_end_clause = false; // eg. u.s.a.'s | |||||
| } else if (c_next == '\'') { | |||||
| // A special case to handle english acronym + genitive | |||||
| // eg. u.s.a.'s | |||||
| // But avoid breaking clause handling if anything else follows the apostrophe. | |||||
| c_next_2 = GetC(); | |||||
| if(c_next_2 == 's') | |||||
| is_end_clause = false; | |||||
| UngetC(c_next_2); | |||||
| } | |||||
| if (iswlower(c_next)) { | if (iswlower(c_next)) { | ||||
| // next word has no capital letter, this dot is probably from an abbreviation | // next word has no capital letter, this dot is probably from an abbreviation | ||||
| is_end_clause = 0; | is_end_clause = 0; |
| if (word[nbytes+1] == '.') { | if (word[nbytes+1] == '.') { | ||||
| if (word[nbytes+2] == ' ') | if (word[nbytes+2] == ' ') | ||||
| ok = 1; | ok = 1; | ||||
| else if (word[nbytes+2] == '\'') { | |||||
| else if (word[nbytes+2] == '\'' && word[nbytes+3] == 's') { | |||||
| nbytes += 2; // delete the final dot (eg. u.s.a.'s) | nbytes += 2; // delete the final dot (eg. u.s.a.'s) | ||||
| ok = 2; | ok = 2; | ||||
| } | } | ||||
| c = ' '; // remove the dot if it's followed by a space or hyphen, so that it's not pronounced | c = ' '; // remove the dot if it's followed by a space or hyphen, so that it's not pronounced | ||||
| } | } | ||||
| } else if (c == '\'') { | } else if (c == '\'') { | ||||
| if (((prev_in == '.') || iswalnum(prev_in)) && IsAlpha(next_in)) { | |||||
| if (((prev_in == '.' && next_in == 's') || iswalnum(prev_in)) && IsAlpha(next_in)) { | |||||
| // between two letters, or in an abbreviation (eg. u.s.a.'s). Consider the apostrophe as part of the word | // between two letters, or in an abbreviation (eg. u.s.a.'s). Consider the apostrophe as part of the word | ||||
| single_quoted = false; | single_quoted = false; | ||||
| } else if ((tr->langopts.param[LOPT_APOSTROPHE] & 1) && IsAlpha(next_in)) | } else if ((tr->langopts.param[LOPT_APOSTROPHE] & 1) && IsAlpha(next_in)) |
| 'i: | 'i: | ||||
| 'Ef" "A. B C, D. E: F." | 'Ef" "A. B C, D. E: F." | ||||
| # Handling of english acronym + genitive without breaking simple quote | |||||
| test_phonemes en "hi: s'Ed | |||||
| aI w0z b'O@n InD@ j,u:,Es'eI z_:_: and l'Eft" "He said : I was born in the u.s.a.'s and left." | |||||
| test_phonemes en "hi: s'Ed | |||||
| h@l'oU | |||||
| aIa#m b'ak | |||||
| and l'Eft" "He said : 'Hello, I am back.' and left." | |||||
| #----- Emoji [http://www.unicode.org/reports/tr51/tr51-12.html] ----- | #----- Emoji [http://www.unicode.org/reports/tr51/tr51-12.html] ----- | ||||
| # ED-3 - emoji_character | # ED-3 - emoji_character |