int cprev = ' '; // previous character | int cprev = ' '; // previous character | ||||
int cprev2 = ' '; | int cprev2 = ' '; | ||||
int c_next; | int c_next; | ||||
int c_next_2; | |||||
int parag; | int parag; | ||||
int ix = 0; | int ix = 0; | ||||
int j; | int j; | ||||
is_end_clause = false; // Roman number followed by dot | is_end_clause = false; // Roman number followed by dot | ||||
else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal) | else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal) | ||||
is_end_clause = false; // only if followed by lower-case, (or if there is an XML tag) | is_end_clause = false; // only if followed by lower-case, (or if there is an XML tag) | ||||
} else if (c_next == '\'') | |||||
is_end_clause = false; // eg. u.s.a.'s | |||||
} else if (c_next == '\'') { | |||||
// A special case to handle English acronym + genitive | |||||
// eg. u.s.a.'s | |||||
// But avoid breaking clause handling if anything else follows the apostrophe. | |||||
c_next_2 = GetC(); | |||||
if(c_next_2 == 's') | |||||
is_end_clause = false; | |||||
UngetC(c_next_2); | |||||
} | |||||
if (iswlower(c_next)) { | if (iswlower(c_next)) { | ||||
// next word has no capital letter, this dot is probably from an abbreviation | // next word has no capital letter, this dot is probably from an abbreviation | ||||
is_end_clause = 0; | is_end_clause = 0; |
if (word[nbytes+1] == '.') { | if (word[nbytes+1] == '.') { | ||||
if (word[nbytes+2] == ' ') | if (word[nbytes+2] == ' ') | ||||
ok = 1; | ok = 1; | ||||
else if (word[nbytes+2] == '\'') { | |||||
else if (word[nbytes+2] == '\'' && word[nbytes+3] == 's') { | |||||
nbytes += 2; // delete the final dot (eg. u.s.a.'s) | nbytes += 2; // delete the final dot (eg. u.s.a.'s) | ||||
ok = 2; | ok = 2; | ||||
} | } | ||||
c = ' '; // remove the dot if it's followed by a space or hyphen, so that it's not pronounced | c = ' '; // remove the dot if it's followed by a space or hyphen, so that it's not pronounced | ||||
} | } | ||||
} else if (c == '\'') { | } else if (c == '\'') { | ||||
if (((prev_in == '.') || iswalnum(prev_in)) && IsAlpha(next_in)) { | |||||
if (((prev_in == '.' && next_in == 's') || iswalnum(prev_in)) && IsAlpha(next_in)) { | |||||
// between two letters, or in an abbreviation (eg. u.s.a.'s). Consider the apostrophe as part of the word | // between two letters, or in an abbreviation (eg. u.s.a.'s). Consider the apostrophe as part of the word | ||||
single_quoted = false; | single_quoted = false; | ||||
} else if ((tr->langopts.param[LOPT_APOSTROPHE] & 1) && IsAlpha(next_in)) | } else if ((tr->langopts.param[LOPT_APOSTROPHE] & 1) && IsAlpha(next_in)) |
'i: | 'i: | ||||
'Ef" "A. B C, D. E: F." | 'Ef" "A. B C, D. E: F." | ||||
# Handling of English acronym + genitive without breaking single quotes | |||||
test_phonemes en "hi: s'Ed | |||||
aI w0z b'O@n InD@ j,u:,Es'eI z_:_: and l'Eft" "He said : I was born in the u.s.a.'s and left." | |||||
test_phonemes en "hi: s'Ed | |||||
h@l'oU | |||||
aIa#m b'ak | |||||
and l'Eft" "He said : 'Hello, I am back.' and left." | |||||
#----- Emoji [http://www.unicode.org/reports/tr51/tr51-12.html] ----- | #----- Emoji [http://www.unicode.org/reports/tr51/tr51-12.html] ----- | ||||
# ED-3 - emoji_character | # ED-3 - emoji_character |