@@ -632,6 +632,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_ | |||
int cprev = ' '; // previous character | |||
int cprev2 = ' '; | |||
int c_next; | |||
int c_next_2; | |||
int parag; | |||
int ix = 0; | |||
int j; | |||
@@ -1081,8 +1082,15 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_ | |||
is_end_clause = false; // Roman number followed by dot | |||
else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal) | |||
is_end_clause = false; // only if followed by lower-case, (or if there is a XML tag) | |||
} else if (c_next == '\'') | |||
is_end_clause = false; // eg. u.s.a.'s | |||
} else if (c_next == '\'') { | |||
// Special case: English acronym followed by a genitive 's | |||
// e.g. u.s.a.'s | |||
// Only suppress the clause end when the apostrophe is followed by 's'; anything else keeps normal clause handling. | |||
c_next_2 = GetC(); | |||
if(c_next_2 == 's') | |||
is_end_clause = false; | |||
UngetC(c_next_2); | |||
} | |||
if (iswlower(c_next)) { | |||
// next word has no capital letter, this dot is probably from an abbreviation | |||
is_end_clause = 0; |
@@ -513,7 +513,7 @@ static int CheckDottedAbbrev(char *word1) | |||
if (word[nbytes+1] == '.') { | |||
if (word[nbytes+2] == ' ') | |||
ok = 1; | |||
else if (word[nbytes+2] == '\'') { | |||
else if (word[nbytes+2] == '\'' && word[nbytes+3] == 's') { | |||
nbytes += 2; // delete the final dot (eg. u.s.a.'s) | |||
ok = 2; | |||
} | |||
@@ -2408,7 +2408,7 @@ void TranslateClause(Translator *tr, int *tone_out, char **voice_change) | |||
c = ' '; // remove the dot if it's followed by a space or hyphen, so that it's not pronounced | |||
} | |||
} else if (c == '\'') { | |||
if (((prev_in == '.') || iswalnum(prev_in)) && IsAlpha(next_in)) { | |||
if (((prev_in == '.' && next_in == 's') || iswalnum(prev_in)) && IsAlpha(next_in)) { | |||
// between two letters, or in an abbreviation (eg. u.s.a.'s). Consider the apostrophe as part of the word | |||
single_quoted = false; | |||
} else if ((tr->langopts.param[LOPT_APOSTROPHE] & 1) && IsAlpha(next_in)) |
@@ -55,6 +55,15 @@ d'i: | |||
'i: | |||
'Ef" "A. B C, D. E: F." | |||
# Handling of English acronym + genitive ('s) without breaking single-quoted text | |||
test_phonemes en "hi: s'Ed | |||
aI w0z b'O@n InD@ j,u:,Es'eI z_:_: and l'Eft" "He said : I was born in the u.s.a.'s and left." | |||
test_phonemes en "hi: s'Ed | |||
h@l'oU | |||
aIa#m b'ak | |||
and l'Eft" "He said : 'Hello, I am back.' and left." | |||
#----- Emoji [http://www.unicode.org/reports/tr51/tr51-12.html] ----- | |||
# ED-3 - emoji_character |