5 years ago · d36d3bd197
--- a/src/libespeak-ng/readclause.c
+++ b/src/libespeak-ng/readclause.c
@@ -632,6 +632,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
 	int cprev = ' '; // previous character
 	int cprev2 = ' ';
 	int c_next;
 	int c_next_2;
 	int parag;
 	int ix = 0;
 	int j;
@@ -1081,8 +1082,15 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
 								is_end_clause = false; // Roman number followed by dot
 							else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal)
 								is_end_clause = false; // only if followed by lower-case, (or if there is a XML tag)
 						} else if (c_next == '\'')
 							is_end_clause = false;    // eg. u.s.a.'s
 						} else if (c_next == '\'') {
 							// Special case for an English acronym followed by a genitive,
 							// e.g. u.s.a.'s
 							// Do not disturb normal clause handling when anything other than 's' follows the apostrophe.
 							c_next_2 = GetC();
 							if(c_next_2 == 's')
 								is_end_clause = false;
 							UngetC(c_next_2);
 						}
 						if (iswlower(c_next)) {
 							// next word has no capital letter, this dot is probably from an abbreviation
 							is_end_clause = 0;
--- a/src/libespeak-ng/translate.c
+++ b/src/libespeak-ng/translate.c
@@ -513,7 +513,7 @@ static int CheckDottedAbbrev(char *word1)
 			if (word[nbytes+1] == '.') {
 				if (word[nbytes+2] == ' ')
 					ok = 1;
 				else if (word[nbytes+2] == '\'') {
 				else if (word[nbytes+2] == '\'' && word[nbytes+3] == 's') {
 					nbytes += 2; // delete the final dot (eg. u.s.a.'s)
 					ok = 2;
 				}
@@ -2408,7 +2408,7 @@ void TranslateClause(Translator *tr, int *tone_out, char **voice_change)
 						c = ' '; // remove the dot if it's followed by a space or hyphen, so that it's not pronounced
 				}
 			} else if (c == '\'') {
 				if (((prev_in == '.') || iswalnum(prev_in)) && IsAlpha(next_in)) {
 				if (((prev_in == '.' && next_in == 's') || iswalnum(prev_in)) && IsAlpha(next_in)) {
 					// between two letters, or in an abbreviation (eg. u.s.a.'s). Consider the apostrophe as part of the word
 					single_quoted = false;
 				} else if ((tr->langopts.param[LOPT_APOSTROPHE] & 1) && IsAlpha(next_in))
--- a/tests/translate.test
+++ b/tests/translate.test
@@ -55,6 +55,15 @@ d'i:
 'i:
 'Ef" "A. B C, D. E: F."

 # Handling of an English acronym + genitive without breaking single-quote handling
 test_phonemes en "hi: s'Ed
 aI w0z b'O@n InD@ j,u:,Es'eI z_:_: and l'Eft" "He said : I was born in the u.s.a.'s and left."

 test_phonemes en "hi: s'Ed
 h@l'oU
 aIa#m b'ak
 and l'Eft" "He said : 'Hello, I am back.' and left."

 #----- Emoji [http://www.unicode.org/reports/tr51/tr51-12.html] -----

 # ED-3 - emoji_character