Browse Source

Merge remote-tracking branch 'BenTalagan/fix/genitive_on_acronym'

master
Reece H. Dunn 5 years ago
parent
commit
d36d3bd197
3 changed files with 21 additions and 4 deletions
  1. 10
    2
      src/libespeak-ng/readclause.c
  2. 2
    2
      src/libespeak-ng/translate.c
  3. 9
    0
      tests/translate.test

+ 10
- 2
src/libespeak-ng/readclause.c View File

@@ -632,6 +632,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
int cprev = ' '; // previous character
int cprev2 = ' ';
int c_next;
int c_next_2;
int parag;
int ix = 0;
int j;
@@ -1081,8 +1082,15 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
is_end_clause = false; // Roman number followed by dot
else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal)
is_end_clause = false; // only if followed by lower-case, (or if there is a XML tag)
- } else if (c_next == '\'')
- is_end_clause = false; // eg. u.s.a.'s
+ } else if (c_next == '\'') {
+ // A special case to handle english acronym + genitive
+ // eg. u.s.a.'s
+ // But avoid breaking clause handling if anything else follows the apostrophe.
+ c_next_2 = GetC();
+ if(c_next_2 == 's')
+ is_end_clause = false;
+ UngetC(c_next_2);
+ }
if (iswlower(c_next)) {
// next word has no capital letter, this dot is probably from an abbreviation
is_end_clause = 0;

+ 2
- 2
src/libespeak-ng/translate.c View File

@@ -513,7 +513,7 @@ static int CheckDottedAbbrev(char *word1)
if (word[nbytes+1] == '.') {
if (word[nbytes+2] == ' ')
ok = 1;
- else if (word[nbytes+2] == '\'') {
+ else if (word[nbytes+2] == '\'' && word[nbytes+3] == 's') {
nbytes += 2; // delete the final dot (eg. u.s.a.'s)
ok = 2;
}
@@ -2408,7 +2408,7 @@ void TranslateClause(Translator *tr, int *tone_out, char **voice_change)
c = ' '; // remove the dot if it's followed by a space or hyphen, so that it's not pronounced
}
} else if (c == '\'') {
- if (((prev_in == '.') || iswalnum(prev_in)) && IsAlpha(next_in)) {
+ if (((prev_in == '.' && next_in == 's') || iswalnum(prev_in)) && IsAlpha(next_in)) {
// between two letters, or in an abbreviation (eg. u.s.a.'s). Consider the apostrophe as part of the word
single_quoted = false;
} else if ((tr->langopts.param[LOPT_APOSTROPHE] & 1) && IsAlpha(next_in))

+ 9
- 0
tests/translate.test View File

@@ -55,6 +55,15 @@ d'i:
'i:
'Ef" "A. B C, D. E: F."

# Handling of english acronym + genitive without breaking simple quote
test_phonemes en "hi: s'Ed
aI w0z b'O@n InD@ j,u:,Es'eI z_:_: and l'Eft" "He said : I was born in the u.s.a.'s and left."

test_phonemes en "hi: s'Ed
h@l'oU
aIa#m b'ak
and l'Eft" "He said : 'Hello, I am back.' and left."

#----- Emoji [http://www.unicode.org/reports/tr51/tr51-12.html] -----

# ED-3 - emoji_character

Loading…
Cancel
Save