Browse Source

Merge remote-tracking branch 'BenTalagan/fix/genitive_on_acronym'

master
Reece H. Dunn 5 years ago
parent
commit
d36d3bd197
3 changed files with 21 additions and 4 deletions
  1. 10
    2
      src/libespeak-ng/readclause.c
  2. 2
    2
      src/libespeak-ng/translate.c
  3. 9
    0
      tests/translate.test

+ 10
- 2
src/libespeak-ng/readclause.c View File

int cprev = ' '; // previous character int cprev = ' '; // previous character
int cprev2 = ' '; int cprev2 = ' ';
int c_next; int c_next;
int c_next_2;
int parag; int parag;
int ix = 0; int ix = 0;
int j; int j;
is_end_clause = false; // Roman number followed by dot is_end_clause = false; // Roman number followed by dot
else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal) else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal)
is_end_clause = false; // only if followed by lower-case, (or if there is a XML tag) is_end_clause = false; // only if followed by lower-case, (or if there is a XML tag)
} else if (c_next == '\'')
is_end_clause = false; // eg. u.s.a.'s
} else if (c_next == '\'') {
// A special case to handle an English acronym followed by a genitive
// eg. u.s.a.'s
// But avoid breaking clause handling if anything else follows the apostrophe.
c_next_2 = GetC();
if(c_next_2 == 's')
is_end_clause = false;
UngetC(c_next_2);
}
if (iswlower(c_next)) { if (iswlower(c_next)) {
// next word has no capital letter, this dot is probably from an abbreviation // next word has no capital letter, this dot is probably from an abbreviation
is_end_clause = 0; is_end_clause = 0;

+ 2
- 2
src/libespeak-ng/translate.c View File

if (word[nbytes+1] == '.') { if (word[nbytes+1] == '.') {
if (word[nbytes+2] == ' ') if (word[nbytes+2] == ' ')
ok = 1; ok = 1;
else if (word[nbytes+2] == '\'') {
else if (word[nbytes+2] == '\'' && word[nbytes+3] == 's') {
nbytes += 2; // delete the final dot (eg. u.s.a.'s) nbytes += 2; // delete the final dot (eg. u.s.a.'s)
ok = 2; ok = 2;
} }
c = ' '; // remove the dot if it's followed by a space or hyphen, so that it's not pronounced c = ' '; // remove the dot if it's followed by a space or hyphen, so that it's not pronounced
} }
} else if (c == '\'') { } else if (c == '\'') {
if (((prev_in == '.') || iswalnum(prev_in)) && IsAlpha(next_in)) {
if (((prev_in == '.' && next_in == 's') || iswalnum(prev_in)) && IsAlpha(next_in)) {
// between two letters, or in an abbreviation (eg. u.s.a.'s). Consider the apostrophe as part of the word // between two letters, or in an abbreviation (eg. u.s.a.'s). Consider the apostrophe as part of the word
single_quoted = false; single_quoted = false;
} else if ((tr->langopts.param[LOPT_APOSTROPHE] & 1) && IsAlpha(next_in)) } else if ((tr->langopts.param[LOPT_APOSTROPHE] & 1) && IsAlpha(next_in))

+ 9
- 0
tests/translate.test View File

'i: 'i:
'Ef" "A. B C, D. E: F." 'Ef" "A. B C, D. E: F."


# Handling of English acronym + genitive without breaking single-quote handling
test_phonemes en "hi: s'Ed
aI w0z b'O@n InD@ j,u:,Es'eI z_:_: and l'Eft" "He said : I was born in the u.s.a.'s and left."

test_phonemes en "hi: s'Ed
h@l'oU
aIa#m b'ak
and l'Eft" "He said : 'Hello, I am back.' and left."

#----- Emoji [http://www.unicode.org/reports/tr51/tr51-12.html] ----- #----- Emoji [http://www.unicode.org/reports/tr51/tr51-12.html] -----


# ED-3 - emoji_character # ED-3 - emoji_character

Loading…
Cancel
Save