Use parameters instead of globals. Note the change from sizeof(word_phonemes) to size_word_phonemes.master
@@ -34,7 +34,6 @@ | |||
#include "translate.h" | |||
#include "dictionary.h" // for TranslateRules, LookupDictList, Cha... | |||
#include "numbers.h" // for SetSpellingStress | |||
#include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonPAUSE_... | |||
#include "phonemelist.h" // for MakePhonemeList | |||
#include "readclause.h" // for towlower2, Eof, ReadClause, is_str_... | |||
@@ -369,696 +368,6 @@ char *strchr_w(const char *s, int c) | |||
return strchr((char *)s, c); // (char *) is needed for Borland compiler | |||
} | |||
// append plural suffixes depending on preceding letter | |||
static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes) | |||
{ | |||
char word_zz[4] = { ' ', 'z', 'z', 0 }; | |||
char word_iz[4] = { ' ', 'i', 'z', 0 }; | |||
char word_ss[4] = { ' ', 's', 's', 0 }; | |||
if (flags & FLAG_HAS_PLURAL) { | |||
// s or 's suffix, append [s], [z] or [Iz] depending on previous letter | |||
if (last_char == 'f') | |||
TranslateRules(tr, &word_ss[1], word_phonemes, N_WORD_PHONEMES, | |||
NULL, 0, NULL); | |||
else if ((last_char == 0) || (strchr_w("hsx", last_char) == NULL)) | |||
TranslateRules(tr, &word_zz[1], word_phonemes, N_WORD_PHONEMES, | |||
NULL, 0, NULL); | |||
else | |||
TranslateRules(tr, &word_iz[1], word_phonemes, N_WORD_PHONEMES, | |||
NULL, 0, NULL); | |||
} | |||
} | |||
static int CheckDottedAbbrev(char *word1) | |||
{ | |||
int wc; | |||
int count = 0; | |||
int nbytes; | |||
int ok; | |||
int ix; | |||
char *word; | |||
char *wbuf; | |||
char word_buf[80]; | |||
word = word1; | |||
wbuf = word_buf; | |||
for (;;) { | |||
ok = 0; | |||
nbytes = utf8_in(&wc, word); | |||
if ((word[nbytes] == ' ') && IsAlpha(wc)) { | |||
if (word[nbytes+1] == '.') { | |||
if (word[nbytes+2] == ' ') | |||
ok = 1; | |||
else if (word[nbytes+2] == '\'' && word[nbytes+3] == 's') { | |||
nbytes += 2; // delete the final dot (eg. u.s.a.'s) | |||
ok = 2; | |||
} | |||
} else if ((count > 0) && (word[nbytes] == ' ')) | |||
ok = 2; | |||
} | |||
if (ok == 0) | |||
break; | |||
for (ix = 0; ix < nbytes; ix++) | |||
*wbuf++ = word[ix]; | |||
count++; | |||
if (ok == 2) { | |||
word += nbytes; | |||
break; | |||
} | |||
word += (nbytes + 3); | |||
} | |||
if (count > 1) { | |||
ix = wbuf - word_buf; | |||
memcpy(word1, word_buf, ix); | |||
while (&word1[ix] < word) | |||
word1[ix++] = ' '; | |||
dictionary_skipwords = (count - 1)*2; | |||
} | |||
return count; | |||
} | |||
static int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out) | |||
{ | |||
// word1 is terminated by space (0x20) character | |||
char *word1; | |||
int word_length; | |||
int ix; | |||
char *p; | |||
int pfix; | |||
int n_chars; | |||
unsigned int dictionary_flags[2]; | |||
unsigned int dictionary_flags2[2]; | |||
int end_type = 0; | |||
int end_type1 = 0; | |||
int prefix_type = 0; | |||
int prefix_stress; | |||
char *wordx; | |||
char phonemes[N_WORD_PHONEMES]; | |||
char phonemes2[N_WORD_PHONEMES]; | |||
char prefix_phonemes[N_WORD_PHONEMES]; | |||
char unpron_phonemes[N_WORD_PHONEMES]; | |||
char end_phonemes[N_WORD_PHONEMES]; | |||
char end_phonemes2[N_WORD_PHONEMES]; | |||
char word_copy[N_WORD_BYTES]; | |||
char word_copy2[N_WORD_BYTES]; | |||
int word_copy_length; | |||
char prefix_chars[0x3f + 2]; | |||
bool found = false; | |||
int end_flags; | |||
int c_temp; // save a character byte while we temporarily replace it with space | |||
int first_char; | |||
int last_char = 0; | |||
int prefix_flags = 0; | |||
bool more_suffixes; | |||
bool confirm_prefix; | |||
int spell_word; | |||
int emphasize_allcaps = 0; | |||
int wflags; | |||
int was_unpronouncable = 0; | |||
int loopcount; | |||
int add_suffix_phonemes = 0; | |||
WORD_TAB wtab_null[8]; | |||
if (wtab == NULL) { | |||
memset(wtab_null, 0, sizeof(wtab_null)); | |||
wtab = wtab_null; | |||
} | |||
wflags = wtab->flags; | |||
dictionary_flags[0] = 0; | |||
dictionary_flags[1] = 0; | |||
dictionary_flags2[0] = 0; | |||
dictionary_flags2[1] = 0; | |||
dictionary_skipwords = 0; | |||
phonemes[0] = 0; | |||
unpron_phonemes[0] = 0; | |||
prefix_phonemes[0] = 0; | |||
end_phonemes[0] = 0; | |||
if (tr->data_dictlist == NULL) { | |||
// dictionary is not loaded | |||
word_phonemes[0] = 0; | |||
return 0; | |||
} | |||
// count the length of the word | |||
word1 = word_start; | |||
if (*word1 == ' ') word1++; // possibly a dot was replaced by space: $dot | |||
wordx = word1; | |||
utf8_in(&first_char, wordx); | |||
word_length = 0; | |||
while ((*wordx != 0) && (*wordx != ' ')) { | |||
wordx += utf8_in(&last_char, wordx); | |||
word_length++; | |||
} | |||
word_copy_length = wordx - word_start; | |||
if (word_copy_length >= N_WORD_BYTES) | |||
word_copy_length = N_WORD_BYTES-1; | |||
memcpy(word_copy2, word_start, word_copy_length); | |||
spell_word = 0; | |||
if ((word_length == 1) && (wflags & FLAG_TRANSLATOR2)) { | |||
// retranslating a 1-character word using a different language, say its name | |||
utf8_in(&c_temp, wordx+1); // the next character | |||
if (!IsAlpha(c_temp) || (AlphabetFromChar(last_char) != AlphabetFromChar(c_temp))) | |||
spell_word = 1; | |||
} | |||
if (option_sayas == SAYAS_KEY) { | |||
if (word_length == 1) | |||
spell_word = 4; | |||
else { | |||
// is there a translation for this keyname ? | |||
word1--; | |||
*word1 = '_'; // prefix keyname with '_' | |||
found = LookupDictList(tr, &word1, phonemes, dictionary_flags, 0, wtab); | |||
} | |||
} | |||
// try an initial lookup in the dictionary list, we may find a pronunciation specified, or | |||
// we may just find some flags | |||
if (option_sayas & 0x10) { | |||
// SAYAS_CHAR, SAYAS_GYLPH, or SAYAS_SINGLE_CHAR | |||
spell_word = option_sayas & 0xf; // 2,3,4 | |||
} else { | |||
if (!found) | |||
found = LookupDictList(tr, &word1, phonemes, dictionary_flags, FLAG_ALLOW_TEXTMODE, wtab); // the original word | |||
if ((dictionary_flags[0] & (FLAG_ALLOW_DOT | FLAG_NEEDS_DOT)) && (wordx[1] == '.')) | |||
wordx[1] = ' '; // remove a Dot after this word | |||
if (dictionary_flags[0] & FLAG_TEXTMODE) { | |||
if (word_out != NULL) | |||
strcpy(word_out, word1); | |||
return dictionary_flags[0]; | |||
} else if ((found == false) && (dictionary_flags[0] & FLAG_SKIPWORDS) && !(dictionary_flags[0] & FLAG_ABBREV)) { | |||
// grouped words, but no translation. Join the words with hyphens. | |||
wordx = word1; | |||
ix = 0; | |||
while (ix < dictionary_skipwords) { | |||
if (*wordx == ' ') { | |||
*wordx = '-'; | |||
ix++; | |||
} | |||
wordx++; | |||
} | |||
} | |||
if ((word_length == 1) && (dictionary_skipwords == 0)) { | |||
// is this a series of single letters separated by dots? | |||
if (CheckDottedAbbrev(word1)) { | |||
dictionary_flags[0] = 0; | |||
dictionary_flags[1] = 0; | |||
spell_word = 1; | |||
if (dictionary_skipwords) | |||
dictionary_flags[0] = FLAG_SKIPWORDS; | |||
} | |||
} | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
if (!found && (dictionary_flags[0] & FLAG_ABBREV)) { | |||
// the word has $abbrev flag, but no pronunciation specified. Speak as individual letters | |||
spell_word = 1; | |||
} | |||
if (!found && iswdigit(first_char)) { | |||
Lookup(tr, "_0lang", word_phonemes); | |||
if (word_phonemes[0] == phonSWITCH) | |||
return 0; | |||
if ((tr->langopts.numbers2 & NUM2_ENGLISH_NUMERALS) && !(wtab->flags & FLAG_CHAR_REPLACED)) { | |||
// for this language, speak English numerals (0-9) with the English voice | |||
sprintf(word_phonemes, "%c", phonSWITCH); | |||
return 0; | |||
} | |||
found = TranslateNumber(tr, word1, phonemes, dictionary_flags, wtab, 0); | |||
} | |||
if (!found && ((wflags & FLAG_UPPERS) != FLAG_FIRST_UPPER)) { | |||
// either all upper or all lower case | |||
if ((tr->langopts.numbers & NUM_ROMAN) || ((tr->langopts.numbers & NUM_ROMAN_CAPITALS) && (wflags & FLAG_ALL_UPPER))) { | |||
if ((wflags & FLAG_LAST_WORD) || !(wtab[1].flags & FLAG_NOSPACE)) { | |||
// don't use Roman number if this word is not separated from the next word (eg. "XLTest") | |||
if ((found = TranslateRoman(tr, word1, phonemes, wtab)) != 0) | |||
dictionary_flags[0] |= FLAG_ABBREV; // prevent emphasis if capitals | |||
} | |||
} | |||
} | |||
if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha(first_char)) { | |||
if ((option_tone_flags & OPTION_EMPHASIZE_ALLCAPS) && !(dictionary_flags[0] & FLAG_ABBREV)) { | |||
// emphasize words which are in capitals | |||
emphasize_allcaps = FLAG_EMPHASIZED; | |||
} else if (!found && !(dictionary_flags[0] & FLAG_SKIPWORDS) && (word_length < 4) && (tr->clause_lower_count > 3) | |||
&& (tr->clause_upper_count <= tr->clause_lower_count)) { | |||
// An upper case word in a lower case clause. This could be an abbreviation. | |||
spell_word = 1; | |||
} | |||
} | |||
} | |||
if (spell_word > 0) { | |||
// Speak as individual letters | |||
phonemes[0] = 0; | |||
if (SpeakIndividualLetters(tr, word1, phonemes, spell_word, current_alphabet, word_phonemes) == NULL) { | |||
if (word_length > 1) | |||
return FLAG_SPELLWORD; // a mixture of languages, retranslate as individual letters, separated by spaces | |||
return 0; | |||
} | |||
strcpy(word_phonemes, phonemes); | |||
if (wflags & FLAG_TRANSLATOR2) | |||
return 0; | |||
addPluralSuffixes(wflags, tr, last_char, word_phonemes); | |||
return dictionary_flags[0] & FLAG_SKIPWORDS; // for "b.c.d" | |||
} else if (found == false) { | |||
// word's pronunciation is not given in the dictionary list, although | |||
// dictionary_flags may have ben set there | |||
int posn; | |||
bool non_initial = false; | |||
int length; | |||
posn = 0; | |||
length = 999; | |||
wordx = word1; | |||
while (((length < 3) && (length > 0)) || (word_length > 1 && Unpronouncable(tr, wordx, posn))) { | |||
// This word looks "unpronouncable", so speak letters individually until we | |||
// find a remainder that we can pronounce. | |||
was_unpronouncable = FLAG_WAS_UNPRONOUNCABLE; | |||
emphasize_allcaps = 0; | |||
if (wordx[0] == '\'') | |||
break; | |||
if (posn > 0) | |||
non_initial = true; | |||
wordx += TranslateLetter(tr, wordx, unpron_phonemes, non_initial, current_alphabet); | |||
posn++; | |||
if (unpron_phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
strcpy(word_phonemes, unpron_phonemes); | |||
if (strcmp(&unpron_phonemes[1], ESPEAKNG_DEFAULT_VOICE) == 0) | |||
return FLAG_SPELLWORD; // _^_en must have been set in TranslateLetter(), not *_rules which uses only _^_ | |||
return 0; | |||
} | |||
length = 0; | |||
while (wordx[length] != ' ') length++; | |||
} | |||
SetSpellingStress(tr, unpron_phonemes, 0, posn); | |||
// anything left ? | |||
if (*wordx != ' ') { | |||
if ((unpron_phonemes[0] != 0) && (wordx[0] != '\'')) { | |||
// letters which have been spoken individually from affecting the pronunciation of the pronuncable part | |||
wordx[-1] = ' '; | |||
} | |||
// Translate the stem | |||
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags); | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
if ((phonemes[0] == 0) && (end_phonemes[0] == 0)) { | |||
int wc; | |||
// characters not recognised, speak them individually | |||
// ?? should we say super/sub-script numbers and letters here? | |||
utf8_in(&wc, wordx); | |||
if ((word_length == 1) && (IsAlpha(wc) || IsSuperscript(wc))) { | |||
if ((wordx = SpeakIndividualLetters(tr, wordx, phonemes, spell_word, current_alphabet, word_phonemes)) == NULL) | |||
return 0; | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
} | |||
c_temp = wordx[-1]; | |||
found = false; | |||
confirm_prefix = true; | |||
for (loopcount = 0; (loopcount < 50) && (end_type & SUFX_P); loopcount++) { | |||
// Found a standard prefix, remove it and retranslate | |||
// loopcount guards against an endless loop | |||
if (confirm_prefix && !(end_type & SUFX_B)) { | |||
int end2; | |||
char end_phonemes2[N_WORD_PHONEMES]; | |||
// remove any standard suffix and confirm that the prefix is still recognised | |||
phonemes2[0] = 0; | |||
end2 = TranslateRules(tr, wordx, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags); | |||
if (end2) { | |||
RemoveEnding(tr, wordx, end2, word_copy); | |||
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags); | |||
memcpy(wordx, word_copy, strlen(word_copy)); | |||
if ((end_type & SUFX_P) == 0) { | |||
// after removing the suffix, the prefix is no longer recognised. | |||
// Keep the suffix, but don't use the prefix | |||
end_type = end2; | |||
strcpy(phonemes, phonemes2); | |||
strcpy(end_phonemes, end_phonemes2); | |||
if (option_phonemes & espeakPHONEMES_TRACE) { | |||
DecodePhonemes(end_phonemes, end_phonemes2); | |||
fprintf(f_trans, " suffix [%s]\n\n", end_phonemes2); | |||
} | |||
} | |||
confirm_prefix = false; | |||
continue; | |||
} | |||
} | |||
prefix_type = end_type; | |||
if (prefix_type & SUFX_V) | |||
tr->expect_verb = 1; // use the verb form of the word | |||
wordx[-1] = c_temp; | |||
if ((prefix_type & SUFX_B) == 0) { | |||
for (ix = (prefix_type & 0xf); ix > 0; ix--) { // num. of characters to remove | |||
wordx++; | |||
while ((*wordx & 0xc0) == 0x80) wordx++; // for multibyte characters | |||
} | |||
} else { | |||
pfix = 1; | |||
prefix_chars[0] = 0; | |||
n_chars = prefix_type & 0x3f; | |||
for (ix = 0; ix < n_chars; ix++) { // num. of bytes to remove | |||
prefix_chars[pfix++] = *wordx++; | |||
if ((prefix_type & SUFX_B) && (ix == (n_chars-1))) | |||
prefix_chars[pfix-1] = 0; // discard the last character of the prefix, this is the separator character | |||
} | |||
prefix_chars[pfix] = 0; | |||
} | |||
c_temp = wordx[-1]; | |||
wordx[-1] = ' '; | |||
confirm_prefix = true; | |||
wflags |= FLAG_PREFIX_REMOVED; | |||
if (prefix_type & SUFX_B) { | |||
// SUFX_B is used for Turkish, tr_rules contains " ' (Pb" | |||
// examine the prefix part | |||
char *wordpf; | |||
char prefix_phonemes2[12]; | |||
strncpy0(prefix_phonemes2, end_phonemes, sizeof(prefix_phonemes2)); | |||
wordpf = &prefix_chars[1]; | |||
strcpy(prefix_phonemes, phonemes); | |||
// look for stress marker or $abbrev | |||
found = LookupDictList(tr, &wordpf, phonemes, dictionary_flags, 0, wtab); | |||
if (found) | |||
strcpy(prefix_phonemes, phonemes); | |||
if (dictionary_flags[0] & FLAG_ABBREV) { | |||
prefix_phonemes[0] = 0; | |||
SpeakIndividualLetters(tr, wordpf, prefix_phonemes, 1, current_alphabet, word_phonemes); | |||
} | |||
} else | |||
strcat(prefix_phonemes, end_phonemes); | |||
end_phonemes[0] = 0; | |||
end_type = 0; | |||
found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, SUFX_P, wtab); // without prefix | |||
if (dictionary_flags[0] == 0) { | |||
dictionary_flags[0] = dictionary_flags2[0]; | |||
dictionary_flags[1] = dictionary_flags2[1]; | |||
} else | |||
prefix_flags = 1; | |||
if (found == false) { | |||
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags & (FLAG_HYPHEN_AFTER | FLAG_PREFIX_REMOVED), dictionary_flags); | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
wordx[-1] = c_temp; | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
} | |||
} | |||
if ((end_type != 0) && !(end_type & SUFX_P)) { | |||
end_type1 = end_type; | |||
strcpy(phonemes2, phonemes); | |||
// The word has a standard ending, re-translate without this ending | |||
end_flags = RemoveEnding(tr, wordx, end_type, word_copy); | |||
more_suffixes = true; | |||
while (more_suffixes) { | |||
more_suffixes = false; | |||
phonemes[0] = 0; | |||
if (prefix_phonemes[0] != 0) { | |||
// lookup the stem without the prefix removed | |||
wordx[-1] = c_temp; | |||
found = LookupDictList(tr, &word1, phonemes, dictionary_flags2, end_flags, wtab); // include prefix, but not suffix | |||
wordx[-1] = ' '; | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
memcpy(wordx, word_copy, strlen(word_copy)); | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
if (dictionary_flags[0] == 0) { | |||
dictionary_flags[0] = dictionary_flags2[0]; | |||
dictionary_flags[1] = dictionary_flags2[1]; | |||
} | |||
if (found) | |||
prefix_phonemes[0] = 0; // matched whole word, don't need prefix now | |||
if ((found == false) && (dictionary_flags2[0] != 0)) | |||
prefix_flags = 1; | |||
} | |||
if (found == false) { | |||
found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
memcpy(wordx, word_copy, strlen(word_copy)); | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
if (dictionary_flags[0] == 0) { | |||
dictionary_flags[0] = dictionary_flags2[0]; | |||
dictionary_flags[1] = dictionary_flags2[1]; | |||
} | |||
} | |||
if (found == false) { | |||
if (end_type & SUFX_Q) { | |||
// don't retranslate, use the original lookup result | |||
strcpy(phonemes, phonemes2); | |||
} else { | |||
if (end_flags & FLAG_SUFX) | |||
wflags |= FLAG_SUFFIX_REMOVED; | |||
if (end_type & SUFX_A) | |||
wflags |= FLAG_SUFFIX_VOWEL; | |||
if (end_type & SUFX_M) { | |||
// allow more suffixes before this suffix | |||
strcpy(end_phonemes2, end_phonemes); | |||
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags); | |||
strcat(end_phonemes, end_phonemes2); // add the phonemes for the previous suffixes after this one | |||
if ((end_type != 0) && !(end_type & SUFX_P)) { | |||
// there is another suffix | |||
end_flags = RemoveEnding(tr, wordx, end_type, NULL); | |||
more_suffixes = true; | |||
} | |||
} else { | |||
// don't remove any previous suffix | |||
TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, NULL, wflags, dictionary_flags); | |||
end_type = 0; | |||
} | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
strcpy(word_phonemes, phonemes); | |||
memcpy(wordx, word_copy, strlen(word_copy)); | |||
wordx[-1] = c_temp; | |||
return 0; | |||
} | |||
} | |||
} | |||
} | |||
if ((end_type1 & SUFX_T) == 0) { | |||
// the default is to add the suffix and then determine the word's stress pattern | |||
AppendPhonemes(tr, phonemes, N_WORD_PHONEMES, end_phonemes); | |||
end_phonemes[0] = 0; | |||
} | |||
memcpy(wordx, word_copy, strlen(word_copy)); | |||
} | |||
wordx[-1] = c_temp; | |||
} | |||
} | |||
addPluralSuffixes(wflags, tr, last_char, word_phonemes); | |||
wflags |= emphasize_allcaps; | |||
// determine stress pattern for this word | |||
add_suffix_phonemes = 0; | |||
if (end_phonemes[0] != 0) | |||
add_suffix_phonemes = 2; | |||
prefix_stress = 0; | |||
for (p = prefix_phonemes; *p != 0; p++) { | |||
if ((*p == phonSTRESS_P) || (*p == phonSTRESS_P2)) | |||
prefix_stress = *p; | |||
} | |||
if (prefix_flags || (prefix_stress != 0)) { | |||
if ((tr->langopts.param[LOPT_PREFIXES]) || (prefix_type & SUFX_T)) { | |||
char *p; | |||
// German, keep a secondary stress on the stem | |||
SetWordStress(tr, phonemes, dictionary_flags, 3, 0); | |||
// reduce all but the first primary stress | |||
ix = 0; | |||
for (p = prefix_phonemes; *p != 0; p++) { | |||
if (*p == phonSTRESS_P) { | |||
if (ix == 0) | |||
ix = 1; | |||
else | |||
*p = phonSTRESS_3; | |||
} | |||
} | |||
snprintf(word_phonemes, sizeof(word_phonemes), "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes); | |||
word_phonemes[N_WORD_PHONEMES-1] = 0; | |||
SetWordStress(tr, word_phonemes, dictionary_flags, -1, 0); | |||
} else { | |||
// stress position affects the whole word, including prefix | |||
snprintf(word_phonemes, sizeof(word_phonemes), "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes); | |||
word_phonemes[N_WORD_PHONEMES-1] = 0; | |||
SetWordStress(tr, word_phonemes, dictionary_flags, -1, 0); | |||
} | |||
} else { | |||
SetWordStress(tr, phonemes, dictionary_flags, -1, add_suffix_phonemes); | |||
snprintf(word_phonemes, sizeof(word_phonemes), "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes); | |||
word_phonemes[N_WORD_PHONEMES-1] = 0; | |||
} | |||
if (end_phonemes[0] != 0) { | |||
// a suffix had the SUFX_T option set, add the suffix after the stress pattern has been determined | |||
ix = strlen(word_phonemes); | |||
end_phonemes[N_WORD_PHONEMES-1-ix] = 0; // ensure no buffer overflow | |||
strcpy(&word_phonemes[ix], end_phonemes); | |||
} | |||
if (wflags & FLAG_LAST_WORD) { | |||
// don't use $brk pause before the last word of a sentence | |||
// (but allow it for emphasis, see below | |||
dictionary_flags[0] &= ~FLAG_PAUSE1; | |||
} | |||
if ((wflags & FLAG_HYPHEN) && (tr->langopts.stress_flags & S_HYPEN_UNSTRESS)) | |||
ChangeWordStress(tr, word_phonemes, 3); | |||
else if (wflags & FLAG_EMPHASIZED2) { | |||
// A word is indicated in the source text as stressed | |||
// Give it stress level 6 (for the intonation module) | |||
ChangeWordStress(tr, word_phonemes, 6); | |||
if (wflags & FLAG_EMPHASIZED) | |||
dictionary_flags[0] |= FLAG_PAUSE1; // precede by short pause | |||
} else if (wtab[dictionary_skipwords].flags & FLAG_LAST_WORD) { | |||
// the word has attribute to stress or unstress when at end of clause | |||
if (dictionary_flags[0] & (FLAG_STRESS_END | FLAG_STRESS_END2)) | |||
ChangeWordStress(tr, word_phonemes, 4); | |||
else if ((dictionary_flags[0] & FLAG_UNSTRESS_END) && (any_stressed_words)) | |||
ChangeWordStress(tr, word_phonemes, 3); | |||
} | |||
// dictionary flags for this word give a clue about which alternative pronunciations of | |||
// following words to use. | |||
if (end_type1 & SUFX_F) { | |||
// expect a verb form, with or without -s suffix | |||
tr->expect_verb = 2; | |||
tr->expect_verb_s = 2; | |||
} | |||
if (dictionary_flags[1] & FLAG_PASTF) { | |||
// expect perfect tense in next two words | |||
tr->expect_past = 3; | |||
tr->expect_verb = 0; | |||
tr->expect_noun = 0; | |||
} else if (dictionary_flags[1] & FLAG_VERBF) { | |||
// expect a verb in the next word | |||
tr->expect_verb = 2; | |||
tr->expect_verb_s = 0; // verb won't have -s suffix | |||
tr->expect_noun = 0; | |||
} else if (dictionary_flags[1] & FLAG_VERBSF) { | |||
// expect a verb, must have a -s suffix | |||
tr->expect_verb = 0; | |||
tr->expect_verb_s = 2; | |||
tr->expect_past = 0; | |||
tr->expect_noun = 0; | |||
} else if (dictionary_flags[1] & FLAG_NOUNF) { | |||
// not expecting a verb next | |||
tr->expect_noun = 2; | |||
tr->expect_verb = 0; | |||
tr->expect_verb_s = 0; | |||
tr->expect_past = 0; | |||
} | |||
if ((wordx[0] != 0) && (!(dictionary_flags[1] & FLAG_VERB_EXT))) { | |||
if (tr->expect_verb > 0) | |||
tr->expect_verb--; | |||
if (tr->expect_verb_s > 0) | |||
tr->expect_verb_s--; | |||
if (tr->expect_noun > 0) | |||
tr->expect_noun--; | |||
if (tr->expect_past > 0) | |||
tr->expect_past--; | |||
} | |||
if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha(first_char) && (first_char != 'i')) { | |||
// English Specific !!!! | |||
// any single letter before a dot is an abbreviation, except 'I' | |||
dictionary_flags[0] |= FLAG_ALLOW_DOT; | |||
} | |||
if ((tr->langopts.param[LOPT_ALT] & 2) && ((dictionary_flags[0] & (FLAG_ALT_TRANS | FLAG_ALT2_TRANS)) != 0)) | |||
ApplySpecialAttribute2(tr, word_phonemes, dictionary_flags[0]); | |||
dictionary_flags[0] |= was_unpronouncable; | |||
memcpy(word_start, word_copy2, word_copy_length); | |||
return dictionary_flags[0]; | |||
} | |||
int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out) | |||
{ | |||
char words_phonemes[N_WORD_PHONEMES]; // a word translated into phoneme codes | |||
@@ -1066,7 +375,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o | |||
int available = N_WORD_PHONEMES; | |||
bool first_word = true; | |||
int flags = TranslateWord3(tr, word_start, wtab, word_out); | |||
int flags = TranslateWord3(tr, word_start, wtab, word_out, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes)); | |||
if (flags & FLAG_TEXTMODE && word_out) { | |||
// Ensure that start of word rules match with the replaced text, | |||
// so that emoji and other characters are pronounced correctly. | |||
@@ -1086,7 +395,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o | |||
wtab->flags &= ~FLAG_FIRST_UPPER; | |||
} | |||
TranslateWord3(tr, word_out, wtab, NULL); | |||
TranslateWord3(tr, word_out, wtab, NULL, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes)); | |||
int n; | |||
if (first_word) { |
@@ -1,3 +1,4 @@ | |||
/* | |||
* Copyright (C) 2005 to 2014 by Jonathan Duddington | |||
* email: [email protected] | |||
@@ -34,21 +35,637 @@ | |||
#include "translate.h" | |||
#include "dictionary.h" // for TranslateRules, LookupDictList, Cha... | |||
#include "numbers.h" // for SetSpellingStress, TranslateLetter | |||
#include "numbers.h" // for SetSpellingStress, ... | |||
#include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonPAUSE_... | |||
#include "phonemelist.h" // for MakePhonemeList | |||
#include "readclause.h" // for towlower2, Eof, ReadClause, is_str_... | |||
#include "readclause.h" // for towlower2 | |||
#include "synthdata.h" // for SelectPhonemeTable, LookupPhonemeTable | |||
#include "synthesize.h" // for PHONEME_LIST2, N_PHONEME_LIST, PHON... | |||
#include "ucd/ucd.h" // for ucd_toupper | |||
#include "voice.h" // for voice, voice_t | |||
#include "speech.h" // for MAKE_MEM_UNDEFINED | |||
#include "translateword.h" | |||
static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes); | |||
static int CheckDottedAbbrev(char *word1); | |||
static int NonAsciiNumber(int letter); | |||
static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, ALPHABET *current_alphabet, char word_phonemes[]); | |||
static int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, ALPHABET *current_alphabet); | |||
int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes) | |||
{ | |||
// word1 is terminated by space (0x20) character | |||
char *word1; | |||
int word_length; | |||
int ix; | |||
char *p; | |||
int pfix; | |||
int n_chars; | |||
unsigned int dictionary_flags[2]; | |||
unsigned int dictionary_flags2[2]; | |||
int end_type = 0; | |||
int end_type1 = 0; | |||
int prefix_type = 0; | |||
int prefix_stress; | |||
char *wordx; | |||
char phonemes[N_WORD_PHONEMES]; | |||
char phonemes2[N_WORD_PHONEMES]; | |||
char prefix_phonemes[N_WORD_PHONEMES]; | |||
char unpron_phonemes[N_WORD_PHONEMES]; | |||
char end_phonemes[N_WORD_PHONEMES]; | |||
char end_phonemes2[N_WORD_PHONEMES]; | |||
char word_copy[N_WORD_BYTES]; | |||
char word_copy2[N_WORD_BYTES]; | |||
int word_copy_length; | |||
char prefix_chars[0x3f + 2]; | |||
bool found = false; | |||
int end_flags; | |||
int c_temp; // save a character byte while we temporarily replace it with space | |||
int first_char; | |||
int last_char = 0; | |||
int prefix_flags = 0; | |||
bool more_suffixes; | |||
bool confirm_prefix; | |||
int spell_word; | |||
int emphasize_allcaps = 0; | |||
int wflags; | |||
int was_unpronouncable = 0; | |||
int loopcount; | |||
int add_suffix_phonemes = 0; | |||
WORD_TAB wtab_null[8]; | |||
if (wtab == NULL) { | |||
memset(wtab_null, 0, sizeof(wtab_null)); | |||
wtab = wtab_null; | |||
} | |||
wflags = wtab->flags; | |||
dictionary_flags[0] = 0; | |||
dictionary_flags[1] = 0; | |||
dictionary_flags2[0] = 0; | |||
dictionary_flags2[1] = 0; | |||
dictionary_skipwords = 0; | |||
phonemes[0] = 0; | |||
unpron_phonemes[0] = 0; | |||
prefix_phonemes[0] = 0; | |||
end_phonemes[0] = 0; | |||
if (tr->data_dictlist == NULL) { | |||
// dictionary is not loaded | |||
word_phonemes[0] = 0; | |||
return 0; | |||
} | |||
// count the length of the word | |||
word1 = word_start; | |||
if (*word1 == ' ') word1++; // possibly a dot was replaced by space: $dot | |||
wordx = word1; | |||
utf8_in(&first_char, wordx); | |||
word_length = 0; | |||
while ((*wordx != 0) && (*wordx != ' ')) { | |||
wordx += utf8_in(&last_char, wordx); | |||
word_length++; | |||
} | |||
word_copy_length = wordx - word_start; | |||
if (word_copy_length >= N_WORD_BYTES) | |||
word_copy_length = N_WORD_BYTES-1; | |||
memcpy(word_copy2, word_start, word_copy_length); | |||
spell_word = 0; | |||
if ((word_length == 1) && (wflags & FLAG_TRANSLATOR2)) { | |||
// retranslating a 1-character word using a different language, say its name | |||
utf8_in(&c_temp, wordx+1); // the next character | |||
if (!IsAlpha(c_temp) || (AlphabetFromChar(last_char) != AlphabetFromChar(c_temp))) | |||
spell_word = 1; | |||
} | |||
if (option_sayas == SAYAS_KEY) { | |||
if (word_length == 1) | |||
spell_word = 4; | |||
else { | |||
// is there a translation for this keyname ? | |||
word1--; | |||
*word1 = '_'; // prefix keyname with '_' | |||
found = LookupDictList(tr, &word1, phonemes, dictionary_flags, 0, wtab); | |||
} | |||
} | |||
// try an initial lookup in the dictionary list, we may find a pronunciation specified, or | |||
// we may just find some flags | |||
if (option_sayas & 0x10) { | |||
// SAYAS_CHAR, SAYAS_GYLPH, or SAYAS_SINGLE_CHAR | |||
spell_word = option_sayas & 0xf; // 2,3,4 | |||
} else { | |||
if (!found) | |||
found = LookupDictList(tr, &word1, phonemes, dictionary_flags, FLAG_ALLOW_TEXTMODE, wtab); // the original word | |||
if ((dictionary_flags[0] & (FLAG_ALLOW_DOT | FLAG_NEEDS_DOT)) && (wordx[1] == '.')) | |||
wordx[1] = ' '; // remove a Dot after this word | |||
if (dictionary_flags[0] & FLAG_TEXTMODE) { | |||
if (word_out != NULL) | |||
strcpy(word_out, word1); | |||
return dictionary_flags[0]; | |||
} else if ((found == false) && (dictionary_flags[0] & FLAG_SKIPWORDS) && !(dictionary_flags[0] & FLAG_ABBREV)) { | |||
// grouped words, but no translation. Join the words with hyphens. | |||
wordx = word1; | |||
ix = 0; | |||
while (ix < dictionary_skipwords) { | |||
if (*wordx == ' ') { | |||
*wordx = '-'; | |||
ix++; | |||
} | |||
wordx++; | |||
} | |||
} | |||
if ((word_length == 1) && (dictionary_skipwords == 0)) { | |||
// is this a series of single letters separated by dots? | |||
if (CheckDottedAbbrev(word1)) { | |||
dictionary_flags[0] = 0; | |||
dictionary_flags[1] = 0; | |||
spell_word = 1; | |||
if (dictionary_skipwords) | |||
dictionary_flags[0] = FLAG_SKIPWORDS; | |||
} | |||
} | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
if (!found && (dictionary_flags[0] & FLAG_ABBREV)) { | |||
// the word has $abbrev flag, but no pronunciation specified. Speak as individual letters | |||
spell_word = 1; | |||
} | |||
if (!found && iswdigit(first_char)) { | |||
Lookup(tr, "_0lang", word_phonemes); | |||
if (word_phonemes[0] == phonSWITCH) | |||
return 0; | |||
if ((tr->langopts.numbers2 & NUM2_ENGLISH_NUMERALS) && !(wtab->flags & FLAG_CHAR_REPLACED)) { | |||
// for this language, speak English numerals (0-9) with the English voice | |||
sprintf(word_phonemes, "%c", phonSWITCH); | |||
return 0; | |||
} | |||
found = TranslateNumber(tr, word1, phonemes, dictionary_flags, wtab, 0); | |||
} | |||
if (!found && ((wflags & FLAG_UPPERS) != FLAG_FIRST_UPPER)) { | |||
// either all upper or all lower case | |||
char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, ALPHABET *current_alphabet, char word_phonemes[]) | |||
if ((tr->langopts.numbers & NUM_ROMAN) || ((tr->langopts.numbers & NUM_ROMAN_CAPITALS) && (wflags & FLAG_ALL_UPPER))) { | |||
if ((wflags & FLAG_LAST_WORD) || !(wtab[1].flags & FLAG_NOSPACE)) { | |||
// don't use Roman number if this word is not separated from the next word (eg. "XLTest") | |||
if ((found = TranslateRoman(tr, word1, phonemes, wtab)) != 0) | |||
dictionary_flags[0] |= FLAG_ABBREV; // prevent emphasis if capitals | |||
} | |||
} | |||
} | |||
if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha(first_char)) { | |||
if ((option_tone_flags & OPTION_EMPHASIZE_ALLCAPS) && !(dictionary_flags[0] & FLAG_ABBREV)) { | |||
// emphasize words which are in capitals | |||
emphasize_allcaps = FLAG_EMPHASIZED; | |||
} else if (!found && !(dictionary_flags[0] & FLAG_SKIPWORDS) && (word_length < 4) && (tr->clause_lower_count > 3) | |||
&& (tr->clause_upper_count <= tr->clause_lower_count)) { | |||
// An upper case word in a lower case clause. This could be an abbreviation. | |||
spell_word = 1; | |||
} | |||
} | |||
} | |||
if (spell_word > 0) { | |||
// Speak as individual letters | |||
phonemes[0] = 0; | |||
if (SpeakIndividualLetters(tr, word1, phonemes, spell_word, current_alphabet, word_phonemes) == NULL) { | |||
if (word_length > 1) | |||
return FLAG_SPELLWORD; // a mixture of languages, retranslate as individual letters, separated by spaces | |||
return 0; | |||
} | |||
strcpy(word_phonemes, phonemes); | |||
if (wflags & FLAG_TRANSLATOR2) | |||
return 0; | |||
addPluralSuffixes(wflags, tr, last_char, word_phonemes); | |||
return dictionary_flags[0] & FLAG_SKIPWORDS; // for "b.c.d" | |||
} else if (found == false) { | |||
// word's pronunciation is not given in the dictionary list, although | |||
// dictionary_flags may have ben set there | |||
int posn; | |||
bool non_initial = false; | |||
int length; | |||
posn = 0; | |||
length = 999; | |||
wordx = word1; | |||
while (((length < 3) && (length > 0)) || (word_length > 1 && Unpronouncable(tr, wordx, posn))) { | |||
// This word looks "unpronouncable", so speak letters individually until we | |||
// find a remainder that we can pronounce. | |||
was_unpronouncable = FLAG_WAS_UNPRONOUNCABLE; | |||
emphasize_allcaps = 0; | |||
if (wordx[0] == '\'') | |||
break; | |||
if (posn > 0) | |||
non_initial = true; | |||
wordx += TranslateLetter(tr, wordx, unpron_phonemes, non_initial, current_alphabet); | |||
posn++; | |||
if (unpron_phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
strcpy(word_phonemes, unpron_phonemes); | |||
if (strcmp(&unpron_phonemes[1], ESPEAKNG_DEFAULT_VOICE) == 0) | |||
return FLAG_SPELLWORD; // _^_en must have been set in TranslateLetter(), not *_rules which uses only _^_ | |||
return 0; | |||
} | |||
length = 0; | |||
while (wordx[length] != ' ') length++; | |||
} | |||
SetSpellingStress(tr, unpron_phonemes, 0, posn); | |||
// anything left ? | |||
if (*wordx != ' ') { | |||
if ((unpron_phonemes[0] != 0) && (wordx[0] != '\'')) { | |||
// letters which have been spoken individually from affecting the pronunciation of the pronuncable part | |||
wordx[-1] = ' '; | |||
} | |||
// Translate the stem | |||
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags); | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
if ((phonemes[0] == 0) && (end_phonemes[0] == 0)) { | |||
int wc; | |||
// characters not recognised, speak them individually | |||
// ?? should we say super/sub-script numbers and letters here? | |||
utf8_in(&wc, wordx); | |||
if ((word_length == 1) && (IsAlpha(wc) || IsSuperscript(wc))) { | |||
if ((wordx = SpeakIndividualLetters(tr, wordx, phonemes, spell_word, current_alphabet, word_phonemes)) == NULL) | |||
return 0; | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
} | |||
c_temp = wordx[-1]; | |||
found = false; | |||
confirm_prefix = true; | |||
for (loopcount = 0; (loopcount < 50) && (end_type & SUFX_P); loopcount++) { | |||
// Found a standard prefix, remove it and retranslate | |||
// loopcount guards against an endless loop | |||
if (confirm_prefix && !(end_type & SUFX_B)) { | |||
int end2; | |||
char end_phonemes2[N_WORD_PHONEMES]; | |||
// remove any standard suffix and confirm that the prefix is still recognised | |||
phonemes2[0] = 0; | |||
end2 = TranslateRules(tr, wordx, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags); | |||
if (end2) { | |||
RemoveEnding(tr, wordx, end2, word_copy); | |||
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags); | |||
memcpy(wordx, word_copy, strlen(word_copy)); | |||
if ((end_type & SUFX_P) == 0) { | |||
// after removing the suffix, the prefix is no longer recognised. | |||
// Keep the suffix, but don't use the prefix | |||
end_type = end2; | |||
strcpy(phonemes, phonemes2); | |||
strcpy(end_phonemes, end_phonemes2); | |||
if (option_phonemes & espeakPHONEMES_TRACE) { | |||
DecodePhonemes(end_phonemes, end_phonemes2); | |||
fprintf(f_trans, " suffix [%s]\n\n", end_phonemes2); | |||
} | |||
} | |||
confirm_prefix = false; | |||
continue; | |||
} | |||
} | |||
prefix_type = end_type; | |||
if (prefix_type & SUFX_V) | |||
tr->expect_verb = 1; // use the verb form of the word | |||
wordx[-1] = c_temp; | |||
if ((prefix_type & SUFX_B) == 0) { | |||
for (ix = (prefix_type & 0xf); ix > 0; ix--) { // num. of characters to remove | |||
wordx++; | |||
while ((*wordx & 0xc0) == 0x80) wordx++; // for multibyte characters | |||
} | |||
} else { | |||
pfix = 1; | |||
prefix_chars[0] = 0; | |||
n_chars = prefix_type & 0x3f; | |||
for (ix = 0; ix < n_chars; ix++) { // num. of bytes to remove | |||
prefix_chars[pfix++] = *wordx++; | |||
if ((prefix_type & SUFX_B) && (ix == (n_chars-1))) | |||
prefix_chars[pfix-1] = 0; // discard the last character of the prefix, this is the separator character | |||
} | |||
prefix_chars[pfix] = 0; | |||
} | |||
c_temp = wordx[-1]; | |||
wordx[-1] = ' '; | |||
confirm_prefix = true; | |||
wflags |= FLAG_PREFIX_REMOVED; | |||
if (prefix_type & SUFX_B) { | |||
// SUFX_B is used for Turkish, tr_rules contains " ' (Pb" | |||
// examine the prefix part | |||
char *wordpf; | |||
char prefix_phonemes2[12]; | |||
strncpy0(prefix_phonemes2, end_phonemes, sizeof(prefix_phonemes2)); | |||
wordpf = &prefix_chars[1]; | |||
strcpy(prefix_phonemes, phonemes); | |||
// look for stress marker or $abbrev | |||
found = LookupDictList(tr, &wordpf, phonemes, dictionary_flags, 0, wtab); | |||
if (found) | |||
strcpy(prefix_phonemes, phonemes); | |||
if (dictionary_flags[0] & FLAG_ABBREV) { | |||
prefix_phonemes[0] = 0; | |||
SpeakIndividualLetters(tr, wordpf, prefix_phonemes, 1, current_alphabet, word_phonemes); | |||
} | |||
} else | |||
strcat(prefix_phonemes, end_phonemes); | |||
end_phonemes[0] = 0; | |||
end_type = 0; | |||
found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, SUFX_P, wtab); // without prefix | |||
if (dictionary_flags[0] == 0) { | |||
dictionary_flags[0] = dictionary_flags2[0]; | |||
dictionary_flags[1] = dictionary_flags2[1]; | |||
} else | |||
prefix_flags = 1; | |||
if (found == false) { | |||
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags & (FLAG_HYPHEN_AFTER | FLAG_PREFIX_REMOVED), dictionary_flags); | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
wordx[-1] = c_temp; | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
} | |||
} | |||
if ((end_type != 0) && !(end_type & SUFX_P)) { | |||
end_type1 = end_type; | |||
strcpy(phonemes2, phonemes); | |||
// The word has a standard ending, re-translate without this ending | |||
end_flags = RemoveEnding(tr, wordx, end_type, word_copy); | |||
more_suffixes = true; | |||
while (more_suffixes) { | |||
more_suffixes = false; | |||
phonemes[0] = 0; | |||
if (prefix_phonemes[0] != 0) { | |||
// lookup the stem without the prefix removed | |||
wordx[-1] = c_temp; | |||
found = LookupDictList(tr, &word1, phonemes, dictionary_flags2, end_flags, wtab); // include prefix, but not suffix | |||
wordx[-1] = ' '; | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
memcpy(wordx, word_copy, strlen(word_copy)); | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
if (dictionary_flags[0] == 0) { | |||
dictionary_flags[0] = dictionary_flags2[0]; | |||
dictionary_flags[1] = dictionary_flags2[1]; | |||
} | |||
if (found) | |||
prefix_phonemes[0] = 0; // matched whole word, don't need prefix now | |||
if ((found == false) && (dictionary_flags2[0] != 0)) | |||
prefix_flags = 1; | |||
} | |||
if (found == false) { | |||
found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
memcpy(wordx, word_copy, strlen(word_copy)); | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
if (dictionary_flags[0] == 0) { | |||
dictionary_flags[0] = dictionary_flags2[0]; | |||
dictionary_flags[1] = dictionary_flags2[1]; | |||
} | |||
} | |||
if (found == false) { | |||
if (end_type & SUFX_Q) { | |||
// don't retranslate, use the original lookup result | |||
strcpy(phonemes, phonemes2); | |||
} else { | |||
if (end_flags & FLAG_SUFX) | |||
wflags |= FLAG_SUFFIX_REMOVED; | |||
if (end_type & SUFX_A) | |||
wflags |= FLAG_SUFFIX_VOWEL; | |||
if (end_type & SUFX_M) { | |||
// allow more suffixes before this suffix | |||
strcpy(end_phonemes2, end_phonemes); | |||
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags); | |||
strcat(end_phonemes, end_phonemes2); // add the phonemes for the previous suffixes after this one | |||
if ((end_type != 0) && !(end_type & SUFX_P)) { | |||
// there is another suffix | |||
end_flags = RemoveEnding(tr, wordx, end_type, NULL); | |||
more_suffixes = true; | |||
} | |||
} else { | |||
// don't remove any previous suffix | |||
TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, NULL, wflags, dictionary_flags); | |||
end_type = 0; | |||
} | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
strcpy(word_phonemes, phonemes); | |||
memcpy(wordx, word_copy, strlen(word_copy)); | |||
wordx[-1] = c_temp; | |||
return 0; | |||
} | |||
} | |||
} | |||
} | |||
if ((end_type1 & SUFX_T) == 0) { | |||
// the default is to add the suffix and then determine the word's stress pattern | |||
AppendPhonemes(tr, phonemes, N_WORD_PHONEMES, end_phonemes); | |||
end_phonemes[0] = 0; | |||
} | |||
memcpy(wordx, word_copy, strlen(word_copy)); | |||
} | |||
wordx[-1] = c_temp; | |||
} | |||
} | |||
addPluralSuffixes(wflags, tr, last_char, word_phonemes); | |||
wflags |= emphasize_allcaps; | |||
// determine stress pattern for this word | |||
add_suffix_phonemes = 0; | |||
if (end_phonemes[0] != 0) | |||
add_suffix_phonemes = 2; | |||
prefix_stress = 0; | |||
for (p = prefix_phonemes; *p != 0; p++) { | |||
if ((*p == phonSTRESS_P) || (*p == phonSTRESS_P2)) | |||
prefix_stress = *p; | |||
} | |||
if (prefix_flags || (prefix_stress != 0)) { | |||
if ((tr->langopts.param[LOPT_PREFIXES]) || (prefix_type & SUFX_T)) { | |||
char *p; | |||
// German, keep a secondary stress on the stem | |||
SetWordStress(tr, phonemes, dictionary_flags, 3, 0); | |||
// reduce all but the first primary stress | |||
ix = 0; | |||
for (p = prefix_phonemes; *p != 0; p++) { | |||
if (*p == phonSTRESS_P) { | |||
if (ix == 0) | |||
ix = 1; | |||
else | |||
*p = phonSTRESS_3; | |||
} | |||
} | |||
snprintf(word_phonemes, size_word_phonemes, "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes); | |||
word_phonemes[N_WORD_PHONEMES-1] = 0; | |||
SetWordStress(tr, word_phonemes, dictionary_flags, -1, 0); | |||
} else { | |||
// stress position affects the whole word, including prefix | |||
snprintf(word_phonemes, size_word_phonemes, "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes); | |||
word_phonemes[N_WORD_PHONEMES-1] = 0; | |||
SetWordStress(tr, word_phonemes, dictionary_flags, -1, 0); | |||
} | |||
} else { | |||
SetWordStress(tr, phonemes, dictionary_flags, -1, add_suffix_phonemes); | |||
snprintf(word_phonemes, size_word_phonemes, "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes); | |||
word_phonemes[N_WORD_PHONEMES-1] = 0; | |||
} | |||
if (end_phonemes[0] != 0) { | |||
// a suffix had the SUFX_T option set, add the suffix after the stress pattern has been determined | |||
ix = strlen(word_phonemes); | |||
end_phonemes[N_WORD_PHONEMES-1-ix] = 0; // ensure no buffer overflow | |||
strcpy(&word_phonemes[ix], end_phonemes); | |||
} | |||
if (wflags & FLAG_LAST_WORD) { | |||
// don't use $brk pause before the last word of a sentence | |||
// (but allow it for emphasis, see below | |||
dictionary_flags[0] &= ~FLAG_PAUSE1; | |||
} | |||
if ((wflags & FLAG_HYPHEN) && (tr->langopts.stress_flags & S_HYPEN_UNSTRESS)) | |||
ChangeWordStress(tr, word_phonemes, 3); | |||
else if (wflags & FLAG_EMPHASIZED2) { | |||
// A word is indicated in the source text as stressed | |||
// Give it stress level 6 (for the intonation module) | |||
ChangeWordStress(tr, word_phonemes, 6); | |||
if (wflags & FLAG_EMPHASIZED) | |||
dictionary_flags[0] |= FLAG_PAUSE1; // precede by short pause | |||
} else if (wtab[dictionary_skipwords].flags & FLAG_LAST_WORD) { | |||
// the word has attribute to stress or unstress when at end of clause | |||
if (dictionary_flags[0] & (FLAG_STRESS_END | FLAG_STRESS_END2)) | |||
ChangeWordStress(tr, word_phonemes, 4); | |||
else if ((dictionary_flags[0] & FLAG_UNSTRESS_END) && (any_stressed_words)) | |||
ChangeWordStress(tr, word_phonemes, 3); | |||
} | |||
// dictionary flags for this word give a clue about which alternative pronunciations of | |||
// following words to use. | |||
if (end_type1 & SUFX_F) { | |||
// expect a verb form, with or without -s suffix | |||
tr->expect_verb = 2; | |||
tr->expect_verb_s = 2; | |||
} | |||
if (dictionary_flags[1] & FLAG_PASTF) { | |||
// expect perfect tense in next two words | |||
tr->expect_past = 3; | |||
tr->expect_verb = 0; | |||
tr->expect_noun = 0; | |||
} else if (dictionary_flags[1] & FLAG_VERBF) { | |||
// expect a verb in the next word | |||
tr->expect_verb = 2; | |||
tr->expect_verb_s = 0; // verb won't have -s suffix | |||
tr->expect_noun = 0; | |||
} else if (dictionary_flags[1] & FLAG_VERBSF) { | |||
// expect a verb, must have a -s suffix | |||
tr->expect_verb = 0; | |||
tr->expect_verb_s = 2; | |||
tr->expect_past = 0; | |||
tr->expect_noun = 0; | |||
} else if (dictionary_flags[1] & FLAG_NOUNF) { | |||
// not expecting a verb next | |||
tr->expect_noun = 2; | |||
tr->expect_verb = 0; | |||
tr->expect_verb_s = 0; | |||
tr->expect_past = 0; | |||
} | |||
if ((wordx[0] != 0) && (!(dictionary_flags[1] & FLAG_VERB_EXT))) { | |||
if (tr->expect_verb > 0) | |||
tr->expect_verb--; | |||
if (tr->expect_verb_s > 0) | |||
tr->expect_verb_s--; | |||
if (tr->expect_noun > 0) | |||
tr->expect_noun--; | |||
if (tr->expect_past > 0) | |||
tr->expect_past--; | |||
} | |||
if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha(first_char) && (first_char != 'i')) { | |||
// English Specific !!!! | |||
// any single letter before a dot is an abbreviation, except 'I' | |||
dictionary_flags[0] |= FLAG_ALLOW_DOT; | |||
} | |||
if ((tr->langopts.param[LOPT_ALT] & 2) && ((dictionary_flags[0] & (FLAG_ALT_TRANS | FLAG_ALT2_TRANS)) != 0)) | |||
ApplySpecialAttribute2(tr, word_phonemes, dictionary_flags[0]); | |||
dictionary_flags[0] |= was_unpronouncable; | |||
memcpy(word_start, word_copy2, word_copy_length); | |||
return dictionary_flags[0]; | |||
} | |||
static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, ALPHABET *current_alphabet, char word_phonemes[]) | |||
{ | |||
int posn = 0; | |||
int capitals = 0; | |||
@@ -85,7 +702,7 @@ static const int number_ranges[] = { | |||
}; | |||
int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, ALPHABET *current_alphabet) | |||
static int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, ALPHABET *current_alphabet) | |||
{ | |||
// get pronunciation for an isolated letter | |||
// return number of bytes used by the letter | |||
@@ -326,6 +943,80 @@ int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, ALP | |||
return n_bytes; | |||
} | |||
// append plural suffixes depending on preceding letter | |||
static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes) | |||
{ | |||
char word_zz[4] = { ' ', 'z', 'z', 0 }; | |||
char word_iz[4] = { ' ', 'i', 'z', 0 }; | |||
char word_ss[4] = { ' ', 's', 's', 0 }; | |||
if (flags & FLAG_HAS_PLURAL) { | |||
// s or 's suffix, append [s], [z] or [Iz] depending on previous letter | |||
if (last_char == 'f') | |||
TranslateRules(tr, &word_ss[1], word_phonemes, N_WORD_PHONEMES, | |||
NULL, 0, NULL); | |||
else if ((last_char == 0) || (strchr_w("hsx", last_char) == NULL)) | |||
TranslateRules(tr, &word_zz[1], word_phonemes, N_WORD_PHONEMES, | |||
NULL, 0, NULL); | |||
else | |||
TranslateRules(tr, &word_iz[1], word_phonemes, N_WORD_PHONEMES, | |||
NULL, 0, NULL); | |||
} | |||
} | |||
static int CheckDottedAbbrev(char *word1) | |||
{ | |||
int wc; | |||
int count = 0; | |||
int nbytes; | |||
int ok; | |||
int ix; | |||
char *word; | |||
char *wbuf; | |||
char word_buf[80]; | |||
word = word1; | |||
wbuf = word_buf; | |||
for (;;) { | |||
ok = 0; | |||
nbytes = utf8_in(&wc, word); | |||
if ((word[nbytes] == ' ') && IsAlpha(wc)) { | |||
if (word[nbytes+1] == '.') { | |||
if (word[nbytes+2] == ' ') | |||
ok = 1; | |||
else if (word[nbytes+2] == '\'' && word[nbytes+3] == 's') { | |||
nbytes += 2; // delete the final dot (eg. u.s.a.'s) | |||
ok = 2; | |||
} | |||
} else if ((count > 0) && (word[nbytes] == ' ')) | |||
ok = 2; | |||
} | |||
if (ok == 0) | |||
break; | |||
for (ix = 0; ix < nbytes; ix++) | |||
*wbuf++ = word[ix]; | |||
count++; | |||
if (ok == 2) { | |||
word += nbytes; | |||
break; | |||
} | |||
word += (nbytes + 3); | |||
} | |||
if (count > 1) { | |||
ix = wbuf - word_buf; | |||
memcpy(word1, word_buf, ix); | |||
while (&word1[ix] < word) | |||
word1[ix++] = ' '; | |||
dictionary_skipwords = (count - 1)*2; | |||
} | |||
return count; | |||
} | |||
static int NonAsciiNumber(int letter) | |||
{ |
@@ -30,8 +30,8 @@ extern "C" | |||
{ | |||
#endif | |||
char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, ALPHABET *current_alphabet, char word_phonemes[]); | |||
int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, ALPHABET *current_alphabet); | |||
int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes); | |||
#ifdef __cplusplus | |||
} | |||
#endif |