|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* |
|
|
/* |
|
|
* Copyright (C) 2005 to 2014 by Jonathan Duddington |
|
|
* Copyright (C) 2005 to 2014 by Jonathan Duddington |
|
|
* email: [email protected] |
|
|
* email: [email protected] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "translate.h" |
|
|
#include "translate.h" |
|
|
#include "dictionary.h" // for TranslateRules, LookupDictList, Cha... |
|
|
#include "dictionary.h" // for TranslateRules, LookupDictList, Cha... |
|
|
#include "numbers.h" // for SetSpellingStress, TranslateLetter |
|
|
|
|
|
|
|
|
#include "numbers.h" // for SetSpellingStress, ... |
|
|
#include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonPAUSE_... |
|
|
#include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonPAUSE_... |
|
|
#include "phonemelist.h" // for MakePhonemeList |
|
|
|
|
|
#include "readclause.h" // for towlower2, Eof, ReadClause, is_str_... |
|
|
|
|
|
|
|
|
#include "readclause.h" // for towlower2 |
|
|
#include "synthdata.h" // for SelectPhonemeTable, LookupPhonemeTable |
|
|
#include "synthdata.h" // for SelectPhonemeTable, LookupPhonemeTable |
|
|
#include "synthesize.h" // for PHONEME_LIST2, N_PHONEME_LIST, PHON... |
|
|
|
|
|
#include "ucd/ucd.h" // for ucd_toupper |
|
|
#include "ucd/ucd.h" // for ucd_toupper |
|
|
#include "voice.h" // for voice, voice_t |
|
|
#include "voice.h" // for voice, voice_t |
|
|
#include "speech.h" // for MAKE_MEM_UNDEFINED |
|
|
|
|
|
#include "translateword.h" |
|
|
#include "translateword.h" |
|
|
|
|
|
|
|
|
|
|
|
static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes); |
|
|
|
|
|
static int CheckDottedAbbrev(char *word1); |
|
|
static int NonAsciiNumber(int letter); |
|
|
static int NonAsciiNumber(int letter); |
|
|
|
|
|
static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, ALPHABET *current_alphabet, char word_phonemes[]); |
|
|
|
|
|
static int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, ALPHABET *current_alphabet); |
|
|
|
|
|
|
|
|
|
|
|
/*
 * Translate one word into phoneme codes.
 *
 * tr                  translator whose dictionary, rules and language options are used.
 * word_start          the word text, terminated by a space (0x20). The text is modified
 *                     in place while prefixes/suffixes are examined; the bytes saved at
 *                     entry are copied back only on the final return path. When
 *                     option_sayas == SAYAS_KEY and the word has more than one character,
 *                     the byte before the word is overwritten with '_', so it must be
 *                     writable.
 * wtab                word table entry for this word; if NULL a zeroed dummy table is used.
 * word_out            if not NULL, receives the word text when the dictionary entry has
 *                     FLAG_TEXTMODE set.
 * any_stressed_words  pointer to a flag consulted for FLAG_UNSTRESS_END handling of the
 *                     last word of a clause; may be NULL (treated as false).
 * current_alphabet    passed through to SpeakIndividualLetters() / TranslateLetter().
 * word_phonemes       output buffer for the resulting phoneme string.
 * size_word_phonemes  size of word_phonemes, used for the bounded snprintf() calls.
 *
 * Returns the dictionary flags for the word (dictionary_flags[0], with
 * FLAG_WAS_UNPRONOUNCABLE merged in where applicable). Early exits return 0 when the
 * dictionary is not loaded or when a language switch (phonSWITCH) is requested,
 * FLAG_SPELLWORD when the word must be retranslated letter by letter, and
 * (dictionary_flags[0] & FLAG_SKIPWORDS) after spelling a word as individual letters.
 *
 * Side effects visible here: writes the global dictionary_skipwords and updates
 * tr->expect_verb, tr->expect_verb_s, tr->expect_noun and tr->expect_past.
 */
int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes)
{
	// word1 is terminated by space (0x20) character

	char *word1;
	int word_length;
	int ix;
	char *p;
	int pfix;
	int n_chars;
	unsigned int dictionary_flags[2];
	unsigned int dictionary_flags2[2];
	int end_type = 0;
	int end_type1 = 0;
	int prefix_type = 0;
	int prefix_stress;
	char *wordx;
	char phonemes[N_WORD_PHONEMES];
	char phonemes2[N_WORD_PHONEMES];
	char prefix_phonemes[N_WORD_PHONEMES];
	char unpron_phonemes[N_WORD_PHONEMES];
	char end_phonemes[N_WORD_PHONEMES];
	char end_phonemes2[N_WORD_PHONEMES];
	char word_copy[N_WORD_BYTES];
	char word_copy2[N_WORD_BYTES];
	int word_copy_length;
	char prefix_chars[0x3f + 2];
	bool found = false;
	int end_flags;
	int c_temp; // save a character byte while we temporarily replace it with space
	int first_char;
	int last_char = 0;
	int prefix_flags = 0;
	bool more_suffixes;
	bool confirm_prefix;
	int spell_word;
	int emphasize_allcaps = 0;
	int wflags;
	int was_unpronouncable = 0;
	int loopcount;
	int add_suffix_phonemes = 0;
	WORD_TAB wtab_null[8];

	if (wtab == NULL) {
		// no word table supplied: use a zeroed dummy so wtab[n].flags reads are valid
		memset(wtab_null, 0, sizeof(wtab_null));
		wtab = wtab_null;
	}
	wflags = wtab->flags;

	dictionary_flags[0] = 0;
	dictionary_flags[1] = 0;
	dictionary_flags2[0] = 0;
	dictionary_flags2[1] = 0;
	dictionary_skipwords = 0;

	phonemes[0] = 0;
	unpron_phonemes[0] = 0;
	prefix_phonemes[0] = 0;
	end_phonemes[0] = 0;

	if (tr->data_dictlist == NULL) {
		// dictionary is not loaded
		word_phonemes[0] = 0;
		return 0;
	}

	// count the length of the word
	word1 = word_start;
	if (*word1 == ' ') word1++; // possibly a dot was replaced by space: $dot
	wordx = word1;

	utf8_in(&first_char, wordx);
	word_length = 0;
	while ((*wordx != 0) && (*wordx != ' ')) {
		wordx += utf8_in(&last_char, wordx);
		word_length++;
	}

	// keep a copy of the original bytes so they can be restored before the final return
	word_copy_length = wordx - word_start;
	if (word_copy_length >= N_WORD_BYTES)
		word_copy_length = N_WORD_BYTES-1;
	memcpy(word_copy2, word_start, word_copy_length);

	spell_word = 0;

	if ((word_length == 1) && (wflags & FLAG_TRANSLATOR2)) {
		// retranslating a 1-character word using a different language, say its name
		utf8_in(&c_temp, wordx+1); // the next character
		if (!IsAlpha(c_temp) || (AlphabetFromChar(last_char) != AlphabetFromChar(c_temp)))
			spell_word = 1;
	}

	if (option_sayas == SAYAS_KEY) {
		if (word_length == 1)
			spell_word = 4;
		else {
			// is there a translation for this keyname ?
			word1--;
			*word1 = '_'; // prefix keyname with '_'
			found = LookupDictList(tr, &word1, phonemes, dictionary_flags, 0, wtab);
		}
	}

	// try an initial lookup in the dictionary list, we may find a pronunciation specified, or
	// we may just find some flags
	if (option_sayas & 0x10) {
		// SAYAS_CHAR, SAYAS_GLYPH, or SAYAS_SINGLE_CHAR
		spell_word = option_sayas & 0xf; // 2,3,4
	} else {
		if (!found)
			found = LookupDictList(tr, &word1, phonemes, dictionary_flags, FLAG_ALLOW_TEXTMODE, wtab); // the original word

		if ((dictionary_flags[0] & (FLAG_ALLOW_DOT | FLAG_NEEDS_DOT)) && (wordx[1] == '.'))
			wordx[1] = ' '; // remove a Dot after this word

		if (dictionary_flags[0] & FLAG_TEXTMODE) {
			if (word_out != NULL)
				strcpy(word_out, word1);

			return dictionary_flags[0];
		} else if ((found == false) && (dictionary_flags[0] & FLAG_SKIPWORDS) && !(dictionary_flags[0] & FLAG_ABBREV)) {
			// grouped words, but no translation. Join the words with hyphens.
			wordx = word1;
			ix = 0;
			while (ix < dictionary_skipwords) {
				if (*wordx == ' ') {
					*wordx = '-';
					ix++;
				}
				wordx++;
			}
		}

		if ((word_length == 1) && (dictionary_skipwords == 0)) {
			// is this a series of single letters separated by dots?
			if (CheckDottedAbbrev(word1)) {
				dictionary_flags[0] = 0;
				dictionary_flags[1] = 0;
				spell_word = 1;
				if (dictionary_skipwords)
					dictionary_flags[0] = FLAG_SKIPWORDS;
			}
		}

		if (phonemes[0] == phonSWITCH) {
			// change to another language in order to translate this word
			strcpy(word_phonemes, phonemes);
			return 0;
		}

		if (!found && (dictionary_flags[0] & FLAG_ABBREV)) {
			// the word has $abbrev flag, but no pronunciation specified. Speak as individual letters
			spell_word = 1;
		}

		if (!found && iswdigit(first_char)) {
			Lookup(tr, "_0lang", word_phonemes);
			if (word_phonemes[0] == phonSWITCH)
				return 0;

			if ((tr->langopts.numbers2 & NUM2_ENGLISH_NUMERALS) && !(wtab->flags & FLAG_CHAR_REPLACED)) {
				// for this language, speak English numerals (0-9) with the English voice
				sprintf(word_phonemes, "%c", phonSWITCH);
				return 0;
			}

			found = TranslateNumber(tr, word1, phonemes, dictionary_flags, wtab, 0);
		}

		if (!found && ((wflags & FLAG_UPPERS) != FLAG_FIRST_UPPER)) {
			// either all upper or all lower case

			if ((tr->langopts.numbers & NUM_ROMAN) || ((tr->langopts.numbers & NUM_ROMAN_CAPITALS) && (wflags & FLAG_ALL_UPPER))) {
				if ((wflags & FLAG_LAST_WORD) || !(wtab[1].flags & FLAG_NOSPACE)) {
					// don't use Roman number if this word is not separated from the next word (eg. "XLTest")
					if ((found = TranslateRoman(tr, word1, phonemes, wtab)) != 0)
						dictionary_flags[0] |= FLAG_ABBREV; // prevent emphasis if capitals
				}
			}
		}

		if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha(first_char)) {
			if ((option_tone_flags & OPTION_EMPHASIZE_ALLCAPS) && !(dictionary_flags[0] & FLAG_ABBREV)) {
				// emphasize words which are in capitals
				emphasize_allcaps = FLAG_EMPHASIZED;
			} else if (!found && !(dictionary_flags[0] & FLAG_SKIPWORDS) && (word_length < 4) && (tr->clause_lower_count > 3)
			           && (tr->clause_upper_count <= tr->clause_lower_count)) {
				// An upper case word in a lower case clause. This could be an abbreviation.
				spell_word = 1;
			}
		}
	}

	if (spell_word > 0) {
		// Speak as individual letters
		phonemes[0] = 0;

		if (SpeakIndividualLetters(tr, word1, phonemes, spell_word, current_alphabet, word_phonemes) == NULL) {
			if (word_length > 1)
				return FLAG_SPELLWORD; // a mixture of languages, retranslate as individual letters, separated by spaces
			return 0;
		}
		strcpy(word_phonemes, phonemes);
		if (wflags & FLAG_TRANSLATOR2)
			return 0;

		addPluralSuffixes(wflags, tr, last_char, word_phonemes);
		return dictionary_flags[0] & FLAG_SKIPWORDS; // for "b.c.d"
	} else if (found == false) {
		// word's pronunciation is not given in the dictionary list, although
		// dictionary_flags may have been set there

		int posn;
		bool non_initial = false;
		int length;

		posn = 0;
		length = 999;
		wordx = word1;

		while (((length < 3) && (length > 0)) || (word_length > 1 && Unpronouncable(tr, wordx, posn))) {
			// This word looks "unpronouncable", so speak letters individually until we
			// find a remainder that we can pronounce.
			was_unpronouncable = FLAG_WAS_UNPRONOUNCABLE;
			emphasize_allcaps = 0;

			if (wordx[0] == '\'')
				break;

			if (posn > 0)
				non_initial = true;

			wordx += TranslateLetter(tr, wordx, unpron_phonemes, non_initial, current_alphabet);
			posn++;
			if (unpron_phonemes[0] == phonSWITCH) {
				// change to another language in order to translate this word
				strcpy(word_phonemes, unpron_phonemes);
				if (strcmp(&unpron_phonemes[1], ESPEAKNG_DEFAULT_VOICE) == 0)
					return FLAG_SPELLWORD; // _^_en must have been set in TranslateLetter(), not *_rules which uses only _^_
				return 0;
			}

			// number of bytes remaining before the terminating space
			length = 0;
			while (wordx[length] != ' ') length++;
		}
		SetSpellingStress(tr, unpron_phonemes, 0, posn);

		// anything left ?
		if (*wordx != ' ') {
			if ((unpron_phonemes[0] != 0) && (wordx[0] != '\'')) {
				// letters which have been spoken individually from affecting the pronunciation of the pronuncable part
				wordx[-1] = ' ';
			}

			// Translate the stem
			end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags);

			if (phonemes[0] == phonSWITCH) {
				// change to another language in order to translate this word
				strcpy(word_phonemes, phonemes);
				return 0;
			}

			if ((phonemes[0] == 0) && (end_phonemes[0] == 0)) {
				int wc;
				// characters not recognised, speak them individually
				// ?? should we say super/sub-script numbers and letters here?
				utf8_in(&wc, wordx);
				if ((word_length == 1) && (IsAlpha(wc) || IsSuperscript(wc))) {
					if ((wordx = SpeakIndividualLetters(tr, wordx, phonemes, spell_word, current_alphabet, word_phonemes)) == NULL)
						return 0;
					strcpy(word_phonemes, phonemes);
					return 0;
				}
			}

			c_temp = wordx[-1];

			found = false;
			confirm_prefix = true;
			for (loopcount = 0; (loopcount < 50) && (end_type & SUFX_P); loopcount++) {
				// Found a standard prefix, remove it and retranslate
				// loopcount guards against an endless loop
				if (confirm_prefix && !(end_type & SUFX_B)) {
					int end2;

					// remove any standard suffix and confirm that the prefix is still recognised
					// (end_phonemes2 is only used as scratch space here)
					phonemes2[0] = 0;
					end2 = TranslateRules(tr, wordx, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags);
					if (end2) {
						RemoveEnding(tr, wordx, end2, word_copy);
						end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags);
						memcpy(wordx, word_copy, strlen(word_copy));
						if ((end_type & SUFX_P) == 0) {
							// after removing the suffix, the prefix is no longer recognised.
							// Keep the suffix, but don't use the prefix
							end_type = end2;
							strcpy(phonemes, phonemes2);
							strcpy(end_phonemes, end_phonemes2);
							if (option_phonemes & espeakPHONEMES_TRACE) {
								DecodePhonemes(end_phonemes, end_phonemes2);
								fprintf(f_trans, "  suffix [%s]\n\n", end_phonemes2);
							}
						}
						confirm_prefix = false;
						continue;
					}
				}

				prefix_type = end_type;

				if (prefix_type & SUFX_V)
					tr->expect_verb = 1; // use the verb form of the word

				wordx[-1] = c_temp;

				if ((prefix_type & SUFX_B) == 0) {
					for (ix = (prefix_type & 0xf); ix > 0; ix--) { // num. of characters to remove
						wordx++;
						while ((*wordx & 0xc0) == 0x80) wordx++; // for multibyte characters
					}
				} else {
					pfix = 1;
					prefix_chars[0] = 0;
					n_chars = prefix_type & 0x3f;

					for (ix = 0; ix < n_chars; ix++) { // num. of bytes to remove
						prefix_chars[pfix++] = *wordx++;

						if ((prefix_type & SUFX_B) && (ix == (n_chars-1)))
							prefix_chars[pfix-1] = 0; // discard the last character of the prefix, this is the separator character
					}
					prefix_chars[pfix] = 0;
				}
				c_temp = wordx[-1];
				wordx[-1] = ' ';
				confirm_prefix = true;
				wflags |= FLAG_PREFIX_REMOVED;

				if (prefix_type & SUFX_B) {
					// SUFX_B is used for Turkish, tr_rules contains " ' (Pb"
					// examine the prefix part
					char *wordpf;
					char prefix_phonemes2[12];

					// NOTE(review): prefix_phonemes2 is filled here but not read again in this function
					strncpy0(prefix_phonemes2, end_phonemes, sizeof(prefix_phonemes2));
					wordpf = &prefix_chars[1];
					strcpy(prefix_phonemes, phonemes);

					// look for stress marker or $abbrev
					found = LookupDictList(tr, &wordpf, phonemes, dictionary_flags, 0, wtab);
					if (found)
						strcpy(prefix_phonemes, phonemes);
					if (dictionary_flags[0] & FLAG_ABBREV) {
						prefix_phonemes[0] = 0;
						SpeakIndividualLetters(tr, wordpf, prefix_phonemes, 1, current_alphabet, word_phonemes);
					}
				} else
					strcat(prefix_phonemes, end_phonemes);
				end_phonemes[0] = 0;

				end_type = 0;
				found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, SUFX_P, wtab); // without prefix
				if (dictionary_flags[0] == 0) {
					dictionary_flags[0] = dictionary_flags2[0];
					dictionary_flags[1] = dictionary_flags2[1];
				} else
					prefix_flags = 1;
				if (found == false) {
					end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags & (FLAG_HYPHEN_AFTER | FLAG_PREFIX_REMOVED), dictionary_flags);

					if (phonemes[0] == phonSWITCH) {
						// change to another language in order to translate this word
						wordx[-1] = c_temp;
						strcpy(word_phonemes, phonemes);
						return 0;
					}
				}
			}

			if ((end_type != 0) && !(end_type & SUFX_P)) {
				end_type1 = end_type;
				strcpy(phonemes2, phonemes);

				// The word has a standard ending, re-translate without this ending
				end_flags = RemoveEnding(tr, wordx, end_type, word_copy);
				more_suffixes = true;

				while (more_suffixes) {
					more_suffixes = false;
					phonemes[0] = 0;

					if (prefix_phonemes[0] != 0) {
						// lookup the stem without the prefix removed
						wordx[-1] = c_temp;
						found = LookupDictList(tr, &word1, phonemes, dictionary_flags2, end_flags, wtab); // include prefix, but not suffix
						wordx[-1] = ' ';
						if (phonemes[0] == phonSWITCH) {
							// change to another language in order to translate this word
							memcpy(wordx, word_copy, strlen(word_copy));
							strcpy(word_phonemes, phonemes);
							return 0;
						}
						if (dictionary_flags[0] == 0) {
							dictionary_flags[0] = dictionary_flags2[0];
							dictionary_flags[1] = dictionary_flags2[1];
						}
						if (found)
							prefix_phonemes[0] = 0; // matched whole word, don't need prefix now

						if ((found == false) && (dictionary_flags2[0] != 0))
							prefix_flags = 1;
					}
					if (found == false) {
						found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix
						if (phonemes[0] == phonSWITCH) {
							// change to another language in order to translate this word
							memcpy(wordx, word_copy, strlen(word_copy));
							strcpy(word_phonemes, phonemes);
							return 0;
						}

						if (dictionary_flags[0] == 0) {
							dictionary_flags[0] = dictionary_flags2[0];
							dictionary_flags[1] = dictionary_flags2[1];
						}
					}
					if (found == false) {
						if (end_type & SUFX_Q) {
							// don't retranslate, use the original lookup result
							strcpy(phonemes, phonemes2);
						} else {
							if (end_flags & FLAG_SUFX)
								wflags |= FLAG_SUFFIX_REMOVED;
							if (end_type & SUFX_A)
								wflags |= FLAG_SUFFIX_VOWEL;

							if (end_type & SUFX_M) {
								// allow more suffixes before this suffix
								strcpy(end_phonemes2, end_phonemes);
								end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags);
								strcat(end_phonemes, end_phonemes2); // add the phonemes for the previous suffixes after this one

								if ((end_type != 0) && !(end_type & SUFX_P)) {
									// there is another suffix
									end_flags = RemoveEnding(tr, wordx, end_type, NULL);
									more_suffixes = true;
								}
							} else {
								// don't remove any previous suffix
								TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, NULL, wflags, dictionary_flags);
								end_type = 0;
							}

							if (phonemes[0] == phonSWITCH) {
								// change to another language in order to translate this word
								strcpy(word_phonemes, phonemes);
								memcpy(wordx, word_copy, strlen(word_copy));
								wordx[-1] = c_temp;
								return 0;
							}
						}
					}
				}


				if ((end_type1 & SUFX_T) == 0) {
					// the default is to add the suffix and then determine the word's stress pattern
					AppendPhonemes(tr, phonemes, N_WORD_PHONEMES, end_phonemes);
					end_phonemes[0] = 0;
				}
				memcpy(wordx, word_copy, strlen(word_copy));
			}

			wordx[-1] = c_temp;
		}
	}

	addPluralSuffixes(wflags, tr, last_char, word_phonemes);
	wflags |= emphasize_allcaps;

	// determine stress pattern for this word

	add_suffix_phonemes = 0;
	if (end_phonemes[0] != 0)
		add_suffix_phonemes = 2;

	// remember the last primary/secondary stress marker found in the prefix, if any
	prefix_stress = 0;
	for (p = prefix_phonemes; *p != 0; p++) {
		if ((*p == phonSTRESS_P) || (*p == phonSTRESS_P2))
			prefix_stress = *p;
	}
	if (prefix_flags || (prefix_stress != 0)) {
		if ((tr->langopts.param[LOPT_PREFIXES]) || (prefix_type & SUFX_T)) {
			// German, keep a secondary stress on the stem
			SetWordStress(tr, phonemes, dictionary_flags, 3, 0);

			// reduce all but the first primary stress
			ix = 0;
			for (p = prefix_phonemes; *p != 0; p++) {
				if (*p == phonSTRESS_P) {
					if (ix == 0)
						ix = 1;
					else
						*p = phonSTRESS_3;
				}
			}
			snprintf(word_phonemes, size_word_phonemes, "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes);

			word_phonemes[N_WORD_PHONEMES-1] = 0;
			SetWordStress(tr, word_phonemes, dictionary_flags, -1, 0);
		} else {
			// stress position affects the whole word, including prefix
			snprintf(word_phonemes, size_word_phonemes, "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes);
			word_phonemes[N_WORD_PHONEMES-1] = 0;
			SetWordStress(tr, word_phonemes, dictionary_flags, -1, 0);
		}
	} else {
		SetWordStress(tr, phonemes, dictionary_flags, -1, add_suffix_phonemes);
		snprintf(word_phonemes, size_word_phonemes, "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes);
		word_phonemes[N_WORD_PHONEMES-1] = 0;
	}

	if (end_phonemes[0] != 0) {
		// a suffix had the SUFX_T option set, add the suffix after the stress pattern has been determined
		ix = strlen(word_phonemes);
		end_phonemes[N_WORD_PHONEMES-1-ix] = 0; // ensure no buffer overflow
		strcpy(&word_phonemes[ix], end_phonemes);
	}

	if (wflags & FLAG_LAST_WORD) {
		// don't use $brk pause before the last word of a sentence
		// (but allow it for emphasis, see below
		dictionary_flags[0] &= ~FLAG_PAUSE1;
	}

	if ((wflags & FLAG_HYPHEN) && (tr->langopts.stress_flags & S_HYPEN_UNSTRESS))
		ChangeWordStress(tr, word_phonemes, 3);
	else if (wflags & FLAG_EMPHASIZED2) {
		// A word is indicated in the source text as stressed
		// Give it stress level 6 (for the intonation module)
		ChangeWordStress(tr, word_phonemes, 6);

		if (wflags & FLAG_EMPHASIZED)
			dictionary_flags[0] |= FLAG_PAUSE1; // precede by short pause
	} else if (wtab[dictionary_skipwords].flags & FLAG_LAST_WORD) {
		// the word has attribute to stress or unstress when at end of clause
		if (dictionary_flags[0] & (FLAG_STRESS_END | FLAG_STRESS_END2))
			ChangeWordStress(tr, word_phonemes, 4);
		// test the flag's value, not the pointer: a non-NULL pointer to 'false' must not unstress
		else if ((dictionary_flags[0] & FLAG_UNSTRESS_END) && (any_stressed_words != NULL) && (*any_stressed_words))
			ChangeWordStress(tr, word_phonemes, 3);
	}

	// dictionary flags for this word give a clue about which alternative pronunciations of
	// following words to use.
	if (end_type1 & SUFX_F) {
		// expect a verb form, with or without -s suffix
		tr->expect_verb = 2;
		tr->expect_verb_s = 2;
	}

	if (dictionary_flags[1] & FLAG_PASTF) {
		// expect perfect tense in next two words
		tr->expect_past = 3;
		tr->expect_verb = 0;
		tr->expect_noun = 0;
	} else if (dictionary_flags[1] & FLAG_VERBF) {
		// expect a verb in the next word
		tr->expect_verb = 2;
		tr->expect_verb_s = 0; // verb won't have -s suffix
		tr->expect_noun = 0;
	} else if (dictionary_flags[1] & FLAG_VERBSF) {
		// expect a verb, must have a -s suffix
		tr->expect_verb = 0;
		tr->expect_verb_s = 2;
		tr->expect_past = 0;
		tr->expect_noun = 0;
	} else if (dictionary_flags[1] & FLAG_NOUNF) {
		// not expecting a verb next
		tr->expect_noun = 2;
		tr->expect_verb = 0;
		tr->expect_verb_s = 0;
		tr->expect_past = 0;
	}

	if ((wordx[0] != 0) && (!(dictionary_flags[1] & FLAG_VERB_EXT))) {
		// count down the expectations set by earlier words
		if (tr->expect_verb > 0)
			tr->expect_verb--;

		if (tr->expect_verb_s > 0)
			tr->expect_verb_s--;

		if (tr->expect_noun > 0)
			tr->expect_noun--;

		if (tr->expect_past > 0)
			tr->expect_past--;
	}

	if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha(first_char) && (first_char != 'i')) {
		// English Specific !!!!
		// any single letter before a dot is an abbreviation, except 'I'
		dictionary_flags[0] |= FLAG_ALLOW_DOT;
	}

	if ((tr->langopts.param[LOPT_ALT] & 2) && ((dictionary_flags[0] & (FLAG_ALT_TRANS | FLAG_ALT2_TRANS)) != 0))
		ApplySpecialAttribute2(tr, word_phonemes, dictionary_flags[0]);

	dictionary_flags[0] |= was_unpronouncable;
	// restore the caller's text, which may have been modified above
	memcpy(word_start, word_copy2, word_copy_length);
	return dictionary_flags[0];
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, ALPHABET *current_alphabet, char word_phonemes[]) |
|
|
{ |
|
|
{ |
|
|
int posn = 0; |
|
|
int posn = 0; |
|
|
int capitals = 0; |
|
|
int capitals = 0; |
|
|
|
|
|
|
|
|
}; |
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, ALPHABET *current_alphabet) |
|
|
|
|
|
|
|
|
static int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, ALPHABET *current_alphabet) |
|
|
{ |
|
|
{ |
|
|
// get pronunciation for an isolated letter |
|
|
// get pronunciation for an isolated letter |
|
|
// return number of bytes used by the letter |
|
|
// return number of bytes used by the letter |
|
|
|
|
|
|
|
|
return n_bytes; |
|
|
return n_bytes; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// append plural suffixes depending on preceding letter |
|
|
|
|
|
static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes) |
|
|
|
|
|
{ |
|
|
|
|
|
char word_zz[4] = { ' ', 'z', 'z', 0 }; |
|
|
|
|
|
char word_iz[4] = { ' ', 'i', 'z', 0 }; |
|
|
|
|
|
char word_ss[4] = { ' ', 's', 's', 0 }; |
|
|
|
|
|
if (flags & FLAG_HAS_PLURAL) { |
|
|
|
|
|
// s or 's suffix, append [s], [z] or [Iz] depending on previous letter |
|
|
|
|
|
if (last_char == 'f') |
|
|
|
|
|
TranslateRules(tr, &word_ss[1], word_phonemes, N_WORD_PHONEMES, |
|
|
|
|
|
NULL, 0, NULL); |
|
|
|
|
|
else if ((last_char == 0) || (strchr_w("hsx", last_char) == NULL)) |
|
|
|
|
|
TranslateRules(tr, &word_zz[1], word_phonemes, N_WORD_PHONEMES, |
|
|
|
|
|
NULL, 0, NULL); |
|
|
|
|
|
else |
|
|
|
|
|
TranslateRules(tr, &word_iz[1], word_phonemes, N_WORD_PHONEMES, |
|
|
|
|
|
NULL, 0, NULL); |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static int CheckDottedAbbrev(char *word1) |
|
|
|
|
|
{ |
|
|
|
|
|
int wc; |
|
|
|
|
|
int count = 0; |
|
|
|
|
|
int nbytes; |
|
|
|
|
|
int ok; |
|
|
|
|
|
int ix; |
|
|
|
|
|
char *word; |
|
|
|
|
|
char *wbuf; |
|
|
|
|
|
char word_buf[80]; |
|
|
|
|
|
|
|
|
|
|
|
word = word1; |
|
|
|
|
|
wbuf = word_buf; |
|
|
|
|
|
|
|
|
|
|
|
for (;;) { |
|
|
|
|
|
ok = 0; |
|
|
|
|
|
nbytes = utf8_in(&wc, word); |
|
|
|
|
|
if ((word[nbytes] == ' ') && IsAlpha(wc)) { |
|
|
|
|
|
if (word[nbytes+1] == '.') { |
|
|
|
|
|
if (word[nbytes+2] == ' ') |
|
|
|
|
|
ok = 1; |
|
|
|
|
|
else if (word[nbytes+2] == '\'' && word[nbytes+3] == 's') { |
|
|
|
|
|
nbytes += 2; // delete the final dot (eg. u.s.a.'s) |
|
|
|
|
|
ok = 2; |
|
|
|
|
|
} |
|
|
|
|
|
} else if ((count > 0) && (word[nbytes] == ' ')) |
|
|
|
|
|
ok = 2; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (ok == 0) |
|
|
|
|
|
break; |
|
|
|
|
|
|
|
|
|
|
|
for (ix = 0; ix < nbytes; ix++) |
|
|
|
|
|
*wbuf++ = word[ix]; |
|
|
|
|
|
|
|
|
|
|
|
count++; |
|
|
|
|
|
|
|
|
|
|
|
if (ok == 2) { |
|
|
|
|
|
word += nbytes; |
|
|
|
|
|
break; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
word += (nbytes + 3); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (count > 1) { |
|
|
|
|
|
ix = wbuf - word_buf; |
|
|
|
|
|
memcpy(word1, word_buf, ix); |
|
|
|
|
|
while (&word1[ix] < word) |
|
|
|
|
|
word1[ix++] = ' '; |
|
|
|
|
|
dictionary_skipwords = (count - 1)*2; |
|
|
|
|
|
} |
|
|
|
|
|
return count; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
static int NonAsciiNumber(int letter) |
|
|
static int NonAsciiNumber(int letter) |
|
|
{ |
|
|
{ |