/*
* Copyright (C) 2005 to 2014 by Jonathan Duddington
* email: jonsd@users.sourceforge.net
* Copyright (C) 2015-2017 Reece H. Dunn
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see: .
*/
#include "config.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "translate.h"
#include "translateword.h"
#include "common.h" // for strncpy0
#include "dictionary.h" // for TranslateRules, LookupDictList
#include "numbers.h" // for SetSpellingStress, ...
#include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonPAUSE_...
#include "readclause.h" // for towlower2
#include "synthdata.h" // for SelectPhonemeTable, LookupPhonemeTable
#include "ucd/ucd.h" // for ucd_toupper
#include "voice.h" // for voice, voice_t
static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes);
static void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags);
static void ChangeWordStress(Translator *tr, char *word, int new_stress);
static int CheckDottedAbbrev(char *word1);
static int NonAsciiNumber(int letter);
static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, const ALPHABET *current_alphabet, char word_phonemes[]);
static int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, const ALPHABET *current_alphabet);
static int Unpronouncable(Translator *tr, char *word, int posn);
static int Unpronouncable2(Translator *tr, char *word);
int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes)
{
// word1 is terminated by space (0x20) character
char *word1;
int word_length;
int ix;
char *p;
int pfix;
int n_chars;
unsigned int dictionary_flags[2];
unsigned int dictionary_flags2[2];
int end_type = 0;
int end_type1 = 0;
int prefix_type = 0;
int prefix_stress;
char *wordx;
char phonemes[N_WORD_PHONEMES];
char phonemes2[N_WORD_PHONEMES];
char prefix_phonemes[N_WORD_PHONEMES];
char unpron_phonemes[N_WORD_PHONEMES];
char end_phonemes[N_WORD_PHONEMES];
char end_phonemes2[N_WORD_PHONEMES];
char word_copy[N_WORD_BYTES];
char word_copy2[N_WORD_BYTES];
int word_copy_length;
char prefix_chars[0x3f + 2];
bool found = false;
int end_flags;
int c_temp; // save a character byte while we temporarily replace it with space
int first_char;
int last_char = 0;
int prefix_flags = 0;
bool more_suffixes;
bool confirm_prefix;
int spell_word;
int emphasize_allcaps = 0;
int wflags;
int was_unpronouncable = 0;
int loopcount;
int add_suffix_phonemes = 0;
WORD_TAB wtab_null[8];
if (wtab == NULL) {
memset(wtab_null, 0, sizeof(wtab_null));
wtab = wtab_null;
}
wflags = wtab->flags;
dictionary_flags[0] = 0;
dictionary_flags[1] = 0;
dictionary_flags2[0] = 0;
dictionary_flags2[1] = 0;
dictionary_skipwords = 0;
phonemes[0] = 0;
unpron_phonemes[0] = 0;
prefix_phonemes[0] = 0;
end_phonemes[0] = 0;
if (tr->data_dictlist == NULL) {
// dictionary is not loaded
word_phonemes[0] = 0;
return 0;
}
// count the length of the word
word1 = word_start;
if (*word1 == ' ') word1++; // possibly a dot was replaced by space: $dot
wordx = word1;
utf8_in(&first_char, wordx);
word_length = 0;
while ((*wordx != 0) && (*wordx != ' ')) {
wordx += utf8_in(&last_char, wordx);
word_length++;
}
word_copy_length = wordx - word_start;
if (word_copy_length >= N_WORD_BYTES)
word_copy_length = N_WORD_BYTES-1;
memcpy(word_copy2, word_start, word_copy_length);
spell_word = 0;
if ((word_length == 1) && (wflags & FLAG_TRANSLATOR2)) {
// retranslating a 1-character word using a different language, say its name
utf8_in(&c_temp, wordx+1); // the next character
if (!IsAlpha(c_temp) || (AlphabetFromChar(last_char) != AlphabetFromChar(c_temp)))
spell_word = 1;
}
if (option_sayas == SAYAS_KEY) {
if (word_length == 1)
spell_word = 4;
else {
// is there a translation for this keyname ?
word1--;
*word1 = '_'; // prefix keyname with '_'
found = LookupDictList(tr, &word1, phonemes, dictionary_flags, 0, wtab);
}
}
// try an initial lookup in the dictionary list, we may find a pronunciation specified, or
// we may just find some flags
if (option_sayas & 0x10) {
// SAYAS_CHAR, SAYAS_GYLPH, or SAYAS_SINGLE_CHAR
spell_word = option_sayas & 0xf; // 2,3,4
} else {
if (!found)
found = LookupDictList(tr, &word1, phonemes, dictionary_flags, FLAG_ALLOW_TEXTMODE, wtab); // the original word
if ((dictionary_flags[0] & (FLAG_ALLOW_DOT | FLAG_NEEDS_DOT)) && (wordx[1] == '.'))
wordx[1] = ' '; // remove a Dot after this word
if (dictionary_flags[0] & FLAG_TEXTMODE) {
if (word_out != NULL)
strcpy(word_out, word1);
return dictionary_flags[0];
} else if ((found == false) && (dictionary_flags[0] & FLAG_SKIPWORDS) && !(dictionary_flags[0] & FLAG_ABBREV)) {
// grouped words, but no translation. Join the words with hyphens.
wordx = word1;
ix = 0;
while (ix < dictionary_skipwords) {
if (*wordx == ' ') {
*wordx = '-';
ix++;
}
wordx++;
}
}
if ((word_length == 1) && (dictionary_skipwords == 0)) {
// is this a series of single letters separated by dots?
if (CheckDottedAbbrev(word1)) {
dictionary_flags[0] = 0;
dictionary_flags[1] = 0;
spell_word = 1;
if (dictionary_skipwords)
dictionary_flags[0] = FLAG_SKIPWORDS;
}
}
if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
strcpy(word_phonemes, phonemes);
return 0;
}
if (!found && (dictionary_flags[0] & FLAG_ABBREV)) {
// the word has $abbrev flag, but no pronunciation specified. Speak as individual letters
spell_word = 1;
}
if (!found && iswdigit(first_char)) {
Lookup(tr, "_0lang", word_phonemes);
if (word_phonemes[0] == phonSWITCH)
return 0;
if ((tr->langopts.numbers2 & NUM2_ENGLISH_NUMERALS) && !(wtab->flags & FLAG_CHAR_REPLACED)) {
// for this language, speak English numerals (0-9) with the English voice
sprintf(word_phonemes, "%c", phonSWITCH);
return 0;
}
found = TranslateNumber(tr, word1, phonemes, dictionary_flags, wtab, 0);
}
if (!found && ((wflags & FLAG_UPPERS) != FLAG_FIRST_UPPER)) {
// either all upper or all lower case
if ((tr->langopts.numbers & NUM_ROMAN) || ((tr->langopts.numbers & NUM_ROMAN_CAPITALS) && (wflags & FLAG_ALL_UPPER))) {
if ((wflags & FLAG_LAST_WORD) || !(wtab[1].flags & FLAG_NOSPACE)) {
// don't use Roman number if this word is not separated from the next word (eg. "XLTest")
if ((found = TranslateRoman(tr, word1, phonemes, wtab)) != 0)
dictionary_flags[0] |= FLAG_ABBREV; // prevent emphasis if capitals
}
}
}
if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha(first_char)) {
if ((option_tone_flags & OPTION_EMPHASIZE_ALLCAPS) && !(dictionary_flags[0] & FLAG_ABBREV)) {
// emphasize words which are in capitals
emphasize_allcaps = FLAG_EMPHASIZED;
} else if (!found && !(dictionary_flags[0] & FLAG_SKIPWORDS) && (word_length < 4) && (tr->clause_lower_count > 3)
&& (tr->clause_upper_count <= tr->clause_lower_count)) {
// An upper case word in a lower case clause. This could be an abbreviation.
spell_word = 1;
}
}
}
if (spell_word > 0) {
// Speak as individual letters
phonemes[0] = 0;
if (SpeakIndividualLetters(tr, word1, phonemes, spell_word, current_alphabet, word_phonemes) == NULL) {
if (word_length > 1)
return FLAG_SPELLWORD; // a mixture of languages, retranslate as individual letters, separated by spaces
return 0;
}
strcpy(word_phonemes, phonemes);
if (wflags & FLAG_TRANSLATOR2)
return 0;
addPluralSuffixes(wflags, tr, last_char, word_phonemes);
return dictionary_flags[0] & FLAG_SKIPWORDS; // for "b.c.d"
} else if (found == false) {
// word's pronunciation is not given in the dictionary list, although
// dictionary_flags may have ben set there
int posn;
bool non_initial = false;
int length;
posn = 0;
length = 999;
wordx = word1;
while (((length < 3) && (length > 0)) || (word_length > 1 && Unpronouncable(tr, wordx, posn))) {
// This word looks "unpronouncable", so speak letters individually until we
// find a remainder that we can pronounce.
was_unpronouncable = FLAG_WAS_UNPRONOUNCABLE;
emphasize_allcaps = 0;
if (wordx[0] == '\'')
break;
if (posn > 0)
non_initial = true;
wordx += TranslateLetter(tr, wordx, unpron_phonemes, non_initial, current_alphabet);
posn++;
if (unpron_phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
strcpy(word_phonemes, unpron_phonemes);
if (strcmp(&unpron_phonemes[1], ESPEAKNG_DEFAULT_VOICE) == 0)
return FLAG_SPELLWORD; // _^_en must have been set in TranslateLetter(), not *_rules which uses only _^_
return 0;
}
length = 0;
while (wordx[length] != ' ') length++;
}
SetSpellingStress(tr, unpron_phonemes, 0, posn);
// anything left ?
if (*wordx != ' ') {
if ((unpron_phonemes[0] != 0) && (wordx[0] != '\'')) {
// letters which have been spoken individually from affecting the pronunciation of the pronuncable part
wordx[-1] = ' ';
}
// Translate the stem
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags);
if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
strcpy(word_phonemes, phonemes);
return 0;
}
if ((phonemes[0] == 0) && (end_phonemes[0] == 0)) {
int wc;
// characters not recognised, speak them individually
// ?? should we say super/sub-script numbers and letters here?
utf8_in(&wc, wordx);
if ((word_length == 1) && (IsAlpha(wc) || IsSuperscript(wc))) {
if ((wordx = SpeakIndividualLetters(tr, wordx, phonemes, spell_word, current_alphabet, word_phonemes)) == NULL)
return 0;
strcpy(word_phonemes, phonemes);
return 0;
}
}
c_temp = wordx[-1];
found = false;
confirm_prefix = true;
for (loopcount = 0; (loopcount < 50) && (end_type & SUFX_P); loopcount++) {
// Found a standard prefix, remove it and retranslate
// loopcount guards against an endless loop
if (confirm_prefix && !(end_type & SUFX_B)) {
int end2;
char end_phonemes2[N_WORD_PHONEMES];
// remove any standard suffix and confirm that the prefix is still recognised
phonemes2[0] = 0;
end2 = TranslateRules(tr, wordx, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags);
if (end2) {
RemoveEnding(tr, wordx, end2, word_copy);
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags);
memcpy(wordx, word_copy, strlen(word_copy));
if ((end_type & SUFX_P) == 0) {
// after removing the suffix, the prefix is no longer recognised.
// Keep the suffix, but don't use the prefix
end_type = end2;
strcpy(phonemes, phonemes2);
strcpy(end_phonemes, end_phonemes2);
if (option_phonemes & espeakPHONEMES_TRACE) {
DecodePhonemes(end_phonemes, end_phonemes2);
fprintf(f_trans, " suffix [%s]\n\n", end_phonemes2);
}
}
confirm_prefix = false;
continue;
}
}
prefix_type = end_type;
if (prefix_type & SUFX_V)
tr->expect_verb = 1; // use the verb form of the word
wordx[-1] = c_temp;
if ((prefix_type & SUFX_B) == 0) {
for (ix = (prefix_type & 0xf); ix > 0; ix--) { // num. of characters to remove
wordx++;
while ((*wordx & 0xc0) == 0x80) wordx++; // for multibyte characters
}
} else {
pfix = 1;
prefix_chars[0] = 0;
n_chars = prefix_type & 0x3f;
for (ix = 0; ix < n_chars; ix++) { // num. of bytes to remove
prefix_chars[pfix++] = *wordx++;
if ((prefix_type & SUFX_B) && (ix == (n_chars-1)))
prefix_chars[pfix-1] = 0; // discard the last character of the prefix, this is the separator character
}
prefix_chars[pfix] = 0;
}
c_temp = wordx[-1];
wordx[-1] = ' ';
confirm_prefix = true;
wflags |= FLAG_PREFIX_REMOVED;
if (prefix_type & SUFX_B) {
// SUFX_B is used for Turkish, tr_rules contains " ' (Pb"
// examine the prefix part
char *wordpf;
char prefix_phonemes2[12];
strncpy0(prefix_phonemes2, end_phonemes, sizeof(prefix_phonemes2));
wordpf = &prefix_chars[1];
strcpy(prefix_phonemes, phonemes);
// look for stress marker or $abbrev
found = LookupDictList(tr, &wordpf, phonemes, dictionary_flags, 0, wtab);
if (found)
strcpy(prefix_phonemes, phonemes);
if (dictionary_flags[0] & FLAG_ABBREV) {
prefix_phonemes[0] = 0;
SpeakIndividualLetters(tr, wordpf, prefix_phonemes, 1, current_alphabet, word_phonemes);
}
} else
strcat(prefix_phonemes, end_phonemes);
end_phonemes[0] = 0;
end_type = 0;
found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, SUFX_P, wtab); // without prefix
if (dictionary_flags[0] == 0) {
dictionary_flags[0] = dictionary_flags2[0];
dictionary_flags[1] = dictionary_flags2[1];
} else
prefix_flags = 1;
if (found == false) {
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags & (FLAG_HYPHEN_AFTER | FLAG_PREFIX_REMOVED), dictionary_flags);
if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
wordx[-1] = c_temp;
strcpy(word_phonemes, phonemes);
return 0;
}
}
}
if ((end_type != 0) && !(end_type & SUFX_P)) {
end_type1 = end_type;
strcpy(phonemes2, phonemes);
// The word has a standard ending, re-translate without this ending
end_flags = RemoveEnding(tr, wordx, end_type, word_copy);
more_suffixes = true;
while (more_suffixes) {
more_suffixes = false;
phonemes[0] = 0;
if (prefix_phonemes[0] != 0) {
// lookup the stem without the prefix removed
wordx[-1] = c_temp;
found = LookupDictList(tr, &word1, phonemes, dictionary_flags2, end_flags, wtab); // include prefix, but not suffix
wordx[-1] = ' ';
if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
memcpy(wordx, word_copy, strlen(word_copy));
strcpy(word_phonemes, phonemes);
return 0;
}
if (dictionary_flags[0] == 0) {
dictionary_flags[0] = dictionary_flags2[0];
dictionary_flags[1] = dictionary_flags2[1];
}
if (found)
prefix_phonemes[0] = 0; // matched whole word, don't need prefix now
if ((found == false) && (dictionary_flags2[0] != 0))
prefix_flags = 1;
}
if (found == false) {
found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix
if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
memcpy(wordx, word_copy, strlen(word_copy));
strcpy(word_phonemes, phonemes);
return 0;
}
if (dictionary_flags[0] == 0) {
dictionary_flags[0] = dictionary_flags2[0];
dictionary_flags[1] = dictionary_flags2[1];
}
}
if (found == false) {
if (end_type & SUFX_Q) {
// don't retranslate, use the original lookup result
strcpy(phonemes, phonemes2);
} else {
if (end_flags & FLAG_SUFX)
wflags |= FLAG_SUFFIX_REMOVED;
if (end_type & SUFX_A)
wflags |= FLAG_SUFFIX_VOWEL;
if (end_type & SUFX_M) {
// allow more suffixes before this suffix
strcpy(end_phonemes2, end_phonemes);
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags);
strcat(end_phonemes, end_phonemes2); // add the phonemes for the previous suffixes after this one
if ((end_type != 0) && !(end_type & SUFX_P)) {
// there is another suffix
end_flags = RemoveEnding(tr, wordx, end_type, NULL);
more_suffixes = true;
}
} else {
// don't remove any previous suffix
TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, NULL, wflags, dictionary_flags);
end_type = 0;
}
if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
strcpy(word_phonemes, phonemes);
memcpy(wordx, word_copy, strlen(word_copy));
wordx[-1] = c_temp;
return 0;
}
}
}
}
if ((end_type1 & SUFX_T) == 0) {
// the default is to add the suffix and then determine the word's stress pattern
AppendPhonemes(tr, phonemes, N_WORD_PHONEMES, end_phonemes);
end_phonemes[0] = 0;
}
memcpy(wordx, word_copy, strlen(word_copy));
}
wordx[-1] = c_temp;
}
}
addPluralSuffixes(wflags, tr, last_char, word_phonemes);
wflags |= emphasize_allcaps;
// determine stress pattern for this word
add_suffix_phonemes = 0;
if (end_phonemes[0] != 0)
add_suffix_phonemes = 2;
prefix_stress = 0;
for (p = prefix_phonemes; *p != 0; p++) {
if ((*p == phonSTRESS_P) || (*p == phonSTRESS_P2))
prefix_stress = *p;
}
if (prefix_flags || (prefix_stress != 0)) {
if ((tr->langopts.param[LOPT_PREFIXES]) || (prefix_type & SUFX_T)) {
char *p;
// German, keep a secondary stress on the stem
SetWordStress(tr, phonemes, dictionary_flags, 3, 0);
// reduce all but the first primary stress
ix = 0;
for (p = prefix_phonemes; *p != 0; p++) {
if (*p == phonSTRESS_P) {
if (ix == 0)
ix = 1;
else
*p = phonSTRESS_3;
}
}
snprintf(word_phonemes, size_word_phonemes, "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes);
word_phonemes[N_WORD_PHONEMES-1] = 0;
SetWordStress(tr, word_phonemes, dictionary_flags, -1, 0);
} else {
// stress position affects the whole word, including prefix
snprintf(word_phonemes, size_word_phonemes, "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes);
word_phonemes[N_WORD_PHONEMES-1] = 0;
SetWordStress(tr, word_phonemes, dictionary_flags, -1, 0);
}
} else {
SetWordStress(tr, phonemes, dictionary_flags, -1, add_suffix_phonemes);
snprintf(word_phonemes, size_word_phonemes, "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes);
word_phonemes[N_WORD_PHONEMES-1] = 0;
}
if (end_phonemes[0] != 0) {
// a suffix had the SUFX_T option set, add the suffix after the stress pattern has been determined
ix = strlen(word_phonemes);
end_phonemes[N_WORD_PHONEMES-1-ix] = 0; // ensure no buffer overflow
strcpy(&word_phonemes[ix], end_phonemes);
}
if (wflags & FLAG_LAST_WORD) {
// don't use $brk pause before the last word of a sentence
// (but allow it for emphasis, see below
dictionary_flags[0] &= ~FLAG_PAUSE1;
}
if ((wflags & FLAG_HYPHEN) && (tr->langopts.stress_flags & S_HYPEN_UNSTRESS))
ChangeWordStress(tr, word_phonemes, 3);
else if (wflags & FLAG_EMPHASIZED2) {
// A word is indicated in the source text as stressed
// Give it stress level 6 (for the intonation module)
ChangeWordStress(tr, word_phonemes, 6);
if (wflags & FLAG_EMPHASIZED)
dictionary_flags[0] |= FLAG_PAUSE1; // precede by short pause
} else if (wtab[dictionary_skipwords].flags & FLAG_LAST_WORD) {
// the word has attribute to stress or unstress when at end of clause
if (dictionary_flags[0] & (FLAG_STRESS_END | FLAG_STRESS_END2))
ChangeWordStress(tr, word_phonemes, 4);
else if ((dictionary_flags[0] & FLAG_UNSTRESS_END) && (any_stressed_words))
ChangeWordStress(tr, word_phonemes, 3);
}
// dictionary flags for this word give a clue about which alternative pronunciations of
// following words to use.
if (end_type1 & SUFX_F) {
// expect a verb form, with or without -s suffix
tr->expect_verb = 2;
tr->expect_verb_s = 2;
}
if (dictionary_flags[1] & FLAG_PASTF) {
// expect perfect tense in next two words
tr->expect_past = 3;
tr->expect_verb = 0;
tr->expect_noun = 0;
} else if (dictionary_flags[1] & FLAG_VERBF) {
// expect a verb in the next word
tr->expect_verb = 2;
tr->expect_verb_s = 0; // verb won't have -s suffix
tr->expect_noun = 0;
} else if (dictionary_flags[1] & FLAG_VERBSF) {
// expect a verb, must have a -s suffix
tr->expect_verb = 0;
tr->expect_verb_s = 2;
tr->expect_past = 0;
tr->expect_noun = 0;
} else if (dictionary_flags[1] & FLAG_NOUNF) {
// not expecting a verb next
tr->expect_noun = 2;
tr->expect_verb = 0;
tr->expect_verb_s = 0;
tr->expect_past = 0;
}
if ((wordx[0] != 0) && (!(dictionary_flags[1] & FLAG_VERB_EXT))) {
if (tr->expect_verb > 0)
tr->expect_verb--;
if (tr->expect_verb_s > 0)
tr->expect_verb_s--;
if (tr->expect_noun > 0)
tr->expect_noun--;
if (tr->expect_past > 0)
tr->expect_past--;
}
if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha(first_char) && (first_char != 'i')) {
// English Specific !!!!
// any single letter before a dot is an abbreviation, except 'I'
dictionary_flags[0] |= FLAG_ALLOW_DOT;
}
if ((tr->langopts.param[LOPT_ALT] & 2) && ((dictionary_flags[0] & (FLAG_ALT_TRANS | FLAG_ALT2_TRANS)) != 0))
ApplySpecialAttribute2(tr, word_phonemes, dictionary_flags[0]);
dictionary_flags[0] |= was_unpronouncable;
memcpy(word_start, word_copy2, word_copy_length);
return dictionary_flags[0];
}
void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags)
{
// apply after the translation is complete
int len;
len = strlen(phonemes);
if (tr->langopts.param[LOPT_ALT] & 2) {
for (int ix = 0; ix < (len-1); ix++) {
if (phonemes[ix] == phonSTRESS_P) {
char *p;
p = &phonemes[ix+1];
if ((dict_flags & FLAG_ALT2_TRANS) != 0) {
if (*p == PhonemeCode('E'))
*p = PhonemeCode('e');
if (*p == PhonemeCode('O'))
*p = PhonemeCode('o');
} else {
if (*p == PhonemeCode('e'))
*p = PhonemeCode('E');
if (*p == PhonemeCode('o'))
*p = PhonemeCode('O');
}
break;
}
}
}
}
static void ChangeWordStress(Translator *tr, char *word, int new_stress)
{
int ix;
unsigned char *p;
int max_stress;
int vowel_count; // num of vowels + 1
int stressed_syllable = 0; // position of stressed syllable
unsigned char phonetic[N_WORD_PHONEMES];
signed char vowel_stress[N_WORD_PHONEMES/2];
strcpy((char *)phonetic, word);
max_stress = GetVowelStress(tr, phonetic, vowel_stress, &vowel_count, &stressed_syllable, 0);
if (new_stress >= STRESS_IS_PRIMARY) {
// promote to primary stress
for (ix = 1; ix < vowel_count; ix++) {
if (vowel_stress[ix] >= max_stress) {
vowel_stress[ix] = new_stress;
break;
}
}
} else {
// remove primary stress
for (ix = 1; ix < vowel_count; ix++) {
if (vowel_stress[ix] > new_stress) // >= allows for diminished stress (=1)
vowel_stress[ix] = new_stress;
}
}
// write out phonemes
ix = 1;
p = phonetic;
while (*p != 0) {
if ((phoneme_tab[*p]->type == phVOWEL) && !(phoneme_tab[*p]->phflags & phNONSYLLABIC)) {
if ((vowel_stress[ix] == STRESS_IS_DIMINISHED) || (vowel_stress[ix] > STRESS_IS_UNSTRESSED))
*word++ = stress_phonemes[(unsigned char)vowel_stress[ix]];
ix++;
}
*word++ = *p++;
}
*word = 0;
}
static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, const ALPHABET *current_alphabet, char word_phonemes[])
{
int posn = 0;
int capitals = 0;
bool non_initial = false;
if (spell_word > 2)
capitals = 2; // speak 'capital'
if (spell_word > 1)
capitals |= 4; // speak character code for unknown letters
while ((*word != ' ') && (*word != 0)) {
word += TranslateLetter(tr, word, phonemes, capitals | non_initial, current_alphabet);
posn++;
non_initial = true;
if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
strcpy(word_phonemes, phonemes);
return NULL;
}
}
SetSpellingStress(tr, phonemes, spell_word, posn);
return word;
}
static const char *const hex_letters[] = {"'e:j", "b'i:", "s'i:", "d'i:", "'i:", "'ef"};
static const char *const modifiers[] = { NULL, "_sub", "_sup", NULL };
// unicode ranges for non-ascii digits 0-9 (these must be in ascending order)
static const int number_ranges[] = {
0x660, 0x6f0, // arabic
0x966, 0x9e6, 0xa66, 0xae6, 0xb66, 0xbe6, 0xc66, 0xce6, 0xd66, // indic
0xe50, 0xed0, 0xf20, 0x1040, 0x1090,
0
};
static int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, const ALPHABET *current_alphabet)
{
// get pronunciation for an isolated letter
// return number of bytes used by the letter
// control bit 0: a non-initial letter in a word
// bit 1: say 'capital'
// bit 2: say character code for unknown letters
int n_bytes;
int letter;
int len;
const ALPHABET *alphabet;
int al_offset;
int al_flags;
int number;
int phontab_1;
char capital[30];
char ph_buf[80];
char ph_buf2[80];
char ph_alphabet[80];
char hexbuf[12];
static const char pause_string[] = { phonPAUSE, 0 };
ph_buf[0] = 0;
ph_alphabet[0] = 0;
capital[0] = 0;
phontab_1 = translator->phoneme_tab_ix;
n_bytes = utf8_in(&letter, word);
if ((letter & 0xfff00) == 0x0e000)
letter &= 0xff; // uncode private usage area
if (control & 2) {
// include CAPITAL information
if (iswupper(letter))
Lookup(tr, "_cap", capital);
}
letter = towlower2(letter, tr);
LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
if (ph_buf[0] == 0) {
// is this a subscript or superscript letter ?
int c;
if ((c = IsSuperscript(letter)) != 0) {
letter = c & 0x3fff;
const char *modifier;
if ((control & 4 ) && ((modifier = modifiers[c >> 14]) != NULL)) {
// don't say "superscript" during normal text reading
Lookup(tr, modifier, capital);
if (capital[0] == 0) {
capital[2] = SetTranslator3(ESPEAKNG_DEFAULT_VOICE); // overwrites previous contents of translator3
Lookup(translator3, modifier, &capital[3]);
if (capital[3] != 0) {
capital[0] = phonPAUSE;
capital[1] = phonSWITCH;
len = strlen(&capital[3]);
capital[len+3] = phonSWITCH;
capital[len+4] = phontab_1;
capital[len+5] = 0;
}
}
}
}
LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
}
if (ph_buf[0] == phonSWITCH) {
strcpy(phonemes, ph_buf);
return 0;
}
if ((ph_buf[0] == 0) && ((number = NonAsciiNumber(letter)) > 0)) {
// convert a non-ascii number to 0-9
LookupLetter(tr, number, 0, ph_buf, control & 1);
}
al_offset = 0;
al_flags = 0;
if ((alphabet = AlphabetFromChar(letter)) != NULL) {
al_offset = alphabet->offset;
al_flags = alphabet->flags;
}
if (alphabet != current_alphabet) {
// speak the name of the alphabet
current_alphabet = alphabet;
if ((alphabet != NULL) && !(al_flags & AL_DONT_NAME) && (al_offset != translator->letter_bits_offset)) {
if ((al_flags & AL_DONT_NAME) || (al_offset == translator->langopts.alt_alphabet) || (al_offset == translator->langopts.our_alphabet)) {
// don't say the alphabet name
} else {
ph_buf2[0] = 0;
if (Lookup(translator, alphabet->name, ph_alphabet) == 0) { // the original language for the current voice
// Can't find the local name for this alphabet, use the English name
ph_alphabet[2] = SetTranslator3(ESPEAKNG_DEFAULT_VOICE); // overwrites previous contents of translator3
Lookup(translator3, alphabet->name, ph_buf2);
} else if (translator != tr) {
phontab_1 = tr->phoneme_tab_ix;
strcpy(ph_buf2, ph_alphabet);
ph_alphabet[2] = translator->phoneme_tab_ix;
}
if (ph_buf2[0] != 0) {
// we used a different language for the alphabet name (now in ph_buf2)
ph_alphabet[0] = phonPAUSE;
ph_alphabet[1] = phonSWITCH;
strcpy(&ph_alphabet[3], ph_buf2);
len = strlen(ph_buf2) + 3;
ph_alphabet[len] = phonSWITCH;
ph_alphabet[len+1] = phontab_1;
ph_alphabet[len+2] = 0;
}
}
}
}
// caution: SetWordStress() etc don't expect phonSWITCH + phoneme table number
if (ph_buf[0] == 0) {
int language;
if ((al_offset != 0) && (al_offset == translator->langopts.alt_alphabet))
language = translator->langopts.alt_alphabet_lang;
else if ((alphabet != NULL) && (alphabet->language != 0) && !(al_flags & AL_NOT_LETTERS))
language = alphabet->language;
else
language = L('e', 'n');
if ((language != tr->translator_name) || (language == L('k', 'o'))) {
char *p3;
//int initial, code;
char hangul_buf[12];
// speak in the language for this alphabet (or English)
char word_buf[5];
ph_buf[2] = SetTranslator3(WordToString2(word_buf, language));
if (translator3 != NULL) {
int code;
if (((code = letter - 0xac00) >= 0) && (letter <= 0xd7af)) {
// Special case for Korean letters.
// break a syllable hangul into 2 or 3 individual jamo
hangul_buf[0] = ' ';
p3 = &hangul_buf[1];
int initial;
if ((initial = (code/28)/21) != 11) {
p3 += utf8_out(initial + 0x1100, p3);
}
utf8_out(((code/28) % 21) + 0x1161, p3); // medial
utf8_out((code % 28) + 0x11a7, &p3[3]); // final
p3[6] = ' ';
p3[7] = 0;
ph_buf[3] = 0;
TranslateRules(translator3, &hangul_buf[1], &ph_buf[3], sizeof(ph_buf)-3, NULL, 0, NULL);
SetWordStress(translator3, &ph_buf[3], NULL, -1, 0);
} else
LookupLetter(translator3, letter, word[n_bytes], &ph_buf[3], control & 1);
if (ph_buf[3] == phonSWITCH) {
// another level of language change
ph_buf[2] = SetTranslator3(&ph_buf[4]);
LookupLetter(translator3, letter, word[n_bytes], &ph_buf[3], control & 1);
}
SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
if (ph_buf[3] != 0) {
ph_buf[0] = phonPAUSE;
ph_buf[1] = phonSWITCH;
len = strlen(&ph_buf[3]) + 3;
ph_buf[len] = phonSWITCH; // switch back
ph_buf[len+1] = tr->phoneme_tab_ix;
ph_buf[len+2] = 0;
}
}
}
}
if (ph_buf[0] == 0) {
// character name not found
int speak_letter_number = 1;
if (!(al_flags & AL_NO_SYMBOL)) {
if (iswalpha(letter))
Lookup(translator, "_?A", ph_buf);
if ((ph_buf[0] == 0) && !iswspace(letter))
Lookup(translator, "_??", ph_buf);
if (ph_buf[0] == 0)
EncodePhonemes("l'et@", ph_buf, NULL);
}
if (!(control & 4) && (al_flags & AL_NOT_CODE)) {
// don't speak the character code number, unless we want full details of this character
speak_letter_number = 0;
}
if (speak_letter_number) {
char *p2;
if (al_offset == 0x2800) {
// braille dots symbol, list the numbered dots
p2 = hexbuf;
for (int ix = 0; ix < 8; ix++) {
if (letter & (1 << ix))
*p2++ = '1'+ix;
}
*p2 = 0;
} else {
// speak the hexadecimal number of the character code
sprintf(hexbuf, "%x", letter);
}
char *pbuf;
pbuf = ph_buf;
for (p2 = hexbuf; *p2 != 0; p2++) {
pbuf += strlen(pbuf);
*pbuf++ = phonPAUSE_VSHORT;
LookupLetter(translator, *p2, 0, pbuf, 1);
if (((pbuf[0] == 0) || (pbuf[0] == phonSWITCH)) && (*p2 >= 'a')) {
// This language has no translation for 'a' to 'f', speak English names using base phonemes
EncodePhonemes(hex_letters[*p2 - 'a'], pbuf, NULL);
}
}
strcat(pbuf, pause_string);
}
}
len = strlen(phonemes);
if (tr->langopts.accents & 2) // 'capital' before or after the word ?
sprintf(ph_buf2, "%c%s%s%s", 0xff, ph_alphabet, ph_buf, capital);
else
sprintf(ph_buf2, "%c%s%s%s", 0xff, ph_alphabet, capital, ph_buf); // the 0xff marker will be removed or replaced in SetSpellingStress()
if ((len + strlen(ph_buf2)) < N_WORD_PHONEMES)
strcpy(&phonemes[len], ph_buf2);
return n_bytes;
}
// append plural suffixes depending on preceding letter
static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes)
{
char word_zz[4] = { ' ', 'z', 'z', 0 };
char word_iz[4] = { ' ', 'i', 'z', 0 };
char word_ss[4] = { ' ', 's', 's', 0 };
if (flags & FLAG_HAS_PLURAL) {
// s or 's suffix, append [s], [z] or [Iz] depending on previous letter
if (last_char == 'f')
TranslateRules(tr, &word_ss[1], word_phonemes, N_WORD_PHONEMES,
NULL, 0, NULL);
else if ((last_char == 0) || (strchr_w("hsx", last_char) == NULL))
TranslateRules(tr, &word_zz[1], word_phonemes, N_WORD_PHONEMES,
NULL, 0, NULL);
else
TranslateRules(tr, &word_iz[1], word_phonemes, N_WORD_PHONEMES,
NULL, 0, NULL);
}
}
static int CheckDottedAbbrev(char *word1)
{
int wc;
int count = 0;
int ix;
char *word;
char *wbuf;
char word_buf[80];
word = word1;
wbuf = word_buf;
for (;;) {
int ok = 0;
int nbytes = utf8_in(&wc, word);
if ((word[nbytes] == ' ') && IsAlpha(wc)) {
if (word[nbytes+1] == '.') {
if (word[nbytes+2] == ' ')
ok = 1;
else if (word[nbytes+2] == '\'' && word[nbytes+3] == 's') {
nbytes += 2; // delete the final dot (eg. u.s.a.'s)
ok = 2;
}
} else if ((count > 0))
ok = 2;
}
if (ok == 0)
break;
for (ix = 0; ix < nbytes; ix++)
*wbuf++ = word[ix];
count++;
if (ok == 2) {
word += nbytes;
break;
}
word += (nbytes + 3);
}
if (count > 1) {
ix = wbuf - word_buf;
memcpy(word1, word_buf, ix);
while (&word1[ix] < word)
word1[ix++] = ' ';
dictionary_skipwords = (count - 1)*2;
}
return count;
}
static int NonAsciiNumber(int letter)
{
// Change non-ascii digit into ascii digit '0' to '9', (or -1 if not)
const int *p;
int base;
for (p = number_ranges; (base = *p) != 0; p++) {
if (letter < base)
break; // not found
if (letter < (base+10))
return letter-base+'0';
}
return -1;
}
static int Unpronouncable(Translator *tr, char *word, int posn)
{
/* Determines whether a word in 'unpronouncable', i.e. whether it should
be spoken as individual letters.
This function may be language specific. This is a generic version.
*/
int c;
int c1 = 0;
int vowel_posn = 9;
int index;
int count;
const ALPHABET *alphabet;
utf8_in(&c, word);
if ((tr->letter_bits_offset > 0) && (c < 0x241)) {
// Latin characters for a language with a non-latin alphabet
return 0; // so we can re-translate the word as English
}
if (((alphabet = AlphabetFromChar(c)) != NULL) && (alphabet->offset != tr->letter_bits_offset)) {
// Character is not in our alphabet
return 0;
}
if (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 1)
return 0;
if (((c = *word) == ' ') || (c == 0) || (c == '\''))
return 0;
index = 0;
count = 0;
for (;;) {
index += utf8_in(&c, &word[index]);
if ((c == 0) || (c == ' '))
break;
if ((c == '\'') && ((count > 1) || (posn > 0)))
break; // "tv'" but not "l'"
if (count == 0)
c1 = c;
if ((c == '\'') && (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 3)) {
// don't count apostrophe
} else
count++;
if (IsVowel(tr, c)) {
vowel_posn = count; // position of the first vowel
break;
}
if ((c != '\'') && !iswalpha(c))
return 0;
}
if ((vowel_posn > 2) && (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 2)) {
// Lookup unpronounable rules in *_rules
return Unpronouncable2(tr, word);
}
if (c1 == tr->langopts.param[LOPT_UNPRONOUNCABLE])
vowel_posn--; // disregard this as the initial letter when counting
if (vowel_posn > (tr->langopts.max_initial_consonants+1))
return 1; // no vowel, or no vowel in first few letters
return 0;
}
static int Unpronouncable2(Translator *tr, char *word)
{
int c;
int end_flags;
char ph_buf[N_WORD_PHONEMES];
ph_buf[0] = 0;
c = word[-1];
word[-1] = ' '; // ensure there is a space before the "word"
end_flags = TranslateRules(tr, word, ph_buf, sizeof(ph_buf), NULL, FLAG_UNPRON_TEST, NULL);
word[-1] = c;
if ((end_flags == 0) || (end_flags & SUFX_UNPRON))
return 1;
return 0;
}