12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201 |
-
- /*
- * Copyright (C) 2005 to 2014 by Jonathan Duddington
- * email: [email protected]
- * Copyright (C) 2015-2017 Reece H. Dunn
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see: <http://www.gnu.org/licenses/>.
- */
-
- #include "config.h"
-
- #include <ctype.h>
- #include <stdbool.h>
- #include <stdint.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <wchar.h>
- #include <wctype.h>
-
- #include <espeak-ng/espeak_ng.h>
- #include <espeak-ng/speak_lib.h>
- #include <espeak-ng/encoding.h>
-
- #include "translate.h"
- #include "translateword.h"
- #include "common.h" // for strncpy0
- #include "dictionary.h" // for TranslateRules, LookupDictList
- #include "numbers.h" // for SetSpellingStress, ...
- #include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonPAUSE_...
- #include "readclause.h" // for towlower2
- #include "synthdata.h" // for SelectPhonemeTable, LookupPhonemeTable
- #include "ucd/ucd.h" // for ucd_toupper
- #include "voice.h" // for voice, voice_t
-
-
- static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes);
- static void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags);
- static void ChangeWordStress(Translator *tr, char *word, int new_stress);
- static int CheckDottedAbbrev(char *word1);
- static int NonAsciiNumber(int letter);
- static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, const ALPHABET *current_alphabet, char word_phonemes[]);
- static int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, const ALPHABET *current_alphabet);
- static int Unpronouncable(Translator *tr, char *word, int posn);
- static int Unpronouncable2(Translator *tr, char *word);
-
- int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes)
- {
- // word1 is terminated by space (0x20) character
-
- char *word1;
- int word_length;
- int ix;
- char *p;
- int pfix;
- int n_chars;
- unsigned int dictionary_flags[2];
- unsigned int dictionary_flags2[2];
- int end_type = 0;
- int end_type1 = 0;
- int prefix_type = 0;
- int prefix_stress;
- char *wordx;
- char phonemes[N_WORD_PHONEMES];
- char phonemes2[N_WORD_PHONEMES];
- char prefix_phonemes[N_WORD_PHONEMES];
- char unpron_phonemes[N_WORD_PHONEMES];
- char end_phonemes[N_WORD_PHONEMES];
- char end_phonemes2[N_WORD_PHONEMES];
- char word_copy[N_WORD_BYTES];
- char word_copy2[N_WORD_BYTES];
- int word_copy_length;
- char prefix_chars[0x3f + 2];
- bool found = false;
- int end_flags;
- int c_temp; // save a character byte while we temporarily replace it with space
- int first_char;
- int last_char = 0;
- int prefix_flags = 0;
- bool more_suffixes;
- bool confirm_prefix;
- int spell_word;
- int emphasize_allcaps = 0;
- int wflags;
- int was_unpronouncable = 0;
- int loopcount;
- int add_suffix_phonemes = 0;
- WORD_TAB wtab_null[8];
-
- if (wtab == NULL) {
- memset(wtab_null, 0, sizeof(wtab_null));
- wtab = wtab_null;
- }
- wflags = wtab->flags;
-
- dictionary_flags[0] = 0;
- dictionary_flags[1] = 0;
- dictionary_flags2[0] = 0;
- dictionary_flags2[1] = 0;
- dictionary_skipwords = 0;
-
- phonemes[0] = 0;
- unpron_phonemes[0] = 0;
- prefix_phonemes[0] = 0;
- end_phonemes[0] = 0;
-
- if (tr->data_dictlist == NULL) {
- // dictionary is not loaded
- word_phonemes[0] = 0;
- return 0;
- }
-
- // count the length of the word
- word1 = word_start;
- if (*word1 == ' ') word1++; // possibly a dot was replaced by space: $dot
- wordx = word1;
-
- utf8_in(&first_char, wordx);
- word_length = 0;
- while ((*wordx != 0) && (*wordx != ' ')) {
- wordx += utf8_in(&last_char, wordx);
- word_length++;
- }
-
- word_copy_length = wordx - word_start;
- if (word_copy_length >= N_WORD_BYTES)
- word_copy_length = N_WORD_BYTES-1;
- memcpy(word_copy2, word_start, word_copy_length);
-
- spell_word = 0;
-
- if ((word_length == 1) && (wflags & FLAG_TRANSLATOR2)) {
- // retranslating a 1-character word using a different language, say its name
- utf8_in(&c_temp, wordx+1); // the next character
- if (!IsAlpha(c_temp) || (AlphabetFromChar(last_char) != AlphabetFromChar(c_temp)))
- spell_word = 1;
- }
-
- if (option_sayas == SAYAS_KEY) {
- if (word_length == 1)
- spell_word = 4;
- else {
- // is there a translation for this keyname ?
- word1--;
- *word1 = '_'; // prefix keyname with '_'
- found = LookupDictList(tr, &word1, phonemes, dictionary_flags, 0, wtab);
- }
- }
-
- // try an initial lookup in the dictionary list, we may find a pronunciation specified, or
- // we may just find some flags
- if (option_sayas & 0x10) {
- // SAYAS_CHAR, SAYAS_GYLPH, or SAYAS_SINGLE_CHAR
- spell_word = option_sayas & 0xf; // 2,3,4
- } else {
- if (!found)
- found = LookupDictList(tr, &word1, phonemes, dictionary_flags, FLAG_ALLOW_TEXTMODE, wtab); // the original word
-
- if ((dictionary_flags[0] & (FLAG_ALLOW_DOT | FLAG_NEEDS_DOT)) && (wordx[1] == '.'))
- wordx[1] = ' '; // remove a Dot after this word
-
- if (dictionary_flags[0] & FLAG_TEXTMODE) {
- if (word_out != NULL)
- strcpy(word_out, word1);
-
- return dictionary_flags[0];
- } else if ((found == false) && (dictionary_flags[0] & FLAG_SKIPWORDS) && !(dictionary_flags[0] & FLAG_ABBREV)) {
- // grouped words, but no translation. Join the words with hyphens.
- wordx = word1;
- ix = 0;
- while (ix < dictionary_skipwords) {
- if (*wordx == ' ') {
- *wordx = '-';
- ix++;
- }
- wordx++;
- }
- }
-
- if ((word_length == 1) && (dictionary_skipwords == 0)) {
- // is this a series of single letters separated by dots?
- if (CheckDottedAbbrev(word1)) {
- dictionary_flags[0] = 0;
- dictionary_flags[1] = 0;
- spell_word = 1;
- if (dictionary_skipwords)
- dictionary_flags[0] = FLAG_SKIPWORDS;
- }
- }
-
- if (phonemes[0] == phonSWITCH) {
- // change to another language in order to translate this word
- strcpy(word_phonemes, phonemes);
- return 0;
- }
-
- if (!found && (dictionary_flags[0] & FLAG_ABBREV)) {
- // the word has $abbrev flag, but no pronunciation specified. Speak as individual letters
- spell_word = 1;
- }
-
- if (!found && iswdigit(first_char)) {
- Lookup(tr, "_0lang", word_phonemes);
- if (word_phonemes[0] == phonSWITCH)
- return 0;
-
- if ((tr->langopts.numbers2 & NUM2_ENGLISH_NUMERALS) && !(wtab->flags & FLAG_CHAR_REPLACED)) {
- // for this language, speak English numerals (0-9) with the English voice
- sprintf(word_phonemes, "%c", phonSWITCH);
- return 0;
- }
-
- found = TranslateNumber(tr, word1, phonemes, dictionary_flags, wtab, 0);
- }
-
- if (!found && ((wflags & FLAG_UPPERS) != FLAG_FIRST_UPPER)) {
- // either all upper or all lower case
-
- if ((tr->langopts.numbers & NUM_ROMAN) || ((tr->langopts.numbers & NUM_ROMAN_CAPITALS) && (wflags & FLAG_ALL_UPPER))) {
- if ((wflags & FLAG_LAST_WORD) || !(wtab[1].flags & FLAG_NOSPACE)) {
- // don't use Roman number if this word is not separated from the next word (eg. "XLTest")
- if ((found = TranslateRoman(tr, word1, phonemes, wtab)) != 0)
- dictionary_flags[0] |= FLAG_ABBREV; // prevent emphasis if capitals
- }
- }
- }
-
- if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha(first_char)) {
- if ((option_tone_flags & OPTION_EMPHASIZE_ALLCAPS) && !(dictionary_flags[0] & FLAG_ABBREV)) {
- // emphasize words which are in capitals
- emphasize_allcaps = FLAG_EMPHASIZED;
- } else if (!found && !(dictionary_flags[0] & FLAG_SKIPWORDS) && (word_length < 4) && (tr->clause_lower_count > 3)
- && (tr->clause_upper_count <= tr->clause_lower_count)) {
- // An upper case word in a lower case clause. This could be an abbreviation.
- spell_word = 1;
- }
- }
- }
-
- if (spell_word > 0) {
- // Speak as individual letters
- phonemes[0] = 0;
-
- if (SpeakIndividualLetters(tr, word1, phonemes, spell_word, current_alphabet, word_phonemes) == NULL) {
- if (word_length > 1)
- return FLAG_SPELLWORD; // a mixture of languages, retranslate as individual letters, separated by spaces
- return 0;
- }
- strcpy(word_phonemes, phonemes);
- if (wflags & FLAG_TRANSLATOR2)
- return 0;
-
- addPluralSuffixes(wflags, tr, last_char, word_phonemes);
- return dictionary_flags[0] & FLAG_SKIPWORDS; // for "b.c.d"
- } else if (found == false) {
- // word's pronunciation is not given in the dictionary list, although
- // dictionary_flags may have ben set there
-
- int posn;
- bool non_initial = false;
- int length;
-
- posn = 0;
- length = 999;
- wordx = word1;
-
- while (((length < 3) && (length > 0)) || (word_length > 1 && Unpronouncable(tr, wordx, posn))) {
- // This word looks "unpronouncable", so speak letters individually until we
- // find a remainder that we can pronounce.
- was_unpronouncable = FLAG_WAS_UNPRONOUNCABLE;
- emphasize_allcaps = 0;
-
- if (wordx[0] == '\'')
- break;
-
- if (posn > 0)
- non_initial = true;
-
- wordx += TranslateLetter(tr, wordx, unpron_phonemes, non_initial, current_alphabet);
- posn++;
- if (unpron_phonemes[0] == phonSWITCH) {
- // change to another language in order to translate this word
- strcpy(word_phonemes, unpron_phonemes);
- if (strcmp(&unpron_phonemes[1], ESPEAKNG_DEFAULT_VOICE) == 0)
- return FLAG_SPELLWORD; // _^_en must have been set in TranslateLetter(), not *_rules which uses only _^_
- return 0;
- }
-
- length = 0;
- while (wordx[length] != ' ') length++;
- }
- SetSpellingStress(tr, unpron_phonemes, 0, posn);
-
- // anything left ?
- if (*wordx != ' ') {
- if ((unpron_phonemes[0] != 0) && (wordx[0] != '\'')) {
- // letters which have been spoken individually from affecting the pronunciation of the pronuncable part
- wordx[-1] = ' ';
- }
-
- // Translate the stem
- end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags);
-
- if (phonemes[0] == phonSWITCH) {
- // change to another language in order to translate this word
- strcpy(word_phonemes, phonemes);
- return 0;
- }
-
- if ((phonemes[0] == 0) && (end_phonemes[0] == 0)) {
- int wc;
- // characters not recognised, speak them individually
- // ?? should we say super/sub-script numbers and letters here?
- utf8_in(&wc, wordx);
- if ((word_length == 1) && (IsAlpha(wc) || IsSuperscript(wc))) {
- if ((wordx = SpeakIndividualLetters(tr, wordx, phonemes, spell_word, current_alphabet, word_phonemes)) == NULL)
- return 0;
- strcpy(word_phonemes, phonemes);
- return 0;
- }
- }
-
- c_temp = wordx[-1];
-
- found = false;
- confirm_prefix = true;
- for (loopcount = 0; (loopcount < 50) && (end_type & SUFX_P); loopcount++) {
- // Found a standard prefix, remove it and retranslate
- // loopcount guards against an endless loop
- if (confirm_prefix && !(end_type & SUFX_B)) {
- int end2;
- char end_phonemes2[N_WORD_PHONEMES];
-
- // remove any standard suffix and confirm that the prefix is still recognised
- phonemes2[0] = 0;
- end2 = TranslateRules(tr, wordx, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags);
- if (end2) {
- RemoveEnding(tr, wordx, end2, word_copy);
- end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags);
- memcpy(wordx, word_copy, strlen(word_copy));
- if ((end_type & SUFX_P) == 0) {
- // after removing the suffix, the prefix is no longer recognised.
- // Keep the suffix, but don't use the prefix
- end_type = end2;
- strcpy(phonemes, phonemes2);
- strcpy(end_phonemes, end_phonemes2);
- if (option_phonemes & espeakPHONEMES_TRACE) {
- DecodePhonemes(end_phonemes, end_phonemes2);
- fprintf(f_trans, " suffix [%s]\n\n", end_phonemes2);
- }
- }
- confirm_prefix = false;
- continue;
- }
- }
-
- prefix_type = end_type;
-
- if (prefix_type & SUFX_V)
- tr->expect_verb = 1; // use the verb form of the word
-
- wordx[-1] = c_temp;
-
- if ((prefix_type & SUFX_B) == 0) {
- for (ix = (prefix_type & 0xf); ix > 0; ix--) { // num. of characters to remove
- wordx++;
- while ((*wordx & 0xc0) == 0x80) wordx++; // for multibyte characters
- }
- } else {
- pfix = 1;
- prefix_chars[0] = 0;
- n_chars = prefix_type & 0x3f;
-
- for (ix = 0; ix < n_chars; ix++) { // num. of bytes to remove
- prefix_chars[pfix++] = *wordx++;
-
- if ((prefix_type & SUFX_B) && (ix == (n_chars-1)))
- prefix_chars[pfix-1] = 0; // discard the last character of the prefix, this is the separator character
- }
- prefix_chars[pfix] = 0;
- }
- c_temp = wordx[-1];
- wordx[-1] = ' ';
- confirm_prefix = true;
- wflags |= FLAG_PREFIX_REMOVED;
-
- if (prefix_type & SUFX_B) {
- // SUFX_B is used for Turkish, tr_rules contains " ' (Pb"
- // examine the prefix part
- char *wordpf;
- char prefix_phonemes2[12];
-
- strncpy0(prefix_phonemes2, end_phonemes, sizeof(prefix_phonemes2));
- wordpf = &prefix_chars[1];
- strcpy(prefix_phonemes, phonemes);
-
- // look for stress marker or $abbrev
- found = LookupDictList(tr, &wordpf, phonemes, dictionary_flags, 0, wtab);
- if (found)
- strcpy(prefix_phonemes, phonemes);
- if (dictionary_flags[0] & FLAG_ABBREV) {
- prefix_phonemes[0] = 0;
- SpeakIndividualLetters(tr, wordpf, prefix_phonemes, 1, current_alphabet, word_phonemes);
- }
- } else
- strcat(prefix_phonemes, end_phonemes);
- end_phonemes[0] = 0;
-
- end_type = 0;
- found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, SUFX_P, wtab); // without prefix
- if (dictionary_flags[0] == 0) {
- dictionary_flags[0] = dictionary_flags2[0];
- dictionary_flags[1] = dictionary_flags2[1];
- } else
- prefix_flags = 1;
- if (found == false) {
- end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags & (FLAG_HYPHEN_AFTER | FLAG_PREFIX_REMOVED), dictionary_flags);
-
- if (phonemes[0] == phonSWITCH) {
- // change to another language in order to translate this word
- wordx[-1] = c_temp;
- strcpy(word_phonemes, phonemes);
- return 0;
- }
- }
- }
-
- if ((end_type != 0) && !(end_type & SUFX_P)) {
- end_type1 = end_type;
- strcpy(phonemes2, phonemes);
-
- // The word has a standard ending, re-translate without this ending
- end_flags = RemoveEnding(tr, wordx, end_type, word_copy);
- more_suffixes = true;
-
- while (more_suffixes) {
- more_suffixes = false;
- phonemes[0] = 0;
-
- if (prefix_phonemes[0] != 0) {
- // lookup the stem without the prefix removed
- wordx[-1] = c_temp;
- found = LookupDictList(tr, &word1, phonemes, dictionary_flags2, end_flags, wtab); // include prefix, but not suffix
- wordx[-1] = ' ';
- if (phonemes[0] == phonSWITCH) {
- // change to another language in order to translate this word
- memcpy(wordx, word_copy, strlen(word_copy));
- strcpy(word_phonemes, phonemes);
- return 0;
- }
- if (dictionary_flags[0] == 0) {
- dictionary_flags[0] = dictionary_flags2[0];
- dictionary_flags[1] = dictionary_flags2[1];
- }
- if (found)
- prefix_phonemes[0] = 0; // matched whole word, don't need prefix now
-
- if ((found == false) && (dictionary_flags2[0] != 0))
- prefix_flags = 1;
- }
- if (found == false) {
- found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix
- if (phonemes[0] == phonSWITCH) {
- // change to another language in order to translate this word
- memcpy(wordx, word_copy, strlen(word_copy));
- strcpy(word_phonemes, phonemes);
- return 0;
- }
-
- if (dictionary_flags[0] == 0) {
- dictionary_flags[0] = dictionary_flags2[0];
- dictionary_flags[1] = dictionary_flags2[1];
- }
- }
- if (found == false) {
- if (end_type & SUFX_Q) {
- // don't retranslate, use the original lookup result
- strcpy(phonemes, phonemes2);
- } else {
- if (end_flags & FLAG_SUFX)
- wflags |= FLAG_SUFFIX_REMOVED;
- if (end_type & SUFX_A)
- wflags |= FLAG_SUFFIX_VOWEL;
-
- if (end_type & SUFX_M) {
- // allow more suffixes before this suffix
- strcpy(end_phonemes2, end_phonemes);
- end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags);
- strcat(end_phonemes, end_phonemes2); // add the phonemes for the previous suffixes after this one
-
- if ((end_type != 0) && !(end_type & SUFX_P)) {
- // there is another suffix
- end_flags = RemoveEnding(tr, wordx, end_type, NULL);
- more_suffixes = true;
- }
- } else {
- // don't remove any previous suffix
- TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, NULL, wflags, dictionary_flags);
- end_type = 0;
- }
-
- if (phonemes[0] == phonSWITCH) {
- // change to another language in order to translate this word
- strcpy(word_phonemes, phonemes);
- memcpy(wordx, word_copy, strlen(word_copy));
- wordx[-1] = c_temp;
- return 0;
- }
- }
- }
- }
-
-
- if ((end_type1 & SUFX_T) == 0) {
- // the default is to add the suffix and then determine the word's stress pattern
- AppendPhonemes(tr, phonemes, N_WORD_PHONEMES, end_phonemes);
- end_phonemes[0] = 0;
- }
- memcpy(wordx, word_copy, strlen(word_copy));
- }
-
- wordx[-1] = c_temp;
- }
- }
-
- addPluralSuffixes(wflags, tr, last_char, word_phonemes);
- wflags |= emphasize_allcaps;
-
- // determine stress pattern for this word
-
- add_suffix_phonemes = 0;
- if (end_phonemes[0] != 0)
- add_suffix_phonemes = 2;
-
- prefix_stress = 0;
- for (p = prefix_phonemes; *p != 0; p++) {
- if ((*p == phonSTRESS_P) || (*p == phonSTRESS_P2))
- prefix_stress = *p;
- }
- if (prefix_flags || (prefix_stress != 0)) {
- if ((tr->langopts.param[LOPT_PREFIXES]) || (prefix_type & SUFX_T)) {
- char *p;
- // German, keep a secondary stress on the stem
- SetWordStress(tr, phonemes, dictionary_flags, 3, 0);
-
- // reduce all but the first primary stress
- ix = 0;
- for (p = prefix_phonemes; *p != 0; p++) {
- if (*p == phonSTRESS_P) {
- if (ix == 0)
- ix = 1;
- else
- *p = phonSTRESS_3;
- }
- }
- snprintf(word_phonemes, size_word_phonemes, "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes);
-
- word_phonemes[N_WORD_PHONEMES-1] = 0;
- SetWordStress(tr, word_phonemes, dictionary_flags, -1, 0);
- } else {
- // stress position affects the whole word, including prefix
- snprintf(word_phonemes, size_word_phonemes, "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes);
- word_phonemes[N_WORD_PHONEMES-1] = 0;
- SetWordStress(tr, word_phonemes, dictionary_flags, -1, 0);
- }
- } else {
- SetWordStress(tr, phonemes, dictionary_flags, -1, add_suffix_phonemes);
- snprintf(word_phonemes, size_word_phonemes, "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes);
- word_phonemes[N_WORD_PHONEMES-1] = 0;
- }
-
- if (end_phonemes[0] != 0) {
- // a suffix had the SUFX_T option set, add the suffix after the stress pattern has been determined
- ix = strlen(word_phonemes);
- end_phonemes[N_WORD_PHONEMES-1-ix] = 0; // ensure no buffer overflow
- strcpy(&word_phonemes[ix], end_phonemes);
- }
-
- if (wflags & FLAG_LAST_WORD) {
- // don't use $brk pause before the last word of a sentence
- // (but allow it for emphasis, see below
- dictionary_flags[0] &= ~FLAG_PAUSE1;
- }
-
- if ((wflags & FLAG_HYPHEN) && (tr->langopts.stress_flags & S_HYPEN_UNSTRESS))
- ChangeWordStress(tr, word_phonemes, 3);
- else if (wflags & FLAG_EMPHASIZED2) {
- // A word is indicated in the source text as stressed
- // Give it stress level 6 (for the intonation module)
- ChangeWordStress(tr, word_phonemes, 6);
-
- if (wflags & FLAG_EMPHASIZED)
- dictionary_flags[0] |= FLAG_PAUSE1; // precede by short pause
- } else if (wtab[dictionary_skipwords].flags & FLAG_LAST_WORD) {
- // the word has attribute to stress or unstress when at end of clause
- if (dictionary_flags[0] & (FLAG_STRESS_END | FLAG_STRESS_END2))
- ChangeWordStress(tr, word_phonemes, 4);
- else if ((dictionary_flags[0] & FLAG_UNSTRESS_END) && (any_stressed_words))
- ChangeWordStress(tr, word_phonemes, 3);
- }
-
- // dictionary flags for this word give a clue about which alternative pronunciations of
- // following words to use.
- if (end_type1 & SUFX_F) {
- // expect a verb form, with or without -s suffix
- tr->expect_verb = 2;
- tr->expect_verb_s = 2;
- }
-
- if (dictionary_flags[1] & FLAG_PASTF) {
- // expect perfect tense in next two words
- tr->expect_past = 3;
- tr->expect_verb = 0;
- tr->expect_noun = 0;
- } else if (dictionary_flags[1] & FLAG_VERBF) {
- // expect a verb in the next word
- tr->expect_verb = 2;
- tr->expect_verb_s = 0; // verb won't have -s suffix
- tr->expect_noun = 0;
- } else if (dictionary_flags[1] & FLAG_VERBSF) {
- // expect a verb, must have a -s suffix
- tr->expect_verb = 0;
- tr->expect_verb_s = 2;
- tr->expect_past = 0;
- tr->expect_noun = 0;
- } else if (dictionary_flags[1] & FLAG_NOUNF) {
- // not expecting a verb next
- tr->expect_noun = 2;
- tr->expect_verb = 0;
- tr->expect_verb_s = 0;
- tr->expect_past = 0;
- }
-
- if ((wordx[0] != 0) && (!(dictionary_flags[1] & FLAG_VERB_EXT))) {
- if (tr->expect_verb > 0)
- tr->expect_verb--;
-
- if (tr->expect_verb_s > 0)
- tr->expect_verb_s--;
-
- if (tr->expect_noun > 0)
- tr->expect_noun--;
-
- if (tr->expect_past > 0)
- tr->expect_past--;
- }
-
- if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha(first_char) && (first_char != 'i')) {
- // English Specific !!!!
- // any single letter before a dot is an abbreviation, except 'I'
- dictionary_flags[0] |= FLAG_ALLOW_DOT;
- }
-
- if ((tr->langopts.param[LOPT_ALT] & 2) && ((dictionary_flags[0] & (FLAG_ALT_TRANS | FLAG_ALT2_TRANS)) != 0))
- ApplySpecialAttribute2(tr, word_phonemes, dictionary_flags[0]);
-
- dictionary_flags[0] |= was_unpronouncable;
- memcpy(word_start, word_copy2, word_copy_length);
- return dictionary_flags[0];
- }
-
-
- void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags)
- {
- // apply after the translation is complete
- int len;
-
-
- len = strlen(phonemes);
-
- if (tr->langopts.param[LOPT_ALT] & 2) {
- for (int ix = 0; ix < (len-1); ix++) {
- if (phonemes[ix] == phonSTRESS_P) {
- char *p;
- p = &phonemes[ix+1];
- if ((dict_flags & FLAG_ALT2_TRANS) != 0) {
- if (*p == PhonemeCode('E'))
- *p = PhonemeCode('e');
- if (*p == PhonemeCode('O'))
- *p = PhonemeCode('o');
- } else {
- if (*p == PhonemeCode('e'))
- *p = PhonemeCode('E');
- if (*p == PhonemeCode('o'))
- *p = PhonemeCode('O');
- }
- break;
- }
- }
- }
- }
-
-
- static void ChangeWordStress(Translator *tr, char *word, int new_stress)
- {
- int ix;
- unsigned char *p;
- int max_stress;
- int vowel_count; // num of vowels + 1
- int stressed_syllable = 0; // position of stressed syllable
- unsigned char phonetic[N_WORD_PHONEMES];
- signed char vowel_stress[N_WORD_PHONEMES/2];
-
- strcpy((char *)phonetic, word);
- max_stress = GetVowelStress(tr, phonetic, vowel_stress, &vowel_count, &stressed_syllable, 0);
-
- if (new_stress >= STRESS_IS_PRIMARY) {
- // promote to primary stress
- for (ix = 1; ix < vowel_count; ix++) {
- if (vowel_stress[ix] >= max_stress) {
- vowel_stress[ix] = new_stress;
- break;
- }
- }
- } else {
- // remove primary stress
- for (ix = 1; ix < vowel_count; ix++) {
- if (vowel_stress[ix] > new_stress) // >= allows for diminished stress (=1)
- vowel_stress[ix] = new_stress;
- }
- }
-
- // write out phonemes
- ix = 1;
- p = phonetic;
- while (*p != 0) {
- if ((phoneme_tab[*p]->type == phVOWEL) && !(phoneme_tab[*p]->phflags & phNONSYLLABIC)) {
- if ((vowel_stress[ix] == STRESS_IS_DIMINISHED) || (vowel_stress[ix] > STRESS_IS_UNSTRESSED))
- *word++ = stress_phonemes[(unsigned char)vowel_stress[ix]];
-
- ix++;
- }
- *word++ = *p++;
- }
- *word = 0;
- }
-
- static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, const ALPHABET *current_alphabet, char word_phonemes[])
- {
- int posn = 0;
- int capitals = 0;
- bool non_initial = false;
-
- if (spell_word > 2)
- capitals = 2; // speak 'capital'
- if (spell_word > 1)
- capitals |= 4; // speak character code for unknown letters
-
- while ((*word != ' ') && (*word != 0)) {
- word += TranslateLetter(tr, word, phonemes, capitals | non_initial, current_alphabet);
- posn++;
- non_initial = true;
- if (phonemes[0] == phonSWITCH) {
- // change to another language in order to translate this word
- strcpy(word_phonemes, phonemes);
- return NULL;
- }
- }
- SetSpellingStress(tr, phonemes, spell_word, posn);
- return word;
- }
-
-
// Six phoneme strings, one per entry.
// NOTE(review): presumably the spoken names of the hexadecimal digits a-f --
// confirm against the code that indexes this table.
static const char *const hex_letters[] = {
	"'e:j",
	"b'i:",
	"s'i:",
	"d'i:",
	"'i:",
	"'ef"
};

// Dictionary keys looked up for subscript/superscript letters; TranslateLetter()
// indexes this with the top bits of the IsSuperscript() result. NULL = nothing to say.
static const char *const modifiers[] = {
	NULL,
	"_sub",
	"_sup",
	NULL
};

// unicode ranges for non-ascii digits 0-9 (these must be in ascending order);
// the list is terminated by 0
static const int number_ranges[] = {
	// arabic
	0x660, 0x6f0,
	// indic
	0x966, 0x9e6, 0xa66, 0xae6, 0xb66, 0xbe6, 0xc66, 0xce6, 0xd66,
	0xe50, 0xed0, 0xf20, 0x1040, 0x1090,
	0
};
-
-
- static int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, const ALPHABET *current_alphabet)
- {
- // get pronunciation for an isolated letter
- // return number of bytes used by the letter
- // control bit 0: a non-initial letter in a word
- // bit 1: say 'capital'
- // bit 2: say character code for unknown letters
-
- int n_bytes;
- int letter;
- int len;
- const ALPHABET *alphabet;
- int al_offset;
- int al_flags;
- int number;
- int phontab_1;
- char capital[30];
- char ph_buf[80];
- char ph_buf2[80];
- char ph_alphabet[80];
- char hexbuf[12];
- static const char pause_string[] = { phonPAUSE, 0 };
-
- ph_buf[0] = 0;
- ph_alphabet[0] = 0;
- capital[0] = 0;
- phontab_1 = translator->phoneme_tab_ix;
-
- n_bytes = utf8_in(&letter, word);
-
- if ((letter & 0xfff00) == 0x0e000)
- letter &= 0xff; // uncode private usage area
-
- if (control & 2) {
- // include CAPITAL information
- if (iswupper(letter))
- Lookup(tr, "_cap", capital);
- }
- letter = towlower2(letter, tr);
- LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
-
- if (ph_buf[0] == 0) {
- // is this a subscript or superscript letter ?
- int c;
- if ((c = IsSuperscript(letter)) != 0) {
- letter = c & 0x3fff;
-
- const char *modifier;
- if ((control & 4 ) && ((modifier = modifiers[c >> 14]) != NULL)) {
- // don't say "superscript" during normal text reading
- Lookup(tr, modifier, capital);
- if (capital[0] == 0) {
- capital[2] = SetTranslator3(ESPEAKNG_DEFAULT_VOICE); // overwrites previous contents of translator3
- Lookup(translator3, modifier, &capital[3]);
- if (capital[3] != 0) {
- capital[0] = phonPAUSE;
- capital[1] = phonSWITCH;
- len = strlen(&capital[3]);
- capital[len+3] = phonSWITCH;
- capital[len+4] = phontab_1;
- capital[len+5] = 0;
- }
- }
- }
- }
- LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
- }
-
- if (ph_buf[0] == phonSWITCH) {
- strcpy(phonemes, ph_buf);
- return 0;
- }
-
-
- if ((ph_buf[0] == 0) && ((number = NonAsciiNumber(letter)) > 0)) {
- // convert a non-ascii number to 0-9
- LookupLetter(tr, number, 0, ph_buf, control & 1);
- }
-
- al_offset = 0;
- al_flags = 0;
- if ((alphabet = AlphabetFromChar(letter)) != NULL) {
- al_offset = alphabet->offset;
- al_flags = alphabet->flags;
- }
-
- if (alphabet != current_alphabet) {
- // speak the name of the alphabet
- current_alphabet = alphabet;
- if ((alphabet != NULL) && !(al_flags & AL_DONT_NAME) && (al_offset != translator->letter_bits_offset)) {
- if ((al_flags & AL_DONT_NAME) || (al_offset == translator->langopts.alt_alphabet) || (al_offset == translator->langopts.our_alphabet)) {
- // don't say the alphabet name
- } else {
- ph_buf2[0] = 0;
- if (Lookup(translator, alphabet->name, ph_alphabet) == 0) { // the original language for the current voice
- // Can't find the local name for this alphabet, use the English name
- ph_alphabet[2] = SetTranslator3(ESPEAKNG_DEFAULT_VOICE); // overwrites previous contents of translator3
- Lookup(translator3, alphabet->name, ph_buf2);
- } else if (translator != tr) {
- phontab_1 = tr->phoneme_tab_ix;
- strcpy(ph_buf2, ph_alphabet);
- ph_alphabet[2] = translator->phoneme_tab_ix;
- }
-
- if (ph_buf2[0] != 0) {
- // we used a different language for the alphabet name (now in ph_buf2)
- ph_alphabet[0] = phonPAUSE;
- ph_alphabet[1] = phonSWITCH;
- strcpy(&ph_alphabet[3], ph_buf2);
- len = strlen(ph_buf2) + 3;
- ph_alphabet[len] = phonSWITCH;
- ph_alphabet[len+1] = phontab_1;
- ph_alphabet[len+2] = 0;
- }
- }
- }
- }
-
- // caution: SetWordStress() etc don't expect phonSWITCH + phoneme table number
-
- if (ph_buf[0] == 0) {
- int language;
- if ((al_offset != 0) && (al_offset == translator->langopts.alt_alphabet))
- language = translator->langopts.alt_alphabet_lang;
- else if ((alphabet != NULL) && (alphabet->language != 0) && !(al_flags & AL_NOT_LETTERS))
- language = alphabet->language;
- else
- language = L('e', 'n');
-
- if ((language != tr->translator_name) || (language == L('k', 'o'))) {
- char *p3;
- //int initial, code;
- char hangul_buf[12];
-
- // speak in the language for this alphabet (or English)
- char word_buf[5];
- ph_buf[2] = SetTranslator3(WordToString2(word_buf, language));
-
- if (translator3 != NULL) {
- int code;
- if (((code = letter - 0xac00) >= 0) && (letter <= 0xd7af)) {
- // Special case for Korean letters.
- // break a syllable hangul into 2 or 3 individual jamo
-
- hangul_buf[0] = ' ';
- p3 = &hangul_buf[1];
- int initial;
- if ((initial = (code/28)/21) != 11) {
- p3 += utf8_out(initial + 0x1100, p3);
- }
- utf8_out(((code/28) % 21) + 0x1161, p3); // medial
- utf8_out((code % 28) + 0x11a7, &p3[3]); // final
- p3[6] = ' ';
- p3[7] = 0;
- ph_buf[3] = 0;
- TranslateRules(translator3, &hangul_buf[1], &ph_buf[3], sizeof(ph_buf)-3, NULL, 0, NULL);
- SetWordStress(translator3, &ph_buf[3], NULL, -1, 0);
- } else
- LookupLetter(translator3, letter, word[n_bytes], &ph_buf[3], control & 1);
-
- if (ph_buf[3] == phonSWITCH) {
- // another level of language change
- ph_buf[2] = SetTranslator3(&ph_buf[4]);
- LookupLetter(translator3, letter, word[n_bytes], &ph_buf[3], control & 1);
- }
-
- SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
-
- if (ph_buf[3] != 0) {
- ph_buf[0] = phonPAUSE;
- ph_buf[1] = phonSWITCH;
- len = strlen(&ph_buf[3]) + 3;
- ph_buf[len] = phonSWITCH; // switch back
- ph_buf[len+1] = tr->phoneme_tab_ix;
- ph_buf[len+2] = 0;
- }
- }
- }
- }
-
- if (ph_buf[0] == 0) {
- // character name not found
- int speak_letter_number = 1;
- if (!(al_flags & AL_NO_SYMBOL)) {
- if (iswalpha(letter))
- Lookup(translator, "_?A", ph_buf);
-
- if ((ph_buf[0] == 0) && !iswspace(letter))
- Lookup(translator, "_??", ph_buf);
-
- if (ph_buf[0] == 0)
- EncodePhonemes("l'et@", ph_buf, NULL);
- }
-
- if (!(control & 4) && (al_flags & AL_NOT_CODE)) {
- // don't speak the character code number, unless we want full details of this character
- speak_letter_number = 0;
- }
-
- if (speak_letter_number) {
- char *p2;
- if (al_offset == 0x2800) {
- // braille dots symbol, list the numbered dots
- p2 = hexbuf;
- for (int ix = 0; ix < 8; ix++) {
- if (letter & (1 << ix))
- *p2++ = '1'+ix;
- }
- *p2 = 0;
- } else {
- // speak the hexadecimal number of the character code
- sprintf(hexbuf, "%x", letter);
- }
-
- char *pbuf;
- pbuf = ph_buf;
- for (p2 = hexbuf; *p2 != 0; p2++) {
- pbuf += strlen(pbuf);
- *pbuf++ = phonPAUSE_VSHORT;
- LookupLetter(translator, *p2, 0, pbuf, 1);
- if (((pbuf[0] == 0) || (pbuf[0] == phonSWITCH)) && (*p2 >= 'a')) {
- // This language has no translation for 'a' to 'f', speak English names using base phonemes
- EncodePhonemes(hex_letters[*p2 - 'a'], pbuf, NULL);
- }
- }
- strcat(pbuf, pause_string);
- }
- }
-
- len = strlen(phonemes);
-
- if (tr->langopts.accents & 2) // 'capital' before or after the word ?
- sprintf(ph_buf2, "%c%s%s%s", 0xff, ph_alphabet, ph_buf, capital);
- else
- sprintf(ph_buf2, "%c%s%s%s", 0xff, ph_alphabet, capital, ph_buf); // the 0xff marker will be removed or replaced in SetSpellingStress()
- if ((len + strlen(ph_buf2)) < N_WORD_PHONEMES)
- strcpy(&phonemes[len], ph_buf2);
- return n_bytes;
- }
-
- // append plural suffixes depending on preceding letter
- static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes)
- {
- char word_zz[4] = { ' ', 'z', 'z', 0 };
- char word_iz[4] = { ' ', 'i', 'z', 0 };
- char word_ss[4] = { ' ', 's', 's', 0 };
- if (flags & FLAG_HAS_PLURAL) {
- // s or 's suffix, append [s], [z] or [Iz] depending on previous letter
- if (last_char == 'f')
- TranslateRules(tr, &word_ss[1], word_phonemes, N_WORD_PHONEMES,
- NULL, 0, NULL);
- else if ((last_char == 0) || (strchr_w("hsx", last_char) == NULL))
- TranslateRules(tr, &word_zz[1], word_phonemes, N_WORD_PHONEMES,
- NULL, 0, NULL);
- else
- TranslateRules(tr, &word_iz[1], word_phonemes, N_WORD_PHONEMES,
- NULL, 0, NULL);
- }
- }
-
- static int CheckDottedAbbrev(char *word1)
- {
- int wc;
- int count = 0;
- int ix;
- char *word;
- char *wbuf;
- char word_buf[80];
-
- word = word1;
- wbuf = word_buf;
-
- for (;;) {
- int ok = 0;
- int nbytes = utf8_in(&wc, word);
- if ((word[nbytes] == ' ') && IsAlpha(wc)) {
- if (word[nbytes+1] == '.') {
- if (word[nbytes+2] == ' ')
- ok = 1;
- else if (word[nbytes+2] == '\'' && word[nbytes+3] == 's') {
- nbytes += 2; // delete the final dot (eg. u.s.a.'s)
- ok = 2;
- }
- } else if ((count > 0))
- ok = 2;
- }
-
- if (ok == 0)
- break;
-
- for (ix = 0; ix < nbytes; ix++)
- *wbuf++ = word[ix];
-
- count++;
-
- if (ok == 2) {
- word += nbytes;
- break;
- }
-
- word += (nbytes + 3);
- }
-
- if (count > 1) {
- ix = wbuf - word_buf;
- memcpy(word1, word_buf, ix);
- while (&word1[ix] < word)
- word1[ix++] = ' ';
- dictionary_skipwords = (count - 1)*2;
- }
- return count;
- }
-
- static int NonAsciiNumber(int letter)
- {
- // Change non-ascii digit into ascii digit '0' to '9', (or -1 if not)
- const int *p;
- int base;
-
- for (p = number_ranges; (base = *p) != 0; p++) {
- if (letter < base)
- break; // not found
- if (letter < (base+10))
- return letter-base+'0';
- }
- return -1;
- }
-
- static int Unpronouncable(Translator *tr, char *word, int posn)
- {
- /* Determines whether a word in 'unpronouncable', i.e. whether it should
- be spoken as individual letters.
-
- This function may be language specific. This is a generic version.
- */
-
- int c;
- int c1 = 0;
- int vowel_posn = 9;
- int index;
- int count;
- const ALPHABET *alphabet;
-
- utf8_in(&c, word);
- if ((tr->letter_bits_offset > 0) && (c < 0x241)) {
- // Latin characters for a language with a non-latin alphabet
- return 0; // so we can re-translate the word as English
- }
-
- if (((alphabet = AlphabetFromChar(c)) != NULL) && (alphabet->offset != tr->letter_bits_offset)) {
- // Character is not in our alphabet
- return 0;
- }
-
- if (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 1)
- return 0;
-
- if (((c = *word) == ' ') || (c == 0) || (c == '\''))
- return 0;
-
- index = 0;
- count = 0;
- for (;;) {
- index += utf8_in(&c, &word[index]);
- if ((c == 0) || (c == ' '))
- break;
-
- if ((c == '\'') && ((count > 1) || (posn > 0)))
- break; // "tv'" but not "l'"
-
- if (count == 0)
- c1 = c;
-
- if ((c == '\'') && (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 3)) {
- // don't count apostrophe
- } else
- count++;
-
- if (IsVowel(tr, c)) {
- vowel_posn = count; // position of the first vowel
- break;
- }
-
- if ((c != '\'') && !iswalpha(c))
- return 0;
- }
-
- if ((vowel_posn > 2) && (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 2)) {
- // Lookup unpronounable rules in *_rules
- return Unpronouncable2(tr, word);
- }
-
- if (c1 == tr->langopts.param[LOPT_UNPRONOUNCABLE])
- vowel_posn--; // disregard this as the initial letter when counting
-
- if (vowel_posn > (tr->langopts.max_initial_consonants+1))
- return 1; // no vowel, or no vowel in first few letters
-
- return 0;
- }
-
- static int Unpronouncable2(Translator *tr, char *word)
- {
- int c;
- int end_flags;
- char ph_buf[N_WORD_PHONEMES];
-
- ph_buf[0] = 0;
- c = word[-1];
- word[-1] = ' '; // ensure there is a space before the "word"
- end_flags = TranslateRules(tr, word, ph_buf, sizeof(ph_buf), NULL, FLAG_UNPRON_TEST, NULL);
- word[-1] = c;
- if ((end_flags == 0) || (end_flags & SUFX_UNPRON))
- return 1;
- return 0;
- }
|