Browse Source

enhance homograph disambiguation

master
MahtaFetrat 2 weeks ago
parent
commit
5d124160f7

+ 2
- 2
Makefile.am View File

@@ -140,12 +140,12 @@ espeak_ng_include_HEADERS = \
lib_LTLIBRARIES += src/libespeak-ng.la

src_libespeak_ng_la_LDFLAGS = -version-info $(SHARED_VERSION) -lpthread -lm \
${PCAUDIOLIB_LIBS}
${PCAUDIOLIB_LIBS} ${JSONC_LIBS}

src_libespeak_ng_la_CFLAGS = \
-fPIC -fvisibility=hidden \
-pedantic -fno-exceptions -DPATH_ESPEAK_DATA=\"$(DATADIR)\" -DLIBESPEAK_NG_EXPORT \
${PCAUDIOLIB_CFLAGS} ${SONIC_CFLAGS} ${AM_CFLAGS}
${PCAUDIOLIB_CFLAGS} ${SONIC_CFLAGS} ${JSONC_CFLAGS} ${AM_CFLAGS}

src_libespeak_ng_la_SOURCES = \
src/ucd-tools/src/case.c \

+ 27
- 0
configure.ac View File

@@ -224,6 +224,33 @@ AC_CHECK_FUNCS([strerror])
AC_CHECK_FUNCS([strrchr])
AC_CHECK_FUNCS([strstr])

dnl ================================================================
dnl JSON-C library checks.
dnl ================================================================

AC_CHECK_HEADERS([json-c/json.h],
[
have_jsonc=yes
JSONC_CFLAGS=-DUSE_JSONC=1
AC_CHECK_LIB([json-c], [json_object_from_file],
[
JSONC_LIBS=-ljson-c
],
[
have_jsonc=no
AC_MSG_ERROR([json-c library is required for homograph support])
]
)
],
[
have_jsonc=no
AC_MSG_ERROR([json-c library is required for homograph support])
]
)

AC_SUBST(JSONC_CFLAGS)
AC_SUBST(JSONC_LIBS)

dnl ================================================================
dnl PCAudioLib checks.
dnl ================================================================

+ 1728665
- 0
espeak-ng-data/dataset.json
File diff suppressed because it is too large
View File


+ 290
- 0
espeak-ng-data/homographs.txt View File

@@ -0,0 +1,290 @@
رو
ترک
دور
فک
راند
دم
ترکان
گرد
جلد
پر
گردان
عمر
حل
رد
رود
کن
هل
اره
سرو
تن
کش
رس
کشت
نقل
رم
گرداند
کل
سر
گنج
تو
برنج
قل
کرد
پی
رب
تاباند
شو
شکر
شست
خرد
برند
اعمال
کرم
برنده
دوره
نشست
هری
جنب
ملا
بعد
شش
طبق
مد
بری
برم
شوید
چسباند
تست
روی
ولو
خواند
سمت
کنده
گنگ
بکن
درهم
شناساند
یاس
معبر
مهم
مردم
خوراند
جست
بنا
ترکاند
ملاک
پلاس
فوت
کت
برده
گله
بده
چرخاند
کندی
خلق
چک
بخار
نزد
چپاند
اجر
شان
رسم
شنو
خواباند
نرم
رساند
لمباند
میدانی
دود
دواند
جدا
معین
رسی
جو
فرانک
هلیم
بردار
شرف
کشند
گزید
مقدم
زهره
سری
کیف
کشیم
پژمرد
مجاز
لرزاند
نشاند
نشسته
جرم
خورد
علم
جوشاند
برگرداند
سوزاند
بره
امین
لب
اه
آرم
گنجاند
قطعه
فراری
لغز
تنگ
عرضه
بدو
صفر
المان
برید
فرق
محقق
پیچاند
پرورد
کشتی
مسلم
روم
پرش
نصاب
تکه
جوید
پراند
وسطی
کارد
سپرد
افشاند
مراجع
خرم
کرات
نمونه
سند
کند
بر
سنت
شبه
ابر
اپل
درد
خیر
خاراند
ابی
مبدل
مو
جنگ
سرم
قدم
مایل
اسکی
نسبی
گذراند
بخور
صرف
قسم
گرده
مبلغ
حسن
کره
عود
چشم
خلف
حقه
شل
ادبا
برس
گرم
سحر
سنی
ببر
فهماند
اوا
اشکال
بین
سرور
مفصل
فرار
سبک
تپاند
سیر
پهن
مهر
درک
چلاند
رنجاند
مشرف
کمی
فن
مقطع
پوشاند
عرفا
بدهی
شنود
قطر
شهرت
سپر
رحم
حلال
دوران
ترساند
پروراند
رویه
آورد
کلفت
تکاند
گریاند
کابل
شهره
رهاند
دین
عقبی
لنگ
مصر
پرس
منکر
قمری
امل
خفت
سمبل
کنه
عالم
bedah (فعل)
تند
محرم
عرق
خیساند
لی‌لی
کشاند
یمن
شدید
قوت
اشراف
نبرد
کر
شما
گریم
شوم
جور
خنداند
اشغال
علی
مسکن
مثل
نفس
ارایه
دوم
دهم
سپرم
نکن
ده
ماند
به
گل
ور
مرد
پست
کنف
qet؟e
شبه
شوم
persian word

+ 1
- 0
espeak-ng-data/lang/zle/ru-cl View File

@@ -2,4 +2,5 @@ name Russian (Classic)
language ru-cl
replace 03 a a#
dict_min 20000
speed 95
dictrules 3

+ 389
- 0
espeak-ng-data/stopwords.dat View File

@@ -0,0 +1,389 @@
و
در
به
از
که
این
را
با
است
برای
آن
یک
خود
تا
کرد
بر
هم
نیز
گفت
می‌شود
وی
شد
دارد
ما
اما
یا
شده
باید
هر
آنها
بود
او
دیگر
دو
مورد
می‌کند
شود
کند
وجود
بین
پیش
شده_است
پس
نظر
اگر
همه
یکی
حال
هستند
من
کنند
نیست
باشد
چه
بی
می
بخش
می‌کنند
همین
افزود
هایی
دارند
راه
همچنین
روی
داد
بیشتر
بسیار
سه
داشت
چند
سوی
تنها
هیچ
میان
اینکه
شدن
بعد
جدید
ولی
حتی
کردن
برخی
کردند
می‌دهد
اول
نه
کرده_است
نسبت
بیش
شما
چنین
طور
افراد
تمام
درباره
بار
بسیاری
می‌تواند
کرده
چون
ندارد
دوم
بزرگ
طی
حدود
همان
بدون
البته
آنان
می‌گوید
دیگری
خواهد_شد
کنیم
قابل
یعنی
رشد
می‌توان
وارد
کل
ویژه
قبل
براساس
نیاز
گذاری
هنوز
لازم
سازی
بوده_است
چرا
می‌شوند
وقتی
گرفت
کم
جای
حالی
تغییر
پیدا
اکنون
تحت
باعث
مدت
فقط
زیادی
تعداد
آیا
بیان
رو
شدند
عدم
کرده_اند
بودن
نوع
بلکه
جاری
دهد
برابر
مهم
بوده
اخیر
مربوط
امر
زیر
گیری
شاید
خصوص
آقای
اثر
کننده
بودند
فکر
کنار
اولین
سوم
سایر
کنید
ضمن
مانند
باز
می‌گیرد
ممکن
حل
دارای
پی
مثل
می‌رسد
اجرا
دور
منظور
کسی
موجب
طول
امکان
آنچه
تعیین
گفته
شوند
جمع
خیلی
علاوه
گونه
تاکنون
رسید
ساله
گرفته
شده_اند
علت
چهار
داشته_باشد
خواهد_بود
طرف
تهیه
تبدیل
مناسب
زیرا
مشخص
می‌توانند
نزدیک
جریان
روند
بنابراین
می‌دهند
یافت
نخستین
بالا
پنج
ریزی
عالی
چیزی
نخست
بیشتری
ترتیب
شده_بود
خاص
خوبی
خوب
شروع
فرد
کامل
غیر
می‌رود
دهند
آخرین
دادن
جدی
بهترین
شامل
گیرد
بخشی
باشند
تمامی
بهتر
داده_است
حد
نبود
کسانی
می‌کرد
داریم
علیه
می‌باشد
دانست
ناشی
داشتند
دهه
می‌شد
ایشان
آنجا
گرفته_است
دچار
می‌آید
لحاظ
آنکه
داده
بعضی
هستیم
اند
برداری
نباید
می‌کنیم
نشست
سهم
همیشه
آمد
اش
وگو
می‌کنم
حداقل
طبق
جا
خواهد_کرد
نوعی
چگونه
رفت
هنگام
فوق
روش
ندارند
سعی
بندی
شمار
کلی
کافی
مواجه
همچنان
زیاد
سمت
کوچک
داشته_است
چیز
پشت
آورد
حالا
روبه
سال‌های
دادند
می‌کردند
عهده
نیمه
جایی
دیگران
سی
بروز
یکدیگر
آمده_است
جز
کنم
سپس
کنندگان
خودش
همواره
یافته
شان
صرف
نمی‌شود
رسیدن
چهارم
یابد
متر
ساز
داشته
کرده_بود
باره
نحوه
کردم
تو
شخصی
داشته_باشند
محسوب
پخش
کمی
متفاوت
سراسر
کاملا
داشتن
نظیر
آمده
گروهی
فردی
ع
همچون
خطر
خویش
کدام
دسته
سبب
عین
آوری
متاسفانه
بیرون
دار
ابتدا
شش
افرادی
می‌گویند
سالهای
درون
نیستند
یافته_است
پر
خاطرنشان
گاه
جمعی
اغلب
دوباره
می‌یابد
لذا
زاده
گردد
اینجا

+ 57
- 5
src/espeak-ng.c View File

@@ -314,6 +314,8 @@ int main(int argc, char **argv)
static const struct option long_options[] = {
{ "help", no_argument, 0, 'h' },
{ "stdin", no_argument, 0, 0x100 },
{ "input", required_argument, 0, 0x113 },
{ "output", required_argument, 0, 0x114 },
{ "compile-debug", optional_argument, 0, 0x101 },
{ "compile", optional_argument, 0, 0x102 },
{ "punct", optional_argument, 0, 0x103 },
@@ -339,8 +341,10 @@ int main(int argc, char **argv)

FILE *f_text = NULL;
char *p_text = NULL;
FILE *f_phonemes_out = stdout;
FILE *f_phonemes_out = NULL;
char *data_path = NULL; // use default path for espeak-ng-data
char input_file[256] = {0};
char output_file[256] = {0}; // Add output file name variable

int option_index = 0;
int c;
@@ -593,6 +597,12 @@ int main(int argc, char **argv)
case 0x112: // --ssml-break
ssml_break = atoi(optarg2);
break;
case 0x113: // --input
strncpy0(input_file, optarg2, sizeof(input_file));
break;
case 0x114: // --output
strncpy0(output_file, optarg2, sizeof(output_file));
break;
default:
exit(0);
}
@@ -689,9 +699,54 @@ int main(int argc, char **argv)
if (option_punctuation == 2)
espeak_SetPunctuationList(option_punctlist);

// Open output file for phoneme output, first overwrite to clear previous content
if (output_file[0] == 0) {
strcpy(output_file, "output.txt"); // Default output file if none specified
}
if ((f_phonemes_out = fopen(output_file, "w")) == NULL) {
fprintf(stderr, "Can't write to output file: %s\n", output_file);
exit(EXIT_FAILURE);
}
fclose(f_phonemes_out); // Close after clearing
f_phonemes_out = NULL; // Set to NULL after closing

espeak_SetPhonemeTrace(phoneme_options | (phonemes_separator << 8), f_phonemes_out);

if (filename[0] == 0) {
// Process input file if specified
if (input_file[0] != 0) {
f_text = fopen(input_file, "r");
if (f_text == NULL) {
fprintf(stderr, "Can't open input file: %s\n", input_file);
exit(EXIT_FAILURE);
}

char line[1000];
while (fgets(line, sizeof(line), f_text) != NULL) {
// Remove trailing newline
line[strcspn(line, "\n")] = 0;
// Process each line independently
if (line[0] != 0) { // Skip empty lines
// Reopen output file in append mode for each line
if (f_phonemes_out != NULL) {
fclose(f_phonemes_out);
f_phonemes_out = NULL;
}
if ((f_phonemes_out = fopen(output_file, "a")) == NULL) {
fprintf(stderr, "Can't append to output file: %s\n", output_file);
exit(EXIT_FAILURE);
}
espeak_SetPhonemeTrace(phoneme_options | (phonemes_separator << 8), f_phonemes_out);
espeak_Synth(line, strlen(line)+1, 0, POS_CHARACTER, 0, synth_flags, NULL, NULL);
espeak_ng_Synchronize();
fclose(f_phonemes_out);
f_phonemes_out = NULL;
}
}
fclose(f_text);
} else if (filename[0] == 0) {
if ((optind < argc) && (flag_stdin == 0)) {
// there's a non-option parameter, and no -f or --stdin
// use it as text
@@ -788,9 +843,6 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE);
}

if (f_phonemes_out != stdout)
fclose(f_phonemes_out);

CloseWavFile();
espeak_ng_Terminate();
return 0;

+ 1
- 1
src/libespeak-ng/compiledict.c View File

@@ -566,7 +566,7 @@ static int compile_line(CompileContext *ctx, char *linebuf, char *dict_line, int

// PROBLEM vowel reductions are not applied to the translated phonemes
// condition rules are not applied
TranslateWord(translator, phonetic, NULL, NULL);
TranslateWord(translator, phonetic, NULL, NULL, NULL, NULL, 0);
text_not_phonemes = false;
strncpy0(encoded_ph, ctx->word_phonemes, N_WORD_BYTES-4);


+ 1
- 1
src/libespeak-ng/dictionary.c View File

@@ -2882,7 +2882,7 @@ int Lookup(Translator *tr, const char *word, char *ph_out)
text[1] = ' ';
text[2] = ' ';
strncpy0(text+3, word1, sizeof(text)-3);
flags0 = TranslateWord(tr, text+3, NULL, NULL);
flags0 = TranslateWord(tr, text+3, NULL, NULL, NULL, NULL, 0);
strcpy(ph_out, word_phonemes);
option_sayas = say_as;
}

+ 1
- 1
src/libespeak-ng/numbers.c View File

@@ -715,7 +715,7 @@ static int CheckDotOrdinal(Translator *tr, char *word, char *word_end, WORD_TAB
// lang=hu don't treat dot as ordinal indicator if the next word is a month name ($alt). It may have a suffix.
nextflags = 0;
if (IsAlpha(c2))
nextflags = TranslateWord(tr, &word_end[2], NULL, NULL);
nextflags = TranslateWord(tr, &word_end[2], NULL, NULL, NULL, NULL, 0);

if ((tr->prev_dict_flags[0] & FLAG_ALT_TRANS) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || iswdigit(c2)))
ordinal = 0; // TEST 09.02.10

+ 1
- 1
src/libespeak-ng/setlengths.c View File

@@ -163,7 +163,7 @@ void SetSpeed(int control)

if (control & 2)
DoSonicSpeed(1 * 1024);
if ((wpm_value > espeakRATE_MAXIMUM) || ((wpm_value > speed.fast_settings) && (wpm > 350))) {
if ((wpm_value >= espeakRATE_MAXIMUM) || ((wpm_value > speed.fast_settings) && (wpm > 350))) {
int wpm2;
wpm2 = wpm;
wpm = espeakRATE_NORMAL;

+ 1
- 8
src/libespeak-ng/tr_languages.c View File

@@ -66,7 +66,7 @@ static const ALPHABET alphabets[] = {
{ "_el", OFFSET_GREEK, 0x380, 0x3ff, L('e', 'l'), AL_DONT_NAME | AL_NOT_LETTERS | AL_WORDS },
{ "_cyr", OFFSET_CYRILLIC, 0x400, 0x52f, 0, 0 },
{ "_hy", OFFSET_ARMENIAN, 0x530, 0x58f, L('h', 'y'), AL_WORDS },
{ "_he", OFFSET_HEBREW, 0x590, 0x5ff, L('h', 'e'), 0 },
{ "_he", OFFSET_HEBREW, 0x590, 0x5ff, 0, 0 },
{ "_ar", OFFSET_ARABIC, 0x600, 0x6ff, 0, 0 },
{ "_syc", OFFSET_SYRIAC, 0x700, 0x74f, 0, 0 },
{ "_hi", OFFSET_DEVANAGARI, 0x900, 0x97f, L('h', 'i'), AL_WORDS },
@@ -868,13 +868,6 @@ Translator *SelectTranslator(const char *name)
tr->langopts.ideographs = 1;
}
break;
case L('h','e'): // Hebrew
{
tr->langopts.param[LOPT_APOSTROPHE] = 2; // bit 1 Apostrophe at end of word is part of the word, for words like בָּגָאז׳
tr->langopts.stress_flags = S_NO_AUTO_2; // don't use secondary stress
tr->langopts.numbers = NUM_SINGLE_STRESS | NUM_DFRACTION_2 | NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_SINGLE_AND;
}
break;
case L('g', 'a'): // irish
case L('g', 'd'): // scots gaelic
{

+ 31
- 17
src/libespeak-ng/translate.c View File

@@ -46,7 +46,7 @@
#include "translateword.h"

static int CalcWordLength(int source_index, int charix_top, short int *charix, WORD_TAB *words, int word_count);
static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, unsigned char *p, char *word_phonemes);
static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, unsigned char *p, char *word_phonemes, WORD_TAB words[], char sbuf[], int word_count);
static void SwitchLanguage(char *word, char *word_phonemes);

Translator *translator = NULL; // the main translator
@@ -141,13 +141,13 @@ char *strchr_w(const char *s, int c)
return strchr((char *)s, c); // (char *) is needed for Borland compiler
}

int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out)
int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, WORD_TAB words[], char sbuf[], int word_count)
{
char words_phonemes[N_WORD_PHONEMES]; // a word translated into phoneme codes
char *phonemes = words_phonemes;


int flags = TranslateWord3(tr, word_start, wtab, word_out, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes));
int flags = TranslateWord3(tr, word_start, wtab, word_out, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes), words, sbuf, word_count);
if (flags & FLAG_TEXTMODE && word_out) {
// Ensure that start of word rules match with the replaced text,
// so that emoji and other characters are pronounced correctly.
@@ -173,7 +173,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o
// However, dictionary_skipwords value is still needed outside this scope.
// So we backup and restore it at the end of this scope.
int skipwords = dictionary_skipwords;
TranslateWord3(tr, word_out, wtab, NULL, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes));
TranslateWord3(tr, word_out, wtab, NULL, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes), words, sbuf, word_count);

int n;
if (first_word) {
@@ -186,7 +186,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o
available -= n;
phonemes += n;

// skip to the next word in a multi-word replacement. Always skip at least one word.
// skip to the next word in a multi-word replacement. Always skip at least one word.
for (dictionary_skipwords++; dictionary_skipwords > 0; dictionary_skipwords--) {
while (!isspace(*word_out)) ++word_out;
while (isspace(*word_out)) ++word_out;
@@ -299,7 +299,7 @@ int SetTranslator3(const char *new_language)
return SetAlternateTranslator(new_language, &translator3, translator3_language);
}

static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pause)
static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pause, WORD_TAB words [], char sbuf[], int word_count)
{
int flags = 0;
int stress;
@@ -401,7 +401,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
word_copy_len = ix;

word_replaced[2] = 0;
flags = TranslateWord(translator, word, wtab, &word_replaced[2]);
flags = TranslateWord(translator, word, wtab, &word_replaced[2], words, sbuf, word_count);

if (flags & FLAG_SPELLWORD) {
// re-translate the word as individual letters, separated by spaces
@@ -410,7 +410,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
}

if ((flags & FLAG_COMBINE) && !(wtab[1].flags & FLAG_PHONEMES)) {
CombineFlag(tr, wtab, word, &flags, p, word_phonemes);
CombineFlag(tr, wtab, word, &flags, p, word_phonemes, words, sbuf, word_count);
}

if (p[0] == phonSWITCH) {
@@ -433,9 +433,9 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
if (word_replaced[2] != 0) {
word_replaced[0] = 0; // byte before the start of the word
word_replaced[1] = ' ';
flags = TranslateWord(translator2, &word_replaced[1], wtab, NULL);
flags = TranslateWord(translator2, &word_replaced[1], wtab, NULL, words, sbuf, word_count);
} else
flags = TranslateWord(translator2, word, wtab, &word_replaced[2]);
flags = TranslateWord(translator2, word, wtab, &word_replaced[2], words, sbuf, word_count);
}

if (p[0] != phonSWITCH)
@@ -1489,6 +1489,20 @@ void TranslateClauseWithTerminator(Translator *tr, int *tone_out, char **voice_c
}
words[0].flags |= FLAG_FIRST_WORD;

// Add debug print header and print all words before processing
// fprintf(stderr, "\n=== Words in sentence ===\n");
// for (ix = 0; ix < word_count; ix++) {
// char word_copy[150];
// int word_len = 0;
// char *pw = &sbuf[words[ix].start];
// while (pw[word_len] != ' ' && pw[word_len] != 0 && word_len < 149) {
// word_copy[word_len] = pw[word_len];
// word_len++;
// }
// word_copy[word_len] = 0;
// fprintf(stderr, "Word %d: '%s'\n", ix + 1, word_copy);
// }

// Each TranslateWord2 may require up to 7 phonemes
// and after this loop we require 2 phonemes
for (ix = 0; ix < word_count && (n_ph_list2 < N_PHONEME_LIST-7-2); ix++) {
@@ -1590,7 +1604,7 @@ void TranslateClauseWithTerminator(Translator *tr, int *tone_out, char **voice_c

for (pw = &number_buf[3]; pw < pn && nw < N_CLAUSE_WORDS;) {
// keep wflags for each part, for FLAG_HYPHEN_AFTER
dict_flags = TranslateWord2(tr, pw, &num_wtab[nw++], words[ix].pre_pause);
dict_flags = TranslateWord2(tr, pw, &num_wtab[nw++], words[ix].pre_pause, words, sbuf, word_count);
while (pw < pn && *pw++ != ' ')
;
words[ix].pre_pause = 0;
@@ -1598,7 +1612,7 @@ void TranslateClauseWithTerminator(Translator *tr, int *tone_out, char **voice_c
} else {
pre_pause = 0;

dict_flags = TranslateWord2(tr, word, &words[ix], words[ix].pre_pause);
dict_flags = TranslateWord2(tr, word, &words[ix], words[ix].pre_pause, words, sbuf, word_count);

if (pre_pause > words[ix+1].pre_pause) {
words[ix+1].pre_pause = pre_pause;
@@ -1612,7 +1626,7 @@ void TranslateClauseWithTerminator(Translator *tr, int *tone_out, char **voice_c
memset(number_buf+1, ' ', 9);
nx = utf8_in(&c_temp, pw);
memcpy(&number_buf[3], pw, nx);
TranslateWord2(tr, &number_buf[3], &words[ix], 0);
TranslateWord2(tr, &number_buf[3], &words[ix], 0, words, sbuf, word_count);
pw += nx;
}
}
@@ -1696,7 +1710,7 @@ static int CalcWordLength(int source_index, int charix_top, short int *charix, W
return k;
}

static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, unsigned char *p, char *word_phonemes) {
static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, unsigned char *p, char *word_phonemes, WORD_TAB words[], char sbuf[], int word_count) {
// combine a preposition with the following word


@@ -1724,7 +1738,7 @@ static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags,
char ph_buf[N_WORD_PHONEMES];
strcpy(ph_buf, word_phonemes);

flags2[0] = TranslateWord(tr, p2+1, wtab+1, NULL);
flags2[0] = TranslateWord(tr, p2+1, wtab+1, NULL, words, sbuf, word_count);
if ((flags2[0] & FLAG_WAS_UNPRONOUNCABLE) || (word_phonemes[0] == phonSWITCH))
ok = false;

@@ -1745,11 +1759,11 @@ static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags,
if (ok) {
*p2 = '-'; // replace next space by hyphen
wtab[0].flags &= ~FLAG_ALL_UPPER; // prevent it being considered an abbreviation
*flags = TranslateWord(translator, word, wtab, NULL); // translate the combined word
*flags = TranslateWord(translator, word, wtab, NULL, words, sbuf, word_count); // translate the combined word
if ((sylimit > 0) && (CountSyllables(p) > (sylimit & 0x1f))) {
// revert to separate words
*p2 = ' ';
*flags = TranslateWord(translator, word, wtab, NULL);
*flags = TranslateWord(translator, word, wtab, NULL, words, sbuf, word_count);
} else {
if (*flags == 0)
*flags = flags2[0]; // no flags for the combined word, so use flags from the second word eg. lang-hu "nem december 7-e"

+ 1
- 1
src/libespeak-ng/translate.h View File

@@ -667,7 +667,7 @@ void ProcessLanguageOptions(LANGUAGE_OPTIONS *langopts);

void print_dictionary_flags(unsigned int *flags, char *buf, int buf_len);

int TranslateWord(Translator *tr, char *word1, WORD_TAB *wtab, char *word_out);
int TranslateWord(Translator *tr, char *word1, WORD_TAB *wtab, char *word_out, WORD_TAB *words, char sbuf [], int word_count);
void TranslateClause(Translator *tr, int *tone, char **voice_change);
void TranslateClauseWithTerminator(Translator *tr, int *tone_out, char **voice_change, int *terminator_out);


+ 433
- 2
src/libespeak-ng/translateword.c View File

@@ -1,4 +1,3 @@

/*
* Copyright (C) 2005 to 2014 by Jonathan Duddington
* email: [email protected]
@@ -43,7 +42,402 @@
#include "synthdata.h" // for SelectPhonemeTable, LookupPhonemeTable
#include "ucd/ucd.h" // for ucd_toupper
#include "voice.h" // for voice, voice_t
#include "speech.h" // for path_home

// Add JSON parsing headers
#include <json-c/json.h>

// Global variables for homographs
static char **homographs_list = NULL;
static int homographs_count = 0;
static json_object *homograph_data = NULL;

// Add these near the other global variables at the top
static char **stopwords_list = NULL;
static int stopwords_count = 0;

// Load the homograph pronunciation table (dataset.json) into homograph_data.
//
// The file is looked up relative to path_home instead of a fixed absolute
// path, so the library works outside the machine it was developed on.
// On failure a message naming the last path tried is written to stderr and
// homograph_data is left unchanged (NULL).
static void LoadHomographData(void)
{
	char path[256];
	json_object *root = NULL;

	// Layout used by the original code: <path_home>/espeak-ng-data/dataset.json
	snprintf(path, sizeof(path), "%s%cespeak-ng-data%cdataset.json", path_home, PATHSEP, PATHSEP);
	root = json_object_from_file(path);

	if (root == NULL) {
		// NOTE(review): path_home may already name the espeak-ng-data directory
		// itself; confirm which layout is correct and drop the other attempt.
		snprintf(path, sizeof(path), "%s%cdataset.json", path_home, PATHSEP);
		root = json_object_from_file(path);
	}

	if (root == NULL) {
		fprintf(stderr, "Failed to load homograph data from %s\n", path);
		return;
	}

	homograph_data = root;
}

// Read homographs.txt (one word per line) into homographs_list and set
// homographs_count to the number of words stored.
//
// The file is located relative to path_home rather than through a fixed
// absolute path. Line terminators ("\n" or "\r\n") are stripped and blank
// lines are skipped, so an empty string is never stored as a homograph.
// On any failure (file missing, empty file, out of memory) homographs_list
// stays NULL and homographs_count stays 0.
static void LoadHomographs(void)
{
	FILE *f;
	char path[256];
	char line[256];
	char **list;
	int capacity = 0;
	int loaded = 0;

	// Layout used by the original code: <path_home>/espeak-ng-data/homographs.txt
	snprintf(path, sizeof(path), "%s%cespeak-ng-data%chomographs.txt", path_home, PATHSEP, PATHSEP);
	f = fopen(path, "r");
	if (f == NULL) {
		// NOTE(review): path_home may already name the espeak-ng-data directory
		// itself; confirm which layout is correct and drop the other attempt.
		snprintf(path, sizeof(path), "%s%chomographs.txt", path_home, PATHSEP);
		f = fopen(path, "r");
	}
	if (f == NULL) {
		fprintf(stderr, "Failed to open homographs.txt\n");
		return;
	}

	// First pass: count lines to get an upper bound on the number of entries.
	while (fgets(line, sizeof(line), f) != NULL) {
		capacity++;
	}
	if (capacity == 0) {
		// Empty file: nothing to store, and malloc(0) is best avoided.
		fclose(f);
		return;
	}

	list = malloc((size_t)capacity * sizeof(*list));
	if (list == NULL) {
		fclose(f);
		return;
	}

	// Second pass: store the words. The capacity bound protects the array if
	// the file grew between the two passes.
	rewind(f);
	while (loaded < capacity && fgets(line, sizeof(line), f) != NULL) {
		line[strcspn(line, "\r\n")] = 0;
		if (line[0] == 0) {
			continue;
		}
		list[loaded] = strdup(line);
		if (list[loaded] == NULL) {
			// Out of memory: release everything read so far.
			for (int j = 0; j < loaded; j++) {
				free(list[j]);
			}
			free(list);
			fclose(f);
			return;
		}
		loaded++;
	}
	fclose(f);

	// Publish the list only once it is complete.
	homographs_list = list;
	homographs_count = loaded;
}

// Return true if word exactly matches (strcmp) an entry of homographs_list.
//
// The list is loaded on first use. Loading is attempted only once: if it
// fails, later calls return false immediately instead of re-opening the file
// and printing the same error for every word that is translated.
// A NULL or empty word is never a homograph.
static bool IsHomograph(const char *word)
{
	static bool load_attempted = false;

	if (word == NULL || word[0] == 0) {
		return false;
	}

	if (homographs_list == NULL && !load_attempted) {
		load_attempted = true;
		LoadHomographs();
	}

	if (homographs_list == NULL) {
		return false;
	}

	for (int i = 0; i < homographs_count; i++) {
		if (strcmp(word, homographs_list[i]) == 0) {
			return true;
		}
	}

	return false;
}

// Read stopwords.dat (one word per line) into stopwords_list and set
// stopwords_count to the number of words stored.
//
// The file is located relative to path_home rather than through a fixed
// absolute path. Line terminators ("\n" or "\r\n") are stripped and blank
// lines are skipped, so an empty string is never stored as a stopword.
// On any failure (file missing, empty file, out of memory) stopwords_list
// stays NULL and stopwords_count stays 0.
static void LoadStopwords(void)
{
	FILE *f;
	char path[256];
	char line[256];
	char **list;
	int capacity = 0;
	int loaded = 0;

	// Layout used by the original code: <path_home>/espeak-ng-data/stopwords.dat
	snprintf(path, sizeof(path), "%s%cespeak-ng-data%cstopwords.dat", path_home, PATHSEP, PATHSEP);
	f = fopen(path, "r");
	if (f == NULL) {
		// NOTE(review): path_home may already name the espeak-ng-data directory
		// itself; confirm which layout is correct and drop the other attempt.
		snprintf(path, sizeof(path), "%s%cstopwords.dat", path_home, PATHSEP);
		f = fopen(path, "r");
	}
	if (f == NULL) {
		fprintf(stderr, "Failed to open stopwords.dat\n");
		return;
	}

	// First pass: count lines to get an upper bound on the number of entries.
	while (fgets(line, sizeof(line), f) != NULL) {
		capacity++;
	}
	if (capacity == 0) {
		// Empty file: nothing to store, and malloc(0) is best avoided.
		fclose(f);
		return;
	}

	list = malloc((size_t)capacity * sizeof(*list));
	if (list == NULL) {
		fclose(f);
		return;
	}

	// Second pass: store the words. The capacity bound protects the array if
	// the file grew between the two passes.
	rewind(f);
	while (loaded < capacity && fgets(line, sizeof(line), f) != NULL) {
		line[strcspn(line, "\r\n")] = 0;
		if (line[0] == 0) {
			continue;
		}
		list[loaded] = strdup(line);
		if (list[loaded] == NULL) {
			// Out of memory: release everything read so far.
			for (int j = 0; j < loaded; j++) {
				free(list[j]);
			}
			free(list);
			fclose(f);
			return;
		}
		loaded++;
	}
	fclose(f);

	// Publish the list only once it is complete.
	stopwords_list = list;
	stopwords_count = loaded;
}

// Return true if word exactly matches (strcmp) an entry of stopwords_list.
//
// The list is loaded on first use. Loading is attempted only once: if it
// fails, later calls return false immediately instead of re-opening the file
// and printing the same error for every context word that is examined.
// A NULL or empty word is never a stopword.
static bool IsStopword(const char *word)
{
	static bool load_attempted = false;

	if (word == NULL || word[0] == 0) {
		return false;
	}

	if (stopwords_list == NULL && !load_attempted) {
		load_attempted = true;
		LoadStopwords();
	}

	if (stopwords_list == NULL) {
		return false;
	}

	for (int i = 0; i < stopwords_count; i++) {
		if (strcmp(word, stopwords_list[i]) == 0) {
			return true;
		}
	}

	return false;
}

// Function to generate phonemes for homograph words
static void GenerateHomographPhonemes(const char *word, char *phonemes, WORD_TAB words[], char sbuf[], int word_count) {
if (homograph_data == NULL) {
LoadHomographData();
}
// fprintf(stderr, "\n=== Homograph Processing ===\n");
// fprintf(stderr, "Looking up word: '%s'\n", word);
// Get the word data from homograph dictionary
json_object *word_data = json_object_object_get(homograph_data, word);
if (word_data == NULL) {
fprintf(stderr, "Word not found in homograph dictionary\n");
// Word not found in homograph dictionary, use word's characters as phonemes
int i = 0;
while (word[i] != 0 && i < N_WORD_PHONEMES - 1) {
phonemes[i] = PhonemeCode(word[i]);
i++;
}
phonemes[i] = 0;
return;
}

// fprintf(stderr, "\nFound homograph '%s' with pronunciations:\n", word);
struct json_object_iterator it = json_object_iter_begin(word_data);
struct json_object_iterator itEnd = json_object_iter_end(word_data);
while (!json_object_iter_equal(&it, &itEnd)) {
const char *debug_pron_key = json_object_iter_peek_name(&it);
json_object *debug_pron_val = json_object_iter_peek_value(&it);
// fprintf(stderr, " Pronunciation: %s\n", debug_pron_key);
// fprintf(stderr, " Context words: ");
if (json_object_get_type(debug_pron_val) == json_type_array) {
int array_len = json_object_array_length(debug_pron_val);
for (int i = 0; i < array_len; i++) {
json_object *item = json_object_array_get_idx(debug_pron_val, i);
// fprintf(stderr, "%s ", json_object_get_string(item));
}
}
fprintf(stderr, "\n");
json_object_iter_next(&it);
}
fprintf(stderr, "\n");

// Count context word frequencies
int context_counts[256] = {0}; // Assuming max 256 unique context words
char *context_words[256] = {0};
int num_context_words = 0;

// fprintf(stderr, "\n=== Context Words ===\n");
// Process context words
for (int i = 0; i < word_count; i++) {
char word_copy[150];
int word_len = 0;
char *pw = &sbuf[words[i].start];
// Extract the word
while (pw[word_len] != ' ' && pw[word_len] != 0 && word_len < 149) {
word_copy[word_len] = pw[word_len];
word_len++;
}
word_copy[word_len] = 0;
// Skip if it's the target word, too short, or a stopword
if (word_len <= 1 || strcmp(word_copy, word) == 0 || IsStopword(word_copy)) {
continue;
}

// fprintf(stderr, "Word %d: '%s'\n", i + 1, word_copy);

// Check if we've seen this word before
int found = 0;
for (int j = 0; j < num_context_words; j++) {
if (strcmp(context_words[j], word_copy) == 0) {
context_counts[j]++;
found = 1;
break;
}
}

// Add new word if not found
if (!found && num_context_words < 255) {
context_words[num_context_words] = strdup(word_copy);
if (context_words[num_context_words] == NULL) {
fprintf(stderr, "Warning: Failed to allocate memory for context word\n");
continue;
}
context_counts[num_context_words] = 1;
num_context_words++;
}
}

// Find best pronunciation
const char *best_phoneme = NULL;
double max_normalized_score = -1;
int max_raw_overlap = 0;

// fprintf(stderr, "\nEvaluating pronunciations:\n");
// Iterate through each pronunciation option
json_object_object_foreach(word_data, pron_key, pron_val) {
if (json_object_get_type(pron_val) != json_type_array) {
continue;
}

// fprintf(stderr, "\nEvaluating pronunciation: %s\n", pron_key);

// Count word frequencies in this pronunciation's associated words
int phoneme_word_counts[256] = {0};
int total_phoneme_words = 0;
int array_len = json_object_array_length(pron_val);
for (int i = 0; i < array_len; i++) {
json_object *item = json_object_array_get_idx(pron_val, i);
if (item == NULL) {
fprintf(stderr, " Warning: NULL item at index %d\n", i);
continue;
}
const char *assoc_word = json_object_get_string(item);
if (assoc_word == NULL) {
fprintf(stderr, " Warning: NULL string at index %d\n", i);
continue;
}
// fprintf(stderr, " Processing associated word[%d]: '%s' (length: %zu)\n",
// i, assoc_word, strlen(assoc_word));
// Count occurrences of this associated word
for (int j = 0; j < num_context_words; j++) {
if (context_words[j] == NULL) {
fprintf(stderr, " Warning: NULL context word at index %d\n", j);
continue;
}
// fprintf(stderr, " Comparing with context word[%d]: '%s' (length: %zu)\n",
// j, context_words[j], strlen(context_words[j]));
if (strcmp(context_words[j], assoc_word) == 0) {
phoneme_word_counts[j]++;
// fprintf(stderr, " Match found! New count for word '%s': %d\n",
// context_words[j], phoneme_word_counts[j]);
}
}
total_phoneme_words++;
}

// Calculate weighted overlap
int weighted_overlap = 0;
// fprintf(stderr, " Calculating weighted overlap:\n");
for (int i = 0; i < num_context_words; i++) {
if (context_words[i] == NULL) continue;
weighted_overlap += context_counts[i] * phoneme_word_counts[i];
// fprintf(stderr, " Word '%s': count=%d, matches=%d, contribution=%d\n",
// context_words[i], context_counts[i], phoneme_word_counts[i],
// context_counts[i] * phoneme_word_counts[i]);
}
// fprintf(stderr, " Total weighted overlap: %d\n", weighted_overlap);

// Calculate normalized score
double normalized_score = (total_phoneme_words > 0) ?
(double)weighted_overlap / total_phoneme_words : 0.0;

// fprintf(stderr, " Raw overlap: %d\n", weighted_overlap);
// fprintf(stderr, " Normalized score: %.2f\n", normalized_score);

// Select best phoneme
if (normalized_score > max_normalized_score) {
max_normalized_score = normalized_score;
max_raw_overlap = weighted_overlap;
best_phoneme = pron_key;
// fprintf(stderr, " New best pronunciation selected!\n");
} else if (normalized_score == max_normalized_score) {
// Tiebreaker: prefer the phoneme with higher raw overlap
if (weighted_overlap > max_raw_overlap) {
max_raw_overlap = weighted_overlap;
best_phoneme = pron_key;
// fprintf(stderr, " New best pronunciation selected (tiebreaker)!\n");
}
}
}

// fprintf(stderr, "\nFinal selection:\n");
// fprintf(stderr, "Selected pronunciation: %s\n", best_phoneme ? best_phoneme : "none");
// fprintf(stderr, "Final score: %.2f\n", max_normalized_score);
// fprintf(stderr, "Final raw overlap: %d\n", max_raw_overlap);

// Copy the best phoneme to output
if (best_phoneme != NULL) {
int i = 0;
while (best_phoneme[i] != 0 && i < N_WORD_PHONEMES - 1) {
phonemes[i] = PhonemeCode(best_phoneme[i]);
i++;
}
phonemes[i] = 0;
} else {
// No suitable pronunciation found, use word's characters as phonemes
int i = 0;
while (word[i] != 0 && i < N_WORD_PHONEMES - 1) {
phonemes[i] = PhonemeCode(word[i]);
i++;
}
phonemes[i] = 0;
}
}

static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes);
static void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags);
@@ -55,7 +449,7 @@ static int TranslateLetter(Translator *tr, char *word, char *phonemes, int contr
static int Unpronouncable(Translator *tr, char *word, int posn);
static int Unpronouncable2(Translator *tr, char *word);

int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes)
int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes, WORD_TAB words[], char sbuf[], int word_count)
{
// word1 is terminated by space (0x20) character

@@ -98,6 +492,34 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_
int add_suffix_phonemes = 0;
WORD_TAB wtab_null[8];

// Debug print the word being processed
char wordbuf[120];
unsigned int ix2;
for (ix2 = 0; ((c_temp = word_start[ix2]) != ' ') && (c_temp != 0) && (ix2 < (sizeof(wordbuf)-1)); ix2++)
wordbuf[ix2] = c_temp;
wordbuf[ix2] = 0;
// fprintf(stderr, "Processing word: '%s'\n", wordbuf);

// Add debug print header and print all words before processing
// fprintf(stderr, "\n=== Context Words ===\n");
// for (ix = 0; ix < word_count; ix++) {
// char word_copy[150];
// int word_len = 0;
// char *pw = &sbuf[words[ix].start];
// while (pw[word_len] != ' ' && pw[word_len] != 0 && word_len < 149) {
// word_copy[word_len] = pw[word_len];
// word_len++;
// }
// word_copy[word_len] = 0;
// fprintf(stderr, "Word %d: '%s'\n", ix + 1, word_copy);
// }

// Check if the word is a homograph
if (IsHomograph(wordbuf)) {
GenerateHomographPhonemes(wordbuf, word_phonemes, words, sbuf, word_count);
return dictionary_flags[0]; // Return early with current dictionary flags
}

if (wtab == NULL) {
memset(wtab_null, 0, sizeof(wtab_null));
wtab = wtab_null;
@@ -201,6 +623,7 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_

if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
strcpy(word_phonemes, phonemes);
return 0;
}
@@ -257,6 +680,7 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_
return FLAG_SPELLWORD; // a mixture of languages, retranslate as individual letters, separated by spaces
return 0;
}
strcpy(word_phonemes, phonemes);
if (wflags & FLAG_TRANSLATOR2)
return 0;
@@ -314,6 +738,7 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_

if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
strcpy(word_phonemes, phonemes);
return 0;
}
@@ -326,6 +751,7 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_
if ((word_length == 1) && (IsAlpha(wc) || IsSuperscript(wc))) {
if ((wordx = SpeakIndividualLetters(tr, wordx, phonemes, spell_word, current_alphabet, word_phonemes)) == NULL)
return 0;
strcpy(word_phonemes, phonemes);
return 0;
}
@@ -430,6 +856,7 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_
if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
wordx[-1] = c_temp;
strcpy(word_phonemes, phonemes);
return 0;
}
@@ -456,6 +883,7 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_
if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
memcpy(wordx, word_copy, strlen(word_copy));
strcpy(word_phonemes, phonemes);
return 0;
}
@@ -474,6 +902,7 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_
if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
memcpy(wordx, word_copy, strlen(word_copy));
strcpy(word_phonemes, phonemes);
return 0;
}
@@ -512,6 +941,7 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_

if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
strcpy(word_phonemes, phonemes);
memcpy(wordx, word_copy, strlen(word_copy));
wordx[-1] = c_temp;
@@ -763,6 +1193,7 @@ static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes,
non_initial = true;
if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
strcpy(word_phonemes, phonemes);
return NULL;
}

+ 1
- 1
src/libespeak-ng/translateword.h View File

@@ -30,7 +30,7 @@ extern "C"
{
#endif

int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes);
int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes, WORD_TAB words[], char sbuf[], int word_count);

#ifdef __cplusplus
}

Loading…
Cancel
Save