2 weeks ago · 5d124160f7
--- a/Makefile.am
+++ b/Makefile.am
 lib_LTLIBRARIES += src/libespeak-ng.la
 src_libespeak_ng_la_LDFLAGS = -version-info $(SHARED_VERSION) -lpthread -lm \
 	${PCAUDIOLIB_LIBS}
 	${PCAUDIOLIB_LIBS} ${JSONC_LIBS}
 src_libespeak_ng_la_CFLAGS = \
 	-fPIC -fvisibility=hidden \
 	-pedantic -fno-exceptions -DPATH_ESPEAK_DATA=\"$(DATADIR)\" -DLIBESPEAK_NG_EXPORT \
 	${PCAUDIOLIB_CFLAGS} ${SONIC_CFLAGS} ${AM_CFLAGS}
 	${PCAUDIOLIB_CFLAGS} ${SONIC_CFLAGS} ${JSONC_CFLAGS} ${AM_CFLAGS}
 src_libespeak_ng_la_SOURCES = \
 	src/ucd-tools/src/case.c \
--- a/configure.ac
+++ b/configure.ac
 AC_CHECK_FUNCS([strrchr])
 AC_CHECK_FUNCS([strstr])
 dnl ================================================================
 dnl JSON-C library checks.
 dnl
 dnl Requires both the json-c/json.h header and the json_object_from_file
 dnl symbol in libjson-c; configure stops with AC_MSG_ERROR when either
 dnl one is missing, so have_jsonc=no is only ever set on a path that aborts.
 dnl On success JSONC_CFLAGS is set to -DUSE_JSONC=1 and JSONC_LIBS to
 dnl -ljson-c; both are passed to AC_SUBST below and are referenced by the
 dnl libespeak-ng CFLAGS/LDFLAGS in Makefile.am.
 dnl ================================================================
 AC_CHECK_HEADERS([json-c/json.h],
    [
        have_jsonc=yes
        JSONC_CFLAGS=-DUSE_JSONC=1
        AC_CHECK_LIB([json-c], [json_object_from_file],
            [
                JSONC_LIBS=-ljson-c
            ],
            [
                have_jsonc=no
                AC_MSG_ERROR([json-c library is required for homograph support])
            ]
        )
    ],
    [
        have_jsonc=no
        AC_MSG_ERROR([json-c library is required for homograph support])
    ]
 )
 AC_SUBST(JSONC_CFLAGS)
 AC_SUBST(JSONC_LIBS)
 dnl ================================================================
 dnl PCAudioLib checks.
 dnl ================================================================
--- a/espeak-ng-data/dataset.json
+++ b/espeak-ng-data/dataset.json
--- a/espeak-ng-data/homographs.txt
+++ b/espeak-ng-data/homographs.txt
 رو
 ترک
 دور
 فک
 راند
 دم
 ترکان
 گرد
 جلد
 پر
 گردان
 عمر
 حل
 رد
 رود
 کن
 هل
 اره
 سرو
 تن
 کش
 رس
 کشت
 نقل
 رم
 گرداند
 کل
 سر
 گنج
 تو
 برنج
 قل
 کرد
 پی
 رب
 تاباند
 شو
 شکر
 شست
 خرد
 برند
 اعمال
 کرم
 برنده
 دوره
 نشست
 هری
 جنب
 ملا
 بعد
 شش
 طبق
 مد
 بری
 برم
 شوید
 چسباند
 تست
 روی
 ولو
 خواند
 سمت
 کنده
 گنگ
 بکن
 درهم
 شناساند
 یاس
 معبر
 مهم
 مردم
 خوراند
 جست
 بنا
 ترکاند
 ملاک
 پلاس
 فوت
 کت
 برده
 گله
 بده
 چرخاند
 کندی
 خلق
 چک
 بخار
 نزد
 چپاند
 اجر
 شان
 رسم
 شنو
 خواباند
 نرم
 رساند
 لمباند
 میدانی
 دود
 دواند
 جدا
 معین
 رسی
 جو
 فرانک
 هلیم
 بردار
 شرف
 کشند
 گزید
 مقدم
 زهره
 سری
 کیف
 کشیم
 پژمرد
 مجاز
 لرزاند
 نشاند
 نشسته
 جرم
 خورد
 علم
 جوشاند
 برگرداند
 سوزاند
 بره
 امین
 لب
 اه
 آرم
 گنجاند
 قطعه
 فراری
 لغز
 تنگ
 عرضه
 بدو
 صفر
 المان
 برید
 فرق
 محقق
 پیچاند
 پرورد
 کشتی
 مسلم
 روم
 پرش
 نصاب
 تکه
 جوید
 پراند
 وسطی
 کارد
 سپرد
 افشاند
 مراجع
 خرم
 کرات
 نمونه
 سند
 کند
 بر
 سنت
 شبه
 ابر
 اپل
 درد
 خیر
 خاراند
 ابی
 مبدل
 مو
 جنگ
 سرم
 قدم
 مایل
 اسکی
 نسبی
 گذراند
 بخور
 صرف
 قسم
 گرده
 مبلغ
 حسن
 کره
 عود
 چشم
 خلف
 حقه
 شل
 ادبا
 برس
 گرم
 سحر
 سنی
 ببر
 فهماند
 اوا
 اشکال
 بین
 سرور
 مفصل
 فرار
 سبک
 تپاند
 سیر
 پهن
 مهر
 درک
 چلاند
 رنجاند
 مشرف
 کمی
 فن
 مقطع
 پوشاند
 عرفا
 بدهی
 شنود
 قطر
 شهرت
 سپر
 رحم
 حلال
 دوران
 ترساند
 پروراند
 رویه
 آورد
 کلفت
 تکاند
 گریاند
 کابل
 شهره
 رهاند
 دین
 عقبی
 لنگ
 مصر
 پرس
 منکر
 قمری
 امل
 خفت
 سمبل
 کنه
 عالم
 bedah (فعل)
 تند
 محرم
 عرق
 خیساند
 لی‌لی
 کشاند
 یمن
 شدید
 قوت
 اشراف
 نبرد
 کر
 شما
 گریم
 شوم
 جور
 خنداند
 اشغال
 علی
 مسکن
 مثل
 نفس
 ارایه
 دوم
 دهم
 سپرم
 نکن
 ده
 ماند
 به
 گل
 ور
 مرد
 پست
 کنف
 qet؟e
 شبه 
 شوم 
 persian word
--- a/espeak-ng-data/lang/zle/ru-cl
+++ b/espeak-ng-data/lang/zle/ru-cl
 language ru-cl
 replace 03 a a#
 dict_min  20000
 speed 95
 dictrules 3
--- a/espeak-ng-data/stopwords.dat
+++ b/espeak-ng-data/stopwords.dat
 و
 در
 به
 از
 که
 این
 را
 با
 است
 برای
 آن
 یک
 خود
 تا
 کرد
 بر
 هم
 نیز
 گفت
 می‌شود
 وی
 شد
 دارد
 ما
 اما
 یا
 شده
 باید
 هر
 آنها
 بود
 او
 دیگر
 دو
 مورد
 می‌کند
 شود
 کند
 وجود
 بین
 پیش
 شده_است
 پس
 نظر
 اگر
 همه
 یکی
 حال
 هستند
 من
 کنند
 نیست
 باشد
 چه
 بی
 می
 بخش
 می‌کنند
 همین
 افزود
 هایی
 دارند
 راه
 همچنین
 روی
 داد
 بیشتر
 بسیار
 سه
 داشت
 چند
 سوی
 تنها
 هیچ
 میان
 اینکه
 شدن
 بعد
 جدید
 ولی
 حتی
 کردن
 برخی
 کردند
 می‌دهد
 اول
 نه
 کرده_است
 نسبت
 بیش
 شما
 چنین
 طور
 افراد
 تمام
 درباره
 بار
 بسیاری
 می‌تواند
 کرده
 چون
 ندارد
 دوم
 بزرگ
 طی
 حدود
 همان
 بدون
 البته
 آنان
 می‌گوید
 دیگری
 خواهد_شد
 کنیم
 قابل
 یعنی
 رشد
 می‌توان
 وارد
 کل
 ویژه
 قبل
 براساس
 نیاز
 گذاری
 هنوز
 لازم
 سازی
 بوده_است
 چرا
 می‌شوند
 وقتی
 گرفت
 کم
 جای
 حالی
 تغییر
 پیدا
 اکنون
 تحت
 باعث
 مدت
 فقط
 زیادی
 تعداد
 آیا
 بیان
 رو
 شدند
 عدم
 کرده_اند
 بودن
 نوع
 بلکه
 جاری
 دهد
 برابر
 مهم
 بوده
 اخیر
 مربوط
 امر
 زیر
 گیری
 شاید
 خصوص
 آقای
 اثر
 کننده
 بودند
 فکر
 کنار
 اولین
 سوم
 سایر
 کنید
 ضمن
 مانند
 باز
 می‌گیرد
 ممکن
 حل
 دارای
 پی
 مثل
 می‌رسد
 اجرا
 دور
 منظور
 کسی
 موجب
 طول
 امکان
 آنچه
 تعیین
 گفته
 شوند
 جمع
 خیلی
 علاوه
 گونه
 تاکنون
 رسید
 ساله
 گرفته
 شده_اند
 علت
 چهار
 داشته_باشد
 خواهد_بود
 طرف
 تهیه
 تبدیل
 مناسب
 زیرا
 مشخص
 می‌توانند
 نزدیک
 جریان
 روند
 بنابراین
 می‌دهند
 یافت
 نخستین
 بالا
 پنج
 ریزی
 عالی
 چیزی
 نخست
 بیشتری
 ترتیب
 شده_بود
 خاص
 خوبی
 خوب
 شروع
 فرد
 کامل
 غیر
 می‌رود
 دهند
 آخرین
 دادن
 جدی
 بهترین
 شامل
 گیرد
 بخشی
 باشند
 تمامی
 بهتر
 داده_است
 حد
 نبود
 کسانی
 می‌کرد
 داریم
 علیه
 می‌باشد
 دانست
 ناشی
 داشتند
 دهه
 می‌شد
 ایشان
 آنجا
 گرفته_است
 دچار
 می‌آید
 لحاظ
 آنکه
 داده
 بعضی
 هستیم
 اند
 برداری
 نباید
 می‌کنیم
 نشست
 سهم
 همیشه
 آمد
 اش
 وگو
 می‌کنم
 حداقل
 طبق
 جا
 خواهد_کرد
 نوعی
 چگونه
 رفت
 هنگام
 فوق
 روش
 ندارند
 سعی
 بندی
 شمار
 کلی
 کافی
 مواجه
 همچنان
 زیاد
 سمت
 کوچک
 داشته_است
 چیز
 پشت
 آورد
 حالا
 روبه
 سال‌های
 دادند
 می‌کردند
 عهده
 نیمه
 جایی
 دیگران
 سی
 بروز
 یکدیگر
 آمده_است
 جز
 کنم
 سپس
 کنندگان
 خودش
 همواره
 یافته
 شان
 صرف
 نمی‌شود
 رسیدن
 چهارم
 یابد
 متر
 ساز
 داشته
 کرده_بود
 باره
 نحوه
 کردم
 تو
 شخصی
 داشته_باشند
 محسوب
 پخش
 کمی
 متفاوت
 سراسر
 کاملا
 داشتن
 نظیر
 آمده
 گروهی
 فردی
 ع
 همچون
 خطر
 خویش
 کدام
 دسته
 سبب
 عین
 آوری
 متاسفانه
 بیرون
 دار
 ابتدا
 شش
 افرادی
 می‌گویند
 سالهای
 درون
 نیستند
 یافته_است
 پر
 خاطرنشان
 گاه
 جمعی
 اغلب
 دوباره
 می‌یابد
 لذا
 زاده
 گردد
 اینجا
--- a/src/espeak-ng.c
+++ b/src/espeak-ng.c
 	static const struct option long_options[] = {
 		{ "help",    no_argument,       0, 'h' },
 		{ "stdin",   no_argument,       0, 0x100 },
 		{ "input",   required_argument, 0, 0x113 },
 		{ "output",  required_argument, 0, 0x114 },
 		{ "compile-debug", optional_argument, 0, 0x101 },
 		{ "compile", optional_argument, 0, 0x102 },
 		{ "punct",   optional_argument, 0, 0x103 },
 	FILE *f_text = NULL;
 	char *p_text = NULL;
 	FILE *f_phonemes_out = stdout;
 	FILE *f_phonemes_out = NULL;
 	char *data_path = NULL; // use default path for espeak-ng-data
 	char input_file[256] = {0};
 	char output_file[256] = {0};  // Add output file name variable
 	int option_index = 0;
 	int c;
 		case 0x112: // --ssml-break
 			ssml_break = atoi(optarg2);
 			break;
 		case 0x113: // --input
 			strncpy0(input_file, optarg2, sizeof(input_file));
 			break;
 		case 0x114: // --output
 			strncpy0(output_file, optarg2, sizeof(output_file));
 			break;
 		default:
 			exit(0);
 		}
 	if (option_punctuation == 2)
 		espeak_SetPunctuationList(option_punctlist);
 	// Open output file for phoneme output, first overwrite to clear previous content
 	if (output_file[0] == 0) {
 		strcpy(output_file, "output.txt");  // Default output file if none specified
 	}
 	if ((f_phonemes_out = fopen(output_file, "w")) == NULL) {
 		fprintf(stderr, "Can't write to output file: %s\n", output_file);
 		exit(EXIT_FAILURE);
 	}
 	fclose(f_phonemes_out);  // Close after clearing
 	f_phonemes_out = NULL;  // Set to NULL after closing
 	espeak_SetPhonemeTrace(phoneme_options | (phonemes_separator << 8), f_phonemes_out);
 	if (filename[0] == 0) {
 	// Process input file if specified
 	if (input_file[0] != 0) {
 		f_text = fopen(input_file, "r");
 		if (f_text == NULL) {
 			fprintf(stderr, "Can't open input file: %s\n", input_file);
 			exit(EXIT_FAILURE);
 		}
 		char line[1000];
 		while (fgets(line, sizeof(line), f_text) != NULL) {
 			// Remove trailing newline
 			line[strcspn(line, "\n")] = 0;
 			// Process each line independently
 			if (line[0] != 0) {  // Skip empty lines
 				// Reopen output file in append mode for each line
 				if (f_phonemes_out != NULL) {
 					fclose(f_phonemes_out);
 					f_phonemes_out = NULL;
 				}
 				if ((f_phonemes_out = fopen(output_file, "a")) == NULL) {
 					fprintf(stderr, "Can't append to output file: %s\n", output_file);
 					exit(EXIT_FAILURE);
 				}
 				espeak_SetPhonemeTrace(phoneme_options | (phonemes_separator << 8), f_phonemes_out);
 				espeak_Synth(line, strlen(line)+1, 0, POS_CHARACTER, 0, synth_flags, NULL, NULL);
 				espeak_ng_Synchronize();
 				fclose(f_phonemes_out);
 				f_phonemes_out = NULL;
 			}
 		}
 		fclose(f_text);
 	} else if (filename[0] == 0) {
 		if ((optind < argc) && (flag_stdin == 0)) {
 			// there's a non-option parameter, and no -f or --stdin
 			// use it as text
 		exit(EXIT_FAILURE);
 	}
 	// The phoneme output stream is opened and closed per input line and is
 	// reset to NULL afterwards, so it is normally NULL here. fclose(NULL) is
 	// undefined behaviour, so only close a stream that is still open and is
 	// not stdout.
 	if (f_phonemes_out != NULL && f_phonemes_out != stdout) {
 		fclose(f_phonemes_out);
 		f_phonemes_out = NULL;
 	}
 	CloseWavFile();
 	espeak_ng_Terminate();
 	return 0;
--- a/src/libespeak-ng/compiledict.c
+++ b/src/libespeak-ng/compiledict.c
 			// PROBLEM  vowel reductions are not applied to the translated phonemes
 			// condition rules are not applied
 			TranslateWord(translator, phonetic, NULL, NULL);
 			TranslateWord(translator, phonetic, NULL, NULL, NULL, NULL, 0);
 			text_not_phonemes = false;
 			strncpy0(encoded_ph, ctx->word_phonemes, N_WORD_BYTES-4);
--- a/src/libespeak-ng/dictionary.c
+++ b/src/libespeak-ng/dictionary.c
 		text[1] = ' ';
 		text[2] = ' ';
 		strncpy0(text+3, word1, sizeof(text)-3);
 		flags0 = TranslateWord(tr, text+3, NULL, NULL);
 		flags0 = TranslateWord(tr, text+3, NULL, NULL, NULL, NULL, 0);
 		strcpy(ph_out, word_phonemes);
 		option_sayas = say_as;
 	}
--- a/src/libespeak-ng/numbers.c
+++ b/src/libespeak-ng/numbers.c
 					// lang=hu don't treat dot as ordinal indicator if the next word is a month name ($alt). It may have a suffix.
 					nextflags = 0;
 					if (IsAlpha(c2))
 						nextflags = TranslateWord(tr, &word_end[2], NULL, NULL);
 						nextflags = TranslateWord(tr, &word_end[2], NULL, NULL, NULL, NULL, 0);
 					if ((tr->prev_dict_flags[0] & FLAG_ALT_TRANS) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || iswdigit(c2)))
 						ordinal = 0; // TEST  09.02.10
--- a/src/libespeak-ng/setlengths.c
+++ b/src/libespeak-ng/setlengths.c
 	if (control & 2)
 		DoSonicSpeed(1 * 1024);
 	if ((wpm_value > espeakRATE_MAXIMUM) || ((wpm_value > speed.fast_settings) && (wpm > 350))) {
 	if ((wpm_value >= espeakRATE_MAXIMUM) || ((wpm_value > speed.fast_settings) && (wpm > 350))) {
 		int wpm2;
 		wpm2 = wpm;
 		wpm = espeakRATE_NORMAL;
--- a/src/libespeak-ng/tr_languages.c
+++ b/src/libespeak-ng/tr_languages.c
 	{ "_el",    OFFSET_GREEK,    0x380, 0x3ff,  L('e', 'l'), AL_DONT_NAME | AL_NOT_LETTERS | AL_WORDS },
 	{ "_cyr",   OFFSET_CYRILLIC, 0x400, 0x52f,  0, 0 },
 	{ "_hy",    OFFSET_ARMENIAN, 0x530, 0x58f,  L('h', 'y'), AL_WORDS },
 	{ "_he",    OFFSET_HEBREW,   0x590, 0x5ff,  L('h', 'e'), 0 },
 	{ "_he",    OFFSET_HEBREW,   0x590, 0x5ff,  0, 0 },
 	{ "_ar",    OFFSET_ARABIC,   0x600, 0x6ff,  0, 0 },
 	{ "_syc",   OFFSET_SYRIAC,   0x700, 0x74f,  0, 0 },
 	{ "_hi",    OFFSET_DEVANAGARI, 0x900, 0x97f, L('h', 'i'), AL_WORDS },
        tr->langopts.ideographs = 1;
    }
        break;
 	case L('h','e'): // Hebrew
 	{
 		tr->langopts.param[LOPT_APOSTROPHE] = 2; // bit 1  Apostrophe at end of word is part of the word, for words like בָּגָאז׳
 		tr->langopts.stress_flags = S_NO_AUTO_2; // don't use secondary stress
 		tr->langopts.numbers = NUM_SINGLE_STRESS | NUM_DFRACTION_2 | NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_SINGLE_AND;
 	}
 		break;
 	case L('g', 'a'): // irish
 	case L('g', 'd'): // scots gaelic
 	{
--- a/src/libespeak-ng/translate.c
+++ b/src/libespeak-ng/translate.c
 #include "translateword.h"
 static int CalcWordLength(int source_index, int charix_top, short int *charix, WORD_TAB *words, int word_count);
 static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, unsigned char *p, char *word_phonemes);
 static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, unsigned char *p, char *word_phonemes, WORD_TAB words[], char sbuf[], int word_count);
 static void SwitchLanguage(char *word, char *word_phonemes);
 Translator *translator = NULL; // the main translator
 	return strchr((char *)s, c); // (char *) is needed for Borland compiler
 }
 int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out)
 int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, WORD_TAB words[], char sbuf[], int word_count)
 {
 	char words_phonemes[N_WORD_PHONEMES]; // a word translated into phoneme codes
 	char *phonemes = words_phonemes;
 	int flags = TranslateWord3(tr, word_start, wtab, word_out, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes));
 	int flags = TranslateWord3(tr, word_start, wtab, word_out, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes), words, sbuf, word_count);
 	if (flags & FLAG_TEXTMODE && word_out) {
 		// Ensure that start of word rules match with the replaced text,
 		// so that emoji and other characters are pronounced correctly.
 			// However, dictionary_skipwords value is still needed outside this scope.
 			// So we backup and restore it at the end of this scope.
 			int skipwords = dictionary_skipwords;
 			TranslateWord3(tr, word_out, wtab, NULL, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes));
 			TranslateWord3(tr, word_out, wtab, NULL, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes), words, sbuf, word_count);
 			int n;
 			if (first_word) {
 			available -= n;
 			phonemes += n;
 			// skip to the next word in a multi-word replacement. Always skip at least one word.
 			// skip to the next word in a multi-word replacement. Always skip at least one word.
 			for (dictionary_skipwords++; dictionary_skipwords > 0; dictionary_skipwords--) {
 				while (!isspace(*word_out)) ++word_out;
 				while (isspace(*word_out))  ++word_out;
 	return SetAlternateTranslator(new_language, &translator3, translator3_language);
 }
 static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pause)
 static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pause, WORD_TAB words [], char sbuf[], int word_count)
 {
 	int flags = 0;
 	int stress;
 		word_copy_len = ix;
 		word_replaced[2] = 0;
 		flags = TranslateWord(translator, word, wtab, &word_replaced[2]);
 		flags = TranslateWord(translator, word, wtab, &word_replaced[2], words, sbuf, word_count);
 		if (flags & FLAG_SPELLWORD) {
 			// re-translate the word as individual letters, separated by spaces
 		}
 		if ((flags & FLAG_COMBINE) && !(wtab[1].flags & FLAG_PHONEMES)) {
 			CombineFlag(tr, wtab, word, &flags, p, word_phonemes);
 			CombineFlag(tr, wtab, word, &flags, p, word_phonemes, words, sbuf, word_count);
 		}
 		if (p[0] == phonSWITCH) {
 					if (word_replaced[2] != 0) {
 						word_replaced[0] = 0; // byte before the start of the word
 						word_replaced[1] = ' ';
 						flags = TranslateWord(translator2, &word_replaced[1], wtab, NULL);
 						flags = TranslateWord(translator2, &word_replaced[1], wtab, NULL, words, sbuf, word_count);
 					} else
 						flags = TranslateWord(translator2, word, wtab, &word_replaced[2]);
 						flags = TranslateWord(translator2, word, wtab, &word_replaced[2], words, sbuf, word_count);
 				}
 				if (p[0] != phonSWITCH)
 	}
 	words[0].flags |= FLAG_FIRST_WORD;
 	// Add debug print header and print all words before processing
 	// fprintf(stderr, "\n=== Words in sentence ===\n");
 	// for (ix = 0; ix < word_count; ix++) {
 	// 	char word_copy[150];
 	// 	int word_len = 0;
 	// 	char *pw = &sbuf[words[ix].start];
 	// 	while (pw[word_len] != ' ' && pw[word_len] != 0 && word_len < 149) {
 	// 		word_copy[word_len] = pw[word_len];
 	// 		word_len++;
 	// 	}
 	// 	word_copy[word_len] = 0;
 	// 	fprintf(stderr, "Word %d: '%s'\n", ix + 1, word_copy);
 	// }
 	// Each TranslateWord2 may require up to 7 phonemes
 	// and after this loop we require 2 phonemes
 	for (ix = 0; ix < word_count && (n_ph_list2 < N_PHONEME_LIST-7-2); ix++) {
 			for (pw = &number_buf[3]; pw < pn && nw < N_CLAUSE_WORDS;) {
 				// keep wflags for each part, for FLAG_HYPHEN_AFTER
 				dict_flags = TranslateWord2(tr, pw, &num_wtab[nw++], words[ix].pre_pause);
 				dict_flags = TranslateWord2(tr, pw, &num_wtab[nw++], words[ix].pre_pause, words, sbuf, word_count);
 				while (pw < pn && *pw++ != ' ')
 					;
 				words[ix].pre_pause = 0;
 		} else {
 			pre_pause = 0;
 			dict_flags = TranslateWord2(tr, word, &words[ix], words[ix].pre_pause);
 			dict_flags = TranslateWord2(tr, word, &words[ix], words[ix].pre_pause, words, sbuf, word_count);
 			if (pre_pause > words[ix+1].pre_pause) {
 				words[ix+1].pre_pause = pre_pause;
 					memset(number_buf+1, ' ', 9);
 					nx = utf8_in(&c_temp, pw);
 					memcpy(&number_buf[3], pw, nx);
 					TranslateWord2(tr, &number_buf[3], &words[ix], 0);
 					TranslateWord2(tr, &number_buf[3], &words[ix], 0, words, sbuf, word_count);
 					pw += nx;
 				}
 			}
 	return k;
 	}
 static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, unsigned char *p, char *word_phonemes) {
 static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, unsigned char *p, char *word_phonemes, WORD_TAB words[], char sbuf[], int word_count) {
 	// combine a preposition with the following word
 		char ph_buf[N_WORD_PHONEMES];
 		strcpy(ph_buf, word_phonemes);
 		flags2[0] = TranslateWord(tr, p2+1, wtab+1, NULL);
 		flags2[0] = TranslateWord(tr, p2+1, wtab+1, NULL, words, sbuf, word_count);
 		if ((flags2[0] & FLAG_WAS_UNPRONOUNCABLE) || (word_phonemes[0] == phonSWITCH))
 			ok = false;
 	if (ok) {
 		*p2 = '-'; // replace next space by hyphen
 		wtab[0].flags &= ~FLAG_ALL_UPPER; // prevent it being considered an abbreviation
 		*flags = TranslateWord(translator, word, wtab, NULL); // translate the combined word
 		*flags = TranslateWord(translator, word, wtab, NULL, words, sbuf, word_count); // translate the combined word
 		if ((sylimit > 0) && (CountSyllables(p) > (sylimit & 0x1f))) {
 			// revert to separate words
 			*p2 = ' ';
 			*flags = TranslateWord(translator, word, wtab, NULL);
 			*flags = TranslateWord(translator, word, wtab, NULL, words, sbuf, word_count);
 		} else {
 			if (*flags == 0)
 				*flags = flags2[0]; // no flags for the combined word, so use flags from the second word eg. lang-hu "nem december 7-e"
--- a/src/libespeak-ng/translate.h
+++ b/src/libespeak-ng/translate.h
 void print_dictionary_flags(unsigned int *flags, char *buf, int buf_len);
 int TranslateWord(Translator *tr, char *word1, WORD_TAB *wtab, char *word_out);
 int TranslateWord(Translator *tr, char *word1, WORD_TAB *wtab, char *word_out, WORD_TAB *words, char sbuf [], int word_count);
 void TranslateClause(Translator *tr, int *tone, char **voice_change);
 void TranslateClauseWithTerminator(Translator *tr, int *tone_out, char **voice_change, int *terminator_out);
--- a/src/libespeak-ng/translateword.c
+++ b/src/libespeak-ng/translateword.c
 /*
 * Copyright (C) 2005 to 2014 by Jonathan Duddington
 * email: [email protected]
 #include "synthdata.h"            // for SelectPhonemeTable, LookupPhonemeTable
 #include "ucd/ucd.h"              // for ucd_toupper
 #include "voice.h"                // for voice, voice_t
 #include "speech.h"               // for path_home
 // Add JSON parsing headers
 #include <json-c/json.h>
 // Global variables for homographs
 static char **homographs_list = NULL;
 static int homographs_count = 0;
 static json_object *homograph_data = NULL;
 // Add these near the other global variables at the top
 static char **stopwords_list = NULL;
 static int stopwords_count = 0;
 // Load the homograph data set (dataset.json) into homograph_data.
 //
 // The file is looked up under path_home first. The absolute development
 // path that used to be the only location tried is kept as a fallback so
 // that existing setups keep working.
 // NOTE(review): confirm whether path_home already ends in "espeak-ng-data";
 // if it does, the extra path component below should be dropped.
 //
 // On failure a message is written to stderr and homograph_data stays NULL.
 // Loading is attempted only once per process, so a missing or malformed
 // file is not re-parsed (and re-reported) for every homograph.
 static void LoadHomographData(void)
 {
 	static bool load_attempted = false;
 	char path[256];
 	json_object *root;
 	if (load_attempted) {
 		return;
 	}
 	load_attempted = true;
 	// Get the path to dataset.json
 	snprintf(path, sizeof(path), "%s%cespeak-ng-data%cdataset.json", path_home, PATHSEP, PATHSEP);
 	root = json_object_from_file(path);
 	if (root == NULL) {
 		// Legacy hard-coded location.
 		root = json_object_from_file("/content/espeak-ng/espeak-ng-data/dataset.json");
 	}
 	if (root == NULL) {
 		fprintf(stderr, "Failed to load homograph data from %s\n", path);
 		return;
 	}
 	homograph_data = root;
 }
 // Load the homograph word list (homographs.txt, one word per line) into
 // homographs_list / homographs_count.
 //
 // The file is looked up under path_home first; the absolute development
 // path that used to be the only location tried is kept as a fallback.
 // NOTE(review): confirm whether path_home already ends in "espeak-ng-data".
 //
 // Trailing whitespace (including '\r' from CRLF files) is stripped from each
 // entry and blank lines are skipped, because entries are later compared
 // with strcmp() against words that never contain whitespace.
 //
 // The globals are only updated once the whole file has been read, so a
 // failure part-way through leaves homographs_list NULL. Loading is attempted
 // only once per process so that a missing file is not reopened for every word.
 static void LoadHomographs(void)
 {
 	static bool load_attempted = false;
 	FILE *f;
 	char path[256];
 	char line[256];
 	size_t capacity = 0;
 	size_t loaded = 0;
 	char **list;
 	if (load_attempted) {
 		return;
 	}
 	load_attempted = true;
 	// Get the path to homographs.txt
 	snprintf(path, sizeof(path), "%s%cespeak-ng-data%chomographs.txt", path_home, PATHSEP, PATHSEP);
 	f = fopen(path, "r");
 	if (f == NULL) {
 		// Legacy hard-coded location.
 		f = fopen("/content/espeak-ng/espeak-ng-data/homographs.txt", "r");
 	}
 	if (f == NULL) {
 		fprintf(stderr, "Failed to open homographs.txt\n");
 		return;
 	}
 	// First pass: count lines to get an upper bound on the number of entries.
 	while (fgets(line, sizeof(line), f) != NULL) {
 		capacity++;
 	}
 	if (capacity == 0) {
 		// Empty file: avoid malloc(0), whose result is implementation-defined.
 		fclose(f);
 		return;
 	}
 	list = malloc(capacity * sizeof *list);
 	if (list == NULL) {
 		fclose(f);
 		return;
 	}
 	// Second pass: store the entries. Never write past the counted capacity.
 	rewind(f);
 	while (loaded < capacity && fgets(line, sizeof(line), f) != NULL) {
 		size_t len = strlen(line);
 		while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r' ||
 		                   line[len - 1] == ' ' || line[len - 1] == '\t')) {
 			line[--len] = 0;
 		}
 		if (len == 0) {
 			continue;
 		}
 		list[loaded] = strdup(line);
 		if (list[loaded] == NULL) {
 			// Out of memory: release everything read so far.
 			for (size_t j = 0; j < loaded; j++) {
 				free(list[j]);
 			}
 			free(list);
 			fclose(f);
 			return;
 		}
 		loaded++;
 	}
 	fclose(f);
 	if (loaded == 0) {
 		free(list);
 		return;
 	}
 	homographs_list = list;
 	homographs_count = (int)loaded;
 }
 // Report whether `word` exactly matches (strcmp) an entry of the homograph
 // list. The list is loaded on demand through LoadHomographs(); if it is
 // still unavailable afterwards, no word is treated as a homograph.
 static bool IsHomograph(const char *word)
 {
 	if (homographs_list == NULL) {
 		LoadHomographs();
 		if (homographs_list == NULL)
 			return false;
 	}
 	char **entry = homographs_list;
 	int remaining = homographs_count;
 	while (remaining > 0) {
 		if (strcmp(word, *entry) == 0)
 			return true;
 		entry++;
 		remaining--;
 	}
 	return false;
 }
 // Load the stop-word list (stopwords.dat, one word per line) into
 // stopwords_list / stopwords_count.
 //
 // The file is looked up under path_home first; the absolute development
 // path that used to be the only location tried is kept as a fallback.
 // NOTE(review): confirm whether path_home already ends in "espeak-ng-data".
 //
 // Trailing whitespace (including '\r' from CRLF files) is stripped from each
 // entry and blank lines are skipped, because entries are later compared
 // with strcmp() against words that never contain whitespace.
 //
 // The globals are only updated once the whole file has been read, so a
 // failure part-way through leaves stopwords_list NULL. Loading is attempted
 // only once per process so that a missing file is not reopened for every word.
 static void LoadStopwords(void)
 {
 	static bool load_attempted = false;
 	FILE *f;
 	char path[256];
 	char line[256];
 	size_t capacity = 0;
 	size_t loaded = 0;
 	char **list;
 	if (load_attempted) {
 		return;
 	}
 	load_attempted = true;
 	// Get the path to stopwords.dat
 	snprintf(path, sizeof(path), "%s%cespeak-ng-data%cstopwords.dat", path_home, PATHSEP, PATHSEP);
 	f = fopen(path, "r");
 	if (f == NULL) {
 		// Legacy hard-coded location.
 		f = fopen("/content/espeak-ng/espeak-ng-data/stopwords.dat", "r");
 	}
 	if (f == NULL) {
 		fprintf(stderr, "Failed to open stopwords.dat\n");
 		return;
 	}
 	// First pass: count lines to get an upper bound on the number of entries.
 	while (fgets(line, sizeof(line), f) != NULL) {
 		capacity++;
 	}
 	if (capacity == 0) {
 		// Empty file: avoid malloc(0), whose result is implementation-defined.
 		fclose(f);
 		return;
 	}
 	list = malloc(capacity * sizeof *list);
 	if (list == NULL) {
 		fclose(f);
 		return;
 	}
 	// Second pass: store the entries. Never write past the counted capacity.
 	rewind(f);
 	while (loaded < capacity && fgets(line, sizeof(line), f) != NULL) {
 		size_t len = strlen(line);
 		while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r' ||
 		                   line[len - 1] == ' ' || line[len - 1] == '\t')) {
 			line[--len] = 0;
 		}
 		if (len == 0) {
 			continue;
 		}
 		list[loaded] = strdup(line);
 		if (list[loaded] == NULL) {
 			// Out of memory: release everything read so far.
 			for (size_t j = 0; j < loaded; j++) {
 				free(list[j]);
 			}
 			free(list);
 			fclose(f);
 			return;
 		}
 		loaded++;
 	}
 	fclose(f);
 	if (loaded == 0) {
 		free(list);
 		return;
 	}
 	stopwords_list = list;
 	stopwords_count = (int)loaded;
 }
 // Report whether `word` exactly matches (strcmp) an entry of the stop-word
 // list. The list is loaded on demand through LoadStopwords(); if it is
 // still unavailable afterwards, no word is treated as a stop word.
 static bool IsStopword(const char *word)
 {
 	if (stopwords_list == NULL) {
 		LoadStopwords();
 		if (stopwords_list == NULL)
 			return false;
 	}
 	char **entry = stopwords_list;
 	int remaining = stopwords_count;
 	while (remaining > 0) {
 		if (strcmp(word, *entry) == 0)
 			return true;
 		entry++;
 		remaining--;
 	}
 	return false;
 }
 // Function to generate phonemes for homograph words
 static void GenerateHomographPhonemes(const char *word, char *phonemes, WORD_TAB words[], char sbuf[], int word_count) {
 	if (homograph_data == NULL) {
 		LoadHomographData();
 	}
 	// fprintf(stderr, "\n=== Homograph Processing ===\n");
 	// fprintf(stderr, "Looking up word: '%s'\n", word);
 	// Get the word data from homograph dictionary
 	json_object *word_data = json_object_object_get(homograph_data, word);
 	if (word_data == NULL) {
 		fprintf(stderr, "Word not found in homograph dictionary\n");
 		// Word not found in homograph dictionary, use word's characters as phonemes
 		int i = 0;
 		while (word[i] != 0 && i < N_WORD_PHONEMES - 1) {
 			phonemes[i] = PhonemeCode(word[i]);
 			i++;
 		}
 		phonemes[i] = 0;
 		return;
 	}
 	// fprintf(stderr, "\nFound homograph '%s' with pronunciations:\n", word);
 	struct json_object_iterator it = json_object_iter_begin(word_data);
 	struct json_object_iterator itEnd = json_object_iter_end(word_data);
 	while (!json_object_iter_equal(&it, &itEnd)) {
 		const char *debug_pron_key = json_object_iter_peek_name(&it);
 		json_object *debug_pron_val = json_object_iter_peek_value(&it);
 		// fprintf(stderr, "  Pronunciation: %s\n", debug_pron_key);
 		// fprintf(stderr, "  Context words: ");
 		if (json_object_get_type(debug_pron_val) == json_type_array) {
 			int array_len = json_object_array_length(debug_pron_val);
 			for (int i = 0; i < array_len; i++) {
 				json_object *item = json_object_array_get_idx(debug_pron_val, i);
 				// fprintf(stderr, "%s ", json_object_get_string(item));
 			}
 		}
 		fprintf(stderr, "\n");
 		json_object_iter_next(&it);
 	}
 	fprintf(stderr, "\n");
 	// Count context word frequencies
 	int context_counts[256] = {0}; // Assuming max 256 unique context words
 	char *context_words[256] = {0};
 	int num_context_words = 0;
 	// fprintf(stderr, "\n=== Context Words ===\n");
 	// Process context words
 	for (int i = 0; i < word_count; i++) {
 		char word_copy[150];
 		int word_len = 0;
 		char *pw = &sbuf[words[i].start];
 		// Extract the word
 		while (pw[word_len] != ' ' && pw[word_len] != 0 && word_len < 149) {
 			word_copy[word_len] = pw[word_len];
 			word_len++;
 		}
 		word_copy[word_len] = 0;
 		// Skip if it's the target word, too short, or a stopword
 		if (word_len <= 1 || strcmp(word_copy, word) == 0 || IsStopword(word_copy)) {
 			continue;
 		}
 		// fprintf(stderr, "Word %d: '%s'\n", i + 1, word_copy);
 		// Check if we've seen this word before
 		int found = 0;
 		for (int j = 0; j < num_context_words; j++) {
 			if (strcmp(context_words[j], word_copy) == 0) {
 				context_counts[j]++;
 				found = 1;
 				break;
 			}
 		}
 		// Add new word if not found
 		if (!found && num_context_words < 255) {
 			context_words[num_context_words] = strdup(word_copy);
 			if (context_words[num_context_words] == NULL) {
 				fprintf(stderr, "Warning: Failed to allocate memory for context word\n");
 				continue;
 			}
 			context_counts[num_context_words] = 1;
 			num_context_words++;
 		}
 	}
 	// Find best pronunciation
 	const char *best_phoneme = NULL;
 	double max_normalized_score = -1;
 	int max_raw_overlap = 0;
 	// fprintf(stderr, "\nEvaluating pronunciations:\n");
 	// Iterate through each pronunciation option
 	json_object_object_foreach(word_data, pron_key, pron_val) {
 		if (json_object_get_type(pron_val) != json_type_array) {
 			continue;
 		}
 		// fprintf(stderr, "\nEvaluating pronunciation: %s\n", pron_key);
 		// Count word frequencies in this pronunciation's associated words
 		int phoneme_word_counts[256] = {0};
 		int total_phoneme_words = 0;
 		int array_len = json_object_array_length(pron_val);
 		for (int i = 0; i < array_len; i++) {
 			json_object *item = json_object_array_get_idx(pron_val, i);
 			if (item == NULL) {
 				fprintf(stderr, "  Warning: NULL item at index %d\n", i);
 				continue;
 			}
 			const char *assoc_word = json_object_get_string(item);
 			if (assoc_word == NULL) {
 				fprintf(stderr, "  Warning: NULL string at index %d\n", i);
 				continue;
 			}
 			// fprintf(stderr, "  Processing associated word[%d]: '%s' (length: %zu)\n", 
 			// 	i, assoc_word, strlen(assoc_word));
 			// Count occurrences of this associated word
 			for (int j = 0; j < num_context_words; j++) {
 				if (context_words[j] == NULL) {
 					fprintf(stderr, "    Warning: NULL context word at index %d\n", j);
 					continue;
 				}
 				// fprintf(stderr, "    Comparing with context word[%d]: '%s' (length: %zu)\n", 
 				// 	j, context_words[j], strlen(context_words[j]));
 				if (strcmp(context_words[j], assoc_word) == 0) {
 					phoneme_word_counts[j]++;
 					// fprintf(stderr, "    Match found! New count for word '%s': %d\n", 
 					// 	context_words[j], phoneme_word_counts[j]);
 				}
 			}
 			total_phoneme_words++;
 		}
 		// Calculate weighted overlap
 		int weighted_overlap = 0;
 		// fprintf(stderr, "  Calculating weighted overlap:\n");
 		for (int i = 0; i < num_context_words; i++) {
 			if (context_words[i] == NULL) continue;
 			weighted_overlap += context_counts[i] * phoneme_word_counts[i];
 			// fprintf(stderr, "    Word '%s': count=%d, matches=%d, contribution=%d\n",
 			// 	context_words[i], context_counts[i], phoneme_word_counts[i],
 			// 	context_counts[i] * phoneme_word_counts[i]);
 		}
 		// fprintf(stderr, "  Total weighted overlap: %d\n", weighted_overlap);
 		// Calculate normalized score
 		double normalized_score = (total_phoneme_words > 0) ? 
 			(double)weighted_overlap / total_phoneme_words : 0.0;
 		// fprintf(stderr, "  Raw overlap: %d\n", weighted_overlap);
 		// fprintf(stderr, "  Normalized score: %.2f\n", normalized_score);
 		// Select best phoneme
 		if (normalized_score > max_normalized_score) {
 			max_normalized_score = normalized_score;
 			max_raw_overlap = weighted_overlap;
 			best_phoneme = pron_key;
 			// fprintf(stderr, "  New best pronunciation selected!\n");
 		} else if (normalized_score == max_normalized_score) {
 			// Tiebreaker: prefer the phoneme with higher raw overlap
 			if (weighted_overlap > max_raw_overlap) {
 				max_raw_overlap = weighted_overlap;
 				best_phoneme = pron_key;
 				// fprintf(stderr, "  New best pronunciation selected (tiebreaker)!\n");
 			}
 		}
 	}
 	// fprintf(stderr, "\nFinal selection:\n");
 	// fprintf(stderr, "Selected pronunciation: %s\n", best_phoneme ? best_phoneme : "none");
 	// fprintf(stderr, "Final score: %.2f\n", max_normalized_score);
 	// fprintf(stderr, "Final raw overlap: %d\n", max_raw_overlap);
 	// Copy the best phoneme to output
 	if (best_phoneme != NULL) {
 		int i = 0;
 		while (best_phoneme[i] != 0 && i < N_WORD_PHONEMES - 1) {
 			phonemes[i] = PhonemeCode(best_phoneme[i]);
 			i++;
 		}
 		phonemes[i] = 0;
 	} else {
 		// No suitable pronunciation found, use word's characters as phonemes
 		int i = 0;
 		while (word[i] != 0 && i < N_WORD_PHONEMES - 1) {
 			phonemes[i] = PhonemeCode(word[i]);
 			i++;
 		}
 		phonemes[i] = 0;
 	}
 }
 static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes);
 static void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags);
 static int Unpronouncable(Translator *tr, char *word, int posn);
 static int Unpronouncable2(Translator *tr, char *word);
 int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes)
 int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes, WORD_TAB words[], char sbuf[], int word_count)
 {
 	// word1 is terminated by space (0x20) character
 	int add_suffix_phonemes = 0;
 	WORD_TAB wtab_null[8];
 	// Debug print the word being processed
 	char wordbuf[120];
 	unsigned int ix2;
 	for (ix2 = 0; ((c_temp = word_start[ix2]) != ' ') && (c_temp != 0) && (ix2 < (sizeof(wordbuf)-1)); ix2++)
 		wordbuf[ix2] = c_temp;
 	wordbuf[ix2] = 0;
 	// fprintf(stderr, "Processing word: '%s'\n", wordbuf);
 	// Add debug print header and print all words before processing
 	// fprintf(stderr, "\n=== Context Words ===\n");
 	// for (ix = 0; ix < word_count; ix++) {
 	// 	char word_copy[150];
 	// 	int word_len = 0;
 	// 	char *pw = &sbuf[words[ix].start];
 	// 	while (pw[word_len] != ' ' && pw[word_len] != 0 && word_len < 149) {
 	// 		word_copy[word_len] = pw[word_len];
 	// 		word_len++;
 	// 	}
 	// 	word_copy[word_len] = 0;
 	// 	fprintf(stderr, "Word %d: '%s'\n", ix + 1, word_copy);
 	// }
 	// Check if the word is a homograph
 	if (IsHomograph(wordbuf)) {
 		GenerateHomographPhonemes(wordbuf, word_phonemes, words, sbuf, word_count);
 		return dictionary_flags[0]; // Return early with current dictionary flags
 	}
 	if (wtab == NULL) {
 		memset(wtab_null, 0, sizeof(wtab_null));
 		wtab = wtab_null;
 		if (phonemes[0] == phonSWITCH) {
 			// change to another language in order to translate this word
 			strcpy(word_phonemes, phonemes);
 			return 0;
 		}
 				return FLAG_SPELLWORD; // a mixture of languages, retranslate as individual letters, separated by spaces
 			return 0;
 		}
 		strcpy(word_phonemes, phonemes);
 		if (wflags & FLAG_TRANSLATOR2)
 			return 0;
 			if (phonemes[0] == phonSWITCH) {
 				// change to another language in order to translate this word
 				strcpy(word_phonemes, phonemes);
 				return 0;
 			}
 				if ((word_length == 1) && (IsAlpha(wc) || IsSuperscript(wc))) {
 					if ((wordx = SpeakIndividualLetters(tr, wordx, phonemes, spell_word, current_alphabet, word_phonemes)) == NULL)
 						return 0;
 					strcpy(word_phonemes, phonemes);
 					return 0;
 				}
 					if (phonemes[0] == phonSWITCH) {
 						// change to another language in order to translate this word
 						wordx[-1] = c_temp;
 						strcpy(word_phonemes, phonemes);
 						return 0;
 					}
 						if (phonemes[0] == phonSWITCH) {
 							// change to another language in order to translate this word
 							memcpy(wordx, word_copy, strlen(word_copy));
 							strcpy(word_phonemes, phonemes);
 							return 0;
 						}
 						if (phonemes[0] == phonSWITCH) {
 							// change to another language in order to translate this word
 							memcpy(wordx, word_copy, strlen(word_copy));
 							strcpy(word_phonemes, phonemes);
 							return 0;
 						}
 							if (phonemes[0] == phonSWITCH) {
 								// change to another language in order to translate this word
 								strcpy(word_phonemes, phonemes);
 								memcpy(wordx, word_copy, strlen(word_copy));
 								wordx[-1] = c_temp;
 		non_initial = true;
 		if (phonemes[0] == phonSWITCH) {
 			// change to another language in order to translate this word
 			strcpy(word_phonemes, phonemes);
 			return NULL;
 		}
--- a/src/libespeak-ng/translateword.h
+++ b/src/libespeak-ng/translateword.h
 {
 #endif
 int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes);
 int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes, WORD_TAB words[], char sbuf[], int word_count);
 #ifdef __cplusplus
 }