lib_LTLIBRARIES += src/libespeak-ng.la | lib_LTLIBRARIES += src/libespeak-ng.la | ||||
src_libespeak_ng_la_LDFLAGS = -version-info $(SHARED_VERSION) -lpthread -lm \ | src_libespeak_ng_la_LDFLAGS = -version-info $(SHARED_VERSION) -lpthread -lm \ | ||||
${PCAUDIOLIB_LIBS} | |||||
${PCAUDIOLIB_LIBS} ${JSONC_LIBS} | |||||
src_libespeak_ng_la_CFLAGS = \ | src_libespeak_ng_la_CFLAGS = \ | ||||
-fPIC -fvisibility=hidden \ | -fPIC -fvisibility=hidden \ | ||||
-pedantic -fno-exceptions -DPATH_ESPEAK_DATA=\"$(DATADIR)\" -DLIBESPEAK_NG_EXPORT \ | -pedantic -fno-exceptions -DPATH_ESPEAK_DATA=\"$(DATADIR)\" -DLIBESPEAK_NG_EXPORT \ | ||||
${PCAUDIOLIB_CFLAGS} ${SONIC_CFLAGS} ${AM_CFLAGS} | |||||
${PCAUDIOLIB_CFLAGS} ${SONIC_CFLAGS} ${JSONC_CFLAGS} ${AM_CFLAGS} | |||||
src_libespeak_ng_la_SOURCES = \ | src_libespeak_ng_la_SOURCES = \ | ||||
src/ucd-tools/src/case.c \ | src/ucd-tools/src/case.c \ |
AC_CHECK_FUNCS([strrchr]) | AC_CHECK_FUNCS([strrchr]) | ||||
AC_CHECK_FUNCS([strstr]) | AC_CHECK_FUNCS([strstr]) | ||||
dnl ================================================================ | |||||
dnl JSON-C library checks. | |||||
dnl ================================================================ | |||||
AC_CHECK_HEADERS([json-c/json.h], | |||||
[ | |||||
have_jsonc=yes | |||||
JSONC_CFLAGS=-DUSE_JSONC=1 | |||||
AC_CHECK_LIB([json-c], [json_object_from_file], | |||||
[ | |||||
JSONC_LIBS=-ljson-c | |||||
], | |||||
[ | |||||
have_jsonc=no | |||||
AC_MSG_ERROR([json-c library is required for homograph support]) | |||||
] | |||||
) | |||||
], | |||||
[ | |||||
have_jsonc=no | |||||
AC_MSG_ERROR([json-c library is required for homograph support]) | |||||
] | |||||
) | |||||
AC_SUBST(JSONC_CFLAGS) | |||||
AC_SUBST(JSONC_LIBS) | |||||
dnl ================================================================ | dnl ================================================================ | ||||
dnl PCAudioLib checks. | dnl PCAudioLib checks. | ||||
dnl ================================================================ | dnl ================================================================ |
رو | |||||
ترک | |||||
دور | |||||
فک | |||||
راند | |||||
دم | |||||
ترکان | |||||
گرد | |||||
جلد | |||||
پر | |||||
گردان | |||||
عمر | |||||
حل | |||||
رد | |||||
رود | |||||
کن | |||||
هل | |||||
اره | |||||
سرو | |||||
تن | |||||
کش | |||||
رس | |||||
کشت | |||||
نقل | |||||
رم | |||||
گرداند | |||||
کل | |||||
سر | |||||
گنج | |||||
تو | |||||
برنج | |||||
قل | |||||
کرد | |||||
پی | |||||
رب | |||||
تاباند | |||||
شو | |||||
شکر | |||||
شست | |||||
خرد | |||||
برند | |||||
اعمال | |||||
کرم | |||||
برنده | |||||
دوره | |||||
نشست | |||||
هری | |||||
جنب | |||||
ملا | |||||
بعد | |||||
شش | |||||
طبق | |||||
مد | |||||
بری | |||||
برم | |||||
شوید | |||||
چسباند | |||||
تست | |||||
روی | |||||
ولو | |||||
خواند | |||||
سمت | |||||
کنده | |||||
گنگ | |||||
بکن | |||||
درهم | |||||
شناساند | |||||
یاس | |||||
معبر | |||||
مهم | |||||
مردم | |||||
خوراند | |||||
جست | |||||
بنا | |||||
ترکاند | |||||
ملاک | |||||
پلاس | |||||
فوت | |||||
کت | |||||
برده | |||||
گله | |||||
بده | |||||
چرخاند | |||||
کندی | |||||
خلق | |||||
چک | |||||
بخار | |||||
نزد | |||||
چپاند | |||||
اجر | |||||
شان | |||||
رسم | |||||
شنو | |||||
خواباند | |||||
نرم | |||||
رساند | |||||
لمباند | |||||
میدانی | |||||
دود | |||||
دواند | |||||
جدا | |||||
معین | |||||
رسی | |||||
جو | |||||
فرانک | |||||
هلیم | |||||
بردار | |||||
شرف | |||||
کشند | |||||
گزید | |||||
مقدم | |||||
زهره | |||||
سری | |||||
کیف | |||||
کشیم | |||||
پژمرد | |||||
مجاز | |||||
لرزاند | |||||
نشاند | |||||
نشسته | |||||
جرم | |||||
خورد | |||||
علم | |||||
جوشاند | |||||
برگرداند | |||||
سوزاند | |||||
بره | |||||
امین | |||||
لب | |||||
اه | |||||
آرم | |||||
گنجاند | |||||
قطعه | |||||
فراری | |||||
لغز | |||||
تنگ | |||||
عرضه | |||||
بدو | |||||
صفر | |||||
المان | |||||
برید | |||||
فرق | |||||
محقق | |||||
پیچاند | |||||
پرورد | |||||
کشتی | |||||
مسلم | |||||
روم | |||||
پرش | |||||
نصاب | |||||
تکه | |||||
جوید | |||||
پراند | |||||
وسطی | |||||
کارد | |||||
سپرد | |||||
افشاند | |||||
مراجع | |||||
خرم | |||||
کرات | |||||
نمونه | |||||
سند | |||||
کند | |||||
بر | |||||
سنت | |||||
شبه | |||||
ابر | |||||
اپل | |||||
درد | |||||
خیر | |||||
خاراند | |||||
ابی | |||||
مبدل | |||||
مو | |||||
جنگ | |||||
سرم | |||||
قدم | |||||
مایل | |||||
اسکی | |||||
نسبی | |||||
گذراند | |||||
بخور | |||||
صرف | |||||
قسم | |||||
گرده | |||||
مبلغ | |||||
حسن | |||||
کره | |||||
عود | |||||
چشم | |||||
خلف | |||||
حقه | |||||
شل | |||||
ادبا | |||||
برس | |||||
گرم | |||||
سحر | |||||
سنی | |||||
ببر | |||||
فهماند | |||||
اوا | |||||
اشکال | |||||
بین | |||||
سرور | |||||
مفصل | |||||
فرار | |||||
سبک | |||||
تپاند | |||||
سیر | |||||
پهن | |||||
مهر | |||||
درک | |||||
چلاند | |||||
رنجاند | |||||
مشرف | |||||
کمی | |||||
فن | |||||
مقطع | |||||
پوشاند | |||||
عرفا | |||||
بدهی | |||||
شنود | |||||
قطر | |||||
شهرت | |||||
سپر | |||||
رحم | |||||
حلال | |||||
دوران | |||||
ترساند | |||||
پروراند | |||||
رویه | |||||
آورد | |||||
کلفت | |||||
تکاند | |||||
گریاند | |||||
کابل | |||||
شهره | |||||
رهاند | |||||
دین | |||||
عقبی | |||||
لنگ | |||||
مصر | |||||
پرس | |||||
منکر | |||||
قمری | |||||
امل | |||||
خفت | |||||
سمبل | |||||
کنه | |||||
عالم | |||||
bedah (فعل) | |||||
تند | |||||
محرم | |||||
عرق | |||||
خیساند | |||||
لیلی | |||||
کشاند | |||||
یمن | |||||
شدید | |||||
قوت | |||||
اشراف | |||||
نبرد | |||||
کر | |||||
شما | |||||
گریم | |||||
شوم | |||||
جور | |||||
خنداند | |||||
اشغال | |||||
علی | |||||
مسکن | |||||
مثل | |||||
نفس | |||||
ارایه | |||||
دوم | |||||
دهم | |||||
سپرم | |||||
نکن | |||||
ده | |||||
ماند | |||||
به | |||||
گل | |||||
ور | |||||
مرد | |||||
پست | |||||
کنف | |||||
qet؟e | |||||
شبه | |||||
شوم | |||||
persian word |
language ru-cl | language ru-cl | ||||
replace 03 a a# | replace 03 a a# | ||||
dict_min 20000 | dict_min 20000 | ||||
speed 95 | |||||
dictrules 3 | dictrules 3 |
و | |||||
در | |||||
به | |||||
از | |||||
که | |||||
این | |||||
را | |||||
با | |||||
است | |||||
برای | |||||
آن | |||||
یک | |||||
خود | |||||
تا | |||||
کرد | |||||
بر | |||||
هم | |||||
نیز | |||||
گفت | |||||
میشود | |||||
وی | |||||
شد | |||||
دارد | |||||
ما | |||||
اما | |||||
یا | |||||
شده | |||||
باید | |||||
هر | |||||
آنها | |||||
بود | |||||
او | |||||
دیگر | |||||
دو | |||||
مورد | |||||
میکند | |||||
شود | |||||
کند | |||||
وجود | |||||
بین | |||||
پیش | |||||
شده_است | |||||
پس | |||||
نظر | |||||
اگر | |||||
همه | |||||
یکی | |||||
حال | |||||
هستند | |||||
من | |||||
کنند | |||||
نیست | |||||
باشد | |||||
چه | |||||
بی | |||||
می | |||||
بخش | |||||
میکنند | |||||
همین | |||||
افزود | |||||
هایی | |||||
دارند | |||||
راه | |||||
همچنین | |||||
روی | |||||
داد | |||||
بیشتر | |||||
بسیار | |||||
سه | |||||
داشت | |||||
چند | |||||
سوی | |||||
تنها | |||||
هیچ | |||||
میان | |||||
اینکه | |||||
شدن | |||||
بعد | |||||
جدید | |||||
ولی | |||||
حتی | |||||
کردن | |||||
برخی | |||||
کردند | |||||
میدهد | |||||
اول | |||||
نه | |||||
کرده_است | |||||
نسبت | |||||
بیش | |||||
شما | |||||
چنین | |||||
طور | |||||
افراد | |||||
تمام | |||||
درباره | |||||
بار | |||||
بسیاری | |||||
میتواند | |||||
کرده | |||||
چون | |||||
ندارد | |||||
دوم | |||||
بزرگ | |||||
طی | |||||
حدود | |||||
همان | |||||
بدون | |||||
البته | |||||
آنان | |||||
میگوید | |||||
دیگری | |||||
خواهد_شد | |||||
کنیم | |||||
قابل | |||||
یعنی | |||||
رشد | |||||
میتوان | |||||
وارد | |||||
کل | |||||
ویژه | |||||
قبل | |||||
براساس | |||||
نیاز | |||||
گذاری | |||||
هنوز | |||||
لازم | |||||
سازی | |||||
بوده_است | |||||
چرا | |||||
میشوند | |||||
وقتی | |||||
گرفت | |||||
کم | |||||
جای | |||||
حالی | |||||
تغییر | |||||
پیدا | |||||
اکنون | |||||
تحت | |||||
باعث | |||||
مدت | |||||
فقط | |||||
زیادی | |||||
تعداد | |||||
آیا | |||||
بیان | |||||
رو | |||||
شدند | |||||
عدم | |||||
کرده_اند | |||||
بودن | |||||
نوع | |||||
بلکه | |||||
جاری | |||||
دهد | |||||
برابر | |||||
مهم | |||||
بوده | |||||
اخیر | |||||
مربوط | |||||
امر | |||||
زیر | |||||
گیری | |||||
شاید | |||||
خصوص | |||||
آقای | |||||
اثر | |||||
کننده | |||||
بودند | |||||
فکر | |||||
کنار | |||||
اولین | |||||
سوم | |||||
سایر | |||||
کنید | |||||
ضمن | |||||
مانند | |||||
باز | |||||
میگیرد | |||||
ممکن | |||||
حل | |||||
دارای | |||||
پی | |||||
مثل | |||||
میرسد | |||||
اجرا | |||||
دور | |||||
منظور | |||||
کسی | |||||
موجب | |||||
طول | |||||
امکان | |||||
آنچه | |||||
تعیین | |||||
گفته | |||||
شوند | |||||
جمع | |||||
خیلی | |||||
علاوه | |||||
گونه | |||||
تاکنون | |||||
رسید | |||||
ساله | |||||
گرفته | |||||
شده_اند | |||||
علت | |||||
چهار | |||||
داشته_باشد | |||||
خواهد_بود | |||||
طرف | |||||
تهیه | |||||
تبدیل | |||||
مناسب | |||||
زیرا | |||||
مشخص | |||||
میتوانند | |||||
نزدیک | |||||
جریان | |||||
روند | |||||
بنابراین | |||||
میدهند | |||||
یافت | |||||
نخستین | |||||
بالا | |||||
پنج | |||||
ریزی | |||||
عالی | |||||
چیزی | |||||
نخست | |||||
بیشتری | |||||
ترتیب | |||||
شده_بود | |||||
خاص | |||||
خوبی | |||||
خوب | |||||
شروع | |||||
فرد | |||||
کامل | |||||
غیر | |||||
میرود | |||||
دهند | |||||
آخرین | |||||
دادن | |||||
جدی | |||||
بهترین | |||||
شامل | |||||
گیرد | |||||
بخشی | |||||
باشند | |||||
تمامی | |||||
بهتر | |||||
داده_است | |||||
حد | |||||
نبود | |||||
کسانی | |||||
میکرد | |||||
داریم | |||||
علیه | |||||
میباشد | |||||
دانست | |||||
ناشی | |||||
داشتند | |||||
دهه | |||||
میشد | |||||
ایشان | |||||
آنجا | |||||
گرفته_است | |||||
دچار | |||||
میآید | |||||
لحاظ | |||||
آنکه | |||||
داده | |||||
بعضی | |||||
هستیم | |||||
اند | |||||
برداری | |||||
نباید | |||||
میکنیم | |||||
نشست | |||||
سهم | |||||
همیشه | |||||
آمد | |||||
اش | |||||
وگو | |||||
میکنم | |||||
حداقل | |||||
طبق | |||||
جا | |||||
خواهد_کرد | |||||
نوعی | |||||
چگونه | |||||
رفت | |||||
هنگام | |||||
فوق | |||||
روش | |||||
ندارند | |||||
سعی | |||||
بندی | |||||
شمار | |||||
کلی | |||||
کافی | |||||
مواجه | |||||
همچنان | |||||
زیاد | |||||
سمت | |||||
کوچک | |||||
داشته_است | |||||
چیز | |||||
پشت | |||||
آورد | |||||
حالا | |||||
روبه | |||||
سالهای | |||||
دادند | |||||
میکردند | |||||
عهده | |||||
نیمه | |||||
جایی | |||||
دیگران | |||||
سی | |||||
بروز | |||||
یکدیگر | |||||
آمده_است | |||||
جز | |||||
کنم | |||||
سپس | |||||
کنندگان | |||||
خودش | |||||
همواره | |||||
یافته | |||||
شان | |||||
صرف | |||||
نمیشود | |||||
رسیدن | |||||
چهارم | |||||
یابد | |||||
متر | |||||
ساز | |||||
داشته | |||||
کرده_بود | |||||
باره | |||||
نحوه | |||||
کردم | |||||
تو | |||||
شخصی | |||||
داشته_باشند | |||||
محسوب | |||||
پخش | |||||
کمی | |||||
متفاوت | |||||
سراسر | |||||
کاملا | |||||
داشتن | |||||
نظیر | |||||
آمده | |||||
گروهی | |||||
فردی | |||||
ع | |||||
همچون | |||||
خطر | |||||
خویش | |||||
کدام | |||||
دسته | |||||
سبب | |||||
عین | |||||
آوری | |||||
متاسفانه | |||||
بیرون | |||||
دار | |||||
ابتدا | |||||
شش | |||||
افرادی | |||||
میگویند | |||||
سالهای | |||||
درون | |||||
نیستند | |||||
یافته_است | |||||
پر | |||||
خاطرنشان | |||||
گاه | |||||
جمعی | |||||
اغلب | |||||
دوباره | |||||
مییابد | |||||
لذا | |||||
زاده | |||||
گردد | |||||
اینجا |
static const struct option long_options[] = { | static const struct option long_options[] = { | ||||
{ "help", no_argument, 0, 'h' }, | { "help", no_argument, 0, 'h' }, | ||||
{ "stdin", no_argument, 0, 0x100 }, | { "stdin", no_argument, 0, 0x100 }, | ||||
{ "input", required_argument, 0, 0x113 }, | |||||
{ "output", required_argument, 0, 0x114 }, | |||||
{ "compile-debug", optional_argument, 0, 0x101 }, | { "compile-debug", optional_argument, 0, 0x101 }, | ||||
{ "compile", optional_argument, 0, 0x102 }, | { "compile", optional_argument, 0, 0x102 }, | ||||
{ "punct", optional_argument, 0, 0x103 }, | { "punct", optional_argument, 0, 0x103 }, | ||||
FILE *f_text = NULL; | FILE *f_text = NULL; | ||||
char *p_text = NULL; | char *p_text = NULL; | ||||
FILE *f_phonemes_out = stdout; | |||||
FILE *f_phonemes_out = NULL; | |||||
char *data_path = NULL; // use default path for espeak-ng-data | char *data_path = NULL; // use default path for espeak-ng-data | ||||
char input_file[256] = {0}; | |||||
char output_file[256] = {0}; // Add output file name variable | |||||
int option_index = 0; | int option_index = 0; | ||||
int c; | int c; | ||||
case 0x112: // --ssml-break | case 0x112: // --ssml-break | ||||
ssml_break = atoi(optarg2); | ssml_break = atoi(optarg2); | ||||
break; | break; | ||||
case 0x113: // --input | |||||
strncpy0(input_file, optarg2, sizeof(input_file)); | |||||
break; | |||||
case 0x114: // --output | |||||
strncpy0(output_file, optarg2, sizeof(output_file)); | |||||
break; | |||||
default: | default: | ||||
exit(0); | exit(0); | ||||
} | } | ||||
if (option_punctuation == 2) | if (option_punctuation == 2) | ||||
espeak_SetPunctuationList(option_punctlist); | espeak_SetPunctuationList(option_punctlist); | ||||
// Open output file for phoneme output, first overwrite to clear previous content | |||||
if (output_file[0] == 0) { | |||||
strcpy(output_file, "output.txt"); // Default output file if none specified | |||||
} | |||||
if ((f_phonemes_out = fopen(output_file, "w")) == NULL) { | |||||
fprintf(stderr, "Can't write to output file: %s\n", output_file); | |||||
exit(EXIT_FAILURE); | |||||
} | |||||
fclose(f_phonemes_out); // Close after clearing | |||||
f_phonemes_out = NULL; // Set to NULL after closing | |||||
espeak_SetPhonemeTrace(phoneme_options | (phonemes_separator << 8), f_phonemes_out); | espeak_SetPhonemeTrace(phoneme_options | (phonemes_separator << 8), f_phonemes_out); | ||||
if (filename[0] == 0) { | |||||
// Process input file if specified | |||||
if (input_file[0] != 0) { | |||||
f_text = fopen(input_file, "r"); | |||||
if (f_text == NULL) { | |||||
fprintf(stderr, "Can't open input file: %s\n", input_file); | |||||
exit(EXIT_FAILURE); | |||||
} | |||||
char line[1000]; | |||||
while (fgets(line, sizeof(line), f_text) != NULL) { | |||||
// Remove trailing newline | |||||
line[strcspn(line, "\n")] = 0; | |||||
// Process each line independently | |||||
if (line[0] != 0) { // Skip empty lines | |||||
// Reopen output file in append mode for each line | |||||
if (f_phonemes_out != NULL) { | |||||
fclose(f_phonemes_out); | |||||
f_phonemes_out = NULL; | |||||
} | |||||
if ((f_phonemes_out = fopen(output_file, "a")) == NULL) { | |||||
fprintf(stderr, "Can't append to output file: %s\n", output_file); | |||||
exit(EXIT_FAILURE); | |||||
} | |||||
espeak_SetPhonemeTrace(phoneme_options | (phonemes_separator << 8), f_phonemes_out); | |||||
espeak_Synth(line, strlen(line)+1, 0, POS_CHARACTER, 0, synth_flags, NULL, NULL); | |||||
espeak_ng_Synchronize(); | |||||
fclose(f_phonemes_out); | |||||
f_phonemes_out = NULL; | |||||
} | |||||
} | |||||
fclose(f_text); | |||||
} else if (filename[0] == 0) { | |||||
if ((optind < argc) && (flag_stdin == 0)) { | if ((optind < argc) && (flag_stdin == 0)) { | ||||
// there's a non-option parameter, and no -f or --stdin | // there's a non-option parameter, and no -f or --stdin | ||||
// use it as text | // use it as text | ||||
exit(EXIT_FAILURE); | exit(EXIT_FAILURE); | ||||
} | } | ||||
if (f_phonemes_out != stdout) | |||||
fclose(f_phonemes_out); | |||||
CloseWavFile(); | CloseWavFile(); | ||||
espeak_ng_Terminate(); | espeak_ng_Terminate(); | ||||
return 0; | return 0; |
// PROBLEM vowel reductions are not applied to the translated phonemes | // PROBLEM vowel reductions are not applied to the translated phonemes | ||||
// condition rules are not applied | // condition rules are not applied | ||||
TranslateWord(translator, phonetic, NULL, NULL); | |||||
TranslateWord(translator, phonetic, NULL, NULL, NULL, NULL, 0); | |||||
text_not_phonemes = false; | text_not_phonemes = false; | ||||
strncpy0(encoded_ph, ctx->word_phonemes, N_WORD_BYTES-4); | strncpy0(encoded_ph, ctx->word_phonemes, N_WORD_BYTES-4); | ||||
text[1] = ' '; | text[1] = ' '; | ||||
text[2] = ' '; | text[2] = ' '; | ||||
strncpy0(text+3, word1, sizeof(text)-3); | strncpy0(text+3, word1, sizeof(text)-3); | ||||
flags0 = TranslateWord(tr, text+3, NULL, NULL); | |||||
flags0 = TranslateWord(tr, text+3, NULL, NULL, NULL, NULL, 0); | |||||
strcpy(ph_out, word_phonemes); | strcpy(ph_out, word_phonemes); | ||||
option_sayas = say_as; | option_sayas = say_as; | ||||
} | } |
// lang=hu don't treat dot as ordinal indicator if the next word is a month name ($alt). It may have a suffix. | // lang=hu don't treat dot as ordinal indicator if the next word is a month name ($alt). It may have a suffix. | ||||
nextflags = 0; | nextflags = 0; | ||||
if (IsAlpha(c2)) | if (IsAlpha(c2)) | ||||
nextflags = TranslateWord(tr, &word_end[2], NULL, NULL); | |||||
nextflags = TranslateWord(tr, &word_end[2], NULL, NULL, NULL, NULL, 0); | |||||
if ((tr->prev_dict_flags[0] & FLAG_ALT_TRANS) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || iswdigit(c2))) | if ((tr->prev_dict_flags[0] & FLAG_ALT_TRANS) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || iswdigit(c2))) | ||||
ordinal = 0; // TEST 09.02.10 | ordinal = 0; // TEST 09.02.10 |
if (control & 2) | if (control & 2) | ||||
DoSonicSpeed(1 * 1024); | DoSonicSpeed(1 * 1024); | ||||
if ((wpm_value > espeakRATE_MAXIMUM) || ((wpm_value > speed.fast_settings) && (wpm > 350))) { | |||||
if ((wpm_value >= espeakRATE_MAXIMUM) || ((wpm_value > speed.fast_settings) && (wpm > 350))) { | |||||
int wpm2; | int wpm2; | ||||
wpm2 = wpm; | wpm2 = wpm; | ||||
wpm = espeakRATE_NORMAL; | wpm = espeakRATE_NORMAL; |
{ "_el", OFFSET_GREEK, 0x380, 0x3ff, L('e', 'l'), AL_DONT_NAME | AL_NOT_LETTERS | AL_WORDS }, | { "_el", OFFSET_GREEK, 0x380, 0x3ff, L('e', 'l'), AL_DONT_NAME | AL_NOT_LETTERS | AL_WORDS }, | ||||
{ "_cyr", OFFSET_CYRILLIC, 0x400, 0x52f, 0, 0 }, | { "_cyr", OFFSET_CYRILLIC, 0x400, 0x52f, 0, 0 }, | ||||
{ "_hy", OFFSET_ARMENIAN, 0x530, 0x58f, L('h', 'y'), AL_WORDS }, | { "_hy", OFFSET_ARMENIAN, 0x530, 0x58f, L('h', 'y'), AL_WORDS }, | ||||
{ "_he", OFFSET_HEBREW, 0x590, 0x5ff, L('h', 'e'), 0 }, | |||||
{ "_he", OFFSET_HEBREW, 0x590, 0x5ff, 0, 0 }, | |||||
{ "_ar", OFFSET_ARABIC, 0x600, 0x6ff, 0, 0 }, | { "_ar", OFFSET_ARABIC, 0x600, 0x6ff, 0, 0 }, | ||||
{ "_syc", OFFSET_SYRIAC, 0x700, 0x74f, 0, 0 }, | { "_syc", OFFSET_SYRIAC, 0x700, 0x74f, 0, 0 }, | ||||
{ "_hi", OFFSET_DEVANAGARI, 0x900, 0x97f, L('h', 'i'), AL_WORDS }, | { "_hi", OFFSET_DEVANAGARI, 0x900, 0x97f, L('h', 'i'), AL_WORDS }, | ||||
tr->langopts.ideographs = 1; | tr->langopts.ideographs = 1; | ||||
} | } | ||||
break; | break; | ||||
case L('h','e'): // Hebrew | |||||
{ | |||||
tr->langopts.param[LOPT_APOSTROPHE] = 2; // bit 1 Apostrophe at end of word is part of the word, for words like בָּגָאז׳ | |||||
tr->langopts.stress_flags = S_NO_AUTO_2; // don't use secondary stress | |||||
tr->langopts.numbers = NUM_SINGLE_STRESS | NUM_DFRACTION_2 | NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_SINGLE_AND; | |||||
} | |||||
break; | |||||
case L('g', 'a'): // irish | case L('g', 'a'): // irish | ||||
case L('g', 'd'): // scots gaelic | case L('g', 'd'): // scots gaelic | ||||
{ | { |
#include "translateword.h" | #include "translateword.h" | ||||
static int CalcWordLength(int source_index, int charix_top, short int *charix, WORD_TAB *words, int word_count); | static int CalcWordLength(int source_index, int charix_top, short int *charix, WORD_TAB *words, int word_count); | ||||
static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, unsigned char *p, char *word_phonemes); | |||||
static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, unsigned char *p, char *word_phonemes, WORD_TAB words[], char sbuf[], int word_count); | |||||
static void SwitchLanguage(char *word, char *word_phonemes); | static void SwitchLanguage(char *word, char *word_phonemes); | ||||
Translator *translator = NULL; // the main translator | Translator *translator = NULL; // the main translator | ||||
return strchr((char *)s, c); // (char *) is needed for Borland compiler | return strchr((char *)s, c); // (char *) is needed for Borland compiler | ||||
} | } | ||||
int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out) | |||||
int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, WORD_TAB words[], char sbuf[], int word_count) | |||||
{ | { | ||||
char words_phonemes[N_WORD_PHONEMES]; // a word translated into phoneme codes | char words_phonemes[N_WORD_PHONEMES]; // a word translated into phoneme codes | ||||
char *phonemes = words_phonemes; | char *phonemes = words_phonemes; | ||||
int flags = TranslateWord3(tr, word_start, wtab, word_out, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes)); | |||||
int flags = TranslateWord3(tr, word_start, wtab, word_out, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes), words, sbuf, word_count); | |||||
if (flags & FLAG_TEXTMODE && word_out) { | if (flags & FLAG_TEXTMODE && word_out) { | ||||
// Ensure that start of word rules match with the replaced text, | // Ensure that start of word rules match with the replaced text, | ||||
// so that emoji and other characters are pronounced correctly. | // so that emoji and other characters are pronounced correctly. | ||||
// However, dictionary_skipwords value is still needed outside this scope. | // However, dictionary_skipwords value is still needed outside this scope. | ||||
// So we backup and restore it at the end of this scope. | // So we backup and restore it at the end of this scope. | ||||
int skipwords = dictionary_skipwords; | int skipwords = dictionary_skipwords; | ||||
TranslateWord3(tr, word_out, wtab, NULL, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes)); | |||||
TranslateWord3(tr, word_out, wtab, NULL, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes), words, sbuf, word_count); | |||||
int n; | int n; | ||||
if (first_word) { | if (first_word) { | ||||
available -= n; | available -= n; | ||||
phonemes += n; | phonemes += n; | ||||
// skip to the next word in a multi-word replacement. Always skip at least one word. | |||||
// skip to the next word in a multi-word replacement. Always skip at least one word. | |||||
for (dictionary_skipwords++; dictionary_skipwords > 0; dictionary_skipwords--) { | for (dictionary_skipwords++; dictionary_skipwords > 0; dictionary_skipwords--) { | ||||
while (!isspace(*word_out)) ++word_out; | while (!isspace(*word_out)) ++word_out; | ||||
while (isspace(*word_out)) ++word_out; | while (isspace(*word_out)) ++word_out; | ||||
return SetAlternateTranslator(new_language, &translator3, translator3_language); | return SetAlternateTranslator(new_language, &translator3, translator3_language); | ||||
} | } | ||||
static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pause) | |||||
static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pause, WORD_TAB words [], char sbuf[], int word_count) | |||||
{ | { | ||||
int flags = 0; | int flags = 0; | ||||
int stress; | int stress; | ||||
word_copy_len = ix; | word_copy_len = ix; | ||||
word_replaced[2] = 0; | word_replaced[2] = 0; | ||||
flags = TranslateWord(translator, word, wtab, &word_replaced[2]); | |||||
flags = TranslateWord(translator, word, wtab, &word_replaced[2], words, sbuf, word_count); | |||||
if (flags & FLAG_SPELLWORD) { | if (flags & FLAG_SPELLWORD) { | ||||
// re-translate the word as individual letters, separated by spaces | // re-translate the word as individual letters, separated by spaces | ||||
} | } | ||||
if ((flags & FLAG_COMBINE) && !(wtab[1].flags & FLAG_PHONEMES)) { | if ((flags & FLAG_COMBINE) && !(wtab[1].flags & FLAG_PHONEMES)) { | ||||
CombineFlag(tr, wtab, word, &flags, p, word_phonemes); | |||||
CombineFlag(tr, wtab, word, &flags, p, word_phonemes, words, sbuf, word_count); | |||||
} | } | ||||
if (p[0] == phonSWITCH) { | if (p[0] == phonSWITCH) { | ||||
if (word_replaced[2] != 0) { | if (word_replaced[2] != 0) { | ||||
word_replaced[0] = 0; // byte before the start of the word | word_replaced[0] = 0; // byte before the start of the word | ||||
word_replaced[1] = ' '; | word_replaced[1] = ' '; | ||||
flags = TranslateWord(translator2, &word_replaced[1], wtab, NULL); | |||||
flags = TranslateWord(translator2, &word_replaced[1], wtab, NULL, words, sbuf, word_count); | |||||
} else | } else | ||||
flags = TranslateWord(translator2, word, wtab, &word_replaced[2]); | |||||
flags = TranslateWord(translator2, word, wtab, &word_replaced[2], words, sbuf, word_count); | |||||
} | } | ||||
if (p[0] != phonSWITCH) | if (p[0] != phonSWITCH) | ||||
} | } | ||||
words[0].flags |= FLAG_FIRST_WORD; | words[0].flags |= FLAG_FIRST_WORD; | ||||
// Add debug print header and print all words before processing | |||||
// fprintf(stderr, "\n=== Words in sentence ===\n"); | |||||
// for (ix = 0; ix < word_count; ix++) { | |||||
// char word_copy[150]; | |||||
// int word_len = 0; | |||||
// char *pw = &sbuf[words[ix].start]; | |||||
// while (pw[word_len] != ' ' && pw[word_len] != 0 && word_len < 149) { | |||||
// word_copy[word_len] = pw[word_len]; | |||||
// word_len++; | |||||
// } | |||||
// word_copy[word_len] = 0; | |||||
// fprintf(stderr, "Word %d: '%s'\n", ix + 1, word_copy); | |||||
// } | |||||
// Each TranslateWord2 may require up to 7 phonemes | // Each TranslateWord2 may require up to 7 phonemes | ||||
// and after this loop we require 2 phonemes | // and after this loop we require 2 phonemes | ||||
for (ix = 0; ix < word_count && (n_ph_list2 < N_PHONEME_LIST-7-2); ix++) { | for (ix = 0; ix < word_count && (n_ph_list2 < N_PHONEME_LIST-7-2); ix++) { | ||||
for (pw = &number_buf[3]; pw < pn && nw < N_CLAUSE_WORDS;) { | for (pw = &number_buf[3]; pw < pn && nw < N_CLAUSE_WORDS;) { | ||||
// keep wflags for each part, for FLAG_HYPHEN_AFTER | // keep wflags for each part, for FLAG_HYPHEN_AFTER | ||||
dict_flags = TranslateWord2(tr, pw, &num_wtab[nw++], words[ix].pre_pause); | |||||
dict_flags = TranslateWord2(tr, pw, &num_wtab[nw++], words[ix].pre_pause, words, sbuf, word_count); | |||||
while (pw < pn && *pw++ != ' ') | while (pw < pn && *pw++ != ' ') | ||||
; | ; | ||||
words[ix].pre_pause = 0; | words[ix].pre_pause = 0; | ||||
} else { | } else { | ||||
pre_pause = 0; | pre_pause = 0; | ||||
dict_flags = TranslateWord2(tr, word, &words[ix], words[ix].pre_pause); | |||||
dict_flags = TranslateWord2(tr, word, &words[ix], words[ix].pre_pause, words, sbuf, word_count); | |||||
if (pre_pause > words[ix+1].pre_pause) { | if (pre_pause > words[ix+1].pre_pause) { | ||||
words[ix+1].pre_pause = pre_pause; | words[ix+1].pre_pause = pre_pause; | ||||
memset(number_buf+1, ' ', 9); | memset(number_buf+1, ' ', 9); | ||||
nx = utf8_in(&c_temp, pw); | nx = utf8_in(&c_temp, pw); | ||||
memcpy(&number_buf[3], pw, nx); | memcpy(&number_buf[3], pw, nx); | ||||
TranslateWord2(tr, &number_buf[3], &words[ix], 0); | |||||
TranslateWord2(tr, &number_buf[3], &words[ix], 0, words, sbuf, word_count); | |||||
pw += nx; | pw += nx; | ||||
} | } | ||||
} | } | ||||
return k; | return k; | ||||
} | } | ||||
static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, unsigned char *p, char *word_phonemes) { | |||||
static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, unsigned char *p, char *word_phonemes, WORD_TAB words[], char sbuf[], int word_count) { | |||||
// combine a preposition with the following word | // combine a preposition with the following word | ||||
char ph_buf[N_WORD_PHONEMES]; | char ph_buf[N_WORD_PHONEMES]; | ||||
strcpy(ph_buf, word_phonemes); | strcpy(ph_buf, word_phonemes); | ||||
flags2[0] = TranslateWord(tr, p2+1, wtab+1, NULL); | |||||
flags2[0] = TranslateWord(tr, p2+1, wtab+1, NULL, words, sbuf, word_count); | |||||
if ((flags2[0] & FLAG_WAS_UNPRONOUNCABLE) || (word_phonemes[0] == phonSWITCH)) | if ((flags2[0] & FLAG_WAS_UNPRONOUNCABLE) || (word_phonemes[0] == phonSWITCH)) | ||||
ok = false; | ok = false; | ||||
if (ok) { | if (ok) { | ||||
*p2 = '-'; // replace next space by hyphen | *p2 = '-'; // replace next space by hyphen | ||||
wtab[0].flags &= ~FLAG_ALL_UPPER; // prevent it being considered an abbreviation | wtab[0].flags &= ~FLAG_ALL_UPPER; // prevent it being considered an abbreviation | ||||
*flags = TranslateWord(translator, word, wtab, NULL); // translate the combined word | |||||
*flags = TranslateWord(translator, word, wtab, NULL, words, sbuf, word_count); // translate the combined word | |||||
if ((sylimit > 0) && (CountSyllables(p) > (sylimit & 0x1f))) { | if ((sylimit > 0) && (CountSyllables(p) > (sylimit & 0x1f))) { | ||||
// revert to separate words | // revert to separate words | ||||
*p2 = ' '; | *p2 = ' '; | ||||
*flags = TranslateWord(translator, word, wtab, NULL); | |||||
*flags = TranslateWord(translator, word, wtab, NULL, words, sbuf, word_count); | |||||
} else { | } else { | ||||
if (*flags == 0) | if (*flags == 0) | ||||
*flags = flags2[0]; // no flags for the combined word, so use flags from the second word eg. lang-hu "nem december 7-e" | *flags = flags2[0]; // no flags for the combined word, so use flags from the second word eg. lang-hu "nem december 7-e" |
void print_dictionary_flags(unsigned int *flags, char *buf, int buf_len); | void print_dictionary_flags(unsigned int *flags, char *buf, int buf_len); | ||||
int TranslateWord(Translator *tr, char *word1, WORD_TAB *wtab, char *word_out); | |||||
int TranslateWord(Translator *tr, char *word1, WORD_TAB *wtab, char *word_out, WORD_TAB *words, char sbuf [], int word_count); | |||||
void TranslateClause(Translator *tr, int *tone, char **voice_change); | void TranslateClause(Translator *tr, int *tone, char **voice_change); | ||||
void TranslateClauseWithTerminator(Translator *tr, int *tone_out, char **voice_change, int *terminator_out); | void TranslateClauseWithTerminator(Translator *tr, int *tone_out, char **voice_change, int *terminator_out); | ||||
/* | /* | ||||
* Copyright (C) 2005 to 2014 by Jonathan Duddington | * Copyright (C) 2005 to 2014 by Jonathan Duddington | ||||
* email: [email protected] | * email: [email protected] | ||||
#include "synthdata.h" // for SelectPhonemeTable, LookupPhonemeTable | #include "synthdata.h" // for SelectPhonemeTable, LookupPhonemeTable | ||||
#include "ucd/ucd.h" // for ucd_toupper | #include "ucd/ucd.h" // for ucd_toupper | ||||
#include "voice.h" // for voice, voice_t | #include "voice.h" // for voice, voice_t | ||||
#include "speech.h" // for path_home | |||||
// JSON parsing (json-c), used to read the homograph dataset | |||||
#include <json-c/json.h> | |||||
// Global variables for homographs | |||||
static char **homographs_list = NULL; | |||||
static int homographs_count = 0; | |||||
static json_object *homograph_data = NULL; | |||||
// Global variables for stopwords | |||||
static char **stopwords_list = NULL; | |||||
static int stopwords_count = 0; | |||||
static void LoadHomographData(void) | |||||
{ | |||||
char path[256]; | |||||
json_object *root = NULL; | |||||
// Get the path to dataset.json | |||||
snprintf(path, sizeof(path), "%s%cespeak-ng-data%cdataset.json", path_home, PATHSEP, PATHSEP); | |||||
root = json_object_from_file("/content/espeak-ng/espeak-ng-data/dataset.json"); | |||||
if (root == NULL) { | |||||
fprintf(stderr, "Failed to load homograph data from %s\n", path); | |||||
return; | |||||
} | |||||
homograph_data = root; | |||||
// Print a sample entry for debugging | |||||
// json_object *sample_word = json_object_object_get(root, "read"); | |||||
// if (sample_word != NULL) { | |||||
// fprintf(stderr, "Sample homograph data for 'read':\n"); | |||||
// json_object_object_foreach(sample_word, key, val) { | |||||
// fprintf(stderr, " Pronunciation: %s\n", key); | |||||
// fprintf(stderr, " Context words: "); | |||||
// if (json_object_get_type(val) == json_type_array) { | |||||
// int array_len = json_object_array_length(val); | |||||
// for (int i = 0; i < array_len; i++) { | |||||
// json_object *item = json_object_array_get_idx(val, i); | |||||
// fprintf(stderr, "%s ", json_object_get_string(item)); | |||||
// } | |||||
// } | |||||
// fprintf(stderr, "\n"); | |||||
// } | |||||
// } | |||||
} | |||||
static void LoadHomographs(void) | |||||
{ | |||||
FILE *f; | |||||
char path[256]; | |||||
char line[256]; | |||||
int count = 0; | |||||
int i = 0; | |||||
// Get the path to homographs.txt | |||||
snprintf(path, sizeof(path), "%s%cespeak-ng-data%chomographs.txt", path_home, PATHSEP, PATHSEP); | |||||
f = fopen("/content/espeak-ng/espeak-ng-data/homographs.txt", "r"); | |||||
if (f == NULL) { | |||||
fprintf(stderr, "Failed to open homographs.txt\n"); | |||||
return; | |||||
} | |||||
// First count the number of lines | |||||
while (fgets(line, sizeof(line), f) != NULL) { | |||||
count++; | |||||
} | |||||
// Allocate memory for the list | |||||
homographs_list = (char **)malloc(count * sizeof(char *)); | |||||
if (homographs_list == NULL) { | |||||
fclose(f); | |||||
return; | |||||
} | |||||
// Rewind and read the file | |||||
rewind(f); | |||||
while (fgets(line, sizeof(line), f) != NULL) { | |||||
// Remove newline | |||||
line[strcspn(line, "\n")] = 0; | |||||
homographs_list[i] = strdup(line); | |||||
if (homographs_list[i] == NULL) { | |||||
// Cleanup on error | |||||
for (int j = 0; j < i; j++) { | |||||
free(homographs_list[j]); | |||||
} | |||||
free(homographs_list); | |||||
homographs_list = NULL; | |||||
fclose(f); | |||||
return; | |||||
} | |||||
i++; | |||||
} | |||||
homographs_count = count; | |||||
fclose(f); | |||||
} | |||||
static bool IsHomograph(const char *word) | |||||
{ | |||||
if (homographs_list == NULL) { | |||||
LoadHomographs(); | |||||
} | |||||
if (homographs_list == NULL) { | |||||
return false; | |||||
} | |||||
for (int i = 0; i < homographs_count; i++) { | |||||
if (strcmp(word, homographs_list[i]) == 0) { | |||||
return true; | |||||
} | |||||
} | |||||
return false; | |||||
} | |||||
static void LoadStopwords(void) | |||||
{ | |||||
FILE *f; | |||||
char path[256]; | |||||
char line[256]; | |||||
int count = 0; | |||||
int i = 0; | |||||
// Get the path to stopwords.dat | |||||
snprintf(path, sizeof(path), "%s%cespeak-ng-data%cstopwords.dat", path_home, PATHSEP, PATHSEP); | |||||
f = fopen("/content/espeak-ng/espeak-ng-data/stopwords.dat", "r"); | |||||
if (f == NULL) { | |||||
fprintf(stderr, "Failed to open stopwords.dat\n"); | |||||
return; | |||||
} | |||||
// First count the number of lines | |||||
while (fgets(line, sizeof(line), f) != NULL) { | |||||
count++; | |||||
} | |||||
// Allocate memory for the list | |||||
stopwords_list = (char **)malloc(count * sizeof(char *)); | |||||
if (stopwords_list == NULL) { | |||||
fclose(f); | |||||
return; | |||||
} | |||||
// Rewind and read the file | |||||
rewind(f); | |||||
while (fgets(line, sizeof(line), f) != NULL) { | |||||
// Remove newline | |||||
line[strcspn(line, "\n")] = 0; | |||||
stopwords_list[i] = strdup(line); | |||||
if (stopwords_list[i] == NULL) { | |||||
// Cleanup on error | |||||
for (int j = 0; j < i; j++) { | |||||
free(stopwords_list[j]); | |||||
} | |||||
free(stopwords_list); | |||||
stopwords_list = NULL; | |||||
fclose(f); | |||||
return; | |||||
} | |||||
i++; | |||||
} | |||||
stopwords_count = count; | |||||
fclose(f); | |||||
} | |||||
static bool IsStopword(const char *word) | |||||
{ | |||||
if (stopwords_list == NULL) { | |||||
LoadStopwords(); | |||||
} | |||||
if (stopwords_list == NULL) { | |||||
return false; | |||||
} | |||||
for (int i = 0; i < stopwords_count; i++) { | |||||
if (strcmp(word, stopwords_list[i]) == 0) { | |||||
return true; | |||||
} | |||||
} | |||||
return false; | |||||
} | |||||
// Function to generate phonemes for homograph words | |||||
static void GenerateHomographPhonemes(const char *word, char *phonemes, WORD_TAB words[], char sbuf[], int word_count) { | |||||
if (homograph_data == NULL) { | |||||
LoadHomographData(); | |||||
} | |||||
// fprintf(stderr, "\n=== Homograph Processing ===\n"); | |||||
// fprintf(stderr, "Looking up word: '%s'\n", word); | |||||
// Get the word data from homograph dictionary | |||||
json_object *word_data = json_object_object_get(homograph_data, word); | |||||
if (word_data == NULL) { | |||||
fprintf(stderr, "Word not found in homograph dictionary\n"); | |||||
// Word not found in homograph dictionary, use word's characters as phonemes | |||||
int i = 0; | |||||
while (word[i] != 0 && i < N_WORD_PHONEMES - 1) { | |||||
phonemes[i] = PhonemeCode(word[i]); | |||||
i++; | |||||
} | |||||
phonemes[i] = 0; | |||||
return; | |||||
} | |||||
// fprintf(stderr, "\nFound homograph '%s' with pronunciations:\n", word); | |||||
struct json_object_iterator it = json_object_iter_begin(word_data); | |||||
struct json_object_iterator itEnd = json_object_iter_end(word_data); | |||||
while (!json_object_iter_equal(&it, &itEnd)) { | |||||
const char *debug_pron_key = json_object_iter_peek_name(&it); | |||||
json_object *debug_pron_val = json_object_iter_peek_value(&it); | |||||
// fprintf(stderr, " Pronunciation: %s\n", debug_pron_key); | |||||
// fprintf(stderr, " Context words: "); | |||||
if (json_object_get_type(debug_pron_val) == json_type_array) { | |||||
int array_len = json_object_array_length(debug_pron_val); | |||||
for (int i = 0; i < array_len; i++) { | |||||
json_object *item = json_object_array_get_idx(debug_pron_val, i); | |||||
// fprintf(stderr, "%s ", json_object_get_string(item)); | |||||
} | |||||
} | |||||
fprintf(stderr, "\n"); | |||||
json_object_iter_next(&it); | |||||
} | |||||
fprintf(stderr, "\n"); | |||||
// Count context word frequencies | |||||
int context_counts[256] = {0}; // Assuming max 256 unique context words | |||||
char *context_words[256] = {0}; | |||||
int num_context_words = 0; | |||||
// fprintf(stderr, "\n=== Context Words ===\n"); | |||||
// Process context words | |||||
for (int i = 0; i < word_count; i++) { | |||||
char word_copy[150]; | |||||
int word_len = 0; | |||||
char *pw = &sbuf[words[i].start]; | |||||
// Extract the word | |||||
while (pw[word_len] != ' ' && pw[word_len] != 0 && word_len < 149) { | |||||
word_copy[word_len] = pw[word_len]; | |||||
word_len++; | |||||
} | |||||
word_copy[word_len] = 0; | |||||
// Skip if it's the target word, too short, or a stopword | |||||
if (word_len <= 1 || strcmp(word_copy, word) == 0 || IsStopword(word_copy)) { | |||||
continue; | |||||
} | |||||
// fprintf(stderr, "Word %d: '%s'\n", i + 1, word_copy); | |||||
// Check if we've seen this word before | |||||
int found = 0; | |||||
for (int j = 0; j < num_context_words; j++) { | |||||
if (strcmp(context_words[j], word_copy) == 0) { | |||||
context_counts[j]++; | |||||
found = 1; | |||||
break; | |||||
} | |||||
} | |||||
// Add new word if not found | |||||
if (!found && num_context_words < 255) { | |||||
context_words[num_context_words] = strdup(word_copy); | |||||
if (context_words[num_context_words] == NULL) { | |||||
fprintf(stderr, "Warning: Failed to allocate memory for context word\n"); | |||||
continue; | |||||
} | |||||
context_counts[num_context_words] = 1; | |||||
num_context_words++; | |||||
} | |||||
} | |||||
// Find best pronunciation | |||||
const char *best_phoneme = NULL; | |||||
double max_normalized_score = -1; | |||||
int max_raw_overlap = 0; | |||||
// fprintf(stderr, "\nEvaluating pronunciations:\n"); | |||||
// Iterate through each pronunciation option | |||||
json_object_object_foreach(word_data, pron_key, pron_val) { | |||||
if (json_object_get_type(pron_val) != json_type_array) { | |||||
continue; | |||||
} | |||||
// fprintf(stderr, "\nEvaluating pronunciation: %s\n", pron_key); | |||||
// Count word frequencies in this pronunciation's associated words | |||||
int phoneme_word_counts[256] = {0}; | |||||
int total_phoneme_words = 0; | |||||
int array_len = json_object_array_length(pron_val); | |||||
for (int i = 0; i < array_len; i++) { | |||||
json_object *item = json_object_array_get_idx(pron_val, i); | |||||
if (item == NULL) { | |||||
fprintf(stderr, " Warning: NULL item at index %d\n", i); | |||||
continue; | |||||
} | |||||
const char *assoc_word = json_object_get_string(item); | |||||
if (assoc_word == NULL) { | |||||
fprintf(stderr, " Warning: NULL string at index %d\n", i); | |||||
continue; | |||||
} | |||||
// fprintf(stderr, " Processing associated word[%d]: '%s' (length: %zu)\n", | |||||
// i, assoc_word, strlen(assoc_word)); | |||||
// Count occurrences of this associated word | |||||
for (int j = 0; j < num_context_words; j++) { | |||||
if (context_words[j] == NULL) { | |||||
fprintf(stderr, " Warning: NULL context word at index %d\n", j); | |||||
continue; | |||||
} | |||||
// fprintf(stderr, " Comparing with context word[%d]: '%s' (length: %zu)\n", | |||||
// j, context_words[j], strlen(context_words[j])); | |||||
if (strcmp(context_words[j], assoc_word) == 0) { | |||||
phoneme_word_counts[j]++; | |||||
// fprintf(stderr, " Match found! New count for word '%s': %d\n", | |||||
// context_words[j], phoneme_word_counts[j]); | |||||
} | |||||
} | |||||
total_phoneme_words++; | |||||
} | |||||
// Calculate weighted overlap | |||||
int weighted_overlap = 0; | |||||
// fprintf(stderr, " Calculating weighted overlap:\n"); | |||||
for (int i = 0; i < num_context_words; i++) { | |||||
if (context_words[i] == NULL) continue; | |||||
weighted_overlap += context_counts[i] * phoneme_word_counts[i]; | |||||
// fprintf(stderr, " Word '%s': count=%d, matches=%d, contribution=%d\n", | |||||
// context_words[i], context_counts[i], phoneme_word_counts[i], | |||||
// context_counts[i] * phoneme_word_counts[i]); | |||||
} | |||||
// fprintf(stderr, " Total weighted overlap: %d\n", weighted_overlap); | |||||
// Calculate normalized score | |||||
double normalized_score = (total_phoneme_words > 0) ? | |||||
(double)weighted_overlap / total_phoneme_words : 0.0; | |||||
// fprintf(stderr, " Raw overlap: %d\n", weighted_overlap); | |||||
// fprintf(stderr, " Normalized score: %.2f\n", normalized_score); | |||||
// Select best phoneme | |||||
if (normalized_score > max_normalized_score) { | |||||
max_normalized_score = normalized_score; | |||||
max_raw_overlap = weighted_overlap; | |||||
best_phoneme = pron_key; | |||||
// fprintf(stderr, " New best pronunciation selected!\n"); | |||||
} else if (normalized_score == max_normalized_score) { | |||||
// Tiebreaker: prefer the phoneme with higher raw overlap | |||||
if (weighted_overlap > max_raw_overlap) { | |||||
max_raw_overlap = weighted_overlap; | |||||
best_phoneme = pron_key; | |||||
// fprintf(stderr, " New best pronunciation selected (tiebreaker)!\n"); | |||||
} | |||||
} | |||||
} | |||||
// fprintf(stderr, "\nFinal selection:\n"); | |||||
// fprintf(stderr, "Selected pronunciation: %s\n", best_phoneme ? best_phoneme : "none"); | |||||
// fprintf(stderr, "Final score: %.2f\n", max_normalized_score); | |||||
// fprintf(stderr, "Final raw overlap: %d\n", max_raw_overlap); | |||||
// Copy the best phoneme to output | |||||
if (best_phoneme != NULL) { | |||||
int i = 0; | |||||
while (best_phoneme[i] != 0 && i < N_WORD_PHONEMES - 1) { | |||||
phonemes[i] = PhonemeCode(best_phoneme[i]); | |||||
i++; | |||||
} | |||||
phonemes[i] = 0; | |||||
} else { | |||||
// No suitable pronunciation found, use word's characters as phonemes | |||||
int i = 0; | |||||
while (word[i] != 0 && i < N_WORD_PHONEMES - 1) { | |||||
phonemes[i] = PhonemeCode(word[i]); | |||||
i++; | |||||
} | |||||
phonemes[i] = 0; | |||||
} | |||||
} | |||||
static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes); | static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes); | ||||
static void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags); | static void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags); | ||||
static int Unpronouncable(Translator *tr, char *word, int posn); | static int Unpronouncable(Translator *tr, char *word, int posn); | ||||
static int Unpronouncable2(Translator *tr, char *word); | static int Unpronouncable2(Translator *tr, char *word); | ||||
int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes) | |||||
int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes, WORD_TAB words[], char sbuf[], int word_count) | |||||
{ | { | ||||
// word1 is terminated by space (0x20) character | // word1 is terminated by space (0x20) character | ||||
int add_suffix_phonemes = 0; | int add_suffix_phonemes = 0; | ||||
WORD_TAB wtab_null[8]; | WORD_TAB wtab_null[8]; | ||||
// Debug print the word being processed | |||||
char wordbuf[120]; | |||||
unsigned int ix2; | |||||
for (ix2 = 0; ((c_temp = word_start[ix2]) != ' ') && (c_temp != 0) && (ix2 < (sizeof(wordbuf)-1)); ix2++) | |||||
wordbuf[ix2] = c_temp; | |||||
wordbuf[ix2] = 0; | |||||
// fprintf(stderr, "Processing word: '%s'\n", wordbuf); | |||||
// Add debug print header and print all words before processing | |||||
// fprintf(stderr, "\n=== Context Words ===\n"); | |||||
// for (ix = 0; ix < word_count; ix++) { | |||||
// char word_copy[150]; | |||||
// int word_len = 0; | |||||
// char *pw = &sbuf[words[ix].start]; | |||||
// while (pw[word_len] != ' ' && pw[word_len] != 0 && word_len < 149) { | |||||
// word_copy[word_len] = pw[word_len]; | |||||
// word_len++; | |||||
// } | |||||
// word_copy[word_len] = 0; | |||||
// fprintf(stderr, "Word %d: '%s'\n", ix + 1, word_copy); | |||||
// } | |||||
// Check if the word is a homograph | |||||
if (IsHomograph(wordbuf)) { | |||||
GenerateHomographPhonemes(wordbuf, word_phonemes, words, sbuf, word_count); | |||||
return dictionary_flags[0]; // Return early with current dictionary flags | |||||
} | |||||
if (wtab == NULL) { | if (wtab == NULL) { | ||||
memset(wtab_null, 0, sizeof(wtab_null)); | memset(wtab_null, 0, sizeof(wtab_null)); | ||||
wtab = wtab_null; | wtab = wtab_null; | ||||
if (phonemes[0] == phonSWITCH) { | if (phonemes[0] == phonSWITCH) { | ||||
// change to another language in order to translate this word | // change to another language in order to translate this word | ||||
strcpy(word_phonemes, phonemes); | strcpy(word_phonemes, phonemes); | ||||
return 0; | return 0; | ||||
} | } | ||||
return FLAG_SPELLWORD; // a mixture of languages, retranslate as individual letters, separated by spaces | return FLAG_SPELLWORD; // a mixture of languages, retranslate as individual letters, separated by spaces | ||||
return 0; | return 0; | ||||
} | } | ||||
strcpy(word_phonemes, phonemes); | strcpy(word_phonemes, phonemes); | ||||
if (wflags & FLAG_TRANSLATOR2) | if (wflags & FLAG_TRANSLATOR2) | ||||
return 0; | return 0; | ||||
if (phonemes[0] == phonSWITCH) { | if (phonemes[0] == phonSWITCH) { | ||||
// change to another language in order to translate this word | // change to another language in order to translate this word | ||||
strcpy(word_phonemes, phonemes); | strcpy(word_phonemes, phonemes); | ||||
return 0; | return 0; | ||||
} | } | ||||
if ((word_length == 1) && (IsAlpha(wc) || IsSuperscript(wc))) { | if ((word_length == 1) && (IsAlpha(wc) || IsSuperscript(wc))) { | ||||
if ((wordx = SpeakIndividualLetters(tr, wordx, phonemes, spell_word, current_alphabet, word_phonemes)) == NULL) | if ((wordx = SpeakIndividualLetters(tr, wordx, phonemes, spell_word, current_alphabet, word_phonemes)) == NULL) | ||||
return 0; | return 0; | ||||
strcpy(word_phonemes, phonemes); | strcpy(word_phonemes, phonemes); | ||||
return 0; | return 0; | ||||
} | } | ||||
if (phonemes[0] == phonSWITCH) { | if (phonemes[0] == phonSWITCH) { | ||||
// change to another language in order to translate this word | // change to another language in order to translate this word | ||||
wordx[-1] = c_temp; | wordx[-1] = c_temp; | ||||
strcpy(word_phonemes, phonemes); | strcpy(word_phonemes, phonemes); | ||||
return 0; | return 0; | ||||
} | } | ||||
if (phonemes[0] == phonSWITCH) { | if (phonemes[0] == phonSWITCH) { | ||||
// change to another language in order to translate this word | // change to another language in order to translate this word | ||||
memcpy(wordx, word_copy, strlen(word_copy)); | memcpy(wordx, word_copy, strlen(word_copy)); | ||||
strcpy(word_phonemes, phonemes); | strcpy(word_phonemes, phonemes); | ||||
return 0; | return 0; | ||||
} | } | ||||
if (phonemes[0] == phonSWITCH) { | if (phonemes[0] == phonSWITCH) { | ||||
// change to another language in order to translate this word | // change to another language in order to translate this word | ||||
memcpy(wordx, word_copy, strlen(word_copy)); | memcpy(wordx, word_copy, strlen(word_copy)); | ||||
strcpy(word_phonemes, phonemes); | strcpy(word_phonemes, phonemes); | ||||
return 0; | return 0; | ||||
} | } | ||||
if (phonemes[0] == phonSWITCH) { | if (phonemes[0] == phonSWITCH) { | ||||
// change to another language in order to translate this word | // change to another language in order to translate this word | ||||
strcpy(word_phonemes, phonemes); | strcpy(word_phonemes, phonemes); | ||||
memcpy(wordx, word_copy, strlen(word_copy)); | memcpy(wordx, word_copy, strlen(word_copy)); | ||||
wordx[-1] = c_temp; | wordx[-1] = c_temp; | ||||
non_initial = true; | non_initial = true; | ||||
if (phonemes[0] == phonSWITCH) { | if (phonemes[0] == phonSWITCH) { | ||||
// change to another language in order to translate this word | // change to another language in order to translate this word | ||||
strcpy(word_phonemes, phonemes); | strcpy(word_phonemes, phonemes); | ||||
return NULL; | return NULL; | ||||
} | } |
{ | { | ||||
#endif | #endif | ||||
int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes); | |||||
int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes, WORD_TAB words[], char sbuf[], int word_count); | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } |