@@ -140,12 +140,12 @@ espeak_ng_include_HEADERS = \ | |||
lib_LTLIBRARIES += src/libespeak-ng.la | |||
src_libespeak_ng_la_LDFLAGS = -version-info $(SHARED_VERSION) -lpthread -lm \ | |||
${PCAUDIOLIB_LIBS} | |||
${PCAUDIOLIB_LIBS} ${JSONC_LIBS} | |||
src_libespeak_ng_la_CFLAGS = \ | |||
-fPIC -fvisibility=hidden \ | |||
-pedantic -fno-exceptions -DPATH_ESPEAK_DATA=\"$(DATADIR)\" -DLIBESPEAK_NG_EXPORT \ | |||
${PCAUDIOLIB_CFLAGS} ${SONIC_CFLAGS} ${AM_CFLAGS} | |||
${PCAUDIOLIB_CFLAGS} ${SONIC_CFLAGS} ${JSONC_CFLAGS} ${AM_CFLAGS} | |||
src_libespeak_ng_la_SOURCES = \ | |||
src/ucd-tools/src/case.c \ |
@@ -224,6 +224,33 @@ AC_CHECK_FUNCS([strerror]) | |||
AC_CHECK_FUNCS([strrchr]) | |||
AC_CHECK_FUNCS([strstr]) | |||
dnl ================================================================ | |||
dnl JSON-C library checks. | |||
dnl ================================================================ | |||
AC_CHECK_HEADERS([json-c/json.h], | |||
[ | |||
have_jsonc=yes | |||
JSONC_CFLAGS=-DUSE_JSONC=1 | |||
AC_CHECK_LIB([json-c], [json_object_from_file], | |||
[ | |||
JSONC_LIBS=-ljson-c | |||
], | |||
[ | |||
have_jsonc=no | |||
AC_MSG_ERROR([json-c library is required for homograph support]) | |||
] | |||
) | |||
], | |||
[ | |||
have_jsonc=no | |||
AC_MSG_ERROR([json-c library is required for homograph support]) | |||
] | |||
) | |||
AC_SUBST(JSONC_CFLAGS) | |||
AC_SUBST(JSONC_LIBS) | |||
dnl ================================================================ | |||
dnl PCAudioLib checks. | |||
dnl ================================================================ |
@@ -0,0 +1,290 @@ | |||
رو | |||
ترک | |||
دور | |||
فک | |||
راند | |||
دم | |||
ترکان | |||
گرد | |||
جلد | |||
پر | |||
گردان | |||
عمر | |||
حل | |||
رد | |||
رود | |||
کن | |||
هل | |||
اره | |||
سرو | |||
تن | |||
کش | |||
رس | |||
کشت | |||
نقل | |||
رم | |||
گرداند | |||
کل | |||
سر | |||
گنج | |||
تو | |||
برنج | |||
قل | |||
کرد | |||
پی | |||
رب | |||
تاباند | |||
شو | |||
شکر | |||
شست | |||
خرد | |||
برند | |||
اعمال | |||
کرم | |||
برنده | |||
دوره | |||
نشست | |||
هری | |||
جنب | |||
ملا | |||
بعد | |||
شش | |||
طبق | |||
مد | |||
بری | |||
برم | |||
شوید | |||
چسباند | |||
تست | |||
روی | |||
ولو | |||
خواند | |||
سمت | |||
کنده | |||
گنگ | |||
بکن | |||
درهم | |||
شناساند | |||
یاس | |||
معبر | |||
مهم | |||
مردم | |||
خوراند | |||
جست | |||
بنا | |||
ترکاند | |||
ملاک | |||
پلاس | |||
فوت | |||
کت | |||
برده | |||
گله | |||
بده | |||
چرخاند | |||
کندی | |||
خلق | |||
چک | |||
بخار | |||
نزد | |||
چپاند | |||
اجر | |||
شان | |||
رسم | |||
شنو | |||
خواباند | |||
نرم | |||
رساند | |||
لمباند | |||
میدانی | |||
دود | |||
دواند | |||
جدا | |||
معین | |||
رسی | |||
جو | |||
فرانک | |||
هلیم | |||
بردار | |||
شرف | |||
کشند | |||
گزید | |||
مقدم | |||
زهره | |||
سری | |||
کیف | |||
کشیم | |||
پژمرد | |||
مجاز | |||
لرزاند | |||
نشاند | |||
نشسته | |||
جرم | |||
خورد | |||
علم | |||
جوشاند | |||
برگرداند | |||
سوزاند | |||
بره | |||
امین | |||
لب | |||
اه | |||
آرم | |||
گنجاند | |||
قطعه | |||
فراری | |||
لغز | |||
تنگ | |||
عرضه | |||
بدو | |||
صفر | |||
المان | |||
برید | |||
فرق | |||
محقق | |||
پیچاند | |||
پرورد | |||
کشتی | |||
مسلم | |||
روم | |||
پرش | |||
نصاب | |||
تکه | |||
جوید | |||
پراند | |||
وسطی | |||
کارد | |||
سپرد | |||
افشاند | |||
مراجع | |||
خرم | |||
کرات | |||
نمونه | |||
سند | |||
کند | |||
بر | |||
سنت | |||
شبه | |||
ابر | |||
اپل | |||
درد | |||
خیر | |||
خاراند | |||
ابی | |||
مبدل | |||
مو | |||
جنگ | |||
سرم | |||
قدم | |||
مایل | |||
اسکی | |||
نسبی | |||
گذراند | |||
بخور | |||
صرف | |||
قسم | |||
گرده | |||
مبلغ | |||
حسن | |||
کره | |||
عود | |||
چشم | |||
خلف | |||
حقه | |||
شل | |||
ادبا | |||
برس | |||
گرم | |||
سحر | |||
سنی | |||
ببر | |||
فهماند | |||
اوا | |||
اشکال | |||
بین | |||
سرور | |||
مفصل | |||
فرار | |||
سبک | |||
تپاند | |||
سیر | |||
پهن | |||
مهر | |||
درک | |||
چلاند | |||
رنجاند | |||
مشرف | |||
کمی | |||
فن | |||
مقطع | |||
پوشاند | |||
عرفا | |||
بدهی | |||
شنود | |||
قطر | |||
شهرت | |||
سپر | |||
رحم | |||
حلال | |||
دوران | |||
ترساند | |||
پروراند | |||
رویه | |||
آورد | |||
کلفت | |||
تکاند | |||
گریاند | |||
کابل | |||
شهره | |||
رهاند | |||
دین | |||
عقبی | |||
لنگ | |||
مصر | |||
پرس | |||
منکر | |||
قمری | |||
امل | |||
خفت | |||
سمبل | |||
کنه | |||
عالم | |||
bedah (فعل) | |||
تند | |||
محرم | |||
عرق | |||
خیساند | |||
لیلی | |||
کشاند | |||
یمن | |||
شدید | |||
قوت | |||
اشراف | |||
نبرد | |||
کر | |||
شما | |||
گریم | |||
شوم | |||
جور | |||
خنداند | |||
اشغال | |||
علی | |||
مسکن | |||
مثل | |||
نفس | |||
ارایه | |||
دوم | |||
دهم | |||
سپرم | |||
نکن | |||
ده | |||
ماند | |||
به | |||
گل | |||
ور | |||
مرد | |||
پست | |||
کنف | |||
qet؟e | |||
شبه | |||
شوم | |||
persian word |
@@ -2,4 +2,5 @@ name Russian (Classic) | |||
language ru-cl | |||
replace 03 a a# | |||
dict_min 20000 | |||
speed 95 | |||
dictrules 3 |
@@ -0,0 +1,389 @@ | |||
و | |||
در | |||
به | |||
از | |||
که | |||
این | |||
را | |||
با | |||
است | |||
برای | |||
آن | |||
یک | |||
خود | |||
تا | |||
کرد | |||
بر | |||
هم | |||
نیز | |||
گفت | |||
میشود | |||
وی | |||
شد | |||
دارد | |||
ما | |||
اما | |||
یا | |||
شده | |||
باید | |||
هر | |||
آنها | |||
بود | |||
او | |||
دیگر | |||
دو | |||
مورد | |||
میکند | |||
شود | |||
کند | |||
وجود | |||
بین | |||
پیش | |||
شده_است | |||
پس | |||
نظر | |||
اگر | |||
همه | |||
یکی | |||
حال | |||
هستند | |||
من | |||
کنند | |||
نیست | |||
باشد | |||
چه | |||
بی | |||
می | |||
بخش | |||
میکنند | |||
همین | |||
افزود | |||
هایی | |||
دارند | |||
راه | |||
همچنین | |||
روی | |||
داد | |||
بیشتر | |||
بسیار | |||
سه | |||
داشت | |||
چند | |||
سوی | |||
تنها | |||
هیچ | |||
میان | |||
اینکه | |||
شدن | |||
بعد | |||
جدید | |||
ولی | |||
حتی | |||
کردن | |||
برخی | |||
کردند | |||
میدهد | |||
اول | |||
نه | |||
کرده_است | |||
نسبت | |||
بیش | |||
شما | |||
چنین | |||
طور | |||
افراد | |||
تمام | |||
درباره | |||
بار | |||
بسیاری | |||
میتواند | |||
کرده | |||
چون | |||
ندارد | |||
دوم | |||
بزرگ | |||
طی | |||
حدود | |||
همان | |||
بدون | |||
البته | |||
آنان | |||
میگوید | |||
دیگری | |||
خواهد_شد | |||
کنیم | |||
قابل | |||
یعنی | |||
رشد | |||
میتوان | |||
وارد | |||
کل | |||
ویژه | |||
قبل | |||
براساس | |||
نیاز | |||
گذاری | |||
هنوز | |||
لازم | |||
سازی | |||
بوده_است | |||
چرا | |||
میشوند | |||
وقتی | |||
گرفت | |||
کم | |||
جای | |||
حالی | |||
تغییر | |||
پیدا | |||
اکنون | |||
تحت | |||
باعث | |||
مدت | |||
فقط | |||
زیادی | |||
تعداد | |||
آیا | |||
بیان | |||
رو | |||
شدند | |||
عدم | |||
کرده_اند | |||
بودن | |||
نوع | |||
بلکه | |||
جاری | |||
دهد | |||
برابر | |||
مهم | |||
بوده | |||
اخیر | |||
مربوط | |||
امر | |||
زیر | |||
گیری | |||
شاید | |||
خصوص | |||
آقای | |||
اثر | |||
کننده | |||
بودند | |||
فکر | |||
کنار | |||
اولین | |||
سوم | |||
سایر | |||
کنید | |||
ضمن | |||
مانند | |||
باز | |||
میگیرد | |||
ممکن | |||
حل | |||
دارای | |||
پی | |||
مثل | |||
میرسد | |||
اجرا | |||
دور | |||
منظور | |||
کسی | |||
موجب | |||
طول | |||
امکان | |||
آنچه | |||
تعیین | |||
گفته | |||
شوند | |||
جمع | |||
خیلی | |||
علاوه | |||
گونه | |||
تاکنون | |||
رسید | |||
ساله | |||
گرفته | |||
شده_اند | |||
علت | |||
چهار | |||
داشته_باشد | |||
خواهد_بود | |||
طرف | |||
تهیه | |||
تبدیل | |||
مناسب | |||
زیرا | |||
مشخص | |||
میتوانند | |||
نزدیک | |||
جریان | |||
روند | |||
بنابراین | |||
میدهند | |||
یافت | |||
نخستین | |||
بالا | |||
پنج | |||
ریزی | |||
عالی | |||
چیزی | |||
نخست | |||
بیشتری | |||
ترتیب | |||
شده_بود | |||
خاص | |||
خوبی | |||
خوب | |||
شروع | |||
فرد | |||
کامل | |||
غیر | |||
میرود | |||
دهند | |||
آخرین | |||
دادن | |||
جدی | |||
بهترین | |||
شامل | |||
گیرد | |||
بخشی | |||
باشند | |||
تمامی | |||
بهتر | |||
داده_است | |||
حد | |||
نبود | |||
کسانی | |||
میکرد | |||
داریم | |||
علیه | |||
میباشد | |||
دانست | |||
ناشی | |||
داشتند | |||
دهه | |||
میشد | |||
ایشان | |||
آنجا | |||
گرفته_است | |||
دچار | |||
میآید | |||
لحاظ | |||
آنکه | |||
داده | |||
بعضی | |||
هستیم | |||
اند | |||
برداری | |||
نباید | |||
میکنیم | |||
نشست | |||
سهم | |||
همیشه | |||
آمد | |||
اش | |||
وگو | |||
میکنم | |||
حداقل | |||
طبق | |||
جا | |||
خواهد_کرد | |||
نوعی | |||
چگونه | |||
رفت | |||
هنگام | |||
فوق | |||
روش | |||
ندارند | |||
سعی | |||
بندی | |||
شمار | |||
کلی | |||
کافی | |||
مواجه | |||
همچنان | |||
زیاد | |||
سمت | |||
کوچک | |||
داشته_است | |||
چیز | |||
پشت | |||
آورد | |||
حالا | |||
روبه | |||
سالهای | |||
دادند | |||
میکردند | |||
عهده | |||
نیمه | |||
جایی | |||
دیگران | |||
سی | |||
بروز | |||
یکدیگر | |||
آمده_است | |||
جز | |||
کنم | |||
سپس | |||
کنندگان | |||
خودش | |||
همواره | |||
یافته | |||
شان | |||
صرف | |||
نمیشود | |||
رسیدن | |||
چهارم | |||
یابد | |||
متر | |||
ساز | |||
داشته | |||
کرده_بود | |||
باره | |||
نحوه | |||
کردم | |||
تو | |||
شخصی | |||
داشته_باشند | |||
محسوب | |||
پخش | |||
کمی | |||
متفاوت | |||
سراسر | |||
کاملا | |||
داشتن | |||
نظیر | |||
آمده | |||
گروهی | |||
فردی | |||
ع | |||
همچون | |||
خطر | |||
خویش | |||
کدام | |||
دسته | |||
سبب | |||
عین | |||
آوری | |||
متاسفانه | |||
بیرون | |||
دار | |||
ابتدا | |||
شش | |||
افرادی | |||
میگویند | |||
سالهای | |||
درون | |||
نیستند | |||
یافته_است | |||
پر | |||
خاطرنشان | |||
گاه | |||
جمعی | |||
اغلب | |||
دوباره | |||
مییابد | |||
لذا | |||
زاده | |||
گردد | |||
اینجا |
@@ -314,6 +314,8 @@ int main(int argc, char **argv) | |||
static const struct option long_options[] = { | |||
{ "help", no_argument, 0, 'h' }, | |||
{ "stdin", no_argument, 0, 0x100 }, | |||
{ "input", required_argument, 0, 0x113 }, | |||
{ "output", required_argument, 0, 0x114 }, | |||
{ "compile-debug", optional_argument, 0, 0x101 }, | |||
{ "compile", optional_argument, 0, 0x102 }, | |||
{ "punct", optional_argument, 0, 0x103 }, | |||
@@ -339,8 +341,10 @@ int main(int argc, char **argv) | |||
FILE *f_text = NULL; | |||
char *p_text = NULL; | |||
FILE *f_phonemes_out = stdout; | |||
FILE *f_phonemes_out = NULL; | |||
char *data_path = NULL; // use default path for espeak-ng-data | |||
char input_file[256] = {0}; | |||
char output_file[256] = {0}; // Add output file name variable | |||
int option_index = 0; | |||
int c; | |||
@@ -593,6 +597,12 @@ int main(int argc, char **argv) | |||
case 0x112: // --ssml-break | |||
ssml_break = atoi(optarg2); | |||
break; | |||
case 0x113: // --input | |||
strncpy0(input_file, optarg2, sizeof(input_file)); | |||
break; | |||
case 0x114: // --output | |||
strncpy0(output_file, optarg2, sizeof(output_file)); | |||
break; | |||
default: | |||
exit(0); | |||
} | |||
@@ -689,9 +699,54 @@ int main(int argc, char **argv) | |||
if (option_punctuation == 2) | |||
espeak_SetPunctuationList(option_punctlist); | |||
// Open output file for phoneme output, first overwrite to clear previous content | |||
if (output_file[0] == 0) { | |||
strcpy(output_file, "output.txt"); // Default output file if none specified | |||
} | |||
if ((f_phonemes_out = fopen(output_file, "w")) == NULL) { | |||
fprintf(stderr, "Can't write to output file: %s\n", output_file); | |||
exit(EXIT_FAILURE); | |||
} | |||
fclose(f_phonemes_out); // Close after clearing | |||
f_phonemes_out = NULL; // Set to NULL after closing | |||
espeak_SetPhonemeTrace(phoneme_options | (phonemes_separator << 8), f_phonemes_out); | |||
if (filename[0] == 0) { | |||
// Process input file if specified | |||
if (input_file[0] != 0) { | |||
f_text = fopen(input_file, "r"); | |||
if (f_text == NULL) { | |||
fprintf(stderr, "Can't open input file: %s\n", input_file); | |||
exit(EXIT_FAILURE); | |||
} | |||
char line[1000]; | |||
while (fgets(line, sizeof(line), f_text) != NULL) { | |||
// Remove trailing newline | |||
line[strcspn(line, "\n")] = 0; | |||
// Process each line independently | |||
if (line[0] != 0) { // Skip empty lines | |||
// Reopen output file in append mode for each line | |||
if (f_phonemes_out != NULL) { | |||
fclose(f_phonemes_out); | |||
f_phonemes_out = NULL; | |||
} | |||
if ((f_phonemes_out = fopen(output_file, "a")) == NULL) { | |||
fprintf(stderr, "Can't append to output file: %s\n", output_file); | |||
exit(EXIT_FAILURE); | |||
} | |||
espeak_SetPhonemeTrace(phoneme_options | (phonemes_separator << 8), f_phonemes_out); | |||
espeak_Synth(line, strlen(line)+1, 0, POS_CHARACTER, 0, synth_flags, NULL, NULL); | |||
espeak_ng_Synchronize(); | |||
fclose(f_phonemes_out); | |||
f_phonemes_out = NULL; | |||
} | |||
} | |||
fclose(f_text); | |||
} else if (filename[0] == 0) { | |||
if ((optind < argc) && (flag_stdin == 0)) { | |||
// there's a non-option parameter, and no -f or --stdin | |||
// use it as text | |||
@@ -788,9 +843,6 @@ int main(int argc, char **argv) | |||
exit(EXIT_FAILURE); | |||
} | |||
if (f_phonemes_out != stdout) | |||
fclose(f_phonemes_out); | |||
CloseWavFile(); | |||
espeak_ng_Terminate(); | |||
return 0; |
@@ -566,7 +566,7 @@ static int compile_line(CompileContext *ctx, char *linebuf, char *dict_line, int | |||
// PROBLEM vowel reductions are not applied to the translated phonemes | |||
// condition rules are not applied | |||
TranslateWord(translator, phonetic, NULL, NULL); | |||
TranslateWord(translator, phonetic, NULL, NULL, NULL, NULL, 0); | |||
text_not_phonemes = false; | |||
strncpy0(encoded_ph, ctx->word_phonemes, N_WORD_BYTES-4); | |||
@@ -2882,7 +2882,7 @@ int Lookup(Translator *tr, const char *word, char *ph_out) | |||
text[1] = ' '; | |||
text[2] = ' '; | |||
strncpy0(text+3, word1, sizeof(text)-3); | |||
flags0 = TranslateWord(tr, text+3, NULL, NULL); | |||
flags0 = TranslateWord(tr, text+3, NULL, NULL, NULL, NULL, 0); | |||
strcpy(ph_out, word_phonemes); | |||
option_sayas = say_as; | |||
} |
@@ -715,7 +715,7 @@ static int CheckDotOrdinal(Translator *tr, char *word, char *word_end, WORD_TAB | |||
// lang=hu don't treat dot as ordinal indicator if the next word is a month name ($alt). It may have a suffix. | |||
nextflags = 0; | |||
if (IsAlpha(c2)) | |||
nextflags = TranslateWord(tr, &word_end[2], NULL, NULL); | |||
nextflags = TranslateWord(tr, &word_end[2], NULL, NULL, NULL, NULL, 0); | |||
if ((tr->prev_dict_flags[0] & FLAG_ALT_TRANS) && ((c2 == 0) || (wtab[0].flags & FLAG_COMMA_AFTER) || iswdigit(c2))) | |||
ordinal = 0; // TEST 09.02.10 |
@@ -163,7 +163,7 @@ void SetSpeed(int control) | |||
if (control & 2) | |||
DoSonicSpeed(1 * 1024); | |||
if ((wpm_value > espeakRATE_MAXIMUM) || ((wpm_value > speed.fast_settings) && (wpm > 350))) { | |||
if ((wpm_value >= espeakRATE_MAXIMUM) || ((wpm_value > speed.fast_settings) && (wpm > 350))) { | |||
int wpm2; | |||
wpm2 = wpm; | |||
wpm = espeakRATE_NORMAL; |
@@ -66,7 +66,7 @@ static const ALPHABET alphabets[] = { | |||
{ "_el", OFFSET_GREEK, 0x380, 0x3ff, L('e', 'l'), AL_DONT_NAME | AL_NOT_LETTERS | AL_WORDS }, | |||
{ "_cyr", OFFSET_CYRILLIC, 0x400, 0x52f, 0, 0 }, | |||
{ "_hy", OFFSET_ARMENIAN, 0x530, 0x58f, L('h', 'y'), AL_WORDS }, | |||
{ "_he", OFFSET_HEBREW, 0x590, 0x5ff, L('h', 'e'), 0 }, | |||
{ "_he", OFFSET_HEBREW, 0x590, 0x5ff, 0, 0 }, | |||
{ "_ar", OFFSET_ARABIC, 0x600, 0x6ff, 0, 0 }, | |||
{ "_syc", OFFSET_SYRIAC, 0x700, 0x74f, 0, 0 }, | |||
{ "_hi", OFFSET_DEVANAGARI, 0x900, 0x97f, L('h', 'i'), AL_WORDS }, | |||
@@ -868,13 +868,6 @@ Translator *SelectTranslator(const char *name) | |||
tr->langopts.ideographs = 1; | |||
} | |||
break; | |||
case L('h','e'): // Hebrew | |||
{ | |||
tr->langopts.param[LOPT_APOSTROPHE] = 2; // bit 1 Apostrophe at end of word is part of the word, for words like בָּגָאז׳ | |||
tr->langopts.stress_flags = S_NO_AUTO_2; // don't use secondary stress | |||
tr->langopts.numbers = NUM_SINGLE_STRESS | NUM_DFRACTION_2 | NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_SINGLE_AND; | |||
} | |||
break; | |||
case L('g', 'a'): // irish | |||
case L('g', 'd'): // scots gaelic | |||
{ |
@@ -46,7 +46,7 @@ | |||
#include "translateword.h" | |||
static int CalcWordLength(int source_index, int charix_top, short int *charix, WORD_TAB *words, int word_count); | |||
static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, unsigned char *p, char *word_phonemes); | |||
static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, unsigned char *p, char *word_phonemes, WORD_TAB words[], char sbuf[], int word_count); | |||
static void SwitchLanguage(char *word, char *word_phonemes); | |||
Translator *translator = NULL; // the main translator | |||
@@ -141,13 +141,13 @@ char *strchr_w(const char *s, int c) | |||
return strchr((char *)s, c); // (char *) is needed for Borland compiler | |||
} | |||
int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out) | |||
int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, WORD_TAB words[], char sbuf[], int word_count) | |||
{ | |||
char words_phonemes[N_WORD_PHONEMES]; // a word translated into phoneme codes | |||
char *phonemes = words_phonemes; | |||
int flags = TranslateWord3(tr, word_start, wtab, word_out, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes)); | |||
int flags = TranslateWord3(tr, word_start, wtab, word_out, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes), words, sbuf, word_count); | |||
if (flags & FLAG_TEXTMODE && word_out) { | |||
// Ensure that start of word rules match with the replaced text, | |||
// so that emoji and other characters are pronounced correctly. | |||
@@ -173,7 +173,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o | |||
// However, dictionary_skipwords value is still needed outside this scope. | |||
// So we backup and restore it at the end of this scope. | |||
int skipwords = dictionary_skipwords; | |||
TranslateWord3(tr, word_out, wtab, NULL, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes)); | |||
TranslateWord3(tr, word_out, wtab, NULL, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes), words, sbuf, word_count); | |||
int n; | |||
if (first_word) { | |||
@@ -186,7 +186,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o | |||
available -= n; | |||
phonemes += n; | |||
// skip to the next word in a multi-word replacement. Always skip at least one word. | |||
// skip to the next word in a multi-word replacement. Always skip at least one word. | |||
for (dictionary_skipwords++; dictionary_skipwords > 0; dictionary_skipwords--) { | |||
while (!isspace(*word_out)) ++word_out; | |||
while (isspace(*word_out)) ++word_out; | |||
@@ -299,7 +299,7 @@ int SetTranslator3(const char *new_language) | |||
return SetAlternateTranslator(new_language, &translator3, translator3_language); | |||
} | |||
static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pause) | |||
static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pause, WORD_TAB words [], char sbuf[], int word_count) | |||
{ | |||
int flags = 0; | |||
int stress; | |||
@@ -401,7 +401,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa | |||
word_copy_len = ix; | |||
word_replaced[2] = 0; | |||
flags = TranslateWord(translator, word, wtab, &word_replaced[2]); | |||
flags = TranslateWord(translator, word, wtab, &word_replaced[2], words, sbuf, word_count); | |||
if (flags & FLAG_SPELLWORD) { | |||
// re-translate the word as individual letters, separated by spaces | |||
@@ -410,7 +410,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa | |||
} | |||
if ((flags & FLAG_COMBINE) && !(wtab[1].flags & FLAG_PHONEMES)) { | |||
CombineFlag(tr, wtab, word, &flags, p, word_phonemes); | |||
CombineFlag(tr, wtab, word, &flags, p, word_phonemes, words, sbuf, word_count); | |||
} | |||
if (p[0] == phonSWITCH) { | |||
@@ -433,9 +433,9 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa | |||
if (word_replaced[2] != 0) { | |||
word_replaced[0] = 0; // byte before the start of the word | |||
word_replaced[1] = ' '; | |||
flags = TranslateWord(translator2, &word_replaced[1], wtab, NULL); | |||
flags = TranslateWord(translator2, &word_replaced[1], wtab, NULL, words, sbuf, word_count); | |||
} else | |||
flags = TranslateWord(translator2, word, wtab, &word_replaced[2]); | |||
flags = TranslateWord(translator2, word, wtab, &word_replaced[2], words, sbuf, word_count); | |||
} | |||
if (p[0] != phonSWITCH) | |||
@@ -1489,6 +1489,20 @@ void TranslateClauseWithTerminator(Translator *tr, int *tone_out, char **voice_c | |||
} | |||
words[0].flags |= FLAG_FIRST_WORD; | |||
// Add debug print header and print all words before processing | |||
// fprintf(stderr, "\n=== Words in sentence ===\n"); | |||
// for (ix = 0; ix < word_count; ix++) { | |||
// char word_copy[150]; | |||
// int word_len = 0; | |||
// char *pw = &sbuf[words[ix].start]; | |||
// while (pw[word_len] != ' ' && pw[word_len] != 0 && word_len < 149) { | |||
// word_copy[word_len] = pw[word_len]; | |||
// word_len++; | |||
// } | |||
// word_copy[word_len] = 0; | |||
// fprintf(stderr, "Word %d: '%s'\n", ix + 1, word_copy); | |||
// } | |||
// Each TranslateWord2 may require up to 7 phonemes | |||
// and after this loop we require 2 phonemes | |||
for (ix = 0; ix < word_count && (n_ph_list2 < N_PHONEME_LIST-7-2); ix++) { | |||
@@ -1590,7 +1604,7 @@ void TranslateClauseWithTerminator(Translator *tr, int *tone_out, char **voice_c | |||
for (pw = &number_buf[3]; pw < pn && nw < N_CLAUSE_WORDS;) { | |||
// keep wflags for each part, for FLAG_HYPHEN_AFTER | |||
dict_flags = TranslateWord2(tr, pw, &num_wtab[nw++], words[ix].pre_pause); | |||
dict_flags = TranslateWord2(tr, pw, &num_wtab[nw++], words[ix].pre_pause, words, sbuf, word_count); | |||
while (pw < pn && *pw++ != ' ') | |||
; | |||
words[ix].pre_pause = 0; | |||
@@ -1598,7 +1612,7 @@ void TranslateClauseWithTerminator(Translator *tr, int *tone_out, char **voice_c | |||
} else { | |||
pre_pause = 0; | |||
dict_flags = TranslateWord2(tr, word, &words[ix], words[ix].pre_pause); | |||
dict_flags = TranslateWord2(tr, word, &words[ix], words[ix].pre_pause, words, sbuf, word_count); | |||
if (pre_pause > words[ix+1].pre_pause) { | |||
words[ix+1].pre_pause = pre_pause; | |||
@@ -1612,7 +1626,7 @@ void TranslateClauseWithTerminator(Translator *tr, int *tone_out, char **voice_c | |||
memset(number_buf+1, ' ', 9); | |||
nx = utf8_in(&c_temp, pw); | |||
memcpy(&number_buf[3], pw, nx); | |||
TranslateWord2(tr, &number_buf[3], &words[ix], 0); | |||
TranslateWord2(tr, &number_buf[3], &words[ix], 0, words, sbuf, word_count); | |||
pw += nx; | |||
} | |||
} | |||
@@ -1696,7 +1710,7 @@ static int CalcWordLength(int source_index, int charix_top, short int *charix, W | |||
return k; | |||
} | |||
static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, unsigned char *p, char *word_phonemes) { | |||
static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, unsigned char *p, char *word_phonemes, WORD_TAB words[], char sbuf[], int word_count) { | |||
// combine a preposition with the following word | |||
@@ -1724,7 +1738,7 @@ static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, | |||
char ph_buf[N_WORD_PHONEMES]; | |||
strcpy(ph_buf, word_phonemes); | |||
flags2[0] = TranslateWord(tr, p2+1, wtab+1, NULL); | |||
flags2[0] = TranslateWord(tr, p2+1, wtab+1, NULL, words, sbuf, word_count); | |||
if ((flags2[0] & FLAG_WAS_UNPRONOUNCABLE) || (word_phonemes[0] == phonSWITCH)) | |||
ok = false; | |||
@@ -1745,11 +1759,11 @@ static void CombineFlag(Translator *tr, WORD_TAB *wtab, char *word, int *flags, | |||
if (ok) { | |||
*p2 = '-'; // replace next space by hyphen | |||
wtab[0].flags &= ~FLAG_ALL_UPPER; // prevent it being considered an abbreviation | |||
*flags = TranslateWord(translator, word, wtab, NULL); // translate the combined word | |||
*flags = TranslateWord(translator, word, wtab, NULL, words, sbuf, word_count); // translate the combined word | |||
if ((sylimit > 0) && (CountSyllables(p) > (sylimit & 0x1f))) { | |||
// revert to separate words | |||
*p2 = ' '; | |||
*flags = TranslateWord(translator, word, wtab, NULL); | |||
*flags = TranslateWord(translator, word, wtab, NULL, words, sbuf, word_count); | |||
} else { | |||
if (*flags == 0) | |||
*flags = flags2[0]; // no flags for the combined word, so use flags from the second word eg. lang-hu "nem december 7-e" |
@@ -667,7 +667,7 @@ void ProcessLanguageOptions(LANGUAGE_OPTIONS *langopts); | |||
void print_dictionary_flags(unsigned int *flags, char *buf, int buf_len); | |||
int TranslateWord(Translator *tr, char *word1, WORD_TAB *wtab, char *word_out); | |||
int TranslateWord(Translator *tr, char *word1, WORD_TAB *wtab, char *word_out, WORD_TAB *words, char sbuf [], int word_count); | |||
void TranslateClause(Translator *tr, int *tone, char **voice_change); | |||
void TranslateClauseWithTerminator(Translator *tr, int *tone_out, char **voice_change, int *terminator_out); | |||
@@ -1,4 +1,3 @@ | |||
/* | |||
* Copyright (C) 2005 to 2014 by Jonathan Duddington | |||
* email: [email protected] | |||
@@ -43,7 +42,402 @@ | |||
#include "synthdata.h" // for SelectPhonemeTable, LookupPhonemeTable | |||
#include "ucd/ucd.h" // for ucd_toupper | |||
#include "voice.h" // for voice, voice_t | |||
#include "speech.h" // for path_home | |||
// Add JSON parsing headers | |||
#include <json-c/json.h> | |||
// Global variables for homographs | |||
static char **homographs_list = NULL; | |||
static int homographs_count = 0; | |||
static json_object *homograph_data = NULL; | |||
// Add these near the other global variables at the top | |||
static char **stopwords_list = NULL; | |||
static int stopwords_count = 0; | |||
static void LoadHomographData(void) | |||
{ | |||
char path[256]; | |||
json_object *root = NULL; | |||
// Get the path to dataset.json | |||
snprintf(path, sizeof(path), "%s%cespeak-ng-data%cdataset.json", path_home, PATHSEP, PATHSEP); | |||
root = json_object_from_file("/content/espeak-ng/espeak-ng-data/dataset.json"); | |||
if (root == NULL) { | |||
fprintf(stderr, "Failed to load homograph data from %s\n", path); | |||
return; | |||
} | |||
homograph_data = root; | |||
// Print a sample entry for debugging | |||
// json_object *sample_word = json_object_object_get(root, "read"); | |||
// if (sample_word != NULL) { | |||
// fprintf(stderr, "Sample homograph data for 'read':\n"); | |||
// json_object_object_foreach(sample_word, key, val) { | |||
// fprintf(stderr, " Pronunciation: %s\n", key); | |||
// fprintf(stderr, " Context words: "); | |||
// if (json_object_get_type(val) == json_type_array) { | |||
// int array_len = json_object_array_length(val); | |||
// for (int i = 0; i < array_len; i++) { | |||
// json_object *item = json_object_array_get_idx(val, i); | |||
// fprintf(stderr, "%s ", json_object_get_string(item)); | |||
// } | |||
// } | |||
// fprintf(stderr, "\n"); | |||
// } | |||
// } | |||
} | |||
static void LoadHomographs(void) | |||
{ | |||
FILE *f; | |||
char path[256]; | |||
char line[256]; | |||
int count = 0; | |||
int i = 0; | |||
// Get the path to homographs.txt | |||
snprintf(path, sizeof(path), "%s%cespeak-ng-data%chomographs.txt", path_home, PATHSEP, PATHSEP); | |||
f = fopen("/content/espeak-ng/espeak-ng-data/homographs.txt", "r"); | |||
if (f == NULL) { | |||
fprintf(stderr, "Failed to open homographs.txt\n"); | |||
return; | |||
} | |||
// First count the number of lines | |||
while (fgets(line, sizeof(line), f) != NULL) { | |||
count++; | |||
} | |||
// Allocate memory for the list | |||
homographs_list = (char **)malloc(count * sizeof(char *)); | |||
if (homographs_list == NULL) { | |||
fclose(f); | |||
return; | |||
} | |||
// Rewind and read the file | |||
rewind(f); | |||
while (fgets(line, sizeof(line), f) != NULL) { | |||
// Remove newline | |||
line[strcspn(line, "\n")] = 0; | |||
homographs_list[i] = strdup(line); | |||
if (homographs_list[i] == NULL) { | |||
// Cleanup on error | |||
for (int j = 0; j < i; j++) { | |||
free(homographs_list[j]); | |||
} | |||
free(homographs_list); | |||
homographs_list = NULL; | |||
fclose(f); | |||
return; | |||
} | |||
i++; | |||
} | |||
homographs_count = count; | |||
fclose(f); | |||
} | |||
static bool IsHomograph(const char *word) | |||
{ | |||
if (homographs_list == NULL) { | |||
LoadHomographs(); | |||
} | |||
if (homographs_list == NULL) { | |||
return false; | |||
} | |||
for (int i = 0; i < homographs_count; i++) { | |||
if (strcmp(word, homographs_list[i]) == 0) { | |||
return true; | |||
} | |||
} | |||
return false; | |||
} | |||
static void LoadStopwords(void) | |||
{ | |||
FILE *f; | |||
char path[256]; | |||
char line[256]; | |||
int count = 0; | |||
int i = 0; | |||
// Get the path to stopwords.dat | |||
snprintf(path, sizeof(path), "%s%cespeak-ng-data%cstopwords.dat", path_home, PATHSEP, PATHSEP); | |||
f = fopen("/content/espeak-ng/espeak-ng-data/stopwords.dat", "r"); | |||
if (f == NULL) { | |||
fprintf(stderr, "Failed to open stopwords.dat\n"); | |||
return; | |||
} | |||
// First count the number of lines | |||
while (fgets(line, sizeof(line), f) != NULL) { | |||
count++; | |||
} | |||
// Allocate memory for the list | |||
stopwords_list = (char **)malloc(count * sizeof(char *)); | |||
if (stopwords_list == NULL) { | |||
fclose(f); | |||
return; | |||
} | |||
// Rewind and read the file | |||
rewind(f); | |||
while (fgets(line, sizeof(line), f) != NULL) { | |||
// Remove newline | |||
line[strcspn(line, "\n")] = 0; | |||
stopwords_list[i] = strdup(line); | |||
if (stopwords_list[i] == NULL) { | |||
// Cleanup on error | |||
for (int j = 0; j < i; j++) { | |||
free(stopwords_list[j]); | |||
} | |||
free(stopwords_list); | |||
stopwords_list = NULL; | |||
fclose(f); | |||
return; | |||
} | |||
i++; | |||
} | |||
stopwords_count = count; | |||
fclose(f); | |||
} | |||
static bool IsStopword(const char *word) | |||
{ | |||
if (stopwords_list == NULL) { | |||
LoadStopwords(); | |||
} | |||
if (stopwords_list == NULL) { | |||
return false; | |||
} | |||
for (int i = 0; i < stopwords_count; i++) { | |||
if (strcmp(word, stopwords_list[i]) == 0) { | |||
return true; | |||
} | |||
} | |||
return false; | |||
} | |||
// Function to generate phonemes for homograph words | |||
static void GenerateHomographPhonemes(const char *word, char *phonemes, WORD_TAB words[], char sbuf[], int word_count) { | |||
if (homograph_data == NULL) { | |||
LoadHomographData(); | |||
} | |||
// fprintf(stderr, "\n=== Homograph Processing ===\n"); | |||
// fprintf(stderr, "Looking up word: '%s'\n", word); | |||
// Get the word data from homograph dictionary | |||
json_object *word_data = json_object_object_get(homograph_data, word); | |||
if (word_data == NULL) { | |||
fprintf(stderr, "Word not found in homograph dictionary\n"); | |||
// Word not found in homograph dictionary, use word's characters as phonemes | |||
int i = 0; | |||
while (word[i] != 0 && i < N_WORD_PHONEMES - 1) { | |||
phonemes[i] = PhonemeCode(word[i]); | |||
i++; | |||
} | |||
phonemes[i] = 0; | |||
return; | |||
} | |||
// fprintf(stderr, "\nFound homograph '%s' with pronunciations:\n", word); | |||
struct json_object_iterator it = json_object_iter_begin(word_data); | |||
struct json_object_iterator itEnd = json_object_iter_end(word_data); | |||
while (!json_object_iter_equal(&it, &itEnd)) { | |||
const char *debug_pron_key = json_object_iter_peek_name(&it); | |||
json_object *debug_pron_val = json_object_iter_peek_value(&it); | |||
// fprintf(stderr, " Pronunciation: %s\n", debug_pron_key); | |||
// fprintf(stderr, " Context words: "); | |||
if (json_object_get_type(debug_pron_val) == json_type_array) { | |||
int array_len = json_object_array_length(debug_pron_val); | |||
for (int i = 0; i < array_len; i++) { | |||
json_object *item = json_object_array_get_idx(debug_pron_val, i); | |||
// fprintf(stderr, "%s ", json_object_get_string(item)); | |||
} | |||
} | |||
fprintf(stderr, "\n"); | |||
json_object_iter_next(&it); | |||
} | |||
fprintf(stderr, "\n"); | |||
// Count context word frequencies | |||
int context_counts[256] = {0}; // Assuming max 256 unique context words | |||
char *context_words[256] = {0}; | |||
int num_context_words = 0; | |||
// fprintf(stderr, "\n=== Context Words ===\n"); | |||
// Process context words | |||
for (int i = 0; i < word_count; i++) { | |||
char word_copy[150]; | |||
int word_len = 0; | |||
char *pw = &sbuf[words[i].start]; | |||
// Extract the word | |||
while (pw[word_len] != ' ' && pw[word_len] != 0 && word_len < 149) { | |||
word_copy[word_len] = pw[word_len]; | |||
word_len++; | |||
} | |||
word_copy[word_len] = 0; | |||
// Skip if it's the target word, too short, or a stopword | |||
if (word_len <= 1 || strcmp(word_copy, word) == 0 || IsStopword(word_copy)) { | |||
continue; | |||
} | |||
// fprintf(stderr, "Word %d: '%s'\n", i + 1, word_copy); | |||
// Check if we've seen this word before | |||
int found = 0; | |||
for (int j = 0; j < num_context_words; j++) { | |||
if (strcmp(context_words[j], word_copy) == 0) { | |||
context_counts[j]++; | |||
found = 1; | |||
break; | |||
} | |||
} | |||
// Add new word if not found | |||
if (!found && num_context_words < 255) { | |||
context_words[num_context_words] = strdup(word_copy); | |||
if (context_words[num_context_words] == NULL) { | |||
fprintf(stderr, "Warning: Failed to allocate memory for context word\n"); | |||
continue; | |||
} | |||
context_counts[num_context_words] = 1; | |||
num_context_words++; | |||
} | |||
} | |||
// Find best pronunciation | |||
const char *best_phoneme = NULL; | |||
double max_normalized_score = -1; | |||
int max_raw_overlap = 0; | |||
// fprintf(stderr, "\nEvaluating pronunciations:\n"); | |||
// Iterate through each pronunciation option | |||
json_object_object_foreach(word_data, pron_key, pron_val) { | |||
if (json_object_get_type(pron_val) != json_type_array) { | |||
continue; | |||
} | |||
// fprintf(stderr, "\nEvaluating pronunciation: %s\n", pron_key); | |||
// Count word frequencies in this pronunciation's associated words | |||
int phoneme_word_counts[256] = {0}; | |||
int total_phoneme_words = 0; | |||
int array_len = json_object_array_length(pron_val); | |||
for (int i = 0; i < array_len; i++) { | |||
json_object *item = json_object_array_get_idx(pron_val, i); | |||
if (item == NULL) { | |||
fprintf(stderr, " Warning: NULL item at index %d\n", i); | |||
continue; | |||
} | |||
const char *assoc_word = json_object_get_string(item); | |||
if (assoc_word == NULL) { | |||
fprintf(stderr, " Warning: NULL string at index %d\n", i); | |||
continue; | |||
} | |||
// fprintf(stderr, " Processing associated word[%d]: '%s' (length: %zu)\n", | |||
// i, assoc_word, strlen(assoc_word)); | |||
// Count occurrences of this associated word | |||
for (int j = 0; j < num_context_words; j++) { | |||
if (context_words[j] == NULL) { | |||
fprintf(stderr, " Warning: NULL context word at index %d\n", j); | |||
continue; | |||
} | |||
// fprintf(stderr, " Comparing with context word[%d]: '%s' (length: %zu)\n", | |||
// j, context_words[j], strlen(context_words[j])); | |||
if (strcmp(context_words[j], assoc_word) == 0) { | |||
phoneme_word_counts[j]++; | |||
// fprintf(stderr, " Match found! New count for word '%s': %d\n", | |||
// context_words[j], phoneme_word_counts[j]); | |||
} | |||
} | |||
total_phoneme_words++; | |||
} | |||
// Calculate weighted overlap | |||
int weighted_overlap = 0; | |||
// fprintf(stderr, " Calculating weighted overlap:\n"); | |||
for (int i = 0; i < num_context_words; i++) { | |||
if (context_words[i] == NULL) continue; | |||
weighted_overlap += context_counts[i] * phoneme_word_counts[i]; | |||
// fprintf(stderr, " Word '%s': count=%d, matches=%d, contribution=%d\n", | |||
// context_words[i], context_counts[i], phoneme_word_counts[i], | |||
// context_counts[i] * phoneme_word_counts[i]); | |||
} | |||
// fprintf(stderr, " Total weighted overlap: %d\n", weighted_overlap); | |||
// Calculate normalized score | |||
double normalized_score = (total_phoneme_words > 0) ? | |||
(double)weighted_overlap / total_phoneme_words : 0.0; | |||
// fprintf(stderr, " Raw overlap: %d\n", weighted_overlap); | |||
// fprintf(stderr, " Normalized score: %.2f\n", normalized_score); | |||
// Select best phoneme | |||
if (normalized_score > max_normalized_score) { | |||
max_normalized_score = normalized_score; | |||
max_raw_overlap = weighted_overlap; | |||
best_phoneme = pron_key; | |||
// fprintf(stderr, " New best pronunciation selected!\n"); | |||
} else if (normalized_score == max_normalized_score) { | |||
// Tiebreaker: prefer the phoneme with higher raw overlap | |||
if (weighted_overlap > max_raw_overlap) { | |||
max_raw_overlap = weighted_overlap; | |||
best_phoneme = pron_key; | |||
// fprintf(stderr, " New best pronunciation selected (tiebreaker)!\n"); | |||
} | |||
} | |||
} | |||
// fprintf(stderr, "\nFinal selection:\n"); | |||
// fprintf(stderr, "Selected pronunciation: %s\n", best_phoneme ? best_phoneme : "none"); | |||
// fprintf(stderr, "Final score: %.2f\n", max_normalized_score); | |||
// fprintf(stderr, "Final raw overlap: %d\n", max_raw_overlap); | |||
// Copy the best phoneme to output | |||
if (best_phoneme != NULL) { | |||
int i = 0; | |||
while (best_phoneme[i] != 0 && i < N_WORD_PHONEMES - 1) { | |||
phonemes[i] = PhonemeCode(best_phoneme[i]); | |||
i++; | |||
} | |||
phonemes[i] = 0; | |||
} else { | |||
// No suitable pronunciation found, use word's characters as phonemes | |||
int i = 0; | |||
while (word[i] != 0 && i < N_WORD_PHONEMES - 1) { | |||
phonemes[i] = PhonemeCode(word[i]); | |||
i++; | |||
} | |||
phonemes[i] = 0; | |||
} | |||
} | |||
static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes); | |||
static void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags); | |||
@@ -55,7 +449,7 @@ static int TranslateLetter(Translator *tr, char *word, char *phonemes, int contr | |||
static int Unpronouncable(Translator *tr, char *word, int posn); | |||
static int Unpronouncable2(Translator *tr, char *word); | |||
int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes) | |||
int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes, WORD_TAB words[], char sbuf[], int word_count) | |||
{ | |||
// word1 is terminated by space (0x20) character | |||
@@ -98,6 +492,34 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_ | |||
int add_suffix_phonemes = 0; | |||
WORD_TAB wtab_null[8]; | |||
// Copy the current word (terminated by a space or NUL) into wordbuf for the homograph lookup | |||
char wordbuf[120]; | |||
unsigned int ix2; | |||
for (ix2 = 0; ((c_temp = word_start[ix2]) != ' ') && (c_temp != 0) && (ix2 < (sizeof(wordbuf)-1)); ix2++) | |||
wordbuf[ix2] = c_temp; | |||
wordbuf[ix2] = 0; | |||
// fprintf(stderr, "Processing word: '%s'\n", wordbuf); | |||
// Add debug print header and print all words before processing | |||
// fprintf(stderr, "\n=== Context Words ===\n"); | |||
// for (ix = 0; ix < word_count; ix++) { | |||
// char word_copy[150]; | |||
// int word_len = 0; | |||
// char *pw = &sbuf[words[ix].start]; | |||
// while (pw[word_len] != ' ' && pw[word_len] != 0 && word_len < 149) { | |||
// word_copy[word_len] = pw[word_len]; | |||
// word_len++; | |||
// } | |||
// word_copy[word_len] = 0; | |||
// fprintf(stderr, "Word %d: '%s'\n", ix + 1, word_copy); | |||
// } | |||
// Check if the word is a homograph | |||
if (IsHomograph(wordbuf)) { | |||
GenerateHomographPhonemes(wordbuf, word_phonemes, words, sbuf, word_count); | |||
return dictionary_flags[0]; // Return early with current dictionary flags | |||
} | |||
if (wtab == NULL) { | |||
memset(wtab_null, 0, sizeof(wtab_null)); | |||
wtab = wtab_null; | |||
@@ -201,6 +623,7 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_ | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
@@ -257,6 +680,7 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_ | |||
return FLAG_SPELLWORD; // a mixture of languages, retranslate as individual letters, separated by spaces | |||
return 0; | |||
} | |||
strcpy(word_phonemes, phonemes); | |||
if (wflags & FLAG_TRANSLATOR2) | |||
return 0; | |||
@@ -314,6 +738,7 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_ | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
@@ -326,6 +751,7 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_ | |||
if ((word_length == 1) && (IsAlpha(wc) || IsSuperscript(wc))) { | |||
if ((wordx = SpeakIndividualLetters(tr, wordx, phonemes, spell_word, current_alphabet, word_phonemes)) == NULL) | |||
return 0; | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
@@ -430,6 +856,7 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_ | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
wordx[-1] = c_temp; | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
@@ -456,6 +883,7 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_ | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
memcpy(wordx, word_copy, strlen(word_copy)); | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
@@ -474,6 +902,7 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_ | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
memcpy(wordx, word_copy, strlen(word_copy)); | |||
strcpy(word_phonemes, phonemes); | |||
return 0; | |||
} | |||
@@ -512,6 +941,7 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_ | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
strcpy(word_phonemes, phonemes); | |||
memcpy(wordx, word_copy, strlen(word_copy)); | |||
wordx[-1] = c_temp; | |||
@@ -763,6 +1193,7 @@ static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, | |||
non_initial = true; | |||
if (phonemes[0] == phonSWITCH) { | |||
// change to another language in order to translate this word | |||
strcpy(word_phonemes, phonemes); | |||
return NULL; | |||
} |
@@ -30,7 +30,7 @@ extern "C" | |||
{ | |||
#endif | |||
int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes); | |||
int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes, WORD_TAB words[], char sbuf[], int word_count); | |||
#ifdef __cplusplus | |||
} |