Set bit 1 of LOPT_APOSTROPHE (i.e. the value 2) for Hebrew so that a word-final apostrophe is treated as part of the word. This is relevant for words like בָּגָאז׳ or חָגָ׳ג׳, where the apostrophe represents a phonemic element and must not be ignored during tokenization or phoneme conversion. Replacement rules will change ׳ (HEBREW PUNCTUATION GERESH, U+05F3) to ' (the standard apostrophe, U+0027).

2 months ago · 890e946c0f
--- a/src/libespeak-ng/tr_languages.c
+++ b/src/libespeak-ng/tr_languages.c
@@ -66,7 +66,7 @@ static const ALPHABET alphabets[] = {
 	{ "_el",    OFFSET_GREEK,    0x380, 0x3ff,  L('e', 'l'), AL_DONT_NAME | AL_NOT_LETTERS | AL_WORDS },
 	{ "_cyr",   OFFSET_CYRILLIC, 0x400, 0x52f,  0, 0 },
 	{ "_hy",    OFFSET_ARMENIAN, 0x530, 0x58f,  L('h', 'y'), AL_WORDS },
 	{ "_he",    OFFSET_HEBREW,   0x590, 0x5ff,  0, 0 },
 	{ "_he",    OFFSET_HEBREW,   0x590, 0x5ff,  L('h', 'e'), 0 },
 	{ "_ar",    OFFSET_ARABIC,   0x600, 0x6ff,  0, 0 },
 	{ "_syc",   OFFSET_SYRIAC,   0x700, 0x74f,  0, 0 },
 	{ "_hi",    OFFSET_DEVANAGARI, 0x900, 0x97f, L('h', 'i'), AL_WORDS },
@@ -868,6 +868,11 @@ Translator *SelectTranslator(const char *name)
        tr->langopts.ideographs = 1;
    }
        break;
 	case L('h','e'): // Hebrew
 	{
 		tr->langopts.param[LOPT_APOSTROPHE] = 2; // bit 1  Apostrophe at end of word is part of the word, for words like בָּגָאז׳
 	}
 		break;
 	case L('g', 'a'): // irish
 	case L('g', 'd'): // scots gaelic
 	{