1. Store the encoding enumeration values in the Translation object, instead of the charset table. 2. Use the encoding.c charset table data instead of the ones in translate.c. 3. Remove the charset language file option -- it is only used in the Arabic language file, but is used incorrectly there. 4. Specify ISO 8859-6 for the 8-bit encoding for Arabic instead of UTF-8, so that espeakCHARS_8BIT and espeakCHARS_AUTO work correctly for Arabic.

8 years ago · 0b0661cef0
--- a/docs/voices.md
+++ b/docs/voices.md
  - [stressAdd](#stressadd)
  - [stressAmp](#stressamp)
  - [intonation](#intonation)
  - [charset](#charset)
  - [dictmin](#dictmin)
  - [alphabet2](#alphabet2)
 * 3 -- Less intonation, and comma does not raise the pitch.  
 * 4 -- Pitch rises (rather than falls) at the end of sentence.
 ### charset
 	charset <param1>
 The ISO 8859 character set number (not all are implemented).
 ### dictmin
 	dictmin <value>
--- a/espeak-ng-data/lang/sem/ar
+++ b/espeak-ng-data/lang/sem/ar
 echo 20 10 
 voicing 75 
 consonants 150 150
 charset UTF-8
--- a/src/libespeak-ng/encoding.c
+++ b/src/libespeak-ng/encoding.c
 	0x0037, 0x0038, 0x0039, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, // f8
 };
 // Code page lookup, indexed by an espeak_ng_ENCODING value (readclause.c
 // indexes it with translator->encoding).
 // A NULL entry means there is no table for that encoding; readclause.c then
 // substitutes ' ' for input bytes >= 0x80.
 // A non-NULL table is indexed with (byte - 0x80).
 const uint16_t *codepage_tables[] = {
 	NULL, // unknown
 	NULL, // ASCII
 	ISO_8859_1,
 	ISO_8859_2,
 	ISO_8859_3,
 	ISO_8859_4,
 	ISO_8859_5,
 	ISO_8859_6,
 	ISO_8859_7,
 	ISO_8859_8,
 	ISO_8859_9,
 	ISO_8859_10,
 	ISO_8859_11,
 	// ISO-8859-12 is not a valid encoding.
 	ISO_8859_13,
 	ISO_8859_14,
 	ISO_8859_15,
 	ISO_8859_16,
 	KOI8_R,
 	ISCII,
 	NULL, // UTF-8
 	NULL, // UCS-2
 };
 static uint32_t
 string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder)
 {
--- a/src/libespeak-ng/encoding.h
+++ b/src/libespeak-ng/encoding.h
 {
 #endif
 // Code page tables defined in encoding.c, indexed by encoding value.
 // Entries may be NULL (no table for that encoding).
 extern const uint16_t *codepage_tables[]; // transitional data table
 typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER;
 espeak_ng_TEXT_DECODER *
--- a/src/libespeak-ng/readclause.c
+++ b/src/libespeak-ng/readclause.c
 #include <espeak-ng/speak_lib.h>
 #include "error.h"
 #include "encoding.h"
 #include "speech.h"
 #include "phoneme.h"
 #include "synthesize.h"
 	// 8 bit character set, convert to unicode if
 	count_characters++;
 	if (c1 >= 0xa0)
 		return translator->charset_a0[c1-0xa0];
 	if (c1 >= 0x80) {
 		const uint16_t *codepage = codepage_tables[translator->encoding];
 		return codepage ? codepage[c1 - 0x80] : ' ';
 	}
 	return c1;
 }
--- a/src/libespeak-ng/tr_languages.c
+++ b/src/libespeak-ng/tr_languages.c
 	if ((tr = (Translator *)malloc(sizeof(Translator))) == NULL)
 		return NULL;
 	tr->charset_a0 = charsets[1]; // ISO-8859-1, this is for when the input is not utf8
 	tr->encoding = ESPEAKNG_ENCODING_ISO_8859_1;
 	dictionary_name[0] = 0;
 	tr->dictionary_name[0] = 0;
 	tr->dict_condition = 0;
 	static const char ru_nothard[] = { 0x11, 0x12, 0x13, 0x14, 0x17, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1f, 0x20, 0x21, 0x22, 0x24, 0x25, 0x27, 0x29, 0x2c, 0 };
 	static const char ru_voiced[] = { 0x11, 0x12, 0x13, 0x14, 0x16, 0x17, 0 };    // letter group G  (voiced obstruents)
 	static const char ru_ivowels[] = { 0x2c, 0x2e, 0x2f, 0x31, 0 };   // letter group Y  (iotated vowels & soft-sign)
 	tr->charset_a0 = charsets[18];   // KOI8-R
 	tr->encoding = ESPEAKNG_ENCODING_KOI8_R;
 	tr->transpose_min = 0x430;  // convert cyrillic from unicode into range 0x01 to 0x22
 	tr->transpose_max = 0x451;
 	tr->transpose_map = NULL;
 		tr->letter_bits_offset = OFFSET_ARABIC;
 		tr->langopts.numbers = NUM_SWAP_TENS | NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_AND_HUNDRED | NUM_THOUSAND_AND | NUM_OMIT_1_THOUSAND;
 		tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_6;
 		break;
 	case L('b', 'g'): // Bulgarian
 	{
 		SetCyrillicLetters(tr);
 		SetLetterVowel(tr, 0x2a);
 		tr->charset_a0 = charsets[5]; // ISO-8859-5
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_5;
 		tr->langopts.param[LOPT_UNPRONOUNCABLE] = 0x432; // [v]  don't count this character at start of word
 		tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x107; // devoice at end of word, and change voicing to match a following consonant (except v)
 		tr->langopts.param[LOPT_REDUCE] = 2;
 		SetupTranslator(tr, stress_lengths_cy, stress_amps_cy);
 		tr->charset_a0 = charsets[14]; // ISO-8859-14
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_14;
 		tr->langopts.stress_rule = STRESSPOSN_2R;
 		// 'diminished' is an unstressed final syllable
 		SetupTranslator(tr, stress_lengths_el, stress_amps_el);
 		tr->charset_a0 = charsets[7]; // ISO-8859-7
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_7;
 		tr->char_plus_apostrophe = el_char_apostrophe;
 		tr->letter_bits_offset = OFFSET_GREEK;
 		SetupTranslator(tr, stress_lengths_eo, stress_amps_eo);
 		tr->charset_a0 = charsets[3]; // ISO-8859-3
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_3;
 		tr->char_plus_apostrophe = eo_char_apostrophe;
 		tr->langopts.vowel_pause = 2;
 	}
 		break;
 	case L('e', 't'): // Estonian
 		tr->charset_a0 = charsets[4]; // ISO-8859-4
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4;
 		// fallthrough:
 	case L('f', 'i'): // Finnish
 	{
 		static const unsigned char stress_amps_hi[8] = { 17, 14, 20, 19, 20, 22, 22, 21 };
 		SetupTranslator(tr, stress_lengths_hi, stress_amps_hi);
 		tr->charset_a0 = charsets[19]; // ISCII
 		tr->encoding = ESPEAKNG_ENCODING_ISCII;
 		tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable
 		tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable
 			SetupTranslator(tr, stress_lengths_sr, stress_amps_hr);
 		else
 			SetupTranslator(tr, stress_lengths_hr, stress_amps_hr);
 		tr->charset_a0 = charsets[2]; // ISO-8859-2
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;
 		tr->langopts.stress_rule = STRESSPOSN_1L;
 		tr->langopts.stress_flags = S_FINAL_NO_2;
 		static const short stress_lengths_hu[8] = { 185, 195, 195, 190, 0, 0, 210, 220 };
 		SetupTranslator(tr, stress_lengths_hu, stress_amps_hu);
 		tr->charset_a0 = charsets[2]; // ISO-8859-2
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;
 		tr->langopts.vowel_pause = 0x20;
 		tr->langopts.stress_rule = STRESSPOSN_1L;
 		static const short stress_lengths_ku[8] = { 180, 180, 190, 180, 0, 0, 230, 240 };
 		SetupTranslator(tr, stress_lengths_ku, stress_amps_ku);
 		tr->charset_a0 = charsets[9]; // ISO-8859-9 - Latin5
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_9;
 		tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable
 		break;
 	case L('l', 'a'): // Latin
 	{
 		tr->charset_a0 = charsets[4]; // ISO-8859-4, includes a,e,i,o,u-macron
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4; // includes a,e,i,o,u-macron
 		tr->langopts.stress_rule = STRESSPOSN_2R;
 		tr->langopts.stress_flags = S_NO_AUTO_2;
 		tr->langopts.unstressed_wd1 = 0;
 		break;
 	case L('l', 't'): // Lithuanian
 	{
 		tr->charset_a0 = charsets[4]; // ISO-8859-4
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4;
 		tr->langopts.stress_rule = STRESSPOSN_2R;
 		tr->langopts.stress_flags = S_NO_AUTO_2;
 		tr->langopts.unstressed_wd1 = 0;
 		tr->langopts.stress_rule = STRESSPOSN_1L;
 		tr->langopts.spelling_stress = 1;
 		tr->charset_a0 = charsets[4]; // ISO-8859-4
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4;
 		tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_4 | NUM_ORDINAL_DOT;
 		tr->langopts.stress_flags = S_NO_AUTO_2 | S_FINAL_DIM | S_FINAL_DIM_ONLY | S_EO_CLAUSE1;
 	}
 		static const short stress_lengths_mk[8] = { 180, 160, 200, 200, 0, 0, 220, 230 };
 		SetupTranslator(tr, stress_lengths_mk, stress_amps_mk);
 		tr->charset_a0 = charsets[5]; // ISO-8859-5
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_5;
 		tr->letter_groups[0] = tr->letter_groups[7] = vowels_cyrillic;
 		tr->letter_bits_offset = OFFSET_CYRILLIC;
 		break;
 	case L('m', 't'): // Maltese
 	{
 		tr->charset_a0 = charsets[3]; // ISO-8859-3
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_3;
 		tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x100; // devoice at end of word
 		tr->langopts.stress_rule = STRESSPOSN_2R; // penultimate
 		tr->langopts.numbers = 1;
 		SetupTranslator(tr, stress_lengths_pl, stress_amps_pl);
 		tr->charset_a0 = charsets[2]; // ISO-8859-2
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;
 		tr->langopts.stress_rule = STRESSPOSN_2R;
 		tr->langopts.stress_flags = S_FINAL_DIM_ONLY; // mark unstressed final syllables as diminished
 		tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x9;
 		tr->langopts.stress_rule = STRESSPOSN_1R;
 		tr->langopts.stress_flags = S_FINAL_VOWEL_UNSTRESSED | S_FINAL_DIM_ONLY;
 		tr->charset_a0 = charsets[2]; // ISO-8859-2
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;
 		tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_3 | NUM_AND_UNITS | NUM_ROMAN;
 		tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex
 	}
 		static const char *sk_voiced = "bdgjlmnrvwzaeiouy";
 		SetupTranslator(tr, stress_lengths_sk, stress_amps_sk);
 		tr->charset_a0 = charsets[2]; // ISO-8859-2
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;
 		tr->langopts.stress_rule = STRESSPOSN_1L;
 		tr->langopts.stress_flags = S_FINAL_DIM_ONLY | S_FINAL_NO_2;
 	}
 		break;
 	case L('s', 'l'): // Slovenian
 		tr->charset_a0 = charsets[2]; // ISO-8859-2
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;
 		tr->langopts.stress_rule = STRESSPOSN_2R; // Temporary
 		tr->langopts.stress_flags = S_NO_AUTO_2;
 		tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x103;
 		static const short stress_lengths_tr[8] = { 190, 180, 200, 230, 0, 0, 240, 250 };
 		SetupTranslator(tr, stress_lengths_tr, stress_amps_tr);
 		tr->charset_a0 = charsets[9]; // ISO-8859-9 - Latin5
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_9;
 		tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable
 		tr->langopts.stress_flags = S_NO_AUTO_2; // no automatic secondary stress
--- a/src/libespeak-ng/translate.c
+++ b/src/libespeak-ng/translate.c
 // Additional characters that split a word without generating a pause.
 // The list ends with a 0 entry.
 static const unsigned short breaks[] = {
 	'_',
 	0,
 };
 // Translate character codes 0xA0 to 0xFF into their unicode values.
 // The table has 0x60 entries; entry [i] is the value for byte 0xA0 + i
 // (readclause.c indexes it with c1 - 0xa0).
 // ISO_8859_1 is set as default; for this range it is the identity mapping
 // (every byte maps to the code point with the same value).
 static const unsigned short ISO_8859_1[0x60] = {
 	0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, // a0
 	0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, // a8
 	0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, // b0
 	0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, // b8
 	0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // c0
 	0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8
 	0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, // d0
 	0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, // d8
 	0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, // e0
 	0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8
 	0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, // f0
 	0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, // f8
 };
 // ISO-8859-2: unicode values for bytes 0xA0..0xFF.
 // Entry [i] is the value for byte 0xA0 + i; the trailing comment on each row
 // gives the byte value of its first entry.
 static const unsigned short ISO_8859_2[0x60] = {
 	0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, // a0
 	0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, // a8
 	0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, // b0
 	0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, // b8
 	0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, // c0
 	0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, // c8
 	0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, // d0
 	0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, // d8
 	0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, // e0
 	0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, // e8
 	0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, // f0
 	0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, // f8
 };
 // ISO-8859-3: unicode values for bytes 0xA0..0xFF.
 // Entry [i] is the value for byte 0xA0 + i.
 // NOTE(review): the 0x0000 entries presumably mark byte values that have no
 // assigned character in this charset -- confirm against the standard.
 static const unsigned short ISO_8859_3[0x60] = {
 	0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, // a0
 	0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, // a8
 	0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, // b0
 	0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, // b8
 	0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, // c0
 	0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8
 	0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, // d0
 	0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, // d8
 	0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, // e0
 	0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8
 	0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, // f0
 	0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, // f8
 };
 // ISO-8859-4: unicode values for bytes 0xA0..0xFF.
 // Entry [i] is the value for byte 0xA0 + i; the trailing comment on each row
 // gives the byte value of its first entry.
 static const unsigned short ISO_8859_4[0x60] = {
 	0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, // a0
 	0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, // a8
 	0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, // b0
 	0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, // b8
 	0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, // c0
 	0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, // c8
 	0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, // d0
 	0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, // d8
 	0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, // e0
 	0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, // e8
 	0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, // f0
 	0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, // f8
 };
 // ISO-8859-5: unicode values for bytes 0xA0..0xFF.
 // Entry [i] is the value for byte 0xA0 + i.
 // Most entries are in the 0x04xx (Cyrillic) range; the exceptions in this
 // table are 0x00a0, 0x00ad, 0x2116 and 0x00a7.
 static const unsigned short ISO_8859_5[0x60] = {
 	0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, // a0  Cyrillic
 	0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, // a8
 	0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, // b0
 	0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, // b8
 	0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, // c0
 	0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, // c8
 	0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, // d0
 	0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, // d8
 	0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, // e0
 	0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, // e8
 	0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, // f0
 	0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, // f8
 };
 // ISO-8859-7: unicode values for bytes 0xA0..0xFF.
 // Entry [i] is the value for byte 0xA0 + i.
 // NOTE(review): the 0x0000 entries (at 0xAE, 0xD2 and 0xFF) presumably mark
 // byte values that have no assigned character -- confirm against the standard.
 static const unsigned short ISO_8859_7[0x60] = {
 	0x00a0, 0x2018, 0x2019, 0x00a3, 0x20ac, 0x20af, 0x00a6, 0x00a7, // a0  Greek
 	0x00a8, 0x00a9, 0x037a, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, // a8
 	0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, // b0
 	0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, // b8
 	0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, // c0
 	0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, // c8
 	0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, // d0
 	0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, // d8
 	0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, // e0
 	0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, // e8
 	0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, // f0
 	0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, // f8
 };
 // ISO-8859-9: unicode values for bytes 0xA0..0xFF.
 // Entry [i] is the value for byte 0xA0 + i.
 // Identical to the ISO_8859_1 table except at bytes 0xD0, 0xDD, 0xDE, 0xF0,
 // 0xFD and 0xFE.
 static const unsigned short ISO_8859_9[0x60] = {
 	0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, // a0
 	0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, // a8
 	0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, // b0
 	0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, // b8
 	0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // c0
 	0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8
 	0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, // d0
 	0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, // d8
 	0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, // e0
 	0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8
 	0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, // f0
 	0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, // f8
 };
 // ISO-8859-14: unicode values for bytes 0xA0..0xFF.
 // Entry [i] is the value for byte 0xA0 + i; the trailing comment on each row
 // gives the byte value of its first entry.
 static const unsigned short ISO_8859_14[0x60] = {
 	0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7, // a0  Welsh
 	0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178, // a8
 	0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56, // b0
 	0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61, // b8
 	0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // c0
 	0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8
 	0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a, // d0
 	0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df, // d8
 	0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, // e0
 	0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8
 	0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b, // f0
 	0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff, // f8
 };
 // KOI8-R: unicode values for bytes 0xA0..0xFF.
 // Entry [i] is the value for byte 0xA0 + i.
 // Rows a0..b8 are mostly 0x25xx code points (plus 0x0451, 0x0401 and 0x00a9);
 // rows c0..f8 are 0x04xx (Cyrillic) code points, not in code-point order.
 static const unsigned short KOI8_R[0x60] = {
 	0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, // a0  Russian
 	0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e, // a8
 	0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, // b0
 	0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9, // b8
 	0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, // c0
 	0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, // c8
 	0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, // d0
 	0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a, // d8
 	0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, // e0
 	0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, // e8
 	0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, // f0
 	0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a, // f8
 };
 // ISCII: unicode values for bytes 0xA0..0xFF.
 // Entry [i] is the value for byte 0xA0 + i.
 // Most entries are 0x09xx code points; bytes 0xF1..0xFA map to the ASCII
 // digits 0x30..0x39.
 // NOTE(review): several positions map to 0x0020 (space), presumably bytes with
 // no usable equivalent -- confirm. The last row writes 0x20 rather than
 // 0x0020; the value is the same.
 static const unsigned short ISCII[0x60] = {
 	0x0020, 0x0901, 0x0902, 0x0903, 0x0905, 0x0906, 0x0907, 0x0908, // a0
 	0x0909, 0x090a, 0x090b, 0x090e, 0x090f, 0x0910, 0x090d, 0x0912, // a8
 	0x0913, 0x0914, 0x0911, 0x0915, 0x0916, 0x0917, 0x0918, 0x0919, // b0
 	0x091a, 0x091b, 0x091c, 0x091d, 0x091e, 0x091f, 0x0920, 0x0921, // b8
 	0x0922, 0x0923, 0x0924, 0x0925, 0x0926, 0x0927, 0x0928, 0x0929, // c0
 	0x092a, 0x092b, 0x092c, 0x092d, 0x092e, 0x092f, 0x095f, 0x0930, // c8
 	0x0931, 0x0932, 0x0933, 0x0934, 0x0935, 0x0936, 0x0937, 0x0938, // d0
 	0x0939, 0x0020, 0x093e, 0x093f, 0x0940, 0x0941, 0x0942, 0x0943, // d8
 	0x0946, 0x0947, 0x0948, 0x0945, 0x094a, 0x094b, 0x094c, 0x0949, // e0
 	0x094d, 0x093c, 0x0964, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, // e8
 	0x0020, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, // f0
 	0x0037, 0x0038, 0x0039, 0x20,   0x20,   0x20,   0x20,   0x20,   // f8
 };
 // Charset tables selectable by number (N_CHARSETS entries).
 // Index n in 1..16 stands for ISO-8859-n, index 18 is KOI8-R and index 19 is
 // ISCII. Index 0 and every number without a dedicated table here fall back to
 // ISO_8859_1.
 // NOTE(review): the voice-file "charset" handler in voices.c checks only
 // value < N_CHARSETS before indexing this array; a negative value is not
 // rejected there.
 const unsigned short *charsets[N_CHARSETS] = {
 	ISO_8859_1,
 	ISO_8859_1,
 	ISO_8859_2,
 	ISO_8859_3,
 	ISO_8859_4,
 	ISO_8859_5,
 	ISO_8859_1,
 	ISO_8859_7,
 	ISO_8859_1,
 	ISO_8859_9,
 	ISO_8859_1,
 	ISO_8859_1,
 	ISO_8859_1,
 	ISO_8859_1,
 	ISO_8859_14,
 	ISO_8859_1,
 	ISO_8859_1,
 	ISO_8859_1,
 	KOI8_R,          // 18
 	ISCII
 };
 // Tables of the relative lengths of vowels, depending on the
 // type of the two phonemes that follow
 // indexes are the "length_mod" value for the following phonemes
--- a/src/libespeak-ng/translate.h
+++ b/src/libespeak-ng/translate.h
 #define N_RULE_GROUP2    120 // max num of two-letter rule chains
 #define N_HASH_DICT     1024
 #define N_CHARSETS        20
 #define N_LETTER_GROUPS   95 // maximum is 127-32
 // dictionary flags, word 1
 	short stress_lengths[8];
 	int dict_condition;    // conditional apply some pronunciation rules and dict.lookups
 	int dict_min_size;
 	const unsigned short *charset_a0;   // unicodes for characters 0xa0 to oxff
 	espeak_ng_ENCODING encoding;
 	const wchar_t *char_plus_apostrophe;  // single chars + apostrophe treated as words
 	const wchar_t *punct_within_word;   // allow these punctuation characters within words
 	const unsigned short *chars_ignore;
 extern Translator *translator;
 extern Translator *translator2;
 // Charset tables defined in translate.c (N_CHARSETS entries); each gives the
 // unicode values for bytes 0xA0..0xFF.
 extern const unsigned short *charsets[N_CHARSETS];
 extern char dictionary_name[40];
 extern char ctrl_embedded;    // to allow an alternative CTRL for embedded commands
 extern unsigned char *p_textinput;
--- a/src/libespeak-ng/voices.c
+++ b/src/libespeak-ng/voices.c
 	V_DICTRULES,
 	V_STRESSRULE,
 	V_STRESSOPT,
 	V_CHARSET,
 	V_NUMBERS,
 	V_OPTION,
 	{ "dictrules",    V_DICTRULES },
 	{ "stressrule",   V_STRESSRULE },
 	{ "stressopt",    V_STRESSOPT },
 	{ "charset",      V_CHARSET },
 	{ "replace",      V_REPLACE },
 	{ "words",        V_WORDGAP },
 	{ "echo",         V_ECHO },
 			       &langopts->unstressed_wd1,
 			       &langopts->unstressed_wd2);
 			break;
 		case V_CHARSET:
 			if ((sscanf(p, "%d", &value) == 1) && (value < N_CHARSETS)) {
 				if (new_translator != NULL)
 					new_translator->charset_a0 = charsets[value];
 				else
 					fprintf(stderr, "The charset attribute is specified before language.\n");
 			}
 			break;
 		case V_OPTION:
 			value2 = 0;
 			if (((sscanf(p, "%s %d %d", option_name, &value, &value2) >= 2) && ((ix = LookupMnem(options_tab, option_name)) >= 0)) ||