1. Store the encoding enumeration value in the Translator object, instead of a pointer to the charset table. 2. Use the charset table data in encoding.c instead of the tables in translate.c. 3. Remove the charset language file option -- it is only used in the Arabic language file, and is used incorrectly there (the file passes `UTF-8`, but the option is parsed as an integer charset number). 4. Specify ISO 8859-6 as the 8-bit encoding for Arabic instead of UTF-8, so that espeakCHARS_8BIT and espeakCHARS_AUTO work correctly for Arabic.

8 years ago · 0b0661cef0
--- a/docs/voices.md
+++ b/docs/voices.md
@@ -29,7 +29,6 @@
  - [stressAdd](#stressadd)
  - [stressAmp](#stressamp)
  - [intonation](#intonation)
  - [charset](#charset)
  - [dictmin](#dictmin)
  - [alphabet2](#alphabet2)

@@ -418,12 +417,6 @@ these defaults may be different for particular languages.
 * 3 -- Less intonation, and comma does not raise the pitch.  
 * 4 -- Pitch rises (rather than falls) at the end of sentence.

 ### charset

 	charset <param1>

 The ISO 8859 character set number. (not all are implemented).

 ### dictmin

 	dictmin <value>
--- a/espeak-ng-data/lang/sem/ar
+++ b/espeak-ng-data/lang/sem/ar
@@ -19,8 +19,3 @@ stressAdd 0 0 -10 -10 0 0 10 40
 echo 20 10 
 voicing 75 
 consonants 150 150

 charset UTF-8



--- a/src/libespeak-ng/encoding.c
+++ b/src/libespeak-ng/encoding.c
@@ -517,6 +517,31 @@ static const uint16_t ISCII[0x80] = {
 	0x0037, 0x0038, 0x0039, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, // f8
 };

 // Lookup table from an encoding identifier to its 8-bit codepage table.
 //
 // GetC() in readclause.c indexes this array with translator->encoding and
 // then indexes the selected table with (byte - 0x80), so every non-NULL entry
 // is expected to hold 0x80 values covering the bytes 0x80..0xff (the ISCII
 // table in this file is declared as [0x80]).
 // A NULL entry means there is no single-byte table for that encoding; GetC()
 // substitutes a space for such bytes.
 //
 // NOTE(review): the entry order presumably has to mirror the declaration
 // order of the espeak_ng_ENCODING enumerators -- confirm against the enum
 // before adding, removing or reordering entries.
 const uint16_t *codepage_tables[] = {
 	NULL, // unknown
 	NULL, // ASCII
 	ISO_8859_1,
 	ISO_8859_2,
 	ISO_8859_3,
 	ISO_8859_4,
 	ISO_8859_5,
 	ISO_8859_6,
 	ISO_8859_7,
 	ISO_8859_8,
 	ISO_8859_9,
 	ISO_8859_10,
 	ISO_8859_11,
 	// ISO-8859-12 is not a valid encoding.
 	ISO_8859_13,
 	ISO_8859_14,
 	ISO_8859_15,
 	ISO_8859_16,
 	KOI8_R,
 	ISCII,
 	NULL, // UTF-8 (multi-byte; no single-byte table)
 	NULL, // UCS-2 (multi-byte; no single-byte table)
 };

 static uint32_t
 string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder)
 {
--- a/src/libespeak-ng/encoding.h
+++ b/src/libespeak-ng/encoding.h
@@ -22,6 +22,8 @@ extern "C"
 {
 #endif

 extern const uint16_t *codepage_tables[]; // transitional data table

 typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER;

 espeak_ng_TEXT_DECODER *
--- a/src/libespeak-ng/readclause.c
+++ b/src/libespeak-ng/readclause.c
@@ -35,6 +35,7 @@
 #include <espeak-ng/speak_lib.h>

 #include "error.h"
 #include "encoding.h"
 #include "speech.h"
 #include "phoneme.h"
 #include "synthesize.h"
@@ -375,8 +376,10 @@ static int GetC(void)

 	// 8 bit character set: convert to unicode if the byte is outside the ASCII range
 	count_characters++;
 	if (c1 >= 0xa0)
 		return translator->charset_a0[c1-0xa0];
 	if (c1 >= 0x80) {
 		const uint16_t *codepage = codepage_tables[translator->encoding];
 		return codepage ? codepage[c1 - 0x80] : ' ';
 	}
 	return c1;
 }

--- a/src/libespeak-ng/tr_languages.c
+++ b/src/libespeak-ng/tr_languages.c
@@ -223,7 +223,7 @@ static Translator *NewTranslator(void)
 	if ((tr = (Translator *)malloc(sizeof(Translator))) == NULL)
 		return NULL;

 	tr->charset_a0 = charsets[1]; // ISO-8859-1, this is for when the input is not utf8
 	tr->encoding = ESPEAKNG_ENCODING_ISO_8859_1;
 	dictionary_name[0] = 0;
 	tr->dictionary_name[0] = 0;
 	tr->dict_condition = 0;
@@ -380,7 +380,7 @@ static void SetCyrillicLetters(Translator *tr)
 	static const char ru_nothard[] = { 0x11, 0x12, 0x13, 0x14, 0x17, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1f, 0x20, 0x21, 0x22, 0x24, 0x25, 0x27, 0x29, 0x2c, 0 };
 	static const char ru_voiced[] = { 0x11, 0x12, 0x13, 0x14, 0x16, 0x17, 0 };    // letter group G  (voiced obstruents)
 	static const char ru_ivowels[] = { 0x2c, 0x2e, 0x2f, 0x31, 0 };   // letter group Y  (iotated vowels & soft-sign)
 	tr->charset_a0 = charsets[18];   // KOI8-R
 	tr->encoding = ESPEAKNG_ENCODING_KOI8_R;
 	tr->transpose_min = 0x430;  // convert cyrillic from unicode into range 0x01 to 0x22
 	tr->transpose_max = 0x451;
 	tr->transpose_map = NULL;
@@ -491,12 +491,13 @@ Translator *SelectTranslator(const char *name)
 		tr->letter_bits_offset = OFFSET_ARABIC;
 		tr->langopts.numbers = NUM_SWAP_TENS | NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_AND_HUNDRED | NUM_THOUSAND_AND | NUM_OMIT_1_THOUSAND;
 		tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_6;
 		break;
 	case L('b', 'g'): // Bulgarian
 	{
 		SetCyrillicLetters(tr);
 		SetLetterVowel(tr, 0x2a);
 		tr->charset_a0 = charsets[5]; // ISO-8859-5
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_5;
 		tr->langopts.param[LOPT_UNPRONOUNCABLE] = 0x432; // [v]  don't count this character at start of word
 		tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x107; // devoice at end of word, and change voicing to match a following consonant (except v)
 		tr->langopts.param[LOPT_REDUCE] = 2;
@@ -553,7 +554,7 @@ Translator *SelectTranslator(const char *name)

 		SetupTranslator(tr, stress_lengths_cy, stress_amps_cy);

 		tr->charset_a0 = charsets[14]; // ISO-8859-14
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_14;
 		tr->langopts.stress_rule = STRESSPOSN_2R;

 		// 'diminished' is an unstressed final syllable
@@ -638,7 +639,7 @@ Translator *SelectTranslator(const char *name)

 		SetupTranslator(tr, stress_lengths_el, stress_amps_el);

 		tr->charset_a0 = charsets[7]; // ISO-8859-7
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_7;
 		tr->char_plus_apostrophe = el_char_apostrophe;

 		tr->letter_bits_offset = OFFSET_GREEK;
@@ -673,7 +674,7 @@ Translator *SelectTranslator(const char *name)

 		SetupTranslator(tr, stress_lengths_eo, stress_amps_eo);

 		tr->charset_a0 = charsets[3]; // ISO-8859-3
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_3;
 		tr->char_plus_apostrophe = eo_char_apostrophe;

 		tr->langopts.vowel_pause = 2;
@@ -769,7 +770,7 @@ Translator *SelectTranslator(const char *name)
 	}
 		break;
 	case L('e', 't'): // Estonian
 		tr->charset_a0 = charsets[4]; // ISO-8859-4
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4;
 		// fallthrough:
 	case L('f', 'i'): // Finnish
 	{
@@ -830,7 +831,7 @@ Translator *SelectTranslator(const char *name)
 		static const unsigned char stress_amps_hi[8] = { 17, 14, 20, 19, 20, 22, 22, 21 };

 		SetupTranslator(tr, stress_lengths_hi, stress_amps_hi);
 		tr->charset_a0 = charsets[19]; // ISCII
 		tr->encoding = ESPEAKNG_ENCODING_ISCII;
 		tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable

 		tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable
@@ -869,7 +870,7 @@ Translator *SelectTranslator(const char *name)
 			SetupTranslator(tr, stress_lengths_sr, stress_amps_hr);
 		else
 			SetupTranslator(tr, stress_lengths_hr, stress_amps_hr);
 		tr->charset_a0 = charsets[2]; // ISO-8859-2
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;

 		tr->langopts.stress_rule = STRESSPOSN_1L;
 		tr->langopts.stress_flags = S_FINAL_NO_2;
@@ -898,7 +899,7 @@ Translator *SelectTranslator(const char *name)
 		static const short stress_lengths_hu[8] = { 185, 195, 195, 190, 0, 0, 210, 220 };

 		SetupTranslator(tr, stress_lengths_hu, stress_amps_hu);
 		tr->charset_a0 = charsets[2]; // ISO-8859-2
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;

 		tr->langopts.vowel_pause = 0x20;
 		tr->langopts.stress_rule = STRESSPOSN_1L;
@@ -1091,7 +1092,7 @@ Translator *SelectTranslator(const char *name)
 		static const short stress_lengths_ku[8] = { 180, 180, 190, 180, 0, 0, 230, 240 };

 		SetupTranslator(tr, stress_lengths_ku, stress_amps_ku);
 		tr->charset_a0 = charsets[9]; // ISO-8859-9 - Latin5
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_9;

 		tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable

@@ -1104,7 +1105,7 @@ Translator *SelectTranslator(const char *name)
 		break;
 	case L('l', 'a'): // Latin
 	{
 		tr->charset_a0 = charsets[4]; // ISO-8859-4, includes a,e,i,o,u-macron
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4; // includes a,e,i,o,u-macron
 		tr->langopts.stress_rule = STRESSPOSN_2R;
 		tr->langopts.stress_flags = S_NO_AUTO_2;
 		tr->langopts.unstressed_wd1 = 0;
@@ -1116,7 +1117,7 @@ Translator *SelectTranslator(const char *name)
 		break;
 	case L('l', 't'): // Lithuanian
 	{
 		tr->charset_a0 = charsets[4]; // ISO-8859-4
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4;
 		tr->langopts.stress_rule = STRESSPOSN_2R;
 		tr->langopts.stress_flags = S_NO_AUTO_2;
 		tr->langopts.unstressed_wd1 = 0;
@@ -1136,7 +1137,7 @@ Translator *SelectTranslator(const char *name)

 		tr->langopts.stress_rule = STRESSPOSN_1L;
 		tr->langopts.spelling_stress = 1;
 		tr->charset_a0 = charsets[4]; // ISO-8859-4
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4;
 		tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_4 | NUM_ORDINAL_DOT;
 		tr->langopts.stress_flags = S_NO_AUTO_2 | S_FINAL_DIM | S_FINAL_DIM_ONLY | S_EO_CLAUSE1;
 	}
@@ -1152,7 +1153,7 @@ Translator *SelectTranslator(const char *name)
 		static const short stress_lengths_mk[8] = { 180, 160, 200, 200, 0, 0, 220, 230 };

 		SetupTranslator(tr, stress_lengths_mk, stress_amps_mk);
 		tr->charset_a0 = charsets[5]; // ISO-8859-5
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_5;
 		tr->letter_groups[0] = tr->letter_groups[7] = vowels_cyrillic;
 		tr->letter_bits_offset = OFFSET_CYRILLIC;

@@ -1163,7 +1164,7 @@ Translator *SelectTranslator(const char *name)
 		break;
 	case L('m', 't'): // Maltese
 	{
 		tr->charset_a0 = charsets[3]; // ISO-8859-3
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_3;
 		tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x100; // devoice at end of word
 		tr->langopts.stress_rule = STRESSPOSN_2R; // penultimate
 		tr->langopts.numbers = 1;
@@ -1215,7 +1216,7 @@ Translator *SelectTranslator(const char *name)

 		SetupTranslator(tr, stress_lengths_pl, stress_amps_pl);

 		tr->charset_a0 = charsets[2]; // ISO-8859-2
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;
 		tr->langopts.stress_rule = STRESSPOSN_2R;
 		tr->langopts.stress_flags = S_FINAL_DIM_ONLY; // mark unstressed final syllables as diminished
 		tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x9;
@@ -1256,7 +1257,7 @@ Translator *SelectTranslator(const char *name)
 		tr->langopts.stress_rule = STRESSPOSN_1R;
 		tr->langopts.stress_flags = S_FINAL_VOWEL_UNSTRESSED | S_FINAL_DIM_ONLY;

 		tr->charset_a0 = charsets[2]; // ISO-8859-2
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;
 		tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_3 | NUM_AND_UNITS | NUM_ROMAN;
 		tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex
 	}
@@ -1280,7 +1281,7 @@ Translator *SelectTranslator(const char *name)
 		static const char *sk_voiced = "bdgjlmnrvwzaeiouy";

 		SetupTranslator(tr, stress_lengths_sk, stress_amps_sk);
 		tr->charset_a0 = charsets[2]; // ISO-8859-2
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;

 		tr->langopts.stress_rule = STRESSPOSN_1L;
 		tr->langopts.stress_flags = S_FINAL_DIM_ONLY | S_FINAL_NO_2;
@@ -1329,7 +1330,7 @@ Translator *SelectTranslator(const char *name)
 	}
 		break;
 	case L('s', 'l'): // Slovenian
 		tr->charset_a0 = charsets[2]; // ISO-8859-2
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;
 		tr->langopts.stress_rule = STRESSPOSN_2R; // Temporary
 		tr->langopts.stress_flags = S_NO_AUTO_2;
 		tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x103;
@@ -1432,7 +1433,7 @@ Translator *SelectTranslator(const char *name)
 		static const short stress_lengths_tr[8] = { 190, 180, 200, 230, 0, 0, 240, 250 };

 		SetupTranslator(tr, stress_lengths_tr, stress_amps_tr);
 		tr->charset_a0 = charsets[9]; // ISO-8859-9 - Latin5
 		tr->encoding = ESPEAKNG_ENCODING_ISO_8859_9;

 		tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable
 		tr->langopts.stress_flags = S_NO_AUTO_2; // no automatic secondary stress
--- a/src/libespeak-ng/translate.c
+++ b/src/libespeak-ng/translate.c
@@ -112,181 +112,6 @@ static const unsigned short brackets[] = {
 // other characters which break a word, but don't produce a pause
 static const unsigned short breaks[] = { '_', 0 };

 // Translate character codes 0xA0 to 0xFF into their unicode values
 // ISO_8859_1 is set as default
 static const unsigned short ISO_8859_1[0x60] = {
 	0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, // a0
 	0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, // a8
 	0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, // b0
 	0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, // b8
 	0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // c0
 	0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8
 	0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, // d0
 	0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, // d8
 	0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, // e0
 	0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8
 	0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, // f0
 	0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, // f8
 };

 static const unsigned short ISO_8859_2[0x60] = {
 	0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, // a0
 	0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, // a8
 	0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, // b0
 	0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, // b8
 	0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, // c0
 	0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, // c8
 	0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, // d0
 	0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, // d8
 	0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, // e0
 	0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, // e8
 	0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, // f0
 	0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, // f8
 };

 static const unsigned short ISO_8859_3[0x60] = {
 	0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, // a0
 	0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, // a8
 	0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, // b0
 	0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, // b8
 	0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, // c0
 	0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8
 	0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, // d0
 	0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, // d8
 	0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, // e0
 	0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8
 	0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, // f0
 	0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, // f8
 };

 static const unsigned short ISO_8859_4[0x60] = {
 	0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, // a0
 	0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, // a8
 	0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, // b0
 	0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, // b8
 	0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, // c0
 	0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, // c8
 	0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, // d0
 	0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, // d8
 	0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, // e0
 	0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, // e8
 	0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, // f0
 	0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, // f8
 };

 static const unsigned short ISO_8859_5[0x60] = {
 	0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, // a0  Cyrillic
 	0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, // a8
 	0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, // b0
 	0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, // b8
 	0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, // c0
 	0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, // c8
 	0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, // d0
 	0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, // d8
 	0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, // e0
 	0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, // e8
 	0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, // f0
 	0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, // f8
 };

 static const unsigned short ISO_8859_7[0x60] = {
 	0x00a0, 0x2018, 0x2019, 0x00a3, 0x20ac, 0x20af, 0x00a6, 0x00a7, // a0  Greek
 	0x00a8, 0x00a9, 0x037a, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, // a8
 	0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, // b0
 	0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, // b8
 	0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, // c0
 	0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, // c8
 	0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, // d0
 	0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, // d8
 	0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, // e0
 	0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, // e8
 	0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, // f0
 	0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, // f8
 };

 static const unsigned short ISO_8859_9[0x60] = {
 	0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, // a0
 	0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, // a8
 	0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, // b0
 	0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, // b8
 	0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // c0
 	0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8
 	0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, // d0
 	0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, // d8
 	0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, // e0
 	0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8
 	0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, // f0
 	0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, // f8
 };

 static const unsigned short ISO_8859_14[0x60] = {
 	0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7, // a0  Welsh
 	0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178, // a8
 	0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56, // b0
 	0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61, // b8
 	0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // c0
 	0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8
 	0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a, // d0
 	0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df, // d8
 	0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, // e0
 	0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8
 	0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b, // f0
 	0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff, // f8
 };

 static const unsigned short KOI8_R[0x60] = {
 	0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, // a0  Russian
 	0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e, // a8
 	0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, // b0
 	0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9, // b8
 	0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, // c0
 	0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, // c8
 	0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, // d0
 	0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a, // d8
 	0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, // e0
 	0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, // e8
 	0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, // f0
 	0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a, // f8
 };

 static const unsigned short ISCII[0x60] = {
 	0x0020, 0x0901, 0x0902, 0x0903, 0x0905, 0x0906, 0x0907, 0x0908, // a0
 	0x0909, 0x090a, 0x090b, 0x090e, 0x090f, 0x0910, 0x090d, 0x0912, // a8
 	0x0913, 0x0914, 0x0911, 0x0915, 0x0916, 0x0917, 0x0918, 0x0919, // b0
 	0x091a, 0x091b, 0x091c, 0x091d, 0x091e, 0x091f, 0x0920, 0x0921, // b8
 	0x0922, 0x0923, 0x0924, 0x0925, 0x0926, 0x0927, 0x0928, 0x0929, // c0
 	0x092a, 0x092b, 0x092c, 0x092d, 0x092e, 0x092f, 0x095f, 0x0930, // c8
 	0x0931, 0x0932, 0x0933, 0x0934, 0x0935, 0x0936, 0x0937, 0x0938, // d0
 	0x0939, 0x0020, 0x093e, 0x093f, 0x0940, 0x0941, 0x0942, 0x0943, // d8
 	0x0946, 0x0947, 0x0948, 0x0945, 0x094a, 0x094b, 0x094c, 0x0949, // e0
 	0x094d, 0x093c, 0x0964, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, // e8
 	0x0020, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, // f0
 	0x0037, 0x0038, 0x0039, 0x20,   0x20,   0x20,   0x20,   0x20,   // f8
 };

 const unsigned short *charsets[N_CHARSETS] = {
 	ISO_8859_1,
 	ISO_8859_1,
 	ISO_8859_2,
 	ISO_8859_3,
 	ISO_8859_4,
 	ISO_8859_5,
 	ISO_8859_1,
 	ISO_8859_7,
 	ISO_8859_1,
 	ISO_8859_9,
 	ISO_8859_1,
 	ISO_8859_1,
 	ISO_8859_1,
 	ISO_8859_1,
 	ISO_8859_14,
 	ISO_8859_1,
 	ISO_8859_1,
 	ISO_8859_1,
 	KOI8_R,          // 18
 	ISCII
 };

 // Tables of the relative lengths of vowels, depending on the
 // type of the two phonemes that follow
 // indexes are the "length_mod" value for the following phonemes
--- a/src/libespeak-ng/translate.h
+++ b/src/libespeak-ng/translate.h
@@ -34,7 +34,6 @@ extern "C"

 #define N_RULE_GROUP2    120 // max num of two-letter rule chains
 #define N_HASH_DICT     1024
 #define N_CHARSETS        20
 #define N_LETTER_GROUPS   95 // maximum is 127-32

 // dictionary flags, word 1
@@ -603,7 +602,7 @@ typedef struct {
 	short stress_lengths[8];
 	int dict_condition;    // conditional apply some pronunciation rules and dict.lookups
 	int dict_min_size;
 	const unsigned short *charset_a0;   // unicodes for characters 0xa0 to oxff
 	espeak_ng_ENCODING encoding;
 	const wchar_t *char_plus_apostrophe;  // single chars + apostrophe treated as words
 	const wchar_t *punct_within_word;   // allow these punctuation characters within words
 	const unsigned short *chars_ignore;
@@ -694,7 +693,6 @@ extern unsigned char punctuation_to_tone[INTONATION_TYPES][PUNCT_INTONATIONS];

 extern Translator *translator;
 extern Translator *translator2;
 extern const unsigned short *charsets[N_CHARSETS];
 extern char dictionary_name[40];
 extern char ctrl_embedded;    // to allow an alternative CTRL for embedded commands
 extern unsigned char *p_textinput;
--- a/src/libespeak-ng/voices.c
+++ b/src/libespeak-ng/voices.c
@@ -96,7 +96,6 @@ enum {
 	V_DICTRULES,
 	V_STRESSRULE,
 	V_STRESSOPT,
 	V_CHARSET,
 	V_NUMBERS,
 	V_OPTION,

@@ -140,7 +139,6 @@ static MNEM_TAB keyword_tab[] = {
 	{ "dictrules",    V_DICTRULES },
 	{ "stressrule",   V_STRESSRULE },
 	{ "stressopt",    V_STRESSOPT },
 	{ "charset",      V_CHARSET },
 	{ "replace",      V_REPLACE },
 	{ "words",        V_WORDGAP },
 	{ "echo",         V_ECHO },
@@ -766,14 +764,6 @@ voice_t *LoadVoice(const char *vname, int control)
 			       &langopts->unstressed_wd1,
 			       &langopts->unstressed_wd2);
 			break;
 		case V_CHARSET:
 			if ((sscanf(p, "%d", &value) == 1) && (value < N_CHARSETS)) {
 				if (new_translator != NULL)
 					new_translator->charset_a0 = charsets[value];
 				else
 					fprintf(stderr, "The charset attribute is specified before language.\n");
 			}
 			break;
 		case V_OPTION:
 			value2 = 0;
 			if (((sscanf(p, "%s %d %d", option_name, &value, &value2) >= 2) && ((ix = LookupMnem(options_tab, option_name)) >= 0)) ||