1. Store the encoding enumeration values in the Translator object, instead of the charset table.
2. Use the encoding.c charset table data instead of the ones in translate.c.
3. Remove the charset language file option -- it is only used in the Arabic language file, but is used incorrectly there.
4. Specify ISO 8859-6 for the 8-bit encoding for Arabic instead of UTF-8, so that espeakCHARS_8BIT and espeakCHARS_AUTO work correctly for Arabic.
master
@@ -29,7 +29,6 @@ | |||
- [stressAdd](#stressadd) | |||
- [stressAmp](#stressamp) | |||
- [intonation](#intonation) | |||
- [charset](#charset) | |||
- [dictmin](#dictmin) | |||
- [alphabet2](#alphabet2) | |||
@@ -418,12 +417,6 @@ these defaults may be different for particular languages. | |||
* 3 -- Less intonation, and comma does not raise the pitch. | |||
* 4 -- Pitch rises (rather than falls) at the end of sentence. | |||
### charset | |||
charset <param1> | |||
The ISO 8859 character set number (not all character sets are implemented). | |||
### dictmin | |||
dictmin <value> |
@@ -19,8 +19,3 @@ stressAdd 0 0 -10 -10 0 0 10 40 | |||
echo 20 10 | |||
voicing 75 | |||
consonants 150 150 | |||
charset UTF-8 | |||
@@ -517,6 +517,31 @@ static const uint16_t ISCII[0x80] = { | |||
0x0037, 0x0038, 0x0039, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, // f8 | |||
}; | |||
// Per-encoding lookup tables for 8-bit input. GetC indexes this array with the
// translator's `encoding` value and then looks up `byte - 0x80` in the selected
// table to obtain the Unicode code point, so the entry order here has to follow
// the numbering of the encoding values (presumably espeak_ng_ENCODING -- confirm
// against the enum declaration).
// A NULL entry means no 8-bit table exists for that encoding; GetC returns a
// space for bytes >= 0x80 in that case.
const uint16_t *codepage_tables[] = { | |||
NULL, // unknown | |||
NULL, // ASCII | |||
ISO_8859_1, | |||
ISO_8859_2, | |||
ISO_8859_3, | |||
ISO_8859_4, | |||
ISO_8859_5, | |||
ISO_8859_6, | |||
ISO_8859_7, | |||
ISO_8859_8, | |||
ISO_8859_9, | |||
ISO_8859_10, | |||
ISO_8859_11, | |||
// ISO-8859-12 is not a valid encoding. | |||
ISO_8859_13, | |||
ISO_8859_14, | |||
ISO_8859_15, | |||
ISO_8859_16, | |||
KOI8_R, | |||
ISCII, | |||
NULL, // UTF-8 | |||
NULL, // UCS-2 | |||
}; | |||
static uint32_t | |||
string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder) | |||
{ |
@@ -22,6 +22,8 @@ extern "C" | |||
{ | |||
#endif | |||
extern const uint16_t *codepage_tables[]; // transitional data table | |||
typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER; | |||
espeak_ng_TEXT_DECODER * |
@@ -35,6 +35,7 @@ | |||
#include <espeak-ng/speak_lib.h> | |||
#include "error.h" | |||
#include "encoding.h" | |||
#include "speech.h" | |||
#include "phoneme.h" | |||
#include "synthesize.h" | |||
@@ -375,8 +376,10 @@ static int GetC(void) | |||
// 8 bit character set, convert to unicode if the byte is outside the ASCII range | |||
count_characters++; | |||
if (c1 >= 0xa0) | |||
return translator->charset_a0[c1-0xa0]; | |||
if (c1 >= 0x80) { | |||
const uint16_t *codepage = codepage_tables[translator->encoding]; | |||
return codepage ? codepage[c1 - 0x80] : ' '; | |||
} | |||
return c1; | |||
} | |||
@@ -223,7 +223,7 @@ static Translator *NewTranslator(void) | |||
if ((tr = (Translator *)malloc(sizeof(Translator))) == NULL) | |||
return NULL; | |||
tr->charset_a0 = charsets[1]; // ISO-8859-1, this is for when the input is not utf8 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_1; | |||
dictionary_name[0] = 0; | |||
tr->dictionary_name[0] = 0; | |||
tr->dict_condition = 0; | |||
@@ -380,7 +380,7 @@ static void SetCyrillicLetters(Translator *tr) | |||
static const char ru_nothard[] = { 0x11, 0x12, 0x13, 0x14, 0x17, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1f, 0x20, 0x21, 0x22, 0x24, 0x25, 0x27, 0x29, 0x2c, 0 }; | |||
static const char ru_voiced[] = { 0x11, 0x12, 0x13, 0x14, 0x16, 0x17, 0 }; // letter group G (voiced obstruents) | |||
static const char ru_ivowels[] = { 0x2c, 0x2e, 0x2f, 0x31, 0 }; // letter group Y (iotated vowels & soft-sign) | |||
tr->charset_a0 = charsets[18]; // KOI8-R | |||
tr->encoding = ESPEAKNG_ENCODING_KOI8_R; | |||
tr->transpose_min = 0x430; // convert cyrillic from unicode into range 0x01 to 0x22 | |||
tr->transpose_max = 0x451; | |||
tr->transpose_map = NULL; | |||
@@ -491,12 +491,13 @@ Translator *SelectTranslator(const char *name) | |||
tr->letter_bits_offset = OFFSET_ARABIC; | |||
tr->langopts.numbers = NUM_SWAP_TENS | NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_AND_HUNDRED | NUM_THOUSAND_AND | NUM_OMIT_1_THOUSAND; | |||
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_6; | |||
break; | |||
case L('b', 'g'): // Bulgarian | |||
{ | |||
SetCyrillicLetters(tr); | |||
SetLetterVowel(tr, 0x2a); | |||
tr->charset_a0 = charsets[5]; // ISO-8859-5 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_5; | |||
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 0x432; // [v] don't count this character at start of word | |||
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x107; // devoice at end of word, and change voicing to match a following consonant (except v) | |||
tr->langopts.param[LOPT_REDUCE] = 2; | |||
@@ -553,7 +554,7 @@ Translator *SelectTranslator(const char *name) | |||
SetupTranslator(tr, stress_lengths_cy, stress_amps_cy); | |||
tr->charset_a0 = charsets[14]; // ISO-8859-14 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_14; | |||
tr->langopts.stress_rule = STRESSPOSN_2R; | |||
// 'diminished' is an unstressed final syllable | |||
@@ -638,7 +639,7 @@ Translator *SelectTranslator(const char *name) | |||
SetupTranslator(tr, stress_lengths_el, stress_amps_el); | |||
tr->charset_a0 = charsets[7]; // ISO-8859-7 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_7; | |||
tr->char_plus_apostrophe = el_char_apostrophe; | |||
tr->letter_bits_offset = OFFSET_GREEK; | |||
@@ -673,7 +674,7 @@ Translator *SelectTranslator(const char *name) | |||
SetupTranslator(tr, stress_lengths_eo, stress_amps_eo); | |||
tr->charset_a0 = charsets[3]; // ISO-8859-3 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_3; | |||
tr->char_plus_apostrophe = eo_char_apostrophe; | |||
tr->langopts.vowel_pause = 2; | |||
@@ -769,7 +770,7 @@ Translator *SelectTranslator(const char *name) | |||
} | |||
break; | |||
case L('e', 't'): // Estonian | |||
tr->charset_a0 = charsets[4]; // ISO-8859-4 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4; | |||
// fallthrough: | |||
case L('f', 'i'): // Finnish | |||
{ | |||
@@ -830,7 +831,7 @@ Translator *SelectTranslator(const char *name) | |||
static const unsigned char stress_amps_hi[8] = { 17, 14, 20, 19, 20, 22, 22, 21 }; | |||
SetupTranslator(tr, stress_lengths_hi, stress_amps_hi); | |||
tr->charset_a0 = charsets[19]; // ISCII | |||
tr->encoding = ESPEAKNG_ENCODING_ISCII; | |||
tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable | |||
tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable | |||
@@ -869,7 +870,7 @@ Translator *SelectTranslator(const char *name) | |||
SetupTranslator(tr, stress_lengths_sr, stress_amps_hr); | |||
else | |||
SetupTranslator(tr, stress_lengths_hr, stress_amps_hr); | |||
tr->charset_a0 = charsets[2]; // ISO-8859-2 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2; | |||
tr->langopts.stress_rule = STRESSPOSN_1L; | |||
tr->langopts.stress_flags = S_FINAL_NO_2; | |||
@@ -898,7 +899,7 @@ Translator *SelectTranslator(const char *name) | |||
static const short stress_lengths_hu[8] = { 185, 195, 195, 190, 0, 0, 210, 220 }; | |||
SetupTranslator(tr, stress_lengths_hu, stress_amps_hu); | |||
tr->charset_a0 = charsets[2]; // ISO-8859-2 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2; | |||
tr->langopts.vowel_pause = 0x20; | |||
tr->langopts.stress_rule = STRESSPOSN_1L; | |||
@@ -1091,7 +1092,7 @@ Translator *SelectTranslator(const char *name) | |||
static const short stress_lengths_ku[8] = { 180, 180, 190, 180, 0, 0, 230, 240 }; | |||
SetupTranslator(tr, stress_lengths_ku, stress_amps_ku); | |||
tr->charset_a0 = charsets[9]; // ISO-8859-9 - Latin5 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_9; | |||
tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable | |||
@@ -1104,7 +1105,7 @@ Translator *SelectTranslator(const char *name) | |||
break; | |||
case L('l', 'a'): // Latin | |||
{ | |||
tr->charset_a0 = charsets[4]; // ISO-8859-4, includes a,e,i,o,u-macron | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4; // includes a,e,i,o,u-macron | |||
tr->langopts.stress_rule = STRESSPOSN_2R; | |||
tr->langopts.stress_flags = S_NO_AUTO_2; | |||
tr->langopts.unstressed_wd1 = 0; | |||
@@ -1116,7 +1117,7 @@ Translator *SelectTranslator(const char *name) | |||
break; | |||
case L('l', 't'): // Lithuanian | |||
{ | |||
tr->charset_a0 = charsets[4]; // ISO-8859-4 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4; | |||
tr->langopts.stress_rule = STRESSPOSN_2R; | |||
tr->langopts.stress_flags = S_NO_AUTO_2; | |||
tr->langopts.unstressed_wd1 = 0; | |||
@@ -1136,7 +1137,7 @@ Translator *SelectTranslator(const char *name) | |||
tr->langopts.stress_rule = STRESSPOSN_1L; | |||
tr->langopts.spelling_stress = 1; | |||
tr->charset_a0 = charsets[4]; // ISO-8859-4 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4; | |||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_4 | NUM_ORDINAL_DOT; | |||
tr->langopts.stress_flags = S_NO_AUTO_2 | S_FINAL_DIM | S_FINAL_DIM_ONLY | S_EO_CLAUSE1; | |||
} | |||
@@ -1152,7 +1153,7 @@ Translator *SelectTranslator(const char *name) | |||
static const short stress_lengths_mk[8] = { 180, 160, 200, 200, 0, 0, 220, 230 }; | |||
SetupTranslator(tr, stress_lengths_mk, stress_amps_mk); | |||
tr->charset_a0 = charsets[5]; // ISO-8859-5 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_5; | |||
tr->letter_groups[0] = tr->letter_groups[7] = vowels_cyrillic; | |||
tr->letter_bits_offset = OFFSET_CYRILLIC; | |||
@@ -1163,7 +1164,7 @@ Translator *SelectTranslator(const char *name) | |||
break; | |||
case L('m', 't'): // Maltese | |||
{ | |||
tr->charset_a0 = charsets[3]; // ISO-8859-3 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_3; | |||
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x100; // devoice at end of word | |||
tr->langopts.stress_rule = STRESSPOSN_2R; // penultimate | |||
tr->langopts.numbers = 1; | |||
@@ -1215,7 +1216,7 @@ Translator *SelectTranslator(const char *name) | |||
SetupTranslator(tr, stress_lengths_pl, stress_amps_pl); | |||
tr->charset_a0 = charsets[2]; // ISO-8859-2 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2; | |||
tr->langopts.stress_rule = STRESSPOSN_2R; | |||
tr->langopts.stress_flags = S_FINAL_DIM_ONLY; // mark unstressed final syllables as diminished | |||
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x9; | |||
@@ -1256,7 +1257,7 @@ Translator *SelectTranslator(const char *name) | |||
tr->langopts.stress_rule = STRESSPOSN_1R; | |||
tr->langopts.stress_flags = S_FINAL_VOWEL_UNSTRESSED | S_FINAL_DIM_ONLY; | |||
tr->charset_a0 = charsets[2]; // ISO-8859-2 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2; | |||
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_3 | NUM_AND_UNITS | NUM_ROMAN; | |||
tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex | |||
} | |||
@@ -1280,7 +1281,7 @@ Translator *SelectTranslator(const char *name) | |||
static const char *sk_voiced = "bdgjlmnrvwzaeiouy"; | |||
SetupTranslator(tr, stress_lengths_sk, stress_amps_sk); | |||
tr->charset_a0 = charsets[2]; // ISO-8859-2 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2; | |||
tr->langopts.stress_rule = STRESSPOSN_1L; | |||
tr->langopts.stress_flags = S_FINAL_DIM_ONLY | S_FINAL_NO_2; | |||
@@ -1329,7 +1330,7 @@ Translator *SelectTranslator(const char *name) | |||
} | |||
break; | |||
case L('s', 'l'): // Slovenian | |||
tr->charset_a0 = charsets[2]; // ISO-8859-2 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2; | |||
tr->langopts.stress_rule = STRESSPOSN_2R; // Temporary | |||
tr->langopts.stress_flags = S_NO_AUTO_2; | |||
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x103; | |||
@@ -1432,7 +1433,7 @@ Translator *SelectTranslator(const char *name) | |||
static const short stress_lengths_tr[8] = { 190, 180, 200, 230, 0, 0, 240, 250 }; | |||
SetupTranslator(tr, stress_lengths_tr, stress_amps_tr); | |||
tr->charset_a0 = charsets[9]; // ISO-8859-9 - Latin5 | |||
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_9; | |||
tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable | |||
tr->langopts.stress_flags = S_NO_AUTO_2; // no automatic secondary stress |
@@ -112,181 +112,6 @@ static const unsigned short brackets[] = { | |||
// other characters which break a word, but don't produce a pause | |||
static const unsigned short breaks[] = { '_', 0 }; | |||
// Translate character codes 0xA0 to 0xFF into their unicode values | |||
// ISO_8859_1 is set as default | |||
static const unsigned short ISO_8859_1[0x60] = { | |||
0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, // a0 | |||
0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, // a8 | |||
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, // b0 | |||
0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, // b8 | |||
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // c0 | |||
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8 | |||
0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, // d0 | |||
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, // d8 | |||
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, // e0 | |||
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8 | |||
0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, // f0 | |||
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, // f8 | |||
}; | |||
static const unsigned short ISO_8859_2[0x60] = { | |||
0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, // a0 | |||
0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, // a8 | |||
0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, // b0 | |||
0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, // b8 | |||
0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, // c0 | |||
0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, // c8 | |||
0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, // d0 | |||
0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, // d8 | |||
0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, // e0 | |||
0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, // e8 | |||
0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, // f0 | |||
0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, // f8 | |||
}; | |||
static const unsigned short ISO_8859_3[0x60] = { | |||
0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, // a0 | |||
0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, // a8 | |||
0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, // b0 | |||
0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, // b8 | |||
0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, // c0 | |||
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8 | |||
0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, // d0 | |||
0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, // d8 | |||
0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, // e0 | |||
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8 | |||
0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, // f0 | |||
0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, // f8 | |||
}; | |||
static const unsigned short ISO_8859_4[0x60] = { | |||
0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, // a0 | |||
0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, // a8 | |||
0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, // b0 | |||
0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, // b8 | |||
0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, // c0 | |||
0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, // c8 | |||
0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, // d0 | |||
0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, // d8 | |||
0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, // e0 | |||
0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, // e8 | |||
0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, // f0 | |||
0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, // f8 | |||
}; | |||
static const unsigned short ISO_8859_5[0x60] = { | |||
0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, // a0 Cyrillic | |||
0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, // a8 | |||
0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, // b0 | |||
0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, // b8 | |||
0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, // c0 | |||
0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, // c8 | |||
0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, // d0 | |||
0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, // d8 | |||
0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, // e0 | |||
0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, // e8 | |||
0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, // f0 | |||
0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, // f8 | |||
}; | |||
static const unsigned short ISO_8859_7[0x60] = { | |||
0x00a0, 0x2018, 0x2019, 0x00a3, 0x20ac, 0x20af, 0x00a6, 0x00a7, // a0 Greek | |||
0x00a8, 0x00a9, 0x037a, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, // a8 | |||
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, // b0 | |||
0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, // b8 | |||
0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, // c0 | |||
0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, // c8 | |||
0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, // d0 | |||
0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, // d8 | |||
0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, // e0 | |||
0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, // e8 | |||
0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, // f0 | |||
0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, // f8 | |||
}; | |||
static const unsigned short ISO_8859_9[0x60] = { | |||
0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, // a0 | |||
0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, // a8 | |||
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, // b0 | |||
0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, // b8 | |||
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // c0 | |||
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8 | |||
0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, // d0 | |||
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, // d8 | |||
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, // e0 | |||
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8 | |||
0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, // f0 | |||
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, // f8 | |||
}; | |||
static const unsigned short ISO_8859_14[0x60] = { | |||
0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7, // a0 Welsh | |||
0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178, // a8 | |||
0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56, // b0 | |||
0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61, // b8 | |||
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // c0 | |||
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8 | |||
0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a, // d0 | |||
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df, // d8 | |||
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, // e0 | |||
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8 | |||
0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b, // f0 | |||
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff, // f8 | |||
}; | |||
static const unsigned short KOI8_R[0x60] = { | |||
0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, // a0 Russian | |||
0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e, // a8 | |||
0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, // b0 | |||
0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9, // b8 | |||
0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, // c0 | |||
0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, // c8 | |||
0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, // d0 | |||
0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a, // d8 | |||
0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, // e0 | |||
0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, // e8 | |||
0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, // f0 | |||
0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a, // f8 | |||
}; | |||
static const unsigned short ISCII[0x60] = { | |||
0x0020, 0x0901, 0x0902, 0x0903, 0x0905, 0x0906, 0x0907, 0x0908, // a0 | |||
0x0909, 0x090a, 0x090b, 0x090e, 0x090f, 0x0910, 0x090d, 0x0912, // a8 | |||
0x0913, 0x0914, 0x0911, 0x0915, 0x0916, 0x0917, 0x0918, 0x0919, // b0 | |||
0x091a, 0x091b, 0x091c, 0x091d, 0x091e, 0x091f, 0x0920, 0x0921, // b8 | |||
0x0922, 0x0923, 0x0924, 0x0925, 0x0926, 0x0927, 0x0928, 0x0929, // c0 | |||
0x092a, 0x092b, 0x092c, 0x092d, 0x092e, 0x092f, 0x095f, 0x0930, // c8 | |||
0x0931, 0x0932, 0x0933, 0x0934, 0x0935, 0x0936, 0x0937, 0x0938, // d0 | |||
0x0939, 0x0020, 0x093e, 0x093f, 0x0940, 0x0941, 0x0942, 0x0943, // d8 | |||
0x0946, 0x0947, 0x0948, 0x0945, 0x094a, 0x094b, 0x094c, 0x0949, // e0 | |||
0x094d, 0x093c, 0x0964, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, // e8 | |||
0x0020, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, // f0 | |||
0x0037, 0x0038, 0x0039, 0x20, 0x20, 0x20, 0x20, 0x20, // f8 | |||
}; | |||
// Table of the 0xa0..0xff -> Unicode mappings, selected by a small integer index.
// Indexes 1..16 correspond to the ISO 8859 part number, 18 is KOI8-R and 19 is
// ISCII. Index 0, index 17 and every ISO 8859 part without its own table in this
// file (6, 8, 10-13, 15, 16) point at ISO_8859_1.
const unsigned short *charsets[N_CHARSETS] = { | |||
ISO_8859_1, | |||
ISO_8859_1, | |||
ISO_8859_2, | |||
ISO_8859_3, | |||
ISO_8859_4, | |||
ISO_8859_5, | |||
ISO_8859_1, | |||
ISO_8859_7, | |||
ISO_8859_1, | |||
ISO_8859_9, | |||
ISO_8859_1, | |||
ISO_8859_1, | |||
ISO_8859_1, | |||
ISO_8859_1, | |||
ISO_8859_14, | |||
ISO_8859_1, | |||
ISO_8859_1, | |||
ISO_8859_1, | |||
KOI8_R, // 18 | |||
ISCII | |||
}; | |||
// Tables of the relative lengths of vowels, depending on the | |||
// type of the two phonemes that follow | |||
// indexes are the "length_mod" value for the following phonemes |
@@ -34,7 +34,6 @@ extern "C" | |||
#define N_RULE_GROUP2 120 // max num of two-letter rule chains | |||
#define N_HASH_DICT 1024 | |||
#define N_CHARSETS 20 | |||
#define N_LETTER_GROUPS 95 // maximum is 127-32 | |||
// dictionary flags, word 1 | |||
@@ -603,7 +602,7 @@ typedef struct { | |||
short stress_lengths[8]; | |||
int dict_condition; // conditional apply some pronunciation rules and dict.lookups | |||
int dict_min_size; | |||
const unsigned short *charset_a0; // unicodes for characters 0xa0 to 0xff | |||
espeak_ng_ENCODING encoding; | |||
const wchar_t *char_plus_apostrophe; // single chars + apostrophe treated as words | |||
const wchar_t *punct_within_word; // allow these punctuation characters within words | |||
const unsigned short *chars_ignore; | |||
@@ -694,7 +693,6 @@ extern unsigned char punctuation_to_tone[INTONATION_TYPES][PUNCT_INTONATIONS]; | |||
extern Translator *translator; | |||
extern Translator *translator2; | |||
extern const unsigned short *charsets[N_CHARSETS]; | |||
extern char dictionary_name[40]; | |||
extern char ctrl_embedded; // to allow an alternative CTRL for embedded commands | |||
extern unsigned char *p_textinput; |
@@ -96,7 +96,6 @@ enum { | |||
V_DICTRULES, | |||
V_STRESSRULE, | |||
V_STRESSOPT, | |||
V_CHARSET, | |||
V_NUMBERS, | |||
V_OPTION, | |||
@@ -140,7 +139,6 @@ static MNEM_TAB keyword_tab[] = { | |||
{ "dictrules", V_DICTRULES }, | |||
{ "stressrule", V_STRESSRULE }, | |||
{ "stressopt", V_STRESSOPT }, | |||
{ "charset", V_CHARSET }, | |||
{ "replace", V_REPLACE }, | |||
{ "words", V_WORDGAP }, | |||
{ "echo", V_ECHO }, | |||
@@ -766,14 +764,6 @@ voice_t *LoadVoice(const char *vname, int control) | |||
&langopts->unstressed_wd1, | |||
&langopts->unstressed_wd2); | |||
break; | |||
case V_CHARSET: | |||
if ((sscanf(p, "%d", &value) == 1) && (value < N_CHARSETS)) { | |||
if (new_translator != NULL) | |||
new_translator->charset_a0 = charsets[value]; | |||
else | |||
fprintf(stderr, "The charset attribute is specified before language.\n"); | |||
} | |||
break; | |||
case V_OPTION: | |||
value2 = 0; | |||
if (((sscanf(p, "%s %d %d", option_name, &value, &value2) >= 2) && ((ix = LookupMnem(options_tab, option_name)) >= 0)) || |