Browse Source

Use the encoding.c tables for 8-bit encodings.

1.  Store the encoding enumeration values in the Translator
    object, instead of the charset table.

2.  Use the charset tables in encoding.c instead of the ones
    in translate.c.

3.  Remove the charset language file option -- it is only used
    in the Arabic language file, but is used incorrectly there.

4.  Specify ISO 8859-6 for the 8-bit encoding for Arabic instead
    of UTF-8, so that espeakCHARS_8BIT and espeakCHARS_AUTO work
    correctly for Arabic.
master
Reece H. Dunn 8 years ago
parent
commit
0b0661cef0

+ 0
- 7
docs/voices.md View File

@@ -29,7 +29,6 @@
- [stressAdd](#stressadd)
- [stressAmp](#stressamp)
- [intonation](#intonation)
- [charset](#charset)
- [dictmin](#dictmin)
- [alphabet2](#alphabet2)

@@ -418,12 +417,6 @@ these defaults may be different for particular languages.
* 3 -- Less intonation, and comma does not raise the pitch.
* 4 -- Pitch rises (rather than falls) at the end of sentence.

### charset

charset <param1>

The ISO 8859 character set number (not all are implemented).

### dictmin

dictmin <value>

+ 0
- 5
espeak-ng-data/lang/sem/ar View File

@@ -19,8 +19,3 @@ stressAdd 0 0 -10 -10 0 0 10 40
echo 20 10
voicing 75
consonants 150 150

charset UTF-8




+ 25
- 0
src/libespeak-ng/encoding.c View File

@@ -517,6 +517,31 @@ static const uint16_t ISCII[0x80] = {
0x0037, 0x0038, 0x0039, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, // f8
};

// Lookup from an encoding value to its 8-bit code page table. GetC() in
// readclause.c indexes this array with the translator's `encoding` field and
// then indexes the selected table with (byte - 0x80) to get the Unicode value.
// A NULL entry means there is no single-byte table for that encoding; GetC()
// then substitutes a space for bytes >= 0x80.
// NOTE(review): the entry order presumably mirrors the espeak_ng_ENCODING
// enumeration (not visible here) -- confirm and keep the two in sync.
const uint16_t *codepage_tables[] = {
NULL, // unknown
NULL, // ASCII
ISO_8859_1,
ISO_8859_2,
ISO_8859_3,
ISO_8859_4,
ISO_8859_5,
ISO_8859_6,
ISO_8859_7,
ISO_8859_8,
ISO_8859_9,
ISO_8859_10,
ISO_8859_11,
// ISO-8859-12 is not a valid encoding.
ISO_8859_13,
ISO_8859_14,
ISO_8859_15,
ISO_8859_16,
KOI8_R,
ISCII,
NULL, // UTF-8
NULL, // UCS-2
};

static uint32_t
string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder)
{

+ 2
- 0
src/libespeak-ng/encoding.h View File

@@ -22,6 +22,8 @@ extern "C"
{
#endif

extern const uint16_t *codepage_tables[]; // transitional data table

typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER;

espeak_ng_TEXT_DECODER *

+ 5
- 2
src/libespeak-ng/readclause.c View File

@@ -35,6 +35,7 @@
#include <espeak-ng/speak_lib.h>

#include "error.h"
#include "encoding.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"
@@ -375,8 +376,10 @@ static int GetC(void)

// 8 bit character set, convert high (non-ASCII) bytes to unicode
count_characters++;
if (c1 >= 0xa0)
return translator->charset_a0[c1-0xa0];
if (c1 >= 0x80) {
const uint16_t *codepage = codepage_tables[translator->encoding];
return codepage ? codepage[c1 - 0x80] : ' ';
}
return c1;
}


+ 22
- 21
src/libespeak-ng/tr_languages.c View File

@@ -223,7 +223,7 @@ static Translator *NewTranslator(void)
if ((tr = (Translator *)malloc(sizeof(Translator))) == NULL)
return NULL;

tr->charset_a0 = charsets[1]; // ISO-8859-1, this is for when the input is not utf8
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_1;
dictionary_name[0] = 0;
tr->dictionary_name[0] = 0;
tr->dict_condition = 0;
@@ -380,7 +380,7 @@ static void SetCyrillicLetters(Translator *tr)
static const char ru_nothard[] = { 0x11, 0x12, 0x13, 0x14, 0x17, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1f, 0x20, 0x21, 0x22, 0x24, 0x25, 0x27, 0x29, 0x2c, 0 };
static const char ru_voiced[] = { 0x11, 0x12, 0x13, 0x14, 0x16, 0x17, 0 }; // letter group G (voiced obstruents)
static const char ru_ivowels[] = { 0x2c, 0x2e, 0x2f, 0x31, 0 }; // letter group Y (iotated vowels & soft-sign)
tr->charset_a0 = charsets[18]; // KOI8-R
tr->encoding = ESPEAKNG_ENCODING_KOI8_R;
tr->transpose_min = 0x430; // convert cyrillic from unicode into range 0x01 to 0x22
tr->transpose_max = 0x451;
tr->transpose_map = NULL;
@@ -491,12 +491,13 @@ Translator *SelectTranslator(const char *name)
tr->letter_bits_offset = OFFSET_ARABIC;
tr->langopts.numbers = NUM_SWAP_TENS | NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_AND_HUNDRED | NUM_THOUSAND_AND | NUM_OMIT_1_THOUSAND;
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_6;
break;
case L('b', 'g'): // Bulgarian
{
SetCyrillicLetters(tr);
SetLetterVowel(tr, 0x2a);
tr->charset_a0 = charsets[5]; // ISO-8859-5
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_5;
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 0x432; // [v] don't count this character at start of word
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x107; // devoice at end of word, and change voicing to match a following consonant (except v)
tr->langopts.param[LOPT_REDUCE] = 2;
@@ -553,7 +554,7 @@ Translator *SelectTranslator(const char *name)

SetupTranslator(tr, stress_lengths_cy, stress_amps_cy);

tr->charset_a0 = charsets[14]; // ISO-8859-14
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_14;
tr->langopts.stress_rule = STRESSPOSN_2R;

// 'diminished' is an unstressed final syllable
@@ -638,7 +639,7 @@ Translator *SelectTranslator(const char *name)

SetupTranslator(tr, stress_lengths_el, stress_amps_el);

tr->charset_a0 = charsets[7]; // ISO-8859-7
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_7;
tr->char_plus_apostrophe = el_char_apostrophe;

tr->letter_bits_offset = OFFSET_GREEK;
@@ -673,7 +674,7 @@ Translator *SelectTranslator(const char *name)

SetupTranslator(tr, stress_lengths_eo, stress_amps_eo);

tr->charset_a0 = charsets[3]; // ISO-8859-3
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_3;
tr->char_plus_apostrophe = eo_char_apostrophe;

tr->langopts.vowel_pause = 2;
@@ -769,7 +770,7 @@ Translator *SelectTranslator(const char *name)
}
break;
case L('e', 't'): // Estonian
tr->charset_a0 = charsets[4]; // ISO-8859-4
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4;
// fallthrough:
case L('f', 'i'): // Finnish
{
@@ -830,7 +831,7 @@ Translator *SelectTranslator(const char *name)
static const unsigned char stress_amps_hi[8] = { 17, 14, 20, 19, 20, 22, 22, 21 };

SetupTranslator(tr, stress_lengths_hi, stress_amps_hi);
tr->charset_a0 = charsets[19]; // ISCII
tr->encoding = ESPEAKNG_ENCODING_ISCII;
tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable

tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable
@@ -869,7 +870,7 @@ Translator *SelectTranslator(const char *name)
SetupTranslator(tr, stress_lengths_sr, stress_amps_hr);
else
SetupTranslator(tr, stress_lengths_hr, stress_amps_hr);
tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;

tr->langopts.stress_rule = STRESSPOSN_1L;
tr->langopts.stress_flags = S_FINAL_NO_2;
@@ -898,7 +899,7 @@ Translator *SelectTranslator(const char *name)
static const short stress_lengths_hu[8] = { 185, 195, 195, 190, 0, 0, 210, 220 };

SetupTranslator(tr, stress_lengths_hu, stress_amps_hu);
tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;

tr->langopts.vowel_pause = 0x20;
tr->langopts.stress_rule = STRESSPOSN_1L;
@@ -1091,7 +1092,7 @@ Translator *SelectTranslator(const char *name)
static const short stress_lengths_ku[8] = { 180, 180, 190, 180, 0, 0, 230, 240 };

SetupTranslator(tr, stress_lengths_ku, stress_amps_ku);
tr->charset_a0 = charsets[9]; // ISO-8859-9 - Latin5
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_9;

tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable

@@ -1104,7 +1105,7 @@ Translator *SelectTranslator(const char *name)
break;
case L('l', 'a'): // Latin
{
tr->charset_a0 = charsets[4]; // ISO-8859-4, includes a,e,i,o,u-macron
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4; // includes a,e,i,o,u-macron
tr->langopts.stress_rule = STRESSPOSN_2R;
tr->langopts.stress_flags = S_NO_AUTO_2;
tr->langopts.unstressed_wd1 = 0;
@@ -1116,7 +1117,7 @@ Translator *SelectTranslator(const char *name)
break;
case L('l', 't'): // Lithuanian
{
tr->charset_a0 = charsets[4]; // ISO-8859-4
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4;
tr->langopts.stress_rule = STRESSPOSN_2R;
tr->langopts.stress_flags = S_NO_AUTO_2;
tr->langopts.unstressed_wd1 = 0;
@@ -1136,7 +1137,7 @@ Translator *SelectTranslator(const char *name)

tr->langopts.stress_rule = STRESSPOSN_1L;
tr->langopts.spelling_stress = 1;
tr->charset_a0 = charsets[4]; // ISO-8859-4
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_4 | NUM_ORDINAL_DOT;
tr->langopts.stress_flags = S_NO_AUTO_2 | S_FINAL_DIM | S_FINAL_DIM_ONLY | S_EO_CLAUSE1;
}
@@ -1152,7 +1153,7 @@ Translator *SelectTranslator(const char *name)
static const short stress_lengths_mk[8] = { 180, 160, 200, 200, 0, 0, 220, 230 };

SetupTranslator(tr, stress_lengths_mk, stress_amps_mk);
tr->charset_a0 = charsets[5]; // ISO-8859-5
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_5;
tr->letter_groups[0] = tr->letter_groups[7] = vowels_cyrillic;
tr->letter_bits_offset = OFFSET_CYRILLIC;

@@ -1163,7 +1164,7 @@ Translator *SelectTranslator(const char *name)
break;
case L('m', 't'): // Maltese
{
tr->charset_a0 = charsets[3]; // ISO-8859-3
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_3;
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x100; // devoice at end of word
tr->langopts.stress_rule = STRESSPOSN_2R; // penultimate
tr->langopts.numbers = 1;
@@ -1215,7 +1216,7 @@ Translator *SelectTranslator(const char *name)

SetupTranslator(tr, stress_lengths_pl, stress_amps_pl);

tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;
tr->langopts.stress_rule = STRESSPOSN_2R;
tr->langopts.stress_flags = S_FINAL_DIM_ONLY; // mark unstressed final syllables as diminished
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x9;
@@ -1256,7 +1257,7 @@ Translator *SelectTranslator(const char *name)
tr->langopts.stress_rule = STRESSPOSN_1R;
tr->langopts.stress_flags = S_FINAL_VOWEL_UNSTRESSED | S_FINAL_DIM_ONLY;

tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_3 | NUM_AND_UNITS | NUM_ROMAN;
tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex
}
@@ -1280,7 +1281,7 @@ Translator *SelectTranslator(const char *name)
static const char *sk_voiced = "bdgjlmnrvwzaeiouy";

SetupTranslator(tr, stress_lengths_sk, stress_amps_sk);
tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;

tr->langopts.stress_rule = STRESSPOSN_1L;
tr->langopts.stress_flags = S_FINAL_DIM_ONLY | S_FINAL_NO_2;
@@ -1329,7 +1330,7 @@ Translator *SelectTranslator(const char *name)
}
break;
case L('s', 'l'): // Slovenian
tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;
tr->langopts.stress_rule = STRESSPOSN_2R; // Temporary
tr->langopts.stress_flags = S_NO_AUTO_2;
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x103;
@@ -1432,7 +1433,7 @@ Translator *SelectTranslator(const char *name)
static const short stress_lengths_tr[8] = { 190, 180, 200, 230, 0, 0, 240, 250 };

SetupTranslator(tr, stress_lengths_tr, stress_amps_tr);
tr->charset_a0 = charsets[9]; // ISO-8859-9 - Latin5
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_9;

tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable
tr->langopts.stress_flags = S_NO_AUTO_2; // no automatic secondary stress

+ 0
- 175
src/libespeak-ng/translate.c View File

@@ -112,181 +112,6 @@ static const unsigned short brackets[] = {
// other characters which break a word, but don't produce a pause
static const unsigned short breaks[] = { '_', 0 };

// Tables translating the 8-bit character codes 0xA0 to 0xFF into their
// unicode values. Each table has 0x60 entries and is indexed by (code - 0xA0).
// ISO_8859_1 is set as default for a new translator (for input that is not
// UTF-8); every value in it equals the character code itself.
static const unsigned short ISO_8859_1[0x60] = {
0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, // a0
0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, // a8
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, // b0
0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, // b8
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // c0
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8
0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, // d0
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, // d8
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, // e0
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8
0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, // f0
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, // f8
};

// ISO-8859-2: unicode values for character codes 0xA0 to 0xFF, indexed by
// (code - 0xA0). Registered as charsets[2].
static const unsigned short ISO_8859_2[0x60] = {
0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, // a0
0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, // a8
0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, // b0
0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, // b8
0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, // c0
0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, // c8
0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, // d0
0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, // d8
0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, // e0
0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, // e8
0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, // f0
0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, // f8
};

// ISO-8859-3: unicode values for character codes 0xA0 to 0xFF, indexed by
// (code - 0xA0). Registered as charsets[3].
// NOTE(review): the 0x0000 entries presumably mark codes that have no
// character assigned in this character set -- confirm.
static const unsigned short ISO_8859_3[0x60] = {
0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, // a0
0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, // a8
0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, // b0
0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, // b8
0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, // c0
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8
0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, // d0
0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, // d8
0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, // e0
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8
0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, // f0
0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, // f8
};

// ISO-8859-4: unicode values for character codes 0xA0 to 0xFF, indexed by
// (code - 0xA0). Registered as charsets[4].
static const unsigned short ISO_8859_4[0x60] = {
0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, // a0
0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, // a8
0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, // b0
0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, // b8
0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, // c0
0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, // c8
0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, // d0
0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, // d8
0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, // e0
0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, // e8
0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, // f0
0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, // f8
};

// ISO-8859-5 (Cyrillic): unicode values for character codes 0xA0 to 0xFF,
// indexed by (code - 0xA0). Registered as charsets[5].
static const unsigned short ISO_8859_5[0x60] = {
0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, // a0 Cyrillic
0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, // a8
0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, // b0
0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, // b8
0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, // c0
0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, // c8
0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, // d0
0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, // d8
0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, // e0
0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, // e8
0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, // f0
0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, // f8
};

// ISO-8859-7 (Greek): unicode values for character codes 0xA0 to 0xFF,
// indexed by (code - 0xA0). Registered as charsets[7].
// NOTE(review): the 0x0000 entries presumably mark codes that have no
// character assigned in this character set -- confirm.
static const unsigned short ISO_8859_7[0x60] = {
0x00a0, 0x2018, 0x2019, 0x00a3, 0x20ac, 0x20af, 0x00a6, 0x00a7, // a0 Greek
0x00a8, 0x00a9, 0x037a, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, // a8
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, // b0
0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, // b8
0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, // c0
0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, // c8
0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, // d0
0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, // d8
0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, // e0
0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, // e8
0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, // f0
0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, // f8
};

// ISO-8859-9 (Latin5): unicode values for character codes 0xA0 to 0xFF,
// indexed by (code - 0xA0). Registered as charsets[9].
// Differs from the ISO_8859_1 table only at codes 0xD0, 0xDD, 0xDE, 0xF0,
// 0xFD and 0xFE.
static const unsigned short ISO_8859_9[0x60] = {
0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, // a0
0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, // a8
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, // b0
0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, // b8
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // c0
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8
0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, // d0
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, // d8
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, // e0
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8
0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, // f0
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, // f8
};

// ISO-8859-14 (used for Welsh): unicode values for character codes 0xA0 to
// 0xFF, indexed by (code - 0xA0). Registered as charsets[14].
static const unsigned short ISO_8859_14[0x60] = {
0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7, // a0 Welsh
0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178, // a8
0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56, // b0
0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61, // b8
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // c0
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8
0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a, // d0
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df, // d8
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, // e0
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8
0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b, // f0
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff, // f8
};

// KOI8-R (Russian): unicode values for character codes 0xA0 to 0xFF, indexed
// by (code - 0xA0). Registered as charsets[18], which SetCyrillicLetters()
// selects for translators using Cyrillic letters.
static const unsigned short KOI8_R[0x60] = {
0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, // a0 Russian
0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e, // a8
0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, // b0
0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9, // b8
0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, // c0
0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, // c8
0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, // d0
0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a, // d8
0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, // e0
0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, // e8
0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, // f0
0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a, // f8
};

// ISCII: unicode values for character codes 0xA0 to 0xFF, indexed by
// (code - 0xA0). Registered as charsets[19].
// NOTE(review): the 0x0020 (space) entries presumably stand in for codes with
// no mapping; the ISCII table in encoding.c shows 0xfffd in the same trailing
// positions of its f8 row -- confirm the intended placeholder.
static const unsigned short ISCII[0x60] = {
0x0020, 0x0901, 0x0902, 0x0903, 0x0905, 0x0906, 0x0907, 0x0908, // a0
0x0909, 0x090a, 0x090b, 0x090e, 0x090f, 0x0910, 0x090d, 0x0912, // a8
0x0913, 0x0914, 0x0911, 0x0915, 0x0916, 0x0917, 0x0918, 0x0919, // b0
0x091a, 0x091b, 0x091c, 0x091d, 0x091e, 0x091f, 0x0920, 0x0921, // b8
0x0922, 0x0923, 0x0924, 0x0925, 0x0926, 0x0927, 0x0928, 0x0929, // c0
0x092a, 0x092b, 0x092c, 0x092d, 0x092e, 0x092f, 0x095f, 0x0930, // c8
0x0931, 0x0932, 0x0933, 0x0934, 0x0935, 0x0936, 0x0937, 0x0938, // d0
0x0939, 0x0020, 0x093e, 0x093f, 0x0940, 0x0941, 0x0942, 0x0943, // d8
0x0946, 0x0947, 0x0948, 0x0945, 0x094a, 0x094b, 0x094c, 0x0949, // e0
0x094d, 0x093c, 0x0964, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, // e8
0x0020, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, // f0
0x0037, 0x0038, 0x0039, 0x20, 0x20, 0x20, 0x20, 0x20, // f8
};

// Character set tables indexed by charset number (N_CHARSETS entries).
// Index n in the range 1-16 selects the ISO 8859-n table where one is defined
// in this file; index 18 is KOI8-R and index 19 is ISCII.
// Every slot without a table of its own (0, 6, 8, 10-13, 15-17) falls back to
// ISO_8859_1, so any index below N_CHARSETS yields a usable table.
const unsigned short *charsets[N_CHARSETS] = {
ISO_8859_1,
ISO_8859_1,
ISO_8859_2,
ISO_8859_3,
ISO_8859_4,
ISO_8859_5,
ISO_8859_1,
ISO_8859_7,
ISO_8859_1,
ISO_8859_9,
ISO_8859_1,
ISO_8859_1,
ISO_8859_1,
ISO_8859_1,
ISO_8859_14,
ISO_8859_1,
ISO_8859_1,
ISO_8859_1,
KOI8_R, // 18
ISCII
};

// Tables of the relative lengths of vowels, depending on the
// type of the two phonemes that follow
// indexes are the "length_mod" value for the following phonemes

+ 1
- 3
src/libespeak-ng/translate.h View File

@@ -34,7 +34,6 @@ extern "C"

#define N_RULE_GROUP2 120 // max num of two-letter rule chains
#define N_HASH_DICT 1024
#define N_CHARSETS 20
#define N_LETTER_GROUPS 95 // maximum is 127-32

// dictionary flags, word 1
@@ -603,7 +602,7 @@ typedef struct {
short stress_lengths[8];
int dict_condition; // conditional apply some pronunciation rules and dict.lookups
int dict_min_size;
const unsigned short *charset_a0; // unicodes for characters 0xa0 to oxff
espeak_ng_ENCODING encoding;
const wchar_t *char_plus_apostrophe; // single chars + apostrophe treated as words
const wchar_t *punct_within_word; // allow these punctuation characters within words
const unsigned short *chars_ignore;
@@ -694,7 +693,6 @@ extern unsigned char punctuation_to_tone[INTONATION_TYPES][PUNCT_INTONATIONS];

extern Translator *translator;
extern Translator *translator2;
extern const unsigned short *charsets[N_CHARSETS];
extern char dictionary_name[40];
extern char ctrl_embedded; // to allow an alternative CTRL for embedded commands
extern unsigned char *p_textinput;

+ 0
- 10
src/libespeak-ng/voices.c View File

@@ -96,7 +96,6 @@ enum {
V_DICTRULES,
V_STRESSRULE,
V_STRESSOPT,
V_CHARSET,
V_NUMBERS,
V_OPTION,

@@ -140,7 +139,6 @@ static MNEM_TAB keyword_tab[] = {
{ "dictrules", V_DICTRULES },
{ "stressrule", V_STRESSRULE },
{ "stressopt", V_STRESSOPT },
{ "charset", V_CHARSET },
{ "replace", V_REPLACE },
{ "words", V_WORDGAP },
{ "echo", V_ECHO },
@@ -766,14 +764,6 @@ voice_t *LoadVoice(const char *vname, int control)
&langopts->unstressed_wd1,
&langopts->unstressed_wd2);
break;
case V_CHARSET:
if ((sscanf(p, "%d", &value) == 1) && (value < N_CHARSETS)) {
if (new_translator != NULL)
new_translator->charset_a0 = charsets[value];
else
fprintf(stderr, "The charset attribute is specified before language.\n");
}
break;
case V_OPTION:
value2 = 0;
if (((sscanf(p, "%s %d %d", option_name, &value, &value2) >= 2) && ((ix = LookupMnem(options_tab, option_name)) >= 0)) ||

Loading…
Cancel
Save