Browse Source

Use the encoding.c tables for 8-bit encodings.

1.  Store the encoding enumeration values in the Translator
    object, instead of the charset table.

2.  Use the encoding.c charset table data instead of the ones
    in translate.c.

3.  Remove the charset language file option -- it is only used
    in the Arabic language file, but is used incorrectly there
    (it is given `UTF-8`, while the option takes an ISO 8859
    character set number).

4.  Specify ISO 8859-6 for the 8-bit encoding for Arabic instead
    of UTF-8, so that espeakCHARS_8BIT and espeakCHARS_AUTO work
    correctly for Arabic.
master
Reece H. Dunn 8 years ago
parent
commit
0b0661cef0

+ 0
- 7
docs/voices.md View File

- [stressAdd](#stressadd) - [stressAdd](#stressadd)
- [stressAmp](#stressamp) - [stressAmp](#stressamp)
- [intonation](#intonation) - [intonation](#intonation)
- [charset](#charset)
- [dictmin](#dictmin) - [dictmin](#dictmin)
- [alphabet2](#alphabet2) - [alphabet2](#alphabet2)


* 3 -- Less intonation, and comma does not raise the pitch. * 3 -- Less intonation, and comma does not raise the pitch.
* 4 -- Pitch rises (rather than falls) at the end of sentence. * 4 -- Pitch rises (rather than falls) at the end of sentence.


### charset

charset <param1>

The ISO 8859 character set number (not all are implemented).

### dictmin ### dictmin


dictmin <value> dictmin <value>

+ 0
- 5
espeak-ng-data/lang/sem/ar View File

echo 20 10 echo 20 10
voicing 75 voicing 75
consonants 150 150 consonants 150 150

charset UTF-8




+ 25
- 0
src/libespeak-ng/encoding.c View File

0x0037, 0x0038, 0x0039, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, // f8 0x0037, 0x0038, 0x0039, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, // f8
}; };


// Lookup table from an 8-bit encoding to its upper-half code page.
//
// The table is indexed with the translator's `encoding` value. A non-NULL
// entry is itself indexed with (byte - 0x80) to get the Unicode code point
// for bytes 0x80 and above. A NULL entry means no code page is available for
// that encoding; the 8-bit lookup in readclause.c then yields a space.
//
// NOTE(review): the entry order presumably has to mirror the
// espeak_ng_ENCODING enumeration -- confirm before adding or reordering
// entries.
const uint16_t *codepage_tables[] = {
NULL, // unknown
NULL, // ASCII
ISO_8859_1,
ISO_8859_2,
ISO_8859_3,
ISO_8859_4,
ISO_8859_5,
ISO_8859_6,
ISO_8859_7,
ISO_8859_8,
ISO_8859_9,
ISO_8859_10,
ISO_8859_11,
// ISO-8859-12 is not a valid encoding.
ISO_8859_13,
ISO_8859_14,
ISO_8859_15,
ISO_8859_16,
KOI8_R,
ISCII,
NULL, // UTF-8
NULL, // UCS-2
};

static uint32_t static uint32_t
string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder) string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder)
{ {

+ 2
- 0
src/libespeak-ng/encoding.h View File

{ {
#endif #endif


extern const uint16_t *codepage_tables[]; // transitional data table

typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER; typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER;


espeak_ng_TEXT_DECODER * espeak_ng_TEXT_DECODER *

+ 5
- 2
src/libespeak-ng/readclause.c View File

#include <espeak-ng/speak_lib.h> #include <espeak-ng/speak_lib.h>


#include "error.h" #include "error.h"
#include "encoding.h"
#include "speech.h" #include "speech.h"
#include "phoneme.h" #include "phoneme.h"
#include "synthesize.h" #include "synthesize.h"


// 8 bit character set, convert to unicode if // 8 bit character set, convert to unicode if
count_characters++; count_characters++;
if (c1 >= 0xa0)
return translator->charset_a0[c1-0xa0];
if (c1 >= 0x80) {
const uint16_t *codepage = codepage_tables[translator->encoding];
return codepage ? codepage[c1 - 0x80] : ' ';
}
return c1; return c1;
} }



+ 22
- 21
src/libespeak-ng/tr_languages.c View File

if ((tr = (Translator *)malloc(sizeof(Translator))) == NULL) if ((tr = (Translator *)malloc(sizeof(Translator))) == NULL)
return NULL; return NULL;


tr->charset_a0 = charsets[1]; // ISO-8859-1, this is for when the input is not utf8
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_1;
dictionary_name[0] = 0; dictionary_name[0] = 0;
tr->dictionary_name[0] = 0; tr->dictionary_name[0] = 0;
tr->dict_condition = 0; tr->dict_condition = 0;
static const char ru_nothard[] = { 0x11, 0x12, 0x13, 0x14, 0x17, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1f, 0x20, 0x21, 0x22, 0x24, 0x25, 0x27, 0x29, 0x2c, 0 }; static const char ru_nothard[] = { 0x11, 0x12, 0x13, 0x14, 0x17, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1f, 0x20, 0x21, 0x22, 0x24, 0x25, 0x27, 0x29, 0x2c, 0 };
static const char ru_voiced[] = { 0x11, 0x12, 0x13, 0x14, 0x16, 0x17, 0 }; // letter group G (voiced obstruents) static const char ru_voiced[] = { 0x11, 0x12, 0x13, 0x14, 0x16, 0x17, 0 }; // letter group G (voiced obstruents)
static const char ru_ivowels[] = { 0x2c, 0x2e, 0x2f, 0x31, 0 }; // letter group Y (iotated vowels & soft-sign) static const char ru_ivowels[] = { 0x2c, 0x2e, 0x2f, 0x31, 0 }; // letter group Y (iotated vowels & soft-sign)
tr->charset_a0 = charsets[18]; // KOI8-R
tr->encoding = ESPEAKNG_ENCODING_KOI8_R;
tr->transpose_min = 0x430; // convert cyrillic from unicode into range 0x01 to 0x22 tr->transpose_min = 0x430; // convert cyrillic from unicode into range 0x01 to 0x22
tr->transpose_max = 0x451; tr->transpose_max = 0x451;
tr->transpose_map = NULL; tr->transpose_map = NULL;
tr->letter_bits_offset = OFFSET_ARABIC; tr->letter_bits_offset = OFFSET_ARABIC;
tr->langopts.numbers = NUM_SWAP_TENS | NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_AND_HUNDRED | NUM_THOUSAND_AND | NUM_OMIT_1_THOUSAND; tr->langopts.numbers = NUM_SWAP_TENS | NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_AND_HUNDRED | NUM_THOUSAND_AND | NUM_OMIT_1_THOUSAND;
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_6;
break; break;
case L('b', 'g'): // Bulgarian case L('b', 'g'): // Bulgarian
{ {
SetCyrillicLetters(tr); SetCyrillicLetters(tr);
SetLetterVowel(tr, 0x2a); SetLetterVowel(tr, 0x2a);
tr->charset_a0 = charsets[5]; // ISO-8859-5
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_5;
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 0x432; // [v] don't count this character at start of word tr->langopts.param[LOPT_UNPRONOUNCABLE] = 0x432; // [v] don't count this character at start of word
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x107; // devoice at end of word, and change voicing to match a following consonant (except v) tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x107; // devoice at end of word, and change voicing to match a following consonant (except v)
tr->langopts.param[LOPT_REDUCE] = 2; tr->langopts.param[LOPT_REDUCE] = 2;


SetupTranslator(tr, stress_lengths_cy, stress_amps_cy); SetupTranslator(tr, stress_lengths_cy, stress_amps_cy);


tr->charset_a0 = charsets[14]; // ISO-8859-14
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_14;
tr->langopts.stress_rule = STRESSPOSN_2R; tr->langopts.stress_rule = STRESSPOSN_2R;


// 'diminished' is an unstressed final syllable // 'diminished' is an unstressed final syllable


SetupTranslator(tr, stress_lengths_el, stress_amps_el); SetupTranslator(tr, stress_lengths_el, stress_amps_el);


tr->charset_a0 = charsets[7]; // ISO-8859-7
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_7;
tr->char_plus_apostrophe = el_char_apostrophe; tr->char_plus_apostrophe = el_char_apostrophe;


tr->letter_bits_offset = OFFSET_GREEK; tr->letter_bits_offset = OFFSET_GREEK;


SetupTranslator(tr, stress_lengths_eo, stress_amps_eo); SetupTranslator(tr, stress_lengths_eo, stress_amps_eo);


tr->charset_a0 = charsets[3]; // ISO-8859-3
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_3;
tr->char_plus_apostrophe = eo_char_apostrophe; tr->char_plus_apostrophe = eo_char_apostrophe;


tr->langopts.vowel_pause = 2; tr->langopts.vowel_pause = 2;
} }
break; break;
case L('e', 't'): // Estonian case L('e', 't'): // Estonian
tr->charset_a0 = charsets[4]; // ISO-8859-4
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4;
// fallthrough: // fallthrough:
case L('f', 'i'): // Finnish case L('f', 'i'): // Finnish
{ {
static const unsigned char stress_amps_hi[8] = { 17, 14, 20, 19, 20, 22, 22, 21 }; static const unsigned char stress_amps_hi[8] = { 17, 14, 20, 19, 20, 22, 22, 21 };


SetupTranslator(tr, stress_lengths_hi, stress_amps_hi); SetupTranslator(tr, stress_lengths_hi, stress_amps_hi);
tr->charset_a0 = charsets[19]; // ISCII
tr->encoding = ESPEAKNG_ENCODING_ISCII;
tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable


tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable
SetupTranslator(tr, stress_lengths_sr, stress_amps_hr); SetupTranslator(tr, stress_lengths_sr, stress_amps_hr);
else else
SetupTranslator(tr, stress_lengths_hr, stress_amps_hr); SetupTranslator(tr, stress_lengths_hr, stress_amps_hr);
tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;


tr->langopts.stress_rule = STRESSPOSN_1L; tr->langopts.stress_rule = STRESSPOSN_1L;
tr->langopts.stress_flags = S_FINAL_NO_2; tr->langopts.stress_flags = S_FINAL_NO_2;
static const short stress_lengths_hu[8] = { 185, 195, 195, 190, 0, 0, 210, 220 }; static const short stress_lengths_hu[8] = { 185, 195, 195, 190, 0, 0, 210, 220 };


SetupTranslator(tr, stress_lengths_hu, stress_amps_hu); SetupTranslator(tr, stress_lengths_hu, stress_amps_hu);
tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;


tr->langopts.vowel_pause = 0x20; tr->langopts.vowel_pause = 0x20;
tr->langopts.stress_rule = STRESSPOSN_1L; tr->langopts.stress_rule = STRESSPOSN_1L;
static const short stress_lengths_ku[8] = { 180, 180, 190, 180, 0, 0, 230, 240 }; static const short stress_lengths_ku[8] = { 180, 180, 190, 180, 0, 0, 230, 240 };


SetupTranslator(tr, stress_lengths_ku, stress_amps_ku); SetupTranslator(tr, stress_lengths_ku, stress_amps_ku);
tr->charset_a0 = charsets[9]; // ISO-8859-9 - Latin5
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_9;


tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable


break; break;
case L('l', 'a'): // Latin case L('l', 'a'): // Latin
{ {
tr->charset_a0 = charsets[4]; // ISO-8859-4, includes a,e,i,o,u-macron
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4; // includes a,e,i,o,u-macron
tr->langopts.stress_rule = STRESSPOSN_2R; tr->langopts.stress_rule = STRESSPOSN_2R;
tr->langopts.stress_flags = S_NO_AUTO_2; tr->langopts.stress_flags = S_NO_AUTO_2;
tr->langopts.unstressed_wd1 = 0; tr->langopts.unstressed_wd1 = 0;
break; break;
case L('l', 't'): // Lithuanian case L('l', 't'): // Lithuanian
{ {
tr->charset_a0 = charsets[4]; // ISO-8859-4
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4;
tr->langopts.stress_rule = STRESSPOSN_2R; tr->langopts.stress_rule = STRESSPOSN_2R;
tr->langopts.stress_flags = S_NO_AUTO_2; tr->langopts.stress_flags = S_NO_AUTO_2;
tr->langopts.unstressed_wd1 = 0; tr->langopts.unstressed_wd1 = 0;


tr->langopts.stress_rule = STRESSPOSN_1L; tr->langopts.stress_rule = STRESSPOSN_1L;
tr->langopts.spelling_stress = 1; tr->langopts.spelling_stress = 1;
tr->charset_a0 = charsets[4]; // ISO-8859-4
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_4;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_4 | NUM_ORDINAL_DOT; tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_4 | NUM_ORDINAL_DOT;
tr->langopts.stress_flags = S_NO_AUTO_2 | S_FINAL_DIM | S_FINAL_DIM_ONLY | S_EO_CLAUSE1; tr->langopts.stress_flags = S_NO_AUTO_2 | S_FINAL_DIM | S_FINAL_DIM_ONLY | S_EO_CLAUSE1;
} }
static const short stress_lengths_mk[8] = { 180, 160, 200, 200, 0, 0, 220, 230 }; static const short stress_lengths_mk[8] = { 180, 160, 200, 200, 0, 0, 220, 230 };


SetupTranslator(tr, stress_lengths_mk, stress_amps_mk); SetupTranslator(tr, stress_lengths_mk, stress_amps_mk);
tr->charset_a0 = charsets[5]; // ISO-8859-5
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_5;
tr->letter_groups[0] = tr->letter_groups[7] = vowels_cyrillic; tr->letter_groups[0] = tr->letter_groups[7] = vowels_cyrillic;
tr->letter_bits_offset = OFFSET_CYRILLIC; tr->letter_bits_offset = OFFSET_CYRILLIC;


break; break;
case L('m', 't'): // Maltese case L('m', 't'): // Maltese
{ {
tr->charset_a0 = charsets[3]; // ISO-8859-3
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_3;
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x100; // devoice at end of word tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x100; // devoice at end of word
tr->langopts.stress_rule = STRESSPOSN_2R; // penultimate tr->langopts.stress_rule = STRESSPOSN_2R; // penultimate
tr->langopts.numbers = 1; tr->langopts.numbers = 1;


SetupTranslator(tr, stress_lengths_pl, stress_amps_pl); SetupTranslator(tr, stress_lengths_pl, stress_amps_pl);


tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;
tr->langopts.stress_rule = STRESSPOSN_2R; tr->langopts.stress_rule = STRESSPOSN_2R;
tr->langopts.stress_flags = S_FINAL_DIM_ONLY; // mark unstressed final syllables as diminished tr->langopts.stress_flags = S_FINAL_DIM_ONLY; // mark unstressed final syllables as diminished
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x9; tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x9;
tr->langopts.stress_rule = STRESSPOSN_1R; tr->langopts.stress_rule = STRESSPOSN_1R;
tr->langopts.stress_flags = S_FINAL_VOWEL_UNSTRESSED | S_FINAL_DIM_ONLY; tr->langopts.stress_flags = S_FINAL_VOWEL_UNSTRESSED | S_FINAL_DIM_ONLY;


tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_3 | NUM_AND_UNITS | NUM_ROMAN; tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_3 | NUM_AND_UNITS | NUM_ROMAN;
tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex
} }
static const char *sk_voiced = "bdgjlmnrvwzaeiouy"; static const char *sk_voiced = "bdgjlmnrvwzaeiouy";


SetupTranslator(tr, stress_lengths_sk, stress_amps_sk); SetupTranslator(tr, stress_lengths_sk, stress_amps_sk);
tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;


tr->langopts.stress_rule = STRESSPOSN_1L; tr->langopts.stress_rule = STRESSPOSN_1L;
tr->langopts.stress_flags = S_FINAL_DIM_ONLY | S_FINAL_NO_2; tr->langopts.stress_flags = S_FINAL_DIM_ONLY | S_FINAL_NO_2;
} }
break; break;
case L('s', 'l'): // Slovenian case L('s', 'l'): // Slovenian
tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_2;
tr->langopts.stress_rule = STRESSPOSN_2R; // Temporary tr->langopts.stress_rule = STRESSPOSN_2R; // Temporary
tr->langopts.stress_flags = S_NO_AUTO_2; tr->langopts.stress_flags = S_NO_AUTO_2;
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x103; tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x103;
static const short stress_lengths_tr[8] = { 190, 180, 200, 230, 0, 0, 240, 250 }; static const short stress_lengths_tr[8] = { 190, 180, 200, 230, 0, 0, 240, 250 };


SetupTranslator(tr, stress_lengths_tr, stress_amps_tr); SetupTranslator(tr, stress_lengths_tr, stress_amps_tr);
tr->charset_a0 = charsets[9]; // ISO-8859-9 - Latin5
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_9;


tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable
tr->langopts.stress_flags = S_NO_AUTO_2; // no automatic secondary stress tr->langopts.stress_flags = S_NO_AUTO_2; // no automatic secondary stress

+ 0
- 175
src/libespeak-ng/translate.c View File

// other characters which break a word, but don't produce a pause // other characters which break a word, but don't produce a pause
static const unsigned short breaks[] = { '_', 0 }; static const unsigned short breaks[] = { '_', 0 };


// Translate character codes 0xA0 to 0xFF into their unicode values
// ISO_8859_1 is set as default
static const unsigned short ISO_8859_1[0x60] = {
0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, // a0
0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, // a8
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, // b0
0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, // b8
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // c0
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8
0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, // d0
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, // d8
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, // e0
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8
0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, // f0
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, // f8
};

static const unsigned short ISO_8859_2[0x60] = {
0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, // a0
0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, // a8
0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, // b0
0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, // b8
0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, // c0
0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, // c8
0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, // d0
0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, // d8
0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, // e0
0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, // e8
0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, // f0
0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, // f8
};

static const unsigned short ISO_8859_3[0x60] = {
0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, // a0
0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, // a8
0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, // b0
0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, // b8
0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, // c0
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8
0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, // d0
0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, // d8
0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, // e0
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8
0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, // f0
0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, // f8
};

static const unsigned short ISO_8859_4[0x60] = {
0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, // a0
0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, // a8
0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, // b0
0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, // b8
0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, // c0
0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, // c8
0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, // d0
0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, // d8
0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, // e0
0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, // e8
0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, // f0
0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, // f8
};

static const unsigned short ISO_8859_5[0x60] = {
0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, // a0 Cyrillic
0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, // a8
0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, // b0
0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, // b8
0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, // c0
0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, // c8
0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, // d0
0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, // d8
0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, // e0
0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, // e8
0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, // f0
0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, // f8
};

static const unsigned short ISO_8859_7[0x60] = {
0x00a0, 0x2018, 0x2019, 0x00a3, 0x20ac, 0x20af, 0x00a6, 0x00a7, // a0 Greek
0x00a8, 0x00a9, 0x037a, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, // a8
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, // b0
0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, // b8
0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, // c0
0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, // c8
0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, // d0
0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, // d8
0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, // e0
0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, // e8
0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, // f0
0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, // f8
};

static const unsigned short ISO_8859_9[0x60] = {
0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, // a0
0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, // a8
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, // b0
0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, // b8
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // c0
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8
0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, // d0
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, // d8
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, // e0
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8
0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, // f0
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, // f8
};

static const unsigned short ISO_8859_14[0x60] = {
0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7, // a0 Welsh
0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178, // a8
0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56, // b0
0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61, // b8
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, // c0
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, // c8
0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a, // d0
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df, // d8
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, // e0
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, // e8
0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b, // f0
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff, // f8
};

static const unsigned short KOI8_R[0x60] = {
0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, // a0 Russian
0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e, // a8
0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, // b0
0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9, // b8
0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, // c0
0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, // c8
0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, // d0
0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a, // d8
0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, // e0
0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, // e8
0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, // f0
0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a, // f8
};

static const unsigned short ISCII[0x60] = {
0x0020, 0x0901, 0x0902, 0x0903, 0x0905, 0x0906, 0x0907, 0x0908, // a0
0x0909, 0x090a, 0x090b, 0x090e, 0x090f, 0x0910, 0x090d, 0x0912, // a8
0x0913, 0x0914, 0x0911, 0x0915, 0x0916, 0x0917, 0x0918, 0x0919, // b0
0x091a, 0x091b, 0x091c, 0x091d, 0x091e, 0x091f, 0x0920, 0x0921, // b8
0x0922, 0x0923, 0x0924, 0x0925, 0x0926, 0x0927, 0x0928, 0x0929, // c0
0x092a, 0x092b, 0x092c, 0x092d, 0x092e, 0x092f, 0x095f, 0x0930, // c8
0x0931, 0x0932, 0x0933, 0x0934, 0x0935, 0x0936, 0x0937, 0x0938, // d0
0x0939, 0x0020, 0x093e, 0x093f, 0x0940, 0x0941, 0x0942, 0x0943, // d8
0x0946, 0x0947, 0x0948, 0x0945, 0x094a, 0x094b, 0x094c, 0x0949, // e0
0x094d, 0x093c, 0x0964, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, // e8
0x0020, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, // f0
0x0037, 0x0038, 0x0039, 0x20, 0x20, 0x20, 0x20, 0x20, // f8
};

// Conversion tables for character codes 0xA0 to 0xFF, indexed by character
// set number. Slots 2, 3, 4, 5, 7, 9 and 14 hold the ISO 8859 part with the
// same number, 18 holds KOI8-R and 19 holds ISCII. Every other slot falls
// back to ISO_8859_1.
const unsigned short *charsets[N_CHARSETS] = {
ISO_8859_1, // 0 (fallback)
ISO_8859_1, // 1
ISO_8859_2, // 2
ISO_8859_3, // 3
ISO_8859_4, // 4
ISO_8859_5, // 5
ISO_8859_1, // 6 (fallback)
ISO_8859_7, // 7
ISO_8859_1, // 8 (fallback)
ISO_8859_9, // 9
ISO_8859_1, // 10 (fallback)
ISO_8859_1, // 11 (fallback)
ISO_8859_1, // 12 (fallback)
ISO_8859_1, // 13 (fallback)
ISO_8859_14, // 14
ISO_8859_1, // 15 (fallback)
ISO_8859_1, // 16 (fallback)
ISO_8859_1, // 17 (fallback)
KOI8_R, // 18
ISCII // 19
};

// Tables of the relative lengths of vowels, depending on the // Tables of the relative lengths of vowels, depending on the
// type of the two phonemes that follow // type of the two phonemes that follow
// indexes are the "length_mod" value for the following phonemes // indexes are the "length_mod" value for the following phonemes

+ 1
- 3
src/libespeak-ng/translate.h View File



#define N_RULE_GROUP2 120 // max num of two-letter rule chains #define N_RULE_GROUP2 120 // max num of two-letter rule chains
#define N_HASH_DICT 1024 #define N_HASH_DICT 1024
#define N_CHARSETS 20
#define N_LETTER_GROUPS 95 // maximum is 127-32 #define N_LETTER_GROUPS 95 // maximum is 127-32


// dictionary flags, word 1 // dictionary flags, word 1
short stress_lengths[8]; short stress_lengths[8];
int dict_condition; // conditional apply some pronunciation rules and dict.lookups int dict_condition; // conditional apply some pronunciation rules and dict.lookups
int dict_min_size; int dict_min_size;
const unsigned short *charset_a0; // unicodes for characters 0xa0 to oxff
espeak_ng_ENCODING encoding;
const wchar_t *char_plus_apostrophe; // single chars + apostrophe treated as words const wchar_t *char_plus_apostrophe; // single chars + apostrophe treated as words
const wchar_t *punct_within_word; // allow these punctuation characters within words const wchar_t *punct_within_word; // allow these punctuation characters within words
const unsigned short *chars_ignore; const unsigned short *chars_ignore;


extern Translator *translator; extern Translator *translator;
extern Translator *translator2; extern Translator *translator2;
extern const unsigned short *charsets[N_CHARSETS];
extern char dictionary_name[40]; extern char dictionary_name[40];
extern char ctrl_embedded; // to allow an alternative CTRL for embedded commands extern char ctrl_embedded; // to allow an alternative CTRL for embedded commands
extern unsigned char *p_textinput; extern unsigned char *p_textinput;

+ 0
- 10
src/libespeak-ng/voices.c View File

V_DICTRULES, V_DICTRULES,
V_STRESSRULE, V_STRESSRULE,
V_STRESSOPT, V_STRESSOPT,
V_CHARSET,
V_NUMBERS, V_NUMBERS,
V_OPTION, V_OPTION,


{ "dictrules", V_DICTRULES }, { "dictrules", V_DICTRULES },
{ "stressrule", V_STRESSRULE }, { "stressrule", V_STRESSRULE },
{ "stressopt", V_STRESSOPT }, { "stressopt", V_STRESSOPT },
{ "charset", V_CHARSET },
{ "replace", V_REPLACE }, { "replace", V_REPLACE },
{ "words", V_WORDGAP }, { "words", V_WORDGAP },
{ "echo", V_ECHO }, { "echo", V_ECHO },
&langopts->unstressed_wd1, &langopts->unstressed_wd1,
&langopts->unstressed_wd2); &langopts->unstressed_wd2);
break; break;
case V_CHARSET:
if ((sscanf(p, "%d", &value) == 1) && (value < N_CHARSETS)) {
if (new_translator != NULL)
new_translator->charset_a0 = charsets[value];
else
fprintf(stderr, "The charset attribute is specified before language.\n");
}
break;
case V_OPTION: case V_OPTION:
value2 = 0; value2 = 0;
if (((sscanf(p, "%s %d %d", option_name, &value, &value2) >= 2) && ((ix = LookupMnem(options_tab, option_name)) >= 0)) || if (((sscanf(p, "%s %d %d", option_name, &value, &value2) >= 2) && ((ix = LookupMnem(options_tab, option_name)) >= 0)) ||

Loading…
Cancel
Save