Browse Source

encoding.c: Support the ISO 8859-11 encoding.

master
Reece H. Dunn 8 years ago
parent
commit
495c0aed20
3 changed files with 57 additions and 1 deletion
  1. 1
    0
      src/include/espeak-ng/espeak_ng.h
  2. 25
    1
      src/libespeak-ng/encoding.c
  3. 31
    0
      tests/encoding.c

+ 1
- 0
src/include/espeak-ng/espeak_ng.h View File

ESPEAKNG_ENCODING_ISO_8859_8, ESPEAKNG_ENCODING_ISO_8859_8,
ESPEAKNG_ENCODING_ISO_8859_9, ESPEAKNG_ENCODING_ISO_8859_9,
ESPEAKNG_ENCODING_ISO_8859_10, ESPEAKNG_ENCODING_ISO_8859_10,
ESPEAKNG_ENCODING_ISO_8859_11,
} espeak_ng_ENCODING; } espeak_ng_ENCODING;


ESPEAK_NG_API espeak_ng_ENCODING ESPEAK_NG_API espeak_ng_ENCODING

+ 25
- 1
src/libespeak-ng/encoding.c View File

{ "ISO-8859-9:1989", ESPEAKNG_ENCODING_ISO_8859_9 }, { "ISO-8859-9:1989", ESPEAKNG_ENCODING_ISO_8859_9 },
{ "ISO-8859-10", ESPEAKNG_ENCODING_ISO_8859_10 }, { "ISO-8859-10", ESPEAKNG_ENCODING_ISO_8859_10 },
{ "ISO-8859-10:1992", ESPEAKNG_ENCODING_ISO_8859_10 }, { "ISO-8859-10:1992", ESPEAKNG_ENCODING_ISO_8859_10 },
{ "ISO-8859-11", ESPEAKNG_ENCODING_ISO_8859_11 },
{ "TIS-620", ESPEAKNG_ENCODING_ISO_8859_11 },
{ "US-ASCII", ESPEAKNG_ENCODING_US_ASCII }, { "US-ASCII", ESPEAKNG_ENCODING_US_ASCII },
{ "cp367", ESPEAKNG_ENCODING_US_ASCII }, { "cp367", ESPEAKNG_ENCODING_US_ASCII },
{ "cp819", ESPEAKNG_ENCODING_ISO_8859_1 }, { "cp819", ESPEAKNG_ENCODING_ISO_8859_1 },
{ "csISOLatinCyrillic",ESPEAKNG_ENCODING_ISO_8859_5 }, { "csISOLatinCyrillic",ESPEAKNG_ENCODING_ISO_8859_5 },
{ "csISOLatinGreek", ESPEAKNG_ENCODING_ISO_8859_7 }, { "csISOLatinGreek", ESPEAKNG_ENCODING_ISO_8859_7 },
{ "csISOLatinHebrew", ESPEAKNG_ENCODING_ISO_8859_8 }, { "csISOLatinHebrew", ESPEAKNG_ENCODING_ISO_8859_8 },
{ "csTIS620", ESPEAKNG_ENCODING_ISO_8859_11 },
{ "arabic", ESPEAKNG_ENCODING_ISO_8859_6 }, { "arabic", ESPEAKNG_ENCODING_ISO_8859_6 },
{ "cyrillic", ESPEAKNG_ENCODING_ISO_8859_5 }, { "cyrillic", ESPEAKNG_ENCODING_ISO_8859_5 },
{ "greek", ESPEAKNG_ENCODING_ISO_8859_7 }, { "greek", ESPEAKNG_ENCODING_ISO_8859_7 },
0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138, // f8 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138, // f8
}; };


/*
 * ISO 8859-11 (Thai) upper-half lookup table.
 *
 * Entry i holds the Unicode code point for byte value 0x80 + i; the label at
 * the start of each row is the byte value of that row's first entry.
 * 0xFFFD marks the byte values that have no Thai character assigned
 * (0xDB-0xDE and 0xFC-0xFF).
 *
 * Reference: http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT
 */
static const uint32_t ISO_8859_11[0x80] = {
	/* 0x80-0x9F: values equal to the byte itself */
	/* 80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
	/* 88 */ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
	/* 90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
	/* 98 */ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,

	/* 0xA0: 0x00A0, then 0xA1-0xDA: U+0E01..U+0E3A */
	/* a0 */ 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
	/* a8 */ 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
	/* b0 */ 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
	/* b8 */ 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
	/* c0 */ 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
	/* c8 */ 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
	/* d0 */ 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,

	/* 0xDB-0xDE are unassigned; 0xDF is U+0E3F */
	/* d8 */ 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,

	/* 0xE0-0xFB: U+0E40..U+0E5B */
	/* e0 */ 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
	/* e8 */ 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
	/* f0 */ 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,

	/* 0xFC-0xFF are unassigned */
	/* f8 */ 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
};

static uint32_t static uint32_t
string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder) string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder)
{ {
{ string_decoder_getc_codepage, ISO_8859_8 }, { string_decoder_getc_codepage, ISO_8859_8 },
{ string_decoder_getc_codepage, ISO_8859_9 }, { string_decoder_getc_codepage, ISO_8859_9 },
{ string_decoder_getc_codepage, ISO_8859_10 }, { string_decoder_getc_codepage, ISO_8859_10 },
{ string_decoder_getc_codepage, ISO_8859_11 },
}; };


espeak_ng_TEXT_DECODER * espeak_ng_TEXT_DECODER *
int length, int length,
espeak_ng_ENCODING encoding) espeak_ng_ENCODING encoding)
{ {
if (encoding > ESPEAKNG_ENCODING_ISO_8859_10)
if (encoding > ESPEAKNG_ENCODING_ISO_8859_11)
return ENS_UNKNOWN_TEXT_ENCODING; return ENS_UNKNOWN_TEXT_ENCODING;


const encoding_t *enc = string_decoders + encoding; const encoding_t *enc = string_decoders + encoding;

+ 31
- 0
tests/encoding.c View File

destroy_text_decoder(decoder); destroy_text_decoder(decoder);
} }


// Exercises the ISO-8859-11 support: first the encoding-name lookups, then
// decoding of a five-byte sample ("aG" followed by 0x92, 0xA0 and 0xEE)
// one character at a time.
void
test_iso_8859_11_encoding()
{
	// Names that must all resolve to ESPEAKNG_ENCODING_ISO_8859_11.
	static const char *const aliases[] = { "ISO-8859-11", "TIS-620", "csTIS620" };
	// Values text_decoder_getc must return for the sample bytes, in order.
	static const unsigned int expected[] = { 'a', 'G', 0x92, 0xA0, 0x0e4e };
	const int alias_count = (int)(sizeof(aliases) / sizeof(aliases[0]));
	const int expected_count = (int)(sizeof(expected) / sizeof(expected[0]));
	int idx;

	printf("testing ISO-8859-11 encoding\n");

	for (idx = 0; idx < alias_count; ++idx) {
		assert(espeak_ng_EncodingFromName(aliases[idx]) == ESPEAKNG_ENCODING_ISO_8859_11);
	}

	espeak_ng_TEXT_DECODER *decoder = create_text_decoder();

	assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xEE", 5, ESPEAKNG_ENCODING_ISO_8859_11) == ENS_OK);

	// The decoder must not report end-of-input until every character has
	// been read.
	for (idx = 0; idx < expected_count; ++idx) {
		assert(text_decoder_eof(decoder) == 0);
		assert(text_decoder_getc(decoder) == expected[idx]);
	}
	assert(text_decoder_eof(decoder) == 1);

	destroy_text_decoder(decoder);
}

int int
main(int argc, char **argv) main(int argc, char **argv)
{ {
test_unbound_text_decoder(); test_unbound_text_decoder();
test_unknown_encoding(); test_unknown_encoding();

test_us_ascii_encoding(); test_us_ascii_encoding();

test_iso_8859_1_encoding(); test_iso_8859_1_encoding();
test_iso_8859_2_encoding(); test_iso_8859_2_encoding();
test_iso_8859_3_encoding(); test_iso_8859_3_encoding();
test_iso_8859_8_encoding(); test_iso_8859_8_encoding();
test_iso_8859_9_encoding(); test_iso_8859_9_encoding();
test_iso_8859_10_encoding(); test_iso_8859_10_encoding();
test_iso_8859_11_encoding();

printf("done\n"); printf("done\n");


return EXIT_SUCCESS; return EXIT_SUCCESS;

Loading…
Cancel
Save