Browse Source

encoding.c: Support the KOI8-R encoding.

master
Reece H. Dunn 8 years ago
parent
commit
39f3ea54cf
3 changed files with 53 additions and 1 deletions
  1. 1
    0
      src/include/espeak-ng/espeak_ng.h
  2. 25
    1
      src/libespeak-ng/encoding.c
  3. 27
    0
      tests/encoding.c

+ 1
- 0
src/include/espeak-ng/espeak_ng.h View File

@@ -206,6 +206,7 @@ typedef enum
ESPEAKNG_ENCODING_ISO_8859_14,
ESPEAKNG_ENCODING_ISO_8859_15,
ESPEAKNG_ENCODING_ISO_8859_16,
ESPEAKNG_ENCODING_KOI8_R,
} espeak_ng_ENCODING;

ESPEAK_NG_API espeak_ng_ENCODING

+ 25
- 1
src/libespeak-ng/encoding.c View File

@@ -77,6 +77,7 @@ MNEM_TAB mnem_encoding[] = {
{ "ISO-8859-14", ESPEAKNG_ENCODING_ISO_8859_14 },
{ "ISO-8859-15", ESPEAKNG_ENCODING_ISO_8859_15 },
{ "ISO-8859-16", ESPEAKNG_ENCODING_ISO_8859_16 },
{ "KOI8-R", ESPEAKNG_ENCODING_KOI8_R },
{ "Latin-9", ESPEAKNG_ENCODING_ISO_8859_15 },
{ "TIS-620", ESPEAKNG_ENCODING_ISO_8859_11 },
{ "US-ASCII", ESPEAKNG_ENCODING_US_ASCII },
@@ -97,6 +98,7 @@ MNEM_TAB mnem_encoding[] = {
{ "csISOLatinCyrillic",ESPEAKNG_ENCODING_ISO_8859_5 },
{ "csISOLatinGreek", ESPEAKNG_ENCODING_ISO_8859_7 },
{ "csISOLatinHebrew", ESPEAKNG_ENCODING_ISO_8859_8 },
{ "csKOI8R", ESPEAKNG_ENCODING_KOI8_R },
{ "csTIS620", ESPEAKNG_ENCODING_ISO_8859_11 },
{ "arabic", ESPEAKNG_ENCODING_ISO_8859_6 },
{ "cyrillic", ESPEAKNG_ENCODING_ISO_8859_5 },
@@ -445,6 +447,27 @@ static const uint32_t ISO_8859_16[0x80] = {
0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff, // f8
};

// KOI8-R (RFC 1489) code page: maps bytes 0x80..0xFF to Unicode code points.
// The table is indexed by (byte - 0x80). Bytes 0x80..0xBF are mostly
// box-drawing and block/mathematical symbols (plus U+0451/U+0401 for the
// letter io); bytes 0xC0..0xFF are the lowercase and then uppercase Russian
// letters, arranged in the KOI8 ordering rather than alphabetical order.
//
// Reference: http://www.iana.org/go/rfc1489
// Reference: http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT
static const uint32_t KOI8_R[0x80] = {
	0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 0x2518, 0x251c, 0x2524, // 80
	0x252c, 0x2534, 0x253c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590, // 88
	0x2591, 0x2592, 0x2593, 0x2320, 0x25a0, 0x2219, 0x221a, 0x2248, // 90
	0x2264, 0x2265, 0x00a0, 0x2321, 0x00b0, 0x00b2, 0x00b7, 0x00f7, // 98
	0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, // a0
	0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e, // a8
	0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, // b0
	0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9, // b8
	0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, // c0
	0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, // c8
	0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, // d0
	0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a, // d8
	0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, // e0
	0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, // e8
	0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, // f0
	0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a, // f8
};

static uint32_t
string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder)
{
@@ -492,6 +515,7 @@ static const encoding_t string_decoders[] = {
{ string_decoder_getc_codepage, ISO_8859_14 },
{ string_decoder_getc_codepage, ISO_8859_15 },
{ string_decoder_getc_codepage, ISO_8859_16 },
{ string_decoder_getc_codepage, KOI8_R },
};

espeak_ng_TEXT_DECODER *
@@ -519,7 +543,7 @@ text_decoder_decode_string(espeak_ng_TEXT_DECODER *decoder,
int length,
espeak_ng_ENCODING encoding)
{
if (encoding > ESPEAKNG_ENCODING_ISO_8859_16)
if (encoding > ESPEAKNG_ENCODING_KOI8_R)
return ENS_UNKNOWN_TEXT_ENCODING;

const encoding_t *enc = string_decoders + encoding;

+ 27
- 0
tests/encoding.c View File

@@ -91,6 +91,32 @@ test_us_ascii_encoding()
destroy_text_decoder(decoder);
}

// Checks that KOI8-R can be looked up by both of its registered names and
// that a short byte string decodes to the code points defined by the KOI8-R
// mapping in RFC 1489.
void
test_koi8_r_encoding()
{
	printf("testing KOI8-R encoding\n");

	assert(espeak_ng_EncodingFromName("KOI8-R") == ESPEAKNG_ENCODING_KOI8_R);
	assert(espeak_ng_EncodingFromName("csKOI8R") == ESPEAKNG_ENCODING_KOI8_R);

	espeak_ng_TEXT_DECODER *decoder = create_text_decoder();

	// Two ASCII bytes followed by three bytes from the upper half of the code page.
	assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_KOI8_R) == ENS_OK);
	assert(text_decoder_eof(decoder) == 0);
	assert(text_decoder_getc(decoder) == 'a');
	assert(text_decoder_eof(decoder) == 0);
	assert(text_decoder_getc(decoder) == 'G');
	assert(text_decoder_eof(decoder) == 0);
	assert(text_decoder_getc(decoder) == 0x2593); // 0x92: DARK SHADE
	assert(text_decoder_eof(decoder) == 0);
	assert(text_decoder_getc(decoder) == 0x2550); // 0xA0: BOX DRAWINGS DOUBLE HORIZONTAL
	assert(text_decoder_eof(decoder) == 0);
	assert(text_decoder_getc(decoder) == 0x0447); // 0xDE: CYRILLIC SMALL LETTER CHE
	assert(text_decoder_eof(decoder) == 1);

	destroy_text_decoder(decoder);
}

void
test_iso_8859_1_encoding()
{
@@ -555,6 +581,7 @@ main(int argc, char **argv)
test_unknown_encoding();

test_us_ascii_encoding();
test_koi8_r_encoding();

test_iso_8859_1_encoding();
test_iso_8859_2_encoding();

Loading…
Cancel
Save