Browse Source

encoding.c: Support the ISO-10646-UCS-2 encoding.

master
Reece H. Dunn 8 years ago
parent
commit
b74f756f00
3 changed files with 45 additions and 1 deletion
  1. 1
    0
      src/include/espeak-ng/espeak_ng.h
  2. 17
    1
      src/libespeak-ng/encoding.c
  3. 27
    0
      tests/encoding.c

+ 1
- 0
src/include/espeak-ng/espeak_ng.h View File

ESPEAKNG_ENCODING_KOI8_R, ESPEAKNG_ENCODING_KOI8_R,
ESPEAKNG_ENCODING_ISCII, ESPEAKNG_ENCODING_ISCII,
ESPEAKNG_ENCODING_UTF_8, ESPEAKNG_ENCODING_UTF_8,
ESPEAKNG_ENCODING_ISO_10646_UCS_2,
} espeak_ng_ENCODING; } espeak_ng_ENCODING;


ESPEAK_NG_API espeak_ng_ENCODING ESPEAK_NG_API espeak_ng_ENCODING

+ 17
- 1
src/libespeak-ng/encoding.c View File

{ "ISO_8859-16", ESPEAKNG_ENCODING_ISO_8859_16 }, { "ISO_8859-16", ESPEAKNG_ENCODING_ISO_8859_16 },
{ "ISO_8859-16:2001", ESPEAKNG_ENCODING_ISO_8859_16 }, { "ISO_8859-16:2001", ESPEAKNG_ENCODING_ISO_8859_16 },
{ "ISO646-US", ESPEAKNG_ENCODING_US_ASCII }, { "ISO646-US", ESPEAKNG_ENCODING_US_ASCII },
{ "ISO-10646-UCS-2", ESPEAKNG_ENCODING_ISO_10646_UCS_2 },
{ "ISO-8859-1", ESPEAKNG_ENCODING_ISO_8859_1 }, { "ISO-8859-1", ESPEAKNG_ENCODING_ISO_8859_1 },
{ "ISO-8859-2", ESPEAKNG_ENCODING_ISO_8859_2 }, { "ISO-8859-2", ESPEAKNG_ENCODING_ISO_8859_2 },
{ "ISO-8859-3", ESPEAKNG_ENCODING_ISO_8859_3 }, { "ISO-8859-3", ESPEAKNG_ENCODING_ISO_8859_3 },
{ "csKOI8R", ESPEAKNG_ENCODING_KOI8_R }, { "csKOI8R", ESPEAKNG_ENCODING_KOI8_R },
{ "csTIS620", ESPEAKNG_ENCODING_ISO_8859_11 }, { "csTIS620", ESPEAKNG_ENCODING_ISO_8859_11 },
{ "csUTF8", ESPEAKNG_ENCODING_UTF_8 }, { "csUTF8", ESPEAKNG_ENCODING_UTF_8 },
{ "csUnicode", ESPEAKNG_ENCODING_ISO_10646_UCS_2 },
{ "arabic", ESPEAKNG_ENCODING_ISO_8859_6 }, { "arabic", ESPEAKNG_ENCODING_ISO_8859_6 },
{ "cyrillic", ESPEAKNG_ENCODING_ISO_8859_5 }, { "cyrillic", ESPEAKNG_ENCODING_ISO_8859_5 },
{ "greek", ESPEAKNG_ENCODING_ISO_8859_7 }, { "greek", ESPEAKNG_ENCODING_ISO_8859_7 },
return 0xFFFD; return 0xFFFD;
} }


/*
 * Decode one ISO-10646-UCS-2 code unit from the decoder's input.
 *
 * Two bytes are consumed per call and combined low byte first:
 * result = byte0 | (byte1 << 8).
 *
 * If fewer than two bytes remain, `current` is moved to `end` (discarding
 * any trailing odd byte) and 0xFFFD, the Unicode replacement character,
 * is returned.
 */
static uint32_t
string_decoder_getc_iso_10646_ucs_2(espeak_ng_TEXT_DECODER *decoder)
{
	/*
	 * Compare the number of remaining bytes rather than forming
	 * `current + 1`: once `current` equals `end` (which this function
	 * itself sets up below), `current + 1` would point beyond
	 * one-past-the-end, and merely computing such a pointer is undefined
	 * behaviour in C. NOTE(review): assumes `current` and `end` point into
	 * the same buffer -- confirm against the decoder setup code.
	 */
	if (decoder->end - decoder->current < 2) {
		decoder->current = decoder->end;
		return 0xFFFD;
	}

	uint8_t low = *decoder->current++ & 0xFF;
	uint8_t high = *decoder->current++ & 0xFF;
	return (uint32_t)low | ((uint32_t)high << 8);
}

typedef struct typedef struct
{ {
uint32_t (*get)(espeak_ng_TEXT_DECODER *decoder); uint32_t (*get)(espeak_ng_TEXT_DECODER *decoder);
{ string_decoder_getc_codepage, KOI8_R }, { string_decoder_getc_codepage, KOI8_R },
{ string_decoder_getc_codepage, ISCII }, { string_decoder_getc_codepage, ISCII },
{ string_decoder_getc_utf_8, NULL }, { string_decoder_getc_utf_8, NULL },
{ string_decoder_getc_iso_10646_ucs_2, NULL },
}; };


espeak_ng_TEXT_DECODER * espeak_ng_TEXT_DECODER *
int length, int length,
espeak_ng_ENCODING encoding) espeak_ng_ENCODING encoding)
{ {
if (encoding > ESPEAKNG_ENCODING_UTF_8)
if (encoding > ESPEAKNG_ENCODING_ISO_10646_UCS_2)
return ENS_UNKNOWN_TEXT_ENCODING; return ENS_UNKNOWN_TEXT_ENCODING;


const encoding_t *enc = string_decoders + encoding; const encoding_t *enc = string_decoders + encoding;

+ 27
- 0
tests/encoding.c View File

destroy_text_decoder(decoder); destroy_text_decoder(decoder);
} }


/*
 * Exercises the ISO-10646-UCS-2 encoding:
 *  - both "ISO-10646-UCS-2" and "csUnicode" must resolve to
 *    ESPEAKNG_ENCODING_ISO_10646_UCS_2;
 *  - a string decoded with that encoding must yield the expected sequence
 *    of values, with end-of-input reported only after the last one.
 */
void
test_iso_10646_ucs_2_encoding()
{
	/*
	 * Values expected from the input below, in order. The literal holds
	 * ten bytes but only nine are passed to the decoder, so the final
	 * code unit is incomplete and 0xFFFD is expected in its place.
	 */
	static const unsigned long expected[] = { 'a', 'G', 0xA0, 0x2122, 0xFFFD };
	const int expected_count = (int)(sizeof(expected) / sizeof(expected[0]));

	printf("testing ISO-10646-UCS-2 encoding\n");

	assert(espeak_ng_EncodingFromName("ISO-10646-UCS-2") == ESPEAKNG_ENCODING_ISO_10646_UCS_2);
	assert(espeak_ng_EncodingFromName("csUnicode") == ESPEAKNG_ENCODING_ISO_10646_UCS_2);

	espeak_ng_TEXT_DECODER *decoder = create_text_decoder();

	assert(text_decoder_decode_string(decoder, "a\00G\00\xA0\00\x22\x21\x23\x21", 9, ESPEAKNG_ENCODING_ISO_10646_UCS_2) == ENS_OK);

	/* The decoder must not report EOF before each value is read. */
	for (int i = 0; i < expected_count; ++i) {
		assert(text_decoder_eof(decoder) == 0);
		assert(text_decoder_getc(decoder) == expected[i]);
	}

	/* After the last value has been read, the input is exhausted. */
	assert(text_decoder_eof(decoder) == 1);

	destroy_text_decoder(decoder);
}

int int
main(int argc, char **argv) main(int argc, char **argv)
{ {
test_iso_8859_16_encoding(); test_iso_8859_16_encoding();


test_utf_8_encoding(); test_utf_8_encoding();
test_iso_10646_ucs_2_encoding();


printf("done\n"); printf("done\n");



Loading…
Cancel
Save