Browse Source

encoding.c: Support the ISO 8859-1 encoding.

master
Reece H. Dunn 8 years ago
parent
commit
26bec1eedf
3 changed files with 67 additions and 2 deletions
  1. 1
    0
      src/include/espeak-ng/espeak_ng.h
  2. 32
    2
      src/libespeak-ng/encoding.c
  3. 34
    0
      tests/encoding.c

+ 1
- 0
src/include/espeak-ng/espeak_ng.h View File

{ {
ESPEAKNG_ENCODING_UNKNOWN, ESPEAKNG_ENCODING_UNKNOWN,
ESPEAKNG_ENCODING_US_ASCII, ESPEAKNG_ENCODING_US_ASCII,
ESPEAKNG_ENCODING_ISO_8859_1,
} espeak_ng_ENCODING; } espeak_ng_ENCODING;


ESPEAK_NG_API espeak_ng_ENCODING ESPEAK_NG_API espeak_ng_ENCODING

+ 32
- 2
src/libespeak-ng/encoding.c View File

{ "ANSI_X3.4-1968", ESPEAKNG_ENCODING_US_ASCII }, { "ANSI_X3.4-1968", ESPEAKNG_ENCODING_US_ASCII },
{ "ANSI_X3.4-1986", ESPEAKNG_ENCODING_US_ASCII }, { "ANSI_X3.4-1986", ESPEAKNG_ENCODING_US_ASCII },
{ "IBM367", ESPEAKNG_ENCODING_US_ASCII }, { "IBM367", ESPEAKNG_ENCODING_US_ASCII },
{ "IBM819", ESPEAKNG_ENCODING_ISO_8859_1 },
{ "ISO_646.irv:1991", ESPEAKNG_ENCODING_US_ASCII }, { "ISO_646.irv:1991", ESPEAKNG_ENCODING_US_ASCII },
{ "ISO_8859-1", ESPEAKNG_ENCODING_ISO_8859_1 },
{ "ISO646-US", ESPEAKNG_ENCODING_US_ASCII }, { "ISO646-US", ESPEAKNG_ENCODING_US_ASCII },
{ "ISO-8859-1", ESPEAKNG_ENCODING_ISO_8859_1 },
{ "ISO-8859-1:1987", ESPEAKNG_ENCODING_ISO_8859_1 },
{ "US-ASCII", ESPEAKNG_ENCODING_US_ASCII }, { "US-ASCII", ESPEAKNG_ENCODING_US_ASCII },
{ "cp367", ESPEAKNG_ENCODING_US_ASCII }, { "cp367", ESPEAKNG_ENCODING_US_ASCII },
{ "cp819", ESPEAKNG_ENCODING_ISO_8859_1 },
{ "csASCII", ESPEAKNG_ENCODING_US_ASCII }, { "csASCII", ESPEAKNG_ENCODING_US_ASCII },
{ "csISOLatin1", ESPEAKNG_ENCODING_ISO_8859_1 },
{ "iso-ir-6", ESPEAKNG_ENCODING_US_ASCII }, { "iso-ir-6", ESPEAKNG_ENCODING_US_ASCII },
{ "iso-ir-100", ESPEAKNG_ENCODING_ISO_8859_1 },
{ "latin1", ESPEAKNG_ENCODING_ISO_8859_1 },
{ "l1", ESPEAKNG_ENCODING_ISO_8859_1 },
{ "us", ESPEAKNG_ENCODING_US_ASCII }, { "us", ESPEAKNG_ENCODING_US_ASCII },
{ NULL, ESPEAKNG_ENCODING_UNKNOWN } { NULL, ESPEAKNG_ENCODING_UNKNOWN }
}; };
{ {
const char *current; const char *current;
const char *end; const char *end;

uint32_t (*get)(espeak_ng_TEXT_DECODER *decoder);
}; };


// Decode a single US-ASCII character from the decoder's input.
//
// Reads the byte at decoder->current and advances decoder->current by one.
// Bytes below 0x80 are returned unchanged; any byte with the high bit set
// is reported as 0xFFFD (the Unicode replacement character code point).
//
// This function does not compare decoder->current against decoder->end, so
// the caller is responsible for making sure input remains before calling it.
static uint32_t
text_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder)
{
	// Mask to 8 bits so a signed char never yields a negative value.
	uint8_t byte = (uint8_t)(*decoder->current & 0xFF);
	decoder->current++;

	if (byte < 0x80)
		return byte;
	return 0xFFFD;
}

// Decode a single ISO 8859-1 character from the decoder's input.
//
// Reads the byte at decoder->current, advances decoder->current by one, and
// returns the byte's unsigned value (0x00-0xFF) unchanged as the result.
//
// This function does not compare decoder->current against decoder->end, so
// the caller is responsible for making sure input remains before calling it.
//
// Reference: http://www.iana.org/go/rfc1345
// Reference: http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT
static uint32_t
text_decoder_getc_iso_8859_1(espeak_ng_TEXT_DECODER *decoder)
{
	// Convert through uint8_t so a signed char never yields a negative value.
	uint8_t byte = (uint8_t)*decoder->current;
	decoder->current++;
	return byte;
}

espeak_ng_TEXT_DECODER * espeak_ng_TEXT_DECODER *
create_text_decoder(void) create_text_decoder(void)
{ {


decoder->current = NULL; decoder->current = NULL;
decoder->end = NULL; decoder->end = NULL;
decoder->get = NULL;
return decoder; return decoder;
} }


switch (encoding) switch (encoding)
{ {
case ESPEAKNG_ENCODING_US_ASCII: case ESPEAKNG_ENCODING_US_ASCII:
decoder->get = text_decoder_getc_us_ascii;
break;
case ESPEAKNG_ENCODING_ISO_8859_1:
decoder->get = text_decoder_getc_iso_8859_1;
break; break;
default: default:
return 0; return 0;
uint32_t uint32_t
text_decoder_getc(espeak_ng_TEXT_DECODER *decoder) text_decoder_getc(espeak_ng_TEXT_DECODER *decoder)
{ {
uint8_t c = *decoder->current++ & 0xFF;
return (c >= 0x80) ? 0xFFFD : c;
return decoder->get(decoder);
} }

+ 34
- 0
tests/encoding.c View File

destroy_text_decoder(decoder); destroy_text_decoder(decoder);
} }


// Exercise the ISO 8859-1 support:
//   1. every listed alias must resolve to ESPEAKNG_ENCODING_ISO_8859_1;
//   2. decoding the 5-byte string "aG\x92\xA0\xDE" must yield each byte's
//      value unchanged, with text_decoder_eof() reporting 0 before every
//      read and 1 once all five bytes have been consumed.
void
test_iso_8859_1_encoding()
{
	static const char *const aliases[] = {
		"ISO-8859-1:1987",
		"ISO-8859-1",
		"ISO_8859-1",
		"iso-ir-100",
		"latin1",
		"l1",
		"IBM819",
		"cp819",
		"csISOLatin1",
	};
	// Expected decoder output, one entry per input byte.
	static const unsigned int expected[] = { 'a', 'G', 0x92, 0xA0, 0xDE };
	const unsigned int alias_count = sizeof(aliases) / sizeof(aliases[0]);
	const unsigned int expected_count = sizeof(expected) / sizeof(expected[0]);
	unsigned int i;

	printf("testing ISO-8859-1 encoding\n");

	for (i = 0; i < alias_count; ++i)
		assert(espeak_ng_EncodingFromName(aliases[i]) == ESPEAKNG_ENCODING_ISO_8859_1);

	espeak_ng_TEXT_DECODER *decoder = create_text_decoder();

	assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_1) == ENS_OK);

	for (i = 0; i < expected_count; ++i) {
		// More input must be pending before each character is read.
		assert(text_decoder_eof(decoder) == 0);
		assert(text_decoder_getc(decoder) == expected[i]);
	}

	// All five bytes consumed: the decoder must now report end of input.
	assert(text_decoder_eof(decoder) == 1);

	destroy_text_decoder(decoder);
}

int int
main(int argc, char **argv) main(int argc, char **argv)
{ {
test_unbound_text_decoder(); test_unbound_text_decoder();
test_unknown_encoding(); test_unknown_encoding();
test_us_ascii_encoding(); test_us_ascii_encoding();
test_iso_8859_1_encoding();
printf("done\n"); printf("done\n");


return EXIT_SUCCESS; return EXIT_SUCCESS;

Loading…
Cancel
Save