|
|
@@ -294,7 +294,7 @@ static const uint32_t ISO_8859_9[0x80] = { |
|
|
|
}; |
|
|
|
|
|
|
|
static uint32_t |
|
|
|
text_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder) |
|
|
|
string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder) |
|
|
|
{ |
|
|
|
uint8_t c = *decoder->current++ & 0xFF; |
|
|
|
return (c >= 0x80) ? 0xFFFD : c; |
|
|
@@ -303,18 +303,38 @@ text_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder) |
|
|
|
// Reference: http://www.iana.org/go/rfc1345 |
|
|
|
// Reference: http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT |
|
|
|
static uint32_t |
|
|
|
text_decoder_getc_iso_8859_1(espeak_ng_TEXT_DECODER *decoder) |
|
|
|
string_decoder_getc_iso_8859_1(espeak_ng_TEXT_DECODER *decoder) |
|
|
|
{ |
|
|
|
return *decoder->current++ & 0xFF; |
|
|
|
} |
|
|
|
|
|
|
|
static uint32_t |
|
|
|
text_decoder_getc_codepage(espeak_ng_TEXT_DECODER *decoder) |
|
|
|
string_decoder_getc_codepage(espeak_ng_TEXT_DECODER *decoder) |
|
|
|
{ |
|
|
|
uint8_t c = *decoder->current++ & 0xFF; |
|
|
|
return (c >= 0x80) ? decoder->codepage[c - 0x80] : c; |
|
|
|
} |
|
|
|
|
|
|
|
typedef struct |
|
|
|
{ |
|
|
|
uint32_t (*get)(espeak_ng_TEXT_DECODER *decoder); |
|
|
|
const uint32_t *codepage; |
|
|
|
} encoding_t; |
|
|
|
|
|
|
|
static const encoding_t string_decoders[] = { |
|
|
|
{ NULL, NULL }, |
|
|
|
{ string_decoder_getc_us_ascii, NULL }, |
|
|
|
{ string_decoder_getc_iso_8859_1, NULL }, |
|
|
|
{ string_decoder_getc_codepage, ISO_8859_2 }, |
|
|
|
{ string_decoder_getc_codepage, ISO_8859_3 }, |
|
|
|
{ string_decoder_getc_codepage, ISO_8859_4 }, |
|
|
|
{ string_decoder_getc_codepage, ISO_8859_5 }, |
|
|
|
{ string_decoder_getc_codepage, ISO_8859_6 }, |
|
|
|
{ string_decoder_getc_codepage, ISO_8859_7 }, |
|
|
|
{ string_decoder_getc_codepage, ISO_8859_8 }, |
|
|
|
{ string_decoder_getc_codepage, ISO_8859_9 }, |
|
|
|
}; |
|
|
|
|
|
|
|
espeak_ng_TEXT_DECODER * |
|
|
|
create_text_decoder(void) |
|
|
|
{ |
|
|
@@ -334,67 +354,21 @@ destroy_text_decoder(espeak_ng_TEXT_DECODER *decoder) |
|
|
|
if (decoder) free(decoder); |
|
|
|
} |
|
|
|
|
|
|
|
static int |
|
|
|
initialize_encoding(espeak_ng_TEXT_DECODER *decoder, |
|
|
|
espeak_ng_ENCODING encoding) |
|
|
|
{ |
|
|
|
switch (encoding) |
|
|
|
{ |
|
|
|
case ESPEAKNG_ENCODING_US_ASCII: |
|
|
|
decoder->get = text_decoder_getc_us_ascii; |
|
|
|
decoder->codepage = NULL; |
|
|
|
break; |
|
|
|
case ESPEAKNG_ENCODING_ISO_8859_1: |
|
|
|
decoder->get = text_decoder_getc_iso_8859_1; |
|
|
|
decoder->codepage = NULL; |
|
|
|
break; |
|
|
|
case ESPEAKNG_ENCODING_ISO_8859_2: |
|
|
|
decoder->get = text_decoder_getc_codepage; |
|
|
|
decoder->codepage = ISO_8859_2; |
|
|
|
break; |
|
|
|
case ESPEAKNG_ENCODING_ISO_8859_3: |
|
|
|
decoder->get = text_decoder_getc_codepage; |
|
|
|
decoder->codepage = ISO_8859_3; |
|
|
|
break; |
|
|
|
case ESPEAKNG_ENCODING_ISO_8859_4: |
|
|
|
decoder->get = text_decoder_getc_codepage; |
|
|
|
decoder->codepage = ISO_8859_4; |
|
|
|
break; |
|
|
|
case ESPEAKNG_ENCODING_ISO_8859_5: |
|
|
|
decoder->get = text_decoder_getc_codepage; |
|
|
|
decoder->codepage = ISO_8859_5; |
|
|
|
break; |
|
|
|
case ESPEAKNG_ENCODING_ISO_8859_6: |
|
|
|
decoder->get = text_decoder_getc_codepage; |
|
|
|
decoder->codepage = ISO_8859_6; |
|
|
|
break; |
|
|
|
case ESPEAKNG_ENCODING_ISO_8859_7: |
|
|
|
decoder->get = text_decoder_getc_codepage; |
|
|
|
decoder->codepage = ISO_8859_7; |
|
|
|
break; |
|
|
|
case ESPEAKNG_ENCODING_ISO_8859_8: |
|
|
|
decoder->get = text_decoder_getc_codepage; |
|
|
|
decoder->codepage = ISO_8859_8; |
|
|
|
break; |
|
|
|
case ESPEAKNG_ENCODING_ISO_8859_9: |
|
|
|
decoder->get = text_decoder_getc_codepage; |
|
|
|
decoder->codepage = ISO_8859_9; |
|
|
|
break; |
|
|
|
default: |
|
|
|
return 0; |
|
|
|
} |
|
|
|
return 1; |
|
|
|
} |
|
|
|
|
|
|
|
espeak_ng_STATUS |
|
|
|
text_decoder_decode_string(espeak_ng_TEXT_DECODER *decoder, |
|
|
|
const char *string, |
|
|
|
int length, |
|
|
|
espeak_ng_ENCODING encoding) |
|
|
|
{ |
|
|
|
if (!initialize_encoding(decoder, encoding)) |
|
|
|
if (encoding > ESPEAKNG_ENCODING_ISO_8859_9) |
|
|
|
return ENS_UNKNOWN_TEXT_ENCODING; |
|
|
|
|
|
|
|
const encoding_t *enc = string_decoders + encoding; |
|
|
|
if (enc->get == NULL) |
|
|
|
return ENS_UNKNOWN_TEXT_ENCODING; |
|
|
|
|
|
|
|
decoder->get = enc->get; |
|
|
|
decoder->codepage = enc->codepage; |
|
|
|
decoder->current = string; |
|
|
|
decoder->end = string + length; |
|
|
|
return ENS_OK; |