| struct espeak_ng_TEXT_DECODER_ | struct espeak_ng_TEXT_DECODER_ | ||||
| { | { | ||||
| const char *current; | |||||
| const char *end; | |||||
| const uint8_t *current; | |||||
| const uint8_t *end; | |||||
| uint32_t (*get)(espeak_ng_TEXT_DECODER *decoder); | uint32_t (*get)(espeak_ng_TEXT_DECODER *decoder); | ||||
| const uint16_t *codepage; | const uint16_t *codepage; | ||||
| static uint32_t | static uint32_t | ||||
| string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder) | string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder) | ||||
| { | { | ||||
| uint8_t c = *decoder->current++ & 0xFF; | |||||
| uint8_t c = *decoder->current++; | |||||
| return (c >= 0x80) ? 0xFFFD : c; | return (c >= 0x80) ? 0xFFFD : c; | ||||
| } | } | ||||
| static uint32_t | static uint32_t | ||||
| string_decoder_getc_iso_8859_1(espeak_ng_TEXT_DECODER *decoder) | string_decoder_getc_iso_8859_1(espeak_ng_TEXT_DECODER *decoder) | ||||
| { | { | ||||
| return *decoder->current++ & 0xFF; | |||||
| return *decoder->current++; | |||||
| } | } | ||||
| static uint32_t | static uint32_t | ||||
| string_decoder_getc_codepage(espeak_ng_TEXT_DECODER *decoder) | string_decoder_getc_codepage(espeak_ng_TEXT_DECODER *decoder) | ||||
| { | { | ||||
| uint8_t c = *decoder->current++ & 0xFF; | |||||
| uint8_t c = *decoder->current++; | |||||
| return (c >= 0x80) ? decoder->codepage[c - 0x80] : c; | return (c >= 0x80) ? decoder->codepage[c - 0x80] : c; | ||||
| } | } | ||||
| static uint32_t | static uint32_t | ||||
| string_decoder_getc_utf_8(espeak_ng_TEXT_DECODER *decoder) | string_decoder_getc_utf_8(espeak_ng_TEXT_DECODER *decoder) | ||||
| { | { | ||||
| uint8_t c = *decoder->current++ & 0xFF; | |||||
| uint8_t c = *decoder->current++; | |||||
| uint32_t ret; | uint32_t ret; | ||||
| switch (c & 0xF0) | switch (c & 0xF0) | ||||
| { | { | ||||
| case 0xC0: case 0xD0: | case 0xC0: case 0xD0: | ||||
| if (decoder->current + 1 >= decoder->end) goto eof; | if (decoder->current + 1 >= decoder->end) goto eof; | ||||
| ret = c & 0x1F; | ret = c & 0x1F; | ||||
| if (((c = *decoder->current++ & 0xFF) & LEADING_2_BITS) != UTF8_TAIL_BITS) goto error; | |||||
| if (((c = *decoder->current++) & LEADING_2_BITS) != UTF8_TAIL_BITS) goto error; | |||||
| ret = (ret << 6) + (c & 0x3F); | ret = (ret << 6) + (c & 0x3F); | ||||
| return ret; | return ret; | ||||
| // 3-byte UTF-8 sequence | // 3-byte UTF-8 sequence | ||||
| case 0xE0: | case 0xE0: | ||||
| if (decoder->current + 2 >= decoder->end) goto eof; | if (decoder->current + 2 >= decoder->end) goto eof; | ||||
| ret = c & 0x0F; | ret = c & 0x0F; | ||||
| if (((c = *decoder->current++ & 0xFF) & LEADING_2_BITS) != UTF8_TAIL_BITS) goto error; | |||||
| if (((c = *decoder->current++) & LEADING_2_BITS) != UTF8_TAIL_BITS) goto error; | |||||
| ret = (ret << 6) + (c & 0x3F); | ret = (ret << 6) + (c & 0x3F); | ||||
| if (((c = *decoder->current++ & 0xFF) & LEADING_2_BITS) != UTF8_TAIL_BITS) goto error; | |||||
| if (((c = *decoder->current++) & LEADING_2_BITS) != UTF8_TAIL_BITS) goto error; | |||||
| ret = (ret << 6) + (c & 0x3F); | ret = (ret << 6) + (c & 0x3F); | ||||
| return ret; | return ret; | ||||
| // 4-byte UTF-8 sequence | // 4-byte UTF-8 sequence | ||||
| case 0xF0: | case 0xF0: | ||||
| if (decoder->current + 3 >= decoder->end) goto eof; | if (decoder->current + 3 >= decoder->end) goto eof; | ||||
| ret = c & 0x0F; | ret = c & 0x0F; | ||||
| if (((c = *decoder->current++ & 0xFF) & LEADING_2_BITS) != UTF8_TAIL_BITS) goto error; | |||||
| if (((c = *decoder->current++) & LEADING_2_BITS) != UTF8_TAIL_BITS) goto error; | |||||
| ret = (ret << 6) + (c & 0x3F); | ret = (ret << 6) + (c & 0x3F); | ||||
| if (((c = *decoder->current++ & 0xFF) & LEADING_2_BITS) != UTF8_TAIL_BITS) goto error; | |||||
| if (((c = *decoder->current++) & LEADING_2_BITS) != UTF8_TAIL_BITS) goto error; | |||||
| ret = (ret << 6) + (c & 0x3F); | ret = (ret << 6) + (c & 0x3F); | ||||
| if (((c = *decoder->current++ & 0xFF) & LEADING_2_BITS) != UTF8_TAIL_BITS) goto error; | |||||
| if (((c = *decoder->current++) & LEADING_2_BITS) != UTF8_TAIL_BITS) goto error; | |||||
| ret = (ret << 6) + (c & 0x3F); | ret = (ret << 6) + (c & 0x3F); | ||||
| return (ret <= 0x10FFFF) ? ret : 0xFFFD; | return (ret <= 0x10FFFF) ? ret : 0xFFFD; | ||||
| } | } | ||||
| return 0xFFFD; | return 0xFFFD; | ||||
| } | } | ||||
| uint8_t c1 = *decoder->current++ & 0xFF; | |||||
| uint8_t c2 = *decoder->current++ & 0xFF; | |||||
| uint8_t c1 = *decoder->current++; | |||||
| uint8_t c2 = *decoder->current++; | |||||
| return c1 + (c2 << 8); | return c1 + (c2 << 8); | ||||
| } | } | ||||