| #include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| #include "encoding.h" | |||||
| #include "error.h" | #include "error.h" | ||||
| #include "speech.h" | #include "speech.h" | ||||
| #include "phoneme.h" | #include "phoneme.h" |
| #include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| #include "encoding.h" | |||||
| #include "speech.h" | #include "speech.h" | ||||
| #include "phoneme.h" | #include "phoneme.h" | ||||
| #include "synthesize.h" | #include "synthesize.h" |
| 0x0037, 0x0038, 0x0039, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, // f8 | 0x0037, 0x0038, 0x0039, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, // f8 | ||||
| }; | }; | ||||
// Lookup table of 8-bit codepage mappings, one slot per text encoding.
// GetC() selects a slot with translator->encoding and then indexes the
// selected table with (byte - 0x80), so a non-NULL entry is used only for
// bytes with the top bit set. A NULL slot makes GetC() substitute a space
// for such bytes.
// NOTE(review): the slot order presumably has to match the encoding
// enumeration used for translator->encoding (not visible here) — confirm
// before inserting, removing or reordering entries.
| const uint16_t *codepage_tables[] = { | |||||
| NULL, // unknown | |||||
| NULL, // ASCII | |||||
| ISO_8859_1, | |||||
| ISO_8859_2, | |||||
| ISO_8859_3, | |||||
| ISO_8859_4, | |||||
| ISO_8859_5, | |||||
| ISO_8859_6, | |||||
| ISO_8859_7, | |||||
| ISO_8859_8, | |||||
| ISO_8859_9, | |||||
| ISO_8859_10, | |||||
| ISO_8859_11, | |||||
| // ISO-8859-12 is not a valid encoding. | |||||
| ISO_8859_13, | |||||
| ISO_8859_14, | |||||
| ISO_8859_15, | |||||
| ISO_8859_16, | |||||
| KOI8_R, | |||||
| ISCII, | |||||
| NULL, // UTF-8 | |||||
| NULL, // UCS-2 | |||||
| }; | |||||
| static uint32_t | static uint32_t | ||||
| string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder) | string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder) | ||||
| { | { | ||||
| { | { | ||||
| return decoder->get(decoder); | return decoder->get(decoder); | ||||
| } | } | ||||
| const void * | |||||
| text_decoder_get_buffer(espeak_ng_TEXT_DECODER *decoder) | |||||
| { | |||||
| return decoder->current; | |||||
| } |
| { | { | ||||
| #endif | #endif | ||||
| extern const uint16_t *codepage_tables[]; // transitional data table | |||||
| typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER; | typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER; | ||||
| espeak_ng_TEXT_DECODER * | espeak_ng_TEXT_DECODER * | ||||
| uint32_t | uint32_t | ||||
| text_decoder_getc(espeak_ng_TEXT_DECODER *decoder); | text_decoder_getc(espeak_ng_TEXT_DECODER *decoder); | ||||
| const void * | |||||
| text_decoder_get_buffer(espeak_ng_TEXT_DECODER *decoder); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif |
| #include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| #include "encoding.h" | |||||
| #include "speech.h" | #include "speech.h" | ||||
| #include "phoneme.h" | #include "phoneme.h" | ||||
| #include "synthesize.h" | #include "synthesize.h" |
| #include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| #include "encoding.h" | |||||
| #include "speech.h" | #include "speech.h" | ||||
| #include "phoneme.h" | #include "phoneme.h" | ||||
| #include "synthesize.h" | #include "synthesize.h" |
| #include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| #include "encoding.h" | |||||
| #include "speech.h" | #include "speech.h" | ||||
| #include "phoneme.h" | #include "phoneme.h" | ||||
| #include "synthesize.h" | #include "synthesize.h" |
| #include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| #include "encoding.h" | |||||
| #include "speech.h" | #include "speech.h" | ||||
| #include "phoneme.h" | #include "phoneme.h" | ||||
| #include "synthesize.h" | #include "synthesize.h" |
| char *namedata = NULL; | char *namedata = NULL; | ||||
| static int ungot_char2 = 0; | static int ungot_char2 = 0; | ||||
| unsigned char *p_textinput; | |||||
| wchar_t *p_wchar_input; | |||||
| espeak_ng_TEXT_DECODER *p_decoder = NULL; | |||||
| static int ungot_char; | static int ungot_char; | ||||
| static const char *ungot_word = NULL; | static const char *ungot_word = NULL; | ||||
| static int end_of_input; | |||||
| static int ignore_text = 0; // set during <sub> ... </sub> to ignore text which has been replaced by an alias | static int ignore_text = 0; // set during <sub> ... </sub> to ignore text which has been replaced by an alias | ||||
| static int audio_text = 0; // set during <audio> ... </audio> | static int audio_text = 0; // set during <audio> ... </audio> | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| static void GetC_unget(int c) | |||||
| { | |||||
| // This is only called with UTF8 input, not wchar input | |||||
| p_textinput--; | |||||
| *p_textinput = c; | |||||
| end_of_input = 0; | |||||
| } | |||||
| int Eof(void) | int Eof(void) | ||||
| { | { | ||||
| if (ungot_char != 0) | if (ungot_char != 0) | ||||
| return 0; | return 0; | ||||
| return end_of_input; | |||||
| } | |||||
// Read the next raw input unit without any character-set conversion.
// Depending on option_multibyte this is one wchar_t (espeakCHARS_WCHAR),
// one 16-bit value assembled from two bytes (espeakCHARS_16BIT), or one byte.
// Returns 0 and sets end_of_input when the terminating zero is reached;
// also returns 0 without advancing when end_of_input is already set.
| static int GetC_get(void) | |||||
| { | |||||
| unsigned int c; | |||||
// NOTE(review): c2 is declared but never used in this function.
| unsigned int c2; | |||||
| if (option_multibyte == espeakCHARS_WCHAR) { | |||||
| if (*p_wchar_input == 0) { | |||||
| end_of_input = 1; | |||||
| return 0; | |||||
| } | |||||
| if (!end_of_input) | |||||
| return *p_wchar_input++; | |||||
| } else { | |||||
// NOTE(review): this end test looks at a single byte even in 16-bit mode,
// so a 16-bit value whose low byte is zero would also end the input — confirm.
| if (*p_textinput == 0) { | |||||
| end_of_input = 1; | |||||
| return 0; | |||||
| } | |||||
| if (!end_of_input) { | |||||
| if (option_multibyte == espeakCHARS_16BIT) { | |||||
// low byte first, high byte second
| c = p_textinput[0] + (p_textinput[1] << 8); | |||||
| p_textinput += 2; | |||||
| return c; | |||||
| } | |||||
| return *p_textinput++ & 0xff; | |||||
| } | |||||
| } | |||||
| return 0; | |||||
// NOTE(review): the statement below can never execute because of the
// `return 0` directly above it; it looks like it belongs to a different
// revision of the surrounding code (a decoder-based Eof) — confirm.
| return text_decoder_eof(p_decoder); | |||||
| } | } | ||||
// Fetch the next character for the clause reader.
// Sources are tried in this order: a character pushed back in ungot_char,
// a byte held over in ungot2, then GetC_get().
// Wide-character and 16-bit input is returned as read. Otherwise, when
// option_multibyte < 2 and the byte has its top bit set, a UTF-8 sequence is
// decoded; if that fails the byte is treated as an 8-bit codepage character.
// count_characters is incremented for every character returned from the
// input (but not for a character replayed from ungot_char).
// NOTE(review): this span appears to interleave two revisions of the
// function; see the note on the final return statement.
| static int GetC(void) | static int GetC(void) | ||||
| { | { | ||||
| // Returns a unicode wide character | |||||
| // Performs UTF8 checking and conversion | |||||
| int c; | |||||
| int c1; | int c1; | ||||
| int c2; | |||||
| int cbuf[4]; | |||||
| int ix; | |||||
| int n_bytes;  | |||||
// ungot2 holds a byte saved from an abandoned UTF-8 sequence; it is returned
// as the first byte of a later call (see below).
| static int ungot2 = 0; | static int ungot2 = 0; | ||||
// mask[n] keeps the payload bits of a lead byte that is followed by n
// continuation bytes.
| static const unsigned char mask[4] = { 0xff, 0x1f, 0x0f, 0x07 }; | |||||
| if ((c1 = ungot_char) != 0) { | if ((c1 = ungot_char) != 0) { | ||||
| ungot_char = 0; | ungot_char = 0; | ||||
| return c1; | return c1; | ||||
| } | } | ||||
| if (ungot2 != 0) { | |||||
| c1 = ungot2; | |||||
| ungot2 = 0; | |||||
| } else | |||||
| c1 = GetC_get(); | |||||
| if ((option_multibyte == espeakCHARS_WCHAR) || (option_multibyte == espeakCHARS_16BIT)) { | |||||
| count_characters++; | |||||
| return c1; // wchar_t text | |||||
| } | |||||
// NOTE(review): `option_multibyte < 2` presumably selects the "auto" and
// "UTF-8" settings — confirm against the espeakCHARS_* values.
| if ((option_multibyte < 2) && (c1 & 0x80)) { | |||||
| // multi-byte utf8 encoding, convert to unicode | |||||
| n_bytes = 0; | |||||
// n_bytes = number of continuation bytes announced by the lead byte.
// Lead bytes 0xC0/0xC1 and 0xF5..0xF7 are not accepted by these tests.
| if (((c1 & 0xe0) == 0xc0) && ((c1 & 0x1e) != 0)) | |||||
| n_bytes = 1; | |||||
| else if ((c1 & 0xf0) == 0xe0) | |||||
| n_bytes = 2; | |||||
| else if (((c1 & 0xf8) == 0xf0) && ((c1 & 0x0f) <= 4)) | |||||
| n_bytes = 3; | |||||
| if ((ix = n_bytes) > 0) { | |||||
| c = c1 & mask[ix]; | |||||
// ix counts down the continuation bytes still expected; cbuf[ix] records
// each one as it is read.
| while (ix > 0) { | |||||
| if ((c2 = cbuf[ix] = GetC_get()) == 0) { | |||||
// input ended in the middle of a sequence: push a space back and give up
// on UTF-8 for this character
| if (option_multibyte == espeakCHARS_AUTO) | |||||
| option_multibyte = espeakCHARS_8BIT; // change "auto" option to "no" | |||||
| GetC_unget(' '); | |||||
| break; | |||||
| } | |||||
| if ((c2 & 0xc0) != 0x80) { | |||||
| // This is not UTF8. Change to 8-bit characterset. | |||||
// For a three-byte sequence that fails on its second continuation byte,
// keep the first continuation byte in ungot2 so it is replayed before c2.
| if ((n_bytes == 2) && (ix == 1)) | |||||
| ungot2 = cbuf[2]; | |||||
| GetC_unget(c2); | |||||
| break; | |||||
| } | |||||
| c = (c << 6) + (c2 & 0x3f); | |||||
| ix--; | |||||
| } | |||||
// ix == 0 means every expected continuation byte was consumed
| if (ix == 0) { | |||||
| count_characters++; | |||||
| return c; | |||||
| } | |||||
| } | |||||
| // top-bit-set character is not utf8, drop through to 8bit charset case | |||||
| if ((option_multibyte == espeakCHARS_AUTO) && !Eof()) | |||||
| option_multibyte = espeakCHARS_8BIT; // change "auto" option to "no" | |||||
| } | |||||
| // 8 bit character set, convert to unicode if | |||||
| count_characters++; | count_characters++; | ||||
// Bytes 0x80 and above are mapped through the codepage table selected by
// translator->encoding; when that slot is NULL a space is returned instead.
| if (c1 >= 0x80) { | |||||
| const uint16_t *codepage = codepage_tables[translator->encoding]; | |||||
| return codepage ? codepage[c1 - 0x80] : ' '; | |||||
| } | |||||
| return c1; | |||||
// NOTE(review): unreachable after the `return c1` above; it looks like the
// body of a decoder-based revision of this function — confirm which revision
// is intended.
| return text_decoder_getc(p_decoder); | |||||
| } | } | ||||
| static void UngetC(int c) | static void UngetC(int c) | ||||
| tr->phonemes_repeat_count = 0; | tr->phonemes_repeat_count = 0; | ||||
| tr->clause_upper_count = 0; | tr->clause_upper_count = 0; | ||||
| tr->clause_lower_count = 0; | tr->clause_lower_count = 0; | ||||
| end_of_input = 0; | |||||
| *tone_type = 0; | *tone_type = 0; | ||||
| *voice_change = 0; | *voice_change = 0; | ||||
| while (!Eof() || (ungot_char != 0) || (ungot_char2 != 0) || (ungot_string_ix >= 0)) { | while (!Eof() || (ungot_char != 0) || (ungot_char2 != 0) || (ungot_string_ix >= 0)) { | ||||
| if (!iswalnum(c1)) { | if (!iswalnum(c1)) { | ||||
| if ((end_character_position > 0) && (count_characters > end_character_position)) { | if ((end_character_position > 0) && (count_characters > end_character_position)) { | ||||
| end_of_input = 1; | |||||
| return CLAUSE_EOF; | return CLAUSE_EOF; | ||||
| } | } | ||||
| #include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| #include "encoding.h" | |||||
| #include "speech.h" | #include "speech.h" | ||||
| #include "phoneme.h" | #include "phoneme.h" | ||||
| #include "synthesize.h" | #include "synthesize.h" |
| #include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| #include "encoding.h" | |||||
| #include "speech.h" | #include "speech.h" | ||||
| #include "phoneme.h" | #include "phoneme.h" | ||||
| #include "synthesize.h" | #include "synthesize.h" |
| #include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| #include "encoding.h" | |||||
| #include "speech.h" | #include "speech.h" | ||||
| #include "phoneme.h" | #include "phoneme.h" | ||||
| #include "synthesize.h" | #include "synthesize.h" |
| #include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| #include "encoding.h" | |||||
| #include "error.h" | #include "error.h" | ||||
| #include "speech.h" | #include "speech.h" | ||||
| #include "phoneme.h" | #include "phoneme.h" |
| #include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| #include "encoding.h" | |||||
| #include "speech.h" | #include "speech.h" | ||||
| #include "phoneme.h" | #include "phoneme.h" | ||||
| #include "synthesize.h" | #include "synthesize.h" |
| #include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| #include "encoding.h" | |||||
| #include "speech.h" | #include "speech.h" | ||||
| #include "phoneme.h" | #include "phoneme.h" | ||||
| #include "synthesize.h" | #include "synthesize.h" |
| #include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| #include "encoding.h" | |||||
| #include "speech.h" | #include "speech.h" | ||||
| #include "phoneme.h" | #include "phoneme.h" | ||||
| #include "synthesize.h" | #include "synthesize.h" | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| static espeak_ng_STATUS init_wstring_decoder(const wchar_t *text) | |||||
| { | |||||
| return text_decoder_decode_wstring(p_decoder, text, wcslen(text) + 1); | |||||
| } | |||||
| static espeak_ng_STATUS init_string_decoder(const char *text, espeak_ng_ENCODING encoding) | |||||
| { | |||||
| return text_decoder_decode_string(p_decoder, text, strlen(text) + 1, encoding); | |||||
| } | |||||
// Set up the text input for translating one clause and, at the end, return a
// pointer into the input (or NULL).
// NOTE(review): only part of this function is visible in this view, and the
// visible lines interleave two revisions (raw p_textinput/p_wchar_input
// pointers versus the p_decoder text decoder); the notes below cover only
// what is shown.
| void *TranslateClause(Translator *tr, const void *vp_input, int *tone_out, char **voice_change) | void *TranslateClause(Translator *tr, const void *vp_input, int *tone_out, char **voice_change) | ||||
| { | { | ||||
| int ix; | int ix; | ||||
| if (tr == NULL) | if (tr == NULL) | ||||
| return NULL; | return NULL; | ||||
| p_textinput = (unsigned char *)vp_input; | |||||
| p_wchar_input = (wchar_t *)vp_input; | |||||
// The decoder object is created on first use and then kept in p_decoder.
// NOTE(review): the result of create_text_decoder() is not checked here.
| if (p_decoder == NULL) | |||||
| p_decoder = create_text_decoder(); | |||||
// Select how the input is decoded from option_multibyte.
// NOTE(review): vp_input is only compared against NULL further down, after
// the init_*_decoder helpers have already called strlen()/wcslen() on it;
// the status values they return are ignored here.
| switch (option_multibyte) | |||||
| { | |||||
| case espeakCHARS_WCHAR: | |||||
| init_wstring_decoder((const wchar_t *)vp_input); | |||||
| break; | |||||
| case espeakCHARS_AUTO: // TODO: Implement UTF-8 => 8BIT fallback on 0xFFFD UTF-8 characters. | |||||
| case espeakCHARS_UTF8: | |||||
| init_string_decoder((const char *)vp_input, ESPEAKNG_ENCODING_UTF_8); | |||||
| break; | |||||
| case espeakCHARS_8BIT: | |||||
| init_string_decoder((const char *)vp_input, tr->encoding); | |||||
| break; | |||||
| case espeakCHARS_16BIT: | |||||
// NOTE(review): init_string_decoder() measures its input with strlen(),
// which stops at the first zero byte; 16-bit text may contain zero bytes
// inside characters, so the computed length is probably too short — confirm.
| init_string_decoder((const char *)vp_input, ESPEAKNG_ENCODING_ISO_10646_UCS_2); | |||||
| break; | |||||
| default: | |||||
| return NULL; // unknown multibyte option value | |||||
| } | |||||
| embedded_ix = 0; | embedded_ix = 0; | ||||
| embedded_read = 0; | embedded_read = 0; | ||||
// Nothing left to read, or no input was supplied.
| if (Eof() || (vp_input == NULL)) | if (Eof() || (vp_input == NULL)) | ||||
| return NULL; | return NULL; | ||||
| if (option_multibyte == espeakCHARS_WCHAR) | |||||
| return (void *)p_wchar_input; | |||||
| else | |||||
| return (void *)p_textinput; | |||||
// NOTE(review): unreachable, because both branches of the if/else above
// return. It looks like the decoder-based revision of the return value; it
// would also return a `const void *` from a function declared `void *`.
| return text_decoder_get_buffer(p_decoder); | |||||
| } | } | ||||
| void InitText(int control) | void InitText(int control) |
| extern Translator *translator2; | extern Translator *translator2; | ||||
| extern char dictionary_name[40]; | extern char dictionary_name[40]; | ||||
| extern char ctrl_embedded; // to allow an alternative CTRL for embedded commands | extern char ctrl_embedded; // to allow an alternative CTRL for embedded commands | ||||
| extern unsigned char *p_textinput; | |||||
| extern wchar_t *p_wchar_input; | |||||
| extern espeak_ng_TEXT_DECODER *p_decoder; | |||||
| extern int dictionary_skipwords; | extern int dictionary_skipwords; | ||||
| extern int (*uri_callback)(int, const char *, const char *); | extern int (*uri_callback)(int, const char *, const char *); |
| #include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| #include "encoding.h" | |||||
| #include "speech.h" | #include "speech.h" | ||||
| #include "phoneme.h" | #include "phoneme.h" | ||||
| #include "synthesize.h" | #include "synthesize.h" |