#include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
#include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
#include "encoding.h" | |||||
#include "error.h" | #include "error.h" | ||||
#include "speech.h" | #include "speech.h" | ||||
#include "phoneme.h" | #include "phoneme.h" |
#include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
#include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
#include "encoding.h" | |||||
#include "speech.h" | #include "speech.h" | ||||
#include "phoneme.h" | #include "phoneme.h" | ||||
#include "synthesize.h" | #include "synthesize.h" |
0x0037, 0x0038, 0x0039, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, // f8 | 0x0037, 0x0038, 0x0039, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, // f8 | ||||
}; | }; | ||||
const uint16_t *codepage_tables[] = { | |||||
NULL, // unknown | |||||
NULL, // ASCII | |||||
ISO_8859_1, | |||||
ISO_8859_2, | |||||
ISO_8859_3, | |||||
ISO_8859_4, | |||||
ISO_8859_5, | |||||
ISO_8859_6, | |||||
ISO_8859_7, | |||||
ISO_8859_8, | |||||
ISO_8859_9, | |||||
ISO_8859_10, | |||||
ISO_8859_11, | |||||
// ISO-8859-12 is not a valid encoding. | |||||
ISO_8859_13, | |||||
ISO_8859_14, | |||||
ISO_8859_15, | |||||
ISO_8859_16, | |||||
KOI8_R, | |||||
ISCII, | |||||
NULL, // UTF-8 | |||||
NULL, // UCS-2 | |||||
}; | |||||
static uint32_t | static uint32_t | ||||
string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder) | string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder) | ||||
{ | { | ||||
{ | { | ||||
return decoder->get(decoder); | return decoder->get(decoder); | ||||
} | } | ||||
const void * | |||||
text_decoder_get_buffer(espeak_ng_TEXT_DECODER *decoder) | |||||
{ | |||||
return decoder->current; | |||||
} |
{ | { | ||||
#endif | #endif | ||||
extern const uint16_t *codepage_tables[]; // transitional data table | |||||
typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER; | typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER; | ||||
espeak_ng_TEXT_DECODER * | espeak_ng_TEXT_DECODER * | ||||
uint32_t | uint32_t | ||||
text_decoder_getc(espeak_ng_TEXT_DECODER *decoder); | text_decoder_getc(espeak_ng_TEXT_DECODER *decoder); | ||||
const void * | |||||
text_decoder_get_buffer(espeak_ng_TEXT_DECODER *decoder); | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } | ||||
#endif | #endif |
#include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
#include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
#include "encoding.h" | |||||
#include "speech.h" | #include "speech.h" | ||||
#include "phoneme.h" | #include "phoneme.h" | ||||
#include "synthesize.h" | #include "synthesize.h" |
#include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
#include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
#include "encoding.h" | |||||
#include "speech.h" | #include "speech.h" | ||||
#include "phoneme.h" | #include "phoneme.h" | ||||
#include "synthesize.h" | #include "synthesize.h" |
#include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
#include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
#include "encoding.h" | |||||
#include "speech.h" | #include "speech.h" | ||||
#include "phoneme.h" | #include "phoneme.h" | ||||
#include "synthesize.h" | #include "synthesize.h" |
#include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
#include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
#include "encoding.h" | |||||
#include "speech.h" | #include "speech.h" | ||||
#include "phoneme.h" | #include "phoneme.h" | ||||
#include "synthesize.h" | #include "synthesize.h" |
char *namedata = NULL; | char *namedata = NULL; | ||||
static int ungot_char2 = 0; | static int ungot_char2 = 0; | ||||
unsigned char *p_textinput; | |||||
wchar_t *p_wchar_input; | |||||
espeak_ng_TEXT_DECODER *p_decoder = NULL; | |||||
static int ungot_char; | static int ungot_char; | ||||
static const char *ungot_word = NULL; | static const char *ungot_word = NULL; | ||||
static int end_of_input; | |||||
static int ignore_text = 0; // set during <sub> ... </sub> to ignore text which has been replaced by an alias | static int ignore_text = 0; // set during <sub> ... </sub> to ignore text which has been replaced by an alias | ||||
static int audio_text = 0; // set during <audio> ... </audio> | static int audio_text = 0; // set during <audio> ... </audio> | ||||
return 0; | return 0; | ||||
} | } | ||||
static void GetC_unget(int c) | |||||
{ | |||||
// This is only called with UTF8 input, not wchar input | |||||
p_textinput--; | |||||
*p_textinput = c; | |||||
end_of_input = 0; | |||||
} | |||||
int Eof(void) | int Eof(void) | ||||
{ | { | ||||
if (ungot_char != 0) | if (ungot_char != 0) | ||||
return 0; | return 0; | ||||
return end_of_input; | |||||
} | |||||
static int GetC_get(void) | |||||
{ | |||||
unsigned int c; | |||||
unsigned int c2; | |||||
if (option_multibyte == espeakCHARS_WCHAR) { | |||||
if (*p_wchar_input == 0) { | |||||
end_of_input = 1; | |||||
return 0; | |||||
} | |||||
if (!end_of_input) | |||||
return *p_wchar_input++; | |||||
} else { | |||||
if (*p_textinput == 0) { | |||||
end_of_input = 1; | |||||
return 0; | |||||
} | |||||
if (!end_of_input) { | |||||
if (option_multibyte == espeakCHARS_16BIT) { | |||||
c = p_textinput[0] + (p_textinput[1] << 8); | |||||
p_textinput += 2; | |||||
return c; | |||||
} | |||||
return *p_textinput++ & 0xff; | |||||
} | |||||
} | |||||
return 0; | |||||
return text_decoder_eof(p_decoder); | |||||
} | } | ||||
static int GetC(void) | static int GetC(void) | ||||
{ | { | ||||
// Returns a unicode wide character | |||||
// Performs UTF8 checking and conversion | |||||
int c; | |||||
int c1; | int c1; | ||||
int c2; | |||||
int cbuf[4]; | |||||
int ix; | |||||
int n_bytes; | |||||
static int ungot2 = 0; | static int ungot2 = 0; | ||||
static const unsigned char mask[4] = { 0xff, 0x1f, 0x0f, 0x07 }; | |||||
if ((c1 = ungot_char) != 0) { | if ((c1 = ungot_char) != 0) { | ||||
ungot_char = 0; | ungot_char = 0; | ||||
return c1; | return c1; | ||||
} | } | ||||
if (ungot2 != 0) { | |||||
c1 = ungot2; | |||||
ungot2 = 0; | |||||
} else | |||||
c1 = GetC_get(); | |||||
if ((option_multibyte == espeakCHARS_WCHAR) || (option_multibyte == espeakCHARS_16BIT)) { | |||||
count_characters++; | |||||
return c1; // wchar_t text | |||||
} | |||||
if ((option_multibyte < 2) && (c1 & 0x80)) { | |||||
// multi-byte utf8 encoding, convert to unicode | |||||
n_bytes = 0; | |||||
if (((c1 & 0xe0) == 0xc0) && ((c1 & 0x1e) != 0)) | |||||
n_bytes = 1; | |||||
else if ((c1 & 0xf0) == 0xe0) | |||||
n_bytes = 2; | |||||
else if (((c1 & 0xf8) == 0xf0) && ((c1 & 0x0f) <= 4)) | |||||
n_bytes = 3; | |||||
if ((ix = n_bytes) > 0) { | |||||
c = c1 & mask[ix]; | |||||
while (ix > 0) { | |||||
if ((c2 = cbuf[ix] = GetC_get()) == 0) { | |||||
if (option_multibyte == espeakCHARS_AUTO) | |||||
option_multibyte = espeakCHARS_8BIT; // change "auto" option to "no" | |||||
GetC_unget(' '); | |||||
break; | |||||
} | |||||
if ((c2 & 0xc0) != 0x80) { | |||||
// This is not UTF8. Change to 8-bit characterset. | |||||
if ((n_bytes == 2) && (ix == 1)) | |||||
ungot2 = cbuf[2]; | |||||
GetC_unget(c2); | |||||
break; | |||||
} | |||||
c = (c << 6) + (c2 & 0x3f); | |||||
ix--; | |||||
} | |||||
if (ix == 0) { | |||||
count_characters++; | |||||
return c; | |||||
} | |||||
} | |||||
// top-bit-set character is not utf8, drop through to 8bit charset case | |||||
if ((option_multibyte == espeakCHARS_AUTO) && !Eof()) | |||||
option_multibyte = espeakCHARS_8BIT; // change "auto" option to "no" | |||||
} | |||||
// 8 bit character set, convert to unicode if | |||||
count_characters++; | count_characters++; | ||||
if (c1 >= 0x80) { | |||||
const uint16_t *codepage = codepage_tables[translator->encoding]; | |||||
return codepage ? codepage[c1 - 0x80] : ' '; | |||||
} | |||||
return c1; | |||||
return text_decoder_getc(p_decoder); | |||||
} | } | ||||
static void UngetC(int c) | static void UngetC(int c) | ||||
tr->phonemes_repeat_count = 0; | tr->phonemes_repeat_count = 0; | ||||
tr->clause_upper_count = 0; | tr->clause_upper_count = 0; | ||||
tr->clause_lower_count = 0; | tr->clause_lower_count = 0; | ||||
end_of_input = 0; | |||||
*tone_type = 0; | *tone_type = 0; | ||||
*voice_change = 0; | *voice_change = 0; | ||||
while (!Eof() || (ungot_char != 0) || (ungot_char2 != 0) || (ungot_string_ix >= 0)) { | while (!Eof() || (ungot_char != 0) || (ungot_char2 != 0) || (ungot_string_ix >= 0)) { | ||||
if (!iswalnum(c1)) { | if (!iswalnum(c1)) { | ||||
if ((end_character_position > 0) && (count_characters > end_character_position)) { | if ((end_character_position > 0) && (count_characters > end_character_position)) { | ||||
end_of_input = 1; | |||||
return CLAUSE_EOF; | return CLAUSE_EOF; | ||||
} | } | ||||
#include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
#include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
#include "encoding.h" | |||||
#include "speech.h" | #include "speech.h" | ||||
#include "phoneme.h" | #include "phoneme.h" | ||||
#include "synthesize.h" | #include "synthesize.h" |
#include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
#include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
#include "encoding.h" | |||||
#include "speech.h" | #include "speech.h" | ||||
#include "phoneme.h" | #include "phoneme.h" | ||||
#include "synthesize.h" | #include "synthesize.h" |
#include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
#include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
#include "encoding.h" | |||||
#include "speech.h" | #include "speech.h" | ||||
#include "phoneme.h" | #include "phoneme.h" | ||||
#include "synthesize.h" | #include "synthesize.h" |
#include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
#include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
#include "encoding.h" | |||||
#include "error.h" | #include "error.h" | ||||
#include "speech.h" | #include "speech.h" | ||||
#include "phoneme.h" | #include "phoneme.h" |
#include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
#include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
#include "encoding.h" | |||||
#include "speech.h" | #include "speech.h" | ||||
#include "phoneme.h" | #include "phoneme.h" | ||||
#include "synthesize.h" | #include "synthesize.h" |
#include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
#include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
#include "encoding.h" | |||||
#include "speech.h" | #include "speech.h" | ||||
#include "phoneme.h" | #include "phoneme.h" | ||||
#include "synthesize.h" | #include "synthesize.h" |
#include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
#include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
#include "encoding.h" | |||||
#include "speech.h" | #include "speech.h" | ||||
#include "phoneme.h" | #include "phoneme.h" | ||||
#include "synthesize.h" | #include "synthesize.h" | ||||
return 0; | return 0; | ||||
} | } | ||||
static espeak_ng_STATUS init_wstring_decoder(const wchar_t *text) | |||||
{ | |||||
return text_decoder_decode_wstring(p_decoder, text, wcslen(text) + 1); | |||||
} | |||||
// Point the shared text decoder (p_decoder) at a terminated byte buffer.
//
// text:     input bytes in the given encoding, or NULL.
// encoding: how the bytes in `text` are to be interpreted.
// returns:  the status reported by text_decoder_decode_string().
//
// The length handed to the decoder is in bytes and includes the terminator.
//  - NULL input is passed on with a length of 0; strlen(NULL) is undefined
//    behaviour, and the caller only checks its input pointer for NULL after
//    the decoder has been initialised.
//    NOTE(review): assumes the decoder treats a zero-length range as
//    "end of input" -- confirm in text_decoder_decode_string().
//  - UCS-2 input is terminated by a 16-bit zero unit, not a zero byte.
//    strlen() would stop at the first zero byte, which is the high or low
//    half of most characters, so the text is scanned two bytes at a time.
//    Byte access is used so that no alignment of `text` is assumed.
//  - All other encodings are measured with strlen().
static espeak_ng_STATUS init_string_decoder(const char *text, espeak_ng_ENCODING encoding)
{
	size_t length = 0;

	if (text != NULL) {
		if (encoding == ESPEAKNG_ENCODING_ISO_10646_UCS_2) {
			const unsigned char *bytes = (const unsigned char *)text;
			while (bytes[length] != 0 || bytes[length + 1] != 0)
				length += 2;
			length += 2; // count the terminating 16-bit unit, like strlen() + 1
		} else {
			length = strlen(text) + 1;
		}
	}

	return text_decoder_decode_string(p_decoder, text, length, encoding);
}
void *TranslateClause(Translator *tr, const void *vp_input, int *tone_out, char **voice_change) | void *TranslateClause(Translator *tr, const void *vp_input, int *tone_out, char **voice_change) | ||||
{ | { | ||||
int ix; | int ix; | ||||
if (tr == NULL) | if (tr == NULL) | ||||
return NULL; | return NULL; | ||||
p_textinput = (unsigned char *)vp_input; | |||||
p_wchar_input = (wchar_t *)vp_input; | |||||
if (p_decoder == NULL) | |||||
p_decoder = create_text_decoder(); | |||||
switch (option_multibyte) | |||||
{ | |||||
case espeakCHARS_WCHAR: | |||||
init_wstring_decoder((const wchar_t *)vp_input); | |||||
break; | |||||
case espeakCHARS_AUTO: // TODO: Implement UTF-8 => 8BIT fallback on 0xFFFD UTF-8 characters. | |||||
case espeakCHARS_UTF8: | |||||
init_string_decoder((const char *)vp_input, ESPEAKNG_ENCODING_UTF_8); | |||||
break; | |||||
case espeakCHARS_8BIT: | |||||
init_string_decoder((const char *)vp_input, tr->encoding); | |||||
break; | |||||
case espeakCHARS_16BIT: | |||||
init_string_decoder((const char *)vp_input, ESPEAKNG_ENCODING_ISO_10646_UCS_2); | |||||
break; | |||||
default: | |||||
return NULL; // unknown multibyte option value | |||||
} | |||||
embedded_ix = 0; | embedded_ix = 0; | ||||
embedded_read = 0; | embedded_read = 0; | ||||
if (Eof() || (vp_input == NULL)) | if (Eof() || (vp_input == NULL)) | ||||
return NULL; | return NULL; | ||||
if (option_multibyte == espeakCHARS_WCHAR) | |||||
return (void *)p_wchar_input; | |||||
else | |||||
return (void *)p_textinput; | |||||
return text_decoder_get_buffer(p_decoder); | |||||
} | } | ||||
void InitText(int control) | void InitText(int control) |
extern Translator *translator2; | extern Translator *translator2; | ||||
extern char dictionary_name[40]; | extern char dictionary_name[40]; | ||||
extern char ctrl_embedded; // to allow an alternative CTRL for embedded commands | extern char ctrl_embedded; // to allow an alternative CTRL for embedded commands | ||||
extern unsigned char *p_textinput; | |||||
extern wchar_t *p_wchar_input; | |||||
extern espeak_ng_TEXT_DECODER *p_decoder; | |||||
extern int dictionary_skipwords; | extern int dictionary_skipwords; | ||||
extern int (*uri_callback)(int, const char *, const char *); | extern int (*uri_callback)(int, const char *, const char *); |
#include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
#include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
#include "encoding.h" | |||||
#include "speech.h" | #include "speech.h" | ||||
#include "phoneme.h" | #include "phoneme.h" | ||||
#include "synthesize.h" | #include "synthesize.h" |