@@ -31,6 +31,7 @@ | |||
#include <espeak-ng/espeak_ng.h> | |||
#include <espeak-ng/speak_lib.h> | |||
#include "encoding.h" | |||
#include "error.h" | |||
#include "speech.h" | |||
#include "phoneme.h" |
@@ -30,6 +30,7 @@ | |||
#include <espeak-ng/espeak_ng.h> | |||
#include <espeak-ng/speak_lib.h> | |||
#include "encoding.h" | |||
#include "speech.h" | |||
#include "phoneme.h" | |||
#include "synthesize.h" |
@@ -517,31 +517,6 @@ static const uint16_t ISCII[0x80] = { | |||
0x0037, 0x0038, 0x0039, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, // f8 | |||
}; | |||
// Per-encoding lookup tables for single-byte character sets.
// A caller indexes this array with an encoding value and then indexes the
// selected table with (byte - 0x80) to obtain the code point of a
// top-bit-set byte. A NULL entry means no table exists for that encoding,
// and the caller must handle that case itself.
// NOTE(review): the entry order presumably mirrors the espeak_ng_ENCODING
// enumeration -- confirm before inserting or reordering entries.
const uint16_t *codepage_tables[] = { | |||
NULL, // unknown | |||
NULL, // ASCII | |||
ISO_8859_1, | |||
ISO_8859_2, | |||
ISO_8859_3, | |||
ISO_8859_4, | |||
ISO_8859_5, | |||
ISO_8859_6, | |||
ISO_8859_7, | |||
ISO_8859_8, | |||
ISO_8859_9, | |||
ISO_8859_10, | |||
ISO_8859_11, | |||
// ISO-8859-12 is not a valid encoding. | |||
ISO_8859_13, | |||
ISO_8859_14, | |||
ISO_8859_15, | |||
ISO_8859_16, | |||
KOI8_R, | |||
ISCII, | |||
NULL, // UTF-8 | |||
NULL, // UCS-2 | |||
}; | |||
static uint32_t | |||
string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder) | |||
{ | |||
@@ -720,3 +695,9 @@ text_decoder_getc(espeak_ng_TEXT_DECODER *decoder) | |||
{ | |||
return decoder->get(decoder); | |||
} | |||
const void * | |||
text_decoder_get_buffer(espeak_ng_TEXT_DECODER *decoder) | |||
{ | |||
return decoder->current; | |||
} |
@@ -22,8 +22,6 @@ extern "C" | |||
{ | |||
#endif | |||
extern const uint16_t *codepage_tables[]; // transitional data table | |||
typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER; | |||
espeak_ng_TEXT_DECODER * | |||
@@ -49,6 +47,9 @@ text_decoder_eof(espeak_ng_TEXT_DECODER *decoder); | |||
uint32_t | |||
text_decoder_getc(espeak_ng_TEXT_DECODER *decoder); | |||
const void * | |||
text_decoder_get_buffer(espeak_ng_TEXT_DECODER *decoder); | |||
#ifdef __cplusplus | |||
} | |||
#endif |
@@ -26,6 +26,7 @@ | |||
#include <espeak-ng/espeak_ng.h> | |||
#include <espeak-ng/speak_lib.h> | |||
#include "encoding.h" | |||
#include "speech.h" | |||
#include "phoneme.h" | |||
#include "synthesize.h" |
@@ -27,6 +27,7 @@ | |||
#include <espeak-ng/espeak_ng.h> | |||
#include <espeak-ng/speak_lib.h> | |||
#include "encoding.h" | |||
#include "speech.h" | |||
#include "phoneme.h" | |||
#include "synthesize.h" |
@@ -30,6 +30,7 @@ | |||
#include <espeak-ng/espeak_ng.h> | |||
#include <espeak-ng/speak_lib.h> | |||
#include "encoding.h" | |||
#include "speech.h" | |||
#include "phoneme.h" | |||
#include "synthesize.h" |
@@ -27,6 +27,7 @@ | |||
#include <espeak-ng/espeak_ng.h> | |||
#include <espeak-ng/speak_lib.h> | |||
#include "encoding.h" | |||
#include "speech.h" | |||
#include "phoneme.h" | |||
#include "synthesize.h" |
@@ -51,11 +51,9 @@ static int n_namedata = 0; | |||
char *namedata = NULL; | |||
static int ungot_char2 = 0; | |||
unsigned char *p_textinput; | |||
wchar_t *p_wchar_input; | |||
espeak_ng_TEXT_DECODER *p_decoder = NULL; | |||
static int ungot_char; | |||
static const char *ungot_word = NULL; | |||
static int end_of_input; | |||
static int ignore_text = 0; // set during <sub> ... </sub> to ignore text which has been replaced by an alias | |||
static int audio_text = 0; // set during <audio> ... </audio> | |||
@@ -236,131 +234,26 @@ static int IsRomanU(unsigned int c) | |||
return 0; | |||
} | |||
static void GetC_unget(int c) | |||
{ | |||
// This is only called with UTF8 input, not wchar input | |||
p_textinput--; | |||
*p_textinput = c; | |||
end_of_input = 0; | |||
} | |||
int Eof(void) | |||
{ | |||
if (ungot_char != 0) | |||
return 0; | |||
return end_of_input; | |||
} | |||
// Fetch the next raw input unit, selected by option_multibyte:
//   - espeakCHARS_WCHAR: one wchar_t from p_wchar_input
//   - espeakCHARS_16BIT: two bytes from p_textinput, low byte first
//   - anything else:     one byte from p_textinput
// Returns 0 and sets end_of_input when the unit at the read position is 0.
// Once end_of_input is set, every later call returns 0 without advancing.
static int GetC_get(void) | |||
{ | |||
unsigned int c; | |||
// c2 is declared but never used in this function.
unsigned int c2; | |||
if (option_multibyte == espeakCHARS_WCHAR) { | |||
if (*p_wchar_input == 0) { | |||
end_of_input = 1; | |||
return 0; | |||
} | |||
if (!end_of_input) | |||
return *p_wchar_input++; | |||
} else { | |||
// Only the byte at the read position is tested, even in 16-bit mode, so a
// 16-bit unit whose low byte is zero also ends the input.
if (*p_textinput == 0) { | |||
end_of_input = 1; | |||
return 0; | |||
} | |||
if (!end_of_input) { | |||
if (option_multibyte == espeakCHARS_16BIT) { | |||
// Combine two bytes, least significant byte first.
c = p_textinput[0] + (p_textinput[1] << 8); | |||
p_textinput += 2; | |||
return c; | |||
} | |||
return *p_textinput++ & 0xff; | |||
} | |||
} | |||
return 0; | |||
// NOTE(review): this statement follows an unconditional return and is
// unreachable as written here.
return text_decoder_eof(p_decoder); | |||
} | |||
// Read the next character of input and return it as a unicode code point.
// Order of processing:
//   1. a character pushed back in ungot_char is returned first;
//   2. wchar and 16-bit input units are returned unchanged;
//   3. a top-bit-set byte is decoded as UTF-8 when option_multibyte < 2;
//   4. otherwise the byte is treated as 8-bit text and mapped through the
//      codepage table for translator->encoding.
// count_characters is incremented for every character returned by steps 2-4.
static int GetC(void) | |||
{ | |||
// Returns a unicode wide character | |||
// Performs UTF8 checking and conversion | |||
int c; | |||
int c1; | |||
int c2; | |||
int cbuf[4]; | |||
int ix; | |||
int n_bytes; | |||
// ungot2 keeps one byte across calls so it is re-read as the next c1.
static int ungot2 = 0; | |||
// mask[n] keeps the payload bits of a lead byte that has n continuation bytes.
static const unsigned char mask[4] = { 0xff, 0x1f, 0x0f, 0x07 }; | |||
if ((c1 = ungot_char) != 0) { | |||
ungot_char = 0; | |||
return c1; | |||
} | |||
if (ungot2 != 0) { | |||
c1 = ungot2; | |||
ungot2 = 0; | |||
} else | |||
c1 = GetC_get(); | |||
if ((option_multibyte == espeakCHARS_WCHAR) || (option_multibyte == espeakCHARS_16BIT)) { | |||
count_characters++; | |||
return c1; // wchar_t text | |||
} | |||
// NOTE(review): "< 2" presumably selects the AUTO and UTF8 modes -- confirm
// against the espeakCHARS_* values.
if ((option_multibyte < 2) && (c1 & 0x80)) { | |||
// multi-byte utf8 encoding, convert to unicode | |||
// n_bytes is the number of continuation bytes implied by the lead byte;
// it stays 0 when the lead byte is not accepted as a UTF-8 lead.
n_bytes = 0; | |||
if (((c1 & 0xe0) == 0xc0) && ((c1 & 0x1e) != 0)) | |||
n_bytes = 1; | |||
else if ((c1 & 0xf0) == 0xe0) | |||
n_bytes = 2; | |||
else if (((c1 & 0xf8) == 0xf0) && ((c1 & 0x0f) <= 4)) | |||
n_bytes = 3; | |||
if ((ix = n_bytes) > 0) { | |||
c = c1 & mask[ix]; | |||
// ix counts down the continuation bytes still to be read.
while (ix > 0) { | |||
if ((c2 = cbuf[ix] = GetC_get()) == 0) { | |||
// The input ended inside the sequence: push back a space and give up.
if (option_multibyte == espeakCHARS_AUTO) | |||
option_multibyte = espeakCHARS_8BIT; // change "auto" option to "no" | |||
GetC_unget(' '); | |||
break; | |||
} | |||
if ((c2 & 0xc0) != 0x80) { | |||
// This is not UTF8. Change to 8-bit characterset. | |||
// For a 3-byte sequence that failed on its second continuation byte, the
// first continuation byte (held in cbuf[2]) is saved in ungot2.
if ((n_bytes == 2) && (ix == 1)) | |||
ungot2 = cbuf[2]; | |||
GetC_unget(c2); | |||
break; | |||
} | |||
c = (c << 6) + (c2 & 0x3f); | |||
ix--; | |||
} | |||
// ix reaches 0 only when every continuation byte was valid.
if (ix == 0) { | |||
count_characters++; | |||
return c; | |||
} | |||
} | |||
// top-bit-set character is not utf8, drop through to 8bit charset case | |||
if ((option_multibyte == espeakCHARS_AUTO) && !Eof()) | |||
option_multibyte = espeakCHARS_8BIT; // change "auto" option to "no" | |||
} | |||
// 8 bit character set: map top-bit-set bytes to unicode via the codepage table | |||
count_characters++; | |||
if (c1 >= 0x80) { | |||
// A space is returned when the encoding has no codepage table.
const uint16_t *codepage = codepage_tables[translator->encoding]; | |||
return codepage ? codepage[c1 - 0x80] : ' '; | |||
} | |||
return c1; | |||
// NOTE(review): this statement follows an unconditional return and is
// unreachable as written here.
return text_decoder_getc(p_decoder); | |||
} | |||
static void UngetC(int c) | |||
@@ -1756,7 +1649,6 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_ | |||
tr->phonemes_repeat_count = 0; | |||
tr->clause_upper_count = 0; | |||
tr->clause_lower_count = 0; | |||
end_of_input = 0; | |||
*tone_type = 0; | |||
*voice_change = 0; | |||
@@ -1774,7 +1666,6 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_ | |||
while (!Eof() || (ungot_char != 0) || (ungot_char2 != 0) || (ungot_string_ix >= 0)) { | |||
if (!iswalnum(c1)) { | |||
if ((end_character_position > 0) && (count_characters > end_character_position)) { | |||
end_of_input = 1; | |||
return CLAUSE_EOF; | |||
} | |||
@@ -27,6 +27,7 @@ | |||
#include <espeak-ng/espeak_ng.h> | |||
#include <espeak-ng/speak_lib.h> | |||
#include "encoding.h" | |||
#include "speech.h" | |||
#include "phoneme.h" | |||
#include "synthesize.h" |
@@ -47,6 +47,7 @@ | |||
#include <espeak-ng/espeak_ng.h> | |||
#include <espeak-ng/speak_lib.h> | |||
#include "encoding.h" | |||
#include "speech.h" | |||
#include "phoneme.h" | |||
#include "synthesize.h" |
@@ -30,6 +30,7 @@ | |||
#include <espeak-ng/espeak_ng.h> | |||
#include <espeak-ng/speak_lib.h> | |||
#include "encoding.h" | |||
#include "speech.h" | |||
#include "phoneme.h" | |||
#include "synthesize.h" |
@@ -30,6 +30,7 @@ | |||
#include <espeak-ng/espeak_ng.h> | |||
#include <espeak-ng/speak_lib.h> | |||
#include "encoding.h" | |||
#include "error.h" | |||
#include "speech.h" | |||
#include "phoneme.h" |
@@ -30,6 +30,7 @@ | |||
#include <espeak-ng/espeak_ng.h> | |||
#include <espeak-ng/speak_lib.h> | |||
#include "encoding.h" | |||
#include "speech.h" | |||
#include "phoneme.h" | |||
#include "synthesize.h" |
@@ -29,6 +29,7 @@ | |||
#include <espeak-ng/espeak_ng.h> | |||
#include <espeak-ng/speak_lib.h> | |||
#include "encoding.h" | |||
#include "speech.h" | |||
#include "phoneme.h" | |||
#include "synthesize.h" |
@@ -30,6 +30,7 @@ | |||
#include <espeak-ng/espeak_ng.h> | |||
#include <espeak-ng/speak_lib.h> | |||
#include "encoding.h" | |||
#include "speech.h" | |||
#include "phoneme.h" | |||
#include "synthesize.h" | |||
@@ -1915,6 +1916,16 @@ int UpperCaseInWord(Translator *tr, char *word, int c) | |||
return 0; | |||
} | |||
static espeak_ng_STATUS init_wstring_decoder(const wchar_t *text) | |||
{ | |||
return text_decoder_decode_wstring(p_decoder, text, wcslen(text) + 1); | |||
} | |||
// Point the shared decoder (p_decoder) at a terminated byte buffer holding
// text in `encoding`.
//
// text:     the input buffer; must not be NULL. It is terminated by a single
//           zero byte, or by a 16-bit zero unit when `encoding` is
//           ESPEAKNG_ENCODING_ISO_10646_UCS_2.
// encoding: the encoding used to interpret the bytes.
//
// The length passed on includes the terminator. Returns the status reported
// by text_decoder_decode_string().
// NOTE(review): assumes text_decoder_decode_string() takes its length in
// bytes for every encoding, including UCS-2 -- confirm against encoding.c.
static espeak_ng_STATUS init_string_decoder(const char *text, espeak_ng_ENCODING encoding)
{
	size_t n_bytes;

	if (encoding == ESPEAKNG_ENCODING_ISO_10646_UCS_2) {
		// UCS-2 code units are two bytes wide, so a lone zero byte (such as
		// the high byte of any ASCII-range character) does not end the text.
		// strlen() would stop there and truncate the input, so look for a
		// whole 16-bit zero unit instead.
		const unsigned char *bytes = (const unsigned char *)text;

		n_bytes = 0;
		while ((bytes[n_bytes] | bytes[n_bytes + 1]) != 0)
			n_bytes += 2;
		n_bytes += 2; // include the 16-bit terminator
	} else {
		n_bytes = strlen(text) + 1; // include the terminating NUL
	}

	return text_decoder_decode_string(p_decoder, text, n_bytes, encoding);
}
void *TranslateClause(Translator *tr, const void *vp_input, int *tone_out, char **voice_change) | |||
{ | |||
int ix; | |||
@@ -1966,8 +1977,27 @@ void *TranslateClause(Translator *tr, const void *vp_input, int *tone_out, char | |||
if (tr == NULL) | |||
return NULL; | |||
p_textinput = (unsigned char *)vp_input; | |||
p_wchar_input = (wchar_t *)vp_input; | |||
if (p_decoder == NULL) | |||
p_decoder = create_text_decoder(); | |||
switch (option_multibyte) | |||
{ | |||
case espeakCHARS_WCHAR: | |||
init_wstring_decoder((const wchar_t *)vp_input); | |||
break; | |||
case espeakCHARS_AUTO: // TODO: Implement UTF-8 => 8BIT fallback on 0xFFFD UTF-8 characters. | |||
case espeakCHARS_UTF8: | |||
init_string_decoder((const char *)vp_input, ESPEAKNG_ENCODING_UTF_8); | |||
break; | |||
case espeakCHARS_8BIT: | |||
init_string_decoder((const char *)vp_input, tr->encoding); | |||
break; | |||
case espeakCHARS_16BIT: | |||
init_string_decoder((const char *)vp_input, ESPEAKNG_ENCODING_ISO_10646_UCS_2); | |||
break; | |||
default: | |||
return NULL; // unknown multibyte option value | |||
} | |||
embedded_ix = 0; | |||
embedded_read = 0; | |||
@@ -2683,10 +2713,7 @@ void *TranslateClause(Translator *tr, const void *vp_input, int *tone_out, char | |||
if (Eof() || (vp_input == NULL)) | |||
return NULL; | |||
if (option_multibyte == espeakCHARS_WCHAR) | |||
return (void *)p_wchar_input; | |||
else | |||
return (void *)p_textinput; | |||
return text_decoder_get_buffer(p_decoder); | |||
} | |||
void InitText(int control) |
@@ -695,8 +695,7 @@ extern Translator *translator; | |||
extern Translator *translator2; | |||
extern char dictionary_name[40]; | |||
extern char ctrl_embedded; // to allow an alternative CTRL for embedded commands | |||
extern unsigned char *p_textinput; | |||
extern wchar_t *p_wchar_input; | |||
extern espeak_ng_TEXT_DECODER *p_decoder; | |||
extern int dictionary_skipwords; | |||
extern int (*uri_callback)(int, const char *, const char *); |
@@ -37,6 +37,7 @@ | |||
#include <espeak-ng/espeak_ng.h> | |||
#include <espeak-ng/speak_lib.h> | |||
#include "encoding.h" | |||
#include "speech.h" | |||
#include "phoneme.h" | |||
#include "synthesize.h" |