Browse Source

Use the text decoder API in readclause.c.

master
Reece H. Dunn 8 years ago
parent
commit
7c16ac543c

+ 1
- 0
src/libespeak-ng/compiledict.c View File

@@ -31,6 +31,7 @@
#include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h>

#include "encoding.h"
#include "error.h"
#include "speech.h"
#include "phoneme.h"

+ 1
- 0
src/libespeak-ng/dictionary.c View File

@@ -30,6 +30,7 @@
#include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h>

#include "encoding.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"

+ 6
- 25
src/libespeak-ng/encoding.c View File

@@ -517,31 +517,6 @@ static const uint16_t ISCII[0x80] = {
0x0037, 0x0038, 0x0039, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, // f8
};

// Lookup table of 8-bit codepage conversion tables, one slot per text encoding.
// GetC() in readclause.c selects a slot with translator->encoding and then
// indexes the selected table with (byte - 0x80); when the slot is NULL it
// returns ' ' for top-bit-set bytes instead.
// NOTE(review): the slot order presumably has to match the encoding enum
// used for translator->encoding — confirm before reordering or inserting.
const uint16_t *codepage_tables[] = {
NULL, // unknown
NULL, // ASCII
ISO_8859_1,
ISO_8859_2,
ISO_8859_3,
ISO_8859_4,
ISO_8859_5,
ISO_8859_6,
ISO_8859_7,
ISO_8859_8,
ISO_8859_9,
ISO_8859_10,
ISO_8859_11,
// ISO-8859-12 is not a valid encoding.
ISO_8859_13,
ISO_8859_14,
ISO_8859_15,
ISO_8859_16,
KOI8_R,
ISCII,
NULL, // UTF-8
NULL, // UCS-2
};

static uint32_t
string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder)
{
@@ -720,3 +695,9 @@ text_decoder_getc(espeak_ng_TEXT_DECODER *decoder)
{
return decoder->get(decoder);
}

// Exposes the decoder's current read position.
// Returns the value of decoder->current as an untyped, read-only pointer.
const void *
text_decoder_get_buffer(espeak_ng_TEXT_DECODER *decoder)
{
	const void *position = decoder->current;

	return position;
}

+ 3
- 2
src/libespeak-ng/encoding.h View File

@@ -22,8 +22,6 @@ extern "C"
{
#endif

extern const uint16_t *codepage_tables[]; // transitional data table

typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER;

espeak_ng_TEXT_DECODER *
@@ -49,6 +47,9 @@ text_decoder_eof(espeak_ng_TEXT_DECODER *decoder);
uint32_t
text_decoder_getc(espeak_ng_TEXT_DECODER *decoder);

const void *
text_decoder_get_buffer(espeak_ng_TEXT_DECODER *decoder);

#ifdef __cplusplus
}
#endif

+ 1
- 0
src/libespeak-ng/espeak_api.c View File

@@ -26,6 +26,7 @@
#include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h>

#include "encoding.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"

+ 1
- 0
src/libespeak-ng/intonation.c View File

@@ -27,6 +27,7 @@
#include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h>

#include "encoding.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"

+ 1
- 0
src/libespeak-ng/numbers.c View File

@@ -30,6 +30,7 @@
#include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h>

#include "encoding.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"

+ 1
- 0
src/libespeak-ng/phonemelist.c View File

@@ -27,6 +27,7 @@
#include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h>

#include "encoding.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"

+ 3
- 112
src/libespeak-ng/readclause.c View File

@@ -51,11 +51,9 @@ static int n_namedata = 0;
char *namedata = NULL;

static int ungot_char2 = 0;
unsigned char *p_textinput;
wchar_t *p_wchar_input;
espeak_ng_TEXT_DECODER *p_decoder = NULL;
static int ungot_char;
static const char *ungot_word = NULL;
static int end_of_input;

static int ignore_text = 0; // set during <sub> ... </sub> to ignore text which has been replaced by an alias
static int audio_text = 0; // set during <audio> ... </audio>
@@ -236,131 +234,26 @@ static int IsRomanU(unsigned int c)
return 0;
}

// Pushes one byte back onto the byte-oriented text input: p_textinput is
// stepped back by one and the byte at that position is overwritten with c.
// The end_of_input flag is cleared so reading can resume.
// This is only called with UTF8 input, not wchar input.
static void GetC_unget(int c)
{
	end_of_input = 0;
	*--p_textinput = c;
}

// Reports whether the text input has been exhausted.
// Returns 0 while a pushed-back character is still pending in ungot_char;
// otherwise returns the current value of the end_of_input flag.
int Eof(void)
{
if (ungot_char != 0)
return 0;

return end_of_input;
}

// Fetches the next raw input unit, without any UTF-8 decoding.
//  - espeakCHARS_WCHAR: returns the next wchar_t read from p_wchar_input.
//  - espeakCHARS_16BIT: combines two bytes from p_textinput, low byte first.
//  - any other mode:    returns the next byte from p_textinput (0..255).
// When the terminating zero is reached, end_of_input is set and 0 is returned;
// 0 is also returned on every later call while end_of_input stays set.
static int GetC_get(void)
{
unsigned int c;
unsigned int c2; // NOTE(review): not referenced anywhere in this function

if (option_multibyte == espeakCHARS_WCHAR) {
if (*p_wchar_input == 0) {
end_of_input = 1;
return 0;
}

if (!end_of_input)
return *p_wchar_input++;
} else {
// NOTE(review): only a single byte is tested here, even in 16-bit mode, so a
// 16-bit unit whose low byte is zero would be taken as end of input — confirm.
if (*p_textinput == 0) {
end_of_input = 1;
return 0;
}

if (!end_of_input) {
if (option_multibyte == espeakCHARS_16BIT) {
// little-endian: first byte is the low 8 bits, second byte the high 8 bits
c = p_textinput[0] + (p_textinput[1] << 8);
p_textinput += 2;
return c;
}
return *p_textinput++ & 0xff;
}
}
return 0;
// NOTE(review): unreachable after the return above. This line looks like the
// replacement body of Eof() from a diff whose +/- markers were lost — confirm.
return text_decoder_eof(p_decoder);
}

// Returns the next input character as a unicode value.
// A character pending in ungot_char is returned first (without counting it).
// wchar_t and 16-bit input is passed through unchanged. Otherwise, when
// option_multibyte < 2, a top-bit-set byte is tried as the start of a UTF-8
// sequence; if that fails, the byte is mapped through the 8-bit codepage table
// selected by translator->encoding (or to ' ' when there is no table).
// count_characters is incremented for each character returned from the input.
static int GetC(void)
{
// Returns a unicode wide character
// Performs UTF8 checking and conversion

int c;
int c1;
int c2;
int cbuf[4]; // continuation bytes read so far, stored at index ix
int ix;
int n_bytes; // number of continuation bytes expected after the lead byte
static int ungot2 = 0; // one saved byte to re-read on the next call
// payload-bit mask for the lead byte, indexed by number of continuation bytes
static const unsigned char mask[4] = { 0xff, 0x1f, 0x0f, 0x07 };

if ((c1 = ungot_char) != 0) {
ungot_char = 0;
return c1;
}

if (ungot2 != 0) {
c1 = ungot2;
ungot2 = 0;
} else
c1 = GetC_get();

if ((option_multibyte == espeakCHARS_WCHAR) || (option_multibyte == espeakCHARS_16BIT)) {
count_characters++;
return c1; // wchar_t text
}

if ((option_multibyte < 2) && (c1 & 0x80)) {
// multi-byte utf8 encoding, convert to unicode
n_bytes = 0;

// classify the lead byte; 2-byte leads with no payload in bits 1-4 and
// 4-byte leads with low nibble above 4 are rejected (n_bytes stays 0)
if (((c1 & 0xe0) == 0xc0) && ((c1 & 0x1e) != 0))
n_bytes = 1;
else if ((c1 & 0xf0) == 0xe0)
n_bytes = 2;
else if (((c1 & 0xf8) == 0xf0) && ((c1 & 0x0f) <= 4))
n_bytes = 3;

if ((ix = n_bytes) > 0) {
c = c1 & mask[ix];
while (ix > 0) {
if ((c2 = cbuf[ix] = GetC_get()) == 0) {
// input ended in the middle of a sequence
if (option_multibyte == espeakCHARS_AUTO)
option_multibyte = espeakCHARS_8BIT; // change "auto" option to "no"
GetC_unget(' ');
break;
}

if ((c2 & 0xc0) != 0x80) {
// This is not UTF8. Change to 8-bit characterset.
// For a 3-byte sequence failing on its second continuation byte, keep the
// first continuation byte (cbuf[2]) in ungot2 and push c2 back onto the input.
if ((n_bytes == 2) && (ix == 1))
ungot2 = cbuf[2];
GetC_unget(c2);
break;
}
c = (c << 6) + (c2 & 0x3f);
ix--;
}
if (ix == 0) {
// all continuation bytes were valid
count_characters++;
return c;
}
}
// top-bit-set character is not utf8, drop through to 8bit charset case
if ((option_multibyte == espeakCHARS_AUTO) && !Eof())
option_multibyte = espeakCHARS_8BIT; // change "auto" option to "no"
}

// 8 bit character set: convert to unicode if the top bit is set
count_characters++;
if (c1 >= 0x80) {
const uint16_t *codepage = codepage_tables[translator->encoding];
return codepage ? codepage[c1 - 0x80] : ' ';
}
return c1;
// NOTE(review): unreachable after the return above. This line looks like the
// replacement body of GetC() from a diff whose +/- markers were lost — confirm.
return text_decoder_getc(p_decoder);
}

static void UngetC(int c)
@@ -1756,7 +1649,6 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
tr->phonemes_repeat_count = 0;
tr->clause_upper_count = 0;
tr->clause_lower_count = 0;
end_of_input = 0;
*tone_type = 0;
*voice_change = 0;

@@ -1774,7 +1666,6 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
while (!Eof() || (ungot_char != 0) || (ungot_char2 != 0) || (ungot_string_ix >= 0)) {
if (!iswalnum(c1)) {
if ((end_character_position > 0) && (count_characters > end_character_position)) {
end_of_input = 1;
return CLAUSE_EOF;
}


+ 1
- 0
src/libespeak-ng/setlengths.c View File

@@ -27,6 +27,7 @@
#include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h>

#include "encoding.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"

+ 1
- 0
src/libespeak-ng/speech.c View File

@@ -47,6 +47,7 @@
#include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h>

#include "encoding.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"

+ 1
- 0
src/libespeak-ng/synth_mbrola.c View File

@@ -30,6 +30,7 @@
#include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h>

#include "encoding.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"

+ 1
- 0
src/libespeak-ng/synthdata.c View File

@@ -30,6 +30,7 @@
#include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h>

#include "encoding.h"
#include "error.h"
#include "speech.h"
#include "phoneme.h"

+ 1
- 0
src/libespeak-ng/synthesize.c View File

@@ -30,6 +30,7 @@
#include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h>

#include "encoding.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"

+ 1
- 0
src/libespeak-ng/tr_languages.c View File

@@ -29,6 +29,7 @@
#include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h>

#include "encoding.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"

+ 33
- 6
src/libespeak-ng/translate.c View File

@@ -30,6 +30,7 @@
#include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h>

#include "encoding.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"
@@ -1915,6 +1916,16 @@ int UpperCaseInWord(Translator *tr, char *word, int c)
return 0;
}

// Points the shared p_decoder at a wide-character string.
// The length handed to the decoder counts the terminating L'\0' as well.
// Returns the status reported by text_decoder_decode_wstring().
static espeak_ng_STATUS init_wstring_decoder(const wchar_t *text)
{
	size_t n_chars = wcslen(text) + 1; // include the terminator

	return text_decoder_decode_wstring(p_decoder, text, n_chars);
}

// Points the shared p_decoder at a byte string in the given encoding.
// The length handed to the decoder counts the terminating '\0' as well.
// Returns the status reported by text_decoder_decode_string().
static espeak_ng_STATUS init_string_decoder(const char *text, espeak_ng_ENCODING encoding)
{
	size_t n_bytes = strlen(text) + 1; // include the terminator

	return text_decoder_decode_string(p_decoder, text, n_bytes, encoding);
}

void *TranslateClause(Translator *tr, const void *vp_input, int *tone_out, char **voice_change)
{
int ix;
@@ -1966,8 +1977,27 @@ void *TranslateClause(Translator *tr, const void *vp_input, int *tone_out, char
if (tr == NULL)
return NULL;

p_textinput = (unsigned char *)vp_input;
p_wchar_input = (wchar_t *)vp_input;
if (p_decoder == NULL)
p_decoder = create_text_decoder();

switch (option_multibyte)
{
case espeakCHARS_WCHAR:
init_wstring_decoder((const wchar_t *)vp_input);
break;
case espeakCHARS_AUTO: // TODO: Implement UTF-8 => 8BIT fallback on 0xFFFD UTF-8 characters.
case espeakCHARS_UTF8:
init_string_decoder((const char *)vp_input, ESPEAKNG_ENCODING_UTF_8);
break;
case espeakCHARS_8BIT:
init_string_decoder((const char *)vp_input, tr->encoding);
break;
case espeakCHARS_16BIT:
init_string_decoder((const char *)vp_input, ESPEAKNG_ENCODING_ISO_10646_UCS_2);
break;
default:
return NULL; // unknown multibyte option value
}

embedded_ix = 0;
embedded_read = 0;
@@ -2683,10 +2713,7 @@ void *TranslateClause(Translator *tr, const void *vp_input, int *tone_out, char
if (Eof() || (vp_input == NULL))
return NULL;

if (option_multibyte == espeakCHARS_WCHAR)
return (void *)p_wchar_input;
else
return (void *)p_textinput;
return text_decoder_get_buffer(p_decoder);
}

void InitText(int control)

+ 1
- 2
src/libespeak-ng/translate.h View File

@@ -695,8 +695,7 @@ extern Translator *translator;
extern Translator *translator2;
extern char dictionary_name[40];
extern char ctrl_embedded; // to allow an alternative CTRL for embedded commands
extern unsigned char *p_textinput;
extern wchar_t *p_wchar_input;
extern espeak_ng_TEXT_DECODER *p_decoder;
extern int dictionary_skipwords;

extern int (*uri_callback)(int, const char *, const char *);

+ 1
- 0
src/libespeak-ng/voices.c View File

@@ -37,6 +37,7 @@
#include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h>

#include "encoding.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"

Loading…
Cancel
Save