Browse Source

Use the text decoder API in readclause.c.

master
Reece H. Dunn 8 years ago
parent
commit
7c16ac543c

+ 1
- 0
src/libespeak-ng/compiledict.c View File

#include <espeak-ng/espeak_ng.h> #include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h> #include <espeak-ng/speak_lib.h>


#include "encoding.h"
#include "error.h" #include "error.h"
#include "speech.h" #include "speech.h"
#include "phoneme.h" #include "phoneme.h"

+ 1
- 0
src/libespeak-ng/dictionary.c View File

#include <espeak-ng/espeak_ng.h> #include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h> #include <espeak-ng/speak_lib.h>


#include "encoding.h"
#include "speech.h" #include "speech.h"
#include "phoneme.h" #include "phoneme.h"
#include "synthesize.h" #include "synthesize.h"

+ 6
- 25
src/libespeak-ng/encoding.c View File

0x0037, 0x0038, 0x0039, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, // f8 0x0037, 0x0038, 0x0039, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, // f8
}; };


// Per-encoding code page lookup tables.
//
// The 8-bit path of the text reader selects a slot with translator->encoding
// and, for a byte value >= 0x80, reads entry [byte - 0x80] of that table.
// A NULL slot means no table is available for that encoding; the reader then
// substitutes a space character.
//
// NOTE(review): the slot order presumably has to match the numeric values of
// the encoding enumeration used for translator->encoding -- confirm before
// inserting or reordering entries.
const uint16_t *codepage_tables[] = {
NULL, // unknown
NULL, // ASCII
ISO_8859_1,
ISO_8859_2,
ISO_8859_3,
ISO_8859_4,
ISO_8859_5,
ISO_8859_6,
ISO_8859_7,
ISO_8859_8,
ISO_8859_9,
ISO_8859_10,
ISO_8859_11,
// ISO-8859-12 is not a valid encoding.
ISO_8859_13,
ISO_8859_14,
ISO_8859_15,
ISO_8859_16,
KOI8_R,
ISCII,
NULL, // UTF-8
NULL, // UCS-2
};

static uint32_t static uint32_t
string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder) string_decoder_getc_us_ascii(espeak_ng_TEXT_DECODER *decoder)
{ {
{ {
return decoder->get(decoder); return decoder->get(decoder);
} }

// Return the decoder's `current` pointer as an untyped (const void *) pointer.
// The decoder itself is not modified.
const void *
text_decoder_get_buffer(espeak_ng_TEXT_DECODER *decoder)
{
	const void *buffer = decoder->current;
	return buffer;
}

+ 3
- 2
src/libespeak-ng/encoding.h View File

{ {
#endif #endif


extern const uint16_t *codepage_tables[]; // transitional data table

typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER; typedef struct espeak_ng_TEXT_DECODER_ espeak_ng_TEXT_DECODER;


espeak_ng_TEXT_DECODER * espeak_ng_TEXT_DECODER *
uint32_t uint32_t
text_decoder_getc(espeak_ng_TEXT_DECODER *decoder); text_decoder_getc(espeak_ng_TEXT_DECODER *decoder);


// Returns the decoder's `current` pointer as an untyped pointer.
const void *
text_decoder_get_buffer(espeak_ng_TEXT_DECODER *decoder);

#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

+ 1
- 0
src/libespeak-ng/espeak_api.c View File

#include <espeak-ng/espeak_ng.h> #include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h> #include <espeak-ng/speak_lib.h>


#include "encoding.h"
#include "speech.h" #include "speech.h"
#include "phoneme.h" #include "phoneme.h"
#include "synthesize.h" #include "synthesize.h"

+ 1
- 0
src/libespeak-ng/intonation.c View File

#include <espeak-ng/espeak_ng.h> #include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h> #include <espeak-ng/speak_lib.h>


#include "encoding.h"
#include "speech.h" #include "speech.h"
#include "phoneme.h" #include "phoneme.h"
#include "synthesize.h" #include "synthesize.h"

+ 1
- 0
src/libespeak-ng/numbers.c View File

#include <espeak-ng/espeak_ng.h> #include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h> #include <espeak-ng/speak_lib.h>


#include "encoding.h"
#include "speech.h" #include "speech.h"
#include "phoneme.h" #include "phoneme.h"
#include "synthesize.h" #include "synthesize.h"

+ 1
- 0
src/libespeak-ng/phonemelist.c View File

#include <espeak-ng/espeak_ng.h> #include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h> #include <espeak-ng/speak_lib.h>


#include "encoding.h"
#include "speech.h" #include "speech.h"
#include "phoneme.h" #include "phoneme.h"
#include "synthesize.h" #include "synthesize.h"

+ 3
- 112
src/libespeak-ng/readclause.c View File

char *namedata = NULL; char *namedata = NULL;


static int ungot_char2 = 0; static int ungot_char2 = 0;
unsigned char *p_textinput;
wchar_t *p_wchar_input;
espeak_ng_TEXT_DECODER *p_decoder = NULL;
static int ungot_char; static int ungot_char;
static const char *ungot_word = NULL; static const char *ungot_word = NULL;
static int end_of_input;


static int ignore_text = 0; // set during <sub> ... </sub> to ignore text which has been replaced by an alias static int ignore_text = 0; // set during <sub> ... </sub> to ignore text which has been replaced by an alias
static int audio_text = 0; // set during <audio> ... </audio> static int audio_text = 0; // set during <audio> ... </audio>
return 0; return 0;
} }


// Push a single byte back onto the byte-oriented text input.
// Only called for UTF8 input, never for wchar input.
static void GetC_unget(int c)
{
	// Step the read pointer back by one byte and overwrite that byte with c.
	*--p_textinput = c;

	// A byte is available again, so the input is no longer exhausted.
	end_of_input = 0;
}

int Eof(void) int Eof(void)
{ {
if (ungot_char != 0) if (ungot_char != 0)
return 0; return 0;


return end_of_input;
}

static int GetC_get(void)
{
unsigned int c;
unsigned int c2;

if (option_multibyte == espeakCHARS_WCHAR) {
if (*p_wchar_input == 0) {
end_of_input = 1;
return 0;
}

if (!end_of_input)
return *p_wchar_input++;
} else {
if (*p_textinput == 0) {
end_of_input = 1;
return 0;
}

if (!end_of_input) {
if (option_multibyte == espeakCHARS_16BIT) {
c = p_textinput[0] + (p_textinput[1] << 8);
p_textinput += 2;
return c;
}
return *p_textinput++ & 0xff;
}
}
return 0;
return text_decoder_eof(p_decoder);
} }


static int GetC(void) static int GetC(void)
{ {
// Returns a unicode wide character
// Performs UTF8 checking and conversion

int c;
int c1; int c1;
int c2;
int cbuf[4];
int ix;
int n_bytes;
static int ungot2 = 0; static int ungot2 = 0;
static const unsigned char mask[4] = { 0xff, 0x1f, 0x0f, 0x07 };


if ((c1 = ungot_char) != 0) { if ((c1 = ungot_char) != 0) {
ungot_char = 0; ungot_char = 0;
return c1; return c1;
} }


if (ungot2 != 0) {
c1 = ungot2;
ungot2 = 0;
} else
c1 = GetC_get();

if ((option_multibyte == espeakCHARS_WCHAR) || (option_multibyte == espeakCHARS_16BIT)) {
count_characters++;
return c1; // wchar_t text
}

if ((option_multibyte < 2) && (c1 & 0x80)) {
// multi-byte utf8 encoding, convert to unicode
n_bytes = 0;

if (((c1 & 0xe0) == 0xc0) && ((c1 & 0x1e) != 0))
n_bytes = 1;
else if ((c1 & 0xf0) == 0xe0)
n_bytes = 2;
else if (((c1 & 0xf8) == 0xf0) && ((c1 & 0x0f) <= 4))
n_bytes = 3;

if ((ix = n_bytes) > 0) {
c = c1 & mask[ix];
while (ix > 0) {
if ((c2 = cbuf[ix] = GetC_get()) == 0) {
if (option_multibyte == espeakCHARS_AUTO)
option_multibyte = espeakCHARS_8BIT; // change "auto" option to "no"
GetC_unget(' ');
break;
}

if ((c2 & 0xc0) != 0x80) {
// This is not UTF8. Change to 8-bit characterset.
if ((n_bytes == 2) && (ix == 1))
ungot2 = cbuf[2];
GetC_unget(c2);
break;
}
c = (c << 6) + (c2 & 0x3f);
ix--;
}
if (ix == 0) {
count_characters++;
return c;
}
}
// top-bit-set character is not utf8, drop through to 8bit charset case
if ((option_multibyte == espeakCHARS_AUTO) && !Eof())
option_multibyte = espeakCHARS_8BIT; // change "auto" option to "no"
}

// 8 bit character set, convert to unicode if
count_characters++; count_characters++;
if (c1 >= 0x80) {
const uint16_t *codepage = codepage_tables[translator->encoding];
return codepage ? codepage[c1 - 0x80] : ' ';
}
return c1;
return text_decoder_getc(p_decoder);
} }


static void UngetC(int c) static void UngetC(int c)
tr->phonemes_repeat_count = 0; tr->phonemes_repeat_count = 0;
tr->clause_upper_count = 0; tr->clause_upper_count = 0;
tr->clause_lower_count = 0; tr->clause_lower_count = 0;
end_of_input = 0;
*tone_type = 0; *tone_type = 0;
*voice_change = 0; *voice_change = 0;


while (!Eof() || (ungot_char != 0) || (ungot_char2 != 0) || (ungot_string_ix >= 0)) { while (!Eof() || (ungot_char != 0) || (ungot_char2 != 0) || (ungot_string_ix >= 0)) {
if (!iswalnum(c1)) { if (!iswalnum(c1)) {
if ((end_character_position > 0) && (count_characters > end_character_position)) { if ((end_character_position > 0) && (count_characters > end_character_position)) {
end_of_input = 1;
return CLAUSE_EOF; return CLAUSE_EOF;
} }



+ 1
- 0
src/libespeak-ng/setlengths.c View File

#include <espeak-ng/espeak_ng.h> #include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h> #include <espeak-ng/speak_lib.h>


#include "encoding.h"
#include "speech.h" #include "speech.h"
#include "phoneme.h" #include "phoneme.h"
#include "synthesize.h" #include "synthesize.h"

+ 1
- 0
src/libespeak-ng/speech.c View File

#include <espeak-ng/espeak_ng.h> #include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h> #include <espeak-ng/speak_lib.h>


#include "encoding.h"
#include "speech.h" #include "speech.h"
#include "phoneme.h" #include "phoneme.h"
#include "synthesize.h" #include "synthesize.h"

+ 1
- 0
src/libespeak-ng/synth_mbrola.c View File

#include <espeak-ng/espeak_ng.h> #include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h> #include <espeak-ng/speak_lib.h>


#include "encoding.h"
#include "speech.h" #include "speech.h"
#include "phoneme.h" #include "phoneme.h"
#include "synthesize.h" #include "synthesize.h"

+ 1
- 0
src/libespeak-ng/synthdata.c View File

#include <espeak-ng/espeak_ng.h> #include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h> #include <espeak-ng/speak_lib.h>


#include "encoding.h"
#include "error.h" #include "error.h"
#include "speech.h" #include "speech.h"
#include "phoneme.h" #include "phoneme.h"

+ 1
- 0
src/libespeak-ng/synthesize.c View File

#include <espeak-ng/espeak_ng.h> #include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h> #include <espeak-ng/speak_lib.h>


#include "encoding.h"
#include "speech.h" #include "speech.h"
#include "phoneme.h" #include "phoneme.h"
#include "synthesize.h" #include "synthesize.h"

+ 1
- 0
src/libespeak-ng/tr_languages.c View File

#include <espeak-ng/espeak_ng.h> #include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h> #include <espeak-ng/speak_lib.h>


#include "encoding.h"
#include "speech.h" #include "speech.h"
#include "phoneme.h" #include "phoneme.h"
#include "synthesize.h" #include "synthesize.h"

+ 33
- 6
src/libespeak-ng/translate.c View File

#include <espeak-ng/espeak_ng.h> #include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h> #include <espeak-ng/speak_lib.h>


#include "encoding.h"
#include "speech.h" #include "speech.h"
#include "phoneme.h" #include "phoneme.h"
#include "synthesize.h" #include "synthesize.h"
return 0; return 0;
} }


// Point the shared text decoder (p_decoder) at a NUL-terminated wide string.
//
// text: wide-character input; the terminating NUL is included in the length
//       handed to the decoder (wcslen + 1).
// Returns the status reported by text_decoder_decode_wstring().
//
// A NULL text is passed on with a length of 0 instead of being measured with
// wcslen(), which is undefined for a null pointer. TranslateClause only tests
// its input for NULL after the decoder has been initialised.
// NOTE(review): assumes text_decoder_decode_wstring() accepts a NULL buffer of
// length 0 -- confirm against encoding.c.
static espeak_ng_STATUS init_wstring_decoder(const wchar_t *text)
{
	return text_decoder_decode_wstring(p_decoder, text, text ? wcslen(text) + 1 : 0);
}

// Point the shared text decoder (p_decoder) at a NUL-terminated byte string.
//
// text:     input bytes; the terminating NUL is included in the length handed
//           to the decoder (strlen + 1).
// encoding: encoding the decoder should use to interpret the bytes.
// Returns the status reported by text_decoder_decode_string().
//
// A NULL text is passed on with a length of 0 instead of being measured with
// strlen(), which is undefined for a null pointer. TranslateClause only tests
// its input for NULL after the decoder has been initialised.
// NOTE(review): assumes text_decoder_decode_string() accepts a NULL buffer of
// length 0 -- confirm against encoding.c.
static espeak_ng_STATUS init_string_decoder(const char *text, espeak_ng_ENCODING encoding)
{
	return text_decoder_decode_string(p_decoder, text, text ? strlen(text) + 1 : 0, encoding);
}

void *TranslateClause(Translator *tr, const void *vp_input, int *tone_out, char **voice_change) void *TranslateClause(Translator *tr, const void *vp_input, int *tone_out, char **voice_change)
{ {
int ix; int ix;
if (tr == NULL) if (tr == NULL)
return NULL; return NULL;


p_textinput = (unsigned char *)vp_input;
p_wchar_input = (wchar_t *)vp_input;
if (p_decoder == NULL)
p_decoder = create_text_decoder();

switch (option_multibyte)
{
case espeakCHARS_WCHAR:
init_wstring_decoder((const wchar_t *)vp_input);
break;
case espeakCHARS_AUTO: // TODO: Implement UTF-8 => 8BIT fallback on 0xFFFD UTF-8 characters.
case espeakCHARS_UTF8:
init_string_decoder((const char *)vp_input, ESPEAKNG_ENCODING_UTF_8);
break;
case espeakCHARS_8BIT:
init_string_decoder((const char *)vp_input, tr->encoding);
break;
case espeakCHARS_16BIT:
init_string_decoder((const char *)vp_input, ESPEAKNG_ENCODING_ISO_10646_UCS_2);
break;
default:
return NULL; // unknown multibyte option value
}


embedded_ix = 0; embedded_ix = 0;
embedded_read = 0; embedded_read = 0;
if (Eof() || (vp_input == NULL)) if (Eof() || (vp_input == NULL))
return NULL; return NULL;


if (option_multibyte == espeakCHARS_WCHAR)
return (void *)p_wchar_input;
else
return (void *)p_textinput;
return text_decoder_get_buffer(p_decoder);
} }


void InitText(int control) void InitText(int control)

+ 1
- 2
src/libespeak-ng/translate.h View File

extern Translator *translator2; extern Translator *translator2;
extern char dictionary_name[40]; extern char dictionary_name[40];
extern char ctrl_embedded; // to allow an alternative CTRL for embedded commands extern char ctrl_embedded; // to allow an alternative CTRL for embedded commands
extern unsigned char *p_textinput;
extern wchar_t *p_wchar_input;
extern espeak_ng_TEXT_DECODER *p_decoder;
extern int dictionary_skipwords; extern int dictionary_skipwords;


extern int (*uri_callback)(int, const char *, const char *); extern int (*uri_callback)(int, const char *, const char *);

+ 1
- 0
src/libespeak-ng/voices.c View File

#include <espeak-ng/espeak_ng.h> #include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h> #include <espeak-ng/speak_lib.h>


#include "encoding.h"
#include "speech.h" #include "speech.h"
#include "phoneme.h" #include "phoneme.h"
#include "synthesize.h" #include "synthesize.h"

Loading…
Cancel
Save