memset(p, ' ', utf8_in(&c, p)); | memset(p, ' ', utf8_in(&c, p)); | ||||
} | } | ||||
static MNEM_TAB xml_char_mnemonics[] = { | |||||
{ "gt", '>' }, | |||||
{ "lt", 0xe000 + '<' }, // private usage area, to avoid confusion with XML tag | |||||
{ "amp", '&' }, | |||||
{ "quot", '"' }, | |||||
{ "nbsp", ' ' }, | |||||
{ "apos", '\'' }, | |||||
{ NULL, -1 } | |||||
}; | |||||
int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_buf, int *tone_type, char *voice_change) | int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_buf, int *tone_type, char *voice_change) | ||||
{ | { | ||||
/* Find the end of the current clause. | /* Find the end of the current clause. | ||||
int phoneme_mode = 0; | int phoneme_mode = 0; | ||||
int n_xml_buf; | int n_xml_buf; | ||||
int terminator; | int terminator; | ||||
int found; | |||||
bool any_alnum = false; | bool any_alnum = false; | ||||
bool self_closing; | |||||
int punct_data = 0; | int punct_data = 0; | ||||
bool is_end_clause; | bool is_end_clause; | ||||
int announced_punctuation = 0; | int announced_punctuation = 0; | ||||
c2 = GetC(); | c2 = GetC(); | ||||
sprintf(ungot_string, "%s%c%c", &xml_buf2[0], c1, c2); | sprintf(ungot_string, "%s%c%c", &xml_buf2[0], c1, c2); | ||||
int found = -1; | |||||
if (c1 == ';') { | if (c1 == ';') { | ||||
if (xml_buf2[0] == '#') { | |||||
// character code number | |||||
if (xml_buf2[1] == 'x') | |||||
found = sscanf(&xml_buf2[2], "%x", (unsigned int *)(&c1)); | |||||
else | |||||
found = sscanf(&xml_buf2[1], "%d", &c1); | |||||
} else { | |||||
if ((found = LookupMnem(xml_char_mnemonics, xml_buf2)) != -1) { | |||||
c1 = found; | |||||
if (c2 == 0) | |||||
c2 = ' '; | |||||
} | |||||
} | |||||
} else | |||||
found = -1; | |||||
found = ParseSsmlReference(xml_buf2, &c1, &c2); | |||||
} | |||||
if (found <= 0) { | if (found <= 0) { | ||||
ungot_string_ix = 0; | ungot_string_ix = 0; | ||||
if ((c1 <= 0x20) && ((sayas_mode == SAYAS_SINGLE_CHARS) || (sayas_mode == SAYAS_KEY))) | if ((c1 <= 0x20) && ((sayas_mode == SAYAS_SINGLE_CHARS) || (sayas_mode == SAYAS_KEY))) | ||||
c1 += 0xe000; // move into unicode private usage area | c1 += 0xe000; // move into unicode private usage area | ||||
} else if ((c1 == '<') && (ssml_ignore_l_angle != '<')) { | } else if ((c1 == '<') && (ssml_ignore_l_angle != '<')) { | ||||
if ((c2 == '!') || (c2 == '?')) { | |||||
// a comment, ignore until closing '>' (or <?xml tag ) | |||||
while (!Eof() && (c1 != '>')) | |||||
c1 = GetC(); | |||||
c2 = ' '; | |||||
} else if ((c2 == '/') || iswalpha(c2)) { | |||||
if ((c2 == '/') || iswalpha(c2) || c2 == '!' || c2 == '?') { | |||||
// check for space in the output buffer for embedded commands produced by the SSML tag | // check for space in the output buffer for embedded commands produced by the SSML tag | ||||
if (ix > (n_buf - 20)) { | if (ix > (n_buf - 20)) { | ||||
// Perhaps not enough room, end the clause before the SSML tag | // Perhaps not enough room, end the clause before the SSML tag | ||||
xml_buf[n_xml_buf] = 0; | xml_buf[n_xml_buf] = 0; | ||||
c2 = ' '; | c2 = ' '; | ||||
self_closing = false; | |||||
if (xml_buf[n_xml_buf-1] == '/') { | |||||
// a self-closing tag | |||||
xml_buf[n_xml_buf-1] = ' '; | |||||
self_closing = true; | |||||
} | |||||
terminator = ProcessSsmlTag(xml_buf, buf, &ix, n_buf, self_closing, xmlbase, &audio_text, current_voice_id, &base_voice, base_voice_variant_name, &ignore_text, &clear_skipping_text, &sayas_mode, &sayas_start, ssml_stack, &n_ssml_stack, &n_param_stack, (int *)speech_parameters); | |||||
terminator = ProcessSsmlTag(xml_buf, buf, &ix, n_buf, xmlbase, &audio_text, current_voice_id, &base_voice, base_voice_variant_name, &ignore_text, &clear_skipping_text, &sayas_mode, &sayas_start, ssml_stack, &n_ssml_stack, &n_param_stack, (int *)speech_parameters); | |||||
if (terminator != 0) { | if (terminator != 0) { | ||||
buf[ix] = ' '; | buf[ix] = ' '; |
} | } | ||||
} | } | ||||
int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outbuf, bool self_closing, const char *xmlbase, bool *audio_text, char *current_voice_id, espeak_VOICE *base_voice, char *base_voice_variant_name, bool *ignore_text, bool *clear_skipping_text, int *sayas_mode, int *sayas_start, SSML_STACK *ssml_stack, int *n_ssml_stack, int *n_param_stack, int *speech_parameters) | |||||
int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outbuf, const char *xmlbase, bool *audio_text, char *current_voice_id, espeak_VOICE *base_voice, char *base_voice_variant_name, bool *ignore_text, bool *clear_skipping_text, int *sayas_mode, int *sayas_start, SSML_STACK *ssml_stack, int *n_ssml_stack, int *n_param_stack, int *speech_parameters) | |||||
{ | { | ||||
// xml_buf is the tag and attributes with a zero terminator in place of the original '>' | // xml_buf is the tag and attributes with a zero terminator in place of the original '>' | ||||
// returns a clause terminator value. | // returns a clause terminator value. | ||||
PARAM_STACK *sp; | PARAM_STACK *sp; | ||||
SSML_STACK *ssml_sp; | SSML_STACK *ssml_sp; | ||||
// Don't process comments and xml declarations.
// xml_buf is a wchar_t string, so the prefixes must be wide string literals:
// wcsncmp() compares wchar_t elements, and a narrow literal cast to
// (wchar_t *) would be read with the wrong element size (and past its end).
if (wcsncmp(xml_buf, L"!--", 3) == 0 || wcsncmp(xml_buf, L"?xml", 4) == 0) {
	return 0;
}
// these tags have no effect if they are self-closing, eg. <voice /> | // these tags have no effect if they are self-closing, eg. <voice /> | ||||
static char ignore_if_self_closing[] = { 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0 }; | static char ignore_if_self_closing[] = { 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0 }; | ||||
bool self_closing = false; | |||||
int len; | |||||
len = wcslen(xml_buf); | |||||
if (xml_buf[len - 1] == '/') { | |||||
// a self-closing tag | |||||
xml_buf[len - 1] = ' '; | |||||
self_closing = true; | |||||
} | |||||
static const MNEM_TAB mnem_phoneme_alphabet[] = { | static const MNEM_TAB mnem_phoneme_alphabet[] = { | ||||
{ "espeak", 1 }, | { "espeak", 1 }, | ||||
{ NULL, -1 } | { NULL, -1 } | ||||
} | } | ||||
return 0; | return 0; | ||||
} | } | ||||
static MNEM_TAB xml_entity_mnemonics[] = { | |||||
{ "gt", '>' }, | |||||
{ "lt", 0xe000 + '<' }, // private usage area, to avoid confusion with XML tag | |||||
{ "amp", '&' }, | |||||
{ "quot", '"' }, | |||||
{ "nbsp", ' ' }, | |||||
{ "apos", '\'' }, | |||||
{ NULL, -1 } | |||||
}; | |||||
int ParseSsmlReference(char *ref, int *c1, int *c2) { | |||||
// Check if buffer *ref contains an XML character or entity reference | |||||
// if found, set *c1 to the replacement char | |||||
// change *c2 for entity references | |||||
// returns >= 0 on success | |||||
if (ref[0] == '#') { | |||||
// character reference | |||||
if (ref[1] == 'x') | |||||
return sscanf(&ref[2], "%x", c1); | |||||
else | |||||
return sscanf(&ref[1], "%d", c1); | |||||
} else { | |||||
// entity reference | |||||
int found; | |||||
if ((found = LookupMnem(xml_entity_mnemonics, ref)) != -1) { | |||||
*c1 = found; | |||||
if (*c2 == 0) | |||||
*c2 = ' '; | |||||
return found; | |||||
} | |||||
} | |||||
return -1; | |||||
} |
char *outbuf, | char *outbuf, | ||||
int *outix, | int *outix, | ||||
int n_outbuf, | int n_outbuf, | ||||
bool self_closing, | |||||
const char *xmlbase, | const char *xmlbase, | ||||
bool *audio_text, | bool *audio_text, | ||||
char *current_voice_id, | char *current_voice_id, | ||||
int *n_param_stack, | int *n_param_stack, | ||||
int *speech_parameters); | int *speech_parameters); | ||||
int ParseSsmlReference(char *ref, | |||||
int *c1, | |||||
int *c2); | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } | ||||
#endif | #endif |
void SynthesizeInit(void); | void SynthesizeInit(void); | ||||
int Generate(PHONEME_LIST *phoneme_list, int *n_ph, bool resume); | int Generate(PHONEME_LIST *phoneme_list, int *n_ph, bool resume); | ||||
void MakeWave2(PHONEME_LIST *p, int n_ph); | |||||
int SpeakNextClause(int control); | int SpeakNextClause(int control); | ||||
void SetSpeed(int control); | void SetSpeed(int control); | ||||
void SetEmbedded(int control, int value); | void SetEmbedded(int control, int value); |
// match 1 pre 2 post 0 - use common phoneme string | // match 1 pre 2 post 0 - use common phoneme string | ||||
// match 1 pre 2 post 3 0 - empty phoneme string | // match 1 pre 2 post 3 0 - empty phoneme string | ||||
typedef const char *constcharptr; | |||||
// used to mark words with the source[] buffer | // used to mark words with the source[] buffer | ||||
typedef struct { | typedef struct { | ||||
unsigned int flags; | unsigned int flags; |
# Run espeak-ng on one SSML test file and compare its output with the
# expected text.
#   $1  path of the input file; the expected output is taken from the file
#       in the same directory with the extension replaced by ".expected"
#   $2  optional mode: "punct" runs with "--punct -x"; anything else (or
#       nothing) runs with "-v en-US --ipa=2"
# Exits the whole script with status 1 when the output differs.
test_ssml() {
	INPUT=$1
	if [ "$2" = "punct" ]
	then
		PARAMETERS="--punct -x"
	else
		PARAMETERS="-v en-US --ipa=2"
	fi
	echo "testing ${INPUT}"
	cp "$(dirname "$INPUT")/$(basename "${INPUT%.*}").expected" expected.txt
	# Run espeak-ng exactly once, with the environment prefix applied to the
	# parameterised command. $PARAMETERS is deliberately left unquoted so it
	# is split into separate command-line options.
	ESPEAK_DATA_PATH=$(pwd) LD_LIBRARY_PATH=src:${LD_LIBRARY_PATH} \
		src/espeak-ng -m -q $PARAMETERS -f "${INPUT}" > actual.txt
	diff expected.txt actual.txt || exit 1
}
# Run every SSML test exactly once: *.ssml files with the default options,
# *.ssml2 files in "punct" mode. A glob is used instead of parsing `ls`
# output; the -e check skips the unexpanded pattern when nothing matches.
for i in tests/ssml/*.ssml; do
	[ -e "$i" ] || continue
	test_ssml "$i"
done
for i in tests/ssml/*.ssml2; do
	[ -e "$i" ] || continue
	test_ssml "$i" punct
done
l'EsDan_: gr'eIt@D,an_: 'amp@s,and t'Ik_: kw'oUts | |||||
b'i: b'i: | |||||
z'Ed z'Ed | |||||
<!-- SSML reference test | |||||
Entity references &lt; &gt; &amp; &apos; and &quot; should be replaced in the buffer
Character references like &#65; are translated to Unicode (65 = 'A')
See https://www.tutorialspoint.com/xml/xml_syntax.htm for example
-->
<speak>&lt; &gt; &amp; &apos; &quot;</speak>
<speak>&#66; &#x42;</speak>
<speak>&#122; &#x7a;</speak>