| memset(p, ' ', utf8_in(&c, p)); | memset(p, ' ', utf8_in(&c, p)); | ||||
| } | } | ||||
// Named XML entities and the character value each one is replaced with.
// The table is searched with LookupMnem(); the { NULL, -1 } entry ends the list.
static MNEM_TAB xml_char_mnemonics[] = {
	{ "gt", '>' },
	{ "lt", 0xe000 + '<' }, // private usage area, to avoid confusion with XML tag
	{ "amp", '&' },
	{ "quot", '"' },
	{ "nbsp", ' ' }, // replaced by an ordinary space
	{ "apos", '\'' },
	{ NULL, -1 }
};
| int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_buf, int *tone_type, char *voice_change) | int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_buf, int *tone_type, char *voice_change) | ||||
| { | { | ||||
| /* Find the end of the current clause. | /* Find the end of the current clause. | ||||
| int phoneme_mode = 0; | int phoneme_mode = 0; | ||||
| int n_xml_buf; | int n_xml_buf; | ||||
| int terminator; | int terminator; | ||||
| int found; | |||||
| bool any_alnum = false; | bool any_alnum = false; | ||||
| bool self_closing; | |||||
| int punct_data = 0; | int punct_data = 0; | ||||
| bool is_end_clause; | bool is_end_clause; | ||||
| int announced_punctuation = 0; | int announced_punctuation = 0; | ||||
| c2 = GetC(); | c2 = GetC(); | ||||
| sprintf(ungot_string, "%s%c%c", &xml_buf2[0], c1, c2); | sprintf(ungot_string, "%s%c%c", &xml_buf2[0], c1, c2); | ||||
| int found = -1; | |||||
| if (c1 == ';') { | if (c1 == ';') { | ||||
| if (xml_buf2[0] == '#') { | |||||
| // character code number | |||||
| if (xml_buf2[1] == 'x') | |||||
| found = sscanf(&xml_buf2[2], "%x", (unsigned int *)(&c1)); | |||||
| else | |||||
| found = sscanf(&xml_buf2[1], "%d", &c1); | |||||
| } else { | |||||
| if ((found = LookupMnem(xml_char_mnemonics, xml_buf2)) != -1) { | |||||
| c1 = found; | |||||
| if (c2 == 0) | |||||
| c2 = ' '; | |||||
| } | |||||
| } | |||||
| } else | |||||
| found = -1; | |||||
| found = ParseSsmlReference(xml_buf2, &c1, &c2); | |||||
| } | |||||
| if (found <= 0) { | if (found <= 0) { | ||||
| ungot_string_ix = 0; | ungot_string_ix = 0; | ||||
| if ((c1 <= 0x20) && ((sayas_mode == SAYAS_SINGLE_CHARS) || (sayas_mode == SAYAS_KEY))) | if ((c1 <= 0x20) && ((sayas_mode == SAYAS_SINGLE_CHARS) || (sayas_mode == SAYAS_KEY))) | ||||
| c1 += 0xe000; // move into unicode private usage area | c1 += 0xe000; // move into unicode private usage area | ||||
| } else if ((c1 == '<') && (ssml_ignore_l_angle != '<')) { | } else if ((c1 == '<') && (ssml_ignore_l_angle != '<')) { | ||||
| if ((c2 == '!') || (c2 == '?')) { | |||||
| // a comment, ignore until closing '<' (or <?xml tag ) | |||||
| while (!Eof() && (c1 != '>')) | |||||
| c1 = GetC(); | |||||
| c2 = ' '; | |||||
| } else if ((c2 == '/') || iswalpha(c2)) { | |||||
| if ((c2 == '/') || iswalpha(c2) || c2 == '!' || c2 == '?') { | |||||
| // check for space in the output buffer for embedded commands produced by the SSML tag | // check for space in the output buffer for embedded commands produced by the SSML tag | ||||
| if (ix > (n_buf - 20)) { | if (ix > (n_buf - 20)) { | ||||
| // Perhaps not enough room, end the clause before the SSML tag | // Perhaps not enough room, end the clause before the SSML tag | ||||
| xml_buf[n_xml_buf] = 0; | xml_buf[n_xml_buf] = 0; | ||||
| c2 = ' '; | c2 = ' '; | ||||
| self_closing = false; | |||||
| if (xml_buf[n_xml_buf-1] == '/') { | |||||
| // a self-closing tag | |||||
| xml_buf[n_xml_buf-1] = ' '; | |||||
| self_closing = true; | |||||
| } | |||||
| terminator = ProcessSsmlTag(xml_buf, buf, &ix, n_buf, self_closing, xmlbase, &audio_text, current_voice_id, &base_voice, base_voice_variant_name, &ignore_text, &clear_skipping_text, &sayas_mode, &sayas_start, ssml_stack, &n_ssml_stack, &n_param_stack, (int *)speech_parameters); | |||||
| terminator = ProcessSsmlTag(xml_buf, buf, &ix, n_buf, xmlbase, &audio_text, current_voice_id, &base_voice, base_voice_variant_name, &ignore_text, &clear_skipping_text, &sayas_mode, &sayas_start, ssml_stack, &n_ssml_stack, &n_param_stack, (int *)speech_parameters); | |||||
| if (terminator != 0) { | if (terminator != 0) { | ||||
| buf[ix] = ' '; | buf[ix] = ' '; |
| } | } | ||||
| } | } | ||||
| int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outbuf, bool self_closing, const char *xmlbase, bool *audio_text, char *current_voice_id, espeak_VOICE *base_voice, char *base_voice_variant_name, bool *ignore_text, bool *clear_skipping_text, int *sayas_mode, int *sayas_start, SSML_STACK *ssml_stack, int *n_ssml_stack, int *n_param_stack, int *speech_parameters) | |||||
| int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outbuf, const char *xmlbase, bool *audio_text, char *current_voice_id, espeak_VOICE *base_voice, char *base_voice_variant_name, bool *ignore_text, bool *clear_skipping_text, int *sayas_mode, int *sayas_start, SSML_STACK *ssml_stack, int *n_ssml_stack, int *n_param_stack, int *speech_parameters) | |||||
| { | { | ||||
| // xml_buf is the tag and attributes with a zero terminator in place of the original '>' | // xml_buf is the tag and attributes with a zero terminator in place of the original '>' | ||||
| // returns a clause terminator value. | // returns a clause terminator value. | ||||
| PARAM_STACK *sp; | PARAM_STACK *sp; | ||||
| SSML_STACK *ssml_sp; | SSML_STACK *ssml_sp; | ||||
| // don't process comments and xml declarations | |||||
| if (wcsncmp(xml_buf, (wchar_t *) "!--", 3) == 0 || wcsncmp(xml_buf, (wchar_t *) "?xml", 4) == 0) { | |||||
| return 0; | |||||
| } | |||||
| // these tags have no effect if they are self-closing, eg. <voice /> | // these tags have no effect if they are self-closing, eg. <voice /> | ||||
| static char ignore_if_self_closing[] = { 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0 }; | static char ignore_if_self_closing[] = { 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0 }; | ||||
| bool self_closing = false; | |||||
| int len; | |||||
| len = wcslen(xml_buf); | |||||
| if (xml_buf[len - 1] == '/') { | |||||
| // a self-closing tag | |||||
| xml_buf[len - 1] = ' '; | |||||
| self_closing = true; | |||||
| } | |||||
| static const MNEM_TAB mnem_phoneme_alphabet[] = { | static const MNEM_TAB mnem_phoneme_alphabet[] = { | ||||
| { "espeak", 1 }, | { "espeak", 1 }, | ||||
| { NULL, -1 } | { NULL, -1 } | ||||
| } | } | ||||
| return 0; | return 0; | ||||
| } | } | ||||
// Named XML entities and the character value each one is replaced with.
// The table is searched with LookupMnem(); the { NULL, -1 } entry ends the list.
static MNEM_TAB xml_entity_mnemonics[] = {
	{ "gt", '>' },
	{ "lt", 0xe000 + '<' }, // private usage area, to avoid confusion with XML tag
	{ "amp", '&' },
	{ "quot", '"' },
	{ "nbsp", ' ' }, // replaced by an ordinary space
	{ "apos", '\'' },
	{ NULL, -1 }
};
| int ParseSsmlReference(char *ref, int *c1, int *c2) { | |||||
| // Check if buffer *ref contains an XML character or entity reference | |||||
| // if found, set *c1 to the replacement char | |||||
| // change *c2 for entity references | |||||
| // returns >= 0 on success | |||||
| if (ref[0] == '#') { | |||||
| // character reference | |||||
| if (ref[1] == 'x') | |||||
| return sscanf(&ref[2], "%x", c1); | |||||
| else | |||||
| return sscanf(&ref[1], "%d", c1); | |||||
| } else { | |||||
| // entity reference | |||||
| int found; | |||||
| if ((found = LookupMnem(xml_entity_mnemonics, ref)) != -1) { | |||||
| *c1 = found; | |||||
| if (*c2 == 0) | |||||
| *c2 = ' '; | |||||
| return found; | |||||
| } | |||||
| } | |||||
| return -1; | |||||
| } |
| char *outbuf, | char *outbuf, | ||||
| int *outix, | int *outix, | ||||
| int n_outbuf, | int n_outbuf, | ||||
| bool self_closing, | |||||
| const char *xmlbase, | const char *xmlbase, | ||||
| bool *audio_text, | bool *audio_text, | ||||
| char *current_voice_id, | char *current_voice_id, | ||||
| int *n_param_stack, | int *n_param_stack, | ||||
| int *speech_parameters); | int *speech_parameters); | ||||
// Decode the text of an XML character reference ("#65", "#x41") or named
// entity ("amp") held in ref.  On success *c1 is set to the replacement
// character and, for named entities, a zero *c2 is changed to a space.
// Returns > 0 on success, 0 or a negative value when ref is not recognised.
int ParseSsmlReference(char *ref,
                       int *c1,
                       int *c2);
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif |
| void SynthesizeInit(void); | void SynthesizeInit(void); | ||||
| int Generate(PHONEME_LIST *phoneme_list, int *n_ph, bool resume); | int Generate(PHONEME_LIST *phoneme_list, int *n_ph, bool resume); | ||||
| void MakeWave2(PHONEME_LIST *p, int n_ph); | |||||
| int SpeakNextClause(int control); | int SpeakNextClause(int control); | ||||
| void SetSpeed(int control); | void SetSpeed(int control); | ||||
| void SetEmbedded(int control, int value); | void SetEmbedded(int control, int value); |
| // match 1 pre 2 post 0 - use common phoneme string | // match 1 pre 2 post 0 - use common phoneme string | ||||
| // match 1 pre 2 post 3 0 - empty phoneme string | // match 1 pre 2 post 3 0 - empty phoneme string | ||||
| typedef const char *constcharptr; | |||||
| // used to mark words with the source[] buffer | // used to mark words with the source[] buffer | ||||
| typedef struct { | typedef struct { | ||||
| unsigned int flags; | unsigned int flags; |
# Run espeak-ng on one SSML test file and compare its output with the
# ".expected" file that sits next to the input.
#   $1  path of the SSML input file
#   $2  optional; "punct" selects the options "--punct -x",
#       anything else selects "-v en-US --ipa=2"
# Exits the script with status 1 on the first mismatch or missing expectation.
test_ssml() {
	INPUT=$1
	if [ "$2" = "punct" ]
	then
		PARAMETERS="--punct -x"
	else
		PARAMETERS="-v en-US --ipa=2"
	fi
	echo "testing ${INPUT}"
	# Stop here if the expectation is missing; otherwise a stale expected.txt
	# left by the previous test would be compared instead.
	cp "$(dirname "$INPUT")/$(basename "${INPUT%.*}").expected" expected.txt || exit 1
	# $PARAMETERS is intentionally unquoted: it carries several options that
	# must be split into separate words.
	ESPEAK_DATA_PATH=$(pwd) LD_LIBRARY_PATH=src:${LD_LIBRARY_PATH} \
		src/espeak-ng -m -q $PARAMETERS -f "${INPUT}" > actual.txt
	diff expected.txt actual.txt || exit 1
}
# Default run: every tests/ssml/*.ssml file, using test_ssml's default options.
for i in tests/ssml/*.ssml ; do
	# an unmatched glob stays literal; skip it so an empty directory is not a failure
	[ -e "$i" ] || continue
	test_ssml "$i"
done
# Punctuation run: every tests/ssml/*.ssml2 file, using the "punct" options.
for i in tests/ssml/*.ssml2 ; do
	[ -e "$i" ] || continue
	test_ssml "$i" punct
done
| l'EsDan_: gr'eIt@D,an_: 'amp@s,and t'Ik_: kw'oUts | |||||
| b'i: b'i: | |||||
| z'Ed z'Ed | |||||
| <!-- SSML reference test | |||||
| Entity references &lt; &gt; &amp; &apos; and &quot; should be replaced in the buffer | |||||
| Character references like &#65; are translated to Unicode (65 = 'A') | |||||
| See https://www.tutorialspoint.com/xml/xml_syntax.htm for example | |||||
| --> | |||||
| <speak>&lt; &gt; &amp; &apos; &quot;</speak> | |||||
| <speak>&#66; &#x42;</speak> | |||||
| <speak>&#122; &#x7a;</speak> | |||||