use parameters instead of globals
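The diff below makes LoadSoundFile2 and AddNameData non-static, moves ProcessSsmlTag out of readclause.c into the SSML code, and threads the clause state it needs (xmlbase, audio_text, ignore_text, clear_skipping_text, sayas_mode/sayas_start, the SSML and parameter stacks) through function parameters instead of file-scope globals. As a rough sketch of the pattern only (the names `add_global`, `add_param`, `total` are invented for this illustration and are not espeak-ng code):

```c
#include <stdio.h>

/* Before: the helper reads and writes a file-scope global. */
static int total_global;

static void add_global(int x)
{
	total_global += x;
}

/* After: the caller owns the state and hands it in by pointer, so the
 * helper no longer depends on globals and can move to another
 * translation unit. */
static void add_param(int *total, int x)
{
	*total += x;
}

int main(void)
{
	add_global(5);

	int total = 0;        /* state now lives with the caller */
	add_param(&total, 5); /* mirrors passing &audio_text, &n_ssml_stack, ... below */

	printf("%d %d\n", total_global, total);
	return 0;
}
```

Passing the state explicitly is what lets the SSML handler live in its own translation unit without sharing mutable globals with readclause.c.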
@@ -416,7 +416,7 @@ static int LookupSoundicon(int c)
	return -1;
}
static int LoadSoundFile2(const char *fname)
int LoadSoundFile2(const char *fname)
{
	// Load a sound file into one of the reserved slots in the sound icon table
	// (if it's not already loaded)
@@ -549,7 +549,7 @@ static int AnnouncePunctuation(Translator *tr, int c1, int *c2_ptr, char *output
	return short_pause;
}
static int AddNameData(const char *name, int wide)
int AddNameData(const char *name, int wide)
{
	// Add the name to the namedata and return its position
	// (Used by the Windows SAPI wrapper)
@@ -601,390 +601,6 @@ void SetVoiceStack(espeak_VOICE *v, const char *variant_name)
	memcpy(&base_voice, &current_voice_selected, sizeof(base_voice));
}
static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outbuf, bool self_closing)
{
	// xml_buf is the tag and attributes with a zero terminator in place of the original '>'
	// returns a clause terminator value.
	unsigned int ix;
	int index;
	int c;
	int tag_type;
	int value;
	int value2;
	int value3;
	int voice_change_flag;
	wchar_t *px;
	wchar_t *attr1;
	wchar_t *attr2;
	wchar_t *attr3;
	int terminator;
	char *uri;
	int param_type;
	char tag_name[40];
	char buf[80];
	PARAM_STACK *sp;
	SSML_STACK *ssml_sp;
	static const MNEM_TAB mnem_phoneme_alphabet[] = {
		{ "espeak", 1 },
		{ NULL, -1 }
	};
	static const MNEM_TAB mnem_punct[] = {
		{ "none", 1 },
		{ "all", 2 },
		{ "some", 3 },
		{ NULL, -1 }
	};
	static const MNEM_TAB mnem_capitals[] = {
		{ "no", 0 },
		{ "icon", 1 },
		{ "spelling", 2 },
		{ "pitch", 20 }, // this is the amount by which to raise the pitch
		{ NULL, -1 }
	};
	static const MNEM_TAB mnem_interpret_as[] = {
		{ "characters", SAYAS_CHARS },
		{ "tts:char", SAYAS_SINGLE_CHARS },
		{ "tts:key", SAYAS_KEY },
		{ "tts:digits", SAYAS_DIGITS },
		{ "telephone", SAYAS_DIGITS1 },
		{ NULL, -1 }
	};
	static const MNEM_TAB mnem_sayas_format[] = {
		{ "glyphs", 1 },
		{ NULL, -1 }
	};
	static const MNEM_TAB mnem_break[] = {
		{ "none", 0 },
		{ "x-weak", 1 },
		{ "weak", 2 },
		{ "medium", 3 },
		{ "strong", 4 },
		{ "x-strong", 5 },
		{ NULL, -1 }
	};
	static const MNEM_TAB mnem_emphasis[] = {
		{ "none", 1 },
		{ "reduced", 2 },
		{ "moderate", 3 },
		{ "strong", 4 },
		{ "x-strong", 5 },
		{ NULL, -1 }
	};
	static const char *prosody_attr[5] = {
		NULL, "rate", "volume", "pitch", "range"
	};
	for (ix = 0; ix < (sizeof(tag_name)-1); ix++) {
		if (((c = xml_buf[ix]) == 0) || iswspace(c))
			break;
		tag_name[ix] = tolower((char)c);
	}
	tag_name[ix] = 0;
	px = &xml_buf[ix]; // the tag's attributes
	if (tag_name[0] == '/') {
		// closing tag
		if ((tag_type = LookupMnem(ssmltags, &tag_name[1])) != HTML_NOSPACE)
			outbuf[(*outix)++] = ' ';
		tag_type += SSML_CLOSE;
	} else {
		if ((tag_type = LookupMnem(ssmltags, tag_name)) != HTML_NOSPACE) {
			// separate SSML tags from the previous word (but not HTML tags such as <b> <font> which can occur inside a word)
			outbuf[(*outix)++] = ' ';
		}
		if (self_closing && ignore_if_self_closing[tag_type])
			return 0;
	}
	voice_change_flag = 0;
	ssml_sp = &ssml_stack[n_ssml_stack-1];
	switch (tag_type)
	{
	case SSML_STYLE:
		sp = PushParamStack(tag_type, &n_param_stack, (PARAM_STACK *) param_stack);
		attr1 = GetSsmlAttribute(px, "field");
		attr2 = GetSsmlAttribute(px, "mode");
		if (attrcmp(attr1, "punctuation") == 0) {
			value = attrlookup(attr2, mnem_punct);
			sp->parameter[espeakPUNCTUATION] = value;
		} else if (attrcmp(attr1, "capital_letters") == 0) {
			value = attrlookup(attr2, mnem_capitals);
			sp->parameter[espeakCAPITALS] = value;
		}
		ProcessParamStack(outbuf, outix, n_param_stack, param_stack, speech_parameters);
		break;
	case SSML_PROSODY:
		sp = PushParamStack(tag_type, &n_param_stack, (PARAM_STACK *) param_stack);
		// look for attributes: rate, volume, pitch, range
		for (param_type = espeakRATE; param_type <= espeakRANGE; param_type++) {
			if ((attr1 = GetSsmlAttribute(px, prosody_attr[param_type])) != NULL)
				SetProsodyParameter(param_type, attr1, sp, &param_stack, &speech_parameters);
		}
		ProcessParamStack(outbuf, outix, n_param_stack, param_stack, speech_parameters);
		break;
	case SSML_EMPHASIS:
		sp = PushParamStack(tag_type, &n_param_stack, (PARAM_STACK *) param_stack);
		value = 3; // default is "moderate"
		if ((attr1 = GetSsmlAttribute(px, "level")) != NULL)
			value = attrlookup(attr1, mnem_emphasis);
		if (translator->langopts.tone_language == 1) {
			static unsigned char emphasis_to_pitch_range[] = { 50, 50, 40, 70, 90, 100 };
			static unsigned char emphasis_to_volume[] = { 100, 100, 70, 110, 135, 150 };
			// tone languages (e.g. Chinese) do emphasis by increasing the pitch range.
			sp->parameter[espeakRANGE] = emphasis_to_pitch_range[value];
			sp->parameter[espeakVOLUME] = emphasis_to_volume[value];
		} else {
			static unsigned char emphasis_to_volume2[] = { 100, 100, 75, 100, 120, 150 };
			sp->parameter[espeakVOLUME] = emphasis_to_volume2[value];
			sp->parameter[espeakEMPHASIS] = value;
		}
		ProcessParamStack(outbuf, outix, n_param_stack, param_stack, speech_parameters);
		break;
	case SSML_STYLE + SSML_CLOSE:
	case SSML_PROSODY + SSML_CLOSE:
	case SSML_EMPHASIS + SSML_CLOSE:
		PopParamStack(tag_type, outbuf, outix, &n_param_stack, (PARAM_STACK *) param_stack, (int *) speech_parameters);
		break;
	case SSML_PHONEME:
		attr1 = GetSsmlAttribute(px, "alphabet");
		attr2 = GetSsmlAttribute(px, "ph");
		value = attrlookup(attr1, mnem_phoneme_alphabet);
		if (value == 1) { // alphabet="espeak"
			outbuf[(*outix)++] = '[';
			outbuf[(*outix)++] = '[';
			*outix += attrcopy_utf8(&outbuf[*outix], attr2, n_outbuf-*outix);
			outbuf[(*outix)++] = ']';
			outbuf[(*outix)++] = ']';
		}
		break;
	case SSML_SAYAS:
		attr1 = GetSsmlAttribute(px, "interpret-as");
		attr2 = GetSsmlAttribute(px, "format");
		attr3 = GetSsmlAttribute(px, "detail");
		value = attrlookup(attr1, mnem_interpret_as);
		value2 = attrlookup(attr2, mnem_sayas_format);
		if (value2 == 1)
			value = SAYAS_GLYPHS;
		value3 = attrnumber(attr3, 0, 0);
		if (value == SAYAS_DIGITS) {
			if (value3 <= 1)
				value = SAYAS_DIGITS1;
			else
				value = SAYAS_DIGITS + value3;
		}
		sprintf(buf, "%c%dY", CTRL_EMBEDDED, value);
		strcpy(&outbuf[*outix], buf);
		*outix += strlen(buf);
		sayas_start = *outix;
		sayas_mode = value; // punctuation doesn't end clause during SAY-AS
		break;
	case SSML_SAYAS + SSML_CLOSE:
		if (sayas_mode == SAYAS_KEY) {
			outbuf[*outix] = 0;
			ReplaceKeyName(outbuf, sayas_start, outix);
		}
		outbuf[(*outix)++] = CTRL_EMBEDDED;
		outbuf[(*outix)++] = 'Y';
		sayas_mode = 0;
		break;
	case SSML_SUB:
		if ((attr1 = GetSsmlAttribute(px, "alias")) != NULL) {
			// use the alias rather than the text
			ignore_text = true;
			*outix += attrcopy_utf8(&outbuf[*outix], attr1, n_outbuf-*outix);
		}
		break;
	case SSML_IGNORE_TEXT:
		ignore_text = true;
		break;
	case SSML_SUB + SSML_CLOSE:
	case SSML_IGNORE_TEXT + SSML_CLOSE:
		ignore_text = false;
		break;
	case SSML_MARK:
		if ((attr1 = GetSsmlAttribute(px, "name")) != NULL) {
			// add name to circular buffer of marker names
			attrcopy_utf8(buf, attr1, sizeof(buf));
			if (strcmp(skip_marker, buf) == 0) {
				// This is the marker we are waiting for before starting to speak
				clear_skipping_text = true;
				skip_marker[0] = 0;
				return CLAUSE_NONE;
			}
			if ((index = AddNameData(buf, 0)) >= 0) {
				sprintf(buf, "%c%dM", CTRL_EMBEDDED, index);
				strcpy(&outbuf[*outix], buf);
				*outix += strlen(buf);
			}
		}
		break;
	case SSML_AUDIO:
		sp = PushParamStack(tag_type, &n_param_stack, (PARAM_STACK *)param_stack);
		if ((attr1 = GetSsmlAttribute(px, "src")) != NULL) {
			char fname[256];
			attrcopy_utf8(buf, attr1, sizeof(buf));
			if (uri_callback == NULL) {
				if ((xmlbase != NULL) && (buf[0] != '/')) {
					sprintf(fname, "%s/%s", xmlbase, buf);
					index = LoadSoundFile2(fname);
				} else
					index = LoadSoundFile2(buf);
				if (index >= 0) {
					sprintf(buf, "%c%dI", CTRL_EMBEDDED, index);
					strcpy(&outbuf[*outix], buf);
					*outix += strlen(buf);
					sp->parameter[espeakSILENCE] = 1;
				}
			} else {
				if ((index = AddNameData(buf, 0)) >= 0) {
					uri = &namedata[index];
					if (uri_callback(1, uri, xmlbase) == 0) {
						sprintf(buf, "%c%dU", CTRL_EMBEDDED, index);
						strcpy(&outbuf[*outix], buf);
						*outix += strlen(buf);
						sp->parameter[espeakSILENCE] = 1;
					}
				}
			}
		}
		ProcessParamStack(outbuf, outix, n_param_stack, param_stack, speech_parameters);
		if (self_closing)
			PopParamStack(tag_type, outbuf, outix, &n_param_stack, (PARAM_STACK *) param_stack, (int *) speech_parameters);
		else
			audio_text = true;
		return CLAUSE_NONE;
	case SSML_AUDIO + SSML_CLOSE:
		PopParamStack(tag_type, outbuf, outix, &n_param_stack, (PARAM_STACK *) param_stack, (int *) speech_parameters);
		audio_text = false;
		return CLAUSE_NONE;
	case SSML_BREAK:
		value = 21;
		terminator = CLAUSE_NONE;
		if ((attr1 = GetSsmlAttribute(px, "strength")) != NULL) {
			static int break_value[6] = { 0, 7, 14, 21, 40, 80 }; // *10mS
			value = attrlookup(attr1, mnem_break);
			if (value < 3) {
				// adjust prepause on the following word
				sprintf(&outbuf[*outix], "%c%dB", CTRL_EMBEDDED, value);
				*outix += 3;
				terminator = 0;
			}
			value = break_value[value];
		}
		if ((attr2 = GetSsmlAttribute(px, "time")) != NULL) {
			value2 = attrnumber(attr2, 0, 1); // pause in mS
			// compensate for speaking speed to keep constant pause length, see function PauseLength()
			// 'value' here is x 10mS
			value = (value2 * 256) / (speed.clause_pause_factor * 10);
			if (value < 200)
				value = (value2 * 256) / (speed.pause_factor * 10);
			if (terminator == 0)
				terminator = CLAUSE_NONE;
		}
		if (terminator) {
			if (value > 0xfff) {
				// scale down the value and set a scaling indicator bit
				value = value / 32;
				if (value > 0xfff)
					value = 0xfff;
				terminator |= CLAUSE_PAUSE_LONG;
			}
			return terminator + value;
		}
		break;
	case SSML_SPEAK:
		if ((attr1 = GetSsmlAttribute(px, "xml:base")) != NULL) {
			attrcopy_utf8(buf, attr1, sizeof(buf));
			if ((index = AddNameData(buf, 0)) >= 0)
				xmlbase = &namedata[index];
		}
		if (GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name) == 0)
			return 0; // no voice change
		return CLAUSE_VOICE;
	case SSML_VOICE:
		if (GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name) == 0)
			return 0; // no voice change
		return CLAUSE_VOICE;
	case SSML_SPEAK + SSML_CLOSE:
		// unwind stack until the previous <voice> or <speak> tag
		while ((n_ssml_stack > 1) && (ssml_stack[n_ssml_stack-1].tag_type != SSML_SPEAK))
			n_ssml_stack--;
		return CLAUSE_PERIOD + GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name);
	case SSML_VOICE + SSML_CLOSE:
		// unwind stack until the previous <voice> or <speak> tag
		while ((n_ssml_stack > 1) && (ssml_stack[n_ssml_stack-1].tag_type != SSML_VOICE))
			n_ssml_stack--;
		terminator = 0; // ?? Sentence intonation, but no pause ??
		return terminator + GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name);
	case HTML_BREAK:
	case HTML_BREAK + SSML_CLOSE:
		return CLAUSE_COLON;
	case SSML_SENTENCE:
		if (ssml_sp->tag_type == SSML_SENTENCE) {
			// new sentence implies end-of-sentence
			voice_change_flag = GetVoiceAttributes(px, SSML_SENTENCE+SSML_CLOSE, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name);
		}
		voice_change_flag |= GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name);
		return CLAUSE_PARAGRAPH + voice_change_flag;
	case SSML_PARAGRAPH:
		if (ssml_sp->tag_type == SSML_SENTENCE) {
			// new paragraph implies end-of-sentence or end-of-paragraph
			voice_change_flag = GetVoiceAttributes(px, SSML_SENTENCE+SSML_CLOSE, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name);
		}
		if (ssml_sp->tag_type == SSML_PARAGRAPH) {
			// new paragraph implies end-of-sentence or end-of-paragraph
			voice_change_flag |= GetVoiceAttributes(px, SSML_PARAGRAPH+SSML_CLOSE, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name);
		}
		voice_change_flag |= GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name);
		return CLAUSE_PARAGRAPH + voice_change_flag;
	case SSML_SENTENCE + SSML_CLOSE:
		if (ssml_sp->tag_type == SSML_SENTENCE) {
			// end of a sentence which specified a language
			voice_change_flag = GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name);
		}
		return CLAUSE_PERIOD + voice_change_flag;
	case SSML_PARAGRAPH + SSML_CLOSE:
		if ((ssml_sp->tag_type == SSML_SENTENCE) || (ssml_sp->tag_type == SSML_PARAGRAPH)) {
			// End of a paragraph which specified a language.
			// (End-of-paragraph also implies end-of-sentence)
			return GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name) + CLAUSE_PARAGRAPH;
		}
		return CLAUSE_PARAGRAPH;
	}
	return 0;
}
static void RemoveChar(char *p)
{
	// Replace a UTF-8 character by spaces
@@ -1176,7 +792,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
				self_closing = true;
			}
			terminator = ProcessSsmlTag(xml_buf, buf, &ix, n_buf, self_closing);
			terminator = ProcessSsmlTag(xml_buf, buf, &ix, n_buf, self_closing, xmlbase, &audio_text, current_voice_id, &base_voice, base_voice_variant_name, &ignore_text, &clear_skipping_text, &sayas_mode, &sayas_start, ssml_stack, &n_ssml_stack, &n_param_stack, (int *)speech_parameters);
			if (terminator != 0) {
				buf[ix] = ' ';
@@ -46,6 +46,7 @@
#include "translate.h"
#include "ssml.h"
int attrcmp(const wchar_t *string1, const char *string2)
{
	int ix;
@@ -158,7 +159,7 @@ int attr_prosody_value(int param_type, const wchar_t *pw, int *value_out)
	return sign; // -1, 0, or 1
}
int GetVoiceAttributes(wchar_t *pw, int tag_type, SSML_STACK *ssml_sp, SSML_STACK *ssml_stack, int n_ssml_stack, char current_voice_id[40], espeak_VOICE *base_voice, char base_voice_variant_name[40])
static int GetVoiceAttributes(wchar_t *pw, int tag_type, SSML_STACK *ssml_sp, SSML_STACK *ssml_stack, int n_ssml_stack, char current_voice_id[40], espeak_VOICE *base_voice, char *base_voice_variant_name)
{
	// Determines whether voice attribute are specified in this tag, and if so, whether this means
	// a voice change.
@@ -507,3 +508,389 @@ void SetProsodyParameter(int param_type, wchar_t *attr1, PARAM_STACK *sp, PARAM_
	}
}
int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outbuf, bool self_closing, const char *xmlbase, bool *audio_text, char *current_voice_id, espeak_VOICE *base_voice, char *base_voice_variant_name, bool *ignore_text, bool *clear_skipping_text, int *sayas_mode, int *sayas_start, SSML_STACK *ssml_stack, int *n_ssml_stack, int *n_param_stack, int *speech_parameters)
{
	// xml_buf is the tag and attributes with a zero terminator in place of the original '>'
	// returns a clause terminator value.
	unsigned int ix;
	int index;
	int c;
	int tag_type;
	int value;
	int value2;
	int value3;
	int voice_change_flag;
	wchar_t *px;
	wchar_t *attr1;
	wchar_t *attr2;
	wchar_t *attr3;
	int terminator;
	char *uri;
	int param_type;
	char tag_name[40];
	char buf[80];
	PARAM_STACK *sp;
	SSML_STACK *ssml_sp;
	// these tags have no effect if they are self-closing, eg. <voice />
	static char ignore_if_self_closing[] = { 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0 };
	static const MNEM_TAB mnem_phoneme_alphabet[] = {
		{ "espeak", 1 },
		{ NULL, -1 }
	};
	static const MNEM_TAB mnem_punct[] = {
		{ "none", 1 },
		{ "all", 2 },
		{ "some", 3 },
		{ NULL, -1 }
	};
	static const MNEM_TAB mnem_capitals[] = {
		{ "no", 0 },
		{ "icon", 1 },
		{ "spelling", 2 },
		{ "pitch", 20 }, // this is the amount by which to raise the pitch
		{ NULL, -1 }
	};
	static const MNEM_TAB mnem_interpret_as[] = {
		{ "characters", SAYAS_CHARS },
		{ "tts:char", SAYAS_SINGLE_CHARS },
		{ "tts:key", SAYAS_KEY },
		{ "tts:digits", SAYAS_DIGITS },
		{ "telephone", SAYAS_DIGITS1 },
		{ NULL, -1 }
	};
	static const MNEM_TAB mnem_sayas_format[] = {
		{ "glyphs", 1 },
		{ NULL, -1 }
	};
	static const MNEM_TAB mnem_break[] = {
		{ "none", 0 },
		{ "x-weak", 1 },
		{ "weak", 2 },
		{ "medium", 3 },
		{ "strong", 4 },
		{ "x-strong", 5 },
		{ NULL, -1 }
	};
	static const MNEM_TAB mnem_emphasis[] = {
		{ "none", 1 },
		{ "reduced", 2 },
		{ "moderate", 3 },
		{ "strong", 4 },
		{ "x-strong", 5 },
		{ NULL, -1 }
	};
	static const char *prosody_attr[5] = {
		NULL, "rate", "volume", "pitch", "range"
	};
	for (ix = 0; ix < (sizeof(tag_name)-1); ix++) {
		if (((c = xml_buf[ix]) == 0) || iswspace(c))
			break;
		tag_name[ix] = tolower((char)c);
	}
	tag_name[ix] = 0;
	px = &xml_buf[ix]; // the tag's attributes
	if (tag_name[0] == '/') {
		// closing tag
		if ((tag_type = LookupMnem(ssmltags, &tag_name[1])) != HTML_NOSPACE)
			outbuf[(*outix)++] = ' ';
		tag_type += SSML_CLOSE;
	} else {
		if ((tag_type = LookupMnem(ssmltags, tag_name)) != HTML_NOSPACE) {
			// separate SSML tags from the previous word (but not HTML tags such as <b> <font> which can occur inside a word)
			outbuf[(*outix)++] = ' ';
		}
		if (self_closing && ignore_if_self_closing[tag_type])
			return 0;
	}
	voice_change_flag = 0;
	ssml_sp = &ssml_stack[*n_ssml_stack-1];
	switch (tag_type)
	{
	case SSML_STYLE:
		sp = PushParamStack(tag_type, n_param_stack, (PARAM_STACK *) param_stack);
		attr1 = GetSsmlAttribute(px, "field");
		attr2 = GetSsmlAttribute(px, "mode");
		if (attrcmp(attr1, "punctuation") == 0) {
			value = attrlookup(attr2, mnem_punct);
			sp->parameter[espeakPUNCTUATION] = value;
		} else if (attrcmp(attr1, "capital_letters") == 0) {
			value = attrlookup(attr2, mnem_capitals);
			sp->parameter[espeakCAPITALS] = value;
		}
		ProcessParamStack(outbuf, outix, *n_param_stack, param_stack, speech_parameters);
		break;
	case SSML_PROSODY:
		sp = PushParamStack(tag_type, n_param_stack, (PARAM_STACK *) param_stack);
		// look for attributes: rate, volume, pitch, range
		for (param_type = espeakRATE; param_type <= espeakRANGE; param_type++) {
			if ((attr1 = GetSsmlAttribute(px, prosody_attr[param_type])) != NULL)
				SetProsodyParameter(param_type, attr1, sp, param_stack, speech_parameters);
		}
		ProcessParamStack(outbuf, outix, *n_param_stack, param_stack, speech_parameters);
		break;
	case SSML_EMPHASIS:
		sp = PushParamStack(tag_type, n_param_stack, (PARAM_STACK *) param_stack);
		value = 3; // default is "moderate"
		if ((attr1 = GetSsmlAttribute(px, "level")) != NULL)
			value = attrlookup(attr1, mnem_emphasis);
		if (translator->langopts.tone_language == 1) {
			static unsigned char emphasis_to_pitch_range[] = { 50, 50, 40, 70, 90, 100 };
			static unsigned char emphasis_to_volume[] = { 100, 100, 70, 110, 135, 150 };
			// tone languages (e.g. Chinese) do emphasis by increasing the pitch range.
			sp->parameter[espeakRANGE] = emphasis_to_pitch_range[value];
			sp->parameter[espeakVOLUME] = emphasis_to_volume[value];
		} else {
			static unsigned char emphasis_to_volume2[] = { 100, 100, 75, 100, 120, 150 };
			sp->parameter[espeakVOLUME] = emphasis_to_volume2[value];
			sp->parameter[espeakEMPHASIS] = value;
		}
		ProcessParamStack(outbuf, outix, *n_param_stack, param_stack, speech_parameters);
		break;
	case SSML_STYLE + SSML_CLOSE:
	case SSML_PROSODY + SSML_CLOSE:
	case SSML_EMPHASIS + SSML_CLOSE:
		PopParamStack(tag_type, outbuf, outix, n_param_stack, (PARAM_STACK *) param_stack, (int *) speech_parameters);
		break;
	case SSML_PHONEME:
		attr1 = GetSsmlAttribute(px, "alphabet");
		attr2 = GetSsmlAttribute(px, "ph");
		value = attrlookup(attr1, mnem_phoneme_alphabet);
		if (value == 1) { // alphabet="espeak"
			outbuf[(*outix)++] = '[';
			outbuf[(*outix)++] = '[';
			*outix += attrcopy_utf8(&outbuf[*outix], attr2, n_outbuf-*outix);
			outbuf[(*outix)++] = ']';
			outbuf[(*outix)++] = ']';
		}
		break;
	case SSML_SAYAS:
		attr1 = GetSsmlAttribute(px, "interpret-as");
		attr2 = GetSsmlAttribute(px, "format");
		attr3 = GetSsmlAttribute(px, "detail");
		value = attrlookup(attr1, mnem_interpret_as);
		value2 = attrlookup(attr2, mnem_sayas_format);
		if (value2 == 1)
			value = SAYAS_GLYPHS;
		value3 = attrnumber(attr3, 0, 0);
		if (value == SAYAS_DIGITS) {
			if (value3 <= 1)
				value = SAYAS_DIGITS1;
			else
				value = SAYAS_DIGITS + value3;
		}
		sprintf(buf, "%c%dY", CTRL_EMBEDDED, value);
		strcpy(&outbuf[*outix], buf);
		*outix += strlen(buf);
		*sayas_start = *outix;
		*sayas_mode = value; // punctuation doesn't end clause during SAY-AS
		break;
	case SSML_SAYAS + SSML_CLOSE:
		if (*sayas_mode == SAYAS_KEY) {
			outbuf[*outix] = 0;
			ReplaceKeyName(outbuf, *sayas_start, outix);
		}
		outbuf[(*outix)++] = CTRL_EMBEDDED;
		outbuf[(*outix)++] = 'Y';
		*sayas_mode = 0;
		break;
	case SSML_SUB:
		if ((attr1 = GetSsmlAttribute(px, "alias")) != NULL) {
			// use the alias rather than the text
			*ignore_text = true;
			*outix += attrcopy_utf8(&outbuf[*outix], attr1, n_outbuf-*outix);
		}
		break;
	case SSML_IGNORE_TEXT:
		*ignore_text = true;
		break;
	case SSML_SUB + SSML_CLOSE:
	case SSML_IGNORE_TEXT + SSML_CLOSE:
		*ignore_text = false;
		break;
	case SSML_MARK:
		if ((attr1 = GetSsmlAttribute(px, "name")) != NULL) {
			// add name to circular buffer of marker names
			attrcopy_utf8(buf, attr1, sizeof(buf));
			if (strcmp(skip_marker, buf) == 0) {
				// This is the marker we are waiting for before starting to speak
				*clear_skipping_text = true;
				skip_marker[0] = 0;
				return CLAUSE_NONE;
			}
			if ((index = AddNameData(buf, 0)) >= 0) {
				sprintf(buf, "%c%dM", CTRL_EMBEDDED, index);
				strcpy(&outbuf[*outix], buf);
				*outix += strlen(buf);
			}
		}
		break;
	case SSML_AUDIO:
		sp = PushParamStack(tag_type, n_param_stack, (PARAM_STACK *)param_stack);
		if ((attr1 = GetSsmlAttribute(px, "src")) != NULL) {
			char fname[256];
			attrcopy_utf8(buf, attr1, sizeof(buf));
			if (uri_callback == NULL) {
				if ((xmlbase != NULL) && (buf[0] != '/')) {
					sprintf(fname, "%s/%s", xmlbase, buf);
					index = LoadSoundFile2(fname);
				} else
					index = LoadSoundFile2(buf);
				if (index >= 0) {
					sprintf(buf, "%c%dI", CTRL_EMBEDDED, index);
					strcpy(&outbuf[*outix], buf);
					*outix += strlen(buf);
					sp->parameter[espeakSILENCE] = 1;
				}
			} else {
				if ((index = AddNameData(buf, 0)) >= 0) {
					uri = &namedata[index];
					if (uri_callback(1, uri, xmlbase) == 0) {
						sprintf(buf, "%c%dU", CTRL_EMBEDDED, index);
						strcpy(&outbuf[*outix], buf);
						*outix += strlen(buf);
						sp->parameter[espeakSILENCE] = 1;
					}
				}
			}
		}
		ProcessParamStack(outbuf, outix, *n_param_stack, param_stack, speech_parameters);
		if (self_closing)
			PopParamStack(tag_type, outbuf, outix, n_param_stack, (PARAM_STACK *) param_stack, (int *) speech_parameters);
		else
			*audio_text = true;
		return CLAUSE_NONE;
	case SSML_AUDIO + SSML_CLOSE:
		PopParamStack(tag_type, outbuf, outix, n_param_stack, (PARAM_STACK *) param_stack, (int *) speech_parameters);
		*audio_text = false;
		return CLAUSE_NONE;
	case SSML_BREAK:
		value = 21;
		terminator = CLAUSE_NONE;
		if ((attr1 = GetSsmlAttribute(px, "strength")) != NULL) {
			static int break_value[6] = { 0, 7, 14, 21, 40, 80 }; // *10mS
			value = attrlookup(attr1, mnem_break);
			if (value < 3) {
				// adjust prepause on the following word
				sprintf(&outbuf[*outix], "%c%dB", CTRL_EMBEDDED, value);
				*outix += 3;
				terminator = 0;
			}
			value = break_value[value];
		}
		if ((attr2 = GetSsmlAttribute(px, "time")) != NULL) {
			value2 = attrnumber(attr2, 0, 1); // pause in mS
			// compensate for speaking speed to keep constant pause length, see function PauseLength()
			// 'value' here is x 10mS
			value = (value2 * 256) / (speed.clause_pause_factor * 10);
			if (value < 200)
				value = (value2 * 256) / (speed.pause_factor * 10);
			if (terminator == 0)
				terminator = CLAUSE_NONE;
		}
		if (terminator) {
			if (value > 0xfff) {
				// scale down the value and set a scaling indicator bit
				value = value / 32;
				if (value > 0xfff)
					value = 0xfff;
				terminator |= CLAUSE_PAUSE_LONG;
			}
			return terminator + value;
		}
		break;
	case SSML_SPEAK:
		if ((attr1 = GetSsmlAttribute(px, "xml:base")) != NULL) {
			attrcopy_utf8(buf, attr1, sizeof(buf));
			if ((index = AddNameData(buf, 0)) >= 0)
				xmlbase = &namedata[index];
		}
		if (GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name) == 0)
			return 0; // no voice change
		return CLAUSE_VOICE;
	case SSML_VOICE:
		if (GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name) == 0)
			return 0; // no voice change
		return CLAUSE_VOICE;
	case SSML_SPEAK + SSML_CLOSE:
		// unwind stack until the previous <voice> or <speak> tag
		while ((*n_ssml_stack > 1) && (ssml_stack[*n_ssml_stack-1].tag_type != SSML_SPEAK))
			*n_ssml_stack = *n_ssml_stack -1;
		return CLAUSE_PERIOD + GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
	case SSML_VOICE + SSML_CLOSE:
		// unwind stack until the previous <voice> or <speak> tag
		while ((*n_ssml_stack > 1) && (ssml_stack[*n_ssml_stack-1].tag_type != SSML_VOICE))
			*n_ssml_stack = *n_ssml_stack -1;
		terminator = 0; // ?? Sentence intonation, but no pause ??
		return terminator + GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
	case HTML_BREAK:
	case HTML_BREAK + SSML_CLOSE:
		return CLAUSE_COLON;
	case SSML_SENTENCE:
		if (ssml_sp->tag_type == SSML_SENTENCE) {
			// new sentence implies end-of-sentence
			voice_change_flag = GetVoiceAttributes(px, SSML_SENTENCE+SSML_CLOSE, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
		}
		voice_change_flag |= GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
		return CLAUSE_PARAGRAPH + voice_change_flag;
	case SSML_PARAGRAPH:
		if (ssml_sp->tag_type == SSML_SENTENCE) {
			// new paragraph implies end-of-sentence or end-of-paragraph
			voice_change_flag = GetVoiceAttributes(px, SSML_SENTENCE+SSML_CLOSE, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
		}
		if (ssml_sp->tag_type == SSML_PARAGRAPH) {
			// new paragraph implies end-of-sentence or end-of-paragraph
			voice_change_flag |= GetVoiceAttributes(px, SSML_PARAGRAPH+SSML_CLOSE, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
		}
		voice_change_flag |= GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
		return CLAUSE_PARAGRAPH + voice_change_flag;
	case SSML_SENTENCE + SSML_CLOSE:
		if (ssml_sp->tag_type == SSML_SENTENCE) {
			// end of a sentence which specified a language
			voice_change_flag = GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
		}
		return CLAUSE_PERIOD + voice_change_flag;
	case SSML_PARAGRAPH + SSML_CLOSE:
		if ((ssml_sp->tag_type == SSML_SENTENCE) || (ssml_sp->tag_type == SSML_PARAGRAPH)) {
			// End of a paragraph which specified a language.
			// (End-of-paragraph also implies end-of-sentence)
			return GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name) + CLAUSE_PARAGRAPH;
		}
		return CLAUSE_PARAGRAPH;
	}
	return 0;
}
@@ -29,9 +29,6 @@ typedef struct {
#define HTML_NOSPACE 16 // don't insert a space for this element, so it doesn't break a word
#define SSML_CLOSE 0x20 // for a closing tag, OR this with the tag type
// these tags have no effect if they are self-closing, eg. <voice />
static char ignore_if_self_closing[] = { 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0 };
static MNEM_TAB ssmltags[] = {
	{ "speak", SSML_SPEAK },
	{ "voice", SSML_VOICE },
@@ -76,10 +73,12 @@ int attrlookup(const wchar_t *string1, const MNEM_TAB *mtab);
int attrnumber(const wchar_t *pw, int default_value, int type);
int attr_prosody_value(int param_type, const wchar_t *pw, int *value_out);
wchar_t *GetSsmlAttribute(wchar_t *pw, const char *name);
int GetVoiceAttributes(wchar_t *pw, int tag_type, SSML_STACK *ssml_sp, SSML_STACK *ssml_stack, int n_ssml_stack, char current_voice_id[40], espeak_VOICE *base_voice, char base_voice_variant_name[40]);
void ProcessParamStack(char *outbuf, int *outix, int n_param_stack, PARAM_STACK *param_stack, int *speech_parameters);
PARAM_STACK *PushParamStack(int tag_type, int *n_param_stack, PARAM_STACK *param_stack);
const char *VoiceFromStack(SSML_STACK *ssml_stack, int n_ssml_stack, espeak_VOICE *base_voice, char base_voice_variant_name[40]);
void PopParamStack(int tag_type, char *outbuf, int *outix, int *n_param_stack, PARAM_STACK *param_stack, int *speech_parameters);
int ReplaceKeyName(char *outbuf, int index, int *outix);
void SetProsodyParameter(int param_type, wchar_t *attr1, PARAM_STACK *sp, PARAM_STACK *param_stack, int *speech_parameters);
int LoadSoundFile2(const char *fname);
int AddNameData(const char *name, int wide);
int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outbuf, bool self_closing, const char *xmlbase, bool *audio_text, char *current_voice_id, espeak_VOICE *base_voice, char *base_voice_variant_name, bool *ignore_text, bool *clear_skipping_text, int *sayas_mode, int *sayas_start, SSML_STACK *ssml_stack, int *n_ssml_stack, int *n_param_stack, int *speech_parameters);
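With these declarations in ssml.h, readclause.c (and the Windows SAPI wrapper, via AddNameData) can call the functions without sharing file-scope state. The sketch below is not espeak-ng code: it uses a cut-down, invented stand-in for the parameter list, and it bundles the fields into a struct purely to keep the example short, whereas the PR passes them as individual parameters. It only illustrates how a caller like ReadClause threads its own clause-level state through repeated tag-handler calls and reads the results back.

```c
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical, cut-down stand-in for the state ReadClause now owns
 * and passes by pointer; the real function takes many more parameters. */
typedef struct {
	bool audio_text;
	bool ignore_text;
	int sayas_mode;
	int n_ssml_stack;
} clause_state;

/* Hypothetical tag handler in the style of ProcessSsmlTag: it only
 * touches state it was handed, so it has no hidden coupling to the caller. */
static int process_tag_sketch(const char *tag, clause_state *st)
{
	if (tag[0] == '/') {
		st->ignore_text = false;
		if (st->n_ssml_stack > 1)
			st->n_ssml_stack--; /* pop the closed element */
		return 0;
	}
	st->n_ssml_stack++; /* push the opened element */
	return 1; /* stands in for a clause terminator value */
}

int main(void)
{
	clause_state st = { false, false, 0, 1 };
	process_tag_sketch("sub", &st);
	process_tag_sketch("/sub", &st);
	printf("stack depth %d, ignore_text %d\n", st.n_ssml_stack, st.ignore_text);
	return 0;
}
```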