Browse Source

move ProcessSsmlTag() from readclause.c to ssml.c

use parameters instead of globals
master
Juho Hiltunen 7 years ago
parent
commit
a47ff4f41d
3 changed files with 394 additions and 392 deletions
  1. 3
    387
      src/libespeak-ng/readclause.c
  2. 388
    1
      src/libespeak-ng/ssml.c
  3. 3
    4
      src/libespeak-ng/ssml.h

+ 3
- 387
src/libespeak-ng/readclause.c View File

@@ -416,7 +416,7 @@ static int LookupSoundicon(int c)
return -1;
}

static int LoadSoundFile2(const char *fname)
int LoadSoundFile2(const char *fname)
{
// Load a sound file into one of the reserved slots in the sound icon table
// (if it's not already loaded)
@@ -549,7 +549,7 @@ static int AnnouncePunctuation(Translator *tr, int c1, int *c2_ptr, char *output
return short_pause;
}

static int AddNameData(const char *name, int wide)
int AddNameData(const char *name, int wide)
{
// Add the name to the namedata and return its position
// (Used by the Windows SAPI wrapper)
@@ -601,390 +601,6 @@ void SetVoiceStack(espeak_VOICE *v, const char *variant_name)
memcpy(&base_voice, &current_voice_selected, sizeof(base_voice));
}

static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outbuf, bool self_closing)
{
// xml_buf is the tag and attributes with a zero terminator in place of the original '>'
// returns a clause terminator value.

unsigned int ix;
int index;
int c;
int tag_type;
int value;
int value2;
int value3;
int voice_change_flag;
wchar_t *px;
wchar_t *attr1;
wchar_t *attr2;
wchar_t *attr3;
int terminator;
char *uri;
int param_type;
char tag_name[40];
char buf[80];
PARAM_STACK *sp;
SSML_STACK *ssml_sp;

static const MNEM_TAB mnem_phoneme_alphabet[] = {
{ "espeak", 1 },
{ NULL, -1 }
};

static const MNEM_TAB mnem_punct[] = {
{ "none", 1 },
{ "all", 2 },
{ "some", 3 },
{ NULL, -1 }
};

static const MNEM_TAB mnem_capitals[] = {
{ "no", 0 },
{ "icon", 1 },
{ "spelling", 2 },
{ "pitch", 20 }, // this is the amount by which to raise the pitch
{ NULL, -1 }
};

static const MNEM_TAB mnem_interpret_as[] = {
{ "characters", SAYAS_CHARS },
{ "tts:char", SAYAS_SINGLE_CHARS },
{ "tts:key", SAYAS_KEY },
{ "tts:digits", SAYAS_DIGITS },
{ "telephone", SAYAS_DIGITS1 },
{ NULL, -1 }
};

static const MNEM_TAB mnem_sayas_format[] = {
{ "glyphs", 1 },
{ NULL, -1 }
};

static const MNEM_TAB mnem_break[] = {
{ "none", 0 },
{ "x-weak", 1 },
{ "weak", 2 },
{ "medium", 3 },
{ "strong", 4 },
{ "x-strong", 5 },
{ NULL, -1 }
};

static const MNEM_TAB mnem_emphasis[] = {
{ "none", 1 },
{ "reduced", 2 },
{ "moderate", 3 },
{ "strong", 4 },
{ "x-strong", 5 },
{ NULL, -1 }
};

static const char *prosody_attr[5] = {
NULL, "rate", "volume", "pitch", "range"
};

for (ix = 0; ix < (sizeof(tag_name)-1); ix++) {
if (((c = xml_buf[ix]) == 0) || iswspace(c))
break;
tag_name[ix] = tolower((char)c);
}
tag_name[ix] = 0;

px = &xml_buf[ix]; // the tag's attributes

if (tag_name[0] == '/') {
// closing tag
if ((tag_type = LookupMnem(ssmltags, &tag_name[1])) != HTML_NOSPACE)
outbuf[(*outix)++] = ' ';
tag_type += SSML_CLOSE;
} else {
if ((tag_type = LookupMnem(ssmltags, tag_name)) != HTML_NOSPACE) {
// separate SSML tags from the previous word (but not HMTL tags such as <b> <font> which can occur inside a word)
outbuf[(*outix)++] = ' ';
}

if (self_closing && ignore_if_self_closing[tag_type])
return 0;
}

voice_change_flag = 0;
ssml_sp = &ssml_stack[n_ssml_stack-1];

switch (tag_type)
{
case SSML_STYLE:
sp = PushParamStack(tag_type, &n_param_stack, (PARAM_STACK *) param_stack);
attr1 = GetSsmlAttribute(px, "field");
attr2 = GetSsmlAttribute(px, "mode");


if (attrcmp(attr1, "punctuation") == 0) {
value = attrlookup(attr2, mnem_punct);
sp->parameter[espeakPUNCTUATION] = value;
} else if (attrcmp(attr1, "capital_letters") == 0) {
value = attrlookup(attr2, mnem_capitals);
sp->parameter[espeakCAPITALS] = value;
}
ProcessParamStack(outbuf, outix, n_param_stack, param_stack, speech_parameters);
break;
case SSML_PROSODY:
sp = PushParamStack(tag_type, &n_param_stack, (PARAM_STACK *) param_stack);

// look for attributes: rate, volume, pitch, range
for (param_type = espeakRATE; param_type <= espeakRANGE; param_type++) {
if ((attr1 = GetSsmlAttribute(px, prosody_attr[param_type])) != NULL)
SetProsodyParameter(param_type, attr1, sp, &param_stack, &speech_parameters);
}

ProcessParamStack(outbuf, outix, n_param_stack, param_stack, speech_parameters);
break;
case SSML_EMPHASIS:
sp = PushParamStack(tag_type, &n_param_stack, (PARAM_STACK *) param_stack);
value = 3; // default is "moderate"
if ((attr1 = GetSsmlAttribute(px, "level")) != NULL)
value = attrlookup(attr1, mnem_emphasis);

if (translator->langopts.tone_language == 1) {
static unsigned char emphasis_to_pitch_range[] = { 50, 50, 40, 70, 90, 100 };
static unsigned char emphasis_to_volume[] = { 100, 100, 70, 110, 135, 150 };
// tone language (eg.Chinese) do emphasis by increasing the pitch range.
sp->parameter[espeakRANGE] = emphasis_to_pitch_range[value];
sp->parameter[espeakVOLUME] = emphasis_to_volume[value];
} else {
static unsigned char emphasis_to_volume2[] = { 100, 100, 75, 100, 120, 150 };
sp->parameter[espeakVOLUME] = emphasis_to_volume2[value];
sp->parameter[espeakEMPHASIS] = value;
}
ProcessParamStack(outbuf, outix, n_param_stack, param_stack, speech_parameters);
break;
case SSML_STYLE + SSML_CLOSE:
case SSML_PROSODY + SSML_CLOSE:
case SSML_EMPHASIS + SSML_CLOSE:
PopParamStack(tag_type, outbuf, outix, &n_param_stack, (PARAM_STACK *) param_stack, (int *) speech_parameters);
break;
case SSML_PHONEME:
attr1 = GetSsmlAttribute(px, "alphabet");
attr2 = GetSsmlAttribute(px, "ph");
value = attrlookup(attr1, mnem_phoneme_alphabet);
if (value == 1) { // alphabet="espeak"
outbuf[(*outix)++] = '[';
outbuf[(*outix)++] = '[';
*outix += attrcopy_utf8(&outbuf[*outix], attr2, n_outbuf-*outix);
outbuf[(*outix)++] = ']';
outbuf[(*outix)++] = ']';
}
break;
case SSML_SAYAS:
attr1 = GetSsmlAttribute(px, "interpret-as");
attr2 = GetSsmlAttribute(px, "format");
attr3 = GetSsmlAttribute(px, "detail");
value = attrlookup(attr1, mnem_interpret_as);
value2 = attrlookup(attr2, mnem_sayas_format);
if (value2 == 1)
value = SAYAS_GLYPHS;

value3 = attrnumber(attr3, 0, 0);

if (value == SAYAS_DIGITS) {
if (value3 <= 1)
value = SAYAS_DIGITS1;
else
value = SAYAS_DIGITS + value3;
}

sprintf(buf, "%c%dY", CTRL_EMBEDDED, value);
strcpy(&outbuf[*outix], buf);
*outix += strlen(buf);

sayas_start = *outix;
sayas_mode = value; // punctuation doesn't end clause during SAY-AS
break;
case SSML_SAYAS + SSML_CLOSE:
if (sayas_mode == SAYAS_KEY) {
outbuf[*outix] = 0;
ReplaceKeyName(outbuf, sayas_start, outix);
}

outbuf[(*outix)++] = CTRL_EMBEDDED;
outbuf[(*outix)++] = 'Y';
sayas_mode = 0;
break;
case SSML_SUB:
if ((attr1 = GetSsmlAttribute(px, "alias")) != NULL) {
// use the alias rather than the text
ignore_text = true;
*outix += attrcopy_utf8(&outbuf[*outix], attr1, n_outbuf-*outix);
}
break;
case SSML_IGNORE_TEXT:
ignore_text = true;
break;
case SSML_SUB + SSML_CLOSE:
case SSML_IGNORE_TEXT + SSML_CLOSE:
ignore_text = false;
break;
case SSML_MARK:
if ((attr1 = GetSsmlAttribute(px, "name")) != NULL) {
// add name to circular buffer of marker names
attrcopy_utf8(buf, attr1, sizeof(buf));

if (strcmp(skip_marker, buf) == 0) {
// This is the marker we are waiting for before starting to speak
clear_skipping_text = true;
skip_marker[0] = 0;
return CLAUSE_NONE;
}

if ((index = AddNameData(buf, 0)) >= 0) {
sprintf(buf, "%c%dM", CTRL_EMBEDDED, index);
strcpy(&outbuf[*outix], buf);
*outix += strlen(buf);
}
}
break;
case SSML_AUDIO:
sp = PushParamStack(tag_type, &n_param_stack, (PARAM_STACK *)param_stack);

if ((attr1 = GetSsmlAttribute(px, "src")) != NULL) {
char fname[256];
attrcopy_utf8(buf, attr1, sizeof(buf));

if (uri_callback == NULL) {
if ((xmlbase != NULL) && (buf[0] != '/')) {
sprintf(fname, "%s/%s", xmlbase, buf);
index = LoadSoundFile2(fname);
} else
index = LoadSoundFile2(buf);
if (index >= 0) {
sprintf(buf, "%c%dI", CTRL_EMBEDDED, index);
strcpy(&outbuf[*outix], buf);
*outix += strlen(buf);
sp->parameter[espeakSILENCE] = 1;
}
} else {
if ((index = AddNameData(buf, 0)) >= 0) {
uri = &namedata[index];
if (uri_callback(1, uri, xmlbase) == 0) {
sprintf(buf, "%c%dU", CTRL_EMBEDDED, index);
strcpy(&outbuf[*outix], buf);
*outix += strlen(buf);
sp->parameter[espeakSILENCE] = 1;
}
}
}
}
ProcessParamStack(outbuf, outix, n_param_stack, param_stack, speech_parameters);

if (self_closing)
PopParamStack(tag_type, outbuf, outix, &n_param_stack, (PARAM_STACK *) param_stack, (int *) speech_parameters);
else
audio_text = true;
return CLAUSE_NONE;
case SSML_AUDIO + SSML_CLOSE:
PopParamStack(tag_type, outbuf, outix, &n_param_stack, (PARAM_STACK *) param_stack, (int *) speech_parameters);
audio_text = false;
return CLAUSE_NONE;
case SSML_BREAK:
value = 21;
terminator = CLAUSE_NONE;

if ((attr1 = GetSsmlAttribute(px, "strength")) != NULL) {
static int break_value[6] = { 0, 7, 14, 21, 40, 80 }; // *10mS
value = attrlookup(attr1, mnem_break);
if (value < 3) {
// adjust prepause on the following word
sprintf(&outbuf[*outix], "%c%dB", CTRL_EMBEDDED, value);
*outix += 3;
terminator = 0;
}
value = break_value[value];
}
if ((attr2 = GetSsmlAttribute(px, "time")) != NULL) {
value2 = attrnumber(attr2, 0, 1); // pause in mS

// compensate for speaking speed to keep constant pause length, see function PauseLength()
// 'value' here is x 10mS
value = (value2 * 256) / (speed.clause_pause_factor * 10);
if (value < 200)
value = (value2 * 256) / (speed.pause_factor * 10);

if (terminator == 0)
terminator = CLAUSE_NONE;
}
if (terminator) {
if (value > 0xfff) {
// scale down the value and set a scaling indicator bit
value = value / 32;
if (value > 0xfff)
value = 0xfff;
terminator |= CLAUSE_PAUSE_LONG;
}
return terminator + value;
}
break;
case SSML_SPEAK:
if ((attr1 = GetSsmlAttribute(px, "xml:base")) != NULL) {
attrcopy_utf8(buf, attr1, sizeof(buf));
if ((index = AddNameData(buf, 0)) >= 0)
xmlbase = &namedata[index];
}
if (GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name) == 0)
return 0; // no voice change
return CLAUSE_VOICE;
case SSML_VOICE:
if (GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name) == 0)
return 0; // no voice change
return CLAUSE_VOICE;
case SSML_SPEAK + SSML_CLOSE:
// unwind stack until the previous <voice> or <speak> tag
while ((n_ssml_stack > 1) && (ssml_stack[n_ssml_stack-1].tag_type != SSML_SPEAK))
n_ssml_stack--;
return CLAUSE_PERIOD + GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name);
case SSML_VOICE + SSML_CLOSE:
// unwind stack until the previous <voice> or <speak> tag
while ((n_ssml_stack > 1) && (ssml_stack[n_ssml_stack-1].tag_type != SSML_VOICE))
n_ssml_stack--;

terminator = 0; // ?? Sentence intonation, but no pause ??
return terminator + GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name);
case HTML_BREAK:
case HTML_BREAK + SSML_CLOSE:
return CLAUSE_COLON;
case SSML_SENTENCE:
if (ssml_sp->tag_type == SSML_SENTENCE) {
// new sentence implies end-of-sentence
voice_change_flag = GetVoiceAttributes(px, SSML_SENTENCE+SSML_CLOSE, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name);
}
voice_change_flag |= GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name);
return CLAUSE_PARAGRAPH + voice_change_flag;
case SSML_PARAGRAPH:
if (ssml_sp->tag_type == SSML_SENTENCE) {
// new paragraph implies end-of-sentence or end-of-paragraph
voice_change_flag = GetVoiceAttributes(px, SSML_SENTENCE+SSML_CLOSE, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name);
}
if (ssml_sp->tag_type == SSML_PARAGRAPH) {
// new paragraph implies end-of-sentence or end-of-paragraph
voice_change_flag |= GetVoiceAttributes(px, SSML_PARAGRAPH+SSML_CLOSE, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name);
}
voice_change_flag |= GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name);
return CLAUSE_PARAGRAPH + voice_change_flag;
case SSML_SENTENCE + SSML_CLOSE:
if (ssml_sp->tag_type == SSML_SENTENCE) {
// end of a sentence which specified a language
voice_change_flag = GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name);
}
return CLAUSE_PERIOD + voice_change_flag;
case SSML_PARAGRAPH + SSML_CLOSE:
if ((ssml_sp->tag_type == SSML_SENTENCE) || (ssml_sp->tag_type == SSML_PARAGRAPH)) {
// End of a paragraph which specified a language.
// (End-of-paragraph also implies end-of-sentence)
return GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, n_ssml_stack, current_voice_id, &base_voice, base_voice_variant_name) + CLAUSE_PARAGRAPH;
}
return CLAUSE_PARAGRAPH;
}
return 0;
}

static void RemoveChar(char *p)
{
// Replace a UTF-8 character by spaces
@@ -1176,7 +792,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
self_closing = true;
}

terminator = ProcessSsmlTag(xml_buf, buf, &ix, n_buf, self_closing);
terminator = ProcessSsmlTag(xml_buf, buf, &ix, n_buf, self_closing, xmlbase, &audio_text, current_voice_id, &base_voice, base_voice_variant_name, &ignore_text, &clear_skipping_text, &sayas_mode, &sayas_start, ssml_stack, &n_ssml_stack, &n_param_stack, (int *)speech_parameters);

if (terminator != 0) {
buf[ix] = ' ';

+ 388
- 1
src/libespeak-ng/ssml.c View File

@@ -46,6 +46,7 @@
#include "translate.h"
#include "ssml.h"


int attrcmp(const wchar_t *string1, const char *string2)
{
int ix;
@@ -158,7 +159,7 @@ int attr_prosody_value(int param_type, const wchar_t *pw, int *value_out)
return sign; // -1, 0, or 1
}

int GetVoiceAttributes(wchar_t *pw, int tag_type, SSML_STACK *ssml_sp, SSML_STACK *ssml_stack, int n_ssml_stack, char current_voice_id[40], espeak_VOICE *base_voice, char base_voice_variant_name[40])
static int GetVoiceAttributes(wchar_t *pw, int tag_type, SSML_STACK *ssml_sp, SSML_STACK *ssml_stack, int n_ssml_stack, char current_voice_id[40], espeak_VOICE *base_voice, char *base_voice_variant_name)
{
// Determines whether voice attribute are specified in this tag, and if so, whether this means
// a voice change.
@@ -507,3 +508,389 @@ void SetProsodyParameter(int param_type, wchar_t *attr1, PARAM_STACK *sp, PARAM_
}
}

// Process one SSML or HTML tag and apply its effect to the clause being
// built: push/pop entries on the speech-parameter stack, write embedded
// control sequences (CTRL_EMBEDDED commands) into outbuf, and/or signal a
// clause break to the caller (ReadClause). Moved here from readclause.c;
// the former file-scope state is now passed in as parameters (though some
// globals, e.g. param_stack, skip_marker, namedata, uri_callback, speed and
// translator, are still referenced directly).
//
// xml_buf        tag name and attributes, zero-terminated in place of the
//                original '>'; a leading '/' marks a closing tag
// outbuf/outix   clause output buffer and in/out write index
// n_outbuf       size of outbuf, used to bound attribute copies
// self_closing   true for <tag ... /> forms
// xmlbase        base URI for relative <audio src=...> paths, may be NULL
// audio_text     out: true while inside an <audio> element
// current_voice_id, base_voice, base_voice_variant_name
//                voice-selection state shared with GetVoiceAttributes()
// ignore_text    out: true while element content should not be spoken
// clear_skipping_text  out: set when the awaited <mark> name is reached
// sayas_mode, sayas_start  in/out: say-as interpretation state
// ssml_stack, n_ssml_stack  open-element stack and its depth (in/out)
// n_param_stack  in/out: depth of the speech-parameter stack
// speech_parameters  current speech parameter values
//
// Returns a clause terminator value (CLAUSE_*), possibly with a pause
// length added, or 0 when the tag causes no clause break.
int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outbuf, bool self_closing, const char *xmlbase, bool *audio_text, char *current_voice_id, espeak_VOICE *base_voice, char *base_voice_variant_name, bool *ignore_text, bool *clear_skipping_text, int *sayas_mode, int *sayas_start, SSML_STACK *ssml_stack, int *n_ssml_stack, int *n_param_stack, int *speech_parameters)
{
// xml_buf is the tag and attributes with a zero terminator in place of the original '>'
// returns a clause terminator value.

unsigned int ix;
int index;
int c;
int tag_type;
int value;
int value2;
int value3;
int voice_change_flag;
wchar_t *px;
wchar_t *attr1;
wchar_t *attr2;
wchar_t *attr3;
int terminator;
char *uri;
int param_type;
char tag_name[40];
char buf[80];
PARAM_STACK *sp;
SSML_STACK *ssml_sp;

// these tags have no effect if they are self-closing, eg. <voice />
static char ignore_if_self_closing[] = { 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0 };

// Attribute-value lookup tables; attrlookup() returns -1 for a string
// that is not present in the table.
static const MNEM_TAB mnem_phoneme_alphabet[] = {
{ "espeak", 1 },
{ NULL, -1 }
};

static const MNEM_TAB mnem_punct[] = {
{ "none", 1 },
{ "all", 2 },
{ "some", 3 },
{ NULL, -1 }
};

static const MNEM_TAB mnem_capitals[] = {
{ "no", 0 },
{ "icon", 1 },
{ "spelling", 2 },
{ "pitch", 20 }, // this is the amount by which to raise the pitch
{ NULL, -1 }
};

static const MNEM_TAB mnem_interpret_as[] = {
{ "characters", SAYAS_CHARS },
{ "tts:char", SAYAS_SINGLE_CHARS },
{ "tts:key", SAYAS_KEY },
{ "tts:digits", SAYAS_DIGITS },
{ "telephone", SAYAS_DIGITS1 },
{ NULL, -1 }
};

static const MNEM_TAB mnem_sayas_format[] = {
{ "glyphs", 1 },
{ NULL, -1 }
};

static const MNEM_TAB mnem_break[] = {
{ "none", 0 },
{ "x-weak", 1 },
{ "weak", 2 },
{ "medium", 3 },
{ "strong", 4 },
{ "x-strong", 5 },
{ NULL, -1 }
};

static const MNEM_TAB mnem_emphasis[] = {
{ "none", 1 },
{ "reduced", 2 },
{ "moderate", 3 },
{ "strong", 4 },
{ "x-strong", 5 },
{ NULL, -1 }
};

// <prosody> attribute names, indexed by espeakRATE..espeakRANGE
static const char *prosody_attr[5] = {
NULL, "rate", "volume", "pitch", "range"
};

// extract the tag name, lower-cased, up to the first whitespace or end
for (ix = 0; ix < (sizeof(tag_name)-1); ix++) {
if (((c = xml_buf[ix]) == 0) || iswspace(c))
break;
tag_name[ix] = tolower((char)c);
}
tag_name[ix] = 0;

px = &xml_buf[ix]; // the tag's attributes

if (tag_name[0] == '/') {
// closing tag
if ((tag_type = LookupMnem(ssmltags, &tag_name[1])) != HTML_NOSPACE)
outbuf[(*outix)++] = ' ';
tag_type += SSML_CLOSE;
} else {
if ((tag_type = LookupMnem(ssmltags, tag_name)) != HTML_NOSPACE) {
// separate SSML tags from the previous word (but not HTML tags such as <b> <font> which can occur inside a word)
outbuf[(*outix)++] = ' ';
}

// eg. a self-closing <voice /> changes nothing, so skip it entirely
if (self_closing && ignore_if_self_closing[tag_type])
return 0;
}

voice_change_flag = 0;
// current top of the open-element stack (the innermost enclosing element)
ssml_sp = &ssml_stack[*n_ssml_stack-1];

switch (tag_type)
{
case SSML_STYLE:
// <tts:style field="..." mode="...">: set punctuation or capitals mode
sp = PushParamStack(tag_type, n_param_stack, (PARAM_STACK *) param_stack);
attr1 = GetSsmlAttribute(px, "field");
attr2 = GetSsmlAttribute(px, "mode");


if (attrcmp(attr1, "punctuation") == 0) {
value = attrlookup(attr2, mnem_punct);
sp->parameter[espeakPUNCTUATION] = value;
} else if (attrcmp(attr1, "capital_letters") == 0) {
value = attrlookup(attr2, mnem_capitals);
sp->parameter[espeakCAPITALS] = value;
}
ProcessParamStack(outbuf, outix, *n_param_stack, param_stack, speech_parameters);
break;
case SSML_PROSODY:
sp = PushParamStack(tag_type, n_param_stack, (PARAM_STACK *) param_stack);

// look for attributes: rate, volume, pitch, range
for (param_type = espeakRATE; param_type <= espeakRANGE; param_type++) {
if ((attr1 = GetSsmlAttribute(px, prosody_attr[param_type])) != NULL)
SetProsodyParameter(param_type, attr1, sp, param_stack, speech_parameters);
}

ProcessParamStack(outbuf, outix, *n_param_stack, param_stack, speech_parameters);
break;
case SSML_EMPHASIS:
sp = PushParamStack(tag_type, n_param_stack, (PARAM_STACK *) param_stack);
value = 3; // default is "moderate"
if ((attr1 = GetSsmlAttribute(px, "level")) != NULL)
value = attrlookup(attr1, mnem_emphasis);

if (translator->langopts.tone_language == 1) {
static unsigned char emphasis_to_pitch_range[] = { 50, 50, 40, 70, 90, 100 };
static unsigned char emphasis_to_volume[] = { 100, 100, 70, 110, 135, 150 };
// tone languages (e.g. Chinese) do emphasis by increasing the pitch range.
sp->parameter[espeakRANGE] = emphasis_to_pitch_range[value];
sp->parameter[espeakVOLUME] = emphasis_to_volume[value];
} else {
static unsigned char emphasis_to_volume2[] = { 100, 100, 75, 100, 120, 150 };
sp->parameter[espeakVOLUME] = emphasis_to_volume2[value];
sp->parameter[espeakEMPHASIS] = value;
}
ProcessParamStack(outbuf, outix, *n_param_stack, param_stack, speech_parameters);
break;
case SSML_STYLE + SSML_CLOSE:
case SSML_PROSODY + SSML_CLOSE:
case SSML_EMPHASIS + SSML_CLOSE:
PopParamStack(tag_type, outbuf, outix, n_param_stack, (PARAM_STACK *) param_stack, (int *) speech_parameters);
break;
case SSML_PHONEME:
// inline phonemes: emit [[...]] so the translator reads them literally
attr1 = GetSsmlAttribute(px, "alphabet");
attr2 = GetSsmlAttribute(px, "ph");
value = attrlookup(attr1, mnem_phoneme_alphabet);
if (value == 1) { // alphabet="espeak"
outbuf[(*outix)++] = '[';
outbuf[(*outix)++] = '[';
*outix += attrcopy_utf8(&outbuf[*outix], attr2, n_outbuf-*outix);
outbuf[(*outix)++] = ']';
outbuf[(*outix)++] = ']';
}
break;
case SSML_SAYAS:
attr1 = GetSsmlAttribute(px, "interpret-as");
attr2 = GetSsmlAttribute(px, "format");
attr3 = GetSsmlAttribute(px, "detail");
value = attrlookup(attr1, mnem_interpret_as);
value2 = attrlookup(attr2, mnem_sayas_format);
if (value2 == 1)
value = SAYAS_GLYPHS;

value3 = attrnumber(attr3, 0, 0);

if (value == SAYAS_DIGITS) {
if (value3 <= 1)
value = SAYAS_DIGITS1;
else
value = SAYAS_DIGITS + value3;
}

// embedded command %c%dY: enter say-as interpretation mode 'value'
sprintf(buf, "%c%dY", CTRL_EMBEDDED, value);
strcpy(&outbuf[*outix], buf);
*outix += strlen(buf);

*sayas_start = *outix;
*sayas_mode = value; // punctuation doesn't end clause during SAY-AS
break;
case SSML_SAYAS + SSML_CLOSE:
if (*sayas_mode == SAYAS_KEY) {
// replace the collected key name with its spoken form
outbuf[*outix] = 0;
ReplaceKeyName(outbuf, *sayas_start, outix);
}

outbuf[(*outix)++] = CTRL_EMBEDDED;
outbuf[(*outix)++] = 'Y';
*sayas_mode = 0;
break;
case SSML_SUB:
if ((attr1 = GetSsmlAttribute(px, "alias")) != NULL) {
// use the alias rather than the text
*ignore_text = true;
*outix += attrcopy_utf8(&outbuf[*outix], attr1, n_outbuf-*outix);
}
break;
case SSML_IGNORE_TEXT:
*ignore_text = true;
break;
case SSML_SUB + SSML_CLOSE:
case SSML_IGNORE_TEXT + SSML_CLOSE:
*ignore_text = false;
break;
case SSML_MARK:
if ((attr1 = GetSsmlAttribute(px, "name")) != NULL) {
// add name to circular buffer of marker names
attrcopy_utf8(buf, attr1, sizeof(buf));

if (strcmp(skip_marker, buf) == 0) {
// This is the marker we are waiting for before starting to speak
*clear_skipping_text = true;
skip_marker[0] = 0;
return CLAUSE_NONE;
}

// embedded command %c%dM: report a mark event during synthesis
if ((index = AddNameData(buf, 0)) >= 0) {
sprintf(buf, "%c%dM", CTRL_EMBEDDED, index);
strcpy(&outbuf[*outix], buf);
*outix += strlen(buf);
}
}
break;
case SSML_AUDIO:
sp = PushParamStack(tag_type, n_param_stack, (PARAM_STACK *)param_stack);

if ((attr1 = GetSsmlAttribute(px, "src")) != NULL) {
char fname[256];
attrcopy_utf8(buf, attr1, sizeof(buf));

if (uri_callback == NULL) {
// no application callback: load the sound file ourselves,
// resolving a relative path against xmlbase if one was set
if ((xmlbase != NULL) && (buf[0] != '/')) {
sprintf(fname, "%s/%s", xmlbase, buf);
index = LoadSoundFile2(fname);
} else
index = LoadSoundFile2(buf);
if (index >= 0) {
// embedded command %c%dI: play pre-loaded sound icon 'index'
sprintf(buf, "%c%dI", CTRL_EMBEDDED, index);
strcpy(&outbuf[*outix], buf);
*outix += strlen(buf);
sp->parameter[espeakSILENCE] = 1;
}
} else {
// hand the URI to the application; %c%dU refers to it by index
if ((index = AddNameData(buf, 0)) >= 0) {
uri = &namedata[index];
if (uri_callback(1, uri, xmlbase) == 0) {
sprintf(buf, "%c%dU", CTRL_EMBEDDED, index);
strcpy(&outbuf[*outix], buf);
*outix += strlen(buf);
sp->parameter[espeakSILENCE] = 1;
}
}
}
}
ProcessParamStack(outbuf, outix, *n_param_stack, param_stack, speech_parameters);

if (self_closing)
PopParamStack(tag_type, outbuf, outix, n_param_stack, (PARAM_STACK *) param_stack, (int *) speech_parameters);
else
*audio_text = true;
return CLAUSE_NONE;
case SSML_AUDIO + SSML_CLOSE:
PopParamStack(tag_type, outbuf, outix, n_param_stack, (PARAM_STACK *) param_stack, (int *) speech_parameters);
*audio_text = false;
return CLAUSE_NONE;
case SSML_BREAK:
value = 21;
terminator = CLAUSE_NONE;

if ((attr1 = GetSsmlAttribute(px, "strength")) != NULL) {
static int break_value[6] = { 0, 7, 14, 21, 40, 80 }; // *10mS
// NOTE(review): attrlookup() returns -1 for an unrecognised strength
// string; -1 passes the (value < 3) test and then indexes
// break_value[-1] below — confirm inputs are validated upstream,
// or guard here.
value = attrlookup(attr1, mnem_break);
if (value < 3) {
// adjust prepause on the following word
// (the embedded command is 3 chars only while 0 <= value <= 9)
sprintf(&outbuf[*outix], "%c%dB", CTRL_EMBEDDED, value);
*outix += 3;
terminator = 0;
}
value = break_value[value];
}
if ((attr2 = GetSsmlAttribute(px, "time")) != NULL) {
value2 = attrnumber(attr2, 0, 1); // pause in mS

// compensate for speaking speed to keep constant pause length, see function PauseLength()
// 'value' here is x 10mS
value = (value2 * 256) / (speed.clause_pause_factor * 10);
if (value < 200)
value = (value2 * 256) / (speed.pause_factor * 10);

if (terminator == 0)
terminator = CLAUSE_NONE;
}
if (terminator) {
if (value > 0xfff) {
// scale down the value and set a scaling indicator bit
value = value / 32;
if (value > 0xfff)
value = 0xfff;
terminator |= CLAUSE_PAUSE_LONG;
}
return terminator + value;
}
break;
case SSML_SPEAK:
if ((attr1 = GetSsmlAttribute(px, "xml:base")) != NULL) {
attrcopy_utf8(buf, attr1, sizeof(buf));
// NOTE(review): xmlbase is now a by-value parameter, so this
// assignment only affects the remainder of this call; the old
// global persisted across calls (and across later <audio> tags).
// Confirm the caller re-supplies the updated base URI.
if ((index = AddNameData(buf, 0)) >= 0)
xmlbase = &namedata[index];
}
if (GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name) == 0)
return 0; // no voice change
return CLAUSE_VOICE;
case SSML_VOICE:
if (GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name) == 0)
return 0; // no voice change
return CLAUSE_VOICE;
case SSML_SPEAK + SSML_CLOSE:
// unwind stack until the previous <voice> or <speak> tag
while ((*n_ssml_stack > 1) && (ssml_stack[*n_ssml_stack-1].tag_type != SSML_SPEAK))
*n_ssml_stack = *n_ssml_stack -1;
return CLAUSE_PERIOD + GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
case SSML_VOICE + SSML_CLOSE:
// unwind stack until the previous <voice> or <speak> tag
while ((*n_ssml_stack > 1) && (ssml_stack[*n_ssml_stack-1].tag_type != SSML_VOICE))
*n_ssml_stack = *n_ssml_stack -1;

terminator = 0; // ?? Sentence intonation, but no pause ??
return terminator + GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
case HTML_BREAK:
case HTML_BREAK + SSML_CLOSE:
return CLAUSE_COLON;
case SSML_SENTENCE:
if (ssml_sp->tag_type == SSML_SENTENCE) {
// new sentence implies end-of-sentence
voice_change_flag = GetVoiceAttributes(px, SSML_SENTENCE+SSML_CLOSE, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
}
voice_change_flag |= GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
return CLAUSE_PARAGRAPH + voice_change_flag;
case SSML_PARAGRAPH:
if (ssml_sp->tag_type == SSML_SENTENCE) {
// new paragraph implies end-of-sentence or end-of-paragraph
voice_change_flag = GetVoiceAttributes(px, SSML_SENTENCE+SSML_CLOSE, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
}
if (ssml_sp->tag_type == SSML_PARAGRAPH) {
// new paragraph implies end-of-sentence or end-of-paragraph
voice_change_flag |= GetVoiceAttributes(px, SSML_PARAGRAPH+SSML_CLOSE, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
}
voice_change_flag |= GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
return CLAUSE_PARAGRAPH + voice_change_flag;
case SSML_SENTENCE + SSML_CLOSE:
if (ssml_sp->tag_type == SSML_SENTENCE) {
// end of a sentence which specified a language
voice_change_flag = GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
}
return CLAUSE_PERIOD + voice_change_flag;
case SSML_PARAGRAPH + SSML_CLOSE:
if ((ssml_sp->tag_type == SSML_SENTENCE) || (ssml_sp->tag_type == SSML_PARAGRAPH)) {
// End of a paragraph which specified a language.
// (End-of-paragraph also implies end-of-sentence)
return GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name) + CLAUSE_PARAGRAPH;
}
return CLAUSE_PARAGRAPH;
}
return 0;
}

+ 3
- 4
src/libespeak-ng/ssml.h View File

@@ -29,9 +29,6 @@ typedef struct {
#define HTML_NOSPACE 16 // don't insert a space for this element, so it doesn't break a word
#define SSML_CLOSE 0x20 // for a closing tag, OR this with the tag type

// these tags have no effect if they are self-closing, eg. <voice />
static char ignore_if_self_closing[] = { 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0 };

static MNEM_TAB ssmltags[] = {
{ "speak", SSML_SPEAK },
{ "voice", SSML_VOICE },
@@ -76,10 +73,12 @@ int attrlookup(const wchar_t *string1, const MNEM_TAB *mtab);
int attrnumber(const wchar_t *pw, int default_value, int type);
int attr_prosody_value(int param_type, const wchar_t *pw, int *value_out);
wchar_t *GetSsmlAttribute(wchar_t *pw, const char *name);
int GetVoiceAttributes(wchar_t *pw, int tag_type, SSML_STACK *ssml_sp, SSML_STACK *ssml_stack, int n_ssml_stack, char current_voice_id[40], espeak_VOICE *base_voice, char base_voice_variant_name[40]);
void ProcessParamStack(char *outbuf, int *outix, int n_param_stack, PARAM_STACK *param_stack, int *speech_parameters);
PARAM_STACK *PushParamStack(int tag_type, int *n_param_stack, PARAM_STACK *param_stack);
const char *VoiceFromStack(SSML_STACK *ssml_stack, int n_ssml_stack, espeak_VOICE *base_voice, char base_voice_variant_name[40]);
void PopParamStack(int tag_type, char *outbuf, int *outix, int *n_param_stack, PARAM_STACK *param_stack, int *speech_parameters);
int ReplaceKeyName(char *outbuf, int index, int *outix);
void SetProsodyParameter(int param_type, wchar_t *attr1, PARAM_STACK *sp, PARAM_STACK *param_stack, int *speech_parameters);
int LoadSoundFile2(const char *fname);
int AddNameData(const char *name, int wide);
int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outbuf, bool self_closing, const char *xmlbase, bool *audio_text, char *current_voice_id, espeak_VOICE *base_voice, char *base_voice_variant_name, bool *ignore_text, bool *clear_skipping_text, int *sayas_mode, int *sayas_start, SSML_STACK *ssml_stack, int *n_ssml_stack, int *n_param_stack, int *speech_parameters);

Loading…
Cancel
Save