7 years ago · 7b7598d53d
--- a/Makefile.am
+++ b/Makefile.am
@@ -167,6 +167,7 @@ src_libespeak_ng_la_SOURCES = \
 	src/libespeak-ng/setlengths.c \
 	src/libespeak-ng/spect.c \
 	src/libespeak-ng/speech.c \
 	src/libespeak-ng/ssml.c \
 	src/libespeak-ng/synthdata.c \
 	src/libespeak-ng/synthesize.c \
 	src/libespeak-ng/synth_mbrola.c \
--- a/src/libespeak-ng/readclause.c
+++ b/src/libespeak-ng/readclause.c
@@ -42,6 +42,7 @@
 #include "voice.h"
 #include "synthesize.h"
 #include "translate.h"
 #include "ssml.h"

 #define N_XML_BUF   500

@@ -64,17 +65,6 @@ static int sayas_mode;
 static int sayas_start;
 static int ssml_ignore_l_angle = 0;

 // stack for language and voice properties
 // frame 0 is for the defaults, before any ssml tags.
 // One frame of the SSML voice/language stack. The comments below describe
 // how GetVoiceAttributes() fills a frame when a tag pushes one.
 typedef struct {
 	int tag_type;             // tag type code of the tag that created this frame
 	int voice_variant_number; // "variant" attribute minus one; 0 when absent or '0'
 	int voice_gender;         // ENGENDER_* value looked up from the "gender" attribute
 	int voice_age;            // numeric "age" attribute; 0 when absent
 	char voice_name[40];      // "name" attribute as UTF-8; empty string when absent
 	char language[20];        // "xml:lang" attribute as UTF-8; empty string when absent
 } SSML_STACK;

 #define N_SSML_STACK  20
 static int n_ssml_stack;
 static SSML_STACK ssml_stack[N_SSML_STACK];
@@ -83,7 +73,6 @@ static espeak_VOICE base_voice;
 static char base_voice_variant_name[40] = { 0 };
 static char current_voice_id[40] = { 0 };

 #define N_PARAM_STACK  20
 static int n_param_stack;
 PARAM_STACK param_stack[N_PARAM_STACK];

@@ -427,7 +416,7 @@ static int LookupSoundicon(int c)
 	return -1;
 }

 static int LoadSoundFile2(const char *fname)
 int LoadSoundFile2(const char *fname)
 {
 	// Load a sound file into one of the reserved slots in the sound icon table
 	// (if it's not already loaded)
@@ -560,365 +549,7 @@ static int AnnouncePunctuation(Translator *tr, int c1, int *c2_ptr, char *output
 	return short_pause;
 }

 // Tag type codes. ssmltags[] maps tag names onto these values, and
 // ProcessSsmlTag() switches on them.
 #define SSML_SPEAK     1
 #define SSML_VOICE     2
 #define SSML_PROSODY   3
 #define SSML_SAYAS     4
 #define SSML_MARK      5
 #define SSML_SENTENCE  6
 #define SSML_PARAGRAPH 7
 #define SSML_PHONEME   8
 #define SSML_SUB       9
 #define SSML_STYLE    10
 #define SSML_AUDIO    11
 #define SSML_EMPHASIS 12
 #define SSML_BREAK    13
 #define SSML_IGNORE_TEXT 14
 #define HTML_BREAK    15
 #define HTML_NOSPACE  16   // don't insert a space for this element, so it doesn't break a word
 #define SSML_CLOSE    0x20 // for a closing tag, OR this with the tag type

 // these tags have no effect if they are self-closing, eg. <voice />
 // The array is indexed by the tag type code of an opening tag (index 0 is an
 // unrecognised tag); a non-zero entry makes ProcessSsmlTag() return early.
 static char ignore_if_self_closing[] = { 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0 };

 // Lookup table from a lower-case tag name to its tag type code.
 // The first group holds SSML elements; the second group maps common HTML
 // elements onto the break / paragraph / ignore-text / no-space behaviours.
 // The { NULL, 0 } entry terminates the table.
 static MNEM_TAB ssmltags[] = {
 	{ "speak",     SSML_SPEAK },
 	{ "voice",     SSML_VOICE },
 	{ "prosody",   SSML_PROSODY },
 	{ "say-as",    SSML_SAYAS },
 	{ "mark",      SSML_MARK },
 	{ "s",         SSML_SENTENCE },
 	{ "p",         SSML_PARAGRAPH },
 	{ "phoneme",   SSML_PHONEME },
 	{ "sub",       SSML_SUB },
 	{ "tts:style", SSML_STYLE },
 	{ "audio",     SSML_AUDIO },
 	{ "emphasis",  SSML_EMPHASIS },
 	{ "break",     SSML_BREAK },
 	{ "metadata",  SSML_IGNORE_TEXT },

 	{ "br",     HTML_BREAK },
 	{ "li",     HTML_BREAK },
 	{ "dd",     HTML_BREAK },
 	{ "img",    HTML_BREAK },
 	{ "td",     HTML_BREAK },
 	{ "h1",     SSML_PARAGRAPH },
 	{ "h2",     SSML_PARAGRAPH },
 	{ "h3",     SSML_PARAGRAPH },
 	{ "h4",     SSML_PARAGRAPH },
 	{ "hr",     SSML_PARAGRAPH },
 	{ "script", SSML_IGNORE_TEXT },
 	{ "style",  SSML_IGNORE_TEXT },
 	{ "font",   HTML_NOSPACE },
 	{ "b",      HTML_NOSPACE },
 	{ "i",      HTML_NOSPACE },
 	{ "strong", HTML_NOSPACE },
 	{ "em",     HTML_NOSPACE },
 	{ "code",   HTML_NOSPACE },

 	{ NULL, 0 }
 };

 static const char *VoiceFromStack()
 {
 	// Use the voice properties from the SSML stack to choose a voice, and switch
 	// to that voice if it's not the current voice.
 	//
 	// The frames of ssml_stack[] are applied in order from frame 0 (the defaults)
 	// to the top of the stack, so that inner tags override outer ones.
 	//
 	// Returns the identifier of the selected voice, or "default" when
 	// SelectVoice() finds nothing. The returned string is either owned by the
 	// voice code or is a static buffer inside this function, so it is only
 	// valid until the next call.

 	int ix;
 	const char *p;
 	SSML_STACK *sp;
 	const char *v_id;
 	int voice_name_specified;
 	int voice_found;
 	espeak_VOICE voice_select;
 	static char voice_name[40];
 	char language[40];
 	char buf[80];

 	strcpy(voice_name, ssml_stack[0].voice_name);
 	strcpy(language, ssml_stack[0].language);
 	voice_select.age = ssml_stack[0].voice_age;
 	voice_select.gender = ssml_stack[0].voice_gender;
 	voice_select.variant = ssml_stack[0].voice_variant_number;
 	voice_select.identifier = NULL;

 	for (ix = 0; ix < n_ssml_stack; ix++) {
 		sp = &ssml_stack[ix];
 		voice_name_specified = 0;

 		if ((sp->voice_name[0] != 0) && (SelectVoiceByName(NULL, sp->voice_name) != NULL)) {
 			// an explicit, known voice name resets everything chosen so far
 			voice_name_specified = 1;
 			strcpy(voice_name, sp->voice_name);
 			language[0] = 0;
 			voice_select.gender = ENGENDER_UNKNOWN;
 			voice_select.age = 0;
 			voice_select.variant = 0;
 		}
 		if (sp->language[0] != 0) {
 			strcpy(language, sp->language);

 			// is this language provided by the base voice?
 			// The list is walked as a sequence of entries, each one leading byte
 			// followed by a zero-terminated language name; a zero leading byte
 			// ends the list.
 			p = base_voice.languages;
 			while (*p++ != 0) {
 				if (strcmp(p, language) == 0) {
 					// yes, change the language to the main language of the base voice
 					strcpy(language, &base_voice.languages[1]);
 					break;
 				}
 				p += (strlen(p) + 1);
 			}

 			if (voice_name_specified == 0)
 				voice_name[0] = 0; // forget a previous voice name if a language is specified
 		}
 		if (sp->voice_gender != ENGENDER_UNKNOWN)
 			voice_select.gender = sp->voice_gender;

 		if (sp->voice_age != 0)
 			voice_select.age = sp->voice_age;
 		if (sp->voice_variant_number != 0)
 			voice_select.variant = sp->voice_variant_number;
 	}

 	voice_select.name = voice_name;
 	voice_select.languages = language;
 	v_id = SelectVoice(&voice_select, &voice_found);
 	if (v_id == NULL)
 		return "default";

 	if ((strchr(v_id, '+') == NULL) && ((voice_select.gender == ENGENDER_UNKNOWN) || (voice_select.gender == base_voice.gender)) && (base_voice_variant_name[0] != 0)) {
 		// a voice variant has not been selected, use the original voice variant
 		// (snprintf: the length of v_id is not known here, so the write into
 		// buf must be bounded)
 		snprintf(buf, sizeof(buf), "%s+%s", v_id, base_voice_variant_name);
 		strncpy0(voice_name, buf, sizeof(voice_name));
 		return voice_name;
 	}
 	return v_id;
 }

 static void ProcessParamStack(char *outbuf, int *outix)
 {
 	// Set the speech parameters from the parameter stack
 	int param;
 	int ix;
 	int value;
 	char buf[20];
 	int new_parameters[N_SPEECH_PARAM];
 	static char cmd_letter[N_SPEECH_PARAM] = { 0, 'S', 'A', 'P', 'R', 0, 'C', 0, 0, 0, 0, 0, 'F' }; // embedded command letters

 	for (param = 0; param < N_SPEECH_PARAM; param++)
 		new_parameters[param] = -1;

 	for (ix = 0; ix < n_param_stack; ix++) {
 		for (param = 0; param < N_SPEECH_PARAM; param++) {
 			if (param_stack[ix].parameter[param] >= 0)
 				new_parameters[param] = param_stack[ix].parameter[param];
 		}
 	}

 	for (param = 0; param < N_SPEECH_PARAM; param++) {
 		if ((value = new_parameters[param]) != speech_parameters[param]) {
 			buf[0] = 0;

 			switch (param)
 			{
 			case espeakPUNCTUATION:
 				option_punctuation = value-1;
 				break;
 			case espeakCAPITALS:
 				option_capitals = value;
 				break;
 			case espeakRATE:
 			case espeakVOLUME:
 			case espeakPITCH:
 			case espeakRANGE:
 			case espeakEMPHASIS:
 				sprintf(buf, "%c%d%c", CTRL_EMBEDDED, value, cmd_letter[param]);
 				break;
 			}

 			speech_parameters[param] = new_parameters[param];
 			strcpy(&outbuf[*outix], buf);
 			*outix += strlen(buf);
 		}
 	}
 }

 static PARAM_STACK *PushParamStack(int tag_type)
 {
 	// Claim the next frame of the parameter stack for tag_type, mark all of its
 	// parameters as "not set" (-1) and return it.
 	// When the stack is full the stack depth is not increased, so the same
 	// frame is handed out again by the next push.
 	PARAM_STACK *frame = &param_stack[n_param_stack];
 	int remaining = N_SPEECH_PARAM;

 	frame->type = tag_type;
 	while (remaining-- > 0)
 		frame->parameter[remaining] = -1;

 	if ((n_param_stack + 1) < N_PARAM_STACK)
 		n_param_stack++;

 	return frame;
 }

 static void PopParamStack(int tag_type, char *outbuf, int *outix)
 {
 	// Unwind the parameter stack down to (and including) the most recent frame
 	// that was pushed for this tag type, then re-apply the remaining frames.
 	// tag_type may be given with or without SSML_CLOSE added.
 	// Frame 0 is never removed.
 	int frame;

 	if (tag_type >= SSML_CLOSE)
 		tag_type -= SSML_CLOSE;

 	// search from the top; the first hit is the most recent matching frame
 	for (frame = n_param_stack - 1; frame > 0; frame--) {
 		if (param_stack[frame].type == tag_type) {
 			n_param_stack = frame;
 			break;
 		}
 	}

 	ProcessParamStack(outbuf, outix);
 }

 static wchar_t *GetSsmlAttribute(wchar_t *pw, const char *name)
 {
 	// Gets the value string for an attribute.
 	//
 	// pw:   zero-terminated attribute text of a tag. pw[-1] is read, so pw must
 	//       not point at the first element of its buffer.
 	// name: the attribute name to look for; it only matches where the
 	//       preceding character is white-space.
 	//
 	// Returns a pointer to the first character after the opening quote (single
 	// or double) of the value, a pointer to an empty string if the name is
 	// present but is not followed by a quoted value, or NULL if the attribute
 	// is not present.

 	int ix;
 	static wchar_t empty[1] = { 0 };

 	while (*pw != 0) {
 		if (iswspace(pw[-1])) {
 			ix = 0;
 			// Stop at the end of name: otherwise a tag which ends exactly at the
 			// end of the attribute name would compare the two terminators as
 			// equal and carry on reading beyond both strings.
 			while ((name[ix] != 0) && (*pw == name[ix])) {
 				pw++;
 				ix++;
 			}
 			if (name[ix] == 0) {
 				// found the attribute, now get the value
 				while (iswspace(*pw)) pw++;
 				if (*pw == '=') pw++;
 				while (iswspace(*pw)) pw++;
 				if ((*pw == '"') || (*pw == '\'')) // allow single-quotes ?
 					return pw+1;
 				else
 					return empty;
 			}
 			if (*pw == 0)
 				break; // a partial match used up the rest of the text; don't step over the terminator
 		}
 		pw++;
 	}
 	return NULL;
 }

 static int attrcmp(const wchar_t *string1, const char *string2)
 {
 	// Compare an attribute value with a keyword.
 	// string1 is the attribute text, which is expected to end with its closing
 	// quote (single or double); string2 is a zero-terminated keyword.
 	// Returns 0 if string1 is exactly the keyword followed by a quote,
 	// otherwise 1 (also when string1 is NULL).
 	const wchar_t *attr = string1;
 	const char *word = string2;

 	if (attr == NULL)
 		return 1;

 	// skip the common prefix
 	while ((*attr != 0) && (*attr == *word)) {
 		attr++;
 		word++;
 	}

 	if (*word != 0)
 		return 1; // the keyword was not matched completely

 	return ((*attr == '"') || (*attr == '\'')) ? 0 : 1;
 }

 static int attrlookup(const wchar_t *string1, const MNEM_TAB *mtab)
 {
 	// Look up an attribute value in a table of keywords.
 	// Returns the value of the first entry whose keyword matches (see
 	// attrcmp), or the value stored in the table's terminating entry (the one
 	// with a NULL keyword) when nothing matches.
 	const MNEM_TAB *entry = mtab;

 	while (entry->mnem != NULL) {
 		if (attrcmp(string1, entry->mnem) == 0)
 			break;
 		entry++;
 	}
 	return entry->value;
 }

 static int attrnumber(const wchar_t *pw, int default_value, int type)
 {
 	// Read an unsigned decimal number from the start of an attribute value.
 	// Returns default_value if pw is NULL or does not start with a digit.
 	// If type is 1 the number is a time: a following 's' or 'S' means seconds,
 	// and the result is converted to milliseconds.
 	int value;

 	if (pw == NULL)
 		return default_value;
 	if (!IsDigit09(*pw))
 		return default_value;

 	for (value = 0; IsDigit09(*pw); pw++)
 		value = value*10 + *pw - '0';

 	if ((type == 1) && (ucd_tolower(*pw) == 's'))
 		value *= 1000; // time: seconds rather than ms

 	return value;
 }

 static int attrcopy_utf8(char *buf, const wchar_t *pw, int len)
 {
 	// Convert an attribute string into utf8, write it to buf (of size len) with
 	// a zero terminator, and return its utf8 length.
 	// Copying stops at the end of the string, at a double-quote which is not
 	// preceded by a backslash, or when fewer than 4 spare bytes remain in buf.
 	// A NULL pw gives an empty string.
 	const int limit = len - 4;
 	int used = 0;
 	int after_backslash = 0;

 	if (pw == NULL) {
 		buf[0] = 0;
 		return 0;
 	}

 	while (used < limit) {
 		unsigned int c = *pw++;

 		if (c == 0)
 			break;
 		if ((c == '"') && !after_backslash)
 			break; // " indicates end of attribute, unless preceded by a backslash

 		used += utf8_out(c, &buf[used]);
 		after_backslash = (c == '\\');
 	}

 	buf[used] = 0;
 	return used;
 }

 static int attr_prosody_value(int param_type, const wchar_t *pw, int *value_out)
 {
 	// Parse the numeric value of a prosody attribute.
 	//
 	// The number is stored in *value_out and the return value tells the caller
 	// how to interpret it:
 	//    2        *value_out is a percentage
 	//    0        *value_out is an absolute value (no sign was given)
 	//    1 or -1  *value_out is an amount to add or subtract
 	wchar_t *tail;
 	double number;
 	int sign = 0;

 	while (iswspace(*pw))
 		pw++;

 	// an optional sign; the two tests are deliberately independent
 	if (*pw == '+') {
 		sign = 1;
 		pw++;
 	}
 	if (*pw == '-') {
 		sign = -1;
 		pw++;
 	}

 	number = (double)wcstod(pw, &tail);
 	if (tail == pw) {
 		// failed to find a number, return 100%
 		*value_out = 100;
 		return 2;
 	}

 	if (*tail == '%') {
 		// a signed percentage is relative to 100%
 		*value_out = (int)((sign != 0) ? (100 + (sign * number)) : number);
 		return 2; // percentage
 	}

 	if ((tail[0] == 's') && (tail[1] == 't')) {
 		// convert from semitones to a frequency percentage
 		double percent = pow((double)2.0, (double)((number*sign)/12)) * 100;
 		*value_out = (int)percent;
 		return 2; // percentage
 	}

 	if (param_type == espeakRATE) {
 		// a plain number for the rate is a multiplier, not an absolute value
 		if (sign != 0)
 			*value_out = 100 + (int)(sign * number * 100);
 		else
 			*value_out = (int)(number * 100);
 		return 2; // percentage
 	}

 	*value_out = (int)number;
 	return sign;   // -1, 0, or 1
 }

 static int AddNameData(const char *name, int wide)
 int AddNameData(const char *name, int wide)
 {
 	// Add the name to the namedata and return its position
 	// (Used by the Windows SAPI wrapper)
@@ -970,553 +601,6 @@ void SetVoiceStack(espeak_VOICE *v, const char *variant_name)
 	memcpy(&base_voice, &current_voice_selected, sizeof(base_voice));
 }

 static int GetVoiceAttributes(wchar_t *pw, int tag_type)
 {
 	// Determines whether voice attribute are specified in this tag, and if so, whether this means
 	// a voice change.
 	// If it's a closing tag, delete the top frame of the stack and determine whether this implies
 	// a voice change.
 	// Returns  CLAUSE_TYPE_VOICE_CHANGE if there is a voice change

 	wchar_t *lang;
 	wchar_t *gender;
 	wchar_t *name;
 	wchar_t *age;
 	wchar_t *variant;
 	int value;
 	const char *new_voice_id;
 	SSML_STACK *ssml_sp;

 	static const MNEM_TAB mnem_gender[] = {
 		{ "male", ENGENDER_MALE },
 		{ "female", ENGENDER_FEMALE },
 		{ "neutral", ENGENDER_NEUTRAL },
 		{ NULL, ENGENDER_UNKNOWN }
 	};

 	if (tag_type & SSML_CLOSE) {
 		// delete a stack frame
 		if (n_ssml_stack > 1)
 			n_ssml_stack--;
 	} else {
 		// add a stack frame if any voice details are specified
 		lang = GetSsmlAttribute(pw, "xml:lang");

 		if (tag_type != SSML_VOICE) {
 			// only expect an xml:lang attribute
 			name = NULL;
 			variant = NULL;
 			age = NULL;
 			gender = NULL;
 		} else {
 			name = GetSsmlAttribute(pw, "name");
 			variant = GetSsmlAttribute(pw, "variant");
 			age = GetSsmlAttribute(pw, "age");
 			gender = GetSsmlAttribute(pw, "gender");
 		}

 		if ((tag_type != SSML_VOICE) && (lang == NULL))
 			return 0; // <s> or <p> without language spec, nothing to do

 		ssml_sp = &ssml_stack[n_ssml_stack++];

 		attrcopy_utf8(ssml_sp->language, lang, sizeof(ssml_sp->language));
 		attrcopy_utf8(ssml_sp->voice_name, name, sizeof(ssml_sp->voice_name));
 		if ((value = attrnumber(variant, 1, 0)) > 0)
 			value--; // variant='0' and variant='1' the same
 		ssml_sp->voice_variant_number = value;
 		ssml_sp->voice_age = attrnumber(age, 0, 0);
 		ssml_sp->voice_gender = attrlookup(gender, mnem_gender);
 		ssml_sp->tag_type = tag_type;
 	}

 	new_voice_id = VoiceFromStack();
 	if (strcmp(new_voice_id, current_voice_id) != 0) {
 		// add an embedded command to change the voice
 		strcpy(current_voice_id, new_voice_id);
 		return CLAUSE_TYPE_VOICE_CHANGE;
 	}

 	return 0;
 }

 static void SetProsodyParameter(int param_type, wchar_t *attr1, PARAM_STACK *sp)
 {
 	// Set one prosody parameter (rate, volume, pitch or range) in the stack
 	// frame sp from the attribute value attr1.
 	// The value is either one of the keywords in the tables below, which give
 	// a percentage of the base value held in param_stack[0], or a number which
 	// is interpreted by attr_prosody_value().
 	int value;
 	int sign;

 	static const MNEM_TAB mnem_volume[] = {
 		{ "default", 100 },
 		{ "silent",    0 },
 		{ "x-soft",   30 },
 		{ "soft",     65 },
 		{ "medium",  100 },
 		{ "loud",    150 },
 		{ "x-loud",  230 },
 		{ NULL,       -1 }
 	};

 	static const MNEM_TAB mnem_rate[] = {
 		{ "default", 100 },
 		{ "x-slow",   60 },
 		{ "slow",     80 },
 		{ "medium",  100 },
 		{ "fast",    125 },
 		{ "x-fast",  160 },
 		{ NULL,       -1 }
 	};

 	static const MNEM_TAB mnem_pitch[] = {
 		{ "default", 100 },
 		{ "x-low",    70 },
 		{ "low",      85 },
 		{ "medium",  100 },
 		{ "high",    110 },
 		{ "x-high",  120 },
 		{ NULL,       -1 }
 	};

 	static const MNEM_TAB mnem_range[] = {
 		{ "default", 100 },
 		{ "x-low",    20 },
 		{ "low",      50 },
 		{ "medium",  100 },
 		{ "high",    140 },
 		{ "x-high",  180 },
 		{ NULL,       -1 }
 	};

 	// indexed by the parameter number; entry 0 is unused
 	static const MNEM_TAB *mnem_tabs[5] = {
 		NULL, mnem_rate, mnem_volume, mnem_pitch, mnem_range
 	};

 	value = attrlookup(attr1, mnem_tabs[param_type]);
 	if (value >= 0) {
 		// mnemonic specifies a value as a percentage of the base pitch/range/rate/volume
 		sp->parameter[param_type] = (param_stack[0].parameter[param_type] * value)/100;
 		return;
 	}

 	// not a keyword, so it should be a number
 	sign = attr_prosody_value(param_type, attr1, &value);

 	switch (sign)
 	{
 	case 0:
 		// absolute value in Hz
 		sp->parameter[param_type] = value;
 		break;
 	case 2:
 		// change specified as percentage or in semitones
 		sp->parameter[param_type] = (speech_parameters[param_type] * value)/100;
 		break;
 	default:
 		// change specified as plus or minus Hz
 		sp->parameter[param_type] = speech_parameters[param_type] + (value*sign);
 		break;
 	}
 }

 static int ReplaceKeyName(char *outbuf, int index, int *outix)
 {
 	// Replace some key-names by single characters, so they can be pronounced in different languages.
 	// The text at outbuf[index] is looked up in the table below. If it is a
 	// known key name, it is overwritten with the utf8 form of the replacement
 	// character, *outix is set to the position just after that character, and
 	// the character is returned. Otherwise nothing is changed and 0 is returned.
 	static MNEM_TAB keynames[] = {
 		{ "space ",        0xe020 },
 		{ "tab ",          0xe009 },
 		{ "underscore ",   0xe05f },
 		{ "double-quote ", '"' },
 		{ NULL,            0 }
 	};

 	char *key_text = &outbuf[index];
 	int replacement = LookupMnem(keynames, key_text);

 	if (replacement == 0)
 		return 0; // not a recognised key name

 	*outix = index + utf8_out(replacement, key_text);
 	return replacement;
 }

 static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outbuf, bool self_closing)
 {
 	// Process one SSML (or HTML) tag.
 	//
 	// xml_buf is the tag and attributes with a zero terminator in place of the original '>'
 	// outbuf / outix: text output buffer and the current write position in it;
 	//     embedded commands and replacement text are appended here
 	// n_outbuf: the size of outbuf
 	// self_closing: true if the tag has the form <tag />
 	//
 	// returns a clause terminator value (0 if the tag does not end the clause).
 	//
 	// NOTE(review): most writes to outbuf are not checked against n_outbuf;
 	// the caller presumably leaves enough head-room - confirm.

 	unsigned int ix;
 	int index;
 	int c;
 	int tag_type;
 	int value;
 	int value2;
 	int value3;
 	int voice_change_flag;
 	wchar_t *px;
 	wchar_t *attr1;
 	wchar_t *attr2;
 	wchar_t *attr3;
 	int terminator;
 	char *uri;
 	int param_type;
 	char tag_name[40];
 	char buf[80];
 	PARAM_STACK *sp;
 	SSML_STACK *ssml_sp;

 	static const MNEM_TAB mnem_phoneme_alphabet[] = {
 		{ "espeak", 1 },
 		{ NULL,    -1 }
 	};

 	static const MNEM_TAB mnem_punct[] = {
 		{ "none", 1 },
 		{ "all",  2 },
 		{ "some", 3 },
 		{ NULL,  -1 }
 	};

 	static const MNEM_TAB mnem_capitals[] = {
 		{ "no",        0 },
 		{ "icon",      1 },
 		{ "spelling",  2 },
 		{ "pitch",    20 },  // this is the amount by which to raise the pitch
 		{ NULL,       -1 }
 	};

 	static const MNEM_TAB mnem_interpret_as[] = {
 		{ "characters", SAYAS_CHARS },
 		{ "tts:char",   SAYAS_SINGLE_CHARS },
 		{ "tts:key",    SAYAS_KEY },
 		{ "tts:digits", SAYAS_DIGITS },
 		{ "telephone",  SAYAS_DIGITS1 },
 		{ NULL,         -1 }
 	};

 	static const MNEM_TAB mnem_sayas_format[] = {
 		{ "glyphs", 1 },
 		{ NULL,    -1 }
 	};

 	static const MNEM_TAB mnem_break[] = {
 		{ "none",     0 },
 		{ "x-weak",   1 },
 		{ "weak",     2 },
 		{ "medium",   3 },
 		{ "strong",   4 },
 		{ "x-strong", 5 },
 		{ NULL,      -1 }
 	};

 	static const MNEM_TAB mnem_emphasis[] = {
 		{ "none",     1 },
 		{ "reduced",  2 },
 		{ "moderate", 3 },
 		{ "strong",   4 },
 		{ "x-strong", 5 },
 		{ NULL,      -1 }
 	};

 	static const char *prosody_attr[5] = {
 		NULL, "rate", "volume", "pitch", "range"
 	};

 	// copy the tag name, in lower case, up to the first white-space
 	for (ix = 0; ix < (sizeof(tag_name)-1); ix++) {
 		if (((c = xml_buf[ix]) == 0) || iswspace(c))
 			break;
 		// tolower() needs an unsigned char value; a plain (possibly negative) char is undefined
 		tag_name[ix] = tolower((unsigned char)c);
 	}
 	tag_name[ix] = 0;

 	px = &xml_buf[ix]; // the tag's attributes

 	if (tag_name[0] == '/') {
 		// closing tag
 		if ((tag_type = LookupMnem(ssmltags, &tag_name[1])) != HTML_NOSPACE)
 			outbuf[(*outix)++] = ' ';
 		tag_type += SSML_CLOSE;
 	} else {
 		if ((tag_type = LookupMnem(ssmltags, tag_name)) != HTML_NOSPACE) {
 			// separate SSML tags from the previous word (but not HTML tags such as <b> <font> which can occur inside a word)
 			outbuf[(*outix)++] = ' ';
 		}

 		if (self_closing && ignore_if_self_closing[tag_type])
 			return 0;
 	}

 	voice_change_flag = 0;
 	ssml_sp = &ssml_stack[n_ssml_stack-1]; // the top frame of the voice stack, before this tag is applied

 	switch (tag_type)
 	{
 	case SSML_STYLE:
 		sp = PushParamStack(tag_type);
 		attr1 = GetSsmlAttribute(px, "field");
 		attr2 = GetSsmlAttribute(px, "mode");


 		if (attrcmp(attr1, "punctuation") == 0) {
 			value = attrlookup(attr2, mnem_punct);
 			sp->parameter[espeakPUNCTUATION] = value;
 		} else if (attrcmp(attr1, "capital_letters") == 0) {
 			value = attrlookup(attr2, mnem_capitals);
 			sp->parameter[espeakCAPITALS] = value;
 		}
 		ProcessParamStack(outbuf, outix);
 		break;
 	case SSML_PROSODY:
 		sp = PushParamStack(tag_type);

 		// look for attributes:  rate, volume, pitch, range
 		for (param_type = espeakRATE; param_type <= espeakRANGE; param_type++) {
 			if ((attr1 = GetSsmlAttribute(px, prosody_attr[param_type])) != NULL)
 				SetProsodyParameter(param_type, attr1, sp);
 		}

 		ProcessParamStack(outbuf, outix);
 		break;
 	case SSML_EMPHASIS:
 		sp = PushParamStack(tag_type);
 		value = 3; // default is "moderate"
 		if ((attr1 = GetSsmlAttribute(px, "level")) != NULL)
 			value = attrlookup(attr1, mnem_emphasis);
 		if (value < 0)
 			value = 3; // unrecognised level: use the default, -1 must not be used as a table index

 		if (translator->langopts.tone_language == 1) {
 			static unsigned char emphasis_to_pitch_range[] = { 50, 50, 40, 70, 90, 100 };
 			static unsigned char emphasis_to_volume[] = { 100, 100, 70, 110, 135, 150 };
 			// tone language (eg.Chinese) do emphasis by increasing the pitch range.
 			sp->parameter[espeakRANGE] = emphasis_to_pitch_range[value];
 			sp->parameter[espeakVOLUME] = emphasis_to_volume[value];
 		} else {
 			static unsigned char emphasis_to_volume2[] = { 100, 100, 75, 100, 120, 150 };
 			sp->parameter[espeakVOLUME] = emphasis_to_volume2[value];
 			sp->parameter[espeakEMPHASIS] = value;
 		}
 		ProcessParamStack(outbuf, outix);
 		break;
 	case SSML_STYLE + SSML_CLOSE:
 	case SSML_PROSODY + SSML_CLOSE:
 	case SSML_EMPHASIS + SSML_CLOSE:
 		PopParamStack(tag_type, outbuf, outix);
 		break;
 	case SSML_PHONEME:
 		attr1 = GetSsmlAttribute(px, "alphabet");
 		attr2 = GetSsmlAttribute(px, "ph");
 		value = attrlookup(attr1, mnem_phoneme_alphabet);
 		if (value == 1) { // alphabet="espeak"
 			// pass the phoneme string through inside [[ ]]
 			outbuf[(*outix)++] = '[';
 			outbuf[(*outix)++] = '[';
 			*outix += attrcopy_utf8(&outbuf[*outix], attr2, n_outbuf-*outix);
 			outbuf[(*outix)++] = ']';
 			outbuf[(*outix)++] = ']';
 		}
 		break;
 	case SSML_SAYAS:
 		attr1 = GetSsmlAttribute(px, "interpret-as");
 		attr2 = GetSsmlAttribute(px, "format");
 		attr3 = GetSsmlAttribute(px, "detail");
 		value = attrlookup(attr1, mnem_interpret_as);
 		value2 = attrlookup(attr2, mnem_sayas_format);
 		if (value2 == 1)
 			value = SAYAS_GLYPHS;

 		value3 = attrnumber(attr3, 0, 0);

 		if (value == SAYAS_DIGITS) {
 			if (value3 <= 1)
 				value = SAYAS_DIGITS1;
 			else
 				value = SAYAS_DIGITS + value3;
 		}

 		sprintf(buf, "%c%dY", CTRL_EMBEDDED, value);
 		strcpy(&outbuf[*outix], buf);
 		*outix += strlen(buf);

 		sayas_start = *outix;
 		sayas_mode = value; // punctuation doesn't end clause during SAY-AS
 		break;
 	case SSML_SAYAS + SSML_CLOSE:
 		if (sayas_mode == SAYAS_KEY) {
 			outbuf[*outix] = 0;
 			ReplaceKeyName(outbuf, sayas_start, outix);
 		}

 		outbuf[(*outix)++] = CTRL_EMBEDDED;
 		outbuf[(*outix)++] = 'Y';
 		sayas_mode = 0;
 		break;
 	case SSML_SUB:
 		if ((attr1 = GetSsmlAttribute(px, "alias")) != NULL) {
 			// use the alias  rather than the text
 			ignore_text = true;
 			*outix += attrcopy_utf8(&outbuf[*outix], attr1, n_outbuf-*outix);
 		}
 		break;
 	case SSML_IGNORE_TEXT:
 		ignore_text = true;
 		break;
 	case SSML_SUB + SSML_CLOSE:
 	case SSML_IGNORE_TEXT + SSML_CLOSE:
 		ignore_text = false;
 		break;
 	case SSML_MARK:
 		if ((attr1 = GetSsmlAttribute(px, "name")) != NULL) {
 			// add name to circular buffer of marker names
 			attrcopy_utf8(buf, attr1, sizeof(buf));

 			if (strcmp(skip_marker, buf) == 0) {
 				// This is the marker we are waiting for before starting to speak
 				clear_skipping_text = true;
 				skip_marker[0] = 0;
 				return CLAUSE_NONE;
 			}

 			if ((index = AddNameData(buf, 0)) >= 0) {
 				sprintf(buf, "%c%dM", CTRL_EMBEDDED, index);
 				strcpy(&outbuf[*outix], buf);
 				*outix += strlen(buf);
 			}
 		}
 		break;
 	case SSML_AUDIO:
 		sp = PushParamStack(tag_type);

 		if ((attr1 = GetSsmlAttribute(px, "src")) != NULL) {
 			char fname[256];
 			attrcopy_utf8(buf, attr1, sizeof(buf));

 			if (uri_callback == NULL) {
 				// no callback: load the sound file ourselves
 				if ((xmlbase != NULL) && (buf[0] != '/')) {
 					// relative path, bounded write into fname
 					snprintf(fname, sizeof(fname), "%s/%s", xmlbase, buf);
 					index = LoadSoundFile2(fname);
 				} else
 					index = LoadSoundFile2(buf);
 				if (index >= 0) {
 					sprintf(buf, "%c%dI", CTRL_EMBEDDED, index);
 					strcpy(&outbuf[*outix], buf);
 					*outix += strlen(buf);
 					sp->parameter[espeakSILENCE] = 1;
 				}
 			} else {
 				// let the application decide whether it can play this uri
 				if ((index = AddNameData(buf, 0)) >= 0) {
 					uri = &namedata[index];
 					if (uri_callback(1, uri, xmlbase) == 0) {
 						sprintf(buf, "%c%dU", CTRL_EMBEDDED, index);
 						strcpy(&outbuf[*outix], buf);
 						*outix += strlen(buf);
 						sp->parameter[espeakSILENCE] = 1;
 					}
 				}
 			}
 		}
 		ProcessParamStack(outbuf, outix);

 		if (self_closing)
 			PopParamStack(tag_type, outbuf, outix);
 		else
 			audio_text = true;
 		return CLAUSE_NONE;
 	case SSML_AUDIO + SSML_CLOSE:
 		PopParamStack(tag_type, outbuf, outix);
 		audio_text = false;
 		return CLAUSE_NONE;
 	case SSML_BREAK:
 		value = 21;
 		terminator = CLAUSE_NONE;

 		if ((attr1 = GetSsmlAttribute(px, "strength")) != NULL) {
 			static int break_value[6] = { 0, 7, 14, 21, 40, 80 }; // *10mS
 			value = attrlookup(attr1, mnem_break);
 			if (value < 0)
 				value = 3; // unrecognised strength: treat as "medium", -1 must not be used as a table index
 			if (value < 3) {
 				// adjust prepause on the following word
 				// (value is a single digit here, so the command is 3 characters)
 				sprintf(&outbuf[*outix], "%c%dB", CTRL_EMBEDDED, value);
 				*outix += 3;
 				terminator = 0;
 			}
 			value = break_value[value];
 		}
 		if ((attr2 = GetSsmlAttribute(px, "time")) != NULL) {
 			value2 = attrnumber(attr2, 0, 1);   // pause in mS

 			// compensate for speaking speed to keep constant pause length, see function PauseLength()
 			// 'value' here is x 10mS
 			value = (value2 * 256) / (speed.clause_pause_factor * 10);
 			if (value < 200)
 				value = (value2 * 256) / (speed.pause_factor * 10);

 			if (terminator == 0)
 				terminator = CLAUSE_NONE;
 		}
 		if (terminator) {
 			if (value > 0xfff) {
 				// scale down the value and set a scaling indicator bit
 				value = value / 32;
 				if (value > 0xfff)
 					value = 0xfff;
 				terminator |= CLAUSE_PAUSE_LONG;
 			}
 			return terminator + value;
 		}
 		break;
 	case SSML_SPEAK:
 		if ((attr1 = GetSsmlAttribute(px, "xml:base")) != NULL) {
 			attrcopy_utf8(buf, attr1, sizeof(buf));
 			if ((index = AddNameData(buf, 0)) >= 0)
 				xmlbase = &namedata[index];
 		}
 		if (GetVoiceAttributes(px, tag_type) == 0)
 			return 0; // no voice change
 		return CLAUSE_VOICE;
 	case SSML_VOICE:
 		if (GetVoiceAttributes(px, tag_type) == 0)
 			return 0; // no voice change
 		return CLAUSE_VOICE;
 	case SSML_SPEAK + SSML_CLOSE:
 		// unwind stack until the previous <voice> or <speak> tag
 		while ((n_ssml_stack > 1) && (ssml_stack[n_ssml_stack-1].tag_type != SSML_SPEAK))
 			n_ssml_stack--;
 		return CLAUSE_PERIOD + GetVoiceAttributes(px, tag_type);
 	case SSML_VOICE + SSML_CLOSE:
 		// unwind stack until the previous <voice> or <speak> tag
 		while ((n_ssml_stack > 1) && (ssml_stack[n_ssml_stack-1].tag_type != SSML_VOICE))
 			n_ssml_stack--;

 		terminator = 0; // ??  Sentence intonation, but no pause ??
 		return terminator + GetVoiceAttributes(px, tag_type);
 	case HTML_BREAK:
 	case HTML_BREAK + SSML_CLOSE:
 		return CLAUSE_COLON;
 	case SSML_SENTENCE:
 		if (ssml_sp->tag_type == SSML_SENTENCE) {
 			// new sentence implies end-of-sentence
 			voice_change_flag = GetVoiceAttributes(px, SSML_SENTENCE+SSML_CLOSE);
 		}
 		voice_change_flag |= GetVoiceAttributes(px, tag_type);
 		return CLAUSE_PARAGRAPH + voice_change_flag;
 	case SSML_PARAGRAPH:
 		if (ssml_sp->tag_type == SSML_SENTENCE) {
 			// new paragraph implies end-of-sentence or end-of-paragraph
 			voice_change_flag = GetVoiceAttributes(px, SSML_SENTENCE+SSML_CLOSE);
 		}
 		if (ssml_sp->tag_type == SSML_PARAGRAPH) {
 			// new paragraph implies end-of-sentence or end-of-paragraph
 			voice_change_flag |= GetVoiceAttributes(px, SSML_PARAGRAPH+SSML_CLOSE);
 		}
 		voice_change_flag |= GetVoiceAttributes(px, tag_type);
 		return CLAUSE_PARAGRAPH + voice_change_flag;
 	case SSML_SENTENCE + SSML_CLOSE:
 		if (ssml_sp->tag_type == SSML_SENTENCE) {
 			// end of a sentence which specified a language
 			voice_change_flag = GetVoiceAttributes(px, tag_type);
 		}
 		return CLAUSE_PERIOD + voice_change_flag;
 	case SSML_PARAGRAPH + SSML_CLOSE:
 		if ((ssml_sp->tag_type == SSML_SENTENCE) || (ssml_sp->tag_type == SSML_PARAGRAPH)) {
 			// End of a paragraph which specified a language.
 			// (End-of-paragraph also implies end-of-sentence)
 			return GetVoiceAttributes(px, tag_type) + CLAUSE_PARAGRAPH;
 		}
 		return CLAUSE_PARAGRAPH;
 	}
 	return 0;
 }

 static void RemoveChar(char *p)
 {
 	// Replace a UTF-8 character by spaces
@@ -1708,7 +792,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
 						self_closing = true;
 					}

 					terminator = ProcessSsmlTag(xml_buf, buf, &ix, n_buf, self_closing);
 					terminator = ProcessSsmlTag(xml_buf, buf, &ix, n_buf, self_closing, xmlbase, &audio_text, current_voice_id, &base_voice, base_voice_variant_name, &ignore_text, &clear_skipping_text, &sayas_mode, &sayas_start, ssml_stack, &n_ssml_stack, &n_param_stack, (int *)speech_parameters);

 					if (terminator != 0) {
 						buf[ix] = ' ';
--- a/src/libespeak-ng/ssml.c
+++ b/src/libespeak-ng/ssml.c
@@ -0,0 +1,937 @@
 /*
 * Copyright (C) 2005 to 2015 by Jonathan Duddington
 * email: jonsd@users.sourceforge.net
 * Copyright (C) 2015-2017 Reece H. Dunn
 * Copyright (C) 2018 Juho Hiltunen
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see: <http://www.gnu.org/licenses/>.
 */


 #include "config.h"

 #include <ctype.h>
 #include <errno.h>
 #include <locale.h>
 #include <math.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include <wchar.h>
 #include <wctype.h>


 #include <espeak-ng/espeak_ng.h>
 #include <espeak-ng/speak_lib.h>
 #include <espeak-ng/encoding.h>
 #include <ucd/ucd.h>

 #include "error.h"
 #include "speech.h"
 #include "phoneme.h"
 #include "voice.h"
 #include "synthesize.h"
 #include "translate.h"
 #include "ssml.h"

 // Element names recognised by ProcessSsmlTag(), mapped to their tag-type codes.
 // The table is searched with LookupMnem() and ends with a { NULL, 0 } entry.
 static MNEM_TAB ssmltags[] = {
 	// SSML elements
 	{ "speak",     SSML_SPEAK },
 	{ "voice",     SSML_VOICE },
 	{ "prosody",   SSML_PROSODY },
 	{ "say-as",    SSML_SAYAS },
 	{ "mark",      SSML_MARK },
 	{ "s",         SSML_SENTENCE },
 	{ "p",         SSML_PARAGRAPH },
 	{ "phoneme",   SSML_PHONEME },
 	{ "sub",       SSML_SUB },
 	{ "tts:style", SSML_STYLE },
 	{ "audio",     SSML_AUDIO },
 	{ "emphasis",  SSML_EMPHASIS },
 	{ "break",     SSML_BREAK },
 	{ "metadata",  SSML_IGNORE_TEXT },

 	// HTML elements; several of them share the codes of the SSML elements above
 	{ "br",     HTML_BREAK },
 	{ "li",     HTML_BREAK },
 	{ "dd",     HTML_BREAK },
 	{ "img",    HTML_BREAK },
 	{ "td",     HTML_BREAK },
 	{ "h1",     SSML_PARAGRAPH },
 	{ "h2",     SSML_PARAGRAPH },
 	{ "h3",     SSML_PARAGRAPH },
 	{ "h4",     SSML_PARAGRAPH },
 	{ "hr",     SSML_PARAGRAPH },
 	{ "script", SSML_IGNORE_TEXT },
 	{ "style",  SSML_IGNORE_TEXT },
 	// HTML_NOSPACE: no space is inserted for these, so they can occur inside a word
 	{ "font",   HTML_NOSPACE },
 	{ "b",      HTML_NOSPACE },
 	{ "i",      HTML_NOSPACE },
 	{ "strong", HTML_NOSPACE },
 	{ "em",     HTML_NOSPACE },
 	{ "code",   HTML_NOSPACE },

 	{ NULL, 0 }
 };

 static int attrcmp(const wchar_t *string1, const char *string2)
 {
 	// Compare an attribute value with a plain string.
 	// string1 is the attribute text, which must end with its closing quote
 	// (either " or ') directly after the matching part.
 	// Returns 0 for a match, 1 otherwise (including when string1 is NULL).
 	const wchar_t *attr = string1;
 	const char *word = string2;

 	if (attr == NULL)
 		return 1;

 	// step over the common prefix
 	while ((*attr != 0) && (*attr == *word)) {
 		attr++;
 		word++;
 	}

 	// a match needs the whole of string2 to be used up ...
 	if (*word != 0)
 		return 1;

 	// ... and the attribute value to finish at exactly this point
 	return ((*attr == '"') || (*attr == '\'')) ? 0 : 1;
 }


 static int attrlookup(const wchar_t *string1, const MNEM_TAB *mtab)
 {
 	// Look up an attribute value in a mnemonic table.
 	// Returns the value of the first entry that attrcmp() accepts. When no
 	// entry matches, the value stored in the terminating (NULL mnemonic)
 	// entry is returned, so each table chooses its own "not found" result.
 	const MNEM_TAB *entry = mtab;

 	while ((entry->mnem != NULL) && (attrcmp(string1, entry->mnem) != 0))
 		entry++;

 	return entry->value;
 }

 static int attrnumber(const wchar_t *pw, int default_value, int type)
 {
 	// Read an unsigned decimal number from the start of an attribute value.
 	// Returns default_value if pw is NULL or does not start with a digit.
 	// type 1 means the number is a time: if the digits are followed by
 	// 's' (upper or lower case) it is in seconds and is converted to ms.
 	int number;

 	if (pw == NULL)
 		return default_value;
 	if (!IsDigit09(*pw))
 		return default_value;

 	for (number = 0; IsDigit09(*pw); pw++)
 		number = number*10 + *pw - '0';

 	if (type != 1)
 		return number;

 	// time: seconds rather than ms
 	return (ucd_tolower(*pw) == 's') ? number * 1000 : number;
 }

 static int attrcopy_utf8(char *buf, const wchar_t *pw, int len)
 {
 	// Convert an attribute string to UTF-8 in buf (size len) and return the
 	// number of bytes written, not counting the zero terminator.
 	// Copying stops at the end of the string, at an unescaped double-quote,
 	// or when fewer than 4 spare bytes would remain in buf.
 	// A NULL pw gives an empty string.
 	// NOTE(review): a single-quote does not end the copy here.
 	unsigned int ch;
 	unsigned int prev = 0;
 	int used = 0;

 	if (pw == NULL) {
 		buf[0] = 0;
 		return 0;
 	}

 	while (used < (len-4)) {
 		ch = *pw++;

 		// " indicates end of attribute, unless preceded by a backslash
 		if ((ch == 0) || ((ch == '"') && (prev != '\\')))
 			break;

 		used += utf8_out(ch, &buf[used]);
 		prev = ch;
 	}

 	buf[used] = 0;
 	return used;
 }

 static int attr_prosody_value(int param_type, const wchar_t *pw, int *value_out)
 {
 	// Parse the numeric value of a prosody attribute.
 	// The number is stored in *value_out. The return value says how to
 	// interpret it:
 	//    2       *value_out is a percentage of the current value
 	//    0       *value_out is an absolute value
 	//   1, -1    *value_out is an amount to add to / subtract from the current value
 	// If no number is found, 100% is returned.
 	int direction = 0;
 	wchar_t *rest;
 	double number;

 	while (iswspace(*pw))
 		pw++;

 	// The two tests are made one after the other, so "+-" counts as minus.
 	if (*pw == '+') {
 		direction = 1;
 		pw++;
 	}
 	if (*pw == '-') {
 		direction = -1;
 		pw++;
 	}

 	number = (double)wcstod(pw, &rest);
 	if (rest == pw) {
 		// failed to find a number, return 100%
 		*value_out = 100;
 		return 2;
 	}

 	if (*rest == '%') {
 		// a signed percentage is a change relative to 100%
 		if (direction == 0)
 			*value_out = (int)number;
 		else
 			*value_out = (int)(100 + (direction * number));
 		return 2;
 	}

 	if ((rest[0] == 's') && (rest[1] == 't')) {
 		// convert from semitones to a frequency percentage
 		double ratio = pow((double)2.0, (double)((number*direction)/12)) * 100;
 		*value_out = (int)ratio;
 		return 2;
 	}

 	if (param_type == espeakRATE) {
 		// a plain number for the rate is a multiplier, returned as a percentage
 		if (direction == 0)
 			*value_out = (int)(number * 100);
 		else
 			*value_out = 100 + (int)(direction * number * 100);
 		return 2;
 	}

 	*value_out = (int)number;
 	return direction; // -1, 0, or 1
 }

 static const char *VoiceFromStack(SSML_STACK *ssml_stack, int n_ssml_stack, espeak_VOICE *base_voice, char base_voice_variant_name[40])
 {
 	// Use the voice properties from the SSML stack to choose a voice.
 	// The frames are applied from frame 0 (the defaults) upwards, so that the
 	// properties of inner elements override those of outer elements.
 	//
 	// Returns the identifier of the chosen voice, or "default" if SelectVoice()
 	// does not return one. The result can point to a static buffer which is
 	// overwritten by the next call. It is up to the caller to compare the
 	// result with the current voice.

 	int ix;
 	const char *p;
 	SSML_STACK *sp;
 	const char *v_id;
 	int voice_name_specified;
 	int voice_found;
 	espeak_VOICE voice_select;
 	static char voice_name[40];
 	char language[40];
 	char buf[80];

 	strcpy(voice_name, ssml_stack[0].voice_name);
 	strcpy(language, ssml_stack[0].language);
 	voice_select.age = ssml_stack[0].voice_age;
 	voice_select.gender = ssml_stack[0].voice_gender;
 	voice_select.variant = ssml_stack[0].voice_variant_number;
 	voice_select.identifier = NULL;

 	for (ix = 0; ix < n_ssml_stack; ix++) {
 		sp = &ssml_stack[ix];
 		voice_name_specified = 0;

 		if ((sp->voice_name[0] != 0) && (SelectVoiceByName(NULL, sp->voice_name) != NULL)) {
 			// a known voice name replaces everything selected so far
 			voice_name_specified = 1;
 			strcpy(voice_name, sp->voice_name);
 			language[0] = 0;
 			voice_select.gender = ENGENDER_UNKNOWN;
 			voice_select.age = 0;
 			voice_select.variant = 0;
 		}
 		if (sp->language[0] != 0) {
 			strcpy(language, sp->language);

 			// is this language provided by the base voice?
 			// (the list is a sequence of: one leading byte, then a zero-terminated
 			// name; a zero leading byte ends the list)
 			p = base_voice->languages;
 			while (*p++ != 0) {
 				if (strcmp(p, language) == 0) {
 					// yes, change the language to the main language of the base voice.
 					// The length of that name is not known here, so bound the copy.
 					strncpy0(language, &base_voice->languages[1], sizeof(language));
 					break;
 				}
 				p += (strlen(p) + 1);
 			}

 			if (voice_name_specified == 0)
 				voice_name[0] = 0; // forget a previous voice name if a language is specified
 		}
 		if (sp->voice_gender != ENGENDER_UNKNOWN)
 			voice_select.gender = sp->voice_gender;

 		if (sp->voice_age != 0)
 			voice_select.age = sp->voice_age;
 		if (sp->voice_variant_number != 0)
 			voice_select.variant = sp->voice_variant_number;
 	}

 	voice_select.name = voice_name;
 	voice_select.languages = language;
 	v_id = SelectVoice(&voice_select, &voice_found);
 	if (v_id == NULL)
 		return "default";

 	if ((strchr(v_id, '+') == NULL) && ((voice_select.gender == ENGENDER_UNKNOWN) || (voice_select.gender == base_voice->gender)) && (base_voice_variant_name[0] != 0)) {
 		// a voice variant has not been selected, use the original voice variant.
 		// snprintf keeps the combined name inside buf whatever the length of v_id.
 		snprintf(buf, sizeof(buf), "%s+%s", v_id, base_voice_variant_name);
 		strncpy0(voice_name, buf, sizeof(voice_name));
 		return voice_name;
 	}
 	return v_id;
 }


 static wchar_t *GetSsmlAttribute(wchar_t *pw, const char *name)
 {
 	// Gets the value string for an attribute.
 	// pw points into the tag text. The character before pw is examined, so
 	// pw must not be the first character of its buffer.
 	// Returns:
 	//   a pointer to the first character after the opening quote (" or '),
 	//   an empty string if the name is present but no quoted value follows,
 	//   NULL if the attribute is not present.

 	int ix;
 	static wchar_t empty[1] = { 0 };

 	for (; *pw != 0; pw++) {
 		if (!iswspace(pw[-1]))
 			continue; // an attribute name can only start after whitespace

 		// Match the name. Stop at the end of the name, so that the two
 		// terminators are never compared as "equal" when the text and the
 		// name end together.
 		ix = 0;
 		while ((name[ix] != 0) && (*pw == name[ix])) {
 			pw++;
 			ix++;
 		}
 		if (name[ix] == 0) {
 			// found the attribute, now get the value
 			while (iswspace(*pw)) pw++;
 			if (*pw == '=') pw++;
 			while (iswspace(*pw)) pw++;
 			if ((*pw == '"') || (*pw == '\'')) // allow single-quotes ?
 				return pw+1;
 			else
 				return empty;
 		}

 		// The text ended part-way through the name. Leave the loop here
 		// rather than stepping past the terminator.
 		if (*pw == 0)
 			break;
 	}
 	return NULL;
 }


 static int GetVoiceAttributes(wchar_t *pw, int tag_type, SSML_STACK *ssml_sp, SSML_STACK *ssml_stack, int n_ssml_stack, char current_voice_id[40], espeak_VOICE *base_voice, char *base_voice_variant_name)
 {
 	// Determines whether voice attributes are specified in this tag, and if so, whether this means
 	// a voice change.
 	// If it's a closing tag, drop the top frame of the stack and determine whether this implies
 	// a voice change.
 	// Returns CLAUSE_TYPE_VOICE_CHANGE if there is a voice change, otherwise 0.
 	// When the voice changes, its identifier is stored in current_voice_id.
 	//
 	// The incoming value of ssml_sp is not used; the variable is only assigned locally.
 	// NOTE(review): n_ssml_stack is received by value, so the push/pop done here changes
 	// only the local copy; the caller's stack depth stays as it was.

 	wchar_t *lang;
 	wchar_t *gender;
 	wchar_t *name;
 	wchar_t *age;
 	wchar_t *variant;
 	int value;
 	const char *new_voice_id;

 	static const MNEM_TAB mnem_gender[] = {
 		{ "male", ENGENDER_MALE },
 		{ "female", ENGENDER_FEMALE },
 		{ "neutral", ENGENDER_NEUTRAL },
 		{ NULL, ENGENDER_UNKNOWN }
 	};

 	if (tag_type & SSML_CLOSE) {
 		// delete a stack frame (frame 0, the defaults, is never deleted)
 		if (n_ssml_stack > 1)
 			n_ssml_stack--;
 	} else {
 		// add a stack frame if any voice details are specified
 		lang = GetSsmlAttribute(pw, "xml:lang");

 		if (tag_type != SSML_VOICE) {
 			// only expect an xml:lang attribute
 			name = NULL;
 			variant = NULL;
 			age = NULL;
 			gender = NULL;
 		} else {
 			name = GetSsmlAttribute(pw, "name");
 			variant = GetSsmlAttribute(pw, "variant");
 			age = GetSsmlAttribute(pw, "age");
 			gender = GetSsmlAttribute(pw, "gender");
 		}

 		if ((tag_type != SSML_VOICE) && (lang == NULL))
 			return 0; // <s> or <p> without language spec, nothing to do

 		ssml_sp = &ssml_stack[n_ssml_stack++];

 		// attributes which are absent (NULL) give empty strings / default numbers
 		attrcopy_utf8(ssml_sp->language, lang, sizeof(ssml_sp->language));
 		attrcopy_utf8(ssml_sp->voice_name, name, sizeof(ssml_sp->voice_name));
 		if ((value = attrnumber(variant, 1, 0)) > 0)
 			value--; // variant='0' and variant='1' the same
 		ssml_sp->voice_variant_number = value;
 		ssml_sp->voice_age = attrnumber(age, 0, 0);
 		ssml_sp->voice_gender = attrlookup(gender, mnem_gender);
 		ssml_sp->tag_type = tag_type;
 	}

 	new_voice_id = VoiceFromStack(ssml_stack, n_ssml_stack, base_voice, base_voice_variant_name);
 	if (strcmp(new_voice_id, current_voice_id) != 0) {
 		// Remember the new voice. The identifier's length is not limited by
 		// VoiceFromStack(), so bound the copy to the 40 bytes of current_voice_id.
 		snprintf(current_voice_id, 40, "%s", new_voice_id);
 		return CLAUSE_TYPE_VOICE_CHANGE;
 	}

 	return 0;
 }

 static void ProcessParamStack(char *outbuf, int *outix, int n_param_stack, PARAM_STACK *param_stack, int *speech_parameters)
 {
 	// Set the speech parameters from the parameter stack.
 	// For each parameter the effective value is the one in the highest stack
 	// frame which sets it (a value >= 0), or -1 if no frame sets it. Where this
 	// differs from speech_parameters[], speech_parameters[] is updated and, for
 	// rate/volume/pitch/range/emphasis, an embedded command is appended to
 	// outbuf at *outix.
 	static char cmd_letter[N_SPEECH_PARAM] = { 0, 'S', 'A', 'P', 'R', 0, 'C', 0, 0, 0, 0, 0, 'F' }; // embedded command letters
 	int new_parameters[N_SPEECH_PARAM];
 	char buf[20];
 	int param;
 	int ix;
 	int value;

 	// work out the effective value of every parameter first
 	for (param = 0; param < N_SPEECH_PARAM; param++) {
 		int wanted = -1;

 		for (ix = 0; ix < n_param_stack; ix++) {
 			if (param_stack[ix].parameter[param] >= 0)
 				wanted = param_stack[ix].parameter[param];
 		}
 		new_parameters[param] = wanted;
 	}

 	// then apply those which have changed
 	for (param = 0; param < N_SPEECH_PARAM; param++) {
 		value = new_parameters[param];
 		if (value == speech_parameters[param])
 			continue;

 		buf[0] = 0;
 		if (param == espeakPUNCTUATION) {
 			option_punctuation = value-1;
 		} else if (param == espeakCAPITALS) {
 			option_capitals = value;
 		} else if ((param == espeakRATE) || (param == espeakVOLUME) || (param == espeakPITCH)
 		           || (param == espeakRANGE) || (param == espeakEMPHASIS)) {
 			sprintf(buf, "%c%d%c", CTRL_EMBEDDED, value, cmd_letter[param]);
 		}

 		speech_parameters[param] = value;

 		// buf is empty when there is no embedded command; the copy then only
 		// writes a terminator at the current output position
 		strcpy(&outbuf[*outix], buf);
 		*outix += strlen(buf);
 	}
 }

 static PARAM_STACK *PushParamStack(int tag_type, int *n_param_stack, PARAM_STACK *param_stack)
 {
 	// Start a new frame on the parameter stack for an element of type tag_type.
 	// All the frame's parameters are set to -1 (not specified).
 	// Returns the new frame.
 	PARAM_STACK *frame = &param_stack[*n_param_stack];
 	int param;

 	// The depth stops increasing at N_PARAM_STACK-1; after that the same
 	// frame is handed out again.
 	if (*n_param_stack < (N_PARAM_STACK-1))
 		*n_param_stack += 1;

 	frame->type = tag_type;
 	for (param = 0; param < N_SPEECH_PARAM; param++)
 		frame->parameter[param] = -1;

 	return frame;
 }

 static void PopParamStack(int tag_type, char *outbuf, int *outix, int *n_param_stack, PARAM_STACK *param_stack, int *speech_parameters)
 {
 	// Unwind the stack up to and including the most recent frame of this
 	// element type, then re-apply the parameters of the remaining frames.
 	// tag_type can be given with or without SSML_CLOSE added.
 	// Frame 0 is never removed; if no other frame has this type the depth is unchanged.
 	int open_type = tag_type;
 	int ix;

 	if (open_type >= SSML_CLOSE)
 		open_type -= SSML_CLOSE;

 	// look from the top downwards for the frame to cut the stack at
 	for (ix = *n_param_stack - 1; ix > 0; ix--) {
 		if (param_stack[ix].type == open_type) {
 			*n_param_stack = ix;
 			break;
 		}
 	}

 	ProcessParamStack(outbuf, outix, *n_param_stack, param_stack, speech_parameters);
 }

 static int ReplaceKeyName(char *outbuf, int index, int *outix)
 {
 	// Replace some key-names by single characters, so they can be pronounced in different languages.
 	// The text to examine starts at outbuf[index]. If it is one of the key
 	// names, the character is written over it as UTF-8, *outix is set to the
 	// position just after that character, and the character is returned.
 	// Otherwise nothing is changed and 0 is returned.
 	static MNEM_TAB keynames[] = {
 		{ "space ",        0xe020 },
 		{ "tab ",          0xe009 },
 		{ "underscore ",   0xe05f },
 		{ "double-quote ", '"' },
 		{ NULL,            0 }
 	};

 	char *text = &outbuf[index];
 	int letter = LookupMnem(keynames, text);

 	if (letter == 0)
 		return 0;

 	*outix = index + utf8_out(letter, text);
 	return letter;
 }

 static void SetProsodyParameter(int param_type, wchar_t *attr1, PARAM_STACK *sp, PARAM_STACK *param_stack, int *speech_parameters)
 {
 	// Store the value of a prosody attribute (attr1) in stack frame sp.
 	// param_type selects the parameter and the table of named values for it.
 	// A named value is a percentage of the value in frame 0 of param_stack;
 	// a numeric value is applied relative to speech_parameters[], or used
 	// as it is when it is absolute.
 	static const MNEM_TAB mnem_volume[] = {
 		{ "default", 100 },
 		{ "silent",    0 },
 		{ "x-soft",   30 },
 		{ "soft",     65 },
 		{ "medium",  100 },
 		{ "loud",    150 },
 		{ "x-loud",  230 },
 		{ NULL,       -1 }
 	};

 	static const MNEM_TAB mnem_rate[] = {
 		{ "default", 100 },
 		{ "x-slow",   60 },
 		{ "slow",     80 },
 		{ "medium",  100 },
 		{ "fast",    125 },
 		{ "x-fast",  160 },
 		{ NULL,       -1 }
 	};

 	static const MNEM_TAB mnem_pitch[] = {
 		{ "default", 100 },
 		{ "x-low",    70 },
 		{ "low",      85 },
 		{ "medium",  100 },
 		{ "high",    110 },
 		{ "x-high",  120 },
 		{ NULL,       -1 }
 	};

 	static const MNEM_TAB mnem_range[] = {
 		{ "default", 100 },
 		{ "x-low",    20 },
 		{ "low",      50 },
 		{ "medium",  100 },
 		{ "high",    140 },
 		{ "x-high",  180 },
 		{ NULL,       -1 }
 	};

 	// indexed by param_type
 	static const MNEM_TAB *mnem_tabs[5] = {
 		NULL, mnem_rate, mnem_volume, mnem_pitch, mnem_range
 	};

 	int sign;
 	int value = attrlookup(attr1, mnem_tabs[param_type]);

 	if (value >= 0) {
 		// mnemonic specifies a value as a percentage of the base pitch/range/rate/volume
 		sp->parameter[param_type] = (param_stack[0].parameter[param_type] * value)/100;
 		return;
 	}

 	// not a named value, so it should be a number
 	sign = attr_prosody_value(param_type, attr1, &value);
 	switch (sign)
 	{
 	case 0:
 		// absolute value in Hz
 		sp->parameter[param_type] = value;
 		break;
 	case 2:
 		// change specified as percentage or in semitones
 		sp->parameter[param_type] = (speech_parameters[param_type] * value)/100;
 		break;
 	default:
 		// change specified as plus or minus Hz
 		sp->parameter[param_type] = speech_parameters[param_type] + (value*sign);
 		break;
 	}
 }

 int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outbuf, bool self_closing, const char *xmlbase, bool *audio_text, char *current_voice_id, espeak_VOICE *base_voice, char *base_voice_variant_name, bool *ignore_text, bool *clear_skipping_text, int *sayas_mode, int *sayas_start, SSML_STACK *ssml_stack, int *n_ssml_stack, int *n_param_stack, int *speech_parameters)
 {
 	// Process one SSML (or HTML) tag.
 	// xml_buf is the tag and attributes with a zero terminator in place of the original '>'
 	// outbuf, outix: the output text and the current position in it; embedded
 	//    commands and replacement text are added here and *outix is advanced.
 	// n_outbuf: size of outbuf, used to limit the attribute text which is copied into it.
 	// self_closing: the tag is of the form <tag />.
 	// xmlbase: base location for <audio src=...>.
 	// audio_text, ignore_text, clear_skipping_text, sayas_mode, sayas_start:
 	//    state of the caller which is updated by the corresponding elements.
 	// ssml_stack, n_ssml_stack: the voice/language stack.
 	// n_param_stack, speech_parameters: the depth of the parameter stack and
 	//    the speech parameters currently in effect.
 	// returns a clause terminator value, or 0 if the tag does not end the clause.
 	//
 	// NOTE(review): xmlbase is received by value, so the value set from
 	// <speak xml:base=...> is not seen by the caller.
 	// NOTE(review): *n_ssml_stack is passed to GetVoiceAttributes() by value, so
 	// frames which it pushes or pops are not reflected in *n_ssml_stack.

 	unsigned int ix;
 	int index;
 	int c;
 	int tag_type;
 	int value;
 	int value2;
 	int value3;
 	int voice_change_flag;
 	wchar_t *px;
 	wchar_t *attr1;
 	wchar_t *attr2;
 	wchar_t *attr3;
 	int terminator;
 	char *uri;
 	int param_type;
 	char tag_name[40];
 	char buf[80];
 	PARAM_STACK *sp;
 	SSML_STACK *ssml_sp;

 	// these tags have no effect if they are self-closing, eg. <voice />
 	static char ignore_if_self_closing[] = { 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0 };

 	static const MNEM_TAB mnem_phoneme_alphabet[] = {
 		{ "espeak", 1 },
 		{ NULL,    -1 }
 	};

 	static const MNEM_TAB mnem_punct[] = {
 		{ "none", 1 },
 		{ "all",  2 },
 		{ "some", 3 },
 		{ NULL,  -1 }
 	};

 	static const MNEM_TAB mnem_capitals[] = {
 		{ "no",        0 },
 		{ "icon",      1 },
 		{ "spelling",  2 },
 		{ "pitch",    20 },  // this is the amount by which to raise the pitch
 		{ NULL,       -1 }
 	};

 	static const MNEM_TAB mnem_interpret_as[] = {
 		{ "characters", SAYAS_CHARS },
 		{ "tts:char",   SAYAS_SINGLE_CHARS },
 		{ "tts:key",    SAYAS_KEY },
 		{ "tts:digits", SAYAS_DIGITS },
 		{ "telephone",  SAYAS_DIGITS1 },
 		{ NULL,         -1 }
 	};

 	static const MNEM_TAB mnem_sayas_format[] = {
 		{ "glyphs", 1 },
 		{ NULL,    -1 }
 	};

 	static const MNEM_TAB mnem_break[] = {
 		{ "none",     0 },
 		{ "x-weak",   1 },
 		{ "weak",     2 },
 		{ "medium",   3 },
 		{ "strong",   4 },
 		{ "x-strong", 5 },
 		{ NULL,      -1 }
 	};

 	static const MNEM_TAB mnem_emphasis[] = {
 		{ "none",     1 },
 		{ "reduced",  2 },
 		{ "moderate", 3 },
 		{ "strong",   4 },
 		{ "x-strong", 5 },
 		{ NULL,      -1 }
 	};

 	static const char *prosody_attr[5] = {
 		NULL, "rate", "volume", "pitch", "range"
 	};

 	// get the tag name, in lower case
 	for (ix = 0; ix < (sizeof(tag_name)-1); ix++) {
 		if (((c = xml_buf[ix]) == 0) || iswspace(c))
 			break;
 		// tolower() needs a value in the range of unsigned char
 		tag_name[ix] = tolower((unsigned char)c);
 	}
 	tag_name[ix] = 0;

 	px = &xml_buf[ix]; // the tag's attributes

 	if (tag_name[0] == '/') {
 		// closing tag
 		if ((tag_type = LookupMnem(ssmltags, &tag_name[1])) != HTML_NOSPACE)
 			outbuf[(*outix)++] = ' ';
 		tag_type += SSML_CLOSE;
 	} else {
 		if ((tag_type = LookupMnem(ssmltags, tag_name)) != HTML_NOSPACE) {
 			// separate SSML tags from the previous word (but not HTML tags such as <b> <font> which can occur inside a word)
 			outbuf[(*outix)++] = ' ';
 		}

 		if (self_closing && ignore_if_self_closing[tag_type])
 			return 0;
 	}

 	voice_change_flag = 0;
 	ssml_sp = &ssml_stack[*n_ssml_stack-1]; // the top frame of the voice stack

 	switch (tag_type)
 	{
 	case SSML_STYLE:
 		sp = PushParamStack(tag_type, n_param_stack, (PARAM_STACK *) param_stack);
 		attr1 = GetSsmlAttribute(px, "field");
 		attr2 = GetSsmlAttribute(px, "mode");


 		if (attrcmp(attr1, "punctuation") == 0) {
 			value = attrlookup(attr2, mnem_punct);
 			sp->parameter[espeakPUNCTUATION] = value;
 		} else if (attrcmp(attr1, "capital_letters") == 0) {
 			value = attrlookup(attr2, mnem_capitals);
 			sp->parameter[espeakCAPITALS] = value;
 		}
 		ProcessParamStack(outbuf, outix, *n_param_stack, param_stack, speech_parameters);
 		break;
 	case SSML_PROSODY:
 		sp = PushParamStack(tag_type, n_param_stack, (PARAM_STACK *) param_stack);

 		// look for attributes:  rate, volume, pitch, range
 		for (param_type = espeakRATE; param_type <= espeakRANGE; param_type++) {
 			if ((attr1 = GetSsmlAttribute(px, prosody_attr[param_type])) != NULL)
 				SetProsodyParameter(param_type, attr1, sp, param_stack, speech_parameters);
 		}

 		ProcessParamStack(outbuf, outix, *n_param_stack, param_stack, speech_parameters);
 		break;
 	case SSML_EMPHASIS:
 		sp = PushParamStack(tag_type, n_param_stack, (PARAM_STACK *) param_stack);
 		value = 3; // default is "moderate"
 		if ((attr1 = GetSsmlAttribute(px, "level")) != NULL)
 			value = attrlookup(attr1, mnem_emphasis);
 		if (value < 0)
 			value = 3; // unrecognised level: use the default, value is an index into the tables below

 		if (translator->langopts.tone_language == 1) {
 			static unsigned char emphasis_to_pitch_range[] = { 50, 50, 40, 70, 90, 100 };
 			static unsigned char emphasis_to_volume[] = { 100, 100, 70, 110, 135, 150 };
 			// tone language (eg.Chinese) do emphasis by increasing the pitch range.
 			sp->parameter[espeakRANGE] = emphasis_to_pitch_range[value];
 			sp->parameter[espeakVOLUME] = emphasis_to_volume[value];
 		} else {
 			static unsigned char emphasis_to_volume2[] = { 100, 100, 75, 100, 120, 150 };
 			sp->parameter[espeakVOLUME] = emphasis_to_volume2[value];
 			sp->parameter[espeakEMPHASIS] = value;
 		}
 		ProcessParamStack(outbuf, outix, *n_param_stack, param_stack, speech_parameters);
 		break;
 	case SSML_STYLE + SSML_CLOSE:
 	case SSML_PROSODY + SSML_CLOSE:
 	case SSML_EMPHASIS + SSML_CLOSE:
 		PopParamStack(tag_type, outbuf, outix, n_param_stack, (PARAM_STACK *) param_stack, (int *) speech_parameters);
 		break;
 	case SSML_PHONEME:
 		attr1 = GetSsmlAttribute(px, "alphabet");
 		attr2 = GetSsmlAttribute(px, "ph");
 		value = attrlookup(attr1, mnem_phoneme_alphabet);
 		if (value == 1) { // alphabet="espeak"
 			// pass the phoneme string through inside [[ ]]
 			outbuf[(*outix)++] = '[';
 			outbuf[(*outix)++] = '[';
 			*outix += attrcopy_utf8(&outbuf[*outix], attr2, n_outbuf-*outix);
 			outbuf[(*outix)++] = ']';
 			outbuf[(*outix)++] = ']';
 		}
 		break;
 	case SSML_SAYAS:
 		attr1 = GetSsmlAttribute(px, "interpret-as");
 		attr2 = GetSsmlAttribute(px, "format");
 		attr3 = GetSsmlAttribute(px, "detail");
 		value = attrlookup(attr1, mnem_interpret_as);
 		value2 = attrlookup(attr2, mnem_sayas_format);
 		if (value2 == 1)
 			value = SAYAS_GLYPHS;

 		value3 = attrnumber(attr3, 0, 0);

 		if (value == SAYAS_DIGITS) {
 			if (value3 <= 1)
 				value = SAYAS_DIGITS1;
 			else
 				value = SAYAS_DIGITS + value3;
 		}

 		sprintf(buf, "%c%dY", CTRL_EMBEDDED, value);
 		strcpy(&outbuf[*outix], buf);
 		*outix += strlen(buf);

 		*sayas_start = *outix;
 		*sayas_mode = value; // punctuation doesn't end clause during SAY-AS
 		break;
 	case SSML_SAYAS + SSML_CLOSE:
 		if (*sayas_mode == SAYAS_KEY) {
 			outbuf[*outix] = 0;
 			ReplaceKeyName(outbuf, *sayas_start, outix);
 		}

 		outbuf[(*outix)++] = CTRL_EMBEDDED;
 		outbuf[(*outix)++] = 'Y';
 		*sayas_mode = 0;
 		break;
 	case SSML_SUB:
 		if ((attr1 = GetSsmlAttribute(px, "alias")) != NULL) {
 			// use the alias  rather than the text
 			*ignore_text = true;
 			*outix += attrcopy_utf8(&outbuf[*outix], attr1, n_outbuf-*outix);
 		}
 		break;
 	case SSML_IGNORE_TEXT:
 		*ignore_text = true;
 		break;
 	case SSML_SUB + SSML_CLOSE:
 	case SSML_IGNORE_TEXT + SSML_CLOSE:
 		*ignore_text = false;
 		break;
 	case SSML_MARK:
 		if ((attr1 = GetSsmlAttribute(px, "name")) != NULL) {
 			// add name to circular buffer of marker names
 			attrcopy_utf8(buf, attr1, sizeof(buf));

 			if (strcmp(skip_marker, buf) == 0) {
 				// This is the marker we are waiting for before starting to speak
 				*clear_skipping_text = true;
 				skip_marker[0] = 0;
 				return CLAUSE_NONE;
 			}

 			if ((index = AddNameData(buf, 0)) >= 0) {
 				sprintf(buf, "%c%dM", CTRL_EMBEDDED, index);
 				strcpy(&outbuf[*outix], buf);
 				*outix += strlen(buf);
 			}
 		}
 		break;
 	case SSML_AUDIO:
 		sp = PushParamStack(tag_type, n_param_stack, (PARAM_STACK *)param_stack);

 		if ((attr1 = GetSsmlAttribute(px, "src")) != NULL) {
 			char fname[256];
 			attrcopy_utf8(buf, attr1, sizeof(buf));

 			if (uri_callback == NULL) {
 				// no callback, so load the sound file here
 				if ((xmlbase != NULL) && (buf[0] != '/')) {
 					// the length of xmlbase is not limited, so bound the write to fname
 					snprintf(fname, sizeof(fname), "%s/%s", xmlbase, buf);
 					index = LoadSoundFile2(fname);
 				} else
 					index = LoadSoundFile2(buf);
 				if (index >= 0) {
 					sprintf(buf, "%c%dI", CTRL_EMBEDDED, index);
 					strcpy(&outbuf[*outix], buf);
 					*outix += strlen(buf);
 					sp->parameter[espeakSILENCE] = 1;
 				}
 			} else {
 				if ((index = AddNameData(buf, 0)) >= 0) {
 					uri = &namedata[index];
 					if (uri_callback(1, uri, xmlbase) == 0) {
 						sprintf(buf, "%c%dU", CTRL_EMBEDDED, index);
 						strcpy(&outbuf[*outix], buf);
 						*outix += strlen(buf);
 						sp->parameter[espeakSILENCE] = 1;
 					}
 				}
 			}
 		}
 		ProcessParamStack(outbuf, outix, *n_param_stack, param_stack, speech_parameters);

 		if (self_closing)
 			PopParamStack(tag_type, outbuf, outix, n_param_stack, (PARAM_STACK *) param_stack, (int *) speech_parameters);
 		else
 			*audio_text = true;
 		return CLAUSE_NONE;
 	case SSML_AUDIO + SSML_CLOSE:
 		PopParamStack(tag_type, outbuf, outix, n_param_stack, (PARAM_STACK *) param_stack, (int *) speech_parameters);
 		*audio_text = false;
 		return CLAUSE_NONE;
 	case SSML_BREAK:
 		value = 21;
 		terminator = CLAUSE_NONE;

 		if ((attr1 = GetSsmlAttribute(px, "strength")) != NULL) {
 			static int break_value[6] = { 0, 7, 14, 21, 40, 80 }; // *10mS
 			value = attrlookup(attr1, mnem_break);
 			if (value < 0)
 				value = 3; // unrecognised strength: treat as "medium", which gives the same pause as no strength attribute
 			if (value < 3) {
 				// adjust prepause on the following word
 				// (value is a single digit here, so the command is 3 characters)
 				sprintf(&outbuf[*outix], "%c%dB", CTRL_EMBEDDED, value);
 				*outix += 3;
 				terminator = 0;
 			}
 			value = break_value[value];
 		}
 		if ((attr2 = GetSsmlAttribute(px, "time")) != NULL) {
 			value2 = attrnumber(attr2, 0, 1);   // pause in mS

 			// compensate for speaking speed to keep constant pause length, see function PauseLength()
 			// 'value' here is x 10mS
 			value = (value2 * 256) / (speed.clause_pause_factor * 10);
 			if (value < 200)
 				value = (value2 * 256) / (speed.pause_factor * 10);

 			if (terminator == 0)
 				terminator = CLAUSE_NONE;
 		}
 		if (terminator) {
 			if (value > 0xfff) {
 				// scale down the value and set a scaling indicator bit
 				value = value / 32;
 				if (value > 0xfff)
 					value = 0xfff;
 				terminator |= CLAUSE_PAUSE_LONG;
 			}
 			return terminator + value;
 		}
 		break;
 	case SSML_SPEAK:
 		if ((attr1 = GetSsmlAttribute(px, "xml:base")) != NULL) {
 			attrcopy_utf8(buf, attr1, sizeof(buf));
 			if ((index = AddNameData(buf, 0)) >= 0)
 				xmlbase = &namedata[index];
 		}
 		if (GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name) == 0)
 			return 0; // no voice change
 		return CLAUSE_VOICE;
 	case SSML_VOICE:
 		if (GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name) == 0)
 			return 0; // no voice change
 		return CLAUSE_VOICE;
 	case SSML_SPEAK + SSML_CLOSE:
 		// unwind stack until the previous <speak> tag
 		while ((*n_ssml_stack > 1) && (ssml_stack[*n_ssml_stack-1].tag_type != SSML_SPEAK))
 			(*n_ssml_stack)--;
 		return CLAUSE_PERIOD + GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
 	case SSML_VOICE + SSML_CLOSE:
 		// unwind stack until the previous <voice> tag
 		while ((*n_ssml_stack > 1) && (ssml_stack[*n_ssml_stack-1].tag_type != SSML_VOICE))
 			(*n_ssml_stack)--;

 		terminator = 0; // ??  Sentence intonation, but no pause ??
 		return terminator + GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
 	case HTML_BREAK:
 	case HTML_BREAK + SSML_CLOSE:
 		return CLAUSE_COLON;
 	case SSML_SENTENCE:
 		if (ssml_sp->tag_type == SSML_SENTENCE) {
 			// new sentence implies end-of-sentence
 			voice_change_flag = GetVoiceAttributes(px, SSML_SENTENCE+SSML_CLOSE, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
 		}
 		voice_change_flag |= GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
 		return CLAUSE_PARAGRAPH + voice_change_flag;
 	case SSML_PARAGRAPH:
 		if (ssml_sp->tag_type == SSML_SENTENCE) {
 			// new paragraph implies end-of-sentence or end-of-paragraph
 			voice_change_flag = GetVoiceAttributes(px, SSML_SENTENCE+SSML_CLOSE, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
 		}
 		if (ssml_sp->tag_type == SSML_PARAGRAPH) {
 			// new paragraph implies end-of-sentence or end-of-paragraph
 			voice_change_flag |= GetVoiceAttributes(px, SSML_PARAGRAPH+SSML_CLOSE, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
 		}
 		voice_change_flag |= GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
 		return CLAUSE_PARAGRAPH + voice_change_flag;
 	case SSML_SENTENCE + SSML_CLOSE:
 		if (ssml_sp->tag_type == SSML_SENTENCE) {
 			// end of a sentence which specified a language
 			voice_change_flag = GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name);
 		}
 		return CLAUSE_PERIOD + voice_change_flag;
 	case SSML_PARAGRAPH + SSML_CLOSE:
 		if ((ssml_sp->tag_type == SSML_SENTENCE) || (ssml_sp->tag_type == SSML_PARAGRAPH)) {
 			// End of a paragraph which specified a language.
 			// (End-of-paragraph also implies end-of-sentence)
 			return GetVoiceAttributes(px, tag_type, ssml_sp, ssml_stack, *n_ssml_stack, current_voice_id, base_voice, base_voice_variant_name) + CLAUSE_PARAGRAPH;
 		}
 		return CLAUSE_PARAGRAPH;
 	}
 	return 0;
 }
--- a/src/libespeak-ng/ssml.h
+++ b/src/libespeak-ng/ssml.h
@@ -0,0 +1,88 @@
 /* SSML (Speech Synthesis Markup Language) processing APIs.
 *
 * Copyright (C) 2005 to 2015 by Jonathan Duddington
 * email: jonsd@users.sourceforge.net
 * Copyright (C) 2015-2018 Reece H. Dunn
 * Copyright (C) 2018 Juho Hiltunen
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see: <http://www.gnu.org/licenses/>.
 */
 #ifndef ESPEAK_NG_SSML_API
 #define ESPEAK_NG_SSML_API

 #ifdef __cplusplus
 extern "C"
 {
 #endif

 // stack for language and voice properties
 // frame 0 is for the defaults, before any ssml tags.
 // A frame is filled in from the attributes of the element which opened it.
 typedef struct {
        int tag_type;              // type of the element which created the frame (SSML_VOICE, SSML_SENTENCE, ...)
        int voice_variant_number;  // from the "variant" attribute, less one if it was above zero
        int voice_gender;          // from the "gender" attribute; ENGENDER_UNKNOWN if it is not recognised
        int voice_age;             // from the "age" attribute, 0 if not given
        char voice_name[40];       // "name" attribute as UTF-8, empty if not given
        char language[20];         // "xml:lang" attribute as UTF-8, empty if not given
 } SSML_STACK;

 // number of frames in the parameter stack
 #define N_PARAM_STACK  20

 // Tag types, as held in the ssmltags table in ssml.c.
 // ProcessSsmlTag() uses the values as an index into its table of elements
 // which are ignored when self-closing, so that table has to be kept in step
 // with any change made here.
 #define SSML_SPEAK        1
 #define SSML_VOICE        2
 #define SSML_PROSODY      3
 #define SSML_SAYAS        4
 #define SSML_MARK         5
 #define SSML_SENTENCE     6
 #define SSML_PARAGRAPH    7
 #define SSML_PHONEME      8
 #define SSML_SUB          9
 #define SSML_STYLE       10
 #define SSML_AUDIO       11
 #define SSML_EMPHASIS    12
 #define SSML_BREAK       13
 #define SSML_IGNORE_TEXT 14
 #define HTML_BREAK       15
 #define HTML_NOSPACE     16   // don't insert a space for this element, so it doesn't break a word
 #define SSML_CLOSE       0x20 // for a closing tag, OR this with the tag type

 // Load a sound file into one of the reserved slots in the sound icon table
 // (if it's not already loaded). Defined in readclause.c.
 // ProcessSsmlTag() treats a result >= 0 as the index of the sound.
 int LoadSoundFile2(const char *fname);

 // ProcessSsmlTag() treats a result >= 0 as the position of the name within namedata.
 // NOTE(review): the meaning of 'wide' is not visible here; ssml.c always passes 0.
 int AddNameData(const char *name,
                int wide);

 // Process one SSML (or HTML) tag.
 // xml_buf holds the tag and its attributes with a zero terminator in place of
 // the original '>'. Embedded commands and replacement text are added to outbuf
 // at *outix. The remaining pointer parameters are state belonging to the caller
 // which the tag can update.
 // Returns a clause terminator value, or 0 if the tag does not end the clause.
 int ProcessSsmlTag(wchar_t *xml_buf,
                   char *outbuf,
                   int *outix,
                   int n_outbuf,
                   bool self_closing,
                   const char *xmlbase,
                   bool *audio_text,
                   char *current_voice_id,
                   espeak_VOICE *base_voice,
                   char *base_voice_variant_name,
                   bool *ignore_text,
                   bool *clear_skipping_text,
                   int *sayas_mode,
                   int *sayas_start,
                   SSML_STACK *ssml_stack,
                   int *n_ssml_stack,
                   int *n_param_stack,
                   int *speech_parameters);

 #ifdef __cplusplus
 }
 #endif

 #endif