|
|
@@ -42,6 +42,7 @@ |
|
|
|
#include "voice.h" |
|
|
|
#include "synthesize.h" |
|
|
|
#include "translate.h" |
|
|
|
#include "ssml.h" |
|
|
|
|
|
|
|
// NOTE(review): presumably the capacity of a buffer that holds XML tag text -- its use is not visible here, confirm |
#define N_XML_BUF 500 |
|
|
|
|
|
|
@@ -64,17 +65,6 @@ static int sayas_mode; |
|
|
|
// File-scope state, private to this translation unit. |
// NOTE(review): presumably records where the current <say-as> content starts -- confirm against the code that sets it |
static int sayas_start; |
|
|
|
// Explicitly initialised to 0. |
// NOTE(review): the name suggests a flag for treating a '<' as ordinary text rather than the start of a tag -- confirm |
static int ssml_ignore_l_angle = 0; |
|
|
|
|
|
|
|
// One frame of the stack of language and voice properties. |
|
|
|
// Frame 0 holds the defaults that apply before any SSML tags. |
|
|
|
typedef struct { |
|
|
|
// NOTE(review): presumably the SSML_* code of the tag this frame belongs to -- confirm |
int tag_type; |
|
|
|
// Voice selection properties stored for this frame. |
int voice_variant_number; |
|
|
|
int voice_gender; |
|
|
|
int voice_age; |
|
|
|
// Fixed 40-byte buffer for the voice name. |
// NOTE(review): presumably a NUL-terminated string -- confirm that writers bound their copies to 40 bytes |
char voice_name[40]; |
|
|
|
// Fixed 20-byte buffer for the language. |
// NOTE(review): presumably a NUL-terminated string -- confirm that writers bound their copies to 20 bytes |
char language[20]; |
|
|
|
} SSML_STACK; |
|
|
|
|
|
|
|
// Capacity of ssml_stack, in frames. |
#define N_SSML_STACK 20 |
|
|
|
// NOTE(review): presumably the number of frames currently in use -- confirm against the push/pop code |
static int n_ssml_stack; |
|
|
|
// Storage for the stack frames; holds N_SSML_STACK entries. |
static SSML_STACK ssml_stack[N_SSML_STACK]; |
|
|
@@ -560,66 +550,7 @@ static int AnnouncePunctuation(Translator *tr, int c1, int *c2_ptr, char *output |
|
|
|
return short_pause; |
|
|
|
} |
|
|
|
|
|
|
|
// Tag type codes. The ssmltags table maps element names to these values. |
#define SSML_SPEAK 1 |
|
|
|
#define SSML_VOICE 2 |
|
|
|
#define SSML_PROSODY 3 |
|
|
|
#define SSML_SAYAS 4 |
|
|
|
#define SSML_MARK 5 |
|
|
|
#define SSML_SENTENCE 6 |
|
|
|
#define SSML_PARAGRAPH 7 |
|
|
|
#define SSML_PHONEME 8 |
|
|
|
#define SSML_SUB 9 |
|
|
|
#define SSML_STYLE 10 |
|
|
|
#define SSML_AUDIO 11 |
|
|
|
#define SSML_EMPHASIS 12 |
|
|
|
#define SSML_BREAK 13 |
|
|
|
// Used by ssmltags for elements such as "metadata", "script" and "style". |
#define SSML_IGNORE_TEXT 14 |
|
|
|
// Codes used only for HTML elements in ssmltags. |
#define HTML_BREAK 15 |
|
|
|
#define HTML_NOSPACE 16 // don't insert a space for this element, so it doesn't break a word |
|
|
|
// 0x20 lies above every tag code (the largest is 16), so the flag never overlaps a code. |
#define SSML_CLOSE 0x20 // for a closing tag, OR this with the tag type |
|
|
|
|
|
|
|
// These tags have no effect if they are self-closing, e.g. <voice /> |
// A non-zero entry marks such a tag. The table has 19 entries; the non-zero ones are at indices 1, 2, 3, 4, 9, 10, 12 and 14. |
// NOTE(review): presumably indexed by tag type code, which would make those SSML_SPEAK, SSML_VOICE, SSML_PROSODY, |
// SSML_SAYAS, SSML_SUB, SSML_STYLE, SSML_EMPHASIS and SSML_IGNORE_TEXT -- confirm at the lookup site |
|
|
|
static char ignore_if_self_closing[] = { 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0 }; |
|
|
|
|
|
|
|
// Table pairing element names with tag type codes. Several names can share one code. |
// The list ends with a { NULL, 0 } entry. |
static MNEM_TAB ssmltags[] = { |
|
|
|
// SSML elements |
{ "speak", SSML_SPEAK }, |
|
|
|
{ "voice", SSML_VOICE }, |
|
|
|
{ "prosody", SSML_PROSODY }, |
|
|
|
{ "say-as", SSML_SAYAS }, |
|
|
|
{ "mark", SSML_MARK }, |
|
|
|
{ "s", SSML_SENTENCE }, |
|
|
|
{ "p", SSML_PARAGRAPH }, |
|
|
|
{ "phoneme", SSML_PHONEME }, |
|
|
|
{ "sub", SSML_SUB }, |
|
|
|
{ "tts:style", SSML_STYLE }, |
|
|
|
{ "audio", SSML_AUDIO }, |
|
|
|
{ "emphasis", SSML_EMPHASIS }, |
|
|
|
{ "break", SSML_BREAK }, |
|
|
|
{ "metadata", SSML_IGNORE_TEXT }, |
|
|
|
|
|
|
|
// HTML elements; most reuse the SSML paragraph and ignore-text codes. |
{ "br", HTML_BREAK }, |
|
|
|
{ "li", HTML_BREAK }, |
|
|
|
{ "dd", HTML_BREAK }, |
|
|
|
{ "img", HTML_BREAK }, |
|
|
|
{ "td", HTML_BREAK }, |
|
|
|
{ "h1", SSML_PARAGRAPH }, |
|
|
|
{ "h2", SSML_PARAGRAPH }, |
|
|
|
{ "h3", SSML_PARAGRAPH }, |
|
|
|
{ "h4", SSML_PARAGRAPH }, |
|
|
|
{ "hr", SSML_PARAGRAPH }, |
|
|
|
{ "script", SSML_IGNORE_TEXT }, |
|
|
|
{ "style", SSML_IGNORE_TEXT }, |
|
|
|
// Inline formatting elements: no space is inserted, so they do not break a word. |
{ "font", HTML_NOSPACE }, |
|
|
|
{ "b", HTML_NOSPACE }, |
|
|
|
{ "i", HTML_NOSPACE }, |
|
|
|
{ "strong", HTML_NOSPACE }, |
|
|
|
{ "em", HTML_NOSPACE }, |
|
|
|
{ "code", HTML_NOSPACE }, |
|
|
|
|
|
|
|
// End-of-table marker. |
{ NULL, 0 } |
|
|
|
}; |
|
|
|
|
|
|
|
static const char *VoiceFromStack() |
|
|
|
const char *VoiceFromStack() |
|
|
|
{ |
|
|
|
// Use the voice properties from the SSML stack to choose a voice, and switch |
|
|
|
// to that voice if it's not the current voice |