Browse Source

Better specify the CLAUSE_ flags returned by ReadClause.

master
Reece H. Dunn 8 years ago
parent
commit
8749891069

+ 1
- 1
src/libespeak-ng/dictionary.c View File

@@ -2724,7 +2724,7 @@ static const char *LookupDict2(Translator *tr, const char *word, const char *wor
continue;
}

if ((dictionary_flags2 & FLAG_SENTENCE) && !(translator->clause_terminator & CLAUSE_BIT_SENTENCE)) {
if ((dictionary_flags2 & FLAG_SENTENCE) && !(translator->clause_terminator & CLAUSE_TYPE_SENTENCE)) {
// only if this clause is a sentence, i.e. terminator is {. ? !} not {, : ;}
continue;
}

+ 28
- 28
src/libespeak-ng/readclause.c View File

@@ -126,29 +126,29 @@ static const unsigned int punct_attributes[] = {
CLAUSE_COLON,
CLAUSE_SEMICOLON,

CLAUSE_SEMICOLON | 0x8000, // inverted exclamation
CLAUSE_SEMICOLON | 0x8000, // inverted question
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted exclamation
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted question
CLAUSE_SEMICOLON, // en-dash
CLAUSE_SEMICOLON, // em-dash
CLAUSE_SEMICOLON | PUNCT_SAY_NAME | 0x8000, // elipsis
CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER, // elipsis

CLAUSE_QUESTION, // Greek question mark
CLAUSE_SEMICOLON, // Greek semicolon
CLAUSE_PERIOD | 0x8000, // Devanagari Danda (fullstop)
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // Devanagari Danda (fullstop)

CLAUSE_PERIOD | 0x8000, // Armenian period
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // Armenian period
CLAUSE_COMMA, // Armenian comma
CLAUSE_EXCLAMATION | PUNCT_IN_WORD, // Armenian exclamation
CLAUSE_QUESTION | PUNCT_IN_WORD, // Armenian question
CLAUSE_PERIOD | PUNCT_IN_WORD, // Armenian emphasis mark
CLAUSE_EXCLAMATION | CLAUSE_PUNCTUATION_IN_WORD, // Armenian exclamation
CLAUSE_QUESTION | CLAUSE_PUNCTUATION_IN_WORD, // Armenian question
CLAUSE_PERIOD | CLAUSE_PUNCTUATION_IN_WORD, // Armenian emphasis mark

CLAUSE_COMMA, // Arabic ,
CLAUSE_SEMICOLON, // Arabic ;
CLAUSE_QUESTION, // Arabic question mark
CLAUSE_PERIOD, // Arabic full stop

CLAUSE_PERIOD+0x8000, // Singhalese period
CLAUSE_PERIOD+0x8000, // Tibet period
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // Singhalese period
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // Tibet period
CLAUSE_PARAGRAPH,

CLAUSE_PERIOD, // Ethiopic period
@@ -160,15 +160,15 @@ static const unsigned int punct_attributes[] = {
CLAUSE_PARAGRAPH, // Ethiopic paragraph
CLAUSE_PARAGRAPH, // Georgian paragraph

CLAUSE_COMMA+0x8000, // ideograph comma
CLAUSE_PERIOD+0x8000, // ideograph period
CLAUSE_COMMA | CLAUSE_OPTIONAL_SPACE_AFTER, // ideograph comma
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // ideograph period

CLAUSE_EXCLAMATION+0x8000, // fullwidth
CLAUSE_COMMA+0x8000,
CLAUSE_PERIOD+0x8000,
CLAUSE_COLON+0x8000,
CLAUSE_SEMICOLON+0x8000,
CLAUSE_QUESTION+0x8000,
CLAUSE_EXCLAMATION | CLAUSE_OPTIONAL_SPACE_AFTER, // fullwidth
CLAUSE_COMMA | CLAUSE_OPTIONAL_SPACE_AFTER,
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER,
CLAUSE_COLON | CLAUSE_OPTIONAL_SPACE_AFTER,
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER,
CLAUSE_QUESTION | CLAUSE_OPTIONAL_SPACE_AFTER,

CLAUSE_SEMICOLON, // spare
0
@@ -607,7 +607,7 @@ static int AnnouncePunctuation(Translator *tr, int c1, int *c2_ptr, char *output
attributes = punct_attributes[lookupwchar(punct_chars, c1)];

short_pause = CLAUSE_SHORTFALL;
if ((attributes & CLAUSE_BITS_INTONATION) == 0x1000)
if ((attributes & CLAUSE_INTONATION_TYPE) == 0x1000)
short_pause = CLAUSE_SHORTCOMMA;

if ((bufix1 > 0) && !(tr->langopts.param[LOPT_ANNOUNCE_PUNCT] & 2)) {
@@ -616,7 +616,7 @@ static int AnnouncePunctuation(Translator *tr, int c1, int *c2_ptr, char *output
return short_pause;
}

if (attributes & CLAUSE_BIT_SENTENCE)
if (attributes & CLAUSE_TYPE_SENTENCE)
return attributes;

return short_pause;
@@ -1038,7 +1038,7 @@ static int GetVoiceAttributes(wchar_t *pw, int tag_type)
// a voice change.
// If it's a closing tag, delete the top frame of the stack and determine whether this implies
// a voice change.
// Returns CLAUSE_BIT_VOICE if there is a voice change
// Returns CLAUSE_TYPE_VOICE_CHANGE if there is a voice change

wchar_t *lang;
wchar_t *gender;
@@ -1096,7 +1096,7 @@ static int GetVoiceAttributes(wchar_t *pw, int tag_type)
if (strcmp(new_voice_id, current_voice_id) != 0) {
// add an embedded command to change the voice
strcpy(current_voice_id, new_voice_id);
return CLAUSE_BIT_VOICE; // change of voice
return CLAUSE_TYPE_VOICE_CHANGE;
}

return 0;
@@ -1780,7 +1780,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
buf[ix] = ' ';
buf[ix++] = 0;

if (terminator & CLAUSE_BIT_VOICE)
if (terminator & CLAUSE_TYPE_VOICE_CHANGE)
strcpy(voice_change, current_voice_id);
return terminator;
}
@@ -1993,7 +1993,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
if ((punct = lookupwchar(punct_chars, c1)) != 0) {
punct_data = punct_attributes[punct];

if (punct_data & PUNCT_IN_WORD) {
if (punct_data & CLAUSE_PUNCTUATION_IN_WORD) {
// Armenian punctuation inside a word
stressed_word = 1;
*tone_type = punct_data >> 12 & 0xf; // override the end-of-sentence type
@@ -2019,7 +2019,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
}
}

if ((punct_data & PUNCT_SAY_NAME) && (announced_punctuation == 0)) {
if ((punct_data & CLAUSE_SPEAK_PUNCTUATION_NAME) && (announced_punctuation == 0)) {
// used for ellipsis (and 3 dots) if a pronunciation for ellipsis is given in *_list
char *p2;

@@ -2028,7 +2028,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
if (p2[0] != 0) {
ix += strlen(p2);
announced_punctuation = c1;
punct_data = punct_data & ~CLAUSE_BITS_INTONATION; // change intonation type to 0 (full-stop)
punct_data = punct_data & ~CLAUSE_INTONATION_TYPE; // change intonation type to 0 (full-stop)
}
}

@@ -2045,7 +2045,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
}

if ((c1 == '.') && (nl_count < 2))
punct_data |= CLAUSE_DOT;
punct_data |= CLAUSE_DOT_AFTER_LAST_WORD;

if (nl_count == 0) {
if ((c1 == ',') && (cprev == '.') && (tr->translator_name == L('h', 'u')) && iswdigit(cprev2) && (iswdigit(c_next) || (iswlower(c_next)))) {
@@ -2095,7 +2095,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
buf[ix+1] = 0;

if (iswdigit(cprev) && !IsAlpha(c_next)) // ????
punct_data &= ~CLAUSE_DOT;
punct_data &= ~CLAUSE_DOT_AFTER_LAST_WORD;
if (nl_count > 1) {
if ((punct_data == CLAUSE_QUESTION) || (punct_data == CLAUSE_EXCLAMATION))
return punct_data + 35; // with a longer pause

+ 5
- 5
src/libespeak-ng/translate.c View File

@@ -2004,7 +2004,7 @@ void TranslateClause(Translator *tr, int *tone_out, char **voice_change)
clause_pause = 0;

if (new_sentence)
terminator |= CLAUSE_BIT_SENTENCE; // carry forward an end-of-sentence indicator
terminator |= CLAUSE_TYPE_SENTENCE; // carry forward an end-of-sentence indicator
max_clause_pause += clause_pause;
new_sentence2 = 0;
} else {
@@ -2484,7 +2484,7 @@ void TranslateClause(Translator *tr, int *tone_out, char **voice_change)
words[ix].flags |= FLAG_LAST_WORD;

// FLAG_NOSPACE check to avoid recognizing .mr -mr
if ((terminator & CLAUSE_DOT) && !(words[word_count-1].flags & FLAG_NOSPACE))
if ((terminator & CLAUSE_DOT_AFTER_LAST_WORD) && !(words[word_count-1].flags & FLAG_NOSPACE))
words[word_count-1].flags |= FLAG_HAS_DOT;
}
words[0].flags |= FLAG_FIRST_WORD;
@@ -2612,7 +2612,7 @@ void TranslateClause(Translator *tr, int *tone_out, char **voice_change)
}
}

if ((dict_flags & (FLAG_ALLOW_DOT | FLAG_NEEDS_DOT)) && (ix == word_count - 1 - dictionary_skipwords) && (terminator & CLAUSE_DOT)) {
if ((dict_flags & (FLAG_ALLOW_DOT | FLAG_NEEDS_DOT)) && (ix == word_count - 1 - dictionary_skipwords) && (terminator & CLAUSE_DOT_AFTER_LAST_WORD)) {
// probably an abbreviation such as Mr. or B. rather than end of sentence
clause_pause = 10;
tone = 4;
@@ -2665,12 +2665,12 @@ void TranslateClause(Translator *tr, int *tone_out, char **voice_change)
*tone_out = tone;

new_sentence = 0;
if (terminator & CLAUSE_BIT_SENTENCE)
if (terminator & CLAUSE_TYPE_SENTENCE)
new_sentence = 1; // next clause is a new sentence

if (voice_change != NULL) {
// return new voice name if an embedded voice change command terminated the clause
if (terminator & CLAUSE_BIT_VOICE)
if (terminator & CLAUSE_TYPE_VOICE_CHANGE)
*voice_change = voice_change_name;
else
*voice_change = NULL;

+ 38
- 32
src/libespeak-ng/translate.h View File

@@ -198,38 +198,44 @@ extern "C"
#define LETTERGP_Y 6
#define LETTERGP_VOWEL2 7

// Punctuation types returned by ReadClause()
//
// Bit layout of the returned value:
// bits 0-11   pause x 10mS
// bits 12-14  intonation type
// bit 15      don't need space after the punctuation
// bit 16      used to distinguish otherwise identical types
// bit 17      voice change
// bit 18      clause
// bit 19      sentence
// bit 20      punctuation character can be inside a word (Armenian)
// bit 21      speak the name of the punctuation character
// bit 22      dot after the last word
// bit 23      pause is x 320mS (not x 10mS)

// Single-bit flags and the intonation field mask for the layout above.
#define CLAUSE_BIT_SENTENCE 0x80000
#define CLAUSE_BIT_CLAUSE 0x40000
#define CLAUSE_BIT_VOICE 0x20000
#define CLAUSE_BITS_INTONATION 0x7000
#define PUNCT_IN_WORD 0x100000
#define PUNCT_SAY_NAME 0x200000
#define CLAUSE_DOT 0x400000
#define CLAUSE_PAUSE_LONG 0x800000

// Terminator presets: the decimal term is the pause (bits 0-11), the hex term
// carries the intonation type (bits 12-14) and the type bits (16-19).
#define CLAUSE_NONE ( 0 + 0x04000)
#define CLAUSE_PARAGRAPH (70 + 0x80000)
#define CLAUSE_EOF (40 + 0x90000)
#define CLAUSE_VOICE ( 0 + 0x24000)
#define CLAUSE_PERIOD (40 + 0x80000)
#define CLAUSE_COMMA (20 + 0x41000)
#define CLAUSE_SHORTCOMMA ( 4 + 0x41000)
#define CLAUSE_SHORTFALL ( 4 + 0x40000)
#define CLAUSE_QUESTION (40 + 0x82000)
#define CLAUSE_EXCLAMATION (45 + 0x83000)
#define CLAUSE_COLON (30 + 0x40000)
#define CLAUSE_SEMICOLON (30 + 0x41000)
// Clause terminator values returned by ReadClause()
//
// A terminator is a bit field. The field masks and single-bit flags come
// first, then the values of the intonation and phrase-type fields, then the
// presets that combine a pause length with an intonation and a phrase type.
//@{

// Field masks and single-bit flags, written by bit position.
#define CLAUSE_PAUSE                  ((1 << 12) - 1) // bits 0-11: pause (x 10mS)
#define CLAUSE_INTONATION_TYPE        (7 << 12)       // bits 12-14: intonation type
#define CLAUSE_OPTIONAL_SPACE_AFTER   (1 << 15)       // don't need space after the punctuation
#define CLAUSE_TYPE                   (15 << 16)      // bits 16-19: phrase type
#define CLAUSE_PUNCTUATION_IN_WORD    (1 << 20)       // punctuation character can be inside a word (Armenian)
#define CLAUSE_SPEAK_PUNCTUATION_NAME (1 << 21)       // speak the name of the punctuation character
#define CLAUSE_DOT_AFTER_LAST_WORD    (1 << 22)       // dot after the last word
#define CLAUSE_PAUSE_LONG             (1 << 23)       // CLAUSE_PAUSE is x 320mS (not x 10mS)

// Values of the CLAUSE_INTONATION_TYPE field.
#define CLAUSE_INTONATION_FULL_STOP   (0 << 12)
#define CLAUSE_INTONATION_COMMA       (1 << 12)
#define CLAUSE_INTONATION_QUESTION    (2 << 12)
#define CLAUSE_INTONATION_EXCLAMATION (3 << 12)
#define CLAUSE_INTONATION_NONE        (4 << 12)

// Bits of the CLAUSE_TYPE field.
#define CLAUSE_TYPE_NONE              0
#define CLAUSE_TYPE_EOF               (1 << 16) // tells CLAUSE_EOF apart from the otherwise identical CLAUSE_PERIOD
#define CLAUSE_TYPE_VOICE_CHANGE      (1 << 17)
#define CLAUSE_TYPE_CLAUSE            (1 << 18)
#define CLAUSE_TYPE_SENTENCE          (1 << 19)

// Presets: phrase type | intonation | pause count.
#define CLAUSE_NONE        (CLAUSE_TYPE_NONE | CLAUSE_INTONATION_NONE | 0)
#define CLAUSE_PARAGRAPH   (CLAUSE_TYPE_SENTENCE | CLAUSE_INTONATION_FULL_STOP | 70)
#define CLAUSE_EOF         (CLAUSE_TYPE_SENTENCE | CLAUSE_TYPE_EOF | CLAUSE_INTONATION_FULL_STOP | 40)
#define CLAUSE_VOICE       (CLAUSE_TYPE_VOICE_CHANGE | CLAUSE_INTONATION_NONE | 0)
#define CLAUSE_PERIOD      (CLAUSE_TYPE_SENTENCE | CLAUSE_INTONATION_FULL_STOP | 40)
#define CLAUSE_COMMA       (CLAUSE_TYPE_CLAUSE | CLAUSE_INTONATION_COMMA | 20)
#define CLAUSE_SHORTCOMMA  (CLAUSE_TYPE_CLAUSE | CLAUSE_INTONATION_COMMA | 4)
#define CLAUSE_SHORTFALL   (CLAUSE_TYPE_CLAUSE | CLAUSE_INTONATION_FULL_STOP | 4)
#define CLAUSE_QUESTION    (CLAUSE_TYPE_SENTENCE | CLAUSE_INTONATION_QUESTION | 40)
#define CLAUSE_EXCLAMATION (CLAUSE_TYPE_SENTENCE | CLAUSE_INTONATION_EXCLAMATION | 45)
#define CLAUSE_COLON       (CLAUSE_TYPE_CLAUSE | CLAUSE_INTONATION_FULL_STOP | 30)
#define CLAUSE_SEMICOLON   (CLAUSE_TYPE_CLAUSE | CLAUSE_INTONATION_COMMA | 30)

//@}

#define SAYAS_CHARS 0x12
#define SAYAS_GLYPHS 0x13

Loading…
Cancel
Save