8 years ago · 8749891069
--- a/src/libespeak-ng/dictionary.c
+++ b/src/libespeak-ng/dictionary.c
@@ -2724,7 +2724,7 @@ static const char *LookupDict2(Translator *tr, const char *word, const char *wor
 			continue;
 		}

 		if ((dictionary_flags2 & FLAG_SENTENCE) && !(translator->clause_terminator & CLAUSE_BIT_SENTENCE)) {
 		if ((dictionary_flags2 & FLAG_SENTENCE) && !(translator->clause_terminator & CLAUSE_TYPE_SENTENCE)) {
 			// only if this clause is a sentence , i.e. terminator is {. ? !} not {, : :}
 			continue;
 		}
--- a/src/libespeak-ng/readclause.c
+++ b/src/libespeak-ng/readclause.c
@@ -126,29 +126,29 @@ static const unsigned int punct_attributes[] = {
 	CLAUSE_COLON,
 	CLAUSE_SEMICOLON,

 	CLAUSE_SEMICOLON | 0x8000,  // inverted exclamation
 	CLAUSE_SEMICOLON | 0x8000,  // inverted question
 	CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER,  // inverted exclamation
 	CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER,  // inverted question
 	CLAUSE_SEMICOLON,  // en-dash
 	CLAUSE_SEMICOLON,  // em-dash
 	CLAUSE_SEMICOLON | PUNCT_SAY_NAME | 0x8000,  // elipsis
 	CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER,  // elipsis

 	CLAUSE_QUESTION,  // Greek question mark
 	CLAUSE_SEMICOLON,  // Greek semicolon
 	CLAUSE_PERIOD | 0x8000,  // Devanagari Danda (fullstop)
 	CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER,  // Devanagari Danda (fullstop)

 	CLAUSE_PERIOD | 0x8000,  // Armenian period
 	CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER,  // Armenian period
 	CLAUSE_COMMA,  // Armenian comma
 	CLAUSE_EXCLAMATION | PUNCT_IN_WORD,  // Armenian exclamation
 	CLAUSE_QUESTION | PUNCT_IN_WORD,  // Armenian question
 	CLAUSE_PERIOD | PUNCT_IN_WORD,  // Armenian emphasis mark
 	CLAUSE_EXCLAMATION | CLAUSE_PUNCTUATION_IN_WORD,  // Armenian exclamation
 	CLAUSE_QUESTION | CLAUSE_PUNCTUATION_IN_WORD,  // Armenian question
 	CLAUSE_PERIOD | CLAUSE_PUNCTUATION_IN_WORD,  // Armenian emphasis mark

 	CLAUSE_COMMA,  // Arabic ,
 	CLAUSE_SEMICOLON,  // Arabic ;
 	CLAUSE_QUESTION,  // Arabic question mark
 	CLAUSE_PERIOD,  // Arabic full stop

 	CLAUSE_PERIOD+0x8000,  // Singhalese period
 	CLAUSE_PERIOD+0x8000,  // Tibet period
 	CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER,  // Singhalese period
 	CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER,  // Tibet period
 	CLAUSE_PARAGRAPH,

 	CLAUSE_PERIOD,  // Ethiopic period
@@ -160,15 +160,15 @@ static const unsigned int punct_attributes[] = {
 	CLAUSE_PARAGRAPH,  // Ethiopic paragraph
 	CLAUSE_PARAGRAPH,  // Georgian paragraph

 	CLAUSE_COMMA+0x8000,  // ideograph comma
 	CLAUSE_PERIOD+0x8000,  // ideograph period
 	CLAUSE_COMMA | CLAUSE_OPTIONAL_SPACE_AFTER,  // ideograph comma
 	CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER,  // ideograph period

 	CLAUSE_EXCLAMATION+0x8000,  // fullwidth
 	CLAUSE_COMMA+0x8000,
 	CLAUSE_PERIOD+0x8000,
 	CLAUSE_COLON+0x8000,
 	CLAUSE_SEMICOLON+0x8000,
 	CLAUSE_QUESTION+0x8000,
 	CLAUSE_EXCLAMATION | CLAUSE_OPTIONAL_SPACE_AFTER,  // fullwidth
 	CLAUSE_COMMA | CLAUSE_OPTIONAL_SPACE_AFTER,
 	CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER,
 	CLAUSE_COLON | CLAUSE_OPTIONAL_SPACE_AFTER,
 	CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER,
 	CLAUSE_QUESTION | CLAUSE_OPTIONAL_SPACE_AFTER,

 	CLAUSE_SEMICOLON,  // spare
 	0
@@ -607,7 +607,7 @@ static int AnnouncePunctuation(Translator *tr, int c1, int *c2_ptr, char *output
 	attributes = punct_attributes[lookupwchar(punct_chars, c1)];

 	short_pause = CLAUSE_SHORTFALL;
 	if ((attributes & CLAUSE_BITS_INTONATION) == 0x1000)
 	if ((attributes & CLAUSE_INTONATION_TYPE) == 0x1000)
 		short_pause = CLAUSE_SHORTCOMMA;

 	if ((bufix1 > 0) && !(tr->langopts.param[LOPT_ANNOUNCE_PUNCT] & 2)) {
@@ -616,7 +616,7 @@ static int AnnouncePunctuation(Translator *tr, int c1, int *c2_ptr, char *output
 		return short_pause;
 	}

 	if (attributes & CLAUSE_BIT_SENTENCE)
 	if (attributes & CLAUSE_TYPE_SENTENCE)
 		return attributes;

 	return short_pause;
@@ -1038,7 +1038,7 @@ static int GetVoiceAttributes(wchar_t *pw, int tag_type)
 	// a voice change.
 	// If it's a closing tag, delete the top frame of the stack and determine whether this implies
 	// a voice change.
 	// Returns  CLAUSE_BIT_VOICE if there is a voice change
 	// Returns  CLAUSE_TYPE_VOICE_CHANGE if there is a voice change

 	wchar_t *lang;
 	wchar_t *gender;
@@ -1096,7 +1096,7 @@ static int GetVoiceAttributes(wchar_t *pw, int tag_type)
 	if (strcmp(new_voice_id, current_voice_id) != 0) {
 		// add an embedded command to change the voice
 		strcpy(current_voice_id, new_voice_id);
 		return CLAUSE_BIT_VOICE;    // change of voice
 		return CLAUSE_TYPE_VOICE_CHANGE;
 	}

 	return 0;
@@ -1780,7 +1780,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
 						buf[ix] = ' ';
 						buf[ix++] = 0;

 						if (terminator & CLAUSE_BIT_VOICE)
 						if (terminator & CLAUSE_TYPE_VOICE_CHANGE)
 							strcpy(voice_change, current_voice_id);
 						return terminator;
 					}
@@ -1993,7 +1993,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
 			if ((punct = lookupwchar(punct_chars, c1)) != 0) {
 				punct_data = punct_attributes[punct];

 				if (punct_data & PUNCT_IN_WORD) {
 				if (punct_data & CLAUSE_PUNCTUATION_IN_WORD) {
 					// Armenian punctuation inside a word
 					stressed_word = 1;
 					*tone_type = punct_data >> 12 & 0xf; // override the end-of-sentence type
@@ -2019,7 +2019,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
 				}
 			}

 			if ((punct_data & PUNCT_SAY_NAME) && (announced_punctuation == 0)) {
 			if ((punct_data & CLAUSE_SPEAK_PUNCTUATION_NAME) && (announced_punctuation == 0)) {
 				// used for elipsis (and 3 dots) if a pronunciation for elipsis is given in *_list
 				char *p2;

@@ -2028,7 +2028,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
 				if (p2[0] != 0) {
 					ix += strlen(p2);
 					announced_punctuation = c1;
 					punct_data = punct_data & ~CLAUSE_BITS_INTONATION; // change intonation type to 0 (full-stop)
 					punct_data = punct_data & ~CLAUSE_INTONATION_TYPE; // change intonation type to 0 (full-stop)
 				}
 			}

@@ -2045,7 +2045,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
 				}

 				if ((c1 == '.') && (nl_count < 2))
 					punct_data |= CLAUSE_DOT;
 					punct_data |= CLAUSE_DOT_AFTER_LAST_WORD;

 				if (nl_count == 0) {
 					if ((c1 == ',') && (cprev == '.') && (tr->translator_name == L('h', 'u')) && iswdigit(cprev2) && (iswdigit(c_next) || (iswlower(c_next)))) {
@@ -2095,7 +2095,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
 					buf[ix+1] = 0;

 					if (iswdigit(cprev) && !IsAlpha(c_next)) // ????
 						punct_data &= ~CLAUSE_DOT;
 						punct_data &= ~CLAUSE_DOT_AFTER_LAST_WORD;
 					if (nl_count > 1) {
 						if ((punct_data == CLAUSE_QUESTION) || (punct_data == CLAUSE_EXCLAMATION))
 							return punct_data + 35; // with a longer pause
--- a/src/libespeak-ng/translate.c
+++ b/src/libespeak-ng/translate.c
@@ -2004,7 +2004,7 @@ void TranslateClause(Translator *tr, int *tone_out, char **voice_change)
 			clause_pause = 0;

 		if (new_sentence)
 			terminator |= CLAUSE_BIT_SENTENCE; // carry forward an end-of-sentence indicator
 			terminator |= CLAUSE_TYPE_SENTENCE; // carry forward an end-of-sentence indicator
 		max_clause_pause += clause_pause;
 		new_sentence2 = 0;
 	} else {
@@ -2484,7 +2484,7 @@ void TranslateClause(Translator *tr, int *tone_out, char **voice_change)
 		words[ix].flags |= FLAG_LAST_WORD;

 		// FLAG_NOSPACE check to avoid recognizing  .mr  -mr
 		if ((terminator & CLAUSE_DOT) && !(words[word_count-1].flags & FLAG_NOSPACE))
 		if ((terminator & CLAUSE_DOT_AFTER_LAST_WORD) && !(words[word_count-1].flags & FLAG_NOSPACE))
 			words[word_count-1].flags |= FLAG_HAS_DOT;
 	}
 	words[0].flags |= FLAG_FIRST_WORD;
@@ -2612,7 +2612,7 @@ void TranslateClause(Translator *tr, int *tone_out, char **voice_change)
 				}
 			}

 			if ((dict_flags & (FLAG_ALLOW_DOT | FLAG_NEEDS_DOT)) && (ix == word_count - 1 - dictionary_skipwords) && (terminator & CLAUSE_DOT)) {
 			if ((dict_flags & (FLAG_ALLOW_DOT | FLAG_NEEDS_DOT)) && (ix == word_count - 1 - dictionary_skipwords) && (terminator & CLAUSE_DOT_AFTER_LAST_WORD)) {
 				// probably an abbreviation such as Mr. or B. rather than end of sentence
 				clause_pause = 10;
 				tone = 4;
@@ -2665,12 +2665,12 @@ void TranslateClause(Translator *tr, int *tone_out, char **voice_change)
 		*tone_out = tone;

 	new_sentence = 0;
 	if (terminator & CLAUSE_BIT_SENTENCE)
 	if (terminator & CLAUSE_TYPE_SENTENCE)
 		new_sentence = 1; // next clause is a new sentence

 	if (voice_change != NULL) {
 		// return new voice name if an embedded voice change command terminated the clause
 		if (terminator & CLAUSE_BIT_VOICE)
 		if (terminator & CLAUSE_TYPE_VOICE_CHANGE)
 			*voice_change = voice_change_name;
 		else
 			*voice_change = NULL;
--- a/src/libespeak-ng/translate.h
+++ b/src/libespeak-ng/translate.h
@@ -198,38 +198,44 @@ extern "C"
 #define LETTERGP_Y      6
 #define LETTERGP_VOWEL2 7

 // Punctuation types  returned by ReadClause()
 // bits 0-11 pause x 10mS
 // bits12-14 intonation type
 // bit 15- don't need space after the punctuation
 // bit 19=sentence, bit 18=clause,  bits 17=voice change
 // bit 16 used to distinguish otherwise identical types
 // bit 20= punctuation character can be inside a word (Armenian)
 // bit 21= speak the name of the punctuation character
 // bit 22= dot after the last word
 // bit 23= pause is x 320mS (not x 10mS)

 #define CLAUSE_BIT_SENTENCE     0x80000
 #define CLAUSE_BIT_CLAUSE       0x40000
 #define CLAUSE_BIT_VOICE        0x20000
 #define CLAUSE_BITS_INTONATION   0x7000
 #define PUNCT_IN_WORD          0x100000
 #define PUNCT_SAY_NAME         0x200000
 #define CLAUSE_DOT             0x400000
 #define CLAUSE_PAUSE_LONG      0x800000

 #define CLAUSE_NONE        ( 0 + 0x04000)
 #define CLAUSE_PARAGRAPH   (70 + 0x80000)
 #define CLAUSE_EOF         (40 + 0x90000)
 #define CLAUSE_VOICE       ( 0 + 0x24000)
 #define CLAUSE_PERIOD      (40 + 0x80000)
 #define CLAUSE_COMMA       (20 + 0x41000)
 #define CLAUSE_SHORTCOMMA  ( 4 + 0x41000)
 #define CLAUSE_SHORTFALL   ( 4 + 0x40000)
 #define CLAUSE_QUESTION    (40 + 0x82000)
 #define CLAUSE_EXCLAMATION (45 + 0x83000)
 #define CLAUSE_COLON       (30 + 0x40000)
 #define CLAUSE_SEMICOLON   (30 + 0x41000)
 // Punctuation types returned by ReadClause()
 //
 // A clause value is a single integer bit field. It is built by OR-ing together
 // a pause length, one intonation type, one or more phrase type bits and any
 // modifier flags.
 //@{

 // Field masks (CLAUSE_PAUSE, CLAUSE_INTONATION_TYPE, CLAUSE_TYPE) and
 // single-bit modifier flags.
 #define CLAUSE_PAUSE                  0x00000FFF // bits 0-11: pause (x 10mS)
 #define CLAUSE_INTONATION_TYPE        0x00007000 // bits 12-14: intonation type (one of CLAUSE_INTONATION_*)
 #define CLAUSE_OPTIONAL_SPACE_AFTER   0x00008000 // bit 15: don't need space after the punctuation
 #define CLAUSE_TYPE                   0x000F0000 // bits 16-19: phrase type (CLAUSE_TYPE_* bits)
 #define CLAUSE_PUNCTUATION_IN_WORD    0x00100000 // bit 20: punctuation character can be inside a word (Armenian)
 #define CLAUSE_SPEAK_PUNCTUATION_NAME 0x00200000 // bit 21: speak the name of the punctuation character
 #define CLAUSE_DOT_AFTER_LAST_WORD    0x00400000 // bit 22: dot after the last word
 #define CLAUSE_PAUSE_LONG             0x00800000 // bit 23: the CLAUSE_PAUSE value is x 320mS (not x 10mS)

 // Values of the CLAUSE_INTONATION_TYPE field. These are mutually exclusive:
 // compare against (value & CLAUSE_INTONATION_TYPE) rather than testing bits.
 #define CLAUSE_INTONATION_FULL_STOP   0x00000000
 #define CLAUSE_INTONATION_COMMA       0x00001000
 #define CLAUSE_INTONATION_QUESTION    0x00002000
 #define CLAUSE_INTONATION_EXCLAMATION 0x00003000
 #define CLAUSE_INTONATION_NONE        0x00004000

 // Bits of the CLAUSE_TYPE field. Each is a separate bit, so they may be
 // combined (CLAUSE_EOF sets both SENTENCE and EOF) and tested with '&'.
 #define CLAUSE_TYPE_NONE              0x00000000
 #define CLAUSE_TYPE_EOF               0x00010000
 #define CLAUSE_TYPE_VOICE_CHANGE      0x00020000
 #define CLAUSE_TYPE_CLAUSE            0x00040000
 #define CLAUSE_TYPE_SENTENCE          0x00080000

 // Complete clause values: (pause in 10mS units | intonation type | phrase type).
 // Modifier flags such as CLAUSE_OPTIONAL_SPACE_AFTER are OR-ed on by the user
 // of these values where needed.
 #define CLAUSE_NONE        ( 0 | CLAUSE_INTONATION_NONE        | CLAUSE_TYPE_NONE)
 #define CLAUSE_PARAGRAPH   (70 | CLAUSE_INTONATION_FULL_STOP   | CLAUSE_TYPE_SENTENCE)
 #define CLAUSE_EOF         (40 | CLAUSE_INTONATION_FULL_STOP   | CLAUSE_TYPE_SENTENCE | CLAUSE_TYPE_EOF)
 #define CLAUSE_VOICE       ( 0 | CLAUSE_INTONATION_NONE        | CLAUSE_TYPE_VOICE_CHANGE)
 #define CLAUSE_PERIOD      (40 | CLAUSE_INTONATION_FULL_STOP   | CLAUSE_TYPE_SENTENCE)
 #define CLAUSE_COMMA       (20 | CLAUSE_INTONATION_COMMA       | CLAUSE_TYPE_CLAUSE)
 #define CLAUSE_SHORTCOMMA  ( 4 | CLAUSE_INTONATION_COMMA       | CLAUSE_TYPE_CLAUSE)
 #define CLAUSE_SHORTFALL   ( 4 | CLAUSE_INTONATION_FULL_STOP   | CLAUSE_TYPE_CLAUSE)
 #define CLAUSE_QUESTION    (40 | CLAUSE_INTONATION_QUESTION    | CLAUSE_TYPE_SENTENCE)
 #define CLAUSE_EXCLAMATION (45 | CLAUSE_INTONATION_EXCLAMATION | CLAUSE_TYPE_SENTENCE)
 #define CLAUSE_COLON       (30 | CLAUSE_INTONATION_FULL_STOP   | CLAUSE_TYPE_CLAUSE)
 #define CLAUSE_SEMICOLON   (30 | CLAUSE_INTONATION_COMMA       | CLAUSE_TYPE_CLAUSE)

 //@}

 #define SAYAS_CHARS        0x12
 #define SAYAS_GLYPHS       0x13