| 
				
			 | 
			
			 | 
			@@ -36,41 +36,20 @@ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			// punctuation symbols that can end a clause | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			static const unsigned short punct_chars[] = { | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				',', '.', '?', '!', ':', ';', | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x00a1, // inverted exclamation | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x00bf, // inverted question | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x2013, // en-dash | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x2014, // em-dash | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x2026, // elipsis | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x037e, // Greek question mark (looks like semicolon) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x0387, // Greek semicolon, ano teleia | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x0964, // Devanagari Danda (fullstop) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x0589, // Armenian period | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x055d, // Armenian comma | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x055c, // Armenian exclamation | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x055e, // Armenian question | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x055b, // Armenian emphasis mark | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x060c, // Arabic , | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x061b, // Arabic ; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x061f, // Arabic ? | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x06d4, // Arabic . | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x0df4, // Singhalese Kunddaliya | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x0f0d, // Tibet Shad | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x0f0e, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x1362, // Ethiopic period | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x1363, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x1364, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x1365, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x1366, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x1367, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x1368, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x10fb, // Georgian paragraph | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x3001, // ideograph comma | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0x3002, // ideograph period | 
		
		
	
	
		
			
			| 
				
			 | 
			
			 | 
			@@ -87,46 +66,20 @@ static const unsigned short punct_chars[] = { | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			// indexed by entry num. in punct_chars | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			static const unsigned int punct_attributes[] = { | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_COMMA, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_PERIOD, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_QUESTION, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_EXCLAMATION, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_COLON, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_SEMICOLON, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER,  // inverted exclamation | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER,  // inverted question | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_SEMICOLON,  // en-dash | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_SEMICOLON,  // em-dash | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER,  // elipsis | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_QUESTION,  // Greek question mark | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_SEMICOLON,  // Greek semicolon | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER,  // Devanagari Danda (fullstop) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER,  // Armenian period | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_COMMA,  // Armenian comma | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_EXCLAMATION | CLAUSE_PUNCTUATION_IN_WORD,  // Armenian exclamation | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_QUESTION | CLAUSE_PUNCTUATION_IN_WORD,  // Armenian question | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_PERIOD | CLAUSE_PUNCTUATION_IN_WORD,  // Armenian emphasis mark | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_COMMA,  // Arabic , | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_SEMICOLON,  // Arabic ; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_QUESTION,  // Arabic question mark | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_PERIOD,  // Arabic full stop | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER,  // Singhalese period | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER,  // Tibet period | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_PARAGRAPH, | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_PERIOD,  // Ethiopic period | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_COMMA,  // Ethiopic comma | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_SEMICOLON,  // Ethiopic semicolon | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_COLON,  // Ethiopic colon | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_COLON,  // Ethiopic preface colon | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_QUESTION,  // Ethiopic question mark | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_PARAGRAPH,  // Ethiopic paragraph | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_PARAGRAPH,  // Georgian paragraph | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_COMMA | CLAUSE_OPTIONAL_SPACE_AFTER,  // ideograph comma | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER,  // ideograph period | 
		
		
	
	
		
			
			| 
				
			 | 
			
			 | 
			@@ -141,11 +94,37 @@ static const unsigned int punct_attributes[] = { | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				0 | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			}; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			// Bit mask covering the top 8 bits (56-63) of a 64-bit value.
			// clause_type_from_codepoint() ANDs it with a codepoint's ucd_property
			// bits and switches on the result to choose a clause type.
			#define ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK 0xFF00000000000000ull | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			int clause_type_from_codepoint(uint32_t c) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			{ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				ucd_category cat = ucd_lookup_category(c); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				ucd_property props = ucd_properties(c, cat); | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				for (int ix = 0; punct_chars[ix] != 0; ++ix) { | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
					if (punct_chars[ix] == c) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
						return punct_attributes[ix]; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				} | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				switch (props & ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK) | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				{ | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				case ESPEAKNG_PROPERTY_FULL_STOP: | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
					return CLAUSE_PERIOD; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				case ESPEAKNG_PROPERTY_QUESTION_MARK: | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
					return CLAUSE_QUESTION; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				case ESPEAKNG_PROPERTY_EXCLAMATION_MARK: | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
					return CLAUSE_EXCLAMATION; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				case ESPEAKNG_PROPERTY_COMMA: | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
					return CLAUSE_COMMA; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				case ESPEAKNG_PROPERTY_COLON: | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
					return CLAUSE_COLON; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				case ESPEAKNG_PROPERTY_SEMI_COLON: | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
					return CLAUSE_SEMICOLON; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				case ESPEAKNG_PROPERTY_ELLIPSIS: | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
					return CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				case ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR: | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
					return CLAUSE_PARAGRAPH; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				} | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			
  | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
				return CLAUSE_NONE; | 
		
		
	
		
			
			 | 
			 | 
			
			 | 
			} |