static const unsigned short punct_chars[] = { | static const unsigned short punct_chars[] = { | ||||
0x00a1, // inverted exclamation | 0x00a1, // inverted exclamation | ||||
0x00bf, // inverted question | 0x00bf, // inverted question | ||||
0x2013, // en-dash | |||||
0x2014, // em-dash | |||||
0x0964, // Devanagari Danda (fullstop) | 0x0964, // Devanagari Danda (fullstop) | ||||
static const unsigned int punct_attributes[] = { | static const unsigned int punct_attributes[] = { | ||||
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted exclamation | CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted exclamation | ||||
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted question | CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted question | ||||
CLAUSE_SEMICOLON, // en-dash | |||||
CLAUSE_SEMICOLON, // em-dash | |||||
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // Devanagari Danda (fullstop) | CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // Devanagari Danda (fullstop) | ||||
0 | 0 | ||||
}; | }; | ||||
#define ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK 0xFF00000000000000ull | |||||
#define ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK 0xFFF0000000000000ull | |||||
int clause_type_from_codepoint(uint32_t c) | int clause_type_from_codepoint(uint32_t c) | ||||
{ | { | ||||
case ESPEAKNG_PROPERTY_COLON: | case ESPEAKNG_PROPERTY_COLON: | ||||
return CLAUSE_COLON; | return CLAUSE_COLON; | ||||
case ESPEAKNG_PROPERTY_SEMI_COLON: | case ESPEAKNG_PROPERTY_SEMI_COLON: | ||||
case ESPEAKNG_PROPERTY_EXTENDED_DASH: | |||||
return CLAUSE_SEMICOLON; | return CLAUSE_SEMICOLON; | ||||
case ESPEAKNG_PROPERTY_ELLIPSIS: | case ESPEAKNG_PROPERTY_ELLIPSIS: | ||||
return CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER; | return CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER; |
2029 ; Paragraph_Separator # Zp PARAGRAPH SEPARATOR | 2029 ; Paragraph_Separator # Zp PARAGRAPH SEPARATOR | ||||
# Total code points: 6 | # Total code points: 6 | ||||
# ================================================ | |||||
2013..2014 ; Extended_Dash # Pd [2] EN DASH..EM DASH | |||||
2E3A..2E3B ; Extended_Dash # Pd [2] TWO-EM DASH..THREE-EM DASH | |||||
FE31..FE32 ; Extended_Dash # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH | |||||
# Total code points: 6 |
#define UCD_PROPERTY_PREPENDED_CONCATENATION_MARK 0x0000000100000000ull /**< @brief Prepended_Concatenation_Mark */ | #define UCD_PROPERTY_PREPENDED_CONCATENATION_MARK 0x0000000100000000ull /**< @brief Prepended_Concatenation_Mark */ | ||||
// eSpeak NG extended properties: | // eSpeak NG extended properties: | ||||
#define ESPEAKNG_PROPERTY_EXTENDED_DASH 0x0080000000000000ull /**< @brief Extended_Dash */ | |||||
#define ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR 0x0100000000000000ull /**< @brief Paragraph_Separator */ | #define ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR 0x0100000000000000ull /**< @brief Paragraph_Separator */ | ||||
#define ESPEAKNG_PROPERTY_ELLIPSIS 0x0200000000000000ull /**< @brief Ellipsis */ | #define ESPEAKNG_PROPERTY_ELLIPSIS 0x0200000000000000ull /**< @brief Ellipsis */ | ||||
#define ESPEAKNG_PROPERTY_SEMI_COLON 0x0400000000000000ull /**< @brief Semi_Colon */ | #define ESPEAKNG_PROPERTY_SEMI_COLON 0x0400000000000000ull /**< @brief Semi_Colon */ |
break; | break; | ||||
case 0x2000: | case 0x2000: | ||||
if (c >= 0x2010 && c <= 0x2011) return UCD_PROPERTY_DASH | UCD_PROPERTY_HYPHEN | UCD_PROPERTY_PATTERN_SYNTAX; | if (c >= 0x2010 && c <= 0x2011) return UCD_PROPERTY_DASH | UCD_PROPERTY_HYPHEN | UCD_PROPERTY_PATTERN_SYNTAX; | ||||
if (c >= 0x2013 && c <= 0x2014) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXTENDED_DASH; | |||||
return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX; | return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX; | ||||
case 0x2E00: | case 0x2E00: | ||||
if (c == 0x2E17) return UCD_PROPERTY_DASH | UCD_PROPERTY_HYPHEN | UCD_PROPERTY_PATTERN_SYNTAX; | if (c == 0x2E17) return UCD_PROPERTY_DASH | UCD_PROPERTY_HYPHEN | UCD_PROPERTY_PATTERN_SYNTAX; | ||||
if (c >= 0x2E3A && c <= 0x2E3B) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXTENDED_DASH; | |||||
return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX; | return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX; | ||||
case 0x3000: | case 0x3000: | ||||
if (c == 0x301C) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX; | if (c == 0x301C) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX; | ||||
if (c == 0x3030) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX; | if (c == 0x3030) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX; | ||||
break; | break; | ||||
case 0xFE00: | case 0xFE00: | ||||
if (c >= 0xFE31 && c <= 0xFE32) return UCD_PROPERTY_DASH | ESPEAKNG_PROPERTY_EXTENDED_DASH; | |||||
if (c == 0xFE63) return UCD_PROPERTY_DASH | UCD_PROPERTY_HYPHEN | UCD_PROPERTY_OTHER_MATH; | if (c == 0xFE63) return UCD_PROPERTY_DASH | UCD_PROPERTY_HYPHEN | UCD_PROPERTY_OTHER_MATH; | ||||
break; | break; | ||||
case 0xFF00: | case 0xFF00: |
props += (2 ** 31) * data.get('Pattern_Syntax', 0) | props += (2 ** 31) * data.get('Pattern_Syntax', 0) | ||||
props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0) | props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0) | ||||
# eSpeak NG extended properties: | # eSpeak NG extended properties: | ||||
props += (2 ** 55) * data.get('Extended_Dash', 0) | |||||
props += (2 ** 56) * data.get('Paragraph_Separator', 0) | props += (2 ** 56) * data.get('Paragraph_Separator', 0) | ||||
props += (2 ** 57) * data.get('Ellipsis', 0) | props += (2 ** 57) * data.get('Ellipsis', 0) | ||||
props += (2 ** 58) * data.get('Semi_Colon', 0) | props += (2 ** 58) * data.get('Semi_Colon', 0) |