Browse Source

ucd-tools: Extended_Dash eSpeakNG extended property support; use in clause_type_from_codepoint.

master
Reece H. Dunn 8 years ago
parent
commit
b932f3c493

+ 2
- 5
src/libespeak-ng/tokenizer.c View File

static const unsigned short punct_chars[] = { static const unsigned short punct_chars[] = {
0x00a1, // inverted exclamation 0x00a1, // inverted exclamation
0x00bf, // inverted question 0x00bf, // inverted question
0x2013, // en-dash
0x2014, // em-dash


0x0964, // Devanagari Danda (fullstop) 0x0964, // Devanagari Danda (fullstop)


static const unsigned int punct_attributes[] = { static const unsigned int punct_attributes[] = {
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted exclamation CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted exclamation
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted question CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted question
CLAUSE_SEMICOLON, // en-dash
CLAUSE_SEMICOLON, // em-dash


CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // Devanagari Danda (fullstop) CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // Devanagari Danda (fullstop)


0 0
}; };


#define ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK 0xFF00000000000000ull
#define ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK 0xFFF0000000000000ull


int clause_type_from_codepoint(uint32_t c) int clause_type_from_codepoint(uint32_t c)
{ {
case ESPEAKNG_PROPERTY_COLON: case ESPEAKNG_PROPERTY_COLON:
return CLAUSE_COLON; return CLAUSE_COLON;
case ESPEAKNG_PROPERTY_SEMI_COLON: case ESPEAKNG_PROPERTY_SEMI_COLON:
case ESPEAKNG_PROPERTY_EXTENDED_DASH:
return CLAUSE_SEMICOLON; return CLAUSE_SEMICOLON;
case ESPEAKNG_PROPERTY_ELLIPSIS: case ESPEAKNG_PROPERTY_ELLIPSIS:
return CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER; return CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER;

+ 8
- 0
src/ucd-tools/data/espeak-ng/PropList.txt View File

2029 ; Paragraph_Separator # Zp PARAGRAPH SEPARATOR 2029 ; Paragraph_Separator # Zp PARAGRAPH SEPARATOR


# Total code points: 6 # Total code points: 6

# ================================================

2013..2014 ; Extended_Dash # Pd [2] EN DASH..EM DASH
2E3A..2E3B ; Extended_Dash # Pd [2] TWO-EM DASH..THREE-EM DASH
FE31..FE32 ; Extended_Dash # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH

# Total code points: 6

+ 1
- 0
src/ucd-tools/src/include/ucd/ucd.h View File

#define UCD_PROPERTY_PREPENDED_CONCATENATION_MARK 0x0000000100000000ull /**< @brief Prepended_Concatenation_Mark */ #define UCD_PROPERTY_PREPENDED_CONCATENATION_MARK 0x0000000100000000ull /**< @brief Prepended_Concatenation_Mark */


// eSpeak NG extended properties: // eSpeak NG extended properties:
#define ESPEAKNG_PROPERTY_EXTENDED_DASH 0x0080000000000000ull /**< @brief Extended_Dash */
#define ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR 0x0100000000000000ull /**< @brief Paragraph_Separator */ #define ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR 0x0100000000000000ull /**< @brief Paragraph_Separator */
#define ESPEAKNG_PROPERTY_ELLIPSIS 0x0200000000000000ull /**< @brief Ellipsis */ #define ESPEAKNG_PROPERTY_ELLIPSIS 0x0200000000000000ull /**< @brief Ellipsis */
#define ESPEAKNG_PROPERTY_SEMI_COLON 0x0400000000000000ull /**< @brief Semi_Colon */ #define ESPEAKNG_PROPERTY_SEMI_COLON 0x0400000000000000ull /**< @brief Semi_Colon */

+ 3
- 0
src/ucd-tools/src/proplist.c View File

break; break;
case 0x2000: case 0x2000:
if (c >= 0x2010 && c <= 0x2011) return UCD_PROPERTY_DASH | UCD_PROPERTY_HYPHEN | UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x2010 && c <= 0x2011) return UCD_PROPERTY_DASH | UCD_PROPERTY_HYPHEN | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2013 && c <= 0x2014) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXTENDED_DASH;
return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2E00: case 0x2E00:
if (c == 0x2E17) return UCD_PROPERTY_DASH | UCD_PROPERTY_HYPHEN | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x2E17) return UCD_PROPERTY_DASH | UCD_PROPERTY_HYPHEN | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2E3A && c <= 0x2E3B) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXTENDED_DASH;
return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX;
case 0x3000: case 0x3000:
if (c == 0x301C) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x301C) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x3030) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x3030) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX;
break; break;
case 0xFE00: case 0xFE00:
if (c >= 0xFE31 && c <= 0xFE32) return UCD_PROPERTY_DASH | ESPEAKNG_PROPERTY_EXTENDED_DASH;
if (c == 0xFE63) return UCD_PROPERTY_DASH | UCD_PROPERTY_HYPHEN | UCD_PROPERTY_OTHER_MATH; if (c == 0xFE63) return UCD_PROPERTY_DASH | UCD_PROPERTY_HYPHEN | UCD_PROPERTY_OTHER_MATH;
break; break;
case 0xFF00: case 0xFF00:

+ 1
- 0
src/ucd-tools/tools/printdata.py View File

props += (2 ** 31) * data.get('Pattern_Syntax', 0) props += (2 ** 31) * data.get('Pattern_Syntax', 0)
props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0) props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0)
# eSpeak NG extended properties: # eSpeak NG extended properties:
props += (2 ** 55) * data.get('Extended_Dash', 0)
props += (2 ** 56) * data.get('Paragraph_Separator', 0) props += (2 ** 56) * data.get('Paragraph_Separator', 0)
props += (2 ** 57) * data.get('Ellipsis', 0) props += (2 ** 57) * data.get('Ellipsis', 0)
props += (2 ** 58) * data.get('Semi_Colon', 0) props += (2 ** 58) * data.get('Semi_Colon', 0)

Loading…
Cancel
Save