Browse Source

ucd-tools: Extended_Dash eSpeakNG extended property support; use in clause_type_from_codepoint.

master
Reece H. Dunn 8 years ago
parent
commit
b932f3c493

+ 2
- 5
src/libespeak-ng/tokenizer.c View File

@@ -38,8 +38,6 @@
static const unsigned short punct_chars[] = {
0x00a1, // inverted exclamation
0x00bf, // inverted question
-0x2013, // en-dash
-0x2014, // em-dash

0x0964, // Devanagari Danda (fullstop)

@@ -68,8 +66,6 @@ static const unsigned short punct_chars[] = {
static const unsigned int punct_attributes[] = {
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted exclamation
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted question
-CLAUSE_SEMICOLON, // en-dash
-CLAUSE_SEMICOLON, // em-dash

CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // Devanagari Danda (fullstop)

@@ -94,7 +90,7 @@ static const unsigned int punct_attributes[] = {
0
};

-#define ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK 0xFF00000000000000ull
+#define ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK 0xFFF0000000000000ull

int clause_type_from_codepoint(uint32_t c)
{
@@ -119,6 +115,7 @@ int clause_type_from_codepoint(uint32_t c)
case ESPEAKNG_PROPERTY_COLON:
return CLAUSE_COLON;
case ESPEAKNG_PROPERTY_SEMI_COLON:
+case ESPEAKNG_PROPERTY_EXTENDED_DASH:
return CLAUSE_SEMICOLON;
case ESPEAKNG_PROPERTY_ELLIPSIS:
return CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER;

+ 8
- 0
src/ucd-tools/data/espeak-ng/PropList.txt View File

@@ -180,3 +180,11 @@ FE19 ; Ellipsis # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL EL
2029 ; Paragraph_Separator # Zp PARAGRAPH SEPARATOR

# Total code points: 6

# ================================================

2013..2014 ; Extended_Dash # Pd [2] EN DASH..EM DASH
2E3A..2E3B ; Extended_Dash # Pd [2] TWO-EM DASH..THREE-EM DASH
FE31..FE32 ; Extended_Dash # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH

# Total code points: 6

+ 1
- 0
src/ucd-tools/src/include/ucd/ucd.h View File

@@ -363,6 +363,7 @@ typedef uint64_t ucd_property;
#define UCD_PROPERTY_PREPENDED_CONCATENATION_MARK 0x0000000100000000ull /**< @brief Prepended_Concatenation_Mark */

// eSpeak NG extended properties:
#define ESPEAKNG_PROPERTY_EXTENDED_DASH 0x0080000000000000ull /**< @brief Extended_Dash */
#define ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR 0x0100000000000000ull /**< @brief Paragraph_Separator */
#define ESPEAKNG_PROPERTY_ELLIPSIS 0x0200000000000000ull /**< @brief Ellipsis */
#define ESPEAKNG_PROPERTY_SEMI_COLON 0x0400000000000000ull /**< @brief Semi_Colon */

+ 3
- 0
src/ucd-tools/src/proplist.c View File

@@ -1239,15 +1239,18 @@ static ucd_property properties_Pd(codepoint_t c)
break;
case 0x2000:
if (c >= 0x2010 && c <= 0x2011) return UCD_PROPERTY_DASH | UCD_PROPERTY_HYPHEN | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2013 && c <= 0x2014) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXTENDED_DASH;
return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2E00:
if (c == 0x2E17) return UCD_PROPERTY_DASH | UCD_PROPERTY_HYPHEN | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2E3A && c <= 0x2E3B) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXTENDED_DASH;
return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX;
case 0x3000:
if (c == 0x301C) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x3030) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX;
break;
case 0xFE00:
if (c >= 0xFE31 && c <= 0xFE32) return UCD_PROPERTY_DASH | ESPEAKNG_PROPERTY_EXTENDED_DASH;
if (c == 0xFE63) return UCD_PROPERTY_DASH | UCD_PROPERTY_HYPHEN | UCD_PROPERTY_OTHER_MATH;
break;
case 0xFF00:

+ 1
- 0
src/ucd-tools/tools/printdata.py View File

@@ -161,6 +161,7 @@ def properties(data):
props += (2 ** 31) * data.get('Pattern_Syntax', 0)
props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0)
# eSpeak NG extended properties:
props += (2 ** 55) * data.get('Extended_Dash', 0)
props += (2 ** 56) * data.get('Paragraph_Separator', 0)
props += (2 ** 57) * data.get('Ellipsis', 0)
props += (2 ** 58) * data.get('Semi_Colon', 0)

Loading…
Cancel
Save