Browse Source

ucd-tools: Add support for the Semi_Colon eSpeak NG extended property.

master
Reece H. Dunn 8 years ago
parent
commit
8d8c8b3b56

+ 19
- 1
src/ucd-tools/data/espeak-ng/PropList.txt View File

@@ -1,5 +1,5 @@
# espeak-ng/PropList-9.0.0.txt
# Date: 2017-04-17, 16:18:00 GMT
# Date: 2017-04-17, 17:17:00 GMT
# Copyright (C) 2017 Reece H. Dunn
#
# This is an extension to the Unicode Character Database PropList.txt file,
@@ -141,3 +141,21 @@ FF1A ; Colon # Po FULLWIDTH COLON
E003A ; Colon # Cf TAG COLON

# Total code points: 15

# ================================================

003B ; Semi_Colon # Po SEMICOLON
0387 ; Semi_Colon # Po GREEK ANO TELEIA
061B ; Semi_Colon # Po ARABIC SEMICOLON
0708 ; Semi_Colon # Po SYRIAC SUPRALINEAR COLON SKEWED LEFT
1364 ; Semi_Colon # Po ETHIOPIC SEMICOLON
204F ; Semi_Colon # Po REVERSED SEMICOLON
2E35 ; Semi_Colon # Po TURNED SEMICOLON
A6F6 ; Semi_Colon # Po BAMUM SEMICOLON
FE14 ; Semi_Colon # Po PRESENTATION FORM FOR VERTICAL SEMICOLON
FE54 ; Semi_Colon # Po SMALL SEMICOLON
FF1B ; Semi_Colon # Po FULLWIDTH SEMICOLON
1DA89 ; Semi_Colon # Po SIGNWRITING SEMICOLON
E003B ; Semi_Colon # Cf TAG SEMICOLON

# Total code points: 13

+ 1
- 0
src/ucd-tools/src/include/ucd/ucd.h View File

@@ -363,6 +363,7 @@ static const ucd_property UCD_PROPERTY_PATTERN_SYNTAX = 0x00
static const ucd_property UCD_PROPERTY_PREPENDED_CONCATENATION_MARK = 0x0000000100000000ull; /**< @brief Prepended_Concatenation_Mark */

// eSpeak NG extended properties:
// Each constant is a single-bit flag near the top of the property mask
// (Semi_Colon = bit 58, Colon = bit 59, Comma = bit 60, Exclamation_Mark = bit 61).
// The nearest standard Unicode property shown above, Prepended_Concatenation_Mark,
// uses bit 32, so these values do not collide with it.
static const ucd_property ESPEAKNG_PROPERTY_SEMI_COLON = 0x0400000000000000ull; /**< @brief Semi_Colon */
static const ucd_property ESPEAKNG_PROPERTY_COLON = 0x0800000000000000ull; /**< @brief Colon */
static const ucd_property ESPEAKNG_PROPERTY_COMMA = 0x1000000000000000ull; /**< @brief Comma */
static const ucd_property ESPEAKNG_PROPERTY_EXCLAMATION_MARK = 0x2000000000000000ull; /**< @brief Exclamation_Mark */

+ 13
- 9
src/ucd-tools/src/proplist.c View File

@@ -63,6 +63,7 @@ static ucd_property properties_Cf(codepoint_t c)
if (c == 0x0E002C) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x0E002E) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x0E003A) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_COLON;
if (c == 0x0E003B) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_SEMI_COLON;
if (c == 0x0E003F) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c >= 0x0E0020 && c <= 0x0E007F) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
break;
@@ -1335,7 +1336,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON;
if (c == 0x003B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x003B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_SEMI_COLON;
if (c == 0x003F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x00A1) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x00B7) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_EXTENDER | UCD_PROPERTY_OTHER_ID_CONTINUE;
@@ -1343,7 +1344,7 @@ static ucd_property properties_Po(codepoint_t c)
return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x0300:
if (c == 0x037E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x0387) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_OTHER_ID_CONTINUE;
if (c == 0x0387) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_OTHER_ID_CONTINUE | ESPEAKNG_PROPERTY_SEMI_COLON;
break;
case 0x0500:
if (c >= 0x055B && c <= 0x055C) return ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
@@ -1354,7 +1355,7 @@ static ucd_property properties_Po(codepoint_t c)
break;
case 0x0600:
if (c == 0x060C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x061B) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x061B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_SEMI_COLON;
if (c == 0x061F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x06D4) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
break;
@@ -1366,7 +1367,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x0704) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x0705) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c >= 0x0706 && c <= 0x0707) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COLON;
if (c == 0x0708) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x0708) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_SEMI_COLON;
if (c == 0x0709) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x070A) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x070C) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
@@ -1400,7 +1401,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x1361) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1362) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x1363) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x1364) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1364) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_SEMI_COLON;
if (c >= 0x1365 && c <= 0x1366) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COLON;
if (c == 0x1367) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x1368) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
@@ -1455,6 +1456,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c >= 0x2041 && c <= 0x2043) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2047) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c >= 0x2048 && c <= 0x2049) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x204F) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_SEMI_COLON;
if (c >= 0x204A && c <= 0x2051) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2053) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_DASH;
if (c >= 0x2055 && c <= 0x205E) return UCD_PROPERTY_PATTERN_SYNTAX;
@@ -1469,6 +1471,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x2E32) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x2E33) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x2E34) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x2E35) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_SEMI_COLON;
if (c == 0x2E3C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x2E41) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA;
return UCD_PROPERTY_PATTERN_SYNTAX;
@@ -1489,7 +1492,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0xA6F3) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xA6F4) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COLON;
if (c == 0xA6F5) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0xA6F6) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xA6F6) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_SEMI_COLON;
if (c == 0xA6F7) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
break;
case 0xA800:
@@ -1514,12 +1517,13 @@ static ucd_property properties_Po(codepoint_t c)
if (c >= 0xFE10 && c <= 0xFE11) return ESPEAKNG_PROPERTY_COMMA;
if (c == 0xFE12) return ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xFE13) return ESPEAKNG_PROPERTY_COLON;
if (c == 0xFE14) return ESPEAKNG_PROPERTY_SEMI_COLON;
if (c == 0xFE15) return ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0xFE16) return ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c >= 0xFE45 && c <= 0xFE46) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0xFE50 && c <= 0xFE51) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0xFE52) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xFE54) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xFE54) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_SEMI_COLON;
if (c == 0xFE55) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COLON;
if (c == 0xFE56) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0xFE57) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
@@ -1535,7 +1539,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0xFF3C) return UCD_PROPERTY_OTHER_MATH;
if (c == 0xFF65) return UCD_PROPERTY_HYPHEN;
if (c == 0xFF1A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COLON;
if (c == 0xFF1B) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xFF1B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_SEMI_COLON;
if (c == 0xFF1F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0xFF61) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xFF64) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
@@ -1616,7 +1620,7 @@ static ucd_property properties_Po(codepoint_t c)
case 0x1DA00:
if (c == 0x01DA87) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x01DA88) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x01DA89) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x01DA89) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_SEMI_COLON;
if (c == 0x01DA8A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COLON;
break;
case 0x1E900:

+ 1
- 0
src/ucd-tools/tools/printdata.py View File

@@ -161,6 +161,7 @@ def properties(data):
props += (2 ** 31) * data.get('Pattern_Syntax', 0)
props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0)
# eSpeak NG extended properties:
props += (2 ** 58) * data.get('Semi_Colon', 0)
props += (2 ** 59) * data.get('Colon', 0)
props += (2 ** 60) * data.get('Comma', 0)
props += (2 ** 61) * data.get('Exclamation_Mark', 0)

Loading…
Cancel
Save