Browse Source

ucd-tools: Comma eSpeakNG extended property support.

master
Reece H. Dunn 8 years ago
parent
commit
9ef03b8ac8

+ 31
- 2
src/ucd-tools/data/espeak-ng/PropList.txt View File

@@ -33,6 +33,7 @@
2488..249B ; Full_Stop # No [20] DIGIT ONE FULL STOP..NUMBER TWENTY FULL STOP
2CF9 ; Full_Stop # Po COPTIC OLD NUBIAN FULL STOP
2CFE ; Full_Stop # Po COPTIC FULL STOP
2E33 ; Full_Stop # Po RAISED DOT
2E3C ; Full_Stop # Po STENOGRAPHIC FULL STOP
3002 ; Full_Stop # Po IDEOGRAPHIC FULL STOP
A4FF ; Full_Stop # Po LISU PUNCTUATION FULL STOP
@@ -48,7 +49,7 @@ FF61 ; Full_Stop # Po HALFWIDTH IDEOGRAPHIC FULL STOP
1F100 ; Full_Stop # No DIGIT ZERO FULL STOP
E002E ; Full_Stop # Cf TAG FULL STOP

# Total code points: 48
# Total code points: 49

# ================================================

@@ -89,6 +90,34 @@ FE15 ; Exclamation_Mark # Po PRESENTATION FORM FOR VERTICAL EXCLA
FE57 ; Exclamation_Mark # Po SMALL EXCLAMATION MARK
FF01 ; Exclamation_Mark # Po FULLWIDTH EXCLAMATION MARK
1E95E ; Exclamation_Mark # Po ADLAM INITIAL EXCLAMATION MARK
E0021 ; Exclamation_Mark # Po TAG EXCLAMATION MARK
E0021 ; Exclamation_Mark # Cf TAG EXCLAMATION MARK

# Total code points: 16

# ================================================

002C ; Comma # Po COMMA
055D ; Comma # Po ARMENIAN COMMA
060C ; Comma # Po ARABIC COMMA
07F8 ; Comma # Po NKO COMMA
0F14 ; Comma # Po TIBETAN MARK GTER TSHEG
1363 ; Comma # Po ETHIOPIC COMMA
1802 ; Comma # Po MONGOLIAN COMMA
1808 ; Comma # Po MONGOLIAN MANCHU COMMA
2E32 ; Comma # Po TURNED COMMA
2E34 ; Comma # Po RAISED COMMA
2E41 ; Comma # Po REVERSED COMMA
3001 ; Comma # Po IDEOGRAPHIC COMMA
A4FE ; Comma # Po LISU PUNCTUATION COMMA
A60D ; Comma # Po VAI COMMA
A6F5 ; Comma # Po BAMUM COMMA
FE10..FE11 ; Comma # Po [2] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA
FE50..FE51 ; Comma # Po [2] SMALL COMMA..SMALL IDEOGRAPHIC COMMA
FF0C ; Comma # Po FULLWIDTH COMMA
FF64 ; Comma # Po HALFWIDTH IDEOGRAPHIC COMMA
1144D ; Comma # Po NEWA COMMA
1DA87 ; Comma # Po SIGNWRITING COMMA
1F101..1F10A ; Comma # No [10] DIGIT ZERO COMMA..DIGIT NINE COMMA
E002C ; Comma # Cf TAG COMMA

# Total code points: 34

+ 1
- 0
src/ucd-tools/src/include/ucd/ucd.h View File

@@ -363,6 +363,7 @@ static const ucd_property UCD_PROPERTY_PATTERN_SYNTAX = 0x00
static const ucd_property UCD_PROPERTY_PREPENDED_CONCATENATION_MARK = 0x0000000100000000ull; /**< @brief Prepended_Concatenation_Mark */

// eSpeak NG extended properties:
//
// Each constant below is a single distinct bit, occupying bits 60 through 63
// of the property mask (Comma = bit 60, Exclamation_Mark = bit 61,
// Question_Mark = bit 62, Full_Stop = bit 63). They are combined with the
// UCD_PROPERTY_* flags using bitwise OR.
// NOTE(review): the generator script is expected to assign the same bit
// positions to these property names -- confirm both stay in sync when adding
// or renumbering a flag.
static const ucd_property ESPEAKNG_PROPERTY_COMMA = 0x1000000000000000ull; /**< @brief Comma */
static const ucd_property ESPEAKNG_PROPERTY_EXCLAMATION_MARK = 0x2000000000000000ull; /**< @brief Exclamation_Mark */
static const ucd_property ESPEAKNG_PROPERTY_QUESTION_MARK = 0x4000000000000000ull; /**< @brief Question_Mark */
static const ucd_property ESPEAKNG_PROPERTY_FULL_STOP = 0x8000000000000000ull; /**< @brief Full_Stop */

+ 29
- 16
src/ucd-tools/src/proplist.c View File

@@ -60,6 +60,7 @@ static ucd_property properties_Cf(codepoint_t c)
case 0x0E0000:
if (c == 0x0E0001) return UCD_PROPERTY_DEPRECATED;
if (c == 0x0E0021) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x0E002C) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x0E002E) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x0E003F) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c >= 0x0E0020 && c <= 0x0E007F) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
@@ -1204,6 +1205,7 @@ static ucd_property properties_No(codepoint_t c)
break;
case 0x01F100:
if (c == 0x01F100) return ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x01F101 && c <= 0x01F10A) return ESPEAKNG_PROPERTY_COMMA;
break;
}
return 0;
@@ -1329,7 +1331,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x0021) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x0022) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x0027) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x003A && c <= 0x003B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x003F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK;
@@ -1343,12 +1345,13 @@ static ucd_property properties_Po(codepoint_t c)
break;
case 0x0500:
if (c >= 0x055B && c <= 0x055C) return ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x055D) return ESPEAKNG_PROPERTY_COMMA;
if (c == 0x055F) return ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x0589) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x05C3) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
case 0x0600:
if (c == 0x060C) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x060C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x061B) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x061F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x06D4) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
@@ -1358,7 +1361,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c >= 0x0701 && c <= 0x0702) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x0703 && c <= 0x070A) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x070C) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x07F8) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x07F8) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x07F9) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
break;
case 0x0800:
@@ -1379,6 +1382,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x0F08) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x0F0D) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x0F0E && c <= 0x0F12) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x0F14) return ESPEAKNG_PROPERTY_COMMA;
break;
case 0x1000:
if (c >= 0x104A && c <= 0x104B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
@@ -1386,7 +1390,8 @@ static ucd_property properties_Po(codepoint_t c)
case 0x1300:
if (c == 0x1361) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1362) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x1363 && c <= 0x1366) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1363) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c >= 0x1364 && c <= 0x1366) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1367) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x1368) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
break;
@@ -1402,8 +1407,9 @@ static ucd_property properties_Po(codepoint_t c)
break;
case 0x1800:
if (c == 0x1803) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x1802 && c <= 0x1805) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1808) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1802) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c >= 0x1803 && c <= 0x1805) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1808) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x1809) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x180A) return UCD_PROPERTY_EXTENDER;
break;
@@ -1448,25 +1454,30 @@ static ucd_property properties_Po(codepoint_t c)
break;
case 0x2E00:
if (c == 0x2E2E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2E32) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x2E33) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x2E34) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x2E3C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x2E41) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2E41) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA;
return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x3000:
if (c == 0x3001) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x3001) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x3002) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x3003) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x30FB) return UCD_PROPERTY_HYPHEN;
break;
case 0xA400:
if (c == 0xA4FE) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xA4FE) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0xA4FF) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
break;
case 0xA600:
if (c == 0xA60D) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xA60D) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0xA60E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xA60F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0xA6F3) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0xA6F4 && c <= 0xA6F6) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xA6F4) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xA6F5) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0xA6F6) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xA6F7) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
break;
case 0xA800:
@@ -1488,11 +1499,12 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0xABEB) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
break;
case 0xFE00:
if (c >= 0xFE10 && c <= 0xFE11) return ESPEAKNG_PROPERTY_COMMA;
if (c == 0xFE12) return ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xFE15) return ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0xFE16) return ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c >= 0xFE45 && c <= 0xFE46) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0xFE50 && c <= 0xFE51) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c >= 0xFE50 && c <= 0xFE51) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0xFE52) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0xFE54 && c <= 0xFE55) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xFE56) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
@@ -1504,14 +1516,14 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0xFF01) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0xFF02) return UCD_PROPERTY_QUOTATION_MARK;
if (c == 0xFF07) return UCD_PROPERTY_QUOTATION_MARK;
if (c == 0xFF0C) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xFF0C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0xFF0E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xFF3C) return UCD_PROPERTY_OTHER_MATH;
if (c == 0xFF65) return UCD_PROPERTY_HYPHEN;
if (c >= 0xFF1A && c <= 0xFF1B) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xFF1F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0xFF61) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xFF64) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xFF64) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
break;
case 0x10300:
if (c == 0x01039F) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
@@ -1550,7 +1562,7 @@ static ucd_property properties_Po(codepoint_t c)
break;
case 0x11400:
if (c >= 0x01144B && c <= 0x01144C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x01144D) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x01144D) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x01145B) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
case 0x11500:
@@ -1586,8 +1598,9 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x01BC9F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
break;
case 0x1DA00:
if (c == 0x01DA87) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x01DA88) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x01DA87 && c <= 0x01DA8A) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c >= 0x01DA89 && c <= 0x01DA8A) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
case 0x1E900:
if (c == 0x01E95E) return ESPEAKNG_PROPERTY_EXCLAMATION_MARK;

+ 1
- 0
src/ucd-tools/tools/printdata.py View File

@@ -161,6 +161,7 @@ def properties(data):
props += (2 ** 31) * data.get('Pattern_Syntax', 0)
props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0)
# eSpeak NG extended properties:
props += (2 ** 60) * data.get('Comma', 0)
props += (2 ** 61) * data.get('Exclamation_Mark', 0)
props += (2 ** 62) * data.get('Question_Mark', 0)
props += (2 ** 63) * data.get('Full_Stop', 0)

Loading…
Cancel
Save