Browse Source

ucd-tools: Colon eSpeakNG extended property support.

master
Reece H. Dunn 8 years ago
parent
commit
9869ee051e

+ 25
- 5
src/ucd-tools/data/espeak-ng/PropList.txt View File

@@ -1,5 +1,5 @@
# espeak-ng/PropList-9.0.0.txt
# Date: 2017-04-17, 13:47:00 GMT
# Date: 2017-04-17, 16:18:00 GMT
# Copyright (C) 2017 Reece H. Dunn
#
# This is an extension to the Unicode Character Database PropList.txt file,
@@ -22,7 +22,7 @@
0589 ; Full_Stop # Po ARMENIAN FULL STOP
06D4 ; Full_Stop # Po ARABIC FULL STOP
0701 ; Full_Stop # Po SYRIAC SUPRALINEAR FULL STOP
0702 ; Full_Stop # Po SYRIAC SUBLINEAR FULL STOP
0704 ; Full_Stop # Po SYRIAC SUBLINEAR COLON
0964 ; Full_Stop # Po DEVANAGARI DANDA
0DF4 ; Full_Stop # Po SINHALA PUNCTUATION KUNDDALIYA
0F0D ; Full_Stop # Po TIBETAN MARK SHAD
@@ -58,6 +58,7 @@ E002E ; Full_Stop # Cf TAG FULL STOP
037E ; Question_Mark # Po GREEK QUESTION MARK
055F ; Question_Mark # Po ARMENIAN QUESTION MARK
061F ; Question_Mark # Po ARABIC QUESTION MARK
0709 ; Question_Mark # Po SYRIAC SUBLINEAR COLON SKEWED RIGHT
1367 ; Question_Mark # Po ETHIOPIC QUESTION MARK
1945 ; Question_Mark # Po LIMBU QUESTION MARK
2047..2049 ; Question_Mark # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
@@ -72,13 +73,14 @@ FF1F ; Question_Mark # Po FULLWIDTH QUESTION MARK
1E95F ; Question_Mark # Po ADLAM INITIAL QUESTION MARK
E003F ; Question_Mark # Cf TAG QUESTION MARK

# Total code points: 22
# Total code points: 23

# ================================================

0021 ; Exclamation_Mark # Po EXCLAMATION MARK
00A1 ; Exclamation_Mark # Po INVERTED EXCLAMATION MARK
055B..055C ; Exclamation_Mark # Po [2] ARMENIAN EMPHASIS MARK..ARMENIAN EXCLAMATION MARK
0703 ; Exclamation_Mark # Po SYRIAC SUPRALINEAR COLON
07F9 ; Exclamation_Mark # Po NKO EXCLAMATION MARK
1944 ; Exclamation_Mark # Po LIMBU EXCLAMATION MARK
203C ; Exclamation_Mark # Po DOUBLE EXCLAMATION MARK
@@ -92,13 +94,14 @@ FF01 ; Exclamation_Mark # Po FULLWIDTH EXCLAMATION MARK
1E95E ; Exclamation_Mark # Po ADLAM INITIAL EXCLAMATION MARK
E0021 ; Exclamation_Mark # Cf TAG EXCLAMATION MARK

# Total code points: 16
# Total code points: 17

# ================================================

002C ; Comma # Po COMMA
055D ; Comma # Po ARMENIAN COMMA
060C ; Comma # Po ARABIC COMMA
0702 ; Comma # Po SYRIAC SUBLINEAR FULL STOP
07F8 ; Comma # Po NKO COMMA
0F14 ; Comma # Po TIBETAN MARK GTER TSHEG
1363 ; Comma # Po ETHIOPIC COMMA
@@ -120,4 +123,21 @@ FF64 ; Comma # Po HALFWIDTH IDEOGRAPHIC COMMA
1F101..1F10A ; Comma # No [10] DIGIT ZERO COMMA..DIGIT NINE COMMA
E002C ; Comma # Cf TAG COMMA

# Total code points: 34
# Total code points: 35

# ================================================

003A ; Colon # Po COLON
0706..0707 ; Colon # Po [2] SYRIAC COLON SKEWED LEFT..SYRIAC COLON SKEWED RIGHT
1365..1366 ; Colon # Po [2] ETHIOPIC COLON..ETHIOPIC PREFACE COLON
1804 ; Colon # Po MONGOLIAN COLON
2982 ; Colon # Sm Z NOTATION TYPE COLON
A6F4 ; Colon # Po BAMUM COLON
FE13 ; Colon # Po PRESENTATION FORM FOR VERTICAL COLON
FE55 ; Colon # Po SMALL COLON
FF1A ; Colon # Po FULLWIDTH COLON
12471..12472 ; Colon # Po [2] CUNEIFORM PUNCTUATION SIGN VERTICAL COLON..CUNEIFORM PUNCTUATION SIGN DIAGONAL COLON
1DA8A ; Colon # Po SIGNWRITING COLON
E003A ; Colon # Cf TAG COLON

# Total code points: 15

+ 1
- 0
src/ucd-tools/src/include/ucd/ucd.h View File

@@ -363,6 +363,7 @@ static const ucd_property UCD_PROPERTY_PATTERN_SYNTAX = 0x00
static const ucd_property UCD_PROPERTY_PREPENDED_CONCATENATION_MARK = 0x0000000100000000ull; /**< @brief Prepended_Concatenation_Mark */

// eSpeak NG extended properties:
// Each flag below is a single, distinct bit of the 64-bit ucd_property value
// (bits 59 through 62), so it can be OR-ed together with the UCD_PROPERTY_*
// flags when building a code point's property set.
// tools/printdata.py encodes the same bit positions (2 ** 59 .. 2 ** 62);
// keep both places in sync when adding or renumbering a property.
static const ucd_property ESPEAKNG_PROPERTY_COLON = 0x0800000000000000ull; /**< @brief Colon */
static const ucd_property ESPEAKNG_PROPERTY_COMMA = 0x1000000000000000ull; /**< @brief Comma */
static const ucd_property ESPEAKNG_PROPERTY_EXCLAMATION_MARK = 0x2000000000000000ull; /**< @brief Exclamation_Mark */
static const ucd_property ESPEAKNG_PROPERTY_QUESTION_MARK = 0x4000000000000000ull; /**< @brief Question_Mark */

+ 29
- 9
src/ucd-tools/src/proplist.c View File

@@ -62,6 +62,7 @@ static ucd_property properties_Cf(codepoint_t c)
if (c == 0x0E0021) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x0E002C) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x0E002E) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x0E003A) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_COLON;
if (c == 0x0E003F) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c >= 0x0E0020 && c <= 0x0E007F) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
break;
@@ -1333,7 +1334,8 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x0027) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x003A && c <= 0x003B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON;
if (c == 0x003B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x003F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x00A1) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x00B7) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_EXTENDER | UCD_PROPERTY_OTHER_ID_CONTINUE;
@@ -1358,8 +1360,15 @@ static ucd_property properties_Po(codepoint_t c)
break;
case 0x0700:
if (c == 0x0700) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c >= 0x0701 && c <= 0x0702) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x0703 && c <= 0x070A) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x0701) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x0702) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x0703) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x0704) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x0705) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c >= 0x0706 && c <= 0x0707) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COLON;
if (c == 0x0708) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x0709) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x070A) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x070C) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x07F8) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x07F9) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
@@ -1391,7 +1400,8 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x1361) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1362) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x1363) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c >= 0x1364 && c <= 0x1366) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1364) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c >= 0x1365 && c <= 0x1366) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COLON;
if (c == 0x1367) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x1368) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
break;
@@ -1408,7 +1418,9 @@ static ucd_property properties_Po(codepoint_t c)
case 0x1800:
if (c == 0x1803) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x1802) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c >= 0x1803 && c <= 0x1805) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1803) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1804) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COLON;
if (c == 0x1805) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1808) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x1809) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x180A) return UCD_PROPERTY_EXTENDER;
@@ -1475,7 +1487,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0xA60E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xA60F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0xA6F3) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xA6F4) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xA6F4) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COLON;
if (c == 0xA6F5) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0xA6F6) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xA6F7) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
@@ -1501,12 +1513,14 @@ static ucd_property properties_Po(codepoint_t c)
case 0xFE00:
if (c >= 0xFE10 && c <= 0xFE11) return ESPEAKNG_PROPERTY_COMMA;
if (c == 0xFE12) return ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xFE13) return ESPEAKNG_PROPERTY_COLON;
if (c == 0xFE15) return ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0xFE16) return ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c >= 0xFE45 && c <= 0xFE46) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0xFE50 && c <= 0xFE51) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0xFE52) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0xFE54 && c <= 0xFE55) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xFE54) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xFE55) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COLON;
if (c == 0xFE56) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0xFE57) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0xFE61) return UCD_PROPERTY_OTHER_MATH;
@@ -1520,7 +1534,8 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0xFF0E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xFF3C) return UCD_PROPERTY_OTHER_MATH;
if (c == 0xFF65) return UCD_PROPERTY_HYPHEN;
if (c >= 0xFF1A && c <= 0xFF1B) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xFF1A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COLON;
if (c == 0xFF1B) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xFF1F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0xFF61) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xFF64) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
@@ -1583,6 +1598,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x011C71) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
case 0x12400:
if (c >= 0x012471 && c <= 0x012472) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COLON;
if (c >= 0x012470 && c <= 0x012474) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
case 0x16A00:
@@ -1600,7 +1616,8 @@ static ucd_property properties_Po(codepoint_t c)
case 0x1DA00:
if (c == 0x01DA87) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x01DA88) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x01DA89 && c <= 0x01DA8A) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x01DA89) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x01DA8A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COLON;
break;
case 0x1E900:
if (c == 0x01E95E) return ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
@@ -1738,7 +1755,10 @@ static ucd_property properties_Sm(codepoint_t c)
case 0x2500:
case 0x2600:
case 0x2700:
return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2900:
if (c == 0x2982) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON;
return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2A00:
case 0x2B00:
return UCD_PROPERTY_PATTERN_SYNTAX;

+ 1
- 0
src/ucd-tools/tools/printdata.py View File

@@ -161,6 +161,7 @@ def properties(data):
props += (2 ** 31) * data.get('Pattern_Syntax', 0)
props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0)
# eSpeak NG extended properties:
props += (2 ** 59) * data.get('Colon', 0)
props += (2 ** 60) * data.get('Comma', 0)
props += (2 ** 61) * data.get('Exclamation_Mark', 0)
props += (2 ** 62) * data.get('Question_Mark', 0)

Loading…
Cancel
Save