Browse Source

ucd-tools: Question_Mark eSpeakNG extended property support.

master
Reece H. Dunn 8 years ago
parent
commit
31d66fddb1

+ 25
- 1
src/ucd-tools/data/espeak-ng/PropList.txt View File

@@ -1,5 +1,5 @@
# espeak-ng/PropList-9.0.0.txt
-# Date: 2017-04-17, 11:35:00 GMT
+# Date: 2017-04-17, 12:49:00 GMT
# Copyright (C) 2017 Reece H. Dunn
#
# This is an extension to the Unicode Character Database PropList.txt file,
@@ -49,3 +49,27 @@ FF61 ; Full_Stop # Po HALFWIDTH IDEOGRAPHIC FULL STOP
E002E ; Full_Stop # Cf TAG FULL STOP

# Total code points: 48

+# ================================================
+
+003F ; Question_Mark # Po QUESTION MARK
+00BF ; Question_Mark # Po INVERTED QUESTION MARK
+037E ; Question_Mark # Po GREEK QUESTION MARK
+055F ; Question_Mark # Po ARMENIAN QUESTION MARK
+061F ; Question_Mark # Po ARABIC QUESTION MARK
+1367 ; Question_Mark # Po ETHIOPIC QUESTION MARK
+1945 ; Question_Mark # Po LIMBU QUESTION MARK
+2047 ; Question_Mark # Po DOUBLE QUESTION MARK
+2049 ; Question_Mark # Po EXCLAMATION QUESTION MARK
+2753..2754 ; Question_Mark # So BLACK QUESTION MARK ORNAMENT..WHITE QUESTION MARK ORNAMENT
+2CFA..2CFB ; Question_Mark # Po COPTIC OLD NUBIAN DIRECT QUESTION MARK..COPTIC OLD NUBIAN INDIRECT QUESTION MARK
+A60F ; Question_Mark # Po VAI QUESTION MARK
+A6F7 ; Question_Mark # Po BAMUM QUESTION MARK
+FE16 ; Question_Mark # Po PRESENTATION FORM FOR VERTICAL QUESTION MARK
+FE56 ; Question_Mark # Po SMALL QUESTION MARK
+FF1F ; Question_Mark # Po FULLWIDTH QUESTION MARK
+11143 ; Question_Mark # Po CHAKMA QUESTION MARK
+1E95F ; Question_Mark # Po ADLAM INITIAL QUESTION MARK
+E003F ; Question_Mark # Cf TAG QUESTION MARK
+
+# Total code points: 21

+ 1
- 0
src/ucd-tools/src/include/ucd/ucd.h View File

@@ -363,6 +363,7 @@ static const ucd_property UCD_PROPERTY_PATTERN_SYNTAX = 0x00
static const ucd_property UCD_PROPERTY_PREPENDED_CONCATENATION_MARK = 0x0000000100000000ull; /**< @brief Prepended_Concatenation_Mark */

// eSpeak NG extended properties (PropListExt)
+static const ucd_property ESPEAKNG_PROPERTY_QUESTION_MARK = 0x4000000000000000ull; /**< @brief Question_Mark */
static const ucd_property ESPEAKNG_PROPERTY_FULL_STOP = 0x8000000000000000ull; /**< @brief Full_Stop */

/** @brief Return the properties of the specified codepoint.

+ 27
- 11
src/ucd-tools/src/proplist.c View File

@@ -60,6 +60,7 @@ static ucd_property properties_Cf(codepoint_t c)
case 0x0E0000:
if (c == 0x0E0001) return UCD_PROPERTY_DEPRECATED;
if (c == 0x0E002E) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_FULL_STOP;
+if (c == 0x0E003F) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c >= 0x0E0020 && c <= 0x0E007F) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
break;
}
@@ -1330,21 +1331,23 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x003A && c <= 0x003B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX;
-if (c == 0x003F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX;
+if (c == 0x003F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x00B7) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_EXTENDER | UCD_PROPERTY_OTHER_ID_CONTINUE;
+if (c == 0x00BF) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK;
return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x0300:
-if (c == 0x037E) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
+if (c == 0x037E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x0387) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_OTHER_ID_CONTINUE;
break;
case 0x0500:
+if (c == 0x055F) return ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x0589) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x05C3) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
case 0x0600:
if (c == 0x060C) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x061B) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
-if (c == 0x061F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
+if (c == 0x061F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x06D4) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
break;
case 0x0700:
@@ -1381,7 +1384,8 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x1361) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1362) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x1363 && c <= 0x1366) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
-if (c >= 0x1367 && c <= 0x1368) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
+if (c == 0x1367) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
+if (c == 0x1368) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
break;
case 0x1600:
if (c == 0x166D) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
@@ -1401,7 +1405,8 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x180A) return UCD_PROPERTY_EXTENDER;
break;
case 0x1900:
-if (c >= 0x1944 && c <= 0x1945) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
+if (c == 0x1944) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
+if (c == 0x1945) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
break;
case 0x1A00:
if (c >= 0x1AA8 && c <= 0x1AAB) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
@@ -1426,13 +1431,16 @@ static ucd_property properties_Po(codepoint_t c)
if (c >= 0x203C && c <= 0x203D) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x203B && c <= 0x203E) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2041 && c <= 0x2043) return UCD_PROPERTY_PATTERN_SYNTAX;
-if (c >= 0x2047 && c <= 0x2049) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX;
+if (c == 0x2047) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK;
+if (c == 0x2048) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX;
+if (c == 0x2049) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c >= 0x204A && c <= 0x2051) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2053) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_DASH;
if (c >= 0x2055 && c <= 0x205E) return UCD_PROPERTY_PATTERN_SYNTAX;
break;
case 0x2C00:
if (c == 0x2CF9) return ESPEAKNG_PROPERTY_FULL_STOP;
+if (c >= 0x2CFA && c <= 0x2CFB) return ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x2CFE) return ESPEAKNG_PROPERTY_FULL_STOP;
break;
case 0x2E00:
@@ -1453,10 +1461,10 @@ static ucd_property properties_Po(codepoint_t c)
case 0xA600:
if (c == 0xA60D) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xA60E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
-if (c == 0xA60F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
+if (c == 0xA60F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0xA6F3) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0xA6F4 && c <= 0xA6F6) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
-if (c == 0xA6F7) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
+if (c == 0xA6F7) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
break;
case 0xA800:
if (c >= 0xA876 && c <= 0xA877) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
@@ -1478,11 +1486,13 @@ static ucd_property properties_Po(codepoint_t c)
break;
case 0xFE00:
if (c == 0xFE12) return ESPEAKNG_PROPERTY_FULL_STOP;
+if (c == 0xFE16) return ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c >= 0xFE45 && c <= 0xFE46) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0xFE50 && c <= 0xFE51) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xFE52) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0xFE54 && c <= 0xFE55) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
-if (c >= 0xFE56 && c <= 0xFE57) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
+if (c == 0xFE56) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
+if (c == 0xFE57) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0xFE61) return UCD_PROPERTY_OTHER_MATH;
if (c == 0xFE68) return UCD_PROPERTY_OTHER_MATH;
break;
@@ -1495,7 +1505,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0xFF3C) return UCD_PROPERTY_OTHER_MATH;
if (c == 0xFF65) return UCD_PROPERTY_HYPHEN;
if (c >= 0xFF1A && c <= 0xFF1B) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
-if (c == 0xFF1F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
+if (c == 0xFF1F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0xFF61) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xFF64) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
@@ -1523,7 +1533,8 @@ static ucd_property properties_Po(codepoint_t c)
if (c >= 0x0110BE && c <= 0x0110C1) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
break;
case 0x11100:
-if (c >= 0x011141 && c <= 0x011143) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
+if (c >= 0x011141 && c <= 0x011142) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
+if (c == 0x011143) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c >= 0x0111C5 && c <= 0x0111C6) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x0111CD) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c >= 0x0111DE && c <= 0x0111DF) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
@@ -1574,6 +1585,9 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x01DA88) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x01DA87 && c <= 0x01DA8A) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
+case 0x1E900:
+if (c == 0x01E95F) return ESPEAKNG_PROPERTY_QUESTION_MARK;
+break;
}
return 0;
}
@@ -1778,6 +1792,8 @@ static ucd_property properties_So(codepoint_t c)
if (c >= 0x266D && c <= 0x266E) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2700:
+if (c >= 0x2753 && c <= 0x2754) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK;
+return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2800:
case 0x2B00:
return UCD_PROPERTY_PATTERN_SYNTAX;

+ 1
- 0
src/ucd-tools/tools/printdata.py View File

@@ -161,6 +161,7 @@ def properties(data):
props += (2 ** 31) * data.get('Pattern_Syntax', 0)
props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0)
# eSpeak NG extended properties:
+props += (2 ** 62) * data.get('Question_Mark', 0)
props += (2 ** 63) * data.get('Full_Stop', 0)
return props


Loading…
Cancel
Save