Browse Source

ucd-tools: Exclamation_Mark eSpeakNG extended property support.

master
Reece H. Dunn 8 years ago
parent
commit
5017153d62

+ 25
- 6
src/ucd-tools/data/espeak-ng/PropList.txt View File

@@ -1,5 +1,5 @@
# espeak-ng/PropList-9.0.0.txt
# Date: 2017-04-17, 12:49:00 GMT
# Date: 2017-04-17, 13:47:00 GMT
# Copyright (C) 2017 Reece H. Dunn
#
# This is an extension to the Unicode Character Database PropList.txt file,
@@ -59,10 +59,9 @@ E002E ; Full_Stop # Cf TAG FULL STOP
061F ; Question_Mark # Po ARABIC QUESTION MARK
1367 ; Question_Mark # Po ETHIOPIC QUESTION MARK
1945 ; Question_Mark # Po LIMBU QUESTION MARK
2047 ; Question_Mark # Po DOUBLE QUESTION MARK
2049 ; Question_Mark # Po EXCLAMATION QUESTION MARK
2753..2754 ; Question_Mark # So BLACK QUESTION MARK ORNAMENT..WHITE QUESTION MARK ORNAMENT
2CFA..2CFB ; Question_Mark # Po COPTIC OLD NUBIAN DIRECT QUESTION MARK..COPTIC OLD NUBIAN INDIRECT QUESTION MARK
2047..2049 ; Question_Mark # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
2753..2754 ; Question_Mark # So [2] BLACK QUESTION MARK ORNAMENT..WHITE QUESTION MARK ORNAMENT
2CFA..2CFB ; Question_Mark # Po [2] COPTIC OLD NUBIAN DIRECT QUESTION MARK..COPTIC OLD NUBIAN INDIRECT QUESTION MARK
A60F ; Question_Mark # Po VAI QUESTION MARK
A6F7 ; Question_Mark # Po BAMUM QUESTION MARK
FE16 ; Question_Mark # Po PRESENTATION FORM FOR VERTICAL QUESTION MARK
@@ -72,4 +71,24 @@ FF1F ; Question_Mark # Po FULLWIDTH QUESTION MARK
1E95F ; Question_Mark # Po ADLAM INITIAL QUESTION MARK
E003F ; Question_Mark # Cf TAG QUESTION MARK

# Total code points: 21
# Total code points: 22

# ================================================

0021 ; Exclamation_Mark # Po EXCLAMATION MARK
00A1 ; Exclamation_Mark # Po INVERTED EXCLAMATION MARK
055B..055C ; Exclamation_Mark # Po [2] ARMENIAN EMPHASIS MARK..ARMENIAN EXCLAMATION MARK
07F9 ; Exclamation_Mark # Po NKO EXCLAMATION MARK
1944 ; Exclamation_Mark # Po LIMBU EXCLAMATION MARK
203C ; Exclamation_Mark # Po DOUBLE EXCLAMATION MARK
2048..2049 ; Exclamation_Mark # Po [2] QUESTION EXCLAMATION MARK..EXCLAMATION QUESTION MARK
2755 ; Exclamation_Mark # So WHITE EXCLAMATION MARK ORNAMENT
2757 ; Exclamation_Mark # So HEAVY EXCLAMATION MARK SYMBOL
2762..2763 ; Exclamation_Mark # So [2] HEAVY EXCLAMATION MARK ORNAMENT..HEAVY HEART EXCLAMATION MARK ORNAMENT
FE15 ; Exclamation_Mark # Po PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK
FE57 ; Exclamation_Mark # Po SMALL EXCLAMATION MARK
FF01 ; Exclamation_Mark # Po FULLWIDTH EXCLAMATION MARK
1E95E ; Exclamation_Mark # Po ADLAM INITIAL EXCLAMATION MARK
E0021 ; Exclamation_Mark # Cf TAG EXCLAMATION MARK

# Total code points: 18

+ 2
- 1
src/ucd-tools/src/include/ucd/ucd.h View File

@@ -362,7 +362,8 @@ static const ucd_property UCD_PROPERTY_PATTERN_WHITE_SPACE = 0x00
static const ucd_property UCD_PROPERTY_PATTERN_SYNTAX = 0x0000000080000000ull; /**< @brief Pattern_Syntax */
static const ucd_property UCD_PROPERTY_PREPENDED_CONCATENATION_MARK = 0x0000000100000000ull; /**< @brief Prepended_Concatenation_Mark */

// eSpeak NG extended properties (PropListExt)
// eSpeak NG extended properties:
static const ucd_property ESPEAKNG_PROPERTY_EXCLAMATION_MARK = 0x2000000000000000ull; /**< @brief Exclamation_Mark */
static const ucd_property ESPEAKNG_PROPERTY_QUESTION_MARK = 0x4000000000000000ull; /**< @brief Question_Mark */
static const ucd_property ESPEAKNG_PROPERTY_FULL_STOP = 0x8000000000000000ull; /**< @brief Full_Stop */


+ 16
- 8
src/ucd-tools/src/proplist.c View File

@@ -59,6 +59,7 @@ static ucd_property properties_Cf(codepoint_t c)
break;
case 0x0E0000:
if (c == 0x0E0001) return UCD_PROPERTY_DEPRECATED;
if (c == 0x0E0021) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x0E002E) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x0E003F) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c >= 0x0E0020 && c <= 0x0E007F) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
@@ -1325,13 +1326,14 @@ static ucd_property properties_Po(codepoint_t c)
switch (c & 0xFFFFFF00)
{
case 0x0000:
if (c == 0x0021) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x0021) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x0022) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x0027) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x003A && c <= 0x003B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x003F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x00A1) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x00B7) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_EXTENDER | UCD_PROPERTY_OTHER_ID_CONTINUE;
if (c == 0x00BF) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK;
return UCD_PROPERTY_PATTERN_SYNTAX;
@@ -1340,6 +1342,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x0387) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_OTHER_ID_CONTINUE;
break;
case 0x0500:
if (c >= 0x055B && c <= 0x055C) return ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x055F) return ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x0589) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x05C3) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
@@ -1356,7 +1359,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c >= 0x0703 && c <= 0x070A) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x070C) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x07F8) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x07F9) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x07F9) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
break;
case 0x0800:
if (c >= 0x0830 && c <= 0x083E) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
@@ -1405,7 +1408,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x180A) return UCD_PROPERTY_EXTENDER;
break;
case 0x1900:
if (c == 0x1944) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x1944) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x1945) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
break;
case 0x1A00:
@@ -1428,12 +1431,12 @@ static ucd_property properties_Po(codepoint_t c)
if (c >= 0x2020 && c <= 0x2027) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2032 && c <= 0x2034) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_OTHER_MATH;
if (c >= 0x2030 && c <= 0x2038) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x203C && c <= 0x203D) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x203C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x203D) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x203B && c <= 0x203E) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2041 && c <= 0x2043) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2047) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x2048) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2049) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c >= 0x2048 && c <= 0x2049) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c >= 0x204A && c <= 0x2051) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2053) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_DASH;
if (c >= 0x2055 && c <= 0x205E) return UCD_PROPERTY_PATTERN_SYNTAX;
@@ -1486,18 +1489,19 @@ static ucd_property properties_Po(codepoint_t c)
break;
case 0xFE00:
if (c == 0xFE12) return ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xFE15) return ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0xFE16) return ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c >= 0xFE45 && c <= 0xFE46) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0xFE50 && c <= 0xFE51) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xFE52) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0xFE54 && c <= 0xFE55) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xFE56) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0xFE57) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0xFE57) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0xFE61) return UCD_PROPERTY_OTHER_MATH;
if (c == 0xFE68) return UCD_PROPERTY_OTHER_MATH;
break;
case 0xFF00:
if (c == 0xFF01) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0xFF01) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0xFF02) return UCD_PROPERTY_QUOTATION_MARK;
if (c == 0xFF07) return UCD_PROPERTY_QUOTATION_MARK;
if (c == 0xFF0C) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
@@ -1586,6 +1590,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c >= 0x01DA87 && c <= 0x01DA8A) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
case 0x1E900:
if (c == 0x01E95E) return ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x01E95F) return ESPEAKNG_PROPERTY_QUESTION_MARK;
break;
}
@@ -1793,6 +1798,9 @@ static ucd_property properties_So(codepoint_t c)
return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2700:
if (c >= 0x2753 && c <= 0x2754) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x2755) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x2757) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c >= 0x2762 && c <= 0x2763) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2800:
case 0x2B00:

+ 1
- 0
src/ucd-tools/tools/printdata.py View File

@@ -161,6 +161,7 @@ def properties(data):
props += (2 ** 31) * data.get('Pattern_Syntax', 0)
props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0)
# eSpeak NG extended properties:
props += (2 ** 61) * data.get('Exclamation_Mark', 0)
props += (2 ** 62) * data.get('Question_Mark', 0)
props += (2 ** 63) * data.get('Full_Stop', 0)
return props

Loading…
Cancel
Save