Browse Source

ucd-tools: Paragraph_Separator eSpeakNG extended property support.

master
Reece H. Dunn 8 years ago
parent
commit
2e375362c4

+ 12
- 1
src/ucd-tools/data/espeak-ng/PropList.txt View File

@@ -1,5 +1,5 @@
# espeak-ng/PropList-9.0.0.txt
# Date: 2017-04-17, 17:17:00 GMT
# Date: 2017-04-17, 20:19:00 GMT
# Copyright (C) 2017 Reece H. Dunn
#
# This is an extension to the Unicode Character Database PropList.txt file,
@@ -169,3 +169,14 @@ E003B ; Semi_Colon # Cf TAG SEMICOLON
FE19 ; Ellipsis # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS

# Total code points: 8

# ================================================

0700 ; Paragraph_Separator # Po SYRIAC END OF PARAGRAPH
0965 ; Paragraph_Separator # Po DEVANAGARI DOUBLE DANDA
0F0E ; Paragraph_Separator # Po TIBETAN MARK NYIS SHAD
10FB ; Paragraph_Separator # Po GEORGIAN PARAGRAPH SEPARATOR
1368 ; Paragraph_Separator # Po ETHIOPIC PARAGRAPH SEPARATOR
2029 ; Paragraph_Separator # Zp PARAGRAPH SEPARATOR

# Total code points: 6

+ 1
- 0
src/ucd-tools/src/include/ucd/ucd.h View File

@@ -363,6 +363,7 @@ static const ucd_property UCD_PROPERTY_PATTERN_SYNTAX = 0x00
static const ucd_property UCD_PROPERTY_PREPENDED_CONCATENATION_MARK = 0x0000000100000000ull; /**< @brief Prepended_Concatenation_Mark */

// eSpeak NG extended properties:
static const ucd_property ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR = 0x0100000000000000ull; /**< @brief Paragraph_Separator */
static const ucd_property ESPEAKNG_PROPERTY_ELLIPSIS = 0x0200000000000000ull; /**< @brief Ellipsis */
static const ucd_property ESPEAKNG_PROPERTY_SEMI_COLON = 0x0400000000000000ull; /**< @brief Semi_Colon */
static const ucd_property ESPEAKNG_PROPERTY_COLON = 0x0800000000000000ull; /**< @brief Colon */

+ 6
- 4
src/ucd-tools/src/proplist.c View File

@@ -1361,7 +1361,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x06D4) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
break;
case 0x0700:
if (c == 0x0700) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x0700) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR;
if (c == 0x0701) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x0702) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x0703) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
@@ -1381,7 +1381,7 @@ static ucd_property properties_Po(codepoint_t c)
break;
case 0x0900:
if (c == 0x0964) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x0965) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x0965) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR;
break;
case 0x0D00:
if (c == 0x0DF4) return ESPEAKNG_PROPERTY_FULL_STOP;
@@ -1392,11 +1392,13 @@ static ucd_property properties_Po(codepoint_t c)
case 0x0F00:
if (c == 0x0F08) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x0F0D) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x0F0E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR;
if (c >= 0x0F0E && c <= 0x0F12) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x0F14) return ESPEAKNG_PROPERTY_COMMA;
break;
case 0x1000:
if (c >= 0x104A && c <= 0x104B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x10FB) return ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR;
break;
case 0x1300:
if (c == 0x1361) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
@@ -1405,7 +1407,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x1364) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_SEMI_COLON;
if (c >= 0x1365 && c <= 0x1366) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COLON;
if (c == 0x1367) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x1368) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x1368) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR;
break;
case 0x1600:
if (c == 0x166D) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
@@ -1903,7 +1905,7 @@ ucd_property ucd_properties(codepoint_t c, ucd_category category)
case UCD_CATEGORY_Sm: return properties_Sm(c);
case UCD_CATEGORY_So: return properties_So(c);
case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE;
case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE;
case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR;
case UCD_CATEGORY_Zs: return properties_Zs(c);
default: return 0; // Co Cs Ii Lt Me
};

+ 1
- 0
src/ucd-tools/tools/printdata.py View File

@@ -161,6 +161,7 @@ def properties(data):
props += (2 ** 31) * data.get('Pattern_Syntax', 0)
props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0)
# eSpeak NG extended properties:
props += (2 ** 56) * data.get('Paragraph_Separator', 0)
props += (2 ** 57) * data.get('Ellipsis', 0)
props += (2 ** 58) * data.get('Semi_Colon', 0)
props += (2 ** 59) * data.get('Colon', 0)

Loading…
Cancel
Save