Browse Source

ucd-tools: Add support for the eSpeak NG extended property Paragraph_Separator.

master
Reece H. Dunn 8 years ago
parent
commit
2e375362c4

+ 12
- 1
src/ucd-tools/data/espeak-ng/PropList.txt View File

# espeak-ng/PropList-9.0.0.txt # espeak-ng/PropList-9.0.0.txt
# Date: 2017-04-17, 17:17:00 GMT
# Date: 2017-04-17, 20:19:00 GMT
# Copyright (C) 2017 Reece H. Dunn # Copyright (C) 2017 Reece H. Dunn
# #
# This is an extension to the Unicode Character Database PropList.txt file, # This is an extension to the Unicode Character Database PropList.txt file,
FE19 ; Ellipsis # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS FE19 ; Ellipsis # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS


# Total code points: 8 # Total code points: 8

# ================================================

0700 ; Paragraph_Separator # Po SYRIAC END OF PARAGRAPH
0965 ; Paragraph_Separator # Po DEVANAGARI DOUBLE DANDA
0F0E ; Paragraph_Separator # Po TIBETAN MARK NYIS SHAD
10FB ; Paragraph_Separator # Po GEORGIAN PARAGRAPH SEPARATOR
1368 ; Paragraph_Separator # Po ETHIOPIC PARAGRAPH SEPARATOR
2029 ; Paragraph_Separator # Zp PARAGRAPH SEPARATOR

# Total code points: 6

+ 1
- 0
src/ucd-tools/src/include/ucd/ucd.h View File

static const ucd_property UCD_PROPERTY_PREPENDED_CONCATENATION_MARK = 0x0000000100000000ull; /**< @brief Prepended_Concatenation_Mark */ static const ucd_property UCD_PROPERTY_PREPENDED_CONCATENATION_MARK = 0x0000000100000000ull; /**< @brief Prepended_Concatenation_Mark */


// eSpeak NG extended properties: // eSpeak NG extended properties:
static const ucd_property ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR = 0x0100000000000000ull; /**< @brief Paragraph_Separator */
static const ucd_property ESPEAKNG_PROPERTY_ELLIPSIS = 0x0200000000000000ull; /**< @brief Ellipsis */ static const ucd_property ESPEAKNG_PROPERTY_ELLIPSIS = 0x0200000000000000ull; /**< @brief Ellipsis */
static const ucd_property ESPEAKNG_PROPERTY_SEMI_COLON = 0x0400000000000000ull; /**< @brief Semi_Colon */ static const ucd_property ESPEAKNG_PROPERTY_SEMI_COLON = 0x0400000000000000ull; /**< @brief Semi_Colon */
static const ucd_property ESPEAKNG_PROPERTY_COLON = 0x0800000000000000ull; /**< @brief Colon */ static const ucd_property ESPEAKNG_PROPERTY_COLON = 0x0800000000000000ull; /**< @brief Colon */

+ 6
- 4
src/ucd-tools/src/proplist.c View File

if (c == 0x06D4) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP; if (c == 0x06D4) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
break; break;
case 0x0700: case 0x0700:
if (c == 0x0700) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x0700) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR;
if (c == 0x0701) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP; if (c == 0x0701) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x0702) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_COMMA; if (c == 0x0702) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x0703) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_EXCLAMATION_MARK; if (c == 0x0703) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
break; break;
case 0x0900: case 0x0900:
if (c == 0x0964) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP; if (c == 0x0964) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x0965) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x0965) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR;
break; break;
case 0x0D00: case 0x0D00:
if (c == 0x0DF4) return ESPEAKNG_PROPERTY_FULL_STOP; if (c == 0x0DF4) return ESPEAKNG_PROPERTY_FULL_STOP;
case 0x0F00: case 0x0F00:
if (c == 0x0F08) return UCD_PROPERTY_TERMINAL_PUNCTUATION; if (c == 0x0F08) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x0F0D) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_FULL_STOP; if (c == 0x0F0D) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x0F0E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR;
if (c >= 0x0F0E && c <= 0x0F12) return UCD_PROPERTY_TERMINAL_PUNCTUATION; if (c >= 0x0F0E && c <= 0x0F12) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x0F14) return ESPEAKNG_PROPERTY_COMMA; if (c == 0x0F14) return ESPEAKNG_PROPERTY_COMMA;
break; break;
case 0x1000: case 0x1000:
if (c >= 0x104A && c <= 0x104B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; if (c >= 0x104A && c <= 0x104B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x10FB) return ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR;
break; break;
case 0x1300: case 0x1300:
if (c == 0x1361) return UCD_PROPERTY_TERMINAL_PUNCTUATION; if (c == 0x1361) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1364) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_SEMI_COLON; if (c == 0x1364) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_SEMI_COLON;
if (c >= 0x1365 && c <= 0x1366) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COLON; if (c >= 0x1365 && c <= 0x1366) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_COLON;
if (c == 0x1367) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK; if (c == 0x1367) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x1368) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x1368) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR;
break; break;
case 0x1600: case 0x1600:
if (c == 0x166D) return UCD_PROPERTY_TERMINAL_PUNCTUATION; if (c == 0x166D) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
case UCD_CATEGORY_Sm: return properties_Sm(c); case UCD_CATEGORY_Sm: return properties_Sm(c);
case UCD_CATEGORY_So: return properties_So(c); case UCD_CATEGORY_So: return properties_So(c);
case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE; case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE;
case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE;
case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR;
case UCD_CATEGORY_Zs: return properties_Zs(c); case UCD_CATEGORY_Zs: return properties_Zs(c);
default: return 0; // Co Cs Ii Lt Me default: return 0; // Co Cs Ii Lt Me
}; };

+ 1
- 0
src/ucd-tools/tools/printdata.py View File

props += (2 ** 31) * data.get('Pattern_Syntax', 0) props += (2 ** 31) * data.get('Pattern_Syntax', 0)
props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0) props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0)
# eSpeak NG extended properties: # eSpeak NG extended properties:
props += (2 ** 56) * data.get('Paragraph_Separator', 0)
props += (2 ** 57) * data.get('Ellipsis', 0) props += (2 ** 57) * data.get('Ellipsis', 0)
props += (2 ** 58) * data.get('Semi_Colon', 0) props += (2 ** 58) * data.get('Semi_Colon', 0)
props += (2 ** 59) * data.get('Colon', 0) props += (2 ** 59) * data.get('Colon', 0)

Loading…
Cancel
Save