Browse Source

ucd-tools: Full_Stop eSpeakNG extended property support.

master
Reece H. Dunn 8 years ago
parent
commit
07cd2b12e1

+ 2
- 0
.gitignore View File

@@ -129,6 +129,8 @@ src/pcaudiolib/
*.wixpdb
*.cab

!src/ucd-tools/data/*/*.txt

# Linux/Source distribution files

praat-mod/

+ 7
- 0
src/ucd-tools/CHANGELOG.md View File

@@ -1,5 +1,12 @@
# Change Log

## eSpeak NG

These are eSpeak NG specific modifications to the `ucd-tools` project:

* `data/espeak-ng` data files providing the eSpeak NG extended properties.
* eSpeak NG `PropList` extended property lookup as part of the `ucd_property` API.

## 9.0.0.1 - (In Progress)

* Add `iswblank` and `iswxdigit` compatibility.

+ 1
- 0
src/ucd-tools/Makefile.am View File

@@ -141,6 +141,7 @@ tests_printucddata_cpp_SOURCES = tests/printucddata_cpp.cpp
tests_printucddata_cpp_LDADD = src/libucd.la

tests/unicode-data.expected: tools/printdata.py tools/ucd.py \
data/espeak-ng/PropList.txt \
data/ucd/UnicodeData.txt \
data/ucd/PropList.txt \
data/ucd/DerivedCoreProperties.txt \

+ 51
- 0
src/ucd-tools/data/espeak-ng/PropList.txt View File

@@ -0,0 +1,51 @@
# espeak-ng/PropList-9.0.0.txt
# Date: 2017-04-17, 11:35:00 GMT
# Copyright (C) 2017 Reece H. Dunn
#
# This is an extension to the Unicode Character Database PropList.txt file,
# providing classification data used by the eSpeak NG Text-to-Speech
# program. It uses the same format as the PropList.txt file.
#
# This data is licensed under the same terms as the Unicode Character
# Database.
#
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in
# the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/

# ================================================

002E ; Full_Stop # Po FULL STOP
0589 ; Full_Stop # Po ARMENIAN FULL STOP
06D4 ; Full_Stop # Po ARABIC FULL STOP
0701 ; Full_Stop # Po SYRIAC SUPRALINEAR FULL STOP
0702 ; Full_Stop # Po SYRIAC SUBLINEAR FULL STOP
0964 ; Full_Stop # Po DEVANAGARI DANDA
0DF4 ; Full_Stop # Po SINHALA PUNCTUATION KUNDDALIYA
0F0D ; Full_Stop # Po TIBETAN MARK SHAD
1362 ; Full_Stop # Po ETHIOPIC FULL STOP
166E ; Full_Stop # Po CANADIAN SYLLABICS FULL STOP
1803 ; Full_Stop # Po MONGOLIAN FULL STOP
1809 ; Full_Stop # Po MONGOLIAN MANCHU FULL STOP
2488..249B ; Full_Stop # No [20] DIGIT ONE FULL STOP..NUMBER TWENTY FULL STOP
2CF9 ; Full_Stop # Po COPTIC OLD NUBIAN FULL STOP
2CFE ; Full_Stop # Po COPTIC FULL STOP
2E3C ; Full_Stop # Po STENOGRAPHIC FULL STOP
3002 ; Full_Stop # Po IDEOGRAPHIC FULL STOP
A4FF ; Full_Stop # Po LISU PUNCTUATION FULL STOP
A60E ; Full_Stop # Po VAI FULL STOP
A6F3 ; Full_Stop # Po BAMUM FULL STOP
FE12 ; Full_Stop # Po PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP
FE52 ; Full_Stop # Po SMALL FULL STOP
FF0E ; Full_Stop # Po FULLWIDTH FULL STOP
FF61 ; Full_Stop # Po HALFWIDTH IDEOGRAPHIC FULL STOP
16AF5 ; Full_Stop # Po BASSA VAH FULL STOP
1BC9F ; Full_Stop # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
1DA88 ; Full_Stop # Po SIGNWRITING FULL STOP
1F100 ; Full_Stop # No DIGIT ZERO FULL STOP
E002E ; Full_Stop # Cf TAG FULL STOP

# Total code points: 48

+ 3
- 0
src/ucd-tools/src/include/ucd/ucd.h View File

@@ -362,6 +362,9 @@ static const ucd_property UCD_PROPERTY_PATTERN_WHITE_SPACE = 0x00
static const ucd_property UCD_PROPERTY_PATTERN_SYNTAX = 0x0000000080000000ull; /**< @brief Pattern_Syntax */
static const ucd_property UCD_PROPERTY_PREPENDED_CONCATENATION_MARK = 0x0000000100000000ull; /**< @brief Prepended_Concatenation_Mark */

// eSpeak NG extended properties (defined in data/espeak-ng/PropList.txt).
// Full_Stop is bit 63, matching the `2 ** 63` weight used in tools/printdata.py.
static const ucd_property ESPEAKNG_PROPERTY_FULL_STOP = 0x8000000000000000ull; /**< @brief Full_Stop */

/** @brief Return the properties of the specified codepoint.
*
* @param c The Unicode codepoint to lookup.

+ 40
- 21
src/ucd-tools/src/proplist.c View File

@@ -59,6 +59,7 @@ static ucd_property properties_Cf(codepoint_t c)
break;
case 0x0E0000:
if (c == 0x0E0001) return UCD_PROPERTY_DEPRECATED;
if (c == 0x0E002E) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x0E0020 && c <= 0x0E007F) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
break;
}
@@ -1196,6 +1197,12 @@ static ucd_property properties_No(codepoint_t c)
case 0x1900:
if (c == 0x19DA) return UCD_PROPERTY_OTHER_ID_CONTINUE;
break;
case 0x2400:
if (c >= 0x2488 && c <= 0x249B) return ESPEAKNG_PROPERTY_FULL_STOP;
break;
case 0x01F100:
if (c == 0x01F100) return ESPEAKNG_PROPERTY_FULL_STOP;
break;
}
return 0;
}
@@ -1321,7 +1328,7 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x0022) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x0027) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x003A && c <= 0x003B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x003F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x00B7) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_EXTENDER | UCD_PROPERTY_OTHER_ID_CONTINUE;
@@ -1331,17 +1338,18 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x0387) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_OTHER_ID_CONTINUE;
break;
case 0x0500:
if (c == 0x0589) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x0589) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x05C3) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
case 0x0600:
if (c == 0x060C) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x061B) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x061F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x06D4) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x06D4) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
break;
case 0x0700:
if (c >= 0x0700 && c <= 0x0702) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x0700) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c >= 0x0701 && c <= 0x0702) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x0703 && c <= 0x070A) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x070C) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x07F8) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
@@ -1352,27 +1360,32 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x085E) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
case 0x0900:
if (c >= 0x0964 && c <= 0x0965) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x0964) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x0965) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
break;
case 0x0D00:
if (c == 0x0DF4) return ESPEAKNG_PROPERTY_FULL_STOP;
break;
case 0x0E00:
if (c >= 0x0E5A && c <= 0x0E5B) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
case 0x0F00:
if (c == 0x0F08) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c >= 0x0F0D && c <= 0x0F12) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x0F0D) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x0F0E && c <= 0x0F12) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
case 0x1000:
if (c >= 0x104A && c <= 0x104B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
break;
case 0x1300:
if (c == 0x1361) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1362) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x1362) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x1363 && c <= 0x1366) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c >= 0x1367 && c <= 0x1368) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
break;
case 0x1600:
if (c == 0x166D) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x166E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x166E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x16EB && c <= 0x16ED) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
case 0x1700:
@@ -1381,10 +1394,10 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x17DA) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
case 0x1800:
if (c == 0x1803) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x1803) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x1802 && c <= 0x1805) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1808) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0x1809) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x1809) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x180A) return UCD_PROPERTY_EXTENDER;
break;
case 0x1900:
@@ -1418,25 +1431,30 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x2053) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_DASH;
if (c >= 0x2055 && c <= 0x205E) return UCD_PROPERTY_PATTERN_SYNTAX;
break;
case 0x2C00:
if (c == 0x2CF9) return ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x2CFE) return ESPEAKNG_PROPERTY_FULL_STOP;
break;
case 0x2E00:
if (c == 0x2E2E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2E3C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2E3C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x2E41) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX;
return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x3000:
if (c == 0x3001) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x3002) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x3002) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x3003) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x30FB) return UCD_PROPERTY_HYPHEN;
break;
case 0xA400:
if (c == 0xA4FE) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xA4FF) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0xA4FF) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
break;
case 0xA600:
if (c == 0xA60D) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c >= 0xA60E && c <= 0xA60F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0xA6F3) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0xA60E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xA60F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0xA6F3) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0xA6F4 && c <= 0xA6F6) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xA6F7) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
break;
@@ -1459,9 +1477,10 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0xABEB) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
break;
case 0xFE00:
if (c == 0xFE12) return ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0xFE45 && c <= 0xFE46) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0xFE50 && c <= 0xFE51) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xFE52) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0xFE52) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0xFE54 && c <= 0xFE55) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c >= 0xFE56 && c <= 0xFE57) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0xFE61) return UCD_PROPERTY_OTHER_MATH;
@@ -1472,12 +1491,12 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0xFF02) return UCD_PROPERTY_QUOTATION_MARK;
if (c == 0xFF07) return UCD_PROPERTY_QUOTATION_MARK;
if (c == 0xFF0C) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xFF0E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0xFF0E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xFF3C) return UCD_PROPERTY_OTHER_MATH;
if (c == 0xFF65) return UCD_PROPERTY_HYPHEN;
if (c >= 0xFF1A && c <= 0xFF1B) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
if (c == 0xFF1F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0xFF61) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0xFF61) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0xFF64) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
case 0x10300:
@@ -1541,7 +1560,7 @@ static ucd_property properties_Po(codepoint_t c)
break;
case 0x16A00:
if (c >= 0x016A6E && c <= 0x016A6F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x016AF5) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x016AF5) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
break;
case 0x16B00:
if (c >= 0x016B37 && c <= 0x016B38) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
@@ -1549,10 +1568,10 @@ static ucd_property properties_Po(codepoint_t c)
if (c == 0x016B44) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
break;
case 0x1BC00:
if (c == 0x01BC9F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x01BC9F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
break;
case 0x1DA00:
if (c == 0x01DA88) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x01DA88) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP;
if (c >= 0x01DA87 && c <= 0x01DA8A) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
}

+ 10
- 2
src/ucd-tools/tools/printdata.py View File

@@ -26,12 +26,18 @@ csur_rootdir = 'data/csur'

null = ucd.CodePoint('0000')

# (directory, file name) pairs of the property files to load; each pair is
# passed to ucd.parse_ucd_data() by the loading loop that follows.
# NOTE(review): this file also defines `def properties(data)` further down. If
# that definition is at module level it rebinds this name once it executes, so
# this list is only usable before that point -- confirm, or consider renaming.
properties = [
(ucd_rootdir, 'PropList'),
(ucd_rootdir, 'DerivedCoreProperties'),
('data/espeak-ng', 'PropList')
]

unicode_chars = {}
for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
for codepoint in data['CodePoint']:
unicode_chars[codepoint] = data
for propfile in ['PropList', 'DerivedCoreProperties']:
for data in ucd.parse_ucd_data(ucd_rootdir, propfile):
for propdir, propfile in properties:
for data in ucd.parse_ucd_data(propdir, propfile):
for codepoint in data['Range']:
try:
unicode_chars[codepoint][data['Property']] = 1
@@ -154,6 +160,8 @@ def properties(data):
props += (2 ** 30) * data.get('Pattern_White_Space', 0)
props += (2 ** 31) * data.get('Pattern_Syntax', 0)
props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0)
# eSpeak NG extended properties:
props += (2 ** 63) * data.get('Full_Stop', 0)
return props

if __name__ == '__main__':

Loading…
Cancel
Save