Valdis Vitolins 8 years ago
parent
commit
7431a4bbb4

+ 1
- 0
src/ucd-tools/.gitignore View File



# build output: # build output:


data/emoji
data/ucd data/ucd


src/libucd.la src/libucd.la

+ 7
- 1
src/ucd-tools/Makefile.am View File



EXTRA_DIST += ChangeLog EXTRA_DIST += ChangeLog


############################# Unicode Character Database ######################
############################# Unicode Data ####################################


EMOJI_VERSION=4.0
UCD_VERSION=@UCD_VERSION@ UCD_VERSION=@UCD_VERSION@
UCD_ROOTDIR=data/ucd UCD_ROOTDIR=data/ucd
UCD_SRCDIR=http://www.unicode.org/Public UCD_SRCDIR=http://www.unicode.org/Public


data/emoji/emoji-data.txt:
mkdir -pv data/emoji
curl ${UCD_SRCDIR}/emoji/${EMOJI_VERSION}/emoji-data.txt > $@

data/ucd/PropList.txt: data/ucd/PropList.txt:
mkdir -pv data/ucd mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt > $@ curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt > $@
tests_printucddata_cpp_LDADD = src/libucd.la tests_printucddata_cpp_LDADD = src/libucd.la


tests/unicode-data.expected: tools/printdata.py tools/ucd.py \ tests/unicode-data.expected: tools/printdata.py tools/ucd.py \
data/emoji/emoji-data.txt \
data/espeak-ng/PropList.txt \ data/espeak-ng/PropList.txt \
data/ucd/UnicodeData.txt \ data/ucd/UnicodeData.txt \
data/ucd/PropList.txt \ data/ucd/PropList.txt \

+ 9
- 96
src/ucd-tools/README.md View File

# Unicode Character Database Tools # Unicode Character Database Tools


- [Data Files](#data-files)
- [Unicode Character Database](#unicode-character-database)
- [ConScript Unicode Registry](#conscript-unicode-registry)
- [C Library](#c-library)
- [Querying Properties](#querying-properties)
- [Case Conversion](#case-conversion)
- [wctype Compatibility](#wctype-compatibility)
- [Build Dependencies](#build-dependencies) - [Build Dependencies](#build-dependencies)
- [Debian](#debian) - [Debian](#debian)
- [Building](#building) - [Building](#building)


---------- ----------


The Unicode Character Database (UCD) Tools is a set of Python tools and a C
library with a C++ API binding. The Python tools are designed to support
extracting and processing data from the text-based UCD source files, while
the C library is designed to provide easy access to this information within
a C or C++ program.
The Unicode Character Database (UCD) Tools is a set of Python tools and a
[C library](src/include/ucd/ucd.h) with a C++ API binding. The Python tools
are designed to support extracting and processing data from the text-based
UCD source files, while the C library is designed to provide easy access to
this information within a C or C++ program.


## Data Files
The project uses and supports the following sources of Unicode codepoint data:


The `ucd-tools` project supports UCD-formatted data files from several
different sources.

### Unicode Character Database

The following [Unicode Character Database](http://www.unicode.org/Public/9.0.0/ucd/)
files are supported:

* Blocks
* DerivedAge
* PropList
* PropertyValueAliases
* Scripts
* UnicodeData

### ConScript Unicode Registry

If enabled, the following data from the
[ConScript Unicode Registry](http://www.evertype.com/standards/csur/) (CSUR) is
added:

| Code Range | Script |
|--------------|---------|
| `F8D0-F8FF` | [Klingon](http://www.evertype.com/standards/csur/klingon.html) |

This data is located in the `data/csur` directory in a form compatible with the
Unicode Character Database files.

## C Library

The C library provides several different facilities that make use of the UCD
data. It provides a compact and efficient representation of the different data
tables.

Detailed documentation is provided in the `src/include/ucd/ucd.h` file in the
Doxygen documentation format.

### Querying Properties

The library exposes the following properties from the UCD data files:

| C API | C++ API | Data | Description |
|-----------------------|------------------------|-------------|-------------|
| `ucd_lookup_category` | `ucd::lookup_category` | UnicodeData | A [General Category Value](http://www.unicode.org/reports/tr44/#General_Category_Values). |
| `ucd_lookup_script` | `ucd::lookup_script` | Script | An [ISO 15924](http://www.unicode.org/iso15924/iso15924-codes.html) script code. |
| `ucd_properties` | `ucd::properties` | PropList | The code point properties from the PropList Unicode data file. |

### Case Conversion

The following character conversion functions are provided:

| C API | C++ API | Description |
|---------------|----------------|-------------|
| `ucd_tolower` | `ucd::tolower` | convert letters to lower case |
| `ucd_totitle` | `ucd::totitle` | convert letters to title case (UCD extension) |
| `ucd_toupper` | `ucd::toupper` | convert letters to upper case |

__NOTE:__ These functions use the simple case mapping algorithm. That is, they
only ever map to a single character. This keeps their signatures compatible
with the standard C `wctype.h` APIs.

### wctype Compatibility

To facilitate working on platforms that don't have a useable wide-character
ctypes library, or to provide a more consistent behaviour, the `ucd-tools`
C library provides a set of APIs that are compatible with `wctype.h`.

The following character classification functions are provided:

| C API | C++ API |
|----------------|-----------------|
| `ucd_isalnum` | `ucd::isalnum` |
| `ucd_isalpha` | `ucd::isalpha` |
| `ucd_isblank` | `ucd::isblank` |
| `ucd_iscntrl` | `ucd::iscntrl` |
| `ucd_isdigit` | `ucd::isdigit` |
| `ucd_isgraph` | `ucd::isgraph` |
| `ucd_islower` | `ucd::islower` |
| `ucd_isprint` | `ucd::isprint` |
| `ucd_ispunct` | `ucd::ispunct` |
| `ucd_isspace` | `ucd::isspace` |
| `ucd_isupper` | `ucd::isupper` |
| `ucd_isxdigit` | `ucd::isxdigit` |
* [Unicode Character Database](http://www.unicode.org/Public/9.0.0/ucd/) 9.0.0
* [Unicode Emoji](http://www.unicode.org/Public/emoji/4.0/) 4.0 (UTR #51)
* [ConScript Unicode Registry](http://www.evertype.com/standards/csur/)


## Build Dependencies ## Build Dependencies



+ 9
- 0
src/ucd-tools/src/include/ucd/ucd.h View File

/** @brief Properties /** @brief Properties
*/ */
typedef uint64_t ucd_property; typedef uint64_t ucd_property;

#define UCD_PROPERTY_WHITE_SPACE 0x0000000000000001ull /**< @brief White_Space */ #define UCD_PROPERTY_WHITE_SPACE 0x0000000000000001ull /**< @brief White_Space */
#define UCD_PROPERTY_BIDI_CONTROL 0x0000000000000002ull /**< @brief Bidi_Control */ #define UCD_PROPERTY_BIDI_CONTROL 0x0000000000000002ull /**< @brief Bidi_Control */
#define UCD_PROPERTY_JOIN_CONTROL 0x0000000000000004ull /**< @brief Join_Control */ #define UCD_PROPERTY_JOIN_CONTROL 0x0000000000000004ull /**< @brief Join_Control */
#define UCD_PROPERTY_PATTERN_WHITE_SPACE 0x0000000040000000ull /**< @brief Pattern_White_Space */ #define UCD_PROPERTY_PATTERN_WHITE_SPACE 0x0000000040000000ull /**< @brief Pattern_White_Space */
#define UCD_PROPERTY_PATTERN_SYNTAX 0x0000000080000000ull /**< @brief Pattern_Syntax */ #define UCD_PROPERTY_PATTERN_SYNTAX 0x0000000080000000ull /**< @brief Pattern_Syntax */
#define UCD_PROPERTY_PREPENDED_CONCATENATION_MARK 0x0000000100000000ull /**< @brief Prepended_Concatenation_Mark */ #define UCD_PROPERTY_PREPENDED_CONCATENATION_MARK 0x0000000100000000ull /**< @brief Prepended_Concatenation_Mark */
#define UCD_PROPERTY_EMOJI 0x0000000200000000ull /**< @brief Emoji */
#define UCD_PROPERTY_EMOJI_PRESENTATION 0x0000000400000000ull /**< @brief Emoji_Presentation */
#define UCD_PROPERTY_EMOJI_MODIFIER 0x0000000800000000ull /**< @brief Emoji_Modifier */
#define UCD_PROPERTY_EMOJI_MODIFIER_BASE 0x0000001000000000ull /**< @brief Emoji_Modifier_Base */


// eSpeak NG extended properties: // eSpeak NG extended properties:
#define ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION 0x0010000000000000ull /**< @brief Inverted_Terminal_Punctuation */ #define ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION 0x0010000000000000ull /**< @brief Inverted_Terminal_Punctuation */
Pattern_White_Space = UCD_PROPERTY_PATTERN_WHITE_SPACE, /**< @brief Pattern_White_Space */ Pattern_White_Space = UCD_PROPERTY_PATTERN_WHITE_SPACE, /**< @brief Pattern_White_Space */
Pattern_Syntax = UCD_PROPERTY_PATTERN_SYNTAX, /**< @brief Pattern_Syntax */ Pattern_Syntax = UCD_PROPERTY_PATTERN_SYNTAX, /**< @brief Pattern_Syntax */
Prepended_Concatenation_Mark = UCD_PROPERTY_PREPENDED_CONCATENATION_MARK, /**< @brief Prepended_Concatenation_Mark */ Prepended_Concatenation_Mark = UCD_PROPERTY_PREPENDED_CONCATENATION_MARK, /**< @brief Prepended_Concatenation_Mark */
Emoji = UCD_PROPERTY_EMOJI, /**< @brief Emoji */
Emoji_Presentation = UCD_PROPERTY_EMOJI_PRESENTATION, /**< @brief Emoji_Presentation */
Emoji_Modifier = UCD_PROPERTY_EMOJI_MODIFIER, /**< @brief Emoji_Modifier */
Emoji_Modifier_Base = UCD_PROPERTY_EMOJI_MODIFIER_BASE, /**< @brief Emoji_Modifier_Base */
}; };


/** @brief Return the properties of the specified codepoint. /** @brief Return the properties of the specified codepoint.

+ 246
- 24
src/ucd-tools/src/proplist.c View File

if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c == 0x2139) return UCD_PROPERTY_EMOJI;
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2148 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x2148 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
switch (c & 0xFFFFFF00) switch (c & 0xFFFFFF00)
{ {
case 0x0000: case 0x0000:
if (c >= 0x0030 && c <= 0x0039) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT;
if (c >= 0x0030 && c <= 0x0039) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT | UCD_PROPERTY_EMOJI;
break; break;
case 0xFF00: case 0xFF00:
if (c >= 0xFF10 && c <= 0xFF19) return UCD_PROPERTY_HEX_DIGIT; if (c >= 0xFF10 && c <= 0xFF19) return UCD_PROPERTY_HEX_DIGIT;
return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX;
case 0x3000: case 0x3000:
if (c == 0x301C) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x301C) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x3030) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x3030) return UCD_PROPERTY_DASH | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
break; break;
case 0xFE00: case 0xFE00:
if (c >= 0xFE31 && c <= 0xFE32) return UCD_PROPERTY_DASH | ESPEAKNG_PROPERTY_EXTENDED_DASH; if (c >= 0xFE31 && c <= 0xFE32) return UCD_PROPERTY_DASH | ESPEAKNG_PROPERTY_EXTENDED_DASH;
case 0x0000: case 0x0000:
if (c == 0x0021) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK; if (c == 0x0021) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x0022) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x0022) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x0023) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x0027) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x0027) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x002A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA; if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP; if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON; if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON;
if (c >= 0x2020 && c <= 0x2027) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x2020 && c <= 0x2027) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2032 && c <= 0x2034) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_OTHER_MATH; if (c >= 0x2032 && c <= 0x2034) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_OTHER_MATH;
if (c >= 0x2030 && c <= 0x2038) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x2030 && c <= 0x2038) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x203C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x203C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK | UCD_PROPERTY_EMOJI;
if (c == 0x203D) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x203D) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x203B && c <= 0x203E) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x203B && c <= 0x203E) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2041 && c <= 0x2043) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x2041 && c <= 0x2043) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2047) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK; if (c == 0x2047) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c >= 0x2048 && c <= 0x2049) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x2048) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x2049) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_EXCLAMATION_MARK | UCD_PROPERTY_EMOJI;
if (c == 0x204F) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_SEMI_COLON; if (c == 0x204F) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_SEMI_COLON;
if (c >= 0x204A && c <= 0x2051) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x204A && c <= 0x2051) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2053) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_DASH; if (c == 0x2053) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_DASH;
if (c == 0x3001) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER; if (c == 0x3001) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER;
if (c == 0x3002) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER; if (c == 0x3002) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER;
if (c == 0x3003) return UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x3003) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x303D) return UCD_PROPERTY_EMOJI;
if (c == 0x30FB) return UCD_PROPERTY_HYPHEN; if (c == 0x30FB) return UCD_PROPERTY_HYPHEN;
break; break;
case 0xA400: case 0xA400:
if (c == 0xFF40) return UCD_PROPERTY_DIACRITIC; if (c == 0xFF40) return UCD_PROPERTY_DIACRITIC;
if (c == 0xFFE3) return UCD_PROPERTY_DIACRITIC; if (c == 0xFFE3) return UCD_PROPERTY_DIACRITIC;
break; break;
case 0x01F300:
return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER;
} }
return 0; return 0;
} }
break; break;
case 0x2100: case 0x2100:
if (c == 0x2118) return UCD_PROPERTY_OTHER_ID_START; if (c == 0x2118) return UCD_PROPERTY_OTHER_ID_START;
if (c == 0x2194) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x2190) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x2190) return UCD_PROPERTY_PATTERN_SYNTAX;
break; break;
case 0x2200: case 0x2200:
if (c >= 0x22EE && c <= 0x22F1) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_ELLIPSIS; if (c >= 0x22EE && c <= 0x22F1) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_ELLIPSIS;
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2300: case 0x2300:
return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2500: case 0x2500:
if (c >= 0x25FB && c <= 0x25FC) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x25FD && c <= 0x25FE) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2600: case 0x2600:
case 0x2700: case 0x2700:
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2900: case 0x2900:
if (c >= 0x2934 && c <= 0x2935) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x2982) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON; if (c == 0x2982) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON;
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2A00: case 0x2A00:
switch (c & 0xFFFFFF00) switch (c & 0xFFFFFF00)
{ {
case 0x0000: case 0x0000:
if (c == 0x00A9) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x00AE) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2100: case 0x2100:
if (c == 0x2122) return UCD_PROPERTY_EMOJI;
if (c == 0x2129) return UCD_PROPERTY_OTHER_MATH; if (c == 0x2129) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x212E) return UCD_PROPERTY_OTHER_ID_START; if (c == 0x212E) return UCD_PROPERTY_OTHER_ID_START;
if (c >= 0x2195 && c <= 0x2199) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x219C && c <= 0x219F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x21A1 && c <= 0x21A2) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x21A4 && c <= 0x21A5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x21A8) return UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x21A8) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x21A7 && c <= 0x21AD) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x21A9) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x21AA) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x2195 && c <= 0x2199) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x219C && c <= 0x21AD) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x21B0 && c <= 0x21B1) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x21B0 && c <= 0x21B1) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x21B6 && c <= 0x21B7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x21B6 && c <= 0x21B7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x21AF && c <= 0x21BB) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x21AF && c <= 0x21BB) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x21D5 && c <= 0x21F3) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x21D5 && c <= 0x21F3) return UCD_PROPERTY_PATTERN_SYNTAX;
break; break;
case 0x2300: case 0x2300:
if (c >= 0x2300 && c <= 0x2307) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c <= 0x2307) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x231A && c <= 0x231B) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x230C && c <= 0x231F) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x230C && c <= 0x231F) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2322 && c <= 0x2328) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2322 && c <= 0x2327) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2328) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x232B && c <= 0x237B) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x232B && c <= 0x237B) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x237D && c <= 0x239A) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x237D && c <= 0x239A) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x23B4 && c <= 0x23B5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x23B4 && c <= 0x23B5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x23B7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x23B7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x23CF) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x23D0) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x23D0) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x23B4 && c <= 0x23DB) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x23B4 && c <= 0x23DB) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x23E2) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x23E2) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x23E9 && c <= 0x23EC) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x23F0) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x23E9 && c <= 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x23F8 && c <= 0x23FA) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x23E3 && c <= 0x23FE) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x23E3 && c <= 0x23FE) return UCD_PROPERTY_PATTERN_SYNTAX;
break; break;
case 0x2400: case 0x2400:
if (c >= 0x2400 && c <= 0x2426) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2440 && c <= 0x244A) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2400 && c <= 0x244A) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x24C2) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE | UCD_PROPERTY_EMOJI;
if (c >= 0x24B6 && c <= 0x24CF) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE; if (c >= 0x24B6 && c <= 0x24CF) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE;
if (c >= 0x24D0 && c <= 0x24E9) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_LOWERCASE; if (c >= 0x24D0 && c <= 0x24E9) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_LOWERCASE;
break; break;
case 0x2500: case 0x2500:
if (c >= 0x25A0 && c <= 0x25A1) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x25A0 && c <= 0x25A1) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x25AE && c <= 0x25B6) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x25BC && c <= 0x25C0) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x25AA && c <= 0x25AB) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x25AE && c <= 0x25B5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x25B6) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x25BC && c <= 0x25BF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x25C0) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x25C6 && c <= 0x25C7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x25C6 && c <= 0x25C7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x25CA && c <= 0x25CB) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x25CA && c <= 0x25CB) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x25CF && c <= 0x25D3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x25CF && c <= 0x25D3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x25E7 && c <= 0x25EC) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x25E7 && c <= 0x25EC) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2600: case 0x2600:
if (c <= 0x2604) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x2605 && c <= 0x2606) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x2605 && c <= 0x2606) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2640) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2642) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2660 && c <= 0x2663) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x260E) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x2611) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x2614 && c <= 0x2615) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x2618) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x261D) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x2620) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x2622 && c <= 0x2623) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x2626) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x262A) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x262E && c <= 0x262F) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x2638 && c <= 0x263A) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x2640) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x2642) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x2648 && c <= 0x2653) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x2660) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x2661 && c <= 0x2662) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2663) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x2665 && c <= 0x2666) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x2668) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x266D && c <= 0x266E) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x266D && c <= 0x266E) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x267B) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x267F) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x2693) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x2692 && c <= 0x2697) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x2699) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x269B && c <= 0x269C) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x26A0) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x26A1) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x26AA && c <= 0x26AB) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x26B0 && c <= 0x26B1) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x26BD && c <= 0x26BE) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x26C4 && c <= 0x26C5) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x26C8) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x26CE) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x26CF) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x26D1) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x26D3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x26D4) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x26E9) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x26EA) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x26F4) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x26F0 && c <= 0x26F1) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x26F0 && c <= 0x26F5) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x26F7 && c <= 0x26F8) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x26F9) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x26FA) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x26FD) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2700: case 0x2700:
if (c >= 0x2753 && c <= 0x2754) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x2755) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x2757) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c >= 0x2762 && c <= 0x2763) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x2702) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x2705) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x270A && c <= 0x270B) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x270C && c <= 0x270D) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x2708 && c <= 0x270D) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x270F) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x2712) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x2714) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x2716) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x271D) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x2721) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x2728) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x2733 && c <= 0x2734) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x2744) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x2747) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x274C) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x274E) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x2753 && c <= 0x2754) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x2755) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x2757) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x2762) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x2763) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK | UCD_PROPERTY_EMOJI;
if (c == 0x2764) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x2795 && c <= 0x2797) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x27A1) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c == 0x27B0) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x27BF) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2800: case 0x2800:
return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2B00: case 0x2B00:
if (c >= 0x2B05 && c <= 0x2B07) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x2B1B && c <= 0x2B1C) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x2B50) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x2B55) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2E00: case 0x2E00:
if (c >= 0x2E80 && c <= 0x2E99) return UCD_PROPERTY_RADICAL; if (c >= 0x2E80 && c <= 0x2E99) return UCD_PROPERTY_RADICAL;
if (c >= 0x2E9B && c <= 0x2EF3) return UCD_PROPERTY_RADICAL; if (c >= 0x2E9B && c <= 0x2EF3) return UCD_PROPERTY_RADICAL;
break; break;
case 0x2F00: case 0x2F00:
if (c >= 0x2F00 && c <= 0x2FD5) return UCD_PROPERTY_RADICAL;
if (c <= 0x2FD5) return UCD_PROPERTY_RADICAL;
if (c >= 0x2FF0 && c <= 0x2FF1) return UCD_PROPERTY_IDS_BINARY_OPERATOR; if (c >= 0x2FF0 && c <= 0x2FF1) return UCD_PROPERTY_IDS_BINARY_OPERATOR;
if (c >= 0x2FF2 && c <= 0x2FF3) return UCD_PROPERTY_IDS_TRINARY_OPERATOR; if (c >= 0x2FF2 && c <= 0x2FF3) return UCD_PROPERTY_IDS_TRINARY_OPERATOR;
if (c >= 0x2FF4 && c <= 0x2FFB) return UCD_PROPERTY_IDS_BINARY_OPERATOR; if (c >= 0x2FF4 && c <= 0x2FFB) return UCD_PROPERTY_IDS_BINARY_OPERATOR;
if (c >= 0x3012 && c <= 0x3013) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x3012 && c <= 0x3013) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x3020) return UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x3020) return UCD_PROPERTY_PATTERN_SYNTAX;
break; break;
case 0x3200:
if (c == 0x3297) return UCD_PROPERTY_EMOJI;
if (c == 0x3299) return UCD_PROPERTY_EMOJI;
break;
case 0x01F000:
if (c == 0x01F004) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F0CF) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
break;
case 0x01F100: case 0x01F100:
if (c >= 0x01F130 && c <= 0x01F149) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE; if (c >= 0x01F130 && c <= 0x01F149) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE;
if (c >= 0x01F150 && c <= 0x01F169) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE; if (c >= 0x01F150 && c <= 0x01F169) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE;
if (c >= 0x01F170 && c <= 0x01F171) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE | UCD_PROPERTY_EMOJI;
if (c >= 0x01F17E && c <= 0x01F17F) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE | UCD_PROPERTY_EMOJI;
if (c >= 0x01F170 && c <= 0x01F189) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE; if (c >= 0x01F170 && c <= 0x01F189) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE;
break;
if (c == 0x01F18E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F191 && c <= 0x01F19A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F1E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
break;
case 0x01F200:
if (c == 0x01F201) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F202) return UCD_PROPERTY_EMOJI;
if (c == 0x01F21A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F22F) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F237) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F232 && c <= 0x01F23A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F250 && c <= 0x01F251) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
break;
case 0x01F300:
if (c <= 0x01F320) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F321) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F32D && c <= 0x01F335) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F337 && c <= 0x01F37C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F385) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F37E && c <= 0x01F393) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F324 && c <= 0x01F393) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F396 && c <= 0x01F397) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F399 && c <= 0x01F39B) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F3C2 && c <= 0x01F3C4) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F3C7) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F3A0 && c <= 0x01F3C9) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F3CA) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F3CB && c <= 0x01F3CC) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F3CF && c <= 0x01F3D3) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F3E0 && c <= 0x01F3F0) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F39E && c <= 0x01F3F0) return UCD_PROPERTY_EMOJI;
if (c == 0x01F3F3) return UCD_PROPERTY_EMOJI;
if (c == 0x01F3F4) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F3F5) return UCD_PROPERTY_EMOJI;
if (c == 0x01F3F7) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F3F8 && c <= 0x01F3FA) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
break;
case 0x01F400:
if (c == 0x01F43F) return UCD_PROPERTY_EMOJI;
if (c == 0x01F441) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F442 && c <= 0x01F443) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F446 && c <= 0x01F450) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F466 && c <= 0x01F469) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F46E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F470 && c <= 0x01F478) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F47C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F481 && c <= 0x01F483) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F485 && c <= 0x01F487) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F4AA) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F4FE) return 0;
if (c == 0x01F4FD) return UCD_PROPERTY_EMOJI;
return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
case 0x01F500:
if (c <= 0x01F53D) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F549 && c <= 0x01F54A) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F54B && c <= 0x01F54E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F550 && c <= 0x01F567) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F56F && c <= 0x01F570) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F574 && c <= 0x01F575) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F573 && c <= 0x01F579) return UCD_PROPERTY_EMOJI;
if (c == 0x01F57A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F587) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F58A && c <= 0x01F58D) return UCD_PROPERTY_EMOJI;
if (c == 0x01F590) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F595 && c <= 0x01F596) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F5A4) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F5A5) return UCD_PROPERTY_EMOJI;
if (c == 0x01F5A8) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F5B1 && c <= 0x01F5B2) return UCD_PROPERTY_EMOJI;
if (c == 0x01F5BC) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F5C2 && c <= 0x01F5C4) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F5D1 && c <= 0x01F5D3) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F5DC && c <= 0x01F5DE) return UCD_PROPERTY_EMOJI;
if (c == 0x01F5E1) return UCD_PROPERTY_EMOJI;
if (c == 0x01F5E3) return UCD_PROPERTY_EMOJI;
if (c == 0x01F5E8) return UCD_PROPERTY_EMOJI;
if (c == 0x01F5EF) return UCD_PROPERTY_EMOJI;
if (c == 0x01F5F3) return UCD_PROPERTY_EMOJI;
if (c == 0x01F5FA) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F5FB) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
break;
case 0x01F600:
if (c >= 0x01F645 && c <= 0x01F647) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F64B && c <= 0x01F64F) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c <= 0x01F64F) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F6A3) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F6B4 && c <= 0x01F6B6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F6C0) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F680 && c <= 0x01F6C5) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F6CC) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F6CB && c <= 0x01F6CF) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F6D0 && c <= 0x01F6D2) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F6E0 && c <= 0x01F6E5) return UCD_PROPERTY_EMOJI;
if (c == 0x01F6E9) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F6EB && c <= 0x01F6EC) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F6F0) return UCD_PROPERTY_EMOJI;
if (c == 0x01F6F3) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F6F4 && c <= 0x01F6F6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
break;
case 0x01F900:
if (c >= 0x01F918 && c <= 0x01F91C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F910 && c <= 0x01F91D) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F91E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F926) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F920 && c <= 0x01F927) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F930) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F93B) return 0;
if (c >= 0x01F93A && c <= 0x01F93C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F933 && c <= 0x01F93E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F946) return 0;
if (c >= 0x01F940 && c <= 0x01F94B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F950 && c <= 0x01F95E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F980 && c <= 0x01F991) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F9C0) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
return UCD_PROPERTY_EMOJI;
} }
return 0; return 0;
} }

+ 6
- 0
src/ucd-tools/tools/printdata.py View File

import ucd import ucd


ucd_rootdir = sys.argv[1] ucd_rootdir = sys.argv[1]
emoji_rootdir = 'data/emoji'
csur_rootdir = 'data/csur' csur_rootdir = 'data/csur'


null = ucd.CodePoint('0000') null = ucd.CodePoint('0000')
properties = [ properties = [
(ucd_rootdir, 'PropList'), (ucd_rootdir, 'PropList'),
(ucd_rootdir, 'DerivedCoreProperties'), (ucd_rootdir, 'DerivedCoreProperties'),
(emoji_rootdir, 'emoji-data'),
('data/espeak-ng', 'PropList') ('data/espeak-ng', 'PropList')
] ]


props += (2 ** 30) * data.get('Pattern_White_Space', 0) props += (2 ** 30) * data.get('Pattern_White_Space', 0)
props += (2 ** 31) * data.get('Pattern_Syntax', 0) props += (2 ** 31) * data.get('Pattern_Syntax', 0)
props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0) props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0)
props += (2 ** 33) * data.get('Emoji', 0) # emoji-data
props += (2 ** 34) * data.get('Emoji_Presentation', 0) # emoji-data
props += (2 ** 35) * data.get('Emoji_Modifier', 0) # emoji-data
props += (2 ** 36) * data.get('Emoji_Modifier_Base', 0) # emoji-data
# eSpeak NG extended properties: # eSpeak NG extended properties:
props += (2 ** 52) * data.get('Inverted_Terminal_Punctuation', 0) props += (2 ** 52) * data.get('Inverted_Terminal_Punctuation', 0)
props += (2 ** 53) * data.get('Punctuation_In_Word', 0) props += (2 ** 53) * data.get('Punctuation_In_Word', 0)

+ 4
- 0
src/ucd-tools/tools/ucd.py View File



data_items = { data_items = {
# Unicode Character Data: # Unicode Character Data:
'emoji-data': [
('Range', codepoint),
('Property', string)
],
'Blocks': [ 'Blocks': [
('Range', codepoint), ('Range', codepoint),
('Name', string) ('Name', string)

+ 276
- 6
tests/readclause.c View File

0, 17, 18, // #1 0, 17, 18, // #1
0, 20, 21, // in 0, 20, 21, // in
0, 23, 24, 25, // the 0, 23, 24, 25, // the
0, 27, 28, 29, 30, // race
0 };
0, 27, 28, 29, 30 }; // race


assert(set_text("Janet finished #1 in the race.", "en") == ENS_OK); assert(set_text("Janet finished #1 in the race.", "en") == ENS_OK);


charix_top = 0; charix_top = 0;
assert(ReadClause(translator, source, charix, &charix_top, N_TR_SOURCE, &tone2, voice_change_name) == (CLAUSE_PERIOD | CLAUSE_DOT_AFTER_LAST_WORD)); assert(ReadClause(translator, source, charix, &charix_top, N_TR_SOURCE, &tone2, voice_change_name) == (CLAUSE_PERIOD | CLAUSE_DOT_AFTER_LAST_WORD));
assert(!strcmp(source, "Janet finished #1 in the race ")); assert(!strcmp(source, "Janet finished #1 in the race "));
assert(charix_top == (sizeof(retix)/sizeof(retix[0])) - 2);
assert(charix_top == (sizeof(retix)/sizeof(retix[0])) - 1);
assert(!memcmp(charix, retix, sizeof(retix))); assert(!memcmp(charix, retix, sizeof(retix)));
assert(tone2 == 0); assert(tone2 == 0);
assert(voice_change_name[0] == 0); assert(voice_change_name[0] == 0);
3, -1, -1, 3, -1, -1,
4, -1, -1, -1, 4, -1, -1, -1,
5, -1, -1, -1, 5, -1, -1, -1,
6,
0 };
6 };


assert(set_text( assert(set_text(
"\xE2\x86\x94" // [2194] left right arrow "\xE2\x86\x94" // [2194] left right arrow
"\xF0\x9F\x90\x8B" // [1F40B] whale "\xF0\x9F\x90\x8B" // [1F40B] whale
"\xF0\x9F\x90\xAC" // [1F42C] dolphin "\xF0\x9F\x90\xAC" // [1F42C] dolphin
" ")); " "));
assert(charix_top == (sizeof(retix)/sizeof(retix[0])) - 2);
assert(charix_top == (sizeof(retix)/sizeof(retix[0])) - 1);
assert(!memcmp(charix, retix, sizeof(retix)));
assert(tone2 == 0);
assert(voice_change_name[0] == 0);
}

void
test_uts51_text_presentation_sequence()
{
	/*
	 * UTS-51 ED-8a: each base character is followed by U+FE0E (variation
	 * selector 15, text style).  The asserts below require ReadClause to
	 * return CLAUSE_EOF and to leave the text in `source` unchanged apart
	 * from a single trailing space.
	 */
#define TEXT_PRESENTATION_INPUT \
	"#\xEF\xB8\x8E"                 /* [0023 FE0E] number sign (text style) */ \
	"4\xEF\xB8\x8E"                 /* [0034 FE0E] digit four (text style) */ \
	"\xE2\x80\xBC\xEF\xB8\x8E"      /* [203C FE0E] double exclamation mark (text style) */ \
	"\xF0\x9F\x97\x92\xEF\xB8\x8E"  /* [1F5D2 FE0E] spiral note pad (text style) */

	printf("testing Emoji ... UTS-51 ED-8a. text presentation sequence\n");

	/*
	 * Expected charix contents: one entry per byte of `source`.  The first
	 * byte of every UTF-8 sequence carries an index, continuation bytes
	 * are -1, and the last entry belongs to the trailing space.
	 */
	short expected_ix[] = {
		0, 2, -1, -1,                   /* '#'    FE0E */
		3, 4, -1, -1,                   /* '4'    FE0E */
		5, -1, -1, 6, -1, -1,           /* 203C   FE0E */
		7, -1, -1, -1, 8, -1, -1,       /* 1F5D2  FE0E */
		9
	};

	assert(set_text(TEXT_PRESENTATION_INPUT, "en") == ENS_OK);

	charix_top = 0;
	assert(ReadClause(translator, source, charix, &charix_top, N_TR_SOURCE,
	                  &tone2, voice_change_name) == CLAUSE_EOF);
	assert(strcmp(source, TEXT_PRESENTATION_INPUT " ") == 0);
	assert(charix_top == (sizeof(expected_ix) / sizeof(expected_ix[0])) - 1);
	assert(memcmp(charix, expected_ix, sizeof(expected_ix)) == 0);
	assert(tone2 == 0);
	assert(voice_change_name[0] == 0);

#undef TEXT_PRESENTATION_INPUT
}

void
test_uts51_emoji_presentation_sequence()
{
	/*
	 * UTS-51 ED-9a: each base character is followed by U+FE0F (variation
	 * selector 16, emoji style).  The asserts below require ReadClause to
	 * return CLAUSE_EOF and to leave the text in `source` unchanged apart
	 * from a single trailing space.
	 */
#define EMOJI_PRESENTATION_INPUT \
	"#\xEF\xB8\x8F"                 /* [0023 FE0F] number sign (emoji style) */ \
	"4\xEF\xB8\x8F"                 /* [0034 FE0F] digit four (emoji style) */ \
	"\xE2\x80\xBC\xEF\xB8\x8F"      /* [203C FE0F] double exclamation mark (emoji style) */ \
	"\xF0\x9F\x97\x92\xEF\xB8\x8F"  /* [1F5D2 FE0F] spiral note pad (emoji style) */

	printf("testing Emoji ... UTS-51 ED-9a. emoji presentation sequence\n");

	/*
	 * Expected charix contents: one entry per byte of `source`.  The first
	 * byte of every UTF-8 sequence carries an index, continuation bytes
	 * are -1, and the last entry belongs to the trailing space.
	 */
	short expected_ix[] = {
		0, 2, -1, -1,                   /* '#'    FE0F */
		3, 4, -1, -1,                   /* '4'    FE0F */
		5, -1, -1, 6, -1, -1,           /* 203C   FE0F */
		7, -1, -1, -1, 8, -1, -1,       /* 1F5D2  FE0F */
		9
	};

	assert(set_text(EMOJI_PRESENTATION_INPUT, "en") == ENS_OK);

	charix_top = 0;
	assert(ReadClause(translator, source, charix, &charix_top, N_TR_SOURCE,
	                  &tone2, voice_change_name) == CLAUSE_EOF);
	assert(strcmp(source, EMOJI_PRESENTATION_INPUT " ") == 0);
	assert(charix_top == (sizeof(expected_ix) / sizeof(expected_ix[0])) - 1);
	assert(memcmp(charix, expected_ix, sizeof(expected_ix)) == 0);
	assert(tone2 == 0);
	assert(voice_change_name[0] == 0);

#undef EMOJI_PRESENTATION_INPUT
}

void
test_uts51_emoji_modifier_sequence()
{
	/*
	 * UTS-51 ED-13: an emoji modifier base immediately followed by one of
	 * the skin tone modifiers U+1F3FB..U+1F3FF.  The asserts below require
	 * ReadClause to return CLAUSE_EOF and to leave the text in `source`
	 * unchanged apart from a single trailing space.
	 */
#define EMOJI_MODIFIER_INPUT \
	"\xE2\x98\x9D\xF0\x9F\x8F\xBB"      /* [261D 1F3FB] index pointing up; light skin tone */ \
	"\xF0\x9F\x91\xB0\xF0\x9F\x8F\xBD"  /* [1F470 1F3FD] bride with veil; medium skin tone */ \
	"\xF0\x9F\x92\xAA\xF0\x9F\x8F\xBF"  /* [1F4AA 1F3FF] flexed biceps; dark skin tone */

	printf("testing Emoji ... UTS-51 ED-13. emoji modifier sequence\n");

	/*
	 * Expected charix contents: one entry per byte of `source`.  The first
	 * byte of every UTF-8 sequence carries an index, continuation bytes
	 * are -1, and the last entry belongs to the trailing space.
	 */
	short expected_ix[] = {
		0, -1, -1, 2, -1, -1, -1,       /* 261D   1F3FB */
		3, -1, -1, -1, 4, -1, -1, -1,   /* 1F470  1F3FD */
		5, -1, -1, -1, 6, -1, -1, -1,   /* 1F4AA  1F3FF */
		7
	};

	assert(set_text(EMOJI_MODIFIER_INPUT, "en") == ENS_OK);

	charix_top = 0;
	assert(ReadClause(translator, source, charix, &charix_top, N_TR_SOURCE,
	                  &tone2, voice_change_name) == CLAUSE_EOF);
	assert(strcmp(source, EMOJI_MODIFIER_INPUT " ") == 0);
	assert(charix_top == (sizeof(expected_ix) / sizeof(expected_ix[0])) - 1);
	assert(memcmp(charix, expected_ix, sizeof(expected_ix)) == 0);
	assert(tone2 == 0);
	assert(voice_change_name[0] == 0);

#undef EMOJI_MODIFIER_INPUT
}

void
test_uts51_emoji_flag_sequence()
{
	/*
	 * UTS-51 ED-14: pairs of regional indicator symbols
	 * (U+1F1E6..U+1F1FF), including one pair that does not name a known
	 * country.  The asserts below require ReadClause to return CLAUSE_EOF
	 * and to leave the text in `source` unchanged apart from a single
	 * trailing space.
	 */
#define EMOJI_FLAG_INPUT \
	"\xF0\x9F\x87\xA6\xF0\x9F\x87\xB7"  /* [1F1E6 1F1F7] AR (argentina) */ \
	"\xF0\x9F\x87\xA7\xF0\x9F\x87\xAC"  /* [1F1E7 1F1EC] BG (bulgaria) */ \
	"\xF0\x9F\x87\xAC\xF0\x9F\x87\xA8"  /* [1F1EC 1F1E8] GC -- unknown country flag */ \
	"\xF0\x9F\x87\xAC\xF0\x9F\x87\xB1"  /* [1F1EC 1F1F1] GL (greenland) */

	printf("testing Emoji ... UTS-51 ED-14. emoji flag sequence\n");

	/*
	 * Expected charix contents: one entry per byte of `source`.  The first
	 * byte of every UTF-8 sequence carries an index, continuation bytes
	 * are -1, and the last entry belongs to the trailing space.
	 */
	short expected_ix[] = {
		0, -1, -1, -1, 2, -1, -1, -1,   /* AR */
		3, -1, -1, -1, 4, -1, -1, -1,   /* BG */
		5, -1, -1, -1, 6, -1, -1, -1,   /* GC */
		7, -1, -1, -1, 8, -1, -1, -1,   /* GL */
		9
	};

	assert(set_text(EMOJI_FLAG_INPUT, "en") == ENS_OK);

	charix_top = 0;
	assert(ReadClause(translator, source, charix, &charix_top, N_TR_SOURCE,
	                  &tone2, voice_change_name) == CLAUSE_EOF);
	assert(strcmp(source, EMOJI_FLAG_INPUT " ") == 0);
	assert(charix_top == (sizeof(expected_ix) / sizeof(expected_ix[0])) - 1);
	assert(memcmp(charix, expected_ix, sizeof(expected_ix)) == 0);
	assert(tone2 == 0);
	assert(voice_change_name[0] == 0);

#undef EMOJI_FLAG_INPUT
}

void
test_uts51_emoji_tag_sequence_emoji_character()
{
	/*
	 * UTS-51 ED-14a with an emoji character as the tag base: U+1F3F4,
	 * followed by tag characters from U+E0061..U+E007A and terminated by
	 * U+E007F (cancel tag).  Three sequences are concatenated; the first
	 * two are labelled RGI and the last non-RGI.  The asserts below
	 * require ReadClause to return CLAUSE_EOF and to leave the text in
	 * `source` unchanged apart from a single trailing space.
	 */
#define EMOJI_TAG_INPUT \
	/* tag_base = emoji_character (RGI sequence) */ \
	"\xF0\x9F\x8F\xB4"  /* [1F3F4] flag */ \
	"\xF3\xA0\x81\xA7"  /* [E0067] tag : g */ \
	"\xF3\xA0\x81\xA2"  /* [E0062] tag : b */ \
	"\xF3\xA0\x81\xA5"  /* [E0065] tag : e */ \
	"\xF3\xA0\x81\xAE"  /* [E006E] tag : n */ \
	"\xF3\xA0\x81\xA7"  /* [E0067] tag : g */ \
	"\xF3\xA0\x81\xBF"  /* [E007F] tag : (cancel) */ \
	/* tag_base = emoji_character (RGI sequence) */ \
	"\xF0\x9F\x8F\xB4"  /* [1F3F4] flag */ \
	"\xF3\xA0\x81\xA7"  /* [E0067] tag : g */ \
	"\xF3\xA0\x81\xA2"  /* [E0062] tag : b */ \
	"\xF3\xA0\x81\xB3"  /* [E0073] tag : s */ \
	"\xF3\xA0\x81\xA3"  /* [E0063] tag : c */ \
	"\xF3\xA0\x81\xB4"  /* [E0074] tag : t */ \
	"\xF3\xA0\x81\xBF"  /* [E007F] tag : (cancel) */ \
	/* tag_base = emoji_character (non-RGI sequence) */ \
	"\xF0\x9F\x8F\xB4"  /* [1F3F4] flag */ \
	"\xF3\xA0\x81\xB5"  /* [E0075] tag : u */ \
	"\xF3\xA0\x81\xB3"  /* [E0073] tag : s */ \
	"\xF3\xA0\x81\xA3"  /* [E0063] tag : c */ \
	"\xF3\xA0\x81\xA1"  /* [E0061] tag : a */ \
	"\xF3\xA0\x81\xBF"  /* [E007F] tag : (cancel) */

	printf("testing Emoji ... UTS-51 ED-14a. emoji tag sequence (emoji character)\n");

	/*
	 * Expected charix contents: one entry per byte of `source`.  Every
	 * character here is a 4-byte UTF-8 sequence whose first byte carries
	 * an index and whose continuation bytes are -1; the last entry belongs
	 * to the trailing space.
	 */
	short expected_ix[] = {
		/* emoji character */
		0, -1, -1, -1,
		/* tag spec */
		2, -1, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1,
		/* tag term */
		7, -1, -1, -1,
		/* emoji character */
		8, -1, -1, -1,
		/* tag spec */
		9, -1, -1, -1, 10, -1, -1, -1, 11, -1, -1, -1, 12, -1, -1, -1, 13, -1, -1, -1,
		/* tag term */
		14, -1, -1, -1,
		/* emoji character */
		15, -1, -1, -1,
		/* tag spec */
		16, -1, -1, -1, 17, -1, -1, -1, 18, -1, -1, -1, 19, -1, -1, -1,
		/* tag term */
		20, -1, -1, -1,
		21
	};

	assert(set_text(EMOJI_TAG_INPUT, "en") == ENS_OK);

	charix_top = 0;
	assert(ReadClause(translator, source, charix, &charix_top, N_TR_SOURCE,
	                  &tone2, voice_change_name) == CLAUSE_EOF);
	assert(strcmp(source, EMOJI_TAG_INPUT " ") == 0);
	assert(charix_top == (sizeof(expected_ix) / sizeof(expected_ix[0])) - 1);
	assert(memcmp(charix, expected_ix, sizeof(expected_ix)) == 0);
	assert(tone2 == 0);
	assert(voice_change_name[0] == 0);

#undef EMOJI_TAG_INPUT
}

void
test_uts51_emoji_combining_sequence()
{
	/*
	 * UTS-51 ED-14b: U+2195 combined with the enclosing mark U+20DE, once
	 * bare, once after U+FE0E (text style) and once after U+FE0F (emoji
	 * style).  The asserts below require ReadClause to return CLAUSE_EOF
	 * and to leave the text in `source` unchanged apart from a single
	 * trailing space.
	 */
#define EMOJI_COMBINING_INPUT \
	"\xE2\x86\x95\xE2\x83\x9E"              /* [2195 20DE] up down arrow; Me (enclosing square) */ \
	"\xE2\x86\x95\xEF\xB8\x8E\xE2\x83\x9E"  /* [2195 FE0E 20DE] up down arrow; Me (enclosing square) */ \
	"\xE2\x86\x95\xEF\xB8\x8F\xE2\x83\x9E"  /* [2195 FE0F 20DE] up down arrow; Me (enclosing square) */

	printf("testing Emoji ... UTS-51 ED-14b. emoji combining sequence\n");

	/*
	 * Expected charix contents: one entry per byte of `source`.  Every
	 * character here is a 3-byte UTF-8 sequence whose first byte carries
	 * an index and whose continuation bytes are -1; the last entry belongs
	 * to the trailing space.
	 */
	short expected_ix[] = {
		/* emoji character */
		0, -1, -1, 2, -1, -1,
		/* text presentation sequence */
		3, -1, -1, 4, -1, -1, 5, -1, -1,
		/* emoji presentation sequence */
		6, -1, -1, 7, -1, -1, 8, -1, -1,
		9
	};

	assert(set_text(EMOJI_COMBINING_INPUT, "en") == ENS_OK);

	charix_top = 0;
	assert(ReadClause(translator, source, charix, &charix_top, N_TR_SOURCE,
	                  &tone2, voice_change_name) == CLAUSE_EOF);
	assert(strcmp(source, EMOJI_COMBINING_INPUT " ") == 0);
	assert(charix_top == (sizeof(expected_ix) / sizeof(expected_ix[0])) - 1);
	assert(memcmp(charix, expected_ix, sizeof(expected_ix)) == 0);
	assert(tone2 == 0);
	assert(voice_change_name[0] == 0);

#undef EMOJI_COMBINING_INPUT
}

void
test_uts51_emoji_keycap_sequence()
{
printf("testing Emoji ... UTS-51 ED-14c. emoji keycap sequence\n");

short retix[] = {
0, 2, -1, -1, 3, -1, -1,
4, 5, -1, -1, 6, -1, -1,
7, 8, -1, -1, 9, -1, -1,
10 };

assert(set_text(
"5\xEF\xB8\x8E\xE2\x83\xA3" // [0035 FE0E 20E3] keycap 5
"#\xEF\xB8\x8E\xE2\x83\xA3" // [0023 FE0E 20E3] keycap #
"*\xEF\xB8\x8E\xE2\x83\xA3", // [002A FE0E 20E3] keycap *
"en") == ENS_OK);

charix_top = 0;
assert(ReadClause(translator, source, charix, &charix_top, N_TR_SOURCE, &tone2, voice_change_name) == CLAUSE_EOF);
assert(!strcmp(source,
"5\xEF\xB8\x8E\xE2\x83\xA3" // [0035 FE0E 20E3] keycap 5
"#\xEF\xB8\x8E\xE2\x83\xA3" // [0023 FE0E 20E3] keycap #
"*\xEF\xB8\x8E\xE2\x83\xA3" // [002A FE0E 20E3] keycap *
" "));
assert(charix_top == (sizeof(retix)/sizeof(retix[0])) - 1);
assert(!memcmp(charix, retix, sizeof(retix))); assert(!memcmp(charix, retix, sizeof(retix)));
assert(tone2 == 0); assert(tone2 == 0);
assert(voice_change_name[0] == 0); assert(voice_change_name[0] == 0);
test_fullwidth(); test_fullwidth();


test_uts51_emoji_character(); test_uts51_emoji_character();
test_uts51_text_presentation_sequence();
test_uts51_emoji_presentation_sequence();
test_uts51_emoji_modifier_sequence();
test_uts51_emoji_flag_sequence();
test_uts51_emoji_tag_sequence_emoji_character();
test_uts51_emoji_combining_sequence();
test_uts51_emoji_keycap_sequence();


assert(espeak_Terminate() == EE_OK); assert(espeak_Terminate() == EE_OK);



Loading…
Cancel
Save