Browse Source

Merge commit 'f26aca0aa1b9485634314d0d1710d2a0bb67776d'

master
Reece H. Dunn 8 years ago
parent
commit
8566f2ee35

+ 1
- 0
src/ucd-tools/.gitignore View File

.*.swp .*.swp
*~


# intermediate files: # intermediate files:



+ 5
- 2
src/ucd-tools/CHANGELOG.md View File

* `data/espeak-ng` data files for eSpeak NG extended data. * `data/espeak-ng` data files for eSpeak NG extended data.
* espeak-ng PropList property lookup as part of the `ucd_property` API. * espeak-ng PropList property lookup as part of the `ucd_property` API.


## 9.0.0.1 - (In Progress)
## 10.0.0 - 2017-06-25


* Add `iswblank` and `iswxdigit` compatibility. * Add `iswblank` and `iswxdigit` compatibility.
* Improve ctype compatibility. * Improve ctype compatibility.
* PropList property lookup.
* PropList and emoji-data property lookup.
* Support building with a C89 compiler.
* Update to Unicode Character Data 10.0.0.
* Unicode Emoji 5.0.


## 9.0.0 - 2016-12-28 ## 9.0.0 - 2016-12-28



+ 7
- 7
src/ucd-tools/Makefile.am View File



############################# Unicode Data #################################### ############################# Unicode Data ####################################


EMOJI_VERSION=4.0
EMOJI_VERSION=5.0
UCD_VERSION=@UCD_VERSION@ UCD_VERSION=@UCD_VERSION@
UCD_ROOTDIR=data/ucd UCD_ROOTDIR=data/ucd
UCD_SRCDIR=http://www.unicode.org/Public UCD_SRCDIR=http://www.unicode.org/Public


data/emoji/emoji-data.txt: data/emoji/emoji-data.txt:
mkdir -pv data/emoji mkdir -pv data/emoji
curl ${UCD_SRCDIR}/emoji/${EMOJI_VERSION}/emoji-data.txt > $@
curl ${UCD_SRCDIR}/emoji/${EMOJI_VERSION}/emoji-data.txt -o $@


data/ucd/PropList.txt: data/ucd/PropList.txt:
mkdir -pv data/ucd mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt > $@
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt -o $@


data/ucd/DerivedCoreProperties.txt: data/ucd/DerivedCoreProperties.txt:
mkdir -pv data/ucd mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/DerivedCoreProperties.txt > $@
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/DerivedCoreProperties.txt -o $@


data/ucd/PropertyValueAliases.txt: data/ucd/PropertyValueAliases.txt:
mkdir -pv data/ucd mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropertyValueAliases.txt > $@
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropertyValueAliases.txt -o $@


data/ucd/Scripts.txt: data/ucd/Scripts.txt:
mkdir -pv data/ucd mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/Scripts.txt > $@
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/Scripts.txt -o $@


data/ucd/UnicodeData.txt: data/ucd/UnicodeData.txt:
mkdir -pv data/ucd mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/UnicodeData.txt > $@
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/UnicodeData.txt -o $@


############################# documentation ################################### ############################# documentation ###################################



+ 1
- 0
src/ucd-tools/configure.ac View File

dnl ================================================================ dnl ================================================================


AC_CHECK_HEADERS([stddef.h]) dnl C89 AC_CHECK_HEADERS([stddef.h]) dnl C89
AC_CHECK_FUNCS([iswblank]) dnl C99


AC_TYPE_UINT8_T AC_TYPE_UINT8_T
AC_TYPE_UINT32_T AC_TYPE_UINT32_T

+ 4
- 3
src/ucd-tools/src/case.c View File

* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/ */


// NOTE: This file is automatically generated from the UnicodeData.txt file in
// the Unicode Character database by the ucd-tools/tools/categories.py script.
/* NOTE: This file is automatically generated from the UnicodeData.txt file in
* the Unicode Character database by the ucd-tools/tools/categories.py script.
*/


#include "ucd/ucd.h" #include "ucd/ucd.h"


#include <stddef.h> #include <stddef.h>


// Unicode Character Data 9.0.0
/* Unicode Character Data 9.0.0 */


struct case_conversion_entry struct case_conversion_entry
{ {

+ 740
- 679
src/ucd-tools/src/categories.c
File diff suppressed because it is too large
View File


+ 20
- 20
src/ucd-tools/src/ctype.c View File

switch (ucd_lookup_category(c)) switch (ucd_lookup_category(c))
{ {
case UCD_CATEGORY_Zs: case UCD_CATEGORY_Zs:
switch (c) // Exclude characters with the <noBreak> DispositionType
switch (c) /* Exclude characters with the <noBreak> DispositionType */
{ {
case 0x00A0: // U+00A0 : NO-BREAK SPACE
case 0x2007: // U+2007 : FIGURE SPACE
case 0x202F: // U+202F : NARROW NO-BREAK SPACE
case 0x00A0: /* U+00A0 : NO-BREAK SPACE */
case 0x2007: /* U+2007 : FIGURE SPACE */
case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */
return 0; return 0;
} }
return 1; return 1;
case UCD_CATEGORY_Cc: case UCD_CATEGORY_Cc:
return c == 0x09; // U+0009 : CHARACTER TABULATION
return c == 0x09; /* U+0009 : CHARACTER TABULATION */
default: default:
return 0; return 0;
} }


int ucd_isdigit(codepoint_t c) int ucd_isdigit(codepoint_t c)
{ {
return (c >= 0x30 && c <= 0x39); // [0-9]
return (c >= 0x30 && c <= 0x39); /* [0-9] */
} }


int ucd_isgraph(codepoint_t c) int ucd_isgraph(codepoint_t c)
case UCD_CATEGORY_Zp: case UCD_CATEGORY_Zp:
return 1; return 1;
case UCD_CATEGORY_Zs: case UCD_CATEGORY_Zs:
switch (c) // Exclude characters with the <noBreak> DispositionType
switch (c) /* Exclude characters with the <noBreak> DispositionType */
{ {
case 0x00A0: // U+00A0 : NO-BREAK SPACE
case 0x2007: // U+2007 : FIGURE SPACE
case 0x202F: // U+202F : NARROW NO-BREAK SPACE
case 0x00A0: /* U+00A0 : NO-BREAK SPACE */
case 0x2007: /* U+2007 : FIGURE SPACE */
case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */
return 0; return 0;
} }
return 1; return 1;
case UCD_CATEGORY_Cc: case UCD_CATEGORY_Cc:
switch (c) // Include control characters marked as White_Space
switch (c) /* Include control characters marked as White_Space */
{ {
case 0x09: // U+0009 : CHARACTER TABULATION
case 0x0A: // U+000A : LINE FEED
case 0x0B: // U+000B : LINE TABULATION
case 0x0C: // U+000C : FORM FEED
case 0x0D: // U+000D : CARRIAGE RETURN
case 0x85: // U+0085 : NEXT LINE
case 0x09: /* U+0009 : CHARACTER TABULATION */
case 0x0A: /* U+000A : LINE FEED */
case 0x0B: /* U+000B : LINE TABULATION */
case 0x0C: /* U+000C : FORM FEED */
case 0x0D: /* U+000D : CARRIAGE RETURN */
case 0x85: /* U+0085 : NEXT LINE */
return 1; return 1;
} }
default: default:


int ucd_isxdigit(codepoint_t c) int ucd_isxdigit(codepoint_t c)
{ {
return (c >= 0x30 && c <= 0x39) // [0-9]
|| (c >= 0x41 && c <= 0x46) // [A-Z]
|| (c >= 0x61 && c <= 0x66); // [a-z]
return (c >= 0x30 && c <= 0x39) /* [0-9] */
|| (c >= 0x41 && c <= 0x46) /* [A-F] */
|| (c >= 0x61 && c <= 0x66); /* [a-f] */
} }

+ 10
- 0
src/ucd-tools/src/include/ucd/ucd.h View File

UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */ UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */
UCD_SCRIPT_Geor, /**< @brief Georgian Script */ UCD_SCRIPT_Geor, /**< @brief Georgian Script */
UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */ UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */
UCD_SCRIPT_Gonm, /**< @brief Masaram Gondi */
UCD_SCRIPT_Goth, /**< @brief Gothic Script */ UCD_SCRIPT_Goth, /**< @brief Gothic Script */
UCD_SCRIPT_Gran, /**< @brief Grantha Script */ UCD_SCRIPT_Gran, /**< @brief Grantha Script */
UCD_SCRIPT_Grek, /**< @brief Greek Script */ UCD_SCRIPT_Grek, /**< @brief Greek Script */
UCD_SCRIPT_Sind, /**< @brief Sindhi Script */ UCD_SCRIPT_Sind, /**< @brief Sindhi Script */
UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */ UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */
UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */ UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */
UCD_SCRIPT_Soyo, /**< @brief Soyombo */
UCD_SCRIPT_Sund, /**< @brief Sundanese Script */ UCD_SCRIPT_Sund, /**< @brief Sundanese Script */
UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */ UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */
UCD_SCRIPT_Syrc, /**< @brief Syriac Script */ UCD_SCRIPT_Syrc, /**< @brief Syriac Script */
UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */ UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */
UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */ UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */
UCD_SCRIPT_Yiii, /**< @brief Yi Script */ UCD_SCRIPT_Yiii, /**< @brief Yi Script */
UCD_SCRIPT_Zanb, /**< @brief Zanabazar Square */
UCD_SCRIPT_Zinh, /**< @brief Inherited Script */ UCD_SCRIPT_Zinh, /**< @brief Inherited Script */
UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */ UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */
UCD_SCRIPT_Zsym, /**< @brief Symbols */ UCD_SCRIPT_Zsym, /**< @brief Symbols */
#define UCD_PROPERTY_EMOJI_PRESENTATION 0x0000000400000000ull /**< @brief Emoji_Presentation */ #define UCD_PROPERTY_EMOJI_PRESENTATION 0x0000000400000000ull /**< @brief Emoji_Presentation */
#define UCD_PROPERTY_EMOJI_MODIFIER 0x0000000800000000ull /**< @brief Emoji_Modifier */ #define UCD_PROPERTY_EMOJI_MODIFIER 0x0000000800000000ull /**< @brief Emoji_Modifier */
#define UCD_PROPERTY_EMOJI_MODIFIER_BASE 0x0000001000000000ull /**< @brief Emoji_Modifier_Base */ #define UCD_PROPERTY_EMOJI_MODIFIER_BASE 0x0000001000000000ull /**< @brief Emoji_Modifier_Base */
#define UCD_PROPERTY_REGIONAL_INDICATOR 0x0000002000000000ull /**< @brief Regional_Indicator */
#define UCD_PROPERTY_EMOJI_COMPONENT 0x0000004000000000ull /**< @brief Emoji_Component */


// eSpeak NG extended properties: // eSpeak NG extended properties:
#define ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION 0x0010000000000000ull /**< @brief Inverted_Terminal_Punctuation */ #define ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION 0x0010000000000000ull /**< @brief Inverted_Terminal_Punctuation */
Geok = UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */ Geok = UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */
Geor = UCD_SCRIPT_Geor, /**< @brief Georgian Script */ Geor = UCD_SCRIPT_Geor, /**< @brief Georgian Script */
Glag = UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */ Glag = UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */
Gonm = UCD_SCRIPT_Gonm, /**< @brief Masaram Gondi */
Goth = UCD_SCRIPT_Goth, /**< @brief Gothic Script */ Goth = UCD_SCRIPT_Goth, /**< @brief Gothic Script */
Gran = UCD_SCRIPT_Gran, /**< @brief Grantha Script */ Gran = UCD_SCRIPT_Gran, /**< @brief Grantha Script */
Grek = UCD_SCRIPT_Grek, /**< @brief Greek Script */ Grek = UCD_SCRIPT_Grek, /**< @brief Greek Script */
Sind = UCD_SCRIPT_Sind, /**< @brief Sindhi Script */ Sind = UCD_SCRIPT_Sind, /**< @brief Sindhi Script */
Sinh = UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */ Sinh = UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */
Sora = UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */ Sora = UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */
Soyo = UCD_SCRIPT_Soyo, /**< @brief Soyombo */
Sund = UCD_SCRIPT_Sund, /**< @brief Sundanese Script */ Sund = UCD_SCRIPT_Sund, /**< @brief Sundanese Script */
Sylo = UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */ Sylo = UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */
Syrc = UCD_SCRIPT_Syrc, /**< @brief Syriac Script */ Syrc = UCD_SCRIPT_Syrc, /**< @brief Syriac Script */
Xpeo = UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */ Xpeo = UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */
Xsux = UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */ Xsux = UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */
Yiii = UCD_SCRIPT_Yiii, /**< @brief Yi Script */ Yiii = UCD_SCRIPT_Yiii, /**< @brief Yi Script */
Zanb = UCD_SCRIPT_Zanb, /**< @brief Zanabazar Square */
Zinh = UCD_SCRIPT_Zinh, /**< @brief Inherited Script */ Zinh = UCD_SCRIPT_Zinh, /**< @brief Inherited Script */
Zmth = UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */ Zmth = UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */
Zsym = UCD_SCRIPT_Zsym, /**< @brief Symbols */ Zsym = UCD_SCRIPT_Zsym, /**< @brief Symbols */
Emoji_Presentation = UCD_PROPERTY_EMOJI_PRESENTATION, /**< @brief Emoji_Presentation */ Emoji_Presentation = UCD_PROPERTY_EMOJI_PRESENTATION, /**< @brief Emoji_Presentation */
Emoji_Modifier = UCD_PROPERTY_EMOJI_MODIFIER, /**< @brief Emoji_Modifier */ Emoji_Modifier = UCD_PROPERTY_EMOJI_MODIFIER, /**< @brief Emoji_Modifier */
Emoji_Modifier_Base = UCD_PROPERTY_EMOJI_MODIFIER_BASE, /**< @brief Emoji_Modifier_Base */ Emoji_Modifier_Base = UCD_PROPERTY_EMOJI_MODIFIER_BASE, /**< @brief Emoji_Modifier_Base */
Regional_Indicator = UCD_PROPERTY_REGIONAL_INDICATOR, /**< @brief Regional_Indicator */
Emoji_Component = UCD_PROPERTY_EMOJI_COMPONENT, /**< @brief Emoji_Component */
}; };


/** @brief Return the properties of the specified codepoint. /** @brief Return the properties of the specified codepoint.

+ 102
- 62
src/ucd-tools/src/proplist.c View File

case 0x2000: case 0x2000:
if (c == 0x2065) return UCD_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT; if (c == 0x2065) return UCD_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT;
break; break;
case 0x2300:
if (c == 0x23FF) return UCD_PROPERTY_PATTERN_SYNTAX;
break;
case 0x2400: case 0x2400:
if (c >= 0x2427 && c <= 0x243F) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x2427 && c <= 0x243F) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x244B && c <= 0x245F) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x244B && c <= 0x245F) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2B96 && c <= 0x2B97) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x2B96 && c <= 0x2B97) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2BBA && c <= 0x2BBC) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x2BBA && c <= 0x2BBC) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2BC9) return UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x2BC9) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2BD2 && c <= 0x2BEB) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2BD3 && c <= 0x2BEB) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2BF0 && c <= 0x2BFF) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x2BF0 && c <= 0x2BFF) return UCD_PROPERTY_PATTERN_SYNTAX;
break; break;
case 0x2E00: case 0x2E00:
if (c == 0x029D) return UCD_PROPERTY_SOFT_DOTTED; if (c == 0x029D) return UCD_PROPERTY_SOFT_DOTTED;
break; break;
case 0x0300: case 0x0300:
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c == 0x03D5) return UCD_PROPERTY_OTHER_MATH; if (c == 0x03D5) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x03F0 && c <= 0x03F1) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x03F0 && c <= 0x03F1) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x03F3) return UCD_PROPERTY_SOFT_DOTTED; if (c == 0x03F3) return UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x0400: case 0x0400:
if (c == 0x0456) return UCD_PROPERTY_SOFT_DOTTED; if (c == 0x0456) return UCD_PROPERTY_SOFT_DOTTED;
if (c == 0x1ECB) return UCD_PROPERTY_SOFT_DOTTED; if (c == 0x1ECB) return UCD_PROPERTY_SOFT_DOTTED;
break; break;
case 0x2100: case 0x2100:
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c == 0x2139) return UCD_PROPERTY_EMOJI; if (c == 0x2139) return UCD_PROPERTY_EMOJI;
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2148 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x2148 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
break; break;
case 0xFF00: case 0xFF00:
break; break;
case 0x01D400: case 0x01D400:
if (c >= 0x01D422 && c <= 0x01D423) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D422 && c <= 0x01D423) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D456 && c <= 0x01D457) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D456 && c <= 0x01D457) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D48A && c <= 0x01D48B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D48A && c <= 0x01D48B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c == 0x01D4BB) return UCD_PROPERTY_OTHER_MATH; if (c == 0x01D4BB) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4BE && c <= 0x01D4BF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D4BE && c <= 0x01D4BF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D4BD && c <= 0x01D4C3) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D4BD && c <= 0x01D4C3) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4F2 && c <= 0x01D4F3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D4F2 && c <= 0x01D4F3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D500: case 0x01D500:
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D526 && c <= 0x01D527) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D526 && c <= 0x01D527) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D55A && c <= 0x01D55B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D55A && c <= 0x01D55B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D58E && c <= 0x01D58F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D58E && c <= 0x01D58F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D5C2 && c <= 0x01D5C3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D5C2 && c <= 0x01D5C3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D5F6 && c <= 0x01D5F7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D5F6 && c <= 0x01D5F7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D600: case 0x01D600:
if (c >= 0x01D62A && c <= 0x01D62B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D62A && c <= 0x01D62B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D65E && c <= 0x01D65F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D65E && c <= 0x01D65F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D692 && c <= 0x01D693) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D692 && c <= 0x01D693) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D6C2 && c <= 0x01D6DA) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D6C2 && c <= 0x01D6DA) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D6FC) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D6FC) return UCD_PROPERTY_OTHER_MATH;
break; break;
case 0x01D700: case 0x01D700:
if (c <= 0x01D714) return UCD_PROPERTY_OTHER_MATH; if (c <= 0x01D714) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D736 && c <= 0x01D74E) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D736 && c <= 0x01D74E) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D770 && c <= 0x01D788) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D770 && c <= 0x01D788) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D7AA && c <= 0x01D7C2) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D7AA && c <= 0x01D7C2) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
} }
return 0; return 0;
break; break;
case 0x016F00: case 0x016F00:
if (c >= 0x016F93 && c <= 0x016F9F) return UCD_PROPERTY_DIACRITIC; if (c >= 0x016F93 && c <= 0x016F9F) return UCD_PROPERTY_DIACRITIC;
if (c == 0x016FE0) return UCD_PROPERTY_EXTENDER;
if (c >= 0x016FE0 && c <= 0x016FE1) return UCD_PROPERTY_EXTENDER;
break; break;
} }
return 0; return 0;
{ {
case 0x000000: case 0x000000:
if (c >= 0x3400 && c <= 0x4DB5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; if (c >= 0x3400 && c <= 0x4DB5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x4E00 && c <= 0x9FD5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x4E00 && c <= 0x9FEA) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0xF900 && c <= 0xFA6D) return UCD_PROPERTY_IDEOGRAPHIC; if (c >= 0xF900 && c <= 0xFA6D) return UCD_PROPERTY_IDEOGRAPHIC;
if (c >= 0xFA70 && c <= 0xFAD9) return UCD_PROPERTY_IDEOGRAPHIC; if (c >= 0xFA70 && c <= 0xFAD9) return UCD_PROPERTY_IDEOGRAPHIC;
break; break;
case 0x010000: case 0x010000:
if (c >= 0x017000 && c <= 0x0187EC) return UCD_PROPERTY_IDEOGRAPHIC; if (c >= 0x017000 && c <= 0x0187EC) return UCD_PROPERTY_IDEOGRAPHIC;
if (c >= 0x018800 && c <= 0x018AF2) return UCD_PROPERTY_IDEOGRAPHIC; if (c >= 0x018800 && c <= 0x018AF2) return UCD_PROPERTY_IDEOGRAPHIC;
if (c >= 0x01B170 && c <= 0x01B2FB) return UCD_PROPERTY_IDEOGRAPHIC;
break; break;
case 0x020000: case 0x020000:
if (c >= 0x020000 && c <= 0x02A6D6) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; if (c >= 0x020000 && c <= 0x02A6D6) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x02A700 && c <= 0x02B734) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; if (c >= 0x02A700 && c <= 0x02B734) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x02B740 && c <= 0x02B81D) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; if (c >= 0x02B740 && c <= 0x02B81D) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x02B820 && c <= 0x02CEA1) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; if (c >= 0x02B820 && c <= 0x02CEA1) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x02CEB0 && c <= 0x02EBE0) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x02F800 && c <= 0x02FA1D) return UCD_PROPERTY_IDEOGRAPHIC; if (c >= 0x02F800 && c <= 0x02FA1D) return UCD_PROPERTY_IDEOGRAPHIC;
break; break;
} }
if (c >= 0x0041 && c <= 0x0046) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT; if (c >= 0x0041 && c <= 0x0046) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT;
break; break;
case 0x0300: case 0x0300:
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0xFF00: case 0xFF00:
if (c >= 0xFF21 && c <= 0xFF26) return UCD_PROPERTY_HEX_DIGIT; if (c >= 0xFF21 && c <= 0xFF26) return UCD_PROPERTY_HEX_DIGIT;
case 0x2100: case 0x2100:
if (c == 0x2102) return UCD_PROPERTY_OTHER_MATH; if (c == 0x2102) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x2107) return UCD_PROPERTY_OTHER_MATH; if (c == 0x2107) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c == 0x2115) return UCD_PROPERTY_OTHER_MATH; if (c == 0x2115) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x2119 && c <= 0x211D) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x2119 && c <= 0x211D) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x2124) return UCD_PROPERTY_OTHER_MATH; if (c == 0x2124) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x2128) return UCD_PROPERTY_OTHER_MATH; if (c == 0x2128) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x212C && c <= 0x212D) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x212C && c <= 0x212D) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D400: case 0x01D400:
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D49E && c <= 0x01D49F) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D49E && c <= 0x01D49F) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x01D4A2) return UCD_PROPERTY_OTHER_MATH; if (c == 0x01D4A2) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4A5 && c <= 0x01D4A6) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D4A5 && c <= 0x01D4A6) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4A9 && c <= 0x01D4AC) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D4A9 && c <= 0x01D4AC) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D500: case 0x01D500:
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D507 && c <= 0x01D50A) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D507 && c <= 0x01D50A) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D50D && c <= 0x01D514) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D50D && c <= 0x01D514) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D516 && c <= 0x01D51C) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D516 && c <= 0x01D51C) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D53B && c <= 0x01D53E) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D53B && c <= 0x01D53E) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D540 && c <= 0x01D544) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D540 && c <= 0x01D544) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x01D546) return UCD_PROPERTY_OTHER_MATH; if (c == 0x01D546) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D54A && c <= 0x01D550) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D54A && c <= 0x01D550) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D600: case 0x01D600:
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D6A8 && c <= 0x01D6C0) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D6A8 && c <= 0x01D6C0) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D700: case 0x01D700:
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
} }
return 0; return 0;
if (c >= 0x1C34 && c <= 0x1C35) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x1C34 && c <= 0x1C35) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x1CE1) return UCD_PROPERTY_DIACRITIC; if (c == 0x1CE1) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x1CF2 && c <= 0x1CF3) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x1CF2 && c <= 0x1CF3) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x1CF7) return UCD_PROPERTY_DIACRITIC;
break; break;
case 0x3000: case 0x3000:
if (c >= 0x302E && c <= 0x302F) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND; if (c >= 0x302E && c <= 0x302F) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c >= 0x011720 && c <= 0x011721) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x011720 && c <= 0x011721) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011726) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c == 0x011726) return UCD_PROPERTY_OTHER_ALPHABETIC;
break; break;
case 0x011A00:
if (c >= 0x011A07 && c <= 0x011A08) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011A39) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011A57 && c <= 0x011A58) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011A97) return UCD_PROPERTY_OTHER_ALPHABETIC;
break;
case 0x011C00: case 0x011C00:
if (c == 0x011C2F) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c == 0x011C2F) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011C3E) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c == 0x011C3E) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0AC7 && c <= 0x0AC8) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x0AC7 && c <= 0x0AC8) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0ACD) return UCD_PROPERTY_DIACRITIC; if (c == 0x0ACD) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x0AE2 && c <= 0x0AE3) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x0AE2 && c <= 0x0AE3) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0AFA && c <= 0x0AFC) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0AFD && c <= 0x0AFF) return UCD_PROPERTY_DIACRITIC;
break; break;
case 0x0B00: case 0x0B00:
if (c == 0x0B01) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c == 0x0B01) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0CE2 && c <= 0x0CE3) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x0CE2 && c <= 0x0CE3) return UCD_PROPERTY_OTHER_ALPHABETIC;
break; break;
case 0x0D00: case 0x0D00:
if (c == 0x0D01) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0D00 && c <= 0x0D01) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0D3B && c <= 0x0D3C) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x0D41 && c <= 0x0D44) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x0D41 && c <= 0x0D44) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0D4D) return UCD_PROPERTY_DIACRITIC; if (c == 0x0D4D) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x0D62 && c <= 0x0D63) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x0D62 && c <= 0x0D63) return UCD_PROPERTY_OTHER_ALPHABETIC;
case 0x1D00: case 0x1D00:
if (c >= 0x1DC4 && c <= 0x1DCF) return UCD_PROPERTY_DIACRITIC; if (c >= 0x1DC4 && c <= 0x1DCF) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x1DE7 && c <= 0x1DF4) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x1DE7 && c <= 0x1DF4) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x1DF5) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x1DF5 && c <= 0x1DF9) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x1DFD && c <= 0x1DFF) return UCD_PROPERTY_DIACRITIC; if (c >= 0x1DFD && c <= 0x1DFF) return UCD_PROPERTY_DIACRITIC;
break; break;
case 0x2000: case 0x2000:
if (c >= 0x011727 && c <= 0x01172A) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x011727 && c <= 0x01172A) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x01172B) return UCD_PROPERTY_DIACRITIC; if (c == 0x01172B) return UCD_PROPERTY_DIACRITIC;
break; break;
case 0x011A00:
if (c >= 0x011A01 && c <= 0x011A0A) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011A34) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x011A35 && c <= 0x011A3E) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011A47) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x011A51 && c <= 0x011A5B) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011A8A && c <= 0x011A96) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011A98) return UCD_PROPERTY_EXTENDER;
if (c == 0x011A99) return UCD_PROPERTY_DIACRITIC;
break;
case 0x011C00: case 0x011C00:
if (c >= 0x011C30 && c <= 0x011C36) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x011C30 && c <= 0x011C36) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011C38 && c <= 0x011C3D) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x011C38 && c <= 0x011C3D) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011CB2 && c <= 0x011CB3) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x011CB2 && c <= 0x011CB3) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011CB5 && c <= 0x011CB6) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x011CB5 && c <= 0x011CB6) return UCD_PROPERTY_OTHER_ALPHABETIC;
break; break;
case 0x011D00:
if (c >= 0x011D31 && c <= 0x011D36) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011D3A) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011D3C && c <= 0x011D3D) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011D3F && c <= 0x011D41) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011D42) return UCD_PROPERTY_DIACRITIC;
if (c == 0x011D43) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011D44 && c <= 0x011D45) return UCD_PROPERTY_DIACRITIC;
if (c == 0x011D47) return UCD_PROPERTY_OTHER_ALPHABETIC;
break;
case 0x016A00: case 0x016A00:
if (c >= 0x016AF0 && c <= 0x016AF4) return UCD_PROPERTY_DIACRITIC; if (c >= 0x016AF0 && c <= 0x016AF4) return UCD_PROPERTY_DIACRITIC;
break; break;
switch (c & 0xFFFFFF00) switch (c & 0xFFFFFF00)
{ {
case 0x0000: case 0x0000:
if (c >= 0x0030 && c <= 0x0039) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT | UCD_PROPERTY_EMOJI;
if (c >= 0x0030 && c <= 0x0039) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_COMPONENT;
break; break;
case 0xFF00: case 0xFF00:
if (c >= 0xFF10 && c <= 0xFF19) return UCD_PROPERTY_HEX_DIGIT; if (c >= 0xFF10 && c <= 0xFF19) return UCD_PROPERTY_HEX_DIGIT;
break; break;
case 0x2700: case 0x2700:
if (c == 0x27C6) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x27C6) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2900: case 0x2900:
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */
case 0x2E00: case 0x2E00:
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x3000: case 0x3000:
case 0x0000: case 0x0000:
if (c == 0x0021) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK; if (c == 0x0021) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x0022) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x0022) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x0023) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x0023) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI_COMPONENT;
if (c == 0x0027) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x0027) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x002A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x002A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI_COMPONENT;
if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA; if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP; if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON; if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON;
case 0x11700: case 0x11700:
if (c >= 0x01173C && c <= 0x01173E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; if (c >= 0x01173C && c <= 0x01173E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
break; break;
case 0x11A00:
if (c >= 0x011A42 && c <= 0x011A43) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c >= 0x011A9B && c <= 0x011A9C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c >= 0x011AA1 && c <= 0x011AA2) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
case 0x11C00: case 0x11C00:
if (c >= 0x011C41 && c <= 0x011C42) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; if (c >= 0x011C41 && c <= 0x011C42) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x011C43) return UCD_PROPERTY_TERMINAL_PUNCTUATION; if (c == 0x011C43) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break; break;
case 0x2700: case 0x2700:
if (c == 0x27C5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x27C5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2900: case 0x2900:
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0xFFE3) return UCD_PROPERTY_DIACRITIC; if (c == 0xFFE3) return UCD_PROPERTY_DIACRITIC;
break; break;
case 0x01F300: case 0x01F300:
return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER;
return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER | UCD_PROPERTY_EMOJI_COMPONENT;
} }
return 0; return 0;
} }
if (c == 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c == 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x23E9 && c <= 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI; if (c >= 0x23E9 && c <= 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x23F8 && c <= 0x23FA) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI; if (c >= 0x23F8 && c <= 0x23FA) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x23E3 && c <= 0x23FE) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x23E3) return UCD_PROPERTY_PATTERN_SYNTAX;
break; break;
case 0x2400: case 0x2400:
if (c >= 0x2400 && c <= 0x244A) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x2400 && c <= 0x244A) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x01F170 && c <= 0x01F189) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE; if (c >= 0x01F170 && c <= 0x01F189) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE;
if (c == 0x01F18E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c == 0x01F18E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F191 && c <= 0x01F19A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c >= 0x01F191 && c <= 0x01F19A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F1E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F1E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_REGIONAL_INDICATOR | UCD_PROPERTY_EMOJI_COMPONENT;
break; break;
case 0x01F200: case 0x01F200:
if (c == 0x01F201) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c == 0x01F201) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F6EB && c <= 0x01F6EC) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c >= 0x01F6EB && c <= 0x01F6EC) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F6F0) return UCD_PROPERTY_EMOJI; if (c == 0x01F6F0) return UCD_PROPERTY_EMOJI;
if (c == 0x01F6F3) return UCD_PROPERTY_EMOJI; if (c == 0x01F6F3) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F6F4 && c <= 0x01F6F6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F6F4 && c <= 0x01F6F8) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
break; break;
case 0x01F900: case 0x01F900:
if (c <= 0x01F90B) return 0;
if (c >= 0x01F918 && c <= 0x01F91C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; if (c >= 0x01F918 && c <= 0x01F91C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F910 && c <= 0x01F91D) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c >= 0x01F910 && c <= 0x01F91D) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F91E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F91E && c <= 0x01F91F) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F926) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; if (c == 0x01F926) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F920 && c <= 0x01F927) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F930) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F920 && c <= 0x01F92F) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F930 && c <= 0x01F932) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F93B) return 0; if (c == 0x01F93B) return 0;
if (c >= 0x01F93A && c <= 0x01F93C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c >= 0x01F93A && c <= 0x01F93C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F933 && c <= 0x01F93E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; if (c >= 0x01F933 && c <= 0x01F93E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F946) return 0; if (c == 0x01F946) return 0;
if (c >= 0x01F940 && c <= 0x01F94B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c >= 0x01F940 && c <= 0x01F94B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F94C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F950 && c <= 0x01F95E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c >= 0x01F950 && c <= 0x01F95E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F95F && c <= 0x01F96B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F980 && c <= 0x01F991) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c >= 0x01F980 && c <= 0x01F991) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F992 && c <= 0x01F997) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F9C0) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c == 0x01F9C0) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F9D1 && c <= 0x01F9DD) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F9D0 && c <= 0x01F9E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
return UCD_PROPERTY_EMOJI; return UCD_PROPERTY_EMOJI;
} }
return 0; return 0;
case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE; case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE;
case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR; case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR;
case UCD_CATEGORY_Zs: return properties_Zs(c); case UCD_CATEGORY_Zs: return properties_Zs(c);
default: return 0; // Co Cs Ii Lt Me
default: return 0; /* Co Cs Ii Lt Me */
}; };
} }

+ 765
- 701
src/ucd-tools/src/scripts.c
File diff suppressed because it is too large
View File


+ 3
- 0
src/ucd-tools/src/tostring.c View File

"Geok", "Geok",
"Geor", "Geor",
"Glag", "Glag",
"Gonm",
"Goth", "Goth",
"Gran", "Gran",
"Grek", "Grek",
"Sind", "Sind",
"Sinh", "Sinh",
"Sora", "Sora",
"Soyo",
"Sund", "Sund",
"Sylo", "Sylo",
"Syrc", "Syrc",
"Xpeo", "Xpeo",
"Xsux", "Xsux",
"Yiii", "Yiii",
"Zanb",
"Zinh", "Zinh",
"Zmth", "Zmth",
"Zsym", "Zsym",

+ 36
- 26
src/ucd-tools/tests/printcdata.c View File

* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/ */


#include "config.h"
#include "ucd/ucd.h" #include "ucd/ucd.h"


#include <locale.h> #include <locale.h>
#include <wchar.h> #include <wchar.h>
#include <wctype.h> #include <wctype.h>


#ifndef HAVE_ISWBLANK
static int iswblank(wint_t c)
{
return iswspace(c) && !(c >= 0x0A && c <= 0x0D);
}
#endif

void fput_utf8c(FILE *out, codepoint_t c) void fput_utf8c(FILE *out, codepoint_t c)
{ {
if (c < 0x80) if (c < 0x80)
{ {
switch (mode) switch (mode)
{ {
case 'c': // character
case 'c': /* character */
switch (c) switch (c)
{ {
case '\t': fputs("\\t", out); break; case '\t': fputs("\\t", out); break;
default: fput_utf8c(out, c); break; default: fput_utf8c(out, c); break;
} }
break; break;
case 'h': // hexadecimal (lower)
case 'h': /* hexadecimal (lower) */
fprintf(out, "%06x", c); fprintf(out, "%06x", c);
break; break;
case 'H': // hexadecimal (upper)
case 'H': /* hexadecimal (upper) */
fprintf(out, "%06X", c); fprintf(out, "%06X", c);
break; break;
} }
{ {
switch (mode) switch (mode)
{ {
case 'A': // alpha-numeric
case 'A': /* alpha-numeric */
fputc(iswalnum(c) ? '1' : '0', out); fputc(iswalnum(c) ? '1' : '0', out);
break; break;
case 'a': // alpha
case 'a': /* alpha */
fputc(iswalpha(c) ? '1' : '0', out); fputc(iswalpha(c) ? '1' : '0', out);
break; break;
case 'b': // blank
case 'b': /* blank */
fputc(iswblank(c) ? '1' : '0', out); fputc(iswblank(c) ? '1' : '0', out);
break; break;
case 'c': // control
case 'c': /* control */
fputc(iswcntrl(c) ? '1' : '0', out); fputc(iswcntrl(c) ? '1' : '0', out);
break; break;
case 'd': // numeric
case 'd': /* numeric */
fputc(iswdigit(c) ? '1' : '0', out); fputc(iswdigit(c) ? '1' : '0', out);
break; break;
case 'g': // glyph
case 'g': /* glyph */
fputc(iswgraph(c) ? '1' : '0', out); fputc(iswgraph(c) ? '1' : '0', out);
break; break;
case 'l': // lower case
case 'l': /* lower case */
fputc(iswlower(c) ? '1' : '0', out); fputc(iswlower(c) ? '1' : '0', out);
break; break;
case 'P': // printable
case 'P': /* printable */
fputc(iswprint(c) ? '1' : '0', out); fputc(iswprint(c) ? '1' : '0', out);
break; break;
case 'p': // punctuation
case 'p': /* punctuation */
fputc(iswpunct(c) ? '1' : '0', out); fputc(iswpunct(c) ? '1' : '0', out);
break; break;
case 's': // whitespace
case 's': /* whitespace */
fputc(iswspace(c) ? '1' : '0', out); fputc(iswspace(c) ? '1' : '0', out);
break; break;
case 'u': // upper case
case 'u': /* upper case */
fputc(iswupper(c) ? '1' : '0', out); fputc(iswupper(c) ? '1' : '0', out);
break; break;
case 'x': // xdigit
case 'x': /* xdigit */
fputc(iswxdigit(c) ? '1' : '0', out); fputc(iswxdigit(c) ? '1' : '0', out);
break; break;
} }
case '%': case '%':
switch (*++format) switch (*++format)
{ {
case 'c': // category
case 'c': /* category */
fputs(ucd_get_category_string(ucd_lookup_category(c)), out); fputs(ucd_get_category_string(ucd_lookup_category(c)), out);
break; break;
case 'C': // category group
case 'C': /* category group */
fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out);
break; break;
case 'p': // codepoint
case 'p': /* codepoint */
uprintf_codepoint(out, c, *++format); uprintf_codepoint(out, c, *++format);
break; break;
case 'P': // properties
case 'P': /* properties */
fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c)));
break; break;
case 'i': // is*
case 'i': /* is* */
uprintf_is(out, c, *++format); uprintf_is(out, c, *++format);
break; break;
case 'L': // lowercase
case 'L': /* lowercase */
uprintf_codepoint(out, towlower(c), *++format); uprintf_codepoint(out, towlower(c), *++format);
break; break;
case 's': // script
case 's': /* script */
fputs(ucd_get_script_string(ucd_lookup_script(c)), out); fputs(ucd_get_script_string(ucd_lookup_script(c)), out);
break; break;
case 'T': // titlecase
case 'T': /* titlecase */
uprintf_codepoint(out, ucd_totitle(c), *++format); uprintf_codepoint(out, ucd_totitle(c), *++format);
break; break;
case 'U': // uppercase
case 'U': /* uppercase */
uprintf_codepoint(out, towupper(c), *++format); uprintf_codepoint(out, towupper(c), *++format);
break; break;
} }
{ {
FILE *in = NULL; FILE *in = NULL;
const char *format = NULL; const char *format = NULL;
for (int argn = 1; argn != argc; ++argn)
int argn;
for (argn = 1; argn != argc; ++argn)
{ {
const char *arg = argv[argn]; const char *arg = argv[argn];
if (!strcmp(arg, "--stdin") || !strcmp(arg, "-")) if (!strcmp(arg, "--stdin") || !strcmp(arg, "-"))
} }
else else
{ {
for (codepoint_t c = 0; c <= 0x10FFFF; ++c)
codepoint_t c;
for (c = 0; c <= 0x10FFFF; ++c)
uprintf(stdout, c, format ? format : uprintf(stdout, c, format ? format :
"%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n"); "%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n");
} }

+ 28
- 26
src/ucd-tools/tests/printucddata.c View File

{ {
switch (mode) switch (mode)
{ {
case 'c': // character
case 'c': /* character */
switch (c) switch (c)
{ {
case '\t': fputs("\\t", out); break; case '\t': fputs("\\t", out); break;
default: fput_utf8c(out, c); break; default: fput_utf8c(out, c); break;
} }
break; break;
case 'h': // hexadecimal (lower)
case 'h': /* hexadecimal (lower) */
fprintf(out, "%06x", c); fprintf(out, "%06x", c);
break; break;
case 'H': // hexadecimal (upper)
case 'H': /* hexadecimal (upper) */
fprintf(out, "%06X", c); fprintf(out, "%06X", c);
break; break;
} }
{ {
switch (mode) switch (mode)
{ {
case 'A': // alpha-numeric
case 'A': /* alpha-numeric */
fputc(ucd_isalnum(c) ? '1' : '0', out); fputc(ucd_isalnum(c) ? '1' : '0', out);
break; break;
case 'a': // alpha
case 'a': /* alpha */
fputc(ucd_isalpha(c) ? '1' : '0', out); fputc(ucd_isalpha(c) ? '1' : '0', out);
break; break;
case 'b': // blank
case 'b': /* blank */
fputc(ucd_isblank(c) ? '1' : '0', out); fputc(ucd_isblank(c) ? '1' : '0', out);
break; break;
case 'c': // control
case 'c': /* control */
fputc(ucd_iscntrl(c) ? '1' : '0', out); fputc(ucd_iscntrl(c) ? '1' : '0', out);
break; break;
case 'd': // numeric
case 'd': /* numeric */
fputc(ucd_isdigit(c) ? '1' : '0', out); fputc(ucd_isdigit(c) ? '1' : '0', out);
break; break;
case 'g': // glyph
case 'g': /* glyph */
fputc(ucd_isgraph(c) ? '1' : '0', out); fputc(ucd_isgraph(c) ? '1' : '0', out);
break; break;
case 'l': // lower case
case 'l': /* lower case */
fputc(ucd_islower(c) ? '1' : '0', out); fputc(ucd_islower(c) ? '1' : '0', out);
break; break;
case 'P': // printable
case 'P': /* printable */
fputc(ucd_isprint(c) ? '1' : '0', out); fputc(ucd_isprint(c) ? '1' : '0', out);
break; break;
case 'p': // punctuation
case 'p': /* punctuation */
fputc(ucd_ispunct(c) ? '1' : '0', out); fputc(ucd_ispunct(c) ? '1' : '0', out);
break; break;
case 's': // whitespace
case 's': /* whitespace */
fputc(ucd_isspace(c) ? '1' : '0', out); fputc(ucd_isspace(c) ? '1' : '0', out);
break; break;
case 'u': // upper case
case 'u': /* upper case */
fputc(ucd_isupper(c) ? '1' : '0', out); fputc(ucd_isupper(c) ? '1' : '0', out);
break; break;
case 'x': // xdigit
case 'x': /* xdigit */
fputc(ucd_isxdigit(c) ? '1' : '0', out); fputc(ucd_isxdigit(c) ? '1' : '0', out);
break; break;
} }
case '%': case '%':
switch (*++format) switch (*++format)
{ {
case 'c': // category
case 'c': /* category */
fputs(ucd_get_category_string(ucd_lookup_category(c)), out); fputs(ucd_get_category_string(ucd_lookup_category(c)), out);
break; break;
case 'C': // category group
case 'C': /* category group */
fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out);
break; break;
case 'p': // codepoint
case 'p': /* codepoint */
uprintf_codepoint(out, c, *++format); uprintf_codepoint(out, c, *++format);
break; break;
case 'P': // properties
case 'P': /* properties */
fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c)));
break; break;
case 'i': // is*
case 'i': /* is* */
uprintf_is(out, c, *++format); uprintf_is(out, c, *++format);
break; break;
case 'L': // lowercase
case 'L': /* lowercase */
uprintf_codepoint(out, ucd_tolower(c), *++format); uprintf_codepoint(out, ucd_tolower(c), *++format);
break; break;
case 's': // script
case 's': /* script */
fputs(ucd_get_script_string(ucd_lookup_script(c)), out); fputs(ucd_get_script_string(ucd_lookup_script(c)), out);
break; break;
case 'T': // titlecase
case 'T': /* titlecase */
uprintf_codepoint(out, ucd_totitle(c), *++format); uprintf_codepoint(out, ucd_totitle(c), *++format);
break; break;
case 'U': // uppercase
case 'U': /* uppercase */
uprintf_codepoint(out, ucd_toupper(c), *++format); uprintf_codepoint(out, ucd_toupper(c), *++format);
break; break;
} }
{ {
FILE *in = NULL; FILE *in = NULL;
const char *format = NULL; const char *format = NULL;
for (int argn = 1; argn != argc; ++argn)
int argn;
for (argn = 1; argn != argc; ++argn)
{ {
const char *arg = argv[argn]; const char *arg = argv[argn];
if (!strcmp(arg, "--stdin") || !strcmp(arg, "-")) if (!strcmp(arg, "--stdin") || !strcmp(arg, "-"))
} }
else else
{ {
for (codepoint_t c = 0; c <= 0x10FFFF; ++c)
codepoint_t c;
for (c = 0; c <= 0x10FFFF; ++c)
uprintf(stdout, c, format ? format : uprintf(stdout, c, format ? format :
"%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n"); "%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n");
} }

+ 4
- 3
src/ucd-tools/tools/case.py View File

* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/ */


// NOTE: This file is automatically generated from the UnicodeData.txt file in
// the Unicode Character database by the ucd-tools/tools/categories.py script.
/* NOTE: This file is automatically generated from the UnicodeData.txt file in
* the Unicode Character database by the ucd-tools/tools/categories.py script.
*/


#include "ucd/ucd.h" #include "ucd/ucd.h"


#include <stddef.h> #include <stddef.h>


// Unicode Character Data %s
/* Unicode Character Data %s */


struct case_conversion_entry struct case_conversion_entry
{ {

+ 8
- 7
src/ucd-tools/tools/categories.py View File

* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/ */


// NOTE: This file is automatically generated from the UnicodeData.txt file in
// the Unicode Character database by the ucd-tools/tools/categories.py script.
/* NOTE: This file is automatically generated from the UnicodeData.txt file in
* the Unicode Character database by the ucd-tools/tools/categories.py script.
*/


#include "ucd/ucd.h" #include "ucd/ucd.h"


#define Zs UCD_CATEGORY_Zs #define Zs UCD_CATEGORY_Zs
#define Ii UCD_CATEGORY_Ii #define Ii UCD_CATEGORY_Ii


// Unicode Character Data %s
/* Unicode Character Data %s */
""" % ucd_version) """ % ucd_version)


for category in special_categories: for category in special_categories:
sys.stdout.write('{\n') sys.stdout.write('{\n')
for codepoint, table in sorted(category_tables[table_index].items()): for codepoint, table in sorted(category_tables[table_index].items()):
if isinstance(table, str): if isinstance(table, str):
sys.stdout.write('\tcategories_%s, // %s\n' % (table, codepoint))
sys.stdout.write('\tcategories_%s, /* %s */\n' % (table, codepoint))
else: else:
sys.stdout.write('\tcategories_%s,\n' % codepoint) sys.stdout.write('\tcategories_%s,\n' % codepoint)
sys.stdout.write('};\n') sys.stdout.write('};\n')
sys.stdout.write('{\n') sys.stdout.write('{\n')
for codepoints, category, comment in category_sets: for codepoints, category, comment in category_sets:
if category: if category:
sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, category, codepoints, comment))
sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, category, codepoints, comment))
else: else:
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints))
sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints))
sys.stdout.write('\t{\n') sys.stdout.write('\t{\n')
sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first))
sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n') sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n')
sys.stdout.write('\t}\n') sys.stdout.write('\t}\n')
sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n')
sys.stdout.write('\treturn Ii; /* Invalid Unicode Codepoint */\n')
sys.stdout.write('}\n') sys.stdout.write('}\n')


sys.stdout.write(""" sys.stdout.write("""

+ 2
- 0
src/ucd-tools/tools/printdata.py View File

props += (2 ** 34) * data.get('Emoji_Presentation', 0) # emoji-data props += (2 ** 34) * data.get('Emoji_Presentation', 0) # emoji-data
props += (2 ** 35) * data.get('Emoji_Modifier', 0) # emoji-data props += (2 ** 35) * data.get('Emoji_Modifier', 0) # emoji-data
props += (2 ** 36) * data.get('Emoji_Modifier_Base', 0) # emoji-data props += (2 ** 36) * data.get('Emoji_Modifier_Base', 0) # emoji-data
props += (2 ** 37) * data.get('Regional_Indicator', 0) # PropList 10.0.0
props += (2 ** 38) * data.get('Emoji_Component', 0) # emoji-data 5.0
# eSpeak NG extended properties: # eSpeak NG extended properties:
props += (2 ** 52) * data.get('Inverted_Terminal_Punctuation', 0) props += (2 ** 52) * data.get('Inverted_Terminal_Punctuation', 0)
props += (2 ** 53) * data.get('Punctuation_In_Word', 0) props += (2 ** 53) * data.get('Punctuation_In_Word', 0)

+ 11
- 7
src/ucd-tools/tools/scripts.py View File

* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/ */


// NOTE: This file is automatically generated from the Scripts.txt file in
// the Unicode Character database by the ucd-tools/tools/scripts.py script.
/* NOTE: This file is automatically generated from the Scripts.txt file in
* the Unicode Character database by the ucd-tools/tools/scripts.py script.
*/


#include "ucd/ucd.h" #include "ucd/ucd.h"


#define Geok UCD_SCRIPT_Geok #define Geok UCD_SCRIPT_Geok
#define Geor UCD_SCRIPT_Geor #define Geor UCD_SCRIPT_Geor
#define Glag UCD_SCRIPT_Glag #define Glag UCD_SCRIPT_Glag
#define Gonm UCD_SCRIPT_Gonm
#define Goth UCD_SCRIPT_Goth #define Goth UCD_SCRIPT_Goth
#define Gran UCD_SCRIPT_Gran #define Gran UCD_SCRIPT_Gran
#define Grek UCD_SCRIPT_Grek #define Grek UCD_SCRIPT_Grek
#define Sind UCD_SCRIPT_Sind #define Sind UCD_SCRIPT_Sind
#define Sinh UCD_SCRIPT_Sinh #define Sinh UCD_SCRIPT_Sinh
#define Sora UCD_SCRIPT_Sora #define Sora UCD_SCRIPT_Sora
#define Soyo UCD_SCRIPT_Soyo
#define Sund UCD_SCRIPT_Sund #define Sund UCD_SCRIPT_Sund
#define Sylo UCD_SCRIPT_Sylo #define Sylo UCD_SCRIPT_Sylo
#define Syrc UCD_SCRIPT_Syrc #define Syrc UCD_SCRIPT_Syrc
#define Xpeo UCD_SCRIPT_Xpeo #define Xpeo UCD_SCRIPT_Xpeo
#define Xsux UCD_SCRIPT_Xsux #define Xsux UCD_SCRIPT_Xsux
#define Yiii UCD_SCRIPT_Yiii #define Yiii UCD_SCRIPT_Yiii
#define Zanb UCD_SCRIPT_Zanb
#define Zinh UCD_SCRIPT_Zinh #define Zinh UCD_SCRIPT_Zinh
#define Zmth UCD_SCRIPT_Zmth #define Zmth UCD_SCRIPT_Zmth
#define Zsym UCD_SCRIPT_Zsym #define Zsym UCD_SCRIPT_Zsym
#define Zyyy UCD_SCRIPT_Zyyy #define Zyyy UCD_SCRIPT_Zyyy
#define Zzzz UCD_SCRIPT_Zzzz #define Zzzz UCD_SCRIPT_Zzzz


// Unicode Character Data %s
/* Unicode Character Data %s */
""" % ucd_version) """ % ucd_version)


for script in special_scripts: for script in special_scripts:
sys.stdout.write('{\n') sys.stdout.write('{\n')
for codepoint, table in sorted(script_tables[table_index].items()): for codepoint, table in sorted(script_tables[table_index].items()):
if isinstance(table, str): if isinstance(table, str):
sys.stdout.write('\tscripts_%s, // %s\n' % (table, codepoint))
sys.stdout.write('\tscripts_%s, /* %s */\n' % (table, codepoint))
else: else:
sys.stdout.write('\tscripts_%s,\n' % codepoint) sys.stdout.write('\tscripts_%s,\n' % codepoint)
sys.stdout.write('};\n') sys.stdout.write('};\n')
sys.stdout.write('{\n') sys.stdout.write('{\n')
for codepoints, script, comment in script_sets: for codepoints, script, comment in script_sets:
if script: if script:
sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, script, codepoints, comment))
sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, script, codepoints, comment))
else: else:
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints))
sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints))
sys.stdout.write('\t{\n') sys.stdout.write('\t{\n')
sys.stdout.write('\t\tconst uint8_t *table = scripts_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) sys.stdout.write('\t\tconst uint8_t *table = scripts_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first))
sys.stdout.write('\t\treturn (ucd_script)table[c % 256];\n') sys.stdout.write('\t\treturn (ucd_script)table[c % 256];\n')
sys.stdout.write('\t}\n') sys.stdout.write('\t}\n')
sys.stdout.write('\treturn Zzzz; // Invalid Unicode Codepoint\n')
sys.stdout.write('\treturn Zzzz; /* Invalid Unicode Codepoint */\n')
sys.stdout.write('}\n') sys.stdout.write('}\n')

Loading…
Cancel
Save