@@ -5,6 +5,7 @@ | |||
* Add `iswblank` and `iswxdigit` compatibility. | |||
* Improve ctype compatibility. | |||
* PropList and emoji-data property lookup. | |||
* Support building with a C89 compiler. | |||
## 9.0.0 - 2016-12-28 | |||
@@ -18,14 +18,15 @@ | |||
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
// NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
// the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
/* NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
* the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
*/ | |||
#include "ucd/ucd.h" | |||
#include <stddef.h> | |||
// Unicode Character Data 9.0.0 | |||
/* Unicode Character Data 9.0.0 */ | |||
struct case_conversion_entry | |||
{ |
@@ -69,16 +69,16 @@ int ucd_isblank(codepoint_t c) | |||
switch (ucd_lookup_category(c)) | |||
{ | |||
case UCD_CATEGORY_Zs: | |||
switch (c) // Exclude characters with the <noBreak> DispositionType | |||
switch (c) /* Exclude characters with the <noBreak> DispositionType */ | |||
{ | |||
case 0x00A0: // U+00A0 : NO-BREAK SPACE | |||
case 0x2007: // U+2007 : FIGURE SPACE | |||
case 0x202F: // U+202F : NARROW NO-BREAK SPACE | |||
case 0x00A0: /* U+00A0 : NO-BREAK SPACE */ | |||
case 0x2007: /* U+2007 : FIGURE SPACE */ | |||
case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */ | |||
return 0; | |||
} | |||
return 1; | |||
case UCD_CATEGORY_Cc: | |||
return c == 0x09; // U+0009 : CHARACTER TABULATION | |||
return c == 0x09; /* U+0009 : CHARACTER TABULATION */ | |||
default: | |||
return 0; | |||
} | |||
@@ -91,7 +91,7 @@ int ucd_iscntrl(codepoint_t c) | |||
int ucd_isdigit(codepoint_t c) | |||
{ | |||
return (c >= 0x30 && c <= 0x39); // [0-9] | |||
return (c >= 0x30 && c <= 0x39); /* [0-9] */ | |||
} | |||
int ucd_isgraph(codepoint_t c) | |||
@@ -174,23 +174,23 @@ int ucd_isspace(codepoint_t c) | |||
case UCD_CATEGORY_Zp: | |||
return 1; | |||
case UCD_CATEGORY_Zs: | |||
switch (c) // Exclude characters with the <noBreak> DispositionType | |||
switch (c) /* Exclude characters with the <noBreak> DispositionType */ | |||
{ | |||
case 0x00A0: // U+00A0 : NO-BREAK SPACE | |||
case 0x2007: // U+2007 : FIGURE SPACE | |||
case 0x202F: // U+202F : NARROW NO-BREAK SPACE | |||
case 0x00A0: /* U+00A0 : NO-BREAK SPACE */ | |||
case 0x2007: /* U+2007 : FIGURE SPACE */ | |||
case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */ | |||
return 0; | |||
} | |||
return 1; | |||
case UCD_CATEGORY_Cc: | |||
switch (c) // Include control characters marked as White_Space | |||
switch (c) /* Include control characters marked as White_Space */ | |||
{ | |||
case 0x09: // U+0009 : CHARACTER TABULATION | |||
case 0x0A: // U+000A : LINE FEED | |||
case 0x0B: // U+000B : LINE TABULATION | |||
case 0x0C: // U+000C : FORM FEED | |||
case 0x0D: // U+000D : CARRIAGE RETURN | |||
case 0x85: // U+0085 : NEXT LINE | |||
case 0x09: /* U+0009 : CHARACTER TABULATION */ | |||
case 0x0A: /* U+000A : LINE FEED */ | |||
case 0x0B: /* U+000B : LINE TABULATION */ | |||
case 0x0C: /* U+000C : FORM FEED */ | |||
case 0x0D: /* U+000D : CARRIAGE RETURN */ | |||
case 0x85: /* U+0085 : NEXT LINE */ | |||
return 1; | |||
} | |||
default: | |||
@@ -217,7 +217,7 @@ int ucd_isupper(codepoint_t c) | |||
int ucd_isxdigit(codepoint_t c) | |||
{ | |||
return (c >= 0x30 && c <= 0x39) // [0-9] | |||
|| (c >= 0x41 && c <= 0x46) // [A-Z] | |||
|| (c >= 0x61 && c <= 0x66); // [a-z] | |||
return (c >= 0x30 && c <= 0x39) /* [0-9] */ | |||
|| (c >= 0x41 && c <= 0x46) /* [A-F] */ | |||
|| (c >= 0x61 && c <= 0x66); /* [a-f] */ | |||
} |
@@ -142,11 +142,11 @@ static ucd_property properties_Ll(codepoint_t c) | |||
if (c == 0x029D) return UCD_PROPERTY_SOFT_DOTTED; | |||
break; | |||
case 0x0300: | |||
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c == 0x03D5) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x03F0 && c <= 0x03F1) return UCD_PROPERTY_OTHER_MATH; | |||
if (c == 0x03F3) return UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
case 0x0400: | |||
if (c == 0x0456) return UCD_PROPERTY_SOFT_DOTTED; | |||
@@ -160,12 +160,12 @@ static ucd_property properties_Ll(codepoint_t c) | |||
if (c == 0x1ECB) return UCD_PROPERTY_SOFT_DOTTED; | |||
break; | |||
case 0x2100: | |||
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c == 0x2139) return UCD_PROPERTY_EMOJI; | |||
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x2148 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
break; | |||
case 0xFF00: | |||
@@ -173,45 +173,45 @@ static ucd_property properties_Ll(codepoint_t c) | |||
break; | |||
case 0x01D400: | |||
if (c >= 0x01D422 && c <= 0x01D423) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D456 && c <= 0x01D457) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D48A && c <= 0x01D48B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c == 0x01D4BB) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D4BE && c <= 0x01D4BF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D4BD && c <= 0x01D4C3) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D4F2 && c <= 0x01D4F3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
case 0x01D500: | |||
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D526 && c <= 0x01D527) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D55A && c <= 0x01D55B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D58E && c <= 0x01D58F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D5C2 && c <= 0x01D5C3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D5F6 && c <= 0x01D5F7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
case 0x01D600: | |||
if (c >= 0x01D62A && c <= 0x01D62B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D65E && c <= 0x01D65F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D692 && c <= 0x01D693) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D6C2 && c <= 0x01D6DA) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D6FC) return UCD_PROPERTY_OTHER_MATH; | |||
break; | |||
case 0x01D700: | |||
if (c <= 0x01D714) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D736 && c <= 0x01D74E) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D770 && c <= 0x01D788) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D7AA && c <= 0x01D7C2) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
} | |||
return 0; | |||
@@ -427,8 +427,8 @@ static ucd_property properties_Lu(codepoint_t c) | |||
if (c >= 0x0041 && c <= 0x0046) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT; | |||
break; | |||
case 0x0300: | |||
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
case 0xFF00: | |||
if (c >= 0xFF21 && c <= 0xFF26) return UCD_PROPERTY_HEX_DIGIT; | |||
@@ -436,49 +436,49 @@ static ucd_property properties_Lu(codepoint_t c) | |||
case 0x2100: | |||
if (c == 0x2102) return UCD_PROPERTY_OTHER_MATH; | |||
if (c == 0x2107) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c == 0x2115) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x2119 && c <= 0x211D) return UCD_PROPERTY_OTHER_MATH; | |||
if (c == 0x2124) return UCD_PROPERTY_OTHER_MATH; | |||
if (c == 0x2128) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x212C && c <= 0x212D) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
case 0x01D400: | |||
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D49E && c <= 0x01D49F) return UCD_PROPERTY_OTHER_MATH; | |||
if (c == 0x01D4A2) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D4A5 && c <= 0x01D4A6) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D4A9 && c <= 0x01D4AC) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
case 0x01D500: | |||
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D507 && c <= 0x01D50A) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D50D && c <= 0x01D514) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D516 && c <= 0x01D51C) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D53B && c <= 0x01D53E) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D540 && c <= 0x01D544) return UCD_PROPERTY_OTHER_MATH; | |||
if (c == 0x01D546) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D54A && c <= 0x01D550) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
case 0x01D600: | |||
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D6A8 && c <= 0x01D6C0) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
case 0x01D700: | |||
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
} | |||
return 0; | |||
@@ -1262,10 +1262,10 @@ static ucd_property properties_Pe(codepoint_t c) | |||
break; | |||
case 0x2700: | |||
if (c == 0x27C6) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | |||
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps | |||
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */ | |||
return UCD_PROPERTY_PATTERN_SYNTAX; | |||
case 0x2900: | |||
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps | |||
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */ | |||
case 0x2E00: | |||
return UCD_PROPERTY_PATTERN_SYNTAX; | |||
case 0x3000: | |||
@@ -1585,7 +1585,7 @@ static ucd_property properties_Ps(codepoint_t c) | |||
break; | |||
case 0x2700: | |||
if (c == 0x27C5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | |||
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps | |||
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */ | |||
return UCD_PROPERTY_PATTERN_SYNTAX; | |||
case 0x2900: | |||
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | |||
@@ -2048,6 +2048,6 @@ ucd_property ucd_properties(codepoint_t c, ucd_category category) | |||
case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE; | |||
case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE; | |||
case UCD_CATEGORY_Zs: return properties_Zs(c); | |||
default: return 0; // Co Cs Ii Lt Me | |||
default: return 0; /* Co Cs Ii Lt Me */ | |||
}; | |||
} |
@@ -86,7 +86,7 @@ void uprintf_codepoint(FILE *out, codepoint_t c, char mode) | |||
{ | |||
switch (mode) | |||
{ | |||
case 'c': // character | |||
case 'c': /* character */ | |||
switch (c) | |||
{ | |||
case '\t': fputs("\\t", out); break; | |||
@@ -95,10 +95,10 @@ void uprintf_codepoint(FILE *out, codepoint_t c, char mode) | |||
default: fput_utf8c(out, c); break; | |||
} | |||
break; | |||
case 'h': // hexadecimal (lower) | |||
case 'h': /* hexadecimal (lower) */ | |||
fprintf(out, "%06x", c); | |||
break; | |||
case 'H': // hexadecimal (upper) | |||
case 'H': /* hexadecimal (upper) */ | |||
fprintf(out, "%06X", c); | |||
break; | |||
} | |||
@@ -108,40 +108,40 @@ void uprintf_is(FILE *out, codepoint_t c, char mode) | |||
{ | |||
switch (mode) | |||
{ | |||
case 'A': // alpha-numeric | |||
case 'A': /* alpha-numeric */ | |||
fputc(iswalnum(c) ? '1' : '0', out); | |||
break; | |||
case 'a': // alpha | |||
case 'a': /* alpha */ | |||
fputc(iswalpha(c) ? '1' : '0', out); | |||
break; | |||
case 'b': // blank | |||
case 'b': /* blank */ | |||
fputc(iswblank(c) ? '1' : '0', out); | |||
break; | |||
case 'c': // control | |||
case 'c': /* control */ | |||
fputc(iswcntrl(c) ? '1' : '0', out); | |||
break; | |||
case 'd': // numeric | |||
case 'd': /* numeric */ | |||
fputc(iswdigit(c) ? '1' : '0', out); | |||
break; | |||
case 'g': // glyph | |||
case 'g': /* glyph */ | |||
fputc(iswgraph(c) ? '1' : '0', out); | |||
break; | |||
case 'l': // lower case | |||
case 'l': /* lower case */ | |||
fputc(iswlower(c) ? '1' : '0', out); | |||
break; | |||
case 'P': // printable | |||
case 'P': /* printable */ | |||
fputc(iswprint(c) ? '1' : '0', out); | |||
break; | |||
case 'p': // punctuation | |||
case 'p': /* punctuation */ | |||
fputc(iswpunct(c) ? '1' : '0', out); | |||
break; | |||
case 's': // whitespace | |||
case 's': /* whitespace */ | |||
fputc(iswspace(c) ? '1' : '0', out); | |||
break; | |||
case 'u': // upper case | |||
case 'u': /* upper case */ | |||
fputc(iswupper(c) ? '1' : '0', out); | |||
break; | |||
case 'x': // xdigit | |||
case 'x': /* xdigit */ | |||
fputc(iswxdigit(c) ? '1' : '0', out); | |||
break; | |||
} | |||
@@ -154,31 +154,31 @@ void uprintf(FILE *out, codepoint_t c, const char *format) | |||
case '%': | |||
switch (*++format) | |||
{ | |||
case 'c': // category | |||
case 'c': /* category */ | |||
fputs(ucd_get_category_string(ucd_lookup_category(c)), out); | |||
break; | |||
case 'C': // category group | |||
case 'C': /* category group */ | |||
fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); | |||
break; | |||
case 'p': // codepoint | |||
case 'p': /* codepoint */ | |||
uprintf_codepoint(out, c, *++format); | |||
break; | |||
case 'P': // properties | |||
case 'P': /* properties */ | |||
fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); | |||
break; | |||
case 'i': // is* | |||
case 'i': /* is* */ | |||
uprintf_is(out, c, *++format); | |||
break; | |||
case 'L': // lowercase | |||
case 'L': /* lowercase */ | |||
uprintf_codepoint(out, towlower(c), *++format); | |||
break; | |||
case 's': // script | |||
case 's': /* script */ | |||
fputs(ucd_get_script_string(ucd_lookup_script(c)), out); | |||
break; | |||
case 'T': // titlecase | |||
case 'T': /* titlecase */ | |||
uprintf_codepoint(out, ucd_totitle(c), *++format); | |||
break; | |||
case 'U': // uppercase | |||
case 'U': /* uppercase */ | |||
uprintf_codepoint(out, towupper(c), *++format); | |||
break; | |||
} |
@@ -83,7 +83,7 @@ void uprintf_codepoint(FILE *out, codepoint_t c, char mode) | |||
{ | |||
switch (mode) | |||
{ | |||
case 'c': // character | |||
case 'c': /* character */ | |||
switch (c) | |||
{ | |||
case '\t': fputs("\\t", out); break; | |||
@@ -92,10 +92,10 @@ void uprintf_codepoint(FILE *out, codepoint_t c, char mode) | |||
default: fput_utf8c(out, c); break; | |||
} | |||
break; | |||
case 'h': // hexadecimal (lower) | |||
case 'h': /* hexadecimal (lower) */ | |||
fprintf(out, "%06x", c); | |||
break; | |||
case 'H': // hexadecimal (upper) | |||
case 'H': /* hexadecimal (upper) */ | |||
fprintf(out, "%06X", c); | |||
break; | |||
} | |||
@@ -105,40 +105,40 @@ void uprintf_is(FILE *out, codepoint_t c, char mode) | |||
{ | |||
switch (mode) | |||
{ | |||
case 'A': // alpha-numeric | |||
case 'A': /* alpha-numeric */ | |||
fputc(ucd_isalnum(c) ? '1' : '0', out); | |||
break; | |||
case 'a': // alpha | |||
case 'a': /* alpha */ | |||
fputc(ucd_isalpha(c) ? '1' : '0', out); | |||
break; | |||
case 'b': // blank | |||
case 'b': /* blank */ | |||
fputc(ucd_isblank(c) ? '1' : '0', out); | |||
break; | |||
case 'c': // control | |||
case 'c': /* control */ | |||
fputc(ucd_iscntrl(c) ? '1' : '0', out); | |||
break; | |||
case 'd': // numeric | |||
case 'd': /* numeric */ | |||
fputc(ucd_isdigit(c) ? '1' : '0', out); | |||
break; | |||
case 'g': // glyph | |||
case 'g': /* glyph */ | |||
fputc(ucd_isgraph(c) ? '1' : '0', out); | |||
break; | |||
case 'l': // lower case | |||
case 'l': /* lower case */ | |||
fputc(ucd_islower(c) ? '1' : '0', out); | |||
break; | |||
case 'P': // printable | |||
case 'P': /* printable */ | |||
fputc(ucd_isprint(c) ? '1' : '0', out); | |||
break; | |||
case 'p': // punctuation | |||
case 'p': /* punctuation */ | |||
fputc(ucd_ispunct(c) ? '1' : '0', out); | |||
break; | |||
case 's': // whitespace | |||
case 's': /* whitespace */ | |||
fputc(ucd_isspace(c) ? '1' : '0', out); | |||
break; | |||
case 'u': // upper case | |||
case 'u': /* upper case */ | |||
fputc(ucd_isupper(c) ? '1' : '0', out); | |||
break; | |||
case 'x': // xdigit | |||
case 'x': /* xdigit */ | |||
fputc(ucd_isxdigit(c) ? '1' : '0', out); | |||
break; | |||
} | |||
@@ -151,31 +151,31 @@ void uprintf(FILE *out, codepoint_t c, const char *format) | |||
case '%': | |||
switch (*++format) | |||
{ | |||
case 'c': // category | |||
case 'c': /* category */ | |||
fputs(ucd_get_category_string(ucd_lookup_category(c)), out); | |||
break; | |||
case 'C': // category group | |||
case 'C': /* category group */ | |||
fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); | |||
break; | |||
case 'p': // codepoint | |||
case 'p': /* codepoint */ | |||
uprintf_codepoint(out, c, *++format); | |||
break; | |||
case 'P': // properties | |||
case 'P': /* properties */ | |||
fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); | |||
break; | |||
case 'i': // is* | |||
case 'i': /* is* */ | |||
uprintf_is(out, c, *++format); | |||
break; | |||
case 'L': // lowercase | |||
case 'L': /* lowercase */ | |||
uprintf_codepoint(out, ucd_tolower(c), *++format); | |||
break; | |||
case 's': // script | |||
case 's': /* script */ | |||
fputs(ucd_get_script_string(ucd_lookup_script(c)), out); | |||
break; | |||
case 'T': // titlecase | |||
case 'T': /* titlecase */ | |||
uprintf_codepoint(out, ucd_totitle(c), *++format); | |||
break; | |||
case 'U': // uppercase | |||
case 'U': /* uppercase */ | |||
uprintf_codepoint(out, ucd_toupper(c), *++format); | |||
break; | |||
} |
@@ -51,14 +51,15 @@ if __name__ == '__main__': | |||
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
// NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
// the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
/* NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
* the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
*/ | |||
#include "ucd/ucd.h" | |||
#include <stddef.h> | |||
// Unicode Character Data %s | |||
/* Unicode Character Data %s */ | |||
struct case_conversion_entry | |||
{ |
@@ -110,8 +110,9 @@ if __name__ == '__main__': | |||
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
// NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
// the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
/* NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
* the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
*/ | |||
#include "ucd/ucd.h" | |||
@@ -149,7 +150,7 @@ if __name__ == '__main__': | |||
#define Zs UCD_CATEGORY_Zs | |||
#define Ii UCD_CATEGORY_Ii | |||
// Unicode Character Data %s | |||
/* Unicode Character Data %s */ | |||
""" % ucd_version) | |||
for category in special_categories: | |||
@@ -187,7 +188,7 @@ if __name__ == '__main__': | |||
sys.stdout.write('{\n') | |||
for codepoint, table in sorted(category_tables[table_index].items()): | |||
if isinstance(table, str): | |||
sys.stdout.write('\tcategories_%s, // %s\n' % (table, codepoint)) | |||
sys.stdout.write('\tcategories_%s, /* %s */\n' % (table, codepoint)) | |||
else: | |||
sys.stdout.write('\tcategories_%s,\n' % codepoint) | |||
sys.stdout.write('};\n') | |||
@@ -197,14 +198,14 @@ if __name__ == '__main__': | |||
sys.stdout.write('{\n') | |||
for codepoints, category, comment in category_sets: | |||
if category: | |||
sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, category, codepoints, comment)) | |||
sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, category, codepoints, comment)) | |||
else: | |||
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) | |||
sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints)) | |||
sys.stdout.write('\t{\n') | |||
sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | |||
sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n') | |||
sys.stdout.write('\t}\n') | |||
sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n') | |||
sys.stdout.write('\treturn Ii; /* Invalid Unicode Codepoint */\n') | |||
sys.stdout.write('}\n') | |||
sys.stdout.write(""" |
@@ -104,8 +104,9 @@ if __name__ == '__main__': | |||
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
// NOTE: This file is automatically generated from the Scripts.txt file in | |||
// the Unicode Character database by the ucd-tools/tools/scripts.py script. | |||
/* NOTE: This file is automatically generated from the Scripts.txt file in | |||
* the Unicode Character database by the ucd-tools/tools/scripts.py script. | |||
*/ | |||
#include "ucd/ucd.h" | |||
@@ -285,7 +286,7 @@ if __name__ == '__main__': | |||
#define Zyyy UCD_SCRIPT_Zyyy | |||
#define Zzzz UCD_SCRIPT_Zzzz | |||
// Unicode Character Data %s | |||
/* Unicode Character Data %s */ | |||
""" % ucd_version) | |||
for script in special_scripts: | |||
@@ -323,7 +324,7 @@ if __name__ == '__main__': | |||
sys.stdout.write('{\n') | |||
for codepoint, table in sorted(script_tables[table_index].items()): | |||
if isinstance(table, str): | |||
sys.stdout.write('\tscripts_%s, // %s\n' % (table, codepoint)) | |||
sys.stdout.write('\tscripts_%s, /* %s */\n' % (table, codepoint)) | |||
else: | |||
sys.stdout.write('\tscripts_%s,\n' % codepoint) | |||
sys.stdout.write('};\n') | |||
@@ -333,12 +334,12 @@ if __name__ == '__main__': | |||
sys.stdout.write('{\n') | |||
for codepoints, script, comment in script_sets: | |||
if script: | |||
sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, script, codepoints, comment)) | |||
sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, script, codepoints, comment)) | |||
else: | |||
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) | |||
sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints)) | |||
sys.stdout.write('\t{\n') | |||
sys.stdout.write('\t\tconst uint8_t *table = scripts_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | |||
sys.stdout.write('\t\treturn (ucd_script)table[c % 256];\n') | |||
sys.stdout.write('\t}\n') | |||
sys.stdout.write('\treturn Zzzz; // Invalid Unicode Codepoint\n') | |||
sys.stdout.write('\treturn Zzzz; /* Invalid Unicode Codepoint */\n') | |||
sys.stdout.write('}\n') |