| ucd-update: tools/case.py tools/categories.py tools/scripts.py | ucd-update: tools/case.py tools/categories.py tools/scripts.py | ||||
| tools/case.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/case.c | tools/case.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/case.c | ||||
| tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/categories.cpp | |||||
| tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/categories.c | |||||
| tools/scripts.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/scripts.cpp | tools/scripts.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/scripts.cpp | ||||
| libucd_includedir = $(includedir)/ucd | libucd_includedir = $(includedir)/ucd | ||||
| src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION) | src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION) | ||||
| src_libucd_la_SOURCES = \ | src_libucd_la_SOURCES = \ | ||||
| src/case.c \ | src/case.c \ | ||||
| src/categories.cpp \ | |||||
| src/categories.c \ | |||||
| src/ctype.c \ | src/ctype.c \ | ||||
| src/scripts.cpp \ | src/scripts.cpp \ | ||||
| src/tostring.c | src/tostring.c |
| #include <stddef.h> | #include <stddef.h> | ||||
| using namespace ucd; | |||||
| #define Cc UCD_CATEGORY_Cc | |||||
| #define Cf UCD_CATEGORY_Cf | |||||
| #define Cn UCD_CATEGORY_Cn | |||||
| #define Co UCD_CATEGORY_Co | |||||
| #define Cs UCD_CATEGORY_Cs | |||||
| #define Ll UCD_CATEGORY_Ll | |||||
| #define Lm UCD_CATEGORY_Lm | |||||
| #define Lo UCD_CATEGORY_Lo | |||||
| #define Lt UCD_CATEGORY_Lt | |||||
| #define Lu UCD_CATEGORY_Lu | |||||
| #define Mc UCD_CATEGORY_Mc | |||||
| #define Me UCD_CATEGORY_Me | |||||
| #define Mn UCD_CATEGORY_Mn | |||||
| #define Nd UCD_CATEGORY_Nd | |||||
| #define Nl UCD_CATEGORY_Nl | |||||
| #define No UCD_CATEGORY_No | |||||
| #define Pc UCD_CATEGORY_Pc | |||||
| #define Pd UCD_CATEGORY_Pd | |||||
| #define Pe UCD_CATEGORY_Pe | |||||
| #define Pf UCD_CATEGORY_Pf | |||||
| #define Pi UCD_CATEGORY_Pi | |||||
| #define Po UCD_CATEGORY_Po | |||||
| #define Ps UCD_CATEGORY_Ps | |||||
| #define Sc UCD_CATEGORY_Sc | |||||
| #define Sk UCD_CATEGORY_Sk | |||||
| #define Sm UCD_CATEGORY_Sm | |||||
| #define So UCD_CATEGORY_So | |||||
| #define Zl UCD_CATEGORY_Zl | |||||
| #define Zp UCD_CATEGORY_Zp | |||||
| #define Zs UCD_CATEGORY_Zs | |||||
| #define Ii UCD_CATEGORY_Ii | |||||
| // Unicode Character Data 8.0.0 | // Unicode Character Data 8.0.0 | ||||
| }; | }; | ||||
| ucd_category ucd_lookup_category(codepoint_t c) | ucd_category ucd_lookup_category(codepoint_t c) | ||||
| { | |||||
| return (ucd_category)ucd::lookup_category((ucd::category)c); | |||||
| } | |||||
| ucd::category ucd::lookup_category(codepoint_t c) | |||||
| { | { | ||||
| if (c <= 0x00D7FF) // 000000..00D7FF | if (c <= 0x00D7FF) // 000000..00D7FF | ||||
| { | { | ||||
| const uint8_t *table = categories_000000_00D7FF[(c - 0x000000) / 256]; | const uint8_t *table = categories_000000_00D7FF[(c - 0x000000) / 256]; | ||||
| return (ucd::category)table[c % 256]; | |||||
| return (ucd_category)table[c % 256]; | |||||
| } | } | ||||
| if (c <= 0x00DFFF) return Cs; // 00D800..00DFFF : Surrogates | if (c <= 0x00DFFF) return Cs; // 00D800..00DFFF : Surrogates | ||||
| if (c <= 0x00F7FF) return Co; // 00E000..00F7FF : Private Use Area | if (c <= 0x00F7FF) return Co; // 00E000..00F7FF : Private Use Area | ||||
| if (c <= 0x02FAFF) // 00F800..02FAFF | if (c <= 0x02FAFF) // 00F800..02FAFF | ||||
| { | { | ||||
| const uint8_t *table = categories_00F800_02FAFF[(c - 0x00F800) / 256]; | const uint8_t *table = categories_00F800_02FAFF[(c - 0x00F800) / 256]; | ||||
| return (ucd::category)table[c % 256]; | |||||
| return (ucd_category)table[c % 256]; | |||||
| } | } | ||||
| if (c <= 0x0DFFFF) return Cn; // 02FB00..0DFFFF : Unassigned | if (c <= 0x0DFFFF) return Cn; // 02FB00..0DFFFF : Unassigned | ||||
| if (c <= 0x0E01FF) // 0E0000..0E01FF | if (c <= 0x0E01FF) // 0E0000..0E01FF | ||||
| { | { | ||||
| const uint8_t *table = categories_0E0000_0E01FF[(c - 0x0E0000) / 256]; | const uint8_t *table = categories_0E0000_0E01FF[(c - 0x0E0000) / 256]; | ||||
| return (ucd::category)table[c % 256]; | |||||
| return (ucd_category)table[c % 256]; | |||||
| } | } | ||||
| if (c <= 0x0EFFFF) return Cn; // 0E0200..0EFFFF : Unassigned | if (c <= 0x0EFFFF) return Cn; // 0E0200..0EFFFF : Unassigned | ||||
| if (c <= 0x0FFFFD) return Co; // 0F0000..0FFFFD : Plane 15 Private Use | if (c <= 0x0FFFFD) return Co; // 0F0000..0FFFFD : Plane 15 Private Use | ||||
| } | } | ||||
| ucd_category_group ucd_get_category_group_for_category(ucd_category c) | ucd_category_group ucd_get_category_group_for_category(ucd_category c) | ||||
| { | |||||
| return (ucd_category_group)ucd::lookup_category_group((ucd::category)c); | |||||
| } | |||||
| ucd::category_group ucd::lookup_category_group(category c) | |||||
| { | { | ||||
| switch (c) | switch (c) | ||||
| { | { | ||||
| case Cc: case Cf: case Cn: case Co: case Cs: | case Cc: case Cf: case Cn: case Co: case Cs: | ||||
| return C; | |||||
| return UCD_CATEGORY_GROUP_C; | |||||
| case Ll: case Lm: case Lo: case Lt: case Lu: | case Ll: case Lm: case Lo: case Lt: case Lu: | ||||
| return L; | |||||
| return UCD_CATEGORY_GROUP_L; | |||||
| case Mc: case Me: case Mn: | case Mc: case Me: case Mn: | ||||
| return M; | |||||
| return UCD_CATEGORY_GROUP_M; | |||||
| case Nd: case Nl: case No: | case Nd: case Nl: case No: | ||||
| return N; | |||||
| return UCD_CATEGORY_GROUP_N; | |||||
| case Pc: case Pd: case Pe: case Pf: case Pi: case Po: case Ps: | case Pc: case Pd: case Pe: case Pf: case Pi: case Po: case Ps: | ||||
| return P; | |||||
| return UCD_CATEGORY_GROUP_P; | |||||
| case Sc: case Sk: case Sm: case So: | case Sc: case Sk: case Sm: case So: | ||||
| return S; | |||||
| return UCD_CATEGORY_GROUP_S; | |||||
| case Zl: case Zp: case Zs: | case Zl: case Zp: case Zs: | ||||
| return Z; | |||||
| return UCD_CATEGORY_GROUP_Z; | |||||
| case Ii: | case Ii: | ||||
| return I; | |||||
| return UCD_CATEGORY_GROUP_I; | |||||
| } | } | ||||
| } | } | ||||
| ucd_category_group ucd_lookup_category_group(codepoint_t c) | ucd_category_group ucd_lookup_category_group(codepoint_t c) | ||||
| { | { | ||||
| return (ucd_category_group)ucd::lookup_category_group(ucd::lookup_category(c)); | |||||
| } | |||||
| ucd::category_group ucd::lookup_category_group(codepoint_t c) | |||||
| { | |||||
| return lookup_category_group(lookup_category(c)); | |||||
| return (ucd_category_group)ucd_get_category_group_for_category(ucd_lookup_category(c)); | |||||
| } | } |
| * @param c The General Category to lookup. | * @param c The General Category to lookup. | ||||
| * @return The General Category Group of the General Category. | * @return The General Category Group of the General Category. | ||||
| */ | */ | ||||
| category_group lookup_category_group(category c); | |||||
| inline category_group lookup_category_group(category c) | |||||
| { | |||||
| return (category_group)ucd_get_category_group_for_category((ucd_category)c); | |||||
| } | |||||
| /** @brief Lookup the General Category Group for a Unicode codepoint. | /** @brief Lookup the General Category Group for a Unicode codepoint. | ||||
| * | * | ||||
| * @param c The Unicode codepoint to lookup. | * @param c The Unicode codepoint to lookup. | ||||
| * @return The General Category Group of the Unicode codepoint. | * @return The General Category Group of the Unicode codepoint. | ||||
| */ | */ | ||||
| category_group lookup_category_group(codepoint_t c); | |||||
| inline category_group lookup_category_group(codepoint_t c) | |||||
| { | |||||
| return (category_group)ucd_lookup_category_group(c); | |||||
| } | |||||
| /** @brief Lookup the General Category for a Unicode codepoint. | /** @brief Lookup the General Category for a Unicode codepoint. | ||||
| * | * | ||||
| * @param c The Unicode codepoint to lookup. | * @param c The Unicode codepoint to lookup. | ||||
| * @return The General Category of the Unicode codepoint. | * @return The General Category of the Unicode codepoint. | ||||
| */ | */ | ||||
| category lookup_category(codepoint_t c); | |||||
| inline category lookup_category(codepoint_t c) | |||||
| { | |||||
| return (category)ucd_lookup_category(c); | |||||
| } | |||||
| /** @brief Unicode Script | /** @brief Unicode Script | ||||
| * @see http://www.iana.org/assignments/language-subtag-registry | * @see http://www.iana.org/assignments/language-subtag-registry |
| #include <stddef.h> | #include <stddef.h> | ||||
| using namespace ucd; | |||||
| #define Cc UCD_CATEGORY_Cc | |||||
| #define Cf UCD_CATEGORY_Cf | |||||
| #define Cn UCD_CATEGORY_Cn | |||||
| #define Co UCD_CATEGORY_Co | |||||
| #define Cs UCD_CATEGORY_Cs | |||||
| #define Ll UCD_CATEGORY_Ll | |||||
| #define Lm UCD_CATEGORY_Lm | |||||
| #define Lo UCD_CATEGORY_Lo | |||||
| #define Lt UCD_CATEGORY_Lt | |||||
| #define Lu UCD_CATEGORY_Lu | |||||
| #define Mc UCD_CATEGORY_Mc | |||||
| #define Me UCD_CATEGORY_Me | |||||
| #define Mn UCD_CATEGORY_Mn | |||||
| #define Nd UCD_CATEGORY_Nd | |||||
| #define Nl UCD_CATEGORY_Nl | |||||
| #define No UCD_CATEGORY_No | |||||
| #define Pc UCD_CATEGORY_Pc | |||||
| #define Pd UCD_CATEGORY_Pd | |||||
| #define Pe UCD_CATEGORY_Pe | |||||
| #define Pf UCD_CATEGORY_Pf | |||||
| #define Pi UCD_CATEGORY_Pi | |||||
| #define Po UCD_CATEGORY_Po | |||||
| #define Ps UCD_CATEGORY_Ps | |||||
| #define Sc UCD_CATEGORY_Sc | |||||
| #define Sk UCD_CATEGORY_Sk | |||||
| #define Sm UCD_CATEGORY_Sm | |||||
| #define So UCD_CATEGORY_So | |||||
| #define Zl UCD_CATEGORY_Zl | |||||
| #define Zp UCD_CATEGORY_Zp | |||||
| #define Zs UCD_CATEGORY_Zs | |||||
| #define Ii UCD_CATEGORY_Ii | |||||
| // Unicode Character Data %s | // Unicode Character Data %s | ||||
| """ % ucd_version) | """ % ucd_version) | ||||
| sys.stdout.write('\tcategories_%s,\n' % codepoint) | sys.stdout.write('\tcategories_%s,\n' % codepoint) | ||||
| sys.stdout.write('};\n') | sys.stdout.write('};\n') | ||||
| sys.stdout.write(""" | |||||
| ucd_category ucd_lookup_category(codepoint_t c) | |||||
| { | |||||
| return (ucd_category)ucd::lookup_category((ucd::category)c); | |||||
| } | |||||
| """) | |||||
| sys.stdout.write('\n') | sys.stdout.write('\n') | ||||
| sys.stdout.write('ucd::category ucd::lookup_category(codepoint_t c)\n') | |||||
| sys.stdout.write('ucd_category ucd_lookup_category(codepoint_t c)\n') | |||||
| sys.stdout.write('{\n') | sys.stdout.write('{\n') | ||||
| for codepoints, category, comment in category_sets: | for codepoints, category, comment in category_sets: | ||||
| if category: | if category: | ||||
| sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) | sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) | ||||
| sys.stdout.write('\t{\n') | sys.stdout.write('\t{\n') | ||||
| sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | ||||
| sys.stdout.write('\t\treturn (ucd::category)table[c % 256];\n') | |||||
| sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n') | |||||
| sys.stdout.write('\t}\n') | sys.stdout.write('\t}\n') | ||||
| sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n') | sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n') | ||||
| sys.stdout.write('}\n') | sys.stdout.write('}\n') | ||||
| sys.stdout.write(""" | sys.stdout.write(""" | ||||
| ucd_category_group ucd_get_category_group_for_category(ucd_category c) | ucd_category_group ucd_get_category_group_for_category(ucd_category c) | ||||
| { | |||||
| return (ucd_category_group)ucd::lookup_category_group((ucd::category)c); | |||||
| } | |||||
| ucd::category_group ucd::lookup_category_group(category c) | |||||
| { | { | ||||
| switch (c) | switch (c) | ||||
| { | { | ||||
| case Cc: case Cf: case Cn: case Co: case Cs: | case Cc: case Cf: case Cn: case Co: case Cs: | ||||
| return C; | |||||
| return UCD_CATEGORY_GROUP_C; | |||||
| case Ll: case Lm: case Lo: case Lt: case Lu: | case Ll: case Lm: case Lo: case Lt: case Lu: | ||||
| return L; | |||||
| return UCD_CATEGORY_GROUP_L; | |||||
| case Mc: case Me: case Mn: | case Mc: case Me: case Mn: | ||||
| return M; | |||||
| return UCD_CATEGORY_GROUP_M; | |||||
| case Nd: case Nl: case No: | case Nd: case Nl: case No: | ||||
| return N; | |||||
| return UCD_CATEGORY_GROUP_N; | |||||
| case Pc: case Pd: case Pe: case Pf: case Pi: case Po: case Ps: | case Pc: case Pd: case Pe: case Pf: case Pi: case Po: case Ps: | ||||
| return P; | |||||
| return UCD_CATEGORY_GROUP_P; | |||||
| case Sc: case Sk: case Sm: case So: | case Sc: case Sk: case Sm: case So: | ||||
| return S; | |||||
| return UCD_CATEGORY_GROUP_S; | |||||
| case Zl: case Zp: case Zs: | case Zl: case Zp: case Zs: | ||||
| return Z; | |||||
| return UCD_CATEGORY_GROUP_Z; | |||||
| case Ii: | case Ii: | ||||
| return I; | |||||
| return UCD_CATEGORY_GROUP_I; | |||||
| } | } | ||||
| } | } | ||||
| ucd_category_group ucd_lookup_category_group(codepoint_t c) | ucd_category_group ucd_lookup_category_group(codepoint_t c) | ||||
| { | { | ||||
| return (ucd_category_group)ucd::lookup_category_group(ucd::lookup_category(c)); | |||||
| } | |||||
| ucd::category_group ucd::lookup_category_group(codepoint_t c) | |||||
| { | |||||
| return lookup_category_group(lookup_category(c)); | |||||
| return (ucd_category_group)ucd_get_category_group_for_category(ucd_lookup_category(c)); | |||||
| } | } | ||||
| """) | """) |