| @@ -94,7 +94,7 @@ tools/scripts.py: tools/ucd.py \ | |||
| ucd-update: tools/case.py tools/categories.py tools/scripts.py | |||
| tools/case.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/case.c | |||
| tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/categories.cpp | |||
| tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/categories.c | |||
| tools/scripts.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/scripts.cpp | |||
| libucd_includedir = $(includedir)/ucd | |||
| @@ -105,7 +105,7 @@ lib_LTLIBRARIES += src/libucd.la | |||
| src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION) | |||
| src_libucd_la_SOURCES = \ | |||
| src/case.c \ | |||
| src/categories.cpp \ | |||
| src/categories.c \ | |||
| src/ctype.c \ | |||
| src/scripts.cpp \ | |||
| src/tostring.c | |||
| @@ -25,7 +25,37 @@ | |||
| #include <stddef.h> | |||
| using namespace ucd; | |||
| #define Cc UCD_CATEGORY_Cc | |||
| #define Cf UCD_CATEGORY_Cf | |||
| #define Cn UCD_CATEGORY_Cn | |||
| #define Co UCD_CATEGORY_Co | |||
| #define Cs UCD_CATEGORY_Cs | |||
| #define Ll UCD_CATEGORY_Ll | |||
| #define Lm UCD_CATEGORY_Lm | |||
| #define Lo UCD_CATEGORY_Lo | |||
| #define Lt UCD_CATEGORY_Lt | |||
| #define Lu UCD_CATEGORY_Lu | |||
| #define Mc UCD_CATEGORY_Mc | |||
| #define Me UCD_CATEGORY_Me | |||
| #define Mn UCD_CATEGORY_Mn | |||
| #define Nd UCD_CATEGORY_Nd | |||
| #define Nl UCD_CATEGORY_Nl | |||
| #define No UCD_CATEGORY_No | |||
| #define Pc UCD_CATEGORY_Pc | |||
| #define Pd UCD_CATEGORY_Pd | |||
| #define Pe UCD_CATEGORY_Pe | |||
| #define Pf UCD_CATEGORY_Pf | |||
| #define Pi UCD_CATEGORY_Pi | |||
| #define Po UCD_CATEGORY_Po | |||
| #define Ps UCD_CATEGORY_Ps | |||
| #define Sc UCD_CATEGORY_Sc | |||
| #define Sk UCD_CATEGORY_Sk | |||
| #define Sm UCD_CATEGORY_Sm | |||
| #define So UCD_CATEGORY_So | |||
| #define Zl UCD_CATEGORY_Zl | |||
| #define Zp UCD_CATEGORY_Zp | |||
| #define Zs UCD_CATEGORY_Zs | |||
| #define Ii UCD_CATEGORY_Ii | |||
| // Unicode Character Data 8.0.0 | |||
| @@ -3315,29 +3345,24 @@ static const uint8_t *categories_0E0000_0E01FF[] = | |||
| }; | |||
| ucd_category ucd_lookup_category(codepoint_t c) | |||
| { | |||
| return (ucd_category)ucd::lookup_category((ucd::category)c); | |||
| } | |||
| ucd::category ucd::lookup_category(codepoint_t c) | |||
| { | |||
| if (c <= 0x00D7FF) // 000000..00D7FF | |||
| { | |||
| const uint8_t *table = categories_000000_00D7FF[(c - 0x000000) / 256]; | |||
| return (ucd::category)table[c % 256]; | |||
| return (ucd_category)table[c % 256]; | |||
| } | |||
| if (c <= 0x00DFFF) return Cs; // 00D800..00DFFF : Surrogates | |||
| if (c <= 0x00F7FF) return Co; // 00E000..00F7FF : Private Use Area | |||
| if (c <= 0x02FAFF) // 00F800..02FAFF | |||
| { | |||
| const uint8_t *table = categories_00F800_02FAFF[(c - 0x00F800) / 256]; | |||
| return (ucd::category)table[c % 256]; | |||
| return (ucd_category)table[c % 256]; | |||
| } | |||
| if (c <= 0x0DFFFF) return Cn; // 02FB00..0DFFFF : Unassigned | |||
| if (c <= 0x0E01FF) // 0E0000..0E01FF | |||
| { | |||
| const uint8_t *table = categories_0E0000_0E01FF[(c - 0x0E0000) / 256]; | |||
| return (ucd::category)table[c % 256]; | |||
| return (ucd_category)table[c % 256]; | |||
| } | |||
| if (c <= 0x0EFFFF) return Cn; // 0E0200..0EFFFF : Unassigned | |||
| if (c <= 0x0FFFFD) return Co; // 0F0000..0FFFFD : Plane 15 Private Use | |||
| @@ -3348,39 +3373,29 @@ ucd::category ucd::lookup_category(codepoint_t c) | |||
| } | |||
| ucd_category_group ucd_get_category_group_for_category(ucd_category c) | |||
| { | |||
| return (ucd_category_group)ucd::lookup_category_group((ucd::category)c); | |||
| } | |||
| ucd::category_group ucd::lookup_category_group(category c) | |||
| { | |||
| switch (c) | |||
| { | |||
| case Cc: case Cf: case Cn: case Co: case Cs: | |||
| return C; | |||
| return UCD_CATEGORY_GROUP_C; | |||
| case Ll: case Lm: case Lo: case Lt: case Lu: | |||
| return L; | |||
| return UCD_CATEGORY_GROUP_L; | |||
| case Mc: case Me: case Mn: | |||
| return M; | |||
| return UCD_CATEGORY_GROUP_M; | |||
| case Nd: case Nl: case No: | |||
| return N; | |||
| return UCD_CATEGORY_GROUP_N; | |||
| case Pc: case Pd: case Pe: case Pf: case Pi: case Po: case Ps: | |||
| return P; | |||
| return UCD_CATEGORY_GROUP_P; | |||
| case Sc: case Sk: case Sm: case So: | |||
| return S; | |||
| return UCD_CATEGORY_GROUP_S; | |||
| case Zl: case Zp: case Zs: | |||
| return Z; | |||
| return UCD_CATEGORY_GROUP_Z; | |||
| case Ii: | |||
| return I; | |||
| return UCD_CATEGORY_GROUP_I; | |||
| } | |||
| } | |||
| ucd_category_group ucd_lookup_category_group(codepoint_t c) | |||
| { | |||
| return (ucd_category_group)ucd::lookup_category_group(ucd::lookup_category(c)); | |||
| } | |||
| ucd::category_group ucd::lookup_category_group(codepoint_t c) | |||
| { | |||
| return lookup_category_group(lookup_category(c)); | |||
| return (ucd_category_group)ucd_get_category_group_for_category(ucd_lookup_category(c)); | |||
| } | |||
| @@ -524,21 +524,30 @@ namespace ucd | |||
| * @param c The General Category to lookup. | |||
| * @return The General Category Group of the General Category. | |||
| */ | |||
| category_group lookup_category_group(category c); | |||
| inline category_group lookup_category_group(category c) | |||
| { | |||
| return (category_group)ucd_get_category_group_for_category((ucd_category)c); | |||
| } | |||
| /** @brief Lookup the General Category Group for a Unicode codepoint. | |||
| * | |||
| * @param c The Unicode codepoint to lookup. | |||
| * @return The General Category Group of the Unicode codepoint. | |||
| */ | |||
| category_group lookup_category_group(codepoint_t c); | |||
| inline category_group lookup_category_group(codepoint_t c) | |||
| { | |||
| return (category_group)ucd_lookup_category_group(c); | |||
| } | |||
| /** @brief Lookup the General Category for a Unicode codepoint. | |||
| * | |||
| * @param c The Unicode codepoint to lookup. | |||
| * @return The General Category of the Unicode codepoint. | |||
| */ | |||
| category lookup_category(codepoint_t c); | |||
| inline category lookup_category(codepoint_t c) | |||
| { | |||
| return (category)ucd_lookup_category(c); | |||
| } | |||
| /** @brief Unicode Script | |||
| * @see http://www.iana.org/assignments/language-subtag-registry | |||
| @@ -117,7 +117,37 @@ if __name__ == '__main__': | |||
| #include <stddef.h> | |||
| using namespace ucd; | |||
| #define Cc UCD_CATEGORY_Cc | |||
| #define Cf UCD_CATEGORY_Cf | |||
| #define Cn UCD_CATEGORY_Cn | |||
| #define Co UCD_CATEGORY_Co | |||
| #define Cs UCD_CATEGORY_Cs | |||
| #define Ll UCD_CATEGORY_Ll | |||
| #define Lm UCD_CATEGORY_Lm | |||
| #define Lo UCD_CATEGORY_Lo | |||
| #define Lt UCD_CATEGORY_Lt | |||
| #define Lu UCD_CATEGORY_Lu | |||
| #define Mc UCD_CATEGORY_Mc | |||
| #define Me UCD_CATEGORY_Me | |||
| #define Mn UCD_CATEGORY_Mn | |||
| #define Nd UCD_CATEGORY_Nd | |||
| #define Nl UCD_CATEGORY_Nl | |||
| #define No UCD_CATEGORY_No | |||
| #define Pc UCD_CATEGORY_Pc | |||
| #define Pd UCD_CATEGORY_Pd | |||
| #define Pe UCD_CATEGORY_Pe | |||
| #define Pf UCD_CATEGORY_Pf | |||
| #define Pi UCD_CATEGORY_Pi | |||
| #define Po UCD_CATEGORY_Po | |||
| #define Ps UCD_CATEGORY_Ps | |||
| #define Sc UCD_CATEGORY_Sc | |||
| #define Sk UCD_CATEGORY_Sk | |||
| #define Sm UCD_CATEGORY_Sm | |||
| #define So UCD_CATEGORY_So | |||
| #define Zl UCD_CATEGORY_Zl | |||
| #define Zp UCD_CATEGORY_Zp | |||
| #define Zs UCD_CATEGORY_Zs | |||
| #define Ii UCD_CATEGORY_Ii | |||
| // Unicode Character Data %s | |||
| """ % ucd_version) | |||
| @@ -162,14 +192,8 @@ using namespace ucd; | |||
| sys.stdout.write('\tcategories_%s,\n' % codepoint) | |||
| sys.stdout.write('};\n') | |||
| sys.stdout.write(""" | |||
| ucd_category ucd_lookup_category(codepoint_t c) | |||
| { | |||
| return (ucd_category)ucd::lookup_category((ucd::category)c); | |||
| } | |||
| """) | |||
| sys.stdout.write('\n') | |||
| sys.stdout.write('ucd::category ucd::lookup_category(codepoint_t c)\n') | |||
| sys.stdout.write('ucd_category ucd_lookup_category(codepoint_t c)\n') | |||
| sys.stdout.write('{\n') | |||
| for codepoints, category, comment in category_sets: | |||
| if category: | |||
| @@ -178,47 +202,37 @@ ucd_category ucd_lookup_category(codepoint_t c) | |||
| sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) | |||
| sys.stdout.write('\t{\n') | |||
| sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | |||
| sys.stdout.write('\t\treturn (ucd::category)table[c % 256];\n') | |||
| sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n') | |||
| sys.stdout.write('\t}\n') | |||
| sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n') | |||
| sys.stdout.write('}\n') | |||
| sys.stdout.write(""" | |||
| ucd_category_group ucd_get_category_group_for_category(ucd_category c) | |||
| { | |||
| return (ucd_category_group)ucd::lookup_category_group((ucd::category)c); | |||
| } | |||
| ucd::category_group ucd::lookup_category_group(category c) | |||
| { | |||
| switch (c) | |||
| { | |||
| case Cc: case Cf: case Cn: case Co: case Cs: | |||
| return C; | |||
| return UCD_CATEGORY_GROUP_C; | |||
| case Ll: case Lm: case Lo: case Lt: case Lu: | |||
| return L; | |||
| return UCD_CATEGORY_GROUP_L; | |||
| case Mc: case Me: case Mn: | |||
| return M; | |||
| return UCD_CATEGORY_GROUP_M; | |||
| case Nd: case Nl: case No: | |||
| return N; | |||
| return UCD_CATEGORY_GROUP_N; | |||
| case Pc: case Pd: case Pe: case Pf: case Pi: case Po: case Ps: | |||
| return P; | |||
| return UCD_CATEGORY_GROUP_P; | |||
| case Sc: case Sk: case Sm: case So: | |||
| return S; | |||
| return UCD_CATEGORY_GROUP_S; | |||
| case Zl: case Zp: case Zs: | |||
| return Z; | |||
| return UCD_CATEGORY_GROUP_Z; | |||
| case Ii: | |||
| return I; | |||
| return UCD_CATEGORY_GROUP_I; | |||
| } | |||
| } | |||
| ucd_category_group ucd_lookup_category_group(codepoint_t c) | |||
| { | |||
| return (ucd_category_group)ucd::lookup_category_group(ucd::lookup_category(c)); | |||
| } | |||
| ucd::category_group ucd::lookup_category_group(codepoint_t c) | |||
| { | |||
| return lookup_category_group(lookup_category(c)); | |||
| return (ucd_category_group)ucd_get_category_group_for_category(ucd_lookup_category(c)); | |||
| } | |||
| """) | |||