@@ -94,7 +94,7 @@ tools/scripts.py: tools/ucd.py \ | |||
ucd-update: tools/case.py tools/categories.py tools/scripts.py | |||
tools/case.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/case.c | |||
tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/categories.cpp | |||
tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/categories.c | |||
tools/scripts.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/scripts.cpp | |||
libucd_includedir = $(includedir)/ucd | |||
@@ -105,7 +105,7 @@ lib_LTLIBRARIES += src/libucd.la | |||
src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION) | |||
src_libucd_la_SOURCES = \ | |||
src/case.c \ | |||
src/categories.cpp \ | |||
src/categories.c \ | |||
src/ctype.c \ | |||
src/scripts.cpp \ | |||
src/tostring.c |
@@ -25,7 +25,37 @@ | |||
#include <stddef.h> | |||
using namespace ucd; | |||
#define Cc UCD_CATEGORY_Cc | |||
#define Cf UCD_CATEGORY_Cf | |||
#define Cn UCD_CATEGORY_Cn | |||
#define Co UCD_CATEGORY_Co | |||
#define Cs UCD_CATEGORY_Cs | |||
#define Ll UCD_CATEGORY_Ll | |||
#define Lm UCD_CATEGORY_Lm | |||
#define Lo UCD_CATEGORY_Lo | |||
#define Lt UCD_CATEGORY_Lt | |||
#define Lu UCD_CATEGORY_Lu | |||
#define Mc UCD_CATEGORY_Mc | |||
#define Me UCD_CATEGORY_Me | |||
#define Mn UCD_CATEGORY_Mn | |||
#define Nd UCD_CATEGORY_Nd | |||
#define Nl UCD_CATEGORY_Nl | |||
#define No UCD_CATEGORY_No | |||
#define Pc UCD_CATEGORY_Pc | |||
#define Pd UCD_CATEGORY_Pd | |||
#define Pe UCD_CATEGORY_Pe | |||
#define Pf UCD_CATEGORY_Pf | |||
#define Pi UCD_CATEGORY_Pi | |||
#define Po UCD_CATEGORY_Po | |||
#define Ps UCD_CATEGORY_Ps | |||
#define Sc UCD_CATEGORY_Sc | |||
#define Sk UCD_CATEGORY_Sk | |||
#define Sm UCD_CATEGORY_Sm | |||
#define So UCD_CATEGORY_So | |||
#define Zl UCD_CATEGORY_Zl | |||
#define Zp UCD_CATEGORY_Zp | |||
#define Zs UCD_CATEGORY_Zs | |||
#define Ii UCD_CATEGORY_Ii | |||
// Unicode Character Data 8.0.0 | |||
@@ -3315,29 +3345,24 @@ static const uint8_t *categories_0E0000_0E01FF[] = | |||
}; | |||
ucd_category ucd_lookup_category(codepoint_t c) | |||
{ | |||
return (ucd_category)ucd::lookup_category((ucd::category)c); | |||
} | |||
ucd::category ucd::lookup_category(codepoint_t c) | |||
{ | |||
if (c <= 0x00D7FF) // 000000..00D7FF | |||
{ | |||
const uint8_t *table = categories_000000_00D7FF[(c - 0x000000) / 256]; | |||
return (ucd::category)table[c % 256]; | |||
return (ucd_category)table[c % 256]; | |||
} | |||
if (c <= 0x00DFFF) return Cs; // 00D800..00DFFF : Surrogates | |||
if (c <= 0x00F7FF) return Co; // 00E000..00F7FF : Private Use Area | |||
if (c <= 0x02FAFF) // 00F800..02FAFF | |||
{ | |||
const uint8_t *table = categories_00F800_02FAFF[(c - 0x00F800) / 256]; | |||
return (ucd::category)table[c % 256]; | |||
return (ucd_category)table[c % 256]; | |||
} | |||
if (c <= 0x0DFFFF) return Cn; // 02FB00..0DFFFF : Unassigned | |||
if (c <= 0x0E01FF) // 0E0000..0E01FF | |||
{ | |||
const uint8_t *table = categories_0E0000_0E01FF[(c - 0x0E0000) / 256]; | |||
return (ucd::category)table[c % 256]; | |||
return (ucd_category)table[c % 256]; | |||
} | |||
if (c <= 0x0EFFFF) return Cn; // 0E0200..0EFFFF : Unassigned | |||
if (c <= 0x0FFFFD) return Co; // 0F0000..0FFFFD : Plane 15 Private Use | |||
@@ -3348,39 +3373,29 @@ ucd::category ucd::lookup_category(codepoint_t c) | |||
} | |||
// Maps a General Category value to its General Category Group via the switch below.
// NOTE(review): this span looks like a diff hunk whose +/- markers were stripped, so two
// versions of the mapping appear interleaved: each case group shows a short return
// (C, L, M, ...) immediately followed by a UCD_CATEGORY_GROUP_* return. Confirm which
// lines are live before editing.
ucd_category_group ucd_get_category_group_for_category(ucd_category c) | |||
{ | |||
return (ucd_category_group)ucd::lookup_category_group((ucd::category)c); | |||
} | |||
ucd::category_group ucd::lookup_category_group(category c) | |||
{ | |||
switch (c) | |||
{ | |||
case Cc: case Cf: case Cn: case Co: case Cs: | |||
return C; | |||
return UCD_CATEGORY_GROUP_C; | |||
case Ll: case Lm: case Lo: case Lt: case Lu: | |||
return L; | |||
return UCD_CATEGORY_GROUP_L; | |||
case Mc: case Me: case Mn: | |||
return M; | |||
return UCD_CATEGORY_GROUP_M; | |||
case Nd: case Nl: case No: | |||
return N; | |||
return UCD_CATEGORY_GROUP_N; | |||
case Pc: case Pd: case Pe: case Pf: case Pi: case Po: case Ps: | |||
return P; | |||
return UCD_CATEGORY_GROUP_P; | |||
case Sc: case Sk: case Sm: case So: | |||
return S; | |||
return UCD_CATEGORY_GROUP_S; | |||
case Zl: case Zp: case Zs: | |||
return Z; | |||
return UCD_CATEGORY_GROUP_Z; | |||
case Ii: | |||
return I; | |||
return UCD_CATEGORY_GROUP_I; | |||
} | |||
// NOTE(review): the switch has no default case and nothing is returned after it, so a
// value of c that is not one of the listed cases reaches the end of the function
// without a return value. Consider adding an explicit fallback return here.
} | |||
ucd_category_group ucd_lookup_category_group(codepoint_t c) | |||
{ | |||
return (ucd_category_group)ucd::lookup_category_group(ucd::lookup_category(c)); | |||
} | |||
ucd::category_group ucd::lookup_category_group(codepoint_t c) | |||
{ | |||
return lookup_category_group(lookup_category(c)); | |||
return (ucd_category_group)ucd_get_category_group_for_category(ucd_lookup_category(c)); | |||
} |
@@ -524,21 +524,30 @@ namespace ucd | |||
* @param c The General Category to lookup. | |||
* @return The General Category Group of the General Category. | |||
*/ | |||
category_group lookup_category_group(category c); | |||
inline category_group lookup_category_group(category c) | |||
{ | |||
// Delegates to ucd_get_category_group_for_category: the argument is cast to
// ucd_category and the result is cast back to category_group.
return (category_group)ucd_get_category_group_for_category((ucd_category)c); | |||
} | |||
/** @brief Lookup the General Category Group for a Unicode codepoint. | |||
* | |||
* @param c The Unicode codepoint to lookup. | |||
* @return The General Category Group of the Unicode codepoint. | |||
*/ | |||
category_group lookup_category_group(codepoint_t c); | |||
inline category_group lookup_category_group(codepoint_t c) | |||
{ | |||
// Delegates to ucd_lookup_category_group with the codepoint unchanged; only the
// result is cast, to category_group.
return (category_group)ucd_lookup_category_group(c); | |||
} | |||
/** @brief Lookup the General Category for a Unicode codepoint. | |||
* | |||
* @param c The Unicode codepoint to lookup. | |||
* @return The General Category of the Unicode codepoint. | |||
*/ | |||
category lookup_category(codepoint_t c); | |||
inline category lookup_category(codepoint_t c) | |||
{ | |||
// Delegates to ucd_lookup_category with the codepoint unchanged; only the result
// is cast, to category.
return (category)ucd_lookup_category(c); | |||
} | |||
/** @brief Unicode Script | |||
* @see http://www.iana.org/assignments/language-subtag-registry |
@@ -117,7 +117,37 @@ if __name__ == '__main__': | |||
#include <stddef.h> | |||
using namespace ucd; | |||
#define Cc UCD_CATEGORY_Cc | |||
#define Cf UCD_CATEGORY_Cf | |||
#define Cn UCD_CATEGORY_Cn | |||
#define Co UCD_CATEGORY_Co | |||
#define Cs UCD_CATEGORY_Cs | |||
#define Ll UCD_CATEGORY_Ll | |||
#define Lm UCD_CATEGORY_Lm | |||
#define Lo UCD_CATEGORY_Lo | |||
#define Lt UCD_CATEGORY_Lt | |||
#define Lu UCD_CATEGORY_Lu | |||
#define Mc UCD_CATEGORY_Mc | |||
#define Me UCD_CATEGORY_Me | |||
#define Mn UCD_CATEGORY_Mn | |||
#define Nd UCD_CATEGORY_Nd | |||
#define Nl UCD_CATEGORY_Nl | |||
#define No UCD_CATEGORY_No | |||
#define Pc UCD_CATEGORY_Pc | |||
#define Pd UCD_CATEGORY_Pd | |||
#define Pe UCD_CATEGORY_Pe | |||
#define Pf UCD_CATEGORY_Pf | |||
#define Pi UCD_CATEGORY_Pi | |||
#define Po UCD_CATEGORY_Po | |||
#define Ps UCD_CATEGORY_Ps | |||
#define Sc UCD_CATEGORY_Sc | |||
#define Sk UCD_CATEGORY_Sk | |||
#define Sm UCD_CATEGORY_Sm | |||
#define So UCD_CATEGORY_So | |||
#define Zl UCD_CATEGORY_Zl | |||
#define Zp UCD_CATEGORY_Zp | |||
#define Zs UCD_CATEGORY_Zs | |||
#define Ii UCD_CATEGORY_Ii | |||
// Unicode Character Data %s | |||
""" % ucd_version) | |||
@@ -162,14 +192,8 @@ using namespace ucd; | |||
sys.stdout.write('\tcategories_%s,\n' % codepoint) | |||
sys.stdout.write('};\n') | |||
sys.stdout.write(""" | |||
ucd_category ucd_lookup_category(codepoint_t c) | |||
{ | |||
return (ucd_category)ucd::lookup_category((ucd::category)c); | |||
} | |||
""") | |||
sys.stdout.write('\n') | |||
sys.stdout.write('ucd::category ucd::lookup_category(codepoint_t c)\n') | |||
sys.stdout.write('ucd_category ucd_lookup_category(codepoint_t c)\n') | |||
sys.stdout.write('{\n') | |||
for codepoints, category, comment in category_sets: | |||
if category: | |||
@@ -178,47 +202,37 @@ ucd_category ucd_lookup_category(codepoint_t c) | |||
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) | |||
sys.stdout.write('\t{\n') | |||
sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | |||
sys.stdout.write('\t\treturn (ucd::category)table[c % 256];\n') | |||
sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n') | |||
sys.stdout.write('\t}\n') | |||
sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n') | |||
sys.stdout.write('}\n') | |||
sys.stdout.write(""" | |||
ucd_category_group ucd_get_category_group_for_category(ucd_category c) | |||
{ | |||
return (ucd_category_group)ucd::lookup_category_group((ucd::category)c); | |||
} | |||
ucd::category_group ucd::lookup_category_group(category c) | |||
{ | |||
switch (c) | |||
{ | |||
case Cc: case Cf: case Cn: case Co: case Cs: | |||
return C; | |||
return UCD_CATEGORY_GROUP_C; | |||
case Ll: case Lm: case Lo: case Lt: case Lu: | |||
return L; | |||
return UCD_CATEGORY_GROUP_L; | |||
case Mc: case Me: case Mn: | |||
return M; | |||
return UCD_CATEGORY_GROUP_M; | |||
case Nd: case Nl: case No: | |||
return N; | |||
return UCD_CATEGORY_GROUP_N; | |||
case Pc: case Pd: case Pe: case Pf: case Pi: case Po: case Ps: | |||
return P; | |||
return UCD_CATEGORY_GROUP_P; | |||
case Sc: case Sk: case Sm: case So: | |||
return S; | |||
return UCD_CATEGORY_GROUP_S; | |||
case Zl: case Zp: case Zs: | |||
return Z; | |||
return UCD_CATEGORY_GROUP_Z; | |||
case Ii: | |||
return I; | |||
return UCD_CATEGORY_GROUP_I; | |||
} | |||
} | |||
ucd_category_group ucd_lookup_category_group(codepoint_t c) | |||
{ | |||
return (ucd_category_group)ucd::lookup_category_group(ucd::lookup_category(c)); | |||
} | |||
ucd::category_group ucd::lookup_category_group(codepoint_t c) | |||
{ | |||
return lookup_category_group(lookup_category(c)); | |||
return (ucd_category_group)ucd_get_category_group_for_category(ucd_lookup_category(c)); | |||
} | |||
""") |