data/ucd/Scripts.txt | data/ucd/Scripts.txt | ||||
ucd-update: tools/case.py tools/categories.py tools/scripts.py | ucd-update: tools/case.py tools/categories.py tools/scripts.py | ||||
tools/case.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/case.cpp | |||||
tools/case.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/case.c | |||||
tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/categories.cpp | tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/categories.cpp | ||||
tools/scripts.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/scripts.cpp | tools/scripts.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/scripts.cpp | ||||
lib_LTLIBRARIES += src/libucd.la | lib_LTLIBRARIES += src/libucd.la | ||||
src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION) | src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION) | ||||
src_libucd_la_SOURCES = \ | src_libucd_la_SOURCES = \ | ||||
src/case.cpp \ | |||||
src/case.c \ | |||||
src/categories.cpp \ | src/categories.cpp \ | ||||
src/ctype.c \ | src/ctype.c \ | ||||
src/scripts.cpp \ | src/scripts.cpp \ |
#include <stddef.h> | #include <stddef.h> | ||||
using namespace ucd; | |||||
// Unicode Character Data 8.0.0 | // Unicode Character Data 8.0.0 | ||||
struct case_conversion_entry | struct case_conversion_entry | ||||
codepoint_t titlecase; | codepoint_t titlecase; | ||||
}; | }; | ||||
static const case_conversion_entry case_conversion_data[] = | |||||
static const struct case_conversion_entry case_conversion_data[] = | |||||
{ | { | ||||
{ 0x000041, 0x000000, 0x000061, 0x000000 }, | { 0x000041, 0x000000, 0x000061, 0x000000 }, | ||||
{ 0x000042, 0x000000, 0x000062, 0x000000 }, | { 0x000042, 0x000000, 0x000062, 0x000000 }, | ||||
}; | }; | ||||
codepoint_t ucd_toupper(codepoint_t c) | codepoint_t ucd_toupper(codepoint_t c) | ||||
{ | |||||
return ucd::toupper(c); | |||||
} | |||||
ucd::codepoint_t ucd::toupper(codepoint_t c) | |||||
{ | { | ||||
int begin = 0; | int begin = 0; | ||||
int end = sizeof(case_conversion_data)/sizeof(case_conversion_data[0]); | int end = sizeof(case_conversion_data)/sizeof(case_conversion_data[0]); | ||||
while (begin <= end) | while (begin <= end) | ||||
{ | { | ||||
int pos = (begin + end) / 2; | int pos = (begin + end) / 2; | ||||
const case_conversion_entry *item = (case_conversion_data + pos); | |||||
const struct case_conversion_entry *item = (case_conversion_data + pos); | |||||
if (c == item->codepoint) | if (c == item->codepoint) | ||||
return item->uppercase == 0 ? c : item->uppercase; | return item->uppercase == 0 ? c : item->uppercase; | ||||
else if (c > item->codepoint) | else if (c > item->codepoint) | ||||
} | } | ||||
codepoint_t ucd_tolower(codepoint_t c) | codepoint_t ucd_tolower(codepoint_t c) | ||||
{ | |||||
return ucd::tolower(c); | |||||
} | |||||
ucd::codepoint_t ucd::tolower(codepoint_t c) | |||||
{ | { | ||||
int begin = 0; | int begin = 0; | ||||
int end = sizeof(case_conversion_data)/sizeof(case_conversion_data[0]); | int end = sizeof(case_conversion_data)/sizeof(case_conversion_data[0]); | ||||
while (begin <= end) | while (begin <= end) | ||||
{ | { | ||||
int pos = (begin + end) / 2; | int pos = (begin + end) / 2; | ||||
const case_conversion_entry *item = (case_conversion_data + pos); | |||||
const struct case_conversion_entry *item = (case_conversion_data + pos); | |||||
if (c == item->codepoint) | if (c == item->codepoint) | ||||
return item->lowercase == 0 ? c : item->lowercase; | return item->lowercase == 0 ? c : item->lowercase; | ||||
else if (c > item->codepoint) | else if (c > item->codepoint) | ||||
} | } | ||||
codepoint_t ucd_totitle(codepoint_t c) | codepoint_t ucd_totitle(codepoint_t c) | ||||
{ | |||||
return ucd::totitle(c); | |||||
} | |||||
ucd::codepoint_t ucd::totitle(codepoint_t c) | |||||
{ | { | ||||
int begin = 0; | int begin = 0; | ||||
int end = sizeof(case_conversion_data)/sizeof(case_conversion_data[0]); | int end = sizeof(case_conversion_data)/sizeof(case_conversion_data[0]); | ||||
while (begin <= end) | while (begin <= end) | ||||
{ | { | ||||
int pos = (begin + end) / 2; | int pos = (begin + end) / 2; | ||||
const case_conversion_entry *item = (case_conversion_data + pos); | |||||
const struct case_conversion_entry *item = (case_conversion_data + pos); | |||||
if (c == item->codepoint) | if (c == item->codepoint) | ||||
return item->titlecase == 0 ? c : item->titlecase; | return item->titlecase == 0 ? c : item->titlecase; | ||||
else if (c > item->codepoint) | else if (c > item->codepoint) |
* @return The upper-case Unicode codepoint for this codepoint, or | * @return The upper-case Unicode codepoint for this codepoint, or | ||||
* this codepoint if there is no upper-case codepoint. | * this codepoint if there is no upper-case codepoint. | ||||
*/ | */ | ||||
codepoint_t toupper(codepoint_t c); | |||||
inline codepoint_t toupper(codepoint_t c) | |||||
{ | |||||
return ucd_toupper(c); | |||||
} | |||||
/** @brief Convert the Unicode codepoint to lower-case. | /** @brief Convert the Unicode codepoint to lower-case. | ||||
* | * | ||||
* @return The lower-case Unicode codepoint for this codepoint, or | * @return The lower-case Unicode codepoint for this codepoint, or | ||||
* this codepoint if there is no lower-case codepoint. | * this codepoint if there is no lower-case codepoint. | ||||
*/ | */ | ||||
codepoint_t tolower(codepoint_t c); | |||||
inline codepoint_t tolower(codepoint_t c) | |||||
{ | |||||
return ucd_tolower(c); | |||||
} | |||||
/** @brief Convert the Unicode codepoint to title-case. | /** @brief Convert the Unicode codepoint to title-case. | ||||
* | * | ||||
* @return The title-case Unicode codepoint for this codepoint, or | * @return The title-case Unicode codepoint for this codepoint, or | ||||
* this codepoint if there is no title-case codepoint. | * this codepoint if there is no title-case codepoint. | ||||
*/ | */ | ||||
codepoint_t totitle(codepoint_t c); | |||||
inline codepoint_t totitle(codepoint_t c) | |||||
{ | |||||
return ucd_totitle(c); | |||||
} | |||||
} | } | ||||
#endif | #endif | ||||
#include <stddef.h> | #include <stddef.h> | ||||
using namespace ucd; | |||||
// Unicode Character Data %s | // Unicode Character Data %s | ||||
struct case_conversion_entry | struct case_conversion_entry | ||||
""" % ucd_version) | """ % ucd_version) | ||||
sys.stdout.write('\n') | sys.stdout.write('\n') | ||||
sys.stdout.write('static const case_conversion_entry case_conversion_data[] =\n') | |||||
sys.stdout.write('static const struct case_conversion_entry case_conversion_data[] =\n') | |||||
sys.stdout.write('{\n') | sys.stdout.write('{\n') | ||||
for codepoint in sorted(unicode_chars.keys()): | for codepoint in sorted(unicode_chars.keys()): | ||||
lower, upper, title = unicode_chars[codepoint] | lower, upper, title = unicode_chars[codepoint] | ||||
sys.stdout.write('\n') | sys.stdout.write('\n') | ||||
sys.stdout.write('codepoint_t ucd_to%s(codepoint_t c)\n' % case) | sys.stdout.write('codepoint_t ucd_to%s(codepoint_t c)\n' % case) | ||||
sys.stdout.write('{\n') | sys.stdout.write('{\n') | ||||
sys.stdout.write('\treturn ucd::to%s(c);\n' % case) | |||||
sys.stdout.write('}\n') | |||||
sys.stdout.write('\n') | |||||
sys.stdout.write('ucd::codepoint_t ucd::to%s(codepoint_t c)\n' % case) | |||||
sys.stdout.write('{\n') | |||||
sys.stdout.write('\tint begin = 0;\n') | sys.stdout.write('\tint begin = 0;\n') | ||||
sys.stdout.write('\tint end = sizeof(case_conversion_data)/sizeof(case_conversion_data[0]);\n') | sys.stdout.write('\tint end = sizeof(case_conversion_data)/sizeof(case_conversion_data[0]);\n') | ||||
sys.stdout.write('\twhile (begin <= end)\n') | sys.stdout.write('\twhile (begin <= end)\n') | ||||
sys.stdout.write('\t{\n') | sys.stdout.write('\t{\n') | ||||
sys.stdout.write('\t\tint pos = (begin + end) / 2;\n') | sys.stdout.write('\t\tint pos = (begin + end) / 2;\n') | ||||
sys.stdout.write('\t\tconst case_conversion_entry *item = (case_conversion_data + pos);\n') | |||||
sys.stdout.write('\t\tconst struct case_conversion_entry *item = (case_conversion_data + pos);\n') | |||||
sys.stdout.write('\t\tif (c == item->codepoint)\n') | sys.stdout.write('\t\tif (c == item->codepoint)\n') | ||||
sys.stdout.write('\t\t\treturn item->%scase == 0 ? c : item->%scase;\n' % (case, case)) | sys.stdout.write('\t\t\treturn item->%scase == 0 ? c : item->%scase;\n' % (case, case)) | ||||
sys.stdout.write('\t\telse if (c > item->codepoint)\n') | sys.stdout.write('\t\telse if (c > item->codepoint)\n') |