| data/ucd/Scripts.txt | data/ucd/Scripts.txt | ||||
| ucd-update: tools/case.py tools/categories.py tools/scripts.py | ucd-update: tools/case.py tools/categories.py tools/scripts.py | ||||
| tools/case.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/case.cpp | |||||
| tools/case.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/case.c | |||||
| tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/categories.cpp | tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/categories.cpp | ||||
| tools/scripts.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/scripts.cpp | tools/scripts.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/scripts.cpp | ||||
| lib_LTLIBRARIES += src/libucd.la | lib_LTLIBRARIES += src/libucd.la | ||||
| src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION) | src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION) | ||||
| src_libucd_la_SOURCES = \ | src_libucd_la_SOURCES = \ | ||||
| src/case.cpp \ | |||||
| src/case.c \ | |||||
| src/categories.cpp \ | src/categories.cpp \ | ||||
| src/ctype.c \ | src/ctype.c \ | ||||
| src/scripts.cpp \ | src/scripts.cpp \ |
| #include <stddef.h> | #include <stddef.h> | ||||
| using namespace ucd; | |||||
| // Unicode Character Data 8.0.0 | // Unicode Character Data 8.0.0 | ||||
| struct case_conversion_entry | struct case_conversion_entry | ||||
| codepoint_t titlecase; | codepoint_t titlecase; | ||||
| }; | }; | ||||
| static const case_conversion_entry case_conversion_data[] = | |||||
| static const struct case_conversion_entry case_conversion_data[] = | |||||
| { | { | ||||
| { 0x000041, 0x000000, 0x000061, 0x000000 }, | { 0x000041, 0x000000, 0x000061, 0x000000 }, | ||||
| { 0x000042, 0x000000, 0x000062, 0x000000 }, | { 0x000042, 0x000000, 0x000062, 0x000000 }, | ||||
| }; | }; | ||||
| codepoint_t ucd_toupper(codepoint_t c) | codepoint_t ucd_toupper(codepoint_t c) | ||||
| { | |||||
| return ucd::toupper(c); | |||||
| } | |||||
| ucd::codepoint_t ucd::toupper(codepoint_t c) | |||||
| { | { | ||||
| int begin = 0; | int begin = 0; | ||||
| int end = sizeof(case_conversion_data)/sizeof(case_conversion_data[0]); | int end = sizeof(case_conversion_data)/sizeof(case_conversion_data[0]); | ||||
| while (begin <= end) | while (begin <= end) | ||||
| { | { | ||||
| int pos = (begin + end) / 2; | int pos = (begin + end) / 2; | ||||
| const case_conversion_entry *item = (case_conversion_data + pos); | |||||
| const struct case_conversion_entry *item = (case_conversion_data + pos); | |||||
| if (c == item->codepoint) | if (c == item->codepoint) | ||||
| return item->uppercase == 0 ? c : item->uppercase; | return item->uppercase == 0 ? c : item->uppercase; | ||||
| else if (c > item->codepoint) | else if (c > item->codepoint) | ||||
| } | } | ||||
| codepoint_t ucd_tolower(codepoint_t c) | codepoint_t ucd_tolower(codepoint_t c) | ||||
| { | |||||
| return ucd::tolower(c); | |||||
| } | |||||
| ucd::codepoint_t ucd::tolower(codepoint_t c) | |||||
| { | { | ||||
| int begin = 0; | int begin = 0; | ||||
| int end = sizeof(case_conversion_data)/sizeof(case_conversion_data[0]); | int end = sizeof(case_conversion_data)/sizeof(case_conversion_data[0]); | ||||
| while (begin <= end) | while (begin <= end) | ||||
| { | { | ||||
| int pos = (begin + end) / 2; | int pos = (begin + end) / 2; | ||||
| const case_conversion_entry *item = (case_conversion_data + pos); | |||||
| const struct case_conversion_entry *item = (case_conversion_data + pos); | |||||
| if (c == item->codepoint) | if (c == item->codepoint) | ||||
| return item->lowercase == 0 ? c : item->lowercase; | return item->lowercase == 0 ? c : item->lowercase; | ||||
| else if (c > item->codepoint) | else if (c > item->codepoint) | ||||
| } | } | ||||
| codepoint_t ucd_totitle(codepoint_t c) | codepoint_t ucd_totitle(codepoint_t c) | ||||
| { | |||||
| return ucd::totitle(c); | |||||
| } | |||||
| ucd::codepoint_t ucd::totitle(codepoint_t c) | |||||
| { | { | ||||
| int begin = 0; | int begin = 0; | ||||
| int end = sizeof(case_conversion_data)/sizeof(case_conversion_data[0]); | int end = sizeof(case_conversion_data)/sizeof(case_conversion_data[0]); | ||||
| while (begin <= end) | while (begin <= end) | ||||
| { | { | ||||
| int pos = (begin + end) / 2; | int pos = (begin + end) / 2; | ||||
| const case_conversion_entry *item = (case_conversion_data + pos); | |||||
| const struct case_conversion_entry *item = (case_conversion_data + pos); | |||||
| if (c == item->codepoint) | if (c == item->codepoint) | ||||
| return item->titlecase == 0 ? c : item->titlecase; | return item->titlecase == 0 ? c : item->titlecase; | ||||
| else if (c > item->codepoint) | else if (c > item->codepoint) |
| * @return The upper-case Unicode codepoint for this codepoint, or | * @return The upper-case Unicode codepoint for this codepoint, or | ||||
| * this codepoint if there is no upper-case codepoint. | * this codepoint if there is no upper-case codepoint. | ||||
| */ | */ | ||||
| codepoint_t toupper(codepoint_t c); | |||||
| inline codepoint_t toupper(codepoint_t c) | |||||
| { | |||||
| return ucd_toupper(c); | |||||
| } | |||||
| /** @brief Convert the Unicode codepoint to lower-case. | /** @brief Convert the Unicode codepoint to lower-case. | ||||
| * | * | ||||
| * @return The lower-case Unicode codepoint for this codepoint, or | * @return The lower-case Unicode codepoint for this codepoint, or | ||||
| * this codepoint if there is no lower-case codepoint. | * this codepoint if there is no lower-case codepoint. | ||||
| */ | */ | ||||
| codepoint_t tolower(codepoint_t c); | |||||
| inline codepoint_t tolower(codepoint_t c) | |||||
| { | |||||
| return ucd_tolower(c); | |||||
| } | |||||
| /** @brief Convert the Unicode codepoint to title-case. | /** @brief Convert the Unicode codepoint to title-case. | ||||
| * | * | ||||
| * @return The title-case Unicode codepoint for this codepoint, or | * @return The title-case Unicode codepoint for this codepoint, or | ||||
| * this codepoint if there is no title-case codepoint. | * this codepoint if there is no title-case codepoint. | ||||
| */ | */ | ||||
| codepoint_t totitle(codepoint_t c); | |||||
| inline codepoint_t totitle(codepoint_t c) | |||||
| { | |||||
| return ucd_totitle(c); | |||||
| } | |||||
| } | } | ||||
| #endif | #endif | ||||
| #include <stddef.h> | #include <stddef.h> | ||||
| using namespace ucd; | |||||
| // Unicode Character Data %s | // Unicode Character Data %s | ||||
| struct case_conversion_entry | struct case_conversion_entry | ||||
| """ % ucd_version) | """ % ucd_version) | ||||
| sys.stdout.write('\n') | sys.stdout.write('\n') | ||||
| sys.stdout.write('static const case_conversion_entry case_conversion_data[] =\n') | |||||
| sys.stdout.write('static const struct case_conversion_entry case_conversion_data[] =\n') | |||||
| sys.stdout.write('{\n') | sys.stdout.write('{\n') | ||||
| for codepoint in sorted(unicode_chars.keys()): | for codepoint in sorted(unicode_chars.keys()): | ||||
| lower, upper, title = unicode_chars[codepoint] | lower, upper, title = unicode_chars[codepoint] | ||||
| sys.stdout.write('\n') | sys.stdout.write('\n') | ||||
| sys.stdout.write('codepoint_t ucd_to%s(codepoint_t c)\n' % case) | sys.stdout.write('codepoint_t ucd_to%s(codepoint_t c)\n' % case) | ||||
| sys.stdout.write('{\n') | sys.stdout.write('{\n') | ||||
| sys.stdout.write('\treturn ucd::to%s(c);\n' % case) | |||||
| sys.stdout.write('}\n') | |||||
| sys.stdout.write('\n') | |||||
| sys.stdout.write('ucd::codepoint_t ucd::to%s(codepoint_t c)\n' % case) | |||||
| sys.stdout.write('{\n') | |||||
| sys.stdout.write('\tint begin = 0;\n') | sys.stdout.write('\tint begin = 0;\n') | ||||
| sys.stdout.write('\tint end = sizeof(case_conversion_data)/sizeof(case_conversion_data[0]);\n') | sys.stdout.write('\tint end = sizeof(case_conversion_data)/sizeof(case_conversion_data[0]);\n') | ||||
| sys.stdout.write('\twhile (begin <= end)\n') | sys.stdout.write('\twhile (begin <= end)\n') | ||||
| sys.stdout.write('\t{\n') | sys.stdout.write('\t{\n') | ||||
| sys.stdout.write('\t\tint pos = (begin + end) / 2;\n') | sys.stdout.write('\t\tint pos = (begin + end) / 2;\n') | ||||
| sys.stdout.write('\t\tconst case_conversion_entry *item = (case_conversion_data + pos);\n') | |||||
| sys.stdout.write('\t\tconst struct case_conversion_entry *item = (case_conversion_data + pos);\n') | |||||
| sys.stdout.write('\t\tif (c == item->codepoint)\n') | sys.stdout.write('\t\tif (c == item->codepoint)\n') | ||||
| sys.stdout.write('\t\t\treturn item->%scase == 0 ? c : item->%scase;\n' % (case, case)) | sys.stdout.write('\t\t\treturn item->%scase == 0 ? c : item->%scase;\n' % (case, case)) | ||||
| sys.stdout.write('\t\telse if (c > item->codepoint)\n') | sys.stdout.write('\t\telse if (c > item->codepoint)\n') |