| # build output: | # build output: | ||||
| src/libucd.la | src/libucd.la | ||||
| tests/printucddata | |||||
| # test output: | |||||
| tests/*.expected | |||||
| tests/*.actual | |||||
| tests/*.diff | |||||
| # autotools output: | # autotools output: | ||||
| libucd_include_HEADERS = \ | libucd_include_HEADERS = \ | ||||
| src/include/ucd/ucd.h | src/include/ucd/ucd.h | ||||
| lib_LTLIBRARIES += src/libucd.la | |||||
| src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION) | |||||
| lib_LTLIBRARIES += src/libucd.la | |||||
| src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION) | |||||
| src_libucd_la_CXXFLAGS = ${AM_CXXFLAGS} | src_libucd_la_CXXFLAGS = ${AM_CXXFLAGS} | ||||
| src_libucd_la_SOURCES = \ | |||||
| src_libucd_la_SOURCES = \ | |||||
| src/case.cpp \ | src/case.cpp \ | ||||
| src/categories.cpp \ | src/categories.cpp \ | ||||
| src/ctype.cpp | src/ctype.cpp | ||||
############################# tests ###########################################

noinst_bin_PROGRAMS += tests/printucddata

tests_printucddata_SOURCES = tests/printucddata.cpp
tests_printucddata_LDADD   = src/libucd.la

# The generated files below are written to a temporary name and renamed only
# when the command succeeds. Redirecting straight into $@ would leave a
# truncated file behind on failure, and make would then consider that file up
# to date on the next run.

# Reference output, produced by the Python tool from the UCD data files.
tests/unicode-data.expected: tools/printdata.py tools/ucd.py
	tools/printdata.py ${UCD_ROOTDIR} > $@.tmp && mv $@.tmp $@ || { rm -f $@.tmp; exit 1; }

# Output of the C++ test program linked against libucd.
tests/unicode-data.actual: tests/printucddata
	tests/printucddata > $@.tmp && mv $@.tmp $@ || { rm -f $@.tmp; exit 1; }

# diff exits non-zero when the files differ. In that case the differences are
# printed and the target is removed, so that a later "make check" compares the
# files again instead of finding a stale, newer-than-prerequisites .diff file
# and reporting success.
tests/unicode-data.diff: tests/unicode-data.expected tests/unicode-data.actual
	diff -U0 tests/unicode-data.expected tests/unicode-data.actual > $@ || { cat $@; rm -f $@; exit 1; }

check: tests/unicode-data.diff
| int pos = (begin + end) / 2; | int pos = (begin + end) / 2; | ||||
| const case_conversion_entry *item = (case_conversion_data + pos); | const case_conversion_entry *item = (case_conversion_data + pos); | ||||
| if (c == item->codepoint) | if (c == item->codepoint) | ||||
| return item->uppercase; | |||||
| return item->uppercase == 0 ? c : item->uppercase; | |||||
| else if (c > item->codepoint) | else if (c > item->codepoint) | ||||
| begin = pos + 1; | begin = pos + 1; | ||||
| else | else | ||||
| int pos = (begin + end) / 2; | int pos = (begin + end) / 2; | ||||
| const case_conversion_entry *item = (case_conversion_data + pos); | const case_conversion_entry *item = (case_conversion_data + pos); | ||||
| if (c == item->codepoint) | if (c == item->codepoint) | ||||
| return item->lowercase; | |||||
| return item->lowercase == 0 ? c : item->lowercase; | |||||
| else if (c > item->codepoint) | else if (c > item->codepoint) | ||||
| begin = pos + 1; | begin = pos + 1; | ||||
| else | else | ||||
| int pos = (begin + end) / 2; | int pos = (begin + end) / 2; | ||||
| const case_conversion_entry *item = (case_conversion_data + pos); | const case_conversion_entry *item = (case_conversion_data + pos); | ||||
| if (c == item->codepoint) | if (c == item->codepoint) | ||||
| return item->titlecase; | |||||
| return item->titlecase == 0 ? c : item->titlecase; | |||||
| else if (c > item->codepoint) | else if (c > item->codepoint) | ||||
| begin = pos + 1; | begin = pos + 1; | ||||
| else | else |
| /* | |||||
| * Copyright (C) 2012 Reece H. Dunn | |||||
| * | |||||
| * This file is part of ucd-tools. | |||||
| * | |||||
| * ucd-tools is free software: you can redistribute it and/or modify | |||||
| * it under the terms of the GNU General Public License as published by | |||||
| * the Free Software Foundation, either version 3 of the License, or | |||||
| * (at your option) any later version. | |||||
| * | |||||
| * ucd-tools is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
| * GNU General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU General Public License | |||||
| * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||||
| */ | |||||
| #include "ucd/ucd.h" | |||||
| #include <stdio.h> | |||||
| const char *get_category_string(ucd::category c) | |||||
| { | |||||
| using namespace ucd; | |||||
| switch (c) | |||||
| { | |||||
| case Cc: return "Cc"; | |||||
| case Cf: return "Cf"; | |||||
| case Cn: return "Cn"; | |||||
| case Co: return "Co"; | |||||
| case Cs: return "Cs"; | |||||
| case Ii: return "Ii"; | |||||
| case Ll: return "Ll"; | |||||
| case Lm: return "Lm"; | |||||
| case Lo: return "Lo"; | |||||
| case Lt: return "Lt"; | |||||
| case Lu: return "Lu"; | |||||
| case Mc: return "Mc"; | |||||
| case Me: return "Me"; | |||||
| case Mn: return "Mn"; | |||||
| case Nd: return "Nd"; | |||||
| case Nl: return "Nl"; | |||||
| case No: return "No"; | |||||
| case Pc: return "Pc"; | |||||
| case Pd: return "Pd"; | |||||
| case Pe: return "Pe"; | |||||
| case Pf: return "Pf"; | |||||
| case Pi: return "Pi"; | |||||
| case Po: return "Po"; | |||||
| case Ps: return "Ps"; | |||||
| case Sc: return "Sc"; | |||||
| case Sk: return "Sk"; | |||||
| case Sm: return "Sm"; | |||||
| case So: return "So"; | |||||
| case Zl: return "Zl"; | |||||
| case Zp: return "Zp"; | |||||
| case Zs: return "Zs"; | |||||
| default: return "--"; | |||||
| } | |||||
| } | |||||
| int main() | |||||
| { | |||||
| for (ucd::codepoint_t c = 0; c <= 0x10FFFF; ++c) | |||||
| { | |||||
| const char *category = get_category_string(ucd::lookup_category(c)); | |||||
| ucd::codepoint_t upper = ucd::toupper(c); | |||||
| ucd::codepoint_t lower = ucd::tolower(c); | |||||
| ucd::codepoint_t title = ucd::totitle(c); | |||||
| printf("%06X %s %06X %06X %06X\n", c, category, upper, lower, title); | |||||
| } | |||||
| return 0; | |||||
| } |
| sys.stdout.write('\t\tint pos = (begin + end) / 2;\n') | sys.stdout.write('\t\tint pos = (begin + end) / 2;\n') | ||||
| sys.stdout.write('\t\tconst case_conversion_entry *item = (case_conversion_data + pos);\n') | sys.stdout.write('\t\tconst case_conversion_entry *item = (case_conversion_data + pos);\n') | ||||
| sys.stdout.write('\t\tif (c == item->codepoint)\n') | sys.stdout.write('\t\tif (c == item->codepoint)\n') | ||||
| sys.stdout.write('\t\t\treturn item->%scase;\n' % case) | |||||
| sys.stdout.write('\t\t\treturn item->%scase == 0 ? c : item->%scase;\n' % (case, case)) | |||||
| sys.stdout.write('\t\telse if (c > item->codepoint)\n') | sys.stdout.write('\t\telse if (c > item->codepoint)\n') | ||||
| sys.stdout.write('\t\t\tbegin = pos + 1;\n') | sys.stdout.write('\t\t\tbegin = pos + 1;\n') | ||||
| sys.stdout.write('\t\telse\n') | sys.stdout.write('\t\telse\n') |
| unicode_chars = {} | unicode_chars = {} | ||||
| for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): | for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): | ||||
| if isinstance(data['CodePoint'], ucd.CodePoint): | |||||
| unicode_chars[data['CodePoint']] = data['GeneralCategory'] | |||||
| for codepoint in data['CodePoint']: | |||||
| unicode_chars[codepoint] = data['GeneralCategory'] | |||||
| # This map is a combination of the information in the UnicodeData and Blocks | # This map is a combination of the information in the UnicodeData and Blocks | ||||
| # data files. It is intended to reduce the number of character tables that | # data files. It is intended to reduce the number of character tables that |
| #!/usr/bin/python | |||||
| # Copyright (C) 2012 Reece H. Dunn | |||||
| # | |||||
| # This file is part of ucd-tools. | |||||
| # | |||||
| # ucd-tools is free software: you can redistribute it and/or modify | |||||
| # it under the terms of the GNU General Public License as published by | |||||
| # the Free Software Foundation, either version 3 of the License, or | |||||
| # (at your option) any later version. | |||||
| # | |||||
| # ucd-tools is distributed in the hope that it will be useful, | |||||
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
| # GNU General Public License for more details. | |||||
| # | |||||
| # You should have received a copy of the GNU General Public License | |||||
| # along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||||
| import os | |||||
| import sys | |||||
| import ucd | |||||
# Path handed to ucd.parse_ucd_data as the UCD root directory; taken from the
# first command-line argument.
ucd_rootdir = sys.argv[1]

# Maps every code point covered by a UnicodeData record to that record.
unicode_chars = {}
for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
	for codepoint in data['CodePoint']:
		unicode_chars[codepoint] = data

# A case-mapping field equal to this value is reported as the code point
# itself by the loop below.
null = ucd.CodePoint('0000')

if __name__ == '__main__':
	# sys.stdout.write is used rather than the Python 2 print statement so the
	# script parses under both Python 2 and Python 3; the bytes written are
	# the same (one line per code point, terminated by a newline).
	write = sys.stdout.write
	for codepoint in ucd.CodeRange('000000..10FFFF'):
		# Only the table lookup is expected to miss. Keeping the field
		# accesses outside any KeyError handler means a record that lacks a
		# field raises instead of being silently printed as "Cn".
		data = unicode_chars.get(codepoint)
		if data is None:
			# No UnicodeData record: category Cn, all case mappings identity.
			write('%s Cn %s %s %s\n' % (codepoint, codepoint, codepoint, codepoint))
			continue

		title = data['TitleCase']
		upper = data['UpperCase']
		lower = data['LowerCase']
		if title == null:
			title = codepoint
		if upper == null:
			upper = codepoint
		if lower == null:
			lower = codepoint
		write('%s %s %s %s %s\n' % (codepoint, data['GeneralCategory'], upper, lower, title))
# Format this object's codepoint attribute as upper-case hexadecimal,
# zero-padded to a minimum width of six characters.
| def __str__(self): | def __str__(self): | ||||
| return '%06X' % self.codepoint | return '%06X' % self.codepoint | ||||
# Iterating over this object yields the object itself exactly once.
# NOTE(review): presumably this lets callers write
# `for codepoint in data['CodePoint']` whether the field holds a single
# code point or a range -- confirm against the range class.
| def __iter__(self): | |||||
| yield self | |||||
# Hash on the codepoint attribute, returned unchanged.
# NOTE(review): a matching __eq__ is assumed to be defined on the class
# (outside this view) -- confirm.
| def __hash__(self): | def __hash__(self): | ||||
| return self.codepoint | return self.codepoint | ||||