| @@ -57,6 +57,10 @@ EXTRA_DIST += ChangeLog | |||
| UCD_VERSION=6.2.0 | |||
| UCD_ROOTDIR=data/ucd | |||
| data/ucd/PropList.txt: | |||
| mkdir -pv data/ucd | |||
| wget -O $@ http://www.unicode.org/Public/${UCD_VERSION}/ucd/PropList.txt | |||
| data/ucd/UnicodeData.txt: | |||
| mkdir -pv data/ucd | |||
| wget -O $@ http://www.unicode.org/Public/${UCD_VERSION}/ucd/UnicodeData.txt | |||
| @@ -87,7 +91,9 @@ noinst_bin_PROGRAMS += tests/printucddata | |||
| tests_printucddata_SOURCES = tests/printucddata.cpp | |||
| tests_printucddata_LDADD = src/libucd.la | |||
| tests/unicode-data.expected: tools/printdata.py tools/ucd.py | |||
| tests/unicode-data.expected: tools/printdata.py tools/ucd.py \ | |||
| data/ucd/UnicodeData.txt \ | |||
| data/ucd/PropList.txt | |||
| tools/printdata.py ${UCD_ROOTDIR} > $@ | |||
| tests/unicode-data.actual: tests/printucddata | |||
| @@ -69,7 +69,8 @@ int main() | |||
| ucd::codepoint_t upper = ucd::toupper(c); | |||
| ucd::codepoint_t lower = ucd::tolower(c); | |||
| ucd::codepoint_t title = ucd::totitle(c); | |||
| printf("%06X %s %06X %06X %06X\n", c, category, upper, lower, title); | |||
| const char *whitespace = ucd::isspace(c) ? "White_Space" : ""; | |||
| printf("%06X %s %06X %06X %06X %s\n", c, category, upper, lower, title, whitespace); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -27,6 +27,11 @@ unicode_chars = {} | |||
| for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): | |||
| for codepoint in data['CodePoint']: | |||
| unicode_chars[codepoint] = data | |||
| unicode_chars[codepoint]['Properties'] = [] | |||
| for data in ucd.parse_ucd_data(ucd_rootdir, 'PropList'): | |||
| if data['Property'] in ['White_Space']: | |||
| for codepoint in data['Range']: | |||
| unicode_chars[codepoint]['Properties'].append(data['Property']) | |||
| null = ucd.CodePoint('0000') | |||
| if __name__ == '__main__': | |||
| @@ -39,6 +44,6 @@ if __name__ == '__main__': | |||
| if title == null: title = codepoint | |||
| if upper == null: upper = codepoint | |||
| if lower == null: lower = codepoint | |||
| print '%s %s %s %s %s' % (codepoint, data['GeneralCategory'], upper, lower, title) | |||
| print '%s %s %s %s %s %s' % (codepoint, data['GeneralCategory'], upper, lower, title, ' '.join(data['Properties'])) | |||
| except KeyError: | |||
| print '%s Cn %s %s %s' % (codepoint, codepoint, codepoint, codepoint) | |||
| print '%s Cn %s %s %s ' % (codepoint, codepoint, codepoint, codepoint) | |||