@@ -57,6 +57,10 @@ EXTRA_DIST += ChangeLog | |||
UCD_VERSION=6.2.0 | |||
UCD_ROOTDIR=data/ucd | |||
data/ucd/PropList.txt: | |||
mkdir -pv data/ucd | |||
wget -O $@ http://www.unicode.org/Public/${UCD_VERSION}/ucd/PropList.txt | |||
data/ucd/UnicodeData.txt: | |||
mkdir -pv data/ucd | |||
wget -O $@ http://www.unicode.org/Public/${UCD_VERSION}/ucd/UnicodeData.txt | |||
@@ -87,7 +91,9 @@ noinst_bin_PROGRAMS += tests/printucddata | |||
tests_printucddata_SOURCES = tests/printucddata.cpp | |||
tests_printucddata_LDADD = src/libucd.la | |||
tests/unicode-data.expected: tools/printdata.py tools/ucd.py | |||
tests/unicode-data.expected: tools/printdata.py tools/ucd.py \ | |||
data/ucd/UnicodeData.txt \ | |||
data/ucd/PropList.txt | |||
tools/printdata.py ${UCD_ROOTDIR} > $@ | |||
tests/unicode-data.actual: tests/printucddata |
@@ -69,7 +69,8 @@ int main() | |||
ucd::codepoint_t upper = ucd::toupper(c); | |||
ucd::codepoint_t lower = ucd::tolower(c); | |||
ucd::codepoint_t title = ucd::totitle(c); | |||
printf("%06X %s %06X %06X %06X\n", c, category, upper, lower, title); | |||
const char *whitespace = ucd::isspace(c) ? "White_Space" : ""; | |||
printf("%06X %s %06X %06X %06X %s\n", c, category, upper, lower, title, whitespace); | |||
} | |||
return 0; | |||
} |
@@ -27,6 +27,11 @@ unicode_chars = {} | |||
for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): | |||
for codepoint in data['CodePoint']: | |||
unicode_chars[codepoint] = data | |||
unicode_chars[codepoint]['Properties'] = [] | |||
for data in ucd.parse_ucd_data(ucd_rootdir, 'PropList'): | |||
if data['Property'] in ['White_Space']: | |||
for codepoint in data['Range']: | |||
unicode_chars[codepoint]['Properties'].append(data['Property']) | |||
null = ucd.CodePoint('0000') | |||
if __name__ == '__main__': | |||
@@ -39,6 +44,6 @@ if __name__ == '__main__': | |||
if title == null: title = codepoint | |||
if upper == null: upper = codepoint | |||
if lower == null: lower = codepoint | |||
print '%s %s %s %s %s' % (codepoint, data['GeneralCategory'], upper, lower, title) | |||
print '%s %s %s %s %s %s' % (codepoint, data['GeneralCategory'], upper, lower, title, ' '.join(data['Properties'])) | |||
except KeyError: | |||
print '%s Cn %s %s %s' % (codepoint, codepoint, codepoint, codepoint) | |||
print '%s Cn %s %s %s ' % (codepoint, codepoint, codepoint, codepoint) |