UCD_VERSION=6.2.0 | UCD_VERSION=6.2.0 | ||||
UCD_ROOTDIR=data/ucd | UCD_ROOTDIR=data/ucd | ||||
data/ucd/PropList.txt: | |||||
mkdir -pv data/ucd | |||||
wget -O $@ http://www.unicode.org/Public/${UCD_VERSION}/ucd/PropList.txt | |||||
data/ucd/UnicodeData.txt: | data/ucd/UnicodeData.txt: | ||||
mkdir -pv data/ucd | mkdir -pv data/ucd | ||||
wget -O $@ http://www.unicode.org/Public/${UCD_VERSION}/ucd/UnicodeData.txt | wget -O $@ http://www.unicode.org/Public/${UCD_VERSION}/ucd/UnicodeData.txt | ||||
tests_printucddata_SOURCES = tests/printucddata.cpp | tests_printucddata_SOURCES = tests/printucddata.cpp | ||||
tests_printucddata_LDADD = src/libucd.la | tests_printucddata_LDADD = src/libucd.la | ||||
tests/unicode-data.expected: tools/printdata.py tools/ucd.py | |||||
tests/unicode-data.expected: tools/printdata.py tools/ucd.py \ | |||||
data/ucd/UnicodeData.txt \ | |||||
data/ucd/PropList.txt | |||||
tools/printdata.py ${UCD_ROOTDIR} > $@ | tools/printdata.py ${UCD_ROOTDIR} > $@ | ||||
tests/unicode-data.actual: tests/printucddata | tests/unicode-data.actual: tests/printucddata |
ucd::codepoint_t upper = ucd::toupper(c); | ucd::codepoint_t upper = ucd::toupper(c); | ||||
ucd::codepoint_t lower = ucd::tolower(c); | ucd::codepoint_t lower = ucd::tolower(c); | ||||
ucd::codepoint_t title = ucd::totitle(c); | ucd::codepoint_t title = ucd::totitle(c); | ||||
printf("%06X %s %06X %06X %06X\n", c, category, upper, lower, title); | |||||
const char *whitespace = ucd::isspace(c) ? "White_Space" : ""; | |||||
printf("%06X %s %06X %06X %06X %s\n", c, category, upper, lower, title, whitespace); | |||||
} | } | ||||
return 0; | return 0; | ||||
} | } |
for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): | for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): | ||||
for codepoint in data['CodePoint']: | for codepoint in data['CodePoint']: | ||||
unicode_chars[codepoint] = data | unicode_chars[codepoint] = data | ||||
unicode_chars[codepoint]['Properties'] = [] | |||||
for data in ucd.parse_ucd_data(ucd_rootdir, 'PropList'): | |||||
if data['Property'] in ['White_Space']: | |||||
for codepoint in data['Range']: | |||||
unicode_chars[codepoint]['Properties'].append(data['Property']) | |||||
null = ucd.CodePoint('0000') | null = ucd.CodePoint('0000') | ||||
if __name__ == '__main__': | if __name__ == '__main__': | ||||
if title == null: title = codepoint | if title == null: title = codepoint | ||||
if upper == null: upper = codepoint | if upper == null: upper = codepoint | ||||
if lower == null: lower = codepoint | if lower == null: lower = codepoint | ||||
print '%s %s %s %s %s' % (codepoint, data['GeneralCategory'], upper, lower, title) | |||||
print '%s %s %s %s %s %s' % (codepoint, data['GeneralCategory'], upper, lower, title, ' '.join(data['Properties'])) | |||||
except KeyError: | except KeyError: | ||||
print '%s Cn %s %s %s' % (codepoint, codepoint, codepoint, codepoint) | |||||
print '%s Cn %s %s %s ' % (codepoint, codepoint, codepoint, codepoint) |