| case 'U': // uppercase | case 'U': // uppercase | ||||
| uprintf_codepoint(out, towupper(c), *++format); | uprintf_codepoint(out, towupper(c), *++format); | ||||
| break; | break; | ||||
| case 'W': // whitespace | |||||
| if (iswspace(c)) | |||||
| fputs("White_Space", out); | |||||
| break; | |||||
| } | } | ||||
| ++format; | ++format; | ||||
| break; | break; | ||||
| { | { | ||||
| codepoint_t c = 0; | codepoint_t c = 0; | ||||
| while (fget_utf8c(in, &c)) | while (fget_utf8c(in, &c)) | ||||
| uprintf(stdout, c, format ? format : "%pc\t%pH\t%s\t%c\t%Uc\t%Lc\t%Tc\t%W\n"); | |||||
| uprintf(stdout, c, format ? format : "%pc\t%pH\t%s\t%c\t%Uc\t%Lc\t%Tc\t%is\n"); | |||||
| } | } | ||||
| int main(int argc, char **argv) | int main(int argc, char **argv) | ||||
| else | else | ||||
| { | { | ||||
| for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | ||||
| uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %W\n"); | |||||
| uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %is\n"); | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| } | } |
| case 'U': // uppercase | case 'U': // uppercase | ||||
| uprintf_codepoint(out, ucd_toupper(c), *++format); | uprintf_codepoint(out, ucd_toupper(c), *++format); | ||||
| break; | break; | ||||
| case 'W': // whitespace | |||||
| if (ucd_isspace(c)) | |||||
| fputs("White_Space", out); | |||||
| break; | |||||
| } | } | ||||
| ++format; | ++format; | ||||
| break; | break; | ||||
| { | { | ||||
| codepoint_t c = 0; | codepoint_t c = 0; | ||||
| while (fget_utf8c(in, &c)) | while (fget_utf8c(in, &c)) | ||||
| uprintf(stdout, c, format ? format : "%pc\t%pH\t%s\t%c\t%Uc\t%Lc\t%Tc\t%W\n"); | |||||
| uprintf(stdout, c, format ? format : "%pc\t%pH\t%s\t%c\t%Uc\t%Lc\t%Tc\t%is\n"); | |||||
| } | } | ||||
| int main(int argc, char **argv) | int main(int argc, char **argv) | ||||
| else | else | ||||
| { | { | ||||
| for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | ||||
| uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %W\n"); | |||||
| uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %is\n"); | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| } | } |
| case 'U': // uppercase | case 'U': // uppercase | ||||
| uprintf_codepoint(out, ucd::toupper(c), *++format); | uprintf_codepoint(out, ucd::toupper(c), *++format); | ||||
| break; | break; | ||||
| case 'W': // whitespace | |||||
| if (ucd::isspace(c)) | |||||
| fputs("White_Space", out); | |||||
| break; | |||||
| } | } | ||||
| ++format; | ++format; | ||||
| break; | break; | ||||
| { | { | ||||
| ucd::codepoint_t c = 0; | ucd::codepoint_t c = 0; | ||||
| while (fget_utf8c(in, c)) | while (fget_utf8c(in, c)) | ||||
| uprintf(stdout, c, format ? format : "%pc\t%pH\t%s\t%c\t%Uc\t%Lc\t%Tc\t%W\n"); | |||||
| uprintf(stdout, c, format ? format : "%pc\t%pH\t%s\t%c\t%Uc\t%Lc\t%Tc\t%is\n"); | |||||
| } | } | ||||
| int main(int argc, char **argv) | int main(int argc, char **argv) | ||||
| else | else | ||||
| { | { | ||||
| for (ucd::codepoint_t c = 0; c <= 0x10FFFF; ++c) | for (ucd::codepoint_t c = 0; c <= 0x10FFFF; ++c) | ||||
| uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %W\n"); | |||||
| uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %is\n"); | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| } | } |
| #!/usr/bin/python | #!/usr/bin/python | ||||
| # Copyright (C) 2012 Reece H. Dunn | |||||
| # Copyright (C) 2012-2017 Reece H. Dunn | |||||
| # | # | ||||
| # This file is part of ucd-tools. | # This file is part of ucd-tools. | ||||
| # | # | ||||
| for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): | for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): | ||||
| for codepoint in data['CodePoint']: | for codepoint in data['CodePoint']: | ||||
| unicode_chars[codepoint] = data | unicode_chars[codepoint] = data | ||||
| unicode_chars[codepoint]['Properties'] = [] | |||||
| for data in ucd.parse_ucd_data(ucd_rootdir, 'PropList'): | for data in ucd.parse_ucd_data(ucd_rootdir, 'PropList'): | ||||
| if data['Property'] in ['White_Space']: | if data['Property'] in ['White_Space']: | ||||
| for codepoint in data['Range']: | for codepoint in data['Range']: | ||||
| unicode_chars[codepoint]['Properties'].append(data['Property']) | |||||
| unicode_chars[codepoint][data['Property']] = 1 | |||||
| for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'): | for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'): | ||||
| for codepoint in data['Range']: | for codepoint in data['Range']: | ||||
| unicode_chars[codepoint]['Script'] = data['Script'] | unicode_chars[codepoint]['Script'] = data['Script'] | ||||
| codepoint, script, | codepoint, script, | ||||
| data.get('GeneralCategory', 'Cn')[0], data.get('GeneralCategory', 'Cn'), | data.get('GeneralCategory', 'Cn')[0], data.get('GeneralCategory', 'Cn'), | ||||
| upper, lower, title, | upper, lower, title, | ||||
| ' '.join(data.get('Properties', [])))) | |||||
| data.get('White_Space', 0))) |