case 'U': // uppercase | case 'U': // uppercase | ||||
uprintf_codepoint(out, towupper(c), *++format); | uprintf_codepoint(out, towupper(c), *++format); | ||||
break; | break; | ||||
case 'W': // whitespace | |||||
if (iswspace(c)) | |||||
fputs("White_Space", out); | |||||
break; | |||||
} | } | ||||
++format; | ++format; | ||||
break; | break; | ||||
{ | { | ||||
codepoint_t c = 0; | codepoint_t c = 0; | ||||
while (fget_utf8c(in, &c)) | while (fget_utf8c(in, &c)) | ||||
uprintf(stdout, c, format ? format : "%pc\t%pH\t%s\t%c\t%Uc\t%Lc\t%Tc\t%W\n"); | |||||
uprintf(stdout, c, format ? format : "%pc\t%pH\t%s\t%c\t%Uc\t%Lc\t%Tc\t%is\n"); | |||||
} | } | ||||
int main(int argc, char **argv) | int main(int argc, char **argv) | ||||
else | else | ||||
{ | { | ||||
for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | ||||
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %W\n"); | |||||
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %is\n"); | |||||
} | } | ||||
return 0; | return 0; | ||||
} | } |
case 'U': // uppercase | case 'U': // uppercase | ||||
uprintf_codepoint(out, ucd_toupper(c), *++format); | uprintf_codepoint(out, ucd_toupper(c), *++format); | ||||
break; | break; | ||||
case 'W': // whitespace | |||||
if (ucd_isspace(c)) | |||||
fputs("White_Space", out); | |||||
break; | |||||
} | } | ||||
++format; | ++format; | ||||
break; | break; | ||||
{ | { | ||||
codepoint_t c = 0; | codepoint_t c = 0; | ||||
while (fget_utf8c(in, &c)) | while (fget_utf8c(in, &c)) | ||||
uprintf(stdout, c, format ? format : "%pc\t%pH\t%s\t%c\t%Uc\t%Lc\t%Tc\t%W\n"); | |||||
uprintf(stdout, c, format ? format : "%pc\t%pH\t%s\t%c\t%Uc\t%Lc\t%Tc\t%is\n"); | |||||
} | } | ||||
int main(int argc, char **argv) | int main(int argc, char **argv) | ||||
else | else | ||||
{ | { | ||||
for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | ||||
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %W\n"); | |||||
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %is\n"); | |||||
} | } | ||||
return 0; | return 0; | ||||
} | } |
case 'U': // uppercase | case 'U': // uppercase | ||||
uprintf_codepoint(out, ucd::toupper(c), *++format); | uprintf_codepoint(out, ucd::toupper(c), *++format); | ||||
break; | break; | ||||
case 'W': // whitespace | |||||
if (ucd::isspace(c)) | |||||
fputs("White_Space", out); | |||||
break; | |||||
} | } | ||||
++format; | ++format; | ||||
break; | break; | ||||
{ | { | ||||
ucd::codepoint_t c = 0; | ucd::codepoint_t c = 0; | ||||
while (fget_utf8c(in, c)) | while (fget_utf8c(in, c)) | ||||
uprintf(stdout, c, format ? format : "%pc\t%pH\t%s\t%c\t%Uc\t%Lc\t%Tc\t%W\n"); | |||||
uprintf(stdout, c, format ? format : "%pc\t%pH\t%s\t%c\t%Uc\t%Lc\t%Tc\t%is\n"); | |||||
} | } | ||||
int main(int argc, char **argv) | int main(int argc, char **argv) | ||||
else | else | ||||
{ | { | ||||
for (ucd::codepoint_t c = 0; c <= 0x10FFFF; ++c) | for (ucd::codepoint_t c = 0; c <= 0x10FFFF; ++c) | ||||
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %W\n"); | |||||
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %is\n"); | |||||
} | } | ||||
return 0; | return 0; | ||||
} | } |
#!/usr/bin/python | #!/usr/bin/python | ||||
# Copyright (C) 2012 Reece H. Dunn | |||||
# Copyright (C) 2012-2017 Reece H. Dunn | |||||
# | # | ||||
# This file is part of ucd-tools. | # This file is part of ucd-tools. | ||||
# | # | ||||
for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): | for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): | ||||
for codepoint in data['CodePoint']: | for codepoint in data['CodePoint']: | ||||
unicode_chars[codepoint] = data | unicode_chars[codepoint] = data | ||||
unicode_chars[codepoint]['Properties'] = [] | |||||
for data in ucd.parse_ucd_data(ucd_rootdir, 'PropList'): | for data in ucd.parse_ucd_data(ucd_rootdir, 'PropList'): | ||||
if data['Property'] in ['White_Space']: | if data['Property'] in ['White_Space']: | ||||
for codepoint in data['Range']: | for codepoint in data['Range']: | ||||
unicode_chars[codepoint]['Properties'].append(data['Property']) | |||||
unicode_chars[codepoint][data['Property']] = 1 | |||||
for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'): | for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'): | ||||
for codepoint in data['Range']: | for codepoint in data['Range']: | ||||
unicode_chars[codepoint]['Script'] = data['Script'] | unicode_chars[codepoint]['Script'] = data['Script'] | ||||
codepoint, script, | codepoint, script, | ||||
data.get('GeneralCategory', 'Cn')[0], data.get('GeneralCategory', 'Cn'), | data.get('GeneralCategory', 'Cn')[0], data.get('GeneralCategory', 'Cn'), | ||||
upper, lower, title, | upper, lower, title, | ||||
' '.join(data.get('Properties', [])))) | |||||
data.get('White_Space', 0))) |