|
|
@@ -19,8 +19,42 @@ |
|
|
|
|
|
|
|
#include "ucd/ucd.h" |
|
|
|
|
|
|
|
#include <string.h> |
|
|
|
#include <stdio.h> |
|
|
|
|
|
|
|
bool fget_utf8c(FILE *in, ucd::codepoint_t &c) |
|
|
|
{ |
|
|
|
int ch = EOF; |
|
|
|
if ((ch = fgetc(in)) == EOF) return false; |
|
|
|
if (uint8_t(ch) < 0x80) |
|
|
|
c = uint8_t(ch); |
|
|
|
else switch (uint8_t(ch) & 0xF0) |
|
|
|
{ |
|
|
|
default: |
|
|
|
c = uint8_t(ch) & 0x1F; |
|
|
|
if ((ch = fgetc(in)) == EOF) return false; |
|
|
|
c = (c << 6) + (uint8_t(ch) & 0x3F); |
|
|
|
break; |
|
|
|
case 0xE0: |
|
|
|
c = uint8_t(ch) & 0x0F; |
|
|
|
if ((ch = fgetc(in)) == EOF) return false; |
|
|
|
c = (c << 6) + (uint8_t(ch) & 0x3F); |
|
|
|
if ((ch = fgetc(in)) == EOF) return false; |
|
|
|
c = (c << 6) + (uint8_t(ch) & 0x3F); |
|
|
|
break; |
|
|
|
case 0xF0: |
|
|
|
c = uint8_t(ch) & 0x07; |
|
|
|
if ((ch = fgetc(in)) == EOF) return false; |
|
|
|
c = (c << 6) + (uint8_t(ch) & 0x3F); |
|
|
|
if ((ch = fgetc(in)) == EOF) return false; |
|
|
|
c = (c << 6) + (uint8_t(ch) & 0x3F); |
|
|
|
if ((ch = fgetc(in)) == EOF) return false; |
|
|
|
c = (c << 6) + (uint8_t(ch) & 0x3F); |
|
|
|
break; |
|
|
|
} |
|
|
|
return true; |
|
|
|
} |
|
|
|
|
|
|
|
void uprintf_codepoint(FILE *out, ucd::codepoint_t c, char mode) |
|
|
|
{ |
|
|
|
switch (mode) |
|
|
@@ -76,9 +110,35 @@ void uprintf(FILE *out, ucd::codepoint_t c, const char *format) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
int main() |
|
|
|
void print_file(FILE *in) |
|
|
|
{ |
|
|
|
for (ucd::codepoint_t c = 0; c <= 0x10FFFF; ++c) |
|
|
|
ucd::codepoint_t c = 0; |
|
|
|
while (fget_utf8c(in, c)) |
|
|
|
uprintf(stdout, c, "%pH %s %C %c %UH %LH %TH %W\n"); |
|
|
|
} |
|
|
|
|
|
|
|
int main(int argc, char **argv) |
|
|
|
{ |
|
|
|
if (argc == 2) |
|
|
|
{ |
|
|
|
if (!strcmp(argv[1], "--stdin") || !strcmp(argv[1], "-")) |
|
|
|
print_file(stdin); |
|
|
|
else |
|
|
|
{ |
|
|
|
FILE *in = fopen(argv[1], "r"); |
|
|
|
if (in) |
|
|
|
{ |
|
|
|
print_file(in); |
|
|
|
fclose(in); |
|
|
|
} |
|
|
|
else |
|
|
|
fprintf(stdout, "cannot open `%s`\n", argv[1]); |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
for (ucd::codepoint_t c = 0; c <= 0x10FFFF; ++c) |
|
|
|
uprintf(stdout, c, "%pH %s %C %c %UH %LH %TH %W\n"); |
|
|
|
} |
|
|
|
return 0; |
|
|
|
} |