@@ -3278,14 +3278,14 @@ ucd::category ucd::lookup_category(codepoint_t c) | |||
if (c <= 0x0FFFFF) return Cn; // 0FFFFE..0FFFFF : Plane 15 Private Use | |||
if (c <= 0x10FFFD) return Co; // 100000..10FFFD : Plane 16 Private Use | |||
if (c <= 0x10FFFF) return Cn; // 10FFFE..10FFFF : Plane 16 Private Use | |||
return Ci; | |||
return Ii; // Invalid Unicode Codepoint | |||
} | |||
ucd::category_group ucd::lookup_category_group(codepoint_t c) | |||
{ | |||
switch (lookup_category(c)) | |||
{ | |||
case Cc: case Cf: case Ci: case Cn: case Co: case Cs: | |||
case Cc: case Cf: case Cn: case Co: case Cs: | |||
return C; | |||
case Ll: case Lm: case Lo: case Lt: case Lu: | |||
return L; | |||
@@ -3299,5 +3299,7 @@ ucd::category_group ucd::lookup_category_group(codepoint_t c) | |||
return S; | |||
case Zl: case Zp: case Zs: | |||
return Z; | |||
case Ii: | |||
return I; | |||
} | |||
} |
@@ -63,8 +63,9 @@ int ucd::isgraph(codepoint_t c) | |||
{ | |||
switch (lookup_category(c)) | |||
{ | |||
case Cc: case Cf: case Ci: case Cn: case Co: case Cs: | |||
case Cc: case Cf: case Cn: case Co: case Cs: | |||
case Zl: case Zp: case Zs: | |||
case Ii: | |||
return 0; | |||
} | |||
return 1; | |||
@@ -79,7 +80,8 @@ int ucd::isprint(codepoint_t c) | |||
{ | |||
switch (lookup_category(c)) | |||
{ | |||
case Cc: case Cf: case Ci: case Cn: case Co: case Cs: | |||
case Cc: case Cf: case Cn: case Co: case Cs: | |||
case Ii: | |||
return 0; | |||
} | |||
return 1; |
@@ -37,6 +37,7 @@ namespace ucd | |||
enum category_group | |||
{ | |||
C, /**< @brief Other */ | |||
I, /**< @brief Invalid */ | |||
L, /**< @brief Letter */ | |||
M, /**< @brief Mark */ | |||
N, /**< @brief Number */ | |||
@@ -52,11 +53,12 @@ namespace ucd | |||
{ | |||
Cc, /**< @brief Control Character */ | |||
Cf, /**< @brief Format Control Character */ | |||
Ci, /**< @brief Invalid Unicode Character */ | |||
Cn, /**< @brief Unassigned */ | |||
Co, /**< @brief Private Use */ | |||
Cs, /**< @brief Surrogate Code Point */ | |||
Ii, /**< @brief Invalid Unicode Codepoint */ | |||
Ll, /**< @brief Lower Case Letter */ | |||
Lm, /**< @brief Letter Modifier */ | |||
Lo, /**< @brief Other Letter */ |
@@ -151,7 +151,7 @@ using namespace ucd; | |||
sys.stdout.write('\t\tconst ucd::category *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | |||
sys.stdout.write('\t\treturn table ? table[c % 256] : Cn;\n') | |||
sys.stdout.write('\t}\n') | |||
sys.stdout.write('\treturn Ci;\n') | |||
sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n') | |||
sys.stdout.write('}\n') | |||
sys.stdout.write(""" | |||
@@ -159,7 +159,7 @@ ucd::category_group ucd::lookup_category_group(codepoint_t c) | |||
{ | |||
switch (lookup_category(c)) | |||
{ | |||
case Cc: case Cf: case Ci: case Cn: case Co: case Cs: | |||
case Cc: case Cf: case Cn: case Co: case Cs: | |||
return C; | |||
case Ll: case Lm: case Lo: case Lt: case Lu: | |||
return L; | |||
@@ -173,6 +173,8 @@ ucd::category_group ucd::lookup_category_group(codepoint_t c) | |||
return S; | |||
case Zl: case Zp: case Zs: | |||
return Z; | |||
case Ii: | |||
return I; | |||
} | |||
} | |||
""") |