if (c <= 0x0FFFFF) return Cn; // 0FFFFE..0FFFFF : Plane 15 Private Use | if (c <= 0x0FFFFF) return Cn; // 0FFFFE..0FFFFF : Plane 15 Private Use | ||||
if (c <= 0x10FFFD) return Co; // 100000..10FFFD : Plane 16 Private Use | if (c <= 0x10FFFD) return Co; // 100000..10FFFD : Plane 16 Private Use | ||||
if (c <= 0x10FFFF) return Cn; // 10FFFE..10FFFF : Plane 16 Private Use | if (c <= 0x10FFFF) return Cn; // 10FFFE..10FFFF : Plane 16 Private Use | ||||
return Ci; | |||||
return Ii; // Invalid Unicode Codepoint | |||||
} | } | ||||
ucd::category_group ucd::lookup_category_group(codepoint_t c) | ucd::category_group ucd::lookup_category_group(codepoint_t c) | ||||
{ | { | ||||
switch (lookup_category(c)) | switch (lookup_category(c)) | ||||
{ | { | ||||
case Cc: case Cf: case Ci: case Cn: case Co: case Cs: | |||||
case Cc: case Cf: case Cn: case Co: case Cs: | |||||
return C; | return C; | ||||
case Ll: case Lm: case Lo: case Lt: case Lu: | case Ll: case Lm: case Lo: case Lt: case Lu: | ||||
return L; | return L; | ||||
return S; | return S; | ||||
case Zl: case Zp: case Zs: | case Zl: case Zp: case Zs: | ||||
return Z; | return Z; | ||||
case Ii: | |||||
return I; | |||||
} | } | ||||
} | } |
{ | { | ||||
switch (lookup_category(c)) | switch (lookup_category(c)) | ||||
{ | { | ||||
case Cc: case Cf: case Ci: case Cn: case Co: case Cs: | |||||
case Cc: case Cf: case Cn: case Co: case Cs: | |||||
case Zl: case Zp: case Zs: | case Zl: case Zp: case Zs: | ||||
case Ii: | |||||
return 0; | return 0; | ||||
} | } | ||||
return 1; | return 1; | ||||
{ | { | ||||
switch (lookup_category(c)) | switch (lookup_category(c)) | ||||
{ | { | ||||
case Cc: case Cf: case Ci: case Cn: case Co: case Cs: | |||||
case Cc: case Cf: case Cn: case Co: case Cs: | |||||
case Ii: | |||||
return 0; | return 0; | ||||
} | } | ||||
return 1; | return 1; |
enum category_group | enum category_group | ||||
{ | { | ||||
C, /**< @brief Other */ | C, /**< @brief Other */ | ||||
I, /**< @brief Invalid */ | |||||
L, /**< @brief Letter */ | L, /**< @brief Letter */ | ||||
M, /**< @brief Mark */ | M, /**< @brief Mark */ | ||||
N, /**< @brief Number */ | N, /**< @brief Number */ | ||||
{ | { | ||||
Cc, /**< @brief Control Character */ | Cc, /**< @brief Control Character */ | ||||
Cf, /**< @brief Format Control Character */ | Cf, /**< @brief Format Control Character */ | ||||
Ci, /**< @brief Invalid Unicode Character */ | |||||
Cn, /**< @brief Unassigned */ | Cn, /**< @brief Unassigned */ | ||||
Co, /**< @brief Private Use */ | Co, /**< @brief Private Use */ | ||||
Cs, /**< @brief Surrogate Code Point */ | Cs, /**< @brief Surrogate Code Point */ | ||||
Ii, /**< @brief Invalid Unicode Codepoint */ | |||||
Ll, /**< @brief Lower Case Letter */ | Ll, /**< @brief Lower Case Letter */ | ||||
Lm, /**< @brief Letter Modifier */ | Lm, /**< @brief Letter Modifier */ | ||||
Lo, /**< @brief Other Letter */ | Lo, /**< @brief Other Letter */ |
sys.stdout.write('\t\tconst ucd::category *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | sys.stdout.write('\t\tconst ucd::category *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | ||||
sys.stdout.write('\t\treturn table ? table[c % 256] : Cn;\n') | sys.stdout.write('\t\treturn table ? table[c % 256] : Cn;\n') | ||||
sys.stdout.write('\t}\n') | sys.stdout.write('\t}\n') | ||||
sys.stdout.write('\treturn Ci;\n') | |||||
sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n') | |||||
sys.stdout.write('}\n') | sys.stdout.write('}\n') | ||||
sys.stdout.write(""" | sys.stdout.write(""" | ||||
{ | { | ||||
switch (lookup_category(c)) | switch (lookup_category(c)) | ||||
{ | { | ||||
case Cc: case Cf: case Ci: case Cn: case Co: case Cs: | |||||
case Cc: case Cf: case Cn: case Co: case Cs: | |||||
return C; | return C; | ||||
case Ll: case Lm: case Lo: case Lt: case Lu: | case Ll: case Lm: case Lo: case Lt: case Lu: | ||||
return L; | return L; | ||||
return S; | return S; | ||||
case Zl: case Zp: case Zs: | case Zl: case Zp: case Zs: | ||||
return Z; | return Z; | ||||
case Ii: | |||||
return I; | |||||
} | } | ||||
} | } | ||||
""") | """) |