|
|
|
|
|
|
|
|
(ucd.CodeRange('10FFFE..10FFFF'), 'Cn', 'Plane 16 Private Use'), |
|
|
(ucd.CodeRange('10FFFE..10FFFF'), 'Cn', 'Plane 16 Private Use'), |
|
|
] |
|
|
] |
|
|
|
|
|
|
|
|
|
|
|
# These categories have many pages consisting of just this category: |
|
|
|
|
|
# Cn -- Unassigned |
|
|
|
|
|
# Lo -- CJK Ideographs |
|
|
|
|
|
special_categories = ['Cn', 'Lo'] |
|
|
|
|
|
|
|
|
category_tables = {} |
|
|
category_tables = {} |
|
|
for codepoints, category, comment in category_sets: |
|
|
for codepoints, category, comment in category_sets: |
|
|
if not category: |
|
|
if not category: |
|
|
table = {} |
|
|
table = {} |
|
|
table_entry = None |
|
|
table_entry = None |
|
|
table_codepoint = None |
|
|
table_codepoint = None |
|
|
is_unassigned = True |
|
|
|
|
|
|
|
|
table_category = None |
|
|
for i, codepoint in enumerate(codepoints): |
|
|
for i, codepoint in enumerate(codepoints): |
|
|
|
|
|
try: |
|
|
|
|
|
category = unicode_chars[codepoint] |
|
|
|
|
|
except KeyError: |
|
|
|
|
|
category = 'Cn' # Unassigned |
|
|
if (i % 256) == 0: |
|
|
if (i % 256) == 0: |
|
|
if table_entry: |
|
|
if table_entry: |
|
|
if is_unassigned: |
|
|
|
|
|
table[table_codepoint] = None |
|
|
|
|
|
|
|
|
if table_category in special_categories: |
|
|
|
|
|
table[table_codepoint] = table_category |
|
|
else: |
|
|
else: |
|
|
table[table_codepoint] = table_entry |
|
|
table[table_codepoint] = table_entry |
|
|
table_entry = [] |
|
|
table_entry = [] |
|
|
table_codepoint = codepoint |
|
|
table_codepoint = codepoint |
|
|
is_unassigned = True |
|
|
|
|
|
try: |
|
|
|
|
|
category = unicode_chars[codepoint] |
|
|
|
|
|
is_unassigned = False |
|
|
|
|
|
except KeyError: |
|
|
|
|
|
category = 'Cn' # Unassigned |
|
|
|
|
|
|
|
|
table_category = category |
|
|
|
|
|
if category != table_category: |
|
|
|
|
|
table_category = None |
|
|
table_entry.append(category) |
|
|
table_entry.append(category) |
|
|
if table_entry: |
|
|
if table_entry: |
|
|
if is_unassigned: |
|
|
|
|
|
table[table_codepoint] = None |
|
|
|
|
|
|
|
|
if table_category in special_categories: |
|
|
|
|
|
table[table_codepoint] = table_category |
|
|
else: |
|
|
else: |
|
|
table[table_codepoint] = table_entry |
|
|
table[table_codepoint] = table_entry |
|
|
category_tables['%s_%s' % (codepoints.first, codepoints.last)] = table |
|
|
category_tables['%s_%s' % (codepoints.first, codepoints.last)] = table |
|
|
|
|
|
|
|
|
// Unicode Character Data %s |
|
|
// Unicode Character Data %s |
|
|
""" % ucd_version) |
|
|
""" % ucd_version) |
|
|
|
|
|
|
|
|
|
|
|
for category in special_categories: |
|
|
|
|
|
sys.stdout.write('\n') |
|
|
|
|
|
sys.stdout.write('static const ucd::category categories_%s[256] =\n' % category) |
|
|
|
|
|
sys.stdout.write('{') |
|
|
|
|
|
for i in range(0, 256): |
|
|
|
|
|
if (i % 16) == 0: |
|
|
|
|
|
sys.stdout.write('\n\t/* %02X */' % i) |
|
|
|
|
|
sys.stdout.write(' %s,' % category) |
|
|
|
|
|
sys.stdout.write('\n};\n') |
|
|
|
|
|
|
|
|
for codepoints, category, comment in category_sets: |
|
|
for codepoints, category, comment in category_sets: |
|
|
if not category: |
|
|
if not category: |
|
|
tables = category_tables['%s_%s' % (codepoints.first, codepoints.last)] |
|
|
tables = category_tables['%s_%s' % (codepoints.first, codepoints.last)] |
|
|
for codepoint in sorted(tables.keys()): |
|
|
for codepoint in sorted(tables.keys()): |
|
|
table = tables[codepoint] |
|
|
table = tables[codepoint] |
|
|
if not table: |
|
|
|
|
|
|
|
|
if table in special_categories: |
|
|
continue |
|
|
continue |
|
|
|
|
|
|
|
|
sys.stdout.write('\n') |
|
|
sys.stdout.write('\n') |
|
|
|
|
|
|
|
|
sys.stdout.write('static const ucd::category *categories_%s[] =\n' % table_index) |
|
|
sys.stdout.write('static const ucd::category *categories_%s[] =\n' % table_index) |
|
|
sys.stdout.write('{\n') |
|
|
sys.stdout.write('{\n') |
|
|
for codepoint, table in sorted(category_tables[table_index].items()): |
|
|
for codepoint, table in sorted(category_tables[table_index].items()): |
|
|
if table: |
|
|
|
|
|
sys.stdout.write('\tcategories_%s,\n' % codepoint) |
|
|
|
|
|
|
|
|
if isinstance(table, str): |
|
|
|
|
|
sys.stdout.write('\tcategories_%s, // %s\n' % (table, codepoint)) |
|
|
else: |
|
|
else: |
|
|
sys.stdout.write('\tNULL, // %s : Unassigned\n' % codepoint) |
|
|
|
|
|
|
|
|
sys.stdout.write('\tcategories_%s,\n' % codepoint) |
|
|
sys.stdout.write('};\n') |
|
|
sys.stdout.write('};\n') |
|
|
|
|
|
|
|
|
sys.stdout.write('\n') |
|
|
sys.stdout.write('\n') |
|
|
|
|
|
|
|
|
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) |
|
|
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) |
|
|
sys.stdout.write('\t{\n') |
|
|
sys.stdout.write('\t{\n') |
|
|
sys.stdout.write('\t\tconst ucd::category *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) |
|
|
sys.stdout.write('\t\tconst ucd::category *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) |
|
|
sys.stdout.write('\t\treturn table ? table[c % 256] : Cn;\n') |
|
|
|
|
|
|
|
|
sys.stdout.write('\t\treturn table[c % 256];\n') |
|
|
sys.stdout.write('\t}\n') |
|
|
sys.stdout.write('\t}\n') |
|
|
sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n') |
|
|
sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n') |
|
|
sys.stdout.write('}\n') |
|
|
sys.stdout.write('}\n') |