| (ucd.CodeRange('10FFFE..10FFFF'), 'Cn', 'Plane 16 Private Use'), | (ucd.CodeRange('10FFFE..10FFFF'), 'Cn', 'Plane 16 Private Use'), | ||||
| ] | ] | ||||
| # These categories have many pages consisting of just this category: | |||||
| # Cn -- Unassigned | |||||
| # Lo -- CJK Ideographs | |||||
| special_categories = ['Cn', 'Lo'] | |||||
| category_tables = {} | category_tables = {} | ||||
| for codepoints, category, comment in category_sets: | for codepoints, category, comment in category_sets: | ||||
| if not category: | if not category: | ||||
| table = {} | table = {} | ||||
| table_entry = None | table_entry = None | ||||
| table_codepoint = None | table_codepoint = None | ||||
| is_unassigned = True | |||||
| table_category = None | |||||
| for i, codepoint in enumerate(codepoints): | for i, codepoint in enumerate(codepoints): | ||||
| try: | |||||
| category = unicode_chars[codepoint] | |||||
| except KeyError: | |||||
| category = 'Cn' # Unassigned | |||||
| if (i % 256) == 0: | if (i % 256) == 0: | ||||
| if table_entry: | if table_entry: | ||||
| if is_unassigned: | |||||
| table[table_codepoint] = None | |||||
| if table_category in special_categories: | |||||
| table[table_codepoint] = table_category | |||||
| else: | else: | ||||
| table[table_codepoint] = table_entry | table[table_codepoint] = table_entry | ||||
| table_entry = [] | table_entry = [] | ||||
| table_codepoint = codepoint | table_codepoint = codepoint | ||||
| is_unassigned = True | |||||
| try: | |||||
| category = unicode_chars[codepoint] | |||||
| is_unassigned = False | |||||
| except KeyError: | |||||
| category = 'Cn' # Unassigned | |||||
| table_category = category | |||||
| if category != table_category: | |||||
| table_category = None | |||||
| table_entry.append(category) | table_entry.append(category) | ||||
| if table_entry: | if table_entry: | ||||
| if is_unassigned: | |||||
| table[table_codepoint] = None | |||||
| if table_category in special_categories: | |||||
| table[table_codepoint] = table_category | |||||
| else: | else: | ||||
| table[table_codepoint] = table_entry | table[table_codepoint] = table_entry | ||||
| category_tables['%s_%s' % (codepoints.first, codepoints.last)] = table | category_tables['%s_%s' % (codepoints.first, codepoints.last)] = table | ||||
| // Unicode Character Data %s | // Unicode Character Data %s | ||||
| """ % ucd_version) | """ % ucd_version) | ||||
| for category in special_categories: | |||||
| sys.stdout.write('\n') | |||||
| sys.stdout.write('static const ucd::category categories_%s[256] =\n' % category) | |||||
| sys.stdout.write('{') | |||||
| for i in range(0, 256): | |||||
| if (i % 16) == 0: | |||||
| sys.stdout.write('\n\t/* %02X */' % i) | |||||
| sys.stdout.write(' %s,' % category) | |||||
| sys.stdout.write('\n};\n') | |||||
| for codepoints, category, comment in category_sets: | for codepoints, category, comment in category_sets: | ||||
| if not category: | if not category: | ||||
| tables = category_tables['%s_%s' % (codepoints.first, codepoints.last)] | tables = category_tables['%s_%s' % (codepoints.first, codepoints.last)] | ||||
| for codepoint in sorted(tables.keys()): | for codepoint in sorted(tables.keys()): | ||||
| table = tables[codepoint] | table = tables[codepoint] | ||||
| if not table: | |||||
| if table in special_categories: | |||||
| continue | continue | ||||
| sys.stdout.write('\n') | sys.stdout.write('\n') | ||||
| sys.stdout.write('static const ucd::category *categories_%s[] =\n' % table_index) | sys.stdout.write('static const ucd::category *categories_%s[] =\n' % table_index) | ||||
| sys.stdout.write('{\n') | sys.stdout.write('{\n') | ||||
| for codepoint, table in sorted(category_tables[table_index].items()): | for codepoint, table in sorted(category_tables[table_index].items()): | ||||
| if table: | |||||
| sys.stdout.write('\tcategories_%s,\n' % codepoint) | |||||
| if isinstance(table, str): | |||||
| sys.stdout.write('\tcategories_%s, // %s\n' % (table, codepoint)) | |||||
| else: | else: | ||||
| sys.stdout.write('\tNULL, // %s : Unassigned\n' % codepoint) | |||||
| sys.stdout.write('\tcategories_%s,\n' % codepoint) | |||||
| sys.stdout.write('};\n') | sys.stdout.write('};\n') | ||||
| sys.stdout.write('\n') | sys.stdout.write('\n') | ||||
| sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) | sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) | ||||
| sys.stdout.write('\t{\n') | sys.stdout.write('\t{\n') | ||||
| sys.stdout.write('\t\tconst ucd::category *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | sys.stdout.write('\t\tconst ucd::category *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | ||||
| sys.stdout.write('\t\treturn table ? table[c % 256] : Cn;\n') | |||||
| sys.stdout.write('\t\treturn table[c % 256];\n') | |||||
| sys.stdout.write('\t}\n') | sys.stdout.write('\t}\n') | ||||
| sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n') | sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n') | ||||
| sys.stdout.write('}\n') | sys.stdout.write('}\n') |