Browse Source

Avoid duplicating Lo only tables.

master
Reece H. Dunn 12 years ago
parent
commit
7f1dd9cc96
2 changed files with 1423 additions and 8367 deletions
  1. 1391
    8351
      src/categories.cpp
  2. 32
    16
      tools/categories.py

+ 1391
- 8351
src/categories.cpp
File diff suppressed because it is too large
View File


+ 32
- 16
tools/categories.py View File

@@ -46,32 +46,38 @@ category_sets = [
(ucd.CodeRange('10FFFE..10FFFF'), 'Cn', 'Plane 16 Private Use'),
]

# These categories have many pages consisting of just this category:
# Cn -- Unassigned
# Lo -- CJK Ideographs
special_categories = ['Cn', 'Lo']

category_tables = {}
for codepoints, category, comment in category_sets:
if not category:
table = {}
table_entry = None
table_codepoint = None
is_unassigned = True
table_category = None
for i, codepoint in enumerate(codepoints):
try:
category = unicode_chars[codepoint]
except KeyError:
category = 'Cn' # Unassigned
if (i % 256) == 0:
if table_entry:
if is_unassigned:
table[table_codepoint] = None
if table_category in special_categories:
table[table_codepoint] = table_category
else:
table[table_codepoint] = table_entry
table_entry = []
table_codepoint = codepoint
is_unassigned = True
try:
category = unicode_chars[codepoint]
is_unassigned = False
except KeyError:
category = 'Cn' # Unassigned
table_category = category
if category != table_category:
table_category = None
table_entry.append(category)
if table_entry:
if is_unassigned:
table[table_codepoint] = None
if table_category in special_categories:
table[table_codepoint] = table_category
else:
table[table_codepoint] = table_entry
category_tables['%s_%s' % (codepoints.first, codepoints.last)] = table
@@ -109,12 +115,22 @@ using namespace ucd;
// Unicode Character Data %s
""" % ucd_version)

for category in special_categories:
sys.stdout.write('\n')
sys.stdout.write('static const ucd::category categories_%s[256] =\n' % category)
sys.stdout.write('{')
for i in range(0, 256):
if (i % 16) == 0:
sys.stdout.write('\n\t/* %02X */' % i)
sys.stdout.write(' %s,' % category)
sys.stdout.write('\n};\n')

for codepoints, category, comment in category_sets:
if not category:
tables = category_tables['%s_%s' % (codepoints.first, codepoints.last)]
for codepoint in sorted(tables.keys()):
table = tables[codepoint]
if not table:
if table in special_categories:
continue

sys.stdout.write('\n')
@@ -133,10 +149,10 @@ using namespace ucd;
sys.stdout.write('static const ucd::category *categories_%s[] =\n' % table_index)
sys.stdout.write('{\n')
for codepoint, table in sorted(category_tables[table_index].items()):
if table:
sys.stdout.write('\tcategories_%s,\n' % codepoint)
if isinstance(table, str):
sys.stdout.write('\tcategories_%s, // %s\n' % (table, codepoint))
else:
sys.stdout.write('\tNULL, // %s : Unassigned\n' % codepoint)
sys.stdout.write('\tcategories_%s,\n' % codepoint)
sys.stdout.write('};\n')

sys.stdout.write('\n')
@@ -149,7 +165,7 @@ using namespace ucd;
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints))
sys.stdout.write('\t{\n')
sys.stdout.write('\t\tconst ucd::category *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first))
sys.stdout.write('\t\treturn table ? table[c % 256] : Cn;\n')
sys.stdout.write('\t\treturn table[c % 256];\n')
sys.stdout.write('\t}\n')
sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n')
sys.stdout.write('}\n')

Loading…
Cancel
Save