Browse Source

Do not include supplementary data in the UCD APIs.

This removes support for the CSUR (ConScript Unicode Registry) data
in the main Unicode APIs. This data should be accessed through a
different API.
master
Reece H. Dunn 11 years ago
parent
commit
ced06ed0f4
6 changed files with 46 additions and 76 deletions
  1. 0
    3
      Makefile.am
  2. 1
    2
      README.md
  3. 21
    21
      src/categories.cpp
  4. 21
    41
      src/scripts.cpp
  5. 2
    5
      tools/categories.py
  6. 1
    4
      tools/scripts.py

+ 0
- 3
Makefile.am View File

data/ucd/UnicodeData.txt data/ucd/UnicodeData.txt


tools/categories.py: tools/ucd.py \ tools/categories.py: tools/ucd.py \
supplemental/Klingon.txt \
data/ucd/UnicodeData.txt data/ucd/UnicodeData.txt


tools/scripts.py: tools/ucd.py \ tools/scripts.py: tools/ucd.py \
supplemental/Klingon.txt \
data/ucd/Scripts.txt data/ucd/Scripts.txt


ucd-update: tools/case.py tools/categories.py tools/scripts.py ucd-update: tools/case.py tools/categories.py tools/scripts.py
tests_printucddata_LDADD = src/libucd.la tests_printucddata_LDADD = src/libucd.la


tests/unicode-data.expected: tools/printdata.py tools/ucd.py \ tests/unicode-data.expected: tools/printdata.py tools/ucd.py \
supplemental/Klingon.txt \
data/ucd/UnicodeData.txt \ data/ucd/UnicodeData.txt \
data/ucd/PropList.txt \ data/ucd/PropList.txt \
data/ucd/Scripts.txt data/ucd/Scripts.txt

+ 1
- 2
README.md View File

- converting to upper, lower and title case. - converting to upper, lower and title case.


The following data sets are used for the data tables: The following data sets are used for the data tables:
- [Unicode Character Data 7.0.0](http://www.unicode.org/Public/7.0.0/ucd/);
- [Klingon](http://www.evertype.com/standards/csur/klingon.html) [F8D0-F8FF].
- [Unicode Character Data 7.0.0](http://www.unicode.org/Public/7.0.0/ucd/).


## Build Dependencies ## Build Dependencies



+ 21
- 21
src/categories.cpp View File

/* F0 */ Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn, /* F0 */ Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn, Cn,
}; };


static const uint8_t categories_Co[256] =
{
/* 00 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 10 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 20 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 30 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 40 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 50 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 60 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 70 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 80 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 90 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* A0 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* B0 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* C0 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* D0 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* E0 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* F0 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
};

static const uint8_t categories_Lo[256] = static const uint8_t categories_Lo[256] =
{ {
/* 00 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, /* 00 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo,
/* F0 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Cn, Cn, Cn, Cn, /* F0 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Cn, Cn, Cn, Cn,
}; };


static const uint8_t categories_00F800[256] =
{
/* 00 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 10 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 20 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 30 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 40 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 50 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 60 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 70 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 80 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 90 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* A0 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* B0 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* C0 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* D0 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo,
/* E0 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Co, Co, Co, Co, Co, Co,
/* F0 */ Nd, Nd, Nd, Nd, Nd, Nd, Nd, Nd, Nd, Nd, Co, Co, Co, Po, Po, So,
};

static const uint8_t categories_00FA00[256] = static const uint8_t categories_00FA00[256] =
{ {
/* 00 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, /* 00 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo,


static const uint8_t *categories_00F800_02FAFF[] = static const uint8_t *categories_00F800_02FAFF[] =
{ {
categories_00F800,
categories_Co, // 00F800
categories_Lo, // 00F900 categories_Lo, // 00F900
categories_00FA00, categories_00FA00,
categories_00FB00, categories_00FB00,

+ 21
- 41
src/scripts.cpp View File

/* F0 */ Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, /* F0 */ Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii, Vaii,
}; };


static const uint8_t scripts_Zzzz[256] =
{
/* 00 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 10 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 20 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 30 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 40 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 50 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 60 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 70 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 80 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 90 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* A0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* B0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* C0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* D0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* E0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* F0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
};

static const uint8_t scripts_Arab[256] = static const uint8_t scripts_Arab[256] =
{ {
/* 00 */ Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab, /* 00 */ Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab, Arab,
/* F0 */ Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina, /* F0 */ Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina, Lina,
}; };


static const uint8_t scripts_Zzzz[256] =
{
/* 00 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 10 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 20 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 30 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 40 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 50 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 60 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 70 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 80 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 90 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* A0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* B0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* C0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* D0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* E0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* F0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
};

static const uint8_t scripts_Xsux[256] = static const uint8_t scripts_Xsux[256] =
{ {
/* 00 */ Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, /* 00 */ Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux, Xsux,
/* F0 */ Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Zzzz, Zzzz, Zzzz, Zzzz, /* F0 */ Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Zzzz, Zzzz, Zzzz, Zzzz,
}; };


static const uint8_t scripts_00F800[256] =
{
/* 00 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 10 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 20 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 30 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 40 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 50 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 60 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 70 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 80 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 90 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* A0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* B0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* C0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* D0 */ Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak,
/* E0 */ Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* F0 */ Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Zzzz, Zzzz, Zzzz, Zyyy, Zyyy, Zyyy,
};

static const uint8_t scripts_00FA00[256] = static const uint8_t scripts_00FA00[256] =
{ {
/* 00 */ Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, /* 00 */ Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant,


static const uint8_t *scripts_00F800_02FAFF[] = static const uint8_t *scripts_00F800_02FAFF[] =
{ {
scripts_00F800,
scripts_Zzzz, // 00F800
scripts_Hant, // 00F900 scripts_Hant, // 00F900
scripts_00FA00, scripts_00FA00,
scripts_00FB00, scripts_00FB00,

+ 2
- 5
tools/categories.py View File

#!/usr/bin/python #!/usr/bin/python


# Copyright (C) 2012 Reece H. Dunn
# Copyright (C) 2012, 2014 Reece H. Dunn
# #
# This file is part of ucd-tools. # This file is part of ucd-tools.
# #
for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
for codepoint in data['CodePoint']: for codepoint in data['CodePoint']:
unicode_chars[codepoint] = data['GeneralCategory'] unicode_chars[codepoint] = data['GeneralCategory']
for data in ucd.parse_ucd_data('supplemental', 'Klingon'):
for codepoint in data['CodePoint']:
unicode_chars[codepoint] = data['GeneralCategory']


# This map is a combination of the information in the UnicodeData and Blocks # This map is a combination of the information in the UnicodeData and Blocks
# data files. It is intended to reduce the number of character tables that # data files. It is intended to reduce the number of character tables that
# These categories have many pages consisting of just this category: # These categories have many pages consisting of just this category:
# Cn -- Unassigned # Cn -- Unassigned
# Lo -- CJK Ideographs # Lo -- CJK Ideographs
special_categories = ['Cn', 'Lo', 'Sm', 'So']
special_categories = ['Cn', 'Co', 'Lo', 'Sm', 'So']


category_tables = {} category_tables = {}
for codepoints, category, comment in category_sets: for codepoints, category, comment in category_sets:

+ 1
- 4
tools/scripts.py View File

#!/usr/bin/python #!/usr/bin/python


# Copyright (C) 2012 Reece H. Dunn
# Copyright (C) 2012, 2014 Reece H. Dunn
# #
# This file is part of ucd-tools. # This file is part of ucd-tools.
# #
for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'): for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'):
for codepoint in data['Range']: for codepoint in data['Range']:
unicode_chars[codepoint] = data['Script'] unicode_chars[codepoint] = data['Script']
for data in ucd.parse_ucd_data('supplemental', 'Klingon'):
for codepoint in data['CodePoint']:
unicode_chars[codepoint] = data['Script']


# This map is a combination of the information in the UnicodeData and Blocks # This map is a combination of the information in the UnicodeData and Blocks
# data files. It is intended to reduce the number of character tables that # data files. It is intended to reduce the number of character tables that

Loading…
Cancel
Save