Browse Source

Add support for querying the Script property.

master
Reece H. Dunn 12 years ago
parent
commit
65f95033c8
8 changed files with 3621 additions and 17 deletions
  1. 19
    3
      Makefile.am
  2. 125
    0
      src/include/ucd/ucd.h
  3. 3042
    0
      src/scripts.cpp
  4. 114
    2
      tests/printucddata.cpp
  5. 102
    0
      tools/iana.py
  6. 19
    11
      tools/printdata.py
  7. 172
    0
      tools/scripts.py
  8. 28
    1
      tools/ucd.py

+ 19
- 3
Makefile.am View File

@@ -57,22 +57,37 @@ EXTRA_DIST += ChangeLog
UCD_VERSION=6.2.0
UCD_ROOTDIR=data/ucd

# Reference data files are fetched on demand. Each download is written to a
# temporary file and only renamed onto the real target after wget succeeds:
# wget -O creates its output file even when the transfer fails, and an empty
# or truncated target would otherwise be treated as up to date by make.

data/language-subtag-registry:
	mkdir -pv data
	wget -O $@.tmp http://www.iana.org/assignments/language-subtag-registry
	mv -f $@.tmp $@

data/ucd/PropList.txt:
	mkdir -pv data/ucd
	wget -O $@.tmp http://www.unicode.org/Public/${UCD_VERSION}/ucd/PropList.txt
	mv -f $@.tmp $@

data/ucd/Scripts.txt:
	mkdir -pv data/ucd
	wget -O $@.tmp http://www.unicode.org/Public/${UCD_VERSION}/ucd/Scripts.txt
	mv -f $@.tmp $@

data/ucd/UnicodeData.txt:
	mkdir -pv data/ucd
	wget -O $@.tmp http://www.unicode.org/Public/${UCD_VERSION}/ucd/UnicodeData.txt
	mv -f $@.tmp $@

############################# libucd ##########################################

src/case.cpp: tools/case.py tools/ucd.py data/ucd/UnicodeData.txt
# tools/ucd.py imports tools/iana.py and parses data/language-subtag-registry
# as soon as it is loaded, so the generator depends on both of them as well.
# The output goes to a temporary file first so that a failing generator does
# not leave a truncated source file that looks up to date.
src/case.cpp: tools/case.py tools/ucd.py tools/iana.py \
	data/language-subtag-registry \
	data/ucd/UnicodeData.txt
	tools/case.py ${UCD_ROOTDIR} ${UCD_VERSION} > $@.tmp
	mv -f $@.tmp $@

src/categories.cpp: tools/categories.py tools/ucd.py data/ucd/UnicodeData.txt
# tools/ucd.py imports tools/iana.py and parses data/language-subtag-registry
# as soon as it is loaded, so the generator depends on both of them as well.
# The output goes to a temporary file first so that a failing generator does
# not leave a truncated source file that looks up to date.
src/categories.cpp: tools/categories.py tools/ucd.py tools/iana.py \
	data/language-subtag-registry \
	data/ucd/UnicodeData.txt
	tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} > $@.tmp
	mv -f $@.tmp $@

# tools/ucd.py imports tools/iana.py, so a change to iana.py must also trigger
# regeneration. The output goes to a temporary file first so that a failing
# generator does not leave a truncated source file that looks up to date.
src/scripts.cpp: tools/scripts.py tools/ucd.py tools/iana.py \
	data/language-subtag-registry \
	data/ucd/Scripts.txt
	tools/scripts.py ${UCD_ROOTDIR} ${UCD_VERSION} > $@.tmp
	mv -f $@.tmp $@

libucd_includedir = $(includedir)/ucd
libucd_include_HEADERS = \
src/include/ucd/ucd.h
@@ -83,7 +98,8 @@ src_libucd_la_CXXFLAGS = ${AM_CXXFLAGS}
src_libucd_la_SOURCES = \
src/case.cpp \
src/categories.cpp \
src/ctype.cpp
src/ctype.cpp \
src/scripts.cpp

############################# tests ###########################################


+ 125
- 0
src/include/ucd/ucd.h View File

@@ -118,6 +118,131 @@ namespace ucd
category lookup_category(codepoint_t c);


//@}
/** @name Unicode Script
* @brief These functions query the Script property of Unicode codepoints.
*/
//@{


/** @brief Unicode Script
* @see http://www.iana.org/assignments/language-subtag-registry
* @see http://www.unicode.org/iso15924/iso15924-codes.html
*/
enum script
{
Arab, /**< @brief Arabic Script */
Armi, /**< @brief Imperial Aramaic Script */
Armn, /**< @brief Armenian Script */
Avst, /**< @brief Avestan Script */
Bali, /**< @brief Balinese Script */
Bamu, /**< @brief Bamum Script */
Batk, /**< @brief Batak Script */
Beng, /**< @brief Bengali Script */
Bopo, /**< @brief Bopomofo Script */
Brah, /**< @brief Brahmi Script */
Brai, /**< @brief Braille Script */
Bugi, /**< @brief Buginese Script */
Buhd, /**< @brief Buhid Script */
Cans, /**< @brief Unified Canadian Aboriginal Syllabics */
Cari, /**< @brief Carian Script */
Cakm, /**< @brief Chakma Script */
Cham, /**< @brief Cham Script */
Cher, /**< @brief Cherokee Script */
Copt, /**< @brief Coptic Script */
Cprt, /**< @brief Cypriot Script */
Cyrl, /**< @brief Cyrillic Script */
Deva, /**< @brief Devanagari Script */
Dsrt, /**< @brief Deseret Script */
Egyp, /**< @brief Egyptian Hieroglyphs */
Ethi, /**< @brief Ethiopic Script */
Geor, /**< @brief Georgian Script */
Glag, /**< @brief Glagolitic Script */
Goth, /**< @brief Gothic Script */
Grek, /**< @brief Greek Script */
Gujr, /**< @brief Gujarati Script */
Guru, /**< @brief Gurmukhi Script */
Hang, /**< @brief Hangul Script */
Hano, /**< @brief Hanunoo Script */
Hant, /**< @brief Han (Traditional) Script */
Hebr, /**< @brief Hebrew Script */
Hira, /**< @brief Hiragana Script */
Ital, /**< @brief Old Italic Script */
Java, /**< @brief Javanese Script */
Kali, /**< @brief Kayah Li Script */
Kana, /**< @brief Katakana Script */
Khar, /**< @brief Kharoshthi Script */
Khmr, /**< @brief Khmer Script */
Knda, /**< @brief Kannada Script */
Kthi, /**< @brief Kaithi Script */
Lana, /**< @brief Tai Tham Script */
Laoo, /**< @brief Lao Script */
Latn, /**< @brief Latin Script */
Lepc, /**< @brief Lepcha Script */
Limb, /**< @brief Limbu Script */
Linb, /**< @brief Linear B Script */
Lisu, /**< @brief Lisu Script */
Lyci, /**< @brief Lycian Script */
Lydi, /**< @brief Lydian Script */
Mand, /**< @brief Mandaic Script */
Merc, /**< @brief Meroitic Cursive Script */
Mero, /**< @brief Meroitic Hieroglyphs */
Mlym, /**< @brief Malayalam Script */
Mong, /**< @brief Mongolian Script */
Mtei, /**< @brief Meitei Mayek Script */
Mymr, /**< @brief Myanmar Script */
Nkoo, /**< @brief N'Ko Script */
Ogam, /**< @brief Ogham Script */
Olck, /**< @brief Ol Chiki Script */
Orkh, /**< @brief Old Turkic Script */
Orya, /**< @brief Oriya Script */
Osma, /**< @brief Osmanya Script */
Phag, /**< @brief Phags-Pa Script */
Phli, /**< @brief Inscriptional Pahlavi Script */
Phnx, /**< @brief Phoenician Script */
Plrd, /**< @brief Miao Script */
Prti, /**< @brief Inscriptional Parthian Script */
Rjng, /**< @brief Rejang Script */
Runr, /**< @brief Runic Script */
Samr, /**< @brief Samaritan Script */
Sarb, /**< @brief Old South Arabian Script */
Saur, /**< @brief Saurashtra Script */
Shaw, /**< @brief Shavian Script */
Shrd, /**< @brief Sharada Script */
Sinh, /**< @brief Sinhala Script */
Sora, /**< @brief Sora Sompeng Script */
Sund, /**< @brief Sundanese Script */
Sylo, /**< @brief Syloti Nagri Script */
Syrn, /**< @brief Syriac (Eastern) Script */
Tagb, /**< @brief Tagbanwa Script */
Takr, /**< @brief Takri Script */
Tale, /**< @brief Tai Le Script */
Talu, /**< @brief New Tai Lue Script */
Taml, /**< @brief Tamil Script */
Tavt, /**< @brief Tai Viet Script */
Telu, /**< @brief Telugu Script */
Tfng, /**< @brief Tifinagh Script */
Tglg, /**< @brief Tagalog Script */
Thaa, /**< @brief Thaana Script */
Thai, /**< @brief Thai Script */
Tibt, /**< @brief Tibetan Script */
Ugar, /**< @brief Ugaritic Script */
Vaii, /**< @brief Vai Script */
Xpeo, /**< @brief Old Persian Script */
Xsux, /**< @brief Cuneiform Script */
Yiii, /**< @brief Yi Script */
Zyyy, /**< @brief Common or Inherited Script */
Zzzz, /**< @brief Unknown Script */
};

/** @brief Lookup the Script for a Unicode codepoint.
*
* @param c The Unicode codepoint to lookup.
* @return The Script of the Unicode codepoint.
*/
script lookup_script(codepoint_t c);


//@}
/** @name ctype-style APIs
* @brief These functions provide wctype compatible functions using the UCD data.

+ 3042
- 0
src/scripts.cpp
File diff suppressed because it is too large
View File


+ 114
- 2
tests/printucddata.cpp View File

@@ -78,18 +78,130 @@ const char *get_category_string(ucd::category c)
}
}

/** Return the identifier of a script enumerator as a C string.
 *
 * Every enumerator maps to a string spelled exactly like its own name; any
 * value that matches no enumerator yields "----".
 */
const char *get_script_string(ucd::script s)
{
	using namespace ucd;

	// The returned text is always the enumerator's own spelling, so each case
	// label is produced by stringising the enumerator name.
#define SCRIPT_NAME_CASE(id) case id: return #id

	switch (s)
	{
	SCRIPT_NAME_CASE(Arab);
	SCRIPT_NAME_CASE(Armi);
	SCRIPT_NAME_CASE(Armn);
	SCRIPT_NAME_CASE(Avst);
	SCRIPT_NAME_CASE(Bali);
	SCRIPT_NAME_CASE(Bamu);
	SCRIPT_NAME_CASE(Batk);
	SCRIPT_NAME_CASE(Beng);
	SCRIPT_NAME_CASE(Bopo);
	SCRIPT_NAME_CASE(Brah);
	SCRIPT_NAME_CASE(Brai);
	SCRIPT_NAME_CASE(Bugi);
	SCRIPT_NAME_CASE(Buhd);
	SCRIPT_NAME_CASE(Cans);
	SCRIPT_NAME_CASE(Cari);
	SCRIPT_NAME_CASE(Cakm);
	SCRIPT_NAME_CASE(Cham);
	SCRIPT_NAME_CASE(Cher);
	SCRIPT_NAME_CASE(Copt);
	SCRIPT_NAME_CASE(Cprt);
	SCRIPT_NAME_CASE(Cyrl);
	SCRIPT_NAME_CASE(Deva);
	SCRIPT_NAME_CASE(Dsrt);
	SCRIPT_NAME_CASE(Egyp);
	SCRIPT_NAME_CASE(Ethi);
	SCRIPT_NAME_CASE(Geor);
	SCRIPT_NAME_CASE(Glag);
	SCRIPT_NAME_CASE(Goth);
	SCRIPT_NAME_CASE(Grek);
	SCRIPT_NAME_CASE(Gujr);
	SCRIPT_NAME_CASE(Guru);
	SCRIPT_NAME_CASE(Hang);
	SCRIPT_NAME_CASE(Hano);
	SCRIPT_NAME_CASE(Hant);
	SCRIPT_NAME_CASE(Hebr);
	SCRIPT_NAME_CASE(Hira);
	SCRIPT_NAME_CASE(Ital);
	SCRIPT_NAME_CASE(Java);
	SCRIPT_NAME_CASE(Kali);
	SCRIPT_NAME_CASE(Kana);
	SCRIPT_NAME_CASE(Khar);
	SCRIPT_NAME_CASE(Khmr);
	SCRIPT_NAME_CASE(Knda);
	SCRIPT_NAME_CASE(Kthi);
	SCRIPT_NAME_CASE(Lana);
	SCRIPT_NAME_CASE(Laoo);
	SCRIPT_NAME_CASE(Latn);
	SCRIPT_NAME_CASE(Lepc);
	SCRIPT_NAME_CASE(Limb);
	SCRIPT_NAME_CASE(Linb);
	SCRIPT_NAME_CASE(Lisu);
	SCRIPT_NAME_CASE(Lyci);
	SCRIPT_NAME_CASE(Lydi);
	SCRIPT_NAME_CASE(Mand);
	SCRIPT_NAME_CASE(Merc);
	SCRIPT_NAME_CASE(Mero);
	SCRIPT_NAME_CASE(Mlym);
	SCRIPT_NAME_CASE(Mong);
	SCRIPT_NAME_CASE(Mtei);
	SCRIPT_NAME_CASE(Mymr);
	SCRIPT_NAME_CASE(Nkoo);
	SCRIPT_NAME_CASE(Ogam);
	SCRIPT_NAME_CASE(Olck);
	SCRIPT_NAME_CASE(Orkh);
	SCRIPT_NAME_CASE(Orya);
	SCRIPT_NAME_CASE(Osma);
	SCRIPT_NAME_CASE(Phag);
	SCRIPT_NAME_CASE(Phli);
	SCRIPT_NAME_CASE(Phnx);
	SCRIPT_NAME_CASE(Plrd);
	SCRIPT_NAME_CASE(Prti);
	SCRIPT_NAME_CASE(Rjng);
	SCRIPT_NAME_CASE(Runr);
	SCRIPT_NAME_CASE(Samr);
	SCRIPT_NAME_CASE(Sarb);
	SCRIPT_NAME_CASE(Saur);
	SCRIPT_NAME_CASE(Shaw);
	SCRIPT_NAME_CASE(Shrd);
	SCRIPT_NAME_CASE(Sinh);
	SCRIPT_NAME_CASE(Sora);
	SCRIPT_NAME_CASE(Sund);
	SCRIPT_NAME_CASE(Sylo);
	SCRIPT_NAME_CASE(Syrn);
	SCRIPT_NAME_CASE(Tagb);
	SCRIPT_NAME_CASE(Takr);
	SCRIPT_NAME_CASE(Tale);
	SCRIPT_NAME_CASE(Talu);
	SCRIPT_NAME_CASE(Taml);
	SCRIPT_NAME_CASE(Tavt);
	SCRIPT_NAME_CASE(Telu);
	SCRIPT_NAME_CASE(Tfng);
	SCRIPT_NAME_CASE(Tglg);
	SCRIPT_NAME_CASE(Thaa);
	SCRIPT_NAME_CASE(Thai);
	SCRIPT_NAME_CASE(Tibt);
	SCRIPT_NAME_CASE(Ugar);
	SCRIPT_NAME_CASE(Vaii);
	SCRIPT_NAME_CASE(Xpeo);
	SCRIPT_NAME_CASE(Xsux);
	SCRIPT_NAME_CASE(Yiii);
	SCRIPT_NAME_CASE(Zyyy);
	SCRIPT_NAME_CASE(Zzzz);
	default: return "----";
	}

#undef SCRIPT_NAME_CASE
}

int main()
{
for (ucd::codepoint_t c = 0; c <= 0x10FFFF; ++c)
{
const char *script = get_script_string(ucd::lookup_script(c));
const char *category = get_category_string(ucd::lookup_category(c));
const char *category_group = get_category_group_string(ucd::lookup_category_group(c));
ucd::codepoint_t upper = ucd::toupper(c);
ucd::codepoint_t lower = ucd::tolower(c);
ucd::codepoint_t title = ucd::totitle(c);
const char *whitespace = ucd::isspace(c) ? "White_Space" : "";
printf("%06X %s %s %06X %06X %06X %s\n",
c, category_group, category,
printf("%06X %s %s %s %06X %06X %06X %s\n",
c, script, category_group, category,
upper, lower, title,
whitespace);
}

+ 102
- 0
tools/iana.py View File

@@ -0,0 +1,102 @@
#!/usr/bin/python

# Copyright (C) 2012 Reece H. Dunn
#
# This file is part of ucd-tools.
#
# ucd-tools is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ucd-tools is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.

import os

def read_data(path, split_char=':'):
    """Yield the fields of each non-comment line of the file at `path`.

    Every line has its newline removed and is then split on `split_char`.
    Lines that begin with '#' are skipped.
    """
    with open(path) as source:
        for raw in source:
            text = raw.replace('\n', '')
            if text.startswith('#'):
                continue
            yield text.split(split_char)

def fold_lines(path):
    """Yield the logical lines of the file at `path`, joining continuations.

    A physical line that starts with a space continues the previous logical
    line: its first character is dropped and the rest is appended to the text
    collected so far. Empty logical lines are not yielded.
    """
    pending = None
    with open(path) as f:
        for line in f:
            line = line.replace('\n', '')
            if line.startswith(' '):
                # A continuation with nothing before it starts a new logical
                # line rather than being glued onto the text 'None'.
                pending = '%s%s' % (pending or '', line[1:])
                continue
            if pending:
                yield pending
            pending = line
    # The last logical line has no following line to trigger the yield inside
    # the loop, so it is flushed here; otherwise it would be silently dropped.
    if pending:
        yield pending

def iana_subtag_entries(path):
    """Yield one dict of field name -> value for each record in the file.

    Records are separated by lines consisting of '%%'. Each remaining logical
    line (as produced by fold_lines) is split at the first ': ' into a key and
    a value. Records that have no 'Type' field are not yielded.
    """
    tag = {}
    for line in fold_lines(path):
        if line == '%%':
            if 'Type' in tag:
                yield tag
            tag = {}
            continue

        # Split at the first ': ' only, so that values containing ': ' stay
        # intact; a line without the separator gets an empty value.
        key, _, value = line.partition(': ')

        # Only select the first Description. This handles subtag codes
        # that have multiple descriptions (e.g. 'es' maps to "Spanish"
        # and "Castilian"). Every other field keeps its last value.
        if key != 'Description' or key not in tag:
            tag[key] = value

    # The final record is not followed by a separator. Apply the same 'Type'
    # filter as above so that an empty file or a trailing '%%' does not
    # produce an empty record.
    if 'Type' in tag:
        yield tag

typemap = {
'extlang': 'ExtLang',
'grandfathered': 'Grandfathered',
'language': 'Language',
'redundant': 'Redundant',
'region': 'Region',
'script': 'Script',
'variant': 'Variant',
}

scopemap = {
'collection': 'Collection',
'macrolanguage': 'MacroLanguage',
'special': 'Special',
'private-use': 'PrivateUse',
}

def read_iana_subtags(path):
    """Return a dict mapping each subtag (or tag) to its registry record.

    The 'Subtag' (or, failing that, 'Tag') field is removed from the record
    and used as the key. A record's 'Type' is replaced by its scopemap entry
    when the record has a 'Scope' field (which is then removed), and by its
    typemap entry otherwise. Keys containing '..' are left out of the result.

    Raises Exception if a record has a 'Scope' but its 'Type' is not
    'language'.
    """
    subtags = {}
    for entry in iana_subtag_entries(path):
        name_field = 'Subtag' if 'Subtag' in entry else 'Tag'
        ref = entry.pop(name_field)

        if 'Scope' not in entry:
            entry['Type'] = typemap[ entry['Type'] ]
        else:
            if entry['Type'] != 'language':
                raise Exception('"Scope" property unexpected for Type="%s"' % entry['Type'])

            entry['Type'] = scopemap[ entry['Scope'] ]
            del entry['Scope']

        if '..' in ref: # exclude private use definitions
            continue
        subtags[ref] = entry
    return subtags

+ 19
- 11
tools/printdata.py View File

@@ -32,21 +32,29 @@ for data in ucd.parse_ucd_data(ucd_rootdir, 'PropList'):
if data['Property'] in ['White_Space']:
for codepoint in data['Range']:
unicode_chars[codepoint]['Properties'].append(data['Property'])
for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'):
for codepoint in data['Range']:
unicode_chars[codepoint]['Script'] = data['Script']

null = ucd.CodePoint('0000')
if __name__ == '__main__':
for codepoint in ucd.CodeRange('000000..10FFFF'):
try:
data = unicode_chars[codepoint]
title = data['TitleCase']
upper = data['UpperCase']
lower = data['LowerCase']
if title == null: title = codepoint
if upper == null: upper = codepoint
if lower == null: lower = codepoint
print '%s %s %s %s %s %s %s' % (
codepoint, data['GeneralCategory'][0], data['GeneralCategory'],
upper, lower, title,
' '.join(data['Properties']))
except KeyError:
print '%s C Cn %s %s %s ' % (codepoint, codepoint, codepoint, codepoint)
data = {'GeneralCategory': 'Cn', 'TitleCase': codepoint, 'UpperCase': codepoint, 'LowerCase': codepoint, 'Properties': []}
try:
script = data['Script']
except KeyError:
script = 'Zzzz'
title = data['TitleCase']
upper = data['UpperCase']
lower = data['LowerCase']
if title == null: title = codepoint
if upper == null: upper = codepoint
if lower == null: lower = codepoint
print '%s %s %s %s %s %s %s %s' % (
codepoint, script,
data['GeneralCategory'][0], data['GeneralCategory'],
upper, lower, title,
' '.join(data['Properties']))

+ 172
- 0
tools/scripts.py View File

@@ -0,0 +1,172 @@
#!/usr/bin/python

# Copyright (C) 2012 Reece H. Dunn
#
# This file is part of ucd-tools.
#
# ucd-tools is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ucd-tools is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.

import os
import sys
import ucd

ucd_rootdir = sys.argv[1]
ucd_version = sys.argv[2]

# Map every codepoint covered by the Scripts data to the script of its range.
unicode_chars = {}
for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'):
    unicode_chars.update((codepoint, data['Script']) for codepoint in data['Range'])

# This map is a combination of the information in the UnicodeData and Blocks
# data files. It is intended to reduce the number of character tables that
# need to be generated.
script_sets = [
(ucd.CodeRange('000000..00D7FF'), None, 'Multiple Blocks'),
(ucd.CodeRange('00D800..00DFFF'), 'Zzzz', 'Surrogates'),
(ucd.CodeRange('00E000..00F8FF'), 'Zzzz', 'Private Use Area'),
(ucd.CodeRange('00F900..02FAFF'), None, 'Multiple Blocks'),
(ucd.CodeRange('02FB00..0DFFFF'), 'Zzzz', 'Unassigned'),
(ucd.CodeRange('0E0000..0E01FF'), None, 'Multiple Blocks'),
(ucd.CodeRange('0E0200..0EFFFF'), 'Zzzz', 'Unassigned'),
(ucd.CodeRange('0F0000..0FFFFD'), 'Zzzz', 'Plane 15 Private Use'),
(ucd.CodeRange('0FFFFE..0FFFFF'), 'Zzzz', 'Plane 15 Private Use'),
(ucd.CodeRange('100000..10FFFD'), 'Zzzz', 'Plane 16 Private Use'),
(ucd.CodeRange('10FFFE..10FFFF'), 'Zzzz', 'Plane 16 Private Use'),
]

# These scripts have many pages consisting of just this script:
special_scripts = []

# Build the per-page lookup data for every range in script_sets that has no
# fixed script. Each range gets a dict keyed by the first codepoint of each
# 256-codepoint page; the value is either a script name (the whole page is
# that one script) or a list with the script of every codepoint on the page.
# The dicts are stored in script_tables under the key '<first>_<last>'.
script_tables = {}
for codepoints, script, comment in script_sets:
    if not script:
        table = {}
        table_entry = None      # scripts collected for the current page
        table_codepoint = None  # first codepoint of the current page
        table_script = None     # script shared by the page so far, else None
        for i, codepoint in enumerate(codepoints):
            try:
                script = unicode_chars[codepoint]
            except KeyError:
                script = 'Zzzz' # Unknown
            if (i % 256) == 0:
                # Page boundary: store the page that has just been completed
                # (nothing to store before the first page).
                if table_entry:
                    if table_script in special_scripts:
                        table[table_codepoint] = table_script
                    elif table_script:
                        # First uniform page seen for this script: register
                        # the script so a shared table is generated for it.
                        special_scripts.append(table_script)
                        table[table_codepoint] = table_script
                    else:
                        table[table_codepoint] = table_entry
                table_entry = []
                table_codepoint = codepoint
                table_script = script
            if script != table_script:
                # The page mixes scripts, so it cannot use a shared table.
                table_script = None
            table_entry.append(script)
        # Store the last page of the range. Unlike the in-loop case, a uniform
        # page whose script is not yet in special_scripts is kept as a list.
        if table_entry:
            if table_script in special_scripts:
                table[table_codepoint] = table_script
            else:
                table[table_codepoint] = table_entry
        script_tables['%s_%s' % (codepoints.first, codepoints.last)] = table

# When run as a script, write the generated C++ source that implements
# ucd::lookup_script to standard output.
if __name__ == '__main__':
    # File header: licence text, includes and the UCD version given on the
    # command line.
    sys.stdout.write("""/* Unicode Scripts
*
* Copyright (C) 2012 Reece H. Dunn
*
* This file is part of ucd-tools.
*
* ucd-tools is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ucd-tools is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/

// NOTE: This file is automatically generated from the Scripts.txt file in
// the Unicode Character database by the ucd-tools/tools/scripts.py script.

#include "ucd/ucd.h"

#include <stddef.h>

using namespace ucd;

// Unicode Character Data %s
""" % ucd_version)

    # One shared 256-entry table per script in special_scripts; every entry
    # of the table is that script.
    for script in special_scripts:
        sys.stdout.write('\n')
        sys.stdout.write('static const uint8_t scripts_%s[256] =\n' % script)
        sys.stdout.write('{')
        for i in range(0, 256):
            if (i % 16) == 0:
                # Start a new row of 16 values, labelled with its offset.
                sys.stdout.write('\n\t/* %02X */' % i)
            sys.stdout.write(' %s,' % script)
        sys.stdout.write('\n};\n')

    # One table per page that is stored as a list of per-codepoint scripts,
    # named after the first codepoint of the page.
    for codepoints, script, comment in script_sets:
        if not script:
            tables = script_tables['%s_%s' % (codepoints.first, codepoints.last)]
            for codepoint in sorted(tables.keys()):
                table = tables[codepoint]
                # Pages stored as a script name use the shared table written
                # above, so no table of their own is emitted.
                if table in special_scripts:
                    continue

                sys.stdout.write('\n')
                sys.stdout.write('static const uint8_t scripts_%s[256] =\n' % codepoint)
                sys.stdout.write('{')
                for i, script in enumerate(table):
                    if (i % 16) == 0:
                        sys.stdout.write('\n\t/* %02X */' % i)
                    sys.stdout.write(' %s,' % script)
                sys.stdout.write('\n};\n')

    # One index array per range: a pointer to the table of each page, in
    # sorted key order.
    for codepoints, script, comment in script_sets:
        if not script:
            table_index = '%s_%s' % (codepoints.first, codepoints.last)
            sys.stdout.write('\n')
            sys.stdout.write('static const uint8_t *scripts_%s[] =\n' % table_index)
            sys.stdout.write('{\n')
            for codepoint, table in sorted(script_tables[table_index].items()):
                if isinstance(table, str):
                    # Uniform page: point at the shared per-script table and
                    # note the page's first codepoint in a comment.
                    sys.stdout.write('\tscripts_%s, // %s\n' % (table, codepoint))
                else:
                    sys.stdout.write('\tscripts_%s,\n' % codepoint)
            sys.stdout.write('};\n')

    # The lookup function: the ranges are tested in script_sets order using
    # only their upper bound. Fixed-script ranges return immediately; the
    # others index the page array and then the page table.
    sys.stdout.write('\n')
    sys.stdout.write('ucd::script ucd::lookup_script(codepoint_t c)\n')
    sys.stdout.write('{\n')
    for codepoints, script, comment in script_sets:
        if script:
            sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, script, codepoints, comment))
        else:
            sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints))
            sys.stdout.write('\t{\n')
            sys.stdout.write('\t\tconst uint8_t *table = scripts_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first))
            sys.stdout.write('\t\treturn (ucd::script)table[c % 256];\n')
            sys.stdout.write('\t}\n')
    sys.stdout.write('\treturn Zzzz; // Invalid Unicode Codepoint\n')
    sys.stdout.write('}\n')

+ 28
- 1
tools/ucd.py View File

@@ -19,6 +19,30 @@

import os
import sys
import iana

# Maps UCD script names (as they appear in the Scripts data) to four-letter
# script codes. It is seeded with the names that cannot be derived from the
# IANA registry and then extended from the registry's script entries.
script_map = {
    # UCD script names not derivable from IANA script tags:
    'Canadian_Aboriginal': 'Cans',
    'Common': 'Zyyy',
    'Egyptian_Hieroglyphs': 'Egyp',
    'Inherited': 'Zyyy',
    'Meetei_Mayek': 'Mtei',
    'Nko': 'Nkoo',
    'Phags_Pa': 'Phag',
    # Codes in http://www.unicode.org/iso15924/iso15924-codes.html not in IANA:
    'Cuneiform': 'Xsux',
}
# Descriptions that differ only in a parenthesised qualifier collapse to the
# same key below, and the last entry processed wins. Walking the registry in
# sorted subtag order makes that outcome independent of dict iteration order.
for ref, tag in sorted(iana.read_iana_subtags('data/language-subtag-registry').items(),
                       key=lambda item: item[0]):
    if tag['Type'] == 'Script':
        # Convert the IANA script tag descriptions to the UCD script names:
        desc = tag['Description']
        if ' (' in desc:
            desc = desc.split(' (')[0]
        desc = desc.replace(' ', '_')
        script_map[desc] = ref
# Fix up incorrectly mapped script names:
script_map['Cyrillic'] = 'Cyrl'

class CodePoint:
def __init__(self, x):
@@ -86,6 +110,9 @@ def boolean(x):
return True
return False

def script(x):
    """Translate a UCD script name into its script code via script_map.

    Raises KeyError when the name has no entry in script_map.
    """
    code = script_map[x]
    return code

data_items = {
'Blocks': [
('Range', codepoint),
@@ -101,7 +128,7 @@ data_items = {
],
'Scripts': [
('Range', codepoint),
('Script', str),
('Script', script),
],
'UnicodeData': [
('CodePoint', codepoint),

Loading…
Cancel
Save