10 years ago · bcf8be59b3
--- a/Makefile.am
+++ b/Makefile.am
 	data/ucd/Scripts.txt
 # Regenerate the C++ data tables under src/ from the Unicode Character Data.
 # UCD_ROOTDIR and UCD_VERSION are passed to every generator script, followed
 # by UCD_FLAGS, which forwards optional generator flags (for example
 # --with-csur). Each generator is run exactly once: running it a second time
 # without UCD_FLAGS would only produce output that is immediately overwritten.
 ucd-update: tools/case.py tools/categories.py tools/scripts.py
 	tools/case.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/case.cpp
 	tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/categories.cpp
 	tools/scripts.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/scripts.cpp
 libucd_includedir = $(includedir)/ucd
 libucd_include_HEADERS = \
 	data/ucd/UnicodeData.txt \
 	data/ucd/PropList.txt \
 	data/ucd/Scripts.txt
 	tools/printdata.py ${UCD_ROOTDIR} > $@
 	tools/printdata.py ${UCD_ROOTDIR} ${UCD_FLAGS} > $@
 # Capture the standard output of the tests/printucddata program into the
 # target file. The target is rebuilt whenever tests/printucddata is newer.
 tests/unicode-data.actual: tests/printucddata
 	tests/printucddata > $@
--- a/README.md
+++ b/README.md
 The following data sets are used for the data tables:
 -  [Unicode Character Data 7.0.0](http://www.unicode.org/Public/7.0.0/ucd/).
 ## ConScript Unicode Registry
 If enabled (by passing `--with-csur` in `UCD_FLAGS`), the following data from the
 [ConScript Unicode Registry](http://www.evertype.com/standards/csur/) (CSUR) is
 added:
 | Code Range | Script  |
 |------------|---------|
 | F8D0-F8FF  | [Klingon](http://www.evertype.com/standards/csur/klingon.html) |
 This data is located in the `data/csur` directory.
 ## Build Dependencies
 In order to build ucd-tools, you need:
 where `VERSION` is the Unicode version (e.g. `6.3.0`).
 Additionally, you can use the `UCD_FLAGS` option to control how the data is
 generated. The following flags are supported:
 | Flag        | Description |
 |-------------|-------------|
 | --with-csur | Add ConScript Unicode Registry data. |
 ## Bugs
 Report bugs to the [ucd-tools issues](https://github.com/rhdunn/ucd-tools/issues)
--- a/tools/categories.py
+++ b/tools/categories.py
 # Record the 'GeneralCategory' field of every UnicodeData record against each
 # code point that record covers.
 for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
 	for codepoint in data['CodePoint']:
 		unicode_chars[codepoint] = data['GeneralCategory']
 # Optional ConScript Unicode Registry (CSUR) overlay: only applied when
 # '--with-csur' appears anywhere in the command-line arguments. The data sets
 # are read from the 'data/csur' directory (relative to the working directory).
 # This pass runs after the UnicodeData pass, so a CSUR entry replaces any
 # category already stored for the same code point.
 if '--with-csur' in sys.argv:
 	# 'Klingon' is the only CSUR data set listed here.
 	for csur in ['Klingon']:
 		for data in ucd.parse_ucd_data('data/csur', csur):
 			for codepoint in data['CodePoint']:
 				unicode_chars[codepoint] = data['GeneralCategory']
 # This map is a combination of the information in the UnicodeData and Blocks
 # data files. It is intended to reduce the number of character tables that
--- a/tools/printdata.py
+++ b/tools/printdata.py
 import ucd
 ucd_rootdir = sys.argv[1]
 csur_rootdir = 'data/csur'
 unicode_chars = {}
 for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
 # Attach the 'Script' value of each Scripts record to the entry of every code
 # point in the record's range. The entry is indexed, not created, so the code
 # point must already be present in unicode_chars.
 for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'):
 	for codepoint in data['Range']:
 		unicode_chars[codepoint]['Script'] = data['Script']
 # Optional ConScript Unicode Registry (CSUR) overlay: only applied when
 # '--with-csur' appears anywhere in the command-line arguments. Each CSUR
 # record replaces the whole unicode_chars entry for the code points it covers.
 if '--with-csur' in sys.argv:
 	# 'Klingon' is the only CSUR data set listed here.
 	for csur in ['Klingon']:
 		# Read from csur_rootdir (defined above) instead of repeating the path.
 		for data in ucd.parse_ucd_data(csur_rootdir, csur):
 			for codepoint in data['CodePoint']:
 				# Work on a per-code-point copy of the record (assumes the
 				# record is a plain mapping -- TODO confirm against ucd.py).
 				# Writing the defaults into the shared record would make
 				# every later code point of the same record inherit the
 				# first code point's case mappings and share one entry.
 				entry = dict(data)
 				# A missing case mapping defaults to the code point itself.
 				entry.setdefault('TitleCase', codepoint)
 				entry.setdefault('UpperCase', codepoint)
 				entry.setdefault('LowerCase', codepoint)
 				# A fresh list per entry, so property lists are never aliased.
 				entry.setdefault('Properties', [])
 				unicode_chars[codepoint] = entry
 null = ucd.CodePoint('0000')
 if __name__ == '__main__':
--- a/tools/scripts.py
+++ b/tools/scripts.py
 # Record the 'Script' field of every Scripts record against each code point in
 # the record's range.
 for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'):
 	for codepoint in data['Range']:
 		unicode_chars[codepoint] = data['Script']
 # Optional ConScript Unicode Registry (CSUR) overlay: only applied when
 # '--with-csur' appears anywhere in the command-line arguments. The data sets
 # are read from the 'data/csur' directory (relative to the working directory).
 # This pass runs after the Scripts pass, so a CSUR entry replaces any script
 # already stored for the same code point.
 if '--with-csur' in sys.argv:
 	# 'Klingon' is the only CSUR data set listed here.
 	for csur in ['Klingon']:
 		for data in ucd.parse_ucd_data('data/csur', csur):
 			for codepoint in data['CodePoint']:
 				unicode_chars[codepoint] = data['Script']
 # This map is a combination of the information in the UnicodeData and Blocks
 # data files. It is intended to reduce the number of character tables that
--- a/tools/ucd.py
+++ b/tools/ucd.py
 	return x, []
 data_items = {
 	# Unicode Character Data:
 	'Blocks': [
 		('Range', codepoint),
 		('Name', string)
 		('LowerCase', codepoint),
 		('TitleCase', codepoint),
 	],
 	# Supplemental Data:
 	# ConScript Unicode Registry Data:
 	'Klingon': [
 		('CodePoint', codepoint),
 		('Script', string),