@@ -92,9 +92,9 @@ tools/scripts.py: tools/ucd.py \
 		data/ucd/Scripts.txt
 
 ucd-update: tools/case.py tools/categories.py tools/scripts.py
-	tools/case.py ${UCD_ROOTDIR} ${UCD_VERSION} > src/case.cpp
-	tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} > src/categories.cpp
-	tools/scripts.py ${UCD_ROOTDIR} ${UCD_VERSION} > src/scripts.cpp
+	tools/case.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/case.cpp
+	tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/categories.cpp
+	tools/scripts.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/scripts.cpp
 
 libucd_includedir = $(includedir)/ucd
 libucd_include_HEADERS = \
@@ -120,7 +120,7 @@ tests/unicode-data.expected: tools/printdata.py tools/ucd.py \
 		data/ucd/UnicodeData.txt \
 		data/ucd/PropList.txt \
 		data/ucd/Scripts.txt
-	tools/printdata.py ${UCD_ROOTDIR} > $@
+	tools/printdata.py ${UCD_ROOTDIR} ${UCD_FLAGS} > $@
 
 tests/unicode-data.actual: tests/printucddata
 	tests/printucddata > $@
@@ -24,6 +24,18 @@ In addition to this it provides APIs for:
 The following data sets are used for the data tables:
 
 -  [Unicode Character Data 7.0.0](http://www.unicode.org/Public/7.0.0/ucd/).
 
+## ConScript Unicode Registry
+
+If enabled, the following data from the
+[ConScript Unicode Registry](http://www.evertype.com/standards/csur/) (CSUR) is
+added:
+
+| Code Range | Script  |
+|------------|---------|
+| F8D0-F8FF  | [Klingon](http://www.evertype.com/standards/csur/klingon.html) |
+
+This data is located in the `data/csur` directory.
+
 ## Build Dependencies
 
 In order to build ucd-tools, you need:
@@ -84,6 +96,13 @@ unicode is released, you need to run:
 where `VERSION` is the Unicode version (e.g. `6.3.0`).
 
+Additionally, you can use the `UCD_FLAGS` option to control how the data is
+generated. The following flags are supported:
+
+| Flag        | Description |
+|-------------|-------------|
+| --with-csur | Add ConScript Unicode Registry data. |
+
 ## Bugs
 
 Report bugs to the [ucd-tools issues](https://github.com/rhdunn/ucd-tools/issues)
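
Taken together with the `ucd-update` rule in the Makefile hunk above, the new flags table suggests an invocation along the lines of `make ucd-update UCD_VERSION=7.0.0 UCD_FLAGS=--with-csur`. This assumes both variables can be overridden on the make command line; how `UCD_ROOTDIR`, `UCD_VERSION` and `UCD_FLAGS` are actually defined is not shown in these hunks.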
@@ -28,6 +28,11 @@ unicode_chars = {}
 for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
     for codepoint in data['CodePoint']:
         unicode_chars[codepoint] = data['GeneralCategory']
 
+if '--with-csur' in sys.argv:
+    for csur in ['Klingon']:
+        for data in ucd.parse_ucd_data('data/csur', csur):
+            for codepoint in data['CodePoint']:
+                unicode_chars[codepoint] = data['GeneralCategory']
 # This map is a combination of the information in the UnicodeData and Blocks
 # data files. It is intended to reduce the number of character tables that
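
The hunk above builds a code point to General_Category map from UnicodeData and then, only when `--with-csur` is passed, overlays records parsed from `data/csur` on top of it. Below is a minimal, self-contained sketch of that merge pattern; the record shapes, category values and helper names are illustrative stand-ins, not the project's `ucd` module.

```python
import sys

# Illustrative stand-ins for the records yielded by ucd.parse_ucd_data();
# the real records use the project's CodePoint type rather than plain ints.
BASE_RECORDS = [
    {'CodePoint': [0x0041], 'GeneralCategory': 'Lu'},  # LATIN CAPITAL LETTER A
    {'CodePoint': [0x0061], 'GeneralCategory': 'Ll'},  # LATIN SMALL LETTER A
]
CSUR_RECORDS = [
    # F8D0 is the start of the CSUR Klingon range; the category is illustrative.
    {'CodePoint': [0xF8D0], 'GeneralCategory': 'Lo'},
]

def build_category_map(with_csur):
    """Merge the base UCD-derived data first, then overlay CSUR records."""
    categories = {}
    for record in BASE_RECORDS:
        for codepoint in record['CodePoint']:
            categories[codepoint] = record['GeneralCategory']
    if with_csur:
        for record in CSUR_RECORDS:
            for codepoint in record['CodePoint']:
                categories[codepoint] = record['GeneralCategory']
    return categories

if __name__ == '__main__':
    # Mirrors the script's flag handling: the overlay is gated on --with-csur.
    print(build_category_map('--with-csur' in sys.argv))
```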
@@ -22,6 +22,7 @@ import sys
 import ucd
 
 ucd_rootdir = sys.argv[1]
+csur_rootdir = 'data/csur'
 
 unicode_chars = {}
 for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
@@ -35,6 +36,15 @@ for data in ucd.parse_ucd_data(ucd_rootdir, 'PropList'):
 for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'):
     for codepoint in data['Range']:
         unicode_chars[codepoint]['Script'] = data['Script']
 
+if '--with-csur' in sys.argv:
+    for csur in ['Klingon']:
+        for data in ucd.parse_ucd_data('data/csur', csur):
+            for codepoint in data['CodePoint']:
+                if not 'TitleCase' in data: data['TitleCase'] = codepoint
+                if not 'UpperCase' in data: data['UpperCase'] = codepoint
+                if not 'LowerCase' in data: data['LowerCase'] = codepoint
+                if not 'Properties' in data: data['Properties'] = []
+                unicode_chars[codepoint] = data
 null = ucd.CodePoint('0000')
 
 if __name__ == '__main__':
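
The defaulting logic in this hunk is worth calling out: CSUR records that carry no explicit case mappings fall back to mapping the code point to itself, and `Properties` defaults to an empty list, before the record is stored. A small stand-alone sketch of that fallback follows; the field names mirror the hunk, but the function and values are illustrative rather than the project's actual parser, and it returns a copy instead of mutating the record in place purely to keep the example self-contained.

```python
def with_case_defaults(codepoint, record):
    """Fill in identity case mappings and empty properties where absent."""
    filled = dict(record)
    for field in ('TitleCase', 'UpperCase', 'LowerCase'):
        # A caseless character maps to itself in every case form.
        filled.setdefault(field, codepoint)
    filled.setdefault('Properties', [])
    return filled

# A CSUR-style record with no case information (values are illustrative).
record = {'CodePoint': [0xF8D0]}
print(with_case_defaults(0xF8D0, record))
# All three case mappings come back as 0xF8D0 and Properties as [].
```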
@@ -28,6 +28,11 @@ unicode_chars = {}
 for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'):
     for codepoint in data['Range']:
         unicode_chars[codepoint] = data['Script']
 
+if '--with-csur' in sys.argv:
+    for csur in ['Klingon']:
+        for data in ucd.parse_ucd_data('data/csur', csur):
+            for codepoint in data['CodePoint']:
+                unicode_chars[codepoint] = data['Script']
 # This map is a combination of the information in the UnicodeData and Blocks
 # data files. It is intended to reduce the number of character tables that
@@ -98,6 +98,7 @@ def strlist(x):
     return x, []
 
 data_items = {
+    # Unicode Character Data:
     'Blocks': [
        ('Range', codepoint),
        ('Name', string)
@@ -137,7 +138,7 @@ data_items = {
        ('LowerCase', codepoint),
        ('TitleCase', codepoint),
     ],
-    # Supplemental Data:
+    # ConScript Unicode Registry Data:
     'Klingon': [
        ('CodePoint', codepoint),
        ('Script', string),
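
Here `data_items` maps each data file to an ordered list of `(field, converter)` pairs, and the comment changes simply group the CSUR entries apart from the standard UCD ones. As a rough illustration of how such a schema can drive a parser for semicolon-separated records: UCD files use `;`-separated columns, but whether the files in `data/csur` follow the same layout, and what the converters in `tools/ucd.py` actually return, is not shown here, so treat the converters and the sample line below as assumptions.

```python
# Hypothetical converters standing in for the helpers defined in tools/ucd.py.
def codepoint(x):
    """Parse a hex code point, or an 'XXXX..YYYY' range, into a list of ints."""
    if '..' in x:
        first, last = x.split('..')
        return list(range(int(first, 16), int(last, 16) + 1))
    return [int(x, 16)]

def string(x):
    return x.strip()

# Same shape as the 'Klingon' schema entry above (only the visible fields).
KLINGON_SCHEMA = [
    ('CodePoint', codepoint),
    ('Script', string),
]

def parse_line(line, schema):
    """Apply each (field, converter) pair to its ';'-separated column."""
    columns = [column.strip() for column in line.split(';')]
    return {name: convert(value)
            for (name, convert), value in zip(schema, columns)}

print(parse_line('F8D0 ; Klingon', KLINGON_SCHEMA))
# {'CodePoint': [63696], 'Script': 'Klingon'}
```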