| return self.last.codepoint - self.first.codepoint + 1 | return self.last.codepoint - self.first.codepoint + 1 | ||||
def codepoint(x):
    """Parse the leading field of ``x`` as a code point, sequence or range.

    ``x`` is the list of fields still to be consumed for the current line.
    Returns a ``(value, remaining_fields)`` pair, where ``value`` is:

    * a ``CodeRange`` when the field contains ``'..'``,
    * a list of ``CodePoint`` objects when the field contains spaces,
    * ``CodePoint('0000')`` when the field is empty,
    * a single ``CodePoint`` otherwise.
    """
    field, rest = x[0], x[1:]
    if '..' in field:
        return CodeRange(field), rest
    # Test the field itself, not the list: `' ' in x` would only be true if
    # some whole field were exactly a single space.
    if ' ' in field:
        return [CodePoint(c) for c in field.split()], rest
    if field == '':
        return CodePoint('0000'), rest
    return CodePoint(field), rest
def string(x):
    """Consume the leading field of ``x`` as a string.

    ``x`` is the list of fields still to be consumed for the current line.
    Returns ``(value, remaining_fields)``; an empty field yields ``None``
    as the value.
    """
    field, rest = x[0], x[1:]
    if field == '':
        return None, rest
    return field, rest
def integer(x):
    """Consume the leading field of ``x`` as an integer.

    ``x`` is the list of fields still to be consumed for the current line.
    Returns ``(int(field), remaining_fields)``. Raises ``ValueError`` if the
    field is not a valid integer literal (including the empty string).
    """
    return int(x[0]), x[1:]
def boolean(x):
    """Consume the leading field of ``x`` as a boolean flag.

    ``x`` is the list of fields still to be consumed for the current line.
    Returns ``(flag, remaining_fields)`` where ``flag`` is ``True`` only when
    the field is exactly ``'Y'``; any other value (including empty) is
    ``False``.
    """
    return x[0] == 'Y', x[1:]
def script(x):
    """Consume the leading field of ``x`` and translate it via ``script_map``.

    ``x`` is the list of fields still to be consumed for the current line.
    Returns ``(script_map[field], remaining_fields)``. ``script_map`` is
    defined elsewhere in this module; a field it does not contain propagates
    whatever lookup error that object raises (``KeyError`` if it is a dict).
    """
    return script_map[x[0]], x[1:]
def strlist(x):
    """Consume every remaining field of ``x`` as a list of strings.

    ``x`` is the list of fields still to be consumed for the current line.
    Returns ``(x, [])``: the value is the full remaining list (the same
    object, not a copy) and nothing is left for later parsers, so this must
    be the last entry in a field specification.
    """
    return x, []
| data_items = { | data_items = { | ||||
| 'Blocks': [ | 'Blocks': [ | ||||
| ('Range', codepoint), | ('Range', codepoint), | ||||
| ('Name', str) | |||||
| ('Name', string) | |||||
| ], | ], | ||||
| 'DerivedAge': [ | 'DerivedAge': [ | ||||
| ('Range', codepoint), | ('Range', codepoint), | ||||
| ('Age', str), | |||||
| ('Age', string), | |||||
| ], | ], | ||||
| 'PropList': [ | 'PropList': [ | ||||
| ('Range', codepoint), | ('Range', codepoint), | ||||
| ('Property', str), | |||||
| ('Property', string), | |||||
| ], | |||||
| 'PropertyValueAliases': [ | |||||
| ('Property', string), | |||||
| ('Key', string), | |||||
| ('Value', string), | |||||
| ('Aliases', strlist), | |||||
| ], | ], | ||||
| 'Scripts': [ | 'Scripts': [ | ||||
| ('Range', codepoint), | ('Range', codepoint), | ||||
| ('CodePoint', codepoint), | ('CodePoint', codepoint), | ||||
| ('Name', string), | ('Name', string), | ||||
| ('GeneralCategory', string), | ('GeneralCategory', string), | ||||
| ('CanonicalCombiningClass', int), | |||||
| ('CanonicalCombiningClass', integer), | |||||
| ('BidiClass', string), | ('BidiClass', string), | ||||
| ('DecompositionType', string), | ('DecompositionType', string), | ||||
| ('DecompositionMapping', string), | ('DecompositionMapping', string), | ||||
| # Supplemental Data: | # Supplemental Data: | ||||
| 'Klingon': [ | 'Klingon': [ | ||||
| ('CodePoint', codepoint), | ('CodePoint', codepoint), | ||||
| ('Script', str), | |||||
| ('Script', string), | |||||
| ('GeneralCategory', string), | ('GeneralCategory', string), | ||||
| ('Name', string), | ('Name', string), | ||||
| ('Transliteration', string), | ('Transliteration', string), | ||||
| for line in f: | for line in f: | ||||
| line = line.replace('\n', '').split('#')[0] | line = line.replace('\n', '').split('#')[0] | ||||
| linedata = [' '.join(x.split()) for x in line.split(';')] | linedata = [' '.join(x.split()) for x in line.split(';')] | ||||
| if len(linedata) == len(keys): | |||||
| if len(linedata) > 1: | |||||
| if linedata[1].endswith(', First>'): | if linedata[1].endswith(', First>'): | ||||
| first = linedata | first = linedata | ||||
| continue | continue | ||||
| first = None | first = None | ||||
| data = {} | data = {} | ||||
| for keydata, value in zip(keys, linedata): | |||||
| key, typemap = keydata | |||||
| if key: | |||||
| data[key] = typemap(value) | |||||
| for key, typemap in keys: | |||||
| data[key], linedata = typemap(linedata) | |||||
| yield data | yield data | ||||
| if __name__ == '__main__': | if __name__ == '__main__': |