|
|
|
|
|
|
|
|
return self.last.codepoint - self.first.codepoint + 1 |
|
|
return self.last.codepoint - self.first.codepoint + 1 |
|
|
|
|
|
|
|
|
def codepoint(x): |
|
|
def codepoint(x): |
|
|
if '..' in x: |
|
|
|
|
|
return CodeRange(x) |
|
|
|
|
|
|
|
|
if '..' in x[0]: |
|
|
|
|
|
return CodeRange(x[0]), x[1:] |
|
|
if ' ' in x: |
|
|
if ' ' in x: |
|
|
return [CodePoint(c) for c in x.split()] |
|
|
|
|
|
if x == '': |
|
|
|
|
|
return CodePoint('0000') |
|
|
|
|
|
return CodePoint(x) |
|
|
|
|
|
|
|
|
return [CodePoint(c) for c in x[0].split()], x[1:] |
|
|
|
|
|
if x[0] == '': |
|
|
|
|
|
return CodePoint('0000'), x[1:] |
|
|
|
|
|
return CodePoint(x[0]), x[1:] |
|
|
|
|
|
|
|
|
def string(x): |
|
|
def string(x): |
|
|
if x == '': |
|
|
|
|
|
return None |
|
|
|
|
|
return x |
|
|
|
|
|
|
|
|
if x[0] == '': |
|
|
|
|
|
return None, x[1:] |
|
|
|
|
|
return x[0], x[1:] |
|
|
|
|
|
|
|
|
|
|
|
def integer(x): |
|
|
|
|
|
return int(x[0]), x[1:] |
|
|
|
|
|
|
|
|
def boolean(x): |
|
|
def boolean(x): |
|
|
if x == 'Y': |
|
|
|
|
|
return True |
|
|
|
|
|
return False |
|
|
|
|
|
|
|
|
if x[0] == 'Y': |
|
|
|
|
|
return True, x[1:] |
|
|
|
|
|
return False, x[1:] |
|
|
|
|
|
|
|
|
def script(x): |
|
|
def script(x): |
|
|
return script_map[x] |
|
|
|
|
|
|
|
|
return script_map[x[0]], x[1:] |
|
|
|
|
|
|
|
|
|
|
|
def strlist(x): |
|
|
|
|
|
return x, [] |
|
|
|
|
|
|
|
|
data_items = { |
|
|
data_items = { |
|
|
'Blocks': [ |
|
|
'Blocks': [ |
|
|
('Range', codepoint), |
|
|
('Range', codepoint), |
|
|
('Name', str) |
|
|
|
|
|
|
|
|
('Name', string) |
|
|
], |
|
|
], |
|
|
'DerivedAge': [ |
|
|
'DerivedAge': [ |
|
|
('Range', codepoint), |
|
|
('Range', codepoint), |
|
|
('Age', str), |
|
|
|
|
|
|
|
|
('Age', string), |
|
|
], |
|
|
], |
|
|
'PropList': [ |
|
|
'PropList': [ |
|
|
('Range', codepoint), |
|
|
('Range', codepoint), |
|
|
('Property', str), |
|
|
|
|
|
|
|
|
('Property', string), |
|
|
|
|
|
], |
|
|
|
|
|
'PropertyValueAliases': [ |
|
|
|
|
|
('Property', string), |
|
|
|
|
|
('Key', string), |
|
|
|
|
|
('Value', string), |
|
|
|
|
|
('Aliases', strlist), |
|
|
], |
|
|
], |
|
|
'Scripts': [ |
|
|
'Scripts': [ |
|
|
('Range', codepoint), |
|
|
('Range', codepoint), |
|
|
|
|
|
|
|
|
('CodePoint', codepoint), |
|
|
('CodePoint', codepoint), |
|
|
('Name', string), |
|
|
('Name', string), |
|
|
('GeneralCategory', string), |
|
|
('GeneralCategory', string), |
|
|
('CanonicalCombiningClass', int), |
|
|
|
|
|
|
|
|
('CanonicalCombiningClass', integer), |
|
|
('BidiClass', string), |
|
|
('BidiClass', string), |
|
|
('DecompositionType', string), |
|
|
('DecompositionType', string), |
|
|
('DecompositionMapping', string), |
|
|
('DecompositionMapping', string), |
|
|
|
|
|
|
|
|
# Supplemental Data: |
|
|
# Supplemental Data: |
|
|
'Klingon': [ |
|
|
'Klingon': [ |
|
|
('CodePoint', codepoint), |
|
|
('CodePoint', codepoint), |
|
|
('Script', str), |
|
|
|
|
|
|
|
|
('Script', string), |
|
|
('GeneralCategory', string), |
|
|
('GeneralCategory', string), |
|
|
('Name', string), |
|
|
('Name', string), |
|
|
('Transliteration', string), |
|
|
('Transliteration', string), |
|
|
|
|
|
|
|
|
for line in f: |
|
|
for line in f: |
|
|
line = line.replace('\n', '').split('#')[0] |
|
|
line = line.replace('\n', '').split('#')[0] |
|
|
linedata = [' '.join(x.split()) for x in line.split(';')] |
|
|
linedata = [' '.join(x.split()) for x in line.split(';')] |
|
|
if len(linedata) == len(keys): |
|
|
|
|
|
|
|
|
if len(linedata) > 1: |
|
|
if linedata[1].endswith(', First>'): |
|
|
if linedata[1].endswith(', First>'): |
|
|
first = linedata |
|
|
first = linedata |
|
|
continue |
|
|
continue |
|
|
|
|
|
|
|
|
first = None |
|
|
first = None |
|
|
|
|
|
|
|
|
data = {} |
|
|
data = {} |
|
|
for keydata, value in zip(keys, linedata): |
|
|
|
|
|
key, typemap = keydata |
|
|
|
|
|
if key: |
|
|
|
|
|
data[key] = typemap(value) |
|
|
|
|
|
|
|
|
for key, typemap in keys: |
|
|
|
|
|
data[key], linedata = typemap(linedata) |
|
|
yield data |
|
|
yield data |
|
|
|
|
|
|
|
|
if __name__ == '__main__': |
|
|
if __name__ == '__main__': |