Browse Source

ucd: support parsing PropertyValueAliases data

master
Reece H. Dunn 10 years ago
parent
commit
8a8f021a2c
1 changed file with 33 additions and 23 deletions
  1. tools/ucd.py: 33 additions, 23 deletions

+ 33
- 23
tools/ucd.py View File

return self.last.codepoint - self.first.codepoint + 1 return self.last.codepoint - self.first.codepoint + 1


def codepoint(x): def codepoint(x):
if '..' in x:
return CodeRange(x)
if '..' in x[0]:
return CodeRange(x[0]), x[1:]
if ' ' in x: if ' ' in x:
return [CodePoint(c) for c in x.split()]
if x == '':
return CodePoint('0000')
return CodePoint(x)
return [CodePoint(c) for c in x[0].split()], x[1:]
if x[0] == '':
return CodePoint('0000'), x[1:]
return CodePoint(x[0]), x[1:]


def string(x): def string(x):
if x == '':
return None
return x
if x[0] == '':
return None, x[1:]
return x[0], x[1:]

def integer(x):
return int(x[0]), x[1:]


def boolean(x): def boolean(x):
if x == 'Y':
return True
return False
if x[0] == 'Y':
return True, x[1:]
return False, x[1:]


def script(x): def script(x):
return script_map[x]
return script_map[x[0]], x[1:]

def strlist(x):
return x, []


data_items = { data_items = {
'Blocks': [ 'Blocks': [
('Range', codepoint), ('Range', codepoint),
('Name', str)
('Name', string)
], ],
'DerivedAge': [ 'DerivedAge': [
('Range', codepoint), ('Range', codepoint),
('Age', str),
('Age', string),
], ],
'PropList': [ 'PropList': [
('Range', codepoint), ('Range', codepoint),
('Property', str),
('Property', string),
],
'PropertyValueAliases': [
('Property', string),
('Key', string),
('Value', string),
('Aliases', strlist),
], ],
'Scripts': [ 'Scripts': [
('Range', codepoint), ('Range', codepoint),
('CodePoint', codepoint), ('CodePoint', codepoint),
('Name', string), ('Name', string),
('GeneralCategory', string), ('GeneralCategory', string),
('CanonicalCombiningClass', int),
('CanonicalCombiningClass', integer),
('BidiClass', string), ('BidiClass', string),
('DecompositionType', string), ('DecompositionType', string),
('DecompositionMapping', string), ('DecompositionMapping', string),
# Supplemental Data: # Supplemental Data:
'Klingon': [ 'Klingon': [
('CodePoint', codepoint), ('CodePoint', codepoint),
('Script', str),
('Script', string),
('GeneralCategory', string), ('GeneralCategory', string),
('Name', string), ('Name', string),
('Transliteration', string), ('Transliteration', string),
for line in f: for line in f:
line = line.replace('\n', '').split('#')[0] line = line.replace('\n', '').split('#')[0]
linedata = [' '.join(x.split()) for x in line.split(';')] linedata = [' '.join(x.split()) for x in line.split(';')]
if len(linedata) == len(keys):
if len(linedata) > 1:
if linedata[1].endswith(', First>'): if linedata[1].endswith(', First>'):
first = linedata first = linedata
continue continue
first = None first = None


data = {} data = {}
for keydata, value in zip(keys, linedata):
key, typemap = keydata
if key:
data[key] = typemap(value)
for key, typemap in keys:
data[key], linedata = typemap(linedata)
yield data yield data


if __name__ == '__main__': if __name__ == '__main__':

Loading…
Cancel
Save