11 years ago · 8a8f021a2c
--- a/tools/ucd.py
+++ b/tools/ucd.py
@@ -94,39 +94,51 @@ class CodeRange:
 		return self.last.codepoint - self.first.codepoint + 1

 def codepoint(x):
 	"""Parse the leading field of x as a code point value.

 	x is a list of field strings; only x[0] is consumed.

 	Returns a (value, remaining) pair, where remaining is x[1:] and value is:
 	- a CodeRange when the field contains '..',
 	- a list of CodePoint objects when the field is space separated,
 	- CodePoint('0000') when the field is empty,
 	- a single CodePoint otherwise.
 	"""
 	field, remaining = x[0], x[1:]
 	if '..' in field:
 		return CodeRange(field), remaining
 	if ' ' in field:
 		return [CodePoint(c) for c in field.split()], remaining
 	if field == '':
 		# An empty field is mapped to U+0000 rather than to None.
 		return CodePoint('0000'), remaining
 	return CodePoint(field), remaining

 def string(x):
 	"""Consume the leading field of x as an optional string.

 	x is a list of field strings. Returns a (value, remaining) pair where
 	value is None for an empty field and the field text otherwise, and
 	remaining is x[1:]. This is the same pair shape integer() returns.
 	"""
 	field, remaining = x[0], x[1:]
 	if field == '':
 		return None, remaining
 	return field, remaining

 def integer(x):
 	"""Consume the leading field of x as an integer.

 	Returns a (number, remaining) pair: int(x[0]) and the fields after it.
 	"""
 	number = int(x[0])
 	remaining = x[1:]
 	return number, remaining

 def boolean(x):
 	"""Consume the leading field of x as a boolean flag.

 	x is a list of field strings. Returns a (flag, remaining) pair where
 	flag is True only when the field is exactly 'Y', and remaining is x[1:].
 	"""
 	return x[0] == 'Y', x[1:]

 def script(x):
 	"""Consume the leading field of x and look it up in script_map.

 	x is a list of field strings. Returns a (value, remaining) pair where
 	value is script_map[x[0]] and remaining is x[1:]. Raises KeyError when
 	the field is not a key of script_map.
 	"""
 	return script_map[x[0]], x[1:]

 def strlist(x):
 	"""Consume every remaining field of x as one list value.

 	Returns (x, []): the whole input is the value and nothing is left over.
 	"""
 	remaining = []
 	return x, remaining

 data_items = {
 	'Blocks': [
 		('Range', codepoint),
 		('Name', str)
 		('Name', string)
 	],
 	'DerivedAge': [
 		('Range', codepoint),
 		('Age', str),
 		('Age', string),
 	],
 	'PropList': [
 		('Range', codepoint),
 		('Property', str),
 		('Property', string),
 	],
 	'PropertyValueAliases': [
 		('Property', string),
 		('Key', string),
 		('Value', string),
 		('Aliases', strlist),
 	],
 	'Scripts': [
 		('Range', codepoint),
@@ -136,7 +148,7 @@ data_items = {
 		('CodePoint', codepoint),
 		('Name', string),
 		('GeneralCategory', string),
 		('CanonicalCombiningClass', int),
 		('CanonicalCombiningClass', integer),
 		('BidiClass', string),
 		('DecompositionType', string),
 		('DecompositionMapping', string),
@@ -152,7 +164,7 @@ data_items = {
 	# Supplemental Data:
 	'Klingon': [
 		('CodePoint', codepoint),
 		('Script', str),
 		('Script', string),
 		('GeneralCategory', string),
 		('Name', string),
 		('Transliteration', string),
@@ -166,7 +178,7 @@ def parse_ucd_data(ucd_rootdir, dataset):
 		for line in f:
 			line = line.replace('\n', '').split('#')[0]
 			linedata = [' '.join(x.split()) for x in line.split(';')]
 			if len(linedata) == len(keys):
 			if len(linedata) > 1:
 				if linedata[1].endswith(', First>'):
 					first = linedata
 					continue
@@ -177,10 +189,8 @@ def parse_ucd_data(ucd_rootdir, dataset):
 					first = None

 				data = {}
 				for keydata, value in zip(keys, linedata):
 					key, typemap = keydata
 					if key:
 						data[key] = typemap(value)
 				for key, typemap in keys:
 					data[key], linedata = typemap(linedata)
 				yield data

 if __name__ == '__main__':