Browse Source

printdata: use the Uppercase property from DerivedCoreProperties.

master
Reece H. Dunn 8 years ago
parent
commit
9d1469b956
3 changed files with 18 additions and 10 deletions
  1. 5
    0
      Makefile.am
  2. 9
    10
      tools/printdata.py
  3. 4
    0
      tools/ucd.py

+ 5
- 0
Makefile.am View File

@@ -63,6 +63,10 @@ data/ucd/PropList.txt:
mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt > $@

data/ucd/DerivedCoreProperties.txt:
mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/DerivedCoreProperties.txt > $@

data/ucd/PropertyValueAliases.txt:
mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropertyValueAliases.txt > $@
@@ -138,6 +142,7 @@ tests_printucddata_cpp_LDADD = src/libucd.la
tests/unicode-data.expected: tools/printdata.py tools/ucd.py \
data/ucd/UnicodeData.txt \
data/ucd/PropList.txt \
data/ucd/DerivedCoreProperties.txt \
data/ucd/Scripts.txt
tools/printdata.py ${UCD_ROOTDIR} ${UCD_FLAGS} > $@


+ 9
- 10
tools/printdata.py View File

@@ -30,13 +30,14 @@ unicode_chars = {}
 for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
 for codepoint in data['CodePoint']:
 unicode_chars[codepoint] = data
-for data in ucd.parse_ucd_data(ucd_rootdir, 'PropList'):
-for codepoint in data['Range']:
-try:
-unicode_chars[codepoint][data['Property']] = 1
-except KeyError:
-unicode_chars[codepoint] = {'CodePoint': codepoint}
-unicode_chars[codepoint][data['Property']] = 1
+for propfile in ['PropList', 'DerivedCoreProperties']:
+for data in ucd.parse_ucd_data(ucd_rootdir, propfile):
+for codepoint in data['Range']:
+try:
+unicode_chars[codepoint][data['Property']] = 1
+except KeyError:
+unicode_chars[codepoint] = {'CodePoint': codepoint}
+unicode_chars[codepoint][data['Property']] = 1
 for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'):
 for codepoint in data['Range']:
 unicode_chars[codepoint]['Script'] = data['Script']
@@ -100,9 +101,7 @@ def isalpha(data):
 return 0

 def isupper(data):
-if data.get('GeneralCategory', 'Cn') == 'Lu':
-return 1
-elif data.get('Other_Uppercase', 0):
+if data.get('Uppercase', 0):
 return 1
 elif data.get('LowerCase', null) != null: # Some Lt characters have lowercase forms.
 return 1

+ 4
- 0
tools/ucd.py View File

@@ -113,6 +113,10 @@ data_items = {
('Range', codepoint),
('Age', string),
],
'DerivedCoreProperties': [
('Range', codepoint),
('Property', string),
],
'PropList': [
('Range', codepoint),
('Property', string),

Loading…
Cancel
Save