Browse Source

printdata: use the Uppercase property from DerivedCoreProperties.

master
Reece H. Dunn 8 years ago
parent
commit
9d1469b956
3 changed files with 18 additions and 10 deletions
  1. Makefile.am (+5, -0)
  2. tools/printdata.py (+9, -10)
  3. tools/ucd.py (+4, -0)

+ 5
- 0
Makefile.am View File

mkdir -pv data/ucd mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt > $@ curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt > $@


data/ucd/DerivedCoreProperties.txt:
mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/DerivedCoreProperties.txt > $@

data/ucd/PropertyValueAliases.txt: data/ucd/PropertyValueAliases.txt:
mkdir -pv data/ucd mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropertyValueAliases.txt > $@ curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropertyValueAliases.txt > $@
tests/unicode-data.expected: tools/printdata.py tools/ucd.py \ tests/unicode-data.expected: tools/printdata.py tools/ucd.py \
data/ucd/UnicodeData.txt \ data/ucd/UnicodeData.txt \
data/ucd/PropList.txt \ data/ucd/PropList.txt \
data/ucd/DerivedCoreProperties.txt \
data/ucd/Scripts.txt data/ucd/Scripts.txt
tools/printdata.py ${UCD_ROOTDIR} ${UCD_FLAGS} > $@ tools/printdata.py ${UCD_ROOTDIR} ${UCD_FLAGS} > $@



+ 9
- 10
tools/printdata.py View File

for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
for codepoint in data['CodePoint']: for codepoint in data['CodePoint']:
unicode_chars[codepoint] = data unicode_chars[codepoint] = data
for data in ucd.parse_ucd_data(ucd_rootdir, 'PropList'):
for codepoint in data['Range']:
try:
unicode_chars[codepoint][data['Property']] = 1
except KeyError:
unicode_chars[codepoint] = {'CodePoint': codepoint}
unicode_chars[codepoint][data['Property']] = 1
for propfile in ['PropList', 'DerivedCoreProperties']:
for data in ucd.parse_ucd_data(ucd_rootdir, propfile):
for codepoint in data['Range']:
try:
unicode_chars[codepoint][data['Property']] = 1
except KeyError:
unicode_chars[codepoint] = {'CodePoint': codepoint}
unicode_chars[codepoint][data['Property']] = 1
for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'): for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'):
for codepoint in data['Range']: for codepoint in data['Range']:
unicode_chars[codepoint]['Script'] = data['Script'] unicode_chars[codepoint]['Script'] = data['Script']
return 0 return 0


def isupper(data): def isupper(data):
if data.get('GeneralCategory', 'Cn') == 'Lu':
return 1
elif data.get('Other_Uppercase', 0):
if data.get('Uppercase', 0):
return 1 return 1
elif data.get('LowerCase', null) != null: # Some Lt characters have lowercase forms. elif data.get('LowerCase', null) != null: # Some Lt characters have lowercase forms.
return 1 return 1

+ 4
- 0
tools/ucd.py View File

('Range', codepoint), ('Range', codepoint),
('Age', string), ('Age', string),
], ],
'DerivedCoreProperties': [
('Range', codepoint),
('Property', string),
],
'PropList': [ 'PropList': [
('Range', codepoint), ('Range', codepoint),
('Property', string), ('Property', string),

Loading…
Cancel
Save