123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102 |
- #!/usr/bin/python
-
- # Copyright (C) 2012 Reece H. Dunn
- #
- # This file is part of ucd-tools.
- #
- # ucd-tools is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # ucd-tools is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
-
- import os
-
- def read_data(path, split_char=':'):
- with open(path) as f:
- for line in f:
- line = line.replace('\n', '')
- if not line.startswith('#'):
- yield line.split(split_char)
-
- def fold_lines(path):
- next_line = None
- with open(path) as f:
- for line in f:
- line = line.replace('\n', '')
- if line.startswith(' '):
- next_line = '%s%s' % (next_line, line[1:])
- continue
- if next_line:
- yield next_line
- next_line = line
-
- def iana_subtag_entries(path):
- tag = {}
- for line in fold_lines(path):
- if line == '%%':
- if 'Type' in tag:
- yield tag
- tag = {}
- continue
-
- packed = line.split(': ')
- key = packed[0]
- value = ': '.join(packed[1:])
-
- if key == 'Description':
- # Only select the first Description. This handles subtag codes
- # that have multiple descriptions (e.g. 'es' maps to "Spanish"
- # and "Castilian").
- if not key in tag.keys():
- tag[key] = value
- else:
- tag[key] = value
- yield tag
-
- typemap = {
- 'extlang': 'ExtLang',
- 'grandfathered': 'Grandfathered',
- 'language': 'Language',
- 'redundant': 'Redundant',
- 'region': 'Region',
- 'script': 'Script',
- 'variant': 'Variant',
- }
-
- scopemap = {
- 'collection': 'Collection',
- 'macrolanguage': 'MacroLanguage',
- 'special': 'Special',
- 'private-use': 'PrivateUse',
- }
-
- def read_iana_subtags(path):
- tags = {}
- for tag in iana_subtag_entries(path):
- if 'Subtag' in tag.keys():
- ref = tag['Subtag']
- del tag['Subtag']
- else:
- ref = tag['Tag']
- del tag['Tag']
-
- if 'Scope' in tag.keys():
- if tag['Type'] != 'language':
- raise Exception('"Scope" property unexpected for Type="%s"' % tag['Type'])
-
- tag['Type'] = scopemap[ tag['Scope'] ]
- del tag['Scope']
- else:
- tag['Type'] = typemap[ tag['Type'] ]
-
- if '..' not in ref: # exclude private use definitions
- tags[ref] = tag
- return tags
|