#!/usr/bin/python

# Copyright (C) 2012 Reece H. Dunn
#
# This file is part of ucd-tools.
#
# ucd-tools is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ucd-tools is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ucd-tools.  If not, see <http://www.gnu.org/licenses/>.

import os


def read_data(path, split_char=':'):
    """Yield the fields of every non-comment line in the file at *path*.

    Each line has its newline removed and is split on *split_char*; the
    resulting list of strings is yielded.  Lines starting with '#' are
    skipped.  Blank lines are not filtered and yield [''].
    """
    with open(path) as f:
        for line in f:
            line = line.rstrip('\n')
            if line.startswith('#'):
                continue
            yield line.split(split_char)


def fold_lines(path):
    """Yield the logical lines of the file at *path*.

    A physical line that starts with a space is a continuation of the
    previous line: its first character is dropped and the remainder is
    appended to the pending logical line.  Empty logical lines are not
    yielded.
    """
    pending = None
    with open(path) as f:
        for line in f:
            line = line.rstrip('\n')
            if line.startswith(' '):
                # A continuation with nothing before it (first line of the
                # file) starts a new logical line instead of being glued
                # onto the text "None".
                pending = '%s%s' % (pending or '', line[1:])
                continue
            if pending:
                yield pending
            pending = line
    # The last logical line has no following line to flush it, so emit it
    # here; otherwise the final field of the final record is lost.
    if pending:
        yield pending


def iana_subtag_entries(path):
    """Yield one dict per record of the subtag registry file at *path*.

    Records are separated by '%%' lines.  Each logical line is split at
    the first ': ' into a key and a value.  Records that have no 'Type'
    key (such as the leading File-Date record) are skipped.
    """
    tag = {}
    for line in fold_lines(path):
        if line == '%%':
            if 'Type' in tag:
                yield tag
            tag = {}
            continue

        key, _, value = line.partition(': ')
        if key == 'Description' and key in tag:
            # Only select the first Description. This handles subtag codes
            # that have multiple descriptions (e.g. 'es' maps to "Spanish"
            # and "Castilian").
            continue
        tag[key] = value

    # Apply the same filter to the final record as to every other one, so
    # an empty file or a trailing '%%' does not produce an empty entry.
    if 'Type' in tag:
        yield tag


# Maps the registry's 'Type' field values to the names used in the result.
typemap = {
    'extlang':       'ExtLang',
    'grandfathered': 'Grandfathered',
    'language':      'Language',
    'redundant':     'Redundant',
    'region':        'Region',
    'script':        'Script',
    'variant':       'Variant',
}

# Maps the registry's 'Scope' field values to the names used in the result.
# When a record has a Scope, this name replaces its Type.
scopemap = {
    'collection':    'Collection',
    'macrolanguage': 'MacroLanguage',
    'special':       'Special',
    'private-use':   'PrivateUse',
}


def read_iana_subtags(path):
    """Return a dict mapping each subtag (or tag) to its properties.

    The key is the record's 'Subtag' value, or its 'Tag' value when there
    is no 'Subtag'; that field is removed from the property dict.  'Type'
    is rewritten through scopemap when the record has a 'Scope' (which is
    then removed), and through typemap otherwise.  Records whose key
    contains '..' (private use ranges) are left out.

    Raises Exception if a record has a 'Scope' but its Type is not
    'language', and KeyError for a record with neither 'Subtag' nor 'Tag'
    or with an unknown Type/Scope value.
    """
    tags = {}
    for tag in iana_subtag_entries(path):
        if 'Subtag' in tag:
            ref = tag.pop('Subtag')
        else:
            ref = tag.pop('Tag')

        if 'Scope' in tag:
            if tag['Type'] != 'language':
                raise Exception('"Scope" property unexpected for Type="%s"' % tag['Type'])
            tag['Type'] = scopemap[tag.pop('Scope')]
        else:
            tag['Type'] = typemap[tag['Type']]

        if '..' not in ref:  # exclude private use definitions
            tags[ref] = tag
    return tags