| @@ -3,8 +3,11 @@ | |||
| import os | |||
| import re | |||
| import sys | |||
| import math | |||
| import codecs | |||
| import xml.etree.ElementTree as etree | |||
| class Emoji: | |||
| def __init__(self, m): | |||
| self.emoji = m.group(1) | |||
| @@ -22,6 +25,13 @@ class Emoji: | |||
| def __str__(self): | |||
| return "{0}{1}// [{2}]{3}".format(self.emoji, self.pronunciation, self.codepoints, self.comment) | |||
def read_annotations(filename):
    """Yield ``(cp, text)`` for each ``tts`` annotation in an XML file.

    Only ``<annotation>`` elements that sit directly inside an
    ``<annotations>`` child of the document root are considered, and of
    those only the ones whose ``type`` attribute equals ``"tts"``.

    :param filename: source handed to ``etree.parse``.
    :returns: generator of ``(cp attribute, element text)`` tuples in
        document order.
    :raises KeyError: if a matching element has no ``cp`` attribute.
    """
    root = etree.parse(filename).getroot()
    for node in root.iterfind("annotations/annotation"):
        # Elements without a type attribute, or with another type, are skipped.
        if node.get("type", "") != "tts":
            continue
        yield node.attrib["cp"], node.text
| def read_emoji(filename, encoding="utf-8"): | |||
| re_emoji = re.compile(r"^([^ \t]*)([^/]*)// \[([^\]]*)\](.*)$") | |||
| with codecs.open(filename, "r", encoding) as f: | |||
| @@ -40,5 +50,23 @@ def read_emoji(filename, encoding="utf-8"): | |||
| else: | |||
| yield line | |||
def _retabbed_pronunciation(old_pronunciation, translation):
    """Return *translation* wrapped in tabs so the text after it keeps its column.

    Assumes tab stops every 8 columns and that *old_pronunciation* consists
    of one leading tab, the spoken text, and then padding tabs.

    :param old_pronunciation: the pronunciation field being replaced.
    :param translation: the replacement text.
    :returns: a leading tab, the translation, and at least one trailing tab.
    """
    length = len(old_pronunciation.strip())
    # One tab precedes the text; the remaining ones are trailing padding.
    trailing_tabs = old_pronunciation.count("\t") - 1
    # The first padding tab only advances to the next multiple of 8;
    # every further tab adds a full 8 columns.
    first_tab = 8 - (length % 8)
    comment_column = length + first_tab + (trailing_tabs - 1) * 8
    # Divide by a float so the fraction survives until ceil(); with integer
    # operands Python 2's "/" would floor first and drop a needed tab.
    needed = int(math.ceil((comment_column - len(translation)) / 8.0))
    # A translation that reaches or passes the old column would otherwise get
    # no tab at all and run straight into the following "//".
    return "\t{0}{1}".format(translation, "\t" * max(needed, 1))


# Second command-line argument: annotation file mapping cp -> spoken name.
annotations = dict(read_annotations(sys.argv[2]))

# First command-line argument: emoji list. Emoji entries get their
# pronunciation replaced; every other item is printed through unchanged.
for entry in read_emoji(sys.argv[1]):
    if isinstance(entry, Emoji):
        translation = annotations.get(entry.emoji)
        if translation:
            entry.pronunciation = _retabbed_pronunciation(
                entry.pronunciation, translation)
        else:
            # Missing or empty translation: keep the original text and flag it.
            entry.comment += " (no translation)"
    print(entry)