8 years ago · 725c106e4c
--- a/tools/emoji
+++ b/tools/emoji
@@ -3,8 +3,11 @@
 import os
 import re
 import sys
 import math
 import codecs

 import xml.etree.ElementTree as etree

 class Emoji:
    def __init__(self, m):
        self.emoji = m.group(1)
@@ -22,6 +25,13 @@ class Emoji:
    def __str__(self):
        return "{0}{1}// [{2}]{3}".format(self.emoji, self.pronunciation, self.codepoints, self.comment)

 def read_annotations(filename):
    """Yield ``(cp, text)`` pairs for the "tts" annotations in an XML file.

    Only ``annotation`` elements that are direct children of an
    ``annotations`` element directly under the document root are considered,
    and only those whose ``type`` attribute equals ``"tts"``.

    The yielded text is the element's ``.text`` and may be ``None`` for an
    empty element. A matching element without a ``cp`` attribute raises
    ``KeyError``.
    """
    root = etree.parse(filename).getroot()
    # The attribute predicate does the type filtering; elements that have no
    # "type" attribute at all simply do not match.
    for node in root.findall("annotations/annotation[@type='tts']"):
        codepoints = node.attrib["cp"]
        yield codepoints, node.text

 def read_emoji(filename, encoding="utf-8"):
    re_emoji = re.compile(r"^([^ \t]*)([^/]*)// \[([^\]]*)\](.*)$")
    with codecs.open(filename, "r", encoding) as f:
@@ -40,5 +50,23 @@ def read_emoji(filename, encoding="utf-8"):
                else:
                    yield line

 for line in read_emoji(sys.argv[1]):
    print(line)
 def _retab_pronunciation(old, translation):
    """Return a pronunciation field holding *translation*, padded with tabs.

    The padding aims to end at the same column (8-column tab stops) as *old*
    did, so the comment that follows stays aligned.

    NOTE(review): assumes *old* is a leading tab, the text, then one or more
    trailing tabs -- confirm against Emoji.__init__ and the read_emoji regex.
    """
    length = len(old.strip())
    # One tab is the leading one; the rest are counted as trailing padding.
    trailing_tabs = old.count('\t') - 1
    # The first trailing tab only advances to the next multiple of 8; every
    # further tab advances a full 8 columns.
    first_tab = 8 - (length % 8)
    tab_length = length + first_tab + ((trailing_tabs - 1) * 8)

    missing = tab_length - len(translation)
    # Integer ceiling division gives the same result on Python 2 and 3
    # (math.ceil(a/8) floors first under Python 2 integer division).
    # Always emit at least one tab: a translation as wide as, or wider than,
    # the old column would otherwise run straight into the "// [...]" comment.
    new_tabs = max(1, -(-missing // 8))
    return "\t{0}{1}".format(translation, "\t" * new_tabs)

 # sys.argv[2]: XML annotation file; maps the "cp" attribute to the tts text.
 # If the same cp occurs more than once, the last one wins.
 annotations = dict(read_annotations(sys.argv[2]))

 # sys.argv[1]: emoji file. Emoji entries get their pronunciation replaced by
 # the translation when one exists; every entry is printed to stdout.
 for entry in read_emoji(sys.argv[1]):
    if isinstance(entry, Emoji):
        translation = annotations.get(entry.emoji)
        if translation:
            entry.pronunciation = _retab_pronunciation(entry.pronunciation, translation)
        else:
            # Missing (or empty) translation: keep the entry and flag it.
            entry.comment += " (no translation)"
    print(entry)