Browse Source

tools/emoji: support importing translations from CLDR annotations files

master
Reece H. Dunn 8 years ago
parent
commit
725c106e4c
1 changed file with 30 additions and 2 deletions
  1. 30
    2
      tools/emoji

+ 30
- 2
tools/emoji View File

@@ -3,8 +3,11 @@
import os
import re
import sys
import math
import codecs

import xml.etree.ElementTree as etree

class Emoji:
def __init__(self, m):
self.emoji = m.group(1)
@@ -22,6 +25,13 @@ class Emoji:
def __str__(self):
return "{0}{1}// [{2}]{3}".format(self.emoji, self.pronunciation, self.codepoints, self.comment)

def read_annotations(filename):
    """Yield ``(cp, text)`` pairs for the "tts" annotations in an XML file.

    The document root is searched for ``annotations`` children, and each of
    those for ``annotation`` children. Only elements whose ``type``
    attribute is ``"tts"`` are considered.

    Args:
        filename: Path (or file object) accepted by ``ElementTree.parse``.

    Yields:
        Tuples of the element's ``cp`` attribute and its text content.
        Elements with no ``cp`` attribute or with empty text are skipped,
        so a malformed entry cannot raise ``KeyError`` or produce a
        ``None`` translation.
    """
    ldml = etree.parse(filename).getroot()
    # "annotations/annotation" walks the same two levels of direct children
    # as nested findall() calls would, in document order.
    for annotation in ldml.iterfind("annotations/annotation"):
        if annotation.get("type") != "tts":
            continue
        cp = annotation.get("cp")
        text = annotation.text
        if cp is None or not text:
            continue
        yield cp, text

def read_emoji(filename, encoding="utf-8"):
re_emoji = re.compile(r"^([^ \t]*)([^/]*)// \[([^\]]*)\](.*)$")
with codecs.open(filename, "r", encoding) as f:
@@ -40,5 +50,23 @@ def read_emoji(filename, encoding="utf-8"):
else:
yield line

for line in read_emoji(sys.argv[1]):
print(line)
# Translated names keyed by emoji, read from the annotations file named by
# the second command-line argument.
annotations = dict(read_annotations(sys.argv[2]))

# Tab-stop width used when re-aligning the text that follows the
# pronunciation field.
TAB_WIDTH = 8

for entry in read_emoji(sys.argv[1]):
    if isinstance(entry, Emoji):
        translation = annotations.get(entry.emoji)
        if translation:
            # Work out how many columns the existing pronunciation text and
            # its trailing tabs span. One tab is subtracted from the count;
            # this assumes the field has the same "\t<text>\t..." layout
            # that is written back below, i.e. one tab precedes the text.
            length = len(entry.pronunciation.strip())
            tabs = entry.pronunciation.count('\t') - 1
            first_tab = TAB_WIDTH - (length % TAB_WIDTH)
            tab_length = length + first_tab + ((tabs - 1) * TAB_WIDTH)

            # Pad the translation with tabs to span the same width. Float
            # division keeps ceil() meaningful where `/` on ints truncates.
            # At least one tab is always written, so a translation as wide
            # as (or wider than) the original span never abuts the "//".
            new_length = len(translation)
            new_tabs = int(math.ceil((tab_length - new_length) / float(TAB_WIDTH)))
            new_tabs = max(new_tabs, 1)

            entry.pronunciation = "\t{0}{1}".format(translation, "\t" * new_tabs)
        else:
            entry.comment += " (no translation)"
    # Every entry is echoed, whether or not it is an Emoji instance.
    print(entry)

Loading…
Cancel
Save