Update emoji to unicode 13.0

73cf7b23 · Nicolas Werner · 8fcb670b · 73cf7b23 · 8fcb670b · 73cf7b23
Commit 73cf7b23 authored 5 years ago by Nicolas Werner
--- a/resources/emoji-test.txt
+++ b/resources/emoji-test.txt
--- a/resources/emoji.json
+++ b/resources/emoji.json
--- a/scripts/emoji_codegen.py
+++ b/scripts/emoji_codegen.py
 #!/usr/bin/env python3
 import sys
-import json
+import re
 from jinja2 import Template
 class Emoji(object):
-    def __init__(self, code, shortname, category, order):
+    def __init__(self, code, shortname):
-        self.code = ''.join(list(map(code_to_bytes, code.split('-'))))
+        self.code = repr(code.encode('utf-8'))[1:].strip("'")
        self.shortname = shortname
-        self.category = category
-        self.order = int(order)
-def code_to_bytes(codepoint):
-    '''
-    Convert hex unicode codepoint to hex byte array.
-    '''
-    bytes = chr(int(codepoint, 16)).encode('utf-8')
-    return str(bytes)[1:].strip("'")
 def generate_code(emojis, category):
    tmpl = Template('''
-const QList<Emoji> EmojiProvider::{{ category }} = {
+const std::vector<Emoji> emoji::Provider::{{ category }} = {
    {%- for e in emoji %}
        Emoji{QString::fromUtf8("{{ e.code }}"), "{{ e.shortname }}"},
    {%- endfor %}
@@ -38,44 +26,56 @@ const QList<Emoji> EmojiProvider::{{ category }} = {
 if __name__ == '__main__':
    if len(sys.argv) < 2:
-        print('usage: emoji_codegen.py /path/to/emoji.json')
+        print('usage: emoji_codegen.py /path/to/emoji-test.txt')
        sys.exit(1)
    filename = sys.argv[1]
-    data = {}
-    with open(filename, 'r') as filename:
+    people = []
-        data = json.loads(filename.read())
+    nature = []
+    food = []
+    activity = []
+    travel = []
+    objects = []
+    symbols = []
+    flags = []
+    categories = {
+        'Smileys & Emotion': people,
+        'People & Body': people,
+        'Animals & Nature': nature,
+        'Food & Drink': food,
+        'Travel & Places': travel,
+        'Activities': activity,
+        'Objects': objects,
+        'Symbols': symbols,
+        'Flags': flags
+    }
+    current_category = ''
+    for line in open(filename, 'r'):
+        if line.startswith('# group:'):
+            current_category = line.split(':', 1)[1].strip()
+        if not line or line.startswith('#'):
+            continue
-    emojis = []
+        segments = re.split(r'\s+[#;] ', line.strip())
+        if len(segments) != 3:
+            continue
-    for emoji_name in data:
+        code, qualification, charAndName = segments
-        tmp = data[emoji_name]
-        l = len(tmp['unicode'].split('-'))
+        # Skip sequences ending in the FE0F variation selector: they are the
+        # fully qualified duplicates of an emoji that is already listed.
+        if code.endswith('FE0F'):
+            continue
-        if l > 1 and tmp['category'] == 'people':
+        if qualification == 'component':
            continue
-        emojis.append(
+        char, name = re.match(r'^(\S+) E\d+\.\d+ (.*)$', charAndName).groups()
-            Emoji(
-                tmp['unicode'],
+        categories[current_category].append(Emoji(char, name))
-                tmp['shortname'],
-                tmp['category'],
-                tmp['emoji_order']
-            )
-        )
-    emojis.sort(key=lambda x: x.order)
-    people = list(filter(lambda x: x.category == "people", emojis))
-    nature = list(filter(lambda x: x.category == "nature", emojis))
-    food = list(filter(lambda x: x.category == "food", emojis))
-    activity = list(filter(lambda x: x.category == "activity", emojis))
-    travel = list(filter(lambda x: x.category == "travel", emojis))
-    objects = list(filter(lambda x: x.category == "objects", emojis))
-    symbols = list(filter(lambda x: x.category == "symbols", emojis))
-    flags = list(filter(lambda x: x.category == "flags", emojis))
    # Use xclip to pipe the output to the clipboard,
    # e.g. ./emoji_codegen.py emoji-test.txt | xclip -sel clip

--- a/scripts/update_emoji.md
+++ b/scripts/update_emoji.md
+# Updating emoji
+1. Get the latest emoji-test.txt from here: https://unicode.org/Public/emoji/
+2. Overwrite the existing resources/emoji-test.txt with the new one
+3. Run `./scripts/emoji_codegen.py resources/emoji-test.txt` and replace the generated emoji lists at the end of src/emoji/Provider.cpp with the new output
+4. Run `make lint` to reformat the generated code
+5. Compile and test
--- a/src/emoji/Provider.cpp
+++ b/src/emoji/Provider.cpp