[gnome-characters/wip/exalm/unistring: 8/15] lib: Add a script for generating unicode name table




commit 86baa4f437cf3892c15a56e8331c19b6d2e63b1d
Author: Alexander Mikhaylenko <alexm gnome org>
Date:   Fri Nov 26 01:52:04 2021 +0500

    lib: Add a script for generating unicode name table
    
    GLib doesn't provide a way to look up Unicode character names, so we
    have to generate a table of them ourselves.

 lib/gen-names.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
---
diff --git a/lib/gen-names.py b/lib/gen-names.py
new file mode 100755
index 0000000..7de541a
--- /dev/null
+++ b/lib/gen-names.py
@@ -0,0 +1,56 @@
+#!/usr/bin/python3
+
+# Input: https://www.unicode.org/Public/UNIDATA/UnicodeData.txt
+
+import io
+import re
+
+class Builder(object):
+    """Turn UnicodeData.txt into a C table of character names."""
+
+    def read(self, infile):
+        """Return (codepoint, name) string pairs parsed from infile.
+
+        infile is an iterable of 'codepoint;name;...' lines.  Blank lines,
+        '#' comments and entries without a usable name are skipped.
+        """
+        names = []
+        for line in infile:
+            line = line.strip()
+            if not line or line.startswith('#'):
+                continue
+            codepoint, name, _ = line.split(';', 2)
+
+            # Angle-bracket names such as <control> are placeholders, not real
+            # names; skip those, and entries whose name field is empty.
+            if not name or name.startswith('<'):
+                continue
+
+            names.append((codepoint, name))
+        return names
+
+    def write(self, data):
+        """Print the C struct and table for data, as returned by read()."""
+        print('''\
+struct CharacterName
+{
+  gunichar uc;
+  const char *name;
+};''')
+        print('static const struct CharacterName character_names[] =\n  {')
+        for codepoint, name in data:
+            print('    {{ 0x{0}, "{1}" }},'.format(codepoint, name))
+        print('  };')
+
+if __name__ == '__main__':
+    import argparse
+
+    parser = argparse.ArgumentParser(description='build')
+    # Take a path, not argparse.FileType: the file is then opened only once.
+    parser.add_argument('infile', help='input file')
+    args = parser.parse_args()
+
+    builder = Builder()
+    # utf_8_sig drops a leading byte-order mark if the input has one.
+    with io.open(args.infile, encoding='utf_8_sig') as infile:
+        builder.write(builder.read(infile))


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]