[gnome-characters/wip/exalm/unistring: 4/15] gen-scripts: Add iso15924 codes

From: Alexander Mikhaylenko <alexm src gnome org>
To: commits-list gnome org
Cc:
Subject: [gnome-characters/wip/exalm/unistring: 4/15] gen-scripts: Add iso15924 codes
Date: Fri, 26 Nov 2021 09:30:13 +0000 (UTC)


commit 48e5105f678ed2655b4e9069e41840c5dc757ee4
Author: Alexander Mikhaylenko <alexm gnome org>
Date:   Thu Nov 25 19:13:34 2021 +0500

    gen-scripts: Add iso15924 codes

 lib/gen-scripts.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)
---
diff --git a/lib/gen-scripts.py b/lib/gen-scripts.py
index 14cdca7..b8b0c83 100755
--- a/lib/gen-scripts.py
+++ b/lib/gen-scripts.py
@@ -15,7 +15,11 @@ ISO_ALIASES = { 'Hans': ['Hani'], 'Hant': ['Hani'],
                 'Jpan': ['Hrkt'],
                 'Kore': ['Hang'] }
 UCD_ALIASES = { 'Katakana_Or_Hiragana': ['Katakana', 'Hiragana'] }
-DEFAULT_ALIASES = { 'en': ['Latin'] }
+
+def iso15924_to_hex(iso):
+    return hex(ord(iso[0]) << 24 | ord(iso[1]) << 16 | ord(iso[2]) << 8 | ord(iso[3]))
+
+DEFAULT_ALIASES = { 'en': (['Latin'], [iso15924_to_hex('Latn')]) }
 
 def get_language_data(infile, aliases):
     result = {}
@@ -40,6 +44,8 @@ def get_language_data(infile, aliases):
             scripts = [ISO_ALIASES.get(script, [script]) for script in scripts]
             scripts = [script for elements in scripts for script in elements]
 
+            iso15924 = scripts
+
             # Resolve ISO 15924 to Unicode mapping.
             scripts = [aliases[script] for script in scripts
                        if script in aliases]
@@ -51,10 +57,13 @@ def get_language_data(infile, aliases):
             scripts = set(scripts)
             scripts = sorted(scripts)
 
+            iso15924 = [iso15924_to_hex(iso) for iso in iso15924]
+            iso15924 = sorted(set(iso15924))
+
             if len(scripts) == 0:
                 continue
             for key in keys:
-                result[key] = scripts
+                result[key] = scripts, iso15924
     temp = dict(DEFAULT_ALIASES)
     temp.update(result)
     return temp
@@ -75,12 +84,15 @@ struct LanguageScripts
 {{
   const gchar *language;
   const gchar *scripts[{0}];
-}};'''.format(max([len(v) for v in data.values()])+1))
+  const guint32 iso15924[{1}];
+}};'''.format(max([len(v[0]) for v in data.values()])+1, max([len(v[1]) for v in data.values()])+1))
     print('''\
 struct LanguageScripts language_scripts[NLANGUAGES] =
   {''')
-    for index, (lang, scripts) in enumerate(sorted(data.items(), key=lambda x: x[0])):
-        print('    {{ "{0}", {{ {1} }} }}'.format(lang, ', '.join(['N_("{0}")'.format(script) for script in scripts] + ['NULL'])), end='')
+    for index, (lang, (scripts, iso15924)) in enumerate(sorted(data.items(), key=lambda x: x[0])):
+        scripts_array = ', '.join(['N_("{0}")'.format(script) for script in scripts] + ['NULL'])
+        iso15924_array = ', '.join(['{0}'.format(iso) for iso in iso15924] + ['0'])
+        print('    {{ "{0}", {{ {1} }}, {{ {2} }} }}'.format(lang, scripts_array, iso15924_array), end='')
         if index + 1 < len(data):
             print(',')
         else:

[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]