[gucharmap] Classify characters without script name properly

From: Christian Persch <chpe src gnome org>
To: commits-list gnome org
Cc:
Subject: [gucharmap] Classify characters without script name properly
Date: Sat, 12 Sep 2020 20:19:30 +0000 (UTC)
commit 048b1d959029e4f64e3264ce4569af2bd6f9e005
Author: Ingo Brückl <ib wupperonline de>
Date:   Sat Sep 12 22:19:03 2020 +0200

    Classify characters without script name properly
    
    The Unicode Scripts.txt says:
    
       All code points not explicitly listed for Script
       have the value Unknown (Zzzz).
    
    Fixes: https://gitlab.gnome.org/GNOME/gucharmap/-/issues/36

 gucharmap/gen-guch-unicode-tables.pl        |  3 +++
 gucharmap/gucharmap-script-codepoint-list.c | 18 +++++++++---------
 2 files changed, 12 insertions(+), 9 deletions(-)
---
diff --git a/gucharmap/gen-guch-unicode-tables.pl b/gucharmap/gen-guch-unicode-tables.pl
index 55f4beb1..34bf6d87 100755
--- a/gucharmap/gen-guch-unicode-tables.pl
+++ b/gucharmap/gen-guch-unicode-tables.pl
@@ -855,6 +855,9 @@ sub read_scripts_txt
 
     # Adds Common to make sure works with UCD <= 4.0.0
     $scripts->{"Common"} = 1; 
+
+    # Add Unknown (for code points not explicitly listed as script)
+    $scripts->{"Unknown"} = 1;
 }
 
 sub process_scripts_txt
diff --git a/gucharmap/gucharmap-script-codepoint-list.c b/gucharmap/gucharmap-script-codepoint-list.c
index 7145879f..e9b194ed 100644
--- a/gucharmap/gucharmap-script-codepoint-list.c
+++ b/gucharmap/gucharmap-script-codepoint-list.c
@@ -69,24 +69,24 @@ find_script (const gchar *script)
 }
 
 /* *ranges should be freed by caller */
-/* adds unlisted characters to the "Common" script */
+/* adds unlisted characters to the "Unknown" script */
 static gboolean
 get_chars_for_script (const gchar            *script,
                       UnicodeRange          **ranges,
                       gint                   *size)
 {
   gint i, j, index;
-  gint script_index, common_script_index;
+  gint script_index, unknown_script_index;
   gint prev_end;
 
   script_index = find_script (script);
-  common_script_index = find_script ("Common");
+  unknown_script_index = find_script ("Unknown");
   if (script_index == -1)
     return FALSE;
 
   j = 0;
 
-  if (script_index == common_script_index)
+  if (script_index == unknown_script_index)
     {
       prev_end = -1;
       for (i = 0;  i < G_N_ELEMENTS (unicode_scripts);  i++)
@@ -110,7 +110,7 @@ get_chars_for_script (const gchar            *script,
 
   for (i = 0;  i < G_N_ELEMENTS (unicode_scripts);  i++)
     {
-      if (script_index == common_script_index)
+      if (script_index == unknown_script_index)
        {
          if (unicode_scripts[i].start > prev_end + 1)
            {
@@ -136,7 +136,7 @@ get_chars_for_script (const gchar            *script,
        }
     }
 
-  if (script_index == common_script_index)
+  if (script_index == unknown_script_index)
     {
       if (unicode_scripts[i-1].end < UNICHAR_MAX)
        {
@@ -437,7 +437,7 @@ gucharmap_unicode_list_scripts (void)
  *
  * Return value: The English (untranslated) name of the script to which the
  * character belongs. Characters that don't belong to an actual script
- * return %"Common".
+ * return %"Unknown".
  **/
 const gchar *
 gucharmap_unicode_get_script_for_char (gunichar wc)
@@ -460,7 +460,7 @@ gucharmap_unicode_get_script_for_char (gunichar wc)
         return unicode_script_list_strings + unicode_script_list_offsets[unicode_scripts[mid].script_index];
     }
 
-  /* Unicode assigns "Common" as the script name for any character not
+  /* Unicode assigns "Unknown" as the script name for any character not
    * specifically listed in Scripts.txt */
-  return N_("Common");
+  return N_("Unknown");
 }
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]