[glib: 5/6] Fix g_unichar_iswide for unassigned codepoints




commit dcb459a0b0d3a1065735fed1d1a1cc9cec545d23
Author: Matthias Clasen <mclasen redhat com>
Date:   Wed Sep 14 14:55:43 2022 -0400

    Fix g_unichar_iswide for unassigned codepoints
    
    There are a few blocks in Unicode (mainly ideographs)
    which default to wide. These blocks are defined in the
    header comment of EastAsianWidth.txt.
    
    We have some tests which check that unassigned codepoints
    in those blocks get reported as wide, so make sure we handle
    this correctly.

 glib/guniprop.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)
---
diff --git a/glib/guniprop.c b/glib/guniprop.c
index 3744cd1b6b..d1363e546b 100644
--- a/glib/guniprop.c
+++ b/glib/guniprop.c
@@ -477,6 +477,14 @@ g_unichar_iswide_bsearch (gunichar ch)
   return FALSE;
 }
 
+static const struct Interval default_wide_blocks[] = { /* ranges that default to wide even for unassigned code points; taken from the header comment of EastAsianWidth.txt */
+  { 0x3400, 0x4dbf },
+  { 0x4e00, 0x9fff },
+  { 0xf900, 0xfaff },
+  { 0x20000, 0x2fffd },
+  { 0x30000, 0x3fffd }
+}; /* keep entries in ascending, non-overlapping order: g_unichar_iswide() searches this table with bsearch() */
+
 /**
  * g_unichar_iswide:
  * @c: a Unicode character
@@ -491,8 +499,17 @@ g_unichar_iswide (gunichar c)
 {
   if (c < g_unicode_width_table_wide[0].start)
     return FALSE;
-  else
-    return g_unichar_iswide_bsearch (c);
+  else if (g_unichar_iswide_bsearch (c))
+    return TRUE;
+  else if (g_unichar_type (c) == G_UNICODE_UNASSIGNED &&
+           bsearch (GUINT_TO_POINTER (c),
+                    default_wide_blocks,
+                    G_N_ELEMENTS (default_wide_blocks),
+                    sizeof default_wide_blocks[0],
+                    interval_compare))
+    return TRUE;
+
+  return FALSE;
 }
 
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]