[pango: 3/6] Update Grapheme Boundary to Unicode 11



commit 1ad4ac6065a405d0c6033633fd1b9ee019745d1c
Author: Peng Wu <alexepico gmail com>
Date:   Mon Aug 13 16:48:59 2018 +0800

    Update Grapheme Boundary to Unicode 11

 pango/break.c               | 129 ++++++++++++--------------------------------
 pango/pango-emoji-private.h |   3 ++
 pango/pango-emoji.c         |   6 +++
 3 files changed, 42 insertions(+), 96 deletions(-)
---
diff --git a/pango/break.c b/pango/break.c
index 2e66972e..c50c18eb 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -24,6 +24,7 @@
 #include "pango-break.h"
 #include "pango-engine-private.h"
 #include "pango-script-private.h"
+#include "pango-emoji-private.h"
 #include "pango-impl-utils.h"
 #include <string.h>
 
@@ -194,16 +195,12 @@ pango_default_break (const gchar   *text,
     GB_SpacingMark,
     GB_InHangulSyllable, /* Handles all of L, V, T, LV, LVT rules */
     /* Use state machine to handle emoji sequence */
-    /* Rule GB10 and GB11 */
-    GB_E_Base,
-    GB_E_Modifier,
-    GB_Glue_After_Zwj,
-    GB_E_Base_GAZ,
     /* Rule GB12 and GB13 */
     GB_RI_Odd, /* Meets odd number of RI */
     GB_RI_Even, /* Meets even number of RI */
   } GraphemeBreakType;
   GraphemeBreakType prev_GB_type = GB_Other;
+  gboolean met_Extended_Pictographic = FALSE;
 
   /* See Word_Break Property Values table of UAX#29 */
   typedef enum
@@ -371,10 +368,14 @@ pango_default_break (const gchar   *text,
       /* Just few spaces have variable width. So explicitly mark them.
        */
       attrs[i].is_expandable_space = (0x0020 == wc || 0x00A0 == wc);
+      gboolean is_Extended_Pictographic =
+       _pango_Is_Emoji_Extended_Pictographic (wc);
+
 
       /* ---- UAX#29 Grapheme Boundaries ---- */
       {
        GraphemeBreakType GB_type;
+
         /* Find the GraphemeBreakType of wc */
        GB_type = GB_Other;
        switch ((int) type)
@@ -447,70 +448,6 @@ pango_default_break (const gchar   *text,
            break;
 
           case G_UNICODE_OTHER_SYMBOL:
-            if (G_UNLIKELY(wc == 0x261D ||
-                           wc == 0x26F9 ||
-                           (wc >= 0x270A && wc <= 0x270D) ||
-                           wc == 0x1F385 ||
-                           (wc >= 0x1F3C2 && wc <= 0x1F3C4) ||
-                           wc == 0x1F3C7 ||
-                           (wc >= 0x1F3CA && wc <= 0x1F3CC) ||
-                           (wc >= 0x1F442 && wc <= 0x1F443) ||
-                           (wc >= 0x1F446 && wc <= 0x1F450) ||
-                           wc == 0x1F46E ||
-                           (wc >= 0x1F470 && wc <= 0x1F478) ||
-                           wc == 0x1F47C ||
-                           (wc >= 0x1F481 && wc <= 0x1F483) ||
-                           (wc >= 0x1F485 && wc <= 0x1F487) ||
-                           wc == 0x1F4AA ||
-                           (wc >= 0x1F574 && wc <= 0x1F575) ||
-                           wc == 0x1F57A ||
-                           wc == 0x1F590 ||
-                           (wc >= 0x1F595 && wc <= 0x1F596) ||
-                           (wc >= 0x1F645 && wc <= 0x1F647) ||
-                           (wc >= 0x1F64B && wc <= 0x1F64F) ||
-                           wc == 0x1F6A3 ||
-                           (wc >= 0x1F6B4 && wc <= 0x1F6B6) ||
-                           wc == 0x1F6C0 ||
-                           wc == 0x1F6CC ||
-                           (wc >= 0x1F918 && wc <= 0x1F91C) ||
-                           (wc >= 0x1F91E && wc <= 0x1F91F) ||
-                           wc == 0x1F926 ||
-                           (wc >= 0x1F930 && wc <= 0x1F939) ||
-                           (wc >= 0x1F93D && wc <= 0x1F93E) ||
-                           (wc >= 0x1F9D1 && wc <= 0x1F9DD)))
-              {
-                GB_type = GB_E_Base;
-                break;
-              }
-            if (G_UNLIKELY(wc == 0x2640 ||
-                           wc == 0x2642 ||
-                           (wc >= 0x2695 && wc <= 0x2696) ||
-                           wc == 0x2708 ||
-                           wc == 0x2764 ||
-                           wc == 0x1F308 ||
-                           wc == 0x1F33E ||
-                           wc == 0x1F373 ||
-                           wc == 0x1F393 ||
-                           wc == 0x1F3A4 ||
-                           wc == 0x1F3A8 ||
-                           wc == 0x1F3EB ||
-                           wc == 0x1F3ED ||
-                           wc == 0x1F48B ||
-                           (wc >= 0x1F4BB && wc <= 0x1F4BC) ||
-                           wc == 0x1F527 ||
-                           wc == 0x1F52C ||
-                           wc == 0x1F5E8 ||
-                           wc == 0x1F680 ||
-                           wc == 0x1F692))
-              {
-                GB_type = GB_Glue_After_Zwj;
-                break;
-              }
-            if (G_UNLIKELY(wc >= 0x1F466 && wc <= 0x1F469))
-              {
-                GB_type = GB_E_Base_GAZ;
-                break;
-              }
             if (G_UNLIKELY(wc >=0x1F1E6 && wc <=0x1F1FF))
               {
                 if (prev_GB_type == GB_RI_Odd)
@@ -525,11 +462,28 @@ pango_default_break (const gchar   *text,
 
           case G_UNICODE_MODIFIER_SYMBOL:
             if (wc >= 0x1F3FB && wc <= 0x1F3FF)
-              GB_type = GB_E_Modifier;
+              GB_type = GB_Extend;
             break;
          }
 
+       /* Rule GB11 */
+       if (met_Extended_Pictographic)
+         {
+           if (GB_type == GB_Extend)
+             met_Extended_Pictographic = TRUE;
+           else if (_pango_Is_Emoji_Extended_Pictographic (prev_wc) &&
+                    GB_type == GB_ZWJ)
+             met_Extended_Pictographic = TRUE;
+           else if (prev_GB_type == GB_Extend && GB_type == GB_ZWJ)
+             met_Extended_Pictographic = TRUE;
+           else if (prev_GB_type == GB_ZWJ && is_Extended_Pictographic)
+             met_Extended_Pictographic = TRUE;
+           else
+             met_Extended_Pictographic = FALSE;
+         }
+
        /* Grapheme Cluster Boundary Rules */
+       is_grapheme_boundary = TRUE; /* Rule GB999 */
 
        /* We apply Rules GB1 and GB2 at the end of the function */
        if (wc == '\n' && prev_wc == '\r')
@@ -540,9 +494,6 @@ pango_default_break (const gchar   *text,
          is_grapheme_boundary = FALSE; /* Rules GB6, GB7, GB8 */
        else if (GB_type == GB_Extend)
           {
-            /* Rule GB10 */
-            if (prev_GB_type == GB_E_Base || prev_GB_type == GB_E_Base_GAZ)
-             GB_type = prev_GB_type;
            is_grapheme_boundary = FALSE; /* Rule GB9 */
           }
         else if (GB_type == GB_ZWJ)
@@ -551,37 +502,23 @@ pango_default_break (const gchar   *text,
          is_grapheme_boundary = FALSE; /* Rule GB9a */
        else if (prev_GB_type == GB_Prepend)
          is_grapheme_boundary = FALSE; /* Rule GB9b */
-       /* Rule GB10 */
-       else if (prev_GB_type == GB_E_Base || prev_GB_type == GB_E_Base_GAZ)
-         {
-            if (GB_type == GB_E_Modifier)
-              is_grapheme_boundary = FALSE;
-            else
-              is_grapheme_boundary = TRUE;
-          }
-       else if (prev_GB_type == GB_ZWJ &&
-                 (GB_type == GB_Glue_After_Zwj || GB_type == GB_E_Base_GAZ))
-         is_grapheme_boundary = FALSE; /* Rule GB11 */
+       else if (is_Extended_Pictographic)
+         { /* Rule GB11 */
+           if (prev_GB_type == GB_ZWJ && met_Extended_Pictographic)
+             is_grapheme_boundary = FALSE;
+         }
        else if (prev_GB_type == GB_RI_Odd && GB_type == GB_RI_Even)
          is_grapheme_boundary = FALSE; /* Rule GB12 and GB13 */
-       else
-         is_grapheme_boundary = TRUE; /* Rule GB999 */
+
+       if (is_Extended_Pictographic)
+         met_Extended_Pictographic = TRUE;
 
        attrs[i].is_cursor_position = is_grapheme_boundary;
        /* If this is a grapheme boundary, we have to decide if backspace
         * deletes a character or the whole grapheme cluster */
        if (is_grapheme_boundary)
           {
-            if (prev_GB_type == GB_E_Base ||
-                prev_GB_type == GB_E_Base_GAZ ||
-                prev_GB_type == GB_Glue_After_Zwj ||
-                prev_GB_type == GB_Extend ||
-                prev_GB_type == GB_E_Modifier ||
-                prev_GB_type == GB_RI_Odd ||
-                prev_GB_type == GB_RI_Even)
-             attrs[i].backspace_deletes_character = FALSE;
-            else
-             attrs[i].backspace_deletes_character = BACKSPACE_DELETES_CHARACTER (base_character);
+           attrs[i].backspace_deletes_character = BACKSPACE_DELETES_CHARACTER (base_character);
           }
        else
          attrs[i].backspace_deletes_character = FALSE;
diff --git a/pango/pango-emoji-private.h b/pango/pango-emoji-private.h
index a360b37a..ed4b7213 100644
--- a/pango/pango-emoji-private.h
+++ b/pango/pango-emoji-private.h
@@ -24,6 +24,9 @@
 
 #include <glib.h>
 
+gboolean
+_pango_Is_Emoji_Extended_Pictographic (gunichar ch);
+
 typedef struct _PangoEmojiIter PangoEmojiIter;
 
 struct _PangoEmojiIter
diff --git a/pango/pango-emoji.c b/pango/pango-emoji.c
index 46ab5b3f..158daa5b 100644
--- a/pango/pango-emoji.c
+++ b/pango/pango-emoji.c
@@ -92,7 +92,13 @@ DEFINE_pango_Is_(Emoji)
 DEFINE_pango_Is_(Emoji_Presentation)
 DEFINE_pango_Is_(Emoji_Modifier)
 DEFINE_pango_Is_(Emoji_Modifier_Base)
+DEFINE_pango_Is_(Extended_Pictographic)
 
+gboolean
+_pango_Is_Emoji_Extended_Pictographic (gunichar ch)
+{
+       return _pango_Is_Extended_Pictographic (ch);
+}
 
 static gboolean
 _pango_Is_Emoji_Text_Default (gunichar ch)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]