[pango] Update pango_default_break function for Emoji ZWJ sequence



commit 93474c366309feec2c562637711c8b2f0dd27790
Author: Peng Wu <alexepico gmail com>
Date:   Fri May 19 09:52:11 2017 +0800

    Update pango_default_break function for Emoji ZWJ sequence
    
    Support Grapheme Cluster Boundary Rules GB10, GB11, GB12 and GB13 (UAX #29).
    
    https://bugzilla.gnome.org/show_bug.cgi?id=782813

 pango/break.c |  117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 116 insertions(+), 1 deletions(-)
---
diff --git a/pango/break.c b/pango/break.c
index 498f764..b650ca5 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -521,6 +521,15 @@ pango_default_break (const gchar   *text,
     GB_Prepend,
     GB_SpacingMark,
     GB_InHangulSyllable, /* Handles all of L, V, T, LV, LVT rules */
+    /* Use a state machine to handle emoji sequences */
+    /* Rule GB10 and GB11 */
+    GB_E_Base,
+    GB_E_Modifier,
+    GB_Glue_After_Zwj,
+    GB_E_Base_GAZ,
+    /* Rule GB12 and GB13 */
+    GB_RI_Odd, /* An odd number of consecutive Regional Indicators seen so far */
+    GB_RI_Even, /* An even number of consecutive Regional Indicators seen so far */
   } GraphemeBreakType;
   GraphemeBreakType prev_GB_type = GB_Other;
 
@@ -671,6 +680,17 @@ pango_default_break (const gchar   *text,
                GB_type = GB_Extend; /* U+200C and U+200D are Other_Grapheme_Extend */
                break;
              }
+        if (G_UNLIKELY((wc >= 0x600 && wc <= 0x605) ||
+                       wc == 0x6DD ||
+                       wc == 0x70F ||
+                       wc == 0x8E2 ||
+                       wc == 0xD4E ||
+                       wc == 0x110BD ||
+                       (wc >= 0x111C2 && wc <= 0x111C3)))
+          {
+              GB_type = GB_Prepend;
+              break;
+          }
            /* fall through */
          case G_UNICODE_CONTROL:
          case G_UNICODE_LINE_SEPARATOR:
@@ -715,9 +735,86 @@ pango_default_break (const gchar   *text,
          case G_UNICODE_NON_SPACING_MARK:
            GB_type = GB_Extend; /* Grapheme_Extend */
            break;
+
+      case G_UNICODE_OTHER_SYMBOL:
+
+        if (G_UNLIKELY(wc == 0x261D ||
+                       wc == 0x26F9 ||
+                       (wc >= 0x270A && wc <= 0x270D) ||
+                       wc == 0x1F385 ||
+                       (wc >= 0x1F3C2 && wc <= 0x1F3C4) ||
+                       wc == 0x1F3C7 ||
+                       (wc >= 0x1F3CA && wc <= 0x1F3CC) ||
+                       (wc >= 0x1F442 && wc <= 0x1F443) ||
+                       (wc >= 0x1F446 && wc <= 0x1F450) ||
+                       wc == 0x1F46E ||
+                       (wc >= 0x1F470 && wc <= 0x1F478) ||
+                       wc == 0x1F47C ||
+                       (wc >= 0x1F481 && wc <= 0x1F483) ||
+                       (wc >= 0x1F485 && wc <= 0x1F487) ||
+                       wc == 0x1F4AA ||
+                       (wc >= 0x1F574 && wc <= 0x1F575) ||
+                       wc == 0x1F57A ||
+                       wc == 0x1F590 ||
+                       (wc >= 0x1F595 && wc <= 0x1F596) ||
+                       (wc >= 0x1F645 && wc <= 0x1F647) ||
+                       (wc >= 0x1F64B && wc <= 0x1F64F) ||
+                       wc == 0x1F6A3 ||
+                       (wc >= 0x1F6B4 && wc <= 0x1F6B6) ||
+                       wc == 0x1F6C0 ||
+                       wc == 0x1F6CC ||
+                       (wc >= 0x1F918 && wc <= 0x1F91C) ||
+                       (wc >= 0x1F91E && wc <= 0x1F91F) ||
+                       wc == 0x1F926 ||
+                       (wc >= 0x1F930 && wc <= 0x1F939) ||
+                       (wc >= 0x1F93D && wc <= 0x1F93E) ||
+                       (wc >= 0x1F9D1 && wc <= 0x1F9DD)))
+          GB_type = GB_E_Base;
+
+        if (G_UNLIKELY(wc == 0x2640 ||
+                       wc == 0x2642 ||
+                       (wc >= 0x2695 && wc <= 0x2696) ||
+                       wc == 0x2708 ||
+                       wc == 0x2764 ||
+                       wc == 0x1F308 ||
+                       wc == 0x1F33E ||
+                       wc == 0x1F373 ||
+                       wc == 0x1F393 ||
+                       wc == 0x1F3A4 ||
+                       wc == 0x1F3A8 ||
+                       wc == 0x1F3EB ||
+                       wc == 0x1F3ED ||
+                       wc == 0x1F48B ||
+                       (wc >= 0x1F4BB && wc <= 0x1F4BC) ||
+                       wc == 0x1F527 ||
+                       wc == 0x1F52C ||
+                       wc == 0x1F5E8 ||
+                       wc == 0x1F680 ||
+                       wc == 0x1F692))
+          GB_type = GB_Glue_After_Zwj;
+
+        if (G_UNLIKELY(wc >= 0x1F466 && wc <= 0x1F469))
+          GB_type = GB_E_Base_GAZ;
+
+        if (G_UNLIKELY(wc >=0x1F1E6 && wc <=0x1F1FF))
+          {
+            if (prev_GB_type == GB_RI_Odd)
+              GB_type = GB_RI_Even;
+            else if (prev_GB_type == GB_RI_Even)
+              GB_type = GB_RI_Odd;
+            else
+              GB_type = GB_RI_Odd;
+          }
+        break;
+
+    case G_UNICODE_MODIFIER_SYMBOL:
+      if (wc >= 0x1F3FB && wc <= 0x1F3FF)
+        GB_type = GB_E_Modifier;
+      break;
          }
 
        /* Grapheme Cluster Boundary Rules */
+
        /* We apply Rules GB1 and GB2 at the end of the function */
        if (wc == '\n' && prev_wc == '\r')
          is_grapheme_boundary = FALSE; /* Rule GB3 */
@@ -726,13 +823,31 @@ pango_default_break (const gchar   *text,
        else if (GB_type == GB_InHangulSyllable)
          is_grapheme_boundary = FALSE; /* Rules GB6, GB7, GB8 */
        else if (GB_type == GB_Extend)
+        {
+      /* Rule GB10: keep the E_Base state across Extend so a later E_Modifier still attaches */
+      if (prev_GB_type == GB_E_Base || prev_GB_type == GB_E_Base_GAZ)
+           GB_type = prev_GB_type;
          is_grapheme_boundary = FALSE; /* Rule GB9 */
+        }
        else if (GB_type == GB_SpacingMark)
          is_grapheme_boundary = FALSE; /* Rule GB9a */
        else if (prev_GB_type == GB_Prepend)
          is_grapheme_boundary = FALSE; /* Rule GB9b */
+       /* Rule GB10 */
+       else if (prev_GB_type == GB_E_Base || prev_GB_type == GB_E_Base_GAZ)
+         {
+        if (GB_type == GB_E_Modifier)
+          is_grapheme_boundary = FALSE;
+        else
+          is_grapheme_boundary = TRUE;
+      }
+       else if (prev_wc == 0x200D &&
+           (GB_type == GB_Glue_After_Zwj || GB_type == GB_E_Base_GAZ))
+         is_grapheme_boundary = FALSE; /* Rule GB11 */
+       else if (prev_GB_type == GB_RI_Odd && GB_type == GB_RI_Even)
+         is_grapheme_boundary = FALSE; /* Rule GB12 and GB13 */
        else
-         is_grapheme_boundary = TRUE;  /* Rule GB10 */
+         is_grapheme_boundary = TRUE;  /* Rule GB999 */
 
        prev_GB_type = GB_type;
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]