[pango] Update emoji scanner ragel file to latest from Chrome



commit 267d991d9b6f040580a6421a0f52c292ed9304dc
Author: Behdad Esfahbod <behdad behdad org>
Date:   Thu Jan 24 11:42:34 2019 +0100

    Update emoji scanner ragel file to latest from Chrome

 pango/emoji_presentation_scanner.c  | 66 ++++++++++++++++++-------------------
 pango/emoji_presentation_scanner.rl | 38 +++++++++++----------
 pango/pango-emoji.c                 | 24 ++++++--------
 3 files changed, 63 insertions(+), 65 deletions(-)
---
diff --git a/pango/emoji_presentation_scanner.c b/pango/emoji_presentation_scanner.c
index 70c4451d..97a52927 100644
--- a/pango/emoji_presentation_scanner.c
+++ b/pango/emoji_presentation_scanner.c
@@ -88,23 +88,22 @@ static const int emoji_presentation_en_text_and_emoji_run = 2;
 
 
 
-#line 76 "emoji_presentation_scanner.rl"
+#line 78 "emoji_presentation_scanner.rl"
 
 
-static gboolean
-scan_emoji_presentation (const unsigned char* buffer,
-                         unsigned buffer_size,
-                         unsigned cursor,
-                         unsigned* end)
+static emoji_text_iter_t
+scan_emoji_presentation (emoji_text_iter_t p,
+    const emoji_text_iter_t pe,
+    bool* is_emoji)
 {
-  const unsigned char *p = buffer + cursor;
-  const unsigned char *pe, *eof, *ts, *te;
+  emoji_text_iter_t ts, te;
+  const emoji_text_iter_t eof = pe;
+
   unsigned act;
   int cs;
-  pe = eof = buffer + buffer_size;
 
   
-#line 108 "emoji_presentation_scanner.c"
+#line 107 "emoji_presentation_scanner.c"
        {
        cs = emoji_presentation_start;
        ts = 0;
@@ -112,7 +111,7 @@ scan_emoji_presentation (const unsigned char* buffer,
        act = 0;
        }
 
-#line 116 "emoji_presentation_scanner.c"
+#line 115 "emoji_presentation_scanner.c"
        {
        int _klen;
        unsigned int _trans;
@@ -131,7 +130,7 @@ _resume:
 #line 1 "NONE"
        {ts = p;}
        break;
-#line 135 "emoji_presentation_scanner.c"
+#line 134 "emoji_presentation_scanner.c"
                }
        }
 
@@ -202,50 +201,50 @@ _eof_trans:
        {te = p+1;}
        break;
        case 3:
-#line 72 "emoji_presentation_scanner.rl"
+#line 74 "emoji_presentation_scanner.rl"
        {act = 2;}
        break;
        case 4:
-#line 73 "emoji_presentation_scanner.rl"
+#line 75 "emoji_presentation_scanner.rl"
        {act = 3;}
        break;
        case 5:
-#line 71 "emoji_presentation_scanner.rl"
-       {te = p+1;{ found_text_presentation_sequence }}
+#line 73 "emoji_presentation_scanner.rl"
+       {te = p+1;{ *is_emoji = false; return te; }}
        break;
        case 6:
-#line 72 "emoji_presentation_scanner.rl"
-       {te = p+1;{ found_emoji_presentation_sequence }}
+#line 74 "emoji_presentation_scanner.rl"
+       {te = p+1;{ *is_emoji = true; return te; }}
        break;
        case 7:
-#line 73 "emoji_presentation_scanner.rl"
-       {te = p+1;{ found_text_presentation_sequence }}
+#line 75 "emoji_presentation_scanner.rl"
+       {te = p+1;{ *is_emoji = false; return te; }}
        break;
        case 8:
-#line 72 "emoji_presentation_scanner.rl"
-       {te = p;p--;{ found_emoji_presentation_sequence }}
+#line 74 "emoji_presentation_scanner.rl"
+       {te = p;p--;{ *is_emoji = true; return te; }}
        break;
        case 9:
-#line 73 "emoji_presentation_scanner.rl"
-       {te = p;p--;{ found_text_presentation_sequence }}
+#line 75 "emoji_presentation_scanner.rl"
+       {te = p;p--;{ *is_emoji = false; return te; }}
        break;
        case 10:
-#line 72 "emoji_presentation_scanner.rl"
-       {{p = ((te))-1;}{ found_emoji_presentation_sequence }}
+#line 74 "emoji_presentation_scanner.rl"
+       {{p = ((te))-1;}{ *is_emoji = true; return te; }}
        break;
        case 11:
 #line 1 "NONE"
        {       switch( act ) {
        case 2:
-       {{p = ((te))-1;} found_emoji_presentation_sequence }
+       {{p = ((te))-1;} *is_emoji = true; return te; }
        break;
        case 3:
-       {{p = ((te))-1;} found_text_presentation_sequence }
+       {{p = ((te))-1;} *is_emoji = false; return te; }
        break;
        }
        }
        break;
-#line 249 "emoji_presentation_scanner.c"
+#line 248 "emoji_presentation_scanner.c"
                }
        }
 
@@ -258,7 +257,7 @@ _again:
 #line 1 "NONE"
        {ts = 0;}
        break;
-#line 262 "emoji_presentation_scanner.c"
+#line 261 "emoji_presentation_scanner.c"
                }
        }
 
@@ -275,9 +274,10 @@ _again:
 
        }
 
-#line 93 "emoji_presentation_scanner.rl"
+#line 94 "emoji_presentation_scanner.rl"
 
 
-  g_assert_not_reached ();
+  /* Should not be reached. */
+  *is_emoji = false;
+  return pe;
 }
-
diff --git a/pango/emoji_presentation_scanner.rl b/pango/emoji_presentation_scanner.rl
index 4736f462..d9c26919 100644
--- a/pango/emoji_presentation_scanner.rl
+++ b/pango/emoji_presentation_scanner.rl
@@ -30,7 +30,7 @@ TAG_TERM = 15;
 any_emoji =  EMOJI_TEXT_PRESENTATION | EMOJI_EMOJI_PRESENTATION |  KEYCAP_BASE |
   EMOJI_MODIFIER_BASE | TAG_BASE | EMOJI;
 
-emoji_combining_encloding_circle_backslash_sequence = any_emoji
+emoji_combining_enclosing_circle_backslash_sequence = any_emoji
   COMBINING_ENCLOSING_CIRCLE_BACKSLASH;
 
 # This could be sharper than any_emoji by restricting this only to valid
@@ -58,40 +58,42 @@ emoji_zwj_element =  emoji_presentation_sequence | emoji_modifier_sequence | any
 emoji_zwj_sequence = emoji_zwj_element ( ZWJ emoji_zwj_element )+;
 
 emoji_presentation = EMOJI_EMOJI_PRESENTATION | TAG_BASE | EMOJI_MODIFIER_BASE |
-  emoji_presentation_sequence | emoji_modifier_sequence | emoji_flag_sequence |
-  emoji_tag_sequence | emoji_keycap_sequence | emoji_zwj_sequence |
-  emoji_combining_encloding_circle_backslash_sequence;
+ emoji_presentation_sequence | emoji_modifier_sequence | emoji_flag_sequence |
+ emoji_tag_sequence | emoji_keycap_sequence | emoji_zwj_sequence |
+ emoji_combining_enclosing_circle_backslash_sequence;
 
 emoji_run = emoji_presentation;
 
 text_presentation_emoji = any_emoji VS15;
-text_run = text_presentation_emoji | any;
+text_run = any;
 
 text_and_emoji_run := |*
-text_presentation_emoji => { found_text_presentation_sequence };
-emoji_run => { found_emoji_presentation_sequence };
-any => { found_text_presentation_sequence };
+# In order to give the VS15 sequences higher priority than detecting
+# emoji sequences they are listed first as scanner token here.
+text_presentation_emoji => { *is_emoji = false; return te; };
+emoji_run => { *is_emoji = true; return te; };
+text_run => { *is_emoji = false; return te; };
 *|;
 
 }%%
 
-static gboolean
-scan_emoji_presentation (const unsigned char* buffer,
-                         unsigned buffer_size,
-                         unsigned cursor,
-                         unsigned* end)
+static emoji_text_iter_t
+scan_emoji_presentation (emoji_text_iter_t p,
+    const emoji_text_iter_t pe,
+    bool* is_emoji)
 {
-  const unsigned char *p = buffer + cursor;
-  const unsigned char *pe, *eof, *ts, *te;
+  emoji_text_iter_t ts, te;
+  const emoji_text_iter_t eof = pe;
+
   unsigned act;
   int cs;
-  pe = eof = buffer + buffer_size;
 
   %%{
     write init;
     write exec;
   }%%
 
-  g_assert_not_reached ();
+  /* Should not be reached. */
+  *is_emoji = false;
+  return pe;
 }
-
diff --git a/pango/pango-emoji.c b/pango/pango-emoji.c
index 886fef53..c0e0de60 100644
--- a/pango/pango-emoji.c
+++ b/pango/pango-emoji.c
@@ -192,18 +192,10 @@ _pango_EmojiSegmentationCategory (gunichar codepoint)
   return kMaxEmojiScannerCategory;
 }
 
-#define found_text_presentation_sequence                                 \
-  {                                                                      \
-    if (0) g_print ("text  %ld..%ld\n", ts - buffer, te - buffer);       \
-    *end = te - buffer;                                                  \
-    return FALSE;                                                        \
-  }
-#define found_emoji_presentation_sequence                                \
-  {                                                                      \
-    if (0) g_print ("emoji %ld..%ld\n", ts - buffer, te - buffer);       \
-    *end = te - buffer;                                                  \
-    return TRUE;                                                         \
-  }
+
+typedef gboolean bool;
+enum { false = FALSE, true = TRUE };
+typedef unsigned char *emoji_text_iter_t;
 
 #include "emoji_presentation_scanner.c"
 
@@ -259,7 +251,9 @@ _pango_emoji_iter_next (PangoEmojiIter *iter)
   iter->start = iter->end;
 
   old_cursor = cursor = iter->cursor;
-  is_emoji = scan_emoji_presentation (iter->types, iter->n_chars, cursor, &cursor);
+  cursor = scan_emoji_presentation (iter->types + cursor,
+                                   iter->types + iter->n_chars,
+                                   &is_emoji) - iter->types;
   do
   {
     iter->cursor = cursor;
@@ -268,7 +262,9 @@ _pango_emoji_iter_next (PangoEmojiIter *iter)
     if (cursor == iter->n_chars)
       break;
 
-    is_emoji = scan_emoji_presentation (iter->types, iter->n_chars, cursor, &cursor);
+    cursor = scan_emoji_presentation (iter->types + cursor,
+                                     iter->types + iter->n_chars,
+                                     &is_emoji) - iter->types;
   }
   while (iter->is_emoji == is_emoji);
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]