[pango] Update emoji scanner ragel file to latest from Chrome
- From: Behdad Esfahbod <behdad src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [pango] Update emoji scanner ragel file to latest from Chrome
- Date: Thu, 24 Jan 2019 10:43:12 +0000 (UTC)
commit 267d991d9b6f040580a6421a0f52c292ed9304dc
Author: Behdad Esfahbod <behdad behdad org>
Date: Thu Jan 24 11:42:34 2019 +0100
Update emoji scanner ragel file to latest from Chrome
pango/emoji_presentation_scanner.c | 66 ++++++++++++++++++-------------------
pango/emoji_presentation_scanner.rl | 38 +++++++++++----------
pango/pango-emoji.c | 24 ++++++--------
3 files changed, 63 insertions(+), 65 deletions(-)
---
diff --git a/pango/emoji_presentation_scanner.c b/pango/emoji_presentation_scanner.c
index 70c4451d..97a52927 100644
--- a/pango/emoji_presentation_scanner.c
+++ b/pango/emoji_presentation_scanner.c
@@ -88,23 +88,22 @@ static const int emoji_presentation_en_text_and_emoji_run = 2;
-#line 76 "emoji_presentation_scanner.rl"
+#line 78 "emoji_presentation_scanner.rl"
-static gboolean
-scan_emoji_presentation (const unsigned char* buffer,
- unsigned buffer_size,
- unsigned cursor,
- unsigned* end)
+static emoji_text_iter_t
+scan_emoji_presentation (emoji_text_iter_t p,
+ const emoji_text_iter_t pe,
+ bool* is_emoji)
{
- const unsigned char *p = buffer + cursor;
- const unsigned char *pe, *eof, *ts, *te;
+ emoji_text_iter_t ts, te;
+ const emoji_text_iter_t eof = pe;
+
unsigned act;
int cs;
- pe = eof = buffer + buffer_size;
-#line 108 "emoji_presentation_scanner.c"
+#line 107 "emoji_presentation_scanner.c"
{
cs = emoji_presentation_start;
ts = 0;
@@ -112,7 +111,7 @@ scan_emoji_presentation (const unsigned char* buffer,
act = 0;
}
-#line 116 "emoji_presentation_scanner.c"
+#line 115 "emoji_presentation_scanner.c"
{
int _klen;
unsigned int _trans;
@@ -131,7 +130,7 @@ _resume:
#line 1 "NONE"
{ts = p;}
break;
-#line 135 "emoji_presentation_scanner.c"
+#line 134 "emoji_presentation_scanner.c"
}
}
@@ -202,50 +201,50 @@ _eof_trans:
{te = p+1;}
break;
case 3:
-#line 72 "emoji_presentation_scanner.rl"
+#line 74 "emoji_presentation_scanner.rl"
{act = 2;}
break;
case 4:
-#line 73 "emoji_presentation_scanner.rl"
+#line 75 "emoji_presentation_scanner.rl"
{act = 3;}
break;
case 5:
-#line 71 "emoji_presentation_scanner.rl"
- {te = p+1;{ found_text_presentation_sequence }}
+#line 73 "emoji_presentation_scanner.rl"
+ {te = p+1;{ *is_emoji = false; return te; }}
break;
case 6:
-#line 72 "emoji_presentation_scanner.rl"
- {te = p+1;{ found_emoji_presentation_sequence }}
+#line 74 "emoji_presentation_scanner.rl"
+ {te = p+1;{ *is_emoji = true; return te; }}
break;
case 7:
-#line 73 "emoji_presentation_scanner.rl"
- {te = p+1;{ found_text_presentation_sequence }}
+#line 75 "emoji_presentation_scanner.rl"
+ {te = p+1;{ *is_emoji = false; return te; }}
break;
case 8:
-#line 72 "emoji_presentation_scanner.rl"
- {te = p;p--;{ found_emoji_presentation_sequence }}
+#line 74 "emoji_presentation_scanner.rl"
+ {te = p;p--;{ *is_emoji = true; return te; }}
break;
case 9:
-#line 73 "emoji_presentation_scanner.rl"
- {te = p;p--;{ found_text_presentation_sequence }}
+#line 75 "emoji_presentation_scanner.rl"
+ {te = p;p--;{ *is_emoji = false; return te; }}
break;
case 10:
-#line 72 "emoji_presentation_scanner.rl"
- {{p = ((te))-1;}{ found_emoji_presentation_sequence }}
+#line 74 "emoji_presentation_scanner.rl"
+ {{p = ((te))-1;}{ *is_emoji = true; return te; }}
break;
case 11:
#line 1 "NONE"
{ switch( act ) {
case 2:
- {{p = ((te))-1;} found_emoji_presentation_sequence }
+ {{p = ((te))-1;} *is_emoji = true; return te; }
break;
case 3:
- {{p = ((te))-1;} found_text_presentation_sequence }
+ {{p = ((te))-1;} *is_emoji = false; return te; }
break;
}
}
break;
-#line 249 "emoji_presentation_scanner.c"
+#line 248 "emoji_presentation_scanner.c"
}
}
@@ -258,7 +257,7 @@ _again:
#line 1 "NONE"
{ts = 0;}
break;
-#line 262 "emoji_presentation_scanner.c"
+#line 261 "emoji_presentation_scanner.c"
}
}
@@ -275,9 +274,10 @@ _again:
}
-#line 93 "emoji_presentation_scanner.rl"
+#line 94 "emoji_presentation_scanner.rl"
- g_assert_not_reached ();
+ /* Should not be reached. */
+ *is_emoji = false;
+ return pe;
}
-
diff --git a/pango/emoji_presentation_scanner.rl b/pango/emoji_presentation_scanner.rl
index 4736f462..d9c26919 100644
--- a/pango/emoji_presentation_scanner.rl
+++ b/pango/emoji_presentation_scanner.rl
@@ -30,7 +30,7 @@ TAG_TERM = 15;
any_emoji = EMOJI_TEXT_PRESENTATION | EMOJI_EMOJI_PRESENTATION | KEYCAP_BASE |
EMOJI_MODIFIER_BASE | TAG_BASE | EMOJI;
-emoji_combining_encloding_circle_backslash_sequence = any_emoji
+emoji_combining_enclosing_circle_backslash_sequence = any_emoji
COMBINING_ENCLOSING_CIRCLE_BACKSLASH;
# This could be sharper than any_emoji by restricting this only to valid
@@ -58,40 +58,42 @@ emoji_zwj_element = emoji_presentation_sequence | emoji_modifier_sequence | any
emoji_zwj_sequence = emoji_zwj_element ( ZWJ emoji_zwj_element )+;
emoji_presentation = EMOJI_EMOJI_PRESENTATION | TAG_BASE | EMOJI_MODIFIER_BASE |
- emoji_presentation_sequence | emoji_modifier_sequence | emoji_flag_sequence |
- emoji_tag_sequence | emoji_keycap_sequence | emoji_zwj_sequence |
- emoji_combining_encloding_circle_backslash_sequence;
+ emoji_presentation_sequence | emoji_modifier_sequence | emoji_flag_sequence |
+ emoji_tag_sequence | emoji_keycap_sequence | emoji_zwj_sequence |
+ emoji_combining_enclosing_circle_backslash_sequence;
emoji_run = emoji_presentation;
text_presentation_emoji = any_emoji VS15;
-text_run = text_presentation_emoji | any;
+text_run = any;
text_and_emoji_run := |*
-text_presentation_emoji => { found_text_presentation_sequence };
-emoji_run => { found_emoji_presentation_sequence };
-any => { found_text_presentation_sequence };
+# In order to give the VS15 sequences higher priority than detecting
+# emoji sequences they are listed first as scanner token here.
+text_presentation_emoji => { *is_emoji = false; return te; };
+emoji_run => { *is_emoji = true; return te; };
+text_run => { *is_emoji = false; return te; };
*|;
}%%
-static gboolean
-scan_emoji_presentation (const unsigned char* buffer,
- unsigned buffer_size,
- unsigned cursor,
- unsigned* end)
+static emoji_text_iter_t
+scan_emoji_presentation (emoji_text_iter_t p,
+ const emoji_text_iter_t pe,
+ bool* is_emoji)
{
- const unsigned char *p = buffer + cursor;
- const unsigned char *pe, *eof, *ts, *te;
+ emoji_text_iter_t ts, te;
+ const emoji_text_iter_t eof = pe;
+
unsigned act;
int cs;
- pe = eof = buffer + buffer_size;
%%{
write init;
write exec;
}%%
- g_assert_not_reached ();
+ /* Should not be reached. */
+ *is_emoji = false;
+ return pe;
}
-
diff --git a/pango/pango-emoji.c b/pango/pango-emoji.c
index 886fef53..c0e0de60 100644
--- a/pango/pango-emoji.c
+++ b/pango/pango-emoji.c
@@ -192,18 +192,10 @@ _pango_EmojiSegmentationCategory (gunichar codepoint)
return kMaxEmojiScannerCategory;
}
-#define found_text_presentation_sequence \
- { \
- if (0) g_print ("text %ld..%ld\n", ts - buffer, te - buffer); \
- *end = te - buffer; \
- return FALSE; \
- }
-#define found_emoji_presentation_sequence \
- { \
- if (0) g_print ("emoji %ld..%ld\n", ts - buffer, te - buffer); \
- *end = te - buffer; \
- return TRUE; \
- }
+
+typedef gboolean bool;
+enum { false = FALSE, true = TRUE };
+typedef unsigned char *emoji_text_iter_t;
#include "emoji_presentation_scanner.c"
@@ -259,7 +251,9 @@ _pango_emoji_iter_next (PangoEmojiIter *iter)
iter->start = iter->end;
old_cursor = cursor = iter->cursor;
- is_emoji = scan_emoji_presentation (iter->types, iter->n_chars, cursor, &cursor);
+ cursor = scan_emoji_presentation (iter->types + cursor,
+ iter->types + iter->n_chars,
+ &is_emoji) - iter->types;
do
{
iter->cursor = cursor;
@@ -268,7 +262,9 @@ _pango_emoji_iter_next (PangoEmojiIter *iter)
if (cursor == iter->n_chars)
break;
- is_emoji = scan_emoji_presentation (iter->types, iter->n_chars, cursor, &cursor);
+ cursor = scan_emoji_presentation (iter->types + cursor,
+ iter->types + iter->n_chars,
+ &is_emoji) - iter->types;
}
while (iter->is_emoji == is_emoji);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]