[pango] handle VS15 emoji sequences



commit e7c292e918f318eccb6b756170640517331eee7a
Author: Behdad Esfahbod <behdad behdad org>
Date:   Wed Jan 23 10:56:37 2019 +0100

    handle VS15 (U+FE0E, the text-presentation variation selector) emoji sequences

 pango/emoji_presentation_scanner.c  | 662 ++++++++++++------------------------
 pango/emoji_presentation_scanner.rl |   9 +-
 pango/pango-emoji-private.h         |   3 -
 pango/pango-emoji.c                 |  75 ++--
 4 files changed, 256 insertions(+), 493 deletions(-)
---
diff --git a/pango/emoji_presentation_scanner.c b/pango/emoji_presentation_scanner.c
index 43872abb..70c4451d 100644
--- a/pango/emoji_presentation_scanner.c
+++ b/pango/emoji_presentation_scanner.c
@@ -1,497 +1,283 @@
+
+#line 1 "emoji_presentation_scanner.rl"
 // Copyright 2018 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
 
+#line 9 "emoji_presentation_scanner.c"
 static const char _emoji_presentation_actions[] = {
-       0, 1, 0, 1, 1, 1, 5, 1,
-       6, 1, 7, 1, 8, 1, 9, 2,
-       2, 3, 2, 2, 4, 0
+       0, 1, 0, 1, 1, 1, 5, 1, 
+       6, 1, 7, 1, 8, 1, 9, 1, 
+       10, 1, 11, 2, 2, 3, 2, 2, 
+       4
 };
 
 static const char _emoji_presentation_key_offsets[] = {
-       0, 3, 8, 9, 13, 15, 22, 26,
-       33, 42, 52, 63, 71, 82, 92, 103,
-       115, 116, 121, 0
+       0, 5, 7, 14, 18, 20, 21, 24, 
+       29, 30, 34, 36
 };
 
 static const unsigned char _emoji_presentation_trans_keys[] = {
-       9u, 10u, 12u, 3u, 7u, 13u, 0u, 2u,
-       6u, 10u, 12u, 8u, 9u, 14u, 15u, 2u,
-       3u, 6u, 7u, 13u, 0u, 1u, 9u, 10u,
-       11u, 12u, 2u, 3u, 6u, 7u, 13u, 0u,
-       1u, 2u, 3u, 6u, 7u, 10u, 12u, 13u,
-       0u, 1u, 2u, 3u, 6u, 7u, 9u, 10u,
-       12u, 13u, 0u, 1u, 2u, 3u, 4u, 6u,
-       7u, 9u, 10u, 12u, 13u, 0u, 1u, 2u,
-       3u, 6u, 7u, 10u, 13u, 0u, 1u, 2u,
-       3u, 6u, 7u, 9u, 10u, 12u, 13u, 14u,
-       0u, 1u, 2u, 3u, 4u, 6u, 7u, 10u,
-       12u, 13u, 0u, 1u, 2u, 3u, 6u, 7u,
-       9u, 10u, 11u, 12u, 13u, 0u, 1u, 2u,
-       3u, 4u, 6u, 7u, 9u, 10u, 11u, 12u,
-       13u, 0u, 1u, 6u, 10u, 11u, 12u, 8u,
-       9u, 2u, 3u, 6u, 7u, 9u, 10u, 11u,
-       12u, 13u, 14u, 0u, 1u, 0u
+       3u, 7u, 13u, 0u, 2u, 14u, 15u, 2u, 
+       3u, 6u, 7u, 13u, 0u, 1u, 9u, 10u, 
+       11u, 12u, 10u, 12u, 10u, 4u, 10u, 12u, 
+       4u, 9u, 10u, 11u, 12u, 6u, 9u, 10u, 
+       11u, 12u, 8u, 10u, 9u, 10u, 11u, 12u, 
+       14u, 0
 };
 
 static const char _emoji_presentation_single_lengths[] = {
-       3, 3, 1, 2, 2, 5, 4, 5,
-       7, 8, 9, 6, 9, 8, 9, 10,
-       1, 3, 10, 0
+       3, 2, 5, 4, 2, 1, 3, 5, 
+       1, 4, 2, 5
 };
 
 static const char _emoji_presentation_range_lengths[] = {
-       0, 1, 0, 1, 0, 1, 0, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       0, 1, 1, 0
+       1, 0, 1, 0, 0, 0, 0, 0, 
+       0, 0, 0, 0
 };
 
 static const char _emoji_presentation_index_offsets[] = {
-       0, 4, 9, 11, 15, 18, 25, 30,
-       37, 46, 56, 67, 75, 86, 96, 107,
-       119, 121, 126, 0
-};
-
-static const char _emoji_presentation_trans_cond_spaces[] = {
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, 0
-};
-
-static const short _emoji_presentation_trans_offsets[] = {
-       0, 1, 2, 3, 4, 5, 6, 7,
-       8, 9, 10, 11, 12, 13, 14, 15,
-       16, 17, 18, 19, 20, 21, 22, 23,
-       24, 25, 26, 27, 28, 29, 30, 31,
-       32, 33, 34, 35, 36, 37, 38, 39,
-       40, 41, 42, 43, 44, 45, 46, 47,
-       48, 49, 50, 51, 52, 53, 54, 55,
-       56, 57, 58, 59, 60, 61, 62, 63,
-       64, 65, 66, 67, 68, 69, 70, 71,
-       72, 73, 74, 75, 76, 77, 78, 79,
-       80, 81, 82, 83, 84, 85, 86, 87,
-       88, 89, 90, 91, 92, 93, 94, 95,
-       96, 97, 98, 99, 100, 101, 102, 103,
-       104, 105, 106, 107, 108, 109, 110, 111,
-       112, 113, 114, 115, 116, 117, 118, 119,
-       120, 121, 122, 123, 124, 125, 126, 127,
-       128, 129, 130, 131, 132, 133, 134, 135,
-       136, 137, 138, 139, 140, 141, 142, 143,
-       144, 145, 146, 147, 148, 149, 150, 151,
-       152, 153, 154, 155, 0
-};
-
-static const char _emoji_presentation_trans_lengths[] = {
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 0
+       0, 5, 8, 15, 20, 23, 25, 29, 
+       35, 37, 42, 45
 };
 
-static const char _emoji_presentation_cond_keys[] = {
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0
+static const char _emoji_presentation_indicies[] = {
+       2, 1, 1, 1, 0, 4, 5, 3, 
+       7, 8, 10, 11, 12, 6, 9, 5, 
+       13, 14, 15, 0, 13, 15, 16, 13, 
+       16, 15, 13, 15, 16, 15, 5, 13, 
+       14, 15, 16, 5, 17, 5, 13, 14, 
+       18, 17, 5, 13, 16, 5, 13, 14, 
+       15, 4, 16, 0
 };
 
-static const char _emoji_presentation_cond_targs[] = {
-       7, 1, 11, 5, 13, 8, 8, 8,
-       5, 7, 5, 1, 11, 7, 5, 4,
-       7, 5, 14, 15, 16, 17, 18, 6,
-       5, 7, 1, 5, 11, 5, 9, 10,
-       2, 3, 12, 0, 5, 9, 10, 2,
-       3, 1, 11, 12, 0, 5, 9, 10,
-       2, 3, 7, 1, 11, 12, 0, 5,
-       9, 10, 11, 2, 3, 7, 1, 11,
-       12, 0, 5, 9, 10, 2, 3, 1,
-       12, 0, 5, 9, 10, 2, 3, 7,
-       1, 11, 12, 4, 0, 5, 9, 10,
-       11, 2, 3, 1, 11, 12, 0, 5,
-       9, 10, 2, 3, 7, 1, 5, 11,
-       12, 0, 5, 9, 10, 11, 2, 3,
-       7, 1, 5, 11, 12, 0, 5, 7,
-       5, 1, 5, 11, 7, 5, 9, 10,
-       2, 3, 7, 1, 5, 11, 12, 4,
-       0, 5, 5, 5, 5, 5, 5, 5,
-       5, 5, 5, 5, 5, 5, 5, 5,
-       5, 5, 5, 5, 0
+static const char _emoji_presentation_trans_targs[] = {
+       2, 4, 6, 2, 1, 2, 3, 3, 
+       7, 2, 8, 9, 11, 0, 2, 5, 
+       2, 2, 10
 };
 
-static const char _emoji_presentation_cond_actions[] = {
-       15, 0, 15, 11, 15, 15, 15, 15,
-       13, 15, 11, 0, 15, 15, 11, 0,
-       15, 11, 15, 15, 0, 18, 15, 18,
-       5, 15, 0, 5, 15, 9, 15, 15,
-       0, 0, 15, 0, 7, 15, 15, 0,
-       0, 0, 15, 15, 0, 7, 15, 15,
-       0, 0, 15, 0, 15, 15, 0, 7,
-       15, 15, 15, 0, 0, 15, 0, 15,
-       15, 0, 7, 15, 15, 0, 0, 0,
-       15, 0, 7, 15, 15, 0, 0, 15,
-       0, 15, 15, 0, 0, 7, 15, 15,
-       15, 0, 0, 0, 15, 15, 0, 7,
-       15, 15, 0, 0, 15, 0, 5, 15,
-       15, 0, 7, 15, 15, 15, 0, 0,
-       15, 0, 5, 15, 15, 0, 7, 15,
-       9, 0, 5, 15, 15, 9, 15, 15,
-       0, 0, 15, 0, 5, 15, 15, 0,
-       0, 7, 11, 13, 11, 11, 11, 9,
-       7, 7, 7, 7, 7, 7, 7, 7,
-       7, 9, 9, 7, 0
+static const char _emoji_presentation_trans_actions[] = {
+       17, 19, 19, 15, 0, 7, 22, 19, 
+       19, 9, 0, 22, 19, 0, 5, 19, 
+       11, 13, 19
 };
 
 static const char _emoji_presentation_to_state_actions[] = {
-       0, 0, 0, 0, 0, 1, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 1, 0, 0, 0, 0, 0, 
        0, 0, 0, 0
 };
 
 static const char _emoji_presentation_from_state_actions[] = {
-       0, 0, 0, 0, 0, 3, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0
-};
-
-static const char _emoji_presentation_eof_cond_spaces[] = {
-       -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, -1,
-       -1, -1, -1, -1, -1, -1, -1, 0
-};
-
-static const char _emoji_presentation_eof_cond_key_offs[] = {
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0
-};
-
-static const char _emoji_presentation_eof_cond_key_lens[] = {
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 3, 0, 0, 0, 0, 0, 
        0, 0, 0, 0
 };
 
-static const char _emoji_presentation_eof_cond_keys[] = {
-       0
-};
-
-static const short _emoji_presentation_eof_trans[] = {
-       139, 140, 141, 142, 143, 0, 144, 145,
-       146, 147, 148, 149, 150, 151, 152, 153,
-       154, 155, 156, 0
-};
-
-static const char _emoji_presentation_nfa_targs[] = {
-       0, 0
-};
-
-static const char _emoji_presentation_nfa_offsets[] = {
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0
+static const char _emoji_presentation_eof_trans[] = {
+       1, 4, 0, 1, 17, 17, 17, 17, 
+       18, 18, 17, 17
 };
 
-static const char _emoji_presentation_nfa_push_actions[] = {
-       0, 0
-};
+static const int emoji_presentation_start = 2;
 
-static const char _emoji_presentation_nfa_pop_trans[] = {
-       0, 0
-};
+static const int emoji_presentation_en_text_and_emoji_run = 2;
 
-static const int emoji_presentation_start = 5;
 
-static const int emoji_presentation_en_text_and_emoji_run = 5;
+#line 9 "emoji_presentation_scanner.rl"
 
 
 
+#line 76 "emoji_presentation_scanner.rl"
 
 
 static gboolean
 scan_emoji_presentation (const unsigned char* buffer,
-unsigned buffer_size,
-unsigned cursor,
-unsigned* last,
-unsigned* end)
+                         unsigned buffer_size,
+                         unsigned cursor,
+                         unsigned* end)
 {
-       const unsigned char *p = buffer + cursor;
-       const unsigned char *pe, *eof, *ts, *te;
-       unsigned act;
-       int cs;
-       pe = eof = buffer + buffer_size;
-       
-       
+  const unsigned char *p = buffer + cursor;
+  const unsigned char *pe, *eof, *ts, *te;
+  unsigned act;
+  int cs;
+  pe = eof = buffer + buffer_size;
+
+  
+#line 108 "emoji_presentation_scanner.c"
        {
-               cs = (int)emoji_presentation_start;
-               ts = 0;
-               te = 0;
-               act = 0;
+       cs = emoji_presentation_start;
+       ts = 0;
+       te = 0;
+       act = 0;
        }
-       
+
+#line 116 "emoji_presentation_scanner.c"
        {
-               int _cpc;
-               int _klen;const char * _cekeys;unsigned int _trans = 0;const unsigned char * _keys;const char 
* _acts;unsigned int _nacts;       {
-                       if ( p == pe )
-                       goto _test_eof;
-                       _resume:  {
-                               _acts = ( _emoji_presentation_actions + 
(_emoji_presentation_from_state_actions[cs]));
-                               _nacts = (unsigned int)(*( _acts));
-                               _acts += 1;
-                               while ( _nacts > 0 ) {
-                                       switch ( (*( _acts)) ) {
-                                               case 1:  {
-                                                       {
-                                                               #line 1 "NONE"
-                                                               {ts = p;}}
-                                                       break; }
-                                       }
-                                       _nacts -= 1;
-                                       _acts += 1;
-                               }
-                               
-                               _keys = ( _emoji_presentation_trans_keys + 
(_emoji_presentation_key_offsets[cs]));
-                               _trans = (unsigned int)_emoji_presentation_index_offsets[cs];
-                               
-                               _klen = (int)_emoji_presentation_single_lengths[cs];
-                               if ( _klen > 0 ) {
-                                       const unsigned char *_lower = _keys;
-                                       const unsigned char *_upper = _keys + _klen - 1;
-                                       const unsigned char *_mid;
-                                       while ( 1 ) {
-                                               if ( _upper < _lower )
-                                               break;
-                                               
-                                               _mid = _lower + ((_upper-_lower) >> 1);
-                                               if ( ( (*( p))) < (*( _mid)) )
-                                               _upper = _mid - 1;
-                                               else if ( ( (*( p))) > (*( _mid)) )
-                                               _lower = _mid + 1;
-                                               else {
-                                                       _trans += (unsigned int)(_mid - _keys);
-                                                       goto _match;
-                                               }
-                                       }
-                                       _keys += _klen;
-                                       _trans += (unsigned int)_klen;
-                               }
-                               
-                               _klen = (int)_emoji_presentation_range_lengths[cs];
-                               if ( _klen > 0 ) {
-                                       const unsigned char *_lower = _keys;
-                                       const unsigned char *_upper = _keys + (_klen<<1) - 2;
-                                       const unsigned char *_mid;
-                                       while ( 1 ) {
-                                               if ( _upper < _lower )
-                                               break;
-                                               
-                                               _mid = _lower + (((_upper-_lower) >> 1) & ~1);
-                                               if ( ( (*( p))) < (*( _mid)) )
-                                               _upper = _mid - 2;
-                                               else if ( ( (*( p))) > (*( _mid + 1)) )
-                                               _lower = _mid + 2;
-                                               else {
-                                                       _trans += (unsigned int)((_mid - _keys)>>1);
-                                                       goto _match;
-                                               }
-                                       }
-                                       _trans += (unsigned int)_klen;
-                               }
-                               
-                               _match:  {
-                                       goto _match_cond;
-                               }
-                       }
-                       _match_cond:  {
-                               cs = (int)_emoji_presentation_cond_targs[_trans];
-                               
-                               if ( _emoji_presentation_cond_actions[_trans] == 0 )
-                               goto _again;
-                               
-                               _acts = ( _emoji_presentation_actions + 
(_emoji_presentation_cond_actions[_trans]));
-                               _nacts = (unsigned int)(*( _acts));
-                               _acts += 1;
-                               while ( _nacts > 0 ) {
-                                       switch ( (*( _acts)) )
-                                       {
-                                               case 2:  {
-                                                       {
-                                                               #line 1 "NONE"
-                                                               {te = p+1;}}
-                                                       break; }
-                                               case 3:  {
-                                                       {
-                                                               #line 71 "emoji_presentation_scanner.rl"
-                                                               {act = 1;}}
-                                                       break; }
-                                               case 4:  {
-                                                       {
-                                                               #line 72 "emoji_presentation_scanner.rl"
-                                                               {act = 2;}}
-                                                       break; }
-                                               case 5:  {
-                                                       {
-                                                               #line 72 "emoji_presentation_scanner.rl"
-                                                               {te = p+1;{
-                                                                               #line 72 
"emoji_presentation_scanner.rl"
-                                                                               
found_text_presentation_sequence }}}
-                                                       break; }
-                                               case 6:  {
-                                                       {
-                                                               #line 71 "emoji_presentation_scanner.rl"
-                                                               {te = p;p = p - 1;{
-                                                                               #line 71 
"emoji_presentation_scanner.rl"
-                                                                               
found_emoji_presentation_sequence }}}
-                                                       break; }
-                                               case 7:  {
-                                                       {
-                                                               #line 72 "emoji_presentation_scanner.rl"
-                                                               {te = p;p = p - 1;{
-                                                                               #line 72 
"emoji_presentation_scanner.rl"
-                                                                               
found_text_presentation_sequence }}}
-                                                       break; }
-                                               case 8:  {
-                                                       {
-                                                               #line 71 "emoji_presentation_scanner.rl"
-                                                               {p = ((te))-1;
-                                                                       {
-                                                                               #line 71 
"emoji_presentation_scanner.rl"
-                                                                               
found_emoji_presentation_sequence }}}
-                                                       break; }
-                                               case 9:  {
-                                                       {
-                                                               #line 1 "NONE"
-                                                               {switch( act ) {
-                                                                               case 1:  {
-                                                                                       p = ((te))-1;
-                                                                                       {
-                                                                                               #line 71 
"emoji_presentation_scanner.rl"
-                                                                                               
found_emoji_presentation_sequence } break; }
-                                                                               case 2:  {
-                                                                                       p = ((te))-1;
-                                                                                       {
-                                                                                               #line 72 
"emoji_presentation_scanner.rl"
-                                                                                               
found_text_presentation_sequence } break; }
-                                                                       }}
-                                                       }
-                                                       break; }
-                                       }
-                                       _nacts -= 1;
-                                       _acts += 1;
-                               }
-                               
-                               
-                       }
-                       _again:  {
-                               _acts = ( _emoji_presentation_actions + 
(_emoji_presentation_to_state_actions[cs]));
-                               _nacts = (unsigned int)(*( _acts));
-                               _acts += 1;
-                               while ( _nacts > 0 ) {
-                                       switch ( (*( _acts)) ) {
-                                               case 0:  {
-                                                       {
-                                                               #line 1 "NONE"
-                                                               {ts = 0;}}
-                                                       break; }
-                                       }
-                                       _nacts -= 1;
-                                       _acts += 1;
-                               }
-                               
-                               p += 1;
-                               if ( p != pe )
-                               goto _resume;
-                       }
-                       _test_eof:  { {}
-                               if ( p == eof )
-                               {
-                                       if ( _emoji_presentation_eof_cond_spaces[cs] != -1 ) {
-                                               _cekeys = ( _emoji_presentation_eof_cond_keys + 
(_emoji_presentation_eof_cond_key_offs[cs]));
-                                               _klen = (int)_emoji_presentation_eof_cond_key_lens[cs];
-                                               _cpc = 0;
-                                               {
-                                                       const char *_lower = _cekeys;
-                                                       const char *_upper = _cekeys + _klen - 1;
-                                                       const char *_mid;
-                                                       while ( 1 ) {
-                                                               if ( _upper < _lower )
-                                                               break;
-                                                               
-                                                               _mid = _lower + ((_upper-_lower) >> 1);
-                                                               if ( _cpc < (int)(*( _mid)) )
-                                                               _upper = _mid - 1;
-                                                               else if ( _cpc > (int)(*( _mid)) )
-                                                               _lower = _mid + 1;
-                                                               else {
-                                                                       goto _ok;
-                                                               }
-                                                       }
-                                                       cs = -1;
-                                                       goto _out;
-                                               }
-                                               _ok: {}
-                                       }
-                                       if ( _emoji_presentation_eof_trans[cs] > 0 ) {
-                                               _trans = (unsigned int)_emoji_presentation_eof_trans[cs] - 1;
-                                               goto _match_cond;
-                                       }
-                               }
-                               
+       int _klen;
+       unsigned int _trans;
+       const char *_acts;
+       unsigned int _nacts;
+       const unsigned char *_keys;
+
+       if ( p == pe )
+               goto _test_eof;
+_resume:
+       _acts = _emoji_presentation_actions + _emoji_presentation_from_state_actions[cs];
+       _nacts = (unsigned int) *_acts++;
+       while ( _nacts-- > 0 ) {
+               switch ( *_acts++ ) {
+       case 1:
+#line 1 "NONE"
+       {ts = p;}
+       break;
+#line 135 "emoji_presentation_scanner.c"
+               }
+       }
+
+       _keys = _emoji_presentation_trans_keys + _emoji_presentation_key_offsets[cs];
+       _trans = _emoji_presentation_index_offsets[cs];
+
+       _klen = _emoji_presentation_single_lengths[cs];
+       if ( _klen > 0 ) {
+               const unsigned char *_lower = _keys;
+               const unsigned char *_mid;
+               const unsigned char *_upper = _keys + _klen - 1;
+               while (1) {
+                       if ( _upper < _lower )
+                               break;
+
+                       _mid = _lower + ((_upper-_lower) >> 1);
+                       if ( (*p) < *_mid )
+                               _upper = _mid - 1;
+                       else if ( (*p) > *_mid )
+                               _lower = _mid + 1;
+                       else {
+                               _trans += (unsigned int)(_mid - _keys);
+                               goto _match;
                        }
-                       _out:  { {}
+               }
+               _keys += _klen;
+               _trans += _klen;
+       }
+
+       _klen = _emoji_presentation_range_lengths[cs];
+       if ( _klen > 0 ) {
+               const unsigned char *_lower = _keys;
+               const unsigned char *_mid;
+               const unsigned char *_upper = _keys + (_klen<<1) - 2;
+               while (1) {
+                       if ( _upper < _lower )
+                               break;
+
+                       _mid = _lower + (((_upper-_lower) >> 1) & ~1);
+                       if ( (*p) < _mid[0] )
+                               _upper = _mid - 2;
+                       else if ( (*p) > _mid[1] )
+                               _lower = _mid + 2;
+                       else {
+                               _trans += (unsigned int)((_mid - _keys)>>1);
+                               goto _match;
                        }
                }
+               _trans += _klen;
        }
-       
-       return FALSE;
+
+_match:
+       _trans = _emoji_presentation_indicies[_trans];
+_eof_trans:
+       cs = _emoji_presentation_trans_targs[_trans];
+
+       if ( _emoji_presentation_trans_actions[_trans] == 0 )
+               goto _again;
+
+       _acts = _emoji_presentation_actions + _emoji_presentation_trans_actions[_trans];
+       _nacts = (unsigned int) *_acts++;
+       while ( _nacts-- > 0 )
+       {
+               switch ( *_acts++ )
+               {
+       case 2:
+#line 1 "NONE"
+       {te = p+1;}
+       break;
+       case 3:
+#line 72 "emoji_presentation_scanner.rl"
+       {act = 2;}
+       break;
+       case 4:
+#line 73 "emoji_presentation_scanner.rl"
+       {act = 3;}
+       break;
+       case 5:
+#line 71 "emoji_presentation_scanner.rl"
+       {te = p+1;{ found_text_presentation_sequence }}
+       break;
+       case 6:
+#line 72 "emoji_presentation_scanner.rl"
+       {te = p+1;{ found_emoji_presentation_sequence }}
+       break;
+       case 7:
+#line 73 "emoji_presentation_scanner.rl"
+       {te = p+1;{ found_text_presentation_sequence }}
+       break;
+       case 8:
+#line 72 "emoji_presentation_scanner.rl"
+       {te = p;p--;{ found_emoji_presentation_sequence }}
+       break;
+       case 9:
+#line 73 "emoji_presentation_scanner.rl"
+       {te = p;p--;{ found_text_presentation_sequence }}
+       break;
+       case 10:
+#line 72 "emoji_presentation_scanner.rl"
+       {{p = ((te))-1;}{ found_emoji_presentation_sequence }}
+       break;
+       case 11:
+#line 1 "NONE"
+       {       switch( act ) {
+       case 2:
+       {{p = ((te))-1;} found_emoji_presentation_sequence }
+       break;
+       case 3:
+       {{p = ((te))-1;} found_text_presentation_sequence }
+       break;
+       }
+       }
+       break;
+#line 249 "emoji_presentation_scanner.c"
+               }
+       }
+
+_again:
+       _acts = _emoji_presentation_actions + _emoji_presentation_to_state_actions[cs];
+       _nacts = (unsigned int) *_acts++;
+       while ( _nacts-- > 0 ) {
+               switch ( *_acts++ ) {
+       case 0:
+#line 1 "NONE"
+       {ts = 0;}
+       break;
+#line 262 "emoji_presentation_scanner.c"
+               }
+       }
+
+       if ( ++p != pe )
+               goto _resume;
+       _test_eof: {}
+       if ( p == eof )
+       {
+       if ( _emoji_presentation_eof_trans[cs] > 0 ) {
+               _trans = _emoji_presentation_eof_trans[cs] - 1;
+               goto _eof_trans;
+       }
+       }
+
+       }
+
+#line 93 "emoji_presentation_scanner.rl"
+
+
+  g_assert_not_reached ();
 }
 
diff --git a/pango/emoji_presentation_scanner.rl b/pango/emoji_presentation_scanner.rl
index c13ae279..4736f462 100644
--- a/pango/emoji_presentation_scanner.rl
+++ b/pango/emoji_presentation_scanner.rl
@@ -62,14 +62,15 @@ emoji_presentation = EMOJI_EMOJI_PRESENTATION | TAG_BASE | EMOJI_MODIFIER_BASE |
   emoji_tag_sequence | emoji_keycap_sequence | emoji_zwj_sequence |
   emoji_combining_encloding_circle_backslash_sequence;
 
-emoji_run = emoji_presentation+;
+emoji_run = emoji_presentation;
 
 text_presentation_emoji = any_emoji VS15;
 text_run = text_presentation_emoji | any;
 
 text_and_emoji_run := |*
+text_presentation_emoji => { found_text_presentation_sequence };
 emoji_run => { found_emoji_presentation_sequence };
-text_run => { found_text_presentation_sequence };
+any => { found_text_presentation_sequence };
 *|;
 
 }%%
@@ -78,7 +79,6 @@ static gboolean
 scan_emoji_presentation (const unsigned char* buffer,
                          unsigned buffer_size,
                          unsigned cursor,
-                         unsigned* last,
                          unsigned* end)
 {
   const unsigned char *p = buffer + cursor;
@@ -91,6 +91,7 @@ scan_emoji_presentation (const unsigned char* buffer,
     write init;
     write exec;
   }%%
-  return FALSE;
+
+  g_assert_not_reached ();
 }
 
diff --git a/pango/pango-emoji-private.h b/pango/pango-emoji-private.h
index ed4b7213..f0d3b7f9 100644
--- a/pango/pango-emoji-private.h
+++ b/pango/pango-emoji-private.h
@@ -37,9 +37,6 @@ struct _PangoEmojiIter
   const gchar *end;
   gboolean is_emoji;
 
-  const gchar *token_start;
-  const gchar *token_end;
-
   const unsigned char *types;
   unsigned int n_chars;
   unsigned int cursor;
diff --git a/pango/pango-emoji.c b/pango/pango-emoji.c
index 158daa5b..886fef53 100644
--- a/pango/pango-emoji.c
+++ b/pango/pango-emoji.c
@@ -192,11 +192,15 @@ _pango_EmojiSegmentationCategory (gunichar codepoint)
   return kMaxEmojiScannerCategory;
 }
 
-#define found_text_presentation_sequence
+#define found_text_presentation_sequence                                 \
+  {                                                                      \
+    if (0) g_print ("text  %ld..%ld\n", ts - buffer, te - buffer);       \
+    *end = te - buffer;                                                  \
+    return FALSE;                                                        \
+  } /* Used as a Ragel scanner action: stores the token's end offset in *end and returns FALSE (text). */
 #define found_emoji_presentation_sequence                                \
   {                                                                      \
     if (0) g_print ("emoji %ld..%ld\n", ts - buffer, te - buffer);       \
-    *last = ts - buffer;                                                 \
     *end = te - buffer;                                                  \
     return TRUE;                                                         \
   }
@@ -221,7 +225,7 @@ _pango_emoji_iter_init (PangoEmojiIter *iter,
     p = g_utf8_next_char (p);
   }
 
-  iter->text_start = iter->start = iter->end = iter->token_start = iter->token_end = text;
+  iter->text_start = iter->start = iter->end = text;
   if (length >= 0)
     iter->text_end = text + length;
   else
@@ -246,56 +250,31 @@ _pango_emoji_iter_fini (PangoEmojiIter *iter)
 gboolean
 _pango_emoji_iter_next (PangoEmojiIter *iter)
 {
+  unsigned int old_cursor, cursor; /* indices into iter->types */
+  gboolean is_emoji; /* result of the most recent scanner call */
+  /* Advance iter to the next run of same-kind tokens; returns FALSE once iter->end has reached text_end. */
   if (iter->end >= iter->text_end)
     return FALSE;
 
   iter->start = iter->end;
 
-  /* The scan_emoji_presentation scanner function returns false when it reaches
-   * the end of the buffer and has not discovered any emoji runs in between. For
-   * Emoji runs, it returns true, and token_start_ and token_end_ are set to the
-   * start and end of the emoji sequence. This means, it may skip over text runs
-   * in between, see below. */
-  if (iter->start >= iter->token_end)
-    {
-      /* We need to scan furhter. */
-      unsigned int token_start, token_end;
-      if (!scan_emoji_presentation (iter->types, iter->n_chars, iter->cursor,
-                                   &token_start, &token_end))
-       {
-         /* The scanner returned false, which means it has reached the end of the
-          * buffer without discovering any emoji segments in between. */
-         iter->end = iter->text_end;
-         iter->is_emoji = FALSE;
-
-         return TRUE;
-       };
-      /* Ugly... */
-      g_assert (iter->cursor <= token_start && token_start < token_end && token_end <= iter->n_chars);
-      iter->token_start = g_utf8_offset_to_pointer (iter->token_end, token_start - iter->cursor);
-      iter->token_end   = g_utf8_offset_to_pointer (iter->token_end, token_end   - iter->cursor);
-      iter->cursor = token_end;
-    }
-
-  if (iter->start < iter->token_start)
-    {
-      /* The scanner function has progressed to the next emoji segment, but we
-       * need to return the text segment over which it had skipped. */
-      iter->end = iter->token_start;
-      iter->is_emoji = FALSE;
-      return TRUE;
-    }
-
-  if (iter->start >= iter->token_start && iter->start < iter->token_end)
-    {
-      /* Now our cursor has reached the emoji segment, and we can return it. */
-      iter->end = iter->token_end;
-      iter->is_emoji = TRUE;
-      return TRUE;
-    }
-
-  g_assert_not_reached ();
-  return FALSE;
+  old_cursor = cursor = iter->cursor; /* old_cursor remembers where this run begins */
+  is_emoji = scan_emoji_presentation (iter->types, iter->n_chars, cursor, &cursor);
+  do
+  {
+    iter->cursor = cursor; /* commit the token just scanned to the current run */
+    iter->is_emoji = is_emoji;
+
+    if (cursor == iter->n_chars) /* every entry has been scanned */
+      break;
+
+    is_emoji = scan_emoji_presentation (iter->types, iter->n_chars, cursor, &cursor);
+  }
+  while (iter->is_emoji == is_emoji); /* a token of the other kind ends the run; iter->cursor is not moved past it */
+  /* Turn the number of entries consumed by this run into an end pointer relative to iter->start. */
+  iter->end = g_utf8_offset_to_pointer (iter->start, iter->cursor - old_cursor);
+
+  return TRUE;
 }
 
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]