[pango/misc-speedups: 1/6] Speed up Emoji classification




commit 865c36aa5dcf4f76f1c9baf955b3fb863bf4b5e6
Author: Matthias Clasen <mclasen redhat com>
Date:   Sun Mar 28 21:55:17 2021 -0400

    Speed up Emoji classification
    
    Open-code the bsearch here. These functions
    show up in profiles, since itemization uses
    an Emoji iter.

 pango/pango-emoji.c   | 129 ++++++++++++++++++++++++++------------------------
 pango/pangofc-shape.c |  27 +++++------
 2 files changed, 78 insertions(+), 78 deletions(-)
---
diff --git a/pango/pango-emoji.c b/pango/pango-emoji.c
index e316b370..04055d5c 100644
--- a/pango/pango-emoji.c
+++ b/pango/pango-emoji.c
@@ -52,40 +52,37 @@
 #include "pango-emoji-private.h"
 #include "pango-emoji-table.h"
 
-
-static int
-interval_compare (const void *key, const void *elt)
+static inline gboolean
+bsearch_interval (gunichar              c,
+                  const struct Interval table[],
+                  guint                 n)
 {
-  gunichar c = GPOINTER_TO_UINT (key);
-  struct Interval *interval = (struct Interval *)elt;
-
-  if (c < interval->start)
-    return -1;
-  if (c > interval->end)
-    return +1;
-
-  return 0;
+  guint lower = 0;
+  guint upper = n - 1;
+
+  while (lower <= upper)
+    {
+      int mid = (lower + upper) / 2;
+
+      if (c < table[mid].start)
+        upper = mid - 1;
+      else if (c > table[mid].end)
+        lower = mid + 1;
+      else
+        return TRUE;
+    }
+
+  return FALSE;
 }
 
 #define DEFINE_pango_Is_(name) \
-static gboolean \
+static inline gboolean \
 _pango_Is_##name (gunichar ch) \
 { \
-  /* bsearch() is declared attribute(nonnull(1)) so we can't validly search \
-   * for a NULL key */ \
-  /* \
-  if (G_UNLIKELY (ch == 0)) \
-    return FALSE; \
-   */ \
- \
-  if (bsearch (GUINT_TO_POINTER (ch), \
-               _pango_##name##_table, \
-               G_N_ELEMENTS (_pango_##name##_table), \
-               sizeof _pango_##name##_table[0], \
-              interval_compare)) \
-    return TRUE; \
- \
-  return FALSE; \
+  return ch >= _pango_##name##_table[0].start && \
+         bsearch_interval (ch, \
+                           _pango_##name##_table, \
+                           G_N_ELEMENTS (_pango_##name##_table)); \
 }
 
 DEFINE_pango_Is_(Emoji)
@@ -106,36 +103,36 @@ _pango_Is_Emoji_Extended_Pictographic (gunichar ch)
        return _pango_Is_Extended_Pictographic (ch);
 }
 
-static gboolean
+static inline gboolean
 _pango_Is_Emoji_Text_Default (gunichar ch)
 {
   return _pango_Is_Emoji (ch) && !_pango_Is_Emoji_Presentation (ch);
 }
 
-static gboolean
+static inline gboolean
 _pango_Is_Emoji_Emoji_Default (gunichar ch)
 {
   return _pango_Is_Emoji_Presentation (ch);
 }
 
-static gboolean
+static inline gboolean
 _pango_Is_Emoji_Keycap_Base (gunichar ch)
 {
   return (ch >= '0' && ch <= '9') || ch == '#' || ch == '*';
 }
 
-static gboolean
+static inline gboolean
 _pango_Is_Regional_Indicator (gunichar ch)
 {
   return (ch >= 0x1F1E6 && ch <= 0x1F1FF);
 }
 
 
-const gunichar kCombiningEnclosingCircleBackslashCharacter = 0x20E0;
-const gunichar kCombiningEnclosingKeycapCharacter = 0x20E3;
-const gunichar kVariationSelector15Character = 0xFE0E;
-const gunichar kVariationSelector16Character = 0xFE0F;
-const gunichar kZeroWidthJoinerCharacter = 0x200D;
+#define kCombiningEnclosingCircleBackslashCharacter 0x20E0
+#define kCombiningEnclosingKeycapCharacter 0x20E3
+#define kVariationSelector15Character 0xFE0E
+#define kVariationSelector16Character 0xFE0F
+#define kZeroWidthJoinerCharacter 0x200D
 
 enum PangoEmojiScannerCategory {
   EMOJI = 0,
@@ -157,42 +154,48 @@ enum PangoEmojiScannerCategory {
   kMaxEmojiScannerCategory = 16
 };
 
-static unsigned char
+static inline unsigned char
 _pango_EmojiSegmentationCategory (gunichar codepoint)
 {
   /* Specific ones first. */
-  if (codepoint == kCombiningEnclosingKeycapCharacter)
-    return COMBINING_ENCLOSING_KEYCAP;
-  if (codepoint == kCombiningEnclosingCircleBackslashCharacter)
-    return COMBINING_ENCLOSING_CIRCLE_BACKSLASH;
-  if (codepoint == kZeroWidthJoinerCharacter)
-    return ZWJ;
-  if (codepoint == kVariationSelector15Character)
-    return VS15;
-  if (codepoint == kVariationSelector16Character)
-    return VS16;
-  if (codepoint == 0x1F3F4)
-    return TAG_BASE;
-  if ((codepoint >= 0xE0030 && codepoint <= 0xE0039) ||
-      (codepoint >= 0xE0061 && codepoint <= 0xE007A))
-    return TAG_SEQUENCE;
-  if (codepoint == 0xE007F)
-    return TAG_TERM;
+  switch (codepoint)
+    {
+    case 'a' ... 'z':
+    case 'A' ... 'Z':
+    case '0' ... '9':
+      return kMaxEmojiScannerCategory;
+    case kCombiningEnclosingKeycapCharacter:
+      return COMBINING_ENCLOSING_KEYCAP;
+    case kCombiningEnclosingCircleBackslashCharacter:
+      return COMBINING_ENCLOSING_CIRCLE_BACKSLASH;
+    case kZeroWidthJoinerCharacter:
+      return ZWJ;
+    case kVariationSelector15Character:
+      return VS15;
+    case kVariationSelector16Character:
+      return VS16;
+    case 0x1F3F4:
+      return TAG_BASE;
+    case 0xE0030 ... 0xE0039:
+    case 0xE0061 ... 0xE007A:
+      return TAG_SEQUENCE;
+    case 0xE007F:
+      return TAG_TERM;
+    default: ;
+    }
+
   if (_pango_Is_Emoji_Modifier_Base (codepoint))
     return EMOJI_MODIFIER_BASE;
-  if (_pango_Is_Emoji_Modifier (codepoint))
+  else if (_pango_Is_Emoji_Modifier (codepoint))
     return EMOJI_MODIFIER;
-  if (_pango_Is_Regional_Indicator (codepoint))
+  else if (_pango_Is_Regional_Indicator (codepoint))
     return REGIONAL_INDICATOR;
-  if (_pango_Is_Emoji_Keycap_Base (codepoint))
+  else if (_pango_Is_Emoji_Keycap_Base (codepoint))
     return KEYCAP_BASE;
-
-  if (_pango_Is_Emoji_Emoji_Default (codepoint))
+  else if (_pango_Is_Emoji_Emoji_Default (codepoint))
     return EMOJI_EMOJI_PRESENTATION;
-  if (_pango_Is_Emoji_Text_Default (codepoint))
+  else if (_pango_Is_Emoji (codepoint))
     return EMOJI_TEXT_PRESENTATION;
-  if (_pango_Is_Emoji (codepoint))
-    return EMOJI;
 
   /* Ragel state machine will interpret unknown category as "any". */
   return kMaxEmojiScannerCategory;
diff --git a/pango/pangofc-shape.c b/pango/pangofc-shape.c
index 5c716b24..0a5ce7f9 100644
--- a/pango/pangofc-shape.c
+++ b/pango/pangofc-shape.c
@@ -151,18 +151,10 @@ pango_hb_font_get_nominal_glyph (hb_font_t      *font,
 {
   PangoHbShapeContext *context = (PangoHbShapeContext *) font_data;
 
-  if ((context->show_flags & PANGO_SHOW_IGNORABLES) != 0)
+  if (context->show_flags != 0)
     {
-      if (pango_get_ignorable (unicode))
-        {
-          *glyph = PANGO_GET_UNKNOWN_GLYPH (unicode);
-          return TRUE;
-        }
-    }
-
-  if ((context->show_flags & PANGO_SHOW_SPACES) != 0)
-    {
-      if (g_unichar_type (unicode) == G_UNICODE_SPACE_SEPARATOR)
+      if ((context->show_flags & PANGO_SHOW_SPACES) != 0 &&
+          g_unichar_type (unicode) == G_UNICODE_SPACE_SEPARATOR)
         {
           /* Replace 0x20 by visible space, since we
            * don't draw a hex box for 0x20
@@ -173,11 +165,16 @@ pango_hb_font_get_nominal_glyph (hb_font_t      *font,
             *glyph = PANGO_GET_UNKNOWN_GLYPH (unicode);
           return TRUE;
         }
-    }
 
-  if ((context->show_flags & PANGO_SHOW_LINE_BREAKS) != 0)
-    {
-      if (unicode == 0x2028)
+      if ((context->show_flags & PANGO_SHOW_IGNORABLES) != 0 &&
+          pango_get_ignorable (unicode))
+        {
+          *glyph = PANGO_GET_UNKNOWN_GLYPH (unicode);
+          return TRUE;
+        }
+
+      if ((context->show_flags & PANGO_SHOW_LINE_BREAKS) != 0 &&
+          unicode == 0x2028)
         {
           /* Always mark LS as unknown. If it ends up
            * at the line end, PangoLayout takes care of


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]