[pango/misc-speedups: 1/4] Speed up Emoji classification
- From: Matthias Clasen <matthiasc src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [pango/misc-speedups: 1/4] Speed up Emoji classification
- Date: Mon, 29 Mar 2021 04:15:17 +0000 (UTC)
commit 6f3b1397578d9eceef9e8b1d450efbddcd2cc7d5
Author: Matthias Clasen <mclasen redhat com>
Date: Sun Mar 28 21:55:17 2021 -0400
Speed up Emoji classification
Open-code the bsearch here. These functions
show up in profiles, since itemization uses
an Emoji iter.
pango/pango-emoji.c | 119 ++++++++++++++++++++++++++------------------------
pango/pangofc-shape.c | 27 +++++-------
2 files changed, 73 insertions(+), 73 deletions(-)
---
diff --git a/pango/pango-emoji.c b/pango/pango-emoji.c
index e316b370..12abed9d 100644
--- a/pango/pango-emoji.c
+++ b/pango/pango-emoji.c
@@ -52,40 +52,37 @@
#include "pango-emoji-private.h"
#include "pango-emoji-table.h"
-
-static int
-interval_compare (const void *key, const void *elt)
+static inline gboolean
+bsearch_interval (gunichar c,
+ const struct Interval table[],
+ guint n)
{
- gunichar c = GPOINTER_TO_UINT (key);
- struct Interval *interval = (struct Interval *)elt;
-
- if (c < interval->start)
- return -1;
- if (c > interval->end)
- return +1;
-
- return 0;
+ guint lower = 0;
+ guint upper = n;
+
+ while (lower <= upper)
+ {
+ int mid = (lower + upper) / 2;
+
+ if (c < table[mid].start)
+ upper = mid - 1;
+ else if (c > table[mid].end)
+ lower = mid + 1;
+ else
+ return TRUE;
+ }
+
+ return FALSE;
}
#define DEFINE_pango_Is_(name) \
-static gboolean \
+static inline gboolean \
_pango_Is_##name (gunichar ch) \
{ \
- /* bsearch() is declared attribute(nonnull(1)) so we can't validly search \
- * for a NULL key */ \
- /* \
- if (G_UNLIKELY (ch == 0)) \
- return FALSE; \
- */ \
- \
- if (bsearch (GUINT_TO_POINTER (ch), \
- _pango_##name##_table, \
- G_N_ELEMENTS (_pango_##name##_table), \
- sizeof _pango_##name##_table[0], \
- interval_compare)) \
- return TRUE; \
- \
- return FALSE; \
+ return ch >= _pango_##name##_table[0].start && \
+ bsearch_interval (ch, \
+ _pango_##name##_table, \
+ G_N_ELEMENTS (_pango_##name##_table)); \
}
DEFINE_pango_Is_(Emoji)
@@ -106,25 +103,25 @@ _pango_Is_Emoji_Extended_Pictographic (gunichar ch)
return _pango_Is_Extended_Pictographic (ch);
}
-static gboolean
+static inline gboolean
_pango_Is_Emoji_Text_Default (gunichar ch)
{
return _pango_Is_Emoji (ch) && !_pango_Is_Emoji_Presentation (ch);
}
-static gboolean
+static inline gboolean
_pango_Is_Emoji_Emoji_Default (gunichar ch)
{
return _pango_Is_Emoji_Presentation (ch);
}
-static gboolean
+static inline gboolean
_pango_Is_Emoji_Keycap_Base (gunichar ch)
{
return (ch >= '0' && ch <= '9') || ch == '#' || ch == '*';
}
-static gboolean
+static inline gboolean
_pango_Is_Regional_Indicator (gunichar ch)
{
return (ch >= 0x1F1E6 && ch <= 0x1F1FF);
@@ -157,42 +154,48 @@ enum PangoEmojiScannerCategory {
kMaxEmojiScannerCategory = 16
};
-static unsigned char
+static inline unsigned char
_pango_EmojiSegmentationCategory (gunichar codepoint)
{
/* Specific ones first. */
- if (codepoint == kCombiningEnclosingKeycapCharacter)
- return COMBINING_ENCLOSING_KEYCAP;
- if (codepoint == kCombiningEnclosingCircleBackslashCharacter)
- return COMBINING_ENCLOSING_CIRCLE_BACKSLASH;
- if (codepoint == kZeroWidthJoinerCharacter)
- return ZWJ;
- if (codepoint == kVariationSelector15Character)
- return VS15;
- if (codepoint == kVariationSelector16Character)
- return VS16;
- if (codepoint == 0x1F3F4)
- return TAG_BASE;
- if ((codepoint >= 0xE0030 && codepoint <= 0xE0039) ||
- (codepoint >= 0xE0061 && codepoint <= 0xE007A))
- return TAG_SEQUENCE;
- if (codepoint == 0xE007F)
- return TAG_TERM;
+ switch (codepoint)
+ {
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ case '0' ... '9':
+ return kMaxEmojiScannerCategory;
+ case kCombiningEnclosingKeycapCharacter:
+ return COMBINING_ENCLOSING_KEYCAP;
+ case kCombiningEnclosingCircleBackslashCharacter:
+ return COMBINING_ENCLOSING_CIRCLE_BACKSLASH;
+ case kZeroWidthJoinerCharacter:
+ return ZWJ;
+ case kVariationSelector15Character:
+ return VS15;
+ case kVariationSelector16Character:
+ return VS16;
+ case 0x1F3F4:
+ return TAG_BASE;
+ case 0xE0030 ... 0xE0039:
+ case 0xE0061 ... 0xE007A:
+ return TAG_SEQUENCE;
+ case 0xE007F:
+ return TAG_TERM;
+ default: ;
+ }
+
if (_pango_Is_Emoji_Modifier_Base (codepoint))
return EMOJI_MODIFIER_BASE;
- if (_pango_Is_Emoji_Modifier (codepoint))
+ else if (_pango_Is_Emoji_Modifier (codepoint))
return EMOJI_MODIFIER;
- if (_pango_Is_Regional_Indicator (codepoint))
+ else if (_pango_Is_Regional_Indicator (codepoint))
return REGIONAL_INDICATOR;
- if (_pango_Is_Emoji_Keycap_Base (codepoint))
+ else if (_pango_Is_Emoji_Keycap_Base (codepoint))
return KEYCAP_BASE;
-
- if (_pango_Is_Emoji_Emoji_Default (codepoint))
+ else if (_pango_Is_Emoji_Emoji_Default (codepoint))
return EMOJI_EMOJI_PRESENTATION;
- if (_pango_Is_Emoji_Text_Default (codepoint))
+ else if (_pango_Is_Emoji (codepoint))
return EMOJI_TEXT_PRESENTATION;
- if (_pango_Is_Emoji (codepoint))
- return EMOJI;
/* Ragel state machine will interpret unknown category as "any". */
return kMaxEmojiScannerCategory;
diff --git a/pango/pangofc-shape.c b/pango/pangofc-shape.c
index 5c716b24..0a5ce7f9 100644
--- a/pango/pangofc-shape.c
+++ b/pango/pangofc-shape.c
@@ -151,18 +151,10 @@ pango_hb_font_get_nominal_glyph (hb_font_t *font,
{
PangoHbShapeContext *context = (PangoHbShapeContext *) font_data;
- if ((context->show_flags & PANGO_SHOW_IGNORABLES) != 0)
+ if (context->show_flags != 0)
{
- if (pango_get_ignorable (unicode))
- {
- *glyph = PANGO_GET_UNKNOWN_GLYPH (unicode);
- return TRUE;
- }
- }
-
- if ((context->show_flags & PANGO_SHOW_SPACES) != 0)
- {
- if (g_unichar_type (unicode) == G_UNICODE_SPACE_SEPARATOR)
+ if ((context->show_flags & PANGO_SHOW_SPACES) != 0 &&
+ g_unichar_type (unicode) == G_UNICODE_SPACE_SEPARATOR)
{
/* Replace 0x20 by visible space, since we
* don't draw a hex box for 0x20
@@ -173,11 +165,16 @@ pango_hb_font_get_nominal_glyph (hb_font_t *font,
*glyph = PANGO_GET_UNKNOWN_GLYPH (unicode);
return TRUE;
}
- }
- if ((context->show_flags & PANGO_SHOW_LINE_BREAKS) != 0)
- {
- if (unicode == 0x2028)
+ if ((context->show_flags & PANGO_SHOW_IGNORABLES) != 0 &&
+ pango_get_ignorable (unicode))
+ {
+ *glyph = PANGO_GET_UNKNOWN_GLYPH (unicode);
+ return TRUE;
+ }
+
+ if ((context->show_flags & PANGO_SHOW_LINE_BREAKS) != 0 &&
+ unicode == 0x2028)
{
/* Always mark LS as unknown. If it ends up
* at the line end, PangoLayout takes care of
[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index] [Date Index] [Author Index]