[pango/bidi-stack-size: 10/11] break: Cache per-character data
- From: Matthias Clasen <matthiasc src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [pango/bidi-stack-size: 10/11] break: Cache per-character data
- Date: Fri, 30 Jul 2021 13:06:30 +0000 (UTC)
commit 43cdd9fbab3ababaae7308feec9e4dabdf8dc3a0
Author: Matthias Clasen <mclasen redhat com>
Date: Wed Jul 28 16:10:26 2021 -0400
break: Cache per-character data
pango/break.c | 114 +++++++++++++++++++++++++++++++++-------------------------
1 file changed, 66 insertions(+), 48 deletions(-)
---
diff --git a/pango/break.c b/pango/break.c
index b9cf3cae..aedf54c4 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -138,6 +138,37 @@ typedef enum
WordNumbers
} WordType;
+static inline void
+get_unichar_data (gunichar wc,
+ GUnicodeType *type,
+ GUnicodeBreakType *break_type,
+ PangoScript *script,
+ gboolean *extended_pictographic)
+{
+ static struct {
+ gunichar wc;
+ GUnicodeType type;
+ GUnicodeBreakType break_type;
+ PangoScript script;
+ gboolean extended_pictographic;
+ } cache[256] = { 0, }, *p;
+
+ p = &cache[wc & 0xff];
+
+ if (G_UNLIKELY (p->wc != wc))
+ {
+ p->wc = wc;
+ p->type = g_unichar_type (wc);
+ p->break_type = g_unichar_break_type (wc);
+ p->script = (PangoScript)g_unichar_get_script (wc);
+ p->extended_pictographic = _pango_Is_Emoji_Extended_Pictographic (wc);
+ }
+
+ *type = p->type;
+ *break_type = p->break_type;
+ *script = p->script;
+ *extended_pictographic = p->extended_pictographic;
+}
/**
* pango_default_break:
@@ -182,10 +213,14 @@ pango_default_break (const gchar *text,
JamoType prev_jamo;
+ GUnicodeType next_type;
GUnicodeBreakType next_break_type;
GUnicodeBreakType prev_break_type;
GUnicodeBreakType prev_prev_break_type;
+ PangoScript next_script;
+ gboolean next_Extended_Pictographic;
+
/* See Grapheme_Cluster_Break Property Values table of UAX#29 */
typedef enum
{
@@ -256,6 +291,7 @@ pango_default_break (const gchar *text,
LB_RI_Odd,
LB_RI_Even,
} LineBreakType;
+ LineBreakType LB_type;
LineBreakType prev_LB_type = LB_Other;
WordType current_word_type = WordNone;
@@ -286,8 +322,7 @@ pango_default_break (const gchar *text,
else
next_wc = g_utf8_get_char (next);
- next_break_type = g_unichar_break_type (next_wc);
- next_break_type = BREAK_TYPE_SAFE (next_break_type);
+ get_unichar_data (next_wc, &next_type, &next_break_type, &next_script, &next_Extended_Pictographic);
for (i = 0; !done ; i++)
{
@@ -299,6 +334,8 @@ pango_default_break (const gchar *text,
JamoType jamo;
gboolean makes_hangul_syllable;
+ PangoScript script;
+
/* UAX#29 boundaries */
gboolean is_grapheme_boundary;
gboolean is_word_boundary;
@@ -310,7 +347,10 @@ pango_default_break (const gchar *text,
gboolean can_break;
wc = next_wc;
+ type = next_type;
break_type = next_break_type;
+ script = next_script;
+ is_Extended_Pictographic = next_Extended_Pictographic;
if (almost_done)
{
@@ -319,6 +359,7 @@ pango_default_break (const gchar *text,
* may not increment next
*/
next_wc = 0;
+ next_type = 0;
next_break_type = G_UNICODE_BREAK_UNKNOWN;
done = TRUE;
}
@@ -338,11 +379,9 @@ pango_default_break (const gchar *text,
else
next_wc = g_utf8_get_char (next);
- next_break_type = g_unichar_break_type (next_wc);
- next_break_type = BREAK_TYPE_SAFE (next_break_type);
+ get_unichar_data (next_wc, &next_type, &next_break_type, &next_script,
&next_Extended_Pictographic);
}
- type = g_unichar_type (wc);
jamo = JAMO_TYPE (break_type);
/* Determine wheter this forms a Hangul syllable with prev. */
@@ -380,9 +419,6 @@ pango_default_break (const gchar *text,
break;
}
- is_Extended_Pictographic =
- _pango_Is_Emoji_Extended_Pictographic (wc);
-
/* ---- UAX#29 Grapheme Boundaries ---- */
{
@@ -558,11 +594,8 @@ pango_default_break (const gchar *text,
if (is_grapheme_boundary ||
G_UNLIKELY(wc >=0x1F1E6 && wc <=0x1F1FF)) /* Rules WB3 and WB4 */
{
- PangoScript script;
WordBreakType WB_type;
- script = (PangoScript)g_unichar_get_script (wc);
-
/* Find the WordBreakType of wc */
WB_type = WB_Other;
@@ -1025,8 +1058,10 @@ pango_default_break (const gchar *text,
*/
can_break = attrs[i].is_cursor_position;
- /* Rule LB1:
- assign a line breaking class to each code point of the input. */
+ LB_type = LB_Other;
+
+ /* Rule LB1: assign a line breaking class to each code point of the input. */
+ /* Also determine if we can break */
switch (break_type)
{
case G_UNICODE_BREAK_AMBIGUOUS:
@@ -1058,50 +1093,33 @@ pango_default_break (const gchar *text,
case G_UNICODE_BREAK_HANGUL_LV_SYLLABLE:
case G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE:
case G_UNICODE_BREAK_EMOJI_MODIFIER:
- case G_UNICODE_BREAK_REGIONAL_INDICATOR:
can_break = TRUE;
break;
+ case G_UNICODE_BREAK_REGIONAL_INDICATOR:
+ can_break = TRUE;
+ if (prev_LB_type == LB_RI_Odd)
+ LB_type = LB_RI_Even;
+ else
+ LB_type = LB_RI_Odd;
+ break;
+
+ case G_UNICODE_BREAK_NUMERIC:
+ LB_type = LB_Numeric;
+ break;
+
+ case G_UNICODE_BREAK_CLOSE_PUNCTUATION:
+ case G_UNICODE_BREAK_CLOSE_PARANTHESIS:
+ if (prev_LB_type == LB_Numeric)
+ LB_type = LB_Numeric_Close;
+ break;
+
default:
;
}
if (can_break)
{
- LineBreakType LB_type;
-
- /* Find the LineBreakType of wc */
- LB_type = LB_Other;
-
- if (break_type == G_UNICODE_BREAK_NUMERIC)
- LB_type = LB_Numeric;
-
- if (break_type == G_UNICODE_BREAK_SYMBOL ||
- break_type == G_UNICODE_BREAK_INFIX_SEPARATOR)
- {
- if (!(prev_LB_type == LB_Numeric))
- LB_type = LB_Other;
- }
-
- if (break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION ||
- break_type == G_UNICODE_BREAK_CLOSE_PARANTHESIS)
- {
- if (prev_LB_type == LB_Numeric)
- LB_type = LB_Numeric_Close;
- else
- LB_type = LB_Other;
- }
-
- if (break_type == G_UNICODE_BREAK_REGIONAL_INDICATOR)
- {
- if (prev_LB_type == LB_RI_Odd)
- LB_type = LB_RI_Even;
- else if (prev_LB_type == LB_RI_Even)
- LB_type = LB_RI_Odd;
- else
- LB_type = LB_RI_Odd;
- }
-
attrs[i].is_line_break = TRUE; /* Rule LB31 */
/* Unicode doesn't specify char wrap;
we wrap around all chars currently. */
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]