[pango/hyphen-log-attr: 1/3] Refine hyphenation
- From: Matthias Clasen <matthiasc src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [pango/hyphen-log-attr: 1/3] Refine hyphenation
- Date: Wed, 25 Aug 2021 04:40:36 +0000 (UTC)
commit 2c9792d4b435e87e8616c22e1e5516d7302b06dc
Author: Matthias Clasen <mclasen redhat com>
Date: Wed Aug 25 00:09:37 2021 -0400
Refine hyphenation
Replace ‧ and | with a - when we break there.
Update affected test output.
Fixes: #603
pango/break.c | 15 +++++++++++--
pango/pango-break.h | 3 +++
pango/pango-layout.c | 50 ++++++++++++++++++++++++++++++++++-------
pango/shape.c | 34 +++++++++++++++++++---------
tests/layouts/valid-17.expected | 2 +-
5 files changed, 83 insertions(+), 21 deletions(-)
---
diff --git a/pango/break.c b/pango/break.c
index 8e1aeb56..043ac0cc 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -1559,10 +1559,14 @@ default_break (const char *text,
}
/* --- Hyphens --- */
+
{
gboolean insert_hyphens;
gboolean space_or_hyphen = FALSE;
+ attrs[i].break_inserts_hyphen = FALSE;
+ attrs[i].break_removes_preceding = FALSE;
+
switch ((int)script)
{
case PANGO_SCRIPT_COMMON:
@@ -1599,7 +1603,6 @@ default_break (const char *text,
wc == 0x1400 || /* Canadian syllabics hyphen */
wc == 0x1806 || /* Mongolian todo hyphen */
wc == 0x2010 || /* Hyphen */
- wc == 0x2027 || /* Hyphenation point */
wc == 0x2e17 || /* Double oblique hyphen */
wc == 0x2e40 || /* Double hyphen */
wc == 0x30a0 || /* Katakana-Hiragana double hyphen */
@@ -1617,6 +1620,13 @@ default_break (const char *text,
else
attrs[i].break_inserts_hyphen = insert_hyphens;
+ if (prev_wc == 0x007C || /* Vertical Line */
+ prev_wc == 0x2027) /* Hyphenation point */
+ {
+ attrs[i].break_inserts_hyphen = TRUE;
+ attrs[i].break_removes_preceding = TRUE;
+ }
+
prev_space_or_hyphen = space_or_hyphen;
}
@@ -1774,7 +1784,8 @@ break_attrs (const char *text,
for (pos = start_pos + 1; pos < end_pos; pos++)
{
- log_attrs[pos].break_inserts_hyphen = FALSE;
+ if (!log_attrs[pos].break_removes_preceding)
+ log_attrs[pos].break_inserts_hyphen = FALSE;
}
}
} while (pango_attr_iterator_next (&iter));
diff --git a/pango/pango-break.h b/pango/pango-break.h
index 52febd3d..5d791e27 100644
--- a/pango/pango-break.h
+++ b/pango/pango-break.h
@@ -74,6 +74,8 @@ G_BEGIN_DECLS
* semantics. (Since: 1.22)
* @break_inserts_hyphen: when breaking lines before this char, insert a hyphen.
* Since: 1.50
+ * @break_removes_preceding: when breaking lines before this char, remove the
+ * preceding char. Since 1.50
*
* The `PangoLogAttr` structure stores information about the attributes of a
* single character.
@@ -94,6 +96,7 @@ struct _PangoLogAttr
guint is_expandable_space : 1;
guint is_word_boundary : 1;
guint break_inserts_hyphen : 1;
+ guint break_removes_preceding : 1;
};
PANGO_DEPRECATED_IN_1_44
diff --git a/pango/pango-layout.c b/pango/pango-layout.c
index 4366450a..1ebe42cd 100644
--- a/pango/pango-layout.c
+++ b/pango/pango-layout.c
@@ -3601,7 +3601,8 @@ break_needs_hyphen (PangoLayout *layout,
ParaBreakState *state,
int pos)
{
- return layout->log_attrs[state->start_offset + pos].break_inserts_hyphen;
+ return layout->log_attrs[state->start_offset + pos].break_inserts_hyphen ||
+ layout->log_attrs[state->start_offset + pos].break_removes_preceding;
}
static int
@@ -3626,24 +3627,57 @@ find_hyphen_width (PangoItem *item)
return 0;
}
+static int
+find_char_width (PangoItem *item,
+ gunichar wc)
+{
+ hb_font_t *hb_font;
+ hb_codepoint_t glyph;
+
+ if (!item->analysis.font)
+ return 0;
+
+ hb_font = pango_font_get_hb_font (item->analysis.font);
+ if (hb_font_get_nominal_glyph (hb_font, wc, &glyph))
+ return hb_font_get_glyph_h_advance (hb_font, glyph);
+
+ return 0;
+}
+
+static inline void
+ensure_hyphen_width (ParaBreakState *state)
+{
+ if (state->hyphen_width < 0)
+ {
+ PangoItem *item = state->items->data;
+ state->hyphen_width = find_hyphen_width (item);
+ }
+}
+
static int
find_break_extra_width (PangoLayout *layout,
ParaBreakState *state,
int pos)
{
/* Check whether to insert a hyphen */
- if (break_needs_hyphen (layout, state, pos))
+ if (layout->log_attrs[state->start_offset + pos].break_inserts_hyphen)
{
- if (state->hyphen_width < 0)
+ ensure_hyphen_width (state);
+
+ if (layout->log_attrs[state->start_offset + pos].break_removes_preceding)
{
PangoItem *item = state->items->data;
- state->hyphen_width = find_hyphen_width (item);
- }
+ gunichar wc;
- return state->hyphen_width;
+ wc = g_utf8_get_char (g_utf8_offset_to_pointer (layout->text, state->start_offset + pos - 1));
+
+ return state->hyphen_width - find_char_width (item, wc);
+ }
+ else
+ return state->hyphen_width;
}
- else
- return 0;
+
+ return 0;
}
#if 0
diff --git a/pango/shape.c b/pango/shape.c
index 707534ed..62c0f025 100644
--- a/pango/shape.c
+++ b/pango/shape.c
@@ -344,6 +344,7 @@ pango_hb_shape (const char *item_text,
int paragraph_length,
const PangoAnalysis *analysis,
PangoLogAttr *log_attrs,
+ int num_chars,
PangoGlyphString *glyphs,
PangoShapeFlags flags)
{
@@ -362,6 +363,7 @@ pango_hb_shape (const char *item_text,
unsigned int num_features = 0;
PangoGlyphInfo *infos;
PangoTextTransform transform;
+ int hyphen_index;
g_return_if_fail (analysis != NULL);
g_return_if_fail (analysis->font != NULL);
@@ -392,6 +394,17 @@ pango_hb_shape (const char *item_text,
hb_buffer_set_flags (hb_buffer, hb_buffer_flags);
hb_buffer_set_invisible_glyph (hb_buffer, PANGO_GLYPH_EMPTY);
+ if (analysis->flags & PANGO_ANALYSIS_FLAG_NEED_HYPHEN)
+ {
+ const char *p = paragraph_text + item_offset + item_length;
+ int last_char_len = p - g_utf8_prev_char (p);
+
+ hyphen_index = item_offset + item_length - last_char_len;
+
+ if (log_attrs[num_chars].break_removes_preceding)
+ item_length -= last_char_len;
+ }
+
/* Add pre-context */
hb_buffer_add_utf8 (hb_buffer, paragraph_text, item_offset, item_offset, 0);
@@ -407,7 +420,9 @@ pango_hb_shape (const char *item_text,
/* Transform the item text according to text transform.
* Note: we assume text transforms won't cross font boundaries
*/
- for (p = paragraph_text + item_offset, i = 0; p < paragraph_text + item_offset + item_length; p = g_utf8_next_char (p), i++)
+ for (p = paragraph_text + item_offset, i = 0;
+ p < paragraph_text + item_offset + item_length;
+ p = g_utf8_next_char (p), i++)
{
int index = p - paragraph_text;
gunichar ch = g_utf8_get_char (p);
@@ -457,15 +472,13 @@ pango_hb_shape (const char *item_text,
/* Insert either a Unicode or ASCII hyphen. We may
* want to look for script-specific hyphens here.
*/
- const char *p = paragraph_text + item_offset + item_length;
- int last_char_len = p - g_utf8_prev_char (p);
hb_codepoint_t glyph;
/* Note: We rely on hb_buffer_add clearing existing post-context */
if (hb_font_get_nominal_glyph (hb_font, 0x2010, &glyph))
- hb_buffer_add (hb_buffer, 0x2010, item_offset + item_length - last_char_len);
+ hb_buffer_add (hb_buffer, 0x2010, hyphen_index);
else if (hb_font_get_nominal_glyph (hb_font, '-', &glyph))
- hb_buffer_add (hb_buffer, '-', item_offset + item_length - last_char_len);
+ hb_buffer_add (hb_buffer, '-', hyphen_index);
}
pango_font_get_features (analysis->font, features, G_N_ELEMENTS (features), &num_features);
@@ -578,6 +591,7 @@ pango_shape_internal (const char *item_text,
int paragraph_length,
const PangoAnalysis *analysis,
PangoLogAttr *log_attrs,
+ int num_chars,
PangoGlyphString *glyphs,
PangoShapeFlags flags)
{
@@ -605,9 +619,8 @@ pango_shape_internal (const char *item_text,
pango_hb_shape (item_text, item_length,
paragraph_text, paragraph_length,
analysis,
- log_attrs,
- glyphs,
- flags);
+ log_attrs, num_chars,
+ glyphs, flags);
if (G_UNLIKELY (glyphs->num_glyphs == 0))
{
@@ -866,7 +879,7 @@ pango_shape_with_flags (const char *item_text,
{
pango_shape_internal (item_text, item_length,
paragraph_text, paragraph_length,
- analysis, NULL,
+ analysis, NULL, 0,
glyphs, flags);
}
@@ -905,7 +918,8 @@ pango_shape_item (PangoItem *item,
{
pango_shape_internal (paragraph_text + item->offset, item->length,
paragraph_text, paragraph_length,
- &item->analysis, log_attrs,
+ &item->analysis,
+ log_attrs, item->num_chars,
glyphs, flags);
}
diff --git a/tests/layouts/valid-17.expected b/tests/layouts/valid-17.expected
index 4b3192fb..a2b7d494 100644
--- a/tests/layouts/valid-17.expected
+++ b/tests/layouts/valid-17.expected
@@ -28,7 +28,7 @@ i=3, index=17, paragraph-start=1, dir=ltr ''
--- runs
-i=1, index=0, chars=13, level=0, gravity=south, flags=0, font=OMITTED, script=latin, language=en-us, 'some|bla|bla|'
+i=1, index=0, chars=13, level=0, gravity=south, flags=4, font=OMITTED, script=latin, language=en-us, 'some|bla|bla|'
i=2, index=13, no run, line end
i=3, index=13, chars=3, level=0, gravity=south, flags=0, font=OMITTED, script=latin, language=en-us, 'bla'
i=4, index=16, no run, line end
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]