[pango/hyphen-log-attr: 1/3] Refine hyphenation




commit 2c9792d4b435e87e8616c22e1e5516d7302b06dc
Author: Matthias Clasen <mclasen redhat com>
Date:   Wed Aug 25 00:09:37 2021 -0400

    Refine hyphenation
    
    Replace ‧ and | with a - when we break there.
    
    Update affected test output.
    
    Fixes: #603

 pango/break.c                   | 15 +++++++++++--
 pango/pango-break.h             |  3 +++
 pango/pango-layout.c            | 50 ++++++++++++++++++++++++++++++++++-------
 pango/shape.c                   | 34 +++++++++++++++++++---------
 tests/layouts/valid-17.expected |  2 +-
 5 files changed, 83 insertions(+), 21 deletions(-)
---
diff --git a/pango/break.c b/pango/break.c
index 8e1aeb56..043ac0cc 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -1559,10 +1559,14 @@ default_break (const char    *text,
       }
 
       /* --- Hyphens --- */
+
       {
         gboolean insert_hyphens;
         gboolean space_or_hyphen = FALSE;
 
+        attrs[i].break_inserts_hyphen = FALSE;
+        attrs[i].break_removes_preceding = FALSE;
+
         switch ((int)script)
           {
           case PANGO_SCRIPT_COMMON:
@@ -1599,7 +1603,6 @@ default_break (const char    *text,
                 wc == 0x1400 || /* Canadian syllabics hyphen */
                 wc == 0x1806 || /* Mongolian todo hyphen */
                 wc == 0x2010 || /* Hyphen */
-                wc == 0x2027 || /* Hyphenation point */
                 wc == 0x2e17 || /* Double oblique hyphen */
                 wc == 0x2e40 || /* Double hyphen */
                 wc == 0x30a0 || /* Katakana-Hiragana double hyphen */
@@ -1617,6 +1620,13 @@ default_break (const char    *text,
         else
           attrs[i].break_inserts_hyphen = insert_hyphens;
 
+        if (prev_wc == 0x007C ||   /* Vertical Line */
+            prev_wc == 0x2027)     /* Hyphenation point */
+          {
+            attrs[i].break_inserts_hyphen = TRUE;
+            attrs[i].break_removes_preceding = TRUE;
+          }
+
         prev_space_or_hyphen = space_or_hyphen;
       }
 
@@ -1774,7 +1784,8 @@ break_attrs (const char   *text,
 
         for (pos = start_pos + 1; pos < end_pos; pos++)
           {
-            log_attrs[pos].break_inserts_hyphen = FALSE;
+            if (!log_attrs[pos].break_removes_preceding)
+              log_attrs[pos].break_inserts_hyphen = FALSE;
           }
       }
   } while (pango_attr_iterator_next (&iter));
diff --git a/pango/pango-break.h b/pango/pango-break.h
index 52febd3d..5d791e27 100644
--- a/pango/pango-break.h
+++ b/pango/pango-break.h
@@ -74,6 +74,8 @@ G_BEGIN_DECLS
  *   semantics. (Since: 1.22)
  * @break_inserts_hyphen: when breaking lines before this char, insert a hyphen.
  *   Since: 1.50
+ * @break_removes_preceding: when breaking lines before this char, remove the
+ *   preceding char. Since 1.50
  *
  * The `PangoLogAttr` structure stores information about the attributes of a
  * single character.
@@ -94,6 +96,7 @@ struct _PangoLogAttr
   guint is_expandable_space         : 1;
   guint is_word_boundary            : 1;
   guint break_inserts_hyphen        : 1;
+  guint break_removes_preceding     : 1;
 };
 
 PANGO_DEPRECATED_IN_1_44
diff --git a/pango/pango-layout.c b/pango/pango-layout.c
index 4366450a..1ebe42cd 100644
--- a/pango/pango-layout.c
+++ b/pango/pango-layout.c
@@ -3601,7 +3601,8 @@ break_needs_hyphen (PangoLayout    *layout,
                     ParaBreakState *state,
                     int             pos)
 {
-  return layout->log_attrs[state->start_offset + pos].break_inserts_hyphen;
+  return layout->log_attrs[state->start_offset + pos].break_inserts_hyphen ||
+         layout->log_attrs[state->start_offset + pos].break_removes_preceding;
 }
 
 static int
@@ -3626,24 +3627,57 @@ find_hyphen_width (PangoItem *item)
   return 0;
 }
 
+static int
+find_char_width (PangoItem *item,
+                 gunichar   wc)
+{
+  hb_font_t *hb_font;
+  hb_codepoint_t glyph;
+
+  if (!item->analysis.font)
+    return 0;
+
+  hb_font = pango_font_get_hb_font (item->analysis.font);
+  if (hb_font_get_nominal_glyph (hb_font, wc, &glyph))
+    return hb_font_get_glyph_h_advance (hb_font, glyph);
+
+  return 0;
+}
+
+static inline void
+ensure_hyphen_width (ParaBreakState *state)
+{
+  if (state->hyphen_width < 0)
+    {
+      PangoItem *item = state->items->data;
+      state->hyphen_width = find_hyphen_width (item);
+    }
+}
+
 static int
 find_break_extra_width (PangoLayout    *layout,
                         ParaBreakState *state,
                         int             pos)
 {
   /* Check whether to insert a hyphen */
-  if (break_needs_hyphen (layout, state, pos))
+  if (layout->log_attrs[state->start_offset + pos].break_inserts_hyphen)
     {
-      if (state->hyphen_width < 0)
+      ensure_hyphen_width (state);
+
+      if (layout->log_attrs[state->start_offset + pos].break_removes_preceding)
         {
           PangoItem *item = state->items->data;
-          state->hyphen_width = find_hyphen_width (item);
-        }
+          gunichar wc;
 
-      return state->hyphen_width;
+          wc = g_utf8_get_char (g_utf8_offset_to_pointer (layout->text, state->start_offset + pos - 1));
+
+          return state->hyphen_width - find_char_width (item, wc);
+        }
+      else
+        return state->hyphen_width;
     }
-  else
-    return 0;
+
+  return 0;
 }
 
 #if 0
diff --git a/pango/shape.c b/pango/shape.c
index 707534ed..62c0f025 100644
--- a/pango/shape.c
+++ b/pango/shape.c
@@ -344,6 +344,7 @@ pango_hb_shape (const char          *item_text,
                 int                  paragraph_length,
                 const PangoAnalysis *analysis,
                 PangoLogAttr        *log_attrs,
+                int                  num_chars,
                 PangoGlyphString    *glyphs,
                 PangoShapeFlags      flags)
 {
@@ -362,6 +363,7 @@ pango_hb_shape (const char          *item_text,
   unsigned int num_features = 0;
   PangoGlyphInfo *infos;
   PangoTextTransform transform;
+  int hyphen_index;
 
   g_return_if_fail (analysis != NULL);
   g_return_if_fail (analysis->font != NULL);
@@ -392,6 +394,17 @@ pango_hb_shape (const char          *item_text,
   hb_buffer_set_flags (hb_buffer, hb_buffer_flags);
   hb_buffer_set_invisible_glyph (hb_buffer, PANGO_GLYPH_EMPTY);
 
+  if (analysis->flags & PANGO_ANALYSIS_FLAG_NEED_HYPHEN)
+    {
+      const char *p = paragraph_text + item_offset + item_length;
+      int last_char_len = p - g_utf8_prev_char (p);
+
+      hyphen_index = item_offset + item_length - last_char_len;
+
+      if (log_attrs[num_chars].break_removes_preceding)
+        item_length -= last_char_len;
+    }
+
   /* Add pre-context */
   hb_buffer_add_utf8 (hb_buffer, paragraph_text, item_offset, item_offset, 0);
 
@@ -407,7 +420,9 @@ pango_hb_shape (const char          *item_text,
       /* Transform the item text according to text transform.
        * Note: we assume text transforms won't cross font boundaries
        */
-      for (p = paragraph_text + item_offset, i = 0; p < paragraph_text + item_offset + item_length; p = g_utf8_next_char (p), i++)
+      for (p = paragraph_text + item_offset, i = 0;
+           p < paragraph_text + item_offset + item_length;
+           p = g_utf8_next_char (p), i++)
         {
           int index = p - paragraph_text;
           gunichar ch = g_utf8_get_char (p);
@@ -457,15 +472,13 @@ pango_hb_shape (const char          *item_text,
       /* Insert either a Unicode or ASCII hyphen. We may
        * want to look for script-specific hyphens here.
        */
-      const char *p = paragraph_text + item_offset + item_length;
-      int last_char_len = p - g_utf8_prev_char (p);
       hb_codepoint_t glyph;
 
       /* Note: We rely on hb_buffer_add clearing existing post-context */
       if (hb_font_get_nominal_glyph (hb_font, 0x2010, &glyph))
-        hb_buffer_add (hb_buffer, 0x2010, item_offset + item_length - last_char_len);
+        hb_buffer_add (hb_buffer, 0x2010, hyphen_index);
       else if (hb_font_get_nominal_glyph (hb_font, '-', &glyph))
-        hb_buffer_add (hb_buffer, '-', item_offset + item_length - last_char_len);
+        hb_buffer_add (hb_buffer, '-', hyphen_index);
     }
 
   pango_font_get_features (analysis->font, features, G_N_ELEMENTS (features), &num_features);
@@ -578,6 +591,7 @@ pango_shape_internal (const char          *item_text,
                       int                  paragraph_length,
                       const PangoAnalysis *analysis,
                       PangoLogAttr        *log_attrs,
+                      int                  num_chars,
                       PangoGlyphString    *glyphs,
                       PangoShapeFlags      flags)
 {
@@ -605,9 +619,8 @@ pango_shape_internal (const char          *item_text,
       pango_hb_shape (item_text, item_length,
                       paragraph_text, paragraph_length,
                       analysis,
-                      log_attrs,
-                      glyphs,
-                      flags);
+                      log_attrs, num_chars,
+                      glyphs, flags);
 
       if (G_UNLIKELY (glyphs->num_glyphs == 0))
         {
@@ -866,7 +879,7 @@ pango_shape_with_flags (const char          *item_text,
 {
   pango_shape_internal (item_text, item_length,
                         paragraph_text, paragraph_length,
-                        analysis, NULL,
+                        analysis, NULL, 0,
                         glyphs, flags);
 }
 
@@ -905,7 +918,8 @@ pango_shape_item (PangoItem        *item,
 {
   pango_shape_internal (paragraph_text + item->offset, item->length,
                         paragraph_text, paragraph_length,
-                        &item->analysis, log_attrs,
+                        &item->analysis,
+                        log_attrs, item->num_chars,
                         glyphs, flags);
 }
 
diff --git a/tests/layouts/valid-17.expected b/tests/layouts/valid-17.expected
index 4b3192fb..a2b7d494 100644
--- a/tests/layouts/valid-17.expected
+++ b/tests/layouts/valid-17.expected
@@ -28,7 +28,7 @@ i=3, index=17, paragraph-start=1, dir=ltr ''
 
 --- runs
 
-i=1, index=0, chars=13, level=0, gravity=south, flags=0, font=OMITTED, script=latin, language=en-us, 'some|bla|bla|'
+i=1, index=0, chars=13, level=0, gravity=south, flags=4, font=OMITTED, script=latin, language=en-us, 'some|bla|bla|'
 i=2, index=13, no run, line end
 i=3, index=13, chars=3, level=0, gravity=south, flags=0, font=OMITTED, script=latin, language=en-us, 'bla'
 i=4, index=16, no run, line end


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]