[pango/break-tailoring] Add segmentation attributes

From: Matthias Clasen <matthiasc src gnome org>
To: commits-list gnome org
Cc:
Subject: [pango/break-tailoring] Add segmentation attributes
Date: Wed, 25 Aug 2021 05:08:11 +0000 (UTC)

commit b614ea2b06b3c9defaceb92b6904fa8a92249abe
Author: Matthias Clasen <mclasen redhat com>
Date:   Sat Aug 21 23:54:03 2021 -0400

    Add segmentation attributes
    
    Add attributes that let us override word and
    sentence boundaries (and, indirectly, line breaks).
    
    Tests included.

 docs/pango_markup.md           |   6 +
 pango/break.c                  | 410 ++++++++++++++++++++++++++++++++++++-----
 pango/pango-attributes.c       |  56 ++++++
 pango/pango-attributes.h       |  10 +
 pango/pango-layout.c           |   2 +
 pango/pango-markup.c           |  23 ++-
 tests/breaks/five.break        |   1 +
 tests/breaks/five.expected     |   7 +
 tests/breaks/fourteen.break    |   2 +
 tests/breaks/fourteen.expected |   7 +
 tests/breaks/thirteen.break    |   2 +
 tests/breaks/thirteen.expected |   7 +
 tests/breaks/twelve.break      |   2 +
 tests/breaks/twelve.expected   |   7 +
 tests/test-common.c            |   2 +
 tests/testattributes.c         |  16 +-
 16 files changed, 509 insertions(+), 51 deletions(-)
---
diff --git a/docs/pango_markup.md b/docs/pango_markup.md
index 3a1cc311..287bbc56 100644
--- a/docs/pango_markup.md
+++ b/docs/pango_markup.md
@@ -201,6 +201,12 @@ text_transform
   'none', 'lowercase', 'uppercase' or 'capitalize'. Support for text transformation
   was added in Pango 1.50.
 
+segment
+: Overrides word or sentence boundaries. The value can be 'word' or 'sentence',
+  to indicate that the span should be treated as a single word or sentence.
+  Overlapping segments will be split to allow this, and line breaks will be
+  adjusted accordingly. Available since Pango 1.50.
+
 ## Convenience Tags
 
 `<b>`
diff --git a/pango/break.c b/pango/break.c
index d348f9b8..864ac339 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -1699,64 +1699,325 @@ break_script (const char          *item_text,
 /* }}} */
 /* {{{ Attribute-based customization */
 
+/* We allow customizing log attrs in two ways:
+ *
+ * - You can directly remove breaks from a range, using allow_breaks=false.
+ *   We preserve the non-tailorable rules from UAX #14, so mandatory breaks
+ *   and breaks after ZWS remain. We also preserve break opportunities after
+ *   hyphens and visible word dividers.
+ *
+ * - You can tweak the segmentation by marking ranges as word or sentence.
+ *   When doing so, we split adjacent segments to preserve alternating
+ *   starts and ends. We add a line break opportunity before each word that
+ *   is created in this way, and we remove line break opportunities inside
+ *   the word in the same way as for a range marked as allow_breaks=false,
+ *   except that we don't remove char break opportunities.
+ *
+ *   Note that UAX #29 does not guarantee that words fall neatly into
+ *   sentences, so we don't do extra work to enforce that.
+ */
+
+static void
+remove_breaks_from_range (const char   *text,
+                          int           start,
+                          PangoLogAttr *log_attrs,
+                          int           start_pos,
+                          int           end_pos)
+{
+  int pos;
+  const char *p;
+  gunichar ch;
+  int bt;
+  gboolean after_zws;
+  gboolean after_hyphen;
+
+  /* Assume our range doesn't start after a hyphen or in a zws sequence */
+  after_zws = FALSE;
+  after_hyphen = FALSE;
+  for (pos = start_pos + 1, p = g_utf8_next_char (text + start);
+       pos < end_pos;
+       pos++, p = g_utf8_next_char (p))
+    {
+      /* Mandatory breaks aren't tailorable */
+      if (!log_attrs[pos].is_mandatory_break)
+        log_attrs[pos].is_line_break = FALSE;
+
+      ch = g_utf8_get_char (p);
+      bt = g_unichar_break_type (ch);
+
+      /* Keep the break opportunity after hyphens and visible word dividers */
+      if (after_hyphen)
+        log_attrs[pos].is_line_break = TRUE;
+
+      after_hyphen = ch == 0x00ad || /* Soft Hyphen */
+         ch == 0x058A || ch == 0x2010 || /* Breaking Hyphens (UAX #14) */
+         ch == 0x2012 || ch == 0x2013 ||
+         ch == 0x05BE || ch == 0x0F0B || /* Visible word dividers */
+         ch == 0x1361 || ch == 0x17D8 ||
+         ch == 0x17DA || ch == 0x2027 ||
+         ch == 0x007C;
+
+      /* Keep the break opportunity after a ZWS and any spaces following it */
+      if (after_zws && bt != G_UNICODE_BREAK_SPACE)
+        log_attrs[pos].is_line_break = TRUE;
+
+      after_zws = bt == G_UNICODE_BREAK_ZERO_WIDTH_SPACE ||
+                  (bt == G_UNICODE_BREAK_SPACE && after_zws);
+    }
+}
+
 static gboolean
-break_attrs (const char   *text,
-             int           length,
-             GSList       *attributes,
-             int           offset,
-             PangoLogAttr *log_attrs,
-             int           log_attrs_len)
+handle_allow_breaks (const char    *text,
+                     int            length,
+                     PangoAttrList *attrs,
+                     int            offset,
+                     PangoLogAttr  *log_attrs,
+                     int            log_attrs_len)
 {
-  PangoAttrList list;
-  PangoAttrList hyphens;
   PangoAttrIterator iter;
-  GSList *l;
+  gboolean tailored = FALSE;
 
-  _pango_attr_list_init (&list);
-  _pango_attr_list_init (&hyphens);
+  _pango_attr_list_get_iterator (attrs, &iter);
 
-  for (l = attributes; l; l = l->next)
+  do
     {
-      PangoAttribute *attr = l->data;
+      const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_ALLOW_BREAKS);
 
-      if (attr->klass->type == PANGO_ATTR_ALLOW_BREAKS)
-        pango_attr_list_insert (&list, pango_attribute_copy (attr));
-      else if (attr->klass->type == PANGO_ATTR_INSERT_HYPHENS)
-        pango_attr_list_insert (&hyphens, pango_attribute_copy (attr));
+      if (!attr)
+        continue;
+
+      if (!((PangoAttrInt*)attr)->value)
+        {
+          int start, end;
+          int start_pos, end_pos;
+          int pos;
+
+          start = attr->start_index;
+          end = attr->end_index;
+          if (start < offset)
+            start_pos = 0;
+          else
+            start_pos = g_utf8_pointer_to_offset (text, text + start - offset);
+          if (end >= offset + length)
+            end_pos = log_attrs_len;
+          else
+            end_pos = g_utf8_pointer_to_offset (text, text + end - offset);
+
+          for (pos = start_pos + 1; pos < end_pos; pos++)
+            log_attrs[pos].is_char_break = FALSE;
+
+          remove_breaks_from_range (text, MAX (start - offset, 0), log_attrs, start_pos, end_pos);
+
+          tailored = TRUE;
+        }
     }
+  while (pango_attr_iterator_next (&iter));
 
-  _pango_attr_list_get_iterator (&list, &iter);
-  do {
-    const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_ALLOW_BREAKS);
+  _pango_attr_iterator_destroy (&iter);
 
-    if (attr && ((PangoAttrInt*)attr)->value == 0)
-      {
-        int start, end;
-        int start_pos, end_pos;
-        int pos;
+  return tailored;
+}
 
-        pango_attr_iterator_range (&iter, &start, &end);
-        if (start < offset)
-          start_pos = 0;
-        else
-          start_pos = g_utf8_pointer_to_offset (text, text + start - offset);
-        if (end >= offset + length)
-          end_pos = log_attrs_len;
-        else
-          end_pos = g_utf8_pointer_to_offset (text, text + end - offset);
 
-        for (pos = start_pos + 1; pos < end_pos; pos++)
-          {
-            log_attrs[pos].is_mandatory_break = FALSE;
-            log_attrs[pos].is_line_break = FALSE;
-            log_attrs[pos].is_char_break = FALSE;
-          }
-      }
-  } while (pango_attr_iterator_next (&iter));
+static gboolean
+handle_words (const char    *text,
+              int            length,
+              PangoAttrList *attrs,
+              int            offset,
+              PangoLogAttr  *log_attrs,
+              int            log_attrs_len)
+{
+  PangoAttrIterator iter;
+  gboolean tailored = FALSE;
+
+  _pango_attr_list_get_iterator (attrs, &iter);
+
+  do
+    {
+      const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_WORD);
+      int start, end;
+      int start_pos, end_pos;
+      int pos;
+
+      if (!attr)
+        continue;
+
+      start = attr->start_index;
+      end = attr->end_index;
+      if (start < offset)
+        start_pos = 0;
+      else
+        start_pos = g_utf8_pointer_to_offset (text, text + start - offset);
+      if (end >= offset + length)
+        end_pos = log_attrs_len;
+      else
+        end_pos = g_utf8_pointer_to_offset (text, text + end - offset);
+
+      for (pos = start_pos + 1; pos < end_pos; pos++)
+        {
+          log_attrs[pos].is_word_start = FALSE;
+          log_attrs[pos].is_word_end = FALSE;
+          log_attrs[pos].is_word_boundary = FALSE;
+        }
+
+      remove_breaks_from_range (text, MAX (start - offset, 0), log_attrs,
+                                start_pos, end_pos);
+
+      if (start >= offset)
+        {
+          gboolean in_word = FALSE;
+          for (pos = start_pos - 1; pos >= 0; pos--)
+            {
+              if (log_attrs[pos].is_word_end)
+                break;
+              if (log_attrs[pos].is_word_start)
+                {
+                  in_word = TRUE;
+                  break;
+                }
+            }
+          log_attrs[start_pos].is_word_start = TRUE;
+          log_attrs[start_pos].is_word_end = in_word;
+          log_attrs[start_pos].is_word_boundary = TRUE;
+
+          /* Allow line breaks before words */
+          log_attrs[start_pos].is_line_break = TRUE;
+
+          tailored = TRUE;
+        }
+
+      if (end < offset + length)
+        {
+          gboolean in_word = FALSE;
+          for (pos = end_pos + 1; pos < log_attrs_len; pos++)
+            {
+              if (log_attrs[pos].is_word_start)
+                break;
+              if (log_attrs[pos].is_word_end)
+                {
+                  in_word = TRUE;
+                  break;
+                }
+            }
+          log_attrs[end_pos].is_word_start = in_word;
+          log_attrs[end_pos].is_word_end = TRUE;
+          log_attrs[end_pos].is_word_boundary = TRUE;
+
+          /* Allow line breaks before words */
+          if (in_word)
+            log_attrs[end_pos].is_line_break = TRUE;
+
+          tailored = TRUE;
+        }
+    }
+  while (pango_attr_iterator_next (&iter));
+
+  _pango_attr_iterator_destroy (&iter);
+
+  return tailored;
+}
+
+static gboolean
+handle_sentences (const char    *text,
+                  int            length,
+                  PangoAttrList *attrs,
+                  int            offset,
+                  PangoLogAttr  *log_attrs,
+                  int            log_attrs_len)
+{
+  PangoAttrIterator iter;
+  gboolean tailored = FALSE;
+
+  _pango_attr_list_get_iterator (attrs, &iter);
+
+  do
+    {
+      const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_SENTENCE);
+      int start, end;
+      int start_pos, end_pos;
+      int pos;
+
+      if (!attr)
+        continue;
+
+      start = attr->start_index;
+      end = attr->end_index;
+      if (start < offset)
+        start_pos = 0;
+      else
+        start_pos = g_utf8_pointer_to_offset (text, text + start - offset);
+      if (end >= offset + length)
+        end_pos = log_attrs_len;
+      else
+        end_pos = g_utf8_pointer_to_offset (text, text + end - offset);
+
+      for (pos = start_pos + 1; pos < end_pos; pos++)
+        {
+          log_attrs[pos].is_sentence_start = FALSE;
+          log_attrs[pos].is_sentence_end = FALSE;
+          log_attrs[pos].is_sentence_boundary = FALSE;
+
+          tailored = TRUE;
+        }
+      if (start >= offset)
+        {
+          gboolean in_sentence = FALSE; /* does an unfinished sentence precede the range? */
+          for (pos = start_pos - 1; pos >= 0; pos--)
+            {
+              if (log_attrs[pos].is_sentence_end)
+                break;
+              if (log_attrs[pos].is_sentence_start)
+                {
+                  in_sentence = TRUE;
+                  break;
+                }
+            }
+          log_attrs[start_pos].is_sentence_start = TRUE;
+          log_attrs[start_pos].is_sentence_end = in_sentence;
+          log_attrs[start_pos].is_sentence_boundary = TRUE;
+
+          tailored = TRUE;
+        }
+      if (end < offset + length)
+        {
+          gboolean in_sentence = FALSE; /* does a sentence continue past the range? */
+          for (pos = end_pos + 1; pos < log_attrs_len; pos++) /* bound the scan by pos, not end_pos */
+            {
+              if (log_attrs[pos].is_sentence_start)
+                break;
+              if (log_attrs[pos].is_sentence_end)
+                {
+                  in_sentence = TRUE;
+                  break;
+                }
+            }
+          log_attrs[end_pos].is_sentence_start = in_sentence;
+          log_attrs[end_pos].is_sentence_end = TRUE;
+          log_attrs[end_pos].is_sentence_boundary = TRUE;
+
+          tailored = TRUE;
+        }
+    }
+  while (pango_attr_iterator_next (&iter));
 
   _pango_attr_iterator_destroy (&iter);
 
-  _pango_attr_list_get_iterator (&hyphens, &iter);
+  return tailored;
+}
+
+static gboolean
+handle_hyphens (const char    *text,
+                int            length,
+                PangoAttrList *attrs,
+                int            offset,
+                PangoLogAttr  *log_attrs,
+                int            log_attrs_len)
+{
+  PangoAttrIterator iter;
+  gboolean tailored = FALSE;
+
+  _pango_attr_list_get_iterator (attrs, &iter);
+
   do {
     const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_INSERT_HYPHENS);
 
@@ -1779,17 +2040,72 @@ break_attrs (const char   *text,
         for (pos = start_pos + 1; pos < end_pos; pos++)
           {
             if (!log_attrs[pos].break_removes_preceding)
-              log_attrs[pos].break_inserts_hyphen = FALSE;
+              {
+                log_attrs[pos].break_inserts_hyphen = FALSE;
+
+                tailored = TRUE;
+              }
           }
       }
   } while (pango_attr_iterator_next (&iter));
 
   _pango_attr_iterator_destroy (&iter);
 
-  _pango_attr_list_destroy (&list);
+  return tailored;
+}
+
+static gboolean
+break_attrs (const char   *text,
+             int           length,
+             GSList       *attributes,
+             int           offset,
+             PangoLogAttr *log_attrs,
+             int           log_attrs_len)
+{
+  PangoAttrList allow_breaks;
+  PangoAttrList words;
+  PangoAttrList sentences;
+  PangoAttrList hyphens;
+  GSList *l;
+  gboolean tailored = FALSE; /* set once any handler changed a log attr */
+
+  _pango_attr_list_init (&allow_breaks);
+  _pango_attr_list_init (&words);
+  _pango_attr_list_init (&sentences);
+  _pango_attr_list_init (&hyphens);
+
+  for (l = attributes; l; l = l->next)
+    {
+      PangoAttribute *attr = l->data;
+
+      if (attr->klass->type == PANGO_ATTR_ALLOW_BREAKS)
+        pango_attr_list_insert (&allow_breaks, pango_attribute_copy (attr));
+      else if (attr->klass->type == PANGO_ATTR_WORD)
+        pango_attr_list_insert (&words, pango_attribute_copy (attr));
+      else if (attr->klass->type == PANGO_ATTR_SENTENCE)
+        pango_attr_list_insert (&sentences, pango_attribute_copy (attr));
+      else if (attr->klass->type == PANGO_ATTR_INSERT_HYPHENS)
+        pango_attr_list_insert (&hyphens, pango_attribute_copy (attr));
+    }
+
+  tailored |= handle_allow_breaks (text, length, &allow_breaks, offset,
+                                   log_attrs, log_attrs_len);
+
+  tailored |= handle_words (text, length, &words, offset,
+                            log_attrs, log_attrs_len);
+
+  tailored |= handle_sentences (text, length, &sentences, offset,
+                                log_attrs, log_attrs_len);
+
+  tailored |= handle_hyphens (text, length, &hyphens, offset,
+                              log_attrs, log_attrs_len);
+
+  _pango_attr_list_destroy (&allow_breaks);
+  _pango_attr_list_destroy (&words);
+  _pango_attr_list_destroy (&sentences);
   _pango_attr_list_destroy (&hyphens);
 
-  return TRUE;
+  return tailored;
 }
 
 /* }}} */
@@ -2033,6 +2349,6 @@ pango_get_log_attrs (const char    *text,
                attrs_len);
 }
 
- /* }}} */
+/* }}} */
 
 /* vim:set foldmethod=marker expandtab: */
diff --git a/pango/pango-attributes.c b/pango/pango-attributes.c
index 28dc4105..326234d2 100644
--- a/pango/pango-attributes.c
+++ b/pango/pango-attributes.c
@@ -1302,6 +1302,60 @@ pango_attr_show_new (PangoShowFlags flags)
   return pango_attr_int_new (&klass, (int)flags);
 }
 
+/**
+ * pango_attr_word_new:
+ *
+ * Marks the range of the attribute as a single word.
+ *
+ * Note that this may require adjustments to word and
+ * sentence classification around the range.
+ *
+ * Return value: (transfer full): the newly allocated
+ *   `PangoAttribute`, which should be freed with
+ *   [method@Pango.Attribute.destroy]
+ *
+ * Since: 1.50
+ */
+PangoAttribute *
+pango_attr_word_new (void)
+{
+  static const PangoAttrClass klass = {
+    PANGO_ATTR_WORD,
+    pango_attr_int_copy,
+    pango_attr_int_destroy,
+    pango_attr_int_equal,
+  };
+
+  return pango_attr_int_new (&klass, 0);
+}
+
+/**
+ * pango_attr_sentence_new:
+ *
+ * Marks the range of the attribute as a single sentence.
+ *
+ * Note that this may require adjustments to word and
+ * sentence classification around the range.
+ *
+ * Return value: (transfer full): the newly allocated
+ *   `PangoAttribute`, which should be freed with
+ *   [method@Pango.Attribute.destroy]
+ *
+ * Since: 1.50
+ */
+PangoAttribute *
+pango_attr_sentence_new (void)
+{
+  static const PangoAttrClass klass = {
+    PANGO_ATTR_SENTENCE,
+    pango_attr_int_copy,
+    pango_attr_int_destroy,
+    pango_attr_int_equal,
+  };
+
+  return pango_attr_int_new (&klass, 0);
+}
+
 /**
  * pango_attr_overline_new:
  * @overline: the overline style
@@ -1477,6 +1531,8 @@ pango_attribute_as_int (PangoAttribute *attr)
     case PANGO_ATTR_OVERLINE:
     case PANGO_ATTR_ABSOLUTE_LINE_HEIGHT:
     case PANGO_ATTR_TEXT_TRANSFORM:
+    case PANGO_ATTR_WORD:
+    case PANGO_ATTR_SENTENCE:
       return (PangoAttrInt *)attr;
 
     default:
diff --git a/pango/pango-attributes.h b/pango/pango-attributes.h
index 86826b62..613aa021 100644
--- a/pango/pango-attributes.h
+++ b/pango/pango-attributes.h
@@ -77,6 +77,8 @@ typedef struct _PangoAttrFontFeatures PangoAttrFontFeatures;
  * @PANGO_ATTR_OVERLINE_COLOR: overline color ([struct@Pango.AttrColor]). Since 1.46
  * @PANGO_ATTR_LINE_HEIGHT: line height factor ([struct@Pango.AttrFloat]). Since: 1.50
  * @PANGO_ATTR_ABSOLUTE_LINE_HEIGHT: line height ([struct@Pango.AttrInt]). Since: 1.50
+ * @PANGO_ATTR_WORD: override segmentation to classify the range of the attribute as a single word ([struct@Pango.AttrInt]). Since 1.50
+ * @PANGO_ATTR_SENTENCE: override segmentation to classify the range of the attribute as a single sentence ([struct@Pango.AttrInt]). Since 1.50
  *
  * The `PangoAttrType` distinguishes between different types of attributes.
  *
@@ -121,6 +123,8 @@ typedef enum
   PANGO_ATTR_LINE_HEIGHT,       /* PangoAttrFloat */
   PANGO_ATTR_ABSOLUTE_LINE_HEIGHT, /* PangoAttrInt */
   PANGO_ATTR_TEXT_TRANSFORM,    /* PangoAttrInt */
+  PANGO_ATTR_WORD,              /* PangoAttrInt */
+  PANGO_ATTR_SENTENCE,          /* PangoAttrInt */
 } PangoAttrType;
 
 /**
@@ -538,6 +542,12 @@ PANGO_AVAILABLE_IN_1_38
 PangoAttribute *        pango_attr_background_alpha_new         (guint16                      alpha);
 PANGO_AVAILABLE_IN_1_44
 PangoAttribute *        pango_attr_allow_breaks_new             (gboolean                     allow_breaks);
+
+PANGO_AVAILABLE_IN_1_50
+PangoAttribute *        pango_attr_word_new                     (void);
+PANGO_AVAILABLE_IN_1_50
+PangoAttribute *        pango_attr_sentence_new                 (void);
+
 PANGO_AVAILABLE_IN_1_44
 PangoAttribute *        pango_attr_insert_hyphens_new           (gboolean                     
insert_hyphens);
 PANGO_AVAILABLE_IN_1_46
diff --git a/pango/pango-layout.c b/pango/pango-layout.c
index 85f12f0a..ad56e8f9 100644
--- a/pango/pango-layout.c
+++ b/pango/pango-layout.c
@@ -4323,6 +4323,8 @@ affects_break_or_shape (PangoAttribute *attr,
     {
     /* Affects breaks */
     case PANGO_ATTR_ALLOW_BREAKS:
+    case PANGO_ATTR_WORD:
+    case PANGO_ATTR_SENTENCE:
     /* Affects shaping */
     case PANGO_ATTR_INSERT_HYPHENS:
     case PANGO_ATTR_FONT_FEATURES:
diff --git a/pango/pango-markup.c b/pango/pango-markup.c
index 22064103..a9df8ed0 100644
--- a/pango/pango-markup.c
+++ b/pango/pango-markup.c
@@ -1230,6 +1230,7 @@ span_parse_func     (MarkupData            *md G_GNUC_UNUSED,
   const char *show = NULL;
   const char *line_height = NULL;
   const char *text_transform = NULL;
+  const char *segment = NULL;
 
   g_markup_parse_context_get_position (context,
                                       &line_number, &char_number);
@@ -1297,6 +1298,7 @@ span_parse_func     (MarkupData            *md G_GNUC_UNUSED,
        CHECK_ATTRIBUTE (strikethrough);
        CHECK_ATTRIBUTE (strikethrough_color);
        CHECK_ATTRIBUTE (style);
+       CHECK_ATTRIBUTE (segment);
        break;
       case 't':
         CHECK_ATTRIBUTE (text_transform);
@@ -1712,7 +1714,7 @@ span_parse_func     (MarkupData            *md G_GNUC_UNUSED,
       gboolean b = FALSE;
 
       if (!span_parse_boolean ("allow_breaks", allow_breaks, &b, line_number, error))
-       goto error;
+        goto error;
 
       add_attribute (tag, pango_attr_allow_breaks_new (b));
     }
@@ -1727,6 +1729,25 @@ span_parse_func     (MarkupData            *md G_GNUC_UNUSED,
       add_attribute (tag, pango_attr_insert_hyphens_new (b));
     }
 
+  if (G_UNLIKELY (segment))
+    {
+      if (strcmp (segment, "word") == 0)
+        add_attribute (tag, pango_attr_word_new ());
+      else if (strcmp (segment, "sentence") == 0)
+        add_attribute (tag, pango_attr_sentence_new ());
+      else
+        {
+          g_set_error (error,
+                       G_MARKUP_ERROR,
+                       G_MARKUP_ERROR_INVALID_CONTENT,
+                       _("Value of 'segment' attribute on <span> tag on line %d "
+                         "could not be parsed; should be one of 'word' or "
+                         "'sentence', not '%s'"),
+                       line_number, segment);
+          goto error;
+        }
+    }
+
   return TRUE;
 
  error:
diff --git a/tests/breaks/five.break b/tests/breaks/five.break
new file mode 100644
index 00000000..c9a192dd
--- /dev/null
+++ b/tests/breaks/five.break
@@ -0,0 +1 @@
+<span segment='word'>ab cd</span>
diff --git a/tests/breaks/five.expected b/tests/breaks/five.expected
new file mode 100644
index 00000000..17ce3a58
--- /dev/null
+++ b/tests/breaks/five.expected
@@ -0,0 +1,7 @@
+Text:         ⁦a⁩ ⁦b⁩ [0x2028]  ⁦c⁩ ⁦d⁩  [0x0a] 
+Breaks:     lc c c        Lc c c       c
+Whitespace:      w             w       w
+Sentences:  bs   e        bs   e       b
+Words:      bs                 be      b
+Graphemes:  b  b b        b  b b       b
+Hyphens:       i             i          
diff --git a/tests/breaks/fourteen.break b/tests/breaks/fourteen.break
new file mode 100644
index 00000000..e270276d
--- /dev/null
+++ b/tests/breaks/fourteen.break
@@ -0,0 +1,2 @@
+# item-spanning line break attrs
+<span allow_breaks='n'>ab<span segment='word'>c α</span>βγ</span>
diff --git a/tests/breaks/fourteen.expected b/tests/breaks/fourteen.expected
new file mode 100644
index 00000000..c8a22617
--- /dev/null
+++ b/tests/breaks/fourteen.expected
@@ -0,0 +1,7 @@
+Text:         ⁦a⁩ ⁦b⁩   ⁦c⁩ [ ] ⁦α⁩   ⁦β⁩ ⁦γ⁩  [0x0a] 
+Breaks:     c    l         l     c       c
+Whitespace:          x           w       w
+Sentences:  bs                   e       b
+Words:      bs   bse       bse   be      b
+Graphemes:  b  b b   b   b b   b b       b
+Hyphens:       i i         i   i          
diff --git a/tests/breaks/thirteen.break b/tests/breaks/thirteen.break
new file mode 100644
index 00000000..a1e5cacf
--- /dev/null
+++ b/tests/breaks/thirteen.break
@@ -0,0 +1,2 @@
+# some line breaks remain in words
+a <span segment='word'>absp  defg</span> b
diff --git a/tests/breaks/thirteen.expected b/tests/breaks/thirteen.expected
new file mode 100644
index 00000000..70742cff
--- /dev/null
+++ b/tests/breaks/thirteen.expected
@@ -0,0 +1,7 @@
+Text:         ⁦a⁩  [ ]  ⁦a⁩ ⁦b⁩ [0x200b]  ⁦s⁩ ⁦p⁩ [0x200b] [ ] [ ]  ⁦d⁩ ⁦e⁩ [0xad]  ⁦f⁩ ⁦g⁩  [ ]  ⁦b⁩  
[0x0a] 
+Breaks:     c  c    lc c c        lc c c        c   c   lc c c      lc c c    lc c       c
+Whitespace:    x                                x   x                    x       w       w
+Sentences:  bs                                                                   e       b
+Words:      bs be   bs                                                   be   bs be      b
+Graphemes:  b  b    b  b b        b  b b        b   b   b  b b      b  b b    b  b       b
+Hyphens:               i          i  i                     i        i  i                  
diff --git a/tests/breaks/twelve.break b/tests/breaks/twelve.break
new file mode 100644
index 00000000..a1659be4
--- /dev/null
+++ b/tests/breaks/twelve.break
@@ -0,0 +1,2 @@
+# test line break attributes
+the file <span segment='word'>/path/</span><span segment='word'>to/</span><span segment='word'>my/</span>home is cursed.
diff --git a/tests/breaks/twelve.expected b/tests/breaks/twelve.expected
new file mode 100644
index 00000000..63e14e64
--- /dev/null
+++ b/tests/breaks/twelve.expected
@@ -0,0 +1,7 @@
+Text:         ⁦t⁩ ⁦h⁩ ⁦e⁩  [ ]  ⁦f⁩ ⁦i⁩ ⁦l⁩ ⁦e⁩  [ ]  ⁦/⁩ ⁦p⁩ ⁦a⁩ ⁦t⁩ ⁦h⁩ ⁦/⁩   ⁦t⁩ ⁦o⁩ ⁦/⁩  ⁦m⁩ ⁦y⁩ ⁦/⁩   
⁦h⁩ ⁦o⁩ ⁦m⁩ ⁦e⁩  [ ]  ⁦i⁩ ⁦s⁩  [ ]  ⁦c⁩ ⁦u⁩ ⁦r⁩ ⁦s⁩ ⁦e⁩ ⁦d⁩  ⁦.⁩ [0x0a] 
+Breaks:     c  c c c    lc c c c c    lc c c c c c lc  c c lc c c lc  c c c c    lc c c    lc c c c c c c  c 
     c
+Whitespace:        x             x                                          x         x                    w 
     w
+Sentences:  bs                                                                                             e 
     b
+Words:      bs     be   bs       be   bs           bse     bs     bse       be   bs   be   bs           be b 
     b
+Graphemes:  b  b b b    b  b b b b    b  b b b b b b   b b b  b b b   b b b b    b  b b    b  b b b b b b  b 
     b
+Hyphens:       i i         i i i           i i i       i      i       i i i         i         i i i i i      
      
diff --git a/tests/test-common.c b/tests/test-common.c
index 011b2eef..0e3719e7 100644
--- a/tests/test-common.c
+++ b/tests/test-common.c
@@ -144,6 +144,8 @@ print_attribute (PangoAttribute *attr, GString *string)
     case PANGO_ATTR_SHOW:
     case PANGO_ATTR_TEXT_TRANSFORM:
     case PANGO_ATTR_ABSOLUTE_LINE_HEIGHT:
+    case PANGO_ATTR_WORD:
+    case PANGO_ATTR_SENTENCE:
       g_string_append_printf (string, "%d", ((PangoAttrInt *)attr)->value);
       break;
     case PANGO_ATTR_FONT_DESC:
diff --git a/tests/testattributes.c b/tests/testattributes.c
index f950a204..b9dcc1b3 100644
--- a/tests/testattributes.c
+++ b/tests/testattributes.c
@@ -75,6 +75,8 @@ test_attributes_basic (void)
   test_copy (pango_attr_text_transform_new (PANGO_TEXT_TRANSFORM_UPPERCASE));
   test_copy (pango_attr_line_height_new (1.5));
   test_copy (pango_attr_line_height_new_absolute (3000));
+  test_copy (pango_attr_word_new ());
+  test_copy (pango_attr_sentence_new ());
 }
 
 static void
@@ -125,7 +127,7 @@ test_binding (PangoAttribute *attr)
     INVALID, LANGUAGE, STRING, INT, INT, INT, INT, SIZE, FONT_DESC, COLOR,
     COLOR, INT, INT, INT, SHAPE, FLOAT, INT, INT, COLOR, COLOR, SIZE,
     INT, INT, FONT_FEATURES, INT, INT, INT, INT, INT, INT, COLOR, FLOAT,
-    INT, INT, INT, INT
+    INT, INT, INT, INT, INT, INT
   };
 
   switch (attr_base[attr->klass->type])
@@ -207,6 +209,8 @@ test_binding_helpers (void)
   test_binding (pango_attr_text_transform_new (PANGO_TEXT_TRANSFORM_UPPERCASE));
   test_binding (pango_attr_line_height_new (1.5));
   test_binding (pango_attr_line_height_new_absolute (3000));
+  test_binding (pango_attr_word_new ());
+  test_binding (pango_attr_sentence_new ());
 }
 
 static void
@@ -1213,8 +1217,8 @@ test_merge2 (void)
   pango_attr_list_unref (list);
 }
 
-/* This only prints rise, size and scale, which are the
- * only relevant attributes in the test that uses this
+/* This only prints rise, size, scale, allow_breaks and line_break,
+ * which are the only relevant attributes in the tests that use this
  * function.
  */
 static void
@@ -1240,6 +1244,12 @@ print_tags_for_attributes (PangoAttrIterator *iter,
     g_string_append_printf (s, "[%d, %d]scale=%f\n",
                             attr->start_index, attr->end_index,
                             ((PangoAttrFloat*)attr)->value);
+
+  attr = pango_attr_iterator_get (iter, PANGO_ATTR_ALLOW_BREAKS);
+  if (attr)
+    g_string_append_printf (s, "[%d, %d]allow_breaks=%d\n",
+                            attr->start_index, attr->end_index,
+                            ((PangoAttrInt*)attr)->value);
 }
 
 static void
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]