[pango/log-attr-tweaks: 33/35] Add api to validate log attrs




commit 801e93cfa39a450d28a35ad8a36f84faab19fd36
Author: Matthias Clasen <mclasen redhat com>
Date:   Sat Aug 21 15:41:39 2021 -0400

    Add api to validate log attrs
    
    This is based on code that previously lived in
    tests/validate-test-boundaries.c
    
    Fixes: #129

 pango/break.c              | 492 +++++++++++++++++++++++++++++++++++++++++-
 pango/pango-break.h        |  22 ++
 tests/meson.build          |   2 +-
 tests/test-break.c         |   1 -
 tests/validate-log-attrs.c | 528 ---------------------------------------------
 tests/validate-log-attrs.h |  52 -----
 6 files changed, 514 insertions(+), 583 deletions(-)
---
diff --git a/pango/break.c b/pango/break.c
index 0cb6666d..20756506 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -1759,7 +1759,9 @@ remove_breaks_from_range (const char   *text,
          ch == 0x007C;
 
       /* ZWS sequence */
-      if (after_zws && bt != G_UNICODE_BREAK_SPACE)
+      if (after_zws &&
+          bt != G_UNICODE_BREAK_SPACE &&
+          bt != G_UNICODE_BREAK_ZERO_WIDTH_SPACE)
         log_attrs[pos].is_line_break = TRUE;
 
       after_zws = bt == G_UNICODE_BREAK_ZERO_WIDTH_SPACE ||
@@ -2141,6 +2143,428 @@ tailor_break (const char    *text,
   return res;
 }
 
+/* }}} */
+/* {{{ Validation */
+
+/* Defines pango_validate_error_quark(), which the PANGO_VALIDATE_ERROR
+ * macro in pango-break.h expands to. The quark is registered under the
+ * string "pango-validate-error-quark".
+ */
+G_DEFINE_QUARK(pango-validate-error-quark, pango_validate_error)
+
+/* CharForeachFunc:
+ *
+ * Callback invoked by log_attr_foreach() once for every character
+ * of the text being validated.
+ *
+ * @pos: index of the character, counted in characters
+ * @wc, @prev_wc, @next_wc: the character and its neighbours; @prev_wc
+ *   is 0 for the first character, @next_wc is 0 for the last one
+ * @type, @prev_type, @next_type: the GUnicodeType of those characters;
+ *   @prev_type is (GUnicodeType) -1 for the first character, and
+ *   @next_type is only meaningful when @next_wc is nonzero
+ * @attr, @prev_attr, @next_attr: the array entries at @pos, @pos - 1
+ *   and @pos + 1; @prev_attr is NULL for the first character
+ * @after_zws: state shared between consecutive calls; log_attr_foreach()
+ *   sets it to FALSE before the first call and never touches it again
+ * @error: return location for an error
+ *
+ * Returning FALSE stops the iteration and makes log_attr_foreach()
+ * return FALSE as well.
+ */
+typedef gboolean (* CharForeachFunc) (int                  pos,
+                                      gunichar             wc,
+                                      gunichar             prev_wc,
+                                      gunichar             next_wc,
+                                      GUnicodeType         type,
+                                      GUnicodeType         prev_type,
+                                      GUnicodeType         next_type,
+                                      const PangoLogAttr  *attr,
+                                      const PangoLogAttr  *prev_attr,
+                                      const PangoLogAttr  *next_attr,
+                                      gboolean            *after_zws,
+                                      GError             **error);
+
+/* log_attr_foreach:
+ *
+ * Walk @text character by character and call @func for each one,
+ * handing it the character, its neighbours, their Unicode types and
+ * the matching entries of @attrs.
+ *
+ * @text: UTF-8 text
+ * @length: length of @text in bytes, or a negative value if @text is
+ *   nul-terminated
+ * @attrs: attribute array, expected to hold one entry more than @text
+ *   has characters
+ * @attrs_len: number of entries in @attrs
+ * @func: callback to run for every character
+ * @error: return location for an error, passed through to @func
+ *
+ * Iteration ends at the first nul character, at @text + @length, or as
+ * soon as the entry following the current character would lie outside
+ * @attrs, whichever comes first.
+ *
+ * Returns: FALSE as soon as @func returns FALSE, TRUE otherwise
+ */
+static gboolean
+log_attr_foreach (const char          *text,
+                  int                  length,
+                  const PangoLogAttr  *attrs,
+                  int                  attrs_len,
+                  CharForeachFunc      func,
+                  GError             **error)
+{
+  /* With a negative length there is no end pointer to compare against;
+   * forming text + length would point in front of the buffer.
+   */
+  const char *end = length >= 0 ? text + length : NULL;
+  const char *next = text;
+  int i = 0;
+  gunichar prev_wc = 0;
+  gunichar next_wc;
+  GUnicodeType prev_type = (GUnicodeType) -1;
+  GUnicodeType next_type;
+  gboolean after_zws = FALSE;
+
+  if (length == 0)
+    return TRUE;
+
+  next_wc = g_utf8_get_char (next);
+  next_type = g_unichar_type (next_wc);
+
+  /* func is handed &attrs[i + 1], so that index has to be valid.
+   * For an array of the expected size this never cuts the loop short.
+   */
+  while (next_wc != 0 && i + 1 < attrs_len)
+    {
+      gunichar wc = next_wc;
+      GUnicodeType type = next_type;
+
+      next = g_utf8_next_char (next);
+
+      if (end != NULL && next >= end)
+        next_wc = 0;
+      else
+        next_wc = g_utf8_get_char (next);
+
+      /* Mirror the "no previous character" marker instead of passing
+       * on the stale type of the current character.
+       */
+      if (next_wc)
+        next_type = g_unichar_type (next_wc);
+      else
+        next_type = (GUnicodeType) -1;
+
+      if (!func (i,
+                 wc, prev_wc, next_wc,
+                 type, prev_type, next_type,
+                 &attrs[i],
+                 i != 0 ? &attrs[i - 1] : NULL,
+                 &attrs[i + 1],
+                 &after_zws,
+                 error))
+        return FALSE;
+
+      prev_type = type;
+      prev_wc = wc;
+      i++;
+    }
+
+  return TRUE;
+}
+
+/* check_line_char:
+ *
+ * CharForeachFunc that checks the line-break attributes around one
+ * character against a subset of the UAX #14 line breaking rules. The
+ * rule a check belongs to is quoted in its error message.
+ *
+ * @attr describes the position in front of @wc, so attr->is_line_break
+ * means "a line may be broken between @prev_wc and @wc". @next_attr is
+ * only looked at for the last character. @type, @prev_type and
+ * @next_type are not used.
+ *
+ * @after_zws is recomputed on every call: it becomes TRUE if @prev_wc
+ * is a zero-width space, or if @prev_wc is a space and the value left
+ * by the previous call was TRUE.
+ *
+ * Returns: TRUE if no check fails; otherwise FALSE, with @error set to
+ *   a PANGO_VALIDATE_ERROR_BREAK error
+ */
+static gboolean
+check_line_char (int                  pos,
+                 gunichar             wc,
+                 gunichar             prev_wc,
+                 gunichar             next_wc,
+                 GUnicodeType         type,
+                 GUnicodeType         prev_type,
+                 GUnicodeType         next_type,
+                 const PangoLogAttr  *attr,
+                 const PangoLogAttr  *prev_attr,
+                 const PangoLogAttr  *next_attr,
+                 gboolean            *after_zws,
+                 GError             **error)
+{
+  GUnicodeBreakType break_type;
+  GUnicodeBreakType prev_break_type;
+  gboolean is_hard_break;
+  gboolean after_hard_break;
+
+  break_type = g_unichar_break_type (wc);
+
+  if (prev_wc)
+    prev_break_type = g_unichar_break_type (prev_wc);
+  else
+    prev_break_type = G_UNICODE_BREAK_UNKNOWN;
+
+  /* wc itself is a hard line break (BK, CR, LF or NL) */
+  is_hard_break = break_type == G_UNICODE_BREAK_MANDATORY ||
+                  break_type == G_UNICODE_BREAK_CARRIAGE_RETURN ||
+                  break_type == G_UNICODE_BREAK_LINE_FEED ||
+                  break_type == G_UNICODE_BREAK_NEXT_LINE;
+
+  /* prev_wc ends a line. A CR that is directly followed by LF does
+   * not: the pair forms a single line ending, and the break belongs
+   * after the LF.
+   */
+  after_hard_break = prev_break_type == G_UNICODE_BREAK_MANDATORY ||
+                     prev_break_type == G_UNICODE_BREAK_LINE_FEED ||
+                     prev_break_type == G_UNICODE_BREAK_NEXT_LINE ||
+                     (prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN &&
+                      break_type != G_UNICODE_BREAK_LINE_FEED);
+
+  if (prev_break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE ||
+      (prev_break_type == G_UNICODE_BREAK_SPACE && *after_zws))
+    *after_zws = TRUE;
+  else
+    *after_zws = FALSE;
+
+  if (wc == '\n' && prev_wc == '\r' && attr->is_line_break)
+    {
+      g_set_error (error,
+                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                   "char %#x %d: Do not break between \\r and \\n (LB5)", wc, pos);
+      return FALSE;
+    }
+
+  if (prev_wc == 0 && wc != 0 && attr->is_line_break)
+    {
+      g_set_error (error,
+                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                   "char %#x %d: Do not break before first char (LB2)", wc, pos);
+      return FALSE;
+    }
+
+  /* next_attr is the entry for the end of the text here */
+  if (next_wc == 0 && !next_attr->is_line_break)
+    {
+      g_set_error (error,
+                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                   "char %#x %d: Always break after the last char (LB3)", wc, pos);
+      return FALSE;
+    }
+
+  if (after_hard_break && !attr->is_mandatory_break)
+    {
+      if (prev_break_type == G_UNICODE_BREAK_MANDATORY)
+        g_set_error (error,
+                     PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                     "char %#x %d: Always break after hard line breaks (LB4)", wc, pos);
+      else
+        g_set_error (error,
+                     PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                     "char %#x %d: Always break after CR, LF and NL (LB5)", wc, pos);
+      return FALSE;
+    }
+
+  /* LB4 and LB5 take precedence over LB6: the break that follows a
+   * hard line break stays, even if the next character is another
+   * hard line break.
+   */
+  if (is_hard_break && !after_hard_break && attr->is_line_break)
+    {
+      g_set_error (error,
+                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                   "char %#x %d: Do not break before hard line breaks (LB6)", wc, pos);
+      return FALSE;
+    }
+
+  if (break_type == G_UNICODE_BREAK_SPACE ||
+      break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE)
+    {
+      if (attr->is_line_break && prev_attr != NULL &&
+          !attr->is_mandatory_break &&
+          !(next_wc && g_unichar_break_type (next_wc) == G_UNICODE_BREAK_COMBINING_MARK))
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Can't break before a space unless mandatory precedes or combining mark follows (LB7)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  /* LB6 takes precedence over LB8, so hard line breaks are exempt */
+  if (*after_zws &&
+      !is_hard_break &&
+      break_type != G_UNICODE_BREAK_ZERO_WIDTH_SPACE &&
+      break_type != G_UNICODE_BREAK_SPACE &&
+      !attr->is_line_break)
+    {
+      g_set_error (error,
+                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                   "char %#x %d: Break before a char following ZWS, even if spaces intervene (LB8)", wc, pos);
+      return FALSE;
+    }
+
+  /* NOTE(review): the message talks about a break after ZWJ, but the
+   * condition looks at the class of wc, i.e. at a break in front of a
+   * ZWJ. Confirm which one is intended.
+   */
+  if (break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER && attr->is_line_break)
+    {
+      g_set_error (error,
+                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                   "char %#x %d: Do not break after ZWJ (LB8a)", wc, pos);
+      return FALSE;
+    }
+
+  /* TODO: check LB9 */
+
+  if ((prev_break_type == G_UNICODE_BREAK_WORD_JOINER ||
+       break_type == G_UNICODE_BREAK_WORD_JOINER) &&
+      attr->is_line_break)
+    {
+      g_set_error (error,
+                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                   "char %#x %d: Do not break before or after WJ (LB11)", wc, pos);
+      return FALSE;
+    }
+
+  /* Only an actual break after GL is an error; the mere presence of
+   * a GL character in front of wc is fine.
+   */
+  if (prev_break_type == G_UNICODE_BREAK_NON_BREAKING_GLUE && attr->is_line_break)
+    {
+      g_set_error (error,
+                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                   "char %#x %d: Do not break after GL (LB12)", wc, pos);
+      return FALSE;
+    }
+
+  /* internal consistency */
+
+  if (attr->is_mandatory_break && !attr->is_line_break)
+    {
+      g_set_error (error,
+                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                   "char %#x %d: Mandatory breaks must also be marked as regular breaks", wc, pos);
+      return FALSE;
+    }
+
+  return TRUE;
+}
+
+/* check_line_invariants:
+ *
+ * Check the line-break attributes in @attrs by running
+ * check_line_char() over every character of @text.
+ *
+ * Returns: the result of log_attr_foreach(); when it is FALSE,
+ *   @error has been set by check_line_char()
+ */
+static gboolean
+check_line_invariants (const char          *text,
+                       int                  length,
+                       const PangoLogAttr  *attrs,
+                       int                  attrs_len,
+                       GError             **error)
+{
+  gboolean valid;
+
+  valid = log_attr_foreach (text, length, attrs, attrs_len, check_line_char, error);
+
+  return valid;
+}
+
+/* check_grapheme_invariants:
+ *
+ * Placeholder for grapheme boundary checks. No checks are implemented:
+ * none of the arguments is looked at, @error is never set, and the
+ * function always returns TRUE.
+ */
+static gboolean
+check_grapheme_invariants (const char          *text,
+                           int                  length,
+                           const PangoLogAttr  *attrs,
+                           int                  attrs_len,
+                           GError             **error)
+{
+  return TRUE;
+}
+
+/* check_word_invariants:
+ *
+ * Verify that the is_word_start and is_word_end flags in @attrs
+ * alternate, beginning with a start. An entry that carries both
+ * flags is accepted anywhere and leaves the state as it is.
+ * @text and @length are not used.
+ *
+ * Returns: TRUE if the flags alternate; otherwise FALSE, with @error
+ *   set to a PANGO_VALIDATE_ERROR_WORD error naming the offending index
+ */
+static gboolean
+check_word_invariants (const char          *text,
+                       int                  length,
+                       const PangoLogAttr  *attrs,
+                       int                  attrs_len,
+                       GError             **error)
+{
+  /* TRUE while a word start has been seen that is still waiting
+   * for its end
+   */
+  gboolean inside_word = FALSE;
+
+  for (int pos = 0; pos < attrs_len; pos++)
+    {
+      gboolean starts = attrs[pos].is_word_start;
+      gboolean ends = attrs[pos].is_word_end;
+
+      /* Start and end at the same position cancel each other out */
+      if (starts && ends)
+        continue;
+
+      if (starts)
+        {
+          if (inside_word)
+            {
+              g_set_error (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_WORD,
+                           "char %d: Unexpected word start", pos);
+              return FALSE;
+            }
+
+          inside_word = TRUE;
+        }
+      else if (ends)
+        {
+          if (!inside_word)
+            {
+              g_set_error (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_WORD,
+                           "char %d: Unexpected word end", pos);
+              return FALSE;
+            }
+
+          inside_word = FALSE;
+        }
+    }
+
+  return TRUE;
+}
+
+/* check_sentence_invariants:
+ *
+ * Verify that the is_sentence_start and is_sentence_end flags in
+ * @attrs alternate, beginning with a start. An entry that carries
+ * both flags is accepted anywhere and leaves the state as it is.
+ * @text and @length are not used.
+ *
+ * Returns: TRUE if the flags alternate; otherwise FALSE, with @error
+ *   set to a PANGO_VALIDATE_ERROR_SENTENCE error naming the offending
+ *   index
+ */
+static gboolean
+check_sentence_invariants (const char          *text,
+                           int                  length,
+                           const PangoLogAttr  *attrs,
+                           int                  attrs_len,
+                           GError             **error)
+{
+  /* TRUE while a sentence start has been seen that is still waiting
+   * for its end
+   */
+  gboolean in_sentence = FALSE;
+  int pos;
+
+  for (pos = 0; pos < attrs_len; pos++)
+    {
+      const PangoLogAttr *entry = &attrs[pos];
+
+      if (in_sentence)
+        {
+          /* Expecting an end; an end that is also a start keeps the
+           * sentence state open
+           */
+          if (entry->is_sentence_end)
+            in_sentence = entry->is_sentence_start ? TRUE : FALSE;
+          else if (entry->is_sentence_start)
+            {
+              g_set_error (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_SENTENCE,
+                           "char %d: Unexpected sentence start", pos);
+              return FALSE;
+            }
+        }
+      else
+        {
+          /* Expecting a start; a start that is also an end closes
+           * the sentence right away
+           */
+          if (entry->is_sentence_start)
+            in_sentence = entry->is_sentence_end ? FALSE : TRUE;
+          else if (entry->is_sentence_end)
+            {
+              g_set_error (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_SENTENCE,
+                           "char %d: Unexpected sentence end", pos);
+              return FALSE;
+            }
+        }
+    }
+
+  return TRUE;
+}
+
+/* check_space_invariants:
+ *
+ * Verify that every entry of @log_attrs that is flagged as an
+ * expandable space is flagged as white space too.
+ * @text and @length are not used.
+ *
+ * Returns: TRUE if that holds; otherwise FALSE, with @error set to a
+ *   PANGO_VALIDATE_ERROR_SPACE error naming the first offending index
+ */
+static gboolean
+check_space_invariants (const char          *text,
+                        int                  length,
+                        const PangoLogAttr  *log_attrs,
+                        int                  attrs_len,
+                        GError             **error)
+{
+  int pos;
+
+  for (pos = 0; pos < attrs_len; pos++)
+    {
+      const PangoLogAttr *entry = &log_attrs[pos];
+
+      if (!entry->is_expandable_space || entry->is_white)
+        continue;
+
+      g_set_error (error,
+                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_SPACE,
+                   "char %d: Expandable space must be space", pos);
+      return FALSE;
+    }
+
+  return TRUE;
+}
+
 /* }}} */
 /* {{{ Public API */
 
@@ -2377,6 +2801,72 @@ pango_get_log_attrs (const char    *text,
                attrs_len);
 }
 
+/**
+ * pango_validate_log_attrs:
+ * @text: text to which @log_attrs belong
+ * @length: length of @text
+ * @log_attrs: `PangoLogAttr` array to validate
+ * @attrs_len: length of @log_attrs
+ * @error: return location for an error
+ *
+ * Apply sanity checks to @log_attrs.
+ *
+ * This function checks some conditions that Pango
+ * relies on. It is not guaranteed to be an exhaustive
+ * validity test. Currently, the checks include:
+ *
+ * - @log_attrs has one entry more than @text has characters
+ * - There's no break before the first char
+ * - There is a line break after the last char
+ * - Hard line breaks, CR, LF and NL are followed by
+ *   a mandatory break
+ * - Mandatory breaks are line breaks
+ * - Lines aren't broken between \\r and \\n
+ * - Lines aren't broken before a hard line break
+ * - Lines aren't broken before a space (unless the break
+ *   is mandatory, or the space precedes a combining mark)
+ * - Lines are broken after a zero-width space, even if
+ *   spaces intervene
+ * - Lines aren't broken before or after a word joiner
+ * - Word starts and ends alternate
+ * - Sentence starts and ends alternate
+ * - Expandable spaces are spaces
+ *
+ * The checks run in a fixed order and stop at the first
+ * failure, so @error only describes the first problem found.
+ *
+ * Returns: %TRUE if @log_attrs are valid; otherwise %FALSE,
+ *   with @error set to an error in the %PANGO_VALIDATE_ERROR
+ *   domain
+ *
+ * Since: 1.50
+ */
+gboolean
+pango_validate_log_attrs (const char          *text,
+                          int                  length,
+                          const PangoLogAttr  *log_attrs,
+                          int                  attrs_len,
+                          GError             **error)
+{
+  int n_chars;
+
+  /* One entry per character, plus one for the position at the end */
+  n_chars = g_utf8_strlen (text, length);
+  if (attrs_len != n_chars + 1)
+    {
+      g_set_error_literal (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_FAILED,
+                           "Array has wrong length");
+      return FALSE;
+    }
+
+  if (!check_line_invariants (text, length, log_attrs, attrs_len, error))
+    return FALSE;
+
+  /* Currently a no-op that always succeeds */
+  if (!check_grapheme_invariants (text, length, log_attrs, attrs_len, error))
+    return FALSE;
+
+  if (!check_word_invariants (text, length, log_attrs, attrs_len, error))
+    return FALSE;
+
+  if (!check_sentence_invariants (text, length, log_attrs, attrs_len, error))
+    return FALSE;
+
+  if (!check_space_invariants (text, length, log_attrs, attrs_len, error))
+    return FALSE;
+
+  return TRUE;
+}
+
 /* }}} */
 
 /* vim:set foldmethod=marker expandtab: */
diff --git a/pango/pango-break.h b/pango/pango-break.h
index 5d791e27..b29d713a 100644
--- a/pango/pango-break.h
+++ b/pango/pango-break.h
@@ -137,6 +137,28 @@ void                    pango_attr_break        (const char    *text,
                                                  PangoLogAttr  *attrs,
                                                  int            attrs_len);
 
+/**
+ * PANGO_VALIDATE_ERROR:
+ *
+ * Error domain of the errors reported by pango_validate_log_attrs().
+ */
+#define PANGO_VALIDATE_ERROR (pango_validate_error_quark ())
+
+/**
+ * PangoValidateError:
+ * @PANGO_VALIDATE_ERROR_FAILED: the attribute array does not have
+ *   the length that matches the text
+ * @PANGO_VALIDATE_ERROR_BREAK: a line break attribute is wrong
+ * @PANGO_VALIDATE_ERROR_GRAPHEME: reserved for grapheme boundary
+ *   errors; not reported by the current implementation
+ * @PANGO_VALIDATE_ERROR_WORD: word starts and ends do not alternate
+ * @PANGO_VALIDATE_ERROR_SENTENCE: sentence starts and ends do not
+ *   alternate
+ * @PANGO_VALIDATE_ERROR_SPACE: an expandable space is not marked
+ *   as white space
+ *
+ * Error codes in the %PANGO_VALIDATE_ERROR domain.
+ */
+typedef enum
+{
+  PANGO_VALIDATE_ERROR_FAILED,
+  PANGO_VALIDATE_ERROR_BREAK,
+  PANGO_VALIDATE_ERROR_GRAPHEME,
+  PANGO_VALIDATE_ERROR_WORD,
+  PANGO_VALIDATE_ERROR_SENTENCE,
+  PANGO_VALIDATE_ERROR_SPACE
+} PangoValidateError;
+
+PANGO_AVAILABLE_IN_1_50
+GQuark                 pango_validate_error_quark (void);
+
+PANGO_AVAILABLE_IN_1_50
+gboolean               pango_validate_log_attrs (const char          *text,
+                                                 int                  length,
+                                                 const PangoLogAttr  *log_attrs,
+                                                 int                  attrs_len,
+                                                 GError             **error);
+
 G_END_DECLS
 
 #endif /* __PANGO_BREAK_H__ */
diff --git a/tests/meson.build b/tests/meson.build
index 90bb94ba..4776b2c9 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -52,7 +52,7 @@ if cairo_dep.found()
     [ 'testmisc', [ 'testmisc.c' ], [ libpangocairo_dep, libpangoft2_dep, glib_dep, harfbuzz_dep ] ],
     [ 'cxx-test', [ 'cxx-test.cpp' ], [ libpangocairo_dep, gobject_dep, harfbuzz_dep ] ],
     [ 'test-harfbuzz', [ 'test-harfbuzz.c' ], [ libpangocairo_dep, gobject_dep, harfbuzz_dep ] ],
-    [ 'test-break', [ 'test-break.c', 'test-common.c', 'validate-log-attrs.c' ], [libpangocairo_dep, glib_dep, harfbuzz_dep ] ]
+    [ 'test-break', [ 'test-break.c', 'test-common.c' ], [libpangocairo_dep, glib_dep, harfbuzz_dep ] ]
   ]
 
   if pango_cairo_backends.contains('png')
diff --git a/tests/test-break.c b/tests/test-break.c
index fecca168..672d44e1 100644
--- a/tests/test-break.c
+++ b/tests/test-break.c
@@ -30,7 +30,6 @@
 #include "config.h"
 #include <pango/pangocairo.h>
 #include "test-common.h"
-#include "validate-log-attrs.h"
 
 
 static PangoContext *context;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]