[pango/log-attr-tweaks] break: validation tweaks



commit 87561357f51d72af818ff960ca890822540b9860
Author: Matthias Clasen <mclasen redhat com>
Date:   Tue Aug 24 00:44:23 2021 -0400

    break: validation tweaks

 pango/break.c | 101 ++++++++++++++++++++++++++++------------------------------
 1 file changed, 48 insertions(+), 53 deletions(-)
---
diff --git a/pango/break.c b/pango/break.c
index dac60cb3..8780392e 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -1727,6 +1727,7 @@ typedef gboolean (* CharForeachFunc) (int                  pos,
                                       const PangoLogAttr  *attr,
                                       const PangoLogAttr  *prev_attr,
                                       const PangoLogAttr  *next_attr,
+                                      gboolean            *after_zws,
                                       GError             **error);
 
 static gboolean
@@ -1744,6 +1745,7 @@ log_attr_foreach (const char          *text,
   gunichar next_wc;
   GUnicodeType prev_type;
   GUnicodeType next_type;
+  gboolean after_zws;
 
   if (next == end)
     goto done;
@@ -1754,6 +1756,8 @@ log_attr_foreach (const char          *text,
   next_wc = g_utf8_get_char (next);
   next_type = g_unichar_type (next_wc);
 
+  after_zws = FALSE;
+
   while (next_wc != 0)
     {
       GUnicodeType type;
@@ -1777,7 +1781,8 @@ log_attr_foreach (const char          *text,
                  type, prev_type, next_type,
                  &attrs[i],
                  i != 0 ? &attrs[i - 1] : NULL,
-                 next_wc != 0 ? &attrs[i + 1] : NULL,
+                 &attrs[i + 1],
+                 &after_zws,
                  error))
         return FALSE;
 
@@ -1801,52 +1806,70 @@ check_line_char (int                  pos,
                  const PangoLogAttr  *attr,
                  const PangoLogAttr  *prev_attr,
                  const PangoLogAttr  *next_attr,
+                 gboolean            *after_zws,
                  GError             **error)
 {
   GUnicodeBreakType break_type;
   GUnicodeBreakType prev_break_type;
 
   break_type = g_unichar_break_type (wc);
+
   if (prev_wc)
     prev_break_type = g_unichar_break_type (prev_wc);
   else
     prev_break_type = G_UNICODE_BREAK_UNKNOWN;
 
+  if (prev_break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE ||
+      (prev_break_type == G_UNICODE_BREAK_SPACE && *after_zws))
+    *after_zws = TRUE;
+  else
+    *after_zws = FALSE;
+
   if (wc == '\n' && prev_wc == '\r')
     {
       if (attr->is_line_break)
         {
           g_set_error (error,
                        PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
-                       "char %d: Do not break between \\r and \\n", pos);
+                       "char %#x %d: Do not break between \\r and \\n (LB5)", wc, pos);
           return FALSE;
         }
     }
 
-  if (prev_wc == 0)
+  if (prev_wc == 0 && wc != 0)
     {
       if (attr->is_line_break)
         {
           g_set_error (error,
                        PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
-                       "char %d: Do not break before first char (LB2)", pos);
+                       "char %#x %d: Do not break before first char (LB2)", wc, pos);
           return FALSE;
         }
     }
 
   if (next_wc == 0)
     {
-      if (!attr->is_line_break)
+      if (!next_attr->is_line_break)
         {
           g_set_error (error,
                        PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
-                       "char %d: Always break after the last char (LB3)", pos);
+                       "char %#x %d: Always break after the last char (LB3)", wc, pos);
           return FALSE;
         }
     }
 
-  if (prev_break_type == G_UNICODE_BREAK_MANDATORY ||
-      prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN ||
+  if (prev_break_type == G_UNICODE_BREAK_MANDATORY)
+    {
+      if (!attr->is_mandatory_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Always break after hard line breaks (LB4)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  if (prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN ||
       prev_break_type == G_UNICODE_BREAK_LINE_FEED ||
       prev_break_type == G_UNICODE_BREAK_NEXT_LINE)
     {
@@ -1854,7 +1877,7 @@ check_line_char (int                  pos,
         {
           g_set_error (error,
                        PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
-                       "char %d: Always break after hard line breaks (LB4, LB5)", pos);
+                       "char %#x %d: Always break after CR, LF and NL (LB5)", wc, pos);
           return FALSE;
         }
     }
@@ -1868,7 +1891,7 @@ check_line_char (int                  pos,
             {
               g_set_error (error,
                            PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
-                           "char %d: Do not break before hard line beaks (LB6)", pos);
+                           "char %#x %d: Do not break before hard line beaks (LB6)", wc, pos);
               return FALSE;
             }
     }
@@ -1882,12 +1905,21 @@ check_line_char (int                  pos,
         {
           g_set_error (error,
                        PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
-                       "char %d: Can't break before a space unless mandatory precedes or combining mark follows (LB7)", pos);
+                       "char %#x %d: Can't break before a space unless mandatory precedes or combining mark follows (LB7)", wc, pos);
           return FALSE;
         }
     }
 
-  /* TODO: check LB8 */
+  if (*after_zws)
+    {
+      if (!attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Break before a char following ZWS, even if spaces intervene (LB8)", wc, pos);
+          return FALSE;
+        }
+    }
 
   if (break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER)
     {
@@ -1895,7 +1927,7 @@ check_line_char (int                  pos,
         {
           g_set_error (error,
                        PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
-                       "char %d: Do not break after ZWJ (LB8a)", pos);
+                       "char %#x %d: Do not break after ZWJ (LB8a)", wc, pos);
           return FALSE;
         }
     }
@@ -1909,7 +1941,7 @@ check_line_char (int                  pos,
         {
           g_set_error (error,
                        PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
-                       "char %d: Do not break before or after WJ (LB11)", pos);
+                       "char %#x %d: Do not break before or after WJ (LB11)", wc, pos);
           return FALSE;
         }
     }
@@ -1918,54 +1950,17 @@ check_line_char (int                  pos,
     {
           g_set_error (error,
                        PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
-                       "char %d: Do not break after GL (LB12)", pos);
+                       "char %#x %d: Do not break after GL (LB12)", wc, pos);
           return FALSE;
     }
 
-  if (attr->is_line_break && !attr->is_char_break)
-    {
-      g_set_error (error,
-                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
-                   "char %d: Line breaks must also be marked as char breaks", pos);
-      return FALSE;
-    }
-  if (break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION &&
-      prev_break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION &&
-      attr->is_line_break && !attr->is_mandatory_break)
-    {
-      g_set_error (error,
-                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
-                   "char %d: Can't break between two open punctuation chars", pos);
-      return FALSE;
-    }
-
-  if (break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION &&
-      prev_break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION &&
-      attr->is_line_break && !attr->is_mandatory_break)
-    {
-      g_set_error (error,
-                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
-                   "char %d: Can't break between two close punctuation chars", pos);
-      return FALSE;
-    }
-
-  if (break_type == G_UNICODE_BREAK_QUOTATION &&
-      prev_break_type == G_UNICODE_BREAK_ALPHABETIC &&
-      attr->is_line_break && !attr->is_mandatory_break)
-    {
-      g_set_error (error,
-                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
-                   "char %d: Can't break a letter-quotemark sequence", pos);
-      return FALSE;
-    }
-
   /* internal consistency */
 
   if (attr->is_mandatory_break && !attr->is_line_break)
     {
       g_set_error (error,
                    PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
-                   "char %d: Mandatory breaks must also be marked as regular breaks", pos);
+                   "char %#x %d: Mandatory breaks must also be marked as regular breaks", wc, pos);
       return FALSE;
     }
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]