[pango/performance-test: 2/3] Factor out UTF8 validation




commit 9f3d94cdd34de1fdfc85a589adf11a1a17929830
Author: Matthias Clasen <mclasen redhat com>
Date:   Wed May 18 14:06:42 2022 -0400

    Factor out UTF8 validation
    
    This is in preparation for optimizing this
    function a bit.

 pango/pango-layout.c         | 51 +++++++++-----------------------------------
 pango/pango-utils-internal.h |  5 +++++
 pango/pango-utils.c          | 46 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+), 41 deletions(-)
---
diff --git a/pango/pango-layout.c b/pango/pango-layout.c
index 2b03ee2c..de43dbc7 100644
--- a/pango/pango-layout.c
+++ b/pango/pango-layout.c
@@ -83,6 +83,7 @@
 #include <string.h>
 #include <math.h>
 #include <locale.h>
+#include "pango-utils-internal.h"
 
 #include <hb-ot.h>
 
@@ -1230,7 +1231,7 @@ pango_layout_set_text (PangoLayout *layout,
                        const char  *text,
                        int          length)
 {
-  char *old_text, *start, *end;
+  char *old_text;
 
   g_return_if_fail (layout != NULL);
   g_return_if_fail (length == 0 || text != NULL);
@@ -1238,50 +1239,18 @@ pango_layout_set_text (PangoLayout *layout,
   old_text = layout->text;
 
   if (length < 0)
-    {
-      layout->length = strlen (text);
-      layout->text = g_strndup (text, layout->length);
-    }
+    layout->text = g_strdup (text);
   else if (length > 0)
-    {
-      /* This is not exactly what we want.  We don't need the padding...
-       */
-      layout->length = length;
-      layout->text = g_strndup (text, length);
-    }
+    layout->text = g_strndup (text, length);
   else
-    {
-      layout->length = 0;
-      layout->text = g_malloc0 (1);
-    }
+    layout->text = g_malloc0 (1);
 
   /* validate it, and replace invalid bytes with -1 */
-  start = layout->text;
-  for (;;) {
-    gboolean valid;
-
-    valid = g_utf8_validate (start, -1, (const char **)&end);
-
-    if (!*end)
-      break;
-
-    /* Replace invalid bytes with -1.  The -1 will be converted to
-     * ((gunichar) -1) by glib, and that in turn yields a glyph value of
-     * ((PangoGlyph) -1) by PANGO_GET_UNKNOWN_GLYPH(-1),
-     * and that's PANGO_GLYPH_INVALID_INPUT.
-     */
-    if (!valid)
-      *end++ = -1;
-
-    start = end;
-  }
-
-  if (start != layout->text)
-    /* TODO: Write out the beginning excerpt of text? */
-    g_warning ("Invalid UTF-8 string passed to pango_layout_set_text()");
-
-  layout->n_chars = pango_utf8_strlen (layout->text, -1);
-  layout->length = strlen (layout->text);
+  if (!pango_utf8_make_valid (layout->text, &layout->length, &layout->n_chars))
+    {
+      /* TODO: Write out the beginning excerpt of text? */
+      g_warning ("Invalid UTF-8 string passed to pango_layout_set_text()");
+    }
 
   g_clear_pointer (&layout->log_attrs, g_free);
   layout_changed (layout);
diff --git a/pango/pango-utils-internal.h b/pango/pango-utils-internal.h
index 0bc355e0..162295c3 100644
--- a/pango/pango-utils-internal.h
+++ b/pango/pango-utils-internal.h
@@ -44,6 +44,11 @@ gboolean pango_parse_flags              (GType       type,
 char    *_pango_trim_string             (const char *str);
 
 
+gboolean pango_utf8_make_valid          (char *str,
+                                         int  *n_bytes,
+                                         int  *n_chars);
+
+
 G_END_DECLS
 
 #endif /* __PANGO_UTILS_H__ */
diff --git a/pango/pango-utils.c b/pango/pango-utils.c
index b942921e..a0ff000e 100644
--- a/pango/pango-utils.c
+++ b/pango/pango-utils.c
@@ -1257,3 +1257,49 @@ pango_find_paragraph_boundary (const char *text,
   if (start && next_paragraph_start)
     *next_paragraph_start = start - text;
 }
+
+
+/*< private >
+ * pango_utf8_make_valid:
+ * @str: the string to convert to valid UTF-8
+ * @n_bytes: return location for byte count
+ * @n_chars: return location for character count
+ *
+ * Validate that @str is valid UTF-8, and make it
+ * so if it isn't.
+ *
+ * Invalid bytes get replaced by -1 (which ultimately gets
+ * turned into PANGO_GLYPH_INVALID_INPUT).
+ *
+ * Returns: `TRUE` if @str was valid without any modification
+ */
+gboolean
+pango_utf8_make_valid (char *str,
+                       int  *n_bytes,
+                       int  *n_chars)
+{
+  char *start, *end;
+
+  start = str;
+
+  for (;;)
+    {
+      gboolean valid;
+      /* end is set to where validation stopped: the NUL or a bad byte */
+      valid = g_utf8_validate (start, -1, (const char **)&end);
+
+      if (!*end)
+        break;
+      /* Overwrite the offending byte with -1 and resume right after it */
+      if (!valid)
+        *end++ = -1;
+
+      start = end;
+    }
+
+  *n_bytes = strlen (str);
+  *n_chars = g_utf8_strlen (str, -1);
+  /* start only moves past str when validation stopped before the NUL */
+  return start == str;
+}
+


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]