[glib] gconvert: Optimize UTF-8 conversions, fix output on error

From: Philip Withnall <pwithnall src gnome org>
To: commits-list gnome org
Cc:
Subject: [glib] gconvert: Optimize UTF-8 conversions, fix output on error
Date: Fri, 19 Jan 2018 12:10:51 +0000 (UTC)

commit 413605a6f33cbfa1b273e36a7a276cf21d6bfd73
Author: Mikhail Zabaluev <mikhail zabaluev gmail com>
Date:   Sat Jan 13 12:40:22 2018 +0200

    gconvert: Optimize UTF-8 conversions, fix output on error
    
    In the strdup_len() path, there is no need to repeat what
    g_utf8_validate() already does: locate the string-terminating nul byte.
    
    Also in strdup_len(), make the out parameter bytes_read receive the length
    of the valid (meaning also nul-free) part of the input string, as the
    documentation on g_{locale,filename}_{from,to}_utf8() says it does.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=792516

 glib/gconvert.c |   35 +++++++++++++++++++----------------
 1 files changed, 19 insertions(+), 16 deletions(-)
---
diff --git a/glib/gconvert.c b/glib/gconvert.c
index c142206..083ea17 100644
--- a/glib/gconvert.c
+++ b/glib/gconvert.c
@@ -823,20 +823,31 @@ g_convert_with_fallback (const gchar *str,
  * 
  */
 
+/*
+ * Validate @string as UTF-8. @len can be negative if @string is
+ * nul-terminated, or a non-negative length in bytes. If @string ends in an
+ * incomplete sequence, or contains any illegal sequences or nul codepoints,
+ * %NULL will be returned and the error set to
+ * %G_CONVERT_ERROR_ILLEGAL_SEQUENCE.
+ * On success, @bytes_read and @bytes_written, if provided, will be set to
+ * the number of bytes in @string up to @len or the terminating nul byte.
+ * On error, @bytes_read will be set to the byte offset after the last valid
+ * and non-nul UTF-8 sequence in @string, and @bytes_written will be set to 0.
+ */
 static gchar *
 strdup_len (const gchar *string,
            gssize       len,
-           gsize       *bytes_written,
            gsize       *bytes_read,
-           GError      **error)
-        
+           gsize       *bytes_written,
+           GError     **error)
 {
   gsize real_len;
+  const gchar *end_valid;
 
-  if (!g_utf8_validate (string, len, NULL))
+  if (!g_utf8_validate (string, len, &end_valid))
     {
       if (bytes_read)
-       *bytes_read = 0;
+       *bytes_read = end_valid - string;
       if (bytes_written)
        *bytes_written = 0;
 
@@ -844,17 +855,9 @@ strdup_len (const gchar *string,
                            _("Invalid byte sequence in conversion input"));
       return NULL;
     }
-  
-  if (len < 0)
-    real_len = strlen (string);
-  else
-    {
-      real_len = 0;
-      
-      while (real_len < len && string[real_len])
-       real_len++;
-    }
-  
+
+  real_len = end_valid - string;
+
   if (bytes_read)
     *bytes_read = real_len;
   if (bytes_written)

[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]