[glib] GDataInputStream: Add _utf8() variants of _read_line



commit 28254a38a7f077d5fc03939ea7c03260aabe5188
Author: Colin Walters <walters verbum org>
Date:   Thu Jun 16 14:00:36 2011 -0400

    GDataInputStream: Add _utf8() variants of _read_line
    
    These will validate the resulting line, and throw a conversion error
    if it is not valid UTF-8.  In practice these will likely be used by
    bindings, but they are good even for C apps that don't want to
    explode if that text file they're reading into Pango actually has
    invalid UTF-8.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=652758

 gio/gdatainputstream.c        |   82 +++++++++++++++++++++++++++++++++++++++++
 gio/gdatainputstream.h        |    8 ++++
 gio/gio.symbols               |    2 +
 gio/tests/data-input-stream.c |   74 +++++++++++++++++++++++++++++++++++++
 4 files changed, 166 insertions(+), 0 deletions(-)
---
diff --git a/gio/gdatainputstream.c b/gio/gdatainputstream.c
index d528c13..ff866a6 100644
--- a/gio/gdatainputstream.c
+++ b/gio/gdatainputstream.c
@@ -813,6 +813,49 @@ g_data_input_stream_read_line (GDataInputStream  *stream,
   return line;
 }
 
+/**
+ * g_data_input_stream_read_line_utf8:
+ * @stream: a given #GDataInputStream.
+ * @length: (out): a #gsize to get the length of the data read in.
+ * @cancellable: (allow-none): optional #GCancellable object, %NULL to ignore.
+ * @error: #GError for error reporting.
+ *
+ * Reads a line from the data input stream and validates it as UTF-8.
+ *
+ * If @cancellable is not %NULL, the operation can be cancelled by
+ * triggering the cancellable object from another thread, in which case
+ * the error %G_IO_ERROR_CANCELLED will be returned.
+ *
+ * Returns: (transfer full): a NUL terminated UTF-8 string holding the
+ *  line that was read (without the newlines), with its length stored
+ *  in @length.  Returns %NULL with @error set on failure; a line that
+ *  is not valid UTF-8 is reported with %G_CONVERT_ERROR_ILLEGAL_SEQUENCE
+ *  in the %G_CONVERT_ERROR domain.  Returns %NULL without setting
+ *  @error if there is no content to read.
+ **/
+char *
+g_data_input_stream_read_line_utf8 (GDataInputStream  *stream,
+				    gsize             *length,
+				    GCancellable      *cancellable,
+				    GError           **error)
+{
+  char *line = g_data_input_stream_read_line (stream, length, cancellable, error);
+
+  /* Nothing was read (error or no content); @error is left as the callee set it. */
+  if (line == NULL)
+    return NULL;
+
+  if (g_utf8_validate (line, -1, NULL))
+    return line;
+
+  /* The line is not valid UTF-8: report a conversion error and drop it. */
+  g_set_error_literal (error, G_CONVERT_ERROR,
+		       G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+		       _("Invalid byte sequence in conversion input"));
+  g_free (line);
+  return NULL;
+}
+
 static gssize
 scan_for_chars (GDataInputStream *stream,
 		gsize            *checked_out,
@@ -1212,6 +1255,45 @@ g_data_input_stream_read_line_finish (GDataInputStream  *stream,
 }
 
 /**
+ * g_data_input_stream_read_line_finish_utf8:
+ * @stream: a given #GDataInputStream.
+ * @result: the #GAsyncResult that was provided to the callback.
+ * @length: (out): a #gsize to get the length of the data read in.
+ * @error: #GError for error reporting.
+ *
+ * Finish an asynchronous call started by
+ * g_data_input_stream_read_line_async(), validating the line as UTF-8.
+ *
+ * Returns: (transfer full): a string with the line that was read in
+ *  (without the newlines).  Set @length to a #gsize to get the length
+ *  of the read line.  On an error, it will return %NULL and @error
+ *  will be set. For UTF-8 conversion errors, the set error domain is
+ *  %G_CONVERT_ERROR.  If there's no content to read, it will still
+ *  return %NULL, but @error won't be set.
+ *
+ * Since: 2.30
+ */
+gchar *
+g_data_input_stream_read_line_finish_utf8 (GDataInputStream  *stream,
+					   GAsyncResult      *result,
+					   gsize             *length,
+					   GError           **error)
+{
+  gchar *res = g_data_input_stream_read_line_finish (stream, result, length, error);
+
+  /* %NULL means error or no content; it must not reach g_utf8_validate(). */
+  if (res != NULL && !g_utf8_validate (res, -1, NULL))
+    {
+      g_set_error_literal (error, G_CONVERT_ERROR,
+			   G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+			   _("Invalid byte sequence in conversion input"));
+      g_free (res);
+      res = NULL;
+    }
+  return res;
+}
+
+/**
  * g_data_input_stream_read_until_finish:
  * @stream: a given #GDataInputStream.
  * @result: the #GAsyncResult that was provided to the callback.
diff --git a/gio/gdatainputstream.h b/gio/gdatainputstream.h
index e5724d5..9a38dff 100644
--- a/gio/gdatainputstream.h
+++ b/gio/gdatainputstream.h
@@ -102,6 +102,10 @@ char *                 g_data_input_stream_read_line            (GDataInputStrea
                                                                  gsize                   *length,
                                                                  GCancellable            *cancellable,
                                                                  GError                 **error);
+char *                 g_data_input_stream_read_line_utf8       (GDataInputStream        *stream,
+								 gsize                   *length,
+								 GCancellable            *cancellable,
+								 GError                 **error);
 void                   g_data_input_stream_read_line_async      (GDataInputStream        *stream,
                                                                  gint                     io_priority,
                                                                  GCancellable            *cancellable,
@@ -111,6 +115,10 @@ char *                 g_data_input_stream_read_line_finish     (GDataInputStrea
                                                                  GAsyncResult            *result,
                                                                  gsize                   *length,
                                                                  GError                 **error);
+char *                 g_data_input_stream_read_line_finish_utf8(GDataInputStream        *stream,
+                                                                 GAsyncResult            *result,
+                                                                 gsize                   *length,
+                                                                 GError                 **error);
 char *                 g_data_input_stream_read_until           (GDataInputStream        *stream,
                                                                  const gchar             *stop_chars,
                                                                  gsize                   *length,
diff --git a/gio/gio.symbols b/gio/gio.symbols
index 942cf4c..726f67c 100644
--- a/gio/gio.symbols
+++ b/gio/gio.symbols
@@ -180,8 +180,10 @@ g_data_input_stream_read_uint32
 g_data_input_stream_read_int64
 g_data_input_stream_read_uint64
 g_data_input_stream_read_line
+g_data_input_stream_read_line_utf8
 g_data_input_stream_read_line_async
 g_data_input_stream_read_line_finish
+g_data_input_stream_read_line_finish_utf8
 g_data_input_stream_read_until
 g_data_input_stream_read_until_async
 g_data_input_stream_read_until_finish
diff --git a/gio/tests/data-input-stream.c b/gio/tests/data-input-stream.c
index 1a34205..3d01a73 100644
--- a/gio/tests/data-input-stream.c
+++ b/gio/tests/data-input-stream.c
@@ -151,6 +151,78 @@ test_read_lines_any (void)
 }
 
 static void
+test_read_lines_LF_valid_utf8 (void)
+{
+  /* Feed three LF-terminated lines, one of them with a non-ASCII
+   * character, and expect every one to be returned without error. */
+  GInputStream *memory = g_memory_input_stream_new ();
+  GInputStream *data = G_INPUT_STREAM (g_data_input_stream_new (memory));
+  GError *err = NULL;
+  guint count = 0;
+
+  g_memory_input_stream_add_data (G_MEMORY_INPUT_STREAM (memory),
+				  "foo\nthis is valid UTF-8 â!\nbar\n", -1, NULL);
+
+  for (;;)
+    {
+      gsize len = -1;
+      char *text;
+
+      text = g_data_input_stream_read_line_utf8 (G_DATA_INPUT_STREAM (data),
+						 &len, NULL, &err);
+      g_assert_no_error (err);
+      if (text == NULL)
+	break; /* no more content */
+
+      g_free (text);
+      count++;
+    }
+  g_assert_cmpint (count, ==, 3);
+
+  g_object_unref (memory);
+  g_object_unref (data);
+}
+
+static void
+test_read_lines_LF_invalid_utf8 (void)
+{
+  GInputStream *stream;
+  GInputStream *base_stream;
+  GError *error = NULL;
+  char *line;
+  guint n_lines = 0;
+
+  base_stream = g_memory_input_stream_new ();
+  stream = G_INPUT_STREAM (g_data_input_stream_new (base_stream));
+
+  g_memory_input_stream_add_data (G_MEMORY_INPUT_STREAM (base_stream),
+				  "foo\nthis is not valid UTF-8 \xE5 =(\nbar\n", -1, NULL);
+
+  /* "foo" reads fine; the stray \xE5 byte in the next line must then
+   * yield an illegal-sequence conversion error and a NULL line. */
+  while (TRUE)
+    {
+      gsize length = -1;
+      line = g_data_input_stream_read_line_utf8 (G_DATA_INPUT_STREAM (stream), &length, NULL, &error);
+      if (n_lines == 0)
+	g_assert_no_error (error);
+      else
+	{
+	  g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
+	  g_assert (line == NULL);
+	  g_clear_error (&error);
+	  break;
+	}
+      n_lines++;
+      g_free (line);
+    }
+  g_assert_cmpint (n_lines, ==, 1);
+
+  g_object_unref (base_stream);
+  g_object_unref (stream);
+}
+
+static void
 test_read_until (void)
 {
   GInputStream *stream;
@@ -417,6 +489,8 @@ main (int   argc,
 
   g_test_add_func ("/data-input-stream/basic", test_basic);
   g_test_add_func ("/data-input-stream/read-lines-LF", test_read_lines_LF);
+  g_test_add_func ("/data-input-stream/read-lines-LF-valid-utf8", test_read_lines_LF_valid_utf8);
+  g_test_add_func ("/data-input-stream/read-lines-LF-invalid-utf8", test_read_lines_LF_invalid_utf8);
   g_test_add_func ("/data-input-stream/read-lines-CR", test_read_lines_CR);
   g_test_add_func ("/data-input-stream/read-lines-CR-LF", test_read_lines_CR_LF);
   g_test_add_func ("/data-input-stream/read-lines-any", test_read_lines_any);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]