[glib] gsubprocess: Add UTF-8 variants of communicate()



commit 9318d5a4292544f2f7f8f9bc2d805974b3b52c7e
Author: Colin Walters <walters verbum org>
Date:   Tue Oct 15 15:04:54 2013 +0100

    gsubprocess: Add UTF-8 variants of communicate()
    
    Over many years of writing code interacting with subprocesses, a pattern
    that comes up a lot is to run a child and get its output as UTF-8, to
    put inside a JSON document or render in a GtkTextBuffer, etc.
    
    It's very important to validate at the boundaries, and not, say, deep
    inside Pango.
    
    We could do this a bit more efficiently if done in a streaming fashion,
    but realistically this should be OK for now.

 gio/gsubprocess.c       |  181 +++++++++++++++++++++++++++++++++++++++++++++--
 gio/gsubprocess.h       |   21 ++++++
 gio/tests/gsubprocess.c |  105 ++++++++++++++++++++++++++--
 3 files changed, 296 insertions(+), 11 deletions(-)
---
diff --git a/gio/gsubprocess.c b/gio/gsubprocess.c
index f894203..2e517f9 100644
--- a/gio/gsubprocess.c
+++ b/gio/gsubprocess.c
@@ -1189,6 +1189,8 @@ typedef struct
   gsize stdin_length;
   gsize stdin_offset;
 
+  gboolean add_nul;
+
   GInputStream *stdin_buf;
   GMemoryOutputStream *stdout_buf;
   GMemoryOutputStream *stderr_buf;
@@ -1224,7 +1226,25 @@ g_subprocess_communicate_made_progress (GObject      *source_object,
       source == state->stdout_buf ||
       source == state->stderr_buf)
     {
-      (void) g_output_stream_splice_finish ((GOutputStream*)source, result, &error);
+      if (!g_output_stream_splice_finish ((GOutputStream*)source, result, &error))
+        goto out;
+
+      if (source == state->stdout_buf ||
+          source == state->stderr_buf)
+        {
+          /* This is a memory stream, so it can't be cancelled or return
+           * an error really.
+           */
+          if (state->add_nul)
+            {
+              gsize bytes_written;
+              if (!g_output_stream_write_all (source, "\0", 1, &bytes_written,
+                                              NULL, &error))
+                goto out;
+            }
+          if (!g_output_stream_close (source, NULL, &error))
+            goto out;
+        }
     }
   else if (source == subprocess)
     {
@@ -1233,6 +1253,7 @@ g_subprocess_communicate_made_progress (GObject      *source_object,
   else
     g_assert_not_reached ();
 
+ out:
   if (error)
     {
       /* Only report the first error we see.
@@ -1286,6 +1307,7 @@ g_subprocess_communicate_state_free (gpointer data)
 
 static CommunicateState *
 g_subprocess_communicate_internal (GSubprocess         *subprocess,
+                                   gboolean             add_nul,
                                    GBytes              *stdin_buf,
                                    GCancellable        *cancellable,
                                    GAsyncReadyCallback  callback,
@@ -1299,6 +1321,7 @@ g_subprocess_communicate_internal (GSubprocess         *subprocess,
   g_task_set_task_data (task, state, g_subprocess_communicate_state_free);
 
   state->cancellable = g_cancellable_new ();
+  state->add_nul = add_nul;
 
   if (cancellable)
     {
@@ -1323,7 +1346,7 @@ g_subprocess_communicate_internal (GSubprocess         *subprocess,
     {
       state->stdout_buf = (GMemoryOutputStream*)g_memory_output_stream_new_resizable ();
       g_output_stream_splice_async ((GOutputStream*)state->stdout_buf, subprocess->stdout_pipe,
-                                    G_OUTPUT_STREAM_SPLICE_CLOSE_SOURCE | G_OUTPUT_STREAM_SPLICE_CLOSE_TARGET,
+                                    G_OUTPUT_STREAM_SPLICE_CLOSE_SOURCE,
                                     G_PRIORITY_DEFAULT, state->cancellable,
                                     g_subprocess_communicate_made_progress, g_object_ref (task));
       state->outstanding_ops++;
@@ -1333,7 +1356,7 @@ g_subprocess_communicate_internal (GSubprocess         *subprocess,
     {
       state->stderr_buf = (GMemoryOutputStream*)g_memory_output_stream_new_resizable ();
       g_output_stream_splice_async ((GOutputStream*)state->stderr_buf, subprocess->stderr_pipe,
-                                    G_OUTPUT_STREAM_SPLICE_CLOSE_SOURCE | G_OUTPUT_STREAM_SPLICE_CLOSE_TARGET,
+                                    G_OUTPUT_STREAM_SPLICE_CLOSE_SOURCE,
                                     G_PRIORITY_DEFAULT, state->cancellable,
                                     g_subprocess_communicate_made_progress, g_object_ref (task));
       state->outstanding_ops++;
@@ -1418,7 +1441,8 @@ g_subprocess_communicate (GSubprocess   *subprocess,
   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
 
   g_subprocess_sync_setup ();
-  g_subprocess_communicate_internal (subprocess, stdin_buf, cancellable, g_subprocess_sync_done, &result);
+  g_subprocess_communicate_internal (subprocess, FALSE, stdin_buf, cancellable,
+                                     g_subprocess_sync_done, &result);
   g_subprocess_sync_complete (&result);
   success = g_subprocess_communicate_finish (subprocess, result, stdout_buf, stderr_buf, error);
   g_object_unref (result);
@@ -1448,7 +1472,7 @@ g_subprocess_communicate_async (GSubprocess         *subprocess,
   g_return_if_fail (stdin_buf == NULL || (subprocess->flags & G_SUBPROCESS_FLAGS_STDIN_PIPE));
   g_return_if_fail (cancellable == NULL || G_IS_CANCELLABLE (cancellable));
 
-  g_subprocess_communicate_internal (subprocess, stdin_buf, cancellable, callback, user_data);
+  g_subprocess_communicate_internal (subprocess, FALSE, stdin_buf, cancellable, callback, user_data);
 }
 
 /**
@@ -1491,3 +1515,150 @@ g_subprocess_communicate_finish (GSubprocess   *subprocess,
   g_object_unref (result);
   return success;
 }
+
+/**
+ * g_subprocess_communicate_utf8:
+ * @subprocess: a #GSubprocess
+ * @stdin_buf: (allow-none): data to send to the stdin of the subprocess, or %NULL
+ * @cancellable: a #GCancellable
+ * @stdout_buf: (out): data read from the subprocess stdout
+ * @stderr_buf: (out): data read from the subprocess stderr
+ * @error: a pointer to a %NULL #GError pointer, or %NULL
+ *
+ * Like g_subprocess_communicate(), but validates the output of the
+ * process as UTF-8, and returns it as a regular NUL terminated string.
+ *
+ * Returns: %TRUE on success, %FALSE (with @error set) otherwise
+ */
+gboolean
+g_subprocess_communicate_utf8 (GSubprocess          *subprocess,
+                               const char           *stdin_buf,
+                               GCancellable         *cancellable,
+                               char                **stdout_buf,
+                               char                **stderr_buf,
+                               GError              **error)
+{
+  GAsyncResult *result = NULL;
+  gboolean success;
+  GBytes *stdin_bytes;
+
+  g_return_val_if_fail (G_IS_SUBPROCESS (subprocess), FALSE);
+  g_return_val_if_fail (stdin_buf == NULL || (subprocess->flags & G_SUBPROCESS_FLAGS_STDIN_PIPE), FALSE);
+  g_return_val_if_fail (cancellable == NULL || G_IS_CANCELLABLE (cancellable), FALSE);
+  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
+
+  /* A %NULL @stdin_buf means "send nothing"; it must never reach strlen(). */
+  stdin_bytes = stdin_buf ? g_bytes_new (stdin_buf, strlen (stdin_buf)) : NULL;
+  g_subprocess_sync_setup ();
+  g_subprocess_communicate_internal (subprocess, TRUE, stdin_bytes, cancellable, g_subprocess_sync_done, &result);
+  g_subprocess_sync_complete (&result);
+  success = g_subprocess_communicate_utf8_finish (subprocess, result, stdout_buf, stderr_buf, error);
+  g_object_unref (result);
+  g_bytes_unref (stdin_bytes);
+  return success;
+}
+
+/**
+ * g_subprocess_communicate_utf8_async:
+ * @subprocess: a #GSubprocess
+ * @stdin_buf: (allow-none): NUL terminated data to send to stdin, or %NULL
+ * @cancellable: (allow-none): a #GCancellable, or %NULL
+ * @callback: called when the operation completes
+ * @user_data: data passed to @callback
+ *
+ * Asynchronous version of g_subprocess_communicate_utf8().  Complete
+ * invocation with g_subprocess_communicate_utf8_finish().
+ */
+void
+g_subprocess_communicate_utf8_async (GSubprocess          *subprocess,
+                                     const char           *stdin_buf,
+                                     GCancellable         *cancellable,
+                                     GAsyncReadyCallback   callback,
+                                     gpointer              user_data)
+{
+  GBytes *stdin_bytes;
+
+  g_return_if_fail (G_IS_SUBPROCESS (subprocess));
+  g_return_if_fail (stdin_buf == NULL || (subprocess->flags & G_SUBPROCESS_FLAGS_STDIN_PIPE));
+  g_return_if_fail (cancellable == NULL || G_IS_CANCELLABLE (cancellable));
+  /* A %NULL @stdin_buf means "send nothing"; it must never reach strlen(). */
+  stdin_bytes = stdin_buf ? g_bytes_new (stdin_buf, strlen (stdin_buf)) : NULL;
+  g_subprocess_communicate_internal (subprocess, TRUE, stdin_bytes, cancellable, callback, user_data);
+  g_bytes_unref (stdin_bytes);
+}
+
+/* Moves the NUL terminated contents of @buffer into @return_location if they
+ * are valid UTF-8; otherwise sets @error, stores NULL and returns FALSE. */
+static gboolean
+communicate_result_validate_utf8 (const char            *stream_name,
+                                  char                 **return_location,
+                                  GMemoryOutputStream   *buffer,
+                                  GError               **error)
+{
+  if (return_location == NULL)
+    return TRUE;
+  *return_location = NULL;
+  if (buffer)
+    {
+      const char *end;
+      char *data = g_memory_output_stream_steal_data (buffer);
+      if (!g_utf8_validate (data, -1, &end))
+        {
+          g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+                       "Invalid UTF-8 in child %s at offset %lu",
+                       stream_name,
+                       (unsigned long) (end - data));
+          g_free (data); /* only after the offset above has been computed */
+          return FALSE;
+        }
+      *return_location = data;
+    }
+  return TRUE;
+}
+
+/**
+ * g_subprocess_communicate_utf8_finish:
+ * @subprocess: a #GSubprocess
+ * @result: the #GAsyncResult passed to the callback
+ * @stdout_buf: (out): Return location for stdout data; free with g_free()
+ * @stderr_buf: (out): Return location for stderr data; free with g_free()
+ * @error: a pointer to a %NULL #GError pointer, or %NULL
+ *
+ * Complete an invocation of g_subprocess_communicate_utf8_async().
+ */
+gboolean
+g_subprocess_communicate_utf8_finish (GSubprocess          *subprocess,
+                                      GAsyncResult         *result,
+                                      char                **stdout_buf,
+                                      char                **stderr_buf,
+                                      GError              **error)
+{
+  gboolean ret = FALSE;
+  CommunicateState *state;
+
+  g_return_val_if_fail (G_IS_SUBPROCESS (subprocess), FALSE);
+  g_return_val_if_fail (g_task_is_valid (result, subprocess), FALSE);
+  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
+
+  g_object_ref (result);
+  state = g_task_get_task_data ((GTask*)result);
+  if (!g_task_propagate_boolean ((GTask*)result, error))
+    goto out;
+
+  /* TODO - validate UTF-8 while streaming, rather than all at once. */
+  if (!communicate_result_validate_utf8 ("stdout", stdout_buf,
+                                         state->stdout_buf, error))
+    goto out;
+  if (!communicate_result_validate_utf8 ("stderr", stderr_buf,
+                                         state->stderr_buf, error))
+    {
+      /* Don't leak the stdout string that was already handed out above. */
+      if (stdout_buf)
+        g_clear_pointer (stdout_buf, g_free);
+      goto out;
+    }
+  ret = TRUE;
+ out:
+  g_object_unref (result);
+  return ret;
+}
diff --git a/gio/gsubprocess.h b/gio/gsubprocess.h
index febdeb8..71017ed 100644
--- a/gio/gsubprocess.h
+++ b/gio/gsubprocess.h
@@ -143,6 +143,27 @@ gboolean        g_subprocess_communicate_finish         (GSubprocess          *s
                                                          GBytes              **stderr_buf,
                                                          GError              **error);
 
+GLIB_AVAILABLE_IN_2_40
+gboolean         g_subprocess_communicate_utf8          (GSubprocess          *subprocess,
+                                                         const char           *stdin_buf,
+                                                         GCancellable         *cancellable,
+                                                         char                **stdout_buf,
+                                                         char                **stderr_buf,
+                                                         GError              **error);
+GLIB_AVAILABLE_IN_2_40
+void            g_subprocess_communicate_utf8_async     (GSubprocess          *subprocess,
+                                                         const char           *stdin_buf,
+                                                         GCancellable         *cancellable,
+                                                         GAsyncReadyCallback   callback,
+                                                         gpointer              user_data);
+
+GLIB_AVAILABLE_IN_2_40
+gboolean        g_subprocess_communicate_utf8_finish    (GSubprocess          *subprocess,
+                                                         GAsyncResult         *result,
+                                                         char                **stdout_buf,
+                                                         char                **stderr_buf,
+                                                         GError              **error);
+
 G_END_DECLS
 
 #endif /* __G_SUBPROCESS_H__ */
diff --git a/gio/tests/gsubprocess.c b/gio/tests/gsubprocess.c
index 71b018d..716ce06 100644
--- a/gio/tests/gsubprocess.c
+++ b/gio/tests/gsubprocess.c
@@ -546,6 +546,8 @@ test_multi_1 (void)
 }
 
 typedef struct {
+  gboolean is_utf8;
+  gboolean is_invalid_utf8;
   gboolean running;
   GError *error;
 } TestAsyncCommunicateData;
@@ -556,21 +558,41 @@ on_communicate_complete (GObject               *proc,
                          gpointer               user_data)
 {
   TestAsyncCommunicateData *data = user_data;
-  GBytes *stdout;
+  GBytes *stdout = NULL;
+  char *stdout_str = NULL;
   const guint8 *stdout_data;
   gsize stdout_len;
 
   data->running = FALSE;
-  (void) g_subprocess_communicate_finish ((GSubprocess*)proc, result,
-                                          &stdout, NULL, &data->error);
+  if (data->is_utf8)
+    (void) g_subprocess_communicate_utf8_finish ((GSubprocess*)proc, result,
+                                                 &stdout_str, NULL, &data->error);
+  else
+    (void) g_subprocess_communicate_finish ((GSubprocess*)proc, result,
+                                            &stdout, NULL, &data->error);
+  if (data->is_invalid_utf8)
+    {
+      g_assert_error (data->error, G_IO_ERROR, G_IO_ERROR_FAILED);
+      return;
+    }
 
   g_assert_no_error (data->error);
 
-  stdout_data = g_bytes_get_data (stdout, &stdout_len);
+  if (!data->is_utf8)
+    {
+      stdout_data = g_bytes_get_data (stdout, &stdout_len);
+    }
+  else
+    {
+      stdout_data = (guint8*)stdout_str;
+      stdout_len = strlen (stdout_str);
+    }
 
   g_assert_cmpint (stdout_len, ==, 11);
   g_assert (memcmp (stdout_data, "hello world", 11) == 0);
-  g_bytes_unref (stdout);
+  if (stdout)
+    g_bytes_unref (stdout);
+  g_free (stdout_str);
 }
 
 static void
@@ -583,6 +605,7 @@ test_communicate (void)
   GSubprocess *proc;
   GCancellable *cancellable = NULL;
   GBytes *input;
+  const char *hellostring;
 
   args = get_test_subprocess_args ("cat", NULL);
   proc = g_subprocess_newv ((const gchar* const*)args->pdata,
@@ -591,7 +614,8 @@ test_communicate (void)
   g_assert_no_error (local_error);
   g_ptr_array_free (args, TRUE);
 
-  input = g_bytes_new_static ("hello world", strlen ("hello world"));
+  hellostring = "hello world";
+  input = g_bytes_new_static (hellostring, strlen (hellostring));
 
   data.error = local_error;
   g_subprocess_communicate_async (proc, input,
@@ -608,6 +632,73 @@ test_communicate (void)
   g_object_unref (proc);
 }
 
+/* Sends "hello world" through the "cat" test helper with the UTF-8
+ * communicate API; on_communicate_complete() checks what comes back. */
+static void
+test_communicate_utf8 (void)
+{
+  TestAsyncCommunicateData data = { 0, };
+  GError *local_error = NULL;
+  GSubprocess *proc;
+  GPtrArray *argv;
+
+  /* Spawn the helper with both stdin and stdout connected to pipes. */
+  argv = get_test_subprocess_args ("cat", NULL);
+  proc = g_subprocess_newv ((const gchar* const*)argv->pdata,
+                            G_SUBPROCESS_FLAGS_STDIN_PIPE | G_SUBPROCESS_FLAGS_STDOUT_PIPE,
+                            &local_error);
+  g_assert_no_error (local_error);
+  g_ptr_array_free (argv, TRUE);
+
+  data.is_utf8 = TRUE;
+  data.error = local_error;
+  g_subprocess_communicate_utf8_async (proc, "hello world", NULL,
+                                       on_communicate_complete, &data);
+  data.running = TRUE;
+
+  /* Iterate the default main context until the callback clears the flag. */
+  while (data.running)
+    g_main_context_iteration (NULL, TRUE);
+
+  g_assert_no_error (local_error);
+
+  g_object_unref (proc);
+}
+
+/* Sends the invalid byte sequence "\xFF\xFF" through the "cat" test helper;
+ * on_communicate_complete() asserts that finishing fails with G_IO_ERROR_FAILED. */
+static void
+test_communicate_utf8_invalid (void)
+{
+  GError *local_error = NULL;
+  GError **error = &local_error;
+  GPtrArray *args;
+  TestAsyncCommunicateData data = { 0, };
+  GSubprocess *proc;
+  GCancellable *cancellable = NULL;
+
+  args = get_test_subprocess_args ("cat", NULL);
+  proc = g_subprocess_newv ((const gchar* const*)args->pdata,
+                            G_SUBPROCESS_FLAGS_STDIN_PIPE | G_SUBPROCESS_FLAGS_STDOUT_PIPE,
+                            error);
+  g_assert_no_error (local_error);
+  g_ptr_array_free (args, TRUE);
+
+  data.error = local_error;
+  data.is_utf8 = TRUE;
+  data.is_invalid_utf8 = TRUE;
+  g_subprocess_communicate_utf8_async (proc, "\xFF\xFF", cancellable,
+                                       on_communicate_complete, &data);
+  data.running = TRUE;
+  while (data.running)
+    g_main_context_iteration (NULL, TRUE);
+
+  g_assert_no_error (local_error);
+  /* The callback leaves the expected error in data.error; release it. */
+  g_clear_error (&data.error);
+  g_object_unref (proc);
+}
+
 static gboolean
 send_terminate (gpointer   user_data)
 {
@@ -905,6 +996,8 @@ main (int argc, char **argv)
   g_test_add_func ("/gsubprocess/cat-eof", test_cat_eof);
   g_test_add_func ("/gsubprocess/multi1", test_multi_1);
   g_test_add_func ("/gsubprocess/communicate", test_communicate);
+  g_test_add_func ("/gsubprocess/communicate-utf8", test_communicate_utf8);
+  g_test_add_func ("/gsubprocess/communicate-utf8-invalid", test_communicate_utf8_invalid);
   g_test_add_func ("/gsubprocess/terminate", test_terminate);
 #ifdef G_OS_UNIX
   g_test_add_func ("/gsubprocess/stdout-file", test_stdout_file);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]