[glib] Add GUtf8InputStream - Bug #603270



commit 568cd48365a8c8ffa6fc25d5282ec92de6e2ff31
Author: Paolo Borelli <pborelli gnome org>
Date:   Sat Nov 28 23:39:48 2009 +0100

    Add GUtf8InputStream - Bug #603270
    
    Add a filter input stream that performs utf8 validation.

 docs/reference/gio/gio-docs.xml     |    1 +
 docs/reference/gio/gio-sections.txt |   18 ++
 docs/reference/gio/gio.types        |    1 +
 gio/Makefile.am                     |    2 +
 gio/gio.h                           |    1 +
 gio/gio.symbols                     |    7 +
 gio/giotypes.h                      |    1 +
 gio/gutf8inputstream.c              |  327 +++++++++++++++++++++++++++++++++++
 gio/gutf8inputstream.h              |   80 +++++++++
 gio/tests/Makefile.am               |    4 +
 gio/tests/utf8-input-stream.c       |  252 +++++++++++++++++++++++++++
 11 files changed, 694 insertions(+), 0 deletions(-)
---
diff --git a/docs/reference/gio/gio-docs.xml b/docs/reference/gio/gio-docs.xml
index 660e1e6..69bfb9a 100644
--- a/docs/reference/gio/gio-docs.xml
+++ b/docs/reference/gio/gio-docs.xml
@@ -66,6 +66,7 @@
         <xi:include href="xml/gunixoutputstream.xml"/>
         <xi:include href="xml/gconverterinputstream.xml"/>
         <xi:include href="xml/gconverteroutputstream.xml"/>
+        <xi:include href="xml/gutf8inputstream.xml"/>
     </chapter>
     <chapter id="types">
         <title>File types and applications</title>
diff --git a/docs/reference/gio/gio-sections.txt b/docs/reference/gio/gio-sections.txt
index a3e2453..0478b51 100644
--- a/docs/reference/gio/gio-sections.txt
+++ b/docs/reference/gio/gio-sections.txt
@@ -713,6 +713,24 @@ GBufferedInputStreamPrivate
 </SECTION>
 
 <SECTION>
+<FILE>gutf8inputstream</FILE>
+<TITLE>GUtf8InputStream</TITLE>
+GUtf8InputStream
+g_utf8_input_stream_new
+<SUBSECTION Standard>
+GUtf8InputStreamClass
+G_UTF8_INPUT_STREAM
+G_IS_UTF8_INPUT_STREAM
+G_TYPE_UTF8_INPUT_STREAM
+G_UTF8_INPUT_STREAM_CLASS
+G_IS_UTF8_INPUT_STREAM_CLASS
+G_UTF8_INPUT_STREAM_GET_CLASS
+<SUBSECTION Private>
+g_utf8_input_stream_get_type
+GUtf8InputStreamPrivate
+</SECTION>
+
+<SECTION>
 <FILE>goutputstream</FILE>
 <TITLE>GOutputStream</TITLE>
 GOutputStreamSpliceFlags
diff --git a/docs/reference/gio/gio.types b/docs/reference/gio/gio.types
index ed02dfd..7eae206 100644
--- a/docs/reference/gio/gio.types
+++ b/docs/reference/gio/gio.types
@@ -96,6 +96,7 @@ g_unix_input_stream_get_type
 g_unix_mount_monitor_get_type
 g_unix_output_stream_get_type
 g_unix_socket_address_get_type
+g_utf8_input_stream_get_type
 g_vfs_get_type
 g_volume_get_type
 g_volume_monitor_get_type
diff --git a/gio/Makefile.am b/gio/Makefile.am
index 9dc2e02..3e7bb3e 100644
--- a/gio/Makefile.am
+++ b/gio/Makefile.am
@@ -264,6 +264,7 @@ libgio_2_0_la_SOURCES =		\
 	gthreadedresolver.h	\
 	gunionvolumemonitor.c 	\
 	gunionvolumemonitor.h 	\
+	gutf8inputstream.c	\
 	gvfs.c 			\
 	gvolume.c 		\
 	gvolumemonitor.c 	\
@@ -391,6 +392,7 @@ gio_headers =			\
 	gtcpconnection.h	\
 	gthreadedsocketservice.h\
 	gthemedicon.h 		\
+	gutf8inputstream.h	\
 	gvfs.h 			\
 	gvolume.h 		\
 	gvolumemonitor.h 	\
diff --git a/gio/gio.h b/gio/gio.h
index 861f172..4fc93f1 100644
--- a/gio/gio.h
+++ b/gio/gio.h
@@ -90,6 +90,7 @@
 #include <gio/gthreadedsocketservice.h>
 #include <gio/gsrvtarget.h>
 #include <gio/gthemedicon.h>
+#include <gio/gutf8inputstream.h>
 #include <gio/gvfs.h>
 #include <gio/gvolume.h>
 #include <gio/gvolumemonitor.h>
diff --git a/gio/gio.symbols b/gio/gio.symbols
index 3925e27..9db3444 100644
--- a/gio/gio.symbols
+++ b/gio/gio.symbols
@@ -1318,6 +1318,13 @@ g_unix_fd_list_steal_fds
 #endif
 #endif
 
+#if IN_HEADER(__G_UTF8_INPUT_STREAM_H__)
+#if IN_FILE(__G_UTF8_INPUT_STREAM_C__)
+g_utf8_input_stream_get_type
+g_utf8_input_stream_new
+#endif
+#endif
+
 #if IN_HEADER(__G_ZLIB_COMPRESSOR_H__)
 #if IN_FILE(__G_ZLIB_COMPRESSOR_C__)
 g_zlib_compressor_get_type
diff --git a/gio/giotypes.h b/gio/giotypes.h
index 1e67b0b..298a03e 100644
--- a/gio/giotypes.h
+++ b/gio/giotypes.h
@@ -43,6 +43,7 @@ typedef struct _GConverter                    GConverter;
 typedef struct _GConverterInputStream         GConverterInputStream;
 typedef struct _GConverterOutputStream        GConverterOutputStream;
 typedef struct _GDataInputStream              GDataInputStream;
+typedef struct _GUtf8InputStream              GUtf8InputStream;
 typedef struct _GZlibCompressor               GZlibCompressor;
 typedef struct _GZlibDecompressor             GZlibDecompressor;
 
diff --git a/gio/gutf8inputstream.c b/gio/gutf8inputstream.c
new file mode 100644
index 0000000..2f0dd13
--- /dev/null
+++ b/gio/gutf8inputstream.c
@@ -0,0 +1,327 @@
+/* GIO - GLib Input, Output and Streaming Library
+ *
+ * Copyright (C) 2009 Paolo Borelli
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Author: Paolo Borelli <pborelli gnome org>
+ */
+
+#include "config.h"
+#include "gutf8inputstream.h"
+#include "ginputstream.h"
+#include "gcancellable.h"
+#include "gioerror.h"
+#include "glibintl.h"
+
+#include "gioalias.h"
+
+/**
+ * SECTION:gutf8inputstream
+ * @short_description: Input Stream performing UTF8 validation
+ * @include: gio/gio.h
+ * @see_also: #GFilterInputStream, #GInputStream
+ *
+ * A UTF-8 input stream implements #GFilterInputStream and provides
+ * UTF-8 validation of the data read from the base stream.
+ * If the supplied buffer is long enough (see below), the returned
+ * data is guaranteed to end at a UTF-8 character boundary.
+ * <note>
+ *   <para>
+ *     Extra care must be taken when performing "small" reads:
+ *     unless you have control of the data being read, you need
+ *     to always supply a buffer at least 6 bytes long, otherwise
+ *     the returned content may be an incomplete UTF-8 byte sequence.
+ *   </para>
+ * </note>
+ *
+ * To create a UTF-8 input stream, use g_utf8_input_stream_new().
+ *
+ **/
+
+
+#define MAX_UNICHAR_LEN 6
+
+struct _GUtf8InputStreamPrivate {
+  /* buffer containing trailing partial character not yet returned */
+  char buffer[MAX_UNICHAR_LEN];
+  gsize len;
+
+  /* buffer containing partial character returned in a "small read"
+   * but not yet validated */
+  char small_read_buffer[MAX_UNICHAR_LEN];
+  gsize small_read_len;
+};
+
+static gssize g_utf8_input_stream_read        (GInputStream          *stream,
+                                               void                  *buffer,
+                                               gsize                  count,
+                                               GCancellable          *cancellable,
+                                               GError               **error);
+
+G_DEFINE_TYPE (GUtf8InputStream,
+               g_utf8_input_stream,
+               G_TYPE_FILTER_INPUT_STREAM)
+
+
+static void
+g_utf8_input_stream_class_init (GUtf8InputStreamClass *klass)
+{
+  GInputStreamClass *istream_class;
+
+  g_type_class_add_private (klass, sizeof (GUtf8InputStreamPrivate));
+
+  istream_class = G_INPUT_STREAM_CLASS (klass);
+  istream_class->read_fn = g_utf8_input_stream_read;
+}
+
+static void
+g_utf8_input_stream_init (GUtf8InputStream *stream)
+{
+  stream->priv = G_TYPE_INSTANCE_GET_PRIVATE (stream,
+                                              G_TYPE_UTF8_INPUT_STREAM,
+                                              GUtf8InputStreamPrivate);
+}
+
+/**
+ * g_utf8_input_stream_new:
+ * @base_stream: a #GInputStream.
+ *
+ * Creates a new #GUtf8InputStream from the given @base_stream.
+ *
+ * Returns: a #GInputStream for the given @base_stream.
+ *
+ * Since: 2.24
+ **/
+GInputStream *
+g_utf8_input_stream_new (GInputStream *base_stream)
+{
+  GInputStream *stream;
+
+  g_return_val_if_fail (G_IS_INPUT_STREAM (base_stream), NULL);
+
+  stream = g_object_new (G_TYPE_UTF8_INPUT_STREAM,
+                         "base-stream", base_stream,
+                         NULL);
+
+  return stream;
+}
+
+static void
+store_remainder (GUtf8InputStream *stream,
+                 const char       *remainder,
+                 gsize             len)
+{
+  GUtf8InputStreamPrivate *priv;
+  gsize i;
+
+  priv = stream->priv;
+
+  /* we store a remainder only after having
+   * consumed the previous one */
+  g_assert (priv->len == 0);
+
+  for (i = 0; i < len; ++i)
+    priv->buffer[i] = remainder[i];
+  priv->len = i;
+}
+
+static gssize
+get_remainder (GUtf8InputStream *stream,
+               char             *buffer,
+               gsize             count)
+{
+  GUtf8InputStreamPrivate *priv;
+  gsize i, len;
+  gssize res;
+
+  priv = stream->priv;
+
+  g_assert (priv->len < MAX_UNICHAR_LEN);
+
+  len = MIN (count, priv->len);
+  for (i = 0; i < len; ++i)
+    buffer[i] = priv->buffer[i];
+  res = i;
+
+  /* if there is more remainder, move it at the start */
+  for (i = 0; i < (priv->len - res); ++i)
+    priv->buffer[i] = priv->buffer[res + i];
+  priv->len = i;
+
+  return res;
+}
+
+static void
+store_small_read (GUtf8InputStream *stream,
+                  const char       *buffer,
+                  gsize             len)
+{
+  GUtf8InputStreamPrivate *priv;
+  gsize i;
+
+  priv = stream->priv;
+
+  /* if we reach MAX_UNICHAR_LEN it is either valid
+   * or invalid, so we should already have removed it
+   * from the buffer */
+  g_assert (priv->small_read_len + len < MAX_UNICHAR_LEN);
+
+  for (i = 0; i < len; ++i)
+    priv->small_read_buffer[priv->small_read_len + i] = buffer[i];
+  priv->small_read_len += i;
+}
+
+/* Combines the current "small read" buffer with the new
+ * bytes given, validates the buffer and if needed
+ * flushes it.
+ *
+ * returns:
+ * the number of leading bytes of buffer that belong to the
+ * pending character: the bytes completing it, or all of them
+ * (kept pending) while the character is still incomplete.
+ *
+ * -1 if the small read buffer is invalid
+ *
+ * 0 if the small read buffer is empty.
+ */
+static gssize
+validate_small_read (GUtf8InputStream *stream,
+                     const char       *buffer,
+                     gsize             len)
+{
+  GUtf8InputStreamPrivate *priv;
+  gsize i;
+  gunichar c;
+  char *p;
+  gssize res;
+
+  priv = stream->priv;
+
+  if (priv->small_read_len == 0)
+    return 0;
+
+  for (i = 0; i < MIN (len, MAX_UNICHAR_LEN - priv->small_read_len); ++i)
+    priv->small_read_buffer[priv->small_read_len + i] = buffer[i];
+
+  c = g_utf8_get_char_validated (priv->small_read_buffer, priv->small_read_len + i);
+  if (c == (gunichar)-1)
+    {
+      priv->small_read_len = 0;
+      return -1;
+    }
+  if (c == (gunichar)-2)
+    {
+      /* still incomplete: keep the new bytes pending as well */
+      priv->small_read_len += i;
+      return i;
+    }
+  p = g_utf8_next_char (priv->small_read_buffer);
+  res = p - (priv->small_read_buffer + priv->small_read_len);
+  g_assert (res > 0);
+
+  /* reset the buffer */
+  priv->small_read_len = 0;
+
+  return res;
+}
+
+/* GInputStreamClass.read_fn implementation: fills @buffer from the base
+ * stream, holding back a trailing partial character for the next call.
+ * Returns the bytes stored, or -1 on a read error or invalid UTF-8. */
+static gssize
+g_utf8_input_stream_read (GInputStream *stream,
+                          void         *buffer,
+                          gsize         count,
+                          GCancellable *cancellable,
+                          GError      **error)
+{
+  GUtf8InputStream *ustream;
+  GInputStream *base_stream;
+  char *bytes;
+  gsize nvalid, remainder;
+  gssize oldread, nread, offset;
+  gboolean valid, eof;
+  const gchar *end;
+
+  ustream = G_UTF8_INPUT_STREAM (stream);
+  /* byte-typed alias: arithmetic on void * is a GNU extension */
+  bytes = buffer;
+
+  /* if we had previous incomplete data put it at the start of the buffer */
+  oldread = get_remainder (ustream, bytes, count);
+
+  /* if we have already reached count, it is a "small read":
+   * store it to validate later */
+  if ((gsize) oldread == count)
+    {
+      store_small_read (ustream, bytes, oldread);
+      return oldread;
+    }
+
+  base_stream = g_filter_input_stream_get_base_stream (G_FILTER_INPUT_STREAM (stream));
+  nread = g_input_stream_read (base_stream,
+                               bytes + oldread,
+                               count - oldread,
+                               cancellable,
+                               error);
+  if (nread < 0)
+    return -1;
+
+  /* take into account bytes we put in the buffer */
+  eof = (nread == 0);
+  nread += oldread;
+
+  /* validate previous small reads */
+  offset = validate_small_read (ustream, bytes, nread);
+  if (offset < 0)
+    goto error;
+
+  /* validate */
+  valid = g_utf8_validate (bytes + offset, nread - offset, &end);
+  nvalid = end - bytes;
+  if (valid)
+      return nread;
+
+  remainder = nread - nvalid;
+
+  /* if validation failed in the last bytes and the byte
+   * sequence is an incomplete character and EOF is not reached,
+   * try to read further to see if we stopped in the middle
+   * of a character */
+  if ((remainder < MAX_UNICHAR_LEN) &&
+      (!eof) &&
+      (g_utf8_get_char_validated (bytes + nvalid, remainder) == (gunichar)-2))
+    {
+      if (nvalid == 0)
+        {
+          /* A "small" read: store it to validate later */
+          store_small_read (ustream, bytes, nread);
+          return nread;
+        }
+
+      store_remainder (ustream, bytes + nvalid, remainder);
+      return nvalid;
+    }
+
+error:
+  g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA,
+               _("Invalid UTF-8 sequence in input"));
+  return -1;
+}
+
+#define __G_UTF8_INPUT_STREAM_C__
+#include "gioaliasdef.c"
diff --git a/gio/gutf8inputstream.h b/gio/gutf8inputstream.h
new file mode 100644
index 0000000..eeb6a02
--- /dev/null
+++ b/gio/gutf8inputstream.h
@@ -0,0 +1,80 @@
+/* GIO - GLib Input, Output and Streaming Library
+ *
+ * Copyright (C) 2009 Paolo Borelli
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Author: Paolo Borelli <pborelli gnome org>
+ */
+
+#if !defined (__GIO_GIO_H_INSIDE__) && !defined (GIO_COMPILATION)
+#error "Only <gio/gio.h> can be included directly."
+#endif
+
+#ifndef __G_UTF8_INPUT_STREAM_H__
+#define __G_UTF8_INPUT_STREAM_H__
+
+#include <gio/ginputstream.h>
+#include <gio/gfilterinputstream.h>
+
+G_BEGIN_DECLS
+
+#define G_TYPE_UTF8_INPUT_STREAM         (g_utf8_input_stream_get_type ())
+#define G_UTF8_INPUT_STREAM(o)           (G_TYPE_CHECK_INSTANCE_CAST ((o), G_TYPE_UTF8_INPUT_STREAM, GUtf8InputStream))
+#define G_UTF8_INPUT_STREAM_CLASS(k)     (G_TYPE_CHECK_CLASS_CAST((k), G_TYPE_UTF8_INPUT_STREAM, GUtf8InputStreamClass))
+#define G_IS_UTF8_INPUT_STREAM(o)        (G_TYPE_CHECK_INSTANCE_TYPE ((o), G_TYPE_UTF8_INPUT_STREAM))
+#define G_IS_UTF8_INPUT_STREAM_CLASS(k)  (G_TYPE_CHECK_CLASS_TYPE ((k), G_TYPE_UTF8_INPUT_STREAM))
+#define G_UTF8_INPUT_STREAM_GET_CLASS(o) (G_TYPE_INSTANCE_GET_CLASS ((o), G_TYPE_UTF8_INPUT_STREAM, GUtf8InputStreamClass))
+
+/**
+ * GUtf8InputStream:
+ * @parent_instance: a #GFilterInputStream.
+ *
+ * An implementation of #GFilterInputStream that performs UTF8 validation.
+ *
+ * Since: 2.24
+ **/
+typedef struct _GUtf8InputStreamClass    GUtf8InputStreamClass;
+typedef struct _GUtf8InputStreamPrivate  GUtf8InputStreamPrivate;
+
+struct _GUtf8InputStream
+{
+  GFilterInputStream parent_instance;
+
+  /*< private >*/
+  GUtf8InputStreamPrivate *priv;
+};
+
+struct _GUtf8InputStreamClass
+{
+  GFilterInputStreamClass parent_class;
+
+  /*< private >*/
+  /* Padding for future expansion */
+  void (*_g_reserved1) (void);
+  void (*_g_reserved2) (void);
+  void (*_g_reserved3) (void);
+  void (*_g_reserved4) (void);
+  void (*_g_reserved5) (void);
+};
+
+GType             g_utf8_input_stream_get_type      (void) G_GNUC_CONST;
+
+GInputStream     *g_utf8_input_stream_new           (GInputStream *base_stream);
+
+G_END_DECLS
+
+#endif /* __G_UTF8_INPUT_STREAM_H__ */
diff --git a/gio/tests/Makefile.am b/gio/tests/Makefile.am
index 8f9f483..bc853b5 100644
--- a/gio/tests/Makefile.am
+++ b/gio/tests/Makefile.am
@@ -26,6 +26,7 @@ TEST_PROGS +=	 		\
 	converter-stream	\
 	data-input-stream 	\
 	data-output-stream 	\
+	utf8-input-stream 	\
 	g-icon			\
 	buffered-input-stream	\
 	sleepy-stream		\
@@ -71,6 +72,9 @@ data_input_stream_LDADD		= $(progs_ldadd)
 data_output_stream_SOURCES	= data-output-stream.c
 data_output_stream_LDADD	= $(progs_ldadd)
 
+utf8_input_stream_SOURCES	= utf8-input-stream.c
+utf8_input_stream_LDADD		= $(progs_ldadd)
+
 filter_cat_SOURCES	= filter-cat.c
 filter_cat_LDADD	= $(progs_ldadd)
 
diff --git a/gio/tests/utf8-input-stream.c b/gio/tests/utf8-input-stream.c
new file mode 100644
index 0000000..9f9e955
--- /dev/null
+++ b/gio/tests/utf8-input-stream.c
@@ -0,0 +1,252 @@
+/* GIO - GLib Input, Output and Streaming Library
+ *
+ * Copyright (C) 2009 Paolo Borelli
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Author: Paolo Borelli <pborelli gnome org>
+ */
+
+#include <glib/glib.h>
+#include <gio/gio.h>
+#include <string.h>
+
+/* Reads all of @str in one call; checks the byte count (or error) and character count. */
+static void
+do_test_read (const char *str, gssize expected_nread, glong expected_nchar)
+{
+  GInputStream *base, *in;
+  gssize len, n;
+  char *buf;
+  GError *err;
+
+  len = strlen (str);
+  base = g_memory_input_stream_new_from_data (str, -1, NULL);
+  in = g_utf8_input_stream_new (base);
+  g_object_unref (base);
+
+  /* one extra zeroed byte keeps buf NUL-terminated for g_assert_cmpstr() */
+  buf = g_new0 (char, len + 1);
+  err = NULL;
+  n = g_input_stream_read (in, buf, len, NULL, &err);
+  g_assert_cmpint (n, ==, expected_nread);
+  if (expected_nread < 0)
+    {
+      g_assert_error (err, G_IO_ERROR, G_IO_ERROR_INVALID_DATA);
+      g_clear_error (&err);
+    }
+  else
+    {
+      g_assert_cmpstr (str, ==, buf);
+      g_assert_cmpint (g_utf8_strlen (buf, -1), ==, expected_nchar);
+      g_assert (err == NULL);
+    }
+  g_free (buf);
+  g_object_unref (in);
+}
+
+/* Reads @str in two calls (@chunk_len bytes, then the rest) and checks both results. */
+static void
+do_test_read_partial (const char *str,
+                      gssize chunk_len,
+                      gssize expected_nread1,
+                      gssize expected_nread2,
+                      glong expected_nchar)
+{
+  GInputStream *base, *in;
+  gssize len, n1, n2;
+  char *buf;
+  GError *err;
+
+  len = strlen (str);
+  base = g_memory_input_stream_new_from_data (str, -1, NULL);
+  in = g_utf8_input_stream_new (base);
+  g_object_unref (base);
+
+  /* one extra zeroed byte keeps buf NUL-terminated for g_assert_cmpstr() */
+  buf = g_new0 (char, len + 1);
+  err = NULL;
+  n1 = g_input_stream_read (in, buf, chunk_len, NULL, &err);
+  g_assert_cmpint (n1, ==, expected_nread1);
+  g_assert (err == NULL);
+
+  n2 = g_input_stream_read (in, buf + n1, len - n1, NULL, &err);
+  g_assert_cmpint (n2, ==, expected_nread2);
+  if (expected_nread2 < 0)
+    {
+      g_assert_error (err, G_IO_ERROR, G_IO_ERROR_INVALID_DATA);
+      g_clear_error (&err);
+    }
+  else
+    {
+      g_assert_cmpstr (str, ==, buf);
+      g_assert_cmpint (g_utf8_strlen (buf, -1), ==, expected_nchar);
+      g_assert (err == NULL);
+    }
+  g_free (buf);
+  g_object_unref (in);
+}
+
+static void
+test_read_ascii (void)
+{
+  do_test_read ("foobar", 6, 6);
+}
+
+static void
+test_read_utf8 (void)
+{
+  do_test_read ("foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz", 18, 15);
+}
+
+static void
+test_read_utf8_partial (void)
+{
+  do_test_read_partial ("foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz", 7, 6, 12, 15);
+}
+
+static void
+test_read_invalid_start (void)
+{
+  do_test_read ("\xef\xbf\xbezzzzzz", -1, -1);
+}
+
+static void
+test_read_invalid_middle (void)
+{
+  do_test_read ("foobar\xef\xbf\xbezzzzzz", -1, -1);
+}
+
+static void
+test_read_invalid_end (void)
+{
+  do_test_read ("foobar\xef\xbf\xbe", -1, -1);
+}
+
+static void
+test_read_invalid_partial (void)
+{
+  do_test_read_partial ("foobar\xef\xbf\xbezzzzzz", 7, 6, -1, -1);
+}
+
+/* Reads one byte of a two-byte character, then the rest, expecting success. */
+static void
+test_read_small_valid (void)
+{
+  GInputStream *base, *in;
+  gssize len, n;
+  char *buf;
+  GError *err;
+
+  base = g_memory_input_stream_new_from_data ("\xc3\xa8\xc3\xa8", -1, NULL);
+  in = g_utf8_input_stream_new (base);
+  g_object_unref (base);
+
+  len = strlen("\xc3\xa8\xc3\xa8");
+  buf = g_new0 (char, len + 1); /* + 1: NUL terminator for g_assert_cmpstr() */
+  err = NULL;
+
+  /* read a single byte */
+  n = g_input_stream_read (in, buf, 1, NULL, &err);
+  g_assert_cmpint (n, ==, 1);
+  g_assert_cmpstr ("\xc3", ==, buf);
+  g_assert (err == NULL);
+
+  /* read the rest */
+  n = g_input_stream_read (in, buf + n, len - n, NULL, &err);
+  g_assert_cmpint (n, ==, len - 1);
+  g_assert_cmpstr ("\xc3\xa8\xc3\xa8", ==, buf);
+  g_assert (err == NULL);
+  g_free (buf);
+  g_object_unref (in);
+}
+
+/* A one-byte read of a stream starting with a continuation byte must fail. */
+static void
+test_read_small_invalid (void)
+{
+  GInputStream *base, *in;
+  gssize n;
+  char *buf;
+  GError *err;
+
+  base = g_memory_input_stream_new_from_data ("\xbf\xbe", -1, NULL);
+  in = g_utf8_input_stream_new (base);
+  g_object_unref (base);
+  buf = g_new0 (char, 2);
+  err = NULL;
+  n = g_input_stream_read (in, buf, 1, NULL, &err);
+  g_assert_cmpint (n, ==, -1);
+  g_assert_error (err, G_IO_ERROR, G_IO_ERROR_INVALID_DATA);
+  g_clear_error (&err);
+  g_free (buf);
+  g_object_unref (in);
+}
+
+/* Reads two two-byte characters one byte at a time; every read must succeed. */
+static void
+test_read_small_consecutive (void)
+{
+  GInputStream *base, *in;
+  gssize len, n;
+  char *buf;
+  GError *err;
+
+  base = g_memory_input_stream_new_from_data ("\xc3\xa8\xc3\xa8", -1, NULL);
+  in = g_utf8_input_stream_new (base);
+  g_object_unref (base);
+
+  len = strlen("\xc3\xa8\xc3\xa8");
+  buf = g_new0 (char, len + 1); /* + 1: NUL terminator for g_assert_cmpstr() */
+  err = NULL;
+  n = 0;
+
+  /* read a single byte at a time */
+  while (n < len)
+  {
+    gssize r;
+
+    r = g_input_stream_read (in, buf + n, 1, NULL, &err);
+    g_assert_cmpint (r, ==, 1);
+    g_assert (err == NULL);
+
+    n += r;
+  }
+
+  g_assert_cmpstr ("\xc3\xa8\xc3\xa8", ==, buf);
+  g_free (buf);
+  g_object_unref (in);
+}
+
+int
+main (int   argc,
+      char *argv[])
+{
+  g_type_init ();
+  g_test_init (&argc, &argv, NULL);
+  g_test_add_func ("/utf8-input-stream/read-ascii", test_read_ascii);
+  g_test_add_func ("/utf8-input-stream/read-utf8", test_read_utf8);
+  g_test_add_func ("/utf8-input-stream/read-utf8-partial", test_read_utf8_partial);
+  g_test_add_func ("/utf8-input-stream/read-invalid-start", test_read_invalid_start);
+  g_test_add_func ("/utf8-input-stream/read-invalid-middle", test_read_invalid_middle);
+  g_test_add_func ("/utf8-input-stream/read-invalid-end", test_read_invalid_end);
+  g_test_add_func ("/utf8-input-stream/read-invalid-partial", test_read_invalid_partial);
+  g_test_add_func ("/utf8-input-stream/read-small-valid", test_read_small_valid);
+  g_test_add_func ("/utf8-input-stream/read-small-invalid", test_read_small_invalid);
+  g_test_add_func ("/utf8-input-stream/read-small-consecutive", test_read_small_consecutive);
+
+  return g_test_run();
+}



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]