[libsoup] soup-message-io: Use GConverterInputStream for content-decoding



commit a1ee9acd68e6be9dca4ce3796d8d99ca17e4d5ba
Author: Dan Winship <danw gnome org>
Date:   Thu Dec 23 15:57:24 2010 -0500

    soup-message-io: Use GConverterInputStream for content-decoding
    
    Decode Content-Encodings by wrapping a GConverterInputStream around
    the SoupBodyInputStream.
    
    Because we want to be able to fall back to passing data through
    undecoded in the case that decoding fails, we need to use a GConverter
    wrapper (SoupConverterWrapper) that implements that.
    
    The old soup-message-io code was automatically stopping decompression
    when it reached the end of the response body, without checking that
    the compressed data was actually whole at that point. Fix that.
    However, this breaks the previous hack used for the zlib-to-raw
    fallback, since the raw data won't have a checksum at the end.
    So do that differently now.

 libsoup/Makefile.am              |    2 +
 libsoup/soup-content-decoder.c   |    2 -
 libsoup/soup-converter-wrapper.c |  299 ++++++++++++++++++++++++++++++++++++++
 libsoup/soup-converter-wrapper.h |   45 ++++++
 libsoup/soup-message-io.c        |  131 +++--------------
 po/POTFILES.in                   |    1 +
 6 files changed, 367 insertions(+), 113 deletions(-)
---
diff --git a/libsoup/Makefile.am b/libsoup/Makefile.am
index 5cfba04..3be0f9b 100644
--- a/libsoup/Makefile.am
+++ b/libsoup/Makefile.am
@@ -105,6 +105,8 @@ libsoup_2_4_la_SOURCES =		\
 	soup-connection.c		\
 	soup-content-decoder.c		\
 	soup-content-sniffer.c		\
+	soup-converter-wrapper.h	\
+	soup-converter-wrapper.c	\
 	soup-cookie.c			\
 	soup-cookie-jar.c		\
 	soup-cookie-jar-text.c		\
diff --git a/libsoup/soup-content-decoder.c b/libsoup/soup-content-decoder.c
index e5a7125..e686896 100644
--- a/libsoup/soup-content-decoder.c
+++ b/libsoup/soup-content-decoder.c
@@ -184,8 +184,6 @@ soup_content_decoder_got_headers_cb (SoupMessage *msg, SoupContentDecoder *decod
 		msgpriv->decoders = g_slist_prepend (msgpriv->decoders, converter);
 	}
 	soup_header_free_list (encodings);
-
-	soup_message_set_flags (msg, msgpriv->msg_flags | SOUP_MESSAGE_CONTENT_DECODED);
 }
 
 static void
diff --git a/libsoup/soup-converter-wrapper.c b/libsoup/soup-converter-wrapper.c
new file mode 100644
index 0000000..8480960
--- /dev/null
+++ b/libsoup/soup-converter-wrapper.c
@@ -0,0 +1,299 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * soup-converter-wrapper.c
+ *
+ * Copyright 2011 Red Hat, Inc.
+ */
+
+#include "config.h"
+
+#include <string.h>
+
+#include <glib/gi18n-lib.h>
+
+#include "soup-converter-wrapper.h"
+#include "soup-message.h"
+
+/* SoupConverterWrapper is a GConverter that wraps another GConverter.
+ * Mostly it is transparent, but it implements two special fallbacks
+ * for Content-Encoding handling: (1) "deflate" can mean either raw
+ * deflate or zlib-encoded deflate, (2) the server may mistakenly
+ * claim that a response is encoded when actually it isn't.
+ *
+ * If the wrapped conversion succeeds, then the wrapper will set the
+ * %SOUP_MESSAGE_CONTENT_DECODED flag on its message.
+ */
+
+/* GObject property IDs; PROP_0 is an unused placeholder so that the
+ * real properties are numbered from 1.
+ */
+enum {
+	PROP_0 = 0,
+	PROP_BASE_CONVERTER = 1,	/* "base-converter" */
+	PROP_MESSAGE = 2		/* "message" */
+};
+
+static void soup_converter_wrapper_iface_init (GConverterIface *iface);
+
+G_DEFINE_TYPE_WITH_CODE (SoupConverterWrapper, soup_converter_wrapper, G_TYPE_OBJECT,
+			 G_IMPLEMENT_INTERFACE (G_TYPE_CONVERTER,
+						soup_converter_wrapper_iface_init))
+
+/* Per-instance private state of a SoupConverterWrapper. */
+struct _SoupConverterWrapperPrivate {
+	/* The wrapped converter; cleared once the wrapper switches to
+	 * passing data through undecoded.
+	 */
+	GConverter  *base_converter;
+
+	/* Message that gets SOUP_MESSAGE_CONTENT_DECODED set on it when
+	 * the wrapped conversion first succeeds.
+	 */
+	SoupMessage *msg;
+
+	/* TRUE while a failed zlib-format decode may still be retried
+	 * with a raw-deflate decompressor.
+	 */
+	gboolean     try_deflate_fallback;
+
+	/* TRUE once the base converter has returned a non-error result. */
+	gboolean     started;
+};
+
+/* GObject finalize handler: releases the references that set_property
+ * took (via g_value_dup_object()) on the wrapped converter and on the
+ * associated message, then chains up to the parent class.
+ */
+static void
+soup_converter_wrapper_finalize (GObject *object)
+{
+	SoupConverterWrapperPrivate *priv = SOUP_CONVERTER_WRAPPER (object)->priv;
+
+	/* base_converter may already be NULL if we fell back to passthrough */
+	g_clear_object (&priv->base_converter);
+
+	/* The "message" property is dup'ed on construction, so the wrapper
+	 * owns a reference that must be dropped here or it leaks.
+	 */
+	g_clear_object (&priv->msg);
+
+	G_OBJECT_CLASS (soup_converter_wrapper_parent_class)->finalize (object);
+}
+
+
+/* GObject set_property handler for "base-converter" and "message".
+ * Each property value is stored with its own reference. When the base
+ * converter is a GZlibDecompressor in zlib format, the raw-deflate
+ * fallback is armed.
+ */
+static void
+soup_converter_wrapper_set_property (GObject      *object,
+				     guint         prop_id,
+				     const GValue *value,
+				     GParamSpec   *pspec)
+{
+	SoupConverterWrapper *wrapper = SOUP_CONVERTER_WRAPPER (object);
+	SoupConverterWrapperPrivate *priv = wrapper->priv;
+	GZlibCompressorFormat zlib_format;
+
+	if (prop_id == PROP_MESSAGE) {
+		priv->msg = g_value_dup_object (value);
+		return;
+	}
+
+	if (prop_id != PROP_BASE_CONVERTER) {
+		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+		return;
+	}
+
+	priv->base_converter = g_value_dup_object (value);
+	if (!G_IS_ZLIB_DECOMPRESSOR (priv->base_converter))
+		return;
+
+	/* Only a zlib-format decompressor qualifies for the raw retry */
+	g_object_get (G_OBJECT (priv->base_converter),
+		      "format", &zlib_format,
+		      NULL);
+	if (zlib_format == G_ZLIB_COMPRESSOR_FORMAT_ZLIB)
+		priv->try_deflate_fallback = TRUE;
+}
+
+/* GObject get_property handler: stores the wrapped converter or the
+ * associated message into @value, and warns on any other property ID.
+ */
+static void
+soup_converter_wrapper_get_property (GObject    *object,
+				     guint       prop_id,
+				     GValue     *value,
+				     GParamSpec *pspec)
+{
+	SoupConverterWrapper *wrapper = SOUP_CONVERTER_WRAPPER (object);
+	SoupConverterWrapperPrivate *priv = wrapper->priv;
+
+	if (prop_id == PROP_BASE_CONVERTER) {
+		g_value_set_object (value, priv->base_converter);
+	} else if (prop_id == PROP_MESSAGE) {
+		g_value_set_object (value, priv->msg);
+	} else {
+		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+	}
+}
+
+/* Instance initializer: looks up the private data area once and caches
+ * the pointer in the instance.
+ */
+static void
+soup_converter_wrapper_init (SoupConverterWrapper *converter)
+{
+	SoupConverterWrapperPrivate *priv;
+
+	priv = G_TYPE_INSTANCE_GET_PRIVATE (converter,
+					    SOUP_TYPE_CONVERTER_WRAPPER,
+					    SoupConverterWrapperPrivate);
+	converter->priv = priv;
+}
+
+/* Class initializer: registers the private struct, hooks up the GObject
+ * virtual methods and installs the two construct-only properties.
+ */
+static void
+soup_converter_wrapper_class_init (SoupConverterWrapperClass *klass)
+{
+	const GParamFlags construct_flags =
+		G_PARAM_READWRITE |
+		G_PARAM_CONSTRUCT_ONLY |
+		G_PARAM_STATIC_STRINGS;
+	GObjectClass *object_class = G_OBJECT_CLASS (klass);
+	GParamSpec *pspec;
+
+	g_type_class_add_private (klass, sizeof (SoupConverterWrapperPrivate));
+
+	object_class->finalize = soup_converter_wrapper_finalize;
+	object_class->get_property = soup_converter_wrapper_get_property;
+	object_class->set_property = soup_converter_wrapper_set_property;
+
+	pspec = g_param_spec_object ("base-converter",
+				     "Base GConverter",
+				     "GConverter to wrap",
+				     G_TYPE_CONVERTER,
+				     construct_flags);
+	g_object_class_install_property (object_class,
+					 PROP_BASE_CONVERTER, pspec);
+
+	pspec = g_param_spec_object ("message",
+				     "Message",
+				     "Associated SoupMessage",
+				     SOUP_TYPE_MESSAGE,
+				     construct_flags);
+	g_object_class_install_property (object_class,
+					 PROP_MESSAGE, pspec);
+}
+
+/* Creates a SoupConverterWrapper around @base_converter, associated
+ * with @msg. Returns the object produced by g_object_new().
+ */
+GConverter *
+soup_converter_wrapper_new (GConverter  *base_converter,
+			    SoupMessage *msg)
+{
+	gpointer wrapper;
+
+	wrapper = g_object_new (SOUP_TYPE_CONVERTER_WRAPPER,
+				"base-converter", base_converter,
+				"message", msg,
+				NULL);
+	return wrapper;
+}
+
+/* GConverter::reset implementation: forwards the reset to the wrapped
+ * converter, if there still is one.
+ */
+static void
+soup_converter_wrapper_reset (GConverter *converter)
+{
+	GConverter *base = SOUP_CONVERTER_WRAPPER (converter)->priv->base_converter;
+
+	if (base == NULL)
+		return;
+
+	g_converter_reset (base);
+}
+
+/* Passthrough "conversion": copies as much of @inbuf as fits into
+ * @outbuf without modifying it.
+ *
+ * Returns G_CONVERTER_FINISHED or G_CONVERTER_FLUSHED when all input
+ * was consumed and the corresponding flag is set, otherwise
+ * G_CONVERTER_CONVERTED. Returns G_CONVERTER_ERROR with
+ * G_IO_ERROR_NO_SPACE if @outbuf is empty, or with
+ * G_IO_ERROR_PARTIAL_INPUT if there is no input to copy and the caller
+ * is neither flushing nor at end of input.
+ */
+static GConverterResult
+soup_converter_wrapper_fallback_convert (GConverter *converter,
+					 const void *inbuf,
+					 gsize       inbuf_size,
+					 void       *outbuf,
+					 gsize       outbuf_size,
+					 GConverterFlags flags,
+					 gsize      *bytes_read,
+					 gsize      *bytes_written,
+					 GError    **error)
+{
+	gsize n_copy;
+
+	if (outbuf_size == 0) {
+		g_set_error (error, G_IO_ERROR, G_IO_ERROR_NO_SPACE,
+			     _("Output buffer is too small"));
+		return G_CONVERTER_ERROR;
+	}
+
+	/* Reporting success with nothing read and nothing written would
+	 * leave the caller unable to make progress; ask for more input
+	 * (or a retry with G_CONVERTER_INPUT_AT_END) instead.
+	 */
+	if (inbuf_size == 0 &&
+	    !(flags & (G_CONVERTER_INPUT_AT_END | G_CONVERTER_FLUSH))) {
+		g_set_error_literal (error, G_IO_ERROR,
+				     G_IO_ERROR_PARTIAL_INPUT,
+				     _("Need more input"));
+		return G_CONVERTER_ERROR;
+	}
+
+	n_copy = MIN (inbuf_size, outbuf_size);
+	/* Skip memcpy for empty input: inbuf may be NULL in that case */
+	if (n_copy > 0)
+		memcpy (outbuf, inbuf, n_copy);
+	*bytes_read = *bytes_written = n_copy;
+
+	/* Input left over: the caller has to come back with the rest */
+	if (n_copy < inbuf_size)
+		return G_CONVERTER_CONVERTED;
+
+	if (flags & G_CONVERTER_INPUT_AT_END)
+		return G_CONVERTER_FINISHED;
+	if (flags & G_CONVERTER_FLUSH)
+		return G_CONVERTER_FLUSHED;
+	return G_CONVERTER_CONVERTED;
+}
+
+/* Runs the wrapped converter and applies the two fallbacks.
+ *
+ * On the first non-error result, SOUP_MESSAGE_CONTENT_DECODED is set on
+ * the message. An error other than G_IO_ERROR_INVALID_DATA, or any
+ * error after decoding has started, is propagated to @error. Otherwise
+ * a zlib-format decoder is replaced once by a raw-deflate one and the
+ * conversion retried; if that is not possible the base converter is
+ * dropped and the data is passed through undecoded.
+ */
+static GConverterResult
+soup_converter_wrapper_real_convert (GConverter *converter,
+				     const void *inbuf,
+				     gsize       inbuf_size,
+				     void       *outbuf,
+				     gsize       outbuf_size,
+				     GConverterFlags flags,
+				     gsize      *bytes_read,
+				     gsize      *bytes_written,
+				     GError    **error)
+{
+	SoupConverterWrapperPrivate *priv = SOUP_CONVERTER_WRAPPER (converter)->priv;
+	GConverterResult result;
+	GError *my_error = NULL;
+
+	for (;;) {
+		result = g_converter_convert (priv->base_converter,
+					      inbuf, inbuf_size,
+					      outbuf, outbuf_size,
+					      flags, bytes_read, bytes_written,
+					      &my_error);
+		if (result != G_CONVERTER_ERROR) {
+			if (!priv->started) {
+				SoupMessageFlags msg_flags;
+
+				msg_flags = soup_message_get_flags (priv->msg);
+				soup_message_set_flags (priv->msg,
+							msg_flags | SOUP_MESSAGE_CONTENT_DECODED);
+				priv->started = TRUE;
+			}
+			return result;
+		}
+
+		/* Fallbacks only apply to bad data seen before any
+		 * output was produced; everything else is a real error.
+		 */
+		if (priv->started ||
+		    !g_error_matches (my_error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA)) {
+			g_propagate_error (error, my_error);
+			return result;
+		}
+		g_clear_error (&my_error);
+
+		if (!priv->try_deflate_fallback)
+			break;
+
+		/* Deflate hack: some servers (especially Apache with
+		 * mod_deflate) return raw compressed data without the
+		 * zlib headers when the client claims to support
+		 * deflate. Retry once with a raw decompressor.
+		 */
+		priv->try_deflate_fallback = FALSE;
+		g_object_unref (priv->base_converter);
+		priv->base_converter = (GConverter *)
+			g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_RAW);
+	}
+
+	/* Passthrough hack: some servers mistakenly claim to be
+	 * sending encoded data when in fact they aren't, so fall
+	 * back to just not decoding.
+	 */
+	g_clear_object (&priv->base_converter);
+	return soup_converter_wrapper_fallback_convert (converter,
+							inbuf, inbuf_size,
+							outbuf, outbuf_size,
+							flags, bytes_read,
+							bytes_written, error);
+}
+
+/* GConverter::convert implementation: uses the wrapped converter while
+ * one is set, and plain passthrough once it has been dropped.
+ */
+static GConverterResult
+soup_converter_wrapper_convert (GConverter *converter,
+				const void *inbuf,
+				gsize       inbuf_size,
+				void       *outbuf,
+				gsize       outbuf_size,
+				GConverterFlags flags,
+				gsize      *bytes_read,
+				gsize      *bytes_written,
+				GError    **error)
+{
+	SoupConverterWrapperPrivate *priv = SOUP_CONVERTER_WRAPPER (converter)->priv;
+
+	if (!priv->base_converter) {
+		return soup_converter_wrapper_fallback_convert (converter,
+								inbuf, inbuf_size,
+								outbuf, outbuf_size,
+								flags, bytes_read,
+								bytes_written, error);
+	}
+
+	return soup_converter_wrapper_real_convert (converter,
+						    inbuf, inbuf_size,
+						    outbuf, outbuf_size,
+						    flags, bytes_read,
+						    bytes_written, error);
+}
+
+/* Fills in the GConverter interface vtable with this type's
+ * reset and convert implementations.
+ */
+static void
+soup_converter_wrapper_iface_init (GConverterIface *iface)
+{
+	iface->reset = soup_converter_wrapper_reset;
+	iface->convert = soup_converter_wrapper_convert;
+}
diff --git a/libsoup/soup-converter-wrapper.h b/libsoup/soup-converter-wrapper.h
new file mode 100644
index 0000000..62ed9da
--- /dev/null
+++ b/libsoup/soup-converter-wrapper.h
@@ -0,0 +1,45 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright 2011 Red Hat, Inc.
+ */
+
+#ifndef SOUP_CONVERTER_WRAPPER_H
+#define SOUP_CONVERTER_WRAPPER_H 1
+
+#include <libsoup/soup-types.h>
+
+G_BEGIN_DECLS
+
+/* GObject type, cast and type-check macros for SoupConverterWrapper.
+ * Each macro expands only its own argument; the _CLASS variants take a
+ * class pointer, the others an instance pointer.
+ */
+#define SOUP_TYPE_CONVERTER_WRAPPER            (soup_converter_wrapper_get_type ())
+#define SOUP_CONVERTER_WRAPPER(obj)            (G_TYPE_CHECK_INSTANCE_CAST ((obj), SOUP_TYPE_CONVERTER_WRAPPER, SoupConverterWrapper))
+#define SOUP_CONVERTER_WRAPPER_CLASS(klass)    (G_TYPE_CHECK_CLASS_CAST ((klass), SOUP_TYPE_CONVERTER_WRAPPER, SoupConverterWrapperClass))
+#define SOUP_IS_CONVERTER_WRAPPER(obj)         (G_TYPE_CHECK_INSTANCE_TYPE ((obj), SOUP_TYPE_CONVERTER_WRAPPER))
+#define SOUP_IS_CONVERTER_WRAPPER_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), SOUP_TYPE_CONVERTER_WRAPPER))
+#define SOUP_CONVERTER_WRAPPER_GET_CLASS(obj)  (G_TYPE_INSTANCE_GET_CLASS ((obj), SOUP_TYPE_CONVERTER_WRAPPER, SoupConverterWrapperClass))
+
+typedef struct _SoupConverterWrapperPrivate SoupConverterWrapperPrivate;
+
+/* Instance structure: the GObject parent followed by a pointer to the
+ * type's private data.
+ */
+typedef struct {
+	GObject                      parent;
+	SoupConverterWrapperPrivate *priv;
+} SoupConverterWrapper;
+
+/* Class structure: the GObjectClass parent plus four unused
+ * function-pointer slots.
+ */
+typedef struct {
+	GObjectClass parent_class;
+
+	/* Reserved slots, kept as padding for future expansion */
+	void (*_libsoup_reserved1) (void);
+	void (*_libsoup_reserved2) (void);
+	void (*_libsoup_reserved3) (void);
+	void (*_libsoup_reserved4) (void);
+} SoupConverterWrapperClass;
+
+GType soup_converter_wrapper_get_type (void);
+
+GConverter *soup_converter_wrapper_new (GConverter  *base_converter,
+					SoupMessage *msg);
+
+G_END_DECLS
+
+#endif /* SOUP_CONVERTER_WRAPPER_H */
diff --git a/libsoup/soup-message-io.c b/libsoup/soup-message-io.c
index aabb902..6e66364 100644
--- a/libsoup/soup-message-io.c
+++ b/libsoup/soup-message-io.c
@@ -15,6 +15,7 @@
 #include "soup-body-input-stream.h"
 #include "soup-body-output-stream.h"
 #include "soup-connection.h"
+#include "soup-converter-wrapper.h"
 #include "soup-filter-input-stream.h"
 #include "soup-message.h"
 #include "soup-message-private.h"
@@ -309,117 +310,29 @@ read_headers (SoupMessage *msg, GCancellable *cancellable, GError **error)
 	return TRUE;
 }
 
-static SoupBuffer *
-content_decode_one (SoupBuffer *buf, GConverter *converter, GError **error)
-{
-	gsize outbuf_length, outbuf_used, outbuf_cur, input_used, input_cur;
-	char *outbuf;
-	GConverterResult result;
-	gboolean dummy_zlib_header_used = FALSE;
-
-	outbuf_length = MAX (buf->length * 2, 1024);
-	outbuf = g_malloc (outbuf_length);
-	outbuf_cur = input_cur = 0;
-
-	do {
-		result = g_converter_convert (
-			converter,
-			buf->data + input_cur, buf->length - input_cur,
-			outbuf + outbuf_cur, outbuf_length - outbuf_cur,
-			0, &input_used, &outbuf_used, error);
-		input_cur += input_used;
-		outbuf_cur += outbuf_used;
-
-		if (g_error_matches (*error, G_IO_ERROR, G_IO_ERROR_NO_SPACE) ||
-		    (!*error && outbuf_cur == outbuf_length)) {
-			g_clear_error (error);
-			outbuf_length *= 2;
-			outbuf = g_realloc (outbuf, outbuf_length);
-		} else if (input_cur == 0 &&
-			   !dummy_zlib_header_used &&
-			   G_IS_ZLIB_DECOMPRESSOR (converter) &&
-			   g_error_matches (*error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA)) {
-
-			GZlibCompressorFormat format;
-			g_object_get (G_OBJECT (converter), "format", &format, NULL);
-
-			if (format == G_ZLIB_COMPRESSOR_FORMAT_ZLIB) {
-				/* Some servers (especially Apache with mod_deflate)
-				 * return RAW compressed data without the zlib headers
-				 * when the client claims to support deflate. For
-				 * those cases use a dummy header (stolen from
-				 * Mozilla's nsHTTPCompressConv.cpp) and try to
-				 * continue uncompressing data.
-				 */
-				static char dummy_zlib_header[2] = { 0x78, 0x9C };
-
-				g_converter_reset (converter);
-				result = g_converter_convert (converter,
-							      dummy_zlib_header, sizeof(dummy_zlib_header),
-							      outbuf + outbuf_cur, outbuf_length - outbuf_cur,
-							      0, &input_used, &outbuf_used, NULL);
-				dummy_zlib_header_used = TRUE;
-				if (result == G_CONVERTER_CONVERTED) {
-					g_clear_error (error);
-					continue;
-				}
-			}
-
-			g_free (outbuf);
-			return NULL;
-
-		} else if (*error) {
-			/* GZlibDecompressor can't ever return
-			 * G_IO_ERROR_PARTIAL_INPUT unless we pass it
-			 * input_length = 0, which we don't. Other
-			 * converters might of course, so eventually
-			 * this code needs to be rewritten to deal
-			 * with that.
-			 */
-			g_free (outbuf);
-			return NULL;
-		}
-	} while (input_cur < buf->length && result != G_CONVERTER_FINISHED);
-
-	if (outbuf_cur)
-		return soup_buffer_new (SOUP_MEMORY_TAKE, outbuf, outbuf_cur);
-	else {
-		g_free (outbuf);
-		return NULL;
-	}
-}
-
-static SoupBuffer *
-content_decode (SoupMessage *msg, SoupBuffer *buf)
+/* Builds io->body_istream for @msg: a SoupBodyInputStream over
+ * io->istream, wrapped in one GConverterInputStream per entry in
+ * priv->decoders. Each decoder is itself wrapped in a
+ * SoupConverterWrapper tied to @msg.
+ */
+static void
+setup_body_istream (SoupMessage *msg)
 {
 	SoupMessagePrivate *priv = SOUP_MESSAGE_GET_PRIVATE (msg);
-	GConverter *decoder;
-	SoupBuffer *decoded;
-	GError *error = NULL;
+	SoupMessageIOData *io = priv->io_data;
+	GConverter *decoder, *wrapper;
+	GInputStream *filter;
 	GSList *d;
 
+	io->body_istream = soup_body_input_stream_new (io->istream,
+						       io->read_encoding,
+						       io->read_length);
+
 	for (d = priv->decoders; d; d = d->next) {
 		decoder = d->data;
-
-		decoded = content_decode_one (buf, decoder, &error);
-		if (error) {
-			if (g_error_matches (error, G_IO_ERROR, G_IO_ERROR_FAILED))
-				g_warning ("Content-Decoding error: %s\n", error->message);
-			g_error_free (error);
-
-			soup_message_set_flags (msg, priv->msg_flags & ~SOUP_MESSAGE_CONTENT_DECODED);
-			break;
-		}
-		if (buf)
-			soup_buffer_free (buf);
-
-		if (decoded)
-			buf = decoded;
-		else
-			return NULL;
+		wrapper = soup_converter_wrapper_new (decoder, msg);
+		filter = g_object_new (G_TYPE_CONVERTER_INPUT_STREAM,
+				       "base-stream", io->body_istream,
+				       "converter", wrapper,
+				       NULL);
+		/* The filter stream holds its own references to the base
+		 * stream and the converter, so drop ours; otherwise the
+		 * wrapper would be leaked for every decoder.
+		 */
+		g_object_unref (io->body_istream);
+		g_object_unref (wrapper);
+		io->body_istream = filter;
 	}
-
-	return buf;
 }
 
 /*
@@ -742,14 +655,14 @@ io_read (SoupMessage *msg, GCancellable *cancellable, GError **error)
 		} else
 			io->read_length = -1;
 
-		io->body_istream = soup_body_input_stream_new (SOUP_FILTER_INPUT_STREAM (io->istream),
-							       io->read_encoding,
-							       io->read_length);
 		soup_message_got_headers (msg);
 		break;
 
 
 	case SOUP_MESSAGE_IO_STATE_BODY:
+		if (!io->body_istream)
+			setup_body_istream (msg);
+
 		if (!io_handle_sniffing (msg, FALSE))
 			return FALSE;
 
@@ -774,10 +687,6 @@ io_read (SoupMessage *msg, GCancellable *cancellable, GError **error)
 						cancellable, error);
 		if (nread > 0) {
 			buffer->length = nread;
-			buffer = content_decode (msg, buffer);
-			if (!buffer)
-				break;
-
 			soup_message_body_got_chunk (io->read_body, buffer);
 
 			if (io->need_content_sniffed) {
diff --git a/po/POTFILES.in b/po/POTFILES.in
index 4115bb0..b35ee88 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -1,3 +1,4 @@
 libsoup/soup-body-input-stream.c
+libsoup/soup-converter-wrapper.c
 libsoup/soup-request.c
 libsoup/soup-requester.c



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]