[gedit] Rework encoding validation



commit cb235cbe4b6e705a2489a395d3ecbf5d648c32ec
Author: Paolo Borelli <pborelli gnome org>
Date:   Sun Feb 28 16:12:35 2010 +0100

    Rework encoding validation
    
    It turns out that we were being too smart for gtk, since we handled
    partial utf8 reads, while gtktextview needs to be fed complete utf8
    sequences. Since we have to do our own buffering before inserting the
    text we may drop the utf8 stream validation and validate directly in the
    document-output-stream write method.
    Fixes bug #611350.

 gedit/gedit-document-output-stream.c |   99 ++++++++++++++++++++++++++++++++--
 gedit/gedit-gio-document-loader.c    |    6 +--
 po/POTFILES.skip                     |    1 +
 3 files changed, 97 insertions(+), 9 deletions(-)
---
diff --git a/gedit/gedit-document-output-stream.c b/gedit/gedit-document-output-stream.c
index e6055a6..22b692a 100644
--- a/gedit/gedit-document-output-stream.c
+++ b/gedit/gedit-document-output-stream.c
@@ -22,7 +22,9 @@
 
 #include "config.h"
 
+#include <string.h>
 #include <glib.h>
+#include <glib/gi18n.h>
 #include <gio/gio.h>
 #include "gedit-document-output-stream.h"
 
@@ -32,13 +34,20 @@
  * there is no I/O involved and should be accessed only by the main
  * thread */
 
-#define GEDIT_DOCUMENT_OUTPUT_STREAM_GET_PRIVATE(object)(G_TYPE_INSTANCE_GET_PRIVATE((object), GEDIT_TYPE_DOCUMENT_OUTPUT_STREAM, GeditDocumentOutputStreamPrivate))
+#define GEDIT_DOCUMENT_OUTPUT_STREAM_GET_PRIVATE(object)(G_TYPE_INSTANCE_GET_PRIVATE((object),\
+							 GEDIT_TYPE_DOCUMENT_OUTPUT_STREAM,\
+							 GeditDocumentOutputStreamPrivate))
+
+#define MAX_UNICHAR_LEN 6
 
 struct _GeditDocumentOutputStreamPrivate
 {
 	GeditDocument *doc;
 	GtkTextIter    pos;
 
+	gchar *buffer;
+	gsize buflen;
+
 	guint is_initialized : 1;
 	guint is_closed : 1;
 };
@@ -102,6 +111,16 @@ gedit_document_output_stream_get_property (GObject    *object,
 }
 
 static void
+gedit_document_output_stream_finalize (GObject *object)
+{
+	GeditDocumentOutputStream *stream = GEDIT_DOCUMENT_OUTPUT_STREAM (object);
+
+	g_free (stream->priv->buffer);
+
+	G_OBJECT_CLASS (gedit_document_output_stream_parent_class)->finalize (object);
+}
+
+static void
 gedit_document_output_stream_class_init (GeditDocumentOutputStreamClass *klass)
 {
 	GObjectClass *object_class = G_OBJECT_CLASS (klass);
@@ -109,6 +128,7 @@ gedit_document_output_stream_class_init (GeditDocumentOutputStreamClass *klass)
 
 	object_class->get_property = gedit_document_output_stream_get_property;
 	object_class->set_property = gedit_document_output_stream_set_property;
+	object_class->finalize = gedit_document_output_stream_finalize;
 
 	stream_class->write_fn = gedit_document_output_stream_write;
 	stream_class->close_fn = gedit_document_output_stream_close;
@@ -130,6 +150,9 @@ gedit_document_output_stream_init (GeditDocumentOutputStream *stream)
 {
 	stream->priv = GEDIT_DOCUMENT_OUTPUT_STREAM_GET_PRIVATE (stream);
 
+	stream->priv->buffer = NULL;
+	stream->priv->buflen = 0;
+
 	stream->priv->is_initialized = FALSE;
 	stream->priv->is_closed = FALSE;
 }
@@ -239,11 +262,19 @@ gedit_document_output_stream_write (GOutputStream            *stream,
 				    GCancellable             *cancellable,
 				    GError                  **error)
 {
-	GeditDocumentOutputStream *ostream = GEDIT_DOCUMENT_OUTPUT_STREAM (stream);
+	GeditDocumentOutputStream *ostream;
+	gchar *text;
+	gsize len;
+	gboolean freetext = FALSE;
+	const gchar *end;
+	gsize nvalid;
+	gboolean valid;
 
 	if (g_cancellable_set_error_if_cancelled (cancellable, error))
 		return -1;
 
+	ostream = GEDIT_DOCUMENT_OUTPUT_STREAM (stream);
+
 	if (!ostream->priv->is_initialized)
 	{
 		/* Init the undoable action */
@@ -258,10 +289,63 @@ gedit_document_output_stream_write (GOutputStream            *stream,
 		ostream->priv->is_initialized = TRUE;
 	}
 
+	if (ostream->priv->buflen > 0)
+	{
+		len = ostream->priv->buflen + count;
+		text = g_new (gchar , len + 1);
+		memcpy (text, ostream->priv->buffer, ostream->priv->buflen);
+		memcpy (text + ostream->priv->buflen, buffer, count);
+		text[len] = '\0';
+		g_free (ostream->priv->buffer);
+		ostream->priv->buffer = NULL;
+		ostream->priv->buflen = 0;
+		freetext = TRUE;
+	}
+	else
+	{
+		text = (gchar *) buffer;
+		len = count;
+	}
+
+	/* validate */
+	valid = g_utf8_validate (text, len, &end);
+	nvalid = end - text;
+
+	if (!valid)
+	{
+		gsize remainder;
+
+		remainder = len - nvalid;
+
+		if ((remainder < MAX_UNICHAR_LEN) &&
+		    (g_utf8_get_char_validated (text + nvalid, remainder) == (gunichar)-2))
+		{
+			ostream->priv->buffer = g_strndup (end, remainder);
+			ostream->priv->buflen = remainder;
+			len -= remainder;
+		}
+		else
+		{
+			/* TODO: we could escape invalid text and tag it in red
+			 * and make the doc readonly.
+			 */
+			g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA,
+				     _("Invalid UTF-8 sequence in input"));
+
+			if (freetext)
+				g_free (text);
+
+			return -1;
+		}
+	}
+
 	gtk_text_buffer_insert (GTK_TEXT_BUFFER (ostream->priv->doc),
-				&ostream->priv->pos, buffer, count);
+				&ostream->priv->pos, text, len);
 
-	return count;
+	if (freetext)
+		g_free (text);
+
+	return len;
 }
 
 static gboolean
@@ -277,5 +361,12 @@ gedit_document_output_stream_close (GOutputStream     *stream,
 		ostream->priv->is_closed = TRUE;
 	}
 
+	if (ostream->priv->buflen > 0)
+	{
+		g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA,
+			     _("Incomplete UTF-8 sequence in input"));
+		return FALSE;
+	}
+
 	return TRUE;
 }
diff --git a/gedit/gedit-gio-document-loader.c b/gedit/gedit-gio-document-loader.c
index 53a1869..57a00a0 100644
--- a/gedit/gedit-gio-document-loader.c
+++ b/gedit/gedit-gio-document-loader.c
@@ -449,7 +449,6 @@ finish_query_info (AsyncData *async)
 {
 	GeditGioDocumentLoader *gvloader;
 	GeditDocumentLoader *loader;
-	GInputStream *utf8_stream;
 	GInputStream *conv_stream;
 	GFileInfo *info;
 	GSList *candidate_encodings;
@@ -489,10 +488,7 @@ finish_query_info (AsyncData *async)
 						    G_CONVERTER (gvloader->priv->converter));
 	g_object_unref (gvloader->priv->stream);
 
-	utf8_stream = g_utf8_input_stream_new (conv_stream);
-	g_object_unref (conv_stream);
-
-	gvloader->priv->stream = utf8_stream;
+	gvloader->priv->stream = conv_stream;
 
 	/* Output stream */
 	gvloader->priv->output = gedit_document_output_stream_new (loader->document);
diff --git a/po/POTFILES.skip b/po/POTFILES.skip
index dc27561..3e6ea29 100644
--- a/po/POTFILES.skip
+++ b/po/POTFILES.skip
@@ -1,5 +1,6 @@
 data/gedit.desktop.in
 data/gedit.schemas.in
+gedit/gedit-document-output-stream.c
 gedit/dialogs/gedit-style-scheme-dialog.c
 gedit/dialogs/gedit-style-scheme-dialog.ui
 gedit/dialogs/gedit-style-scheme-generator.c



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]