[gedit] Use new filter streams to load the document.



commit 4bc2c698f14a3a5420c35bc5edc842df2b481855
Author: Ignacio Casal Quinteiro <icq gnome org>
Date:   Mon Dec 28 15:05:49 2009 +0100

    Use new filter streams to load the document.
    
    Use the converter and utf8 validator streams to load the document.
    Also added GeditSmartCharsetConverter to detect and load the right
    encoding.

 gedit/Makefile.am                     |    2 +
 gedit/gedit-document-loader.c         |  168 -----------------
 gedit/gedit-document-loader.h         |    5 -
 gedit/gedit-gio-document-loader.c     |  234 +++++++++++++++++++++---
 gedit/gedit-smart-charset-converter.c |  317 +++++++++++++++++++++++++++++++++
 gedit/gedit-smart-charset-converter.h |   64 +++++++
 6 files changed, 587 insertions(+), 203 deletions(-)
---
diff --git a/gedit/Makefile.am b/gedit/Makefile.am
index e9eae45..295d0bf 100644
--- a/gedit/Makefile.am
+++ b/gedit/Makefile.am
@@ -83,6 +83,7 @@ NOINST_H_FILES =			\
 	gedit-print-job.h		\
 	gedit-print-preview.h		\
 	gedit-session.h			\
+	gedit-smart-charset-converter.h	\
 	gedit-style-scheme-manager.h	\
 	gedit-tab-label.h		\
 	gedittextregion.h		\
@@ -180,6 +181,7 @@ libgedit_la_SOURCES = 			\
 	gedit-print-preview.c		\
 	gedit-progress-message-area.c	\
 	gedit-session.c			\
+	gedit-smart-charset-converter.c	\
 	gedit-statusbar.c		\
 	gedit-status-combo-box.c	\
 	gedit-style-scheme-manager.c	\
diff --git a/gedit/gedit-document-loader.c b/gedit/gedit-document-loader.c
index 923eb6a..390c6c1 100644
--- a/gedit/gedit-document-loader.c
+++ b/gedit/gedit-document-loader.c
@@ -204,174 +204,6 @@ gedit_document_loader_init (GeditDocumentLoader *loader)
 	loader->used = FALSE;
 }
 
-static void
-insert_text_in_document (GeditDocumentLoader *loader,
-			 const gchar         *text,
-			 gint                 len)
-{
-	GeditDocument *doc = loader->document;
-
-	g_return_if_fail (text != NULL);
-
-	gtk_source_buffer_begin_not_undoable_action (GTK_SOURCE_BUFFER (doc));
-
-	/* If the last char is a newline, don't add it to the buffer (otherwise
-	   GtkTextView shows it as an empty line). See bug #324942. */
-	if ((len > 0) && (text[len-1] == '\n'))
-		len--;
-
-	/* Insert text in the buffer */
-	gtk_text_buffer_set_text (GTK_TEXT_BUFFER (doc), text, len);
-
-	gtk_text_buffer_set_modified (GTK_TEXT_BUFFER (doc), FALSE);
-
-	gtk_source_buffer_end_not_undoable_action (GTK_SOURCE_BUFFER (doc));
-}
-
-static const GeditEncoding *
-get_metadata_encoding (GeditDocumentLoader *loader)
-{
-	const GeditEncoding *enc = NULL;
-
-#ifdef G_OS_WIN32
-	gchar *charset;
-	const gchar *uri;
-
-	uri = gedit_document_loader_get_uri (loader);
-
-	charset = gedit_metadata_manager_get (uri, "encoding");
-
-	if (charset == NULL)
-		return NULL;
-
-	enc = gedit_encoding_get_from_charset (charset);
-
-	g_free (charset);
-#else
-	GFileInfo *info;
-
-	info = gedit_document_loader_get_info (loader);
-
-	/* check if the encoding was set in the metadata */
-	if (g_file_info_has_attribute (info, GEDIT_METADATA_ATTRIBUTE_ENCODING))
-	{
-		const gchar *charset;
-
-		charset = g_file_info_get_attribute_string (info,
-							    GEDIT_METADATA_ATTRIBUTE_ENCODING);
-
-		if (charset == NULL)
-			return NULL;
-		
-		enc = gedit_encoding_get_from_charset (charset);
-	}
-#endif
-
-	return enc;
-}
-
-/* This function is only meant to be called by child classes */
-gboolean
-gedit_document_loader_update_document_contents (GeditDocumentLoader  *loader,
-					        const gchar          *file_contents,
-					        gint                  file_size,
-					        GError              **error)
-{
-	gedit_debug (DEBUG_LOADER);
-
-	g_return_val_if_fail (file_size >= 0, FALSE);
-	g_return_val_if_fail (file_contents != NULL, FALSE);
-
-	/* short-circuit the case where the file is empty */
-	if (file_size == 0)
-	{
-		if (loader->encoding == NULL)
-			loader->auto_detected_encoding = gedit_encoding_get_current ();
-		insert_text_in_document (loader, "", 0);
-		return TRUE;
-	}
-
-	if (loader->encoding == gedit_encoding_get_utf8 ())
-	{
-		if (g_utf8_validate (file_contents, file_size, NULL))
-		{
-			insert_text_in_document (loader,
-						 file_contents,
-						 file_size);
-			return TRUE;
-		}
-		else
-		{
-			g_set_error (error,
-				     G_CONVERT_ERROR,
-				     G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
-				     "The file you are trying to open contains an invalid byte sequence.");
-			return FALSE;
-		}
-	}
-	else
-	{
-		GError *conv_error = NULL;
-		gchar *converted_text = NULL;
-		gsize new_len = file_size;
-
-		/* Autodetecting the encoding */
-		if (loader->encoding == NULL)
-		{
-			const GeditEncoding *metadata_encoding;
-
-			/* first try with the encoding stored in the metadata, if any */
-			metadata_encoding = get_metadata_encoding (loader);
-
-			if (metadata_encoding != NULL)
-			{
-				converted_text = gedit_convert_to_utf8 (
-								file_contents,
-								file_size,
-								&metadata_encoding,
-								&new_len,
-								NULL);
-
-				if (converted_text != NULL)
-					loader->auto_detected_encoding = metadata_encoding;
-			}
-		}
-
-		if (converted_text == NULL)
-		{
-			loader->auto_detected_encoding = loader->encoding;
-
-			converted_text = gedit_convert_to_utf8 (
-							file_contents,
-							file_size,
-							&loader->auto_detected_encoding,
-							&new_len,
-							&conv_error);
-		}
-
-		if (converted_text == NULL)
-		{
-			g_return_val_if_fail (conv_error != NULL, FALSE);
-
-			g_propagate_error (error, conv_error);
-
-			return FALSE;
-		}
-		else
-		{
-			insert_text_in_document (loader,
-						 converted_text,
-						 new_len);
-
-			g_free (converted_text);
-
-			return TRUE;
-		}
-	}
-
-	g_return_val_if_reached (FALSE);
-}
-
 void
 gedit_document_loader_loading (GeditDocumentLoader *loader,
 			       gboolean             completed,
diff --git a/gedit/gedit-document-loader.h b/gedit/gedit-document-loader.h
index 45805d7..84bfcfb 100644
--- a/gedit/gedit-document-loader.h
+++ b/gedit/gedit-document-loader.h
@@ -98,11 +98,6 @@ GeditDocumentLoader 	*gedit_document_loader_new 		(GeditDocument       *doc,
 								 const gchar         *uri,
 								 const GeditEncoding *encoding);
 
-gboolean		 gedit_document_loader_update_document_contents
-								(GeditDocumentLoader  *loader,
-								 const gchar          *file_contents,
-								 gint                  file_size,
-								 GError              **error);
 void			 gedit_document_loader_loading		(GeditDocumentLoader *loader,
 								 gboolean             completed,
 								 GError              *error);
diff --git a/gedit/gedit-gio-document-loader.c b/gedit/gedit-gio-document-loader.c
index 79b100b..5ba8909 100644
--- a/gedit/gedit-gio-document-loader.c
+++ b/gedit/gedit-gio-document-loader.c
@@ -39,6 +39,8 @@
 #include <gio/gio.h>
 
 #include "gedit-gio-document-loader.h"
+#include "gedit-smart-charset-converter.h"
+#include "gedit-prefs-manager.h"
 #include "gedit-debug.h"
 #include "gedit-utils.h"
 
@@ -77,17 +79,20 @@ struct _GeditGioDocumentLoaderPrivate
 
 	/* Handle for remote files */
 	GCancellable 	 *cancellable;
-	GFileInputStream *stream;
+	GInputStream	 *stream;
+	GeditSmartCharsetConverter *converter;
 
-	gchar            *buffer;
+	gchar             buffer[READ_CHUNK_SIZE];
 
 	GError           *error;
+
+	guint		  started_insert : 1;
 };
 
 G_DEFINE_TYPE(GeditGioDocumentLoader, gedit_gio_document_loader, GEDIT_TYPE_DOCUMENT_LOADER)
 
 static void
-gedit_gio_document_loader_finalize (GObject *object)
+gedit_gio_document_loader_dispose (GObject *object)
 {
 	GeditGioDocumentLoaderPrivate *priv;
 
@@ -97,19 +102,39 @@ gedit_gio_document_loader_finalize (GObject *object)
 	{
 		g_cancellable_cancel (priv->cancellable);
 		g_object_unref (priv->cancellable);
+		priv->cancellable = NULL;
 	}
-	
-	if (priv->stream)
+
+	if (priv->stream != NULL)
+	{
 		g_object_unref (priv->stream);
+		priv->stream = NULL;
+	}
 
-	g_free (priv->buffer);
+	if (priv->converter != NULL)
+	{
+		g_object_unref (priv->converter);
+		priv->converter = NULL;
+	}
 
-	if (priv->gfile)
+	if (priv->gfile != NULL)
+	{
 		g_object_unref (priv->gfile);
+		priv->gfile = NULL;
+	}
 
-	if (priv->error)
+	if (priv->error != NULL)
+	{
 		g_error_free (priv->error);
+		priv->error = NULL;
+	}
 
+	G_OBJECT_CLASS (gedit_gio_document_loader_parent_class)->dispose (object);
+}
+
+static void
+gedit_gio_document_loader_finalize (GObject *object)
+{
 	G_OBJECT_CLASS (gedit_gio_document_loader_parent_class)->finalize (object);
 }
 
@@ -119,6 +144,7 @@ gedit_gio_document_loader_class_init (GeditGioDocumentLoaderClass *klass)
 	GObjectClass *object_class = G_OBJECT_CLASS (klass);
 	GeditDocumentLoaderClass *loader_class = GEDIT_DOCUMENT_LOADER_CLASS (klass);
 
+	object_class->dispose = gedit_gio_document_loader_dispose;
 	object_class->finalize = gedit_gio_document_loader_finalize;
 
 	loader_class->load = gedit_gio_document_loader_load;
@@ -132,7 +158,10 @@ static void
 gedit_gio_document_loader_init (GeditGioDocumentLoader *gvloader)
 {
 	gvloader->priv = GEDIT_GIO_DOCUMENT_LOADER_GET_PRIVATE (gvloader);
+
+	gvloader->priv->converter = NULL;
 	gvloader->priv->error = NULL;
+	gvloader->priv->started_insert = FALSE;
 }
 
 static AsyncData *
@@ -140,7 +169,7 @@ async_data_new (GeditGioDocumentLoader *gvloader)
 {
 	AsyncData *async;
 	
-	async = g_new (AsyncData, 1);
+	async = g_slice_new (AsyncData);
 	async->loader = gvloader;
 	async->cancellable = g_object_ref (gvloader->priv->cancellable);
 	async->tried_mount = FALSE;
@@ -152,21 +181,64 @@ static void
 async_data_free (AsyncData *async)
 {
 	g_object_unref (async->cancellable);
-	g_free (async);
+	g_slice_free (AsyncData, async);
+}
+
+/* Look up the encoding recorded in the metadata of the file being loaded.
+   The result stays NULL when no charset is stored for it. */
+static const GeditEncoding *
+get_metadata_encoding (GeditDocumentLoader *loader)
+{
+	const GeditEncoding *found = NULL;
+
+#ifdef G_OS_WIN32
+	/* G_OS_WIN32 builds ask the metadata manager, keyed by uri */
+	const gchar *uri;
+	gchar *stored;
+
+	uri = gedit_document_loader_get_uri (loader);
+	stored = gedit_metadata_manager_get (uri, "encoding");
+
+	if (stored != NULL)
+	{
+		found = gedit_encoding_get_from_charset (stored);
+		g_free (stored);
+	}
+#else
+	/* Other builds read an attribute of the loader's GFileInfo */
+	GFileInfo *info;
+
+	info = gedit_document_loader_get_info (loader);
+
+	if (g_file_info_has_attribute (info, GEDIT_METADATA_ATTRIBUTE_ENCODING))
+	{
+		const gchar *stored;
+
+		stored = g_file_info_get_attribute_string (info,
+							   GEDIT_METADATA_ATTRIBUTE_ENCODING);
+
+		/* a NULL string leaves the result at NULL */
+		if (stored != NULL)
+			found = gedit_encoding_get_from_charset (stored);
+	}
+#endif
+
+	return found;
 }
 
 static void
 remote_load_completed_or_failed (GeditGioDocumentLoader *gvloader, AsyncData *async)
 {
-	/* free the buffer */
-	g_free (gvloader->priv->buffer);
-	gvloader->priv->buffer = NULL;
+	GeditDocumentLoader *loader;
+
+	loader = GEDIT_DOCUMENT_LOADER (gvloader);
 
 	if (async)
 		async_data_free (async);
 		
 	if (gvloader->priv->stream)
-		g_input_stream_close_async (G_INPUT_STREAM (gvloader->priv->stream), G_PRIORITY_HIGH, NULL, NULL, NULL);
+		g_input_stream_close_async (G_INPUT_STREAM (gvloader->priv->stream),
+					    G_PRIORITY_HIGH, NULL, NULL, NULL);
 
 	gedit_document_loader_loading (GEDIT_DOCUMENT_LOADER (gvloader),
 				       TRUE,
@@ -184,6 +256,48 @@ async_failed (AsyncData *async, GError *error)
 }
 
 static void
+append_text_to_document (GeditDocumentLoader *loader,
+			 const gchar         *text,
+			 gint                 len)
+{
+	GtkTextBuffer *buffer;
+	GtkTextIter tail;
+
+	/* Insert text (len as given by the caller) at the end of the buffer */
+	buffer = GTK_TEXT_BUFFER (loader->document);
+	gtk_text_buffer_get_end_iter (buffer, &tail);
+	gtk_text_buffer_insert (buffer, &tail, text, len);
+}
+
+/* Finish filling the document: drop a trailing line feed, mark the
+   buffer as unmodified, close the not-undoable action and clear the
+   started_insert flag of the loader. */
+static void
+end_append_text_to_document (GeditDocumentLoader *loader)
+{
+	GtkTextBuffer *buffer = GTK_TEXT_BUFFER (loader->document);
+	GtkTextIter last, tail;
+
+	/* If the last char is a newline, remove it from the buffer (otherwise
+	   GtkTextView shows it as an empty line). See bug #324942. */
+	gtk_text_buffer_get_end_iter (buffer, &tail);
+	last = tail;
+
+	if (gtk_text_iter_backward_char (&last) &&
+	    g_unichar_break_type (gtk_text_iter_get_char (&last)) == G_UNICODE_BREAK_LINE_FEED)
+	{
+		gtk_text_buffer_delete (buffer, &last, &tail);
+	}
+
+	gtk_text_buffer_set_modified (buffer, FALSE);
+
+	gtk_source_buffer_end_not_undoable_action (GTK_SOURCE_BUFFER (loader->document));
+
+	/* read_file_chunk () opens a new action while this flag is unset */
+	GEDIT_GIO_DOCUMENT_LOADER (loader)->priv->started_insert = FALSE;
+}
+
+static void
 async_read_cb (GInputStream *stream,
 	       GAsyncResult *res,
 	       AsyncData    *async)
@@ -192,16 +306,17 @@ async_read_cb (GInputStream *stream,
 	GeditGioDocumentLoader *gvloader;
 	gssize bytes_read;
 	GError *error = NULL;
-	
+
+	gvloader = async->loader;
+
 	/* manually check cancelled state */
 	if (g_cancellable_is_cancelled (async->cancellable))
 	{
-		g_input_stream_close_async (stream, G_PRIORITY_HIGH, NULL, NULL, NULL);
-		async_data_free (async);
+		end_append_text_to_document (GEDIT_DOCUMENT_LOADER (gvloader));
+		remote_load_completed_or_failed (gvloader, async);
 		return;
 	}
 
-	gvloader = async->loader;
 	bytes_read = g_input_stream_read_finish (stream, res, &error);
 	
 	/* error occurred */
@@ -219,6 +334,7 @@ async_read_cb (GInputStream *stream,
 			     GEDIT_DOCUMENT_ERROR_TOO_BIG,
 			     "File too big");
 
+		end_append_text_to_document (GEDIT_DOCUMENT_LOADER (gvloader));
 		remote_load_completed_or_failed (gvloader, async);
 
 		return;
@@ -230,17 +346,19 @@ async_read_cb (GInputStream *stream,
 	/* end of the file, we are done! */
 	if (bytes_read == 0)
 	{
-		gedit_document_loader_update_document_contents (
-						GEDIT_DOCUMENT_LOADER (gvloader),
-						gvloader->priv->buffer,
-						gvloader->priv->bytes_read,
-						&gvloader->priv->error);
-		
+		GEDIT_DOCUMENT_LOADER (gvloader)->auto_detected_encoding =
+			gedit_smart_charset_converter_get_guessed (gvloader->priv->converter);
+
+		end_append_text_to_document (GEDIT_DOCUMENT_LOADER (gvloader));
 		remote_load_completed_or_failed (gvloader, async);
 
 		return;
 	}
 
+	append_text_to_document (GEDIT_DOCUMENT_LOADER (gvloader),
+				 gvloader->priv->buffer,
+				 bytes_read);
+
 	/* otherwise emit progress and read some more */
 
 	/* note that this signal blocks the read... check if it isn't
@@ -259,11 +377,20 @@ read_file_chunk (AsyncData *async)
 	GeditGioDocumentLoader *gvloader;
 	
 	gvloader = async->loader;
-	gvloader->priv->buffer = g_realloc (gvloader->priv->buffer,
-					    gvloader->priv->bytes_read + READ_CHUNK_SIZE);
+
+	if (!gvloader->priv->started_insert)
+	{
+		GeditDocumentLoader *loader;
+
+		loader = GEDIT_DOCUMENT_LOADER (gvloader);
+
+		/* Init the undoable action */
+		gtk_source_buffer_begin_not_undoable_action (GTK_SOURCE_BUFFER (loader->document));
+		gvloader->priv->started_insert = TRUE;
+	}
 
 	g_input_stream_read_async (G_INPUT_STREAM (gvloader->priv->stream),
-				   gvloader->priv->buffer + gvloader->priv->bytes_read,
+				   gvloader->priv->buffer,
 				   READ_CHUNK_SIZE,
 				   G_PRIORITY_HIGH,
 				   async->cancellable,
@@ -271,14 +398,36 @@ read_file_chunk (AsyncData *async)
 				   async);
 }
 
+/* Build the list of encodings to try: the auto-detected encodings from the
+   preferences, preceded by the metadata encoding when there is one. */
+static GSList *
+get_candidate_encodings (GeditGioDocumentLoader *gvloader)
+{
+	const GeditEncoding *from_metadata;
+	GSList *candidates;
+
+	candidates = gedit_prefs_manager_get_auto_detected_encodings ();
+	from_metadata = get_metadata_encoding (GEDIT_DOCUMENT_LOADER (gvloader));
+
+	if (from_metadata == NULL)
+		return candidates;
+
+	return g_slist_prepend (candidates, (gpointer)from_metadata);
+}
+
 static void
 finish_query_info (AsyncData *async)
 {
 	GeditGioDocumentLoader *gvloader;
+	GeditDocumentLoader *loader;
+	GInputStream *utf8_stream;
+	GInputStream *conv_stream;
 	GFileInfo *info;
+	GSList *candidate_encodings;
 	
 	gvloader = async->loader;
-	info = GEDIT_DOCUMENT_LOADER (gvloader)->info;
+	loader = GEDIT_DOCUMENT_LOADER (gvloader);
+	info = loader->info;
 
 	/* if it's not a regular file, error out... */
 	if (g_file_info_has_attribute (info, G_FILE_ATTRIBUTE_STANDARD_TYPE) &&
@@ -294,6 +443,29 @@ finish_query_info (AsyncData *async)
 		return;
 	}
 
+	/* Get the candidate encodings (just loader->encoding when it is set) */
+	if (loader->encoding == NULL)
+	{
+		candidate_encodings = get_candidate_encodings (gvloader);
+	}
+	else
+	{
+		candidate_encodings = g_slist_prepend (NULL, /* local is uninitialized */
+						       (gpointer)loader->encoding);
+	}
+
+	gvloader->priv->converter = gedit_smart_charset_converter_new (candidate_encodings);
+	g_slist_free (candidate_encodings);
+	
+	conv_stream = g_converter_input_stream_new (gvloader->priv->stream,
+						    G_CONVERTER (gvloader->priv->converter));
+	g_object_unref (gvloader->priv->stream);
+
+	utf8_stream = g_utf8_input_stream_new (conv_stream);
+	g_object_unref (conv_stream);
+
+	gvloader->priv->stream = utf8_stream;
+
 	/* start reading */
 	read_file_chunk (async);
 }
@@ -401,9 +573,11 @@ async_read_ready_callback (GObject      *source,
 		async_data_free (async);
 		return;
 	}
-	
+
 	gvloader = async->loader;
-	gvloader->priv->stream = g_file_read_finish (gvloader->priv->gfile, res, &error);
+	
+	gvloader->priv->stream = G_INPUT_STREAM (g_file_read_finish (gvloader->priv->gfile,
+								     res, &error));
 
 	if (!gvloader->priv->stream)
 	{		
diff --git a/gedit/gedit-smart-charset-converter.c b/gedit/gedit-smart-charset-converter.c
new file mode 100644
index 0000000..e5b7633
--- /dev/null
+++ b/gedit/gedit-smart-charset-converter.c
@@ -0,0 +1,317 @@
+/*
+ * gedit-smart-charset-converter.c
+ * This file is part of gedit
+ *
+ * Copyright (C) 2009 - Ignacio Casal Quinteiro
+ *
+ * gedit is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * gedit is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with gedit; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, 
+ * Boston, MA  02110-1301  USA
+ */
+
+#include "gedit-smart-charset-converter.h"
+#include "gedit-debug.h"
+
+#include <gio/gio.h>
+#include <glib/gi18n.h>
+
+#define GEDIT_SMART_CHARSET_CONVERTER_GET_PRIVATE(object)(G_TYPE_INSTANCE_GET_PRIVATE((object), GEDIT_TYPE_SMART_CHARSET_CONVERTER, GeditSmartCharsetConverterPrivate))
+
+struct _GeditSmartCharsetConverterPrivate
+{
+	GCharsetConverter *charset_conv; /* result of guess_encoding (); NULL until guessed and for UTF-8 input */
+
+	GSList *encodings; /* copy of the candidate list passed to _new (); freed in finalize */
+	GSList *current_encoding; /* node of encodings last handed out by get_encoding () */
+
+	guint is_utf8 : 1; /* set by guess_encoding () when the input validates as UTF-8 */
+	guint use_first : 1; /* fallback-to-first-candidate flag; nothing in this file reads it */
+};
+
+static void gedit_smart_charset_converter_iface_init    (GConverterIface *iface);
+
+G_DEFINE_TYPE_WITH_CODE (GeditSmartCharsetConverter, gedit_smart_charset_converter,
+			 G_TYPE_OBJECT,
+			 G_IMPLEMENT_INTERFACE (G_TYPE_CONVERTER,
+						gedit_smart_charset_converter_iface_init))
+
+/* Frees the nodes of the candidate list and chains up to the parent. */
+static void
+gedit_smart_charset_converter_finalize (GObject *object)
+{
+	GeditSmartCharsetConverterPrivate *priv;
+
+	priv = GEDIT_SMART_CHARSET_CONVERTER (object)->priv;
+	gedit_debug_message (DEBUG_UTILS, "finalizing smart charset converter");
+	g_slist_free (priv->encodings);
+	G_OBJECT_CLASS (gedit_smart_charset_converter_parent_class)->finalize (object);
+}
+
+/* Drops the reference on the charset converter, if any, and chains up. */
+static void
+gedit_smart_charset_converter_dispose (GObject *object)
+{
+	GeditSmartCharsetConverterPrivate *priv = GEDIT_SMART_CHARSET_CONVERTER (object)->priv;
+
+	gedit_debug_message (DEBUG_UTILS, "disposing smart charset converter");
+	if (priv->charset_conv != NULL)
+	{
+		g_object_unref (priv->charset_conv);
+		priv->charset_conv = NULL;
+	}
+
+	G_OBJECT_CLASS (gedit_smart_charset_converter_parent_class)->dispose (object);
+}
+
+/* Installs the dispose/finalize handlers and reserves the private data. */
+static void
+gedit_smart_charset_converter_class_init (GeditSmartCharsetConverterClass *klass)
+{
+	GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
+
+	g_type_class_add_private (gobject_class, sizeof (GeditSmartCharsetConverterPrivate));
+	gobject_class->dispose = gedit_smart_charset_converter_dispose;
+	gobject_class->finalize = gedit_smart_charset_converter_finalize;
+}
+
+/* Instance init: caches the private data and clears the detection state. */
+static void
+gedit_smart_charset_converter_init (GeditSmartCharsetConverter *self)
+{
+	GeditSmartCharsetConverterPrivate *priv;
+	priv = self->priv = GEDIT_SMART_CHARSET_CONVERTER_GET_PRIVATE (self);
+	priv->is_utf8 = FALSE;
+	priv->current_encoding = NULL;
+	priv->encodings = NULL;
+	priv->charset_conv = NULL;
+	gedit_debug_message (DEBUG_UTILS, "initializing smart charset converter");
+}
+
+/* Advance the cursor over the candidate list and return the encoding it
+   lands on. An unset cursor (initially, after a reset, or after a NULL
+   return) starts at the head. Returns NULL when no candidate is left. */
+static const GeditEncoding *
+get_encoding (GeditSmartCharsetConverter *smart)
+{
+	GeditSmartCharsetConverterPrivate *priv = smart->priv;
+
+	if (priv->current_encoding == NULL)
+		priv->current_encoding = priv->encodings;
+	else
+		priv->current_encoding = g_slist_next (priv->current_encoding);
+
+	if (priv->current_encoding != NULL)
+		return (const GeditEncoding *)priv->current_encoding->data;
+	/* Do not wrap around to the head of the list: guess_encoding ()
+	   loops until it gets NULL or a usable encoding, so handing the
+	   first candidate out again would make it spin forever when no
+	   candidate fits (and would dereference NULL on an empty list). */
+	return NULL;
+}
+
+/* Pick the first candidate encoding that can decode the given block.
+   Sets is_utf8 and returns NULL when UTF-8 is chosen, returns a new
+   converter to UTF-8 for any other choice, and returns NULL with
+   is_utf8 untouched when get_encoding () runs out of candidates. */
+static GCharsetConverter *
+guess_encoding (GeditSmartCharsetConverter *smart,
+		const void                 *inbuf,
+		gsize                       inbuf_size)
+{
+	GCharsetConverter *conv = NULL;
+
+	/* We just check the first block */
+	while (TRUE)
+	{
+		const GeditEncoding *enc;
+		gchar *conv_text;
+		GError *err = NULL;
+
+		/* We get the next encoding we have in the list */
+		enc = get_encoding (smart);
+
+		/* if it is NULL we didn't guess anything */
+		if (enc == NULL)
+		{
+			break;
+		}
+
+		if (enc == gedit_encoding_get_utf8 ())
+		{
+			const gchar *end;
+
+			/* The block may stop in the middle of a multibyte
+			   character; a valid prefix followed only by such an
+			   incomplete sequence still counts as UTF-8. */
+			if (g_utf8_validate (inbuf, inbuf_size, &end) ||
+			    g_utf8_get_char_validated (end,
+						       (const gchar *) inbuf + inbuf_size - end) ==
+			    (gunichar) -2)
+			{
+				smart->priv->is_utf8 = TRUE;
+				break;
+			}
+
+			continue;
+		}
+
+		/* Let's try converting the input to one encoding, if there
+		   was no error or the error was because of needed more input
+		   we create the charset converter for that encoding */
+		conv_text = g_convert (inbuf,
+				       inbuf_size,
+				       "UTF-8",
+				       gedit_encoding_get_charset (enc),
+				       NULL,
+				       NULL,
+				       &err);
+
+		g_free (conv_text);
+
+		/* FIXME: Is accepting partial input ok or should we skip it? */
+		if (err == NULL ||
+		    g_error_matches (err, G_CONVERT_ERROR,
+				     G_CONVERT_ERROR_PARTIAL_INPUT))
+		{
+			g_clear_error (&err);
+			conv = g_charset_converter_new ("UTF-8",
+							gedit_encoding_get_charset (enc),
+							NULL);
+			break;
+		}
+
+		g_error_free (err);
+	}
+
+	return conv;
+}
+
+/* GConverter::convert implementation: the encoding is guessed from the
+   first non-empty input, then UTF-8 input is copied through unchanged
+   and anything else is handed to the GCharsetConverter that was built. */
+static GConverterResult
+gedit_smart_charset_converter_convert (GConverter *converter,
+				       const void *inbuf,
+				       gsize       inbuf_size,
+				       void       *outbuf,
+				       gsize       outbuf_size,
+				       GConverterFlags flags,
+				       gsize      *bytes_read,
+				       gsize      *bytes_written,
+				       GError    **error)
+{
+	GeditSmartCharsetConverter *smart = GEDIT_SMART_CHARSET_CONVERTER (converter);
+	GeditSmartCharsetConverterPrivate *priv = smart->priv;
+
+	/* With no input the result depends only on the flags */
+	if (inbuf_size == 0)
+	{
+		if (flags & G_CONVERTER_INPUT_AT_END)
+		{
+			return G_CONVERTER_FINISHED;
+		}
+		else if (flags & G_CONVERTER_FLUSH)
+		{
+			return G_CONVERTER_FLUSHED;
+		}
+		g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
+				     _("Incomplete multibyte sequence in input"));
+		return G_CONVERTER_ERROR;
+	}
+
+	/* Nothing has been chosen so far: guess from this input */
+	if (!priv->is_utf8 && priv->charset_conv == NULL)
+	{
+		priv->charset_conv = guess_encoding (smart, inbuf, inbuf_size);
+
+		/* Neither a converter nor the UTF-8 flag means that no
+		   candidate was accepted */
+		if (!priv->is_utf8 && priv->charset_conv == NULL)
+		{
+			g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+					     _("It is not possible to detect the encoding automatically"));
+			return G_CONVERTER_ERROR;
+		}
+	}
+
+	/* Other encodings go through the charset converter */
+	if (!priv->is_utf8)
+	{
+		return g_converter_convert (G_CONVERTER (priv->charset_conv),
+					    inbuf, inbuf_size,
+					    outbuf, outbuf_size,
+					    flags,
+					    bytes_read, bytes_written,
+					    error);
+	}
+
+	/* UTF-8 needs no conversion: copy as much as fits in the output */
+	{
+		gsize size = MIN (inbuf_size, outbuf_size);
+		memcpy (outbuf, inbuf, size);
+		*bytes_read = size;
+		*bytes_written = size;
+	}
+
+	return G_CONVERTER_CONVERTED;
+}
+
+/* GConverter::reset: forget the guess so the next convert detects again. */
+static void
+gedit_smart_charset_converter_reset (GConverter *converter)
+{
+	GeditSmartCharsetConverterPrivate *priv = GEDIT_SMART_CHARSET_CONVERTER (converter)->priv;
+
+	if (priv->charset_conv != NULL)
+	{
+		g_object_unref (priv->charset_conv);
+		priv->charset_conv = NULL;
+	}
+	priv->is_utf8 = FALSE;
+	priv->current_encoding = NULL;
+}
+
+static void
+gedit_smart_charset_converter_iface_init (GConverterIface *iface)
+{
+	iface->reset = gedit_smart_charset_converter_reset;	/* GConverter::reset */
+	iface->convert = gedit_smart_charset_converter_convert;	/* GConverter::convert */
+}
+
+/* Creates a converter that tries candidate_encodings in list order. The
+   list is copied; a NULL (empty) list is rejected and NULL is returned. */
+GeditSmartCharsetConverter *
+gedit_smart_charset_converter_new (GSList *candidate_encodings)
+{
+	GeditSmartCharsetConverter *converter;
+
+	g_return_val_if_fail (candidate_encodings != NULL, NULL);
+
+	converter = g_object_new (GEDIT_TYPE_SMART_CHARSET_CONVERTER, NULL);
+	converter->priv->encodings = g_slist_copy (candidate_encodings);
+	return converter;
+}
+
+/* Returns the encoding the candidate cursor currently points at, or NULL
+   when the cursor is unset (e.g. nothing tried yet, or after a reset). */
+const GeditEncoding *
+gedit_smart_charset_converter_get_guessed (GeditSmartCharsetConverter *smart)
+{
+	GSList *node;
+
+	g_return_val_if_fail (GEDIT_IS_SMART_CHARSET_CONVERTER (smart), NULL);
+
+	node = smart->priv->current_encoding;
+	return (node != NULL) ? (const GeditEncoding *)node->data : NULL;
+}
diff --git a/gedit/gedit-smart-charset-converter.h b/gedit/gedit-smart-charset-converter.h
new file mode 100644
index 0000000..06e621d
--- /dev/null
+++ b/gedit/gedit-smart-charset-converter.h
@@ -0,0 +1,64 @@
+/*
+ * gedit-smart-charset-converter.h
+ * This file is part of gedit
+ *
+ * Copyright (C) 2009 - Ignacio Casal Quinteiro
+ *
+ * gedit is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * gedit is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with gedit; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, 
+ * Boston, MA  02110-1301  USA
+ */
+
+#ifndef __GEDIT_SMART_CHARSET_CONVERTER_H__
+#define __GEDIT_SMART_CHARSET_CONVERTER_H__
+
+#include <glib-object.h>
+
+#include "gedit-encodings.h"
+
+G_BEGIN_DECLS
+
+#define GEDIT_TYPE_SMART_CHARSET_CONVERTER		(gedit_smart_charset_converter_get_type ())
+#define GEDIT_SMART_CHARSET_CONVERTER(obj)		(G_TYPE_CHECK_INSTANCE_CAST ((obj), GEDIT_TYPE_SMART_CHARSET_CONVERTER, GeditSmartCharsetConverter))
+#define GEDIT_SMART_CHARSET_CONVERTER_CONST(obj)	(G_TYPE_CHECK_INSTANCE_CAST ((obj), GEDIT_TYPE_SMART_CHARSET_CONVERTER, GeditSmartCharsetConverter const))
+#define GEDIT_SMART_CHARSET_CONVERTER_CLASS(klass)	(G_TYPE_CHECK_CLASS_CAST ((klass), GEDIT_TYPE_SMART_CHARSET_CONVERTER, GeditSmartCharsetConverterClass))
+#define GEDIT_IS_SMART_CHARSET_CONVERTER(obj)		(G_TYPE_CHECK_INSTANCE_TYPE ((obj), GEDIT_TYPE_SMART_CHARSET_CONVERTER))
+#define GEDIT_IS_SMART_CHARSET_CONVERTER_CLASS(klass)	(G_TYPE_CHECK_CLASS_TYPE ((klass), GEDIT_TYPE_SMART_CHARSET_CONVERTER))
+#define GEDIT_SMART_CHARSET_CONVERTER_GET_CLASS(obj)	(G_TYPE_INSTANCE_GET_CLASS ((obj), GEDIT_TYPE_SMART_CHARSET_CONVERTER, GeditSmartCharsetConverterClass))
+
+typedef struct _GeditSmartCharsetConverter		GeditSmartCharsetConverter;
+typedef struct _GeditSmartCharsetConverterClass		GeditSmartCharsetConverterClass;
+typedef struct _GeditSmartCharsetConverterPrivate	GeditSmartCharsetConverterPrivate;
+
+struct _GeditSmartCharsetConverter
+{
+	GObject parent; /* instance derives directly from GObject */
+	
+	GeditSmartCharsetConverterPrivate *priv; /* assigned in gedit_smart_charset_converter_init () */
+};
+
+struct _GeditSmartCharsetConverterClass
+{
+	GObjectClass parent_class; /* only member: the class adds no fields of its own */
+};
+
+GType gedit_smart_charset_converter_get_type (void) G_GNUC_CONST;
+
+GeditSmartCharsetConverter	*gedit_smart_charset_converter_new		(GSList *candidate_encodings);
+
+const GeditEncoding		*gedit_smart_charset_converter_get_guessed	(GeditSmartCharsetConverter *smart);
+
+G_END_DECLS
+
+#endif /* __GEDIT_SMART_CHARSET_CONVERTER_H__ */



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]