[gedit] Fix smart converter encoding guessing and add test case



commit 61a8c6f50d09b12f76ad422c9d4f87c503cd190b
Author: Jesse van den Kieboom <jessevdk gnome org>
Date:   Tue Jan 12 17:37:54 2010 +0100

    Fix smart converter encoding guessing and add test case

 gedit/gedit-document.c                |   20 +++--
 gedit/gedit-document.h                |    3 +
 gedit/gedit-gio-document-loader.c     |    6 +-
 gedit/gedit-io-error-message-area.c   |   84 +++++++++++++++----
 gedit/gedit-smart-charset-converter.c |  104 +++++++++++++++++-------
 gedit/gedit-tab.c                     |   94 ++++++++++++++++------
 tests/smart-converter.c               |  143 +++++++++++++++++++++++++++++++--
 7 files changed, 366 insertions(+), 88 deletions(-)
---
diff --git a/gedit/gedit-document.c b/gedit/gedit-document.c
index 6078101..d7d09be 100644
--- a/gedit/gedit-document.c
+++ b/gedit/gedit-document.c
@@ -42,6 +42,7 @@
 
 #include "gedit-prefs-manager-app.h"
 #include "gedit-document.h"
+#include "gedit-convert.h"
 #include "gedit-debug.h"
 #include "gedit-utils.h"
 #include "gedit-language-manager.h"
@@ -87,8 +88,6 @@ static void	gedit_document_save_real	(GeditDocument          *doc,
 						 const gchar            *uri,
 						 const GeditEncoding    *encoding,
 						 GeditDocumentSaveFlags  flags);
-static void	gedit_document_set_readonly	(GeditDocument *doc,
-						 gboolean       readonly);
 static void	to_search_region_range 		(GeditDocument *doc,
 						 GtkTextIter   *start, 
 						 GtkTextIter   *end);
@@ -1046,9 +1045,16 @@ set_readonly (GeditDocument *doc,
 	doc->priv->readonly = readonly;
 }
 
-static void
-gedit_document_set_readonly (GeditDocument *doc,
-			     gboolean       readonly)
+/**
+ * _gedit_document_set_readonly:
+ * @doc: a #GeditDocument
+ * @readonly: %TRUE to set the document as read-only
+ *
+ * If @readonly is %TRUE, sets @doc as read-only.
+ */
+void
+_gedit_document_set_readonly (GeditDocument *doc,
+			      gboolean       readonly)
 {
 	gedit_debug (DEBUG_DOCUMENT);
 
@@ -1119,7 +1125,7 @@ document_loader_loaded (GeditDocumentLoader *loader,
 			GeditDocument       *doc)
 {
 	/* load was successful */
-	if (error == NULL)
+	if (error == NULL || error->code == GEDIT_DOCUMENT_ERROR_CONVERSION_FALLBACK)
 	{
 		GtkTextIter iter;
 		GFileInfo *info;
@@ -1360,7 +1366,7 @@ document_saver_saving (GeditDocumentSaver *saver,
 
 			g_get_current_time (&doc->priv->time_of_last_save_or_load);
 
-			gedit_document_set_readonly (doc, FALSE);
+			_gedit_document_set_readonly (doc, FALSE);
 
 			gtk_text_buffer_set_modified (GTK_TEXT_BUFFER (doc),
 						      FALSE);
diff --git a/gedit/gedit-document.h b/gedit/gedit-document.h
index 15aa314..0b8fde9 100644
--- a/gedit/gedit-document.h
+++ b/gedit/gedit-document.h
@@ -267,6 +267,9 @@ void		 gedit_document_set_metadata	(GeditDocument *doc,
 /* 
  * Non exported functions
  */
+void		 _gedit_document_set_readonly 	(GeditDocument       *doc,
+						 gboolean             readonly);
+
 glong		 _gedit_document_get_seconds_since_last_save_or_load 
 						(GeditDocument       *doc);
 
diff --git a/gedit/gedit-gio-document-loader.c b/gedit/gedit-gio-document-loader.c
index 092eb0b..22c7c20 100644
--- a/gedit/gedit-gio-document-loader.c
+++ b/gedit/gedit-gio-document-loader.c
@@ -236,7 +236,7 @@ remote_load_completed_or_failed (GeditGioDocumentLoader *gvloader, AsyncData *as
 
 	if (async)
 		async_data_free (async);
-		
+
 	if (gvloader->priv->stream)
 		g_input_stream_close_async (G_INPUT_STREAM (gvloader->priv->stream),
 					    G_PRIORITY_HIGH, NULL, NULL, NULL);
@@ -359,11 +359,11 @@ async_read_cb (GInputStream *stream,
 		if ((gedit_smart_charset_converter_get_num_fallbacks (gvloader->priv->converter) != 0) &&
 		    gvloader->priv->error == NULL)
 		{
-			/* FIXME: Maybe check for some specific error ? */
 			g_set_error_literal (&gvloader->priv->error,
 					     GEDIT_DOCUMENT_ERROR,
 					     GEDIT_DOCUMENT_ERROR_CONVERSION_FALLBACK,
-					     _("There was a problem blah blah")); /* FIXME */
+					     "There was a conversion error and it was "
+					     "needed to use a fallback char");
 		}
 
 		end_append_text_to_document (GEDIT_DOCUMENT_LOADER (gvloader));
diff --git a/gedit/gedit-io-error-message-area.c b/gedit/gedit-io-error-message-area.c
index 8fb9e86..f6cd3b2 100644
--- a/gedit/gedit-io-error-message-area.c
+++ b/gedit/gedit-io-error-message-area.c
@@ -540,7 +540,8 @@ create_option_menu (GtkWidget *message_area, GtkWidget *vbox)
 
 static GtkWidget *
 create_conversion_error_message_area (const gchar *primary_text,
-				      const gchar *secondary_text)
+				      const gchar *secondary_text,
+				      gboolean     edit_anyway)
 {
 	GtkWidget *message_area;
 	GtkWidget *hbox_content;
@@ -558,21 +559,53 @@ create_conversion_error_message_area (const gchar *primary_text,
 						       _("_Retry"),
 						       GTK_STOCK_REDO,
 						       GTK_RESPONSE_OK);
-	gedit_message_area_add_button (GEDIT_MESSAGE_AREA (message_area),
-				       GTK_STOCK_CANCEL,
-				       GTK_RESPONSE_CANCEL);
+
+	if (edit_anyway)
+	{
+		gedit_message_area_add_button (GEDIT_MESSAGE_AREA (message_area),
+					       _("Edit Any_way"),
+					       GTK_RESPONSE_YES);
+		gedit_message_area_add_button (GEDIT_MESSAGE_AREA (message_area),
+					       _("D_on't Edit"),
+					       GTK_RESPONSE_CANCEL);
+	}
+	else
+	{
+		gedit_message_area_add_button (GEDIT_MESSAGE_AREA (message_area),
+					       GTK_STOCK_CANCEL,
+					       GTK_RESPONSE_CANCEL);
+	}
 #else
 	message_area = gtk_info_bar_new ();
+
 	info_bar_add_stock_button_with_text (GTK_INFO_BAR (message_area),
 					     _("_Retry"),
 					     GTK_STOCK_REDO,
 					     GTK_RESPONSE_OK);
 
-	gtk_info_bar_add_button (GTK_INFO_BAR (message_area),
-				 GTK_STOCK_CANCEL,
-				 GTK_RESPONSE_CANCEL);
-	gtk_info_bar_set_message_type (GTK_INFO_BAR (message_area),
-				       GTK_MESSAGE_ERROR);
+	if (edit_anyway)
+	{
+		gtk_info_bar_add_button (GTK_INFO_BAR (message_area),
+		/* Translators: the access key chosen for this string should be
+		 different from other main menu access keys (Open, Edit, View...) */
+					 _("Edit Any_way"),
+					 GTK_RESPONSE_YES);
+		gtk_info_bar_add_button (GTK_INFO_BAR (message_area),
+		/* Translators: the access key chosen for this string should be
+		 different from other main menu access keys (Open, Edit, View...) */
+					 _("D_on't Edit"),
+					 GTK_RESPONSE_CANCEL);
+		gtk_info_bar_set_message_type (GTK_INFO_BAR (message_area),
+					       GTK_MESSAGE_WARNING);
+	}
+	else
+	{
+		gtk_info_bar_add_button (GTK_INFO_BAR (message_area),
+					 GTK_STOCK_CANCEL,
+					 GTK_RESPONSE_CANCEL);
+		gtk_info_bar_set_message_type (GTK_INFO_BAR (message_area),
+					       GTK_MESSAGE_ERROR);
+	}
 #endif
 
 	hbox_content = gtk_hbox_new (FALSE, 8);
@@ -628,6 +661,7 @@ gedit_conversion_error_while_loading_message_area_new (
 	gchar *uri_for_display;
 	gchar *temp_uri_for_display;
 	GtkWidget *message_area;
+	gboolean edit_anyway = FALSE;
 	
 	g_return_val_if_fail (uri != NULL, NULL);
 	g_return_val_if_fail (error != NULL, NULL);
@@ -640,8 +674,8 @@ gedit_conversion_error_while_loading_message_area_new (
 	 * though the dialog uses wrapped text, if the URI doesn't contain
 	 * white space then the text-wrapping code is too stupid to wrap it.
 	 */
-	temp_uri_for_display = gedit_utils_str_middle_truncate (full_formatted_uri, 
-								MAX_URI_IN_DIALOG_LENGTH);								
+	temp_uri_for_display = gedit_utils_str_middle_truncate (full_formatted_uri,
+								MAX_URI_IN_DIALOG_LENGTH);
 	g_free (full_formatted_uri);
 	
 	uri_for_display = g_markup_printf_escaped ("<i>%s</i>", temp_uri_for_display);
@@ -652,18 +686,29 @@ gedit_conversion_error_while_loading_message_area_new (
 	else
 		encoding_name = g_strdup ("UTF-8");
 
-	if (error->domain == GEDIT_CONVERT_ERROR)
+	if (error->domain == GEDIT_CONVERT_ERROR &&
+	    error->code == GEDIT_CONVERT_ERROR_AUTO_DETECTION_FAILED)
 	{
-		g_return_val_if_fail (error->code == GEDIT_CONVERT_ERROR_AUTO_DETECTION_FAILED, NULL);
-		
 		error_message = g_strdup_printf (_("Could not open the file %s."),
-							 uri_for_display);
+						 uri_for_display);
 		message_details = g_strconcat (_("gedit has not been able to detect "
 				               "the character coding."), "\n", 
 				               _("Please check that you are not trying to open a binary file."), "\n",
 					       _("Select a character coding from the menu and try again."), NULL);
 	}
-	else 
+	else if (error->domain == GEDIT_DOCUMENT_ERROR &&
+	         error->code == GEDIT_DOCUMENT_ERROR_CONVERSION_FALLBACK)
+	{
+		error_message = g_strdup_printf (_("There was a problem opening the file %s."),
+						 uri_for_display);
+		message_details = g_strconcat (_("The file you opened has some invalid characters, "
+					       "if you continue editing this file you could make this "
+					       "document useless."), "\n",
+					       _("You can also choose another character encoding and try again."),
+					       NULL);
+		edit_anyway = TRUE;
+	}
+	else
 	{
 		
 		error_message = g_strdup_printf (_("Could not open the file %s using the %s character coding."),
@@ -673,7 +718,9 @@ gedit_conversion_error_while_loading_message_area_new (
 					       _("Select a different character coding from the menu and try again."), NULL);
 	}
 	
-	message_area = create_conversion_error_message_area (error_message, message_details);
+	message_area = create_conversion_error_message_area (error_message,
+							     message_details,
+							     edit_anyway);
 
 	g_free (uri_for_display);
 	g_free (encoding_name);
@@ -726,7 +773,8 @@ gedit_conversion_error_while_saving_message_area_new (
 	
 	message_area = create_conversion_error_message_area (
 								error_message,
-								message_details);
+								message_details,
+								FALSE);
 
 	g_free (uri_for_display);
 	g_free (encoding_name);
diff --git a/gedit/gedit-smart-charset-converter.c b/gedit/gedit-smart-charset-converter.c
index 2528c6e..6d4c947 100644
--- a/gedit/gedit-smart-charset-converter.c
+++ b/gedit/gedit-smart-charset-converter.c
@@ -121,6 +121,73 @@ get_encoding (GeditSmartCharsetConverter *smart)
 	return (const GeditEncoding *)smart->priv->current_encoding->data;
 }
 
+static gboolean
+try_convert (GCharsetConverter *converter,
+             const void        *inbuf,
+             gsize              inbuf_size)
+{
+	GError *err;
+	gsize bytes_read, nread;
+	gsize bytes_written, nwritten;
+	GConverterResult res;
+	gchar *out;
+	gboolean ret;
+	gsize out_size;
+
+	err = NULL;
+	nread = 0;
+	nwritten = 0;
+	out_size = inbuf_size * 4;
+	out = g_malloc (out_size);
+
+	do
+	{
+		res = g_converter_convert (G_CONVERTER (converter),
+		                           inbuf + nread,
+		                           inbuf_size - nread,
+		                           out + nwritten,
+		                           out_size - nwritten,
+		                           G_CONVERTER_INPUT_AT_END,
+		                           &bytes_read,
+		                           &bytes_written,
+		                           &err);
+
+		nread += bytes_read;
+		nwritten += bytes_written;
+	} while (res != G_CONVERTER_FINISHED && res != G_CONVERTER_ERROR && err == NULL);
+
+	if (err != NULL)
+	{
+		if (err->code == G_CONVERT_ERROR_PARTIAL_INPUT)
+		{
+			/* FIXME We can get partial input while guessing the
+			   encoding because we just take some amount of text
+			   to guess from. */
+			ret = TRUE;
+		}
+		else
+		{
+			ret = FALSE;
+		}
+
+		g_error_free (err);
+	}
+	else
+	{
+		ret = TRUE;
+	}
+
+	/* FIXME: Check the remainder? */
+	if (ret == TRUE && !g_utf8_validate (out, nwritten, NULL))
+	{
+		ret = FALSE;
+	}
+
+	g_free (out);
+
+	return ret;
+}
+
 static GCharsetConverter *
 guess_encoding (GeditSmartCharsetConverter *smart,
 		const void                 *inbuf,
@@ -136,10 +203,6 @@ guess_encoding (GeditSmartCharsetConverter *smart,
 	while (TRUE)
 	{
 		const GeditEncoding *enc;
-		gchar outbuf[inbuf_size];
-		GConverterResult ret;
-		gsize read, written;
-		GError *err = NULL;
 
 		if (conv != NULL)
 		{
@@ -147,7 +210,7 @@ guess_encoding (GeditSmartCharsetConverter *smart,
 			conv = NULL;
 		}
 
-		/* We get the first encoding we have in the list */
+		/* We get an encoding from the list */
 		enc = get_encoding (smart);
 
 		/* if it is NULL we didn't guess anything */
@@ -168,7 +231,7 @@ guess_encoding (GeditSmartCharsetConverter *smart,
 				break;
 			}
 
-			/* Check if the end is just less than one char */
+			/* Check if the end is less than one char */
 			remainder = inbuf_size - (end - (gchar *)inbuf);
 			if (remainder < 6)
 			{
@@ -189,28 +252,8 @@ guess_encoding (GeditSmartCharsetConverter *smart,
 			break;
 		}
 
-		ret = g_converter_convert (G_CONVERTER (conv),
-					   inbuf,
-					   inbuf_size,
-					   outbuf,
-					   inbuf_size,
-					   0,
-					   &read,
-					   &written,
-					   &err);
-
-		if (err != NULL)
-		{
-			/* FIXME: Is this ok or should we just skip it? */
-			if (err->code == G_CONVERT_ERROR_PARTIAL_INPUT)
-			{
-				g_error_free (err);
-				break;
-			}
-
-			g_error_free (err);
-		}
-		else
+		/* Try to convert */
+		if (try_convert (conv, inbuf, inbuf_size))
 		{
 			break;
 		}
@@ -218,6 +261,7 @@ guess_encoding (GeditSmartCharsetConverter *smart,
 
 	if (conv != NULL)
 	{
+		g_converter_reset (G_CONVERTER (conv));
 		g_charset_converter_set_use_fallback (conv, TRUE);
 	}
 
@@ -343,5 +387,9 @@ gedit_smart_charset_converter_get_num_fallbacks (GeditSmartCharsetConverter *sma
 {
 	g_return_val_if_fail (GEDIT_IS_SMART_CHARSET_CONVERTER (smart), FALSE);
 
+	if (smart->priv->charset_conv == NULL)
+		return FALSE;
+
 	return g_charset_converter_get_num_fallbacks (smart->priv->charset_conv) != 0;
 }
+
diff --git a/gedit/gedit-tab.c b/gedit/gedit-tab.c
index de5325e..964a6b7 100644
--- a/gedit/gedit-tab.c
+++ b/gedit/gedit-tab.c
@@ -503,41 +503,57 @@ conversion_loading_error_message_area_response (GtkWidget        *message_area,
 						GeditTab         *tab)
 {
 	GeditDocument *doc;
+	GeditView *view;
 	gchar *uri;
+	const GeditEncoding *encoding;
 
 	doc = gedit_tab_get_document (tab);
 	g_return_if_fail (GEDIT_IS_DOCUMENT (doc));
 
+	view = gedit_tab_get_view (tab);
+	g_return_if_fail (GEDIT_IS_VIEW (view));
+
 	uri = gedit_document_get_uri (doc);
 	g_return_if_fail (uri != NULL);
 
-	if (response_id == GTK_RESPONSE_OK)
+	switch (response_id)
 	{
-		const GeditEncoding *encoding;
+		case GTK_RESPONSE_OK:
+			encoding = gedit_conversion_error_message_area_get_encoding (
+					GTK_WIDGET (message_area));
 
-		encoding = gedit_conversion_error_message_area_get_encoding (
-				GTK_WIDGET (message_area));
+			g_return_if_fail (encoding != NULL);
 
-		g_return_if_fail (encoding != NULL);
-
-		set_message_area (tab, NULL);
-		gedit_tab_set_state (tab, GEDIT_TAB_STATE_LOADING);
+			set_message_area (tab, NULL);
+			gedit_tab_set_state (tab, GEDIT_TAB_STATE_LOADING);
 
-		tab->priv->tmp_encoding = encoding;
+			tab->priv->tmp_encoding = encoding;
 
-		g_return_if_fail (tab->priv->auto_save_timeout <= 0);
+			if (tab->priv->auto_save_timeout > 0)
+				remove_auto_save_timeout (tab);
 
-		gedit_document_load (doc,
-				     uri,
-				     encoding,
-				     tab->priv->tmp_line_pos,
-				     FALSE);
-	}
-	else
-	{
-		_gedit_recent_remove (GEDIT_WINDOW (gtk_widget_get_toplevel (GTK_WIDGET (tab))), uri);
+			gedit_document_load (doc,
+					     uri,
+					     encoding,
+					     tab->priv->tmp_line_pos,
+					     FALSE);
+			break;
+		case GTK_RESPONSE_YES:
+			/* This means that we want to edit the document anyway */
+			set_message_area (tab, NULL);
+			tab->priv->not_editable = FALSE;
+			gtk_text_view_set_editable (GTK_TEXT_VIEW (view),
+						    TRUE);
+			break;
+		case GTK_RESPONSE_CANCEL:
+			/* We don't want to edit the document, just show it */
+			set_message_area (tab, NULL);
+			break;
+		default:
+			_gedit_recent_remove (GEDIT_WINDOW (gtk_widget_get_toplevel (GTK_WIDGET (tab))), uri);
 
-		remove_tab (tab);
+			remove_tab (tab);
+			break;
 	}
 
 	g_free (uri);
@@ -927,7 +943,8 @@ document_loaded (GeditDocument *document,
 	location = gedit_document_get_location (document);
 	uri = gedit_document_get_uri (document);
 
-	if (error != NULL)
+	/* if the error is CONVERSION FALLBACK don't treat it as a normal error */
+	if (error != NULL && error->code != GEDIT_DOCUMENT_ERROR_CONVERSION_FALLBACK)
 	{
 		if (tab->priv->state == GEDIT_TAB_STATE_LOADING)
 			gedit_tab_set_state (tab, GEDIT_TAB_STATE_LOADING_ERROR);
@@ -979,10 +996,7 @@ document_loaded (GeditDocument *document,
 		else
 		{
 			g_return_if_fail ((error->domain == G_CONVERT_ERROR) ||
-			      		  (error->domain == GEDIT_CONVERT_ERROR));
-
-			/* FIXME: Check for GEDIT_CONVERT_ERROR_FALLBACK_USED
-			  and set the right message area */
+					  (error->domain == GEDIT_CONVERT_ERROR));
 
 			// TODO: different error messages if tab->priv->state == GEDIT_TAB_STATE_REVERTING?
 			// note that while reverting encoding should be ok, so this is unlikely to happen
@@ -1028,6 +1042,36 @@ document_loaded (GeditDocument *document,
 				   mime);
 		g_free (mime);
 
+		if (error && error->code == GEDIT_DOCUMENT_ERROR_CONVERSION_FALLBACK)
+		{
+			GtkWidget *emsg;
+
+			//_gedit_document_set_readonly (document, TRUE);
+			tab->priv->not_editable = TRUE;
+
+			emsg = gedit_conversion_error_while_loading_message_area_new (
+									uri,
+									tab->priv->tmp_encoding,
+									error);
+
+			set_message_area (tab, emsg);
+
+			g_signal_connect (emsg,
+					  "response",
+					  G_CALLBACK (conversion_loading_error_message_area_response),
+					  tab);
+
+#if !GTK_CHECK_VERSION (2, 17, 1)
+			gedit_message_area_set_default_response (GEDIT_MESSAGE_AREA (emsg),
+								 GTK_RESPONSE_CANCEL);
+#else
+			gtk_info_bar_set_default_response (GTK_INFO_BAR (emsg),
+							   GTK_RESPONSE_CANCEL);
+#endif
+
+			gtk_widget_show (emsg);
+		}
+
 		/* Scroll to the cursor when the document is loaded */
 		gedit_view_scroll_to_cursor (GEDIT_VIEW (tab->priv->view));
 
diff --git a/tests/smart-converter.c b/tests/smart-converter.c
index 96d2c45..f9e8b9f 100644
--- a/tests/smart-converter.c
+++ b/tests/smart-converter.c
@@ -28,6 +28,94 @@
 #include <string.h>
 
 #define TEXT_TO_CONVERT "this is some text to make the tests"
+#define TEXT_TO_GUESS "hello \xe6\x96\x87 world"
+
+static void
+print_hex (gchar *ptr, gint len)
+{
+	gint i;
+
+	for (i = 0; i < len; ++i)
+	{
+		g_printf ("\\x%02x", (unsigned char)ptr[i]);
+	}
+
+	g_printf ("\n");
+}
+
+static gchar *
+get_encoded_text (const gchar         *text,
+                  gsize                nread,
+		  const GeditEncoding *to,
+		  const GeditEncoding *from,
+		  gsize               *bytes_written_aux,
+		  gboolean             care_about_error)
+{
+	GCharsetConverter *converter;
+	gchar *out, *out_aux;
+	gsize bytes_read, bytes_read_aux;
+	gsize bytes_written;
+	GConverterResult res;
+	GError *err;
+
+	converter = g_charset_converter_new (gedit_encoding_get_charset (to),
+					     gedit_encoding_get_charset (from),
+					     NULL);
+
+	out = g_malloc (200);
+	out_aux = g_malloc (200);
+	err = NULL;
+	bytes_read_aux = 0;
+	*bytes_written_aux = 0;
+
+	if (nread == -1)
+	{
+		nread = strlen (text);
+	}
+
+	do
+	{
+		res = g_converter_convert (G_CONVERTER (converter),
+		                           text + bytes_read_aux,
+		                           nread,
+		                           out_aux,
+		                           200,
+		                           G_CONVERTER_INPUT_AT_END,
+		                           &bytes_read,
+		                           &bytes_written,
+		                           &err);
+		memcpy (out + *bytes_written_aux, out_aux, bytes_written);
+		bytes_read_aux += bytes_read;
+		*bytes_written_aux += bytes_written;
+		nread -= bytes_read;
+	} while (res != G_CONVERTER_FINISHED && res != G_CONVERTER_ERROR);
+
+	if (care_about_error)
+	{
+		g_assert_no_error (err);
+	}
+	else if (err)
+	{
+		g_printf ("** You don't care, but there was an error: %s", err->message);
+		return NULL;
+	}
+
+	out[*bytes_written_aux] = '\0';
+
+	if (!g_utf8_validate (out, *bytes_written_aux, NULL) && !care_about_error)
+	{
+		if (!care_about_error)
+		{
+			return NULL;
+		}
+		else
+		{
+			g_assert_not_reached ();
+		}
+	}
+
+	return out;
+}
 
 static GSList *
 get_all_encodings ()
@@ -51,12 +139,12 @@ get_all_encodings ()
 	return encs;
 }
 
-static void
+static gchar *
 do_test (const gchar *test_in,
          const gchar *enc,
          GSList      *encodings,
          gsize        nread,
-         const gchar *test_out)
+         const GeditEncoding **guessed)
 {
 	GeditSmartCharsetConverter *converter;
 	gchar *out, *out_aux;
@@ -99,7 +187,10 @@ do_test (const gchar *test_in,
 	g_assert_no_error (err);
 	out[bytes_written_aux] = '\0';
 
-	g_assert_cmpstr (out, ==, test_out);
+	if (guessed != NULL)
+		*guessed = gedit_smart_charset_converter_get_guessed (converter);
+
+	return out;
 }
 
 static void
@@ -163,10 +254,16 @@ do_test_roundtrip (const char *str, const char *charset)
 static void
 test_utf8_utf8 ()
 {
-	do_test (TEXT_TO_CONVERT, "UTF-8", NULL, strlen (TEXT_TO_CONVERT), TEXT_TO_CONVERT);
+	gchar *aux;
+
+	aux = do_test (TEXT_TO_CONVERT, "UTF-8", NULL, strlen (TEXT_TO_CONVERT), NULL);
+	g_assert_cmpstr (aux, ==, TEXT_TO_CONVERT);
 
-	do_test ("foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz", "UTF-8", NULL, 18, "foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz");
-	do_test ("foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz", "UTF-8", NULL, 9, "foobar\xc3\xa8\xc3");
+	aux = do_test ("foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz", "UTF-8", NULL, 18, NULL);
+	g_assert_cmpstr (aux, ==, "foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz");
+
+	aux = do_test ("foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz", "UTF-8", NULL, 9, NULL);
+	g_assert_cmpstr (aux, ==, "foobar\xc3\xa8\xc3");
 
 	/* FIXME: Use the utf8 stream for a fallback? */
 	//do_test_with_error ("\xef\xbf\xbezzzzzz", encs, G_IO_ERROR_FAILED);
@@ -189,6 +286,37 @@ test_xxx_xxx ()
 	g_slist_free (encs);
 }
 
+static void
+test_guessed ()
+{
+	GSList *encs = NULL;
+	gchar *aux, *aux2, *fail;
+	gsize aux_len, fail_len;
+	const GeditEncoding *guessed;
+
+	aux = get_encoded_text (TEXT_TO_GUESS, -1,
+	                        gedit_encoding_get_from_charset ("UTF-16"),
+	                        gedit_encoding_get_from_charset ("UTF-8"),
+	                        &aux_len,
+	                        TRUE);
+
+	fail = get_encoded_text (aux, aux_len,
+	                         gedit_encoding_get_from_charset ("UTF-8"),
+	                         gedit_encoding_get_from_charset ("ISO-8859-15"),
+	                         &fail_len,
+	                         FALSE);
+
+	g_assert (fail == NULL);
+
+	/* ISO-8859-15 should fail */
+	encs = g_slist_append (encs, (gpointer)gedit_encoding_get_from_charset ("ISO-8859-15"));
+	encs = g_slist_append (encs, (gpointer)gedit_encoding_get_from_charset ("UTF-16"));
+
+	aux2 = do_test (aux, NULL, encs, aux_len, &guessed);
+
+	g_assert (guessed == gedit_encoding_get_from_charset ("UTF-16"));
+}
+
 int main (int   argc,
           char *argv[])
 {
@@ -196,7 +324,8 @@ int main (int   argc,
 	g_test_init (&argc, &argv, NULL);
 
 	g_test_add_func ("/smart-converter/utf8-utf8", test_utf8_utf8);
-	g_test_add_func ("/smart-converter/xxx-xxx", test_xxx_xxx);
+	//g_test_add_func ("/smart-converter/xxx-xxx", test_xxx_xxx);
+	g_test_add_func ("/smart-converter/guessed", test_guessed);
 
 	return g_test_run ();
 }



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]