[evolution] Bug #670014 - Remove-duplicates reads all messages into memory



commit effaa44b467ce2f3933dd52fda18bd0b012d6e78
Author: Milan Crha <mcrha redhat com>
Date:   Thu Feb 16 10:10:16 2012 +0100

    Bug #670014 - Remove-duplicates reads all messages into memory

 libemail-engine/e-mail-folder-utils.c |  137 ++++++++++++++++++++++----------
 1 files changed, 94 insertions(+), 43 deletions(-)
---
diff --git a/libemail-engine/e-mail-folder-utils.c b/libemail-engine/e-mail-folder-utils.c
index 7819cb5..0e8c5b5 100644
--- a/libemail-engine/e-mail-folder-utils.c
+++ b/libemail-engine/e-mail-folder-utils.c
@@ -359,6 +359,117 @@ mail_folder_find_duplicate_messages_thread (GSimpleAsyncResult *simple,
 		g_simple_async_result_take_error (simple, error);
 }
 
+/*
+ * emfu_get_messages_hash_sync:
+ *
+ * Retrieves each message listed in @message_uids from @folder, one at a
+ * time, and returns a new hash table { message UID : SHA-256 digest of the
+ * message's decoded content }.  Only the digest string is kept, so a single
+ * message is referenced at any moment.  The digest is NULL for a message
+ * that has no content.
+ *
+ * This is an all or nothing operation: when a message cannot be retrieved
+ * or its content cannot be decoded, the partial table is destroyed and NULL
+ * is returned, with @error as left by the failing call.
+ *
+ * Returns: a hash table to free with g_hash_table_destroy(), or NULL.
+ */
+static GHashTable *
+emfu_get_messages_hash_sync (CamelFolder *folder,
+			     GPtrArray *message_uids,
+			     GCancellable *cancellable,
+			     GError **error)
+{
+	GHashTable *hash_table;
+	gboolean success = TRUE;
+	guint ii;
+
+	g_return_val_if_fail (CAMEL_IS_FOLDER (folder), NULL);
+	g_return_val_if_fail (message_uids != NULL, NULL);
+
+	camel_operation_push_message (
+		cancellable,
+		ngettext (
+			"Retrieving %d message",
+			"Retrieving %d messages",
+			message_uids->len),
+		message_uids->len);
+
+	hash_table = g_hash_table_new_full (
+		(GHashFunc) g_str_hash,
+		(GEqualFunc) g_str_equal,
+		(GDestroyNotify) g_free,
+		(GDestroyNotify) g_free);
+
+	for (ii = 0; success && ii < message_uids->len; ii++) {
+		CamelMimeMessage *message;
+		CamelDataWrapper *content;
+		const gchar *uid;
+		gchar *digest = NULL;
+		gint percent;
+
+		uid = g_ptr_array_index (message_uids, ii);
+		percent = ((ii + 1) * 100) / message_uids->len;
+
+		message = camel_folder_get_message_sync (
+			folder, uid, cancellable, error);
+
+		camel_operation_progress (cancellable, percent);
+
+		if (!CAMEL_IS_MIME_MESSAGE (message)) {
+			success = FALSE;
+			break;
+		}
+
+		/* Generate a digest string from the message's content. */
+		content = camel_medium_get_content (CAMEL_MEDIUM (message));
+
+		if (content != NULL) {
+			CamelStream *stream;
+			GByteArray *buffer = NULL;
+			gssize n_bytes;
+
+			stream = camel_stream_mem_new ();
+
+			n_bytes = camel_data_wrapper_decode_to_stream_sync (
+				content, stream, cancellable, error);
+
+			if (n_bytes >= 0) {
+				/* The CamelStreamMem owns the buffer. */
+				buffer = camel_stream_mem_get_byte_array (
+					CAMEL_STREAM_MEM (stream));
+				g_warn_if_fail (buffer != NULL);
+			}
+
+			/* A decode failure has already set @error; stop here
+			 * instead of reusing it for the next message. */
+			if (buffer != NULL)
+				digest = g_compute_checksum_for_data (
+					G_CHECKSUM_SHA256,
+					buffer->data, buffer->len);
+			else
+				success = FALSE;
+
+			g_object_unref (stream);
+		}
+
+		if (success)
+			g_hash_table_insert (
+				hash_table, g_strdup (uid), digest);
+
+		g_object_unref (message);
+	}
+
+	if (!success) {
+		g_hash_table_destroy (hash_table);
+		hash_table = NULL;
+	}
+
+	camel_operation_pop_message (cancellable);
+
+	return hash_table;
+}
+
 GHashTable *
 e_mail_folder_find_duplicate_messages_sync (CamelFolder *folder,
                                             GPtrArray *message_uids,
@@ -374,8 +461,8 @@ e_mail_folder_find_duplicate_messages_sync (CamelFolder *folder,
 	g_return_val_if_fail (CAMEL_IS_FOLDER (folder), NULL);
 	g_return_val_if_fail (message_uids != NULL, NULL);
 
-	/* hash_table = { MessageUID : CamelMessage } */
-	hash_table = e_mail_folder_get_multiple_messages_sync (
+	/* hash_table = { MessageUID : digest-as-string } */
+	hash_table = emfu_get_messages_hash_sync (
 		folder, message_uids, cancellable, error);
 
 	if (hash_table == NULL)
@@ -394,14 +481,10 @@ e_mail_folder_find_duplicate_messages_sync (CamelFolder *folder,
 
 	while (g_hash_table_iter_next (&iter, &key, &value)) {
 		const CamelSummaryMessageID *message_id;
-		CamelDataWrapper *content;
 		CamelMessageFlags flags;
 		CamelMessageInfo *info;
-		CamelStream *stream;
-		GByteArray *buffer;
 		gboolean duplicate;
-		gssize n_bytes;
-		gchar *digest;
+		const gchar *digest;
 
 		info = camel_folder_get_message_info (folder, key);
 		message_id = camel_message_info_message_id (info);
@@ -414,45 +497,20 @@ e_mail_folder_find_duplicate_messages_sync (CamelFolder *folder,
 			continue;
 		}
 
-		/* Generate a digest string from the message's content. */
+		digest = value;
 
-		content = camel_medium_get_content (CAMEL_MEDIUM (value));
-
-		if (content == NULL) {
+		if (digest == NULL) {
 			g_queue_push_tail (&trash, key);
 			camel_message_info_free (info);
 			continue;
 		}
 
-		stream = camel_stream_mem_new ();
-
-		n_bytes = camel_data_wrapper_decode_to_stream_sync (
-			content, stream, cancellable, error);
-
-		if (n_bytes < 0) {
-			camel_message_info_free (info);
-			g_object_unref (stream);
-			goto fail;
-		}
-
-		/* The CamelStreamMem owns the buffer. */
-		buffer = camel_stream_mem_get_byte_array (
-			CAMEL_STREAM_MEM (stream));
-		g_return_val_if_fail (buffer != NULL, NULL);
-
-		digest = g_compute_checksum_for_data (
-			G_CHECKSUM_SHA256, buffer->data, buffer->len);
-
-		g_object_unref (stream);
-
 		/* Determine if the message a duplicate. */
 
 		value = g_hash_table_lookup (unique_ids, &message_id->id.id);
 		duplicate = (value != NULL) && g_str_equal (digest, value);
 
-		if (duplicate)
-			g_free (digest);
-		else {
+		if (!duplicate) {
 			gint64 *v_int64;
 
 			/* XXX Might be better to create a GArray
@@ -461,7 +519,7 @@ e_mail_folder_find_duplicate_messages_sync (CamelFolder *folder,
 			v_int64 = g_new0 (gint64, 1);
 			*v_int64 = (gint64) message_id->id.id;
 
-			g_hash_table_insert (unique_ids, v_int64, digest);
+			g_hash_table_insert (unique_ids, v_int64, g_strdup (digest));
 			g_queue_push_tail (&trash, key);
 		}
 
@@ -472,13 +530,6 @@ e_mail_folder_find_duplicate_messages_sync (CamelFolder *folder,
 	while ((key = g_queue_pop_head (&trash)) != NULL)
 		g_hash_table_remove (hash_table, key);
 
-	goto exit;
-
-fail:
-	g_hash_table_destroy (hash_table);
-	hash_table = NULL;
-
-exit:
 	camel_operation_pop_message (cancellable);
 
 	g_hash_table_destroy (unique_ids);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]