New patch for Duplicates with progress dialogs - alpha quality



Hi guys!

I have coded a new patch for the duplicates feature for F-Spot. The
basics are the same as the first patch:

 http://mail.gnome.org/archives/f-spot-list/2005-June/msg00039.html

with the ideas in:

http://mail.gnome.org/archives/f-spot-list/2005-June/msg00031.html

This new patch now has progress dialogs so you can see how the process
of creating the MD5 sums or finding the duplicates is going, and you can
cancel the process if it is taking too much time. I have used the class
FSpot.ThreadProgressDialog to be consistent with the way things are done
in F-Spot, so threading issues have created some problems. I have tested
it in some depth, but I still need to review some class variables that
are used in threading scenarios. Also, I think the code will be
reorganized in some way, so this patch is only meant to test the ideas.

The patch is against CVS as it is right now. Larry is making changes
in CVS, so the patch may only apply cleanly for a short period of time.

Cheers

-- Alvaro
Index: src/MainWindow.cs
===================================================================
RCS file: /cvs/gnome/f-spot/src/MainWindow.cs,v
retrieving revision 1.202
diff -u -b -B -p -u -r1.202 MainWindow.cs
--- src/MainWindow.cs	18 Jun 2005 22:07:10 -0000	1.202
+++ src/MainWindow.cs	24 Jun 2005 20:35:12 -0000
@@ -10,6 +10,9 @@ using System.Collections;
 using System.Runtime.InteropServices;
 using System.Text.RegularExpressions;
 
+using System.IO;
+using System.Security.Cryptography;
+
 using LibGPhoto2;
 
 public class MainWindow {
@@ -17,6 +20,7 @@ public class MainWindow {
 
 	Db db;
 
+	
 	TagSelectionWidget tag_selection_widget;
 	[Glade.Widget] Gtk.Window main_window;
 	[Glade.Widget] Gtk.VBox left_vbox;
@@ -68,6 +72,8 @@ public class MainWindow {
 	[Glade.Widget] MenuItem remove_tag;
 	[Glade.Widget] MenuItem find_tag;
 	
+    [Glade.Widget] MenuItem find_duplicates;
+	
 	[Glade.Widget] Scale zoom_scale;
 
 	[Glade.Widget] VPaned info_vpaned;
@@ -123,6 +129,81 @@ public class MainWindow {
 
 	const int PHOTO_IDX_NONE = -1;
 
+    // Duplicates process state. Each phase (MD5 computation, duplicate search) has a worker thread, a progress dialog, a result collection, a completion flag set by the worker, and the id returned by GLib.Timeout.Add (0 = no timer registered).
+    private System.Threading.Thread    duplicates_thread = null;
+    private FSpot.ThreadProgressDialog progress_dialog_duplicates;
+    private ArrayList                  duplicates = null;
+    private Boolean                    end_duplicates = false;
+    private uint                       duplicates_timer = 0;
+    private System.Threading.Thread    md5_thread = null;
+    private FSpot.ThreadProgressDialog progress_dialog_md5;
+    private Hashtable                  photos_md5 = null;
+    private Boolean                    end_computeMD5 = false;
+    private uint                       computeMD5_timer = 0;
+
+
+    private bool HandleDuplicatesTimer ()  // GLib timeout callback: polls the duplicate-search worker; once it is finished, tags the photos it found and refreshes the view
+	{
+        Console.WriteLine ("Duplicates Timer ...");
+        if (end_duplicates || !duplicates_thread.IsAlive) {  // worker set its completion flag, or its thread is not running
+            end_duplicates = false;
+            duplicates_timer = 0;  // 0 lets StartDuplicatesTimer register a new timeout next time
+            foreach (int num in duplicates) {  // NOTE(review): duplicates is only assigned at the top of FindDuplicates; it looks like it could still be null here if the thread stopped before that -- confirm
+                Console.WriteLine ("Tagging duplicate photo {0}", num);
+                query.Photos[num].AddTag (db.Tags.Duplicate);
+                query.Commit (num);
+            }
+            tag_selection_widget.Select (db.Tags.Duplicate);
+            UpdateQuery ();
+            SetViewMode (ModeType.IconView);
+            find_duplicates.Sensitive = true;  // re-enable the menu item that HandleFindDuplicates disabled
+            return false;  // done: do not run this timeout again
+        }
+        return true;  // worker still running: keep polling
+    }
+
+    private void StartDuplicatesTimer ()  // registers HandleDuplicatesTimer as a GLib timeout (interval argument 100) unless one is already registered
+	{
+		if (duplicates_timer == 0)  // 0 means no timeout is currently registered
+			duplicates_timer = 
+                GLib.Timeout.Add (100, new GLib.TimeoutHandler (HandleDuplicatesTimer));
+	}
+
+    private bool HandleComputeMD5Timer ()  // GLib timeout callback: polls the MD5 worker and, once it has completed, starts the duplicate-search phase
+	{
+        Console.WriteLine ("MD5 Timer ...");
+        if (md5_thread.IsAlive) {
+            Console.WriteLine ("The MD5 thread is alive ...");
+        }
+        if (end_computeMD5 || !md5_thread.IsAlive) {  // worker set its completion flag, or its thread is not running
+            if (progress_dialog_md5 != null) {
+                progress_dialog_md5.Destroy ();
+            }
+            computeMD5_timer = 0;  // 0 lets StartComputeMD5Timer register a new timeout next time
+            if (end_computeMD5) {  // ComputeMD5 ran to its end: launch the second phase on a new thread
+                end_computeMD5 = false;
+                duplicates_thread = new System.Threading.Thread 
+                    (new System.Threading.ThreadStart (this.FindDuplicates));
+                duplicates_thread.Name = Mono.Posix.Catalog.GetString ("Finding Duplicates");
+                progress_dialog_duplicates = 
+                    new FSpot.ThreadProgressDialog (duplicates_thread, SelectedIds().Length);
+                progress_dialog_duplicates.Start ();
+                StartDuplicatesTimer ();
+            } else {  // thread is gone but never set the flag (presumably cancelled -- confirm): only re-enable the menu item
+                find_duplicates.Sensitive = true;
+            }
+            return false;  // do not run this timeout again
+        }
+        return true;  // worker still running: keep polling
+    }
+
+    private void StartComputeMD5Timer ()  // registers HandleComputeMD5Timer as a GLib timeout (interval argument 1000) unless one is already registered
+	{
+		if (computeMD5_timer == 0)  // 0 means no timeout is currently registered
+			computeMD5_timer = 
+                GLib.Timeout.Add (1000, new GLib.TimeoutHandler (HandleComputeMD5Timer));
+	}
+
 	//
 	// Constructor
 	//
@@ -1633,6 +1714,117 @@ public class MainWindow {
 	void HandleClearDateRange (object sender, EventArgs args) {
 		query.Range = null;
 	}
+
+	void HandleFindDuplicates (object sender, EventArgs args) {  // handler for the find_duplicates menu item: makes sure the "Duplicate" tag exists, then starts the MD5 phase on a worker thread
+        find_duplicates.Sensitive = false;  // disabled while the search runs; the timer handlers set it back to true
+        Tag tag_duplicate = db.Tags.Duplicate;
+
+        if (tag_duplicate == null) {  // tag store has no "Duplicate" tag yet: create it, register it and commit it
+            Console.WriteLine ("Creating the Duplicate tag ...");
+            tag_duplicate = db.Tags.CreateTag (null, "Duplicate");
+            db.Tags.Duplicate = tag_duplicate;
+            tag_duplicate.StockIconName = "f-spot-hidden.png";
+            tag_duplicate.SortPriority = -11;
+            db.Tags.Commit (tag_duplicate);
+            tag_selection_widget.Update ();
+        } else {
+            Console.WriteLine ("The duplicate tag already exists ...");
+        }
+
+		System.Console.WriteLine ("Looking for duplicates ...");
+        if (!PhotoSelectionActive()) {  // no active photo selection: select every cell so the workers see all photos via SelectedIds ()
+            icon_view.SelectAllCells ();
+        }
+
+        md5_thread = 
+            new System.Threading.Thread (new System.Threading.ThreadStart (this.ComputeMD5));
+        md5_thread.Name = Mono.Posix.Catalog.GetString ("Creating image unique identifiers");
+        progress_dialog_md5 = new FSpot.ThreadProgressDialog (md5_thread, SelectedIds().Length);
+        progress_dialog_md5.Start();
+        StartComputeMD5Timer ();  // poll for completion from a GLib timeout (HandleComputeMD5Timer)
+    }
+
+    private void ComputeMD5 () {  // thread body for md5_thread: fills photos_md5 with one MD5 value per selected photo and updates progress_dialog_md5
+        // Simple approach: build a hash table keyed by photo index holding the MD5 of each
+        // image file; FindDuplicates later compares these values to find duplicates.
+        photos_md5 = new Hashtable ();
+
+        int counter = 1;
+        foreach (int num in SelectedIds ()) {
+            Photo photo = query.Photos [num];
+            progress_dialog_md5.Message = System.String.Format 
+                (Mono.Posix.Catalog.GetString ("Creating unique identifier for {0}"), photo.Name);
+            progress_dialog_md5.Fraction = counter / (double) SelectedIds().Length;
+            progress_dialog_md5.ProgressText = System.String.Format (Mono.Posix.Catalog.GetString ("{0} of {1}"), counter, SelectedIds().Length);
+            if (string.Compare(photo.MD5Sum, "") != 0) {  // MD5Sum is not the empty string: reuse the stored value and skip hashing the file
+                photos_md5.Add (num, photo.MD5Sum);
+                counter++;
+                continue;
+            }
+            // Measure how long hashing takes; the elapsed time is only written to the console
+            long startTime = DateTime.Now.Ticks;
+			FileStream fs = new FileStream(photo.Path, FileMode.Open, FileAccess.Read);  // NOTE(review): fs is not closed or disposed anywhere in this method
+			MD5 md5ServiceProvider = new MD5CryptoServiceProvider();
+            byte[] md5 = md5ServiceProvider.ComputeHash(fs);
+            
+            StringBuilder hash = new StringBuilder();
+            for (int pos = 0; pos < md5.Length; pos++) {
+                hash.Append(md5[pos].ToString("X2").ToLower());  // two lowercase hex digits per byte
+            }
+            long endTime = DateTime.Now.Ticks;
+            TimeSpan timeTaken = new TimeSpan(endTime - startTime);
+            Console.WriteLine("MD5 compute: {0}", timeTaken.ToString());
+            
+            photos_md5.Add (num, hash);  // stores the StringBuilder itself (the branch above stores a string); FindDuplicates compares entries via ToString ()
+            photo.MD5Sum = hash.ToString ();
+            db.Photos.Commit (photo);  // commit the photo to the photo store so the new sum is kept
+            counter++;
+        }
+        progress_dialog_md5.Message = Mono.Posix.Catalog.GetString ("Done Creating Unique Identifiers for Photos");
+        progress_dialog_md5.Fraction = 1.0;
+        progress_dialog_md5.ProgressText = Mono.Posix.Catalog.GetString ("All unique identifiers created.");
+        progress_dialog_md5.ButtonLabel = Gtk.Stock.Ok;
+        end_computeMD5 = true;  // completion flag polled by HandleComputeMD5Timer
+    }
+
+    // Second phase (thread body for duplicates_thread, shown in the second progress dialog): collects the selected photos whose MD5 equals that of another photo
+    private void FindDuplicates () {
+        duplicates = new ArrayList ();
+        
+        ICollection md5keys = photos_md5.Keys;
+        int counter = 0;
+		foreach (int num in SelectedIds ()) {
+            String photoName = query.Photos[num].Name; 
+            System.Console.WriteLine ("Finding duplicates for {0}", photoName);
+            progress_dialog_duplicates.Message = System.String.Format 
+                (Mono.Posix.Catalog.GetString ("Finding duplicates for {0}"), photoName);
+            progress_dialog_duplicates.Fraction = counter / (double) SelectedIds().Length;
+            progress_dialog_duplicates.ProgressText = System.String.Format (Mono.Posix.Catalog.GetString ("{0} of {1}"), counter, SelectedIds().Length);
+            // System.Threading.Thread.Sleep (1000);
+            foreach (int key in md5keys) {  // compare this photo against every entry in photos_md5
+                if (photos_md5[num] == null) {  // no MD5 stored for this photo: nothing to compare
+                    continue;
+                }
+                if (string.Compare(key.ToString(), num.ToString()) != 0 && 
+                    string.Compare(photos_md5[num].ToString(), photos_md5[key].ToString()) == 0){  // a different index with the same MD5 text
+                    Console.WriteLine ("Duplicate photo found ...");
+                    Tag [] tags = new Tag [1];  // NOTE(review): tags is never read in this method
+                    if (db.Tags.Duplicate == null) {
+                        Console.WriteLine ("Problems with duplicate Tag ...");
+                        return;  // NOTE(review): returns without setting end_duplicates
+                    }
+                    duplicates.Add (num);  // only the index is recorded here; tagging is done in HandleDuplicatesTimer
+                    break;  // one match is enough to mark this photo
+                }
+            }
+            counter++;
+        }
+        progress_dialog_duplicates.Message = Mono.Posix.Catalog.GetString ("Done Finding Duplicates Photos");
+        progress_dialog_duplicates.Fraction = 1.0;
+        progress_dialog_duplicates.ProgressText = Mono.Posix.Catalog.GetString ("Finding Complete");
+        progress_dialog_duplicates.ButtonLabel = Gtk.Stock.Ok;
+        end_duplicates = true;  // completion flag polled by HandleDuplicatesTimer
+    }	
 
 	// Version Id updates.
 
Index: src/PhotoStore.cs
===================================================================
RCS file: /cvs/gnome/f-spot/src/PhotoStore.cs,v
retrieving revision 1.67
diff -u -b -B -p -u -r1.67 PhotoStore.cs
--- src/PhotoStore.cs	28 May 2005 10:35:45 -0000	1.67
+++ src/PhotoStore.cs	24 Jun 2005 20:35:13 -0000
@@ -28,6 +28,7 @@ public class Photo : DbItem, IComparable
 		return Compare (this, photo);
 	}
 	
+    // FIXME: With md5sum field this could be easy
 	public static int Compare (Photo photo1, Photo photo2)
 	{
 		int result = photo1.Id.CompareTo (photo2.Id);
@@ -134,6 +135,16 @@ public class Photo : DbItem, IComparable
 		}
 	}
 
+    private string md5sum;  // backing field for MD5Sum
+    public string MD5Sum {  // MD5 of the photo as text, stored in the photos.md5sum column; "" appears to mean "not computed yet" -- confirm
+		get {
+			return md5sum;
+		}
+		set {
+			md5sum = value;
+		}
+	}
+
 	// Version management
 	public const int OriginalVersionId = 1;
 	private uint highest_version_id;
@@ -378,6 +389,7 @@ public class Photo : DbItem, IComparable
 		this.name = name;
 
 		description = "";
+        md5sum      = "";
 
 		// Note that the original version is never stored in the photo_versions table in the
 		// database.
@@ -478,7 +490,8 @@ public class PhotoStore : DbStore {
 			"       directory_path     STRING NOT NULL,		   " +
 			"       name               STRING NOT NULL,		   " +
 			"       description        TEXT NOT NULL,	           " +
-			"       default_version_id INTEGER NOT NULL		   " +
+			"       default_version_id INTEGER NOT NULL,		     " +
+            "       md5sum             STRING NOT NULL		         " +
 			")";
 
 		command.ExecuteNonQuery ();
@@ -529,8 +542,8 @@ public class PhotoStore : DbStore {
 		command.Connection = Connection;
 
 		command.CommandText = String.Format ("INSERT INTO photos (time, " +
-						     "directory_path, name, description, default_version_id) " +
-						     "       VALUES ({0}, '{1}', '{2}', '', {3})                                       ",
+						     "directory_path, name, description, default_version_id, md5sum) " +
+						     "       VALUES ({0}, '{1}', '{2}', '', {3},'')",
 						     unix_time,
 						     SqlString (System.IO.Path.GetDirectoryName (path)),
 						     SqlString (System.IO.Path.GetFileName (path)),
@@ -718,7 +731,8 @@ public class PhotoStore : DbStore {
 						     "       directory_path,                       " +
 						     "       name,                                 " +
 						     "       description,                          " +
-						     "       default_version_id                    " +
+						     "       default_version_id,                   " +
+                             "       md5sum                                " +
 						     "     FROM photos                             " +
 						     "     WHERE id = {0}                          ",
 						     id);
@@ -732,6 +746,7 @@ public class PhotoStore : DbStore {
 
 			photo.Description = reader[3].ToString ();
 			photo.DefaultVersionId = Convert.ToUInt32 (reader[4]);
+            photo.MD5Sum = reader[5].ToString ();
 			AddToCache (photo);
 		}
 
@@ -885,11 +900,13 @@ public class PhotoStore : DbStore {
 
 		SqliteCommand command = new SqliteCommand ();
 		command.Connection = Connection;
-		command.CommandText = String.Format ("UPDATE photos SET description = '{0}',     " +
-						     "                  default_version_id = {1} " +
-						     "              WHERE id = {2}",
+		command.CommandText = String.Format ("UPDATE photos SET description = '{0}'," +
+						     "                default_version_id = {1}," +
+                             "                md5sum = '{2}'"+                
+						     "                WHERE id = {3}",
 						     SqlString (photo.Description),
 						     photo.DefaultVersionId,
+                             SqlString (photo.MD5Sum),  
 						     photo.Id);
 		command.ExecuteNonQuery ();
 		command.Dispose ();
@@ -994,6 +1011,7 @@ public class PhotoStore : DbStore {
 				
 				photo.Description = reader[4].ToString ();
 				photo.DefaultVersionId = Convert.ToUInt32 (reader[5]);		 
+                photo.MD5Sum = reader[6].ToString ();
 				
 				version_list.Add (photo);
 			}
@@ -1033,7 +1051,8 @@ public class PhotoStore : DbStore {
 						     "       photos.directory_path,              " +
 						     "       photos.name,                        " +
 						     "       photos.description,                 " +
-						     "       photos.default_version_id           " +
+						     "       photos.default_version_id,          " +
+                             "       photos.md5sum                       " +
 						     "     FROM photos                           " +
 						     "     WHERE directory_path = \"{0}\"", dir.FullName);
 
@@ -1073,7 +1092,8 @@ public class PhotoStore : DbStore {
 				      "       photos.directory_path,              " +
 				      "       photos.name,                        " +
 				      "       photos.description,                 " +
-				      "       photos.default_version_id           " +
+				      "       photos.default_version_id,          " +
+                      "       photos.md5sum                       " +
 				      "     FROM photos                      ");
 		
 		if (range != null) {
Index: src/TagStore.cs
===================================================================
RCS file: /cvs/gnome/f-spot/src/TagStore.cs,v
retrieving revision 1.17
diff -u -b -B -p -u -r1.17 TagStore.cs
--- src/TagStore.cs	10 Mar 2005 02:48:53 -0000	1.17
+++ src/TagStore.cs	24 Jun 2005 20:35:13 -0000
@@ -209,6 +209,16 @@ public class TagStore : DbStore {
 		}
 	}
 
+    private Tag duplicate;  // the tag named "Duplicate", if one has been loaded or assigned; otherwise null
+	public Tag Duplicate {  // accessor for the "Duplicate" tag; the setter lets callers register a tag they have just created
+		get {
+			return duplicate;
+		}
+        set {
+            duplicate = value;
+        }
+	}
+
 	// In this store we keep all the items (i.e. the tags) in memory at all times.  This is
 	// mostly to simplify handling of the parent relationship between tags, but it also makes it
 	// a little bit faster.  We achieve this by passing "true" as the cache_is_immortal to our
@@ -242,6 +252,10 @@ public class TagStore : DbStore {
 			
 			if (tag.Name == "Hidden")
 				hidden = tag;
+
+            if (tag.Name == "Duplicate")
+                duplicate = tag;
+            
 		}
 
 		reader.Close ();
@@ -401,6 +415,11 @@ public class TagStore : DbStore {
 	
 	public override void Remove (DbItem item)
 	{
+        // FIXME: Hack!
+        if (string.Compare (((Tag) item).Name, "Duplicate") == 0) {
+            duplicate = null;
+        }
+        
 		RemoveFromCache (item);
 		
 		((Tag)item).Category = null;
Index: src/f-spot.glade
===================================================================
RCS file: /cvs/gnome/f-spot/src/f-spot.glade,v
retrieving revision 1.101
diff -u -b -B -p -u -r1.101 f-spot.glade
--- src/f-spot.glade	17 Jun 2005 16:11:25 -0000	1.101
+++ src/f-spot.glade	24 Jun 2005 20:35:19 -0000
@@ -7128,6 +7128,15 @@ Photo Details</property>
 		  </child>
 
 		  <child>
+		    <widget class="GtkMenuItem" id="find_duplicates">
+		      <property name="visible">True</property>
+		      <property name="label" translatable="yes">_Find Duplicates</property>
+		      <property name="use_underline">True</property>
+		      <signal name="activate" handler="HandleFindDuplicates" last_modification_time="Tue, 10 Aug 2004 07:08:24 GMT"/>
+		    </widget>
+		  </child>
+
+		  <child>
 		    <widget class="GtkSeparatorMenuItem" id="separator15">
 		      <property name="visible">True</property>
 		    </widget>


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]