DuplicatesFinder new class



Hi guys!

Finally I have created a new class called DuplicatesFinder.cs which is
responsible for finding duplicates in a selection of photos.
When it finishes looking for duplicates it raises an event to notify
the registered delegates.

So only 22 lines of MainWindow.cs need to be modified to add the new
feature. I have attached to this email the new class and the patch you
will need to apply to F-Spot.

I hope this will be the final design of the work, so don't expect more
boring emails about the duplicates feature ;-)

Cheers

-- Alvaro

Index: src/MainWindow.cs
===================================================================
RCS file: /cvs/gnome/f-spot/src/MainWindow.cs,v
retrieving revision 1.202
diff -u -b -B -p -u -r1.202 MainWindow.cs
--- src/MainWindow.cs	18 Jun 2005 22:07:10 -0000	1.202
+++ src/MainWindow.cs	25 Jun 2005 17:37:24 -0000
@@ -17,6 +17,7 @@ public class MainWindow {
 
 	Db db;
 
+	
 	TagSelectionWidget tag_selection_widget;
 	[Glade.Widget] Gtk.Window main_window;
 	[Glade.Widget] Gtk.VBox left_vbox;
@@ -68,6 +69,8 @@ public class MainWindow {
 	[Glade.Widget] MenuItem remove_tag;
 	[Glade.Widget] MenuItem find_tag;
 	
+    [Glade.Widget] MenuItem find_duplicates;
+	
 	[Glade.Widget] Scale zoom_scale;
 
 	[Glade.Widget] VPaned info_vpaned;
@@ -1633,6 +1636,28 @@ public class MainWindow {
 	void HandleClearDateRange (object sender, EventArgs args) {
 		query.Range = null;
 	}
+
+	void HandleFindDuplicates (object sender, EventArgs args) {
+        find_duplicates.Sensitive = false;
+        FSpot.DuplicatesFinder finder = new FSpot.DuplicatesFinder (db, query);
+        finder.CreateDuplicateTag ();
+        tag_selection_widget.Update ();
+        if (!PhotoSelectionActive()) {
+            icon_view.SelectAllCells ();
+        }
+        System.Console.WriteLine ("Looking for duplicates ...");
+        finder.SearchFinished += new FSpot.DuplicatesFinderEnd (HandleEndDuplicates);
+        finder.startFind (SelectedIds());
+    }
+
+    void HandleEndDuplicates (Boolean success) {
+        if (success) {
+            tag_selection_widget.Select (db.Tags.Duplicate);
+            UpdateQuery ();
+            SetViewMode (ModeType.IconView);
+        }
+        find_duplicates.Sensitive = true;
+    }
 
 	// Version Id updates.
 
Index: src/Makefile.am
===================================================================
RCS file: /cvs/gnome/f-spot/src/Makefile.am,v
retrieving revision 1.31
diff -u -b -B -p -u -r1.31 Makefile.am
--- src/Makefile.am	17 Jun 2005 16:11:25 -0000	1.31
+++ src/Makefile.am	25 Jun 2005 17:37:25 -0000
@@ -14,6 +14,7 @@ F_SPOT_CSDISTFILES =				\
 	$(srcdir)/Delay.cs			\
 	$(srcdir)/DirectoryAdaptor.cs		\
 	$(srcdir)/DirectoryCollection.cs	\
+	$(srcdir)/DuplicatesFinder.cs	\
 	$(srcdir)/Exif.cs			\
 	$(srcdir)/ExifUtils.cs			\
 	$(srcdir)/FlickrExport.cs		\
Index: src/PhotoStore.cs
===================================================================
RCS file: /cvs/gnome/f-spot/src/PhotoStore.cs,v
retrieving revision 1.67
diff -u -b -B -p -u -r1.67 PhotoStore.cs
--- src/PhotoStore.cs	28 May 2005 10:35:45 -0000	1.67
+++ src/PhotoStore.cs	25 Jun 2005 17:37:25 -0000
@@ -28,6 +28,7 @@ public class Photo : DbItem, IComparable
 		return Compare (this, photo);
 	}
 	
+    // FIXME: With md5sum field this could be easy
 	public static int Compare (Photo photo1, Photo photo2)
 	{
 		int result = photo1.Id.CompareTo (photo2.Id);
@@ -134,6 +135,16 @@ public class Photo : DbItem, IComparable
 		}
 	}
 
+    private string md5sum;
+    public string MD5Sum {
+		get {
+			return md5sum;
+		}
+		set {
+			md5sum = value;
+		}
+	}
+
 	// Version management
 	public const int OriginalVersionId = 1;
 	private uint highest_version_id;
@@ -378,6 +389,7 @@ public class Photo : DbItem, IComparable
 		this.name = name;
 
 		description = "";
+        md5sum      = "";
 
 		// Note that the original version is never stored in the photo_versions table in the
 		// database.
@@ -478,7 +490,8 @@ public class PhotoStore : DbStore {
 			"       directory_path     STRING NOT NULL,		   " +
 			"       name               STRING NOT NULL,		   " +
 			"       description        TEXT NOT NULL,	           " +
-			"       default_version_id INTEGER NOT NULL		   " +
+			"       default_version_id INTEGER NOT NULL,		     " +
+            "       md5sum             STRING NOT NULL		         " +
 			")";
 
 		command.ExecuteNonQuery ();
@@ -529,8 +542,8 @@ public class PhotoStore : DbStore {
 		command.Connection = Connection;
 
 		command.CommandText = String.Format ("INSERT INTO photos (time, " +
-						     "directory_path, name, description, default_version_id) " +
-						     "       VALUES ({0}, '{1}', '{2}', '', {3})                                       ",
+						     "directory_path, name, description, default_version_id, md5sum) " +
+						     "       VALUES ({0}, '{1}', '{2}', '', {3},'')",
 						     unix_time,
 						     SqlString (System.IO.Path.GetDirectoryName (path)),
 						     SqlString (System.IO.Path.GetFileName (path)),
@@ -718,7 +731,8 @@ public class PhotoStore : DbStore {
 						     "       directory_path,                       " +
 						     "       name,                                 " +
 						     "       description,                          " +
-						     "       default_version_id                    " +
+						     "       default_version_id,                   " +
+                             "       md5sum                                " +
 						     "     FROM photos                             " +
 						     "     WHERE id = {0}                          ",
 						     id);
@@ -732,6 +746,7 @@ public class PhotoStore : DbStore {
 
 			photo.Description = reader[3].ToString ();
 			photo.DefaultVersionId = Convert.ToUInt32 (reader[4]);
+            photo.MD5Sum = reader[5].ToString ();
 			AddToCache (photo);
 		}
 
@@ -885,11 +900,13 @@ public class PhotoStore : DbStore {
 
 		SqliteCommand command = new SqliteCommand ();
 		command.Connection = Connection;
-		command.CommandText = String.Format ("UPDATE photos SET description = '{0}',     " +
-						     "                  default_version_id = {1} " +
-						     "              WHERE id = {2}",
+		command.CommandText = String.Format ("UPDATE photos SET description = '{0}'," +
+						     "                default_version_id = {1}," +
+                             "                md5sum = '{2}'"+                
+						     "                WHERE id = {3}",
 						     SqlString (photo.Description),
 						     photo.DefaultVersionId,
+                             SqlString (photo.MD5Sum),  
 						     photo.Id);
 		command.ExecuteNonQuery ();
 		command.Dispose ();
@@ -994,6 +1011,7 @@ public class PhotoStore : DbStore {
 				
 				photo.Description = reader[4].ToString ();
 				photo.DefaultVersionId = Convert.ToUInt32 (reader[5]);		 
+                photo.MD5Sum = reader[6].ToString ();
 				
 				version_list.Add (photo);
 			}
@@ -1033,7 +1051,8 @@ public class PhotoStore : DbStore {
 						     "       photos.directory_path,              " +
 						     "       photos.name,                        " +
 						     "       photos.description,                 " +
-						     "       photos.default_version_id           " +
+						     "       photos.default_version_id,          " +
+                             "       photos.md5sum                       " +
 						     "     FROM photos                           " +
 						     "     WHERE directory_path = \"{0}\"", dir.FullName);
 
@@ -1073,7 +1092,8 @@ public class PhotoStore : DbStore {
 				      "       photos.directory_path,              " +
 				      "       photos.name,                        " +
 				      "       photos.description,                 " +
-				      "       photos.default_version_id           " +
+				      "       photos.default_version_id,          " +
+                      "       photos.md5sum                       " +
 				      "     FROM photos                      ");
 		
 		if (range != null) {
Index: src/TagStore.cs
===================================================================
RCS file: /cvs/gnome/f-spot/src/TagStore.cs,v
retrieving revision 1.17
diff -u -b -B -p -u -r1.17 TagStore.cs
--- src/TagStore.cs	10 Mar 2005 02:48:53 -0000	1.17
+++ src/TagStore.cs	25 Jun 2005 17:37:25 -0000
@@ -209,6 +209,16 @@ public class TagStore : DbStore {
 		}
 	}
 
+    private Tag duplicate;
+	public Tag Duplicate {
+		get {
+			return duplicate;
+		}
+        set {
+            duplicate = value;
+        }
+	}
+
 	// In this store we keep all the items (i.e. the tags) in memory at all times.  This is
 	// mostly to simplify handling of the parent relationship between tags, but it also makes it
 	// a little bit faster.  We achieve this by passing "true" as the cache_is_immortal to our
@@ -242,6 +252,10 @@ public class TagStore : DbStore {
 			
 			if (tag.Name == "Hidden")
 				hidden = tag;
+
+            if (tag.Name == "Duplicate")
+                duplicate = tag;
+            
 		}
 
 		reader.Close ();
@@ -401,6 +415,11 @@ public class TagStore : DbStore {
 	
 	public override void Remove (DbItem item)
 	{
+        // FIXME: Hack!
+        if (string.Compare (((Tag) item).Name, "Duplicate") == 0) {
+            duplicate = null;
+        }
+        
 		RemoveFromCache (item);
 		
 		((Tag)item).Category = null;
Index: src/f-spot.glade
===================================================================
RCS file: /cvs/gnome/f-spot/src/f-spot.glade,v
retrieving revision 1.101
diff -u -b -B -p -u -r1.101 f-spot.glade
--- src/f-spot.glade	17 Jun 2005 16:11:25 -0000	1.101
+++ src/f-spot.glade	25 Jun 2005 17:37:28 -0000
@@ -7128,6 +7128,15 @@ Photo Details</property>
 		  </child>
 
 		  <child>
+		    <widget class="GtkMenuItem" id="find_duplicates">
+		      <property name="visible">True</property>
+		      <property name="label" translatable="yes">_Find Duplicates</property>
+		      <property name="use_underline">True</property>
+		      <signal name="activate" handler="HandleFindDuplicates" last_modification_time="Tue, 10 Aug 2004 07:08:24 GMT"/>
+		    </widget>
+		  </child>
+
+		  <child>
 		    <widget class="GtkSeparatorMenuItem" id="separator15">
 		      <property name="visible">True</property>
 		    </widget>
using System;
using System.Collections;
using System.IO;
using System.Security.Cryptography;
using System.Text;

namespace FSpot {
    /// <summary>
    /// Signature of the handlers notified when a duplicates search ends.
    /// </summary>
    /// <param name="success">
    /// true when both the checksum pass and the comparison pass ran to
    /// completion; false when one of the worker threads stopped early.
    /// </param>
    public delegate void DuplicatesFinderEnd (Boolean success);

    /// <summary>
    /// Finds duplicated photos inside a selection by comparing the MD5
    /// checksum of their files, and tags every duplicated photo with the
    /// "Duplicate" tag.
    /// </summary>
    /// <remarks>
    /// The work is done in two worker threads run one after the other
    /// (checksum computation, then comparison). Each one is watched by a
    /// GLib timeout that polls for completion; the second timeout tags the
    /// photos and raises <see cref="SearchFinished"/>.
    /// </remarks>
    public class DuplicatesFinder {
        // Internal implementation variables for Duplicates
        private System.Threading.Thread    duplicates_thread = null;
        private FSpot.ThreadProgressDialog progress_dialog_duplicates;
        private ArrayList                  duplicates = null;
        // Written by the worker thread and polled by the timeout handler,
        // hence volatile: the reader takes no lock.
        private volatile Boolean           end_duplicates = false;
        private uint                       duplicates_timer = 0;
        private System.Threading.Thread    md5_thread = null;
        private FSpot.ThreadProgressDialog progress_dialog_md5;
        // Maps a selected photo index (int) to its checksum (string).
        private Hashtable                  photos_md5 = null;
        // Same cross-thread usage as end_duplicates.
        private volatile Boolean           end_computeMD5 = false;
        private uint                       computeMD5_timer = 0;

        // Shared data variables we need to work with
        private Db                         db;
        private int[]                      selected_ids;
        private FSpot.PhotoQuery           query;

        /// <summary>
        /// Raised once the search is over. The argument tells whether the
        /// search completed (true) or was interrupted (false).
        /// </summary>
        public event DuplicatesFinderEnd SearchFinished;

        /// <summary>Creates a finder working on the given database and query.</summary>
        /// <param name="db">Database providing the tag and photo stores.</param>
        /// <param name="query">Query whose Photos are indexed by the selected ids.</param>
        public DuplicatesFinder (Db                 db,
                                 FSpot.PhotoQuery   query)

        {
            this.db = db;
            this.query = query;
        }

        /// <summary>
        /// Makes sure the "Duplicate" tag exists, creating and committing it
        /// when <c>db.Tags.Duplicate</c> is still null.
        /// </summary>
        public void CreateDuplicateTag () {
            Tag tag_duplicate = db.Tags.Duplicate;
            if (tag_duplicate == null) {
                Console.WriteLine ("Creating the Duplicate tag ...");
                tag_duplicate = db.Tags.CreateTag (null, "Duplicate");
                db.Tags.Duplicate = tag_duplicate;
                tag_duplicate.StockIconName = "f-spot-hidden.png";
                tag_duplicate.SortPriority = -11;
                db.Tags.Commit (tag_duplicate);
            } else {
                Console.WriteLine ("The duplicate tag already exists ...");
            }
        }

        /// <summary>
        /// Starts the search: launches the checksum thread behind a progress
        /// dialog and arms the timeout that watches it.
        /// </summary>
        /// <param name="selected_ids">Indexes, into the query, of the photos to inspect.</param>
        public void startFind (int [] selected_ids) {
            this.selected_ids = selected_ids;
            md5_thread =
                new System.Threading.Thread (new System.Threading.ThreadStart (this.ComputeMD5));
            md5_thread.Name = Mono.Posix.Catalog.GetString ("Creating image unique identifiers");
            progress_dialog_md5 = new FSpot.ThreadProgressDialog (md5_thread, selected_ids.Length);
            progress_dialog_md5.Start();
            StartComputeMD5Timer ();
        }

        /// <summary>
        /// Timeout callback watching the comparison thread. When the thread
        /// is done it tags the duplicates found and raises SearchFinished.
        /// </summary>
        /// <returns>true to keep polling, false once the thread has ended.</returns>
        private bool HandleDuplicatesTimer ()
        {
            Console.WriteLine ("Duplicates Timer ...");
            if (end_duplicates || !duplicates_thread.IsAlive) {
                // The flag is only set when FindDuplicates ran to its end. A
                // dead thread with the flag still unset means it bailed out
                // (missing tag, exception), so nothing must be tagged and the
                // listeners are told the search failed.
                bool success = end_duplicates;
                end_duplicates = false;
                duplicates_timer = 0;
                if (success) {
                    foreach (int num in duplicates) {
                        Console.WriteLine ("Tagging duplicate photo {0}", num);
                        query.Photos[num].AddTag (db.Tags.Duplicate);
                        query.Commit (num);
                    }
                }
                if (SearchFinished != null) {
                    SearchFinished (success);
                }
                return false;
            }
            return true;
        }

        /// <summary>Arms the 100 ms polling timeout for the comparison thread, once.</summary>
        private void StartDuplicatesTimer ()
        {
            if (duplicates_timer == 0)
                duplicates_timer =
                    GLib.Timeout.Add (100, new GLib.TimeoutHandler (HandleDuplicatesTimer));
        }

        /// <summary>
        /// Timeout callback watching the checksum thread. On success it
        /// chains to the comparison thread; otherwise it raises
        /// SearchFinished (false).
        /// </summary>
        /// <returns>true to keep polling, false once the thread has ended.</returns>
        private bool HandleComputeMD5Timer ()
        {
            Console.WriteLine ("MD5 Timer ...");
            if (md5_thread.IsAlive) {
                Console.WriteLine ("The MD5 thread is alive ...");
            }
            if (end_computeMD5 || !md5_thread.IsAlive) {
                if (progress_dialog_md5 != null) {
                    progress_dialog_md5.Destroy ();
                }
                computeMD5_timer = 0;
                if (end_computeMD5) {
                    end_computeMD5 = false;
                    duplicates_thread = new System.Threading.Thread
                        (new System.Threading.ThreadStart (this.FindDuplicates));
                    duplicates_thread.Name = Mono.Posix.Catalog.GetString ("Finding Duplicates");
                    progress_dialog_duplicates =
                        new FSpot.ThreadProgressDialog (duplicates_thread, selected_ids.Length);
                    progress_dialog_duplicates.Start ();
                    StartDuplicatesTimer ();
                } else {
                    // Thread ended without finishing its work.
                    if (SearchFinished != null) {
                        SearchFinished (false);
                    }
                }
                return false;
            }
            return true;
        }

        /// <summary>Arms the 1 s polling timeout for the checksum thread, once.</summary>
        private void StartComputeMD5Timer ()
        {
            if (computeMD5_timer == 0)
                computeMD5_timer =
                    GLib.Timeout.Add (1000, new GLib.TimeoutHandler (HandleComputeMD5Timer));
        }

        /// <summary>
        /// Worker thread body: fills photos_md5 with the checksum of every
        /// selected photo. A checksum already stored on the photo is reused;
        /// otherwise it is computed from the file, stored on the photo and
        /// committed.
        /// </summary>
        // Executed in a different thread
        private void ComputeMD5 () {
            // A really simple implementation: Build hash table with MD5 of images
            // and we later use it to find duplicates.
            photos_md5 = new Hashtable ();

            int counter = 1;
            foreach (int num in selected_ids) {
                Photo photo = query.Photos [num];
                progress_dialog_md5.Message = System.String.Format
                    (Mono.Posix.Catalog.GetString ("Creating unique identifier for {0}"), photo.Name);
                progress_dialog_md5.Fraction = counter / (double) selected_ids.Length;
                progress_dialog_md5.ProgressText = System.String.Format (Mono.Posix.Catalog.GetString ("{0} of {1}"), counter, selected_ids.Length);

                // Reuse the stored checksum; a null one counts as "not computed yet".
                string cached = photo.MD5Sum;
                if (cached != null && cached.Length > 0) {
                    photos_md5.Add (num, cached);
                    counter++;
                    continue;
                }

                // Computing time is measured for testing purposes
                long startTime = DateTime.Now.Ticks;
                byte[] md5;
                // "using" closes the file even when hashing throws, so no
                // handle is leaked per photo.
                using (FileStream fs = new FileStream (photo.Path, FileMode.Open, FileAccess.Read)) {
                    MD5 md5ServiceProvider = new MD5CryptoServiceProvider ();
                    md5 = md5ServiceProvider.ComputeHash (fs);
                }

                StringBuilder hash = new StringBuilder (md5.Length * 2);
                for (int pos = 0; pos < md5.Length; pos++) {
                    // "x2": two lowercase hex digits per byte.
                    hash.Append (md5[pos].ToString ("x2"));
                }
                long endTime = DateTime.Now.Ticks;
                TimeSpan timeTaken = new TimeSpan(endTime - startTime);
                Console.WriteLine("MD5 compute: {0}", timeTaken.ToString());

                // Store the string itself so every table value has the same type.
                string digest = hash.ToString ();
                photos_md5.Add (num, digest);
                photo.MD5Sum = digest;
                db.Photos.Commit (photo);
                counter++;
            }
            progress_dialog_md5.Message = Mono.Posix.Catalog.GetString ("Done Creating Unique Identifiers for Photos");
            progress_dialog_md5.Fraction = 1.0;
            progress_dialog_md5.ProgressText = Mono.Posix.Catalog.GetString ("All unique identifiers created.");
            progress_dialog_md5.ButtonLabel = Gtk.Stock.Ok;
            end_computeMD5 = true;
        }

        /// <summary>
        /// Worker thread body: collects in <c>duplicates</c> the index of
        /// every selected photo whose checksum is shared with at least one
        /// other photo of the selection. Tagging is left to the timeout
        /// handler.
        /// </summary>
        // Second progress dialog is to find photo duplicates
        private void FindDuplicates () {
            duplicates = new ArrayList ();

            // Count how many photos carry each checksum, so that each photo
            // needs a single lookup instead of a scan over all the others.
            Hashtable occurrences = new Hashtable ();
            foreach (DictionaryEntry entry in photos_md5) {
                if (entry.Value == null) {
                    continue;
                }
                string sum = entry.Value.ToString ();
                object seen = occurrences [sum];
                occurrences [sum] = (seen == null) ? 1 : ((int) seen) + 1;
            }

            int counter = 1;
            foreach (int num in selected_ids) {
                String photoName = query.Photos[num].Name;
                System.Console.WriteLine ("Finding duplicates for {0}", photoName);
                progress_dialog_duplicates.Message = System.String.Format
                    (Mono.Posix.Catalog.GetString ("Finding duplicates for {0}"), photoName);
                progress_dialog_duplicates.Fraction = counter / (double) selected_ids.Length;
                progress_dialog_duplicates.ProgressText = System.String.Format (Mono.Posix.Catalog.GetString ("{0} of {1}"), counter, selected_ids.Length);

                object checksum = photos_md5 [num];
                if (checksum != null && (int) occurrences [checksum.ToString ()] > 1) {
                    Console.WriteLine ("Duplicate photo found ...");
                    if (db.Tags.Duplicate == null) {
                        // Leaving without setting end_duplicates makes the
                        // timeout handler report the failure.
                        Console.WriteLine ("Problems with duplicate Tag ...");
                        return;
                    }
                    duplicates.Add (num);
                }
                counter++;
            }
            progress_dialog_duplicates.Message = Mono.Posix.Catalog.GetString ("Done Finding Duplicates Photos");
            progress_dialog_duplicates.Fraction = 1.0;
            progress_dialog_duplicates.ProgressText = Mono.Posix.Catalog.GetString ("Finding Complete");
            progress_dialog_duplicates.ButtonLabel = Gtk.Stock.Ok;
            end_duplicates = true;
        }
    }
}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]