[baobab] Fold back ThreadedScanner in Scanner

From: Paolo Borelli <pborelli src gnome org>
To: commits-list gnome org
Cc:
Subject: [baobab] Fold back ThreadedScanner in Scanner
Date: Mon, 9 Apr 2012 13:11:58 +0000 (UTC)
commit 28e663bc6a485353e83f411698255a204811554a
Author: Paolo Borelli <pborelli gnome org>
Date:   Mon Apr 9 15:00:54 2012 +0200

    Fold back ThreadedScanner in Scanner
    
    Since we decided to drop the sync scanner, we also remove the
    Scanner abstract class and merge it with the concrete ThreadedScanner
    class.

 src/Makefile.am                  |    2 -
 src/baobab-scanner.vala          |  278 +++++++++++++++++++++++++++++++++++---
 src/baobab-threaded-scanner.vala |  270 ------------------------------------
 src/baobab-window.vala           |    2 +-
 4 files changed, 259 insertions(+), 293 deletions(-)
---
diff --git a/src/Makefile.am b/src/Makefile.am
index 9d78727..9ce4692 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -34,7 +34,6 @@ baobab_SOURCES = \
 	baobab-location.vala		\
 	baobab-location-widget.vala	\
 	baobab-scanner.vala		\
-	baobab-threaded-scanner.vala	\
 	baobab-window.vala		\
 	main.vala			\
 	$(BUILT_SOURCES)
@@ -63,7 +62,6 @@ MAINTAINERCLEANFILES = \
 	baobab-location.c		\
 	baobab-location-widget.c	\
 	baobab-scanner.c		\
-	baobab-threaded-scanner.c	\
 	baobab-window.c			\
 	baobab_vala.stamp		\
 	main.c				\
diff --git a/src/baobab-scanner.vala b/src/baobab-scanner.vala
index db3c146..b33a60f 100644
--- a/src/baobab-scanner.vala
+++ b/src/baobab-scanner.vala
@@ -28,7 +28,7 @@ namespace Baobab {
         EXCLUDE_MOUNTS
     }
 
-    abstract class Scanner : Gtk.TreeStore {
+    class Scanner : Gtk.TreeStore {
         public enum Columns {
             DISPLAY_NAME,
             PARSE_NAME,
@@ -49,41 +49,274 @@ namespace Baobab {
             DONE
         }
 
-        protected struct HardLink {
-            uint64 inode;
-            uint32 device;
+        public File directory { get; private set; }
 
-            public HardLink (FileInfo info) {
-                this.inode = info.get_attribute_uint64 (FileAttribute.UNIX_INODE);
-                this.device = info.get_attribute_uint32 (FileAttribute.UNIX_DEVICE);
-            }
-        }
+        public ScanFlags scan_flags { get; private set; }
 
-        protected Cancellable cancellable;
-        protected HashTable<File, unowned File> excluded_locations;
-        protected HardLink[] hardlinks;
-        protected Error? scan_error;
+        public int max_depth { get; protected set; }
 
-        protected static const string ATTRIBUTES =
+        public signal void completed();
+
+        static const string ATTRIBUTES =
             FileAttribute.STANDARD_NAME + "," +
             FileAttribute.STANDARD_DISPLAY_NAME + "," +
             FileAttribute.STANDARD_TYPE + "," +
             FileAttribute.STANDARD_SIZE +  "," +
-            FileAttribute.STANDARD_ALLOCATED_SIZE +     "," +
+            FileAttribute.STANDARD_ALLOCATED_SIZE + "," +
             FileAttribute.UNIX_NLINK + "," +
             FileAttribute.UNIX_INODE + "," +
             FileAttribute.UNIX_DEVICE + "," +
             FileAttribute.ACCESS_CAN_READ;
 
-        public File directory { get; private set; }
+        struct HardLink {
+            uint64 inode;
+            uint32 device;
 
-        public ScanFlags scan_flags { get; private set; }
+            public HardLink (FileInfo info) {
+                this.inode = info.get_attribute_uint64 (FileAttribute.UNIX_INODE);
+                this.device = info.get_attribute_uint32 (FileAttribute.UNIX_DEVICE);
+            }
+        }
 
-        public int max_depth { get; protected set; }
+        HardLink[] hardlinks;
+        HashTable<File, unowned File> excluded_locations;
 
-        public signal void completed();
+        /* General overview:
+         *
+         * We cannot directly modify the treemodel from the worker thread, so we have to have a way to dispatch
+         * the results back to the main thread.
+         *
+         * Each scanned directory gets a 'Results' struct created for it.  If the directory has a parent
+         * directory, then the 'parent' pointer is set.  The 'display_name' and 'parse_name' fields are filled
+         * in as soon as the struct is created.  This part is done as soon as the directory is encountered.
+         *
+         * In order to determine all of the information for a particular directory (and finish filling in the
+         * results structure), we must scan it and all of its children.  We must also scan all of the siblings
+         * of the directory so that we know what percentage of the total size of the parent directory the
+         * directory in question is responsible for.
+         *
+         * After a directory, all of its children and all of its siblings have been scanned, we can do the
+         * percentage calculation.  We do this from the iteration that takes care of the parent directory: we
+         * collect an array of all of the child directory result structs and when we have them all, we assign
+         * the proper percentage to each.  At this point we can report this array of result structs back to the
+         * main thread to be added to the treemodel.
+         *
+         * Back in the main thread, we receive a Results object.  If the results object has not yet had a
+         * TreeIter assigned to it, we create one for it.  We use the parent results object to determine the correct
+         * place in the tree (assigning the parent an iter if required, recursively).  When we create the iter,
+         * we fill in the data that existed from the start (ie: display name and parse name) and mark the status
+         * of the iter as 'scanning'.
+         *
+         * For the iter that was actually directly reported (ie: the one that's ready) we record the information
+         * into the treemodel and free the results structure (or Vala does it for us).
+         *
+         * We can be sure that the 'parent' field always points to valid memory because of the nature of the
+         * recursion and the queue.  At the time we queue a Results struct for dispatch back to the main thread,
+         * its 'parent' is held on the stack by a higher invocation of add_directory().  This invocation will
+         * never finish without first pushing its own Results struct onto the queue -- after ours.  It is
+         * therefore guaranteed that the 'parent' Results object will not be freed before each child.
+         */
+
+        AsyncQueue<ResultsArray> results_queue;
+        Scanner? self;
+        Cancellable cancellable;
+        Error? scan_error;
+
+        [Compact]
+        class ResultsArray {
+            internal Results[] results;
+        }
+
+        [Compact]
+        class Results {
+            // written in the worker thread on creation
+            // read from the main thread at any time
+            internal unowned Results? parent;
+            internal string display_name;
+            internal string parse_name;
+
+            // written in the worker thread before dispatch
+            // read from the main thread only after dispatch
+            internal uint64 size;
+            internal uint64 alloc_size;
+            internal uint64 elements;
+            internal double percent;
+            internal int max_depth;
+            internal Error? error;
+
+            // accessed only by the main thread
+            internal Gtk.TreeIter iter;
+            internal bool iter_is_set;
+        }
+
+        Results? add_directory (File directory, FileInfo info, Results? parent = null) {
+            var results_array = new ResultsArray ();
+
+            if (directory in excluded_locations) {
+                return null;
+            }
+
+            var results = new Results ();
+            results.display_name = info.get_display_name ();
+            results.parse_name = directory.get_parse_name ();
+            results.parent = parent;
+
+            results.size = info.get_size ();
+            if (info.has_attribute (FileAttribute.STANDARD_ALLOCATED_SIZE)) {
+                results.alloc_size = info.get_attribute_uint64 (FileAttribute.STANDARD_ALLOCATED_SIZE);
+            }
+            results.elements = 1;
+            results.error = null;
+
+            try {
+                var children = directory.enumerate_children (ATTRIBUTES, FileQueryInfoFlags.NOFOLLOW_SYMLINKS, cancellable);
+                FileInfo? child_info;
+                while ((child_info = children.next_file (cancellable)) != null) {
+                    switch (child_info.get_file_type ()) {
+                        case FileType.DIRECTORY:
+                            var child = directory.get_child (child_info.get_name ());
+                            var child_results = add_directory (child, child_info, results);
+
+                            if (child_results != null) {
+                                results.size += child_results.size;
+                                results.alloc_size += child_results.alloc_size;
+                                results.elements += child_results.elements;
+                                results.max_depth = int.max (results.max_depth, child_results.max_depth + 1);
+                                results_array.results += (owned) child_results;
+                            }
+                            break;
 
-        public abstract void scan ();
+                        case FileType.REGULAR:
+                            if (child_info.has_attribute (FileAttribute.UNIX_NLINK)) {
+                                if (child_info.get_attribute_uint32 (FileAttribute.UNIX_NLINK) > 1) {
+                                    var hl = HardLink (child_info);
+
+                                    // check if we've already encountered this file
+                                    if (hl in hardlinks) {
+                                        continue;
+                                    }
+
+                                    hardlinks += hl;
+                                }
+                            }
+
+                            results.size += child_info.get_size ();
+                            if (child_info.has_attribute (FileAttribute.STANDARD_ALLOCATED_SIZE)) {
+                                results.alloc_size += child_info.get_attribute_uint64 (FileAttribute.STANDARD_ALLOCATED_SIZE);
+                            }
+                            results.elements++;
+                            break;
+
+                        default:
+                            // ignore other types (symlinks, sockets, devices, etc)
+                            break;
+                    }
+                }
+            } catch (Error e) {
+                results.error = e;
+            }
+
+            foreach (unowned Results child_results in results_array.results) {
+                child_results.percent = 100 * ((double) child_results.size) / ((double) results.size);
+            }
+
+            // No early exit: in order to avoid a potential crash, we absolutely *must* push this onto the
+            // queue after having passed it to any recursive invocation of add_directory() above.
+            //  See the large comment at the top of this class for why.
+            results_queue.push ((owned) results_array);
+
+            return results;
+        }
+
+        void* scan_in_thread () {
+            try {
+                var array = new ResultsArray ();
+                var info = directory.query_info (ATTRIBUTES, 0, cancellable);
+                var results = add_directory (directory, info);
+                results.percent = 100.0;
+                array.results += (owned) results;
+                results_queue.push ((owned) array);
+            } catch {
+            }
+
+            // drop the thread's reference on the Scanner object
+            this.self = null;
+            return null;
+        }
+
+        void ensure_iter_exists (Results results) {
+            Gtk.TreeIter? parent_iter;
+
+            if (results.iter_is_set) {
+                return;
+            }
+
+            if (results.parent != null) {
+                ensure_iter_exists (results.parent);
+                parent_iter = results.parent.iter;
+            } else {
+                parent_iter = null;
+            }
+
+            append (out results.iter, parent_iter);
+            set (results.iter,
+                 Columns.STATE,        State.SCANNING,
+                 Columns.DISPLAY_NAME, results.display_name,
+                 Columns.PARSE_NAME,   results.parse_name);
+            results.iter_is_set = true;
+        }
+
+        bool process_results () {
+            while (true) {
+                var results_array = results_queue.try_pop ();
+
+                if (results_array == null) {
+                    break;
+                }
+
+                foreach (unowned Results results in results_array.results) {
+                    ensure_iter_exists (results);
+
+                    set (results.iter,
+                         Columns.SIZE,       results.size,
+                         Columns.ALLOC_SIZE, results.alloc_size,
+                         Columns.PERCENT,    results.percent,
+                         Columns.ELEMENTS,   results.elements,
+                         Columns.STATE,      results.error == null ? State.DONE : State.ERROR,
+                         Columns.ERROR,      results.error);
+
+                    if (results.max_depth > max_depth) {
+                        max_depth = results.max_depth;
+                    }
+
+                    // If the user cancelled abort the scan and
+                    // report CANCELLED as the error, otherwise
+                    // consider the error not fatal and report the
+                    // first error we encountered
+                    if (results.error != null) {
+                        if (results.error is IOError.CANCELLED) {
+                            scan_error = results.error;
+                            completed ();
+                            return false;
+                        } else if (scan_error == null) {
+                            scan_error = results.error;
+                        }
+                    }
+
+                    if (results.parent == null) {
+                        completed ();
+                        return false;
+                    }
+                }
+            }
+
+            return this.self != null;
+        }
+
+        public void scan () {
+            new GLib2.Thread ("scanner", scan_in_thread);
+            Timeout.add (100, process_results);
+        }
 
         public virtual void cancel () {
             cancellable.cancel ();
@@ -121,6 +354,11 @@ namespace Baobab {
             }
 
             excluded_locations.remove (directory);
+
+            results_queue = new AsyncQueue<ResultsArray> ();
+
+            // the thread owns a reference on the Scanner object
+            this.self = this;
         }
     }
 }
diff --git a/src/baobab-window.vala b/src/baobab-window.vala
index 0c42b83..30c2bfa 100644
--- a/src/baobab-window.vala
+++ b/src/baobab-window.vala
@@ -535,7 +535,7 @@ namespace Baobab {
                 return;
             }
 
-            scanner = new ThreadedScanner (directory, flags);
+            scanner = new Scanner (directory, flags);
             set_model (scanner);
 
             scanner.completed.connect(() => {
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]