[geary/wip/714134-gc] Vacuuming in place, better heuristics, testing of GC resume



commit 9ab984631d40e49340db53111e0eceae01a66bae
Author: Jim Nelson <jim yorba org>
Date:   Thu Dec 18 13:42:16 2014 -0800

    Vacuuming in place, better heuristics, testing of GC resume
    
    This seems like a pretty good way to go.  My Yorba Geary data dir
    dropped from 3.8GB to 1.9GB between reaping and vacuuming the db
    and attachments.

 sql/version-024.sql                                |    2 +-
 src/client/dialogs/upgrade-dialog.vala             |    1 +
 src/engine/abstract/geary-abstract-account.vala    |    1 +
 src/engine/api/geary-account.vala                  |    1 +
 src/engine/api/geary-progress-monitor.vala         |    3 +-
 src/engine/imap-db/imap-db-account.vala            |    8 +-
 src/engine/imap-db/imap-db-database.vala           |   51 +++--
 src/engine/imap-db/imap-db-gc.vala                 |  277 +++++++++++++-------
 .../imap-engine/imap-engine-generic-account.vala   |    1 +
 ui/upgrade_dialog.glade                            |    4 +-
 10 files changed, 233 insertions(+), 116 deletions(-)
---
diff --git a/sql/version-024.sql b/sql/version-024.sql
index 4da75c5..f29a1fe 100644
--- a/sql/version-024.sql
+++ b/sql/version-024.sql
@@ -14,7 +14,7 @@ CREATE TABLE DeleteAttachmentFileTable (
 
 CREATE TABLE GarbageCollectionTable (
     id INTEGER PRIMARY KEY,
-    last_gc_time_t INTEGER DEFAULT NULL,
+    last_reap_time_t INTEGER DEFAULT NULL,
     last_vacuum_time_t INTEGER DEFAULT NULL,
     reaped_messages_since_last_vacuum INTEGER DEFAULT 0
 );
diff --git a/src/client/dialogs/upgrade-dialog.vala b/src/client/dialogs/upgrade-dialog.vala
index 1f79dfc..063ea60 100644
--- a/src/client/dialogs/upgrade-dialog.vala
+++ b/src/client/dialogs/upgrade-dialog.vala
@@ -60,6 +60,7 @@ public class UpgradeDialog : Object {
      */
     public void add_account(Geary.Account account, Cancellable? cancellable = null) {
         monitor.add(account.db_upgrade_monitor);
+        monitor.add(account.db_vacuum_monitor);
         if (cancellable != null)
             cancellables.add(cancellable);
     }
diff --git a/src/engine/abstract/geary-abstract-account.vala b/src/engine/abstract/geary-abstract-account.vala
index ea24d18..1d590bd 100644
--- a/src/engine/abstract/geary-abstract-account.vala
+++ b/src/engine/abstract/geary-abstract-account.vala
@@ -8,6 +8,7 @@ public abstract class Geary.AbstractAccount : BaseObject, Geary.Account {
     public Geary.AccountInformation information { get; protected set; }
     public Geary.ProgressMonitor search_upgrade_monitor { get; protected set; }
     public Geary.ProgressMonitor db_upgrade_monitor { get; protected set; }
+    public Geary.ProgressMonitor db_vacuum_monitor { get; protected set; }
     public Geary.ProgressMonitor opening_monitor { get; protected set; }
     public Geary.ProgressMonitor sending_monitor { get; protected set; }
     
diff --git a/src/engine/api/geary-account.vala b/src/engine/api/geary-account.vala
index 369bf04..152508e 100644
--- a/src/engine/api/geary-account.vala
+++ b/src/engine/api/geary-account.vala
@@ -19,6 +19,7 @@ public interface Geary.Account : BaseObject {
     
     public abstract Geary.ProgressMonitor search_upgrade_monitor { get; protected set; }
     public abstract Geary.ProgressMonitor db_upgrade_monitor { get; protected set; }
+    public abstract Geary.ProgressMonitor db_vacuum_monitor { get; protected set; }
     public abstract Geary.ProgressMonitor opening_monitor { get; protected set; }
     public abstract Geary.ProgressMonitor sending_monitor { get; protected set; }
     
diff --git a/src/engine/api/geary-progress-monitor.vala b/src/engine/api/geary-progress-monitor.vala
index 6ffcbfd..3641f64 100644
--- a/src/engine/api/geary-progress-monitor.vala
+++ b/src/engine/api/geary-progress-monitor.vala
@@ -11,7 +11,8 @@ public enum Geary.ProgressType {
     AGGREGATED,
     ACTIVITY,
     DB_UPGRADE,
-    SEARCH_INDEX
+    SEARCH_INDEX,
+    DB_VACUUM
 }
 
 /**
diff --git a/src/engine/imap-db/imap-db-account.vala b/src/engine/imap-db/imap-db-account.vala
index 33b120e..66323fb 100644
--- a/src/engine/imap-db/imap-db-account.vala
+++ b/src/engine/imap-db/imap-db-account.vala
@@ -27,6 +27,8 @@ private class Geary.ImapDB.Account : BaseObject {
         default = new IntervalProgressMonitor(ProgressType.SEARCH_INDEX, 0, 0); }
     public SimpleProgressMonitor upgrade_monitor { get; private set; default = new SimpleProgressMonitor(
         ProgressType.DB_UPGRADE); }
+    public SimpleProgressMonitor vacuum_monitor { get; private set; default = new SimpleProgressMonitor(
+        ProgressType.DB_VACUUM); }
     public SimpleProgressMonitor sending_monitor { get; private set;
         default = new SimpleProgressMonitor(ProgressType.ACTIVITY); }
     
@@ -68,16 +70,18 @@ private class Geary.ImapDB.Account : BaseObject {
         if (db != null)
             throw new EngineError.ALREADY_OPEN("IMAP database already open");
         
-        db = new ImapDB.Database(user_data_dir, schema_dir, upgrade_monitor, account_information.email);
+        db = new ImapDB.Database(user_data_dir, schema_dir, upgrade_monitor, vacuum_monitor,
+            account_information.email);
         
         try {
-            db.open(
+            yield db.open_async(
                 Db.DatabaseFlags.CREATE_DIRECTORY | Db.DatabaseFlags.CREATE_FILE | 
Db.DatabaseFlags.CHECK_CORRUPTION,
                 cancellable);
         } catch (Error err) {
             warning("Unable to open database: %s", err.message);
             
             // close database before exiting
+            db.close(null);
             db = null;
             
             throw err;
diff --git a/src/engine/imap-db/imap-db-database.vala b/src/engine/imap-db/imap-db-database.vala
index 223bf5a..de40483 100644
--- a/src/engine/imap-db/imap-db-database.vala
+++ b/src/engine/imap-db/imap-db-database.vala
@@ -39,44 +39,61 @@ private class Geary.ImapDB.Database : Geary.Db.VersionedDatabase {
         open_background(flags, on_prepare_database_connection, pump_event_loop,
             OPEN_PUMP_EVENT_LOOP_MSEC, cancellable);
         
+        // Tie user-supplied Cancellable to internal Cancellable, which is used when close() is
+        // called
         if (cancellable != null)
-            cancellable.cancelled.connect(on_open_cancelled);
+            cancellable.cancelled.connect(cancel_gc);
         
+        // Create new garbage collection object for this database
         GC gc = new GC(this, Priority.LOW);
         
+        // Get recommendations on what GC operations should be executed
         GC.RecommendedOperation recommended = yield gc.should_run_async(gc_cancellable);
-        if (recommended == GC.RecommendedOperation.NONE)
-            return;
         
-        if ((op & GC.RecommendedOperation.VACUUM) != 0) {
+        // VACUUM needs to execute in the foreground with the user given a busy prompt (and cannot
+        // be run at the same time as REAP)
+        if ((recommended & GC.RecommendedOperation.VACUUM) != 0) {
             if (!vacuum_monitor.is_in_progress)
                 vacuum_monitor.notify_start();
             
-            yield gc.vacuum_async(gc_cancellable);
-            
-            if (vacuum_monitor.is_in_progress)
-                vacuum_monitor.notify_finish();
+            try {
+                yield gc.vacuum_async(gc_cancellable);
+            } catch (Error err) {
+                message("Vacuum of IMAP database %s failed: %s", db_file.get_path(), err.message);
+                
+                throw err;
+            } finally {
+                if (vacuum_monitor.is_in_progress)
+                    vacuum_monitor.notify_finish();
+            }
         }
         
-        if ((op & GC.RecommendedOperation.GC) != 0) {
+        // REAP can run in the background while the application is executing
+        if ((recommended & GC.RecommendedOperation.REAP) != 0) {
+            // run in the background and allow application to continue running
+            gc.reap_async.begin(gc_cancellable, on_reap_async_completed);
         }
+        
+        if (cancellable != null)
+            cancellable.cancelled.disconnect(cancel_gc);
     }
     
-    private void on_gc_run_async_completed(Object? object, AsyncResult result) {
+    private void on_reap_async_completed(Object? object, AsyncResult result) {
+        GC gc = (GC) object;
         try {
-            gc.run_async.end(result);
+            gc.reap_async.end(result);
         } catch (Error err) {
-            debug("Garbage collection of IMAP database %s completed with error: %s",
-                db_file.get_path(), err.message);
+            message("Garbage collection of IMAP database %s failed: %s", db_file.get_path(), err.message);
         }
-        
-        // Drop ref to avoid cyclical references
-        gc = null;
     }
     
-    public override void close(Cancellable? cancellable) throws Error {
+    private void cancel_gc() {
         gc_cancellable.cancel();
         gc_cancellable = new Cancellable();
+    }
+    
+    public override void close(Cancellable? cancellable) throws Error {
+        cancel_gc();
         
         base.close(cancellable);
     }
diff --git a/src/engine/imap-db/imap-db-gc.vala b/src/engine/imap-db/imap-db-gc.vala
index 8a05cf5..39a5525 100644
--- a/src/engine/imap-db/imap-db-gc.vala
+++ b/src/engine/imap-db/imap-db-gc.vala
@@ -14,35 +14,61 @@
  *
  * The garbage collector is designed to run in the background and in such a way that it can be
  * closed (even by application shutdown) and re-run later without the database going incoherent.
+ *
+ * In addition, GC can recommend when to perform a VACUUM on the database and perform that
+ * operation for the caller.  Vacuuming locks the database for an extended period of time, and GC
+ * attempts to determine when it's best to do that by tracking the number of messages reaped by
+ * the garbage collector.  (Vacuuming is really only advantageous when lots of rows have been
+ * deleted in the database; without garbage collection, Geary's database tends to merely grow in
+ * size.)  Unlike garbage collection, vacuuming is not designed to run in the background and the
+ * user should be presented a busy monitor while it's occurring.
  */
 
 private class Geary.ImapDB.GC {
-    // Maximum number of days between GC runs.
-    private const int GC_DAYS_SPAN = 10;
+    // Minimum number of days between reaping runs.
+    private const int REAP_DAYS_SPAN = 10;
+    
+    // Minimum number of days between vacuums.
+    private const int VACUUM_DAYS_SPAN = 30;
     
     // Number of reaped messages since last vacuum indicating another vacuum should occur
-    private const int VACUUM_WHEN_REAPED_REACHED = 1000;
+    private const int VACUUM_WHEN_REAPED_REACHES = 10000;
     
     // Days old from today an unlinked email message must be to be reaped by the garbage collector
-    private const int UNLINKED_DAYS = 31;
+    private const int UNLINKED_DAYS = 30;
     
     // Amount of time to sleep between various database-bound GC iterations to give other
     // transactions a chance
     private const uint SLEEP_MSEC = 15;
     
-    // Number of files to delete from the DeleteAttachmentFileTable per iteration
-    private const int DELETE_ATTACHMENT_PER = 5;
+    // Number of database operations to perform before sleeping (obviously this is a rough
+    // approximation, as not all operations are the same cost)
+    private const int OPS_PER_SLEEP_CYCLE = 10;
+    
+    // Number of files to reap from the DeleteAttachmentFileTable per iteration
+    private const int REAP_ATTACHMENT_PER = 5;
     
     // Number of files to enumerate per time when walking a directory's children
     private const int ENUM_DIR_PER = 10;
     
     /**
-     * See { link should_run_async}.
+     * Operation(s) recommended by { link should_run_async}.
      */
     [Flag]
     public enum RecommendedOperation {
+        /**
+         * Indicates no garbage collection is recommended at this time.
+         */
         NONE = 0,
-        GC,
+        /*
+         * Indicates the caller should run { link reap_async} to potentially clean up unlinked
+         * messages and files.
+         */
+        REAP,
+        /**
+         * Indicates the caller should run { link vauum_async} to consolidate disk space and reduce
+         * database fragmentation.
+         */
         VACUUM
     }
     
@@ -67,31 +93,57 @@ private class Geary.ImapDB.GC {
      * Returns if the GC should be executed (via { link run_async}).
      */
     public async RecommendedOperation should_run_async(Cancellable? cancellable) throws Error {
-        DateTime? last_gc_time, last_vacuum_time;
+        DateTime? last_reap_time, last_vacuum_time;
         int reaped_messages_since_last_vacuum;
-        yield fetch_gc_info_async(out last_gc_time, out last_vacuum_time, out 
reaped_messages_since_last_vacuum,
+        yield fetch_gc_info_async(out last_reap_time, out last_vacuum_time, out 
reaped_messages_since_last_vacuum,
             cancellable);
         
         RecommendedOperation op = RecommendedOperation.NONE;
         DateTime now = new DateTime.now_local();
         
-        // GC every GC_DAYS_SPAN unless never executed, in which case run now
+        // Reap every REAP_DAYS_SPAN unless never executed, in which case run now
         int64 days;
-        if (last_gc_time == null) {
-            // null means GC has never executed
-            debug("[%s] Recommending garbage collection: never run", to_string());
-            op |= RecommendedOperation.GC;
-        } else if (elapsed_days(now, last, out days) >= GC_DAYS_SPAN) {
-            debug("[%s] Recommending garbage collection: %d days since last run", to_string(), days);
-            op |= RecommendedOperation.GC;
+        if (last_reap_time == null) {
+            // null means reaping has never executed
+            debug("[%s] Recommending reaping: never run", to_string());
+            
+            op |= RecommendedOperation.REAP;
+        } else if (elapsed_days(now, last_reap_time, out days) >= REAP_DAYS_SPAN) {
+            debug("[%s] Recommending reaping: %s days since last run", to_string(),
+                days.to_string());
+            
+            op |= RecommendedOperation.REAP;
+        } else {
+            debug("[%s] Reaping last run on %s (%s days ago)", to_string(),
+                last_reap_time.to_string(), days.to_string());
         }
         
         // VACUUM is not something we do regularly, but rather look for a lot of reaped messages
-        // as indicator it's time; the last_vacuum_time is stored in case want to add that to the
-        // heuristic later
-        if (reaped_messages_since_last_vacuum >= VACUUM_WHEN_REAPED_REACHED) {
-            debug("[%s] Recommending database vacuum: %d messages reaped since last vacuum",
-                to_string(), reaped_messages_since_last_vacuum);
+        // as indicator it's time ... to prevent doing this a lot (which is annoying), still space
+        // it out over a minimum amount of time
+        days = 0;
+        bool vacuum_permitted;
+        if (last_vacuum_time == null) {
+            debug("[%s] Database never vacuumed (%d messages reaped)", to_string(),
+                reaped_messages_since_last_vacuum);
+            
+            vacuum_permitted = true;
+        } else if (elapsed_days(now, last_vacuum_time, out days) >= VACUUM_DAYS_SPAN) {
+            debug("[%s] Database vacuuming permitted (%s days since last run, %d messages reaped since",
+                to_string(), days.to_string(), reaped_messages_since_last_vacuum);
+            
+            vacuum_permitted = true;
+        } else {
+            debug("[%s] Database vacuuming not permitted (%s days since last run, %d messages reaped since",
+                to_string(), days.to_string(), reaped_messages_since_last_vacuum);
+            
+            vacuum_permitted = false;
+        }
+        
+        if (vacuum_permitted && reaped_messages_since_last_vacuum >= VACUUM_WHEN_REAPED_REACHES) {
+            debug("[%s] Recommending database vacuum: %d messages reaped since last vacuum %s days ago",
+                to_string(), reaped_messages_since_last_vacuum, days.to_string());
+            
             op |= RecommendedOperation.VACUUM;
         }
         
@@ -105,15 +157,20 @@ private class Geary.ImapDB.GC {
     }
     
     /**
+     * Vacuum the database, reducing fragmentation and coalescing free space, to optimize access
+     * and reduce disk usage.
+     *
      * Should only be called from the foreground thread.
      *
      * Since this will lock the database for an extended period of time, the user should be
      * presented a busy indicator.  Operations against the database should not be expected to run
      * until this completes.  Cancellation will not work once vacuuming has started.
+     *
+     * @throws Geary.EngineError.ALREADY_OPEN if { link is_running} is true.
      */
     public async void vacuum_async(Cancellable? cancellable) throws Error {
         if (is_running)
-            return;
+            throw new EngineError.ALREADY_OPEN("Cannot vacuum %s: already running", to_string());
         
         is_running = true;
         try {
@@ -126,18 +183,30 @@ private class Geary.ImapDB.GC {
     }
     
     private async void internal_vacuum_async(Cancellable? cancellable) throws Error {
-        db.exec_transaction_async(Db.TransactionType.WO, (cx) => {
-            cx.exec("VACUUM", cancellable);
-            
-            DateTime now = new DateTime.now_local();
-            
-            // update last vacuum time and messages reaped since last vacuum
+        DateTime? last_vacuum_time = null;
+        
+        // NOTE: VACUUM cannot happen inside a transaction, so to avoid blocking the main thread,
+        // run a non-transacted command from a background thread
+        yield Nonblocking.Concurrent.global.schedule_async(() => {
+            db.open_connection(cancellable).exec("VACUUM", cancellable);
+            
+            // it's a small thing, but take snapshot of time when vacuum completes, as scheduling
+            // of the next transaction is not instantaneous
+            last_vacuum_time = new DateTime.now_local();
+        }, cancellable);
+        
+        // could assert here, but these calls really need to be bulletproof
+        if (last_vacuum_time == null)
+            last_vacuum_time = new DateTime.now_local();
+        
+        // update last vacuum time and reset messages reaped since last vacuum
+        yield db.exec_transaction_async(Db.TransactionType.WO, (cx) => {
             Db.Statement stmt = cx.prepare("""
                 UPDATE GarbageCollectionTable
                 SET last_vacuum_time_t = ?, reaped_messages_since_last_vacuum = ?
                 WHERE id = 0
             """);
-            stmt.bind_int64(0, now.to_unix());
+            stmt.bind_int64(0, last_vacuum_time.to_unix());
             stmt.bind_int(1, 0);
             
             stmt.exec(cancellable);
@@ -147,47 +216,55 @@ private class Geary.ImapDB.GC {
     }
     
     /**
-     * Should only be called from the foreground thread.
+     * Run the garbage collector, which reaps unlinked messages from the database and deletes
+     * their on-disk attachments.
      *
-     * Returns immediately if { link is_running} is true.
+     * Should only be called from the foreground thread.  { link reap_async} is designed to run in
+     * the background, so the application may continue while it's executing.  It also is designed
+     * to be interrupted (i.e. the application closes) and pick back up where it left off when the
+     * application restarts.
+     *
+     * @throws Geary.EngineError.ALREADY_OPEN if { link is_running} is true.
      */
-    public async void gc_async(Cancellable? cancellable) throws Error {
+    public async void reap_async(Cancellable? cancellable) throws Error {
         if (is_running)
-            return;
+            throw new EngineError.ALREADY_OPEN("Cannot garbage collect %s: already running", to_string());
         
         is_running = true;
         try {
             debug("[%s] Starting garbage collection of IMAP database", to_string());
-            yield internal_gc_async(cancellable);
+            yield internal_reap_async(cancellable);
             debug("[%s] Completed garbage collection of IMAP database", to_string());
         } finally {
             is_running = false;
         }
     }
     
-    private async void internal_gc_async(Cancellable? cancellable) throws Error {
-        DateTime now = new DateTime.now_local();
-        DateTime reap_date = now.add_days(0 - UNLINKED_DAYS);
-        
-        debug("[%s] Garbage collector reaping date: %s (%s)", to_string(), reap_date.to_string(),
-            reap_date.to_unix().to_string());
-        
+    private async void internal_reap_async(Cancellable? cancellable) throws Error {
+        //
+        // Find all messages unlinked from the location table and older than the GC reap date ...
+        // this is necessary because we can't be certain at any point in time that the local store
+        // is fully synchronized with the server.  For example, it's possible we recvd a message in
+        // the Inbox, the user archived it, then closed Geary before the engine could synchronize
+        // with All Mail.  In that situation, the email is completely unlinked from the
+        // MessageLocationTable but still on the server.  This attempts to give some "breathing
+        // room" and not remove that message until we feel more comfortable that it's truly
+        // unlinked.
         //
-        // Find all messages unlinked from the location table and older than the GC epoch ... this
-        // is necessary because we can't be certain that the local store is fully synchronized
-        // with the server; it's possible we recvd a message in the Inbox, the user archived it,
-        // then closed Geary before the engine could synchronize will All Mail.  In that
-        // situation, the email is completely unlinked from the location table but still on the
-        // server.  This attempts to give some "breathing room".  If the message is gc'd and
-        // detected later, the engine will merely re-download it.  As long as the gc'd emails are
-        // not in the MessageLocationTable, removing them will leave the db in a coherent state.
+        // If the message is reaped and detected later during folder normalization, the engine will
+        // merely re-download it and link the new copy to the MessageLocationTable.  The
+        // UNLINKED_DAYS optimization is merely an attempt to avoid re-downloading email.
         //
-        // Checking internaldate_time_t is NULL is merely a way to gc emails that were allocated
-        // a row in the database but never downloaded.  Since internaldate is the first thing
+        // Checking internaldate_time_t is NULL is a way to reap emails that were allocated a row
+        // in the MessageTable but never downloaded.  Since internaldate is the first thing
         // downloaded, this is rare, but can happen, and this will reap those rows.
         //
         
-        Gee.HashSet<int64?> gc_message_ids = new Gee.HashSet<int64?>(Collection.int64_hash_func,
+        DateTime reap_date = new DateTime.now_local().add_days(0 - UNLINKED_DAYS);
+        debug("[%s] Garbage collector reaping date: %s (%s)", to_string(), reap_date.to_string(),
+            reap_date.to_unix().to_string());
+        
+        Gee.HashSet<int64?> reap_message_ids = new Gee.HashSet<int64?>(Collection.int64_hash_func,
             Collection.int64_equal_func);
         
         yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => {
@@ -205,7 +282,7 @@ private class Geary.ImapDB.GC {
             
             Db.Result result = stmt.exec(cancellable);
             while (!result.finished) {
-                gc_message_ids.add(result.rowid_at(0));
+                reap_message_ids.add(result.rowid_at(0));
                 
                 result.next(cancellable);
             }
@@ -213,57 +290,65 @@ private class Geary.ImapDB.GC {
             return Db.TransactionOutcome.DONE;
         }, cancellable);
         
-        message("[%s] Found %d email messages ready for reaping", to_string(), gc_message_ids.size);
+        message("[%s] Found %d email messages ready for reaping", to_string(), reap_message_ids.size);
         
         //
-        // To prevent holding the database lock for long periods of time, delete each message one
-        // at a time, deleting it from subsidiary tables as well as all on-disk attachments.
-        // Although slow, we do want this to be a background task that doesn't interrupt the user.
-        // This approach also means gc can be interrupted at any time (i.e. the user exits the
-        // application) without leaving the database in an incoherent state.  gc can be resumed
-        // even if interrupted.
+        // To prevent holding the database lock for long periods of time, reap each message one
+        // at a time, deleting it from the message table and subsidiary tables.  Although slow, we
+        // do want this to be a background task that doesn't interrupt the user.  This approach
+        // also means gc can be interrupted at any time (i.e. the user exits the application)
+        // without leaving the database in an incoherent state.  gc can be resumed even if'
+        // interrupted.
         //
         
         int count = 0;
-        foreach (int64 message_id in gc_message_ids) {
+        foreach (int64 reap_message_id in reap_message_ids) {
             try {
-                yield reap_message_async(message_id, cancellable);
+                yield reap_message_async(reap_message_id, cancellable);
                 count++;
             } catch (Error err) {
                 if (err is IOError.CANCELLED)
                     throw err;
                 
-                message("[%s] Unable to reap message #%s: %s", to_string(), message_id.to_string(),
+                message("[%s] Unable to reap message #%s: %s", to_string(), reap_message_id.to_string(),
                     err.message);
             }
             
-            yield Scheduler.sleep_ms_async(SLEEP_MSEC);
+            if ((count % OPS_PER_SLEEP_CYCLE) == 0)
+                yield Scheduler.sleep_ms_async(SLEEP_MSEC);
+            
+            if ((count % 5000) == 0)
+                debug("[%s] Reaped %d messages", to_string(), count);
         }
         
-        message("[%s] Reaped %d email messages", to_string(), count);
+        message("[%s] Reaped completed: %d messages", to_string(), count);
         
         //
-        // Now delete attachment files marked for deletion ... since they're added to this table
-        // as part of the gc_message_async() transaction, assured that they're ready for deletion
-        // (and, again, means this process is resumable)
+        // Now reap on-disk attachment files marked for deletion.  Since they're added to the
+        // DeleteAttachmentFileTable as part of the reap_message_async() transaction, it's assured
+        // that they're ready for deletion (and, again, means this process is resumable)
         //
         
         count = 0;
         for (;;) {
-            int deleted = yield delete_attachment_files(DELETE_ATTACHMENT_PER, cancellable);
-            if (deleted == 0)
+            int reaped = yield reap_attachment_files_async(REAP_ATTACHMENT_PER, cancellable);
+            if (reaped == 0)
                 break;
             
-            count += deleted;
+            count += reaped;
+            
+            if ((count % OPS_PER_SLEEP_CYCLE) == 0)
+                yield Scheduler.sleep_ms_async(SLEEP_MSEC);
             
-            yield Scheduler.sleep_ms_async(SLEEP_MSEC);
+            if ((count % 1000) == 0)
+                debug("[%s] Reaped %d attachment files", to_string(), count);
         }
         
-        message("[%s] Deleted %d attachment files from reaped messages", to_string(), count);
+        message("[%s] Completed: Reaped %d attachment files", to_string(), count);
         
         //
         // To be sure everything's clean, delete any empty directories in the attachment dir tree,
-        // as old code would only remove files
+        // as code (here and elsewhere) only removes files.
         //
         
         count = yield delete_empty_attachment_directories_async(null, null, cancellable);
@@ -271,17 +356,19 @@ private class Geary.ImapDB.GC {
         message("[%s] Deleted %d empty attachment directories", to_string(), count);
         
         //
-        // A full GC cycle completed -- store date for next time.
+        // A full reap cycle completed -- store date for next time.  By only storing when the full
+        // cycle is completed, even if the user closes the application through the cycle it will
+        // start the next time, assuring all reaped messages/attachments are dealt with in a timely
+        // manner.
         //
         
         yield db.exec_transaction_async(Db.TransactionType.WO, (cx) => {
-            DateTime now = new DateTime.now_local();
             Db.Statement stmt = cx.prepare("""
                 UPDATE GarbageCollectionTable
-                SET last_gc_time_t = ?
+                SET last_reap_time_t = ?
                 WHERE id = 0
             """);
-            stmt.bind_int64(now.to_unix());
+            stmt.bind_int64(0, new DateTime.now_local().to_unix());
             
             stmt.exec(cancellable);
             
@@ -368,7 +455,10 @@ private class Geary.ImapDB.GC {
             stmt.exec(cancellable);
             
             //
-            // Mark on-disk attachment files as ready for deletion
+            // Mark on-disk attachment files as ready for deletion (handled by
+            // reap_attachments_files_async).  This two-step process assures that this transaction
+            // commits without error and the attachment files can be deleted without being
+            // referenced by the database, in a way that's resumable.
             //
             
             foreach (File attachment_file in attachment_files) {
@@ -382,7 +472,7 @@ private class Geary.ImapDB.GC {
             }
             
             //
-            // Add to the garbage collection count since last vacuum
+            // Increment the reap count since last vacuum
             //
             
             cx.exec("""
@@ -399,7 +489,7 @@ private class Geary.ImapDB.GC {
         }, cancellable);
     }
     
-    private async int delete_attachment_files(int limit, Cancellable? cancellable) throws Error {
+    private async int reap_attachment_files_async(int limit, Cancellable? cancellable) throws Error {
         if (limit <= 0)
             return 0;
         
@@ -412,7 +502,8 @@ private class Geary.ImapDB.GC {
             """);
             stmt.bind_int(0, limit);
             
-            // build SQL for removing successfully-deleted files from table
+            // build SQL for removing file from table (whether it's deleted or not -- at this point,
+            // we're in best-attempt mode)
             StringBuilder sql = new StringBuilder("""
                 DELETE FROM DeleteAttachmentFileTable
                 WHERE id IN (
@@ -521,31 +612,31 @@ private class Geary.ImapDB.GC {
         return deleted;
     }
     
-    private async void fetch_gc_info_async(out DateTime? last_gc_time, out DateTime? last_vacuum_time,
+    private async void fetch_gc_info_async(out DateTime? last_reap_time, out DateTime? last_vacuum_time,
         out int reaped_messages_since_last_vacuum, Cancellable? cancellable) throws Error {
         // dealing with out arguments for an async method inside a closure is asking for trouble
-        int64 last_gc_time_t = -1, last_vacuum_time_t = -1;
+        int64 last_reap_time_t = -1, last_vacuum_time_t = -1;
         int reaped_count = -1;
         yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => {
             Db.Result result = cx.query("""
-                SELECT last_gc_time_t, last_vacuum_time_t, reaped_messages_since_last_vacuum
+                SELECT last_reap_time_t, last_vacuum_time_t, reaped_messages_since_last_vacuum
                 FROM GarbageCollectionTable
                 WHERE id = 0
             """);
             
             if (result.finished)
-                Db.TransactionOutcome.FAILURE;
+                return Db.TransactionOutcome.FAILURE;
             
-            // NULL indicates GC/vacuum has not run
-            last_gc_time_t = !result.is_null_at(0) ? result.int64_at(0) : -1;
+            // NULL indicates reaping/vacuum has not run
+            last_reap_time_t = !result.is_null_at(0) ? result.int64_at(0) : -1;
             last_vacuum_time_t = !result.is_null_at(1) ? result.int64_at(1) : -1;
-            reaped_count = result.int64_at(2);
+            reaped_count = result.int_at(2);
             
             return Db.TransactionOutcome.SUCCESS;
         }, cancellable);
         
-        last_gc_time = (last_gc_time_t >= 0) ? new Date.from_unix_local(last_gc_time_t) : null;
-        last_vacuum_time = (last_vacuum_time_t >= 0) ? new Date.from_unix_local(last_vacuum_time_t) : null;
+        last_reap_time = (last_reap_time_t >= 0) ? new DateTime.from_unix_local(last_reap_time_t) : null;
+        last_vacuum_time = (last_vacuum_time_t >= 0) ? new DateTime.from_unix_local(last_vacuum_time_t) : 
null;
         reaped_messages_since_last_vacuum = reaped_count;
     }
     
diff --git a/src/engine/imap-engine/imap-engine-generic-account.vala 
b/src/engine/imap-engine/imap-engine-generic-account.vala
index fc7c7e5..50f6001 100644
--- a/src/engine/imap-engine/imap-engine-generic-account.vala
+++ b/src/engine/imap-engine/imap-engine-generic-account.vala
@@ -37,6 +37,7 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.AbstractAccount {
         
         search_upgrade_monitor = local.search_index_monitor;
         db_upgrade_monitor = local.upgrade_monitor;
+        db_vacuum_monitor = local.vacuum_monitor;
         opening_monitor = new Geary.ReentrantProgressMonitor(Geary.ProgressType.ACTIVITY);
         sending_monitor = local.sending_monitor;
         
diff --git a/ui/upgrade_dialog.glade b/ui/upgrade_dialog.glade
index 7c9487c..e2a138e 100644
--- a/ui/upgrade_dialog.glade
+++ b/ui/upgrade_dialog.glade
@@ -54,10 +54,10 @@
               </packing>
             </child>
             <child>
-              <object class="GtkLabel" id="label1">
+              <object class="GtkLabel" id="text_label">
                 <property name="visible">True</property>
                 <property name="can_focus">False</property>
-                <property name="label" translatable="yes">Geary upgrade in progress.</property>
+                <property name="label" translatable="yes">Geary update in progress…</property>
               </object>
               <packing>
                 <property name="left_attach">1</property>


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]