[geary/wip/714134-gc] A great first start that seems to work well.
- From: Jim Nelson <jnelson src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [geary/wip/714134-gc] A great first start that seems to work well.
- Date: Thu, 18 Dec 2014 01:42:34 +0000 (UTC)
commit 92d07aacbc8edb8315a1382bced7ed3e8948e72f
Author: Jim Nelson <jim yorba org>
Date: Wed Dec 17 17:41:21 2014 -0800
A great first start that seems to work well.
Note: This commit will upgrade your database and delete a lot of email
and attachments from your local disk. Test with caution! Backup your
mail directory!
sql/CMakeLists.txt | 1 +
sql/version-024.sql | 11 +
src/CMakeLists.txt | 1 +
src/engine/imap-db/imap-db-database.vala | 24 ++
src/engine/imap-db/imap-db-gc.vala | 398 ++++++++++++++++++++++++++++++
5 files changed, 435 insertions(+), 0 deletions(-)
---
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 40184ce..11de86c 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -23,3 +23,4 @@ install(FILES version-020.sql DESTINATION ${SQL_DEST})
install(FILES version-021.sql DESTINATION ${SQL_DEST})
install(FILES version-022.sql DESTINATION ${SQL_DEST})
install(FILES version-023.sql DESTINATION ${SQL_DEST})
+install(FILES version-024.sql DESTINATION ${SQL_DEST})
diff --git a/sql/version-024.sql b/sql/version-024.sql
new file mode 100644
index 0000000..4925784
--- /dev/null
+++ b/sql/version-024.sql
@@ -0,0 +1,11 @@
+--
+-- Add the DeleteAttachmentFile table, which allows for attachment files to be deleted (garbage
+-- collected) after all references to them have been removed from the database without worrying
+-- about deleting them first and the database transaction failing.
+--
+
+-- Queue of on-disk attachment files awaiting deletion.  Rows are inserted in the same
+-- transaction that removes the owning message, and removed once deletion has been attempted.
+CREATE TABLE DeleteAttachmentFileTable (
+    -- Row identifier
+    id INTEGER PRIMARY KEY,
+    -- Path of the attachment file to delete
+    filename TEXT NOT NULL
+);
+
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index dbd4d98..251b3b0 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -168,6 +168,7 @@ engine/imap-db/imap-db-contact.vala
engine/imap-db/imap-db-database.vala
engine/imap-db/imap-db-email-identifier.vala
engine/imap-db/imap-db-folder.vala
+engine/imap-db/imap-db-gc.vala
engine/imap-db/imap-db-message-addresses.vala
engine/imap-db/imap-db-message-row.vala
engine/imap-db/imap-db-search-query.vala
diff --git a/src/engine/imap-db/imap-db-database.vala b/src/engine/imap-db/imap-db-database.vala
index 533209f..2459313 100644
--- a/src/engine/imap-db/imap-db-database.vala
+++ b/src/engine/imap-db/imap-db-database.vala
@@ -13,6 +13,8 @@ private class Geary.ImapDB.Database : Geary.Db.VersionedDatabase {
private ProgressMonitor upgrade_monitor;
private string account_owner_email;
private bool new_db = false;
+ private GC? gc = null;
+ private Cancellable gc_cancellable = new Cancellable();
public Database(File db_dir, File schema_dir, ProgressMonitor upgrade_monitor,
string account_owner_email) {
@@ -35,6 +37,28 @@ private class Geary.ImapDB.Database : Geary.Db.VersionedDatabase {
public new void open(Db.DatabaseFlags flags, Cancellable? cancellable) throws Error {
open_background(flags, on_prepare_database_connection, pump_event_loop,
OPEN_PUMP_EVENT_LOOP_MSEC, cancellable);
+
+ // Once the database is open, start a garbage collection pass at low priority.  The pass is
+ // tied to gc_cancellable (not the caller's cancellable) so that close() can stop it; its
+ // completion is reported to on_gc_run_async_completed().
+ gc = new GC(this, Priority.LOW);
+ gc.run_async.begin(gc_cancellable, on_gc_run_async_completed);
+ }
+
+ /**
+  * Completion callback for the garbage collection pass started in open().
+  *
+  * Finishes the async call, logs (at debug level) any error it ended with, and releases the
+  * collector.
+  */
+ private void on_gc_run_async_completed(Object? object, AsyncResult result) {
+ try {
+ gc.run_async.end(result);
+ } catch (Error err) {
+ // Errors are only logged here; they are not propagated any further
+ debug("Garbage collection of IMAP database %s completed with error: %s",
+ db_file.get_path(), err.message);
+ }
+
+ // Drop ref to avoid cyclical references
+ gc = null;
+ }
+
+ /**
+  * Cancels any garbage collection pass in progress, then closes the database via the base
+  * class.
+  */
+ public override void close(Cancellable? cancellable) throws Error {
+ // Signal the running collector (if any) to stop, then install a fresh Cancellable --
+ // presumably so that a later open() can start a new pass that is not already cancelled.
+ gc_cancellable.cancel();
+ gc_cancellable = new Cancellable();
+
+ base.close(cancellable);
}
private void pump_event_loop() {
diff --git a/src/engine/imap-db/imap-db-gc.vala b/src/engine/imap-db/imap-db-gc.vala
new file mode 100644
index 0000000..f2245ce
--- /dev/null
+++ b/src/engine/imap-db/imap-db-gc.vala
@@ -0,0 +1,398 @@
+/* Copyright 2014 Yorba Foundation
+ *
+ * This software is licensed under the GNU Lesser General Public License
+ * (version 2.1 or later). See the COPYING file in this distribution.
+ */
+
+/**
+ * IMAP database garbage collector.
+ *
+ * Currently the garbage collector reaps messages unlinked from the MessageLocationTable older than
+ * a prescribed date. It also removes their on-disk attachment files (in a transaction-safe manner)
+ * and looks for empty directories in the attachment directory tree (caused by attachment files
+ * being removed without deleting their parents).
+ *
+ * The garbage collector is designed to run in the background and in such a way that it can be
+ * closed (even by application shutdown) and re-run later without the database going incoherent.
+ */
+
+private class Geary.ImapDB.GC {
+ // Days old from today an unlinked email message must be to be reaped by the garbage collector
+ private const int UNLINKED_DAYS = 31;
+
+ // Amount of time to sleep between various GC iterations to give other operations a chance
+ private const uint SLEEP_MSEC = 50;
+
+ // Number of files to delete from the DeleteAttachmentFileTable per iteration
+ private const int DELETE_ATTACHMENT_PER = 5;
+
+ // Number of files to enumerate per time when walking a directory's children
+ private const int ENUM_DIR_PER = 10;
+
+ /**
+ * Indicates the garbage collector is running.
+ *
+ * {@link run_async} will return immediately if called while running.
+ */
+ public bool is_running { get; private set; default = false; }
+
+ private ImapDB.Database db;
+ private int priority;
+ private File data_dir;
+
+ /**
+  * Creates a garbage collector for the supplied database.
+  *
+  * @param db the IMAP database to collect; its file's parent directory is remembered as the
+  * data directory
+  * @param priority priority value passed to the asynchronous file operations the collector
+  * performs
+  */
+ public GC(ImapDB.Database db, int priority) {
+     this.priority = priority;
+     this.db = db;
+     this.data_dir = db.db_file.get_parent();
+ }
+
+ /**
+  * Runs one garbage collection pass over the database.
+  *
+  * Does nothing if a pass is already in progress on this collector.  {@link is_running} is
+  * true for the duration of the pass and is reset even when the pass ends with an error.
+  *
+  * Should only be called from the foreground thread.
+  *
+  * @param cancellable optional handle used to interrupt the pass
+  * @throws Error whatever error ended the pass early, including cancellation
+  */
+ public async void run_async(Cancellable? cancellable) throws Error {
+     if (!is_running) {
+         is_running = true;
+         try {
+             debug("[%s] Starting garbage collection of IMAP database", to_string());
+             yield internal_run_async(cancellable);
+             debug("[%s] Completed garbage collection of IMAP database", to_string());
+         } finally {
+             is_running = false;
+         }
+     }
+ }
+
+ /**
+  * Performs the actual garbage collection pass.
+  *
+  * In order: finds unlinked messages older than the reaping date, reaps them one transaction
+  * at a time, deletes the attachment files queued in DeleteAttachmentFileTable, and finally
+  * removes empty directories from the attachment directory tree.
+  *
+  * @param cancellable optional handle used to interrupt the pass
+  * @throws Error on cancellation or if any step other than reaping a single message fails
+  */
+ private async void internal_run_async(Cancellable? cancellable) throws Error {
+     DateTime now = new DateTime.now(new TimeZone.local());
+     DateTime reap_date = now.add_days(0 - UNLINKED_DAYS);
+
+     debug("[%s] Garbage collector reaping date: %s (%s)", to_string(), reap_date.to_string(),
+         reap_date.to_unix().to_string());
+
+     //
+     // Find all messages unlinked from the location table and older than the GC epoch ... this
+     // is necessary because we can't be certain that the local store is fully synchronized
+     // with the server; it's possible we recvd a message in the Inbox, the user archived it,
+     // then closed Geary before the engine could synchronize with All Mail.  In that
+     // situation, the email is completely unlinked from the location table but still on the
+     // server.  This attempts to give some "breathing room".  If the message is gc'd and
+     // detected later, the engine will merely re-download it.  As long as the gc'd emails are
+     // not in the MessageLocationTable, removing them will leave the db in a coherent state.
+     //
+     // Checking internaldate_time_t is NULL is merely a way to gc emails that were allocated
+     // a row in the database but never downloaded.  Since internaldate is the first thing
+     // downloaded, this is rare, but can happen, and this will reap those rows.
+     //
+
+     Gee.HashSet<int64?> gc_message_ids = new Gee.HashSet<int64?>(Collection.int64_hash_func,
+         Collection.int64_equal_func);
+
+     yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => {
+         Db.Statement stmt = cx.prepare("""
+             SELECT id
+             FROM MessageTable
+             WHERE (internaldate_time_t IS NULL OR internaldate_time_t <= ?)
+             AND NOT EXISTS (
+                 SELECT message_id
+                 FROM MessageLocationTable
+                 WHERE MessageLocationTable.message_id = MessageTable.id
+             )
+         """);
+         stmt.bind_int64(0, reap_date.to_unix());
+
+         Db.Result result = stmt.exec(cancellable);
+         while (!result.finished) {
+             gc_message_ids.add(result.rowid_at(0));
+
+             result.next(cancellable);
+         }
+
+         return Db.TransactionOutcome.DONE;
+     }, cancellable);
+
+     message("[%s] Found %d email messages ready for reaping", to_string(), gc_message_ids.size);
+
+     //
+     // To prevent holding the database lock for long periods of time, delete each message one
+     // at a time, deleting it from subsidiary tables as well as all on-disk attachments.
+     // Although slow, we do want this to be a background task that doesn't interrupt the user.
+     // This approach also means gc can be interrupted at any time (i.e. the user exits the
+     // application) without leaving the database in an incoherent state.  gc can be resumed
+     // even if interrupted.
+     //
+
+     int count = 0;
+     foreach (int64 message_id in gc_message_ids) {
+         try {
+             // Only count messages that were actually removed; a message that became linked
+             // again since the query above is skipped, not reaped
+             if (yield reap_message_async(message_id, cancellable))
+                 count++;
+         } catch (Error err) {
+             if (err is IOError.CANCELLED)
+                 throw err;
+
+             // A failure on one message is logged and does not stop the pass
+             message("[%s] Unable to reap message #%s: %s", to_string(), message_id.to_string(),
+                 err.message);
+         }
+
+         yield Scheduler.sleep_ms_async(SLEEP_MSEC);
+     }
+
+     message("[%s] Reaped %d email messages", to_string(), count);
+
+     //
+     // Now delete attachment files marked for deletion ... since they're added to this table
+     // as part of the reap_message_async() transaction, assured that they're ready for
+     // deletion (and, again, means this process is resumable)
+     //
+
+     count = 0;
+     for (;;) {
+         int deleted = yield delete_attachment_files(DELETE_ATTACHMENT_PER, cancellable);
+         if (deleted == 0)
+             break;
+
+         count += deleted;
+
+         yield Scheduler.sleep_ms_async(SLEEP_MSEC);
+     }
+
+     message("[%s] Deleted %d attachment files from reaped messages", to_string(), count);
+
+     //
+     // To be sure everything's clean, delete any empty directories in the attachment dir tree,
+     // as old code would only remove files
+     //
+
+     count = yield delete_empty_attachment_directories_async(null, null, cancellable);
+
+     message("[%s] Deleted %d empty attachment directories", to_string(), count);
+ }
+
+ /**
+  * Removes a single unlinked message from the database in one read-write transaction.
+  *
+  * The message's rows in MessageSearchTable, MessageAttachmentTable and MessageTable are
+  * deleted and the paths of its on-disk attachment files are queued in
+  * DeleteAttachmentFileTable for later deletion.  If the message is found in
+  * MessageLocationTable the transaction is rolled back and nothing is removed.
+  *
+  * @param message_id MessageTable row id of the message to reap
+  * @param cancellable optional handle used to interrupt the operation
+  * @return true if the message was removed, false if it was skipped because it is linked
+  * @throws Error on cancellation or database failure
+  */
+ private async bool reap_message_async(int64 message_id, Cancellable? cancellable) throws Error {
+     // Set inside the transaction only on the path that commits
+     bool reaped = false;
+
+     yield db.exec_transaction_async(Db.TransactionType.RW, (cx) => {
+         // Since there's a window of time between locating gc-able messages and removing them,
+         // need to double-check in the transaction that it's still not in the MessageLocationTable.
+         Db.Statement stmt = cx.prepare("""
+             SELECT id
+             FROM MessageLocationTable
+             WHERE message_id = ?
+         """);
+         stmt.bind_rowid(0, message_id);
+
+         // If find one, then message is no longer unlinked
+         Db.Result result = stmt.exec(cancellable);
+         if (!result.finished) {
+             debug("[%s] Not garbage collection message #%s: found linked in location table",
+                 to_string(), message_id.to_string());
+
+             return Db.TransactionOutcome.ROLLBACK;
+         }
+
+         //
+         // Fetch all on-disk attachments for this message
+         //
+
+         Gee.ArrayList<File> attachment_files = new Gee.ArrayList<File>();
+
+         stmt = cx.prepare("""
+             SELECT id, filename
+             FROM MessageAttachmentTable
+             WHERE message_id = ?
+         """);
+         stmt.bind_rowid(0, message_id);
+
+         result = stmt.exec(cancellable);
+         while (!result.finished) {
+             File file = Attachment.generate_file(data_dir, message_id, result.rowid_for("id"),
+                 result.string_for("filename"));
+             attachment_files.add(file);
+
+             result.next(cancellable);
+         }
+
+         //
+         // Delete from search table
+         //
+
+         stmt = cx.prepare("""
+             DELETE FROM MessageSearchTable
+             WHERE docid = ?
+         """);
+         stmt.bind_rowid(0, message_id);
+
+         stmt.exec(cancellable);
+
+         //
+         // Delete from attachment table
+         //
+
+         stmt = cx.prepare("""
+             DELETE FROM MessageAttachmentTable
+             WHERE message_id = ?
+         """);
+         stmt.bind_rowid(0, message_id);
+
+         stmt.exec(cancellable);
+
+         //
+         // Delete from message table
+         //
+
+         stmt = cx.prepare("""
+             DELETE FROM MessageTable
+             WHERE id = ?
+         """);
+         stmt.bind_rowid(0, message_id);
+
+         stmt.exec(cancellable);
+
+         //
+         // Mark on-disk attachment files as ready for deletion
+         //
+
+         foreach (File attachment_file in attachment_files) {
+             stmt = cx.prepare("""
+                 INSERT INTO DeleteAttachmentFileTable (filename)
+                 VALUES (?)
+             """);
+             stmt.bind_string(0, attachment_file.get_path());
+
+             stmt.exec(cancellable);
+         }
+
+         //
+         // Done; other than on-disk attachment files, message is now garbage collected.
+         //
+
+         reaped = true;
+
+         return Db.TransactionOutcome.COMMIT;
+     }, cancellable);
+
+     return reaped;
+ }
+
+ /**
+  * Processes up to limit rows of DeleteAttachmentFileTable in one read-write transaction.
+  *
+  * For each row an attempt is made to delete the named file.  The row is then removed from
+  * the table whether or not the deletion succeeded; a failed deletion is only logged.
+  *
+  * @param limit maximum number of rows to process; zero or less processes nothing
+  * @param cancellable optional handle used to interrupt the operation
+  * @return the number of rows processed, which is zero once the table is empty
+  * @throws Error on cancellation or database failure
+  */
+ private async int delete_attachment_files(int limit, Cancellable? cancellable) throws Error {
+     if (limit <= 0)
+         return 0;
+
+     int processed = 0;
+     yield db.exec_transaction_async(Db.TransactionType.RW, (cx) => {
+         Db.Statement select = cx.prepare("""
+             SELECT id, filename
+             FROM DeleteAttachmentFileTable
+             LIMIT ?
+         """);
+         select.bind_int(0, limit);
+
+         // SQL that removes every processed row; ids are appended as rows are visited
+         StringBuilder removal_sql = new StringBuilder("""
+             DELETE FROM DeleteAttachmentFileTable
+             WHERE id IN (
+         """);
+
+         for (Db.Result row = select.exec(cancellable); !row.finished; row.next(cancellable)) {
+             int64 row_id = row.rowid_at(0);
+             File doomed = File.new_for_path(row.string_at(1));
+
+             // best effort: a file that cannot be deleted is logged and still dequeued
+             try {
+                 doomed.delete(cancellable);
+             } catch (Error err) {
+                 if (err is IOError.CANCELLED)
+                     throw err;
+
+                 debug("[%s] Unable to delete reaped attachment file \"%s\": %s", to_string(),
+                     doomed.get_path(), err.message);
+             }
+
+             // comma-separate every id after the first
+             if (processed > 0)
+                 removal_sql.append(", ");
+             removal_sql.append(row_id.to_string());
+
+             processed++;
+         }
+
+         removal_sql.append(")");
+
+         // only run the removal when at least one row was visited
+         if (processed > 0)
+             cx.exec(removal_sql.str);
+
+         return Db.TransactionOutcome.COMMIT;
+     }, cancellable);
+
+     return processed;
+ }
+
+ /**
+  * Recursively deletes empty directories below an attachment directory.
+  *
+  * The directory passed in is never deleted by this call; it is only reported as empty (or
+  * not) through the out parameter so that the caller can delete it.
+  *
+  * @param current directory to scan, or null to scan the root attachments directory of the
+  * database's data directory
+  * @param empty set to true if, after the scan, the directory holds no files and no
+  * remaining child directories
+  * @param cancellable optional handle used to interrupt the operation
+  * @return the number of directories deleted
+  * @throws Error on cancellation or if a directory cannot be enumerated
+  */
+ private async int delete_empty_attachment_directories_async(File? current, out bool empty,
+     Cancellable? cancellable) throws Error {
+     File current_dir = current ?? Attachment.get_attachments_dir(data_dir);
+
+     // directory is considered empty until file or non-deleted child directory is found
+     empty = true;
+
+     // Only the name and type of each child are read below, so only those are requested
+     FileEnumerator file_enum;
+     try {
+         file_enum = yield current_dir.enumerate_children_async(
+             FileAttribute.STANDARD_NAME + "," + FileAttribute.STANDARD_TYPE,
+             FileQueryInfoFlags.NOFOLLOW_SYMLINKS, priority, cancellable);
+     } catch (Error err) {
+         // A missing root attachments directory simply means there is nothing to clean up;
+         // any other failure (or a missing child directory) is still reported
+         if (current != null || !(err is IOError.NOT_FOUND))
+             throw err;
+
+         debug("[%s] Attachment directory \"%s\" not found, nothing to clean", to_string(),
+             current_dir.get_path());
+
+         return 0;
+     }
+
+     int deleted = 0;
+     for (;;) {
+         List<FileInfo> infos = yield file_enum.next_files_async(ENUM_DIR_PER, priority, cancellable);
+         if (infos.length() == 0)
+             break;
+
+         foreach (FileInfo info in infos) {
+             // anything that is not a directory keeps this directory alive
+             if (info.get_file_type() != FileType.DIRECTORY) {
+                 empty = false;
+
+                 continue;
+             }
+
+             File child = current_dir.get_child(info.get_name());
+
+             // clean out the child first; it can only be deleted if it ends up empty
+             bool child_empty;
+             deleted += yield delete_empty_attachment_directories_async(child, out child_empty,
+                 cancellable);
+             if (!child_empty) {
+                 empty = false;
+
+                 continue;
+             }
+
+             string? failure = null;
+             try {
+                 if (!yield child.delete_async(priority, cancellable))
+                     failure = "delete indicates not empty";
+             } catch (Error err) {
+                 if (err is IOError.CANCELLED)
+                     throw err;
+
+                 failure = err.message;
+             }
+
+             if (failure == null) {
+                 deleted++;
+             } else {
+                 message("[%s] Unable to delete empty attachment directory \"%s\": %s",
+                     to_string(), child.get_path(), failure);
+
+                 // since it remains, directory not empty
+                 empty = false;
+             }
+         }
+     }
+
+     yield file_enum.close_async(priority, cancellable);
+
+     return deleted;
+ }
+
+ /**
+  * Returns a label for this collector, used as the prefix of its log messages: "GC:"
+  * followed by the path of the database file.
+  */
+ public string to_string() {
+     string? db_path = db.db_file.get_path();
+
+     return "GC:%s".printf(db_path);
+ }
+}
+
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]