[geary/wip/713150-conversations] Significant improvement in loading conversations from folders



commit 982467fe086e44abfa480366cec54c3a52fed308
Author: Jim Nelson <jim yorba org>
Date:   Wed Mar 18 18:01:02 2015 -0700

    Significant improvement in loading conversations from folders
    
    Using prior work on this branch, a Folder with the
    FolderSupport.Associations interface can provide a set of
    AssociatedEmails for a specified range in a single operation.  This
    eliminates looping and filling the window in ConversationMonitor
    repeatedly trying to "estimate" the number of messages needed to
    generate the required number of conversations.  In most situations,
    the necessary # of conversations is loaded in one call.

 src/CMakeLists.txt                                 |    1 +
 src/engine/api/geary-account.vala                  |    6 +-
 .../api/geary-folder-supports-associations.vala    |   54 ++++++++++++++
 src/engine/app/app-conversation-monitor.vala       |   49 +++++++++++--
 src/engine/imap-db/imap-db-account.vala            |   31 +-------
 src/engine/imap-db/imap-db-conversation.vala       |   25 +++++++
 src/engine/imap-db/imap-db-folder.vala             |   77 ++++++++++++++++++++
 .../imap-engine/imap-engine-minimal-folder.vala    |   18 +++++-
 8 files changed, 224 insertions(+), 37 deletions(-)
---
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index db6c896..8bbbe4e 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -28,6 +28,7 @@ engine/api/geary-folder.vala
 engine/api/geary-folder-path.vala
 engine/api/geary-folder-properties.vala
 engine/api/geary-folder-supports-archive.vala
+engine/api/geary-folder-supports-associations.vala
 engine/api/geary-folder-supports-copy.vala
 engine/api/geary-folder-supports-create.vala
 engine/api/geary-folder-supports-empty.vala
diff --git a/src/engine/api/geary-account.vala b/src/engine/api/geary-account.vala
index 88533e2..204a5c3 100644
--- a/src/engine/api/geary-account.vala
+++ b/src/engine/api/geary-account.vala
@@ -388,7 +388,7 @@ public abstract class Geary.Account : BaseObject {
     /**
      * Fetch all local messages associated with supplied { link EmailIdentifier}.
      *
-     * This is usually a better choice that { link local_search_message_id_async) in the sense that
+     * This is usually a better choice that { link local_search_message_id_async} in the sense that
      * the { link Account} is tasked to maintain internal lookup tables that speed lookup of
      * associated emails.  In particular, this call does ''not'' merely search for the
      * { link RFC822.MessageID}s listed in this particular { link Email}.  Rather, these lookup
@@ -399,7 +399,7 @@ public abstract class Geary.Account : BaseObject {
      * local_search_message_id_async.
      *
      * The Emails can only be searched for if they are stored locally with
-     * { link ASSOCIATED_EMAILS_REQUIRED_FIELDS} { link Email.Field}s.  Thus, when listing for
+     * { link ASSOCIATED_REQUIRED_FIELDS} { link Email.Field}s.  Thus, when listing for
      * EmailIdentifiers, add that field to the required fields to ensure they're available
      * locally.
      */
@@ -411,7 +411,7 @@ public abstract class Geary.Account : BaseObject {
      * Return a listing of local { link Email} fulfilling the required fields.
      *
      * This is akin to { link Folder.list_email_by_id_async} in that it doesn't throw an Error for
-     * unknown { link EmailIdentifiers}.
+     * unknown { link EmailIdentifier}s.
      */
     public abstract async Gee.Collection<Geary.Email>? local_list_email_async(
         Gee.Collection<Geary.EmailIdentifier> email_ids, Geary.Email.Field required_fields,
diff --git a/src/engine/api/geary-folder-supports-associations.vala 
b/src/engine/api/geary-folder-supports-associations.vala
new file mode 100644
index 0000000..1f1a8fe
--- /dev/null
+++ b/src/engine/api/geary-folder-supports-associations.vala
@@ -0,0 +1,54 @@
+/* Copyright 2015 Yorba Foundation
+ *
+ * This software is licensed under the GNU Lesser General Public License
+ * (version 2.1 or later).  See the COPYING file in this distribution.
+ */
+
+/**
+ * The addition of the Geary.FolderSupport.Associations interface to a { link Geary.Folder}
+ * indicates it has special support for organizing its flat list of { link Geary.Email} into
+ * structured { link Geary.AssociatedEmails}.
+ *
+ * This interface indicates that the Folder's implementation has an optimized method for building
+ * the associations.  Virtual and special folders may not be able to load conversations any faster
+ * than { link ConversationMonitor}, which has a fallback method for generating conversations for
+ * Folders that don't support this interface.
+ */
+
+public interface Geary.FolderSupport.Associations : Geary.Folder {
+    /**
+     * List { link AssociatedEmails} for { link Email} in the { link Folder} starting from the
+     * specified { link EmailIdentifier} and traversing down the vector.
+     *
+     * Like { link Folder.list_email_by_id_async}, the EmailIdentifier may be null to indicate
+     * listing associations from the top of the Folder's vector.  count indicates the number of
+     * desired AssociatedEmails to return.  Since Email in a Folder may be associated, the total
+     * number of EmailIdentifiers local to this Folder in all AssociatedEmails may be greater than
+     * this count.
+     *
+     * Unlike list_email_by_id_async, however, count may not be negative, i.e. there is no
+     * support for walking "up" the email vector.  This may be added later if necessary.
+     *
+     * initial_id is ''not'' included in the returned collection of AssociatedEmails.  This may also
+     * be made an option later, if necessary.
+     *
+     * primary_email_ids is the collection of EmailIdentifiers that all associations are keyed off
+     * of.  It does ''not'' include EmailIdentifiers that are outside of the initial vector range
+     * of loaded emails.
+     *
+     * For example, if the top 20 emails are loaded to generate associations but an email
+     * is found deeper in the Folder's vector of emails that is associated with one of the first 20,
+     * that EmailIdentifier is ''not'' included in primary_email_ids.  Thus, the lowest email
+     * identifier of the returned collection can be used as the initial_id for the next call to
+     * this method.
+     *
+     * This only lists Email stored in the local store.
+     *
+     * @see Geary.Folder.find_boundaries_async
+     */
+    public abstract async Gee.Collection<Geary.AssociatedEmails>? local_list_associated_emails_async(
+        Geary.EmailIdentifier? initial_id, int count, Geary.Account.EmailSearchPredicate? predicate,
+        Gee.Collection<Geary.EmailIdentifier>? primary_email_ids, Cancellable? cancellable = null)
+        throws Error;
+}
+
diff --git a/src/engine/app/app-conversation-monitor.vala b/src/engine/app/app-conversation-monitor.vala
index c74695a..425f46e 100644
--- a/src/engine/app/app-conversation-monitor.vala
+++ b/src/engine/app/app-conversation-monitor.vala
@@ -512,10 +512,41 @@ public class Geary.App.ConversationMonitor : BaseObject {
         if (associations == null || associations.size == 0)
             return;
         
+        yield process_associations_async(path, associations, email_ids, cancellable);
+        
+        Logging.debug(Logging.Flag.CONVERSATIONS, "[%s] ConversationMonitor::process_email completed: %d 
emails",
+            folder.to_string(), email_ids.size);
+    }
+    
+    private async void load_associations_async(FolderSupport.Associations supports_associations,
+        Geary.EmailIdentifier? low_id, int count, Cancellable? cancellable) {
+        SearchPredicateInstance predicate_instance = new SearchPredicateInstance(folder, required_fields);
+        
+        Gee.Collection<AssociatedEmails>? associations = null;
+        Gee.Collection<EmailIdentifier> primary_email_ids = new Gee.HashSet<EmailIdentifier>();
+        try {
+            associations = yield supports_associations.local_list_associated_emails_async(
+                low_id, count, predicate_instance.search_predicate, primary_email_ids, cancellable);
+        } catch (Error err) {
+            debug("Unable to load associated emails from %s: %s", supports_associations.to_string(),
+                err.message);
+        }
+        
+        if (associations == null || associations.size == 0)
+            return;
+        
+        yield process_associations_async(supports_associations.path, associations, primary_email_ids,
+            cancellable);
+    }
+    
+    private async void process_associations_async(FolderPath path, Gee.Collection<AssociatedEmails> 
associations,
+        Gee.Collection<Geary.EmailIdentifier> original_email_ids, Cancellable? cancellable) {
         Gee.HashSet<Conversation> added = new Gee.HashSet<Conversation>();
         Gee.HashMultiMap<Conversation, Email> appended = new Gee.HashMultiMap<Conversation, Email>();
-        foreach (AssociatedEmails association in associations)
-            yield process_association_async(association, path, email_ids, added, appended, cancellable);
+        foreach (AssociatedEmails association in associations) {
+            yield process_association_async(association, path, original_email_ids, added, appended,
+                cancellable);
+        }
         
         if (added.size > 0)
             notify_conversations_added(added);
@@ -524,9 +555,6 @@ public class Geary.App.ConversationMonitor : BaseObject {
             foreach (Conversation conversation in appended.get_keys())
                 notify_conversation_appended(conversation, appended.get(conversation));
         }
-        
-        Logging.debug(Logging.Flag.CONVERSATIONS, "[%s] ConversationMonitor::process_email completed: %d 
emails",
-            folder.to_string(), email_ids.size);
     }
     
     private async void process_association_async(AssociatedEmails association, FolderPath path,
@@ -836,8 +864,17 @@ public class Geary.App.ConversationMonitor : BaseObject {
         
         bool expected_more_email;
         
+        Geary.FolderSupport.Associations? supports_associations = folder as Geary.FolderSupport.Associations;
         Geary.EmailIdentifier? low_id = yield get_lowest_email_id_async(null);
-        if (low_id != null && !is_insert) {
+        
+        if (!is_insert && supports_associations != null) {
+            // easy case: just load in the Associations straight from the folder; this is (almost)
+            // guaranteed to fill to the right amount every time
+            yield load_associations_async(supports_associations, low_id,
+                min_window_count - conversations.size, cancellable_monitor);
+            
+            expected_more_email = true;
+        } else if (low_id != null && !is_insert) {
             // Load at least as many messages as remaining conversations.
             int num_to_load = Numeric.int_floor((min_window_count - conversations.size) * 
MSG_CONV_MULTIPLIER,
                 MIN_FILL_MESSAGE_COUNT);
diff --git a/src/engine/imap-db/imap-db-account.vala b/src/engine/imap-db/imap-db-account.vala
index 6fa7a4c..ed1273f 100644
--- a/src/engine/imap-db/imap-db-account.vala
+++ b/src/engine/imap-db/imap-db-account.vala
@@ -687,11 +687,6 @@ private class Geary.ImapDB.Account : BaseObject {
         Gee.Collection<AssociatedEmails> associations = new Gee.ArrayList<AssociatedEmails>();
         Gee.HashSet<ImapDB.EmailIdentifier> found_ids = new Gee.HashSet<ImapDB.EmailIdentifier>();
         
-        // Need flags for search predicate
-        Email.Field required_fields = Email.Field.NONE;
-        if (search_predicate != null)
-            required_fields = required_fields | Geary.Email.Field.FLAGS;
-        
         yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => {
             foreach (ImapDB.EmailIdentifier db_id in db_ids) {
                 // if message found in previous conversation search, don't re-fetch
@@ -705,27 +700,9 @@ private class Geary.ImapDB.Account : BaseObject {
                 
                 found_ids.add_all(associated_ids);
                 
-                AssociatedEmails association = new AssociatedEmails();
-                foreach (ImapDB.EmailIdentifier associated_id in associated_ids) {
-                    Email? email;
-                    Gee.Collection<FolderPath?>? known_paths;
-                    try {
-                        do_fetch_message(cx, associated_id.message_id, required_fields, false,
-                            search_predicate, out email, out known_paths, cancellable);
-                    } catch (Error err) {
-                        if (err is EngineError.NOT_FOUND)
-                            continue;
-                        
-                        throw err;
-                    }
-                    
-                    if (email != null) {
-                        association.add(email.id, known_paths);
-                        found_ids.add((ImapDB.EmailIdentifier) email.id);
-                    }
-                }
-                
-                if (association.email_ids.size > 0)
+                AssociatedEmails? association = Conversation.do_generate_associations(cx, associated_ids,
+                    search_predicate, cancellable);
+                if (association != null && association.email_ids.size > 0)
                     associations.add(association);
             }
             
@@ -735,7 +712,7 @@ private class Geary.ImapDB.Account : BaseObject {
         return associations.size > 0 ? associations : null;
     }
     
-    private void do_fetch_message(Db.Connection cx, int64 message_id, Email.Field required_fields,
+    public static void do_fetch_message(Db.Connection cx, int64 message_id, Email.Field required_fields,
         bool include_removed, Geary.Account.EmailSearchPredicate? search_predicate, out Email? email,
         out Gee.Collection<FolderPath?>? known_paths, Cancellable? cancellable) throws Error {
         Email.Field actual_fields;
diff --git a/src/engine/imap-db/imap-db-conversation.vala b/src/engine/imap-db/imap-db-conversation.vala
index fc4dd6c..1d6a8e3 100644
--- a/src/engine/imap-db/imap-db-conversation.vala
+++ b/src/engine/imap-db/imap-db-conversation.vala
@@ -260,5 +260,30 @@ private Gee.HashSet<ImapDB.EmailIdentifier>? do_fetch_associated_email_ids(Db.Co
     return associated_message_ids;
 }
 
+internal AssociatedEmails? do_generate_associations(Db.Connection cx,
+    Gee.Collection<ImapDB.EmailIdentifier> associated_ids, Geary.Account.EmailSearchPredicate? predicate,
+    Cancellable? cancellable) throws Error {
+    AssociatedEmails association = new AssociatedEmails();
+    foreach (ImapDB.EmailIdentifier associated_id in associated_ids) {
+        Email? email;
+        Gee.Collection<FolderPath?>? known_paths;
+        try {
+            Account.do_fetch_message(cx, associated_id.message_id,
+                predicate != null ? Geary.Email.Field.NONE : Geary.Email.Field.FLAGS,
+                false, predicate, out email, out known_paths, cancellable);
+        } catch (Error err) {
+            if (err is EngineError.NOT_FOUND)
+                continue;
+            
+            throw err;
+        }
+        
+        if (email != null)
+            association.add(email.id, known_paths);
+    }
+    
+    return association.email_ids.size > 0 ? association : null;
+}
+
 }
 
diff --git a/src/engine/imap-db/imap-db-folder.vala b/src/engine/imap-db/imap-db-folder.vala
index 7044a9a..c2714ab 100644
--- a/src/engine/imap-db/imap-db-folder.vala
+++ b/src/engine/imap-db/imap-db-folder.vala
@@ -597,6 +597,83 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics {
         return email;
     }
     
+    public async Gee.Collection<Geary.AssociatedEmails>? list_associated_emails_async(
+        ImapDB.EmailIdentifier? start_id, int count, Geary.Account.EmailSearchPredicate? predicate,
+        Gee.Collection<Geary.EmailIdentifier>? primary_email_ids, Cancellable? cancellable) throws Error {
+        if (count == 0)
+            return null;
+        
+        Gee.Collection<Geary.AssociatedEmails> list = new Gee.ArrayList<Geary.AssociatedEmails>();
+        Gee.HashSet<ImapDB.EmailIdentifier> found_ids = new Gee.HashSet<ImapDB.EmailIdentifier>();
+        
+        yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => {
+            // find the starting UID to being a ranged search on this folder ... include marked for
+            // removed as this is a ranged search (and they will be removed later as appropriate)
+            Imap.UID start_uid;
+            if (start_id != null) {
+                LocationIdentifier? loc = do_get_location_for_id(cx, start_id, 
ListFlags.INCLUDE_MARKED_FOR_REMOVE,
+                    cancellable);
+                if (loc == null)
+                    return Db.TransactionOutcome.DONE;
+                
+                start_uid = loc.uid.previous(true);
+            } else {
+                start_uid = new Imap.UID(Imap.UID.MAX);
+            }
+            
+            Db.Statement stmt = cx.prepare("""
+                SELECT message_id, ordering, remove_marker
+                FROM MessageLocationTable
+                WHERE folder_id = ? AND ordering <= ?
+                ORDER BY ordering DESC
+            """);
+            stmt.bind_rowid(0, folder_id);
+            stmt.bind_int64(1, start_uid.value);
+            
+            for (Db.Result result = stmt.exec(cancellable); !result.finished; result.next(cancellable)) {
+                // drop messages marked for remove
+                if (result.bool_at(2))
+                    continue;
+                
+                // Don't need a UID to generate associations but include for primary_email_ids
+                ImapDB.EmailIdentifier id = new ImapDB.EmailIdentifier(result.int64_at(0),
+                    new Imap.UID(result.int64_at(1)));
+                
+                // add this one to the list of primary (vector) email identifiers associations are
+                // keying off of
+                if (primary_email_ids != null)
+                    primary_email_ids.add(id);
+                
+                // don't bother searching for id's already associated with prior email
+                if (found_ids.contains(id))
+                    continue;
+                
+                Gee.HashSet<ImapDB.EmailIdentifier>? associated_ids = 
Conversation.do_fetch_associated_email_ids(
+                    cx, id, cancellable);
+                if (associated_ids == null || associated_ids.size == 0)
+                    continue;
+                
+                found_ids.add_all(associated_ids);
+                
+                AssociatedEmails? association = Conversation.do_generate_associations(cx, associated_ids,
+                    predicate, cancellable);
+                if (association != null && association.email_ids.size > 0) {
+                    list.add(association);
+                    if (list.size >= count)
+                        break;
+                }
+            }
+            
+            return Db.TransactionOutcome.DONE;
+        }, cancellable);
+        
+        // clear if returning null
+        if (list.size == 0 && primary_email_ids != null)
+            primary_email_ids.clear();
+        
+        return list.size > 0 ? list : null;
+    }
+    
     // Note that this does INCLUDES messages marked for removal
     // TODO: Let the user request a SortedSet, or have them provide the Set to add to
     public async Gee.Set<Imap.UID>? list_uids_by_range_async(Imap.UID first_uid, Imap.UID last_uid,
diff --git a/src/engine/imap-engine/imap-engine-minimal-folder.vala 
b/src/engine/imap-engine/imap-engine-minimal-folder.vala
index 9759d9c..b347ef3 100644
--- a/src/engine/imap-engine/imap-engine-minimal-folder.vala
+++ b/src/engine/imap-engine/imap-engine-minimal-folder.vala
@@ -5,7 +5,7 @@
  */
 
 private class Geary.ImapEngine.MinimalFolder : Geary.Folder, Geary.FolderSupport.Copy,
-    Geary.FolderSupport.Mark, Geary.FolderSupport.Move {
+    Geary.FolderSupport.Mark, Geary.FolderSupport.Move, Geary.FolderSupport.Associations {
     private const int FORCE_OPEN_REMOTE_TIMEOUT_SEC = 10;
     private const int DEFAULT_REESTABLISH_DELAY_MSEC = 500;
     private const int MAX_REESTABLISH_DELAY_MSEC = 60 * 1000;
@@ -1478,6 +1478,22 @@ private class Geary.ImapEngine.MinimalFolder : Geary.Folder, Geary.FolderSupport
         return new RevokableMove(_account, this, destination, prepare.prepared_for_move);
     }
     
+    public virtual async Gee.Collection<AssociatedEmails>? local_list_associated_emails_async(
+        EmailIdentifier? initial_id, int count, Account.EmailSearchPredicate? predicate,
+        Gee.Collection<EmailIdentifier>? primary_email_ids, Cancellable? cancellable = null) throws Error {
+        check_open("local_list_associated_emails_async");
+        if (initial_id != null)
+            check_id("local_list_associated_emails_async", initial_id);
+        
+        if (count == 0)
+            return null;
+        else if (count < 0)
+            throw new EngineError.BAD_PARAMETERS("count may not be negative");
+        
+        return yield local_folder.list_associated_emails_async((ImapDB.EmailIdentifier) initial_id,
+            count, predicate, primary_email_ids, cancellable);
+    }
+    
     public void schedule_op(ReplayOperation op) throws Error {
         check_open("schedule_op");
         


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]