[geary] Improve folder full-normalization time using IMAP SEARCH



commit 11d80384225bdad31177946d7825fe729cac7bfc
Author: Jim Nelson <jim yorba org>
Date:   Thu Jan 29 13:49:02 2015 -0800

    Improve folder full-normalization time using IMAP SEARCH
    
    When messages have been appended and removed from a folder, Geary has
    no choice but to do a full UID scan of the server to figure out
    exactly what's changed.  Prior code use a FETCH command, but SEARCH is
    a better choice because there's less overhead in a SEARCH result: only
    the UIDs are returned and they may be returned in a single line or in
    batches.  Also, there's no reason to return those results in a sorted
    set, which was driving up transaction time, so an unsorted set is
    returned.
    
    On my account, a full normalization of All Mail went from 8s to 1s
    with this change.

 src/console/main.vala                              |    9 +++-
 src/engine/imap/api/imap-folder.vala               |   49 ++++++++-----------
 src/engine/imap/command/imap-search-command.vala   |   12 ++++-
 src/engine/imap/command/imap-search-criteria.vala  |    4 +-
 src/engine/imap/response/imap-server-data.vala     |   16 +++----
 src/engine/imap/transport/imap-client-session.vala |    2 +-
 src/engine/util/util-collection.vala               |    6 +-
 7 files changed, 52 insertions(+), 46 deletions(-)
---
diff --git a/src/console/main.vala b/src/console/main.vala
index 418112a..dff6c20 100644
--- a/src/console/main.vala
+++ b/src/console/main.vala
@@ -543,8 +543,13 @@ class ImapConsole : Gtk.Window {
         foreach (string arg in args)
             criteria.and(new Geary.Imap.SearchCriterion.simple(arg));
         
-        cx.send_async.begin(new Geary.Imap.SearchCommand(criteria, cmd == "uid-search"),
-            null, on_searched);
+        Geary.Imap.SearchCommand search;
+        if (cmd == "uid-search")
+            search = new Geary.Imap.SearchCommand.uid(criteria);
+        else
+            search = new Geary.Imap.SearchCommand(criteria);
+        
+        cx.send_async.begin(search, null, on_searched);
     }
     
     private void on_searched(Object? source, AsyncResult result) {
diff --git a/src/engine/imap/api/imap-folder.vala b/src/engine/imap/api/imap-folder.vala
index f6dbee3..e31ec0b 100644
--- a/src/engine/imap/api/imap-folder.vala
+++ b/src/engine/imap/api/imap-folder.vala
@@ -33,7 +33,7 @@ private class Geary.Imap.Folder : BaseObject {
     private Nonblocking.Mutex cmd_mutex = new Nonblocking.Mutex();
     private Gee.HashMap<SequenceNumber, FetchedData> fetch_accumulator = new Gee.HashMap<
         SequenceNumber, FetchedData>();
-    private Gee.TreeSet<Imap.UID> search_accumulator = new Gee.TreeSet<Imap.UID>();
+    private Gee.Set<Imap.UID> search_accumulator = new Gee.HashSet<Imap.UID>();
     
     /**
      * A (potentially unsolicited) response from the server.
@@ -235,7 +235,7 @@ private class Geary.Imap.Folder : BaseObject {
             recent(total);
     }
     
-    private void on_search(Gee.List<int64?> seq_or_uid) {
+    private void on_search(int64[] seq_or_uid) {
         // All SEARCH from this class are UID SEARCH, so can reliably convert and add to
         // accumulator
         foreach (int64 uid in seq_or_uid) {
@@ -312,8 +312,8 @@ private class Geary.Imap.Folder : BaseObject {
     // FETCH commands can generate a FolderError.RETRY.  State will be updated to accomodate retry,
     // but all Commands must be regenerated to ensure new state is reflected in requests.
     private async Gee.Map<Command, StatusResponse>? exec_commands_async(Gee.Collection<Command> cmds,
-        out Gee.HashMap<SequenceNumber, FetchedData>? fetched,
-        out Gee.TreeSet<Imap.UID>? search_results, Cancellable? cancellable) throws Error {
+        out Gee.HashMap<SequenceNumber, FetchedData>? fetched, out Gee.Set<Imap.UID>? search_results,
+        Cancellable? cancellable) throws Error {
         int token = yield cmd_mutex.claim_async(cancellable);
         Gee.Map<Command, StatusResponse>? responses = null;
         // execute commands with mutex locked
@@ -338,7 +338,7 @@ private class Geary.Imap.Folder : BaseObject {
         
         if (search_accumulator.size > 0) {
             search_results = search_accumulator;
-            search_accumulator = new Gee.TreeSet<Imap.UID>();
+            search_accumulator = new Gee.HashSet<Imap.UID>();
         } else {
             search_results = null;
         }
@@ -420,33 +420,21 @@ private class Geary.Imap.Folder : BaseObject {
         }
     }
     
-    // Utility method for listing a UID range
-    // TODO: Offer parameter so a SortedSet could be returned (or, the caller must supply the Set)
+    // Utility method for listing UIDs on the remote within the supplied range
     public async Gee.Set<Imap.UID>? list_uids_async(MessageSet msg_set, Cancellable? cancellable)
         throws Error {
         check_open();
         
-        // TODO: exec_commands_async() can throw a RETRY error, but currently that doesn't affect
-        // this code path.  When it does, this will need to honor that error.
-        FetchCommand cmd = new FetchCommand.data_type(msg_set, FetchDataSpecifier.UID);
+        // Although FETCH could be used, SEARCH is more efficient in returning pure UID results,
+        // which is all we're interested in here
+        SearchCriteria criteria = new SearchCriteria(SearchCriterion.message_set(msg_set));
+        SearchCommand cmd = new SearchCommand.uid(criteria);
         
-        Gee.HashMap<SequenceNumber, FetchedData>? fetched;
-        yield exec_commands_async(Geary.iterate<Command>(cmd).to_array_list(), out fetched, null,
+        Gee.Set<Imap.UID>? search_results;
+        yield exec_commands_async(Geary.iterate<Command>(cmd).to_array_list(), null, out search_results,
             cancellable);
-        if (fetched == null || fetched.size == 0)
-            return null;
-        
-        // Because the number of UIDs can be immense, do hashing in the background
-        Gee.Set<Imap.UID> uids = new Gee.HashSet<Imap.UID>();
-        yield Nonblocking.Concurrent.global.schedule_async(() => {
-            foreach (FetchedData fetched_data in fetched.values) {
-                Imap.UID? uid = fetched_data.data_map.get(FetchDataSpecifier.UID) as Imap.UID;
-                if (uid != null)
-                    uids.add(uid);
-            }
-        }, cancellable);
         
-        return (uids.size > 0) ? uids : null;
+        return (search_results != null && search_results.size > 0) ? search_results : null;
     }
     
     private Gee.Collection<FetchCommand> assemble_list_commands(Imap.MessageSet msg_set,
@@ -739,12 +727,17 @@ private class Geary.Imap.Folder : BaseObject {
         
         // always perform a UID SEARCH
         Gee.Collection<Command> cmds = new Gee.ArrayList<Command>();
-        cmds.add(new SearchCommand(criteria, true));
+        cmds.add(new SearchCommand.uid(criteria));
         
-        Gee.TreeSet<Imap.UID>? search_results;
+        Gee.Set<Imap.UID>? search_results;
         yield exec_commands_async(cmds, null, out search_results, cancellable);
+        if (search_results == null || search_results.size == 0)
+            return null;
         
-        return (search_results != null && search_results.size > 0) ? search_results : null;
+        Gee.SortedSet<Imap.UID> tree = new Gee.TreeSet<Imap.UID>();
+        tree.add_all(search_results);
+        
+        return tree;
     }
     
     // NOTE: If fields are added or removed from this method, BASIC_FETCH_FIELDS *must* be updated
diff --git a/src/engine/imap/command/imap-search-command.vala 
b/src/engine/imap/command/imap-search-command.vala
index 84c6e4e..b5bb723 100644
--- a/src/engine/imap/command/imap-search-command.vala
+++ b/src/engine/imap/command/imap-search-command.vala
@@ -14,8 +14,16 @@ public class Geary.Imap.SearchCommand : Command {
     public const string NAME = "search";
     public const string UID_NAME = "uid search";
     
-    public SearchCommand(SearchCriteria criteria, bool is_uid) {
-        base (is_uid ? UID_NAME : NAME);
+    public SearchCommand(SearchCriteria criteria) {
+        base (NAME);
+        
+        // append rather than add the criteria, so the top-level criterion appear in the top-level
+        // list and not as a child list
+        append(criteria);
+    }
+    
+    public SearchCommand.uid(SearchCriteria criteria) {
+        base (UID_NAME);
         
         // append rather than add the criteria, so the top-level criterion appear in the top-level
         // list and not as a child list
diff --git a/src/engine/imap/command/imap-search-criteria.vala 
b/src/engine/imap/command/imap-search-criteria.vala
index ce0575c..a1c463e 100644
--- a/src/engine/imap/command/imap-search-criteria.vala
+++ b/src/engine/imap/command/imap-search-criteria.vala
@@ -23,7 +23,9 @@
  */
 
 public class Geary.Imap.SearchCriteria : ListParameter {
-    public SearchCriteria() {
+    public SearchCriteria(SearchCriterion? first = null) {
+        if (first != null)
+            add_all(first.to_parameters());
     }
     
     /**
diff --git a/src/engine/imap/response/imap-server-data.vala b/src/engine/imap/response/imap-server-data.vala
index f4d6101..e8babc6 100644
--- a/src/engine/imap/response/imap-server-data.vala
+++ b/src/engine/imap/response/imap-server-data.vala
@@ -147,18 +147,16 @@ public class Geary.Imap.ServerData : ServerResponse {
      *
      * @throws ImapError.INVALID if not a { link ServerDataType.SEARCH} value.
      */
-    public Gee.List<int64?> get_search() throws ImapError {
+    public int64[] get_search() throws ImapError {
         if (server_data_type != ServerDataType.SEARCH)
             throw new ImapError.INVALID("Not SEARCH data: %s", to_string());
         
-        Gee.List<int64?> results = new Gee.ArrayList<int64?>();
-        for (int ctr = 2; ctr < size; ctr++) {
-            // can't directly return the result from as_int() into results as a Vala bug causes a
-            // build policy violation for uncast int -> pointer on 64-bit architectures:
-            // https://bugzilla.gnome.org/show_bug.cgi?id=720437
-            int64 result = get_as_string(ctr).as_int64(0);
-            results.add(result);
-        }
+        if (size <= 2)
+            return new int64[0];
+        
+        int64[] results = new int64[size - 2];
+        for (int ctr = 2; ctr < size; ctr++)
+            results[ctr - 2] = get_as_string(ctr).as_int64(0);
         
         return results;
     }
diff --git a/src/engine/imap/transport/imap-client-session.vala 
b/src/engine/imap/transport/imap-client-session.vala
index e1549c4..4ed00fa 100644
--- a/src/engine/imap/transport/imap-client-session.vala
+++ b/src/engine/imap/transport/imap-client-session.vala
@@ -221,7 +221,7 @@ public class Geary.Imap.ClientSession : BaseObject {
     
     public signal void recent(int count);
     
-    public signal void search(Gee.List<int64?> seq_or_uid);
+    public signal void search(int64[] seq_or_uid);
     
     public signal void status(StatusData status_data);
     
diff --git a/src/engine/util/util-collection.vala b/src/engine/util/util-collection.vala
index dc9f43d..184b551 100644
--- a/src/engine/util/util-collection.vala
+++ b/src/engine/util/util-collection.vala
@@ -127,14 +127,14 @@ public Gee.MultiMap<V, K> reverse_multi_map<K, V>(Gee.MultiMap<K, V> map) {
 /**
  * To be used by a Hashable's to_hash() method.
  */
-public inline static uint int64_hash(int64 value) {
+public inline uint int64_hash(int64 value) {
     return hash_memory(&value, sizeof(int64));
 }
 
 /**
  * To be used as hash_func for Gee collections.
  */
-public uint int64_hash_func(int64? n) {
+public inline uint int64_hash_func(int64? n) {
     return hash_memory((uint8 *) n, sizeof(int64));
 }
 
@@ -152,7 +152,7 @@ public bool int64_equal_func(int64? a, int64? b) {
  * A rotating-XOR hash that can be used to hash memory buffers of any size.
  */
 public uint hash_memory(void *ptr, size_t bytes) {
-    if (bytes == 0)
+    if (ptr == null || bytes == 0)
         return 0;
     
     uint8 *u8 = (uint8 *) ptr;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]