[geary] Avoid excessive IMAP cmd length with max. MessageSet size: Bug #734757



commit 9bcfbfed24dff7630ff36c6235d19be83398224b
Author: Jim Nelson <jim yorba org>
Date:   Tue Aug 19 13:35:05 2014 -0700

    Avoid excessive IMAP cmd length with max. MessageSet size: Bug #734757
    
    When building sparse MessageSets, instead of constructing single one
    it's possible for multiple to be returned, indicating that the number
    of values exceeds a defined limit.  This may require multiple I/Os to
    complete all commands, although they can be pipelined.

 .../imap-engine/imap-engine-minimal-folder.vala    |   66 ++++++-----
 .../imap-engine-abstract-list-email.vala           |   10 +-
 .../replay-ops/imap-engine-copy-email.vala         |    5 +-
 .../replay-ops/imap-engine-mark-email.vala         |    9 +-
 .../replay-ops/imap-engine-move-email.vala         |   10 +-
 .../replay-ops/imap-engine-remove-email.vala       |    9 +-
 src/engine/imap/command/imap-message-set.vala      |  117 +++++++++++---------
 7 files changed, 127 insertions(+), 99 deletions(-)
---
diff --git a/src/engine/imap-engine/imap-engine-minimal-folder.vala 
b/src/engine/imap-engine/imap-engine-minimal-folder.vala
index 2808ab0..ceb9f2c 100644
--- a/src/engine/imap-engine/imap-engine-minimal-folder.vala
+++ b/src/engine/imap-engine/imap-engine-minimal-folder.vala
@@ -316,13 +316,17 @@ private class Geary.ImapEngine.MinimalFolder : Geary.AbstractFolder, Geary.Folde
         
         // fetch from the server the local store's required flags for all appended/inserted messages
         // (which is simply equal to all remaining remote UIDs)
-        Gee.List<Geary.Email>? to_create = null;
+        Gee.List<Geary.Email> to_create = new Gee.ArrayList<Geary.Email>();
         if (remote_uids.size > 0) {
             // for new messages, get the local store's required fields (which provide duplicate
             // detection)
-            to_create = yield remote_folder.list_email_async(
-                new Imap.MessageSet.uid_sparse(remote_uids.to_array()), ImapDB.Folder.REQUIRED_FIELDS,
-                cancellable);
+            Gee.List<Imap.MessageSet> msg_sets = Imap.MessageSet.uid_sparse(remote_uids);
+            foreach (Imap.MessageSet msg_set in msg_sets) {
+                Gee.List<Geary.Email>? list = yield remote_folder.list_email_async(msg_set,
+                    ImapDB.Folder.REQUIRED_FIELDS, cancellable);
+                if (list != null && list.size > 0)
+                    to_create.add_all(list);
+            }
         }
         
         check_open("normalize_folders (list remote appended/inserted required fields)");
@@ -332,7 +336,7 @@ private class Geary.ImapEngine.MinimalFolder : Geary.AbstractFolder, Geary.Folde
         Gee.Set<ImapDB.EmailIdentifier> locally_appended_ids = new Gee.HashSet<ImapDB.EmailIdentifier>();
         Gee.Set<ImapDB.EmailIdentifier> inserted_ids = new Gee.HashSet<ImapDB.EmailIdentifier>();
         Gee.Set<ImapDB.EmailIdentifier> locally_inserted_ids = new Gee.HashSet<ImapDB.EmailIdentifier>();
-        if (to_create != null && to_create.size > 0) {
+        if (to_create.size > 0) {
             Gee.Map<Email, bool>? created_or_merged = yield local_folder.create_or_merge_email_async(
                 to_create, cancellable);
             assert(created_or_merged != null);
@@ -931,34 +935,36 @@ private class Geary.ImapEngine.MinimalFolder : Geary.AbstractFolder, Geary.Folde
         Gee.HashSet<Geary.EmailIdentifier> created = new Gee.HashSet<Geary.EmailIdentifier>();
         Gee.HashSet<Geary.EmailIdentifier> appended = new Gee.HashSet<Geary.EmailIdentifier>();
         try {
-            Imap.MessageSet msg_set = new Imap.MessageSet.sparse(remote_positions.to_array());
-            Gee.List<Geary.Email>? list = yield remote_folder.list_email_async(msg_set,
-                ImapDB.Folder.REQUIRED_FIELDS, null);
-            if (list != null && list.size > 0) {
-                debug("%s do_replay_appended_message: %d new messages in %s", to_string(),
-                    list.size, msg_set.to_string());
-                
-                // need to report both if it was created (not known before) and appended (which
-                // could mean created or simply a known email associated with this folder)
-                Gee.Map<Geary.Email, bool> created_or_merged =
-                    yield local_folder.create_or_merge_email_async(list, null);
-                foreach (Geary.Email email in created_or_merged.keys) {
-                    // true means created
-                    if (created_or_merged.get(email)) {
-                        debug("%s do_replay_appended_message: appended email ID %s added",
-                            to_string(), email.id.to_string());
+            Gee.List<Imap.MessageSet> msg_sets = Imap.MessageSet.sparse(remote_positions);
+            foreach (Imap.MessageSet msg_set in msg_sets) {
+                Gee.List<Geary.Email>? list = yield remote_folder.list_email_async(msg_set,
+                    ImapDB.Folder.REQUIRED_FIELDS, null);
+                if (list != null && list.size > 0) {
+                    debug("%s do_replay_appended_message: %d new messages in %s", to_string(),
+                        list.size, msg_set.to_string());
+                    
+                    // need to report both if it was created (not known before) and appended (which
+                    // could mean created or simply a known email associated with this folder)
+                    Gee.Map<Geary.Email, bool> created_or_merged =
+                        yield local_folder.create_or_merge_email_async(list, null);
+                    foreach (Geary.Email email in created_or_merged.keys) {
+                        // true means created
+                        if (created_or_merged.get(email)) {
+                            debug("%s do_replay_appended_message: appended email ID %s added",
+                                to_string(), email.id.to_string());
+                            
+                            created.add(email.id);
+                        } else {
+                            debug("%s do_replay_appended_message: appended email ID %s associated",
+                                to_string(), email.id.to_string());
+                        }
                         
-                        created.add(email.id);
-                    } else {
-                        debug("%s do_replay_appended_message: appended email ID %s associated",
-                            to_string(), email.id.to_string());
+                        appended.add(email.id);
                     }
-                    
-                    appended.add(email.id);
+                } else {
+                    debug("%s do_replay_appended_message: no new messages in %s", to_string(),
+                        msg_set.to_string());
                 }
-            } else {
-                debug("%s do_replay_appended_message: no new messages in %s", to_string(),
-                    msg_set.to_string());
             }
         } catch (Error err) {
             debug("%s do_replay_appended_message: Unable to process: %s",
diff --git a/src/engine/imap-engine/replay-ops/imap-engine-abstract-list-email.vala 
b/src/engine/imap-engine/replay-ops/imap-engine-abstract-list-email.vala
index ebe90c0..1e76607 100644
--- a/src/engine/imap-engine/replay-ops/imap-engine-abstract-list-email.vala
+++ b/src/engine/imap-engine/replay-ops/imap-engine-abstract-list-email.vala
@@ -139,10 +139,12 @@ private abstract class Geary.ImapEngine.AbstractListEmail : Geary.ImapEngine.Sen
             if (unfulfilled_uids.size == 0)
                 continue;
             
-            RemoteBatchOperation remote_op = new RemoteBatchOperation(owner,
-                new Imap.MessageSet.uid_sparse(unfulfilled_uids.to_array()), unfulfilled_fields,
-                required_fields);
-            batch.add(remote_op);
+            Gee.List<Imap.MessageSet> msg_sets = Imap.MessageSet.uid_sparse(unfulfilled_uids);
+            foreach (Imap.MessageSet msg_set in msg_sets) {
+                RemoteBatchOperation remote_op = new RemoteBatchOperation(owner, msg_set,
+                    unfulfilled_fields, required_fields);
+                batch.add(remote_op);
+            }
         }
         
         yield batch.execute_all_async(cancellable);
diff --git a/src/engine/imap-engine/replay-ops/imap-engine-copy-email.vala 
b/src/engine/imap-engine/replay-ops/imap-engine-copy-email.vala
index 9fec2ae..0d68888 100644
--- a/src/engine/imap-engine/replay-ops/imap-engine-copy-email.vala
+++ b/src/engine/imap-engine/replay-ops/imap-engine-copy-email.vala
@@ -45,8 +45,9 @@ private class Geary.ImapEngine.CopyEmail : Geary.ImapEngine.SendReplayOperation
             ImapDB.Folder.ListFlags.NONE, cancellable);
         
         if (uids != null && uids.size > 0) {
-            yield engine.remote_folder.copy_email_async(
-                new Imap.MessageSet.uid_sparse(uids.to_array()), destination, cancellable);
+            Gee.List<Imap.MessageSet> msg_sets = Imap.MessageSet.uid_sparse(uids);
+            foreach (Imap.MessageSet msg_set in msg_sets)
+                yield engine.remote_folder.copy_email_async(msg_set, destination, cancellable);
         }
         
         return ReplayOperation.Status.COMPLETED;
diff --git a/src/engine/imap-engine/replay-ops/imap-engine-mark-email.vala 
b/src/engine/imap-engine/replay-ops/imap-engine-mark-email.vala
index e40f3fa..0965920 100644
--- a/src/engine/imap-engine/replay-ops/imap-engine-mark-email.vala
+++ b/src/engine/imap-engine/replay-ops/imap-engine-mark-email.vala
@@ -63,9 +63,12 @@ private class Geary.ImapEngine.MarkEmail : Geary.ImapEngine.SendReplayOperation
         if (original_flags.size == 0)
             return ReplayOperation.Status.COMPLETED;
         
-        yield engine.remote_folder.mark_email_async(
-            new Imap.MessageSet.uid_sparse(ImapDB.EmailIdentifier.to_uids(original_flags.keys).to_array()),
-            flags_to_add, flags_to_remove, cancellable);
+        Gee.List<Imap.MessageSet> msg_sets = Imap.MessageSet.uid_sparse(
+            ImapDB.EmailIdentifier.to_uids(original_flags.keys));
+        foreach (Imap.MessageSet msg_set in msg_sets) {
+            yield engine.remote_folder.mark_email_async(msg_set, flags_to_add, flags_to_remove,
+                cancellable);
+        }
         
         return ReplayOperation.Status.COMPLETED;
     }
diff --git a/src/engine/imap-engine/replay-ops/imap-engine-move-email.vala 
b/src/engine/imap-engine/replay-ops/imap-engine-move-email.vala
index 04d5a19..236932b 100644
--- a/src/engine/imap-engine/replay-ops/imap-engine-move-email.vala
+++ b/src/engine/imap-engine/replay-ops/imap-engine-move-email.vala
@@ -67,10 +67,12 @@ private class Geary.ImapEngine.MoveEmail : Geary.ImapEngine.SendReplayOperation
         if (cancellable != null && cancellable.is_cancelled())
             throw new IOError.CANCELLED("Move email to %s cancelled", engine.remote_folder.to_string());
         
-        Imap.MessageSet msg_set = new 
Imap.MessageSet.uid_sparse(ImapDB.EmailIdentifier.to_uids(moved_ids).to_array());
-        
-        yield engine.remote_folder.copy_email_async(msg_set, destination, null);
-        yield engine.remote_folder.remove_email_async(msg_set, null);
+        Gee.List<Imap.MessageSet> msg_sets = Imap.MessageSet.uid_sparse(
+            ImapDB.EmailIdentifier.to_uids(moved_ids));
+        foreach (Imap.MessageSet msg_set in msg_sets) {
+            yield engine.remote_folder.copy_email_async(msg_set, destination, null);
+            yield engine.remote_folder.remove_email_async(msg_set, null);
+        }
         
         return ReplayOperation.Status.COMPLETED;
     }
diff --git a/src/engine/imap-engine/replay-ops/imap-engine-remove-email.vala 
b/src/engine/imap-engine/replay-ops/imap-engine-remove-email.vala
index 99bc2d4..234ab36 100644
--- a/src/engine/imap-engine/replay-ops/imap-engine-remove-email.vala
+++ b/src/engine/imap-engine/replay-ops/imap-engine-remove-email.vala
@@ -60,11 +60,10 @@ private class Geary.ImapEngine.RemoveEmail : Geary.ImapEngine.SendReplayOperatio
         // Remove from server. Note that this causes the receive replay queue to kick into
         // action, removing the e-mail but *NOT* firing a signal; the "remove marker" indicates
         // that the signal has already been fired.
-        if (removed_ids.size > 0) {
-            yield engine.remote_folder.remove_email_async(
-                new Imap.MessageSet.uid_sparse(ImapDB.EmailIdentifier.to_uids(removed_ids).to_array()),
-                cancellable);
-        }
+        Gee.List<Imap.MessageSet> msg_sets = Imap.MessageSet.uid_sparse(
+            ImapDB.EmailIdentifier.to_uids(removed_ids));
+        foreach (Imap.MessageSet msg_set in msg_sets)
+            yield engine.remote_folder.remove_email_async(msg_set, cancellable);
         
         return ReplayOperation.Status.COMPLETED;
     }
diff --git a/src/engine/imap/command/imap-message-set.vala b/src/engine/imap/command/imap-message-set.vala
index 9dc7f38..44f6014 100644
--- a/src/engine/imap/command/imap-message-set.vala
+++ b/src/engine/imap/command/imap-message-set.vala
@@ -16,6 +16,12 @@ extern void qsort(void *base, size_t num, size_t size, CompareFunc compare_func)
  */
 
 public class Geary.Imap.MessageSet : BaseObject {
+    // 2^32 in base 10 requires ten bytes (characters) on the wire plus a separator between each
+    // one, i.e. ~11 bytes per seqnum or UID ... to keep lists below server maximums, this value
+    // is set to keep max. command length somewhere under 1K (including tag, command, parameters,
+    // etc.)
+    private const int MAX_SPARSE_VALUES_PER_SET = 50;
+    
     /**
      * True if the { link MessageSet} was created with a UID or a UID range.
      *
@@ -67,7 +73,7 @@ public class Geary.Imap.MessageSet : BaseObject {
         assert(low.value > 0);
         assert(high.value > 0);
         
-        // corrent ordering
+        // correct ordering
         if (low.value > high.value) {
             UID swap = low;
             low = high;
@@ -95,65 +101,62 @@ public class Geary.Imap.MessageSet : BaseObject {
         is_uid = true;
     }
     
-    public MessageSet.sparse(SequenceNumber[] seq_nums) {
-        value = build_sparse_range(seq_array_to_int64(seq_nums));
+    public MessageSet.custom(string custom) {
+        value = custom;
     }
     
-    public MessageSet.uid_sparse(UID[] msg_uids) {
-        value = build_sparse_range(uid_array_to_int64(msg_uids));
+    public MessageSet.uid_custom(string custom) {
+        value = custom;
         is_uid = true;
     }
     
-    public MessageSet.sparse_to_highest(SequenceNumber[] seq_nums) {
-        value = "%s:*".printf(build_sparse_range(seq_array_to_int64(seq_nums)));
+    /**
+     * Convert a collection of { link SequenceNumber}s into a list of { link MessageSet}s.
+     *
+     * Although this could return a single MessageSet, large collections could create an IMAP
+     * command beyond the server maximum, and so they will be broken up into multiple sets.
+     */
+    public static Gee.List<MessageSet> sparse(Gee.Collection<SequenceNumber> seq_nums) {
+        return build_sparse_sets(seq_array_to_int64(seq_nums), false);
     }
     
-    public MessageSet.multirange(MessageSet[] msg_sets) {
-        StringBuilder builder = new StringBuilder();
-        for (int ctr = 0; ctr < msg_sets.length; ctr++) {
-            unowned MessageSet msg_set = msg_sets[ctr];
-            
-            if (ctr < (msg_sets.length - 1))
-                builder.append_printf("%s:", msg_set.value);
-            else
-                builder.append(msg_set.value);
-        }
-        
-        value = builder.str;
+    /**
+     * Convert a collection of { link UID}s into a list of { link MessageSet}s.
+     *
+     * Although this could return a single MessageSet, large collections could create an IMAP
+     * command beyond the server maximum, and so they will be broken up into multiple sets.
+     */
+    public static Gee.List<MessageSet> uid_sparse(Gee.Collection<UID> msg_uids) {
+        return build_sparse_sets(uid_array_to_int64(msg_uids), true);
     }
     
-    public MessageSet.multisparse(MessageSet[] msg_sets) {
-        StringBuilder builder = new StringBuilder();
-        for (int ctr = 0; ctr < msg_sets.length; ctr++) {
-            unowned MessageSet msg_set = msg_sets[ctr];
+    // create zero or more MessageSets of no more than MAX_SPARSE_VALUES_PER_SET UIDs/sequence
+    // numbers
+    private static Gee.List<MessageSet> build_sparse_sets(int64[] sorted, bool is_uid) {
+        Gee.List<MessageSet> list = new Gee.ArrayList<MessageSet>();
+        
+        int start = 0;
+        for (;;) {
+            if (start >= sorted.length)
+                break;
+            
+            int end = (start + MAX_SPARSE_VALUES_PER_SET).clamp(0, sorted.length);
+            unowned int64[] slice = sorted[start:end];
             
-            if (ctr < (msg_sets.length - 1))
-                builder.append_printf("%s,", msg_set.value);
-            else
-                builder.append(msg_set.value);
+            string sparse_range = build_sparse_range(slice);
+            list.add(is_uid ? new MessageSet.uid_custom(sparse_range) : new MessageSet.custom(sparse_range));
+            
+            start = end;
         }
         
-        value = builder.str;
-    }
-    
-    public MessageSet.custom(string custom) {
-        value = custom;
+        return list;
     }
     
-    public MessageSet.uid_custom(string custom) {
-        value = custom;
-        is_uid = true;
-    }
-    
-    // Builds sparse range of either UID values or message numbers.
-    // NOTE: This method assumes the supplied array is internally allocated, and so an in-place sort
-    // is allowable
+    // Builds sparse range of either UID values or sequence numbers.  Values should be sorted before
+    // calling to maximum finding runs.
     private static string build_sparse_range(int64[] seq_nums) {
         assert(seq_nums.length > 0);
         
-        // sort array to search for spans
-        qsort(seq_nums, seq_nums.length, sizeof(int64), Numeric.int64_compare);
-        
         int64 start_of_span = -1;
         int64 last_seq_num = -1;
         int span_count = 0;
@@ -208,18 +211,30 @@ public class Geary.Imap.MessageSet : BaseObject {
         return builder.str;
     }
     
-    private static int64[] seq_array_to_int64(SequenceNumber[] seq_nums) {
-        int64[] ret = new int64[0];
-        foreach (SequenceNumber seq_num in seq_nums)
-            ret += (int64) seq_num.value;
+    private static int64[] seq_array_to_int64(Gee.Collection<SequenceNumber> seq_nums) {
+        // guarantee sorted (to maximum finding runs in build_sparse_range())
+        Gee.TreeSet<SequenceNumber> sorted = new Gee.TreeSet<SequenceNumber>();
+        sorted.add_all(seq_nums);
+        
+        // build sorted array
+        int64[] ret = new int64[sorted.size];
+        int index = 0;
+        foreach (SequenceNumber seq_num in sorted)
+            ret[index++] = (int64) seq_num.value;
         
         return ret;
     }
     
-    private static int64[] uid_array_to_int64(UID[] msg_uids) {
-        int64[] ret = new int64[0];
-        foreach (UID uid in msg_uids)
-            ret += uid.value;
+    private static int64[] uid_array_to_int64(Gee.Collection<UID> msg_uids) {
+        // guarantee sorted (to maximize finding runs in build_sparse_range())
+        Gee.TreeSet<UID> sorted = new Gee.TreeSet<UID>();
+        sorted.add_all(msg_uids);
+        
+        // build sorted array
+        int64[] ret = new int64[sorted.size];
+        int index = 0;
+        foreach (UID uid in sorted)
+            ret[index++] = uid.value;
         
         return ret;
     }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]