[geary] Improved search experience: Bug #720361



commit 533ab75ee37af935597d3e72265c97cf4cd32e9f
Author: Jim Nelson <jim yorba org>
Date:   Tue Dec 16 12:21:09 2014 -0800

    Improved search experience: Bug #720361
    
    This introduces a new full-text search algorithm that attempts to
    curb the effects of overstemming in the Porter Snowball stemmer.
    The FTS table will be regenerated with this update.
    
    The crux of this new algorithm is a configurable heuristic that
    reduces stemmed matching.  The configuration is not available via the
    UI (I suspect it will only confuse users) but can be changed by power
    users via GSettings.  More information is available at:
    
    https://wiki.gnome.org/Apps/Geary/FullTextSearchStrategy

 desktop/org.yorba.geary.gschema.xml                |    7 +
 sql/CMakeLists.txt                                 |    1 +
 sql/version-023.sql                                |   21 +
 src/CMakeLists.txt                                 |    2 +
 src/client/application/geary-config.vala           |   38 ++
 src/client/application/geary-controller.vala       |    8 +-
 .../conversation-viewer/conversation-viewer.vala   |   29 ++-
 src/engine/abstract/geary-abstract-account.vala    |    4 +-
 src/engine/api/geary-account.vala                  |   21 +-
 src/engine/api/geary-search-folder.vala            |   21 +-
 src/engine/api/geary-search-query.vala             |   80 +++--
 src/engine/imap-db/imap-db-account.vala            |  440 +++++++++++++++-----
 src/engine/imap-db/imap-db-database.vala           |   27 ++-
 src/engine/imap-db/imap-db-search-query.vala       |  121 ++++++
 src/engine/imap-db/imap-db-search-term.vala        |   62 +++
 .../imap-engine/imap-engine-generic-account.vala   |    6 +-
 16 files changed, 737 insertions(+), 151 deletions(-)
---
diff --git a/desktop/org.yorba.geary.gschema.xml b/desktop/org.yorba.geary.gschema.xml
index b619d13..3e5bb33 100644
--- a/desktop/org.yorba.geary.gschema.xml
+++ b/desktop/org.yorba.geary.gschema.xml
@@ -74,6 +74,13 @@
         <summary>whether to compose emails in HTML</summary>
         <description>True to compose emails in HTML; false for plain text.</description>
     </key>
+    
+    <key name="search-strategy" type="s">
+        <default>"conservative"</default>
+        <summary>Advisory strategy for full-text searching</summary>
+        <description>Acceptable values are EXACT, CONSERVATIVE, AGGRESSIVE, and HORIZON.</description>
+    </key>
+    
 </schema>
 
 </schemalist>
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index bbd8f91..40184ce 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -22,3 +22,4 @@ install(FILES version-019.sql DESTINATION ${SQL_DEST})
 install(FILES version-020.sql DESTINATION ${SQL_DEST})
 install(FILES version-021.sql DESTINATION ${SQL_DEST})
 install(FILES version-022.sql DESTINATION ${SQL_DEST})
+install(FILES version-023.sql DESTINATION ${SQL_DEST})
diff --git a/sql/version-023.sql b/sql/version-023.sql
new file mode 100644
index 0000000..d282516
--- /dev/null
+++ b/sql/version-023.sql
@@ -0,0 +1,21 @@
+--
+-- Database upgrade to add FTS tokenize virtual table, which allows for querying the tokenizer
+-- directly for stemmed words, and dropping the stemmed FTS table for an unstemmed one.  We now
+-- use the stemmer manually to generate search queries.
+--
+
+DROP TABLE MessageSearchTable;
+
+CREATE VIRTUAL TABLE MessageSearchTable USING fts4(
+    body,
+    attachment,
+    subject,
+    from_field,
+    receivers,
+    cc,
+    bcc,
+    
+    tokenize=simple,
+    prefix="2,4,6,8,10"
+);
+
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 13d1f2b..dbd4d98 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -170,6 +170,8 @@ engine/imap-db/imap-db-email-identifier.vala
 engine/imap-db/imap-db-folder.vala
 engine/imap-db/imap-db-message-addresses.vala
 engine/imap-db/imap-db-message-row.vala
+engine/imap-db/imap-db-search-query.vala
+engine/imap-db/imap-db-search-term.vala
 engine/imap-db/imap-db-search-email-identifier.vala
 engine/imap-db/outbox/smtp-outbox-email-identifier.vala
 engine/imap-db/outbox/smtp-outbox-email-properties.vala
diff --git a/src/client/application/geary-config.vala b/src/client/application/geary-config.vala
index 2e84304..7976167 100644
--- a/src/client/application/geary-config.vala
+++ b/src/client/application/geary-config.vala
@@ -135,5 +135,43 @@ public class Configuration {
         if (!settings.set_boolean(name, value))
             message("Unable to set configuration value %s = %s", name, value.to_string());
     }
+    
+    public Geary.SearchQuery.Strategy get_search_strategy() {
+        switch (settings.get_string("search-strategy").down()) {
+            case "exact":
+                return Geary.SearchQuery.Strategy.EXACT;
+            
+            case "aggressive":
+                return Geary.SearchQuery.Strategy.AGGRESSIVE;
+            
+            case "horizon":
+                return Geary.SearchQuery.Strategy.HORIZON;
+            
+            case "conservative":
+            default:
+                return Geary.SearchQuery.Strategy.CONSERVATIVE;
+        }
+    }
+    
+    public void set_search_strategy(Geary.SearchQuery.Strategy strategy) {
+        switch (strategy) {
+            case Geary.SearchQuery.Strategy.EXACT:
+                settings.set_string("search-strategy", "exact");
+            break;
+            
+            case Geary.SearchQuery.Strategy.AGGRESSIVE:
+                settings.set_string("search-strategy", "aggressive");
+            break;
+            
+            case Geary.SearchQuery.Strategy.HORIZON:
+                settings.set_string("search-strategy", "horizon");
+            break;
+            
+            case Geary.SearchQuery.Strategy.CONSERVATIVE:
+            default:
+                settings.set_string("search-strategy", "conservative");
+            break;
+        }
+    }
 }
 
diff --git a/src/client/application/geary-controller.vala b/src/client/application/geary-controller.vala
index 808f3f4..39f5c75 100644
--- a/src/client/application/geary-controller.vala
+++ b/src/client/application/geary-controller.vala
@@ -81,7 +81,7 @@ public class GearyController : Geary.BaseObject {
     private const string MOVE_MESSAGE_TOOLTIP_MULTIPLE = _("Move conversations");
     
     private const int SELECT_FOLDER_TIMEOUT_USEC = 100 * 1000;
-    private const int SEARCH_TIMEOUT_MSEC = 100;
+    private const int SEARCH_TIMEOUT_MSEC = 250;
     
     private const string PROP_ATTEMPT_OPEN_ACCOUNT = "attempt-open-account";
     
@@ -2512,7 +2512,8 @@ public class GearyController : Geary.BaseObject {
         
         cancel_search(); // Stop any search in progress.
         
-        folder.set_search_query(search_text, cancellable_search);
+        folder.search(search_text, GearyApplication.instance.config.get_search_strategy(),
+            cancellable_search);
         
         main_window.folder_list.set_search(folder);
         search_text_changed(main_window.main_toolbar.search_text);
@@ -2523,7 +2524,8 @@ public class GearyController : Geary.BaseObject {
         // search after a quick delay when they finish typing.
         if (search_timeout_id != 0)
             Source.remove(search_timeout_id);
-        search_timeout_id = Timeout.add(SEARCH_TIMEOUT_MSEC, on_search_timeout);
+        
+        search_timeout_id = Timeout.add(SEARCH_TIMEOUT_MSEC, on_search_timeout, Priority.LOW);
     }
     
     private bool on_search_timeout() {
diff --git a/src/client/conversation-viewer/conversation-viewer.vala b/src/client/conversation-viewer/conversation-viewer.vala
index ce113eb..5bb4847 100644
--- a/src/client/conversation-viewer/conversation-viewer.vala
+++ b/src/client/conversation-viewer/conversation-viewer.vala
@@ -457,11 +457,28 @@ public class ConversationViewer : Gtk.Box {
         }
     }
     
-    private void on_search_text_changed(string? query) {
+    private void on_search_text_changed(Geary.SearchQuery? query) {
         if (query != null)
             highlight_search_terms.begin();
     }
     
+    // This applies a fudge-factor set of matches when the database results
+    // aren't entirely satisfactory, such as when you search for an email
+    // address and the database tokenizes out the @ and ., etc.  It's not meant
+    // to be comprehensive, just a little extra highlighting applied to make
+    // the results look a little closer to what you typed.
+    private void add_literal_matches(string raw_query, Gee.Set<string>? search_matches) {
+        foreach (string word in raw_query.split(" ")) {
+            if (word.has_suffix("\""))
+                word = word.substring(0, word.length - 1);
+            if (word.has_prefix("\""))
+                word = word.substring(1);
+            
+            if (!Geary.String.is_empty_or_whitespace(word))
+                search_matches.add(word);
+        }
+    }
+    
     private async void highlight_search_terms() {
         if (search_folder == null)
             return;
@@ -475,8 +492,13 @@ public class ConversationViewer : Gtk.Box {
             ids.add(email.id);
         
         try {
-            Gee.Collection<string>? search_matches = yield search_folder.get_search_matches_async(
+            Gee.Set<string>? search_matches = yield search_folder.get_search_matches_async(
                 ids, cancellable_fetch);
+            if (search_matches == null)
+                search_matches = new Gee.HashSet<string>();
+            
+            if (search_folder.search_query != null)
+                add_literal_matches(search_folder.search_query.raw, search_matches);
             
             // Webkit's highlighting is ... weird.  In order to actually see
             // all the highlighting you're applying, it seems necessary to
@@ -484,8 +506,7 @@ public class ConversationViewer : Gtk.Box {
             // seems that shorter strings will overwrite longer ones, and
             // you're left with incomplete highlighting.
             Gee.ArrayList<string> ordered_matches = new Gee.ArrayList<string>();
-            if (search_matches != null)
-                ordered_matches.add_all(search_matches);
+            ordered_matches.add_all(search_matches);
             ordered_matches.sort((a, b) => a.length - b.length);
             
             foreach(string match in ordered_matches)
diff --git a/src/engine/abstract/geary-abstract-account.vala b/src/engine/abstract/geary-abstract-account.vala
index 7c194f8..ea24d18 100644
--- a/src/engine/abstract/geary-abstract-account.vala
+++ b/src/engine/abstract/geary-abstract-account.vala
@@ -118,11 +118,13 @@ public abstract class Geary.AbstractAccount : BaseObject, Geary.Account {
     public abstract async Geary.Email local_fetch_email_async(Geary.EmailIdentifier email_id,
         Geary.Email.Field required_fields, Cancellable? cancellable = null) throws Error;
     
+    public abstract Geary.SearchQuery open_search(string query, Geary.SearchQuery.Strategy strategy);
+    
     public abstract async Gee.Collection<Geary.EmailIdentifier>? local_search_async(Geary.SearchQuery query,
         int limit = 100, int offset = 0, Gee.Collection<Geary.FolderPath?>? folder_blacklist = null,
         Gee.Collection<Geary.EmailIdentifier>? search_ids = null, Cancellable? cancellable = null) throws Error;
     
-    public abstract async Gee.Collection<string>? get_search_matches_async(Geary.SearchQuery query,
+    public abstract async Gee.Set<string>? get_search_matches_async(Geary.SearchQuery query,
         Gee.Collection<Geary.EmailIdentifier> ids, Cancellable? cancellable = null) throws Error;
     
     public abstract async Gee.MultiMap<Geary.EmailIdentifier, Geary.FolderPath>? get_containing_folders_async(
diff --git a/src/engine/api/geary-account.vala b/src/engine/api/geary-account.vala
index a144566..369bf04 100644
--- a/src/engine/api/geary-account.vala
+++ b/src/engine/api/geary-account.vala
@@ -323,6 +323,23 @@ public interface Geary.Account : BaseObject {
         Geary.Email.Field required_fields, Cancellable? cancellable = null) throws Error;
     
     /**
+     * Create a new {@link SearchQuery} for this {@link Account}.
+     *
+     * See {@link Geary.SearchQuery.Strategy} for more information about how it's interpreted by the
+     * Engine.  In particular, note that it's an advisory parameter only and may have no effect,
+     * especially on server searches.  However, it may also have a dramatic effect on what search
+     * results are returned and so should be used with some caution.  Whether this parameter is
+     * user-configurable, available through GSettings or another configuration mechanism, or simply
+     * baked into the caller's code is up to the caller.  CONSERVATIVE is designed to be a good
+     * default.
+     *
+     * The SearchQuery object can only be used with calls into this Account.
+     *
+     * Dropping the last reference to the SearchQuery will close it.
+     */
+    public abstract Geary.SearchQuery open_search(string query, Geary.SearchQuery.Strategy strategy);
+    
+    /**
      * Performs a search with the given query.  Optionally, a list of folders not to search
      * can be passed as well as a list of email identifiers to restrict the search to only those messages.
      * Returns a list of EmailIdentifiers, or null if there are no results.
@@ -335,9 +352,9 @@ public interface Geary.Account : BaseObject {
         Gee.Collection<Geary.EmailIdentifier>? search_ids = null, Cancellable? cancellable = null) throws Error;
     
     /**
-     * Given a list of mail IDs, returns a list of words that match for the query.
+     * Given a list of mail IDs, returns a set of casefolded words that match for the query.
      */
-    public abstract async Gee.Collection<string>? get_search_matches_async(Geary.SearchQuery query,
+    public abstract async Gee.Set<string>? get_search_matches_async(Geary.SearchQuery query,
         Gee.Collection<Geary.EmailIdentifier> ids, Cancellable? cancellable = null) throws Error;
     
     /**
diff --git a/src/engine/api/geary-search-folder.vala b/src/engine/api/geary-search-folder.vala
index 4d03421..c064a62 100644
--- a/src/engine/api/geary-search-folder.vala
+++ b/src/engine/api/geary-search-folder.vala
@@ -49,6 +49,8 @@ public class Geary.SearchFolder : Geary.AbstractLocalFolder, Geary.FolderSupport
         }
     }
     
+    public Geary.SearchQuery? search_query { get; private set; default = null; }
+    
     private Gee.HashSet<Geary.FolderPath?> exclude_folders = new Gee.HashSet<Geary.FolderPath?>();
     private Geary.SpecialFolderType[] exclude_types = {
         Geary.SpecialFolderType.SPAM,
@@ -56,7 +58,6 @@ public class Geary.SearchFolder : Geary.AbstractLocalFolder, Geary.FolderSupport
         Geary.SpecialFolderType.DRAFTS,
         // Orphan emails (without a folder) are also excluded; see ctor.
     };
-    private Geary.SearchQuery? search_query = null;
     private Gee.TreeSet<ImapDB.SearchEmailIdentifier> search_results;
     private Geary.Nonblocking.Mutex result_mutex = new Geary.Nonblocking.Mutex();
     
@@ -64,7 +65,7 @@ public class Geary.SearchFolder : Geary.AbstractLocalFolder, Geary.FolderSupport
      * Fired when the search query has changed.  This signal is fired *after* the search
      * has completed.
      */
-    public signal void search_query_changed(string? query);
+    public signal void search_query_changed(Geary.SearchQuery? query);
     
     public SearchFolder(Account account) {
         base();
@@ -203,8 +204,8 @@ public class Geary.SearchFolder : Geary.AbstractLocalFolder, Geary.FolderSupport
     /**
      * Sets the keyword string for this search.
      */
-    public void set_search_query(string query, Cancellable? cancellable = null) {
-        set_search_query_async.begin(query, cancellable, on_set_search_query_complete);
+    public void search(string query, SearchQuery.Strategy strategy, Cancellable? cancellable = null) {
+        set_search_query_async.begin(query, strategy, cancellable, on_set_search_query_complete);
     }
     
     private void on_set_search_query_complete(Object? source, AsyncResult result) {
@@ -215,8 +216,9 @@ public class Geary.SearchFolder : Geary.AbstractLocalFolder, Geary.FolderSupport
         }
     }
     
-    private async void set_search_query_async(string query, Cancellable? cancellable = null) throws Error {
-        Geary.SearchQuery search_query = new Geary.SearchQuery(query);
+    private async void set_search_query_async(string query, SearchQuery.Strategy strategy,
+        Cancellable? cancellable) throws Error {
+        Geary.SearchQuery search_query = account.open_search(query, strategy);
         
         int result_mutex_token = yield result_mutex.claim_async();
         
@@ -230,7 +232,7 @@ public class Geary.SearchFolder : Geary.AbstractLocalFolder, Geary.FolderSupport
         result_mutex.release(ref result_mutex_token);
         
         this.search_query = search_query;
-        search_query_changed(search_query.raw);
+        search_query_changed(search_query);
         
         if (error != null)
             throw error;
@@ -425,13 +427,14 @@ public class Geary.SearchFolder : Geary.AbstractLocalFolder, Geary.FolderSupport
     }
     
     /**
-     * Given a list of mail IDs, returns a list of words that match for the current
+     * Given a list of mail IDs, returns a set of casefolded words that match for the current
      * search query.
      */
-    public async Gee.Collection<string>? get_search_matches_async(
+    public async Gee.Set<string>? get_search_matches_async(
         Gee.Collection<Geary.EmailIdentifier> ids, Cancellable? cancellable = null) throws Error {
         if (search_query == null)
             return null;
+        
         return yield account.get_search_matches_async(search_query, ids, cancellable);
     }
     
diff --git a/src/engine/api/geary-search-query.vala b/src/engine/api/geary-search-query.vala
index 51bcbd3..da9e187 100644
--- a/src/engine/api/geary-search-query.vala
+++ b/src/engine/api/geary-search-query.vala
@@ -6,39 +6,63 @@
 
 /**
  * An object to hold state for various search subsystems that might need to
- * parse the same text string different ways.  The only interaction the API
- * user should have with this is creating new ones and then passing them off to
- * the search methods in the engine.
+ * parse the same text string different ways.
  *
- * TODO: support anything other than ImapDB.Account's search methods.
+ * The only interaction the API user should have with this is creating new ones and then passing
+ * them to the search methods in the Engine.
+ *
+ * @see Geary.Account.open_search
  */
-public class Geary.SearchQuery : BaseObject {
-    public string raw { get; private set; }
-    public bool parsed { get; internal set; default = false; }
-    
-    // Not using a MultiMap because we (might) need a guarantee of order.
-    private Gee.HashMap<string?, Gee.ArrayList<string>> field_map
-        = new Gee.HashMap<string?, Gee.ArrayList<string>>();
-    
-    public SearchQuery(string query) {
-        raw = query;
+
+public abstract class Geary.SearchQuery : BaseObject {
+    /**
+     * An advisory parameter regarding search quality, scope, and breadth.
+     *
+     * The Engine can perform searches based on (unspecified, uncontracted) textual variations of
+     * a query's search terms.  Some of those variations may produce undesirable results due to
+     * "greedy" matching of terms.  The Strategy parameter allows for an advisory to the Engine
+     * about how to use those textual variants, if any at all.
+     *
+     * This may be respected or ignored by the Engine.  In particular, there's no guarantee it will
+     * have any effect on server search.
+     */
+    public enum Strategy {
+        /**
+         * Only return exact matches, perform no searches for textual variants.
+         *
+         * Note that Geary's search syntax does prefix-matching for unquoted strings.  EXACT means
+         * exact ''prefix-''matching in this case.
+         */
+        EXACT,
+        /**
+         * Allow for searching for a small set of textual variants and small differences in search
+         * terms.  This is a good default.
+         */
+        CONSERVATIVE,
+        /**
+         * Allow for searching for a broad set of textual variants and larger differences in
+         * search terms.
+         */
+        AGGRESSIVE,
+        /**
+         * Search for all textual variants, i.e. "the sky's the limit."
+         */
+        HORIZON
     }
     
-    internal void add_token(string? field, string token) {
-        if (!field_map.has_key(field))
-            field_map.set(field, new Gee.ArrayList<string>());
-        
-        field_map.get(field).add(token);
-    }
+    /**
+     * The original user search text.
+     */
+    public string raw { get; private set; }
     
-    internal Gee.Collection<string?> get_fields() {
-        return field_map.keys;
-    }
+    /**
+     * The selected {@link Strategy} quality.
+     */
+    public Strategy strategy { get; private set; }
     
-    internal Gee.List<string>? get_tokens(string? field) {
-        if (!field_map.has_key(field))
-            return null;
-        
-        return field_map.get(field);
+    protected SearchQuery(string raw, Strategy strategy) {
+        this.raw = raw;
+        this.strategy = strategy;
     }
 }
+
diff --git a/src/engine/imap-db/imap-db-account.vala b/src/engine/imap-db/imap-db-account.vala
index c4a370e..33b120e 100644
--- a/src/engine/imap-db/imap-db-account.vala
+++ b/src/engine/imap-db/imap-db-account.vala
@@ -17,18 +17,6 @@ private class Geary.ImapDB.Account : BaseObject {
         }
     }
     
-    private class SearchOffset {
-        public int column;      // Column in search table
-        public int byte_offset; // Offset (in bytes) of search term in string
-        public int size;        // Size (in bytes) of the search term in string
-        
-        public SearchOffset(string[] offset_string) {
-            column = int.parse(offset_string[0]);
-            byte_offset = int.parse(offset_string[2]);
-            size = int.parse(offset_string[3]);
-        }
-    }
-    
     public signal void email_sent(Geary.RFC822.Message rfc822);
     
     // Only available when the Account is opened
@@ -61,6 +49,14 @@ private class Geary.ImapDB.Account : BaseObject {
             throw new EngineError.OPEN_REQUIRED("Database not open");
     }
     
+    private ImapDB.SearchQuery check_search_query(Geary.SearchQuery q) throws Error {
+        ImapDB.SearchQuery? query = q as ImapDB.SearchQuery;
+        if (query == null || query.account != this)
+            throw new EngineError.BAD_PARAMETERS("Geary.SearchQuery not associated with %s", name);
+        
+        return query;
+    }
+    
     public static void get_imap_db_storage_locations(File user_data_dir, out File db_file,
         out File attachments_dir) {
         db_file = ImapDB.Database.get_db_file(user_data_dir);
@@ -715,7 +711,87 @@ private class Geary.ImapDB.Account : BaseObject {
         return null;
     }
     
-    private void prepare_search_query(Geary.SearchQuery query) {
+    /**
+     * This method is used to convert an unquoted user-entered search term into a stemmed search
+     * term.
+     *
+     * Prior experience with the Unicode Snowball stemmer indicates it's too aggressive for our
+     * tastes when coupled with prefix-matching of all unquoted terms (see
+     * https://bugzilla.gnome.org/show_bug.cgi?id=713179).  This method is part of a larger strategy
+     * designed to dampen that aggressiveness without losing the benefits of stemming entirely.
+     *
+     * Database upgrade 23 removes the old Snowball-stemmed FTS table and replaces it with one
+     * with no stemming (using only SQLite's "simple" tokenizer).  It also creates a "magic" SQLite
+     * table called TokenizerTable which allows for uniform queries to the Snowball stemmer, which
+     * is still installed in Geary.  Thus, we are now in the position to search for the original
+     * term and its stemmed variant, then do post-search processing to strip results which are
+     * too "greedy" due to prefix-matching the stemmed variant.
+     *
+     * Some heuristics are in place simply to determine if stemming should occur:
+     *
+     * # If stemming is unallowed, no stemming occurs.
+     * # If the term is < min. term length for stemming, no stemming occurs.
+     * # If the stemmer returns a stem that is the same as the original term, no stemming occurs.
+     * # If the difference between the stemmed word and the original term is more than
+     *   maximum allowed, no stemming occurs.  This works under the assumption that if
+     *   the user has typed a long word, they do not want to "go back" to searching for a much
+     *   shorter version of it.  (For example, "accountancies" stems to "account").
+     *
+     * Otherwise, the stem for the term is returned.
+     */
+    private string? stem_search_term(ImapDB.SearchQuery query, string term) {
+        if (!query.allow_stemming)
+            return null;
+        
+        int term_length = term.length;
+        if (term_length < query.min_term_length_for_stemming)
+            return null;
+        
+        string? stemmed = null;
+        try {
+            Db.Statement stmt = db.prepare("""
+                SELECT token
+                FROM TokenizerTable
+                WHERE input=?
+            """);
+            stmt.bind_string(0, term);
+            
+            // get stemmed string; if no result, fall through
+            Db.Result result = stmt.exec();
+            if (!result.finished)
+                stemmed = result.string_at(0);
+            else
+                debug("No stemmed term returned for \"%s\"", term);
+        } catch (Error err) {
+            debug("Unable to query tokenizer table for stemmed term for \"%s\": %s", term, err.message);
+            
+            // fall-through
+        }
+        
+        if (String.is_empty(stemmed)) {
+            debug("Empty stemmed term returned for \"%s\"", term);
+            
+            return null;
+        }
+        
+        // If same term returned, treat as non-stemmed
+        if (stemmed == term)
+            return null;
+        
+        // Don't search for stemmed words that are significantly shorter than the user's search term
+        if (term_length - stemmed.length > query.max_difference_term_stem_lengths) {
+            debug("Stemmed \"%s\" dropped searching for \"%s\": too much distance in terms",
+                stemmed, term);
+            
+            return null;
+        }
+        
+        debug("Search processing: term -> stem is \"%s\" -> \"%s\"", term, stemmed);
+        
+        return stemmed;
+    }
+    
+    private void prepare_search_query(ImapDB.SearchQuery query) {
         if (query.parsed)
             return;
         
@@ -753,16 +829,31 @@ private class Geary.ImapDB.Account : BaseObject {
                 --quotes;
             }
             
+            SearchTerm? term;
             if (in_quote) {
                 // HACK: this helps prevent a syntax error when the user types
                 // something like from:"somebody".  If we ever properly support
                 // quotes after : we can get rid of this.
-                s = s.replace(":", " ");
+                term = new SearchTerm(s, s, null, s.replace(":", " "), null);
             } else {
+                string original = s;
+                
+                // some common search phrases we don't respect and therefore don't want to fall
+                // through to search results
                 string lower = s.down();
-                if (lower == "" || lower == "and" || lower == "or" || lower == "not" || lower == "near"
-                    || lower.has_prefix("near/"))
-                    continue;
+                switch (lower) {
+                    case "":
+                    case "and":
+                    case "or":
+                    case "not":
+                    case "near":
+                        continue;
+                    
+                    default:
+                        if (lower.has_prefix("near/"))
+                            continue;
+                    break;
+                }
                 
                 if (s.has_prefix("-"))
                     s = s.substring(1);
@@ -775,13 +866,29 @@ private class Geary.ImapDB.Account : BaseObject {
                 if (parts.length > 1)
                     field = extract_field_from_token(parts, ref s);
                 
-                s = "\"" + s + "*\"";
+                // SQL MATCH syntax for parsed term
+                string? sql_s = "%s*".printf(s);
+                
+                // stem the word, but if stemmed and stem is simply shorter version of original
+                // term, only prefix-match search for it (i.e. avoid searching for
+                // [archive* OR archiv*] when that's the same as [archiv*]), otherwise search for
+                // both
+                string? stemmed = stem_search_term(query, s);
+                
+                string? sql_stemmed = null;
+                if (stemmed != null) {
+                    sql_stemmed = "%s*".printf(stemmed);
+                    if (s.has_prefix(stemmed))
+                        sql_s = null;
+                }
+                
+                term = new SearchTerm(original, s, stemmed, sql_s, sql_stemmed);
             }
             
             if (in_quote && quotes % 2 != 0)
                 in_quote = false;
             
-            query.add_token(field, s);
+            query.add_search_term(field, term);
         }
         
         assert(!in_quote);
@@ -790,28 +897,53 @@ private class Geary.ImapDB.Account : BaseObject {
     }
     
     // Return a map of column -> phrase, to use as WHERE column MATCH 'phrase'.
-    private Gee.HashMap<string, string> get_query_phrases(Geary.SearchQuery query) {
+    private Gee.HashMap<string, string> get_query_phrases(ImapDB.SearchQuery query) {
         prepare_search_query(query);
         
         Gee.HashMap<string, string> phrases = new Gee.HashMap<string, string>();
         foreach (string? field in query.get_fields()) {
-            string? phrase = null;
-            Gee.List<string>? tokens = query.get_tokens(field);
-            if (tokens != null) {
-                string[] array = tokens.to_array();
-                // HACK: work around a bug in vala where it's not null-terminating
-                // arrays created from generic-typed functions (Gee.Collection.to_array)
-                // before passing them off to g_strjoinv.  Simply making a copy to a
-                // local proper string array adds the null for us.
-                string[] copy = new string[array.length];
-                for (int i = 0; i < array.length; ++i)
-                    copy[i] = array[i];
-                phrase = string.joinv(" ", copy).strip();
+            Gee.List<SearchTerm>? terms = query.get_search_terms(field);
+            if (terms == null || terms.size == 0)
+                continue;
+            
+            // Each SearchTerm is an AND but the SQL text within it are OR ... this allows for
+            // each user term to be AND but the variants of each term are OR.  So, if terms are
+            // [party] and [eventful] and stems are [parti] and [event], the search would be:
+            //
+            // (party* OR parti*) AND (eventful* OR event*)
+            //
+            // Obviously with stemming there's the possibility of the stemmed variant being nothing
+            // but a broader search of the original term (such as event* and eventful*) but do both
+            // to determine from each hit result which term caused the hit, and if it's too greedy
+            // a match of the stemmed variant, it can be stripped from the results.
+            //
+            // Note that this uses SQLite's "standard" query syntax for MATCH, where AND is implied
+            // (and would be treated as search term if included), parentheses are not allowed, and
+            // OR has a higher precedence than AND.  So the above example in standard syntax is:
+            //
+            // party* OR parti* eventful* OR event*
+            StringBuilder builder = new StringBuilder();
+            foreach (SearchTerm term in terms) {
+                if (term.sql.size == 0)
+                    continue;
+                
+                if (term.is_exact) {
+                    builder.append_printf("%s ", term.parsed);
+                } else {
+                    bool is_first_sql = true;
+                    foreach (string sql in term.sql) {
+                        if (!is_first_sql)
+                            builder.append(" OR ");
+                        
+                        builder.append_printf("%s ", sql);
+                        is_first_sql = false;
+                    }
+                }
             }
             
-            if (!Geary.String.is_empty(phrase))
-                phrases.set((field == null ? "MessageSearchTable" : field), phrase);
+            phrases.set(field ?? "MessageSearchTable", builder.str);
         }
+        
         return phrases;
     }
     
@@ -865,19 +997,39 @@ private class Geary.ImapDB.Account : BaseObject {
         return sql.str;
     }
     
-    public async Gee.Collection<Geary.EmailIdentifier>? search_async(Geary.SearchQuery query,
+    public async Gee.Collection<Geary.EmailIdentifier>? search_async(Geary.SearchQuery q,
         int limit = 100, int offset = 0, Gee.Collection<Geary.FolderPath?>? folder_blacklist = null,
-        Gee.Collection<Geary.EmailIdentifier>? search_ids = null, Cancellable? cancellable = null) throws Error {
+        Gee.Collection<Geary.EmailIdentifier>? search_ids = null, Cancellable? cancellable = null)
+        throws Error {
         check_open();
+        ImapDB.SearchQuery query = check_search_query(q);
         
         Gee.HashMap<string, string> query_phrases = get_query_phrases(query);
         if (query_phrases.size == 0)
             return null;
         
-        Gee.ArrayList<ImapDB.SearchEmailIdentifier> search_results
-            = new Gee.ArrayList<ImapDB.SearchEmailIdentifier>();
-        
+        // Do this outside of transaction to catch invalid search ids up-front
         string? search_ids_sql = get_search_ids_sql(search_ids);
+        
+        // for some searches, results are stripped if they're too "greedy", but this requires
+        // examining the matched text, which has an expense to fetch, so avoid doing so unless
+        // necessary
+        bool strip_results = true;
+        
+        // HORIZON strategy is configured in such a way to allow all stemmed variants to match,
+        // so don't do any stripping in that case
+        //
+        // If any of the search terms is exact-match (no prefix matching) or none have stemmed
+        // variants, then don't do stripping of "greedy" stemmed matching (because in both cases,
+        // there are none)
+        if (query.strategy == Geary.SearchQuery.Strategy.HORIZON)
+            strip_results = false;
+        else if (traverse<SearchTerm>(query.get_all_terms()).any(term => term.stemmed == null || term.is_exact))
+            strip_results = false;
+        
+        Gee.Set<ImapDB.EmailIdentifier> unstripped_ids = new Gee.HashSet<ImapDB.EmailIdentifier>();
+        Gee.Map<ImapDB.EmailIdentifier, Gee.Set<string>>? search_results = null;
+        
         yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => {
             string blacklisted_ids_sql = do_get_blacklisted_message_ids_sql(
                 folder_blacklist, cx, cancellable);
@@ -919,95 +1071,117 @@ private class Geary.ImapDB.Account : BaseObject {
                 stmt.bind_int(bind_index++, offset);
             }
             
+            Gee.HashMap<int64?, ImapDB.EmailIdentifier> id_map = new Gee.HashMap<int64?, ImapDB.EmailIdentifier>(
+                Collection.int64_hash_func, Collection.int64_equal_func);
+            
             Db.Result result = stmt.exec(cancellable);
             while (!result.finished) {
-                int64 id = result.int64_at(0);
+                int64 message_id = result.int64_at(0);
                 int64 internaldate_time_t = result.int64_at(1);
                 DateTime? internaldate = (internaldate_time_t == -1
                     ? null : new DateTime.from_unix_local(internaldate_time_t));
-                search_results.add(new ImapDB.SearchEmailIdentifier(id, internaldate));
+                
+                ImapDB.EmailIdentifier id = new ImapDB.SearchEmailIdentifier(message_id, internaldate);
+                
+                unstripped_ids.add(id);
+                id_map.set(message_id, id);
                 
                 result.next(cancellable);
             }
             
+            if (!strip_results)
+                return Db.TransactionOutcome.DONE;
+            
+            search_results = do_get_search_matches(cx, query, id_map, cancellable);
+            
             return Db.TransactionOutcome.DONE;
         }, cancellable);
         
-        return (search_results.size == 0 ? null : search_results);
+        if (unstripped_ids == null || unstripped_ids.size == 0)
+            return null;
+        
+        if (!strip_results)
+            return unstripped_ids;
+        
+        // at this point, there should be some "full" search results to strip from
+        assert(search_results != null && search_results.size > 0);
+        
+        strip_greedy_results(query, search_results);
+        
+        return search_results.size == 0 ? null : search_results.keys;
     }
     
-    // This applies a fudge-factor set of matches when the database results
-    // aren't entirely satisfactory, such as when you search for an email
-    // address and the database tokenizes out the @ and ., etc.  It's not meant
-    // to be comprehensive, just a little extra highlighting applied to make
-    // the results look a little closer to what you typed.
-    private void add_literal_matches(string raw_query, Gee.Set<string> search_matches) {
-        foreach (string word in raw_query.split(" ")) {
-            if (word.has_suffix("\""))
-                word = word.substring(0, word.length - 1);
-            if (word.has_prefix("\""))
-                word = word.substring(1);
-            
-            if (!String.is_empty_or_whitespace(word))
-                search_matches.add(word);
+    // Strip out search results that only contain a hit due to "greedy" matching of the stemmed
+    // variants on all search terms
+    private void strip_greedy_results(ImapDB.SearchQuery query,
+        Gee.Map<ImapDB.EmailIdentifier, Gee.Set<string>> search_results) {
+        int prestripped_results = search_results.size;
+        Gee.MapIterator<ImapDB.EmailIdentifier, Gee.Set<string>> iter = search_results.map_iterator();
+        while (iter.next()) {
+            // For each matched string in this message, retain the message in the search results
+            // if it prefix-matches any of the straight-up parsed terms or matches a stemmed
+            // variant (with only max. difference in their lengths allowed, i.e. not a "greedy"
+            // match)
+            bool good_match_found = false;
+            foreach (string match in iter.get_value()) {
+                foreach (SearchTerm term in query.get_all_terms()) {
+                    // if prefix-matches parsed term, then don't strip
+                    if (match.has_prefix(term.parsed)) {
+                        good_match_found = true;
+                        
+                        break;
+                    }
+                    
+                    // if prefix-matches stemmed term w/o doing so greedily, then don't strip
+                    if (term.stemmed != null && match.has_prefix(term.stemmed)) {
+                        int diff = match.length - term.stemmed.length;
+                        if (diff <= query.max_difference_match_stem_lengths) {
+                            good_match_found = true;
+                            
+                            break;
+                        }
+                    }
+                }
+                
+                if (good_match_found)
+                    break;
+            }
+            
+            if (!good_match_found)
+                iter.unset();
         }
+        
+        debug("Stripped %d emails from search for [%s] due to greedy stem matching",
+            prestripped_results - search_results.size, query.raw);
     }
     
-    public async Gee.Collection<string>? get_search_matches_async(Geary.SearchQuery query,
+    public async Gee.Set<string>? get_search_matches_async(Geary.SearchQuery q,
         Gee.Collection<ImapDB.EmailIdentifier> ids, Cancellable? cancellable = null) throws Error {
         check_open();
+        ImapDB.SearchQuery query = check_search_query(q);
         
-        Gee.HashMap<string, string> query_phrases = get_query_phrases(query);
-        if (query_phrases.size == 0)
-            return null;
-        
-        Gee.Set<string> search_matches = new Gee.HashSet<string>();
-        
+        Gee.Set<string>? search_matches = null;
         yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => {
-            StringBuilder sql = new StringBuilder();
-            sql.append("""
-                SELECT offsets(MessageSearchTable), *
-                FROM MessageSearchTable
-                WHERE docid IN (
-            """);
-            sql_append_ids(sql,
-                Geary.traverse<ImapDB.EmailIdentifier>(ids).map<int64?>(id => id.message_id).to_gee_iterable());
-            sql.append(")");
-            sql_add_query_phrases(sql, query_phrases);
+            Gee.HashMap<int64?, ImapDB.EmailIdentifier> id_map = new Gee.HashMap<
+                int64?, ImapDB.EmailIdentifier>(Collection.int64_hash_func, Collection.int64_equal_func);
+            foreach (ImapDB.EmailIdentifier id in ids)
+                id_map.set(id.message_id, id);
+            
+            Gee.Map<ImapDB.EmailIdentifier, Gee.Set<string>>? match_map =
+                do_get_search_matches(cx, query, id_map, cancellable);
+            if (match_map == null || match_map.size == 0)
+                return Db.TransactionOutcome.DONE;
             
-            Db.Statement stmt = cx.prepare(sql.str);
-            sql_bind_query_phrases(stmt, 0, query_phrases);
+            strip_greedy_results(query, match_map);
             
-            Db.Result result = stmt.exec(cancellable);
-            while (!result.finished) {
-                // Build a list of search offsets.
-                string[] offset_array = result.nonnull_string_at(0).split(" ");
-                Gee.ArrayList<SearchOffset> all_offsets = new Gee.ArrayList<SearchOffset>();
-                int j = 0;
-                while (true) {
-                    all_offsets.add(new SearchOffset(offset_array[j:j+4]));
-                    
-                    j += 4;
-                    if (j >= offset_array.length)
-                        break;
-                }
-                
-                // Iterate over the offset list, scrape strings from the database, and push
-                // the results into our return set.
-                foreach(SearchOffset offset in all_offsets) {
-                    string text = result.nonnull_string_at(offset.column + 1);
-                    search_matches.add(text[offset.byte_offset : offset.byte_offset + offset.size].down());
-                }
-                
-                result.next(cancellable);
-            }
+            search_matches = new Gee.HashSet<string>();
+            foreach (Gee.Set<string> matches in match_map.values)
+                search_matches.add_all(matches);
             
             return Db.TransactionOutcome.DONE;
         }, cancellable);
         
-        add_literal_matches(query.raw, search_matches);
-        
-        return (search_matches.size == 0 ? null : search_matches);
+        return search_matches;
     }
     
     public async Geary.Email fetch_email_async(ImapDB.EmailIdentifier email_id,
@@ -1561,5 +1735,69 @@ private class Geary.ImapDB.Account : BaseObject {
                 unread_change.get(path));
         }
     }
+    
+    // Not using a MultiMap because when traversing want to process all values at once per iteration,
+    // not per key-value
+    public Gee.Map<ImapDB.EmailIdentifier, Gee.Set<string>>? do_get_search_matches(Db.Connection cx,
+        ImapDB.SearchQuery query, Gee.Map<int64?, ImapDB.EmailIdentifier> id_map, Cancellable? cancellable)
+        throws Error {
+        if (id_map.size == 0)
+            return null;
+        
+        Gee.HashMap<string, string> query_phrases = get_query_phrases(query);
+        if (query_phrases.size == 0)
+            return null;
+        
+        StringBuilder sql = new StringBuilder();
+        sql.append("""
+            SELECT docid, offsets(MessageSearchTable), *
+            FROM MessageSearchTable
+            WHERE docid IN (
+        """);
+        sql_append_ids(sql, id_map.keys);
+        sql.append(")");
+        sql_add_query_phrases(sql, query_phrases);
+        
+        Db.Statement stmt = cx.prepare(sql.str);
+        sql_bind_query_phrases(stmt, 0, query_phrases);
+        
+        Gee.Map<ImapDB.EmailIdentifier, Gee.Set<string>> search_matches = new Gee.HashMap<
+            ImapDB.EmailIdentifier, Gee.Set<string>>();
+        
+        Db.Result result = stmt.exec(cancellable);
+        while (!result.finished) {
+            int64 docid = result.rowid_at(0);
+            assert(id_map.contains(docid));
+            ImapDB.EmailIdentifier id = id_map.get(docid);
+            
+            // offsets() function returns a list of 4 strings that are ints indicating position
+            // and length of match string in search table corpus
+            string[] offset_array = result.nonnull_string_at(1).split(" ");
+            
+            Gee.Set<string> matches = new Gee.HashSet<string>();
+            
+            int j = 0;
+            while (true) {
+                unowned string[] offset_string = offset_array[j:j+4];
+                
+                int column = int.parse(offset_string[0]);
+                int byte_offset = int.parse(offset_string[2]);
+                int size = int.parse(offset_string[3]);
+                
+                unowned string text = result.nonnull_string_at(column + 2);
+                matches.add(text[byte_offset : byte_offset + size].down());
+                
+                j += 4;
+                if (j >= offset_array.length)
+                    break;
+            }
+            
+            search_matches.set(id, matches);
+            
+            result.next(cancellable);
+        }
+        
+        return search_matches.size > 0 ? search_matches : null;
+    }
 }
 
diff --git a/src/engine/imap-db/imap-db-database.vala b/src/engine/imap-db/imap-db-database.vala
index 704d730..533209f 100644
--- a/src/engine/imap-db/imap-db-database.vala
+++ b/src/engine/imap-db/imap-db-database.vala
@@ -107,7 +107,11 @@ private class Geary.ImapDB.Database : Geary.Db.VersionedDatabase {
             break;
             
             case 22:
-                post_rebuild_attachments();
+                post_upgrade_rebuild_attachments();
+            break;
+            
+            case 23:
+                post_upgrade_add_tokenizer_table();
             break;
         }
     }
@@ -407,7 +411,7 @@ private class Geary.ImapDB.Database : Geary.Db.VersionedDatabase {
     }
     
     // Version 22
-    private void post_rebuild_attachments() {
+    private void post_upgrade_rebuild_attachments() {
         try {
             exec_transaction(Db.TransactionType.RW, (cx) => {
                 Db.Statement stmt = cx.prepare("""
@@ -471,6 +475,25 @@ private class Geary.ImapDB.Database : Geary.Db.VersionedDatabase {
         }
     }
     
+    // Version 23
+    private void post_upgrade_add_tokenizer_table() {
+        try {
+            string stemmer = find_appropriate_search_stemmer();
+            debug("Creating tokenizer table using %s stemmer", stemmer);
+            
+            // These can't go in the .sql file because its schema (the stemmer
+            // algorithm) is determined at runtime.
+            exec("""
+                CREATE VIRTUAL TABLE TokenizerTable USING fts3tokenize(
+                    unicodesn,
+                    "stemmer=%s"
+                );
+            """.printf(stemmer));
+        } catch (Error e) {
+            error("Error creating tokenizer table: %s", e.message);
+        }
+    }
+    
     private void on_prepare_database_connection(Db.Connection cx) throws Error {
         cx.set_busy_timeout_msec(Db.Connection.RECOMMENDED_BUSY_TIMEOUT_MSEC);
         cx.set_foreign_keys(true);
diff --git a/src/engine/imap-db/imap-db-search-query.vala b/src/engine/imap-db/imap-db-search-query.vala
new file mode 100644
index 0000000..52d0ba9
--- /dev/null
+++ b/src/engine/imap-db/imap-db-search-query.vala
@@ -0,0 +1,121 @@
+/* Copyright 2014 Yorba Foundation
+ *
+ * This software is licensed under the GNU Lesser General Public License
+ * (version 2.1 or later).  See the COPYING file in this distribution.
+ */
+
+/**
+ * Internal implementation of {@link Geary.SearchQuery}.
+ */
+
+private class Geary.ImapDB.SearchQuery : Geary.SearchQuery {
+    /**
+     * Associated {@link ImapDB.Account}.
+     */
+    public weak ImapDB.Account account { get; private set; }
+    
+    /**
+     * Whether or not the query has been parsed and processed prior to search submission.
+     */
+    public bool parsed { get; set; default = false; }
+    
+    /**
+     * Determined by {@link strategy}.
+     */
+    public bool allow_stemming { get; private set; }
+    
+    /**
+     * Minimum length of the term before stemming is allowed.
+     *
+     * This prevents short words that might be stemmed from being stemmed.
+     *
+     * Overridden by {@link allow_stemming}.  Determined by {@link strategy}.
+     */
+    public int min_term_length_for_stemming { get; private set; }
+    
+    /**
+     * Maximum difference in lengths between term and stemmed variant.
+     *
+     * This prevents long words from being stemmed to much shorter words (which creates
+     * opportunities for greedy matching).
+     *
+     * Overridden by {@link allow_stemming}.  Determined by {@link strategy}.
+     */
+    public int max_difference_term_stem_lengths { get; private set; }
+    
+    /**
+     * Maximum difference in lengths between a matched word and the stemmed variant it matched
+     * against.
+     *
+     * This prevents long words being matched to short stem variants (which creates opportunities
+     * for greedy matching).
+     *
+     * Overridden by {@link allow_stemming}.  Determined by {@link strategy}.
+     */
+    public int max_difference_match_stem_lengths { get; private set; }
+    
+    // Not using a MultiMap because we (might) need a guarantee of order.
+    private Gee.HashMap<string?, Gee.ArrayList<SearchTerm>> field_map
+        = new Gee.HashMap<string?, Gee.ArrayList<SearchTerm>>();
+    private Gee.ArrayList<SearchTerm> all = new Gee.ArrayList<SearchTerm>();
+    
+    public SearchQuery(ImapDB.Account account, string query, Geary.SearchQuery.Strategy strategy) {
+        base (query, strategy);
+        
+        this.account = account;
+        
+        switch (strategy) {
+            case Strategy.EXACT:
+                allow_stemming = false;
+                min_term_length_for_stemming = int.MAX;
+                max_difference_term_stem_lengths = 0;
+                max_difference_match_stem_lengths = 0;
+            break;
+            
+            case Strategy.CONSERVATIVE:
+                allow_stemming = true;
+                min_term_length_for_stemming = 6;
+                max_difference_term_stem_lengths = 2;
+                max_difference_match_stem_lengths = 2;
+            break;
+            
+            case Strategy.AGGRESSIVE:
+                allow_stemming = true;
+                min_term_length_for_stemming = 4;
+                max_difference_term_stem_lengths = 4;
+                max_difference_match_stem_lengths = 3;
+            break;
+            
+            case Strategy.HORIZON:
+                allow_stemming = true;
+                min_term_length_for_stemming = 0;
+                max_difference_term_stem_lengths = int.MAX;
+                max_difference_match_stem_lengths = int.MAX;
+            break;
+            
+            default:
+                assert_not_reached();
+        }
+    }
+    
+    public void add_search_term(string? field, SearchTerm term) {
+        if (!field_map.has_key(field))
+            field_map.set(field, new Gee.ArrayList<SearchTerm>());
+        
+        field_map.get(field).add(term);
+        all.add(term);
+    }
+    
+    public Gee.Collection<string?> get_fields() {
+        return field_map.keys;
+    }
+    
+    public Gee.List<SearchTerm>? get_search_terms(string? field) {
+        return field_map.has_key(field) ? field_map.get(field) : null;
+    }
+    
+    public Gee.List<SearchTerm>? get_all_terms() {
+        return all;
+    }
+}
+
diff --git a/src/engine/imap-db/imap-db-search-term.vala b/src/engine/imap-db/imap-db-search-term.vala
new file mode 100644
index 0000000..c56a047
--- /dev/null
+++ b/src/engine/imap-db/imap-db-search-term.vala
@@ -0,0 +1,62 @@
+/* Copyright 2014 Yorba Foundation
+ *
+ * This software is licensed under the GNU Lesser General Public License
+ * (version 2.1 or later).  See the COPYING file in this distribution.
+ */
+
+/**
+ * Various associated state with a single term in a {@link ImapDB.SearchQuery}.
+ */
+
+private class Geary.ImapDB.SearchTerm : BaseObject {
+    /**
+     * The original tokenized search term with minimal other processing performed.
+     *
+     * For example, punctuation might be removed, but no casefolding has occurred.
+     */
+    public string original { get; private set; }
+    
+    /**
+     * The parsed tokenized search term.
+     *
+     * Casefolding and other normalizing text operations have been performed.
+     */
+    public string parsed { get; private set; }
+    
+    /**
+     * The stemmed search term.
+     *
+     * Only used if stemming is being done ''and'' the stem is different than the {@link parsed}
+     * term.
+     */
+    public string? stemmed { get; private set; }
+    
+    /**
+     * A list of terms ready for binding to an SQLite statement.
+     *
+     * This should include prefix operators and quotes (i.e. ["party"] or [party*]).  These texts
+     * are guaranteed not to be null or empty strings.
+     */
+    public Gee.List<string> sql { get; private set; default = new Gee.ArrayList<string>(); }
+    
+    /**
+     * Returns true if the {@link parsed} term is exact-match only (i.e. starts with quotes) and
+     * there is no {@link stemmed} variant.
+     */
+    public bool is_exact { get { return parsed.has_prefix("\"") && stemmed == null; } }
+    
+    public SearchTerm(string original, string parsed, string? stemmed, string? sql_parsed, string? sql_stemmed) {
+        this.original = original;
+        this.parsed = parsed;
+        this.stemmed = stemmed;
+        
+        // for now, only two variations: the parsed string and the stemmed; since stem is usually
+        // shorter (and will be first in the OR statement), include it first
+        if (!String.is_empty(sql_stemmed))
+            sql.add(sql_stemmed);
+        
+        if (!String.is_empty(sql_parsed))
+            sql.add(sql_parsed);
+    }
+}
+
diff --git a/src/engine/imap-engine/imap-engine-generic-account.vala b/src/engine/imap-engine/imap-engine-generic-account.vala
index ef84ee6..fc7c7e5 100644
--- a/src/engine/imap-engine/imap-engine-generic-account.vala
+++ b/src/engine/imap-engine/imap-engine-generic-account.vala
@@ -824,6 +824,10 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.AbstractAccount {
         return yield local.fetch_email_async(check_id(email_id), required_fields, cancellable);
     }
     
+    public override Geary.SearchQuery open_search(string query, SearchQuery.Strategy strategy) {
+        return new ImapDB.SearchQuery(local, query, strategy);
+    }
+    
     public override async Gee.Collection<Geary.EmailIdentifier>? local_search_async(Geary.SearchQuery query,
         int limit = 100, int offset = 0, Gee.Collection<Geary.FolderPath?>? folder_blacklist = null,
         Gee.Collection<Geary.EmailIdentifier>? search_ids = null, Cancellable? cancellable = null) throws Error {
@@ -833,7 +837,7 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.AbstractAccount {
         return yield local.search_async(query, limit, offset, folder_blacklist, search_ids, cancellable);
     }
     
-    public override async Gee.Collection<string>? get_search_matches_async(Geary.SearchQuery query,
+    public override async Gee.Set<string>? get_search_matches_async(Geary.SearchQuery query,
         Gee.Collection<Geary.EmailIdentifier> ids, Cancellable? cancellable = null) throws Error {
         return yield local.get_search_matches_async(query, check_ids(ids), cancellable);
     }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]