[geary/wip/720361-stemming] Strip greedy results when fetching matches as well



commit d3a68e5c7ca5586c78c950c109e3b9104be92b6a
Author: Jim Nelson <jim yorba org>
Date:   Tue Dec 16 11:04:13 2014 -0800

    Strip greedy results when fetching matches as well

 src/engine/imap-db/imap-db-account.vala      |   18 +++++++++++-------
 src/engine/imap-db/imap-db-search-query.vala |    8 ++++----
 2 files changed, 15 insertions(+), 11 deletions(-)
---
diff --git a/src/engine/imap-db/imap-db-account.vala b/src/engine/imap-db/imap-db-account.vala
index 21b2c69..33b120e 100644
--- a/src/engine/imap-db/imap-db-account.vala
+++ b/src/engine/imap-db/imap-db-account.vala
@@ -727,7 +727,7 @@ private class Geary.ImapDB.Account : BaseObject {
      * term and its stemmed variant, then do post-search processing to strip results which are
      * too "greedy" due to prefix-matching the stemmed variant.
      *
-     * Some hueristics are in place simply to determine if stemming should occur:
+     * Some heuristics are in place simply to determine if stemming should occur:
      *
      * # If stemming is unallowed, no stemming occurs.
      * # If the term is < min. term length for stemming, no stemming occurs.
@@ -1106,11 +1106,15 @@ private class Geary.ImapDB.Account : BaseObject {
         // at this point, there should be some "full" search results to strip from
         assert(search_results != null && search_results.size > 0);
         
-        //
-        // Strip out search results that only contain a hit due to "greedy" matching of the stemmed
-        // variants on all search terms
-        //
+        strip_greedy_results(query, search_results);
         
+        return search_results.size == 0 ? null : search_results.keys;
+    }
+    
+    // Strip out search results that only contain a hit due to "greedy" matching of the stemmed
+    // variants on all search terms
+    private void strip_greedy_results(ImapDB.SearchQuery query,
+        Gee.Map<ImapDB.EmailIdentifier, Gee.Set<string>> search_results) {
         int prestripped_results = search_results.size;
         Gee.MapIterator<ImapDB.EmailIdentifier, Gee.Set<string>> iter = search_results.map_iterator();
         while (iter.next()) {
@@ -1149,8 +1153,6 @@ private class Geary.ImapDB.Account : BaseObject {
         
         debug("Stripped %d emails from search for [%s] due to greedy stem matching",
             prestripped_results - search_results.size, query.raw);
-        
-        return search_results.size == 0 ? null : search_results.keys;
     }
     
     public async Gee.Set<string>? get_search_matches_async(Geary.SearchQuery q,
@@ -1170,6 +1172,8 @@ private class Geary.ImapDB.Account : BaseObject {
             if (match_map == null || match_map.size == 0)
                 return Db.TransactionOutcome.DONE;
             
+            strip_greedy_results(query, match_map);
+            
             search_matches = new Gee.HashSet<string>();
             foreach (Gee.Set<string> matches in match_map.values)
                 search_matches.add_all(matches);
diff --git a/src/engine/imap-db/imap-db-search-query.vala b/src/engine/imap-db/imap-db-search-query.vala
index 1c7fbf9..52d0ba9 100644
--- a/src/engine/imap-db/imap-db-search-query.vala
+++ b/src/engine/imap-db/imap-db-search-query.vala
@@ -20,7 +20,7 @@ private class Geary.ImapDB.SearchQuery : Geary.SearchQuery {
     public bool parsed { get; set; default = false; }
     
     /**
-     * Determined by {@link matching}.
+     * Determined by {@link strategy}.
      */
     public bool allow_stemming { get; private set; }
     
@@ -29,7 +29,7 @@ private class Geary.ImapDB.SearchQuery : Geary.SearchQuery {
      *
      * This prevents short words that might be stemmed from being stemmed.
      *
-     * Overridden by {@link allow_stemming}.  Determined by {@link matching}.
+     * Overridden by {@link allow_stemming}.  Determined by {@link strategy}.
      */
     public int min_term_length_for_stemming { get; private set; }
     
@@ -39,7 +39,7 @@ private class Geary.ImapDB.SearchQuery : Geary.SearchQuery {
      * This prevents long words from being stemmed to much shorter words (which creates
      * opportunities for greedy matching).
      *
-     * Overridden by {@link allow_stemming}.  Determined by {@link matching}.
+     * Overridden by {@link allow_stemming}.  Determined by {@link strategy}.
      */
     public int max_difference_term_stem_lengths { get; private set; }
     
@@ -50,7 +50,7 @@ private class Geary.ImapDB.SearchQuery : Geary.SearchQuery {
      * This prevents long words being matched to short stem variants (which creates opportunities
      * for greedy matching).
      *
-     * Overridden by {@link allow_stemming}.  Determined by {@link matching}.
+     * Overridden by {@link allow_stemming}.  Determined by {@link strategy}.
      */
     public int max_difference_match_stem_lengths { get; private set; }
     


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]