[geary/mjog/search-update: 23/43] Geary.ImapDb.SearchQuery: Require stemmer to be passed in to ctor




commit e0396c322ed027afab2bccbd6aacee681a9b1a71
Author: Michael Gratton <mike vee net>
Date:   Wed Nov 4 19:01:48 2020 +1100

    Geary.ImapDb.SearchQuery: Require stemmer to be passed in to ctor
    
    Since constructing a libstemmer object is non-trivial, this allows a
    per-account instance to be created just once, and improves testability.

 src/engine/imap-db/imap-db-search-query.vala       | 50 ++--------------------
 .../imap-engine/imap-engine-generic-account.vala   | 48 ++++++++++++++++++++-
 2 files changed, 50 insertions(+), 48 deletions(-)
---
diff --git a/src/engine/imap-db/imap-db-search-query.vala b/src/engine/imap-db/imap-db-search-query.vala
index 21dee990c..9e2c38e97 100644
--- a/src/engine/imap-db/imap-db-search-query.vala
+++ b/src/engine/imap-db/imap-db-search-query.vala
@@ -319,18 +319,19 @@ private class Geary.ImapDB.SearchQuery : Geary.SearchQuery {
     // A list of all search terms, regardless of search op field name
     private Gee.ArrayList<Term> all = new Gee.ArrayList<Term>();
 
-    private SnowBall.Stemmer stemmer;
+    private unowned SnowBall.Stemmer stemmer;
 
 
     public async SearchQuery(Geary.Account owner,
                              ImapDB.Account local,
                              Gee.Collection<Geary.SearchQuery.Term> expression,
                              string raw,
+                             SnowBall.Stemmer stemmer,
                              Geary.SearchQuery.Strategy strategy,
                              GLib.Cancellable? cancellable) {
         base(expression, raw);
         this.account = local;
-        this.stemmer = new SnowBall.Stemmer(find_appropriate_search_stemmer());
+        this.stemmer = stemmer;
 
         switch (strategy) {
             case Strategy.EXACT:
@@ -674,49 +675,4 @@ private class Geary.ImapDB.SearchQuery : Geary.SearchQuery {
         return stemmed;
     }
 
-    private string find_appropriate_search_stemmer() {
-        // Unfortunately, the stemmer library only accepts the full language
-        // name for the stemming algorithm.  This translates between the user's
-        // preferred language ISO 639-1 code and our available stemmers.
-        // FIXME: the available list here is determined by what's included in
-        // src/sqlite3-unicodesn/CMakeLists.txt.  We should pass that list in
-        // instead of hardcoding it here.
-        foreach (string l in Intl.get_language_names()) {
-            switch (l) {
-                case "ar": return "arabic";
-                case "eu": return "basque";
-                case "ca": return "catalan";
-                case "da": return "danish";
-                case "nl": return "dutch";
-                case "en": return "english";
-                case "fi": return "finnish";
-                case "fr": return "french";
-                case "de": return "german";
-                case "el": return "greek";
-                case "hi": return "hindi";
-                case "hu": return "hungarian";
-                case "id": return "indonesian";
-                case "ga": return "irish";
-                case "it": return "italian";
-                case "lt": return "lithuanian";
-                case "ne": return "nepali";
-                case "no": return "norwegian";
-                case "pt": return "portuguese";
-                case "ro": return "romanian";
-                case "ru": return "russian";
-                case "sr": return "serbian";
-                case "es": return "spanish";
-                case "sv": return "swedish";
-                case "ta": return "tamil";
-                case "tr": return "turkish";
-            }
-        }
-
-        // Default to English because it seems to be on average the language
-        // most likely to be present in emails, regardless of the user's
-        // language setting.  This is not an exact science, and search results
-        // should be ok either way in most cases.
-        return "english";
-    }
-
 }
diff --git a/src/engine/imap-engine/imap-engine-generic-account.vala b/src/engine/imap-engine/imap-engine-generic-account.vala
index 070246586..4282ca046 100644
--- a/src/engine/imap-engine/imap-engine-generic-account.vala
+++ b/src/engine/imap-engine/imap-engine-generic-account.vala
@@ -60,6 +60,8 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.Account {
     private Gee.Map<Folder.SpecialUse,Gee.List<string>> special_search_names =
         new Gee.HashMap<Folder.SpecialUse,Gee.List<string>>();
 
+    private SnowBall.Stemmer stemmer;
+
 
     protected GenericAccount(AccountInformation config,
                              ImapDB.Account local,
@@ -107,6 +109,7 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.Account {
         this.db_vacuum_monitor = local.vacuum_monitor;
 
         compile_special_search_names();
+        this.stemmer = new SnowBall.Stemmer(find_appropriate_search_stemmer());
     }
 
     /** {@inheritDoc} */
@@ -578,7 +581,7 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.Account {
         GLib.Cancellable? cancellable
     ) throws GLib.Error {
         return yield new ImapDB.SearchQuery(
-            this, this.local, expression, text, EXACT, cancellable
+            this, this.local, expression, text, this.stemmer, EXACT, cancellable
         );
     }
 
@@ -1064,6 +1067,49 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.Account {
             throw new EngineError.OPEN_REQUIRED("Account %s not opened", to_string());
     }
 
+    private string find_appropriate_search_stemmer() {
+        // Unfortunately, the stemmer library only accepts the full
+        // language name for the stemming algorithm. This translates
+        // between the desktop session's preferred language ISO 639-1
+        // code and the available stemmers.
+        //
+        // FIXME: the available list here is determined by what's
+        // included in libstemmer. We should pass that list in instead
+        // of hardcoding it here.
+        foreach (string l in Intl.get_language_names()) {
+            switch (l) {
+            case "ar": return "arabic";
+            case "eu": return "basque";
+            case "ca": return "catalan";
+            case "da": return "danish";
+            case "nl": return "dutch";
+            case "en": return "english";
+            case "fi": return "finnish";
+            case "fr": return "french";
+            case "de": return "german";
+            case "el": return "greek";
+            case "hi": return "hindi";
+            case "hu": return "hungarian";
+            case "id": return "indonesian";
+            case "ga": return "irish";
+            case "it": return "italian";
+            case "lt": return "lithuanian";
+            case "ne": return "nepali";
+            case "no": return "norwegian";
+            case "pt": return "portuguese";
+            case "ro": return "romanian";
+            case "ru": return "russian";
+            case "sr": return "serbian";
+            case "es": return "spanish";
+            case "sv": return "swedish";
+            case "ta": return "tamil";
+            case "tr": return "turkish";
+            }
+        }
+
+        return "english";
+    }
+
     private void on_operation_error(AccountOperation op, Error error) {
         notify_service_problem(this.information.incoming, error);
     }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]