[geary] Treat some punctuation as word characters: Bug #714863



commit ba9b7c68b87ca633c6ccda70466052fdd0b2e6ab
Author: Jim Nelson <jim yorba org>
Date:   Wed Jan 28 14:08:56 2015 -0800

    Treat some punctuation as word characters: Bug #714863
    
    When certain punctuation marks are found in a search term (e.g. in an
    extended name or an email address), treat the search as an
    exact-term search to remove spurious search results.

 src/engine/imap-db/imap-db-account.vala |   11 +++++++++++
 src/engine/util/util-string.vala        |   11 +++++++++++
 2 files changed, 22 insertions(+), 0 deletions(-)
---
diff --git a/src/engine/imap-db/imap-db-account.vala b/src/engine/imap-db/imap-db-account.vala
index 66323fb..b09685e 100644
--- a/src/engine/imap-db/imap-db-account.vala
+++ b/src/engine/imap-db/imap-db-account.vala
@@ -7,6 +7,12 @@
 private class Geary.ImapDB.Account : BaseObject {
     private const int POPULATE_SEARCH_TABLE_DELAY_SEC = 5;
     
+    // These characters are chosen for being commonly used to continue a single word (such as
+    // extended last names, e.g. "Lars-Eric") or in terms commonly searched for in an email client,
+    // e.g. unadorned mailbox addresses.  Note that characters commonly used for wildcards or that
+    // would be interpreted as wildcards by SQLite are not included here.
+    private const unichar[] SEARCH_TERM_CONTINUATION_CHARS = { '-', '_', '.', '@' };
+    
     private class FolderReference : Geary.SmartReference {
         public Geary.FolderPath path;
         
@@ -886,6 +892,11 @@ private class Geary.ImapDB.Account : BaseObject {
                         sql_s = null;
                 }
                 
+                // if term contains continuation characters, treat as exact search to reduce effects of
+                // tokenizer splitting terms w/ punctuation in them
+                if (String.contains_any_char(s, SEARCH_TERM_CONTINUATION_CHARS))
+                    s = "\"%s\"".printf(s);
+                
                 term = new SearchTerm(original, s, stemmed, sql_s, sql_stemmed);
             }
             
diff --git a/src/engine/util/util-string.vala b/src/engine/util/util-string.vala
index 0d8c004..3af5715 100644
--- a/src/engine/util/util-string.vala
+++ b/src/engine/util/util-string.vala
@@ -27,6 +27,17 @@ public int count_char(string s, unichar c) {
     return count;
 }
 
+// True when at least one code point of str is listed in chars; false otherwise (including "").
+public bool contains_any_char(string str, unichar[] chars) {
+    unichar current;
+    for (int offset = 0; str.get_next_char(ref offset, out current);) {
+        if (current in chars)
+            return true;
+    }
+    
+    return false;
+}
+
 public uint stri_hash(string str) {
     return str_hash(str.down()); // hash the down()-cased copy, so strings differing only in case hash alike
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]