[geary/mjog/589-attachment-keyword-check: 2/3] ui/composer-web-view.js: Improve keyword detection



commit a8ef91f3a88f5f85285d9ab8739267a672edb16f
Author: Michael Gratton <mike vee net>
Date:   Mon Oct 7 23:46:33 2019 +1100

    ui/composer-web-view.js: Improve keyword detection
    
    Update ComposerPageState.containsKeywords to split on any punctuation,
    not just spaces, so that text like "see the attachment." will get
    detected.

 test/js/composer-page-state-test.vala | 11 ++++++
 ui/composer-web-view.js               | 64 ++++++++++++++++++++++-------------
 2 files changed, 52 insertions(+), 23 deletions(-)
---
diff --git a/test/js/composer-page-state-test.vala b/test/js/composer-page-state-test.vala
index bfe4978e..038c99fa 100644
--- a/test/js/composer-page-state-test.vala
+++ b/test/js/composer-page-state-test.vala
@@ -358,6 +358,11 @@ unknown://example6.com
                 ).get_js_value()
             ));
 
+            assert(Util.JS.to_bool(run_javascript(
+                @"ComposerPageState.containsKeywords('keyword1.', $complete_keys, $suffix_keys);"
+                ).get_js_value()
+            ));
+
             assert(Util.JS.to_bool(run_javascript(
                 @"ComposerPageState.containsKeywords('something.sf1', $complete_keys, $suffix_keys);"
                 ).get_js_value()
@@ -367,6 +372,12 @@ unknown://example6.com
                 @"ComposerPageState.containsKeywords('something.something.sf2', $complete_keys, $suffix_keys);"
                 ).get_js_value()
             ));
+
+            assert(!Util.JS.to_bool(run_javascript(
+                @"ComposerPageState.containsKeywords('http://something/esle.sf2', $complete_keys, $suffix_keys);"
+                ).get_js_value()
+            ));
+
         } catch (Util.JS.Error err) {
             print("Util.JS.Error: %s\n", err.message);
             assert_not_reached();
diff --git a/ui/composer-web-view.js b/ui/composer-web-view.js
index e56b48e2..564e5c5a 100644
--- a/ui/composer-web-view.js
+++ b/ui/composer-web-view.js
@@ -12,7 +12,8 @@
 let ComposerPageState = function() {
     this.init.apply(this, arguments);
 };
-ComposerPageState.KEYWORD_SPLIT_REGEX = /[\s]+/g;
+ComposerPageState.SPACE_CHAR_REGEX = /[\s]/i;
+ComposerPageState.WORD_CHAR_REGEX = /[\s\\'!"#$%&()*+,\-.\/:;<=>?@\[\]^_`{|}~\u2000-\u206F\u2E00-\u2E7F]/i;
 ComposerPageState.QUOTE_MARKER = "\x7f"; // delete
 ComposerPageState.PROTOCOL_REGEX = /^(aim|apt|bitcoin|cvs|ed2k|ftp|file|finger|git|gtalk|http|https|irc|ircs|irc6|lastfm|ldap|ldaps|magnet|news|nntp|rsync|sftp|skype|smb|sms|svn|telnet|tftp|ssh|webcal|xmpp):/i;
 // Taken from Geary.HTML.URL_REGEX, without the inline modifier (?x)
@@ -403,33 +404,50 @@ ComposerPageState.prototype = {
 /**
  * Determines if any keywords are present in a string.
  */
-ComposerPageState.containsKeywords = function(line, completeKeys, suffixKeys) {
-    let tokens = new Set(
-        line.toLocaleLowerCase().split(ComposerPageState.KEYWORD_SPLIT_REGEX)
-    );
-
-    for (let key of completeKeys) {
-        if (tokens.has(key)) {
-            return true;
-        }
-    }
-
+ComposerPageState.containsKeywords = function(line, wordKeys, suffixKeys) {
     let urlRegex = ComposerPageState.URL_REGEX;
-    // XXX assuming all suffixes have length = 3 here.
-    let extLen = 3;
-    for (let token of tokens) {
-        let extDelim = token.length - (extLen + 1);
-        // We do care about "a.pdf", but not ".pdf"
-        if (token.length >= extLen + 2 && token.charAt(extDelim) == ".") {
-            let suffix = token.substring(extDelim + 1);
-            if (suffixKeys.has(suffix)) {
-                if (token.match(urlRegex) == null) {
-                    return true;
+    let lastToken = -1;
+    let lastSpace = -1;
+    for (var i = 0; i <= line.length; i++) {
+        let char = (i < line.length) ? line[i] : " ";
+
+        if (char.match(ComposerPageState.WORD_CHAR_REGEX)) {
+            if (lastToken + 1 < i) {
+                let wordToken = line.substring(lastToken + 1, i).toLocaleLowerCase();
+                let isWordMatch = wordKeys.has(wordToken);
+                let isSuffixMatch = suffixKeys.has(wordToken);
+                if (isWordMatch || isSuffixMatch) {
+                    let spaceToken = line.substring(lastSpace + 1, i);
+                    let isUrl = (spaceToken.match(ComposerPageState.URL_REGEX) != null);
+
+                    // Matches a token if it is a word that isn't in a
+                    // URL. I.e. this gets "some attachment." but not
+                    // "http://attachment.com";
+                    if (isWordMatch && !isUrl) {
+                        return true;
+                    }
+
+                    // Matches a token if it is a suffix that isn't a
+                    // URL and such that the space-delimited token
+                    // ends with ".SUFFIX". I.e. this matches "see
+                    // attachment.pdf." but not
+                    // "http://example.com/attachment.pdf"; or "see the
+                    // pdf."
+                    if (isSuffixMatch &&
+                        !isUrl &&
+                        spaceToken.length != (1 + wordToken.length) &&
+                        spaceToken.endsWith("." + wordToken)) {
+                        return true;
+                    }
                 }
             }
+            lastToken = i;
+
+            if (char.match(ComposerPageState.SPACE_CHAR_REGEX)) {
+                lastSpace = i;
+            }
         }
     }
-
     return false;
 };
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]