[geary/cherry-pick-647ef90c] Merge branch 'mjog/589-attachment-keyword-check' into 'mainline'



commit f0b3d6791f609d749aa7bc804b16b7c35c678e7d
Author: Michael Gratton <mike vee net>
Date:   Tue Oct 8 22:32:52 2019 +0000

    Merge branch 'mjog/589-attachment-keyword-check' into 'mainline'
    
    Attachment keyword fixes
    
    Closes #589
    
    See merge request GNOME/geary!334
    
    (cherry picked from commit 647ef90cfce11342f8174e6aa7578f1e4be2b7d8)
    
    5732d23e ui/composer-web-view.js: Add element blacklist to htmlToText
    a8ef91f3 ui/composer-web-view.js: Improve keyword detection
    69e3cdf9 Fix ComposerPageState::containsAttachmentKeyword
    17a23bc5 ComposerWidget: Send both en and localised attachment keywords

 src/client/composer/composer-widget.vala |  19 ++++-
 test/js/composer-page-state-test.vala    | 137 +++++++++++++++++++++----------
 ui/composer-web-view.js                  | 123 ++++++++++++++-------------
 3 files changed, 174 insertions(+), 105 deletions(-)
---
diff --git a/src/client/composer/composer-widget.vala b/src/client/composer/composer-widget.vala
index 31a0dff8..879bacc7 100644
--- a/src/client/composer/composer-widget.vala
+++ b/src/client/composer/composer-widget.vala
@@ -161,11 +161,17 @@ public class ComposerWidget : Gtk.EventBox, Geary.BaseInterface {
     private const string URI_LIST_MIME_TYPE = "text/uri-list";
     private const string FILE_URI_PREFIX = "file://";
 
+    // Keep these in sync with the next const below.
+    private const string ATTACHMENT_KEYWORDS =
+        
"attach|attaching|attaches|attachment|attachments|attached|enclose|enclosed|enclosing|encloses|enclosure|enclosures";
     // Translators: This is list of keywords, separated by pipe ("|")
     // characters, that suggest an attachment; since this is full-word
-    // checking, include all variants of each word.  No spaces are
-    // allowed.
-    private const string ATTACHMENT_KEYWORDS_LOCALIZED = 
_("attach|attaching|attaches|attachment|attachments|attached|enclose|enclosed|enclosing|encloses|enclosure|enclosures");
+    // checking, include all variants of each word. No spaces are
+    // allowed. The words will be converted to lower case based on
+    // locale and English versions included automatically.
+    private const string ATTACHMENT_KEYWORDS_LOCALISED =
+        
_("attach|attaching|attaches|attachment|attachments|attached|enclose|enclosed|enclosing|encloses|enclosure|enclosures");
+
 
     public Geary.Account account { get; private set; }
     private Gee.Map<string, Geary.AccountInformation> accounts;
@@ -1334,7 +1340,12 @@ public class ComposerWidget : Gtk.EventBox, Geary.BaseInterface {
             confirmation = _("Send message with an empty body?");
         } else if (!has_attachment &&
                    yield this.editor.contains_attachment_keywords(
-                       ATTACHMENT_KEYWORDS_LOCALIZED, this.subject)) {
+                       string.join(
+                           "|",
+                           ATTACHMENT_KEYWORDS,
+                           ATTACHMENT_KEYWORDS_LOCALISED
+                       ),
+                       this.subject)) {
             confirmation = _("Send message without an attachment?");
         }
         if (confirmation != null) {
diff --git a/test/js/composer-page-state-test.vala b/test/js/composer-page-state-test.vala
index 8c1fcf08..6a7781b2 100644
--- a/test/js/composer-page-state-test.vala
+++ b/test/js/composer-page-state-test.vala
@@ -13,17 +13,19 @@ class ComposerPageStateTest : ClientWebViewTestCase<ComposerWebView> {
 
     public ComposerPageStateTest() {
         base("ComposerPageStateTest");
+        add_test("html_to_text", html_to_text);
+        add_test("html_to_text_with_quote", html_to_text_with_quote);
+        add_test("html_to_text_with_nested_quote", html_to_text_with_nested_quote);
+        add_test("html_to_text_with_blacklist", html_to_text_with_blacklist);
         add_test("edit_context_font", edit_context_font);
         add_test("edit_context_link", edit_context_link);
         add_test("indent_line", indent_line);
-        add_test("contains_attachment_keywords", contains_attachment_keywords);
         add_test("clean_content", clean_content);
         add_test("get_html", get_html);
         add_test("get_text", get_text);
-        add_test("get_text_with_quote", get_text_with_quote);
-        add_test("get_text_with_nested_quote", get_text_with_nested_quote);
-
         add_test("contains_keywords", contains_keywords);
+        // Depends contains_keywords and html_to_text_with_blacklist
+        add_test("contains_attachment_keywords", contains_attachment_keywords);
         add_test("replace_non_breaking_space", replace_non_breaking_space);
 
         try {
@@ -33,6 +35,84 @@ class ComposerPageStateTest : ClientWebViewTestCase<ComposerWebView> {
         }
     }
 
+    public void html_to_text() throws Error {
+        load_body_fixture("<p>para</p>");
+        try {
+            assert(
+                Util.JS.to_string(
+                    run_javascript(
+                        @"ComposerPageState.htmlToText(window.document.body);"
+                    ).get_js_value()
+                ) == "para\n\n\n\n"
+            );
+        } catch (Util.JS.Error err) {
+            print("Util.JS.Error: %s\n", err.message);
+            assert_not_reached();
+        } catch (Error err) {
+            print("WKError: %s\n", err.message);
+            assert_not_reached();
+        }
+    }
+
+    public void html_to_text_with_quote() throws Error {
+        unichar q_marker = Geary.RFC822.Utils.QUOTE_MARKER;
+        load_body_fixture("<p>pre</p> <blockquote><p>quote</p></blockquote> <p>post</p>");
+        try {
+            assert(
+                Util.JS.to_string(
+                    run_javascript(
+                        "ComposerPageState.htmlToText(window.document.body);"
+                    ).get_js_value()
+                ) == @"pre\n\n$(q_marker)quote\n$(q_marker)\npost\n\n\n\n"
+            );
+        } catch (Util.JS.Error err) {
+            print("Util.JS.Error: %s", err.message);
+            assert_not_reached();
+        } catch (Error err) {
+            print("WKError: %s", err.message);
+            assert_not_reached();
+        }
+    }
+
+    public void html_to_text_with_nested_quote() throws Error {
+        unichar q_marker = Geary.RFC822.Utils.QUOTE_MARKER;
+        load_body_fixture("<p>pre</p> <blockquote><p>quote1</p> 
<blockquote><p>quote2</p></blockquote></blockquote> <p>post</p>");
+        try {
+            assert(
+                Util.JS.to_string(
+                    run_javascript(
+                        "ComposerPageState.htmlToText(window.document.body)"
+                    ).get_js_value()
+                ) == 
@"pre\n\n$(q_marker)quote1\n$(q_marker)\n$(q_marker)$(q_marker)quote2\n$(q_marker)$(q_marker)\npost\n\n\n\n"
+            );
+        } catch (Util.JS.Error err) {
+            print("Util.JS.Error: %s\n", err.message);
+            assert_not_reached();
+        } catch (Error err) {
+            print("WKError: %s\n", err.message);
+            assert_not_reached();
+        }
+    }
+
+    public void html_to_text_with_blacklist() throws Error {
+        load_body_fixture("<p>pre</p> <blockquote><p>quote1</p> 
<blockquote><p>quote2</p></blockquote></blockquote> <p>post</p>");
+        try {
+            assert(
+                Util.JS.to_string(
+                    run_javascript(
+                        "ComposerPageState.htmlToText(window.document.body, [\"blockquote\"])"
+                    ).get_js_value()
+                ) == @"pre\n\npost\n\n\n\n"
+            );
+        } catch (Util.JS.Error err) {
+            print("Util.JS.Error: %s\n", err.message);
+            assert_not_reached();
+        } catch (Error err) {
+            print("WKError: %s\n", err.message);
+            assert_not_reached();
+        }
+    }
+
     public void edit_context_link() throws Error {
         string html = "<a id=\"test\" href=\"url\">para</a>";
         load_body_fixture(html);
@@ -222,44 +302,6 @@ unknown://example6.com
         }
     }
 
-    public void get_text_with_quote() throws Error {
-        unichar q_marker = Geary.RFC822.Utils.QUOTE_MARKER;
-        load_body_fixture("<p>pre</p> <blockquote><p>quote</p></blockquote> <p>post</p>");
-        try {
-            assert(
-                Util.JS.to_string(
-                    run_javascript(@"window.geary.getText();")
-                    .get_js_value()
-                ) == @"pre\n\n$(q_marker)quote\n$(q_marker)\npost\n\n\n\n"
-            );
-        } catch (Util.JS.Error err) {
-            print("Util.JS.Error: %s", err.message);
-            assert_not_reached();
-        } catch (Error err) {
-            print("WKError: %s", err.message);
-            assert_not_reached();
-        }
-    }
-
-    public void get_text_with_nested_quote() throws Error {
-        unichar q_marker = Geary.RFC822.Utils.QUOTE_MARKER;
-        load_body_fixture("<p>pre</p> <blockquote><p>quote1</p> 
<blockquote><p>quote2</p></blockquote></blockquote> <p>post</p>");
-        try {
-            assert(
-                Util.JS.to_string(
-                    run_javascript(@"window.geary.getText();")
-                    .get_js_value()
-                ) == 
@"pre\n\n$(q_marker)quote1\n$(q_marker)\n$(q_marker)$(q_marker)quote2\n$(q_marker)$(q_marker)\npost\n\n\n\n"
-            );
-        } catch (Util.JS.Error err) {
-            print("Util.JS.Error: %s\n", err.message);
-            assert_not_reached();
-        } catch (Error err) {
-            print("WKError: %s\n", err.message);
-            assert_not_reached();
-        }
-    }
-
     public void contains_keywords() throws Error {
         load_body_fixture();
         string complete_keys = """new Set(["keyword1", "keyword2"])""";
@@ -317,6 +359,11 @@ unknown://example6.com
                 ).get_js_value()
             ));
 
+            assert(Util.JS.to_bool(run_javascript(
+                @"ComposerPageState.containsKeywords('keyword1.', $complete_keys, $suffix_keys);"
+                ).get_js_value()
+            ));
+
             assert(Util.JS.to_bool(run_javascript(
                 @"ComposerPageState.containsKeywords('something.sf1', $complete_keys, $suffix_keys);"
                 ).get_js_value()
@@ -326,6 +373,12 @@ unknown://example6.com
                 @"ComposerPageState.containsKeywords('something.something.sf2', $complete_keys, 
$suffix_keys);"
                 ).get_js_value()
             ));
+
+            assert(!Util.JS.to_bool(run_javascript(
+                @"ComposerPageState.containsKeywords('http://something/esle.sf2', $complete_keys, 
$suffix_keys);"
+                ).get_js_value()
+            ));
+
         } catch (Util.JS.Error err) {
             print("Util.JS.Error: %s\n", err.message);
             assert_not_reached();
diff --git a/ui/composer-web-view.js b/ui/composer-web-view.js
index a848bb1a..3abda414 100644
--- a/ui/composer-web-view.js
+++ b/ui/composer-web-view.js
@@ -12,7 +12,8 @@
 let ComposerPageState = function() {
     this.init.apply(this, arguments);
 };
-ComposerPageState.KEYWORD_SPLIT_REGEX = /[\s]+/g;
+ComposerPageState.SPACE_CHAR_REGEX = /[\s]/i;
+ComposerPageState.WORD_CHAR_REGEX = /[\s\\'!"#$%&()*+,\-.\/:;<=>?@\[\]^_`{|}~\u2000-\u206F\u2E00-\u2E7F]/i;
 ComposerPageState.QUOTE_MARKER = "\x7f"; // delete
 ComposerPageState.PROTOCOL_REGEX = 
/^(aim|apt|bitcoin|cvs|ed2k|ftp|file|finger|git|gtalk|http|https|irc|ircs|irc6|lastfm|ldap|ldaps|magnet|news|nntp|rsync|sftp|skype|smb|sms|svn|telnet|tftp|ssh|webcal|xmpp):/i;
 // Taken from Geary.HTML.URL_REGEX, without the inline modifier (?x)
@@ -238,30 +239,12 @@ ComposerPageState.prototype = {
             return true;
         }
 
-        // Check interesting body text
-        let node = this.bodyPart.firstChild;
-        let content = [];
-        let breakingElements = new Set([
-            "BR", "P", "DIV", "BLOCKQUOTE", "TABLE", "OL", "UL", "HR"
-        ]);
-        while (node != null) {
-            if (node.nodeType == Node.TEXT_NODE) {
-                content.push(node.textContent);
-            } else if (content.nodeType == Node.ELEMENT_NODE) {
-                let isBreaking = breakingElements.has(node.nodeName);
-                if (isBreaking) {
-                    content.push("\n");
-                }
-
-                // Only include non-quoted text
-                if (content.nodeName != "BLOCKQUOTE") {
-                    content.push(content.textContent);
-                }
-            }
-            node = node.nextSibling;
-        }
+        // Check the body text
+        let content = ComposerPageState.htmlToText(
+            this.bodyPart, ["blockquote"]
+        );
         return ComposerPageState.containsKeywords(
-            content.join(""), completeKeys, suffixKeys
+            content, completeKeys, suffixKeys
         );
     },
     tabOut: function() {
@@ -403,33 +386,50 @@ ComposerPageState.prototype = {
 /**
  * Determines if any keywords are present in a string.
  */
-ComposerPageState.containsKeywords = function(line, completeKeys, suffixKeys) {
-    let tokens = new Set(
-        line.toLocaleLowerCase().split(ComposerPageState.KEYWORD_SPLIT_REGEX)
-    );
-
-    for (let key of completeKeys) {
-        if (tokens.has(key)) {
-            return true;
-        }
-    }
-
+ComposerPageState.containsKeywords = function(line, wordKeys, suffixKeys) {
     let urlRegex = ComposerPageState.URL_REGEX;
-    // XXX assuming all suffixes have length = 3 here.
-    let extLen = 3;
-    for (let token of tokens) {
-        let extDelim = token.length - (extLen + 1);
-        // We do care about "a.pdf", but not ".pdf"
-        if (token.length >= extLen + 2 && token.charAt(extDelim) == ".") {
-            let suffix = token.substring(extDelim + 1);
-            if (suffixKeys.has(suffix)) {
-                if (token.match(urlRegex) == null) {
-                    return true;
+    let lastToken = -1;
+    let lastSpace = -1;
+    for (var i = 0; i <= line.length; i++) {
+        let char = (i < line.length) ? line[i] : " ";
+
+        if (char.match(ComposerPageState.WORD_CHAR_REGEX)) {
+            if (lastToken + 1 < i) {
+                let wordToken = line.substring(lastToken + 1, i).toLocaleLowerCase();
+                let isWordMatch = wordKeys.has(wordToken);
+                let isSuffixMatch = suffixKeys.has(wordToken);
+                if (isWordMatch || isSuffixMatch) {
+                    let spaceToken = line.substring(lastSpace + 1, i);
+                    let isUrl = (spaceToken.match(ComposerPageState.URL_REGEX) != null);
+
+                    // Matches a token if it is a word that isn't in a
+                    // URL. I.e. this gets "some attachment." but not
+                    // "http://attachment.com";
+                    if (isWordMatch && !isUrl) {
+                        return true;
+                    }
+
+                    // Matches a token if it is a suffix that isn't a
+                    // URL and such that the space-delimited token
+                    // ends with ".SUFFIX". I.e. this matches "see
+                    // attachment.pdf." but not
+                    // "http://example.com/attachment.pdf"; or "see the
+                    // pdf."
+                    if (isSuffixMatch &&
+                        !isUrl &&
+                        spaceToken.length != (1 + wordToken.length) &&
+                        spaceToken.endsWith("." + wordToken)) {
+                        return true;
+                    }
                 }
             }
+            lastToken = i;
+
+            if (char.match(ComposerPageState.SPACE_CHAR_REGEX)) {
+                lastSpace = i;
+            }
         }
     }
-
     return false;
 };
 
@@ -460,11 +460,16 @@ ComposerPageState.cleanPart = function(part, removeIfEmpty) {
  * `ComposerPageState.QUOTE_MARKER`, where the number of markers indicates
  * the depth of nesting of the quote.
  */
-ComposerPageState.htmlToText = function(root) {
+ComposerPageState.htmlToText = function(root, blacklist = []) {
     let parentStyle = window.getComputedStyle(root);
     let text = "";
 
     for (let node of (root.childNodes || [])) {
+        let nodeName = node.nodeName.toLowerCase();
+        if (blacklist.includes(nodeName)) {
+            continue;
+        }
+
         let isBlock = (
             node instanceof Element
                 && window.getComputedStyle(node).display == "block"
@@ -476,7 +481,7 @@ ComposerPageState.htmlToText = function(root) {
                 text += "\n";
             }
         }
-        switch (node.nodeName.toLowerCase()) {
+        switch (nodeName) {
             case "#text":
                 let nodeText = node.nodeValue;
                 switch (parentStyle.whiteSpace) {
@@ -500,24 +505,24 @@ ComposerPageState.htmlToText = function(root) {
                 break;
             case "a":
                 if (node.closest("body.plain")) {
-                    text += ComposerPageState.htmlToText(node);
+                    text += ComposerPageState.htmlToText(node, blacklist);
                 } else if (node.textContent == node.href) {
                     text += "<" + node.href + ">";
                 } else {
-                    text += ComposerPageState.htmlToText(node);
+                    text += ComposerPageState.htmlToText(node, blacklist);
                     text += " <" + node.href + ">";
                 }
                 break;
             case "b":
             case "strong":
                 if (node.closest("body.plain")) {
-                    text += ComposerPageState.htmlToText(node);
+                    text += ComposerPageState.htmlToText(node, blacklist);
                 } else {
-                    text += "*" + ComposerPageState.htmlToText(node) + "*";
+                    text += "*" + ComposerPageState.htmlToText(node, blacklist) + "*";
                 }
                 break;
             case "blockquote":
-                let bqText = ComposerPageState.htmlToText(node);
+                let bqText = ComposerPageState.htmlToText(node, blacklist);
                 // If there is a newline at the end of the quote, remove it
                 // After this switch we ensure that there is a newline after the quote
                 bqText = bqText.replace(/\n$/, "");
@@ -532,23 +537,23 @@ ComposerPageState.htmlToText = function(root) {
             case "i":
             case "em":
                 if (node.closest("body.plain")) {
-                    text += ComposerPageState.htmlToText(node);
+                    text += ComposerPageState.htmlToText(node, blacklist);
                 } else {
-                    text += "/" + ComposerPageState.htmlToText(node) + "/";
+                    text += "/" + ComposerPageState.htmlToText(node, blacklist) + "/";
                 }
                 break;
             case "u":
                 if (node.closest("body.plain")) {
-                    text += ComposerPageState.htmlToText(node);
+                    text += ComposerPageState.htmlToText(node, blacklist);
                 } else {
-                    text += "_" + ComposerPageState.htmlToText(node) + "_";
+                    text += "_" + ComposerPageState.htmlToText(node, blacklist) + "_";
                 }
                 break;
             case "#comment":
-           case "style":
+            case "style":
                 break;
             default:
-                text += ComposerPageState.htmlToText(node);
+                text += ComposerPageState.htmlToText(node, blacklist);
                 break;
         }
         if (isBlock) {


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]