[geary/bug/728002-webkit2: 112/140] Reenable composer attachment keyword checking.



commit e0bff994faab5170e367dcb15f67e8981444568b
Author: Michael James Gratton <mike vee net>
Date:   Thu Jan 26 13:31:08 2017 +1100

    Reenable composer attachment keyword checking.
    
    * src/client/composer/composer-web-view.vala (ComposerWebView): Add
      ::contains_attachment_keywords that thunks to JS, remove uneeded
      ::get_block_quote_representation method.
    
    * src/client/composer/composer-widget.vala (ComposerWidget): Remove
      attachment keywork checking related code, just call new method on
      editor to do the check.
    
    * src/client/web-process/util-composer.vala,
      src/client/web-process/util-webkit.vala: Remove uneeded code.
    
    * ui/composer-web-view.js (ComposerPageState): Implement new
      ::containsAttachmentKeyword method based on previous code. Add unit
      tests.

 src/client/composer/composer-web-view.vala |   22 ++++--
 src/client/composer/composer-widget.vala   |   94 +++----------------------
 src/client/web-process/util-composer.vala  |    4 -
 src/client/web-process/util-webkit.vala    |   85 ----------------------
 test/js/composer-page-state-test.vala      |   92 +++++++++++++++++++++++
 ui/composer-web-view.js                    |  109 +++++++++++++++++++++++++++-
 6 files changed, 224 insertions(+), 182 deletions(-)
---
diff --git a/src/client/composer/composer-web-view.vala b/src/client/composer/composer-web-view.vala
index 6e210c1..20593a9 100644
--- a/src/client/composer/composer-web-view.vala
+++ b/src/client/composer/composer-web-view.vala
@@ -386,6 +386,21 @@ public class ComposerWebView : ClientWebView {
     }
 
     /**
+     * Determines if the editor content contains an attachment keyword.
+     */
+    public async bool contains_attachment_keywords(string keyword_spec, string subject) {
+        try {
+            return WebKitUtil.to_bool(
+                yield run_javascript("geary.containsAttachmentKeyword(\"%s\", \"%s\");"
+                                     .printf(keyword_spec, subject), null)
+                );
+        } catch (Error err) {
+            debug("Error checking or attchment keywords: %s", err.message);
+            return false;
+        }
+    }
+
+    /**
      * Returns the editor content as an HTML string.
      */
     public async string? get_html() throws Error {
@@ -483,13 +498,6 @@ public class ComposerWebView : ClientWebView {
         // XXX
     }
 
-    /**
-     * ???
-     */
-    public string get_block_quote_representation() {
-        return ""; // XXX
-    }
-
     public override bool button_release_event(Gdk.EventButton event) {
         // WebView seems to unconditionally consume button events, so
         // to show a link popopver after the view has processed one,
diff --git a/src/client/composer/composer-widget.vala b/src/client/composer/composer-widget.vala
index f91e838..7a44bcd 100644
--- a/src/client/composer/composer-widget.vala
+++ b/src/client/composer/composer-widget.vala
@@ -160,14 +160,12 @@ public class ComposerWidget : Gtk.EventBox {
     private const string URI_LIST_MIME_TYPE = "text/uri-list";
     private const string FILE_URI_PREFIX = "file://";
 
-    public const string ATTACHMENT_KEYWORDS_SUFFIX = ".doc|.pdf|.xls|.ppt|.rtf|.pps";
-    
-    // A list of keywords, separated by pipe ("|") characters, that suggest an attachment; since
-    // this is full-word checking, include all variants of each word.  No spaces are allowed.
-    public const string ATTACHMENT_KEYWORDS_LOCALIZED = 
_("attach|attaching|attaches|attachment|attachments|attached|enclose|enclosed|enclosing|encloses|enclosure|enclosures");
-    
-    private delegate bool CompareStringFunc(string key, string token);
-    
+    // Translators: This is list of keywords, separated by pipe ("|")
+    // characters, that suggest an attachment; since this is full-word
+    // checking, include all variants of each word.  No spaces are
+    // allowed.
+    private const string ATTACHMENT_KEYWORDS_LOCALIZED = 
_("attach|attaching|attaches|attachment|attachments|attached|enclose|enclosed|enclosing|encloses|enclosure|enclosures");
+
     public Geary.Account account { get; private set; }
 
     public Geary.RFC822.MailboxAddresses from { get; private set; }
@@ -1218,82 +1216,6 @@ public class ComposerWidget : Gtk.EventBox {
         return check_send_on_return(event) && base.key_press_event(event);
     }
 
-    // compares all keys to all tokens according to user-supplied comparison function
-    // Returns true if found
-    private bool search_tokens(string[] keys, string[] tokens, CompareStringFunc cmp_func,
-        out string? found_key, out string? found_token) {
-        foreach (string key in keys) {
-            foreach (string token in tokens) {
-                if (cmp_func(key, token)) {
-                    found_key = key;
-                    found_token = token;
-                    
-                    return true;
-                }
-            }
-        }
-        
-        found_key = null;
-        found_token = null;
-        
-        return false;
-    }
-    
-    private bool email_contains_attachment_keywords() {
-        // Filter out all content contained in block quotes
-        string filtered = @"$subject\n";
-        filtered += this.editor.get_block_quote_representation();
-        
-        Regex url_regex = null;
-        try {
-            // Prepare to ignore urls later
-            url_regex = new Regex(Geary.HTML.URL_REGEX, RegexCompileFlags.CASELESS);
-        } catch (Error error) {
-            debug("Error building regex in keyword checker: %s", error.message);
-        }
-        
-        string[] suffix_keys = ATTACHMENT_KEYWORDS_SUFFIX.casefold().split("|");
-        string[] full_word_keys = ATTACHMENT_KEYWORDS_LOCALIZED.casefold().split("|");
-        
-        foreach (string line in filtered.split("\n")) {
-            // Stop looking once we hit forwarded content
-            if (line.has_prefix("--")) {
-                break;
-            }
-            
-            // casefold line, strip start and ending whitespace, then tokenize by whitespace
-            string folded = line.casefold().strip();
-            string[] tokens = folded.split_set(" \t");
-            
-            // search for full-word matches
-            string? found_key, found_token;
-            bool found = search_tokens(full_word_keys, tokens, (key, token) => {
-                return key == token;
-            }, out found_key, out found_token);
-            
-            // if not found, search for suffix matches
-            if (!found) {
-                found = search_tokens(suffix_keys, tokens, (key, token) => {
-                    return token.has_suffix(key);
-                }, out found_key, out found_token);
-            }
-            
-            if (found) {
-                try {
-                    // Make sure the match isn't coming from a url
-                    if (found_key in url_regex.replace(folded, -1, 0, "")) {
-                        return true;
-                    }
-                } catch (Error error) {
-                    debug("Regex replacement error in keyword checker: %s", error.message);
-                    return true;
-                }
-            }
-        }
-        
-        return false;
-    }
-
     private async bool should_send() {
         bool has_subject = !Geary.String.is_empty(subject.strip());
         bool has_attachment = this.attached_files.size > 0;
@@ -1312,7 +1234,9 @@ public class ComposerWidget : Gtk.EventBox {
             confirmation = _("Send message with an empty subject?");
         } else if (!has_body && !has_attachment) {
             confirmation = _("Send message with an empty body?");
-        } else if (!has_attachment && email_contains_attachment_keywords()) {
+        } else if (!has_attachment &&
+                   yield this.editor.contains_attachment_keywords(
+                       ATTACHMENT_KEYWORDS_LOCALIZED, this.subject)) {
             confirmation = _("Send message without an attachment?");
         }
         if (confirmation != null) {
diff --git a/src/client/web-process/util-composer.vala b/src/client/web-process/util-composer.vala
index 2855434..ac340ff 100644
--- a/src/client/web-process/util-composer.vala
+++ b/src/client/web-process/util-composer.vala
@@ -19,10 +19,6 @@ namespace Util.Composer {
     private const string EDITING_DELETE_CONTAINER_ID = "WebKit-Editing-Delete-Container";
 
 
-    public string get_block_quote_representation(WebKit.WebPage page) {
-        return Util.DOM.get_text_representation(page.get_dom_document(), "blockquote");
-    }
-
     public void linkify_document(WebKit.WebPage page) {
         Util.DOM.linkify_document(page.get_dom_document());
     }
diff --git a/src/client/web-process/util-webkit.vala b/src/client/web-process/util-webkit.vala
index 1f2c923..ea202bf 100644
--- a/src/client/web-process/util-webkit.vala
+++ b/src/client/web-process/util-webkit.vala
@@ -8,91 +8,6 @@
 public const string PROTOCOL_REGEX = 
"^(aim|apt|bitcoin|cvs|ed2k|ftp|file|finger|git|gtalk|http|https|irc|ircs|irc6|lastfm|ldap|ldaps|magnet|news|nntp|rsync|sftp|skype|smb|sms|svn|telnet|tftp|ssh|webcal|xmpp):";
 
 namespace Util.DOM {
-    public WebKit.DOM.HTMLElement? select(WebKit.DOM.Node node, string selector) {
-        try {
-            if (node is WebKit.DOM.Document) {
-                return (node as WebKit.DOM.Document).query_selector(selector) as WebKit.DOM.HTMLElement;
-            } else {
-                return (node as WebKit.DOM.Element).query_selector(selector) as WebKit.DOM.HTMLElement;
-            }
-        } catch (Error error) {
-            debug("Error selecting element %s: %s", selector, error.message);
-            return null;
-        }
-    }
-
-    public WebKit.DOM.HTMLElement? clone_node(WebKit.DOM.Node node, bool deep = true) {
-        WebKit.DOM.HTMLElement? clone = null;
-        try {
-            clone = node.clone_node(deep) as WebKit.DOM.HTMLElement;
-        } catch (Error err) {
-            debug("Error selecting cloning node: %s", err.message);
-        }
-        return clone;
-    }
-
-    // Returns the text contained in the DOM document, after ignoring tags of type "exclude"
-    // and padding newlines where appropriate. Used to scan for attachment keywords.
-    public string get_text_representation(WebKit.DOM.Document doc, string exclude) {
-        WebKit.DOM.HTMLElement? copy = Util.DOM.clone_node(doc.get_body());
-        if (copy == null) {
-            return "";
-        }
-
-        // Keep deleting the next excluded element until there are none left
-        while (true) {
-            WebKit.DOM.HTMLElement? current = Util.DOM.select(copy, exclude);
-            if (current == null) {
-                break;
-            }
-
-            WebKit.DOM.Node parent = current.get_parent_node();
-            try {
-                parent.remove_child(current);
-            } catch (Error error) {
-                debug("Error removing blockquotes: %s", error.message);
-                break;
-            }
-        }
-
-        WebKit.DOM.NodeList node_list;
-        try {
-            node_list = copy.query_selector_all("br");
-        } catch (Error error) {
-            debug("Error finding <br>s: %s", error.message);
-            return copy.get_inner_text();
-        }
-
-        // Replace <br> tags with newlines
-        for (int i = 0; i < node_list.length; ++i) {
-            WebKit.DOM.Node br = node_list.item(i);
-            WebKit.DOM.Node parent = br.get_parent_node();
-            try {
-                parent.replace_child(doc.create_text_node("\n"), br);
-            } catch (Error error) {
-                debug("Error replacing <br>: %s", error.message);
-            }
-        }
-
-        try {
-            node_list = copy.query_selector_all("div");
-        } catch (Error error) {
-            debug("Error finding <div>s: %s", error.message);
-            return copy.get_inner_text();
-        }
-
-        // Pad each <div> with newlines
-        for (int i = 0; i < node_list.length; ++i) {
-            WebKit.DOM.Node div = node_list.item(i);
-            try {
-                div.insert_before(doc.create_text_node("\n"), div.first_child);
-                div.append_child(doc.create_text_node("\n"));
-            } catch (Error error) {
-                debug("Error padding <div> with newlines: %s", error.message);
-            }
-        }
-        return copy.get_inner_text();
-    }
 
     // Linkifies plain text links in an HTML document.
     public void linkify_document(WebKit.DOM.Document document) {
diff --git a/test/js/composer-page-state-test.vala b/test/js/composer-page-state-test.vala
index fe5f648..931933a 100644
--- a/test/js/composer-page-state-test.vala
+++ b/test/js/composer-page-state-test.vala
@@ -11,11 +11,13 @@ class ComposerPageStateTest : ClientWebViewTestCase<ComposerWebView> {
         base("ComposerPageStateTest");
         add_test("edit_context_font", edit_context_font);
         add_test("edit_context_link", edit_context_link);
+        add_test("contains_attachment_keywords", contains_attachment_keywords);
         add_test("get_html", get_html);
         add_test("get_text", get_text);
         add_test("get_text_with_quote", get_text_with_quote);
         add_test("get_text_with_nested_quote", get_text_with_nested_quote);
         add_test("resolve_nesting", resolve_nesting);
+        add_test("contains_keywords", contains_keywords);
         add_test("quote_lines", quote_lines);
         add_test("replace_non_breaking_space", replace_non_breaking_space);
     }
@@ -52,6 +54,43 @@ class ComposerPageStateTest : ClientWebViewTestCase<ComposerWebView> {
         }
     }
 
+    public void contains_attachment_keywords() {
+        load_body_fixture("""
+<blockquote>inner quote</blockquote>
+
+<p>some text</p>
+
+some text
+
+-- <br>sig
+
+<p>outerquote text<p>
+""");
+        try {
+            assert(WebKitUtil.to_bool(run_javascript(
+                @"geary.containsAttachmentKeyword(\"some\", \"subject text\");"
+            )));
+            assert(WebKitUtil.to_bool(run_javascript(
+                @"geary.containsAttachmentKeyword(\"subject\", \"subject text\");"
+            )));
+            assert(!WebKitUtil.to_bool(run_javascript(
+                @"geary.containsAttachmentKeyword(\"innerquote\", \"subject text\");"
+            )));
+            assert(!WebKitUtil.to_bool(run_javascript(
+                @"geary.containsAttachmentKeyword(\"sig\", \"subject text\");"
+            )));
+            assert(!WebKitUtil.to_bool(run_javascript(
+                @"geary.containsAttachmentKeyword(\"outerquote\", \"subject text\");"
+            )));
+        } catch (Geary.JS.Error err) {
+            print("Geary.JS.Error: %s\n", err.message);
+            assert_not_reached();
+        } catch (Error err) {
+            print("WKError: %s\n", err.message);
+            assert_not_reached();
+        }
+    }
+
     public void get_html() {
         string html = "<p>para</p>";
         load_body_fixture(html);
@@ -111,6 +150,59 @@ class ComposerPageStateTest : ClientWebViewTestCase<ComposerWebView> {
         }
     }
 
+    public void contains_keywords() {
+        load_body_fixture();
+        string complete_keys = """new Set(["keyword1", "keyword2"])""";
+        string suffix_keys = """new Set(["sf1", "sf2"])""";
+        try {
+            // Doesn't contain
+            assert(!WebKitUtil.to_bool(run_javascript(
+                @"ComposerPageState.containsKeywords('notcontained', $complete_keys, $suffix_keys);"
+            )));
+            assert(!WebKitUtil.to_bool(run_javascript(
+                @"ComposerPageState.containsKeywords('not contained', $complete_keys, $suffix_keys);"
+            )));
+            assert(!WebKitUtil.to_bool(run_javascript(
+                @"ComposerPageState.containsKeywords('not\tcontained', $complete_keys, $suffix_keys);"
+            )));
+            assert(!WebKitUtil.to_bool(run_javascript(
+                @"ComposerPageState.containsKeywords('http://www.keyword1.com', $complete_keys, 
$suffix_keys);"
+            )));
+            assert(!WebKitUtil.to_bool(run_javascript(
+                @"ComposerPageState.containsKeywords('http://www.something.com/something.sf1', 
$complete_keys, $suffix_keys);"
+            )));
+            assert(!WebKitUtil.to_bool(run_javascript(
+                @"ComposerPageState.containsKeywords('sf1', $complete_keys, $suffix_keys);"
+            )));
+            assert(!WebKitUtil.to_bool(run_javascript(
+                @"ComposerPageState.containsKeywords('.sf1', $complete_keys, $suffix_keys);"
+            )));
+
+            // Does contain
+            assert(WebKitUtil.to_bool(run_javascript(
+                @"ComposerPageState.containsKeywords('keyword1', $complete_keys, $suffix_keys);"
+            )));
+            assert(WebKitUtil.to_bool(run_javascript(
+                @"ComposerPageState.containsKeywords('keyword2 contained', $complete_keys, $suffix_keys);"
+            )));
+            assert(WebKitUtil.to_bool(run_javascript(
+                @"ComposerPageState.containsKeywords('keyword2\tcontained', $complete_keys, $suffix_keys);"
+            )));
+            assert(WebKitUtil.to_bool(run_javascript(
+                @"ComposerPageState.containsKeywords('something.sf1', $complete_keys, $suffix_keys);"
+            )));
+            assert(WebKitUtil.to_bool(run_javascript(
+                @"ComposerPageState.containsKeywords('something.something.sf2', $complete_keys, 
$suffix_keys);"
+            )));
+        } catch (Geary.JS.Error err) {
+            print("Geary.JS.Error: %s\n", err.message);
+            assert_not_reached();
+        } catch (Error err) {
+            print("WKError: %s\n", err.message);
+            assert_not_reached();
+        }
+    }
+
     public void resolve_nesting() {
         load_body_fixture();
         unichar q_marker = Geary.RFC822.Utils.QUOTE_MARKER;
diff --git a/ui/composer-web-view.js b/ui/composer-web-view.js
index 026fc21..a1adeae 100644
--- a/ui/composer-web-view.js
+++ b/ui/composer-web-view.js
@@ -13,9 +13,13 @@ let ComposerPageState = function() {
     this.init.apply(this, arguments);
 };
 ComposerPageState.BODY_ID = "message-body";
+ComposerPageState.KEYWORD_SPLIT_REGEX = /[\s]+/g;
 ComposerPageState.QUOTE_START = "‘";
 ComposerPageState.QUOTE_END = "’";
 ComposerPageState.QUOTE_MARKER = "\x7f";
+// Taken from Geary.HTML.URL_REGEX, without the inline modifier (?x)
+// at the start, which is unsupported in JS
+ComposerPageState.URL_REGEX = new 
RegExp("\\b((?:[a-z][\\w-]+:(?:/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}/)(?:[^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+(?:\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:'\".,<>?«»“”‘’]))",
 "i");
 
 ComposerPageState.prototype = {
     __proto__: PageState.prototype,
@@ -161,6 +165,76 @@ ComposerPageState.prototype = {
         // XXX need mark the quote somehow so we can find it, select
         // it and delete it using execCommand
     },
+    /**
+     * Determines if subject or body content refers to attachments.
+     */
+    containsAttachmentKeyword: function(keywordSpec, subject) {
+        // XXX this could also use a structured representation of the
+        // message body so we don't need to text to check
+
+        let ATTACHMENT_KEYWORDS_SUFFIX = "doc|pdf|xls|ppt|rtf|pps";
+
+        let completeKeys = new Set(keywordSpec.toLocaleLowerCase().split("|"));
+        let suffixKeys = new Set(ATTACHMENT_KEYWORDS_SUFFIX.split("|"));
+
+        // Check the subject line
+        if (ComposerPageState.containsKeywords(subject, completeKeys, suffixKeys)) {
+            return true;
+        }
+
+        // Check interesting body text
+        let breakingElements = new Set([
+            "BR", "P", "DIV", "BLOCKQUOTE", "TABLE", "OL", "UL", "HR"
+        ]);
+        let content = this.messageBody.firstChild;
+        let found = false;
+        let done = false;
+        let textContent = [];
+        while (content != null && !done) {
+            if (content.nodeType == Node.TEXT_NODE) {
+                textContent.push(content.textContent);
+            } else if (content.nodeType == Node.ELEMENT_NODE) {
+                let isBreaking = breakingElements.has(content.nodeName);
+                if (isBreaking) {
+                    textContent.push("\n");
+                }
+
+                // Always exclude quoted text
+                if (content.nodeName != "BLOCKQUOTE") {
+                    textContent.push(content.innerText);
+                }
+
+                if (isBreaking || content.nextSibling == null) {
+                    for (let line of textContent.join("").split("\n")) {
+                        // Ignore everything after a sig or a
+                        // forwarded message.
+                        // XXX This breaks if the user types this at
+                        // the start of a line, also, WK's innerText
+                        // impl strips trailing whitespace, so can't
+                        // test for 'line == "-- "' :(
+                        if (line.startsWith("--")) {
+                            done = true;
+                            break;
+                        }
+
+                        line = line.trim();
+                        if (line != "") {
+                            if (ComposerPageState.containsKeywords(line, completeKeys, suffixKeys)) {
+                                found = true;
+                                done = true;
+                                break;
+                            }
+                        }
+                    }
+                    textContent = [];
+                }
+            }
+
+            content = content.nextSibling;
+        }
+
+        return found;
+    },
     tabOut: function() {
         document.execCommand(
             "inserthtml", false, "<span style='white-space: pre-wrap'>\t</span>"
@@ -234,6 +308,39 @@ ComposerPageState.prototype = {
 };
 
 /**
+ * Determines if any keywords are present in a string.
+ */
+ComposerPageState.containsKeywords = function(line, completeKeys, suffixKeys) {
+    let tokens = new Set(
+        line.toLocaleLowerCase().split(ComposerPageState.KEYWORD_SPLIT_REGEX)
+    );
+
+    for (let key of completeKeys) {
+        if (tokens.has(key)) {
+            return true;
+        }
+    }
+
+    let urlRegex = ComposerPageState.URL_REGEX;
+    // XXX assuming all suffixes have length = 3 here.
+    let extLen = 3;
+    for (let token of tokens) {
+        let extDelim = token.length - (extLen + 1);
+        // We do care about "a.pdf", but not ".pdf"
+        if (token.length >= extLen + 2 && token.charAt(extDelim) == ".") {
+            let suffix = token.substring(extDelim + 1);
+            if (suffixKeys.has(suffix)) {
+                if (token.match(urlRegex) == null) {
+                    return true;
+                }
+            }
+        }
+    }
+
+    return false;
+};
+
+/**
  * Convert a HTML DOM tree to plain text with delineated quotes.
  *
  * Lines are delinated using LF. Quoted lines are prefixed with
@@ -405,7 +512,7 @@ let SelectionUtil = {
     getCursorElement: function() {
         let selection = window.getSelection();
         let node = selection.focusNode;
-        if (node != null && node.nodeType != Node.ELEMENT_TYPE) {
+        if (node != null && node.nodeType != Node.ELEMENT_NODE) {
             node = node.parentNode;
         }
         return node;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]