[geary/bug/728002-webkit2: 112/140] Reenable composer attachment keyword checking.
- From: Michael Gratton <mjog src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [geary/bug/728002-webkit2: 112/140] Reenable composer attachment keyword checking.
- Date: Tue, 31 Jan 2017 23:07:57 +0000 (UTC)
commit e0bff994faab5170e367dcb15f67e8981444568b
Author: Michael James Gratton <mike vee net>
Date: Thu Jan 26 13:31:08 2017 +1100
Reenable composer attachment keyword checking.
* src/client/composer/composer-web-view.vala (ComposerWebView): Add
::contains_attachment_keywords that thunks to JS, remove uneeded
::get_block_quote_representation method.
* src/client/composer/composer-widget.vala (ComposerWidget): Remove
attachment keywork checking related code, just call new method on
editor to do the check.
* src/client/web-process/util-composer.vala,
src/client/web-process/util-webkit.vala: Remove uneeded code.
* ui/composer-web-view.js (ComposerPageState): Implement new
::containsAttachmentKeyword method based on previous code. Add unit
tests.
src/client/composer/composer-web-view.vala | 22 ++++--
src/client/composer/composer-widget.vala | 94 +++----------------------
src/client/web-process/util-composer.vala | 4 -
src/client/web-process/util-webkit.vala | 85 ----------------------
test/js/composer-page-state-test.vala | 92 +++++++++++++++++++++++
ui/composer-web-view.js | 109 +++++++++++++++++++++++++++-
6 files changed, 224 insertions(+), 182 deletions(-)
---
diff --git a/src/client/composer/composer-web-view.vala b/src/client/composer/composer-web-view.vala
index 6e210c1..20593a9 100644
--- a/src/client/composer/composer-web-view.vala
+++ b/src/client/composer/composer-web-view.vala
@@ -386,6 +386,21 @@ public class ComposerWebView : ClientWebView {
}
/**
+ * Determines if the editor content contains an attachment keyword.
+ */
+ public async bool contains_attachment_keywords(string keyword_spec, string subject) {
+ try {
+ return WebKitUtil.to_bool(
+ yield run_javascript("geary.containsAttachmentKeyword(\"%s\", \"%s\");"
+ .printf(keyword_spec, subject), null)
+ );
+ } catch (Error err) {
+ debug("Error checking or attchment keywords: %s", err.message);
+ return false;
+ }
+ }
+
+ /**
* Returns the editor content as an HTML string.
*/
public async string? get_html() throws Error {
@@ -483,13 +498,6 @@ public class ComposerWebView : ClientWebView {
// XXX
}
- /**
- * ???
- */
- public string get_block_quote_representation() {
- return ""; // XXX
- }
-
public override bool button_release_event(Gdk.EventButton event) {
// WebView seems to unconditionally consume button events, so
// to show a link popopver after the view has processed one,
diff --git a/src/client/composer/composer-widget.vala b/src/client/composer/composer-widget.vala
index f91e838..7a44bcd 100644
--- a/src/client/composer/composer-widget.vala
+++ b/src/client/composer/composer-widget.vala
@@ -160,14 +160,12 @@ public class ComposerWidget : Gtk.EventBox {
private const string URI_LIST_MIME_TYPE = "text/uri-list";
private const string FILE_URI_PREFIX = "file://";
- public const string ATTACHMENT_KEYWORDS_SUFFIX = ".doc|.pdf|.xls|.ppt|.rtf|.pps";
-
- // A list of keywords, separated by pipe ("|") characters, that suggest an attachment; since
- // this is full-word checking, include all variants of each word. No spaces are allowed.
- public const string ATTACHMENT_KEYWORDS_LOCALIZED =
_("attach|attaching|attaches|attachment|attachments|attached|enclose|enclosed|enclosing|encloses|enclosure|enclosures");
-
- private delegate bool CompareStringFunc(string key, string token);
-
+ // Translators: This is list of keywords, separated by pipe ("|")
+ // characters, that suggest an attachment; since this is full-word
+ // checking, include all variants of each word. No spaces are
+ // allowed.
+ private const string ATTACHMENT_KEYWORDS_LOCALIZED =
_("attach|attaching|attaches|attachment|attachments|attached|enclose|enclosed|enclosing|encloses|enclosure|enclosures");
+
public Geary.Account account { get; private set; }
public Geary.RFC822.MailboxAddresses from { get; private set; }
@@ -1218,82 +1216,6 @@ public class ComposerWidget : Gtk.EventBox {
return check_send_on_return(event) && base.key_press_event(event);
}
- // compares all keys to all tokens according to user-supplied comparison function
- // Returns true if found
- private bool search_tokens(string[] keys, string[] tokens, CompareStringFunc cmp_func,
- out string? found_key, out string? found_token) {
- foreach (string key in keys) {
- foreach (string token in tokens) {
- if (cmp_func(key, token)) {
- found_key = key;
- found_token = token;
-
- return true;
- }
- }
- }
-
- found_key = null;
- found_token = null;
-
- return false;
- }
-
- private bool email_contains_attachment_keywords() {
- // Filter out all content contained in block quotes
- string filtered = @"$subject\n";
- filtered += this.editor.get_block_quote_representation();
-
- Regex url_regex = null;
- try {
- // Prepare to ignore urls later
- url_regex = new Regex(Geary.HTML.URL_REGEX, RegexCompileFlags.CASELESS);
- } catch (Error error) {
- debug("Error building regex in keyword checker: %s", error.message);
- }
-
- string[] suffix_keys = ATTACHMENT_KEYWORDS_SUFFIX.casefold().split("|");
- string[] full_word_keys = ATTACHMENT_KEYWORDS_LOCALIZED.casefold().split("|");
-
- foreach (string line in filtered.split("\n")) {
- // Stop looking once we hit forwarded content
- if (line.has_prefix("--")) {
- break;
- }
-
- // casefold line, strip start and ending whitespace, then tokenize by whitespace
- string folded = line.casefold().strip();
- string[] tokens = folded.split_set(" \t");
-
- // search for full-word matches
- string? found_key, found_token;
- bool found = search_tokens(full_word_keys, tokens, (key, token) => {
- return key == token;
- }, out found_key, out found_token);
-
- // if not found, search for suffix matches
- if (!found) {
- found = search_tokens(suffix_keys, tokens, (key, token) => {
- return token.has_suffix(key);
- }, out found_key, out found_token);
- }
-
- if (found) {
- try {
- // Make sure the match isn't coming from a url
- if (found_key in url_regex.replace(folded, -1, 0, "")) {
- return true;
- }
- } catch (Error error) {
- debug("Regex replacement error in keyword checker: %s", error.message);
- return true;
- }
- }
- }
-
- return false;
- }
-
private async bool should_send() {
bool has_subject = !Geary.String.is_empty(subject.strip());
bool has_attachment = this.attached_files.size > 0;
@@ -1312,7 +1234,9 @@ public class ComposerWidget : Gtk.EventBox {
confirmation = _("Send message with an empty subject?");
} else if (!has_body && !has_attachment) {
confirmation = _("Send message with an empty body?");
- } else if (!has_attachment && email_contains_attachment_keywords()) {
+ } else if (!has_attachment &&
+ yield this.editor.contains_attachment_keywords(
+ ATTACHMENT_KEYWORDS_LOCALIZED, this.subject)) {
confirmation = _("Send message without an attachment?");
}
if (confirmation != null) {
diff --git a/src/client/web-process/util-composer.vala b/src/client/web-process/util-composer.vala
index 2855434..ac340ff 100644
--- a/src/client/web-process/util-composer.vala
+++ b/src/client/web-process/util-composer.vala
@@ -19,10 +19,6 @@ namespace Util.Composer {
private const string EDITING_DELETE_CONTAINER_ID = "WebKit-Editing-Delete-Container";
- public string get_block_quote_representation(WebKit.WebPage page) {
- return Util.DOM.get_text_representation(page.get_dom_document(), "blockquote");
- }
-
public void linkify_document(WebKit.WebPage page) {
Util.DOM.linkify_document(page.get_dom_document());
}
diff --git a/src/client/web-process/util-webkit.vala b/src/client/web-process/util-webkit.vala
index 1f2c923..ea202bf 100644
--- a/src/client/web-process/util-webkit.vala
+++ b/src/client/web-process/util-webkit.vala
@@ -8,91 +8,6 @@
public const string PROTOCOL_REGEX =
"^(aim|apt|bitcoin|cvs|ed2k|ftp|file|finger|git|gtalk|http|https|irc|ircs|irc6|lastfm|ldap|ldaps|magnet|news|nntp|rsync|sftp|skype|smb|sms|svn|telnet|tftp|ssh|webcal|xmpp):";
namespace Util.DOM {
- public WebKit.DOM.HTMLElement? select(WebKit.DOM.Node node, string selector) {
- try {
- if (node is WebKit.DOM.Document) {
- return (node as WebKit.DOM.Document).query_selector(selector) as WebKit.DOM.HTMLElement;
- } else {
- return (node as WebKit.DOM.Element).query_selector(selector) as WebKit.DOM.HTMLElement;
- }
- } catch (Error error) {
- debug("Error selecting element %s: %s", selector, error.message);
- return null;
- }
- }
-
- public WebKit.DOM.HTMLElement? clone_node(WebKit.DOM.Node node, bool deep = true) {
- WebKit.DOM.HTMLElement? clone = null;
- try {
- clone = node.clone_node(deep) as WebKit.DOM.HTMLElement;
- } catch (Error err) {
- debug("Error selecting cloning node: %s", err.message);
- }
- return clone;
- }
-
- // Returns the text contained in the DOM document, after ignoring tags of type "exclude"
- // and padding newlines where appropriate. Used to scan for attachment keywords.
- public string get_text_representation(WebKit.DOM.Document doc, string exclude) {
- WebKit.DOM.HTMLElement? copy = Util.DOM.clone_node(doc.get_body());
- if (copy == null) {
- return "";
- }
-
- // Keep deleting the next excluded element until there are none left
- while (true) {
- WebKit.DOM.HTMLElement? current = Util.DOM.select(copy, exclude);
- if (current == null) {
- break;
- }
-
- WebKit.DOM.Node parent = current.get_parent_node();
- try {
- parent.remove_child(current);
- } catch (Error error) {
- debug("Error removing blockquotes: %s", error.message);
- break;
- }
- }
-
- WebKit.DOM.NodeList node_list;
- try {
- node_list = copy.query_selector_all("br");
- } catch (Error error) {
- debug("Error finding <br>s: %s", error.message);
- return copy.get_inner_text();
- }
-
- // Replace <br> tags with newlines
- for (int i = 0; i < node_list.length; ++i) {
- WebKit.DOM.Node br = node_list.item(i);
- WebKit.DOM.Node parent = br.get_parent_node();
- try {
- parent.replace_child(doc.create_text_node("\n"), br);
- } catch (Error error) {
- debug("Error replacing <br>: %s", error.message);
- }
- }
-
- try {
- node_list = copy.query_selector_all("div");
- } catch (Error error) {
- debug("Error finding <div>s: %s", error.message);
- return copy.get_inner_text();
- }
-
- // Pad each <div> with newlines
- for (int i = 0; i < node_list.length; ++i) {
- WebKit.DOM.Node div = node_list.item(i);
- try {
- div.insert_before(doc.create_text_node("\n"), div.first_child);
- div.append_child(doc.create_text_node("\n"));
- } catch (Error error) {
- debug("Error padding <div> with newlines: %s", error.message);
- }
- }
- return copy.get_inner_text();
- }
// Linkifies plain text links in an HTML document.
public void linkify_document(WebKit.DOM.Document document) {
diff --git a/test/js/composer-page-state-test.vala b/test/js/composer-page-state-test.vala
index fe5f648..931933a 100644
--- a/test/js/composer-page-state-test.vala
+++ b/test/js/composer-page-state-test.vala
@@ -11,11 +11,13 @@ class ComposerPageStateTest : ClientWebViewTestCase<ComposerWebView> {
base("ComposerPageStateTest");
add_test("edit_context_font", edit_context_font);
add_test("edit_context_link", edit_context_link);
+ add_test("contains_attachment_keywords", contains_attachment_keywords);
add_test("get_html", get_html);
add_test("get_text", get_text);
add_test("get_text_with_quote", get_text_with_quote);
add_test("get_text_with_nested_quote", get_text_with_nested_quote);
add_test("resolve_nesting", resolve_nesting);
+ add_test("contains_keywords", contains_keywords);
add_test("quote_lines", quote_lines);
add_test("replace_non_breaking_space", replace_non_breaking_space);
}
@@ -52,6 +54,43 @@ class ComposerPageStateTest : ClientWebViewTestCase<ComposerWebView> {
}
}
+ public void contains_attachment_keywords() {
+ load_body_fixture("""
+<blockquote>inner quote</blockquote>
+
+<p>some text</p>
+
+some text
+
+-- <br>sig
+
+<p>outerquote text<p>
+""");
+ try {
+ assert(WebKitUtil.to_bool(run_javascript(
+ @"geary.containsAttachmentKeyword(\"some\", \"subject text\");"
+ )));
+ assert(WebKitUtil.to_bool(run_javascript(
+ @"geary.containsAttachmentKeyword(\"subject\", \"subject text\");"
+ )));
+ assert(!WebKitUtil.to_bool(run_javascript(
+ @"geary.containsAttachmentKeyword(\"innerquote\", \"subject text\");"
+ )));
+ assert(!WebKitUtil.to_bool(run_javascript(
+ @"geary.containsAttachmentKeyword(\"sig\", \"subject text\");"
+ )));
+ assert(!WebKitUtil.to_bool(run_javascript(
+ @"geary.containsAttachmentKeyword(\"outerquote\", \"subject text\");"
+ )));
+ } catch (Geary.JS.Error err) {
+ print("Geary.JS.Error: %s\n", err.message);
+ assert_not_reached();
+ } catch (Error err) {
+ print("WKError: %s\n", err.message);
+ assert_not_reached();
+ }
+ }
+
public void get_html() {
string html = "<p>para</p>";
load_body_fixture(html);
@@ -111,6 +150,59 @@ class ComposerPageStateTest : ClientWebViewTestCase<ComposerWebView> {
}
}
+ public void contains_keywords() {
+ load_body_fixture();
+ string complete_keys = """new Set(["keyword1", "keyword2"])""";
+ string suffix_keys = """new Set(["sf1", "sf2"])""";
+ try {
+ // Doesn't contain
+ assert(!WebKitUtil.to_bool(run_javascript(
+ @"ComposerPageState.containsKeywords('notcontained', $complete_keys, $suffix_keys);"
+ )));
+ assert(!WebKitUtil.to_bool(run_javascript(
+ @"ComposerPageState.containsKeywords('not contained', $complete_keys, $suffix_keys);"
+ )));
+ assert(!WebKitUtil.to_bool(run_javascript(
+ @"ComposerPageState.containsKeywords('not\tcontained', $complete_keys, $suffix_keys);"
+ )));
+ assert(!WebKitUtil.to_bool(run_javascript(
+ @"ComposerPageState.containsKeywords('http://www.keyword1.com', $complete_keys,
$suffix_keys);"
+ )));
+ assert(!WebKitUtil.to_bool(run_javascript(
+ @"ComposerPageState.containsKeywords('http://www.something.com/something.sf1',
$complete_keys, $suffix_keys);"
+ )));
+ assert(!WebKitUtil.to_bool(run_javascript(
+ @"ComposerPageState.containsKeywords('sf1', $complete_keys, $suffix_keys);"
+ )));
+ assert(!WebKitUtil.to_bool(run_javascript(
+ @"ComposerPageState.containsKeywords('.sf1', $complete_keys, $suffix_keys);"
+ )));
+
+ // Does contain
+ assert(WebKitUtil.to_bool(run_javascript(
+ @"ComposerPageState.containsKeywords('keyword1', $complete_keys, $suffix_keys);"
+ )));
+ assert(WebKitUtil.to_bool(run_javascript(
+ @"ComposerPageState.containsKeywords('keyword2 contained', $complete_keys, $suffix_keys);"
+ )));
+ assert(WebKitUtil.to_bool(run_javascript(
+ @"ComposerPageState.containsKeywords('keyword2\tcontained', $complete_keys, $suffix_keys);"
+ )));
+ assert(WebKitUtil.to_bool(run_javascript(
+ @"ComposerPageState.containsKeywords('something.sf1', $complete_keys, $suffix_keys);"
+ )));
+ assert(WebKitUtil.to_bool(run_javascript(
+ @"ComposerPageState.containsKeywords('something.something.sf2', $complete_keys,
$suffix_keys);"
+ )));
+ } catch (Geary.JS.Error err) {
+ print("Geary.JS.Error: %s\n", err.message);
+ assert_not_reached();
+ } catch (Error err) {
+ print("WKError: %s\n", err.message);
+ assert_not_reached();
+ }
+ }
+
public void resolve_nesting() {
load_body_fixture();
unichar q_marker = Geary.RFC822.Utils.QUOTE_MARKER;
diff --git a/ui/composer-web-view.js b/ui/composer-web-view.js
index 026fc21..a1adeae 100644
--- a/ui/composer-web-view.js
+++ b/ui/composer-web-view.js
@@ -13,9 +13,13 @@ let ComposerPageState = function() {
this.init.apply(this, arguments);
};
ComposerPageState.BODY_ID = "message-body";
+ComposerPageState.KEYWORD_SPLIT_REGEX = /[\s]+/g;
ComposerPageState.QUOTE_START = "";
ComposerPageState.QUOTE_END = "";
ComposerPageState.QUOTE_MARKER = "\x7f";
+// Taken from Geary.HTML.URL_REGEX, without the inline modifier (?x)
+// at the start, which is unsupported in JS
+ComposerPageState.URL_REGEX = new
RegExp("\\b((?:[a-z][\\w-]+:(?:/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}/)(?:[^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+(?:\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:'\".,<>?«»“”‘’]))",
"i");
ComposerPageState.prototype = {
__proto__: PageState.prototype,
@@ -161,6 +165,76 @@ ComposerPageState.prototype = {
// XXX need mark the quote somehow so we can find it, select
// it and delete it using execCommand
},
+ /**
+ * Determines if subject or body content refers to attachments.
+ */
+ containsAttachmentKeyword: function(keywordSpec, subject) {
+ // XXX this could also use a structured representation of the
+ // message body so we don't need to text to check
+
+ let ATTACHMENT_KEYWORDS_SUFFIX = "doc|pdf|xls|ppt|rtf|pps";
+
+ let completeKeys = new Set(keywordSpec.toLocaleLowerCase().split("|"));
+ let suffixKeys = new Set(ATTACHMENT_KEYWORDS_SUFFIX.split("|"));
+
+ // Check the subject line
+ if (ComposerPageState.containsKeywords(subject, completeKeys, suffixKeys)) {
+ return true;
+ }
+
+ // Check interesting body text
+ let breakingElements = new Set([
+ "BR", "P", "DIV", "BLOCKQUOTE", "TABLE", "OL", "UL", "HR"
+ ]);
+ let content = this.messageBody.firstChild;
+ let found = false;
+ let done = false;
+ let textContent = [];
+ while (content != null && !done) {
+ if (content.nodeType == Node.TEXT_NODE) {
+ textContent.push(content.textContent);
+ } else if (content.nodeType == Node.ELEMENT_NODE) {
+ let isBreaking = breakingElements.has(content.nodeName);
+ if (isBreaking) {
+ textContent.push("\n");
+ }
+
+ // Always exclude quoted text
+ if (content.nodeName != "BLOCKQUOTE") {
+ textContent.push(content.innerText);
+ }
+
+ if (isBreaking || content.nextSibling == null) {
+ for (let line of textContent.join("").split("\n")) {
+ // Ignore everything after a sig or a
+ // forwarded message.
+ // XXX This breaks if the user types this at
+ // the start of a line, also, WK's innerText
+ // impl strips trailing whitespace, so can't
+ // test for 'line == "-- "' :(
+ if (line.startsWith("--")) {
+ done = true;
+ break;
+ }
+
+ line = line.trim();
+ if (line != "") {
+ if (ComposerPageState.containsKeywords(line, completeKeys, suffixKeys)) {
+ found = true;
+ done = true;
+ break;
+ }
+ }
+ }
+ textContent = [];
+ }
+ }
+
+ content = content.nextSibling;
+ }
+
+ return found;
+ },
tabOut: function() {
document.execCommand(
"inserthtml", false, "<span style='white-space: pre-wrap'>\t</span>"
@@ -234,6 +308,39 @@ ComposerPageState.prototype = {
};
/**
+ * Determines if any keywords are present in a string.
+ */
+ComposerPageState.containsKeywords = function(line, completeKeys, suffixKeys) {
+ let tokens = new Set(
+ line.toLocaleLowerCase().split(ComposerPageState.KEYWORD_SPLIT_REGEX)
+ );
+
+ for (let key of completeKeys) {
+ if (tokens.has(key)) {
+ return true;
+ }
+ }
+
+ let urlRegex = ComposerPageState.URL_REGEX;
+ // XXX assuming all suffixes have length = 3 here.
+ let extLen = 3;
+ for (let token of tokens) {
+ let extDelim = token.length - (extLen + 1);
+ // We do care about "a.pdf", but not ".pdf"
+ if (token.length >= extLen + 2 && token.charAt(extDelim) == ".") {
+ let suffix = token.substring(extDelim + 1);
+ if (suffixKeys.has(suffix)) {
+ if (token.match(urlRegex) == null) {
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+};
+
+/**
* Convert a HTML DOM tree to plain text with delineated quotes.
*
* Lines are delinated using LF. Quoted lines are prefixed with
@@ -405,7 +512,7 @@ let SelectionUtil = {
getCursorElement: function() {
let selection = window.getSelection();
let node = selection.focusNode;
- if (node != null && node.nodeType != Node.ELEMENT_TYPE) {
+ if (node != null && node.nodeType != Node.ELEMENT_NODE) {
node = node.parentNode;
}
return node;
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]