[evolution/wip/mcrha/webkit-jsc-api] e-editor.js: Change how magic links split text into parts and linkify it



commit 016a716e5f7acf76d3e5d4ed90f63b2f4ac70cc2
Author: Milan Crha <mcrha redhat com>
Date:   Wed Apr 15 17:08:22 2020 +0200

    e-editor.js: Change how magic links split text into parts and linkify it
    
    The problem is with non-ASCII (multi-byte UTF-8) letters and Emoji, where the byte
    position returned by GRegex doesn't match the character offset in the JavaScript string.
    It's more visible with Emoji, which take (at least) two UTF-16 code units in a JavaScript
    string, instead of one.

 data/webkit/e-editor.js                            | 125 +++++---------
 src/e-util/test-html-editor-units.c                |  47 +++++
 .../web-extension/e-editor-web-extension.c         | 191 ++++++++++++++++++---
 3 files changed, 257 insertions(+), 106 deletions(-)
---
diff --git a/data/webkit/e-editor.js b/data/webkit/e-editor.js
index 90075830e8..f98d3e7eee 100644
--- a/data/webkit/e-editor.js
+++ b/data/webkit/e-editor.js
@@ -21,14 +21,6 @@
    public functions start with upper-case letter. */
 
 var EvoEditor = {
-       // stephenhay from https://mathiasbynens.be/demo/url-regex
-       URL_PATTERN : "((?:(?:(?:" + "news|telnet|nntp|file|https?|s?ftp|webcal|localhost|ssh" + ")\\:\\/\\/)|(?:www\\.|ftp\\.))[^\\s\\/\\$\\.\\?#].[^\\s]*+)",
-       // from camel-url-scanner.c
-       URL_INVALID_TRAILING_CHARS : ",.:;?!-|}])\">",
-       // http://www.w3.org/TR/html5/forms.html#valid-e-mail-address
-       EMAIL_PATTERN : "[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}" +
-                       "[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*+",
-
        CURRENT_ELEMENT_ATTR : "x-evo-dialog-current-element",
        BLOCKQUOTE_STYLE : "margin:0 0 0 .8ex; border-left:2px #729fcf solid;padding-left:1ex",
 
@@ -1810,8 +1802,6 @@ EvoEditor.quoteParagraphWrap = function(node, lineLength, wrapWidth, prefixHtml)
 
                                node.parentElement.insertBefore(br, node);
                                br.insertAdjacentHTML("afterend", prefixHtml);
-                       } else {
-                               node.insertAdjacentHTML("beforebegin", prefixHtml);
                        }
 
                        offset = 0;
@@ -2931,93 +2921,66 @@ EvoEditor.linkifyText = function(anchorNode, withUndo)
                }
        }
 
-       var covered = false, done = false;
-
-       while (!done) {
-               done = true;
-
-               var isEmail = text.search("@") >= 0 && text.search("://") < 0, match;
+       var parts, ii;
 
-               // the replace call below replaces &nbsp; (0xA0) with regular space
-               match = EvoEditor.findPattern(text.replace(/ /g, " "), isEmail ? EvoEditor.EMAIL_PATTERN : 
EvoEditor.URL_PATTERN);
-               if (match) {
-                       var url = text.substring(match.start, match.end), node;
+       parts = EvoEditor.splitTextWithLinks(text);
 
-                       // because 'search' uses Regex and throws exception on brackets and other Regex-sensitive characters
-                       var isInvalidTrailingChar = function(chr) {
-                               var jj;
+       if (!parts)
+               return false;
 
-                               for (jj = 0; jj < EvoEditor.URL_INVALID_TRAILING_CHARS.length; jj++) {
-                                       if (chr == EvoEditor.URL_INVALID_TRAILING_CHARS.charAt(jj))
-                                               return true;
-                               }
+       if (withUndo) {
+               EvoUndoRedo.StartRecord(EvoUndoRedo.RECORD_KIND_CUSTOM, "magicLink", anchorNode.parentElement, anchorNode.parentElement,
+                       EvoEditor.CLAIM_CONTENT_FLAG_SAVE_HTML);
+       }
 
-                               return false;
-                       };
+       try {
+               var selection = document.getSelection(), matchEnd = 0, insBefore, parent;
+               var offset = selection.anchorOffset, updateSelection = selection.anchorNode === anchorNode, newAnchorNode = null;
 
-                       /* URLs are extremely unlikely to end with any punctuation, so
-                        * strip any trailing punctuation off from link and put it after
-                        * the link. Do the same for any closing double-quotes as well. */
-                       while (url.length > 0 && isInvalidTrailingChar(url.charAt(url.length - 1))) {
-                               var open_bracket = 0, close_bracket = url.charAt(url.length - 1);
-
-                               if (close_bracket == ')')
-                                       open_bracket = '(';
-                               else if (close_bracket == '}')
-                                       open_bracket = '{';
-                               else if (close_bracket == ']')
-                                       open_bracket = '[';
-                               else if (close_bracket == '>')
-                                       open_bracket = '<';
-
-                               if (open_bracket != 0) {
-                                       var n_opened = 0, n_closed = 0, ii, chr;
-
-                                       for (ii = 0; ii < url.length; ii++) {
-                                               chr = url.charAt(ii);
-
-                                               if (chr == open_bracket)
-                                                       n_opened++;
-                                               else if (chr == close_bracket)
-                                                       n_closed++;
-                                       }
+               insBefore = anchorNode;
+               parent = anchorNode.parentElement;
 
-                                       /* The closing bracket can match one inside the URL,
-                                          thus keep it there. */
-                                       if (n_opened > 0 && n_opened - n_closed >= 0)
-                                               break;
-                               }
+               for (ii = 0; ii < parts.length; ii++) {
+                       var part = parts[ii], node, isLast = ii + 1 >= parts.length;
 
-                               url = url.substr(0, url.length - 1);
-                               match.end--;
+                       if (part.href) {
+                               node = document.createElement("A");
+                               node.href = part.href;
+                               node.innerText = part.text;
+                       } else if (isLast) {
+                               node = null;
+                               // it can be a space, which cannot be added after the element, thus workaround it this way
+                               newAnchorNode = anchorNode.splitText(matchEnd);
+                       } else {
+                               node = document.createTextNode(part.text);
                        }
 
-                       if (url.length > 0) {
-                               covered = true;
-
-                               if (isEmail)
-                                       url = "mailto:" + url;
-                               else if (url.startsWith("www."))
-                                       url = "https://" + url;
+                       if (node)
+                               parent.insertBefore(node, insBefore);
 
-                               node = document.createElement("A");
-                               node.href = url;
+                       if (!isLast) {
+                               matchEnd += part.text.length;
+                       } else if (node) {
+                               newAnchorNode = node;
+                       }
+               }
 
-                               anchorNode = EvoEditor.replaceMatchWithNode("magicLink", anchorNode, match, node, true, withUndo);
+               if (anchorNode)
+                       anchorNode.remove();
 
-                               if (anchorNode) {
-                                       anchorNode = anchorNode.parentElement.nextSibling;
+               if (updateSelection && newAnchorNode && offset - matchEnd >= 0)
+                       selection.setPosition(newAnchorNode, offset - matchEnd);
+       } finally {
+               if (withUndo) {
+                       EvoUndoRedo.StopRecord(EvoUndoRedo.RECORD_KIND_CUSTOM, "magicLink");
 
-                                       if (anchorNode) {
-                                               text = anchorNode.nodeValue;
-                                               done = !text;
-                                       }
-                               }
-                       }
+                       EvoUndoRedo.GroupTopRecords(2);
+                       EvoEditor.maybeUpdateFormattingState(EvoEditor.FORCE_MAYBE);
+                       EvoEditor.EmitContentChanged();
                }
        }
 
-       return covered;
+       return true;
 }
 
 EvoEditor.maybeRemoveQuotationMark = function(node)
diff --git a/src/e-util/test-html-editor-units.c b/src/e-util/test-html-editor-units.c
index cdb5673e48..add1f00d83 100644
--- a/src/e-util/test-html-editor-units.c
+++ b/src/e-util/test-html-editor-units.c
@@ -4916,6 +4916,52 @@ test_cite_reply_plain (TestFixture *fixture)
                g_test_fail ();
 }
 
+static void
+test_cite_reply_link (TestFixture *fixture)
+{
+       if (!test_utils_process_commands (fixture,
+               "mode:plain\n")) {
+               g_test_fail ();
+               return;
+       }
+
+       test_utils_insert_content (fixture,
+               "<html><head></head><body><div><span>123 (here <a href=\"https://www.example.com\">\n"
+               "https://www.example.com/1234567890/1234567890/1234567890/1234567890/1234567890/"
+               ") and </span>here ěščřžýáíé <a href=\"https://www.example.com\">www.example.com</a>"
+               " with closing text after.</div>"
+               "<div>www.example1.com</div>"
+               "<div>before www.example2.com</div>"
+               "<div>www.example3.com after</div>"
+               "<div>😏😉🙂 user@no.where line with Emoji</div></body></html>"
+               "<span class=\"-x-evo-to-body\" data-credits=\"On Today, User wrote:\"></span>"
+               "<span class=\"-x-evo-cite-body\"></span>",
+               E_CONTENT_EDITOR_INSERT_REPLACE_ALL | E_CONTENT_EDITOR_INSERT_TEXT_HTML);
+
+       if (!test_utils_run_simple_test (fixture,
+               "",
+               HTML_PREFIX "<div style=\"width: 71ch;\">On Today, User wrote:</div>"
+               "<blockquote type=\"cite\">"
+               "<div>" QUOTE_SPAN (QUOTE_CHR) "123 (here </div>"
+               "<div>" QUOTE_SPAN (QUOTE_CHR) "<a href=\"https://www.example.com/1234567890/1234567890/1234567890/1234567890/1234567890/\">"
+                       "https://www.example.com/1234567890/1234567890/1234567890/1234567890/1234567890/</a>)<br class=\"-x-evo-wrap-br\">"
+               QUOTE_SPAN (QUOTE_CHR) "and here ěščřžýáíé <a href=\"https://www.example.com\">www.example.com</a> with closing text after.</div>"
+               "<div>" QUOTE_SPAN (QUOTE_CHR) "<a href=\"https://www.example1.com\">www.example1.com</a></div>"
+               "<div>" QUOTE_SPAN (QUOTE_CHR) "before <a href=\"https://www.example2.com\">www.example2.com</a></div>"
+               "<div>" QUOTE_SPAN (QUOTE_CHR) "<a href=\"https://www.example3.com\">www.example3.com</a> after</div>"
+               "<div>" QUOTE_SPAN (QUOTE_CHR) "😏😉🙂 <a href=\"mailto:user@no.where\">user@no.where</a> line with Emoji</div>"
+               "</blockquote>" HTML_SUFFIX,
+               "On Today, User wrote:\n"
+               "> 123 (here \n"
+               "> https://www.example.com/1234567890/1234567890/1234567890/1234567890/1234567890/\n"
+               "> ) and here ěščřžýáíé www.example.com with closing text after.\n"
+               "> www.example1.com\n"
+               "> before www.example2.com\n"
+               "> www.example3.com after\n"
+               "> 😏😉🙂 user@no.where line with Emoji\n"))
+               g_test_fail ();
+}
+
 static void
 test_cite_editing_html (TestFixture *fixture)
 {
@@ -6474,6 +6520,7 @@ main (gint argc,
        test_utils_add_test ("/cite/reply-html", test_cite_reply_html);
        test_utils_add_test ("/cite/reply-html-to-plain", test_cite_reply_html_to_plain);
        test_utils_add_test ("/cite/reply-plain", test_cite_reply_plain);
+       test_utils_add_test ("/cite/reply-link", test_cite_reply_link);
        test_utils_add_test ("/cite/editing-html", test_cite_editing_html);
        test_utils_add_test ("/cite/editing-plain", test_cite_editing_plain);
        test_utils_add_test ("/undo/text-typed", test_undo_text_typed);
diff --git a/src/modules/webkit-editor/web-extension/e-editor-web-extension.c b/src/modules/webkit-editor/web-extension/e-editor-web-extension.c
index 2def8ee041..a75de0ee20 100644
--- a/src/modules/webkit-editor/web-extension/e-editor-web-extension.c
+++ b/src/modules/webkit-editor/web-extension/e-editor-web-extension.c
@@ -173,18 +173,19 @@ load_javascript_file (JSCContext *jsc_context,
        g_free (content);
 }
 
-/* Returns 'null', when no match for the 'pattern' in 'text' found, otherwise
-   returns an 'object { start : nnn, end : nnn };' with the first longest pattern match. */
-static JSCValue *
-evo_editor_jsc_find_pattern (const gchar *text,
-                            const gchar *pattern,
-                            JSCContext *jsc_context)
+static void
+evo_editor_find_pattern (const gchar *text,
+                        const gchar *pattern,
+                        gint *out_start,
+                        gint *out_end)
 {
-       JSCValue *object = NULL;
        GRegex *regex;
 
-       if (!text || !*text || !pattern || !*pattern)
-               return jsc_value_new_null (jsc_context);
+       g_return_if_fail (out_start != NULL);
+       g_return_if_fail (out_end != NULL);
+
+       *out_start = -1;
+       *out_end = -1;
 
        regex = g_regex_new (pattern, 0, 0, NULL);
        if (regex) {
@@ -194,25 +195,165 @@ evo_editor_jsc_find_pattern (const gchar *text,
                if (g_regex_match_all (regex, text, G_REGEX_MATCH_NOTEMPTY, &match_info) &&
                    g_match_info_fetch_pos (match_info, 0, &start, &end) &&
                    start >= 0 && end >= 0) {
-                       JSCValue *number;
-
-                       object = jsc_value_new_object (jsc_context, NULL, NULL);
-
-                       number = jsc_value_new_number (jsc_context, start);
-                       jsc_value_object_set_property (object, "start", number);
-                       g_clear_object (&number);
-
-                       number = jsc_value_new_number (jsc_context, end);
-                       jsc_value_object_set_property (object, "end", number);
-                       g_clear_object (&number);
+                       *out_start = start;
+                       *out_end = end;
                }
 
                if (match_info)
                        g_match_info_free (match_info);
                g_regex_unref (regex);
        }
+}
 
-       return object ? object : jsc_value_new_null (jsc_context);
+/* Returns 'null', when no match for magicLinks in 'text' were found, otherwise
+   returns an array of 'object { text : string, [ href : string] };' with the text
+   split into parts, where those with also 'href' property defined are meant
+   to be anchors. */
+static JSCValue *
+evo_editor_jsc_split_text_with_links (const gchar *text,
+                                     JSCContext *jsc_context)
+{
+       // stephenhay from https://mathiasbynens.be/demo/url-regex
+       const gchar *URL_PATTERN = "((?:(?:(?:"
+                                  "news|telnet|nntp|file|https?|s?ftp|webcal|localhost|ssh"
+                                  ")\\:\\/\\/)|(?:www\\.|ftp\\.))[^\\s\\/\\$\\.\\?#].[^\\s]*+)";
+       // from camel-url-scanner.c
+       const gchar *URL_INVALID_TRAILING_CHARS = ",.:;?!-|}])\">";
+       // http://www.w3.org/TR/html5/forms.html#valid-e-mail-address
+       const gchar *EMAIL_PATTERN = "[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}"
+                                    "[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*+";
+       JSCValue *array = NULL;
+       guint array_len = 0;
+       gboolean done = FALSE;
+
+       if (!text || !*text)
+               return jsc_value_new_null (jsc_context);
+
+       #define add_to_array(_obj) G_STMT_START { \
+               if (!array) \
+                       array = jsc_value_new_array (jsc_context, G_TYPE_NONE); \
+               jsc_value_object_set_property_at_index (array, array_len, _obj); \
+               array_len++; \
+               } G_STMT_END
+
+       while (!done) {
+               gboolean is_email;
+               gint start = -1, end = -1;
+
+               done = TRUE;
+
+               is_email = strchr (text, '@') && !strstr (text, "://");
+
+               evo_editor_find_pattern (text, is_email ? EMAIL_PATTERN : URL_PATTERN, &start, &end);
+
+               if (start >= 0 && end >= 0) {
+                       const gchar *url_end;
+
+                       url_end = text + end - 1;
+
+                       /* URLs are extremely unlikely to end with any punctuation, so
+                        * strip any trailing punctuation off from link and put it after
+                        * the link. Do the same for any closing double-quotes as well. */
+                       while (end > start && *url_end && strchr (URL_INVALID_TRAILING_CHARS, *url_end)) {
+                               gchar open_bracket = 0, close_bracket = *url_end;
+
+                               if (close_bracket == ')')
+                                       open_bracket = '(';
+                               else if (close_bracket == '}')
+                                       open_bracket = '{';
+                               else if (close_bracket == ']')
+                                       open_bracket = '[';
+                               else if (close_bracket == '>')
+                                       open_bracket = '<';
+
+                               if (open_bracket != 0) {
+                                       gint n_opened = 0, n_closed = 0;
+                                       const gchar *ptr;
+
+                                       for (ptr = text + start; ptr <= url_end; ptr++) {
+                                               if (*ptr == open_bracket)
+                                                       n_opened++;
+                                               else if (*ptr == close_bracket)
+                                                       n_closed++;
+                                       }
+
+                                       /* The closing bracket can match one inside the URL,
+                                          thus keep it there. */
+                                       if (n_opened > 0 && n_opened - n_closed >= 0)
+                                               break;
+                               }
+
+                               url_end--;
+                               end--;
+                       }
+
+                       if (end > start) {
+                               JSCValue *object, *string;
+                               gchar *url, *tmp;
+
+                               if (start > 0) {
+                                       tmp = g_strndup (text, start);
+
+                                       object = jsc_value_new_object (jsc_context, NULL, NULL);
+
+                                       string = jsc_value_new_string (jsc_context, tmp);
+                                       jsc_value_object_set_property (object, "text", string);
+                                       g_clear_object (&string);
+
+                                       add_to_array (object);
+
+                                       g_clear_object (&object);
+                                       g_free (tmp);
+                               }
+
+                               tmp = g_strndup (text + start, end - start);
+
+                               if (is_email)
+                                       url = g_strconcat ("mailto:", tmp, NULL);
+                               else if (g_str_has_prefix (tmp, "www."))
+                                       url = g_strconcat ("https://", tmp, NULL);
+                               else
+                                       url = NULL;
+
+                               object = jsc_value_new_object (jsc_context, NULL, NULL);
+
+                               string = jsc_value_new_string (jsc_context, tmp);
+                               jsc_value_object_set_property (object, "text", string);
+                               g_clear_object (&string);
+
+                               string = jsc_value_new_string (jsc_context, url ? url : tmp);
+                               jsc_value_object_set_property (object, "href", string);
+                               g_clear_object (&string);
+
+                               add_to_array (object);
+
+                               g_clear_object (&object);
+                               g_free (tmp);
+                               g_free (url);
+
+                               text = text + end;
+                               done = FALSE;
+                       }
+               }
+       }
+
+       if (array && *text) {
+               JSCValue *object, *string;
+
+               object = jsc_value_new_object (jsc_context, NULL, NULL);
+
+               string = jsc_value_new_string (jsc_context, text);
+               jsc_value_object_set_property (object, "text", string);
+               g_clear_object (&string);
+
+               add_to_array (object);
+
+               g_clear_object (&object);
+       }
+
+       #undef add_to_array
+
+       return array ? array : jsc_value_new_null (jsc_context);
 }
 
 /* Returns 'null' or an object { text : string, imageUri : string, width : nnn, height : nnn }
@@ -356,11 +497,11 @@ window_object_cleared_cb (WebKitScriptWorld *world,
                JSCValue *jsc_function;
                const gchar *func_name;
 
-               /* EvoEditor.findPattern(text, pattern) */
-               func_name = "findPattern";
+               /* EvoEditor.splitTextWithLinks(text) */
+               func_name = "splitTextWithLinks";
                jsc_function = jsc_value_new_function (jsc_context, func_name,
-                       G_CALLBACK (evo_editor_jsc_find_pattern), g_object_ref (jsc_context), g_object_unref,
-                       JSC_TYPE_VALUE, 2, G_TYPE_STRING, G_TYPE_STRING);
+                       G_CALLBACK (evo_editor_jsc_split_text_with_links), g_object_ref (jsc_context), g_object_unref,
+                       JSC_TYPE_VALUE, 1, G_TYPE_STRING);
 
                jsc_value_object_set_property (jsc_editor, func_name, jsc_function);
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]