[evolution/wip/webkit2] EHTMLEditorView - Improve detection of e-mail addresses in conversion from HTML to plain text



commit 72db593994ed9bd922126c38a26ce541b0944d82
Author: Tomas Popela <tpopela redhat com>
Date:   Thu Nov 27 17:46:19 2014 +0100

    EHTMLEditorView - Improve detection of e-mail addresses in conversion from HTML to plain text
    
    Also check for e-mail addresses instead of just links. Also create the
    GRegex objects for links and e-mail addresses on demand.

 e-util/e-html-editor-view-dom-functions.c |   33 +++++++++++++++++++++++-----
 1 files changed, 27 insertions(+), 6 deletions(-)
---
diff --git a/e-util/e-html-editor-view-dom-functions.c b/e-util/e-html-editor-view-dom-functions.c
index f70aa14..4721ed5 100644
--- a/e-util/e-html-editor-view-dom-functions.c
+++ b/e-util/e-html-editor-view-dom-functions.c
@@ -2744,6 +2744,7 @@ create_anchor_for_link (const GMatchInfo *info,
 
        address_surrounded =
                strstr (match, "@") &&
+               !strstr (match, "://") &&
                g_str_has_prefix (match, "&lt;") &&
                g_str_has_suffix (match, "&gt;");
 
@@ -2757,7 +2758,7 @@ create_anchor_for_link (const GMatchInfo *info,
                g_string_append (res, "&lt;");
 
        g_string_append (res, "<a href=\"");
-       if (strstr (match, "@")) {
+       if (strstr (match, "@") && !strstr (match, "://")) {
                 g_string_append (res, "mailto:");
                g_string_append (res, match + offset);
                if (address_surrounded)
@@ -2947,7 +2948,7 @@ parse_html_into_paragraphs (WebKitDOMDocument *document,
        gboolean citation_was_first_element = FALSE;
        const gchar *prev_br, *next_br;
        gchar *inner_html;
-       GRegex *regex_nbsp = NULL, *regex_links = NULL;
+       GRegex *regex_nbsp = NULL, *regex_link = NULL, *regex_email = NULL;
        GString *start, *end;
        WebKitDOMElement *paragraph = NULL;
 
@@ -2960,7 +2961,6 @@ parse_html_into_paragraphs (WebKitDOMDocument *document,
        /* Replace single spaces on the beginning of line, 2+ spaces and
         * tabulators with non breaking spaces */
        regex_nbsp = g_regex_new ("^\\s{1}|\\s{2,}|\x9", 0, 0, NULL);
-       regex_links = g_regex_new (URL_PATTERN, 0, 0, NULL);
 
        while (next_br) {
                gboolean local_ignore_next_br = ignore_next_br;
@@ -3031,8 +3031,17 @@ parse_html_into_paragraphs (WebKitDOMDocument *document,
                        g_free (truncated);
 
                        if (surround_links_with_anchor (rest_to_insert)) {
+                               gboolean is_email_address =
+                                       strstr (rest_to_insert, "@") &&
+                                       !strstr (rest_to_insert, "://");
+
+                               if (is_email_address && !regex_email)
+                                       regex_email = g_regex_new (E_MAIL_PATTERN, 0, 0, NULL);
+                               if (!is_email_address && !regex_link)
+                                       regex_link = g_regex_new (URL_PATTERN, 0, 0, NULL);
+
                                truncated = g_regex_replace_eval (
-                                       regex_links,
+                                       is_email_address ? regex_email : regex_link,
                                        rest_to_insert,
                                        -1,
                                        0,
@@ -3171,8 +3180,17 @@ parse_html_into_paragraphs (WebKitDOMDocument *document,
                g_free (truncated);
 
                if (surround_links_with_anchor (rest_to_insert)) {
+                       gboolean is_email_address =
+                               strstr (rest_to_insert, "@") &&
+                               !strstr (rest_to_insert, "://");
+
+                       if (is_email_address && !regex_email)
+                               regex_email = g_regex_new (E_MAIL_PATTERN, 0, 0, NULL);
+                       if (!is_email_address && !regex_link)
+                               regex_link = g_regex_new (URL_PATTERN, 0, 0, NULL);
+
                        truncated = g_regex_replace_eval (
-                               regex_links,
+                               is_email_address ? regex_email : regex_link,
                                rest_to_insert,
                                -1,
                                0,
@@ -3206,8 +3224,11 @@ parse_html_into_paragraphs (WebKitDOMDocument *document,
        webkit_dom_html_element_set_inner_html (
                WEBKIT_DOM_HTML_ELEMENT (blockquote), end->str, NULL);
 
+       if (regex_email)
+               g_regex_unref (regex_email);
+       if (regex_link)
+               g_regex_unref (regex_link);
        g_regex_unref (regex_nbsp);
-       g_regex_unref (regex_links);
        g_free (inner_html);
        g_string_free (start, TRUE);
        g_string_free (end, TRUE);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]