[evolution] Simplify and fix how the HTML is parsed into composer's DOM structure
- From: Tomas Popela <tpopela src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [evolution] Simplify and fix how the HTML is parsed into composer's DOM structure
- Date: Fri, 5 Aug 2016 12:43:37 +0000 (UTC)
commit 4bd1a93b4aaa7d239687621c109031617cfbf785
Author: Tomas Popela <tpopela redhat com>
Date: Fri Aug 5 14:36:43 2016 +0200
Simplify and fix how the HTML is parsed into composer's DOM structure
Rework and simplify parse_html_into_blocks():
* instead of removing new lines around citations while processing them, remove
them in advance - this allowed us to dramatically simplify the code and also
fix the code path where it was possible to lose some parts of the parsed text
* remove the option that allowed detecting blocks, as it was disabled and
the code was not quite ready - we need to find a better solution or implement a
crystal ball...
e-util/e-html-editor-view.c | 477 +++++++++++-------------------------------
1 files changed, 125 insertions(+), 352 deletions(-)
---
diff --git a/e-util/e-html-editor-view.c b/e-util/e-html-editor-view.c
index 02f32fe..90f881a 100644
--- a/e-util/e-html-editor-view.c
+++ b/e-util/e-html-editor-view.c
@@ -61,8 +61,6 @@
#define HISTORY_SIZE_LIMIT 30
-#define TRY_TO_PRESERVE_BLOCKS 0
-
#define d(x)
/**
@@ -7260,74 +7258,6 @@ append_citation_mark (WebKitDOMDocument *document,
NULL);
}
-static glong
-get_decoded_line_length (WebKitDOMDocument *document,
- const gchar *line_text)
-{
- glong total_length = 0, length = 0;
- WebKitDOMElement *decode;
- WebKitDOMNode *node;
-
- decode = webkit_dom_document_create_element (document, "DIV", NULL);
- webkit_dom_html_element_set_inner_html (
- WEBKIT_DOM_HTML_ELEMENT (decode), line_text, NULL);
-
- node = webkit_dom_node_get_first_child (WEBKIT_DOM_NODE (decode));
- while (node) {
- if (WEBKIT_DOM_IS_TEXT (node)) {
- gulong text_length = 0;
-
- text_length = webkit_dom_character_data_get_length (WEBKIT_DOM_CHARACTER_DATA (node));
- total_length += text_length;
- length += text_length;
- } else if (WEBKIT_DOM_IS_ELEMENT (node)) {
- if (element_has_class (WEBKIT_DOM_ELEMENT (node), "Apple-tab-span")) {
- total_length += TAB_LENGTH - length % TAB_LENGTH;
- length = 0;
- }
- }
- node = webkit_dom_node_get_next_sibling (node);
- }
-
- g_object_unref (decode);
-
- return total_length;
-}
-
-static gboolean
-check_if_end_block (const gchar *input,
- glong length,
- gboolean preserve_next_line)
-{
- const gchar *next_space;
-
- next_space = strstr (input, " ");
- if (next_space) {
- const gchar *next_br;
- glong length_next_word =
- next_space - input - 4;
-
- if (g_str_has_prefix (input + 4, "<br>"))
- length_next_word = 0;
-
- if (length_next_word > 0)
- next_br = strstr (input + 4, "<br>");
-
- if (length_next_word > 0 && next_br < next_space)
- length_next_word = 0;
-
- if (length_next_word + length < 72)
- return TRUE;
- } else {
- /* If the current text to insert doesn't contain space we
- * have to look on the previous line if we were preserving
- * the block or not */
- return !preserve_next_line;
- }
-
- return FALSE;
-}
-
static void
replace_selection_markers (gchar **text)
{
@@ -7359,6 +7289,54 @@ replace_selection_markers (gchar **text)
}
}
+static GString *
+remove_new_lines_around_citations (const gchar *input)
+{
+ GString *str = NULL;
+ const gchar *p, *next;
+
+ str = g_string_new ("");
+
+ printf ("%s\n", input);
+ /* Remove the new lines around citations:
+ * Replace <br><br>##CITATION_START## with <br>##CITATION_START##
+ * Replace ##CITATION_START##<br><br> with ##CITATION_START##<br>
+ * Replace <br>##CITATION_END## with ##CITATION_END## */
+ p = input;
+ while (next = strstr (p, "##CITATION_"), next) {
+ gchar citation_type = 0;
+
+ if (p < next)
+ g_string_append_len (str, p, next - p);
+
+ if (next + 11)
+ citation_type = next[11];
+ /* ##CITATION_START## */
+ if (citation_type == 'S') {
+ if (g_str_has_suffix (str->str, "<br><br>"))
+ g_string_truncate (str, str->len - 4);
+
+ if (g_str_has_prefix (next + 11, "START##<br><br>")) {
+ g_string_append (str, "##CITATION_START##<br>");
+ p = next + 26;
+ continue;
+ }
+ } else if (citation_type == 'E') {
+ if (g_str_has_suffix (str->str, "<br>"))
+ g_string_truncate (str, str->len - 4);
+ }
+
+ g_string_append (str, "##CITATION_");
+
+ p = next + 11;
+ }
+
+ g_string_append (str, p);
+
+ printf ("%s\n", str->str);
+ return str;
+}
+
/* This parses the HTML code (that contains just text, and BR elements)
* into blocks.
* HTML code in that format we can get by taking innerText from some element,
@@ -7368,18 +7346,14 @@ parse_html_into_blocks (EHTMLEditorView *view,
WebKitDOMDocument *document,
WebKitDOMElement *parent,
WebKitDOMElement *passed_block_template,
- const gchar *html)
+ const gchar *input)
{
EHTMLEditorSelection *selection;
- gboolean ignore_next_br = FALSE;
- gboolean first_element = TRUE;
- gboolean citation_was_first_element = FALSE;
- gboolean preserve_next_line = FALSE;
- gboolean has_citation = FALSE;
- gboolean previously_had_empty_citation_start = FALSE;
+ gboolean has_citation = FALSE, processing_last = FALSE;
const gchar *prev_br, *next_br;
GRegex *regex_nbsp = NULL, *regex_link = NULL, *regex_email = NULL;
- WebKitDOMElement *block = NULL, *block_template = passed_block_template;
+ GString *html = NULL;
+ WebKitDOMElement *block_template = passed_block_template;
selection = e_html_editor_view_get_selection (view);
@@ -7404,9 +7378,6 @@ parse_html_into_blocks (EHTMLEditorView *view,
g_object_unref (settings);
}
- prev_br = html;
- next_br = strstr (prev_br, "<br>");
-
/* Replace the tabulators with SPAN elements that corresponds to them.
* If not inserting the content into the PRE element also replace single
* spaces on the beginning of line, 2+ spaces and with non breaking
@@ -7416,97 +7387,54 @@ parse_html_into_blocks (EHTMLEditorView *view,
else
regex_nbsp = g_regex_new ("^\\s{1}|\\s{2,}|\x9|\\s$", 0, 0, NULL);
+ html = remove_new_lines_around_citations (input);
+
+ prev_br = html->str;
+ next_br = strstr (prev_br, "<br>");
while (next_br) {
- gboolean local_ignore_next_br = ignore_next_br;
- gboolean local_preserve_next_line = preserve_next_line;
- gboolean local_previously_had_empty_citation_start =
- previously_had_empty_citation_start;
- gboolean preserve_block = TRY_TO_PRESERVE_BLOCKS;
- const gchar *citation = NULL, *citation_end = NULL;
+ const gchar *citation_start = NULL, *citation_end = NULL;
const gchar *rest = NULL, *with_br = NULL;
- gchar *to_insert = NULL;
-
- ignore_next_br = FALSE;
- preserve_next_line = TRUE;
+ gchar *to_process = NULL, *to_insert = NULL;
+ guint to_insert_start = 0, to_insert_end = 0;
- to_insert = g_utf8_substring (
- prev_br, 0, g_utf8_pointer_to_offset (prev_br, next_br));
-
- with_br = strstr (to_insert, "<br>");
- citation = strstr (to_insert, "##CITATION_");
- if (citation) {
- gboolean processed = FALSE;
-
- while (!processed) {
- gchar *citation_mark;
- gboolean citation_start = TRUE;
-
- has_citation = TRUE;
- if (g_str_has_prefix (citation + 11, "END##")) {
- citation_start = FALSE;
- if (block)
- append_new_block (parent, &block);
- } else
- previously_had_empty_citation_start = TRUE;
-
- citation_end = strstr (citation + 2, "##");
- if (citation_end)
- rest = citation_end + 2;
-
- if (rest && *rest && !g_str_has_prefix (rest, "##CITATION_"))
- previously_had_empty_citation_start = FALSE;
-
- if (first_element)
- citation_was_first_element = TRUE;
-
- if (block)
- append_new_block (parent, &block);
- else if (with_br && rest && !*rest &&
- previously_had_empty_citation_start &&
- ignore_next_br) {
- /* Insert an empty block for an empty blockquote */
- block = create_and_append_new_block (
- selection, document, parent, block_template, "<br>");
- previously_had_empty_citation_start = FALSE;
- }
-
- if (citation_start)
- ignore_next_br = TRUE;
+ if ((to_process = g_utf8_substring (prev_br, 0, g_utf8_pointer_to_offset (prev_br, next_br)))
&& !*to_process && !processing_last) {
+ g_free (to_process);
+ to_process = g_strdup (next_br);
+ processing_last = TRUE;
+ }
+ to_insert_end = g_utf8_strlen (to_process, -1);
- citation_mark = g_utf8_substring (
- citation,
- 0,
- g_utf8_pointer_to_offset (citation, rest));
+ if ((with_br = strstr (to_process, "<br>")))
+ to_insert_start += 4;
+ if ((citation_start = strstr (to_process, "##CITATION_START"))) {
+ to_insert_start += 18; /* + ## */
+ has_citation = TRUE;
+ }
+ if ((citation_end = strstr (to_process, "##CITATION_END")))
+ to_insert_end -= 16; /* + ## */
- append_citation_mark (document, parent, citation_mark);
+ /* First BR */
+ if (with_br && prev_br == html->str)
+ create_and_append_new_block (
+ selection,
+ document,
+ parent,
+ block_template,
+ "<br id=\"-x-evo-first-br\">");
- g_free (citation_mark);
+ if (with_br && citation_start)
+ create_and_append_new_block (
+ selection, document, parent, block_template, "<br>");
- if (rest && *rest) {
- if (g_str_has_prefix (rest, "##CITATION_"))
- citation = rest;
- else
- processed = TRUE;
- } else
- processed = TRUE;
- }
- } else {
- rest = with_br ?
- to_insert + 4 + (with_br - to_insert) : to_insert;
- previously_had_empty_citation_start = FALSE;
- }
+ if (citation_start)
+ append_citation_mark (document, parent, "##CITATION_START##");
- if (!rest) {
- preserve_next_line = FALSE;
- goto next;
- }
-
- if (*rest) {
+ if ((to_insert = g_utf8_substring (to_process, to_insert_start, to_insert_end)) &&
*to_insert) {
gboolean empty = FALSE;
- gchar *truncated = g_strdup (rest);
+ gchar *truncated = g_strdup (to_insert);
gchar *rest_to_insert;
- empty = !*truncated && strlen (rest) > 0;
+ empty = !*truncated && strlen (to_insert) > 0;
rest_to_insert = g_regex_replace_eval (
regex_nbsp,
@@ -7521,9 +7449,6 @@ parse_html_into_blocks (EHTMLEditorView *view,
replace_selection_markers (&rest_to_insert);
- if (strchr (" +-@*=\t;#", *rest))
- preserve_block = FALSE;
-
if (surround_links_with_anchor (rest_to_insert)) {
gboolean is_email_address =
strstr (rest_to_insert, "@") &&
@@ -7548,206 +7473,52 @@ parse_html_into_blocks (EHTMLEditorView *view,
rest_to_insert = truncated;
}
- if (g_strcmp0 (rest_to_insert, UNICODE_ZERO_WIDTH_SPACE) == 0) {
- if (block)
- append_new_block (parent, &block);
-
- block = create_and_append_new_block (
- selection, document, parent, block_template, "<br>");
- } else if (preserve_block) {
- gchar *html;
- gchar *content_to_append;
-
- if (!block) {
- if (WEBKIT_DOM_IS_HTML_DIV_ELEMENT (block_template))
- block = e_html_editor_selection_get_paragraph_element (
- selection, document, -1, 0);
- else
- block = WEBKIT_DOM_ELEMENT (webkit_dom_node_clone_node (
- WEBKIT_DOM_NODE (block_template), FALSE));
- }
-
- html = webkit_dom_html_element_get_inner_html (
- WEBKIT_DOM_HTML_ELEMENT (block));
-
- content_to_append = g_strconcat (
- html && *html ? " " : "",
- rest_to_insert ? rest_to_insert : "<br>",
- NULL),
-
- webkit_dom_html_element_insert_adjacent_html (
- WEBKIT_DOM_HTML_ELEMENT (block),
- "beforeend",
- content_to_append,
- NULL);
-
- g_free (html);
- g_free (content_to_append);
- } else {
- if (block)
- append_new_block (parent, &block);
-
- block = create_and_append_new_block (
- selection, document, parent, block_template, rest_to_insert);
- }
-
- if (rest_to_insert && *rest_to_insert && preserve_block && block) {
- glong length = 0;
-
- /* If the line contains some encoded chracters (i.e. >)
- * we can't use the strlen functions. */
- if (strstr (rest_to_insert, "&"))
- length = get_decoded_line_length (document, rest_to_insert);
- else
- length = g_utf8_strlen (rest_to_insert, -1);
-
- /* End the block if there is line with less that 62 characters. */
- /* The shorter line can also mean that there is a long word on next
- * line (and the line was wrapped). So look at it and decide what to do. */
- if (length < 62 && check_if_end_block (next_br, length,
local_preserve_next_line)) {
- append_new_block (parent, &block);
- preserve_next_line = FALSE;
- }
-
- if (length > 72) {
- append_new_block (parent, &block);
- preserve_next_line = FALSE;
- }
- }
-
- citation_was_first_element = FALSE;
+ create_and_append_new_block (
+ selection, document, parent, block_template, rest_to_insert);
g_free (rest_to_insert);
- } else if (with_br) {
- if (!citation && (!local_ignore_next_br || citation_was_first_element)) {
- if (block)
- append_new_block (parent, &block);
-
- block = create_and_append_new_block (
- selection, document, parent, block_template, "<br>");
+ } else if (to_insert && !citation_start)
+ create_and_append_new_block (
+ selection, document, parent, block_template, "<br>");
- citation_was_first_element = FALSE;
- } else if (first_element && !citation_was_first_element) {
- block = create_and_append_new_block (
- selection,
- document,
- parent,
- block_template,
- "<br id=\"-x-evo-first-br\">");
- } else if (local_previously_had_empty_citation_start &&
- !citation && with_br && rest && !*rest) {
- /* Empty citation */
- if (block)
- append_new_block (parent, &block);
-
- block = create_and_append_new_block (
- selection, document, parent, block_template, "<br>");
- } else
- preserve_next_line = FALSE;
- } else if (first_element && !citation_was_first_element) {
- block = create_and_append_new_block (
- selection,
- document,
- parent,
- block_template,
- "<br id=\"-x-evo-first-br\">");
- } else
- preserve_next_line = FALSE;
- next:
- first_element = FALSE;
- prev_br = next_br;
- next_br = strstr (prev_br + 4, "<br>");
g_free (to_insert);
- }
-
- if (block)
- append_new_block (parent, &block);
- if (g_utf8_strlen (prev_br, -1) > 0) {
- gchar *rest_to_insert;
- gchar *truncated = g_strdup (
- g_str_has_prefix (prev_br, "<br>") ? prev_br + 4 : prev_br);
+ if (citation_end)
+ append_citation_mark (document, parent, "##CITATION_END##");
- /* On the end on the HTML there is always an extra BR element,
- * so skip it and if there was another BR element before it mark it. */
- if (truncated && !*truncated) {
- WebKitDOMNode *child;
+ prev_br = next_br;
+ next_br = strstr (prev_br + 4, "<br>");
+ if (!next_br && !processing_last) {
+ if (g_utf8_strlen (prev_br, -1) > 4)
+ next_br = prev_br;
+ else {
+ WebKitDOMNode *child;
- child = webkit_dom_node_get_last_child (
- WEBKIT_DOM_NODE (parent));
- if (child) {
- child = webkit_dom_node_get_first_child (child);
- if (child && WEBKIT_DOM_IS_HTMLBR_ELEMENT (child)) {
- /* If the processed HTML contained just
- * the BR don't overwrite its id. */
- if (!element_has_id (WEBKIT_DOM_ELEMENT (child), "-x-evo-first-br"))
- webkit_dom_element_set_id (
- WEBKIT_DOM_ELEMENT (child),
- "-x-evo-last-br");
- } else if (!view->priv->is_editting_message)
+ child = webkit_dom_node_get_last_child (
+ WEBKIT_DOM_NODE (parent));
+ if (child) {
+ child = webkit_dom_node_get_first_child (child);
+ if (child && WEBKIT_DOM_IS_HTMLBR_ELEMENT (child)) {
+ /* If the processed HTML contained just
+ * the BR don't overwrite its id. */
+ if (!element_has_id (WEBKIT_DOM_ELEMENT (child),
"-x-evo-first-br"))
+ webkit_dom_element_set_id (
+ WEBKIT_DOM_ELEMENT (child),
+ "-x-evo-last-br");
+ } else if (!view->priv->is_editting_message)
+ create_and_append_new_block (
+ selection, document, parent, block_template, "<br>");
+ } else
create_and_append_new_block (
selection, document, parent, block_template, "<br>");
- } else
- create_and_append_new_block (
- selection, document, parent, block_template, "<br>");
- g_free (truncated);
- goto end;
- }
-
- if (g_ascii_strncasecmp (truncated, "##CITATION_END##", 16) == 0) {
- append_citation_mark (document, parent, truncated);
- g_free (truncated);
- goto end;
- }
-
- rest_to_insert = g_regex_replace_eval (
- regex_nbsp,
- truncated,
- -1,
- 0,
- 0,
- (GRegexEvalCallback) replace_to_nbsp,
- NULL,
- NULL);
- g_free (truncated);
-
- replace_selection_markers (&rest_to_insert);
-
- if (surround_links_with_anchor (rest_to_insert)) {
- gboolean is_email_address =
- strstr (rest_to_insert, "@") &&
- !strstr (rest_to_insert, "://");
-
- if (is_email_address && !regex_email)
- regex_email = g_regex_new (E_MAIL_PATTERN, 0, 0, NULL);
- if (!is_email_address && !regex_link)
- regex_link = g_regex_new (URL_PATTERN, 0, 0, NULL);
-
- truncated = g_regex_replace_eval (
- is_email_address ? regex_email : regex_link,
- rest_to_insert,
- -1,
- 0,
- G_REGEX_MATCH_NOTEMPTY,
- create_anchor_for_link,
- NULL,
- NULL);
-
- g_free (rest_to_insert);
- rest_to_insert = truncated;
+ g_free (to_process);
+ break;
+ }
+ processing_last = TRUE;
}
-
- if (g_strcmp0 (rest_to_insert, UNICODE_ZERO_WIDTH_SPACE) == 0)
- create_and_append_new_block (
- selection, document, parent, block_template, "<br>");
- else
- create_and_append_new_block (
- selection, document, parent, block_template, rest_to_insert);
-
- g_free (rest_to_insert);
+ g_free (to_process);
}
- end:
if (has_citation) {
gchar *inner_html;
GString *start, *end;
@@ -7767,6 +7538,8 @@ parse_html_into_blocks (EHTMLEditorView *view,
g_string_free (end, TRUE);
}
+ g_string_free (html, TRUE);
+
if (regex_email != NULL)
g_regex_unref (regex_email);
if (regex_link != NULL)
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]