[evolution/449-support-markdown-in-composer] EMarkdownUtils: Fix/enhance composer HTML parsing
- From: Milan Crha <mcrha src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [evolution/449-support-markdown-in-composer] EMarkdownUtils: Fix/enhance composer HTML parsing
- Date: Fri, 11 Feb 2022 08:53:55 +0000 (UTC)
commit 3c8f13ab857fa8a8793dac726a4a0c4c62d6b727
Author: Milan Crha <mcrha redhat com>
Date: Fri Feb 11 09:53:14 2022 +0100
EMarkdownUtils: Fix/enhance composer HTML parsing
src/e-util/e-markdown-utils.c | 49 +++++++++++++++++++++++++++++++++++++++----
1 file changed, 45 insertions(+), 4 deletions(-)
---
diff --git a/src/e-util/e-markdown-utils.c b/src/e-util/e-markdown-utils.c
index b3fc121388..ccfc6acb2a 100644
--- a/src/e-util/e-markdown-utils.c
+++ b/src/e-util/e-markdown-utils.c
@@ -78,6 +78,7 @@ markdown_utils_get_attribute_value (const xmlChar **xcattrs,
struct _ComposerQuirks {
gboolean enabled;
+ gboolean reading_html_end;
gchar *to_body_credits;
gboolean cite_body;
};
@@ -161,6 +162,9 @@ markdown_utils_sax_start_element_cb (gpointer ctx,
}
}
+ if (data->composer_quirks.reading_html_end)
+ return;
+
if (g_ascii_strcasecmp (name, "body") == 0) {
data->in_body = TRUE;
return;
@@ -203,7 +207,7 @@ markdown_utils_sax_start_element_cb (gpointer ctx,
if (data->quote_prefix->len)
g_string_append (data->buffer, data->quote_prefix->str);
- } else {
+ } else if (!data->composer_quirks.enabled) {
g_string_append (data->buffer, "<br>");
}
@@ -429,7 +433,11 @@ markdown_utils_sax_end_element_cb (gpointer ctx,
g_ascii_strcasecmp (name, "h4") == 0 ||
g_ascii_strcasecmp (name, "h5") == 0 ||
g_ascii_strcasecmp (name, "h6") == 0) {
- g_string_append_c (data->buffer, '\n');
+ /* To avoid double-line ends when parsing composer HTML */
+ if (data->composer_quirks.enabled && !(
+ g_ascii_strcasecmp (name, "p") == 0 ||
+ g_ascii_strcasecmp (name, "div") == 0))
+ g_string_append_c (data->buffer, '\n');
data->in_paragraph_end = TRUE;
@@ -460,6 +468,20 @@ markdown_utils_sax_end_element_cb (gpointer ctx,
}
}
+/* Returns TRUE when no non-white-space byte precedes the NUL or the @len limit in @text. */
+static gboolean
+markdown_utils_only_whitespace (const gchar *text,
+				gint len)
+{
+	gint pos = 0;
+
+	while (pos < len && text[pos] != '\0' && g_ascii_isspace (text[pos]))
+		pos++;
+
+	/* The scan stops early only at a non-white-space byte; do not read text[len]. */
+	return pos >= len || text[pos] == '\0';
+}
+
static void
markdown_utils_sax_characters_cb (gpointer ctx,
const xmlChar *xctext,
@@ -470,7 +492,7 @@ markdown_utils_sax_characters_cb (gpointer ctx,
dd (printf ("%s: text:'%.*s' in_body:%d in_paragraph:%d in_li:%d\n", G_STRFUNC, len, text,
data->in_body, data->in_paragraph, data->in_li);)
- if (data->in_body && (data->in_paragraph || data->in_li)) {
+ if (data->in_body && (data->in_paragraph || data->in_li || !markdown_utils_only_whitespace (text, len))) {
if (data->link_text) {
g_string_append_len (data->link_text, text, len);
} else {
@@ -531,6 +553,9 @@ e_markdown_utils_html_to_text (const gchar *html,
htmlSAXHandler sax;
HTMLToTextData data;
+ if (length < 0)
+ length = html ? strlen (html) : 0;
+
memset (&data, 0, sizeof (HTMLToTextData));
data.buffer = g_string_new (NULL);
@@ -546,10 +571,26 @@ e_markdown_utils_html_to_text (const gchar *html,
sax.warning = markdown_utils_sax_warning_cb;
sax.error = markdown_utils_sax_error_cb;
- ctxt = htmlCreatePushParserCtxt (&sax, &data, html ? html : "", html && length == -1 ? strlen (html) : html ? length : 0, "", XML_CHAR_ENCODING_UTF8);
+ ctxt = htmlCreatePushParserCtxt (&sax, &data, html ? html : "", length, "", XML_CHAR_ENCODING_UTF8);
htmlParseChunk (ctxt, "", 0, 1);
+ /* The libxml doesn't read elements after </html>, but the quirks can be stored after them,
+ thus retry after that element end, if it exists. */
+ if (data.composer_quirks.enabled && html && ctxt->input && ctxt->input->cur) {
+ guint html_end_length = ctxt->input->end - ctxt->input->cur;
+
+ if (html_end_length > 1) {
+ htmlParserCtxtPtr ctxt2;
+
+ data.composer_quirks.reading_html_end = TRUE;
+
+ ctxt2 = htmlCreatePushParserCtxt (&sax, &data, (const gchar *) ctxt->input->cur, html_end_length, "", XML_CHAR_ENCODING_UTF8);
+ htmlParseChunk (ctxt2, "", 0, 1);
+ htmlFreeParserCtxt (ctxt2);
+ }
+ }
+
htmlFreeParserCtxt (ctxt);
markdown_utils_apply_composer_quirks (data.buffer, &data.composer_quirks);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]