[evolution/449-support-markdown-in-composer] EMarkdownUtils: Fix/enhance composer HTML parsing



commit 3c8f13ab857fa8a8793dac726a4a0c4c62d6b727
Author: Milan Crha <mcrha redhat com>
Date:   Fri Feb 11 09:53:14 2022 +0100

    EMarkdownUtils: Fix/enhance composer HTML parsing

 src/e-util/e-markdown-utils.c | 49 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 4 deletions(-)
---
diff --git a/src/e-util/e-markdown-utils.c b/src/e-util/e-markdown-utils.c
index b3fc121388..ccfc6acb2a 100644
--- a/src/e-util/e-markdown-utils.c
+++ b/src/e-util/e-markdown-utils.c
@@ -78,6 +78,7 @@ markdown_utils_get_attribute_value (const xmlChar **xcattrs,
 
 struct _ComposerQuirks {
        gboolean enabled;
+       gboolean reading_html_end; /* set for the second parse, of the data left after </html> */
        gchar *to_body_credits;
        gboolean cite_body;
 };
@@ -161,6 +162,9 @@ markdown_utils_sax_start_element_cb (gpointer ctx,
                }
        }
 
+       if (data->composer_quirks.reading_html_end)
+               return;
+
        if (g_ascii_strcasecmp (name, "body") == 0) {
                data->in_body = TRUE;
                return;
@@ -203,7 +207,7 @@ markdown_utils_sax_start_element_cb (gpointer ctx,
 
                        if (data->quote_prefix->len)
                                g_string_append (data->buffer, data->quote_prefix->str);
-               } else {
+               } else if (!data->composer_quirks.enabled) {
                        g_string_append (data->buffer, "<br>");
                }
 
@@ -429,7 +433,11 @@ markdown_utils_sax_end_element_cb (gpointer ctx,
            g_ascii_strcasecmp (name, "h4") == 0 ||
            g_ascii_strcasecmp (name, "h5") == 0 ||
            g_ascii_strcasecmp (name, "h6") == 0) {
-               g_string_append_c (data->buffer, '\n');
+               /* To avoid double-line ends when parsing composer HTML */
+               if (data->composer_quirks.enabled && !(
+                   g_ascii_strcasecmp (name, "p") == 0 ||
+                   g_ascii_strcasecmp (name, "div") == 0))
+                       g_string_append_c (data->buffer, '\n');
 
                data->in_paragraph_end = TRUE;
 
@@ -460,6 +468,20 @@ markdown_utils_sax_end_element_cb (gpointer ctx,
        }
 }
 
+static gboolean
+markdown_utils_only_whitespace (const gchar *text,
+                               gint len)
+{
+       /* Scans at most len bytes, stopping early at a NUL terminator */
+       const gchar *pos = text;
+
+       for (; len > 0 && *pos; pos++, len--) {
+               if (!g_ascii_isspace (*pos))
+                       return FALSE;
+       }
+       return TRUE;
+}
+
 static void
 markdown_utils_sax_characters_cb (gpointer ctx,
                                  const xmlChar *xctext,
@@ -470,7 +492,7 @@ markdown_utils_sax_characters_cb (gpointer ctx,
 
        dd (printf ("%s: text:'%.*s' in_body:%d in_paragraph:%d in_li:%d\n", G_STRFUNC, len, text, 
data->in_body, data->in_paragraph, data->in_li);)
 
-       if (data->in_body && (data->in_paragraph || data->in_li)) {
+       if (data->in_body && (data->in_paragraph || data->in_li || !markdown_utils_only_whitespace (text, 
len))) {
                if (data->link_text) {
                        g_string_append_len (data->link_text, text, len);
                } else {
@@ -531,6 +553,9 @@ e_markdown_utils_html_to_text (const gchar *html,
        htmlSAXHandler sax;
        HTMLToTextData data;
 
+       if (length < 0)
+               length = html ? strlen (html) : 0;
+
        memset (&data, 0, sizeof (HTMLToTextData));
 
        data.buffer = g_string_new (NULL);
@@ -546,10 +571,26 @@ e_markdown_utils_html_to_text (const gchar *html,
        sax.warning = markdown_utils_sax_warning_cb;
        sax.error = markdown_utils_sax_error_cb;
 
-       ctxt = htmlCreatePushParserCtxt (&sax, &data, html ? html : "", html && length == -1 ? strlen (html) 
: html ? length : 0, "", XML_CHAR_ENCODING_UTF8);
+       ctxt = htmlCreatePushParserCtxt (&sax, &data, html ? html : "", length, "", XML_CHAR_ENCODING_UTF8);
 
        htmlParseChunk (ctxt, "", 0, 1);
 
+       /* The libxml doesn't read elements after </html>, but the quirks can be stored after them,
+          thus retry after that element end, if it exists. */
+       if (data.composer_quirks.enabled && html && ctxt->input && ctxt->input->cur) {
+               guint html_end_length = ctxt->input->end - ctxt->input->cur;
+
+               if (html_end_length > 1) {
+                       htmlParserCtxtPtr ctxt2;
+
+                       data.composer_quirks.reading_html_end = TRUE;
+
+                       ctxt2 = htmlCreatePushParserCtxt (&sax, &data, (const gchar *) ctxt->input->cur, 
html_end_length, "", XML_CHAR_ENCODING_UTF8);
+                       htmlParseChunk (ctxt2, "", 0, 1);
+                       htmlFreeParserCtxt (ctxt2);
+               }
+       }
+
        htmlFreeParserCtxt (ctxt);
 
        markdown_utils_apply_composer_quirks (data.buffer, &data.composer_quirks);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]