[evolution/gnome-42] I#1849 - markdown-utils: Ignore character encoding hints in HTML when converting to text



commit 97cd7918106e41441af7b49cb5ef34d71259c283
Author: Milan Crha <mcrha redhat com>
Date:   Wed Mar 30 09:42:02 2022 +0200

    I#1849 - markdown-utils: Ignore character encoding hints in HTML when converting to text
    
    The passed-in HTML is already in UTF-8, so ignore any character
    encoding declared inside it and do not re-encode the text.
    
    Closes https://gitlab.gnome.org/GNOME/evolution/-/issues/1849

 src/e-util/e-markdown-utils.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)
---
diff --git a/src/e-util/e-markdown-utils.c b/src/e-util/e-markdown-utils.c
index ccfc6acb2a..e536810323 100644
--- a/src/e-util/e-markdown-utils.c
+++ b/src/e-util/e-markdown-utils.c
@@ -571,9 +571,9 @@ e_markdown_utils_html_to_text (const gchar *html,
        sax.warning = markdown_utils_sax_warning_cb;
        sax.error = markdown_utils_sax_error_cb;
 
-       ctxt = htmlCreatePushParserCtxt (&sax, &data, html ? html : "", length, "", XML_CHAR_ENCODING_UTF8);
-
-       htmlParseChunk (ctxt, "", 0, 1);
+       ctxt = htmlCreatePushParserCtxt (&sax, &data, "", 0, "", XML_CHAR_ENCODING_UTF8);
+       htmlCtxtUseOptions (ctxt, HTML_PARSE_RECOVER | HTML_PARSE_NONET | HTML_PARSE_IGNORE_ENC);
+       htmlParseChunk (ctxt, html ? html : "", length, 1);
 
        /* The libxml doesn't read elements after </html>, but the quirks can be stored after them,
           thus retry after that element end, if it exists. */
@@ -585,8 +585,9 @@ e_markdown_utils_html_to_text (const gchar *html,
 
                        data.composer_quirks.reading_html_end = TRUE;
 
-                       ctxt2 = htmlCreatePushParserCtxt (&sax, &data, (const gchar *) ctxt->input->cur, html_end_length, "", XML_CHAR_ENCODING_UTF8);
-                       htmlParseChunk (ctxt2, "", 0, 1);
+                       ctxt2 = htmlCreatePushParserCtxt (&sax, &data, "", 0, "", XML_CHAR_ENCODING_UTF8);
+                       htmlCtxtUseOptions (ctxt2, HTML_PARSE_RECOVER | HTML_PARSE_NONET | HTML_PARSE_IGNORE_ENC);
+                       htmlParseChunk (ctxt2, (const gchar *) ctxt->input->cur, html_end_length, 1);
                        htmlFreeParserCtxt (ctxt2);
                }
        }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]