[gnumeric] html: improve BOM handling.
- From: Morten Welinder <mortenw src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnumeric] html: improve BOM handling.
- Date: Thu, 31 Oct 2019 23:06:27 +0000 (UTC)
commit 2105232e1935e771356a215fe11672bf3548ab2d
Author: Morten Welinder <terra gnome org>
Date: Thu Oct 31 19:06:09 2019 -0400
html: improve BOM handling.
NEWS | 3 +++
plugins/html/ChangeLog | 4 ++++
plugins/html/html_read.c | 22 +++++++++++++++++-----
3 files changed, 24 insertions(+), 5 deletions(-)
---
diff --git a/NEWS b/NEWS
index dd1c0d867..abb9ce11d 100644
--- a/NEWS
+++ b/NEWS
@@ -17,6 +17,9 @@ Morten:
* Fix SUMIF (etc) problem with blank criteria. [#423]
* Improve editing of percentages. [#413]
+Thomas Kuehne:
+ * Improve html import. [#392]
+
--------------------------------------------------------------------------
Gnumeric 1.12.45
diff --git a/plugins/html/ChangeLog b/plugins/html/ChangeLog
index fc37c6951..7cd07a17b 100644
--- a/plugins/html/ChangeLog
+++ b/plugins/html/ChangeLog
@@ -1,3 +1,7 @@
+2019-10-31 Morten Welinder <terra gnome org>
+
+ * html_read.c (html_file_open): Improve BOM handling. See #392.
+
2019-05-20 Morten Welinder <terra gnome org>
* Release 1.12.45
diff --git a/plugins/html/html_read.c b/plugins/html/html_read.c
index c4dd90039..dfd14c2fe 100644
--- a/plugins/html/html_read.c
+++ b/plugins/html/html_read.c
@@ -506,26 +506,38 @@ html_file_open (G_GNUC_UNUSED GOFileOpener const *fo, GOIOContext *io_context,
buf = gsf_input_read (input, 4, NULL);
if (buf != NULL) {
enc = xmlDetectCharEncoding(buf, 4);
- switch (enc) { /* Skip byte order mark */
+ switch (enc) {
+#if LIBXML_VERSION < 20702
+ /* Skip byte order mark */
case XML_CHAR_ENCODING_UCS4BE:
case XML_CHAR_ENCODING_UCS4LE:
case XML_CHAR_ENCODING_UCS4_2143:
case XML_CHAR_ENCODING_UCS4_3412:
+ if (buf[0] == 0xFE || buf[1] == 0xFE || buf[2] == 0xFE || buf[3] == 0xFE)
+ bomlen = 4;
+ else
+ bomlen = 0;
+ break;
case XML_CHAR_ENCODING_EBCDIC:
- bomlen = 4;
+ if (buf[0] == 0xDD)
+ bomlen = 4;
+ else
+ bomlen = 0;
break;
case XML_CHAR_ENCODING_UTF16BE:
case XML_CHAR_ENCODING_UTF16LE:
- bomlen = 2;
+ if (buf[0] == 0xFE || buf[1] == 0xFE)
+ bomlen = 2;
+ else
+ bomlen = 0;
break;
case XML_CHAR_ENCODING_UTF8:
if (buf[0] == 0xef)
bomlen = 3;
- else if (buf[0] == 0x3c)
- bomlen = 4;
else
bomlen = 0;
break;
+#endif
case XML_CHAR_ENCODING_NONE:
bomlen = 0;
/* Try to detect unmarked UTF16LE
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]