[gnumeric] csv: handle invalid UTF-8 by truncation.



commit eb2054a934570e74ef1e29274d31810896c62b46
Author: Morten Welinder <terra gnome org>
Date:   Sun Jun 28 13:17:43 2015 -0400

    csv: handle invalid UTF-8 by truncation.

 ChangeLog |    5 +++++
 NEWS      |    1 +
 src/stf.c |    6 ++++++
 3 files changed, 12 insertions(+), 0 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index 21e6537..77ca527 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2015-06-28  Morten Welinder  <terra gnome org>
+
+       * src/stf.c (clear_stray_NULs): Also truncate invalid UTF-8.
+       Fixes #751577.
+
 2015-06-26  Morten Welinder  <terra gnome org>
 
        * src/value.c (value_new_array_empty): Store value_new_empty(),
diff --git a/NEWS b/NEWS
index 4db938d..099f247 100644
--- a/NEWS
+++ b/NEWS
@@ -39,6 +39,7 @@ Morten:
        * Fix named expression problem.  [#751056]
        * Fix xlsx namespace problem.  [#751120]
        * Protect database functions against malformed database.  [#751392]
+       * Fix csv problem with invalid UTF-8 data.  [#751577]
 
 --------------------------------------------------------------------------
 Gnumeric 1.12.22
diff --git a/src/stf.c b/src/stf.c
index 5c58c12..3e9ba52 100644
--- a/src/stf.c
+++ b/src/stf.c
@@ -383,6 +383,7 @@ clear_stray_NULs (GOIOContext *context, GString *utf8data)
 {
        char *cpointer, *endpointer;
        int null_chars = 0;
+       char const *valid_end;
 
        cpointer = utf8data->str;
        endpointer = utf8data->str + utf8data->len;
@@ -406,6 +407,11 @@ clear_stray_NULs (GOIOContext *context, GString *utf8data)
                stf_warning (context, msg);
                g_free (msg);
        }
+
+       if (!g_utf8_validate (utf8data->str, utf8data->len, &valid_end)) {
+               g_string_truncate (utf8data, valid_end - utf8data->str);
+               stf_warning (context, _("The file contains invalid UTF-8 encoded characters and has been truncated"));
+       }
 }
 
 /*


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]