[gnumeric] xlsx: make shared strings parsing more robust.



commit cfd82752e5d5011ebd9de1caa333a71ba04f2ef0
Author: Morten Welinder <terra gnome org>
Date:   Tue Apr 26 08:06:53 2016 -0400

    xlsx: make shared strings parsing more robust.
    
    The spec isn't clear on the precise format of references to the shared strings
    table.  Until now we have only seen integers, but #765544 shows a sample with
    extra spaces.
    
    Extra spaces are silly.  The point of the shared strings table is to reduce
    file size and extra spaces work against that.  Nevertheless, it shouldn't
    hurt to support that.
    
    The file probably was not created by Excel.

 NEWS                       |    1 +
 plugins/excel/ChangeLog    |    6 ++++++
 plugins/excel/xlsx-read.c  |    2 +-
 plugins/excel/xlsx-utils.c |   15 +++++++++++++++
 plugins/excel/xlsx-utils.h |    4 ++++
 5 files changed, 27 insertions(+), 1 deletions(-)
---
diff --git a/NEWS b/NEWS
index f907194..520dd53 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,7 @@ Morten:
        * Fix rare RANDBETWEEN problem.
        * Test suite improvements.
        * Fix value-area problem.  [#765438]
+       * Make xlsx shared strings parsing more robust.  [#765544]
 
 --------------------------------------------------------------------------
 Gnumeric 1.12.28
diff --git a/plugins/excel/ChangeLog b/plugins/excel/ChangeLog
index 8f9d73d..fb46fb4 100644
--- a/plugins/excel/ChangeLog
+++ b/plugins/excel/ChangeLog
@@ -1,3 +1,9 @@
+2016-04-26  Morten Welinder  <terra gnome org>
+
+       * xlsx-read.c (xlsx_cell_val_end): Be slightly more relaxed about
+       the syntax of references to the shared strings table.  Fixes
+       #765544.
+
 2016-03-22  Morten Welinder <terra gnome org>
 
        * Release 1.12.28
diff --git a/plugins/excel/xlsx-read.c b/plugins/excel/xlsx-read.c
index 705542c..a0395f8 100644
--- a/plugins/excel/xlsx-read.c
+++ b/plugins/excel/xlsx-read.c
@@ -1288,7 +1288,7 @@ xlsx_cell_val_end (GsfXMLIn *xin, G_GNUC_UNUSED GsfXMLBlob *blob)
                        state->val = value_new_float (gnm_strto (xin->content->str, &end));
                break;
        case XLXS_TYPE_SST_STR :
-               i = strtol (xin->content->str, &end, 10);
+               i = xlsx_relaxed_strtol (xin->content->str, &end, 10);
                if (end != xin->content->str && *end == '\0' &&
                    0 <= i  && i < (int)state->sst->len) {
                        entry = &g_array_index (state->sst, XLSXStr, i);
diff --git a/plugins/excel/xlsx-utils.c b/plugins/excel/xlsx-utils.c
index 8161726..58ed4d6 100644
--- a/plugins/excel/xlsx-utils.c
+++ b/plugins/excel/xlsx-utils.c
@@ -746,3 +746,18 @@ XLSXGradientInfo xlsx_gradient_info[GO_GRADIENT_MAX] = {
 };
 
 /*****************************************************************************/
+// Like strtol, but a little more relaxed.  For now that means...
+// * Consumes trailing ASCII whitespace after a parsed number
+
+long
+xlsx_relaxed_strtol (const char *s, char **endp, int base)
+{
+       char *end;
+       long res = strtol (s, &end, base);
+       if (endp) {
+               while (s != end && g_ascii_isspace (*end))
+                       end++;
+               *endp = end;
+       }
+       return res;
+}
diff --git a/plugins/excel/xlsx-utils.h b/plugins/excel/xlsx-utils.h
index f53c692..b975f5f 100644
--- a/plugins/excel/xlsx-utils.h
+++ b/plugins/excel/xlsx-utils.h
@@ -95,4 +95,8 @@ extern XLSXGradientInfo xlsx_gradient_info[GO_GRADIENT_MAX];
 
 /*****************************************************************************/
 
+long xlsx_relaxed_strtol (const char *s, char **endp, int base);
+
+/*****************************************************************************/
+
 #endif /* GNM_XLSX_UTILS_H */


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]