[gnumeric] xls: fix string writing
- From: Morten Welinder <mortenw src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnumeric] xls: fix string writing
- Date: Sat, 22 Feb 2014 05:14:05 +0000 (UTC)
commit 14a7f6913a5aec33ce25d7b94afa15a2e629cfcb
Author: Morten Welinder <terra gnome org>
Date: Sat Feb 22 00:13:17 2014 -0500
xls: fix string writing
NEWS | 1 +
plugins/excel/ms-biff.c | 157 +++++++++--------------------
plugins/excel/ms-biff.h | 21 ++--
plugins/excel/ms-excel-read.c | 5 -
plugins/excel/ms-excel-write.c | 207 ++++++++++++++------------------------
plugins/excel/ms-formula-write.c | 4 +-
6 files changed, 138 insertions(+), 257 deletions(-)
---
diff --git a/NEWS b/NEWS
index cfd2ae2..c3d8f9e 100644
--- a/NEWS
+++ b/NEWS
@@ -25,6 +25,7 @@ Morten:
* Export headers and footers to xlsx. [Part of #724516]
* Fix xls export of long strings in formulas.
* Fix xls import of cells with long string results.
+ * Fix xls export of long strings in various places.
--------------------------------------------------------------------------
Gnumeric 1.12.11
diff --git a/plugins/excel/ms-biff.c b/plugins/excel/ms-biff.c
index 7f60269..5d30740 100644
--- a/plugins/excel/ms-biff.c
+++ b/plugins/excel/ms-biff.c
@@ -79,9 +79,9 @@ ms_biff_query_bound_check (BiffQuery *q, guint32 offset, unsigned len)
return offset;
}
-/*******************************************************************************/
-/* Read Side */
-/*******************************************************************************/
+/*****************************************************************************/
+/* Read Side */
+/*****************************************************************************/
/**
* ms_biff_password_hash and ms_biff_crypt_seq
@@ -564,21 +564,20 @@ ms_biff_query_destroy (BiffQuery *q)
}
}
-/*******************************************************************************/
-/* Write Side */
-/*******************************************************************************/
+/*****************************************************************************/
+/* Write Side */
+/*****************************************************************************/
#define MAX_BIFF7_RECORD_SIZE 0x820
#define MAX_BIFF8_RECORD_SIZE 0x2020
/**
* ms_biff_put_new :
- * @output: the output storage
- * @version:
- * @codepage: Ignored if negative
+ * @output: (transfer full): the output storage
+ * @version: file format version
+ * @codepage: Codepage to use for strings. Only used pre-BIFF8; if not
+ * positive, gsf_msole_iconv_win_codepage() supplies the codepage instead.
*
- * Take responsibility for @output
- * and prepare to generate biff records.
**/
BiffPut *
ms_biff_put_new (GsfOutput *output, MsBiffVersion version, int codepage)
@@ -590,15 +589,13 @@ ms_biff_put_new (GsfOutput *output, MsBiffVersion version, int codepage)
bp = g_new (BiffPut, 1);
bp->opcode = 0;
- bp->length = 0;
- bp->length = 0;
bp->streamPos = gsf_output_tell (output);
- bp->data_malloced = FALSE;
- bp->data = NULL;
bp->len_fixed = -1;
bp->output = output;
bp->version = version;
+ bp->record = g_string_new (NULL);
+
bp->buf_len = 2048; /* maximum size for a biff7 record */
bp->buf = g_malloc (bp->buf_len);
@@ -607,7 +604,8 @@ ms_biff_put_new (GsfOutput *output, MsBiffVersion version, int codepage)
bp->codepage = 1200;
} else {
bp->codepage = (codepage > 0)
- ? codepage : gsf_msole_iconv_win_codepage ();
+ ? codepage
+ : gsf_msole_iconv_win_codepage ();
bp->convert = gsf_msole_iconv_open_codepage_for_export (bp->codepage);
}
@@ -623,14 +621,11 @@ ms_biff_put_destroy (BiffPut *bp)
if (bp->output != NULL) {
gsf_output_close (bp->output);
g_object_unref (bp->output);
- bp->output = NULL;
}
- g_free (bp->buf);
- bp->buf = NULL;
- bp->buf_len = 0;
+ g_string_free (bp->record, TRUE);
+ g_free (bp->buf);
gsf_iconv_close (bp->convert);
- bp->convert = NULL;
g_free (bp);
}
@@ -640,34 +635,24 @@ ms_biff_put_len_next (BiffPut *bp, guint16 opcode, guint32 len)
{
g_return_val_if_fail (bp, NULL);
g_return_val_if_fail (bp->output, NULL);
- g_return_val_if_fail (bp->data == NULL, NULL);
g_return_val_if_fail (bp->len_fixed == -1, NULL);
- if (bp->version >= MS_BIFF_V8)
- XL_CHECK_CONDITION_VAL (len < MAX_BIFF8_RECORD_SIZE, NULL);
- else
- XL_CHECK_CONDITION_VAL (len < MAX_BIFF7_RECORD_SIZE, NULL);
-
#if BIFF_DEBUG > 0
g_printerr ("Biff put len 0x%x\n", opcode);
#endif
bp->len_fixed = +1;
bp->opcode = opcode;
- bp->length = len;
bp->streamPos = gsf_output_tell (bp->output);
- if (len > 0) {
- bp->data = g_new (guint8, len);
- bp->data_malloced = TRUE;
- }
- return bp->data;
+ g_string_set_size (bp->record, len);
+
+ return bp->record->str;
}
void
ms_biff_put_var_next (BiffPut *bp, guint16 opcode)
{
- guint8 data[4];
g_return_if_fail (bp != NULL);
g_return_if_fail (bp->output != NULL);
g_return_if_fail (bp->len_fixed == -1);
@@ -679,13 +664,9 @@ ms_biff_put_var_next (BiffPut *bp, guint16 opcode)
bp->len_fixed = 0;
bp->opcode = opcode;
bp->curpos = 0;
- bp->length = 0;
- bp->data = NULL;
bp->streamPos = gsf_output_tell (bp->output);
- GSF_LE_SET_GUINT16 (data, opcode);
- GSF_LE_SET_GUINT16 (data + 2,0xfaff); /* To be corrected later */
- gsf_output_write (bp->output, 4, data);
+ g_string_set_size (bp->record, 0);
}
inline unsigned
@@ -700,25 +681,18 @@ ms_biff_put_var_write (BiffPut *bp, guint8 const *data, guint32 len)
g_return_if_fail (bp != NULL);
g_return_if_fail (data != NULL);
g_return_if_fail (bp->output != NULL);
+ g_return_if_fail ((gint32)len >= 0);
- g_return_if_fail (!bp->data);
g_return_if_fail (bp->len_fixed == 0);
- /* Temporary */
- XL_CHECK_CONDITION (bp->length + len < 0xf000);
-
- if ((bp->curpos + len) > ms_biff_max_record_len (bp)) {
+ /* Make room */
+ if (bp->curpos + len > bp->record->len)
+ g_string_set_size (bp->record, bp->curpos + len);
- g_return_if_fail (bp->curpos == bp->length);
-
- ms_biff_put_commit (bp);
- ms_biff_put_var_next (bp, BIFF_CONTINUE);
- }
+ /* Copy data */
+ memcpy (bp->record->str + bp->curpos, data, len);
- gsf_output_write (bp->output, len, data);
bp->curpos += len;
- if (bp->curpos > bp->length)
- bp->length = bp->curpos;
}
void
@@ -726,80 +700,43 @@ ms_biff_put_var_seekto (BiffPut *bp, int pos)
{
g_return_if_fail (bp != NULL);
g_return_if_fail (bp->output != NULL);
-
g_return_if_fail (bp->len_fixed == 0);
- g_return_if_fail (!bp->data);
+ g_return_if_fail (pos >= 0);
bp->curpos = pos;
- gsf_output_seek (bp->output, bp->streamPos + bp->curpos + 4, G_SEEK_SET);
}
-static void
-ms_biff_put_var_commit (BiffPut *bp)
+void
+ms_biff_put_commit (BiffPut *bp)
{
- guint8 tmp [4];
- int endpos;
+ guint16 opcode;
+ size_t len, maxlen;
+ const char *data;
g_return_if_fail (bp != NULL);
g_return_if_fail (bp->output != NULL);
- g_return_if_fail (bp->len_fixed == 0);
- g_return_if_fail (!bp->data);
+ maxlen = ms_biff_max_record_len (bp);
- endpos = bp->streamPos + bp->length + 4;
- gsf_output_seek (bp->output, bp->streamPos, G_SEEK_SET);
+ opcode = bp->opcode;
+ len = bp->record->len;
+ data = bp->record->str;
+ do {
+ guint8 tmp[4];
+ size_t thislen = MIN (len, maxlen);
- GSF_LE_SET_GUINT16 (tmp, bp->opcode);
- GSF_LE_SET_GUINT16 (tmp+2, bp->length);
- gsf_output_write (bp->output, 4, tmp);
+ GSF_LE_SET_GUINT16 (tmp, opcode);
+ GSF_LE_SET_GUINT16 (tmp + 2, thislen);
+ gsf_output_write (bp->output, 4, tmp);
+ gsf_output_write (bp->output, thislen, data);
- gsf_output_seek (bp->output, endpos, G_SEEK_SET);
- bp->streamPos = endpos;
- bp->curpos = 0;
-}
+ opcode = BIFF_CONTINUE;
+ data += thislen;
+ len -= thislen;
+ } while (len > 0);
-static void
-ms_biff_put_len_commit (BiffPut *bp)
-{
- guint8 tmp[4];
-
- g_return_if_fail (bp != NULL);
- g_return_if_fail (bp->output != NULL);
- g_return_if_fail (bp->len_fixed == 1);
- g_return_if_fail (bp->length == 0 || bp->data);
- if (bp->version >= MS_BIFF_V8)
- XL_CHECK_CONDITION (bp->length < MAX_BIFF8_RECORD_SIZE);
- else
- XL_CHECK_CONDITION (bp->length < MAX_BIFF7_RECORD_SIZE);
-
-/* if (!bp->data_malloced) Unimplemented optimisation
- bp->output->lseek (bp->output, bp->length, G_SEEK_CUR);
- else */
- GSF_LE_SET_GUINT16 (tmp, bp->opcode);
- GSF_LE_SET_GUINT16 (tmp + 2, bp->length);
- gsf_output_write (bp->output, 4, tmp);
- gsf_output_write (bp->output, bp->length, bp->data);
-
- g_free (bp->data);
- bp->data = NULL;
- bp->data_malloced = FALSE;
bp->streamPos = gsf_output_tell (bp->output);
bp->curpos = 0;
-}
-
-void
-ms_biff_put_commit (BiffPut *bp)
-{
- switch (bp->len_fixed) {
- case 0:
- ms_biff_put_var_commit (bp);
- break;
- case 1:
- ms_biff_put_len_commit (bp);
- break;
- default:
- g_warning ("Spurious commit");
- }
bp->len_fixed = -1;
if (0) {
diff --git a/plugins/excel/ms-biff.h b/plugins/excel/ms-biff.h
index 0443ab1..605a508 100644
--- a/plugins/excel/ms-biff.h
+++ b/plugins/excel/ms-biff.h
@@ -29,9 +29,9 @@ typedef enum {
MS_BIFF_V8 = 8 /* Excel 97, 2000, XP, 2003 */
} MsBiffVersion;
-/*******************************************************************************/
-/* Read Side */
-/*******************************************************************************/
+/*****************************************************************************/
+/* Read Side */
+/*****************************************************************************/
/**
* Returns query data, it is imperative that copies of
@@ -69,21 +69,24 @@ void ms_biff_query_dump (BiffQuery *);
guint32 ms_biff_query_bound_check (BiffQuery *q,
guint32 offset, unsigned len);
-/*******************************************************************************/
-/* Write Side */
-/*******************************************************************************/
+/*****************************************************************************/
+/* Write Side */
+/*****************************************************************************/
typedef struct _BiffPut {
guint16 opcode;
- guint32 length; /* NB. can be extended by a continue opcode */
- guint8 *data;
gsf_off_t streamPos;
unsigned curpos; /* Curpos is offset from beginning of header */
- gboolean data_malloced;
int len_fixed;
GsfOutput *output;
MsBiffVersion version;
+ /*
+ * Records are stored here until committed at which time they may
+ * be split using BIFF_CONTINUE records.
+ */
+ GString *record;
+
/* a buffer for generating unicode */
guint8 *buf;
unsigned buf_len;
diff --git a/plugins/excel/ms-excel-read.c b/plugins/excel/ms-excel-read.c
index ba79b0d..9d94e0c 100644
--- a/plugins/excel/ms-excel-read.c
+++ b/plugins/excel/ms-excel-read.c
@@ -2923,11 +2923,6 @@ excel_read_FORMULA (BiffQuery *q, ExcelReadSheet *esheet)
XL_CHECK_CONDITION (q->length >= 22);
expr_length = GSF_LE_GET_GUINT16 (q->data + 20);
offset = 22;
- if (expr_length == 0) {
- /* Now what? This happens when we have a continuation
- record for a formula. */
- expr_length = q->length - offset;
- }
} else if (esheet_ver (esheet) >= MS_BIFF_V3) {
XL_CHECK_CONDITION (q->length >= 18);
expr_length = GSF_LE_GET_GUINT16 (q->data + 16);
diff --git a/plugins/excel/ms-excel-write.c b/plugins/excel/ms-excel-write.c
index 6c6323f..51561b8 100644
--- a/plugins/excel/ms-excel-write.c
+++ b/plugins/excel/ms-excel-write.c
@@ -284,6 +284,14 @@ excel_strlen (guint8 const *str, size_t *bytes)
return i;
}
+static guint32
+string_maxlen[4] = {
+ /* STR_ONE_BYTE_LENGTH */ G_MAXUINT8,
+ /* STR_TWO_BYTE_LENGTH */ G_MAXUINT16,
+ /* STR_FOUR_BYTE_LENGTH */ G_MAXUINT32,
+ /* STR_NO_LENGTH */ G_MAXUINT32
+};
+
/**
* excel_write_string :
* @bp:
@@ -299,9 +307,11 @@ unsigned
excel_write_string (BiffPut *bp, WriteStringFlags flags,
guint8 const *txt)
{
- size_t byte_len, out_bytes, offset = 0;
- unsigned int char_len, output_len, avail;
- char *in_bytes = (char *)txt; /* bloody strict-aliasing is broken */
+ size_t char_len, byte_len, out_bytes, len_len, max_len, items, conv_bytes;
+ gboolean need_uni_marker =
+ (bp->version >= MS_BIFF_V8 && !(flags & STR_SUPPRESS_HEADER));
+ guint8 *convdata = NULL;
+ guchar isunistr, tmp[4];
g_return_val_if_fail (txt != NULL, 0);
@@ -309,132 +319,76 @@ excel_write_string (BiffPut *bp, WriteStringFlags flags,
if (bp->version < MS_BIFF_V8)
flags |= STR_LEN_IN_BYTES;
+ len_len = ((flags & STR_LENGTH_MASK) == STR_NO_LENGTH)
+ ? 0
+ : 1 << (flags & STR_LENGTH_MASK);
+ max_len = string_maxlen[flags & STR_LENGTH_MASK];
+
char_len = excel_strlen (txt, &byte_len);
if (char_len != byte_len || (flags & STR_SUPPRESS_HEADER)) {
- char *tmp;
-
- out_bytes = char_len * 2;
-
- /* 2 in case we null terminate, and up to 4 for the length */
- if ((out_bytes + 4 + 2) > bp->buf_len) {
- bp->buf_len = (((out_bytes + 6) >> 2) + 1) << 2;
- bp->buf = g_realloc (bp->buf, bp->buf_len);
+ convdata = (gpointer)g_convert_with_iconv
+ (txt, -1,
+ bp->convert,
+ NULL, &conv_bytes,
+ NULL);
+ isunistr = 1;
+
+ /* g_convert_with_iconv terminates with 4 NULs. */
+ if (flags & STR_TRAILING_NULL)
+ conv_bytes += 2;
+
+ items = (flags & STR_LEN_IN_BYTES)
+ ? conv_bytes
+ : conv_bytes / 2;
+ if (items > max_len) {
+ g_printerr ("Truncating string of %u %s\n",
+ (guint)items,
+ (flags & STR_LEN_IN_BYTES) ? "bytes" : "characters");
+ items = max_len;
+ conv_bytes = (flags & STR_LEN_IN_BYTES)
+ ? items
+ : items * 2;
}
+ } else {
+ /* char_len == byte_len here, so just use char_len */
- if ((flags & STR_LENGTH_MASK) != STR_NO_LENGTH)
- offset = 1 << (flags & STR_LENGTH_MASK);
-
- if (bp->version >= MS_BIFF_V8 && !(flags & STR_SUPPRESS_HEADER))
- bp->buf [offset++] = '\1'; /* flag as unicode */
-
- /* who cares about the extra couple of bytes */
- out_bytes = bp->buf_len - 3;
-
- tmp = (char *)(bp->buf + offset);
- g_iconv (bp->convert, &in_bytes, &byte_len, &tmp, &out_bytes);
- out_bytes = (guint8 *)tmp - bp->buf;
+ isunistr = 0;
- if (flags & STR_TRAILING_NULL) {
- GSF_LE_SET_GUINT16 (bp->buf + out_bytes, 0);
- out_bytes += 2;
- }
- if (flags & STR_LEN_IN_BYTES)
- output_len = out_bytes - offset;
- else {
- if (byte_len > 0)
- output_len = g_utf8_pointer_to_offset (txt, in_bytes);
- else
- output_len = char_len;
+ if (char_len > max_len) {
+ g_printerr ("Truncating string of %u %s\n",
+ (guint)char_len,
+ (flags & STR_LEN_IN_BYTES) ? "bytes" : "characters");
+ char_len = max_len;
}
- switch (flags & STR_LENGTH_MASK) {
- default:
- case STR_NO_LENGTH:
- if (byte_len != 0)
- g_warning (_("This is somewhat corrupt.\n"
- "We already wrote a length for a string that is being truncated due to encoding problems."));
- break;
- case STR_ONE_BYTE_LENGTH:
- if (output_len > 255) {
- g_printerr ("Truncating string of %u %s\n",
- output_len,
- (flags & STR_LEN_IN_BYTES) ? "bytes" : "characters");
- output_len = 255;
- }
- GSF_LE_SET_GUINT8 (bp->buf, output_len);
- break;
- case STR_TWO_BYTE_LENGTH:
- if (output_len > 65535) {
- g_printerr ("Truncating string of %u %s\n",
- output_len,
- (flags & STR_LEN_IN_BYTES) ? "bytes" : "characters");
- output_len = 65535;
- }
- GSF_LE_SET_GUINT16 (bp->buf, output_len);
- break;
- case STR_FOUR_BYTE_LENGTH:
- GSF_LE_SET_GUINT32 (bp->buf, output_len);
- break;
- }
+ conv_bytes = items = char_len;
+ }
- output_len = out_bytes;
- tmp = bp->buf;
- do {
- avail = ms_biff_max_record_len (bp);
- if (offset == 0 && bp->version >= MS_BIFF_V8 && !(flags & STR_SUPPRESS_HEADER)) {
- ms_biff_put_var_write (bp, "\1", 1);
- avail -= 2;
- out_bytes++;
- }
- avail = MIN (avail, output_len);
- if (avail != output_len)
- avail = (avail - offset) / 2 * 2 + offset; /* we need to export an even byte number */
- ms_biff_put_var_write (bp, tmp, avail);
- output_len -= avail;
- tmp += avail;
- offset = 0;
- if (output_len > 0) {
- ms_biff_put_commit (bp);
- ms_biff_put_var_next (bp, BIFF_CONTINUE);
- }
- } while (output_len > 0);
- } else {
- guint8 *tmp;
- /* char_len == byte_len here, so just use char_len */
- tmp = bp->buf;
- switch (flags & STR_LENGTH_MASK) {
- default:
- case STR_NO_LENGTH: break;
- case STR_ONE_BYTE_LENGTH:
- *tmp++ = (char_len > 255) ? 255 : char_len;
- break;
- case STR_TWO_BYTE_LENGTH:
- GSF_LE_SET_GUINT16 (tmp, char_len);
- tmp += 2;
- break;
- case STR_FOUR_BYTE_LENGTH:
- GSF_LE_SET_GUINT32 (tmp, char_len);
- tmp += 4;
- break;
- }
- out_bytes = tmp - bp->buf;
- ms_biff_put_var_write (bp, bp->buf, out_bytes);
- avail = ms_biff_max_record_len (bp) - out_bytes;
- do {
- if (bp->version >= MS_BIFF_V8 && !(flags & STR_SUPPRESS_HEADER)) {
- *tmp++ = 0; /* flag as not unicode */ /* Jean: MS docs say uncompressed */
- avail--;
- out_bytes++;
- ms_biff_put_var_write (bp, "\0", 1);
- }
- avail = MIN (avail, char_len);
- ms_biff_put_var_write (bp, txt, avail);
- out_bytes += avail;
- char_len -= avail;
- txt += avail;
- avail = ms_biff_max_record_len (bp);
- } while (char_len > 0);
+ switch (flags & STR_LENGTH_MASK) {
+ default:
+ case STR_NO_LENGTH:
+ break;
+ case STR_ONE_BYTE_LENGTH:
+ GSF_LE_SET_GUINT8 (tmp, items);
+ break;
+ case STR_TWO_BYTE_LENGTH:
+ GSF_LE_SET_GUINT16 (tmp, items);
+ break;
+ case STR_FOUR_BYTE_LENGTH:
+ GSF_LE_SET_GUINT32 (tmp, items);
+ break;
}
+ ms_biff_put_var_write (bp, tmp, len_len);
+ out_bytes = len_len;
+
+ if (need_uni_marker) {
+ ms_biff_put_var_write (bp, &isunistr, 1);
+ out_bytes++;
+ }
+ ms_biff_put_var_write (bp, convdata ? convdata : txt, conv_bytes);
+ out_bytes += conv_bytes;
+
+ g_free (convdata);
return out_bytes;
}
@@ -4519,7 +4473,7 @@ excel_write_other_v8 (ExcelWriteSheet *esheet,
ms_escher_spcontainer_end (escher, spmark);
- sppos = bp->streamPos + bp->length + 4;
+ sppos = bp->streamPos + bp->curpos + 4;
splen = GSF_LE_GET_GUINT32 (escher->str + spmark + 4);
draw_len += escher->len;
@@ -5940,16 +5894,7 @@ static void
excel_write_image_bytes (BiffPut *bp, GByteArray *bytes)
{
- int chunk = ms_biff_max_record_len (bp) - bp->curpos;
- guint8 *data = bytes->data;
- gint32 len = bytes->len;
-
- while (len > 0) {
- ms_biff_put_var_write (bp, data, MIN (chunk, len));
- data += chunk;
- len -= chunk;
- chunk = ms_biff_max_record_len (bp);
- }
+ ms_biff_put_var_write (bp, bytes->data, bytes->len);
}
/*
diff --git a/plugins/excel/ms-formula-write.c b/plugins/excel/ms-formula-write.c
index 10df2d8..3ca4d7b 100644
--- a/plugins/excel/ms-formula-write.c
+++ b/plugins/excel/ms-formula-write.c
@@ -1023,9 +1023,9 @@ excel_write_array_formula (ExcelWriteState *ewb,
pd.use_name_variant = FALSE;
pd.allow_sheetless_ref = TRUE;
- start = ewb->bp->length;
+ start = ewb->bp->curpos;
write_node (&pd, array->expr, 0, XL_ROOT);
- len = ewb->bp->length - start;
+ len = ewb->bp->curpos - start;
write_arrays (&pd);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]