[gnumeric] Set LABEL encoding based on FONT charset. [part of #304007]
- From: Andreas J. Guelzow <guelzow src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnumeric] Set LABEL encoding based on FONT charset. [part of #304007]
- Date: Thu, 11 Aug 2011 22:40:27 +0000 (UTC)
commit e0b43d3c9c4f594de9f4ee57760db06465bd863b
Author: Valek Filippov <frob gnome org>
Date: Thu Aug 11 16:39:18 2011 -0600
Set LABEL encoding based on FONT charset. [part of #304007]
2011-08-11 Valek Filippov <frob gnome org>
* plugins/excel/ms-excel-read.c (excel_read_FONT): store charset in ExcelFont
(excel_read_LABEL): pass charset to excel_get_text_fixme
(excel_get_text_fixme): pass charset to excel_get_text
(excel_get_chars): set str_iconv based on charset and change all callers
(plugins/excel/ms-excel-read.h): add charset to ExcelFont
NEWS | 5 ++-
plugins/excel/ChangeLog | 8 +++
plugins/excel/ms-excel-read.c | 104 +++++++++++++++++++++++++++++----------
plugins/excel/ms-excel-read.h | 5 +-
plugins/excel/ms-formula-read.c | 4 +-
plugins/excel/ms-obj.c | 10 ++--
plugins/excel/xls-read-pivot.c | 4 +-
7 files changed, 101 insertions(+), 39 deletions(-)
---
diff --git a/NEWS b/NEWS
index 6612e3c..19d4a48 100644
--- a/NEWS
+++ b/NEWS
@@ -5,12 +5,15 @@ Andreas:
[#584380][#651561]
* Clarify the distinction beween open/save and import/export.
* Fix enabling of modify comment or hyperlink. [#655877]
- * Fully read sceintific format from ODF.
+ * Fully read scientific format from ODF.
Jean:
* Make things build against gtk+-3.0.
* Make Ctrl-PgUp and Ctrl-PgDn work on chart-only sheets. [#645673]
+Valek:
+ * Set LABEL encoding based on FONT charset. [part of #304007]
+
--------------------------------------------------------------------------
Gnumeric 1.10.17
diff --git a/plugins/excel/ChangeLog b/plugins/excel/ChangeLog
index a05d47b..04d3c5e 100644
--- a/plugins/excel/ChangeLog
+++ b/plugins/excel/ChangeLog
@@ -1,3 +1,11 @@
+2011-08-11 Valek Filippov <frob gnome org>
+
+ * plugins/excel/ms-excel-read.c (excel_read_FONT): store charset in ExcelFont
+ (excel_read_LABEL): pass charset to excel_get_text_fixme
+ (excel_get_text_fixme): pass charset to excel_get_text
+ (excel_get_chars): set str_iconv based on charset and change all callers
+ (plugins/excel/ms-excel-read.h): add charset to ExcelFont
+
2011-08-01 Andreas J. Guelzow <aguelzow pyrshep ca>
* plugins/excel/plugin.xml.in: use TM symbol
diff --git a/plugins/excel/ms-excel-read.c b/plugins/excel/ms-excel-read.c
index 6423fca..9023199 100644
--- a/plugins/excel/ms-excel-read.c
+++ b/plugins/excel/ms-excel-read.c
@@ -1014,10 +1014,11 @@ excel_read_string_header (guint8 const *data, guint32 maxlen,
char *
excel_get_chars (GnmXLImporter const *importer,
- guint8 const *ptr, size_t length, gboolean use_utf16)
+ guint8 const *ptr, size_t length, gboolean use_utf16, guint8 const *charset)
{
char* ans;
size_t i;
+ GIConv str_iconv = importer->str_iconv;
if (use_utf16) {
gunichar2 *uni_text = g_alloca (sizeof (gunichar2)*length);
@@ -1031,7 +1032,49 @@ excel_get_chars (GnmXLImporter const *importer,
char *ptr2 = (char *)ptr;
ans = outbuf;
- g_iconv (importer->str_iconv,
+ if (NULL != charset) {
+ switch (*charset) {
+ case 0:
+ case 1:
+ case 255:
+ str_iconv = gsf_msole_iconv_open_for_import (1252);
+ break; /* ANSI Latin, System Default, OEM Latin I */
+ case 77: str_iconv = gsf_msole_iconv_open_for_import (10000);
+ break; /* Apple */
+ case 128: str_iconv = gsf_msole_iconv_open_for_import (932);
+ break; /* Japanese Shift-JIS */
+ case 129: str_iconv = gsf_msole_iconv_open_for_import (949);
+ break; /* Korean Hangul */
+ case 130: str_iconv = gsf_msole_iconv_open_for_import (1361);
+ break; /* Korean Johab */
+ case 134: str_iconv = gsf_msole_iconv_open_for_import (936);
+ break; /* Chinese Simplified */
+ case 136: str_iconv = gsf_msole_iconv_open_for_import (950);
+ break; /* Chinese Traditional */
+ case 161: str_iconv = gsf_msole_iconv_open_for_import (1253);
+ break; /* Greek */
+ case 162: str_iconv = gsf_msole_iconv_open_for_import (1254);
+ break; /* Turkish */
+ case 163: str_iconv = gsf_msole_iconv_open_for_import (1258);
+ break; /* Vietnamese */
+ case 177: str_iconv = gsf_msole_iconv_open_for_import (1255);
+ break; /* Hebrew */
+ case 178: str_iconv = gsf_msole_iconv_open_for_import (1256);
+ break; /* Arabic */
+ case 186: str_iconv = gsf_msole_iconv_open_for_import (1257);
+ break; /* Baltic */
+ case 204: str_iconv = gsf_msole_iconv_open_for_import (1251);
+ break; /* Russian */
+ case 222: str_iconv = gsf_msole_iconv_open_for_import (874);
+ break; /* Thai */
+ case 238: str_iconv = gsf_msole_iconv_open_for_import (1250);
+ break; /* Central European */
+ default:
+ g_printerr ("Unknown charset %#x\n", (int) *charset);
+ break;
+ }
+ }
+ g_iconv (str_iconv,
&ptr2, &length, &outbuf, &outbytes);
i = outbuf - ans;
@@ -1044,7 +1087,7 @@ excel_get_chars (GnmXLImporter const *importer,
char *
excel_get_text (GnmXLImporter const *importer,
guint8 const *pos, guint32 length,
- guint32 *byte_length, guint32 maxlen)
+ guint32 *byte_length, guint8 const *charset, guint32 maxlen)
{
char *ans;
guint8 const *ptr;
@@ -1084,7 +1127,7 @@ excel_get_text (GnmXLImporter const *importer,
} else
*byte_length += str_len_bytes;
- ans = excel_get_chars (importer, ptr, length, use_utf16);
+ ans = excel_get_chars (importer, ptr, length, use_utf16, charset);
d (4, {
g_printerr ("String len %d, byte length %d: %s %s %s:\n",
@@ -1109,9 +1152,9 @@ excel_get_text (GnmXLImporter const *importer,
**/
static char *
excel_get_text_fixme (GnmXLImporter const *importer,
- guint8 const *pos, guint32 length, guint32 *byte_length)
+ guint8 const *pos, guint32 length, guint32 *byte_length, guint8 const *charset)
{
- return excel_get_text (importer, pos, length, byte_length,
+ return excel_get_text (importer, pos, length, byte_length, charset,
G_MAXUINT);
}
@@ -1122,7 +1165,7 @@ excel_biff_text (GnmXLImporter const *importer,
XL_CHECK_CONDITION_VAL (q->length >= ofs, NULL);
return excel_get_text (importer, q->data + ofs, length,
- NULL, q->length - ofs);
+ NULL, NULL, q->length - ofs);
}
char *
@@ -1137,7 +1180,7 @@ excel_biff_text_1 (GnmXLImporter const *importer,
ofs++;
return excel_get_text (importer, q->data + ofs, length,
- NULL, q->length - ofs);
+ NULL, NULL, q->length - ofs);
}
char *
@@ -1152,7 +1195,7 @@ excel_biff_text_2 (GnmXLImporter const *importer,
ofs += 2;
return excel_get_text (importer, q->data + ofs, length,
- NULL, q->length - ofs);
+ NULL, NULL, q->length - ofs);
}
typedef struct {
@@ -1271,7 +1314,7 @@ sst_read_string (BiffQuery *q, MSContainer const *c,
XL_CHECK_CONDITION_VAL (get_len >= 0, 0);
str = excel_get_chars (c->importer,
- q->data + offset, get_len, use_utf16);
+ q->data + offset, get_len, use_utf16, NULL);
offset += get_len * (use_utf16 ? 2 : 1);
if (res_str != NULL) {
@@ -1654,6 +1697,9 @@ excel_read_FONT (BiffQuery *q, GnmXLImporter *importer)
break;
}
fd->fontname = excel_biff_text_1 (importer, q, 14);
+
+ fd->charset = GSF_LE_GET_GUINT8 (q->data + 12);
+
}
fd->color_idx &= 0x7f; /* Undocumented but a good idea */
@@ -3501,7 +3547,7 @@ excel_read_name_str (GnmXLImporter *importer,
builtin = excel_builtin_name (str);
str += use_utf16 ? 2 : 1;
if (--(*name_len)) {
- char *tmp = excel_get_chars (importer, str, *name_len, use_utf16);
+ char *tmp = excel_get_chars (importer, str, *name_len, use_utf16, NULL);
name = g_strconcat (builtin, tmp, NULL);
g_free (tmp);
*name_len = (use_utf16 ? 2 : 1) * (*name_len);
@@ -3509,7 +3555,7 @@ excel_read_name_str (GnmXLImporter *importer,
name = g_strdup (builtin);
*name_len += str - data;
} else /* converts char len to byte len, and handles header */
- name = excel_get_text_fixme (importer, data, *name_len, name_len);
+ name = excel_get_text_fixme (importer, data, *name_len, name_len, NULL);
return name;
}
@@ -3756,13 +3802,13 @@ excel_read_NAME (BiffQuery *q, GnmXLImporter *importer, ExcelReadSheet *esheet)
char *help_txt;
char *status_txt;
- menu_txt = excel_get_text_fixme (importer, data, menu_txt_len, NULL);
+ menu_txt = excel_get_text_fixme (importer, data, menu_txt_len, NULL, NULL);
data += menu_txt_len;
- descr_txt = excel_get_text_fixme (importer, data, descr_txt_len, NULL);
+ descr_txt = excel_get_text_fixme (importer, data, descr_txt_len, NULL, NULL);
data += descr_txt_len;
- help_txt = excel_get_text_fixme (importer, data, help_txt_len, NULL);
+ help_txt = excel_get_text_fixme (importer, data, help_txt_len, NULL, NULL);
data += help_txt_len;
- status_txt = excel_get_text_fixme (importer, data, status_txt_len, NULL);
+ status_txt = excel_get_text_fixme (importer, data, status_txt_len, NULL, NULL);
g_printerr ("Name record: '%s', '%s', '%s', '%s', '%s'\n",
nexpr ? expr_name_name (nexpr) : "(null)",
@@ -3853,7 +3899,7 @@ excel_read_XCT (BiffQuery *q, GnmXLImporter *importer)
XL_NEED_BYTES (1);
len = *data++;
v = value_new_string_nocopy (
- excel_get_text_fixme (importer, data, len, NULL));
+ excel_get_text_fixme (importer, data, len, NULL, NULL));
data += len;
break;
@@ -5271,22 +5317,22 @@ excel_read_DV (BiffQuery *q, ExcelReadSheet *esheet)
XL_CHECK_CONDITION (data+3 <= end);
input_title = excel_get_text_fixme (esheet->container.importer, data + 2,
- GSF_LE_GET_GUINT16 (data), &len);
+ GSF_LE_GET_GUINT16 (data), &len, NULL);
data += len + 2;
XL_CHECK_CONDITION (data+3 <= end);
error_title = excel_get_text_fixme (esheet->container.importer, data + 2,
- GSF_LE_GET_GUINT16 (data), &len);
+ GSF_LE_GET_GUINT16 (data), &len, NULL);
data += len + 2;
XL_CHECK_CONDITION (data+3 <= end);
input_msg = excel_get_text_fixme (esheet->container.importer, data + 2,
- GSF_LE_GET_GUINT16 (data), &len);
+ GSF_LE_GET_GUINT16 (data), &len, NULL);
data += len + 2;
XL_CHECK_CONDITION (data+3 <= end);
error_msg = excel_get_text_fixme (esheet->container.importer, data + 2,
- GSF_LE_GET_GUINT16 (data), &len);
+ GSF_LE_GET_GUINT16 (data), &len, NULL);
data += len + 2;
d (1, {
@@ -5763,12 +5809,12 @@ excel_read_AUTOFILTER (BiffQuery *q, ExcelReadSheet *esheet)
data = q->data + 24;
if (len0 > 0) {
v0 = value_new_string_nocopy (
- excel_get_text_fixme (esheet->container.importer, data, len0, NULL));
+ excel_get_text_fixme (esheet->container.importer, data, len0, NULL, NULL));
data += len0;
}
if (len1 > 0)
v1 = value_new_string_nocopy (
- excel_get_text_fixme (esheet->container.importer, data, len1, NULL));
+ excel_get_text_fixme (esheet->container.importer, data, len1, NULL, NULL));
if (op1 == GNM_FILTER_UNUSED) {
cond = gnm_filter_condition_new_single (op0, v0);
@@ -6051,6 +6097,8 @@ excel_read_LABEL (BiffQuery *q, ExcelReadSheet *esheet, gboolean has_markup)
GnmValue *v;
guint in_len, str_len;
gchar *txt;
+ BiffXFData const *xf;
+ ExcelFont const *fd;
GnmCell *cell = excel_cell_fetch (q, esheet);
if (!cell)
@@ -6062,14 +6110,16 @@ excel_read_LABEL (BiffQuery *q, ExcelReadSheet *esheet, gboolean has_markup)
: GSF_LE_GET_GUINT16 (q->data + 6);
XL_CHECK_CONDITION (q->length - 8 >= in_len);
+ xf = excel_set_xf (esheet, q);
+ fd = excel_font_get (esheet->container.importer, xf->font_idx);
+
txt = excel_get_text_fixme (esheet->container.importer, q->data + 8,
- in_len, &str_len);
+ in_len, &str_len, &fd->charset);
d (0, g_printerr ("%s in %s;\n",
has_markup ? "formatted string" : "string",
cell_name (cell)););
- excel_set_xf (esheet, q);
if (txt != NULL) {
GOFormat *fmt = NULL;
if (has_markup)
@@ -6644,7 +6694,7 @@ excel_read_SUPBOOK (BiffQuery *q, GnmXLImporter *importer)
XL_CHECK_CONDITION (q->length >= 5);
bookname = excel_get_text (importer, q->data + 4, len,
- &byte_length, q->length - 4);
+ &byte_length, NULL, q->length - 4);
d (2, g_printerr ("\trefers to %s\n", bookname););
/*
* Bookname can be
@@ -6672,7 +6722,7 @@ excel_read_SUPBOOK (BiffQuery *q, GnmXLImporter *importer)
length = GSF_LE_GET_GUINT16 (q->data + ofs);
ofs += 2;
name = excel_get_text (importer, q->data + ofs, length,
- &byte_length, q->length - ofs);
+ &byte_length, NULL, q->length - ofs);
d (2, g_printerr ("\tSheet %d -> %s\n", t, name););
g_free (name);
diff --git a/plugins/excel/ms-excel-read.h b/plugins/excel/ms-excel-read.h
index c19b1fb..e0fae73 100644
--- a/plugins/excel/ms-excel-read.h
+++ b/plugins/excel/ms-excel-read.h
@@ -81,6 +81,7 @@ typedef struct {
int struck_out; /* boolean : strikethrough */
int color_idx;
int boldness; /* 100->1000 dec, normal = 0x190, bold = 0x2bc */
+ guint8 charset;
GOFontScript script;
MsBiffFontUnderline underline;
char *fontname;
@@ -148,10 +149,10 @@ void ms_biff_bof_data_destroy (MsBiffBofData * data);
char *excel_get_chars (GnmXLImporter const *imp,
guint8 const *ptr, size_t length,
- gboolean use_utf16);
+ gboolean use_utf16, guint8 const *charset);
char * excel_get_text (GnmXLImporter const *imp,
guint8 const *pos, guint32 length,
- guint32 *byte_length, guint32 maxlen);
+ guint32 *byte_length, guint8 const *charset, guint32 maxlen);
char *excel_biff_text_1 (GnmXLImporter const *imp, BiffQuery const *q, guint32 ofs);
char *excel_biff_text_2 (GnmXLImporter const *imp, BiffQuery const *q, guint32 ofs);
diff --git a/plugins/excel/ms-formula-read.c b/plugins/excel/ms-formula-read.c
index e6fb394..57d8c08 100644
--- a/plugins/excel/ms-formula-read.c
+++ b/plugins/excel/ms-formula-read.c
@@ -1173,7 +1173,7 @@ excel_parse_formula1 (MSContainer const *container,
char_len = GSF_LE_GET_GUINT8 (cur);
str = excel_get_text (container->importer, cur+1,
- char_len, &byte_len,
+ char_len, &byte_len, NULL,
len_left - 1);
ptg_length = 1 + byte_len;
@@ -1351,7 +1351,7 @@ excel_parse_formula1 (MSContainer const *container,
}
str = excel_get_text
(container->importer, array_data,
- chars, &len,
+ chars, &len, NULL,
array_length - (array_data - array_data0));
array_data += len;
diff --git a/plugins/excel/ms-obj.c b/plugins/excel/ms-obj.c
index 161a52b..77e3434 100644
--- a/plugins/excel/ms-obj.c
+++ b/plugins/excel/ms-obj.c
@@ -439,7 +439,7 @@ ms_read_TXO (BiffQuery *q, MSContainer *c, PangoAttrList **markup)
use_utf16 = q->data[0] != 0;
maxlen = use_utf16 ? q->length / 2 : q->length-1;
text = excel_get_chars (c->importer,
- q->data + 1, MIN (text_len, maxlen), use_utf16);
+ q->data + 1, MIN (text_len, maxlen), use_utf16, NULL);
g_string_append (accum, text);
g_free (text);
if (text_len <= maxlen)
@@ -549,7 +549,7 @@ read_pre_biff8_read_text (BiffQuery *q, MSContainer *c, MSObj *obj,
remaining -= txo_len;
}
- str = excel_get_chars (c->importer, first, MIN (remaining, len), FALSE);
+ str = excel_get_chars (c->importer, first, MIN (remaining, len), FALSE, NULL);
if (len > remaining) {
GString *accum = g_string_new (str);
g_free (str);
@@ -557,7 +557,7 @@ read_pre_biff8_read_text (BiffQuery *q, MSContainer *c, MSObj *obj,
while (ms_biff_query_peek_next (q, &op) && op == BIFF_CONTINUE) {
ms_biff_query_next (q);
str = excel_get_chars (c->importer, q->data,
- MIN (q->length, len), FALSE);
+ MIN (q->length, len), FALSE, NULL);
g_string_append (accum, str);
g_free (str);
if (len < q->length)
@@ -642,7 +642,7 @@ read_pre_biff8_read_name_and_fmla (BiffQuery *q, MSContainer *c, MSObj *obj,
g_return_val_if_fail (data + len <= last, NULL);
- str = excel_get_chars (c->importer, data, len, FALSE);
+ str = excel_get_chars (c->importer, data, len, FALSE, NULL);
data += len;
if (((data - q->data) & 1))
data++; /* pad to word bound */
@@ -915,7 +915,7 @@ ms_obj_map_forms_obj (MSObj *obj, MSContainer *c,
return;
type = excel_get_text (c->importer, data + 16,
GSF_LE_GET_GUINT16 (data + 14),
- &len, last - data);
+ &len, NULL, last - data);
if (NULL == type || strncmp (type, "Forms.", 6)) {
g_free (type);
return;
diff --git a/plugins/excel/xls-read-pivot.c b/plugins/excel/xls-read-pivot.c
index acb2447..51ae32e 100644
--- a/plugins/excel/xls-read-pivot.c
+++ b/plugins/excel/xls-read-pivot.c
@@ -747,10 +747,10 @@ xls_read_SXVIEW (BiffQuery *q, ExcelReadSheet *esheet)
name = go_string_new_nocopy (
excel_get_text (imp, q->data + 44, name_len,
- &len, q->length - 44));
+ &len, NULL, q->length - 44));
data_field_name = go_string_new_nocopy (
excel_get_text (imp, q->data + 44 + len, data_field_name_len,
- &len, q->length - 44 - len));
+ &len, NULL, q->length - 44 - len));
d(0, fprintf (stderr, "Slicer in : %s named '%s';\n",
range_as_string (&range), name ? name->str : "<UNDEFINED>"););
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]