[gnumeric] Provide xls file opener permitting encoding specification. [#535473]
- From: Andreas J. Guelzow <guelzow src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnumeric] Provide xls file opener permitting encoding specification. [#535473]
- Date: Sun, 14 Aug 2011 22:21:59 +0000 (UTC)
commit 40fca5a6f205029f51b539efeb26d9feb07631a7
Author: Andreas J Guelzow <aguelzow pyrshep ca>
Date: Sun Aug 14 16:20:51 2011 -0600
Provide xls file opener permitting encoding specification. [#535473]
2011-08-14 Andreas J. Guelzow <aguelzow pyrshep ca>
* plugins/excel/boot.c (excel_enc_file_open): new
(excel_file_open): use excel_enc_file_open
* plugins/excel/excel.h (excel_read_workbook): add argument
* plugins/excel/ms-excel-read.c (excel_read_FONT): for charset 0 consider
encoding override
(gnm_xl_get_codepage): new
(gnm_xl_importer_new): add argument, change caller and convert codepage override
(excel_read_workbook): add argument and change all callers
* plugins/excel/ms-excel-read.h (_GnmXLImporter): add field
* plugins/excel/plugin.xml.in: add new encoding dependent file opener
NEWS | 4 +-
plugins/excel/ChangeLog | 13 ++++
plugins/excel/boot.c | 16 ++++-
plugins/excel/excel.h | 6 +-
plugins/excel/ms-excel-read.c | 124 +++++++++++++++++++++++++++++++++++++++--
plugins/excel/ms-excel-read.h | 1 +
plugins/excel/plugin.xml.in | 8 ++-
7 files changed, 159 insertions(+), 13 deletions(-)
---
diff --git a/NEWS b/NEWS
index e76897a..ebbeb12 100644
--- a/NEWS
+++ b/NEWS
@@ -6,13 +6,15 @@ Andreas:
* Clarify the distinction beween open/save and import/export.
* Fix enabling of modify comment or hyperlink. [#655877]
* Fully read scientific format from ODF.
+ * Provide xls file opener permitting encoding specification. [#535473]
Jean:
* Make things build against gtk+-3.0.
* Make Ctrl-PgUp and Ctrl-PgDn work on chart-only sheets. [#645673]
Valek:
- * Set LABEL encoding based on FONT charset converted to codepage. [#304007, ubuntu #262777]
+ * In xls import, set LABEL encoding based on FONT charset converted to
+ codepage. [#304007, ubuntu #262777]
--------------------------------------------------------------------------
Gnumeric 1.10.17
diff --git a/plugins/excel/ChangeLog b/plugins/excel/ChangeLog
index 41929db..06b9889 100644
--- a/plugins/excel/ChangeLog
+++ b/plugins/excel/ChangeLog
@@ -1,3 +1,16 @@
+2011-08-14 Andreas J. Guelzow <aguelzow pyrshep ca>
+
+ * plugins/excel/boot.c (excel_enc_file_open): new
+ (excel_file_open): use excel_enc_file_open
+ * plugins/excel/excel.h (excel_read_workbook): add argument
+ * plugins/excel/ms-excel-read.c (excel_read_FONT): for charset 0 consider
+ encoding override
+ (gnm_xl_get_codepage): new
+ (gnm_xl_importer_new): add argument, change caller and convert codepage override
+ (excel_read_workbook): add argument and change all callers
+ * plugins/excel/ms-excel-read.h (_GnmXLImporter): add field
+ * plugins/excel/plugin.xml.in: add new encoding dependent file opener
+
2011-08-12 Valek Filippov <frob gnome org>
* plugins/excel/ms-excel-read.c (excel_read_FONT): convert charset to codepage
diff --git a/plugins/excel/boot.c b/plugins/excel/boot.c
index be00ac3..3652b8b 100644
--- a/plugins/excel/boot.c
+++ b/plugins/excel/boot.c
@@ -67,6 +67,7 @@ gint ms_excel_object_debug = 0;
gboolean excel_file_probe (GOFileOpener const *fo, GsfInput *input, GOFileProbeLevel pl);
void excel_file_open (GOFileOpener const *fo, GOIOContext *context, WorkbookView *wbv, GsfInput *input);
+void excel_enc_file_open (GOFileOpener const *fo, char const *enc, GOIOContext *context, WorkbookView *wbv, GsfInput *input);
void excel_biff7_file_save (GOFileSaver const *fs, GOIOContext *context, WorkbookView const *wbv, GsfOutput *output);
void excel_biff8_file_save (GOFileSaver const *fs, GOIOContext *context, WorkbookView const *wbv, GsfOutput *output);
void excel_dsf_file_save (GOFileSaver const *fs, GOIOContext *context, WorkbookView const *wbv, GsfOutput *output);
@@ -147,8 +148,8 @@ cb_dump_vba (char const *name, guint8 const *src_code)
/* Service entry point */
void
-excel_file_open (GOFileOpener const *fo, GOIOContext *context,
- WorkbookView *wbv, GsfInput *input)
+excel_enc_file_open (GOFileOpener const *fo, char const *enc, GOIOContext *context,
+ WorkbookView *wbv, GsfInput *input)
{
GsfInput *stream = NULL;
GError *err = NULL;
@@ -166,7 +167,7 @@ excel_file_open (GOFileOpener const *fo, GOIOContext *context,
if (data && data[0] == 0x09 && (data[1] & 0xf1) == 0) {
gsf_input_seek (input, -2, G_SEEK_CUR);
excel_read_workbook (context, wbv, input,
- &is_double_stream_file);
+ &is_double_stream_file, enc);
/* NOTE : we lack a saver for the early formats */
return;
}
@@ -187,7 +188,7 @@ excel_file_open (GOFileOpener const *fo, GOIOContext *context,
return;
}
- excel_read_workbook (context, wbv, stream, &is_double_stream_file);
+ excel_read_workbook (context, wbv, stream, &is_double_stream_file, enc);
g_object_unref (G_OBJECT (stream));
meta_data = gsf_doc_meta_data_new ();
@@ -240,6 +241,13 @@ excel_file_open (GOFileOpener const *fo, GOIOContext *context,
go_file_saver_for_id ("Gnumeric_Excel:excel_biff7"));
}
+/*
+ * Opener variant that takes no encoding argument: forwards to
+ * excel_enc_file_open, passing NULL as the encoding.
+ */
+void
+excel_file_open (GOFileOpener const *fo, GOIOContext *context,
+ WorkbookView *wbv, GsfInput *input)
+{
+ excel_enc_file_open (fo, NULL, context, wbv, input);
+}
+
static void
excel_save (GOIOContext *context, WorkbookView const *wbv, GsfOutput *output,
gboolean biff7, gboolean biff8)
diff --git a/plugins/excel/excel.h b/plugins/excel/excel.h
index 56f8ce6..0edd5e6 100644
--- a/plugins/excel/excel.h
+++ b/plugins/excel/excel.h
@@ -15,8 +15,10 @@
#include "gnumeric.h"
#include "ms-biff.h"
-void excel_read_workbook (GOIOContext *context, WorkbookView *new_wb, GsfInput *input,
- gboolean *is_double_stream_file);
+void excel_read_workbook (GOIOContext *context, WorkbookView *new_wb,
+ GsfInput *input,
+ gboolean *is_double_stream_file,
+ char const *opt_enc);
typedef struct _XLSExporter ExcelWriteState;
void excel_write_state_free (ExcelWriteState *ewb);
diff --git a/plugins/excel/ms-excel-read.c b/plugins/excel/ms-excel-read.c
index a5dc9cd..19eb5a7 100644
--- a/plugins/excel/ms-excel-read.c
+++ b/plugins/excel/ms-excel-read.c
@@ -1667,6 +1667,10 @@ excel_read_FONT (BiffQuery *q, GnmXLImporter *importer)
fd->codepage = cp;
break;
}
+ if (importer->codepage_override > 0) {
+ fd->codepage = importer->codepage_override;
+ break;
+ }
}
/* no break */
case 1:
@@ -3164,8 +3168,114 @@ ms_wb_get_font_markup (MSContainer const *c, unsigned indx)
return fd->attrs;
}
+/*
+ * gnm_xl_get_codepage:
+ * @enc: encoding name to look up, or NULL when none was given.
+ *
+ * Translate an encoding name into the numeric codepage stored in the table
+ * below.  The comparison is an exact, case-sensitive strcmp.
+ *
+ * Returns: the codepage for @enc, or 0 when @enc is NULL, is not in the
+ * table, or is listed without a codepage.
+ */
+static gint
+gnm_xl_get_codepage (char const *enc)
+{
+	/* These names must match charset_trans_array in go-charmap-sel.c */
+	/* NOTE(review): entries left at 0 yield no codepage; some of them */
+	/* (e.g. CP932, x-windows-949) may deserve a number too -- confirm. */
+	static const struct {
+		char const *name;
+		gint codepage;
+	} charset_trans_array[] = {
+		{"IBM864", 0},
+		{"IBM864i", 0},
+		{"ISO-8859-6", 0},
+		{"ISO-8859-6-E", 0},
+		{"ISO-8859-6-I", 0},
+		{"x-mac-arabic", 0},
+		{"windows-1256", 1256},
+		{"armscii-8", 0},
+		{"ISO-8859-13", 0},
+		{"ISO-8859-4", 0},
+		{"windows-1257", 1257},
+		{"ISO-8859-14", 0},
+		{"IBM852", 0},
+		{"ISO-8859-2", 0},
+		{"x-mac-ce", 0},
+		{"windows-1250", 1250},
+		{"gb18030", 0},
+		{"GB2312", 0},
+		{"x-gbk", 0},
+		{"HZ-GB-2312", 0},
+		{"windows-936", 936},
+		{"Big5", 0},
+		{"Big5-HKSCS", 0},
+		{"x-euc-tw", 0},
+		{"x-mac-croatian", 0},
+		{"IBM855", 0},
+		{"ISO-8859-5", 0},
+		{"ISO-IR-111", 0},
+		{"KOI8-R", 0},
+		{"x-mac-cyrillic", 0},
+		{"windows-1251", 1251},
+		{"IBM866", 0},
+		{"KOI8-U", 0},
+		{"x-mac-ukrainian", 0},
+		{"ANSI_X3.4-1968#ASCII", 0},
+		{"x-mac-farsi", 0},
+		{"geostd8", 0},
+		{"ISO-8859-7", 0},
+		{"x-mac-greek", 0},
+		/* Was 0, unlike every other windows-125x entry in this table. */
+		{"windows-1253", 1253},
+		{"x-mac-gujarati", 0},
+		{"x-mac-gurmukhi", 0},
+		{"IBM862", 0},
+		{"ISO-8859-8-E", 0},
+		{"ISO-8859-8-I", 0},
+		{"x-mac-hebrew", 0},
+		{"windows-1255", 1255},
+		{"x-mac-devanagari", 0},
+		{"x-mac-icelandic", 0},
+		{"EUC-JP", 0},
+		{"ISO-2022-JP", 0},
+		{"CP932", 0},
+		{"EUC-KR", 0},
+		{"ISO-2022-KR", 0},
+		{"x-johab", 0},
+		{"x-windows-949", 0},
+		{"ISO-8859-10", 0},
+		{"x-mac-romanian", 0},
+		{"ISO-8859-16", 0},
+		{"ISO-8859-3", 0},
+		{"TIS-620", 0},
+		{"IBM857", 0},
+		{"ISO-8859-9", 0},
+		{"x-mac-turkish", 0},
+		{"windows-1254", 1254},
+		{"UTF-7", 0},
+		{"UTF-8", 0},
+		{"UTF-16BE", 0},
+		{"UTF-16LE", 0},
+		{"UTF-32BE", 0},
+		{"UTF-32LE", 0},
+		{"x-user-defined", 0},
+		{"x-viet-tcvn5712", 0},
+		{"VISCII", 0},
+		{"x-viet-vps", 0},
+		{"windows-1258", 1258},
+		{"ISO-8859-8", 0},
+		{"IBM850", 0},
+		{"ISO-8859-1", 0},
+		{"ISO-8859-15", 0},
+		{"x-mac-roman", 0},
+		{"windows-1252", 1252},
+		{"T61.8bit", 0},
+		{"x-imap4-modified-utf7", 0},
+		{"x-u-escaped", 0}
+	};
+	guint i;	/* unsigned, to match the type of G_N_ELEMENTS */
+
+	if (enc == NULL)
+		return 0;
+
+	for (i = 0; i < G_N_ELEMENTS (charset_trans_array); i++) {
+		if (0 == strcmp (enc, charset_trans_array[i].name))
+			return charset_trans_array[i].codepage;
+	}
+
+	return 0;
+}
+
static GnmXLImporter *
-gnm_xl_importer_new (GOIOContext *context, WorkbookView *wb_view)
+gnm_xl_importer_new (GOIOContext *context, WorkbookView *wb_view, char const *opt_enc)
{
static MSContainerClass const vtbl = {
NULL, NULL,
@@ -3182,7 +3292,9 @@ gnm_xl_importer_new (GOIOContext *context, WorkbookView *wb_view)
importer->wbv = wb_view;
importer->wb = wb_view_get_workbook (wb_view);
importer->str_iconv = (GIConv)(-1);
- gnm_xl_importer_set_codepage (importer, 1252); /* set a default */
+ importer->codepage_override = gnm_xl_get_codepage (opt_enc);
+ gnm_xl_importer_set_codepage (importer, (importer->codepage_override > 0) ?
+ importer->codepage_override : 1252); /* set a default */
importer->expr_sharer = gnm_expr_sharer_new ();
importer->v8.supbook = g_array_new (FALSE, FALSE, sizeof (ExcelSupBook));
@@ -6884,8 +6996,10 @@ excel_read_CODEPAGE (BiffQuery *q, GnmXLImporter *importer)
}
void
-excel_read_workbook (GOIOContext *context, WorkbookView *wb_view, GsfInput *input,
- gboolean *is_double_stream_file)
+excel_read_workbook (GOIOContext *context, WorkbookView *wb_view,
+ GsfInput *input,
+ gboolean *is_double_stream_file,
+ char const *opt_enc)
{
GnmXLImporter *importer;
BiffQuery *q;
@@ -6899,7 +7013,7 @@ excel_read_workbook (GOIOContext *context, WorkbookView *wb_view, GsfInput *inpu
go_io_value_progress_set (context, gsf_input_size (input), N_BYTES_BETWEEN_PROGRESS_UPDATES);
q = ms_biff_query_new (input);
- importer = gnm_xl_importer_new (context, wb_view);
+ importer = gnm_xl_importer_new (context, wb_view, opt_enc);
*is_double_stream_file = FALSE;
if (ms_biff_query_next (q) &&
diff --git a/plugins/excel/ms-excel-read.h b/plugins/excel/ms-excel-read.h
index ba90ccb..f6a27a8 100644
--- a/plugins/excel/ms-excel-read.h
+++ b/plugins/excel/ms-excel-read.h
@@ -133,6 +133,7 @@ struct _GnmXLImporter {
GnmExprSharer *expr_sharer;
GIConv str_iconv;
+ int codepage_override;
};
GnmValue *xls_value_new_err (GnmEvalPos const *pos, guint8 const err);
diff --git a/plugins/excel/plugin.xml.in b/plugins/excel/plugin.xml.in
index a7b8f9f..1dc3025 100644
--- a/plugins/excel/plugin.xml.in
+++ b/plugins/excel/plugin.xml.in
@@ -11,7 +11,7 @@
<!-- IMPORT binary Office 2 -> 2003 -->
<service type="file_opener" id="excel" priority="100" probe="TRUE">
<information>
- <_description>MS Excel (tm) (*.xls)</_description>
+ <_description>MS Excel™ (*.xls)</_description>
</information>
<suffixes>
<suffix>xls</suffix>
@@ -106,5 +106,11 @@
<_description>MS Excel™ 2010 (ECMA 376 2nd edition (2008))</_description>
</information>
</service>
+ <!-- IMPORT binary xls with an explicitly specified encoding; not probed -->
+ <service type="file_opener" id="excel_enc" priority="200" probe="FALSE"
+ encoding_dependent="TRUE">
+ <information>
+ <_description>MS Excel™ (*.xls) requiring encoding specification</_description>
+ </information>
+ </service>
</services>
</plugin>
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]