[gnumeric] Provide xls file opener permitting encoding specification. [#535473]



commit 40fca5a6f205029f51b539efeb26d9feb07631a7
Author: Andreas J Guelzow <aguelzow pyrshep ca>
Date:   Sun Aug 14 16:20:51 2011 -0600

    Provide xls file opener permitting encoding specification. [#535473]
    
    2011-08-14  Andreas J. Guelzow <aguelzow pyrshep ca>
    
    	* plugins/excel/boot.c (excel_enc_file_open): new
    	(excel_file_open): use excel_enc_file_open
    	* plugins/excel/excel.h (excel_read_workbook): add argument
    	* plugins/excel/ms-excel-read.c (excel_read_FONT): for charset 0 consider
    	encoding override
    	(gnm_xl_get_codepage): new
    	(gnm_xl_importer_new): add argument, change caller and convert codepage override
    	(excel_read_workbook): add argument and change all callers
    	* plugins/excel/ms-excel-read.h (_GnmXLImporter): add field
    	* plugins/excel/plugin.xml.in: add new encoding dependent file opener

 NEWS                          |    4 +-
 plugins/excel/ChangeLog       |   13 ++++
 plugins/excel/boot.c          |   16 ++++-
 plugins/excel/excel.h         |    6 +-
 plugins/excel/ms-excel-read.c |  124 +++++++++++++++++++++++++++++++++++++++--
 plugins/excel/ms-excel-read.h |    1 +
 plugins/excel/plugin.xml.in   |    8 ++-
 7 files changed, 159 insertions(+), 13 deletions(-)
---
diff --git a/NEWS b/NEWS
index e76897a..ebbeb12 100644
--- a/NEWS
+++ b/NEWS
@@ -6,13 +6,15 @@ Andreas:
 	* Clarify the distinction beween open/save and import/export.
 	* Fix enabling of modify comment or hyperlink. [#655877]
 	* Fully read scientific format from ODF.
+	* Provide xls file opener permitting encoding specification. [#535473]
 
 Jean:
 	* Make things build against gtk+-3.0.
 	* Make Ctrl-PgUp and Ctrl-PgDn work on chart-only sheets. [#645673]
 
 Valek:
-	* Set LABEL encoding based on FONT charset converted to codepage. [#304007, ubuntu #262777]
+	* In xls import, set LABEL encoding based on FONT charset converted to 
+	codepage. [#304007, ubuntu #262777]
 
 --------------------------------------------------------------------------
 Gnumeric 1.10.17
diff --git a/plugins/excel/ChangeLog b/plugins/excel/ChangeLog
index 41929db..06b9889 100644
--- a/plugins/excel/ChangeLog
+++ b/plugins/excel/ChangeLog
@@ -1,3 +1,16 @@
+2011-08-14  Andreas J. Guelzow <aguelzow pyrshep ca>
+
+	* plugins/excel/boot.c (excel_enc_file_open): new
+	(excel_file_open): use excel_enc_file_open
+	* plugins/excel/excel.h (excel_read_workbook): add argument
+	* plugins/excel/ms-excel-read.c (excel_read_FONT): for charset 0 consider
+	encoding override
+	(gnm_xl_get_codepage): new
+	(gnm_xl_importer_new): add argument, change caller and convert codepage override
+	(excel_read_workbook): add argument and change all callers
+	* plugins/excel/ms-excel-read.h (_GnmXLImporter): add field
+	* plugins/excel/plugin.xml.in: add new encoding dependent file opener
+
 2011-08-12  Valek Filippov <frob gnome org>
 
 	* plugins/excel/ms-excel-read.c (excel_read_FONT): convert charset to codepage
diff --git a/plugins/excel/boot.c b/plugins/excel/boot.c
index be00ac3..3652b8b 100644
--- a/plugins/excel/boot.c
+++ b/plugins/excel/boot.c
@@ -67,6 +67,7 @@ gint ms_excel_object_debug = 0;
 
 gboolean excel_file_probe (GOFileOpener const *fo, GsfInput *input, GOFileProbeLevel pl);
 void excel_file_open (GOFileOpener const *fo, GOIOContext *context, WorkbookView *wbv, GsfInput *input);
+void excel_enc_file_open (GOFileOpener const *fo, char const *enc, GOIOContext *context, WorkbookView *wbv, GsfInput *input);
 void excel_biff7_file_save (GOFileSaver const *fs, GOIOContext *context, WorkbookView const *wbv, GsfOutput *output);
 void excel_biff8_file_save (GOFileSaver const *fs, GOIOContext *context, WorkbookView const *wbv, GsfOutput *output);
 void excel_dsf_file_save   (GOFileSaver const *fs, GOIOContext *context, WorkbookView const *wbv, GsfOutput *output);
@@ -147,8 +148,8 @@ cb_dump_vba (char const *name, guint8 const *src_code)
 
 /* Service entry point */
 void
-excel_file_open (GOFileOpener const *fo, GOIOContext *context,
-                 WorkbookView *wbv, GsfInput *input)
+excel_enc_file_open (GOFileOpener const *fo, char const *enc, GOIOContext *context,
+		     WorkbookView *wbv, GsfInput *input)
 {
 	GsfInput  *stream = NULL;
 	GError    *err = NULL;
@@ -166,7 +167,7 @@ excel_file_open (GOFileOpener const *fo, GOIOContext *context,
 		if (data && data[0] == 0x09 && (data[1] & 0xf1) == 0) {
 			gsf_input_seek (input, -2, G_SEEK_CUR);
 			excel_read_workbook (context, wbv, input,
-				&is_double_stream_file);
+					     &is_double_stream_file, enc);
 			/* NOTE : we lack a saver for the early formats */
 			return;
 		}
@@ -187,7 +188,7 @@ excel_file_open (GOFileOpener const *fo, GOIOContext *context,
 		return;
 	}
 
-	excel_read_workbook (context, wbv, stream, &is_double_stream_file);
+	excel_read_workbook (context, wbv, stream, &is_double_stream_file, enc);
 	g_object_unref (G_OBJECT (stream));
 
 	meta_data = gsf_doc_meta_data_new ();
@@ -240,6 +241,13 @@ excel_file_open (GOFileOpener const *fo, GOIOContext *context,
 			go_file_saver_for_id ("Gnumeric_Excel:excel_biff7"));
 }
 
+void
+excel_file_open (GOFileOpener const *fo, GOIOContext *context,
+                 WorkbookView *wbv, GsfInput *input)
+{
+	excel_enc_file_open (fo, NULL, context, wbv, input); /* NULL enc: no encoding override */
+}
+
 static void
 excel_save (GOIOContext *context, WorkbookView const *wbv, GsfOutput *output,
 	    gboolean biff7, gboolean biff8)
diff --git a/plugins/excel/excel.h b/plugins/excel/excel.h
index 56f8ce6..0edd5e6 100644
--- a/plugins/excel/excel.h
+++ b/plugins/excel/excel.h
@@ -15,8 +15,10 @@
 #include "gnumeric.h"
 #include "ms-biff.h"
 
-void excel_read_workbook (GOIOContext *context, WorkbookView *new_wb, GsfInput *input,
-			  gboolean *is_double_stream_file);
+void excel_read_workbook (GOIOContext *context, WorkbookView *new_wb, 
+			  GsfInput *input,
+			  gboolean *is_double_stream_file,
+			  char const *opt_enc);
 
 typedef struct _XLSExporter	 ExcelWriteState;
 void		 excel_write_state_free (ExcelWriteState *ewb);
diff --git a/plugins/excel/ms-excel-read.c b/plugins/excel/ms-excel-read.c
index a5dc9cd..19eb5a7 100644
--- a/plugins/excel/ms-excel-read.c
+++ b/plugins/excel/ms-excel-read.c
@@ -1667,6 +1667,10 @@ excel_read_FONT (BiffQuery *q, GnmXLImporter *importer)
 				fd->codepage = cp;
 				break;
 			}
+			if (importer->codepage_override > 0) {
+				fd->codepage = importer->codepage_override;
+				break;				
+			}
 		}
 			/* no break */
 		case 1:
@@ -3164,8 +3168,114 @@ ms_wb_get_font_markup (MSContainer const *c, unsigned indx)
 	return fd->attrs;
 }
 
+/* Translate the encoding name ENC into a Windows codepage number.  Returns 0
+ * (callers treat this as "no override") for NULL, unknown or unmapped names. */
+static gint
+gnm_xl_get_codepage (char const *enc)
+{
+	/* These names must match charset_trans_array in go-charmap-sel.c */
+	static const struct {
+		char const *name;
+		gint codepage;	/* 0: no codepage known for this name */
+	}  charset_trans_array[] = {
+		{"IBM864",                0},
+		{"IBM864i",               0},
+		{"ISO-8859-6",            0},
+		{"ISO-8859-6-E",          0},
+		{"ISO-8859-6-I",          0},
+		{"x-mac-arabic",          0},
+		{"windows-1256",          1256},
+		{"armscii-8",             0},
+		{"ISO-8859-13",           0},
+		{"ISO-8859-4",            0},
+		{"windows-1257",          1257},
+		{"ISO-8859-14",           0},
+		{"IBM852",                0},
+		{"ISO-8859-2",            0},
+		{"x-mac-ce",              0},
+		{"windows-1250",          1250},
+		{"gb18030",               0},
+		{"GB2312",                0},
+		{"x-gbk",                 0},
+		{"HZ-GB-2312",            0},
+		{"windows-936",           936},
+		{"Big5",                  0},
+		{"Big5-HKSCS",            0},
+		{"x-euc-tw",              0},
+		{"x-mac-croatian",        0},
+		{"IBM855",                0},
+		{"ISO-8859-5",            0},
+		{"ISO-IR-111",            0},
+		{"KOI8-R",                0},
+		{"x-mac-cyrillic",        0},
+		{"windows-1251",          1251},
+		{"IBM866",                0},
+		{"KOI8-U",                0},
+		{"x-mac-ukrainian",       0},
+		{"ANSI_X3.4-1968#ASCII",  0},
+		{"x-mac-farsi",           0},
+		{"geostd8",               0},
+		{"ISO-8859-7",            0},
+		{"x-mac-greek",           0},
+		{"windows-1253",          1253},
+		{"x-mac-gujarati",        0},
+		{"x-mac-gurmukhi",        0},
+		{"IBM862",                0},
+		{"ISO-8859-8-E",          0},
+		{"ISO-8859-8-I",          0},
+		{"x-mac-hebrew",          0},
+		{"windows-1255",          1255},
+		{"x-mac-devanagari",      0},
+		{"x-mac-icelandic",       0},
+		{"EUC-JP",                0},
+		{"ISO-2022-JP",           0},
+		{"CP932",                 0},
+		{"EUC-KR",                0},
+		{"ISO-2022-KR",           0},
+		{"x-johab",               0},
+		{"x-windows-949",         0},
+		{"ISO-8859-10",           0},
+		{"x-mac-romanian",        0},
+		{"ISO-8859-16",           0},
+		{"ISO-8859-3",            0},
+		{"TIS-620",               0},
+		{"IBM857",                0},
+		{"ISO-8859-9",            0},
+		{"x-mac-turkish",         0},
+		{"windows-1254",          1254},
+		{"UTF-7",                 0},
+		{"UTF-8",                 0},
+		{"UTF-16BE",              0},
+		{"UTF-16LE",              0},
+		{"UTF-32BE",              0},
+		{"UTF-32LE",              0},
+		{"x-user-defined",        0},
+		{"x-viet-tcvn5712",       0},
+		{"VISCII",                0},
+		{"x-viet-vps",            0},
+		{"windows-1258",          1258},
+		{"ISO-8859-8",            0},
+		{"IBM850",                0},
+		{"ISO-8859-1",            0},
+		{"ISO-8859-15",           0},
+		{"x-mac-roman",           0},
+		{"windows-1252",          1252},
+		{"T61.8bit",              0},
+		{"x-imap4-modified-utf7", 0},
+		{"x-u-escaped",           0}
+	};
+	guint i;
+
+	if (enc == NULL)
+		return 0;
+	for (i = 0; i < G_N_ELEMENTS(charset_trans_array); i++)
+		if (0 == strcmp (enc, charset_trans_array[i].name))
+			return charset_trans_array[i].codepage;
+	return 0;
+}
+
 static GnmXLImporter *
-gnm_xl_importer_new (GOIOContext *context, WorkbookView *wb_view)
+gnm_xl_importer_new (GOIOContext *context, WorkbookView *wb_view, char const *opt_enc)
 {
 	static MSContainerClass const vtbl = {
 		NULL, NULL,
@@ -3182,7 +3292,9 @@ gnm_xl_importer_new (GOIOContext *context, WorkbookView *wb_view)
 	importer->wbv     = wb_view;
 	importer->wb      = wb_view_get_workbook (wb_view);
 	importer->str_iconv = (GIConv)(-1);
-	gnm_xl_importer_set_codepage (importer, 1252); /* set a default */
+	importer->codepage_override = gnm_xl_get_codepage (opt_enc);
+	gnm_xl_importer_set_codepage (importer, (importer->codepage_override > 0) ?
+				      importer->codepage_override : 1252); /* set a default */
 
 	importer->expr_sharer = gnm_expr_sharer_new ();
 	importer->v8.supbook     = g_array_new (FALSE, FALSE, sizeof (ExcelSupBook));
@@ -6884,8 +6996,10 @@ excel_read_CODEPAGE (BiffQuery *q, GnmXLImporter *importer)
 }
 
 void
-excel_read_workbook (GOIOContext *context, WorkbookView *wb_view, GsfInput *input,
-		     gboolean *is_double_stream_file)
+excel_read_workbook (GOIOContext *context, WorkbookView *wb_view, 
+		     GsfInput *input,
+		     gboolean *is_double_stream_file,
+		     char const *opt_enc)
 {
 	GnmXLImporter *importer;
 	BiffQuery *q;
@@ -6899,7 +7013,7 @@ excel_read_workbook (GOIOContext *context, WorkbookView *wb_view, GsfInput *inpu
 	go_io_value_progress_set (context, gsf_input_size (input), N_BYTES_BETWEEN_PROGRESS_UPDATES);
 	q = ms_biff_query_new (input);
 
-	importer = gnm_xl_importer_new (context, wb_view);
+	importer = gnm_xl_importer_new (context, wb_view, opt_enc);
 
 	*is_double_stream_file = FALSE;
 	if (ms_biff_query_next (q) &&
diff --git a/plugins/excel/ms-excel-read.h b/plugins/excel/ms-excel-read.h
index ba90ccb..f6a27a8 100644
--- a/plugins/excel/ms-excel-read.h
+++ b/plugins/excel/ms-excel-read.h
@@ -133,6 +133,7 @@ struct _GnmXLImporter {
 
 	GnmExprSharer    *expr_sharer;
 	GIConv            str_iconv;
+	int               codepage_override;
 };
 
 GnmValue *xls_value_new_err (GnmEvalPos const *pos, guint8 const err);
diff --git a/plugins/excel/plugin.xml.in b/plugins/excel/plugin.xml.in
index a7b8f9f..1dc3025 100644
--- a/plugins/excel/plugin.xml.in
+++ b/plugins/excel/plugin.xml.in
@@ -11,7 +11,7 @@
 		<!-- IMPORT binary Office 2 -> 2003 -->
 		<service type="file_opener" id="excel" priority="100" probe="TRUE">
 			<information>
-				<_description>MS Excel (tm) (*.xls)</_description>
+				<_description>MS Excel&#8482; (*.xls)</_description>
 			</information>
 			<suffixes>
 				<suffix>xls</suffix>
@@ -106,5 +106,11 @@
 				<_description>MS Excel&#8482; 2010 (ECMA 376 2nd edition (2008))</_description>
 			</information>
 		</service>
+		<service type="file_opener" id="excel_enc" priority="200" probe="FALSE" 
+			 encoding_dependent="TRUE">
+			<information>
+				<_description>MS Excel&#8482; (*.xls) requiring encoding specification</_description>
+			</information>
+		</service>
 	</services>
 </plugin>



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]