[gnumeric] Read some document properties from XLSX.



commit 1550d39e2e2ea7d3b0bc556201900911af8173f1
Author: Andreas J Guelzow <aguelzow pyrshep ca>
Date:   Sat Jun 25 00:27:02 2011 -0600

    Read some document properties from XLSX.
    
    2011-06-25  Andreas J. Guelzow <aguelzow pyrshep ca>
    
    	* xlsx-read-docprops.c (new)
    	* Makefile.am: add xlsx-read-docprops.c
    	* xlsx-utils.h: add XL_NS_PROP_*
    	* xlsx-write.c (xlsx_write_fills): start with some fills
    	expected by Excel
    	(xlsx_write_style): apparently Excel writes apply*
    	attributes always. So do we now.
    	* xlsx-read.c: add some includes, including
    	xlsx-read-docprops.c
    	(xlsx_ns): some more namespaces
    	(xlsx_file_open): call xlsx_read_docprops from
    	xlsx-read-docprops.c

 NEWS                               |    2 +-
 plugins/excel/ChangeLog            |   17 +++++-
 plugins/excel/Makefile.am          |    1 +
 plugins/excel/xlsx-read-docprops.c |  128 ++++++++++++++++++++++++++++++++++++
 plugins/excel/xlsx-read.c          |   15 ++++
 plugins/excel/xlsx-utils.h         |    7 ++-
 plugins/excel/xlsx-write.c         |   35 ++++++----
 7 files changed, 188 insertions(+), 17 deletions(-)
---
diff --git a/NEWS b/NEWS
index 5b2d328..2e990f3 100644
--- a/NEWS
+++ b/NEWS
@@ -3,7 +3,7 @@ Gnumeric 1.10.17
 Andreas:
 	* Fix some style import from ODF. [#652492]
 	* Import/Export print formatting from/to ODF. [#653186]
-	* Write some document properties to XLSX.
+	* Write and read some document properties to and from XLSX.
 	* Fix xlsx schema violations.
 
 Morten:
diff --git a/plugins/excel/ChangeLog b/plugins/excel/ChangeLog
index 58a477b..35b5384 100644
--- a/plugins/excel/ChangeLog
+++ b/plugins/excel/ChangeLog
@@ -1,6 +1,21 @@
+2011-06-25  Andreas J. Guelzow <aguelzow pyrshep ca>
+
+	* xlsx-read-docprops.c (new)
+	* Makefile.am: add xlsx-read-docprops.c
+	* xlsx-utils.h: add XL_NS_PROP_*
+	* xlsx-write.c (xlsx_write_fills): start with some fills
+	expected by Excel
+	(xlsx_write_style): apparently Excel writes apply*
+	attributes always. So do we now.
+	* xlsx-read.c: add some includes, including
+	xlsx-read-docprops.c
+	(xlsx_ns): some more namespaces
+	(xlsx_file_open): call xlsx_read_docprops from
+	xlsx-read-docprops.c
+
 2011-06-24  Andreas J. Guelzow <aguelzow pyrshep ca>
 
-	* xlsx-write.c (xlsx_write_style): usually wrte an xfId,
+	* xlsx-write.c (xlsx_write_style): usually write an xfId,
 	write apply* attributes only if they are false (nobody
 	seems to write true ones)
 	(xlsx_write_cellStyleXfs): write a named style record to refer
diff --git a/plugins/excel/Makefile.am b/plugins/excel/Makefile.am
index d9f8da8..51a0b06 100644
--- a/plugins/excel/Makefile.am
+++ b/plugins/excel/Makefile.am
@@ -79,6 +79,7 @@ xml_DATA = $(xml_in_files:.xml.in=.xml)
 @INTLTOOL_XML_RULE@
 
 EXTRA_DIST = $(xml_in_files)	\
+	xlsx-read-docprops.c	\
 	xlsx-write-docprops.c	\
 	xlsx-read-pivot.c	\
 	xlsx-write-pivot.c	\
diff --git a/plugins/excel/xlsx-read-docprops.c b/plugins/excel/xlsx-read-docprops.c
new file mode 100644
index 0000000..12356db
--- /dev/null
+++ b/plugins/excel/xlsx-read-docprops.c
@@ -0,0 +1,128 @@
+/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * xlsx-read-docprops.c : import MS Office Open xlsx document properties.
+ *
+ * Copyright (C) 2011 Andreas J. Guelzow All Rights Reserved
+ * (aguelzow pyrshep ca)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+
+/*
+ *
+ * DO * NOT * COMPILE * DIRECTLY *
+ * DO * NOT * COMPILE * DIRECTLY *
+ * DO * NOT * COMPILE * DIRECTLY *
+ *
+ * included via xlsx-read.c
+ **/
+
+static void
+xlsx_read_core_keys (GsfXMLIn *xin, G_GNUC_UNUSED GsfXMLBlob *blob)
+{
+/* xin->content->str */
+/* xin->node->user_data.v_str */
+}
+
+static void
+xlsx_read_core_prop_type (GsfXMLIn *xin, GType g_type)
+{
+	XLSXReadState *state = (XLSXReadState *)xin->user_state;
+	GValue *res = g_new0 (GValue, 1);
+	if (gsf_xml_gvalue_from_str (res, g_type, xin->content->str))
+		gsf_doc_meta_data_insert 
+			(state->metadata,
+			 g_strdup (xin->node->user_data.v_str), res);
+	else
+		g_free (res);
+}
+
+static void
+xlsx_read_core_prop (GsfXMLIn *xin, G_GNUC_UNUSED GsfXMLBlob *blob)
+{
+	xlsx_read_core_prop_type (xin, G_TYPE_STRING);
+}
+static void
+xlsx_read_core_prop_dt (GsfXMLIn *xin, G_GNUC_UNUSED GsfXMLBlob *blob)
+{
+	xlsx_read_core_prop_type (xin, GSF_TIMESTAMP_TYPE);
+}
+
+static GsfXMLInNode const xlsx_docprops_core_dtd[] = {
+GSF_XML_IN_NODE_FULL (START, START, -1, NULL, GSF_XML_NO_CONTENT, FALSE, TRUE, NULL, NULL, 0),
+GSF_XML_IN_NODE_FULL (START, CORE_PROPS, XL_NS_PROP_CP, "coreProperties", GSF_XML_NO_CONTENT, FALSE, TRUE, NULL, NULL, 0),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_CATEGORY, XL_NS_PROP_CP, "category", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = GSF_META_NAME_CATEGORY),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_CONTENT_STATUS, XL_NS_PROP_CP, "contentStatus", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = "cp:contentStatus"),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_CONTENT_TYPE, XL_NS_PROP_CP, "contentType", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = "cp:contentType"),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_KEYWORDS, XL_NS_PROP_CP, "keywords", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_keys, 0),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_LAST_NODIFIED_BY, XL_NS_PROP_CP, "lastModifiedBy", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = GSF_META_NAME_CREATOR),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_LAST_PRINTED, XL_NS_PROP_CP, "lastPrinted", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop_dt, .v_str = GSF_META_NAME_PRINT_DATE),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_REVISION, XL_NS_PROP_CP, "revision", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = GSF_META_NAME_REVISION_COUNT),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_VERSION, XL_NS_PROP_CP, "version", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = "cp:version"),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_CREATOR, XL_NS_PROP_DC, "creator", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = GSF_META_NAME_INITIAL_CREATOR),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_DESCRIPTION, XL_NS_PROP_DC, "description", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = GSF_META_NAME_DESCRIPTION),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_IDENTIFIER, XL_NS_PROP_DC, "identifier", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = "dc:identifier"),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_LANGUAGE, XL_NS_PROP_DC, "language", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = GSF_META_NAME_LANGUAGE),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_SUBJECT, XL_NS_PROP_DC, "subject", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = GSF_META_NAME_SUBJECT),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_TITLE, XL_NS_PROP_DC, "title", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = GSF_META_NAME_TITLE),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_CREATED, XL_NS_PROP_DCTERMS, "created", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop_dt, .v_str = GSF_META_NAME_DATE_CREATED),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_MODIFIED, XL_NS_PROP_DCTERMS, "modified", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop_dt, .v_str = GSF_META_NAME_DATE_MODIFIED),
+GSF_XML_IN_NODE_END
+};
+
+static void
+xlsx_read_docprops_core (XLSXReadState *state)
+{
+	GsfInput *in;
+	/* optional */
+	in = gsf_open_pkg_open_rel_by_type 
+		(GSF_INPUT (state->zip),
+		 "http://schemas.openxmlformats.org/package/2006/relationships/metadata/"
+		 "core-properties", NULL);
+	
+	if (in == NULL) return;
+	xlsx_parse_stream (state, in, xlsx_docprops_core_dtd);
+	
+}
+
+static void
+xlsx_read_docprops_extended (XLSXReadState *state)
+{
+	GsfInput *in;
+	/* optional */
+	in = gsf_open_pkg_open_rel_by_type 
+		(GSF_INPUT (state->zip),
+		 "http://schemas.openxmlformats.org/officeDocument/2006/relationships/"
+		 "extended-properties", NULL);
+
+	if (in == NULL) return;
+	/* xlsx_parse_stream (&state, in, xlsx_docprops_extended_dtd); */
+	g_object_unref (in);
+}
+
+static void
+xlsx_read_docprops (XLSXReadState *state)
+{
+	state->metadata = gsf_doc_meta_data_new ();
+
+	xlsx_read_docprops_core (state);
+	xlsx_read_docprops_extended (state);
+
+	go_doc_set_meta_data (GO_DOC (state->wb), state->metadata);
+	g_object_unref (state->metadata);
+	state->metadata = NULL;
+}
+
diff --git a/plugins/excel/xlsx-read.c b/plugins/excel/xlsx-read.c
index 147f3ab..fedc4df 100644
--- a/plugins/excel/xlsx-read.c
+++ b/plugins/excel/xlsx-read.c
@@ -65,6 +65,10 @@
 #include <gsf/gsf-infile.h>
 #include <gsf/gsf-infile-zip.h>
 #include <gsf/gsf-open-pkg-utils.h>
+#include <gsf/gsf-meta-names.h>
+#include <gsf/gsf-doc-meta-data.h>
+#include <gsf/gsf-docprop-vector.h>
+#include <gsf/gsf-timestamp.h>
 
 #include <glib/gi18n-lib.h>
 #include <gmodule.h>
@@ -243,6 +247,8 @@ typedef struct {
 	GPtrArray	*authors;
 	GObject		*comment;
 	GString		*comment_text;
+
+	GsfDocMetaData   *metadata;
 } XLSXReadState;
 typedef struct {
 	GOString	*str;
@@ -266,6 +272,11 @@ static GsfXMLInNS const xlsx_ns[] = {
 	GSF_XML_IN_NS (XL_NS_LEG_OFF,   "urn:schemas-microsoft-com:office:office"),
 	GSF_XML_IN_NS (XL_NS_LEG_XL,    "urn:schemas-microsoft-com:office:excel"),
 	GSF_XML_IN_NS (XL_NS_LEG_VML,   "urn:schemas-microsoft-com:vml"),
+	GSF_XML_IN_NS (XL_NS_PROP_CP,   "http://schemas.openxmlformats.org/package/2006/metadata/core-properties"),
+	GSF_XML_IN_NS (XL_NS_PROP_DC,   "http://purl.org/dc/elements/1.1/"),
+	GSF_XML_IN_NS (XL_NS_PROP_DCMITYPE, "http://purl.org/dc/dcmitype"),
+	GSF_XML_IN_NS (XL_NS_PROP_DCTERMS,  "http://purl.org/dc/terms/"),
+	GSF_XML_IN_NS (XL_NS_PROP_XSI,  "http://www.w3.org/2001/XMLSchema-instance"),
 	{ NULL }
 };
 
@@ -4198,6 +4209,8 @@ xlsx_style_array_free (GPtrArray *styles)
 	}
 }
 
+#include "xlsx-read-docprops.c"
+
 G_MODULE_EXPORT void
 xlsx_file_open (GOFileOpener const *fo, GOIOContext *context,
 		WorkbookView *wb_view, GsfInput *input);
@@ -4255,6 +4268,8 @@ xlsx_file_open (GOFileOpener const *fo, GOIOContext *context,
 			xlsx_parse_stream (&state, in, xlsx_styles_dtd);
 
 			xlsx_parse_stream (&state, wb_part, xlsx_workbook_dtd);
+
+			xlsx_read_docprops (&state);
 		} else
 			go_cmd_context_error_import (GO_CMD_CONTEXT (context),
 				_("No workbook stream found."));
diff --git a/plugins/excel/xlsx-utils.h b/plugins/excel/xlsx-utils.h
index 016fdb5..7cefe9e 100644
--- a/plugins/excel/xlsx-utils.h
+++ b/plugins/excel/xlsx-utils.h
@@ -36,7 +36,12 @@ enum {
 	XL_NS_PKG_REL,
 	XL_NS_LEG_OFF,
 	XL_NS_LEG_XL,
-	XL_NS_LEG_VML
+	XL_NS_LEG_VML,
+	XL_NS_PROP_CP,
+	XL_NS_PROP_DC,
+	XL_NS_PROP_DCMITYPE,
+	XL_NS_PROP_DCTERMS,
+	XL_NS_PROP_XSI
 };
 
 #define XLSX_MaxCol	16384
diff --git a/plugins/excel/xlsx-write.c b/plugins/excel/xlsx-write.c
index e528686..f3fb8e5 100644
--- a/plugins/excel/xlsx-write.c
+++ b/plugins/excel/xlsx-write.c
@@ -494,16 +494,28 @@ xlsx_write_fills (XLSXWriteState *state, GsfXMLOut *xml)
 			if (fill_n < 0) {
 				g_ptr_array_add (styles_w_fills, (gpointer)style);
 				g_hash_table_insert (fills_hash, (gpointer)style, 
-						     GINT_TO_POINTER (styles_w_fills->len));
+						     GINT_TO_POINTER (styles_w_fills->len + 1));
 			} else
 				g_hash_table_insert (fills_hash, (gpointer)style, 
-						     GINT_TO_POINTER (fill_n + 1));
+						     GINT_TO_POINTER (fill_n + 2));
 		}
 	}
 
 	if (styles_w_fills->len > 0) {
 		gsf_xml_out_start_element (xml, "fills");
-		gsf_xml_out_add_int (xml, "count", styles_w_fills->len);
+		gsf_xml_out_add_int (xml, "count", styles_w_fills->len + 2);
+		/* Excel considers the first two fills special (not according to  ECMA), */
+		/* so we start with two unused ones.                                     */
+		gsf_xml_out_start_element (xml, "fill");
+		gsf_xml_out_start_element (xml, "patternFill");
+		gsf_xml_out_add_cstr_unchecked (xml, "patternType","none");
+		gsf_xml_out_end_element (xml);				
+		gsf_xml_out_end_element (xml);				
+		gsf_xml_out_start_element (xml, "fill");
+		gsf_xml_out_start_element (xml, "patternFill");
+		gsf_xml_out_add_cstr_unchecked (xml, "patternType","gray125");
+		gsf_xml_out_end_element (xml);				
+		gsf_xml_out_end_element (xml);				
 		for (i = 0 ; i < styles_w_fills->len ; i++) {
 			GnmStyle const *style = g_ptr_array_index (styles_w_fills, i);
 			gsf_xml_out_start_element (xml, "fill");
@@ -874,21 +886,16 @@ xlsx_write_style (XLSXWriteState *state, GsfXMLOut *xml,
 	gboolean num_fmt = gnm_style_is_element_set (style, MSTYLE_FORMAT);
 
 	if (id >= 0) {
-		if (!alignment)
-			xlsx_add_bool (xml, "applyAlignment", alignment);
-		if (!border)
-			xlsx_add_bool (xml, "applyBorder", border);
-		if (!font)
-			xlsx_add_bool (xml, "applyFont", font);
-		if (!fill)
-			xlsx_add_bool (xml, "applyFill", fill);
-		if (!num_fmt)
-			xlsx_add_bool (xml, "applyNumberFormat", num_fmt);
+		xlsx_add_bool (xml, "applyAlignment", alignment);
+		xlsx_add_bool (xml, "applyBorder", border);
+		xlsx_add_bool (xml, "applyFont", font);
+		xlsx_add_bool (xml, "applyFill", fill);
+		xlsx_add_bool (xml, "applyNumberFormat", num_fmt);
 	}
 	if (font)
 		gsf_xml_out_add_int (xml, "fontId", GPOINTER_TO_INT (tmp_font) - 1);
 	if (fill)
-		gsf_xml_out_add_int (xml, "fillId", GPOINTER_TO_INT (tmp_fill) - 1);
+		gsf_xml_out_add_int (xml, "fillId", GPOINTER_TO_INT (tmp_fill));
 	if (border)
 		gsf_xml_out_add_int (xml, "borderId", GPOINTER_TO_INT (tmp_border) - 1);
 	if (num_fmt)



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]