[gnumeric] Read some document properties from XLSX.
- From: Andreas J. Guelzow <guelzow src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnumeric] Read some document properties from XLSX.
- Date: Sat, 25 Jun 2011 06:48:30 +0000 (UTC)
commit 1550d39e2e2ea7d3b0bc556201900911af8173f1
Author: Andreas J Guelzow <aguelzow pyrshep ca>
Date: Sat Jun 25 00:27:02 2011 -0600
Read some document properties from XLSX.
2011-06-25 Andreas J. Guelzow <aguelzow pyrshep ca>
* xlsx-read-docprops.c (new)
* Makefile.am: add xlsx-read-docprops.c
* xlsx-utils.h: add XL_NS_PROP_*
* xlsx-write.c (xlsx_write_fills): start with some fills
expected by Excel
(xlsx_write_style): apparently Excel writes apply*
attributes always. So do we now.
* xlsx-read.c: add some includes, including
xlsx-read-docprops.c
(xlsx_ns): some more namespaces
(xlsx_file_open): call xlsx_read_docprops from
xlsx-read-docprops.c
NEWS | 2 +-
plugins/excel/ChangeLog | 17 +++++-
plugins/excel/Makefile.am | 1 +
plugins/excel/xlsx-read-docprops.c | 128 ++++++++++++++++++++++++++++++++++++
plugins/excel/xlsx-read.c | 15 ++++
plugins/excel/xlsx-utils.h | 7 ++-
plugins/excel/xlsx-write.c | 35 ++++++----
7 files changed, 188 insertions(+), 17 deletions(-)
---
diff --git a/NEWS b/NEWS
index 5b2d328..2e990f3 100644
--- a/NEWS
+++ b/NEWS
@@ -3,7 +3,7 @@ Gnumeric 1.10.17
Andreas:
* Fix some style import from ODF. [#652492]
* Import/Export print formatting from/to ODF. [#653186]
- * Write some document properties to XLSX.
+ * Write and read some document properties to and from XLSX.
* Fix xlsx schema violations.
Morten:
diff --git a/plugins/excel/ChangeLog b/plugins/excel/ChangeLog
index 58a477b..35b5384 100644
--- a/plugins/excel/ChangeLog
+++ b/plugins/excel/ChangeLog
@@ -1,6 +1,21 @@
+2011-06-25 Andreas J. Guelzow <aguelzow pyrshep ca>
+
+ * xlsx-read-docprops.c (new)
+ * Makefile.am: add xlsx-read-docprops.c
+ * xlsx-utils.h: add XL_NS_PROP_*
+ * xlsx-write.c (xlsx_write_fills): start with some fills
+ expected by Excel
+ (xlsx_write_style): apparently Excel writes apply*
+ attributes always. So do we now.
+ * xlsx-read.c: add some includes, including
+ xlsx-read-docprops.c
+ (xlsx_ns): some more namespaces
+ (xlsx_file_open): call xlsx_read_docprops from
+ xlsx-read-docprops.c
+
2011-06-24 Andreas J. Guelzow <aguelzow pyrshep ca>
- * xlsx-write.c (xlsx_write_style): usually wrte an xfId,
+ * xlsx-write.c (xlsx_write_style): usually write an xfId,
write apply* attributes only if they are false (nobody
seems to write true ones)
(xlsx_write_cellStyleXfs): write a named style record to refer
diff --git a/plugins/excel/Makefile.am b/plugins/excel/Makefile.am
index d9f8da8..51a0b06 100644
--- a/plugins/excel/Makefile.am
+++ b/plugins/excel/Makefile.am
@@ -79,6 +79,7 @@ xml_DATA = $(xml_in_files:.xml.in=.xml)
@INTLTOOL_XML_RULE@
EXTRA_DIST = $(xml_in_files) \
+ xlsx-read-docprops.c \
xlsx-write-docprops.c \
xlsx-read-pivot.c \
xlsx-write-pivot.c \
diff --git a/plugins/excel/xlsx-read-docprops.c b/plugins/excel/xlsx-read-docprops.c
new file mode 100644
index 0000000..12356db
--- /dev/null
+++ b/plugins/excel/xlsx-read-docprops.c
@@ -0,0 +1,128 @@
+/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * xlsx-read-docprops.c : import MS Office Open xlsx document properties.
+ *
+ * Copyright (C) 2011 Andreas J. Guelzow All Rights Reserved
+ * (aguelzow pyrshep ca)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+/*
+ *
+ * DO * NOT * COMPILE * DIRECTLY *
+ * DO * NOT * COMPILE * DIRECTLY *
+ * DO * NOT * COMPILE * DIRECTLY *
+ *
+ * included via xlsx-read.c
+ **/
+
+/*
+ * Callback attached to the "keywords" element in xlsx_docprops_core_dtd.
+ *
+ * Placeholder only: the body is empty, so the keyword text of the
+ * element is currently not stored anywhere.
+ */
+static void
+xlsx_read_core_keys (GsfXMLIn *xin, G_GNUC_UNUSED GsfXMLBlob *blob)
+{
+/* TODO: not implemented.  Fields an implementation would likely read: */
+/* xin->content->str */
+/* xin->node->user_data.v_str */
+}
+
+/*
+ * Convert the text content of the current element into a GValue of
+ * type g_type and insert it into state->metadata, using the property
+ * name stored in the node's user data (v_str) as key.
+ *
+ * If gsf_xml_gvalue_from_str reports failure nothing is inserted and
+ * the freshly allocated GValue is freed again.
+ */
+static void
+xlsx_read_core_prop_type (GsfXMLIn *xin, GType g_type)
+{
+ XLSXReadState *state = (XLSXReadState *)xin->user_state;
+ GValue *res = g_new0 (GValue, 1);
+ if (gsf_xml_gvalue_from_str (res, g_type, xin->content->str))
+ /* a copy of the name is passed; presumably the metadata object */
+ /* takes ownership of both name and value -- confirm in libgsf */
+ gsf_doc_meta_data_insert
+ (state->metadata,
+ g_strdup (xin->node->user_data.v_str), res);
+ else
+ g_free (res);
+}
+
+/*
+ * Element callback for core properties that are stored as plain
+ * strings: forwards to xlsx_read_core_prop_type with G_TYPE_STRING.
+ */
+static void
+xlsx_read_core_prop (GsfXMLIn *xin, G_GNUC_UNUSED GsfXMLBlob *blob)
+{
+ xlsx_read_core_prop_type (xin, G_TYPE_STRING);
+}
+/*
+ * Element callback for core properties that are stored as timestamps:
+ * forwards to xlsx_read_core_prop_type with GSF_TIMESTAMP_TYPE.
+ */
+static void
+xlsx_read_core_prop_dt (GsfXMLIn *xin, G_GNUC_UNUSED GsfXMLBlob *blob)
+{
+ xlsx_read_core_prop_type (xin, GSF_TIMESTAMP_TYPE);
+}
+
+/*
+ * Parser table for the core-properties part.
+ *
+ * Each child of "coreProperties" carries, as user data (v_str), the
+ * metadata name under which its content is stored: string valued
+ * elements use xlsx_read_core_prop, date valued ones use
+ * xlsx_read_core_prop_dt.  "keywords" is routed to xlsx_read_core_keys
+ * and has no metadata name attached.
+ *
+ * Note that "lastModifiedBy" is stored as GSF_META_NAME_CREATOR while
+ * dc "creator" is stored as GSF_META_NAME_INITIAL_CREATOR.
+ */
+static GsfXMLInNode const xlsx_docprops_core_dtd[] = {
+GSF_XML_IN_NODE_FULL (START, START, -1, NULL, GSF_XML_NO_CONTENT, FALSE, TRUE, NULL, NULL, 0),
+GSF_XML_IN_NODE_FULL (START, CORE_PROPS, XL_NS_PROP_CP, "coreProperties", GSF_XML_NO_CONTENT, FALSE, TRUE, NULL, NULL, 0),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_CATEGORY, XL_NS_PROP_CP, "category", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = GSF_META_NAME_CATEGORY),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_CONTENT_STATUS, XL_NS_PROP_CP, "contentStatus", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = "cp:contentStatus"),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_CONTENT_TYPE, XL_NS_PROP_CP, "contentType", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = "cp:contentType"),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_KEYWORDS, XL_NS_PROP_CP, "keywords", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_keys, 0),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_LAST_MODIFIED_BY, XL_NS_PROP_CP, "lastModifiedBy", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = GSF_META_NAME_CREATOR),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_LAST_PRINTED, XL_NS_PROP_CP, "lastPrinted", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop_dt, .v_str = GSF_META_NAME_PRINT_DATE),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_REVISION, XL_NS_PROP_CP, "revision", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = GSF_META_NAME_REVISION_COUNT),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_VERSION, XL_NS_PROP_CP, "version", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = "cp:version"),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_CREATOR, XL_NS_PROP_DC, "creator", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = GSF_META_NAME_INITIAL_CREATOR),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_DESCRIPTION, XL_NS_PROP_DC, "description", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = GSF_META_NAME_DESCRIPTION),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_IDENTIFIER, XL_NS_PROP_DC, "identifier", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = "dc:identifier"),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_LANGUAGE, XL_NS_PROP_DC, "language", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = GSF_META_NAME_LANGUAGE),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_SUBJECT, XL_NS_PROP_DC, "subject", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = GSF_META_NAME_SUBJECT),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_TITLE, XL_NS_PROP_DC, "title", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop, .v_str = GSF_META_NAME_TITLE),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_CREATED, XL_NS_PROP_DCTERMS, "created", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop_dt, .v_str = GSF_META_NAME_DATE_CREATED),
+GSF_XML_IN_NODE_FULL (CORE_PROPS, PROP_MODIFIED, XL_NS_PROP_DCTERMS, "modified", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_core_prop_dt, .v_str = GSF_META_NAME_DATE_MODIFIED),
+GSF_XML_IN_NODE_END
+};
+
+/*
+ * Read the core-properties part of the package, if there is one, by
+ * parsing it with xlsx_docprops_core_dtd.  The callbacks of that table
+ * store their results in state->metadata.
+ */
+static void
+xlsx_read_docprops_core (XLSXReadState *state)
+{
+ GsfInput *in;
+ /* optional: a package without this relationship is simply skipped */
+ in = gsf_open_pkg_open_rel_by_type
+ (GSF_INPUT (state->zip),
+ "http://schemas.openxmlformats.org/package/2006/relationships/metadata/"
+ "core-properties", NULL);
+
+ if (in == NULL) return;
+ /* NOTE(review): `in` is not unreffed here, unlike in */
+ /* xlsx_read_docprops_extended; presumably xlsx_parse_stream */
+ /* releases it -- confirm. */
+ xlsx_parse_stream (state, in, xlsx_docprops_core_dtd);
+
+}
+
+/*
+ * Locate the extended-properties part of the package.  Parsing is not
+ * implemented yet: if the part exists it is opened and immediately
+ * released again without being read.
+ */
+static void
+xlsx_read_docprops_extended (XLSXReadState *state)
+{
+ GsfInput *in;
+ /* optional: a package without this relationship is simply skipped */
+ in = gsf_open_pkg_open_rel_by_type
+ (GSF_INPUT (state->zip),
+ "http://schemas.openxmlformats.org/officeDocument/2006/relationships/"
+ "extended-properties", NULL);
+
+ if (in == NULL) return;
+ /* NOTE(review): the disabled call below passes &state although */
+ /* state is already a pointer here; adjust before enabling it. */
+ /* xlsx_parse_stream (&state, in, xlsx_docprops_extended_dtd); */
+ g_object_unref (in);
+}
+
+/*
+ * Entry point for reading document properties.
+ *
+ * Creates a new GsfDocMetaData in state->metadata, lets the core and
+ * extended readers run, attaches the result to the workbook with
+ * go_doc_set_meta_data, and finally drops the local reference and
+ * resets state->metadata to NULL.
+ */
+static void
+xlsx_read_docprops (XLSXReadState *state)
+{
+ state->metadata = gsf_doc_meta_data_new ();
+
+ xlsx_read_docprops_core (state);
+ xlsx_read_docprops_extended (state);
+
+ go_doc_set_meta_data (GO_DOC (state->wb), state->metadata);
+ /* release the reference obtained from gsf_doc_meta_data_new above */
+ g_object_unref (state->metadata);
+ state->metadata = NULL;
+}
+
diff --git a/plugins/excel/xlsx-read.c b/plugins/excel/xlsx-read.c
index 147f3ab..fedc4df 100644
--- a/plugins/excel/xlsx-read.c
+++ b/plugins/excel/xlsx-read.c
@@ -65,6 +65,10 @@
#include <gsf/gsf-infile.h>
#include <gsf/gsf-infile-zip.h>
#include <gsf/gsf-open-pkg-utils.h>
+#include <gsf/gsf-meta-names.h>
+#include <gsf/gsf-doc-meta-data.h>
+#include <gsf/gsf-docprop-vector.h>
+#include <gsf/gsf-timestamp.h>
#include <glib/gi18n-lib.h>
#include <gmodule.h>
@@ -243,6 +247,8 @@ typedef struct {
GPtrArray *authors;
GObject *comment;
GString *comment_text;
+
+ GsfDocMetaData *metadata;
} XLSXReadState;
typedef struct {
GOString *str;
@@ -266,6 +272,11 @@ static GsfXMLInNS const xlsx_ns[] = {
GSF_XML_IN_NS (XL_NS_LEG_OFF, "urn:schemas-microsoft-com:office:office"),
GSF_XML_IN_NS (XL_NS_LEG_XL, "urn:schemas-microsoft-com:office:excel"),
GSF_XML_IN_NS (XL_NS_LEG_VML, "urn:schemas-microsoft-com:vml"),
+ GSF_XML_IN_NS (XL_NS_PROP_CP, "http://schemas.openxmlformats.org/package/2006/metadata/core-properties"),
+ GSF_XML_IN_NS (XL_NS_PROP_DC, "http://purl.org/dc/elements/1.1/"),
+ GSF_XML_IN_NS (XL_NS_PROP_DCMITYPE, "http://purl.org/dc/dcmitype"),
+ GSF_XML_IN_NS (XL_NS_PROP_DCTERMS, "http://purl.org/dc/terms/"),
+ GSF_XML_IN_NS (XL_NS_PROP_XSI, "http://www.w3.org/2001/XMLSchema-instance"),
{ NULL }
};
@@ -4198,6 +4209,8 @@ xlsx_style_array_free (GPtrArray *styles)
}
}
+#include "xlsx-read-docprops.c"
+
G_MODULE_EXPORT void
xlsx_file_open (GOFileOpener const *fo, GOIOContext *context,
WorkbookView *wb_view, GsfInput *input);
@@ -4255,6 +4268,8 @@ xlsx_file_open (GOFileOpener const *fo, GOIOContext *context,
xlsx_parse_stream (&state, in, xlsx_styles_dtd);
xlsx_parse_stream (&state, wb_part, xlsx_workbook_dtd);
+
+ xlsx_read_docprops (&state);
} else
go_cmd_context_error_import (GO_CMD_CONTEXT (context),
_("No workbook stream found."));
diff --git a/plugins/excel/xlsx-utils.h b/plugins/excel/xlsx-utils.h
index 016fdb5..7cefe9e 100644
--- a/plugins/excel/xlsx-utils.h
+++ b/plugins/excel/xlsx-utils.h
@@ -36,7 +36,12 @@ enum {
XL_NS_PKG_REL,
XL_NS_LEG_OFF,
XL_NS_LEG_XL,
- XL_NS_LEG_VML
+ XL_NS_LEG_VML,
+ XL_NS_PROP_CP,
+ XL_NS_PROP_DC,
+ XL_NS_PROP_DCMITYPE,
+ XL_NS_PROP_DCTERMS,
+ XL_NS_PROP_XSI
};
#define XLSX_MaxCol 16384
diff --git a/plugins/excel/xlsx-write.c b/plugins/excel/xlsx-write.c
index e528686..f3fb8e5 100644
--- a/plugins/excel/xlsx-write.c
+++ b/plugins/excel/xlsx-write.c
@@ -494,16 +494,28 @@ xlsx_write_fills (XLSXWriteState *state, GsfXMLOut *xml)
if (fill_n < 0) {
g_ptr_array_add (styles_w_fills, (gpointer)style);
g_hash_table_insert (fills_hash, (gpointer)style,
- GINT_TO_POINTER (styles_w_fills->len));
+ GINT_TO_POINTER (styles_w_fills->len + 1));
} else
g_hash_table_insert (fills_hash, (gpointer)style,
- GINT_TO_POINTER (fill_n + 1));
+ GINT_TO_POINTER (fill_n + 2));
}
}
if (styles_w_fills->len > 0) {
gsf_xml_out_start_element (xml, "fills");
- gsf_xml_out_add_int (xml, "count", styles_w_fills->len);
+ gsf_xml_out_add_int (xml, "count", styles_w_fills->len + 2);
+ /* Excel considers the first two fills special (not according to ECMA), */
+ /* so we start with two unused ones. */
+ gsf_xml_out_start_element (xml, "fill");
+ gsf_xml_out_start_element (xml, "patternFill");
+ gsf_xml_out_add_cstr_unchecked (xml, "patternType","none");
+ gsf_xml_out_end_element (xml);
+ gsf_xml_out_end_element (xml);
+ gsf_xml_out_start_element (xml, "fill");
+ gsf_xml_out_start_element (xml, "patternFill");
+ gsf_xml_out_add_cstr_unchecked (xml, "patternType","gray125");
+ gsf_xml_out_end_element (xml);
+ gsf_xml_out_end_element (xml);
for (i = 0 ; i < styles_w_fills->len ; i++) {
GnmStyle const *style = g_ptr_array_index (styles_w_fills, i);
gsf_xml_out_start_element (xml, "fill");
@@ -874,21 +886,16 @@ xlsx_write_style (XLSXWriteState *state, GsfXMLOut *xml,
gboolean num_fmt = gnm_style_is_element_set (style, MSTYLE_FORMAT);
if (id >= 0) {
- if (!alignment)
- xlsx_add_bool (xml, "applyAlignment", alignment);
- if (!border)
- xlsx_add_bool (xml, "applyBorder", border);
- if (!font)
- xlsx_add_bool (xml, "applyFont", font);
- if (!fill)
- xlsx_add_bool (xml, "applyFill", fill);
- if (!num_fmt)
- xlsx_add_bool (xml, "applyNumberFormat", num_fmt);
+ xlsx_add_bool (xml, "applyAlignment", alignment);
+ xlsx_add_bool (xml, "applyBorder", border);
+ xlsx_add_bool (xml, "applyFont", font);
+ xlsx_add_bool (xml, "applyFill", fill);
+ xlsx_add_bool (xml, "applyNumberFormat", num_fmt);
}
if (font)
gsf_xml_out_add_int (xml, "fontId", GPOINTER_TO_INT (tmp_font) - 1);
if (fill)
- gsf_xml_out_add_int (xml, "fillId", GPOINTER_TO_INT (tmp_fill) - 1);
+ gsf_xml_out_add_int (xml, "fillId", GPOINTER_TO_INT (tmp_fill));
if (border)
gsf_xml_out_add_int (xml, "borderId", GPOINTER_TO_INT (tmp_border) - 1);
if (num_fmt)
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]