[gnumeric] read custom properties from xlsx files



commit ac28462593b8a9a76e9867554024e2520352b0ea
Author: Andreas J Guelzow <aguelzow pyrshep ca>
Date:   Sun Jun 26 14:20:49 2011 -0600

    read custom properties from xlsx files
    
    2011-06-26  Andreas J. Guelzow <aguelzow pyrshep ca>
    
    	* xlsx-utils.h (XL_NS_PROP_CUSTOM): new
    	* xlsx-read.c (XLSXReadState): new field meta_prop_name
    	(XL_NS_PROP_CUSTOM): new namespace
    	* xlsx-read-docprops.c (xlsx_read_property_begin): new
    	(xlsx_read_property_end): new
    	(xlsx_read_custom_property_type): new
    	(xlsx_read_custom_property): new
    	(xlsx_read_property_date): new
    	(xlsx_docprops_custom_dtd): new
    	(xlsx_docprops_extended_dtd): add elements
    	(xlsx_read_docprops_custom): new
    	(xlsx_read_docprops): call xlsx_read_docprops_custom

 plugins/excel/ChangeLog            |   15 +++++
 plugins/excel/xlsx-read-docprops.c |  103 ++++++++++++++++++++++++++++++++++-
 plugins/excel/xlsx-read.c          |    5 ++-
 plugins/excel/xlsx-utils.h         |    3 +-
 4 files changed, 121 insertions(+), 5 deletions(-)
---
diff --git a/plugins/excel/ChangeLog b/plugins/excel/ChangeLog
index 4618128..4589d91 100644
--- a/plugins/excel/ChangeLog
+++ b/plugins/excel/ChangeLog
@@ -1,5 +1,20 @@
 2011-06-26  Andreas J. Guelzow <aguelzow pyrshep ca>
 
+	* xlsx-utils.h (XL_NS_PROP_CUSTOM): new
+	* xlsx-read.c (XLSXReadState): new field meta_prop_name
+	(XL_NS_PROP_CUSTOM): new namespace
+	* xlsx-read-docprops.c (xlsx_read_property_begin): new
+	(xlsx_read_property_end): new
+	(xlsx_read_custom_property_type): new
+	(xlsx_read_custom_property): new
+	(xlsx_read_property_date): new
+	(xlsx_docprops_custom_dtd): new
+	(xlsx_docprops_extended_dtd): add elements
+	(xlsx_read_docprops_custom): new
+	(xlsx_read_docprops): call xlsx_read_docprops_custom
+
+2011-06-26  Andreas J. Guelzow <aguelzow pyrshep ca>
+
 	* xlsx-write.c: new includes (for xlsx-write-docprops.c)
 	* xlsx-write-docprops.c (xlsx_map_time_to_int): new
 	(xlsx_map_to_int): new
diff --git a/plugins/excel/xlsx-read-docprops.c b/plugins/excel/xlsx-read-docprops.c
index 5035be2..b61cd7f 100644
--- a/plugins/excel/xlsx-read-docprops.c
+++ b/plugins/excel/xlsx-read-docprops.c
@@ -73,6 +73,78 @@ xlsx_read_prop_boolean (GsfXMLIn *xin, G_GNUC_UNUSED GsfXMLBlob *blob)
 	xlsx_read_prop_type (xin, G_TYPE_BOOLEAN);
 }
 
+static void
+xlsx_read_property_begin (GsfXMLIn *xin, xmlChar const **attrs)
+{
+	XLSXReadState *state = (XLSXReadState *)xin->user_state;
+	xmlChar const *fmt_id = NULL, *pid = NULL, *name = NULL;
+
+	for (; attrs != NULL && attrs[0] && attrs[1] ; attrs += 2)
+		if (0 == strcmp (attrs[0], "fmtid"))
+			fmt_id = attrs[1];
+		else if (0 == strcmp (attrs[0], "pid"))
+			pid = attrs[1];
+		else if (0 == strcmp (attrs[0], "name"))
+			name = attrs[1];
+	if (name != NULL)
+		state->meta_prop_name = g_strdup (name);
+	else
+		state->meta_prop_name = g_strdup_printf ("%s-%s", fmt_id, pid);
+}
+
+static void
+xlsx_read_property_end (GsfXMLIn *xin, G_GNUC_UNUSED GsfXMLBlob *blob)
+{
+	XLSXReadState *state = (XLSXReadState *)xin->user_state;
+	g_free (state->meta_prop_name);
+	state->meta_prop_name = NULL;
+}
+
+static void
+xlsx_read_custom_property_type (GsfXMLIn *xin, GType g_type)
+{
+	XLSXReadState *state = (XLSXReadState *)xin->user_state;
+	GValue *res;
+	
+	if (state->meta_prop_name == NULL) {
+		xlsx_warning (xin, _("Corrupt file: Second child element in custom property encountered."));
+		return;
+	}
+
+	res = g_new0 (GValue, 1);
+	if (gsf_xml_gvalue_from_str (res, g_type, xin->content->str)) {
+		gsf_doc_meta_data_insert 
+			(state->metadata,
+			 state->meta_prop_name, res);
+		state->meta_prop_name = NULL;
+	} else
+		g_free (res);	
+}
+
+static void
+xlsx_read_custom_property (GsfXMLIn *xin, G_GNUC_UNUSED GsfXMLBlob *blob)
+{
+	xlsx_read_custom_property_type (xin, xin->node->user_data.v_int);
+}
+
+static void
+xlsx_read_property_date (GsfXMLIn *xin, G_GNUC_UNUSED GsfXMLBlob *blob)
+{
+	xlsx_read_custom_property_type (xin, GSF_TIMESTAMP_TYPE);
+}
+
+static GsfXMLInNode const xlsx_docprops_custom_dtd[] = {
+GSF_XML_IN_NODE_FULL (START, START, -1, NULL, GSF_XML_NO_CONTENT, FALSE, TRUE, NULL, NULL, 0),
+GSF_XML_IN_NODE_FULL (START, CUSTOM_PROPS, XL_NS_PROP_CUSTOM, "Properties", GSF_XML_NO_CONTENT, FALSE, TRUE, NULL, NULL, 0),
+GSF_XML_IN_NODE (CUSTOM_PROPS, CUSTOM_PROP, XL_NS_PROP_CUSTOM, "property", GSF_XML_NO_CONTENT, &xlsx_read_property_begin, &xlsx_read_property_end),
+GSF_XML_IN_NODE_FULL (CUSTOM_PROP, CUSTOM_PROP_LPWSTR, XL_NS_PROP_VT, "lpwstr", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_custom_property, G_TYPE_STRING),
+GSF_XML_IN_NODE_FULL (CUSTOM_PROP, CUSTOM_PROP_LPSTR, XL_NS_PROP_VT, "lpstr", GSF_XML_CONTENT,  FALSE, FALSE, NULL, &xlsx_read_custom_property, G_TYPE_STRING),
+GSF_XML_IN_NODE_FULL (CUSTOM_PROP, CUSTOM_PROP_I4, XL_NS_PROP_VT, "i4", GSF_XML_CONTENT,  FALSE, FALSE, NULL, &xlsx_read_custom_property, G_TYPE_INT),
+GSF_XML_IN_NODE_FULL (CUSTOM_PROP, CUSTOM_PROP_BOOL, XL_NS_PROP_VT, "bool", GSF_XML_CONTENT,  FALSE, FALSE, NULL, &xlsx_read_custom_property, G_TYPE_BOOLEAN),
+GSF_XML_IN_NODE (CUSTOM_PROP, CUSTOM_PROP_DATE, XL_NS_PROP_VT, "date", GSF_XML_CONTENT, NULL, &xlsx_read_property_date),
+GSF_XML_IN_NODE_END
+};
+
 static GsfXMLInNode const xlsx_docprops_extended_dtd[] = {
 GSF_XML_IN_NODE_FULL (START, START, -1, NULL, GSF_XML_NO_CONTENT, FALSE, TRUE, NULL, NULL, 0),
 GSF_XML_IN_NODE_FULL (START, X_PROPS, XL_NS_PROP, "Properties", GSF_XML_NO_CONTENT, FALSE, TRUE, NULL, NULL, 0),
@@ -98,9 +170,19 @@ GSF_XML_IN_NODE_FULL (X_PROPS, X_PROP_HYPERLINK_BASE, XL_NS_PROP, "HyperlinkBase
 GSF_XML_IN_NODE_FULL (X_PROPS, X_PROP_HYPERLINKS_CHANGED, XL_NS_PROP, "HyperlinksChanged", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_prop_boolean, .v_str = "xlsx:HyperlinksChanged"),
 GSF_XML_IN_NODE_FULL (X_PROPS, X_PROP_DOC_SECURITY, XL_NS_PROP, "DocSecurity", GSF_XML_CONTENT, FALSE, FALSE, NULL, &xlsx_read_prop_int, .v_str = GSF_META_NAME_SECURITY),
 GSF_XML_IN_NODE_FULL (X_PROPS, X_PROP_DIG_SIG, XL_NS_PROP, "DigSig", GSF_XML_CONTENT, FALSE, FALSE, NULL, NULL, .v_str = ""),
-GSF_XML_IN_NODE_FULL (X_PROPS, X_PROP_HEADING_PAIRS, XL_NS_PROP, "HeadingPairs", GSF_XML_CONTENT, FALSE, FALSE, NULL, NULL, .v_str = ""),
-GSF_XML_IN_NODE_FULL (X_PROPS, X_PROP_HLINKS, XL_NS_PROP, "HLinks", GSF_XML_CONTENT, FALSE, FALSE, NULL, NULL, .v_str = ""),
-GSF_XML_IN_NODE_FULL (X_PROPS, X_PROP_TITLES_OF_PARTS, XL_NS_PROP, "TitlesOfParts", GSF_XML_CONTENT, FALSE, FALSE, NULL, NULL, .v_str = ""),
+GSF_XML_IN_NODE_FULL (X_PROPS, X_PROP_HEADING_PAIRS, XL_NS_PROP, "HeadingPairs", GSF_XML_NO_CONTENT, FALSE, FALSE, NULL, NULL, .v_str = ""),
+GSF_XML_IN_NODE (X_PROP_HEADING_PAIRS, X_PROP_SUB_VECTOR, XL_NS_PROP_VT, "vector", GSF_XML_NO_CONTENT, NULL, NULL),
+GSF_XML_IN_NODE (X_PROP_SUB_VECTOR, X_PROP_SUB_LPWSTR, XL_NS_PROP_VT, "lpwstr", GSF_XML_NO_CONTENT, NULL, NULL),
+GSF_XML_IN_NODE (X_PROP_SUB_VECTOR, X_PROP_SUB_LPSTR, XL_NS_PROP_VT, "lpstr", GSF_XML_NO_CONTENT, NULL, NULL),
+GSF_XML_IN_NODE (X_PROP_SUB_VECTOR, X_PROP_SUB_I4, XL_NS_PROP_VT, "i4", GSF_XML_NO_CONTENT, NULL, NULL),
+GSF_XML_IN_NODE (X_PROP_SUB_VECTOR, X_PROP_SUB_VARIANT, XL_NS_PROP_VT, "variant", GSF_XML_NO_CONTENT, NULL, NULL),
+GSF_XML_IN_NODE (X_PROP_SUB_VARIANT, X_PROP_SUB_LPWSTR, XL_NS_PROP_VT, "lpwstr", GSF_XML_NO_CONTENT, NULL, NULL), /* 2nd */
+GSF_XML_IN_NODE (X_PROP_SUB_VARIANT, X_PROP_SUB_LPSTR, XL_NS_PROP_VT, "lpstr", GSF_XML_NO_CONTENT, NULL, NULL), /* 2nd */
+GSF_XML_IN_NODE (X_PROP_SUB_VARIANT, X_PROP_SUB_I4, XL_NS_PROP_VT, "i4", GSF_XML_NO_CONTENT, NULL, NULL), /* 2nd */
+GSF_XML_IN_NODE_FULL (X_PROPS, X_PROP_HLINKS, XL_NS_PROP, "HLinks", GSF_XML_NO_CONTENT, FALSE, FALSE, NULL, NULL, .v_str = ""),
+GSF_XML_IN_NODE (X_PROP_HLINKS, X_PROP_SUB_VECTOR, XL_NS_PROP_VT, "vector", GSF_XML_NO_CONTENT, NULL, NULL), /* 2nd */
+GSF_XML_IN_NODE_FULL (X_PROPS, X_PROP_TITLES_OF_PARTS, XL_NS_PROP, "TitlesOfParts", GSF_XML_NO_CONTENT, FALSE, FALSE, NULL, NULL, .v_str = ""),
+GSF_XML_IN_NODE (X_PROP_TITLES_OF_PARTS, X_PROP_SUB_VECTOR, XL_NS_PROP_VT, "vector", GSF_XML_NO_CONTENT, NULL, NULL), /* 2nd */
 GSF_XML_IN_NODE_FULL (X_PROPS, X_PROP_APPLICATION, XL_NS_PROP, "Application", GSF_XML_CONTENT, FALSE, FALSE, NULL, NULL, .v_str = ""),
 GSF_XML_IN_NODE_FULL (X_PROPS, X_PROP_APP_VERSION, XL_NS_PROP, "AppVersion", GSF_XML_CONTENT, FALSE, FALSE, NULL, NULL, .v_str = ""),
 GSF_XML_IN_NODE_END
@@ -158,12 +240,27 @@ xlsx_read_docprops_extended (XLSXReadState *state)
 }
 
 static void
+xlsx_read_docprops_custom (XLSXReadState *state)
+{
+	GsfInput *in;
+	/* optional */
+	in = gsf_open_pkg_open_rel_by_type 
+		(GSF_INPUT (state->zip),
+		 "http://schemas.openxmlformats.org/officeDocument/2006/relationships/"
+		 "custom-properties", NULL);
+
+	if (in == NULL) return;
+	xlsx_parse_stream (state, in, xlsx_docprops_custom_dtd);
+}
+
+static void
 xlsx_read_docprops (XLSXReadState *state)
 {
 	state->metadata = gsf_doc_meta_data_new ();
 
 	xlsx_read_docprops_core (state);
 	xlsx_read_docprops_extended (state);
+	xlsx_read_docprops_custom (state);
 
 	go_doc_set_meta_data (GO_DOC (state->wb), state->metadata);
 	g_object_unref (state->metadata);
diff --git a/plugins/excel/xlsx-read.c b/plugins/excel/xlsx-read.c
index 3e25ee6..affabf2 100644
--- a/plugins/excel/xlsx-read.c
+++ b/plugins/excel/xlsx-read.c
@@ -248,7 +248,9 @@ typedef struct {
 	GObject		*comment;
 	GString		*comment_text;
 
+	/* Document Properties */
 	GsfDocMetaData   *metadata;
+	char *meta_prop_name;
 } XLSXReadState;
 typedef struct {
 	GOString	*str;
@@ -264,7 +266,7 @@ static GsfXMLInNS const xlsx_ns[] = {
 	GSF_XML_IN_NS (XL_NS_SS_DRAW,	"http://schemas.openxmlformats.org/drawingml/2006/3/spreadsheetDrawing"), /* Office 12 BETA-2 Technical Refresh */
 	GSF_XML_IN_NS (XL_NS_CHART,	"http://schemas.openxmlformats.org/drawingml/2006/3/chart"),		  /* Office 12 BETA-2 */
 	GSF_XML_IN_NS (XL_NS_CHART,	"http://schemas.openxmlformats.org/drawingml/2006/chart"),		  /* Office 12 BETA-2 Technical Refresh */
-	GSF_XML_IN_NS (XL_NS_CHART_DRAW, "http://schemas.openxmlformats.org/drawingml/2006/chartDrawing"),
+	GSF_XML_IN_NS (XL_NS_CHART_DRAW,    "http://schemas.openxmlformats.org/drawingml/2006/chartDrawing"),
 	GSF_XML_IN_NS (XL_NS_DRAW,	"http://schemas.openxmlformats.org/drawingml/2006/3/main"),		  /* Office 12 BETA-2 */
 	GSF_XML_IN_NS (XL_NS_DRAW,	"http://schemas.openxmlformats.org/drawingml/2006/main"),		  /* Office 12 BETA-2 Technical Refresh */
 	GSF_XML_IN_NS (XL_NS_DOC_REL,	"http://schemas.openxmlformats.org/officeDocument/2006/relationships"),
@@ -279,6 +281,7 @@ static GsfXMLInNS const xlsx_ns[] = {
 	GSF_XML_IN_NS (XL_NS_PROP_XSI,  "http://www.w3.org/2001/XMLSchema-instance"),
 	GSF_XML_IN_NS (XL_NS_PROP,      "http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"),
 	GSF_XML_IN_NS (XL_NS_PROP_VT,   "http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"),
+	GSF_XML_IN_NS (XL_NS_PROP_CUSTOM,   "http://schemas.openxmlformats.org/officeDocument/2006/custom-properties"),
 	{ NULL }
 };
 
diff --git a/plugins/excel/xlsx-utils.h b/plugins/excel/xlsx-utils.h
index 6b1419d..c38d52b 100644
--- a/plugins/excel/xlsx-utils.h
+++ b/plugins/excel/xlsx-utils.h
@@ -43,7 +43,8 @@ enum {
 	XL_NS_PROP_DCTERMS,
 	XL_NS_PROP_XSI,
 	XL_NS_PROP,
-	XL_NS_PROP_VT
+	XL_NS_PROP_VT,
+	XL_NS_PROP_CUSTOM
 };
 
 #define XLSX_MaxCol	16384



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]