[tracker] tracker-extract: Fixed epub extractor so it doesn't use broken ontology
- From: Martyn James Russell <mr src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker] tracker-extract: Fixed epub extractor so it doesn't use broken ontology
- Date: Fri, 14 Mar 2014 17:37:07 +0000 (UTC)
commit 2963354d90af92426d199486a02afe4a46c89394
Author: Martyn Russell <martyn lanedo com>
Date: Fri Mar 14 17:32:09 2014 +0000
tracker-extract: Fixed epub extractor so it doesn't use broken ontology
- The nco:nameOther property doesn't exist in the ontology. Now we use nco:nameAdditional
- The graph was not being used for inserts.
- The roles / affiliation were not inserted properly. We now use artist
(like other extractors)
src/tracker-extract/tracker-extract-epub.c | 75 +++++++++++++++------------
1 files changed, 42 insertions(+), 33 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-epub.c b/src/tracker-extract/tracker-extract-epub.c
index 667c13a..d36599a 100644
--- a/src/tracker-extract/tracker-extract-epub.c
+++ b/src/tracker-extract/tracker-extract-epub.c
@@ -46,8 +46,10 @@ typedef enum {
} OPFTagType;
typedef struct {
+ gchar *graph;
TrackerSparqlBuilder *preupdate;
TrackerSparqlBuilder *metadata;
+
OPFTagType element;
GList *pages;
guint in_metadata : 1;
@@ -61,18 +63,18 @@ typedef struct {
} OPFContentData;
static inline OPFData *
-opf_data_new (TrackerSparqlBuilder *preupdate,
- TrackerSparqlBuilder *metadata)
+opf_data_new (TrackerExtractInfo *info)
{
- OPFData *data = g_new0 (OPFData, 1);
+ OPFData *data = g_slice_new0 (OPFData);
+ TrackerSparqlBuilder *builder;
- if (metadata) {
- data->metadata = g_object_ref (metadata);
- }
+ builder = tracker_extract_info_get_preupdate_builder (info);
+ data->preupdate = g_object_ref (builder);
- if (preupdate) {
- data->preupdate = g_object_ref (preupdate);
- }
+ builder = tracker_extract_info_get_metadata_builder (info);
+ data->metadata = g_object_ref (builder);
+
+ data->graph = g_strdup (tracker_extract_info_get_graph (info));
return data;
}
@@ -100,6 +102,8 @@ opf_data_free (OPFData *data)
g_list_foreach (data->pages, (GFunc) g_free, NULL);
g_list_free (data->pages);
+ g_free (data->graph);
+
if (data->metadata) {
g_object_unref (data->metadata);
}
@@ -108,7 +112,7 @@ opf_data_free (OPFData *data)
g_object_unref (data->preupdate);
}
- g_free (data);
+ g_slice_free (OPFData, data);
}
/* Methods to parse the container.xml file
@@ -373,29 +377,37 @@ opf_xml_text_handler (GMarkupParseContext *context,
}
/* Role details */
+ role_uri = tracker_sparql_escape_uri_printf ("urn:artist:%s", fullname);
+
if (data->element == OPF_TAG_TYPE_AUTHOR) {
- role_uri = tracker_sparql_escape_uri_printf ("urn:role:author");
- role_str = "Author";
+ role_str = "nco:creator";
} else if (data->element == OPF_TAG_TYPE_EDITOR) {
- role_uri = tracker_sparql_escape_uri_printf ("urn:role:editor");
- role_str = "Editor";
+ /* Should this be nco:contributor ?
+ * 'Editor' is a bit vague here.
+ */
+ role_str = "nco:publisher";
} else if (data->element == OPF_TAG_TYPE_ILLUSTRATOR) {
- role_uri = tracker_sparql_escape_uri_printf ("urn:role:illustrator");
- role_str = "Illustrator";
+ /* There is no illustrator class, using contributor */
+ role_str = "nco:contributor";
} else {
g_assert ("Unknown role");
}
if (role_uri) {
- tracker_sparql_builder_insert_silent_open (data->preupdate, NULL);
+ tracker_sparql_builder_insert_open (data->preupdate, NULL);
+ if (data->graph) {
+ tracker_sparql_builder_graph_open (data->preupdate, data->graph);
+ }
tracker_sparql_builder_subject_iri (data->preupdate, role_uri);
-
tracker_sparql_builder_predicate (data->preupdate, "a");
- tracker_sparql_builder_object (data->preupdate, "nco:Role");
- tracker_sparql_builder_predicate (data->preupdate, "nco:role");
- tracker_sparql_builder_object_unvalidated (data->preupdate, role_str);
+ tracker_sparql_builder_object (data->preupdate, "nmm:Artist");
+ tracker_sparql_builder_predicate (data->preupdate, "nmm:artistName");
+ tracker_sparql_builder_object_unvalidated (data->preupdate, fullname);
+ if (data->graph) {
+ tracker_sparql_builder_graph_close (data->preupdate);
+ }
tracker_sparql_builder_insert_close (data->preupdate);
}
@@ -420,13 +432,13 @@ opf_xml_text_handler (GMarkupParseContext *context,
}
if (oname) {
- tracker_sparql_builder_predicate (data->metadata, "nco:nameOther");
+ tracker_sparql_builder_predicate (data->metadata, "nco:nameAdditional");
tracker_sparql_builder_object_unvalidated (data->metadata, oname);
g_free (oname);
}
if (role_uri) {
- tracker_sparql_builder_predicate (data->metadata, "nco:role");
+ tracker_sparql_builder_predicate (data->metadata, role_str);
tracker_sparql_builder_object_iri (data->metadata, role_uri);
g_free (role_uri);
}
@@ -585,8 +597,7 @@ extract_opf_contents (const gchar *uri,
static gboolean
extract_opf (const gchar *uri,
const gchar *opf_path,
- TrackerSparqlBuilder *preupdate,
- TrackerSparqlBuilder *metadata)
+ TrackerExtractInfo *info)
{
GMarkupParseContext *context;
OPFData *data = NULL;
@@ -601,10 +612,10 @@ extract_opf (const gchar *uri,
g_debug ("Extracting OPF file contents from EPUB '%s'", uri);
- data = opf_data_new (preupdate, metadata);
+ data = opf_data_new (info);
- tracker_sparql_builder_predicate (metadata, "a");
- tracker_sparql_builder_object (metadata, "nfo:TextDocument");
+ tracker_sparql_builder_predicate (data->metadata, "a");
+ tracker_sparql_builder_object (data->metadata, "nfo:TextDocument");
/* Create parsing context */
context = g_markup_parse_context_new (&opf_parser, 0, data, NULL);
@@ -628,8 +639,8 @@ extract_opf (const gchar *uri,
g_free (dirname);
if (contents && *contents) {
- tracker_sparql_builder_predicate (metadata, "nie:plainTextContent");
- tracker_sparql_builder_object_unvalidated (metadata, contents);
+ tracker_sparql_builder_predicate (data->metadata, "nie:plainTextContent");
+ tracker_sparql_builder_object_unvalidated (data->metadata, contents);
}
opf_data_free (data);
@@ -654,9 +665,7 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
return FALSE;
}
- extract_opf (uri, opf_path,
- tracker_extract_info_get_preupdate_builder (info),
- tracker_extract_info_get_metadata_builder (info));
+ extract_opf (uri, opf_path, info);
g_free (opf_path);
g_free (uri);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]