[tracker] rss: Add support for enclosures
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker] rss: Add support for enclosures
- Date: Fri, 28 Jul 2017 11:45:33 +0000 (UTC)
commit deb5f6cf46e2cdbd8493e3bf12c19b13ed492fca
Author: Lucas Satabin <lucas satabin gnieh org>
Date: Tue Jul 25 19:18:26 2017 +0200
rss: Add support for enclosures
Some feeds (e.g. podcasts) use the `enclosure` element of the RSS feed
format. The ontology supports these elements through the
`mfo:enclosureList` relationship and `mfo:Enclosure` class.
Support for enclosures is simply added by looping over such elements if
any, and adding the relationships upon feed message insertion or update.
Enclosures are deleted when the `mfo:FeedMessage` that links to them is
deleted.
Also fixes a double free that occurred when the contributor list is not empty.
https://bugzilla.gnome.org/show_bug.cgi?id=785454
src/miners/rss/tracker-miner-rss.c | 82 +++++++++++++++++++++++++++++++++---
1 files changed, 76 insertions(+), 6 deletions(-)
---
diff --git a/src/miners/rss/tracker-miner-rss.c b/src/miners/rss/tracker-miner-rss.c
index daa09cb..3d87052 100644
--- a/src/miners/rss/tracker-miner-rss.c
+++ b/src/miners/rss/tracker-miner-rss.c
@@ -303,8 +303,10 @@ static void
delete_unbound_messages (TrackerMinerRSS *miner)
{
tracker_sparql_connection_update_async (tracker_miner_get_connection (TRACKER_MINER (miner)),
- "DELETE { ?msg a rdfs:Resource }"
- "WHERE { ?msg a mfo:FeedMessage ."
+ "DELETE { ?msg a rdfs:Resource."
+ " ?encl a rdfs:Resource. }"
+ "WHERE { ?msg a mfo:FeedMessage ;"
+ " mfo:enclosureList ?encl ."
" FILTER(!BOUND(nmo:communicationChannel(?msg)))"
"}",
G_PRIORITY_DEFAULT,
@@ -720,6 +722,37 @@ sparql_add_contact (TrackerSparqlBuilder *sparql,
}
}
+static void
+sparql_add_enclosure (TrackerSparqlBuilder *sparql,
+ const gchar *subject,
+ GrssFeedEnclosure *enclosure)
+{
+ gchar *url;
+ gint length;
+ gchar * format;
+
+ url = grss_feed_enclosure_get_url (enclosure);
+ length = grss_feed_enclosure_get_length (enclosure);
+ format = grss_feed_enclosure_get_format (enclosure);
+
+ tracker_sparql_builder_subject (sparql, subject);
+ tracker_sparql_builder_predicate (sparql, "a");
+ tracker_sparql_builder_object (sparql, "mfo:Enclosure");
+ tracker_sparql_builder_object (sparql, "nfo:RemoteDataObject");
+
+ tracker_sparql_builder_predicate (sparql, "mfo:remoteLink");
+ tracker_sparql_builder_object_unvalidated (sparql, url);
+
+ tracker_sparql_builder_predicate (sparql, "nfo:fileSize");
+ tracker_sparql_builder_object_int64 (sparql, length);
+
+ if (format != NULL) {
+ tracker_sparql_builder_predicate (sparql, "nie:mimeType");
+ tracker_sparql_builder_object_unvalidated (sparql, format);
+ }
+
+}
+
static gchar *
feed_message_create_update_channel_query (const gchar *item_urn,
GrssFeedItem *item)
@@ -761,10 +794,14 @@ feed_message_create_insert_builder (TrackerMinerRSS *miner,
GrssFeedChannel *channel;
gboolean has_geolocation;
const GList *contributors;
+ const GList *enclosures;
const GList *list, *l;
GList *contrib_aliases = NULL;
+ GList *enclosure_aliases = NULL;
GHashTable *websites;
+ GHashTable *enclosure_urls;
gboolean is_iri = FALSE;
+ gint i = 0;
if (!item_urn) {
item_urn = "_:message";
@@ -784,6 +821,7 @@ feed_message_create_insert_builder (TrackerMinerRSS *miner,
author = grss_feed_item_get_author (item);
contributors = grss_feed_item_get_contributors (item);
channel = grss_feed_item_get_parent (item);
+ enclosures = grss_feed_item_get_enclosures (item);
for (l = contributors; l; l = l->next) {
const gchar *person_url;
@@ -840,9 +878,9 @@ feed_message_create_insert_builder (TrackerMinerRSS *miner,
(tmp_string && tmp_string[0] != '_'));
}
+ i = 0;
for (l = contributors; l; l = l->next) {
gchar *subject;
- gint i = 0;
g_debug (" Contributor:'%s'", grss_person_get_name (l->data));
@@ -856,7 +894,33 @@ feed_message_create_insert_builder (TrackerMinerRSS *miner,
sparql_add_contact (sparql, subject, l->data, tmp_string,
(tmp_string && tmp_string[0] != '_'));
- g_free (subject);
+
+ /* subject is not freed right now as it will be freed at the end of the
+ * method in a foreach loop over the contrib_aliases list */
+ }
+
+ enclosure_urls = g_hash_table_new_full (g_str_hash, g_str_equal,
+ (GDestroyNotify) g_free,
+ (GDestroyNotify) g_free);
+ i = 0;
+ for (l = enclosures; l; l = l->next) {
+ gchar *subject;
+ gchar *url;
+
+ url = grss_feed_enclosure_get_url (l->data);
+
+ if (!g_hash_table_contains (enclosure_urls, url)) {
+ g_debug (" Enclosure:'%s'", url);
+ subject = g_strdup_printf ("_:enclosure%d", i++);
+ enclosure_aliases = g_list_prepend (enclosure_aliases, subject);
+ g_hash_table_insert (enclosure_urls, g_strdup (url), NULL);
+
+ sparql_add_enclosure (sparql, subject, l->data);
+
+ }
+
+ /* subject is not freed right now as it will be freed at the end of the
+ * method in a foreach loop over the enclosure_aliases list */
}
if (is_iri) {
@@ -892,6 +956,11 @@ feed_message_create_insert_builder (TrackerMinerRSS *miner,
tracker_sparql_builder_object (sparql, l->data);
}
+ for (l = enclosure_aliases; l; l = l->next) {
+ tracker_sparql_builder_predicate (sparql, "mfo:enclosureList");
+ tracker_sparql_builder_object (sparql, l->data);
+ }
+
tmp_string = grss_feed_item_get_description (item);
if (tmp_string != NULL) {
gchar *plain_text;
@@ -946,9 +1015,10 @@ feed_message_create_insert_builder (TrackerMinerRSS *miner,
tracker_sparql_builder_insert_close (sparql);
- g_list_foreach (contrib_aliases, (GFunc) g_free, NULL);
- g_list_free (contrib_aliases);
+ g_list_free_full (contrib_aliases, g_free);
+ g_list_free_full (enclosure_aliases, g_free);
g_hash_table_destroy (websites);
+ g_hash_table_destroy (enclosure_urls);
return sparql;
}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]