[tracker/rss-enclosures] tracker-miner-rss: Added support for RSS enclosures and FeedMessages auto-expiry



commit 7dc3c61e5755f9cbae78cee7595dee5360a6ad03
Author: Roberto Guido <bob4mail gmail com>
Date:   Mon Mar 29 21:58:30 2010 +0200

    tracker-miner-rss: Added support for RSS enclosures and FeedMessages auto-expiry
    
    Signed-off-by: Michele Tameni <michele tameni it>

 src/miners/rss/Makefile.am           |    6 +-
 src/miners/rss/tracker-main.c        |    2 +
 src/miners/rss/tracker-miner-rss.c   |  355 ++++++++++++++++++++++++++++------
 src/miners/rss/wrap-feed-channel.c   |  233 ++++++++++++++++++++++
 src/miners/rss/wrap-feed-channel.h   |   58 ++++++
 src/miners/rss/wrap-feed-enclosure.c |  283 +++++++++++++++++++++++++++
 src/miners/rss/wrap-feed-enclosure.h |   49 +++++
 7 files changed, 927 insertions(+), 59 deletions(-)
---
diff --git a/src/miners/rss/Makefile.am b/src/miners/rss/Makefile.am
index 316b178..89185c9 100644
--- a/src/miners/rss/Makefile.am
+++ b/src/miners/rss/Makefile.am
@@ -21,7 +21,11 @@ libexec_PROGRAMS = tracker-miner-rss
 tracker_miner_rss_SOURCES =						\
 	tracker-main.c							\
 	tracker-miner-rss.h						\
-	tracker-miner-rss.c
+	tracker-miner-rss.c                                             \
+	wrap-feed-channel.h                                             \
+	wrap-feed-channel.c                                             \
+	wrap-feed-enclosure.h                                           \
+	wrap-feed-enclosure.c
 
 tracker_miner_rss_LDADD =						\
 	$(top_builddir)/src/libtracker-client/libtracker-client-@TRACKER_API_VERSION@.la \
diff --git a/src/miners/rss/tracker-main.c b/src/miners/rss/tracker-main.c
index 8411aba..98f2c51 100644
--- a/src/miners/rss/tracker-main.c
+++ b/src/miners/rss/tracker-main.c
@@ -50,6 +50,7 @@ main (int argc, char **argv)
 
 	g_type_init ();
 	g_thread_init (NULL);
+	dbus_g_thread_init ();
 
 	setlocale (LC_ALL, "");
 
@@ -74,6 +75,7 @@ main (int argc, char **argv)
 	tracker_miner_start (TRACKER_MINER (miner));
 
 	loop = g_main_loop_new (NULL, FALSE);
+	g_print ("Running.\n");
 	g_main_loop_run (loop);
 
 	tracker_log_shutdown ();
diff --git a/src/miners/rss/tracker-miner-rss.c b/src/miners/rss/tracker-miner-rss.c
index d4ef515..b106722 100644
--- a/src/miners/rss/tracker-miner-rss.c
+++ b/src/miners/rss/tracker-miner-rss.c
@@ -29,6 +29,8 @@
 #include <glib/gi18n.h>
 
 #include "tracker-miner-rss.h"
+#include "wrap-feed-channel.h"
+#include "wrap-feed-enclosure.h"
 
 #define TRACKER_MINER_RSS_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE ((obj), TRACKER_TYPE_MINER_RSS, TrackerMinerRSSPrivate))
 
@@ -128,7 +130,7 @@ tracker_miner_rss_init (TrackerMinerRSS *object)
 	GError *error = NULL;
 	TrackerMinerRSSPrivate *priv;
 
-	g_message ("Initializing...");
+	g_print ("Initializing...\n");
 
 	connection = dbus_g_bus_get (DBUS_BUS_SESSION, &error);
 
@@ -149,7 +151,7 @@ tracker_miner_rss_init (TrackerMinerRSS *object)
 	/*                                   "org.freedesktop.Tracker1.Resources.Class"); */
 
 	if (!proxy) {
-		g_message ("Could not create DBusGProxy for interface:'%s'",
+		g_message ("Could not create DBusGProxy for interface: '%s'",
 		           TRACKER_DBUS_INTERFACE_FEED);
 		return;
 	}
@@ -191,7 +193,7 @@ verify_channel_update (GObject      *source,
 
 static void
 update_updated_interval (TrackerMinerRSS *miner,
-                         gchar           *uri,
+                         const gchar     *uri,
                          time_t          *now)
 {
 	TrackerSparqlBuilder *sparql;
@@ -229,6 +231,137 @@ update_updated_interval (TrackerMinerRSS *miner,
 }
 
 static void
+enclosure_downloaded_cb (SoupSession *session,
+                         SoupMessage *msg,
+                         gpointer     user_data)
+{
+	int status;
+	WrapFeedEnclosure *enclosure;
+
+	enclosure = user_data;
+	g_object_get (msg, "status-code", &status, NULL);
+
+	if (status < 200 || status > 299) {
+		g_warning ("Unable to download enclosure.");
+	}
+	else {
+		wrap_feed_enclosure_save_data (enclosure,
+					       g_memdup (msg->response_body->data, msg->response_body->length),
+					       msg->response_body->length);
+	}
+
+	g_object_unref (enclosure);
+}
+
+static void
+download_enclosure_now (TrackerMinerRSS *miner, FeedEnclosure *enclosure, WrapFeedChannel *channel)
+{
+	const gchar *url;
+	SoupMessage *msg;
+	TrackerMinerRSSPrivate *priv;
+	WrapFeedEnclosure *wrap_enc;
+
+	priv = TRACKER_MINER_RSS_GET_PRIVATE (miner);
+
+	wrap_enc = wrap_feed_enclosure_new (enclosure, channel);
+	url = feed_enclosure_get_url (enclosure);
+	msg = soup_message_new ("GET", url);
+
+	g_message ("Downloading enclosures in %s", url);
+
+	priv = TRACKER_MINER_RSS_GET_PRIVATE (miner);
+	soup_session_queue_message (feeds_pool_get_session (priv->pool), msg,
+				    enclosure_downloaded_cb, wrap_enc);
+}
+
+static void
+manage_enclosure (TrackerMinerRSS *miner,
+                  WrapFeedChannel *feed,
+                  FeedEnclosure   *enclosure)
+{
+	int size;
+	const gchar *path;
+
+	if (wrap_feed_channel_get_download_enclosures (feed) == FALSE)
+		return;
+
+	size = wrap_feed_channel_get_enclosures_maxsize (feed);
+	if (size > 0 && (feed_enclosure_get_length (enclosure) * 1024) > size)
+		return;
+
+	path = wrap_feed_channel_get_enclosures_saving_path (feed);
+	if (path == NULL) {
+		/* TODO Provide a fallback? */
+		g_warning ("No saving folder set for enclosures.");
+		return;
+	}
+
+	download_enclosure_now (miner, enclosure, feed);
+}
+
+static int
+queue_enclosures (TrackerMinerRSS      *miner,
+                  WrapFeedChannel      *channel,
+                  FeedItem             *item,
+                  TrackerSparqlBuilder *sparql)
+{
+	int num;
+	const gchar *tmp_string;
+	gchar *subject;
+	gchar *rsubject;
+	const GList *iter;
+	FeedEnclosure *enc;
+
+	for (iter = feed_item_get_enclosures (item), num = 0; iter; iter = iter->next) {
+		enc = iter->data;
+
+		tmp_string = feed_enclosure_get_url (enc);
+		if (tmp_string == NULL) {
+			g_message ("Enclosure without URL, skipping");
+			continue;
+		}
+
+		rsubject = g_strdup_printf ("_:enclosurefile%d", num);
+		subject = g_strdup_printf ("_:enclosure%d", num);
+
+		tracker_sparql_builder_insert_open (sparql, rsubject);
+
+		tracker_sparql_builder_subject (sparql, rsubject);
+		tracker_sparql_builder_predicate (sparql, "a");
+		tracker_sparql_builder_object (sparql, "nfo:RemoteDataObject");
+		tracker_sparql_builder_predicate (sparql, "a");
+		tracker_sparql_builder_object (sparql, "nie:InformationElement");
+
+		tracker_sparql_builder_predicate (sparql, "nie:url");
+		tracker_sparql_builder_object_unvalidated (sparql, tmp_string);
+
+		tracker_sparql_builder_predicate (sparql, "nfo:fileSize");
+		tracker_sparql_builder_object_int64 (sparql, (gint64) feed_enclosure_get_length (enc));
+
+		tmp_string = feed_enclosure_get_format (enc);
+		if (tmp_string != NULL) {
+			tracker_sparql_builder_predicate (sparql, "nie:mimeType");
+			tracker_sparql_builder_object_unvalidated (sparql, tmp_string);
+		}
+
+		tracker_sparql_builder_subject (sparql, subject);
+		tracker_sparql_builder_predicate (sparql, "a");
+		tracker_sparql_builder_object (sparql, "mfo:Enclosure");
+
+		tracker_sparql_builder_predicate (sparql, "mfo:remoteLink");
+		tracker_sparql_builder_object (sparql, rsubject);
+
+		g_free (rsubject);
+		g_free (subject);
+
+		manage_enclosure (miner, channel, enc);
+		num++;
+	}
+
+	return num;
+}
+
+static void
 change_status (FeedsPool   *pool,
                FeedChannel *feed,
                gpointer     user_data)
@@ -247,7 +380,7 @@ change_status (FeedsPool   *pool,
 	if (priv->now_fetching > avail)
 		priv->now_fetching = avail;
 
-	g_message ("Fetching channel '%s' (in progress: %d/%d)", 
+	g_message ("Fetching channel '%s' (in progress: %d/%d)",
 	           feed_channel_get_source (feed),
 	           priv->now_fetching,
 	           avail);
@@ -262,14 +395,19 @@ verify_item_insertion (GObject      *source,
                        gpointer      user_data)
 {
 	GError *error;
+	FeedItem *item;
 
-	error = NULL;
+	item = user_data;
 
+	error = NULL;
 	tracker_miner_execute_update_finish (TRACKER_MINER (source), result, &error);
+
 	if (error != NULL) {
 		g_critical ("Could not insert feed information, %s", error->message);
 		g_error_free (error);
 	}
+
+	g_object_unref (item);
 }
 
 static void
@@ -277,22 +415,26 @@ item_verify_reply_cb (GObject      *source_object,
                       GAsyncResult *res,
                       gpointer      user_data)
 {
+	int i;
+	int enclosures_num;
+	gboolean has_geopoint;
 	time_t t;
-	gchar *uri;
-	gchar **values;
+	gchar *enclosure_ref;
+	const gchar *uri;
 	const gchar *url;
+	const gchar *tmp_string;
 	gdouble latitude;
 	gdouble longitude;
-	const gchar *tmp_string;
-	const GPtrArray *response;
 	GError *error;
 	TrackerSparqlBuilder *sparql;
 	FeedItem *item;
 	FeedChannel *feed;
 	TrackerMinerRSS *miner;
-	gboolean has_geopoint;
+	TrackerResultIterator *response;
 
 	miner = TRACKER_MINER_RSS (source_object);
+	item = user_data;
+
 	error = NULL;
 	response = tracker_miner_execute_sparql_finish (TRACKER_MINER (source_object),
 	                                                res,
@@ -300,31 +442,43 @@ item_verify_reply_cb (GObject      *source_object,
 
 	if (error != NULL) {
 		g_message ("Could not verify feed existance, %s", error->message);
+		g_object_unref (item);
 		g_error_free (error);
 		return;
 	}
 
-	values = g_ptr_array_index (response, 0);
-	if (g_strcmp0 (values[0], "1") == 0) {
+	tracker_result_iterator_next (response);
+	tmp_string = tracker_result_iterator_value (response, 0);
+	if (g_strcmp0 (tmp_string, "1") == 0) {
+		g_object_unref (item);
 		return;
 	}
 
-	item = user_data;
+	feed = feed_item_get_parent (item);
 
 	url = get_message_url (item);
 
 	g_message ("Updating feed information for '%s'", url);
 
 	sparql = tracker_sparql_builder_new_update ();
+	enclosures_num = queue_enclosures (miner, WRAP_FEED_CHANNEL (feed), item, sparql);
 
 	has_geopoint = feed_item_get_geo_point (item, &latitude, &longitude);
-	tracker_sparql_builder_insert_open (sparql, url);
 
-	if (has_geopoint) {
-		g_message ("  Geopoint, using longitude:%f, latitude:%f", 
+	tracker_sparql_builder_insert_open (sparql, "_:message");
+	tracker_sparql_builder_subject (sparql, "_:message");
+	tracker_sparql_builder_predicate (sparql, "a");
+	tracker_sparql_builder_object (sparql, "mfo:FeedMessage");
+	tracker_sparql_builder_predicate (sparql, "a");
+	tracker_sparql_builder_object (sparql, "nfo:RemoteDataObject");
+
+	if (has_geopoint == TRUE) {
+		g_message ("  Geopoint, using longitude:%f, latitude:%f",
 		           longitude, latitude);
 
-		tracker_sparql_builder_subject (sparql, "_:location");
+		tracker_sparql_builder_predicate (sparql, "mlo:location");
+
+		tracker_sparql_builder_object_blank_open (sparql);
 		tracker_sparql_builder_predicate (sparql, "a");
 		tracker_sparql_builder_object (sparql, "mlo:GeoLocation");
 		tracker_sparql_builder_predicate (sparql, "mlo:asGeoPoint");
@@ -337,17 +491,8 @@ item_verify_reply_cb (GObject      *source_object,
 		tracker_sparql_builder_predicate (sparql, "mlo:longitude");
 		tracker_sparql_builder_object_double (sparql, longitude);
 		tracker_sparql_builder_object_blank_close (sparql);
-	}
 
-	tracker_sparql_builder_subject (sparql, "_:message");
-	tracker_sparql_builder_predicate (sparql, "a");
-	tracker_sparql_builder_object (sparql, "mfo:FeedMessage");
-	tracker_sparql_builder_predicate (sparql, "a");
-	tracker_sparql_builder_object (sparql, "nfo:RemoteDataObject");
-
-	if (has_geopoint == TRUE) {
-		tracker_sparql_builder_predicate (sparql, "mlo:location");
-		tracker_sparql_builder_object (sparql, "_:location");
+		tracker_sparql_builder_object_blank_close (sparql);
 	}
 
 	tmp_string = feed_item_get_title (item);
@@ -387,41 +532,56 @@ item_verify_reply_cb (GObject      *source_object,
 	tracker_sparql_builder_predicate (sparql, "nmo:isRead");
 	tracker_sparql_builder_object_boolean (sparql, FALSE);
 
-	feed = feed_item_get_parent (item);
-	uri = g_object_get_data (G_OBJECT (feed), "subject");
+	uri = wrap_feed_channel_get_subject (WRAP_FEED_CHANNEL (feed));
 	tracker_sparql_builder_predicate (sparql, "nmo:communicationChannel");
 	tracker_sparql_builder_object_iri (sparql, uri);
 
+	for (i = 0; i < enclosures_num; i++) {
+		tracker_sparql_builder_predicate (sparql, "mfo:enclosureList");
+		enclosure_ref = g_strdup_printf ("_:enclosure%d", i);
+		tracker_sparql_builder_object (sparql, enclosure_ref);
+		g_free (enclosure_ref);
+	}
+
 	tracker_sparql_builder_insert_close (sparql);
 
 	tracker_miner_execute_update (TRACKER_MINER (miner),
 	                              tracker_sparql_builder_get_result (sparql),
 	                              NULL,
 	                              verify_item_insertion,
-	                              NULL);
+	                              item);
 
 	g_object_unref (sparql);
 }
 
 static void
 check_if_save (TrackerMinerRSS *miner,
-               FeedItem        *item)
+               FeedItem        *item,
+               FeedChannel     *feed)
 {
-	FeedChannel *feed;
+	WrapFeedChannel *wfeed;
 	gchar *query;
-	gchar *communication_channel;
 	const gchar *url;
 
 	url = get_message_url (item);
-	feed = feed_item_get_parent (item);
-	communication_channel = g_object_get_data (G_OBJECT (feed), "subject");
+	wfeed = WRAP_FEED_CHANNEL (feed);
+
+	/*
+		TODO	A sort of "cache" of already downloaded items could be
+			saved into the WrapFeedChannel, to avoid asking Tracker
+			every time. Pay attention to the fact that some feeds
+			(particularly podcasts) always use the same url
+			for their items, so a check on enclosure urls as well
+			is required
+	*/
 
 	g_debug ("Verifying feed '%s' is stored", url);
+	g_object_ref (item);
 
 	query = g_strdup_printf ("ASK { ?message a mfo:FeedMessage; "
 	                         "nie:url \"%s\"; nmo:communicationChannel <%s> }",
 	                         url,
-	                         communication_channel);
+	                         wrap_feed_channel_get_subject (wfeed));
 
 	tracker_miner_execute_sparql (TRACKER_MINER (miner),
 	                              query,
@@ -432,12 +592,67 @@ check_if_save (TrackerMinerRSS *miner,
 }
 
 static void
+mandatory_enclosures_collected (GObject      *source_object,
+                                GAsyncResult *res,
+                                gpointer      user_data)
+{
+	const gchar *url;
+	GError *error;
+	TrackerResultIterator *response;
+	FeedEnclosure *enclosure;
+	WrapFeedChannel *feed;
+
+	error = NULL;
+	response = tracker_miner_execute_sparql_finish (TRACKER_MINER (source_object),
+	                                                res,
+	                                                &error);
+
+	if (error != NULL) {
+		g_message ("Could not verify mandatory enclosures, %s", error->message);
+		g_error_free (error);
+		return;
+	}
+
+	feed = user_data;
+
+	while (tracker_result_iterator_next (response)) {
+		url = tracker_result_iterator_value (response, 0);
+		if (url == NULL)
+			continue;
+
+		enclosure = feed_enclosure_new ((gchar*) url);
+		download_enclosure_now (TRACKER_MINER_RSS (source_object), enclosure, feed);
+	}
+}
+
+static void
+check_mandatory_enclosures (TrackerMinerRSS *miner,
+                            WrapFeedChannel *feed)
+{
+	gchar *query;
+
+	query = g_strdup_printf ("SELECT ?u "
+	                         "WHERE { ?e a mfo:Enclosure . ?e mfo:optional false . "
+	                         "?i mfo:enclosureList ?e . ?i nmo:communicationChannel <%s> . "
+	                         "?e mfo:remoteLink ?r . ?r nie:url ?u }",
+				 wrap_feed_channel_get_subject (feed));
+
+	tracker_miner_execute_sparql (TRACKER_MINER (miner),
+				      query,
+				      NULL,
+				      mandatory_enclosures_collected,
+				      feed);
+
+	g_free (query);
+}
+
+static void
 feed_fetched (FeedsPool   *pool,
               FeedChannel *feed,
               GList       *items,
               gpointer     user_data)
 {
-	gchar *uri;
+	const gchar *uri;
 	time_t now;
 	GList *iter;
 	FeedItem *item;
@@ -457,13 +672,16 @@ feed_fetched (FeedsPool   *pool,
 		return;
 
 	now = time (NULL);
-	uri = g_object_get_data (G_OBJECT (feed), "subject");
+	uri = wrap_feed_channel_get_subject (WRAP_FEED_CHANNEL (feed));
 	update_updated_interval (miner, uri, &now);
 
 	for (iter = items; iter; iter = iter->next) {
 		item = iter->data;
-		check_if_save (miner, item);
+		check_if_save (miner, item, feed);
 	}
+
+	if (wrap_feed_channel_get_download_enclosures (WRAP_FEED_CHANNEL (feed)) == FALSE)
+		check_mandatory_enclosures (miner, WRAP_FEED_CHANNEL (feed));
 }
 
 static void
@@ -472,13 +690,13 @@ feeds_retrieve_cb (GObject      *source_object,
                    gpointer      user_data)
 {
 	gint interval;
-	guint i;
-	gchar **values;
+	int int_value;
+	const gchar *str;
 	GList *channels;
-	const GPtrArray *response;
 	GError *error;
 	TrackerMinerRSSPrivate *priv;
-	FeedChannel *chan;
+	TrackerResultIterator *response;
+	WrapFeedChannel *chan;
 
 	error = NULL;
 	response = tracker_miner_execute_sparql_finish (TRACKER_MINER (source_object),
@@ -493,26 +711,43 @@ feeds_retrieve_cb (GObject      *source_object,
 
 	channels = NULL;
 
-	g_message ("Found %d feeds", response->len);
-
-	for (i = 0; i < response->len; i++) {
-		values = g_ptr_array_index (response, i);
-
-		chan = feed_channel_new ();
-		g_object_set_data_full (G_OBJECT (chan),
-		                        "subject",
-		                        g_strdup (values [2]),
-		                        g_free);
-		feed_channel_set_source (chan, values [0]);
+	while (tracker_result_iterator_next (response)) {
+		str = tracker_result_iterator_value (response, 2);
+		chan = wrap_feed_channel_new (TRACKER_MINER_RSS (source_object), (gchar*) str);
+		str = tracker_result_iterator_value (response, 0);
+		feed_channel_set_source (FEED_CHANNEL (chan), (gchar*) str);
 
 		/* TODO How to manage feeds with an update mfo:updateInterval == 0 ?
 		 * Here the interval is forced to be at least 1 minute, but perhaps those
 		 * elements are to be considered "disabled"
 		 */
-		interval = strtoull (values [1], NULL, 10);
+		str = tracker_result_iterator_value (response, 1);
+		interval = strtoull (str, NULL, 10);
 		if (interval <= 0)
 			interval = 1;
-		feed_channel_set_update_interval (chan, interval);
+		feed_channel_set_update_interval (FEED_CHANNEL (chan), interval);
+
+		str = tracker_result_iterator_value (response, 3);
+		if (str != NULL && strcmp (str, "") != 0) {
+			int_value = strtoull (str, NULL, 10);
+			if (int_value > 0)
+				wrap_feed_channel_set_feeds_expiry (chan, int_value);
+		}
+
+		str = tracker_result_iterator_value (response, 4);
+		if (str != NULL && strcmp (str, "") != 0)
+			wrap_feed_channel_set_download_enclosures (chan, strcmp (str, "true") == 0);
+
+		str = tracker_result_iterator_value (response, 5);
+		if (str != NULL && strcmp (str, "") != 0)
+			wrap_feed_channel_set_enclosures_saving_path (chan, (gchar*) str);
+
+		str = tracker_result_iterator_value (response, 6);
+		if (str != NULL && strcmp (str, "") != 0) {
+			int_value = strtoull (str, NULL, 10);
+			if (int_value > 0)
+				wrap_feed_channel_set_enclosures_maxsize (chan, int_value);
+		}
 
 		channels = g_list_prepend (channels, chan);
 	}
@@ -528,11 +763,15 @@ retrieve_and_schedule_feeds (TrackerMinerRSS *miner)
 
 	g_message ("Retrieving and scheduling feeds...");
 
-	sparql = "SELECT ?chanUrl ?interval ?chanUrn WHERE "
+	sparql = "SELECT ?chanUrl ?interval ?chanUrn ?expiry ?download ?path ?msize WHERE "
 	         "{ ?chanUrn a mfo:FeedChannel . "
 	         "?chanUrn mfo:feedSettings ?settings . "
 	         "?chanUrn nie:url ?chanUrl . "
-	         "?settings mfo:updateInterval ?interval }";
+	         "OPTIONAL { ?settings mfo:updateInterval ?interval } . "
+	         "OPTIONAL { ?settings mfo:expiryInterval ?expiry } . "
+	         "OPTIONAL { ?settings mfo:downloadFlag ?download } . "
+	         "OPTIONAL { ?settings mfo:downloadPath ?path } . "
+	         "OPTIONAL { ?settings mfo:maxSize ?msize } }";
 
 	tracker_miner_execute_sparql (TRACKER_MINER (miner),
 	                              sparql,
diff --git a/src/miners/rss/wrap-feed-channel.c b/src/miners/rss/wrap-feed-channel.c
new file mode 100644
index 0000000..c1497b0
--- /dev/null
+++ b/src/miners/rss/wrap-feed-channel.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2010, Roberto Guido <madbob users barberaware org>
+ *                     Michele Tameni <michele amdplanet it>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include "wrap-feed-channel.h"
+#include "tracker-miner-rss.h"
+
+#define GET_PRIV(obj) (G_TYPE_INSTANCE_GET_PRIVATE ((obj), WRAP_FEED_CHANNEL_TYPE, WrapFeedChannelPrivate))
+
+typedef struct _WrapFeedChannelPrivate	WrapFeedChannelPrivate;
+
+struct _WrapFeedChannelPrivate {
+	TrackerMinerRSS	*miner;
+
+	gchar *subject;
+
+	GList *saved_items;
+
+	gint items_expiry_interval;
+	guint expiration_handler;
+
+	gboolean download_enclosures;
+	gint enclosures_maxsize;
+	gchar *enclosures_saving_path;
+};
+
+G_DEFINE_TYPE (WrapFeedChannel, wrap_feed_channel, FEED_CHANNEL_TYPE);
+
+static gboolean
+check_expired_items_cb (gpointer data)
+{
+	gchar *query;
+	gchar time_ago_str [100];
+	time_t time_ago_t;
+	struct tm time_ago_tm;
+	WrapFeedChannel *node;
+	WrapFeedChannelPrivate *priv;
+
+	node = data;
+	priv = GET_PRIV (node);
+
+	time_ago_t = time (NULL) - (priv->items_expiry_interval * 60);
+	localtime_r (&time_ago_t, &time_ago_tm);
+	strftime (time_ago_str, 100, "%Y-%m-%dT%H:%M:%SZ", &time_ago_tm);
+
+	query = g_strdup_printf ("DELETE {?i a rdfs:Resource} WHERE {?i nmo:communicationChannel <%s> . ?i mfo:downloadedTime ?t FILTER (?t < \"%s\")}",
+	                         priv->subject, time_ago_str);
+
+	tracker_miner_execute_update (TRACKER_MINER (priv->miner), query, NULL, NULL, NULL);
+	g_free (query);
+
+	return TRUE;
+}
+
+static void
+review_expiration_timer (WrapFeedChannel *node)
+{
+	WrapFeedChannelPrivate *priv;
+
+	priv = GET_PRIV (node);
+
+	if (priv->expiration_handler != 0)
+		g_source_remove (priv->expiration_handler);
+
+	if (priv->items_expiry_interval == 0)
+		return;
+
+	check_expired_items_cb (node);
+	priv->expiration_handler = g_timeout_add_seconds (priv->items_expiry_interval * 60,
+							  check_expired_items_cb, node);
+}
+
+static void
+wrap_feed_channel_finalize (GObject *obj)
+{
+	GList *iter;
+	WrapFeedChannel *chan;
+	WrapFeedChannelPrivate *priv;
+
+	chan = WRAP_FEED_CHANNEL (obj);
+	priv = GET_PRIV (chan);
+
+	if (priv->subject != NULL)
+		g_free (priv->subject);
+
+	if (priv->enclosures_saving_path != NULL)
+		g_free (priv->enclosures_saving_path);
+
+	if (priv->saved_items != NULL) {
+		for (iter = priv->saved_items; iter; iter = iter->next)
+			g_free (iter->data);
+		g_list_free (priv->saved_items);
+	}
+}
+
+static void
+wrap_feed_channel_class_init (WrapFeedChannelClass *klass)
+{
+	GObjectClass *gobject_class;
+
+	gobject_class = G_OBJECT_CLASS (klass);
+	gobject_class->finalize = wrap_feed_channel_finalize;
+
+	g_type_class_add_private (klass, sizeof (WrapFeedChannelPrivate));
+}
+
+static void
+wrap_feed_channel_init (WrapFeedChannel *node)
+{
+}
+
+WrapFeedChannel*
+wrap_feed_channel_new (TrackerMinerRSS *miner,
+                       gchar           *subject)
+{
+	WrapFeedChannel *ret;
+	WrapFeedChannelPrivate *priv;
+
+	ret = g_object_new (WRAP_FEED_CHANNEL_TYPE, NULL);
+
+	priv = GET_PRIV (ret);
+	priv->miner = miner;
+	priv->subject = g_strdup (subject);
+	return ret;
+}
+
+TrackerMinerRSS*
+wrap_feed_channel_get_referring_miner (WrapFeedChannel *feed)
+{
+	WrapFeedChannelPrivate *priv;
+
+	priv = GET_PRIV (feed);
+	return priv->miner;
+}
+
+const gchar*
+wrap_feed_channel_get_subject (WrapFeedChannel *feed)
+{
+	WrapFeedChannelPrivate *priv;
+
+	priv = GET_PRIV (feed);
+	return (const gchar*) priv->subject;
+}
+
+void
+wrap_feed_channel_set_feeds_expiry (WrapFeedChannel *feed,
+                                    int              minutes)
+{
+	WrapFeedChannelPrivate *priv;
+
+	priv = GET_PRIV (feed);
+
+	if (priv->items_expiry_interval != minutes) {
+		priv->items_expiry_interval = minutes;
+		review_expiration_timer (feed);
+	}
+}
+
+void
+wrap_feed_channel_set_download_enclosures (WrapFeedChannel *feed,
+                                           gboolean         download)
+{
+	WrapFeedChannelPrivate *priv;
+
+	priv = GET_PRIV (feed);
+	priv->download_enclosures = download;
+}
+
+gboolean
+wrap_feed_channel_get_download_enclosures (WrapFeedChannel *feed)
+{
+	WrapFeedChannelPrivate *priv;
+
+	priv = GET_PRIV (feed);
+	return priv->download_enclosures;
+}
+
+void
+wrap_feed_channel_set_enclosures_maxsize (WrapFeedChannel *feed,
+                                          int              kb)
+{
+	WrapFeedChannelPrivate *priv;
+
+	priv = GET_PRIV (feed);
+	priv->enclosures_maxsize = kb;
+}
+
+int
+wrap_feed_channel_get_enclosures_maxsize (WrapFeedChannel *feed)
+{
+	WrapFeedChannelPrivate *priv;
+
+	priv = GET_PRIV (feed);
+	return priv->enclosures_maxsize;
+}
+
+void
+wrap_feed_channel_set_enclosures_saving_path (WrapFeedChannel *feed,
+                                              gchar           *path)
+{
+	WrapFeedChannelPrivate *priv;
+
+	priv = GET_PRIV (feed);
+
+	if (priv->enclosures_saving_path != NULL)
+		g_free (priv->enclosures_saving_path);
+	priv->enclosures_saving_path = g_strdup (path);
+}
+
+const gchar*
+wrap_feed_channel_get_enclosures_saving_path (WrapFeedChannel *feed)
+{
+	WrapFeedChannelPrivate *priv;
+
+	priv = GET_PRIV (feed);
+	return (const gchar*) priv->enclosures_saving_path;
+}
diff --git a/src/miners/rss/wrap-feed-channel.h b/src/miners/rss/wrap-feed-channel.h
new file mode 100644
index 0000000..34b291e
--- /dev/null
+++ b/src/miners/rss/wrap-feed-channel.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2010, Roberto Guido <madbob users barberaware org>
+ *                     Michele Tameni <michele amdplanet it>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __WRAP_FEED_CHANNEL_H__
+#define __WRAP_FEED_CHANNEL_H__
+
+#include <libgrss.h>
+#include "tracker-miner-rss.h"
+
+#define WRAP_FEED_CHANNEL_TYPE         (wrap_feed_channel_get_type())
+#define WRAP_FEED_CHANNEL(o)           (G_TYPE_CHECK_INSTANCE_CAST ((o), WRAP_FEED_CHANNEL_TYPE, WrapFeedChannel))
+#define WRAP_FEED_CHANNEL_CLASS(c)     (G_TYPE_CHECK_CLASS_CAST ((c), WRAP_FEED_CHANNEL_TYPE, WrapFeedChannelClass))
+#define IS_WRAP_FEED_CHANNEL(o)        (G_TYPE_CHECK_INSTANCE_TYPE ((o), WRAP_FEED_CHANNEL_TYPE))
+#define IS_WRAP_FEED_CHANNEL_CLASS(c)  (G_TYPE_CHECK_CLASS_TYPE ((c),  WRAP_FEED_CHANNEL_TYPE))
+#define WRAP_FEED_CHANNEL_GET_CLASS(o) (G_TYPE_INSTANCE_GET_CLASS ((o), WRAP_FEED_CHANNEL_TYPE, WrapFeedChannelClass))
+
+typedef struct _WrapFeedChannel		WrapFeedChannel;
+
+struct _WrapFeedChannel {
+	FeedChannel parent;
+};
+
+typedef struct {
+	FeedChannelClass parent;
+} WrapFeedChannelClass;
+
+GType            wrap_feed_channel_get_type                   (void) G_GNUC_CONST;
+
+WrapFeedChannel* wrap_feed_channel_new                        (TrackerMinerRSS *miner, gchar *subject);
+
+TrackerMinerRSS* wrap_feed_channel_get_referring_miner        (WrapFeedChannel *feed);
+const gchar*     wrap_feed_channel_get_subject                (WrapFeedChannel *feed);
+void             wrap_feed_channel_set_feeds_expiry           (WrapFeedChannel *feed, int minutes);
+void             wrap_feed_channel_set_download_enclosures    (WrapFeedChannel *feed, gboolean download);
+gboolean         wrap_feed_channel_get_download_enclosures    (WrapFeedChannel *feed);
+void             wrap_feed_channel_set_enclosures_maxsize     (WrapFeedChannel *feed, int kb);
+int              wrap_feed_channel_get_enclosures_maxsize     (WrapFeedChannel *feed);
+void             wrap_feed_channel_set_enclosures_saving_path (WrapFeedChannel *feed, gchar *path);
+const gchar*     wrap_feed_channel_get_enclosures_saving_path (WrapFeedChannel *feed);
+
+#endif /* __WRAP_FEED_CHANNEL_H__ */
diff --git a/src/miners/rss/wrap-feed-enclosure.c b/src/miners/rss/wrap-feed-enclosure.c
new file mode 100644
index 0000000..62112bf
--- /dev/null
+++ b/src/miners/rss/wrap-feed-enclosure.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright (C) 2010, Roberto Guido <madbob users barberaware org>
+ *                     Michele Tameni <michele amdplanet it>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <dbus/dbus-glib.h>
+
+#include "wrap-feed-enclosure.h"
+
+/* Fetches the private data area attached to a WrapFeedEnclosure instance. */
+#define GET_PRIV(obj) (G_TYPE_INSTANCE_GET_PRIVATE ((obj), WRAP_FEED_ENCLOSURE_TYPE, WrapFeedEnclosurePrivate))
+
+typedef struct _WrapFeedEnclosurePrivate	WrapFeedEnclosurePrivate;
+
+/* Private state of a WrapFeedEnclosure. */
+struct _WrapFeedEnclosurePrivate {
+	/* Wrapped enclosure and the channel it belongs to; both are referenced
+	 * in wrap_feed_enclosure_new() and released in finalize. */
+	FeedEnclosure *enclosure;
+	WrapFeedChannel *channel;
+
+	/* Local destination file, computed lazily by saving_path() and freed
+	 * in finalize. */
+	gchar *save_path;
+
+	/* Downloaded payload handed to wrap_feed_enclosure_save_data(); it is
+	 * written to disk in enclosure_node_set() and freed in finalize. */
+	gchar *data;
+	gsize data_len;
+};
+
+G_DEFINE_TYPE (WrapFeedEnclosure, wrap_feed_enclosure, G_TYPE_OBJECT);
+
+/*
+ * GObject finalize handler.
+ *
+ * Releases the references held on the wrapped enclosure and on the channel,
+ * frees the cached saving path and the payload buffer, then chains up to the
+ * parent class so that the rest of the instance is torn down properly.
+ */
+static void
+wrap_feed_enclosure_finalize (GObject *obj)
+{
+	WrapFeedEnclosure *enc;
+	WrapFeedEnclosurePrivate *priv;
+
+	enc = WRAP_FEED_ENCLOSURE (obj);
+	priv = GET_PRIV (enc);
+
+	/* Both pointers are only set by wrap_feed_enclosure_new(); an instance
+	 * created directly with g_object_new() leaves them NULL. */
+	if (priv->enclosure != NULL)
+		g_object_unref (priv->enclosure);
+	if (priv->channel != NULL)
+		g_object_unref (priv->channel);
+
+	/* g_free() is a no-op on NULL */
+	g_free (priv->save_path);
+	g_free (priv->data);
+
+	/* Mandatory for GObject subclasses: let the parent finalize run */
+	G_OBJECT_CLASS (wrap_feed_enclosure_parent_class)->finalize (obj);
+}
+
+/*
+ * Class initializer: hooks the finalize handler into the GObject vtable and
+ * reserves room for the per-instance private structure.
+ */
+static void
+wrap_feed_enclosure_class_init (WrapFeedEnclosureClass *klass)
+{
+	G_OBJECT_CLASS (klass)->finalize = wrap_feed_enclosure_finalize;
+
+	g_type_class_add_private (klass, sizeof (WrapFeedEnclosurePrivate));
+}
+
+/* Instance initializer: intentionally empty, the private fields are filled
+ * in by wrap_feed_enclosure_new(). */
+static void
+wrap_feed_enclosure_init (WrapFeedEnclosure *self)
+{
+}
+
+/*
+ * Builds a new wrapper around @enclosure, bound to @channel.
+ *
+ * The wrapper takes its own reference on both objects; the caller keeps the
+ * references it already owns. Returns the newly created instance.
+ */
+WrapFeedEnclosure*
+wrap_feed_enclosure_new (FeedEnclosure   *enclosure,
+                         WrapFeedChannel *channel)
+{
+	WrapFeedEnclosure *wrapper = g_object_new (WRAP_FEED_ENCLOSURE_TYPE, NULL);
+	WrapFeedEnclosurePrivate *priv = GET_PRIV (wrapper);
+
+	priv->enclosure = g_object_ref (enclosure);
+	priv->channel = g_object_ref (channel);
+
+	return wrapper;
+}
+
+/*
+ * Returns the local path where the enclosure contents have to be saved,
+ * computing and caching it in the private data on first use.
+ *
+ * The file name is the basename of the enclosure URL, placed in the folder
+ * configured on the channel. If a file with that name already exists, a
+ * numeric prefix ("1_", "2_", ...) is put in front of the original name until
+ * a free one is found.
+ *
+ * Returns NULL (after a warning) when the channel has no saving folder. The
+ * returned string is owned by the enclosure and must not be freed.
+ */
+static const gchar*
+saving_path (WrapFeedEnclosure *enclosure)
+{
+	int modifier;
+	gchar *name;
+	gchar *candidate;
+	gchar *path;
+	const gchar *folder;
+	WrapFeedEnclosurePrivate *priv;
+
+	priv = GET_PRIV (enclosure);
+
+	/* Already computed by a previous call */
+	if (priv->save_path != NULL && priv->save_path[0] != '\0')
+		return (const gchar*) priv->save_path;
+
+	g_free (priv->save_path);
+	priv->save_path = NULL;
+
+	folder = wrap_feed_channel_get_enclosures_saving_path (priv->channel);
+	if (folder == NULL) {
+		g_warning ("No saving folder set for enclosures.");
+		return NULL;
+	}
+
+	name = g_path_get_basename (feed_enclosure_get_url (priv->enclosure));
+	path = g_build_filename (folder, name, NULL);
+
+	/* This is to avoid overlapping existing files with the same name.
+	 * The prefix is always applied to the original name, so the sequence
+	 * is "1_name", "2_name", ... and prefixes never pile up. */
+	modifier = 0;
+
+	while (access (path, F_OK) == 0) {
+		modifier++;
+		candidate = g_strdup_printf ("%d_%s", modifier, name);
+
+		g_free (path);
+		path = g_build_filename (folder, candidate, NULL);
+		g_free (candidate);
+	}
+
+	g_free (name);
+	priv->save_path = path;
+
+	return (const gchar*) priv->save_path;
+}
+
+/*
+ * Builds the SPARQL update that creates a nfo:FileDataObject for the local
+ * copy of the enclosure and links it (mfo:localLink) to the item whose
+ * mfo:remoteLink has the enclosure URL.
+ *
+ * Returns a newly allocated string to be released with g_free(), or NULL
+ * when no saving path is available.
+ *
+ * NOTE(review): both the local path and the remote URL are inserted into the
+ * query as-is, without SPARQL escaping -- confirm that this is safe for the
+ * values that can reach this point.
+ */
+static gchar*
+get_local_node_query (WrapFeedEnclosure *enclosure)
+{
+	const gchar *local_path;
+	const gchar *remote_url;
+	WrapFeedEnclosurePrivate *priv;
+
+	local_path = saving_path (enclosure);
+	if (local_path == NULL)
+		return NULL;
+
+	priv = GET_PRIV (enclosure);
+	remote_url = feed_enclosure_get_url (priv->enclosure);
+
+	return g_strdup_printf ("INSERT {_:enclosure a nfo:FileDataObject; nie:url \"%s\" . ?i mfo:localLink _:enclosure} "
+	                        "WHERE {?r nie:url \"%s\" . ?i mfo:remoteLink ?r}",
+	                        local_path, remote_url);
+}
+
+/*
+ * Passes @path to tracker_miner_ignore_next_update() as a one-element,
+ * NULL-terminated string vector. Always returns TRUE.
+ */
+static gboolean
+notify_miner_fs (TrackerMiner *miner, const gchar *path)
+{
+	gchar *urls [2];
+
+	/* The vector only borrows @path for the duration of the call */
+	urls [0] = (gchar*) path;
+	urls [1] = NULL;
+
+	tracker_miner_ignore_next_update (miner, urls);
+
+	return TRUE;
+}
+
+/*
+ * Completion callback for the update started by unmandatory_enclosure().
+ *
+ * Logs a critical message if the update failed and, in every case, drops the
+ * enclosure reference carried in @user_data.
+ */
+static void
+verify_enclosure_unmandatory (GObject      *source,
+                              GAsyncResult *result,
+                              gpointer      user_data)
+{
+	WrapFeedEnclosure *owner = user_data;
+	GError *err = NULL;
+
+	tracker_miner_execute_update_finish (TRACKER_MINER (source), result, &err);
+
+	if (err != NULL) {
+		g_critical ("Could not remove flag about mandatory enclosure, %s", err->message);
+		g_error_free (err);
+	}
+
+	g_object_unref (owner);
+}
+
+/*
+ * Starts the asynchronous SPARQL update that deletes the mfo:optional
+ * statements of the element whose mfo:remoteLink has the enclosure URL.
+ *
+ * @enclosure is passed as user data to verify_enclosure_unmandatory(), which
+ * releases one reference on it when the update completes.
+ */
+static void
+unmandatory_enclosure (WrapFeedEnclosure *enclosure)
+{
+	WrapFeedEnclosurePrivate *priv = GET_PRIV (enclosure);
+	const gchar *url;
+	gchar *sparql;
+	TrackerMinerRSS *miner;
+
+	url = feed_enclosure_get_url (priv->enclosure);
+	sparql = g_strdup_printf ("DELETE {?e mfo:optional ?o} "
+	                          "WHERE {?r nie:url \"%s\" . ?e mfo:remoteLink ?r . ?e mfo:optional ?o}",
+	                          url);
+
+	miner = wrap_feed_channel_get_referring_miner (priv->channel);
+	tracker_miner_execute_update (TRACKER_MINER (miner), sparql, NULL,
+	                              verify_enclosure_unmandatory, enclosure);
+
+	g_free (sparql);
+}
+
+/*
+ * Completion callback for the update started by
+ * wrap_feed_enclosure_save_data().
+ *
+ * On success the stored payload is written to the saving path and the
+ * follow-up update is started through unmandatory_enclosure(), which takes
+ * over the enclosure reference carried in @user_data. On every other exit
+ * path that reference is released here, so it is never leaked.
+ */
+static void
+enclosure_node_set (GObject      *source,
+                    GAsyncResult *result,
+                    gpointer      user_data)
+{
+	const gchar *path;
+	FILE *fd;
+	GError *error;
+	WrapFeedEnclosurePrivate *priv;
+	WrapFeedEnclosure *enclosure;
+
+	error = NULL;
+	enclosure = user_data;
+
+	tracker_miner_execute_update_finish (TRACKER_MINER (source), result, &error);
+	if (error != NULL) {
+		g_critical ("Could not save enclosure informations, %s", error->message);
+		g_error_free (error);
+		g_object_unref (enclosure);
+		return;
+	}
+
+	priv = GET_PRIV (enclosure);
+
+	path = saving_path (enclosure);
+	if (path == NULL || notify_miner_fs (TRACKER_MINER (source), path) == FALSE) {
+		/* Nothing else will run for this enclosure: drop our reference */
+		g_object_unref (enclosure);
+		return;
+	}
+
+	/* Enclosures are arbitrary binary payloads: open in binary mode */
+	fd = fopen (path, "wb");
+	if (fd == NULL) {
+		g_warning ("Unable to open saving location (%s) for enclosure.", path);
+	}
+	else {
+		/* Counting bytes (instead of one item of data_len bytes) keeps
+		 * an empty payload from being reported as a write failure. */
+		if (fwrite (priv->data, 1, priv->data_len, fd) != priv->data_len)
+			g_warning ("Error while writing enclosure contents on the filesystem: %s.", strerror (errno));
+
+		/* Buffered data is flushed on close, which can fail as well */
+		if (fclose (fd) != 0)
+			g_warning ("Error while writing enclosure contents on the filesystem: %s.", strerror (errno));
+	}
+
+	/* The reference is handed over to verify_enclosure_unmandatory() */
+	unmandatory_enclosure (enclosure);
+}
+
+/*
+ * Stores the downloaded contents of the enclosure and starts the
+ * asynchronous update that registers the local copy; the file itself is
+ * written by enclosure_node_set() once that update has completed.
+ *
+ * @data: payload buffer. The enclosure takes ownership of it and releases it
+ *        with g_free() (when replaced by a later call, or in finalize).
+ * @len:  size of @data in bytes.
+ *
+ * If no saving path is available nothing is scheduled; the payload stays
+ * owned by the enclosure.
+ */
+void
+wrap_feed_enclosure_save_data (WrapFeedEnclosure *enclosure,
+                               gchar             *data,
+                               gsize              len)
+{
+	gchar *query;
+	WrapFeedEnclosurePrivate *priv;
+
+	priv = GET_PRIV (enclosure);
+
+	/* Do not leak a buffer left over from a previous call */
+	if (priv->data != data)
+		g_free (priv->data);
+	priv->data = data;
+	priv->data_len = len;
+
+	query = get_local_node_query (enclosure);
+	if (query == NULL)
+		return;
+
+	/* Keep the enclosure alive until the callback chain completes; the
+	 * reference is taken only once the operation is really scheduled, so
+	 * the early return above cannot leak it. */
+	g_object_ref (enclosure);
+
+	tracker_miner_execute_update (TRACKER_MINER (wrap_feed_channel_get_referring_miner (priv->channel)),
+				      query, NULL, enclosure_node_set, enclosure);
+	g_free (query);
+}
diff --git a/src/miners/rss/wrap-feed-enclosure.h b/src/miners/rss/wrap-feed-enclosure.h
new file mode 100644
index 0000000..c37cf72
--- /dev/null
+++ b/src/miners/rss/wrap-feed-enclosure.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2010, Roberto Guido <madbob users barberaware org>
+ *                     Michele Tameni <michele amdplanet it>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __WRAP_FEED_ENCLOSURE_H__
+#define __WRAP_FEED_ENCLOSURE_H__
+
+#include <libgrss.h>
+#include "wrap-feed-channel.h"
+
+/* GObject convenience macros for WrapFeedEnclosure: type id, checked
+ * instance/class casts, instance/class type tests and class lookup. */
+#define WRAP_FEED_ENCLOSURE_TYPE		(wrap_feed_enclosure_get_type())
+#define WRAP_FEED_ENCLOSURE(o)			(G_TYPE_CHECK_INSTANCE_CAST ((o), WRAP_FEED_ENCLOSURE_TYPE, WrapFeedEnclosure))
+#define WRAP_FEED_ENCLOSURE_CLASS(c)		(G_TYPE_CHECK_CLASS_CAST ((c), WRAP_FEED_ENCLOSURE_TYPE, WrapFeedEnclosureClass))
+#define IS_WRAP_FEED_ENCLOSURE(o)		(G_TYPE_CHECK_INSTANCE_TYPE ((o), WRAP_FEED_ENCLOSURE_TYPE))
+#define IS_WRAP_FEED_ENCLOSURE_CLASS(c)		(G_TYPE_CHECK_CLASS_TYPE ((c),  WRAP_FEED_ENCLOSURE_TYPE))
+#define WRAP_FEED_ENCLOSURE_GET_CLASS(o)	(G_TYPE_INSTANCE_GET_CLASS ((o), WRAP_FEED_ENCLOSURE_TYPE, WrapFeedEnclosureClass))
+
+typedef struct _WrapFeedEnclosure		WrapFeedEnclosure;
+
+/* Instance structure: a plain GObject subclass with no public fields. */
+struct _WrapFeedEnclosure {
+	GObject parent;
+};
+
+/* Class structure: derives from GObjectClass and adds no virtual methods. */
+typedef struct {
+	GObjectClass parent;
+} WrapFeedEnclosureClass;
+
+/* Returns the GType registered for WrapFeedEnclosure. */
+GType              wrap_feed_enclosure_get_type           (void) G_GNUC_CONST;
+
+/* Creates a wrapper around `enclosure` bound to `channel`; the wrapper takes
+ * its own reference on both objects. */
+WrapFeedEnclosure* wrap_feed_enclosure_new                (FeedEnclosure *enclosure, WrapFeedChannel *channel);
+/* Hands `len` bytes of downloaded contents to the wrapper, which stores the
+ * pointer (it is freed with g_free() when the wrapper is finalized) and
+ * schedules saving it to disk. */
+void               wrap_feed_enclosure_save_data          (WrapFeedEnclosure *enclosure, gchar *data, gsize len);
+
+#endif /* __WRAP_FEED_ENCLOSURE_H__ */



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]