[tracker/rss-enclosures: 6/9] tracker-miner-rss: management of mandatory enclosures (mfo:optional = false)



commit 4e026c653a7a9decd7bfb68dcac695a14018e96e
Author: Roberto Guido <bob4mail gmail com>
Date:   Thu May 27 00:08:12 2010 +0200

    tracker-miner-rss: management of mandatory enclosures (mfo:optional = false)
    
    Signed-off-by: Michele Tameni <michele tameni it>

 src/miners/rss/tracker-miner-rss.c   |  127 ++++++++++++++++++++++++++++-----
 src/miners/rss/wrap-feed-channel.c   |    9 +++
 src/miners/rss/wrap-feed-enclosure.c |  107 ++++++++++++++++++++++-------
 3 files changed, 199 insertions(+), 44 deletions(-)
---
diff --git a/src/miners/rss/tracker-miner-rss.c b/src/miners/rss/tracker-miner-rss.c
index a309aad..f5daf57 100644
--- a/src/miners/rss/tracker-miner-rss.c
+++ b/src/miners/rss/tracker-miner-rss.c
@@ -242,8 +242,7 @@ enclosure_downloaded_cb (SoupSession *session,
 	g_object_get (msg, "status-code", &status, NULL);
 
 	if (status < 200 || status > 299) {
-		g_warning ("Unable to download enclosure in %s.",
-			   feed_enclosure_get_url (FEED_ENCLOSURE (enclosure)));
+		g_warning ("Unable to download enclosure.");
 	}
 	else {
 		wrap_feed_enclosure_save_data (enclosure,
@@ -255,15 +254,33 @@ enclosure_downloaded_cb (SoupSession *session,
 }
 
 static void
+download_enclosure_now (TrackerMinerRSS *miner, FeedEnclosure *enclosure, WrapFeedChannel *channel)
+{
+	const gchar *url;
+	SoupMessage *msg;
+	TrackerMinerRSSPrivate *priv = TRACKER_MINER_RSS_GET_PRIVATE (miner);
+
+	/* Queues a GET for the enclosure; enclosure_downloaded_cb receives the new wrapper. */
+	url = feed_enclosure_get_url (enclosure);
+	msg = soup_message_new ("GET", url);
+	if (msg == NULL) {
+		/* Unparsable URL: bail out before creating the wrapper, so nothing leaks. */
+		g_warning ("Unable to download enclosure, invalid URL.");
+		return;
+	}
+
+	g_message ("Downloading enclosures in %s", url);
+	soup_session_queue_message (feeds_pool_get_session (priv->pool), msg, enclosure_downloaded_cb,
+				    wrap_feed_enclosure_new (enclosure, channel));
+}
+
+static void
 manage_enclosure (TrackerMinerRSS *miner,
                   WrapFeedChannel *feed,
                   FeedEnclosure   *enclosure)
 {
 	int size;
 	const gchar *path;
-	SoupMessage *msg;
-	TrackerMinerRSSPrivate *priv;
-	WrapFeedEnclosure *enc_wrap;
 
 	if (wrap_feed_channel_get_download_enclosures (feed) == FALSE) {
 		return;
@@ -280,14 +297,7 @@ manage_enclosure (TrackerMinerRSS *miner,
 		return;
 	}
 
-	enc_wrap = wrap_feed_enclosure_new (enclosure, feed);
-	msg = soup_message_new ("GET", feed_enclosure_get_url (enclosure));
-
-	g_message ("Downloading enclosures in %s", feed_enclosure_get_url (enclosure));
-
-	priv = TRACKER_MINER_RSS_GET_PRIVATE (miner);
-	soup_session_queue_message (feeds_pool_get_session (priv->pool), msg,
-				    enclosure_downloaded_cb, enc_wrap);
+	download_enclosure_now (miner, enclosure, feed);
 }
 
 static int
@@ -314,6 +324,7 @@ queue_enclosures (TrackerMinerRSS      *miner,
 		subject = g_strdup_printf ("_:enclosure%d", num);
 
 		tracker_sparql_builder_insert_open (sparql, rsubject);
+
 		tracker_sparql_builder_subject (sparql, rsubject);
 		tracker_sparql_builder_predicate (sparql, "a");
 		tracker_sparql_builder_object (sparql, "nfo:RemoteDataObject");
@@ -321,7 +332,7 @@ queue_enclosures (TrackerMinerRSS      *miner,
 		tracker_sparql_builder_object (sparql, "nie:InformationElement");
 
 		tracker_sparql_builder_predicate (sparql, "nie:url");
-		tracker_sparql_builder_object_string (sparql, tmp_string);
+		tracker_sparql_builder_object_unvalidated (sparql, tmp_string);
 
 		tracker_sparql_builder_predicate (sparql, "nfo:fileSize");
 		tracker_sparql_builder_object_int64 (sparql, (gint64) feed_enclosure_get_length (enc));
@@ -329,7 +340,7 @@ queue_enclosures (TrackerMinerRSS      *miner,
 		tmp_string = feed_enclosure_get_format (enc);
 		if (tmp_string != NULL) {
 			tracker_sparql_builder_predicate (sparql, "nie:mimeType");
-			tracker_sparql_builder_object_string (sparql, tmp_string);
+			tracker_sparql_builder_object_unvalidated (sparql, tmp_string);
 		}
 
 		tracker_sparql_builder_subject (sparql, subject);
@@ -383,14 +394,19 @@ verify_item_insertion (GObject      *source,
                        gpointer      user_data)
 {
 	GError *error;
+	FeedItem *item;
 
-	error = NULL;
+	item = user_data;
 
+	error = NULL;
 	tracker_miner_execute_update_finish (TRACKER_MINER (source), result, &error);
+
 	if (error != NULL) {
 		g_critical ("Could not insert feed information, %s", error->message);
 		g_error_free (error);
 	}
+
+	g_object_unref (item);
 }
 
 static void
@@ -417,6 +433,8 @@ item_verify_reply_cb (GObject      *source_object,
 	gboolean has_geopoint;
 
 	miner = TRACKER_MINER_RSS (source_object);
+	item = user_data;
+
 	error = NULL;
 	response = tracker_miner_execute_sparql_finish (TRACKER_MINER (source_object),
 	                                                res,
@@ -424,16 +442,17 @@ item_verify_reply_cb (GObject      *source_object,
 
 	if (error != NULL) {
 		g_message ("Could not verify feed existance, %s", error->message);
+		g_object_unref (item);
 		g_error_free (error);
 		return;
 	}
 
 	values = g_ptr_array_index (response, 0);
 	if (g_strcmp0 (values[0], "1") == 0) {
+		g_object_unref (item);
 		return;
 	}
 
-	item = user_data;
 	feed = feed_item_get_parent (item);
 
 	url = get_message_url (item);
@@ -449,6 +468,7 @@ item_verify_reply_cb (GObject      *source_object,
 		g_message ("  Geopoint, using longitude:%f, latitude:%f",
 		           longitude, latitude);
 
+		tracker_sparql_builder_insert_open (sparql, "_:location");
 		tracker_sparql_builder_subject (sparql, "_:location");
 		tracker_sparql_builder_predicate (sparql, "a");
 		tracker_sparql_builder_object (sparql, "mlo:GeoLocation");
@@ -464,6 +484,7 @@ item_verify_reply_cb (GObject      *source_object,
 		tracker_sparql_builder_object_blank_close (sparql);
 	}
 
+	tracker_sparql_builder_insert_open (sparql, "_:message");
 	tracker_sparql_builder_subject (sparql, "_:message");
 	tracker_sparql_builder_predicate (sparql, "a");
 	tracker_sparql_builder_object (sparql, "mfo:FeedMessage");
@@ -529,7 +550,7 @@ item_verify_reply_cb (GObject      *source_object,
 	                              tracker_sparql_builder_get_result (sparql),
 	                              NULL,
 	                              verify_item_insertion,
-	                              NULL);
+	                              item);
 
 	g_object_unref (sparql);
 }
@@ -546,7 +567,17 @@ check_if_save (TrackerMinerRSS *miner,
 	url = get_message_url (item);
 	wfeed = WRAP_FEED_CHANNEL (feed);
 
+	/*
+		TODO	A sort of "cache" of already downloaded items could be
+			saved into the WrapFeedChannel, to avoid asking Tracker
+			every time. Pay attention to the fact that some feeds
+			(particularly podcasts) always use the same url
+			for their items, so a check on the enclosures' urls
+			is also required.
+	*/
+
 	g_debug ("Verifying feed '%s' is stored", url);
+	g_object_ref (item);
 
 	query = g_strdup_printf ("ASK { ?message a mfo:FeedMessage; "
 	                         "nie:url \"%s\"; nmo:communicationChannel <%s> }",
@@ -562,6 +593,59 @@ check_if_save (TrackerMinerRSS *miner,
 }
 
 static void
+mandatory_enclosures_collected (GObject      *source_object,
+                                GAsyncResult *res,
+                                gpointer      user_data)
+{
+	/* Completion of a SPARQL query: user_data is the WrapFeedChannel and each
+	 * row's first column is handed to feed_enclosure_new() and downloaded. */
+	guint i;
+	gchar **values;
+	const GPtrArray *response;
+	GError *error = NULL;
+	FeedEnclosure *enclosure;
+	WrapFeedChannel *feed = user_data;
+
+	response = tracker_miner_execute_sparql_finish (TRACKER_MINER (source_object),
+	                                                res,
+	                                                &error);
+
+	if (error != NULL) {
+		g_message ("Could not verify mandatory enclosures, %s", error->message);
+		g_error_free (error);
+		return;
+	}
+
+	/* NOTE(review): who drops the reference from feed_enclosure_new()? - confirm. */
+	for (i = 0; i < response->len; i++) {
+		values = g_ptr_array_index (response, i);
+		enclosure = feed_enclosure_new (values [0]);
+		download_enclosure_now (TRACKER_MINER_RSS (source_object), enclosure, feed);
+	}
+}
+
+static void
+check_mandatory_enclosures (TrackerMinerRSS *miner,
+                            WrapFeedChannel *feed)
+{
+	/* Asks the store for the URLs of the enclosures flagged
+	 * "mfo:optional false" on this channel's items; the reply is
+	 * delivered to mandatory_enclosures_collected() with the feed. */
+	const gchar *channel_subject = wrap_feed_channel_get_subject (feed);
+	gchar *sparql = g_strdup_printf ("SELECT ?u WHERE { "
+	                                 "?e a mfo:Enclosure . ?e mfo:optional false . "
+	                                 "?i mfo:enclosureList ?e . "
+	                                 "?i nmo:communicationChannel <%s> . "
+	                                 "?e mfo:remoteLink ?r . ?r nie:url ?u }",
+	                                 channel_subject);
+
+	tracker_miner_execute_sparql (TRACKER_MINER (miner), sparql, NULL,
+	                              mandatory_enclosures_collected, feed);
+
+	g_free (sparql);
+}
+
+static void
 feed_fetched (FeedsPool   *pool,
               FeedChannel *feed,
               GList       *items,
@@ -594,6 +678,9 @@ feed_fetched (FeedsPool   *pool,
 		item = iter->data;
 		check_if_save (miner, item, feed);
 	}
+
+	if (wrap_feed_channel_get_download_enclosures (WRAP_FEED_CHANNEL (feed)) == FALSE)
+		check_mandatory_enclosures (miner, WRAP_FEED_CHANNEL (feed));
 }
 
 static void
@@ -626,8 +713,8 @@ feeds_retrieve_cb (GObject      *source_object,
 
 	g_message ("Found %d feeds", response->len);
 
-	for (i = 0; i < response->len; i++) {
-		values = g_ptr_array_index (response, i);
+		for (i = 0; i < response->len; i++) {
+			values = g_ptr_array_index (response, i);
 
 		chan = wrap_feed_channel_new (TRACKER_MINER_RSS (source_object), values [2]);
 		feed_channel_set_source (FEED_CHANNEL (chan), values [0]);
diff --git a/src/miners/rss/wrap-feed-channel.c b/src/miners/rss/wrap-feed-channel.c
index 2bc0d8f..c1497b0 100644
--- a/src/miners/rss/wrap-feed-channel.c
+++ b/src/miners/rss/wrap-feed-channel.c
@@ -30,6 +30,8 @@ struct _WrapFeedChannelPrivate {
 
 	gchar *subject;
 
+	GList *saved_items;
+
 	gint items_expiry_interval;
 	guint expiration_handler;
 
@@ -87,6 +89,7 @@ review_expiration_timer (WrapFeedChannel *node)
 static void
 wrap_feed_channel_finalize (GObject *obj)
 {
+	GList *iter;
 	WrapFeedChannel *chan;
 	WrapFeedChannelPrivate *priv;
 
@@ -98,6 +101,12 @@ wrap_feed_channel_finalize (GObject *obj)
 
 	if (priv->enclosures_saving_path != NULL)
 		g_free (priv->enclosures_saving_path);
+
+	if (priv->saved_items != NULL) {
+		for (iter = priv->saved_items; iter; iter = iter->next)
+			g_free (iter->data);
+		g_list_free (priv->saved_items);
+	}
 }
 
 static void
diff --git a/src/miners/rss/wrap-feed-enclosure.c b/src/miners/rss/wrap-feed-enclosure.c
index 09318bc..5108f9e 100644
--- a/src/miners/rss/wrap-feed-enclosure.c
+++ b/src/miners/rss/wrap-feed-enclosure.c
@@ -105,45 +105,59 @@ saving_path (WrapFeedEnclosure *enclosure)
 
 	priv = GET_PRIV (enclosure);
 
-	if (priv->save_path == NULL) {
+	if (priv->save_path == NULL || strlen (priv->save_path) == 0) {
+		if (priv->save_path != NULL)
+			g_free (priv->save_path);
+		priv->save_path = NULL;
+
 		folder = wrap_feed_channel_get_enclosures_saving_path (priv->channel);
-		name = g_path_get_basename (feed_enclosure_get_url (priv->enclosure));
-		path = g_build_filename (folder, name, NULL);
 
-		/* This is to avoid overlapping existing files with the same name */
+		if (folder == NULL) {
+			g_warning ("No saving folder set for enclosures.");
+		}
+		else {
+			name = g_path_get_basename (feed_enclosure_get_url (priv->enclosure));
+			path = g_build_filename (folder, name, NULL);
+
+			/* This is to avoid overlapping existing files with the same name */
 
-		modifier = 0;
+			modifier = 0;
 
-		while (access (path, F_OK) == 0) {
-			modifier++;
-			new_name = g_strdup_printf ("%d_%s", modifier, name);
+			while (access (path, F_OK) == 0) {
+				modifier++;
+				new_name = g_strdup_printf ("%d_%s", modifier, name);
 
-			g_free (path);
-			g_free (name);
+				g_free (path);
+				g_free (name);
 
-			path = g_build_filename (folder, new_name, NULL);
-			name = new_name;
-		}
+				path = g_build_filename (folder, new_name, NULL);
+				name = new_name;
+			}
 
-		g_free (name);
-		priv->save_path = path;
+			g_free (name);
+			priv->save_path = path;
+		}
 	}
 
 	return (const gchar*) priv->save_path;
 }
 
 static gchar*
-get_node_query (WrapFeedEnclosure *enclosure)
+get_local_node_query (WrapFeedEnclosure *enclosure)
 {
 	gchar *query;
+	const gchar *path;
 	WrapFeedEnclosurePrivate *priv;
 
+	path = saving_path (enclosure);
+	if (path == NULL)
+		return NULL;
+
 	priv = GET_PRIV (enclosure);
 
 	query = g_strdup_printf ("INSERT {_:enclosure a nfo:FileDataObject; nie:url \"%s\" . ?i mfo:localLink _:enclosure} "
 				 "WHERE {?r nie:url \"%s\" . ?i mfo:remoteLink ?r}",
-				 saving_path (enclosure),
-				 feed_enclosure_get_url (priv->enclosure));
+				 path, feed_enclosure_get_url (priv->enclosure));
 
 	return query;
 }
@@ -179,6 +193,45 @@ notify_miner_fs (const gchar *path)
 }
 
 static void
+verify_enclosure_unmandatory (GObject      *source,
+                              GAsyncResult *result,
+                              gpointer      user_data)
+{
+	/* Completion callback of the update started by unmandatory_enclosure():
+	 * logs a failure, if any, and always drops one reference on the
+	 * WrapFeedEnclosure received as user_data. */
+	WrapFeedEnclosure *wrapped = user_data;
+	GError *failure = NULL;
+
+	tracker_miner_execute_update_finish (TRACKER_MINER (source), result, &failure);
+
+	if (failure != NULL) {
+		g_critical ("Could not remove flag about mandatory enclosure, %s", failure->message);
+		g_error_free (failure);
+	}
+
+	g_object_unref (wrapped);
+}
+
+static void
+unmandatory_enclosure (WrapFeedEnclosure *enclosure)
+{
+	/* Deletes the mfo:optional statement of the enclosure whose remote link
+	 * has this URL; verify_enclosure_unmandatory() unrefs @enclosure later.
+	 * NOTE(review): the URL is pasted unescaped into the query - confirm safe. */
+	WrapFeedEnclosurePrivate *priv = GET_PRIV (enclosure);
+	const gchar *remote_url = feed_enclosure_get_url (priv->enclosure);
+	TrackerMiner *miner = TRACKER_MINER (wrap_feed_channel_get_referring_miner (priv->channel));
+	gchar *sparql;
+
+	sparql = g_strdup_printf ("DELETE {?e mfo:optional ?o} WHERE {?r nie:url \"%s\" . "
+				  "?e mfo:remoteLink ?r . ?e mfo:optional ?o}", remote_url);
+	tracker_miner_execute_update (miner, sparql, NULL, verify_enclosure_unmandatory, enclosure);
+
+	g_free (sparql);
+}
+
+static void
 enclosure_node_set (GObject      *source,
                     GAsyncResult *result,
                     gpointer      user_data)
@@ -190,33 +243,36 @@ enclosure_node_set (GObject      *source,
 	WrapFeedEnclosure *enclosure;
 
 	error = NULL;
+	enclosure = user_data;
 
 	tracker_miner_execute_update_finish (TRACKER_MINER (source), result, &error);
 	if (error != NULL) {
 		g_critical ("Could not save enclosure informations, %s", error->message);
 		g_error_free (error);
+		g_object_unref (enclosure);
 	}
 	else {
-		enclosure = user_data;
-
 		priv = GET_PRIV (enclosure);
+
 		path = saving_path (enclosure);
+		if (path == NULL)
+			return;
 
 		if (notify_miner_fs (path) == FALSE)
 			return;
 
 		fd = fopen (path, "w+");
 		if (fd == NULL) {
-			g_warning ("Unable to open saving location for enclosure.");
+			g_warning ("Unable to open saving location (%s) for enclosure.", path);
 		}
 		else {
 			if (fwrite (priv->data, priv->data_len, 1, fd) != 1)
 				g_warning ("Error while writing enclosure contents on the filesystem: %s.", strerror (errno));
 			fclose (fd);
 		}
-	}
 
-	g_object_unref (enclosure);
+		unmandatory_enclosure (enclosure);
+	}
 }
 
 void
@@ -233,7 +289,10 @@ wrap_feed_enclosure_save_data (WrapFeedEnclosure *enclosure,
 
 	g_object_ref (enclosure);
 
-	query = get_node_query (enclosure);
+	query = get_local_node_query (enclosure);
+	if (query == NULL)
+		return;
+
 	tracker_miner_execute_update (TRACKER_MINER (wrap_feed_channel_get_referring_miner (priv->channel)),
 				      query, NULL, enclosure_node_set, enclosure);
 	g_free (query);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]