[tracker/rss-enclosures] tracker-miner-rss: management of mandatory enclosures (mfo:optional = false)
- From: Roberto Guido <rguido src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/rss-enclosures] tracker-miner-rss: management of mandatory enclosures (mfo:optional = false)
- Date: Wed, 26 May 2010 22:06:04 +0000 (UTC)
commit e48d9f3ba9477ede66a6cc115cc5beb45c80ddbe
Author: Roberto Guido <bob4mail gmail com>
Date: Thu May 27 00:08:12 2010 +0200
tracker-miner-rss: management of mandatory enclosures (mfo:optional = false)
src/miners/rss/tracker-miner-rss.c | 127 ++++++++++++++++++++++++++++-----
src/miners/rss/wrap-feed-channel.c | 9 +++
src/miners/rss/wrap-feed-enclosure.c | 107 ++++++++++++++++++++++-------
3 files changed, 199 insertions(+), 44 deletions(-)
---
diff --git a/src/miners/rss/tracker-miner-rss.c b/src/miners/rss/tracker-miner-rss.c
index a309aad..f5daf57 100644
--- a/src/miners/rss/tracker-miner-rss.c
+++ b/src/miners/rss/tracker-miner-rss.c
@@ -242,8 +242,7 @@ enclosure_downloaded_cb (SoupSession *session,
g_object_get (msg, "status-code", &status, NULL);
if (status < 200 || status > 299) {
- g_warning ("Unable to download enclosure in %s.",
- feed_enclosure_get_url (FEED_ENCLOSURE (enclosure)));
+ g_warning ("Unable to download enclosure.");
}
else {
wrap_feed_enclosure_save_data (enclosure,
@@ -255,15 +254,33 @@ enclosure_downloaded_cb (SoupSession *session,
}
static void
+download_enclosure_now (TrackerMinerRSS *miner, FeedEnclosure *enclosure, WrapFeedChannel *channel)
+{
+ const gchar *url;
+ SoupMessage *msg;
+ TrackerMinerRSSPrivate *priv;
+ WrapFeedEnclosure *wrap_enc;
+
+ priv = TRACKER_MINER_RSS_GET_PRIVATE (miner);
+
+ wrap_enc = wrap_feed_enclosure_new (enclosure, channel);
+ url = feed_enclosure_get_url (enclosure);
+ msg = soup_message_new ("GET", url);
+
+ g_message ("Downloading enclosures in %s", url);
+
+ priv = TRACKER_MINER_RSS_GET_PRIVATE (miner);
+ soup_session_queue_message (feeds_pool_get_session (priv->pool), msg,
+ enclosure_downloaded_cb, wrap_enc);
+}
+
+static void
manage_enclosure (TrackerMinerRSS *miner,
WrapFeedChannel *feed,
FeedEnclosure *enclosure)
{
int size;
const gchar *path;
- SoupMessage *msg;
- TrackerMinerRSSPrivate *priv;
- WrapFeedEnclosure *enc_wrap;
if (wrap_feed_channel_get_download_enclosures (feed) == FALSE) {
return;
@@ -280,14 +297,7 @@ manage_enclosure (TrackerMinerRSS *miner,
return;
}
- enc_wrap = wrap_feed_enclosure_new (enclosure, feed);
- msg = soup_message_new ("GET", feed_enclosure_get_url (enclosure));
-
- g_message ("Downloading enclosures in %s", feed_enclosure_get_url (enclosure));
-
- priv = TRACKER_MINER_RSS_GET_PRIVATE (miner);
- soup_session_queue_message (feeds_pool_get_session (priv->pool), msg,
- enclosure_downloaded_cb, enc_wrap);
+ download_enclosure_now (miner, enclosure, feed);
}
static int
@@ -314,6 +324,7 @@ queue_enclosures (TrackerMinerRSS *miner,
subject = g_strdup_printf ("_:enclosure%d", num);
tracker_sparql_builder_insert_open (sparql, rsubject);
+
tracker_sparql_builder_subject (sparql, rsubject);
tracker_sparql_builder_predicate (sparql, "a");
tracker_sparql_builder_object (sparql, "nfo:RemoteDataObject");
@@ -321,7 +332,7 @@ queue_enclosures (TrackerMinerRSS *miner,
tracker_sparql_builder_object (sparql, "nie:InformationElement");
tracker_sparql_builder_predicate (sparql, "nie:url");
- tracker_sparql_builder_object_string (sparql, tmp_string);
+ tracker_sparql_builder_object_unvalidated (sparql, tmp_string);
tracker_sparql_builder_predicate (sparql, "nfo:fileSize");
tracker_sparql_builder_object_int64 (sparql, (gint64) feed_enclosure_get_length (enc));
@@ -329,7 +340,7 @@ queue_enclosures (TrackerMinerRSS *miner,
tmp_string = feed_enclosure_get_format (enc);
if (tmp_string != NULL) {
tracker_sparql_builder_predicate (sparql, "nie:mimeType");
- tracker_sparql_builder_object_string (sparql, tmp_string);
+ tracker_sparql_builder_object_unvalidated (sparql, tmp_string);
}
tracker_sparql_builder_subject (sparql, subject);
@@ -383,14 +394,19 @@ verify_item_insertion (GObject *source,
gpointer user_data)
{
GError *error;
+ FeedItem *item;
- error = NULL;
+ item = user_data;
+ error = NULL;
tracker_miner_execute_update_finish (TRACKER_MINER (source), result, &error);
+
if (error != NULL) {
g_critical ("Could not insert feed information, %s", error->message);
g_error_free (error);
}
+
+ g_object_unref (item);
}
static void
@@ -417,6 +433,8 @@ item_verify_reply_cb (GObject *source_object,
gboolean has_geopoint;
miner = TRACKER_MINER_RSS (source_object);
+ item = user_data;
+
error = NULL;
response = tracker_miner_execute_sparql_finish (TRACKER_MINER (source_object),
res,
@@ -424,16 +442,17 @@ item_verify_reply_cb (GObject *source_object,
if (error != NULL) {
g_message ("Could not verify feed existance, %s", error->message);
+ g_object_unref (item);
g_error_free (error);
return;
}
values = g_ptr_array_index (response, 0);
if (g_strcmp0 (values[0], "1") == 0) {
+ g_object_unref (item);
return;
}
- item = user_data;
feed = feed_item_get_parent (item);
url = get_message_url (item);
@@ -449,6 +468,7 @@ item_verify_reply_cb (GObject *source_object,
g_message (" Geopoint, using longitude:%f, latitude:%f",
longitude, latitude);
+ tracker_sparql_builder_insert_open (sparql, "_:location");
tracker_sparql_builder_subject (sparql, "_:location");
tracker_sparql_builder_predicate (sparql, "a");
tracker_sparql_builder_object (sparql, "mlo:GeoLocation");
@@ -464,6 +484,7 @@ item_verify_reply_cb (GObject *source_object,
tracker_sparql_builder_object_blank_close (sparql);
}
+ tracker_sparql_builder_insert_open (sparql, "_:message");
tracker_sparql_builder_subject (sparql, "_:message");
tracker_sparql_builder_predicate (sparql, "a");
tracker_sparql_builder_object (sparql, "mfo:FeedMessage");
@@ -529,7 +550,7 @@ item_verify_reply_cb (GObject *source_object,
tracker_sparql_builder_get_result (sparql),
NULL,
verify_item_insertion,
- NULL);
+ item);
g_object_unref (sparql);
}
@@ -546,7 +567,17 @@ check_if_save (TrackerMinerRSS *miner,
url = get_message_url (item);
wfeed = WRAP_FEED_CHANNEL (feed);
+ /*
+ TODO: A "cache" of already downloaded items could be
+ saved in the WrapFeedChannel, to avoid asking Tracker
+ every time. Note that some feeds (particularly
+ podcasts) always use the same URL for their items,
+ so a check on the enclosure URLs is also
+ required.
+ */
+
g_debug ("Verifying feed '%s' is stored", url);
+ g_object_ref (item);
query = g_strdup_printf ("ASK { ?message a mfo:FeedMessage; "
"nie:url \"%s\"; nmo:communicationChannel <%s> }",
@@ -562,6 +593,59 @@ check_if_save (TrackerMinerRSS *miner,
}
static void
+mandatory_enclosures_collected (GObject *source_object,
+ GAsyncResult *res,
+ gpointer user_data)
+{
+ int i;
+ gchar **values;
+ const GPtrArray *response;
+ GError *error;
+ FeedEnclosure *enclosure;
+ WrapFeedChannel *feed;
+
+ error = NULL;
+ response = tracker_miner_execute_sparql_finish (TRACKER_MINER (source_object),
+ res,
+ &error);
+
+ if (error != NULL) {
+ g_message ("Could not verify mandatory enclosures, %s", error->message);
+ g_error_free (error);
+ return;
+ }
+
+ feed = user_data;
+
+ for (i = 0; i < response->len; i++) {
+ values = g_ptr_array_index (response, i);
+ enclosure = feed_enclosure_new (values [0]);
+ download_enclosure_now (TRACKER_MINER_RSS (source_object), enclosure, feed);
+ }
+}
+
+static void
+check_mandatory_enclosures (TrackerMinerRSS *miner,
+ WrapFeedChannel *feed)
+{
+ gchar *query;
+
+ query = g_strdup_printf ("SELECT ?u "
+ "WHERE { ?e a mfo:Enclosure . ?e mfo:optional false . "
+ "?i mfo:enclosureList ?e . ?i nmo:communicationChannel <%s> . "
+ "?e mfo:remoteLink ?r . ?r nie:url ?u }",
+ wrap_feed_channel_get_subject (feed));
+
+ tracker_miner_execute_sparql (TRACKER_MINER (miner),
+ query,
+ NULL,
+ mandatory_enclosures_collected,
+ feed);
+
+ g_free (query);
+}
+
+static void
feed_fetched (FeedsPool *pool,
FeedChannel *feed,
GList *items,
@@ -594,6 +678,9 @@ feed_fetched (FeedsPool *pool,
item = iter->data;
check_if_save (miner, item, feed);
}
+
+ if (wrap_feed_channel_get_download_enclosures (WRAP_FEED_CHANNEL (feed)) == FALSE)
+ check_mandatory_enclosures (miner, WRAP_FEED_CHANNEL (feed));
}
static void
@@ -626,8 +713,8 @@ feeds_retrieve_cb (GObject *source_object,
g_message ("Found %d feeds", response->len);
- for (i = 0; i < response->len; i++) {
- values = g_ptr_array_index (response, i);
+ for (i = 0; i < response->len; i++) {
+ values = g_ptr_array_index (response, i);
chan = wrap_feed_channel_new (TRACKER_MINER_RSS (source_object), values [2]);
feed_channel_set_source (FEED_CHANNEL (chan), values [0]);
diff --git a/src/miners/rss/wrap-feed-channel.c b/src/miners/rss/wrap-feed-channel.c
index 2bc0d8f..c1497b0 100644
--- a/src/miners/rss/wrap-feed-channel.c
+++ b/src/miners/rss/wrap-feed-channel.c
@@ -30,6 +30,8 @@ struct _WrapFeedChannelPrivate {
gchar *subject;
+ GList *saved_items;
+
gint items_expiry_interval;
guint expiration_handler;
@@ -87,6 +89,7 @@ review_expiration_timer (WrapFeedChannel *node)
static void
wrap_feed_channel_finalize (GObject *obj)
{
+ GList *iter;
WrapFeedChannel *chan;
WrapFeedChannelPrivate *priv;
@@ -98,6 +101,12 @@ wrap_feed_channel_finalize (GObject *obj)
if (priv->enclosures_saving_path != NULL)
g_free (priv->enclosures_saving_path);
+
+ if (priv->saved_items != NULL) {
+ for (iter = priv->saved_items; iter; iter = iter->next)
+ g_free (iter->data);
+ g_list_free (priv->saved_items);
+ }
}
static void
diff --git a/src/miners/rss/wrap-feed-enclosure.c b/src/miners/rss/wrap-feed-enclosure.c
index 09318bc..5108f9e 100644
--- a/src/miners/rss/wrap-feed-enclosure.c
+++ b/src/miners/rss/wrap-feed-enclosure.c
@@ -105,45 +105,59 @@ saving_path (WrapFeedEnclosure *enclosure)
priv = GET_PRIV (enclosure);
- if (priv->save_path == NULL) {
+ if (priv->save_path == NULL || strlen (priv->save_path) == 0) {
+ if (priv->save_path != NULL)
+ g_free (priv->save_path);
+ priv->save_path = NULL;
+
folder = wrap_feed_channel_get_enclosures_saving_path (priv->channel);
- name = g_path_get_basename (feed_enclosure_get_url (priv->enclosure));
- path = g_build_filename (folder, name, NULL);
- /* This is to avoid overlapping existing files with the same name */
+ if (folder == NULL) {
+ g_warning ("No saving folder set for enclosures.");
+ }
+ else {
+ name = g_path_get_basename (feed_enclosure_get_url (priv->enclosure));
+ path = g_build_filename (folder, name, NULL);
+
+ /* This is to avoid overlapping existing files with the same name */
- modifier = 0;
+ modifier = 0;
- while (access (path, F_OK) == 0) {
- modifier++;
- new_name = g_strdup_printf ("%d_%s", modifier, name);
+ while (access (path, F_OK) == 0) {
+ modifier++;
+ new_name = g_strdup_printf ("%d_%s", modifier, name);
- g_free (path);
- g_free (name);
+ g_free (path);
+ g_free (name);
- path = g_build_filename (folder, new_name, NULL);
- name = new_name;
- }
+ path = g_build_filename (folder, new_name, NULL);
+ name = new_name;
+ }
- g_free (name);
- priv->save_path = path;
+ g_free (name);
+ priv->save_path = path;
+ }
}
return (const gchar*) priv->save_path;
}
static gchar*
-get_node_query (WrapFeedEnclosure *enclosure)
+get_local_node_query (WrapFeedEnclosure *enclosure)
{
gchar *query;
+ const gchar *path;
WrapFeedEnclosurePrivate *priv;
+ path = saving_path (enclosure);
+ if (path == NULL)
+ return NULL;
+
priv = GET_PRIV (enclosure);
query = g_strdup_printf ("INSERT {_:enclosure a nfo:FileDataObject; nie:url \"%s\" . ?i mfo:localLink _:enclosure} "
"WHERE {?r nie:url \"%s\" . ?i mfo:remoteLink ?r}",
- saving_path (enclosure),
- feed_enclosure_get_url (priv->enclosure));
+ path, feed_enclosure_get_url (priv->enclosure));
return query;
}
@@ -179,6 +193,45 @@ notify_miner_fs (const gchar *path)
}
static void
+verify_enclosure_unmandatory (GObject *source,
+ GAsyncResult *result,
+ gpointer user_data)
+{
+ GError *error;
+ WrapFeedEnclosure *enclosure;
+
+ enclosure = user_data;
+
+ error = NULL;
+ tracker_miner_execute_update_finish (TRACKER_MINER (source), result, &error);
+
+ if (error != NULL) {
+ g_critical ("Could not remove flag about mandatory enclosure, %s", error->message);
+ g_error_free (error);
+ }
+
+ g_object_unref (enclosure);
+}
+
+static void
+unmandatory_enclosure (WrapFeedEnclosure *enclosure)
+{
+ gchar *query;
+ WrapFeedEnclosurePrivate *priv;
+
+ priv = GET_PRIV (enclosure);
+
+ query = g_strdup_printf ("DELETE {?e mfo:optional ?o} "
+ "WHERE {?r nie:url \"%s\" . ?e mfo:remoteLink ?r . ?e mfo:optional ?o}",
+ feed_enclosure_get_url (priv->enclosure));
+
+ tracker_miner_execute_update (TRACKER_MINER (wrap_feed_channel_get_referring_miner (priv->channel)),
+ query, NULL, verify_enclosure_unmandatory, enclosure);
+
+ g_free (query);
+}
+
+static void
enclosure_node_set (GObject *source,
GAsyncResult *result,
gpointer user_data)
@@ -190,33 +243,36 @@ enclosure_node_set (GObject *source,
WrapFeedEnclosure *enclosure;
error = NULL;
+ enclosure = user_data;
tracker_miner_execute_update_finish (TRACKER_MINER (source), result, &error);
if (error != NULL) {
g_critical ("Could not save enclosure informations, %s", error->message);
g_error_free (error);
+ g_object_unref (enclosure);
}
else {
- enclosure = user_data;
-
priv = GET_PRIV (enclosure);
+
path = saving_path (enclosure);
+ if (path == NULL)
+ return;
if (notify_miner_fs (path) == FALSE)
return;
fd = fopen (path, "w+");
if (fd == NULL) {
- g_warning ("Unable to open saving location for enclosure.");
+ g_warning ("Unable to open saving location (%s) for enclosure.", path);
}
else {
if (fwrite (priv->data, priv->data_len, 1, fd) != 1)
g_warning ("Error while writing enclosure contents on the filesystem: %s.", strerror (errno));
fclose (fd);
}
- }
- g_object_unref (enclosure);
+ unmandatory_enclosure (enclosure);
+ }
}
void
@@ -233,7 +289,10 @@ wrap_feed_enclosure_save_data (WrapFeedEnclosure *enclosure,
g_object_ref (enclosure);
- query = get_node_query (enclosure);
+ query = get_local_node_query (enclosure);
+ if (query == NULL)
+ return;
+
tracker_miner_execute_update (TRACKER_MINER (wrap_feed_channel_get_referring_miner (priv->channel)),
query, NULL, enclosure_node_set, enclosure);
g_free (query);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]