[libgrss/wip/ignatenko/autodiscover] implement basic version of autodiscover



commit 12638e47cb8ff368fca2c3d4479653ee6b93901d
Author: Igor Gnatenko <ignatenko src gnome org>
Date:   Thu Jul 30 02:18:56 2015 +0300

    implement basic version of autodiscover
    
    Reference: http://www.rssboard.org/rss-autodiscovery
    Related: https://github.com/ignatenkobrain/gnome-news/issues/19
    Signed-off-by: Igor Gnatenko <ignatenko src gnome org>

 examples/autodiscovery.c |   14 ++
 src/Makefile.am          |    8 +-
 src/grss-autodiscovery.c |  330 ++++++++++++++++++++++++++++++++++++++++++++++
 src/grss-autodiscovery.h |   43 ++++++
 src/libgrss.h            |    1 +
 5 files changed, 394 insertions(+), 2 deletions(-)
---
diff --git a/examples/autodiscovery.c b/examples/autodiscovery.c
new file mode 100644
index 0000000..dca048c
--- /dev/null
+++ b/examples/autodiscovery.c
@@ -0,0 +1,14 @@
+#include <glib.h>
+
+#include <libgrss.h>
+
+/*
+ * Example: fetch a web page and print the feed URLs advertised in its <head>
+ * through RSS/Atom autodiscovery <link> elements.
+ *
+ * Returns 0 on success, 1 when the page could not be fetched or parsed.
+ */
+gint
+main (gint   argc,
+      gchar *argv[])
+{
+  g_autoptr(GrssAutodiscovery) discovery = grss_autodiscovery_new ("https://planet.gnome.org/");
+
+  /* Do not attempt discovery on a page that could not be fetched or parsed. */
+  if (!grss_autodiscovery_fetch (discovery, NULL)) {
+    g_printerr ("Unable to fetch or parse the page\n");
+    return 1;
+  }
+
+  /*
+   * Only the list container is released here; the strings it holds point
+   * into the document owned by @discovery, which is released afterwards.
+   */
+  g_autoptr(GList) lst = grss_autodiscovery_discover (discovery);
+  for (GList *iter = lst; iter != NULL; iter = iter->next)
+    g_print ("%s\n", (const gchar *) iter->data);
+
+  return 0;
+}
diff --git a/src/Makefile.am b/src/Makefile.am
index eaf53c8..8640063 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -43,7 +43,9 @@ sources_public_h = \
        feeds-pool.h                    \
        feeds-publisher.h               \
        feeds-store.h                   \
-       feeds-subscriber.h
+       feeds-subscriber.h              \
+       grss-autodiscovery.h            \
+       $(NULL)
 
 sources_public_c = \
        person.c                        \
@@ -58,7 +60,9 @@ sources_public_c = \
        feeds-pool.c                    \
        feeds-publisher.c               \
        feeds-store.c                   \
-       feeds-subscriber.c
+       feeds-subscriber.c              \
+       grss-autodiscovery.c            \
+       $(NULL)
 
 sources_private_c = \
        $(marshal_source)               \
diff --git a/src/grss-autodiscovery.c b/src/grss-autodiscovery.c
new file mode 100644
index 0000000..5ebea42
--- /dev/null
+++ b/src/grss-autodiscovery.c
@@ -0,0 +1,330 @@
+/* grss-autodiscovery.c
+ *
+ * Copyright (C) 2015 Igor Gnatenko <ignatenko src gnome org>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "grss-autodiscovery.h"
+
+#include <libxml/HTMLparser.h>
+#include <libsoup/soup.h>
+
+/*
+ * Pointer casts between plain C strings and the unsigned-char based strings
+ * used by libxml2 node names and contents. They only reinterpret the
+ * pointer; no copy or conversion is made.
+ */
+#define XML_TO_CHAR(s)  ((char *) (s))
+#define CHAR_TO_XML(s)  ((unsigned char *) (s))
+
+/**
+ * SECTION: autodiscovery
+ * @short_description: autodiscovery
+ *
+ * #GrssAutodiscovery represents the RSS autodiscovery feature.
+ */
+
+/* Per-instance private data of #GrssAutodiscovery. */
+typedef struct
+{
+  /* Page address; copied from the "url" property, used for the GET request
+   * and handed to the HTML parser as the document URL. */
+  gchar *url;
+  /* Parsed HTML document produced by htmlReadIO(); NULL until a load
+   * succeeds. Released with xmlFreeDoc() in finalize. */
+  htmlDocPtr doc;
+  /* Response body stream returned by soup_session_send(); read and closed
+   * by the parser I/O callbacks, unreferenced in finalize. */
+  GInputStream *stream;
+} GrssAutodiscoveryPrivate;
+
+/* Registers the GrssAutodiscovery type (a GObject subclass) together with
+ * its private data structure. */
+G_DEFINE_TYPE_WITH_PRIVATE (GrssAutodiscovery, grss_autodiscovery, G_TYPE_OBJECT)
+
+/* Property identifiers; PROP_0 is a placeholder and LAST_PROP is the number
+ * of slots in gParamSpecs. */
+enum {
+  PROP_0,
+  PROP_URL,
+  LAST_PROP
+};
+
+/* Property specifications indexed by the identifiers above; filled in by
+ * grss_autodiscovery_class_init(). */
+static GParamSpec *gParamSpecs [LAST_PROP];
+
+/*
+ * Read callback handed to htmlReadIO().
+ *
+ * @context: the #GInputStream to read from.
+ * @buffer:  destination buffer supplied by the parser.
+ * @len:     capacity of @buffer in bytes.
+ *
+ * Returns the value of g_input_stream_read() (bytes read, or its failure
+ * value), or -1 when @context is not an input stream.
+ */
+static int
+grss_autodiscovery_io_read_cb (void *context,
+                               char *buffer,
+                               int   len)
+{
+  GInputStream *input = context;
+  gssize n_read;
+
+  g_return_val_if_fail (G_IS_INPUT_STREAM (input), -1);
+
+  /* Blocking read, no cancellable, error details are discarded. */
+  n_read = g_input_stream_read (input, buffer, len, NULL, NULL);
+
+  return n_read;
+}
+
+/*
+ * Close callback handed to htmlReadIO().
+ *
+ * @context: the #GInputStream to close.
+ *
+ * Returns 0 when the stream was closed, -1 when closing failed or @context
+ * is not an input stream.
+ */
+static int
+grss_autodiscovery_io_close_cb (void *context)
+{
+  GInputStream *input = context;
+
+  g_return_val_if_fail (G_IS_INPUT_STREAM (input), -1);
+
+  if (!g_input_stream_close (input, NULL, NULL))
+    return -1;
+
+  return 0;
+}
+
+/*
+ * grss_autodiscovery_load_from_stream:
+ * @discovery: a #GrssAutodiscovery whose response stream has been set.
+ *
+ * Parses the HTML available on the instance's input stream and stores the
+ * resulting document in the private data. Any document left over from an
+ * earlier load is released first, which invalidates strings previously
+ * obtained from it.
+ *
+ * Returns: %TRUE if a document was produced, %FALSE if there is no stream to
+ * read from or the parser returned no document.
+ */
+gboolean
+grss_autodiscovery_load_from_stream (GrssAutodiscovery *discovery)
+{
+  g_return_val_if_fail (GRSS_IS_AUTODISCOVERY (discovery), FALSE);
+
+  GrssAutodiscoveryPrivate *priv = grss_autodiscovery_get_instance_private (discovery);
+
+  /* Nothing to parse when the request produced no body stream. */
+  if (priv->stream == NULL)
+    return FALSE;
+
+  /* Avoid leaking the document of a previous load. */
+  if (priv->doc != NULL) {
+    xmlFreeDoc (priv->doc);
+    priv->doc = NULL;
+  }
+
+  priv->doc = htmlReadIO (grss_autodiscovery_io_read_cb,
+                          grss_autodiscovery_io_close_cb,
+                          priv->stream,
+                          priv->url,
+                          "utf8", /* FIXME */
+                          0);
+
+  /* TODO: also have a error setting if fails */
+  return priv->doc != NULL;
+}
+
+/**
+ * grss_autodiscovery_new:
+ * @url: address of the web page to inspect; stored in the "url" property.
+ *
+ * Creates a #GrssAutodiscovery bound to @url.
+ *
+ * Returns: a newly created #GrssAutodiscovery.
+ */
+GrssAutodiscovery *
+grss_autodiscovery_new (const gchar *url)
+{
+  GrssAutodiscovery *discovery;
+
+  discovery = g_object_new (GRSS_TYPE_AUTODISCOVERY,
+                            "url", url,
+                            NULL);
+
+  return discovery;
+}
+
+/*
+ * GObject finalize handler: releases everything held in the private data
+ * (response stream, parsed document, URL copy) and chains up to the parent.
+ */
+static void
+grss_autodiscovery_finalize (GObject *object)
+{
+  GrssAutodiscovery *self = (GrssAutodiscovery *)object;
+  GrssAutodiscoveryPrivate *priv = grss_autodiscovery_get_instance_private (self);
+
+  /* The stream is NULL when no fetch happened or the request failed;
+   * g_clear_object() copes with that, g_object_unref() does not. */
+  g_clear_object (&priv->stream);
+
+  if (priv->doc != NULL) {
+    xmlFreeDoc (priv->doc);
+    priv->doc = NULL;
+  }
+
+  /* The URL was duplicated in set_property and is owned by the instance. */
+  g_clear_pointer (&priv->url, g_free);
+
+  G_OBJECT_CLASS (grss_autodiscovery_parent_class)->finalize (object);
+}
+
+/*
+ * GObject property getter. Only "url" is readable; any other identifier
+ * triggers the standard invalid-property warning.
+ */
+static void
+grss_autodiscovery_get_property (GObject    *object,
+                                 guint       prop_id,
+                                 GValue     *value,
+                                 GParamSpec *pspec)
+{
+  GrssAutodiscoveryPrivate *priv =
+    grss_autodiscovery_get_instance_private (GRSS_AUTODISCOVERY (object));
+
+  if (prop_id == PROP_URL)
+    {
+      g_value_set_string (value, priv->url);
+    }
+  else
+    {
+      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+    }
+}
+
+/*
+ * GObject property setter. Only "url" is writable (the property is
+ * installed as construct-only); the instance keeps its own copy of the
+ * string. Any other identifier triggers the standard invalid-property
+ * warning.
+ */
+static void
+grss_autodiscovery_set_property (GObject      *object,
+                                 guint         prop_id,
+                                 const GValue *value,
+                                 GParamSpec   *pspec)
+{
+  GrssAutodiscoveryPrivate *priv =
+    grss_autodiscovery_get_instance_private (GRSS_AUTODISCOVERY (object));
+
+  if (prop_id == PROP_URL)
+    {
+      const gchar *new_url = g_value_get_string (value);
+
+      priv->url = g_strdup (new_url);
+    }
+  else
+    {
+      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+    }
+}
+
+/*
+ * Class initializer: hooks up the finalize and property handlers and
+ * installs the construct-only, read/write "url" string property.
+ */
+static void
+grss_autodiscovery_class_init (GrssAutodiscoveryClass *klass)
+{
+  GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
+  const GParamFlags url_flags = (G_PARAM_CONSTRUCT_ONLY |
+                                 G_PARAM_READWRITE |
+                                 G_PARAM_STATIC_STRINGS);
+
+  gobject_class->set_property = grss_autodiscovery_set_property;
+  gobject_class->get_property = grss_autodiscovery_get_property;
+  gobject_class->finalize = grss_autodiscovery_finalize;
+
+  gParamSpecs [PROP_URL] = g_param_spec_string ("url",
+                                                "Url",
+                                                "Url of site for RSS discovery",
+                                                NULL,
+                                                url_flags);
+
+  g_object_class_install_property (gobject_class,
+                                   PROP_URL,
+                                   gParamSpecs [PROP_URL]);
+}
+
+/* Instance initializer: no per-instance setup is performed here; the URL is
+ * supplied through the "url" property and the other private fields are
+ * filled in by grss_autodiscovery_fetch(). */
+static void
+grss_autodiscovery_init (GrssAutodiscovery *self)
+{
+}
+
+/**
+ * grss_autodiscovery_fetch:
+ * @discovery: a #GrssAutodiscovery.
+ * @error: (nullable): return location for a #GError, or %NULL.
+ *
+ * Downloads the page at the instance's URL with a synchronous HTTP GET and
+ * parses it as HTML so that grss_autodiscovery_discover() can inspect it.
+ * Calling it again replaces the previously fetched page.
+ *
+ * Returns: %TRUE if fetch and parsing was ok, %FALSE otherwise (in which
+ * case @error is set when provided).
+ */
+gboolean
+grss_autodiscovery_fetch (GrssAutodiscovery  *discovery,
+                          GError            **error)
+{
+  g_return_val_if_fail (GRSS_IS_AUTODISCOVERY (discovery), FALSE);
+  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
+
+  GrssAutodiscoveryPrivate *priv = grss_autodiscovery_get_instance_private (discovery);
+  SoupSession *session = NULL;
+  SoupMessage *message = NULL;
+  gboolean ok = FALSE;
+
+  if (priv->url == NULL) {
+    g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT,
+                         "No URL set for autodiscovery");
+    return FALSE;
+  }
+
+  /* soup_message_new() yields NULL when the URL cannot be parsed. */
+  message = soup_message_new ("GET", priv->url);
+  if (message == NULL) {
+    g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT,
+                 "Invalid URL: %s", priv->url);
+    return FALSE;
+  }
+
+  session = soup_session_new ();
+
+  /* Drop the stream of an earlier fetch before replacing it. */
+  g_clear_object (&priv->stream);
+  priv->stream = soup_session_send (session, message, NULL, error);
+
+  /*
+   * Success is signalled by the returned stream, not by the @error pointer
+   * itself: callers may legitimately pass a non-NULL @error location.
+   */
+  if (priv->stream != NULL) {
+    /* Parse while session and message are still alive. */
+    ok = grss_autodiscovery_load_from_stream (discovery);
+    if (!ok)
+      g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA,
+                   "Unable to parse HTML from %s", priv->url);
+  }
+
+  g_object_unref (message);
+  g_object_unref (session);
+
+  return ok;
+}
+
+/*
+ * Returns the text value of @attr, or NULL when the attribute carries no
+ * value at all (e.g. <link href>) or its first child is not a text node.
+ * The string belongs to the document.
+ */
+static gchar *
+grss_autodiscovery_attr_text (xmlAttr *attr)
+{
+  if (attr->children == NULL || attr->children->type != XML_TEXT_NODE)
+    return NULL;
+
+  return XML_TO_CHAR (attr->children->content);
+}
+
+/*
+ * grss_autodiscovery_validate_link_node:
+ * @link_node: a <link> element.
+ * @type: (out) (optional): set to the matched MIME type when the "type"
+ *   attribute is acceptable; may be %NULL.
+ * @href: (out) (optional): set to the value of the "href" attribute when it
+ *   has one; may be %NULL.
+ *
+ * Checks whether @link_node is an autodiscovery link: it needs an "href"
+ * with a value, rel="alternate" and a feed MIME type. Strings stored in
+ * @type and @href point into the document and must not be freed. @href may
+ * be written even when the function returns %FALSE.
+ *
+ * Returns: %TRUE when all three attributes are acceptable.
+ */
+static gboolean
+grss_autodiscovery_validate_link_node (xmlNode  *link_node,
+                                       gchar   **type,
+                                       gchar   **href)
+{
+  static const gchar * const allowed_mime_types[] = {
+    "application/atom+xml",
+    "application/rss+xml",
+    NULL
+  };
+
+  gboolean href_ok = FALSE;
+  gboolean rel_ok = FALSE;
+  gboolean type_ok = FALSE;
+
+  for (xmlAttr *props = link_node->properties; props; props = props->next) {
+    const gchar *name = XML_TO_CHAR (props->name);
+    /* NULL for valueless attributes, which would otherwise be dereferenced. */
+    gchar *text = grss_autodiscovery_attr_text (props);
+
+    if (g_strcmp0 (name, "href") == 0) {
+      /*
+       * The href attribute MUST be the feed's URL. This can be a relative URL
+       * in pages that include a base element in the header.
+       *
+       * <head>
+       *   <title>RSS Advisory Board</title>
+       *   <base href="http://www.rssboard.org/">
+       *   <link rel="alternate" type="application/rss+xml" href="rss-feed">
+       * </head>
+       *
+       * Because some software might not check for a base URL in relation to
+       * autodiscovery links, publishers SHOULD identify feeds with full URLs.
+       * When an autodiscovery link is relative and no base URL has been
+       * provided, clients should treat the web page's URL as the base.
+       */
+      /* TODO: add some more checks, for example base element */
+      if (text != NULL) {
+        if (href != NULL)
+          *href = text;
+        href_ok = TRUE;
+      }
+    } else if (g_strcmp0 (name, "rel") == 0) {
+      /*
+       * The rel attribute MUST have a value of "alternate", a keyword that
+       * indicates the link is an alternate version of the site's main
+       * content.
+       *
+       * Although for purposes other than autodiscovery this attribute may
+       * contain multiple keywords separated by spaces, in an autodiscovery
+       * link, the value MUST NOT contain keywords other than "alternate".
+       *
+       * Additionally, though rel keywords are case-insensitive elsewhere,
+       * "alternate" MUST be lowercase.
+       */
+      if (g_strcmp0 (text, "alternate") == 0)
+        rel_ok = TRUE;
+    } else if (g_strcmp0 (name, "type") == 0) {
+      /*
+       * The type attribute MUST contain the feed's MIME type, which is
+       * "application/rss+xml" for RSS 1.0 or RSS 2.0 feeds.
+       *
+       * Although type values are case-insensitive for other HTML and XHTML
+       * links, the value must be lowercase for autodiscovery.
+       */
+      for (guint i = 0; allowed_mime_types[i] != NULL; i++) {
+        if (g_strcmp0 (text, allowed_mime_types[i]) == 0) {
+          if (type != NULL)
+            *type = text;
+          type_ok = TRUE;
+          break;
+        }
+      }
+    }
+  }
+
+  return (href_ok && rel_ok && type_ok);
+}
+
+/*
+ * grss_autodiscovery_get_head_node:
+ * @html_node: the root element of a parsed HTML document, or %NULL.
+ *
+ * Looks among the direct children of @html_node for the first element
+ * named "head".
+ *
+ * Returns: the <head> element, or %NULL when @html_node is %NULL or has no
+ * such child. The node belongs to the document.
+ */
+xmlNode *
+grss_autodiscovery_get_head_node (xmlNode *html_node)
+{
+  xmlNode *cur_node = NULL;
+
+  /* An empty document has no root element to search. */
+  if (html_node == NULL)
+    return NULL;
+
+  for (cur_node = html_node->children; cur_node; cur_node = cur_node->next)
+    if (cur_node->type == XML_ELEMENT_NODE &&
+        g_strcmp0 (XML_TO_CHAR (cur_node->name), "head") == 0)
+      return cur_node;
+
+  return NULL;
+}
+
+/**
+ * grss_autodiscovery_discover:
+ * @discovery: a #GrssAutodiscovery on which grss_autodiscovery_fetch() has
+ *   succeeded.
+ *
+ * We will return all possible auto-discovered links, in document order, but
+ * you'd probably want to use the first of the list.
+ *
+ * Returns: (element-type utf8) (transfer container): a #GList of feed URLs
+ * taken from the "href" attributes, or %NULL when none was found. Free the
+ * list with g_list_free(); the strings point into the parsed document owned
+ * by @discovery and must not be freed or used after @discovery is released
+ * or fetched again.
+ */
+GList *
+grss_autodiscovery_discover (GrssAutodiscovery *discovery)
+{
+  g_return_val_if_fail (GRSS_IS_AUTODISCOVERY (discovery), NULL);
+
+  GrssAutodiscoveryPrivate *priv = grss_autodiscovery_get_instance_private (discovery);
+  g_return_val_if_fail (priv->doc, NULL);
+
+  GList *ret = NULL;
+  xmlNode *tmp = NULL;
+  xmlNode *root_element = xmlDocGetRootElement (priv->doc);
+  gchar *href = NULL;
+
+  /* An empty document has no root element, hence no <head> to inspect. */
+  if (!root_element)
+    return NULL;
+
+  tmp = grss_autodiscovery_get_head_node (root_element);
+  if (!tmp)
+    return NULL;
+
+  for (tmp = tmp->children; tmp; tmp = tmp->next) {
+    if (tmp->type != XML_ELEMENT_NODE)
+      continue;
+    if (g_strcmp0 (XML_TO_CHAR (tmp->name), "link") != 0)
+      continue;
+
+    if (grss_autodiscovery_validate_link_node (tmp, NULL, &href)) {
+      /* Prepend now and reverse once at the end to keep this linear. */
+      ret = g_list_prepend (ret, href);
+      g_debug ("RSS discovered link: %s\n", href);
+    }
+  }
+
+  return g_list_reverse (ret);
+}
diff --git a/src/grss-autodiscovery.h b/src/grss-autodiscovery.h
new file mode 100644
index 0000000..19a99b0
--- /dev/null
+++ b/src/grss-autodiscovery.h
@@ -0,0 +1,43 @@
+/* grss-autodiscovery.h
+ *
+ * Copyright (C) 2015 Igor Gnatenko <ignatenko src gnome org>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GRSS_AUTODISCOVERY_H
+#define GRSS_AUTODISCOVERY_H
+
+#include <glib-object.h>
+
+G_BEGIN_DECLS
+
+/* GType identifier of #GrssAutodiscovery. */
+#define GRSS_TYPE_AUTODISCOVERY (grss_autodiscovery_get_type())
+
+/* Declares the GrssAutodiscovery instance type, derived from GObject, as a
+ * derivable type; the class structure is defined below. */
+G_DECLARE_DERIVABLE_TYPE (GrssAutodiscovery, grss_autodiscovery, GRSS, AUTODISCOVERY, GObject)
+
+/* Class structure of #GrssAutodiscovery; it adds nothing to the parent
+ * GObject class. */
+struct _GrssAutodiscoveryClass
+{
+  GObjectClass parent;
+};
+
+/* Creates a #GrssAutodiscovery for the page at @url. */
+GrssAutodiscovery *grss_autodiscovery_new (const gchar *url);
+
+/* Downloads the page with an HTTP GET and parses it as HTML; @error is the
+ * return location handed to the HTTP request. */
+gboolean grss_autodiscovery_fetch (GrssAutodiscovery  *discovery,
+                                   GError            **error);
+/* Returns a list of the "href" values of the feed <link> elements found in
+ * the fetched page's <head>; the strings point into the parsed document
+ * held by @discovery. */
+GList *grss_autodiscovery_discover (GrssAutodiscovery *discovery);
+
+G_END_DECLS
+
+#endif /* GRSS_AUTODISCOVERY_H */
diff --git a/src/libgrss.h b/src/libgrss.h
index 053c5d7..758cb59 100644
--- a/src/libgrss.h
+++ b/src/libgrss.h
@@ -45,5 +45,6 @@
 #include "feeds-subscriber.h"
 #include "feeds-publisher.h"
 #include "feeds-group.h"
+#include "grss-autodiscovery.h"
 
 #endif /* __LIBGRSS_H__ */


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]