evolution-rss r388 - in trunk: . src
- From: lucilanga svn gnome org
- To: svn-commits-list gnome org
- Subject: evolution-rss r388 - in trunk: . src
- Date: Wed, 8 Oct 2008 16:53:19 +0000 (UTC)
Author: lucilanga
Date: Wed Oct 8 16:53:18 2008
New Revision: 388
URL: http://svn.gnome.org/viewvc/evolution-rss?rev=388&view=rev
Log:
2008-10-08 Lucian Langa <lucilanga gnome org>
* src/parser.c, src/parser.h move parsing
functions to those files
* pixmaps/rss-text-html, pixmaps/rss-text-x-generic
add new summary icons
* src/rss.c: finish_feed(): correctly
add non-validated feed
Added:
trunk/src/parser.c
trunk/src/parser.h
Modified:
trunk/ChangeLog
trunk/src/Makefile.in
trunk/src/rss.c
trunk/src/rss.h
Modified: trunk/src/Makefile.in
==============================================================================
--- trunk/src/Makefile.in (original)
+++ trunk/src/Makefile.in Wed Oct 8 16:53:18 2008
@@ -382,6 +382,8 @@
gecko-utils.h \
rss-config-factory.c \
rss-config-factory.h \
+ rss-icon-factory.c \
+ rss-icon-factory.h \
evolution-import-rss.c \
evolution-rss.schemas.in \
org-gnome-evolution-rss.eplug.xml \
Added: trunk/src/parser.c
==============================================================================
--- (empty file)
+++ trunk/src/parser.c Wed Oct 8 16:53:18 2008
@@ -0,0 +1,986 @@
+/* Evolution RSS Reader Plugin
+ * Copyright (C) 2007-2008 Lucian Langa <cooly gnome eu org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "rss.h"
+#include "parser.h"
+
+/************ RDF Parser *******************/
+
+/**
+ * html_set_base:
+ * @doc: node from which html_find() starts scanning
+ * @base: URL of the document; used to resolve relative references
+ * @tag: element name to look for (e.g. "a", "img")
+ * @prop: attribute of @tag that carries the URL (e.g. "href", "src")
+ * @basehref: value of the document's <base href="...">, or NULL
+ *
+ * Rewrites @prop on every @tag element found after @doc so that it holds
+ * an absolute URL:
+ *  - "/path"    is prefixed with the server part of @base,
+ *  - "//host/x" gets the scheme of @basehref (or @base) prepended,
+ *  - anything else that is not already http:// or https:// is resolved
+ *    against @basehref when given, otherwise against @base.
+ * For img/src the attribute is additionally passed through strplchr()
+ * first (presumably character escaping -- confirm against strplchr()).
+ */
+void
+html_set_base(xmlNode *doc, char *base, char *tag, char *prop, char *basehref)
+{
+	gchar *url;
+	gchar *newuristr;
+#if LIBSOUP_VERSION < 2003000
+	SoupUri *newuri;
+	SoupUri *base_uri = soup_uri_new (base);
+#else
+	SoupURI *newuri;
+	SoupURI *base_uri = soup_uri_new (base);
+#endif
+	/* Protocol-relative links ("//host/path") inherit the scheme of the
+	 * document they appear in; fall back to plain http when unknown. */
+	const gchar *ref = basehref ? basehref : base;
+	const gchar *scheme =
+		(ref && !g_ascii_strncasecmp(ref, "https:", 6)) ? "https:" : "http:";
+
+	while ((doc = html_find((xmlNode *)doc, tag)))
+	{
+		if (!(url = xmlGetProp(doc, prop)))
+			continue;
+
+		if (!strncmp(tag, "img", 3) && !strncmp(prop, "src", 3))
+		{
+			gchar *tmpurl = strplchr(url);
+			xmlSetProp(doc, prop, tmpurl);
+			g_free(tmpurl);
+		}
+		d(g_print("DEBUG: parsing: %s\n", url));
+		if (url[0] == '/' && url[1] != '/')
+		{
+			/* server-relative path */
+			gchar *server = get_server_from_uri(base);
+			gchar *tmp = g_strdup_printf("%s/%s", server, url);
+			xmlSetProp(doc, prop, tmp);
+			g_free(tmp);
+			g_free(server);
+		}
+		if (url[0] == '/' && url[1] == '/')
+		{
+			/* protocol-relative URL */
+			gchar *tmp = g_strdup_printf("%s%s", scheme, url);
+			xmlSetProp(doc, prop, tmp);
+			g_free(tmp);
+		}
+		if (url[0] != '/' && !g_str_has_prefix(url, "http://")
+				&& !g_str_has_prefix(url, "https://"))
+		{
+			// in case we have a base href= set then rewrite
+			// all relative links
+			if (basehref != NULL)
+			{
+#if LIBSOUP_VERSION < 2003000
+				SoupUri *newbase_uri = soup_uri_new (basehref);
+#else
+				SoupURI *newbase_uri = soup_uri_new (basehref);
+#endif
+				newuri = soup_uri_new_with_base (newbase_uri, url);
+				if (newbase_uri)
+					soup_uri_free(newbase_uri);
+			}
+			else
+				newuri = soup_uri_new_with_base (base_uri, url);
+			if (newuri)
+			{
+				newuristr = soup_uri_to_string (newuri, FALSE);
+				xmlSetProp(doc, prop, (xmlChar *)newuristr);
+				g_free(newuristr);
+				soup_uri_free(newuri);
+			}
+		}
+		xmlFree(url);
+	}
+	/* soup_uri_new() yields NULL for an unparsable base */
+	if (base_uri)
+		soup_uri_free(base_uri);
+}
+
+/* libxml2 error/warning callback that deliberately discards every message,
+ * so malformed feeds do not spam the console. */
+static void
+my_xml_parser_error_handler (void *ctx, const char *msg, ...)
+{
+	(void) ctx;
+	(void) msg;
+}
+
+/**
+ * xml_parse_sux:
+ * @buf: XML text to parse (must not be NULL)
+ * @len: number of bytes in @buf, or -1 to use strlen(@buf)
+ *
+ * Parses @buf with a SAX2 handler whose error and warning callbacks are
+ * silenced, with parser recovery switched on.
+ *
+ * Returns: the document built by the parser (ctxt->myDoc), or NULL if no
+ * parser context could be created.
+ */
+xmlDoc *
+xml_parse_sux (const char *buf, int len)
+{
+	/* built once and shared by every call */
+	static xmlSAXHandler *quiet_sax;
+	xmlParserCtxtPtr parser;
+	xmlDoc *result;
+	int options = XML_PARSE_DTDLOAD | XML_PARSE_NOENT | XML_PARSE_NOCDATA;
+
+	g_return_val_if_fail (buf != NULL, NULL);
+
+	if (quiet_sax == NULL) {
+		xmlInitParser();
+		quiet_sax = xmlMalloc (sizeof (xmlSAXHandler));
+		xmlSAXVersion (quiet_sax, 2);
+		quiet_sax->warning = my_xml_parser_error_handler;
+		quiet_sax->error = my_xml_parser_error_handler;
+	}
+
+	if (len == -1)
+		len = strlen (buf);
+
+	parser = xmlCreateMemoryParserCtxt (buf, len);
+	if (parser == NULL)
+		return NULL;
+
+	/* swap the context's own handler for the shared silent one */
+	xmlFree (parser->sax);
+	parser->sax = quiet_sax;
+	parser->sax2 = 1;
+	parser->str_xml = xmlDictLookup (parser->dict, BAD_CAST "xml", 3);
+	parser->str_xmlns = xmlDictLookup (parser->dict, BAD_CAST "xmlns", 5);
+	parser->str_xml_ns = xmlDictLookup (parser->dict, XML_XML_NAMESPACE, 36);
+
+	parser->recovery = TRUE;
+	parser->vctxt.error = my_xml_parser_error_handler;
+	parser->vctxt.warning = my_xml_parser_error_handler;
+
+	xmlCtxtUseOptions (parser, options);
+
+	xmlParseDocument (parser);
+	result = parser->myDoc;
+
+	/* detach the shared handler so freeing the context does not free it */
+	parser->sax = NULL;
+	xmlFreeParserCtxt (parser);
+
+	return result;
+}
+
+/**
+ * parse_html_sux:
+ * @buf: HTML text to parse (must not be NULL)
+ * @len: number of bytes in @buf, or -1 to use strlen(@buf)
+ *
+ * Parses @buf with libxml2's HTML parser, with error and warning callbacks
+ * silenced.  With libxml2 newer than 2.6.0 a shared SAX handler is
+ * installed on a memory parser context; older versions fall back to
+ * htmlParseDoc() on a NUL-terminated copy of the buffer.
+ *
+ * Returns: the parsed document, or NULL if no parser context could be
+ * created.
+ */
+xmlDoc *
+parse_html_sux (const char *buf, int len)
+{
+	xmlDoc *doc;
+#if LIBXML_VERSION > 20600
+	static xmlSAXHandler *sax;
+	htmlParserCtxtPtr ctxt;
+
+	g_return_val_if_fail (buf != NULL, NULL);
+
+	if (!sax) {
+		xmlInitParser();
+		sax = xmlMalloc (sizeof (htmlSAXHandler));
+		/* Only the V1 prefix is copied from the default handler; zero
+		 * the whole structure first so the remaining members are not
+		 * left holding whatever xmlMalloc() returned. */
+		memset (sax, 0, sizeof (htmlSAXHandler));
+		memcpy (sax, &htmlDefaultSAXHandler, sizeof (xmlSAXHandlerV1));
+		sax->warning = my_xml_parser_error_handler;
+		sax->error = my_xml_parser_error_handler;
+	}
+
+	if (len == -1)
+		len = strlen (buf);
+	ctxt = htmlCreateMemoryParserCtxt (buf, len);
+	if (!ctxt)
+		return NULL;
+
+	/* replace the context's handler with the shared silent one */
+	xmlFree (ctxt->sax);
+	ctxt->sax = sax;
+	ctxt->vctxt.error = my_xml_parser_error_handler;
+	ctxt->vctxt.warning = my_xml_parser_error_handler;
+
+	htmlCtxtUseOptions(ctxt, HTML_PARSE_NONET
+			| HTML_PARSE_COMPACT
+			| HTML_PARSE_NOBLANKS);
+
+	htmlParseDocument (ctxt);
+	doc = ctxt->myDoc;
+
+	/* detach the shared handler before the context is freed */
+	ctxt->sax = NULL;
+	htmlFreeParserCtxt (ctxt);
+
+#else /* LIBXML_VERSION <= 20600 */
+	char *buf_copy;
+
+	g_return_val_if_fail (buf != NULL, NULL);
+
+	/* honour the "-1 means NUL-terminated" convention here as well */
+	buf_copy = (len == -1) ? g_strdup (buf) : g_strndup (buf, len);
+
+	doc = htmlParseDoc (buf_copy, NULL);
+	g_free (buf_copy);
+#endif
+	return doc;
+}
+
+/**
+ * parse_html:
+ * @url: URL the HTML was fetched from; base for resolving relative links
+ * @html: HTML text
+ * @len: length of @html, or -1 (see parse_html_sux())
+ *
+ * Parses @html and makes the URL attributes handled by html_set_base()
+ * absolute.  A <base href="..."> element, if present, supplies the base
+ * for relative links and is removed from the tree.
+ *
+ * Returns: the modified document, or NULL if parsing failed.
+ */
+xmlDoc *
+parse_html(char *url, const char *html, int len)
+{
+	xmlDoc *src;
+	xmlNode *root;
+	xmlNode *base_node;
+	gchar *newbase = NULL;
+
+	src = (xmlDoc *)parse_html_sux(html, len);
+	if (!src)
+		return NULL;
+	root = (xmlNode *)src;
+
+	/* look the <base> element up once; drop it after reading its href */
+	base_node = html_find(root, "base");
+	if (base_node)
+		newbase = xmlGetProp(base_node, "href");
+	d(g_print("newbase:|%s|\n", newbase));
+	if (base_node) {
+		xmlUnlinkNode(base_node);
+		/* an unlinked node is no longer owned by the document */
+		xmlFreeNode(base_node);
+	}
+
+	html_set_base(root, url, "a", "href", newbase);
+	html_set_base(root, url, "img", "src", newbase);
+	html_set_base(root, url, "input", "src", newbase);
+	/* NOTE(review): <link> normally carries "href", not "src" -- confirm */
+	html_set_base(root, url, "link", "src", newbase);
+	html_set_base(root, url, "body", "background", newbase);
+	html_set_base(root, url, "script", "src", newbase);
+
+	if (newbase)
+		xmlFree(newbase);
+	return src;
+}
+
+/**
+ * parse_href:
+ * @s: link target as found in the document, may be NULL
+ * @base: prefix to put in front of server-relative paths
+ *
+ * Returns a newly allocated string for @s:
+ *  - "" when @s is NULL or empty,
+ *  - @base followed by @s when @s starts with a single '/',
+ *  - an unmodified copy of @s otherwise (resolution of protocol-relative
+ *    and path-relative references is not implemented here).
+ *
+ * The caller owns the result and releases it with g_free().
+ */
+static gchar *
+parse_href (const gchar *s, const gchar *base)
+{
+	if (s == NULL || *s == 0)
+		return g_strdup ("");
+
+	/* Single slash at the beginning. */
+	if (s[0] == '/' && s[1] != '/')
+		return g_strdup_printf("%s%s", base, s);
+
+	/* Double slash or relative path: previously an uninitialised
+	 * pointer was returned here; hand back a plain copy instead. */
+	return g_strdup (s);
+}
+
+
+/**
+ * layer_find_innerelement:
+ * @node: first node of the sibling list to scan
+ * @match: element name to look for (case-insensitive)
+ * @el: attribute to read from the first matching element
+ * @fail: value returned when nothing usable is found
+ *
+ * Returns: the value of attribute @el on the first sibling named @match
+ * (as returned by xmlGetProp()), or @fail when there is no such sibling or
+ * it lacks the attribute.
+ */
+static char *
+layer_find_innerelement (xmlNodePtr node,
+			 char *match, char *el,
+			 char *fail)
+{
+	while (node!=NULL) {
+#ifdef RDF_DEBUG
+		xmlDebugDumpNode (stdout, node, 32);
+		printf("%s.\n", node->name);
+#endif
+		if (strcasecmp (node->name, match)==0) {
+			char *value = (char *)xmlGetProp(node, el);
+
+			/* a missing attribute must yield the caller's
+			 * fallback rather than a bare NULL */
+			return value ? value : fail;
+		}
+		node = node->next;
+	}
+	return fail;
+}
+
+/**
+ * html_find:
+ * @node: node after which the search starts (the node itself is not tested)
+ * @match: element name to look for (case-sensitive)
+ *
+ * Walks the tree in document order -- first child, else next sibling, else
+ * the next sibling of the nearest ancestor that has one -- and stops at the
+ * first node whose name equals @match.
+ *
+ * Returns: the matching node, or NULL when the walk runs off the tree.
+ */
+xmlNode *
+html_find (xmlNode *node,
+	   char *match)
+{
+	xmlNode *cur = node;
+
+#ifdef RDF_DEBUG
+g_print("parser entry 3_1!!!\n");
+#endif
+	while (cur != NULL) {
+#ifdef RDF_DEBUG
+		xmlDebugDumpNode (stdout, cur, 32);
+		printf("%s.\n", cur->name);
+#endif
+		if (cur->children != NULL) {
+			/* descend first */
+			cur = cur->children;
+		} else {
+			/* climb until some ancestor has a following sibling */
+			while (cur != NULL && cur->next == NULL)
+				cur = cur->parent;
+			if (cur == NULL) {
+#ifdef RDF_DEBUG
+g_print("parser error 3_2 -> return NULL!!!\n");
+#endif
+				return NULL;
+			}
+			cur = cur->next;
+		}
+
+		if (cur->name != NULL && strcmp (cur->name, match) == 0) {
+#ifdef RDF_DEBUG
+g_print("parser error 3_3 -> return NULL!!!\n");
+#endif
+			return cur;
+		}
+	}
+#ifdef RDF_DEBUG
+g_print("parser error 3_4 -> return NULL!!!\n");
+#endif
+	return NULL;
+}
+
+/**
+ * layer_find:
+ * @node: first node of the sibling list to scan
+ * @match: element name to look for (case-insensitive)
+ * @fail: value returned when nothing usable is found
+ *
+ * Returns: the content of the first child of the first sibling named
+ * @match.  The pointer refers into the tree and is not a copy.  @fail is
+ * returned when no sibling matches, or when the first match has no child
+ * or its child has no content.
+ */
+static char *
+layer_find (xmlNodePtr node,
+	    char *match,
+	    char *fail)
+{
+	xmlNodePtr cur;
+
+	for (cur = node; cur != NULL; cur = cur->next) {
+#ifdef RDF_DEBUG
+		xmlDebugDumpNode (stdout, cur, 32);
+		printf("%s.\n", cur->name);
+#endif
+		if (strcasecmp (cur->name, match) != 0)
+			continue;
+
+		/* only the first match is considered */
+		if (cur->children == NULL || cur->children->content == NULL)
+			return fail;
+		return cur->children->content;
+	}
+	return fail;
+}
+
+//
+//namespace-based modularization
+//standard modules
+//
+// mod_content
+// * only handles content:encoding
+// * if it's necessary handle
+// content:item stuff
+
+/**
+ * content_rss:
+ * @node: node whose content is wanted
+ * @fail: value returned when the node has no content
+ *
+ * Handler for the "content" namespace module.
+ *
+ * Returns: the result of xmlNodeGetContent() on @node, or @fail when that
+ * is NULL.
+ */
+gchar *
+content_rss(xmlNode *node, gchar *fail)
+{
+	gchar *text = xmlNodeGetContent(node);
+
+	return text ? text : fail;
+}
+
+/* Placeholder handler for the Dublin Core ("dc") module: it only prints a
+ * marker line. */
+void
+dublin_core_rss(void)
+{
+	static const gchar banner[] = "dublin core\n";
+
+	g_print("%s", banner);
+}
+
+/* Placeholder handler for the Syndication ("sy") module: it only prints a
+ * marker line. */
+void
+syndication_rss(void)
+{
+	static const gchar banner[] = "syndication\n";
+
+	g_print("%s", banner);
+}
+
+
+/*
+ * Table of the namespace modules known to the parser.  Each row holds:
+ *   [0] a human readable module name,
+ *   [1] the namespace prefix, compared against node->ns->prefix by
+ *       layer_find_tag(),
+ *   [2] the handler function, stored cast to gchar * and cast back to a
+ *       function pointer before it is called.
+ *
+ * NOTE(review): layer_find_tag() calls every handler as func(node, fail)
+ * and returns its result, but dublin_core_rss() and syndication_rss() are
+ * declared void (void); only content_rss() matches that call shape.
+ */
+gchar *standard_rss_modules[3][3] = {
+ {"content", "content", (gchar *)content_rss},
+ {"dublin core", "dc", (gchar *)dublin_core_rss},
+ {"syndication", "sy", (gchar *)syndication_rss}};
+
+
+/**
+ * layer_find_tag:
+ * @node: first node of the sibling list to scan
+ * @match: element name -- or module namespace prefix -- to look for
+ * @fail: value returned when nothing usable is found
+ *
+ * Like layer_find(), but returns markup rather than plain text.  For each
+ * sibling:
+ *  - if its namespace prefix is listed in standard_rss_modules and equals
+ *    @match, the module handler is called and its result returned;
+ *  - if its name equals @match and it has children, the first child is
+ *    serialised with xmlNodeDump() and a g_malloc'ed copy returned,
+ *    provided that child is an element or has a following sibling;
+ *    otherwise @fail is returned.
+ * A matching element without children is skipped.
+ *
+ * Returns: see above; @fail when no sibling qualifies.
+ */
+static char *
+layer_find_tag (xmlNodePtr node,
+		char *match,
+		char *fail)
+{
+	xmlBufferPtr dump = xmlBufferCreate();
+	xmlNodePtr cur;
+	gchar *content;
+	guint len = 0;
+	int idx;
+	char* (*handler)();
+
+	for (cur = node; cur != NULL; cur = cur->next) {
+#ifdef RDF_DEBUG
+		xmlDebugDumpNode (stdout, cur, 32);
+		printf("%s.\n", cur->name);
+#endif
+		if (cur->ns != NULL && cur->ns->prefix != NULL) {
+			for (idx = 0; idx < 3; idx++) {
+				if (strcasecmp (cur->ns->prefix, standard_rss_modules[idx][1]) != 0)
+					continue;
+				handler = (gpointer)standard_rss_modules[idx][2];
+				if (strcasecmp (cur->ns->prefix, match) == 0) {
+					xmlBufferFree(dump);
+					return handler(cur, fail);
+				}
+			}
+		}
+
+		/* a childless match is passed over, not treated as a miss */
+		if (strcasecmp (cur->name, match) != 0 || cur->children == NULL)
+			continue;
+
+		if (cur->children->type == XML_ELEMENT_NODE
+		    || cur->children->next != NULL) {
+#ifdef RDF_DEBUG
+			g_print("NODE DUMP:%s\n", xmlNodeGetContent(cur->children->next));
+#endif
+			len = xmlNodeDump(dump, cur->doc, cur->children, 0, 0);
+			content = g_strdup_printf("%s", xmlBufferContent(dump));
+			xmlBufferFree(dump);
+			return content;
+		}
+
+		xmlBufferFree(dump);
+		return fail;
+	}
+
+	xmlBufferFree(dump);
+	return fail;
+}
+
+/**
+ * layer_find_innerhtml:
+ * @node: first node of the sibling list to scan
+ * @match: name of the enclosing element (case-insensitive)
+ * @submatch: name of the element to look up inside it
+ * @fail: value returned when nothing is found
+ *
+ * Finds the first sibling named @match that has children and runs
+ * layer_find() for @submatch over that element's children, starting at
+ * the second child.
+ *
+ * Returns: what layer_find() returns, or @fail when no sibling qualifies.
+ */
+static gchar *
+layer_find_innerhtml (xmlNodePtr node,
+		      char *match, char *submatch,
+		      char *fail)
+{
+	xmlNodePtr cur;
+
+	for (cur = node; cur != NULL; cur = cur->next) {
+#ifdef RDF_DEBUG
+		xmlDebugDumpNode (stdout, cur, 32);
+		printf("%s.\n", cur->name);
+#endif
+		if (cur->children == NULL)
+			continue;
+		if (strcasecmp (cur->name, match) == 0)
+			return layer_find(cur->children->next, submatch, fail);
+	}
+	return fail;
+}
+
+/**
+ * layer_find_pos:
+ * @node: first node of the sibling list to scan
+ * @match: name of the enclosing element (case-insensitive)
+ * @submatch: name of the child element to locate (case-insensitive)
+ *
+ * Looks through every sibling named @match that has children for a child
+ * named @submatch that itself has children.
+ *
+ * Returns: the second child of the first such @submatch element (may be
+ * NULL when it has only one child), or NULL when none is found.
+ */
+xmlNodePtr
+layer_find_pos (xmlNodePtr node,
+		char *match, char *submatch)
+{
+	xmlNodePtr outer;
+	xmlNodePtr inner;
+
+	for (outer = node; outer != NULL; outer = outer->next) {
+#ifdef RDF_DEBUG
+		xmlDebugDumpNode (stdout, outer, 32);
+		printf("%s.\n", outer->name);
+#endif
+		if (strcasecmp (outer->name, match) != 0 || outer->children == NULL)
+			continue;
+
+		for (inner = outer->children; inner != NULL; inner = inner->next) {
+			if (inner->children != NULL
+			    && strcasecmp (inner->name, submatch) == 0)
+				return inner->children->next;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * layer_find_url:
+ * @node: first node of the sibling list to scan
+ * @match: element name to look for
+ * @fail: value returned when nothing is found
+ *
+ * Looks up @match with layer_find() and returns its text cleaned up for
+ * use as a URL: one leading space is skipped, the entities "&amp;",
+ * "&lt;" and "&gt;" are decoded, and '"' and ' ' are percent-encoded.
+ *
+ * Returns: a pointer to a static buffer that is freed and replaced on the
+ * next call (do not free it, and copy it if it must outlive that call), or
+ * @fail when nothing was found or the text is empty.
+ */
+static char *
+layer_find_url (xmlNodePtr node,
+		char *match,
+		char *fail)
+{
+	static const char hex[] = "0123456789ABCDEF";
+	static char *wb = NULL;
+	char *p = layer_find (node, match, fail);
+	char *r = p;
+	char *w;
+
+	g_free (wb);
+	wb = NULL;
+
+	/* nothing found (fail may be NULL) or nothing to convert */
+	if (p == NULL || *p == 0)
+		return fail;
+
+	/* worst case every byte becomes "%XX"; +1 for the terminator */
+	wb = w = g_malloc (3 * strlen (p) + 1);
+
+	if (*r == ' ') r++;	/* Fix UF bug */
+
+	while (*r) {
+		if (strncmp (r, "&amp;", 5) == 0) {
+			*w++ = '&';
+			r += 5;
+			continue;
+		}
+		if (strncmp (r, "&lt;", 4) == 0) {
+			*w++ = '<';
+			r += 4;
+			continue;
+		}
+		if (strncmp (r, "&gt;", 4) == 0) {
+			*w++ = '>';
+			r += 4;
+			continue;
+		}
+		if (*r == '"' || *r == ' ') {
+			unsigned char c = (unsigned char)*r;
+
+			*w++ = '%';
+			*w++ = hex[c / 16];
+			*w++ = hex[c & 15];
+			r++;
+			continue;
+		}
+		*w++ = *r++;
+	}
+	*w = 0;
+	return wb;
+}
+
+/**
+ * tree_walk:
+ * @root: first node of the parsed feed document
+ * @r: feed state; r->uri and r->progress are read, r->type, r->type_id,
+ *     r->version, r->ttl, r->total and r->feedid are written
+ *
+ * Walks the feed tree level by level.  It recognises the <rdf>, <rss> and
+ * <feed> containers (recording feed type and version in @r), remembers the
+ * <channel>/<feed> and <image> nodes and collects every <item>/<entry>.
+ * It then works out the channel name, reads the ttl, updates the feed
+ * image and passes the collected items to update_channel().
+ *
+ * Returns: the channel name, or NULL when the document has no channel or
+ * feed element.
+ */
+gchar *
+tree_walk (xmlNodePtr root, RDF *r)
+{
+	xmlNodePtr walk;
+	xmlNodePtr rewalk = root;
+	xmlNodePtr channel = NULL;
+	xmlNodePtr image = NULL;
+	/* zero-terminated: update_channel() stops at the first NULL entry */
+	GArray *item = g_array_new (TRUE, TRUE, sizeof (xmlNodePtr));
+	char *t;
+	gchar *img_src = NULL;
+
+	do {
+		walk = rewalk;
+		rewalk = NULL;
+
+		while (walk!=NULL){
+#ifdef RDF_DEBUG
+			printf ("%p, %s\n", walk, walk->name);
+#endif
+			if (strcasecmp (walk->name, "rdf") == 0) {
+				/* descend into the container on the next pass */
+				rewalk = walk->children;
+				walk = walk->next;
+				if (!r->type)
+					r->type = g_strdup("RDF");
+				r->type_id = RDF_FEED;
+				if (r->version)
+					g_free(r->version);
+				r->version = g_strdup("(RSS 1.0)");
+				continue;
+			}
+			if (strcasecmp (walk->name, "rss") == 0){
+				xmlNode *node = walk;
+				rewalk = walk->children;
+				walk = walk->next;
+				if (!r->type)
+					r->type = g_strdup("RSS");
+				r->type_id = RSS_FEED;
+				gchar *ver = xmlGetProp(node, "version");
+				if (r->version)
+					g_free(r->version);
+				r->version = g_strdup(ver);
+				if (ver)
+					xmlFree(ver);
+				continue;
+			}
+			if (strcasecmp (walk->name, "feed") == 0) {
+				xmlNode *node = walk;
+				if (!r->type)
+					r->type = g_strdup("ATOM");
+				r->type_id = ATOM_FEED;
+				gchar *ver = xmlGetProp(node, "version");
+				if (ver)
+				{
+					if (r->version)
+						g_free(r->version);
+					r->version = g_strdup(ver);
+					xmlFree(ver);
+				}
+				else
+				{
+					if (r->version)
+						g_free(r->version);
+					r->version = g_strdup("1.0");
+				}
+				/* no "continue": <feed> is also the channel node */
+			}
+
+			/* This is the channel top level */
+#ifdef RDF_DEBUG
+			printf ("Top level '%s'.\n", walk->name);
+#endif
+			if (strcasecmp (walk->name, "channel") == 0) {
+				channel = walk;
+				rewalk = channel->children;
+			}
+			if (strcasecmp (walk->name, "feed") == 0) {
+				channel = walk;
+				rewalk = channel->children;
+			}
+			if (strcasecmp (walk->name, "image") == 0) {
+				image = walk;
+			}
+			if (strcasecmp (walk->name, "item") == 0) {
+				g_array_append_val(item, walk);
+			}
+			if (strcasecmp (walk->name, "entry") == 0) {
+				g_array_append_val(item, walk);
+			}
+			walk = walk->next;
+		}
+	}
+	while (rewalk);
+
+	if (channel == NULL) {
+		fprintf(stderr, "No channel definition.\n");
+		/* the item array was allocated above and must not leak */
+		g_array_free(item, TRUE);
+		return NULL;
+	}
+	if (image != NULL)
+		img_src = layer_find(image->children, "url", NULL);
+
+	t = g_strdup(get_real_channel_name(r->uri, NULL));
+	//feed might be added with no validation
+	//so it could be named Untitled channel
+	//till validation process
+	if (t == NULL || !g_ascii_strncasecmp(t,
+				DEFAULT_NO_CHANNEL,
+				strlen(DEFAULT_NO_CHANNEL)))
+	{
+		/* drop the placeholder copy before deriving the real title */
+		g_free(t);
+
+		t = layer_find(channel->children,
+				"title",
+				DEFAULT_NO_CHANNEL);
+		t = decode_html_entities(t);
+		gchar *tmp = sanitize_folder(t);
+		g_free(t);
+		t = tmp;
+		t = generate_safe_chn_name(t);
+	}
+	gchar *tmp = layer_find(channel->children, "ttl", NULL);
+	if (tmp)
+		r->ttl = atoi(tmp);
+	else
+		r->ttl = 0;
+
+	update_feed_image(img_src, gen_md5(r->uri));
+
+	//items might not have a date
+	// so try to grab channel/feed date
+	gchar *md2 = g_strdup(layer_find(channel->children, "date",
+		layer_find(channel->children, "pubDate",
+		layer_find(channel->children, "updated", NULL))));
+
+	r->total = item->len;
+
+	r->feedid = update_channel(
+			//atempt to find real_channel name using url
+			t,
+			r->uri,
+			md2,
+			item,
+			r->progress);
+	if (md2)
+		g_free(md2);
+	g_array_free(item, TRUE);
+	/* NOTE(review): r->feedid still points at the freed string after
+	 * this call -- confirm that no caller reads it afterwards. */
+	g_free(r->feedid);
+	return t;
+}
+
+/**
+ * update_channel:
+ * @chn_name: channel name; used for the sender string, CF->full_path and
+ *            the status icon
+ * @url: feed URL; its gen_md5() digest names the per-feed status file
+ * @main_date: channel-level date, used for items that carry none
+ * @item: array of xmlNodePtr, one per item/entry, ended by a NULL element
+ * @progress: GtkProgressBar to update while processing, or NULL
+ *
+ * For every element of @item this extracts title, author, body, dates,
+ * enclosure, link and id.  When feed_is_new() reports 0 for the id/link
+ * line, images in the body are passed through fetch_image() and a
+ * create_feed record is built.  That record is handed to create_mail()
+ * directly, or to net_get_unblocking() when the item has an enclosure.
+ * Processing stops early when rf->cancel is set.
+ *
+ * Returns: the gen_md5() digest of @url.
+ */
+gchar *
+update_channel(const char *chn_name, gchar *url, char *main_date, GArray *item, GtkWidget *progress)
+{
+	guint i;
+	gchar *sender = g_strdup_printf("%s <%s>", chn_name, chn_name);
+	CamelStore *store = mail_component_peek_local_store(NULL);
+	xmlNodePtr el;
+
+	migrate_crc_md5(chn_name, url);
+
+	gchar *buf = gen_md5(url);
+
+	gchar *feed_dir = rss_component_peek_base_directory(mail_component_peek());
+	if (!g_file_test(feed_dir, G_FILE_TEST_EXISTS))
+		g_mkdir_with_parents (feed_dir, 0755);
+
+	gchar *feed_name = g_strdup_printf("%s/%s", feed_dir, buf);
+	g_free(feed_dir);
+
+	/* Neither stream is read or written below; opening with "a+" creates
+	 * the status file if it does not exist yet. */
+	FILE *fr = fopen(feed_name, "r");
+	FILE *fw = fopen(feed_name, "a+");
+
+	for (i=0; NULL != (el = g_array_index(item, xmlNodePtr, i)); i++)
+	{
+		/* Per-item state.  It is reset on every pass so that nothing
+		 * (a freed body buffer, a previous item's date) leaks over
+		 * from the item before. */
+		char *q = NULL;
+		char *b = NULL;
+		char *d2 = NULL;
+		gchar *feed = NULL;
+		gchar *encl;
+		xmlChar *buff = NULL;
+		int size = 0;
+
+		update_sr_message();
+		if (rf->cancel) goto out;
+
+		if (progress)
+		{
+			gdouble fraction = (gdouble)i/item->len;
+			gtk_progress_bar_set_fraction((GtkProgressBar *)progress, fraction);
+			gchar *msg = g_strdup_printf("%2.0f%% done", fraction*100);
+			gtk_progress_bar_set_text((GtkProgressBar *)progress, msg);
+			g_free(msg);
+		}
+
+		char *p = layer_find (el->children, "title", "Untitled article");
+		//firstly try to parse as an ATOM author
+		//process person construct
+		char *q1 = g_strdup(layer_find_innerhtml (el->children, "author", "name", NULL));
+		char *q2 = g_strdup(layer_find_innerhtml (el->children, "author", "uri", NULL));
+		char *q3 = g_strdup(layer_find_innerhtml (el->children, "author", "email", NULL));
+		if (q1)
+		{
+			q1 = g_strdelimit(q1, "><", ' ');
+			if (q3)
+			{
+				q3 = g_strdelimit(q3, "><", ' ');
+				q = g_strdup_printf("%s <%s>", q1, q3);
+			}
+			else
+			{
+				if (q2)
+					q2 = g_strdelimit(q2, "><", ' ');
+				else
+					q2 = g_strdup(q1);
+				q = g_strdup_printf("%s <%s>", q1, q2);
+			}
+			g_free(q1);
+			g_free(q2);
+			g_free(q3);
+		}
+		else	//then RSS or RDF
+		{
+			xmlNodePtr source;
+			source = layer_find_pos(el->children, "source", "author");
+			//try the source construct
+			if (source != NULL)
+				q = g_strdup(layer_find(source, "name", NULL));
+			else
+				q = g_strdup(layer_find (el->children, "author",
+					layer_find (el->children, "creator", NULL)));
+			if (q)
+			{
+				//evo will go crazy when it'll encounter ":" character
+				//it probably enforces strict rfc2047 compliance
+				q = g_strdelimit(q, "><:", ' ');
+				gchar *tmp = g_strdup_printf("\"%s\" <\"%s\">", q, q);
+				g_free(q);
+				q = tmp;
+			}
+			/* release these whether or not an author was found */
+			g_free(q2);
+			g_free(q3);
+		}
+		//FIXME this might need xmlFree when namespacing
+		b = layer_find_tag (el->children, "description",
+			layer_find_tag (el->children, "content",
+			layer_find_tag (el->children, "summary",
+			NULL)));
+
+		if (!b)
+			b = g_strdup(layer_find (el->children, "description",
+				layer_find (el->children, "content",
+				layer_find (el->children, "summary", "No information"))));
+
+		char *d = layer_find (el->children, "pubDate", NULL);
+		//date in dc module format
+		if (!d)
+		{
+			d2 = layer_find (el->children, "date", NULL);	//RSS2
+			if (!d2)
+			{
+				d2 = layer_find(el->children, "updated", NULL);	//ATOM
+				if (!d2) //take channel date if exists
+					d2 = main_date;
+			}
+		}
+
+		encl = layer_find_innerelement(el->children, "enclosure", "url",	// RSS 2.0 Enclosure
+			layer_find_innerelement(el->children, "link", "enclosure", NULL));	// ATOM Enclosure
+		//we have to free this somehow
+		char *link = g_strdup(layer_find (el->children, "link", NULL));	//RSS,
+		if (!link)
+			link = layer_find_innerelement(el->children, "link", "href", g_strdup(_("No Information")));	//ATOM
+
+		char *id = layer_find (el->children, "id",			//ATOM
+			layer_find (el->children, "guid", NULL));		//RSS 2.0
+		feed = g_strdup_printf("%s\n", id ? id : link);
+		d(g_print("link:%s\n", link));
+		d(g_print("author:%s\n", q));
+		d(g_print("sender:%s\n", sender));
+		d(g_print("title:%s\n", p));
+		d(g_print("date:%s\n", d));
+		d(g_print("date:%s\n", d2));
+
+		while (gtk_events_pending())
+			gtk_main_iteration ();
+
+		if (!feed_is_new(feed_name, feed)) {
+			ftotal++;
+			p = decode_html_entities (p);
+			gchar *tmp = decode_utf8_entities(b);
+			g_free(b);
+
+			xmlDoc *src = (xmlDoc *)parse_html_sux(tmp, strlen(tmp));
+			if (src)
+			{
+				xmlNode *doc = (xmlNode *)src;
+
+				while ((doc = html_find(doc, "img")))
+				{
+					gchar *name = NULL;
+					xmlChar *img_url = xmlGetProp(doc, "src");
+					if (img_url) {
+						if ((name = fetch_image(img_url)))
+							xmlSetProp(doc, "src", name);
+						xmlFree(img_url);
+					}
+				}
+				xmlDocDumpMemory(src, &buff, &size);
+				/* release the whole tree, not just the xmlDoc struct */
+				xmlFreeDoc(src);
+			}
+			g_free(tmp);
+			/* NULL when the body could not be parsed */
+			b = (char *)buff;
+
+			while (gtk_events_pending())
+				gtk_main_iteration ();
+
+			create_feed *CF = g_new0(create_feed, 1);
+			/* pack all data */
+			CF->full_path	= g_strdup(chn_name);
+			CF->q		= g_strdup(q);
+			CF->sender	= g_strdup(sender);
+			CF->subj	= g_strdup(p);
+			CF->body	= g_strdup(b);
+			CF->date	= g_strdup(d);
+			CF->dcdate	= g_strdup(d2);
+			CF->website	= g_strdup(link);
+			CF->feedid	= g_strdup(buf);
+			CF->encl	= g_strdup(encl);
+			CF->feed_fname	= g_strdup(feed_name);	//feed file name
+			CF->feed_uri	= g_strdup(feed);	//feed file url
+
+			if (encl)
+			{
+				GError *err = NULL;
+				net_get_unblocking(
+					encl,
+					textcb,
+					NULL,
+					(gpointer)finish_enclosure,
+					CF,
+					0,
+					&err);
+			}
+			else
+			{
+				create_mail(CF);
+				write_feed_status_line(feed_name, feed);
+				free_cf(CF);
+			}
+			farticle++;
+			flicker_status_icon(chn_name, p);
+			g_free(p);
+		}
+		d(g_print("put success()\n"));
+		if (q) g_free(q);
+		g_free(b);
+		if (feed) g_free(feed);
+		if (encl) g_free(encl);
+		g_free(link);
+	}
+out:	g_free(sender);
+
+	if (fr) fclose(fr);
+	if (fw) fclose(fw);
+
+	g_free(feed_name);
+	return buf;
+}
+
Added: trunk/src/parser.h
==============================================================================
--- (empty file)
+++ trunk/src/parser.h Wed Oct 8 16:53:18 2008
@@ -0,0 +1,14 @@
+
+/* Declarations for the feed parsing functions defined in parser.c. */
+#ifndef __PARSER_H__
+#define __PARSER_H__
+
+/* Processes the collected feed items; returns the md5 digest of url. */
+gchar *update_channel(const char *chn_name, char *url, char *main_date, GArray *item, GtkWidget *progress);
+
+/* Sibling-list lookup helpers.  The static ones are only usable by a
+ * translation unit that also contains their definitions from parser.c. */
+static char *layer_find (xmlNodePtr node, char *match, char *fail);
+static char *layer_find_innerelement (xmlNodePtr node, char *match, char *el, char *fail);
+static gchar *layer_find_innerhtml (xmlNodePtr node, char *match, char *submatch, char *fail);
+xmlNodePtr layer_find_pos (xmlNodePtr node, char *match, char *submatch);
+static char *layer_find_tag (xmlNodePtr node, char *match, char *fail);
+
+#endif /* __PARSER_H__ */
+
Modified: trunk/src/rss.c
==============================================================================
--- trunk/src/rss.c (original)
+++ trunk/src/rss.c Wed Oct 8 16:53:18 2008
@@ -126,6 +126,7 @@
#endif
#include "rss.h"
+#include "parser.h"
#include "network-soup.c"
#include "misc.c"
#if HAVE_DBUS
@@ -133,10 +134,9 @@
#endif
#include "rss-config-factory.c"
#include "rss-icon-factory.c"
+#include "parser.c"
int pop = 0;
-guint ftotal;
-guint farticle;
GtkWidget *flabel;
//#define RSS_DEBUG 1
guint nettime_id = 0;
@@ -147,16 +147,6 @@
gchar *flat_status_msg;
GPtrArray *filter_uids;
-#define DEFAULT_FEEDS_FOLDER "News&Blogs"
-#define DEFAULT_NO_CHANNEL "Untitled channel"
-#define DEFAULT_TTL 1800
-
-/* ms between status updates to the gui */
-#define STATUS_TIMEOUT (250)
-
-#define NETWORK_TIMEOUT (180000)
-#define HTTP_CACHE_PATH "http"
-
static CamelDataCache *http_cache;
static volatile int org_gnome_rss_controls_counter_id = 0;
@@ -186,14 +176,9 @@
gboolean setup_feed(add_feed *feed);
gchar *display_doc (RDF *r);
void check_folders(void);
-//u_int32_t
-gchar *
-update_channel(const char *chn_name, char *url, char *main_date, GArray *item, GtkWidget *progress);
-gchar *decode_utf8_entities(gchar *str);
gchar *strplchr(gchar *source);
static char *gen_md5(gchar *buffer);
CamelMimePart *file_to_message(const char *name);
-gchar *get_real_channel_name(gchar *uri, gchar *failed);
void save_gconf_feed(void);
void check_feed_age(void);
static gboolean check_if_match (gpointer key, gpointer value, gpointer user_data);
@@ -239,7 +224,6 @@
static void
dialog_key_destroy (GtkWidget *widget, gpointer data);
guint fallback_engine(void);
-gchar *fetch_image(gchar *url);
/*======================================================================*/
@@ -1231,273 +1215,6 @@
return res;
}
-void
-html_set_base(xmlNode *doc, char *base, char *tag, char *prop, char *basehref)
-{
- gchar *url;
-#if LIBSOUP_VERSION < 2003000
- SoupUri *newuri;
-#else
- SoupURI *newuri;
-#endif
- gchar *newuristr;
-#if LIBSOUP_VERSION < 2003000
- SoupUri *base_uri = soup_uri_new (base);
-#else
- SoupURI *base_uri = soup_uri_new (base);
-#endif
- while (doc = html_find((xmlNode *)doc, tag))
- {
- if (url = xmlGetProp(doc, prop))
- {
- if (!strncmp(tag, "img", 3) && !strncmp(prop, "src", 3))
- {
- gchar *tmpurl = strplchr(url);
- xmlSetProp(doc, prop, tmpurl);
- g_free(tmpurl);
- }
- d(g_print("DEBUG: parsing: %s\n", url));
- if (url[0] == '/' && url[1] != '/')
- {
- gchar *server = get_server_from_uri(base);
- gchar *tmp = g_strdup_printf("%s/%s", server, url);
- xmlSetProp(doc, prop, tmp);
- g_free(tmp);
- g_free(server);
- }
- if (url[0] == '/' && url[1] == '/')
- {
- /*FIXME handle ssl */
- gchar *tmp = g_strdup_printf("%s%s", "http:", url);
- xmlSetProp(doc, prop, tmp);
- g_free(tmp);
- }
- if (url[0] != '/' && !g_str_has_prefix(url, "http://")
- && !g_str_has_prefix(url, "https://"))
- {
- // in case we have a base href= set then rewrite
- // all relative links
- if (basehref != NULL)
- {
-#if LIBSOUP_VERSION < 2003000
- SoupUri *newbase_uri = soup_uri_new (basehref);
-#else
- SoupURI *newbase_uri = soup_uri_new (basehref);
-#endif
- newuri = soup_uri_new_with_base (newbase_uri, url);
- soup_uri_free(newbase_uri);
- }
- else
- newuri = soup_uri_new_with_base (base_uri, url);
- //xmlSetProp(doc, prop, g_strdup_printf("%s/%s", get_server_from_uri(base), url));
- if (newuri)
- {
- newuristr = soup_uri_to_string (newuri, FALSE);
- xmlSetProp(doc, prop, (xmlChar *)newuristr);
- g_free(newuristr);
- soup_uri_free(newuri);
- }
- }
- xmlFree(url);
- }
- }
- soup_uri_free(base_uri);
-}
-
-static void
-my_xml_parser_error_handler (void *ctx, const char *msg, ...)
-{
- ;
-}
-
-xmlDoc *
-xml_parse_sux (const char *buf, int len)
-{
- static xmlSAXHandler *sax;
- xmlParserCtxtPtr ctxt;
- xmlDoc *doc;
-
- g_return_val_if_fail (buf != NULL, NULL);
-
- if (!sax) {
- xmlInitParser();
- sax = xmlMalloc (sizeof (xmlSAXHandler));
-//#if LIBXML_VERSION > 20600
- xmlSAXVersion (sax, 2);
-//#else
- // memcpy (sax, &xmlDefaultSAXHandler, sizeof (xmlSAXHandler));
-//#endif
- sax->warning = my_xml_parser_error_handler;
- sax->error = my_xml_parser_error_handler;
- }
-
- if (len == -1)
- len = strlen (buf);
- ctxt = xmlCreateMemoryParserCtxt (buf, len);
- if (!ctxt)
- return NULL;
-
- xmlFree (ctxt->sax);
- ctxt->sax = sax;
-//#if LIBXML_VERSION > 20600
- ctxt->sax2 = 1;
- ctxt->str_xml = xmlDictLookup (ctxt->dict, BAD_CAST "xml", 3);
- ctxt->str_xmlns = xmlDictLookup (ctxt->dict, BAD_CAST "xmlns", 5);
- ctxt->str_xml_ns = xmlDictLookup (ctxt->dict, XML_XML_NAMESPACE, 36);
-//#endif
-
- ctxt->recovery = TRUE;
- ctxt->vctxt.error = my_xml_parser_error_handler;
- ctxt->vctxt.warning = my_xml_parser_error_handler;
-
- xmlCtxtUseOptions(ctxt, XML_PARSE_DTDLOAD
- | XML_PARSE_NOENT
- | XML_PARSE_NOCDATA);
-
-
- xmlParseDocument (ctxt);
-
- doc = ctxt->myDoc;
- ctxt->sax = NULL;
- xmlFreeParserCtxt (ctxt);
-
- return doc;
-}
-
-xmlDoc *
-parse_html_sux (const char *buf, int len)
-{
- xmlDoc *doc;
-#if LIBXML_VERSION > 20600
- static xmlSAXHandler *sax;
- htmlParserCtxtPtr ctxt;
-
- g_return_val_if_fail (buf != NULL, NULL);
-
- if (!sax) {
- xmlInitParser();
- sax = xmlMalloc (sizeof (htmlSAXHandler));
- memcpy (sax, &htmlDefaultSAXHandler, sizeof (xmlSAXHandlerV1));
- sax->warning = my_xml_parser_error_handler;
- sax->error = my_xml_parser_error_handler;
- }
-
- if (len == -1)
- len = strlen (buf);
- ctxt = htmlCreateMemoryParserCtxt (buf, len);
- if (!ctxt)
- return NULL;
-
- xmlFree (ctxt->sax);
- ctxt->sax = sax;
- ctxt->vctxt.error = my_xml_parser_error_handler;
- ctxt->vctxt.warning = my_xml_parser_error_handler;
-
- htmlCtxtUseOptions(ctxt, HTML_PARSE_NONET
- | HTML_PARSE_COMPACT
- | HTML_PARSE_NOBLANKS);
-
- htmlParseDocument (ctxt);
- doc = ctxt->myDoc;
-
- ctxt->sax = NULL;
- htmlFreeParserCtxt (ctxt);
-
-#else /* LIBXML_VERSION <= 20600 */
- char *buf_copy = g_strndup (buf, len);
-
- doc = htmlParseDoc (buf_copy, NULL);
- g_free (buf_copy);
-#endif
- return doc;
-}
-
-/*modifies a html document to be absolute */
-xmlDoc *
-parse_html(char *url, const char *html, int len)
-{
- xmlDoc *src = NULL;
- xmlDoc *doc = NULL;
-
- src = (xmlDoc *)parse_html_sux(html, len);
-
- if (!src)
- return NULL;
- doc = src;
- gchar *newbase = NULL;
- newbase = xmlGetProp(html_find((xmlNode *)doc, "base"), "href");
- d(g_print("newbase:|%s|\n", newbase));
- xmlDoc *tmpdoc = (xmlDoc *)html_find((xmlNode *)doc, "base");
- xmlUnlinkNode((xmlNode *)tmpdoc);
- html_set_base((xmlNode *)doc, url, "a", "href", newbase);
- html_set_base((xmlNode *)doc, url, "img", "src", newbase);
- html_set_base((xmlNode *)doc, url, "input", "src", newbase);
- html_set_base((xmlNode *)doc, url, "link", "src", newbase);
- html_set_base((xmlNode *)doc, url, "body", "background", newbase);
- html_set_base((xmlNode *)doc, url, "script", "src", newbase);
-/* while (doc = html_find((xmlNode *)doc, "img"))
- {
- if (url = xmlGetProp(doc, "src"))
- {
- gchar *str = strplchr(url);
- g_print("%s\n", str);
- xmlSetProp(doc, "src", str);
- g_free(str);
- xmlFree(url);
- }
- }*/
- doc = src;
- if (newbase)
- xmlFree(newbase);
- return doc;
-}
-
-static gchar *
-parse_href (const gchar *s, const gchar *base)
-{
- gchar *retval;
- gchar *tmp;
- gchar *tmpurl;
-
- if(s == NULL || *s == 0)
- return g_strdup ("");
-
-// tmpurl = html_url_new (s);
-// if (html_url_get_protocol (tmpurl) == NULL) {
- if (s[0] == '/') {
- if (s[1] == '/') {
- gchar *t;
-
- /* Double slash at the beginning. */
-
- /* FIXME? This is a bit sucky. */
-/* t = g_strconcat (html_url_get_protocol (baseURL),
- ":", s, NULL);
- html_url_destroy (tmpurl);
- tmpurl = html_url_new (t);
- retval = html_url_to_string (tmpurl);
- html_url_destroy (tmpurl);
- g_free (t);*/
- } else {
- /* Single slash at the beginning. */
-
- tmpurl = g_strdup_printf("%s%s", base, s);
- }
- } else {
- gchar *t;
-/* html_url_destroy (tmpurl);
- tmpurl = html_url_append_path (baseURL, s);
- retval = html_url_to_string (tmpurl);
- html_url_destroy (tmpurl);*/
- }
-// } else {
- // retval = html_url_to_string (tmpurl);
- // html_url_destroy (tmpurl);
- // }
-
- return tmpurl;
-}
-
static void
summary_cb (GtkWidget *button, EMFormatHTMLPObject *pobject)
{
@@ -3741,290 +3458,6 @@
/************ RDF Parser *******************/
-static char *
-layer_find_innerelement (xmlNodePtr node,
- char *match, char *el,
- char *fail)
-{
- while (node!=NULL) {
-#ifdef RDF_DEBUG
- xmlDebugDumpNode (stdout, node, 32);
- printf("%s.\n", node->name);
-#endif
- if (strcasecmp (node->name, match)==0) {
- return xmlGetProp(node, el);
- }
- node = node->next;
- }
- return fail;
-}
-
-xmlNode *
-html_find (xmlNode *node,
- char *match)
-{
-#ifdef RDF_DEBUG
-g_print("parser entry 3_1!!!\n");
-#endif
- while (node) {
-#ifdef RDF_DEBUG
- xmlDebugDumpNode (stdout, node, 32);
- printf("%s.\n", node->name);
-#endif
- if (node->children)
- node = node->children;
- else {
- while (node && !node->next)
- node = node->parent;
- //if (!node || node == top)
- if (!node)
-{
-#ifdef RDF_DEBUG
-g_print("parser error 3_2 -> return NULL!!!\n");
-#endif
- return NULL;
-}
- node = node->next;
- }
-
- if (node->name) {
- if (!strcmp (node->name, match))
-{
-#ifdef RDF_DEBUG
-g_print("parser error 3_3 -> return NULL!!!\n");
-#endif
- return node;
-}
- }
- }
-#ifdef RDF_DEBUG
-g_print("parser error 3_4 -> return NULL!!!\n");
-#endif
- return NULL;
-}
-
-static char *
-layer_find (xmlNodePtr node,
- char *match,
- char *fail)
-{
- while (node!=NULL) {
-#ifdef RDF_DEBUG
- xmlDebugDumpNode (stdout, node, 32);
- printf("%s.\n", node->name);
-#endif
- if (strcasecmp (node->name, match)==0) {
- if (node->children != NULL && node->children->content != NULL) {
- return node->children->content;
- } else {
- return fail;
- }
- }
- node = node->next;
- }
- return fail;
-}
-
-//
-//namespace-based modularization
-//standard modules
-//
-// mod_content
-// * only handles content:encoding
-// * if it's necessary handle
-// content:item stuff
-
-gchar *
-content_rss(xmlNode *node, gchar *fail)
-{
- //guint len=0;
- //xmlBufferPtr buf = xmlBufferCreate();
- gchar *content;
-
- content = xmlNodeGetContent(node);
- if (content)
- return content;
- else
- return fail;
-/* len = xmlNodeDump(buf, node->doc, node->children->next, 0, 0);
- if (len)
- {
- content = g_strdup_printf("%s", xmlBufferContent(buf));
- xmlBufferFree(buf);
- return content;
- }
- else
- return fail;*/
-}
-
-void
-dublin_core_rss(void)
-{
- g_print("dublin core\n");
-}
-
-void
-syndication_rss(void)
-{
- g_print("syndication\n");
-}
-
-
-gchar *standard_rss_modules[3][3] = {
- {"content", "content", (gchar *)content_rss},
- {"dublin core", "dc", (gchar *)dublin_core_rss},
- {"syndication", "sy", (gchar *)syndication_rss}};
-
-
-static char *
-layer_find_tag (xmlNodePtr node,
- char *match,
- char *fail)
-{
- xmlBufferPtr buf = xmlBufferCreate();
- gchar *content;
- guint len = 0;
- int i;
- char* (*func)();
-
- while (node!=NULL) {
-#ifdef RDF_DEBUG
- xmlDebugDumpNode (stdout, node, 32);
- printf("%s.\n", node->name);
-#endif
- if (node->ns && node->ns->prefix)
- {
- for (i=0; i < 3; i++)
- {
- if (!strcasecmp (node->ns->prefix, standard_rss_modules[i][1]))
- {
- func = (gpointer)standard_rss_modules[i][2];
- if (strcasecmp (node->ns->prefix, match)==0)
- {
- xmlBufferFree(buf);
- return func(node, fail);
- }
- }
- }
- }
- if (strcasecmp (node->name, match)==0) {
- if (node->children != NULL)
- {
- if (node->children->type == 1
- || node->children->next != NULL) {
-#ifdef RDF_DEBUG
- g_print("NODE DUMP:%s\n", xmlNodeGetContent(node->children->next));
-#endif
- len = xmlNodeDump(buf, node->doc, node->children, 0, 0);
- content = g_strdup_printf("%s", xmlBufferContent(buf));
- xmlBufferFree(buf);
- return content;
- } else {
- xmlBufferFree(buf);
- return fail;
- }
- }
- }
- node = node->next;
- }
- xmlBufferFree(buf);
- return fail;
-}
-
-static gchar *
-layer_find_innerhtml (xmlNodePtr node,
- char *match, char *submatch,
- char *fail)
-{
- while (node!=NULL) {
-#ifdef RDF_DEBUG
- xmlDebugDumpNode (stdout, node, 32);
- printf("%s.\n", node->name);
-#endif
- if (strcasecmp (node->name, match)==0 && node->children) {
- return layer_find(node->children->next, submatch, fail);
- }
- node = node->next;
- }
- return fail;
-}
-
-xmlNodePtr
-layer_find_pos (xmlNodePtr node,
- char *match, char *submatch)
-{
- xmlNodePtr subnode;
- while (node!=NULL) {
-#ifdef RDF_DEBUG
- xmlDebugDumpNode (stdout, node, 32);
- printf("%s.\n", node->name);
-#endif
- if (strcasecmp (node->name, match)==0 && node->children) {
- subnode = node->children;
- while (subnode!=NULL) {
- if (strcasecmp (subnode->name, submatch)==0 && subnode->children)
- {
- return subnode->children->next;
- }
- subnode = subnode->next;
- }
- }
- node = node->next;
- }
- return NULL;
-}
-
-static char *
-layer_find_url (xmlNodePtr node,
- char *match,
- char *fail)
-{
- char *p = layer_find (node, match, fail);
- char *r = p;
- static char *wb = NULL;
- char *w;
-
- if (wb) {
- g_free (wb);
- }
-
- wb = w = g_malloc (3 * strlen (p));
-
- if (w == NULL) {
- return fail;
- }
-
- if (*r == ' ') r++; /* Fix UF bug */
-
- while (*r) {
- if (strncmp (r, "&amp;", 5) == 0) {
- *w++ = '&';
- r += 5;
- continue;
- }
- if (strncmp (r, "&lt;", 4) == 0) {
- *w++ = '<';
- r += 4;
- continue;
- }
- if (strncmp (r, "&gt;", 4) == 0) {
- *w++ = '>';
- r += 4;
- continue;
- }
- if (*r == '"' || *r == ' '){
- *w++ = '%';
- *w++ = "0123456789ABCDEF"[*r/16];
- *w++ = "0123456789ABCDEF"[*r&15];
- r++;
- continue;
- }
- *w++ = *r++;
- }
- *w = 0;
- return wb;
-}
-
gchar *
get_real_channel_name(gchar *uri, gchar *failed)
{
@@ -4034,167 +3467,6 @@
return chn_name ? chn_name : failed;
}
-gchar *
-tree_walk (xmlNodePtr root, RDF *r)
-{
- xmlNodePtr walk;
- xmlNodePtr rewalk = root;
- xmlNodePtr channel = NULL;
- xmlNodePtr image = NULL;
- GArray *item = g_array_new (TRUE, TRUE, sizeof (xmlNodePtr));
- char *t;
- char *charset;
- gchar *img_src = NULL;
-
- /* check in-memory encoding first, fallback to transport encoding, which may or may not be correct */
- if (r->cache->charset == XML_CHAR_ENCODING_UTF8
- || r->cache->charset == XML_CHAR_ENCODING_ASCII) {
- charset = NULL;
- } else {
- /* bad/missing encoding, fallback to latin1 (locale?) */
- charset = r->cache->encoding ? (char *)r->cache->encoding : "iso-8859-1";
- }
-
- do {
- walk = rewalk;
- rewalk = NULL;
-
- while (walk!=NULL){
-#ifdef RDF_DEBUG
- printf ("%p, %s\n", walk, walk->name);
-#endif
- if (strcasecmp (walk->name, "rdf") == 0) {
-// xmlNode *node = walk;
- rewalk = walk->children;
- walk = walk->next;
- if (!r->type)
- r->type = g_strdup("RDF");
- r->type_id = RDF_FEED;
-// gchar *ver = xmlGetProp(node, "version");
- if (r->version)
- g_free(r->version);
- r->version = g_strdup("(RSS 1.0)");
-// if (ver)
-// xmlFree(ver);
- continue;
- }
- if (strcasecmp (walk->name, "rss") == 0){
- xmlNode *node = walk;
- rewalk = walk->children;
- walk = walk->next;
- if (!r->type)
- r->type = g_strdup("RSS");
- r->type_id = RSS_FEED;
- gchar *ver = xmlGetProp(node, "version");
- if (r->version)
- g_free(r->version);
- r->version = g_strdup(ver);
- if (ver)
- xmlFree(ver);
- continue;
- }
- if (strcasecmp (walk->name, "feed") == 0) {
- xmlNode *node = walk;
- if (!r->type)
- r->type = g_strdup("ATOM");
- r->type_id = ATOM_FEED;
- gchar *ver = xmlGetProp(node, "version");
- if (ver)
- {
- if (r->version)
- g_free(r->version);
- r->version = g_strdup(ver);
- xmlFree(ver);
- }
- else
- {
- if (r->version)
- g_free(r->version);
- r->version = g_strdup("1.0");
- }
- }
-
- /* This is the channel top level */
-#ifdef RDF_DEBUG
- printf ("Top level '%s'.\n", walk->name);
-#endif
- if (strcasecmp (walk->name, "channel") == 0) {
- channel = walk;
- rewalk = channel->children;
- }
- if (strcasecmp (walk->name, "feed") == 0) {
- channel = walk;
- rewalk = channel->children;
- }
- if (strcasecmp (walk->name, "image") == 0) {
- image = walk;
- }
- if (strcasecmp (walk->name, "item") == 0) {
- g_array_append_val(item, walk);
- }
- if (strcasecmp (walk->name, "entry") == 0) {
- g_array_append_val(item, walk);
- }
- walk = walk->next;
- }
- }
- while (rewalk);
-
- if (channel == NULL) {
- fprintf(stderr, "No channel definition.\n");
- return NULL;
- }
- if (image != NULL)
- img_src = layer_find(image->children, "url", NULL);
-
- t = g_strdup(get_real_channel_name(r->uri, NULL));
- //feed might be added with no validation
- //so it could be named Untitled channel
- //till validation process
- if (t == NULL || !g_ascii_strncasecmp(t,
- DEFAULT_NO_CHANNEL,
- strlen(DEFAULT_NO_CHANNEL)))
- {
-
- t = layer_find(channel->children,
- "title",
- DEFAULT_NO_CHANNEL);
- t = decode_html_entities(t);
- gchar *tmp = sanitize_folder(t);
- g_free(t);
- t = tmp;
- t = generate_safe_chn_name(t);
- }
- gchar *tmp = layer_find(channel->children, "ttl", NULL);
- if (tmp)
- r->ttl = atoi(tmp);
- else
- r->ttl = 0;
-
- update_feed_image(img_src, gen_md5(r->uri));
-
- //items might not have a date
- // so try to grab channel/feed date
- gchar *md2 = g_strdup(layer_find(channel->children, "date",
- layer_find(channel->children, "pubDate",
- layer_find(channel->children, "updated", NULL))));
-
- r->total = item->len;
-
- r->feedid = update_channel(
- //atempt to find real_channel name using url
- t,
- r->uri,
- md2,
- item,
- r->progress);
- if (md2)
- g_free(md2);
- g_array_free(item, TRUE);
- g_free(r->feedid);
- return t;
-}
-
CamelMimePart *
file_to_message(const char *filename)
{
@@ -4543,256 +3815,6 @@
}
gchar *
-update_channel(const char *chn_name, gchar *url, char *main_date, GArray *item, GtkWidget *progress)
-{
- guint i;
- gchar *sender = g_strdup_printf("%s <%s>", chn_name, chn_name);
- CamelStore *store = mail_component_peek_local_store(NULL);
- char *d2 = NULL;
- xmlNodePtr el;
- char *q = NULL;
- char *b = NULL;
- gchar *feed = NULL;
- gboolean freeb = 0; //if b needs to be freed or not
- gchar *encl, *encl_file;
- xmlChar *buff = NULL;
- int size = 0;
-
- migrate_crc_md5(chn_name, url);
-
- gchar *buf = gen_md5(url);
-
- gchar *feed_dir = rss_component_peek_base_directory(mail_component_peek());
- if (!g_file_test(feed_dir, G_FILE_TEST_EXISTS))
- g_mkdir_with_parents (feed_dir, 0755);
-
- gchar *feed_name = g_strdup_printf("%s/%s", feed_dir, buf);
- g_free(feed_dir);
-
- FILE *fr = fopen(feed_name, "r");
- FILE *fw = fopen(feed_name, "a+");
- //int fw = g_open (feed_name, O_WRONLY | O_CREAT| O_APPEND | O_BINARY, 0666);
-
- for (i=0; NULL != (el = g_array_index(item, xmlNodePtr, i)); i++)
- {
- update_sr_message();
- if (rf->cancel) goto out;
-
- if (progress)
- {
- gdouble fraction = (gdouble)i/item->len;
- gtk_progress_bar_set_fraction((GtkProgressBar *)progress, fraction);
- gchar *msg = g_strdup_printf("%2.0f%% done", fraction*100);
- gtk_progress_bar_set_text((GtkProgressBar *)progress, msg);
- g_free(msg);
- }
-
- char *p = layer_find (el->children, "title", "Untitled article");
- //firstly try to parse as an ATOM author
- //process person construct
- char *q1 = g_strdup(layer_find_innerhtml (el->children, "author", "name", NULL));
- char *q2 = g_strdup(layer_find_innerhtml (el->children, "author", "uri", NULL));
- char *q3 = g_strdup(layer_find_innerhtml (el->children, "author", "email", NULL));
- if (q1)
- {
- q1 = g_strdelimit(q1, "><", ' ');
- if (q3)
- {
- q3 = g_strdelimit(q3, "><", ' ');
- q = g_strdup_printf("%s <%s>", q1, q3);
- g_free(q1);
- if (q2) g_free(q2);
- g_free(q3);
- }
- else
- {
- if (q2)
- q2 = g_strdelimit(q2, "><", ' ');
- else
- q2 = g_strdup(q1);
- q = g_strdup_printf("%s <%s>", q1, q2);
- g_free(q1);
- g_free(q2);
- }
- }
- else //then RSS or RDF
- {
- xmlNodePtr source;
- source = layer_find_pos(el->children, "source", "author");
- //try the source construct
- //source = layer_find_pos(el->children, "source", "contributor");
- if (source != NULL)
- q = g_strdup(layer_find(source, "name", NULL));
- else
- q = g_strdup(layer_find (el->children, "author",
- layer_find (el->children, "creator", NULL)));
- if (q)
- {
- //evo will go crazy when it'll encounter ":" character
- //it probably enforces strict rfc2047 compliance
- q = g_strdelimit(q, "><:", ' ');
- gchar *tmp = g_strdup_printf("\"%s\" <\"%s\">", q, q);
- g_free(q);
- q = tmp;
- if (q2) g_free(q2);
- if (q3) g_free(q3);
- }
- }
- //FIXME this might need xmlFree when namespacing
- b = layer_find_tag (el->children, "description",
- layer_find_tag (el->children, "content",
- layer_find_tag (el->children, "summary",
- NULL)));
-
- if (!b)
- b = g_strdup(layer_find (el->children, "description",
- layer_find (el->children, "content",
- layer_find (el->children, "summary", "No information"))));
-
- char *d = layer_find (el->children, "pubDate", NULL);
- //date in dc module format
- if (!d)
- {
- d2 = layer_find (el->children, "date", NULL); //RSS2
- if (!d2)
- {
- d2 = layer_find(el->children, "updated", NULL); //ATOM
- if (!d2) //take channel date if exists
- d2 = main_date;
- }
- }
-
- encl = layer_find_innerelement(el->children, "enclosure", "url", // RSS 2.0 Enclosure
- layer_find_innerelement(el->children, "link", "enclosure", NULL)); // ATOM Enclosure
- //we have to free this somehow
- char *link = g_strdup(layer_find (el->children, "link", NULL)); //RSS,
- if (!link)
- link = layer_find_innerelement(el->children, "link", "href", g_strdup(_("No Information"))); //ATOM
-
- char *id = layer_find (el->children, "id", //ATOM
- layer_find (el->children, "guid", NULL)); //RSS 2.0
- feed = g_strdup_printf("%s\n", id ? id : link);
- d(g_print("link:%s\n", link));
- d(g_print("author:%s\n", q));
- d(g_print("sender:%s\n", sender));
- d(g_print("title:%s\n", p));
- d(g_print("date:%s\n", d));
- d(g_print("date:%s\n", d2));
-
- gchar rfeed[513];
- memset(rfeed, 0, 512);
- int occ = 0;
-
- while (gtk_events_pending())
- gtk_main_iteration ();
-
- if (!feed_is_new(feed_name, feed)) {
-/*
- if (fr)
- {
- while (fgets(rfeed, 511, fr) != NULL)
- {
- if (rfeed && strstr(rfeed, feed))
- {
- occ=1;
- break;
- }
- }
- (void)fseek(fr, 0L, SEEK_SET);
- }
-
- while (gtk_events_pending())
- gtk_main_iteration ();
-
- if (!occ)
- {*/
- ftotal++;
- p = decode_html_entities (p);
- gchar *tmp = decode_utf8_entities(b);
- g_free(b);
-
- xmlDoc *src = (xmlDoc *)parse_html_sux(tmp, strlen(tmp));
- if (src)
- {
- xmlNode *doc = (xmlNode *)src;
-
- while (doc = html_find(doc, "img"))
- {
- gchar *name = NULL;
- xmlChar *url = xmlGetProp(doc, "src");
- if (url) {
- if (name = fetch_image(url))
- xmlSetProp(doc, "src", name);
- xmlFree(url);
- }
- }
- xmlDocDumpMemory(src, &buff, &size);
- xmlFree(src);
- }
- g_free(tmp);
-// tmp = decode_html_entities(buff);
-// tmp = xmlEncodeSpecialChars(NULL, buff);
-// b = tmp;
-// g_free(b);
- b=buff;
-
- while (gtk_events_pending())
- gtk_main_iteration ();
-
- create_feed *CF = g_new0(create_feed, 1);
- /* pack all data */
- CF->full_path = g_strdup(chn_name);
- CF->q = g_strdup(q);
- CF->sender = g_strdup(sender);
- CF->subj = g_strdup(p);
- CF->body = g_strdup(b);
- CF->date = g_strdup(d);
- CF->dcdate = g_strdup(d2);
- CF->website = g_strdup(link);
- CF->feedid = g_strdup(buf);
- CF->encl = g_strdup(encl);
- CF->feed_fname = g_strdup(feed_name); //feed file name
- CF->feed_uri = g_strdup(feed); //feed file url
-
- if (encl)
- {
- GError *err = NULL;
- net_get_unblocking(
- encl,
- textcb,
- NULL,
- (gpointer)finish_enclosure,
- CF,
- 0,
- &err);
- }
- else
- {
- create_mail(CF);
- write_feed_status_line(feed_name, feed);
- free_cf(CF);
- }
- farticle++;
- flicker_status_icon(chn_name, p);
- g_free(p);
- }
- d(g_print("put success()\n"));
-tout: if (q) g_free(q);
- g_free(b);
- if (feed) g_free(feed);
- if (encl) g_free(encl);
- g_free(link);
- }
-out: g_free(sender);
-
- if (fr) fclose(fr);
- if (fw) fclose(fw);
-
- g_free(feed_name);
- return buf;
-}
-
-gchar *
display_doc (RDF *r)
{
xmlNodePtr root = xmlDocGetRootElement (r->cache);
Modified: trunk/src/rss.h
==============================================================================
--- trunk/src/rss.h (original)
+++ trunk/src/rss.h Wed Oct 8 16:53:18 2008
@@ -20,11 +20,21 @@
#include <dbus/dbus-glib.h>
#include <dbus/dbus-glib-lowlevel.h>
#endif
+#include "network.h"
#ifndef __RSS_H_
#define __RSS_H_
#define PLUGIN_INSTALL_DIR @PLUGIN_INSTALL_DIR@
+#define DEFAULT_FEEDS_FOLDER "News&Blogs"
+#define DEFAULT_NO_CHANNEL "Untitled channel"
+#define DEFAULT_TTL 1800
+
+/* ms between status updates to the gui */
+#define STATUS_TIMEOUT (250)
+
+#define NETWORK_TIMEOUT (180000)
+#define HTTP_CACHE_PATH "http"
GConfClient *rss_gconf;
GSList *rss_list = NULL;
@@ -256,10 +266,11 @@
gchar *encl;
} create_feed;
+guint upgrade = 0; // set to 2 when initailization successfull
guint count = 0;
gchar *buffer = NULL;
-
-guint upgrade = 0; // set to 2 when initailization successfull
+guint ftotal;
+guint farticle;
u_int32_t gen_crc(const char *msg);
gboolean create_user_pass_dialog(gchar *url);
@@ -274,7 +285,25 @@
static xmlNode *html_find (xmlNode *node, char *match);
gchar *lookup_main_folder(void);
gchar *lookup_feed_folder(gchar *folder);
+gchar *decode_utf8_entities(gchar *str);
gchar *decode_html_entities(gchar *str);
+gchar *get_real_channel_name(gchar *uri, gchar *failed);
+gchar *fetch_image(gchar *url);
+void create_mail(create_feed *CF);
+void migrate_crc_md5(const char *name, gchar *url);
+void write_feed_status_line(gchar *file, gchar *needle);
+void free_cf(create_feed *CF);
+gchar *generate_safe_chn_name(gchar *chn_name);
+void update_sr_message(void);
+void update_feed_image(gchar *image, gchar *key);
+static void flicker_status_icon(const char *channel, gchar *title);
+static void
+#if LIBSOUP_VERSION < 2003000
+finish_enclosure (SoupMessage *msg, create_feed *user_data);
+#else
+finish_enclosure (SoupSession *soup_sess, SoupMessage *msg, create_feed *user_data);
+#endif
+static void textcb(NetStatusType status, gpointer statusdata, gpointer data);
#ifdef HAVE_GECKO
void rss_mozilla_init(void);
#endif
@@ -287,11 +316,6 @@
void populate_reversed(gpointer key, gpointer value, GHashTable *hash);
gchar *rss_component_peek_base_directory(MailComponent *component);
static void custom_feed_timeout(void);
-static char *layer_find (xmlNodePtr node, char *match, char *fail);
-static char *layer_find_innerelement (xmlNodePtr node, char *match, char *el, char *fail);
-static gchar *layer_find_innerhtml (xmlNodePtr node, char *match, char *submatch, char *fail);
-xmlNodePtr layer_find_pos (xmlNodePtr node, char *match, char *submatch);
-static char *layer_find_tag (xmlNodePtr node, char *match, char *fail);
typedef struct FEED_FOLDERS {
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]