Snownews backend



Here's a backend that indexes the feed cache from Snownews (a
text-based RSS aggregator). It is adapted almost entirely from the
Liferea backend and works with my Beagle 0.0.7 and Snownews 1.5.6.1
installations.

Enjoy!

raj
//
// SnownewsQueryable.cs
//
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//

using System;
using System.IO;
using System.Collections;
using System.Threading;

using System.Xml;
using System.Xml.Serialization;
	
using Beagle.Daemon;
using Beagle.Util;

namespace Beagle.Daemon.SnownewsQueryable {

	// Queryable backend that indexes the Snownews feed cache
	// (~/.snownews/cache).  Every file in that directory is parsed as an
	// RSS 1.0 (RDF) document and each of its items is scheduled for indexing.
	[QueryableFlavor (Name="Snownews", Domain=QueryDomain.Local, RequireInotify=false)]
	public class SnownewsQueryable : LuceneQueryable {

		private static Logger log = Logger.Get ("SnownewsQueryable");

		// Interval, in milliseconds, between checks for the cache
		// directory while it does not exist yet.
		private const uint existence_poll_ms = 60000;

		// Item dates are stored as seconds counted from this instant.
		private static readonly DateTime epoch = new DateTime (1970, 1, 1);

		string snownews_dir;
		int snownews_wd = -1;

		// Kept in a field (rather than a local) so the watcher stays
		// referenced for as long as this backend is alive.
		FileSystemWatcher fsw;

		public SnownewsQueryable () : base ("SnownewsIndex")
		{
			snownews_dir = Path.Combine (PathFinder.HomeDir, ".snownews");
			snownews_dir = Path.Combine (snownews_dir, "cache");
		}

		/////////////////////////////////////////////////

		/// <summary>
		/// Starts the backend and launches the initial scan on a worker thread.
		/// </summary>
		public override void Start ()
		{
			base.Start ();

			ExceptionHandlingThread.Start (new ThreadStart (StartWorker));
		}

		/// <summary>
		/// Sets up change notification for the cache directory and indexes
		/// every feed file currently in it.  If the directory is missing,
		/// a periodic existence check is installed instead.
		/// </summary>
		private void StartWorker ()
		{
			if (!Directory.Exists (snownews_dir)) {
				GLib.Timeout.Add (existence_poll_ms, new GLib.TimeoutHandler (CheckForExistence));
				return;
			}

			if (Inotify.Enabled) {
				Inotify.EventType mask = Inotify.EventType.CloseWrite;

				snownews_wd = Inotify.Watch (snownews_dir, mask);
				Inotify.Event += OnInotifyEvent;
			} else {
				fsw = new FileSystemWatcher ();
				fsw.Path = snownews_dir;

				fsw.Changed += new FileSystemEventHandler (OnChanged);
				fsw.Created += new FileSystemEventHandler (OnChanged);

				fsw.EnableRaisingEvents = true;
			}

			log.Info ("Scanning Snownews feeds...");

			Stopwatch stopwatch = new Stopwatch ();
			int feed_count = 0, item_count = 0;

			stopwatch.Start ();

			DirectoryInfo dir = new DirectoryInfo (snownews_dir);
			foreach (FileInfo file in dir.GetFiles ()) {
				item_count += IndexFeed (file.FullName, Scheduler.Priority.Delayed);
				feed_count++;
			}

			stopwatch.Stop ();

			log.Info ("Scanned {0} items in {1} feeds in {2}", item_count, feed_count, stopwatch);
		}

		/// <summary>
		/// Timeout callback used while the cache directory is missing.
		/// </summary>
		/// <returns>
		/// true to keep the timeout installed (directory still missing);
		/// false once the directory exists and the worker has been launched.
		/// </returns>
		private bool CheckForExistence ()
		{
			if (!Directory.Exists (snownews_dir))
				return true;

			// Start () has already run base.Start () once; only the
			// worker needs to be launched again, not the whole backend.
			ExceptionHandlingThread.Start (new ThreadStart (StartWorker));

			return false;
		}

		/////////////////////////////////////////////////

		// Modified/Created event using Inotify

		private void OnInotifyEvent (int wd,
					     string path,
					     string subitem,
					     Inotify.EventType type,
					     uint cookie)
		{
			// Ignore events for watches that are not ours.
			if (wd != snownews_wd)
				return;

			// An empty subitem carries no file name to index.
			if (subitem == "")
				return;

			IndexFeed (Path.Combine (path, subitem), Scheduler.Priority.Immediate);
		}

		// Modified/Created event using FSW

		private void OnChanged (object o, FileSystemEventArgs args)
		{
			IndexFeed (args.FullPath, Scheduler.Priority.Immediate);
		}

		/////////////////////////////////////////////////

		// Parse and index a feed

		/// <summary>
		/// Parses one cache file and schedules an add task for each item in it.
		/// </summary>
		/// <param name="filename">Path of the feed cache file.</param>
		/// <param name="priority">Scheduler priority given to every task created.</param>
		/// <returns>
		/// The number of items scheduled; 0 when the file is already up to
		/// date, cannot be read or parsed, or has no channel link or items.
		/// </returns>
		private int IndexFeed (string filename, Scheduler.Priority priority)
		{
			FileInfo file = new FileInfo (filename);

			if (this.FileAttributesStore.IsUpToDate (file.FullName))
				return 0;

			// A single broken cache file must not abort the scan of the
			// remaining feeds, so load failures are logged and skipped.
			RDF feed;
			try {
				feed = RDF.LoadFromFile (file.FullName);
			} catch (IOException e) {
				LogUnreadableFeed (file.FullName, e);
				return 0;
			} catch (XmlException e) {
				LogUnreadableFeed (file.FullName, e);
				return 0;
			} catch (InvalidOperationException e) {
				// XmlSerializer.Deserialize reports malformed input this way.
				LogUnreadableFeed (file.FullName, e);
				return 0;
			}

			if (feed == null)
				return 0;

			// The channel link is part of every item URI; without it the
			// feed cannot be indexed.  Return 0 (not a negative value) so
			// callers that sum the results keep a correct total.
			if (feed.mChannel == null || feed.mChannel.Source == null)
				return 0;

			if (feed.Items == null)
				return 0;

			// Created only once the feed is known to be usable.
			Scheduler.TaskGroup group = NewMarkingTaskGroup (file.FullName, file.LastWriteTime);

			int item_count = 0;

			foreach (Item item in feed.Items) {
				item_count++;

				Indexable indexable = new Indexable (new Uri (String.Format ("feed:{0};item={1}", feed.mChannel.Source, item.Source)));
				indexable.MimeType = "text/html";
				indexable.Type = "FeedItem";

				DateTime date = epoch.AddSeconds (item.Timestamp);
				indexable.Timestamp = date;

				indexable.AddProperty (Property.NewKeyword ("dc:title", item.Title));
				indexable.AddProperty (Property.NewKeyword ("dc:description", item.Description));
				indexable.AddProperty (Property.NewDate ("fixme:published", date));
				indexable.AddProperty (Property.NewKeyword ("fixme:itemuri", item.Source));
				indexable.AddProperty (Property.NewKeyword ("fixme:webloguri", feed.mChannel.Source));

				StringReader reader = new StringReader (item.Description);
				indexable.SetTextReader (reader);

				Scheduler.Task task = NewAddTask (indexable);
				task.Priority = priority;
				task.SubPriority = 0;
				task.AddTaskGroup (group);
				ThisScheduler.Add (task);
			}

			return item_count;
		}

		// Logs a feed file that could not be read or parsed.
		private static void LogUnreadableFeed (string filename, Exception e)
		{
			log.Warn ("Skipping unreadable Snownews feed {0}: {1}", filename, e.Message);
		}
	}

	////////////////////////////////////////////////

	// De-serialization classes
	// FIXME: Change to standard stream parsing for performance? 

	// One <item> element of a cached feed.  String fields default to ""
	// so an element missing from the file does not leave them null.
	public class Item {
		// RSS 1.0 <title>
		[XmlElement ("title", Namespace="http://purl.org/rss/1.0/")] public string Title = "";
		// RSS 1.0 <link>
		[XmlElement ("link", Namespace="http://purl.org/rss/1.0/")] public string Source = "";
		// RSS 1.0 <description>
		[XmlElement ("description", Namespace="http://purl.org/rss/1.0/")] public string Description = "";
		// <date> element from the Snownews namespace
		[XmlElement ("date", Namespace="http://snownews.kcore.de/ns/1.0/")] public ulong Timestamp;
	}
	
	// The <channel> element of a cached feed.  Fields default to "" so
	// an element missing from the file does not leave them null.
	public class Channel {
		// RSS 1.0 <title>
		[XmlElement ("title", Namespace="http://purl.org/rss/1.0/")] public string Title = "";
		// RSS 1.0 <link>
		[XmlElement ("link", Namespace="http://purl.org/rss/1.0/")] public string Source = "";
		// RSS 1.0 <description>
		[XmlElement ("description", Namespace="http://purl.org/rss/1.0/")] public string Description = "";
	}

	// Root <rdf:RDF> element of a cached feed: one channel plus its items.
	public class RDF {
		[XmlElement ("channel", typeof (Channel), Namespace="http://purl.org/rss/1.0/")]
		public Channel mChannel;

		[XmlElement ("item", typeof (Item), Namespace="http://purl.org/rss/1.0/")]
		public ArrayList Items {
			get { return mItems; }
			set { mItems = value; }
		}

		private ArrayList mItems = new ArrayList ();
		private static Logger log = Logger.Get ("SnownewsQueryable");

		/// <summary>
		/// Deserializes a feed cache file.
		/// </summary>
		/// <param name="filename">Path of the file to read.</param>
		/// <returns>
		/// The parsed feed, or null when the serializer reports that the
		/// document cannot be deserialized as an RDF feed.
		/// </returns>
		/// <remarks>
		/// Exceptions raised while opening or parsing the file propagate
		/// to the caller; the file is closed in every case.
		/// </remarks>
		public static RDF LoadFromFile (string filename) {
			XmlRootAttribute xRoot = new XmlRootAttribute ();
			xRoot.ElementName = "RDF";
			xRoot.Namespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
			XmlSerializer serializer = new XmlSerializer (typeof (RDF), xRoot);

			using (Stream stream = new FileStream (filename,
							       FileMode.Open,
							       FileAccess.Read,
							       FileShare.Read)) {
				XmlTextReader reader = new XmlTextReader (stream);
				try {
					// Do not attempt to deserialize a document the
					// serializer has already rejected; report it and
					// let the caller skip the file.
					if (!serializer.CanDeserialize (reader)) {
						log.Warn ("{0} is not an RDF feed, skipping", filename);
						return null;
					}

					return (RDF) serializer.Deserialize (reader);
				} finally {
					reader.Close ();
				}
			}
		}
	}
}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]