Snownews backend
- From: Raj Taneja <raj taneja gmail com>
- To: Beagle List <dashboard-hackers gnome org>
- Subject: Snownews backend
- Date: Thu, 17 Mar 2005 08:50:30 -0500
Here's a backend that will index the feed cache from Snownews
(a text-based RSS aggregator). It's completely ripped off of the Liferea
backend and works with my Beagle 0.0.7 and Snownews 1.5.6.1
installations.
Enjoy!
raj
//
// SnownewsQueryable.cs
//
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//
using System;
using System.IO;
using System.Collections;
using System.Threading;
using System.Xml;
using System.Xml.Serialization;
using Beagle.Daemon;
using Beagle.Util;
namespace Beagle.Daemon.SnownewsQueryable {
// Beagle backend that indexes the feed cache kept by Snownews.
// Every file found in <PathFinder.HomeDir>/.snownews/cache is parsed as an
// RSS 1.0 (RDF) document and each of its items is scheduled for indexing
// as a "FeedItem". Changes to the directory are picked up through inotify
// when it is enabled, and through a FileSystemWatcher otherwise.
[QueryableFlavor (Name="Snownews", Domain=QueryDomain.Local, RequireInotify=false)]
public class SnownewsQueryable : LuceneQueryable {

    private static Logger log = Logger.Get ("SnownewsQueryable");

    // Directory that holds the cached feed files.
    string snownews_dir;

    // Inotify watch descriptor for snownews_dir; stays -1 until a watch is added.
    int snownews_wd = -1;

    // Fallback watcher used when inotify is disabled. It is stored in a field
    // (rather than a local) so that it remains referenced after StartWorker
    // returns and cannot be collected while the backend is alive.
    FileSystemWatcher snownews_fsw;

    public SnownewsQueryable () : base ("SnownewsIndex")
    {
        snownews_dir = Path.Combine (PathFinder.HomeDir, ".snownews");
        snownews_dir = Path.Combine (snownews_dir, "cache");
    }

    /////////////////////////////////////////////////

    // Starts the base queryable, then performs the initial scan on a
    // separate thread.
    public override void Start ()
    {
        base.Start ();
        ExceptionHandlingThread.Start (new ThreadStart (StartWorker));
    }

    // Sets up change notification for the cache directory and indexes every
    // file currently in it. If the directory does not exist yet, a 60 second
    // GLib timeout is installed that polls for it instead.
    private void StartWorker ()
    {
        if (!Directory.Exists (snownews_dir)) {
            GLib.Timeout.Add (60000, new GLib.TimeoutHandler (CheckForExistence));
            return;
        }

        if (Inotify.Enabled) {
            Inotify.EventType mask = Inotify.EventType.CloseWrite;
            snownews_wd = Inotify.Watch (snownews_dir, mask);
            Inotify.Event += OnInotifyEvent;
        } else {
            snownews_fsw = new FileSystemWatcher ();
            snownews_fsw.Path = snownews_dir;
            snownews_fsw.Changed += new FileSystemEventHandler (OnChanged);
            snownews_fsw.Created += new FileSystemEventHandler (OnChanged);
            snownews_fsw.EnableRaisingEvents = true;
        }

        log.Info ("Scanning Snownews feeds...");

        Stopwatch stopwatch = new Stopwatch ();
        int feed_count = 0, item_count = 0;

        stopwatch.Start ();

        DirectoryInfo dir = new DirectoryInfo (snownews_dir);
        foreach (FileInfo file in dir.GetFiles ()) {
            item_count += IndexFeed (file.FullName, Scheduler.Priority.Delayed);
            feed_count++;
        }

        stopwatch.Stop ();

        log.Info ("Scanned {0} items in {1} feeds in {2}", item_count, feed_count, stopwatch);
    }

    // GLib timeout callback. Returns true (keep polling) while the cache
    // directory is still missing; once it appears, restarts the backend and
    // returns false so the timeout is removed.
    private bool CheckForExistence ()
    {
        if (!Directory.Exists (snownews_dir))
            return true;

        // NOTE(review): this re-runs base.Start () as well as the worker;
        // confirm that LuceneQueryable.Start () is safe to call twice.
        this.Start ();

        return false;
    }

    /////////////////////////////////////////////////

    // Modified/Created event using Inotify
    private void OnInotifyEvent (int wd,
                                 string path,
                                 string subitem,
                                 Inotify.EventType type,
                                 uint cookie)
    {
        // Events for other watches are not ours.
        if (wd != snownews_wd)
            return;

        // An empty subitem means the event is not about a file in the directory.
        if (subitem == "")
            return;

        IndexFeed (Path.Combine (path, subitem), Scheduler.Priority.Immediate);
    }

    // Modified/Created event using FSW
    private void OnChanged (object o, FileSystemEventArgs args)
    {
        IndexFeed (args.FullPath, Scheduler.Priority.Immediate);
    }

    /////////////////////////////////////////////////

    // Parse and index a feed.
    //
    // filename: full path of the cached feed file.
    // priority: scheduler priority given to every task created for the feed.
    //
    // Returns the number of items scheduled for indexing. Returns 0 when the
    // file is already up to date, cannot be read or parsed, has no channel
    // link, or has no items. The result is never negative, so callers can
    // sum it safely.
    private int IndexFeed (string filename, Scheduler.Priority priority)
    {
        FileInfo file = new FileInfo (filename);

        if (this.FileAttributesStore.IsUpToDate (file.FullName))
            return 0;

        // Created before the file is read, so the timestamp recorded for the
        // group is the one observed before parsing started.
        Scheduler.TaskGroup group = NewMarkingTaskGroup (file.FullName, file.LastWriteTime);

        RDF feed;
        try {
            feed = RDF.LoadFromFile (file.FullName);
        } catch (Exception e) {
            // The cache directory may contain files that are not valid feeds
            // (or that are being rewritten). One bad file must not abort the
            // whole scan or escape from a watcher callback.
            log.Warn ("Unable to load Snownews feed {0}: {1}", file.FullName, e.Message);
            return 0;
        }

        if (feed == null)
            return 0;

        // Without a channel link no item URI can be built.
        if (feed.mChannel == null || feed.mChannel.Source == null)
            return 0;

        if (feed.Items == null)
            return 0;

        int item_count = 0;

        foreach (Item item in feed.Items) {
            item_count++;

            Indexable indexable = new Indexable (new Uri (String.Format ("feed:{0};item={1}", feed.mChannel.Source, item.Source)));
            indexable.MimeType = "text/html";
            indexable.Type = "FeedItem";

            // Item timestamps are stored as seconds counted from 1970-01-01.
            DateTime date = new DateTime (1970, 1, 1);
            date = date.AddSeconds (item.Timestamp);
            indexable.Timestamp = date;

            indexable.AddProperty (Property.NewKeyword ("dc:title", item.Title));
            indexable.AddProperty (Property.NewKeyword ("dc:description", item.Description));
            indexable.AddProperty (Property.NewDate ("fixme:published", date));
            indexable.AddProperty (Property.NewKeyword ("fixme:itemuri", item.Source));
            indexable.AddProperty (Property.NewKeyword ("fixme:webloguri", feed.mChannel.Source));

            // The item description doubles as the indexed body text.
            StringReader reader = new StringReader (item.Description);
            indexable.SetTextReader (reader);

            Scheduler.Task task = NewAddTask (indexable);
            task.Priority = priority;
            task.SubPriority = 0;
            task.AddTaskGroup (group);
            ThisScheduler.Add (task);
        }

        return item_count;
    }
}
////////////////////////////////////////////////
// De-serialization classes
// FIXME: Change to standard stream parsing for performance?
// One <item> element of a cached feed, as mapped by XmlSerializer.
// All string members default to the empty string, so an element that is
// absent from the document leaves "" rather than null.
public class Item {
    // XML namespace of the RSS 1.0 elements.
    private const string RssNamespace = "http://purl.org/rss/1.0/";

    // XML namespace of the Snownews-specific elements.
    private const string SnownewsNamespace = "http://snownews.kcore.de/ns/1.0/";

    // Content of the <title> element.
    [XmlElement ("title", Namespace=RssNamespace)]
    public string Title = "";

    // Content of the <link> element.
    [XmlElement ("link", Namespace=RssNamespace)]
    public string Source = "";

    // Content of the <description> element.
    [XmlElement ("description", Namespace=RssNamespace)]
    public string Description = "";

    // Content of the Snownews <date> element. SnownewsQueryable.IndexFeed
    // adds this value, as seconds, to 1970-01-01.
    [XmlElement ("date", Namespace=SnownewsNamespace)]
    public ulong Timestamp;
}
// The <channel> element of a cached feed, as mapped by XmlSerializer.
// All members default to the empty string.
public class Channel {
    // XML namespace of the RSS 1.0 elements.
    private const string RssNamespace = "http://purl.org/rss/1.0/";

    // Content of the <title> element.
    [XmlElement ("title", Namespace=RssNamespace)]
    public string Title = "";

    // Content of the <link> element. SnownewsQueryable.IndexFeed uses it to
    // build item URIs and stores it as the "fixme:webloguri" property.
    [XmlElement ("link", Namespace=RssNamespace)]
    public string Source = "";

    // Content of the <description> element.
    [XmlElement ("description", Namespace=RssNamespace)]
    public string Description = "";
}
// Root <RDF> element (namespace http://www.w3.org/1999/02/22-rdf-syntax-ns#)
// of a cached feed: one channel plus a list of items, both in the RSS 1.0
// namespace.
public class RDF{
    [XmlElement ("channel", typeof (Channel), Namespace="http://purl.org/rss/1.0/")]
    public Channel mChannel;

    // The feed's <item> elements, each deserialized as an Item.
    [XmlElement ("item", typeof (Item), Namespace="http://purl.org/rss/1.0/")]
    public ArrayList Items {
        get { return mItems; }
        set { mItems = value; }
    }
    private ArrayList mItems = new ArrayList ();

    private static Logger log = Logger.Get ("SnownewsQueryable");

    // Built once and shared. Serializers created through the
    // (Type, XmlRootAttribute) constructor are not cached by the framework,
    // so creating one per call generates a new serialization assembly each
    // time. XmlSerializer is documented as thread safe.
    private static XmlSerializer serializer = CreateSerializer ();

    // Creates the serializer that expects <RDF> in the RDF syntax namespace
    // as the document root.
    private static XmlSerializer CreateSerializer ()
    {
        XmlRootAttribute xRoot = new XmlRootAttribute ();
        xRoot.ElementName = "RDF";
        xRoot.Namespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
        return new XmlSerializer (typeof (RDF), xRoot);
    }

    // Reads and deserializes the feed stored in the given file.
    //
    // filename: path of the file to read (opened read-only, shared for reading).
    //
    // Returns the parsed feed, or null when the document's root element is
    // not the expected <RDF> element. I/O and XML parsing errors are
    // propagated to the caller. The file is closed in every case.
    public static RDF LoadFromFile (string filename) {
        using (Stream stream = new FileStream (filename,
                                               FileMode.Open,
                                               FileAccess.Read,
                                               FileShare.Read)) {
            XmlTextReader reader = new XmlTextReader (stream);
            try {
                if (!serializer.CanDeserialize (reader)) {
                    // Deserialize would only throw on such a document;
                    // report it and let the caller skip the file.
                    log.Warn ("{0} does not look like a Snownews RDF feed, skipping it", filename);
                    return null;
                }

                return (RDF) serializer.Deserialize (reader);
            } finally {
                reader.Close ();
            }
        }
    }
}
}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]