Re: Creating new backend - help wanted



Thanks Daniel and Fredrik,
    I checked (manually and via luke) that the feeds are properly
indexed. But beagle-query is not returning results from the feeds. I
am wondering if beagle-query needs to be somehow informed about the
presence of the new driver.
I don't feel right submitting this to Bugzilla since the query is not
working at all.
I am attaching 
* the backend file beagled/AkregatorQueryable/AkregatorQueryable.cs 
* akregator feed data ~/.kde/share/apps/akregator/... (as tar.gz file)
if anybody wants to test the code

Is there any easy way to enable debug output from beagle-query while it
is searching? (I am getting lost in the response/query details.)

Thanks,
D. Bera


On 7/5/05, Fredrik Hedberg <fredrik hedberg avafan com> wrote:
> Hi,
> 
> If you're using the CVS version of Beagle you can inspect your index
> with the beagle-manage-index tool, for example:
> 
> $ beagle-manage-index ~/.beagle/AkregatorIndex list
> 
> Another easy way to get input, is just to post the code to either the
> mailing-list or to the Bugzilla for review and feedback.
> 
> /Fredrik 
> 
> 
> On Tue, 2005-07-05 at 14:23 -0400, D Bera wrote:
> > Hi
> > 
> > I am writing a backend AkregatorQueryable for Akregator (KDE RSS
> > Reader) based on Liferea's code. All I did was write
> > AkregatorQueryable (similar to LifereaQueryable) and make changes in
> > the beagled Makefile to have it compiled. The driver is compiling fine
> > and feed data seems to be properly indexed too. I checked that index
> > files are being created in .beagle/AkregatorIndex.
> > 
> > However beagle-query or best is not returning any hits from the feeds.
> > Any clue what might be wrong or where can I can look at or how to get
> > more debug info (I can post output of beagled --debug --fg if
> > required) ?
> > 
> > Thanks,
> > - Bera
> > _______________________________________________
> > Dashboard-hackers mailing list
> > Dashboard-hackers gnome org
> > http://mail.gnome.org/mailman/listinfo/dashboard-hackers
> 
>

Attachment: akregatordata.tgz
Description: GNU Zip compressed data

//
// AkregatorQueryable.cs
//
// Copyright (C) 2005 Debajyoti Bera
//
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//

using System;
using System.IO;
using System.Collections;
using System.Threading;

using System.Xml;
using System.Xml.Serialization;
	
using Beagle.Daemon;
using Beagle.Util;

namespace Beagle.Daemon.AkregatorQueryable {

	// Backend that indexes the items of the RSS feed files found in
	// ~/.kde/share/apps/akregator/Archive into the "AkregatorIndex" index.
	[QueryableFlavor (Name="Akregator", Domain=QueryDomain.Local, RequireInotify=false)]
	public class AkregatorQueryable : LuceneQueryable {

		private static Logger log = Logger.Get ("AkregatorQueryable");

		// Directory that is scanned and watched; every file in it is
		// treated as one feed.
		string akregator_dir;

		// Only used when inotify is not available.  Kept in a field (rather
		// than a local) so the watcher stays referenced for the lifetime of
		// this backend.
		FileSystemWatcher fsw;

		public AkregatorQueryable () : base ("AkregatorIndex")
		{
			akregator_dir = Path.Combine (PathFinder.HomeDir, ".kde");
			akregator_dir = Path.Combine (akregator_dir, "share");
			akregator_dir = Path.Combine (akregator_dir, "apps");
			akregator_dir = Path.Combine (akregator_dir, "akregator");
			akregator_dir = Path.Combine (akregator_dir, "Archive");
		}

		/////////////////////////////////////////////////

		// Starts the base queryable, then runs the initial scan on a
		// separate thread.
		public override void Start ()
		{
			base.Start ();

			ExceptionHandlingThread.Start (new ThreadStart (StartWorker));
		}

		// Sets up change notification for the archive directory and indexes
		// every feed file that is already there.  If the directory does not
		// exist yet, polls for it once a minute instead.
		private void StartWorker ()
		{
			if (!Directory.Exists (akregator_dir)) {
				GLib.Timeout.Add (60000, new GLib.TimeoutHandler (CheckForExistence));
				return;
			}

			if (Inotify.Enabled) {
				Inotify.EventType mask = Inotify.EventType.CloseWrite;

				Inotify.Subscribe (akregator_dir, OnInotifyEvent, mask);
			} else {
				fsw = new FileSystemWatcher ();
				fsw.Path = akregator_dir;

				fsw.Changed += new FileSystemEventHandler (OnChanged);
				fsw.Created += new FileSystemEventHandler (OnChanged);

				fsw.EnableRaisingEvents = true;
			}

			log.Info ("Scanning Akregator feeds...");

			Stopwatch stopwatch = new Stopwatch ();
			int feed_count = 0, item_count = 0;

			stopwatch.Start ();

			DirectoryInfo dir = new DirectoryInfo (akregator_dir);
			foreach (FileInfo file in dir.GetFiles ()) {
				item_count += IndexFeed (file.FullName, Scheduler.Priority.Delayed);
				feed_count++;
			}

			stopwatch.Stop ();

			log.Info ("Scanned {0} items in {1} akregator feeds in {2}", item_count, feed_count, stopwatch);
		}

		// Timeout callback: returns true (keep polling) while the archive
		// directory is missing; once it shows up, starts the backend and
		// returns false to cancel the timeout.
		// NOTE(review): this goes through Start () and therefore calls
		// base.Start () a second time - confirm that this is harmless.
		private bool CheckForExistence ()
		{
			if (!Directory.Exists (akregator_dir))
				return true;

			this.Start ();

			return false;
		}

		/////////////////////////////////////////////////

		// Modified/Created event using Inotify

		private void OnInotifyEvent (Inotify.Watch watch,
					     string path,
					     string subitem,
					     string srcpath,
					     Inotify.EventType type)
		{
			// An empty subitem carries no file name to index.
			if (subitem == "")
				return;

			IndexFeed (Path.Combine (path, subitem), Scheduler.Priority.Immediate);
		}

		// Modified/Created event using FSW

		private void OnChanged (object o, FileSystemEventArgs args)
		{
			IndexFeed (args.FullPath, Scheduler.Priority.Immediate);
		}

		/////////////////////////////////////////////////

		// Ugly ... but couldn't find anything better:
		// DateTime.Parse is unable to parse the RFC822 datetime format.
		// The native function returns a time_t and writes the timezone
		// offset through an int pointer, hence IntPtr / out int.
		[System.Runtime.InteropServices.DllImport("libgmime-2.0.so")]
		static extern IntPtr g_mime_utils_header_decode_date (string str, out int saveoffset);

		// Converts an item's pubDate string to a DateTime.  Returns
		// 'fallback' when the feed supplied no pubDate at all, so that a
		// null string is never handed to the native parser.
		private static DateTime ParsePubDate (string pub_date, DateTime fallback)
		{
			if (pub_date == null || pub_date.Length == 0)
				return fallback;

			int offset; // ignored - only the time at the current machine is stored
			IntPtr pub_time = g_mime_utils_header_decode_date (pub_date, out offset);

			return GLib.Marshaller.time_tToDateTime (pub_time);
		}

		// Parse and index a feed.
		//
		// Schedules one add-task per item of the feed stored in 'filename',
		// with the given priority, and returns the number of items
		// scheduled.  Returns 0 when the file is already up to date, cannot
		// be loaded, or contains no channel/items.

		private int IndexFeed (string filename, Scheduler.Priority priority)
		{
			FileInfo file = new FileInfo (filename);

			if (this.FileAttributesStore.IsUpToDate (file.FullName))
				return 0;

			Scheduler.TaskGroup group = NewMarkingTaskGroup (file.FullName, file.LastWriteTime);

			RSS feed;
			try {
				feed = RSS.LoadFromFile (file.FullName);
			} catch (Exception e) {
				// A malformed or half-written file must not abort the
				// directory scan or the watcher callback.
				log.Warn ("Akregator: could not load feed {0}: {1}", file.FullName, e.Message);
				return 0;
			}

			if (feed == null || feed.channel == null || feed.channel.Items == null)
				return 0;

			int item_count = 0;

			foreach (Item item in feed.channel.Items) {
				log.Info("Akregator: Indexing " + item.Title);
				item_count++;

				Indexable indexable = new Indexable ( new Uri (String.Format ("feed:{0};item={1}", feed.channel.Link, item.Link)));
				indexable.MimeType = "text/html";
				indexable.Type = "FeedItem";

				// Items without a pubDate get the feed file's modification time.
				DateTime date = ParsePubDate (item.PubDate, file.LastWriteTime);

				indexable.Timestamp = date;

				indexable.AddProperty (Property.NewKeyword ("dc:title", item.Title));
				indexable.AddProperty (Property.NewKeyword ("dc:description", item.Description));
				//indexable.AddProperty (Property.NewKeyword ("fixme:author", item.Attribs.Author));
				indexable.AddProperty (Property.NewDate ("fixme:publishdate", date));
				indexable.AddProperty (Property.NewKeyword ("fixme:itemuri", item.Link));
				indexable.AddProperty (Property.NewKeyword ("fixme:webloguri", feed.channel.Link));

				// The description doubles as the text content of the item.
				StringReader reader = new StringReader (item.Description);
				indexable.SetTextReader (reader);

				Scheduler.Task task = NewAddTask (indexable);
				task.Priority = priority;
				task.SubPriority = 0;
				task.AddTaskGroup (group);
				ThisScheduler.Add (task);

				log.Info("Akregator: scheduled indexing for:" + item.Description);

			}

			return item_count;
		}
	}	

	////////////////////////////////////////////////

	// De-serialization classes
	// FIXME: Change to standard stream parsing for performance? 

	// One <item> element of a feed.  Every field defaults to the empty
	// string, so an element missing from the XML never shows up as null.
	public class Item {
		[XmlElement ("title")] public string Title = "";
		[XmlElement ("description")] public string Description = "";
		[XmlElement ("link")] public string Link = "";
		//[XmlElement ("attributes")] public Attributes Attribs;
		// Publication date exactly as written in the feed; "" when absent.
		[XmlElement ("pubDate")] public string PubDate = "";
	}
	
	//public class Attributes{
	//	[XmlAttribute ("author")] public string Author = "";
	//}

	// The <channel> element of a feed: its descriptive fields plus the
	// list of <item> children.
	public class Channel{
		// Backing store for Items; starts out as an empty list.
		private ArrayList itemList = new ArrayList ();

		[XmlElement ("title")]
		public string Title = "";

		[XmlElement ("link")]
		public string Link = "";

		[XmlElement ("description")]
		public string Description = "";

		// Further channel elements that are currently not mapped:
		//[XmlElement ("feedStatus")] public int Status;
		//[XmlElement ("feedUpdateInterval")] public int UpdateInterval;
		//[XmlElement ("feedDiscontinued")] public string Discontinued ="";
		//[XmlElement ("feedLastModified")] public string LastModified ="";

		// Each <item> child is de-serialized into an Item.
		[XmlElement ("item", typeof (Item))]
		public ArrayList Items {
			get { return this.itemList; }
			set { this.itemList = value; }
		}
	}	
	
	// Root of a de-serialized feed file (the <rss> element).
	public class RSS{
		[XmlElement ("channel", typeof (Channel))]
		public Channel channel;

		// A serializer built from an explicit XmlRootAttribute is not cached
		// by the framework, so build it once and share it between calls.
		private static readonly XmlSerializer serializer = CreateSerializer ();

		// Builds the serializer that maps the <rss> root element onto RSS.
		private static XmlSerializer CreateSerializer ()
		{
			XmlRootAttribute xRoot = new XmlRootAttribute();
			xRoot.ElementName = "rss";

			return new XmlSerializer (typeof (RSS), xRoot);
		}

		// Loads the feed stored in 'filename'.
		//
		// Returns the de-serialized feed, or null when the document's root
		// element is not one this serializer can read.  I/O and XML errors
		// are passed on to the caller; the file is closed in every case.
		public static RSS LoadFromFile (string filename) {
			Stream stream = new FileStream (filename,
							FileMode.Open,
							FileAccess.Read,
							FileShare.Read);
			try {
				XmlTextReader reader = new XmlTextReader (stream);
				try {
					if (!serializer.CanDeserialize (reader))
						return null;

					return (RSS) serializer.Deserialize (reader);
				} finally {
					reader.Close ();
				}
			} finally {
				stream.Close ();
			}
		}
	}
}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]