Rough cut at a Gnus nnmail backend queryable/indexer



Hi,

Since I use Gnus as my mail client, I've been hoping that Beagle would
index my mail correctly (instead of treating messages as plain text
files). The KMail backend could have worked for this, but I decided to
just write a new backend to handle nnmail-format Gnus mail (which is
stored in ~/Mail by default). The attached patch is a VERY ROUGH FIRST
CUT of the backend. Hope someone finds this useful -- I'm still in the
process of:

    * Cleaning it up to conform to the codebase's coding
      standards. Sorry -- I did it as a quick hack.
    * Adding inotify support. Currently, it just crawls ~/Mail. That's it.
    * Adding parsing of .overview files and proper folder/group
      support. Currently, the folder being tagged is the parent of the
      parent; the indexer should translate mail in
      ~/Mail/lists/gnome/dashboard-hackers, for example, to the folder
      lists.gnome.dashboard-hackers .

I'm thinking parts of the KMail backend and this backend could be
refactored into a common class, particularly to handle maildir and
maildir-like mail stores.


>From 454adb55e05265bfcf791431838a97495d21b2c6 Mon Sep 17 00:00:00 2001
From: JM Ibanez <jm orangeandbronze com>
Date: Mon, 23 Apr 2007 19:06:48 +0800
Subject: [PATCH] Very rough first cut for Gnus NNMAIL queryable/indexer.

Currently just a crawler and not inotify-aware, nor does it do any
parsing of .gnus.el, .overview, etc. files.
---
 beagled/AssemblyInfo.cs                            |    1 +
 beagled/GnusNnmailQueryable/GnusNnmailIndexer.cs   |    7 +
 beagled/GnusNnmailQueryable/GnusNnmailQueryable.cs |  120 ++++++++++++++++++++
 beagled/Makefile.am                                |    6 +
 4 files changed, 134 insertions(+), 0 deletions(-)
 create mode 100644 beagled/GnusNnmailQueryable/GnusNnmailIndexer.cs
 create mode 100644 beagled/GnusNnmailQueryable/GnusNnmailQueryable.cs

diff --git a/beagled/AssemblyInfo.cs b/beagled/AssemblyInfo.cs
index 3a26dce..17f209c 100644
--- a/beagled/AssemblyInfo.cs
+++ b/beagled/AssemblyInfo.cs
@@ -53,6 +53,7 @@ using Beagle.Daemon;
 // All backends in this assembly must be registered here.
 [assembly: IQueryableTypes (
 	typeof (Beagle.Daemon.KMailQueryable.KMailQueryable),
+        typeof (Beagle.Daemon.GnusNnmailQueryable.GnusNnmailQueryable),
 	typeof (Beagle.Daemon.FileSystemQueryable.FileSystemQueryable),
 	typeof (Beagle.Daemon.GaimLogQueryable.GaimLogQueryable),
 	typeof (Beagle.Daemon.IndexingServiceQueryable.IndexingServiceQueryable),
diff --git a/beagled/GnusNnmailQueryable/GnusNnmailIndexer.cs b/beagled/GnusNnmailQueryable/GnusNnmailIndexer.cs
new file mode 100644
index 0000000..021b779
--- /dev/null
+++ b/beagled/GnusNnmailQueryable/GnusNnmailIndexer.cs
@@ -0,0 +1,7 @@
+
+namespace Beagle.Daemon.GnusNnmailQueryable {
+  // Empty placeholder class; it declares no members yet.
+  public class GnusNnmailIndexer {
+  }
+
+}
\ No newline at end of file
diff --git a/beagled/GnusNnmailQueryable/GnusNnmailQueryable.cs b/beagled/GnusNnmailQueryable/GnusNnmailQueryable.cs
new file mode 100644
index 0000000..734712d
--- /dev/null
+++ b/beagled/GnusNnmailQueryable/GnusNnmailQueryable.cs
@@ -0,0 +1,120 @@
+using System;
+using System.Collections;
+using System.IO;
+using System.Threading;
+
+using Beagle.Util;
+
+namespace Beagle.Daemon.GnusNnmailQueryable {
+  // Crawl-only backend for Gnus nnmail: walks ~/Mail and offers every file
+  // that is not up to date to the indexer as a message/rfc822 indexable.
+  [QueryableFlavor (Name="GnusNNMail", Domain=QueryDomain.Local, RequireInotify=false)]
+  public class GnusNnmailQueryable : LuceneFileQueryable, IIndexableGenerator {
+
+    private IEnumerator dir_enumerator = null;
+    private IEnumerator file_enumerator = null;
+    private IEnumerable files_to_parse;
+    private DirectoryInfo current_dir;
+    private FileInfo CrawlFile;
+
+    public GnusNnmailQueryable () : base ("GnusNNMailIndex") { }
+
+    // Starts the base queryable, then runs StartWorker on its own thread.
+    public override void Start ()
+    {
+      Log.Debug("GNUS NNMAIL WORKER STARTED");
+      base.Start ();
+      ExceptionHandlingThread.Start (new ThreadStart (StartWorker));
+    }
+
+    // Collects the directories below ~/Mail and schedules the add task.
+    private void StartWorker()
+    {
+      Log.Debug("Picking up mail in ~/Mail");
+      ArrayList l = new ArrayList ();
+      string basedir = Path.Combine (PathFinder.HomeDir, "Mail");
+      Log.Debug("GNUS: basedir {0}", basedir);
+
+      // Without a Gnus mail store there is nothing to crawl.
+      if (!Directory.Exists (basedir))
+        return;
+      foreach (DirectoryInfo di in DirectoryWalker.GetDirectoryInfos (basedir)) {
+        Log.Debug ("GNUS: Adding {0}", di.Name);
+        AddSubDirs (l, di, basedir);
+      }
+      dir_enumerator = l.GetEnumerator ();
+
+      Scheduler.Task task = NewAddTask (this); // The parameter is an IIndexableGenerator instance
+      task.Tag = "GnusNNMailReader"; // This task's unique identifier
+      task.Source = this; // The object that is responsible for this task
+      // Add the task to the scheduler
+      ThisScheduler.Add (task);
+    }
+
+    // Appends the subdirectories of di, recursively, and then di itself to l.
+    private void AddSubDirs (ArrayList l, DirectoryInfo di, string parent)
+    {
+      string this_base = Path.Combine (parent, di.Name);
+      foreach (DirectoryInfo di_sub in DirectoryWalker.GetDirectoryInfos (this_base)) {
+        AddSubDirs (l, di_sub, this_base);
+      }
+      l.Add (new DirectoryInfo (this_base));
+    }
+
+    // IIndexableGenerator implementations.
+    public string StatusName {
+      // Displayed in beagle-status
+      get { return "GnusNnmailQueryable"; }
+    }
+
+    // Called each time a set of indexable is written to the index
+    public void PostFlushHook () { }
+
+    // Moves to the next file that is not up to date; false when none is left.
+    public bool HasNextIndexable ()
+    {
+      // Null until StartWorker fills it, and again after the crawl ends.
+      if (dir_enumerator == null)
+        return false;
+      do {
+        while (file_enumerator == null || !file_enumerator.MoveNext ()) {
+          if (!dir_enumerator.MoveNext ()) {
+            dir_enumerator = null;
+            return false;
+          }
+          current_dir = (DirectoryInfo) dir_enumerator.Current;
+          Logger.Log.Info ("Scanning nnmail dir: " + current_dir.FullName);
+          files_to_parse = DirectoryWalker.GetFileInfos (current_dir);
+          file_enumerator = files_to_parse.GetEnumerator ();
+        }
+        CrawlFile = (FileInfo) file_enumerator.Current;
+      } while (IsUpToDate (CrawlFile.FullName));
+      return true;
+    }
+
+    // Builds the indexable for the file the last HasNextIndexable call chose.
+    public Indexable GetNextIndexable ()
+    {
+      string filename = CrawlFile.FullName;
+      string folder = GetFolderMaildir(filename);
+      Uri file_uri = UriFu.PathToFileUri (filename);
+      Indexable indexable = new Indexable (file_uri);
+      indexable.HitType = "MailMessage";
+      indexable.MimeType = "message/rfc822";
+      indexable.CacheContent = false;
+      indexable.AddProperty (Property.NewUnsearched ("fixme:client", "gnus"));
+      indexable.AddProperty (Property.NewUnsearched ("fixme:folder", folder));
+      indexable.ContentUri = file_uri;
+
+      Log.Debug ("GNUS: URI: {0}", file_uri);
+      Log.Debug ("GNUS: Folder: {0}", folder);
+      return indexable;
+    }
+
+    // Returns the name of the parent of the directory that contains mailFile.
+    public string GetFolderMaildir (string mailFile)
+    {
+      return (Directory.GetParent ((Directory.GetParent (mailFile)).FullName).Name);
+    }
+  }
+}
diff --git a/beagled/Makefile.am b/beagled/Makefile.am
index 4f47007..0d3d483 100644
--- a/beagled/Makefile.am
+++ b/beagled/Makefile.am
@@ -314,6 +314,11 @@ KMAIL_QUERYABLE_CSFILES =				\
 	$(kmailqueryable)/KMailIndexableGenerator.cs	\
 	$(kmailqueryable)/KMailIndexer.cs
 
+gnusnnmailqueryable = $(srcdir)/GnusNnmailQueryable
+GNUSNNMAIL_QUERYABLE_CSFILES =				\
+	$(gnusnnmailqueryable)/GnusNnmailQueryable.cs	\
+	$(gnusnnmailqueryable)/GnusNnmailIndexer.cs
+
 blamqueryable = $(srcdir)/BlamQueryable
 BLAM_QUERYABLE_CSFILES =				\
 	$(blamqueryable)/BlamQueryable.cs
@@ -372,6 +377,7 @@ NAUTILUS_QUERYABLE_CSFILES =					\
 DAEMON_DLL_CSFILES = 						\
 	$(LUCENE_CSFILES)					\
 	$(KMAIL_QUERYABLE_CSFILES)				\
+	$(GNUSNNMAIL_QUERYABLE_CSFILES)				\
 	$(FILE_SYSTEM_QUERYABLE_CSFILES)			\
 	$(GAIM_LOG_QUERYABLE_CSFILES)				\
 	$(INDEXING_SERVICE_QUERYABLE_CSFILES)			\
-- 
1.5.0.5


-- 
JM Ibanez
Software Architect
Orange & Bronze Software Labs, Ltd. Co.

jm orangeandbronze com
http://software.orangeandbronze.com/


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]