[PATCH] beagle maildir backend



Hello everyone,

Please cc me on any replies as I'm not subscribed.

Here is some quick and dirty code to add a maildir backend to beagle
0.2.10.  It is based on the KMail backend and just assumes that ~/Mail
is a maildir directory.

Without this patch a large portion of my emails gets misclassified as
text/plain.  The backend probably needs to be smarter about detecting
maildir directories, and about looking in places other than ~/Mail, but
it's a starting point if anyone is interested in improving it.

-chris


diff -urN beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexableGenerator.cs beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexableGenerator.cs
--- beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexableGenerator.cs	1969-12-31 19:00:00.000000000 -0500
+++ beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexableGenerator.cs	2006-09-28 13:48:16.000000000 -0400
@@ -0,0 +1,303 @@
+
+//
+// MaildirIndexableGenerator.cs
+//
+// Copyright (C) 2005 Novell, Inc.
+// Copyright (C) 2005 Debajyoti Bera
+//
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+using System.Collections;
+using System.IO;
+using System.Runtime.Serialization.Formatters.Binary;
+using System.Threading;
+using System.Xml;
+
+using Beagle.Util;
+using Beagle.Daemon;
+
+namespace Beagle.Daemon.MaildirQueryable {
+
+	/**
+	 * Indexable generator for maildir mails
+	 */
+	public class MaildirdirIndexableGenerator : IIndexableGenerator {
+		// indexer that owns this generator (source of AccountName, MailRoot and Queryable)
+		private MaildirIndexer indexer;
+		// message file most recently selected by HasNextIndexable
+		private FileInfo CrawlFile;
+		// directory whose files are currently being enumerated
+		private DirectoryInfo current_dir;
+		// files of current_dir, as returned by DirectoryWalker.GetFileInfos
+		private IEnumerable files_to_parse;
+		// DirectoryInfo entries queued by AddDirectory, scanned in insertion order
+		private ArrayList dirs_to_scan;
+		private IEnumerator dir_enumerator = null;	// position within dirs_to_scan
+		private IEnumerator file_enumerator = null;	// position within files_to_parse
+
+		private string account_name {
+			get { return indexer.AccountName; }
+		}
+
+		public MaildirdirIndexableGenerator (MaildirIndexer indexer, ArrayList mail_directories)
+		{
+			this.indexer = indexer;
+			dirs_to_scan = new ArrayList ();
+			// mail_directories holds maildir paths as strings; AddDirectory queues their subdirectories
+			foreach (string directory in mail_directories) {
+				AddDirectory (directory);
+			}
+			dir_enumerator = dirs_to_scan.GetEnumerator ();
+		}
+		// Intentionally empty: nothing is saved by this generator after a flush.
+		public void PostFlushHook ()
+		{
+		}
+
+		// Queue the cur/ and new/ subdirectories of the maildir folder _dir for
+		// scanning (cur/ first); tmp/ is never scanned and a missing subdirectory
+		// is silently skipped.
+		private void AddDirectory (string _dir) {
+			string[] scanned = new string[] { "cur", "new" };
+
+			foreach (string name in scanned) {
+				string candidate = Path.Combine (_dir, name);
+				if (!Directory.Exists (candidate))
+					continue;
+
+				dirs_to_scan.Add (new DirectoryInfo (candidate));
+			}
+		}
+
+		public string StatusName {	// reported as the mail root of the owning indexer
+			get { return indexer.MailRoot; }
+		}
+		// Builds the indexable for the file the file enumerator is currently positioned on.
+		public Indexable GetNextIndexable ()
+		{
+			FileInfo file = (FileInfo) file_enumerator.Current;
+			return indexer.MaildirMessageToIndexable (file.FullName);
+		}
+		// Delegates to the FileAttributesStore of the indexer's queryable.
+		public bool IsUpToDate (string path)
+		{
+			return indexer.Queryable.FileAttributesStore.IsUpToDate (path);
+		}
+
+		// Advance to the next mail file that is not up to date; false once every directory is done.
+		public bool HasNextIndexable ()
+		{
+			do {
+				while (file_enumerator == null || !file_enumerator.MoveNext ()) {
+					if (dir_enumerator == null || !dir_enumerator.MoveNext ()) {
+						dir_enumerator = null;	// the null test above keeps later calls from dereferencing this
+						return false;
+					}
+					current_dir = (DirectoryInfo) dir_enumerator.Current;
+					Logger.Log.Info ("Scanning maildir feeds in " + current_dir.FullName);
+					files_to_parse = DirectoryWalker.GetFileInfos (current_dir);
+					file_enumerator = files_to_parse.GetEnumerator ();
+				}
+				CrawlFile = (FileInfo) file_enumerator.Current;
+			} while (IsUpToDate (CrawlFile.FullName));	// skip files that are already up to date
+			return true;
+		}
+
+	}
+
+	/**
+	 * Indexable generator for mbox mail files
+	 * based on Evo code
+	 */
+	public class MaildirMboxIndexableGenerator : IIndexableGenerator {
+		// path of the mbox file
+		private string mbox_file;
+		// file descriptor (-1 while the mbox is not open), stream and parser used for gmime parsing
+		private int mbox_fd = -1;
+		private GMime.StreamFs mbox_stream;
+		private GMime.Parser mbox_parser;
+		// indexer that owns this generator
+		private MaildirIndexer indexer;
+		// number of messages turned into indexables so far
+		private int indexed_count;
+		// true for the initial scan, in which case the mbox might have been modified since the last scan
+		private bool initial_scan;
+		
+		private string account_name {
+			get { return indexer.AccountName; }
+		}
+
+		private string folder_name {	// folder name as derived by the indexer from the mbox path
+			get { return indexer.GetFolderMbox (mbox_file); }
+		}
+
+		public MaildirMboxIndexableGenerator (MaildirIndexer indexer, string mbox_file, bool initial_scan)
+		{
+			this.indexer = indexer;
+			this.mbox_file = mbox_file;
+			this.initial_scan = initial_scan;
+		}
+		// After a flush, persist the current parse position through Checkpoint.
+		public void PostFlushHook ()
+		{
+			Checkpoint ();
+		}
+
+		/**
+		 * Record how far into the mbox indexing has progressed, so that a run
+		 * which stops midway can resume from that offset instead of starting
+		 * over, provided the mbox file has not been modified in the meantime
+		 */
+		public void Checkpoint ()
+		{
+			if (mbox_parser == null)
+				return;
+			MboxLastOffset = mbox_parser.Tell ();
+			indexer.Queryable.FileAttributesStore.AttachLastWriteTime (mbox_file, DateTime.UtcNow);
+		}
+
+		public string StatusName {
+			get { return mbox_file; }
+		}
+
+		// Resume offset for this mbox, kept in the queryable's data line "offset-<path with '/' as '-'>".
+		private long MboxLastOffset {
+			get {
+				string data_key = "offset-" + mbox_file.Replace ('/', '-');
+				return Convert.ToInt64 (indexer.Queryable.ReadDataLine (data_key));
+			}
+			set {
+				string data_key = "offset-" + mbox_file.Replace ('/', '-');
+				indexer.Queryable.WriteDataLine (data_key, value.ToString ());
+			}
+		}
+
+		public bool IsUpToDate (string path)
+		{
+			// defer to the file attributes store of the indexer's queryable
+			return indexer.Queryable.FileAttributesStore.IsUpToDate (path);
+		}
+
+		/**
+		 * Open the mbox on first use, then report whether another mail can be parsed; everything is closed at end of stream.
+		 */
+		public bool HasNextIndexable ()
+		{
+			if (mbox_fd < 0) {
+				Logger.Log.Debug ("Opening mbox {0}", mbox_file);
+
+				try {
+					MaildirQueryable.InitializeGMime ();
+				} catch (Exception e) {
+					Logger.Log.Warn (e, "Caught exception trying to initalize gmime:");
+					return false;
+				}
+
+				// Syscall.open signals failure by returning -1, not by throwing, so the
+				// descriptor has to be checked before it is handed to GMime
+				mbox_fd = Mono.Unix.Native.Syscall.open (mbox_file, Mono.Unix.Native.OpenFlags.O_RDONLY);
+				if (mbox_fd < 0) {
+					Logger.Log.Warn ("Could not open mbox " + mbox_file + " (deleted while indexing?)");
+					return false;
+				}
+				mbox_stream = new GMime.StreamFs (mbox_fd);
+				if (initial_scan && !IsUpToDate (mbox_file))
+					// this is the initial scan and
+					// file has changed since last scan =>
+					// set mboxlastoffset to 0 and seek to 0
+					mbox_stream.Seek ((int)(MboxLastOffset = 0));
+				else
+					mbox_stream.Seek ((int) MboxLastOffset);
+				mbox_parser = new GMime.Parser (mbox_stream);
+				mbox_parser.ScanFrom = true;
+			}
+
+			if (mbox_parser.Eos ()) {
+				// save the state ASAP
+				Checkpoint ();
+
+				mbox_stream.Close ();
+				mbox_fd = -1;
+				mbox_stream.Dispose ();
+				mbox_stream = null;
+				mbox_parser.Dispose ();
+				mbox_parser = null;
+				
+				Logger.Log.Debug ("{0}: Finished indexing {1} messages", folder_name, indexed_count);
+				return false;
+			} else
+				return true;
+		}
+
+		// Parse the next message of the mbox and turn it into an Indexable.
+		// Returns null when the mbox file is reported missing, when the parsed
+		// message is empty, or when the indexer produced no indexable for it.
+		public Indexable GetNextIndexable ()
+		{
+			GMime.Message parsed = null;
+			try {
+				parsed = mbox_parser.ConstructMessage ();
+			} catch (System.IO.FileNotFoundException) {
+				Logger.Log.Warn ("mbox " + mbox_file + " deleted while parsing.");
+				return null;
+			}
+
+			try {
+				// Workaround inherited from the Evolution backend for what is
+				// believed to be a GMime bug: for a zero-byte file, or after
+				// seeking to the end of a file, parser.Eos () is not reliable
+				// until the parser has actually tried to read something, and
+				// parser.ConstructMessage () always returns a message.  So an
+				// empty message frequently arrives here; it is recognised by a
+				// null or empty Headers property and skipped.
+				bool is_empty = parsed == null || parsed.Headers == null || parsed.Headers == "";
+				if (is_empty)
+					return null;
+
+				// The mbox KIO slave uses the From line as the URI, so the same is
+				// done here; whether those lines are unique is an open question.
+				System.Uri message_uri = EmailUri (mbox_parser.From);
+				string folder = indexer.GetFolderMbox (mbox_file);
+
+				Indexable result = indexer.MessageToIndexable (mbox_file, message_uri, parsed, folder);
+
+				// count only the messages that produced an indexable
+				if (result != null)
+					++indexed_count;
+
+				return result;
+			} finally {
+				// the message is released on every path out of the try block,
+				// including the early returns
+				if (parsed != null)
+					parsed.Dispose ();
+			}
+		}
+
+		// TODO: confirm that this works with the mbox kio-slave from new kdepim
+		public Uri EmailUri (string id)
+		{
+			string mbox_path = new FileInfo (mbox_file).FullName;
+			return new Uri ("mbox:///" + mbox_path + "/" + id);
+		}
+	}
+}
diff -urN beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexer.cs beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexer.cs
--- beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexer.cs	1969-12-31 19:00:00.000000000 -0500
+++ beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexer.cs	2006-09-28 14:13:27.000000000 -0400
@@ -0,0 +1,511 @@
+
+//
+// MaildirIndexer.cs
+//
+// Copyright (C) 2005 Novell, Inc.
+// Copyright (C) 2005 Debajyoti Bera
+//
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+using System.Collections;
+using System.IO;
+
+using Beagle.Util;
+using Beagle.Daemon;
+
+namespace Beagle.Daemon.MaildirQueryable {
+	
+	/**
+	 * Main indexer class
+	 * The bulk of the indexing work is done here
+	 */
+	public class MaildirIndexer {
+		// root directory of the mail store handled by this indexer
+		private string mail_root;
+		public string MailRoot {
+			get { return mail_root; }
+		}
+		// name of the account the mail store belongs to
+		private string account_name;
+		public string AccountName {
+			get { return account_name; }
+		}
+		// lower-case folder-name endings that IgnoreFolder rejects
+		private ArrayList excludes;
+		// maildir directories, i.e. those which store mails in cur/, new/, tmp/ subdirs
+		private ArrayList mail_directories;
+		// directories which contain mbox files and other mail folders
+		private ArrayList folder_directories;
+		// paths of known mbox files
+		private ArrayList mbox_files;
+		// queryable this indexer schedules its tasks on
+		private MaildirQueryable queryable;
+		public MaildirQueryable Queryable {
+			get { return queryable; }
+		}
+
+		// parent directory of the last path IsMailDir accepted
+		private string lastGoodDirPath = "";
+
+		public MaildirIndexer (MaildirQueryable queryable, string account, string root)
+		{
+			this.queryable = queryable;
+			this.account_name = account;
+			this.mail_root = root;
+
+			this.mail_directories = new ArrayList ();
+			Logger.Log.Debug ("mail_directories created for:" + mail_root + " (" + mail_directories.Count + ")");
+			this.folder_directories = new ArrayList ();
+			this.mbox_files = new ArrayList ();
+
+			// folder-name endings that are ignored by default
+			string[] default_excludes = new string[] { "spam", "outbox", "trash", "drafts" };
+			this.excludes = new ArrayList (default_excludes);
+		}
+
+		/**
+		 * inotify callback: turns file and directory events below the mail root into index/remove tasks and list updates
+		 */
+		private void OnInotifyEvent (Inotify.Watch watch,
+					     string path,
+					     string subitem,
+					     string srcpath,
+					     Inotify.EventType type)
+		{
+			//FIXME this case should NEVER occur, still it does
+			if (mail_directories == null) {
+				Logger.Log.Debug ("*** WEIRD AVIRAM CASE for :" + mail_root);
+				Logger.Log.Debug ("Received inotify event{3} for {4}: path={0}, subitem={1}, srcpath={2}", path, subitem, srcpath, type, mail_root);
+				return;
+			}
+			
+			if (subitem == "")
+				return;
+			string fullPath = Path.Combine (path, subitem);
+
+			// we need to watch for all kinds of events - this is tricky
+
+			// Case: new file is created
+			// - if its directory is the cur/ or new/ of a known maildir, index it
+			//   as a maildir message; any other new file is ignored here
+			if ((type & Inotify.EventType.Create) != 0 && (type & Inotify.EventType.IsDirectory) == 0) {
+				if (IsMailDir (path)) {
+					Indexable indexable = MaildirMessageToIndexable (fullPath);
+					AddIndexableTask (indexable, fullPath);
+				}
+				return;
+			}
+
+			// Case: file is deleted
+			// - if it is a mail file, we might like it to be deleted
+			if ((type & Inotify.EventType.MovedFrom) != 0 ||
+			    ((type & Inotify.EventType.Delete) != 0 &&
+			    (type & Inotify.EventType.IsDirectory) == 0)) {
+				if (IsMailDir (path))
+					RemoveMail (fullPath);
+				else if (mbox_files.Contains (fullPath)) {
+					RemoveMbox (fullPath);
+					mbox_files.Remove (fullPath);
+				}
+				return;
+			}
+
+			// Case: file is moved
+			// - files are moved from tmp/new to cur
+			// - need to delete from the source: srcpath names a file, so test its directory
+			if ((type & Inotify.EventType.MovedTo) != 0 && (type & Inotify.EventType.IsDirectory) == 0) {
+				if (IsMailDir (path)) {
+					Indexable indexable = MaildirMessageToIndexable (fullPath);
+					AddIndexableTask (indexable, fullPath);
+				}
+				if (srcpath != null && srcpath != "" && IsMailDir (Path.GetDirectoryName (srcpath)))
+					RemoveMail (srcpath);
+				if (mbox_files.Contains (fullPath)) {
+					// check if this because of compaction, in which case need to delete previous mbox
+					if (srcpath != null && srcpath.EndsWith ("." + subitem + ".compacted"))
+						RemoveMbox (fullPath);
+					// FIXME need to ensure IndexMbox is scheduled *after* RemoveMbox finishes
+					// RemoveMbox creates a job with immediate priority while
+					// IndexMbox creates a job with the default priority of a generator
+					// Is there a better way to ensure the order ?
+					IndexMbox (fullPath, true);
+				}
+				return;
+			}
+
+			// Case: file is modified i.e. there was no create event but closewrite event
+			// - possibly some mbox was changed
+			// FIXME kmail doesnt physically delete the deleted mails from mbox files unless compacted
+			// - which means one has to read the .index files to find deleted messages...
+			// - need to find the format of the .index/.index.ids etc files and parse them
+			if ((type & Inotify.EventType.Modify) != 0 && (type & Inotify.EventType.IsDirectory) == 0) {
+				if (mbox_files.Contains (fullPath))
+					IndexMbox (fullPath, false);
+				return;
+			}
+
+			// Case: a directory is created:
+			// well watch it anyway but also make sure its a maildir directory
+			// if it a maildir directory, then add it to maildir_dirs
+			if ((type & Inotify.EventType.Create) != 0 && (type & Inotify.EventType.IsDirectory) != 0) {
+				if (!IgnoreFolder (fullPath)) {
+					Watch (fullPath);
+					UpdateDirectories(fullPath);
+				}
+				return;
+			}
+
+			// Case: a directory is deleted: drop it from both lists and unsubscribe.
+			// NOTE(review): `watch` looks like the watch that delivered the event (the parent's?) - confirm
+			if ((type & Inotify.EventType.Delete) != 0 && (type & Inotify.EventType.IsDirectory) != 0) {
+				watch.Unsubscribe ();
+				mail_directories.Remove (fullPath);
+				folder_directories.Remove (fullPath);
+				return;
+			}
+
+			// Case: directory is moved
+			// FIXME: implement renaming of mail folders
+			
+		}
+
+		/**
+		 * Subscribe to inotify events for path and for every directory below it
+		 */
+		public void Watch (string path)
+		{
+			DirectoryInfo top = new DirectoryInfo (path);
+			if (! top.Exists)
+				return;
+
+			// the same set of events is requested for every directory
+			Inotify.EventType wanted = Inotify.EventType.Create
+						| Inotify.EventType.Delete
+						| Inotify.EventType.MovedFrom
+						| Inotify.EventType.MovedTo;
+
+			Queue to_visit = new Queue ();
+			to_visit.Enqueue (top);
+
+			while (to_visit.Count > 0) {
+				DirectoryInfo current = (DirectoryInfo) to_visit.Dequeue ();
+				if (! current.Exists)
+					continue;
+
+				Inotify.Subscribe (current.FullName, OnInotifyEvent, wanted);
+
+				foreach (DirectoryInfo child in DirectoryWalker.GetDirectoryInfos (current))
+					to_visit.Enqueue (child);
+			}
+		}
+		
+		/**
+		 * Scan the directories directly under mail_root: every one that is not
+		 * itself named cur/new/tmp and is not excluded by IgnoreFolder is recorded
+		 * as a maildir and (with inotify) watched; then the indexing tasks are queued
+		 */
+		public void Crawl ()
+		{
+			if (!Directory.Exists (mail_root))
+				return;
+
+			mail_directories.Clear ();
+			folder_directories.Clear ();
+			mbox_files.Clear();
+
+			Queue pending = new Queue ();
+			pending.Enqueue (mail_root);
+			folder_directories.Add (mail_root);
+			// add inotify watch to root folder
+			if (Inotify.Enabled)
+				Inotify.Subscribe (mail_root, OnInotifyEvent,
+					Inotify.EventType.Create
+					| Inotify.EventType.Delete
+					| Inotify.EventType.MovedFrom
+					| Inotify.EventType.MovedTo
+					| Inotify.EventType.Modify);
+
+			while (pending.Count > 0) {
+				string dir = (string) pending.Dequeue ();
+				Logger.Log.Debug ("Searching for mbox and maildirs in " + dir);
+
+				foreach (string d in DirectoryWalker.GetDirectoryNames(dir)) {
+					string fullpath = Path.Combine(dir, d);
+					// honour the excludes here too, as OnInotifyEvent does for new directories
+					if (d == "cur" || d == "new" || d == "tmp" || IgnoreFolder (fullpath))
+						continue;
+					mail_directories.Add (fullpath);
+					if (Inotify.Enabled) {
+						Watch (fullpath);
+					}
+				}
+			}	
+
+			// copy the contents as mail_directories, mbox_files might change due to async events
+			ArrayList _mail_directories = new ArrayList (mail_directories);
+			ArrayList _mbox_files = new ArrayList (mbox_files);
+			
+			if (queryable.ThisScheduler.ContainsByTag (mail_root)) {
+				Logger.Log.Debug ("Not adding task for already running task: {0}", mail_root);
+				return;
+			} else {
+				MaildirdirIndexableGenerator generator = new MaildirdirIndexableGenerator (this, _mail_directories);
+				AddIIndexableTask (generator, mail_root);
+			}
+
+			foreach (string mbox_file in _mbox_files) {
+				IndexMbox (mbox_file, true);
+			}
+		}
+
+		// Schedule a single indexable at immediate priority; a null indexable is ignored.
+		private void AddIndexableTask (Indexable indexable, string tag)
+		{
+			if (indexable != null) {
+				Scheduler.Task add_task = queryable.NewAddTask (indexable);
+				add_task.Priority = Scheduler.Priority.Immediate;
+				add_task.Tag = tag;
+				queryable.ThisScheduler.Add (add_task);
+			}
+		}
+
+		// Schedule a generator task without touching its priority; a null generator is ignored.
+		private void AddIIndexableTask (IIndexableGenerator generator, string tag)
+		{
+			if (generator != null) {
+				Scheduler.Task generator_task = queryable.NewAddTask (generator);
+				generator_task.Tag = tag;
+				queryable.ThisScheduler.Add (generator_task);
+			}
+		}
+
+		/**
+		 * Schedule a generator task for an mbox file, unless the scheduler
+		 * already holds a task tagged with that file
+		 */
+		public void IndexMbox (string mbox_file, bool initial_scan)
+		{
+			bool already_scheduled = queryable.ThisScheduler.ContainsByTag (mbox_file);
+			if (already_scheduled) {
+				Logger.Log.Debug ("Not adding task for already running task: {0}", mbox_file);
+				return;
+			}
+
+			AddIIndexableTask (new MaildirMboxIndexableGenerator (this, mbox_file, initial_scan), mbox_file);
+		}
+
+		/**
+		 * Schedule the removal of a single maildir message, identified by its file path
+		 */
+		private void RemoveMail (string file)
+		{
+			Logger.Log.Debug ("Removing mail:" + file);
+
+			Scheduler.Task removal = queryable.NewRemoveTask (UriFu.PathToFileUri (file));
+			removal.Priority = Scheduler.Priority.Immediate;
+			removal.SubPriority = 0;
+			queryable.ThisScheduler.Add (removal);
+		}
+
+		/**
+		 * Wrap a maildir message file in an Indexable of type message/rfc822 with
+		 * the file as its content uri; the message itself is not parsed here
+		 */
+		public Indexable MaildirMessageToIndexable (string filename)
+		{
+			Logger.Log.Debug ("+ indexing maildir mail:" + filename);
+			string maildir_folder = GetFolderMaildir (filename);
+			Uri message_uri = UriFu.PathToFileUri (filename);
+
+			Indexable mail = new Indexable (message_uri);
+			mail.HitType = "MailMessage";
+			mail.MimeType = "message/rfc822";
+			mail.CacheContent = false;
+			mail.AddProperty (Property.NewUnsearched ("fixme:client", "maildir"));
+			mail.AddProperty (Property.NewUnsearched ("fixme:account", account_name));
+			mail.AddProperty (Property.NewUnsearched ("fixme:folder", maildir_folder));
+			mail.ContentUri = message_uri;
+
+			return mail;
+		}
+	
+		/**
+		 * Create an indexable for one message of an mbox file: sets parent uri, timestamp, client/account/folder
+		 * properties, sentTo/gotFrom keywords and the isSent flag.  Most of the code here is from the Evo backend.
+		 */
+		public Indexable MessageToIndexable (string file_name, System.Uri uri, GMime.Message message, string folder_name)
+		{
+			//Logger.Log.Debug ("Indexing " + uri + " in folder " + folder_name);
+			Indexable indexable = new Indexable (uri);
+			// set parent uri to the filename so that when an mbox file
+			// is deleted, all the messages in that file can be deleted
+			indexable.ParentUri = UriFu.PathToFileUri (file_name);
+
+			indexable.Timestamp = message.Date.ToUniversalTime ();
+			indexable.HitType = "MailMessage";
+			indexable.MimeType = "message/rfc822";
+			indexable.CacheContent = false;
+
+			indexable.AddProperty (Property.NewUnsearched ("fixme:client", "kmail")); // NOTE(review): MaildirMessageToIndexable uses "maildir" - confirm "kmail" is intended
+			indexable.AddProperty (Property.NewUnsearched ("fixme:account", account_name));
+                        indexable.AddProperty (Property.NewUnsearched ("fixme:folder", folder_name));
+
+			GMime.InternetAddressList addrs;
+			// To recipients: recorded as sentTo keywords only in the sent-mail folder; group addresses are skipped
+			addrs = message.GetRecipients (GMime.Message.RecipientType.To);
+			foreach (GMime.InternetAddress ia in addrs) {
+				if (folder_name == Queryable.SentMailFolderName && ia.AddressType != GMime.InternetAddressType.Group)
+					indexable.AddProperty (Property.NewKeyword ("fixme:sentTo", ia.Addr));
+			}
+			addrs.Dispose ();
+			// Cc recipients: same rule as for To
+			addrs = message.GetRecipients (GMime.Message.RecipientType.Cc);
+			foreach (GMime.InternetAddress ia in addrs) {
+				if (folder_name == Queryable.SentMailFolderName && ia.AddressType != GMime.InternetAddressType.Group)
+					indexable.AddProperty (Property.NewKeyword ("fixme:sentTo", ia.Addr));
+			}
+			addrs.Dispose ();
+			// sender: recorded as gotFrom keywords in every folder other than sent mail
+			addrs = GMime.InternetAddressList.ParseString (GMime.Utils.HeaderDecodePhrase (message.Sender));
+			foreach (GMime.InternetAddress ia in addrs) {
+				if (folder_name != Queryable.SentMailFolderName && ia.AddressType != GMime.InternetAddressType.Group)
+					indexable.AddProperty (Property.NewKeyword ("fixme:gotFrom", ia.Addr));
+			}
+			addrs.Dispose ();
+
+			if (folder_name == Queryable.SentMailFolderName)
+				indexable.AddProperty (Property.NewFlag ("fixme:isSent"));
+			else {
+				string kmail_msg_sent = message.GetHeader ("X-KMail-Link-Type");
+				if (kmail_msg_sent == "reply")
+					indexable.AddProperty (Property.NewFlag ("fixme:isSent"));
+			}
+				
+// no need to store date again, use the issent flag to determine if the date is sentdate or not			
+#if false
+			if (folder_name == Queryable.SentMailFolderName)
+				indexable.AddProperty (Property.NewDate ("fixme:sentdate", message.Date.ToUniversalTime ()));
+			else
+				indexable.AddProperty (Property.NewDate ("fixme:received", message.Date.ToUniversalTime ()));
+#endif
+
+			indexable.SetBinaryStream (message.Stream);
+
+			return indexable;
+		}
+		
+		/**
+		 * Schedule the removal of an mbox file from the index.
+		 * MessageToIndexable sets the parent uri of every mbox message to the file
+		 * uri of its mbox; the remove task created here is given that same file uri.
+		 * NOTE(review): this presumably removes every document whose parent uri
+		 * matches - confirm against NewRemoveTask.
+		 */
+		public void RemoveMbox (string file)
+		{
+			Logger.Log.Debug ("Removing mbox:" + file);
+
+			Scheduler.Task removal = queryable.NewRemoveTask (UriFu.PathToFileUri (file));
+			removal.Priority = Scheduler.Priority.Immediate;
+			removal.SubPriority = 0;
+			queryable.ThisScheduler.Add (removal);
+		}
+
+		///////////////////////////////////////////////////////////
+
+		// Helpers
+
+		/**
+		 * True when dirPath is the cur/ or new/ subdirectory of an entry of mail_directories (tmp/ never qualifies).
+		 * Only the last path component is compared, so a name that merely ends in "cur" or "new" is rejected.
+		 * The parent of the last accepted path is cached in lastGoodDirPath to skip the list lookup.
+		 */
+		public bool IsMailDir (string dirPath)
+		{
+			if (dirPath == null)
+				return false;
+			string dirName = Path.GetFileName (dirPath);
+			if (dirName != "cur" && dirName != "new")
+				return false;
+
+			string possibleMaildir = (Directory.GetParent (dirPath)).FullName;
+			if (lastGoodDirPath == possibleMaildir)
+				return true;
+			Logger.Log.Debug ("checking if " + possibleMaildir + " is a maildir ?");
+			if (!mail_directories.Contains (possibleMaildir))
+				return false;
+			lastGoodDirPath = possibleMaildir;
+			return true;
+		}
+
+		/**
+		 * Called when a new directory is created: record it as a maildir or as a
+		 * folder directory.  Each list is checked first, so a directory that is
+		 * reported more than once is not stored twice.
+		 */
+		public void UpdateDirectories (string dirPath)
+		{
+			string parentDir = (Directory.GetParent (dirPath)).FullName;
+			DirectoryInfo dirinfo = new DirectoryInfo (dirPath);
+			string dirName = dirinfo.Name;
+
+			if (dirName == "cur" || dirName == "new" || dirName == "tmp") {
+				// a maildir subdirectory appeared: its parent is the maildir
+				if (!mail_directories.Contains (parentDir))
+					mail_directories.Add (parentDir);
+				return;
+			}
+
+			// format .name.directory - in which case add it to folder_directories
+			// format name  - in which case add it to mail_directories
+			ArrayList target = dirName.EndsWith (".directory") ? folder_directories : mail_directories;
+			if (!target.Contains (dirPath))
+				target.Add (dirPath);
+		}
+
+		/**
+		 * FIXME: parsing the kmailrc file might reveal the real mail folder name;
+		 * for now it is taken from the file name (mbox) or from the name of the
+		 * directory two levels above the mail file (maildir)
+		 */
+		public string GetFolderMbox (string mbox_file)
+		{
+			return new FileInfo (mbox_file).Name;
+		}
+
+		public string GetFolderMaildir (string mailFile)
+		{
+			DirectoryInfo containing_dir = Directory.GetParent (mailFile);
+			return Directory.GetParent (containing_dir.FullName).Name;
+		}
+
+		// True when the lower-cased path ends with one of the entries of excludes.
+		private bool IgnoreFolder (string path)
+		{
+			string lowered = path.ToLower ();
+			foreach (string excluded in excludes)
+				if (lowered.EndsWith (excluded))
+					return true;
+			return false;
+		}
+	}
+}
diff -urN beagle-0.2.10/beagled/MaildirQueryable/MaildirQueryable.cs beagle-0.2.10/beagled/MaildirQueryable/MaildirQueryable.cs
--- beagle-0.2.10/beagled/MaildirQueryable/MaildirQueryable.cs	1969-12-31 19:00:00.000000000 -0500
+++ beagle-0.2.10/beagled/MaildirQueryable/MaildirQueryable.cs	2006-09-28 12:54:28.000000000 -0400
@@ -0,0 +1,234 @@
+//
+// MaildirQueryable.cs
+//
+// Copyright (C) 2005 Novell, Inc.
+// Copyright (C) 2005 Debajyoti Bera
+//
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+
+
+using System;
+using System.Collections;
+using System.IO;
+using System.Threading;
+
+using Beagle.Util;
+
+namespace Beagle.Daemon.MaildirQueryable {
+
+	[QueryableFlavor (Name="Maildir", Domain=QueryDomain.Local, RequireInotify=false)]
+	public class MaildirQueryable : LuceneFileQueryable {
+
+		// for non-inotify case, poll after this number of seconds
+		public const int polling_interval_in_seconds = 300;
+		// mail folder paths
+		private string local_path;
+		// indexers - one for each mailfolder path
+		private MaildirIndexer local_indexer;
+		// global flag: true once InitializeGMime has called GMime.Global.Init ()
+		public static bool gmime_initialized = false;
+		public static void InitializeGMime ()
+		{
+			if (gmime_initialized)
+				return;
+			GMime.Global.Init ();
+			gmime_initialized = true;
+		}
+
+		// name of the sent-mail folder; set to a fixed value in the constructor (FIXME: should be parsed from kmailrc)
+		private string sentmail_foldername;
+		public string SentMailFolderName {
+			get { return sentmail_foldername; }
+		}
+		
+		public MaildirQueryable () : base ("MaildirIndex")
+		{
+			local_indexer = null;
+			sentmail_foldername = "sent-mail";
+
+			// the local mail path differs between distributions, so it is guessed; null means not found
+			local_path = GuessLocalFolderPath ();
+			if (local_path != null)
+				Logger.Log.Info ("Guessing for location of Maildir folders ... found at " + local_path);
+			else
+				Logger.Log.Info ("Maildir folders not found. Will keep trying ");
+		}
+
+		//////////////////////////////////////////////////////////////////////////////////////////////
+
+		// initial method called by the daemon; the actual work is handed to StartWorker
+		public override void Start ()
+		{
+			base.Start ();
+
+			ThreadStart worker = new ThreadStart (StartWorker);
+			ExceptionHandlingThread.Start (worker);
+		}
+
+		// for non-inotify case, this method is invoked repeatedly: crawl, then re-arm the task
+		private void CrawlHook (Scheduler.Task task)
+		{
+			if (local_indexer != null)
+				local_indexer.Crawl ();
+
+			DateTime nextRun = DateTime.Now.AddSeconds (polling_interval_in_seconds);
+			task.Reschedule = true;
+			task.TriggerTime = nextRun;
+		}
+
+		/**
+		 * Worker behind Start () and CheckForExistence ().
+		 * Without a mail directory it arms a GLib timeout (60000 ms) that re-checks
+		 * via CheckForExistence; otherwise it creates the "local" indexer, crawls
+		 * once and, when inotify is not enabled, schedules the polling CrawlHook.
+		 */
+		private void StartWorker ()
+		{
+			Logger.Log.Info ("Starting Maildir backend");
+
+			Stopwatch stopwatch = new Stopwatch ();
+			stopwatch.Start ();
+
+			// nothing to crawl yet: poll for the directory instead
+			if (local_path == null) {
+				GLib.Timeout.Add (60000, new GLib.TimeoutHandler (CheckForExistence));
+				Logger.Log.Debug ("Maildir directories (local mail) " + " not found, will repoll.");
+				return;
+			}
+
+			Logger.Log.Debug ("Starting mail crawl");
+			State = QueryableState.Crawling;
+			// local_path is known to be non-null past the early return above
+			local_indexer = new MaildirIndexer (this, "local", local_path);
+			local_indexer.Crawl ();
+			State = QueryableState.Idle;
+			Logger.Log.Debug ("Mail crawl done");
+
+			if (! Inotify.Enabled) {
+				Scheduler.TaskHook hook = new Scheduler.TaskHook (CrawlHook);
+				Scheduler.Task task = Scheduler.TaskFromHook (hook);
+				task.Tag = "Crawling Maildir directories";
+				task.Source = this;
+				task.TriggerTime = DateTime.Now.AddSeconds (polling_interval_in_seconds);
+				ThisScheduler.Add (task);
+			}
+
+			stopwatch.Stop ();
+			Logger.Log.Info ("Maildir driver worker thread done in {0}", stopwatch);
+		}
+
+		/**
+		 * timeout callback: re-guesses the mail path; true = poll again, false = found and started
+		 */
+		private bool CheckForExistence ()
+		{
+			local_path = GuessLocalFolderPath ();
+			if (local_path != null) {
+				StartWorker ();
+				return false;
+			}
+			return true;
+		}
+
+		/////////////////////////////////////////////////////////////////////////////
+
+		// Snippet from the text/plain body of the mail file behind hit; null when the hit is
+		// not a plain mail file, cannot be opened or parsed, or has no text/plain body.
+		override public string GetSnippet (string[] query_terms, Hit hit)
+		{
+			Logger.Log.Debug ("Fetching snippet for " + hit.Uri.LocalPath);
+			// FIXME: Also handle mbox emails
+			// Dont get snippets from attachments, they arent even indexed currently
+			if (! hit.Uri.IsFile || hit.ParentUri != null)
+				return null;
+
+			int mail_fd = Mono.Unix.Native.Syscall.open (hit.Uri.LocalPath, Mono.Unix.Native.OpenFlags.O_RDONLY);
+			if (mail_fd == -1)
+				return null;
+
+			InitializeGMime ();
+			GMime.StreamFs stream = new GMime.StreamFs (mail_fd);
+			GMime.Parser parser = new GMime.Parser (stream);
+			GMime.Message message = parser.ConstructMessage ();
+			stream.Dispose ();
+			parser.Dispose ();
+			// ConstructMessage may hand back null, e.g. for a file that is not a parsable mail
+			if (message == null)
+				return null;
+
+			try {
+				bool html = false;
+				string body = message.GetBody (true, out html);
+				// FIXME: Also handle snippets from html message parts - involves invoking html filter
+				if (html || body == null) {
+					Logger.Log.Debug ("No text/plain message part in " + hit.Uri);
+					return null;
+				}
+				return SnippetFu.GetSnippet (query_terms, new StringReader (body));
+			} finally {
+				message.Dispose ();	// released on every path, exceptions included
+			}
+		}
+
+		/////////////////////////////////////////////////////////////////////////////
+		// FIXME: How to determine if an mbox hit is valid without scanning the whole file
+
+		public string Name {
+			get { return "Maildir"; } // fixed backend name; same text as the QueryableFlavor Name on this class
+		}
+
+		/**
+		 * Guesses the path of the local maildir.
+		 * The location is distribution specific: the default kmail folder
+		 * location is ~/Mail whereas e.g. Mandrake puts kmail mails in ~/.Mail.
+		 * Until each distribution fixes this path, the first of ~/Mail and
+		 * ~/.Mail (tried in that order) accepted by GuessLocalFolder is
+		 * returned; null is returned when neither is accepted.
+		 */
+		private string GuessLocalFolderPath ()
+		{
+			string[] candidates = new string[] {
+				Path.Combine (PathFinder.HomeDir, "Mail"),
+				Path.Combine (PathFinder.HomeDir, ".Mail")
+			};
+
+			foreach (string candidate in candidates) {
+				if (GuessLocalFolder (candidate))
+					return candidate;
+			}
+
+			// neither candidate was accepted
+			return null;
+		}
+
+		/**
+		 * Checks whether path can serve as the maildir directory.
+		 * NOTE: only the existence of the directory is tested, not its layout.
+		 */
+		private bool GuessLocalFolder (string path)
+		{
+			bool exists = Directory.Exists (path);
+			return exists;
+		}
+
+	}
+
+}
diff -urN beagle-0.2.10/beagled/Makefile.am beagle-0.2.10/beagled/Makefile.am
--- beagle-0.2.10/beagled/Makefile.am	2006-09-18 18:24:29.000000000 -0400
+++ beagle-0.2.10/beagled/Makefile.am	2006-09-28 12:53:38.000000000 -0400
@@ -291,6 +291,12 @@
 	$(kmailqueryable)/KMailIndexableGenerator.cs	\
 	$(kmailqueryable)/KMailIndexer.cs
 
+maildirqueryable = $(srcdir)/MaildirQueryable
+MAILDIR_QUERYABLE_CSFILES = \
+	$(maildirqueryable)/MaildirQueryable.cs		\
+	$(maildirqueryable)/MaildirIndexableGenerator.cs	\
+	$(maildirqueryable)/MaildirIndexer.cs
+
 blamqueryable = $(srcdir)/BlamQueryable
 BLAM_QUERYABLE_CSFILES =				\
 	$(blamqueryable)/BlamQueryable.cs
@@ -319,6 +325,7 @@
 DAEMON_DLL_CSFILES = 						\
 	$(LUCENE_CSFILES)					\
 	$(KMAIL_QUERYABLE_CSFILES)				\
+	$(MAILDIR_QUERYABLE_CSFILES)				\
 	$(FILE_SYSTEM_QUERYABLE_CSFILES)			\
 	$(GAIM_LOG_QUERYABLE_CSFILES)				\
 	$(INDEXING_SERVICE_QUERYABLE_CSFILES)			\



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]