[PATCH] beagle maildir backend
- From: Chris Mason <chris mason oracle com>
- To: dashboard-hackers gnome org
- Subject: [PATCH] beagle maildir backend
- Date: Thu, 28 Sep 2006 17:12:41 -0000
Hello everyone,
Please cc me on any replies as I'm not subscribed.
Here is some quick and dirty code to add a maildir backend to beagle
0.2.10. It is based on the KMail backend and just assumes that ~/Mail
(or, failing that, ~/.Mail) is a maildir directory.
Without this patch a big portion of my emails get misclassified as
text/plain. It probably does need to be smarter about detecting maildir
directories, or trying other places, but it's a starting point if anyone
is interested in improving it.
-chris
diff -urN beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexableGenerator.cs beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexableGenerator.cs
--- beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexableGenerator.cs 1969-12-31 19:00:00.000000000 -0500
+++ beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexableGenerator.cs 2006-09-28 13:48:16.000000000 -0400
@@ -0,0 +1,303 @@
+
+//
+// MaildirIndexableGenerator.cs
+//
+// Copyright (C) 2005 Novell, Inc.
+// Copyright (C) 2005 Debajyoti Bera
+//
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+using System.Collections;
+using System.IO;
+using System.Runtime.Serialization.Formatters.Binary;
+using System.Threading;
+using System.Xml;
+
+using Beagle.Util;
+using Beagle.Daemon;
+
+namespace Beagle.Daemon.MaildirQueryable {
+
+ /**
+ * Indexable generator for maildir mails
+ */
+ public class MaildirdirIndexableGenerator : IIndexableGenerator {
+ // store the indexer
+ private MaildirIndexer indexer;
+ // message file currently indexing
+ private FileInfo CrawlFile;
+ // directory currently parsing
+ private DirectoryInfo current_dir;
+ // list of files in current directory
+ private IEnumerable files_to_parse;
+ // list of directories to scan
+ private ArrayList dirs_to_scan;
+ private IEnumerator dir_enumerator = null;
+ private IEnumerator file_enumerator = null;
+
+ private string account_name {
+ get { return indexer.AccountName; }
+ }
+
+ public MaildirdirIndexableGenerator (MaildirIndexer indexer, ArrayList mail_directories)
+ {
+ this.indexer = indexer;
+ dirs_to_scan = new ArrayList ();
+
+ foreach (string directory in mail_directories) {
+ AddDirectory (directory);
+ }
+ dir_enumerator = dirs_to_scan.GetEnumerator ();
+ }
+
+ public void PostFlushHook ()
+ {
+ }
+
+ /**
+  * Queue the cur/ and new/ subdirectories of the maildir _dir for
+  * scanning, in that order. A subdirectory that does not exist is
+  * skipped; tmp/ is never queued.
+  */
+ private void AddDirectory (string _dir) {
+     string[] scanned_subdirs = { "cur", "new" };
+
+     foreach (string subdir in scanned_subdirs) {
+         string candidate = Path.Combine (_dir, subdir);
+         if (Directory.Exists (candidate))
+             dirs_to_scan.Add (new DirectoryInfo (candidate));
+     }
+ }
+
+ public string StatusName {
+ get { return indexer.MailRoot; }
+ }
+
+ public Indexable GetNextIndexable ()
+ {
+ FileInfo file = (FileInfo) file_enumerator.Current;
+ return indexer.MaildirMessageToIndexable (file.FullName);
+ }
+
+ public bool IsUpToDate (string path)
+ {
+ return indexer.Queryable.FileAttributesStore.IsUpToDate (path);
+ }
+
+ public bool HasNextIndexable ()
+ {
+ do {
+ while (file_enumerator == null || !file_enumerator.MoveNext ()) {
+ if (!dir_enumerator.MoveNext ()) {
+ dir_enumerator = null;
+ return false;
+ }
+ current_dir = (DirectoryInfo) dir_enumerator.Current;
+ Logger.Log.Info ("Scanning maildir feeds in " + current_dir.FullName);
+ files_to_parse = DirectoryWalker.GetFileInfos (current_dir);
+ file_enumerator = files_to_parse.GetEnumerator ();
+ }
+ CrawlFile = (FileInfo) file_enumerator.Current;
+ } while (IsUpToDate (CrawlFile.FullName));
+
+ return true;
+ }
+
+ }
+
+ /**
+ * Indexable generator for mbox mail files
+ * based on Evo code
+ */
+ public class MaildirMboxIndexableGenerator : IIndexableGenerator {
+ // path of the mbox file
+ private string mbox_file;
+ // fd, stream, parser needed for gmime parsing
+ private int mbox_fd = -1;
+ private GMime.StreamFs mbox_stream;
+ private GMime.Parser mbox_parser;
+ // store the indexer
+ private MaildirIndexer indexer;
+ // number of mails scanned
+ private int indexed_count;
+ // is this initial scan - in which case the mbox might have been modified since last scan
+ private bool initial_scan;
+
+ private string account_name {
+ get { return indexer.AccountName; }
+ }
+
+ private string folder_name {
+ get { return indexer.GetFolderMbox (mbox_file); }
+ }
+
+ public MaildirMboxIndexableGenerator (MaildirIndexer indexer, string mbox_file, bool initial_scan)
+ {
+ this.indexer = indexer;
+ this.mbox_file = mbox_file;
+ this.initial_scan = initial_scan;
+ }
+
+ public void PostFlushHook ()
+ {
+ Checkpoint ();
+ }
+
+ /**
+ * Persist the parser's current offset in the mbox (and attach DateTime.UtcNow to the file)
+ * so that if indexing stops midway it can later resume from that offset instead of
+ * the beginning, as long as the mbox file hasnt been modified. Does nothing without a parser.
+ */
+ public void Checkpoint ()
+ {
+ if (mbox_parser != null) {
+ MboxLastOffset = mbox_parser.Tell ();
+ indexer.Queryable.FileAttributesStore.AttachLastWriteTime (mbox_file, DateTime.UtcNow);
+ }
+ }
+
+ public string StatusName {
+ get { return mbox_file; }
+ }
+
+ private long MboxLastOffset {
+ get {
+ string offset_str = indexer.Queryable.ReadDataLine ("offset-" + mbox_file.Replace ('/', '-'));
+ long offset = Convert.ToInt64 (offset_str);
+ return offset;
+ }
+
+ set {
+ indexer.Queryable.WriteDataLine ("offset-" + mbox_file.Replace ('/', '-'), value.ToString ());
+ }
+ }
+
+ public bool IsUpToDate (string path)
+ {
+ //Logger.Log.Info (path + " is uptodate:" + indexer.Queryable.FileAttributesStore.IsUpToDate (path));
+ return indexer.Queryable.FileAttributesStore.IsUpToDate (path);
+ }
+
+ /**
+  * Advance to the next mail in the mbox file, opening it on first use.
+  * Returns false if gmime cannot be initialized, the mbox cannot be
+  * opened, or the parser has reached the end of the stream.
+  */
+ public bool HasNextIndexable ()
+ {
+     if (mbox_fd < 0) {
+         Logger.Log.Debug ("Opening mbox {0}", mbox_file);
+         try {
+             MaildirQueryable.InitializeGMime ();
+         } catch (Exception e) {
+             Logger.Log.Warn (e, "Caught exception trying to initalize gmime:");
+             return false;
+         }
+         try {
+             mbox_fd = Mono.Unix.Native.Syscall.open (mbox_file, Mono.Unix.Native.OpenFlags.O_RDONLY);
+         } catch (System.IO.FileNotFoundException e) {
+             Logger.Log.Warn ("mbox " + mbox_file + " deleted while indexing.");
+             return false;
+         }
+         // open() signals failure by returning -1; do not wrap a bad descriptor
+         if (mbox_fd < 0) {
+             Logger.Log.Warn ("Could not open mbox " + mbox_file);
+             return false;
+         }
+         mbox_stream = new GMime.StreamFs (mbox_fd);
+         if (initial_scan && !IsUpToDate (mbox_file))
+             // file changed since the last scan: reset the stored offset and start at 0
+             mbox_stream.Seek ((int)(MboxLastOffset = 0));
+         else
+             mbox_stream.Seek ((int) MboxLastOffset);
+         mbox_parser = new GMime.Parser (mbox_stream);
+         mbox_parser.ScanFrom = true;
+     }
+
+     if (mbox_parser.Eos ()) {
+         // save the state ASAP
+         Checkpoint ();
+         mbox_stream.Close ();
+         mbox_fd = -1;
+         mbox_stream.Dispose ();
+         mbox_stream = null;
+         mbox_parser.Dispose ();
+         mbox_parser = null;
+         Logger.Log.Debug ("{0}: Finished indexing {1} messages", folder_name, indexed_count);
+         return false;
+     }
+     return true;
+ }
+
+ public Indexable GetNextIndexable ()
+ {
+ GMime.Message message = null;
+ try {
+ message = mbox_parser.ConstructMessage ();
+ } catch (System.IO.FileNotFoundException e) {
+ Logger.Log.Warn ("mbox " + mbox_file + " deleted while parsing.");
+ return null;
+ }
+
+ try {
+ // Again comment from Evo :P
+ // Work around what I think is a bug in GMime: If you
+ // have a zero-byte file or seek to the end of a
+ // file, parser.Eos () will return true until it
+ // actually tries to read something off the wire.
+ // Since parser.ConstructMessage() always returns a
+ // message (which may also be a bug), we'll often get
+ // one empty message which we need to deal with here.
+ //
+ // Check if its empty by seeing if the Headers
+ // property is null or empty.
+ if (message == null || message.Headers == null || message.Headers == "") {
+ return null;
+ }
+
+ // mbox KIO slave uses the From line as URI - how weird!
+ // are those lines supposed to be unique ???
+ string id = mbox_parser.From;
+ System.Uri uri = EmailUri (id);
+
+ Indexable indexable = indexer.MessageToIndexable (mbox_file, uri, message, indexer.GetFolderMbox (mbox_file));
+
+ if (indexable == null)
+ return null;
+
+ ++indexed_count;
+
+ return indexable;
+ } finally {
+ if (message != null)
+ message.Dispose ();
+ }
+ }
+
+ // TODO: confirm that this works with the mbox kio-slave from new kdepim
+ public Uri EmailUri (string id)
+ {
+ FileInfo fi = new FileInfo (mbox_file);
+ return new Uri (String.Format ("mbox:///{0}/{1}", fi.FullName, id));
+ }
+ }
+}
diff -urN beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexer.cs beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexer.cs
--- beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexer.cs 1969-12-31 19:00:00.000000000 -0500
+++ beagle-0.2.10/beagled/MaildirQueryable/MaildirIndexer.cs 2006-09-28 14:13:27.000000000 -0400
@@ -0,0 +1,511 @@
+
+//
+// MaildirIndexer.cs
+//
+// Copyright (C) 2005 Novell, Inc.
+// Copyright (C) 2005 Debajyoti Bera
+//
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+using System.Collections;
+using System.IO;
+
+using Beagle.Util;
+using Beagle.Daemon;
+
+namespace Beagle.Daemon.MaildirQueryable {
+
+ /**
+ * Main indexer class
+ * The bulk of the indexing work is done here
+ */
+ public class MaildirIndexer {
+ // location of mail folder
+ private string mail_root;
+ public string MailRoot {
+ get { return mail_root; }
+ }
+ // account name for this folder
+ private string account_name;
+ public string AccountName {
+ get { return account_name; }
+ }
+ // mail folders not to scan
+ private ArrayList excludes;
+ // list of maildir directories which store mails in cur/, new/, tmp/ subdirs
+ private ArrayList mail_directories;
+ // list of directories which contain mbox files and other mail folders
+ private ArrayList folder_directories;
+ // list of mbox files
+ private ArrayList mbox_files;
+ // also store the queryable
+ private MaildirQueryable queryable;
+ public MaildirQueryable Queryable {
+ get { return queryable; }
+ }
+
+ private string lastGoodDirPath = ""; // cache last successful directory
+
+ public MaildirIndexer (MaildirQueryable queryable, string account, string root)
+ {
+ this.queryable = queryable;
+ account_name = account;
+ mail_root = root;
+ mail_directories = new ArrayList ();
+ Logger.Log.Debug ("mail_directories created for:" + mail_root + " (" + mail_directories.Count + ")");
+ folder_directories = new ArrayList ();
+ mbox_files = new ArrayList ();
+
+ excludes = new ArrayList ();
+ excludes.Add ("spam");
+ excludes.Add ("outbox");
+ excludes.Add ("trash");
+ excludes.Add ("drafts");
+ }
+
+ /**
+ * inotify callback
+ */
+ private void OnInotifyEvent (Inotify.Watch watch,
+ string path,
+ string subitem,
+ string srcpath,
+ Inotify.EventType type)
+ {
+ //FIXME this case should NEVER occur, still it does
+ if (mail_directories == null) {
+ Logger.Log.Debug ("*** WEIRD AVIRAM CASE for :" + mail_root);
+ Logger.Log.Debug ("Received inotify event{3} for {4}: path={0}, subitem={1}, srcpath={2}", path, subitem, srcpath, type, mail_root);
+ return;
+ }
+
+ if (subitem == "")
+ return;
+ string fullPath = Path.Combine (path, subitem);
+
+ // we need to watch for all kinds of events - this is tricky
+
+ // Case: new file is created
+ // - if it is one of the folder_directories, index it
+ // - if is in one of the mail_directories, index it if it is an mbox file
+ if ((type & Inotify.EventType.Create) != 0 && (type & Inotify.EventType.IsDirectory) == 0) {
+ if (IsMailDir (path)) {
+ Indexable indexable = MaildirMessageToIndexable (fullPath);
+ AddIndexableTask (indexable, fullPath);
+ }
+ return;
+ }
+
+ // Case: file is deleted
+ // - if it is a mail file, we might like it to be deleted
+ if ((type & Inotify.EventType.MovedFrom) != 0 ||
+ ((type & Inotify.EventType.Delete) != 0 &&
+ (type & Inotify.EventType.IsDirectory) == 0)) {
+ if (IsMailDir (path))
+ RemoveMail (fullPath);
+ else if (mbox_files.Contains (fullPath)) {
+ RemoveMbox (fullPath);
+ mbox_files.Remove (fullPath);
+ }
+ return;
+ }
+
+ // Case: file is moved
+ // - files are moved from tmp/new to cur
+ // - need to delete from the source
+ if ((type & Inotify.EventType.MovedTo) != 0 && (type & Inotify.EventType.IsDirectory) == 0) {
+ if (IsMailDir (path)) {
+ Indexable indexable = MaildirMessageToIndexable (fullPath);
+ AddIndexableTask (indexable, fullPath);
+ }
+ if (IsMailDir (srcpath))
+ RemoveMail (srcpath);
+ if (mbox_files.Contains (fullPath)) {
+ // check if this because of compaction, in which case need to delete previous mbox
+ if (srcpath != null && srcpath.EndsWith ("." + subitem + ".compacted"))
+ RemoveMbox (fullPath);
+ // FIXME need to ensure IndexMbox is scheduled *after* RemoveMbox finishes
+ // RemoveMbox creates a job with immediate priority while
+ // IndexMbox creates a job with the default priority of a generator
+ // Is there a better way to ensure the order ?
+ IndexMbox (fullPath, true);
+ }
+ return;
+ }
+
+ // Case: file is modified i.e. there was no create event but closewrite event
+ // - possibly some mbox was changed
+ // FIXME kmail doesnt physically delete the deleted mails from mbox files unless compacted
+ // - which means one has to read the .index files to find deleted messages...
+ // - need to find the format of the .index/.index.ids etc files and parse them
+ if ((type & Inotify.EventType.Modify) != 0 && (type & Inotify.EventType.IsDirectory) == 0) {
+ if (mbox_files.Contains (fullPath))
+ IndexMbox (fullPath, false);
+ return;
+ }
+
+ // Case: a directory is created:
+ // well watch it anyway but also make sure its a maildir directory
+ // if it a maildir directory, then add it to maildir_dirs
+ if ((type & Inotify.EventType.Create) != 0 && (type & Inotify.EventType.IsDirectory) != 0) {
+ if (!IgnoreFolder (fullPath)) {
+ Watch (fullPath);
+ UpdateDirectories(fullPath);
+ }
+ return;
+ }
+
+ // Case: if a directory is deleted:
+ // remove watch
+ if ((type & Inotify.EventType.Delete) != 0 && (type & Inotify.EventType.IsDirectory) != 0) {
+ watch.Unsubscribe ();
+ mail_directories.Remove (fullPath);
+ folder_directories.Remove (fullPath);
+ return;
+ }
+
+ // Case: directory is moved
+ // FIXME: implement renaming of mail folders
+
+ }
+
+ /**
+ * Add watch to the parameter directory and its subdirs, recursively
+ */
+ public void Watch (string path)
+ {
+ DirectoryInfo root = new DirectoryInfo (path);
+ if (! root.Exists)
+ return;
+
+ Queue queue = new Queue ();
+ queue.Enqueue (root);
+
+ while (queue.Count > 0) {
+ DirectoryInfo dir = queue.Dequeue () as DirectoryInfo;
+
+ if (! dir.Exists)
+ continue;
+
+ //log.Debug ("Adding inotify watch to " + dir.FullName);
+ Inotify.Subscribe (dir.FullName, OnInotifyEvent,
+ Inotify.EventType.Create
+ | Inotify.EventType.Delete
+ | Inotify.EventType.MovedFrom
+ | Inotify.EventType.MovedTo);
+
+ foreach (DirectoryInfo subdir in DirectoryWalker.GetDirectoryInfos (dir))
+ queue.Enqueue (subdir);
+ }
+ }
+
+ /**
+ * Scan the directories directly under mail_root to find the mail
+ * directories that need to be indexed, and watch them (recursively,
+ * via Watch) for changes
+ */
+ public void Crawl ()
+ {
+ if (!Directory.Exists (mail_root))
+ return;
+
+ mail_directories.Clear ();
+ folder_directories.Clear ();
+ mbox_files.Clear();
+
+ Queue pending = new Queue ();
+ pending.Enqueue (mail_root);
+ folder_directories.Add (mail_root);
+ // add inotify watch to root folder
+ if (Inotify.Enabled)
+ Inotify.Subscribe (mail_root, OnInotifyEvent,
+ Inotify.EventType.Create
+ | Inotify.EventType.Delete
+ | Inotify.EventType.MovedFrom
+ | Inotify.EventType.MovedTo
+ | Inotify.EventType.Modify);
+
+ while (pending.Count > 0) {
+ string dir = (string) pending.Dequeue ();
+ Logger.Log.Debug ("Searching for mbox and maildirs in " + dir);
+
+ foreach (string d in DirectoryWalker.GetDirectoryNames(dir)) {
+ if (d == "cur" || d == "new" || d == "tmp") {
+ continue;
+ }
+ string fullpath = Path.Combine(dir, d);
+ mail_directories.Add (fullpath);
+ if (Inotify.Enabled) {
+ Watch (fullpath);
+ }
+ }
+ }
+
+ // copy the contents as mail_directories, mbox_files might change due to async events
+ ArrayList _mail_directories = new ArrayList (mail_directories);
+ ArrayList _mbox_files = new ArrayList (mbox_files);
+
+ if (queryable.ThisScheduler.ContainsByTag (mail_root)) {
+ Logger.Log.Debug ("Not adding task for already running task: {0}", mail_root);
+ return;
+ } else {
+ MaildirdirIndexableGenerator generator = new MaildirdirIndexableGenerator (this, _mail_directories);
+ AddIIndexableTask (generator, mail_root);
+ }
+
+ foreach (string mbox_file in _mbox_files) {
+ IndexMbox (mbox_file, true);
+ }
+ }
+
+ private void AddIndexableTask (Indexable indexable, string tag)
+ {
+ if (indexable == null)
+ return;
+
+ Scheduler.Task task = queryable.NewAddTask (indexable);
+ task.Priority = Scheduler.Priority.Immediate;
+ task.Tag = tag;
+ queryable.ThisScheduler.Add (task);
+ }
+
+ private void AddIIndexableTask (IIndexableGenerator generator, string tag)
+ {
+ if (generator == null)
+ return;
+
+ Scheduler.Task task = queryable.NewAddTask (generator);
+ task.Tag = tag;
+ queryable.ThisScheduler.Add (task);
+ }
+
+ /**
+ * Start a task for indexing an mbox file
+ */
+ public void IndexMbox (string mbox_file, bool initial_scan)
+ {
+ if (queryable.ThisScheduler.ContainsByTag (mbox_file)) {
+ Logger.Log.Debug ("Not adding task for already running task: {0}", mbox_file);
+ return;
+ }
+
+ //Logger.Log.Debug ("Creating task to index mbox {0}", mbox_file);
+ MaildirMboxIndexableGenerator generator = new MaildirMboxIndexableGenerator (this, mbox_file, initial_scan);
+ AddIIndexableTask (generator, mbox_file);
+ }
+
+ /**
+ * Remove maildir mail file
+ */
+ private void RemoveMail (string file)
+ {
+ Logger.Log.Debug ("Removing mail:" + file);
+ Uri uri = UriFu.PathToFileUri (file);
+ Scheduler.Task task = queryable.NewRemoveTask (uri);
+ task.Priority = Scheduler.Priority.Immediate;
+ task.SubPriority = 0;
+ queryable.ThisScheduler.Add (task);
+ }
+
+ /**
+ * Create an indexable from a maildir message
+ */
+ public Indexable MaildirMessageToIndexable (string filename)
+ {
+ Logger.Log.Debug ("+ indexing maildir mail:" + filename);
+ String folder = GetFolderMaildir(filename);
+ Uri file_uri = UriFu.PathToFileUri (filename);
+
+ Indexable indexable = new Indexable (file_uri);
+ indexable.HitType = "MailMessage";
+ indexable.MimeType = "message/rfc822";
+ indexable.CacheContent = false;
+
+ indexable.AddProperty (Property.NewUnsearched ("fixme:client", "maildir"));
+ indexable.AddProperty (Property.NewUnsearched ("fixme:account", account_name));
+ indexable.AddProperty (Property.NewUnsearched ("fixme:folder", folder));
+ indexable.ContentUri = file_uri;
+
+ return indexable;
+ }
+
+ /**
+ * Create an indexable from an mbox message
+ * Most of the code here is from Evo backend
+ */
+ public Indexable MessageToIndexable (string file_name, System.Uri uri, GMime.Message message, string folder_name)
+ {
+ //Logger.Log.Debug ("Indexing " + uri + " in folder " + folder_name);
+ Indexable indexable = new Indexable (uri);
+ // set parent uri to the filename so that when an mbox file
+ // is deleted, all the messages in that file can be deleted
+ indexable.ParentUri = UriFu.PathToFileUri (file_name);
+
+ indexable.Timestamp = message.Date.ToUniversalTime ();
+ indexable.HitType = "MailMessage";
+ indexable.MimeType = "message/rfc822";
+ indexable.CacheContent = false;
+
+ indexable.AddProperty (Property.NewUnsearched ("fixme:client", "kmail"));
+ indexable.AddProperty (Property.NewUnsearched ("fixme:account", account_name));
+ indexable.AddProperty (Property.NewUnsearched ("fixme:folder", folder_name));
+
+ GMime.InternetAddressList addrs;
+
+ addrs = message.GetRecipients (GMime.Message.RecipientType.To);
+ foreach (GMime.InternetAddress ia in addrs) {
+ if (folder_name == Queryable.SentMailFolderName && ia.AddressType != GMime.InternetAddressType.Group)
+ indexable.AddProperty (Property.NewKeyword ("fixme:sentTo", ia.Addr));
+ }
+ addrs.Dispose ();
+
+ addrs = message.GetRecipients (GMime.Message.RecipientType.Cc);
+ foreach (GMime.InternetAddress ia in addrs) {
+ if (folder_name == Queryable.SentMailFolderName && ia.AddressType != GMime.InternetAddressType.Group)
+ indexable.AddProperty (Property.NewKeyword ("fixme:sentTo", ia.Addr));
+ }
+ addrs.Dispose ();
+
+ addrs = GMime.InternetAddressList.ParseString (GMime.Utils.HeaderDecodePhrase (message.Sender));
+ foreach (GMime.InternetAddress ia in addrs) {
+ if (folder_name != Queryable.SentMailFolderName && ia.AddressType != GMime.InternetAddressType.Group)
+ indexable.AddProperty (Property.NewKeyword ("fixme:gotFrom", ia.Addr));
+ }
+ addrs.Dispose ();
+
+ if (folder_name == Queryable.SentMailFolderName)
+ indexable.AddProperty (Property.NewFlag ("fixme:isSent"));
+ else {
+ string kmail_msg_sent = message.GetHeader ("X-KMail-Link-Type");
+ if (kmail_msg_sent == "reply")
+ indexable.AddProperty (Property.NewFlag ("fixme:isSent"));
+ }
+
+// no need to store date again, use the issent flag to determine if the date is sentdate or not
+#if false
+ if (folder_name == Queryable.SentMailFolderName)
+ indexable.AddProperty (Property.NewDate ("fixme:sentdate", message.Date.ToUniversalTime ()));
+ else
+ indexable.AddProperty (Property.NewDate ("fixme:received", message.Date.ToUniversalTime ()));
+#endif
+
+ indexable.SetBinaryStream (message.Stream);
+
+ return indexable;
+ }
+
+ /**
+ * deleting mbox means deleting all the mails which were in this mbox
+ * we use the idea of parent-uri
+ * while creating indexables, we set the parent uri to be the uri of the mbox file
+ * so to delete all mails in the mbox we just delete all documents whose parent uri
+ * is the uri of the mbox file
+ */
+ public void RemoveMbox (string file)
+ {
+ Logger.Log.Debug ("Removing mbox:" + file);
+ Uri uri = UriFu.PathToFileUri (file);
+ Scheduler.Task task = queryable.NewRemoveTask (uri);
+ task.Priority = Scheduler.Priority.Immediate;
+ task.SubPriority = 0;
+ queryable.ThisScheduler.Add (task);
+ }
+
+ ///////////////////////////////////////////////////////////
+
+ // Helpers
+
+ /**
+  * Returns true if dirPath is the cur/ or new/ subdirectory of a directory in
+  * mail_directories (some_watched_dir/{cur,new}); the last match is cached.
+  * tmp/ is ignored - no point trying to watch it - it will be moved anyway.
+  * Open question: should we also check for the kmail directory structure,
+  * i.e. presence of files like directory.index, directory.index.ids ?
+  */
+ public bool IsMailDir (string dirPath)
+ {
+     // match the last path component exactly: EndsWith would also accept e.g. "renew"
+     string leaf = (dirPath == null) ? null : Path.GetFileName (dirPath);
+     if (leaf != "cur" && leaf != "new")
+         return false;
+     string possibleMaildir = (Directory.GetParent (dirPath)).FullName;
+     if (lastGoodDirPath == possibleMaildir)
+         return true;
+     Logger.Log.Debug ("checking if " + possibleMaildir + " is a maildir ?");
+     if (!mail_directories.Contains (possibleMaildir))
+         return false;
+     lastGoodDirPath = possibleMaildir;
+     return true;
+ }
+
+ /**
+ * Called when a new directory is created
+ * Decide what to do with this new directory
+ */
+ public void UpdateDirectories (string dirPath)
+ {
+ string parentDir = (Directory.GetParent (dirPath)).FullName;
+ DirectoryInfo dirinfo = new DirectoryInfo (dirPath);
+ string dirName = dirinfo.Name;
+
+ if (dirName == "cur" || dirName == "new" || dirName == "tmp") {
+ // check and add the parentdir to mail_directories
+ if (!mail_directories.Contains (parentDir))
+ mail_directories.Add (parentDir);
+ return;
+ }
+
+ // format .name.directory - in which case add it to folder_dir
+ // format name - in which case add it to mail_dir
+ if (dirName.EndsWith (".directory"))
+ folder_directories.Add (dirPath);
+ else
+ mail_directories.Add (dirPath);
+ }
+
+ /**
+ * FIXME:if we can parse kmailrc file, then we might be
+ * able to deduce the mail folder name
+ * currently get it from the file name (mbox) or parent.parent directory name
+ */
+
+ public string GetFolderMbox (string mbox_file)
+ {
+ FileInfo fi = new FileInfo (mbox_file);
+ return fi.Name;
+ }
+
+ public string GetFolderMaildir (string mailFile)
+ {
+ return (Directory.GetParent ((Directory.GetParent (mailFile)).FullName).Name);
+ }
+
+ private bool IgnoreFolder (string path)
+ {
+ foreach (string exclude in excludes) {
+ if (path.ToLower().EndsWith (exclude))
+ return true;
+ }
+ return false;
+ }
+ }
+}
diff -urN beagle-0.2.10/beagled/MaildirQueryable/MaildirQueryable.cs beagle-0.2.10/beagled/MaildirQueryable/MaildirQueryable.cs
--- beagle-0.2.10/beagled/MaildirQueryable/MaildirQueryable.cs 1969-12-31 19:00:00.000000000 -0500
+++ beagle-0.2.10/beagled/MaildirQueryable/MaildirQueryable.cs 2006-09-28 12:54:28.000000000 -0400
@@ -0,0 +1,234 @@
+//
+// MaildirQueryable.cs
+//
+// Copyright (C) 2005 Novell, Inc.
+// Copyright (C) 2005 Debajyoti Bera
+//
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+
+
+using System;
+using System.Collections;
+using System.IO;
+using System.Threading;
+
+using Beagle.Util;
+
+namespace Beagle.Daemon.MaildirQueryable {
+
+ [QueryableFlavor (Name="Maildir", Domain=QueryDomain.Local, RequireInotify=false)]
+ public class MaildirQueryable : LuceneFileQueryable {
+
+ // for non-inotify case, poll after this number of seconds
+ public const int polling_interval_in_seconds = 300;
+ // mail folder paths
+ private string local_path;
+ // indexers - one for each mailfolder path
+ private MaildirIndexer local_indexer;
+ // global variable
+ public static bool gmime_initialized = false;
+ public static void InitializeGMime ()
+ {
+ if (!gmime_initialized) {
+ GMime.Global.Init ();
+ gmime_initialized = true;
+ }
+ }
+
+ // name of the sentmail folder - should be parsed from kmailrc
+ private string sentmail_foldername;
+ public string SentMailFolderName {
+ get { return sentmail_foldername; }
+ }
+
+ public MaildirQueryable () : base ("MaildirIndex")
+ {
+ // the local mail path is different for different distributions
+ local_path = GuessLocalFolderPath ();
+ if (local_path == null) {
+ Logger.Log.Info ("Maildir folders not found. Will keep trying ");
+ } else
+ Logger.Log.Info ("Guessing for location of Maildir folders ... found at " + local_path);
+
+ local_indexer = null;
+ sentmail_foldername = "sent-mail";
+ }
+
+ //////////////////////////////////////////////////////////////////////////////////////////////
+
+ /**
+ * initial method called by the daemon
+ */
+ public override void Start ()
+ {
+ base.Start ();
+ ExceptionHandlingThread.Start (new ThreadStart (StartWorker));
+ }
+
+ /**
+ * for non-inotify case, this method is invoked repeatedly
+ */
+ private void CrawlHook (Scheduler.Task task)
+ {
+ if (local_indexer != null)
+ local_indexer.Crawl ();
+ task.Reschedule = true;
+ task.TriggerTime = DateTime.Now.AddSeconds (polling_interval_in_seconds);
+ }
+
+ /**
+ * called by Start(), starts actual work
+ * create indexers
+ * ask indexers to crawl the mails
+ * for non-inotify case, ask to poll
+ */
+ private void StartWorker ()
+ {
+ Logger.Log.Info ("Starting Maildir backend");
+
+ Stopwatch stopwatch = new Stopwatch ();
+ stopwatch.Start ();
+
+ // check if there is at all anything to crawl
+ if ( local_path == null ) {
+ GLib.Timeout.Add (60000, new GLib.TimeoutHandler (CheckForExistence));
+ Logger.Log.Debug ("Maildir directories (local mail) " + " not found, will repoll.");
+ return;
+ }
+
+ Logger.Log.Debug ("Starting mail crawl");
+ State = QueryableState.Crawling;
+ if (local_path != null) {
+ local_indexer = new MaildirIndexer (this, "local", local_path);
+ local_indexer.Crawl ();
+ }
+ State = QueryableState.Idle;
+ Logger.Log.Debug ("Mail crawl done");
+
+ if (! Inotify.Enabled) {
+ Scheduler.Task task = Scheduler.TaskFromHook (new Scheduler.TaskHook (CrawlHook));
+ task.Tag = "Crawling Maildir directories";
+ task.Source = this;
+ task.TriggerTime = DateTime.Now.AddSeconds (polling_interval_in_seconds);
+ ThisScheduler.Add (task);
+ }
+
+ stopwatch.Stop ();
+ Logger.Log.Info ("Maildir driver worker thread done in {0}", stopwatch);
+ }
+
+ /**
+ * use this method to determine if we have anything to crawl and index
+ */
+ private bool CheckForExistence ()
+ {
+ local_path = GuessLocalFolderPath ();
+ if (local_path == null)
+ return true;
+
+ StartWorker();
+ return false;
+ }
+
+ /////////////////////////////////////////////////////////////////////////////
+
+ // Builds a snippet from the body of the mail file behind hit.Uri. Returns null for
+ // non-file hits, hits with a parent, files that cannot be opened or parsed, and
+ // messages whose body is missing or only available as html.
+ override public string GetSnippet (string[] query_terms, Hit hit)
+ {
+     Logger.Log.Debug ("Fetching snippet for " + hit.Uri.LocalPath);
+     // FIXME: Also handle mbox emails
+     if (! hit.Uri.IsFile)
+         return null;
+     // Dont get snippets from attachments, they arent even indexed currently
+     if (hit.ParentUri != null)
+         return null;
+     int mail_fd = Mono.Unix.Native.Syscall.open (hit.Uri.LocalPath, Mono.Unix.Native.OpenFlags.O_RDONLY);
+     if (mail_fd == -1)
+         return null;
+     InitializeGMime ();
+     GMime.StreamFs stream = new GMime.StreamFs (mail_fd);
+     GMime.Parser parser = new GMime.Parser (stream);
+     GMime.Message message = parser.ConstructMessage ();
+     stream.Dispose ();
+     parser.Dispose ();
+     // nothing to snippet if the file could not be parsed into a message
+     if (message == null)
+         return null;
+     try {
+         bool html = false;
+         string body = message.GetBody (true, out html);
+         // FIXME: Also handle snippets from html message parts - involves invoking html filter
+         if (html || body == null) {
+             Logger.Log.Debug ("No text/plain message part in " + hit.Uri);
+             return null;
+         }
+         return SnippetFu.GetSnippet (query_terms, new StringReader (body));
+     } finally {
+         message.Dispose ();
+     }
+ }
+
+ /////////////////////////////////////////////////////////////////////////////
+ // FIXME: How to determine if an mbox hit is valid without scanning the whole file
+
+ public string Name {
+ get { return "Maildir"; }
+ }
+
+ /**
+ * Guesses the path of the local mail folder.
+ * This is distribution specific. Mandrake puts kmail mails in
+ * ~/.Mail whereas default kmail folder location is ~/Mail
+ * I guess each distribution can fix this path as they know what is
+ * the path.
+ * Till then, using a guesser to find out which of ~/Mail and ~/.Mail
+ * is valid (see GuessLocalFolder).
+ * First try ~/Mail, then try ~/.Mail.
+ * Returns the first candidate accepted, or null if neither is.
+ */
+ private string GuessLocalFolderPath ()
+ {
+ string location1 = Path.Combine (PathFinder.HomeDir, "Mail");
+ string location2 = Path.Combine (PathFinder.HomeDir, ".Mail");
+
+ if (GuessLocalFolder (location1))
+ return location1;
+ else if (GuessLocalFolder (location2))
+ return location2;
+ else
+ return null;
+ }
+
+ /**
+ * Accept path as a mail folder candidate; the only check made is that the directory exists.
+ */
+ private bool GuessLocalFolder (string path)
+ {
+ if (! Directory.Exists (path))
+ return false;
+ return true;
+ }
+
+ }
+
+}
diff -urN beagle-0.2.10/beagled/Makefile.am beagle-0.2.10/beagled/Makefile.am
--- beagle-0.2.10/beagled/Makefile.am 2006-09-18 18:24:29.000000000 -0400
+++ beagle-0.2.10/beagled/Makefile.am 2006-09-28 12:53:38.000000000 -0400
@@ -291,6 +291,12 @@
$(kmailqueryable)/KMailIndexableGenerator.cs \
$(kmailqueryable)/KMailIndexer.cs
+maildirqueryable = $(srcdir)/MaildirQueryable
+MAILDIR_QUERYABLE_CSFILES = \
+ $(maildirqueryable)/MaildirQueryable.cs \
+ $(maildirqueryable)/MaildirIndexableGenerator.cs \
+ $(maildirqueryable)/MaildirIndexer.cs
+
blamqueryable = $(srcdir)/BlamQueryable
BLAM_QUERYABLE_CSFILES = \
$(blamqueryable)/BlamQueryable.cs
@@ -319,6 +325,7 @@
DAEMON_DLL_CSFILES = \
$(LUCENE_CSFILES) \
$(KMAIL_QUERYABLE_CSFILES) \
+ $(MAILDIR_QUERYABLE_CSFILES) \
$(FILE_SYSTEM_QUERYABLE_CSFILES) \
$(GAIM_LOG_QUERYABLE_CSFILES) \
$(INDEXING_SERVICE_QUERYABLE_CSFILES) \
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]