[mhonarc] implement process_fd, make handle_message work better



commit d35c06c8e32db7539027f2aaa76a6c4707d1b786
Author: Olav Vitters <olav vitters nl>
Date:   Tue Feb 12 20:57:50 2013 +0100

    implement process_fd, make handle_message work better

 archive.py |   97 +++++++++++++++++++++++++++++++++++++++++++++---------------
 1 files changed, 73 insertions(+), 24 deletions(-)
---
diff --git a/archive.py b/archive.py
index 0acde8f..0e580eb 100755
--- a/archive.py
+++ b/archive.py
@@ -10,6 +10,10 @@ import dateutil.parser
 import re
 import datetime
 import email.utils
+try:
+        from cStringIO import StringIO
+except ImportError:
+        from StringIO import StringIO
 
 
 class Archiver:
@@ -35,14 +39,17 @@ class Archiver:
         'december': 12
     }
 
-    def __init__(self, listname, private=False, debug=False):
+    def __init__(self, listname, private=False, debug=False,
+                       start_time = None, end_time = None):
 
         self.listname = listname
         self.private = private
         self.debug = debug
+        self.start_time = None
+        self.end_time = None
 
         self.olddir = None
-        self.tmpname = 'XXX'
+        self.tmpfile = None
 
         self.start_time = None
         self.end_time = None
@@ -62,7 +69,7 @@ class Archiver:
             cmd = ['mhonarc', '-umask', '022', '-rcfile', rcfile, '-add', '-output', path, tmpname,
                               '-definevar', 'ARCHDATE=%s LISTNAME=%s' % (self.olddir, self.listname)]
 
-            subprocess.call(cmd, stdout=error_fd, stderr=subprocess.STDOUT)
+            #subprocess.call(cmd, stdout=error_fd, stderr=subprocess.STDOUT)
 
         # Append message(s) to mbox (possibly gzipped)
         if os.path.exists("%s.txt" % path):
@@ -84,11 +91,14 @@ class Archiver:
             pass
 
 
-    def handle_message(self, msg):
-        received_texts = msg.get_all('Received')
+    def handle_message(self, fd):
+        msg = email.message_from_file(fd)
+        received_texts = msg.get_all('received')
 
         received_time = None
 
+        print received_texts
+
         # Determine received time
         for text in received_texts:
             if ';' not in text: continue
@@ -102,31 +112,71 @@ class Archiver:
 
             if time is not None:
                 received_time = datetime.datetime.fromtimestamp(time)
+                received_time_text = text
                 break
 
-        if received_time is not None:
-            if self.start_time and received_time < self.start_time:
-                return False
+        print received_time
+
+        if received_time is None:
+            # XXX - do some debug stuff
+            return False
 
-            if self.end_time and received_time > self.end_time:
-                return False
+        if self.start_time and received_time < self.start_time:
+            return False
 
-            # Archive emails per month
-            path = received_time.strftime("%Y-%B")
+        if self.end_time and received_time > self.end_time:
+            return False
 
-            # mhonarc can archive multiple emails at once, so only run mhonarc
-            # once output path changes
-            if self.olddir is not None and self.olddir != path:
-                # XXX wtf
-                self.output(False)
+        # Archive emails per month
+        path = received_time.strftime("%Y-%B")
 
-            self.olddir = path
-            self.last_time = received_time
-            self.last_parseable_time = received_time_text
+        # mhonarc can archive multiple emails at once, so only run mhonarc
+        # once output path changes
+        if self.olddir is not None and self.olddir != path:
+            # XXX wtf
+            self.output(False)
+
+        self.olddir = path
+        self.last_time = received_time
+        self.last_parseable_time = received_time_text
 
     def process_fd(self, fd):
-        # XXX - implement
-        pass
+        """Process a file descriptor for multiple emails separated using the mbox format
+        
+        Calls handle_message for each individual message"""
+        _fromlinepattern = (r"From \s*[^\s]+\s+\w\w\w\s+\w\w\w\s+\d?\d\s+"
+                            r"\d?\d:\d\d(:\d\d)?(\s+[^\s]+)?\s+\d\d\d\d\s*"
+                            r"[^\s]*\s*"
+                            "$")
+        re_from = re.compile(_fromlinepattern)
+
+        blank = True
+        msg = StringIO()
+        while 1:
+            line = fd.readline()
+            if line == "":
+                break
+
+            if line == "\n":
+                msg.write(line)
+                blank = True
+                continue
+
+            if blank and re_from.match(line):
+                sys.stdout.write(line)
+
+                if msg.tell():
+                    msg.seek(0)
+                    self.handle_message(msg)
+                    msg.seek(0)
+                    msg.truncate(0)
+
+            msg.write(line)
+
+        if msg.tell():
+            self.handle_message(msg)
+
+        self.output(True)
 
 
     @classmethod
@@ -177,7 +227,7 @@ def main():
     parser.add_argument('--private', action='store_true')
     parser.add_argument('--start-time', type=mkdate)
     parser.add_argument('--end-time', type=mkdate)
-    parser.add_argument('--listname')
+    parser.add_argument('--listname', required=True)
 
     group = parser.add_mutually_exclusive_group()
     group.add_argument('--makeindex', action='store_true')
@@ -193,7 +243,6 @@ def main():
 
     import pprint
     pprint.pprint(options)
-    sys.exit(2)
 
     if options.makeindex:
         Archiver.make_index(options.listname, options.private)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]