[mhonarc] archive.py: Parse Date header if Received is not available



commit e00e5946eedb999a068cc46bbf64fc31a2bbee9c
Author: Bartłomiej Piotrowski <bpiotrowski gnome org>
Date:   Tue Jul 16 16:06:17 2019 +0200

    archive.py: Parse Date header if Received is not available

 archive.py | 63 ++++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 41 insertions(+), 22 deletions(-)
---
diff --git a/archive.py b/archive.py
index 2a8f5b0..84d0224 100755
--- a/archive.py
+++ b/archive.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python2
 
 import os
 import os.path
@@ -138,37 +138,56 @@ class Archiver:
             newmsgs.seek(0)
             newmsgs.truncate(0)
 
-    def determine_received_time(self, fd):
+    def determine_time(self, fd):
         """Determines the received time of a message"""
         fd.seek(0)
         msg = email.message_from_file(fd)
+
         received_texts = msg.get_all('received')
-        if received_texts is None:
-            if self.debug:
-                print sys.stderr, "No received header found!"
-            return None
+        if received_texts is not None:
+            # Determine received time
+            for text in received_texts:
+                received_time = None
+                if ';' not in text:
+                    continue
+
+                received_time = email.utils.parsedate_tz(text.rpartition(';')[2])
+                if received_time is None:
+                    try:
+                        received_time = dateutil.parser.parse(text.split(':')[2])
+                    except ValueError:
+                        continue
+                try:
+                    received_time = email.utils.mktime_tz(received_time)
+                except ValueError:
+                    received_time = None
+
+                if received_time is not None:
+                    received_time = datetime.datetime.utcfromtimestamp(received_time)
+                    break
 
-        received_time = None
+        else:
+            date = msg.get('date')
+            received_time = email.utils.parsedate_tz(date)
 
-        # Determine received time
-        for text in received_texts:
-            if ';' not in text: continue
-            received_time = email.utils.parsedate_tz(text.rpartition(';')[2])
             if received_time is None:
-                continue
-            try:
-                received_time = email.utils.mktime_tz(received_time)
-            except ValueError:
-                received_time = None
+                try:
+                    received_time = dateutil.parser.parse(received_time_text)
+                except ValueError:
+                    received_time = None
+
+                try:
+                    received_time = email.utils.mktime_tz(received_time)
+                except ValueError:
+                    received_time = None
 
             if received_time is not None:
-                received_time = datetime.datetime.utcfromtimestamp(received_time)
-                break
+                received_time = datetime.datetime.utcfromtimestamp(email.utils.mktime_tz(received_time))
 
         if received_time is None:
             if self.debug:
-                 print sys.stderr, "Failed to parse time from the received headers!"
-            return None
+                print sys.stderr, "Failed to parse time from the received headers!"
+                return None
 
         if self.start_time and received_time < self.start_time:
             return False
@@ -179,11 +198,12 @@ class Archiver:
         # Archive emails per month
         return received_time.strftime("%Y-%B")
 
+
     def handle_message(self, fd, newmsgs, archivepath):
         """Process one message
 
         Called from process_fd"""
-        path = self.determine_received_time(fd)
+        path = self.determine_time(fd)
 
         if path == False:
             # Message should be ignored
@@ -588,4 +608,3 @@ if __name__ == "__main__":
         if e.errno != errno.EPIPE:
             raise
         sys.exit(0)
-


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]