Current state of mail feed code



Attached is a full diff of my workspace against HEAD (walters wanted to
work on it tonight).

It's probably committable (apart from a few minor local edits), but
since the Python client appears to be in a non-working state, I didn't
want to commit in case it destabilised the server further.

Dave
Index: .classpath
===================================================================
RCS file: /cvs/gnome/yarrr/.classpath,v
retrieving revision 1.26
diff -u -r1.26 .classpath
--- .classpath	3 Mar 2005 22:14:17 -0000	1.26
+++ .classpath	10 Mar 2005 23:55:54 -0000
@@ -16,11 +16,11 @@
 	<classpathentry kind="lib" path="/usr/share/java/commons-logging-1.0.4.jar"/>
 	<classpathentry kind="lib" path="/usr/share/java/log4j.jar"/>
 	<classpathentry kind="lib" path="/usr/share/java/commons-collections.jar"/>
-	<classpathentry kind="lib" path="/usr/share/java/javamail/mailapi.jar"/>
+	<classpathentry sourcepath="/opt/Downloads/mail-1.0/source" kind="lib" path="/usr/share/java/javamail/mailapi.jar"/>
 	<classpathentry kind="lib" path="/usr/share/java/jaf.jar"/>
 	<classpathentry kind="con" path="org.eclipse.jdt.USER_LIBRARY/JTA"/>
 	<classpathentry sourcepath="/home/seth/Desktop/rhino1_6R1/src" kind="lib" path="lib/js.jar"/>
-	<classpathentry kind="lib" path="/usr/share/java/javamail/smtp.jar"/>
+	<classpathentry sourcepath="/opt/Downloads/mail-1.0/source" kind="lib" path="/usr/share/java/javamail/smtp.jar"/>
 	<classpathentry kind="lib" path="lib/hibernate3.jar"/>
 	<classpathentry kind="lib" path="lib/ehcache-1.1.jar"/>
 	<classpathentry kind="lib" path="/usr/share/java/antlr.jar"/>
Index: launchers/Yarrr.launch
===================================================================
RCS file: /cvs/gnome/yarrr/launchers/Yarrr.launch,v
retrieving revision 1.3
diff -u -r1.3 Yarrr.launch
--- launchers/Yarrr.launch	12 Feb 2005 11:03:40 -0000	1.3
+++ launchers/Yarrr.launch	10 Mar 2005 23:55:54 -0000
@@ -6,6 +6,7 @@
 <listEntry value="org.eclipse.debug.ui.launchGroup.run"/>
 </listAttribute>
 <booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
+<booleanAttribute key="org.eclipse.jdt.launching.STOP_IN_MAIN" value="true"/>
 <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.gnome.yarrr.Yarrr"/>
 <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="yarrr"/>
 </launchConfiguration>
Index: src/org/gnome/yarrr/Topic.java
===================================================================
RCS file: /cvs/gnome/yarrr/src/org/gnome/yarrr/Topic.java,v
retrieving revision 1.28
diff -u -r1.28 Topic.java
--- src/org/gnome/yarrr/Topic.java	10 Mar 2005 16:11:32 -0000	1.28
+++ src/org/gnome/yarrr/Topic.java	10 Mar 2005 23:55:54 -0000
@@ -198,6 +198,8 @@
         .setLong("clientVersion", clientVersion)
         .setLong("currentVersion", this.version)
         .list();
+        
+        //seems to NOT properly set up an EmailMessage if it returns one of these as the child.message
 	}
     
 	public List getNewLinksSince(long clientVersion) {
Index: src/org/gnome/yarrr/database/PostgresqlManager.java
===================================================================
RCS file: /cvs/gnome/yarrr/src/org/gnome/yarrr/database/PostgresqlManager.java,v
retrieving revision 1.4
diff -u -r1.4 PostgresqlManager.java
--- src/org/gnome/yarrr/database/PostgresqlManager.java	4 Mar 2005 19:59:13 -0000	1.4
+++ src/org/gnome/yarrr/database/PostgresqlManager.java	10 Mar 2005 23:55:54 -0000
@@ -25,6 +25,7 @@
 import java.util.Random;
 
 import org.apache.log4j.Logger;
+import org.gnome.yarrr.utils.OutputInputStream;
 
 /**
  * Manages an instance of the PostgreSQL server pointing at a custom
@@ -227,35 +228,6 @@
 
 	}
 	
-	class OutputInputStream extends java.lang.Thread {
-		private InputStream in;
-		private OutputStream out;
-		
-		OutputInputStream(String name, InputStream in, OutputStream out) {
-			super(name);
-			this.in = in;
-			this.out = out;
-		}
-
-		public void run() {
-			byte buf[] = new byte[128];
-			int len;
-			try {
-				boolean doneReading = false;
-				while (!doneReading) {
-					len = this.in.read(buf);
-					if (len < 0) {
-						doneReading = true;
-					} else {
-						this.out.write(buf, 0, len);
-					}
-				}
-			} catch (IOException e) {
-				
-			}
-		}
-	}
-
 	/**
 	 * @return a singleton
 	 */
@@ -273,7 +245,7 @@
 		return new PrintStream(fos);
 	}
 	
-	private int execOutput(String cmd) throws IOException {
+	int execOutput(String cmd) throws IOException {
 		logger.debug("Running '" + cmd + "'");
 		
 		// TODO This is what we want to do, but classpath doesn't have getenv();
Index: src/org/gnome/yarrr/email/Importer.java
===================================================================
RCS file: /cvs/gnome/yarrr/src/org/gnome/yarrr/email/Importer.java,v
retrieving revision 1.3
diff -u -r1.3 Importer.java
--- src/org/gnome/yarrr/email/Importer.java	9 Mar 2005 15:21:40 -0000	1.3
+++ src/org/gnome/yarrr/email/Importer.java	10 Mar 2005 23:55:54 -0000
@@ -16,6 +16,7 @@
 import java.util.LinkedList;
 import java.util.List;
 
+import javax.mail.Address;
 import javax.mail.MessagingException;
 import javax.mail.internet.InternetAddress;
 import javax.mail.internet.MimeMessage;
@@ -53,15 +54,23 @@
         }
     }
     
-    public Person getAuthorForInjectedMail(MimeMessage mimeMessage) throws NoSuchAlgorithmException, NoSuchProviderException{
-        // TODO: Message instances require an author public key
-        // Exactly how should this be implemented?  Dunno, so for now 
-        // just generate a new keypair for each message we inject (!)
-        // Ultimately we might want to be able to identify people in some smart fashion...
-        // signed emails?  or heuristics?
-        // Depends on how I can best wind up Seth (or Havoc)
-        KeyPair pair = Cryptography.generateKeyPair();
-        return Person.getPerson(pair.getPublic());
+    static String getFrom(MimeMessage mimeMessage) throws MessagingException {
+        Address[] addresses = mimeMessage.getFrom();        
+        InternetAddress iaddr = (InternetAddress)addresses[0];
+        return iaddr.getAddress();        
+    }
+    
+    public Person getAuthorForInjectedMail(MimeMessage mimeMessage) throws NoSuchAlgorithmException, NoSuchProviderException, MessagingException, HibernateException{
+        String senderAddress = getFrom(mimeMessage);
+        
+        EmailAddress emailAddress = EmailAddress.lookup(senderAddress);
+        if (emailAddress==null) {
+            // create new instance of email
+            KeyPair pair = Cryptography.generateKeyPair();
+            emailAddress = new EmailAddress(senderAddress, pair);
+        }
+        
+        return emailAddress.getPerson();
     }
     
     public Topic getTopicForInjectedMail(MimeMessage mimeMessage, EmailMessage parentEmailMessage) throws HibernateException, MessagingException {
Index: src/org/gnome/yarrr/email/MailingListSniffer.java
===================================================================
RCS file: /cvs/gnome/yarrr/src/org/gnome/yarrr/email/MailingListSniffer.java,v
retrieving revision 1.1
diff -u -r1.1 MailingListSniffer.java
--- src/org/gnome/yarrr/email/MailingListSniffer.java	9 Feb 2005 23:22:25 -0000	1.1
+++ src/org/gnome/yarrr/email/MailingListSniffer.java	10 Mar 2005 23:55:54 -0000
@@ -27,7 +27,9 @@
 
         public String matchMessage(MimeMessage mimeMessage) throws MessagingException {
             String[] headerValues = mimeMessage.getHeader(headerName);
-            
+            if (headerValues==null) {
+                return null;
+            }
             for (int i=0;i<headerValues.length;i++) {
                 Matcher matcher = pattern.matcher(headerValues[i]);
                 if (matcher.find()) {
Index: src/org/gnome/yarrr/hibernate/HibernateUtil.java
===================================================================
RCS file: /cvs/gnome/yarrr/src/org/gnome/yarrr/hibernate/HibernateUtil.java,v
retrieving revision 1.14
diff -u -r1.14 HibernateUtil.java
--- src/org/gnome/yarrr/hibernate/HibernateUtil.java	9 Mar 2005 15:21:39 -0000	1.14
+++ src/org/gnome/yarrr/hibernate/HibernateUtil.java	10 Mar 2005 23:55:54 -0000
@@ -7,6 +7,7 @@
 import org.gnome.yarrr.Topic;
 import org.gnome.yarrr.TopicPerUser;
 import org.gnome.yarrr.email.Email;
+import org.gnome.yarrr.email.EmailAddress;
 import org.gnome.yarrr.email.MailingList;
 import org.gnome.yarrr.person.Group;
 import org.gnome.yarrr.person.Person;
@@ -44,6 +45,7 @@
 		.addClass(Topic.class)
 		.addClass(TopicPerUser.class)
 		.addClass(Email.class)
+        .addClass(EmailAddress.class)
 		.addClass(MailingList.class)
 		.setProperty(Environment.HBM2DDL_AUTO, "update")
 		;
Index: src/org/gnome/yarrr/tests/EmailTests.java
===================================================================
RCS file: /cvs/gnome/yarrr/src/org/gnome/yarrr/tests/EmailTests.java,v
retrieving revision 1.3
diff -u -r1.3 EmailTests.java
--- src/org/gnome/yarrr/tests/EmailTests.java	10 Feb 2005 03:37:55 -0000	1.3
+++ src/org/gnome/yarrr/tests/EmailTests.java	10 Mar 2005 23:55:54 -0000
@@ -4,7 +4,6 @@
  */
 package org.gnome.yarrr.tests;
 import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Properties;
@@ -13,27 +12,17 @@
 import javax.mail.Session;
 import javax.mail.internet.MimeMessage;
 
+import org.gnome.yarrr.utils.StreamUtils;
+
 import junit.framework.TestCase;
 
 /**
  * @author dmalcolm
  */
 public class EmailTests extends TestCase {
-    static byte [] loadAllOfStreamJavaDamnYou(InputStream input) throws IOException {
-        ByteArrayOutputStream out = new ByteArrayOutputStream();
-        byte [] buf = new byte[8192];
-        int len;
-        
-        while ((len = input.read(buf)) != -1) {
-            out.write(buf);
-        }
-        return out.toByteArray();
-    }
-    
-    
     static byte [] getTestEmail(String filename) throws IOException {
         InputStream inputStream = EmailTests.class.getResourceAsStream("test-emails/"+filename);
-        return loadAllOfStreamJavaDamnYou(inputStream);
+        return StreamUtils.loadAsByteArray(inputStream);
     }
     
     static public MimeMessage loadTestEmail(String filename) throws MessagingException, IOException {
@@ -52,5 +41,6 @@
         MimeMessage message = loadTestEmail("multiline-subject.email");        
         // FIXME: should it strip out the \r\n\t from the email Subject?  is this a bug in the javamail implementation?
         assertEquals("British English (was Re: GNOME Lovers Needed: l10n work for\r\n\tlocations database)", message.getSubject());        
-    }
+    }    
 }
+
Index: src/org/gnome/yarrr/tests/TestInitializerTests.java
===================================================================
RCS file: /cvs/gnome/yarrr/src/org/gnome/yarrr/tests/TestInitializerTests.java,v
retrieving revision 1.7
diff -u -r1.7 TestInitializerTests.java
--- src/org/gnome/yarrr/tests/TestInitializerTests.java	9 Mar 2005 15:21:38 -0000	1.7
+++ src/org/gnome/yarrr/tests/TestInitializerTests.java	10 Mar 2005 23:55:54 -0000
@@ -23,7 +23,7 @@
         init.doCreation();
         
         List topics = Topic.getAllTopics();        
-        assertEquals(10,topics.size());
+        assertEquals(6,topics.size());
         
         for (Iterator i=topics.iterator(); i.hasNext(); ) {
             Topic topic = (Topic)i.next();
Index: src/org/gnome/yarrr/xmlrpc/DefaultMethods.java
===================================================================
RCS file: /cvs/gnome/yarrr/src/org/gnome/yarrr/xmlrpc/DefaultMethods.java,v
retrieving revision 1.21
diff -u -r1.21 DefaultMethods.java
--- src/org/gnome/yarrr/xmlrpc/DefaultMethods.java	10 Mar 2005 16:16:29 -0000	1.21
+++ src/org/gnome/yarrr/xmlrpc/DefaultMethods.java	10 Mar 2005 23:55:54 -0000
@@ -175,22 +175,28 @@
     	logger.info("injectEmail(" + contents.length + " bytes)");
 
     	Importer importer = new Importer();
-        InjectionResult injectionResult = importer.injectEmail(contents);
-        String stringResult = "avast! added email:\""+injectionResult.email.getMimeMessage().getSubject()+"\"\n";
+        try {
+            InjectionResult injectionResult = importer.injectEmail(contents);
         
-        for (Iterator i = injectionResult.messageInsertions.iterator(); i.hasNext();) {
-            Importer.EmailMessageInsertion insertion = (Importer.EmailMessageInsertion)i.next();
-        
-            stringResult += "added message:\""+insertion.emailMessage.getSubject()+"\"";
-            if (insertion.topic != null) {
-                stringResult += " to topic \"" + insertion.topic.getName() + "\" ";;
-            } else {
-                stringResult += " to database (not added to any topics)";
-            }        
-        }
-        logger.info(stringResult);
+            String stringResult = "avast! added email:\""+injectionResult.email.getMimeMessage().getSubject()+"\"\n";
+            
+            for (Iterator i = injectionResult.messageInsertions.iterator(); i.hasNext();) {
+                Importer.EmailMessageInsertion insertion = (Importer.EmailMessageInsertion)i.next();
+                
+                stringResult += "added message:\""+insertion.emailMessage.getSubject()+"\"";
+                if (insertion.topic!=null) {
+                    stringResult += " to topic \"" + insertion.topic.getName() + "\" ";            
+                } else {
+                    stringResult += " to database (not added to any topics)";
+                }        
+            }
+            logger.info(stringResult);
 
-        return stringResult;
+            return stringResult;
+        } catch (Exception e) {
+            logger.error(new String(contents), e);
+            throw e;
+        }
     }
 	
 	public Vector updateTopic(String topicId, byte[] userPublicKeyBytes, String versionAtClientString, String perUserVersionAtClientString) throws Exception {
Index: src/org/gnome/yarrr/email/EmailAddress.hbm.xml
===================================================================
RCS file: src/org/gnome/yarrr/email/EmailAddress.hbm.xml
diff -N src/org/gnome/yarrr/email/EmailAddress.hbm.xml
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/org/gnome/yarrr/email/EmailAddress.hbm.xml	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,16 @@
+<?xml version="1.0"?>
+<!DOCTYPE hibernate-mapping PUBLIC 
+	"-//Hibernate/Hibernate Mapping DTD 2.0//EN"
+	"http://hibernate.sourceforge.net/hibernate-mapping-2.0.dtd">
+<hibernate-mapping 
+	package="org.gnome.yarrr.email">
+
+	<class name="EmailAddress">
+		<id name="id">
+			<generator class="native"/>
+		</id>
+		<property name="address"/>
+		<property name="keypair"/>
+	</class>
+	
+</hibernate-mapping>
Index: src/org/gnome/yarrr/email/EmailAddress.java
===================================================================
RCS file: src/org/gnome/yarrr/email/EmailAddress.java
diff -N src/org/gnome/yarrr/email/EmailAddress.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/org/gnome/yarrr/email/EmailAddress.java	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,69 @@
+/*
+ * Created on 07-Mar-2005
+ *
+ * TODO To change the template for this generated file go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+package org.gnome.yarrr.email;
+
+import java.security.KeyPair;
+
+import org.gnome.yarrr.hibernate.HibernateUtil;
+import org.gnome.yarrr.hibernate.Persistent;
+import org.gnome.yarrr.person.Person;
+import org.hibernate.HibernateException;
+import org.hibernate.criterion.Expression;
+
+/**
+ * @author dmalcolm
+ *
+ * Class representing an email address, when handling email feeds.
+ * We autogenerate a keypair/person to cover each email address we encounter, since all we can say about these messages
+ * is that they appear to come from a particular email address (and relatively few people sign their messages).
+ */
+public class EmailAddress extends Persistent {
+    String address;
+    KeyPair keypair;
+    
+    protected EmailAddress() {
+    }
+    
+    public EmailAddress(String address, KeyPair keypair) throws HibernateException {
+        this.address = address;
+        this.keypair = keypair;
+        HibernateUtil.getSession().save(this);
+    }
+    
+    public String getAddress() {
+        return address;
+    }
+    public void setAddress(String address) {
+        this.address = address;
+    }
+    public KeyPair getKeypair() {
+        return keypair;
+    }
+    public void setKeypair(KeyPair keypair) {
+        this.keypair = keypair;
+    }
+
+    /**
+     * @param stringAddress
+     * @return
+     * @throws HibernateException 
+     */
+    public static EmailAddress lookup(String stringAddress) throws HibernateException {
+        EmailAddress result = (EmailAddress)
+            HibernateUtil.getSession().createCriteria(EmailAddress.class)
+                .add( Expression.eq("address", stringAddress) )
+                .uniqueResult();
+        return result;
+    }
+
+    /**
+     * @return
+     */
+    public Person getPerson() {
+        return Person.getPerson(keypair.getPublic());
+    }
+}
Index: src/org/gnome/yarrr/email/MailmanArchiveScraper.java
===================================================================
RCS file: src/org/gnome/yarrr/email/MailmanArchiveScraper.java
diff -N src/org/gnome/yarrr/email/MailmanArchiveScraper.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/org/gnome/yarrr/email/MailmanArchiveScraper.java	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,238 @@
+/*
+ * Created on 02-Mar-2005
+ *
+ * TODO To change the template for this generated file go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+package org.gnome.yarrr.email;
+
+import gnu.mail.providers.mbox.MboxStore;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.net.URL;
+import java.util.Iterator;
+import java.util.Vector;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.mail.Folder;
+import javax.mail.Message;
+import javax.mail.MessagingException;
+import javax.mail.Session;
+import javax.mail.URLName;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.apache.xmlrpc.XmlRpcClient;
+import org.apache.xmlrpc.XmlRpcException;
+import org.gnome.yarrr.utils.OutputInputStream;
+import org.gnome.yarrr.utils.StreamUtils;
+import org.w3c.dom.Document;
+import org.xml.sax.SAXException;
+
+/**
+ * @author dmalcolm
+ *
+ * TODO To change the template for this generated type comment go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+public class MailmanArchiveScraper {
+
+        /**
+     * @author dmalcolm
+     *
+     * TODO To change the template for this generated type comment go to
+     * Window - Preferences - Java - Code Style - Code Templates
+     */
+    public class FormailError extends Exception {
+        /**
+         * Comment for <code>serialVersionUID</code>
+         */
+        private static final long serialVersionUID = 1L;
+
+        /**
+         * @param formailResult
+         */
+        public FormailError(int formailResult) {
+            
+            // TODO Auto-generated constructor stub
+        }
+    }
+// alas this doesn't work: 
+    // <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
+    // [Fatal Error] :1:49: White spaces are required between publicId and systemId.
+    // org.xml.sax.SAXParseException: White spaces are required between publicId and systemId.
+    Document readArchivePage(URL archiveUrl) throws ParserConfigurationException, IOException, IOException, SAXException {
+        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+        DocumentBuilder builder = factory.newDocumentBuilder();
+        Document document = builder.parse(archiveUrl.openStream());
+        return document;
+        
+    }
+    
+    // alas, the HTML is old-school HTML and not XHTML, so we have to do this in a hackish way
+    public Vector getAllgzippedTextURLs(URL archivePageUrl) throws IOException {
+        Vector result = new Vector();
+        BufferedReader reader = new BufferedReader(new InputStreamReader(archivePageUrl.openStream()));
+        
+        // Regex pattern: <td><A href="(.*\.txt\.gz)">
+        // Escape into a string literal:
+        final String REGEX = "<td><A href=\"(.*\\.txt\\.gz)\">";
+        Pattern pattern = Pattern.compile(REGEX);
+        
+        while (reader.ready()) {
+            String line = reader.readLine();
+            
+            Matcher matcher = pattern.matcher(line);
+            if (matcher.find()) {
+                String gzippedTextName = matcher.group(1); 
+                URL gzippedTextURL = new URL(archivePageUrl, gzippedTextName);
+                result.add(gzippedTextURL);
+            }
+        }
+        return result;
+
+    }
+    
+    public MailmanArchiveScraper(URL archivePageUrl) throws Exception {
+        Vector gzippedTextURLs = getAllgzippedTextURLs(archivePageUrl);
+        
+        for (Iterator i=gzippedTextURLs.iterator(); i.hasNext(); ) {
+            URL gzippedTextURL = (URL)i.next();
+            importGZippedText(gzippedTextURL);
+        }
+    }
+    
+    private byte[] runCommandWithCapture(String command, byte[] stdin) throws Exception {
+        Process process = Runtime.getRuntime().exec(command);
+        OutputStream stdinStream = process.getOutputStream();        
+        InputStream stdoutStream = process.getInputStream();
+        InputStream stderrStream = process.getErrorStream();
+        ByteArrayOutputStream bufOutput = new ByteArrayOutputStream();
+        
+        // having these lines seems to cause problems:
+        new OutputInputStream("stdout", stdoutStream, bufOutput).start();
+        new OutputInputStream("stderr", stderrStream, System.err).start();
+        
+        stdinStream.write(stdin);
+        stdinStream.close();
+        int result = process.waitFor();
+        if (result!=0) {
+            throw new Exception();
+        }
+        
+        
+        return bufOutput.toByteArray();
+    }
+    
+    // Run a command, feeding the byte array to stdin:
+    private int runCommand(String command, byte[] stdin) throws IOException, InterruptedException {
+        Process process = Runtime.getRuntime().exec(command);
+        OutputStream stdinStream = process.getOutputStream();        
+        InputStream stdoutStream = process.getInputStream();
+        InputStream stderrStream = process.getErrorStream();
+        
+        // having these lines seems to cause problems:
+        new OutputInputStream("formail stdout", stdoutStream, System.out).start();
+        new OutputInputStream("formail stderr", stderrStream, System.err).start();
+        
+        stdinStream.write(stdin);
+        stdinStream.close();
+        int formailResult = process.waitFor();
+        return formailResult;
+    }
+    
+    private void importMBoxArchiveUsingFormail(byte[] decompressedData) throws Exception {
+        // Run formail on the stream, and pipe back to pressgang.py...\
+        String injectionURL = "http://localhost:19842"; 
+        String injectionCommand = "/opt/Extraction/eclipse/workspace/yarrr/tools/pressgang.py " + injectionURL;
+        //String injectionCommand = "date";
+        String formailCommand = "formail -s "+ injectionCommand; 
+        //String formailCommand = "sort";
+        //System.out.println(formailCommand);
+        int formailResult = runCommand(formailCommand, decompressedData);
+        if (formailResult!=0) {
+            //System.out.write(decompressedData);
+            throw new FormailError(formailResult);
+        }        
+    }
+
+    private void importMBoxArchiveUsingJavaStuff(byte[] decompressedData) throws Exception {
+        File tmpFile = File.createTempFile("yarrr-mail-import", ".mbox");
+        FileOutputStream foo = new FileOutputStream(tmpFile);
+        foo.write(decompressedData);
+        foo.close();
+        
+        Session session = Session.getInstance(System.getProperties());           
+        MboxStore tmpStore = new MboxStore(session, new URLName(tmpFile.toString()));
+        tmpStore.connect();
+        Folder folder = tmpStore.getDefaultFolder();
+        folder.open(Folder.READ_ONLY);
+        Message[] messages = folder.getMessages();
+        
+        
+        for (int i=0;i<messages.length;i++) {
+            importMessage(messages[i]);
+        }
+    }
+
+    public byte[] messageToByteArray(Message message) throws IOException, MessagingException {
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        message.writeTo(out);
+        return out.toByteArray();
+    }
+
+    /**
+     * @param message
+     * @throws MessagingException 
+     * @throws IOException 
+     * @throws XmlRpcException 
+     */
+    private void importMessage(Message message) throws IOException, MessagingException, XmlRpcException {
+        System.out.println(message.getSubject());
+        byte[] bytes = messageToByteArray(message);
+
+        XmlRpcClient xmlrpc = new XmlRpcClient ("http://localhost:19842");        
+
+        Vector params = new Vector();
+        params.add(bytes);        
+        xmlrpc.execute("injectEmail", params);
+    }
+
+    /**
+     * @param gzippedTextURL
+     * @throws Exception 
+     */
+    private void importGZippedText(URL gzippedTextURL) throws Exception {
+        System.out.println("Importing " + gzippedTextURL.toExternalForm());
+        byte[] gzipData = StreamUtils.loadAsByteArray(gzippedTextURL.openStream());
+        
+        //InputStream in = new GZIPInputStream(gzippedTextURL.openStream());
+        
+        // Maybe just pipe to stdout, so we can pipe results into formail...        
+        //byte[] gzipData = StreamUtils.loadAsByteArray(in);
+        System.out.println("...downloaded " + gzipData.length + " bytes (compressed).");
+        byte[] decompressedData = runCommandWithCapture("gunzip", gzipData);
+        System.out.println("...decompressed to " + decompressedData.length + " bytes.");
+        
+        importMBoxArchiveUsingJavaStuff(decompressedData);
+    }
+
+    public static void main(String[] args) {
+        try {
+            new MailmanArchiveScraper(new URL("http://mail.gnome.org/archives/desktop-devel-list/"));
+            
+        } catch (Exception e) {
+            // TODO Auto-generated catch block
+            e.printStackTrace();
+        }
+    }
+}
Index: src/org/gnome/yarrr/tests/StreamUtilTests.java
===================================================================
RCS file: src/org/gnome/yarrr/tests/StreamUtilTests.java
diff -N src/org/gnome/yarrr/tests/StreamUtilTests.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/org/gnome/yarrr/tests/StreamUtilTests.java	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,55 @@
+/*
+ * Created on 03-Mar-2005
+ *
+ * TODO To change the template for this generated file go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+package org.gnome.yarrr.tests;
+
+import java.io.InputStream;
+
+import junit.framework.TestCase;
+
+import org.gnome.yarrr.utils.StreamUtils;
+
+/**
+ * @author dmalcolm
+ *
+ * TODO To change the template for this generated type comment go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+public class StreamUtilTests extends TestCase {
+    private static final String TXT_GZ_URL = "http://mail.gnome.org/archives/desktop-devel-list/2003-August.txt.gz";
+    
+    public void testLoadLocalFile() throws Exception {
+        InputStream inputStream = EmailTests.class.getResourceAsStream("test-emails/multiline-subject.email");
+        byte[] byteArray = StreamUtils.loadAsByteArray(inputStream);
+        assertEquals(5119, byteArray.length);
+    }
+    
+//    public void testLoadRemoteFile() throws Exception {
+//        URL url = new URL(TXT_GZ_URL);   
+//        InputStream in = url.openStream();
+//        
+//        byte[] byteArray = StreamUtils.loadAsByteArray(in);
+//        assertEquals(996100, byteArray.length);        
+//    }
+//
+//    public void testLoadRemoteInflateFile() throws Exception {
+//        URL url = new URL(TXT_GZ_URL);
+//        
+//        // appears to be DEFLATE data format:
+//        InputStream in = new InflaterInputStream(url.openStream());        
+//        byte[] byteArray = StreamUtils.loadAsByteArray(in);        
+//        assertEquals(3087158, byteArray.length);       
+//        
+//    }
+    
+//    public void testLoadRemoteGzipFile() throws Exception {
+//        URL url = new URL(TXT_GZ_URL);
+//        
+//        InputStream in = url.openStream();        
+//        byte[] byteArray = StreamUtils.loadAsGZippedByteArray(in);
+//        assertEquals(3087158, byteArray.length);        
+//    }
+}
Index: src/org/gnome/yarrr/utils/OutputInputStream.java
===================================================================
RCS file: src/org/gnome/yarrr/utils/OutputInputStream.java
diff -N src/org/gnome/yarrr/utils/OutputInputStream.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/org/gnome/yarrr/utils/OutputInputStream.java	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,41 @@
+/*
+ * Created on 03-Mar-2005
+ *
+ * TODO To change the template for this generated file go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+package org.gnome.yarrr.utils;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+
+public class OutputInputStream extends java.lang.Thread {
+	InputStream in;
+	OutputStream out;
+	
+	public OutputInputStream(String name, InputStream in, OutputStream out) {
+		super(name);
+		this.in = in;
+		this.out = out;
+	}
+
+	public void run() {
+		byte buf[] = new byte[128];
+		int len;
+		try {
+			boolean doneReading = false;
+			while (!doneReading) {
+				len = this.in.read(buf);
+				if (len < 0) {
+					doneReading = true;
+				} else {
+					this.out.write(buf, 0, len);
+				}
+			}
+		} catch (IOException e) {
+			
+		}
+	}
+}
Index: src/org/gnome/yarrr/utils/StreamUtils.java
===================================================================
RCS file: src/org/gnome/yarrr/utils/StreamUtils.java
diff -N src/org/gnome/yarrr/utils/StreamUtils.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/org/gnome/yarrr/utils/StreamUtils.java	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,48 @@
+/*
+ * Created on 03-Mar-2005
+ *
+ * TODO To change the template for this generated file go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+package org.gnome.yarrr.utils;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * @author dmalcolm
+ *
+ * TODO To change the template for this generated type comment go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+public class StreamUtils {
+    static public byte [] loadAsByteArray(InputStream input) throws IOException {
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        byte [] buf = new byte[8192];
+        int len;
+        int offset=0;
+        
+        while ((len = input.read(buf)) != -1) {
+            out.write(buf, 0, len);
+            //offset+=len;
+        }
+        return out.toByteArray();
+    }
+    
+    // For some reason loadAsByteArray(new GZIPInputStream(input)) doesn't work when downloading from a URL
+    // grrr.. only wants to read 2401 bytes...
+    // Do it in two stages instead.
+    static public byte [] loadAsGZippedByteArray(InputStream input) throws IOException {
+        byte[] compressedByteArray = loadAsByteArray(input);
+        
+        InputStream gzipStream = new GZIPInputStream(new ByteArrayInputStream(compressedByteArray));
+        
+        byte[] decompressedByteArray = loadAsByteArray(gzipStream);
+        
+        return decompressedByteArray;
+    }
+    
+}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]