Current state of mail feed code
- From: David Malcolm <dmalcolm redhat com>
- To: ☠ <yarrr-list gnome org>
- Subject: Current state of mail feed code
- Date: Thu, 10 Mar 2005 18:58:35 -0500
Attached is a full diff of my workspace against HEAD (walters wanted to
work on it tonight).
It's probably committable (apart from a few minor local edits), but
since the python client appears to be in a non-working state I didn't
want to commit in case it destabilised the server further.
Dave
Index: .classpath
===================================================================
RCS file: /cvs/gnome/yarrr/.classpath,v
retrieving revision 1.26
diff -u -r1.26 .classpath
--- .classpath 3 Mar 2005 22:14:17 -0000 1.26
+++ .classpath 10 Mar 2005 23:55:54 -0000
@@ -16,11 +16,11 @@
<classpathentry kind="lib" path="/usr/share/java/commons-logging-1.0.4.jar"/>
<classpathentry kind="lib" path="/usr/share/java/log4j.jar"/>
<classpathentry kind="lib" path="/usr/share/java/commons-collections.jar"/>
- <classpathentry kind="lib" path="/usr/share/java/javamail/mailapi.jar"/>
+ <classpathentry sourcepath="/opt/Downloads/mail-1.0/source" kind="lib" path="/usr/share/java/javamail/mailapi.jar"/>
<classpathentry kind="lib" path="/usr/share/java/jaf.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.USER_LIBRARY/JTA"/>
<classpathentry sourcepath="/home/seth/Desktop/rhino1_6R1/src" kind="lib" path="lib/js.jar"/>
- <classpathentry kind="lib" path="/usr/share/java/javamail/smtp.jar"/>
+ <classpathentry sourcepath="/opt/Downloads/mail-1.0/source" kind="lib" path="/usr/share/java/javamail/smtp.jar"/>
<classpathentry kind="lib" path="lib/hibernate3.jar"/>
<classpathentry kind="lib" path="lib/ehcache-1.1.jar"/>
<classpathentry kind="lib" path="/usr/share/java/antlr.jar"/>
Index: launchers/Yarrr.launch
===================================================================
RCS file: /cvs/gnome/yarrr/launchers/Yarrr.launch,v
retrieving revision 1.3
diff -u -r1.3 Yarrr.launch
--- launchers/Yarrr.launch 12 Feb 2005 11:03:40 -0000 1.3
+++ launchers/Yarrr.launch 10 Mar 2005 23:55:54 -0000
@@ -6,6 +6,7 @@
<listEntry value="org.eclipse.debug.ui.launchGroup.run"/>
</listAttribute>
<booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
+<booleanAttribute key="org.eclipse.jdt.launching.STOP_IN_MAIN" value="true"/>
<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.gnome.yarrr.Yarrr"/>
<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="yarrr"/>
</launchConfiguration>
Index: src/org/gnome/yarrr/Topic.java
===================================================================
RCS file: /cvs/gnome/yarrr/src/org/gnome/yarrr/Topic.java,v
retrieving revision 1.28
diff -u -r1.28 Topic.java
--- src/org/gnome/yarrr/Topic.java 10 Mar 2005 16:11:32 -0000 1.28
+++ src/org/gnome/yarrr/Topic.java 10 Mar 2005 23:55:54 -0000
@@ -198,6 +198,8 @@
.setLong("clientVersion", clientVersion)
.setLong("currentVersion", this.version)
.list();
+
+ //seems to NOT properly set up an EmailMessage if it returns one of these as the child.message
}
public List getNewLinksSince(long clientVersion) {
Index: src/org/gnome/yarrr/database/PostgresqlManager.java
===================================================================
RCS file: /cvs/gnome/yarrr/src/org/gnome/yarrr/database/PostgresqlManager.java,v
retrieving revision 1.4
diff -u -r1.4 PostgresqlManager.java
--- src/org/gnome/yarrr/database/PostgresqlManager.java 4 Mar 2005 19:59:13 -0000 1.4
+++ src/org/gnome/yarrr/database/PostgresqlManager.java 10 Mar 2005 23:55:54 -0000
@@ -25,6 +25,7 @@
import java.util.Random;
import org.apache.log4j.Logger;
+import org.gnome.yarrr.utils.OutputInputStream;
/**
* Manages an instance of the PostgreSQL server pointing at a custom
@@ -227,35 +228,6 @@
}
- class OutputInputStream extends java.lang.Thread {
- private InputStream in;
- private OutputStream out;
-
- OutputInputStream(String name, InputStream in, OutputStream out) {
- super(name);
- this.in = in;
- this.out = out;
- }
-
- public void run() {
- byte buf[] = new byte[128];
- int len;
- try {
- boolean doneReading = false;
- while (!doneReading) {
- len = this.in.read(buf);
- if (len < 0) {
- doneReading = true;
- } else {
- this.out.write(buf, 0, len);
- }
- }
- } catch (IOException e) {
-
- }
- }
- }
-
/**
* @return a singleton
*/
@@ -273,7 +245,7 @@
return new PrintStream(fos);
}
- private int execOutput(String cmd) throws IOException {
+ int execOutput(String cmd) throws IOException {
logger.debug("Running '" + cmd + "'");
// TODO This is what we want to do, but classpath doesn't have getenv();
Index: src/org/gnome/yarrr/email/Importer.java
===================================================================
RCS file: /cvs/gnome/yarrr/src/org/gnome/yarrr/email/Importer.java,v
retrieving revision 1.3
diff -u -r1.3 Importer.java
--- src/org/gnome/yarrr/email/Importer.java 9 Mar 2005 15:21:40 -0000 1.3
+++ src/org/gnome/yarrr/email/Importer.java 10 Mar 2005 23:55:54 -0000
@@ -16,6 +16,7 @@
import java.util.LinkedList;
import java.util.List;
+import javax.mail.Address;
import javax.mail.MessagingException;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;
@@ -53,15 +54,23 @@
}
}
- public Person getAuthorForInjectedMail(MimeMessage mimeMessage) throws NoSuchAlgorithmException, NoSuchProviderException{
- // TODO: Message instances require an author public key
- // Exactly how should this be implemented? Dunno, so for now
- // just generate a new keypair for each message we inject (!)
- // Ultimately we might want to be able to identify people in some smart fashion...
- // signed emails? or heuristics?
- // Depends on how I can best wind up Seth (or Havoc)
- KeyPair pair = Cryptography.generateKeyPair();
- return Person.getPerson(pair.getPublic());
+ static String getFrom(MimeMessage mimeMessage) throws MessagingException {
+ Address[] addresses = mimeMessage.getFrom();
+ InternetAddress iaddr = (InternetAddress)addresses[0];
+ return iaddr.getAddress();
+ }
+
+ public Person getAuthorForInjectedMail(MimeMessage mimeMessage) throws NoSuchAlgorithmException, NoSuchProviderException, MessagingException, HibernateException{
+ String senderAddress = getFrom(mimeMessage);
+
+ EmailAddress emailAddress = EmailAddress.lookup(senderAddress);
+ if (emailAddress==null) {
+ // create new instance of email
+ KeyPair pair = Cryptography.generateKeyPair();
+ emailAddress = new EmailAddress(senderAddress, pair);
+ }
+
+ return emailAddress.getPerson();
}
public Topic getTopicForInjectedMail(MimeMessage mimeMessage, EmailMessage parentEmailMessage) throws HibernateException, MessagingException {
Index: src/org/gnome/yarrr/email/MailingListSniffer.java
===================================================================
RCS file: /cvs/gnome/yarrr/src/org/gnome/yarrr/email/MailingListSniffer.java,v
retrieving revision 1.1
diff -u -r1.1 MailingListSniffer.java
--- src/org/gnome/yarrr/email/MailingListSniffer.java 9 Feb 2005 23:22:25 -0000 1.1
+++ src/org/gnome/yarrr/email/MailingListSniffer.java 10 Mar 2005 23:55:54 -0000
@@ -27,7 +27,9 @@
public String matchMessage(MimeMessage mimeMessage) throws MessagingException {
String[] headerValues = mimeMessage.getHeader(headerName);
-
+ if (headerValues==null) {
+ return null;
+ }
for (int i=0;i<headerValues.length;i++) {
Matcher matcher = pattern.matcher(headerValues[i]);
if (matcher.find()) {
Index: src/org/gnome/yarrr/hibernate/HibernateUtil.java
===================================================================
RCS file: /cvs/gnome/yarrr/src/org/gnome/yarrr/hibernate/HibernateUtil.java,v
retrieving revision 1.14
diff -u -r1.14 HibernateUtil.java
--- src/org/gnome/yarrr/hibernate/HibernateUtil.java 9 Mar 2005 15:21:39 -0000 1.14
+++ src/org/gnome/yarrr/hibernate/HibernateUtil.java 10 Mar 2005 23:55:54 -0000
@@ -7,6 +7,7 @@
import org.gnome.yarrr.Topic;
import org.gnome.yarrr.TopicPerUser;
import org.gnome.yarrr.email.Email;
+import org.gnome.yarrr.email.EmailAddress;
import org.gnome.yarrr.email.MailingList;
import org.gnome.yarrr.person.Group;
import org.gnome.yarrr.person.Person;
@@ -44,6 +45,7 @@
.addClass(Topic.class)
.addClass(TopicPerUser.class)
.addClass(Email.class)
+ .addClass(EmailAddress.class)
.addClass(MailingList.class)
.setProperty(Environment.HBM2DDL_AUTO, "update")
;
Index: src/org/gnome/yarrr/tests/EmailTests.java
===================================================================
RCS file: /cvs/gnome/yarrr/src/org/gnome/yarrr/tests/EmailTests.java,v
retrieving revision 1.3
diff -u -r1.3 EmailTests.java
--- src/org/gnome/yarrr/tests/EmailTests.java 10 Feb 2005 03:37:55 -0000 1.3
+++ src/org/gnome/yarrr/tests/EmailTests.java 10 Mar 2005 23:55:54 -0000
@@ -4,7 +4,6 @@
*/
package org.gnome.yarrr.tests;
import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
@@ -13,27 +12,17 @@
import javax.mail.Session;
import javax.mail.internet.MimeMessage;
+import org.gnome.yarrr.utils.StreamUtils;
+
import junit.framework.TestCase;
/**
* @author dmalcolm
*/
public class EmailTests extends TestCase {
- static byte [] loadAllOfStreamJavaDamnYou(InputStream input) throws IOException {
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- byte [] buf = new byte[8192];
- int len;
-
- while ((len = input.read(buf)) != -1) {
- out.write(buf);
- }
- return out.toByteArray();
- }
-
-
static byte [] getTestEmail(String filename) throws IOException {
InputStream inputStream = EmailTests.class.getResourceAsStream("test-emails/"+filename);
- return loadAllOfStreamJavaDamnYou(inputStream);
+ return StreamUtils.loadAsByteArray(inputStream);
}
static public MimeMessage loadTestEmail(String filename) throws MessagingException, IOException {
@@ -52,5 +41,6 @@
MimeMessage message = loadTestEmail("multiline-subject.email");
// FIXME: should it strip out the \r\n\t from the email Subject? is this a bug in the javamail implementation?
assertEquals("British English (was Re: GNOME Lovers Needed: l10n work for\r\n\tlocations database)", message.getSubject());
- }
+ }
}
+
Index: src/org/gnome/yarrr/tests/TestInitializerTests.java
===================================================================
RCS file: /cvs/gnome/yarrr/src/org/gnome/yarrr/tests/TestInitializerTests.java,v
retrieving revision 1.7
diff -u -r1.7 TestInitializerTests.java
--- src/org/gnome/yarrr/tests/TestInitializerTests.java 9 Mar 2005 15:21:38 -0000 1.7
+++ src/org/gnome/yarrr/tests/TestInitializerTests.java 10 Mar 2005 23:55:54 -0000
@@ -23,7 +23,7 @@
init.doCreation();
List topics = Topic.getAllTopics();
- assertEquals(10,topics.size());
+ assertEquals(6,topics.size());
for (Iterator i=topics.iterator(); i.hasNext(); ) {
Topic topic = (Topic)i.next();
Index: src/org/gnome/yarrr/xmlrpc/DefaultMethods.java
===================================================================
RCS file: /cvs/gnome/yarrr/src/org/gnome/yarrr/xmlrpc/DefaultMethods.java,v
retrieving revision 1.21
diff -u -r1.21 DefaultMethods.java
--- src/org/gnome/yarrr/xmlrpc/DefaultMethods.java 10 Mar 2005 16:16:29 -0000 1.21
+++ src/org/gnome/yarrr/xmlrpc/DefaultMethods.java 10 Mar 2005 23:55:54 -0000
@@ -175,22 +175,28 @@
logger.info("injectEmail(" + contents.length + " bytes)");
Importer importer = new Importer();
- InjectionResult injectionResult = importer.injectEmail(contents);
- String stringResult = "avast! added email:\""+injectionResult.email.getMimeMessage().getSubject()+"\"\n";
+ try {
+ InjectionResult injectionResult = importer.injectEmail(contents);
- for (Iterator i = injectionResult.messageInsertions.iterator(); i.hasNext();) {
- Importer.EmailMessageInsertion insertion = (Importer.EmailMessageInsertion)i.next();
-
- stringResult += "added message:\""+insertion.emailMessage.getSubject()+"\"";
- if (insertion.topic != null) {
- stringResult += " to topic \"" + insertion.topic.getName() + "\" ";;
- } else {
- stringResult += " to database (not added to any topics)";
- }
- }
- logger.info(stringResult);
+ String stringResult = "avast! added email:\""+injectionResult.email.getMimeMessage().getSubject()+"\"\n";
+
+ for (Iterator i = injectionResult.messageInsertions.iterator(); i.hasNext();) {
+ Importer.EmailMessageInsertion insertion = (Importer.EmailMessageInsertion)i.next();
+
+ stringResult += "added message:\""+insertion.emailMessage.getSubject()+"\"";
+ if (insertion.topic!=null) {
+ stringResult += " to topic \"" + insertion.topic.getName() + "\" ";
+ } else {
+ stringResult += " to database (not added to any topics)";
+ }
+ }
+ logger.info(stringResult);
- return stringResult;
+ return stringResult;
+ } catch (Exception e) {
+ logger.error(new String(contents), e);
+ throw e;
+ }
}
public Vector updateTopic(String topicId, byte[] userPublicKeyBytes, String versionAtClientString, String perUserVersionAtClientString) throws Exception {
Index: src/org/gnome/yarrr/email/EmailAddress.hbm.xml
===================================================================
RCS file: src/org/gnome/yarrr/email/EmailAddress.hbm.xml
diff -N src/org/gnome/yarrr/email/EmailAddress.hbm.xml
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/org/gnome/yarrr/email/EmailAddress.hbm.xml 1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,16 @@
+<?xml version="1.0"?>
+<!DOCTYPE hibernate-mapping PUBLIC
+ "-//Hibernate/Hibernate Mapping DTD 2.0//EN"
+ "http://hibernate.sourceforge.net/hibernate-mapping-2.0.dtd">
+<hibernate-mapping
+ package="org.gnome.yarrr.email">
+
+ <class name="EmailAddress">
+ <id name="id">
+ <generator class="native"/>
+ </id>
+ <property name="address"/>
+ <property name="keypair"/>
+ </class>
+
+</hibernate-mapping>
Index: src/org/gnome/yarrr/email/EmailAddress.java
===================================================================
RCS file: src/org/gnome/yarrr/email/EmailAddress.java
diff -N src/org/gnome/yarrr/email/EmailAddress.java
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/org/gnome/yarrr/email/EmailAddress.java 1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,69 @@
+/*
+ * Created on 07-Mar-2005
+ *
+ * TODO To change the template for this generated file go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+package org.gnome.yarrr.email;
+
+import java.security.KeyPair;
+
+import org.gnome.yarrr.hibernate.HibernateUtil;
+import org.gnome.yarrr.hibernate.Persistent;
+import org.gnome.yarrr.person.Person;
+import org.hibernate.HibernateException;
+import org.hibernate.criterion.Expression;
+
+/**
+ * @author dmalcolm
+ *
+ * Class representing an email address, when handling email feeds.
+ * We autogenerate a keypair/person to cover each email address we encounter, since all we can say about these messages
+ * is that they appear to come from a particular email address (and relatively few people sign their messages).
+ */
+public class EmailAddress extends Persistent {
+ String address;
+ KeyPair keypair;
+
+ protected EmailAddress() {
+ }
+
+ public EmailAddress(String address, KeyPair keypair) throws HibernateException {
+ this.address = address;
+ this.keypair = keypair;
+ HibernateUtil.getSession().save(this);
+ }
+
+ public String getAddress() {
+ return address;
+ }
+ public void setAddress(String address) {
+ this.address = address;
+ }
+ public KeyPair getKeypair() {
+ return keypair;
+ }
+ public void setKeypair(KeyPair keypair) {
+ this.keypair = keypair;
+ }
+
+ /**
+ * @param stringAddress
+ * @return
+ * @throws HibernateException
+ */
+ public static EmailAddress lookup(String stringAddress) throws HibernateException {
+ EmailAddress result = (EmailAddress)
+ HibernateUtil.getSession().createCriteria(EmailAddress.class)
+ .add( Expression.eq("address", stringAddress) )
+ .uniqueResult();
+ return result;
+ }
+
+ /**
+ * @return
+ */
+ public Person getPerson() {
+ return Person.getPerson(keypair.getPublic());
+ }
+}
Index: src/org/gnome/yarrr/email/MailmanArchiveScraper.java
===================================================================
RCS file: src/org/gnome/yarrr/email/MailmanArchiveScraper.java
diff -N src/org/gnome/yarrr/email/MailmanArchiveScraper.java
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/org/gnome/yarrr/email/MailmanArchiveScraper.java 1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,238 @@
+/*
+ * Created on 02-Mar-2005
+ *
+ * TODO To change the template for this generated file go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+package org.gnome.yarrr.email;
+
+import gnu.mail.providers.mbox.MboxStore;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.net.URL;
+import java.util.Iterator;
+import java.util.Vector;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.mail.Folder;
+import javax.mail.Message;
+import javax.mail.MessagingException;
+import javax.mail.Session;
+import javax.mail.URLName;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.apache.xmlrpc.XmlRpcClient;
+import org.apache.xmlrpc.XmlRpcException;
+import org.gnome.yarrr.utils.OutputInputStream;
+import org.gnome.yarrr.utils.StreamUtils;
+import org.w3c.dom.Document;
+import org.xml.sax.SAXException;
+
+/**
+ * @author dmalcolm
+ *
+ * TODO To change the template for this generated type comment go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+public class MailmanArchiveScraper {
+
+ /**
+ * @author dmalcolm
+ *
+ * TODO To change the template for this generated type comment go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+ public class FormailError extends Exception {
+ /**
+ * Comment for <code>serialVersionUID</code>
+ */
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * @param formailResult
+ */
+ public FormailError(int formailResult) {
+
+ // TODO Auto-generated constructor stub
+ }
+ }
+// alas this doesn't work:
+ // <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
+ // [Fatal Error] :1:49: White spaces are required between publicId and systemId.
+ // org.xml.sax.SAXParseException: White spaces are required between publicId and systemId.
+ Document readArchivePage(URL archiveUrl) throws ParserConfigurationException, IOException, IOException, SAXException {
+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+ DocumentBuilder builder = factory.newDocumentBuilder();
+ Document document = builder.parse(archiveUrl.openStream());
+ return document;
+
+ }
+
+ // alas, the HTML is old-school HTML and not XHTML, so we have to do this in a hackish way
+ public Vector getAllgzippedTextURLs(URL archivePageUrl) throws IOException {
+ Vector result = new Vector();
+ BufferedReader reader = new BufferedReader(new InputStreamReader(archivePageUrl.openStream()));
+
+ // Regex pattern: <td><A href="(.*\.txt\.gz)">
+ // Escape into a string literal:
+ final String REGEX = "<td><A href=\"(.*\\.txt\\.gz)\">";
+ Pattern pattern = Pattern.compile(REGEX);
+
+ while (reader.ready()) {
+ String line = reader.readLine();
+
+ Matcher matcher = pattern.matcher(line);
+ if (matcher.find()) {
+ String gzippedTextName = matcher.group(1);
+ URL gzippedTextURL = new URL(archivePageUrl, gzippedTextName);
+ result.add(gzippedTextURL);
+ }
+ }
+ return result;
+
+ }
+
+ public MailmanArchiveScraper(URL archivePageUrl) throws Exception {
+ Vector gzippedTextURLs = getAllgzippedTextURLs(archivePageUrl);
+
+ for (Iterator i=gzippedTextURLs.iterator(); i.hasNext(); ) {
+ URL gzippedTextURL = (URL)i.next();
+ importGZippedText(gzippedTextURL);
+ }
+ }
+
+ private byte[] runCommandWithCapture(String command, byte[] stdin) throws Exception {
+ Process process = Runtime.getRuntime().exec(command);
+ OutputStream stdinStream = process.getOutputStream();
+ InputStream stdoutStream = process.getInputStream();
+ InputStream stderrStream = process.getErrorStream();
+ ByteArrayOutputStream bufOutput = new ByteArrayOutputStream();
+
+ // having these lines seems to cause problems:
+ new OutputInputStream("stdout", stdoutStream, bufOutput).start();
+ new OutputInputStream("stderr", stderrStream, System.err).start();
+
+ stdinStream.write(stdin);
+ stdinStream.close();
+ int result = process.waitFor();
+ if (result!=0) {
+ throw new Exception();
+ }
+
+
+ return bufOutput.toByteArray();
+ }
+
+ // Run a command, feeding the byte array to stdin:
+ private int runCommand(String command, byte[] stdin) throws IOException, InterruptedException {
+ Process process = Runtime.getRuntime().exec(command);
+ OutputStream stdinStream = process.getOutputStream();
+ InputStream stdoutStream = process.getInputStream();
+ InputStream stderrStream = process.getErrorStream();
+
+ // having these lines seems to cause problems:
+ new OutputInputStream("formail stdout", stdoutStream, System.out).start();
+ new OutputInputStream("formail stderr", stderrStream, System.err).start();
+
+ stdinStream.write(stdin);
+ stdinStream.close();
+ int formailResult = process.waitFor();
+ return formailResult;
+ }
+
+ private void importMBoxArchiveUsingFormail(byte[] decompressedData) throws Exception {
+ // Run formail on the stream, and pipe back to pressgang.py...
+ String injectionURL = "http://localhost:19842";
+ String injectionCommand = "/opt/Extraction/eclipse/workspace/yarrr/tools/pressgang.py " + injectionURL;
+ //String injectionCommand = "date";
+ String formailCommand = "formail -s "+ injectionCommand;
+ //String formailCommand = "sort";
+ //System.out.println(formailCommand);
+ int formailResult = runCommand(formailCommand, decompressedData);
+ if (formailResult!=0) {
+ //System.out.write(decompressedData);
+ throw new FormailError(formailResult);
+ }
+ }
+
+ private void importMBoxArchiveUsingJavaStuff(byte[] decompressedData) throws Exception {
+ File tmpFile = File.createTempFile("yarrr-mail-import", ".mbox");
+ FileOutputStream foo = new FileOutputStream(tmpFile);
+ foo.write(decompressedData);
+ foo.close();
+
+ Session session = Session.getInstance(System.getProperties());
+ MboxStore tmpStore = new MboxStore(session, new URLName(tmpFile.toString()));
+ tmpStore.connect();
+ Folder folder = tmpStore.getDefaultFolder();
+ folder.open(Folder.READ_ONLY);
+ Message[] messages = folder.getMessages();
+
+
+ for (int i=0;i<messages.length;i++) {
+ importMessage(messages[i]);
+ }
+ }
+
+ public byte[] messageToByteArray(Message message) throws IOException, MessagingException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ message.writeTo(out);
+ return out.toByteArray();
+ }
+
+ /**
+ * @param message
+ * @throws MessagingException
+ * @throws IOException
+ * @throws XmlRpcException
+ */
+ private void importMessage(Message message) throws IOException, MessagingException, XmlRpcException {
+ System.out.println(message.getSubject());
+ byte[] bytes = messageToByteArray(message);
+
+ XmlRpcClient xmlrpc = new XmlRpcClient ("http://localhost:19842");
+
+ Vector params = new Vector();
+ params.add(bytes);
+ xmlrpc.execute("injectEmail", params);
+ }
+
+ /**
+ * @param gzippedTextURL
+ * @throws Exception
+ */
+ private void importGZippedText(URL gzippedTextURL) throws Exception {
+ System.out.println("Importing " + gzippedTextURL.toExternalForm());
+ byte[] gzipData = StreamUtils.loadAsByteArray(gzippedTextURL.openStream());
+
+ //InputStream in = new GZIPInputStream(gzippedTextURL.openStream());
+
+ // Maybe just pipe to stdout, so we can pipe results into formail...
+ //byte[] gzipData = StreamUtils.loadAsByteArray(in);
+ System.out.println("...downloaded " + gzipData.length + " bytes (compressed).");
+ byte[] decompressedData = runCommandWithCapture("gunzip", gzipData);
+ System.out.println("...decompressed to " + decompressedData.length + " bytes.");
+
+ importMBoxArchiveUsingJavaStuff(decompressedData);
+ }
+
+ public static void main(String[] args) {
+ try {
+ new MailmanArchiveScraper(new URL("http://mail.gnome.org/archives/desktop-devel-list/"));
+
+ } catch (Exception e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ }
+}
Index: src/org/gnome/yarrr/tests/StreamUtilTests.java
===================================================================
RCS file: src/org/gnome/yarrr/tests/StreamUtilTests.java
diff -N src/org/gnome/yarrr/tests/StreamUtilTests.java
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/org/gnome/yarrr/tests/StreamUtilTests.java 1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,55 @@
+/*
+ * Created on 03-Mar-2005
+ *
+ * TODO To change the template for this generated file go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+package org.gnome.yarrr.tests;
+
+import java.io.InputStream;
+
+import junit.framework.TestCase;
+
+import org.gnome.yarrr.utils.StreamUtils;
+
+/**
+ * @author dmalcolm
+ *
+ * TODO To change the template for this generated type comment go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+public class StreamUtilTests extends TestCase {
+ private static final String TXT_GZ_URL = "http://mail.gnome.org/archives/desktop-devel-list/2003-August.txt.gz";
+
+ public void testLoadLocalFile() throws Exception {
+ InputStream inputStream = EmailTests.class.getResourceAsStream("test-emails/multiline-subject.email");
+ byte[] byteArray = StreamUtils.loadAsByteArray(inputStream);
+ assertEquals(5119, byteArray.length);
+ }
+
+// public void testLoadRemoteFile() throws Exception {
+// URL url = new URL(TXT_GZ_URL);
+// InputStream in = url.openStream();
+//
+// byte[] byteArray = StreamUtils.loadAsByteArray(in);
+// assertEquals(996100, byteArray.length);
+// }
+//
+// public void testLoadRemoteInflateFile() throws Exception {
+// URL url = new URL(TXT_GZ_URL);
+//
+// // appears to be DEFLATE data format:
+// InputStream in = new InflaterInputStream(url.openStream());
+// byte[] byteArray = StreamUtils.loadAsByteArray(in);
+// assertEquals(3087158, byteArray.length);
+//
+// }
+
+// public void testLoadRemoteGzipFile() throws Exception {
+// URL url = new URL(TXT_GZ_URL);
+//
+// InputStream in = url.openStream();
+// byte[] byteArray = StreamUtils.loadAsGZippedByteArray(in);
+// assertEquals(3087158, byteArray.length);
+// }
+}
Index: src/org/gnome/yarrr/utils/OutputInputStream.java
===================================================================
RCS file: src/org/gnome/yarrr/utils/OutputInputStream.java
diff -N src/org/gnome/yarrr/utils/OutputInputStream.java
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/org/gnome/yarrr/utils/OutputInputStream.java 1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,41 @@
+/*
+ * Created on 03-Mar-2005
+ *
+ * TODO To change the template for this generated file go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+package org.gnome.yarrr.utils;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+
+public class OutputInputStream extends java.lang.Thread {
+ InputStream in;
+ OutputStream out;
+
+ public OutputInputStream(String name, InputStream in, OutputStream out) {
+ super(name);
+ this.in = in;
+ this.out = out;
+ }
+
+ public void run() {
+ byte buf[] = new byte[128];
+ int len;
+ try {
+ boolean doneReading = false;
+ while (!doneReading) {
+ len = this.in.read(buf);
+ if (len < 0) {
+ doneReading = true;
+ } else {
+ this.out.write(buf, 0, len);
+ }
+ }
+ } catch (IOException e) {
+
+ }
+ }
+}
Index: src/org/gnome/yarrr/utils/StreamUtils.java
===================================================================
RCS file: src/org/gnome/yarrr/utils/StreamUtils.java
diff -N src/org/gnome/yarrr/utils/StreamUtils.java
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/org/gnome/yarrr/utils/StreamUtils.java 1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,48 @@
+/*
+ * Created on 03-Mar-2005
+ *
+ * TODO To change the template for this generated file go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+package org.gnome.yarrr.utils;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * @author dmalcolm
+ *
+ * TODO To change the template for this generated type comment go to
+ * Window - Preferences - Java - Code Style - Code Templates
+ */
+public class StreamUtils {
+ static public byte [] loadAsByteArray(InputStream input) throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ byte [] buf = new byte[8192];
+ int len;
+ int offset=0;
+
+ while ((len = input.read(buf)) != -1) {
+ out.write(buf, 0, len);
+ //offset+=len;
+ }
+ return out.toByteArray();
+ }
+
+ // For some reason loadAsByteArray(new GZIPInputStream(input)) doesn't work when downloading from a URL
+ // grrr.. only wants to read 2401 bytes...
+ // Do it in two stages instead.
+ static public byte [] loadAsGZippedByteArray(InputStream input) throws IOException {
+ byte[] compressedByteArray = loadAsByteArray(input);
+
+ InputStream gzipStream = new GZIPInputStream(new ByteArrayInputStream(compressedByteArray));
+
+ byte[] decompressedByteArray = loadAsByteArray(gzipStream);
+
+ return decompressedByteArray;
+ }
+
+}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]