[geary] Better Message-ID parsing
- From: Jim Nelson <jnelson src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [geary] Better Message-ID parsing
- Date: Thu, 31 Jul 2014 02:10:15 +0000 (UTC)
commit 37253c08bff5e40147206d2c79b4f3fc354a6453
Author: Jim Nelson <jim yorba org>
Date: Wed Jul 30 19:08:36 2014 -0700
Better Message-ID parsing
While going through a database upgrade, I noticed (once again) debug
messages reporting bad Message-IDs. On closer inspection, I now see
that our parser was tripping on Message-IDs with spaces inside the
brackets. We need to allow this, so the new algorithm keeps everything
found between the brackets, whitespace included.
src/engine/rfc822/rfc822-message-data.vala | 64 +++++++++-------------------
1 files changed, 21 insertions(+), 43 deletions(-)
---
diff --git a/src/engine/rfc822/rfc822-message-data.vala b/src/engine/rfc822/rfc822-message-data.vala
index 84d7539..9992d7d 100644
--- a/src/engine/rfc822/rfc822-message-data.vala
+++ b/src/engine/rfc822/rfc822-message-data.vala
@@ -45,29 +45,31 @@ public class Geary.RFC822.MessageIDList : Geary.MessageData.AbstractMessageData,
public MessageIDList.from_rfc822_string(string value) {
this ();
- // Have seen some mailers use commas between Message-IDs, meaning that the standard
- // whitespace tokenizer is not sufficient; however, can't add the comma (or every other
- // delimiter that mailers dream up) because it may be used within a Message-ID. The
- // only guarantee made of a Message-ID is that it's surrounded by angle brackets, so
- // mark anything not an angle bracket as a space and strip
+ // Have seen some mailers use commas between Message-IDs and whitespace inside Message-IDs,
+ // meaning that the standard whitespace tokenizer is not sufficient. The only guarantee
+ // made of a Message-ID is that it's surrounded by angle brackets, so save anything inside
+ // angle brackets
//
// NOTE: Seen at least one spamfilter mailer that imaginatively uses parens instead of
- // angle brackets for its Message-IDs; accounting for that as well here.
+ // angle brackets for its Message-IDs; accounting for that as well here. The addt'l logic
+ // is to allow open-parens inside a Message-ID and not treat it as a delimiter; if a
+ // close-parens is found, that's a problem (but isn't expected)
StringBuilder canonicalized = new StringBuilder();
int index = 0;
unichar ch;
bool in_message_id = false;
while (value.get_next_char(ref index, out ch)) {
+ bool add_char = false;
switch (ch) {
case '<':
in_message_id = true;
break;
case '(':
- if (!in_message_id) {
- ch = '<';
+ if (!in_message_id)
in_message_id = true;
- }
+ else
+ add_char = true;
break;
case '>':
@@ -75,50 +77,26 @@ public class Geary.RFC822.MessageIDList : Geary.MessageData.AbstractMessageData,
break;
case ')':
- if (in_message_id) {
- ch = '>';
+ if (in_message_id)
in_message_id = false;
- }
+ else
+ add_char = true;
break;
- // anything not inside the message-id brackets is turned into spaces
default:
- if (!in_message_id)
- ch = ' ';
+ // only add characters inside the brackets
+ add_char = in_message_id;
break;
}
- canonicalized.append_unichar(ch);
- }
-
- if (value != canonicalized.str)
- debug("Message-ID list corrected: \"%s\" -> \"%s\"", value, canonicalized.str);
-
- // there's some additional paranoia here with getting the Message-ID sliced out of the
- // strings, but it's worth it to get a valid Message-ID or none at all vs. a bogus one
- string[] ids = canonicalized.str.split(" ");
- foreach (string id in ids) {
- if (String.is_empty(id))
- continue;
+ if (add_char)
+ canonicalized.append_unichar(ch);
- int start = id.index_of_char('<');
- int end = id.last_index_of_char('>');
-
- // if either end not found or the end comes before the beginning, invalid Message-ID
- if (start < 0 || end < 0 || (start >= end)) {
- debug("Invalid Message-ID found: \"%s\"", id);
+ if (!in_message_id && !String.is_empty(canonicalized.str)) {
+ list.add(new MessageID(canonicalized.str));
- continue;
+ canonicalized = new StringBuilder();
}
-
- // take out the valid slice of the string
- string valid = id.slice(start, end + 1);
- assert(!String.is_empty(valid));
-
- if (id != valid)
- debug("Corrected Message-ID: \"%s\" -> \"%s\"", id, valid);
-
- list.add(new MessageID(valid));
}
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]