[geary] Fix high-bit char corruption when sent as ISO-8859-1.
- From: Michael Gratton <mjog src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [geary] Fix high-bit char corruption when sent as ISO-8859-1.
- Date: Fri, 2 Sep 2016 04:23:55 +0000 (UTC)
commit 048fa15c7b88ecc68996e05ed808cd9571afca34
Author: Michael James Gratton <mike vee net>
Date: Fri Sep 2 14:21:23 2016 +1000
Fix high-bit char corruption when sent as ISO-8859-1.
Commit 0c31607 made it possible for Geary to send messages as both
US-ASCII and ISO-8859-1, as part of the workaround for Bug 769137.
This introduced a bug, however: if a message contained 8-bit
ISO-8859-1 chars but didn't include any UTF-8 chars, the UTF-8 message
body would be encoded as if it were ISO-8859-1, and hence the high-bit
chars would be corrupted.
The solution is to re-encode the body in whatever charset it will be sent
as, before sending it.
* src/engine/rfc822/rfc822-message.vala (Message): Break out the common
parts of generating a message body part from
Message::from_composed_email into the new ::body_data_to_part method.
Ensure that the body data is converted into US-ASCII or ISO-8859-1
before attempting to guess what transfer encoding should be used or
setting it as the content of the new body part.
* src/engine/rfc822/rfc822-utils.vala (Geary.RFC822.Utils): Replace
get_best() with get_best_charset(), since we can't guess both charset
and transfer encoding at the same time any more. Ensure we reset the
source stream after guessing (and in get_best_encoding) so that
subsequent uses don't simply get an EOS.
src/engine/rfc822/rfc822-message.vala | 126 +++++++++++++++++++++------------
src/engine/rfc822/rfc822-utils.vala | 14 ++--
2 files changed, 88 insertions(+), 52 deletions(-)
---
diff --git a/src/engine/rfc822/rfc822-message.vala b/src/engine/rfc822/rfc822-message.vala
index 6fe73f0..b2f4923 100644
--- a/src/engine/rfc822/rfc822-message.vala
+++ b/src/engine/rfc822/rfc822-message.vala
@@ -15,8 +15,8 @@ public class Geary.RFC822.Message : BaseObject {
*/
public delegate string? InlinePartReplacer(string filename, Mime.ContentType? content_type,
Mime.ContentDisposition? disposition, string? content_id, Geary.Memory.Buffer buffer);
-
- private const string DEFAULT_ENCODING = "UTF8";
+
+ private const string DEFAULT_CHARSET = "UTF-8";
private const string HEADER_SENDER = "Sender";
private const string HEADER_IN_REPLY_TO = "In-Reply-To";
@@ -152,53 +152,29 @@ public class Geary.RFC822.Message : BaseObject {
this.message.set_header(HEADER_MAILER, email.mailer);
}
- // Body: text format (optional)
- string? charset = null;
- GMime.ContentEncoding? encoding = null;
+ // Share the body charset and encoding between plain and HTML
+ // parts, so we don't need to work it out twice.
+ string? body_charset = null;
+ GMime.ContentEncoding? body_encoding = null;
+ // Body: text format (optional)
GMime.Part? body_text = null;
if (email.body_text != null) {
- GMime.Stream stream = new GMime.StreamMem.with_buffer(email.body_text.data);
- Geary.RFC822.Utils.get_best(stream, out charset, out encoding);
- if (encoding == GMime.ContentEncoding.BASE64) {
- // Base64-encoded text needs to have CR's added after
- // LF's before encoding, otherwise it breaks
- // format=flowed. See bug 753528.
- GMime.StreamFilter filter_stream = new GMime.StreamFilter(stream);
- filter_stream.add(new GMime.FilterCRLF(true, false));
- stream = filter_stream;
- }
- GMime.DataWrapper content = new GMime.DataWrapper.with_stream(
- stream, GMime.ContentEncoding.DEFAULT
- );
- body_text = new GMime.Part();
- body_text.set_content_type(
- new GMime.ContentType.from_string(
- "text/plain; charset=%s; format=flowed".printf(charset)
- )
- );
- body_text.set_content_object(content);
- body_text.set_content_encoding(encoding);
+ body_text = body_data_to_part(email.body_text.data,
+ ref body_charset,
+ ref body_encoding,
+ "text/plain",
+ true);
}
// Body: HTML format (also optional)
GMime.Part? body_html = null;
if (email.body_html != null) {
- GMime.StreamMem stream = new GMime.StreamMem.with_buffer(email.body_html.data);
- if (charset == null) {
- Geary.RFC822.Utils.get_best(stream, out charset, out encoding);
- }
- GMime.DataWrapper content = new GMime.DataWrapper.with_stream(stream,
- GMime.ContentEncoding.DEFAULT);
-
- body_html = new GMime.Part();
- body_html.set_content_type(
- new GMime.ContentType.from_string(
- "text/html; charset=%s".printf(charset)
- )
- );
- body_html.set_content_object(content);
- body_html.set_content_encoding(encoding);
+ body_html = body_data_to_part(email.body_html.data,
+ ref body_charset,
+ ref body_encoding,
+ "text/html",
+ false);
}
// Build the message's mime part.
@@ -921,12 +897,12 @@ public class Geary.RFC822.Message : BaseObject {
GMime.StreamMem stream = new GMime.StreamMem.with_byte_array(byte_array);
stream.set_owner(false);
- // Convert encoding to UTF-8.
+ // Convert to UTF-8.
GMime.StreamFilter stream_filter = new GMime.StreamFilter(stream);
if (to_utf8) {
string? charset = (content_type != null) ? content_type.params.get_value("charset") : null;
if (String.is_empty(charset))
- charset = DEFAULT_ENCODING;
+ charset = DEFAULT_CHARSET;
stream_filter.add(Geary.RFC822.Utils.create_utf8_filter_charset(charset));
}
@@ -957,5 +933,67 @@ public class Geary.RFC822.Message : BaseObject {
public string to_string() {
return message.to_string();
}
-}
+ /**
+ * Returns a MIME part for some body content.
+ *
+ * Determining the appropriate body charset and encoding is
+ * unfortunately a multi-step process that involves reading it
+ * completely, several times:
+ *
+ * 1. Guess the best charset by scanning the complete body.
+ * 2. Convert the body into the preferred charset, essential
+ * to avoid e.g. guessing Base64 encoding for ISO-8859-1
+ * because of the 0x0's present in UTF bytes with high-bit
+ * chars.
+ * 3. Determine, given the correctly encoded charset
+ * what the appropriate encoding is by scanning the
+ * complete, encoded body.
+ *
+ * This applies to both text/plain and text/html parts, but we
+ * don't need to do it repeatedly for each, since HTML is 7-bit
+ * clean ASCII. So if we have guessed both already for a plain
+ * text body, it will still apply for any HTML part.
+ */
+ private GMime.Part body_data_to_part(uint8[] content,
+ ref string? charset,
+ ref GMime.ContentEncoding? encoding,
+ string content_type,
+ bool is_flowed) {
+ GMime.Stream content_stream = new GMime.StreamMem.with_buffer(content);
+ if (charset == null) {
+ charset = Geary.RFC822.Utils.get_best_charset(content_stream);
+ }
+ GMime.StreamFilter filter_stream = new GMime.StreamFilter(content_stream);
+ if (charset != DEFAULT_CHARSET) {
+ filter_stream.add(new GMime.FilterCharset(DEFAULT_CHARSET, charset));
+ }
+ if (encoding == null) {
+ encoding = Geary.RFC822.Utils.get_best_encoding(filter_stream);
+ }
+ if (is_flowed && encoding == GMime.ContentEncoding.BASE64) {
+ // Base64-encoded text needs to have CR's added after LF's
+ // before encoding, otherwise it breaks format=flowed. See
+ // Bug 753528.
+ filter_stream.add(new GMime.FilterCRLF(true, false));
+ }
+
+ GMime.ContentType complete_type =
+ new GMime.ContentType.from_string(content_type);
+ complete_type.set_parameter("charset", charset);
+ if (is_flowed) {
+ complete_type.set_parameter("format", "flowed");
+ }
+
+ GMime.DataWrapper body = new GMime.DataWrapper.with_stream(
+ filter_stream, GMime.ContentEncoding.DEFAULT
+ );
+
+ GMime.Part body_part = new GMime.Part();
+ body_part.set_content_type(complete_type);
+ body_part.set_content_object(body);
+ body_part.set_content_encoding(encoding);
+ return body_part;
+ }
+
+}
diff --git a/src/engine/rfc822/rfc822-utils.vala b/src/engine/rfc822/rfc822-utils.vala
index 2e9b8fa..a481756 100644
--- a/src/engine/rfc822/rfc822-utils.vala
+++ b/src/engine/rfc822/rfc822-utils.vala
@@ -322,24 +322,21 @@ public bool comp_char_arr_slice(char[] array, uint start, string comp) {
}
/**
- * Uses a GMime.FilterBest to determine the best charset and encoding.
+ * Uses a GMime.FilterBest to determine the best charset.
*
* WARNING: This call does not perform async I/O, meaning it will loop on the
* stream without relinquishing control to the event loop. Use with
* caution.
*/
-public void get_best(GMime.Stream in_stream,
- out string charset,
- out GMime.ContentEncoding encoding) {
+public string get_best_charset(GMime.Stream in_stream) {
GMime.FilterBest filter = new GMime.FilterBest(
- GMime.FilterBestFlags.CHARSET |
- GMime.FilterBestFlags.ENCODING
+ GMime.FilterBestFlags.CHARSET
);
GMime.StreamFilter out_stream = new GMime.StreamFilter(new GMime.StreamNull());
out_stream.add(filter);
in_stream.write_to_stream(out_stream);
- charset = filter.charset();
- encoding = filter.encoding(GMime.EncodingConstraint.7BIT);
+ in_stream.reset();
+ return filter.charset();
}
/**
@@ -356,6 +353,7 @@ public GMime.ContentEncoding get_best_encoding(GMime.Stream in_stream) {
GMime.StreamFilter out_stream = new GMime.StreamFilter(new GMime.StreamNull());
out_stream.add(filter);
in_stream.write_to_stream(out_stream);
+ in_stream.reset();
return filter.encoding(GMime.EncodingConstraint.7BIT);
}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]