[geary] Fix high-bit char corruption when sent as ISO-8859-1.



commit 048fa15c7b88ecc68996e05ed808cd9571afca34
Author: Michael James Gratton <mike vee net>
Date:   Fri Sep 2 14:21:23 2016 +1000

    Fix high-bit char corruption when sent as ISO-8859-1.
    
    Commit 0c31607 made it possible for Geary to send messages as both
    US-ASCII and ISO-8859-1, as part of the workaround for Bug 769137.
    
    This introduced a bug, however: if a message contained 8-bit
    ISO-8859-1 chars but didn't include any UTF-8 chars, the UTF-8 message
    body would be encoded as if it was ISO-8859-1, and hence the high-bit
    chars would be corrupted.
    
    The solution is to re-encode the body in whatever charset it will be sent
    as, before sending it.
    
    * src/engine/rfc822/rfc822-message.vala (Message): Break out the common
      parts of generating a message body part from
      Message::from_composed_email into the new ::body_data_to_part method.
      Ensure that the body data is converted into US-ASCII or ISO-8859-1
      before attempting to guess what transfer encoding should be used or
      setting it as the content of the new body part.
    
    * src/engine/rfc822/rfc822-utils.vala (Geary.RFC822.Utils): Replace
      get_best() with get_best_charset(), since we can't guess both charset
      and transfer encoding at the same time any more. Ensure we reset the
      source stream after guessing (and in get_best_encoding) so that
      subsequent uses don't simply get an EOS.

 src/engine/rfc822/rfc822-message.vala |  126 +++++++++++++++++++++------------
 src/engine/rfc822/rfc822-utils.vala   |   14 ++--
 2 files changed, 88 insertions(+), 52 deletions(-)
---
diff --git a/src/engine/rfc822/rfc822-message.vala b/src/engine/rfc822/rfc822-message.vala
index 6fe73f0..b2f4923 100644
--- a/src/engine/rfc822/rfc822-message.vala
+++ b/src/engine/rfc822/rfc822-message.vala
@@ -15,8 +15,8 @@ public class Geary.RFC822.Message : BaseObject {
      */
     public delegate string? InlinePartReplacer(string filename, Mime.ContentType? content_type,
         Mime.ContentDisposition? disposition, string? content_id, Geary.Memory.Buffer buffer);
-    
-    private const string DEFAULT_ENCODING = "UTF8";
+
+    private const string DEFAULT_CHARSET = "UTF-8";
 
     private const string HEADER_SENDER = "Sender";
     private const string HEADER_IN_REPLY_TO = "In-Reply-To";
@@ -152,53 +152,29 @@ public class Geary.RFC822.Message : BaseObject {
             this.message.set_header(HEADER_MAILER, email.mailer);
         }
 
-        // Body: text format (optional)
-        string? charset = null;
-        GMime.ContentEncoding? encoding = null;
+        // Share the body charset and encoding between plain and HTML
+        // parts, so we don't need to work it out twice.
+        string? body_charset = null;
+        GMime.ContentEncoding? body_encoding = null;
 
+        // Body: text format (optional)
         GMime.Part? body_text = null;
         if (email.body_text != null) {
-            GMime.Stream stream = new GMime.StreamMem.with_buffer(email.body_text.data);
-            Geary.RFC822.Utils.get_best(stream, out charset, out encoding);
-            if (encoding == GMime.ContentEncoding.BASE64) {
-                // Base64-encoded text needs to have CR's added after
-                // LF's before encoding, otherwise it breaks
-                // format=flowed. See bug 753528.
-                GMime.StreamFilter filter_stream = new GMime.StreamFilter(stream);
-                filter_stream.add(new GMime.FilterCRLF(true, false));
-                stream = filter_stream;
-            }
-            GMime.DataWrapper content = new GMime.DataWrapper.with_stream(
-                stream, GMime.ContentEncoding.DEFAULT
-            );
-            body_text = new GMime.Part();
-            body_text.set_content_type(
-                new GMime.ContentType.from_string(
-                    "text/plain; charset=%s; format=flowed".printf(charset)
-                )
-            );
-            body_text.set_content_object(content);
-            body_text.set_content_encoding(encoding);
+            body_text = body_data_to_part(email.body_text.data,
+                                          ref body_charset,
+                                          ref body_encoding,
+                                          "text/plain",
+                                          true);
         }
 
         // Body: HTML format (also optional)
         GMime.Part? body_html = null;
         if (email.body_html != null) {
-            GMime.StreamMem stream = new GMime.StreamMem.with_buffer(email.body_html.data);
-            if (charset == null) {
-                Geary.RFC822.Utils.get_best(stream, out charset, out encoding);
-            }
-            GMime.DataWrapper content = new GMime.DataWrapper.with_stream(stream,
-                GMime.ContentEncoding.DEFAULT);
-
-            body_html = new GMime.Part();
-            body_html.set_content_type(
-                new GMime.ContentType.from_string(
-                    "text/html; charset=%s".printf(charset)
-                )
-            );
-            body_html.set_content_object(content);
-            body_html.set_content_encoding(encoding);
+            body_html = body_data_to_part(email.body_html.data,
+                                          ref body_charset,
+                                          ref body_encoding,
+                                          "text/html",
+                                          false);
         }
 
         // Build the message's mime part.
@@ -921,12 +897,12 @@ public class Geary.RFC822.Message : BaseObject {
         GMime.StreamMem stream = new GMime.StreamMem.with_byte_array(byte_array);
         stream.set_owner(false);
         
-        // Convert encoding to UTF-8.
+        // Convert to UTF-8.
         GMime.StreamFilter stream_filter = new GMime.StreamFilter(stream);
         if (to_utf8) {
             string? charset = (content_type != null) ? content_type.params.get_value("charset") : null;
             if (String.is_empty(charset))
-                charset = DEFAULT_ENCODING;
+                charset = DEFAULT_CHARSET;
             stream_filter.add(Geary.RFC822.Utils.create_utf8_filter_charset(charset));
         }
         
@@ -957,5 +933,67 @@ public class Geary.RFC822.Message : BaseObject {
     public string to_string() {
         return message.to_string();
     }
-}
 
+    /**
+     * Returns a MIME part for some body content.
+     *
+     * Determining the appropriate body charset and encoding is
+     * unfortunately a multi-step process that involves reading it
+     * completely, several times:
+     *
+     * 1. Guess the best charset by scanning the complete body.
+     * 2. Convert the body into the preferred charset, essential
+     *    to avoid e.g. guessing Base64 encoding for ISO-8859-1
+     *    because of the 0x0's present in UTF bytes with high-bit
+     *    chars.
+     * 3. Determine, given the correctly encoded charset
+     *    what the appropriate encoding is by scanning the
+     *    complete, encoded body.
+     *
+     * This applies to both text/plain and text/html parts, but we
+     * don't need to do it repeatedly for each, since HTML is 7-bit
+     * clean ASCII. So if we have guessed both already for a plain
+     * text body, it will still apply for any HTML part.
+     */
+    private GMime.Part body_data_to_part(uint8[] content,
+                                         ref string? charset,
+                                         ref GMime.ContentEncoding? encoding,
+                                         string content_type,
+                                         bool is_flowed) {
+        GMime.Stream content_stream = new GMime.StreamMem.with_buffer(content);
+        if (charset == null) {
+            charset = Geary.RFC822.Utils.get_best_charset(content_stream);
+        }
+        GMime.StreamFilter filter_stream = new GMime.StreamFilter(content_stream);
+        if (charset != DEFAULT_CHARSET) {
+            filter_stream.add(new GMime.FilterCharset(DEFAULT_CHARSET, charset));
+        }
+        if (encoding == null) {
+            encoding = Geary.RFC822.Utils.get_best_encoding(filter_stream);
+        }
+        if (is_flowed && encoding == GMime.ContentEncoding.BASE64) {
+            // Base64-encoded text needs to have CR's added after LF's
+            // before encoding, otherwise it breaks format=flowed. See
+            // Bug 753528.
+            filter_stream.add(new GMime.FilterCRLF(true, false));
+        }
+
+        GMime.ContentType complete_type =
+            new GMime.ContentType.from_string(content_type);
+        complete_type.set_parameter("charset", charset);
+        if (is_flowed) {
+            complete_type.set_parameter("format", "flowed");
+        }
+
+        GMime.DataWrapper body = new GMime.DataWrapper.with_stream(
+            filter_stream, GMime.ContentEncoding.DEFAULT
+        );
+
+        GMime.Part body_part = new GMime.Part();
+        body_part.set_content_type(complete_type);
+        body_part.set_content_object(body);
+        body_part.set_content_encoding(encoding);
+        return body_part;
+    }
+
+}
diff --git a/src/engine/rfc822/rfc822-utils.vala b/src/engine/rfc822/rfc822-utils.vala
index 2e9b8fa..a481756 100644
--- a/src/engine/rfc822/rfc822-utils.vala
+++ b/src/engine/rfc822/rfc822-utils.vala
@@ -322,24 +322,21 @@ public bool comp_char_arr_slice(char[] array, uint start, string comp) {
 }
 
 /**
- * Uses a GMime.FilterBest to determine the best charset and encoding.
+ * Uses a GMime.FilterBest to determine the best charset.
  *
  * WARNING: This call does not perform async I/O, meaning it will loop on the
  * stream without relinquishing control to the event loop.  Use with
  * caution.
  */
-public void get_best(GMime.Stream in_stream,
-                     out string charset,
-                     out GMime.ContentEncoding encoding) {
+public string get_best_charset(GMime.Stream in_stream) {
     GMime.FilterBest filter = new GMime.FilterBest(
-        GMime.FilterBestFlags.CHARSET |
-        GMime.FilterBestFlags.ENCODING
+        GMime.FilterBestFlags.CHARSET
     );
     GMime.StreamFilter out_stream = new GMime.StreamFilter(new GMime.StreamNull());
     out_stream.add(filter);
     in_stream.write_to_stream(out_stream);
-    charset = filter.charset();
-    encoding = filter.encoding(GMime.EncodingConstraint.7BIT);
+    in_stream.reset();
+    return filter.charset();
 }
 
 /**
@@ -356,6 +353,7 @@ public GMime.ContentEncoding get_best_encoding(GMime.Stream in_stream) {
     GMime.StreamFilter out_stream = new GMime.StreamFilter(new GMime.StreamNull());
     out_stream.add(filter);
     in_stream.write_to_stream(out_stream);
+    in_stream.reset();
     return filter.encoding(GMime.EncodingConstraint.7BIT);
 }
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]