[geary] Don't always use UTF-8 as the charset; use ASCII or ISO-8859-1 if possible.



commit 0c3160779e93ff1716f13a64595baa0233f76e52
Author: Michael James Gratton <mike vee net>
Date:   Mon Aug 1 23:21:28 2016 +1000

    Don't always use UTF-8 as the charset; use ASCII or ISO-8859-1 if possible.
    
    Previously, Geary would simply assume that UTF-8 is a reasonable charset
    to use when sending both plain text and HTML parts. It is; however,
    Mailman will re-encode content sent as UTF-8 using Base64, and break
    format=flowed in the process.
    
    This works around the problem for messages containing US-ASCII or
    ISO-8859-1 chars, at least. No solution yet for UTF-8 except maybe
    getting Mailman fixed.
    
    Bug 769137
    
    * src/engine/rfc822/rfc822-message.vala (Message): Guess the best charset
      for a message's content rather than assuming UTF-8. Set the charset for
      both plain text and HTML parts, but only guess it and the encoding at
      most once for both.
    
    * src/engine/rfc822/rfc822-utils.vala: Replace get_best_content_encoding
      with get_best for determining both charset and encoding, and
      get_best_encoding for just guessing encoding, both using
      GMime.FilterBest instead of our own custom code.

 src/engine/rfc822/rfc822-message.vala |   35 ++++++++-----
 src/engine/rfc822/rfc822-utils.vala   |   88 ++++++++++++---------------------
 2 files changed, 52 insertions(+), 71 deletions(-)
---
diff --git a/src/engine/rfc822/rfc822-message.vala b/src/engine/rfc822/rfc822-message.vala
index aca8bfb..0cc48f0 100644
--- a/src/engine/rfc822/rfc822-message.vala
+++ b/src/engine/rfc822/rfc822-message.vala
@@ -153,12 +153,13 @@ public class Geary.RFC822.Message : BaseObject {
         }
 
         // Body: text format (optional)
+        string? charset = null;
+        GMime.ContentEncoding? encoding = null;
+
         GMime.Part? body_text = null;
         if (email.body_text != null) {
             GMime.Stream stream = new GMime.StreamMem.with_buffer(email.body_text.data);
-            GMime.ContentEncoding encoding = Geary.RFC822.Utils.get_best_content_encoding(
-                stream, GMime.EncodingConstraint.7BIT
-            );
+            Geary.RFC822.Utils.get_best(stream, out charset, out encoding);
             if (encoding == GMime.ContentEncoding.BASE64) {
                 // Base64-encoded text needs to have CR's added after
                 // LF's before encoding, otherwise it breaks
@@ -171,7 +172,11 @@ public class Geary.RFC822.Message : BaseObject {
                 stream, GMime.ContentEncoding.DEFAULT
             );
             body_text = new GMime.Part();
-            body_text.set_content_type(new GMime.ContentType.from_string("text/plain; charset=utf-8; format=flowed"));
+            body_text.set_content_type(
+                new GMime.ContentType.from_string(
+                    "text/plain; charset=%s; format=flowed".printf(charset)
+                )
+            );
             body_text.set_content_object(content);
             body_text.set_content_encoding(encoding);
         }
@@ -180,16 +185,22 @@ public class Geary.RFC822.Message : BaseObject {
         GMime.Part? body_html = null;
         if (email.body_html != null) {
             GMime.StreamMem stream = new GMime.StreamMem.with_buffer(email.body_html.data);
+            if (charset == null) {
+                Geary.RFC822.Utils.get_best(stream, out charset, out encoding);
+            }
             GMime.DataWrapper content = new GMime.DataWrapper.with_stream(stream,
                 GMime.ContentEncoding.DEFAULT);
-            
+
             body_html = new GMime.Part();
-            body_html.set_content_type(new GMime.ContentType.from_string("text/html; charset=utf-8"));
+            body_html.set_content_type(
+                new GMime.ContentType.from_string(
+                    "text/html; charset=%s".printf(charset)
+                )
+            );
             body_html.set_content_object(content);
-            body_html.set_content_encoding(Geary.RFC822.Utils.get_best_content_encoding(stream,
-                GMime.EncodingConstraint.7BIT));
+            body_html.set_content_encoding(encoding);
         }
-        
+
         // Build the message's mime part.
         Gee.List<GMime.Object> main_parts = new Gee.LinkedList<GMime.Object>();
         
@@ -283,11 +294,7 @@ public class Geary.RFC822.Message : BaseObject {
         GMime.StreamGIO stream = new GMime.StreamGIO(file);
         stream.set_owner(false);
         part.set_content_object(new GMime.DataWrapper.with_stream(stream, GMime.ContentEncoding.BINARY));
-        
-        // This encoding is the "Content-Transfer-Encoding", which GMime automatically converts to.
-        part.set_content_encoding(Geary.RFC822.Utils.get_best_content_encoding(stream,
-            GMime.EncodingConstraint.7BIT));
-        
+        part.set_content_encoding(Geary.RFC822.Utils.get_best_encoding(stream));
         return part;
     }
     
diff --git a/src/engine/rfc822/rfc822-utils.vala b/src/engine/rfc822/rfc822-utils.vala
index 41da289..2e9b8fa 100644
--- a/src/engine/rfc822/rfc822-utils.vala
+++ b/src/engine/rfc822/rfc822-utils.vala
@@ -321,68 +321,42 @@ public bool comp_char_arr_slice(char[] array, uint start, string comp) {
     return true;
 }
 
-/*
- * This function is adapted from the GMimeFilterBest source in the GMime
- * library (gmime-filter-best.c) by Jeffrey Stedfast, LGPL 2.1.
+/**
+ * Uses a GMime.FilterBest to determine the best charset and encoding.
  *
  * WARNING: This call does not perform async I/O, meaning it will loop on the
  * stream without relinquishing control to the event loop.  Use with
  * caution.
  */
-public GMime.ContentEncoding get_best_content_encoding(GMime.Stream stream,
-    GMime.EncodingConstraint constraint) {
-    int count0 = 0, count8 = 0, linelen = 0, maxline = 0;
-    size_t total = 0, readlen;
-    // TODO: Increase buffer size?
-    uint8[] buffer = new uint8[1024];
-    
-    while ((readlen = stream.read(buffer)) > 0) {
-        total += readlen;
-        for(int i = 0; i < readlen; i++) {
-            char c = (char) buffer[i];
-            if (c == '\n') {
-                maxline = maxline > linelen ? maxline : linelen;
-                linelen = 0;
-            } else {
-                linelen++;
-                if (c == 0)
-                    count0++;
-                else if ((c & 0x80) != 0)
-                    count8++;
-            }
-        }
-    }
-    maxline = maxline > linelen ? maxline : linelen;
-    
-    GMime.ContentEncoding encoding = GMime.ContentEncoding.DEFAULT;
-    switch (constraint) {
-        case GMime.EncodingConstraint.7BIT:
-            if (count0 > 0) {
-                encoding = GMime.ContentEncoding.BASE64;
-            } else if (count8 > 0) {
-                if (count8 > (int) (total * 0.17))
-                    encoding = GMime.ContentEncoding.BASE64;
-                else
-                    encoding = GMime.ContentEncoding.QUOTEDPRINTABLE;
-            } else if (maxline > 998) {
-                encoding = GMime.ContentEncoding.QUOTEDPRINTABLE;
-            }
-        break;
-        
-        case GMime.EncodingConstraint.8BIT:
-            if (count0 > 0)
-                encoding = GMime.ContentEncoding.BASE64;
-            else if (maxline > 998)
-                encoding = GMime.ContentEncoding.QUOTEDPRINTABLE;
-        break;
-        
-        case GMime.EncodingConstraint.BINARY:
-            if (count0 + count8 > 0)
-                encoding = GMime.ContentEncoding.BINARY;
-        break;
-    }
-    
-    return encoding;
+public void get_best(GMime.Stream in_stream,
+                     out string charset,
+                     out GMime.ContentEncoding encoding) {
+    GMime.FilterBest filter = new GMime.FilterBest(
+        GMime.FilterBestFlags.CHARSET |
+        GMime.FilterBestFlags.ENCODING
+    );
+    GMime.StreamFilter out_stream = new GMime.StreamFilter(new GMime.StreamNull());
+    out_stream.add(filter);
+    in_stream.write_to_stream(out_stream);
+    charset = filter.charset();
+    encoding = filter.encoding(GMime.EncodingConstraint.7BIT);
+}
+
+/**
+ * Uses a GMime.FilterBest to determine the best encoding.
+ *
+ * WARNING: This call does not perform async I/O, meaning it will loop on the
+ * stream without relinquishing control to the event loop.  Use with
+ * caution.
+ */
+public GMime.ContentEncoding get_best_encoding(GMime.Stream in_stream) {
+    GMime.FilterBest filter = new GMime.FilterBest(
+        GMime.FilterBestFlags.ENCODING
+    );
+    GMime.StreamFilter out_stream = new GMime.StreamFilter(new GMime.StreamNull());
+    out_stream.add(filter);
+    in_stream.write_to_stream(out_stream);
+    return filter.encoding(GMime.EncodingConstraint.7BIT);
 }
 
 public string get_clean_attachment_filename(GMime.Part part) {


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]