[geary/wip/714317-hide-html-in-preview: 1/4] Tidy up RFC822 default charset handling a bit.



commit 330d263947b53fa9d3f68a688e90cdaff6ad021a
Author: Michael James Gratton <mike vee net>
Date:   Sun Dec 18 20:34:57 2016 +1100

    Tidy up RFC822 default charset handling a bit.
    
    * src/engine/rfc822/rfc822-utils.vala
      (Geary.RFC822.Utils::create_utf8_filter_charset): Allow the given
      charset to be null, empty or invalid, and if so use the default
      specified in RFC 2045. Update call sites to simply pass in given
      charsets, rather than checking if they are empty, etc.
    
    * src/engine/rfc822/rfc822.vala (Geary.RFC822): Define a const value for
      UTF-8 encoding, using that instead of literals throughout the codebase.

 src/engine/rfc822/rfc822-message-data.vala |   10 ++++----
 src/engine/rfc822/rfc822-message.vala      |    8 +------
 src/engine/rfc822/rfc822-utils.vala        |   29 ++++++++++++++++++++++++---
 src/engine/rfc822/rfc822.vala              |    5 +++-
 src/engine/util/util-html.vala             |    2 +-
 5 files changed, 36 insertions(+), 18 deletions(-)
---
diff --git a/src/engine/rfc822/rfc822-message-data.vala b/src/engine/rfc822/rfc822-message-data.vala
index 658a397..0adb252 100644
--- a/src/engine/rfc822/rfc822-message-data.vala
+++ b/src/engine/rfc822/rfc822-message-data.vala
@@ -373,7 +373,8 @@ public class Geary.RFC822.PreviewText : Geary.RFC822.Text {
             
             encoding = part.get_header("Content-Transfer-Encoding");
         }
-        
+
+        // Parse the preview
         GMime.StreamMem input_stream = Utils.create_stream_mem(preview);
         ByteArray output = new ByteArray();
         GMime.StreamMem output_stream = new GMime.StreamMem.with_byte_array(output);
@@ -383,10 +384,9 @@ public class Geary.RFC822.PreviewText : Geary.RFC822.Text {
         GMime.StreamFilter filter = new GMime.StreamFilter(output_stream);
         if (encoding != null)
             filter.add(new GMime.FilterBasic(GMime.content_encoding_from_string(encoding), false));
-        
-        if (!String.is_empty(charset))
-            filter.add(Geary.RFC822.Utils.create_utf8_filter_charset(charset));
-        
+
+        filter.add(Geary.RFC822.Utils.create_utf8_filter_charset(charset));
+
         input_stream.write_to_stream(filter);
         uint8[] data = output.data;
         data += (uint8) '\0';
diff --git a/src/engine/rfc822/rfc822-message.vala b/src/engine/rfc822/rfc822-message.vala
index b37a157..1d0dce1 100644
--- a/src/engine/rfc822/rfc822-message.vala
+++ b/src/engine/rfc822/rfc822-message.vala
@@ -16,8 +16,6 @@ public class Geary.RFC822.Message : BaseObject {
     public delegate string? InlinePartReplacer(string filename, Mime.ContentType? content_type,
         Mime.ContentDisposition? disposition, string? content_id, Geary.Memory.Buffer buffer);
 
-    private const string DEFAULT_CHARSET = "UTF-8";
-
     private const string HEADER_SENDER = "Sender";
     private const string HEADER_IN_REPLY_TO = "In-Reply-To";
     private const string HEADER_REFERENCES = "References";
@@ -1000,8 +998,6 @@ public class Geary.RFC822.Message : BaseObject {
             // Assume encoded text, convert to unencoded UTF-8
             GMime.StreamFilter stream_filter = new GMime.StreamFilter(stream);
             string? charset = (content_type != null) ? content_type.params.get_value("charset") : null;
-            if (String.is_empty(charset))
-                charset = DEFAULT_CHARSET;
             stream_filter.add(Geary.RFC822.Utils.create_utf8_filter_charset(charset));
 
             bool flowed = (content_type != null) ? content_type.params.has_value_ci("format", "flowed") : 
false;
@@ -1068,9 +1064,7 @@ public class Geary.RFC822.Message : BaseObject {
             charset = Geary.RFC822.Utils.get_best_charset(content_stream);
         }
         GMime.StreamFilter filter_stream = new GMime.StreamFilter(content_stream);
-        if (charset != DEFAULT_CHARSET) {
-            filter_stream.add(new GMime.FilterCharset(DEFAULT_CHARSET, charset));
-        }
+        filter_stream.add(new GMime.FilterCharset(UTF8_CHARSET, charset));
         if (encoding == null) {
             encoding = Geary.RFC822.Utils.get_best_encoding(filter_stream);
         }
diff --git a/src/engine/rfc822/rfc822-utils.vala b/src/engine/rfc822/rfc822-utils.vala
index a481756..b7405bd 100644
--- a/src/engine/rfc822/rfc822-utils.vala
+++ b/src/engine/rfc822/rfc822-utils.vala
@@ -11,11 +11,32 @@ namespace Geary.RFC822.Utils {
 // in UTF-8, and is unmolested by GMime.FilterHTML.
 public const char QUOTE_MARKER = '\x7f';
 
-public GMime.FilterCharset create_utf8_filter_charset(string from_charset) {
-    GMime.FilterCharset? filter_charset = new GMime.FilterCharset(from_charset, "UTF-8");
+/**
+ * Charset to use when it is otherwise missing or invalid
+ *
+ * Per RFC 2045, Section 5.2.
+ */
+public const string DEFAULT_MIME_CHARSET = "us-ascii";
+
+/**
+ * Creates a filter to convert a MIME charset to UTF-8.
+ *
+ * Param `from_charset` may be null, empty or invalid, in which case
+ * `DEFAULT_MIME_CHARSET` will be used instead.
+ */
+public GMime.FilterCharset create_utf8_filter_charset(string? from_charset) {
+    string actual_charset = from_charset != null ? from_charset.strip() : "";
+    if (Geary.String.is_empty(actual_charset)) {
+        actual_charset = DEFAULT_MIME_CHARSET;
+    }
+    GMime.FilterCharset? filter_charset = new GMime.FilterCharset(
+        actual_charset, Geary.RFC822.UTF8_CHARSET
+    );
     if (filter_charset == null) {
-        debug("Unknown charset %s; treating as UTF-8", from_charset);
-        filter_charset = new GMime.FilterCharset("UTF-8", "UTF-8");
+        debug("Unknown charset: %s; using RFC 2045 default instead", from_charset);
+        filter_charset = new GMime.FilterCharset(
+            DEFAULT_MIME_CHARSET, Geary.RFC822.UTF8_CHARSET
+        );
         assert(filter_charset != null);
     }
     return filter_charset;
diff --git a/src/engine/rfc822/rfc822.vala b/src/engine/rfc822/rfc822.vala
index b09376a..486acec 100644
--- a/src/engine/rfc822/rfc822.vala
+++ b/src/engine/rfc822/rfc822.vala
@@ -14,10 +14,13 @@ public enum TextFormat {
     HTML
 }
 
+/** Official IANA charset encoding name for UTF-8. */
+public const string UTF8_CHARSET = "UTF-8";
+
 // This has the effect of ensuring all non US-ASCII and non-ISO-8859-1
 // headers are always encoded as UTF-8. This should be fine because
 // message bodies are also always sent as UTF-8.
-private const string[] USER_CHARSETS =  { "UTF-8" };
+private const string[] USER_CHARSETS =  { UTF8_CHARSET };
 
 private int init_count = 0;
 
diff --git a/src/engine/util/util-html.vala b/src/engine/util/util-html.vala
index 051d034..4c9216d 100644
--- a/src/engine/util/util-html.vala
+++ b/src/engine/util/util-html.vala
@@ -141,7 +141,7 @@ public string remove_html_tags(string input) {
  * entities, etc.  The layout of the text is largely lost.  This is primarily
  * useful for pulling out tokens for searching, not for presenting to the user.
  */
-public string html_to_text(string html, string encoding = "UTF-8") {
+public string html_to_text(string html, string encoding = Geary.RFC822.UTF8_CHARSET) {
     Html.Doc *doc = Html.Doc.read_doc(html, "", encoding, Html.ParserOption.RECOVER |
         Html.ParserOption.NOERROR | Html.ParserOption.NOWARNING | Html.ParserOption.NOBLANKS |
         Html.ParserOption.NONET | Html.ParserOption.COMPACT);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]