[geary] Don't always use UTF-8 as the charset, use ASCII or 8859-1 if possible.
- From: Michael Gratton <mjog src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [geary] Don't always use UTF-8 as the charset, use ASCII or 8859-1 if possible.
- Date: Mon, 1 Aug 2016 13:22:08 +0000 (UTC)
commit 0c3160779e93ff1716f13a64595baa0233f76e52
Author: Michael James Gratton <mike vee net>
Date: Mon Aug 1 23:21:28 2016 +1000
Don't always use UTF-8 as the charset, use ASCII or 8859-1 if possible.
Previously, Geary would simply assume that UTF-8 is a reasonable charset
to use when sending both plain text and HTML parts. It is; however,
Mailman will re-encode content sent as UTF-8 using Base64, and break
format=flowed in the process.
This works around the problem for messages containing US-ASCII or
ISO-8859-1 chars, at least. No solution yet for UTF-8 except maybe
getting Mailman fixed.
Bug 769137
* src/engine/rfc822/rfc822-message.vala (Message): Guess the best charset
for a message's content rather than assuming UTF-8. Set the charset for
both plain text and HTML parts, but only guess it and the encoding at
most once for both.
* src/engine/rfc822/rfc822-utils.vala: Replace get_best_content_encoding
with get_best for determining both charset and encoding, and
get_best_encoding for just guessing encoding, both using
GMime.FilterBest instead of our own custom code.
src/engine/rfc822/rfc822-message.vala | 35 ++++++++-----
src/engine/rfc822/rfc822-utils.vala | 88 ++++++++++++---------------------
2 files changed, 52 insertions(+), 71 deletions(-)
---
diff --git a/src/engine/rfc822/rfc822-message.vala b/src/engine/rfc822/rfc822-message.vala
index aca8bfb..0cc48f0 100644
--- a/src/engine/rfc822/rfc822-message.vala
+++ b/src/engine/rfc822/rfc822-message.vala
@@ -153,12 +153,13 @@ public class Geary.RFC822.Message : BaseObject {
}
// Body: text format (optional)
+ string? charset = null;
+ GMime.ContentEncoding? encoding = null;
+
GMime.Part? body_text = null;
if (email.body_text != null) {
GMime.Stream stream = new GMime.StreamMem.with_buffer(email.body_text.data);
- GMime.ContentEncoding encoding = Geary.RFC822.Utils.get_best_content_encoding(
- stream, GMime.EncodingConstraint.7BIT
- );
+ Geary.RFC822.Utils.get_best(stream, out charset, out encoding);
if (encoding == GMime.ContentEncoding.BASE64) {
// Base64-encoded text needs to have CR's added after
// LF's before encoding, otherwise it breaks
@@ -171,7 +172,11 @@ public class Geary.RFC822.Message : BaseObject {
stream, GMime.ContentEncoding.DEFAULT
);
body_text = new GMime.Part();
- body_text.set_content_type(new GMime.ContentType.from_string("text/plain; charset=utf-8; format=flowed"));
+ body_text.set_content_type(
+ new GMime.ContentType.from_string(
+ "text/plain; charset=%s; format=flowed".printf(charset)
+ )
+ );
body_text.set_content_object(content);
body_text.set_content_encoding(encoding);
}
@@ -180,16 +185,22 @@ public class Geary.RFC822.Message : BaseObject {
GMime.Part? body_html = null;
if (email.body_html != null) {
GMime.StreamMem stream = new GMime.StreamMem.with_buffer(email.body_html.data);
+ if (charset == null) {
+ Geary.RFC822.Utils.get_best(stream, out charset, out encoding);
+ }
GMime.DataWrapper content = new GMime.DataWrapper.with_stream(stream,
GMime.ContentEncoding.DEFAULT);
-
+
body_html = new GMime.Part();
- body_html.set_content_type(new GMime.ContentType.from_string("text/html; charset=utf-8"));
+ body_html.set_content_type(
+ new GMime.ContentType.from_string(
+ "text/html; charset=%s".printf(charset)
+ )
+ );
body_html.set_content_object(content);
- body_html.set_content_encoding(Geary.RFC822.Utils.get_best_content_encoding(stream,
- GMime.EncodingConstraint.7BIT));
+ body_html.set_content_encoding(encoding);
}
-
+
// Build the message's mime part.
Gee.List<GMime.Object> main_parts = new Gee.LinkedList<GMime.Object>();
@@ -283,11 +294,7 @@ public class Geary.RFC822.Message : BaseObject {
GMime.StreamGIO stream = new GMime.StreamGIO(file);
stream.set_owner(false);
part.set_content_object(new GMime.DataWrapper.with_stream(stream, GMime.ContentEncoding.BINARY));
-
- // This encoding is the "Content-Transfer-Encoding", which GMime automatically converts to.
- part.set_content_encoding(Geary.RFC822.Utils.get_best_content_encoding(stream,
- GMime.EncodingConstraint.7BIT));
-
+ part.set_content_encoding(Geary.RFC822.Utils.get_best_encoding(stream));
return part;
}
diff --git a/src/engine/rfc822/rfc822-utils.vala b/src/engine/rfc822/rfc822-utils.vala
index 41da289..2e9b8fa 100644
--- a/src/engine/rfc822/rfc822-utils.vala
+++ b/src/engine/rfc822/rfc822-utils.vala
@@ -321,68 +321,42 @@ public bool comp_char_arr_slice(char[] array, uint start, string comp) {
return true;
}
-/*
- * This function is adapted from the GMimeFilterBest source in the GMime
- * library (gmime-filter-best.c) by Jeffrey Stedfast, LGPL 2.1.
+/**
+ * Uses a GMime.FilterBest to determine the best charset and encoding.
*
* WARNING: This call does not perform async I/O, meaning it will loop on the
* stream without relinquishing control to the event loop. Use with
* caution.
*/
-public GMime.ContentEncoding get_best_content_encoding(GMime.Stream stream,
- GMime.EncodingConstraint constraint) {
- int count0 = 0, count8 = 0, linelen = 0, maxline = 0;
- size_t total = 0, readlen;
- // TODO: Increase buffer size?
- uint8[] buffer = new uint8[1024];
-
- while ((readlen = stream.read(buffer)) > 0) {
- total += readlen;
- for(int i = 0; i < readlen; i++) {
- char c = (char) buffer[i];
- if (c == '\n') {
- maxline = maxline > linelen ? maxline : linelen;
- linelen = 0;
- } else {
- linelen++;
- if (c == 0)
- count0++;
- else if ((c & 0x80) != 0)
- count8++;
- }
- }
- }
- maxline = maxline > linelen ? maxline : linelen;
-
- GMime.ContentEncoding encoding = GMime.ContentEncoding.DEFAULT;
- switch (constraint) {
- case GMime.EncodingConstraint.7BIT:
- if (count0 > 0) {
- encoding = GMime.ContentEncoding.BASE64;
- } else if (count8 > 0) {
- if (count8 > (int) (total * 0.17))
- encoding = GMime.ContentEncoding.BASE64;
- else
- encoding = GMime.ContentEncoding.QUOTEDPRINTABLE;
- } else if (maxline > 998) {
- encoding = GMime.ContentEncoding.QUOTEDPRINTABLE;
- }
- break;
-
- case GMime.EncodingConstraint.8BIT:
- if (count0 > 0)
- encoding = GMime.ContentEncoding.BASE64;
- else if (maxline > 998)
- encoding = GMime.ContentEncoding.QUOTEDPRINTABLE;
- break;
-
- case GMime.EncodingConstraint.BINARY:
- if (count0 + count8 > 0)
- encoding = GMime.ContentEncoding.BINARY;
- break;
- }
-
- return encoding;
+public void get_best(GMime.Stream in_stream,
+ out string charset,
+ out GMime.ContentEncoding encoding) {
+ GMime.FilterBest filter = new GMime.FilterBest(
+ GMime.FilterBestFlags.CHARSET |
+ GMime.FilterBestFlags.ENCODING
+ );
+ GMime.StreamFilter out_stream = new GMime.StreamFilter(new GMime.StreamNull());
+ out_stream.add(filter);
+ in_stream.write_to_stream(out_stream);
+ charset = filter.charset();
+ encoding = filter.encoding(GMime.EncodingConstraint.7BIT);
+}
+
+/**
+ * Uses a GMime.FilterBest to determine the best encoding.
+ *
+ * WARNING: This call does not perform async I/O, meaning it will loop on the
+ * stream without relinquishing control to the event loop. Use with
+ * caution.
+ */
+public GMime.ContentEncoding get_best_encoding(GMime.Stream in_stream) {
+ GMime.FilterBest filter = new GMime.FilterBest(
+ GMime.FilterBestFlags.ENCODING
+ );
+ GMime.StreamFilter out_stream = new GMime.StreamFilter(new GMime.StreamNull());
+ out_stream.add(filter);
+ in_stream.write_to_stream(out_stream);
+ return filter.encoding(GMime.EncodingConstraint.7BIT);
}
public string get_clean_attachment_filename(GMime.Part part) {
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]