[geary/wip/362-utf8-attachments: 3/3] Don't munge Base64 encoded text parts when saving to disk
- From: Michael Gratton <mjog src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [geary/wip/362-utf8-attachments: 3/3] Don't munge Base64 encoded text parts when saving to disk
- Date: Thu, 18 Jul 2019 04:51:03 +0000 (UTC)
commit 2cb0a83342bcdab3375b866fd549ec5b5c9dd744
Author: Michael Gratton <mike vee net>
Date: Thu Jul 18 14:38:38 2019 +1000
Don't munge Base64 encoded text parts when saving to disk
Avoid doing character encoding conversion and CRLF stripping when saving
text parts to disk. This prevents UTF-8 encoded text attachments from
being broken when base64 encoded and no encoding is specified.
See #362
.../conversation-viewer/conversation-message.vala | 5 +-
src/engine/imap-db/imap-db-attachment.vala | 2 +-
src/engine/rfc822/rfc822-message-data.vala | 4 +-
src/engine/rfc822/rfc822-message.vala | 1 +
src/engine/rfc822/rfc822-part.vala | 63 +++++++++++++++++-----
src/engine/rfc822/rfc822-utils.vala | 30 -----------
test/engine/rfc822-part-test.vala | 16 ++++--
7 files changed, 73 insertions(+), 48 deletions(-)
---
diff --git a/src/client/conversation-viewer/conversation-message.vala
b/src/client/conversation-viewer/conversation-message.vala
index c8fef70b..a52b6ff8 100644
--- a/src/client/conversation-viewer/conversation-message.vala
+++ b/src/client/conversation-viewer/conversation-message.vala
@@ -943,7 +943,10 @@ public class ConversationMessage : Gtk.Grid, Geary.BaseInterface {
}
try {
- this.web_view.add_internal_resource(id, part.write_to_buffer());
+ this.web_view.add_internal_resource(
+ id,
+ part.write_to_buffer(Geary.RFC822.Part.EncodingConversion.UTF8)
+ );
} catch (Geary.RFC822Error err) {
debug("Failed to get inline buffer: %s", err.message);
return null;
diff --git a/src/engine/imap-db/imap-db-attachment.vala b/src/engine/imap-db/imap-db-attachment.vala
index 540222d7..f2aacc25 100644
--- a/src/engine/imap-db/imap-db-attachment.vala
+++ b/src/engine/imap-db/imap-db-attachment.vala
@@ -183,7 +183,7 @@ private class Geary.ImapDB.Attachment : Geary.Attachment {
stream, GMime.StreamBufferMode.BLOCK_WRITE
);
- part.write_to_stream(stream);
+ part.write_to_stream(stream, RFC822.Part.EncodingConversion.NONE);
// Using the stream's length is a bit of a hack, but at
// least on one system we are getting 0 back for the file
diff --git a/src/engine/rfc822/rfc822-message-data.vala b/src/engine/rfc822/rfc822-message-data.vala
index 9476ce78..f5eeb628 100644
--- a/src/engine/rfc822/rfc822-message-data.vala
+++ b/src/engine/rfc822/rfc822-message-data.vala
@@ -414,7 +414,9 @@ public class Geary.RFC822.PreviewText : Geary.RFC822.Text {
output_stream.set_owner(false);
try {
- part.write_to_stream(output_stream);
+ part.write_to_stream(
+ output_stream, Part.EncodingConversion.NONE
+ );
uint8[] data = output.data;
data += (uint8) '\0';
diff --git a/src/engine/rfc822/rfc822-message.vala b/src/engine/rfc822/rfc822-message.vala
index 9eba825b..e140fad3 100644
--- a/src/engine/rfc822/rfc822-message.vala
+++ b/src/engine/rfc822/rfc822-message.vala
@@ -596,6 +596,7 @@ public class Geary.RFC822.Message : BaseObject, EmailHeaderSet {
if (content_type.is_type("text", text_subtype)) {
body = part.write_to_buffer(
+ Part.EncodingConversion.UTF8,
to_html ? Part.BodyFormatting.HTML : Part.BodyFormatting.NONE
).to_string();
} else if (replacer != null &&
diff --git a/src/engine/rfc822/rfc822-part.vala b/src/engine/rfc822/rfc822-part.vala
index c7d941fd..cfbdde90 100644
--- a/src/engine/rfc822/rfc822-part.vala
+++ b/src/engine/rfc822/rfc822-part.vala
@@ -16,7 +16,17 @@
public class Geary.RFC822.Part : Object {
- /** Specifies a format to apply to body data when writing it. */
+ /** Specifies a character set encoding when writing text bodies. */
+ public enum EncodingConversion {
+
+ /** No conversion will be applied. */
+ NONE,
+
+ /** Plain text bodies will be converted to UTF-8. */
+ UTF8;
+ }
+
+ /** Specifies a format to apply when writing text bodies. */
public enum BodyFormatting {
/** No formatting will be applied. */
@@ -133,18 +143,20 @@ public class Geary.RFC822.Part : Object {
return filename;
}
- public Memory.Buffer write_to_buffer(BodyFormatting format = BodyFormatting.NONE)
+ public Memory.Buffer write_to_buffer(EncodingConversion conversion,
+ BodyFormatting format = BodyFormatting.NONE)
throws RFC822Error {
ByteArray byte_array = new ByteArray();
GMime.StreamMem stream = new GMime.StreamMem.with_byte_array(byte_array);
stream.set_owner(false);
- write_to_stream(stream, format);
+ write_to_stream(stream, conversion, format);
return new Geary.Memory.ByteBuffer.from_byte_array(byte_array);
}
internal void write_to_stream(GMime.Stream destination,
+ EncodingConversion conversion,
BodyFormatting format = BodyFormatting.NONE)
throws RFC822Error {
GMime.DataWrapper? wrapper = (this.source_part != null)
@@ -156,22 +168,40 @@ public class Geary.RFC822.Part : Object {
);
}
- Mime.ContentType content_type = this.get_effective_content_type();
- if (content_type.is_type("text", Mime.ContentType.WILDCARD)) {
- // Assume encoded text, convert to unencoded UTF-8
+ if (this.content_type.is_type("text", Mime.ContentType.WILDCARD)) {
GMime.StreamFilter filter = new GMime.StreamFilter(destination);
- string? charset = content_type.params.get_value("charset");
- filter.add(
- Geary.RFC822.Utils.create_utf8_filter_charset(charset)
- );
+
+ // Do charset conversion if needed
+ string? charset = this.content_type.params.get_value("charset");
+ if (String.is_empty(charset)) {
+ // Fallback charset per RFC 2045, Section 5.2
+ charset = "US-ASCII";
+ }
+ if (conversion == UTF8 && !is_utf_8(charset)) {
+ print("\nconverting!\n");
+ GMime.FilterCharset? filter_charset = new GMime.FilterCharset(
+ charset, Geary.RFC822.UTF8_CHARSET
+ );
+ if (filter_charset == null) {
+ // Source charset not supported, so assume
+ // US-ASCII
+ filter_charset = new GMime.FilterCharset(
+ "US-ASCII", Geary.RFC822.UTF8_CHARSET
+ );
+ }
+ filter.add(filter_charset);
+ }
bool flowed = content_type.params.has_value_ci("format", "flowed");
bool delsp = content_type.params.has_value_ci("DelSp", "yes");
// Remove the CR's in any CRLF sequence since they are
// effectively a wire encoding, unless the format requires
- // them.
- if (!(content_type.media_subtype in CR_PRESERVING_TEXT_TYPES)) {
+ // them or the content encoding is Base64 (being a binary
+ // format)
+ if ((this.source_part == null ||
+ this.source_part.encoding != BASE64) &&
+ !(content_type.media_subtype in CR_PRESERVING_TEXT_TYPES)) {
filter.add(new GMime.FilterCRLF(false, false));
}
@@ -207,3 +237,12 @@ public class Geary.RFC822.Part : Object {
}
}
+
+private inline bool is_utf_8(string charset) {
+ return (
+ charset == "UTF-8" ||
+ charset == "utf-8" ||
+ charset == "UTF8" ||
+ charset == "utf8"
+ );
+}
\ No newline at end of file
diff --git a/src/engine/rfc822/rfc822-utils.vala b/src/engine/rfc822/rfc822-utils.vala
index 7672d8ae..dfa4180e 100644
--- a/src/engine/rfc822/rfc822-utils.vala
+++ b/src/engine/rfc822/rfc822-utils.vala
@@ -11,36 +11,6 @@ namespace Geary.RFC822.Utils {
// in UTF-8, and is unmolested by GMime.FilterHTML.
public const char QUOTE_MARKER = '\x7f';
-/**
- * Charset to use when it is otherwise missing or invalid
- *
- * Per RFC 2045, Section 5.2.
- */
-public const string DEFAULT_MIME_CHARSET = "us-ascii";
-
-/**
- * Creates a filter to convert a MIME charset to UTF-8.
- *
- * Param `from_charset` may be null, empty or invalid, in which case
- * `DEFAULT_MIME_CHARSET` will be used instead.
- */
-public GMime.FilterCharset create_utf8_filter_charset(string? from_charset) {
- string actual_charset = from_charset != null ? from_charset.strip() : "";
- if (Geary.String.is_empty(actual_charset)) {
- actual_charset = DEFAULT_MIME_CHARSET;
- }
- GMime.FilterCharset? filter_charset = new GMime.FilterCharset(
- actual_charset, Geary.RFC822.UTF8_CHARSET
- );
- if (filter_charset == null) {
- debug("Unknown charset: %s; using RFC 2045 default instead", from_charset);
- filter_charset = new GMime.FilterCharset(
- DEFAULT_MIME_CHARSET, Geary.RFC822.UTF8_CHARSET
- );
- assert(filter_charset != null);
- }
- return filter_charset;
-}
/**
* Uses the best-possible transfer of bytes from the Memory.Buffer to the GMime.StreamMem object.
diff --git a/test/engine/rfc822-part-test.vala b/test/engine/rfc822-part-test.vala
index ab5f026e..3af30ec0 100644
--- a/test/engine/rfc822-part-test.vala
+++ b/test/engine/rfc822-part-test.vala
@@ -10,6 +10,7 @@ class Geary.RFC822.PartTest : TestCase {
private const string CR_BODY = "This is an attachment.\n";
private const string CRLF_BODY = "This is an attachment.\r\n";
private const string ICAL_BODY = "BEGIN:VCALENDAR\r\nEND:VCALENDAR\r\n";
+ private const string UTF8_BODY = "Тест.";
public PartTest() {
@@ -19,6 +20,7 @@ class Geary.RFC822.PartTest : TestCase {
add_test("write_to_buffer_plain", write_to_buffer_plain);
add_test("write_to_buffer_plain_crlf", write_to_buffer_plain_crlf);
add_test("write_to_buffer_plain_ical", write_to_buffer_plain_ical);
+ add_test("write_to_buffer_plain_utf8", write_to_buffer_plain_utf8);
}
public void new_from_minimal_mime_part() throws Error {
@@ -56,7 +58,7 @@ class Geary.RFC822.PartTest : TestCase {
public void write_to_buffer_plain() throws Error {
Part test = new Part(new_part("text/plain", CR_BODY.data));
- Memory.Buffer buf = test.write_to_buffer();
+ Memory.Buffer buf = test.write_to_buffer(Part.EncodingConversion.NONE);
assert_string(CR_BODY, buf.to_string());
}
@@ -64,7 +66,7 @@ class Geary.RFC822.PartTest : TestCase {
public void write_to_buffer_plain_crlf() throws Error {
Part test = new Part(new_part("text/plain", CRLF_BODY.data));
- Memory.Buffer buf = test.write_to_buffer();
+ Memory.Buffer buf = test.write_to_buffer(Part.EncodingConversion.NONE);
// CRLF should be stripped
assert_string(CR_BODY, buf.to_string());
@@ -73,12 +75,20 @@ class Geary.RFC822.PartTest : TestCase {
public void write_to_buffer_plain_ical() throws Error {
Part test = new Part(new_part("text/calendar", ICAL_BODY.data));
- Memory.Buffer buf = test.write_to_buffer();
+ Memory.Buffer buf = test.write_to_buffer(Part.EncodingConversion.NONE);
// CRLF should not be stripped
assert_string(ICAL_BODY, buf.to_string());
}
+ public void write_to_buffer_plain_utf8() throws GLib.Error {
+ Part test = new Part(new_part("text/plain", UTF8_BODY.data));
+
+ Memory.Buffer buf = test.write_to_buffer(Part.EncodingConversion.NONE);
+
+ assert_string(UTF8_BODY, buf.to_string());
+ }
+
private GMime.Part new_part(string? mime_type,
uint8[] body,
GMime.ContentEncoding encoding = GMime.ContentEncoding.DEFAULT) {
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]