[geary/wip/714317-hide-html-in-preview: 3/4] Combine fetch and convo message codepaths for generating preview text.



commit 41002ed2b66ce155e4e07bc5e91f31c8a942870f
Author: Michael James Gratton <mike vee net>
Date:   Mon Dec 19 01:59:32 2016 +1100

    Combine fetch and convo message codepaths for generating preview text.
    
    Ensures that both appear the same to the user, and that the conversation
    message preview gets the same armour and quote stripping that the fetch
    preview does.
    
    Added tests.
    
    Bug 714317
    
    * src/engine/rfc822/rfc822-utils.vala
      (Geary.RFC822.Utils::to_preview_text): New common function to handle
      generating a preview from a RFC 822 plain text or HTML string.
    
    * src/engine/rfc822/rfc822-message-data.vala (PreviewText::with_header):
      Move plain text armour and quote stripping to to_preview_text(), call
      that to generate preview text.
    
    * src/engine/rfc822/rfc822-message.vala (Message::get_preview): call
      to_preview_text() to generate the preview text.

 src/engine/rfc822/rfc822-message-data.vala |   36 +--------------
 src/engine/rfc822/rfc822-message.vala      |   19 +++++---
 src/engine/rfc822/rfc822-utils.vala        |   67 ++++++++++++++++++++++++++++
 test/CMakeLists.txt                        |    1 +
 test/engine/rfc822-message-data-test.vala  |   26 ++++++++---
 test/engine/rfc822-utils-test.vala         |   45 +++++++++++++++++++
 test/main.vala                             |    1 +
 7 files changed, 149 insertions(+), 46 deletions(-)
---
diff --git a/src/engine/rfc822/rfc822-message-data.vala b/src/engine/rfc822/rfc822-message-data.vala
index 6d80177..2f232b0 100644
--- a/src/engine/rfc822/rfc822-message-data.vala
+++ b/src/engine/rfc822/rfc822-message-data.vala
@@ -391,40 +391,10 @@ public class Geary.RFC822.PreviewText : Geary.RFC822.Text {
         uint8[] data = output.data;
         data += (uint8) '\0';
 
-        // Fix the preview up by removing HTML tags, redundant white space, common types of
-        // message armor, text-based quotes, and various MIME fields.
-        string preview_text = "";
-        string original_text = is_html ? Geary.HTML.html_to_text((string) data, false) : (string) data;
-        string[] all_lines = original_text.split("\r\n");
-        bool in_header = false; // True after a header
-
-        foreach(string line in all_lines) {
-            if (in_header && line.has_prefix(" ") || line.has_prefix("\t")) {
-                continue; // Skip "folded" (multi-line) headers.
-            } else {
-                in_header = false;
-            }
-            
-            if (line.has_prefix("Content-")) {
-                in_header = true;
-                continue;
-            }
-            
-            if (Geary.String.is_empty_or_whitespace(line))
-                continue;
-            
-            if (line.has_prefix("--"))
-                continue;
-            
-            if (line.has_prefix(">"))
-                continue;
-            
-            preview_text += " " + line;
-        }
-        
-        base (new Geary.Memory.StringBuffer(Geary.String.reduce_whitespace(preview_text)));
+        string preview_text = Geary.RFC822.Utils.to_preview_text((string) data, is_html ? TextFormat.HTML : 
TextFormat.PLAIN);
+        base(new Geary.Memory.StringBuffer(preview_text));
     }
-    
+
     public PreviewText.from_string(string preview) {
         base (new Geary.Memory.StringBuffer(preview));
     }
diff --git a/src/engine/rfc822/rfc822-message.vala b/src/engine/rfc822/rfc822-message.vala
index 217b445..4d927ad 100644
--- a/src/engine/rfc822/rfc822-message.vala
+++ b/src/engine/rfc822/rfc822-message.vala
@@ -375,23 +375,30 @@ public class Geary.RFC822.Message : BaseObject {
         
         return email;
     }
-    
-    // Takes an e-mail object with a body and generates a preview.  If there is no body
-    // or the body is the empty string, the empty string will be returned.
+
+    /**
+     * Generates a preview from the email's message body.
+     *
+     * If there is no body or the body is the empty string, the empty
+     * string will be returned.
+     */
     public string get_preview() {
+        TextFormat format = TextFormat.PLAIN;
         string? preview = null;
         try {
             preview = get_plain_body(false, null);
         } catch (Error e) {
             try {
-                preview = Geary.HTML.html_to_text(get_html_body(null), false);
+                format = TextFormat.HTML;
+                preview = get_html_body(null);
             } catch (Error error) {
                 debug("Could not generate message preview: %s\n and: %s", e.message, error.message);
             }
         }
 
-        return Geary.String.safe_byte_substring((preview ?? "").chug(),
-            Geary.Email.MAX_PREVIEW_BYTES);
+        return (preview != null)
+          ? Geary.RFC822.Utils.to_preview_text(preview, format)
+          : "";
     }
 
     /**
diff --git a/src/engine/rfc822/rfc822-utils.vala b/src/engine/rfc822/rfc822-utils.vala
index b7405bd..43fcaf7 100644
--- a/src/engine/rfc822/rfc822-utils.vala
+++ b/src/engine/rfc822/rfc822-utils.vala
@@ -343,6 +343,73 @@ public bool comp_char_arr_slice(char[] array, uint start, string comp) {
 }
 
 /**
+ * Obtains the best preview text from a plain or HTML string.
+ *
+ * The string returned will be at most `Geary.Email.MAX_PREVIEW_BYTES`
+ * long, and will have had its whitespace squashed.
+ */
+public string to_preview_text(string? text, TextFormat format) {
+    string preview = "";
+
+    if (format == TextFormat.PLAIN) {
+        StringBuilder buf = new StringBuilder();
+        string[] all_lines = text.split("\r\n");
+        bool in_mime_header = false;
+        bool in_inline_pgp_header = false;
+        foreach (string line in all_lines) {
+            if ((in_mime_header || in_inline_pgp_header) &&
+                line.has_prefix(" ") ||
+                line.has_prefix("\t")) {
+                continue; // Skip "folded" (multi-line) headers.
+            } else {
+                in_mime_header = false;
+            }
+
+            if (in_inline_pgp_header) {
+                if (Geary.String.is_empty(line)) {
+                    in_inline_pgp_header = false;
+                }
+                continue;
+            }
+
+            if (line.has_prefix("Content-")) {
+                in_mime_header = true;
+                continue;
+            }
+
+            if (line.has_prefix("-----BEGIN PGP SIGNED MESSAGE-----")) {
+                in_inline_pgp_header = true;
+                continue;
+            }
+
+            if (Geary.String.is_empty_or_whitespace(line))
+                continue;
+
+            if (line.has_prefix("--"))
+                continue;
+
+            if (line.has_prefix("===="))
+                continue;
+
+            if (line.has_prefix("~~~~"))
+                continue;
+
+            buf.append(" ");
+            buf.append(line);
+        }
+
+        preview = buf.str;
+    } else if (format == TextFormat.HTML) {
+        preview = Geary.HTML.html_to_text(text, false);
+    }
+
+    return Geary.String.safe_byte_substring(
+        Geary.String.reduce_whitespace(preview),
+        Geary.Email.MAX_PREVIEW_BYTES
+    );
+}
+
+/**
  * Uses a GMime.FilterBest to determine the best charset.
  *
  * WARNING: This call does not perform async I/O, meaning it will loop on the
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 7b39df7..4c7c87e 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -8,6 +8,7 @@ set(TEST_SRC
 
   engine/rfc822-mailbox-address-test.vala
   engine/rfc822-message-data-test.vala
+  engine/rfc822-utils-test.vala
   engine/util-html-test.vala
 )
 
diff --git a/test/engine/rfc822-message-data-test.vala b/test/engine/rfc822-message-data-test.vala
index 0c4ce5a..fdf819e 100644
--- a/test/engine/rfc822-message-data-test.vala
+++ b/test/engine/rfc822-message-data-test.vala
@@ -13,21 +13,33 @@ class Geary.RFC822.MessageDataTest : Gee.TestCase {
     }
 
     public void preview_text_with_header() {
-        string part_headers = "Content-Type: text/html; charset=utf-8\r\nContent-Transfer-Encoding: 
quoted-printable\r\n\r\n";
+        PreviewText plain_preview1 = new PreviewText.with_header(
+            new Geary.Memory.StringBuffer(PLAIN_BODY1_ENCODED),
+            new Geary.Memory.StringBuffer(PLAIN_BODY1_HEADERS)
+        );
+        assert(plain_preview1.buffer.to_string() == PLAIN_BODY1_EXPECTED.substring(0, 
Geary.Email.MAX_PREVIEW_BYTES));
+
+        string html_part_headers = "Content-Type: text/html; charset=utf-8\r\nContent-Transfer-Encoding: 
quoted-printable\r\n\r\n";
 
-        PreviewText preview1 = new PreviewText.with_header(
+        PreviewText html_preview1 = new PreviewText.with_header(
             new Geary.Memory.StringBuffer(HTML_BODY1_ENCODED),
-            new Geary.Memory.StringBuffer(part_headers)
+            new Geary.Memory.StringBuffer(html_part_headers)
         );
-        assert(preview1.buffer.to_string() == HTML_BODY1_EXPECTED);
+        assert(html_preview1.buffer.to_string() == HTML_BODY1_EXPECTED.substring(0, 
Geary.Email.MAX_PREVIEW_BYTES));
 
-        PreviewText preview2 = new PreviewText.with_header(
+        PreviewText html_preview2 = new PreviewText.with_header(
             new Geary.Memory.StringBuffer(HTML_BODY2_ENCODED),
-            new Geary.Memory.StringBuffer(part_headers)
+            new Geary.Memory.StringBuffer(html_part_headers)
         );
-        assert(preview2.buffer.to_string() == HTML_BODY2_EXPECTED);
+        assert(html_preview2.buffer.to_string() == HTML_BODY2_EXPECTED.substring(0, 
Geary.Email.MAX_PREVIEW_BYTES));
     }
 
+    public static string PLAIN_BODY1_HEADERS = "Content-Type: text/plain; 
charset=\"us-ascii\"\r\nContent-Transfer-Encoding: 7bit\r\n";
+
+    public static string PLAIN_BODY1_ENCODED = "Content-Type: text/plain; 
charset=\"us-ascii\"\r\nContent-Transfer-Encoding: 7bit\r\n\r\n-----BEGIN PGP SIGNED MESSAGE-----\r\nHash: 
SHA512\r\n\r\n=============================================================================\r\nFreeBSD-EN-16:11.vmbus
                                          Errata Notice\r\n                                                   
       The FreeBSD Project\r\n\r\nTopic:          Avoid using spin locks for channel message 
locks\r\n\r\nCategory:       core\r\nModule:         vmbus\r\nAnnounced:      2016-08-12\r\nCredits:        
Microsoft OSTC\r\nAffects:        FreeBSD 10.3\r\nCorrected:      2016-06-15 09:52:01 UTC (stable/10, 
10.3-STABLE)\r\n                2016-08-12 04:01:16 UTC (releng/10.3, 10.3-RELEASE-p7)\r\n\r\nFor general 
information regarding FreeBSD Errata Notices and Security\r\nAdvisories, including descriptions of the fields 
above, security\r\nbranches, and the following sections, pleas
 e visit\r\n<URL:https://security.FreeBSD.org/>.\r\n";
+
+    public static string PLAIN_BODY1_EXPECTED = "FreeBSD-EN-16:11.vmbus Errata Notice The FreeBSD Project 
Topic: Avoid using spin locks for channel message locks Category: core Module: vmbus Announced: 2016-08-12 
Credits: Microsoft OSTC Affects: FreeBSD 10.3 Corrected: 2016-06-15 09:52:01 UTC (stable/10, 10.3-STABLE) 
2016-08-12 04:01:16 UTC (releng/10.3, 10.3-RELEASE-p7)";
+
     public static string HTML_BODY1_ENCODED = """<html><head>
 <meta http-equiv=3DContent-Type content=3D"text/html; charset=3Dutf-8">
 <style>
diff --git a/test/engine/rfc822-utils-test.vala b/test/engine/rfc822-utils-test.vala
new file mode 100644
index 0000000..af548ab
--- /dev/null
+++ b/test/engine/rfc822-utils-test.vala
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2016 Michael Gratton <mike vee net>
+ *
+ * This software is licensed under the GNU Lesser General Public License
+ * (version 2.1 or later). See the COPYING file in this distribution.
+ */
+
+class Geary.RFC822.Utils.Test : Gee.TestCase {
+
+    public Test() {
+        base("Geary.RFC822.Utils.Test");
+        add_test("to_preview_text", to_preview_text);
+    }
+
+    public void to_preview_text() {
+        assert(Geary.RFC822.Utils.to_preview_text(PLAIN_BODY_ENCODED, Geary.RFC822.TextFormat.PLAIN) ==
+               PLAIN_BODY_EXPECTED.substring(0, Geary.Email.MAX_PREVIEW_BYTES));
+        assert(Geary.RFC822.Utils.to_preview_text(HTML_BODY_ENCODED, Geary.RFC822.TextFormat.HTML) ==
+               HTML_BODY_EXPECTED.substring(0, Geary.Email.MAX_PREVIEW_BYTES));
+        assert(Geary.RFC822.Utils.to_preview_text(HTML_BODY_ENCODED, Geary.RFC822.TextFormat.HTML) ==
+               HTML_BODY_EXPECTED.substring(0, Geary.Email.MAX_PREVIEW_BYTES));
+    }
+
+    public static string PLAIN_BODY_ENCODED = "Content-Type: text/plain; 
charset=\"us-ascii\"\r\nContent-Transfer-Encoding: 7bit\r\n\r\n-----BEGIN PGP SIGNED MESSAGE-----\r\nHash: 
SHA512\r\n\r\n=============================================================================\r\nFreeBSD-EN-16:11.vmbus
                                          Errata Notice\r\n                                                   
       The FreeBSD Project\r\n\r\nTopic:          Avoid using spin locks for channel message 
locks\r\n\r\nCategory:       core\r\nModule:         vmbus\r\nAnnounced:      2016-08-12\r\nCredits:        
Microsoft OSTC\r\nAffects:        FreeBSD 10.3\r\nCorrected:      2016-06-15 09:52:01 UTC (stable/10, 
10.3-STABLE)\r\n                2016-08-12 04:01:16 UTC (releng/10.3, 10.3-RELEASE-p7)\r\n\r\nFor general 
information regarding FreeBSD Errata Notices and Security\r\nAdvisories, including descriptions of the fields 
above, security\r\nbranches, and the following sections, please
  visit\r\n<URL:https://security.FreeBSD.org/>.\r\n";
+
+    public static string PLAIN_BODY_EXPECTED = "FreeBSD-EN-16:11.vmbus Errata Notice The FreeBSD Project 
Topic: Avoid using spin locks for channel message locks Category: core Module: vmbus Announced: 2016-08-12 
Credits: Microsoft OSTC Affects: FreeBSD 10.3 Corrected: 2016-06-15 09:52:01 UTC (stable/10, 10.3-STABLE) 
2016-08-12 04:01:16 UTC (releng/10.3, 10.3-RELEASE-p7)";
+
+    public static string HTML_BODY_ENCODED = """<html><head>
+<meta http-equiv=Content-Type content="text/html; charset=utf-8">
+<style>
+.bodyblack { font-family: Verdana, Arial, Helvetica, sans-serif; font-size: 12px; }
+td { font-size: 12px; }
+.footer { font-family: Verdana, Arial, Helvetica, sans-serif; font-size: 10px; }
+</style>
+</head>
+<body><table cellSpacing="0" cellPadding="0" width="450" border="0" class="bodyblack"><tr><td>
+<p><br />Hi Kenneth, <br /> <br /> We xxxxx xxxx xx xxx xxx xx xxxx x xxxxxxxx xxxxxxxx.
+<br /> <br /> <br /> <br />Thank you, <br /> <br />XXXXX
+X XXXXXX<br /><br />You can reply directly to this message or click the following link:<br /><a 
href="https://app.foobar.com/xxxxxxxx752a0ab01641966deff6c48623aba";>https://app.foobar.com/xxxxxxxxxxxxxxxx1641966deff6c48623aba</a><br
 /><br />You can change your email preferences at:<br /><a 
href="https://app.foobar.com/xxxxxxxxxxxxx";>https://app.foobar.com/xxxxxxxxxxx</a></p></td></tr>
+</table></body></html>
+""";
+
+    public static string HTML_BODY_EXPECTED = "Hi Kenneth, We xxxxx xxxx xx xxx xxx xx xxxx x xxxxxxxx 
xxxxxxxx. Thank you, XXXXX X XXXXXX You can reply directly to this message or click the following link: 
https://app.foobar.com/xxxxxxxxxxxxxxxx1641966deff6c48623aba You can change your email preferences at: 
https://app.foobar.com/xxxxxxxxxxx";;
+
+}
diff --git a/test/main.vala b/test/main.vala
index e6228cc..c2e4db9 100644
--- a/test/main.vala
+++ b/test/main.vala
@@ -17,6 +17,7 @@ int main(string[] args) {
     root.add_suite(new Geary.HTML.UtilTest().get_suite());
     root.add_suite(new Geary.RFC822.MailboxAddressTest().get_suite());
     root.add_suite(new Geary.RFC822.MessageDataTest().get_suite());
+    root.add_suite(new Geary.RFC822.Utils.Test().get_suite());
 
     return Test.run();
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]