[evolution/wip-webkit2] EMailFormatter - Add new filter to remove META tags from message to avoid wrong encoding



commit f9018fa21469b14497b30725532dab56f6ff5aac
Author: Tomas Popela <tpopela redhat com>
Date:   Fri Dec 13 14:15:26 2013 +0100

    EMailFormatter - Add new filter to remove META tags from message to avoid wrong encoding
    
    We have to remove the HTML meta tag that sets the charset from the
    text/html parts of messages. Evolution converts every message to UTF-8
    before loading it into the web view. When WebKit2 loads such a converted
    message and the message still contains a meta tag that specifies an
    encoding, WK2 prefers the encoding given by that meta tag over UTF-8.
    So we have to remove this meta tag to show the message with the right
    encoding.

 em-format/Makefile.am                 |    2 +
 em-format/e-mail-formatter.c          |   12 ++
 em-format/e-mail-meta-remove-filter.c |  255 +++++++++++++++++++++++++++++++++
 em-format/e-mail-meta-remove-filter.h |   66 +++++++++
 4 files changed, 335 insertions(+), 0 deletions(-)
---
diff --git a/em-format/Makefile.am b/em-format/Makefile.am
index 2983eb0..62cb493 100644
--- a/em-format/Makefile.am
+++ b/em-format/Makefile.am
@@ -30,6 +30,7 @@ evolution_mail_formatter_include_HEADERS =            \
        e-mail-formatter-quote.h                        \
        e-mail-formatter-utils.h                        \
        e-mail-inline-filter.h                          \
+       e-mail-meta-remove-filter.h                     \
        e-mail-parser-extension.h                       \
        e-mail-parser.h                                 \
        e-mail-part.h                                   \
@@ -86,6 +87,7 @@ libevolution_mail_formatter_la_SOURCES =              \
        e-mail-formatter-quote-text-enriched.c          \
        e-mail-formatter-quote-text-html.c              \
        e-mail-formatter-quote-text-plain.c             \
+       e-mail-meta-remove-filter.c                     \
        e-mail-parser-extension.c                       \
        e-mail-parser.c                                 \
        e-mail-parser-application-mbox.c                \
diff --git a/em-format/e-mail-formatter.c b/em-format/e-mail-formatter.c
index 4a079ff..e6a99d0 100644
--- a/em-format/e-mail-formatter.c
+++ b/em-format/e-mail-formatter.c
@@ -21,6 +21,7 @@
 #include "e-mail-formatter-extension.h"
 #include "e-mail-formatter-utils.h"
 #include "e-mail-part.h"
+#include "e-mail-meta-remove-filter.h"
 
 #include <e-util/e-util.h>
 #include <libebackend/libebackend.h>
@@ -1033,6 +1034,7 @@ e_mail_formatter_format_text (EMailFormatter *formatter,
        CamelMimeFilter *filter;
        const gchar *charset = NULL;
        CamelMimeFilter *windows = NULL;
+       CamelMimeFilter *meta_remove = NULL;
        CamelStream *mem_stream = NULL;
        CamelMimePart *mime_part;
        CamelContentType *mime_type;
@@ -1082,6 +1084,13 @@ e_mail_formatter_format_text (EMailFormatter *formatter,
        if (filter != NULL) {
                camel_stream_filter_add (
                        CAMEL_STREAM_FILTER (filter_stream), filter);
+
+               if (g_strcmp0 (e_mail_part_get_mime_type (part), "text/html") == 0) {
+                       meta_remove = e_mail_meta_remove_filter_new (FALSE);
+
+                       camel_stream_filter_add (
+                               CAMEL_STREAM_FILTER (filter_stream), meta_remove);
+               }
                g_object_unref (filter);
        }
 
@@ -1100,6 +1109,9 @@ e_mail_formatter_format_text (EMailFormatter *formatter,
        if (windows != NULL)
                g_object_unref (windows);
 
+       if (meta_remove != NULL)
+               g_object_unref (meta_remove);
+
        g_object_unref (mem_stream);
 
        g_object_unref (mime_part);
diff --git a/em-format/e-mail-meta-remove-filter.c b/em-format/e-mail-meta-remove-filter.c
new file mode 100644
index 0000000..5ede12a
--- /dev/null
+++ b/em-format/e-mail-meta-remove-filter.c
@@ -0,0 +1,255 @@
+/*
+ * e-mail-meta-remove-filter.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) version 3.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with the program; if not, see <http://www.gnu.org/licenses/>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+#include "e-mail-meta-remove-filter.h"
+
+G_DEFINE_TYPE (EMailMetaRemoveFilter, e_mail_meta_remove_filter, CAMEL_TYPE_MIME_FILTER)
+
+/*
+ * remove_meta_tag:
+ * @filter: the #EMailMetaRemoveFilter doing the work
+ * @in: input chunk (not necessarily NUL-terminated)
+ * @len: number of bytes in @in
+ * @prespace: number of writable bytes available in front of @in
+ * @out: set to the filtered data
+ * @outlen: set to the number of bytes in @out
+ *
+ * Copies @in to the output while leaving out <meta ...> tags: all of them
+ * when remove_all_meta is set, otherwise only the first one that mentions
+ * "charset". Scanning stops for good (after_head) once "</head>" is seen
+ * or, in charset-only mode, once the charset meta tag has been removed;
+ * from then on the input is passed through untouched.
+ *
+ * Everything that is not removed is emitted exactly once and unmodified.
+ * A tag that is cut off by the end of the chunk is neither emitted nor
+ * dropped: it is saved with camel_mime_filter_backup(), so that Camel hands
+ * it back in front of the next chunk and the decision is made on the whole
+ * tag. All look-ahead is bounded by @len; nothing is read past the input.
+ */
+static void
+remove_meta_tag (CamelMimeFilter *filter,
+                 const gchar *in,
+                 gsize len,
+                 gsize prespace,
+                 gchar **out,
+                 gsize *outlen)
+{
+       const gsize meta_name_len = 5;          /* strlen ("<meta") */
+       const gsize head_end_len = 7;           /* strlen ("</head>") */
+       const gsize charset_len = 7;            /* strlen ("charset") */
+       const gsize max_pending_tag = 4096;     /* longest unterminated tag we buffer */
+       EMailMetaRemoveFilter *meta_remove = (EMailMetaRemoveFilter *) filter;
+       const gchar *inptr = in;
+       const gchar *inend = in + len;
+       gchar *outstart;
+       gchar *outptr;
+
+       /* Set again below only if this chunk ends inside a meta tag */
+       meta_remove->in_meta = FALSE;
+
+       /* Nothing left to look for, or nothing to look at: pass through */
+       if (meta_remove->after_head || len == 0) {
+               *out = (gchar *) in;
+               *outlen = len;
+               return;
+       }
+
+       /* The output is never longer than the input. It is placed @prespace
+        * bytes into the filter's own buffer so that it has at least as much
+        * room in front of it as the input had. The buffer belongs to the
+        * filter, so nothing is allocated per chunk and nothing leaks. */
+       camel_mime_filter_set_size (filter, len + prespace, FALSE);
+       outstart = filter->outbuf + prespace;
+       outptr = outstart;
+
+       while (inptr < inend && !meta_remove->after_head) {
+               const gchar *lt;
+               const gchar *tag_end;
+               const gchar *scan;
+               gboolean strip;
+               gsize left;
+
+               /* Copy everything up to the next '<' verbatim */
+               lt = memchr (inptr, '<', inend - inptr);
+               if (lt == NULL)
+                       lt = inend;
+               memcpy (outptr, inptr, lt - inptr);
+               outptr += lt - inptr;
+               inptr = lt;
+
+               left = inend - inptr;
+               if (left == 0)
+                       break;
+
+               /* Too few bytes to tell "<meta " or "</head>" from anything
+                * else; keep them for the next round instead of guessing */
+               if (left < head_end_len) {
+                       camel_mime_filter_backup (filter, inptr, left);
+                       break;
+               }
+
+               /* Meta tags are valid just in head element; the remainder
+                * (including "</head>" itself) is copied after the loop */
+               if (g_ascii_strncasecmp (inptr, "</head>", head_end_len) == 0) {
+                       meta_remove->after_head = TRUE;
+                       break;
+               }
+
+               /* Some other tag: keep its '<' and carry on behind it */
+               if (g_ascii_strncasecmp (inptr, "<meta", meta_name_len) != 0 ||
+                   !g_ascii_isspace (inptr[meta_name_len])) {
+                       *outptr++ = *inptr++;
+                       continue;
+               }
+
+               tag_end = memchr (inptr, '>', left);
+               if (tag_end == NULL) {
+                       /* Meta tag doesn't end in this buffer */
+                       if (left < max_pending_tag) {
+                               meta_remove->in_meta = TRUE;
+                               camel_mime_filter_backup (filter, inptr, left);
+                               break;
+                       }
+
+                       /* Unreasonably long: stop buffering and treat it as
+                        * ordinary text so memory use stays bounded */
+                       *outptr++ = *inptr++;
+                       continue;
+               }
+
+               /* Decide whether this tag goes away: always in remove-all
+                * mode, otherwise only when "charset" occurs inside it */
+               strip = meta_remove->remove_all_meta;
+               for (scan = inptr + meta_name_len + 1;
+                    !strip && (gsize) (tag_end - scan) >= charset_len;
+                    scan++)
+                       strip = g_ascii_strncasecmp (scan, "charset", charset_len) == 0;
+
+               if (!strip) {
+                       /* Keep the tag, including its closing '>' */
+                       memcpy (outptr, inptr, tag_end + 1 - inptr);
+                       outptr += tag_end + 1 - inptr;
+               } else if (!meta_remove->remove_all_meta) {
+                       /* If we wanted to remove just charset meta and we
+                        * removed it, quit */
+                       meta_remove->after_head = TRUE;
+               }
+
+               inptr = tag_end + 1;
+       }
+
+       /* Scanning ended inside this chunk: the rest is passed on as is */
+       if (meta_remove->after_head && inptr < inend) {
+               memcpy (outptr, inptr, inend - inptr);
+               outptr += inend - inptr;
+       }
+
+       *out = outstart;
+       *outlen = outptr - outstart;
+}
+
+/*
+ * CamelMimeFilter::filter implementation: runs one chunk of input through
+ * remove_meta_tag() and reports how much prespace the result has.
+ */
+static void
+filter_filter (CamelMimeFilter *filter,
+               const gchar *in,
+               gsize len,
+               gsize prespace,
+               gchar **out,
+               gsize *outlen,
+               gsize *outprespace)
+{
+       remove_meta_tag (filter, in, len, prespace, out, outlen);
+
+       /* @prespace describes the room in front of @in only. When the
+        * result lives in a different buffer, promise no prespace at all
+        * rather than room that may not exist. */
+       *outprespace = (*out == in) ? prespace : 0;
+}
+
+/*
+ * CamelMimeFilter::complete implementation: hands the final chunk on
+ * exactly as received; no meta tag scanning is done at the end of the
+ * stream.
+ */
+static void
+filter_complete (CamelMimeFilter *filter,
+                 const gchar *in,
+                 gsize len,
+                 gsize prespace,
+                 gchar **out,
+                 gsize *outlen,
+                 gsize *outprespace)
+{
+       *out = (gchar *) in;
+       *outlen = len;
+       *outprespace = prespace;
+}
+
+/* CamelMimeFilter::reset implementation: clears the scanning state so the
+ * filter starts from scratch; remove_all_meta is left as configured. */
+static void
+filter_reset (CamelMimeFilter *filter)
+{
+       EMailMetaRemoveFilter *self = (EMailMetaRemoveFilter *) filter;
+
+       self->after_head = FALSE;
+       self->in_meta = FALSE;
+}
+
+/* Class initializer: plugs this file's filter, complete and reset
+ * implementations into the CamelMimeFilter class vtable. */
+static void
+e_mail_meta_remove_filter_class_init (EMailMetaRemoveFilterClass *class)
+{
+       CamelMimeFilterClass *filter_class = CAMEL_MIME_FILTER_CLASS (class);
+
+       filter_class->reset = filter_reset;
+       filter_class->complete = filter_complete;
+       filter_class->filter = filter_filter;
+}
+
+static void
+e_mail_meta_remove_filter_init (EMailMetaRemoveFilter *filter)
+{ /* Intentionally empty: the state fields are set in e_mail_meta_remove_filter_new(). */
+}
+
+/**
+ * e_mail_meta_remove_filter_new:
+ * @remove_all_meta: %TRUE to remove every meta tag from the message,
+ * %FALSE to remove only the meta tag carrying a charset attribute
+ *
+ * Creates a new meta_remove filter with its scanning state cleared.
+ *
+ * Returns: the new filter, as a #CamelMimeFilter
+ **/
+CamelMimeFilter *
+e_mail_meta_remove_filter_new (gboolean remove_all_meta)
+{
+       EMailMetaRemoveFilter *meta_remove;
+
+       meta_remove = g_object_new (E_TYPE_MAIL_META_REMOVE_FILTER, NULL);
+
+       meta_remove->after_head = FALSE;
+       meta_remove->in_meta = FALSE;
+       meta_remove->remove_all_meta = remove_all_meta;
+
+       return CAMEL_MIME_FILTER (meta_remove);
+}
diff --git a/em-format/e-mail-meta-remove-filter.h b/em-format/e-mail-meta-remove-filter.h
new file mode 100644
index 0000000..e46242a
--- /dev/null
+++ b/em-format/e-mail-meta-remove-filter.h
@@ -0,0 +1,66 @@
+/*
+ * e-mail-meta-remove-filter.h
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) version 3.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with the program; if not, see <http://www.gnu.org/licenses/>
+ *
+ */
+
+#ifndef E_MAIL_META_REMOVE_FILTER_H
+#define E_MAIL_META_REMOVE_FILTER_H
+
+#include <camel/camel.h>
+
+/* Standard GObject macros; the cast targets must name the typedefs below */
+#define E_TYPE_MAIL_META_REMOVE_FILTER \
+       (e_mail_meta_remove_filter_get_type ())
+#define E_MAIL_META_REMOVE_FILTER(obj) \
+       (G_TYPE_CHECK_INSTANCE_CAST \
+       ((obj), E_TYPE_MAIL_META_REMOVE_FILTER, EMailMetaRemoveFilter))
+#define E_MAIL_META_REMOVE_FILTER_CLASS(cls) \
+       (G_TYPE_CHECK_CLASS_CAST \
+       ((cls), E_TYPE_MAIL_META_REMOVE_FILTER, EMailMetaRemoveFilterClass))
+#define E_IS_MAIL_META_REMOVE_FILTER(obj) \
+       (G_TYPE_CHECK_INSTANCE_TYPE \
+       ((obj), E_TYPE_MAIL_META_REMOVE_FILTER))
+#define E_IS_MAIL_META_REMOVE_FILTER_CLASS(cls) \
+       (G_TYPE_CHECK_CLASS_TYPE \
+       ((cls), E_TYPE_MAIL_META_REMOVE_FILTER))
+#define E_MAIL_META_REMOVE_FILTER_GET_CLASS(obj) \
+       (G_TYPE_INSTANCE_GET_CLASS \
+       ((obj), E_TYPE_MAIL_META_REMOVE_FILTER, EMailMetaRemoveFilterClass))
+
+G_BEGIN_DECLS
+
+typedef struct _EMailMetaRemoveFilter EMailMetaRemoveFilter;
+typedef struct _EMailMetaRemoveFilterClass EMailMetaRemoveFilterClass;
+
+struct _EMailMetaRemoveFilter {
+       CamelMimeFilter parent;         /* parent instance; must stay the first member */
+
+       gboolean remove_all_meta;       /* TRUE: strip every meta tag; FALSE: only one mentioning "charset" */
+       gboolean in_meta;               /* the last chunk ended inside a meta tag that was not closed yet */
+       gboolean after_head;            /* scanning is over; further input is passed through untouched */
+};
+
+struct _EMailMetaRemoveFilterClass {
+       CamelMimeFilterClass parent_class;      /* no virtual methods of its own */
+};
+
+GType          e_mail_meta_remove_filter_get_type      (void);
+CamelMimeFilter *
+               e_mail_meta_remove_filter_new           (gboolean remove_all_meta);        /* returns a new filter instance */
+
+G_END_DECLS
+
+G_END_DECLS
+
+#endif /* E_MAIL_META_REMOVE_FILTER_H */


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]