[balsa/html-filter] improved Webkit HTML privacy filter




commit e34fa3a2a05614de69997890a16a8eb83ee2654c
Author: Albrecht Dreß <albrecht dress netcologne de>
Date:   Sat Oct 23 18:19:33 2021 +0200

    improved Webkit HTML privacy filter
    
    Provide improved HTML privacy filtering using a Webkit extension.  See
    issue #62 for further details.
    
    changed files:
    * libbalsa/html-filter.c: Webkit filter extension (new file)
    * libbalsa/html.c: use the filter extension if available
    * configure.ac, meson.build, libbalsa/Makefile.am, libbalsa/meson.build:
    configure web extension folder, build the extension
    
    Signed-off-by: Albrecht Dreß <albrecht dress netcologne de>

 configure.ac           |   3 +
 libbalsa/Makefile.am   |   8 +++
 libbalsa/html-filter.c | 135 +++++++++++++++++++++++++++++++++++++++++++
 libbalsa/html.c        | 154 +++++++++++++++++++++++++++++++++++++------------
 libbalsa/meson.build   |   7 +++
 meson.build            |   3 +
 6 files changed, 272 insertions(+), 38 deletions(-)
---
diff --git a/configure.ac b/configure.ac
index e59b34026..ba4cf446f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -263,6 +263,9 @@ case "$use_html_widget" in
         # note: sqlite3 is needed to manage html vs. plain and image download preferences
         PKG_CHECK_MODULES(HTML, [ webkit2gtk-4.0 >= 2.28.0
                                   sqlite3 >= 3.24.0])
+        BALSA_WEB_EXTENSIONS="${libdir}/${PACKAGE}"
+        AC_SUBST(BALSA_WEB_EXTENSIONS)
+        CPPFLAGS="$CPPFLAGS -DBALSA_WEB_EXTENSIONS=\\\"$BALSA_WEB_EXTENSIONS\\\""
         AC_PATH_PROGS(HTML2TEXT,
                       [python-html2text  \
                        html2markdown     \
diff --git a/libbalsa/Makefile.am b/libbalsa/Makefile.am
index b272d4269..28ae1d2e3 100644
--- a/libbalsa/Makefile.am
+++ b/libbalsa/Makefile.am
@@ -139,6 +139,12 @@ libbalsa_a_SOURCES =               \
        x509-cert-widget.h
 
 
+pkglib_LTLIBRARIES = libhtmlfilter.la
+libhtmlfilter_la_SOURCES = html-filter.c
+libhtmlfilter_la_CFLAGS = $(BALSA_CFLAGS)
+libhtmlfilter_la_LIBADD = $(BALSA_LIBS)
+libhtmlfilter_la_LDFLAGS = -module -avoid-version -no-undefined
+
 EXTRA_DIST =                           \
        meson.build     \
        padlock-keyhole.xpm
@@ -149,3 +155,5 @@ AM_CPPFLAGS = -I${top_builddir} -I${top_srcdir} -I${top_srcdir}/libbalsa \
        $(BALSA_DEFS)
 
 AM_CFLAGS = $(BALSA_CFLAGS)
+
+html.o:        AM_CPPFLAGS += -DBALSA_WEB_EXT_DEVEL=\"${abs_builddir}/.libs\"
diff --git a/libbalsa/html-filter.c b/libbalsa/html-filter.c
new file mode 100644
index 000000000..258ece1a2
--- /dev/null
+++ b/libbalsa/html-filter.c
@@ -0,0 +1,135 @@
+/* -*-mode:c; c-style:k&r; c-basic-offset:4; -*- */
+/* Balsa E-Mail Client
+ *
+ * Copyright (C) 2021 Albrecht Dreß <albrecht dress arcor de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Filtering of external resources referenced in HTML messages
+ *
+ * The Webkit extension expects a user message "load_ext" from the main process indicating if external resources shall be loaded.
+ * If not, all images are redirected to a non-existing image, whilst other resources (e.g. fonts etc.) are simply ignored.
+ */
+
+#if defined(HAVE_CONFIG_H) && HAVE_CONFIG_H
+# include "config.h"
+#endif                          /* HAVE_CONFIG_H */
+
+#ifdef HAVE_HTML_WIDGET
+
+#if defined(GTK_DISABLE_DEPRECATED)
+#define GtkAction GAction
+#include <webkit2/webkit-web-extension.h>
+#undef GtkAction
+#else  /* defined(GTK_DISABLE_DEPRECATED) */
+#include <webkit2/webkit-web-extension.h>
+#endif /* defined(GTK_DISABLE_DEPRECATED) */
+#include <string.h>
+
+#ifdef G_LOG_DOMAIN
+#  undef G_LOG_DOMAIN
+#endif
+#define G_LOG_DOMAIN "html"
+
+
+#define LOAD_EXT_KEY           "load-ext"
+
+
+G_MODULE_EXPORT void webkit_web_extension_initialize(WebKitWebExtension *extension);
+
+
+/*
+ * "send-request" handler of a web page: decide whether a resource request may proceed.
+ *
+ * Requests for "about:blank" and for URIs starting with "cid:" are always processed.  Every other
+ * request is checked against the per-page boolean stored under LOAD_EXT_KEY:
+ *  - no value stored: a warning is logged and the request is skipped;
+ *  - value is TRUE:   the request is processed unchanged;
+ *  - value is FALSE:  the request is redirected to "about:blank" and then processed.
+ *
+ * Returns TRUE to skip the request, FALSE to process it.
+ */
+static gboolean
+lbhf_chk_send_request(WebKitWebPage                   *web_page,
+                      WebKitURIRequest                *request,
+                      WebKitURIResponse G_GNUC_UNUSED *redirected_response,
+                      gpointer G_GNUC_UNUSED           user_data)
+{
+       const gchar *uri = webkit_uri_request_get_uri(request);
+       gboolean result;        /* note: TRUE to skip this request, FALSE to process it */
+
+       result = (strcmp(uri, "about:blank") != 0) && (strncmp(uri, "cid:", 4UL) != 0);
+       if (result) {
+               gboolean *load_val;
+
+               load_val = g_object_get_data(G_OBJECT(web_page), LOAD_EXT_KEY);
+               if (load_val == NULL) {
+                       /* no policy received yet: keep result == TRUE, i.e. skip the request */
+                       g_warning("[HTML filter] %s: no policy for loading external resource %s", __func__, uri);
+               } else if (*load_val) {
+                       g_debug("[HTML filter] %s: accept %s", __func__, uri);
+                       result = FALSE;
+               } else {
+                       /* log before redirecting: uri is borrowed from the request and must not be
+                        * relied upon once the request's URI has been replaced */
+                       g_debug("[HTML filter] %s: request for uri %s blocked", __func__, uri);
+                       webkit_uri_request_set_uri(request, "about:blank");
+                       result = FALSE;
+               }
+       }
+       return result;
+}
+
+
+/*
+ * "user-message-received" handler of a web page.
+ *
+ * The message "load_ext" carries a boolean parameter which is stored as page data under
+ * LOAD_EXT_KEY (the stored value is released with g_free).  A "load_ext" message whose
+ * parameter is missing or not a boolean is treated as FALSE, so a malformed message never
+ * enables loading.
+ *
+ * Returns TRUE if the message was "load_ext", FALSE for any other message.
+ */
+static gboolean
+lbhf_page_user_message_cb(WebKitWebPage          *web_page,
+                          WebKitUserMessage      *message,
+                          gpointer G_GNUC_UNUSED  user_data)
+{
+       const gchar *name = webkit_user_message_get_name(message);
+       gboolean result;
+
+       if (strcmp(name, "load_ext") == 0) {
+               GVariant *data;
+               gboolean *load_val;
+
+               data = webkit_user_message_get_parameters(message);
+               load_val = g_new(gboolean, 1U);
+               if ((data != NULL) && g_variant_is_of_type(data, G_VARIANT_TYPE_BOOLEAN)) {
+                       *load_val = g_variant_get_boolean(data);
+               } else {
+                       /* validate explicitly instead of relying on GLib's precondition checks; fail closed */
+                       g_warning("[HTML filter] %s: message without boolean parameter, external resources are not loaded", __func__);
+                       *load_val = FALSE;
+               }
+               g_debug("[HTML filter] %s: page %p: load externals = %d", __func__, web_page, *load_val);
+               g_object_set_data_full(G_OBJECT(web_page), LOAD_EXT_KEY, load_val, g_free);
+               result = TRUE;
+       } else {
+               g_debug("[HTML filter] %s: unexpected message '%s'", __func__, name);
+               result = FALSE;
+       }
+       return result;
+}
+
+
+/*
+ * "page-created" handler of the web extension.
+ *
+ * Logs the new page and connects lbhf_chk_send_request to its "send-request" signal and
+ * lbhf_page_user_message_cb to its "user-message-received" signal.
+ */
+static void
+lbhf_page_created_callback(WebKitWebExtension G_GNUC_UNUSED *extension,
+                           WebKitWebPage                    *web_page,
+                           gpointer G_GNUC_UNUSED            user_data)
+{
+       gpointer page = web_page;
+
+       g_debug("[HTML filter] %s: page %p created", __func__, page);
+       g_signal_connect(page, "send-request", G_CALLBACK(lbhf_chk_send_request), NULL);
+       g_signal_connect(page, "user-message-received", G_CALLBACK(lbhf_page_user_message_cb), NULL);
+}
+
+
+/*
+ * Exported entry point of the web extension.
+ *
+ * Connects lbhf_page_created_callback to the extension's "page-created" signal.  On the first
+ * call only (tracked by a static flag which is set atomically), the user message
+ * "balsa-html-filter" is sent to the web context to report to the main Balsa process that the
+ * HTML filter extension has been found.
+ */
+G_MODULE_EXPORT void
+webkit_web_extension_initialize(WebKitWebExtension *extension)
+{
+       static guint main_notified = 0U;
+       guint notified_before;
+
+       g_debug("[HTML filter] %s", __func__);
+       g_signal_connect(extension, "page-created", G_CALLBACK(lbhf_page_created_callback), NULL);
+
+       /* set the flag and fetch its previous value in one atomic step */
+       notified_before = g_atomic_int_or(&main_notified, 1U);
+       if (notified_before != 0U) {
+               return;
+       }
+
+       /* no parameters and no reply callback: the message name alone is the notification */
+       webkit_web_extension_send_message_to_context(extension,
+               webkit_user_message_new("balsa-html-filter", NULL), NULL, NULL, NULL);
+}
+
+#endif
diff --git a/libbalsa/html.c b/libbalsa/html.c
index 59c3e770c..42532561e 100644
--- a/libbalsa/html.c
+++ b/libbalsa/html.c
@@ -33,7 +33,9 @@
 
 #include <stdio.h>
 #include <string.h>
+#include <unistd.h>
 #include <glib/gi18n.h>
+#include <glib/gstdio.h>
 
 #ifdef HAVE_HTML_WIDGET
 
@@ -43,6 +45,15 @@
 #define G_LOG_DOMAIN "html"
 
 
+/* Note:
+ * If the following variable is set, search the build folder for the Webkit
+ * extension first, and fall back to the install folder if the folder does
+ * not exist.  Thus, this variable should be set for testing/development,
+ * and unset for release builds to avoid leaking the build folder name.
+ */
+#define WEB_EXT_CHECK_BUILDDIR 1
+
+
 #define CID_REGEX      "<[^>]*src\\s*=\\s*['\"]?\\s*cid:"
 /* use a negative lookahead assertion to match "src=" *not* followed by "cid:" */
 #define SRC_REGEX      "<[^>]*src\\s*=\\s*(?!['\"]?\\s*cid:)"
@@ -59,6 +70,10 @@
 #define LBH_NATURAL_SIZE (-1)
 
 
+/* indicates if Balsa's HTML filter webkit extension is available */
+static guint html_filter_found = 0U;
+
+
 /*
  * lbh_get_body_content
  *
@@ -361,6 +376,20 @@ lbh_decide_policy_cb(WebKitWebView           * web_view,
     return TRUE;
 }
 
+
+/*
+ * Send the user message "load_ext" to the page of web_view, carrying load_resources as a
+ * boolean parameter.  No reply callback is registered.
+ *
+ * The function sleeps for 1000 microseconds after sending.
+ * NOTE(review): presumably this gives the web process time to receive the message before
+ * content is loaded; nothing guarantees delivery within that time - confirm.
+ */
+static void
+lbh_load_external_resources(WebKitWebView *web_view, gboolean load_resources)
+{
+       WebKitUserMessage *policy_msg;
+
+       policy_msg = webkit_user_message_new("load_ext", g_variant_new_boolean(load_resources));
+       webkit_web_view_send_message_to_page(web_view, policy_msg, NULL, NULL, NULL);
+       g_usleep(1000);
+}
+
+
 /*
  * Show the GtkInfoBar for asking about downloading images
  *
@@ -369,25 +398,29 @@ lbh_decide_policy_cb(WebKitWebView           * web_view,
 
 static void
 lbh_info_bar_response_cb(GtkInfoBar * info_bar,
-                         gint response_id, gpointer data)
+       gint response_id, gpointer data)
 {
-    LibBalsaWebKitInfo *info = data;
-
-    if (response_id == GTK_RESPONSE_OK) {
-        gchar *text;
-
-        if (lbh_get_body_content_utf8(info->body, &text) >= 0) {
-            WebKitSettings *settings;
-
-            settings = webkit_web_view_get_settings(info->web_view);
-            webkit_settings_set_auto_load_images(settings, TRUE);
-            webkit_web_view_load_html(info->web_view, text, NULL);
-            g_free(text);
-        }
-    }
+       LibBalsaWebKitInfo *info = data;
+
+       if (response_id == GTK_RESPONSE_OK) {
+               gchar *text;
+
+               if (lbh_get_body_content_utf8(info->body, &text) >= 0) {
+                       if (g_atomic_int_get(&html_filter_found) != 0) {
+                               lbh_load_external_resources(info->web_view, TRUE);
+                       } else {
+                               WebKitSettings *settings;
+
+                               settings = webkit_web_view_get_settings(info->web_view);
+                               webkit_settings_set_auto_load_images(settings, TRUE);
+                       }
+                       webkit_web_view_load_html(info->web_view, text, NULL);
+                       g_free(text);
+               }
+       }
 
-    gtk_widget_destroy(info->info_bar);
-    info->info_bar = NULL;
+       gtk_widget_destroy(info->info_bar);
+       info->info_bar = NULL;
 }
 
 static void
@@ -403,26 +436,13 @@ lbh_info_bar(LibBalsaWebKitInfo * info)
     GtkInfoBar *info_bar;
     GtkWidget *label;
     GtkWidget *content_area;
-#ifdef GTK_INFO_BAR_WRAPPING_IS_BROKEN
     static const gchar text[] =
-                 N_("This message part contains images "
-                    "from a remote server.\n"
-                    "To protect your privacy, "
-                    "Balsa has not downloaded them.\n"
-                    "You may choose to download them "
-                    "if you trust the server.");
-#else                           /* GTK_INFO_BAR_WRAPPING_IS_BROKEN */
-    static const gchar text[] =
-                 N_("This message part contains images "
-                    "from a remote server. "
-                    "To protect your privacy, "
-                    "Balsa has not downloaded them. "
-                    "You may choose to download them "
-                    "if you trust the server.");
-#endif                          /* GTK_INFO_BAR_WRAPPING_IS_BROKEN */
+                 N_("This message part references contents on one or more external servers. "
+                       "To protect your privacy, Balsa has not downloaded them. You may choose "
+                       "to download them if you trust the sender of the message.");
 
     info_bar_widget =
-        gtk_info_bar_new_with_buttons(_("_Download images"),
+        gtk_info_bar_new_with_buttons(_("_Download external contents"),
                                      GTK_RESPONSE_OK,
                                      _("_Close"), GTK_RESPONSE_CLOSE,
                                      NULL);
@@ -454,14 +474,15 @@ lbh_resource_notify_response_cb(WebKitWebResource * resource,
                                 GParamSpec        * pspec,
                                 gpointer            data)
 {
-    LibBalsaWebKitInfo *info = data;
+    LibBalsaWebKitInfo *info = (LibBalsaWebKitInfo *) data;
     const gchar *mime_type;
     WebKitURIResponse *response;
 
     response = webkit_web_resource_get_response(resource);
     mime_type = webkit_uri_response_get_mime_type(response);
     g_debug("%s mime-type %s", __func__, mime_type);
-    if (g_ascii_strncasecmp(mime_type, "image/", 6) != 0)
+    if ((g_atomic_int_get(&html_filter_found) != 0) ||
+       (g_ascii_strncasecmp(mime_type, "image/", 6) != 0))
         return;
 
     if (info->info_bar) {
@@ -589,6 +610,25 @@ lbh_context_menu_cb(WebKitWebView       * web_view,
     return retval;
 }
 
+/*
+ * "user-message-received" handler of the web context.
+ *
+ * The message "balsa-html-filter" atomically sets html_filter_found, marking Balsa's HTML
+ * filter extension as available.
+ *
+ * Returns TRUE if the message was "balsa-html-filter", FALSE for any other message.
+ */
+static gboolean
+lbh_web_extension_cb(WebKitWebContext G_GNUC_UNUSED *context,
+                     WebKitUserMessage              *message,
+                     gpointer G_GNUC_UNUSED          user_data)
+{
+       const gchar *msg_name = webkit_user_message_get_name(message);
+
+       if (strcmp(msg_name, "balsa-html-filter") != 0) {
+               g_info("%s: unknown webkit extension message '%s'", __func__, msg_name);
+               return FALSE;
+       }
+
+       g_atomic_int_or(&html_filter_found, 1U);
+       g_debug("%s: Balsa HTML filter WebKit extension found", __func__);
+       return TRUE;
+}
+
+
 static WebKitWebContext *
 lbh_get_web_view_context(void)
 {
@@ -596,16 +636,52 @@ lbh_get_web_view_context(void)
 
        if (g_once_init_enter(&context)) {
                WebKitWebsiteDataManager *data_manager;
+               WebKitCookieManager *cookie_manager;
                WebKitWebContext *tmp;
+               WebKitWebView *view;
                gchar *cache_dir;
 
                cache_dir = g_build_filename(g_get_home_dir(), ".balsa", CACHE_DIR, NULL);
                data_manager = webkit_website_data_manager_new("base-cache-directory", cache_dir, NULL);
                g_free(cache_dir);
+               webkit_website_data_manager_set_tls_errors_policy(data_manager, WEBKIT_TLS_ERRORS_POLICY_FAIL);
+               cookie_manager = webkit_website_data_manager_get_cookie_manager(data_manager);
+               webkit_cookie_manager_set_accept_policy(cookie_manager, WEBKIT_COOKIE_POLICY_ACCEPT_NEVER);
                tmp = webkit_web_context_new_with_website_data_manager(data_manager);
-               webkit_web_context_set_cache_model(tmp, WEBKIT_CACHE_MODEL_DOCUMENT_BROWSER);
+#ifdef WEB_EXT_CHECK_BUILDDIR
+               g_debug("%s: WEB_EXT_CHECK_BUILDDIR is defined, check for '%s'", __func__, BALSA_WEB_EXT_DEVEL "/libhtmlfilter.so");
+               if (g_access(BALSA_WEB_EXT_DEVEL "/libhtmlfilter.so", R_OK) == 0) {
+                       g_debug("%s: set extensions folder '%s'", __func__, BALSA_WEB_EXT_DEVEL);
+                       webkit_web_context_set_web_extensions_directory(tmp, BALSA_WEB_EXT_DEVEL);
+               } else {
+                       g_debug("%s: set extensions folder '%s'", __func__, BALSA_WEB_EXTENSIONS);
+                       webkit_web_context_set_web_extensions_directory(tmp, BALSA_WEB_EXTENSIONS);
+               }
+#else
+               g_debug("%s: set extensions folder '%s'", __func__, BALSA_WEB_EXTENSIONS);
+               webkit_web_context_set_web_extensions_directory(tmp, BALSA_WEB_EXTENSIONS);
+#endif
+               g_signal_connect(tmp, "user-message-received", G_CALLBACK(lbh_web_extension_cb), NULL);
+               webkit_web_context_set_cache_model(tmp, WEBKIT_CACHE_MODEL_WEB_BROWSER);
                webkit_web_context_register_uri_scheme(tmp, "cid", lbh_cid_cb, NULL, NULL);
                g_debug("%s: registered “cid:” scheme", __func__);
+
+               /* create a dummy view to trigger loading the html filter extension */
+               view = WEBKIT_WEB_VIEW(webkit_web_view_new_with_context(tmp));
+               webkit_web_view_load_uri(view, "about:blank");
+               while (webkit_web_view_is_loading(view)) {
+                       gtk_main_iteration_do(FALSE);
+                       g_usleep(100);
+               }
+               g_object_ref_sink(view);
+               g_object_unref(view);
+               if (g_atomic_int_get(&html_filter_found) != 0) {
+                       g_debug("%s: Balsa HTML filter available", __func__);
+               } else {
+                       libbalsa_information(LIBBALSA_INFORMATION_WARNING,
+                               _("Balsa's external HTML resources filter web extension is not available in the folder “%s”, "
+                                 "falling back to simplified image filtering. Please check your installation. "), BALSA_WEB_EXTENSIONS);
+               }
                g_once_init_leave(&context, tmp);
        }
        return context;
@@ -633,7 +709,9 @@ lbh_web_view_new(LibBalsaWebKitInfo *info,
     webkit_settings_set_enable_javascript(settings, FALSE);
        webkit_settings_set_enable_java(settings, FALSE);
        webkit_settings_set_enable_hyperlink_auditing(settings, TRUE);
-       webkit_settings_set_auto_load_images(settings, auto_load_images);
+       webkit_settings_set_auto_load_images(settings,
+               auto_load_images || (g_atomic_int_get(&html_filter_found) != 0));
+       lbh_load_external_resources(view, auto_load_images);
 
        g_signal_connect(view, "web-process-terminated", G_CALLBACK(lbh_web_process_terminated_cb), info);
     g_signal_connect(view, "decide-policy", G_CALLBACK(lbh_decide_policy_cb), info);
diff --git a/libbalsa/meson.build b/libbalsa/meson.build
index 8abc1824b..c4945695e 100644
--- a/libbalsa/meson.build
+++ b/libbalsa/meson.build
@@ -145,4 +145,11 @@ libbalsa_a = static_library('balsa', libbalsa_a_sources,
                                                    libimap_include],
                             install             : false)
 
+libhtmlfilter_la = shared_library('htmlfilter',
+                                  'html-filter.c',
+                                  dependencies        : balsa_deps,
+                                  include_directories : [top_include],
+                                  install             : true,
+                                  install_dir         : balsa_web_extensions)
+
 subdir('imap')
diff --git a/meson.build b/meson.build
index a84c775a7..601650cbd 100644
--- a/meson.build
+++ b/meson.build
@@ -168,6 +168,9 @@ libnetclient_deps = [glib_dep,
 if html_widget == 'webkit2'
   html_dep = dependency('webkit2gtk-4.0', version : '>= 2.28.0')
   htmlpref_dep = dependency('sqlite3', version : '>= 3.24.0')
+  balsa_web_extensions = join_paths(get_option('prefix'), get_option('libdir'), 'balsa')
+  add_project_arguments('-DBALSA_WEB_EXTENSIONS="' + balsa_web_extensions + '"', language : 'c')
+  add_project_arguments('-DBALSA_WEB_EXT_DEVEL="' + join_paths(meson.current_build_dir(), 'libbalsa') + '"', language : 'c')
 
   html2text = find_program('python-html2text',
                            'html2markdown',


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]