[epiphany] adblock: Re-add ad blocker using WebKitUserContentFilters
- From: Michael Catanzaro <mcatanzaro src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [epiphany] adblock: Re-add ad blocker using WebKitUserContentFilters
- Date: Mon, 5 Aug 2019 23:59:32 +0000 (UTC)
commit 3ecc9bf0247a52ebf4e3fa792d2580b715c4ac03
Author: Adrian Perez de Castro <aperez igalia com>
Date: Wed Jan 23 01:20:22 2019 +0200
adblock: Re-add ad blocker using WebKitUserContentFilters
embed/ephy-embed-shell.c | 17 +-
embed/ephy-filters-manager.c | 907 +++++++++++++++++++++++++++++++++++--------
embed/ephy-filters-manager.h | 2 +-
3 files changed, 750 insertions(+), 176 deletions(-)
---
diff --git a/embed/ephy-embed-shell.c b/embed/ephy-embed-shell.c
index 3667d3f66..5705a63c1 100644
--- a/embed/ephy-embed-shell.c
+++ b/embed/ephy-embed-shell.c
@@ -1090,12 +1090,6 @@ ephy_embed_shell_create_web_context (EphyEmbedShell *shell)
priv->web_context = webkit_web_context_new_with_website_data_manager (manager);
}
-static char *
-adblock_filters_dir (EphyEmbedShell *shell)
-{
- return g_build_filename (ephy_cache_dir (), "adblock", NULL);
-}
-
static void
download_started_cb (WebKitWebContext *web_context,
WebKitDownload *download,
@@ -1280,9 +1274,18 @@ ephy_embed_shell_startup (GApplication *application)
EPHY_PREFS_WEB_COOKIES_POLICY);
ephy_embed_prefs_set_cookie_accept_policy (cookie_manager, cookie_policy);
- filters_dir = adblock_filters_dir (shell);
+ filters_dir = g_build_filename (ephy_cache_dir (), "adblock", NULL);
priv->filters_manager = ephy_filters_manager_new (filters_dir);
+ g_signal_connect_object (priv->filters_manager, "filters-disabled",
+ G_CALLBACK (webkit_user_content_manager_remove_all_filters),
+ priv->user_content,
+ G_CONNECT_SWAPPED);
+ g_signal_connect_object (priv->filters_manager, "filter-ready",
+ G_CALLBACK (webkit_user_content_manager_add_filter),
+ priv->user_content,
+ G_CONNECT_SWAPPED);
+
g_signal_connect_object (priv->web_context, "download-started",
G_CALLBACK (download_started_cb), shell, 0);
}
diff --git a/embed/ephy-filters-manager.c b/embed/ephy-filters-manager.c
index 277a6b10d..31fe7a7ed 100644
--- a/embed/ephy-filters-manager.c
+++ b/embed/ephy-filters-manager.c
@@ -1,6 +1,6 @@
/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2; -*- */
/*
- * Copyright © 2017 Igalia S.L.
+ * Copyright © 2017, 2019 Igalia S.L.
*
* This file is part of Epiphany.
*
@@ -21,23 +21,38 @@
#include "config.h"
#include "ephy-filters-manager.h"
+#include "ephy-debug.h"
#include "ephy-download.h"
#include "ephy-prefs.h"
#include "ephy-settings.h"
#include <gio/gio.h>
+#include <inttypes.h>
+
#define ADBLOCK_FILTER_UPDATE_FREQUENCY 24 * 60 * 60 /* In seconds */
+#define ADBLOCK_FILTER_SIDECAR_FILE_SUFFIX ".filterinfo"
struct _EphyFiltersManager {
GObject parent_instance;
char *filters_dir;
+ GHashTable *filters; /* (identifier, FilterInfo) */
+ guint64 update_time;
GCancellable *cancellable;
+ WebKitUserContentFilterStore *store;
};
G_DEFINE_TYPE (EphyFiltersManager, ephy_filters_manager, G_TYPE_OBJECT)
+enum {
+ FILTER_READY,
+ FILTERS_DISABLED,
+ LAST_SIGNAL,
+};
+
+static guint s_signals[LAST_SIGNAL];
+
enum {
PROP_0,
PROP_FILTERS_DIR,
@@ -46,229 +61,760 @@ enum {
static GParamSpec *object_properties[N_PROPERTIES] = { NULL, };
+/* Bookkeeping for one configured content filter. The fields marked "Saved"
+ * are the ones persisted in the sidecar metadata file.
+ */
+typedef struct {
+  EphyFiltersManager *manager; /* Weak pointer, NULL once the manager is gone. */
+  char *identifier;            /* Lazily derived from source_uri. */
+  char *source_uri;            /* Saved. */
+  char *checksum;              /* Saved. */
+  guint64 last_update;         /* Saved, seconds. */
+
+  /* The flags are unsigned on purpose: gboolean is a signed integer type, and
+   * a one-bit field of a signed type may read back as -1 instead of TRUE.
+   */
+  guint found : 1;             /* WebKitUserContentFilter found during lookup. */
+  guint enabled : 1;           /* The filter is already enabled. */
+  guint local : 1;             /* The source_uri is a local file URI. */
+} FilterInfo;
+
+/* The "saved" fields from the struct above are stored as versioned sidecar
+ * metadata files, using GVariant for serialization. An integer indicating
+ * the version of the on-disk format is prepended to the data, and it must
+ * be increased by 1 in the source code whenever the GVariant format below
+ * changes.
+ */
+#define FILTER_INFO_VARIANT_VERSION ((uint32_t)1)
+#define FILTER_INFO_VARIANT_FORMAT "(usmst)"
+
+/* Releases a FilterInfo: drops the weak reference to the manager, frees the
+ * owned strings, then frees the structure itself.
+ */
+static void
+filter_info_free (FilterInfo *self)
+{
+  g_clear_weak_pointer (&self->manager);
+
+  g_free (self->identifier);
+  g_free (self->source_uri);
+  g_free (self->checksum);
+
+  g_free (self);
+}
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC (FilterInfo, filter_info_free)
+
+/* Allocates a zeroed FilterInfo for source_uri (copied) which keeps a weak
+ * pointer to manager. Free the result with filter_info_free().
+ */
+static FilterInfo *
+filter_info_new (const char         *source_uri,
+                 EphyFiltersManager *manager)
+{
+  FilterInfo *info;
+
+  g_assert (source_uri);
+  g_assert (manager);
+
+  info = g_new0 (FilterInfo, 1);
+  info->source_uri = g_strdup (source_uri);
+  g_set_weak_pointer (&info->manager, manager);
+
+  return info;
+}
+
static gboolean
-adblock_filter_file_is_valid (GFile *file)
-{
- GFileInfo *file_info;
- gboolean result = FALSE;
-
- /* Now check if the local file is too old. */
- file_info = g_file_query_info (file,
- G_FILE_ATTRIBUTE_TIME_MODIFIED ","G_FILE_ATTRIBUTE_STANDARD_SIZE,
- G_FILE_QUERY_INFO_NONE,
- NULL,
- NULL);
- if (file_info) {
- if (g_file_info_get_size (file_info) > 0) {
- GTimeVal current_time;
- GTimeVal mod_time;
-
- g_get_current_time (&current_time);
- g_file_info_get_modification_time (file_info, &mod_time);
-
- if (current_time.tv_sec > mod_time.tv_sec) {
- gint64 expire_time = mod_time.tv_sec + ADBLOCK_FILTER_UPDATE_FREQUENCY;
-
- result = current_time.tv_sec < expire_time;
- }
- }
- g_object_unref (file_info);
+filter_info_load_from_bytes (FilterInfo  *self,
+                             GBytes      *data,
+                             GError     **error)
+{
+  /* Decodes sidecar metadata from data into self.
+   *
+   * Returns TRUE and updates the "checksum" and "last_update" members on
+   * success. Returns FALSE and sets error when the data is malformed, has an
+   * unexpected format version, or was saved for a different source URI; in
+   * that case self is left untouched.
+   */
+  uint32_t saved_version = 0;
+  g_autofree char *source_uri = NULL;
+  g_autofree char *checksum = NULL;
+  guint64 last_update = 0;
+
+  g_autoptr (GVariantType) value_type = g_variant_type_new (FILTER_INFO_VARIANT_FORMAT);
+  /* The bytes come from a file on disk which may be truncated or corrupted,
+   * so they must not be marked as trusted.
+   */
+  g_autoptr (GVariant) value = g_variant_ref_sink (g_variant_new_from_bytes (value_type, data, FALSE));
+
+  /* g_variant_new_from_bytes() never returns NULL; malformed input shows up
+   * as serialised data which is not in normal form.
+   */
+  if (!g_variant_is_normal_form (value)) {
+    g_set_error_literal (error,
+                         G_IO_ERROR,
+                         G_IO_ERROR_INVALID_ARGUMENT,
+                         "Cannot decode GVariant from bytes");
+    return FALSE;
+  }
+
+  g_variant_get_child (value, 0, "u", &saved_version);
+  if (saved_version != FILTER_INFO_VARIANT_VERSION) {
+    g_set_error (error,
+                 G_IO_ERROR,
+                 G_IO_ERROR_INVALID_DATA,
+                 "Attempted to decode content filter data GVariant with"
+                 " format version %" PRIu32 " (expected %" PRIu32 ")",
+                 saved_version,
+                 FILTER_INFO_VARIANT_VERSION);
+    return FALSE;
+  }
+
+  g_variant_get (value,
+                 FILTER_INFO_VARIANT_FORMAT,
+                 NULL, /* Ignore the version, it has been checked already. */
+                 &source_uri,
+                 &checksum,
+                 &last_update);
+
+  if (strcmp (source_uri, self->source_uri) != 0) {
+    g_set_error (error,
+                 G_IO_ERROR,
+                 G_IO_ERROR_INVALID_DATA,
+                 "Attempted to decode content filter data GVariant with"
+                 " wrong filter URI <%s> (expected <%s>)",
+                 source_uri,
+                 self->source_uri);
+    return FALSE;
  }
- return result;
+  /* All sanity checks passed. The "source_uri" member does not need to
+   * be updated in the struct because at this point it is known to be the
+   * same as in the sidecar metadata file, and the same applies to the
+   * "identifier" field.
+   */
+  g_clear_pointer (&self->checksum, g_free);
+  self->checksum = g_steal_pointer (&checksum);
+  self->last_update = last_update;
+
+  LOG ("Loaded metadata: uri=<%s>, identifier=%s, checksum=%s, last_update=%" PRIu64,
+       self->source_uri,
+       self->identifier,
+       self->checksum,
+       self->last_update);
+
+  return TRUE;
}
-typedef struct {
- EphyFiltersManager *manager;
- EphyDownload *download;
- char *source_uri;
-} AdblockFilterRetrieveData;
+/* Derives the filter identifier for source_uri: the SHA-256 checksum string
+ * of the URI. The caller owns the returned string.
+ */
+static char *
+filter_info_identifier_for_source_uri (const char *source_uri)
+{
+  char *identifier;
+
+  g_assert (source_uri);
+
+  identifier = g_compute_checksum_for_string (G_CHECKSUM_SHA256, source_uri, -1);
+  return identifier;
+}
-static AdblockFilterRetrieveData *
-adblock_filter_retrieve_data_new (EphyFiltersManager *manager,
- EphyDownload *download,
- const char *source_uri)
+/* Returns the identifier of the filter, computing and caching it on first
+ * use. The returned string is owned by the FilterInfo.
+ */
+static const char *
+filter_info_get_identifier (FilterInfo *self)
{
- AdblockFilterRetrieveData *data;
- data = g_new (AdblockFilterRetrieveData, 1);
- data->manager = g_object_ref (manager);
- data->download = g_object_ref (download);
- data->source_uri = g_strdup (source_uri);
- return data;
+  g_assert (self);
+
+  /* Fast path: the identifier has been derived already. */
+  if (self->identifier)
+    return self->identifier;
+
+  self->identifier = filter_info_identifier_for_source_uri (self->source_uri);
+  return self->identifier;
+}
+
+/* Returns a new GFile for the sidecar metadata file of the filter, named
+ * "<identifier>.filterinfo" inside the filters directory of the manager.
+ */
+static GFile *
+filter_info_get_sidecar_file (FilterInfo *self)
+{
+  g_autofree char *basename = g_strconcat (filter_info_get_identifier (self),
+                                           ADBLOCK_FILTER_SIDECAR_FILE_SUFFIX,
+                                           NULL);
+  const char *directory = ephy_filters_manager_get_adblock_filters_dir (self->manager);
+
+  return g_file_new_build_filename (directory, basename, NULL);
}
static void
-adblock_filter_retrieve_data_free (AdblockFilterRetrieveData *data)
+sidecar_bytes_loaded_cb (GFile        *file,
+                         GAsyncResult *result,
+                         GTask        *task)
{
- g_object_unref (data->manager);
- g_object_unref (data->download);
- g_free (data->source_uri);
- g_free (data);
+  /* Completion callback for the read started by filter_info_load_sidecar():
+   * decodes the loaded bytes into the FilterInfo attached to the task and
+   * completes the task with TRUE, or with the error on failure.
+   */
+  GError *error = NULL;
+  FilterInfo *self = g_task_get_task_data (task);
+  g_autoptr (GBytes) data = g_file_load_bytes_finish (file,
+                                                      result,
+                                                      NULL, /* etag_out */
+                                                      &error);
+  if (data && filter_info_load_from_bytes (self, data, &error)) {
+    g_task_return_boolean (task, TRUE);
+  } else {
+    /* Ownership of the error is transferred to the task. */
+    g_task_return_error (task, error);
+  }
+
+  /* Drop the reference obtained from g_task_new(); without this the task
+   * is leaked on every load.
+   */
+  g_object_unref (task);
}
+/* Starts loading the sidecar metadata file of the filter. The callback is
+ * invoked with a result to be passed to filter_info_load_sidecar_finish().
+ * When the sidecar is missing or is not a regular file, an error is reported
+ * through the callback without attempting to read anything.
+ */
static void
-download_completed_cb (EphyDownload *download,
- AdblockFilterRetrieveData *data)
+filter_info_load_sidecar (FilterInfo *self,
+ GCancellable *cancellable,
+ GAsyncReadyCallback callback,
+ void *user_data)
+{
+ g_autoptr (GFile) sidecar_file = filter_info_get_sidecar_file (self);
+ g_autofree char *sidecar_file_path = g_file_get_path (sidecar_file);
+ GFileType file_type = g_file_query_file_type (sidecar_file,
+ G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
+ NULL);
+ if (file_type != G_FILE_TYPE_REGULAR) {
+ int error_code;
+ const char *message;
+
+ /* An unknown file type is reported as "not found". */
+ if (file_type == G_FILE_TYPE_UNKNOWN) {
+ error_code = G_IO_ERROR_NOT_FOUND;
+ message = "File not found";
+ } else {
+ error_code = G_IO_ERROR_NOT_REGULAR_FILE;
+ message = "Not a regular file";
+ }
+ g_task_report_new_error (NULL,
+ callback,
+ user_data,
+ filter_info_load_sidecar,
+ G_IO_ERROR,
+ error_code,
+ "%s: %s",
+ sidecar_file_path,
+ message);
+ } else {
+ GTask *task = g_task_new (NULL,
+ cancellable,
+ callback,
+ user_data);
+ g_autofree char *task_name = g_strconcat ("load sidecar file: ",
+ sidecar_file_path,
+ NULL);
+ /* The FilterInfo itself is used as async task data: it already contains
+ * all the bits of information needed by the completion callback. No
+ * destroy function is set, so the task does not take ownership of it.
+ */
+ g_task_set_task_data (task, self, NULL);
+ g_task_set_name (task, task_name);
+ g_file_load_bytes_async (sidecar_file,
+ g_task_get_cancellable (task),
+ (GAsyncReadyCallback)sidecar_bytes_loaded_cb,
+ task);
+ }
+}
+
+/* Completes an operation started with filter_info_load_sidecar(): returns
+ * TRUE on success, or FALSE with error set.
+ */
+static gboolean
+filter_info_load_sidecar_finish (GAsyncResult  *result,
+                                 GError       **error)
{
- g_signal_handlers_disconnect_by_data (download, data);
- adblock_filter_retrieve_data_free (data);
+  GTask *task = G_TASK (result);
+
+  return g_task_propagate_boolean (task, error);
+}
+
+/* Serializes the saved members of the FilterInfo, prefixed by the format
+ * version, into a new GBytes using the sidecar GVariant format.
+ */
+static GBytes *
+filter_info_get_data_as_bytes (FilterInfo *self)
+{
+  GVariant *floating = g_variant_new (FILTER_INFO_VARIANT_FORMAT,
+                                      FILTER_INFO_VARIANT_VERSION,
+                                      self->source_uri,
+                                      self->checksum,
+                                      self->last_update);
+  g_autoptr (GVariant) metadata = g_variant_ref_sink (floating);
+
+  return g_variant_get_data_as_bytes (metadata);
}
static void
-download_error_cb (EphyDownload *download,
- GError *error,
- AdblockFilterRetrieveData *data)
+sidecar_contents_replaced_cb (GFile        *file,
+                              GAsyncResult *result,
+                              GTask        *task)
{
- GFileOutputStream *stream;
- GFile *file;
+  /* Completion callback for the write started by filter_info_save_sidecar():
+   * completes the task with TRUE, or with the error on failure.
+   */
+  GError *error = NULL;
+  if (g_file_replace_contents_finish (file,
+                                      result,
+                                      NULL, /* new_etag */
+                                      &error)) {
+    g_task_return_boolean (task, TRUE);
+  } else {
+    /* Ownership of the error is transferred to the task. */
+    g_task_return_error (task, error);
+  }
+
+  /* Drop the reference obtained from g_task_new(); without this the task
+   * is leaked on every save.
+   */
+  g_object_unref (task);
+}
- /* Create an empty file if it doesn't exist to unblock extensions */
- file = g_file_new_for_uri (ephy_download_get_destination_uri (download));
- stream = g_file_create (file, G_FILE_CREATE_NONE, NULL, NULL);
- if (stream)
- g_object_unref (stream);
- g_object_unref (file);
+/* Starts writing the saved members of the filter to its sidecar metadata
+ * file, replacing any previous contents. The callback is invoked with a
+ * result to be passed to filter_info_save_sidecar_finish().
+ */
+static void
+filter_info_save_sidecar (FilterInfo *self,
+ GCancellable *cancellable,
+ GAsyncReadyCallback callback,
+ void *user_data)
+{
+ g_autoptr (GBytes) data = filter_info_get_data_as_bytes (self);
+ g_autoptr (GFile) sidecar_file = filter_info_get_sidecar_file (self);
+ g_autofree char *sidecar_file_path = g_file_get_path (sidecar_file);
+ g_autofree char *task_name = g_strconcat ("save sidecar file: ",
+ sidecar_file_path,
+ NULL);
+ GTask *task = g_task_new (NULL, cancellable, callback, user_data);
+ g_task_set_name (task, task_name);
+
+ LOG ("Saving metadata: uri=<%s>, identifier=%s, checksum=%s, last_update=%" PRIu64,
+ self->source_uri,
+ self->identifier,
+ self->checksum,
+ self->last_update);
+
+ /* The task is handed to the completion callback as its user data. */
+ g_file_replace_contents_bytes_async (sidecar_file,
+ data,
+ NULL, /* etag */
+ FALSE, /* make_backup */
+ G_FILE_CREATE_PRIVATE | G_FILE_CREATE_REPLACE_DESTINATION,
+ g_task_get_cancellable (task),
+ (GAsyncReadyCallback)sidecar_contents_replaced_cb,
+ task);
+}
- if (!g_error_matches (error, G_IO_ERROR, G_IO_ERROR_CANCELLED))
- g_warning ("Error retrieving filter %s: %s\n", data->source_uri, error->message);
+/* Completes an operation started with filter_info_save_sidecar(): returns
+ * TRUE on success, or FALSE with error set.
+ */
+static gboolean
+filter_info_save_sidecar_finish (GAsyncResult  *result,
+                                 GError       **error)
+{
+  GTask *task = G_TASK (result);
+
+  return g_task_propagate_boolean (task, error);
+}
- g_signal_handlers_disconnect_by_data (download, data);
- adblock_filter_retrieve_data_free (data);
+/* Returns a new GFile for the location where the JSON source of the filter
+ * is stored: "<identifier>.json" inside the filters directory of the manager.
+ */
+static GFile *
+filter_info_get_source_file (FilterInfo *self)
+{
+  const char *directory = ephy_filters_manager_get_adblock_filters_dir (self->manager);
+  g_autofree char *basename = g_strconcat (filter_info_get_identifier (self), ".json", NULL);
+
+  return g_file_new_build_filename (directory, basename, NULL);
}
+/* Announces a compiled filter by emitting EphyFiltersManager::filter-ready
+ * with wk_filter as argument, and marks the FilterInfo as enabled.
+ */
static void
-start_retrieving_filter_file (EphyFiltersManager *manager,
- const char *filter_url,
- GFile *destination)
+filter_info_setup_enable_compiled_filter (FilterInfo *self,
+ WebKitUserContentFilter *wk_filter)
{
- EphyDownload *download;
- WebKitDownload *wk_download;
- AdblockFilterRetrieveData *data;
- char *path;
+ g_assert (self);
+ g_assert (wk_filter);
- download = ephy_download_new_for_uri_internal (filter_url);
- path = g_file_get_uri (destination);
- ephy_download_set_destination_uri (download, path);
- ephy_download_disable_desktop_notification (download);
- g_free (path);
+ LOG ("Emitting EphyFiltersManager::filter-ready for %s.", filter_info_get_identifier (self));
+ g_signal_emit (self->manager, s_signals[FILTER_READY], 0, wk_filter);
+ self->enabled = TRUE;
+}
+
+/* Tells whether the filter should be rebuilt from its source. Returns FALSE
+ * when the manager is gone. Local sources are compared by modification time
+ * against the saved last update; other sources are considered stale once
+ * ADBLOCK_FILTER_UPDATE_FREQUENCY seconds have elapsed since the last update.
+ */
+static gboolean
+filter_info_needs_updating_from_source (const FilterInfo *self)
+{
+ g_assert (self);
+
+ if (!self->manager)
+ return FALSE;
+
+ /* For local files, check whether their modification time is newer
+ * than the last update time saved for it.
+ */
+ if (self->local) {
+ GTimeVal modification_time = { .tv_sec = 0 };
+ g_autoptr (GError) error = NULL;
+ g_autoptr (GFile) source_file = g_file_new_for_uri (self->source_uri);
+ g_autoptr (GFileInfo) info = g_file_query_info (source_file,
+ G_FILE_ATTRIBUTE_TIME_MODIFIED,
+ G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
+ NULL,
+ &error);
+ /* When the modification time cannot be queried, ask for an update. */
+ if (!info) {
+ g_warning ("Cannot get file modification time: %s", error->message);
+ return TRUE;
+ }
- wk_download = ephy_download_get_webkit_download (download);
- webkit_download_set_allow_overwrite (wk_download, TRUE);
+ g_file_info_get_modification_time (info, &modification_time);
+ return (modification_time.tv_sec > 0) && ((gulong)modification_time.tv_sec > self->last_update);
+ }
- data = adblock_filter_retrieve_data_new (manager, download, filter_url);
+ /* For remote filters, check the time elapsed since the last fetch. */
+ return (self->manager->update_time - self->last_update) >= ADBLOCK_FILTER_UPDATE_FREQUENCY;
+}
- g_signal_connect (download, "completed",
- G_CALLBACK (download_completed_cb), data);
- g_signal_connect (download, "error",
- G_CALLBACK (download_error_cb), data);
- g_object_unref (download);
+/* Completion callback for g_file_delete_async(). A file which was already
+ * missing and a cancelled operation are both fine; any other failure is
+ * reported with a warning.
+ */
+static void
+file_removed_cb (GFile        *file,
+                 GAsyncResult *result,
+                 void         *user_data)
+{
+  g_autoptr (GError) error = NULL;
+
+  g_assert (G_IS_FILE (file));
+  g_assert (result);
+
+  /* GIO reports a missing file as G_IO_ERROR_NOT_FOUND in the G_IO_ERROR
+   * domain; the G_FILE_ERROR domain is never used by g_file_delete_finish().
+   */
+  if (!g_file_delete_finish (file, result, &error) &&
+      !g_error_matches (error, G_IO_ERROR, G_IO_ERROR_NOT_FOUND) &&
+      !g_error_matches (error, G_IO_ERROR, G_IO_ERROR_CANCELLED)) {
+    g_autofree char *file_path = g_file_get_path (file);
+    g_warning ("Cannot delete '%s': %s", file_path, error->message);
+  }
}
+/* Completion callback for filter_info_save_sidecar(): logs the outcome. */
static void
-remove_old_adblock_filters (EphyFiltersManager *manager,
- GList *current_files)
-{
- GFile *file;
- GFile *filters_dir;
- GFileEnumerator *enumerator;
- gboolean current_filter;
- char *path;
- GError *error = NULL;
+sidecar_saved_cb (GObject      *source_object,
+                  GAsyncResult *result,
+                  FilterInfo   *self)
+{
+  g_autoptr (GError) save_error = NULL;
+
+  if (!filter_info_save_sidecar_finish (result, &save_error)) {
+    g_warning ("Cannot save sidecar for filter %s: %s",
+               filter_info_get_identifier (self),
+               save_error->message);
+    return;
+  }
+
+  LOG ("Sidecar successfully saved for filter %s.",
+       filter_info_get_identifier (self));
+}
- filters_dir = g_file_new_for_path (manager->filters_dir);
- enumerator = g_file_enumerate_children (filters_dir,
- G_FILE_ATTRIBUTE_STANDARD_NAME,
- G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
- NULL,
- &error);
- if (error != NULL) {
- g_warning ("Failed to enumerate children of %s: %s", manager->filters_dir, error->message);
- g_error_free (error);
- g_object_unref (filters_dir);
+/* Completion callback for webkit_user_content_filter_store_save(). On
+ * success the compiled filter is announced and the sidecar metadata is
+ * written; on failure other than cancellation a warning is logged.
+ */
+static void
+filter_saved_cb (WebKitUserContentFilterStore *store,
+ GAsyncResult *result,
+ FilterInfo *self)
+{
+ g_autoptr (GError) error = NULL;
+ g_autoptr (WebKitUserContentFilter) wk_filter = NULL;
+
+ /* Nothing to do once the manager is gone. */
+ if (!self->manager)
return;
+
+ g_assert (WEBKIT_IS_USER_CONTENT_FILTER_STORE (store));
+ g_assert (result);
+ g_assert (self);
+ g_assert (self->manager->store == store);
+
+ wk_filter = webkit_user_content_filter_store_save_finish (self->manager->store,
+ result,
+ &error);
+ if (wk_filter) {
+ LOG ("Filter %s compiled successfully.", filter_info_get_identifier (self));
+ filter_info_setup_enable_compiled_filter (self, wk_filter);
+ filter_info_save_sidecar (self,
+ self->manager->cancellable,
+ (GAsyncReadyCallback)sidecar_saved_cb,
+ self);
+ } else if (!g_error_matches (error, G_IO_ERROR, G_IO_ERROR_CANCELLED)) {
+ g_warning ("Filter %s <%s> cannot be compiled: %s.",
+ filter_info_get_identifier (self), self->source_uri,
+ error->message);
}
+}
- /* For each file in the adblock directory, check if it is a currently-enabled
- * and remove it if not, since filter files can be quite large. */
- for (;;) {
- g_file_enumerator_iterate (enumerator, NULL, &file, NULL, &error);
- if (error != NULL) {
- g_warning ("Failed to iterate file enumerator for %s: %s", manager->filters_dir, error->message);
- g_clear_error (&error);
- continue;
- }
+/* Loads the JSON rule set in json_file for the filter: the file is mapped in
+ * memory, its SHA-256 checksum is computed, and then either the rule set is
+ * handed to the filter store for compilation, or only the sidecar metadata
+ * is rewritten when recompilation can be skipped.
+ */
+static void
+filter_info_setup_load_file (FilterInfo *self,
+ GFile *json_file)
+{
+ g_autofree char *old_checksum = NULL;
+ g_autofree char *json_file_path = NULL;
+ g_autoptr (GMappedFile) file_map = NULL;
+ g_autoptr (GBytes) json_data = NULL;
+ g_autoptr (GError) error = NULL;
- /* Success: no more files left to iterate. */
- if (file == NULL)
- break;
+ g_assert (self);
+ g_assert (G_IS_FILE (json_file));
- current_filter = FALSE;
- for (GList *l = current_files; l != NULL; l = l->next) {
- if (g_file_equal (l->data, file)) {
- current_filter = TRUE;
- break;
- }
- }
+ if (!self->manager)
+ return;
- if (!current_filter) {
- g_file_delete (file, NULL, &error);
- if (error != NULL) {
- path = g_file_get_path (file);
- g_warning ("Failed to remove %s: %s", path, error->message);
- g_free (path);
- g_clear_error (&error);
- }
- }
+ /* Some filter source JSON files can be big (tens of megabytes), so instead
+ * of reading the data for compilation, just map the source file in memory.
+ */
+ json_file_path = g_file_get_path (json_file);
+ file_map = g_mapped_file_new (json_file_path,
+ FALSE, /* writable */
+ &error);
+
+ /* Immediately unlink a fetched file after it has been mapped. */
+ if (!self->local) {
+ LOG ("Unlinking fetched JSON file: %s", json_file_path);
+ g_file_delete_async (json_file,
+ G_PRIORITY_LOW,
+ self->manager->cancellable,
+ (GAsyncReadyCallback)file_removed_cb,
+ NULL);
+ }
+
+ if (!file_map) {
+ g_warning ("Cannot map filter %s source file %s: %s",
+ filter_info_get_identifier (self),
+ json_file_path, error->message);
+ return;
+ }
+
+ /* Keep the previous checksum around to detect unchanged sources. */
+ json_data = g_mapped_file_get_bytes (file_map);
+ old_checksum = g_steal_pointer (&self->checksum);
+ self->checksum = g_compute_checksum_for_bytes (G_CHECKSUM_SHA256, json_data);
+ self->last_update = self->manager->update_time;
+
+ /* NOTE(review): last_update was set to the current update time just above,
+ * so for non-local sources the staleness check below looks like it can never
+ * report that an update is needed -- confirm this is intended.
+ */
+ if (!filter_info_needs_updating_from_source (self) && self->found &&
+ old_checksum && strcmp (self->checksum, old_checksum) == 0) {
+ /* Even if an update is not needed, the sidecar needs to be updated. */
+ filter_info_save_sidecar (self,
+ self->manager->cancellable,
+ (GAsyncReadyCallback)sidecar_saved_cb,
+ self);
+ LOG ("Filter %s not stale, source checksum unchanged (%s), recompilation skipped.",
+ filter_info_get_identifier (self), self->checksum);
+ } else {
+ webkit_user_content_filter_store_save (self->manager->store,
+ filter_info_get_identifier (self),
+ json_data,
+ self->manager->cancellable,
+ (GAsyncReadyCallback)filter_saved_cb,
+ self);
+ }
+}
+
+/* Handler for the "completed" signal of the download of a filter source.
+ * Disconnects the handlers installed for the FilterInfo, loads the fetched
+ * file when its content type is "application/json" (warning otherwise), and
+ * drops a reference to the download.
+ */
+static void
+download_completed_cb (EphyDownload *download,
+ FilterInfo *self)
+{
+ g_assert (download);
+ g_assert (self);
+
+ g_signal_handlers_disconnect_by_data (download, self);
+
+ LOG ("Filter source %s fetched from <%s>", filter_info_get_identifier (self), self->source_uri);
+
+ if (g_strcmp0 ("application/json", ephy_download_get_content_type (download)) == 0) {
+ g_autoptr (GFile) json_file = g_file_new_for_uri (ephy_download_get_destination_uri (download));
+ filter_info_setup_load_file (self, json_file);
+ } else {
+ g_warning ("Filter source %s has invalid MIME type: %s",
+ ephy_download_get_destination_uri (download),
+ ephy_download_get_content_type (download));
}
- g_object_unref (filters_dir);
- g_object_unref (enumerator);
+ g_object_unref (download);
+}
+
+/* Handler for the "error" signal of the download of a filter source.
+ * Disconnects the handlers installed for the FilterInfo, reports the failure
+ * (unless the download was cancelled), and drops a reference to the download.
+ */
+static void
+download_errored_cb (EphyDownload *download,
+                     GError       *error,
+                     FilterInfo   *self)
+{
+  g_assert (download);
+  g_assert (error);
+  g_assert (self);
+
+  g_signal_handlers_disconnect_by_data (download, self);
+
+  /* Include the reason for the failure, otherwise the warning is of little
+   * use when diagnosing why a filter could not be fetched.
+   */
+  if (!g_error_matches (error, G_IO_ERROR, G_IO_ERROR_CANCELLED))
+    g_warning ("Cannot fetch source for filter %s from <%s>: %s",
+               filter_info_get_identifier (self), self->source_uri,
+               error->message);
+
+  /* There is not much else we can do if the download failed. Note that it
+   * is still possible that if a precompiled version of the filter was found
+   * that may get used instead.
+   */
+  LOG ("Done fetching filter %s", filter_info_get_identifier (self));
+
+  g_object_unref (download);
}
static void
-update_adblock_filter_files (EphyFiltersManager *manager)
+filter_load_cb (WebKitUserContentFilterStore *store,
+                GAsyncResult                 *result,
+                FilterInfo                   *self)
{
- char **filters;
- GList *files = NULL;
+  /* Completion callback for webkit_user_content_filter_store_load(). A
+   * compiled filter found in the store is announced right away; afterwards,
+   * when the filter is stale or missing, its source is loaded (local files)
+   * or downloaded (anything else) in order to compile a fresh version.
+   */
+  g_autoptr (GError) error = NULL;
+  g_autoptr (WebKitUserContentFilter) wk_filter = NULL;
+  g_autoptr (GFile) source_file = NULL;
+  g_autoptr (GFile) json_file = NULL;
+  g_autofree char *json_file_uri = NULL;
+  EphyDownload *download;
- if (!g_settings_get_boolean (EPHY_SETTINGS_WEB, EPHY_PREFS_WEB_ENABLE_ADBLOCK))
+  if (!self->manager)
return;
- /* Only once at a time please! Newest set of filters wins. */
- g_cancellable_cancel (manager->cancellable);
- g_object_unref (manager->cancellable);
- manager->cancellable = g_cancellable_new ();
+  g_assert (WEBKIT_IS_USER_CONTENT_FILTER_STORE (store));
+  g_assert (result);
+  g_assert (self);
+  g_assert (store == self->manager->store);
+
+  /* Determine whether the source is a local file *before* any staleness
+   * check: filter_info_needs_updating_from_source() reads the "local" flag,
+   * and would otherwise apply the rule for remote sources to local files
+   * the first time a filter is set up.
+   */
+  source_file = g_file_new_for_uri (self->source_uri);
+  self->local = g_file_is_native (source_file);
+
+  wk_filter = webkit_user_content_filter_store_load_finish (self->manager->store,
+                                                            result,
+                                                            &error);
+  self->found = (wk_filter != NULL);
+
+  if (wk_filter) {
+    LOG ("Found compiled filter %s.", filter_info_get_identifier (self));
+    filter_info_setup_enable_compiled_filter (self, wk_filter);
+    LOG ("Update %sneeded for filter %s (last %" PRIu64 "s ago, interval %us)",
+         filter_info_needs_updating_from_source (self) ? "" : "not ",
+         filter_info_get_identifier (self),
+         (self->manager->update_time - self->last_update),
+         ADBLOCK_FILTER_UPDATE_FREQUENCY);
+  } else if (g_error_matches (error, G_IO_ERROR, G_IO_ERROR_CANCELLED)) {
+    return;
+  } else if (g_error_matches (error,
+                              WEBKIT_USER_CONTENT_FILTER_ERROR,
+                              WEBKIT_USER_CONTENT_FILTER_ERROR_NOT_FOUND)) {
+    LOG ("Compiled filter %s not found, needs fetching.",
+         filter_info_get_identifier (self));
+  } else {
+    g_warning ("Lookup failed for compiled filter %s: %s.",
+               filter_info_get_identifier (self),
+               error->message);
+  }
- filters = g_settings_get_strv (EPHY_SETTINGS_MAIN, EPHY_PREFS_ADBLOCK_FILTERS);
- for (guint i = 0; filters[i]; i++) {
- GFile *filter_file;
+  if (!filter_info_needs_updating_from_source (self))
return;
- filter_file = ephy_uri_tester_get_adblock_filter_file (manager->filters_dir, filters[i]);
- if (!adblock_filter_file_is_valid (filter_file))
- start_retrieving_filter_file (manager, filters[i], filter_file);
- files = g_list_prepend (files, filter_file);
+  /* Even if a compiled filter was found, we may need to compile an updated
+   * version if the local file has changed, or the contents of remote URIs
+   * have changed. If an updated ruleset is available, it will replace the
+   * precompiled version found above (if any) once it has been compiled.
+   */
+  LOG ("Loading filter %s from <%s>", filter_info_get_identifier (self), self->source_uri);
+
+  /* Skip fetching local file:// URIs; load them directly. */
+  if (self->local) {
+    filter_info_setup_load_file (self, source_file);
+    return;
}
- remove_old_adblock_filters (manager, files);
+  /* Download non-local URIs. */
+  download = ephy_download_new_for_uri_internal (self->source_uri);
+
+  json_file = filter_info_get_source_file (self);
+  json_file_uri = g_file_get_uri (json_file);
+  ephy_download_set_destination_uri (download, json_file_uri);
+  ephy_download_disable_desktop_notification (download);
+  webkit_download_set_allow_overwrite (ephy_download_get_webkit_download (download), TRUE);
+
+  /* The handlers drop the reference to the download when it finishes. */
+  g_signal_connect (download, "completed",
+                    G_CALLBACK (download_completed_cb), self);
+  g_signal_connect (download, "error",
+                    G_CALLBACK (download_errored_cb), self);
+}
+
+/* Kicks off the setup of a filter by looking up its compiled version in the
+ * filter store of the manager; filter_load_cb() continues from there. Does
+ * nothing once the manager is gone.
+ */
+static void
+filter_info_setup_start (FilterInfo *self)
+{
+  const char *identifier;
+
+  g_assert (self);
+
+  if (!self->manager)
+    return;
+
+  identifier = filter_info_get_identifier (self);
+  LOG ("Setup started for <%s> id=%s", self->source_uri, identifier);
- g_strfreev (filters);
- g_list_free_full (files, g_object_unref);
+  webkit_user_content_filter_store_load (self->manager->store,
+                                         identifier,
+                                         self->manager->cancellable,
+                                         (GAsyncReadyCallback)filter_load_cb,
+                                         self);
}
+/* Completion callback for webkit_user_content_filter_store_remove(): warns
+ * about failures, except when the filter was not in the store to begin with.
+ */
static void
-adblock_filters_changed_cb (GSettings *settings,
- char *key,
- EphyFiltersManager *manager)
+filter_removed_cb (WebKitUserContentFilterStore *store,
+                   GAsyncResult                 *result,
+                   void                         *user_data)
{
- update_adblock_filter_files (manager);
+  g_autoptr (GError) remove_error = NULL;
+
+  g_assert (WEBKIT_IS_USER_CONTENT_FILTER_STORE (store));
+  g_assert (result);
+
+  if (webkit_user_content_filter_store_remove_finish (store, result, &remove_error))
+    return;
+
+  if (g_error_matches (remove_error,
+                       WEBKIT_USER_CONTENT_FILTER_ERROR,
+                       WEBKIT_USER_CONTENT_FILTER_ERROR_NOT_FOUND))
+    return;
+
+  g_warning ("Cannot remove compiled filter: %s", remove_error->message);
}
static void
-enable_adblock_changed_cb (GSettings *settings,
- char *key,
- EphyFiltersManager *manager)
+remove_unused_filter (const char *identifier,
+                      FilterInfo *filter)
+{
+  /* GHFunc applied to the filters which are no longer configured: schedules
+   * the removal of both the sidecar metadata file and the compiled filter
+   * kept in the filter store.
+   */
+  g_autoptr (GFile) sidecar_file = filter_info_get_sidecar_file (filter);
+
+  g_assert (strcmp (identifier, filter_info_get_identifier (filter)) == 0);
+  g_assert (!g_hash_table_contains (filter->manager->filters, identifier));
+
+  g_file_delete_async (sidecar_file,
+                       G_PRIORITY_LOW,
+                       filter->manager->cancellable,
+                       (GAsyncReadyCallback)file_removed_cb,
+                       NULL);
+  webkit_user_content_filter_store_remove (filter->manager->store,
+                                           identifier,
+                                           filter->manager->cancellable,
+                                           (GAsyncReadyCallback)filter_removed_cb,
+                                           NULL);
+  LOG ("Filter %s removal scheduled.", identifier);
+}
+
+/* Completion callback for filter_info_load_sidecar(). Unless the operation
+ * was cancelled, the setup of the filter continues even when the sidecar
+ * could not be loaded: the metadata is then simply treated as missing.
+ *
+ * The function is only used inside this file, hence it has internal linkage.
+ */
+static void
+sidecar_loaded_cb (GObject      *source_object,
+                   GAsyncResult *result,
+                   FilterInfo   *self)
{
- update_adblock_filter_files (manager);
+  g_autoptr (GError) error = NULL;
+  if (!filter_info_load_sidecar_finish (result, &error)) {
+    if (g_error_matches (error, G_IO_ERROR, G_IO_ERROR_CANCELLED))
+      return;
+
+    if (g_error_matches (error, G_IO_ERROR, G_IO_ERROR_NOT_FOUND)) {
+      LOG ("Sidecar missing for filter %s: %s",
+           filter_info_get_identifier (self),
+           error->message);
+    } else {
+      g_warning ("Cannot load sidecar file for filter %s: %s",
+                 filter_info_get_identifier (self),
+                 error->message);
+    }
+  }
+  filter_info_setup_start (self);
+}
+
+/* Handler for changes in the settings which control ad blocking, also called
+ * directly with NULL settings and key. Every call first cancels the work
+ * scheduled by the previous one and emits "filters-disabled"; when ad blocking
+ * is enabled the set of filters is then rebuilt from the configured URIs,
+ * reusing the FilterInfo of filters which were already known and scheduling
+ * the removal of those which are no longer configured.
+ */
+static void
+update_adblock_filter_files_cb (GSettings          *settings,
+                                char               *key,
+                                EphyFiltersManager *manager)
+{
+  const gint64 update_time = g_get_real_time () / G_USEC_PER_SEC;
+  g_autoptr (GHashTable) old_filters = NULL;
+  g_auto (GStrv) uris = NULL;
+
+  g_assert (update_time >= 0);
+  g_assert (manager);
+
+  /* Only once at a time please! Newest set of filters wins. This is done
+   * before checking whether filters are enabled, so operations still in
+   * flight cannot emit "filter-ready" after the filters have been disabled.
+   */
+  g_cancellable_cancel (manager->cancellable);
+  g_object_unref (manager->cancellable);
+  manager->cancellable = g_cancellable_new ();
+
+  if (!g_settings_get_boolean (EPHY_SETTINGS_WEB, EPHY_PREFS_WEB_ENABLE_ADBLOCK)) {
+    LOG ("Filters are disabled, skipping update.");
+    g_signal_emit (manager, s_signals[FILTERS_DISABLED], 0);
+    return;
+  }
+
+  LOG ("Emitting EphyFiltersManager::filters-disabled.");
+  g_signal_emit (manager, s_signals[FILTERS_DISABLED], 0);
+
+  manager->update_time = update_time;
+
+  old_filters = g_steal_pointer (&manager->filters);
+  manager->filters = g_hash_table_new_full (g_str_hash,
+                                            g_str_equal,
+                                            NULL,
+                                            (GDestroyNotify)filter_info_free);
+
+  uris = g_settings_get_strv (EPHY_SETTINGS_MAIN,
+                              EPHY_PREFS_ADBLOCK_FILTERS);
+  for (unsigned i = 0; uris[i]; i++) {
+    g_autofree char *filter_id = filter_info_identifier_for_source_uri (uris[i]);
+    FilterInfo *filter_info = NULL;
+    char *old_filter_id = NULL;
+
+    /* Check whether there was already a FilterInfo for the URI in the old
+     * filters table, and reuse it instead of creating a new one and reloading
+     * the sidecar file from disk.
+     *
+     * Note that the value is stolen from the old hash table in order to
+     * look it up and remove it from the old table *without* destroying it.
+     */
+    if (g_hash_table_steal_extended (old_filters,
+                                     filter_id,
+                                     (void **)&old_filter_id,
+                                     (void **)&filter_info)) {
+      g_assert (strcmp (old_filter_id, filter_id) == 0);
+      g_assert (strcmp (old_filter_id, filter_info_get_identifier (filter_info)) == 0);
+
+      LOG ("Filter %s in old set, stolen and starting setup.", filter_id);
+      filter_info_setup_start (filter_info);
+    } else {
+      /* Filter was not present in the old hash table: create a FilterInfo
+       * for the URI and start by loading its sidecar file.
+       */
+      LOG ("Filter %s not in old set, creating anew.", filter_id);
+      filter_info = filter_info_new (uris[i], manager);
+      filter_info->identifier = g_steal_pointer (&filter_id);
+      filter_info_load_sidecar (filter_info,
+                                manager->cancellable,
+                                (GAsyncReadyCallback)sidecar_loaded_cb,
+                                filter_info);
+    }
+
+    /* The key is the identifier string owned by the FilterInfo itself. */
+    g_hash_table_replace (manager->filters,
+                          (void *)filter_info_get_identifier (filter_info),
+                          filter_info);
+  }
+
+  /* Remove the filters which are no longer in the configured set. */
+  g_hash_table_foreach (old_filters,
+                        (GHFunc)remove_unused_filter,
+                        NULL);
}
static void
@@ -280,6 +826,7 @@ ephy_filters_manager_dispose (GObject *object)
g_cancellable_cancel (manager->cancellable);
g_clear_object (&manager->cancellable);
}
+ g_clear_object (&manager->store);
G_OBJECT_CLASS (ephy_filters_manager_parent_class)->dispose (object);
}
@@ -289,6 +836,7 @@ ephy_filters_manager_finalize (GObject *object)
{
EphyFiltersManager *manager = EPHY_FILTERS_MANAGER (object);
+ g_clear_pointer (&manager->filters, g_hash_table_unref);
g_free (manager->filters_dir);
G_OBJECT_CLASS (ephy_filters_manager_parent_class)->finalize (object);
@@ -298,17 +846,21 @@ static void
ephy_filters_manager_constructed (GObject *object)
{
EphyFiltersManager *manager = EPHY_FILTERS_MANAGER (object);
+ g_autofree char *saved_filters_dir = NULL;
G_OBJECT_CLASS (ephy_filters_manager_parent_class)->constructed (object);
+ saved_filters_dir = g_build_filename (manager->filters_dir, "compiled", NULL);
+ g_mkdir_with_parents (saved_filters_dir, 0700);
+ manager->store = webkit_user_content_filter_store_new (saved_filters_dir);
+
/* Note: up here because we must connect *before* reading the settings. */
- g_signal_connect (EPHY_SETTINGS_MAIN, "changed::" EPHY_PREFS_ADBLOCK_FILTERS,
- G_CALLBACK (adblock_filters_changed_cb), manager);
- g_signal_connect (EPHY_SETTINGS_WEB, "changed::" EPHY_PREFS_WEB_ENABLE_ADBLOCK,
- G_CALLBACK (enable_adblock_changed_cb), manager);
+ g_signal_connect_object (EPHY_SETTINGS_MAIN, "changed::" EPHY_PREFS_ADBLOCK_FILTERS,
+ G_CALLBACK (update_adblock_filter_files_cb), manager, 0);
+ g_signal_connect_object (EPHY_SETTINGS_WEB, "changed::" EPHY_PREFS_WEB_ENABLE_ADBLOCK,
+ G_CALLBACK (update_adblock_filter_files_cb), manager, 0);
- g_mkdir_with_parents (manager->filters_dir, 0700);
- update_adblock_filter_files (manager);
+ update_adblock_filter_files_cb (NULL, NULL, manager);
}
static void
@@ -356,6 +908,21 @@ ephy_filters_manager_class_init (EphyFiltersManagerClass *klass)
object_class->set_property = ephy_filters_manager_set_property;
object_class->get_property = ephy_filters_manager_get_property;
+ s_signals[FILTER_READY] =
+ g_signal_new ("filter-ready",
+ G_OBJECT_CLASS_TYPE (klass),
+ G_SIGNAL_RUN_FIRST,
+ 0, NULL, NULL, NULL,
+ G_TYPE_NONE, 1,
+ WEBKIT_TYPE_USER_CONTENT_FILTER);
+
+ s_signals[FILTERS_DISABLED] =
+ g_signal_new ("filters-disabled",
+ G_OBJECT_CLASS_TYPE (klass),
+ G_SIGNAL_RUN_FIRST,
+ 0, NULL, NULL, NULL,
+ G_TYPE_NONE, 0);
+
object_properties[PROP_FILTERS_DIR] =
g_param_spec_string ("filters-dir",
"Filters directory",
@@ -372,6 +939,10 @@ static void
ephy_filters_manager_init (EphyFiltersManager *manager)
{
manager->cancellable = g_cancellable_new ();
+ manager->filters = g_hash_table_new_full (g_str_hash,
+ g_str_equal,
+ NULL,
+ (GDestroyNotify)filter_info_free);
}
EphyFiltersManager *
diff --git a/embed/ephy-filters-manager.h b/embed/ephy-filters-manager.h
index bd444f08d..7fbb66b98 100644
--- a/embed/ephy-filters-manager.h
+++ b/embed/ephy-filters-manager.h
@@ -31,7 +31,7 @@ G_BEGIN_DECLS
G_DECLARE_FINAL_TYPE (EphyFiltersManager, ephy_filters_manager, EPHY, FILTERS_MANAGER, GObject)
-EphyFiltersManager *ephy_filters_manager_new (const char *adblock_filters_dir);
+EphyFiltersManager *ephy_filters_manager_new (const char *adblock_filters_dir);
const char *ephy_filters_manager_get_adblock_filters_dir (EphyFiltersManager *manager);
G_END_DECLS
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]