[epiphany] adblock: Re-add ad blocker using WebKitUserContentFilters



commit 3ecc9bf0247a52ebf4e3fa792d2580b715c4ac03
Author: Adrian Perez de Castro <aperez igalia com>
Date:   Wed Jan 23 01:20:22 2019 +0200

    adblock: Re-add ad blocker using WebKitUserContentFilters

 embed/ephy-embed-shell.c     |  17 +-
 embed/ephy-filters-manager.c | 907 +++++++++++++++++++++++++++++++++++--------
 embed/ephy-filters-manager.h |   2 +-
 3 files changed, 750 insertions(+), 176 deletions(-)
---
diff --git a/embed/ephy-embed-shell.c b/embed/ephy-embed-shell.c
index 3667d3f66..5705a63c1 100644
--- a/embed/ephy-embed-shell.c
+++ b/embed/ephy-embed-shell.c
@@ -1090,12 +1090,6 @@ ephy_embed_shell_create_web_context (EphyEmbedShell *shell)
   priv->web_context = webkit_web_context_new_with_website_data_manager (manager);
 }
 
-static char *
-adblock_filters_dir (EphyEmbedShell *shell)
-{
-  return g_build_filename (ephy_cache_dir (), "adblock", NULL);
-}
-
 static void
 download_started_cb (WebKitWebContext *web_context,
                      WebKitDownload   *download,
@@ -1280,9 +1274,18 @@ ephy_embed_shell_startup (GApplication *application)
                                          EPHY_PREFS_WEB_COOKIES_POLICY);
   ephy_embed_prefs_set_cookie_accept_policy (cookie_manager, cookie_policy);
 
-  filters_dir = adblock_filters_dir (shell);
+  filters_dir = g_build_filename (ephy_cache_dir (), "adblock", NULL);
   priv->filters_manager = ephy_filters_manager_new (filters_dir);
 
+  g_signal_connect_object (priv->filters_manager, "filters-disabled",
+                           G_CALLBACK (webkit_user_content_manager_remove_all_filters),
+                           priv->user_content,
+                           G_CONNECT_SWAPPED);
+  g_signal_connect_object (priv->filters_manager, "filter-ready",
+                           G_CALLBACK (webkit_user_content_manager_add_filter),
+                           priv->user_content,
+                           G_CONNECT_SWAPPED);
+
   g_signal_connect_object (priv->web_context, "download-started",
                            G_CALLBACK (download_started_cb), shell, 0);
 }
diff --git a/embed/ephy-filters-manager.c b/embed/ephy-filters-manager.c
index 277a6b10d..31fe7a7ed 100644
--- a/embed/ephy-filters-manager.c
+++ b/embed/ephy-filters-manager.c
@@ -1,6 +1,6 @@
 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2; -*- */
 /*
- *  Copyright © 2017 Igalia S.L.
+ *  Copyright © 2017, 2019 Igalia S.L.
  *
  *  This file is part of Epiphany.
  *
@@ -21,23 +21,38 @@
 #include "config.h"
 #include "ephy-filters-manager.h"
 
+#include "ephy-debug.h"
 #include "ephy-download.h"
 #include "ephy-prefs.h"
 #include "ephy-settings.h"
 
 #include <gio/gio.h>
 
+#include <inttypes.h>
+
 #define ADBLOCK_FILTER_UPDATE_FREQUENCY 24 * 60 * 60 /* In seconds */
+#define ADBLOCK_FILTER_SIDECAR_FILE_SUFFIX ".filterinfo"
 
 struct _EphyFiltersManager {
   GObject parent_instance;
 
   char *filters_dir;
+  GHashTable *filters;  /* (identifier, FilterInfo) */
+  guint64 update_time;  /* Compared with FilterInfo.last_update to decide on refetching. */
   GCancellable *cancellable;
+  WebKitUserContentFilterStore *store;  /* Where compiled filters are looked up and saved. */
 };
 
 G_DEFINE_TYPE (EphyFiltersManager, ephy_filters_manager, G_TYPE_OBJECT)
 
+/* Indices into s_signals[] for the signals emitted by EphyFiltersManager. */
+enum {
+  FILTER_READY,      /* Emitted with a WebKitUserContentFilter as argument. */
+  FILTERS_DISABLED,  /* NOTE(review): presumably "filters-disabled"; registration not visible here. */
+  LAST_SIGNAL,
+};
+
+static guint s_signals[LAST_SIGNAL];
+
 enum {
   PROP_0,
   PROP_FILTERS_DIR,
@@ -46,229 +61,760 @@ enum {
 
 static GParamSpec *object_properties[N_PROPERTIES] = { NULL, };
 
+/* Per-filter bookkeeping: ties the source URI of a filter to the identifier
+ * used for its compiled form, the metadata persisted in its sidecar file, and
+ * a few flags describing how far its setup has progressed.
+ *
+ * "manager" is a weak pointer (set with g_set_weak_pointer() and released
+ * with g_clear_weak_pointer()), which is why users of this struct check it
+ * for NULL before dereferencing it.
+ */
+typedef struct {
+  EphyFiltersManager *manager;
+  char *identifier;      /* Lazily derived from source_uri. */
+  char *source_uri;      /* Saved. */
+  char *checksum;        /* Saved. */
+  guint64 last_update;   /* Saved, seconds. */
+
+  gboolean found   : 1;  /* WebKitUserContentFilter found during lookup. */
+  gboolean enabled : 1;  /* The filter is already enabled. */
+  gboolean local   : 1;  /* The source_uri is a local file URI. */
+} FilterInfo;
+
+/* The "saved" fields from the struct above are stored as versioned sidecar
+ * metadata files, using GVariant for serialization. An integer indicating
+ * the version of the on-disk format is prepended to the data, and it must
+ * be increased by 1 in the source code whenever the GVariant format below
+ * changes.
+ */
+#define FILTER_INFO_VARIANT_VERSION ((uint32_t)1)
+#define FILTER_INFO_VARIANT_FORMAT  "(usmst)"
+
+/* Releases a FilterInfo: drops the weak pointer to the manager, then frees
+ * the owned strings and finally the struct itself.
+ */
+static void
+filter_info_free (FilterInfo *self)
+{
+  g_clear_weak_pointer (&self->manager);
+
+  /* The struct is released right below, so the members need no resetting. */
+  g_free (self->identifier);
+  g_free (self->source_uri);
+  g_free (self->checksum);
+
+  g_free (self);
+}
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC (FilterInfo, filter_info_free)
+
+/* Allocates a zero-initialized FilterInfo for source_uri (which is copied)
+ * holding a weak pointer to manager. Both arguments must be non-NULL. The
+ * result is released with filter_info_free().
+ */
+static FilterInfo *
+filter_info_new (const char         *source_uri,
+                 EphyFiltersManager *manager)
+{
+  FilterInfo *info;
+
+  g_assert (source_uri);
+  g_assert (manager);
+
+  info = g_new0 (FilterInfo, 1);
+  info->source_uri = g_strdup (source_uri);
+  g_set_weak_pointer (&info->manager, manager);
+
+  return info;
+}
+
 static gboolean
-adblock_filter_file_is_valid (GFile *file)
-{
-  GFileInfo *file_info;
-  gboolean result = FALSE;
-
-  /* Now check if the local file is too old. */
-  file_info = g_file_query_info (file,
-                                 G_FILE_ATTRIBUTE_TIME_MODIFIED ","G_FILE_ATTRIBUTE_STANDARD_SIZE,
-                                 G_FILE_QUERY_INFO_NONE,
-                                 NULL,
-                                 NULL);
-  if (file_info) {
-    if (g_file_info_get_size (file_info) > 0) {
-      GTimeVal current_time;
-      GTimeVal mod_time;
-
-      g_get_current_time (&current_time);
-      g_file_info_get_modification_time (file_info, &mod_time);
-
-      if (current_time.tv_sec > mod_time.tv_sec) {
-        gint64 expire_time = mod_time.tv_sec + ADBLOCK_FILTER_UPDATE_FREQUENCY;
-
-        result = current_time.tv_sec < expire_time;
-      }
-    }
-    g_object_unref (file_info);
+filter_info_load_from_bytes (FilterInfo  *self,
+                             GBytes      *data,
+                             GError     **error)
+{
+  /* Decodes sidecar metadata from "data" and, when it is valid and belongs
+   * to this filter, copies the saved checksum and last update time into
+   * "self". Returns TRUE on success; on failure "error" is set and "self"
+   * is left unmodified.
+   */
+  uint32_t saved_version = 0;
+  g_autofree char *source_uri = NULL;
+  g_autofree char *checksum = NULL;
+  guint64 last_update = 0;
+
+  /* The bytes come from a file in the user's cache directory, which may be
+   * truncated or corrupted, so they must not be marked as trusted. Note that
+   * g_variant_new_from_bytes() never returns NULL: malformed input has to be
+   * detected by checking whether the data is in normal form.
+   */
+  g_autoptr (GVariantType) value_type = g_variant_type_new (FILTER_INFO_VARIANT_FORMAT);
+  g_autoptr (GVariant) value = g_variant_ref_sink (g_variant_new_from_bytes (value_type, data, FALSE));
+
+  if (!g_variant_is_normal_form (value)) {
+    g_set_error_literal (error,
+                         G_IO_ERROR,
+                         G_IO_ERROR_INVALID_ARGUMENT,
+                         "Cannot decode GVariant from bytes");
+    return FALSE;
+  }
+
+  g_variant_get_child (value, 0, "u", &saved_version);
+  if (saved_version != FILTER_INFO_VARIANT_VERSION) {
+    g_set_error (error,
+                 G_IO_ERROR,
+                 G_IO_ERROR_INVALID_DATA,
+                 "Attempted to decode content filter data GVariant with"
+                 " format version %" PRIu32 " (expected %" PRIu32 ")",
+                 saved_version,
+                 FILTER_INFO_VARIANT_VERSION);
+    return FALSE;
+  }
+
+  g_variant_get (value,
+                 FILTER_INFO_VARIANT_FORMAT,
+                 NULL,  /* Ignore the version, it has been checked already. */
+                 &source_uri,
+                 &checksum,
+                 &last_update);
+
+  if (strcmp (source_uri, self->source_uri) != 0) {
+    g_set_error (error,
+                 G_IO_ERROR,
+                 G_IO_ERROR_INVALID_DATA,
+                 "Attempted to decode content filter data GVariant with"
+                 " wrong filter URI <%s> (expected <%s>)",
+                 source_uri,
+                 self->source_uri);
+    return FALSE;
   }
 
-  return result;
+  /* All sanity checks passed. The "source_uri" member does not need to
+   * be updated in the struct because at this point it is known to be the
+   * same as in the sidecar metadata file, and the same applies to the
+   * "identifier" field.
+   */
+  g_clear_pointer (&self->checksum, g_free);
+  self->checksum = g_steal_pointer (&checksum);
+  self->last_update = last_update;
+
+  LOG ("Loaded metadata: uri=<%s>, identifier=%s, checksum=%s, last_update=%" PRIu64,
+       self->source_uri,
+       self->identifier,
+       self->checksum,
+       self->last_update);
+
+  return TRUE;
 }
 
-typedef struct {
-  EphyFiltersManager *manager;
-  EphyDownload *download;
-  char *source_uri;
-} AdblockFilterRetrieveData;
+/* Derives the identifier of a filter from its source URI, as the SHA-256
+ * checksum string of the URI. The caller owns the returned string.
+ */
+static char *
+filter_info_identifier_for_source_uri (const char *source_uri)
+{
+  char *identifier;
+
+  g_assert (source_uri);
+
+  identifier = g_compute_checksum_for_string (G_CHECKSUM_SHA256, source_uri, -1);
+  return identifier;
+}
 
-static AdblockFilterRetrieveData *
-adblock_filter_retrieve_data_new (EphyFiltersManager *manager,
-                                  EphyDownload       *download,
-                                  const char         *source_uri)
+/* Returns the identifier of the filter, computing and caching it in the
+ * struct on first use. The returned string is owned by the FilterInfo.
+ */
+static const char *
+filter_info_get_identifier (FilterInfo *self)
 {
-  AdblockFilterRetrieveData *data;
-  data = g_new (AdblockFilterRetrieveData, 1);
-  data->manager = g_object_ref (manager);
-  data->download = g_object_ref (download);
-  data->source_uri = g_strdup (source_uri);
-  return data;
+  g_assert (self);
+
+  /* Already derived by an earlier call. */
+  if (self->identifier)
+    return self->identifier;
+
+  self->identifier = filter_info_identifier_for_source_uri (self->source_uri);
+  return self->identifier;
+}
+
+/* Builds the GFile of the sidecar metadata file of a filter, which lives in
+ * the filters directory of the manager and is named after the identifier
+ * of the filter plus ADBLOCK_FILTER_SIDECAR_FILE_SUFFIX. The caller owns the
+ * returned object.
+ */
+static GFile *
+filter_info_get_sidecar_file (FilterInfo *self)
+{
+  const char *directory;
+  const char *identifier;
+  g_autofree char *basename = NULL;
+
+  directory = ephy_filters_manager_get_adblock_filters_dir (self->manager);
+  identifier = filter_info_get_identifier (self);
+  basename = g_strconcat (identifier, ADBLOCK_FILTER_SIDECAR_FILE_SUFFIX, NULL);
+
+  return g_file_new_build_filename (directory, basename, NULL);
 }
 
 static void
-adblock_filter_retrieve_data_free (AdblockFilterRetrieveData *data)
+sidecar_bytes_loaded_cb (GFile        *file,
+                         GAsyncResult *result,
+                         GTask        *task)
 {
-  g_object_unref (data->manager);
-  g_object_unref (data->download);
-  g_free (data->source_uri);
-  g_free (data);
+  /* Completion callback for the g_file_load_bytes_async() call started by
+   * filter_info_load_sidecar(): decodes the loaded bytes into the FilterInfo
+   * attached to "task" as task data, and completes the task accordingly.
+   */
+  GError *error = NULL;
+  FilterInfo *self = g_task_get_task_data (task);
+  g_autoptr (GBytes) data = g_file_load_bytes_finish (file,
+                                                      result,
+                                                      NULL,  /* etag_out */
+                                                      &error);
+  if (data && filter_info_load_from_bytes (self, data, &error)) {
+    g_task_return_boolean (task, TRUE);
+  } else {
+    /* Ownership of the error is transferred to the task. */
+    g_task_return_error (task, error);
+  }
+
+  /* This callback received the reference created by g_task_new(); without
+   * dropping it here the task would be leaked on every load.
+   */
+  g_object_unref (task);
 }
 
 static void
-download_completed_cb (EphyDownload              *download,
-                       AdblockFilterRetrieveData *data)
+filter_info_load_sidecar (FilterInfo          *self,
+                          GCancellable        *cancellable,
+                          GAsyncReadyCallback  callback,
+                          void                *user_data)
+{
+  /* Starts loading the sidecar metadata file of the filter. The result is
+   * delivered to "callback", which obtains it with
+   * filter_info_load_sidecar_finish(). The type of the file is queried
+   * synchronously up front: anything other than a regular file is reported
+   * as an error without attempting to read it.
+   */
+  g_autoptr (GFile) sidecar_file = filter_info_get_sidecar_file (self);
+  g_autofree char *sidecar_file_path = g_file_get_path (sidecar_file);
+  g_autofree char *task_name = NULL;
+  GFileType file_type;
+  GTask *task;
+
+  file_type = g_file_query_file_type (sidecar_file,
+                                      G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
+                                      NULL);
+
+  if (file_type == G_FILE_TYPE_UNKNOWN) {
+    g_task_report_new_error (NULL, callback, user_data,
+                             filter_info_load_sidecar,
+                             G_IO_ERROR, G_IO_ERROR_NOT_FOUND,
+                             "%s: %s", sidecar_file_path, "File not found");
+    return;
+  }
+
+  if (file_type != G_FILE_TYPE_REGULAR) {
+    g_task_report_new_error (NULL, callback, user_data,
+                             filter_info_load_sidecar,
+                             G_IO_ERROR, G_IO_ERROR_NOT_REGULAR_FILE,
+                             "%s: %s", sidecar_file_path, "Not a regular file");
+    return;
+  }
+
+  task = g_task_new (NULL, cancellable, callback, user_data);
+  task_name = g_strconcat ("load sidecar file: ", sidecar_file_path, NULL);
+
+  /* The FilterInfo doubles as the task data: it already has everything the
+   * completion callback needs. No destroy function is set, so the task does
+   * not take ownership of it.
+   */
+  g_task_set_task_data (task, self, NULL);
+  g_task_set_name (task, task_name);
+
+  g_file_load_bytes_async (sidecar_file,
+                           g_task_get_cancellable (task),
+                           (GAsyncReadyCallback)sidecar_bytes_loaded_cb,
+                           task);
+}
+
+/* Obtains the result of filter_info_load_sidecar(): TRUE when the metadata
+ * was loaded, otherwise FALSE with "error" set.
+ */
+static gboolean
+filter_info_load_sidecar_finish (GAsyncResult  *result,
+                                 GError       **error)
 {
-  g_signal_handlers_disconnect_by_data (download, data);
-  adblock_filter_retrieve_data_free (data);
+  GTask *task = G_TASK (result);
+
+  return g_task_propagate_boolean (task, error);
+}
+
+/* Serializes the format version followed by the "saved" fields of the
+ * FilterInfo (source URI, checksum, last update time) as a GVariant of type
+ * FILTER_INFO_VARIANT_FORMAT. The caller owns the returned GBytes.
+ */
+static GBytes *
+filter_info_get_data_as_bytes (FilterInfo *self)
+{
+  GVariant *floating;
+  g_autoptr (GVariant) value = NULL;
+
+  floating = g_variant_new (FILTER_INFO_VARIANT_FORMAT,
+                            FILTER_INFO_VARIANT_VERSION,
+                            self->source_uri,
+                            self->checksum,
+                            self->last_update);
+  value = g_variant_ref_sink (floating);
+
+  return g_variant_get_data_as_bytes (value);
 }
 
 static void
-download_error_cb (EphyDownload              *download,
-                   GError                    *error,
-                   AdblockFilterRetrieveData *data)
+sidecar_contents_replaced_cb (GFile        *file,
+                              GAsyncResult *result,
+                              GTask        *task)
 {
-  GFileOutputStream *stream;
-  GFile *file;
+  /* Completion callback for the g_file_replace_contents_bytes_async() call
+   * started by filter_info_save_sidecar(): forwards the outcome of the write
+   * to "task".
+   */
+  GError *error = NULL;
+  if (g_file_replace_contents_finish (file,
+                                      result,
+                                      NULL,  /* new_etag */
+                                      &error)) {
+    g_task_return_boolean (task, TRUE);
+  } else {
+    /* Ownership of the error is transferred to the task. */
+    g_task_return_error (task, error);
+  }
+
+  /* This callback received the reference created by g_task_new(); without
+   * dropping it here the task would be leaked on every save.
+   */
+  g_object_unref (task);
+}
 
-  /* Create an empty file if it doesn't exist to unblock extensions */
-  file = g_file_new_for_uri (ephy_download_get_destination_uri (download));
-  stream = g_file_create (file, G_FILE_CREATE_NONE, NULL, NULL);
-  if (stream)
-    g_object_unref (stream);
-  g_object_unref (file);
+/* Starts writing the serialized metadata of the filter to its sidecar file,
+ * replacing any previous contents. The result is delivered to "callback",
+ * which obtains it with filter_info_save_sidecar_finish().
+ */
+static void
+filter_info_save_sidecar (FilterInfo          *self,
+                          GCancellable        *cancellable,
+                          GAsyncReadyCallback  callback,
+                          void                *user_data)
+{
+  g_autoptr (GBytes) data = NULL;
+  g_autoptr (GFile) sidecar_file = NULL;
+  g_autofree char *sidecar_file_path = NULL;
+  g_autofree char *task_name = NULL;
+  GTask *task;
+
+  data = filter_info_get_data_as_bytes (self);
+  sidecar_file = filter_info_get_sidecar_file (self);
+  sidecar_file_path = g_file_get_path (sidecar_file);
+  task_name = g_strconcat ("save sidecar file: ", sidecar_file_path, NULL);
+
+  task = g_task_new (NULL, cancellable, callback, user_data);
+  g_task_set_name (task, task_name);
+
+  LOG ("Saving metadata: uri=<%s>, identifier=%s, checksum=%s, last_update=%" PRIu64,
+       self->source_uri,
+       self->identifier,
+       self->checksum,
+       self->last_update);
+
+  /* The task is handed over as user data to the completion callback. */
+  g_file_replace_contents_bytes_async (sidecar_file,
+                                       data,
+                                       NULL,   /* etag */
+                                       FALSE,  /* make_backup */
+                                       G_FILE_CREATE_PRIVATE | G_FILE_CREATE_REPLACE_DESTINATION,
+                                       g_task_get_cancellable (task),
+                                       (GAsyncReadyCallback)sidecar_contents_replaced_cb,
+                                       task);
+}
 
-  if (!g_error_matches (error, G_IO_ERROR, G_IO_ERROR_CANCELLED))
-    g_warning ("Error retrieving filter %s: %s\n", data->source_uri, error->message);
+/* Obtains the result of filter_info_save_sidecar(): TRUE when the sidecar
+ * file was written, otherwise FALSE with "error" set.
+ */
+static gboolean
+filter_info_save_sidecar_finish (GAsyncResult  *result,
+                                 GError       **error)
+{
+  GTask *task = G_TASK (result);
+
+  return g_task_propagate_boolean (task, error);
+}
 
-  g_signal_handlers_disconnect_by_data (download, data);
-  adblock_filter_retrieve_data_free (data);
+/* Builds the GFile where the JSON source of a fetched filter gets stored:
+ * a file named "<identifier>.json" inside the filters directory of the
+ * manager. The caller owns the returned object.
+ */
+static GFile *
+filter_info_get_source_file (FilterInfo *self)
+{
+  g_autofree char *basename = NULL;
+  const char *directory;
+
+  basename = g_strconcat (filter_info_get_identifier (self), ".json", NULL);
+  directory = ephy_filters_manager_get_adblock_filters_dir (self->manager);
+
+  return g_file_new_build_filename (directory, basename, NULL);
 }
 
 static void
-start_retrieving_filter_file (EphyFiltersManager *manager,
-                              const char         *filter_url,
-                              GFile              *destination)
+filter_info_setup_enable_compiled_filter (FilterInfo              *self,
+                                          WebKitUserContentFilter *wk_filter)
 {
-  EphyDownload *download;
-  WebKitDownload *wk_download;
-  AdblockFilterRetrieveData *data;
-  char *path;
+  /* Announces a compiled filter by emitting the FILTER_READY signal on the
+   * manager with "wk_filter" as argument, then marks the filter as enabled.
+   * NOTE(review): self->manager is dereferenced without a NULL check here;
+   * the callers visible in this file check it before calling.
+   */
+  g_assert (self);
+  g_assert (wk_filter);
 
-  download = ephy_download_new_for_uri_internal (filter_url);
-  path = g_file_get_uri (destination);
-  ephy_download_set_destination_uri (download, path);
-  ephy_download_disable_desktop_notification (download);
-  g_free (path);
+  LOG ("Emitting EphyFiltersManager::filter-ready for %s.", filter_info_get_identifier (self));
+  g_signal_emit (self->manager, s_signals[FILTER_READY], 0, wk_filter);
+  self->enabled = TRUE;
+}
+
+/* Tells whether the filter should be (re)built from its source.
+ *
+ * Returns FALSE when the manager is gone. For local sources the result is
+ * TRUE when the modification time of the file is later than last_update,
+ * or when that time cannot be queried. For any other source the result is
+ * TRUE once ADBLOCK_FILTER_UPDATE_FREQUENCY seconds have elapsed between
+ * last_update and the update_time of the manager.
+ */
+static gboolean
+filter_info_needs_updating_from_source (const FilterInfo *self)
+{
+  g_assert (self);
+
+  if (!self->manager)
+    return FALSE;
+
+  /* For local files, check whether their modification time is newer
+   * than the last update time saved for it.
+   */
+  if (self->local) {
+    GTimeVal modification_time = { .tv_sec = 0 };
+    g_autoptr (GError) error = NULL;
+    g_autoptr (GFile) source_file = g_file_new_for_uri (self->source_uri);
+    g_autoptr (GFileInfo) info = g_file_query_info (source_file,
+                                                    G_FILE_ATTRIBUTE_TIME_MODIFIED,
+                                                    G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
+                                                    NULL,
+                                                    &error);
+    if (!info) {
+      /* Without a modification time, err on the side of updating. */
+      g_warning ("Cannot get file modification time: %s", error->message);
+      return TRUE;
+    }
 
-  wk_download = ephy_download_get_webkit_download (download);
-  webkit_download_set_allow_overwrite (wk_download, TRUE);
+    /* The "> 0" test guards the cast to an unsigned type that follows. */
+    g_file_info_get_modification_time (info, &modification_time);
+    return (modification_time.tv_sec > 0) && ((gulong)modification_time.tv_sec > self->last_update);
+  }
 
-  data = adblock_filter_retrieve_data_new (manager, download, filter_url);
+  /* For remote filters, check the time elapsed since the last fetch. */
+  /* NOTE(review): both operands are guint64, so a last_update later than
+   * update_time wraps around to a huge value and reports TRUE — confirm
+   * that this is the intended outcome.
+   */
+  return (self->manager->update_time - self->last_update) >= ADBLOCK_FILTER_UPDATE_FREQUENCY;
+}
 
-  g_signal_connect (download, "completed",
-                    G_CALLBACK (download_completed_cb), data);
-  g_signal_connect (download, "error",
-                    G_CALLBACK (download_error_cb), data);
-  g_object_unref (download);
+/* Completion callback for g_file_delete_async(). Failing to delete a file
+ * is only worth a warning when the file was actually there and the
+ * operation was not cancelled.
+ */
+static void
+file_removed_cb (GFile        *file,
+                 GAsyncResult *result,
+                 void         *user_data)
+{
+  g_autoptr (GError) error = NULL;
+
+  g_assert (G_IS_FILE (file));
+  g_assert (result);
+
+  /* GIO reports a missing file as G_IO_ERROR_NOT_FOUND. Checking for
+   * G_FILE_ERROR_NOENT from the G_FILE_ERROR domain would never match an
+   * error produced by g_file_delete_finish().
+   */
+  if (!g_file_delete_finish (file, result, &error) &&
+      !g_error_matches (error, G_IO_ERROR, G_IO_ERROR_NOT_FOUND) &&
+      !g_error_matches (error, G_IO_ERROR, G_IO_ERROR_CANCELLED)) {
+    g_autofree char *file_path = g_file_get_path (file);
+    g_warning ("Cannot delete '%s': %s", file_path, error->message);
+  }
 }
 
 static void
-remove_old_adblock_filters (EphyFiltersManager *manager,
-                            GList              *current_files)
-{
-  GFile *file;
-  GFile *filters_dir;
-  GFileEnumerator *enumerator;
-  gboolean current_filter;
-  char *path;
-  GError *error = NULL;
+sidecar_saved_cb (GObject      *source_object,
+                  GAsyncResult *result,
+                  FilterInfo   *self)
+{
+  /* Completion callback for filter_info_save_sidecar(): only reports the
+   * outcome, a failure to save the sidecar is not acted upon.
+   */
+  g_autoptr (GError) error = NULL;
+
+  if (!filter_info_save_sidecar_finish (result, &error)) {
+    g_warning ("Cannot save sidecar for filter %s: %s",
+               filter_info_get_identifier (self),
+               error->message);
+    return;
+  }
+
+  LOG ("Sidecar successfully saved for filter %s.",
+       filter_info_get_identifier (self));
+}
 
-  filters_dir = g_file_new_for_path (manager->filters_dir);
-  enumerator = g_file_enumerate_children (filters_dir,
-                                          G_FILE_ATTRIBUTE_STANDARD_NAME,
-                                          G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
-                                          NULL,
-                                          &error);
-  if (error != NULL) {
-    g_warning ("Failed to enumerate children of %s: %s", manager->filters_dir, error->message);
-    g_error_free (error);
-    g_object_unref (filters_dir);
+/* Completion callback for webkit_user_content_filter_store_save(). On
+ * success the freshly compiled filter is announced and the sidecar metadata
+ * is saved; on failure a warning is printed, except for cancellations.
+ *
+ * NOTE(review): "self" arrives as plain user data and nothing visible here
+ * keeps the FilterInfo alive until this callback runs — confirm that it
+ * cannot be freed while the operation is in flight.
+ */
+static void
+filter_saved_cb (WebKitUserContentFilterStore *store,
+                 GAsyncResult                 *result,
+                 FilterInfo                   *self)
+{
+  g_autoptr (GError) error = NULL;
+  g_autoptr (WebKitUserContentFilter) wk_filter = NULL;
+
+  /* The manager is a weak pointer: NULL means there is nobody to notify. */
+  if (!self->manager)
     return;
+
+  g_assert (WEBKIT_IS_USER_CONTENT_FILTER_STORE (store));
+  g_assert (result);
+  g_assert (self);
+  g_assert (self->manager->store == store);
+
+  wk_filter = webkit_user_content_filter_store_save_finish (self->manager->store,
+                                                            result,
+                                                            &error);
+  if (wk_filter) {
+    LOG ("Filter %s compiled successfully.", filter_info_get_identifier (self));
+    filter_info_setup_enable_compiled_filter (self, wk_filter);
+    /* Persist the checksum and update time recorded for this source. */
+    filter_info_save_sidecar (self,
+                              self->manager->cancellable,
+                              (GAsyncReadyCallback)sidecar_saved_cb,
+                              self);
+  } else if (!g_error_matches (error, G_IO_ERROR, G_IO_ERROR_CANCELLED)) {
+    g_warning ("Filter %s <%s> cannot be compiled: %s.",
+               filter_info_get_identifier (self), self->source_uri,
+               error->message);
   }
+}
 
-  /* For each file in the adblock directory, check if it is a currently-enabled
-   * and remove it if not, since filter files can be quite large. */
-  for (;;) {
-    g_file_enumerator_iterate (enumerator, NULL, &file, NULL, &error);
-    if (error != NULL) {
-      g_warning ("Failed to iterate file enumerator for %s: %s", manager->filters_dir, error->message);
-      g_clear_error (&error);
-      continue;
-    }
+/* Loads the JSON rule set in "json_file" and hands it over for compilation,
+ * unless a compiled filter was already found and the checksum of the source
+ * matches the previously recorded one, in which case only the sidecar
+ * metadata is saved again. Does nothing when the manager is gone.
+ */
+static void
+filter_info_setup_load_file (FilterInfo *self,
+                             GFile      *json_file)
+{
+  g_autofree char *old_checksum = NULL;
+  g_autofree char *json_file_path = NULL;
+  g_autoptr (GMappedFile) file_map = NULL;
+  g_autoptr (GBytes) json_data = NULL;
+  g_autoptr (GError) error = NULL;
 
-    /* Success: no more files left to iterate. */
-    if (file == NULL)
-      break;
+  g_assert (self);
+  g_assert (G_IS_FILE (json_file));
 
-    current_filter = FALSE;
-    for (GList *l = current_files; l != NULL; l = l->next) {
-      if (g_file_equal (l->data, file)) {
-        current_filter = TRUE;
-        break;
-      }
-    }
+  if (!self->manager)
+    return;
 
-    if (!current_filter) {
-      g_file_delete (file, NULL, &error);
-      if (error != NULL) {
-        path = g_file_get_path (file);
-        g_warning ("Failed to remove %s: %s", path, error->message);
-        g_free (path);
-        g_clear_error (&error);
-      }
-    }
+  /* Some filter source JSON files can be big (tens of megabytes), so instead
+   * of reading the data for compilation, just map the source file in memory.
+   */
+  json_file_path = g_file_get_path (json_file);
+  file_map = g_mapped_file_new (json_file_path,
+                                FALSE,  /* writable */
+                                &error);
+
+  /* Immediately unlink a fetched file after it has been mapped. */
+  /* Note that the removal is requested whether or not mapping succeeded. */
+  if (!self->local) {
+    LOG ("Unlinking fetched JSON file: %s", json_file_path);
+    g_file_delete_async (json_file,
+                         G_PRIORITY_LOW,
+                         self->manager->cancellable,
+                         (GAsyncReadyCallback)file_removed_cb,
+                         NULL);
+  }
+
+  if (!file_map) {
+    g_warning ("Cannot map filter %s source file %s: %s",
+               filter_info_get_identifier (self),
+               json_file_path, error->message);
+    return;
+  }
+
+  /* Keep the previous checksum around for the comparison below, and record
+   * the new checksum and update time before deciding what to do.
+   */
+  json_data = g_mapped_file_get_bytes (file_map);
+  old_checksum = g_steal_pointer (&self->checksum);
+  self->checksum = g_compute_checksum_for_bytes (G_CHECKSUM_SHA256, json_data);
+  self->last_update = self->manager->update_time;
+
+  /* The staleness check below runs with last_update already refreshed. */
+  if (!filter_info_needs_updating_from_source (self) && self->found &&
+      old_checksum && strcmp (self->checksum, old_checksum) == 0) {
+    /* Even if an update is not needed, the sidecar needs to be updated. */
+    filter_info_save_sidecar (self,
+                              self->manager->cancellable,
+                              (GAsyncReadyCallback)sidecar_saved_cb,
+                              self);
+    LOG ("Filter %s not stale, source checksum unchanged (%s), recompilation skipped.",
+         filter_info_get_identifier (self), self->checksum);
+  } else {
+    /* Compile the rule set; filter_saved_cb() takes over on completion. */
+    webkit_user_content_filter_store_save (self->manager->store,
+                                           filter_info_get_identifier (self),
+                                           json_data,
+                                           self->manager->cancellable,
+                                           (GAsyncReadyCallback)filter_saved_cb,
+                                           self);
+  }
+}
+
+/* Handler for the "completed" signal of a filter source download: loads the
+ * fetched file when it was served as "application/json" and warns otherwise.
+ * The handlers connected with this FilterInfo as data are disconnected, and
+ * a reference to the download is released at the end.
+ */
+static void
+download_completed_cb (EphyDownload *download,
+                       FilterInfo   *self)
+{
+  g_assert (download);
+  g_assert (self);
+
+  g_signal_handlers_disconnect_by_data (download, self);
+
+  LOG ("Filter source %s fetched from <%s>", filter_info_get_identifier (self), self->source_uri);
+
+  if (g_strcmp0 ("application/json", ephy_download_get_content_type (download)) == 0) {
+    g_autoptr (GFile) json_file = g_file_new_for_uri (ephy_download_get_destination_uri (download));
+    filter_info_setup_load_file (self, json_file);
+  } else {
+    g_warning ("Filter source %s has invalid MIME type: %s",
+               ephy_download_get_destination_uri (download),
+               ephy_download_get_content_type (download));
   }
 
-  g_object_unref (filters_dir);
-  g_object_unref (enumerator);
+  /* NOTE(review): presumably this drops the reference obtained when the
+   * download was created in filter_load_cb() — confirm.
+   */
+  g_object_unref (download);
+}
+
+/* Handler for the "error" signal of a filter source download. Disconnects
+ * the handlers connected with this FilterInfo as data, reports the failure
+ * unless the download was cancelled, and releases a reference to the
+ * download.
+ */
+static void
+download_errored_cb (EphyDownload *download,
+                     GError       *error,
+                     FilterInfo   *self)
+{
+  g_assert (download);
+  g_assert (error);
+  g_assert (self);
+
+  g_signal_handlers_disconnect_by_data (download, self);
+
+  /* Include the reason for the failure: without it the warning is of little
+   * help for telling apart network, permission, or disk space problems.
+   */
+  if (!g_error_matches (error, G_IO_ERROR, G_IO_ERROR_CANCELLED)) {
+    g_warning ("Cannot fetch source for filter %s from <%s>: %s",
+               filter_info_get_identifier (self), self->source_uri,
+               error->message);
+  }
+
+  /* There is not much else we can do if the download failed. Note that it
+   * is still possible that if a precompiled version of the filter was found
+   * that may get used instead.
+   */
+  LOG ("Done fetching filter %s", filter_info_get_identifier (self));
+
+  g_object_unref (download);
 }
 
 static void
-update_adblock_filter_files (EphyFiltersManager *manager)
+filter_load_cb (WebKitUserContentFilterStore *store,
+                GAsyncResult                 *result,
+                FilterInfo                   *self)
 {
-  char **filters;
-  GList *files = NULL;
+  /* Completion callback for webkit_user_content_filter_store_load(). A
+   * compiled filter that is found gets enabled right away; afterwards, if
+   * the filter needs updating from its source, the source is loaded directly
+   * (local files) or a download is started for it (anything else).
+   */
+  g_autoptr (GError) error = NULL;
+  g_autoptr (WebKitUserContentFilter) wk_filter = NULL;
+  g_autoptr (GFile) source_file = NULL;
+  g_autoptr (GFile) json_file = NULL;
+  g_autofree char *json_file_uri = NULL;
+  EphyDownload *download;
 
-  if (!g_settings_get_boolean (EPHY_SETTINGS_WEB, EPHY_PREFS_WEB_ENABLE_ADBLOCK))
+  if (!self->manager)
     return;
 
-  /* Only once at a time please! Newest set of filters wins. */
-  g_cancellable_cancel (manager->cancellable);
-  g_object_unref (manager->cancellable);
-  manager->cancellable = g_cancellable_new ();
+  g_assert (WEBKIT_IS_USER_CONTENT_FILTER_STORE (store));
+  g_assert (result);
+  g_assert (self);
+  g_assert (store == self->manager->store);
+
+  wk_filter = webkit_user_content_filter_store_load_finish (self->manager->store,
+                                                            result,
+                                                            &error);
+  self->found = (wk_filter != NULL);
+
+  if (wk_filter) {
+    LOG ("Found compiled filter %s.", filter_info_get_identifier (self));
+    filter_info_setup_enable_compiled_filter (self, wk_filter);
+    /* NOTE(review): ADBLOCK_FILTER_UPDATE_FREQUENCY expands to an int
+     * expression while the format below uses %u — confirm the specifier.
+     */
+    LOG ("Update %sneeded for filter %s (last %" PRIu64 "s ago, interval %us)",
+         filter_info_needs_updating_from_source (self) ? "" : "not ",
+         filter_info_get_identifier (self),
+         (self->manager->update_time - self->last_update),
+         ADBLOCK_FILTER_UPDATE_FREQUENCY);
+  } else if (g_error_matches (error, G_IO_ERROR, G_IO_ERROR_CANCELLED)) {
+    return;
+  } else if (g_error_matches (error,
+                              WEBKIT_USER_CONTENT_FILTER_ERROR,
+                              WEBKIT_USER_CONTENT_FILTER_ERROR_NOT_FOUND)) {
+    LOG ("Compiled filter %s not found, needs fetching.",
+         filter_info_get_identifier (self));
+  } else {
+    /* Any other lookup failure is reported, and setup carries on below. */
+    g_warning ("Lookup failed for compiled filter %s: %s.",
+               filter_info_get_identifier (self),
+               error->message);
+  }
 
-  filters = g_settings_get_strv (EPHY_SETTINGS_MAIN, EPHY_PREFS_ADBLOCK_FILTERS);
-  for (guint i = 0; filters[i]; i++) {
-    GFile *filter_file;
+  if (!filter_info_needs_updating_from_source (self))
+    return;
 
-    filter_file = ephy_uri_tester_get_adblock_filter_file (manager->filters_dir, filters[i]);
-    if (!adblock_filter_file_is_valid (filter_file))
-      start_retrieving_filter_file (manager, filters[i], filter_file);
-    files = g_list_prepend (files, filter_file);
+  /* Even if a compiled filter was found, we may need to compile an updated
+   * version if the local file has changed, or the contents of remote URIs
+   * have changed. If an updated ruleset is available, it will replace the
+   * precompiled version found above (if any) once it has been compiled.
+   */
+  LOG ("Loading filter %s from <%s>", filter_info_get_identifier (self), self->source_uri);
+
+  /* Skip fetching local file:// URIs; load them directly. */
+  /* The "local" flag is recorded here, as a side effect of the check. */
+  source_file = g_file_new_for_uri (self->source_uri);
+  if ((self->local = g_file_is_native (source_file))) {
+    filter_info_setup_load_file (self, source_file);
+    return;
   }
 
-  remove_old_adblock_filters (manager, files);
+  /* Download non-local URIs. */
+  /* The download is not unreffed in this function: both signal handlers
+   * connected below call g_object_unref() on it when they run.
+   */
+  download = ephy_download_new_for_uri_internal (self->source_uri);
+
+  json_file = filter_info_get_source_file (self);
+  json_file_uri = g_file_get_uri (json_file);
+  ephy_download_set_destination_uri (download, json_file_uri);
+  ephy_download_disable_desktop_notification (download);
+  webkit_download_set_allow_overwrite (ephy_download_get_webkit_download (download), TRUE);
+
+  g_signal_connect (download, "completed",
+                    G_CALLBACK (download_completed_cb), self);
+  g_signal_connect (download, "error",
+                    G_CALLBACK (download_errored_cb), self);
+}
+
+static void
+filter_info_setup_start (FilterInfo *self)
+{
+  g_assert (self);
+
+  if (!self->manager)
+    return;
+
+  LOG ("Setup started for <%s> id=%s", self->source_uri, filter_info_get_identifier (self));
 
-  g_strfreev (filters);
-  g_list_free_full (files, g_object_unref);
+  webkit_user_content_filter_store_load (self->manager->store,
+                                         filter_info_get_identifier (self),
+                                         self->manager->cancellable,
+                                         (GAsyncReadyCallback)filter_load_cb,
+                                         self);
 }
 
 static void
-adblock_filters_changed_cb (GSettings          *settings,
-                            char               *key,
-                            EphyFiltersManager *manager)
+filter_removed_cb (WebKitUserContentFilterStore *store,
+                   GAsyncResult                 *result,
+                   void                         *user_data)
 {
-  update_adblock_filter_files (manager);
+  g_autoptr (GError) error = NULL;
+
+  g_assert (WEBKIT_IS_USER_CONTENT_FILTER_STORE (store));
+  g_assert (result);
+
+  if (!webkit_user_content_filter_store_remove_finish (store,
+                                                       result,
+                                                       &error) &&
+      !g_error_matches (error,
+                        WEBKIT_USER_CONTENT_FILTER_ERROR,
+                        WEBKIT_USER_CONTENT_FILTER_ERROR_NOT_FOUND)) {
+    g_warning ("Cannot remove compiled filter: %s", error->message);
+  }
 }
 
 static void
-enable_adblock_changed_cb (GSettings          *settings,
-                           char               *key,
-                           EphyFiltersManager *manager)
+remove_unused_filter (const char *identifier,
+                      FilterInfo *filter)
+{
+  g_autoptr (GFile) sidecar_file = filter_info_get_sidecar_file (filter);
+
+  g_assert (strcmp (identifier, filter_info_get_identifier (filter)) == 0);
+  g_assert (!g_hash_table_contains (filter->manager->filters, identifier));
+
+  g_file_delete_async (sidecar_file,
+                       G_PRIORITY_LOW,
+                       filter->manager->cancellable,
+                       (GAsyncReadyCallback)file_removed_cb,
+                       NULL);
+  webkit_user_content_filter_store_remove (filter->manager->store,
+                                           identifier,
+                                           filter->manager->cancellable,
+                                           (GAsyncReadyCallback)filter_removed_cb,
+                                           NULL);
+  LOG ("Filter %s removal scheduled.", identifier);
+}
+
+void
+sidecar_loaded_cb (GObject      *source_object,
+                   GAsyncResult *result,
+                   FilterInfo   *self)
 {
-  update_adblock_filter_files (manager);
+  g_autoptr (GError) error = NULL;
+  if (!filter_info_load_sidecar_finish (result, &error)) {
+    if (g_error_matches (error, G_IO_ERROR, G_IO_ERROR_CANCELLED))
+      return;
+
+    if (g_error_matches (error, G_IO_ERROR, G_IO_ERROR_NOT_FOUND)) {
+      LOG ("Sidecar missing for filter %s: %s",
+           filter_info_get_identifier (self),
+           error->message);
+    } else {
+      g_warning ("Cannot load sidecar file for filter %s: %s",
+                 filter_info_get_identifier (self),
+                 error->message);
+    }
+  }
+  filter_info_setup_start (self);
+}
+
+static void
+update_adblock_filter_files_cb (GSettings          *settings,
+                                char               *key,
+                                EphyFiltersManager *manager)
+{
+  const gint64 update_time = g_get_real_time () / G_USEC_PER_SEC;
+  g_autoptr (GHashTable) old_filters = NULL;
+  g_auto (GStrv) uris = NULL;
+
+  g_assert (update_time >= 0);
+  g_assert (manager);
+
+  if (!g_settings_get_boolean (EPHY_SETTINGS_WEB, EPHY_PREFS_WEB_ENABLE_ADBLOCK)) {
+    LOG ("Filters are disabled, skipping update.");
+    g_signal_emit (manager, s_signals[FILTERS_DISABLED], 0);
+    return;
+  }
+
+  LOG ("Emitting EphyFiltersManager::filters-disabled.");
+  g_signal_emit (manager, s_signals[FILTERS_DISABLED], 0);
+
+  /* Only once at a time please! Newest set of filters wins. */
+  g_cancellable_cancel (manager->cancellable);
+  g_object_unref (manager->cancellable);
+  manager->cancellable = g_cancellable_new ();
+  manager->update_time = update_time;
+
+  old_filters = g_steal_pointer (&manager->filters);
+  manager->filters = g_hash_table_new_full (g_str_hash,
+                                            g_str_equal,
+                                            NULL,
+                                            (GDestroyNotify)filter_info_free);
+
+  uris = g_settings_get_strv (EPHY_SETTINGS_MAIN,
+                              EPHY_PREFS_ADBLOCK_FILTERS);
+  for (unsigned i = 0; uris[i]; i++) {
+    g_autofree char *filter_id = filter_info_identifier_for_source_uri (uris[i]);
+    FilterInfo *filter_info = NULL;
+    char *old_filter_id = NULL;
+
+    /* Check whether there was already a FilterInfo for the URI in the old
+     * filters table, and reuse it instead of creating a new one and reloading
+     * the sidecar file from disk.
+     *
+     * Note that the value is stolen from the old hash table in order to
+     * look it up and remove it from the old table *without* destroying it.
+     */
+    if (g_hash_table_steal_extended (old_filters,
+                                     filter_id,
+                                     (void **)&old_filter_id,
+                                     (void **)&filter_info)) {
+      g_assert (strcmp (old_filter_id, filter_id) == 0);
+      g_assert (strcmp (old_filter_id, filter_info_get_identifier (filter_info)) == 0);
+
+      LOG ("Filter %s in old set, stolen and starting setup.", filter_id);
+      filter_info_setup_start (filter_info);
+    } else {
+      /* Filter was not present in the old hash table: create a FilterInfo
+       * for the URI and start by loading its sidecar file.
+       */
+      LOG ("Filter %s not in old set, creating anew.", filter_id);
+      filter_info = filter_info_new (uris[i], manager);
+      filter_info->identifier = g_steal_pointer (&filter_id);
+      filter_info_load_sidecar (filter_info,
+                                manager->cancellable,
+                                (GAsyncReadyCallback)sidecar_loaded_cb,
+                                filter_info);
+    }
+
+    g_hash_table_replace (manager->filters,
+                          (void *)filter_info_get_identifier (filter_info),
+                          filter_info);
+  }
+
+  /* Remove the filters which are no longer in the configured set. */
+  g_hash_table_foreach (old_filters,
+                        (GHFunc)remove_unused_filter,
+                        NULL);
 }
 
 static void
@@ -280,6 +826,7 @@ ephy_filters_manager_dispose (GObject *object)
     g_cancellable_cancel (manager->cancellable);
     g_clear_object (&manager->cancellable);
   }
+  g_clear_object (&manager->store);
 
   G_OBJECT_CLASS (ephy_filters_manager_parent_class)->dispose (object);
 }
@@ -289,6 +836,7 @@ ephy_filters_manager_finalize (GObject *object)
 {
   EphyFiltersManager *manager = EPHY_FILTERS_MANAGER (object);
 
+  g_clear_pointer (&manager->filters, g_hash_table_unref);
   g_free (manager->filters_dir);
 
   G_OBJECT_CLASS (ephy_filters_manager_parent_class)->finalize (object);
@@ -298,17 +846,21 @@ static void
 ephy_filters_manager_constructed (GObject *object)
 {
   EphyFiltersManager *manager = EPHY_FILTERS_MANAGER (object);
+  g_autofree char *saved_filters_dir = NULL;
 
   G_OBJECT_CLASS (ephy_filters_manager_parent_class)->constructed (object);
 
+  saved_filters_dir = g_build_filename (manager->filters_dir, "compiled", NULL);
+  g_mkdir_with_parents (saved_filters_dir, 0700);
+  manager->store = webkit_user_content_filter_store_new (saved_filters_dir);
+
   /* Note: up here because we must connect *before* reading the settings. */
-  g_signal_connect (EPHY_SETTINGS_MAIN, "changed::" EPHY_PREFS_ADBLOCK_FILTERS,
-                    G_CALLBACK (adblock_filters_changed_cb), manager);
-  g_signal_connect (EPHY_SETTINGS_WEB, "changed::" EPHY_PREFS_WEB_ENABLE_ADBLOCK,
-                    G_CALLBACK (enable_adblock_changed_cb), manager);
+  g_signal_connect_object (EPHY_SETTINGS_MAIN, "changed::" EPHY_PREFS_ADBLOCK_FILTERS,
+                           G_CALLBACK (update_adblock_filter_files_cb), manager, 0);
+  g_signal_connect_object (EPHY_SETTINGS_WEB, "changed::" EPHY_PREFS_WEB_ENABLE_ADBLOCK,
+                           G_CALLBACK (update_adblock_filter_files_cb), manager, 0);
 
-  g_mkdir_with_parents (manager->filters_dir, 0700);
-  update_adblock_filter_files (manager);
+  update_adblock_filter_files_cb (NULL, NULL, manager);
 }
 
 static void
@@ -356,6 +908,21 @@ ephy_filters_manager_class_init (EphyFiltersManagerClass *klass)
   object_class->set_property = ephy_filters_manager_set_property;
   object_class->get_property = ephy_filters_manager_get_property;
 
+  s_signals[FILTER_READY] =
+    g_signal_new ("filter-ready",
+                  G_OBJECT_CLASS_TYPE (klass),
+                  G_SIGNAL_RUN_FIRST,
+                  0, NULL, NULL, NULL,
+                  G_TYPE_NONE, 1,
+                  WEBKIT_TYPE_USER_CONTENT_FILTER);
+
+  s_signals[FILTERS_DISABLED] =
+    g_signal_new ("filters-disabled",
+                  G_OBJECT_CLASS_TYPE (klass),
+                  G_SIGNAL_RUN_FIRST,
+                  0, NULL, NULL, NULL,
+                  G_TYPE_NONE, 0);
+
   object_properties[PROP_FILTERS_DIR] =
     g_param_spec_string ("filters-dir",
                          "Filters directory",
@@ -372,6 +939,10 @@ static void
 ephy_filters_manager_init (EphyFiltersManager *manager)
 {
   manager->cancellable = g_cancellable_new ();
+  manager->filters = g_hash_table_new_full (g_str_hash,
+                                            g_str_equal,
+                                            NULL,
+                                            (GDestroyNotify)filter_info_free);
 }
 
 EphyFiltersManager *
diff --git a/embed/ephy-filters-manager.h b/embed/ephy-filters-manager.h
index bd444f08d..7fbb66b98 100644
--- a/embed/ephy-filters-manager.h
+++ b/embed/ephy-filters-manager.h
@@ -31,7 +31,7 @@ G_BEGIN_DECLS
 
 G_DECLARE_FINAL_TYPE (EphyFiltersManager, ephy_filters_manager, EPHY, FILTERS_MANAGER, GObject)
 
-EphyFiltersManager *ephy_filters_manager_new                     (const char *adblock_filters_dir);
+EphyFiltersManager *ephy_filters_manager_new                     (const char         *adblock_filters_dir);
 const char         *ephy_filters_manager_get_adblock_filters_dir (EphyFiltersManager *manager);
 
 G_END_DECLS



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]