[epiphany/mcatanzaro/send-request] Remove ad-hoc query parameter stripping




commit 2bb84105d2b9d95c18b4e26a157bd05d0c272163
Author: Michael Catanzaro <mcatanzaro gnome org>
Date:   Thu Sep 3 18:05:00 2020 -0500

    Remove ad-hoc query parameter stripping
    
    This old, manual, and very non-comprehensive method of tracking
    prevention doesn't make much sense to do now that we have far more
    sophisticated anti-tracking measures in WebKit and enabled by default
    that don't require listing ad-hoc query parameters to remove from URLs.

 .../ephy-web-process-extension.c                   |  22 --
 lib/ephy-uri-helpers.c                             | 225 ---------------------
 lib/ephy-uri-helpers.h                             |   1 -
 tests/ephy-uri-helpers-test.c                      |  88 --------
 tests/meson.build                                  |  10 -
 5 files changed, 346 deletions(-)
---
diff --git a/embed/web-process-extension/ephy-web-process-extension.c b/embed/web-process-extension/ephy-web-process-extension.c
index a8631b86c..54392025a 100644
--- a/embed/web-process-extension/ephy-web-process-extension.c
+++ b/embed/web-process-extension/ephy-web-process-extension.c
@@ -60,25 +60,6 @@ struct _EphyWebProcessExtension {
 
 G_DEFINE_TYPE (EphyWebProcessExtension, ephy_web_process_extension, G_TYPE_OBJECT)
 
-static gboolean
-web_page_send_request (WebKitWebPage           *web_page,
-                       WebKitURIRequest        *request,
-                       WebKitURIResponse       *redirected_response,
-                       EphyWebProcessExtension *extension)
-{
-  /* FIXME: We should probably remove ephy_remove_tracking_from_uri and instead
-   * trust Intelligent Tracking Prevention to mitigate potential privacy impact
-   * of tracking query parameters. But first we need to enable ITP.
-   */
-  const char *request_uri = webkit_uri_request_get_uri (request);
-  g_autofree char *modified_uri = ephy_remove_tracking_from_uri (request_uri);
-  if (modified_uri && g_strcmp0 (request_uri, modified_uri) != 0) {
-    LOG ("Rewrote %s to %s", request_uri, modified_uri);
-    webkit_uri_request_set_uri (request, modified_uri);
-  }
-  return FALSE;
-}
-
 static void
 web_page_will_submit_form (WebKitWebPage            *web_page,
                            WebKitDOMHTMLFormElement *dom_form,
@@ -246,9 +227,6 @@ ephy_web_process_extension_page_created_cb (EphyWebProcessExtension *extension,
   js_context = webkit_frame_get_js_context_for_script_world (webkit_web_page_get_main_frame (web_page), extension->script_world);
   (void)js_context;
 
-  g_signal_connect (web_page, "send-request",
-                    G_CALLBACK (web_page_send_request),
-                    extension);
   g_signal_connect (web_page, "context-menu",
                     G_CALLBACK (web_page_context_menu),
                     extension);
diff --git a/lib/ephy-uri-helpers.c b/lib/ephy-uri-helpers.c
index b02d7eb85..2f4e7bab3 100644
--- a/lib/ephy-uri-helpers.c
+++ b/lib/ephy-uri-helpers.c
@@ -27,231 +27,6 @@
 #include <string.h>
 #include <webkit2/webkit2.h>
 
-/**
- * SECTION:ephy-uri-helpers
- * @short_description: miscellaneous URI related utility functions
- *
- * URI related functions, including functions to clean up URI.
- */
-
-/* QueryItem holds the decoded name for each parameter, as well as the untouched
- * name/value pair. The name is unescaped in query_decode() with form_decode(),
- * the pair is not altered. */
-typedef struct {
-  char *decoded_name;
-  char *pair;
-} QueryItem;
-
-static void
-query_item_free (QueryItem *item)
-{
-  g_free (item->decoded_name);
-  g_free (item->pair);
-  g_free (item);
-}
-
-#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
-#define HEXCHAR(s) ((XDIGIT (s[1]) << 4) + XDIGIT (s[2]))
-
-/* From libsoup, in libsoup/soup-form.c */
-static gboolean
-form_decode (char *part)
-{
-  unsigned char *s, *d;
-
-  s = d = (unsigned char *)part;
-  do {
-    if (*s == '%') {
-      if (!g_ascii_isxdigit (s[1]) ||
-          !g_ascii_isxdigit (s[2]))
-        return FALSE;
-      *d++ = HEXCHAR (s);
-      s += 2;
-    } else if (*s == '+')
-      *d++ = ' ';
-    else
-      *d++ = *s;
-  } while (*s++);
-
-  return TRUE;
-}
-
-static GList *
-query_split (const char *query)
-{
-  GList *items;
-  char **pairs;
-  int i;
-
-  items = NULL;
-  pairs = g_strsplit (query, "&", -1);
-  for (i = 0; pairs[i]; i++) {
-    QueryItem *item;
-    char *decoded_name = NULL;
-    char *pair, *eq;
-
-    pair = pairs[i];
-    eq = strchr (pair, '=');
-    if (eq)
-      decoded_name = g_strndup (pair, eq - pair);
-    else
-      decoded_name = g_strdup (pair);
-
-    if (!form_decode (decoded_name)) {
-      g_free (decoded_name);
-      decoded_name = NULL;
-    }
-
-    item = g_new0 (QueryItem, 1);
-    item->decoded_name = decoded_name;
-    item->pair = pair;
-    items = g_list_prepend (items, item);
-  }
-  g_free (pairs);
-
-  return g_list_reverse (items);
-}
-
-static char *
-query_concat (GList *items)
-{
-  GList *l;
-  GPtrArray *array;
-  char *ret;
-
-  if (!items)
-    return NULL;
-
-  array = g_ptr_array_new ();
-
-  for (l = items; l != NULL; l = l->next) {
-    QueryItem *item = l->data;
-
-    g_ptr_array_add (array, item->pair);
-  }
-  g_ptr_array_add (array, NULL);
-
-  ret = g_strjoinv ("&", (char **)array->pdata);
-  g_ptr_array_free (array, TRUE);
-
-  return ret;
-}
-
-static gboolean
-is_garbage (const char *name,
-            const char *host)
-{
-  struct {
-    const char *field;
-    const char *host;
-  } const fields[] = {
-    /* analytics.google.com */
-    { "utm_source", NULL },
-    { "utm_medium", NULL },
-    { "utm_term", NULL },
-    { "utm_content", NULL },
-    { "utm_campaign", NULL },
-    { "utm_reader", NULL },
-    /* metrika.yandex.ru */
-    { "yclid", NULL },
-    /* youtube.com */
-    { "feature", "youtube.com" },
-    /* facebook.com */
-    { "fb_action_ids", NULL },
-    { "fb_action_types", NULL },
-    { "fb_ref", NULL },
-    { "fb_source", NULL },
-    { "action_object_map", NULL },
-    { "action_type_map", NULL },
-    { "action_ref_map", NULL },
-    { "ref", "facebook.com" },
-    { "fref", "facebook.com" },
-    { "hc_location", "facebook.com" },
-    /* imdb.com */
-    { "ref_", "imdb.com" },
-    /* addons.mozilla.org */
-    { "src", "addons.mozilla.org" }
-  };
-  guint i;
-
-  if (name == NULL)
-    return FALSE;
-
-  for (i = 0; i < G_N_ELEMENTS (fields); i++) {
-    if (host != NULL &&
-        fields[i].host != NULL &&
-        !g_str_has_suffix (host, fields[i].host))
-      continue;
-    if (!g_strcmp0 (fields[i].field, name))
-      return TRUE;
-  }
-
-  return FALSE;
-}
-
-/**
- * ephy_remove_tracking_from_uri:
- * @uri_string: a uri
- *
- * Sanitize @uri to make sure it does not contain analytics tracking
- * information. Inspired by the Firefox PureURL add-on:
- * https://addons.mozilla.org/fr/firefox/addon/pure-url/
- *
- * Returns: the sanitized uri, or %NULL on error or when the URI did
- * not change.
- */
-char *
-ephy_remove_tracking_from_uri (const char *uri_string)
-{
-  SoupURI *uri;
-  GList *items, *new_items, *l;
-  const char *query, *host;
-  gboolean has_garbage = FALSE;
-  char *ret = NULL;
-
-  uri = soup_uri_new (uri_string);
-  if (!uri)
-    return ret;
-
-  host = soup_uri_get_host (uri);
-  query = soup_uri_get_query (uri);
-  if (!query)
-    goto bail;
-
-  items = query_split (query);
-  if (!items)
-    goto bail;
-
-  new_items = NULL;
-  for (l = items; l != NULL; l = l->next) {
-    QueryItem *item = l->data;
-
-    if (!is_garbage (item->decoded_name, host))
-      new_items = g_list_prepend (new_items, item);
-    else
-      has_garbage = TRUE;
-  }
-
-  if (has_garbage) {
-    char *new_query;
-
-    new_items = g_list_reverse (new_items);
-    new_query = query_concat (new_items);
-
-    soup_uri_set_query (uri, new_query);
-    g_free (new_query);
-
-    ret = soup_uri_to_string (uri, FALSE);
-  }
-
-  g_list_free_full (items, (GDestroyNotify)query_item_free);
-  g_list_free (new_items);
-
-bail:
-  soup_uri_free (uri);
-  return ret;
-}
-
 /* Use this function to format a URI for display. The URIs used
  * internally by WebKit may contain percent-encoded characters or
  * punycode, which we do not want the user to see.
diff --git a/lib/ephy-uri-helpers.h b/lib/ephy-uri-helpers.h
index cf46a6a5f..5b51d240e 100644
--- a/lib/ephy-uri-helpers.h
+++ b/lib/ephy-uri-helpers.h
@@ -25,7 +25,6 @@
 
 G_BEGIN_DECLS
 
-char *ephy_remove_tracking_from_uri (const char *uri);
 char *ephy_uri_decode (const char *uri);
 char *ephy_uri_normalize (const char *uri);
 char *ephy_uri_to_security_origin (const char *uri);
diff --git a/tests/meson.build b/tests/meson.build
index f2a0ac6a9..8ddcb250b 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -156,16 +156,6 @@ if get_option('unit_tests').enabled()
        env: envs
   )
 
-  uri_helpers_test = executable('test-ephy-uri-helpers',
-    'ephy-uri-helpers-test.c',
-    dependencies: ephymain_dep,
-    c_args: test_cargs,
-  )
-  test('URI helpers test',
-       uri_helpers_test,
-       env: envs,
-  )
-
   web_app_utils_test = executable('test-ephy-web-app-utils',
     'ephy-web-app-utils-test.c',
     dependencies: ephymain_dep,


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]