[epiphany] lib: Add helper to remove tracking from URIs
- From: Bastien Nocera <hadess src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [epiphany] lib: Add helper to remove tracking from URIs
- Date: Mon, 13 Jan 2014 17:30:53 +0000 (UTC)
commit 7f09ee3763add068fc715da677568ffbbd2273df
Author: Bastien Nocera <hadess hadess net>
Date: Mon Dec 16 11:11:21 2013 +0100
lib: Add helper to remove tracking from URIs
This function, based on the PureURL Firefox add-on, will strip
tracking information from URLs, such as analytics information.
See https://addons.mozilla.org/fr/firefox/addon/pure-url/
https://bugzilla.gnome.org/show_bug.cgi?id=720520
lib/Makefile.am | 2 +
lib/ephy-uri-helpers.c | 258 +++++++++++++++++++++++++++++++++++++++++
lib/ephy-uri-helpers.h | 38 ++++++
tests/Makefile.am | 4 +
tests/ephy-uri-helpers-test.c | 82 +++++++++++++
5 files changed, 384 insertions(+), 0 deletions(-)
---
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 1c555be..3770bf2 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -24,6 +24,7 @@ NOINST_H_FILES = \
ephy-string.h \
ephy-snapshot-service.h \
ephy-time-helpers.h \
+ ephy-uri-helpers.h \
ephy-web-app-utils.h \
ephy-web-dom-utils.h \
ephy-zoom.h
@@ -67,6 +68,7 @@ libephymisc_la_SOURCES = \
ephy-sqlite-statement.c \
ephy-string.c \
ephy-time-helpers.c \
+ ephy-uri-helpers.c \
ephy-web-app-utils.c \
ephy-web-dom-utils.c \
ephy-zoom.c \
diff --git a/lib/ephy-uri-helpers.c b/lib/ephy-uri-helpers.c
new file mode 100644
index 0000000..acd46aa
--- /dev/null
+++ b/lib/ephy-uri-helpers.c
@@ -0,0 +1,258 @@
+/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * Copyright © 2013 Bastien Nocera <hadess hadess net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "ephy-uri-helpers.h"
+
+#include <glib.h>
+#include <libsoup/soup.h>
+#include <string.h>
+
+/**
+ * SECTION:ephy-uri-helpers
+ * @short_description: miscellaneous URI related utility functions
+ *
+ * URI related functions, including functions to clean up URI.
+ */
+
+/* One name/value pair of a URI query string, as produced by query_decode(). */
+typedef struct {
+ /* Parameter name; this pointer owns the allocation and is the one
+ * released by query_item_free(). */
+ char *name;
+ /* Parameter value; points into the same allocation as name (just past
+ * the '=' that query_decode() overwrote), so it is never freed on its own. */
+ char *value;
+} QueryItem;
+
+/* Release a QueryItem together with the string storage it owns. */
+static void
+query_item_free (QueryItem *item)
+{
+ g_free (item->name);
+ /* value is not freed here: it points inside the buffer that name
+ * owns (see query_decode()), so the g_free() above released it too. */
+ g_slice_free (QueryItem, item);
+}
+
+/* Numeric value of a hexadecimal digit character. The 0x4F mask folds
+ * lower-case letters onto upper-case ones; only meaningful for characters
+ * that already passed g_ascii_isxdigit(). */
+#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
+/* Byte encoded by the "%XY" escape starting at s. */
+#define HEXCHAR(s) ((XDIGIT ((s)[1]) << 4) + XDIGIT ((s)[2]))
+
+/*
+ * Decode a form-urlencoded string in place: "%XY" escapes become the byte
+ * they encode and '+' becomes a space. The decoded text is never longer
+ * than the input, so it is written over the same buffer.
+ *
+ * Returns FALSE as soon as a '%' is not followed by two hexadecimal digits;
+ * the buffer is then left partially rewritten.
+ *
+ * From libsoup, in libsoup/soup-form.c
+ */
+static gboolean
+form_decode (char *part)
+{
+  unsigned char *src = (unsigned char *) part;
+  unsigned char *dst = src;
+
+  while (*src != '\0') {
+    if (*src == '%') {
+      if (!g_ascii_isxdigit (src[1]) || !g_ascii_isxdigit (src[2]))
+        return FALSE;
+      *dst++ = HEXCHAR (src);
+      src += 3;
+    } else {
+      *dst++ = (*src == '+') ? ' ' : *src;
+      src++;
+    }
+  }
+  *dst = '\0';
+
+  return TRUE;
+}
+
+/*
+ * Append @in to @str in form-urlencoded shape: a space is written as '+',
+ * ASCII letters and digits are copied unchanged, and every other byte is
+ * written as an upper-case "%XY" escape.
+ */
+static void
+append_form_encoded (GString *str, const char *in)
+{
+  const unsigned char *p;
+
+  for (p = (const unsigned char *) in; *p != '\0'; p++) {
+    if (*p == ' ')
+      g_string_append_c (str, '+');
+    else if (g_ascii_isalnum (*p))
+      g_string_append_c (str, *p);
+    else
+      g_string_append_printf (str, "%%%02X", (int) *p);
+  }
+}
+
+/*
+ * Append "name=value" to @str, both sides form-encoded. When @str already
+ * holds earlier pairs, a '&' separator is written first.
+ */
+static void
+encode_pair (GString *str, const char *name, const char *value)
+{
+  g_return_if_fail (name != NULL);
+  g_return_if_fail (value != NULL);
+
+  /* Anything already in the buffer is an earlier pair. */
+  if (str->len > 0)
+    g_string_append_c (str, '&');
+
+  append_form_encoded (str, name);
+  g_string_append_c (str, '=');
+  append_form_encoded (str, value);
+}
+
+/*
+ * Split @query on '&' and decode each "name=value" pair into a QueryItem.
+ *
+ * Pairs without an '=' and pairs whose name or value contains an invalid
+ * percent escape are discarded. The returned list keeps the order of the
+ * query string; free it with
+ * g_list_free_full (list, (GDestroyNotify) query_item_free).
+ *
+ * Adapted from soup_form_decode in libsoup
+ */
+static GList *
+query_decode (const char *query)
+{
+  GList *decoded = NULL;
+  char **pairs, **p;
+
+  pairs = g_strsplit (query, "&", -1);
+  for (p = pairs; *p != NULL; p++) {
+    char *key = *p;
+    char *sep = strchr (key, '=');
+    QueryItem *item;
+
+    /* No '=' at all: the pair is dropped. */
+    if (sep == NULL) {
+      g_free (key);
+      continue;
+    }
+
+    /* Cut the string at '=' so that name and value share one allocation,
+     * then decode both halves where they are. */
+    *sep = '\0';
+    if (!form_decode (key) || !form_decode (sep + 1)) {
+      g_free (key);
+      continue;
+    }
+
+    item = g_slice_new0 (QueryItem);
+    item->name = key;
+    item->value = sep + 1;
+    decoded = g_list_prepend (decoded, item);
+  }
+  /* Only the vector itself: every string in it was either handed to an
+   * item or freed in the loop. */
+  g_free (pairs);
+
+  return g_list_reverse (decoded);
+}
+
+/*
+ * Build a form-encoded query string ("a=1&b=2") from a list of QueryItem.
+ *
+ * Returns a newly allocated string to be freed with g_free(), or NULL when
+ * @items is empty.
+ */
+static char *
+query_encode (GList *items)
+{
+  GString *encoded;
+  GList *node;
+
+  if (items == NULL)
+    return NULL;
+
+  encoded = g_string_new (NULL);
+  node = items;
+  while (node != NULL) {
+    const QueryItem *item = node->data;
+
+    encode_pair (encoded, item->name, item->value);
+    node = node->next;
+  }
+
+  /* Hand the character data over to the caller, discard the GString shell. */
+  return g_string_free (encoded, FALSE);
+}
+
+/* Whether @host is exactly @domain or one of its subdomains. A NULL @host
+ * never matches. */
+static gboolean
+host_in_domain (const char *host,
+                const char *domain)
+{
+  gsize host_len, domain_len;
+
+  if (host == NULL || !g_str_has_suffix (host, domain))
+    return FALSE;
+
+  host_len = strlen (host);
+  domain_len = strlen (domain);
+
+  /* A bare suffix match would also accept "notyoutube.com" for
+   * "youtube.com"; require a label boundary in front of the suffix. */
+  return host_len == domain_len ||
+         host[host_len - domain_len - 1] == '.';
+}
+
+/*
+ * Tell whether the query parameter @name is known tracking information.
+ *
+ * @name: decoded parameter name
+ * @host: host of the URI the parameter comes from, or NULL when the URI
+ *        has none; host-specific entries are then never matched
+ *
+ * Entries with a NULL host apply to every site, the others only to the
+ * given domain and its subdomains.
+ *
+ * Returns TRUE when the parameter should be removed.
+ */
+static gboolean
+is_garbage (const char *name,
+            const char *host)
+{
+  static const struct {
+    const char *field;
+    const char *host;
+  } fields[] = {
+    /* analytics.google.com */
+    { "utm_source", NULL },
+    { "utm_medium", NULL },
+    { "utm_term", NULL },
+    { "utm_content", NULL },
+    { "utm_campaign", NULL },
+    /* metrika.yandex.ru */
+    { "yclid", NULL },
+    /* youtube.com */
+    { "feature", "youtube.com" },
+    /* facebook.com */
+    { "fb_action_ids", NULL },
+    { "fb_action_types", NULL },
+    { "fb_ref", NULL },
+    { "fb_source", NULL },
+    { "action_object_map", NULL },
+    { "action_type_map", NULL },
+    { "action_ref_map", NULL },
+    { "ref", "facebook.com" },
+    { "fref", "facebook.com" },
+    { "hc_location", "facebook.com" },
+    /* imdb.com */
+    { "ref_", "imdb.com" }
+  };
+  guint i;
+
+  for (i = 0; i < G_N_ELEMENTS (fields); i++) {
+    if (fields[i].host != NULL &&
+        !host_in_domain (host, fields[i].host))
+      continue;
+    if (g_str_equal (fields[i].field, name))
+      return TRUE;
+  }
+
+  return FALSE;
+}
+
+/**
+ * ephy_remove_tracking_from_uri:
+ * @uri_string: a URI
+ *
+ * Sanitize @uri_string to make sure it does not contain analytics tracking
+ * information. Inspired by the Firefox PureURL add-on:
+ * https://addons.mozilla.org/fr/firefox/addon/pure-url/
+ *
+ * The query is only rewritten when at least one tracking parameter was
+ * found, so a URI without tracking information is never re-encoded.
+ *
+ * Returns: the sanitized URI as a newly allocated string (free it with
+ * g_free()), or %NULL on error or when the URI did not change.
+ */
+char *
+ephy_remove_tracking_from_uri (const char *uri_string)
+{
+  SoupURI *uri;
+  GList *items, *kept, *l;
+  const char *query, *host;
+  gboolean has_garbage = FALSE;
+  char *new_query;
+  char *ret = NULL;
+
+  uri = soup_uri_new (uri_string);
+  if (!uri)
+    return NULL;
+
+  host = soup_uri_get_host (uri);
+  query = soup_uri_get_query (uri);
+  if (!query)
+    goto bail;
+
+  items = query_decode (query);
+  if (!items)
+    goto bail;
+
+  /* kept borrows its elements from items; it is built in reverse order. */
+  kept = NULL;
+  for (l = items; l != NULL; l = l->next) {
+    QueryItem *item = l->data;
+
+    if (is_garbage (item->name, host))
+      has_garbage = TRUE;
+    else
+      kept = g_list_prepend (kept, item);
+  }
+
+  /* Leave ret at NULL when nothing was stripped: rebuilding the query
+   * anyway would re-encode it and hand back a different string for a URI
+   * that carried no tracking information. */
+  if (has_garbage) {
+    kept = g_list_reverse (kept);
+
+    /* A NULL new_query (every parameter was tracking) removes the query. */
+    new_query = query_encode (kept);
+    soup_uri_set_query (uri, new_query);
+    g_free (new_query);
+
+    ret = soup_uri_to_string (uri, FALSE);
+  }
+
+  /* Only the list cells of kept are freed; the items are owned by items. */
+  g_list_free (kept);
+  g_list_free_full (items, (GDestroyNotify) query_item_free);
+
+bail:
+  soup_uri_free (uri);
+  return ret;
+}
+/* vim: set sw=2 ts=2 sts=2 et: */
diff --git a/lib/ephy-uri-helpers.h b/lib/ephy-uri-helpers.h
new file mode 100644
index 0000000..0d7f4b6
--- /dev/null
+++ b/lib/ephy-uri-helpers.h
@@ -0,0 +1,38 @@
+/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * Copyright © 2013 Bastien Nocera <hadess hadess net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#if !defined (__EPHY_EPIPHANY_H_INSIDE__) && !defined (EPIPHANY_COMPILATION)
+#error "Only <epiphany/epiphany.h> can be included directly."
+#endif
+
+#ifndef EPHY_URI_HELPERS_H
+#define EPHY_URI_HELPERS_H
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+/* Strips known tracking parameters from the query of @uri. The result is a
+ * newly allocated string to be released with g_free(); it may be NULL, in
+ * which case callers keep using the URI they passed in. */
+char *ephy_remove_tracking_from_uri (const char *uri);
+
+G_END_DECLS
+
+#endif /* EPHY_URI_HELPERS_H */
+
+/* vim: set sw=2 ts=2 sts=2 et: */
diff --git a/tests/Makefile.am b/tests/Makefile.am
index aca37d7..2ce0538 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -16,6 +16,7 @@ noinst_PROGRAMS = \
test-ephy-snapshot-service \
test-ephy-sqlite \
test-ephy-string \
+ ephy-uri-helpers-test \
test-ephy-web-app-utils \
test-ephy-web-view \
$(NULL)
@@ -174,6 +175,9 @@ test_ephy_sqlite_SOURCES = \
test_ephy_string_SOURCES = \
ephy-string-test.c
+# The variable prefix has to be the canonicalized name of the program as it
+# is listed in noinst_PROGRAMS (ephy-uri-helpers-test).
+ephy_uri_helpers_test_SOURCES = \
+	ephy-uri-helpers-test.c
+
test_ephy_web_app_utils_SOURCES = \
ephy-web-app-utils-test.c
diff --git a/tests/ephy-uri-helpers-test.c b/tests/ephy-uri-helpers-test.c
new file mode 100644
index 0000000..1d44b64
--- /dev/null
+++ b/tests/ephy-uri-helpers-test.c
@@ -0,0 +1,82 @@
+/* vim: set sw=2 ts=2 sts=2 et: */
+/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * ephy-uri-helpers-test.c
+ * This file is part of Epiphany
+ *
+ * Copyright © 2013 Bastien Nocera <hadess hadess net>
+ *
+ * Epiphany is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Epiphany is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Epiphany; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "ephy-debug.h"
+#include "ephy-uri-helpers.h"
+#include "ephy-settings.h"
+
+#include <glib.h>
+#include <gtk/gtk.h>
+
+/*
+ * Runs ephy_remove_tracking_from_uri() over a table of URIs and compares
+ * each result with the expected sanitized form. A NULL result leaves the
+ * URI as it was, so the input itself is compared in that case.
+ */
+static void
+test_ephy_uri_helpers_remove_tracking (void)
+{
+  struct {
+    const char *input;
+    const char *output;
+  } const items[] = {
+    { "http://www.test.com/", "http://www.test.com/" },
+    { "http://www.test.com/?key=foo", "http://www.test.com/?key=foo" },
+    /* From the description in https://addons.mozilla.org/fr/firefox/addon/pure-url/ */
+    { "http://bigpicture.ru/?p=431513&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+bigpictures+%28%D0%9D%D0%9E%D0%92%D0%9E%D0%A1%D0%A2%D0%98+%D0%92+%D0%A4%D0%9E%D0%A2%D0%9E%D0%93%D0%A0%D0%90%D0%A4%D0%98%D0%AF%D0%A5%29",
+      "http://bigpicture.ru/?p=431513" },
+    { "http://www.test.com/?utm_source=feedburner", "http://www.test.com/" },
+    /* "feature" is only stripped on youtube.com and its subdomains. */
+    { "http://www.test.com/?feature=foo", "http://www.test.com/?feature=foo" },
+    { "http://foo.youtube.com/?feature=foo", "http://foo.youtube.com/" },
+  };
+  guint i;
+
+  for (i = 0; i < G_N_ELEMENTS (items); i++) {
+    char *result;
+
+    g_test_message ("TRACKING: uri: %s; expected: %s;",
+                    items[i].input, items[i].output);
+    result = ephy_remove_tracking_from_uri (items[i].input);
+    if (result == NULL)
+      result = g_strdup (items[i].input);
+    g_assert_cmpstr (items[i].output, ==, result);
+    g_free (result);
+  }
+}
+
+/* Test entry point: registers the URI helper test case and runs it. */
+int
+main (int argc, char *argv[])
+{
+  /* Select the in-memory GSettings backend; the variable is set in this
+   * process' environment only. */
+  g_setenv ("GSETTINGS_BACKEND", "memory", TRUE);
+
+  gtk_test_init (&argc, &argv);
+  ephy_debug_init ();
+
+  g_test_add_func ("/lib/ephy-uri-helpers/remove_tracking",
+                   test_ephy_uri_helpers_remove_tracking);
+
+  return g_test_run ();
+}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]