[epiphany/wip/google-safe-browsing: 7/12] gsb-service: Implement URL verification logic
- From: Gabriel Ivașcu <gabrielivascu src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [epiphany/wip/google-safe-browsing: 7/12] gsb-service: Implement URL verification logic
- Date: Wed, 20 Sep 2017 19:05:48 +0000 (UTC)
commit 03f6e8f0e4e746f421aaef865ed226d0dc0b21ea
Author: Gabriel Ivascu <gabrielivascu gnome org>
Date: Mon Sep 18 20:18:53 2017 +0300
gsb-service: Implement URL verification logic
lib/safe-browsing/ephy-gsb-service.c | 249 +++++++++++++++++++++++++++++++--
lib/safe-browsing/ephy-gsb-service.h | 14 ++-
lib/safe-browsing/ephy-gsb-storage.c | 6 +-
lib/safe-browsing/ephy-gsb-utils.c | 54 ++++++++
lib/safe-browsing/ephy-gsb-utils.h | 6 +
5 files changed, 307 insertions(+), 22 deletions(-)
---
diff --git a/lib/safe-browsing/ephy-gsb-service.c b/lib/safe-browsing/ephy-gsb-service.c
index 5d26bae..f9f0bc0 100644
--- a/lib/safe-browsing/ephy-gsb-service.c
+++ b/lib/safe-browsing/ephy-gsb-service.c
@@ -58,22 +58,41 @@ static GParamSpec *obj_properties[LAST_PROP];
static gboolean ephy_gsb_service_update (EphyGSBService *self);
typedef struct {
- EphyGSBService *service;
- GList *prefixes;
+ EphyGSBService *service;
+ GHashTable *threats;
+ GList *matching_prefixes;
+ GList *matching_hashes;
+ EphyGSBServiceVerifyURLCallback callback;
+ gpointer user_data;
} FindFullHashesData;
static FindFullHashesData *
-find_full_hashes_data_new (EphyGSBService *service,
- GList *prefixes)
+find_full_hashes_data_new (EphyGSBService *service,
+ GHashTable *threats,
+ GList *matching_prefixes,
+ GList *matching_hashes,
+ EphyGSBServiceVerifyURLCallback callback,
+ gpointer user_data)
{
FindFullHashesData *data;
g_assert (EPHY_IS_GSB_SERVICE (service));
- g_assert (prefixes);
+ g_assert (threats);
+ g_assert (matching_prefixes);
+ g_assert (matching_hashes);
+ g_assert (callback);
data = g_slice_new (FindFullHashesData);
data->service = g_object_ref (service);
- data->prefixes = g_list_copy_deep (prefixes, (GCopyFunc)g_bytes_ref, NULL);
+ data->threats = g_hash_table_ref (threats);
+ data->matching_prefixes = g_list_copy_deep (matching_prefixes,
+ (GCopyFunc)g_bytes_ref,
+ NULL);
+ data->matching_hashes = g_list_copy_deep (matching_hashes,
+ (GCopyFunc)g_bytes_ref,
+ NULL);
+ data->callback = callback;
+ data->user_data = user_data;
return data;
}
@@ -84,7 +103,9 @@ find_full_hashes_data_free (FindFullHashesData *data)
g_assert (data);
g_object_unref (data->service);
- g_list_free_full (data->prefixes, (GDestroyNotify)g_bytes_unref);
+ g_hash_table_unref (data->threats);
+ g_list_free_full (data->matching_prefixes, (GDestroyNotify)g_bytes_unref);
+ g_list_free_full (data->matching_hashes, (GDestroyNotify)g_bytes_unref);
g_slice_free (FindFullHashesData, data);
}
@@ -441,6 +462,7 @@ ephy_gsb_service_find_full_hashes_cb (SoupSession *session,
JsonNode *body_node = NULL;
JsonObject *body_obj;
JsonArray *matches;
+ GList *hashes_lookup = NULL;
GError *error = NULL;
const char *negative_duration;
double duration;
@@ -461,6 +483,7 @@ ephy_gsb_service_find_full_hashes_cb (SoupSession *session,
body_obj = json_node_get_object (body_node);
matches = json_object_get_array_member (body_obj, "matches");
+ /* Update full hashes in database. */
for (guint i = 0; i < json_array_get_length (matches); i++) {
EphyGSBThreatList *list;
JsonObject *match = json_array_get_object_element (matches, i);
@@ -494,21 +517,44 @@ ephy_gsb_service_find_full_hashes_cb (SoupSession *session,
/* Update negative cache duration. */
negative_duration = json_object_get_string_member (body_obj, "negativeCacheDuration");
sscanf (negative_duration, "%lfs", &duration);
- for (GList *l = data->prefixes; l && l->data; l = l->next)
+ for (GList *l = data->matching_prefixes; l && l->data; l = l->next)
ephy_gsb_storage_update_hash_prefix_expiration (self->storage, l->data, floor (duration));
/* TODO: Handle minimumWaitDuration. */
+ /* Repeat the full hash verification. */
+ hashes_lookup = ephy_gsb_storage_lookup_full_hashes (self->storage, data->matching_hashes);
+ for (GList *l = hashes_lookup; l && l->data; l = l->next) {
+ EphyGSBHashFullLookup *lookup = (EphyGSBHashFullLookup *)l->data;
+ EphyGSBThreatList *list;
+
+ if (!lookup->expired) {
+ list = ephy_gsb_threat_list_new (lookup->threat_type,
+ lookup->platform_type,
+ lookup->threat_entry_type,
+ NULL, 0);
+ g_hash_table_add (data->threats, list);
+ }
+ }
+
out:
+ data->callback (data->threats, data->user_data);
+
if (body_node)
json_node_unref (body_node);
+ g_list_free_full (hashes_lookup, (GDestroyNotify)ephy_gsb_hash_full_lookup_free);
find_full_hashes_data_free (data);
}
static void
-ephy_gsb_service_find_full_hashes (EphyGSBService *self,
- GList *prefixes)
+ephy_gsb_service_find_full_hashes (EphyGSBService *self,
+ GHashTable *threats,
+ GList *matching_prefixes,
+ GList *matching_hashes,
+ EphyGSBServiceVerifyURLCallback callback,
+ gpointer user_data)
{
+ FindFullHashesData *data;
SoupMessage *msg;
GList *threat_lists;
JsonNode *body_node;
@@ -518,13 +564,20 @@ ephy_gsb_service_find_full_hashes (EphyGSBService *self,
g_assert (EPHY_IS_GSB_SERVICE (self));
g_assert (ephy_gsb_storage_is_operable (self->storage));
- g_assert (prefixes);
-
- LOG ("Updating full hashes of %u prefixes", g_list_length (prefixes));
+ g_assert (threats);
+ g_assert (matching_prefixes);
+ g_assert (matching_hashes);
+ g_assert (callback);
threat_lists = ephy_gsb_storage_get_threat_lists (self->storage);
- body_obj = ephy_gsb_utils_make_full_hashes_request (threat_lists, prefixes);
+ if (!threat_lists) {
+ callback (threats, user_data);
+ return;
+ }
+ LOG ("Requesting full hashes for %u prefixes", g_list_length (matching_prefixes));
+
+ body_obj = ephy_gsb_utils_make_full_hashes_request (threat_lists, matching_prefixes);
body_node = json_node_new (JSON_NODE_OBJECT);
json_node_set_object (body_node, body_obj);
body = json_to_string (body_node, FALSE);
@@ -532,12 +585,176 @@ ephy_gsb_service_find_full_hashes (EphyGSBService *self,
url = g_strdup_printf ("%sfullHashes:find?key=%s", API_PREFIX, self->api_key);
msg = soup_message_new (SOUP_METHOD_POST, url);
soup_message_set_request (msg, "application/json", SOUP_MEMORY_TAKE, body, strlen (body));
+
+ data = find_full_hashes_data_new (self, threats,
+ matching_prefixes, matching_hashes,
+ callback, user_data);
soup_session_queue_message (self->session, msg,
- ephy_gsb_service_find_full_hashes_cb,
- find_full_hashes_data_new (self, prefixes));
+ ephy_gsb_service_find_full_hashes_cb, data);
g_free (url);
json_object_unref (body_obj);
json_node_unref (body_node);
g_list_free_full (threat_lists, (GDestroyNotify)ephy_gsb_threat_list_free);
}
+
+static void
+ephy_gsb_service_verify_hashes (EphyGSBService *self,
+ GList *hashes,
+ GHashTable *threats,
+ EphyGSBServiceVerifyURLCallback callback,
+ gpointer user_data)
+{
+ GList *cues;
+ GList *prefixes_lookup = NULL;
+ GList *hashes_lookup = NULL;
+ GList *matching_prefixes = NULL;
+ GList *matching_hashes = NULL;
+ GHashTable *matching_prefixes_set;
+ GHashTable *matching_hashes_set;
+ GHashTableIter iter;
+ gpointer value;
+ gboolean has_matching_expired_hashes = FALSE;
+ gboolean has_matching_expired_prefixes = FALSE;
+
+ g_assert (EPHY_IS_GSB_SERVICE (self));
+ g_assert (ephy_gsb_storage_is_operable (self->storage));
+ g_assert (threats);
+ g_assert (hashes);
+ g_assert (callback);
+
+ matching_prefixes_set = g_hash_table_new (g_bytes_hash, g_bytes_equal);
+ matching_hashes_set = g_hash_table_new (g_bytes_hash, g_bytes_equal);
+
+ /* Check for hash prefixes in database that match any of the full hashes. */
+ cues = ephy_gsb_utils_get_hash_cues (hashes);
+ prefixes_lookup = ephy_gsb_storage_lookup_hash_prefixes (self->storage, cues);
+ for (GList *p = prefixes_lookup; p && p->data; p = p->next) {
+ EphyGSBHashPrefixLookup *lookup = (EphyGSBHashPrefixLookup *)p->data;
+
+ for (GList *h = hashes; h && h->data; h = h->next) {
+ if (ephy_gsb_utils_hash_has_prefix (h->data, lookup->prefix)) {
+ value = g_hash_table_lookup (matching_prefixes_set, lookup->prefix);
+
+ /* Consider the prefix expired if it's expired in at least one threat list. */
+ g_hash_table_replace (matching_prefixes_set,
+ lookup->prefix,
+ GINT_TO_POINTER (GPOINTER_TO_INT (value) || lookup->negative_expired));
+ g_hash_table_add (matching_hashes_set, h->data);
+ }
+ }
+ }
+
+ /* If there are no database matches, then the URL is safe. */
+ if (g_hash_table_size (matching_hashes_set) == 0) {
+ LOG ("No database match, URL is safe");
+ goto return_result;
+ }
+
+ /* Check for full hashes matches.
+ * All unexpired full hash matches are added directly to the result set.
+ */
+ matching_hashes = g_hash_table_get_keys (matching_hashes_set);
+ hashes_lookup = ephy_gsb_storage_lookup_full_hashes (self->storage, matching_hashes);
+ for (GList *l = hashes_lookup; l && l->data; l = l->next) {
+ EphyGSBHashFullLookup *lookup = (EphyGSBHashFullLookup *)l->data;
+ EphyGSBThreatList *list;
+
+ if (lookup->expired) {
+ has_matching_expired_hashes = TRUE;
+ } else {
+ list = ephy_gsb_threat_list_new (lookup->threat_type,
+ lookup->platform_type,
+ lookup->threat_entry_type,
+ NULL, 0);
+ g_hash_table_add (threats, list);
+ }
+ }
+
+ /* Check for positive cache hit.
+ * That is, there is at least one unexpired full hash match.
+ */
+ if (g_hash_table_size (threats) > 0) {
+ LOG ("Positive cache hit, URL is not safe");
+ goto return_result;
+ }
+
+ /* Check for negative cache hit. That is, there are no expired
+ * full hash matches and all hash prefix matches are negative-unexpired.
+ */
+ g_hash_table_iter_init (&iter, matching_prefixes_set);
+ while (g_hash_table_iter_next (&iter, NULL, &value)) {
+ if (GPOINTER_TO_INT (value) == TRUE) {
+ has_matching_expired_prefixes = TRUE;
+ break;
+ }
+ }
+ if (!has_matching_expired_hashes && !has_matching_expired_prefixes) {
+ LOG ("Negative cache hit, URL is safe");
+ goto return_result;
+ }
+
+ /* At this point we have either expired full hash matches and/or
+ * negative-expired hash prefix matches, so we need to find from
+ * the server whether the URL is safe or not. We do this by updating
+ * the full hashes of the matching prefixes with fresh values from
+ * server and re-checking for positive cache hits.
+ * See ephy_gsb_service_find_full_hashes_cb().
+ */
+ matching_prefixes = g_hash_table_get_keys (matching_prefixes_set);
+ ephy_gsb_service_find_full_hashes (self, threats,
+ matching_prefixes, matching_hashes,
+ callback, user_data);
+ goto out;
+
+return_result:
+ callback (threats, user_data);
+
+out:
+ g_list_free (matching_prefixes);
+ g_list_free (matching_hashes);
+ g_list_free_full (cues, (GDestroyNotify)g_bytes_unref);
+ g_list_free_full (prefixes_lookup, (GDestroyNotify)ephy_gsb_hash_prefix_lookup_free);
+ g_list_free_full (hashes_lookup, (GDestroyNotify)ephy_gsb_hash_full_lookup_free);
+ g_hash_table_unref (matching_prefixes_set);
+ g_hash_table_unref (matching_hashes_set);
+}
+
+void
+ephy_gsb_service_verify_url (EphyGSBService *self,
+ const char *url,
+ EphyGSBServiceVerifyURLCallback callback,
+ gpointer user_data)
+{
+ GHashTable *threats;
+ GList *hashes;
+
+ g_assert (EPHY_IS_GSB_SERVICE (self));
+ g_assert (url);
+
+ if (!callback)
+ return;
+
+ threats = g_hash_table_new_full (g_direct_hash,
+ (GEqualFunc)ephy_gsb_threat_list_equal,
+ (GDestroyNotify)ephy_gsb_threat_list_free,
+ NULL);
+
+ /* If the local database is broken or an update is in course, we cannot
+ * really verify the URL, so we have no choice other than to consider it safe.
+ */
+ if (!ephy_gsb_storage_is_operable (self->storage) || self->is_updating) {
+ LOG ("Local GSB storage is not available at the moment, cannot verify URL");
+ callback (threats, user_data);
+ return;
+ }
+
+ hashes = ephy_gsb_utils_compute_hashes (url);
+ if (!hashes) {
+ callback (threats, user_data);
+ return;
+ }
+
+ ephy_gsb_service_verify_hashes (self, hashes, threats, callback, user_data);
+ g_list_free_full (hashes, (GDestroyNotify)g_bytes_unref);
+}
diff --git a/lib/safe-browsing/ephy-gsb-service.h b/lib/safe-browsing/ephy-gsb-service.h
index 8371394..eb7175e 100644
--- a/lib/safe-browsing/ephy-gsb-service.h
+++ b/lib/safe-browsing/ephy-gsb-service.h
@@ -28,7 +28,17 @@ G_BEGIN_DECLS
G_DECLARE_FINAL_TYPE (EphyGSBService, ephy_gsb_service, EPHY, GSB_SERVICE, GObject)
-EphyGSBService *ephy_gsb_service_new (const char *api_key,
- const char *db_path);
+/* @threats is a set of EphyGSBThreatList where the URL is considered unsafe.
+ * The caller takes ownership of the GHashTable and needs to free it.
+ */
+typedef void (*EphyGSBServiceVerifyURLCallback) (GHashTable *threats,
+ gpointer user_data);
+
+EphyGSBService *ephy_gsb_service_new (const char *api_key,
+ const char *db_path);
+void ephy_gsb_service_verify_url (EphyGSBService *self,
+ const char *url,
+ EphyGSBServiceVerifyURLCallback callback,
+ gpointer user_data);
G_END_DECLS
diff --git a/lib/safe-browsing/ephy-gsb-storage.c b/lib/safe-browsing/ephy-gsb-storage.c
index 3f6e330..19183a2 100644
--- a/lib/safe-browsing/ephy-gsb-storage.c
+++ b/lib/safe-browsing/ephy-gsb-storage.c
@@ -28,8 +28,6 @@
#include <glib/gstdio.h>
#include <string.h>
-#define CUE_LEN 4
-
/* Keep this lower than 200 or else you'll get "too many SQL variables" error
* in ephy_gsb_storage_insert_batch(). SQLITE_MAX_VARIABLE_NUMBER is hardcoded
* in sqlite3 as 999.
@@ -1062,7 +1060,7 @@ ephy_gsb_storage_insert_hash_prefix_batch (EphyGSBStorage *self,
}
for (gsize k = start; k < end; k += len) {
- if (!ephy_sqlite_statement_bind_blob (statement, id++, prefixes + k, CUE_LEN, NULL) ||
+ if (!ephy_sqlite_statement_bind_blob (statement, id++, prefixes + k, GSB_CUE_LEN, NULL) ||
!ephy_sqlite_statement_bind_blob (statement, id++, prefixes + k, len, NULL) ||
!bind_threat_list_params (statement, list, id, id + 1, id + 2, -1)) {
g_warning ("Failed to bind values in hash prefix statement");
@@ -1165,7 +1163,7 @@ ephy_gsb_storage_lookup_hash_prefixes (EphyGSBStorage *self,
for (GList *l = cues; l && l->data; l = l->next) {
ephy_sqlite_statement_bind_blob (statement, id++,
- g_bytes_get_data (l->data, NULL), CUE_LEN,
+ g_bytes_get_data (l->data, NULL), GSB_CUE_LEN,
&error);
if (error) {
g_warning ("Failed to bind cue value as blob: %s", error->message);
diff --git a/lib/safe-browsing/ephy-gsb-utils.c b/lib/safe-browsing/ephy-gsb-utils.c
index 0b64ff0..5b72182 100644
--- a/lib/safe-browsing/ephy-gsb-utils.c
+++ b/lib/safe-browsing/ephy-gsb-utils.c
@@ -66,6 +66,23 @@ ephy_gsb_threat_list_free (EphyGSBThreatList *list)
g_slice_free (EphyGSBThreatList, list);
}
+gboolean
+ephy_gsb_threat_list_equal (EphyGSBThreatList *l1,
+ EphyGSBThreatList *l2)
+{
+ g_assert (l1);
+ g_assert (l2);
+
+ if (g_strcmp0 (l1->threat_type, l2->threat_type) != 0)
+ return FALSE;
+ if (g_strcmp0 (l1->platform_type, l2->platform_type) != 0)
+ return FALSE;
+ if (g_strcmp0 (l1->threat_entry_type, l2->threat_entry_type) != 0)
+ return FALSE;
+
+ return TRUE;
+}
+
EphyGSBHashPrefixLookup *
ephy_gsb_hash_prefix_lookup_new (const guint8 *prefix,
gsize length,
@@ -636,3 +653,40 @@ ephy_gsb_utils_compute_hashes (const char *url)
return g_list_reverse (retval);
}
+
+GList *
+ephy_gsb_utils_get_hash_cues (GList *hashes)
+{
+ GList *retval = NULL;
+
+ g_assert (hashes);
+
+ for (GList *l = hashes; l && l->data; l = l->next) {
+ const char *hash = g_bytes_get_data (l->data, NULL);
+ retval = g_list_prepend (retval, g_bytes_new (hash, GSB_CUE_LEN));
+ }
+
+ return g_list_reverse (retval);
+}
+
+gboolean
+ephy_gsb_utils_hash_has_prefix (GBytes *hash,
+ GBytes *prefix)
+{
+ const guint8 *hash_data;
+ const guint8 *prefix_data;
+ gsize prefix_len;
+
+ g_assert (hash);
+ g_assert (prefix);
+
+ hash_data = g_bytes_get_data (hash, NULL);
+ prefix_data = g_bytes_get_data (prefix, &prefix_len);
+
+ for (gsize i = 0; i < prefix_len; i++) {
+ if (hash_data[i] != prefix_data[i])
+ return FALSE;
+ }
+
+ return TRUE;
+}
diff --git a/lib/safe-browsing/ephy-gsb-utils.h b/lib/safe-browsing/ephy-gsb-utils.h
index 759f525..81c06c3 100644
--- a/lib/safe-browsing/ephy-gsb-utils.h
+++ b/lib/safe-browsing/ephy-gsb-utils.h
@@ -25,6 +25,7 @@
G_BEGIN_DECLS
+#define GSB_CUE_LEN 4
#define GSB_HASH_TYPE G_CHECKSUM_SHA256
#define GSB_HASH_SIZE (g_checksum_type_get_length (GSB_HASH_TYPE))
@@ -60,6 +61,8 @@ EphyGSBThreatList *ephy_gsb_threat_list_new (const char *t
const char *client_state,
gint64 timestamp);
void ephy_gsb_threat_list_free (EphyGSBThreatList *list);
+gboolean ephy_gsb_threat_list_equal (EphyGSBThreatList *l1,
+ EphyGSBThreatList *l2);
EphyGSBHashPrefixLookup *ephy_gsb_hash_prefix_lookup_new (const guint8 *prefix,
gsize length,
@@ -87,5 +90,8 @@ char *ephy_gsb_utils_canonicalize (const char *
char **path_out,
char **query_out);
GList *ephy_gsb_utils_compute_hashes (const char *url);
+GList *ephy_gsb_utils_get_hash_cues (GList *hashes);
+gboolean ephy_gsb_utils_hash_has_prefix (GBytes *hash,
+ GBytes *prefix);
G_END_DECLS
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]