[epiphany/wip/google-safe-browsing: 22/22] safe-browsing: Add support for Rice compression scheme
- From: Gabriel Ivașcu <gabrielivascu src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [epiphany/wip/google-safe-browsing: 22/22] safe-browsing: Add support for Rice compression scheme
- Date: Thu, 21 Sep 2017 21:37:47 +0000 (UTC)
commit 9d66b67875fefd2efda915732c1959f6d82e5ab2
Author: Gabriel Ivascu <gabrielivascu gnome org>
Date: Fri Sep 22 00:32:44 2017 +0300
safe-browsing: Add support for Rice compression scheme
https://developers.google.com/safe-browsing/v4/compression
lib/safe-browsing/ephy-gsb-service.c | 9 +--
lib/safe-browsing/ephy-gsb-storage.c | 179 ++++++++++++++++++++++++----------
lib/safe-browsing/ephy-gsb-storage.h | 5 +-
lib/safe-browsing/ephy-gsb-utils.c | 159 +++++++++++++++++++++++++++++-
lib/safe-browsing/ephy-gsb-utils.h | 10 ++-
5 files changed, 297 insertions(+), 65 deletions(-)
---
diff --git a/lib/safe-browsing/ephy-gsb-service.c b/lib/safe-browsing/ephy-gsb-service.c
index 4e01e7e..0c76a4c 100644
--- a/lib/safe-browsing/ephy-gsb-service.c
+++ b/lib/safe-browsing/ephy-gsb-service.c
@@ -268,9 +268,7 @@ ephy_gsb_service_update_thread (GTask *task,
JsonArray *removals = json_object_get_array_member (lur, "removals");
for (guint k = 0; k < json_array_get_length (removals); k++) {
JsonObject *tes = json_array_get_object_element (removals, k);
- JsonObject *raw_indices = json_object_get_object_member (tes, "rawIndices");
- JsonArray *indices = json_object_get_array_member (raw_indices, "indices");
- ephy_gsb_storage_delete_hash_prefixes (self->storage, list, indices);
+ ephy_gsb_storage_delete_hash_prefixes (self->storage, list, tes);
}
}
@@ -279,10 +277,7 @@ ephy_gsb_service_update_thread (GTask *task,
JsonArray *additions = json_object_get_array_member (lur, "additions");
for (guint k = 0; k < json_array_get_length (additions); k++) {
JsonObject *tes = json_array_get_object_element (additions, k);
- JsonObject *raw_hashes = json_object_get_object_member (tes, "rawHashes");
- gint64 prefix_size = json_object_get_int_member (raw_hashes, "prefixSize");
- const char *hashes = json_object_get_string_member (raw_hashes, "rawHashes");
- ephy_gsb_storage_insert_hash_prefixes (self->storage, list, prefix_size, hashes);
+ ephy_gsb_storage_insert_hash_prefixes (self->storage, list, tes);
}
}
diff --git a/lib/safe-browsing/ephy-gsb-storage.c b/lib/safe-browsing/ephy-gsb-storage.c
index 84affd4..984f260 100644
--- a/lib/safe-browsing/ephy-gsb-storage.c
+++ b/lib/safe-browsing/ephy-gsb-storage.c
@@ -899,11 +899,11 @@ ephy_gsb_storage_make_delete_hash_prefix_statement (EphyGSBStorage *self,
}
static GList *
-ephy_gsb_storage_delete_hash_prefix_batch (EphyGSBStorage *self,
- EphyGSBThreatList *list,
- GList *prefixes,
- gsize num_prefixes,
- EphySQLiteStatement *stmt)
+ephy_gsb_storage_delete_hash_prefixes_batch (EphyGSBStorage *self,
+ EphyGSBThreatList *list,
+ GList *prefixes,
+ gsize num_prefixes,
+ EphySQLiteStatement *stmt)
{
EphySQLiteStatement *statement = NULL;
GError *error = NULL;
@@ -953,10 +953,11 @@ out:
return prefixes;
}
-void
-ephy_gsb_storage_delete_hash_prefixes (EphyGSBStorage *self,
- EphyGSBThreatList *list,
- JsonArray *indices)
+static void
+ephy_gsb_storage_delete_hash_prefixes_internal (EphyGSBStorage *self,
+ EphyGSBThreatList *list,
+ guint32 *indices,
+ gsize num_indices)
{
EphySQLiteStatement *statement = NULL;
GList *prefixes = NULL;
@@ -969,12 +970,12 @@ ephy_gsb_storage_delete_hash_prefixes (EphyGSBStorage *self,
g_assert (list);
g_assert (indices);
- LOG ("Deleting %u hash prefixes...", json_array_get_length (indices));
+ LOG ("Deleting %lu hash prefixes...", num_indices);
/* Move indices from the JSON array to a hash table set. */
set = g_hash_table_new (g_direct_hash, g_direct_equal);
- for (guint i = 0; i < json_array_get_length (indices); i++)
- g_hash_table_add (set, GINT_TO_POINTER (json_array_get_int_element (indices, i)));
+ for (gsize i = 0; i < num_indices; i++)
+ g_hash_table_add (set, GUINT_TO_POINTER (indices[i]));
prefixes = ephy_gsb_storage_get_hash_prefixes_to_delete (self, list, set, &num_prefixes);
head = prefixes;
@@ -986,16 +987,16 @@ ephy_gsb_storage_delete_hash_prefixes (EphyGSBStorage *self,
statement = ephy_gsb_storage_make_delete_hash_prefix_statement (self, BATCH_SIZE);
for (gsize i = 0; i < num_prefixes / BATCH_SIZE; i++) {
- head = ephy_gsb_storage_delete_hash_prefix_batch (self, list,
- head, BATCH_SIZE,
- statement);
+ head = ephy_gsb_storage_delete_hash_prefixes_batch (self, list,
+ head, BATCH_SIZE,
+ statement);
}
}
if (num_prefixes % BATCH_SIZE != 0) {
- ephy_gsb_storage_delete_hash_prefix_batch (self, list,
- head, num_prefixes % BATCH_SIZE,
- NULL);
+ ephy_gsb_storage_delete_hash_prefixes_batch (self, list,
+ head, num_prefixes % BATCH_SIZE,
+ NULL);
}
ephy_gsb_storage_end_transaction (self);
@@ -1006,6 +1007,42 @@ ephy_gsb_storage_delete_hash_prefixes (EphyGSBStorage *self,
g_object_unref (statement);
}
+/* Removes from @list the hash prefixes whose indices are listed in the JSON
+ * object @tes, either Rice-encoded ("riceIndices") or raw ("rawIndices").
+ * Nothing is deleted when the index list turns out to be empty.
+ */
+void
+ephy_gsb_storage_delete_hash_prefixes (EphyGSBStorage    *self,
+                                       EphyGSBThreatList *list,
+                                       JsonObject        *tes)
+{
+  const char *compression;
+  guint32 *indices;
+  gsize num_indices;
+
+  g_assert (EPHY_IS_GSB_STORAGE (self));
+  g_assert (self->is_operable);
+  g_assert (list);
+  g_assert (tes);
+
+  compression = json_object_get_string_member (tes, "compressionType");
+  if (!g_strcmp0 (compression, GSB_COMPRESSION_TYPE_RICE)) {
+    JsonObject *rice_indices = json_object_get_object_member (tes, "riceIndices");
+
+    indices = ephy_gsb_utils_rice_delta_decode (rice_indices, &num_indices);
+  } else {
+    JsonObject *raw_indices = json_object_get_object_member (tes, "rawIndices");
+    JsonArray *indices_arr = json_object_get_array_member (raw_indices, "indices");
+
+    num_indices = json_array_get_length (indices_arr);
+
+    /* g_new() sizes the array by its element type and checks for overflow. */
+    indices = g_new (guint32, num_indices);
+    for (gsize i = 0; i < num_indices; i++)
+      indices[i] = json_array_get_int_element (indices_arr, i);
+  }
+
+  /* A zero-length allocation is NULL, and the internal helper asserts that the
+   * array is non-NULL, so an empty index list must not be forwarded. */
+  if (num_indices > 0)
+    ephy_gsb_storage_delete_hash_prefixes_internal (self, list, indices, num_indices);
+
+  g_free (indices);
+}
+
static EphySQLiteStatement *
ephy_gsb_storage_make_insert_hash_prefix_statement (EphyGSBStorage *self,
gsize num_prefixes)
@@ -1036,13 +1073,13 @@ ephy_gsb_storage_make_insert_hash_prefix_statement (EphyGSBStorage *self,
}
static void
-ephy_gsb_storage_insert_hash_prefix_batch (EphyGSBStorage *self,
- EphyGSBThreatList *list,
- const guint8 *prefixes,
- gsize start,
- gsize end,
- gsize len,
- EphySQLiteStatement *stmt)
+ephy_gsb_storage_insert_hash_prefixes_batch (EphyGSBStorage *self,
+ EphyGSBThreatList *list,
+ const guint8 *prefixes,
+ gsize start,
+ gsize end,
+ gsize len,
+ EphySQLiteStatement *stmt)
{
EphySQLiteStatement *statement = NULL;
GError *error = NULL;
@@ -1065,7 +1102,7 @@ ephy_gsb_storage_insert_hash_prefix_batch (EphyGSBStorage *self,
}
for (gsize k = start; k < end; k += len) {
- if (!ephy_sqlite_statement_bind_blob (statement, id++, prefixes + k, GSB_CUE_LEN, NULL) ||
+ if (!ephy_sqlite_statement_bind_blob (statement, id++, prefixes + k, GSB_HASH_CUE_LEN, NULL) ||
!ephy_sqlite_statement_bind_blob (statement, id++, prefixes + k, len, NULL) ||
!bind_threat_list_params (statement, list, id, id + 1, id + 2, -1)) {
g_warning ("Failed to bind values in hash prefix statement");
@@ -1085,60 +1122,98 @@ out:
g_object_unref (statement);
}
-void
-ephy_gsb_storage_insert_hash_prefixes (EphyGSBStorage *self,
- EphyGSBThreatList *list,
- gsize prefix_len,
- const char *prefixes_b64)
+static void
+ephy_gsb_storage_insert_hash_prefixes_internal (EphyGSBStorage *self,
+ EphyGSBThreatList *list,
+ const guint8 *prefixes,
+ gsize num_prefixes,
+ gsize prefix_len)
{
EphySQLiteStatement *statement = NULL;
- guint8 *prefixes;
- gsize prefixes_len;
- gsize num_prefixes;
gsize num_batches;
g_assert (EPHY_IS_GSB_STORAGE (self));
g_assert (self->is_operable);
g_assert (list);
- g_assert (prefix_len > 0);
- g_assert (prefixes_b64);
-
- prefixes = g_base64_decode (prefixes_b64, &prefixes_len);
- num_prefixes = prefixes_len / prefix_len;
- num_batches = num_prefixes / BATCH_SIZE;
+ g_assert (prefixes);
LOG ("Inserting %lu hash prefixes of size %ld...", num_prefixes, prefix_len);
ephy_gsb_storage_start_transaction (self);
+ num_batches = num_prefixes / BATCH_SIZE;
if (num_batches > 0) {
/* Reuse statement to increase performance. */
statement = ephy_gsb_storage_make_insert_hash_prefix_statement (self, BATCH_SIZE);
for (gsize i = 0; i < num_batches; i++) {
- ephy_gsb_storage_insert_hash_prefix_batch (self, list, prefixes,
- i * prefix_len * BATCH_SIZE,
- (i + 1) * prefix_len * BATCH_SIZE,
- prefix_len,
- statement);
+ ephy_gsb_storage_insert_hash_prefixes_batch (self, list, prefixes,
+ i * prefix_len * BATCH_SIZE,
+ (i + 1) * prefix_len * BATCH_SIZE,
+ prefix_len,
+ statement);
}
}
if (num_prefixes % BATCH_SIZE != 0) {
- ephy_gsb_storage_insert_hash_prefix_batch (self, list, prefixes,
- num_batches * prefix_len * BATCH_SIZE,
- prefixes_len - 1,
- prefix_len,
- NULL);
+ ephy_gsb_storage_insert_hash_prefixes_batch (self, list, prefixes,
+ num_batches * prefix_len * BATCH_SIZE,
+ num_prefixes * prefix_len - 1,
+ prefix_len,
+ NULL);
}
ephy_gsb_storage_end_transaction (self);
- g_free (prefixes);
if (statement)
g_object_unref (statement);
}
+/* Inserts into @list the hash prefixes carried by the JSON object @tes, either
+ * Rice-encoded ("riceHashes") or raw base64 ("rawHashes" with "prefixSize").
+ * A raw set with a non-positive prefix size or no payload is skipped with a
+ * warning instead of being inserted.
+ */
+void
+ephy_gsb_storage_insert_hash_prefixes (EphyGSBStorage    *self,
+                                       EphyGSBThreatList *list,
+                                       JsonObject        *tes)
+{
+  const char *compression;
+  guint32 *items = NULL;
+  guint8 *prefixes = NULL;
+  gsize prefix_len;
+  gsize num_prefixes;
+
+  g_assert (EPHY_IS_GSB_STORAGE (self));
+  g_assert (self->is_operable);
+  g_assert (list);
+  g_assert (tes);
+
+  compression = json_object_get_string_member (tes, "compressionType");
+  if (!g_strcmp0 (compression, GSB_COMPRESSION_TYPE_RICE)) {
+    JsonObject *rice_hashes = json_object_get_object_member (tes, "riceHashes");
+
+    items = ephy_gsb_utils_rice_delta_decode (rice_hashes, &num_prefixes);
+
+    /* Each decoded 32-bit item becomes one GSB_RICE_PREFIX_LEN-byte prefix. */
+    prefix_len = GSB_RICE_PREFIX_LEN;
+    prefixes = g_malloc (num_prefixes * GSB_RICE_PREFIX_LEN);
+    for (gsize i = 0; i < num_prefixes; i++)
+      memcpy (prefixes + i * GSB_RICE_PREFIX_LEN, &items[i], GSB_RICE_PREFIX_LEN);
+  } else {
+    JsonObject *raw_hashes = json_object_get_object_member (tes, "rawHashes");
+    gint64 prefix_size = json_object_get_int_member (raw_hashes, "prefixSize");
+    const char *prefixes_b64 = json_object_get_string_member (raw_hashes, "rawHashes");
+    gsize prefixes_len;
+
+    /* prefix_size is used as a divisor below, so zero must be rejected; a
+     * negative value would wrap to a huge gsize. */
+    if (prefix_size <= 0 || !prefixes_b64) {
+      g_warning ("Invalid raw hash prefixes, skipping");
+      return;
+    }
+
+    prefix_len = prefix_size;
+    prefixes = g_base64_decode (prefixes_b64, &prefixes_len);
+    num_prefixes = prefixes_len / prefix_len;
+  }
+
+  ephy_gsb_storage_insert_hash_prefixes_internal (self, list, prefixes, num_prefixes, prefix_len);
+
+  g_free (items);
+  g_free (prefixes);
+}
+
GList *
ephy_gsb_storage_lookup_hash_prefixes (EphyGSBStorage *self,
GList *cues)
@@ -1172,7 +1247,7 @@ ephy_gsb_storage_lookup_hash_prefixes (EphyGSBStorage *self,
for (GList *l = cues; l && l->data; l = l->next) {
ephy_sqlite_statement_bind_blob (statement, id++,
- g_bytes_get_data (l->data, NULL), GSB_CUE_LEN,
+ g_bytes_get_data (l->data, NULL), GSB_HASH_CUE_LEN,
&error);
if (error) {
g_warning ("Failed to bind cue value as blob: %s", error->message);
diff --git a/lib/safe-browsing/ephy-gsb-storage.h b/lib/safe-browsing/ephy-gsb-storage.h
index 01ef05e..aa3754d 100644
--- a/lib/safe-browsing/ephy-gsb-storage.h
+++ b/lib/safe-browsing/ephy-gsb-storage.h
@@ -45,11 +45,10 @@ void ephy_gsb_storage_clear_hash_prefixes (EphyGSBStorage
EphyGSBThreatList *list);
void ephy_gsb_storage_delete_hash_prefixes (EphyGSBStorage *self,
EphyGSBThreatList *list,
- JsonArray *indices);
+ JsonObject *tes);
void ephy_gsb_storage_insert_hash_prefixes (EphyGSBStorage *self,
EphyGSBThreatList *list,
- gsize prefix_len,
- const char *prefixes_b64);
+ JsonObject *tes);
GList *ephy_gsb_storage_lookup_hash_prefixes (EphyGSBStorage *self,
GList *cues);
GList *ephy_gsb_storage_lookup_full_hashes (EphyGSBStorage *self,
diff --git a/lib/safe-browsing/ephy-gsb-utils.c b/lib/safe-browsing/ephy-gsb-utils.c
index 5b72182..1dca29c 100644
--- a/lib/safe-browsing/ephy-gsb-utils.c
+++ b/lib/safe-browsing/ephy-gsb-utils.c
@@ -26,12 +26,129 @@
#include <arpa/inet.h>
#include <libsoup/soup.h>
+#include <stdio.h>
#include <string.h>
#define MAX_HOST_SUFFIXES 5
#define MAX_PATH_PREFIXES 6
#define MAX_UNESCAPE_STEP 1024
+/* Cursor over a bit stream; within each byte, bits are consumed starting from
+ * the least-significant one. */
+typedef struct {
+ guint8 *data; /* Owned copy of the bit stream, as an array of bytes */
+ gsize data_len; /* The number of bytes in data */
+ guint8 *curr; /* The byte of data that the next bit is read from */
+ guint8 mask; /* Single-bit mask selecting the next bit in *curr; starts at 0x01 */
+ gsize read; /* The number of bits read so far */
+} EphyGSBBitReader;
+
+/* State of a Golomb-Rice decoder reading from a bit stream. */
+typedef struct {
+ EphyGSBBitReader *reader; /* Bit reader over the encoded data; freed with the decoder */
+ guint parameter; /* Golomb-Rice parameter: the number of remainder bits per value */
+} EphyGSBRiceDecoder;
+
+/* Creates a bit reader positioned on the first bit of a private copy of the
+ * @data_len bytes at @data. Free it with ephy_gsb_bit_reader_free(). */
+static inline EphyGSBBitReader *
+ephy_gsb_bit_reader_new (const guint8 *data,
+                         gsize         data_len)
+{
+  EphyGSBBitReader *bits;
+
+  g_assert (data);
+  g_assert (data_len > 0);
+
+  bits = g_slice_new (EphyGSBBitReader);
+
+  /* Keep our own copy so the caller may release @data right away. */
+  bits->data = g_malloc (data_len);
+  memcpy (bits->data, data, data_len);
+  bits->data_len = data_len;
+
+  /* Start on the lowest bit of the first byte, with nothing read yet. */
+  bits->curr = bits->data;
+  bits->mask = 0x01;
+  bits->read = 0;
+
+  return bits;
+}
+
+/* Releases @reader along with its private copy of the bit stream. */
+static inline void
+ephy_gsb_bit_reader_free (EphyGSBBitReader *reader)
+{
+  g_assert (reader);
+
+  /* The byte buffer goes first, then the reader structure itself. */
+  g_free (reader->data);
+  g_slice_free (EphyGSBBitReader, reader);
+}
+
+/*
+ * Reads the next @num_bits bits (at most 32) from @reader and returns them as
+ * an integer in which bit i is the i-th bit read. Asserts that the stream still
+ * holds that many bits.
+ *
+ * https://developers.google.com/safe-browsing/v4/compression#bit-encoderdecoder
+ */
+static guint32
+ephy_gsb_bit_reader_read (EphyGSBBitReader *reader,
+                          guint             num_bits)
+{
+  guint32 retval = 0;
+
+  /* Cannot read more than 4 bytes at once. */
+  g_assert (num_bits <= 32);
+  /* Cannot read more bits than the buffer has left. */
+  g_assert (reader->read + num_bits <= reader->data_len * 8);
+
+  /* Within a byte, the least-significant bits come before the most-significant
+   * bits in the bit stream. */
+  for (guint i = 0; i < num_bits; i++) {
+    /* Shift an unsigned 1: with a signed int, `1 << 31` is undefined behavior,
+     * and i reaches 31 when all 32 bits are requested. */
+    if (*reader->curr & reader->mask)
+      retval |= (guint32)1 << i;
+
+    /* The 8-bit mask becomes 0 once shifted past the top bit: next byte. */
+    reader->mask <<= 1;
+    if (reader->mask == 0) {
+      reader->curr++;
+      reader->mask = 0x01;
+    }
+  }
+
+  reader->read += num_bits;
+
+  return retval;
+}
+
+/* Creates a Rice decoder using @parameter over the @data_len bytes at @data.
+ * Free it with ephy_gsb_rice_decoder_free(). */
+static inline EphyGSBRiceDecoder *
+ephy_gsb_rice_decoder_new (const guint8 *data,
+                           gsize         data_len,
+                           guint32       parameter)
+{
+  EphyGSBRiceDecoder *rice;
+
+  g_assert (data);
+  g_assert (data_len > 0);
+
+  rice = g_slice_new (EphyGSBRiceDecoder);
+  rice->parameter = parameter;
+  rice->reader = ephy_gsb_bit_reader_new (data, data_len);
+
+  return rice;
+}
+
+/* Releases @decoder together with the bit reader it holds. */
+static inline void
+ephy_gsb_rice_decoder_free (EphyGSBRiceDecoder *decoder)
+{
+  g_assert (decoder);
+
+  /* The reader goes first, then the decoder structure itself. */
+  ephy_gsb_bit_reader_free (decoder->reader);
+  g_slice_free (EphyGSBRiceDecoder, decoder);
+}
+
+/* Decodes the next value from @decoder: a unary quotient (a run of 1 bits
+ * ended by a 0 bit) followed by a remainder of `parameter` bits. */
+static guint32
+ephy_gsb_rice_decoder_next (EphyGSBRiceDecoder *decoder)
+{
+  guint32 quotient = 0;
+  guint32 remainder;
+
+  g_assert (decoder);
+
+  /* A single-bit read yields 0 or 1, so counting the 1s gives the quotient. */
+  while (ephy_gsb_bit_reader_read (decoder->reader, 1) != 0)
+    quotient++;
+
+  remainder = ephy_gsb_bit_reader_read (decoder->reader, decoder->parameter);
+
+  return (quotient << decoder->parameter) + remainder;
+}
+
EphyGSBThreatList *
ephy_gsb_threat_list_new (const char *threat_type,
const char *platform_type,
@@ -175,7 +292,8 @@ ephy_gsb_utils_make_contraints (void)
JsonArray *compressions;
compressions = json_array_new ();
- json_array_add_string_element (compressions, "RAW");
+ json_array_add_string_element (compressions, GSB_COMPRESSION_TYPE_RAW);
+ json_array_add_string_element (compressions, GSB_COMPRESSION_TYPE_RICE);
constraints = json_object_new ();
/* No restriction for the number of update entries. */
@@ -336,6 +454,43 @@ ephy_gsb_utils_get_metadata_entry (JsonObject *threat_entry_metadata,
return NULL;
}
+/* Decodes the Rice-delta-encoded JSON object @rde.
+ *
+ * Returns a newly allocated array (free with g_free()) holding the first value
+ * followed by one value per encoded delta, each being the previous value plus
+ * the decoded delta. The array length is stored in @num_items and is always at
+ * least 1.
+ *
+ * A missing "firstValue" counts as 0. When "numEntries" or "encodedData" is
+ * missing, empty or out of range, only the first value is returned and the
+ * decoder, which asserts on empty input, is never created.
+ */
+guint32 *
+ephy_gsb_utils_rice_delta_decode (JsonObject *rde,
+                                  gsize      *num_items)
+{
+  const char *data_b64 = NULL;
+  const char *first_value_str = NULL;
+  guint8 *data = NULL;
+  guint32 *items;
+  gsize data_len = 0;
+  gint64 num_entries = 0;
+  guint parameter = 0;
+
+  g_assert (rde);
+  g_assert (num_items);
+
+  /* Every member is optional, so query only the ones that are present. */
+  if (json_object_has_member (rde, "firstValue"))
+    first_value_str = json_object_get_string_member (rde, "firstValue");
+  if (json_object_has_member (rde, "riceParameter"))
+    parameter = json_object_get_int_member (rde, "riceParameter");
+  if (json_object_has_member (rde, "numEntries"))
+    num_entries = json_object_get_int_member (rde, "numEntries");
+  if (json_object_has_member (rde, "encodedData"))
+    data_b64 = json_object_get_string_member (rde, "encodedData");
+
+  if (data_b64)
+    data = g_base64_decode (data_b64, &data_len);
+
+  /* No usable payload, or a count that cannot be honoured: first value only. */
+  if (!data || data_len == 0 || num_entries < 0 || num_entries >= G_MAXUINT32)
+    num_entries = 0;
+
+  /* Computed in gsize so that the "+ 1" cannot wrap around. */
+  *num_items = 1 + (gsize)num_entries;
+  items = g_new (guint32, *num_items);
+
+  /* Parsed as an unsigned decimal; stays 0 when the string is absent. */
+  items[0] = first_value_str ? (guint32)g_ascii_strtoull (first_value_str, NULL, 10) : 0;
+
+  if (num_entries > 0) {
+    EphyGSBRiceDecoder *decoder = ephy_gsb_rice_decoder_new (data, data_len, parameter);
+
+    for (gsize i = 1; i < *num_items; i++)
+      items[i] = items[i - 1] + ephy_gsb_rice_decoder_next (decoder);
+
+    ephy_gsb_rice_decoder_free (decoder);
+  }
+
+  g_free (data);
+
+  return items;
+}
+}
+
static char *
ephy_gsb_utils_full_unescape (const char *part)
{
@@ -663,7 +818,7 @@ ephy_gsb_utils_get_hash_cues (GList *hashes)
for (GList *l = hashes; l && l->data; l = l->next) {
const char *hash = g_bytes_get_data (l->data, NULL);
- retval = g_list_prepend (retval, g_bytes_new (hash, GSB_CUE_LEN));
+ retval = g_list_prepend (retval, g_bytes_new (hash, GSB_HASH_CUE_LEN));
}
return g_list_reverse (retval);
diff --git a/lib/safe-browsing/ephy-gsb-utils.h b/lib/safe-browsing/ephy-gsb-utils.h
index 84f2518..3002cc0 100644
--- a/lib/safe-browsing/ephy-gsb-utils.h
+++ b/lib/safe-browsing/ephy-gsb-utils.h
@@ -25,10 +25,16 @@
G_BEGIN_DECLS
-#define GSB_CUE_LEN 4
+#define GSB_HASH_CUE_LEN 4
+#define GSB_RICE_PREFIX_LEN 4
+
#define GSB_HASH_TYPE G_CHECKSUM_SHA256
#define GSB_HASH_SIZE (g_checksum_type_get_length (GSB_HASH_TYPE))
+#define GSB_COMPRESSION_TYPE_RAW "RAW"
+#define GSB_COMPRESSION_TYPE_RICE "RICE"
+#define GSB_COMPRESSION_TYPE_UNSPECIFIED "COMPRESSION_TYPE_UNSPECIFIED"
+
#define GSB_THREAT_TYPE_MALWARE "MALWARE"
#define GSB_THREAT_TYPE_SOCIAL_ENGINEERING "SOCIAL_ENGINEERING"
#define GSB_THREAT_TYPE_UNWANTED_SOFTWARE "UNWANTED_SOFTWARE"
@@ -86,6 +92,8 @@ JsonObject *ephy_gsb_utils_make_full_hashes_request (GList *threat
GList *hash_prefixes);
char *ephy_gsb_utils_get_metadata_entry (JsonObject *threat_entry_metadata,
const char *metadata_key);
+guint32 *ephy_gsb_utils_rice_delta_decode (JsonObject *rde,
+ gsize *num_items);
char *ephy_gsb_utils_canonicalize (const char *url,
char **host_out,
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]