[ostree] Add bsdiff support to deltas
- From: Colin Walters <walters src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [ostree] Add bsdiff support to deltas
- Date: Tue, 3 Mar 2015 23:46:34 +0000 (UTC)
commit 3f3bb8e37dffec1143328db43fdd806baf346d1e
Author: Giuseppe Scrivano <gscrivan redhat com>
Date: Mon Feb 23 10:35:17 2015 +0100
Add bsdiff support to deltas
Signed-off-by: Giuseppe Scrivano <gscrivan redhat com>
.../ostree-repo-static-delta-compilation.c | 233 ++++++++++++++++++-
src/libostree/ostree-repo-static-delta-private.h | 3 +-
.../ostree-repo-static-delta-processing.c | 82 +++++++
src/libostree/ostree-rollsum.h | 1 -
4 files changed, 304 insertions(+), 15 deletions(-)
---
diff --git a/src/libostree/ostree-repo-static-delta-compilation.c
b/src/libostree/ostree-repo-static-delta-compilation.c
index b3ce797..968b460 100644
--- a/src/libostree/ostree-repo-static-delta-compilation.c
+++ b/src/libostree/ostree-repo-static-delta-compilation.c
@@ -21,7 +21,9 @@
#include "config.h"
#include <string.h>
+#include <stdlib.h>
#include <gio/gunixoutputstream.h>
+#include <gio/gmemoryoutputstream.h>
#include "ostree-core-private.h"
#include "ostree-repo-private.h"
@@ -31,6 +33,7 @@
#include "ostree-rollsum.h"
#include "otutil.h"
#include "ostree-varint.h"
+#include "bsdiff/bsdiff.h"
#define CONTENT_SIZE_SIMILARITY_THRESHOLD_PERCENT (30)
@@ -399,7 +402,14 @@ process_one_object (OstreeRepo *repo,
+/* A pending bsdiff candidate: the checksum of the source ("from") object
+ * together with the loaded contents of the source and target objects.
+ * Released with content_bsdiffs_free(). */
typedef struct {
char *from_checksum;
+ GBytes *tmp_from;
+ GBytes *tmp_to;
+} ContentBsdiff;
+
+/* A rollsum result for one modified file: the source checksum, the computed
+ * matches, and the GBytes for the source and target contents.
+ * Released with content_rollsums_free(). */
+typedef struct {
+ char *from_checksum;
OstreeRollsumMatches *matches;
+ GBytes *tmp_from;
GBytes *tmp_to;
} ContentRollsum;
@@ -408,10 +418,20 @@ content_rollsums_free (ContentRollsum *rollsum)
{
g_free (rollsum->from_checksum);
_ostree_rollsum_matches_free (rollsum->matches);
+ g_bytes_unref (rollsum->tmp_from);
g_bytes_unref (rollsum->tmp_to);
g_free (rollsum);
}
+/* Destroy notify for ContentBsdiff values: drops the references on both
+ * content buffers, frees the source checksum string, then frees the struct
+ * itself. */
+static void
+content_bsdiffs_free (ContentBsdiff *bsdiff)
+{
+  g_bytes_unref (bsdiff->tmp_to);
+  g_bytes_unref (bsdiff->tmp_from);
+  g_free (bsdiff->from_checksum);
+  g_free (bsdiff);
+}
+
/* Load a content object, uncompressing it to an unlinked tmpfile
that's mmap()'d and suitable for seeking.
*/
@@ -467,6 +487,47 @@ get_unpacked_unlinked_content (OstreeRepo *repo,
}
+/* Decide whether the content object @to should be delta-encoded against
+ * @from using bsdiff.
+ *
+ * Both objects are loaded with get_unpacked_unlinked_content().  When their
+ * combined size exceeds 200 MiB the pair is skipped: the call still
+ * succeeds, but *out_bsdiff stays NULL.  Otherwise *out_bsdiff receives a
+ * newly allocated ContentBsdiff that takes ownership of both buffers; free
+ * it with content_bsdiffs_free().
+ *
+ * Returns: TRUE on success (with or without a candidate), FALSE if loading
+ * either object failed.
+ */
static gboolean
+try_content_bsdiff (OstreeRepo *repo,
+                    const char *from,
+                    const char *to,
+                    ContentBsdiff **out_bsdiff,
+                    GCancellable *cancellable,
+                    GError **error)
+{
+  gboolean ret = FALSE;
+  gs_unref_bytes GBytes *tmp_from = NULL;
+  gs_unref_bytes GBytes *tmp_to = NULL;
+  gs_unref_object GFileInfo *from_finfo = NULL;
+  gs_unref_object GFileInfo *to_finfo = NULL;
+  ContentBsdiff *ret_bsdiff = NULL;
+
+  *out_bsdiff = NULL;
+
+  if (!get_unpacked_unlinked_content (repo, from, &tmp_from, &from_finfo,
+                                      cancellable, error))
+    goto out;
+  if (!get_unpacked_unlinked_content (repo, to, &tmp_to, &to_finfo,
+                                      cancellable, error))
+    goto out;
+
+  /* TODO: make this option configurable.  */
+  if (g_bytes_get_size (tmp_to) + g_bytes_get_size (tmp_from) > (200 * (1 << 20)))
+    {
+      /* Being too large is not an error: report success with no candidate
+       * instead of returning FALSE without setting @error. */
+      ret = TRUE;
+      goto out;
+    }
+
+  ret_bsdiff = g_new0 (ContentBsdiff, 1);
+  ret_bsdiff->from_checksum = g_strdup (from);
+  ret_bsdiff->tmp_from = tmp_from; tmp_from = NULL;
+  ret_bsdiff->tmp_to = tmp_to; tmp_to = NULL;
+
+  ret = TRUE;
+  gs_transfer_out_value (out_bsdiff, &ret_bsdiff);
+ out:
+  return ret;
+}
+
+static gboolean
try_content_rollsum (OstreeRepo *repo,
const char *from,
const char *to,
@@ -525,6 +586,7 @@ try_content_rollsum (OstreeRepo *repo,
ret_rollsum = g_new0 (ContentRollsum, 1);
ret_rollsum->from_checksum = g_strdup (from);
ret_rollsum->matches = matches; matches = NULL;
+ ret_rollsum->tmp_from = tmp_from; tmp_from = NULL;
ret_rollsum->tmp_to = tmp_to; tmp_to = NULL;
ret = TRUE;
@@ -535,6 +597,27 @@ try_content_rollsum (OstreeRepo *repo,
return ret;
}
+/* Context handed to bzdiff_write() through bsdiff_stream.opaque: the stream
+ * that receives the patch data, plus the cancellable and error location
+ * used for each write. */
+struct bzdiff_opaque_s
+{
+ GOutputStream *out;
+ GCancellable *cancellable;
+ GError **error;
+};
+
+/* bsdiff_stream write callback: append @size bytes from @buffer to the
+ * output stream stored in stream->opaque (a struct bzdiff_opaque_s).
+ *
+ * g_output_stream_write_all() is used so that a short write cannot silently
+ * drop patch data.
+ *
+ * Returns 0 on success, -1 if the write failed.
+ */
+static int
+bzdiff_write (struct bsdiff_stream* stream, const void* buffer, int size)
+{
+  struct bzdiff_opaque_s *op = stream->opaque;
+  gsize bytes_written;
+
+  if (!g_output_stream_write_all (op->out,
+                                  buffer,
+                                  size,
+                                  &bytes_written,
+                                  op->cancellable,
+                                  op->error))
+    return -1;
+
+  return 0;
+}
+
static void
append_payload_chunk_and_write (OstreeStaticDeltaPartBuilder *current_part,
const guint8 *buf,
@@ -572,7 +655,7 @@ process_one_rollsum (OstreeRepo *repo,
current_part->payload->len > builder->max_chunk_size_bytes)
{
*current_part_val = current_part = allocate_part (builder);
- }
+ }
tmp_to_buf = g_bytes_get_data (rollsum->tmp_to, &tmp_to_len);
@@ -615,7 +698,7 @@ process_one_rollsum (OstreeRepo *repo,
GVariant *match = matchlist->pdata[i];
guint32 crc;
guint64 prefix;
-
+
g_variant_get (match, "(uttt)", &crc, &offset, &to_start, &from_start);
prefix = to_start - writing_offset;
@@ -627,7 +710,7 @@ process_one_rollsum (OstreeRepo *repo,
g_string_append_c (current_part->operations,
(gchar)OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE);
reading_payload = TRUE;
}
-
+
g_assert_cmpint (writing_offset + prefix, <=, tmp_to_len);
append_payload_chunk_and_write (current_part, tmp_to_buf + writing_offset, prefix);
writing_offset += prefix;
@@ -651,7 +734,7 @@ process_one_rollsum (OstreeRepo *repo,
g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE);
reading_payload = TRUE;
}
-
+
{ guint64 remainder = tmp_to_len - writing_offset;
if (remainder > 0)
append_payload_chunk_and_write (current_part, tmp_to_buf + writing_offset, remainder);
@@ -671,6 +754,100 @@ process_one_rollsum (OstreeRepo *repo,
return ret;
}
+/* Emit the static delta operations that rebuild the file object
+ * @to_checksum by applying a bsdiff patch to the object named by
+ * bsdiff_content->from_checksum.
+ *
+ * Operations appended to the current part, in order:
+ *   SET_READ_SOURCE  (source checksum bytes are stored in the payload)
+ *   OPEN             (mode offset, xattr offset, content size)
+ *   BSPATCH          (payload offset and length of the patch data)
+ *   CLOSE
+ *   UNSET_READ_SOURCE
+ *
+ * A new part is allocated first when the current one already holds objects
+ * and its payload is larger than builder->max_chunk_size_bytes; in that case
+ * *current_part_val is updated to the new part.
+ *
+ * Returns: TRUE on success, FALSE on failure.  If bsdiff() fails without the
+ * write callback having set @error, a generic G_IO_ERROR_FAILED is set.
+ */
+static gboolean
+process_one_bsdiff (OstreeRepo *repo,
+                    OstreeStaticDeltaBuilder *builder,
+                    OstreeStaticDeltaPartBuilder **current_part_val,
+                    const char *to_checksum,
+                    ContentBsdiff *bsdiff_content,
+                    GCancellable *cancellable,
+                    GError **error)
+{
+  gboolean ret = FALSE;
+  guint64 content_size;
+  gs_unref_object GInputStream *content_stream = NULL;
+  gs_unref_object GFileInfo *content_finfo = NULL;
+  gs_unref_variant GVariant *content_xattrs = NULL;
+  OstreeStaticDeltaPartBuilder *current_part = *current_part_val;
+  const guint8 *tmp_to_buf;
+  gsize tmp_to_len;
+  const guint8 *tmp_from_buf;
+  gsize tmp_from_len;
+
+  /* Check to see if this delta has gone over maximum size */
+  if (current_part->objects->len > 0 &&
+      current_part->payload->len > builder->max_chunk_size_bytes)
+    {
+      *current_part_val = current_part = allocate_part (builder);
+    }
+
+  tmp_to_buf = g_bytes_get_data (bsdiff_content->tmp_to, &tmp_to_len);
+  tmp_from_buf = g_bytes_get_data (bsdiff_content->tmp_from, &tmp_from_len);
+
+  if (!ostree_repo_load_file (repo, to_checksum, &content_stream,
+                              &content_finfo, &content_xattrs,
+                              cancellable, error))
+    goto out;
+  content_size = g_file_info_get_size (content_finfo);
+  g_assert_cmpint (tmp_to_len, ==, content_size);
+
+  current_part->uncompressed_size += content_size;
+
+  g_ptr_array_add (current_part->objects,
+                   ostree_object_name_serialize (to_checksum, OSTREE_OBJECT_TYPE_FILE));
+
+  { gsize mode_offset, xattr_offset;
+    guchar source_csum[32];
+
+    write_content_mode_xattrs (repo, current_part, content_finfo, content_xattrs,
+                               &mode_offset, &xattr_offset);
+
+    /* Write the origin checksum */
+    ostree_checksum_inplace_to_bytes (bsdiff_content->from_checksum, source_csum);
+
+    g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_SET_READ_SOURCE);
+    _ostree_write_varuint64 (current_part->operations, current_part->payload->len);
+    g_string_append_len (current_part->payload, (char*)source_csum, sizeof (source_csum));
+
+    g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_OPEN);
+    _ostree_write_varuint64 (current_part->operations, mode_offset);
+    _ostree_write_varuint64 (current_part->operations, xattr_offset);
+    _ostree_write_varuint64 (current_part->operations, content_size);
+
+    {
+      struct bsdiff_stream stream;
+      struct bzdiff_opaque_s op;
+      const gchar *payload;
+      gsize payload_size;
+      gs_unref_object GOutputStream *out = g_memory_output_stream_new_resizable ();
+      stream.malloc = malloc;
+      stream.free = free;
+      stream.write = bzdiff_write;
+      op.out = out;
+      op.cancellable = cancellable;
+      op.error = error;
+      stream.opaque = &op;
+      if (bsdiff (tmp_from_buf, tmp_from_len, tmp_to_buf, tmp_to_len, &stream) < 0)
+        {
+          /* bsdiff() may report failure without the write callback having
+           * stored a GError; never return FALSE with @error left unset. */
+          if (error != NULL && *error == NULL)
+            g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+                                 "bsdiff generation failed");
+          goto out;
+        }
+
+      payload = g_memory_output_stream_get_data (G_MEMORY_OUTPUT_STREAM (out));
+      payload_size = g_memory_output_stream_get_data_size (G_MEMORY_OUTPUT_STREAM (out));
+
+      /* The patch's payload offset is recorded before the patch bytes are
+       * appended, so it points at the start of the patch data. */
+      g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_BSPATCH);
+      _ostree_write_varuint64 (current_part->operations, current_part->payload->len);
+      _ostree_write_varuint64 (current_part->operations, payload_size);
+
+      g_string_append_len (current_part->payload, payload, payload_size);
+    }
+    g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_CLOSE);
+  }
+
+  g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE);
+
+  ret = TRUE;
+ out:
+  return ret;
+}
+
static gboolean
generate_delta_lowlatency (OstreeRepo *repo,
const char *from,
@@ -695,6 +872,7 @@ generate_delta_lowlatency (OstreeRepo *repo,
gs_unref_hashtable GHashTable *new_reachable_symlink_content = NULL;
gs_unref_hashtable GHashTable *modified_regfile_content = NULL;
gs_unref_hashtable GHashTable *rollsum_optimized_content_objects = NULL;
+ gs_unref_hashtable GHashTable *bsdiff_optimized_content_objects = NULL;
gs_unref_hashtable GHashTable *content_object_to_size = NULL;
if (from != NULL)
@@ -787,22 +965,35 @@ generate_delta_lowlatency (OstreeRepo *repo,
g_free,
(GDestroyNotify) content_rollsums_free);
+ bsdiff_optimized_content_objects = g_hash_table_new_full (g_str_hash, g_str_equal,
+ g_free,
+ (GDestroyNotify) content_bsdiffs_free);
+
g_hash_table_iter_init (&hashiter, modified_regfile_content);
while (g_hash_table_iter_next (&hashiter, &key, &value))
{
const char *to_checksum = key;
const char *from_checksum = value;
ContentRollsum *rollsum;
+ ContentBsdiff *bsdiff;
if (!try_content_rollsum (repo, from_checksum, to_checksum,
&rollsum, cancellable, error))
goto out;
- if (!rollsum)
- continue;
+ if (rollsum)
+ {
+ g_hash_table_insert (rollsum_optimized_content_objects, g_strdup (to_checksum), rollsum);
+ builder->rollsum_size += rollsum->matches->match_size;
+ continue;
+ }
+
+ if (!try_content_bsdiff (repo, from_checksum, to_checksum,
+ &bsdiff, cancellable, error))
+ goto out;
- g_hash_table_insert (rollsum_optimized_content_objects, g_strdup (to_checksum), rollsum);
- builder->rollsum_size += rollsum->matches->match_size;
+ if (bsdiff)
+ g_hash_table_insert (bsdiff_optimized_content_objects, g_strdup (to_checksum), bsdiff);
}
g_printerr ("rollsum for %u/%u modified\n",
@@ -836,7 +1027,21 @@ generate_delta_lowlatency (OstreeRepo *repo,
ContentRollsum *rollsum = value;
if (!process_one_rollsum (repo, builder, &current_part,
- checksum, rollsum,
+ checksum, rollsum,
+ cancellable, error))
+ goto out;
+ }
+
+ /* Now do bsdiff'ed objects */
+
+ g_hash_table_iter_init (&hashiter, bsdiff_optimized_content_objects);
+ while (g_hash_table_iter_next (&hashiter, &key, &value))
+ {
+ const char *checksum = key;
+ ContentBsdiff *bsdiff = value;
+
+ if (!process_one_bsdiff (repo, builder, &current_part,
+ checksum, bsdiff,
cancellable, error))
goto out;
}
@@ -851,8 +1056,9 @@ generate_delta_lowlatency (OstreeRepo *repo,
guint64 uncompressed_size;
gboolean fallback = FALSE;
- /* Skip content objects we rollsum'd */
- if (g_hash_table_contains (rollsum_optimized_content_objects, checksum))
+ /* Skip content objects we rollsum'd or bsdiff'ed */
+ if (g_hash_table_contains (rollsum_optimized_content_objects, checksum) ||
+ g_hash_table_contains (bsdiff_optimized_content_objects, checksum))
continue;
if (!ostree_repo_load_object_stream (repo, OSTREE_OBJECT_TYPE_FILE, checksum,
@@ -872,14 +1078,15 @@ generate_delta_lowlatency (OstreeRepo *repo,
}
}
- /* Now non-rollsummed regular file content */
+ /* Now non-rollsummed or bsdiff'ed regular file content */
g_hash_table_iter_init (&hashiter, new_reachable_regfile_content);
while (g_hash_table_iter_next (&hashiter, &key, &value))
{
const char *checksum = key;
/* Skip content objects we rollsum'd */
- if (g_hash_table_contains (rollsum_optimized_content_objects, checksum))
+ if (g_hash_table_contains (rollsum_optimized_content_objects, checksum) ||
+ g_hash_table_contains (bsdiff_optimized_content_objects, checksum))
continue;
if (!process_one_object (repo, builder, &current_part,
diff --git a/src/libostree/ostree-repo-static-delta-private.h
b/src/libostree/ostree-repo-static-delta-private.h
index 55ef437..0cf2a0b 100644
--- a/src/libostree/ostree-repo-static-delta-private.h
+++ b/src/libostree/ostree-repo-static-delta-private.h
@@ -137,7 +137,8 @@ typedef enum {
OSTREE_STATIC_DELTA_OP_WRITE = 'w',
OSTREE_STATIC_DELTA_OP_SET_READ_SOURCE = 'r',
OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE = 'R',
- OSTREE_STATIC_DELTA_OP_CLOSE = 'c'
+ OSTREE_STATIC_DELTA_OP_CLOSE = 'c',
+ OSTREE_STATIC_DELTA_OP_BSPATCH = 'B'
} OstreeStaticDeltaOpCode;
gboolean
diff --git a/src/libostree/ostree-repo-static-delta-processing.c
b/src/libostree/ostree-repo-static-delta-processing.c
index 5cfdba9..0ddf79f 100644
--- a/src/libostree/ostree-repo-static-delta-processing.c
+++ b/src/libostree/ostree-repo-static-delta-processing.c
@@ -33,6 +33,7 @@
#include "ostree-lzma-decompressor.h"
#include "otutil.h"
#include "ostree-varint.h"
+#include "bsdiff/bspatch.h"
/* This should really always be true, but hey, let's just assert it */
G_STATIC_ASSERT (sizeof (guint) >= sizeof (guint32));
@@ -100,6 +101,7 @@ OPPROTO(write)
OPPROTO(set_read_source)
OPPROTO(unset_read_source)
OPPROTO(close)
+OPPROTO(bspatch)
#undef OPPROTO
static gboolean
@@ -259,6 +261,10 @@ _ostree_static_delta_part_execute_raw (OstreeRepo *repo,
if (!dispatch_close (repo, state, cancellable, error))
goto out;
break;
+ case OSTREE_STATIC_DELTA_OP_BSPATCH:
+ if (!dispatch_bspatch (repo, state, cancellable, error))
+ goto out;
+ break;
default:
g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT,
"Unknown opcode %u at offset %u", opcode, n_executed);
@@ -486,6 +492,82 @@ do_content_open_generic (OstreeRepo *repo,
return ret;
}
+/* Read cursor passed to bspatch_read() via bspatch_stream.opaque: @offset is
+ * the next position to read in state->payload_data, and @length is the
+ * number of patch bytes that may still be consumed. */
+struct bzpatch_opaque_s
+{
+ StaticDeltaExecutionState *state;
+ guint64 offset, length;
+};
+
+/* bspatch_stream read callback: copy @length bytes of patch data from the
+ * part payload into @buffer and advance the cursor kept in stream->opaque
+ * (a struct bzpatch_opaque_s).
+ *
+ * The offsets originate from the delta being applied, so an out-of-range
+ * request is reported as a failure rather than aborting the process with an
+ * assertion.  The range checks are written to avoid unsigned overflow.
+ *
+ * Returns 0 on success, -1 if @length is negative or the request would run
+ * past the remaining patch bytes or the end of the payload.
+ */
+static int
+bspatch_read (const struct bspatch_stream* stream, void* buffer, int length)
+{
+  struct bzpatch_opaque_s *opaque = stream->opaque;
+  guint64 want;
+
+  if (length < 0)
+    return -1;
+  want = (guint64) length;
+
+  if (want > opaque->length)
+    return -1;
+  if (opaque->offset > opaque->state->payload_size ||
+      want > opaque->state->payload_size - opaque->offset)
+    return -1;
+
+  memcpy (buffer, opaque->state->payload_data + opaque->offset, (gsize) want);
+  opaque->offset += want;
+  opaque->length -= want;
+  return 0;
+}
+
+/* Execute a BSPATCH operation.
+ *
+ * Reads two varints from the operation stream (payload offset and length of
+ * the patch data), loads the whole current read source (state->read_source_fd)
+ * into memory, applies the patch with bspatch() to produce
+ * state->content_size bytes, and writes the result to state->content_out.
+ *
+ * Returns: TRUE on success, FALSE with @error set when the patch range lies
+ * outside the payload, reading the source or writing the output fails, or
+ * bspatch() rejects the patch.
+ */
+static gboolean
+dispatch_bspatch (OstreeRepo *repo,
+                  StaticDeltaExecutionState *state,
+                  GCancellable *cancellable,
+                  GError **error)
+{
+  gboolean ret = FALSE;
+  guint64 offset, length;
+  gs_unref_object GInputStream *in_stream = NULL;
+  gs_unref_object GOutputStream *out_mem_stream = NULL;
+  gs_free guchar *buf = NULL;
+  struct bspatch_stream stream;
+  struct bzpatch_opaque_s opaque;
+  gsize bytes_written;
+
+  if (!read_varuint64 (state, &offset, error))
+    goto out;
+  if (!read_varuint64 (state, &length, error))
+    goto out;
+
+  /* The range comes from the delta itself; reject it up front if it does not
+   * fit in the payload (subtraction form avoids overflow). */
+  if (offset > state->payload_size ||
+      length > state->payload_size - offset)
+    {
+      g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT,
+                           "bspatch range exceeds payload size");
+      goto out;
+    }
+
+  buf = g_malloc0 (state->content_size);
+
+  /* FALSE: the stream must not close the fd, which is owned by @state. */
+  in_stream = g_unix_input_stream_new (state->read_source_fd, FALSE);
+
+  out_mem_stream = g_memory_output_stream_new_resizable ();
+
+  /* g_output_stream_splice() returns -1 on error.  The previous
+   * "!splice (...) < 0" test could never be true, so failures went
+   * unnoticed. */
+  if (g_output_stream_splice (out_mem_stream, in_stream, G_OUTPUT_STREAM_SPLICE_NONE,
+                              cancellable, error) < 0)
+    goto out;
+
+  opaque.state = state;
+  opaque.offset = offset;
+  opaque.length = length;
+  stream.read = bspatch_read;
+  stream.opaque = &opaque;
+  if (bspatch (g_memory_output_stream_get_data (G_MEMORY_OUTPUT_STREAM (out_mem_stream)),
+               g_memory_output_stream_get_data_size (G_MEMORY_OUTPUT_STREAM (out_mem_stream)),
+               buf,
+               state->content_size,
+               &stream) < 0)
+    {
+      /* bspatch() has no access to @error, so set it here. */
+      g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+                           "bsdiff patch failed");
+      goto out;
+    }
+
+  if (!g_output_stream_write_all (state->content_out,
+                                  buf,
+                                  state->content_size,
+                                  &bytes_written,
+                                  cancellable, error))
+    goto out;
+
+  g_assert (bytes_written == state->content_size);
+
+  ret = TRUE;
+ out:
+  return ret;
+}
+
static gboolean
dispatch_open_splice_and_close (OstreeRepo *repo,
StaticDeltaExecutionState *state,
diff --git a/src/libostree/ostree-rollsum.h b/src/libostree/ostree-rollsum.h
index 37003d8..3a96ea5 100644
--- a/src/libostree/ostree-rollsum.h
+++ b/src/libostree/ostree-rollsum.h
@@ -41,4 +41,3 @@ _ostree_compute_rollsum_matches (GBytes *from,
void _ostree_rollsum_matches_free (OstreeRollsumMatches *rollsum);
G_END_DECLS
-
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]