[ostree] deltas: Implement rollsums
- From: Colin Walters <walters src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [ostree] deltas: Implement rollsums
- Date: Mon, 16 Feb 2015 15:15:10 +0000 (UTC)
commit 9aa7e30b38f33794fc0fda12fa8b0ef50d9fbab5
Author: Colin Walters <walters verbum org>
Date: Fri Jan 30 10:13:07 2015 -0500
deltas: Implement rollsums
This does an rsync-style prepared delta basically. On my test data,
it shaves ~6MB of uncompressed data. Not a huge amount, but I expect
this to be more useful for things like binaries which embed data, etc.
Makefile-libostree.am | 2 +
Makefile-tests.am | 8 +-
.../ostree-repo-static-delta-compilation.c | 474 ++++++++++++--------
src/libostree/ostree-repo-static-delta-private.h | 3 +-
.../ostree-repo-static-delta-processing.c | 59 ++-
src/libostree/ostree-rollsum.c | 201 +++++++++
src/libostree/ostree-rollsum.h | 44 ++
tests/test-rollsum.c | 145 ++-----
8 files changed, 620 insertions(+), 316 deletions(-)
---
diff --git a/Makefile-libostree.am b/Makefile-libostree.am
index 9f59642..f77a36a 100644
--- a/Makefile-libostree.am
+++ b/Makefile-libostree.am
@@ -51,6 +51,8 @@ libostree_1_la_SOURCES = \
src/libostree/ostree-lzma-decompressor.h \
src/libostree/bupsplit.h \
src/libostree/bupsplit.c \
+ src/libostree/ostree-rollsum.h \
+ src/libostree/ostree-rollsum.c \
src/libostree/ostree-varint.h \
src/libostree/ostree-varint.c \
src/libostree/ostree-linuxfsutil.h \
diff --git a/Makefile-tests.am b/Makefile-tests.am
index 8571fa1..42d1be7 100644
--- a/Makefile-tests.am
+++ b/Makefile-tests.am
@@ -87,12 +87,6 @@ INSTALL_DATA_HOOKS += install-gpg-data-hook
echo 'Output=TAP' >> $ tmp; \
mv $ tmp $@)
-%.test: tests/%.js Makefile
- $(AM_V_GEN) (echo '[Test]' > $ tmp; \
- echo 'Exec=env TESTDATADIR=$(pkglibexecdir)/installed-tests
$(pkglibexecdir)/installed-tests/$(notdir $<)' >> $ tmp; \
- echo 'Type=session' >> $ tmp; \
- mv $ tmp $@)
-
if BUILDOPT_GJS
insttest_SCRIPTS += tests/test-core.js \
tests/test-sizes.js \
@@ -109,7 +103,7 @@ check_PROGRAMS = tests/test-rollsum tests/test-varint tests/test-ot-unix-utils
tests_test_ot_unix_utils_CFLAGS = $(ostree_bin_shared_cflags) $(OT_INTERNAL_GIO_UNIX_CFLAGS)
tests_test_ot_unix_utils_LDADD = $(ostree_bin_shared_ldadd) $(OT_INTERNAL_GIO_UNIX_LIBS)
-tests_test_rollsum_SOURCES = src/libostree/bupsplit.c tests/test-rollsum.c
+tests_test_rollsum_SOURCES = src/libostree/bupsplit.c src/libostree/ostree-rollsum.c tests/test-rollsum.c
tests_test_rollsum_CFLAGS = $(ostree_bin_shared_cflags) $(OT_INTERNAL_GIO_UNIX_CFLAGS)
tests_test_rollsum_LDADD = $(ostree_bin_shared_ldadd) $(OT_INTERNAL_GIO_UNIX_LIBS)
diff --git a/src/libostree/ostree-repo-static-delta-compilation.c
b/src/libostree/ostree-repo-static-delta-compilation.c
index ad995d0..594b432 100644
--- a/src/libostree/ostree-repo-static-delta-compilation.c
+++ b/src/libostree/ostree-repo-static-delta-compilation.c
@@ -21,18 +21,16 @@
#include "config.h"
#include <string.h>
-#include <zlib.h>
+#include <gio/gunixoutputstream.h>
#include "ostree-core-private.h"
#include "ostree-repo-private.h"
#include "ostree-lzma-compressor.h"
#include "ostree-repo-static-delta-private.h"
#include "ostree-diff.h"
+#include "ostree-rollsum.h"
#include "otutil.h"
#include "ostree-varint.h"
-#include "bupsplit.h"
-
-#define ROLLSUM_BLOB_MAX (8192*4)
typedef struct {
guint64 uncompressed_size;
@@ -260,6 +258,36 @@ splice_stream_to_payload (OstreeStaticDeltaPartBuilder *current_part,
return ret;
}
+static void
+write_content_mode_xattrs (OstreeRepo *repo,
+ OstreeStaticDeltaPartBuilder *current_part,
+ GFileInfo *content_finfo,
+ GVariant *content_xattrs,
+ gsize *out_mode_offset,
+ gsize *out_xattr_offset)
+{
+ guint32 uid =
+ g_file_info_get_attribute_uint32 (content_finfo, "unix::uid");
+ guint32 gid =
+ g_file_info_get_attribute_uint32 (content_finfo, "unix::gid");
+ guint32 mode =
+ g_file_info_get_attribute_uint32 (content_finfo, "unix::mode");
+ gs_unref_variant GVariant *modev
+ = g_variant_ref_sink (g_variant_new ("(uuu)",
+ GUINT32_TO_BE (uid),
+ GUINT32_TO_BE (gid),
+ GUINT32_TO_BE (mode)));
+
+ *out_mode_offset = write_unique_variant_chunk (current_part,
+ current_part->mode_set,
+ current_part->modes,
+ modev);
+ *out_xattr_offset = write_unique_variant_chunk (current_part,
+ current_part->xattr_set,
+ current_part->xattrs,
+ content_xattrs);
+}
+
static gboolean
process_one_object (OstreeRepo *repo,
OstreeStaticDeltaBuilder *builder,
@@ -327,26 +355,12 @@ process_one_object (OstreeRepo *repo,
else
{
gsize mode_offset, xattr_offset, content_offset;
- guint32 uid =
- g_file_info_get_attribute_uint32 (content_finfo, "unix::uid");
- guint32 gid =
- g_file_info_get_attribute_uint32 (content_finfo, "unix::gid");
- guint32 mode =
- g_file_info_get_attribute_uint32 (content_finfo, "unix::mode");
- gs_unref_variant GVariant *modev
- = g_variant_ref_sink (g_variant_new ("(uuu)",
- GUINT32_TO_BE (uid),
- GUINT32_TO_BE (gid),
- GUINT32_TO_BE (mode)));
-
- mode_offset = write_unique_variant_chunk (current_part,
- current_part->mode_set,
- current_part->modes,
- modev);
- xattr_offset = write_unique_variant_chunk (current_part,
- current_part->xattr_set,
- current_part->xattrs,
- content_xattrs);
+ guint32 mode;
+
+ mode = g_file_info_get_attribute_uint32 (content_finfo, "unix::mode");
+
+ write_content_mode_xattrs (repo, current_part, content_finfo, content_xattrs,
+ &mode_offset, &xattr_offset);
if (S_ISLNK (mode))
{
@@ -382,105 +396,74 @@ process_one_object (OstreeRepo *repo,
}
typedef struct {
- GPtrArray *keys;
- GHashTable *values;
-} OrderedRollsums;
+ char *from_checksum;
+ OstreeRollsumMatches *matches;
+ GBytes *tmp_to;
+} ContentRollsum;
static void
-ordered_rollsums_free (OrderedRollsums *ohash)
+content_rollsums_free (ContentRollsum *rollsum)
{
- g_ptr_array_unref (ohash->keys);
- g_hash_table_unref (ohash->values);
- g_free (ohash);
+ g_free (rollsum->from_checksum);
+ _ostree_rollsum_matches_free (rollsum->matches);
+ g_bytes_unref (rollsum->tmp_to);
+ g_free (rollsum);
}
+/* Load a content object, uncompressing it to an unlinked tmpfile
+ that's mmap()'d and suitable for seeking.
+ */
static gboolean
-rollsum_chunks_crc32 (GInputStream *istream,
- OrderedRollsums **out_rollsums,
- GCancellable *cancellable,
- GError **error)
+get_unpacked_unlinked_content (OstreeRepo *repo,
+ const char *checksum,
+ GBytes **out_content,
+ GFileInfo **out_finfo,
+ GCancellable *cancellable,
+ GError **error)
{
gboolean ret = FALSE;
- gsize start = 0;
- gboolean rollsum_end = FALSE;
- OrderedRollsums *ret_rollsums = g_new0 (OrderedRollsums, 1);
- gs_unref_object GBufferedInputStream *bufinput =
- (GBufferedInputStream*) g_buffered_input_stream_new_sized (istream, ROLLSUM_BLOB_MAX);
-
- ret_rollsums->keys = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
- ret_rollsums->values = g_hash_table_new (NULL, NULL);
-
- while (TRUE)
+ gs_free char *tmpname = g_strdup ("tmpostree-deltaobj-XXXXXX");
+ gs_fd_close int fd = -1;
+ gs_unref_bytes GBytes *ret_content = NULL;
+ gs_unref_object GInputStream *istream = NULL;
+ gs_unref_object GFileInfo *ret_finfo = NULL;
+ gs_unref_object GOutputStream *out = NULL;
+
+ fd = g_mkstemp (tmpname);
+ if (fd == -1)
{
- gssize bytes_read;
- const guint8 *buf;
- gsize bufsize;
- int offset, bits;
-
- bytes_read = g_buffered_input_stream_fill (bufinput, -1, cancellable, error);
- if (bytes_read == -1)
- goto out;
- if (bytes_read == 0)
- break;
-
- buf = g_buffered_input_stream_peek_buffer (bufinput, &bufsize);
-
- if (!rollsum_end)
- {
- offset = bupsplit_find_ofs (buf, MIN(G_MAXINT32, bufsize), &bits);
- if (offset == 0)
- {
- rollsum_end = TRUE;
- offset = MIN(ROLLSUM_BLOB_MAX, bufsize);
- }
- else if (offset > ROLLSUM_BLOB_MAX)
- offset = ROLLSUM_BLOB_MAX;
- }
- else
- offset = MIN(ROLLSUM_BLOB_MAX, bufsize);
-
- if (!g_input_stream_skip ((GInputStream*)bufinput, bufsize, cancellable, error))
- goto out;
-
- /* Use zlib's crc32 */
- { guint32 crc = crc32 (0L, NULL, 0);
- GVariant *val;
-
- crc = crc32 (crc, buf, offset);
+ gs_set_error_from_errno (error, errno);
+ goto out;
+ }
+ /* Doesn't need a name */
+ (void) unlink (tmpname);
- val = g_variant_ref_sink (g_variant_new ("(utt)", crc, (guint64) start, (guint64)offset));
- g_ptr_array_add (ret_rollsums->keys, val);
- g_hash_table_insert (ret_rollsums->values, GUINT_TO_POINTER (crc), val);
- }
+ if (!ostree_repo_load_file (repo, checksum, &istream, &ret_finfo, NULL,
+ cancellable, error))
+ goto out;
- start += offset;
+ if (g_file_info_get_file_type (ret_finfo) != G_FILE_TYPE_REGULAR)
+ {
+ ret = TRUE;
+ goto out;
}
+
+ out = g_unix_output_stream_new (fd, FALSE);
+ if (g_output_stream_splice (out, istream, G_OUTPUT_STREAM_SPLICE_CLOSE_TARGET,
+ cancellable, error) < 0)
+ goto out;
+
+ { GMappedFile *mfile = g_mapped_file_new_from_fd (fd, FALSE, error);
+ ret_content = g_mapped_file_get_bytes (mfile);
+ g_mapped_file_unref (mfile);
+ }
ret = TRUE;
- gs_transfer_out_value (out_rollsums, &ret_rollsums);
+ gs_transfer_out_value (out_content, &ret_content);
out:
- if (ret_rollsums)
- ordered_rollsums_free (ret_rollsums);
return ret;
}
-typedef struct {
- char *from_checksum;
- OrderedRollsums *from_rollsums;
- OrderedRollsums *to_rollsums;
- guint match_ratio;
- guint64 match_size;
-} ContentRollsum;
-
-static void
-content_rollsums_free (ContentRollsum *rollsum)
-{
- g_free (rollsum->from_checksum);
- ordered_rollsums_free (rollsum->from_rollsums);
- ordered_rollsums_free (rollsum->to_rollsums);
- g_free (rollsum);
-}
-
static gboolean
try_content_rollsum (OstreeRepo *repo,
const char *from,
@@ -490,85 +473,203 @@ try_content_rollsum (OstreeRepo *repo,
GError **error)
{
gboolean ret = FALSE;
- OrderedRollsums *from_rollsum = NULL;
- OrderedRollsums *to_rollsum = NULL;
- gs_unref_object GInputStream *from_istream = NULL;
+ gs_unref_hashtable GHashTable *from_rollsum = NULL;
+ gs_unref_hashtable GHashTable *to_rollsum = NULL;
+ gs_unref_bytes GBytes *tmp_from = NULL;
+ gs_unref_bytes GBytes *tmp_to = NULL;
gs_unref_object GFileInfo *from_finfo = NULL;
- gs_unref_object GInputStream *to_istream = NULL;
gs_unref_object GFileInfo *to_finfo = NULL;
+ OstreeRollsumMatches *matches;
ContentRollsum *ret_rollsum = NULL;
- guint total = 0;
- guint matches = 0;
- guint match_ratio = 0;
- guint64 match_size = 0;
- gpointer hkey, hvalue;
- GHashTableIter hiter;
*out_rollsum = NULL;
- if (!ostree_repo_load_file (repo, from, &from_istream, &from_finfo, NULL,
- cancellable, error))
+ /* Load the content objects, splice them to uncompressed temporary files that
+ * we can just mmap() and seek around in conveniently.
+ */
+ if (!get_unpacked_unlinked_content (repo, from, &tmp_from, &from_finfo,
+ cancellable, error))
goto out;
- if (!ostree_repo_load_file (repo, to, &to_istream, &to_finfo, NULL,
- cancellable, error))
+ if (!get_unpacked_unlinked_content (repo, to, &tmp_to, &to_finfo,
+ cancellable, error))
goto out;
/* Only try to rollsum regular files obviously */
- if (!(g_file_info_get_file_type (from_finfo) == G_FILE_TYPE_REGULAR
- && g_file_info_get_file_type (to_finfo) == G_FILE_TYPE_REGULAR))
+ if (!(tmp_from && tmp_to))
{
ret = TRUE;
goto out;
}
- g_assert (from_istream && to_istream);
+ matches = _ostree_compute_rollsum_matches (tmp_from, tmp_to);
- if (!rollsum_chunks_crc32 (from_istream, &from_rollsum, cancellable, error))
- goto out;
- if (!rollsum_chunks_crc32 (to_istream, &to_rollsum, cancellable, error))
- goto out;
+ { guint match_ratio = (matches->bufmatches*100)/matches->total;
- g_clear_object (&from_istream);
- g_clear_object (&to_istream);
+ /* Only proceed if the file contains (arbitrary) more than 25% of
+ * the previous chunks.
+ */
+ if (match_ratio < 25)
+ {
+ ret = TRUE;
+ goto out;
+ }
+ }
+
+ g_printerr ("rollsum for %s; crcs=%u bufs=%u total=%u matchsize=%llu\n",
+ to, matches->crcmatches,
+ matches->bufmatches,
+ matches->total, (unsigned long long)matches->match_size);
+
+ ret_rollsum = g_new0 (ContentRollsum, 1);
+ ret_rollsum->from_checksum = g_strdup (from);
+ ret_rollsum->matches = matches; matches = NULL;
+ ret_rollsum->tmp_to = tmp_to; tmp_to = NULL;
+
+ ret = TRUE;
+ gs_transfer_out_value (out_rollsum, &ret_rollsum);
+ out:
+ if (matches)
+ _ostree_rollsum_matches_free (matches);
+ return ret;
+}
+
+static void
+append_payload_chunk_and_write (OstreeStaticDeltaPartBuilder *current_part,
+ const guint8 *buf,
+ guint64 offset)
+{
+ guint64 payload_start;
+
+ payload_start = current_part->payload->len;
+ g_string_append_len (current_part->payload, (char*)buf, offset);
+ g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_WRITE);
+ _ostree_write_varuint64 (current_part->operations, offset);
+ _ostree_write_varuint64 (current_part->operations, payload_start);
+}
+
+static gboolean
+process_one_rollsum (OstreeRepo *repo,
+ OstreeStaticDeltaBuilder *builder,
+ OstreeStaticDeltaPartBuilder **current_part_val,
+ const char *to_checksum,
+ ContentRollsum *rollsum,
+ GCancellable *cancellable,
+ GError **error)
+{
+ gboolean ret = FALSE;
+ guint64 content_size;
+ gs_unref_object GInputStream *content_stream = NULL;
+ gs_unref_object GFileInfo *content_finfo = NULL;
+ gs_unref_variant GVariant *content_xattrs = NULL;
+ OstreeStaticDeltaPartBuilder *current_part = *current_part_val;
+ const guint8 *tmp_to_buf;
+ gsize tmp_to_len;
- g_hash_table_iter_init (&hiter, to_rollsum->values);
- while (g_hash_table_iter_next (&hiter, &hkey, &hvalue))
+ /* Check to see if this delta has gone over maximum size */
+ if (current_part->objects->len > 0 &&
+ current_part->payload->len > builder->max_chunk_size_bytes)
{
- GVariant *chunk = hvalue;
- if (g_hash_table_contains (from_rollsum->values, hkey))
+ *current_part_val = current_part = allocate_part (builder);
+ }
+
+ tmp_to_buf = g_bytes_get_data (rollsum->tmp_to, &tmp_to_len);
+
+ if (!ostree_repo_load_file (repo, to_checksum, &content_stream,
+ &content_finfo, &content_xattrs,
+ cancellable, error))
+ goto out;
+ content_size = g_file_info_get_size (content_finfo);
+ g_assert_cmpint (tmp_to_len, ==, content_size);
+
+ current_part->uncompressed_size += content_size;
+
+ g_ptr_array_add (current_part->objects, ostree_object_name_serialize (to_checksum,
OSTREE_OBJECT_TYPE_FILE));
+
+ { gsize mode_offset, xattr_offset, from_csum_offset;
+ gboolean reading_payload = TRUE;
+ guchar source_csum[32];
+ guint i;
+
+ write_content_mode_xattrs (repo, current_part, content_finfo, content_xattrs,
+ &mode_offset, &xattr_offset);
+
+ /* Write the origin checksum */
+ ostree_checksum_inplace_to_bytes (rollsum->from_checksum, source_csum);
+ from_csum_offset = current_part->payload->len;
+ g_string_append_len (current_part->payload, (char*)source_csum, sizeof (source_csum));
+
+ g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_OPEN);
+ _ostree_write_varuint64 (current_part->operations, mode_offset);
+ _ostree_write_varuint64 (current_part->operations, xattr_offset);
+ _ostree_write_varuint64 (current_part->operations, content_size);
+
+ { guint64 writing_offset = 0;
+ guint64 offset = 0, to_start = 0, from_start = 0;
+ GPtrArray *matchlist = rollsum->matches->matches;
+
+ g_assert (matchlist->len > 0);
+ for (i = 0; i < matchlist->len; i++)
{
- guint64 offset;
- g_variant_get (chunk, "(utt)", NULL, NULL, &offset);
- matches++;
- match_size += offset;
+ GVariant *match = matchlist->pdata[i];
+ guint32 crc;
+ guint64 prefix;
+
+ g_variant_get (match, "(uttt)", &crc, &offset, &to_start, &from_start);
+
+ prefix = to_start - writing_offset;
+
+ if (prefix > 0)
+ {
+ if (!reading_payload)
+ {
+ g_string_append_c (current_part->operations,
(gchar)OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE);
+ reading_payload = TRUE;
+ }
+
+ g_assert_cmpint (writing_offset + prefix, <=, tmp_to_len);
+ append_payload_chunk_and_write (current_part, tmp_to_buf + writing_offset, prefix);
+ writing_offset += prefix;
+ }
+
+ if (reading_payload)
+ {
+ g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_SET_READ_SOURCE);
+ _ostree_write_varuint64 (current_part->operations, from_csum_offset);
+ reading_payload = FALSE;
+ }
+
+ g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_WRITE);
+ _ostree_write_varuint64 (current_part->operations, offset);
+ _ostree_write_varuint64 (current_part->operations, from_start);
+ writing_offset += offset;
}
- total++;
- }
- match_ratio = (matches*100)/total;
+ if (!reading_payload)
+ {
+ g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE);
+ reading_payload = TRUE;
+ }
+
+ { guint64 remainder = tmp_to_len - writing_offset;
+ if (remainder > 0)
+ append_payload_chunk_and_write (current_part, tmp_to_buf + writing_offset, remainder);
+ writing_offset += remainder;
+ g_assert_cmpint (writing_offset, ==, tmp_to_len);
+ }
- /* Only proceed if the file contains (arbitrary) more than 25% of
- * the previous chunks.
- */
- if (match_ratio < 25)
- {
- ret = TRUE;
- goto out;
+ g_assert_cmpint (writing_offset, ==, content_size);
}
- ret_rollsum = g_new0 (ContentRollsum, 1);
- ret_rollsum->match_ratio = match_ratio;
- ret_rollsum->match_size = match_size;
- ret_rollsum->from_checksum = g_strdup (from);
- ret_rollsum->from_rollsums = from_rollsum; from_rollsum = NULL;
- ret_rollsum->to_rollsums = to_rollsum; to_rollsum = NULL;
-
+
+ g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_CLOSE);
+ }
+
ret = TRUE;
- gs_transfer_out_value (out_rollsum, &ret_rollsum);
out:
return ret;
}
+
static gboolean
generate_delta_lowlatency (OstreeRepo *repo,
const char *from,
@@ -694,16 +795,47 @@ generate_delta_lowlatency (OstreeRepo *repo,
continue;
g_hash_table_insert (rollsum_optimized_content_objects, g_strdup (to_checksum), rollsum);
- builder->rollsum_size += rollsum->match_size;
+ builder->rollsum_size += rollsum->matches->match_size;
}
g_printerr ("rollsum for %u/%u modified\n",
g_hash_table_size (rollsum_optimized_content_objects),
g_hash_table_size (modified_content_objects));
+ current_part = allocate_part (builder);
+
+ /* Pack the metadata first */
+ g_hash_table_iter_init (&hashiter, new_reachable_metadata);
+ while (g_hash_table_iter_next (&hashiter, &key, &value))
+ {
+ GVariant *serialized_key = key;
+ const char *checksum;
+ OstreeObjectType objtype;
+
+ ostree_object_name_deserialize (serialized_key, &checksum, &objtype);
+
+ if (!process_one_object (repo, builder, ¤t_part,
+ checksum, objtype,
+ cancellable, error))
+ goto out;
+ }
+
+ /* Now do rollsummed objects */
+
+ g_hash_table_iter_init (&hashiter, rollsum_optimized_content_objects);
+ while (g_hash_table_iter_next (&hashiter, &key, &value))
+ {
+ const char *checksum = key;
+ ContentRollsum *rollsum = value;
+
+ if (!process_one_rollsum (repo, builder, ¤t_part,
+ checksum, rollsum,
+ cancellable, error))
+ goto out;
+ }
+
/* Scan for large objects, so we can fall back to plain HTTP-based
- * fetch. In the future this should come after an rsync-style
- * rolling delta check for modified files.
+ * fetch.
*/
g_hash_table_iter_init (&hashiter, new_reachable_content);
while (g_hash_table_iter_next (&hashiter, &key, &value))
@@ -716,6 +848,10 @@ generate_delta_lowlatency (OstreeRepo *repo,
ostree_object_name_deserialize (serialized_key, &checksum, &objtype);
+ /* Skip content objects we rollsum'd */
+ if (g_hash_table_contains (rollsum_optimized_content_objects, checksum))
+ continue;
+
if (!ostree_repo_load_object_stream (repo, objtype, checksum,
NULL, &uncompressed_size,
cancellable, error))
@@ -734,25 +870,7 @@ generate_delta_lowlatency (OstreeRepo *repo,
}
}
- current_part = allocate_part (builder);
-
- /* Pack the metadata first */
- g_hash_table_iter_init (&hashiter, new_reachable_metadata);
- while (g_hash_table_iter_next (&hashiter, &key, &value))
- {
- GVariant *serialized_key = key;
- const char *checksum;
- OstreeObjectType objtype;
-
- ostree_object_name_deserialize (serialized_key, &checksum, &objtype);
-
- if (!process_one_object (repo, builder, ¤t_part,
- checksum, objtype,
- cancellable, error))
- goto out;
- }
-
- /* Now content */
+ /* Now non-rollsummed content */
g_hash_table_iter_init (&hashiter, new_reachable_content);
while (g_hash_table_iter_next (&hashiter, &key, &value))
{
@@ -762,6 +880,10 @@ generate_delta_lowlatency (OstreeRepo *repo,
ostree_object_name_deserialize (serialized_key, &checksum, &objtype);
+ /* Skip content objects we rollsum'd */
+ if (g_hash_table_contains (rollsum_optimized_content_objects, checksum))
+ continue;
+
if (!process_one_object (repo, builder, ¤t_part,
checksum, objtype,
cancellable, error))
diff --git a/src/libostree/ostree-repo-static-delta-private.h
b/src/libostree/ostree-repo-static-delta-private.h
index 09d4898..2478f16 100644
--- a/src/libostree/ostree-repo-static-delta-private.h
+++ b/src/libostree/ostree-repo-static-delta-private.h
@@ -135,7 +135,8 @@ typedef enum {
OSTREE_STATIC_DELTA_OP_OPEN_SPLICE_AND_CLOSE = 'S',
OSTREE_STATIC_DELTA_OP_OPEN = 'o',
OSTREE_STATIC_DELTA_OP_WRITE = 'w',
- OSTREE_STATIC_DELTA_OP_SET_READ_SOURCE = 'R',
+ OSTREE_STATIC_DELTA_OP_SET_READ_SOURCE = 'r',
+ OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE = 'R',
OSTREE_STATIC_DELTA_OP_CLOSE = 'c'
} OstreeStaticDeltaOpCode;
diff --git a/src/libostree/ostree-repo-static-delta-processing.c
b/src/libostree/ostree-repo-static-delta-processing.c
index 1cb66de..b268fb2 100644
--- a/src/libostree/ostree-repo-static-delta-processing.c
+++ b/src/libostree/ostree-repo-static-delta-processing.c
@@ -98,6 +98,7 @@ OPPROTO(open_splice_and_close)
OPPROTO(open)
OPPROTO(write)
OPPROTO(set_read_source)
+OPPROTO(unset_read_source)
OPPROTO(close)
#undef OPPROTO
@@ -250,6 +251,10 @@ _ostree_static_delta_part_execute_raw (OstreeRepo *repo,
if (!dispatch_set_read_source (repo, state, cancellable, error))
goto out;
break;
+ case OSTREE_STATIC_DELTA_OP_UNSET_READ_SOURCE:
+ if (!dispatch_unset_read_source (repo, state, cancellable, error))
+ goto out;
+ break;
case OSTREE_STATIC_DELTA_OP_CLOSE:
if (!dispatch_close (repo, state, cancellable, error))
goto out;
@@ -491,7 +496,7 @@ dispatch_open_splice_and_close (OstreeRepo *repo,
if (!open_output_target (state, cancellable, error))
goto out;
-
+
if (OSTREE_OBJECT_TYPE_IS_META (state->output_objtype))
{
gs_unref_variant GVariant *metadata = NULL;
@@ -519,7 +524,6 @@ dispatch_open_splice_and_close (OstreeRepo *repo,
{
guint64 content_offset;
guint64 objlen;
- guint64 content_size;
gsize bytes_written;
gs_unref_object GInputStream *object_input = NULL;
gs_unref_object GInputStream *memin = NULL;
@@ -527,11 +531,11 @@ dispatch_open_splice_and_close (OstreeRepo *repo,
if (!do_content_open_generic (repo, state, cancellable, error))
goto out;
- if (!read_varuint64 (state, &content_size, error))
+ if (!read_varuint64 (state, &state->content_size, error))
goto out;
if (!read_varuint64 (state, &content_offset, error))
goto out;
- if (!validate_ofs (state, content_offset, content_size, error))
+ if (!validate_ofs (state, content_offset, state->content_size, error))
goto out;
/* Fast path for regular files to bare repositories */
@@ -551,7 +555,7 @@ dispatch_open_splice_and_close (OstreeRepo *repo,
{
if (!g_output_stream_write_all (state->content_out,
state->payload_data + content_offset,
- content_size,
+ state->content_size,
&bytes_written,
cancellable, error))
goto out;
@@ -567,14 +571,14 @@ dispatch_open_splice_and_close (OstreeRepo *repo,
if (S_ISLNK (state->mode))
{
gs_free char *nulterminated_target =
- g_strndup ((char*)state->payload_data + content_offset, content_size);
+ g_strndup ((char*)state->payload_data + content_offset, state->content_size);
g_file_info_set_symlink_target (finfo, nulterminated_target);
}
else
{
g_assert (S_ISREG (state->mode));
- g_file_info_set_size (finfo, content_size);
- memin = g_memory_input_stream_new_from_data (state->payload_data + content_offset,
content_size, NULL);
+ g_file_info_set_size (finfo, state->content_size);
+ memin = g_memory_input_stream_new_from_data (state->payload_data + content_offset,
state->content_size, NULL);
}
if (!ostree_raw_file_to_content_stream (memin, finfo, state->xattrs,
@@ -621,6 +625,9 @@ dispatch_open (OstreeRepo *repo,
if (!do_content_open_generic (repo, state, cancellable, error))
goto out;
+ if (!read_varuint64 (state, &state->content_size, error))
+ goto out;
+
if (!_ostree_repo_open_trusted_content_bare (repo, state->checksum,
state->content_size,
&state->barecommitstate,
@@ -651,8 +658,6 @@ dispatch_write (OstreeRepo *repo,
goto out;
if (!read_varuint64 (state, &content_offset, error))
goto out;
- if (!validate_ofs (state, content_offset, content_size, error))
- goto out;
if (!state->have_obj)
{
@@ -695,6 +700,9 @@ dispatch_write (OstreeRepo *repo,
}
else
{
+ if (!validate_ofs (state, content_offset, content_size, error))
+ goto out;
+
if (!g_output_stream_write_all (state->content_out,
state->payload_data + content_offset,
content_size,
@@ -746,6 +754,29 @@ dispatch_set_read_source (OstreeRepo *repo,
}
static gboolean
+dispatch_unset_read_source (OstreeRepo *repo,
+ StaticDeltaExecutionState *state,
+ GCancellable *cancellable,
+ GError **error)
+{
+ gboolean ret = FALSE;
+
+ if (state->read_source_fd)
+ {
+ (void) close (state->read_source_fd);
+ state->read_source_fd = -1;
+ }
+
+ g_clear_pointer (&state->read_source_object, g_free);
+
+ ret = TRUE;
+ /* out: */
+ if (!ret)
+ g_prefix_error (error, "opcode unset-read-source: ");
+ return ret;
+}
+
+static gboolean
dispatch_close (OstreeRepo *repo,
StaticDeltaExecutionState *state,
GCancellable *cancellable,
@@ -765,14 +796,10 @@ dispatch_close (OstreeRepo *repo,
goto out;
}
- if (state->read_source_fd)
- {
- (void) close (state->read_source_fd);
- state->read_source_fd = -1;
- }
+ if (!dispatch_unset_read_source (repo, state, cancellable, error))
+ goto out;
g_clear_pointer (&state->xattrs, g_variant_unref);
- g_clear_pointer (&state->read_source_object, g_free);
g_clear_object (&state->content_out);
state->checksum_index++;
diff --git a/src/libostree/ostree-rollsum.c b/src/libostree/ostree-rollsum.c
new file mode 100644
index 0000000..5a57e1c
--- /dev/null
+++ b/src/libostree/ostree-rollsum.c
@@ -0,0 +1,201 @@
+/* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*-
+ *
+ * Copyright (C) 2015 Colin Walters <walters verbum org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "config.h"
+
+#include <string.h>
+#include <zlib.h>
+
+#include "ostree-rollsum.h"
+#include "libgsystem.h"
+#include "bupsplit.h"
+
+#define ROLLSUM_BLOB_MAX (8192*4)
+
+static GHashTable *
+rollsum_chunks_crc32 (GBytes *bytes)
+{
+ gsize start = 0;
+ gboolean rollsum_end = FALSE;
+ GHashTable *ret_rollsums = NULL;
+ const guint8 *buf;
+ gsize buflen;
+ gsize remaining;
+
+ ret_rollsums = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify)g_ptr_array_unref);
+
+ buf = g_bytes_get_data (bytes, &buflen);
+
+ remaining = buflen;
+ while (remaining > 0)
+ {
+ int offset, bits;
+
+ if (!rollsum_end)
+ {
+ offset = bupsplit_find_ofs (buf + start, MIN(G_MAXINT32, remaining), &bits);
+ if (offset == 0)
+ {
+ rollsum_end = TRUE;
+ offset = MIN(ROLLSUM_BLOB_MAX, remaining);
+ }
+ else if (offset > ROLLSUM_BLOB_MAX)
+ offset = ROLLSUM_BLOB_MAX;
+ }
+ else
+ offset = MIN(ROLLSUM_BLOB_MAX, remaining);
+
+ /* Use zlib's crc32 */
+ { guint32 crc = crc32 (0L, NULL, 0);
+ GVariant *val;
+ GPtrArray *matches;
+
+ crc = crc32 (crc, buf, offset);
+
+ val = g_variant_ref_sink (g_variant_new ("(utt)", crc, (guint64) start, (guint64)offset));
+ matches = g_hash_table_lookup (ret_rollsums, GUINT_TO_POINTER (crc));
+ if (!matches)
+ {
+ matches = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
+ g_hash_table_insert (ret_rollsums, GUINT_TO_POINTER (crc), matches);
+ }
+ g_ptr_array_add (matches, val);
+ }
+
+ start += offset;
+ remaining -= offset;
+ }
+
+ return ret_rollsums;
+}
+
+static gint
+compare_matches (const void *app,
+ const void *bpp)
+{
+ GVariant **avpp = (GVariant**)app;
+ GVariant *a = *avpp;
+ GVariant **bvpp = (GVariant**)bpp;
+ GVariant *b = *bvpp;
+ guint64 a_start, b_start;
+
+ g_variant_get_child (a, 2, "t", &a_start);
+ g_variant_get_child (b, 2, "t", &b_start);
+
+ g_assert_cmpint (a_start, !=, b_start);
+
+ if (a_start < b_start)
+ return -1;
+ return 1;
+}
+
+OstreeRollsumMatches *
+_ostree_compute_rollsum_matches (GBytes *from,
+ GBytes *to)
+{
+ OstreeRollsumMatches *ret_rollsum = NULL;
+ gs_unref_hashtable GHashTable *from_rollsum = NULL;
+ gs_unref_hashtable GHashTable *to_rollsum = NULL;
+ gs_unref_ptrarray GPtrArray *matches = NULL;
+ const guint8 *from_buf;
+ gsize from_len;
+ const guint8 *to_buf;
+ gsize to_len;
+ gpointer hkey, hvalue;
+ GHashTableIter hiter;
+
+ ret_rollsum = g_new0 (OstreeRollsumMatches, 1);
+
+ matches = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
+
+ from_buf = g_bytes_get_data (from, &from_len);
+ to_buf = g_bytes_get_data (to, &to_len);
+
+ from_rollsum = rollsum_chunks_crc32 (from);
+ to_rollsum = rollsum_chunks_crc32 (to);
+
+ g_hash_table_iter_init (&hiter, to_rollsum);
+ while (g_hash_table_iter_next (&hiter, &hkey, &hvalue))
+ {
+ GPtrArray *to_chunks = hvalue;
+ GPtrArray *from_chunks;
+
+ from_chunks = g_hash_table_lookup (from_rollsum, hkey);
+ if (from_chunks != NULL)
+ {
+ guint i;
+
+ ret_rollsum->crcmatches++;
+
+ for (i = 0; i < to_chunks->len; i++)
+ {
+ GVariant *to_chunk = to_chunks->pdata[i];
+ guint64 to_start, to_offset;
+ guint32 tocrc;
+ guint j;
+
+ g_variant_get (to_chunk, "(utt)", &tocrc, &to_start, &to_offset);
+
+ for (j = 0; j < from_chunks->len; j++)
+ {
+ GVariant *from_chunk = from_chunks->pdata[j];
+ guint32 fromcrc;
+ guint64 from_start, from_offset;
+
+ g_variant_get (from_chunk, "(utt)", &fromcrc, &from_start, &from_offset);
+
+ g_assert (fromcrc == tocrc);
+ g_assert (to_offset == from_offset);
+
+ /* Rsync uses a cryptographic checksum, but let's be
+ * very conservative here and just memcmp.
+ */
+ if (memcmp (from_buf + from_start, to_buf + to_start, to_offset) == 0)
+ {
+ GVariant *match = g_variant_new ("(uttt)", fromcrc, to_offset, to_start, from_start);
+ ret_rollsum->bufmatches++;
+ ret_rollsum->match_size += to_offset;
+ g_ptr_array_add (matches, g_variant_ref_sink (match));
+ break; /* Don't need any more matches */
+ }
+ }
+ }
+ }
+
+ ret_rollsum->total += to_chunks->len;
+ }
+
+ g_ptr_array_sort (matches, compare_matches);
+
+ ret_rollsum->from_rollsums = from_rollsum; from_rollsum = NULL;
+ ret_rollsum->to_rollsums = to_rollsum; to_rollsum = NULL;
+ ret_rollsum->matches = matches; matches = NULL;
+
+ return ret_rollsum;
+}
+
+void
+_ostree_rollsum_matches_free (OstreeRollsumMatches *rollsum)
+{
+ g_hash_table_unref (rollsum->to_rollsums);
+ g_hash_table_unref (rollsum->from_rollsums);
+ g_ptr_array_unref (rollsum->matches);
+ g_free (rollsum);
+}
diff --git a/src/libostree/ostree-rollsum.h b/src/libostree/ostree-rollsum.h
new file mode 100644
index 0000000..37003d8
--- /dev/null
+++ b/src/libostree/ostree-rollsum.h
@@ -0,0 +1,44 @@
+/* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*-
+ *
+ * Copyright (C) 2015 Colin Walters <walters verbum org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#pragma once
+
+#include <gio/gio.h>
+
+G_BEGIN_DECLS
+
+typedef struct {
+ GHashTable *from_rollsums;
+ GHashTable *to_rollsums;
+ guint crcmatches;
+ guint bufmatches;
+ guint total;
+ guint64 match_size;
+ GPtrArray *matches;
+} OstreeRollsumMatches;
+
+OstreeRollsumMatches *
+_ostree_compute_rollsum_matches (GBytes *from,
+ GBytes *to);
+
+void _ostree_rollsum_matches_free (OstreeRollsumMatches *rollsum);
+
+G_END_DECLS
+
diff --git a/tests/test-rollsum.c b/tests/test-rollsum.c
index 1b9174c..e79c895 100644
--- a/tests/test-rollsum.c
+++ b/tests/test-rollsum.c
@@ -20,138 +20,51 @@
#include "config.h"
-#include "libgsystem.h"
-
-#include "bupsplit.h"
-
-#define BLOB_MAX (8192*4)
-
-static GPtrArray *
-rollsum_checksums_for_data (GBytes *bytes)
-{
- const guint8 *start;
- gsize len;
- gboolean rollsum_end = FALSE;
- GPtrArray *ret = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
-
- start = g_bytes_get_data (bytes, &len);
- while (len > 0)
- {
- int offset, bits;
- if (!rollsum_end)
- {
- offset = bupsplit_find_ofs (start, MIN(G_MAXINT32, len), &bits);
- if (offset == 0)
- {
- rollsum_end = TRUE;
- offset = MIN(BLOB_MAX, len);
- }
- else if (offset > BLOB_MAX)
- offset = BLOB_MAX;
- }
- else
- offset = MIN(BLOB_MAX, len);
-
- {
- gs_free char *blobcsum =
- g_compute_checksum_for_data (G_CHECKSUM_SHA256,
- start, offset);
- g_ptr_array_add (ret, g_variant_ref_sink (g_variant_new ("(st)",
- blobcsum, (guint64)offset)));
- }
- start += offset;
- len -= offset;
- }
- return ret;
-}
+#include "ostree-rollsum.h"
+#include <unistd.h>
+#include <stdlib.h>
-static void
-print_rollsums (GPtrArray *rollsums)
-{
- guint i;
- for (i = 0; i < rollsums->len; i++)
- {
- GVariant *sum = rollsums->pdata[i];
- const char *csum;
- guint64 val;
- g_variant_get (sum, "(&st)", &csum, &val);
- g_print ("chunk %s %" G_GUINT64_FORMAT "\n", csum, val);
- }
-}
+#include "libgsystem.h"
int
main (int argc, char **argv)
{
- GCancellable *cancellable = NULL;
GError *local_error = NULL;
GError **error = &local_error;
- gs_unref_object GFile *path = NULL;
- GBytes *bytes = NULL;
+ GBytes *from_bytes = NULL;
+ GBytes *to_bytes = NULL;
+ const char *from_path;
+ const char *to_path;
+ OstreeRollsumMatches *matches;
+ GMappedFile *mfile;
g_setenv ("GIO_USE_VFS", "local", TRUE);
- if (argc == 2)
- {
- gs_unref_ptrarray GPtrArray *rollsums = NULL;
+ if (argc < 3)
+ exit (1);
- path = g_file_new_for_path (argv[1]);
- bytes = gs_file_map_readonly (path, cancellable, error);
- if (!bytes)
- goto out;
+ from_path = argv[1];
+ to_path = argv[2];
- rollsums = rollsum_checksums_for_data (bytes);
- print_rollsums (rollsums);
- }
- else if (argc > 2)
- {
- guint i;
- gs_unref_hashtable GHashTable *sums = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
- guint64 input_size = 0;
- guint64 rollsum_size = 0;
+ mfile = g_mapped_file_new (from_path, FALSE, error);
+ if (!mfile)
+ goto out;
+ from_bytes = g_mapped_file_get_bytes (mfile);
+ g_mapped_file_unref (mfile);
+ mfile = g_mapped_file_new (to_path, FALSE, error);
+ if (!mfile)
+ goto out;
+ to_bytes = g_mapped_file_get_bytes (mfile);
+ g_mapped_file_unref (mfile);
- for (i = 1; i < argc; i++)
- {
- guint j;
- gs_unref_ptrarray GPtrArray *rollsums = NULL;
- guint64 this_rollsum_size = 0;
+ matches = _ostree_compute_rollsum_matches (from_bytes, to_bytes);
- path = g_file_new_for_path (argv[i]);
- bytes = gs_file_map_readonly (path, cancellable, error);
- if (!bytes)
- goto out;
-
- input_size += g_bytes_get_size (bytes);
-
- g_print ("input: %s size: %" G_GUINT64_FORMAT "\n", argv[i], g_bytes_get_size (bytes));
-
- rollsums = rollsum_checksums_for_data (bytes);
- print_rollsums (rollsums);
- for (j = 0; j < rollsums->len; j++)
- {
- GVariant *sum = rollsums->pdata[j];
- const char *csum;
- guint64 ofs;
- g_variant_get (sum, "(&st)", &csum, &ofs);
- if (!g_hash_table_contains (sums, csum))
- {
- g_hash_table_add (sums, g_strdup (csum));
- rollsum_size += ofs;
- }
- this_rollsum_size += ofs;
- }
- g_print ("input: rollsum size: %" G_GUINT64_FORMAT "\n", this_rollsum_size);
- }
- g_print ("rollsum total:%u input:%" G_GUINT64_FORMAT " output: %" G_GUINT64_FORMAT " speedup:%f\n",
- g_hash_table_size (sums), input_size, rollsum_size,
- (((double)(input_size+1)) / ((double) rollsum_size + 1)));
- }
- else
- {
- bupsplit_selftest ();
- }
+ g_printerr ("rollsum crcs=%u bufs=%u total=%u matchsize=%llu\n",
+ matches->crcmatches,
+ matches->bufmatches,
+ matches->total, (unsigned long long)matches->match_size);
out:
- g_clear_pointer (&bytes, g_bytes_unref);
if (local_error)
{
g_printerr ("%s\n", local_error->message);
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]