[ostree] core: Import bup's "rollsum" code, add a test case
- From: Colin Walters <walters src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [ostree] core: Import bup's "rollsum" code, add a test case
- Date: Tue, 4 Feb 2014 20:04:35 +0000 (UTC)
commit 844c5ea652251d57f8afbd3ca133d7fc3ce8fc50
Author: Colin Walters <walters verbum org>
Date: Thu Aug 15 09:14:26 2013 -0400
core: Import bup's "rollsum" code, add a test case
For static deltas, one strategy that will be employed is to split each
object into chunks, and only include changed chunks in the deltas.
Makefile-tests.am | 5 ++
src/libostree/bupsplit.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++
src/libostree/bupsplit.h | 49 +++++++++++++++
tests/test-rollsum.c | 79 ++++++++++++++++++++++++
4 files changed, 285 insertions(+), 0 deletions(-)
---
diff --git a/Makefile-tests.am b/Makefile-tests.am
index 442b603..e9f45f3 100644
--- a/Makefile-tests.am
+++ b/Makefile-tests.am
@@ -87,6 +87,11 @@ test_varint_CFLAGS = $(ostree_bin_shared_cflags) $(OT_INTERNAL_GIO_UNIX_CFLAGS)
test_varint_LDADD = $(ostree_bin_shared_ldadd) $(OT_INTERNAL_GIO_UNIX_LIBS)
testmeta_DATA += test-varint.test
+insttest_PROGRAMS += test-rollsum
+test_rollsum_SOURCES = src/libostree/bupsplit.c tests/test-rollsum.c
+test_rollsum_CFLAGS = $(ostree_bin_shared_cflags) $(OT_INTERNAL_GIO_UNIX_CFLAGS)
+test_rollsum_LDADD = $(ostree_bin_shared_ldadd) $(OT_INTERNAL_GIO_UNIX_LIBS)
+
if BUILDOPT_GJS
insttest_SCRIPTS += tests/test-core.js \
tests/test-sizes.js \
diff --git a/src/libostree/bupsplit.c b/src/libostree/bupsplit.c
new file mode 100644
index 0000000..067b2e9
--- /dev/null
+++ b/src/libostree/bupsplit.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2011 Avery Pennarun. All rights reserved.
+ *
+ * (This license applies to bupsplit.c and bupsplit.h only.)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AVERY PENNARUN ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "bupsplit.h"
+#include <stdint.h>
+#include <memory.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+// According to librsync/rollsum.h:
+// "We should make this something other than zero to improve the
+// checksum algorithm: tridge suggests a prime number."
+// apenwarr: I unscientifically tried 0 and 7919, and they both ended up
+// slightly worse than the librsync value of 31 for my arbitrary test data.
+#define ROLLSUM_CHAR_OFFSET 31
+
+typedef struct {
+ unsigned s1, s2;
+ uint8_t window[BUP_WINDOWSIZE];
+ int wofs;
+} Rollsum;
+
+
+// These formulas are based on rollsum.h in the librsync project.
+static void rollsum_add(Rollsum *r, uint8_t drop, uint8_t add)
+{
+ r->s1 += add - drop;
+ r->s2 += r->s1 - (BUP_WINDOWSIZE * (drop + ROLLSUM_CHAR_OFFSET));
+}
+
+
+static void rollsum_init(Rollsum *r)
+{
+ r->s1 = BUP_WINDOWSIZE * ROLLSUM_CHAR_OFFSET;
+ r->s2 = BUP_WINDOWSIZE * (BUP_WINDOWSIZE-1) * ROLLSUM_CHAR_OFFSET;
+ r->wofs = 0;
+ memset(r->window, 0, BUP_WINDOWSIZE);
+}
+
+
+// For some reason, gcc 4.3 (at least) optimizes badly if find_ofs()
+// is static and rollsum_roll is an inline function. Let's use a macro
+// here instead to help out the optimizer.
+#define rollsum_roll(r, ch) do { \
+ rollsum_add((r), (r)->window[(r)->wofs], (ch)); \
+ (r)->window[(r)->wofs] = (ch); \
+ (r)->wofs = ((r)->wofs + 1) % BUP_WINDOWSIZE; \
+} while (0)
+
+
+static uint32_t rollsum_digest(Rollsum *r)
+{
+ return (r->s1 << 16) | (r->s2 & 0xffff);
+}
+
+
+static uint32_t rollsum_sum(uint8_t *buf, size_t ofs, size_t len)
+{
+ size_t count;
+ Rollsum r;
+ rollsum_init(&r);
+ for (count = ofs; count < len; count++)
+ rollsum_roll(&r, buf[count]);
+ return rollsum_digest(&r);
+}
+
+
+int bupsplit_find_ofs(const unsigned char *buf, int len, int *bits)
+{
+ Rollsum r;
+ int count;
+
+ rollsum_init(&r);
+ for (count = 0; count < len; count++)
+ {
+ rollsum_roll(&r, buf[count]);
+ if ((r.s2 & (BUP_BLOBSIZE-1)) == ((~0) & (BUP_BLOBSIZE-1)))
+ {
+ if (bits)
+ {
+ unsigned rsum = rollsum_digest(&r);
+ *bits = BUP_BLOBBITS;
+ rsum >>= BUP_BLOBBITS;
+ for (*bits = BUP_BLOBBITS; (rsum >>= 1) & 1; (*bits)++)
+ ;
+ }
+ return count+1;
+ }
+ }
+ return 0;
+}
+
+
+#ifndef BUP_NO_SELFTEST
+#define BUP_SELFTEST_SIZE 100000
+
+int bupsplit_selftest()
+{
+ uint8_t *buf = malloc(BUP_SELFTEST_SIZE);
+ uint32_t sum1a, sum1b, sum2a, sum2b, sum3a, sum3b;
+ unsigned count;
+
+ srandom(1);
+ for (count = 0; count < BUP_SELFTEST_SIZE; count++)
+ buf[count] = random();
+
+ sum1a = rollsum_sum(buf, 0, BUP_SELFTEST_SIZE);
+ sum1b = rollsum_sum(buf, 1, BUP_SELFTEST_SIZE);
+ sum2a = rollsum_sum(buf, BUP_SELFTEST_SIZE - BUP_WINDOWSIZE*5/2,
+ BUP_SELFTEST_SIZE - BUP_WINDOWSIZE);
+ sum2b = rollsum_sum(buf, 0, BUP_SELFTEST_SIZE - BUP_WINDOWSIZE);
+ sum3a = rollsum_sum(buf, 0, BUP_WINDOWSIZE+3);
+ sum3b = rollsum_sum(buf, 3, BUP_WINDOWSIZE+3);
+
+ fprintf(stderr, "sum1a = 0x%08x\n", sum1a);
+ fprintf(stderr, "sum1b = 0x%08x\n", sum1b);
+ fprintf(stderr, "sum2a = 0x%08x\n", sum2a);
+ fprintf(stderr, "sum2b = 0x%08x\n", sum2b);
+ fprintf(stderr, "sum3a = 0x%08x\n", sum3a);
+ fprintf(stderr, "sum3b = 0x%08x\n", sum3b);
+
+ free(buf);
+ return sum1a!=sum1b || sum2a!=sum2b || sum3a!=sum3b;
+}
+
+#endif // !BUP_NO_SELFTEST
diff --git a/src/libostree/bupsplit.h b/src/libostree/bupsplit.h
new file mode 100644
index 0000000..e37a56a
--- /dev/null
+++ b/src/libostree/bupsplit.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2011 Avery Pennarun. All rights reserved.
+ *
+ * (This license applies to bupsplit.c and bupsplit.h only.)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AVERY PENNARUN ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef __BUPSPLIT_H
+#define __BUPSPLIT_H
+
+#define BUP_BLOBBITS (13)
+#define BUP_BLOBSIZE (1<<BUP_BLOBBITS)
+#define BUP_WINDOWBITS (7)
+#define BUP_WINDOWSIZE (1<<(BUP_WINDOWBITS-1))
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int bupsplit_find_ofs(const unsigned char *buf, int len, int *bits);
+int bupsplit_selftest(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __BUPSPLIT_H */
diff --git a/tests/test-rollsum.c b/tests/test-rollsum.c
new file mode 100644
index 0000000..4d7f50e
--- /dev/null
+++ b/tests/test-rollsum.c
@@ -0,0 +1,79 @@
+/* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*-
+ *
+ * Copyright (C) 2013 Colin Walters <walters verbum org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "config.h"
+
+#include "libgsystem.h"
+
+#include "bupsplit.h"
+
+#define BLOB_MAX (8192*4)
+#define BLOB_READ_SIZE (1024*1024)
+
+int
+main (int argc, char **argv)
+{
+ GCancellable *cancellable = NULL;
+ GError *local_error = NULL;
+ GError **error = &local_error;
+ gs_unref_object GFile *path = NULL;
+ GBytes *bytes = NULL;
+
+ g_setenv ("GIO_USE_VFS", "local", TRUE);
+
+ if (argc > 1)
+ {
+ const guint8 *start;
+ gsize len;
+
+ path = g_file_new_for_path (argv[1]);
+ bytes = gs_file_map_readonly (path, cancellable, error);
+ if (!bytes)
+ goto out;
+
+ start = g_bytes_get_data (bytes, &len);
+ while (TRUE)
+ {
+ int offset, bits;
+ offset = bupsplit_find_ofs (start, MIN(G_MAXINT32, len), &bits);
+ if (offset == 0)
+ break;
+ if (offset > BLOB_MAX)
+ offset = BLOB_MAX;
+ g_print ("%" G_GUINT64_FORMAT "\n", (guint64)offset);
+ start += offset;
+ len -= offset;
+ }
+ }
+ else
+ {
+ bupsplit_selftest ();
+ }
+
+ out:
+ g_clear_pointer (&bytes, g_bytes_unref);
+ if (local_error)
+ {
+ g_printerr ("%s\n", local_error->message);
+ g_error_free (local_error);
+ return 1;
+ }
+ return 0;
+}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]