[ostree/wip/delta2] Expand test-rollsum to show stats on deltas



commit 93e8c04bdc37b7b87f1a63d279c4d17e70994021
Author: Colin Walters <walters verbum org>
Date:   Fri May 9 08:34:34 2014 -0400

    Expand test-rollsum to show stats on deltas

 .../ostree-repo-static-delta-compilation.c         |    3 +
 tests/test-rollsum.c                               |   98 ++++++++++++++++---
 2 files changed, 85 insertions(+), 16 deletions(-)
---
diff --git a/src/libostree/ostree-repo-static-delta-compilation.c b/src/libostree/ostree-repo-static-delta-compilation.c
index 9c9d603..24fe8f4 100644
--- a/src/libostree/ostree-repo-static-delta-compilation.c
+++ b/src/libostree/ostree-repo-static-delta-compilation.c
@@ -29,6 +29,9 @@
 #include "ostree-diff.h"
 #include "otutil.h"
 #include "ostree-varint.h"
+#include "bupsplit.h"
+
+#define ROLLSUM_BLOB_MAX (8192*4)
 
 typedef struct {
   guint64 uncompressed_size;
diff --git a/tests/test-rollsum.c b/tests/test-rollsum.c
index 4d7f50e..9c31585 100644
--- a/tests/test-rollsum.c
+++ b/tests/test-rollsum.c
@@ -25,7 +25,49 @@
 #include "bupsplit.h"
 
 #define BLOB_MAX (8192*4)
-#define BLOB_READ_SIZE (1024*1024)
+/* Chunk @bytes via bupsplit_find_ofs (chunks capped at BLOB_MAX); returns a new array of "(st)" = (SHA256, size). */
+static GPtrArray *
+rollsum_checksums_for_data (GBytes     *bytes)
+{
+  const guint8 *start;
+  gsize len;
+  GPtrArray *ret = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
+
+  start = g_bytes_get_data (bytes, &len);
+  while (TRUE)
+    {
+      int offset, bits; /* bits: out-argument of bupsplit_find_ofs, not read here */
+      offset = bupsplit_find_ofs (start, MIN(G_MAXINT32, len), &bits); /* scan length clamped to G_MAXINT32 */
+      if (offset == 0) /* presumably "no split point found" (TODO confirm); any remaining tail is not added */
+        break;
+      if (offset > BLOB_MAX) /* cap the chunk size */
+        offset = BLOB_MAX;
+      {
+        gs_free char *blobcsum =
+          g_compute_checksum_for_data (G_CHECKSUM_SHA256,
+                                       start, offset);
+        g_ptr_array_add (ret, g_variant_ref_sink (g_variant_new ("(st)",
+                                                                 blobcsum, (guint64)offset)));
+      }
+      start += offset; /* advance past the chunk just recorded */
+      len -= offset;
+    }
+  return ret;
+}
+/* Print one "chunk <checksum> <size>" line for each "(st)" variant stored in @rollsums. */
+static void
+print_rollsums (GPtrArray  *rollsums)
+{
+  guint i;
+  for (i = 0; i < rollsums->len; i++)
+    {
+      GVariant *sum = rollsums->pdata[i];
+      const char *csum; /* filled via "&s": points into the variant, not freed here */
+      guint64 val;
+      g_variant_get (sum, "(&st)", &csum, &val);
+      g_print ("chunk %s %" G_GUINT64_FORMAT "\n", csum, val);
+    }
+}
 
 int
 main (int argc, char **argv)
@@ -38,29 +80,53 @@ main (int argc, char **argv)
 
   g_setenv ("GIO_USE_VFS", "local", TRUE);
 
-  if (argc > 1)
+  if (argc == 2)
     {
-      const guint8 *start;
-      gsize len;
+      gs_unref_ptrarray GPtrArray *rollsums;
 
       path = g_file_new_for_path (argv[1]);
       bytes = gs_file_map_readonly (path, cancellable, error);
       if (!bytes)
        goto out;
 
-      start = g_bytes_get_data (bytes, &len);
-      while (TRUE)
-       {
-         int offset, bits;
-         offset = bupsplit_find_ofs (start, MIN(G_MAXINT32, len), &bits); 
-         if (offset == 0)
-           break;
-         if (offset > BLOB_MAX)
-           offset = BLOB_MAX;
-          g_print ("%" G_GUINT64_FORMAT "\n", (guint64)offset);
-         start += offset;
-          len -= offset;
+      rollsums = rollsum_checksums_for_data (bytes);
+      print_rollsums (rollsums);
+    }
+  else if (argc > 2)
+    {
+      guint i;
+      gs_unref_hashtable GHashTable *sums = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
+      gs_unref_ptrarray GPtrArray *rollsums;
+      guint64 input_size = 0;
+      guint64 rollsum_size = 0;
+
+      for (i = 1; i < argc; i++)
+        {
+          guint j;
+          path = g_file_new_for_path (argv[i]);
+          bytes = gs_file_map_readonly (path, cancellable, error);
+          if (!bytes)
+            goto out;
+
+          input_size += g_bytes_get_size (bytes);
+
+          rollsums = rollsum_checksums_for_data (bytes);
+          for (j = 0; j < rollsums->len; j++)
+            {
+              GVariant *sum = rollsums->pdata[j];
+              const char *csum;
+              guint64 val;
+              g_variant_get (sum, "(&st)", &csum, &val);
+              if (!g_hash_table_contains (sums, csum))
+                {
+                  g_hash_table_add (sums, g_strdup (csum));
+                  rollsum_size += val;
+                }
+            }
         }
+      g_print ("rollsums:%u input:%" G_GUINT64_FORMAT " output: %" G_GUINT64_FORMAT " speedup:%f\n",
+               g_hash_table_size (sums), input_size, rollsum_size,
+               ((double)rollsum_size) / input_size);
     }
   else
     {


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]