[ostree/wip/delta: 24/24] WIP static deltas



commit 87527bc174c50f98eca42fa98f24f0fc3578b10b
Author: Colin Walters <walters verbum org>
Date:   Thu Aug 15 09:17:37 2013 -0400

    WIP static deltas

 Makefile-libostree.am                         |    3 +
 src/libostree/README-deltas.md                |  174 +++++++++++
 src/libostree/ostree-core.c                   |   15 +
 src/libostree/ostree-core.h                   |    7 +
 src/libostree/ostree-repo-private.h           |    1 +
 src/libostree/ostree-static-delta-compiler.c  |  342 ++++++++++++++++++++++
 src/libostree/ostree-static-delta-processor.c |  381 +++++++++++++++++++++++++
 src/libostree/ostree-static-delta.h           |   64 ++++
 tests/test-delta.sh                           |   70 +++++
 9 files changed, 1057 insertions(+), 0 deletions(-)
---
diff --git a/Makefile-libostree.am b/Makefile-libostree.am
index 197aa8e..cc42f87 100644
--- a/Makefile-libostree.am
+++ b/Makefile-libostree.am
@@ -50,6 +50,9 @@ libostree_1_la_SOURCES = \
        src/libostree/ostree-repo-file.c \
        src/libostree/ostree-repo-file-enumerator.c \
        src/libostree/ostree-repo-file-enumerator.h \
+       src/libostree/ostree-static-delta-processor.c \
+       src/libostree/ostree-static-delta-compiler.c \
+       src/libostree/ostree-static-delta.h \
        $(NULL)
 if USE_LIBARCHIVE
 libostree_1_la_SOURCES += src/libostree/ostree-libarchive-input-stream.h \
diff --git a/src/libostree/README-deltas.md b/src/libostree/README-deltas.md
new file mode 100644
index 0000000..ccdda81
--- /dev/null
+++ b/src/libostree/README-deltas.md
@@ -0,0 +1,174 @@
+OSTree Static Object Deltas
+===========================
+
+Currently, OSTree's "archive-z2" mode stores both metadata and content
+objects as individual files in the filesystem.  Content objects are
+zlib-compressed.
+
+The advantage of this is model are:
+
+0) It's easy to understand and implement
+1) Can be served directly over plain HTTP by a static webserver
+2) Space efficient on the server
+
+However, it can be inefficient both for large updates and small ones:
+
+0) For large tree changes (such as going from -runtime to
+   -devel-debug, or major version upgrades), this can mean thousands
+   and thousands of HTTP requests.  The overhead for that is very
+   large (until SPDY/HTTP2.0), and will be catastrophically bad if the
+   webserver is not configured with KeepAlive.
+1) Small changes (typo in gnome-shell .js file) still require around
+   5 metadata HTTP requests, plus a redownload of the whole file.
+
+Why not smart servers?
+======================
+
+Smart servers (custom daemons, or just CGI scripts) as git has are not
+under consideration for this proposal.  OSTree is designed for the
+same use case as GNU/Linux distribution package systems are, where
+content is served by a network of volunteer mirrors that will
+generally not run custom code.
+
+In particular, Amazon S3 style dumb content servers is a very
+important use case, as is being able to apply updates from static
+media like DVD-ROM.
+
+Delta Bytecode Format
+=====================
+
+When a client wants to retrieve commit ${new} while currently running
+${current}, it makes a HTTP request for
+${repo}/deltas/${current}-${new}.delta.  If it exists, then it is
+retrieved and executed.  Otherwise, simply fall back to plain object
+retrieval.
+
+A .delta object is a custom binary format.  It has the following high
+level form:
+
+delta-descriptor:
+  metadata strings: a(ss)
+  metadata binary: a(say)
+  ARRAY[(csum from, csum to)]: ay
+  ARRAY[delta-part-header]
+
+The metadata would include things like a version number, as well as
+extended verification data like a GPG signature.
+
+The second array is an array of delta objects that should be fetched
+and applied before this one.  This is a fairly generic recursion
+mechanism that would potentially allow saving significant storage
+space on the server.
+
+Each delta-part-header is:
+
+  ay checksum
+  guint64 size  /* Size of delta */
+  guint64 usize  /* Uncompressed object size */
+  ARRAY[(guint8 objtype, csum object)]
+
+The checksum is of the delta payload, and each entry in the array
+represents an OSTree object which will be created by the deltapart.
+
+A delta-part has the following form:
+
+byte compression-type (0 = none, 'g' = gzip, 'x' = 'xz')
+delta-part-content
+
+delta-part-content:
+  byte[] payload
+  ARRAY[operation]
+
+The rationale for having delta-part is that it allows easy incremental
+resumption of downloads.  The client can look at the delta descriptor
+and skip downloading delta-parts for which it already has the
+contained objects.  This is better than simply resuming a gigantic
+file because if the client decides to fetch a slightly newer version,
+it's very probable that some of the downloading we've already done is
+still useful.
+
+The delta part is effectively a high level bytecode for a
+stack-oriented machine.  It iterates on the array of objects in order.
+The following operations are available:
+
+FETCH
+  Fall back to fetching the current object individually.  Move
+  to the next object.
+
+WRITE(array[(varint offset, varint length)])
+  Write from current input target (default payload) to output.
+
+GUNZIP(array[(varint offset, varint length)])
+  gunzip from current input target (default payload) to output.
+
+CLOSE
+  Close the current output target, and proceed to the next; if the
+  output object was a temporary, the output resets to the current
+  object.
+
+# Change the input source to an object
+READOBJECT(csum object)
+  Set object as current input target
+
+# Change the input source to payload
+READPAYLOAD
+  Set payload as current input target
+
+Compiling Deltas
+================
+
+After reading the above, you may be wondering how we actually *make*
+these deltas.  I envison a strategy similar to that employed by
+Chromium autoupdate:
+http://www.chromium.org/chromium-os/chromiumos-design-docs/autoupdate-details
+
+Something like this would be a useful initial algorithm:
+1) Compute the set of added objects NEW
+2) For each object in NEW:
+  - Look for a the set of "superficially similar" objects in the
+    previous tree, using heuristics based first on filename (including
+    prefix), then on size.  Call this set CANDIDATES.
+    For each entry in CANDIDATES:
+      - Try doing a bup/librsync style rolling checksum, and compute the
+        list of changed blocks.
+      - Try gzip-compressing it
+3) Choose the lowest cost method for each NEW object, and partition
+   the program for each method into deltapart-sized chunks.
+
+However, there are many other possibilities, that could be used in a
+hybrid mode with the above.  For example, we could try to find similar
+objects, and gzip them together.  This would be a *very* useful
+strategy for things like the 9000 Boost headers which have massive
+amounts of redundant data.
+
+Notice too that the delta format supports falling back to retrieving
+individual objects.  For cases like the initramfs which is compressed
+inside the tree with gzip, we're not going to find an efficient way to
+sync it, so the delta compiler should just fall back to fetching it
+individually.
+
+Which Deltas To Create?
+=======================
+
+Going back to the start, there are two cases to optimize for:
+
+1) Incremental upgrades between builds
+2) Major version upgrades
+
+A command line operation would look something like this:
+
+$ ostree --repo=/path/to/repo gendelta --ref-prefix=gnome-ostree/buildmaster/ --strategy=latest --depth=5
+
+This would tell ostree to generate deltas from each of the last 4
+commits to each ref (e.g. gnome-ostree/buildmaster/x86_64-runtime) to
+the latest commit.  It might also be possible of course to have
+--strategy=incremental where we generate a delta between each commit.
+I suspect that'd be something to do if one has a *lot* of disk space
+to spend, and there's a reason for clients to be fetching individual
+refs.
+
+$ ostree --repo=/path/to/repo gendelta --from=gnome-ostree/3.10/x86_64-runtime 
--to=gnome-ostree/buildmaster/x86_64-runtime
+
+This is an obvious one - generate a delta from the last stable release
+to the current development head.
+
diff --git a/src/libostree/ostree-core.c b/src/libostree/ostree-core.c
index 2aa269d..a41974e 100644
--- a/src/libostree/ostree-core.c
+++ b/src/libostree/ostree-core.c
@@ -1121,6 +1121,21 @@ ostree_get_relative_object_path (const char         *checksum,
 }
 
 char *
+ostree_get_relative_static_delta_path (const char        *from,
+                                       const char        *to)
+{
+  return g_strdup_printf ("deltas/%s-%s/meta", from, to);
+}
+
+char *
+ostree_get_relative_static_delta_part_path (const char        *from,
+                                            const char        *to,
+                                            guint              i)
+{
+  return g_strdup_printf ("deltas/%s-%s/%u", from, to, i);
+}
+
+char *
 ostree_get_relative_archive_content_path (const char        *checksum)
 {
   GString *path;
diff --git a/src/libostree/ostree-core.h b/src/libostree/ostree-core.h
index b2d24e6..96e91ec 100644
--- a/src/libostree/ostree-core.h
+++ b/src/libostree/ostree-core.h
@@ -172,6 +172,13 @@ char *ostree_get_relative_object_path (const char        *checksum,
                                        OstreeObjectType   type,
                                        gboolean           compressed);
 
+char *ostree_get_relative_static_delta_path (const char        *from,
+                                             const char        *to);
+
+char *ostree_get_relative_static_delta_part_path (const char        *from,
+                                                  const char        *to,
+                                                  guint              i);
+
 char *ostree_get_relative_archive_content_path (const char        *checksum);
 
 gboolean ostree_get_xattrs_for_file (GFile         *f,
diff --git a/src/libostree/ostree-repo-private.h b/src/libostree/ostree-repo-private.h
index b382021..636d3fa 100644
--- a/src/libostree/ostree-repo-private.h
+++ b/src/libostree/ostree-repo-private.h
@@ -86,5 +86,6 @@ _ostree_repo_stage_directory_meta (OstreeRepo   *self,
                                    GCancellable *cancellable,
                                    GError      **error);
 
+
 G_END_DECLS
 
diff --git a/src/libostree/ostree-static-delta-compiler.c b/src/libostree/ostree-static-delta-compiler.c
new file mode 100644
index 0000000..f9c3817
--- /dev/null
+++ b/src/libostree/ostree-static-delta-compiler.c
@@ -0,0 +1,342 @@
+/* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*-
+ *
+ * Copyright (C) 2013 Colin Walters <walters verbum org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "config.h"
+
+#include <string.h>
+
+#include "ostree-repo-private.h"
+#include "ostree-static-delta.h"
+#include "ostree-diff.h"
+#include "otutil.h"
+#include "ostree-varint.h"
+
+typedef struct {
+  guint64 uncompressed_size;
+  GPtrArray *objects;
+  GString *payload;
+  GString *operations;
+} OstreeStaticDeltaPartBuilder;
+
+typedef struct {
+  GPtrArray *parts;
+} OstreeStaticDeltaBuilder;
+
+static void
+ostree_static_delta_part_builder_unref (OstreeStaticDeltaPartBuilder *part_builder)
+{
+  if (part_builder->objects)
+    g_ptr_array_unref (part_builder->objects);
+  if (part_builder->payload)
+    g_string_free (part_builder->payload, TRUE);
+  if (part_builder->operations)
+    g_string_free (part_builder->operations, TRUE);
+  g_free (part_builder);
+}
+
+static OstreeStaticDeltaPartBuilder *
+allocate_part (OstreeStaticDeltaBuilder *builder)
+{
+  OstreeStaticDeltaPartBuilder *part = g_new0 (OstreeStaticDeltaPartBuilder, 1);
+  part->objects = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
+  part->payload = g_string_new (NULL);
+  part->operations = g_string_new (NULL);
+  part->uncompressed_size = 0;
+  g_ptr_array_add (builder->parts, part);
+  return part;
+}
+
+static GBytes *
+objtype_checksum_array_new (GPtrArray *objects)
+{
+  guint i;
+  GByteArray *ret = g_byte_array_new ();
+
+  for (i = 0; i < objects->len; i++)
+    {
+      GVariant *serialized_key = objects->pdata[i];
+      OstreeObjectType objtype;
+      const char *checksum;
+      guint8 csum[32];
+      guint8 objtype_v;
+        
+      ostree_object_name_deserialize (serialized_key, &checksum, &objtype);
+      objtype_v = (guint8) objtype;
+
+      ostree_checksum_inplace_to_bytes (checksum, csum);
+
+      g_byte_array_append (ret, &objtype_v, 1);
+      g_byte_array_append (ret, csum, sizeof (csum));
+    }
+  return g_byte_array_free_to_bytes (ret);
+}
+
+static gboolean 
+generate_delta_lowlatency (OstreeRepo                       *repo,
+                           const char                       *from,
+                           const char                       *to,
+                           OstreeStaticDeltaBuilder         *builder,
+                           GCancellable                     *cancellable,
+                           GError                          **error)
+{
+  gboolean ret = FALSE;
+  GHashTableIter hashiter;
+  gpointer key, value;
+  OstreeStaticDeltaPartBuilder *current_part = NULL;
+  gs_unref_object GFile *root_from = NULL;
+  gs_unref_object GFile *root_to = NULL;
+  gs_unref_ptrarray GPtrArray *modified = NULL;
+  gs_unref_ptrarray GPtrArray *removed = NULL;
+  gs_unref_ptrarray GPtrArray *added = NULL;
+  gs_unref_hashtable GHashTable *to_reachable_objects = NULL;
+  gs_unref_hashtable GHashTable *from_reachable_objects = NULL;
+  gs_unref_hashtable GHashTable *new_reachable_objects = NULL;
+
+  if (!ostree_repo_read_commit (repo, from, &root_from, cancellable, error))
+    goto out;
+  if (!ostree_repo_read_commit (repo, to, &root_to, cancellable, error))
+    goto out;
+
+  /* Gather a filesystem level diff; when we do heuristics to ship
+   * just parts of changed files, we can make use of this data.
+   */
+  modified = g_ptr_array_new_with_free_func ((GDestroyNotify) ostree_diff_item_unref);
+  removed = g_ptr_array_new_with_free_func ((GDestroyNotify) g_object_unref);
+  added = g_ptr_array_new_with_free_func ((GDestroyNotify) g_object_unref);
+  if (!ostree_diff_dirs (root_from, root_to, modified, removed, added,
+                         cancellable, error))
+    goto out;
+
+  from_reachable_objects = ostree_repo_traverse_new_reachable ();
+  if (!ostree_repo_traverse_commit (repo, from, -1, new_reachable_objects,
+                                    cancellable, error))
+    goto out;
+
+  to_reachable_objects = ostree_repo_traverse_new_reachable ();
+  if (!ostree_repo_traverse_commit (repo, to, -1, new_reachable_objects,
+                                    cancellable, error))
+    goto out;
+
+  new_reachable_objects = ostree_repo_traverse_new_reachable ();
+
+  g_hash_table_iter_init (&hashiter, to_reachable_objects);
+  while (g_hash_table_iter_next (&hashiter, &key, &value))
+    {
+      GVariant *serialized_key = key;
+
+      if (g_hash_table_contains (from_reachable_objects, serialized_key))
+        continue;
+
+      g_hash_table_insert (new_reachable_objects, g_variant_ref (serialized_key), serialized_key);
+    }
+
+  current_part = allocate_part (builder);
+
+  g_hash_table_iter_init (&hashiter, new_reachable_objects);
+  while (g_hash_table_iter_next (&hashiter, &key, &value))
+    {
+      GVariant *serialized_key = key;
+      const char *checksum;
+      OstreeObjectType objtype;
+      guint64 content_size;
+      gs_unref_object GInputStream *content_stream = NULL;
+      gsize bytes_read;
+      gsize object_payload_start;
+      const guint readlen = 4096;
+
+      ostree_object_name_deserialize (serialized_key, &checksum, &objtype);
+
+      if (!ostree_repo_load_object_stream (repo, objtype, checksum,
+                                           &content_stream, &content_size,
+                                           cancellable, error))
+        goto out;
+
+      current_part->uncompressed_size += content_size;
+
+      /* Ensure we have at least one object per delta, even if a given
+       * object is larger.
+       */
+      if (current_part->objects->len > 0 &&
+          current_part->payload->len + content_size > OSTREE_STATIC_DELTA_PART_MAX_SIZE_BYTES)
+        {
+          current_part = allocate_part (builder);
+        } 
+
+      object_payload_start = current_part->payload->len;
+
+      g_ptr_array_add (current_part->objects, g_variant_ref (serialized_key));
+
+      while (TRUE)
+        {
+          gsize empty_space;
+
+          empty_space = current_part->payload->allocated_len - current_part->payload->len;
+          if (empty_space < readlen)
+            {
+              gsize origlen;
+              origlen = current_part->payload->len;
+              g_string_set_size (current_part->payload, current_part->payload->allocated_len + (readlen - 
empty_space));
+              current_part->payload->len = origlen;
+            }
+
+          if (!g_input_stream_read_all (content_stream,
+                                        current_part->payload + current_part->payload->len,
+                                        readlen,
+                                        &bytes_read,
+                                        cancellable, error))
+            goto out;
+          if (bytes_read == 0)
+            break;
+          current_part->payload->len += bytes_read;
+
+          g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_WRITE);
+          g_string_append_c (current_part->operations, (gchar)1);
+          _ostree_write_varuint64 (current_part->operations, object_payload_start);
+          _ostree_write_varuint64 (current_part->operations, current_part->payload->len - 
object_payload_start);
+          g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_CLOSE);
+        }
+    }
+
+  ret = TRUE;
+ out:
+  return ret;
+}
+
+gboolean
+_ostree_static_delta_generate (OstreeRepo                   *repo,
+                               OstreeStaticDeltaGenerateOpt  opt,
+                               const char                   *from,
+                               const char                   *to,
+                               GVariant                     *metadata,
+                               GCancellable                 *cancellable,
+                               GError                      **error)
+{
+  gboolean ret = FALSE;
+  OstreeStaticDeltaBuilder builder;
+  guint i;
+  gs_unref_variant_builder GVariantBuilder *part_headers = NULL;
+  gs_unref_ptrarray GPtrArray *part_tempfiles = NULL;
+  gs_unref_variant GVariant *delta_descriptor = NULL;
+  gs_free char *descriptor_relpath = NULL;
+  gs_unref_object GFile *descriptor_path = NULL;
+  gs_unref_object GFile *descriptor_dir = NULL;
+
+  memset (&builder, 0, sizeof (builder));
+
+  builder.parts = g_ptr_array_new_with_free_func ((GDestroyNotify)ostree_static_delta_part_builder_unref);
+
+  /* Ignore optimization flags */
+  if (!generate_delta_lowlatency (repo, from, to, &builder,
+                                  cancellable, error))
+    goto out;
+
+  part_headers = g_variant_builder_new (G_VARIANT_TYPE ("a" OSTREE_STATIC_DELTA_PART_HEADER_FORMAT));
+  part_tempfiles = g_ptr_array_new_with_free_func (g_object_unref);
+  for (i = 0; i < builder.parts->len; i++)
+    {
+      OstreeStaticDeltaPartBuilder *part_builder = builder.parts->pdata[i];
+      GBytes *payload_b;
+      GBytes *operations_b;
+      gs_free guchar *part_checksum = NULL;
+      gs_free_checksum GChecksum *checksum = NULL;
+      gs_unref_bytes GBytes *objtype_checksum_array = NULL;
+      gs_unref_object GFile *part_tempfile = NULL;
+      gs_unref_object GOutputStream *part_temp_outstream = NULL;
+      gs_unref_object GInputStream *part_in = NULL;
+      gs_unref_object GInputStream *part_payload_in = NULL;
+      gs_unref_object GMemoryOutputStream *part_payload_out = NULL;
+      gs_unref_object GConverterOutputStream *part_payload_compressor = NULL;
+      gs_unref_object GConverter *zlib_compressor = NULL;
+      gs_unref_variant GVariant *delta_part_content = NULL;
+      gs_unref_variant GVariant *delta_part = NULL;
+      gs_unref_variant GVariant *delta_part_header = NULL;
+
+      payload_b = g_string_free_to_bytes (part_builder->payload);
+      part_builder->payload = NULL;
+      
+      operations_b = g_string_free_to_bytes (part_builder->operations);
+      part_builder->operations = NULL;
+      /* FIXME - avoid duplicating memory here */
+      delta_part_content = g_variant_new ("@ay ay",
+                                          ot_gvariant_new_ay_bytes (payload_b),
+                                          ot_gvariant_new_ay_bytes (operations_b));
+
+      /* Hardcode gzip for now */
+      zlib_compressor = (GConverter*)g_zlib_compressor_new (G_ZLIB_COMPRESSOR_FORMAT_RAW, 9);
+      part_payload_in = ot_variant_read (delta_part);
+      part_payload_out = (GMemoryOutputStream*)g_memory_output_stream_new_resizable ();
+      part_payload_compressor = (GConverterOutputStream*)g_converter_output_stream_new 
((GOutputStream*)part_payload_out, zlib_compressor);
+
+      if (0 > g_output_stream_splice ((GOutputStream*)part_payload_compressor, part_payload_in,
+                                      G_OUTPUT_STREAM_SPLICE_CLOSE_TARGET | 
G_OUTPUT_STREAM_SPLICE_CLOSE_SOURCE,
+                                      cancellable, error))
+        goto out;
+
+      /* FIXME - avoid duplicating memory here */
+      delta_part = g_variant_new ("y ay", (guint8)'g', part_builder);
+
+      if (!gs_file_open_in_tmpdir (repo->tmp_dir, 0644,
+                                   &part_tempfile, &part_temp_outstream,
+                                   cancellable, error))
+        goto out;
+      part_in = ot_variant_read (delta_part);
+      if (!ot_gio_splice_get_checksum (part_temp_outstream, part_in,
+                                       &part_checksum,
+                                       cancellable, error))
+        goto out;
+
+      objtype_checksum_array = objtype_checksum_array_new (part_builder->objects);
+      delta_part_header = g_variant_new ("(@aytt aay)",
+                                         ot_gvariant_new_ay_bytes (objtype_checksum_array),
+                                         g_variant_get_size (delta_part),
+                                         part_builder->uncompressed_size,
+                                         NULL);
+      g_variant_builder_add_value (part_headers, g_variant_ref (delta_part_header));
+      g_ptr_array_add (part_tempfiles, g_object_ref (part_tempfile));
+    }
+
+  descriptor_relpath = ostree_get_relative_static_delta_path (from, to);
+  descriptor_path = g_file_resolve_relative_path (repo->repodir, descriptor_relpath);
+  descriptor_dir = g_file_get_parent (descriptor_path);
+
+  if (!gs_file_ensure_directory (descriptor_dir, TRUE, cancellable, error))
+    goto out;
+
+  for (i = 0; i < builder.parts->len; i++)
+    {
+      GFile *tempfile = part_tempfiles->pdata[i];
+      gs_free char *part_relpath = ostree_get_relative_static_delta_part_path (from, to, i);
+      gs_unref_object GFile *part_path = g_file_resolve_relative_path (repo->repodir, part_relpath);
+
+      if (!gs_file_rename (tempfile, part_path, cancellable, error))
+        goto out;
+    }
+
+  delta_descriptor = g_variant_new ("(@(a(ss)a(say))@ay a(ayttay)",
+                                    metadata, NULL, part_headers);
+
+  if (!ot_util_variant_save (descriptor_path, delta_descriptor, cancellable, error))
+    goto out;
+
+  ret = TRUE;
+ out:
+  g_clear_pointer (&builder.parts, g_ptr_array_unref);
+  return ret;
+}
diff --git a/src/libostree/ostree-static-delta-processor.c b/src/libostree/ostree-static-delta-processor.c
new file mode 100644
index 0000000..4bef613
--- /dev/null
+++ b/src/libostree/ostree-static-delta-processor.c
@@ -0,0 +1,381 @@
+/* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*-
+ *
+ * Copyright (C) 2013 Colin Walters <walters verbum org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "config.h"
+
+#include <string.h>
+
+#include "ostree-repo-private.h"
+#include "ostree-static-delta.h"
+#include "otutil.h"
+#include "ostree-varint.h"
+
+/* 1 byte for object type, 32 bytes for checksum */
+#define CSUM_LEN 33
+
+/* This should really always be true, but hey, let's just assert it */
+G_STATIC_ASSERT (sizeof (guint) >= sizeof (guint32));
+
+typedef struct {
+  guint           checksum_index;
+  const guint8   *checksums;
+  guint           n_checksums;
+
+  const guint8   *opdata;
+  guint           oplen;
+
+  OstreeObjectType output_objtype;
+  const guint8   *output_target;
+  GFile          *output_tmp_path;
+  GOutputStream  *output_tmp_stream;
+  const guint8   *input_target_csum;
+
+  const guint8   *payload_data;
+  guint64         payload_size; 
+} StaticDeltaExecutionState;
+
+typedef gboolean (*DispatchOpFunc) (OstreeRepo                 *repo,
+                                    StaticDeltaExecutionState  *state,
+                                    GCancellable               *cancellable,
+                                    GError                    **error);
+
+typedef struct  {
+  const char *name;
+  DispatchOpFunc func;
+} OstreeStaticDeltaOperation;
+
+#define OPPROTO(name) \
+  static gboolean dispatch_##name (OstreeRepo                 *repo, \
+                                   StaticDeltaExecutionState  *state, \
+                                   GCancellable               *cancellable, \
+                                   GError                    **error);
+
+OPPROTO(fetch)
+OPPROTO(write)
+OPPROTO(gunzip)
+OPPROTO(close)
+#undef OPPROTO
+
+static OstreeStaticDeltaOperation op_dispatch_table[] = {
+  { "fetch", dispatch_fetch },
+  { "write", dispatch_write },
+  { "gunzip", dispatch_gunzip },
+  { "close", dispatch_close },
+  { NULL }
+};
+
+static gboolean
+parse_checksum_array (GVariant      *array,
+                      guint8       **out_checksums_array,
+                      guint         *out_n_checksums,
+                      GError       **error)
+{
+  gsize n = g_variant_n_children (array);
+  guint n_checksums;
+
+  n_checksums = n / CSUM_LEN;
+
+  if (G_UNLIKELY(n == 0 || n > (G_MAXUINT32/CSUM_LEN) || (n_checksums * CSUM_LEN) != n))
+    {
+      g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+                   "Invalid checksum array length %" G_GSIZE_FORMAT, n);
+      return FALSE;
+    }
+
+  *out_checksums_array = (gpointer)g_variant_get_data (array);
+  *out_n_checksums = n_checksums;
+
+  return TRUE;
+}
+
+static gboolean
+open_output_target_csum (OstreeRepo                  *repo,
+                         StaticDeltaExecutionState   *state,
+                         GCancellable                *cancellable,
+                         GError                     **error)
+{
+  gboolean ret = FALSE;
+  guint8 *objcsum;
+
+  g_assert (state->checksums != NULL);
+  g_assert (state->output_target == NULL);
+  g_assert (state->output_tmp_path == NULL);
+  g_assert (state->output_tmp_stream == NULL);
+  g_assert (state->checksum_index < state->n_checksums);
+
+  objcsum = (guint8*)state->checksums + (state->checksum_index * CSUM_LEN);
+
+  if (G_UNLIKELY(!ostree_validate_structureof_objtype (*objcsum, error)))
+    goto out;
+
+  state->output_objtype = (OstreeObjectType) *objcsum;
+  state->output_target = objcsum + 1;
+  if (!gs_file_open_in_tmpdir (repo->tmp_dir, 0644,
+                               &state->output_tmp_path, &state->output_tmp_stream,
+                               cancellable, error))
+    goto out;
+
+  ret = TRUE;
+ out:
+  return ret;
+}
+
+
+gboolean
+_ostree_static_delta_part_execute (OstreeRepo      *repo,
+                                   GVariant        *header,
+                                   GVariant        *part,
+                                   GCancellable    *cancellable,
+                                   GError         **error)
+{
+  gboolean ret = FALSE;
+  guint8 *checksums_data;
+  gs_unref_variant GVariant *checksums = NULL;
+  gs_unref_variant GVariant *payload = NULL;
+  gs_unref_variant GVariant *ops = NULL;
+  StaticDeltaExecutionState statedata;
+  StaticDeltaExecutionState *state = &statedata;
+
+  memset (state, 0, sizeof (*state));
+
+  g_variant_get_child (header, 1, "@ay", &checksums);
+  if (!parse_checksum_array (checksums,
+                             &checksums_data,
+                             &state->n_checksums,
+                             error))
+    goto out;
+
+  state->checksums = checksums_data;
+  g_assert (state->n_checksums > 0);
+  if (!open_output_target_csum (repo, state, cancellable, error))
+    goto out;
+
+  g_variant_get (part, "(@ay ay)", &payload, &ops);
+
+  state->payload_data = g_variant_get_data (payload);
+  state->payload_size = g_variant_get_size (payload);
+
+  state->oplen = g_variant_n_children (payload);
+  state->opdata = g_variant_get_data (payload);
+  while (state->oplen > 0)
+    {
+      guint8 opcode = state->opdata[0];
+      OstreeStaticDeltaOperation *op;
+
+      if (G_UNLIKELY (opcode >= G_N_ELEMENTS (op_dispatch_table)))
+        {
+          g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT,
+                       "Out of range opcode %u", opcode);
+          goto out;
+        }
+      op = &op_dispatch_table[opcode];
+      if (!op->func (repo, state, cancellable, error))
+        goto out;
+    }
+
+  ret = TRUE;
+ out:
+  return ret;
+}
+
+static gboolean
+dispatch_fetch (OstreeRepo    *repo,   
+                StaticDeltaExecutionState  *state,
+                GCancellable  *cancellable,  
+                GError       **error)
+{
+  g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
+               "Static delta fetch opcode is not implemented in this version");
+  return FALSE;
+}
+
+static guint64
+read_varuint64 (StaticDeltaExecutionState  *state)
+{
+  gsize bytes_read;
+  guint64 ret;
+  ret = _ostree_read_varuint64 (state->opdata, state->oplen, &bytes_read);
+  state->opdata += bytes_read;
+  state->oplen -= bytes_read;
+  return ret;
+}
+
+
+static gboolean
+validate_ofs (StaticDeltaExecutionState  *state,
+              guint64                     offset,
+              guint64                     length,
+              GError                    **error)
+{
+  if (G_UNLIKELY (offset + length < offset ||
+                  offset + length >= state->payload_size))
+    {
+      g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT,
+                   "Invalid offset/length %" G_GUINT64_FORMAT "/%" G_GUINT64_FORMAT,
+                   offset, length);
+      return FALSE;
+    }
+  return TRUE;
+}
+  
+static gboolean
+dispatch_write (OstreeRepo                 *repo,
+                StaticDeltaExecutionState  *state,
+                GCancellable               *cancellable,  
+                GError                    **error)
+{
+  gboolean ret = FALSE;
+  guint64 offset;
+  guint64 length;
+  gsize bytes_written;
+
+  if (G_UNLIKELY(state->oplen < 2))
+    {
+      g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT,
+                   "Expected at least 2 bytes for write op");
+      goto out;
+    }
+  offset = read_varuint64 (state);
+  length = read_varuint64 (state);
+  
+  if (!validate_ofs (state, offset, length, error))
+    goto out;
+
+  if (!g_output_stream_write_all (state->output_tmp_stream,
+                                  state->payload_data + offset,
+                                  length,
+                                  &bytes_written,
+                                  cancellable, error))
+    goto out;
+
+  ret = TRUE;
+ out:
+  return ret;
+}
+
+static gboolean
+dispatch_gunzip (OstreeRepo                 *repo,
+                 StaticDeltaExecutionState  *state,
+                 GCancellable               *cancellable,  
+                 GError                    **error)
+{
+  gboolean ret = FALSE;
+  guint64 offset;
+  guint64 length;
+  gs_unref_object GConverter *zlib_decomp = NULL;
+  gs_unref_object GInputStream *payload_in = NULL;
+  gs_unref_object GInputStream *zlib_in = NULL;
+
+  if (G_UNLIKELY(state->oplen < 2))
+    {
+      g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT,
+                   "Expected at least 2 bytes for gunzip op");
+      goto out;
+    }
+  offset = read_varuint64 (state);
+  length = read_varuint64 (state);
+
+  if (!validate_ofs (state, offset, length, error))
+    goto out;
+
+  payload_in = g_memory_input_stream_new_from_data (state->payload_data + offset, length, NULL);
+  zlib_decomp = (GConverter*)g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_RAW);
+  zlib_in = g_converter_input_stream_new (payload_in, zlib_decomp);
+
+  if (0 > g_output_stream_splice (state->output_tmp_stream, zlib_in,
+                                  G_OUTPUT_STREAM_SPLICE_CLOSE_SOURCE,
+                                  cancellable, error))
+    goto out;
+
+  ret = TRUE;
+ out:
+  return ret;
+}
+
+static gboolean
+dispatch_close (OstreeRepo                 *repo,
+                StaticDeltaExecutionState  *state,
+                GCancellable               *cancellable,  
+                GError                    **error)
+{
+  gboolean ret = FALSE;
+  char tmp_checksum[65];
+
+  if (state->checksum_index == state->n_checksums)
+    {
+      g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT,
+                   "Too many close operations");
+      goto out;
+    }
+
+  g_assert (state->output_tmp_stream);
+  g_assert (state->output_tmp_path);
+
+  if (!g_output_stream_close (state->output_tmp_stream, cancellable, error))
+    goto out;
+
+  g_clear_object (&state->output_tmp_stream);
+
+  ostree_checksum_inplace_from_bytes (state->output_target, tmp_checksum);
+
+  if (OSTREE_OBJECT_TYPE_IS_META (state->output_objtype))
+    {
+      gs_unref_variant GVariant *metadata = NULL;
+
+      if (!ot_util_variant_map (state->output_tmp_path,
+                                ostree_metadata_variant_type (state->output_objtype),
+                                TRUE, &metadata, error))
+        goto out;
+
+      if (!ostree_repo_stage_metadata (repo, state->output_objtype, tmp_checksum,
+                                       metadata, NULL, cancellable, error))
+        goto out;
+    }
+  else
+    {
+      gs_unref_object GInputStream *in = NULL;
+      gs_unref_object GFileInfo *info = NULL;
+
+      in = (GInputStream*)g_file_read (state->output_tmp_path, cancellable, error);
+      if (!in)
+        goto out;
+
+      info = g_file_input_stream_query_info ((GFileInputStream*)in, G_FILE_ATTRIBUTE_STANDARD_SIZE,
+                                             cancellable, error);
+      if (!info)
+        goto out;
+      
+      if (!ostree_repo_stage_content (repo, tmp_checksum, in,
+                                      g_file_info_get_size (info), NULL,
+                                      cancellable, error))
+        goto out;
+    }
+
+  state->checksum_index++;
+  if (state->checksum_index < state->n_checksums)
+    {
+      if (!open_output_target_csum (repo, state, cancellable, error))
+        goto out;
+    }
+      
+  ret = TRUE;
+ out:
+  return ret;
+}
diff --git a/src/libostree/ostree-static-delta.h b/src/libostree/ostree-static-delta.h
new file mode 100644
index 0000000..a302e7e
--- /dev/null
+++ b/src/libostree/ostree-static-delta.h
@@ -0,0 +1,64 @@
+/* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*-
+ *
+ * Copyright (C) 2013 Colin Walters <walters verbum org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#pragma once
+
+#include "ostree-core.h"
+
+G_BEGIN_DECLS
+
+/* Arbitrarily chosen */
+#define OSTREE_STATIC_DELTA_PART_MAX_SIZE_BYTES (16*1024*1024)
+
+#define OSTREE_STATIC_DELTA_PART_PAYLOAD_FORMAT "(ayay)"
+#define OSTREE_STATIC_DELTA_PART_HEADER_FORMAT "(ayttay)"
+#define OSTREE_STATIC_DELTA_METADATA_FORMAT "(a(ss)a(say))"
+#define OSTREE_STATIC_DELTA_DESCRIPTOR_FORMAT "(" OSTREE_STATIC_DELTA_METADATA_FORMAT "aya" 
OSTREE_STATIC_DELTA_PART_HEADER_FORMAT ")"
+
+gboolean _ostree_static_delta_part_execute (OstreeRepo      *repo,
+                                            GVariant        *header,
+                                            GVariant        *part,
+                                            GCancellable    *cancellable,
+                                            GError         **error);
+
+typedef enum {
+  OSTREE_STATIC_DELTA_GENERATE_OPT_LOWLATENCY,
+  OSTREE_STATIC_DELTA_GENERATE_OPT_MAJOR
+} OstreeStaticDeltaGenerateOpt;
+
+typedef enum {
+  OSTREE_STATIC_DELTA_OP_FETCH = 1,
+  OSTREE_STATIC_DELTA_OP_WRITE = 2,
+  OSTREE_STATIC_DELTA_OP_GUNZIP = 3,
+  OSTREE_STATIC_DELTA_OP_CLOSE = 4,
+  OSTREE_STATIC_DELTA_OP_READOBJECT = 5,
+  OSTREE_STATIC_DELTA_OP_READPAYLOAD = 6
+} OstreeStaticDeltaOpCode;
+
+gboolean _ostree_static_delta_generate (OstreeRepo                   *repo,
+                                        OstreeStaticDeltaGenerateOpt  opt,
+                                        const char                   *from,
+                                        const char                   *to,
+                                        GVariant                     *metadata,
+                                        GCancellable                 *cancellable,
+                                        GError                      **error);
+
+G_END_DECLS
+
diff --git a/tests/test-delta.sh b/tests/test-delta.sh
new file mode 100755
index 0000000..a829fde
--- /dev/null
+++ b/tests/test-delta.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+#
+# Copyright (C) 2011 Colin Walters <walters verbum org>
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+set -e
+
+. $(dirname $0)/libtest.sh
+
+bindatafiles="bash true ostree"
+morebindatafiles="false ls"
+
+echo '1..2'
+
+mkdir repo
+ostree --repo=repo init --mode=archive-z2
+
+mkdir files
+for bin in ${bindatafiles}; do
+    cp $(which ${bin}) files
+done
+
+ostree --repo=repo commit -b test -s test --tree=dir=files
+
+function permuteFile() {
+    permutation=$(($1 % 2))
+    output=$2
+    case $permutation in
+       0) dd if=/dev/zero count=40 bs=1 >> $output;;
+       1) echo aheader | cat - $output >> $output.new && mv $output.new $output;;
+    esac
+}
+
+function permuteDirectory() {
+    permutation=$1
+    dir=$2
+    for x in ${dir}/*; do
+       for z in $(seq ${permutation}); do
+           permuteFile ${z} ${x}
+       done
+    done
+}
+
+permuteDirectory 1 files
+ostree --repo=repo commit -b test -s test --tree=dir=files
+
+origrev=$(ostree --repo=repo rev-parse test^)
+newrev=$(ostree --repo=repo rev-parse test)
+ostree --repo=repo deltas --tune=continuous --from-ref=test
+
+assert_has_file repo/deltas/${origrev}-${newrev}
+
+
+
+
+



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]