[ostree/wip/delta2: 15/21] deltas: Make size configurable, pack metadata objects first



commit 68d52620d7b5e2e38b11f6a69d1a3e1cee0b1d16
Author: Colin Walters <walters verbum org>
Date:   Sun Apr 27 13:48:14 2014 -0400

    deltas: Make size configurable, pack metadata objects first
    
    It looks like a major win to pack all the metadata objects into the
    first delta part.

 .../ostree-repo-static-delta-compilation.c         |  261 ++++++++++++++------
 src/libostree/ostree-repo.h                        |    1 +
 src/ostree/ot-builtin-static-delta.c               |    9 +
 3 files changed, 198 insertions(+), 73 deletions(-)
---
diff --git a/src/libostree/ostree-repo-static-delta-compilation.c 
b/src/libostree/ostree-repo-static-delta-compilation.c
index f309bbd..cfef30b 100644
--- a/src/libostree/ostree-repo-static-delta-compilation.c
+++ b/src/libostree/ostree-repo-static-delta-compilation.c
@@ -41,6 +41,7 @@ typedef struct {
   GPtrArray *parts;
   GPtrArray *fallback_objects;
   guint64 loose_compressed_size;
+  guint64 max_usize_bytes;
 } OstreeStaticDeltaBuilder;
 
 static void
@@ -93,6 +94,83 @@ objtype_checksum_array_new (GPtrArray *objects)
   return g_byte_array_free_to_bytes (ret);
 }
 
+static gboolean
+process_one_object (OstreeRepo                       *repo,
+                    OstreeStaticDeltaBuilder         *builder,
+                    OstreeStaticDeltaPartBuilder    **current_part_val,
+                    const char                       *checksum,
+                    OstreeObjectType                  objtype,
+                    GCancellable                     *cancellable,
+                    GError                          **error)
+{
+  gboolean ret = FALSE;
+  guint64 content_size;
+  gsize object_payload_start;
+  gs_unref_object GInputStream *content_stream = NULL;
+  gsize bytes_read;
+  const guint readlen = 4096;
+  guint64 compressed_size;
+  OstreeStaticDeltaPartBuilder *current_part = *current_part_val;
+
+  if (!ostree_repo_load_object_stream (repo, objtype, checksum,
+                                       &content_stream, &content_size,
+                                       cancellable, error))
+    goto out;
+  
+  /* Check to see if this delta is maximum size */
+  if (current_part->objects->len > 0 &&
+      current_part->payload->len + content_size > builder->max_usize_bytes)
+    {
+      *current_part_val = current_part = allocate_part (builder);
+    } 
+
+  if (!ostree_repo_query_object_storage_size (repo, objtype, checksum,
+                                              &compressed_size,
+                                              cancellable, error))
+    goto out;
+  builder->loose_compressed_size += compressed_size;
+
+  current_part->uncompressed_size += content_size;
+
+  g_ptr_array_add (current_part->objects, ostree_object_name_serialize (checksum, objtype));
+
+  object_payload_start = current_part->payload->len;
+
+  while (TRUE)
+    {
+      gsize empty_space;
+
+      empty_space = current_part->payload->allocated_len - current_part->payload->len;
+      if (empty_space < readlen)
+        {
+          gsize origlen;
+          origlen = current_part->payload->len;
+          g_string_set_size (current_part->payload, current_part->payload->allocated_len + (readlen - 
empty_space));
+          current_part->payload->len = origlen;
+        }
+
+      if (!g_input_stream_read_all (content_stream,
+                                    current_part->payload->str + current_part->payload->len,
+                                    readlen,
+                                    &bytes_read,
+                                    cancellable, error))
+        goto out;
+      if (bytes_read == 0)
+        break;
+          
+      current_part->payload->len += bytes_read;
+    }
+      
+  g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_WRITE);
+  _ostree_write_varuint64 (current_part->operations, object_payload_start);
+  _ostree_write_varuint64 (current_part->operations, content_size);
+  g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_CLOSE);
+
+  ret = TRUE;
+ out:
+  return ret;
+}
+
 static gboolean 
 generate_delta_lowlatency (OstreeRepo                       *repo,
                            const char                       *from,
@@ -104,6 +182,7 @@ generate_delta_lowlatency (OstreeRepo                       *repo,
   gboolean ret = FALSE;
   GHashTableIter hashiter;
   gpointer key, value;
+  guint i;
   OstreeStaticDeltaPartBuilder *current_part = NULL;
   gs_unref_object GFile *root_from = NULL;
   gs_unref_object GFile *root_to = NULL;
@@ -112,7 +191,10 @@ generate_delta_lowlatency (OstreeRepo                       *repo,
   gs_unref_ptrarray GPtrArray *added = NULL;
   gs_unref_hashtable GHashTable *to_reachable_objects = NULL;
   gs_unref_hashtable GHashTable *from_reachable_objects = NULL;
-  gs_unref_hashtable GHashTable *new_reachable_objects = NULL;
+  gs_unref_hashtable GHashTable *new_reachable_metadata = NULL;
+  gs_unref_hashtable GHashTable *new_reachable_content = NULL;
+  gs_unref_hashtable GHashTable *modified_content_objects = NULL;
+  gs_unref_hashtable GHashTable *content_object_to_size = NULL;
 
   if (!ostree_repo_read_commit (repo, from, &root_from, NULL,
                                 cancellable, error))
@@ -131,6 +213,17 @@ generate_delta_lowlatency (OstreeRepo                       *repo,
                          cancellable, error))
     goto out;
 
+  modified_content_objects = g_hash_table_new_full (ostree_hash_object_name, g_variant_equal,
+                                                    NULL,
+                                                    (GDestroyNotify) g_variant_unref);
+  for (i = 0; i < modified->len; i++)
+    {
+      OstreeDiffItem *diffitem = modified->pdata[i];
+      GVariant *objname = ostree_object_name_serialize (diffitem->target_checksum,
+                                                        OSTREE_OBJECT_TYPE_FILE);
+      g_hash_table_add (modified_content_objects, objname);
+    }
+
   if (!ostree_repo_traverse_commit (repo, from, -1, &from_reachable_objects,
                                     cancellable, error))
     goto out;
@@ -139,108 +232,114 @@ generate_delta_lowlatency (OstreeRepo                       *repo,
                                     cancellable, error))
     goto out;
 
-  new_reachable_objects = ostree_repo_traverse_new_reachable ();
+  new_reachable_metadata = ostree_repo_traverse_new_reachable ();
+  new_reachable_content = ostree_repo_traverse_new_reachable ();
 
   g_hash_table_iter_init (&hashiter, to_reachable_objects);
   while (g_hash_table_iter_next (&hashiter, &key, &value))
     {
       GVariant *serialized_key = key;
+      const char *checksum;
+      OstreeObjectType objtype;
 
       if (g_hash_table_contains (from_reachable_objects, serialized_key))
         continue;
 
-      g_hash_table_insert (new_reachable_objects, g_variant_ref (serialized_key), serialized_key);
+      ostree_object_name_deserialize (serialized_key, &checksum, &objtype);
+
+      g_variant_ref (serialized_key);
+      if (OSTREE_OBJECT_TYPE_IS_META (objtype))
+        g_hash_table_add (new_reachable_metadata, serialized_key);
+      else
+        g_hash_table_add (new_reachable_content, serialized_key);
     }
   
-  g_printerr ("modified: %u removed: %u added: %u; total %u new reachable objects\n",
-              modified->len, removed->len, added->len, 
-              g_hash_table_size (new_reachable_objects));
-
-  current_part = allocate_part (builder);
-
-  g_hash_table_iter_init (&hashiter, new_reachable_objects);
+  g_printerr ("modified: %u removed: %u added: %u\n",
+              modified->len, removed->len, added->len);
+  g_printerr ("new reachable: metadata=%u content=%u\n",
+              g_hash_table_size (new_reachable_metadata),
+              g_hash_table_size (new_reachable_content));
+
+  /* Scan for large objects, so we can fall back to plain HTTP-based
+   * fetch.  In the future this should come after an rsync-style
+   * rolling delta check for modified files.
+   */
+  g_hash_table_iter_init (&hashiter, new_reachable_content);
   while (g_hash_table_iter_next (&hashiter, &key, &value))
     {
       GVariant *serialized_key = key;
       const char *checksum;
       OstreeObjectType objtype;
-      guint64 content_size;
-      gsize object_payload_start;
-      gs_unref_object GInputStream *content_stream = NULL;
-      gsize bytes_read;
-      const guint readlen = 4096;
       guint64 compressed_size;
+      guint64 uncompressed_size;
+      gboolean fallback = FALSE;
 
       ostree_object_name_deserialize (serialized_key, &checksum, &objtype);
 
-      if (!ostree_repo_load_object_stream (repo, objtype, checksum,
-                                           &content_stream, &content_size,
-                                           cancellable, error))
+      /* First, do a cheap stat() on the *compressed* size - if that's
+       * larger than the max uncompressed size of a delta part, then
+       * clearly uncompressed will be larger.
+       */
+      if (!ostree_repo_query_object_storage_size (repo, objtype, checksum,
+                                                  &compressed_size,
+                                                  cancellable, error))
         goto out;
 
-      /* Fall back to plain HTTP-based fetch for large objects;
-       * in the future we should try an rsync-style rolling checksum
-       * against a previous version, if any.
-       */
-      if (content_size > OSTREE_STATIC_DELTA_PART_MAX_SIZE_BYTES)
+      if (compressed_size > builder->max_usize_bytes)
+        fallback = TRUE;
+      else
+        {
+          /* Now query the uncompressed size. */
+          if (!ostree_repo_load_object_stream (repo, objtype, checksum,
+                                               NULL, &uncompressed_size,
+                                               cancellable, error))
+            goto out;
+          if (uncompressed_size > builder->max_usize_bytes)
+            fallback = TRUE;
+        }
+  
+      if (fallback)
         {
           g_printerr ("fallback for %s\n",
                       ostree_object_to_string (checksum, objtype));
           g_ptr_array_add (builder->fallback_objects, 
-                           ostree_object_name_serialize (checksum, objtype));
-          continue;
+                           g_variant_ref (serialized_key));
+          g_hash_table_iter_remove (&hashiter);
         }
+    }
 
-      /* Ensure we have at least one object per delta, even if a given
-       * object is larger.
-       */
-      if (current_part->objects->len > 0 &&
-          current_part->payload->len + content_size > OSTREE_STATIC_DELTA_PART_MAX_SIZE_BYTES)
-        {
-          current_part = allocate_part (builder);
-        } 
+  current_part = allocate_part (builder);
 
-      if (!ostree_repo_query_object_storage_size (repo, objtype, checksum,
-                                                  &compressed_size,
-                                                  cancellable, error))
-        goto out;
-      builder->loose_compressed_size += compressed_size;
+  /* Pack the metadata first */
+  g_hash_table_iter_init (&hashiter, new_reachable_metadata);
+  while (g_hash_table_iter_next (&hashiter, &key, &value))
+    {
+      GVariant *serialized_key = key;
+      const char *checksum;
+      OstreeObjectType objtype;
 
-      current_part->uncompressed_size += content_size;
+      ostree_object_name_deserialize (serialized_key, &checksum, &objtype);
 
-      g_ptr_array_add (current_part->objects, g_variant_ref (serialized_key));
+      if (!process_one_object (repo, builder, &current_part,
+                               checksum, objtype,
+                               cancellable, error))
+        goto out;
+    }
 
-      object_payload_start = current_part->payload->len;
+  /* Now content */
+  g_hash_table_iter_init (&hashiter, new_reachable_content);
+  while (g_hash_table_iter_next (&hashiter, &key, &value))
+    {
+      GVariant *serialized_key = key;
+      const char *checksum;
+      OstreeObjectType objtype;
 
-      while (TRUE)
-        {
-          gsize empty_space;
-
-          empty_space = current_part->payload->allocated_len - current_part->payload->len;
-          if (empty_space < readlen)
-            {
-              gsize origlen;
-              origlen = current_part->payload->len;
-              g_string_set_size (current_part->payload, current_part->payload->allocated_len + (readlen - 
empty_space));
-              current_part->payload->len = origlen;
-            }
-
-          if (!g_input_stream_read_all (content_stream,
-                                        current_part->payload->str + current_part->payload->len,
-                                        readlen,
-                                        &bytes_read,
-                                        cancellable, error))
-            goto out;
-          if (bytes_read == 0)
-            break;
-          
-          current_part->payload->len += bytes_read;
-        }
-      
-      g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_WRITE);
-      _ostree_write_varuint64 (current_part->operations, object_payload_start);
-      _ostree_write_varuint64 (current_part->operations, content_size);
-      g_string_append_c (current_part->operations, (gchar)OSTREE_STATIC_DELTA_OP_CLOSE);
+      ostree_object_name_deserialize (serialized_key, &checksum, &objtype);
+
+      if (!process_one_object (repo, builder, &current_part,
+                               checksum, objtype,
+                               cancellable, error))
+        goto out;
     }
 
   ret = TRUE;
@@ -320,6 +419,7 @@ get_fallback_headers (OstreeRepo               *self,
  * @from: ASCII SHA256 checksum of origin
  * @to: ASCII SHA256 checksum of target
  * @metadata: (allow-none): Optional metadata
+ * @params: (allow-none): Parameters, see below
  * @cancellable: Cancellable
  * @error: Error
  *
@@ -327,6 +427,11 @@ get_fallback_headers (OstreeRepo               *self,
  * the objects in @to.  This delta is an optimization over fetching
  * individual objects, and can be conveniently stored and applied
  * offline.
+ *
+ * The @params argument should be an a{sv}.  The following attributes
+ * are known:
+ *   - max-usize: u: Maximum size in megabytes of a delta part
+ *   - compression: y: Compression type: 0=none, x=lzma, g=gzip
  */
 gboolean
 ostree_repo_static_delta_generate (OstreeRepo                   *self,
@@ -334,12 +439,14 @@ ostree_repo_static_delta_generate (OstreeRepo                   *self,
                                    const char                   *from,
                                    const char                   *to,
                                    GVariant                     *metadata,
+                                   GVariant                     *params,
                                    GCancellable                 *cancellable,
                                    GError                      **error)
 {
   gboolean ret = FALSE;
   OstreeStaticDeltaBuilder builder = { 0, };
   guint i;
+  guint max_usize;
   GVariant *metadata_source;
   guint64 total_compressed_size = 0;
   guint64 total_uncompressed_size = 0;
@@ -355,6 +462,10 @@ ostree_repo_static_delta_generate (OstreeRepo                   *self,
   builder.parts = g_ptr_array_new_with_free_func ((GDestroyNotify)ostree_static_delta_part_builder_unref);
   builder.fallback_objects = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
 
+  if (!g_variant_lookup (params, "max-usize", "u", &max_usize))
+    max_usize = 32;
+  builder.max_usize_bytes = ((guint64)max_usize) * 1000 * 1000;
+
   /* Ignore optimization flags */
   if (!generate_delta_lowlatency (self, from, to, &builder,
                                   cancellable, error))
@@ -381,6 +492,7 @@ ostree_repo_static_delta_generate (OstreeRepo                   *self,
       gs_unref_variant GVariant *delta_part_content = NULL;
       gs_unref_variant GVariant *delta_part = NULL;
       gs_unref_variant GVariant *delta_part_header = NULL;
+      guint8 compression_type_char;
 
       payload_b = g_string_free_to_bytes (part_builder->payload);
       part_builder->payload = NULL;
@@ -395,6 +507,7 @@ ostree_repo_static_delta_generate (OstreeRepo                   *self,
 
       /* Hardcode xz for now */
       compressor = (GConverter*)_ostree_lzma_compressor_new (NULL);
+      compression_type_char = 'x';
       part_payload_in = ot_variant_read (delta_part_content);
       part_payload_out = (GMemoryOutputStream*)g_memory_output_stream_new (NULL, 0, g_realloc, g_free);
       part_payload_compressor = (GConverterOutputStream*)g_converter_output_stream_new 
((GOutputStream*)part_payload_out, compressor);
@@ -406,7 +519,7 @@ ostree_repo_static_delta_generate (OstreeRepo                   *self,
 
       /* FIXME - avoid duplicating memory here */
       delta_part = g_variant_new ("(y ay)",
-                                  (guint8)'x',
+                                  compression_type_char,
                                   ot_gvariant_new_ay_bytes (g_memory_output_stream_steal_as_bytes 
(part_payload_out)));
 
       if (!gs_file_open_in_tmpdir (self->tmp_dir, 0644,
@@ -485,8 +598,10 @@ ostree_repo_static_delta_generate (OstreeRepo                   *self,
     g_date_time_unref (now);
   }
 
-  g_printerr ("delta compressed=%" G_GUINT64_FORMAT " loose=%" G_GUINT64_FORMAT "\n",
-              total_compressed_size, builder.loose_compressed_size);
+  g_printerr ("delta uncompressed=%" G_GUINT64_FORMAT " compressed=%" G_GUINT64_FORMAT " loose=%" 
G_GUINT64_FORMAT "\n",
+              total_uncompressed_size,
+              total_compressed_size,
+              builder.loose_compressed_size);
 
   if (!ot_util_variant_save (descriptor_path, delta_descriptor, cancellable, error))
     goto out;
diff --git a/src/libostree/ostree-repo.h b/src/libostree/ostree-repo.h
index 26aa6ae..c8b45e9 100644
--- a/src/libostree/ostree-repo.h
+++ b/src/libostree/ostree-repo.h
@@ -452,6 +452,7 @@ gboolean ostree_repo_static_delta_generate (OstreeRepo                   *self,
                                             const char                   *from,
                                             const char                   *to,
                                             GVariant                     *metadata,
+                                            GVariant                     *params,
                                             GCancellable                 *cancellable,
                                             GError                      **error);
 
diff --git a/src/ostree/ot-builtin-static-delta.c b/src/ostree/ot-builtin-static-delta.c
index 34305a4..6422fdf 100644
--- a/src/ostree/ot-builtin-static-delta.c
+++ b/src/ostree/ot-builtin-static-delta.c
@@ -29,6 +29,7 @@ static char *opt_to_rev;
 static char *opt_apply;
 static char **opt_key_ids;
 static char *opt_gpg_homedir;
+static char *opt_max_usize;
 
 static GOptionEntry options[] = {
   { "from", 0, 0, G_OPTION_ARG_STRING, &opt_from_rev, "Create delta from revision REV", "REV" },
@@ -36,6 +37,7 @@ static GOptionEntry options[] = {
   { "apply", 0, 0, G_OPTION_ARG_FILENAME, &opt_apply, "Apply delta from PATH", "PATH" },
   { "gpg-sign", 0, 0, G_OPTION_ARG_STRING_ARRAY, &opt_key_ids, "GPG Key ID to sign the delta with", 
"key-id"},
   { "gpg-homedir", 0, 0, G_OPTION_ARG_STRING, &opt_gpg_homedir, "GPG Homedir to use when looking for 
keyrings", "homedir"},
+  { "max-usize", 'u', 0, G_OPTION_ARG_STRING, &opt_max_usize, "Maximum uncompressed size in megabytes", 
NULL},
   { NULL }
 };
 
@@ -94,6 +96,7 @@ ostree_builtin_static_delta (int argc, char **argv, OstreeRepo *repo, GCancellab
           gs_free char *from_resolved = NULL;
           gs_free char *to_resolved = NULL;
           gs_free char *from_parent_str = NULL;
+          gs_unref_variant_builder GVariantBuilder *parambuilder = NULL;
 
           if (opt_from_rev == NULL)
             {
@@ -110,11 +113,17 @@ ostree_builtin_static_delta (int argc, char **argv, OstreeRepo *repo, GCancellab
           if (!ostree_repo_resolve_rev (repo, opt_to_rev, FALSE, &to_resolved, error))
             goto out;
 
+          parambuilder = g_variant_builder_new (G_VARIANT_TYPE ("a{sv}"));
+          if (opt_max_usize)
+            g_variant_builder_add (parambuilder, "{sv}",
+                                   "max-usize", g_variant_new_uint32 (g_ascii_strtoull (opt_max_usize, NULL, 
10)));
+
           g_print ("Generating static delta:\n");
           g_print ("  From: %s\n", from_resolved);
           g_print ("  To:   %s\n", to_resolved);
           if (!ostree_repo_static_delta_generate (repo, OSTREE_STATIC_DELTA_GENERATE_OPT_MAJOR,
                                                   from_resolved, to_resolved, NULL,
+                                                  g_variant_builder_end (parambuilder),
                                                   cancellable, error))
             goto out;
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]