[ostree/wip/packfile-rebase2: 9/11] core: Flesh out to parse packed files



commit 63dfd4ab0310b15ba7cb7f4cd4cec567d081b7ab
Author: Colin Walters <walters verbum org>
Date:   Wed Mar 21 19:28:54 2012 -0400

    core: Flesh out to parse packed files

 src/libostree/README.md        |   21 +++
 src/libostree/ostree-core.h    |    5 +-
 src/libostree/ostree-repo.c    |  289 ++++++++++++++++++++++++++++++++++++----
 src/libostree/ostree-repo.h    |   20 +++
 src/ostree/ot-builtin-repack.c |  246 +++++++++++++++++++++-------------
 5 files changed, 456 insertions(+), 125 deletions(-)
---
diff --git a/src/libostree/README.md b/src/libostree/README.md
index 3ab4730..aa1425b 100644
--- a/src/libostree/README.md
+++ b/src/libostree/README.md
@@ -28,6 +28,27 @@ While this is still in planning, I plan to heavily optimize OSTree for
 versioning ELF operating systems.  In industry jargon, this would be
 "content-aware storage".
 
+MILESTONE 1
+-----------
+* Basic pack files
+
+MILESTONE 2
+-----------
+* Store checksums as ay
+* Drop version/metadata from tree/dirmeta objects
+* Restructure repository so that links can be generated as a cache;
+  i.e. objects/raw, pack files are now the canonical
+* Commits generate a pack?
+* For files, checksum combination of metadata variant + raw data 
+
+MILESTONE 3
+-----------
+
+* Rolling checksums for partitioning large files?  Kernel debuginfo
+* Improved pack clustering
+  - file fingerprinting?
+* ELF-x86 aware deltas
+
 Related work in storage
 -----------------------
 
diff --git a/src/libostree/ostree-core.h b/src/libostree/ostree-core.h
index e11392c..d857c8a 100644
--- a/src/libostree/ostree-core.h
+++ b/src/libostree/ostree-core.h
@@ -114,8 +114,7 @@ typedef enum {
 #define OSTREE_PACK_INDEX_VARIANT_FORMAT G_VARIANT_TYPE ("(sua{sv}a(sut))")
 
 typedef enum {
-  OSTREE_PACK_FILE_ENTRY_FLAG_NONE = 0,
-  OSTREE_PACK_FILE_ENTRY_FLAG_COMPRESSION_GZIP = (1 << 0)
+  OSTREE_PACK_FILE_ENTRY_TYPE_GZIP_RAW = (1 << 0)
 } OstreePackFileEntryFlag;
 
 /* Pack files
@@ -132,7 +131,7 @@ typedef enum {
  */
 #define OSTREE_PACK_FILE_VARIANT_FORMAT G_VARIANT_TYPE ("(sua{sv}t)")
 
-#define OSTREE_PACK_FILE_CONTENT_VARIANT_FORMAT G_VARIANT_TYPE ("(ysut)")
+#define OSTREE_PACK_FILE_CONTENT_VARIANT_FORMAT G_VARIANT_TYPE ("(tuys)")
 
 gboolean ostree_validate_checksum_string (const char *sha256,
                                           GError    **error);
diff --git a/src/libostree/ostree-repo.c b/src/libostree/ostree-repo.c
index 4f79e70..523361e 100644
--- a/src/libostree/ostree-repo.c
+++ b/src/libostree/ostree-repo.c
@@ -70,6 +70,7 @@ struct _OstreeRepoPrivate {
   OstreeRepoMode mode;
 
   GHashTable *pack_index_mappings;
+  GHashTable *pack_data_mappings;
 
   GHashTable *pending_transaction;
 };
@@ -89,6 +90,7 @@ ostree_repo_finalize (GObject *object)
   g_clear_object (&priv->pack_dir);
   g_clear_object (&priv->config_file);
   g_hash_table_destroy (priv->pack_index_mappings);
+  g_hash_table_destroy (priv->pack_data_mappings);
   g_hash_table_destroy (priv->pending_transaction);
   if (priv->config)
     g_key_file_free (priv->config);
@@ -193,7 +195,10 @@ ostree_repo_init (OstreeRepo *self)
   
   priv->pack_index_mappings = g_hash_table_new_full (g_str_hash, g_str_equal,
                                                      g_free,
-                                                     (GDestroyNotify)g_mapped_file_unref);
+                                                     (GDestroyNotify)g_variant_unref);
+  priv->pack_data_mappings = g_hash_table_new_full (g_str_hash, g_str_equal,
+                                                    g_free,
+                                                    (GDestroyNotify)g_mapped_file_unref);
   priv->pending_transaction = g_hash_table_new_full (g_str_hash, g_str_equal,
                                                      g_free,
                                                      NULL);
@@ -2529,26 +2534,6 @@ list_loose_objects (OstreeRepo                     *self,
   return ret;
 }
 
-static GFile *
-get_pack_data_for_index (GFile *index)
-{
-  const char *basename;
-  GString *name = g_string_new ("");
-  GFile *ret;
-  GFile *parent;
-
-  basename = ot_gfile_get_basename_cached (index);
-  g_assert (g_str_has_suffix (basename, ".index"));
-  g_string_append_len (name, basename, strlen (basename) - 5);
-  g_string_append (name, "data");
-
-  parent = g_file_get_parent (index);
-  ret = g_file_get_child (parent, name->str);
-  g_object_unref (parent);
-  g_string_free (name, TRUE);
-  return ret;
-}
-
 static char *
 get_checksum_from_pack_name (const char *name)
 {
@@ -2560,7 +2545,7 @@ get_checksum_from_pack_name (const char *name)
   dot = strrchr (name, '.');
   g_assert (dot);
 
-  g_assert_cmpint (dot - dash, ==, 64);
+  g_assert_cmpint (dot - (dash + 1), ==, 64);
   
   return g_strndup (dash + 1, 64);
 }
@@ -2580,8 +2565,8 @@ get_pack_index_path (OstreeRepo *self,
 }
 
 static GFile *
-get_pack_content_path (OstreeRepo *self,
-                       const char *checksum)
+get_pack_data_path (OstreeRepo *self,
+                    const char *checksum)
 {
   char *name;
   GFile *ret;
@@ -2629,6 +2614,258 @@ ostree_repo_load_pack_index (OstreeRepo    *self,
   return ret;
 }
 
+/**
+ * ostree_repo_map_pack_file:
+ * @self: Repository
+ * @sha256: Checksum of pack file
+ * @out_data: (out): Pointer to pack file data; the mapping is owned by @self
+ * @out_len: (out): Length in bytes of the mapped pack file
+ * @cancellable: Cancellable
+ * @error: Error
+ *
+ * Ensure that the given pack file is mapped into memory.
+ */
+gboolean
+ostree_repo_map_pack_file (OstreeRepo    *self,
+                           const char    *sha256,
+                           guchar       **out_data,
+                           guint64       *out_len,
+                           GCancellable  *cancellable,
+                           GError       **error)
+{
+  gboolean ret = FALSE;
+  OstreeRepoPrivate *priv = GET_PRIVATE (self);
+  GMappedFile *map;
+  gpointer ret_data = NULL;
+  guint64 ret_len;
+  GFile *path = NULL;
+
+  map = g_hash_table_lookup (priv->pack_data_mappings, sha256);
+  if (map == NULL)
+    {
+      path = get_pack_data_path (self, sha256);
+
+      map = g_mapped_file_new (ot_gfile_get_path_cached (path), FALSE, error);
+      if (!map)
+        goto out;
+
+      /* The table takes ownership of the mapping and unrefs it when destroyed */
+      g_hash_table_insert (priv->pack_data_mappings, g_strdup (sha256), map);
+    }
+
+  ret_data = g_mapped_file_get_contents (map);
+  ret_len = (guint64)g_mapped_file_get_length (map);
+
+  ret = TRUE;
+  ot_transfer_out_value (out_data, &ret_data);
+  if (out_len)
+    *out_len = ret_len;
+ out:
+  /* ret_data points into the cached mapping and is not a GMappedFile, so
+   * it must never be unreffed here; the hash table owns the mapping. */
+  g_clear_object (&path);
+  return ret;
+}
+
+static gboolean
+bsearch_in_pack_index (GVariant   *index_contents,
+                       const char *checksum,
+                       OstreeObjectType objtype,
+                       guint64    *out_offset)
+{
+  /* Binary search over the half-open range [i, n).  An exclusive upper
+   * bound means an empty index, or a miss below the first entry, can
+   * never wrap the unsigned bound around to G_MAXSIZE. */
+  gsize i = 0;
+  gsize n = g_variant_n_children (index_contents);
+  guint32 target_objtype = (guint32) objtype;
+
+  /* The index is expected to be sorted by checksum, then object type;
+   * on a match the entry's offset is stored in *out_offset. */
+  while (i < n)
+    {
+      const char *cur_checksum;
+      guint32 cur_objtype;
+      guint64 cur_offset;
+      int c;
+      gsize m = i + ((n - i) / 2);
+
+      /* Each child is a (sut) tuple, so the format needs the parentheses */
+      g_variant_get_child (index_contents, m, "(&sut)", &cur_checksum, &cur_objtype, &cur_offset);
+      c = strcmp (cur_checksum, checksum);
+      if (c == 0)
+        {
+          if (cur_objtype < target_objtype)
+            c = -1;
+          else if (cur_objtype > target_objtype)
+            c = 1;
+        }
+
+      if (c < 0)
+        i = m + 1;
+      else if (c > 0)
+        n = m;
+      else
+        {
+          *out_offset = cur_offset;
+          return TRUE;
+        }
+    }
+
+  return FALSE;
+}
+
+/* Parse the pack entry starting at @offset within the mapped pack data.
+ * An entry is a 4-byte big-endian header length, the serialized header
+ * variant, then (aligned to 8) the gzip-compressed object content.  On
+ * success @out_header receives the header and @out_input a decompressing
+ * stream that reads directly from @pack_data, which must outlive it.
+ */
+static gboolean
+parse_pack_entry (gboolean       trusted,
+                  guchar        *pack_data,
+                  guint64        pack_len,
+                  guint64        offset,
+                  GVariant     **out_header,
+                  GInputStream **out_input,
+                  GCancellable  *cancellable,
+                  GError       **error)
+{
+  gboolean ret = FALSE;
+  GVariant *ret_header = NULL;
+  GConverter *decompressor = NULL;
+  GInputStream *raw_input = NULL;
+  GInputStream *ret_input = NULL;
+  guint64 data_offset;
+  guint64 header_start;
+  guint64 header_end;
+  guint32 header_len;
+  guchar entry_type;
+
+  /* The whole 4-byte length prefix must lie inside the pack */
+  if (G_UNLIKELY (pack_len < 4 || offset > pack_len - 4))
+    {
+      g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+                   "Corrupted pack index; out of range offset %" G_GUINT64_FORMAT,
+                   offset);
+      goto out;
+    }
+  if (G_UNLIKELY (!((offset & 0x3) == 0)))
+    {
+      g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+                   "Corrupted pack index; unaligned offset %" G_GUINT64_FORMAT,
+                   offset);
+      goto out;
+    }
+
+  g_assert ((((gsize)pack_data+offset) & 0x3) == 0);
+  header_len = GUINT32_FROM_BE (*((guint32*)(pack_data+offset)));
+  /* The header follows the length prefix, so both count toward its end */
+  header_start = offset + 4;
+  header_end = header_start + header_len;
+  if (G_UNLIKELY (!(header_end < pack_len)))
+    {
+      g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+                   "Corrupted pack index; out of range header length %u",
+                   header_len);
+      goto out;
+    }
+
+  ret_header = g_variant_new_from_data (OSTREE_PACK_FILE_CONTENT_VARIANT_FORMAT,
+                                        pack_data+header_start, header_len,
+                                        trusted, NULL, NULL);
+
+  g_variant_get_child (ret_header, 2, "y", &entry_type);
+
+  if (entry_type != OSTREE_PACK_FILE_ENTRY_TYPE_GZIP_RAW)
+    {
+      g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+                   "Corrupted pack; invalid entry type %u",
+                   entry_type);
+      goto out;
+    }
+
+  /* Skip 4 bytes for the header len, the actual header, then align to
+   * 8.
+   */
+  data_offset = (header_end + 7) & ~(guint64)0x7;
+  if (G_UNLIKELY (!(data_offset < pack_len)))
+    {
+      g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+                   "Corrupted pack index; out of range offset %" G_GUINT64_FORMAT,
+                   offset);
+      goto out;
+    }
+
+  raw_input = (GInputStream*)g_memory_input_stream_new_from_data (pack_data + data_offset,
+                                                                  pack_len - data_offset,
+                                                                  NULL);
+
+  decompressor = (GConverter*)g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_GZIP);
+  ret_input = (GInputStream*)g_object_new (G_TYPE_CONVERTER_INPUT_STREAM,
+                                           "converter", decompressor,
+                                           "base-stream", raw_input,
+                                           "close-base-stream", TRUE,
+                                           NULL);
+
+  ret = TRUE;
+  ot_transfer_out_value (out_header, &ret_header);
+  ot_transfer_out_value (out_input, &ret_input);
+ out:
+  ot_clear_gvariant (&ret_header);
+  g_clear_object (&raw_input);
+  g_clear_object (&decompressor);
+  g_clear_object (&ret_input);
+  return ret;
+}
+
+gboolean
+ostree_repo_load_pack_entry (OstreeRepo         *self,
+                             const char         *pack_sha256,
+                             const char         *entry_sha256,
+                             OstreeObjectType    objtype,
+                             GInputStream      **out_input,
+                             GCancellable       *cancellable,
+                             GError            **error)
+{
+  gboolean ret = FALSE;
+  guint64 offset;
+  guchar *pack_data;
+  guint64 pack_len;
+  GVariant *index = NULL;
+  GVariant *index_contents = NULL;
+  GInputStream *ret_input = NULL;
+  /* Look up one object in a pack and hand back a stream of its content */
+  if (!ostree_repo_load_pack_index (self, pack_sha256, &index, cancellable, error))
+    goto out;
+  /* Child 3 of the index tuple "(sua{sv}a(sut))" is the entry array */
+  index_contents = g_variant_get_child_value (index, 3);
+  /* Locate the entry; on success offset is its position in the pack data */
+  if (!bsearch_in_pack_index (index_contents, entry_sha256, objtype, &offset))
+    {
+      g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+                   "Object '%s' of type %u not in pack",
+                   entry_sha256, (guint32)objtype);
+      goto out;
+    }
+  /* pack_data is not owned here; it belongs to the repo's mapping cache */
+  if (!ostree_repo_map_pack_file (self, pack_sha256, &pack_data, &pack_len,
+                                  cancellable, error))
+    goto out;
+  /* Parsed as trusted (TRUE); the entry header is not requested (NULL) */
+  if (!parse_pack_entry (TRUE, pack_data, pack_len, offset, NULL, &ret_input,
+                         cancellable, error))
+    goto out;
+
+  ret = TRUE;
+  ot_transfer_out_value (out_input, &ret_input);
+ out:
+  ot_clear_gvariant (&index);
+  ot_clear_gvariant (&index_contents);
+  g_clear_object (&ret_input);
+  return ret;
+}
+
 static gboolean
 list_objects_in_index (OstreeRepo                     *self,
                        const char                     *pack_checksum,
@@ -2676,7 +2913,7 @@ list_objects_in_index (OstreeRepo                     *self,
           GVariantIter *current_packs_iter;
           const char *current_pack_checksum;
 
-          g_variant_get (objdata, "bas", &is_loose, &current_packs_iter);
+          g_variant_get (objdata, "(bas)", &is_loose, &current_packs_iter);
 
           while (g_variant_iter_loop (current_packs_iter, "&s", &current_pack_checksum))
             {
@@ -2684,7 +2921,7 @@ list_objects_in_index (OstreeRepo                     *self,
             }
           g_variant_iter_free (current_packs_iter);
         }
-      g_variant_builder_add (&pack_contents_builder, pack_checksum);
+      g_variant_builder_add (&pack_contents_builder, "s", pack_checksum);
       objdata = g_variant_new ("(b as)", is_loose,
                                g_variant_builder_end (&pack_contents_builder));
       g_hash_table_replace (inout_objects,
diff --git a/src/libostree/ostree-repo.h b/src/libostree/ostree-repo.h
index 09ec1ae..29c6504 100644
--- a/src/libostree/ostree-repo.h
+++ b/src/libostree/ostree-repo.h
@@ -147,6 +147,26 @@ gboolean      ostree_repo_load_pack_index (OstreeRepo    *self,
                                            GCancellable  *cancellable,
                                            GError       **error);
 
+gboolean      ostree_repo_load_pack_data  (OstreeRepo    *self,
+                                           const char    *sha256,
+                                           guchar       **out_data,
+                                           GCancellable  *cancellable,
+                                           GError       **error);
+
+gboolean ostree_repo_map_pack_file (OstreeRepo    *self,
+                                    const char    *sha256,
+                                    guchar       **out_data,
+                                    guint64       *out_len,
+                                    GCancellable  *cancellable,
+                                    GError       **error);
+
+gboolean ostree_repo_load_pack_entry (OstreeRepo         *self,
+                                      const char         *pack_sha256,
+                                      const char         *entry_sha256,
+                                      OstreeObjectType    objtype,
+                                      GInputStream      **object_input,
+                                      GCancellable       *cancellable,
+                                      GError            **error);
 typedef enum {
   OSTREE_REPO_COMMIT_FILTER_ALLOW,
   OSTREE_REPO_COMMIT_FILTER_SKIP
diff --git a/src/ostree/ot-builtin-repack.c b/src/ostree/ot-builtin-repack.c
index 3bde4be..b59c9b5 100644
--- a/src/ostree/ot-builtin-repack.c
+++ b/src/ostree/ot-builtin-repack.c
@@ -35,7 +35,6 @@
 #define OT_GZIP_COMPRESSION_LEVEL (8)
 
 static gboolean opt_analyze_only;
-static gboolean opt_ls;
 static char* opt_pack_size;
 static char* opt_int_compression;
 static char* opt_ext_compression;
@@ -51,7 +50,6 @@ static GOptionEntry options[] = {
   { "internal-compression", 0, 0, G_OPTION_ARG_STRING, &opt_int_compression, "Compress objects using COMPRESSION", "COMPRESSION" },
   { "external-compression", 0, 0, G_OPTION_ARG_STRING, &opt_ext_compression, "Compress entire packfiles using COMPRESSION", "COMPRESSION" },
   { "analyze-only", 0, 0, G_OPTION_ARG_NONE, &opt_analyze_only, "Just analyze current state", NULL },
-  { "ls", 0, 0, G_OPTION_ARG_NONE, &opt_ls, "Print packfiles", NULL },
   { NULL }
 };
 
@@ -62,10 +60,6 @@ typedef struct {
   OtCompressionType int_compression;
   OtCompressionType ext_compression;
 
-  guint n_commits;
-  guint n_dirmeta;
-  guint n_dirtree;
-  guint n_files;
   gboolean had_error;
   GError **error;
 } OtRepackData;
@@ -160,7 +154,7 @@ write_variant_with_size (GOutputStream      *output,
   guint64 variant_size;
   guint32 variant_size_u32_be;
 
-  g_assert ((*inout_offset & 7) == 0);
+  g_assert ((*inout_offset & 3) == 0);
 
   /* Write variant size */
   variant_size = g_variant_get_size (variant);
@@ -184,6 +178,35 @@ write_variant_with_size (GOutputStream      *output,
   return ret;
 }
 
+static gint
+compare_index_content (gconstpointer         ap,
+                       gconstpointer         bp)
+{
+  gpointer a = *((gpointer*)ap); /* ap/bp point at slots holding the element pointers */
+  gpointer b = *((gpointer*)bp);
+  GVariant *a_v = a;
+  GVariant *b_v = b;
+  const char *a_checksum;
+  const char *b_checksum;
+  guint32 a_objtype;
+  guint32 b_objtype;
+  guint64 a_offset; /* offsets are unpacked but do not affect the ordering */
+  guint64 b_offset;
+  int c;
+  /* Order "(sut)" index entries by checksum string, then by object type */
+  g_variant_get (a_v, "(&sut)", &a_checksum, &a_objtype, &a_offset);      
+  g_variant_get (b_v, "(&sut)", &b_checksum, &b_objtype, &b_offset);      
+  c = strcmp (a_checksum, b_checksum);
+  if (c == 0)
+    {
+      if (a_objtype < b_objtype)
+        c = -1;
+      else if (a_objtype > b_objtype)
+        c = 1;
+    }
+  return c;
+}
+
 static gboolean
 create_pack_file (OtRepackData        *data,
                   GPtrArray           *objects,
@@ -205,11 +228,11 @@ create_pack_file (OtRepackData        *data,
   guint64 offset;
   gsize bytes_read;
   gsize bytes_written;
-  GVariantBuilder index_content_builder;
-  gboolean index_content_builder_initialized = FALSE;
+  GPtrArray *index_content_list = NULL;
   GVariant *pack_header = NULL;
   GVariant *object_header = NULL;
   GVariant *index_content = NULL;
+  GVariantBuilder index_content_builder;
   GChecksum *pack_checksum = NULL;
   char *pack_name = NULL;
   GFile *pack_file_path = NULL;
@@ -232,12 +255,11 @@ create_pack_file (OtRepackData        *data,
                                         cancellable, error))
     goto out;
 
+  index_content_list = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
+
   offset = 0;
   pack_checksum = g_checksum_new (G_CHECKSUM_SHA256);
 
-  g_variant_builder_init (&index_content_builder, G_VARIANT_TYPE ("a(sut)"));
-  index_content_builder_initialized = TRUE;
-
   pack_header = g_variant_new ("(su a{sv}u)",
                                "OSTPACK", GUINT32_TO_BE (0),
                                g_variant_new_array (G_VARIANT_TYPE ("{sv}"), NULL, 0),
@@ -258,7 +280,7 @@ create_pack_file (OtRepackData        *data,
       guint64 expected_objsize;
       guint64 objsize;
       GOutputStream *write_pack_out;
-      guchar entry_flags;
+      guchar entry_type;
 
       g_variant_get (object_data, "(&sut)", &checksum, &objtype_u32, &expected_objsize);
                      
@@ -268,22 +290,21 @@ create_pack_file (OtRepackData        *data,
         goto out;
 
       /* offset points to aligned header size */
-      g_variant_builder_add (&index_content_builder, "(sut)", checksum, (guint32)objtype, offset);
+      g_ptr_array_add (index_content_list,
+                       g_variant_ref_sink (g_variant_new ("(sut)", checksum, (guint32)objtype, offset)));
 
       ot_clear_gvariant (&object_header);
-      entry_flags = 0;
-      if (data->int_compression != OT_COMPRESSION_NONE)
+      switch (data->int_compression)
         {
-          switch (data->int_compression)
-            {
-          case OT_COMPRESSION_GZIP:
-            entry_flags |= OSTREE_PACK_FILE_ENTRY_FLAG_COMPRESSION_GZIP;
+        case OT_COMPRESSION_GZIP:
+          {
+            entry_type = OSTREE_PACK_FILE_ENTRY_TYPE_GZIP_RAW;
             break;
-            case OT_COMPRESSION_NONE:
-              break;
-            default:
-              g_assert_not_reached ();
-            }
+          }
+        default:
+          {
+            g_assert_not_reached ();
+          }
         }
 
       g_clear_object (&object_path);
@@ -306,7 +327,7 @@ create_pack_file (OtRepackData        *data,
       ot_clear_gvariant (&object_header);
       object_header = g_variant_new ("(tuys)", GUINT64_TO_BE (objsize),
                                      GUINT32_TO_BE ((guint32)objtype),
-                                     GUINT32_TO_BE (entry_flags),
+                                     entry_type,
                                      checksum);
 
       if (!write_variant_with_size (pack_out, object_header, pack_checksum,
@@ -384,11 +405,17 @@ create_pack_file (OtRepackData        *data,
     }
   g_clear_object (&pack_temppath);
 
+  g_variant_builder_init (&index_content_builder, G_VARIANT_TYPE ("a(sut)"));
+  g_ptr_array_sort (index_content_list, compare_index_content);
+  for (i = 0; i < index_content_list->len; i++)
+    {
+      GVariant *index_item = index_content_list->pdata[i];
+      g_variant_builder_add_value (&index_content_builder, index_item);
+    }
   index_content = g_variant_new ("(su a{sv}@a(sut))",
                                  "OSTPACKINDEX", GUINT32_TO_BE(0),
                                  g_variant_new_array (G_VARIANT_TYPE ("{sv}"), NULL, 0),
                                  g_variant_builder_end (&index_content_builder));
-  index_content_builder_initialized = FALSE;
 
   if (!g_output_stream_write_all (index_out,
                                   g_variant_get_data (index_content),
@@ -439,8 +466,8 @@ create_pack_file (OtRepackData        *data,
   g_free (pack_name);
   g_clear_object (&pack_file_path);
   g_clear_object (&pack_index_path);
-  if (index_content_builder_initialized)
-    g_variant_builder_clear (&index_content_builder);
+  if (index_content_list)
+    g_ptr_array_unref (index_content_list);
   return ret;
 }
 
@@ -486,12 +513,6 @@ cluster_objects_stupidly (OtRepackData      *data,
       g_clear_object (&object_path);
       object_path = ostree_repo_get_object_path (data->repo, checksum, objtype);
 
-      if (objtype == OSTREE_OBJECT_TYPE_ARCHIVED_FILE_META)
-        {
-          /* Counted under files */
-          continue;
-        }
-
       g_clear_object (&object_info);
       object_info = g_file_query_info (object_path, OSTREE_GIO_FAST_QUERYINFO,
                                        G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
@@ -636,11 +657,84 @@ parse_compression_string (const char *compstr,
 }
 
 static gboolean
-do_ls (OtRepackData  *data,
-       GCancellable  *cancellable,
-       GError       **error)
+do_stats (OtRepackData  *data,
+          GHashTable    *objects,
+          GCancellable  *cancellable,
+          GError       **error)
 {
-  return FALSE;
+  /* Print object counts by type and by loose/packed state for @objects */
+  gboolean ret = FALSE;
+  guint n_loose = 0;
+  guint n_loose_and_packed = 0;
+  guint n_packed = 0;
+  guint n_dup_packed = 0;
+  guint n_commits = 0;
+  guint n_dirmeta = 0;
+  guint n_dirtree = 0;
+  guint n_files = 0;
+  GHashTableIter hash_iter;
+  gpointer key, value;
+
+  g_hash_table_iter_init (&hash_iter, objects);
+  while (g_hash_table_iter_next (&hash_iter, &key, &value))
+    {
+      GVariant *serialized_key = key;
+      GVariant *objdata = value;
+      const char *checksum;
+      OstreeObjectType objtype;
+      gboolean is_loose;
+      gboolean is_packed;
+      GVariant *pack_array;
+
+      ostree_object_name_deserialize (serialized_key, &checksum, &objtype);
+      /* "@as" yields a GVariant reference; a bare "as" yields a GVariantIter */
+      g_variant_get (objdata, "(b @as)", &is_loose, &pack_array);
+
+      is_packed = g_variant_n_children (pack_array) > 0;
+      /* An object is a duplicate only when more than one pack holds it */
+      if (is_loose && is_packed)
+        n_loose_and_packed++;
+      else if (is_loose)
+        n_loose++;
+      else if (g_variant_n_children (pack_array) > 1)
+        n_dup_packed++;
+      else
+        n_packed++;
+      g_variant_unref (pack_array);
+      switch (objtype)
+        {
+        case OSTREE_OBJECT_TYPE_COMMIT:
+          n_commits++;
+          break;
+        case OSTREE_OBJECT_TYPE_DIR_TREE:
+          n_dirtree++;
+          break;
+        case OSTREE_OBJECT_TYPE_DIR_META:
+          n_dirmeta++;
+          break;
+        case OSTREE_OBJECT_TYPE_RAW_FILE:
+        case OSTREE_OBJECT_TYPE_ARCHIVED_FILE_META:
+          n_files++;
+          break;
+        case OSTREE_OBJECT_TYPE_ARCHIVED_FILE_CONTENT:
+          /* Counted under files by META */
+          break;
+        }
+    }
+
+  g_print ("Commits: %u\n", n_commits);
+  g_print ("Tree contents: %u\n", n_dirtree);
+  g_print ("Tree meta: %u\n", n_dirmeta);
+  g_print ("Files: %u\n", n_files);
+  g_print ("\n");
+  g_print ("Loose+packed objects: %u\n", n_loose_and_packed);
+  g_print ("Loose objects: %u\n", n_loose);
+  g_print ("Duplicate packed objects: %u\n", n_dup_packed);
+  g_print ("Packed objects: %u\n", n_packed);
+
+  ret = TRUE;
+ /* out: */
+  return ret;
 }
 
 gboolean
@@ -654,6 +748,7 @@ ostree_builtin_repack (int argc, char **argv, GFile *repo_path, GError **error)
   GCancellable *cancellable = NULL;
   guint i;
   GPtrArray *clusters = NULL;
+  GHashTable *loose_objects = NULL;
   GHashTableIter hash_iter;
   gpointer key, value;
 
@@ -680,71 +775,30 @@ ostree_builtin_repack (int argc, char **argv, GFile *repo_path, GError **error)
   if (!parse_compression_string (opt_ext_compression, &data.ext_compression, error))
     goto out;
 
-  if (!ostree_repo_list_objects (repo, OSTREE_REPO_LIST_OBJECTS_LOOSE, &objects, cancellable, error))
+  if (!ostree_repo_list_objects (repo, OSTREE_REPO_LIST_OBJECTS_ALL, &objects, cancellable, error))
     goto out;
 
-  g_hash_table_iter_init (&hash_iter, objects);
-
-  while (g_hash_table_iter_next (&hash_iter, &key, &value))
-    {
-      GVariant *serialized_key = key;
-      const char *checksum;
-      OstreeObjectType objtype;
-
-      ostree_object_name_deserialize (serialized_key, &checksum, &objtype);
-
-      switch (objtype)
-        {
-        case OSTREE_OBJECT_TYPE_COMMIT:
-          data.n_commits++;
-          break;
-        case OSTREE_OBJECT_TYPE_DIR_TREE:
-          data.n_dirtree++;
-          break;
-        case OSTREE_OBJECT_TYPE_DIR_META:
-          data.n_dirmeta++;
-          break;
-        case OSTREE_OBJECT_TYPE_RAW_FILE:
-        case OSTREE_OBJECT_TYPE_ARCHIVED_FILE_CONTENT:
-          data.n_files++;
-          break;
-        case OSTREE_OBJECT_TYPE_ARCHIVED_FILE_META:
-          /* Counted under files */
-          break;
-        }
-    }
-
-  g_print ("Commits: %u\n", data.n_commits);
-  g_print ("Tree contents: %u\n", data.n_dirtree);
-  g_print ("Tree meta: %u\n", data.n_dirmeta);
-  g_print ("Files: %u\n", data.n_files);
+  if (!do_stats (&data, objects, cancellable, error))
+    goto out;
 
   g_print ("\n");
   g_print ("Using pack size: %" G_GUINT64_FORMAT "\n", data.pack_size);
 
-  if (opt_ls)
-    {
-      if (!do_ls (&data, cancellable, error))
-        goto out;
-    }
-  else
+  if (!cluster_objects_stupidly (&data, objects, &clusters, cancellable, error))
+    goto out;
+  
+  g_print ("Going to create %u packfiles\n", clusters->len);
+  
+  for (i = 0; i < clusters->len; i++)
     {
-      if (!cluster_objects_stupidly (&data, objects, &clusters, cancellable, error))
-        goto out;
-
-      g_print ("Going to create %u packfiles\n", clusters->len);
-
-      for (i = 0; i < clusters->len; i++)
-        {
-          GPtrArray *cluster = clusters->pdata[i];
+      GPtrArray *cluster = clusters->pdata[i];
       
-          g_print ("%u: %u objects\n", i, cluster->len);
-
-          if (!opt_analyze_only)
-            {
-              if (!create_pack_file (&data, cluster, cancellable, error))
-                goto out;
-            }
+      g_print ("%u: %u objects\n", i, cluster->len);
+      
+      if (!opt_analyze_only)
+        {
+          if (!create_pack_file (&data, cluster, cancellable, error))
+            goto out;
         }
     }
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]