[ostree] core: Add dynamic uncompressed object cache for archive-z



commit ee73c0fa3487fe084c1b9b3d4e1fbb5c8225ae34
Author: Colin Walters <walters verbum org>
Date:   Thu Oct 11 18:33:03 2012 -0400

    core: Add dynamic uncompressed object cache for archive-z
    
    This gives us something closer to the combined advantages of archive
    and archive-z when using the latter.  Concretely, we get deduplication
    among multiple checkouts, along with the "devino" hash table trick
    during commits to avoid checksumming content again.
    
    This is enabled by default.

 src/libostree/ostree-repo.c      |  222 +++++++++++++++++++++++++++++++++++---
 src/libostree/ostree-repo.h      |    4 +
 src/libotutil/ot-gio-utils.c     |    6 +
 src/ostree/ot-builtin-checkout.c |    3 +
 4 files changed, 220 insertions(+), 15 deletions(-)
---
diff --git a/src/libostree/ostree-repo.c b/src/libostree/ostree-repo.c
index 4081b0b..548955b 100644
--- a/src/libostree/ostree-repo.c
+++ b/src/libostree/ostree-repo.c
@@ -50,6 +50,7 @@ struct OstreeRepo {
   GFile *local_heads_dir;
   GFile *remote_heads_dir;
   GFile *objects_dir;
+  GFile *uncompressed_objects_dir;
   GFile *remote_cache_dir;
   GFile *config_file;
 
@@ -64,9 +65,11 @@ struct OstreeRepo {
   gboolean inited;
   gboolean in_transaction;
   GHashTable *loose_object_devino_hash;
+  GHashTable *updated_uncompressed_dirs;
 
   GKeyFile *config;
   OstreeRepoMode mode;
+  gboolean enable_uncompressed_cache;
 
   OstreeRepo *parent_repo;
 };
@@ -104,10 +107,13 @@ ostree_repo_finalize (GObject *object)
   g_clear_object (&self->local_heads_dir);
   g_clear_object (&self->remote_heads_dir);
   g_clear_object (&self->objects_dir);
+  g_clear_object (&self->uncompressed_objects_dir);
   g_clear_object (&self->remote_cache_dir);
   g_clear_object (&self->config_file);
   if (self->loose_object_devino_hash)
     g_hash_table_destroy (self->loose_object_devino_hash);
+  if (self->updated_uncompressed_dirs)
+    g_hash_table_destroy (self->updated_uncompressed_dirs);
   if (self->config)
     g_key_file_free (self->config);
   g_clear_pointer (&self->cached_meta_indexes, (GDestroyNotify) g_ptr_array_unref);
@@ -177,6 +183,7 @@ ostree_repo_constructor (GType                  gtype,
   self->remote_heads_dir = g_file_resolve_relative_path (self->repodir, "refs/remotes");
   
   self->objects_dir = g_file_get_child (self->repodir, "objects");
+  self->uncompressed_objects_dir = g_file_get_child (self->repodir, "uncompressed-objects-cache");
   self->remote_cache_dir = g_file_get_child (self->repodir, "remote-cache");
   self->config_file = g_file_get_child (self->repodir, "config");
 
@@ -674,6 +681,10 @@ ostree_repo_check (OstreeRepo *self, GError **error)
         }
     }
 
+  if (!ot_keyfile_get_boolean_with_default (self->config, "core", "enable-uncompressed-cache",
+                                            TRUE, &self->enable_uncompressed_cache, error))
+    goto out;
+
   self->inited = TRUE;
   
   ret = TRUE;
@@ -1094,18 +1105,35 @@ get_loose_object_dirs (OstreeRepo       *self,
 {
   gboolean ret = FALSE;
   GError *temp_error = NULL;
+  GFile *object_dir_to_scan;
   ot_lptrarray GPtrArray *ret_object_dirs = NULL;
   ot_lobj GFileEnumerator *enumerator = NULL;
   ot_lobj GFileInfo *file_info = NULL;
 
   ret_object_dirs = g_ptr_array_new_with_free_func ((GDestroyNotify)g_object_unref);
 
-  enumerator = g_file_enumerate_children (self->objects_dir, OSTREE_GIO_FAST_QUERYINFO, 
+  if (ostree_repo_get_mode (self) == OSTREE_REPO_MODE_ARCHIVE_Z)
+    object_dir_to_scan = self->uncompressed_objects_dir;
+  else
+    object_dir_to_scan = self->objects_dir;
+
+  enumerator = g_file_enumerate_children (object_dir_to_scan, OSTREE_GIO_FAST_QUERYINFO, 
                                           G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
                                           cancellable, 
-                                          error);
+                                          &temp_error);
   if (!enumerator)
-    goto out;
+    {
+      if (g_error_matches (temp_error, G_IO_ERROR, G_IO_ERROR_NOT_FOUND))
+        {
+          g_clear_error (&temp_error);
+          ret = TRUE;
+          ot_transfer_out_value (out_object_dirs, &ret_object_dirs);
+        }
+      else
+        g_propagate_error (error, temp_error);
+
+      goto out;
+    }
 
   while ((file_info = g_file_enumerator_next_file (enumerator, cancellable, &temp_error)) != NULL)
     {
@@ -1178,8 +1206,6 @@ scan_loose_devino (OstreeRepo                     *self,
     }
 
   repo_mode = ostree_repo_get_mode (self);
-  if (repo_mode == OSTREE_REPO_MODE_ARCHIVE_Z)
-    return TRUE;
 
   if (!get_loose_object_dirs (self, &object_dirs, cancellable, error))
     goto out;
@@ -1207,6 +1233,7 @@ scan_loose_devino (OstreeRepo                     *self,
           guint32 type;
           OstreeDevIno *key;
           GString *checksum;
+          gboolean skip;
 
           name = g_file_info_get_attribute_byte_string (file_info, "standard::name"); 
           type = g_file_info_get_attribute_uint32 (file_info, "standard::type");
@@ -1217,14 +1244,20 @@ scan_loose_devino (OstreeRepo                     *self,
               continue;
             }
       
-          if (!((repo_mode == OSTREE_REPO_MODE_ARCHIVE
-                 && g_str_has_suffix (name, ".filecontent"))
-                || (repo_mode == OSTREE_REPO_MODE_BARE
-                    && g_str_has_suffix (name, ".file"))))
+          switch (repo_mode)
             {
-              g_clear_object (&file_info);
-              continue;
+            case OSTREE_REPO_MODE_ARCHIVE:
+              skip = !g_str_has_suffix (name, ".filecontent");
+              break;
+            case OSTREE_REPO_MODE_ARCHIVE_Z:
+              skip = !g_str_has_suffix (name, ".filez");
+              break;
+            case OSTREE_REPO_MODE_BARE:
+              skip = !g_str_has_suffix (name, ".file");
+              break;
             }
+          if (skip)
+            continue;
 
           dot = strrchr (name, '.');
           g_assert (dot);
@@ -1517,6 +1550,20 @@ ostree_repo_get_object_path (OstreeRepo       *self,
   return ret;
 }
 
+static GFile *
+get_uncompressed_object_cache_path (OstreeRepo       *self,
+                                    const char       *checksum)
+{
+  char *relpath;
+  GFile *ret;
+
+  relpath = ostree_get_relative_object_path (checksum, OSTREE_OBJECT_TYPE_FILE, FALSE);
+  ret = g_file_resolve_relative_path (self->uncompressed_objects_dir, relpath);
+  g_free (relpath);
+  /* ret = uncompressed-cache location of @checksum's FILE object; caller owns it. */
+  return ret;
+}
+
 /**
  * ostree_repo_stage_content_trusted:
  *
@@ -3208,10 +3255,29 @@ find_loose_for_checkout (OstreeRepo             *self,
 
   do
     {
-      if (self->mode == OSTREE_REPO_MODE_BARE)
-        path = ostree_repo_get_object_path (self, checksum, OSTREE_OBJECT_TYPE_FILE);
-      else
-        path = ostree_repo_get_archive_content_path (self, checksum);
+      switch (self->mode)
+        {
+        case OSTREE_REPO_MODE_BARE:
+          path = ostree_repo_get_object_path (self, checksum, OSTREE_OBJECT_TYPE_FILE);
+          break;
+        case OSTREE_REPO_MODE_ARCHIVE:
+          path = ostree_repo_get_archive_content_path (self, checksum);
+          break;
+        case OSTREE_REPO_MODE_ARCHIVE_Z:
+          {
+            if (self->enable_uncompressed_cache)
+              path = get_uncompressed_object_cache_path (self, checksum);
+            else
+              path = NULL;
+          }
+          break;
+        }
+
+      if (!path)
+        {
+          self = self->parent_repo;
+          continue;
+        }
 
       if (lstat (ot_gfile_get_path_cached (path), &stbuf) < 0)
         {
@@ -3279,6 +3345,7 @@ checkout_file_thread (GSimpleAsyncResult     *result,
                       GCancellable           *cancellable)
 {
   const char *checksum;
+  OstreeRepo *repo;
   gboolean hardlink_supported;
   GError *local_error = NULL;
   GError **error = &local_error;
@@ -3288,6 +3355,7 @@ checkout_file_thread (GSimpleAsyncResult     *result,
   CheckoutOneFileAsyncData *checkout_data;
 
   checkout_data = g_simple_async_result_get_op_res_gpointer (result);
+  repo = checkout_data->repo;
 
   /* Hack to avoid trying to create device files as a user */
   if (checkout_data->mode == OSTREE_REPO_CHECKOUT_MODE_USER
@@ -3296,15 +3364,72 @@ checkout_file_thread (GSimpleAsyncResult     *result,
 
   checksum = ostree_repo_file_get_checksum ((OstreeRepoFile*)checkout_data->source);
 
+  /* We can only do hardlinks in these scenarios */
   if ((checkout_data->repo->mode == OSTREE_REPO_MODE_BARE
        && checkout_data->mode == OSTREE_REPO_CHECKOUT_MODE_NONE)
       || (checkout_data->repo->mode == OSTREE_REPO_MODE_ARCHIVE
+          && checkout_data->mode == OSTREE_REPO_CHECKOUT_MODE_USER)
+      || (checkout_data->repo->mode == OSTREE_REPO_MODE_ARCHIVE_Z
           && checkout_data->mode == OSTREE_REPO_CHECKOUT_MODE_USER))
     {
       if (!find_loose_for_checkout (checkout_data->repo, checksum, &loose_path,
                                     cancellable, error))
         goto out;
     }
+  /* Also, if we're archive-z and we didn't find an object, uncompress it now,
+   * stick it in the cache, and then hardlink to that.
+   */
+  if (loose_path == NULL
+      && repo->mode == OSTREE_REPO_MODE_ARCHIVE_Z
+      && checkout_data->mode == OSTREE_REPO_CHECKOUT_MODE_USER
+      && repo->enable_uncompressed_cache)
+    {
+      ot_lobj GFile *objdir = NULL;
+
+      loose_path = get_uncompressed_object_cache_path (repo, checksum);
+      if (!ostree_repo_load_file (repo, checksum, &input, NULL, &xattrs,
+                                  cancellable, error))
+        goto out;
+
+      objdir = g_file_get_parent (loose_path);
+      if (!ot_gfile_ensure_directory (objdir, TRUE, error))
+        goto out;
+
+      /* Use UNION_FILES so that racing threads get last-one-wins
+       * behavior for now; we potentially lose deduplication, but oh well.
+       */ 
+      if (!checkout_file_from_input (loose_path,
+                                     OSTREE_REPO_CHECKOUT_MODE_USER,
+                                     OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES,
+                                     checkout_data->source_info, xattrs, 
+                                     input, cancellable, error))
+        goto out;
+
+      /* Store the 2-character objdir prefix (e.g. e3) in a set.  The basic
+       * idea here is that if we had to unpack an object, it's very
+       * likely we're replacing some other object, so we may need a GC.
+       *
+       * This model ensures that we do work roughly proportional to
+       * the size of the changes.  For example, we don't scan any
+       * directories if we didn't modify anything, meaning you can
+       * checkout the same tree multiple times very quickly.
+       *
+       * This is also scale independent; we don't hardcode e.g. looking
+       * at 1000 objects.
+       *
+       * The downside is that if we're unlucky, we may not free
+       * an object for quite some time.
+       */
+      g_mutex_lock (&repo->cache_lock);
+      {
+        gpointer key = GUINT_TO_POINTER ((g_ascii_xdigit_value (checksum[0]) << 4) + 
+                                         g_ascii_xdigit_value (checksum[1]));
+        if (repo->updated_uncompressed_dirs == NULL)
+          repo->updated_uncompressed_dirs = g_hash_table_new (NULL, NULL);
+        g_hash_table_insert (repo->updated_uncompressed_dirs, key, key);
+      }
+      g_mutex_unlock (&repo->cache_lock);
+    }
 
   if (loose_path)
     {
@@ -3615,6 +3740,73 @@ ostree_repo_checkout_tree_finish (OstreeRepo               *self,
   return TRUE;
 }
 
+/**
+ * ostree_repo_checkout_gc:
+ *
+ * Call this after finishing a succession of checkout operations.  For each
+ * uncompressed-cache directory recorded as updated since the previous call,
+ * unlink entries whose link count is 1.  Returns %FALSE on the first error.
+ */
+gboolean
+ostree_repo_checkout_gc (OstreeRepo        *self,
+                         GCancellable      *cancellable,
+                         GError           **error)
+{
+  gboolean ret = FALSE;
+  ot_lhash GHashTable *to_clean_dirs = NULL;
+  GHashTableIter iter;
+  gpointer key, value;
+  /* Detach the pending set under the lock and leave a fresh, empty one behind. */
+  g_mutex_lock (&self->cache_lock);
+  to_clean_dirs = self->updated_uncompressed_dirs;
+  self->updated_uncompressed_dirs = g_hash_table_new (NULL, NULL);
+  g_mutex_unlock (&self->cache_lock);
+
+  if (to_clean_dirs)
+    g_hash_table_iter_init (&iter, to_clean_dirs);
+  while (to_clean_dirs && g_hash_table_iter_next (&iter, &key, &value))
+    {
+      GError *temp_error = NULL;
+      ot_lobj GFile *objdir = NULL;
+      ot_lobj GFileInfo *file_info = NULL;
+      ot_lobj GFileEnumerator *enumerator = NULL;
+      ot_lfree char *objdir_name = NULL;
+      /* The key is an integer; the directory name is its two-digit lowercase hex form. */
+      objdir_name = g_strdup_printf ("%02x", GPOINTER_TO_UINT (key));
+      objdir = ot_gfile_get_child_build_path (self->uncompressed_objects_dir, "objects",
+                                              objdir_name, NULL);
+
+      enumerator = g_file_enumerate_children (objdir, "standard::name,standard::type,unix::inode,unix::nlink", 
+                                              G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
+                                              cancellable, 
+                                              error);
+      if (!enumerator)
+        goto out;
+      /* Unlink every entry whose only remaining hard link is the one in this directory. */
+      while ((file_info = g_file_enumerator_next_file (enumerator, cancellable, &temp_error)) != NULL)
+        {
+          guint32 nlinks = g_file_info_get_attribute_uint32 (file_info, "unix::nlink");
+          if (nlinks == 1)
+            {
+              ot_lobj GFile *objpath = NULL;
+              objpath = ot_gfile_get_child_build_path (objdir, g_file_info_get_name (file_info), NULL);
+              if (!ot_gfile_unlink (objpath, cancellable, error))
+                goto out;
+            }
+          g_object_unref (file_info);
+        }
+      if (temp_error != NULL)
+        {
+          g_propagate_error (error, temp_error);
+          goto out;
+        }
+    }
+
+  ret = TRUE;
+ out:
+  return ret;
+}
+
 gboolean
 ostree_repo_read_commit (OstreeRepo *self,
                          const char *rev, 
diff --git a/src/libostree/ostree-repo.h b/src/libostree/ostree-repo.h
index 0ed4c2b..baed3ce 100644
--- a/src/libostree/ostree-repo.h
+++ b/src/libostree/ostree-repo.h
@@ -281,6 +281,10 @@ ostree_repo_checkout_tree_finish (OstreeRepo               *self,
                                   GAsyncResult             *result,
                                   GError                  **error);
 
+gboolean       ostree_repo_checkout_gc (OstreeRepo        *self,
+                                        GCancellable      *cancellable,
+                                        GError           **error);
+
 gboolean       ostree_repo_read_commit (OstreeRepo *self,
                                         const char *rev,
                                         GFile       **out_root,
diff --git a/src/libotutil/ot-gio-utils.c b/src/libotutil/ot-gio-utils.c
index 71ab383..31ea3fa 100644
--- a/src/libotutil/ot-gio-utils.c
+++ b/src/libotutil/ot-gio-utils.c
@@ -57,6 +57,7 @@ ot_gfile_ensure_directory (GFile     *dir,
   gboolean ret = FALSE;
   GError *temp_error = NULL;
 
+ again:
   if (with_parents)
     ret = g_file_make_directory_with_parents (dir, NULL, &temp_error);
   else
@@ -71,6 +72,11 @@ ot_gfile_ensure_directory (GFile     *dir,
       else
         g_clear_error (&temp_error);
     }
+  /* Work around a GLib bug: when multiple threads/processes race in
+   * g_file_make_directory_with_parents(), it can error out early.
+   */
+  if (with_parents && !g_file_query_exists (dir, NULL))
+    goto again;
 
   ret = TRUE;
  out:
diff --git a/src/ostree/ot-builtin-checkout.c b/src/ostree/ot-builtin-checkout.c
index 2b63851..433ff0e 100644
--- a/src/ostree/ot-builtin-checkout.c
+++ b/src/ostree/ot-builtin-checkout.c
@@ -280,6 +280,9 @@ ostree_builtin_checkout (int argc, char **argv, GFile *repo_path, GError **error
         }
     }
 
+  if (!ostree_repo_checkout_gc (repo, cancellable, error))
+    goto out;
+
   ret = TRUE;
  out:
   if (context)



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]