[ostree] repo: Add APIs for devino optimization between checkout -> commit



commit 5929ce9e0e04e5dea0f654515629bfd2dd08f9df
Author: Colin Walters <walters verbum org>
Date:   Tue Jan 5 09:13:54 2016 -0500

    repo: Add APIs for devino optimization between checkout -> commit
    
    A fast way to generate new OSTree content using an existing
    tree is to checkout (as hard links), add/replace files, then
    call `ostree_repo_scan_hardlinks()`, then commit.
    
    But `ostree_repo_scan_hardlinks()` scans the entire repo, which
    can be slow if you have a lot of content.
    
    All we really need is a mapping of (device,inode) -> checksum
    just for the objects we checked out, then use that mapping
    for commits.
    
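    This patch adds APIs so that callers can create a mapping via
    `ostree_repo_devino_cache_new()`, then pass it to
    `ostree_repo_checkout_tree_at()` (through the `devino_to_csum_cache`
    checkout option), which will populate it.  Attaching the same cache
    to a commit modifier with
    `ostree_repo_commit_modifier_set_devino_cache()` then lets
    `ostree_repo_write_directory_to_mtree()` consume it.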
    
    I plan to use this in rpm-ostree for package layering work.
    
    Notes:
     - The old `ostree_repo_scan_hardlinks()` API still works.
     - I tweaked the cache to be a set with the checksum colocated with
       the key, to avoid a separate malloc block per entry.
    
    https://github.com/GNOME/ostree/pull/167

 doc/ostree-sections.txt              |    4 +
 src/libostree/ostree-repo-checkout.c |   56 +++++++++++++++
 src/libostree/ostree-repo-commit.c   |  129 +++++++++++++++++++++-------------
 src/libostree/ostree-repo-private.h  |    6 ++
 src/libostree/ostree-repo.h          |   12 +++-
 src/libostree/ostree-types.h         |    1 +
 6 files changed, 157 insertions(+), 51 deletions(-)
---
diff --git a/doc/ostree-sections.txt b/doc/ostree-sections.txt
index 131ee1b..d448d6d 100644
--- a/doc/ostree-sections.txt
+++ b/doc/ostree-sections.txt
@@ -276,8 +276,12 @@ ostree_repo_commit_modifier_new
 OstreeRepoCommitModifierXattrCallback
 ostree_repo_commit_modifier_set_xattr_callback
 ostree_repo_commit_modifier_set_sepolicy
+ostree_repo_commit_modifier_set_devino_cache
 ostree_repo_commit_modifier_ref
 ostree_repo_commit_modifier_unref
+ostree_repo_devino_cache_new
+ostree_repo_devino_cache_ref
+ostree_repo_devino_cache_unref
 ostree_repo_write_directory_to_mtree
 ostree_repo_write_dfd_to_mtree
 ostree_repo_write_archive_to_mtree
diff --git a/src/libostree/ostree-repo-checkout.c b/src/libostree/ostree-repo-checkout.c
index 836008d..ad309e7 100644
--- a/src/libostree/ostree-repo-checkout.c
+++ b/src/libostree/ostree-repo-checkout.c
@@ -429,6 +429,26 @@ checkout_one_file_at (OstreeRepo                        *repo,
                                            TRUE, &did_hardlink,
                                            cancellable, error))
                 goto out;
+
+              if (did_hardlink && options->devino_to_csum_cache)
+                {
+                  struct stat stbuf;
+                  OstreeDevIno *key;
+                  
+                  if (TEMP_FAILURE_RETRY (fstatat (destination_dfd, destination_name, &stbuf, AT_SYMLINK_NOFOLLOW)) != 0)
+                    {
+                      glnx_set_error_from_errno (error);
+                      goto out;
+                    }
+                  
+                  key = g_new (OstreeDevIno, 1);
+                  key->dev = stbuf.st_dev;
+                  key->ino = stbuf.st_ino;
+                  memcpy (key->checksum, checksum, 65);
+                  
+                  g_hash_table_add ((GHashTable*)options->devino_to_csum_cache, key);
+                }
+
               if (did_hardlink)
                 break;
             }
@@ -834,6 +854,42 @@ ostree_repo_checkout_tree_at (OstreeRepo                         *self,
   return ret;
 }
 
+static guint
+devino_hash (gconstpointer a)
+{
+  OstreeDevIno *a_i = (gpointer)a;
+  return (guint) (a_i->dev + a_i->ino);
+}
+
+static int
+devino_equal (gconstpointer   a,
+              gconstpointer   b)
+{
+  OstreeDevIno *a_i = (gpointer)a;
+  OstreeDevIno *b_i = (gpointer)b;
+  return a_i->dev == b_i->dev
+    && a_i->ino == b_i->ino;
+}
+
+/**
+ * ostree_repo_devino_cache_new:
+ * 
+ * OSTree has support for pairing ostree_repo_checkout_tree_at() using
+ * hardlinks in combination with a later
+ * ostree_repo_write_directory_to_mtree() using a (normally modified)
+ * directory.  In order for OSTree to optimally detect just the new
+ * files, use this function and fill in the `devino_to_csum_cache`
+ * member of `OstreeRepoCheckoutOptions`, then call
+ * ostree_repo_commit_modifier_set_devino_cache().
+ *
+ * Returns: (transfer full): Newly allocated cache
+ */
+OstreeRepoDevInoCache *
+ostree_repo_devino_cache_new (void)
+{
+  return (OstreeRepoDevInoCache*) g_hash_table_new_full (devino_hash, devino_equal, g_free, NULL);
+}
+
 /**
  * ostree_repo_checkout_gc:
  * @self: Repo
diff --git a/src/libostree/ostree-repo-commit.c b/src/libostree/ostree-repo-commit.c
index 5faa25e..0919028 100644
--- a/src/libostree/ostree-repo-commit.c
+++ b/src/libostree/ostree-repo-commit.c
@@ -36,6 +36,22 @@
 #include <sys/xattr.h>
 #include <glib/gprintf.h>
 
+struct OstreeRepoCommitModifier {
+  volatile gint refcount;
+
+  OstreeRepoCommitModifierFlags flags;
+  OstreeRepoCommitFilter filter;
+  gpointer user_data;
+  GDestroyNotify destroy_notify;
+
+  OstreeRepoCommitModifierXattrCallback xattr_callback;
+  GDestroyNotify xattr_destroy;
+  gpointer xattr_user_data;
+
+  OstreeSePolicy *sepolicy;
+  GHashTable *devino_cache;
+};
+
 gboolean
 _ostree_repo_ensure_loose_objdir_at (int             dfd,
                                      const char     *loose_path,
@@ -936,28 +952,6 @@ write_object (OstreeRepo         *self,
   return ret;
 }
 
-typedef struct {
-  dev_t dev;
-  ino_t ino;
-} OstreeDevIno;
-
-static guint
-devino_hash (gconstpointer a)
-{
-  OstreeDevIno *a_i = (gpointer)a;
-  return (guint) (a_i->dev + a_i->ino);
-}
-
-static int
-devino_equal (gconstpointer   a,
-              gconstpointer   b)
-{
-  OstreeDevIno *a_i = (gpointer)a;
-  OstreeDevIno *b_i = (gpointer)b;
-  return a_i->dev == b_i->dev
-    && a_i->ino == b_i->ino;
-}
-
 static gboolean
 scan_one_loose_devino (OstreeRepo                     *self,
                        int                             object_dir_fd,
@@ -998,7 +992,6 @@ scan_one_loose_devino (OstreeRepo                     *self,
           OstreeDevIno *key;
           struct dirent *child_dent;
           const char *dot;
-          GString *checksum;
           gboolean skip;
           const char *name;
 
@@ -1039,14 +1032,14 @@ scan_one_loose_devino (OstreeRepo                     *self,
               goto out;
             }
 
-          checksum = g_string_new (dent->d_name);
-          g_string_append_len (checksum, name, 62);
-          
           key = g_new (OstreeDevIno, 1);
           key->dev = stbuf.st_dev;
           key->ino = stbuf.st_ino;
+          memcpy (key->checksum, dent->d_name, 2);
+          memcpy (key->checksum + 2, name, 62);
+          key->checksum[sizeof(key->checksum)-1] = '\0';
           
-          g_hash_table_replace (devino_cache, key, g_string_free (checksum, FALSE));
+          g_hash_table_add (devino_cache, key);
         }
     }
 
@@ -1087,17 +1080,27 @@ scan_loose_devino (OstreeRepo                     *self,
 
 static const char *
 devino_cache_lookup (OstreeRepo           *self,
+                     OstreeRepoCommitModifier *modifier,
                      guint32               device,
                      guint32               inode)
 {
-  OstreeDevIno dev_ino;
+  OstreeDevIno dev_ino_key;
+  OstreeDevIno *dev_ino_val;
+  GHashTable *cache;
 
-  if (!self->loose_object_devino_hash)
+  if (self->loose_object_devino_hash)
+    cache = self->loose_object_devino_hash;
+  else if (modifier && modifier->devino_cache)
+    cache = modifier->devino_cache;
+  else
     return NULL;
 
-  dev_ino.dev = device;
-  dev_ino.ino = inode;
-  return g_hash_table_lookup (self->loose_object_devino_hash, &dev_ino);
+  dev_ino_key.dev = device;
+  dev_ino_key.ino = inode;
+  dev_ino_val = g_hash_table_lookup (cache, &dev_ino_key);
+  if (!dev_ino_val)
+    return NULL;
+  return dev_ino_val->checksum;
 }
 
 /**
@@ -1127,7 +1130,7 @@ ostree_repo_scan_hardlinks (OstreeRepo    *self,
   g_return_val_if_fail (self->in_transaction == TRUE, FALSE);
 
   if (!self->loose_object_devino_hash)
-    self->loose_object_devino_hash = g_hash_table_new_full (devino_hash, devino_equal, g_free, g_free);
+    self->loose_object_devino_hash = (GHashTable*)ostree_repo_devino_cache_new ();
   g_hash_table_remove_all (self->loose_object_devino_hash);
   if (!scan_loose_devino (self, self->loose_object_devino_hash, cancellable, error))
     goto out;
@@ -2231,21 +2234,6 @@ create_tree_variant_from_hashes (GHashTable            *file_checksums,
   return serialized_tree;
 }
 
-struct OstreeRepoCommitModifier {
-  volatile gint refcount;
-
-  OstreeRepoCommitModifierFlags flags;
-  OstreeRepoCommitFilter filter;
-  gpointer user_data;
-  GDestroyNotify destroy_notify;
-
-  OstreeRepoCommitModifierXattrCallback xattr_callback;
-  GDestroyNotify xattr_destroy;
-  gpointer xattr_user_data;
-
-  OstreeSePolicy *sepolicy;
-};
-
 OstreeRepoCommitFilterResult
 _ostree_repo_commit_modifier_apply (OstreeRepo               *self,
                                     OstreeRepoCommitModifier *modifier,
@@ -2503,7 +2491,7 @@ write_directory_content_to_mtree_internal (OstreeRepo                  *self,
       g_autofree guchar *child_file_csum = NULL;
       g_autofree char *tmp_checksum = NULL;
 
-      loose_checksum = devino_cache_lookup (self,
+      loose_checksum = devino_cache_lookup (self, modifier,
                                             g_file_info_get_attribute_uint32 (child_info, "unix::device"),
                                             g_file_info_get_attribute_uint64 (child_info, "unix::inode"));
 
@@ -2757,7 +2745,7 @@ write_dfd_iter_to_mtree_internal (OstreeRepo                  *self,
           goto out;
         }
 
-      loose_checksum = devino_cache_lookup (self, stbuf.st_dev, stbuf.st_ino);
+      loose_checksum = devino_cache_lookup (self, modifier, stbuf.st_dev, stbuf.st_ino);
       if (loose_checksum)
         {
           if (!ostree_mutable_tree_replace_file (mtree, dent->d_name, loose_checksum,
@@ -3030,6 +3018,7 @@ ostree_repo_commit_modifier_unref (OstreeRepoCommitModifier *modifier)
     modifier->xattr_destroy (modifier->xattr_user_data);
 
   g_clear_object (&modifier->sepolicy);
+  g_clear_pointer (&modifier->devino_cache, (GDestroyNotify)g_hash_table_unref);
 
   g_free (modifier);
   return;
@@ -3080,6 +3069,46 @@ ostree_repo_commit_modifier_set_sepolicy (OstreeRepoCommitModifier
   modifier->sepolicy = sepolicy ? g_object_ref (sepolicy) : NULL;
 }
 
+/**
+ * ostree_repo_commit_modifier_set_devino_cache:
+ * @modifier: Modifier
+ * @cache: A hash table caching device,inode to checksums
+ *
+ * See the documentation for
+ * `ostree_repo_devino_cache_new()`.  This function can
+ * then be used for later calls to
+ * `ostree_repo_write_directory_to_mtree()` to optimize commits.
+ *
+ * Note if your process has multiple writers, you should use separate
+ * `OstreeRepo` instances if you want to also use this API.
+ *
+ * This function will add a reference to @cache without copying - you
+ * should avoid further mutation of the cache.
+ */
+void
+ostree_repo_commit_modifier_set_devino_cache (OstreeRepoCommitModifier              *modifier,
+                                              OstreeRepoDevInoCache                 *cache)
+{
+  modifier->devino_cache = g_hash_table_ref ((GHashTable*)cache);
+}
+
+OstreeRepoDevInoCache *
+ostree_repo_devino_cache_ref (OstreeRepoDevInoCache *cache)
+{
+  g_hash_table_ref ((GHashTable*)cache);
+  return cache;
+}
+
+void
+ostree_repo_devino_cache_unref (OstreeRepoDevInoCache *cache)
+{
+  g_hash_table_unref ((GHashTable*)cache);
+}
+
+G_DEFINE_BOXED_TYPE(OstreeRepoDevInoCache, ostree_repo_devino_cache,
+                    ostree_repo_devino_cache_ref,
+                    ostree_repo_devino_cache_unref);
+
 G_DEFINE_BOXED_TYPE(OstreeRepoCommitModifier, ostree_repo_commit_modifier,
                     ostree_repo_commit_modifier_ref,
                     ostree_repo_commit_modifier_unref);
diff --git a/src/libostree/ostree-repo-private.h b/src/libostree/ostree-repo-private.h
index 1985b2e..b6ea317 100644
--- a/src/libostree/ostree-repo-private.h
+++ b/src/libostree/ostree-repo-private.h
@@ -92,6 +92,12 @@ struct OstreeRepo {
   OstreeRepo *parent_repo;
 };
 
+typedef struct {
+  dev_t dev;
+  ino_t ino;
+  char checksum[65];
+} OstreeDevIno;
+
 gboolean
 _ostree_repo_allocate_tmpdir (int           tmpdir_dfd,
                               const char   *tmpdir_prefix,
diff --git a/src/libostree/ostree-repo.h b/src/libostree/ostree-repo.h
index 5f1b497..5bc2520 100644
--- a/src/libostree/ostree-repo.h
+++ b/src/libostree/ostree-repo.h
@@ -418,6 +418,9 @@ void ostree_repo_commit_modifier_set_xattr_callback (OstreeRepoCommitModifier
 void ostree_repo_commit_modifier_set_sepolicy (OstreeRepoCommitModifier              *modifier,
                                                OstreeSePolicy                        *sepolicy);
 
+void ostree_repo_commit_modifier_set_devino_cache (OstreeRepoCommitModifier              *modifier,
+                                                   OstreeRepoDevInoCache                 *cache);
+
 OstreeRepoCommitModifier *ostree_repo_commit_modifier_ref (OstreeRepoCommitModifier *modifier);
 void ostree_repo_commit_modifier_unref (OstreeRepoCommitModifier *modifier);
 
@@ -531,10 +534,17 @@ typedef struct {
 
   const char *subpath;
 
+  OstreeRepoDevInoCache *devino_to_csum_cache;
+
   guint unused_uints[6];
-  gpointer unused_ptrs[8];
+  gpointer unused_ptrs[7];
 } OstreeRepoCheckoutOptions;
 
+GType ostree_repo_devino_cache_get_type (void);
+OstreeRepoDevInoCache *ostree_repo_devino_cache_new (void);
+OstreeRepoDevInoCache * ostree_repo_devino_cache_ref (OstreeRepoDevInoCache *cache);
+void ostree_repo_devino_cache_unref (OstreeRepoDevInoCache *cache);
+
 gboolean ostree_repo_checkout_tree_at (OstreeRepo                         *self,
                                        OstreeRepoCheckoutOptions          *options,
                                        int                                 destination_dfd,
diff --git a/src/libostree/ostree-types.h b/src/libostree/ostree-types.h
index 639b71f..691f128 100644
--- a/src/libostree/ostree-types.h
+++ b/src/libostree/ostree-types.h
@@ -27,6 +27,7 @@
 G_BEGIN_DECLS
 
 typedef struct OstreeRepo OstreeRepo;
+typedef struct OstreeRepoDevInoCache OstreeRepoDevInoCache;
 typedef struct OstreeSePolicy OstreeSePolicy;
 typedef struct OstreeSysroot OstreeSysroot;
 typedef struct OstreeSysrootUpgrader OstreeSysrootUpgrader;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]