[ostree] repo: Call fdatasync() before adding objects to the repo



commit 2396608754060e9e7a68843b65e4d3b8f4a4b5f2
Author: Colin Walters <walters verbum org>
Date:   Mon Aug 27 11:53:06 2012 -0400

    repo: Call fdatasync() before adding objects to the repo
    
    I run builds on my laptop, but it also crashes about 1/4 of the time
    while suspending.  It's definitely undesrirable to get e.g. empty
    .dirtree objects because they corrupt builds.  Concretely, I was
    getting empty contents committed for xorg-util-macros.
    
    Now, we used to write out temporary files using g_file_replace() which
    does a fsync() during close, but then switched to a more "manual"
    g_file_append_to().
    
    We could switch back to g_file_replace(), but the problem is, we don't
    want to call fsync() on temporary files in the case where we already
    have the object.  Attempting to add an object we already have is a
    *very* common case.
    
    This is both the old and new code sequence for the case where an
    object is already stored:
    
    open(temp, O_WRONLY)
    write() write() write()
    close()
    lstat(objects/3a/9fe332...) = 0
    unlink(temp)
    
    In the *new* code, here's the case where an object *isn't* stored:
    
    open(temp, O_WRONLY)
    write() write() write()
    close()
    lstat(objects/3a/9fe332...) = -1
    open(temp, O_RDONLY)
    fdatasync()
    close()
    rename(temp, objects/3a/9fe332)
    
    Compare with the *old* code path for when an object isn't stored:
    
    open(temp, O_WRONLY)
    write() write() write()
    close()
    lstat(objects/3a/9fe332...) = -1
    link(temp, objects/3a/9fe332)
    unlink(temp)
    
    The problem with this is we really need to fdatasync().  Also doing
    just rename() instead of the weird link()/unlink() helps us express to
    the filesystem that we want atomic semantics.  For example, BTRFS has
    special handling for rename().

 src/libostree/ostree-repo.c   |   66 ++++++++++++++++++++++++++++++++++++-----
 src/libotutil/ot-unix-utils.c |   41 +++++++++++++++++++++++++
 src/libotutil/ot-unix-utils.h |   10 ++++++
 3 files changed, 109 insertions(+), 8 deletions(-)
---
diff --git a/src/libostree/ostree-repo.c b/src/libostree/ostree-repo.c
index c74c095..ab41e30 100644
--- a/src/libostree/ostree-repo.c
+++ b/src/libostree/ostree-repo.c
@@ -30,6 +30,7 @@
 
 #include <gio/gunixoutputstream.h>
 #include <gio/gunixinputstream.h>
+#include <glib/gstdio.h>
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -797,10 +798,46 @@ ostree_repo_get_archive_content_path (OstreeRepo    *self,
   return g_file_resolve_relative_path (self->repodir, path);
 }
 
+/**
+ * ensure_file_data_synced:
+ *
+ * Ensure that in case of a power cut, these files have the data we
+ * want.   See http://lwn.net/Articles/322823/
+ */
+static gboolean
+ensure_file_data_synced (GFile         *file,
+                         GCancellable  *cancellable,
+                         GError       **error)
+{
+  gboolean ret = FALSE;
+  int fd = -1;
+
+  fd = g_open (ot_gfile_get_path_cached (file), O_RDONLY | O_NOATIME | O_CLOEXEC | O_LARGEFILE, 0);
+  if (fd < 0)
+    {
+      ot_util_set_error_from_errno (error, errno);
+      goto out;
+    }
+
+  if (!ot_unix_fdatasync (fd, error))
+    goto out;
+
+  if (!ot_unix_close (fd, error))
+    goto out;
+  fd = -1;
+
+  ret = TRUE;
+ out:
+  if (fd != -1)
+    (void) close (fd);
+  return ret;
+}
+
 static gboolean
 commit_loose_object_impl (OstreeRepo        *self,
                           GFile             *tempfile_path,
                           GFile             *dest,
+                          gboolean           is_regular,
                           GCancellable      *cancellable,
                           GError           **error)
 {
@@ -810,8 +847,14 @@ commit_loose_object_impl (OstreeRepo        *self,
   parent = g_file_get_parent (dest);
   if (!ot_gfile_ensure_directory (parent, FALSE, error))
     goto out;
+
+  if (is_regular)
+    {
+      if (!ensure_file_data_synced (tempfile_path, cancellable, error))
+        goto out;
+    }
   
-  if (link (ot_gfile_get_path_cached (tempfile_path), ot_gfile_get_path_cached (dest)) < 0)
+  if (rename (ot_gfile_get_path_cached (tempfile_path), ot_gfile_get_path_cached (dest)) < 0)
     {
       if (errno != EEXIST)
         {
@@ -822,7 +865,6 @@ commit_loose_object_impl (OstreeRepo        *self,
         }
     }
 
-  (void) unlink (ot_gfile_get_path_cached (tempfile_path));
   ret = TRUE;
  out:
   return ret;
@@ -833,6 +875,7 @@ commit_loose_object_trusted (OstreeRepo        *self,
                              const char        *checksum,
                              OstreeObjectType   objtype,
                              GFile             *tempfile_path,
+                             gboolean           is_regular,
                              GCancellable      *cancellable,
                              GError           **error)
 {
@@ -841,7 +884,7 @@ commit_loose_object_trusted (OstreeRepo        *self,
 
   dest_file = ostree_repo_get_object_path (self, checksum, objtype);
 
-  if (!commit_loose_object_impl (self, tempfile_path, dest_file,
+  if (!commit_loose_object_impl (self, tempfile_path, dest_file, is_regular,
                                  cancellable, error))
     goto out;
 
@@ -879,6 +922,7 @@ stage_object_internal (OstreeRepo         *self,
   GChecksum *checksum = NULL;
   gboolean staged_raw_file = FALSE;
   gboolean staged_archive_file = FALSE;
+  gboolean temp_file_is_regular;
 
   if (out_csum)
     {
@@ -900,6 +944,8 @@ stage_object_internal (OstreeRepo         *self,
                                         cancellable, error))
         goto out;
 
+      temp_file_is_regular = g_file_info_get_file_type (file_info) == G_FILE_TYPE_REGULAR;
+
       if (ostree_repo_get_mode (self) == OSTREE_REPO_MODE_BARE)
         {
           if (!ostree_create_temp_file_from_input (self->tmp_dir,
@@ -973,8 +1019,9 @@ stage_object_internal (OstreeRepo         *self,
                                                &temp_file,
                                                cancellable, error))
         goto out;
+      temp_file_is_regular = TRUE;
     }
-          
+
   if (!checksum)
     actual_checksum = expected_checksum;
   else
@@ -1012,6 +1059,7 @@ stage_object_internal (OstreeRepo         *self,
           ot_lobj GInputStream *file_input = NULL;
           ot_lobj GFileInfo *file_info = NULL;
           ot_lvariant GVariant *xattrs = NULL;
+          gboolean is_regular;
               
           if (!ostree_content_file_parse (temp_file, FALSE, &file_input,
                                           &file_info, &xattrs,
@@ -1025,8 +1073,10 @@ stage_object_internal (OstreeRepo         *self,
                                                    cancellable, error))
             goto out;
 
+          is_regular = g_file_info_get_file_type (file_info) == G_FILE_TYPE_REGULAR;
+
           if (!commit_loose_object_trusted (self, actual_checksum, objtype, 
-                                            raw_temp_file, cancellable, error))
+                                            raw_temp_file, is_regular, cancellable, error))
             goto out;
           g_clear_object (&raw_temp_file);
         }
@@ -1039,13 +1089,13 @@ stage_object_internal (OstreeRepo         *self,
 
               archive_content_dest = ostree_repo_get_archive_content_path (self, actual_checksum);
                                                                    
-              if (!commit_loose_object_impl (self, raw_temp_file, archive_content_dest,
+              if (!commit_loose_object_impl (self, raw_temp_file, archive_content_dest, TRUE,
                                              cancellable, error))
                 goto out;
               g_clear_object (&raw_temp_file);
             }
-          if (!commit_loose_object_trusted (self, actual_checksum, objtype, 
-                                            temp_file, cancellable, error))
+          if (!commit_loose_object_trusted (self, actual_checksum, objtype, temp_file, temp_file_is_regular,
+                                            cancellable, error))
             goto out;
           g_clear_object (&temp_file);
         }
diff --git a/src/libotutil/ot-unix-utils.c b/src/libotutil/ot-unix-utils.c
index ee8f039..600082d 100644
--- a/src/libotutil/ot-unix-utils.c
+++ b/src/libotutil/ot-unix-utils.c
@@ -31,6 +31,7 @@
 #include <sys/types.h>
 #include <unistd.h>
 #include <stdlib.h>
+#include <errno.h>
 #include <dirent.h>
 
 gboolean
@@ -184,3 +185,43 @@ ot_util_fatal_gerror (GError *error)
   g_assert (error != NULL);
   ot_util_fatal_literal (error->message);
 }
+
+/**
+ * ot_unix_fdatasync:
+ *
+ * Like fdatasync(), but uses #GError, and handles EINTR.
+ */
+gboolean
+ot_unix_fdatasync (int fd, GError **error)
+{
+  int result;
+  do
+    result = fdatasync (fd);
+  while (G_UNLIKELY (result != 0 && errno == EINTR));
+  if (result != 0)
+    {
+      ot_util_set_error_from_errno (error, errno);
+      return FALSE;
+    }
+  return TRUE;
+}
+
+/**
+ * ot_unix_close:
+ *
+ * Like close(), but uses #GError, and handles EINTR.
+ */
+gboolean
+ot_unix_close (int fd, GError **error)
+{
+  int result;
+  do
+    result = close (fd);
+  while (G_UNLIKELY (result != 0 && errno == EINTR));
+  if (result != 0)
+    {
+      ot_util_set_error_from_errno (error, errno);
+      return FALSE;
+    }
+  return TRUE;
+}
diff --git a/src/libotutil/ot-unix-utils.h b/src/libotutil/ot-unix-utils.h
index 58dd59b..2022dea 100644
--- a/src/libotutil/ot-unix-utils.h
+++ b/src/libotutil/ot-unix-utils.h
@@ -49,6 +49,16 @@ gboolean ot_util_path_split_validate (const char *path, GPtrArray **out_componen
 
 void ot_util_set_error_from_errno (GError **error, gint saved_errno);
 
+gboolean ot_unix_open (const char *path, 
+                       int         flags,
+                       mode_t      mode,
+                       int        *out_fd,
+                       GError    **error);
+
+gboolean ot_unix_fdatasync (int fd, GError **error);
+
+gboolean ot_unix_close (int fd, GError **error);
+
 G_END_DECLS
 
 #endif



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]