[PATCH] Add --disable-fsync option to both commit and pull (non-local) commands.



 I've been looking at the performance problem ostree has with large
numbers of small files, where commits and pulls from remote
repositories take a _huge_ amount of time.
 After some digging it seemed that the fsync calls were basically the
only problem, so I wrote a patch so you can remove them. This is a giant
hammer though, and doesn't fix the default/new user experience.

 I also wrote some test code, to write 10k small files, so it would be
easier to try different things ... and for others to do similar tests
(all my testing was done on traditional rotational media).

 Rough numbers:

  ostree default (fsync after every write):
./write-files.py sync = ~7.5 minutes

  no fsync, similar to --disable-fsync
./write-files.py fast = 0.3 of a second (yes)

  batch fsync calls up into groups of 222
./write-files.py gsync = ~7:30

  batch fsync calls up into groups of 222, and dir sync first
./write-files.py dsync = ~7:30

  batch fsync calls up into groups of 222, and dir sync first,
  and just after the write call fork and fsync in the child.
./write-files.py psync = ~6 seconds

...git pull/commit defaults to the --disable-fsync behaviour (this
should be obvious from just timing it ;). However, while the last
variant (psync) is still ~18x slower than no fsync, it's a huge
improvement and might be possible to implement if we need the fsyncs to
stay around by default.



From 165cda52491357cbdaa9fdb5c62478d2ed45e8a5 Mon Sep 17 00:00:00 2001
From: James Antill <james and org>
Date: Mon, 2 Jun 2014 16:31:58 -0400
Subject: [PATCH] Add --disable-fsync option to both commit and pull
 (non-local).

---
 src/ostree/ot-builtin-commit.c |    4 ++++
 src/ostree/ot-builtin-pull.c   |    5 +++++
 2 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/src/ostree/ot-builtin-commit.c b/src/ostree/ot-builtin-commit.c
index 6f63205..fb11a05 100644
--- a/src/ostree/ot-builtin-commit.c
+++ b/src/ostree/ot-builtin-commit.c
@@ -46,6 +46,7 @@ static char **opt_key_ids;
 static char *opt_gpg_homedir;
 #endif
 static gboolean opt_generate_sizes;
+static gboolean opt_disable_fsync;
 
 static GOptionEntry options[] = {
   { "subject", 's', 0, G_OPTION_ARG_STRING, &opt_subject, "One line subject", "subject" },
@@ -67,6 +68,7 @@ static GOptionEntry options[] = {
   { "gpg-homedir", 0, 0, G_OPTION_ARG_STRING, &opt_gpg_homedir, "GPG Homedir to use when looking for keyrings", "homedir"},
 #endif
   { "generate-sizes", 0, 0, G_OPTION_ARG_NONE, &opt_generate_sizes, "Generate size information along with commit metadata", NULL },
+  { "disable-fsync", 0, 0, G_OPTION_ARG_NONE, &opt_disable_fsync, "Do not invoke fsync()", NULL },
   { NULL }
 };
 
@@ -326,6 +328,8 @@ ostree_builtin_commit (int argc, char **argv, OstreeRepo *repo, GCancellable *ca
     flags |= OSTREE_REPO_COMMIT_MODIFIER_FLAGS_SKIP_XATTRS;
   if (opt_generate_sizes)
     flags |= OSTREE_REPO_COMMIT_MODIFIER_FLAGS_GENERATE_SIZES;
+  if (opt_disable_fsync)
+    ostree_repo_set_disable_fsync (repo, TRUE);
 
   if (flags != 0
       || opt_owner_uid >= 0
diff --git a/src/ostree/ot-builtin-pull.c b/src/ostree/ot-builtin-pull.c
index a29bbb7..f600b00 100644
--- a/src/ostree/ot-builtin-pull.c
+++ b/src/ostree/ot-builtin-pull.c
@@ -27,7 +27,9 @@
 #include "ostree.h"
 #include "otutil.h"
 
+static gboolean opt_disable_fsync;
 static GOptionEntry options[] = {
+  { "disable-fsync", 0, 0, G_OPTION_ARG_NONE, &opt_disable_fsync, "Do not invoke fsync()", NULL },
   { NULL }
 };
 
@@ -54,6 +56,9 @@ ostree_builtin_pull (int argc, char **argv, OstreeRepo *repo, GCancellable *canc
       goto out;
     }
 
+  if (opt_disable_fsync)
+    ostree_repo_set_disable_fsync (repo, TRUE);
+
 
   if (strchr (argv[1], ':') == NULL)
     {
-- 
1.7.7.6

#! /usr/bin/python -tt

import os
import sys
import tempfile


# File objects that have been written but not yet fsync'ed and closed.
fs = []
# Descriptor of the output directory; stays None until the script opens it.
first = None

# PIDs of forked fsync helpers that have not been waited for yet.
pids = []


def fs_flush():
    """Sync and close every queued file, then reap any forked helpers.

    In 'dsync' mode the output directory descriptor (``first``) is fsync'ed
    before the files.  Each file object in ``fs`` is then fsync'ed and
    closed, and every PID in ``pids`` is waited for.  Both lists are rebound
    to fresh empty lists afterwards.
    """
    global fs, pids

    if 'dsync' == mode:
        os.fsync(first)

    for handle in fs:
        descriptor = handle.fileno()
        os.fsync(descriptor)
        handle.close()
    fs = []

    for child in pids:
        os.waitpid(child, 0)
    pids = []

def async_sync(f):
    """Fork a child process that fsyncs *f* so the caller does not block.

    In the parent, the child's PID is appended to the module-level ``pids``
    list (reaped later by ``fs_flush``) and the function returns at once.
    In the child, the file is fsync'ed and the process terminates through
    ``os._exit``: status 0 on success, 1 if the sync raised.

    If the fork itself fails the function returns without doing anything;
    this is best-effort, since the parent still fsyncs the file itself in
    ``fs_flush``.
    """
    try:
        pid = os.fork()
    except OSError:
        # Could not create a helper process; fall back to the parent's sync.
        return
    if pid:
        pids.append(pid)
        return

    # Child only from here on.  It must never return into the caller's
    # write loop, so exit unconditionally, even when the sync fails.
    status = 1
    try:
        os.fsync(f.fileno())
        status = 0
    finally:
        os._exit(status)

# Number of queued files that triggers a batched fs_flush().
async_num = 222

# Benchmark mode, taken from the first command-line argument:
#   fast  - write the files and never fsync
#   sync  - fsync every file right after writing it
#   gsync - queue files and fsync them in batches of async_num
#   dsync - like gsync, but fs_flush() fsyncs the directory before each batch
#   psync - like gsync, and additionally fork a child per file to fsync it
mode = 'sync'
if len(sys.argv) >= 2:
    mode = sys.argv[1]

if mode not in ('fast', 'sync', 'psync', 'dsync', 'gsync'):
    # Written with .write() so the script parses on Python 2 and Python 3;
    # the emitted text is the same as the former print statement's.
    sys.stderr.write("Bad mode: %s (defaulting to sync)\n" % mode)
    mode = 'sync'

dnam = tempfile.mkdtemp(dir=".")
sys.stdout.write("Created: %s\n" % dnam)
first = os.open(dnam, os.O_DIRECTORY)
try:
    # Files are named 1 .. 9999 and each contains its own number.
    for i in range(1, 10000):
        f = open(dnam + "/%u" % i, 'wb')
        # Encode explicitly: the file is binary, and this works on both
        # Python 2 and Python 3.
        f.write(str(i).encode('ascii'))
        f.flush()
        if mode == 'fast':
            f.close()
            continue
        if mode == 'sync':
            os.fsync(f.fileno())
            f.close()
            continue

        fs.append(f)
        if mode == 'psync':
            async_sync(f)
        # Flush as soon as a full batch of async_num files is queued.
        if len(fs) >= async_num:
            fs_flush()

    # The file count is not a multiple of the batch size: sync and close
    # the trailing partial batch (and reap its children) as well, otherwise
    # the last files are never fsync'ed and the timing is understated.
    if fs:
        fs_flush()
finally:
    os.close(first)



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]