[Notes] [Git][BuildStream/buildstream][valentindavid/cache_server_fill_up] 3 commits: Use fallocate instead of checking for disk space for every write

Valentin David pushed to branch valentindavid/cache_server_fill_up at BuildStream / buildstream

Commits:

2 changed files:

Changes:

  • buildstream/_artifactcache/cascache.py

    @@ -446,14 +446,14 @@ class CASCache(ArtifactCache):
         #     digest (Digest): An optional Digest object to populate
         #     path (str): Path to file to add
         #     buffer (bytes): Byte buffer to add
    -    #     link_file (bool): Whether file given by path can be linked
    +    #     link_directly (bool): Whether file given by path can be linked
         #
         # Returns:
         #     (Digest): The digest of the added object
         #
         # Either `path` or `buffer` must be passed, but not both.
         #
    -    def add_object(self, *, digest=None, path=None, buffer=None, link_file=False):
    +    def add_object(self, *, digest=None, path=None, buffer=None, link_directly=False):
             # Exactly one of the two parameters has to be specified
             assert (path is None) != (buffer is None)

    @@ -464,7 +464,7 @@ class CASCache(ArtifactCache):
                 h = hashlib.sha256()
                 # Always write out new file to avoid corruption if input file is modified
                 with contextlib.ExitStack() as stack:
    -                if link_file:
    +                if path is not None and link_directly:
                         tmp = stack.enter_context(open(path, 'rb'))
                         for chunk in iter(lambda: tmp.read(4096), b""):
                             h.update(chunk)

    @@ -626,8 +626,9 @@ class CASCache(ArtifactCache):
         # Prune unreachable objects from the repo.
         #
         # Args:
    -    #    keep_after (int): timestamp after which unreachable objects are kept.
    -    #                      None if no unreachable object should be kept.
    +    #    keep_after (int|None): timestamp after which unreachable objects
    +    #                           are kept. None if no unreachable object
    +    #                           should be kept.
         #
         def prune(self, keep_after=None):
             ref_heads = os.path.join(self.casdir, 'refs', 'heads')

    @@ -895,7 +896,7 @@ class CASCache(ArtifactCache):
             with tempfile.NamedTemporaryFile(dir=self.tmpdir) as f:
                 self._fetch_blob(remote, digest, f)

    -            added_digest = self.add_object(path=f.name)
    +            added_digest = self.add_object(path=f.name, link_directly=True)
                 assert added_digest.hash == digest.hash

             return objpath

    @@ -906,7 +907,7 @@ class CASCache(ArtifactCache):
                     f.write(data)
                     f.flush()

    -                added_digest = self.add_object(path=f.name)
    +                added_digest = self.add_object(path=f.name, link_directly=True)
                     assert added_digest.hash == digest.hash

         # Helper function for _fetch_directory().

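For context, the reason `link_directly` exists: when the caller has already written the data to a temporary file on the same filesystem as the object store, `add_object()` can link that file into place instead of copying its bytes a second time, per the docstring above ("Whether file given by path can be linked"). A minimal sketch of the idea (hypothetical helper, not BuildStream's actual implementation; assumes a POSIX filesystem and an `objdir/<aa>/<rest-of-digest>` layout):

    import hashlib
    import os

    def add_object_sketch(objdir, tmp_path):
        # Hash the already-written temporary file in 4096-byte chunks,
        # mirroring the loop in the diff above.
        h = hashlib.sha256()
        with open(tmp_path, 'rb') as f:
            for chunk in iter(lambda: f.read(4096), b""):
                h.update(chunk)
        digest = h.hexdigest()
        objpath = os.path.join(objdir, digest[:2], digest[2:])
        os.makedirs(os.path.dirname(objpath), exist_ok=True)
        try:
            # Link rather than copy; the store is content-addressed, so a
            # pre-existing object with the same digest is already correct.
            os.link(tmp_path, objpath)
        except FileExistsError:
            pass
        return digest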
  • buildstream/_artifactcache/casserver.py

    @@ -26,6 +26,7 @@ import tempfile
     import uuid
     import time
     import errno
    +import ctypes

     import click
     import grpc

    @@ -137,11 +138,42 @@ def server_main(repo, port, server_key, server_cert, client_certs, enable_push):
             server.stop(0)


    +class _FallocateCall:
    +
    +    FALLOC_FL_KEEP_SIZE = 1
    +    FALLOC_FL_PUNCH_HOLE = 2
    +    FALLOC_FL_NO_HIDE_STALE = 4
    +    FALLOC_FL_COLLAPSE_RANGE = 8
    +    FALLOC_FL_ZERO_RANGE = 16
    +    FALLOC_FL_INSERT_RANGE = 32
    +    FALLOC_FL_UNSHARE_RANGE = 64
    +
    +    def __init__(self):
    +        self.libc = ctypes.CDLL("libc.so.6", use_errno=True)
    +        try:
    +            self.fallocate64 = self.libc.fallocate64
    +        except AttributeError:
    +            self.fallocate = self.libc.fallocate
    +
    +    def __call__(self, fd, mode, offset, length):
    +        if hasattr(self, 'fallocate64'):
    +            ret = self.fallocate64(ctypes.c_int(fd), ctypes.c_int(mode),
    +                                   ctypes.c_int64(offset), ctypes.c_int64(length))
    +        else:
    +            ret = self.fallocate(ctypes.c_int(fd), ctypes.c_int(mode),
    +                                 ctypes.c_int(offset), ctypes.c_int(length))
    +        if ret == -1:
    +            err = ctypes.get_errno()
    +            raise OSError(err, os.strerror(err))
    +        return ret
    +
    +
     class _ByteStreamServicer(bytestream_pb2_grpc.ByteStreamServicer):
         def __init__(self, cas, *, enable_push):
             super().__init__()
             self.cas = cas
             self.enable_push = enable_push
    +        self.fallocate = _FallocateCall()

         def Read(self, request, context):
             resource_name = request.resource_name

    @@ -198,36 +230,45 @@ class _ByteStreamServicer(bytestream_pb2_grpc.ByteStreamServicer):
                         if client_digest is None:
                             context.set_code(grpc.StatusCode.NOT_FOUND)
                             return response
    +
    +                    while True:
    +                        if client_digest.size_bytes == 0:
    +                            break
    +                        try:
    +                            _clean_up_cache(self.cas, client_digest.size_bytes)
    +                        except ArtifactTooLargeException as e:
    +                            context.set_code(grpc.StatusCode.RESOURCE_EXHAUSTED)
    +                            context.set_details(str(e))
    +                            return response
    +
    +                        try:
    +                            self.fallocate(out.fileno(), 0, 0, client_digest.size_bytes)
    +                            break
    +                        except OSError as e:
    +                            # Multiple uploads can happen at the same time
    +                            if e.errno != errno.ENOSPC:
    +                                raise
    +
                     elif request.resource_name:
                         # If it is set on subsequent calls, it **must** match the value of the first request.
                         if request.resource_name != resource_name:
                             context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
                             return response

    -                while True:
    -                    try:
    -                        _clean_up_cache(self.cas, client_digest.size_bytes - offset)
    -                    except ArtifactTooLargeException as e:
    -                        context.set_code(grpc.StatusCode.RESOURCE_EXHAUSTED)
    -                        context.set_details(str(e))
    -                        return response
    -                    try:
    -                        out.write(request.data)
    -                        break
    -                    except OSError as e:
    -                        # Multiple upload can happen in the same time
    -                        if e.errno == errno.ENOSPC:
    -                            continue
    -                        else:
    -                            raise
    +                if (offset + len(request.data)) > client_digest.size_bytes:
    +                    context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
    +                    return response
    +
    +                out.write(request.data)

                     offset += len(request.data)
    +
                     if request.finish_write:
                         if client_digest.size_bytes != offset:
                             context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
                             return response
                         out.flush()
    -                    digest = self.cas.add_object(path=out.name, link_file=True)
    +                    digest = self.cas.add_object(path=out.name, link_directly=True)
                         if digest.hash != client_digest.hash:
                             context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
                             return response

    @@ -248,12 +289,16 @@ class _ContentAddressableStorageServicer(remote_execution_pb2_grpc.ContentAddressableStorageServicer):
             response = remote_execution_pb2.FindMissingBlobsResponse()
             for digest in request.blob_digests:
                 objpath = self.cas.objpath(digest)
    -            if not os.path.exists(objpath):
    -                d = response.missing_blob_digests.add()
    -                d.hash = digest.hash
    -                d.size_bytes = digest.size_bytes
    -            else:
    +            try:
                     os.utime(objpath)
    +            except OSError as e:
    +                if e.errno != errno.ENOENT:
    +                    raise
    +                else:
    +                    d = response.missing_blob_digests.add()
    +                    d.hash = digest.hash
    +                    d.size_bytes = digest.size_bytes
    +
             return response

         def BatchReadBlobs(self, request, context):

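The point of `_FallocateCall` is to reserve the artifact's full size in one system call before any data is streamed, so ENOSPC surfaces once, up front, instead of the old pattern of checking free space around every write. A minimal usage sketch of the same technique (Linux-only; assumes glibc's libc.so.6 with the fallocate64 symbol available):

    import ctypes
    import os
    import tempfile

    libc = ctypes.CDLL("libc.so.6", use_errno=True)

    def reserve(fd, length):
        # mode=0 allocates the byte range and extends the file size;
        # on failure the kernel reports ENOSPC before anything is streamed.
        ret = libc.fallocate64(ctypes.c_int(fd), ctypes.c_int(0),
                               ctypes.c_int64(0), ctypes.c_int64(length))
        if ret == -1:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err))

    with tempfile.NamedTemporaryFile() as out:
        reserve(out.fileno(), 1024 * 1024)  # fail fast if 1 MiB cannot be reserved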

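The FindMissingBlobs change folds the existence check into the os.utime() call itself: touching the object refreshes its timestamp (so recently requested blobs survive pruning), and a missing object shows up as ENOENT, avoiding a separate os.path.exists() check that could race with concurrent cleanup. The idiom in isolation (hypothetical helper name):

    import errno
    import os

    def touch_or_missing(objpath):
        try:
            os.utime(objpath)   # bump the timestamp so pruning keeps hot objects
            return False        # object is present
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
            return True         # object is missing; report it to the client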
