[Notes] [Git][BuildStream/buildstream][tristan/cas-cleanup-improve] 4 commits: _cas/cascache.py: Cleanup directories when removing refs



Title: GitLab

Tristan Van Berkom pushed to branch tristan/cas-cleanup-improve at BuildStream / buildstream

Commits:

3 changed files:

Changes:

  • buildstream/_cas/cascache.py
    ... ... @@ -21,7 +21,7 @@ import hashlib
    21 21
     import itertools
    
    22 22
     import os
    
    23 23
     import stat
    
    24
    -import tempfile
    
    24
    +import errno
    
    25 25
     import uuid
    
    26 26
     import contextlib
    
    27 27
     
    
    ... ... @@ -129,7 +129,7 @@ class CASCache():
    129 129
                 else:
    
    130 130
                     return dest
    
    131 131
     
    
    132
    -        with tempfile.TemporaryDirectory(prefix='tmp', dir=self.tmpdir) as tmpdir:
    
    132
    +        with utils._tempdir(prefix='tmp', dir=self.tmpdir) as tmpdir:
    
    133 133
                 checkoutdir = os.path.join(tmpdir, ref)
    
    134 134
                 self._checkout(checkoutdir, tree)
    
    135 135
     
    
    ... ... @@ -374,7 +374,7 @@ class CASCache():
    374 374
                         for chunk in iter(lambda: tmp.read(4096), b""):
    
    375 375
                             h.update(chunk)
    
    376 376
                     else:
    
    377
    -                    tmp = stack.enter_context(tempfile.NamedTemporaryFile(dir=self.tmpdir))
    
    377
    +                    tmp = stack.enter_context(utils._tempnamedfile(dir=self.tmpdir))
    
    378 378
                         # Set mode bits to 0644
    
    379 379
                         os.chmod(tmp.name, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
    
    380 380
     
    
    ... ... @@ -545,11 +545,7 @@ class CASCache():
    545 545
         def remove(self, ref, *, defer_prune=False):
    
    546 546
     
    
    547 547
             # Remove cache ref
    
    548
    -        refpath = self._refpath(ref)
    
    549
    -        if not os.path.exists(refpath):
    
    550
    -            raise CASCacheError("Could not find ref '{}'".format(ref))
    
    551
    -
    
    552
    -        os.unlink(refpath)
    
    548
    +        self._remove_ref(ref)
    
    553 549
     
    
    554 550
             if not defer_prune:
    
    555 551
                 pruned = self.prune()
    
    ... ... @@ -626,6 +622,55 @@ class CASCache():
    626 622
         def _refpath(self, ref):
    
    627 623
             return os.path.join(self.casdir, 'refs', 'heads', ref)
    
    628 624
     
    
    625
    +    # _remove_ref()
    
    626
    +    #
    
    627
    +    # Removes a ref.
    
    628
    +    #
    
    629
    +    # This also takes care of pruning away directories which can
    
    630
    +    # be removed after having removed the given ref.
    
    631
    +    #
    
    632
    +    # Args:
    
    633
    +    #    ref (str): The ref to remove
    
    634
    +    #
    
    635
    +    # Raises:
    
    636
    +    #    (CASCacheError): If the ref didnt exist, or a system error
    
    637
    +    #                     occurred while removing it
    
    638
    +    #
    
    639
    +    def _remove_ref(self, ref):
    
    640
    +
    
    641
    +        # Remove the ref itself
    
    642
    +        refpath = self._refpath(ref)
    
    643
    +        try:
    
    644
    +            os.unlink(refpath)
    
    645
    +        except FileNotFoundError as e:
    
    646
    +            raise CASCacheError("Could not find ref '{}'".format(ref)) from e
    
    647
    +
    
    648
    +        # Now remove any leading directories
    
    649
    +        basedir = os.path.join(self.casdir, 'refs', 'heads')
    
    650
    +        components = list(os.path.split(ref))
    
    651
    +        while components:
    
    652
    +            components.pop()
    
    653
    +            refdir = os.path.join(basedir, *components)
    
    654
    +
    
    655
    +            # Break out once we reach the base
    
    656
    +            if refdir == basedir:
    
    657
    +                break
    
    658
    +
    
    659
    +            try:
    
    660
    +                os.rmdir(refdir)
    
    661
    +            except FileNotFoundError:
    
    662
    +                # The parent directory did not exist, but it's
    
    663
    +                # parent directory might still be ready to prune
    
    664
    +                pass
    
    665
    +            except OSError as e:
    
    666
    +                if e.errno == errno.ENOTEMPTY:
    
    667
    +                    # The parent directory was not empty, so we
    
    668
    +                    # cannot prune directories beyond this point
    
    669
    +                    break
    
    670
    +
    
    671
    +                # Something went wrong here
    
    672
    +                raise CASCacheError("System error while removing ref '{}': {}".format(ref, e)) from e
    
    673
    +
    
    629 674
         # _commit_directory():
    
    630 675
         #
    
    631 676
         # Adds local directory to content addressable store.
    
    ... ... @@ -797,7 +842,7 @@ class CASCache():
    797 842
                 # already in local repository
    
    798 843
                 return objpath
    
    799 844
     
    
    800
    -        with tempfile.NamedTemporaryFile(dir=self.tmpdir) as f:
    
    845
    +        with utils._tempnamedfile(dir=self.tmpdir) as f:
    
    801 846
                 remote._fetch_blob(digest, f)
    
    802 847
     
    
    803 848
                 added_digest = self.add_object(path=f.name, link_directly=True)
    
    ... ... @@ -807,7 +852,7 @@ class CASCache():
    807 852
     
    
    808 853
         def _batch_download_complete(self, batch):
    
    809 854
             for digest, data in batch.send():
    
    810
    -            with tempfile.NamedTemporaryFile(dir=self.tmpdir) as f:
    
    855
    +            with utils._tempnamedfile(dir=self.tmpdir) as f:
    
    811 856
                     f.write(data)
    
    812 857
                     f.flush()
    
    813 858
     
    
    ... ... @@ -904,7 +949,7 @@ class CASCache():
    904 949
     
    
    905 950
         def _fetch_tree(self, remote, digest):
    
    906 951
             # download but do not store the Tree object
    
    907
    -        with tempfile.NamedTemporaryFile(dir=self.tmpdir) as out:
    
    952
    +        with utils._tempnamedfile(dir=self.tmpdir) as out:
    
    908 953
                 remote._fetch_blob(digest, out)
    
    909 954
     
    
    910 955
                 tree = remote_execution_pb2.Tree()
    

  • buildstream/utils.py
    ... ... @@ -1032,6 +1032,36 @@ def _tempdir(suffix="", prefix="tmp", dir=None): # pylint: disable=redefined-bu
    1032 1032
             cleanup_tempdir()
    
    1033 1033
     
    
    1034 1034
     
    
    1035
    +# _tempnamedfile()
    
    1036
    +#
    
    1037
    +# A context manager for doing work on an open temporary file
    
    1038
    +# which is guaranteed to be named and have an entry in the filesystem.
    
    1039
    +#
    
    1040
    +# Args:
    
    1041
    +#    dir (str): A path to a parent directory for the temporary file
    
    1042
    +#    suffix (str): A suffix for the temproary file name
    
    1043
    +#    prefix (str): A prefix for the temporary file name
    
    1044
    +#
    
    1045
    +# Yields:
    
    1046
    +#    (str): The temporary file handle
    
    1047
    +#
    
    1048
    +# Do not use tempfile.NamedTemporaryFile() directly, as this will
    
    1049
    +# leak files on the filesystem when BuildStream exits a process
    
    1050
    +# on SIGTERM.
    
    1051
    +#
    
    1052
    +@contextmanager
    
    1053
    +def _tempnamedfile(suffix="", prefix="tmp", dir=None):  # pylint: disable=redefined-builtin
    
    1054
    +    temp = None
    
    1055
    +
    
    1056
    +    def close_tempfile():
    
    1057
    +        if temp is not None:
    
    1058
    +            temp.close()
    
    1059
    +
    
    1060
    +    with _signals.terminator(close_tempfile), \
    
    1061
    +        tempfile.NamedTemporaryFile(suffix=suffix, prefix=prefix, dir=dir) as temp:
    
    1062
    +        yield temp
    
    1063
    +
    
    1064
    +
    
    1035 1065
     # _kill_process_tree()
    
    1036 1066
     #
    
    1037 1067
     # Brutally murder a process and all of its children
    

  • tests/artifactcache/expiry.py
    ... ... @@ -382,6 +382,7 @@ def test_extract_expiry(cli, datafiles, tmpdir):
    382 382
         res = cli.run(project=project, args=['checkout', 'target.bst', os.path.join(str(tmpdir), 'checkout')])
    
    383 383
         res.assert_success()
    
    384 384
     
    
    385
    +    # Get a snapshot of the extracts in advance
    
    385 386
         extractdir = os.path.join(project, 'cache', 'artifacts', 'extract', 'test', 'target')
    
    386 387
         extracts = os.listdir(extractdir)
    
    387 388
         assert(len(extracts) == 1)
    
    ... ... @@ -395,3 +396,16 @@ def test_extract_expiry(cli, datafiles, tmpdir):
    395 396
     
    
    396 397
         # Now the extract should be removed.
    
    397 398
         assert not os.path.exists(extract)
    
    399
    +
    
    400
    +    # As an added bonus, let's ensure that no directories have been left behind
    
    401
    +    #
    
    402
    +    # Now we should have a directory for the cached target2.bst, which
    
    403
    +    # replaced target.bst in the cache, we should not have a directory
    
    404
    +    # for the target.bst
    
    405
    +    refsdir = os.path.join(project, 'cache', 'artifacts', 'cas', 'refs', 'heads')
    
    406
    +    refsdirtest = os.path.join(refsdir, 'test')
    
    407
    +    refsdirtarget = os.path.join(refsdirtest, 'target')
    
    408
    +    refsdirtarget2 = os.path.join(refsdirtest, 'target2')
    
    409
    +
    
    410
    +    assert os.path.isdir(refsdirtarget2)
    
    411
    +    assert not os.path.exists(refsdirtarget)



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]