[Notes] [Git][BuildStream/buildstream][jmac/cas_virtual_directory] 5 commits: Initial implementation of _casbaseddirectory.py



Title: GitLab

Jim MacArthur pushed to branch jmac/cas_virtual_directory at BuildStream / buildstream

Commits:

6 changed files:

Changes:

  • buildstream/sandbox/_sandboxbwrap.py
    ... ... @@ -243,6 +243,7 @@ class SandboxBwrap(Sandbox):
    243 243
                             # a bug, bwrap mounted a tempfs here and when it exits, that better be empty.
    
    244 244
                             pass
    
    245 245
     
    
    246
    +        self._vdir.mark_changed()
    
    246 247
             return exit_code
    
    247 248
     
    
    248 249
         def run_bwrap(self, argv, stdin, stdout, stderr, env, interactive):
    

  • buildstream/sandbox/_sandboxchroot.py
    ... ... @@ -106,6 +106,7 @@ class SandboxChroot(Sandbox):
    106 106
                 status = self.chroot(rootfs, command, stdin, stdout,
    
    107 107
                                      stderr, cwd, env, flags)
    
    108 108
     
    
    109
    +        self._vdir.mark_changed()
    
    109 110
             return status
    
    110 111
     
    
    111 112
         # chroot()
    

  • buildstream/sandbox/sandbox.py
    ... ... @@ -31,6 +31,7 @@ See also: :ref:`sandboxing`.
    31 31
     import os
    
    32 32
     from .._exceptions import ImplError, BstError
    
    33 33
     from ..storage._filebaseddirectory import FileBasedDirectory
    
    34
    +from ..storage._casbaseddirectory import CasBasedDirectory
    
    34 35
     
    
    35 36
     
    
    36 37
     class SandboxFlags():
    
    ... ... @@ -105,6 +106,7 @@ class Sandbox():
    105 106
             self.__scratch = os.path.join(self.__directory, 'scratch')
    
    106 107
             for directory_ in [self._root, self.__scratch]:
    
    107 108
                 os.makedirs(directory_, exist_ok=True)
    
    109
    +        self._vdir = None
    
    108 110
     
    
    109 111
         def get_directory(self):
    
    110 112
             """Fetches the sandbox root directory
    
    ... ... @@ -133,8 +135,14 @@ class Sandbox():
    133 135
                (str): The sandbox root directory
    
    134 136
     
    
    135 137
             """
    
    136
    -        # For now, just create a new Directory every time we're asked
    
    137
    -        return FileBasedDirectory(self._root)
    
    138
    +        if not self._vdir:
    
    139
    +            # BST_CAS_DIRECTORIES is a deliberately hidden environment variable which
    
    140
    +            # can be used to switch on CAS-based directories for testing.
    
    141
    +            if 'BST_CAS_DIRECTORIES' in os.environ:
    
    142
    +                self._vdir = CasBasedDirectory(self.__context, ref=None)
    
    143
    +            else:
    
    144
    +                self._vdir = FileBasedDirectory(self._root)
    
    145
    +        return self._vdir
    
    138 146
     
    
    139 147
         def set_environment(self, environment):
    
    140 148
             """Sets the environment variables for the sandbox
    

  • buildstream/storage/_casbaseddirectory.py
    1
    +#
    
    2
    +#  Copyright (C) 2018 Bloomberg LLC
    
    3
    +#
    
    4
    +#  This program is free software; you can redistribute it and/or
    
    5
    +#  modify it under the terms of the GNU Lesser General Public
    
    6
    +#  License as published by the Free Software Foundation; either
    
    7
    +#  version 2 of the License, or (at your option) any later version.
    
    8
    +#
    
    9
    +#  This library is distributed in the hope that it will be useful,
    
    10
    +#  but WITHOUT ANY WARRANTY; without even the implied warranty of
    
    11
    +#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
    
    12
    +#  Lesser General Public License for more details.
    
    13
    +#
    
    14
    +#  You should have received a copy of the GNU Lesser General Public
    
    15
    +#  License along with this library. If not, see <http://www.gnu.org/licenses/>.
    
    16
    +#
    
    17
    +#  Authors:
    
    18
    +#        Jim MacArthur <jim macarthur codethink co uk>
    
    19
    +
    
    20
    +"""
    
    21
    +CasBasedDirectory
    
    22
    +=================
    
    23
    +
    
    24
    +Implementation of the Directory class which backs onto a Merkle-tree based content
    
    25
    +addressable storage system.
    
    26
    +
    
    27
    +See also: :ref:`sandboxing`.
    
    28
    +"""
    
    29
    +
    
    30
    +from collections import OrderedDict
    
    31
    +
    
    32
    +import os
    
    33
    +import tempfile
    
    34
    +import stat
    
    35
    +
    
    36
    +from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    
    37
    +from .._exceptions import BstError
    
    38
    +from .directory import Directory, VirtualDirectoryError
    
    39
    +from ._filebaseddirectory import FileBasedDirectory
    
    40
    +from ..utils import FileListResult, safe_copy, list_relative_paths
    
    41
    +from .._artifactcache.cascache import CASCache
    
    42
    +
    
    43
    +
    
    44
    +class IndexEntry():
    
    45
    +    """ Used in our index of names to objects to store the 'modified' flag
    
    46
    +    for directory entries. Because we need both the remote_execution_pb2 object
    
    47
    +    and our own Directory object for directory entries, we store both. For files
    
    48
    +    and symlinks, only pb2_object is used. """
    
    49
    +    def __init__(self, pb2_object, buildstream_object=None, modified=False):
    
    50
    +        self.pb2_object = pb2_object
    
    51
    +        self.buildstream_object = buildstream_object
    
    52
    +        self.modified = modified
    
    53
    +
    
    54
    +
    
    55
    +# CasBasedDirectory intentionally doesn't call its superclass constructor,
    
    56
    +# which is meant to be unimplemented.
    
    57
    +# pylint: disable=super-init-not-called
    
    58
    +
    
    59
    +class CasBasedDirectory(Directory):
    
    60
    +    """
    
    61
    +    CAS-based directories can have two names; one is a 'common name' which has no effect
    
    62
    +    on functionality, and the 'filename'. If a CasBasedDirectory has a parent, then 'filename'
    
    63
    +    must be the name of an entry in the parent directory's index which points to this object.
    
    64
    +    This is used to inform a parent directory that it must update the given hash for this
    
    65
    +    object when this object changes.
    
    66
    +
    
    67
    +    Typically a top-level CasBasedDirectory will have a common_name and no filename, and
    
    68
    +    subdirectories will have a filename and no common_name. common_name can be used to identify
    
    69
    +    CasBasedDirectory objects in a log file, since they have no unique position in a file
    
    70
    +    system.
    
    71
    +    """
    
    72
    +
    
    73
    +    # Two constants which define the separators used by the remote execution API.
    
    74
    +    _pb2_path_sep = "/"
    
    75
    +    _bp2_absolute_path_prefix = "/"
    
    76
    +
    
    77
    +    def __init__(self, context, ref=None, parent=None, common_name="untitled", filename=None):
    
    78
    +        self.context = context
    
    79
    +        self.cas_directory = os.path.join(context.artifactdir, 'cas')
    
    80
    +        self.filename = filename
    
    81
    +        self.common_name = common_name
    
    82
    +        self.pb2_directory = remote_execution_pb2.Directory()
    
    83
    +        self.cas_cache = CASCache(context)
    
    84
    +        if ref:
    
    85
    +            with open(self.cas_cache.objpath(ref), 'rb') as f:
    
    86
    +                self.pb2_directory.ParseFromString(f.read())
    
    87
    +
    
    88
    +        self.ref = ref
    
    89
    +        self.index = OrderedDict()
    
    90
    +        self.parent = parent
    
    91
    +        self._directory_read = False
    
    92
    +        self._populate_index()
    
    93
    +
    
    94
    +    def _populate_index(self):
    
    95
    +        if self._directory_read:
    
    96
    +            return
    
    97
    +        for entry in self.pb2_directory.directories:
    
    98
    +            buildStreamDirectory = CasBasedDirectory(self.context, ref=entry.digest,
    
    99
    +                                                     parent=self, filename=entry.name)
    
    100
    +            self.index[entry.name] = IndexEntry(entry, buildstream_object=buildStreamDirectory)
    
    101
    +        for entry in self.pb2_directory.files:
    
    102
    +            self.index[entry.name] = IndexEntry(entry)
    
    103
    +        for entry in self.pb2_directory.symlinks:
    
    104
    +            self.index[entry.name] = IndexEntry(entry)
    
    105
    +        self._directory_read = True
    
    106
    +
    
    107
    +    def _recalculate_recursing_up(self, caller=None):
    
    108
    +        """Recalculate the hash for this directory and store the results in
    
    109
    +        the cache.  If this directory has a parent, tell it to
    
    110
    +        recalculate (since changing this directory changes an entry in
    
    111
    +        the parent).
    
    112
    +
    
    113
    +        """
    
    114
    +        self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
    
    115
    +        if caller:
    
    116
    +            old_dir = self._find_pb2_entry(caller.filename)
    
    117
    +            self.cas_cache.add_object(digest=old_dir.digest, buffer=caller.pb2_directory.SerializeToString())
    
    118
    +        if self.parent:
    
    119
    +            self.parent._recalculate_recursing_up(self)
    
    120
    +
    
    121
    +    def _recalculate_recursing_down(self, parent=None):
    
    122
    +        """Recalculate the hash for this directory and any
    
    123
    +        subdirectories. Hashes for subdirectories should be calculated
    
    124
    +        and stored after a significant operation (e.g. an
    
    125
    +        import_files() call) but not after adding each file, as that
    
    126
    +        is extremely wasteful.
    
    127
    +
    
    128
    +        """
    
    129
    +        for entry in self.pb2_directory.directories:
    
    130
    +            self.index[entry.name].buildstream_object._recalculate_recursing_down(entry)
    
    131
    +
    
    132
    +        if parent:
    
    133
    +            self.ref = self.cas_cache.add_object(digest=parent.digest, buffer=self.pb2_directory.SerializeToString())
    
    134
    +        else:
    
    135
    +            self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
    
    136
    +        # We don't need to do anything more than that; files were already added earlier, and symlinks are
    
    137
    +        # part of the directory structure.
    
    138
    +
    
    139
    +    def _find_pb2_entry(self, name):
    
    140
    +        if name in self.index:
    
    141
    +            return self.index[name].pb2_object
    
    142
    +        return None
    
    143
    +
    
    144
    +    def _add_directory(self, name):
    
    145
    +        if name in self.index:
    
    146
    +            newdir = self.index[name].buildstream_object
    
    147
    +            if not isinstance(newdir, CasBasedDirectory):
    
    148
    +                # TODO: This may not be an actual error; it may actually overwrite it
    
    149
    +                raise VirtualDirectoryError("New directory {} in {} would overwrite existing non-directory of type {}"
    
    150
    +                                            .format(name, str(self), type(newdir)))
    
    151
    +            dirnode = self._find_pb2_entry(name)
    
    152
    +        else:
    
    153
    +            newdir = CasBasedDirectory(self.context, parent=self, filename=name)
    
    154
    +            dirnode = self.pb2_directory.directories.add()
    
    155
    +
    
    156
    +        dirnode.name = name
    
    157
    +
    
    158
    +        # Calculate the hash for an empty directory
    
    159
    +        new_directory = remote_execution_pb2.Directory()
    
    160
    +        self.cas_cache.add_object(digest=dirnode.digest, buffer=new_directory.SerializeToString())
    
    161
    +        self.index[name] = IndexEntry(dirnode, buildstream_object=newdir)
    
    162
    +        return newdir
    
    163
    +
    
    164
    +    def _add_new_file(self, basename, filename):
    
    165
    +        filenode = self.pb2_directory.files.add()
    
    166
    +        filenode.name = filename
    
    167
    +        self.cas_cache.add_object(digest=filenode.digest, path=os.path.join(basename, filename))
    
    168
    +        is_executable = os.access(os.path.join(basename, filename), os.X_OK)
    
    169
    +        filenode.is_executable = is_executable
    
    170
    +        self.index[filename] = IndexEntry(filenode, modified=(filename in self.index))
    
    171
    +
    
    172
    +    def _add_new_link(self, basename, filename):
    
    173
    +        existing_link = self._find_pb2_entry(filename)
    
    174
    +        if existing_link:
    
    175
    +            symlinknode = existing_link
    
    176
    +        else:
    
    177
    +            symlinknode = self.pb2_directory.symlinks.add()
    
    178
    +        symlinknode.name = filename
    
    179
    +        # A symlink node has no digest.
    
    180
    +        symlinknode.target = os.readlink(os.path.join(basename, filename))
    
    181
    +        self.index[filename] = IndexEntry(symlinknode, modified=(existing_link is not None))
    
    182
    +
    
    183
    +    def delete_entry(self, name):
    
    184
    +        for collection in [self.pb2_directory.files, self.pb2_directory.symlinks, self.pb2_directory.directories]:
    
    185
    +            if name in collection:
    
    186
    +                collection.remove(name)
    
    187
    +        if name in self.index:
    
    188
    +            del self.index[name]
    
    189
    +
    
    190
    +    def descend(self, subdirectory_spec, create=False):
    
    191
    +        """Descend one or more levels of directory hierarchy and return a new
    
    192
    +        Directory object for that directory.
    
    193
    +
    
    194
    +        Arguments:
    
    195
    +        * subdirectory_spec (list of strings): A list of strings which are all directory
    
    196
    +          names.
    
    197
    +        * create (boolean): If this is true, the directories will be created if
    
    198
    +          they don't already exist.
    
    199
    +
    
    200
    +        Note: At the moment, creating a directory by descending does
    
    201
    +        not update this object in the CAS cache. However, performing
    
    202
    +        an import_files() into a subdirectory of any depth obtained by
    
    203
    +        descending from this object *will* cause this directory to be
    
    204
    +        updated and stored.
    
    205
    +
    
    206
    +        """
    
    207
    +
    
    208
    +        # It's very common to send a directory name instead of a list and this causes
    
    209
    +        # bizarre errors, so check for it here
    
    210
    +        if not isinstance(subdirectory_spec, list):
    
    211
    +            subdirectory_spec = [subdirectory_spec]
    
    212
    +
    
    213
    +        # Because of the way split works, it's common to get a list which begins with
    
    214
    +        # an empty string. Detect these and remove them.
    
    215
    +        while subdirectory_spec and subdirectory_spec[0] == "":
    
    216
    +            subdirectory_spec.pop(0)
    
    217
    +
    
    218
    +        # Descending into [] returns the same directory.
    
    219
    +        if not subdirectory_spec:
    
    220
    +            return self
    
    221
    +
    
    222
    +        if subdirectory_spec[0] in self.index:
    
    223
    +            entry = self.index[subdirectory_spec[0]].buildstream_object
    
    224
    +            if isinstance(entry, CasBasedDirectory):
    
    225
    +                return entry.descend(subdirectory_spec[1:], create)
    
    226
    +            else:
    
    227
    +                error = "Cannot descend into {}, which is a '{}' in the directory {}"
    
    228
    +                raise VirtualDirectoryError(error.format(subdirectory_spec[0],
    
    229
    +                                                         type(entry).__name__,
    
    230
    +                                                         self))
    
    231
    +        else:
    
    232
    +            if create:
    
    233
    +                newdir = self._add_directory(subdirectory_spec[0])
    
    234
    +                return newdir.descend(subdirectory_spec[1:], create)
    
    235
    +            else:
    
    236
    +                error = "No entry called '{}' found in {}. There are directories called {}."
    
    237
    +                directory_list = ",".join([entry.name for entry in self.pb2_directory.directories])
    
    238
    +                raise VirtualDirectoryError(error.format(subdirectory_spec[0], str(self),
    
    239
    +                                                         directory_list))
    
    240
    +        return None
    
    241
    +
    
    242
    +    def find_root(self):
    
    243
    +        """ Finds the root of this directory tree by following 'parent' until there is
    
    244
    +        no parent. """
    
    245
    +        if self.parent:
    
    246
    +            return self.parent.find_root()
    
    247
    +        else:
    
    248
    +            return self
    
    249
    +
    
    250
    +    def _resolve_symlink_or_directory(self, name):
    
    251
    +        """Used only by _import_files_from_directory. Tries to resolve a
    
    252
    +        directory name or symlink name. 'name' must be an entry in this
    
    253
    +        directory. It must be a single symlink or directory name, not a path
    
    254
    +        separated by path separators. If it's an existing directory name, it
    
    255
    +        just returns the Directory object for that. If it's a symlink, it will
    
    256
    +        attempt to find the target of the symlink and return that as a
    
    257
    +        Directory object.
    
    258
    +
    
    259
    +        If a symlink target doesn't exist, it will attempt to create it
    
    260
    +        as a directory as long as it's within this directory tree.
    
    261
    +        """
    
    262
    +
    
    263
    +        if isinstance(self.index[name].buildstream_object, Directory):
    
    264
    +            return self.index[name].buildstream_object
    
    265
    +        # OK then, it's a symlink
    
    266
    +        symlink = self._find_pb2_entry(name)
    
    267
    +        absolute = symlink.target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
    
    268
    +        if absolute:
    
    269
    +            root = self.find_root()
    
    270
    +        else:
    
    271
    +            root = self
    
    272
    +        directory = root
    
    273
    +        components = symlink.target.split(CasBasedDirectory._pb2_path_sep)
    
    274
    +        for c in components:
    
    275
    +            if c == "..":
    
    276
    +                directory = directory.parent
    
    277
    +            else:
    
    278
    +                directory = directory.descend(c, create=True)
    
    279
    +        return directory
    
    280
    +
    
    281
    +    def _check_replacement(self, name, path_prefix, fileListResult):
    
    282
    +        """ Checks whether 'name' exists, and if so, whether we can overwrite it.
    
    283
    +        If we can, add the name to 'overwritten_files' and delete the existing entry.
    
    284
    +        Returns 'True' if the import should go ahead.
    
    285
    +        fileListResult.overwritten and fileListResult.ignore are updated depending
    
    286
    +        on the result. """
    
    287
    +        existing_entry = self._find_pb2_entry(name)
    
    288
    +        relative_pathname = os.path.join(path_prefix, name)
    
    289
    +        if existing_entry is None:
    
    290
    +            return True
    
    291
    +        if (isinstance(existing_entry,
    
    292
    +                       (remote_execution_pb2.FileNode, remote_execution_pb2.SymlinkNode))):
    
    293
    +            self.delete_entry(name)
    
    294
    +            fileListResult.overwritten.append(relative_pathname)
    
    295
    +            return True
    
    296
    +        elif isinstance(existing_entry, remote_execution_pb2.DirectoryNode):
    
    297
    +            # If 'name' maps to a DirectoryNode, then there must be an entry in index
    
    298
    +            # pointing to another Directory.
    
    299
    +            if self.index[name].buildstream_object.is_empty():
    
    300
    +                self.delete_entry(name)
    
    301
    +                fileListResult.overwritten.append(relative_pathname)
    
    302
    +                return True
    
    303
    +            else:
    
    304
    +                # We can't overwrite a non-empty directory, so we just ignore it.
    
    305
    +                fileListResult.ignored.append(relative_pathname)
    
    306
    +                return False
    
    307
    +        raise VirtualDirectoryError("Entry '{}' is not a recognised file/link/directory and not None; it is {}"
    
    308
    +                                    .format(name, type(existing_entry)))
    
    309
    +
    
    310
    +    def _import_directory_recursively(self, directory_name, source_directory, remaining_path, path_prefix):
    
    311
    +        """ _import_directory_recursively and _import_files_from_directory will be called alternately
    
    312
    +        as a directory tree is descended. """
    
    313
    +        if directory_name in self.index:
    
    314
    +            subdir = self._resolve_symlink_or_directory(directory_name)
    
    315
    +        else:
    
    316
    +            subdir = self._add_directory(directory_name)
    
    317
    +        new_path_prefix = os.path.join(path_prefix, directory_name)
    
    318
    +        subdir_result = subdir._import_files_from_directory(os.path.join(source_directory, directory_name),
    
    319
    +                                                            [os.path.sep.join(remaining_path)],
    
    320
    +                                                            path_prefix=new_path_prefix)
    
    321
    +        return subdir_result
    
    322
    +
    
    323
    +    def _import_files_from_directory(self, source_directory, files, path_prefix=""):
    
    324
    +        """ Imports files from a traditional directory """
    
    325
    +        result = FileListResult()
    
    326
    +        for entry in sorted(files):
    
    327
    +            split_path = entry.split(os.path.sep)
    
    328
    +            # The actual file on the FS we're importing
    
    329
    +            import_file = os.path.join(source_directory, entry)
    
    330
    +            # The destination filename, relative to the root where the import started
    
    331
    +            relative_pathname = os.path.join(path_prefix, entry)
    
    332
    +            if len(split_path) > 1:
    
    333
    +                directory_name = split_path[0]
    
    334
    +                # Hand this off to the importer for that subdir. This will only do one file -
    
    335
    +                # a better way would be to hand off all the files in this subdir at once.
    
    336
    +                subdir_result = self._import_directory_recursively(directory_name, source_directory,
    
    337
    +                                                                   split_path[1:], path_prefix)
    
    338
    +                result.combine(subdir_result)
    
    339
    +            elif os.path.islink(import_file):
    
    340
    +                if self._check_replacement(entry, path_prefix, result):
    
    341
    +                    self._add_new_link(source_directory, entry)
    
    342
    +                    result.files_written.append(relative_pathname)
    
    343
    +            elif os.path.isdir(import_file):
    
    344
    +                # A plain directory which already exists isn't a problem; just ignore it.
    
    345
    +                if entry not in self.index:
    
    346
    +                    self._add_directory(entry)
    
    347
    +            elif os.path.isfile(import_file):
    
    348
    +                if self._check_replacement(entry, path_prefix, result):
    
    349
    +                    self._add_new_file(source_directory, entry)
    
    350
    +                    result.files_written.append(relative_pathname)
    
    351
    +        return result
    
    352
    +
    
    353
    +    def import_files(self, external_pathspec, *, files=None,
    
    354
    +                     report_written=True, update_utimes=False,
    
    355
    +                     can_link=False):
    
    356
    +        """Imports some or all files from external_path into this directory.
    
    357
    +
    
    358
    +        Keyword arguments: external_pathspec: Either a string
    
    359
    +        containing a pathname, or a Directory object, to use as the
    
    360
    +        source.
    
    361
    +
    
    362
    +        files (list of strings): A list of all the files relative to
    
    363
    +        the external_pathspec to copy. If 'None' is supplied, all
    
    364
    +        files are copied.
    
    365
    +
    
    366
    +        report_written (bool): Return the full list of files
    
    367
    +        written. Defaults to true. If false, only a list of
    
    368
    +        overwritten files is returned.
    
    369
    +
    
    370
    +        update_utimes (bool): Currently ignored, since CAS does not store utimes.
    
    371
    +
    
    372
    +        can_link (bool): Ignored, since hard links do not have any meaning within CAS.
    
    373
    +        """
    
    374
    +        if isinstance(external_pathspec, FileBasedDirectory):
    
    375
    +            source_directory = external_pathspec.get_underlying_directory()
    
    376
    +        elif isinstance(external_pathspec, CasBasedDirectory):
    
    377
    +            # TODO: This transfers from one CAS to another via the
    
    378
    +            # filesystem, which is very inefficient. Alter this so it
    
    379
    +            # transfers refs across directly.
    
    380
    +            with tempfile.TemporaryDirectory(prefix="roundtrip") as tmpdir:
    
    381
    +                external_pathspec.export_files(tmpdir)
    
    382
    +                if files is None:
    
    383
    +                    files = list_relative_paths(tmpdir)
    
    384
    +                result = self._import_files_from_directory(tmpdir, files=files)
    
    385
    +            return result
    
    386
    +        else:
    
    387
    +            source_directory = external_pathspec
    
    388
    +
    
    389
    +        if files is None:
    
    390
    +            files = list_relative_paths(source_directory)
    
    391
    +
    
    392
    +        # TODO: No notice is taken of report_written, update_utimes or can_link.
    
    393
    +        # Current behaviour is to fully populate the report, which is inefficient,
    
    394
    +        # but still correct.
    
    395
    +        result = self._import_files_from_directory(source_directory, files=files)
    
    396
    +
    
    397
    +        # We need to recalculate and store the hashes of all directories both
    
    398
    +        # up and down the tree; we have changed our directory by importing files
    
    399
    +        # which changes our hash and all our parents' hashes of us. The trees
    
    400
    +        # lower down need to be stored in the CAS as they are not automatically
    
    401
    +        # added during construction.
    
    402
    +        self._recalculate_recursing_down()
    
    403
    +        if self.parent:
    
    404
    +            self.parent._recalculate_recursing_up(self)
    
    405
    +        return result
    
    406
    +
    
    407
    +    def set_deterministic_mtime(self):
    
    408
    +        """ Sets a static modification time for all regular files in this directory.
    
    409
    +        Since we don't store any modification time, we don't need to do anything.
    
    410
    +        """
    
    411
    +        pass
    
    412
    +
    
    413
    +    def set_deterministic_user(self):
    
    414
    +        """ Sets all files in this directory to the current user's euid/egid.
    
    415
    +        We also don't store user data, so this can be ignored.
    
    416
    +        """
    
    417
    +        pass
    
    418
    +
    
    419
    +    def export_files(self, to_directory, *, can_link=False, can_destroy=False):
    
    420
    +        """Copies everything from this into to_directory, which must be the name
    
    421
    +        of a traditional filesystem directory.
    
    422
    +
    
    423
    +        Arguments:
    
    424
    +
    
    425
    +        to_directory (string): a path outside this directory object
    
    426
    +        where the contents will be copied to.
    
    427
    +
    
    428
    +        can_link (bool): Whether we can create hard links in to_directory
    
    429
    +        instead of copying.
    
    430
    +
    
    431
    +        can_destroy (bool): Whether we can destroy elements in this
    
    432
    +        directory to export them (e.g. by renaming them as the
    
    433
    +        target).
    
    434
    +
    
    435
    +        """
    
    436
    +
    
    437
    +        if not os.path.exists(to_directory):
    
    438
    +            os.mkdir(to_directory)
    
    439
    +
    
    440
    +        for entry in self.pb2_directory.directories:
    
    441
    +            if entry.name not in self.index:
    
    442
    +                raise VirtualDirectoryError("CasDir {} contained {} in directories but not in the index"
    
    443
    +                                            .format(str(self), entry.name))
    
    444
    +            if not self._directory_read:
    
    445
    +                raise VirtualDirectoryError("CasDir {} has not been indexed yet".format(str(self)))
    
    446
    +            dest_dir = os.path.join(to_directory, entry.name)
    
    447
    +            if not os.path.exists(dest_dir):
    
    448
    +                os.mkdir(dest_dir)
    
    449
    +            target = self.descend([entry.name])
    
    450
    +            target.export_files(dest_dir)
    
    451
    +        for entry in self.pb2_directory.files:
    
    452
    +            # Extract the entry to a single file
    
    453
    +            dest_name = os.path.join(to_directory, entry.name)
    
    454
    +            src_name = self.cas_cache.objpath(entry.digest)
    
    455
    +            safe_copy(src_name, dest_name)
    
    456
    +            if entry.is_executable:
    
    457
    +                os.chmod(dest_name, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR |
    
    458
    +                         stat.S_IRGRP | stat.S_IXGRP |
    
    459
    +                         stat.S_IROTH | stat.S_IXOTH)
    
    460
    +        for entry in self.pb2_directory.symlinks:
    
    461
    +            src_name = os.path.join(to_directory, entry.name)
    
    462
    +            target_name = entry.target
    
    463
    +            try:
    
    464
    +                os.symlink(target_name, src_name)
    
    465
    +            except FileExistsError as e:
    
    466
    +                raise BstError(("Cannot create a symlink named {} pointing to {}." +
    
    467
    +                                " The original error was: {}").
    
    468
    +                               format(src_name, entry.target, e))
    
    469
    +
    
    470
    +    def export_to_tar(self, tarfile, destination_dir, mtime=0):
    
    471
    +        raise NotImplementedError()
    
    472
    +
    
    473
    +    def mark_changed(self):
    
    474
    +        """ It should not be possible to externally modify a CAS-based
    
    475
    +        directory at the moment."""
    
    476
    +        raise NotImplementedError()
    
    477
    +
    
    478
    +    def is_empty(self):
    
    479
    +        """ Return true if this directory has no files, subdirectories or links in it.
    
    480
    +        """
    
    481
    +        return len(self.index) == 0
    
    482
    +
    
    483
    +    def mark_unmodified(self):
    
    484
    +        """ Marks all files in this directory (recursively) as unmodified.
    
    485
    +        """
    
    486
    +        # TODO: We don't actually mark our own directory unmodified
    
    487
    +        # here, because we can't get to the containing IndexEntry -
    
    488
    +        # just the objects we contain.
    
    489
    +        for i in self.index.values():
    
    490
    +            i.modified = False
    
    491
    +            if isinstance(i.buildstream_object, CasBasedDirectory):
    
    492
    +                i.buildstream_object.mark_unmodified()
    
    493
    +
    
    494
    +    def list_modified_paths(self):
    
    495
    +        """Provide a list of relative paths which have been modified since the
    
    496
    +        last call to mark_unmodified.
    
    497
    +
    
    498
    +        Return value: List(str) - list of modified paths
    
    499
    +        """
    
    500
    +
    
    501
    +        filelist = []
    
    502
    +        for (k, v) in self.index.items():
    
    503
    +            if isinstance(v.buildstream_object, CasBasedDirectory):
    
    504
    +                filelist.extend([k + os.path.sep + x for x in v.buildstream_object.list_modified_paths()])
    
    505
    +            elif isinstance(v.pb2_object, remote_execution_pb2.FileNode) and v.modified:
    
    506
    +                filelist.append(k)
    
    507
    +        return filelist
    
    508
    +
    
    509
    +    def list_relative_paths(self):
    
    510
    +        """Provide a list of all relative paths.
    
    511
    +
    
    512
    +        NOTE: This list is not in the same order as utils.list_relative_paths.
    
    513
    +
    
    514
    +        Return value: List(str) - list of all paths
    
    515
    +        """
    
    516
    +
    
    517
    +        filelist = []
    
    518
    +        for (k, v) in self.index.items():
    
    519
    +            if isinstance(v.buildstream_object, CasBasedDirectory):
    
    520
    +                filelist.extend([k + os.path.sep + x for x in v.buildstream_object.list_relative_paths()])
    
    521
    +            elif isinstance(v.pb2_object, remote_execution_pb2.FileNode):
    
    522
    +                filelist.append(k)
    
    523
    +        return filelist
    
    524
    +
    
    525
    +    def _get_identifier(self):
    
    526
    +        path = ""
    
    527
    +        if self.parent:
    
    528
    +            path = self.parent._get_identifier()
    
    529
    +        if self.filename:
    
    530
    +            path += "/" + self.filename
    
    531
    +        else:
    
    532
    +            path += "/" + self.common_name
    
    533
    +        return path
    
    534
    +
    
    535
    +    def __str__(self):
    
    536
    +        return "[CAS:{}]".format(self._get_identifier())
    
    537
    +
    
    538
    +    def get_underlying_directory(self):
    
    539
    +        """ There is no underlying directory for a CAS-backed directory, so
    
    540
    +        throw an exception. """
    
    541
    +        raise VirtualDirectoryError("get_underlying_directory was called on a CAS-backed directory," +
    
    542
    +                                    " which has no underlying directory.")

  • buildstream/storage/_filebaseddirectory.py
    ... ... @@ -95,7 +95,8 @@ class FileBasedDirectory(Directory):
    95 95
                 if create:
    
    96 96
                     new_path = os.path.join(self.external_directory, subdirectory_spec[0])
    
    97 97
                     os.makedirs(new_path, exist_ok=True)
    
    98
    -                return FileBasedDirectory(new_path).descend(subdirectory_spec[1:], create)
    
    98
    +                self.index[subdirectory_spec[0]] = FileBasedDirectory(new_path).descend(subdirectory_spec[1:], create)
    
    99
    +                return self.index[subdirectory_spec[0]]
    
    99 100
                 else:
    
    100 101
                     error = "No entry called '{}' found in the directory rooted at {}"
    
    101 102
                     raise VirtualDirectoryError(error.format(subdirectory_spec[0], self.external_directory))
    
    ... ... @@ -121,8 +122,12 @@ class FileBasedDirectory(Directory):
    121 122
     
    
    122 123
                 for f in import_result.files_written:
    
    123 124
                     os.utime(os.path.join(self.external_directory, f), times=(cur_time, cur_time))
    
    125
    +        self.mark_changed()
    
    124 126
             return import_result
    
    125 127
     
    
    128
    +    def mark_changed(self):
    
    129
    +        self._directory_read = False
    
    130
    +
    
    126 131
         def set_deterministic_mtime(self):
    
    127 132
             _set_deterministic_mtime(self.external_directory)
    
    128 133
     
    

  • buildstream/storage/directory.py
    ... ... @@ -124,6 +124,14 @@ class Directory():
    124 124
             """
    
    125 125
             raise NotImplementedError()
    
    126 126
     
    
    127
    +    def mark_changed(self):
    
    128
    +        """ Mark this directory as having been changed outside this API. This
    
    129
    +        normally can only happen by calling the Sandbox's `run`
    
    130
    +        method.
    
    131
    +
    
    132
    +        """
    
    133
    +        raise NotImplementedError()
    
    134
    +
    
    127 135
         # Convenience functions
    
    128 136
         def is_empty(self):
    
    129 137
             """ Return true if this directory has no files, subdirectories or links in it.
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]