[Notes] [Git][BuildStream/buildstream][master] 9 commits: Convert uses of external_directory to get_underlying_directory()



Title: GitLab

Jim MacArthur pushed to branch master at BuildStream / buildstream

Commits:

15 changed files:

Changes:

  • buildstream/__init__.py
    ... ... @@ -30,6 +30,7 @@ if "_BST_COMPLETION" not in os.environ:
    30 30
         from .sandbox import Sandbox, SandboxFlags
    
    31 31
         from .plugin import Plugin
    
    32 32
         from .source import Source, SourceError, Consistency, SourceFetcher
    
    33
    -    from .element import Element, ElementError, Scope
    
    33
    +    from .element import Element, ElementError
    
    34
    +    from .element_enums import Scope
    
    34 35
         from .buildelement import BuildElement
    
    35 36
         from .scriptelement import ScriptElement

  • buildstream/_artifactcache/artifactcache.py
    ... ... @@ -21,7 +21,7 @@ import os
    21 21
     import string
    
    22 22
     from collections import Mapping, namedtuple
    
    23 23
     
    
    24
    -from ..element import _KeyStrength
    
    24
    +from ..element_enums import _KeyStrength
    
    25 25
     from .._exceptions import ArtifactError, ImplError, LoadError, LoadErrorReason
    
    26 26
     from .._message import Message, MessageType
    
    27 27
     from .. import utils
    

  • buildstream/element.py
    ... ... @@ -78,7 +78,6 @@ import stat
    78 78
     import copy
    
    79 79
     from collections import Mapping, OrderedDict
    
    80 80
     from contextlib import contextmanager
    
    81
    -from enum import Enum
    
    82 81
     import tempfile
    
    83 82
     import shutil
    
    84 83
     
    
    ... ... @@ -98,41 +97,9 @@ from .plugin import CoreWarnings
    98 97
     from .sandbox._config import SandboxConfig
    
    99 98
     
    
    100 99
     from .storage.directory import Directory
    
    101
    -from .storage._filebaseddirectory import FileBasedDirectory, VirtualDirectoryError
    
    102
    -
    
    103
    -
    
    104
    -# _KeyStrength():
    
    105
    -#
    
    106
    -# Strength of cache key
    
    107
    -#
    
    108
    -class _KeyStrength(Enum):
    
    109
    -
    
    110
    -    # Includes strong cache keys of all build dependencies and their
    
    111
    -    # runtime dependencies.
    
    112
    -    STRONG = 1
    
    113
    -
    
    114
    -    # Includes names of direct build dependencies but does not include
    
    115
    -    # cache keys of dependencies.
    
    116
    -    WEAK = 2
    
    117
    -
    
    118
    -
    
    119
    -class Scope(Enum):
    
    120
    -    """Types of scope for a given element"""
    
    121
    -
    
    122
    -    ALL = 1
    
    123
    -    """All elements which the given element depends on, following
    
    124
    -    all elements required for building. Including the element itself.
    
    125
    -    """
    
    126
    -
    
    127
    -    BUILD = 2
    
    128
    -    """All elements required for building the element, including their
    
    129
    -    respective run dependencies. Not including the given element itself.
    
    130
    -    """
    
    131
    -
    
    132
    -    RUN = 3
    
    133
    -    """All elements required for running the element. Including the element
    
    134
    -    itself.
    
    135
    -    """
    
    100
    +from .storage._filebaseddirectory import FileBasedDirectory
    
    101
    +from .storage.directory import VirtualDirectoryError
    
    102
    +from .element_enums import _KeyStrength, Scope
    
    136 103
     
    
    137 104
     
    
    138 105
     class ElementError(BstError):
    

  • buildstream/element_enums.py
    1
    +#
    
    2
    +#  Copyright (C) 2018 Bloomberg LP
    
    3
    +#
    
    4
    +#  This program is free software; you can redistribute it and/or
    
    5
    +#  modify it under the terms of the GNU Lesser General Public
    
    6
    +#  License as published by the Free Software Foundation; either
    
    7
    +#  version 2 of the License, or (at your option) any later version.
    
    8
    +#
    
    9
    +#  This library is distributed in the hope that it will be useful,
    
    10
    +#  but WITHOUT ANY WARRANTY; without even the implied warranty of
    
    11
    +#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
    
    12
    +#  Lesser General Public License for more details.
    
    13
    +#
    
    14
    +#  You should have received a copy of the GNU Lesser General Public
    
    15
    +#  License along with this library. If not, see <http://www.gnu.org/licenses/>.
    
    16
    +#
    
    17
    +#  Authors:
    
    18
    +#        Tristan Van Berkom <tristan vanberkom codethink co uk>
    
    19
    +#        Jim MacArthur <jim macarthur codethink co uk>
    
    20
    +
    
    21
    +"""
    
    22
    +Element - Globally visible enumerations
    
    23
    +=======================================
    
    24
    +
    
    25
    +"""
    
    26
    +
    
    27
    +from enum import Enum
    
    28
    +
    
    29
    +
    
    30
    +# _KeyStrength():
    
    31
    +#
    
    32
    +# Strength of cache key
    
    33
    +#
    
    34
    +class _KeyStrength(Enum):
    
    35
    +
    
    36
    +    # Includes strong cache keys of all build dependencies and their
    
    37
    +    # runtime dependencies.
    
    38
    +    STRONG = 1
    
    39
    +
    
    40
    +    # Includes names of direct build dependencies but does not include
    
    41
    +    # cache keys of dependencies.
    
    42
    +    WEAK = 2
    
    43
    +
    
    44
    +
    
    45
    +class Scope(Enum):
    
    46
    +    """Types of scope for a given element"""
    
    47
    +
    
    48
    +    ALL = 1
    
    49
    +    """All elements which the given element depends on, following
    
    50
    +    all elements required for building. Including the element itself.
    
    51
    +    """
    
    52
    +
    
    53
    +    BUILD = 2
    
    54
    +    """All elements required for building the element, including their
    
    55
    +    respective run dependencies. Not including the given element itself.
    
    56
    +    """
    
    57
    +
    
    58
    +    RUN = 3
    
    59
    +    """All elements required for running the element. Including the element
    
    60
    +    itself.
    
    61
    +    """

  • buildstream/sandbox/_mount.py
    ... ... @@ -32,8 +32,10 @@ from .._fuse import SafeHardlinks
    32 32
     class Mount():
    
    33 33
         def __init__(self, sandbox, mount_point, safe_hardlinks):
    
    34 34
             scratch_directory = sandbox._get_scratch_directory()
    
    35
    -        # Getting external_directory here is acceptable as we're part of the sandbox code.
    
    36
    -        root_directory = sandbox.get_virtual_directory().external_directory
    
    35
    +        # Getting _get_underlying_directory() here is acceptable as
    
    36
    +        # we're part of the sandbox code. This will fail if our
    
    37
    +        # directory is CAS-based.
    
    38
    +        root_directory = sandbox.get_virtual_directory()._get_underlying_directory()
    
    37 39
     
    
    38 40
             self.mount_point = mount_point
    
    39 41
             self.safe_hardlinks = safe_hardlinks
    

  • buildstream/sandbox/_sandboxbwrap.py
    ... ... @@ -58,7 +58,7 @@ class SandboxBwrap(Sandbox):
    58 58
             stdout, stderr = self._get_output()
    
    59 59
     
    
    60 60
             # Allowable access to underlying storage as we're part of the sandbox
    
    61
    -        root_directory = self.get_virtual_directory().external_directory
    
    61
    +        root_directory = self.get_virtual_directory()._get_underlying_directory()
    
    62 62
     
    
    63 63
             # Fallback to the sandbox default settings for
    
    64 64
             # the cwd and env.
    
    ... ... @@ -248,6 +248,7 @@ class SandboxBwrap(Sandbox):
    248 248
                             # a bug, bwrap mounted a tempfs here and when it exits, that better be empty.
    
    249 249
                             pass
    
    250 250
     
    
    251
    +        self._vdir._mark_changed()
    
    251 252
             return exit_code
    
    252 253
     
    
    253 254
         def run_bwrap(self, argv, stdin, stdout, stderr, interactive):
    

  • buildstream/sandbox/_sandboxchroot.py
    ... ... @@ -106,6 +106,7 @@ class SandboxChroot(Sandbox):
    106 106
                 status = self.chroot(rootfs, command, stdin, stdout,
    
    107 107
                                      stderr, cwd, env, flags)
    
    108 108
     
    
    109
    +        self._vdir._mark_changed()
    
    109 110
             return status
    
    110 111
     
    
    111 112
         # chroot()
    

  • buildstream/sandbox/sandbox.py
    ... ... @@ -31,6 +31,7 @@ See also: :ref:`sandboxing`.
    31 31
     import os
    
    32 32
     from .._exceptions import ImplError, BstError
    
    33 33
     from ..storage._filebaseddirectory import FileBasedDirectory
    
    34
    +from ..storage._casbaseddirectory import CasBasedDirectory
    
    34 35
     
    
    35 36
     
    
    36 37
     class SandboxFlags():
    
    ... ... @@ -105,6 +106,7 @@ class Sandbox():
    105 106
             self.__scratch = os.path.join(self.__directory, 'scratch')
    
    106 107
             for directory_ in [self._root, self.__scratch]:
    
    107 108
                 os.makedirs(directory_, exist_ok=True)
    
    109
    +        self._vdir = None
    
    108 110
     
    
    109 111
         def get_directory(self):
    
    110 112
             """Fetches the sandbox root directory
    
    ... ... @@ -133,8 +135,14 @@ class Sandbox():
    133 135
                (str): The sandbox root directory
    
    134 136
     
    
    135 137
             """
    
    136
    -        # For now, just create a new Directory every time we're asked
    
    137
    -        return FileBasedDirectory(self._root)
    
    138
    +        if not self._vdir:
    
    139
    +            # BST_CAS_DIRECTORIES is a deliberately hidden environment variable which
    
    140
    +            # can be used to switch on CAS-based directories for testing.
    
    141
    +            if 'BST_CAS_DIRECTORIES' in os.environ:
    
    142
    +                self._vdir = CasBasedDirectory(self.__context, ref=None)
    
    143
    +            else:
    
    144
    +                self._vdir = FileBasedDirectory(self._root)
    
    145
    +        return self._vdir
    
    138 146
     
    
    139 147
         def set_environment(self, environment):
    
    140 148
             """Sets the environment variables for the sandbox
    

  • buildstream/storage/_casbaseddirectory.py
    1
    +#
    
    2
    +#  Copyright (C) 2018 Bloomberg LP
    
    3
    +#
    
    4
    +#  This program is free software; you can redistribute it and/or
    
    5
    +#  modify it under the terms of the GNU Lesser General Public
    
    6
    +#  License as published by the Free Software Foundation; either
    
    7
    +#  version 2 of the License, or (at your option) any later version.
    
    8
    +#
    
    9
    +#  This library is distributed in the hope that it will be useful,
    
    10
    +#  but WITHOUT ANY WARRANTY; without even the implied warranty of
    
    11
    +#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
    
    12
    +#  Lesser General Public License for more details.
    
    13
    +#
    
    14
    +#  You should have received a copy of the GNU Lesser General Public
    
    15
    +#  License along with this library. If not, see <http://www.gnu.org/licenses/>.
    
    16
    +#
    
    17
    +#  Authors:
    
    18
    +#        Jim MacArthur <jim macarthur codethink co uk>
    
    19
    +
    
    20
    +"""
    
    21
    +CasBasedDirectory
    
    22
    +=================
    
    23
    +
    
    24
    +Implementation of the Directory class which backs onto a Merkle-tree based content
    
    25
    +addressable storage system.
    
    26
    +
    
    27
    +See also: :ref:`sandboxing`.
    
    28
    +"""
    
    29
    +
    
    30
    +from collections import OrderedDict
    
    31
    +
    
    32
    +import os
    
    33
    +import tempfile
    
    34
    +import stat
    
    35
    +
    
    36
    +from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    
    37
    +from .._exceptions import BstError
    
    38
    +from .directory import Directory, VirtualDirectoryError
    
    39
    +from ._filebaseddirectory import FileBasedDirectory
    
    40
    +from ..utils import FileListResult, safe_copy, list_relative_paths
    
    41
    +from .._artifactcache.cascache import CASCache
    
    42
    +
    
    43
    +
    
    44
    +class IndexEntry():
    
    45
    +    """ Used in our index of names to objects to store the 'modified' flag
    
    46
    +    for directory entries. Because we need both the remote_execution_pb2 object
    
    47
    +    and our own Directory object for directory entries, we store both. For files
    
    48
    +    and symlinks, only pb_object is used. """
    
    49
    +    def __init__(self, pb_object, buildstream_object=None, modified=False):
    
    50
    +        self.pb_object = pb_object  # Short for 'protocol buffer object'
    
    51
    +        self.buildstream_object = buildstream_object
    
    52
    +        self.modified = modified
    
    53
    +
    
    54
    +
    
    55
    +# CasBasedDirectory intentionally doesn't call its superclass constructor,
    
    56
    +# which is meant to be unimplemented.
    
    57
    +# pylint: disable=super-init-not-called
    
    58
    +
    
    59
    +class CasBasedDirectory(Directory):
    
    60
    +    """
    
    61
    +    CAS-based directories can have two names; one is a 'common name' which has no effect
    
    62
    +    on functionality, and the 'filename'. If a CasBasedDirectory has a parent, then 'filename'
    
    63
    +    must be the name of an entry in the parent directory's index which points to this object.
    
    64
    +    This is used to inform a parent directory that it must update the given hash for this
    
    65
    +    object when this object changes.
    
    66
    +
    
    67
    +    Typically a top-level CasBasedDirectory will have a common_name and no filename, and
    
    68
    +    subdirectories will have a filename and no common_name. common_name can be used to identify
    
    69
    +    CasBasedDirectory objects in a log file, since they have no unique position in a file
    
    70
    +    system.
    
    71
    +    """
    
    72
    +
    
    73
    +    # Two constants which define the separators used by the remote execution API.
    
    74
    +    _pb2_path_sep = "/"
    
    75
    +    _pb2_absolute_path_prefix = "/"
    
    76
    +
    
    77
    +    def __init__(self, context, ref=None, parent=None, common_name="untitled", filename=None):
    
    78
    +        self.context = context
    
    79
    +        self.cas_directory = os.path.join(context.artifactdir, 'cas')
    
    80
    +        self.filename = filename
    
    81
    +        self.common_name = common_name
    
    82
    +        self.pb2_directory = remote_execution_pb2.Directory()
    
    83
    +        self.cas_cache = CASCache(context)
    
    84
    +        if ref:
    
    85
    +            with open(self.cas_cache.objpath(ref), 'rb') as f:
    
    86
    +                self.pb2_directory.ParseFromString(f.read())
    
    87
    +
    
    88
    +        self.ref = ref
    
    89
    +        self.index = OrderedDict()
    
    90
    +        self.parent = parent
    
    91
    +        self._directory_read = False
    
    92
    +        self._populate_index()
    
    93
    +
    
    94
    +    def _populate_index(self):
    
    95
    +        if self._directory_read:
    
    96
    +            return
    
    97
    +        for entry in self.pb2_directory.directories:
    
    98
    +            buildStreamDirectory = CasBasedDirectory(self.context, ref=entry.digest,
    
    99
    +                                                     parent=self, filename=entry.name)
    
    100
    +            self.index[entry.name] = IndexEntry(entry, buildstream_object=buildStreamDirectory)
    
    101
    +        for entry in self.pb2_directory.files:
    
    102
    +            self.index[entry.name] = IndexEntry(entry)
    
    103
    +        for entry in self.pb2_directory.symlinks:
    
    104
    +            self.index[entry.name] = IndexEntry(entry)
    
    105
    +        self._directory_read = True
    
    106
    +
    
    107
    +    def _recalculate_recursing_up(self, caller=None):
    
    108
    +        """Recalculate the hash for this directory and store the results in
    
    109
    +        the cache.  If this directory has a parent, tell it to
    
    110
    +        recalculate (since changing this directory changes an entry in
    
    111
    +        the parent).
    
    112
    +
    
    113
    +        """
    
    114
    +        self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
    
    115
    +        if caller:
    
    116
    +            old_dir = self._find_pb2_entry(caller.filename)
    
    117
    +            self.cas_cache.add_object(digest=old_dir.digest, buffer=caller.pb2_directory.SerializeToString())
    
    118
    +        if self.parent:
    
    119
    +            self.parent._recalculate_recursing_up(self)
    
    120
    +
    
    121
    +    def _recalculate_recursing_down(self, parent=None):
    
    122
    +        """Recalculate the hash for this directory and any
    
    123
    +        subdirectories. Hashes for subdirectories should be calculated
    
    124
    +        and stored after a significant operation (e.g. an
    
    125
    +        import_files() call) but not after adding each file, as that
    
    126
    +        is extremely wasteful.
    
    127
    +
    
    128
    +        """
    
    129
    +        for entry in self.pb2_directory.directories:
    
    130
    +            self.index[entry.name].buildstream_object._recalculate_recursing_down(entry)
    
    131
    +
    
    132
    +        if parent:
    
    133
    +            self.ref = self.cas_cache.add_object(digest=parent.digest, buffer=self.pb2_directory.SerializeToString())
    
    134
    +        else:
    
    135
    +            self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
    
    136
    +        # We don't need to do anything more than that; files were already added earlier, and symlinks are
    
    137
    +        # part of the directory structure.
    
    138
    +
    
    139
    +    def _find_pb2_entry(self, name):
    
    140
    +        if name in self.index:
    
    141
    +            return self.index[name].pb_object
    
    142
    +        return None
    
    143
    +
    
    144
    +    def _find_self_in_parent(self):
    
    145
    +        assert self.parent is not None
    
    146
    +        parent = self.parent
    
    147
    +        for (k, v) in parent.index.items():
    
    148
    +            if v.buildstream_object == self:
    
    149
    +                return k
    
    150
    +        return None
    
    151
    +
    
    152
    +    def _add_directory(self, name):
    
    153
    +        if name in self.index:
    
    154
    +            newdir = self.index[name].buildstream_object
    
    155
    +            if not isinstance(newdir, CasBasedDirectory):
    
    156
    +                # TODO: This may not be an actual error; it may actually overwrite it
    
    157
    +                raise VirtualDirectoryError("New directory {} in {} would overwrite existing non-directory of type {}"
    
    158
    +                                            .format(name, str(self), type(newdir)))
    
    159
    +            dirnode = self._find_pb2_entry(name)
    
    160
    +        else:
    
    161
    +            newdir = CasBasedDirectory(self.context, parent=self, filename=name)
    
    162
    +            dirnode = self.pb2_directory.directories.add()
    
    163
    +
    
    164
    +        dirnode.name = name
    
    165
    +
    
    166
    +        # Calculate the hash for an empty directory
    
    167
    +        new_directory = remote_execution_pb2.Directory()
    
    168
    +        self.cas_cache.add_object(digest=dirnode.digest, buffer=new_directory.SerializeToString())
    
    169
    +        self.index[name] = IndexEntry(dirnode, buildstream_object=newdir)
    
    170
    +        return newdir
    
    171
    +
    
    172
    +    def _add_new_file(self, basename, filename):
    
    173
    +        filenode = self.pb2_directory.files.add()
    
    174
    +        filenode.name = filename
    
    175
    +        self.cas_cache.add_object(digest=filenode.digest, path=os.path.join(basename, filename))
    
    176
    +        is_executable = os.access(os.path.join(basename, filename), os.X_OK)
    
    177
    +        filenode.is_executable = is_executable
    
    178
    +        self.index[filename] = IndexEntry(filenode, modified=(filename in self.index))
    
    179
    +
    
    180
    +    def _add_new_link(self, basename, filename):
    
    181
    +        existing_link = self._find_pb2_entry(filename)
    
    182
    +        if existing_link:
    
    183
    +            symlinknode = existing_link
    
    184
    +        else:
    
    185
    +            symlinknode = self.pb2_directory.symlinks.add()
    
    186
    +        symlinknode.name = filename
    
    187
    +        # A symlink node has no digest.
    
    188
    +        symlinknode.target = os.readlink(os.path.join(basename, filename))
    
    189
    +        self.index[filename] = IndexEntry(symlinknode, modified=(existing_link is not None))
    
    190
    +
    
    191
    +    def delete_entry(self, name):
    
    192
    +        for collection in [self.pb2_directory.files, self.pb2_directory.symlinks, self.pb2_directory.directories]:
    
    193
    +            if name in collection:
    
    194
    +                collection.remove(name)
    
    195
    +        if name in self.index:
    
    196
    +            del self.index[name]
    
    197
    +
    
    198
    +    def descend(self, subdirectory_spec, create=False):
    
    199
    +        """Descend one or more levels of directory hierarchy and return a new
    
    200
    +        Directory object for that directory.
    
    201
    +
    
    202
    +        Arguments:
    
    203
    +        * subdirectory_spec (list of strings): A list of strings which are all directory
    
    204
    +          names.
    
    205
    +        * create (boolean): If this is true, the directories will be created if
    
    206
    +          they don't already exist.
    
    207
    +
    
    208
    +        Note: At the moment, creating a directory by descending does
    
    209
    +        not update this object in the CAS cache. However, performing
    
    210
    +        an import_files() into a subdirectory of any depth obtained by
    
    211
    +        descending from this object *will* cause this directory to be
    
    212
    +        updated and stored.
    
    213
    +
    
    214
    +        """
    
    215
    +
    
    216
    +        # It's very common to send a directory name instead of a list and this causes
    
    217
    +        # bizarre errors, so check for it here
    
    218
    +        if not isinstance(subdirectory_spec, list):
    
    219
    +            subdirectory_spec = [subdirectory_spec]
    
    220
    +
    
    221
    +        # Because of the way split works, it's common to get a list which begins with
    
    222
    +        # an empty string. Detect these and remove them.
    
    223
    +        while subdirectory_spec and subdirectory_spec[0] == "":
    
    224
    +            subdirectory_spec.pop(0)
    
    225
    +
    
    226
    +        # Descending into [] returns the same directory.
    
    227
    +        if not subdirectory_spec:
    
    228
    +            return self
    
    229
    +
    
    230
    +        if subdirectory_spec[0] in self.index:
    
    231
    +            entry = self.index[subdirectory_spec[0]].buildstream_object
    
    232
    +            if isinstance(entry, CasBasedDirectory):
    
    233
    +                return entry.descend(subdirectory_spec[1:], create)
    
    234
    +            else:
    
    235
    +                error = "Cannot descend into {}, which is a '{}' in the directory {}"
    
    236
    +                raise VirtualDirectoryError(error.format(subdirectory_spec[0],
    
    237
    +                                                         type(entry).__name__,
    
    238
    +                                                         self))
    
    239
    +        else:
    
    240
    +            if create:
    
    241
    +                newdir = self._add_directory(subdirectory_spec[0])
    
    242
    +                return newdir.descend(subdirectory_spec[1:], create)
    
    243
    +            else:
    
    244
    +                error = "No entry called '{}' found in {}. There are directories called {}."
    
    245
    +                directory_list = ",".join([entry.name for entry in self.pb2_directory.directories])
    
    246
    +                raise VirtualDirectoryError(error.format(subdirectory_spec[0], str(self),
    
    247
    +                                                         directory_list))
    
    248
    +        return None
    
    249
    +
    
    250
    +    def find_root(self):
    
    251
    +        """ Finds the root of this directory tree by following 'parent' until there is
    
    252
    +        no parent. """
    
    253
    +        if self.parent:
    
    254
    +            return self.parent.find_root()
    
    255
    +        else:
    
    256
    +            return self
    
    257
    +
    
    258
    +    def _resolve_symlink_or_directory(self, name):
    
    259
    +        """Used only by _import_files_from_directory. Tries to resolve a
    
    260
    +        directory name or symlink name. 'name' must be an entry in this
    
    261
    +        directory. It must be a single symlink or directory name, not a path
    
    262
    +        separated by path separators. If it's an existing directory name, it
    
    263
    +        just returns the Directory object for that. If it's a symlink, it will
    
    264
    +        attempt to find the target of the symlink and return that as a
    
    265
    +        Directory object.
    
    266
    +
    
    267
    +        If a symlink target doesn't exist, it will attempt to create it
    
    268
    +        as a directory as long as it's within this directory tree.
    
    269
    +        """
    
    270
    +
    
    271
    +        if isinstance(self.index[name].buildstream_object, Directory):
    
    272
    +            return self.index[name].buildstream_object
    
    273
    +        # OK then, it's a symlink
    
    274
    +        symlink = self._find_pb2_entry(name)
    
    275
    +        absolute = symlink.target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
    
    276
    +        if absolute:
    
    277
    +            root = self.find_root()
    
    278
    +        else:
    
    279
    +            root = self
    
    280
    +        directory = root
    
    281
    +        components = symlink.target.split(CasBasedDirectory._pb2_path_sep)
    
    282
    +        for c in components:
    
    283
    +            if c == "..":
    
    284
    +                directory = directory.parent
    
    285
    +            else:
    
    286
    +                directory = directory.descend(c, create=True)
    
    287
    +        return directory
    
    288
    +
    
    289
    +    def _check_replacement(self, name, path_prefix, fileListResult):
    
    290
    +        """ Checks whether 'name' exists, and if so, whether we can overwrite it.
    
    291
    +        If we can, add the name to 'overwritten_files' and delete the existing entry.
    
    292
    +        Returns 'True' if the import should go ahead.
    
    293
    +        fileListResult.overwritten and fileListResult.ignore are updated depending
    
    294
    +        on the result. """
    
    295
    +        existing_entry = self._find_pb2_entry(name)
    
    296
    +        relative_pathname = os.path.join(path_prefix, name)
    
    297
    +        if existing_entry is None:
    
    298
    +            return True
    
    299
    +        if (isinstance(existing_entry,
    
    300
    +                       (remote_execution_pb2.FileNode, remote_execution_pb2.SymlinkNode))):
    
    301
    +            fileListResult.overwritten.append(relative_pathname)
    
    302
    +            return True
    
    303
    +        elif isinstance(existing_entry, remote_execution_pb2.DirectoryNode):
    
    304
    +            # If 'name' maps to a DirectoryNode, then there must be an entry in index
    
    305
    +            # pointing to another Directory.
    
    306
    +            if self.index[name].buildstream_object.is_empty():
    
    307
    +                self.delete_entry(name)
    
    308
    +                fileListResult.overwritten.append(relative_pathname)
    
    309
    +                return True
    
    310
    +            else:
    
    311
    +                # We can't overwrite a non-empty directory, so we just ignore it.
    
    312
    +                fileListResult.ignored.append(relative_pathname)
    
    313
    +                return False
    
    314
    +        assert False, ("Entry '{}' is not a recognised file/link/directory and not None; it is {}"
    
    315
    +                       .format(name, type(existing_entry)))
    
    316
    +        return False  # In case asserts are disabled
    
    317
    +
    
    318
    +    def _import_directory_recursively(self, directory_name, source_directory, remaining_path, path_prefix):
    
    319
    +        """ _import_directory_recursively and _import_files_from_directory will be called alternately
    
    320
    +        as a directory tree is descended. """
    
    321
    +        if directory_name in self.index:
    
    322
    +            subdir = self._resolve_symlink_or_directory(directory_name)
    
    323
    +        else:
    
    324
    +            subdir = self._add_directory(directory_name)
    
    325
    +        new_path_prefix = os.path.join(path_prefix, directory_name)
    
    326
    +        subdir_result = subdir._import_files_from_directory(os.path.join(source_directory, directory_name),
    
    327
    +                                                            [os.path.sep.join(remaining_path)],
    
    328
    +                                                            path_prefix=new_path_prefix)
    
    329
    +        return subdir_result
    
    330
    +
    
    331
    +    def _import_files_from_directory(self, source_directory, files, path_prefix=""):
    
    332
    +        """ Imports files from a traditional directory """
    
    333
    +        result = FileListResult()
    
    334
    +        for entry in sorted(files):
    
    335
    +            split_path = entry.split(os.path.sep)
    
    336
    +            # The actual file on the FS we're importing
    
    337
    +            import_file = os.path.join(source_directory, entry)
    
    338
    +            # The destination filename, relative to the root where the import started
    
    339
    +            relative_pathname = os.path.join(path_prefix, entry)
    
    340
    +            if len(split_path) > 1:
    
    341
    +                directory_name = split_path[0]
    
    342
    +                # Hand this off to the importer for that subdir. This will only do one file -
    
    343
    +                # a better way would be to hand off all the files in this subdir at once.
    
    344
    +                subdir_result = self._import_directory_recursively(directory_name, source_directory,
    
    345
    +                                                                   split_path[1:], path_prefix)
    
    346
    +                result.combine(subdir_result)
    
    347
    +            elif os.path.islink(import_file):
    
    348
    +                if self._check_replacement(entry, path_prefix, result):
    
    349
    +                    self._add_new_link(source_directory, entry)
    
    350
    +                    result.files_written.append(relative_pathname)
    
    351
    +            elif os.path.isdir(import_file):
    
    352
    +                # A plain directory which already exists isn't a problem; just ignore it.
    
    353
    +                if entry not in self.index:
    
    354
    +                    self._add_directory(entry)
    
    355
    +            elif os.path.isfile(import_file):
    
    356
    +                if self._check_replacement(entry, path_prefix, result):
    
    357
    +                    self._add_new_file(source_directory, entry)
    
    358
    +                    result.files_written.append(relative_pathname)
    
    359
    +        return result
    
    360
    +
    
    361
    +    def import_files(self, external_pathspec, *, files=None,
    
    362
    +                     report_written=True, update_utimes=False,
    
    363
    +                     can_link=False):
    
    364
    +        """Imports some or all files from external_path into this directory.
    
    365
    +
    
    366
    +        Keyword arguments: external_pathspec: Either a string
    
    367
    +        containing a pathname, or a Directory object, to use as the
    
    368
    +        source.
    
    369
    +
    
    370
    +        files (list of strings): A list of all the files relative to
    
    371
    +        the external_pathspec to copy. If 'None' is supplied, all
    
    372
    +        files are copied.
    
    373
    +
    
    374
    +        report_written (bool): Return the full list of files
    
    375
    +        written. Defaults to true. If false, only a list of
    
    376
    +        overwritten files is returned.
    
    377
    +
    
    378
    +        update_utimes (bool): Currently ignored, since CAS does not store utimes.
    
    379
    +
    
    380
    +        can_link (bool): Ignored, since hard links do not have any meaning within CAS.
    
    381
    +        """
    
    382
    +        if isinstance(external_pathspec, FileBasedDirectory):
    
    383
    +            source_directory = external_pathspec._get_underlying_directory()
    
    384
    +        elif isinstance(external_pathspec, CasBasedDirectory):
    
    385
    +            # TODO: This transfers from one CAS to another via the
    
    386
    +            # filesystem, which is very inefficient. Alter this so it
    
    387
    +            # transfers refs across directly.
    
    388
    +            with tempfile.TemporaryDirectory(prefix="roundtrip") as tmpdir:
    
    389
    +                external_pathspec.export_files(tmpdir)
    
    390
    +                if files is None:
    
    391
    +                    files = list_relative_paths(tmpdir)
    
    392
    +                result = self._import_files_from_directory(tmpdir, files=files)
    
    393
    +            return result
    
    394
    +        else:
    
    395
    +            source_directory = external_pathspec
    
    396
    +
    
    397
    +        if files is None:
    
    398
    +            files = list_relative_paths(source_directory)
    
    399
    +
    
    400
    +        # TODO: No notice is taken of report_written, update_utimes or can_link.
    
    401
    +        # Current behaviour is to fully populate the report, which is inefficient,
    
    402
    +        # but still correct.
    
    403
    +        result = self._import_files_from_directory(source_directory, files=files)
    
    404
    +
    
    405
    +        # We need to recalculate and store the hashes of all directories both
    
    406
    +        # up and down the tree; we have changed our directory by importing files
    
    407
    +        # which changes our hash and all our parents' hashes of us. The trees
    
    408
    +        # lower down need to be stored in the CAS as they are not automatically
    
    409
    +        # added during construction.
    
    410
    +        self._recalculate_recursing_down()
    
    411
    +        if self.parent:
    
    412
    +            self.parent._recalculate_recursing_up(self)
    
    413
    +        return result
    
    414
    +
    
    415
    +    def set_deterministic_mtime(self):
    
    416
    +        """ Sets a static modification time for all regular files in this directory.
    
    417
    +        Since we don't store any modification time, we don't need to do anything.
    
    418
    +        """
    
    419
    +        pass
    
    420
    +
    
    421
    +    def set_deterministic_user(self):
    
    422
    +        """ Sets all files in this directory to the current user's euid/egid.
    
    423
    +        We also don't store user data, so this can be ignored.
    
    424
    +        """
    
    425
    +        pass
    
    426
    +
    
    427
    +    def export_files(self, to_directory, *, can_link=False, can_destroy=False):
    
    428
    +        """Copies everything from this into to_directory, which must be the name
    
    429
    +        of a traditional filesystem directory.
    
    430
    +
    
    431
    +        Arguments:
    
    432
    +
    
    433
    +        to_directory (string): a path outside this directory object
    
    434
    +        where the contents will be copied to.
    
    435
    +
    
    436
    +        can_link (bool): Whether we can create hard links in to_directory
    
    437
    +        instead of copying.
    
    438
    +
    
    439
    +        can_destroy (bool): Whether we can destroy elements in this
    
    440
    +        directory to export them (e.g. by renaming them as the
    
    441
    +        target).
    
    442
    +
    
    443
    +        """
    
    444
    +
    
    445
    +        if not os.path.exists(to_directory):
    
    446
    +            os.mkdir(to_directory)
    
    447
    +
    
    448
    +        for entry in self.pb2_directory.directories:
    
    449
    +            if entry.name not in self.index:
    
    450
    +                raise VirtualDirectoryError("CasDir {} contained {} in directories but not in the index"
    
    451
    +                                            .format(str(self), entry.name))
    
    452
    +            if not self._directory_read:
    
    453
    +                raise VirtualDirectoryError("CasDir {} has not been indexed yet".format(str(self)))
    
    454
    +            dest_dir = os.path.join(to_directory, entry.name)
    
    455
    +            if not os.path.exists(dest_dir):
    
    456
    +                os.mkdir(dest_dir)
    
    457
    +            target = self.descend([entry.name])
    
    458
    +            target.export_files(dest_dir)
    
    459
    +        for entry in self.pb2_directory.files:
    
    460
    +            # Extract the entry to a single file
    
    461
    +            dest_name = os.path.join(to_directory, entry.name)
    
    462
    +            src_name = self.cas_cache.objpath(entry.digest)
    
    463
    +            safe_copy(src_name, dest_name)
    
    464
    +            if entry.is_executable:
    
    465
    +                os.chmod(dest_name, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR |
    
    466
    +                         stat.S_IRGRP | stat.S_IXGRP |
    
    467
    +                         stat.S_IROTH | stat.S_IXOTH)
    
    468
    +        for entry in self.pb2_directory.symlinks:
    
    469
    +            src_name = os.path.join(to_directory, entry.name)
    
    470
    +            target_name = entry.target
    
    471
    +            try:
    
    472
    +                os.symlink(target_name, src_name)
    
    473
    +            except FileExistsError as e:
    
    474
    +                raise BstError(("Cannot create a symlink named {} pointing to {}." +
    
    475
    +                                " The original error was: {}").
    
    476
    +                               format(src_name, entry.target, e))
    
    477
    +
    
    478
    +    def export_to_tar(self, tarfile, destination_dir, mtime=0):
    
    479
    +        raise NotImplementedError()
    
    480
    +
    
    481
    +    def mark_changed(self):
    
    482
    +        """ It should not be possible to externally modify a CAS-based
    
    483
    +        directory at the moment."""
    
    484
    +        raise NotImplementedError()
    
    485
    +
    
    486
    +    def is_empty(self):
    
    487
    +        """ Return true if this directory has no files, subdirectories or links in it.
    
    488
    +        """
    
    489
    +        return len(self.index) == 0
    
    490
    +
    
    491
    +    def _mark_directory_unmodified(self):
    
    492
    +        # Marks all entries in this directory and all child directories as unmodified.
    
    493
    +        for i in self.index.values():
    
    494
    +            i.modified = False
    
    495
    +            if isinstance(i.buildstream_object, CasBasedDirectory):
    
    496
    +                i.buildstream_object._mark_directory_unmodified()
    
    497
    +
    
    498
    +    def _mark_entry_unmodified(self, name):
    
    499
    +        # Marks an entry as unmodified. If the entry is a directory, it will
    
    500
    +        # recursively mark all its tree as unmodified.
    
    501
    +        self.index[name].modified = False
    
    502
    +        if self.index[name].buildstream_object:
    
    503
    +            self.index[name].buildstream_object._mark_directory_unmodified()
    
    504
    +
    
    505
    +    def mark_unmodified(self):
    
    506
    +        """ Marks all files in this directory (recursively) as unmodified.
    
    507
    +        If we have a parent, we mark our own entry as unmodified in that parent's
    
    508
    +        index.
    
    509
    +        """
    
    510
    +        if self.parent:
    
    511
    +            self.parent._mark_entry_unmodified(self._find_self_in_parent())
    
    512
    +        else:
    
    513
    +            self._mark_directory_unmodified()
    
    514
    +
    
    515
    +    def list_modified_paths(self):
    
    516
    +        """Provide a list of relative paths which have been modified since the
    
    517
    +        last call to mark_unmodified.
    
    518
    +
    
    519
    +        Return value: List(str) - list of modified paths
    
    520
    +        """
    
    521
    +
    
    522
    +        filelist = []
    
    523
    +        for (k, v) in self.index.items():
    
    524
    +            if isinstance(v.buildstream_object, CasBasedDirectory):
    
    525
    +                filelist.extend([k + os.path.sep + x for x in v.buildstream_object.list_modified_paths()])
    
    526
    +            elif isinstance(v.pb_object, remote_execution_pb2.FileNode) and v.modified:
    
    527
    +                filelist.append(k)
    
    528
    +        return filelist
    
    529
    +
    
    530
    +    def list_relative_paths(self):
    
    531
    +        """Provide a list of all relative paths.
    
    532
    +
    
    533
    +        NOTE: This list is not in the same order as utils.list_relative_paths.
    
    534
    +
    
    535
    +        Return value: List(str) - list of all paths
    
    536
    +        """
    
    537
    +
    
    538
    +        filelist = []
    
    539
    +        for (k, v) in self.index.items():
    
    540
    +            if isinstance(v.buildstream_object, CasBasedDirectory):
    
    541
    +                filelist.extend([k + os.path.sep + x for x in v.buildstream_object.list_relative_paths()])
    
    542
    +            elif isinstance(v.pb_object, remote_execution_pb2.FileNode):
    
    543
    +                filelist.append(k)
    
    544
    +        return filelist
    
    545
    +
    
    546
    +    def _get_identifier(self):
    
    547
    +        path = ""
    
    548
    +        if self.parent:
    
    549
    +            path = self.parent._get_identifier()
    
    550
    +        if self.filename:
    
    551
    +            path += "/" + self.filename
    
    552
    +        else:
    
    553
    +            path += "/" + self.common_name
    
    554
    +        return path
    
    555
    +
    
    556
    +    def __str__(self):
    
    557
    +        return "[CAS:{}]".format(self._get_identifier())
    
    558
    +
    
    559
    +    def _get_underlying_directory(self):
    
    560
    +        """ There is no underlying directory for a CAS-backed directory, so
    
    561
    +        throw an exception. """
    
    562
    +        raise VirtualDirectoryError("_get_underlying_directory was called on a CAS-backed directory," +
    
    563
    +                                    " which has no underlying directory.")

  • buildstream/storage/_filebaseddirectory.py
    ... ... @@ -29,25 +29,12 @@ See also: :ref:`sandboxing`.
    29 29
     
    
    30 30
     import os
    
    31 31
     import time
    
    32
    -from .._exceptions import BstError, ErrorDomain
    
    33
    -from .directory import Directory
    
    32
    +from .directory import Directory, VirtualDirectoryError
    
    34 33
     from ..utils import link_files, copy_files, list_relative_paths, _get_link_mtime, _magic_timestamp
    
    35 34
     from ..utils import _set_deterministic_user, _set_deterministic_mtime
    
    36 35
     
    
    37
    -
    
    38
    -class VirtualDirectoryError(BstError):
    
    39
    -    """Raised by Directory functions when system calls fail.
    
    40
    -    This will be handled internally by the BuildStream core,
    
    41
    -    if you need to handle this error, then it should be reraised,
    
    42
    -    or either of the :class:`.ElementError` or :class:`.SourceError`
    
    43
    -    exceptions should be raised from this error.
    
    44
    -    """
    
    45
    -    def __init__(self, message, reason=None):
    
    46
    -        super().__init__(message, domain=ErrorDomain.VIRTUAL_FS, reason=reason)
    
    47
    -
    
    48
    -
    
    49 36
     # FileBasedDirectory intentionally doesn't call its superclass constructor,
    
    50
    -# which is mean to be unimplemented.
    
    37
    +# which is meant to be unimplemented.
    
    51 38
     # pylint: disable=super-init-not-called
    
    52 39
     
    
    53 40
     
    
    ... ... @@ -108,7 +95,8 @@ class FileBasedDirectory(Directory):
    108 95
                 if create:
    
    109 96
                     new_path = os.path.join(self.external_directory, subdirectory_spec[0])
    
    110 97
                     os.makedirs(new_path, exist_ok=True)
    
    111
    -                return FileBasedDirectory(new_path).descend(subdirectory_spec[1:], create)
    
    98
    +                self.index[subdirectory_spec[0]] = FileBasedDirectory(new_path).descend(subdirectory_spec[1:], create)
    
    99
    +                return self.index[subdirectory_spec[0]]
    
    112 100
                 else:
    
    113 101
                     error = "No entry called '{}' found in the directory rooted at {}"
    
    114 102
                     raise VirtualDirectoryError(error.format(subdirectory_spec[0], self.external_directory))
    
    ... ... @@ -134,8 +122,12 @@ class FileBasedDirectory(Directory):
    134 122
     
    
    135 123
                 for f in import_result.files_written:
    
    136 124
                     os.utime(os.path.join(self.external_directory, f), times=(cur_time, cur_time))
    
    125
    +        self._mark_changed()
    
    137 126
             return import_result
    
    138 127
     
    
    128
    +    def _mark_changed(self):
    
    129
    +        self._directory_read = False
    
    130
    +
    
    139 131
         def set_deterministic_mtime(self):
    
    140 132
             _set_deterministic_mtime(self.external_directory)
    
    141 133
     
    
    ... ... @@ -214,3 +206,8 @@ class FileBasedDirectory(Directory):
    214 206
             # which exposes the sandbox directory; we will have to assume for the time being
    
    215 207
             # that people will not abuse __str__.
    
    216 208
             return self.external_directory
    
    209
    +
    
    210
    +    def _get_underlying_directory(self) -> str:
    
    211
    +        """ Returns the underlying (real) file system directory this
    
    212
    +        object refers to. """
    
    213
    +        return self.external_directory

  • buildstream/storage/directory.py
    ... ... @@ -31,6 +31,19 @@ See also: :ref:`sandboxing`.
    31 31
     
    
    32 32
     """
    
    33 33
     
    
    34
    +from .._exceptions import BstError, ErrorDomain
    
    35
    +
    
    36
    +
    
    37
    +class VirtualDirectoryError(BstError):
    
    38
    +    """Raised by Directory functions when system calls fail.
    
    39
    +    This will be handled internally by the BuildStream core,
    
    40
    +    if you need to handle this error, then it should be reraised,
    
    41
    +    or either of the :class:`.ElementError` or :class:`.SourceError`
    
    42
    +    exceptions should be raised from this error.
    
    43
    +    """
    
    44
    +    def __init__(self, message, reason=None):
    
    45
    +        super().__init__(message, domain=ErrorDomain.VIRTUAL_FS, reason=reason)
    
    46
    +
    
    34 47
     
    
    35 48
     class Directory():
    
    36 49
         def __init__(self, external_directory=None):
    
    ... ... @@ -153,3 +166,13 @@ class Directory():
    153 166
     
    
    154 167
             """
    
    155 168
             raise NotImplementedError()
    
    169
    +
    
    170
    +    def _mark_changed(self):
    
    171
    +        """Internal function to mark this directory as having been changed
    
    172
    +        outside this API. This normally can only happen by calling the
    
    173
    +        Sandbox's `run` method. This does *not* mark everything as modified
    
    174
    +        (i.e. list_modified_paths will not necessarily return the same results
    
    175
    +        as list_relative_paths after calling this.)
    
    176
    +
    
    177
    +        """
    
    178
    +        raise NotImplementedError()

  • tests/sandboxes/storage-test/original/bin/bash
    1
    +This is the original /bin/bash.

  • tests/sandboxes/storage-test/original/bin/hello
    1
    +This is the original /bin/hello.

  • tests/sandboxes/storage-test/overlay/bin/bash
    1
    +This is the replacement /bin/bash.

  • tests/sandboxes/storage-tests.py
    1
    +import os
    
    2
    +import pytest
    
    3
    +
    
    4
    +from buildstream._exceptions import ErrorDomain
    
    5
    +
    
    6
    +from buildstream._context import Context
    
    7
    +from buildstream.storage._casbaseddirectory import CasBasedDirectory
    
    8
    +from buildstream.storage._filebaseddirectory import FileBasedDirectory
    
    9
    +
    
    10
    +DATA_DIR = os.path.join(
    
    11
    +    os.path.dirname(os.path.realpath(__file__)),
    
    12
    +    "storage-test"
    
    13
    +)
    
    14
    +
    
    15
    +
    
    16
    +def setup_backend(backend_class, tmpdir):
    
    17
    +    if backend_class == FileBasedDirectory:
    
    18
    +        return backend_class(os.path.join(tmpdir, "vdir"))
    
    19
    +    else:
    
    20
    +        context = Context()
    
    21
    +        context.artifactdir = os.path.join(tmpdir, "cas")
    
    22
    +        return backend_class(context)
    
    23
    +
    
    24
    +
    
    25
    +@pytest.mark.parametrize("backend", [
    
    26
    +    FileBasedDirectory, CasBasedDirectory])
    
    27
    +@pytest.mark.datafiles(DATA_DIR)
    
    28
    +def test_import(tmpdir, datafiles, backend):
    
    29
    +    original = os.path.join(str(datafiles), "original")
    
    30
    +
    
    31
    +    c = setup_backend(backend, str(tmpdir))
    
    32
    +
    
    33
    +    c.import_files(original)
    
    34
    +
    
    35
    +    assert("bin/bash" in c.list_relative_paths())
    
    36
    +    assert("bin/hello" in c.list_relative_paths())
    
    37
    +
    
    38
    +
    
    39
    +@pytest.mark.parametrize("backend", [
    
    40
    +    FileBasedDirectory, CasBasedDirectory])
    
    41
    +@pytest.mark.datafiles(DATA_DIR)
    
    42
    +def test_modified_file_list(tmpdir, datafiles, backend):
    
    43
    +    original = os.path.join(str(datafiles), "original")
    
    44
    +    overlay = os.path.join(str(datafiles), "overlay")
    
    45
    +
    
    46
    +    c = setup_backend(backend, str(tmpdir))
    
    47
    +
    
    48
    +    c.import_files(original)
    
    49
    +
    
    50
    +    c.mark_unmodified()
    
    51
    +
    
    52
    +    c.import_files(overlay)
    
    53
    +
    
    54
    +    print("List of all paths in imported results: {}".format(c.list_relative_paths()))
    
    55
    +    assert("bin/bash" in c.list_relative_paths())
    
    56
    +    assert("bin/bash" in c.list_modified_paths())
    
    57
    +    assert("bin/hello" not in c.list_modified_paths())



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]