[Notes] [Git][BuildStream/buildstream][jmac/cas_to_cas_v2] 3 commits: _casbaseddirectory.py: Enable direct CAS-to-CAS import.



Title: GitLab

Jim MacArthur pushed to branch jmac/cas_to_cas_v2 at BuildStream / buildstream

Commits:

2 changed files:

Changes:

  • buildstream/storage/_casbaseddirectory.py
    ... ... @@ -38,6 +38,8 @@ from .._exceptions import BstError
    38 38
     from .directory import Directory, VirtualDirectoryError
    
    39 39
     from ._filebaseddirectory import FileBasedDirectory
    
    40 40
     from ..utils import FileListResult, safe_copy, list_relative_paths
    
    41
    +from ..utils import FileListResult, safe_copy, list_relative_paths, _relative_symlink_target
    
    42
    +from .._artifactcache.cascache import CASCache
    
    41 43
     
    
    42 44
     
    
    43 45
     class IndexEntry():
    
    ... ... @@ -51,6 +53,24 @@ class IndexEntry():
    51 53
             self.modified = modified
    
    52 54
     
    
    53 55
     
    
    56
    +class ResolutionException(Exception):
    
    57
    +    """ Superclass of all exceptions that can be raised by
    
    58
    +    CasBasedDirectory._resolve. Should not be used outside this module. """
    
    59
    +    pass
    
    60
    +
    
    61
    +
    
    62
    +class InfiniteSymlinkException(ResolutionException):
    
    63
    +    """ Raised when an infinite symlink loop is found. """
    
    64
    +    pass
    
    65
    +
    
    66
    +
    
    67
    +class AbsoluteSymlinkException(ResolutionException):
    
    68
    +    """Raised if we try to follow an absolute symlink (i.e. one whose
    
    69
    +    target starts with the path separator) and we have disallowed
    
    70
    +    following such symlinks. """
    
    71
    +    pass
    
    72
    +
    
    73
    +
    
    54 74
     # CasBasedDirectory intentionally doesn't call its superclass constructor,
    
    55 75
     # which is meant to be unimplemented.
    
    56 76
     # pylint: disable=super-init-not-called
    
    ... ... @@ -168,29 +188,34 @@ class CasBasedDirectory(Directory):
    168 188
             self.index[name] = IndexEntry(dirnode, buildstream_object=newdir)
    
    169 189
             return newdir
    
    170 190
     
    
    171
    -    def _add_new_file(self, basename, filename):
    
    191
    +    def _add_file(self, basename, filename, modified=False):
    
    172 192
             filenode = self.pb2_directory.files.add()
    
    173 193
             filenode.name = filename
    
    174 194
             self.cas_cache.add_object(digest=filenode.digest, path=os.path.join(basename, filename))
    
    175 195
             is_executable = os.access(os.path.join(basename, filename), os.X_OK)
    
    176 196
             filenode.is_executable = is_executable
    
    177
    -        self.index[filename] = IndexEntry(filenode, modified=(filename in self.index))
    
    197
    +        self.index[filename] = IndexEntry(filenode, modified=modified or filename in self.index)
    
    198
    +
    
    199
    +    def _copy_link_from_filesystem(self, basename, filename):
    
    200
    +        self._add_new_link_direct(filename, os.readlink(os.path.join(basename, filename)))
    
    178 201
     
    
    179
    -    def _add_new_link(self, basename, filename):
    
    180
    -        existing_link = self._find_pb2_entry(filename)
    
    202
    +    def _add_new_link_direct(self, name, target):
    
    203
    +        existing_link = self._find_pb2_entry(name)
    
    181 204
             if existing_link:
    
    182 205
                 symlinknode = existing_link
    
    183 206
             else:
    
    184 207
                 symlinknode = self.pb2_directory.symlinks.add()
    
    185
    -        symlinknode.name = filename
    
    208
    +        assert isinstance(symlinknode, remote_execution_pb2.SymlinkNode)
    
    209
    +        symlinknode.name = name
    
    186 210
             # A symlink node has no digest.
    
    187
    -        symlinknode.target = os.readlink(os.path.join(basename, filename))
    
    188
    -        self.index[filename] = IndexEntry(symlinknode, modified=(existing_link is not None))
    
    211
    +        symlinknode.target = target
    
    212
    +        self.index[name] = IndexEntry(symlinknode, modified=(existing_link is not None))
    
    189 213
     
    
    190 214
         def delete_entry(self, name):
    
    191 215
             for collection in [self.pb2_directory.files, self.pb2_directory.symlinks, self.pb2_directory.directories]:
    
    192
    -            if name in collection:
    
    193
    -                collection.remove(name)
    
    216
    +            for thing in collection:
    
    217
    +                if thing.name == name:
    
    218
    +                    collection.remove(thing)
    
    194 219
             if name in self.index:
    
    195 220
                 del self.index[name]
    
    196 221
     
    
    ... ... @@ -231,9 +256,13 @@ class CasBasedDirectory(Directory):
    231 256
                 if isinstance(entry, CasBasedDirectory):
    
    232 257
                     return entry.descend(subdirectory_spec[1:], create)
    
    233 258
                 else:
    
    259
    +                # May be a symlink
    
    260
    +                target = self._resolve(subdirectory_spec[0], force_create=create)
    
    261
    +                if isinstance(target, CasBasedDirectory):
    
    262
    +                    return target
    
    234 263
                     error = "Cannot descend into {}, which is a '{}' in the directory {}"
    
    235 264
                     raise VirtualDirectoryError(error.format(subdirectory_spec[0],
    
    236
    -                                                         type(entry).__name__,
    
    265
    +                                                         type(self.index[subdirectory_spec[0]].pb_object).__name__,
    
    237 266
                                                              self))
    
    238 267
             else:
    
    239 268
                 if create:
    
    ... ... @@ -254,36 +283,112 @@ class CasBasedDirectory(Directory):
    254 283
             else:
    
    255 284
                 return self
    
    256 285
     
    
    257
    -    def _resolve_symlink_or_directory(self, name):
    
    258
    -        """Used only by _import_files_from_directory. Tries to resolve a
    
    259
    -        directory name or symlink name. 'name' must be an entry in this
    
    260
    -        directory. It must be a single symlink or directory name, not a path
    
    261
    -        separated by path separators. If it's an existing directory name, it
    
    262
    -        just returns the Directory object for that. If it's a symlink, it will
    
    263
    -        attempt to find the target of the symlink and return that as a
    
    264
    -        Directory object.
    
    265
    -
    
    266
    -        If a symlink target doesn't exist, it will attempt to create it
    
    267
    -        as a directory as long as it's within this directory tree.
    
    286
    +    def _resolve(self, name, absolute_symlinks_resolve=True, force_create=False, seen_objects=None):
    
    287
    +        """Resolves any name to an object. If the name points to a symlink in
    
    288
    +        this directory, it returns the thing it points to,
    
    289
    +        recursively.
    
    290
    +
    
    291
    +        Returns a CasBasedDirectory, FileNode or None. None indicates
    
    292
    +        either that 'name' does not exist in this directory, or is a
    
    293
    +        symlink chain which points to a nonexistent name (broken
    
    294
    +        symlink).
    
    295
    +
    
    296
    +
    
    297
    +        Raises:
    
    298
    +        - InfiniteSymlinkException if 'name' points to an infinite symlink loop.
    
    299
    +        - AbsoluteSymlinkException if 'name' points to an absolute symlink and absolute_symlinks_resolve is False.
    
    300
    +
    
    301
    +        If force_create is on, this will attempt to create directories to make symlinks and directories resolve.
    
    302
    +        If force_create is off, this will never alter this directory.
    
    303
    +
    
    268 304
             """
    
    269 305
     
    
    270
    -        if isinstance(self.index[name].buildstream_object, Directory):
    
    271
    -            return self.index[name].buildstream_object
    
    272
    -        # OK then, it's a symlink
    
    273
    -        symlink = self._find_pb2_entry(name)
    
    306
    +        if name not in self.index:
    
    307
    +            return None
    
    308
    +
    
    309
    +        # First check if it's a normal object and return that
    
    310
    +        index_entry = self.index[name]
    
    311
    +        if isinstance(index_entry.buildstream_object, Directory):
    
    312
    +            return index_entry.buildstream_object
    
    313
    +        elif isinstance(index_entry.pb_object, remote_execution_pb2.FileNode):
    
    314
    +            return index_entry.pb_object
    
    315
    +
    
    316
    +        assert isinstance(index_entry.pb_object, remote_execution_pb2.SymlinkNode)
    
    317
    +
    
    318
    +        if seen_objects is None:
    
    319
    +            seen_objects = [index_entry.pb_object]
    
    320
    +        else:
    
    321
    +            if index_entry.pb_object in seen_objects:
    
    322
    +                # Infinite symlink loop detected
    
    323
    +                raise InfiniteSymlinkException()
    
    324
    +
    
    325
    +        symlink = index_entry.pb_object
    
    326
    +        components = symlink.target.split(CasBasedDirectory._pb2_path_sep)
    
    327
    +
    
    274 328
             absolute = symlink.target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
    
    275 329
             if absolute:
    
    276
    -            root = self.find_root()
    
    330
    +            if absolute_symlinks_resolve:
    
    331
    +                start_directory = self.find_root()
    
    332
    +                # Discard the first empty element
    
    333
    +                components.pop(0)
    
    334
    +            else:
    
    335
    +                # Unresolvable absolute symlink
    
    336
    +                raise AbsoluteSymlinkException()
    
    277 337
             else:
    
    278
    -            root = self
    
    279
    -        directory = root
    
    280
    -        components = symlink.target.split(CasBasedDirectory._pb2_path_sep)
    
    281
    -        for c in components:
    
    282
    -            if c == "..":
    
    283
    -                directory = directory.parent
    
    338
    +            start_directory = self
    
    339
    +
    
    340
    +        directory = start_directory
    
    341
    +        while True:
    
    342
    +            if not components:
    
    343
    +                # If we run out of components, the directory we're currently in
    
    344
    +                # is the resolved component.
    
    345
    +                return directory
    
    346
    +
    
    347
    +            c = components.pop(0)
    
    348
    +            if c == ".":
    
    349
    +                pass
    
    350
    +            elif c == "..":
    
    351
    +                if directory.parent is not None:
    
    352
    +                    directory = directory.parent
    
    353
    +                # If directory.parent *is* None, this is an attempt to access
    
    354
    +                # '..' from the root, which is valid under POSIX; it just
    
    355
    +                # returns the root.
    
    356
    +            elif c in directory.index:
    
    357
    +                # Recursive resolve and continue
    
    358
    +                try:
    
    359
    +                    f = directory._resolve(c, absolute_symlinks_resolve, seen_objects=seen_objects,
    
    360
    +                                           force_create=force_create)
    
    361
    +                except ResolutionException as e:
    
    362
    +                    return None
    
    363
    +                if isinstance(f, CasBasedDirectory):
    
    364
    +                    directory = f
    
    365
    +                elif isinstance(f, remote_execution_pb2.FileNode):
    
    366
    +                    if components:
    
    367
    +                        # We have components still to resolve, but one of the path components
    
    368
    +                        # is a file.
    
    369
    +                        if force_create:
    
    370
    +                            self.delete_entry(c)
    
    371
    +                            directory = directory.descend(c, create=True)
    
    372
    +                        else:
    
    373
    +                            errormsg = ("Reached a file called {} while trying to resolve a symlink; " +
    
    374
    +                                        "cannot proceed. The remaining path components are {}.")
    
    375
    +                            raise ResolutionException(errormsg.format(c, components))
    
    376
    +                    else:
    
    377
    +                        # It's a file, and there's no path components left, so just return that.
    
    378
    +                        return f
    
    379
    +                else:
    
    380
    +                    # f was not found, or wasn't resolvable
    
    381
    +                    if force_create:
    
    382
    +                        directory = directory.descend(c, create=True)
    
    383
    +                    else:
    
    384
    +                        return None
    
    284 385
                 else:
    
    285
    -                directory = directory.descend(c, create=True)
    
    286
    -        return directory
    
    386
    +                # c is not in our index
    
    387
    +                if force_create:
    
    388
    +                    directory = directory.descend(c, create=True)
    
    389
    +                else:
    
    390
    +                    return None
    
    391
    +        # You can only exit the while loop with a return, or exception, so you shouldn't be here.
    
    287 392
     
    
    288 393
         def _check_replacement(self, name, path_prefix, fileListResult):
    
    289 394
             """ Checks whether 'name' exists, and if so, whether we can overwrite it.
    
    ... ... @@ -297,6 +402,7 @@ class CasBasedDirectory(Directory):
    297 402
                 return True
    
    298 403
             if (isinstance(existing_entry,
    
    299 404
                            (remote_execution_pb2.FileNode, remote_execution_pb2.SymlinkNode))):
    
    405
    +            self.delete_entry(name)
    
    300 406
                 fileListResult.overwritten.append(relative_pathname)
    
    301 407
                 return True
    
    302 408
             elif isinstance(existing_entry, remote_execution_pb2.DirectoryNode):
    
    ... ... @@ -314,23 +420,44 @@ class CasBasedDirectory(Directory):
    314 420
                            .format(name, type(existing_entry)))
    
    315 421
             return False  # In case asserts are disabled
    
    316 422
     
    
    317
    -    def _import_directory_recursively(self, directory_name, source_directory, remaining_path, path_prefix):
    
    318
    -        """ _import_directory_recursively and _import_files_from_directory will be called alternately
    
    319
    -        as a directory tree is descended. """
    
    320
    -        if directory_name in self.index:
    
    321
    -            subdir = self._resolve_symlink_or_directory(directory_name)
    
    322
    -        else:
    
    323
    -            subdir = self._add_directory(directory_name)
    
    324
    -        new_path_prefix = os.path.join(path_prefix, directory_name)
    
    325
    -        subdir_result = subdir._import_files_from_directory(os.path.join(source_directory, directory_name),
    
    326
    -                                                            [os.path.sep.join(remaining_path)],
    
    327
    -                                                            path_prefix=new_path_prefix)
    
    328
    -        return subdir_result
    
    423
    +    def _replace_anything_with_dir(self, name, path_prefix, overwritten_files_list):
    
    424
    +        self.delete_entry(name)
    
    425
    +        subdir = self._add_directory(name)
    
    426
    +        overwritten_files_list.append(os.path.join(path_prefix, name))
    
    427
    +        return subdir
    
    329 428
     
    
    330 429
         def _import_files_from_directory(self, source_directory, files, path_prefix=""):
    
    331
    -        """ Imports files from a traditional directory """
    
    430
    +        """ Imports files from a traditional directory. """
    
    431
    +
    
    432
    +        def _ensure_followable(name, path_prefix):
    
    433
    +            """ Makes sure 'name' is a directory or symlink to a directory which can be descended into. """
    
    434
    +            if isinstance(self.index[name].buildstream_object, Directory):
    
    435
    +                return self.descend(name)
    
    436
    +            try:
    
    437
    +                target = self._resolve(name, force_create=True)
    
    438
    +            except InfiniteSymlinkException:
    
    439
    +                return self._replace_anything_with_dir(name, path_prefix, result.overwritten)
    
    440
    +            if isinstance(target, CasBasedDirectory):
    
    441
    +                return target
    
    442
    +            elif isinstance(target, remote_execution_pb2.FileNode):
    
    443
    +                return self._replace_anything_with_dir(name, path_prefix, result.overwritten)
    
    444
    +            return target
    
    445
    +
    
    446
    +        def _import_directory_recursively(directory_name, source_directory, remaining_path, path_prefix):
    
    447
    +            """ _import_directory_recursively and _import_files_from_directory will be called alternately
    
    448
    +            as a directory tree is descended. """
    
    449
    +            if directory_name in self.index:
    
    450
    +                subdir = _ensure_followable(directory_name, path_prefix)
    
    451
    +            else:
    
    452
    +                subdir = self._add_directory(directory_name)
    
    453
    +            new_path_prefix = os.path.join(path_prefix, directory_name)
    
    454
    +            subdir_result = subdir._import_files_from_directory(os.path.join(source_directory, directory_name),
    
    455
    +                                                                [os.path.sep.join(remaining_path)],
    
    456
    +                                                                path_prefix=new_path_prefix)
    
    457
    +            return subdir_result
    
    458
    +
    
    332 459
             result = FileListResult()
    
    333
    -        for entry in sorted(files):
    
    460
    +        for entry in files:
    
    334 461
                 split_path = entry.split(os.path.sep)
    
    335 462
                 # The actual file on the FS we're importing
    
    336 463
                 import_file = os.path.join(source_directory, entry)
    
    ... ... @@ -338,14 +465,18 @@ class CasBasedDirectory(Directory):
    338 465
                 relative_pathname = os.path.join(path_prefix, entry)
    
    339 466
                 if len(split_path) > 1:
    
    340 467
                     directory_name = split_path[0]
    
    341
    -                # Hand this off to the importer for that subdir. This will only do one file -
    
    342
    -                # a better way would be to hand off all the files in this subdir at once.
    
    343
    -                subdir_result = self._import_directory_recursively(directory_name, source_directory,
    
    344
    -                                                                   split_path[1:], path_prefix)
    
    468
    +                # Hand this off to the importer for that subdir.
    
    469
    +
    
    470
    +                # It would be advantageous to batch these together by
    
    471
    +                # directory_name. However, we can't do it out of
    
    472
    +                # order, since importing symlinks affects the results
    
    473
    +                # of other imports.
    
    474
    +                subdir_result = _import_directory_recursively(directory_name, source_directory,
    
    475
    +                                                              split_path[1:], path_prefix)
    
    345 476
                     result.combine(subdir_result)
    
    346 477
                 elif os.path.islink(import_file):
    
    347 478
                     if self._check_replacement(entry, path_prefix, result):
    
    348
    -                    self._add_new_link(source_directory, entry)
    
    479
    +                    self._copy_link_from_filesystem(source_directory, entry)
    
    349 480
                         result.files_written.append(relative_pathname)
    
    350 481
                 elif os.path.isdir(import_file):
    
    351 482
                     # A plain directory which already exists isn't a problem; just ignore it.
    
    ... ... @@ -353,10 +484,85 @@ class CasBasedDirectory(Directory):
    353 484
                         self._add_directory(entry)
    
    354 485
                 elif os.path.isfile(import_file):
    
    355 486
                     if self._check_replacement(entry, path_prefix, result):
    
    356
    -                    self._add_new_file(source_directory, entry)
    
    487
    +                    self._add_file(source_directory, entry, modified=relative_pathname in result.overwritten)
    
    357 488
                         result.files_written.append(relative_pathname)
    
    358 489
             return result
    
    359 490
     
    
    491
    +    def _files_in_subdir(sorted_files, dirname):
    
    492
    +        """Filters sorted_files and returns only the ones which have
    
    493
    +           'dirname' as a prefix, with that prefix removed.
    
    494
    +
    
    495
    +        """
    
    496
    +        if not dirname.endswith(os.path.sep):
    
    497
    +            dirname += os.path.sep
    
    498
    +        return [f[len(dirname):] for f in sorted_files if f.startswith(dirname)]
    
    499
    +
    
    500
    +    def _partial_import_cas_into_cas(self, source_directory, files, path_prefix="", file_list_required=True):
    
    501
    +        """ Import only the files and symlinks listed in 'files' from source_directory to this one.
    
    502
    +        Args:
    
    503
    +           source_directory (:class:`.CasBasedDirectory`): The directory to import from
    
    504
    +           files ([str]): List of pathnames to import.
    
    505
    +           path_prefix (str): Prefix used to add entries to the file list result.
    
    506
    +           file_list_required: Whether to update the file list while processing.
    
    507
    +        """
    
    508
    +        result = FileListResult()
    
    509
    +        processed_directories = set()
    
    510
    +        for f in files:
    
    511
    +            fullname = os.path.join(path_prefix, f)
    
    512
    +            components = f.split(os.path.sep)
    
    513
    +            if len(components) > 1:
    
    514
    +                # We are importing a thing which is in a subdirectory. We may have already seen this dirname
    
    515
    +                # for a previous file.
    
    516
    +                dirname = components[0]
    
    517
    +                if dirname not in processed_directories:
    
    518
    +                    # Now strip off the first directory name and import files recursively.
    
    519
    +                    subcomponents = CasBasedDirectory._files_in_subdir(files, dirname)
    
    520
    +                    # We will fail at this point if there is a file or symlink to file called 'dirname'.
    
    521
    +                    if dirname in self.index:
    
    522
    +                        resolved_component = self._resolve(dirname, force_create=True)
    
    523
    +                        if isinstance(resolved_component, remote_execution_pb2.FileNode):
    
    524
    +                            dest_subdir = self._replace_anything_with_dir(dirname, path_prefix, result.overwritten)
    
    525
    +                        else:
    
    526
    +                            dest_subdir = resolved_component
    
    527
    +                    else:
    
    528
    +                        dest_subdir = self.descend(dirname, create=True)
    
    529
    +                    src_subdir = source_directory.descend(dirname)
    
    530
    +                    import_result = dest_subdir._partial_import_cas_into_cas(src_subdir, subcomponents,
    
    531
    +                                                                             path_prefix=fullname,
    
    532
    +                                                                             file_list_required=file_list_required)
    
    533
    +                    result.combine(import_result)
    
    534
    +                processed_directories.add(dirname)
    
    535
    +            elif isinstance(source_directory.index[f].buildstream_object, CasBasedDirectory):
    
    536
    +                # The thing in the input file list is a directory on
    
    537
    +                # its own. We don't need to do anything other than create it if it doesn't exist.
    
    538
    +                # If we already have an entry with the same name that isn't a directory, that
    
    539
    +                # will be dealt with when importing files in this directory.
    
    540
    +                if f not in self.index:
    
    541
    +                    self.descend(f, create=True)
    
    542
    +            else:
    
    543
    +                # We're importing a file or symlink - replace anything with the same name.
    
    544
    +                importable = self._check_replacement(f, path_prefix, result)
    
    545
    +                if importable:
    
    546
    +                    item = source_directory.index[f].pb_object
    
    547
    +                    if isinstance(item, remote_execution_pb2.FileNode):
    
    548
    +                        filenode = self.pb2_directory.files.add(digest=item.digest, name=f,
    
    549
    +                                                                is_executable=item.is_executable)
    
    550
    +                        self.index[f] = IndexEntry(filenode, modified=True)
    
    551
    +                    else:
    
    552
    +                        assert(isinstance(item, remote_execution_pb2.SymlinkNode))
    
    553
    +                        self._add_new_link_direct(name=f, target=item.target)
    
    554
    +                else:
    
    555
    +                    result.ignored.append(os.path.join(path_prefix, f))
    
    556
    +        return result
    
    557
    +
    
    558
    +    def _import_cas_into_cas(self, source_directory, files=None):
    
    559
    +        """ A full import is significantly quicker than a partial import, because we can just
    
    560
    +        replace one directory with another's hash, without doing any recursion.
    
    561
    +        """
    
    562
    +
    
    563
    +        # You must pass a list into _partial_import (not a generator)
    
    564
    +        return self._partial_import_cas_into_cas(source_directory, list(files))
    
    565
    +
    
    360 566
         def import_files(self, external_pathspec, *, files=None,
    
    361 567
                          report_written=True, update_utimes=False,
    
    362 568
                          can_link=False):
    
    ... ... @@ -378,28 +584,27 @@ class CasBasedDirectory(Directory):
    378 584
     
    
    379 585
             can_link (bool): Ignored, since hard links do not have any meaning within CAS.
    
    380 586
             """
    
    381
    -        if isinstance(external_pathspec, FileBasedDirectory):
    
    382
    -            source_directory = external_pathspec._get_underlying_directory()
    
    383
    -        elif isinstance(external_pathspec, CasBasedDirectory):
    
    384
    -            # TODO: This transfers from one CAS to another via the
    
    385
    -            # filesystem, which is very inefficient. Alter this so it
    
    386
    -            # transfers refs across directly.
    
    387
    -            with tempfile.TemporaryDirectory(prefix="roundtrip") as tmpdir:
    
    388
    -                external_pathspec.export_files(tmpdir)
    
    389
    -                if files is None:
    
    390
    -                    files = list_relative_paths(tmpdir)
    
    391
    -                result = self._import_files_from_directory(tmpdir, files=files)
    
    392
    -            return result
    
    393
    -        else:
    
    394
    -            source_directory = external_pathspec
    
    395 587
     
    
    396 588
             if files is None:
    
    397
    -            files = list_relative_paths(source_directory)
    
    589
    +            if isinstance(external_pathspec, str):
    
    590
    +                files = list_relative_paths(external_pathspec)
    
    591
    +            else:
    
    592
    +                assert isinstance(external_pathspec, Directory)
    
    593
    +                files = external_pathspec.list_relative_paths()
    
    594
    +
    
    595
    +        if isinstance(external_pathspec, FileBasedDirectory):
    
    596
    +            source_directory = external_pathspec.get_underlying_directory()
    
    597
    +            result = self._import_files_from_directory(source_directory, files=files)
    
    598
    +        elif isinstance(external_pathspec, str):
    
    599
    +            source_directory = external_pathspec
    
    600
    +            result = self._import_files_from_directory(source_directory, files=files)
    
    601
    +        else:
    
    602
    +            assert isinstance(external_pathspec, CasBasedDirectory)
    
    603
    +            result = self._import_cas_into_cas(external_pathspec, files=files)
    
    398 604
     
    
    399 605
             # TODO: No notice is taken of report_written, update_utimes or can_link.
    
    400 606
             # Current behaviour is to fully populate the report, which is inefficient,
    
    401 607
             # but still correct.
    
    402
    -        result = self._import_files_from_directory(source_directory, files=files)
    
    403 608
     
    
    404 609
             # We need to recalculate and store the hashes of all directories both
    
    405 610
             # up and down the tree; we have changed our directory by importing files
    
    ... ... @@ -511,6 +716,28 @@ class CasBasedDirectory(Directory):
    511 716
             else:
    
    512 717
                 self._mark_directory_unmodified()
    
    513 718
     
    
    719
    +    def _lightweight_resolve_to_index(self, path):
    
    720
    +        """A lightweight function for transforming paths into IndexEntry
    
    721
    +        objects. This does not follow symlinks.
    
    722
    +
    
    723
    +        path: The string to resolve. This should be a series of path
    
    724
    +        components separated by the protocol buffer path separator
    
    725
    +        _pb2_path_sep.
    
    726
    +
    
    727
    +        Returns: the IndexEntry found, or None if any of the path components were not present.
    
    728
    +
    
    729
    +        """
    
    730
    +        directory = self
    
    731
    +        path_components = path.split(_pb2_path_sep)
    
    732
    +        for component in path_components[:-1]:
    
    733
    +            if component not in directory.index:
    
    734
    +                return None
    
    735
    +            if isinstance(directory.index[component].buildstream_object, CasBasedDirectory):
    
    736
    +                directory = directory.index[component].buildstream_object
    
    737
    +            else:
    
    738
    +                return None
    
    739
    +        return directory.index.get(path_components[-1], None)
    
    740
    +
    
    514 741
         def list_modified_paths(self):
    
    515 742
             """Provide a list of relative paths which have been modified since the
    
    516 743
             last call to mark_unmodified.
    
    ... ... @@ -518,29 +745,43 @@ class CasBasedDirectory(Directory):
    518 745
             Return value: List(str) - list of modified paths
    
    519 746
             """
    
    520 747
     
    
    521
    -        filelist = []
    
    522
    -        for (k, v) in self.index.items():
    
    523
    -            if isinstance(v.buildstream_object, CasBasedDirectory):
    
    524
    -                filelist.extend([k + os.path.sep + x for x in v.buildstream_object.list_modified_paths()])
    
    525
    -            elif isinstance(v.pb_object, remote_execution_pb2.FileNode) and v.modified:
    
    526
    -                filelist.append(k)
    
    527
    -        return filelist
    
    748
    +        for p in self.list_relative_paths():
    
    749
    +            i = self._lightweight_resolve_to_index(p)
    
    750
    +            if i and i.modified:
    
    751
    +                yield p
    
    528 752
     
    
    529
    -    def list_relative_paths(self):
    
    753
    +    def list_relative_paths(self, relpath=""):
    
    530 754
             """Provide a list of all relative paths.
    
    531 755
     
    
    532
    -        NOTE: This list is not in the same order as utils.list_relative_paths.
    
    533
    -
    
    534 756
             Return value: List(str) - list of all paths
    
    535 757
             """
    
    536 758
     
    
    537
    -        filelist = []
    
    538
    -        for (k, v) in self.index.items():
    
    539
    -            if isinstance(v.buildstream_object, CasBasedDirectory):
    
    540
    -                filelist.extend([k + os.path.sep + x for x in v.buildstream_object.list_relative_paths()])
    
    541
    -            elif isinstance(v.pb_object, remote_execution_pb2.FileNode):
    
    542
    -                filelist.append(k)
    
    543
    -        return filelist
    
    759
    +        symlink_list = filter(lambda i: isinstance(i[1].pb_object, remote_execution_pb2.SymlinkNode),
    
    760
    +                              self.index.items())
    
    761
    +        file_list = list(filter(lambda i: isinstance(i[1].pb_object, remote_execution_pb2.FileNode),
    
    762
    +                                self.index.items()))
    
    763
    +        directory_list = filter(lambda i: isinstance(i[1].buildstream_object, CasBasedDirectory),
    
    764
    +                                self.index.items())
    
    765
    +
    
    766
    +        # We need to mimic the behaviour of os.walk, in which symlinks
    
    767
    +        # to directories count as directories and symlinks to files or
    
    768
    +        # broken symlinks count as files. os.walk doesn't follow
    
    769
    +        # symlinks, so we don't recurse.
    
    770
    +        for (k, v) in sorted(symlink_list):
    
    771
    +            target = self._resolve(k, absolute_symlinks_resolve=True)
    
    772
    +            if isinstance(target, CasBasedDirectory):
    
    773
    +                yield os.path.join(relpath, k)
    
    774
    +            else:
    
    775
    +                file_list.append((k, v))
    
    776
    +
    
    777
    +        if file_list == [] and relpath != "":
    
    778
    +            yield relpath
    
    779
    +        else:
    
    780
    +            for (k, v) in sorted(file_list):
    
    781
    +                yield os.path.join(relpath, k)
    
    782
    +
    
    783
    +        for (k, v) in sorted(directory_list):
    
    784
    +            yield from v.buildstream_object.list_relative_paths(relpath=os.path.join(relpath, k))
    
    544 785
     
    
    545 786
         def recalculate_hash(self):
    
    546 787
             """ Recalcuates the hash for this directory and store the results in
    

  • tests/storage/virtual_directory_import.py
    1
    +import os
    
    2
    +import pytest
    
    3
    +import random
    
    4
    +import copy
    
    5
    +import tempfile
    
    6
    +from tests.testutils import cli
    
    7
    +
    
    8
    +
    
    9
    +from buildstream.storage._casbaseddirectory import CasBasedDirectory
    
    10
    +from buildstream.storage._filebaseddirectory import FileBasedDirectory
    
    11
    +from buildstream._artifactcache import ArtifactCache
    
    12
    +from buildstream._artifactcache.cascache import CASCache
    
    13
    +from buildstream import utils
    
    14
    +
    
    15
    +
    
    16
    +class FakeContext():
    
    17
    +    def __init__(self):
    
    18
    +        self.config_cache_quota = "65536"
    
    19
    +        self.artifactdir = ""
    
    20
    +
    
    21
    +    def get_projects(self):
    
    22
    +        return []
    
    23
    +
    
    24
    +# This is a set of example file system contents. The test attempts to import
    
    25
    +# each one on top of each of the others, to test that importing works consistently.
    
    26
    +# Each tuple is defined as (<filename>, <type>, <content>). Type can be
    
    27
    +# 'F' (file), 'S' (symlink) or 'D' (directory) with content being the contents
    
    28
    +# for a file or the destination for a symlink.
    
    29
    +root_filesets = [
    
    30
    +    # Arbitrary test sets
    
    31
    +    [('a/b/c/textfile1', 'F', 'This is textfile 1\n')],
    
    32
    +    [('a/b/c/textfile1', 'F', 'This is the replacement textfile 1\n')],
    
    33
    +    [('a/b/d', 'D', '')],
    
    34
    +    [('a/b/c', 'S', '/a/b/d')],
    
    35
    +    [('a/b/d', 'S', '/a/b/c')],
    
    36
    +    [('a/b/d', 'D', ''), ('a/b/c', 'S', '/a/b/d')],
    
    37
    +    [('a/b/c', 'D', ''), ('a/b/d', 'S', '/a/b/c')],
    
    38
    +    [('a/b', 'F', 'This is textfile 1\n')],
    
    39
    +    [('a/b/c', 'F', 'This is textfile 1\n')],
    
    40
    +    [('a/b/c', 'D', '')]
    
    41
    +]
    
    42
    +
    
    43
    +empty_hash_ref = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
    
    44
    +RANDOM_SEED = 69105
    
    45
    +
    
    46
    +
    
    47
    +def generate_import_roots(rootno, directory):
    
    48
    +    rootname = "root{}".format(rootno)
    
    49
    +    rootdir = os.path.join(directory, "content", rootname)
    
    50
    +
    
    51
    +    for (path, typesymbol, content) in root_filesets[rootno - 1]:
    
    52
    +        if typesymbol == 'F':
    
    53
    +            (dirnames, filename) = os.path.split(path)
    
    54
    +            os.makedirs(os.path.join(rootdir, dirnames), exist_ok=True)
    
    55
    +            with open(os.path.join(rootdir, dirnames, filename), "wt") as f:
    
    56
    +                f.write(content)
    
    57
    +        elif typesymbol == 'D':
    
    58
    +            os.makedirs(os.path.join(rootdir, path), exist_ok=True)
    
    59
    +        elif typesymbol == 'S':
    
    60
    +            (dirnames, filename) = os.path.split(path)
    
    61
    +            os.makedirs(os.path.join(rootdir, dirnames), exist_ok=True)
    
    62
    +            os.symlink(content, os.path.join(rootdir, path))
    
    63
    +
    
    64
    +
    
    65
    +def generate_random_root(rootno, directory):
    
    66
    +    random.seed(RANDOM_SEED + rootno)
    
    67
    +    rootname = "root{}".format(rootno)
    
    68
    +    rootdir = os.path.join(directory, "content", rootname)
    
    69
    +    things = []
    
    70
    +    locations = ['.']
    
    71
    +    os.makedirs(rootdir)
    
    72
    +    for i in range(0, 100):
    
    73
    +        location = random.choice(locations)
    
    74
    +        thingname = "node{}".format(i)
    
    75
    +        thing = random.choice(['dir', 'link', 'file'])
    
    76
    +        target = os.path.join(rootdir, location, thingname)
    
    77
    +        if thing == 'dir':
    
    78
    +            os.makedirs(target)
    
    79
    +            locations.append(os.path.join(location, thingname))
    
    80
    +        elif thing == 'file':
    
    81
    +            with open(target, "wt") as f:
    
    82
    +                f.write("This is node {}\n".format(i))
    
    83
    +        elif thing == 'link':
    
    84
    +            # TODO: Make some relative symlinks
    
    85
    +            if random.randint(1, 3) == 1 or not things:
    
    86
    +                os.symlink("/broken", target)
    
    87
    +            else:
    
    88
    +                symlink_destination = random.choice(things)
    
    89
    +                os.symlink(symlink_destination, target)
    
    90
    +        things.append(os.path.join(location, thingname))
    
    91
    +
    
    92
    +
    
    93
    +def file_contents(path):
    
    94
    +    with open(path, "r") as f:
    
    95
    +        result = f.read()
    
    96
    +    return result
    
    97
    +
    
    98
    +
    
    99
    +def file_contents_are(path, contents):
    
    100
    +    return file_contents(path) == contents
    
    101
    +
    
    102
    +
    
    103
    +def create_new_casdir(root_number, fake_context, tmpdir):
    
    104
    +    d = CasBasedDirectory(fake_context)
    
    105
    +    d.import_files(os.path.join(tmpdir, "content", "root{}".format(root_number)))
    
    106
    +    assert d.ref.hash != empty_hash_ref
    
    107
    +    return d
    
    108
    +
    
    109
    +
    
    110
    +def create_new_filedir(root_number, tmpdir):
    
    111
    +    root = os.path.join(tmpdir, "vdir")
    
    112
    +    os.makedirs(root)
    
    113
    +    d = FileBasedDirectory(root)
    
    114
    +    d.import_files(os.path.join(tmpdir, "content", "root{}".format(root_number)))
    
    115
    +    return d
    
    116
    +
    
    117
    +
    
    118
    +def combinations(integer_range):
    
    119
    +    for x in integer_range:
    
    120
    +        for y in integer_range:
    
    121
    +            yield (x, y)
    
    122
    +
    
    123
    +
    
    124
    +def resolve_symlinks(path, root):
    
    125
    +    """ A function to resolve symlinks inside 'path' components apart from the last one.
    
    126
    +        For example, resolve_symlinks('/a/b/c/d', '/a/b')
    
    127
    +        will return '/a/b/f/d' if /a/b/c is a symlink to /a/b/f. The final component of
    
    128
    +        'path' is not resolved, because we typically want to inspect the symlink found
    
    129
    +        at that path, not its target.
    
    130
    +
    
    131
    +    """
    
    132
    +    components = path.split(os.path.sep)
    
    133
    +    location = root
    
    134
    +    for i in range(0, len(components) - 1):
    
    135
    +        location = os.path.join(location, components[i])
    
    136
    +        if os.path.islink(location):
    
    137
    +            # Resolve the link, add on all the remaining components
    
    138
    +            target = os.path.join(os.readlink(location))
    
    139
    +            tail = os.path.sep.join(components[i + 1:])
    
    140
    +
    
    141
    +            if target.startswith(os.path.sep):
    
    142
    +                # Absolute link - relative to root
    
    143
    +                location = os.path.join(root, target, tail)
    
    144
    +            else:
    
    145
    +                # Relative link - relative to symlink location
    
    146
    +                location = os.path.join(location, target)
    
    147
    +            return resolve_symlinks(location, root)
    
    148
    +    # If we got here, no symlinks were found. Add on the final component and return.
    
    149
    +    location = os.path.join(location, components[-1])
    
    150
    +    return location
    
    151
    +
    
    152
    +
    
    153
    +def directory_not_empty(path):
    
    154
    +    return os.listdir(path)
    
    155
    +
    
    156
    +
    
    157
    +def _import_test(tmpdir, original, overlay, generator_function, verify_contents=False):
    
    158
    +    fake_context = FakeContext()
    
    159
    +    fake_context.artifactdir = tmpdir
    
    160
    +    print("Creating CAS Cache with artifact dir {}".format(tmpdir))
    
    161
    +    fake_context.artifactcache = CASCache(fake_context)
    
    162
    +    # Create some fake content
    
    163
    +    generator_function(original, tmpdir)
    
    164
    +    if original != overlay:
    
    165
    +        generator_function(overlay, tmpdir)
    
    166
    +
    
    167
    +    d = create_new_casdir(original, fake_context, tmpdir)
    
    168
    +
    
    169
    +    duplicate_cas = create_new_casdir(original, fake_context, tmpdir)
    
    170
    +
    
    171
    +    assert duplicate_cas.ref.hash == d.ref.hash
    
    172
    +
    
    173
    +    d2 = create_new_casdir(overlay, fake_context, tmpdir)
    
    174
    +    print("Importing dir {} into {}".format(overlay, original))
    
    175
    +    d.import_files(d2)
    
    176
    +    export_dir = os.path.join(tmpdir, "output")
    
    177
    +    roundtrip_dir = os.path.join(tmpdir, "roundtrip")
    
    178
    +    d2.export_files(roundtrip_dir)
    
    179
    +    d.export_files(export_dir)
    
    180
    +
    
    181
    +    if verify_contents:
    
    182
    +        for item in root_filesets[overlay - 1]:
    
    183
    +            (path, typename, content) = item
    
    184
    +            realpath = resolve_symlinks(path, export_dir)
    
    185
    +            if typename == 'F':
    
    186
    +                if os.path.isdir(realpath) and directory_not_empty(realpath):
    
    187
    +                    # The file should not have overwritten the directory in this case.
    
    188
    +                    pass
    
    189
    +                else:
    
    190
    +                    assert os.path.isfile(realpath), "{} did not exist in the combined virtual directory".format(path)
    
    191
    +                    assert file_contents_are(realpath, content)
    
    192
    +            elif typename == 'S':
    
    193
    +                if os.path.isdir(realpath) and directory_not_empty(realpath):
    
    194
    +                    # The symlink should not have overwritten the directory in this case.
    
    195
    +                    pass
    
    196
    +                else:
    
    197
    +                    assert os.path.islink(realpath)
    
    198
    +                    assert os.readlink(realpath) == content
    
    199
    +            elif typename == 'D':
    
    200
    +                # We can't do any more tests than this because it
    
    201
    +                # depends on things present in the original. Blank
    
    202
    +                # directories here will be ignored and the original
    
    203
    +                # left in place.
    
    204
    +                assert os.path.lexists(realpath)
    
    205
    +
    
    206
    +    # Now do the same thing with filebaseddirectories and check the contents match
    
    207
    +
    
    208
    +    files = list(utils.list_relative_paths(roundtrip_dir))
    
    209
    +    print("Importing from filesystem: filelist is: {}".format(files))
    
    210
    +    duplicate_cas._import_files_from_directory(roundtrip_dir, files=files)
    
    211
    +    duplicate_cas._recalculate_recursing_down()
    
    212
    +    if duplicate_cas.parent:
    
    213
    +        duplicate_cas.parent._recalculate_recursing_up(duplicate_cas)
    
    214
    +        print("Result of direct import: {}".format(duplicate_cas.show_files_recursive()))
    
    215
    +
    
    216
    +    assert duplicate_cas.ref.hash == d.ref.hash
    
    217
    +
    
    218
    +
    
    219
    +@pytest.mark.parametrize("original,overlay", combinations(range(1, len(root_filesets) + 1)))
    
    220
    +def test_fixed_cas_import(cli, tmpdir, original, overlay):
    
    221
    +    _import_test(tmpdir, original, overlay, generate_import_roots, verify_contents=True)
    
    222
    +
    
    223
    +
    
    224
    +@pytest.mark.parametrize("original,overlay", combinations(range(1, 11)))
    
    225
    +def test_random_cas_import_fast(cli, tmpdir, original, overlay):
    
    226
    +    _import_test(tmpdir, original, overlay, generate_random_root, verify_contents=False)
    
    227
    +
    
    228
    +
    
    229
    +def _listing_test(tmpdir, root, generator_function):
    
    230
    +    fake_context = FakeContext()
    
    231
    +    fake_context.artifactdir = tmpdir
    
    232
    +    print("Creating CAS Cache with artifact dir {}".format(tmpdir))
    
    233
    +    fake_context.artifactcache = CASCache(fake_context)
    
    234
    +    # Create some fake content
    
    235
    +    generator_function(root, tmpdir)
    
    236
    +
    
    237
    +    d = create_new_filedir(root, tmpdir)
    
    238
    +    filelist = list(d.list_relative_paths())
    
    239
    +
    
    240
    +    d2 = create_new_casdir(root, fake_context, tmpdir)
    
    241
    +    filelist2 = list(d2.list_relative_paths())
    
    242
    +
    
    243
    +    print("filelist for root {} via FileBasedDirectory:".format(root))
    
    244
    +    print("{}".format(filelist))
    
    245
    +    print("filelist for root {} via CasBasedDirectory:".format(root))
    
    246
    +    print("{}".format(filelist2))
    
    247
    +    assert filelist == filelist2
    
    248
    +
    
    249
    +
    
    250
    +@pytest.mark.parametrize("root", range(1, 11))
    
    251
    +def test_random_directory_listing(cli, tmpdir, root):
    
    252
    +    _listing_test(tmpdir, root, generate_random_root)
    
    253
    +
    
    254
    +
    
    255
    +@pytest.mark.parametrize("root", [1, 2, 3, 4, 5])
    
    256
    +def test_fixed_directory_listing(cli, tmpdir, root):
    
    257
    +    _listing_test(tmpdir, root, generate_import_roots)



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]