[Notes] [Git][BuildStream/buildstream][jmac/cas_virtual_directory] Deleted 4 commits: WIP: Adding CAS-to-CAS import



Title: GitLab

Jim MacArthur pushed to branch jmac/cas_virtual_directory at BuildStream / buildstream

WARNING: This push did not contain any new commits; it was a force push that deleted the commits and changes listed below.

Deleted commits:

1 changed file:

Changes:

  • buildstream/storage/_casbaseddirectory.py
    ... ... @@ -330,6 +330,7 @@ class CasBasedDirectory(Directory):
    330 330
         def _import_files_from_directory(self, source_directory, files, path_prefix=""):
    
    331 331
             result = FileListResult()
    
    332 332
             for entry in sorted(files):
    
    333
    +            if entry == ".": continue
    
    333 334
                 split_path = entry.split(os.path.sep)
    
    334 335
                 # The actual file on the FS we're importing
    
    335 336
                 import_file = os.path.join(source_directory, entry)
    
    ... ... @@ -341,8 +342,9 @@ class CasBasedDirectory(Directory):
    341 342
                     # a better way would be to hand off all the files in this subdir at once.
    
    342 343
                     subdir_result = self._import_directory_recursively(directory_name, source_directory,
    
    343 344
                                                                        split_path[1:], path_prefix)
    
    345
    +
    
    344 346
                     result.combine(subdir_result)
    
    345
    -            elif os.path.islink(import_file):
    
    347
    +            elif os.path.islink(import_file):  # careful about ordering here, as some cases overlap
    
    346 348
                     if self._check_replacement(entry, path_prefix, result):
    
    347 349
                         self._add_new_link(source_directory, entry)
    
    348 350
                         result.files_written.append(relative_pathname)
    
    ... ... @@ -370,6 +372,164 @@ class CasBasedDirectory(Directory):
    370 372
             with open(refname, "wb") as f:
    
    371 373
                 f.write(self.ref.SerializeToString())
    
    372 374
     
    
    375
    +    def find_updated_files(self, modified_directory, prefix=""):
    
    376
    +        """Find the list of written and overwritten files that would result
    
    377
    +        from importing 'modified_directory' into this one.  This does
    
    378
    +        not change either directory. The reason this exists is for
    
    379
    +        direct imports of cas directories into other ones, which can
    
    380
    +        be done by simply replacing a hash, but we still need the file
    
    381
    +        lists.
    
    382
    +
    
    383
    +        """
    
    384
    +        result = FileListResult()
    
    385
    +        for entry in modified_directory.pb2_directory.directories:
    
    386
    +            existing_dir = self.find_pb2_entry(entry.name)
    
    387
    +            if existing_dir:
    
    388
    +                updates_files = existing_dir.find_updated_files(modified_directory.descend(entry.name),
    
    389
    +                                                                os.path.join(prefix, entry.name))
    
    390
    +                result.combine(updated_files)
    
    391
    +            else:
    
    392
    +                for f in source_directory.descend(entry.name).list_relative_paths():
    
    393
    +                    result.files_written.append(os.path.join(prefix, f))
    
    394
    +                    # None of these can overwrite anything, since the original files don't exist
    
    395
    +        for entry in modified_directory.pb2_directory.files + modified_directory.pb2_directory.symlinks:
    
    396
    +            if self.find_pb2_entry(entry.name):
    
    397
    +                result.files_overwritten.apppend(os.path.join(prefix, entry.name))
    
    398
    +            result.file_written.apppend(os.path.join(prefix, entry.name))
    
    399
    +        return result
    
    400
    +
    
    401
    +    def files_in_subdir(sorted_files, dirname):
    
    402
    +        """Filters sorted_files and returns only the ones which have
    
    403
    +           'dirname' as a prefix, with that prefix removed.
    
    404
    +
    
    405
    +        """
    
    406
    +        if not dirname.endswith(os.path.sep):
    
    407
    +            dirname += os.path.sep
    
    408
    +        return [f.lstrip(dirname) for f in sorted_files if f.startswith(dirname)]
    
    409
    +
    
    410
    +    def _partial_import_cas_into_cas(self, source_directory, files, path_prefix="", file_list_required=True):
    
    411
    +        """ Import only the files and symlinks listed in 'files' from source_directory to this one.
    
    412
    +        Args:
    
    413
    +           source_directory (:class:`.CasBasedDirectory`): The directory to import from
    
    414
    +           files ([str]): List of pathnames to import.
    
    415
    +           path_prefix (str): Prefix used to add entries to the file list result.
    
    416
    +           file_list_required: Whether to update the file list while processing.
    
    417
    +        """
    
    418
    +
    
    419
    +        result = FileListResult()
    
    420
    +        sorted_files = sorted(files) # Check if this is necessary
    
    421
    +        processed_directories = set()
    
    422
    +        for f in sorted_files:
    
    423
    +            if f == ".": continue
    
    424
    +            fullname = os.path.join(path_prefix, f)
    
    425
    +            components = f.split(os.path.sep)
    
    426
    +            if len(components)>1 or isinstance(source_directory.index[components[0]].buildstream_object, CasBasedDirectory):
    
    427
    +                # Then we are importing a directory
    
    428
    +                dirname = components[0]
    
    429
    +                if dirname not in processed_directories:
    
    430
    +                    subcomponents = CasBasedDirectory.files_in_subdir(sorted_files, dirname)
    
    431
    +                    dest_subdir = self.descend(dirname, create=True)
    
    432
    +                    src_subdir = source_directory.descend(dirname)
    
    433
    +                    import_result = dest_subdir._partial_import_cas_into_cas(src_subdir, subcomponents,
    
    434
    +                                                                             path_prefix=fullname, file_list_required=file_list_required)
    
    435
    +                    result.combine(import_result)
    
    436
    +                processed_directories.add(dirname)
    
    437
    +            else:
    
    438
    +                self._check_replacement(f, path_prefix, result)
    
    439
    +                item = source_directory.index[f].pb2_object
    
    440
    +                if isinstance(item, remote_execution_pb2.FileNode):
    
    441
    +                    filenode = self.pb2_directory.files.add(digest=item.digest, name=f,
    
    442
    +                                                            is_executable=item.is_executable)
    
    443
    +                    self.index[f] = IndexEntry(filenode, modified=(fullname in result.overwritten))
    
    444
    +                else:
    
    445
    +                    assert(isinstance(item.pb2_object, remote_execution_pb2.SymlinkNode))
    
    446
    +                    symlinknode = self.pb2_directory.symlinks.add(name=f, target=item.pb2_object.target)
    
    447
    +                    # A symlink node has no digest.
    
    448
    +                    self.index[filename] = IndexEntry(symlinknode, modified=(fullname in result.overwritten))
    
    449
    +        return result
    
    450
    +
    
    451
    +    def transfer_node_contents(destination, source):
    
    452
    +        """Transfers all fields from the source PB2 node into the
    
    453
    +        destination. Destination and source must be of the same type and must
    
    454
    +        be a FileNode, SymlinkNode or DirectoryNode.
    
    455
    +        """
    
    456
    +        destination.name = source.name
    
    457
    +        if isinstance(destination, remote_execution_pb2.FileNode):
    
    458
    +            destination.digest = source.digest # Hmm!
    
    459
    +            destination.is_executable = source.is_executable
    
    460
    +        elif isinstance(destination, remote_execution_pb2.SymlinkNode):
    
    461
    +            destination.target = source.target
    
    462
    +        elif isinstance(destination, remote_execution_pb2.DirectoryNode):
    
    463
    +            destination.digest = source.digest # Hmm!
    
    464
    +        else:
    
    465
    +            raise VirtualDirectoryError("Incompatible type '{}' used as destination for transfer_node_contents"
    
    466
    +                                        .format(destination.type))
    
    467
    +
    
    468
    +    def _full_import_cas_into_cas(self, source_directory, path_prefix="", file_list_required=True):
    
    469
    +        """ Import all files and symlinks from source_directory to this one.
    
    470
    +        Args:
    
    471
    +           source_directory (:class:`.CasBasedDirectory`): The directory to import from
    
    472
    +           path_prefix (str): Prefix used to add entries to the file list result.
    
    473
    +           file_list_required: Whether to update the file list while processing.
    
    474
    +        """
    
    475
    +
    
    476
    +        result = FileListResult()
    
    477
    +        for entry in source_directory.pb2_directory.directories:
    
    478
    +            existing_item = self.find_pb2_entry(entry.name)
    
    479
    +            # Create a cloned CasBasedDirectory, since we may import more files
    
    480
    +            # into a subdirectory of it and we don't want to affect the original.
    
    481
    +            if existing_item:
    
    482
    +                existing_item.digest = entry.digest
    
    483
    +            else:
    
    484
    +                new_pb2_dirnode = self.pb2_directory.directories.add(digest=entry.digest, name=entry.name)
    
    485
    +            buildStreamDirectory = CasBasedDirectory(self.context, ref=entry.digest,
    
    486
    +                                                    parent=self, filename=entry.name)
    
    487
    +            self.index[entry.name] = IndexEntry(entry, buildstream_object=buildStreamDirectory)
    
    488
    +
    
    489
    +            if file_list_required:
    
    490
    +                if existing_item:
    
    491
    +                    updated_files = existing_item.find_updated_files(source_directory.descend(entry.name), entry.name)
    
    492
    +                    result.combine(updated_files)
    
    493
    +                else:
    
    494
    +                    for i in source_directory.descend(entry.name).list_relative_paths():
    
    495
    +                        result.files_written.append(i)
    
    496
    +
    
    497
    +        for entry in source_directory.pb2_directory.files:
    
    498
    +            # TODO: Note that this and the symlinks case are now almost identical
    
    499
    +            existing_item = self.find_pb2_entry(entry.name)
    
    500
    +            relative_pathname = os.path.join(path_prefix, entry.name)
    
    501
    +            if existing_item:
    
    502
    +                filenode = existing_item
    
    503
    +                result.files_overwritten.append(relative_pathname)
    
    504
    +            else:
    
    505
    +                filenode = self.pb2_directory.files.add(name=entry.name, digest=entry.digest)
    
    506
    +            CasBasedDirectory.transfer_node_contents(filenode, entry)
    
    507
    +            self.index[entry.name] = IndexEntry(filenode, modified=(existing_item is not None))
    
    508
    +            result.files_written.append(relative_pathname)
    
    509
    +
    
    510
    +        for entry in source_directory.pb2_directory.symlinks:
    
    511
    +            existing_item = self.find_pb2_entry(entry.name)
    
    512
    +            relative_pathname = os.path.join(path_prefix, entry.name)
    
    513
    +            if existing_item:
    
    514
    +                symlinknode = existing_item
    
    515
    +                result.files_overwritten.append(relative_pathname)
    
    516
    +            else:
    
    517
    +                symlinknode = self.pb2_directory.symlinks.add()
    
    518
    +            CasBasedDirectory.transfer_node_contents(symlinknode, entry)
    
    519
    +            # A symlink node has no digest.
    
    520
    +            self.index[entry.name] = IndexEntry(symlinknode, modified=(existing_item is not None))
    
    521
    +            result.files_written.append(relative_pathname)
    
    522
    +        return result
    
    523
    +
    
    524
    +    def _import_cas_into_cas(self, source_directory, files=None):
    
    525
    +        """ A full import is significantly quicker than a partial import, because we can just
    
    526
    +        replace one directory with another's hash, without doing any recursion.
    
    527
    +        """
    
    528
    +        if files is None:
    
    529
    +            return self._full_import_cas_into_cas(source_directory)
    
    530
    +        else:
    
    531
    +            return self._partial_import_cas_into_cas(source_directory, files)
    
    532
    +
    
    373 533
         def import_files(self, external_pathspec: any, files: List[str] = None,
    
    374 534
                          report_written: bool = True, update_utimes: bool = False,
    
    375 535
                          can_link: bool = False) -> FileListResult:
    
    ... ... @@ -391,28 +551,33 @@ class CasBasedDirectory(Directory):
    391 551
     
    
    392 552
             can_link (bool): Ignored, since hard links do not have any meaning within CAS.
    
    393 553
             """
    
    394
    -        if isinstance(external_pathspec, FileBasedDirectory):
    
    395
    -            source_directory = external_pathspec.get_underlying_directory()
    
    396
    -        elif isinstance(external_pathspec, CasBasedDirectory):
    
    397
    -            # TODO: This transfers from one CAS to another via the
    
    398
    -            # filesystem, which is very inefficient. Alter this so it
    
    399
    -            # transfers refs across directory.
    
    554
    +
    
    555
    +        duplicate_cas = None
    
    556
    +        if isinstance(external_pathspec, CasBasedDirectory):
    
    557
    +            result = self._import_cas_into_cas(external_pathspec, files=files)
    
    558
    +
    
    559
    +            # Duplicate the current directory and do an import that way.
    
    560
    +            duplicate_cas = CasBasedDirectory(self.context, ref=self.ref)
    
    400 561
                 with tempfile.TemporaryDirectory(prefix="roundtrip") as tmpdir:
    
    401 562
                     external_pathspec.export_files(tmpdir)
    
    402 563
                     if files is None:
    
    403 564
                         files = list_relative_paths(tmpdir)
    
    404
    -                result = self._import_files_from_directory(tmpdir, files=files)
    
    405
    -            return result
    
    565
    +                duplicate_cas._import_files_from_directory(tmpdir, files=files)
    
    566
    +                duplicate_cas._recalculate_recursing_down()
    
    567
    +                if duplicate_cas.parent:
    
    568
    +                    duplicate_cas.parent._recalculate_recursing_up(self)
    
    406 569
             else:
    
    407
    -            source_directory = external_pathspec
    
    408
    -
    
    409
    -        if files is None:
    
    410
    -            files = list_relative_paths(source_directory)
    
    570
    +            if isinstance(external_pathspec, FileBasedDirectory):
    
    571
    +                source_directory = external_pathspec.get_underlying_directory()
    
    572
    +            else:
    
    573
    +                source_directory = external_pathspec
    
    574
    +            if files is None:
    
    575
    +                files = list_relative_paths(external_pathspec)
    
    576
    +            result = self._import_files_from_directory(source_directory, files=files)
    
    411 577
     
    
    412 578
             # TODO: No notice is taken of report_written, update_utimes or can_link.
    
    413 579
             # Current behaviour is to fully populate the report, which is inefficient,
    
    414 580
             # but still correct.
    
    415
    -        result = self._import_files_from_directory(source_directory, files=files)
    
    416 581
     
    
    417 582
             # We need to recalculate and store the hashes of all directories both
    
    418 583
             # up and down the tree; we have changed our directory by importing files
    
    ... ... @@ -422,6 +587,10 @@ class CasBasedDirectory(Directory):
    422 587
             self._recalculate_recursing_down()
    
    423 588
             if self.parent:
    
    424 589
                 self.parent._recalculate_recursing_up(self)
    
    590
    +        if duplicate_cas:
    
    591
    +            if duplicate_cas.ref.hash != self.ref.hash:
    
    592
    +                print("Mismatch between file-imported result {} and cas-to-cas imported result {}.".format(duplicate_cas.ref.hash,self.ref.hash))
    
    593
    +
    
    425 594
             return result
    
    426 595
     
    
    427 596
         def set_deterministic_mtime(self) -> None:
    
    ... ... @@ -448,13 +617,12 @@ class CasBasedDirectory(Directory):
    448 617
             instead of copying.
    
    449 618
     
    
    450 619
             """
    
    451
    -
    
    452 620
             if not os.path.exists(to_directory):
    
    453 621
                 os.mkdir(to_directory)
    
    454 622
     
    
    455 623
             for entry in self.pb2_directory.directories:
    
    456 624
                 if entry.name not in self.index:
    
    457
    -                raise VirtualDirectoryError("CasDir {} contained {} in directories but not in the index"
    
    625
    +                raise VirtualDirectoryError("CasDir {} contained '{}' in directories but not in the index"
    
    458 626
                                                 .format(str(self), entry.name))
    
    459 627
                 if not self._directory_read:
    
    460 628
                     raise VirtualDirectoryError("CasDir {} has not been indexed yet".format(str(self)))
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]