Jim MacArthur pushed to branch jmac/cas_virtual_directory at BuildStream / buildstream
WARNING: The push did not contain any new commits; it was a force push that deleted the commits and changes below.
Deleted commits:
-
e5d1cb73
by Jim MacArthur at 2018-07-12T09:12:52Z
-
48cca2b5
by Jim MacArthur at 2018-07-13T15:41:18Z
-
25065284
by Jim MacArthur at 2018-07-13T16:44:52Z
-
3e2e11c2
by Jim MacArthur at 2018-07-13T17:16:49Z
1 changed file:
Changes:
| ... | ... | @@ -330,6 +330,7 @@ class CasBasedDirectory(Directory): |
| 330 | 330 |
def _import_files_from_directory(self, source_directory, files, path_prefix=""):
|
| 331 | 331 |
result = FileListResult()
|
| 332 | 332 |
for entry in sorted(files):
|
| 333 |
+ if entry == ".": continue
|
|
| 333 | 334 |
split_path = entry.split(os.path.sep)
|
| 334 | 335 |
# The actual file on the FS we're importing
|
| 335 | 336 |
import_file = os.path.join(source_directory, entry)
|
| ... | ... | @@ -341,8 +342,9 @@ class CasBasedDirectory(Directory): |
| 341 | 342 |
# a better way would be to hand off all the files in this subdir at once.
|
| 342 | 343 |
subdir_result = self._import_directory_recursively(directory_name, source_directory,
|
| 343 | 344 |
split_path[1:], path_prefix)
|
| 345 |
+ |
|
| 344 | 346 |
result.combine(subdir_result)
|
| 345 |
- elif os.path.islink(import_file):
|
|
| 347 |
+ elif os.path.islink(import_file): # careful about ordering here, as some cases overlap
|
|
| 346 | 348 |
if self._check_replacement(entry, path_prefix, result):
|
| 347 | 349 |
self._add_new_link(source_directory, entry)
|
| 348 | 350 |
result.files_written.append(relative_pathname)
|
| ... | ... | @@ -370,6 +372,164 @@ class CasBasedDirectory(Directory): |
| 370 | 372 |
with open(refname, "wb") as f:
|
| 371 | 373 |
f.write(self.ref.SerializeToString())
|
| 372 | 374 |
|
| 375 |
def find_updated_files(self, modified_directory, prefix=""):
    """Find the list of written and overwritten files that would result
    from importing 'modified_directory' into this one. This does
    not change either directory. The reason this exists is for
    direct imports of cas directories into other ones, which can
    be done by simply replacing a hash, but we still need the file
    lists.

    Args:
      modified_directory (:class:`.CasBasedDirectory`): The directory whose
        contents would be imported into this one.
      prefix (str): Path prepended to every name added to the result.

    Returns:
      (FileListResult): files_written and files_overwritten lists describing
      the hypothetical import.
    """
    result = FileListResult()
    for entry in modified_directory.pb2_directory.directories:
        subdir_prefix = os.path.join(prefix, entry.name)
        modified_subdir = modified_directory.descend(entry.name)
        if self.find_pb2_entry(entry.name):
            # NOTE(review): find_pb2_entry is used elsewhere in this class as
            # if it returns the raw protobuf node, which has no
            # find_updated_files method; recurse through the directory
            # object obtained from descend() instead - confirm.
            existing_subdir = self.descend(entry.name)
            result.combine(existing_subdir.find_updated_files(modified_subdir, subdir_prefix))
        else:
            # None of these can overwrite anything, since the original files don't exist.
            # Assumes list_relative_paths() yields paths relative to the
            # subdirectory, so the subdirectory name is part of the prefix
            # - TODO confirm.
            for f in modified_subdir.list_relative_paths():
                result.files_written.append(os.path.join(subdir_prefix, f))

    # Files and symlinks are handled identically; iterate the two repeated
    # fields separately instead of relying on '+' between them.
    for nodes in (modified_directory.pb2_directory.files, modified_directory.pb2_directory.symlinks):
        for entry in nodes:
            pathname = os.path.join(prefix, entry.name)
            if self.find_pb2_entry(entry.name):
                result.files_overwritten.append(pathname)
            result.files_written.append(pathname)
    return result
|
|
| 400 |
+ |
|
| 401 |
def files_in_subdir(sorted_files, dirname):
    """Filters sorted_files and returns only the ones which have
    'dirname' as a prefix, with that prefix removed.

    Takes no 'self'; callers in this class invoke it as
    CasBasedDirectory.files_in_subdir(...).

    Args:
      sorted_files ([str]): Pathnames to filter.
      dirname (str): Directory name, with or without a trailing separator.

    Returns:
      ([str]): Pathnames below 'dirname', relative to it. An entry equal
      to 'dirname' itself (no trailing separator) is not included.
    """
    if not dirname.endswith(os.path.sep):
        dirname += os.path.sep
    # Slice the prefix off. str.lstrip() would strip any leading run of the
    # *characters* in dirname, mangling names such as "a/a.txt".
    prefix_length = len(dirname)
    return [f[prefix_length:] for f in sorted_files if f.startswith(dirname)]
|
|
| 409 |
+ |
|
| 410 |
def _partial_import_cas_into_cas(self, source_directory, files, path_prefix="", file_list_required=True):
    """ Import only the files and symlinks listed in 'files' from source_directory to this one.
    Args:
       source_directory (:class:`.CasBasedDirectory`): The directory to import from
       files ([str]): List of pathnames to import.
       path_prefix (str): Prefix used to add entries to the file list result.
       file_list_required: Whether to update the file list while processing.
         Currently only forwarded to recursive calls.

    Returns:
       (FileListResult): The combined file lists for this directory and all
       subdirectories that were imported into.

    Raises:
       VirtualDirectoryError: if a listed entry is neither a file nor a symlink.
    """

    result = FileListResult()
    sorted_files = sorted(files)  # Check if this is necessary
    processed_directories = set()
    for f in sorted_files:
        if f == ".":
            continue
        fullname = os.path.join(path_prefix, f)
        components = f.split(os.path.sep)
        dirname = components[0]
        if len(components) > 1 or isinstance(source_directory.index[dirname].buildstream_object, CasBasedDirectory):
            # Then we are importing a directory. All listed paths below it are
            # handed to one recursive call, so each directory is visited once.
            if dirname not in processed_directories:
                subcomponents = CasBasedDirectory.files_in_subdir(sorted_files, dirname)
                dest_subdir = self.descend(dirname, create=True)
                src_subdir = source_directory.descend(dirname)
                # The prefix for the subdirectory is its own path, not the
                # path of whichever listed file happened to be seen first.
                import_result = dest_subdir._partial_import_cas_into_cas(
                    src_subdir, subcomponents,
                    path_prefix=os.path.join(path_prefix, dirname),
                    file_list_required=file_list_required)
                result.combine(import_result)
                processed_directories.add(dirname)
        elif self._check_replacement(f, path_prefix, result):
            # Same pattern as _import_files_from_directory: only write when
            # _check_replacement allows it, then record the write.
            item = source_directory.index[f].pb2_object
            if isinstance(item, remote_execution_pb2.FileNode):
                new_node = self.pb2_directory.files.add(digest=item.digest, name=f,
                                                        is_executable=item.is_executable)
            elif isinstance(item, remote_execution_pb2.SymlinkNode):
                # A symlink node has no digest.
                new_node = self.pb2_directory.symlinks.add(name=f, target=item.target)
            else:
                raise VirtualDirectoryError("Cannot import '{}': it is neither a file nor a symlink"
                                            .format(fullname))
            self.index[f] = IndexEntry(new_node, modified=(fullname in result.files_overwritten))
            result.files_written.append(fullname)
    return result
|
|
| 450 |
+ |
|
| 451 |
def transfer_node_contents(destination, source):
    """Transfers all fields from the source PB2 node into the
    destination. Destination and source must be of the same type and must
    be a FileNode, SymlinkNode or DirectoryNode.

    Takes no 'self'; callers in this class invoke it as
    CasBasedDirectory.transfer_node_contents(...).

    Args:
      destination: The PB2 node to overwrite.
      source: The PB2 node to copy from.

    Raises:
      VirtualDirectoryError: if destination is not one of the supported node types.
    """
    if isinstance(destination, remote_execution_pb2.FileNode):
        # 'digest' is a message-typed field; protobuf does not allow direct
        # assignment to it, so copy its contents instead.
        destination.digest.CopyFrom(source.digest)
        destination.is_executable = source.is_executable
    elif isinstance(destination, remote_execution_pb2.SymlinkNode):
        destination.target = source.target
    elif isinstance(destination, remote_execution_pb2.DirectoryNode):
        destination.digest.CopyFrom(source.digest)
    else:
        raise VirtualDirectoryError("Incompatible type '{}' used as destination for transfer_node_contents"
                                    .format(type(destination).__name__))
    # Only touch the destination once its type has been validated.
    destination.name = source.name
|
|
| 467 |
+ |
|
| 468 |
def _full_import_cas_into_cas(self, source_directory, path_prefix="", file_list_required=True):
    """ Import all files and symlinks from source_directory to this one.
    Args:
       source_directory (:class:`.CasBasedDirectory`): The directory to import from
       path_prefix (str): Prefix used to add entries to the file list result.
       file_list_required: Whether to update the file list while processing.
         Only affects subdirectories; top-level files and symlinks are
         always recorded.

    Returns:
       (FileListResult): The files written and overwritten by the import.
    """

    result = FileListResult()
    for entry in source_directory.pb2_directory.directories:
        existing_item = self.find_pb2_entry(entry.name)
        subdir_prefix = os.path.join(path_prefix, entry.name)

        # Work out the file lists before the destination is modified, so the
        # comparison is made against the pre-import state.
        if file_list_required:
            source_subdir = source_directory.descend(entry.name)
            if existing_item:
                # NOTE(review): existing_item looks like a raw protobuf node,
                # which has no find_updated_files method; use the directory
                # object from descend() instead - confirm.
                updated_files = self.descend(entry.name).find_updated_files(source_subdir, subdir_prefix)
                result.combine(updated_files)
            else:
                # Assumes list_relative_paths() yields paths relative to the
                # subdirectory - TODO confirm.
                for i in source_subdir.list_relative_paths():
                    result.files_written.append(os.path.join(subdir_prefix, i))

        if existing_item:
            # 'digest' is a message-typed field and cannot be assigned directly.
            # NOTE(review): this replaces the existing subdirectory wholesale;
            # any directory object already held in self.index for this name is
            # not refreshed here - confirm whether it needs to be.
            existing_item.digest.CopyFrom(entry.digest)
        else:
            # Create a separate CasBasedDirectory, since we may import more files
            # into a subdirectory of it and we don't want to affect the original.
            new_pb2_dirnode = self.pb2_directory.directories.add(digest=entry.digest, name=entry.name)
            subdirectory = CasBasedDirectory(self.context, ref=entry.digest,
                                             parent=self, filename=entry.name)
            # Index our own node, not the one owned by source_directory.
            self.index[entry.name] = IndexEntry(new_pb2_dirnode, buildstream_object=subdirectory)

    # Files and symlinks are processed identically; transfer_node_contents
    # copies whichever fields belong to the node type (a symlink node has
    # no digest).
    node_collections = (
        (source_directory.pb2_directory.files, self.pb2_directory.files),
        (source_directory.pb2_directory.symlinks, self.pb2_directory.symlinks),
    )
    for source_nodes, own_nodes in node_collections:
        for entry in source_nodes:
            existing_item = self.find_pb2_entry(entry.name)
            relative_pathname = os.path.join(path_prefix, entry.name)
            if existing_item:
                node = existing_item
                result.files_overwritten.append(relative_pathname)
            else:
                node = own_nodes.add()
            CasBasedDirectory.transfer_node_contents(node, entry)
            self.index[entry.name] = IndexEntry(node, modified=(existing_item is not None))
            result.files_written.append(relative_pathname)
    return result
|
|
| 523 |
+ |
|
| 524 |
+ def _import_cas_into_cas(self, source_directory, files=None):
|
|
| 525 |
+ """ A full import is significantly quicker than a partial import, because we can just
|
|
| 526 |
+ replace one directory with another's hash, without doing any recursion.
|
|
| 527 |
+ """
|
|
| 528 |
+ if files is None:
|
|
| 529 |
+ return self._full_import_cas_into_cas(source_directory)
|
|
| 530 |
+ else:
|
|
| 531 |
+ return self._partial_import_cas_into_cas(source_directory, files)
|
|
| 532 |
+ |
|
| 373 | 533 |
def import_files(self, external_pathspec: any, files: List[str] = None,
|
| 374 | 534 |
report_written: bool = True, update_utimes: bool = False,
|
| 375 | 535 |
can_link: bool = False) -> FileListResult:
|
| ... | ... | @@ -391,28 +551,33 @@ class CasBasedDirectory(Directory): |
| 391 | 551 |
|
| 392 | 552 |
can_link (bool): Ignored, since hard links do not have any meaning within CAS.
|
| 393 | 553 |
"""
|
| 394 |
- if isinstance(external_pathspec, FileBasedDirectory):
|
|
| 395 |
- source_directory = external_pathspec.get_underlying_directory()
|
|
| 396 |
- elif isinstance(external_pathspec, CasBasedDirectory):
|
|
| 397 |
- # TODO: This transfers from one CAS to another via the
|
|
| 398 |
- # filesystem, which is very inefficient. Alter this so it
|
|
| 399 |
- # transfers refs across directory.
|
|
| 554 |
+ |
|
| 555 |
+ duplicate_cas = None
|
|
| 556 |
+ if isinstance(external_pathspec, CasBasedDirectory):
|
|
| 557 |
+ result = self._import_cas_into_cas(external_pathspec, files=files)
|
|
| 558 |
+ |
|
| 559 |
+ # Duplicate the current directory and do an import that way.
|
|
| 560 |
+ duplicate_cas = CasBasedDirectory(self.context, ref=self.ref)
|
|
| 400 | 561 |
with tempfile.TemporaryDirectory(prefix="roundtrip") as tmpdir:
|
| 401 | 562 |
external_pathspec.export_files(tmpdir)
|
| 402 | 563 |
if files is None:
|
| 403 | 564 |
files = list_relative_paths(tmpdir)
|
| 404 |
- result = self._import_files_from_directory(tmpdir, files=files)
|
|
| 405 |
- return result
|
|
| 565 |
+ duplicate_cas._import_files_from_directory(tmpdir, files=files)
|
|
| 566 |
+ duplicate_cas._recalculate_recursing_down()
|
|
| 567 |
+ if duplicate_cas.parent:
|
|
| 568 |
+ duplicate_cas.parent._recalculate_recursing_up(self)
|
|
| 406 | 569 |
else:
|
| 407 |
- source_directory = external_pathspec
|
|
| 408 |
- |
|
| 409 |
- if files is None:
|
|
| 410 |
- files = list_relative_paths(source_directory)
|
|
| 570 |
+ if isinstance(external_pathspec, FileBasedDirectory):
|
|
| 571 |
+ source_directory = external_pathspec.get_underlying_directory()
|
|
| 572 |
+ else:
|
|
| 573 |
+ source_directory = external_pathspec
|
|
| 574 |
+ if files is None:
|
|
| 575 |
+ files = list_relative_paths(external_pathspec)
|
|
| 576 |
+ result = self._import_files_from_directory(source_directory, files=files)
|
|
| 411 | 577 |
|
| 412 | 578 |
# TODO: No notice is taken of report_written, update_utimes or can_link.
|
| 413 | 579 |
# Current behaviour is to fully populate the report, which is inefficient,
|
| 414 | 580 |
# but still correct.
|
| 415 |
- result = self._import_files_from_directory(source_directory, files=files)
|
|
| 416 | 581 |
|
| 417 | 582 |
# We need to recalculate and store the hashes of all directories both
|
| 418 | 583 |
# up and down the tree; we have changed our directory by importing files
|
| ... | ... | @@ -422,6 +587,10 @@ class CasBasedDirectory(Directory): |
| 422 | 587 |
self._recalculate_recursing_down()
|
| 423 | 588 |
if self.parent:
|
| 424 | 589 |
self.parent._recalculate_recursing_up(self)
|
| 590 |
+ if duplicate_cas:
|
|
| 591 |
+ if duplicate_cas.ref.hash != self.ref.hash:
|
|
| 592 |
+ print("Mismatch between file-imported result {} and cas-to-cas imported result {}.".format(duplicate_cas.ref.hash,self.ref.hash))
|
|
| 593 |
+ |
|
| 425 | 594 |
return result
|
| 426 | 595 |
|
| 427 | 596 |
def set_deterministic_mtime(self) -> None:
|
| ... | ... | @@ -448,13 +617,12 @@ class CasBasedDirectory(Directory): |
| 448 | 617 |
instead of copying.
|
| 449 | 618 |
|
| 450 | 619 |
"""
|
| 451 |
- |
|
| 452 | 620 |
if not os.path.exists(to_directory):
|
| 453 | 621 |
os.mkdir(to_directory)
|
| 454 | 622 |
|
| 455 | 623 |
for entry in self.pb2_directory.directories:
|
| 456 | 624 |
if entry.name not in self.index:
|
| 457 |
- raise VirtualDirectoryError("CasDir {} contained {} in directories but not in the index"
|
|
| 625 |
+ raise VirtualDirectoryError("CasDir {} contained '{}' in directories but not in the index"
|
|
| 458 | 626 |
.format(str(self), entry.name))
|
| 459 | 627 |
if not self._directory_read:
|
| 460 | 628 |
raise VirtualDirectoryError("CasDir {} has not been indexed yet".format(str(self)))
|
