Jim MacArthur pushed to branch jmac/cas_virtual_directory at BuildStream / buildstream
WARNING: The push did not contain any new commits, but force pushed to delete the commits and changes below.
Deleted commits:
-
e5d1cb73
by Jim MacArthur at 2018-07-12T09:12:52Z
-
48cca2b5
by Jim MacArthur at 2018-07-13T15:41:18Z
-
25065284
by Jim MacArthur at 2018-07-13T16:44:52Z
-
3e2e11c2
by Jim MacArthur at 2018-07-13T17:16:49Z
1 changed file:
Changes:
... | ... | @@ -330,6 +330,7 @@ class CasBasedDirectory(Directory): |
330 | 330 |
def _import_files_from_directory(self, source_directory, files, path_prefix=""):
|
331 | 331 |
result = FileListResult()
|
332 | 332 |
for entry in sorted(files):
|
333 |
+ if entry == ".": continue
|
|
333 | 334 |
split_path = entry.split(os.path.sep)
|
334 | 335 |
# The actual file on the FS we're importing
|
335 | 336 |
import_file = os.path.join(source_directory, entry)
|
... | ... | @@ -341,8 +342,9 @@ class CasBasedDirectory(Directory): |
341 | 342 |
# a better way would be to hand off all the files in this subdir at once.
|
342 | 343 |
subdir_result = self._import_directory_recursively(directory_name, source_directory,
|
343 | 344 |
split_path[1:], path_prefix)
|
345 |
+ |
|
344 | 346 |
result.combine(subdir_result)
|
345 |
- elif os.path.islink(import_file):
|
|
347 |
+ elif os.path.islink(import_file): # careful about ordering here, as some cases overlap
|
|
346 | 348 |
if self._check_replacement(entry, path_prefix, result):
|
347 | 349 |
self._add_new_link(source_directory, entry)
|
348 | 350 |
result.files_written.append(relative_pathname)
|
... | ... | @@ -370,6 +372,164 @@ class CasBasedDirectory(Directory): |
370 | 372 |
with open(refname, "wb") as f:
|
371 | 373 |
f.write(self.ref.SerializeToString())
|
372 | 374 |
|
375 |
def find_updated_files(self, modified_directory, prefix=""):
    """Find the list of written and overwritten files that would result
    from importing 'modified_directory' into this one. This does
    not change either directory. The reason this exists is for
    direct imports of cas directories into other ones, which can
    be done by simply replacing a hash, but we still need the file
    lists.

    Args:
       modified_directory (:class:`.CasBasedDirectory`): The directory whose import is simulated.
       prefix (str): Path joined in front of every entry added to the result.

    Returns:
       (FileListResult): files_written and files_overwritten for the simulated import.
    """
    result = FileListResult()
    for entry in modified_directory.pb2_directory.directories:
        modified_subdir = modified_directory.descend(entry.name)
        if self.find_pb2_entry(entry.name):
            # find_pb2_entry is used elsewhere in this class as a protobuf
            # node, which has no find_updated_files(); recurse through the
            # directory object for the same name instead.
            # NOTE(review): assumes descend() returns the existing
            # CasBasedDirectory without modifying it -- confirm.
            updated_files = self.descend(entry.name).find_updated_files(
                modified_subdir, os.path.join(prefix, entry.name))
            result.combine(updated_files)
        else:
            # None of these can overwrite anything, since the original files don't exist
            # NOTE(review): if list_relative_paths() yields paths relative to
            # the subdirectory, entry.name is missing from these paths -- confirm.
            for f in modified_subdir.list_relative_paths():
                result.files_written.append(os.path.join(prefix, f))

    # Files and symlinks are handled identically: an entry already present
    # under the same name is overwritten, and every entry is written.
    leaf_entries = list(modified_directory.pb2_directory.files) + list(modified_directory.pb2_directory.symlinks)
    for entry in leaf_entries:
        relative_pathname = os.path.join(prefix, entry.name)
        if self.find_pb2_entry(entry.name):
            result.files_overwritten.append(relative_pathname)
        result.files_written.append(relative_pathname)
    return result
|
|
400 |
+ |
|
401 |
def files_in_subdir(sorted_files, dirname):
    """Filters sorted_files and returns only the ones which have
    'dirname' as a prefix, with that prefix removed.

    Args:
       sorted_files ([str]): Pathnames to filter.
       dirname (str): Directory name; a trailing path separator is added if missing.

    Returns:
       ([str]): The matching pathnames, relative to 'dirname', in input order.
    """
    if not dirname.endswith(os.path.sep):
        dirname += os.path.sep
    # Slice the prefix off by length. str.lstrip() takes a *set of characters*,
    # so it would also eat leading characters of the remaining path that happen
    # to occur in dirname (e.g. "a/ab" -> "b").
    prefix_length = len(dirname)
    return [f[prefix_length:] for f in sorted_files if f.startswith(dirname)]
|
|
409 |
+ |
|
410 |
def _partial_import_cas_into_cas(self, source_directory, files, path_prefix="", file_list_required=True):
    """ Import only the files and symlinks listed in 'files' from source_directory to this one.

    Entries that live in a subdirectory are handed to the matching
    subdirectory of this directory in one recursive call per subdirectory.

    Args:
       source_directory (:class:`.CasBasedDirectory`): The directory to import from
       files ([str]): List of pathnames to import.
       path_prefix (str): Prefix used to add entries to the file list result.
       file_list_required: Whether to update the file list while processing.

    Returns:
       (FileListResult): The combined result of this level and all recursive imports.
    """
    result = FileListResult()
    sorted_files = sorted(files)  # Check if this is necessary
    processed_directories = set()
    for f in sorted_files:
        if f == ".":
            continue
        fullname = os.path.join(path_prefix, f)
        components = f.split(os.path.sep)
        is_directory = (len(components) > 1 or
                        isinstance(source_directory.index[components[0]].buildstream_object, CasBasedDirectory))
        if is_directory:
            # Then we are importing a directory
            dirname = components[0]
            if dirname in processed_directories:
                continue
            subcomponents = CasBasedDirectory.files_in_subdir(sorted_files, dirname)
            dest_subdir = self.descend(dirname, create=True)
            src_subdir = source_directory.descend(dirname)
            # The prefix for the subdirectory is the directory itself, not
            # the full path of whichever of its files we happened to see first.
            import_result = dest_subdir._partial_import_cas_into_cas(
                src_subdir, subcomponents,
                path_prefix=os.path.join(path_prefix, dirname),
                file_list_required=file_list_required)
            result.combine(import_result)
            processed_directories.add(dirname)
        else:
            # NOTE(review): other callers in this class branch on the return
            # value of _check_replacement; here it is ignored -- confirm intended.
            self._check_replacement(f, path_prefix, result)
            item = source_directory.index[f].pb2_object
            was_overwritten = fullname in result.files_overwritten
            if isinstance(item, remote_execution_pb2.FileNode):
                new_node = self.pb2_directory.files.add(digest=item.digest, name=f,
                                                        is_executable=item.is_executable)
            else:
                assert isinstance(item, remote_execution_pb2.SymlinkNode)
                # A symlink node has no digest.
                new_node = self.pb2_directory.symlinks.add(name=f, target=item.target)
            self.index[f] = IndexEntry(new_node, modified=was_overwritten)
    return result
|
|
450 |
+ |
|
451 |
def transfer_node_contents(destination, source):
    """Transfers all fields from the source PB2 node into the
    destination. Destination and source must be of the same type and must
    be a FileNode, SymlinkNode or DirectoryNode.

    Args:
       destination: The PB2 node to overwrite.
       source: The PB2 node to copy the fields from.

    Raises:
       VirtualDirectoryError: if destination is not one of the supported node types.
    """
    destination.name = source.name
    if isinstance(destination, remote_execution_pb2.FileNode):
        # 'digest' is a message-typed field; protobuf does not allow
        # assigning to it directly, so copy its contents instead.
        destination.digest.CopyFrom(source.digest)
        destination.is_executable = source.is_executable
    elif isinstance(destination, remote_execution_pb2.SymlinkNode):
        destination.target = source.target
    elif isinstance(destination, remote_execution_pb2.DirectoryNode):
        destination.digest.CopyFrom(source.digest)
    else:
        # Report the Python type name; PB2 nodes carry no 'type' attribute.
        raise VirtualDirectoryError("Incompatible type '{}' used as destination for transfer_node_contents"
                                    .format(type(destination).__name__))
|
|
467 |
+ |
|
468 |
def _full_import_cas_into_cas(self, source_directory, path_prefix="", file_list_required=True):
    """ Import all files and symlinks from source_directory to this one.

    Subdirectories are imported by taking over the digest of the source's
    directory node; files and symlinks are copied node by node.

    Args:
       source_directory (:class:`.CasBasedDirectory`): The directory to import from
       path_prefix (str): Prefix used to add entries to the file list result.
       file_list_required: Whether to update the file list while processing.

    Returns:
       (FileListResult): The files written and overwritten by the import.
    """
    result = FileListResult()

    for entry in source_directory.pb2_directory.directories:
        existing_item = self.find_pb2_entry(entry.name)

        if file_list_required:
            # Work the file list out *before* the subdirectory is replaced
            # below, while the old contents can still be compared.
            if existing_item:
                # NOTE(review): assumes descend() returns the existing
                # directory object without modifying it -- confirm.
                updated_files = self.descend(entry.name).find_updated_files(
                    source_directory.descend(entry.name), os.path.join(path_prefix, entry.name))
                result.combine(updated_files)
            else:
                # NOTE(review): if list_relative_paths() yields paths relative
                # to the subdirectory, entry.name is missing here -- confirm.
                for i in source_directory.descend(entry.name).list_relative_paths():
                    result.files_written.append(os.path.join(path_prefix, i))

        if existing_item:
            # 'digest' is a message-typed field and cannot be assigned directly.
            existing_item.digest.CopyFrom(entry.digest)
            dirnode = existing_item
        else:
            dirnode = self.pb2_directory.directories.add(digest=entry.digest, name=entry.name)
        # Create a cloned CasBasedDirectory, since we may import more files
        # into a subdirectory of it and we don't want to affect the original.
        # For the same reason the index refers to our own node, not the source's.
        cloned_directory = CasBasedDirectory(self.context, ref=entry.digest,
                                             parent=self, filename=entry.name)
        self.index[entry.name] = IndexEntry(dirnode, buildstream_object=cloned_directory)

    # Files and symlinks are imported identically; only the repeated field
    # that receives a newly created node differs. Files are processed first.
    leaf_groups = (
        (source_directory.pb2_directory.files, self.pb2_directory.files),
        (source_directory.pb2_directory.symlinks, self.pb2_directory.symlinks),
    )
    for source_nodes, destination_nodes in leaf_groups:
        for entry in source_nodes:
            existing_item = self.find_pb2_entry(entry.name)
            relative_pathname = os.path.join(path_prefix, entry.name)
            if existing_item:
                node = existing_item
                result.files_overwritten.append(relative_pathname)
            else:
                node = destination_nodes.add()
            # Fills in every field, including the name (and, for files, the digest).
            CasBasedDirectory.transfer_node_contents(node, entry)
            self.index[entry.name] = IndexEntry(node, modified=(existing_item is not None))
            result.files_written.append(relative_pathname)
    return result
|
|
523 |
+ |
|
524 |
def _import_cas_into_cas(self, source_directory, files=None):
    """ Import from another CAS directory, choosing the full or partial strategy.

    With no file list the whole of source_directory is imported, which is
    significantly quicker than a partial import: one directory can simply
    take over another's hash, without doing any recursion.

    Args:
       source_directory (:class:`.CasBasedDirectory`): The directory to import from
       files ([str]): Pathnames to import, or None to import everything.
    """
    if files is not None:
        return self._partial_import_cas_into_cas(source_directory, files)
    return self._full_import_cas_into_cas(source_directory)
|
|
532 |
+ |
|
373 | 533 |
def import_files(self, external_pathspec: any, files: List[str] = None,
|
374 | 534 |
report_written: bool = True, update_utimes: bool = False,
|
375 | 535 |
can_link: bool = False) -> FileListResult:
|
... | ... | @@ -391,28 +551,33 @@ class CasBasedDirectory(Directory): |
391 | 551 |
|
392 | 552 |
can_link (bool): Ignored, since hard links do not have any meaning within CAS.
|
393 | 553 |
"""
|
394 |
- if isinstance(external_pathspec, FileBasedDirectory):
|
|
395 |
- source_directory = external_pathspec.get_underlying_directory()
|
|
396 |
- elif isinstance(external_pathspec, CasBasedDirectory):
|
|
397 |
- # TODO: This transfers from one CAS to another via the
|
|
398 |
- # filesystem, which is very inefficient. Alter this so it
|
|
399 |
- # transfers refs across directory.
|
|
554 |
+ |
|
555 |
+ duplicate_cas = None
|
|
556 |
+ if isinstance(external_pathspec, CasBasedDirectory):
|
|
557 |
+ result = self._import_cas_into_cas(external_pathspec, files=files)
|
|
558 |
+ |
|
559 |
+ # Duplicate the current directory and do an import that way.
|
|
560 |
+ duplicate_cas = CasBasedDirectory(self.context, ref=self.ref)
|
|
400 | 561 |
with tempfile.TemporaryDirectory(prefix="roundtrip") as tmpdir:
|
401 | 562 |
external_pathspec.export_files(tmpdir)
|
402 | 563 |
if files is None:
|
403 | 564 |
files = list_relative_paths(tmpdir)
|
404 |
- result = self._import_files_from_directory(tmpdir, files=files)
|
|
405 |
- return result
|
|
565 |
+ duplicate_cas._import_files_from_directory(tmpdir, files=files)
|
|
566 |
+ duplicate_cas._recalculate_recursing_down()
|
|
567 |
+ if duplicate_cas.parent:
|
|
568 |
+ duplicate_cas.parent._recalculate_recursing_up(self)
|
|
406 | 569 |
else:
|
407 |
- source_directory = external_pathspec
|
|
408 |
- |
|
409 |
- if files is None:
|
|
410 |
- files = list_relative_paths(source_directory)
|
|
570 |
+ if isinstance(external_pathspec, FileBasedDirectory):
|
|
571 |
+ source_directory = external_pathspec.get_underlying_directory()
|
|
572 |
+ else:
|
|
573 |
+ source_directory = external_pathspec
|
|
574 |
+ if files is None:
|
|
575 |
+ files = list_relative_paths(external_pathspec)
|
|
576 |
+ result = self._import_files_from_directory(source_directory, files=files)
|
|
411 | 577 |
|
412 | 578 |
# TODO: No notice is taken of report_written, update_utimes or can_link.
|
413 | 579 |
# Current behaviour is to fully populate the report, which is inefficient,
|
414 | 580 |
# but still correct.
|
415 |
- result = self._import_files_from_directory(source_directory, files=files)
|
|
416 | 581 |
|
417 | 582 |
# We need to recalculate and store the hashes of all directories both
|
418 | 583 |
# up and down the tree; we have changed our directory by importing files
|
... | ... | @@ -422,6 +587,10 @@ class CasBasedDirectory(Directory): |
422 | 587 |
self._recalculate_recursing_down()
|
423 | 588 |
if self.parent:
|
424 | 589 |
self.parent._recalculate_recursing_up(self)
|
590 |
+ if duplicate_cas:
|
|
591 |
+ if duplicate_cas.ref.hash != self.ref.hash:
|
|
592 |
+ print("Mismatch between file-imported result {} and cas-to-cas imported result {}.".format(duplicate_cas.ref.hash,self.ref.hash))
|
|
593 |
+ |
|
425 | 594 |
return result
|
426 | 595 |
|
427 | 596 |
def set_deterministic_mtime(self) -> None:
|
... | ... | @@ -448,13 +617,12 @@ class CasBasedDirectory(Directory): |
448 | 617 |
instead of copying.
|
449 | 618 |
|
450 | 619 |
"""
|
451 |
- |
|
452 | 620 |
if not os.path.exists(to_directory):
|
453 | 621 |
os.mkdir(to_directory)
|
454 | 622 |
|
455 | 623 |
for entry in self.pb2_directory.directories:
|
456 | 624 |
if entry.name not in self.index:
|
457 |
- raise VirtualDirectoryError("CasDir {} contained {} in directories but not in the index"
|
|
625 |
+ raise VirtualDirectoryError("CasDir {} contained '{}' in directories but not in the index"
|
|
458 | 626 |
.format(str(self), entry.name))
|
459 | 627 |
if not self._directory_read:
|
460 | 628 |
raise VirtualDirectoryError("CasDir {} has not been indexed yet".format(str(self)))
|