Jim MacArthur pushed to branch jmac/cache_artifacts_with_vdir at BuildStream / buildstream
Commits:
-
a1dc2acd
by Jim MacArthur at 2018-12-05T17:34:45Z
-
aecd7458
by Jim MacArthur at 2018-12-05T17:34:45Z
-
3de81d0b
by Jim MacArthur at 2018-12-05T17:34:45Z
-
11398076
by Jim MacArthur at 2018-12-05T17:34:45Z
-
4400c6e2
by Jim MacArthur at 2018-12-05T17:34:45Z
-
de0d2bde
by Jim MacArthur at 2018-12-05T17:34:45Z
-
7fde6bfd
by Jim MacArthur at 2018-12-05T17:34:45Z
-
d2f5827e
by Jim MacArthur at 2018-12-05T17:34:45Z
5 changed files:
- buildstream/element.py
- buildstream/storage/_casbaseddirectory.py
- buildstream/storage/_filebaseddirectory.py
- buildstream/storage/directory.py
- tests/storage/virtual_directory_import.py
Changes:
| ... | ... | @@ -1638,9 +1638,7 @@ class Element(Plugin): |
| 1638 | 1638 |
collectvdir = None
|
| 1639 | 1639 |
|
| 1640 | 1640 |
assemblevdir = CasBasedDirectory(cas_cache=self._get_context().artifactcache.cas, ref=None)
|
| 1641 |
- filesvdir = assemblevdir.descend("files", create=True)
|
|
| 1642 | 1641 |
logsvdir = assemblevdir.descend("logs", create=True)
|
| 1643 |
- buildtreevdir = assemblevdir.descend("buildtree", create=True)
|
|
| 1644 | 1642 |
metavdir = assemblevdir.descend("meta", create=True)
|
| 1645 | 1643 |
|
| 1646 | 1644 |
# Create artifact directory structure
|
| ... | ... | @@ -1652,14 +1650,14 @@ class Element(Plugin): |
| 1652 | 1650 |
os.mkdir(metadir)
|
| 1653 | 1651 |
|
| 1654 | 1652 |
if collect is not None and collectvdir is not None:
|
| 1655 |
- filesvdir.import_files(collectvdir, can_link=True)
|
|
| 1656 |
- |
|
| 1653 |
+ assemblevdir.fast_directory_import("files", collectvdir)
|
|
| 1657 | 1654 |
try:
|
| 1658 | 1655 |
sandbox_vroot = sandbox.get_virtual_directory()
|
| 1659 | 1656 |
sandbox_build_dir = sandbox_vroot.descend(
|
| 1660 | 1657 |
self.get_variable('build-root').lstrip(os.sep).split(os.sep))
|
| 1661 |
- buildtreevdir.import_files(sandbox_build_dir)
|
|
| 1658 |
+ assemblevdir.fast_directory_import("buildtree", sandbox_build_dir)
|
|
| 1662 | 1659 |
except VirtualDirectoryError:
|
| 1660 |
+ assemblevdir.descend("buildtree", create=True)
|
|
| 1663 | 1661 |
# Directory could not be found. Pre-virtual
|
| 1664 | 1662 |
# directory behaviour was to continue silently
|
| 1665 | 1663 |
# if the directory could not be found.
|
| ... | ... | @@ -1709,10 +1707,10 @@ class Element(Plugin): |
| 1709 | 1707 |
}), os.path.join(metadir, 'workspaced-dependencies.yaml'))
|
| 1710 | 1708 |
|
| 1711 | 1709 |
metavdir.import_files(metadir)
|
| 1710 |
+ logsvdir.import_files(logsdir)
|
|
| 1712 | 1711 |
|
| 1713 |
- with self.timed_activity("Caching artifact"):
|
|
| 1714 |
- # TODO: I don't know the artifact size! Can we get _get_dir_size to work for vdirs?
|
|
| 1715 |
- artifact_size = 1024
|
|
| 1712 |
+ artifact_size = assemblevdir.get_size()
|
|
| 1713 |
+ with self.timed_activity("Caching artifact of size {}".format(artifact_size)):
|
|
| 1716 | 1714 |
self.__artifacts.commit(self, assemblevdir, self.__get_cache_keys_for_commit())
|
| 1717 | 1715 |
|
| 1718 | 1716 |
if collect is not None and collectvdir is None:
|
| ... | ... | @@ -350,10 +350,13 @@ class CasBasedDirectory(Directory): |
| 350 | 350 |
filenode.is_executable = is_executable
|
| 351 | 351 |
self.index[filename] = IndexEntry(filenode, modified=modified or filename in self.index)
|
| 352 | 352 |
|
| 353 |
- def _copy_link_from_filesystem(self, basename, filename):
|
|
| 354 |
- self._add_new_link_direct(filename, os.readlink(os.path.join(basename, filename)))
|
|
| 353 |
+ def _copy_link_from_filesystem(self, filesystem_path, relative_path, destination_name):
|
|
| 354 |
+ """ filesystem_path should be a full path pointing to the source symlink.
|
|
| 355 |
+ relative_path should be the path we're importing to, which is used to turn absolute paths into relative ones.
|
|
| 356 |
+ destination_name should be the destination name in this directory. """
|
|
| 357 |
+ self._add_new_link_direct(relative_path, destination_name, os.readlink(filesystem_path))
|
|
| 355 | 358 |
|
| 356 |
- def _add_new_link_direct(self, name, target):
|
|
| 359 |
+ def _add_new_link_direct(self, relative_path, name, target):
|
|
| 357 | 360 |
existing_link = self._find_pb2_entry(name)
|
| 358 | 361 |
if existing_link:
|
| 359 | 362 |
symlinknode = existing_link
|
| ... | ... | @@ -361,8 +364,16 @@ class CasBasedDirectory(Directory): |
| 361 | 364 |
symlinknode = self.pb2_directory.symlinks.add()
|
| 362 | 365 |
assert isinstance(symlinknode, remote_execution_pb2.SymlinkNode)
|
| 363 | 366 |
symlinknode.name = name
|
| 364 |
- # A symlink node has no digest.
|
|
| 367 |
+ |
|
| 368 |
+ absolute = target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
|
|
| 369 |
+ if absolute:
|
|
| 370 |
+ distance_to_root = len(relative_path.split(CasBasedDirectory._pb2_path_sep))
|
|
| 371 |
+ # TODO: Using os.path.join and _pb2_path_sep in the same place is illogical
|
|
| 372 |
+ target = os.path.join(CasBasedDirectory._pb2_path_sep.join([".."] * distance_to_root), target[1:])
|
|
| 365 | 373 |
symlinknode.target = target
|
| 374 |
+ |
|
| 375 |
+ # A symlink node has no digest.
|
|
| 376 |
+ |
|
| 366 | 377 |
self.index[name] = IndexEntry(symlinknode, modified=(existing_link is not None))
|
| 367 | 378 |
|
| 368 | 379 |
def delete_entry(self, name):
|
| ... | ... | @@ -527,7 +538,7 @@ class CasBasedDirectory(Directory): |
| 527 | 538 |
result.combine(subdir_result)
|
| 528 | 539 |
elif os.path.islink(import_file):
|
| 529 | 540 |
if self._check_replacement(entry, path_prefix, result):
|
| 530 |
- self._copy_link_from_filesystem(source_directory, entry)
|
|
| 541 |
+ self._copy_link_from_filesystem(os.path.join(source_directory, entry), path_prefix, entry)
|
|
| 531 | 542 |
result.files_written.append(relative_pathname)
|
| 532 | 543 |
elif os.path.isdir(import_file):
|
| 533 | 544 |
# A plain directory which already exists isn't a problem; just ignore it.
|
| ... | ... | @@ -600,11 +611,9 @@ class CasBasedDirectory(Directory): |
| 600 | 611 |
filenode = self.pb2_directory.files.add(digest=item.digest, name=f,
|
| 601 | 612 |
is_executable=item.is_executable)
|
| 602 | 613 |
self.index[f] = IndexEntry(filenode, modified=True)
|
| 603 |
- # TODO: No idea if we actually need this (the digest already existed...)
|
|
| 604 |
- self.cas_cache.add_object(path=os.path.join(path_prefix, f), digest=item.digest)
|
|
| 605 | 614 |
else:
|
| 606 | 615 |
assert isinstance(item, remote_execution_pb2.SymlinkNode)
|
| 607 |
- self._add_new_link_direct(name=f, target=item.target)
|
|
| 616 |
+ self._add_new_link_direct(path_prefix, name=f, target=item.target)
|
|
| 608 | 617 |
else:
|
| 609 | 618 |
result.ignored.append(os.path.join(path_prefix, f))
|
| 610 | 619 |
return result
|
| ... | ... | @@ -838,6 +847,28 @@ class CasBasedDirectory(Directory): |
| 838 | 847 |
self._recalculate_recursing_up()
|
| 839 | 848 |
self._recalculate_recursing_down()
|
| 840 | 849 |
|
| 850 |
+ def get_size(self):
|
|
| 851 |
+ total = len(self.pb2_directory.SerializeToString())
|
|
| 852 |
+ for i in self.index.values():
|
|
| 853 |
+ if isinstance(i.buildstream_object, CasBasedDirectory):
|
|
| 854 |
+ total += i.buildstream_object.get_size()
|
|
| 855 |
+ elif isinstance(i.pb_object, remote_execution_pb2.FileNode):
|
|
| 856 |
+ src_name = self.cas_cache.objpath(i.pb_object.digest)
|
|
| 857 |
+ filesize = os.stat(src_name).st_size
|
|
| 858 |
+ total += filesize
|
|
| 859 |
+ # Symlink nodes are encoded as part of the directory serialization.
|
|
| 860 |
+ return total
|
|
| 861 |
+ |
|
| 862 |
+ def fast_directory_import(self, dirname, other_directory):
|
|
| 863 |
+ assert dirname not in self.index
|
|
| 864 |
+ if isinstance(other_directory, CasBasedDirectory):
|
|
| 865 |
+ self.index[dirname] = IndexEntry(other_directory.pb_object,
|
|
| 866 |
+ buildstream_object=other_directory.buildstream_object)
|
|
| 867 |
+ else:
|
|
| 868 |
+ # Revert to the old method.
|
|
| 869 |
+ subdir = self.descend(dirname, create=True)
|
|
| 870 |
+ subdir.import_files(other_directory, can_link=True)
|
|
| 871 |
+ |
|
| 841 | 872 |
def _get_identifier(self):
|
| 842 | 873 |
path = ""
|
| 843 | 874 |
if self.parent:
|
| ... | ... | @@ -125,6 +125,13 @@ class FileBasedDirectory(Directory): |
| 125 | 125 |
self._mark_changed()
|
| 126 | 126 |
return import_result
|
| 127 | 127 |
|
| 128 |
+ def fast_directory_import(self, dirname, other_directory):
|
|
| 129 |
+ # We can't do a fast import into a FileBasedDirectory, so this
|
|
| 130 |
+ # falls back to import_files.
|
|
| 131 |
+ assert dirname not in self.index
|
|
| 132 |
+ subdir = self.descend(dirname, create=True)
|
|
| 133 |
+ subdir.import_files(other_directory, can_link=True)
|
|
| 134 |
+ |
|
| 128 | 135 |
def _mark_changed(self):
|
| 129 | 136 |
self._directory_read = False
|
| 130 | 137 |
|
| ... | ... | @@ -99,6 +99,30 @@ class Directory(): |
| 99 | 99 |
|
| 100 | 100 |
raise NotImplementedError()
|
| 101 | 101 |
|
| 102 |
+ def fast_directory_import(self, dirname, other_directory):
|
|
| 103 |
+ """Import other_directory as a new directory in this one.
|
|
| 104 |
+ |
|
| 105 |
+ This is a potentially faster method than import_files with
|
|
| 106 |
+ fewer options. dirname must not already exist, and all files
|
|
| 107 |
+ are imported unconditionally. It is assumed that it is
|
|
| 108 |
+ acceptable to use filesystem hard links to files in
|
|
| 109 |
+ other_directory. You cannot update utimes or get a
|
|
| 110 |
+ FileListResult.
|
|
| 111 |
+ |
|
| 112 |
+ This only provides a benefit if both this and other_directory
|
|
| 113 |
+ are CAS-based directories. In other cases, it will fall back
|
|
| 114 |
+ to import_files.
|
|
| 115 |
+ |
|
| 116 |
+ Args:
|
|
| 117 |
+ dirname: The name to call the subdirectory in this
|
|
| 118 |
+ directory. This must not already exist in this directory.
|
|
| 119 |
+ |
|
| 120 |
+ other_directory: The directory to import.
|
|
| 121 |
+ |
|
| 122 |
+ """
|
|
| 123 |
+ |
|
| 124 |
+ raise NotImplementedError()
|
|
| 125 |
+ |
|
| 102 | 126 |
def export_files(self, to_directory, *, can_link=False, can_destroy=False):
|
| 103 | 127 |
"""Copies everything from this into to_directory.
|
| 104 | 128 |
|
| ... | ... | @@ -149,10 +149,10 @@ def resolve_symlinks(path, root): |
| 149 | 149 |
if target.startswith(os.path.sep):
|
| 150 | 150 |
# Absolute link - relative to root
|
| 151 | 151 |
location = os.path.join(root, target, tail)
|
| 152 |
+ return resolve_symlinks(location, root)
|
|
| 152 | 153 |
else:
|
| 153 |
- # Relative link - relative to symlink location
|
|
| 154 |
- location = os.path.join(location, target)
|
|
| 155 |
- return resolve_symlinks(location, root)
|
|
| 154 |
+ return resolve_symlinks(os.path.join(os.path.join(*components[:i]), target, tail), root)
|
|
| 155 |
+ |
|
| 156 | 156 |
# If we got here, no symlinks were found. Add on the final component and return.
|
| 157 | 157 |
location = os.path.join(location, components[-1])
|
| 158 | 158 |
return location
|
| ... | ... | @@ -199,7 +199,13 @@ def _import_test(tmpdir, original, overlay, generator_function, verify_contents= |
| 199 | 199 |
pass
|
| 200 | 200 |
else:
|
| 201 | 201 |
assert os.path.islink(realpath)
|
| 202 |
- assert os.readlink(realpath) == content
|
|
| 202 |
+ # We expect all storage to normalise absolute symlinks.
|
|
| 203 |
+ depth = len(path.split(os.path.sep)) - 1
|
|
| 204 |
+ if content.startswith(os.path.sep):
|
|
| 205 |
+ assert os.readlink(realpath) == os.path.join(os.path.sep.join([".."] * depth), content[1:])
|
|
| 206 |
+ else:
|
|
| 207 |
+ assert os.readlink(realpath) == content
|
|
| 208 |
+ |
|
| 203 | 209 |
elif typename == 'D':
|
| 204 | 210 |
# We can't do any more tests than this because it
|
| 205 | 211 |
# depends on things present in the original. Blank
|
