Jim MacArthur pushed to branch jmac/cache_artifacts_with_vdir at BuildStream / buildstream
Commits:
-
f0a76f43
by Jim MacArthur at 2018-12-06T15:30:11Z
-
45f29ec5
by Jim MacArthur at 2018-12-06T15:30:11Z
-
16636e41
by Jim MacArthur at 2018-12-06T15:30:11Z
-
4bc19f32
by Jim MacArthur at 2018-12-06T15:30:11Z
-
92057395
by Jim MacArthur at 2018-12-06T15:30:11Z
-
bfa65372
by Jim MacArthur at 2018-12-06T15:30:11Z
6 changed files:
- buildstream/_artifactcache/cascache.py
- buildstream/element.py
- buildstream/storage/_casbaseddirectory.py
- buildstream/storage/_filebaseddirectory.py
- buildstream/storage/directory.py
- tests/storage/virtual_directory_import.py
Changes:
| ... | ... | @@ -39,6 +39,7 @@ from .. import utils |
| 39 | 39 |
from .._exceptions import CASError, LoadError, LoadErrorReason
|
| 40 | 40 |
from .. import _yaml
|
| 41 | 41 |
|
| 42 |
+from ..storage._casbaseddirectory import CasBasedDirectory
|
|
| 42 | 43 |
|
| 43 | 44 |
# The default limit for gRPC messages is 4 MiB.
|
| 44 | 45 |
# Limit payload to 1 MiB to leave sufficient headroom for metadata.
|
| ... | ... | @@ -768,6 +769,9 @@ class CASCache(): |
| 768 | 769 |
# (Digest): Digest object for the directory added.
|
| 769 | 770 |
#
|
| 770 | 771 |
def _commit_directory(self, path, *, dir_digest=None):
|
| 772 |
+ if isinstance(path, CasBasedDirectory):
|
|
| 773 |
+ return self.add_object(digest=dir_digest, buffer=path.pb2_directory.SerializeToString())
|
|
| 774 |
+ |
|
| 771 | 775 |
directory = remote_execution_pb2.Directory()
|
| 772 | 776 |
|
| 773 | 777 |
for name in sorted(os.listdir(path)):
|
| ... | ... | @@ -102,6 +102,7 @@ from .types import _KeyStrength, CoreWarnings |
| 102 | 102 |
|
| 103 | 103 |
from .storage.directory import Directory
|
| 104 | 104 |
from .storage._filebaseddirectory import FileBasedDirectory
|
| 105 |
+from .storage._casbaseddirectory import CasBasedDirectory
|
|
| 105 | 106 |
from .storage.directory import VirtualDirectoryError
|
| 106 | 107 |
|
| 107 | 108 |
|
| ... | ... | @@ -1634,35 +1635,38 @@ class Element(Plugin): |
| 1634 | 1635 |
# No collect directory existed
|
| 1635 | 1636 |
collectvdir = None
|
| 1636 | 1637 |
|
| 1638 |
+ assemblevdir = CasBasedDirectory(cas_cache=self._get_context().artifactcache.cas, ref=None)
|
|
| 1639 |
+ logsvdir = assemblevdir.descend("logs", create=True)
|
|
| 1640 |
+ metavdir = assemblevdir.descend("meta", create=True)
|
|
| 1641 |
+ |
|
| 1637 | 1642 |
# Create artifact directory structure
|
| 1638 | 1643 |
assembledir = os.path.join(rootdir, 'artifact')
|
| 1639 |
- filesdir = os.path.join(assembledir, 'files')
|
|
| 1640 | 1644 |
logsdir = os.path.join(assembledir, 'logs')
|
| 1641 | 1645 |
metadir = os.path.join(assembledir, 'meta')
|
| 1642 |
- buildtreedir = os.path.join(assembledir, 'buildtree')
|
|
| 1643 | 1646 |
os.mkdir(assembledir)
|
| 1644 |
- if collect is not None and collectvdir is not None:
|
|
| 1645 |
- os.mkdir(filesdir)
|
|
| 1646 | 1647 |
os.mkdir(logsdir)
|
| 1647 | 1648 |
os.mkdir(metadir)
|
| 1648 |
- os.mkdir(buildtreedir)
|
|
| 1649 | 1649 |
|
| 1650 |
- # Hard link files from collect dir to files directory
|
|
| 1651 | 1650 |
if collect is not None and collectvdir is not None:
|
| 1652 |
- collectvdir.export_files(filesdir, can_link=True)
|
|
| 1651 |
+ if isinstance(collectvdir, CasBasedDirectory):
|
|
| 1652 |
+ assemblevdir.fast_directory_import("files", collectvdir)
|
|
| 1653 |
+ else:
|
|
| 1654 |
+ filesvdir = assemblevdir.descend("files", create=True)
|
|
| 1655 |
+ filesvdir.import_files(collectvdir, can_link=True)
|
|
| 1653 | 1656 |
|
| 1657 |
+ sandbox_vroot = sandbox.get_virtual_directory()
|
|
| 1654 | 1658 |
try:
|
| 1655 |
- sandbox_vroot = sandbox.get_virtual_directory()
|
|
| 1656 | 1659 |
sandbox_build_dir = sandbox_vroot.descend(
|
| 1657 | 1660 |
self.get_variable('build-root').lstrip(os.sep).split(os.sep))
|
| 1658 |
- # Hard link files from build-root dir to buildtreedir directory
|
|
| 1659 |
- sandbox_build_dir.export_files(buildtreedir)
|
|
| 1661 |
+ assemblevdir.fast_directory_import("buildtree", sandbox_build_dir)
|
|
| 1660 | 1662 |
except VirtualDirectoryError:
|
| 1661 | 1663 |
# Directory could not be found. Pre-virtual
|
| 1662 | 1664 |
# directory behaviour was to continue silently
|
| 1663 |
- # if the directory could not be found.
|
|
| 1664 |
- pass
|
|
| 1665 |
+ # if the directory could not be found, but we must create
|
|
| 1666 |
+ # the directory.
|
|
| 1667 |
+ assemblevdir.descend("buildtree", create=True)
|
|
| 1665 | 1668 |
|
| 1669 |
+ # Write some logs out to normal directories: logsdir and metadir
|
|
| 1666 | 1670 |
# Copy build log
|
| 1667 | 1671 |
log_filename = self._get_context().get_log_filename()
|
| 1668 | 1672 |
self._build_log_path = os.path.join(logsdir, 'build.log')
|
| ... | ... | @@ -1705,9 +1709,12 @@ class Element(Plugin): |
| 1705 | 1709 |
]
|
| 1706 | 1710 |
}), os.path.join(metadir, 'workspaced-dependencies.yaml'))
|
| 1707 | 1711 |
|
| 1708 |
- with self.timed_activity("Caching artifact"):
|
|
| 1709 |
- artifact_size = utils._get_dir_size(assembledir)
|
|
| 1710 |
- self.__artifacts.commit(self, assembledir, self.__get_cache_keys_for_commit())
|
|
| 1712 |
+ metavdir.import_files(metadir)
|
|
| 1713 |
+ logsvdir.import_files(logsdir)
|
|
| 1714 |
+ |
|
| 1715 |
+ artifact_size = assemblevdir.get_size()
|
|
| 1716 |
+ with self.timed_activity("Caching artifact of size {}".format(artifact_size)):
|
|
| 1717 |
+ self.__artifacts.commit(self, assemblevdir, self.__get_cache_keys_for_commit())
|
|
| 1711 | 1718 |
|
| 1712 | 1719 |
if collect is not None and collectvdir is None:
|
| 1713 | 1720 |
raise ElementError(
|
| ... | ... | @@ -350,10 +350,13 @@ class CasBasedDirectory(Directory): |
| 350 | 350 |
filenode.is_executable = is_executable
|
| 351 | 351 |
self.index[filename] = IndexEntry(filenode, modified=modified or filename in self.index)
|
| 352 | 352 |
|
| 353 |
- def _copy_link_from_filesystem(self, basename, filename):
|
|
| 354 |
- self._add_new_link_direct(filename, os.readlink(os.path.join(basename, filename)))
|
|
| 353 |
+ def _copy_link_from_filesystem(self, filesystem_path, relative_path, destination_name):
|
|
| 354 |
+ """ filesystem_path should be a full path point to the source symlink.
|
|
| 355 |
+ relative_path should be the path we're importing to, which is used to turn absolute paths into relative ones.
|
|
| 356 |
+ destination_name should be the destination name in this directory. """
|
|
| 357 |
+ self._add_new_link_direct(relative_path, destination_name, os.readlink(filesystem_path))
|
|
| 355 | 358 |
|
| 356 |
- def _add_new_link_direct(self, name, target):
|
|
| 359 |
+ def _add_new_link_direct(self, relative_path, name, target):
|
|
| 357 | 360 |
existing_link = self._find_pb2_entry(name)
|
| 358 | 361 |
if existing_link:
|
| 359 | 362 |
symlinknode = existing_link
|
| ... | ... | @@ -361,8 +364,15 @@ class CasBasedDirectory(Directory): |
| 361 | 364 |
symlinknode = self.pb2_directory.symlinks.add()
|
| 362 | 365 |
assert isinstance(symlinknode, remote_execution_pb2.SymlinkNode)
|
| 363 | 366 |
symlinknode.name = name
|
| 364 |
- # A symlink node has no digest.
|
|
| 367 |
+ |
|
| 368 |
+ absolute = target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
|
|
| 369 |
+ if absolute:
|
|
| 370 |
+ distance_to_root = len(relative_path.split(CasBasedDirectory._pb2_path_sep))
|
|
| 371 |
+ target = CasBasedDirectory._pb2_path_sep.join([".."] * distance_to_root + [target[1:]])
|
|
| 365 | 372 |
symlinknode.target = target
|
| 373 |
+ |
|
| 374 |
+ # A symlink node has no digest.
|
|
| 375 |
+ |
|
| 366 | 376 |
self.index[name] = IndexEntry(symlinknode, modified=(existing_link is not None))
|
| 367 | 377 |
|
| 368 | 378 |
def delete_entry(self, name):
|
| ... | ... | @@ -527,7 +537,7 @@ class CasBasedDirectory(Directory): |
| 527 | 537 |
result.combine(subdir_result)
|
| 528 | 538 |
elif os.path.islink(import_file):
|
| 529 | 539 |
if self._check_replacement(entry, path_prefix, result):
|
| 530 |
- self._copy_link_from_filesystem(source_directory, entry)
|
|
| 540 |
+ self._copy_link_from_filesystem(os.path.join(source_directory, entry), path_prefix, entry)
|
|
| 531 | 541 |
result.files_written.append(relative_pathname)
|
| 532 | 542 |
elif os.path.isdir(import_file):
|
| 533 | 543 |
# A plain directory which already exists isn't a problem; just ignore it.
|
| ... | ... | @@ -602,7 +612,7 @@ class CasBasedDirectory(Directory): |
| 602 | 612 |
self.index[f] = IndexEntry(filenode, modified=True)
|
| 603 | 613 |
else:
|
| 604 | 614 |
assert isinstance(item, remote_execution_pb2.SymlinkNode)
|
| 605 |
- self._add_new_link_direct(name=f, target=item.target)
|
|
| 615 |
+ self._add_new_link_direct(path_prefix, name=f, target=item.target)
|
|
| 606 | 616 |
else:
|
| 607 | 617 |
result.ignored.append(os.path.join(path_prefix, f))
|
| 608 | 618 |
return result
|
| ... | ... | @@ -637,7 +647,7 @@ class CasBasedDirectory(Directory): |
| 637 | 647 |
files = external_pathspec.list_relative_paths()
|
| 638 | 648 |
|
| 639 | 649 |
if isinstance(external_pathspec, FileBasedDirectory):
|
| 640 |
- source_directory = external_pathspec.get_underlying_directory()
|
|
| 650 |
+ source_directory = external_pathspec._get_underlying_directory()
|
|
| 641 | 651 |
result = self._import_files_from_directory(source_directory, files=files)
|
| 642 | 652 |
elif isinstance(external_pathspec, str):
|
| 643 | 653 |
source_directory = external_pathspec
|
| ... | ... | @@ -836,6 +846,28 @@ class CasBasedDirectory(Directory): |
| 836 | 846 |
self._recalculate_recursing_up()
|
| 837 | 847 |
self._recalculate_recursing_down()
|
| 838 | 848 |
|
| 849 |
+ def get_size(self):
|
|
| 850 |
+ total = len(self.pb2_directory.SerializeToString())
|
|
| 851 |
+ for i in self.index.values():
|
|
| 852 |
+ if isinstance(i.buildstream_object, CasBasedDirectory):
|
|
| 853 |
+ total += i.buildstream_object.get_size()
|
|
| 854 |
+ elif isinstance(i.pb_object, remote_execution_pb2.FileNode):
|
|
| 855 |
+ src_name = self.cas_cache.objpath(i.pb_object.digest)
|
|
| 856 |
+ filesize = os.stat(src_name).st_size
|
|
| 857 |
+ total += filesize
|
|
| 858 |
+ # Symlink nodes are encoded as part of the directory serialization.
|
|
| 859 |
+ return total
|
|
| 860 |
+ |
|
| 861 |
+ def fast_directory_import(self, dirname, other_directory):
|
|
| 862 |
+ assert dirname not in self.index
|
|
| 863 |
+ if isinstance(other_directory, CasBasedDirectory):
|
|
| 864 |
+ self.index[dirname] = IndexEntry(other_directory.pb_object,
|
|
| 865 |
+ buildstream_object=other_directory.buildstream_object)
|
|
| 866 |
+ else:
|
|
| 867 |
+ # Revert to the old method.
|
|
| 868 |
+ subdir = self.descend(dirname, create=True)
|
|
| 869 |
+ subdir.import_files(other_directory, can_link=True)
|
|
| 870 |
+ |
|
| 839 | 871 |
def _get_identifier(self):
|
| 840 | 872 |
path = ""
|
| 841 | 873 |
if self.parent:
|
| ... | ... | @@ -30,6 +30,7 @@ See also: :ref:`sandboxing`. |
| 30 | 30 |
import os
|
| 31 | 31 |
import time
|
| 32 | 32 |
from .directory import Directory, VirtualDirectoryError
|
| 33 |
+from .. import utils
|
|
| 33 | 34 |
from ..utils import link_files, copy_files, list_relative_paths, _get_link_mtime, _magic_timestamp
|
| 34 | 35 |
from ..utils import _set_deterministic_user, _set_deterministic_mtime
|
| 35 | 36 |
|
| ... | ... | @@ -125,6 +126,13 @@ class FileBasedDirectory(Directory): |
| 125 | 126 |
self._mark_changed()
|
| 126 | 127 |
return import_result
|
| 127 | 128 |
|
| 129 |
+ def fast_directory_import(self, dirname, other_directory):
|
|
| 130 |
+ # We can't do a fast import into a FileBasedDirectory, so this
|
|
| 131 |
+ # falls back to import_files.
|
|
| 132 |
+ assert dirname not in self.index
|
|
| 133 |
+ subdir = self.descend(dirname, create=True)
|
|
| 134 |
+ subdir.import_files(other_directory, can_link=True)
|
|
| 135 |
+ |
|
| 128 | 136 |
def _mark_changed(self):
|
| 129 | 137 |
self._directory_read = False
|
| 130 | 138 |
|
| ... | ... | @@ -201,6 +209,9 @@ class FileBasedDirectory(Directory): |
| 201 | 209 |
|
| 202 | 210 |
return list_relative_paths(self.external_directory)
|
| 203 | 211 |
|
| 212 |
+ def get_size(self):
|
|
| 213 |
+ return utils._get_dir_size(self.external_directory)
|
|
| 214 |
+ |
|
| 204 | 215 |
def __str__(self):
|
| 205 | 216 |
# This returns the whole path (since we don't know where the directory started)
|
| 206 | 217 |
# which exposes the sandbox directory; we will have to assume for the time being
|
| ... | ... | @@ -99,6 +99,30 @@ class Directory(): |
| 99 | 99 |
|
| 100 | 100 |
raise NotImplementedError()
|
| 101 | 101 |
|
| 102 |
+ def fast_directory_import(self, dirname, other_directory):
|
|
| 103 |
+ """Import other_directory as a new directory in this one.
|
|
| 104 |
+ |
|
| 105 |
This is a potentially faster method than import_files with
|
|
| 106 |
+ fewer options. dirname must not already exist, and all files
|
|
| 107 |
+ are imported unconditionally. It is assumed that it is
|
|
| 108 |
+ acceptable to use filesystem hard links to files in
|
|
| 109 |
+ other_directory. You cannot update utimes or get a
|
|
| 110 |
+ FileListResult.
|
|
| 111 |
+ |
|
| 112 |
+ This only provides a benefit if both this and other_directory
|
|
| 113 |
+ are CAS-based directories. In other cases, it will fall back
|
|
| 114 |
to import_files.
|
|
| 115 |
+ |
|
| 116 |
+ Args:
|
|
| 117 |
+ dirname: The name to call the subdirectory in this
|
|
| 118 |
+ directory. This must not already exist in this directory.
|
|
| 119 |
+ |
|
| 120 |
+ other_directory: The directory to import.
|
|
| 121 |
+ |
|
| 122 |
+ """
|
|
| 123 |
+ |
|
| 124 |
+ raise NotImplementedError()
|
|
| 125 |
+ |
|
| 102 | 126 |
def export_files(self, to_directory, *, can_link=False, can_destroy=False):
|
| 103 | 127 |
"""Copies everything from this into to_directory.
|
| 104 | 128 |
|
| ... | ... | @@ -176,3 +200,9 @@ class Directory(): |
| 176 | 200 |
|
| 177 | 201 |
"""
|
| 178 | 202 |
raise NotImplementedError()
|
| 203 |
+ |
|
| 204 |
+ def get_size(self):
|
|
| 205 |
+ """ Get an approximation of the storage space in bytes used by this directory
|
|
| 206 |
+ and all files and subdirectories in it. Storage space varies by implementation
|
|
| 207 |
+ and effective space used may be lower than this number due to deduplication. """
|
|
| 208 |
+ raise NotImplementedError()
|
| ... | ... | @@ -149,10 +149,10 @@ def resolve_symlinks(path, root): |
| 149 | 149 |
if target.startswith(os.path.sep):
|
| 150 | 150 |
# Absolute link - relative to root
|
| 151 | 151 |
location = os.path.join(root, target, tail)
|
| 152 |
+ return resolve_symlinks(location, root)
|
|
| 152 | 153 |
else:
|
| 153 |
- # Relative link - relative to symlink location
|
|
| 154 |
- location = os.path.join(location, target)
|
|
| 155 |
- return resolve_symlinks(location, root)
|
|
| 154 |
+ return resolve_symlinks(os.path.join(os.path.join(*components[:i]), target, tail), root)
|
|
| 155 |
+ |
|
| 156 | 156 |
# If we got here, no symlinks were found. Add on the final component and return.
|
| 157 | 157 |
location = os.path.join(location, components[-1])
|
| 158 | 158 |
return location
|
| ... | ... | @@ -199,7 +199,13 @@ def _import_test(tmpdir, original, overlay, generator_function, verify_contents= |
| 199 | 199 |
pass
|
| 200 | 200 |
else:
|
| 201 | 201 |
assert os.path.islink(realpath)
|
| 202 |
- assert os.readlink(realpath) == content
|
|
| 202 |
+ # We expect all storage to normalise absolute symlinks.
|
|
| 203 |
+ depth = len(path.split(os.path.sep)) - 1
|
|
| 204 |
+ if content.startswith(os.path.sep):
|
|
| 205 |
+ assert os.readlink(realpath) == os.path.sep.join([".."] * depth + [content[1:]])
|
|
| 206 |
+ else:
|
|
| 207 |
+ assert os.readlink(realpath) == content
|
|
| 208 |
+ |
|
| 203 | 209 |
elif typename == 'D':
|
| 204 | 210 |
# We can't do any more tests than this because it
|
| 205 | 211 |
# depends on things present in the original. Blank
|
