Jim MacArthur pushed to branch jmac/cache_artifacts_with_vdir at BuildStream / buildstream
Commits:
-
f0a76f43
by Jim MacArthur at 2018-12-06T15:30:11Z
-
45f29ec5
by Jim MacArthur at 2018-12-06T15:30:11Z
-
16636e41
by Jim MacArthur at 2018-12-06T15:30:11Z
-
4bc19f32
by Jim MacArthur at 2018-12-06T15:30:11Z
-
92057395
by Jim MacArthur at 2018-12-06T15:30:11Z
-
bfa65372
by Jim MacArthur at 2018-12-06T15:30:11Z
6 changed files:
- buildstream/_artifactcache/cascache.py
- buildstream/element.py
- buildstream/storage/_casbaseddirectory.py
- buildstream/storage/_filebaseddirectory.py
- buildstream/storage/directory.py
- tests/storage/virtual_directory_import.py
Changes:
... | ... | @@ -39,6 +39,7 @@ from .. import utils |
39 | 39 |
from .._exceptions import CASError, LoadError, LoadErrorReason
|
40 | 40 |
from .. import _yaml
|
41 | 41 |
|
42 |
+from ..storage._casbaseddirectory import CasBasedDirectory
|
|
42 | 43 |
|
43 | 44 |
# The default limit for gRPC messages is 4 MiB.
|
44 | 45 |
# Limit payload to 1 MiB to leave sufficient headroom for metadata.
|
... | ... | @@ -768,6 +769,9 @@ class CASCache(): |
768 | 769 |
# (Digest): Digest object for the directory added.
|
769 | 770 |
#
|
770 | 771 |
def _commit_directory(self, path, *, dir_digest=None):
|
772 |
+ if isinstance(path, CasBasedDirectory):
|
|
773 |
+ return self.add_object(digest=dir_digest, buffer=path.pb2_directory.SerializeToString())
|
|
774 |
+ |
|
771 | 775 |
directory = remote_execution_pb2.Directory()
|
772 | 776 |
|
773 | 777 |
for name in sorted(os.listdir(path)):
|
... | ... | @@ -102,6 +102,7 @@ from .types import _KeyStrength, CoreWarnings |
102 | 102 |
|
103 | 103 |
from .storage.directory import Directory
|
104 | 104 |
from .storage._filebaseddirectory import FileBasedDirectory
|
105 |
+from .storage._casbaseddirectory import CasBasedDirectory
|
|
105 | 106 |
from .storage.directory import VirtualDirectoryError
|
106 | 107 |
|
107 | 108 |
|
... | ... | @@ -1634,35 +1635,38 @@ class Element(Plugin): |
1634 | 1635 |
# No collect directory existed
|
1635 | 1636 |
collectvdir = None
|
1636 | 1637 |
|
1638 |
+ assemblevdir = CasBasedDirectory(cas_cache=self._get_context().artifactcache.cas, ref=None)
|
|
1639 |
+ logsvdir = assemblevdir.descend("logs", create=True)
|
|
1640 |
+ metavdir = assemblevdir.descend("meta", create=True)
|
|
1641 |
+ |
|
1637 | 1642 |
# Create artifact directory structure
|
1638 | 1643 |
assembledir = os.path.join(rootdir, 'artifact')
|
1639 |
- filesdir = os.path.join(assembledir, 'files')
|
|
1640 | 1644 |
logsdir = os.path.join(assembledir, 'logs')
|
1641 | 1645 |
metadir = os.path.join(assembledir, 'meta')
|
1642 |
- buildtreedir = os.path.join(assembledir, 'buildtree')
|
|
1643 | 1646 |
os.mkdir(assembledir)
|
1644 |
- if collect is not None and collectvdir is not None:
|
|
1645 |
- os.mkdir(filesdir)
|
|
1646 | 1647 |
os.mkdir(logsdir)
|
1647 | 1648 |
os.mkdir(metadir)
|
1648 |
- os.mkdir(buildtreedir)
|
|
1649 | 1649 |
|
1650 |
- # Hard link files from collect dir to files directory
|
|
1651 | 1650 |
if collect is not None and collectvdir is not None:
|
1652 |
- collectvdir.export_files(filesdir, can_link=True)
|
|
1651 |
+ if isinstance(collectvdir, CasBasedDirectory):
|
|
1652 |
+ assemblevdir.fast_directory_import("files", collectvdir)
|
|
1653 |
+ else:
|
|
1654 |
+ filesvdir = assemblevdir.descend("files", create=True)
|
|
1655 |
+ filesvdir.import_files(collectvdir, can_link=True)
|
|
1653 | 1656 |
|
1657 |
+ sandbox_vroot = sandbox.get_virtual_directory()
|
|
1654 | 1658 |
try:
|
1655 |
- sandbox_vroot = sandbox.get_virtual_directory()
|
|
1656 | 1659 |
sandbox_build_dir = sandbox_vroot.descend(
|
1657 | 1660 |
self.get_variable('build-root').lstrip(os.sep).split(os.sep))
|
1658 |
- # Hard link files from build-root dir to buildtreedir directory
|
|
1659 |
- sandbox_build_dir.export_files(buildtreedir)
|
|
1661 |
+ assemblevdir.fast_directory_import("buildtree", sandbox_build_dir)
|
|
1660 | 1662 |
except VirtualDirectoryError:
|
1661 | 1663 |
# Directory could not be found. Pre-virtual
|
1662 | 1664 |
# directory behaviour was to continue silently
|
1663 |
- # if the directory could not be found.
|
|
1664 |
- pass
|
|
1665 |
+ # if the directory could not be found, but we must create
|
|
1666 |
+ # the directory.
|
|
1667 |
+ assemblevdir.descend("buildtree", create=True)
|
|
1665 | 1668 |
|
1669 |
+ # Write some logs out to normal directories: logsdir and metadir
|
|
1666 | 1670 |
# Copy build log
|
1667 | 1671 |
log_filename = self._get_context().get_log_filename()
|
1668 | 1672 |
self._build_log_path = os.path.join(logsdir, 'build.log')
|
... | ... | @@ -1705,9 +1709,12 @@ class Element(Plugin): |
1705 | 1709 |
]
|
1706 | 1710 |
}), os.path.join(metadir, 'workspaced-dependencies.yaml'))
|
1707 | 1711 |
|
1708 |
- with self.timed_activity("Caching artifact"):
|
|
1709 |
- artifact_size = utils._get_dir_size(assembledir)
|
|
1710 |
- self.__artifacts.commit(self, assembledir, self.__get_cache_keys_for_commit())
|
|
1712 |
+ metavdir.import_files(metadir)
|
|
1713 |
+ logsvdir.import_files(logsdir)
|
|
1714 |
+ |
|
1715 |
+ artifact_size = assemblevdir.get_size()
|
|
1716 |
+ with self.timed_activity("Caching artifact of size {}".format(artifact_size)):
|
|
1717 |
+ self.__artifacts.commit(self, assemblevdir, self.__get_cache_keys_for_commit())
|
|
1711 | 1718 |
|
1712 | 1719 |
if collect is not None and collectvdir is None:
|
1713 | 1720 |
raise ElementError(
|
... | ... | @@ -350,10 +350,13 @@ class CasBasedDirectory(Directory): |
350 | 350 |
filenode.is_executable = is_executable
|
351 | 351 |
self.index[filename] = IndexEntry(filenode, modified=modified or filename in self.index)
|
352 | 352 |
|
353 |
- def _copy_link_from_filesystem(self, basename, filename):
|
|
354 |
- self._add_new_link_direct(filename, os.readlink(os.path.join(basename, filename)))
|
|
353 |
+ def _copy_link_from_filesystem(self, filesystem_path, relative_path, destination_name):
|
|
354 |
+ """ filesystem_path should be a full path pointing to the source symlink.
|
|
355 |
+ relative_path should be the path we're importing to, which is used to turn absolute paths into relative ones.
|
|
356 |
+ destination_name should be the destination name in this directory. """
|
|
357 |
+ self._add_new_link_direct(relative_path, destination_name, os.readlink(filesystem_path))
|
|
355 | 358 |
|
356 |
- def _add_new_link_direct(self, name, target):
|
|
359 |
+ def _add_new_link_direct(self, relative_path, name, target):
|
|
357 | 360 |
existing_link = self._find_pb2_entry(name)
|
358 | 361 |
if existing_link:
|
359 | 362 |
symlinknode = existing_link
|
... | ... | @@ -361,8 +364,15 @@ class CasBasedDirectory(Directory): |
361 | 364 |
symlinknode = self.pb2_directory.symlinks.add()
|
362 | 365 |
assert isinstance(symlinknode, remote_execution_pb2.SymlinkNode)
|
363 | 366 |
symlinknode.name = name
|
364 |
- # A symlink node has no digest.
|
|
367 |
+ |
|
368 |
+ absolute = target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
|
|
369 |
+ if absolute:
|
|
370 |
+ distance_to_root = len(relative_path.split(CasBasedDirectory._pb2_path_sep))
|
|
371 |
+ target = CasBasedDirectory._pb2_path_sep.join([".."] * distance_to_root + [target[1:]])
|
|
365 | 372 |
symlinknode.target = target
|
373 |
+ |
|
374 |
+ # A symlink node has no digest.
|
|
375 |
+ |
|
366 | 376 |
self.index[name] = IndexEntry(symlinknode, modified=(existing_link is not None))
|
367 | 377 |
|
368 | 378 |
def delete_entry(self, name):
|
... | ... | @@ -527,7 +537,7 @@ class CasBasedDirectory(Directory): |
527 | 537 |
result.combine(subdir_result)
|
528 | 538 |
elif os.path.islink(import_file):
|
529 | 539 |
if self._check_replacement(entry, path_prefix, result):
|
530 |
- self._copy_link_from_filesystem(source_directory, entry)
|
|
540 |
+ self._copy_link_from_filesystem(os.path.join(source_directory, entry), path_prefix, entry)
|
|
531 | 541 |
result.files_written.append(relative_pathname)
|
532 | 542 |
elif os.path.isdir(import_file):
|
533 | 543 |
# A plain directory which already exists isn't a problem; just ignore it.
|
... | ... | @@ -602,7 +612,7 @@ class CasBasedDirectory(Directory): |
602 | 612 |
self.index[f] = IndexEntry(filenode, modified=True)
|
603 | 613 |
else:
|
604 | 614 |
assert isinstance(item, remote_execution_pb2.SymlinkNode)
|
605 |
- self._add_new_link_direct(name=f, target=item.target)
|
|
615 |
+ self._add_new_link_direct(path_prefix, name=f, target=item.target)
|
|
606 | 616 |
else:
|
607 | 617 |
result.ignored.append(os.path.join(path_prefix, f))
|
608 | 618 |
return result
|
... | ... | @@ -637,7 +647,7 @@ class CasBasedDirectory(Directory): |
637 | 647 |
files = external_pathspec.list_relative_paths()
|
638 | 648 |
|
639 | 649 |
if isinstance(external_pathspec, FileBasedDirectory):
|
640 |
- source_directory = external_pathspec.get_underlying_directory()
|
|
650 |
+ source_directory = external_pathspec._get_underlying_directory()
|
|
641 | 651 |
result = self._import_files_from_directory(source_directory, files=files)
|
642 | 652 |
elif isinstance(external_pathspec, str):
|
643 | 653 |
source_directory = external_pathspec
|
... | ... | @@ -836,6 +846,28 @@ class CasBasedDirectory(Directory): |
836 | 846 |
self._recalculate_recursing_up()
|
837 | 847 |
self._recalculate_recursing_down()
|
838 | 848 |
|
849 |
+ def get_size(self):
|
|
850 |
+ total = len(self.pb2_directory.SerializeToString())
|
|
851 |
+ for i in self.index.values():
|
|
852 |
+ if isinstance(i.buildstream_object, CasBasedDirectory):
|
|
853 |
+ total += i.buildstream_object.get_size()
|
|
854 |
+ elif isinstance(i.pb_object, remote_execution_pb2.FileNode):
|
|
855 |
+ src_name = self.cas_cache.objpath(i.pb_object.digest)
|
|
856 |
+ filesize = os.stat(src_name).st_size
|
|
857 |
+ total += filesize
|
|
858 |
+ # Symlink nodes are encoded as part of the directory serialization.
|
|
859 |
+ return total
|
|
860 |
+ |
|
861 |
+ def fast_directory_import(self, dirname, other_directory):
|
|
862 |
+ assert dirname not in self.index
|
|
863 |
+ if isinstance(other_directory, CasBasedDirectory):
|
|
864 |
+ self.index[dirname] = IndexEntry(other_directory.pb_object,
|
|
865 |
+ buildstream_object=other_directory.buildstream_object)
|
|
866 |
+ else:
|
|
867 |
+ # Revert to the old method.
|
|
868 |
+ subdir = self.descend(dirname, create=True)
|
|
869 |
+ subdir.import_files(other_directory, can_link=True)
|
|
870 |
+ |
|
839 | 871 |
def _get_identifier(self):
|
840 | 872 |
path = ""
|
841 | 873 |
if self.parent:
|
... | ... | @@ -30,6 +30,7 @@ See also: :ref:`sandboxing`. |
30 | 30 |
import os
|
31 | 31 |
import time
|
32 | 32 |
from .directory import Directory, VirtualDirectoryError
|
33 |
+from .. import utils
|
|
33 | 34 |
from ..utils import link_files, copy_files, list_relative_paths, _get_link_mtime, _magic_timestamp
|
34 | 35 |
from ..utils import _set_deterministic_user, _set_deterministic_mtime
|
35 | 36 |
|
... | ... | @@ -125,6 +126,13 @@ class FileBasedDirectory(Directory): |
125 | 126 |
self._mark_changed()
|
126 | 127 |
return import_result
|
127 | 128 |
|
129 |
+ def fast_directory_import(self, dirname, other_directory):
|
|
130 |
+ # We can't do a fast import into a FileBasedDirectory, so this
|
|
131 |
+ # falls back to import_files.
|
|
132 |
+ assert dirname not in self.index
|
|
133 |
+ subdir = self.descend(dirname, create=True)
|
|
134 |
+ subdir.import_files(other_directory, can_link=True)
|
|
135 |
+ |
|
128 | 136 |
def _mark_changed(self):
|
129 | 137 |
self._directory_read = False
|
130 | 138 |
|
... | ... | @@ -201,6 +209,9 @@ class FileBasedDirectory(Directory): |
201 | 209 |
|
202 | 210 |
return list_relative_paths(self.external_directory)
|
203 | 211 |
|
212 |
+ def get_size(self):
|
|
213 |
+ return utils._get_dir_size(self.external_directory)
|
|
214 |
+ |
|
204 | 215 |
def __str__(self):
|
205 | 216 |
# This returns the whole path (since we don't know where the directory started)
|
206 | 217 |
# which exposes the sandbox directory; we will have to assume for the time being
|
... | ... | @@ -99,6 +99,30 @@ class Directory(): |
99 | 99 |
|
100 | 100 |
raise NotImplementedError()
|
101 | 101 |
|
102 |
+ def fast_directory_import(self, dirname, other_directory):
|
|
103 |
+ """Import other_directory as a new directory in this one.
|
|
104 |
+ |
|
105 |
+ This is a potentially faster method than import_files with
|
|
106 |
+ fewer options. dirname must not already exist, and all files
|
|
107 |
+ are imported unconditionally. It is assumed that it is
|
|
108 |
+ acceptable to use filesystem hard links to files in
|
|
109 |
+ other_directory. You cannot update utimes or get a
|
|
110 |
+ FileListResult.
|
|
111 |
+ |
|
112 |
+ This only provides a benefit if both this and other_directory
|
|
113 |
+ are CAS-based directories. In other cases, it will fall back
|
|
114 |
+ to import_files.
|
|
115 |
+ |
|
116 |
+ Args:
|
|
117 |
+ dirname: The name to call the subdirectory in this
|
|
118 |
+ directory. This must not already exist in this directory.
|
|
119 |
+ |
|
120 |
+ other_directory: The directory to import.
|
|
121 |
+ |
|
122 |
+ """
|
|
123 |
+ |
|
124 |
+ raise NotImplementedError()
|
|
125 |
+ |
|
102 | 126 |
def export_files(self, to_directory, *, can_link=False, can_destroy=False):
|
103 | 127 |
"""Copies everything from this into to_directory.
|
104 | 128 |
|
... | ... | @@ -176,3 +200,9 @@ class Directory(): |
176 | 200 |
|
177 | 201 |
"""
|
178 | 202 |
raise NotImplementedError()
|
203 |
+ |
|
204 |
+ def get_size(self):
|
|
205 |
+ """ Get an approximation of the storage space in bytes used by this directory
|
|
206 |
+ and all files and subdirectories in it. Storage space varies by implementation
|
|
207 |
+ and effective space used may be lower than this number due to deduplication. """
|
|
208 |
+ raise NotImplementedError()
|
... | ... | @@ -149,10 +149,10 @@ def resolve_symlinks(path, root): |
149 | 149 |
if target.startswith(os.path.sep):
|
150 | 150 |
# Absolute link - relative to root
|
151 | 151 |
location = os.path.join(root, target, tail)
|
152 |
+ return resolve_symlinks(location, root)
|
|
152 | 153 |
else:
|
153 |
- # Relative link - relative to symlink location
|
|
154 |
- location = os.path.join(location, target)
|
|
155 |
- return resolve_symlinks(location, root)
|
|
154 |
+ return resolve_symlinks(os.path.join(os.path.join(*components[:i]), target, tail), root)
|
|
155 |
+ |
|
156 | 156 |
# If we got here, no symlinks were found. Add on the final component and return.
|
157 | 157 |
location = os.path.join(location, components[-1])
|
158 | 158 |
return location
|
... | ... | @@ -199,7 +199,13 @@ def _import_test(tmpdir, original, overlay, generator_function, verify_contents= |
199 | 199 |
pass
|
200 | 200 |
else:
|
201 | 201 |
assert os.path.islink(realpath)
|
202 |
- assert os.readlink(realpath) == content
|
|
202 |
+ # We expect all storage to normalise absolute symlinks.
|
|
203 |
+ depth = len(path.split(os.path.sep)) - 1
|
|
204 |
+ if content.startswith(os.path.sep):
|
|
205 |
+ assert os.readlink(realpath) == os.path.sep.join([".."] * depth + [content[1:]])
|
|
206 |
+ else:
|
|
207 |
+ assert os.readlink(realpath) == content
|
|
208 |
+ |
|
203 | 209 |
elif typename == 'D':
|
204 | 210 |
# We can't do any more tests than this because it
|
205 | 211 |
# depends on things present in the original. Blank
|