Jim MacArthur pushed to branch jmac/stop-caching-vdirs at BuildStream / buildstream
Commits:
-
b199afe6
by Jürg Billeter at 2018-09-25T09:01:51Z
-
7d199322
by Jürg Billeter at 2018-09-25T09:01:51Z
-
e2e24015
by Jürg Billeter at 2018-09-25T09:01:51Z
-
697d10f2
by Jürg Billeter at 2018-09-25T09:01:51Z
-
81c51dbf
by Jürg Billeter at 2018-09-25T09:31:55Z
-
2df7d140
by Jim MacArthur at 2018-09-25T10:36:37Z
-
62f59382
by Jim MacArthur at 2018-09-25T10:36:37Z
4 changed files:
- buildstream/_artifactcache/cascache.py
- buildstream/_artifactcache/casserver.py
- buildstream/element.py
- buildstream/sandbox/sandbox.py
Changes:
... | ... | @@ -44,6 +44,11 @@ from .._exceptions import ArtifactError |
44 | 44 |
from . import ArtifactCache
|
45 | 45 |
|
46 | 46 |
|
47 |
+# The default limit for gRPC messages is 4 MiB.
|
|
48 |
+# Limit payload to 1 MiB to leave sufficient headroom for metadata.
|
|
49 |
+_MAX_PAYLOAD_BYTES = 1024 * 1024
|
|
50 |
+ |
|
51 |
+ |
|
47 | 52 |
# A CASCache manages artifacts in a CAS repository as specified in the
|
48 | 53 |
# Remote Execution API.
|
49 | 54 |
#
|
... | ... | @@ -854,6 +859,80 @@ class CASCache(ArtifactCache): |
854 | 859 |
|
855 | 860 |
assert digest.size_bytes == os.fstat(stream.fileno()).st_size
|
856 | 861 |
|
862 |
+ # _ensure_blob():
|
|
863 |
+ #
|
|
864 |
+ # Fetch and add blob if it's not already local.
|
|
865 |
+ #
|
|
866 |
+ # Args:
|
|
867 |
+ # remote (Remote): The remote to use.
|
|
868 |
+ # digest (Digest): Digest object for the blob to fetch.
|
|
869 |
+ #
|
|
870 |
+ # Returns:
|
|
871 |
+ # (str): The path of the object
|
|
872 |
+ #
|
|
873 |
+ def _ensure_blob(self, remote, digest):
|
|
874 |
+ objpath = self.objpath(digest)
|
|
875 |
+ if os.path.exists(objpath):
|
|
876 |
+ # already in local repository
|
|
877 |
+ return objpath
|
|
878 |
+ |
|
879 |
+ with tempfile.NamedTemporaryFile(dir=self.tmpdir) as f:
|
|
880 |
+ self._fetch_blob(remote, digest, f)
|
|
881 |
+ |
|
882 |
+ added_digest = self.add_object(path=f.name)
|
|
883 |
+ assert added_digest.hash == digest.hash
|
|
884 |
+ |
|
885 |
+ return objpath
|
|
886 |
+ |
|
887 |
+ def _batch_download_complete(self, batch):
|
|
888 |
+ for digest, data in batch.send():
|
|
889 |
+ with tempfile.NamedTemporaryFile(dir=self.tmpdir) as f:
|
|
890 |
+ f.write(data)
|
|
891 |
+ f.flush()
|
|
892 |
+ |
|
893 |
+ added_digest = self.add_object(path=f.name)
|
|
894 |
+ assert added_digest.hash == digest.hash
|
|
895 |
+ |
|
896 |
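+ # Helper function for _fetch_directory(): completes the pending batch, moves deferred directories to the processing queue and returns a new, empty batch.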
|
|
897 |
+ def _fetch_directory_batch(self, remote, batch, fetch_queue, fetch_next_queue):
|
|
898 |
+ self._batch_download_complete(batch)
|
|
899 |
+ |
|
900 |
+ # All previously scheduled directories are now locally available,
|
|
901 |
+ # move them to the processing queue.
|
|
902 |
+ fetch_queue.extend(fetch_next_queue)
|
|
903 |
+ fetch_next_queue.clear()
|
|
904 |
+ return _CASBatchRead(remote)
|
|
905 |
+ |
|
906 |
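+ # Helper function for _fetch_directory(): skips, downloads or batches a single blob, optionally queues it as a directory to process, and returns the batch to use next.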
|
|
907 |
+ def _fetch_directory_node(self, remote, digest, batch, fetch_queue, fetch_next_queue, *, recursive=False):
|
|
908 |
+ in_local_cache = os.path.exists(self.objpath(digest))
|
|
909 |
+ |
|
910 |
+ if in_local_cache:
|
|
911 |
+ # Skip download, already in local cache.
|
|
912 |
+ pass
|
|
913 |
+ elif (digest.size_bytes >= remote.max_batch_total_size_bytes or
|
|
914 |
+ not remote.batch_read_supported):
|
|
915 |
+ # Too large for a batch request, or the server does not support BatchReadBlobs(): download in an independent request.
|
|
916 |
+ self._ensure_blob(remote, digest)
|
|
917 |
+ in_local_cache = True
|
|
918 |
+ else:
|
|
919 |
+ if not batch.add(digest):
|
|
920 |
+ # Not enough space left in batch request.
|
|
921 |
+ # Complete pending batch first.
|
|
922 |
+ batch = self._fetch_directory_batch(remote, batch, fetch_queue, fetch_next_queue)
|
|
923 |
+ batch.add(digest)
|
|
924 |
+ |
|
925 |
+ if recursive:
|
|
926 |
+ if in_local_cache:
|
|
927 |
+ # Add directory to processing queue.
|
|
928 |
+ fetch_queue.append(digest)
|
|
929 |
+ else:
|
|
930 |
+ # Directory will be available after completing pending batch.
|
|
931 |
+ # Add directory to deferred processing queue.
|
|
932 |
+ fetch_next_queue.append(digest)
|
|
933 |
+ |
|
934 |
+ return batch
|
|
935 |
+ |
|
857 | 936 |
# _fetch_directory():
|
858 | 937 |
#
|
859 | 938 |
# Fetches remote directory and adds it to content addressable store.
|
... | ... | @@ -867,39 +946,32 @@ class CASCache(ArtifactCache): |
867 | 946 |
# dir_digest (Digest): Digest object for the directory to fetch.
|
868 | 947 |
#
|
869 | 948 |
def _fetch_directory(self, remote, dir_digest):
|
870 |
- objpath = self.objpath(dir_digest)
|
|
871 |
- if os.path.exists(objpath):
|
|
872 |
- # already in local cache
|
|
873 |
- return
|
|
874 |
- |
|
875 |
- with tempfile.NamedTemporaryFile(dir=self.tmpdir) as out:
|
|
876 |
- self._fetch_blob(remote, dir_digest, out)
|
|
877 |
- |
|
878 |
- directory = remote_execution_pb2.Directory()
|
|
949 |
+ fetch_queue = [dir_digest]
|
|
950 |
+ fetch_next_queue = []
|
|
951 |
+ batch = _CASBatchRead(remote)
|
|
879 | 952 |
|
880 |
- with open(out.name, 'rb') as f:
|
|
881 |
- directory.ParseFromString(f.read())
|
|
953 |
+ while len(fetch_queue) + len(fetch_next_queue) > 0:
|
|
954 |
+ if len(fetch_queue) == 0:
|
|
955 |
+ batch = self._fetch_directory_batch(remote, batch, fetch_queue, fetch_next_queue)
|
|
882 | 956 |
|
883 |
- for filenode in directory.files:
|
|
884 |
- fileobjpath = self.objpath(filenode.digest)
|
|
885 |
- if os.path.exists(fileobjpath):
|
|
886 |
- # already in local cache
|
|
887 |
- continue
|
|
957 |
+ dir_digest = fetch_queue.pop(0)
|
|
888 | 958 |
|
889 |
- with tempfile.NamedTemporaryFile(dir=self.tmpdir) as f:
|
|
890 |
- self._fetch_blob(remote, filenode.digest, f)
|
|
959 |
+ objpath = self._ensure_blob(remote, dir_digest)
|
|
891 | 960 |
|
892 |
- digest = self.add_object(path=f.name)
|
|
893 |
- assert digest.hash == filenode.digest.hash
|
|
961 |
+ directory = remote_execution_pb2.Directory()
|
|
962 |
+ with open(objpath, 'rb') as f:
|
|
963 |
+ directory.ParseFromString(f.read())
|
|
894 | 964 |
|
895 | 965 |
for dirnode in directory.directories:
|
896 |
- self._fetch_directory(remote, dirnode.digest)
|
|
966 |
+ batch = self._fetch_directory_node(remote, dirnode.digest, batch,
|
|
967 |
+ fetch_queue, fetch_next_queue, recursive=True)
|
|
968 |
+ |
|
969 |
+ for filenode in directory.files:
|
|
970 |
+ batch = self._fetch_directory_node(remote, filenode.digest, batch,
|
|
971 |
+ fetch_queue, fetch_next_queue)
|
|
897 | 972 |
|
898 |
- # Place directory blob only in final location when we've
|
|
899 |
- # downloaded all referenced blobs to avoid dangling
|
|
900 |
- # references in the repository.
|
|
901 |
- digest = self.add_object(path=out.name)
|
|
902 |
- assert digest.hash == dir_digest.hash
|
|
973 |
+ # Fetch final batch
|
|
974 |
+ self._fetch_directory_batch(remote, batch, fetch_queue, fetch_next_queue)
|
|
903 | 975 |
|
904 | 976 |
def _fetch_tree(self, remote, digest):
|
905 | 977 |
# download but do not store the Tree object
|
... | ... | @@ -914,16 +986,7 @@ class CASCache(ArtifactCache): |
914 | 986 |
tree.children.extend([tree.root])
|
915 | 987 |
for directory in tree.children:
|
916 | 988 |
for filenode in directory.files:
|
917 |
- fileobjpath = self.objpath(filenode.digest)
|
|
918 |
- if os.path.exists(fileobjpath):
|
|
919 |
- # already in local cache
|
|
920 |
- continue
|
|
921 |
- |
|
922 |
- with tempfile.NamedTemporaryFile(dir=self.tmpdir) as f:
|
|
923 |
- self._fetch_blob(remote, filenode.digest, f)
|
|
924 |
- |
|
925 |
- added_digest = self.add_object(path=f.name)
|
|
926 |
- assert added_digest.hash == filenode.digest.hash
|
|
989 |
+ self._ensure_blob(remote, filenode.digest)
|
|
927 | 990 |
|
928 | 991 |
# place directory blob only in final location when we've downloaded
|
929 | 992 |
# all referenced blobs to avoid dangling references in the repository
|
... | ... | @@ -942,12 +1005,12 @@ class CASCache(ArtifactCache): |
942 | 1005 |
finished = False
|
943 | 1006 |
remaining = digest.size_bytes
|
944 | 1007 |
while not finished:
|
945 |
- chunk_size = min(remaining, 64 * 1024)
|
|
1008 |
+ chunk_size = min(remaining, _MAX_PAYLOAD_BYTES)
|
|
946 | 1009 |
remaining -= chunk_size
|
947 | 1010 |
|
948 | 1011 |
request = bytestream_pb2.WriteRequest()
|
949 | 1012 |
request.write_offset = offset
|
950 |
- # max. 64 kB chunks
|
|
1013 |
+ # max. _MAX_PAYLOAD_BYTES chunks
|
|
951 | 1014 |
request.data = instream.read(chunk_size)
|
952 | 1015 |
request.resource_name = resname
|
953 | 1016 |
request.finish_write = remaining <= 0
|
... | ... | @@ -1035,11 +1098,78 @@ class _CASRemote(): |
1035 | 1098 |
|
1036 | 1099 |
self.bytestream = bytestream_pb2_grpc.ByteStreamStub(self.channel)
|
1037 | 1100 |
self.cas = remote_execution_pb2_grpc.ContentAddressableStorageStub(self.channel)
|
1101 |
+ self.capabilities = remote_execution_pb2_grpc.CapabilitiesStub(self.channel)
|
|
1038 | 1102 |
self.ref_storage = buildstream_pb2_grpc.ReferenceStorageStub(self.channel)
|
1039 | 1103 |
|
1104 |
+ self.max_batch_total_size_bytes = _MAX_PAYLOAD_BYTES
|
|
1105 |
+ try:
|
|
1106 |
+ request = remote_execution_pb2.GetCapabilitiesRequest()
|
|
1107 |
+ response = self.capabilities.GetCapabilities(request)
|
|
1108 |
+ server_max_batch_total_size_bytes = response.cache_capabilities.max_batch_total_size_bytes
|
|
1109 |
+ if 0 < server_max_batch_total_size_bytes < self.max_batch_total_size_bytes:
|
|
1110 |
+ self.max_batch_total_size_bytes = server_max_batch_total_size_bytes
|
|
1111 |
+ except grpc.RpcError as e:
|
|
1112 |
+ # Simply use the defaults for servers that don't implement GetCapabilities()
|
|
1113 |
+ if e.code() != grpc.StatusCode.UNIMPLEMENTED:
|
|
1114 |
+ raise
|
|
1115 |
+ |
|
1116 |
+ # Check whether the server supports BatchReadBlobs()
|
|
1117 |
+ self.batch_read_supported = False
|
|
1118 |
+ try:
|
|
1119 |
+ request = remote_execution_pb2.BatchReadBlobsRequest()
|
|
1120 |
+ response = self.cas.BatchReadBlobs(request)
|
|
1121 |
+ self.batch_read_supported = True
|
|
1122 |
+ except grpc.RpcError as e:
|
|
1123 |
+ if e.code() != grpc.StatusCode.UNIMPLEMENTED:
|
|
1124 |
+ raise
|
|
1125 |
+ |
|
1040 | 1126 |
self._initialized = True
|
1041 | 1127 |
|
1042 | 1128 |
|
1129 |
+# Represents a batch of blobs queued for fetching.
|
|
1130 |
+#
|
|
1131 |
+class _CASBatchRead():
|
|
1132 |
+ def __init__(self, remote):
|
|
1133 |
+ self._remote = remote
|
|
1134 |
+ self._max_total_size_bytes = remote.max_batch_total_size_bytes
|
|
1135 |
+ self._request = remote_execution_pb2.BatchReadBlobsRequest()
|
|
1136 |
+ self._size = 0
|
|
1137 |
+ self._sent = False
|
|
1138 |
+ |
|
1139 |
+ def add(self, digest):
|
|
1140 |
+ assert not self._sent
|
|
1141 |
+ |
|
1142 |
+ new_batch_size = self._size + digest.size_bytes
|
|
1143 |
+ if new_batch_size > self._max_total_size_bytes:
|
|
1144 |
+ # Not enough space left in current batch
|
|
1145 |
+ return False
|
|
1146 |
+ |
|
1147 |
+ request_digest = self._request.digests.add()
|
|
1148 |
+ request_digest.hash = digest.hash
|
|
1149 |
+ request_digest.size_bytes = digest.size_bytes
|
|
1150 |
+ self._size = new_batch_size
|
|
1151 |
+ return True
|
|
1152 |
+ |
|
1153 |
+ def send(self):
|
|
1154 |
+ assert not self._sent
|
|
1155 |
+ self._sent = True
|
|
1156 |
+ |
|
1157 |
+ if len(self._request.digests) == 0:
|
|
1158 |
+ return
|
|
1159 |
+ |
|
1160 |
+ batch_response = self._remote.cas.BatchReadBlobs(self._request)
|
|
1161 |
+ |
|
1162 |
+ for response in batch_response.responses:
|
|
1163 |
+ if response.status.code != grpc.StatusCode.OK.value[0]:
|
|
1164 |
+ raise ArtifactError("Failed to download blob {}: {}".format(
|
|
1165 |
+ response.digest.hash, response.status.code))
|
|
1166 |
+ if response.digest.size_bytes != len(response.data):
|
|
1167 |
+ raise ArtifactError("Failed to download blob {}: expected {} bytes, received {} bytes".format(
|
|
1168 |
+ response.digest.hash, response.digest.size_bytes, len(response.data)))
|
|
1169 |
+ |
|
1170 |
+ yield (response.digest, response.data)
|
|
1171 |
+ |
|
1172 |
+ |
|
1043 | 1173 |
def _grouper(iterable, n):
|
1044 | 1174 |
while True:
|
1045 | 1175 |
try:
|
... | ... | @@ -38,8 +38,9 @@ from .._context import Context |
38 | 38 |
from .cascache import CASCache
|
39 | 39 |
|
40 | 40 |
|
41 |
-# The default limit for gRPC messages is 4 MiB
|
|
42 |
-_MAX_BATCH_TOTAL_SIZE_BYTES = 4 * 1024 * 1024
|
|
41 |
+# The default limit for gRPC messages is 4 MiB.
|
|
42 |
+# Limit payload to 1 MiB to leave sufficient headroom for metadata.
|
|
43 |
+_MAX_PAYLOAD_BYTES = 1024 * 1024
|
|
43 | 44 |
|
44 | 45 |
|
45 | 46 |
# Trying to push an artifact that is too large
|
... | ... | @@ -158,7 +159,7 @@ class _ByteStreamServicer(bytestream_pb2_grpc.ByteStreamServicer): |
158 | 159 |
|
159 | 160 |
remaining = client_digest.size_bytes - request.read_offset
|
160 | 161 |
while remaining > 0:
|
161 |
- chunk_size = min(remaining, 64 * 1024)
|
|
162 |
+ chunk_size = min(remaining, _MAX_PAYLOAD_BYTES)
|
|
162 | 163 |
remaining -= chunk_size
|
163 | 164 |
|
164 | 165 |
response = bytestream_pb2.ReadResponse()
|
... | ... | @@ -242,7 +243,7 @@ class _ContentAddressableStorageServicer(remote_execution_pb2_grpc.ContentAddres |
242 | 243 |
|
243 | 244 |
for digest in request.digests:
|
244 | 245 |
batch_size += digest.size_bytes
|
245 |
- if batch_size > _MAX_BATCH_TOTAL_SIZE_BYTES:
|
|
246 |
+ if batch_size > _MAX_PAYLOAD_BYTES:
|
|
246 | 247 |
context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
|
247 | 248 |
return response
|
248 | 249 |
|
... | ... | @@ -269,7 +270,7 @@ class _CapabilitiesServicer(remote_execution_pb2_grpc.CapabilitiesServicer): |
269 | 270 |
cache_capabilities = response.cache_capabilities
|
270 | 271 |
cache_capabilities.digest_function.append(remote_execution_pb2.SHA256)
|
271 | 272 |
cache_capabilities.action_cache_update_capabilities.update_enabled = False
|
272 |
- cache_capabilities.max_batch_total_size_bytes = _MAX_BATCH_TOTAL_SIZE_BYTES
|
|
273 |
+ cache_capabilities.max_batch_total_size_bytes = _MAX_PAYLOAD_BYTES
|
|
273 | 274 |
cache_capabilities.symlink_absolute_path_strategy = remote_execution_pb2.CacheCapabilities.ALLOWED
|
274 | 275 |
|
275 | 276 |
response.deprecated_api_version.major = 2
|
... | ... | @@ -1532,8 +1532,6 @@ class Element(Plugin): |
1532 | 1532 |
with _signals.terminator(cleanup_rootdir), \
|
1533 | 1533 |
self.__sandbox(rootdir, output_file, output_file, self.__sandbox_config) as sandbox: # nopep8
|
1534 | 1534 |
|
1535 |
- sandbox_vroot = sandbox.get_virtual_directory()
|
|
1536 |
- |
|
1537 | 1535 |
# By default, the dynamic public data is the same as the static public data.
|
1538 | 1536 |
# The plugin's assemble() method may modify this, though.
|
1539 | 1537 |
self.__dynamic_public = _yaml.node_copy(self.__public)
|
... | ... | @@ -1581,7 +1579,6 @@ class Element(Plugin): |
1581 | 1579 |
finally:
|
1582 | 1580 |
if collect is not None:
|
1583 | 1581 |
try:
|
1584 |
- # Sandbox will probably have replaced its virtual directory, so get it again
|
|
1585 | 1582 |
sandbox_vroot = sandbox.get_virtual_directory()
|
1586 | 1583 |
collectvdir = sandbox_vroot.descend(collect.lstrip(os.sep).split(os.sep))
|
1587 | 1584 |
except VirtualDirectoryError:
|
... | ... | @@ -1606,6 +1603,7 @@ class Element(Plugin): |
1606 | 1603 |
collectvdir.export_files(filesdir, can_link=True)
|
1607 | 1604 |
|
1608 | 1605 |
try:
|
1606 |
+ sandbox_vroot = sandbox.get_virtual_directory()
|
|
1609 | 1607 |
sandbox_build_dir = sandbox_vroot.descend(
|
1610 | 1608 |
self.get_variable('build-root').lstrip(os.sep).split(os.sep))
|
1611 | 1609 |
# Hard link files from build-root dir to buildtreedir directory
|
... | ... | @@ -110,6 +110,10 @@ class Sandbox(): |
110 | 110 |
os.makedirs(directory_, exist_ok=True)
|
111 | 111 |
self._vdir = None
|
112 | 112 |
|
113 |
+ # This is set if anyone requests access to the underlying
|
|
114 |
+ # directory via get_directory.
|
|
115 |
+ self._never_cache_vdirs = False
|
|
116 |
+ |
|
113 | 117 |
def get_directory(self):
|
114 | 118 |
"""Fetches the sandbox root directory
|
115 | 119 |
|
... | ... | @@ -122,24 +126,28 @@ class Sandbox(): |
122 | 126 |
|
123 | 127 |
"""
|
124 | 128 |
if self.__allow_real_directory:
|
129 |
+ self._never_cache_vdirs = True
|
|
125 | 130 |
return self._root
|
126 | 131 |
else:
|
127 | 132 |
raise BstError("You can't use get_directory")
|
128 | 133 |
|
129 | 134 |
def get_virtual_directory(self):
|
130 |
- """Fetches the sandbox root directory
|
|
135 |
+ """Fetches the sandbox root directory as a virtual Directory.
|
|
131 | 136 |
|
132 | 137 |
The root directory is where artifacts for the base
|
133 |
- runtime environment should be staged. Only works if
|
|
134 |
- BST_VIRTUAL_DIRECTORY is not set.
|
|
138 |
+ runtime environment should be staged.
|
|
139 |
+ |
|
140 |
+ Use caution if you use get_directory and
|
|
141 |
+ get_virtual_directory. If you alter the contents of the
|
|
142 |
+ directory returned by get_directory, all objects returned by
|
|
143 |
+ get_virtual_directory or derived from them are invalid and you
|
|
144 |
+ must call get_virtual_directory again to get a new copy.
|
|
135 | 145 |
|
136 | 146 |
Returns:
|
137 |
- (str): The sandbox root directory
|
|
147 |
+ (Directory): The sandbox root directory
|
|
138 | 148 |
|
139 | 149 |
"""
|
140 |
- if not self._vdir:
|
|
141 |
- # BST_CAS_DIRECTORIES is a deliberately hidden environment variable which
|
|
142 |
- # can be used to switch on CAS-based directories for testing.
|
|
150 |
+ if self._vdir is None or self._never_cache_vdirs:
|
|
143 | 151 |
if 'BST_CAS_DIRECTORIES' in os.environ:
|
144 | 152 |
self._vdir = CasBasedDirectory(self.__context, ref=None)
|
145 | 153 |
else:
|