Raoul Hidalgo Charman pushed to branch raoul/802-refactor-artifactcache at BuildStream / buildstream
Commits:
-
115b5593
by Raoul Hidalgo Charman at 2019-01-14T11:06:49Z
-
993f9957
by Raoul Hidalgo Charman at 2019-01-14T11:11:19Z
-
3a000f6c
by Raoul Hidalgo Charman at 2019-01-14T11:11:34Z
13 changed files:
- buildstream/_artifactcache.py
- buildstream/_cas/__init__.py
- buildstream/_cas/cascache.py
- buildstream/_cas/casremote.py
- + buildstream/_cas/transfer.py
- buildstream/_context.py
- buildstream/data/userconfig.yaml
- buildstream/sandbox/_sandboxremote.py
- conftest.py
- tests/artifactcache/pull.py
- tests/artifactcache/push.py
- tests/integration/pullbuildtrees.py
- tests/testutils/runcli.py
Changes:
... | ... | @@ -28,7 +28,8 @@ from ._message import Message, MessageType |
28 | 28 |
from . import utils
|
29 | 29 |
from . import _yaml
|
30 | 30 |
|
31 |
-from ._cas import CASRemote, CASRemoteSpec
|
|
31 |
+from ._cas import BlobNotFound, CASRemote, CASRemoteSpec
|
|
32 |
+from ._cas.transfer import cas_directory_upload, cas_directory_download, cas_tree_download
|
|
32 | 33 |
|
33 | 34 |
|
34 | 35 |
CACHE_SIZE_FILE = "cache_size"
|
... | ... | @@ -374,7 +375,7 @@ class ArtifactCache(): |
374 | 375 |
q = multiprocessing.Queue()
|
375 | 376 |
for remote_spec in remote_specs:
|
376 | 377 |
|
377 |
- error = CASRemote.check_remote(remote_spec, q)
|
|
378 |
+ error = CASRemote.check_remote(remote_spec, self.context.tmpdir, q)
|
|
378 | 379 |
|
379 | 380 |
if error and on_failure:
|
380 | 381 |
on_failure(remote_spec.url, error)
|
... | ... | @@ -385,7 +386,7 @@ class ArtifactCache(): |
385 | 386 |
if remote_spec.push:
|
386 | 387 |
self._has_push_remotes = True
|
387 | 388 |
|
388 |
- remotes[remote_spec.url] = CASRemote(remote_spec)
|
|
389 |
+ remotes[remote_spec.url] = CASRemote(remote_spec, self.context.tmpdir)
|
|
389 | 390 |
|
390 | 391 |
for project in self.context.get_projects():
|
391 | 392 |
remote_specs = self.global_remote_specs
|
... | ... | @@ -607,16 +608,41 @@ class ArtifactCache(): |
607 | 608 |
|
608 | 609 |
for remote in push_remotes:
|
609 | 610 |
remote.init()
|
611 |
+ skipped_remote = True
|
|
610 | 612 |
display_key = element._get_brief_display_key()
|
611 | 613 |
element.status("Pushing artifact {} -> {}".format(display_key, remote.spec.url))
|
612 | 614 |
|
613 |
- if self.cas.push(refs, remote):
|
|
614 |
- element.info("Pushed artifact {} -> {}".format(display_key, remote.spec.url))
|
|
615 |
+ try:
|
|
616 |
+ for ref in refs:
|
|
617 |
+ # Check whether ref is already on the server in which case
|
|
618 |
+ # there is no need to push the ref
|
|
619 |
+ root_digest = self.cas.resolve_ref(ref)
|
|
620 |
+ response = remote.get_reference(ref)
|
|
621 |
+ if (response is not None and
|
|
622 |
+ response.hash == root_digest.hash and
|
|
623 |
+ response.size_bytes == root_digest.size_bytes):
|
|
624 |
+ element.info("Remote ({}) already has {} cached".format(
|
|
625 |
+ remote.spec.url, element._get_brief_display_key()))
|
|
626 |
+ continue
|
|
627 |
+ |
|
628 |
+ # upload blobs
|
|
629 |
+ cas_directory_upload(self.cas, remote, root_digest)
|
|
630 |
+ remote.update_reference(ref, root_digest)
|
|
631 |
+ |
|
632 |
+ skipped_remote = False
|
|
633 |
+ |
|
634 |
+ except CASError as e:
|
|
635 |
+ if str(e.reason) == "StatusCode.RESOURCE_EXHAUSTED":
|
|
636 |
+ element.warn("Failed to push element to {}: Resource exhausted"
|
|
637 |
+ .format(remote.spec.url))
|
|
638 |
+ continue
|
|
639 |
+ else:
|
|
640 |
+ raise ArtifactError("Failed to push refs {}: {}".format(refs, e),
|
|
641 |
+ temporary=True) from e
|
|
642 |
+ |
|
643 |
+ if skipped_remote is False:
|
|
615 | 644 |
pushed = True
|
616 |
- else:
|
|
617 |
- element.info("Remote ({}) already has {} cached".format(
|
|
618 |
- remote.spec.url, element._get_brief_display_key()
|
|
619 |
- ))
|
|
645 |
+ element.info("Pushed artifact {} -> {}".format(display_key, remote.spec.url))
|
|
620 | 646 |
|
621 | 647 |
return pushed
|
622 | 648 |
|
... | ... | @@ -644,19 +670,31 @@ class ArtifactCache(): |
644 | 670 |
display_key = element._get_brief_display_key()
|
645 | 671 |
element.status("Pulling artifact {} <- {}".format(display_key, remote.spec.url))
|
646 | 672 |
|
647 |
- if self.cas.pull(ref, remote, progress=progress, subdir=subdir, excluded_subdirs=excluded_subdirs):
|
|
648 |
- element.info("Pulled artifact {} <- {}".format(display_key, remote.spec.url))
|
|
649 |
- if subdir:
|
|
650 |
- # Attempt to extract subdir into artifact extract dir if it already exists
|
|
651 |
- # without containing the subdir. If the respective artifact extract dir does not
|
|
652 |
- # exist a complete extraction will complete.
|
|
653 |
- self.extract(element, key, subdir)
|
|
654 |
- # no need to pull from additional remotes
|
|
655 |
- return True
|
|
656 |
- else:
|
|
673 |
+ root_digest = remote.get_reference(ref)
|
|
674 |
+ |
|
675 |
+ if not root_digest:
|
|
657 | 676 |
element.info("Remote ({}) does not have {} cached".format(
|
658 |
- remote.spec.url, element._get_brief_display_key()
|
|
659 |
- ))
|
|
677 |
+ remote.spec.url, element._get_brief_display_key()))
|
|
678 |
+ continue
|
|
679 |
+ |
|
680 |
+ try:
|
|
681 |
+ cas_directory_download(self.cas, remote, root_digest, excluded_subdirs)
|
|
682 |
+ except BlobNotFound:
|
|
683 |
+ element.info("Remote ({}) is missing blobs for {}".format(
|
|
684 |
+ remote.spec.url, element._get_brief_display_key()))
|
|
685 |
+ continue
|
|
686 |
+ |
|
687 |
+ self.cas.set_ref(ref, root_digest)
|
|
688 |
+ |
|
689 |
+ if subdir:
|
|
690 |
+ # Attempt to extract subdir into artifact extract dir if it already exists
|
|
691 |
+ # without containing the subdir. If the respective artifact extract dir does not
|
|
692 |
+ # exist a complete extraction will complete.
|
|
693 |
+ self.extract(element, key, subdir)
|
|
694 |
+ |
|
695 |
+ element.info("Pulled artifact {} <- {}".format(display_key, remote.spec.url))
|
|
696 |
+ # no need to pull from additional remotes
|
|
697 |
+ return True
|
|
660 | 698 |
|
661 | 699 |
except CASError as e:
|
662 | 700 |
raise ArtifactError("Failed to pull artifact {}: {}".format(
|
... | ... | @@ -671,15 +709,16 @@ class ArtifactCache(): |
671 | 709 |
#
|
672 | 710 |
# Args:
|
673 | 711 |
# project (Project): The current project
|
674 |
- # digest (Digest): The digest of the tree
|
|
712 |
+ # tree_digest (Digest): The digest of the tree
|
|
675 | 713 |
#
|
676 |
- def pull_tree(self, project, digest):
|
|
714 |
+ def pull_tree(self, project, tree_digest):
|
|
677 | 715 |
for remote in self._remotes[project]:
|
678 |
- digest = self.cas.pull_tree(remote, digest)
|
|
679 |
- |
|
680 |
- if digest:
|
|
681 |
- # no need to pull from additional remotes
|
|
682 |
- return digest
|
|
716 |
+ try:
|
|
717 |
+ root_digest = cas_tree_download(self.cas, remote, tree_digest)
|
|
718 |
+ except BlobNotFound:
|
|
719 |
+ continue
|
|
720 |
+ else:
|
|
721 |
+ return root_digest
|
|
683 | 722 |
|
684 | 723 |
return None
|
685 | 724 |
|
... | ... | @@ -708,7 +747,7 @@ class ArtifactCache(): |
708 | 747 |
return
|
709 | 748 |
|
710 | 749 |
for remote in push_remotes:
|
711 |
- self.cas.push_directory(remote, directory)
|
|
750 |
+ cas_directory_upload(self.cas, remote, directory.ref)
|
|
712 | 751 |
|
713 | 752 |
# push_message():
|
714 | 753 |
#
|
... | ... | @@ -917,6 +956,19 @@ class ArtifactCache(): |
917 | 956 |
stat = os.statvfs(volume)
|
918 | 957 |
return stat.f_bsize * stat.f_bavail, stat.f_bsize * stat.f_blocks
|
919 | 958 |
|
959 |
+ def _fetch_directory(self, remote, root_digest, excluded_subdirs):
|
|
960 |
+ for blob_digest in remote.yield_directory_digests(
|
|
961 |
+ root_digest, excluded_subdirs=excluded_subdirs):
|
|
962 |
+ if self.cas.check_blob(blob_digest):
|
|
963 |
+ continue
|
|
964 |
+ remote.request_blob(blob_digest)
|
|
965 |
+ for blob_file in remote.get_blobs():
|
|
966 |
+ self.cas.add_object(path=blob_file.name, link_directly=True)
|
|
967 |
+ |
|
968 |
+ # Request final CAS batch
|
|
969 |
+ for blob_file in remote.get_blobs(complete_batch=True):
|
|
970 |
+ self.cas.add_object(path=blob_file.name, link_directly=True)
|
|
971 |
+ |
|
920 | 972 |
|
921 | 973 |
# _configured_remote_artifact_cache_specs():
|
922 | 974 |
#
|
... | ... | @@ -18,4 +18,4 @@ |
18 | 18 |
# Tristan Van Berkom <tristan vanberkom codethink co uk>
|
19 | 19 |
|
20 | 20 |
from .cascache import CASCache
|
21 |
-from .casremote import CASRemote, CASRemoteSpec
|
|
21 |
+from .casremote import CASRemote, CASRemoteSpec, BlobNotFound
|
... | ... | @@ -18,23 +18,16 @@ |
18 | 18 |
# Jürg Billeter <juerg billeter codethink co uk>
|
19 | 19 |
|
20 | 20 |
import hashlib
|
21 |
-import itertools
|
|
22 | 21 |
import os
|
23 | 22 |
import stat
|
24 | 23 |
import tempfile
|
25 |
-import uuid
|
|
26 | 24 |
import contextlib
|
27 | 25 |
|
28 |
-import grpc
|
|
29 |
- |
|
30 | 26 |
from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
31 |
-from .._protos.buildstream.v2 import buildstream_pb2
|
|
32 | 27 |
|
33 | 28 |
from .. import utils
|
34 | 29 |
from .._exceptions import CASCacheError
|
35 | 30 |
|
36 |
-from .casremote import BlobNotFound, _CASBatchRead, _CASBatchUpdate
|
|
37 |
- |
|
38 | 31 |
|
39 | 32 |
# A CASCache manages a CAS repository as specified in the Remote Execution API.
|
40 | 33 |
#
|
... | ... | @@ -183,73 +176,6 @@ class CASCache(): |
183 | 176 |
|
184 | 177 |
return modified, removed, added
|
185 | 178 |
|
186 |
- # pull():
|
|
187 |
- #
|
|
188 |
- # Pull a ref from a remote repository.
|
|
189 |
- #
|
|
190 |
- # Args:
|
|
191 |
- # ref (str): The ref to pull
|
|
192 |
- # remote (CASRemote): The remote repository to pull from
|
|
193 |
- # progress (callable): The progress callback, if any
|
|
194 |
- # subdir (str): The optional specific subdir to pull
|
|
195 |
- # excluded_subdirs (list): The optional list of subdirs to not pull
|
|
196 |
- #
|
|
197 |
- # Returns:
|
|
198 |
- # (bool): True if pull was successful, False if ref was not available
|
|
199 |
- #
|
|
200 |
- def pull(self, ref, remote, *, progress=None, subdir=None, excluded_subdirs=None):
|
|
201 |
- try:
|
|
202 |
- remote.init()
|
|
203 |
- |
|
204 |
- request = buildstream_pb2.GetReferenceRequest(instance_name=remote.spec.instance_name)
|
|
205 |
- request.key = ref
|
|
206 |
- response = remote.ref_storage.GetReference(request)
|
|
207 |
- |
|
208 |
- tree = remote_execution_pb2.Digest()
|
|
209 |
- tree.hash = response.digest.hash
|
|
210 |
- tree.size_bytes = response.digest.size_bytes
|
|
211 |
- |
|
212 |
- # Check if the element artifact is present, if so just fetch the subdir.
|
|
213 |
- if subdir and os.path.exists(self.objpath(tree)):
|
|
214 |
- self._fetch_subdir(remote, tree, subdir)
|
|
215 |
- else:
|
|
216 |
- # Fetch artifact, excluded_subdirs determined in pullqueue
|
|
217 |
- self._fetch_directory(remote, tree, excluded_subdirs=excluded_subdirs)
|
|
218 |
- |
|
219 |
- self.set_ref(ref, tree)
|
|
220 |
- |
|
221 |
- return True
|
|
222 |
- except grpc.RpcError as e:
|
|
223 |
- if e.code() != grpc.StatusCode.NOT_FOUND:
|
|
224 |
- raise CASCacheError("Failed to pull ref {}: {}".format(ref, e)) from e
|
|
225 |
- else:
|
|
226 |
- return False
|
|
227 |
- except BlobNotFound as e:
|
|
228 |
- return False
|
|
229 |
- |
|
230 |
- # pull_tree():
|
|
231 |
- #
|
|
232 |
- # Pull a single Tree rather than a ref.
|
|
233 |
- # Does not update local refs.
|
|
234 |
- #
|
|
235 |
- # Args:
|
|
236 |
- # remote (CASRemote): The remote to pull from
|
|
237 |
- # digest (Digest): The digest of the tree
|
|
238 |
- #
|
|
239 |
- def pull_tree(self, remote, digest):
|
|
240 |
- try:
|
|
241 |
- remote.init()
|
|
242 |
- |
|
243 |
- digest = self._fetch_tree(remote, digest)
|
|
244 |
- |
|
245 |
- return digest
|
|
246 |
- |
|
247 |
- except grpc.RpcError as e:
|
|
248 |
- if e.code() != grpc.StatusCode.NOT_FOUND:
|
|
249 |
- raise
|
|
250 |
- |
|
251 |
- return None
|
|
252 |
- |
|
253 | 179 |
# link_ref():
|
254 | 180 |
#
|
255 | 181 |
# Add an alias for an existing ref.
|
... | ... | @@ -263,73 +189,6 @@ class CASCache(): |
263 | 189 |
|
264 | 190 |
self.set_ref(newref, tree)
|
265 | 191 |
|
266 |
- # push():
|
|
267 |
- #
|
|
268 |
- # Push committed refs to remote repository.
|
|
269 |
- #
|
|
270 |
- # Args:
|
|
271 |
- # refs (list): The refs to push
|
|
272 |
- # remote (CASRemote): The remote to push to
|
|
273 |
- #
|
|
274 |
- # Returns:
|
|
275 |
- # (bool): True if any remote was updated, False if no pushes were required
|
|
276 |
- #
|
|
277 |
- # Raises:
|
|
278 |
- # (CASCacheError): if there was an error
|
|
279 |
- #
|
|
280 |
- def push(self, refs, remote):
|
|
281 |
- skipped_remote = True
|
|
282 |
- try:
|
|
283 |
- for ref in refs:
|
|
284 |
- tree = self.resolve_ref(ref)
|
|
285 |
- |
|
286 |
- # Check whether ref is already on the server in which case
|
|
287 |
- # there is no need to push the ref
|
|
288 |
- try:
|
|
289 |
- request = buildstream_pb2.GetReferenceRequest(instance_name=remote.spec.instance_name)
|
|
290 |
- request.key = ref
|
|
291 |
- response = remote.ref_storage.GetReference(request)
|
|
292 |
- |
|
293 |
- if response.digest.hash == tree.hash and response.digest.size_bytes == tree.size_bytes:
|
|
294 |
- # ref is already on the server with the same tree
|
|
295 |
- continue
|
|
296 |
- |
|
297 |
- except grpc.RpcError as e:
|
|
298 |
- if e.code() != grpc.StatusCode.NOT_FOUND:
|
|
299 |
- # Intentionally re-raise RpcError for outer except block.
|
|
300 |
- raise
|
|
301 |
- |
|
302 |
- self._send_directory(remote, tree)
|
|
303 |
- |
|
304 |
- request = buildstream_pb2.UpdateReferenceRequest(instance_name=remote.spec.instance_name)
|
|
305 |
- request.keys.append(ref)
|
|
306 |
- request.digest.hash = tree.hash
|
|
307 |
- request.digest.size_bytes = tree.size_bytes
|
|
308 |
- remote.ref_storage.UpdateReference(request)
|
|
309 |
- |
|
310 |
- skipped_remote = False
|
|
311 |
- except grpc.RpcError as e:
|
|
312 |
- if e.code() != grpc.StatusCode.RESOURCE_EXHAUSTED:
|
|
313 |
- raise CASCacheError("Failed to push ref {}: {}".format(refs, e), temporary=True) from e
|
|
314 |
- |
|
315 |
- return not skipped_remote
|
|
316 |
- |
|
317 |
- # push_directory():
|
|
318 |
- #
|
|
319 |
- # Push the given virtual directory to a remote.
|
|
320 |
- #
|
|
321 |
- # Args:
|
|
322 |
- # remote (CASRemote): The remote to push to
|
|
323 |
- # directory (Directory): A virtual directory object to push.
|
|
324 |
- #
|
|
325 |
- # Raises:
|
|
326 |
- # (CASCacheError): if there was an error
|
|
327 |
- #
|
|
328 |
- def push_directory(self, remote, directory):
|
|
329 |
- remote.init()
|
|
330 |
- |
|
331 |
- self._send_directory(remote, directory.ref)
|
|
332 |
- |
|
333 | 192 |
# objpath():
|
334 | 193 |
#
|
335 | 194 |
# Return the path of an object based on its digest.
|
... | ... | @@ -591,6 +450,37 @@ class CASCache(): |
591 | 450 |
reachable = set()
|
592 | 451 |
self._reachable_refs_dir(reachable, tree, update_mtime=True)
|
593 | 452 |
|
453 |
+ # Check to see if a blob is in the local CAS
|
|
454 |
+ # return None if not
|
|
455 |
+ def check_blob(self, digest):
|
|
456 |
+ objpath = self.objpath(digest)
|
|
457 |
+ if os.path.exists(objpath):
|
|
458 |
+ # already in local repository
|
|
459 |
+ return objpath
|
|
460 |
+ else:
|
|
461 |
+ return None
|
|
462 |
+ |
|
463 |
+ def yield_directory_digests(self, directory_digest):
|
|
464 |
+ # parse directory, and recursively add blobs
|
|
465 |
+ d = remote_execution_pb2.Digest()
|
|
466 |
+ d.hash = directory_digest.hash
|
|
467 |
+ d.size_bytes = directory_digest.size_bytes
|
|
468 |
+ yield d
|
|
469 |
+ |
|
470 |
+ directory = remote_execution_pb2.Directory()
|
|
471 |
+ |
|
472 |
+ with open(self.objpath(directory_digest), 'rb') as f:
|
|
473 |
+ directory.ParseFromString(f.read())
|
|
474 |
+ |
|
475 |
+ for filenode in directory.files:
|
|
476 |
+ d = remote_execution_pb2.Digest()
|
|
477 |
+ d.hash = filenode.digest.hash
|
|
478 |
+ d.size_bytes = filenode.digest.size_bytes
|
|
479 |
+ yield d
|
|
480 |
+ |
|
481 |
+ for dirnode in directory.directories:
|
|
482 |
+ yield from self.yield_directory_digests(dirnode.digest)
|
|
483 |
+ |
|
594 | 484 |
################################################
|
595 | 485 |
# Local Private Methods #
|
596 | 486 |
################################################
|
... | ... | @@ -779,202 +669,3 @@ class CASCache(): |
779 | 669 |
|
780 | 670 |
for dirnode in directory.directories:
|
781 | 671 |
yield from self._required_blobs(dirnode.digest)
|
782 |
- |
|
783 |
- # _ensure_blob():
|
|
784 |
- #
|
|
785 |
- # Fetch and add blob if it's not already local.
|
|
786 |
- #
|
|
787 |
- # Args:
|
|
788 |
- # remote (Remote): The remote to use.
|
|
789 |
- # digest (Digest): Digest object for the blob to fetch.
|
|
790 |
- #
|
|
791 |
- # Returns:
|
|
792 |
- # (str): The path of the object
|
|
793 |
- #
|
|
794 |
- def _ensure_blob(self, remote, digest):
|
|
795 |
- objpath = self.objpath(digest)
|
|
796 |
- if os.path.exists(objpath):
|
|
797 |
- # already in local repository
|
|
798 |
- return objpath
|
|
799 |
- |
|
800 |
- with tempfile.NamedTemporaryFile(dir=self.tmpdir) as f:
|
|
801 |
- remote._fetch_blob(digest, f)
|
|
802 |
- |
|
803 |
- added_digest = self.add_object(path=f.name, link_directly=True)
|
|
804 |
- assert added_digest.hash == digest.hash
|
|
805 |
- |
|
806 |
- return objpath
|
|
807 |
- |
|
808 |
- def _batch_download_complete(self, batch):
|
|
809 |
- for digest, data in batch.send():
|
|
810 |
- with tempfile.NamedTemporaryFile(dir=self.tmpdir) as f:
|
|
811 |
- f.write(data)
|
|
812 |
- f.flush()
|
|
813 |
- |
|
814 |
- added_digest = self.add_object(path=f.name, link_directly=True)
|
|
815 |
- assert added_digest.hash == digest.hash
|
|
816 |
- |
|
817 |
- # Helper function for _fetch_directory().
|
|
818 |
- def _fetch_directory_batch(self, remote, batch, fetch_queue, fetch_next_queue):
|
|
819 |
- self._batch_download_complete(batch)
|
|
820 |
- |
|
821 |
- # All previously scheduled directories are now locally available,
|
|
822 |
- # move them to the processing queue.
|
|
823 |
- fetch_queue.extend(fetch_next_queue)
|
|
824 |
- fetch_next_queue.clear()
|
|
825 |
- return _CASBatchRead(remote)
|
|
826 |
- |
|
827 |
- # Helper function for _fetch_directory().
|
|
828 |
- def _fetch_directory_node(self, remote, digest, batch, fetch_queue, fetch_next_queue, *, recursive=False):
|
|
829 |
- in_local_cache = os.path.exists(self.objpath(digest))
|
|
830 |
- |
|
831 |
- if in_local_cache:
|
|
832 |
- # Skip download, already in local cache.
|
|
833 |
- pass
|
|
834 |
- elif (digest.size_bytes >= remote.max_batch_total_size_bytes or
|
|
835 |
- not remote.batch_read_supported):
|
|
836 |
- # Too large for batch request, download in independent request.
|
|
837 |
- self._ensure_blob(remote, digest)
|
|
838 |
- in_local_cache = True
|
|
839 |
- else:
|
|
840 |
- if not batch.add(digest):
|
|
841 |
- # Not enough space left in batch request.
|
|
842 |
- # Complete pending batch first.
|
|
843 |
- batch = self._fetch_directory_batch(remote, batch, fetch_queue, fetch_next_queue)
|
|
844 |
- batch.add(digest)
|
|
845 |
- |
|
846 |
- if recursive:
|
|
847 |
- if in_local_cache:
|
|
848 |
- # Add directory to processing queue.
|
|
849 |
- fetch_queue.append(digest)
|
|
850 |
- else:
|
|
851 |
- # Directory will be available after completing pending batch.
|
|
852 |
- # Add directory to deferred processing queue.
|
|
853 |
- fetch_next_queue.append(digest)
|
|
854 |
- |
|
855 |
- return batch
|
|
856 |
- |
|
857 |
- # _fetch_directory():
|
|
858 |
- #
|
|
859 |
- # Fetches remote directory and adds it to content addressable store.
|
|
860 |
- #
|
|
861 |
- # Fetches files, symbolic links and recursively other directories in
|
|
862 |
- # the remote directory and adds them to the content addressable
|
|
863 |
- # store.
|
|
864 |
- #
|
|
865 |
- # Args:
|
|
866 |
- # remote (Remote): The remote to use.
|
|
867 |
- # dir_digest (Digest): Digest object for the directory to fetch.
|
|
868 |
- # excluded_subdirs (list): The optional list of subdirs to not fetch
|
|
869 |
- #
|
|
870 |
- def _fetch_directory(self, remote, dir_digest, *, excluded_subdirs=None):
|
|
871 |
- fetch_queue = [dir_digest]
|
|
872 |
- fetch_next_queue = []
|
|
873 |
- batch = _CASBatchRead(remote)
|
|
874 |
- if not excluded_subdirs:
|
|
875 |
- excluded_subdirs = []
|
|
876 |
- |
|
877 |
- while len(fetch_queue) + len(fetch_next_queue) > 0:
|
|
878 |
- if not fetch_queue:
|
|
879 |
- batch = self._fetch_directory_batch(remote, batch, fetch_queue, fetch_next_queue)
|
|
880 |
- |
|
881 |
- dir_digest = fetch_queue.pop(0)
|
|
882 |
- |
|
883 |
- objpath = self._ensure_blob(remote, dir_digest)
|
|
884 |
- |
|
885 |
- directory = remote_execution_pb2.Directory()
|
|
886 |
- with open(objpath, 'rb') as f:
|
|
887 |
- directory.ParseFromString(f.read())
|
|
888 |
- |
|
889 |
- for dirnode in directory.directories:
|
|
890 |
- if dirnode.name not in excluded_subdirs:
|
|
891 |
- batch = self._fetch_directory_node(remote, dirnode.digest, batch,
|
|
892 |
- fetch_queue, fetch_next_queue, recursive=True)
|
|
893 |
- |
|
894 |
- for filenode in directory.files:
|
|
895 |
- batch = self._fetch_directory_node(remote, filenode.digest, batch,
|
|
896 |
- fetch_queue, fetch_next_queue)
|
|
897 |
- |
|
898 |
- # Fetch final batch
|
|
899 |
- self._fetch_directory_batch(remote, batch, fetch_queue, fetch_next_queue)
|
|
900 |
- |
|
901 |
- def _fetch_subdir(self, remote, tree, subdir):
|
|
902 |
- subdirdigest = self._get_subdir(tree, subdir)
|
|
903 |
- self._fetch_directory(remote, subdirdigest)
|
|
904 |
- |
|
905 |
- def _fetch_tree(self, remote, digest):
|
|
906 |
- # download but do not store the Tree object
|
|
907 |
- with tempfile.NamedTemporaryFile(dir=self.tmpdir) as out:
|
|
908 |
- remote._fetch_blob(digest, out)
|
|
909 |
- |
|
910 |
- tree = remote_execution_pb2.Tree()
|
|
911 |
- |
|
912 |
- with open(out.name, 'rb') as f:
|
|
913 |
- tree.ParseFromString(f.read())
|
|
914 |
- |
|
915 |
- tree.children.extend([tree.root])
|
|
916 |
- for directory in tree.children:
|
|
917 |
- for filenode in directory.files:
|
|
918 |
- self._ensure_blob(remote, filenode.digest)
|
|
919 |
- |
|
920 |
- # place directory blob only in final location when we've downloaded
|
|
921 |
- # all referenced blobs to avoid dangling references in the repository
|
|
922 |
- dirbuffer = directory.SerializeToString()
|
|
923 |
- dirdigest = self.add_object(buffer=dirbuffer)
|
|
924 |
- assert dirdigest.size_bytes == len(dirbuffer)
|
|
925 |
- |
|
926 |
- return dirdigest
|
|
927 |
- |
|
928 |
- def _send_directory(self, remote, digest, u_uid=uuid.uuid4()):
|
|
929 |
- required_blobs = self._required_blobs(digest)
|
|
930 |
- |
|
931 |
- missing_blobs = dict()
|
|
932 |
- # Limit size of FindMissingBlobs request
|
|
933 |
- for required_blobs_group in _grouper(required_blobs, 512):
|
|
934 |
- request = remote_execution_pb2.FindMissingBlobsRequest(instance_name=remote.spec.instance_name)
|
|
935 |
- |
|
936 |
- for required_digest in required_blobs_group:
|
|
937 |
- d = request.blob_digests.add()
|
|
938 |
- d.hash = required_digest.hash
|
|
939 |
- d.size_bytes = required_digest.size_bytes
|
|
940 |
- |
|
941 |
- response = remote.cas.FindMissingBlobs(request)
|
|
942 |
- for missing_digest in response.missing_blob_digests:
|
|
943 |
- d = remote_execution_pb2.Digest()
|
|
944 |
- d.hash = missing_digest.hash
|
|
945 |
- d.size_bytes = missing_digest.size_bytes
|
|
946 |
- missing_blobs[d.hash] = d
|
|
947 |
- |
|
948 |
- # Upload any blobs missing on the server
|
|
949 |
- self._send_blobs(remote, missing_blobs.values(), u_uid)
|
|
950 |
- |
|
951 |
- def _send_blobs(self, remote, digests, u_uid=uuid.uuid4()):
|
|
952 |
- batch = _CASBatchUpdate(remote)
|
|
953 |
- |
|
954 |
- for digest in digests:
|
|
955 |
- with open(self.objpath(digest), 'rb') as f:
|
|
956 |
- assert os.fstat(f.fileno()).st_size == digest.size_bytes
|
|
957 |
- |
|
958 |
- if (digest.size_bytes >= remote.max_batch_total_size_bytes or
|
|
959 |
- not remote.batch_update_supported):
|
|
960 |
- # Too large for batch request, upload in independent request.
|
|
961 |
- remote._send_blob(digest, f, u_uid=u_uid)
|
|
962 |
- else:
|
|
963 |
- if not batch.add(digest, f):
|
|
964 |
- # Not enough space left in batch request.
|
|
965 |
- # Complete pending batch first.
|
|
966 |
- batch.send()
|
|
967 |
- batch = _CASBatchUpdate(remote)
|
|
968 |
- batch.add(digest, f)
|
|
969 |
- |
|
970 |
- # Send final batch
|
|
971 |
- batch.send()
|
|
972 |
- |
|
973 |
- |
|
974 |
-def _grouper(iterable, n):
|
|
975 |
- while True:
|
|
976 |
- try:
|
|
977 |
- current = next(iterable)
|
|
978 |
- except StopIteration:
|
|
979 |
- return
|
|
980 |
- yield itertools.chain([current], itertools.islice(iterable, n - 1))
|
1 | 1 |
from collections import namedtuple
|
2 | 2 |
import io
|
3 |
+import itertools
|
|
3 | 4 |
import os
|
4 | 5 |
import multiprocessing
|
5 | 6 |
import signal
|
7 |
+import tempfile
|
|
6 | 8 |
from urllib.parse import urlparse
|
7 | 9 |
import uuid
|
8 | 10 |
|
... | ... | @@ -79,7 +81,7 @@ class BlobNotFound(CASRemoteError): |
79 | 81 |
# Represents a single remote CAS cache.
|
80 | 82 |
#
|
81 | 83 |
class CASRemote():
|
82 |
- def __init__(self, spec):
|
|
84 |
+ def __init__(self, spec, tmpdir):
|
|
83 | 85 |
self.spec = spec
|
84 | 86 |
self._initialized = False
|
85 | 87 |
self.channel = None
|
... | ... | @@ -91,6 +93,16 @@ class CASRemote(): |
91 | 93 |
self.capabilities = None
|
92 | 94 |
self.max_batch_total_size_bytes = None
|
93 | 95 |
|
96 |
+ # Need str because python 3.5 and lower doesn't deal with path like
|
|
97 |
+ # objects here.
|
|
98 |
+ self.tmpdir = str(tmpdir)
|
|
99 |
+ os.makedirs(self.tmpdir, exist_ok=True)
|
|
100 |
+ |
|
101 |
+ self.__tmp_downloads = [] # files in the tmpdir waiting to be added to local caches
|
|
102 |
+ |
|
103 |
+ self.__batch_read = None
|
|
104 |
+ self.__batch_update = None
|
|
105 |
+ |
|
94 | 106 |
def init(self):
|
95 | 107 |
if not self._initialized:
|
96 | 108 |
url = urlparse(self.spec.url)
|
... | ... | @@ -148,6 +160,7 @@ class CASRemote(): |
148 | 160 |
request = remote_execution_pb2.BatchReadBlobsRequest()
|
149 | 161 |
response = self.cas.BatchReadBlobs(request)
|
150 | 162 |
self.batch_read_supported = True
|
163 |
+ self.__batch_read = _CASBatchRead(self)
|
|
151 | 164 |
except grpc.RpcError as e:
|
152 | 165 |
if e.code() != grpc.StatusCode.UNIMPLEMENTED:
|
153 | 166 |
raise
|
... | ... | @@ -158,6 +171,7 @@ class CASRemote(): |
158 | 171 |
request = remote_execution_pb2.BatchUpdateBlobsRequest()
|
159 | 172 |
response = self.cas.BatchUpdateBlobs(request)
|
160 | 173 |
self.batch_update_supported = True
|
174 |
+ self.__batch_update = _CASBatchUpdate(self)
|
|
161 | 175 |
except grpc.RpcError as e:
|
162 | 176 |
if (e.code() != grpc.StatusCode.UNIMPLEMENTED and
|
163 | 177 |
e.code() != grpc.StatusCode.PERMISSION_DENIED):
|
... | ... | @@ -172,11 +186,11 @@ class CASRemote(): |
172 | 186 |
# in the main BuildStream process
|
173 | 187 |
# See https://github.com/grpc/grpc/blob/master/doc/fork_support.md for details
|
174 | 188 |
@classmethod
|
175 |
- def check_remote(cls, remote_spec, q):
|
|
189 |
+ def check_remote(cls, remote_spec, tmpdir, q):
|
|
176 | 190 |
|
177 | 191 |
def __check_remote():
|
178 | 192 |
try:
|
179 |
- remote = cls(remote_spec)
|
|
193 |
+ remote = cls(remote_spec, tmpdir)
|
|
180 | 194 |
remote.init()
|
181 | 195 |
|
182 | 196 |
request = buildstream_pb2.StatusRequest()
|
... | ... | @@ -254,6 +268,210 @@ class CASRemote(): |
254 | 268 |
|
255 | 269 |
return message_digest
|
256 | 270 |
|
271 |
+ # get_reference():
|
|
272 |
+ #
|
|
273 |
+ # Args:
|
|
274 |
+ # ref (str): The ref to request
|
|
275 |
+ #
|
|
276 |
+ # Returns:
|
|
277 |
+ # (digest): digest of ref, None if not found
|
|
278 |
+ #
|
|
279 |
+ def get_reference(self, ref):
|
|
280 |
+ try:
|
|
281 |
+ self.init()
|
|
282 |
+ |
|
283 |
+ request = buildstream_pb2.GetReferenceRequest()
|
|
284 |
+ request.key = ref
|
|
285 |
+ return self.ref_storage.GetReference(request).digest
|
|
286 |
+ except grpc.RpcError as e:
|
|
287 |
+ if e.code() != grpc.StatusCode.NOT_FOUND:
|
|
288 |
+ raise CASRemoteError("Failed to find ref {}: {}".format(ref, e)) from e
|
|
289 |
+ else:
|
|
290 |
+ return None
|
|
291 |
+ |
|
292 |
+ # update_reference():
|
|
293 |
+ #
|
|
294 |
+ # Args:
|
|
295 |
+ # ref (str): Reference to update
|
|
296 |
+ # digest (Digest): New digest to update ref with
|
|
297 |
+ def update_reference(self, ref, digest):
|
|
298 |
+ request = buildstream_pb2.UpdateReferenceRequest()
|
|
299 |
+ request.keys.append(ref)
|
|
300 |
+ request.digest.hash = digest.hash
|
|
301 |
+ request.digest.size_bytes = digest.size_bytes
|
|
302 |
+ self.ref_storage.UpdateReference(request)
|
|
303 |
+ |
|
304 |
+ def get_tree_blob(self, tree_digest):
|
|
305 |
+ self.init()
|
|
306 |
+ f = tempfile.NamedTemporaryFile(dir=self.tmpdir)
|
|
307 |
+ self._fetch_blob(tree_digest, f)
|
|
308 |
+ |
|
309 |
+ tree = remote_execution_pb2.Tree()
|
|
310 |
+ with open(f.name, 'rb') as tmp:
|
|
311 |
+ tree.ParseFromString(tmp.read())
|
|
312 |
+ |
|
313 |
+ return tree
|
|
314 |
+ |
|
315 |
+ # yield_directory_digests():
|
|
316 |
+ #
|
|
317 |
+ # Recursively iterates over digests for files, symbolic links and other
|
|
318 |
+ # directories starting from a root digest
|
|
319 |
+ #
|
|
320 |
+ # Args:
|
|
321 |
+ # root_digest (digest): The root_digest to get a tree of
|
|
322 |
+ # progress (callable): The progress callback, if any
|
|
323 |
+ # subdir (str): The optional specific subdir to pull
|
|
324 |
+ # excluded_subdirs (list): The optional list of subdirs to not pull
|
|
325 |
+ #
|
|
326 |
+ # Returns:
|
|
327 |
+ # (iter digests): recursively iterates over digests contained in root directory
|
|
328 |
+ #
|
|
329 |
+ def yield_directory_digests(self, root_digest, *, progress=None,
|
|
330 |
+ subdir=None, excluded_subdirs=None):
|
|
331 |
+ self.init()
|
|
332 |
+ |
|
333 |
+ # Fetch artifact, excluded_subdirs determined in pullqueue
|
|
334 |
+ if excluded_subdirs is None:
|
|
335 |
+ excluded_subdirs = []
|
|
336 |
+ |
|
337 |
+ # get directory blob
|
|
338 |
+ f = tempfile.NamedTemporaryFile(dir=self.tmpdir)
|
|
339 |
+ self._fetch_blob(root_digest, f)
|
|
340 |
+ |
|
341 |
+ directory = remote_execution_pb2.Directory()
|
|
342 |
+ with open(f.name, 'rb') as tmp:
|
|
343 |
+ directory.ParseFromString(tmp.read())
|
|
344 |
+ |
|
345 |
+ yield root_digest
|
|
346 |
+ for filenode in directory.files:
|
|
347 |
+ yield filenode.digest
|
|
348 |
+ |
|
349 |
+ for dirnode in directory.directories:
|
|
350 |
+ if dirnode.name not in excluded_subdirs:
|
|
351 |
+ yield from self.yield_directory_digests(dirnode.digest)
|
|
352 |
+ |
|
353 |
+ # yield_tree_digests():
|
|
354 |
+ #
|
|
355 |
+ # Fetches a tree file from digests and then iterates over child digests
|
|
356 |
+ #
|
|
357 |
+ # Args:
|
|
358 |
+ # tree_digest (digest): tree digest
|
|
359 |
+ #
|
|
360 |
+ # Returns:
|
|
361 |
+ # (iter digests): iterates over digests in tree message
|
|
362 |
+ def yield_tree_digests(self, tree):
|
|
363 |
+ self.init()
|
|
364 |
+ |
|
365 |
+ tree.children.extend([tree.root])
|
|
366 |
+ for directory in tree.children:
|
|
367 |
+ for filenode in directory.files:
|
|
368 |
+ yield filenode.digest
|
|
369 |
+ |
|
370 |
+ # add the directory to downloaded tmp files to be added
|
|
371 |
+ f = tempfile.NamedTemporaryFile(dir=self.tmpdir)
|
|
372 |
+ f.write(directory.SerializeToString())
|
|
373 |
+ f.flush()
|
|
374 |
+ self.__tmp_downloads.append(f)
|
|
375 |
+ |
|
376 |
+ # request_blob():
|
|
377 |
+ #
|
|
378 |
+ # Request blob, triggering download via bytestream or cas
|
|
379 |
+ # BatchReadBlobs depending on size.
|
|
380 |
+ #
|
|
381 |
+ # Args:
|
|
382 |
+ # digest (Digest): digest of the requested blob
|
|
383 |
+ #
|
|
384 |
+ def request_blob(self, digest):
|
|
385 |
+ if (not self.batch_read_supported or
|
|
386 |
+ digest.size_bytes > self.max_batch_total_size_bytes):
|
|
387 |
+ f = tempfile.NamedTemporaryFile(dir=self.tmpdir)
|
|
388 |
+ self._fetch_blob(digest, f)
|
|
389 |
+ self.__tmp_downloads.append(f)
|
|
390 |
+ elif self.__batch_read.add(digest) is False:
|
|
391 |
+ self._download_batch()
|
|
392 |
+ self.__batch_read.add(digest)
|
|
393 |
+ |
|
394 |
+ # get_blobs():
|
|
395 |
+ #
|
|
396 |
+ # Yield over downloaded blobs in the tmp file locations, causing the files
|
|
397 |
+ # to be deleted once they go out of scope.
|
|
398 |
+ #
|
|
399 |
+ # Args:
|
|
400 |
+ # complete_batch (bool): download any outstanding batch read request
|
|
401 |
+ #
|
|
402 |
+ # Returns:
|
|
403 |
+ # iterator over NamedTemporaryFile
|
|
404 |
+ def get_blobs(self, complete_batch=False):
|
|
405 |
+ # Send read batch request and download
|
|
406 |
+ if (complete_batch is True and
|
|
407 |
+ self.batch_read_supported is True):
|
|
408 |
+ self._download_batch()
|
|
409 |
+ |
|
410 |
+ while self.__tmp_downloads:
|
|
411 |
+ yield self.__tmp_downloads.pop()
|
|
412 |
+ |
|
413 |
+ # upload_blob():
|
|
414 |
+ #
|
|
415 |
+ # Push blobs given an iterator over blob files
|
|
416 |
+ #
|
|
417 |
+ # Args:
|
|
418 |
+ # digest (Digest): digest we want to upload
|
|
419 |
+ # blob_file (str): Name of file location
|
|
420 |
+ # u_uid (str): Used to identify to the bytestream service
|
|
421 |
+ #
|
|
422 |
+ def upload_blob(self, digest, blob_file, u_uid=uuid.uuid4()):
|
|
423 |
+ with open(blob_file, 'rb') as f:
|
|
424 |
+ assert os.fstat(f.fileno()).st_size == digest.size_bytes
|
|
425 |
+ |
|
426 |
+ if (digest.size_bytes >= self.max_batch_total_size_bytes or
|
|
427 |
+ not self.batch_update_supported):
|
|
428 |
+ # Too large for batch request, upload in independent request.
|
|
429 |
+ self._send_blob(digest, f, u_uid=u_uid)
|
|
430 |
+ else:
|
|
431 |
+ if self.__batch_update.add(digest, f) is False:
|
|
432 |
+ self.__batch_update.send()
|
|
433 |
+ self.__batch_update = _CASBatchUpdate(self)
|
|
434 |
+ self.__batch_update.add(digest, f)
|
|
435 |
+ |
|
436 |
+ # send_update_batch():
|
|
437 |
+ #
|
|
438 |
+ # Sends anything left in the update batch
|
|
439 |
+ #
|
|
440 |
+ def send_update_batch(self):
|
|
441 |
+ # make sure everything is sent
|
|
442 |
+ self.__batch_update.send()
|
|
443 |
+ self.__batch_update = _CASBatchUpdate(self)
|
|
444 |
+ |
|
445 |
+ # find_missing_blobs()
|
|
446 |
+ #
|
|
447 |
+ # Does FindMissingBlobs request to remote
|
|
448 |
+ #
|
|
449 |
+ # Args:
|
|
450 |
+ # required_blobs ([Digest]): list of blobs required
|
|
451 |
+ #
|
|
452 |
+ # Returns:
|
|
453 |
+ # (Dict(Digest)): missing blobs
|
|
454 |
+ def find_missing_blobs(self, required_blobs):
|
|
455 |
+ self.init()
|
|
456 |
+ missing_blobs = dict()
|
|
457 |
+ # Limit size of FindMissingBlobs request
|
|
458 |
+ for required_blobs_group in _grouper(required_blobs, 512):
|
|
459 |
+ request = remote_execution_pb2.FindMissingBlobsRequest()
|
|
460 |
+ |
|
461 |
+ for required_digest in required_blobs_group:
|
|
462 |
+ d = request.blob_digests.add()
|
|
463 |
+ d.hash = required_digest.hash
|
|
464 |
+ d.size_bytes = required_digest.size_bytes
|
|
465 |
+ |
|
466 |
+ response = self.cas.FindMissingBlobs(request)
|
|
467 |
+ for missing_digest in response.missing_blob_digests:
|
|
468 |
+ d = remote_execution_pb2.Digest()
|
|
469 |
+ d.hash = missing_digest.hash
|
|
470 |
+ d.size_bytes = missing_digest.size_bytes
|
|
471 |
+ missing_blobs[d.hash] = d
|
|
472 |
+ |
|
473 |
+ return missing_blobs
|
|
474 |
+ |
|
257 | 475 |
################################################
|
258 | 476 |
# Local Private Methods #
|
259 | 477 |
################################################
|
... | ... | @@ -292,10 +510,31 @@ class CASRemote(): |
292 | 510 |
offset += chunk_size
|
293 | 511 |
finished = request.finish_write
|
294 | 512 |
|
295 |
- response = self.bytestream.Write(request_stream(resource_name, stream))
|
|
513 |
+ try:
|
|
514 |
+ response = self.bytestream.Write(request_stream(resource_name, stream))
|
|
515 |
+ except grpc.RpcError as e:
|
|
516 |
+ raise CASRemoteError("Failed to upload blob: {}".format(e), reason=e.code())
|
|
296 | 517 |
|
297 | 518 |
assert response.committed_size == digest.size_bytes
|
298 | 519 |
|
520 |
+ def _download_batch(self):
|
|
521 |
+ for _, data in self.__batch_read.send():
|
|
522 |
+ f = tempfile.NamedTemporaryFile(dir=self.tmpdir)
|
|
523 |
+ f.write(data)
|
|
524 |
+ f.flush()
|
|
525 |
+ self.__tmp_downloads.append(f)
|
|
526 |
+ |
|
527 |
+ self.__batch_read = _CASBatchRead(self)
|
|
528 |
+ |
|
529 |
+ |
|
530 |
+def _grouper(iterable, n):
|
|
531 |
+ while True:
|
|
532 |
+ try:
|
|
533 |
+ current = next(iterable)
|
|
534 |
+ except StopIteration:
|
|
535 |
+ return
|
|
536 |
+ yield itertools.chain([current], itertools.islice(iterable, n - 1))
|
|
537 |
+ |
|
299 | 538 |
|
300 | 539 |
# Represents a batch of blobs queued for fetching.
|
301 | 540 |
#
|
... | ... | @@ -328,7 +567,11 @@ class _CASBatchRead(): |
328 | 567 |
if not self._request.digests:
|
329 | 568 |
return
|
330 | 569 |
|
331 |
- batch_response = self._remote.cas.BatchReadBlobs(self._request)
|
|
570 |
+ try:
|
|
571 |
+ batch_response = self._remote.cas.BatchReadBlobs(self._request)
|
|
572 |
+ except grpc.RpcError as e:
|
|
573 |
+ raise CASRemoteError("Failed to read blob batch: {}".format(e),
|
|
574 |
+ reason=e.code()) from e
|
|
332 | 575 |
|
333 | 576 |
for response in batch_response.responses:
|
334 | 577 |
if response.status.code == code_pb2.NOT_FOUND:
|
... | ... | @@ -376,7 +619,12 @@ class _CASBatchUpdate(): |
376 | 619 |
if not self._request.requests:
|
377 | 620 |
return
|
378 | 621 |
|
379 |
- batch_response = self._remote.cas.BatchUpdateBlobs(self._request)
|
|
622 |
+ # Want to raise a CASRemoteError if the batch update request fails
|
|
623 |
+ try:
|
|
624 |
+ batch_response = self._remote.cas.BatchUpdateBlobs(self._request)
|
|
625 |
+ except grpc.RpcError as e:
|
|
626 |
+ raise CASRemoteError("Failed to upload blob batch: {}".format(e),
|
|
627 |
+ reason=e.code()) from e
|
|
380 | 628 |
|
381 | 629 |
for response in batch_response.responses:
|
382 | 630 |
if response.status.code != code_pb2.OK:
|
1 |
+#
|
|
2 |
+# Copyright (C) 2017-2018 Codethink Limited
|
|
3 |
+#
|
|
4 |
+# This program is free software; you can redistribute it and/or
|
|
5 |
+# modify it under the terms of the GNU Lesser General Public
|
|
6 |
+# License as published by the Free Software Foundation; either
|
|
7 |
+# version 2 of the License, or (at your option) any later version.
|
|
8 |
+#
|
|
9 |
+# This library is distributed in the hope that it will be useful,
|
|
10 |
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11 |
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
12 |
+# Lesser General Public License for more details.
|
|
13 |
+#
|
|
14 |
+# You should have received a copy of the GNU Lesser General Public
|
|
15 |
+# License along with this library. If not, see <http://www.gnu.org/licenses/>.
|
|
16 |
+#
|
|
17 |
+# Authors:
|
|
18 |
+# Raoul Hidalgo Charman <raoul hidalgocharman codethink co uk>
|
|
19 |
+ |
|
20 |
+from ..utils import _message_digest
|
|
21 |
+ |
|
22 |
+ |
|
23 |
+def cas_directory_download(caslocal, casremote, root_digest, excluded_subdirs):
|
|
24 |
+ for blob_digest in casremote.yield_directory_digests(
|
|
25 |
+ root_digest, excluded_subdirs=excluded_subdirs):
|
|
26 |
+ if caslocal.check_blob(blob_digest):
|
|
27 |
+ continue
|
|
28 |
+ casremote.request_blob(blob_digest)
|
|
29 |
+ for blob_file in casremote.get_blobs():
|
|
30 |
+ caslocal.add_object(path=blob_file.name, link_directly=True)
|
|
31 |
+ |
|
32 |
+ # Request final CAS batch
|
|
33 |
+ for blob_file in casremote.get_blobs(complete_batch=True):
|
|
34 |
+ caslocal.add_object(path=blob_file.name, link_directly=True)
|
|
35 |
+ |
|
36 |
+ |
|
37 |
+def cas_tree_download(caslocal, casremote, tree_digest):
|
|
38 |
+ tree = casremote.get_tree_blob(tree_digest)
|
|
39 |
+ for blob_digest in casremote.yield_tree_digests(tree):
|
|
40 |
+ if caslocal.check_blob(blob_digest):
|
|
41 |
+ continue
|
|
42 |
+ casremote.request_blob(blob_digest)
|
|
43 |
+ for blob_file in casremote.get_blobs():
|
|
44 |
+ caslocal.add_object(path=blob_file.name, link_directly=True)
|
|
45 |
+ |
|
46 |
+ # Get the last batch
|
|
47 |
+ for blob_file in casremote.get_blobs(complete_batch=True):
|
|
48 |
+ caslocal.add_object(path=blob_file.name, link_directly=True)
|
|
49 |
+ |
|
50 |
+ # get root digest from tree and return that
|
|
51 |
+ return _message_digest(tree.root.SerializeToString())
|
|
52 |
+ |
|
53 |
+ |
|
54 |
+def cas_directory_upload(caslocal, casremote, root_digest):
|
|
55 |
+ required_blobs = caslocal.yield_directory_digests(root_digest)
|
|
56 |
+ missing_blobs = casremote.find_missing_blobs(required_blobs)
|
|
57 |
+ for blob in missing_blobs.values():
|
|
58 |
+ blob_file = caslocal.objpath(blob)
|
|
59 |
+ casremote.upload_blob(blob, blob_file)
|
|
60 |
+ |
|
61 |
+ # send remaining blobs
|
|
62 |
+ casremote.send_update_batch()
|
... | ... | @@ -191,10 +191,11 @@ class Context(): |
191 | 191 |
_yaml.node_validate(defaults, [
|
192 | 192 |
'sourcedir', 'builddir', 'artifactdir', 'logdir',
|
193 | 193 |
'scheduler', 'artifacts', 'logging', 'projects',
|
194 |
- 'cache', 'prompt', 'workspacedir', 'remote-execution'
|
|
194 |
+ 'cache', 'prompt', 'workspacedir', 'remote-execution', 'tmpdir',
|
|
195 | 195 |
])
|
196 | 196 |
|
197 |
- for directory in ['sourcedir', 'builddir', 'artifactdir', 'logdir', 'workspacedir']:
|
|
197 |
+ for directory in ['sourcedir', 'builddir', 'artifactdir', 'logdir',
|
|
198 |
+ 'tmpdir', 'workspacedir']:
|
|
198 | 199 |
# Allow the ~ tilde expansion and any environment variables in
|
199 | 200 |
# path specification in the config files.
|
200 | 201 |
#
|
... | ... | @@ -19,6 +19,9 @@ builddir: ${XDG_CACHE_HOME}/buildstream/build |
19 | 19 |
# Location to store local binary artifacts
|
20 | 20 |
artifactdir: ${XDG_CACHE_HOME}/buildstream/artifacts
|
21 | 21 |
|
22 |
+# Location to store temporary files, e.g. used when downloading from a casremote
|
|
23 |
+tmpdir: ${XDG_CACHE_HOME}/buildstream/tmp
|
|
24 |
+ |
|
22 | 25 |
# Location to store build logs
|
23 | 26 |
logdir: ${XDG_CACHE_HOME}/buildstream/logs
|
24 | 27 |
|
... | ... | @@ -39,6 +39,7 @@ from .._exceptions import SandboxError |
39 | 39 |
from .. import _yaml
|
40 | 40 |
from .._protos.google.longrunning import operations_pb2, operations_pb2_grpc
|
41 | 41 |
from .._cas import CASRemote, CASRemoteSpec
|
42 |
+from .._cas.transfer import cas_tree_download, cas_directory_upload
|
|
42 | 43 |
|
43 | 44 |
|
44 | 45 |
class RemoteExecutionSpec(namedtuple('RemoteExecutionSpec', 'exec_service storage_service action_service')):
|
... | ... | @@ -279,10 +280,9 @@ class SandboxRemote(Sandbox): |
279 | 280 |
|
280 | 281 |
context = self._get_context()
|
281 | 282 |
cascache = context.get_cascache()
|
282 |
- casremote = CASRemote(self.storage_remote_spec)
|
|
283 |
+ casremote = CASRemote(self.storage_remote_spec, context.tmpdir)
|
|
283 | 284 |
|
284 |
- # Now do a pull to ensure we have the necessary parts.
|
|
285 |
- dir_digest = cascache.pull_tree(casremote, tree_digest)
|
|
285 |
+ dir_digest = cas_tree_download(cascache, casremote, tree_digest)
|
|
286 | 286 |
if dir_digest is None or not dir_digest.hash or not dir_digest.size_bytes:
|
287 | 287 |
raise SandboxError("Output directory structure pulling from remote failed.")
|
288 | 288 |
|
... | ... | @@ -306,8 +306,9 @@ class SandboxRemote(Sandbox): |
306 | 306 |
|
307 | 307 |
def _run(self, command, flags, *, cwd, env):
|
308 | 308 |
# set up virtual directory
|
309 |
+ context = self._get_context()
|
|
309 | 310 |
upload_vdir = self.get_virtual_directory()
|
310 |
- cascache = self._get_context().get_cascache()
|
|
311 |
+ cascache = context.get_cascache()
|
|
311 | 312 |
if isinstance(upload_vdir, FileBasedDirectory):
|
312 | 313 |
# Make a new temporary directory to put source in
|
313 | 314 |
upload_vdir = CasBasedDirectory(cascache, ref=None)
|
... | ... | @@ -340,11 +341,11 @@ class SandboxRemote(Sandbox): |
340 | 341 |
action_result = self._check_action_cache(action_digest)
|
341 | 342 |
|
342 | 343 |
if not action_result:
|
343 |
- casremote = CASRemote(self.storage_remote_spec)
|
|
344 |
+ casremote = CASRemote(self.storage_remote_spec, context.tmpdir)
|
|
344 | 345 |
|
345 | 346 |
# Now, push that key (without necessarily needing a ref) to the remote.
|
346 | 347 |
try:
|
347 |
- cascache.push_directory(casremote, upload_vdir)
|
|
348 |
+ cas_directory_upload(cascache, casremote, upload_vdir.ref)
|
|
348 | 349 |
except grpc.RpcError as e:
|
349 | 350 |
raise SandboxError("Failed to push source directory to remote: {}".format(e)) from e
|
350 | 351 |
|
... | ... | @@ -50,8 +50,10 @@ def integration_cache(request): |
50 | 50 |
|
51 | 51 |
# Clean up the artifacts after each test run - we only want to
|
52 | 52 |
# cache sources
|
53 |
+ # Also clean up the tmp dir, should be empty but isn't in CI tests
|
|
53 | 54 |
try:
|
54 | 55 |
shutil.rmtree(os.path.join(cache_dir, 'artifacts'))
|
56 |
+ shutil.rmtree(os.path.join(cache_dir, 'tmp'))
|
|
55 | 57 |
except FileNotFoundError:
|
56 | 58 |
pass
|
57 | 59 |
|
... | ... | @@ -110,7 +110,7 @@ def test_pull(cli, tmpdir, datafiles): |
110 | 110 |
# See https://github.com/grpc/grpc/blob/master/doc/fork_support.md for details
|
111 | 111 |
process = multiprocessing.Process(target=_queue_wrapper,
|
112 | 112 |
args=(_test_pull, queue, user_config_file, project_dir,
|
113 |
- artifact_dir, 'target.bst', element_key))
|
|
113 |
+ artifact_dir, tmpdir, 'target.bst', element_key))
|
|
114 | 114 |
|
115 | 115 |
try:
|
116 | 116 |
# Keep SIGINT blocked in the child process
|
... | ... | @@ -126,14 +126,18 @@ def test_pull(cli, tmpdir, datafiles): |
126 | 126 |
assert not error
|
127 | 127 |
assert cas.contains(element, element_key)
|
128 | 128 |
|
129 |
+ # Check that the tmp dir is cleared out
|
|
130 |
+ assert os.listdir(os.path.join(str(tmpdir), 'cache', 'tmp')) == []
|
|
129 | 131 |
|
130 |
-def _test_pull(user_config_file, project_dir, artifact_dir,
|
|
132 |
+ |
|
133 |
+def _test_pull(user_config_file, project_dir, artifact_dir, tmpdir,
|
|
131 | 134 |
element_name, element_key, queue):
|
132 | 135 |
# Fake minimal context
|
133 | 136 |
context = Context()
|
134 | 137 |
context.load(config=user_config_file)
|
135 | 138 |
context.artifactdir = artifact_dir
|
136 | 139 |
context.set_message_handler(message_handler)
|
140 |
+ context.tmpdir = os.path.join(str(tmpdir), 'cache', 'tmp')
|
|
137 | 141 |
|
138 | 142 |
# Load the project manually
|
139 | 143 |
project = Project(project_dir, context)
|
... | ... | @@ -218,7 +222,7 @@ def test_pull_tree(cli, tmpdir, datafiles): |
218 | 222 |
# See https://github.com/grpc/grpc/blob/master/doc/fork_support.md for details
|
219 | 223 |
process = multiprocessing.Process(target=_queue_wrapper,
|
220 | 224 |
args=(_test_push_tree, queue, user_config_file, project_dir,
|
221 |
- artifact_dir, artifact_digest))
|
|
225 |
+ artifact_dir, tmpdir, artifact_digest))
|
|
222 | 226 |
|
223 | 227 |
try:
|
224 | 228 |
# Keep SIGINT blocked in the child process
|
... | ... | @@ -239,6 +243,9 @@ def test_pull_tree(cli, tmpdir, datafiles): |
239 | 243 |
# Assert that we are not cached locally anymore
|
240 | 244 |
assert cli.get_element_state(project_dir, 'target.bst') != 'cached'
|
241 | 245 |
|
246 |
+ # Check that the tmp dir is cleared out
|
|
247 |
+ assert os.listdir(os.path.join(str(tmpdir), 'cache', 'tmp')) == []
|
|
248 |
+ |
|
242 | 249 |
tree_digest = remote_execution_pb2.Digest(hash=tree_hash,
|
243 | 250 |
size_bytes=tree_size)
|
244 | 251 |
|
... | ... | @@ -246,7 +253,7 @@ def test_pull_tree(cli, tmpdir, datafiles): |
246 | 253 |
# Use subprocess to avoid creation of gRPC threads in main BuildStream process
|
247 | 254 |
process = multiprocessing.Process(target=_queue_wrapper,
|
248 | 255 |
args=(_test_pull_tree, queue, user_config_file, project_dir,
|
249 |
- artifact_dir, tree_digest))
|
|
256 |
+ artifact_dir, tmpdir, tree_digest))
|
|
250 | 257 |
|
251 | 258 |
try:
|
252 | 259 |
# Keep SIGINT blocked in the child process
|
... | ... | @@ -267,13 +274,18 @@ def test_pull_tree(cli, tmpdir, datafiles): |
267 | 274 |
# Ensure the entire Tree structure has been pulled
|
268 | 275 |
assert os.path.exists(cas.objpath(directory_digest))
|
269 | 276 |
|
277 |
+ # Check that the tmp dir is cleared out
|
|
278 |
+ assert os.listdir(os.path.join(str(tmpdir), 'cache', 'tmp')) == []
|
|
279 |
+ |
|
270 | 280 |
|
271 |
-def _test_push_tree(user_config_file, project_dir, artifact_dir, artifact_digest, queue):
|
|
281 |
+def _test_push_tree(user_config_file, project_dir, artifact_dir, tmpdir,
|
|
282 |
+ artifact_digest, queue):
|
|
272 | 283 |
# Fake minimal context
|
273 | 284 |
context = Context()
|
274 | 285 |
context.load(config=user_config_file)
|
275 | 286 |
context.artifactdir = artifact_dir
|
276 | 287 |
context.set_message_handler(message_handler)
|
288 |
+ context.tmpdir = os.path.join(str(tmpdir), 'cache', 'tmp')
|
|
277 | 289 |
|
278 | 290 |
# Load the project manually
|
279 | 291 |
project = Project(project_dir, context)
|
... | ... | @@ -304,12 +316,14 @@ def _test_push_tree(user_config_file, project_dir, artifact_dir, artifact_digest |
304 | 316 |
queue.put("No remote configured")
|
305 | 317 |
|
306 | 318 |
|
307 |
-def _test_pull_tree(user_config_file, project_dir, artifact_dir, artifact_digest, queue):
|
|
319 |
+def _test_pull_tree(user_config_file, project_dir, artifact_dir, tmpdir,
|
|
320 |
+ artifact_digest, queue):
|
|
308 | 321 |
# Fake minimal context
|
309 | 322 |
context = Context()
|
310 | 323 |
context.load(config=user_config_file)
|
311 | 324 |
context.artifactdir = artifact_dir
|
312 | 325 |
context.set_message_handler(message_handler)
|
326 |
+ context.tmpdir = os.path.join(str(tmpdir), 'cache', 'tmp')
|
|
313 | 327 |
|
314 | 328 |
# Load the project manually
|
315 | 329 |
project = Project(project_dir, context)
|
... | ... | @@ -89,7 +89,7 @@ def test_push(cli, tmpdir, datafiles): |
89 | 89 |
# See https://github.com/grpc/grpc/blob/master/doc/fork_support.md for details
|
90 | 90 |
process = multiprocessing.Process(target=_queue_wrapper,
|
91 | 91 |
args=(_test_push, queue, user_config_file, project_dir,
|
92 |
- artifact_dir, 'target.bst', element_key))
|
|
92 |
+ artifact_dir, tmpdir, 'target.bst', element_key))
|
|
93 | 93 |
|
94 | 94 |
try:
|
95 | 95 |
# Keep SIGINT blocked in the child process
|
... | ... | @@ -105,14 +105,18 @@ def test_push(cli, tmpdir, datafiles): |
105 | 105 |
assert not error
|
106 | 106 |
assert share.has_artifact('test', 'target.bst', element_key)
|
107 | 107 |
|
108 |
+ # Check tmpdir for downloads is cleared
|
|
109 |
+ assert os.listdir(os.path.join(str(tmpdir), 'cache', 'tmp')) == []
|
|
108 | 110 |
|
109 |
-def _test_push(user_config_file, project_dir, artifact_dir,
|
|
111 |
+ |
|
112 |
+def _test_push(user_config_file, project_dir, artifact_dir, tmpdir,
|
|
110 | 113 |
element_name, element_key, queue):
|
111 | 114 |
# Fake minimal context
|
112 | 115 |
context = Context()
|
113 | 116 |
context.load(config=user_config_file)
|
114 | 117 |
context.artifactdir = artifact_dir
|
115 | 118 |
context.set_message_handler(message_handler)
|
119 |
+ context.tmpdir = os.path.join(str(tmpdir), 'cache', 'tmp')
|
|
116 | 120 |
|
117 | 121 |
# Load the project manually
|
118 | 122 |
project = Project(project_dir, context)
|
... | ... | @@ -196,9 +200,10 @@ def test_push_directory(cli, tmpdir, datafiles): |
196 | 200 |
queue = multiprocessing.Queue()
|
197 | 201 |
# Use subprocess to avoid creation of gRPC threads in main BuildStream process
|
198 | 202 |
# See https://github.com/grpc/grpc/blob/master/doc/fork_support.md for details
|
199 |
- process = multiprocessing.Process(target=_queue_wrapper,
|
|
200 |
- args=(_test_push_directory, queue, user_config_file,
|
|
201 |
- project_dir, artifact_dir, artifact_digest))
|
|
203 |
+ process = multiprocessing.Process(
|
|
204 |
+ target=_queue_wrapper,
|
|
205 |
+ args=(_test_push_directory, queue, user_config_file, project_dir,
|
|
206 |
+ artifact_dir, tmpdir, artifact_digest))
|
|
202 | 207 |
|
203 | 208 |
try:
|
204 | 209 |
# Keep SIGINT blocked in the child process
|
... | ... | @@ -215,13 +220,17 @@ def test_push_directory(cli, tmpdir, datafiles): |
215 | 220 |
assert artifact_digest.hash == directory_hash
|
216 | 221 |
assert share.has_object(artifact_digest)
|
217 | 222 |
|
223 |
+ assert os.listdir(os.path.join(str(tmpdir), 'cache', 'tmp')) == []
|
|
218 | 224 |
|
219 |
-def _test_push_directory(user_config_file, project_dir, artifact_dir, artifact_digest, queue):
|
|
225 |
+ |
|
226 |
+def _test_push_directory(user_config_file, project_dir, artifact_dir, tmpdir,
|
|
227 |
+ artifact_digest, queue):
|
|
220 | 228 |
# Fake minimal context
|
221 | 229 |
context = Context()
|
222 | 230 |
context.load(config=user_config_file)
|
223 | 231 |
context.artifactdir = artifact_dir
|
224 | 232 |
context.set_message_handler(message_handler)
|
233 |
+ context.tmpdir = os.path.join(str(tmpdir), 'cache', 'tmp')
|
|
225 | 234 |
|
226 | 235 |
# Load the project manually
|
227 | 236 |
project = Project(project_dir, context)
|
... | ... | @@ -273,7 +282,7 @@ def test_push_message(cli, tmpdir, datafiles): |
273 | 282 |
# See https://github.com/grpc/grpc/blob/master/doc/fork_support.md for details
|
274 | 283 |
process = multiprocessing.Process(target=_queue_wrapper,
|
275 | 284 |
args=(_test_push_message, queue, user_config_file,
|
276 |
- project_dir, artifact_dir))
|
|
285 |
+ project_dir, artifact_dir, tmpdir))
|
|
277 | 286 |
|
278 | 287 |
try:
|
279 | 288 |
# Keep SIGINT blocked in the child process
|
... | ... | @@ -291,13 +300,16 @@ def test_push_message(cli, tmpdir, datafiles): |
291 | 300 |
size_bytes=message_size)
|
292 | 301 |
assert share.has_object(message_digest)
|
293 | 302 |
|
303 |
+ assert os.listdir(os.path.join(str(tmpdir), 'cache', 'tmp')) == []
|
|
304 |
+ |
|
294 | 305 |
|
295 |
-def _test_push_message(user_config_file, project_dir, artifact_dir, queue):
|
|
306 |
+def _test_push_message(user_config_file, project_dir, artifact_dir, tmpdir, queue):
|
|
296 | 307 |
# Fake minimal context
|
297 | 308 |
context = Context()
|
298 | 309 |
context.load(config=user_config_file)
|
299 | 310 |
context.artifactdir = artifact_dir
|
300 | 311 |
context.set_message_handler(message_handler)
|
312 |
+ context.tmpdir = os.path.join(str(tmpdir), 'cache', 'tmp')
|
|
301 | 313 |
|
302 | 314 |
# Load the project manually
|
303 | 315 |
project = Project(project_dir, context)
|
... | ... | @@ -23,6 +23,7 @@ def default_state(cli, tmpdir, share): |
23 | 23 |
'artifacts': {'url': share.repo, 'push': False},
|
24 | 24 |
'artifactdir': os.path.join(str(tmpdir), 'artifacts'),
|
25 | 25 |
'cache': {'pull-buildtrees': False},
|
26 |
+ 'tmpdir': os.path.join(str(tmpdir), 'cache', 'tmp'),
|
|
26 | 27 |
})
|
27 | 28 |
|
28 | 29 |
|
... | ... | @@ -79,6 +80,9 @@ def test_pullbuildtrees(cli, tmpdir, datafiles, integration_cache): |
79 | 80 |
assert os.path.isdir(buildtreedir)
|
80 | 81 |
default_state(cli, tmpdir, share1)
|
81 | 82 |
|
83 |
+ # Check tmpdir for downloads is cleared
|
|
84 |
+ assert os.listdir(os.path.join(integration_cache, 'tmp')) == []
|
|
85 |
+ |
|
82 | 86 |
# Pull artifact with pullbuildtrees set in user config, then assert
|
83 | 87 |
# that pulling with the same user config doesn't create a pull job,
|
84 | 88 |
# or when buildtrees cli flag is set.
|
... | ... | @@ -91,6 +95,9 @@ def test_pullbuildtrees(cli, tmpdir, datafiles, integration_cache): |
91 | 95 |
assert element_name not in result.get_pulled_elements()
|
92 | 96 |
default_state(cli, tmpdir, share1)
|
93 | 97 |
|
98 |
+ # Check tmpdir for downloads is cleared
|
|
99 |
+ assert os.listdir(os.path.join(integration_cache, 'tmp')) == []
|
|
100 |
+ |
|
94 | 101 |
# Pull artifact with default config and buildtrees cli flag set, then assert
|
95 | 102 |
# that pulling with pullbuildtrees set in user config doesn't create a pull
|
96 | 103 |
# job.
|
... | ... | @@ -101,6 +108,9 @@ def test_pullbuildtrees(cli, tmpdir, datafiles, integration_cache): |
101 | 108 |
assert element_name not in result.get_pulled_elements()
|
102 | 109 |
default_state(cli, tmpdir, share1)
|
103 | 110 |
|
111 |
+ # Check tmpdir for downloads is cleared
|
|
112 |
+ assert os.listdir(os.path.join(integration_cache, 'tmp')) == []
|
|
113 |
+ |
|
104 | 114 |
# Assert that a partial build element (not containing a populated buildtree dir)
|
105 | 115 |
# can't be pushed to an artifact share, then assert that a complete build element
|
106 | 116 |
# can be. This will attempt a partial pull from share1 and then a partial push
|
... | ... | @@ -526,7 +526,8 @@ def cli_integration(tmpdir, integration_cache): |
526 | 526 |
# to avoid downloading the huge base-sdk repeatedly
|
527 | 527 |
fixture.configure({
|
528 | 528 |
'sourcedir': os.path.join(integration_cache, 'sources'),
|
529 |
- 'artifactdir': os.path.join(integration_cache, 'artifacts')
|
|
529 |
+ 'artifactdir': os.path.join(integration_cache, 'artifacts'),
|
|
530 |
+ 'tmpdir': os.path.join(integration_cache, 'tmp')
|
|
530 | 531 |
})
|
531 | 532 |
|
532 | 533 |
return fixture
|
... | ... | @@ -573,6 +574,8 @@ def configured(directory, config=None): |
573 | 574 |
config['builddir'] = os.path.join(directory, 'build')
|
574 | 575 |
if not config.get('artifactdir', False):
|
575 | 576 |
config['artifactdir'] = os.path.join(directory, 'artifacts')
|
577 |
+ if not config.get('tmpdir', False):
|
|
578 |
+ config['tmpdir'] = os.path.join(directory, 'tmp')
|
|
576 | 579 |
if not config.get('logdir', False):
|
577 | 580 |
config['logdir'] = os.path.join(directory, 'logs')
|
578 | 581 |
|