Raoul Hidalgo Charman pushed to branch raoul/802-refactor-artifactcache at BuildStream / buildstream
Commits:
-
ac8277bc
by Raoul Hidalgo Charman at 2019-01-09T15:52:36Z
3 changed files:
Changes:
| ... | ... | @@ -607,16 +607,41 @@ class ArtifactCache(): |
| 607 | 607 |
|
| 608 | 608 |
for remote in push_remotes:
|
| 609 | 609 |
remote.init()
|
| 610 |
+ skipped_remote = True
|
|
| 610 | 611 |
display_key = element._get_brief_display_key()
|
| 611 | 612 |
element.status("Pushing artifact {} -> {}".format(display_key, remote.spec.url))
|
| 612 | 613 |
|
| 613 |
- if self.cas.push(refs, remote):
|
|
| 614 |
- element.info("Pushed artifact {} -> {}".format(display_key, remote.spec.url))
|
|
| 614 |
+ try:
|
|
| 615 |
+ for ref in refs:
|
|
| 616 |
+ # Check whether ref is already on the server in which case
|
|
| 617 |
+ # there is no need to push the ref
|
|
| 618 |
+ root_digest = self.cas.resolve_ref(ref)
|
|
| 619 |
+ response = remote.get_reference(ref)
|
|
| 620 |
+ if (response is not None and
|
|
| 621 |
+ response.hash == root_digest.hash and
|
|
| 622 |
+ response.size_bytes == root_digest.size_bytes):
|
|
| 623 |
+ element.info("Remote ({}) already has {} cached".format(
|
|
| 624 |
+ remote.spec.url, element._get_brief_display_key()))
|
|
| 625 |
+ continue
|
|
| 626 |
+ |
|
| 627 |
+ # upload blobs
|
|
| 628 |
+ self._send_directory(root_digest, remote)
|
|
| 629 |
+ remote.update_reference(ref, root_digest)
|
|
| 630 |
+ |
|
| 631 |
+ skipped_remote = False
|
|
| 632 |
+ |
|
| 633 |
+ except CASError as e:
|
|
| 634 |
+ if str(e.reason) == "StatusCode.RESOURCE_EXHAUSTED":
|
|
| 635 |
+ element.warn("Failed to push element to {}: Resource exhuasted"
|
|
| 636 |
+ .format(remote.spec.url))
|
|
| 637 |
+ continue
|
|
| 638 |
+ else:
|
|
| 639 |
+ raise ArtifactError("Failed to push refs {}: {}".format(refs, e),
|
|
| 640 |
+ temporary=True) from e
|
|
| 641 |
+ |
|
| 642 |
+ if skipped_remote is False:
|
|
| 615 | 643 |
pushed = True
|
| 616 |
- else:
|
|
| 617 |
- element.info("Remote ({}) already has {} cached".format(
|
|
| 618 |
- remote.spec.url, element._get_brief_display_key()
|
|
| 619 |
- ))
|
|
| 644 |
+ element.info("Pushed artifact {} -> {}".format(display_key, remote.spec.url))
|
|
| 620 | 645 |
|
| 621 | 646 |
return pushed
|
| 622 | 647 |
|
| ... | ... | @@ -731,7 +756,7 @@ class ArtifactCache(): |
| 731 | 756 |
return
|
| 732 | 757 |
|
| 733 | 758 |
for remote in push_remotes:
|
| 734 |
- self.cas.push_directory(remote, directory)
|
|
| 759 |
+ self._send_directory(directory.ref, remote)
|
|
| 735 | 760 |
|
| 736 | 761 |
# push_message():
|
| 737 | 762 |
#
|
| ... | ... | @@ -816,6 +841,14 @@ class ArtifactCache(): |
| 816 | 841 |
with self.context.timed_activity("Initializing remote caches", silent_nested=True):
|
| 817 | 842 |
self.initialize_remotes(on_failure=remote_failed)
|
| 818 | 843 |
|
| 844 |
+ def _send_directory(self, root_digest, remote):
|
|
| 845 |
+ required_blobs = self.cas.yield_directory_digests(root_digest)
|
|
| 846 |
+ missing_blobs = remote.find_missing_blobs(required_blobs)
|
|
| 847 |
+ for blob in missing_blobs.values():
|
|
| 848 |
+ blob_file = self.cas.objpath(blob)
|
|
| 849 |
+ remote.upload_blob(blob, blob_file)
|
|
| 850 |
+ remote.send_update_batch()
|
|
| 851 |
+ |
|
| 819 | 852 |
# _write_cache_size()
|
| 820 | 853 |
#
|
| 821 | 854 |
# Writes the given size of the artifact to the cache's size file
|
| ... | ... | @@ -18,23 +18,16 @@ |
| 18 | 18 |
# Jürg Billeter <juerg billeter codethink co uk>
|
| 19 | 19 |
|
| 20 | 20 |
import hashlib
|
| 21 |
-import itertools
|
|
| 22 | 21 |
import os
|
| 23 | 22 |
import stat
|
| 24 | 23 |
import tempfile
|
| 25 |
-import uuid
|
|
| 26 | 24 |
import contextlib
|
| 27 | 25 |
|
| 28 |
-import grpc
|
|
| 29 |
- |
|
| 30 | 26 |
from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
| 31 |
-from .._protos.buildstream.v2 import buildstream_pb2
|
|
| 32 | 27 |
|
| 33 | 28 |
from .. import utils
|
| 34 | 29 |
from .._exceptions import CASError
|
| 35 | 30 |
|
| 36 |
-from .casremote import _CASBatchUpdate
|
|
| 37 |
- |
|
| 38 | 31 |
|
| 39 | 32 |
# A CASCache manages a CAS repository as specified in the Remote Execution API.
|
| 40 | 33 |
#
|
| ... | ... | @@ -196,73 +189,6 @@ class CASCache(): |
| 196 | 189 |
|
| 197 | 190 |
self.set_ref(newref, tree)
|
| 198 | 191 |
|
| 199 |
- # push():
|
|
| 200 |
- #
|
|
| 201 |
- # Push committed refs to remote repository.
|
|
| 202 |
- #
|
|
| 203 |
- # Args:
|
|
| 204 |
- # refs (list): The refs to push
|
|
| 205 |
- # remote (CASRemote): The remote to push to
|
|
| 206 |
- #
|
|
| 207 |
- # Returns:
|
|
| 208 |
- # (bool): True if any remote was updated, False if no pushes were required
|
|
| 209 |
- #
|
|
| 210 |
- # Raises:
|
|
| 211 |
- # (CASError): if there was an error
|
|
| 212 |
- #
|
|
| 213 |
- def push(self, refs, remote):
|
|
| 214 |
- skipped_remote = True
|
|
| 215 |
- try:
|
|
| 216 |
- for ref in refs:
|
|
| 217 |
- tree = self.resolve_ref(ref)
|
|
| 218 |
- |
|
| 219 |
- # Check whether ref is already on the server in which case
|
|
| 220 |
- # there is no need to push the ref
|
|
| 221 |
- try:
|
|
| 222 |
- request = buildstream_pb2.GetReferenceRequest(instance_name=remote.spec.instance_name)
|
|
| 223 |
- request.key = ref
|
|
| 224 |
- response = remote.ref_storage.GetReference(request)
|
|
| 225 |
- |
|
| 226 |
- if response.digest.hash == tree.hash and response.digest.size_bytes == tree.size_bytes:
|
|
| 227 |
- # ref is already on the server with the same tree
|
|
| 228 |
- continue
|
|
| 229 |
- |
|
| 230 |
- except grpc.RpcError as e:
|
|
| 231 |
- if e.code() != grpc.StatusCode.NOT_FOUND:
|
|
| 232 |
- # Intentionally re-raise RpcError for outer except block.
|
|
| 233 |
- raise
|
|
| 234 |
- |
|
| 235 |
- self._send_directory(remote, tree)
|
|
| 236 |
- |
|
| 237 |
- request = buildstream_pb2.UpdateReferenceRequest(instance_name=remote.spec.instance_name)
|
|
| 238 |
- request.keys.append(ref)
|
|
| 239 |
- request.digest.hash = tree.hash
|
|
| 240 |
- request.digest.size_bytes = tree.size_bytes
|
|
| 241 |
- remote.ref_storage.UpdateReference(request)
|
|
| 242 |
- |
|
| 243 |
- skipped_remote = False
|
|
| 244 |
- except grpc.RpcError as e:
|
|
| 245 |
- if e.code() != grpc.StatusCode.RESOURCE_EXHAUSTED:
|
|
| 246 |
- raise CASError("Failed to push ref {}: {}".format(refs, e), temporary=True) from e
|
|
| 247 |
- |
|
| 248 |
- return not skipped_remote
|
|
| 249 |
- |
|
| 250 |
- # push_directory():
|
|
| 251 |
- #
|
|
| 252 |
- # Push the given virtual directory to a remote.
|
|
| 253 |
- #
|
|
| 254 |
- # Args:
|
|
| 255 |
- # remote (CASRemote): The remote to push to
|
|
| 256 |
- # directory (Directory): A virtual directory object to push.
|
|
| 257 |
- #
|
|
| 258 |
- # Raises:
|
|
| 259 |
- # (CASError): if there was an error
|
|
| 260 |
- #
|
|
| 261 |
- def push_directory(self, remote, directory):
|
|
| 262 |
- remote.init()
|
|
| 263 |
- |
|
| 264 |
- self._send_directory(remote, directory.ref)
|
|
| 265 |
- |
|
| 266 | 192 |
# objpath():
|
| 267 | 193 |
#
|
| 268 | 194 |
# Return the path of an object based on its digest.
|
| ... | ... | @@ -534,6 +460,27 @@ class CASCache(): |
| 534 | 460 |
else:
|
| 535 | 461 |
return None
|
| 536 | 462 |
|
| 463 |
+ def yield_directory_digests(self, directory_digest):
|
|
| 464 |
+ # parse directory, and recursively add blobs
|
|
| 465 |
+ d = remote_execution_pb2.Digest()
|
|
| 466 |
+ d.hash = directory_digest.hash
|
|
| 467 |
+ d.size_bytes = directory_digest.size_bytes
|
|
| 468 |
+ yield d
|
|
| 469 |
+ |
|
| 470 |
+ directory = remote_execution_pb2.Directory()
|
|
| 471 |
+ |
|
| 472 |
+ with open(self.objpath(directory_digest), 'rb') as f:
|
|
| 473 |
+ directory.ParseFromString(f.read())
|
|
| 474 |
+ |
|
| 475 |
+ for filenode in directory.files:
|
|
| 476 |
+ d = remote_execution_pb2.Digest()
|
|
| 477 |
+ d.hash = filenode.digest.hash
|
|
| 478 |
+ d.size_bytes = filenode.digest.size_bytes
|
|
| 479 |
+ yield d
|
|
| 480 |
+ |
|
| 481 |
+ for dirnode in directory.directories:
|
|
| 482 |
+ yield from self.yield_directory_digests(dirnode.digest)
|
|
| 483 |
+ |
|
| 537 | 484 |
################################################
|
| 538 | 485 |
# Local Private Methods #
|
| 539 | 486 |
################################################
|
| ... | ... | @@ -722,57 +669,3 @@ class CASCache(): |
| 722 | 669 |
|
| 723 | 670 |
for dirnode in directory.directories:
|
| 724 | 671 |
yield from self._required_blobs(dirnode.digest)
|
| 725 |
- |
|
| 726 |
- def _send_directory(self, remote, digest, u_uid=uuid.uuid4()):
|
|
| 727 |
- required_blobs = self._required_blobs(digest)
|
|
| 728 |
- |
|
| 729 |
- missing_blobs = dict()
|
|
| 730 |
- # Limit size of FindMissingBlobs request
|
|
| 731 |
- for required_blobs_group in _grouper(required_blobs, 512):
|
|
| 732 |
- request = remote_execution_pb2.FindMissingBlobsRequest(instance_name=remote.spec.instance_name)
|
|
| 733 |
- |
|
| 734 |
- for required_digest in required_blobs_group:
|
|
| 735 |
- d = request.blob_digests.add()
|
|
| 736 |
- d.hash = required_digest.hash
|
|
| 737 |
- d.size_bytes = required_digest.size_bytes
|
|
| 738 |
- |
|
| 739 |
- response = remote.cas.FindMissingBlobs(request)
|
|
| 740 |
- for missing_digest in response.missing_blob_digests:
|
|
| 741 |
- d = remote_execution_pb2.Digest()
|
|
| 742 |
- d.hash = missing_digest.hash
|
|
| 743 |
- d.size_bytes = missing_digest.size_bytes
|
|
| 744 |
- missing_blobs[d.hash] = d
|
|
| 745 |
- |
|
| 746 |
- # Upload any blobs missing on the server
|
|
| 747 |
- self._send_blobs(remote, missing_blobs.values(), u_uid)
|
|
| 748 |
- |
|
| 749 |
- def _send_blobs(self, remote, digests, u_uid=uuid.uuid4()):
|
|
| 750 |
- batch = _CASBatchUpdate(remote)
|
|
| 751 |
- |
|
| 752 |
- for digest in digests:
|
|
| 753 |
- with open(self.objpath(digest), 'rb') as f:
|
|
| 754 |
- assert os.fstat(f.fileno()).st_size == digest.size_bytes
|
|
| 755 |
- |
|
| 756 |
- if (digest.size_bytes >= remote.max_batch_total_size_bytes or
|
|
| 757 |
- not remote.batch_update_supported):
|
|
| 758 |
- # Too large for batch request, upload in independent request.
|
|
| 759 |
- remote._send_blob(digest, f, u_uid=u_uid)
|
|
| 760 |
- else:
|
|
| 761 |
- if not batch.add(digest, f):
|
|
| 762 |
- # Not enough space left in batch request.
|
|
| 763 |
- # Complete pending batch first.
|
|
| 764 |
- batch.send()
|
|
| 765 |
- batch = _CASBatchUpdate(remote)
|
|
| 766 |
- batch.add(digest, f)
|
|
| 767 |
- |
|
| 768 |
- # Send final batch
|
|
| 769 |
- batch.send()
|
|
| 770 |
- |
|
| 771 |
- |
|
| 772 |
-def _grouper(iterable, n):
|
|
| 773 |
- while True:
|
|
| 774 |
- try:
|
|
| 775 |
- current = next(iterable)
|
|
| 776 |
- except StopIteration:
|
|
| 777 |
- return
|
|
| 778 |
- yield itertools.chain([current], itertools.islice(iterable, n - 1))
|
| 1 | 1 |
from collections import namedtuple
|
| 2 | 2 |
import io
|
| 3 |
+import itertools
|
|
| 3 | 4 |
import os
|
| 4 | 5 |
import multiprocessing
|
| 5 | 6 |
import signal
|
| ... | ... | @@ -288,6 +289,18 @@ class CASRemote(): |
| 288 | 289 |
else:
|
| 289 | 290 |
return None
|
| 290 | 291 |
|
| 292 |
+ # update_reference():
|
|
| 293 |
+ #
|
|
| 294 |
+ # Args:
|
|
| 295 |
+ # ref (str): Reference to update
|
|
| 296 |
+ # digest (Digest): New digest to update ref with
|
|
| 297 |
+ def update_reference(self, ref, digest):
|
|
| 298 |
+ request = buildstream_pb2.UpdateReferenceRequest()
|
|
| 299 |
+ request.keys.append(ref)
|
|
| 300 |
+ request.digest.hash = digest.hash
|
|
| 301 |
+ request.digest.size_bytes = digest.size_bytes
|
|
| 302 |
+ self.ref_storage.UpdateReference(request)
|
|
| 303 |
+ |
|
| 291 | 304 |
def get_tree_blob(self, tree_digest):
|
| 292 | 305 |
self.init()
|
| 293 | 306 |
f = tempfile.NamedTemporaryFile(dir=self.tmpdir)
|
| ... | ... | @@ -405,6 +418,68 @@ class CASRemote(): |
| 405 | 418 |
while self.__tmp_downloads:
|
| 406 | 419 |
yield self.__tmp_downloads.pop()
|
| 407 | 420 |
|
| 421 |
+ # upload_blob():
|
|
| 422 |
+ #
|
|
| 423 |
+ # Upload a single blob, either directly or by queueing it in the pending update batch
|
|
| 424 |
+ #
|
|
| 425 |
+ # Args:
|
|
| 426 |
+ # digest (Digest): digest we want to upload
|
|
| 427 |
+ # blob_file (str): Name of file location
|
|
| 428 |
+ # u_uid (str): Used to identify to the bytestream service
|
|
| 429 |
+ #
|
|
| 430 |
+ def upload_blob(self, digest, blob_file, u_uid=uuid.uuid4()):
|
|
| 431 |
+ with open(blob_file, 'rb') as f:
|
|
| 432 |
+ assert os.fstat(f.fileno()).st_size == digest.size_bytes
|
|
| 433 |
+ |
|
| 434 |
+ if (digest.size_bytes >= self.max_batch_total_size_bytes or
|
|
| 435 |
+ not self.batch_update_supported):
|
|
| 436 |
+ # Too large for batch request, upload in independent request.
|
|
| 437 |
+ self._send_blob(digest, f, u_uid=u_uid)
|
|
| 438 |
+ else:
|
|
| 439 |
+ if self.__batch_update.add(digest, f) is False:
|
|
| 440 |
+ self.__batch_update.send()
|
|
| 441 |
+ self.__batch_update = _CASBatchUpdate(self)
|
|
| 442 |
+ self.__batch_update.add(digest, f)
|
|
| 443 |
+ |
|
| 444 |
+ # send_update_batch():
|
|
| 445 |
+ #
|
|
| 446 |
+ # Sends anything left in the update batch
|
|
| 447 |
+ #
|
|
| 448 |
+ def send_update_batch(self):
|
|
| 449 |
+ # make sure everything is sent
|
|
| 450 |
+ self.__batch_update.send()
|
|
| 451 |
+ self.__batch_update = _CASBatchUpdate(self)
|
|
| 452 |
+ |
|
| 453 |
+ # find_missing_blobs()
|
|
| 454 |
+ #
|
|
| 455 |
+ # Does FindMissingBlobs request to remote
|
|
| 456 |
+ #
|
|
| 457 |
+ # Args:
|
|
| 458 |
+ # required_blobs (iterator of Digest): iterator over the required blob digests
|
|
| 459 |
+ #
|
|
| 460 |
+ # Returns:
|
|
| 461 |
+ # (dict): Digests of the blobs missing on the remote, keyed by digest hash
|
|
| 462 |
+ def find_missing_blobs(self, required_blobs):
|
|
| 463 |
+ self.init()
|
|
| 464 |
+ missing_blobs = dict()
|
|
| 465 |
+ # Limit size of FindMissingBlobs request
|
|
| 466 |
+ for required_blobs_group in _grouper(required_blobs, 512):
|
|
| 467 |
+ request = remote_execution_pb2.FindMissingBlobsRequest()
|
|
| 468 |
+ |
|
| 469 |
+ for required_digest in required_blobs_group:
|
|
| 470 |
+ d = request.blob_digests.add()
|
|
| 471 |
+ d.hash = required_digest.hash
|
|
| 472 |
+ d.size_bytes = required_digest.size_bytes
|
|
| 473 |
+ |
|
| 474 |
+ response = self.cas.FindMissingBlobs(request)
|
|
| 475 |
+ for missing_digest in response.missing_blob_digests:
|
|
| 476 |
+ d = remote_execution_pb2.Digest()
|
|
| 477 |
+ d.hash = missing_digest.hash
|
|
| 478 |
+ d.size_bytes = missing_digest.size_bytes
|
|
| 479 |
+ missing_blobs[d.hash] = d
|
|
| 480 |
+ |
|
| 481 |
+ return missing_blobs
|
|
| 482 |
+ |
|
| 408 | 483 |
################################################
|
| 409 | 484 |
# Local Private Methods #
|
| 410 | 485 |
################################################
|
| ... | ... | @@ -443,7 +518,10 @@ class CASRemote(): |
| 443 | 518 |
offset += chunk_size
|
| 444 | 519 |
finished = request.finish_write
|
| 445 | 520 |
|
| 446 |
- response = self.bytestream.Write(request_stream(resource_name, stream))
|
|
| 521 |
+ try:
|
|
| 522 |
+ response = self.bytestream.Write(request_stream(resource_name, stream))
|
|
| 523 |
+ except grpc.RpcError as e:
|
|
| 524 |
+ raise CASError("Failed to upload blob: {}".format(e), reason=e.code())
|
|
| 447 | 525 |
|
| 448 | 526 |
assert response.committed_size == digest.size_bytes
|
| 449 | 527 |
|
| ... | ... | @@ -457,6 +535,15 @@ class CASRemote(): |
| 457 | 535 |
self.__batch_read = _CASBatchRead(self)
|
| 458 | 536 |
|
| 459 | 537 |
|
| 538 |
+def _grouper(iterable, n):
|
|
| 539 |
+ while True:
|
|
| 540 |
+ try:
|
|
| 541 |
+ current = next(iterable)
|
|
| 542 |
+ except StopIteration:
|
|
| 543 |
+ return
|
|
| 544 |
+ yield itertools.chain([current], itertools.islice(iterable, n - 1))
|
|
| 545 |
+ |
|
| 546 |
+ |
|
| 460 | 547 |
# Represents a batch of blobs queued for fetching.
|
| 461 | 548 |
#
|
| 462 | 549 |
class _CASBatchRead():
|
| ... | ... | @@ -488,7 +575,11 @@ class _CASBatchRead(): |
| 488 | 575 |
if not self._request.digests:
|
| 489 | 576 |
return
|
| 490 | 577 |
|
| 491 |
- batch_response = self._remote.cas.BatchReadBlobs(self._request)
|
|
| 578 |
+ try:
|
|
| 579 |
+ batch_response = self._remote.cas.BatchReadBlobs(self._request)
|
|
| 580 |
+ except grpc.RpcError as e:
|
|
| 581 |
+ raise CASError("Failed to read blob batch: {}".format(e),
|
|
| 582 |
+ reason=e.code()) from e
|
|
| 492 | 583 |
|
| 493 | 584 |
for response in batch_response.responses:
|
| 494 | 585 |
if response.status.code == code_pb2.NOT_FOUND:
|
| ... | ... | @@ -536,7 +627,12 @@ class _CASBatchUpdate(): |
| 536 | 627 |
if not self._request.requests:
|
| 537 | 628 |
return
|
| 538 | 629 |
|
| 539 |
- batch_response = self._remote.cas.BatchUpdateBlobs(self._request)
|
|
| 630 |
+ # Want to raise a CASError if the BatchUpdateBlobs request fails with an RpcError
|
|
| 631 |
+ try:
|
|
| 632 |
+ batch_response = self._remote.cas.BatchUpdateBlobs(self._request)
|
|
| 633 |
+ except grpc.RpcError as e:
|
|
| 634 |
+ raise CASError("Failed to upload blob batch: {}".format(e),
|
|
| 635 |
+ reason=e.code()) from e
|
|
| 540 | 636 |
|
| 541 | 637 |
for response in batch_response.responses:
|
| 542 | 638 |
if response.status.code != code_pb2.OK:
|
