Tom Pollard pushed to branch tpollard/566 at BuildStream / buildstream
Commits:
-
17369d73
by Tom Pollard at 2019-01-21T15:44:24Z
8 changed files:
- buildstream/_artifactcache.py
- buildstream/_cas/cascache.py
- buildstream/_cas/casremote.py
- buildstream/element.py
- doc/source/using_config.rst
- tests/artifactcache/config.py
- + tests/integration/pushbuildtrees.py
- tests/testutils/runcli.py
Changes:
... | ... | @@ -72,6 +72,7 @@ class ArtifactCache(): |
72 | 72 |
|
73 | 73 |
self._has_fetch_remotes = False
|
74 | 74 |
self._has_push_remotes = False
|
75 |
+ self._has_partial_push_remotes = False
|
|
75 | 76 |
|
76 | 77 |
os.makedirs(self.extractdir, exist_ok=True)
|
77 | 78 |
|
... | ... | @@ -384,6 +385,9 @@ class ArtifactCache(): |
384 | 385 |
self._has_fetch_remotes = True
|
385 | 386 |
if remote_spec.push:
|
386 | 387 |
self._has_push_remotes = True
|
388 |
+ # Partial push requires generic push option to also be set
|
|
389 |
+ if remote_spec.partial_push:
|
|
390 |
+ self._has_partial_push_remotes = True
|
|
387 | 391 |
|
388 | 392 |
remotes[remote_spec.url] = CASRemote(remote_spec)
|
389 | 393 |
|
... | ... | @@ -582,6 +586,32 @@ class ArtifactCache(): |
582 | 586 |
remotes_for_project = self._remotes[element._get_project()]
|
583 | 587 |
return any(remote.spec.push for remote in remotes_for_project)
|
584 | 588 |
|
589 |
+ # has_partial_push_remotes():
|
|
590 |
+ #
|
|
591 |
+ # Check whether any remote repositories are available for pushing
|
|
592 |
+ # non-complete artifacts. This option requires the generic push value
|
|
593 |
+ # to also be set.
|
|
594 |
+ #
|
|
595 |
+ # Args:
|
|
596 |
+ # element (Element): The Element to check
|
|
597 |
+ #
|
|
598 |
+ # Returns:
|
|
599 |
+ # (bool): True if any remote repository is configured for optional
|
|
600 |
+ # partial pushes, False otherwise
|
|
601 |
+ #
|
|
602 |
+ def has_partial_push_remotes(self, *, element=None):
|
|
603 |
+ # If there's no partial push remotes available, we can't partial push at all
|
|
604 |
+ if not self._has_partial_push_remotes:
|
|
605 |
+ return False
|
|
606 |
+ elif element is None:
|
|
607 |
+ # At least one remote is set to allow partial pushes
|
|
608 |
+ return True
|
|
609 |
+ else:
|
|
610 |
+ # Check whether the specified element's project has push remotes configured
|
|
611 |
+ # to accept partial artifact pushes
|
|
612 |
+ remotes_for_project = self._remotes[element._get_project()]
|
|
613 |
+ return any(remote.spec.partial_push for remote in remotes_for_project)
|
|
614 |
+ |
|
585 | 615 |
# push():
|
586 | 616 |
#
|
587 | 617 |
# Push committed artifact to remote repository.
|
... | ... | @@ -589,6 +619,8 @@ class ArtifactCache(): |
589 | 619 |
# Args:
|
590 | 620 |
# element (Element): The Element whose artifact is to be pushed
|
591 | 621 |
# keys (list): The cache keys to use
|
622 |
+ # partial(bool): If the artifact is cached in a partial state
|
|
623 |
+ # subdir(string): Optional subdir to not push
|
|
592 | 624 |
#
|
593 | 625 |
# Returns:
|
594 | 626 |
# (bool): True if any remote was updated, False if no pushes were required
|
... | ... | @@ -596,12 +628,25 @@ class ArtifactCache(): |
596 | 628 |
# Raises:
|
597 | 629 |
# (ArtifactError): if there was an error
|
598 | 630 |
#
|
599 |
- def push(self, element, keys):
|
|
631 |
+ def push(self, element, keys, partial=False, subdir=None):
|
|
600 | 632 |
refs = [self.get_artifact_fullname(element, key) for key in list(keys)]
|
601 | 633 |
|
602 | 634 |
project = element._get_project()
|
603 | 635 |
|
604 |
- push_remotes = [r for r in self._remotes[project] if r.spec.push]
|
|
636 |
+ push_remotes = []
|
|
637 |
+ partial_remotes = []
|
|
638 |
+ |
|
639 |
+ # Create list of remotes to push to, given current element and partial push config
|
|
640 |
+ if not partial:
|
|
641 |
+ push_remotes = [r for r in self._remotes[project] if (r.spec.push and not r.spec.partial_push)]
|
|
642 |
+ |
|
643 |
+ if self._has_partial_push_remotes:
|
|
644 |
+ # Create a specific list of the remotes expecting the artifact to be pushed in a partial
|
|
645 |
+ # state. This list needs to be pushed in a partial state, without the optional subdir if
|
|
646 |
+ # it exists locally. No need to attempt pushing a partial artifact to a remote that is queued to
|
|
647 |
+ # also receive a full artifact
|
|
648 |
+ partial_remotes = [r for r in self._remotes[project] if (r.spec.partial_push and r.spec.push) and
|
|
649 |
+ r not in push_remotes]
|
|
605 | 650 |
|
606 | 651 |
pushed = False
|
607 | 652 |
|
... | ... | @@ -610,7 +655,9 @@ class ArtifactCache(): |
610 | 655 |
display_key = element._get_brief_display_key()
|
611 | 656 |
element.status("Pushing artifact {} -> {}".format(display_key, remote.spec.url))
|
612 | 657 |
|
613 |
- if self.cas.push(refs, remote):
|
|
658 |
+ # Passing the optional subdir allows for remote artifacts that are cached in a 'partial'
|
|
659 |
+ # state to be completed
|
|
660 |
+ if self.cas.push(refs, remote, subdir=subdir):
|
|
614 | 661 |
element.info("Pushed artifact {} -> {}".format(display_key, remote.spec.url))
|
615 | 662 |
pushed = True
|
616 | 663 |
else:
|
... | ... | @@ -618,6 +665,19 @@ class ArtifactCache(): |
618 | 665 |
remote.spec.url, element._get_brief_display_key()
|
619 | 666 |
))
|
620 | 667 |
|
668 |
+ for remote in partial_remotes:
|
|
669 |
+ remote.init()
|
|
670 |
+ display_key = element._get_brief_display_key()
|
|
671 |
+ element.status("Pushing partial artifact {} -> {}".format(display_key, remote.spec.url))
|
|
672 |
+ |
|
673 |
+ if self.cas.push(refs, remote, excluded_subdirs=subdir):
|
|
674 |
+ element.info("Pushed partial artifact {} -> {}".format(display_key, remote.spec.url))
|
|
675 |
+ pushed = True
|
|
676 |
+ else:
|
|
677 |
+ element.info("Remote ({}) already has {} partial cached".format(
|
|
678 |
+ remote.spec.url, element._get_brief_display_key()
|
|
679 |
+ ))
|
|
680 |
+ |
|
621 | 681 |
return pushed
|
622 | 682 |
|
623 | 683 |
# pull():
|
... | ... | @@ -645,14 +705,23 @@ class ArtifactCache(): |
645 | 705 |
element.status("Pulling artifact {} <- {}".format(display_key, remote.spec.url))
|
646 | 706 |
|
647 | 707 |
if self.cas.pull(ref, remote, progress=progress, subdir=subdir, excluded_subdirs=excluded_subdirs):
|
648 |
- element.info("Pulled artifact {} <- {}".format(display_key, remote.spec.url))
|
|
649 | 708 |
if subdir:
|
650 |
- # Attempt to extract subdir into artifact extract dir if it already exists
|
|
651 |
- # without containing the subdir. If the respective artifact extract dir does not
|
|
652 |
- # exist a complete extraction will complete.
|
|
653 |
- self.extract(element, key, subdir)
|
|
654 |
- # no need to pull from additional remotes
|
|
655 |
- return True
|
|
709 |
+ if not self.contains_subdir_artifact(element, key, subdir):
|
|
710 |
+ # The pull was expecting the specific subdir to be present in the remote, attempt
|
|
711 |
+ # to find it in other available remotes
|
|
712 |
+ element.info("Pulled partial artifact {} <- {}. Attempting to retrieve {} from remotes"
|
|
713 |
+ .format(display_key, remote.spec.url, subdir))
|
|
714 |
+ else:
|
|
715 |
+ element.info("Pulled artifact {} <- {}".format(display_key, remote.spec.url))
|
|
716 |
+ # Attempt to extract subdir into artifact extract dir if it already exists
|
|
717 |
+ # without containing the subdir. If the respective artifact extract dir does not
|
|
718 |
+ # exist a complete extraction will complete.
|
|
719 |
+ self.extract(element, key, subdir)
|
|
720 |
+ # no need to pull from additional remotes
|
|
721 |
+ return True
|
|
722 |
+ else:
|
|
723 |
+ element.info("Pulled artifact {} <- {}".format(display_key, remote.spec.url))
|
|
724 |
+ return True
|
|
656 | 725 |
else:
|
657 | 726 |
element.info("Remote ({}) does not have {} cached".format(
|
658 | 727 |
remote.spec.url, element._get_brief_display_key()
|
... | ... | @@ -198,34 +198,47 @@ class CASCache(): |
198 | 198 |
# (bool): True if pull was successful, False if ref was not available
|
199 | 199 |
#
|
200 | 200 |
def pull(self, ref, remote, *, progress=None, subdir=None, excluded_subdirs=None):
|
201 |
- try:
|
|
202 |
- remote.init()
|
|
203 | 201 |
|
204 |
- request = buildstream_pb2.GetReferenceRequest(instance_name=remote.spec.instance_name)
|
|
205 |
- request.key = ref
|
|
206 |
- response = remote.ref_storage.GetReference(request)
|
|
202 |
+ tree_found = False
|
|
207 | 203 |
|
208 |
- tree = remote_execution_pb2.Digest()
|
|
209 |
- tree.hash = response.digest.hash
|
|
210 |
- tree.size_bytes = response.digest.size_bytes
|
|
204 |
+ while True:
|
|
205 |
+ try:
|
|
206 |
+ if not tree_found:
|
|
207 |
+ remote.init()
|
|
211 | 208 |
|
212 |
- # Check if the element artifact is present, if so just fetch the subdir.
|
|
213 |
- if subdir and os.path.exists(self.objpath(tree)):
|
|
214 |
- self._fetch_subdir(remote, tree, subdir)
|
|
215 |
- else:
|
|
216 |
- # Fetch artifact, excluded_subdirs determined in pullqueue
|
|
217 |
- self._fetch_directory(remote, tree, excluded_subdirs=excluded_subdirs)
|
|
209 |
+ request = buildstream_pb2.GetReferenceRequest(instance_name=remote.spec.instance_name)
|
|
210 |
+ request.key = ref
|
|
211 |
+ response = remote.ref_storage.GetReference(request)
|
|
218 | 212 |
|
219 |
- self.set_ref(ref, tree)
|
|
213 |
+ tree = remote_execution_pb2.Digest()
|
|
214 |
+ tree.hash = response.digest.hash
|
|
215 |
+ tree.size_bytes = response.digest.size_bytes
|
|
220 | 216 |
|
221 |
- return True
|
|
222 |
- except grpc.RpcError as e:
|
|
223 |
- if e.code() != grpc.StatusCode.NOT_FOUND:
|
|
224 |
- raise CASCacheError("Failed to pull ref {}: {}".format(ref, e)) from e
|
|
225 |
- else:
|
|
226 |
- return False
|
|
227 |
- except BlobNotFound as e:
|
|
228 |
- return False
|
|
217 |
+ # Check if the element artifact is present, if so just fetch the subdir.
|
|
218 |
+ if subdir and os.path.exists(self.objpath(tree)):
|
|
219 |
+ self._fetch_subdir(remote, tree, subdir)
|
|
220 |
+ else:
|
|
221 |
+ # Fetch artifact, excluded_subdirs determined in pullqueue
|
|
222 |
+ self._fetch_directory(remote, tree, excluded_subdirs=excluded_subdirs)
|
|
223 |
+ |
|
224 |
+ self.set_ref(ref, tree)
|
|
225 |
+ |
|
226 |
+ return True
|
|
227 |
+ except grpc.RpcError as e:
|
|
228 |
+ if e.code() != grpc.StatusCode.NOT_FOUND:
|
|
229 |
+ raise CASCacheError("Failed to pull ref {}: {}".format(ref, e)) from e
|
|
230 |
+ else:
|
|
231 |
+ return False
|
|
232 |
+ except BlobNotFound as e:
|
|
233 |
+ if not excluded_subdirs and subdir:
|
|
234 |
+ # The remote has the top level digest but could not complete a full pull,
|
|
235 |
+ # attempt partial without the need to initialise and check for the artifact
|
|
236 |
+ # digest. This default behaviour of dropping back to partial pulls could
|
|
237 |
+ # be made a configurable warning given at artfictcache level.
|
|
238 |
+ tree_found = True
|
|
239 |
+ excluded_subdirs, subdir = subdir, excluded_subdirs
|
|
240 |
+ else:
|
|
241 |
+ return False
|
|
229 | 242 |
|
230 | 243 |
# pull_tree():
|
231 | 244 |
#
|
... | ... | @@ -270,6 +283,8 @@ class CASCache(): |
270 | 283 |
# Args:
|
271 | 284 |
# refs (list): The refs to push
|
272 | 285 |
# remote (CASRemote): The remote to push to
|
286 |
+ # subdir (string): Optional specific subdir to include in the push
|
|
287 |
+ # excluded_subdirs (list): The optional list of subdirs to not push
|
|
273 | 288 |
#
|
274 | 289 |
# Returns:
|
275 | 290 |
# (bool): True if any remote was updated, False if no pushes were required
|
... | ... | @@ -277,7 +292,7 @@ class CASCache(): |
277 | 292 |
# Raises:
|
278 | 293 |
# (CASCacheError): if there was an error
|
279 | 294 |
#
|
280 |
- def push(self, refs, remote):
|
|
295 |
+ def push(self, refs, remote, *, subdir=None, excluded_subdirs=None):
|
|
281 | 296 |
skipped_remote = True
|
282 | 297 |
try:
|
283 | 298 |
for ref in refs:
|
... | ... | @@ -291,15 +306,18 @@ class CASCache(): |
291 | 306 |
response = remote.ref_storage.GetReference(request)
|
292 | 307 |
|
293 | 308 |
if response.digest.hash == tree.hash and response.digest.size_bytes == tree.size_bytes:
|
294 |
- # ref is already on the server with the same tree
|
|
295 |
- continue
|
|
309 |
+ # ref is already on the server with the same tree, however it might be partially cached.
|
|
310 |
+ # If artifact is not set to be pushed partially attempt to 'complete' the remote artifact if
|
|
311 |
+ # needed, else continue.
|
|
312 |
+ if excluded_subdirs or remote.verify_digest_on_remote(self._get_subdir(tree, subdir)):
|
|
313 |
+ continue
|
|
296 | 314 |
|
297 | 315 |
except grpc.RpcError as e:
|
298 | 316 |
if e.code() != grpc.StatusCode.NOT_FOUND:
|
299 | 317 |
# Intentionally re-raise RpcError for outer except block.
|
300 | 318 |
raise
|
301 | 319 |
|
302 |
- self._send_directory(remote, tree)
|
|
320 |
+ self._send_directory(remote, tree, excluded_dir=excluded_subdirs)
|
|
303 | 321 |
|
304 | 322 |
request = buildstream_pb2.UpdateReferenceRequest(instance_name=remote.spec.instance_name)
|
305 | 323 |
request.keys.append(ref)
|
... | ... | @@ -782,10 +800,17 @@ class CASCache(): |
782 | 800 |
a += 1
|
783 | 801 |
b += 1
|
784 | 802 |
|
785 |
- def _reachable_refs_dir(self, reachable, tree, update_mtime=False):
|
|
803 |
+ def _reachable_refs_dir(self, reachable, tree, update_mtime=False, subdir=False):
|
|
786 | 804 |
if tree.hash in reachable:
|
787 | 805 |
return
|
788 | 806 |
|
807 |
+ # If looping through subdir digests, skip processing if
|
|
808 |
+ # ref path does not exist, allowing for partial objects
|
|
809 |
+ if subdir and not os.path.exists(self.objpath(tree)):
|
|
810 |
+ return
|
|
811 |
+ |
|
812 |
+ # Raises a FileNotFoundError exception if the path does not exist,
|
|
813 |
+ # which should only be thrown on the top level digest
|
|
789 | 814 |
if update_mtime:
|
790 | 815 |
os.utime(self.objpath(tree))
|
791 | 816 |
|
... | ... | @@ -802,9 +827,9 @@ class CASCache(): |
802 | 827 |
reachable.add(filenode.digest.hash)
|
803 | 828 |
|
804 | 829 |
for dirnode in directory.directories:
|
805 |
- self._reachable_refs_dir(reachable, dirnode.digest, update_mtime=update_mtime)
|
|
830 |
+ self._reachable_refs_dir(reachable, dirnode.digest, update_mtime=update_mtime, subdir=True)
|
|
806 | 831 |
|
807 |
- def _required_blobs(self, directory_digest):
|
|
832 |
+ def _required_blobs(self, directory_digest, excluded_dir=None):
|
|
808 | 833 |
# parse directory, and recursively add blobs
|
809 | 834 |
d = remote_execution_pb2.Digest()
|
810 | 835 |
d.hash = directory_digest.hash
|
... | ... | @@ -823,7 +848,8 @@ class CASCache(): |
823 | 848 |
yield d
|
824 | 849 |
|
825 | 850 |
for dirnode in directory.directories:
|
826 |
- yield from self._required_blobs(dirnode.digest)
|
|
851 |
+ if dirnode.name != excluded_dir:
|
|
852 |
+ yield from self._required_blobs(dirnode.digest)
|
|
827 | 853 |
|
828 | 854 |
# _ensure_blob():
|
829 | 855 |
#
|
... | ... | @@ -928,6 +954,7 @@ class CASCache(): |
928 | 954 |
objpath = self._ensure_blob(remote, dir_digest)
|
929 | 955 |
|
930 | 956 |
directory = remote_execution_pb2.Directory()
|
957 |
+ |
|
931 | 958 |
with open(objpath, 'rb') as f:
|
932 | 959 |
directory.ParseFromString(f.read())
|
933 | 960 |
|
... | ... | @@ -970,8 +997,8 @@ class CASCache(): |
970 | 997 |
|
971 | 998 |
return dirdigest
|
972 | 999 |
|
973 |
- def _send_directory(self, remote, digest, u_uid=uuid.uuid4()):
|
|
974 |
- required_blobs = self._required_blobs(digest)
|
|
1000 |
+ def _send_directory(self, remote, digest, u_uid=uuid.uuid4(), excluded_dir=None):
|
|
1001 |
+ required_blobs = self._required_blobs(digest, excluded_dir=excluded_dir)
|
|
975 | 1002 |
|
976 | 1003 |
missing_blobs = dict()
|
977 | 1004 |
# Limit size of FindMissingBlobs request
|
... | ... | @@ -23,7 +23,8 @@ from .. import utils |
23 | 23 |
_MAX_PAYLOAD_BYTES = 1024 * 1024
|
24 | 24 |
|
25 | 25 |
|
26 |
-class CASRemoteSpec(namedtuple('CASRemoteSpec', 'url push server_cert client_key client_cert instance_name')):
|
|
26 |
+class CASRemoteSpec(namedtuple('CASRemoteSpec',
|
|
27 |
+ 'url push partial_push server_cert client_key client_cert instance_name')):
|
|
27 | 28 |
|
28 | 29 |
# _new_from_config_node
|
29 | 30 |
#
|
... | ... | @@ -31,9 +32,18 @@ class CASRemoteSpec(namedtuple('CASRemoteSpec', 'url push server_cert client_key |
31 | 32 |
#
|
32 | 33 |
@staticmethod
|
33 | 34 |
def _new_from_config_node(spec_node, basedir=None):
|
34 |
- _yaml.node_validate(spec_node, ['url', 'push', 'server-cert', 'client-key', 'client-cert', 'instance_name'])
|
|
35 |
+ _yaml.node_validate(spec_node, ['url', 'push', 'allow-partial-push', 'server-cert', 'client-key',
|
|
36 |
+ 'client-cert', 'instance_name'])
|
|
35 | 37 |
url = _yaml.node_get(spec_node, str, 'url')
|
36 | 38 |
push = _yaml.node_get(spec_node, bool, 'push', default_value=False)
|
39 |
+ partial_push = _yaml.node_get(spec_node, bool, 'allow-partial-push', default_value=False)
|
|
40 |
+ |
|
41 |
+ # partial_push depends on push, raise error if not configured correctly
|
|
42 |
+ if partial_push and not push:
|
|
43 |
+ provenance = _yaml.node_get_provenance(spec_node, 'allow-partial-push')
|
|
44 |
+ raise LoadError(LoadErrorReason.INVALID_DATA,
|
|
45 |
+ "{}: allow-partial-push also requires push to be set".format(provenance))
|
|
46 |
+ |
|
37 | 47 |
if not url:
|
38 | 48 |
provenance = _yaml.node_get_provenance(spec_node, 'url')
|
39 | 49 |
raise LoadError(LoadErrorReason.INVALID_DATA,
|
... | ... | @@ -63,10 +73,10 @@ class CASRemoteSpec(namedtuple('CASRemoteSpec', 'url push server_cert client_key |
63 | 73 |
raise LoadError(LoadErrorReason.INVALID_DATA,
|
64 | 74 |
"{}: 'client-cert' was specified without 'client-key'".format(provenance))
|
65 | 75 |
|
66 |
- return CASRemoteSpec(url, push, server_cert, client_key, client_cert, instance_name)
|
|
76 |
+ return CASRemoteSpec(url, push, partial_push, server_cert, client_key, client_cert, instance_name)
|
|
67 | 77 |
|
68 | 78 |
|
69 |
-CASRemoteSpec.__new__.__defaults__ = (None, None, None, None)
|
|
79 |
+CASRemoteSpec.__new__.__defaults__ = (False, None, None, None, None)
|
|
70 | 80 |
|
71 | 81 |
|
72 | 82 |
class BlobNotFound(CASRemoteError):
|
... | ... | @@ -1797,13 +1797,19 @@ class Element(Plugin): |
1797 | 1797 |
# (bool): True if this element does not need a push job to be created
|
1798 | 1798 |
#
|
1799 | 1799 |
def _skip_push(self):
|
1800 |
+ |
|
1800 | 1801 |
if not self.__artifacts.has_push_remotes(element=self):
|
1801 | 1802 |
# No push remotes for this element's project
|
1802 | 1803 |
return True
|
1803 | 1804 |
|
1804 | 1805 |
# Do not push elements that aren't cached, or that are cached with a dangling buildtree
|
1805 |
- # artifact unless element type is expected to have an an empty buildtree directory
|
|
1806 |
- if not self._cached_buildtree():
|
|
1806 |
+ # artifact unless element type is expected to have an empty buildtree directory. Check
|
|
1807 |
+ # that this default behaviour is not overridden via a remote configured to allow pushing
|
|
1808 |
+ # artifacts without their corresponding buildtree.
|
|
1809 |
+ if not self._cached():
|
|
1810 |
+ return True
|
|
1811 |
+ |
|
1812 |
+ if not self._cached_buildtree() and not self.__artifacts.has_partial_push_remotes(element=self):
|
|
1807 | 1813 |
return True
|
1808 | 1814 |
|
1809 | 1815 |
# Do not push tainted artifact
|
... | ... | @@ -1814,11 +1820,14 @@ class Element(Plugin): |
1814 | 1820 |
|
1815 | 1821 |
# _push():
|
1816 | 1822 |
#
|
1817 |
- # Push locally cached artifact to remote artifact repository.
|
|
1823 |
+ # Push locally cached artifact to remote artifact repository. An attempt
|
|
1824 |
+ # will be made to push partial artifacts if the current config dictates.
|
|
1825 |
+ # If a remote set for 'full' artifact pushes is found to be cached partially
|
|
1826 |
+ # in the remote, an attempt will be made to 'complete' it.
|
|
1818 | 1827 |
#
|
1819 | 1828 |
# Returns:
|
1820 | 1829 |
# (bool): True if the remote was updated, False if it already existed
|
1821 |
- # and no updated was required
|
|
1830 |
+ # and no update was required
|
|
1822 | 1831 |
#
|
1823 | 1832 |
def _push(self):
|
1824 | 1833 |
self.__assert_cached()
|
... | ... | @@ -1827,8 +1836,17 @@ class Element(Plugin): |
1827 | 1836 |
self.warn("Not pushing tainted artifact.")
|
1828 | 1837 |
return False
|
1829 | 1838 |
|
1830 |
- # Push all keys used for local commit
|
|
1831 |
- pushed = self.__artifacts.push(self, self.__get_cache_keys_for_commit())
|
|
1839 |
+ # Push all keys used for local commit, this could be full or partial,
|
|
1840 |
+ # given previous _skip_push() logic. If buildtree isn't cached, then
|
|
1841 |
+ # set partial push
|
|
1842 |
+ |
|
1843 |
+ partial = False
|
|
1844 |
+ subdir = 'buildtree'
|
|
1845 |
+ if not self._cached_buildtree():
|
|
1846 |
+ partial = True
|
|
1847 |
+ |
|
1848 |
+ pushed = self.__artifacts.push(self, self.__get_cache_keys_for_commit(), partial=partial, subdir=subdir)
|
|
1849 |
+ |
|
1832 | 1850 |
if not pushed:
|
1833 | 1851 |
return False
|
1834 | 1852 |
|
... | ... | @@ -59,6 +59,15 @@ configuration: |
59 | 59 |
# Add another cache to pull from
|
60 | 60 |
- url: https://anothercache.com/artifacts:8080
|
61 | 61 |
server-cert: another_server.crt
|
62 |
+ # Add a cache to push/pull to/from, specifying
|
|
63 |
+ # that you wish to push artifacts in a 'partial'
|
|
64 |
+ # state (this being without the respective buildtree).
|
|
65 |
+ # Note that allow-partial-push requires push to also
|
|
66 |
+ # be set.
|
|
67 |
+ - url: https://anothercache.com/artifacts:11003
|
|
68 |
+ push: true
|
|
69 |
+ allow-partial-push: true
|
|
70 |
+ |
|
62 | 71 |
|
63 | 72 |
.. note::
|
64 | 73 |
|
... | ... | @@ -86,6 +95,14 @@ configuration: |
86 | 95 |
# Add another cache to pull from
|
87 | 96 |
- url: https://ourprojectcache.com/artifacts:8080
|
88 | 97 |
server-cert: project_server.crt
|
98 |
+ # Add a cache to push/pull to/from, specifying
|
|
99 |
+ # that you wish to push artifacts in a 'partial'
|
|
100 |
+ # state (this being without the respective buildtree).
|
|
101 |
+ # Note that allow-partial-push requires push to also
|
|
102 |
+ # be set.
|
|
103 |
+ - url: https://anothercache.com/artifacts:11003
|
|
104 |
+ push: true
|
|
105 |
+ allow-partial-push: true
|
|
89 | 106 |
|
90 | 107 |
|
91 | 108 |
.. note::
|
... | ... | @@ -139,3 +139,28 @@ def test_missing_certs(cli, datafiles, config_key, config_value): |
139 | 139 |
# This does not happen for a simple `bst show`.
|
140 | 140 |
result = cli.run(project=project, args=['pull', 'element.bst'])
|
141 | 141 |
result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.INVALID_DATA)
|
142 |
+ |
|
143 |
+ |
|
144 |
+# Assert that if allow-partial-push is specified as true without push also being
|
|
145 |
+# set likewise, we get a comprehensive LoadError instead of an unhandled exception.
|
|
146 |
+@pytest.mark.datafiles(DATA_DIR)
|
|
147 |
+def test_partial_push_error(cli, datafiles):
|
|
148 |
+ project = os.path.join(datafiles.dirname, datafiles.basename, 'project', 'elements')
|
|
149 |
+ |
|
150 |
+ project_conf = {
|
|
151 |
+ 'name': 'test',
|
|
152 |
+ |
|
153 |
+ 'artifacts': {
|
|
154 |
+ 'url': 'https://cache.example.com:12345',
|
|
155 |
+ 'allow-partial-push': 'True'
|
|
156 |
+ }
|
|
157 |
+ }
|
|
158 |
+ project_conf_file = os.path.join(project, 'project.conf')
|
|
159 |
+ _yaml.dump(project_conf, project_conf_file)
|
|
160 |
+ |
|
161 |
+ # Use `pull` here to ensure we try to initialize the remotes, triggering the error
|
|
162 |
+ #
|
|
163 |
+ # This does not happen for a simple `bst show`.
|
|
164 |
+ result = cli.run(project=project, args=['pull', 'target.bst'])
|
|
165 |
+ result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.INVALID_DATA)
|
|
166 |
+ assert "allow-partial-push also requires push to be set" in result.stderr
|
1 |
+import os
|
|
2 |
+import shutil
|
|
3 |
+import pytest
|
|
4 |
+import subprocess
|
|
5 |
+ |
|
6 |
+from buildstream import _yaml
|
|
7 |
+from tests.testutils import cli_integration as cli, create_artifact_share
|
|
8 |
+from tests.testutils.integration import assert_contains
|
|
9 |
+from tests.testutils.site import HAVE_BWRAP, IS_LINUX
|
|
10 |
+from buildstream._exceptions import ErrorDomain, LoadErrorReason
|
|
11 |
+ |
|
12 |
+ |
|
13 |
+DATA_DIR = os.path.join(
|
|
14 |
+ os.path.dirname(os.path.realpath(__file__)),
|
|
15 |
+ "project"
|
|
16 |
+)
|
|
17 |
+ |
|
18 |
+ |
|
19 |
+# Remove artifact cache & set cli.config value of pull-buildtrees
|
|
20 |
+# to false, which is the default user context. The cache has to be
|
|
21 |
+# cleared as just forcefully removing the refpath leaves dangling objects.
|
|
22 |
+def default_state(cli, tmpdir, share):
|
|
23 |
+ shutil.rmtree(os.path.join(str(tmpdir), 'artifacts'))
|
|
24 |
+ cli.configure({
|
|
25 |
+ 'artifacts': {'url': share.repo, 'push': False},
|
|
26 |
+ 'artifactdir': os.path.join(str(tmpdir), 'artifacts'),
|
|
27 |
+ 'cache': {'pull-buildtrees': False},
|
|
28 |
+ })
|
|
29 |
+ |
|
30 |
+ |
|
31 |
+# Tests to capture the integration of the optional push of buildtrees.
|
|
32 |
+# The behaviour should encompass pushing artifacts that are already cached
|
|
33 |
+# without a buildtree as well as artifacts that are cached with their buildtree.
|
|
34 |
+# This option is handled via 'allow-partial-push' on a per artifact remote config
|
|
35 |
+# node basis. Multiple remote config nodes can point to the same url and as such can
|
|
36 |
+# have different 'allow-partial-push' options, tests need to cover this using project
|
|
37 |
+# confs.
|
|
38 |
+@pytest.mark.integration
|
|
39 |
+@pytest.mark.datafiles(DATA_DIR)
|
|
40 |
+@pytest.mark.skipif(IS_LINUX and not HAVE_BWRAP, reason='Only available with bubblewrap on Linux')
|
|
41 |
+def test_pushbuildtrees(cli, tmpdir, datafiles, integration_cache):
|
|
42 |
+ project = os.path.join(datafiles.dirname, datafiles.basename)
|
|
43 |
+ element_name = 'autotools/amhello.bst'
|
|
44 |
+ |
|
45 |
+ # Create artifact shares for pull & push testing
|
|
46 |
+ with create_artifact_share(os.path.join(str(tmpdir), 'share1')) as share1,\
|
|
47 |
+ create_artifact_share(os.path.join(str(tmpdir), 'share2')) as share2,\
|
|
48 |
+ create_artifact_share(os.path.join(str(tmpdir), 'share3')) as share3,\
|
|
49 |
+ create_artifact_share(os.path.join(str(tmpdir), 'share4')) as share4:
|
|
50 |
+ |
|
51 |
+ cli.configure({
|
|
52 |
+ 'artifacts': {'url': share1.repo, 'push': True},
|
|
53 |
+ 'artifactdir': os.path.join(str(tmpdir), 'artifacts')
|
|
54 |
+ })
|
|
55 |
+ |
|
56 |
+ cli.configure({'artifacts': [{'url': share1.repo, 'push': True},
|
|
57 |
+ {'url': share2.repo, 'push': True, 'allow-partial-push': True}]})
|
|
58 |
+ |
|
59 |
+ # Build autotools element, checked pushed, delete local.
|
|
60 |
+ # As share 2 has push & allow-partial-push set to true, it
|
|
61 |
+ # should have pushed the artifacts, without the cached buildtrees,
|
|
62 |
+ # to it.
|
|
63 |
+ result = cli.run(project=project, args=['build', element_name])
|
|
64 |
+ assert result.exit_code == 0
|
|
65 |
+ assert cli.get_element_state(project, element_name) == 'cached'
|
|
66 |
+ elementdigest = share1.has_artifact('test', element_name, cli.get_element_key(project, element_name))
|
|
67 |
+ buildtreedir = os.path.join(str(tmpdir), 'artifacts', 'extract', 'test', 'autotools-amhello',
|
|
68 |
+ elementdigest.hash, 'buildtree')
|
|
69 |
+ assert os.path.isdir(buildtreedir)
|
|
70 |
+ assert element_name in result.get_partial_pushed_elements()
|
|
71 |
+ assert element_name in result.get_pushed_elements()
|
|
72 |
+ assert share1.has_artifact('test', element_name, cli.get_element_key(project, element_name))
|
|
73 |
+ assert share2.has_artifact('test', element_name, cli.get_element_key(project, element_name))
|
|
74 |
+ default_state(cli, tmpdir, share1)
|
|
75 |
+ |
|
76 |
+ # Check that after explicitly pulling an artifact without its buildtree,
|
|
77 |
+ # we can push it to another remote that is configured to accept the partial
|
|
78 |
+ # artifact
|
|
79 |
+ result = cli.run(project=project, args=['pull', element_name])
|
|
80 |
+ assert element_name in result.get_pulled_elements()
|
|
81 |
+ cli.configure({'artifacts': {'url': share3.repo, 'push': True, 'allow-partial-push': True}})
|
|
82 |
+ assert cli.get_element_state(project, element_name) == 'cached'
|
|
83 |
+ assert not os.path.isdir(buildtreedir)
|
|
84 |
+ result = cli.run(project=project, args=['push', element_name])
|
|
85 |
+ assert result.exit_code == 0
|
|
86 |
+ assert element_name in result.get_partial_pushed_elements()
|
|
87 |
+ assert element_name not in result.get_pushed_elements()
|
|
88 |
+ assert share3.has_artifact('test', element_name, cli.get_element_key(project, element_name))
|
|
89 |
+ default_state(cli, tmpdir, share3)
|
|
90 |
+ |
|
91 |
+ # Delete the local cache and pull the partial artifact from share 3,
|
|
92 |
+ # this should not include the buildtree when extracted locally, even when
|
|
93 |
+ # pull-buildtrees is given as a cli parameter as no available remotes will
|
|
94 |
+ # contain the buildtree
|
|
95 |
+ assert not os.path.isdir(buildtreedir)
|
|
96 |
+ assert cli.get_element_state(project, element_name) != 'cached'
|
|
97 |
+ result = cli.run(project=project, args=['--pull-buildtrees', 'pull', element_name])
|
|
98 |
+ assert element_name in result.get_partial_pulled_elements()
|
|
99 |
+ assert not os.path.isdir(buildtreedir)
|
|
100 |
+ default_state(cli, tmpdir, share3)
|
|
101 |
+ |
|
102 |
+ # Delete the local cache and attempt to pull a 'full' artifact, including its
|
|
103 |
+ # buildtree. As with before share3 being the first listed remote will not have
|
|
104 |
+ # the buildtree available and should spawn a partial pull. Having share1 as the
|
|
105 |
+ # second available remote should allow the buildtree to be pulled thus 'completing'
|
|
106 |
+ # the artifact
|
|
107 |
+ cli.configure({'artifacts': [{'url': share3.repo, 'push': True, 'allow-partial-push': True},
|
|
108 |
+ {'url': share1.repo, 'push': True}]})
|
|
109 |
+ assert cli.get_element_state(project, element_name) != 'cached'
|
|
110 |
+ result = cli.run(project=project, args=['--pull-buildtrees', 'pull', element_name])
|
|
111 |
+ assert element_name in result.get_partial_pulled_elements()
|
|
112 |
+ assert element_name in result.get_pulled_elements()
|
|
113 |
+ assert "Attempting to retrieve buildtree from remotes" in result.stderr
|
|
114 |
+ assert os.path.isdir(buildtreedir)
|
|
115 |
+ assert cli.get_element_state(project, element_name) == 'cached'
|
|
116 |
+ |
|
117 |
+ # Test that we are able to 'complete' an artifact on a server which is cached partially,
|
|
118 |
+ # but has now been configured for full artifact pushing. This should require only pushing
|
|
119 |
+ # the missing blobs, which should be those of just the buildtree. In this case changing
|
|
120 |
+ # share3 to full pushes should exercise this
|
|
121 |
+ cli.configure({'artifacts': {'url': share3.repo, 'push': True}})
|
|
122 |
+ result = cli.run(project=project, args=['push', element_name])
|
|
123 |
+ assert element_name in result.get_pushed_elements()
|
|
124 |
+ |
|
125 |
+ # Ensure that the same remote url can be defined multiple times with differing push
|
|
126 |
+ # config. Buildstream supports the same remote having different configurations which
|
|
127 |
+ # partial pushing could be different for elements defined at a top level project.conf to
|
|
128 |
+ # those from a junctioned project. Assert that elements are pushed to the same remote in
|
|
129 |
+ # a state defined via their respective project.confs
|
|
130 |
+ default_state(cli, tmpdir, share1)
|
|
131 |
+ cli.configure({'artifactdir': os.path.join(str(tmpdir), 'artifacts')}, reset=True)
|
|
132 |
+ junction = os.path.join(project, 'elements', 'junction')
|
|
133 |
+ os.mkdir(junction)
|
|
134 |
+ shutil.copy2(os.path.join(project, 'elements', element_name), junction)
|
|
135 |
+ |
|
136 |
+ junction_conf = {}
|
|
137 |
+ project_conf = {}
|
|
138 |
+ junction_conf['name'] = 'amhello'
|
|
139 |
+ junction_conf['artifacts'] = {'url': share4.repo, 'push': True, 'allow-partial-push': True}
|
|
140 |
+ _yaml.dump(junction_conf, os.path.join(junction, 'project.conf'))
|
|
141 |
+ project_conf['artifacts'] = {'url': share4.repo, 'push': True}
|
|
142 |
+ |
|
143 |
+ # Read project.conf, the junction project.conf and buildstream.conf
|
|
144 |
+ # before running bst
|
|
145 |
+ with open(os.path.join(project, 'project.conf'), 'r') as f:
|
|
146 |
+ print(f.read())
|
|
147 |
+ with open(os.path.join(junction, 'project.conf'), 'r') as f:
|
|
148 |
+ print(f.read())
|
|
149 |
+ with open(os.path.join(project, 'cache', 'buildstream.conf'), 'r') as f:
|
|
150 |
+ print(f.read())
|
|
151 |
+ |
|
152 |
+ result = cli.run(project=project, args=['build', 'junction/amhello.bst'], project_config=project_conf)
|
|
153 |
+ |
|
154 |
+ # Read project.conf, the junction project.conf and buildstream.conf
|
|
155 |
+ # after running bst
|
|
156 |
+ with open(os.path.join(project, 'project.conf'), 'r') as f:
|
|
157 |
+ print(f.read())
|
|
158 |
+ with open(os.path.join(junction, 'project.conf'), 'r') as f:
|
|
159 |
+ print(f.read())
|
|
160 |
+ with open(os.path.join(project, 'cache', 'buildstream.conf'), 'r') as f:
|
|
161 |
+ print(f.read())
|
|
162 |
+ |
|
163 |
+ assert 'junction/amhello.bst' in result.get_partial_pushed_elements()
|
|
164 |
+ assert 'base/base-alpine.bst' in result.get_pushed_elements()
|
... | ... | @@ -208,6 +208,13 @@ class Result(): |
208 | 208 |
|
209 | 209 |
return list(pushed)
|
210 | 210 |
|
211 |
+ def get_partial_pushed_elements(self):
|
|
212 |
+ pushed = re.findall(r'\[\s*push:(\S+)\s*\]\s*INFO\s*Pushed partial artifact', self.stderr)
|
|
213 |
+ if pushed is None:
|
|
214 |
+ return []
|
|
215 |
+ |
|
216 |
+ return list(pushed)
|
|
217 |
+ |
|
211 | 218 |
def get_pulled_elements(self):
|
212 | 219 |
pulled = re.findall(r'\[\s*pull:(\S+)\s*\]\s*INFO\s*Pulled artifact', self.stderr)
|
213 | 220 |
if pulled is None:
|
... | ... | @@ -215,6 +222,13 @@ class Result(): |
215 | 222 |
|
216 | 223 |
return list(pulled)
|
217 | 224 |
|
225 |
+ def get_partial_pulled_elements(self):
|
|
226 |
+ pulled = re.findall(r'\[\s*pull:(\S+)\s*\]\s*INFO\s*Pulled partial artifact', self.stderr)
|
|
227 |
+ if pulled is None:
|
|
228 |
+ return []
|
|
229 |
+ |
|
230 |
+ return list(pulled)
|
|
231 |
+ |
|
218 | 232 |
|
219 | 233 |
class Cli():
|
220 | 234 |
|
... | ... | @@ -235,11 +249,15 @@ class Cli(): |
235 | 249 |
#
|
236 | 250 |
# Args:
|
237 | 251 |
# config (dict): The user configuration to use
|
252 |
+ # reset (bool): Optional reset of stored config
|
|
238 | 253 |
#
|
239 |
- def configure(self, config):
|
|
254 |
+ def configure(self, config, reset=False):
|
|
240 | 255 |
if self.config is None:
|
241 | 256 |
self.config = {}
|
242 | 257 |
|
258 |
+ if reset:
|
|
259 |
+ self.config.clear()
|
|
260 |
+ |
|
243 | 261 |
for key, val in config.items():
|
244 | 262 |
self.config[key] = val
|
245 | 263 |
|