[Notes] [Git][BuildStream/buildstream][tpollard/566] WIP: Make uploading of build trees configurable



Title: GitLab

Tom Pollard pushed to branch tpollard/566 at BuildStream / buildstream

Commits:

8 changed files:

Changes:

  • buildstream/_artifactcache.py
    ... ... @@ -72,6 +72,7 @@ class ArtifactCache():
    72 72
     
    
    73 73
             self._has_fetch_remotes = False
    
    74 74
             self._has_push_remotes = False
    
    75
    +        self._has_partial_push_remotes = False
    
    75 76
     
    
    76 77
             os.makedirs(self.extractdir, exist_ok=True)
    
    77 78
     
    
    ... ... @@ -384,6 +385,9 @@ class ArtifactCache():
    384 385
                     self._has_fetch_remotes = True
    
    385 386
                     if remote_spec.push:
    
    386 387
                         self._has_push_remotes = True
    
    388
    +                    # Partial push requires generic push option to also be set
    
    389
    +                    if remote_spec.partial_push:
    
    390
    +                        self._has_partial_push_remotes = True
    
    387 391
     
    
    388 392
                     remotes[remote_spec.url] = CASRemote(remote_spec)
    
    389 393
     
    
    ... ... @@ -582,6 +586,32 @@ class ArtifactCache():
    582 586
                 remotes_for_project = self._remotes[element._get_project()]
    
    583 587
                 return any(remote.spec.push for remote in remotes_for_project)
    
    584 588
     
    
    589
    +    # has_partial_push_remotes():
    
    590
    +    #
    
    591
    +    # Check whether any remote repositories are available for pushing
    
    592
    +    # non-complete artifacts. This option requires the generic push value
    
    593
    +    # to also be set.
    
    594
    +    #
    
    595
    +    # Args:
    
    596
    +    #     element (Element): The Element to check
    
    597
    +    #
    
    598
    +    # Returns:
    
    599
    +    #   (bool): True if any remote repository is configured for optional
    
    600
    +    #            partial pushes, False otherwise
    
    601
    +    #
    
    602
    +    def has_partial_push_remotes(self, *, element=None):
    
    603
    +        # If there's no partial push remotes available, we can't partial push at all
    
    604
    +        if not self._has_partial_push_remotes:
    
    605
    +            return False
    
    606
    +        elif element is None:
    
    607
    +            # At least one remote is set to allow partial pushes
    
    608
    +            return True
    
    609
    +        else:
    
    610
    +            # Check whether the specified element's project has push remotes configured
    
    611
    +            # to accept partial artifact pushes
    
    612
    +            remotes_for_project = self._remotes[element._get_project()]
    
    613
    +            return any(remote.spec.partial_push for remote in remotes_for_project)
    
    614
    +
    
    585 615
         # push():
    
    586 616
         #
    
    587 617
         # Push committed artifact to remote repository.
    
    ... ... @@ -589,6 +619,8 @@ class ArtifactCache():
    589 619
         # Args:
    
    590 620
         #     element (Element): The Element whose artifact is to be pushed
    
    591 621
         #     keys (list): The cache keys to use
    
    622
    +    #     partial(bool): If the artifact is cached in a partial state
    
    623
    +    #     subdir(string): Optional subdir to not push
    
    592 624
         #
    
    593 625
         # Returns:
    
    594 626
         #   (bool): True if any remote was updated, False if no pushes were required
    
    ... ... @@ -596,12 +628,25 @@ class ArtifactCache():
    596 628
         # Raises:
    
    597 629
         #   (ArtifactError): if there was an error
    
    598 630
         #
    
    599
    -    def push(self, element, keys):
    
    631
    +    def push(self, element, keys, partial=False, subdir=None):
    
    600 632
             refs = [self.get_artifact_fullname(element, key) for key in list(keys)]
    
    601 633
     
    
    602 634
             project = element._get_project()
    
    603 635
     
    
    604
    -        push_remotes = [r for r in self._remotes[project] if r.spec.push]
    
    636
    +        push_remotes = []
    
    637
    +        partial_remotes = []
    
    638
    +
    
    639
    +        # Create list of remotes to push to, given current element and partial push config
    
    640
    +        if not partial:
    
    641
    +            push_remotes = [r for r in self._remotes[project] if (r.spec.push and not r.spec.partial_push)]
    
    642
    +
    
    643
    +        if self._has_partial_push_remotes:
    
    644
    +            # Create a specific list of the remotes expecting the artifact to be pushed in a partial
    
    645
    +            # state. This list needs to be pushed in a partial state, without the optional subdir if
    
    646
    +            # exists locally. No need to attempt pushing a partial artifact to a remote that is queued to
    
    647
    +            # also receive a full artifact
    
    648
    +            partial_remotes = [r for r in self._remotes[project] if (r.spec.partial_push and r.spec.push) and
    
    649
    +                               r not in push_remotes]
    
    605 650
     
    
    606 651
             pushed = False
    
    607 652
     
    
    ... ... @@ -610,7 +655,9 @@ class ArtifactCache():
    610 655
                 display_key = element._get_brief_display_key()
    
    611 656
                 element.status("Pushing artifact {} -> {}".format(display_key, remote.spec.url))
    
    612 657
     
    
    613
    -            if self.cas.push(refs, remote):
    
    658
    +            # Passing the optional subdir allows for remote artifacts that are cached in a 'partial'
    
    659
    +            # state to be completed
    
    660
    +            if self.cas.push(refs, remote, subdir=subdir):
    
    614 661
                     element.info("Pushed artifact {} -> {}".format(display_key, remote.spec.url))
    
    615 662
                     pushed = True
    
    616 663
                 else:
    
    ... ... @@ -618,6 +665,19 @@ class ArtifactCache():
    618 665
                         remote.spec.url, element._get_brief_display_key()
    
    619 666
                     ))
    
    620 667
     
    
    668
    +        for remote in partial_remotes:
    
    669
    +            remote.init()
    
    670
    +            display_key = element._get_brief_display_key()
    
    671
    +            element.status("Pushing partial artifact {} -> {}".format(display_key, remote.spec.url))
    
    672
    +
    
    673
    +            if self.cas.push(refs, remote, excluded_subdirs=subdir):
    
    674
    +                element.info("Pushed partial artifact {} -> {}".format(display_key, remote.spec.url))
    
    675
    +                pushed = True
    
    676
    +            else:
    
    677
    +                element.info("Remote ({}) already has {} partial cached".format(
    
    678
    +                    remote.spec.url, element._get_brief_display_key()
    
    679
    +                ))
    
    680
    +
    
    621 681
             return pushed
    
    622 682
     
    
    623 683
         # pull():
    
    ... ... @@ -645,14 +705,23 @@ class ArtifactCache():
    645 705
                     element.status("Pulling artifact {} <- {}".format(display_key, remote.spec.url))
    
    646 706
     
    
    647 707
                     if self.cas.pull(ref, remote, progress=progress, subdir=subdir, excluded_subdirs=excluded_subdirs):
    
    648
    -                    element.info("Pulled artifact {} <- {}".format(display_key, remote.spec.url))
    
    649 708
                         if subdir:
    
    650
    -                        # Attempt to extract subdir into artifact extract dir if it already exists
    
    651
    -                        # without containing the subdir. If the respective artifact extract dir does not
    
    652
    -                        # exist a complete extraction will complete.
    
    653
    -                        self.extract(element, key, subdir)
    
    654
    -                    # no need to pull from additional remotes
    
    655
    -                    return True
    
    709
    +                        if not self.contains_subdir_artifact(element, key, subdir):
    
    710
    +                            # The pull was expecting the specific subdir to be present in the remote, attempt
    
    711
    +                            # to find it in other available remotes
    
    712
    +                            element.info("Pulled partial artifact {} <- {}. Attempting to retrieve {} from remotes"
    
    713
    +                                         .format(display_key, remote.spec.url, subdir))
    
    714
    +                        else:
    
    715
    +                            element.info("Pulled artifact {} <- {}".format(display_key, remote.spec.url))
    
    716
    +                            # Attempt to extract subdir into artifact extract dir if it already exists
    
    717
    +                            # without containing the subdir. If the respective artifact extract dir does not
    
    718
    +                            # exist a complete extraction will complete.
    
    719
    +                            self.extract(element, key, subdir)
    
    720
    +                            # no need to pull from additional remotes
    
    721
    +                            return True
    
    722
    +                    else:
    
    723
    +                        element.info("Pulled artifact {} <- {}".format(display_key, remote.spec.url))
    
    724
    +                        return True
    
    656 725
                     else:
    
    657 726
                         element.info("Remote ({}) does not have {} cached".format(
    
    658 727
                             remote.spec.url, element._get_brief_display_key()
    

  • buildstream/_cas/cascache.py
    ... ... @@ -198,34 +198,47 @@ class CASCache():
    198 198
         #   (bool): True if pull was successful, False if ref was not available
    
    199 199
         #
    
    200 200
         def pull(self, ref, remote, *, progress=None, subdir=None, excluded_subdirs=None):
    
    201
    -        try:
    
    202
    -            remote.init()
    
    203 201
     
    
    204
    -            request = buildstream_pb2.GetReferenceRequest(instance_name=remote.spec.instance_name)
    
    205
    -            request.key = ref
    
    206
    -            response = remote.ref_storage.GetReference(request)
    
    202
    +        tree_found = False
    
    207 203
     
    
    208
    -            tree = remote_execution_pb2.Digest()
    
    209
    -            tree.hash = response.digest.hash
    
    210
    -            tree.size_bytes = response.digest.size_bytes
    
    204
    +        while True:
    
    205
    +            try:
    
    206
    +                if not tree_found:
    
    207
    +                    remote.init()
    
    211 208
     
    
    212
    -            # Check if the element artifact is present, if so just fetch the subdir.
    
    213
    -            if subdir and os.path.exists(self.objpath(tree)):
    
    214
    -                self._fetch_subdir(remote, tree, subdir)
    
    215
    -            else:
    
    216
    -                # Fetch artifact, excluded_subdirs determined in pullqueue
    
    217
    -                self._fetch_directory(remote, tree, excluded_subdirs=excluded_subdirs)
    
    209
    +                    request = buildstream_pb2.GetReferenceRequest(instance_name=remote.spec.instance_name)
    
    210
    +                    request.key = ref
    
    211
    +                    response = remote.ref_storage.GetReference(request)
    
    218 212
     
    
    219
    -            self.set_ref(ref, tree)
    
    213
    +                    tree = remote_execution_pb2.Digest()
    
    214
    +                    tree.hash = response.digest.hash
    
    215
    +                    tree.size_bytes = response.digest.size_bytes
    
    220 216
     
    
    221
    -            return True
    
    222
    -        except grpc.RpcError as e:
    
    223
    -            if e.code() != grpc.StatusCode.NOT_FOUND:
    
    224
    -                raise CASCacheError("Failed to pull ref {}: {}".format(ref, e)) from e
    
    225
    -            else:
    
    226
    -                return False
    
    227
    -        except BlobNotFound as e:
    
    228
    -            return False
    
    217
    +                # Check if the element artifact is present, if so just fetch the subdir.
    
    218
    +                if subdir and os.path.exists(self.objpath(tree)):
    
    219
    +                    self._fetch_subdir(remote, tree, subdir)
    
    220
    +                else:
    
    221
    +                    # Fetch artifact, excluded_subdirs determined in pullqueue
    
    222
    +                    self._fetch_directory(remote, tree, excluded_subdirs=excluded_subdirs)
    
    223
    +
    
    224
    +                self.set_ref(ref, tree)
    
    225
    +
    
    226
    +                return True
    
    227
    +            except grpc.RpcError as e:
    
    228
    +                if e.code() != grpc.StatusCode.NOT_FOUND:
    
    229
    +                    raise CASError("Failed to pull ref {}: {}".format(ref, e)) from e
    
    230
    +                else:
    
    231
    +                    return False
    
    232
    +            except BlobNotFound as e:
    
    233
    +                if not excluded_subdirs and subdir:
    
    234
    +                    # The remote has the top level digest but could not complete a full pull,
    
    235
    +                    # attempt partial without the need to initialise and check for the artifact
    
    236
    +                    # digest. This default behaviour of dropping back to partial pulls could
    
    237
    +                    # be made a configurable warning given at artifactcache level.
    
    238
    +                    tree_found = True
    
    239
    +                    excluded_subdirs, subdir = subdir, excluded_subdirs
    
    240
    +                else:
    
    241
    +                    return False
    
    229 242
     
    
    230 243
         # pull_tree():
    
    231 244
         #
    
    ... ... @@ -270,6 +283,8 @@ class CASCache():
    270 283
         # Args:
    
    271 284
         #     refs (list): The refs to push
    
    272 285
         #     remote (CASRemote): The remote to push to
    
    286
    +    #     subdir (string): Optional specific subdir to include in the push
    
    287
    +    #     excluded_subdirs (list): The optional list of subdirs to not push
    
    273 288
         #
    
    274 289
         # Returns:
    
    275 290
         #   (bool): True if any remote was updated, False if no pushes were required
    
    ... ... @@ -277,7 +292,7 @@ class CASCache():
    277 292
         # Raises:
    
    278 293
         #   (CASCacheError): if there was an error
    
    279 294
         #
    
    280
    -    def push(self, refs, remote):
    
    295
    +    def push(self, refs, remote, *, subdir=None, excluded_subdirs=None):
    
    281 296
             skipped_remote = True
    
    282 297
             try:
    
    283 298
                 for ref in refs:
    
    ... ... @@ -291,15 +306,18 @@ class CASCache():
    291 306
                         response = remote.ref_storage.GetReference(request)
    
    292 307
     
    
    293 308
                         if response.digest.hash == tree.hash and response.digest.size_bytes == tree.size_bytes:
    
    294
    -                        # ref is already on the server with the same tree
    
    295
    -                        continue
    
    309
    +                        # ref is already on the server with the same tree, however it might be partially cached.
    
    310
    +                        # If artifact is not set to be pushed partially attempt to 'complete' the remote artifact if
    
    311
    +                        # needed, else continue.
    
    312
    +                        if excluded_subdirs or remote.verify_digest_on_remote(self._get_subdir(tree, subdir)):
    
    313
    +                            continue
    
    296 314
     
    
    297 315
                     except grpc.RpcError as e:
    
    298 316
                         if e.code() != grpc.StatusCode.NOT_FOUND:
    
    299 317
                             # Intentionally re-raise RpcError for outer except block.
    
    300 318
                             raise
    
    301 319
     
    
    302
    -                self._send_directory(remote, tree)
    
    320
    +                self._send_directory(remote, tree, excluded_dir=excluded_subdirs)
    
    303 321
     
    
    304 322
                     request = buildstream_pb2.UpdateReferenceRequest(instance_name=remote.spec.instance_name)
    
    305 323
                     request.keys.append(ref)
    
    ... ... @@ -782,10 +800,17 @@ class CASCache():
    782 800
                     a += 1
    
    783 801
                     b += 1
    
    784 802
     
    
    785
    -    def _reachable_refs_dir(self, reachable, tree, update_mtime=False):
    
    803
    +    def _reachable_refs_dir(self, reachable, tree, update_mtime=False, subdir=False):
    
    786 804
             if tree.hash in reachable:
    
    787 805
                 return
    
    788 806
     
    
    807
    +        # If looping through subdir digests, skip processing if
    
    808
    +        # ref path does not exist, allowing for partial objects
    
    809
    +        if subdir and not os.path.exists(self.objpath(tree)):
    
    810
    +            return
    
    811
    +
    
    812
    +        # Raises a FileNotFoundError exception if the path does not exist,
    
    813
    +        # which should only be thrown on the top level digest
    
    789 814
             if update_mtime:
    
    790 815
                 os.utime(self.objpath(tree))
    
    791 816
     
    
    ... ... @@ -802,9 +827,9 @@ class CASCache():
    802 827
                 reachable.add(filenode.digest.hash)
    
    803 828
     
    
    804 829
             for dirnode in directory.directories:
    
    805
    -            self._reachable_refs_dir(reachable, dirnode.digest, update_mtime=update_mtime)
    
    830
    +            self._reachable_refs_dir(reachable, dirnode.digest, update_mtime=update_mtime, subdir=True)
    
    806 831
     
    
    807
    -    def _required_blobs(self, directory_digest):
    
    832
    +    def _required_blobs(self, directory_digest, excluded_dir=None):
    
    808 833
             # parse directory, and recursively add blobs
    
    809 834
             d = remote_execution_pb2.Digest()
    
    810 835
             d.hash = directory_digest.hash
    
    ... ... @@ -823,7 +848,8 @@ class CASCache():
    823 848
                 yield d
    
    824 849
     
    
    825 850
             for dirnode in directory.directories:
    
    826
    -            yield from self._required_blobs(dirnode.digest)
    
    851
    +            if dirnode.name != excluded_dir:
    
    852
    +                yield from self._required_blobs(dirnode.digest)
    
    827 853
     
    
    828 854
         # _ensure_blob():
    
    829 855
         #
    
    ... ... @@ -928,6 +954,7 @@ class CASCache():
    928 954
                 objpath = self._ensure_blob(remote, dir_digest)
    
    929 955
     
    
    930 956
                 directory = remote_execution_pb2.Directory()
    
    957
    +
    
    931 958
                 with open(objpath, 'rb') as f:
    
    932 959
                     directory.ParseFromString(f.read())
    
    933 960
     
    
    ... ... @@ -970,8 +997,8 @@ class CASCache():
    970 997
     
    
    971 998
             return dirdigest
    
    972 999
     
    
    973
    -    def _send_directory(self, remote, digest, u_uid=uuid.uuid4()):
    
    974
    -        required_blobs = self._required_blobs(digest)
    
    1000
    +    def _send_directory(self, remote, digest, u_uid=uuid.uuid4(), excluded_dir=None):
    
    1001
    +        required_blobs = self._required_blobs(digest, excluded_dir=excluded_dir)
    
    975 1002
     
    
    976 1003
             missing_blobs = dict()
    
    977 1004
             # Limit size of FindMissingBlobs request
    

  • buildstream/_cas/casremote.py
    ... ... @@ -23,7 +23,8 @@ from .. import utils
    23 23
     _MAX_PAYLOAD_BYTES = 1024 * 1024
    
    24 24
     
    
    25 25
     
    
    26
    -class CASRemoteSpec(namedtuple('CASRemoteSpec', 'url push server_cert client_key client_cert instance_name')):
    
    26
    +class CASRemoteSpec(namedtuple('CASRemoteSpec',
    
    27
    +                               'url push partial_push server_cert client_key client_cert instance_name')):
    
    27 28
     
    
    28 29
         # _new_from_config_node
    
    29 30
         #
    
    ... ... @@ -31,9 +32,18 @@ class CASRemoteSpec(namedtuple('CASRemoteSpec', 'url push server_cert client_key
    31 32
         #
    
    32 33
         @staticmethod
    
    33 34
         def _new_from_config_node(spec_node, basedir=None):
    
    34
    -        _yaml.node_validate(spec_node, ['url', 'push', 'server-cert', 'client-key', 'client-cert', 'instance_name'])
    
    35
    +        _yaml.node_validate(spec_node, ['url', 'push', 'allow-partial-push', 'server-cert', 'client-key',
    
    36
    +                                        'client-cert', 'instance_name'])
    
    35 37
             url = _yaml.node_get(spec_node, str, 'url')
    
    36 38
             push = _yaml.node_get(spec_node, bool, 'push', default_value=False)
    
    39
    +        partial_push = _yaml.node_get(spec_node, bool, 'allow-partial-push', default_value=False)
    
    40
    +
    
    41
    +        # partial_push depends on push, raise error if not configured correctly
    
    42
    +        if partial_push and not push:
    
    43
    +            provenance = _yaml.node_get_provenance(spec_node, 'allow-partial-push')
    
    44
    +            raise LoadError(LoadErrorReason.INVALID_DATA,
    
    45
    +                            "{}: allow-partial-push also requires push to be set".format(provenance))
    
    46
    +
    
    37 47
             if not url:
    
    38 48
                 provenance = _yaml.node_get_provenance(spec_node, 'url')
    
    39 49
                 raise LoadError(LoadErrorReason.INVALID_DATA,
    
    ... ... @@ -63,10 +73,10 @@ class CASRemoteSpec(namedtuple('CASRemoteSpec', 'url push server_cert client_key
    63 73
                 raise LoadError(LoadErrorReason.INVALID_DATA,
    
    64 74
                                 "{}: 'client-cert' was specified without 'client-key'".format(provenance))
    
    65 75
     
    
    66
    -        return CASRemoteSpec(url, push, server_cert, client_key, client_cert, instance_name)
    
    76
    +        return CASRemoteSpec(url, push, partial_push, server_cert, client_key, client_cert, instance_name)
    
    67 77
     
    
    68 78
     
    
    69
    -CASRemoteSpec.__new__.__defaults__ = (None, None, None, None)
    
    79
    +CASRemoteSpec.__new__.__defaults__ = (False, None, None, None, None)
    
    70 80
     
    
    71 81
     
    
    72 82
     class BlobNotFound(CASRemoteError):
    

  • buildstream/element.py
    ... ... @@ -1797,13 +1797,19 @@ class Element(Plugin):
    1797 1797
         #   (bool): True if this element does not need a push job to be created
    
    1798 1798
         #
    
    1799 1799
         def _skip_push(self):
    
    1800
    +
    
    1800 1801
             if not self.__artifacts.has_push_remotes(element=self):
    
    1801 1802
                 # No push remotes for this element's project
    
    1802 1803
                 return True
    
    1803 1804
     
    
    1804 1805
             # Do not push elements that aren't cached, or that are cached with a dangling buildtree
    
    1805
    -        # artifact unless element type is expected to have an an empty buildtree directory
    
    1806
    -        if not self._cached_buildtree():
    
    1806
    +        # artifact unless element type is expected to have an empty buildtree directory. Check
    
    1807
    +        # that this default behaviour is not overridden via a remote configured to allow pushing
    
    1808
    +        # artifacts without their corresponding buildtree.
    
    1809
    +        if not self._cached():
    
    1810
    +            return True
    
    1811
    +
    
    1812
    +        if not self._cached_buildtree() and not self.__artifacts.has_partial_push_remotes(element=self):
    
    1807 1813
                 return True
    
    1808 1814
     
    
    1809 1815
             # Do not push tainted artifact
    
    ... ... @@ -1814,11 +1820,14 @@ class Element(Plugin):
    1814 1820
     
    
    1815 1821
         # _push():
    
    1816 1822
         #
    
    1817
    -    # Push locally cached artifact to remote artifact repository.
    
    1823
    +    # Push locally cached artifact to remote artifact repository. An attempt
    
    1824
    +    # will be made to push partial artifacts if given current config dictates.
    
    1825
    +    # If a remote set for 'full' artifact pushes is found to be cached partially
    
    1826
    +    # in the remote, an attempt will be made to 'complete' it.
    
    1818 1827
         #
    
    1819 1828
         # Returns:
    
    1820 1829
         #   (bool): True if the remote was updated, False if it already existed
    
    1821
    -    #           and no updated was required
    
    1830
    +    #           and no update was required
    
    1822 1831
         #
    
    1823 1832
         def _push(self):
    
    1824 1833
             self.__assert_cached()
    
    ... ... @@ -1827,8 +1836,17 @@ class Element(Plugin):
    1827 1836
                 self.warn("Not pushing tainted artifact.")
    
    1828 1837
                 return False
    
    1829 1838
     
    
    1830
    -        # Push all keys used for local commit
    
    1831
    -        pushed = self.__artifacts.push(self, self.__get_cache_keys_for_commit())
    
    1839
    +        # Push all keys used for local commit, this could be full or partial,
    
    1840
    +        # given previous _skip_push() logic. If buildtree isn't cached, then
    
    1841
    +        # set partial push
    
    1842
    +
    
    1843
    +        partial = False
    
    1844
    +        subdir = 'buildtree'
    
    1845
    +        if not self._cached_buildtree():
    
    1846
    +            partial = True
    
    1847
    +
    
    1848
    +        pushed = self.__artifacts.push(self, self.__get_cache_keys_for_commit(), partial=partial, subdir=subdir)
    
    1849
    +
    
    1832 1850
             if not pushed:
    
    1833 1851
                 return False
    
    1834 1852
     
    

  • doc/source/using_config.rst
    ... ... @@ -59,6 +59,15 @@ configuration:
    59 59
          # Add another cache to pull from
    
    60 60
          - url: https://anothercache.com/artifacts:8080
    
    61 61
            server-cert: another_server.crt
    
    62
    +     # Add a cache to push/pull to/from, specifying
    
    63
    +     # that you wish to push artifacts in a 'partial'
    
    64
    +     # state (this being without the respective buildtree).
    
    65
    +     # Note that allow-partial-push requires push to also
    
    66
    +     # be set.
    
    67
    +     - url: https://anothercache.com/artifacts:11003
    
    68
    +       push: true
    
    69
    +       allow-partial-push: true
    
    70
    +
    
    62 71
     
    
    63 72
     .. note::
    
    64 73
     
    
    ... ... @@ -86,6 +95,14 @@ configuration:
    86 95
              # Add another cache to pull from
    
    87 96
              - url: https://ourprojectcache.com/artifacts:8080
    
    88 97
                server-cert: project_server.crt
    
    98
    +         # Add a cache to push/pull to/from, specifying
    
    99
    +         # that you wish to push artifacts in a 'partial'
    
    100
    +         # state (this being without the respective buildtree).
    
    101
    +         # Note that allow-partial-push requires push to also
    
    102
    +         # be set.
    
    103
    +         - url: https://anothercache.com/artifacts:11003
    
    104
    +           push: true
    
    105
    +           allow-partial-push: true
    
    89 106
     
    
    90 107
     
    
    91 108
     .. note::
    

  • tests/artifactcache/config.py
    ... ... @@ -139,3 +139,28 @@ def test_missing_certs(cli, datafiles, config_key, config_value):
    139 139
         # This does not happen for a simple `bst show`.
    
    140 140
         result = cli.run(project=project, args=['pull', 'element.bst'])
    
    141 141
         result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.INVALID_DATA)
    
    142
    +
    
    143
    +
    
    144
    +# Assert that if allow-partial-push is specified as true without push also being
    
    145
    +# set likewise, we get a comprehensive LoadError instead of an unhandled exception.
    
    146
    +@pytest.mark.datafiles(DATA_DIR)
    
    147
    +def test_partial_push_error(cli, datafiles):
    
    148
    +    project = os.path.join(datafiles.dirname, datafiles.basename, 'project', 'elements')
    
    149
    +
    
    150
    +    project_conf = {
    
    151
    +        'name': 'test',
    
    152
    +
    
    153
    +        'artifacts': {
    
    154
    +            'url': 'https://cache.example.com:12345',
    
    155
    +            'allow-partial-push': 'True'
    
    156
    +        }
    
    157
    +    }
    
    158
    +    project_conf_file = os.path.join(project, 'project.conf')
    
    159
    +    _yaml.dump(project_conf, project_conf_file)
    
    160
    +
    
    161
    +    # Use `pull` here to ensure we try to initialize the remotes, triggering the error
    
    162
    +    #
    
    163
    +    # This does not happen for a simple `bst show`.
    
    164
    +    result = cli.run(project=project, args=['pull', 'target.bst'])
    
    165
    +    result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.INVALID_DATA)
    
    166
    +    assert "allow-partial-push also requires push to be set" in result.stderr

  • tests/integration/pushbuildtrees.py
    1
    +import os
    
    2
    +import shutil
    
    3
    +import pytest
    
    4
    +import subprocess
    
    5
    +
    
    6
    +from buildstream import _yaml
    
    7
    +from tests.testutils import cli_integration as cli, create_artifact_share
    
    8
    +from tests.testutils.integration import assert_contains
    
    9
    +from tests.testutils.site import HAVE_BWRAP, IS_LINUX
    
    10
    +from buildstream._exceptions import ErrorDomain, LoadErrorReason
    
    11
    +
    
    12
    +
    
    13
    +DATA_DIR = os.path.join(
    
    14
    +    os.path.dirname(os.path.realpath(__file__)),
    
    15
    +    "project"
    
    16
    +)
    
    17
    +
    
    18
    +
    
    19
    +# Remove artifact cache & set cli.config value of pull-buildtrees
    
    20
    +# to false, which is the default user context. The cache has to be
    
    21
    +# cleared as just forcefully removing the refpath leaves dangling objects.
    
    22
    +def default_state(cli, tmpdir, share):
    
    23
    +    shutil.rmtree(os.path.join(str(tmpdir), 'artifacts'))
    
    24
    +    cli.configure({
    
    25
    +        'artifacts': {'url': share.repo, 'push': False},
    
    26
    +        'artifactdir': os.path.join(str(tmpdir), 'artifacts'),
    
    27
    +        'cache': {'pull-buildtrees': False},
    
    28
    +    })
    
    29
    +
    
    30
    +
    
    31
    +# Tests to capture the integration of the optional push of buildtrees.
    
    32
    +# The behaviour should encompass pushing artifacts that are already cached
    
    33
    +# without a buildtree as well as artifacts that are cached with their buildtree.
    
    34
    +# This option is handled via 'allow-partial-push' on a per artifact remote config
    
    35
    +# node basis. Multiple remote config nodes can point to the same url and as such can
    
    36
    +# have different 'allow-partial-push' options, tests need to cover this using project
    
    37
    +# confs.
    
    38
    +@pytest.mark.integration
    
    39
    +@pytest.mark.datafiles(DATA_DIR)
    
    40
    +@pytest.mark.skipif(IS_LINUX and not HAVE_BWRAP, reason='Only available with bubblewrap on Linux')
    
    41
    +def test_pushbuildtrees(cli, tmpdir, datafiles, integration_cache):
    
    42
    +    project = os.path.join(datafiles.dirname, datafiles.basename)
    
    43
    +    element_name = 'autotools/amhello.bst'
    
    44
    +
    
    45
    +    # Create artifact shares for pull & push testing
    
    46
    +    with create_artifact_share(os.path.join(str(tmpdir), 'share1')) as share1,\
    
    47
    +        create_artifact_share(os.path.join(str(tmpdir), 'share2')) as share2,\
    
    48
    +        create_artifact_share(os.path.join(str(tmpdir), 'share3')) as share3,\
    
    49
    +        create_artifact_share(os.path.join(str(tmpdir), 'share4')) as share4:
    
    50
    +
    
    51
    +        cli.configure({
    
    52
    +            'artifacts': {'url': share1.repo, 'push': True},
    
    53
    +            'artifactdir': os.path.join(str(tmpdir), 'artifacts')
    
    54
    +        })
    
    55
    +
    
    56
    +        cli.configure({'artifacts': [{'url': share1.repo, 'push': True},
    
    57
    +                                     {'url': share2.repo, 'push': True, 'allow-partial-push': True}]})
    
    58
    +
    
    59
    +        # Build autotools element, check it was pushed, delete local.
    
    60
    +        # As share 2 has push & allow-partial-push set to true, it
    
    61
    +        # should have pushed the artifacts, without the cached buildtrees,
    
    62
    +        # to it.
    
    63
    +        result = cli.run(project=project, args=['build', element_name])
    
    64
    +        assert result.exit_code == 0
    
    65
    +        assert cli.get_element_state(project, element_name) == 'cached'
    
    66
    +        elementdigest = share1.has_artifact('test', element_name, cli.get_element_key(project, element_name))
    
    67
    +        buildtreedir = os.path.join(str(tmpdir), 'artifacts', 'extract', 'test', 'autotools-amhello',
    
    68
    +                                    elementdigest.hash, 'buildtree')
    
    69
    +        assert os.path.isdir(buildtreedir)
    
    70
    +        assert element_name in result.get_partial_pushed_elements()
    
    71
    +        assert element_name in result.get_pushed_elements()
    
    72
    +        assert share1.has_artifact('test', element_name, cli.get_element_key(project, element_name))
    
    73
    +        assert share2.has_artifact('test', element_name, cli.get_element_key(project, element_name))
    
    74
    +        default_state(cli, tmpdir, share1)
    
    75
    +
    
    76
    +        # Check that after explicitly pulling an artifact without its buildtree,
    
    77
    +        # we can push it to another remote that is configured to accept the partial
    
    78
    +        # artifact
    
    79
    +        result = cli.run(project=project, args=['pull', element_name])
    
    80
    +        assert element_name in result.get_pulled_elements()
    
    81
    +        cli.configure({'artifacts': {'url': share3.repo, 'push': True, 'allow-partial-push': True}})
    
    82
    +        assert cli.get_element_state(project, element_name) == 'cached'
    
    83
    +        assert not os.path.isdir(buildtreedir)
    
    84
    +        result = cli.run(project=project, args=['push', element_name])
    
    85
    +        assert result.exit_code == 0
    
    86
    +        assert element_name in result.get_partial_pushed_elements()
    
    87
    +        assert element_name not in result.get_pushed_elements()
    
    88
    +        assert share3.has_artifact('test', element_name, cli.get_element_key(project, element_name))
    
    89
    +        default_state(cli, tmpdir, share3)
    
    90
    +
    
    91
    +        # Delete the local cache and pull the partial artifact from share 3,
    
    92
    +        # this should not include the buildtree when extracted locally, even when
    
    93
    +        # pull-buildtrees is given as a cli parameter as no available remotes will
    
    94
    +        # contain the buildtree
    
    95
    +        assert not os.path.isdir(buildtreedir)
    
    96
    +        assert cli.get_element_state(project, element_name) != 'cached'
    
    97
    +        result = cli.run(project=project, args=['--pull-buildtrees', 'pull', element_name])
    
    98
    +        assert element_name in result.get_partial_pulled_elements()
    
    99
    +        assert not os.path.isdir(buildtreedir)
    
    100
    +        default_state(cli, tmpdir, share3)
    
    101
    +
    
    102
    +        # Delete the local cache and attempt to pull a 'full' artifact, including its
    
    103
    +        # buildtree. As before, share3, being the first listed remote, will not have
    
    104
    +        # the buildtree available and should spawn a partial pull. Having share1 as the
    
    105
    +        # second available remote should allow the buildtree to be pulled thus 'completing'
    
    106
    +        # the artifact
    
    107
    +        cli.configure({'artifacts': [{'url': share3.repo, 'push': True, 'allow-partial-push': True},
    
    108
    +                                     {'url': share1.repo, 'push': True}]})
    
    109
    +        assert cli.get_element_state(project, element_name) != 'cached'
    
    110
    +        result = cli.run(project=project, args=['--pull-buildtrees', 'pull', element_name])
    
    111
    +        assert element_name in result.get_partial_pulled_elements()
    
    112
    +        assert element_name in result.get_pulled_elements()
    
    113
    +        assert "Attempting to retrieve buildtree from remotes" in result.stderr
    
    114
    +        assert os.path.isdir(buildtreedir)
    
    115
    +        assert cli.get_element_state(project, element_name) == 'cached'
    
    116
    +
    
    117
    +        # Test that we are able to 'complete' an artifact on a server which is cached partially,
    
    118
    +        # but has now been configured for full artifact pushing. This should require only pushing
    
    119
    +        # the missing blobs, which should be those of just the buildtree. In this case changing
    
    120
    +        # share3 to full pushes should exercise this
    
    121
    +        cli.configure({'artifacts': {'url': share3.repo, 'push': True}})
    
    122
    +        result = cli.run(project=project, args=['push', element_name])
    
    123
    +        assert element_name in result.get_pushed_elements()
    
    124
    +
    
    125
    +        # Ensure that the same remote url can be defined multiple times with differing push
    
    126
    +        # config. Buildstream supports the same remote having different configurations, so
    
    127
    +        # partial pushing could differ between elements defined at a top level project.conf and
    
    128
    +        # those from a junctioned project. Assert that elements are pushed to the same remote in
    
    129
    +        # a state defined via their respective project.confs
    
    130
    +        default_state(cli, tmpdir, share1)
    
    131
    +        cli.configure({'artifactdir': os.path.join(str(tmpdir), 'artifacts')}, reset=True)
    
    132
    +        junction = os.path.join(project, 'elements', 'junction')
    
    133
    +        os.mkdir(junction)
    
    134
    +        shutil.copy2(os.path.join(project, 'elements', element_name), junction)
    
    135
    +
    
    136
    +        junction_conf = {}
    
    137
    +        project_conf = {}
    
    138
    +        junction_conf['name'] = 'amhello'
    
    139
    +        junction_conf['artifacts'] = {'url': share4.repo, 'push': True, 'allow-partial-push': True}
    
    140
    +        _yaml.dump(junction_conf, os.path.join(junction, 'project.conf'))
    
    141
    +        project_conf['artifacts'] = {'url': share4.repo, 'push': True}
    
    142
    +
    
    143
    +        # Read project.conf, the junction project.conf and buildstream.conf
    
    144
    +        # before running bst
    
    145
    +        with open(os.path.join(project, 'project.conf'), 'r') as f:
    
    146
    +            print(f.read())
    
    147
    +        with open(os.path.join(junction, 'project.conf'), 'r') as f:
    
    148
    +            print(f.read())
    
    149
    +        with open(os.path.join(project, 'cache', 'buildstream.conf'), 'r') as f:
    
    150
    +            print(f.read())
    
    151
    +
    
    152
    +        result = cli.run(project=project, args=['build', 'junction/amhello.bst'], project_config=project_conf)
    
    153
    +
    
    154
    +        # Read project.conf, the junction project.conf and buildstream.conf
    
    155
    +        # after running bst
    
    156
    +        with open(os.path.join(project, 'project.conf'), 'r') as f:
    
    157
    +            print(f.read())
    
    158
    +        with open(os.path.join(junction, 'project.conf'), 'r') as f:
    
    159
    +            print(f.read())
    
    160
    +        with open(os.path.join(project, 'cache', 'buildstream.conf'), 'r') as f:
    
    161
    +            print(f.read())
    
    162
    +
    
    163
    +        assert 'junction/amhello.bst' in result.get_partial_pushed_elements()
    
    164
    +        assert 'base/base-alpine.bst' in result.get_pushed_elements()

  • tests/testutils/runcli.py
    ... ... @@ -208,6 +208,13 @@ class Result():
    208 208
     
    
    209 209
             return list(pushed)
    
    210 210
     
    
    211
    +    def get_partial_pushed_elements(self):
    
    212
    +        pushed = re.findall(r'\[\s*push:(\S+)\s*\]\s*INFO\s*Pushed partial artifact', self.stderr)
    
    213
    +        if pushed is None:
    
    214
    +            return []
    
    215
    +
    
    216
    +        return list(pushed)
    
    217
    +
    
    211 218
         def get_pulled_elements(self):
    
    212 219
             pulled = re.findall(r'\[\s*pull:(\S+)\s*\]\s*INFO\s*Pulled artifact', self.stderr)
    
    213 220
             if pulled is None:
    
    ... ... @@ -215,6 +222,13 @@ class Result():
    215 222
     
    
    216 223
             return list(pulled)
    
    217 224
     
    
    225
    +    def get_partial_pulled_elements(self):
    
    226
    +        pulled = re.findall(r'\[\s*pull:(\S+)\s*\]\s*INFO\s*Pulled partial artifact', self.stderr)
    
    227
    +        if pulled is None:
    
    228
    +            return []
    
    229
    +
    
    230
    +        return list(pulled)
    
    231
    +
    
    218 232
     
    
    219 233
     class Cli():
    
    220 234
     
    
    ... ... @@ -235,11 +249,15 @@ class Cli():
    235 249
         #
    
    236 250
         # Args:
    
    237 251
         #    config (dict): The user configuration to use
    
    252
    +    #    reset (bool): Optional reset of stored config
    
    238 253
         #
    
    239
    -    def configure(self, config):
    
    254
    +    def configure(self, config, reset=False):
    
    240 255
             if self.config is None:
    
    241 256
                 self.config = {}
    
    242 257
     
    
    258
    +        if reset:
    
    259
    +            self.config.clear()
    
    260
    +
    
    243 261
             for key, val in config.items():
    
    244 262
                 self.config[key] = val
    
    245 263
     
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]