[Notes] [Git][BuildStream/buildstream][tpollard/566] WIP: Make uploading of build trees configurable



Title: GitLab

Tom Pollard pushed to branch tpollard/566 at BuildStream / buildstream

Commits:

5 changed files:

Changes:

  • buildstream/_artifactcache/artifactcache.py
    ... ... @@ -74,6 +74,7 @@ class ArtifactCache():
    74 74
     
    
    75 75
             self._has_fetch_remotes = False
    
    76 76
             self._has_push_remotes = False
    
    77
    +        self._has_partial_push_remotes = False
    
    77 78
     
    
    78 79
             os.makedirs(self.extractdir, exist_ok=True)
    
    79 80
     
    
    ... ... @@ -398,6 +399,8 @@ class ArtifactCache():
    398 399
                     self._has_fetch_remotes = True
    
    399 400
                     if remote_spec.push:
    
    400 401
                         self._has_push_remotes = True
    
    402
    +                    if remote_spec.partial_push:
    
    403
    +                        self._has_partial_push_remotes = True
    
    401 404
     
    
    402 405
                     remotes[remote_spec.url] = CASRemote(remote_spec)
    
    403 406
     
    
    ... ... @@ -596,6 +599,31 @@ class ArtifactCache():
    596 599
                 remotes_for_project = self._remotes[element._get_project()]
    
    597 600
                 return any(remote.spec.push for remote in remotes_for_project)
    
    598 601
     
    
    602
    +    # has_partial_push_remotes():
    
    603
    +    #
    
    604
    +    # Check whether any remote repositories are available for pushing
    
    605
    +    # non-complete artifacts
    
    606
    +    #
    
    607
    +    # Args:
    
    608
    +    #     element (Element): The Element to check
    
    609
    +    #
    
    610
    +    # Returns:
    
    611
    +    #   (bool): True if any remote repository is configured for optional
    
    612
    +    #            partial pushes, False otherwise
    
    613
    +    #
    
    614
    +    def has_partial_push_remotes(self, *, element=None):
    
    615
    +        # If there's no partial push remotes available, we can't partial push at all
    
    616
    +        if not self._has_partial_push_remotes:
    
    617
    +            return False
    
    618
    +        elif element is None:
    
    619
    +            # At least one remote is set to allow partial pushes
    
    620
    +            return True
    
    621
    +        else:
    
    622
    +            # Check whether the specified element's project has push remotes configured
    
    623
    +            # to accept partial artifact pushes
    
    624
    +            remotes_for_project = self._remotes[element._get_project()]
    
    625
    +            return any(remote.spec.partial_push for remote in remotes_for_project)
    
    626
    +
    
    599 627
         # push():
    
    600 628
         #
    
    601 629
         # Push committed artifact to remote repository.
    
    ... ... @@ -603,6 +631,8 @@ class ArtifactCache():
    603 631
         # Args:
    
    604 632
         #     element (Element): The Element whose artifact is to be pushed
    
    605 633
         #     keys (list): The cache keys to use
    
    634
    +    #     partial(bool): If the artifact is cached in a partial state
    
    635
    +    #     subdir(string): Optional subdir to not push
    
    606 636
         #
    
    607 637
         # Returns:
    
    608 638
         #   (bool): True if any remote was updated, False if no pushes were required
    
    ... ... @@ -610,12 +640,23 @@ class ArtifactCache():
    610 640
         # Raises:
    
    611 641
         #   (ArtifactError): if there was an error
    
    612 642
         #
    
    613
    -    def push(self, element, keys):
    
    643
    +    def push(self, element, keys, partial=False, subdir=None):
    
    614 644
             refs = [self.get_artifact_fullname(element, key) for key in list(keys)]
    
    615 645
     
    
    616 646
             project = element._get_project()
    
    617 647
     
    
    618
    -        push_remotes = [r for r in self._remotes[project] if r.spec.push]
    
    648
    +        push_remotes = []
    
    649
    +        partial_remotes = []
    
    650
    +
    
    651
    +        # Create list of remotes to push to, given current element and partial push config
    
    652
    +        if not partial:
    
    653
    +            push_remotes = [r for r in self._remotes[project] if (r.spec.push and not r.spec.partial_push)]
    
    654
    +
    
    655
    +        if self._has_partial_push_remotes:
    
    656
    +            # Create a specific list of the remotes expecting the artifact to be pushed in a partial
    
    657
    +            # state. This list needs to be pushed in a partial state, without the optional subdir if
    
    658
    +            # it exists locally
    
    659
    +            partial_remotes = [r for r in self._remotes[project] if (r.spec.partial_push and r.spec.push)]
    
    619 660
     
    
    620 661
             pushed = False
    
    621 662
     
    
    ... ... @@ -632,6 +673,19 @@ class ArtifactCache():
    632 673
                         remote.spec.url, element._get_brief_display_key()
    
    633 674
                     ))
    
    634 675
     
    
    676
    +        for remote in partial_remotes:
    
    677
    +            remote.init()
    
    678
    +            display_key = element._get_brief_display_key()
    
    679
    +            element.status("Pushing partial artifact {} -> {}".format(display_key, remote.spec.url))
    
    680
    +
    
    681
    +            if self.cas.push(refs, remote, subdir=subdir):
    
    682
    +                element.info("Pushed partial artifact {} -> {}".format(display_key, remote.spec.url))
    
    683
    +                pushed = True
    
    684
    +            else:
    
    685
    +                element.info("Remote ({}) already has {} partial cached".format(
    
    686
    +                    remote.spec.url, element._get_brief_display_key()
    
    687
    +                ))
    
    688
    +
    
    635 689
             return pushed
    
    636 690
     
    
    637 691
         # pull():
    

  • buildstream/_artifactcache/cascache.py
    ... ... @@ -45,7 +45,7 @@ from .. import _yaml
    45 45
     _MAX_PAYLOAD_BYTES = 1024 * 1024
    
    46 46
     
    
    47 47
     
    
    48
    -class CASRemoteSpec(namedtuple('CASRemoteSpec', 'url push server_cert client_key client_cert')):
    
    48
    +class CASRemoteSpec(namedtuple('CASRemoteSpec', 'url push partial_push server_cert client_key client_cert')):
    
    49 49
     
    
    50 50
         # _new_from_config_node
    
    51 51
         #
    
    ... ... @@ -53,9 +53,12 @@ class CASRemoteSpec(namedtuple('CASRemoteSpec', 'url push server_cert client_key
    53 53
         #
    
    54 54
         @staticmethod
    
    55 55
         def _new_from_config_node(spec_node, basedir=None):
    
    56
    -        _yaml.node_validate(spec_node, ['url', 'push', 'server-cert', 'client-key', 'client-cert'])
    
    56
    +        _yaml.node_validate(spec_node,
    
    57
    +                            ['url', 'push', 'allow-partial-push', 'server-cert', 'client-key', 'client-cert'])
    
    57 58
             url = _yaml.node_get(spec_node, str, 'url')
    
    58 59
             push = _yaml.node_get(spec_node, bool, 'push', default_value=False)
    
    60
    +        partial_push = _yaml.node_get(spec_node, bool, 'allow-partial-push', default_value=False)
    
    61
    +
    
    59 62
             if not url:
    
    60 63
                 provenance = _yaml.node_get_provenance(spec_node, 'url')
    
    61 64
                 raise LoadError(LoadErrorReason.INVALID_DATA,
    
    ... ... @@ -83,10 +86,10 @@ class CASRemoteSpec(namedtuple('CASRemoteSpec', 'url push server_cert client_key
    83 86
                 raise LoadError(LoadErrorReason.INVALID_DATA,
    
    84 87
                                 "{}: 'client-cert' was specified without 'client-key'".format(provenance))
    
    85 88
     
    
    86
    -        return CASRemoteSpec(url, push, server_cert, client_key, client_cert)
    
    89
    +        return CASRemoteSpec(url, push, partial_push, server_cert, client_key, client_cert)
    
    87 90
     
    
    88 91
     
    
    89
    -CASRemoteSpec.__new__.__defaults__ = (None, None, None)
    
    92
    +CASRemoteSpec.__new__.__defaults__ = (False, None, None, None)
    
    90 93
     
    
    91 94
     
    
    92 95
     class BlobNotFound(CASError):
    
    ... ... @@ -308,6 +311,7 @@ class CASCache():
    308 311
                 else:
    
    309 312
                     return False
    
    310 313
             except BlobNotFound as e:
    
    314
    +            raise ValueError("Got to here")
    
    311 315
                 return False
    
    312 316
     
    
    313 317
         # pull_tree():
    
    ... ... @@ -353,6 +357,7 @@ class CASCache():
    353 357
         # Args:
    
    354 358
         #     refs (list): The refs to push
    
    355 359
         #     remote (CASRemote): The remote to push to
    
    360
    +    #     subdir (string): Optional specific subdir to exempt from the push
    
    356 361
         #
    
    357 362
         # Returns:
    
    358 363
         #   (bool): True if any remote was updated, False if no pushes were required
    
    ... ... @@ -360,7 +365,7 @@ class CASCache():
    360 365
         # Raises:
    
    361 366
         #   (CASError): if there was an error
    
    362 367
         #
    
    363
    -    def push(self, refs, remote):
    
    368
    +    def push(self, refs, remote, subdir=None):
    
    364 369
             skipped_remote = True
    
    365 370
             try:
    
    366 371
                 for ref in refs:
    
    ... ... @@ -382,7 +387,7 @@ class CASCache():
    382 387
                             # Intentionally re-raise RpcError for outer except block.
    
    383 388
                             raise
    
    384 389
     
    
    385
    -                self._send_directory(remote, tree)
    
    390
    +                self._send_directory(remote, tree, excluded_dir=subdir)
    
    386 391
     
    
    387 392
                     request = buildstream_pb2.UpdateReferenceRequest()
    
    388 393
                     request.keys.append(ref)
    
    ... ... @@ -864,10 +869,17 @@ class CASCache():
    864 869
                     a += 1
    
    865 870
                     b += 1
    
    866 871
     
    
    867
    -    def _reachable_refs_dir(self, reachable, tree, update_mtime=False):
    
    872
    +    def _reachable_refs_dir(self, reachable, tree, update_mtime=False, subdir=False):
    
    868 873
             if tree.hash in reachable:
    
    869 874
                 return
    
    870 875
     
    
    876
    +        # If looping through subdir digests, skip processing if
    
    877
    +        # ref path does not exist, allowing for partial objects
    
    878
    +        if subdir and not os.path.exists(self.objpath(tree)):
    
    879
    +            return
    
    880
    +
    
    881
    +        # Raises FileNotFoundError if path does not exist,
    
    882
    +        # which should only be entered on the top level digest
    
    871 883
             if update_mtime:
    
    872 884
                 os.utime(self.objpath(tree))
    
    873 885
     
    
    ... ... @@ -884,9 +896,9 @@ class CASCache():
    884 896
                 reachable.add(filenode.digest.hash)
    
    885 897
     
    
    886 898
             for dirnode in directory.directories:
    
    887
    -            self._reachable_refs_dir(reachable, dirnode.digest, update_mtime=update_mtime)
    
    899
    +            self._reachable_refs_dir(reachable, dirnode.digest, update_mtime=update_mtime, subdir=True)
    
    888 900
     
    
    889
    -    def _required_blobs(self, directory_digest):
    
    901
    +    def _required_blobs(self, directory_digest, excluded_dir=None):
    
    890 902
             # parse directory, and recursively add blobs
    
    891 903
             d = remote_execution_pb2.Digest()
    
    892 904
             d.hash = directory_digest.hash
    
    ... ... @@ -905,7 +917,8 @@ class CASCache():
    905 917
                 yield d
    
    906 918
     
    
    907 919
             for dirnode in directory.directories:
    
    908
    -            yield from self._required_blobs(dirnode.digest)
    
    920
    +            if dirnode.name != excluded_dir:
    
    921
    +                yield from self._required_blobs(dirnode.digest)
    
    909 922
     
    
    910 923
         def _fetch_blob(self, remote, digest, stream):
    
    911 924
             resource_name = '/'.join(['blobs', digest.hash, str(digest.size_bytes)])
    
    ... ... @@ -1021,6 +1034,7 @@ class CASCache():
    1021 1034
                 objpath = self._ensure_blob(remote, dir_digest)
    
    1022 1035
     
    
    1023 1036
                 directory = remote_execution_pb2.Directory()
    
    1037
    +
    
    1024 1038
                 with open(objpath, 'rb') as f:
    
    1025 1039
                     directory.ParseFromString(f.read())
    
    1026 1040
     
    
    ... ... @@ -1091,9 +1105,8 @@ class CASCache():
    1091 1105
     
    
    1092 1106
             assert response.committed_size == digest.size_bytes
    
    1093 1107
     
    
    1094
    -    def _send_directory(self, remote, digest, u_uid=uuid.uuid4()):
    
    1095
    -        required_blobs = self._required_blobs(digest)
    
    1096
    -
    
    1108
    +    def _send_directory(self, remote, digest, u_uid=uuid.uuid4(), excluded_dir=None):
    
    1109
    +        required_blobs = self._required_blobs(digest, excluded_dir=excluded_dir)
    
    1097 1110
             missing_blobs = dict()
    
    1098 1111
             # Limit size of FindMissingBlobs request
    
    1099 1112
             for required_blobs_group in _grouper(required_blobs, 512):
    

  • buildstream/element.py
    ... ... @@ -1800,13 +1800,19 @@ class Element(Plugin):
    1800 1800
         #   (bool): True if this element does not need a push job to be created
    
    1801 1801
         #
    
    1802 1802
         def _skip_push(self):
    
    1803
    +
    
    1803 1804
             if not self.__artifacts.has_push_remotes(element=self):
    
    1804 1805
                 # No push remotes for this element's project
    
    1805 1806
                 return True
    
    1806 1807
     
    
    1807 1808
             # Do not push elements that aren't cached, or that are cached with a dangling buildtree
    
    1808
    -        # artifact unless element type is expected to have an an empty buildtree directory
    
    1809
    -        if not self._cached_buildtree():
    
    1809
    +        # artifact unless element type is expected to have an empty buildtree directory. Check
    
    1810
    +        # that this default behaviour is not overridden via a remote configured to allow pushing
    
    1811
    +        # artifacts without their corresponding buildtree.
    
    1812
    +        if not self._cached():
    
    1813
    +            return True
    
    1814
    +
    
    1815
    +        if not self._cached_buildtree() and not self.__artifacts.has_partial_push_remotes(element=self):
    
    1810 1816
                 return True
    
    1811 1817
     
    
    1812 1818
             # Do not push tainted artifact
    
    ... ... @@ -1817,7 +1823,8 @@ class Element(Plugin):
    1817 1823
     
    
    1818 1824
         # _push():
    
    1819 1825
         #
    
    1820
    -    # Push locally cached artifact to remote artifact repository.
    
    1826
    +    # Push locally cached artifact to remote artifact repository. An attempt
    
    1827
    +    # will be made to push partial artifacts given current config
    
    1821 1828
         #
    
    1822 1829
         # Returns:
    
    1823 1830
         #   (bool): True if the remote was updated, False if it already existed
    
    ... ... @@ -1830,8 +1837,19 @@ class Element(Plugin):
    1830 1837
                 self.warn("Not pushing tainted artifact.")
    
    1831 1838
                 return False
    
    1832 1839
     
    
    1833
    -        # Push all keys used for local commit
    
    1834
    -        pushed = self.__artifacts.push(self, self.__get_cache_keys_for_commit())
    
    1840
    +        # Push all keys used for local commit, this could be full or partial,
    
    1841
    +        # given previous _skip_push() logic. If buildtree isn't cached, then
    
    1842
    +        # set partial push
    
    1843
    +
    
    1844
    +        partial = False
    
    1845
    +        subdir = 'buildtree'
    
    1846
    +        if not self._cached_buildtree():
    
    1847
    +            partial = True
    
    1848
    +
    
    1849
    +        pushed = self.__artifacts.push(self, self.__get_cache_keys_for_commit(), partial=partial, subdir=subdir)
    
    1850
    +
    
    1851
    +        # Artifact might be cached in the server partially with the top level ref existing.
    
    1852
    +        # Check if we need to attempt a push of a locally cached buildtree given current config
    
    1835 1853
             if not pushed:
    
    1836 1854
                 return False
    
    1837 1855
     
    

  • tests/integration/pushbuildtrees.py
    1
    +import os
    
    2
    +import shutil
    
    3
    +import pytest
    
    4
    +
    
    5
    +from tests.testutils import cli_integration as cli, create_artifact_share
    
    6
    +from tests.testutils.integration import assert_contains
    
    7
    +from tests.testutils.site import HAVE_BWRAP, IS_LINUX
    
    8
    +from buildstream._exceptions import ErrorDomain, LoadErrorReason
    
    9
    +
    
    10
    +
    
    11
    +DATA_DIR = os.path.join(
    
    12
    +    os.path.dirname(os.path.realpath(__file__)),
    
    13
    +    "project"
    
    14
    +)
    
    15
    +
    
    16
    +
    
    17
    +# Remove artifact cache & set cli.config value of pull-buildtrees
    
    18
    +# to false, which is the default user context. The cache has to be
    
    19
    +# cleared as just forcefully removing the refpath leaves dangling objects.
    
    20
    +def default_state(cli, tmpdir, share):
    
    21
    +    shutil.rmtree(os.path.join(str(tmpdir), 'artifacts'))
    
    22
    +    cli.configure({
    
    23
    +        'artifacts': {'url': share.repo, 'push': False},
    
    24
    +        'artifactdir': os.path.join(str(tmpdir), 'artifacts'),
    
    25
    +        'cache': {'pull-buildtrees': False},
    
    26
    +    })
    
    27
    +
    
    28
    +
    
    29
    +# A test to capture the integration of the optional push of buildtrees.
    
    30
    +# The behaviour should encompass pushing artifacts that are already cached
    
    31
    +# without a buildtree as well as artifacts that are cached with their buildtree.
    
    32
    +# This option is handled via 'allow-partial-push' on a per artifact remote config
    
    33
    +# node basis. Multiple remote config nodes can point to the same url and as such can
    
    34
    +# have different 'allow-partial-push' options, tests need to cover this
    
    35
    +@pytest.mark.integration
    
    36
    +@pytest.mark.datafiles(DATA_DIR)
    
    37
    +@pytest.mark.skipif(IS_LINUX and not HAVE_BWRAP, reason='Only available with bubblewrap on Linux')
    
    38
    +def test_pushbuildtrees(cli, tmpdir, datafiles, integration_cache):
    
    39
    +    project = os.path.join(datafiles.dirname, datafiles.basename)
    
    40
    +    element_name = 'autotools/amhello.bst'
    
    41
    +
    
    42
    +    # Create artifact shares for pull & push testing
    
    43
    +    with create_artifact_share(os.path.join(str(tmpdir), 'share1')) as share1,\
    
    44
    +        create_artifact_share(os.path.join(str(tmpdir), 'share2')) as share2,\
    
    45
    +        create_artifact_share(os.path.join(str(tmpdir), 'share3')) as share3:
    
    46
    +
    
    47
    +        cli.configure({
    
    48
    +            'artifacts': {'url': share1.repo, 'push': True},
    
    49
    +            'artifactdir': os.path.join(str(tmpdir), 'artifacts')
    
    50
    +        })
    
    51
    +
    
    52
    +        cli.configure({'artifacts': [{'url': share1.repo, 'push': True},
    
    53
    +                                     {'url': share2.repo, 'push': True, 'allow-partial-push': True}]})
    
    54
    +
    
    55
    +        # Build autotools element, checked pushed, delete local.
    
    56
    +        # As share 2 has push & allow-partial-push set to true, it
    
    57
    +        # should have pushed the artifacts, without the cached buildtrees,
    
    58
    +        # to it.
    
    59
    +        result = cli.run(project=project, args=['build', element_name])
    
    60
    +        assert result.exit_code == 0
    
    61
    +        assert cli.get_element_state(project, element_name) == 'cached'
    
    62
    +        elementdigest = share1.has_artifact('test', element_name, cli.get_element_key(project, element_name))
    
    63
    +        buildtreedir = os.path.join(str(tmpdir), 'artifacts', 'extract', 'test', 'autotools-amhello',
    
    64
    +                                    elementdigest.hash, 'buildtree')
    
    65
    +        assert os.path.isdir(buildtreedir)
    
    66
    +        assert element_name in result.get_pushed_elements()
    
    67
    +        assert "Pushed partial artifact" in result.stderr
    
    68
    +        assert share1.has_artifact('test', element_name, cli.get_element_key(project, element_name))
    
    69
    +        assert share2.has_artifact('test', element_name, cli.get_element_key(project, element_name))
    
    70
    +        default_state(cli, tmpdir, share1)
    
    71
    +
    
    72
    +        # Check that after explicitly pulling an artifact without its buildtree,
    
    73
    +        # we can push it to another remote that is configured to accept the partial
    
    74
    +        # artifact
    
    75
    +        result = cli.run(project=project, args=['pull', element_name])
    
    76
    +        cli.configure({'artifacts': {'url': share3.repo, 'push': True, 'allow-partial-push': True}})
    
    77
    +        assert cli.get_element_state(project, element_name) == 'cached'
    
    78
    +        assert not os.path.isdir(buildtreedir)
    
    79
    +        result = cli.run(project=project, args=['push', element_name])
    
    80
    +        assert result.exit_code == 0
    
    81
    +        assert "Pushed partial artifact" in result.stderr
    
    82
    +        assert "Pushed artifact" not in result.stderr
    
    83
    +        assert element_name in result.get_pushed_elements()
    
    84
    +        assert share3.has_artifact('test', element_name, cli.get_element_key(project, element_name))
    
    85
    +        default_state(cli, tmpdir, share3)
    
    86
    +
    
    87
    +        # Delete the local cache and pull the partial artifact from share 3,
    
    88
    +        # this should not include the buildtree when extracted locally, even when
    
    89
    +        # pull-buildtrees is given as a cli parameter
    
    90
    +        assert not os.path.isdir(buildtreedir)
    
    91
    +        assert cli.get_element_state(project, element_name) != 'cached'
    
    92
    +        print("share3 repo value is {}".format(share3.repo))
    
    93
    +        with open('/home/tom/buildstream/tmp/test_pushbuildtrees0/cache/buildstream.conf', 'r') as f:
    
    94
    +            print(f.read())
    
    95
    +        result = cli.run(project=project, args=['--pull-buildtrees', 'pull', element_name])
    
    96
    +        assert element_name in result.get_pulled_elements()
    
    97
    +        assert not os.path.isdir(buildtreedir)

  • tests/testutils/runcli.py
    ... ... @@ -171,7 +171,7 @@ class Result():
    171 171
             return list(tracked)
    
    172 172
     
    
    173 173
         def get_pushed_elements(self):
    
    174
    -        pushed = re.findall(r'\[\s*push:(\S+)\s*\]\s*INFO\s*Pushed artifact', self.stderr)
    
    174
    +        pushed = re.findall(r'\[\s*push:(\S+)\s*\]\s*INFO\s*Pushed', self.stderr)
    
    175 175
             if pushed is None:
    
    176 176
                 return []
    
    177 177
     
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]