[Notes] [Git][BuildStream/buildstream][tpollard/494] WIP: Don't pull artifact buildtrees by default



Title: GitLab

Tom Pollard pushed to branch tpollard/494 at BuildStream / buildstream

Commits:

7 changed files:

Changes:

  • buildstream/_artifactcache/artifactcache.py
    ... ... @@ -38,8 +38,9 @@ CACHE_SIZE_FILE = "cache_size"
    38 38
     #     url (str): Location of the remote artifact cache
    
    39 39
     #     push (bool): Whether we should attempt to push artifacts to this cache,
    
    40 40
     #                  in addition to pulling from it.
    
    41
    +#     buildtrees (bool): Whether the default action of pull should include the artifact buildtree
    
    41 42
     #
    
    42
    -class ArtifactCacheSpec(namedtuple('ArtifactCacheSpec', 'url push server_cert client_key client_cert')):
    
    43
    +class ArtifactCacheSpec(namedtuple('ArtifactCacheSpec', 'url push server_cert client_key client_cert buildtrees')):
    
    43 44
     
    
    44 45
         # _new_from_config_node
    
    45 46
         #
    
    ... ... @@ -47,9 +48,10 @@ class ArtifactCacheSpec(namedtuple('ArtifactCacheSpec', 'url push server_cert cl
    47 48
         #
    
    48 49
         @staticmethod
    
    49 50
         def _new_from_config_node(spec_node, basedir=None):
    
    50
    -        _yaml.node_validate(spec_node, ['url', 'push', 'server-cert', 'client-key', 'client-cert'])
    
    51
    +        _yaml.node_validate(spec_node, ['url', 'push', 'server-cert', 'client-key', 'client-cert', 'pullbuildtrees'])
    
    51 52
             url = _yaml.node_get(spec_node, str, 'url')
    
    52 53
             push = _yaml.node_get(spec_node, bool, 'push', default_value=False)
    
    54
    +        buildtrees = _yaml.node_get(spec_node, bool, 'pullbuildtrees', default_value=False)
    
    53 55
             if not url:
    
    54 56
                 provenance = _yaml.node_get_provenance(spec_node)
    
    55 57
                 raise LoadError(LoadErrorReason.INVALID_DATA,
    
    ... ... @@ -67,7 +69,7 @@ class ArtifactCacheSpec(namedtuple('ArtifactCacheSpec', 'url push server_cert cl
    67 69
             if client_cert and basedir:
    
    68 70
                 client_cert = os.path.join(basedir, client_cert)
    
    69 71
     
    
    70
    -        return ArtifactCacheSpec(url, push, server_cert, client_key, client_cert)
    
    72
    +        return ArtifactCacheSpec(url, push, server_cert, client_key, client_cert, buildtrees)
    
    71 73
     
    
    72 74
     
    
    73 75
     ArtifactCacheSpec.__new__.__defaults__ = (None, None, None)
    
    ... ... @@ -510,11 +512,12 @@ class ArtifactCache():
    510 512
         #     element (Element): The Element whose artifact is to be fetched
    
    511 513
         #     key (str): The cache key to use
    
    512 514
         #     progress (callable): The progress callback, if any
    
    515
    +    #     buildtree (bool): If buildtrees are to be pulled from the remote cache
    
    513 516
         #
    
    514 517
         # Returns:
    
    515 518
         #   (bool): True if pull was successful, False if artifact was not available
    
    516 519
         #
    
    517
    -    def pull(self, element, key, *, progress=None):
    
    520
    +    def pull(self, element, key, *, progress=None, buildtree=None):
    
    518 521
             raise ImplError("Cache '{kind}' does not implement pull()"
    
    519 522
                             .format(kind=type(self).__name__))
    
    520 523
     
    

  • buildstream/_artifactcache/cascache.py
    ... ... @@ -63,7 +63,6 @@ class CASCache(ArtifactCache):
    63 63
             self.casdir = os.path.join(context.artifactdir, 'cas')
    
    64 64
             os.makedirs(os.path.join(self.casdir, 'refs', 'heads'), exist_ok=True)
    
    65 65
             os.makedirs(os.path.join(self.casdir, 'objects'), exist_ok=True)
    
    66
    -
    
    67 66
             self._calculate_cache_quota()
    
    68 67
     
    
    69 68
             self._enable_push = enable_push
    
    ... ... @@ -220,7 +219,7 @@ class CASCache(ArtifactCache):
    220 219
                 remotes_for_project = self._remotes[element._get_project()]
    
    221 220
                 return any(remote.spec.push for remote in remotes_for_project)
    
    222 221
     
    
    223
    -    def pull(self, element, key, *, progress=None):
    
    222
    +    def pull(self, element, key, *, progress=None, buildtree=None):
    
    224 223
             ref = self.get_artifact_fullname(element, key)
    
    225 224
     
    
    226 225
             project = element._get_project()
    
    ... ... @@ -239,8 +238,18 @@ class CASCache(ArtifactCache):
    239 238
                     tree.hash = response.digest.hash
    
    240 239
                     tree.size_bytes = response.digest.size_bytes
    
    241 240
     
    
    242
    -                self._fetch_directory(remote, tree)
    
    241
    +                # Check if buildtrees has been set at an artifact remote config level
    
    242
    +                if remote.spec.buildtrees:
    
    243
    +                    buildtree = True
    
    244
    +
    
    245
    +                self._fetch_directory(remote, tree, buildtree=buildtree)
    
    246
    +
    
    247
    +                # Check if the buildtree digest needs to be fetched
    
    248
    +                if buildtree:
    
    249
    +                    subdir = 'buildtree'
    
    250
    +                    self._fetch_subdir(remote, tree, subdir)
    
    243 251
     
    
    252
    +                # tree is the remote value, so is the same with or without buildtree locally
    
    244 253
                     self.set_ref(ref, tree)
    
    245 254
     
    
    246 255
                     # no need to pull from additional remotes
    
    ... ... @@ -635,8 +644,7 @@ class CASCache(ArtifactCache):
    635 644
         ################################################
    
    636 645
         #             Local Private Methods            #
    
    637 646
         ################################################
    
    638
    -
    
    639
    -    def _checkout(self, dest, tree):
    
    647
    +    def _checkout(self, dest, tree, buildtree=None):
    
    640 648
             os.makedirs(dest, exist_ok=True)
    
    641 649
     
    
    642 650
             directory = remote_execution_pb2.Directory()
    
    ... ... @@ -654,8 +662,12 @@ class CASCache(ArtifactCache):
    654 662
                              stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
    
    655 663
     
    
    656 664
             for dirnode in directory.directories:
    
    657
    -            fullpath = os.path.join(dest, dirnode.name)
    
    658
    -            self._checkout(fullpath, dirnode.digest)
    
    665
    +            if buildtree:
    
    666
    +                fullpath = os.path.join(dest, dirnode.name)
    
    667
    +                self._checkout(fullpath, dirnode.digest)
    
    668
    +            elif dirnode.name != 'buildtree':
    
    669
    +                fullpath = os.path.join(dest, dirnode.name)
    
    670
    +                self._checkout(fullpath, dirnode.digest)
    
    659 671
     
    
    660 672
             for symlinknode in directory.symlinks:
    
    661 673
                 # symlink
    
    ... ... @@ -830,7 +842,7 @@ class CASCache(ArtifactCache):
    830 842
     
    
    831 843
             assert digest.size_bytes == os.fstat(stream.fileno()).st_size
    
    832 844
     
    
    833
    -    def _fetch_directory(self, remote, tree):
    
    845
    +    def _fetch_directory(self, remote, tree, buildtree=None):
    
    834 846
             objpath = self.objpath(tree)
    
    835 847
             if os.path.exists(objpath):
    
    836 848
                 # already in local cache
    
    ... ... @@ -857,12 +869,23 @@ class CASCache(ArtifactCache):
    857 869
                         assert digest.hash == filenode.digest.hash
    
    858 870
     
    
    859 871
                 for dirnode in directory.directories:
    
    860
    -                self._fetch_directory(remote, dirnode.digest)
    
    861
    -
    
    872
    +                if buildtree:
    
    873
    +                    self._fetch_directory(remote, dirnode.digest)
    
    874
    +                elif dirnode.name != 'buildtree':
    
    875
    +                    self._fetch_directory(remote, dirnode.digest)
    
    862 876
                 # place directory blob only in final location when we've downloaded
    
    863 877
                 # all referenced blobs to avoid dangling references in the repository
    
    864 878
                 digest = self.add_object(path=out.name)
    
    865
    -            assert digest.hash == tree.hash
    
    879
    +
    
    880
    +            if buildtree:
    
    881
    +                assert digest.hash == tree.hash
    
    882
    +
    
    883
    +            return digest.hash
    
    884
    +
    
    885
    +    def _fetch_subdir(self, remote, tree, subdir):
    
    886
    +        subdirdigest = self._get_subdir(tree, subdir)
    
    887
    +        self._fetch_directory(remote, subdirdigest)
    
    888
    +
    
    866 889
     
    
    867 890
         def _fetch_tree(self, remote, digest):
    
    868 891
             # download but do not store the Tree object
    

  • buildstream/_context.py
    ... ... @@ -109,6 +109,9 @@ class Context():
    109 109
             # Make sure the XDG vars are set in the environment before loading anything
    
    110 110
             self._init_xdg()
    
    111 111
     
    
    112
    +        # Default to not pulling buildtrees from remote caches
    
    113
    +        self.pullbuildtrees = None
    
    114
    +
    
    112 115
             # Private variables
    
    113 116
             self._cache_key = None
    
    114 117
             self._message_handler = None
    
    ... ... @@ -159,7 +162,7 @@ class Context():
    159 162
             _yaml.node_validate(defaults, [
    
    160 163
                 'sourcedir', 'builddir', 'artifactdir', 'logdir',
    
    161 164
                 'scheduler', 'artifacts', 'logging', 'projects',
    
    162
    -            'cache'
    
    165
    +            'cache', 'pullbuildtrees'
    
    163 166
             ])
    
    164 167
     
    
    165 168
             for directory in ['sourcedir', 'builddir', 'artifactdir', 'logdir']:
    

  • buildstream/_frontend/cli.py
    ... ... @@ -305,10 +305,12 @@ def init(app, project_name, format_version, element_path, force):
    305 305
                   help="Allow tracking to cross junction boundaries")
    
    306 306
     @click.option('--track-save', default=False, is_flag=True,
    
    307 307
                   help="Deprecated: This is ignored")
    
    308
    +@click.option('--pull-buildtrees', default=False, is_flag=True,
    
    309
    +              help="Pull buildtrees from a remote cache server")
    
    308 310
     @click.argument('elements', nargs=-1,
    
    309 311
                     type=click.Path(readable=False))
    
    310 312
     @click.pass_obj
    
    311
    -def build(app, elements, all_, track_, track_save, track_all, track_except, track_cross_junctions):
    
    313
    +def build(app, elements, all_, track_, track_save, track_all, track_except, track_cross_junctions, pull_buildtrees):
    
    312 314
         """Build elements in a pipeline"""
    
    313 315
     
    
    314 316
         if (track_except or track_cross_junctions) and not (track_ or track_all):
    
    ... ... @@ -327,7 +329,8 @@ def build(app, elements, all_, track_, track_save, track_all, track_except, trac
    327 329
                              track_targets=track_,
    
    328 330
                              track_except=track_except,
    
    329 331
                              track_cross_junctions=track_cross_junctions,
    
    330
    -                         build_all=all_)
    
    332
    +                         build_all=all_,
    
    333
    +                         pull_buildtrees=pull_buildtrees)
    
    331 334
     
    
    332 335
     
    
    333 336
     ##################################################################
    
    ... ... @@ -429,10 +432,12 @@ def track(app, elements, deps, except_, cross_junctions):
    429 432
                   help='The dependency artifacts to pull (default: none)')
    
    430 433
     @click.option('--remote', '-r',
    
    431 434
                   help="The URL of the remote cache (defaults to the first configured cache)")
    
    435
    +@click.option('--pull-buildtrees', default=False, is_flag=True,
    
    436
    +              help="Pull buildtrees from a remote cache server")
    
    432 437
     @click.argument('elements', nargs=-1,
    
    433 438
                     type=click.Path(readable=False))
    
    434 439
     @click.pass_obj
    
    435
    -def pull(app, elements, deps, remote):
    
    440
    +def pull(app, elements, deps, remote, pull_buildtrees):
    
    436 441
         """Pull a built artifact from the configured remote artifact cache.
    
    437 442
     
    
    438 443
         By default the artifact will be pulled one of the configured caches
    
    ... ... @@ -446,7 +451,7 @@ def pull(app, elements, deps, remote):
    446 451
             all:   All dependencies
    
    447 452
         """
    
    448 453
         with app.initialized(session_name="Pull"):
    
    449
    -        app.stream.pull(elements, selection=deps, remote=remote)
    
    454
    +        app.stream.pull(elements, selection=deps, remote=remote, pull_buildtrees=pull_buildtrees)
    
    450 455
     
    
    451 456
     
    
    452 457
     ##################################################################
    

  • buildstream/_scheduler/queues/pullqueue.py
    ... ... @@ -31,9 +31,14 @@ class PullQueue(Queue):
    31 31
         complete_name = "Pulled"
    
    32 32
         resources = [ResourceType.DOWNLOAD, ResourceType.CACHE]
    
    33 33
     
    
    34
    +    def __init__(self, scheduler, buildtrees=False):
    
    35
    +        super().__init__(scheduler)
    
    36
    +
    
    37
    +        self._buildtrees = buildtrees
    
    38
    +
    
    34 39
         def process(self, element):
    
    35 40
             # returns whether an artifact was downloaded or not
    
    36
    -        return element._pull()
    
    41
    +        return element._pull(buildtree=self._buildtrees)
    
    37 42
     
    
    38 43
         def status(self, element):
    
    39 44
             # state of dependencies may have changed, recalculate element state
    
    ... ... @@ -47,7 +52,7 @@ class PullQueue(Queue):
    47 52
             if not element._can_query_cache():
    
    48 53
                 return QueueStatus.WAIT
    
    49 54
     
    
    50
    -        if element._pull_pending():
    
    55
    +        if element._pull_pending(buildtree=self._buildtrees):
    
    51 56
                 return QueueStatus.READY
    
    52 57
             else:
    
    53 58
                 return QueueStatus.SKIP
    

  • buildstream/_stream.py
    ... ... @@ -162,12 +162,14 @@ class Stream():
    162 162
         #    track_cross_junctions (bool): Whether tracking should cross junction boundaries
    
    163 163
         #    build_all (bool): Whether to build all elements, or only those
    
    164 164
         #                      which are required to build the target.
    
    165
    +    #    pull_buildtrees (bool): Whether to pull buildtrees from a remote cache server
    
    165 166
         #
    
    166 167
         def build(self, targets, *,
    
    167 168
                   track_targets=None,
    
    168 169
                   track_except=None,
    
    169 170
                   track_cross_junctions=False,
    
    170
    -              build_all=False):
    
    171
    +              build_all=False,
    
    172
    +              pull_buildtrees=False):
    
    171 173
     
    
    172 174
             if build_all:
    
    173 175
                 selection = PipelineSelection.ALL
    
    ... ... @@ -197,7 +199,7 @@ class Stream():
    197 199
                 self._add_queue(track_queue, track=True)
    
    198 200
     
    
    199 201
             if self._artifacts.has_fetch_remotes():
    
    200
    -            self._add_queue(PullQueue(self._scheduler))
    
    202
    +            self._add_queue(PullQueue(self._scheduler, buildtrees=pull_buildtrees))
    
    201 203
     
    
    202 204
             self._add_queue(FetchQueue(self._scheduler, skip_cached=True))
    
    203 205
             self._add_queue(BuildQueue(self._scheduler))
    
    ... ... @@ -297,7 +299,8 @@ class Stream():
    297 299
         #
    
    298 300
         def pull(self, targets, *,
    
    299 301
                  selection=PipelineSelection.NONE,
    
    300
    -             remote=None):
    
    302
    +             remote=None,
    
    303
    +             pull_buildtrees=False):
    
    301 304
     
    
    302 305
             use_config = True
    
    303 306
             if remote:
    
    ... ... @@ -313,7 +316,7 @@ class Stream():
    313 316
                 raise StreamError("No artifact caches available for pulling artifacts")
    
    314 317
     
    
    315 318
             self._pipeline.assert_consistent(elements)
    
    316
    -        self._add_queue(PullQueue(self._scheduler))
    
    319
    +        self._add_queue(PullQueue(self._scheduler, buildtrees=pull_buildtrees))
    
    317 320
             self._enqueue_plan(elements)
    
    318 321
             self._run()
    
    319 322
     
    

  • buildstream/element.py
    ... ... @@ -1676,18 +1676,24 @@ class Element(Plugin):
    1676 1676
     
    
    1677 1677
         # _pull_pending()
    
    1678 1678
         #
    
    1679
    -    # Check whether the artifact will be pulled.
    
    1679
    +    # Check whether the artifact will be pulled. If the pull operation is to
    
    1680
    +    # include buildtrees, then it can't be presumed that having a strong
    
    1681
    +    # cache of artifact element includes a non-dangling ref for its buildtree object.
    
    1682
    +    #
    
    1683
    +    # Args:
    
    1684
    +    #    buildtree (bool): Whether the pull has been invoked with buildtrees set
    
    1680 1685
         #
    
    1681 1686
         # Returns:
    
    1682 1687
         #   (bool): Whether a pull operation is pending
    
    1683 1688
         #
    
    1684
    -    def _pull_pending(self):
    
    1689
    +    def _pull_pending(self, buildtree=None):
    
    1685 1690
             if self._get_workspace():
    
    1686 1691
                 # Workspace builds are never pushed to artifact servers
    
    1687 1692
                 return False
    
    1688 1693
     
    
    1689
    -        if self.__strong_cached:
    
    1690
    -            # Artifact already in local cache
    
    1694
    +        # If strong cache & not specifying buildtrees
    
    1695
    +        if self.__strong_cached and not buildtree:
    
    1696
    +            # Artifact already in local cache, buildtree irrelevant
    
    1691 1697
                 return False
    
    1692 1698
     
    
    1693 1699
             # Pull is pending if artifact remote server available
    
    ... ... @@ -1709,11 +1715,10 @@ class Element(Plugin):
    1709 1715
     
    
    1710 1716
             self._update_state()
    
    1711 1717
     
    
    1712
    -    def _pull_strong(self, *, progress=None):
    
    1718
    +    def _pull_strong(self, *, progress=None, buildtree=None):
    
    1713 1719
             weak_key = self._get_cache_key(strength=_KeyStrength.WEAK)
    
    1714
    -
    
    1715 1720
             key = self.__strict_cache_key
    
    1716
    -        if not self.__artifacts.pull(self, key, progress=progress):
    
    1721
    +        if not self.__artifacts.pull(self, key, progress=progress, buildtree=buildtree):
    
    1717 1722
                 return False
    
    1718 1723
     
    
    1719 1724
             # update weak ref by pointing it to this newly fetched artifact
    
    ... ... @@ -1721,10 +1726,9 @@ class Element(Plugin):
    1721 1726
     
    
    1722 1727
             return True
    
    1723 1728
     
    
    1724
    -    def _pull_weak(self, *, progress=None):
    
    1729
    +    def _pull_weak(self, *, progress=None, buildtree=None):
    
    1725 1730
             weak_key = self._get_cache_key(strength=_KeyStrength.WEAK)
    
    1726
    -
    
    1727
    -        if not self.__artifacts.pull(self, weak_key, progress=progress):
    
    1731
    +        if not self.__artifacts.pull(self, weak_key, progress=progress, buildtree=buildtree):
    
    1728 1732
                 return False
    
    1729 1733
     
    
    1730 1734
             # extract strong cache key from this newly fetched artifact
    
    ... ... @@ -1742,17 +1746,17 @@ class Element(Plugin):
    1742 1746
         #
    
    1743 1747
         # Returns: True if the artifact has been downloaded, False otherwise
    
    1744 1748
         #
    
    1745
    -    def _pull(self):
    
    1749
    +    def _pull(self, buildtree=None):
    
    1746 1750
             context = self._get_context()
    
    1747 1751
     
    
    1748 1752
             def progress(percent, message):
    
    1749 1753
                 self.status(message)
    
    1750 1754
     
    
    1751 1755
             # Attempt to pull artifact without knowing whether it's available
    
    1752
    -        pulled = self._pull_strong(progress=progress)
    
    1756
    +        pulled = self._pull_strong(progress=progress, buildtree=buildtree)
    
    1753 1757
     
    
    1754 1758
             if not pulled and not self._cached() and not context.get_strict():
    
    1755
    -            pulled = self._pull_weak(progress=progress)
    
    1759
    +            pulled = self._pull_weak(progress=progress, buildtree=buildtree)
    
    1756 1760
     
    
    1757 1761
             if not pulled:
    
    1758 1762
                 return False
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]