[Notes] [Git][BuildStream/buildstream][tpollard/494] WIP: Don't pull artifact buildtrees by default



Title: GitLab

Tom Pollard pushed to branch tpollard/494 at BuildStream / buildstream

Commits:

6 changed files:

Changes:

  • buildstream/_artifactcache/artifactcache.py
    ... ... @@ -38,8 +38,9 @@ CACHE_SIZE_FILE = "cache_size"
    38 38
     #     url (str): Location of the remote artifact cache
    
    39 39
     #     push (bool): Whether we should attempt to push artifacts to this cache,
    
    40 40
     #                  in addition to pulling from it.
    
    41
    +#     buildtrees (bool): Whether the default action of pull should include the artifact buildtree
    
    41 42
     #
    
    42
    -class ArtifactCacheSpec(namedtuple('ArtifactCacheSpec', 'url push server_cert client_key client_cert')):
    
    43
    +class ArtifactCacheSpec(namedtuple('ArtifactCacheSpec', 'url push buildtrees server_cert client_key client_cert')):
    
    43 44
     
    
    44 45
         # _new_from_config_node
    
    45 46
         #
    
    ... ... @@ -47,9 +48,10 @@ class ArtifactCacheSpec(namedtuple('ArtifactCacheSpec', 'url push server_cert cl
    47 48
         #
    
    48 49
         @staticmethod
    
    49 50
         def _new_from_config_node(spec_node, basedir=None):
    
    50
    -        _yaml.node_validate(spec_node, ['url', 'push', 'server-cert', 'client-key', 'client-cert'])
    
    51
    +        _yaml.node_validate(spec_node, ['url', 'push', 'pullbuildtrees', 'server-cert', 'client-key', 'client-cert'])
    
    51 52
             url = _yaml.node_get(spec_node, str, 'url')
    
    52 53
             push = _yaml.node_get(spec_node, bool, 'push', default_value=False)
    
    54
    +        buildtrees = _yaml.node_get(spec_node, bool, 'pullbuildtrees', default_value=False)
    
    53 55
             if not url:
    
    54 56
                 provenance = _yaml.node_get_provenance(spec_node, 'url')
    
    55 57
                 raise LoadError(LoadErrorReason.INVALID_DATA,
    
    ... ... @@ -77,10 +79,10 @@ class ArtifactCacheSpec(namedtuple('ArtifactCacheSpec', 'url push server_cert cl
    77 79
                 raise LoadError(LoadErrorReason.INVALID_DATA,
    
    78 80
                                 "{}: 'client-cert' was specified without 'client-key'".format(provenance))
    
    79 81
     
    
    80
    -        return ArtifactCacheSpec(url, push, server_cert, client_key, client_cert)
    
    82
    +        return ArtifactCacheSpec(url, push, buildtrees, server_cert, client_key, client_cert)
    
    81 83
     
    
    82 84
     
    
    83
    -ArtifactCacheSpec.__new__.__defaults__ = (None, None, None)
    
    85
    +ArtifactCacheSpec.__new__.__defaults__ = (None, None, None, None)
    
    84 86
     
    
    85 87
     
    
    86 88
     # An ArtifactCache manages artifacts.
    
    ... ... @@ -426,6 +428,22 @@ class ArtifactCache():
    426 428
             raise ImplError("Cache '{kind}' does not implement contains()"
    
    427 429
                             .format(kind=type(self).__name__))
    
    428 430
     
    
    431
    +    # contains_subdir_artifact():
    
    432
    +    #
    
    433
    +    # Check whether an artifact element contains a digest for a subdir
    
    434
    +    # which is populated in the cache, i.e. non-dangling.
    
    435
    +    #
    
    436
    +    # Args:
    
    437
    +    #     element (Element): The Element to check
    
    438
    +    #     key (str): The cache key to use
    
    439
    +    #     subdir (str): The subdir to check
    
    440
    +    #
    
    441
    +    # Returns: True if the subdir exists & is populated in the cache, False otherwise
    
    442
    +    #
    
    443
    +    def contains_subdir_artifact(self, element, key, subdir):
    
    444
    +        raise ImplError("Cache '{kind}' does not implement contains_subdir_artifact()"
    
    445
    +                        .format(kind=type(self).__name__))
    
    446
    +
    
    429 447
         # list_artifacts():
    
    430 448
         #
    
    431 449
         # List artifacts in this cache in LRU order.
    
    ... ... @@ -551,11 +569,12 @@ class ArtifactCache():
    551 569
         #     element (Element): The Element whose artifact is to be fetched
    
    552 570
         #     key (str): The cache key to use
    
    553 571
         #     progress (callable): The progress callback, if any
    
    572
    +    #     subdir (str): The optional specific subdir to pull
    
    554 573
         #
    
    555 574
         # Returns:
    
    556 575
         #   (bool): True if pull was successful, False if artifact was not available
    
    557 576
         #
    
    558
    -    def pull(self, element, key, *, progress=None):
    
    577
    +    def pull(self, element, key, *, progress=None, subdir=None, excluded_subdirs=None):
    
    559 578
             raise ImplError("Cache '{kind}' does not implement pull()"
    
    560 579
                             .format(kind=type(self).__name__))
    
    561 580
     
    

  • buildstream/_artifactcache/cascache.py
    ... ... @@ -92,6 +92,16 @@ class CASCache(ArtifactCache):
    92 92
             # This assumes that the repository doesn't have any dangling pointers
    
    93 93
             return os.path.exists(refpath)
    
    94 94
     
    
    95
    +    def contains_subdir_artifact(self, element, key, subdir):
    
    96
    +        tree = self.resolve_ref(self.get_artifact_fullname(element, key))
    
    97
    +
    
    98
    +        # This assumes that the subdir digest is present in the element tree
    
    99
    +        subdirdigest = self._get_subdir(tree, subdir)
    
    100
    +        objpath = self.objpath(subdirdigest)
    
    101
    +
    
    102
    +        # True if subdir content is cached or if empty as expected
    
    103
    +        return os.path.exists(objpath)
    
    104
    +
    
    95 105
         def extract(self, element, key):
    
    96 106
             ref = self.get_artifact_fullname(element, key)
    
    97 107
     
    
    ... ... @@ -228,7 +238,7 @@ class CASCache(ArtifactCache):
    228 238
                 remotes_for_project = self._remotes[element._get_project()]
    
    229 239
                 return any(remote.spec.push for remote in remotes_for_project)
    
    230 240
     
    
    231
    -    def pull(self, element, key, *, progress=None):
    
    241
    +    def pull(self, element, key, *, progress=None, subdir=None, excluded_subdirs=None):
    
    232 242
             ref = self.get_artifact_fullname(element, key)
    
    233 243
     
    
    234 244
             project = element._get_project()
    
    ... ... @@ -247,8 +257,14 @@ class CASCache(ArtifactCache):
    247 257
                     tree.hash = response.digest.hash
    
    248 258
                     tree.size_bytes = response.digest.size_bytes
    
    249 259
     
    
    250
    -                self._fetch_directory(remote, tree)
    
    260
    +                # Check if the element artifact is present, if so just fetch subdir
    
    261
    +                if subdir and os.path.exists(self.objpath(tree)):
    
    262
    +                    self._fetch_subdir(remote, tree, subdir)
    
    263
    +                else:
    
    264
    +                    # Fetch artifact, excluded_subdirs determined in pullqueue
    
    265
    +                    self._fetch_directory(remote, tree, excluded_subdirs=excluded_subdirs)
    
    251 266
     
    
    267
    +                # tree is the remote value, so is the same with or without a dangling ref locally
    
    252 268
                     self.set_ref(ref, tree)
    
    253 269
     
    
    254 270
                     element.info("Pulled artifact {} <- {}".format(display_key, remote.spec.url))
    
    ... ... @@ -649,7 +665,6 @@ class CASCache(ArtifactCache):
    649 665
         ################################################
    
    650 666
         #             Local Private Methods            #
    
    651 667
         ################################################
    
    652
    -
    
    653 668
         def _checkout(self, dest, tree):
    
    654 669
             os.makedirs(dest, exist_ok=True)
    
    655 670
     
    
    ... ... @@ -668,8 +683,10 @@ class CASCache(ArtifactCache):
    668 683
                              stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
    
    669 684
     
    
    670 685
             for dirnode in directory.directories:
    
    671
    -            fullpath = os.path.join(dest, dirnode.name)
    
    672
    -            self._checkout(fullpath, dirnode.digest)
    
    686
    +            # Don't try to checkout a dangling ref
    
    687
    +            if os.path.exists(self.objpath(dirnode.digest)):
    
    688
    +                fullpath = os.path.join(dest, dirnode.name)
    
    689
    +                self._checkout(fullpath, dirnode.digest)
    
    673 690
     
    
    674 691
             for symlinknode in directory.symlinks:
    
    675 692
                 # symlink
    
    ... ... @@ -948,10 +965,12 @@ class CASCache(ArtifactCache):
    948 965
         #     remote (Remote): The remote to use.
    
    949 966
         #     dir_digest (Digest): Digest object for the directory to fetch.
    
    950 967
         #
    
    951
    -    def _fetch_directory(self, remote, dir_digest):
    
    968
    +    def _fetch_directory(self, remote, dir_digest, *, excluded_subdirs=None):
    
    952 969
             fetch_queue = [dir_digest]
    
    953 970
             fetch_next_queue = []
    
    954 971
             batch = _CASBatchRead(remote)
    
    972
    +        if not excluded_subdirs:
    
    973
    +            excluded_subdirs = []
    
    955 974
     
    
    956 975
             while len(fetch_queue) + len(fetch_next_queue) > 0:
    
    957 976
                 if len(fetch_queue) == 0:
    
    ... ... @@ -966,8 +985,9 @@ class CASCache(ArtifactCache):
    966 985
                     directory.ParseFromString(f.read())
    
    967 986
     
    
    968 987
                 for dirnode in directory.directories:
    
    969
    -                batch = self._fetch_directory_node(remote, dirnode.digest, batch,
    
    970
    -                                                   fetch_queue, fetch_next_queue, recursive=True)
    
    988
    +                if dirnode.name not in excluded_subdirs:
    
    989
    +                    batch = self._fetch_directory_node(remote, dirnode.digest, batch,
    
    990
    +                                                       fetch_queue, fetch_next_queue, recursive=True)
    
    971 991
     
    
    972 992
                 for filenode in directory.files:
    
    973 993
                     batch = self._fetch_directory_node(remote, filenode.digest, batch,
    
    ... ... @@ -976,6 +996,10 @@ class CASCache(ArtifactCache):
    976 996
             # Fetch final batch
    
    977 997
             self._fetch_directory_batch(remote, batch, fetch_queue, fetch_next_queue)
    
    978 998
     
    
    999
    +    def _fetch_subdir(self, remote, tree, subdir):
    
    1000
    +        subdirdigest = self._get_subdir(tree, subdir)
    
    1001
    +        self._fetch_directory(remote, subdirdigest)
    
    1002
    +
    
    979 1003
         def _fetch_tree(self, remote, digest):
    
    980 1004
             # download but do not store the Tree object
    
    981 1005
             with tempfile.NamedTemporaryFile(dir=self.tmpdir) as out:
    

  • buildstream/_frontend/cli.py
    ... ... @@ -305,10 +305,12 @@ def init(app, project_name, format_version, element_path, force):
    305 305
                   help="Allow tracking to cross junction boundaries")
    
    306 306
     @click.option('--track-save', default=False, is_flag=True,
    
    307 307
                   help="Deprecated: This is ignored")
    
    308
    +@click.option('--pull-buildtrees', default=False, is_flag=True,
    
    309
    +              help="Pull buildtrees from a remote cache server")
    
    308 310
     @click.argument('elements', nargs=-1,
    
    309 311
                     type=click.Path(readable=False))
    
    310 312
     @click.pass_obj
    
    311
    -def build(app, elements, all_, track_, track_save, track_all, track_except, track_cross_junctions):
    
    313
    +def build(app, elements, all_, track_, track_save, track_all, track_except, track_cross_junctions, pull_buildtrees):
    
    312 314
         """Build elements in a pipeline"""
    
    313 315
     
    
    314 316
         if (track_except or track_cross_junctions) and not (track_ or track_all):
    
    ... ... @@ -327,7 +329,8 @@ def build(app, elements, all_, track_, track_save, track_all, track_except, trac
    327 329
                              track_targets=track_,
    
    328 330
                              track_except=track_except,
    
    329 331
                              track_cross_junctions=track_cross_junctions,
    
    330
    -                         build_all=all_)
    
    332
    +                         build_all=all_,
    
    333
    +                         pull_buildtrees=pull_buildtrees)
    
    331 334
     
    
    332 335
     
    
    333 336
     ##################################################################
    
    ... ... @@ -429,10 +432,12 @@ def track(app, elements, deps, except_, cross_junctions):
    429 432
                   help='The dependency artifacts to pull (default: none)')
    
    430 433
     @click.option('--remote', '-r',
    
    431 434
                   help="The URL of the remote cache (defaults to the first configured cache)")
    
    435
    +@click.option('--pull-buildtrees', default=False, is_flag=True,
    
    436
    +              help="Pull buildtrees from a remote cache server")
    
    432 437
     @click.argument('elements', nargs=-1,
    
    433 438
                     type=click.Path(readable=False))
    
    434 439
     @click.pass_obj
    
    435
    -def pull(app, elements, deps, remote):
    
    440
    +def pull(app, elements, deps, remote, pull_buildtrees):
    
    436 441
         """Pull a built artifact from the configured remote artifact cache.
    
    437 442
     
    
    438 443
         By default the artifact will be pulled one of the configured caches
    
    ... ... @@ -446,7 +451,7 @@ def pull(app, elements, deps, remote):
    446 451
             all:   All dependencies
    
    447 452
         """
    
    448 453
         with app.initialized(session_name="Pull"):
    
    449
    -        app.stream.pull(elements, selection=deps, remote=remote)
    
    454
    +        app.stream.pull(elements, selection=deps, remote=remote, pull_buildtrees=pull_buildtrees)
    
    450 455
     
    
    451 456
     
    
    452 457
     ##################################################################
    

  • buildstream/_scheduler/queues/pullqueue.py
    ... ... @@ -32,9 +32,20 @@ class PullQueue(Queue):
    32 32
         complete_name = "Pulled"
    
    33 33
         resources = [ResourceType.DOWNLOAD, ResourceType.CACHE]
    
    34 34
     
    
    35
    +    def __init__(self, scheduler, buildtrees=False):
    
    36
    +        super().__init__(scheduler)
    
    37
    +
    
    38
    +        # Current default exclusions on pull
    
    39
    +        self._excluded_subdirs = ["buildtree"]
    
    40
    +        self._subdir = None
    
    41
    +        # If buildtrees are to be pulled, remove the value from exclusion list
    
    42
    +        if buildtrees:
    
    43
    +            self._subdir = "buildtree"
    
    44
    +            self._excluded_subdirs.remove(self._subdir)
    
    45
    +
    
    35 46
         def process(self, element):
    
    36 47
             # returns whether an artifact was downloaded or not
    
    37
    -        if not element._pull():
    
    48
    +        if not element._pull(subdir=self._subdir, excluded_subdirs=self._excluded_subdirs):
    
    38 49
                 raise SkipJob(self.action_name)
    
    39 50
     
    
    40 51
         def status(self, element):
    
    ... ... @@ -49,7 +60,7 @@ class PullQueue(Queue):
    49 60
             if not element._can_query_cache():
    
    50 61
                 return QueueStatus.WAIT
    
    51 62
     
    
    52
    -        if element._pull_pending():
    
    63
    +        if element._pull_pending(subdir=self._subdir):
    
    53 64
                 return QueueStatus.READY
    
    54 65
             else:
    
    55 66
                 return QueueStatus.SKIP
    

  • buildstream/_stream.py
    ... ... @@ -160,12 +160,14 @@ class Stream():
    160 160
         #    track_cross_junctions (bool): Whether tracking should cross junction boundaries
    
    161 161
         #    build_all (bool): Whether to build all elements, or only those
    
    162 162
         #                      which are required to build the target.
    
    163
    +    #    pull_buildtrees (bool): Whether to pull buildtrees from a remote cache server
    
    163 164
         #
    
    164 165
         def build(self, targets, *,
    
    165 166
                   track_targets=None,
    
    166 167
                   track_except=None,
    
    167 168
                   track_cross_junctions=False,
    
    168
    -              build_all=False):
    
    169
    +              build_all=False,
    
    170
    +              pull_buildtrees=False):
    
    169 171
     
    
    170 172
             if build_all:
    
    171 173
                 selection = PipelineSelection.ALL
    
    ... ... @@ -195,7 +197,11 @@ class Stream():
    195 197
                 self._add_queue(track_queue, track=True)
    
    196 198
     
    
    197 199
             if self._artifacts.has_fetch_remotes():
    
    198
    -            self._add_queue(PullQueue(self._scheduler))
    
    200
    +            # Query if any of the user defined artifact servers have buildtrees set
    
    201
    +            for cache in self._context.artifact_cache_specs:
    
    202
    +                if cache.buildtrees:
    
    203
    +                    pull_buildtrees = True
    
    204
    +            self._add_queue(PullQueue(self._scheduler, buildtrees=pull_buildtrees))
    
    199 205
     
    
    200 206
             self._add_queue(FetchQueue(self._scheduler, skip_cached=True))
    
    201 207
             self._add_queue(BuildQueue(self._scheduler))
    
    ... ... @@ -295,7 +301,8 @@ class Stream():
    295 301
         #
    
    296 302
         def pull(self, targets, *,
    
    297 303
                  selection=PipelineSelection.NONE,
    
    298
    -             remote=None):
    
    304
    +             remote=None,
    
    305
    +             pull_buildtrees=False):
    
    299 306
     
    
    300 307
             use_config = True
    
    301 308
             if remote:
    
    ... ... @@ -310,8 +317,13 @@ class Stream():
    310 317
             if not self._artifacts.has_fetch_remotes():
    
    311 318
                 raise StreamError("No artifact caches available for pulling artifacts")
    
    312 319
     
    
    320
    +        # Query if any of the user defined artifact servers have buildtrees set
    
    321
    +        for cache in self._context.artifact_cache_specs:
    
    322
    +            if cache.buildtrees:
    
    323
    +                pull_buildtrees = True
    
    324
    +
    
    313 325
             self._pipeline.assert_consistent(elements)
    
    314
    -        self._add_queue(PullQueue(self._scheduler))
    
    326
    +        self._add_queue(PullQueue(self._scheduler, buildtrees=pull_buildtrees))
    
    315 327
             self._enqueue_plan(elements)
    
    316 328
             self._run()
    
    317 329
     
    

  • buildstream/element.py
    ... ... @@ -1689,18 +1689,26 @@ class Element(Plugin):
    1689 1689
     
    
    1690 1690
         # _pull_pending()
    
    1691 1691
         #
    
    1692
    -    # Check whether the artifact will be pulled.
    
    1692
    +    # Check whether the artifact will be pulled. If the pull operation is to
    
    1693
    +    # include a specific subdir of the element artifact (from cli or user conf)
    
    1694
    +    # then the local cache is queried for the subdir's existence.
    
    1695
    +    #
    
    1696
    +    # Args:
    
    1697
    +    #    subdir (str): Whether the pull has been invoked with a specific subdir set
    
    1693 1698
         #
    
    1694 1699
         # Returns:
    
    1695 1700
         #   (bool): Whether a pull operation is pending
    
    1696 1701
         #
    
    1697
    -    def _pull_pending(self):
    
    1702
    +    def _pull_pending(self, subdir=None):
    
    1698 1703
             if self._get_workspace():
    
    1699 1704
                 # Workspace builds are never pushed to artifact servers
    
    1700 1705
                 return False
    
    1701 1706
     
    
    1702
    -        if self.__strong_cached:
    
    1703
    -            # Artifact already in local cache
    
    1707
    +        if self.__strong_cached and subdir:
    
    1708
    +            # If we've specified a subdir, check if the subdir is cached locally
    
    1709
    +            if self.__artifacts.contains_subdir_artifact(self, self.__strict_cache_key, subdir):
    
    1710
    +                return False
    
    1711
    +        elif self.__strong_cached:
    
    1704 1712
                 return False
    
    1705 1713
     
    
    1706 1714
             # Pull is pending if artifact remote server available
    
    ... ... @@ -1722,11 +1730,10 @@ class Element(Plugin):
    1722 1730
     
    
    1723 1731
             self._update_state()
    
    1724 1732
     
    
    1725
    -    def _pull_strong(self, *, progress=None):
    
    1733
    +    def _pull_strong(self, *, progress=None, subdir=None, excluded_subdirs=None):
    
    1726 1734
             weak_key = self._get_cache_key(strength=_KeyStrength.WEAK)
    
    1727
    -
    
    1728 1735
             key = self.__strict_cache_key
    
    1729
    -        if not self.__artifacts.pull(self, key, progress=progress):
    
    1736
    +        if not self.__artifacts.pull(self, key, progress=progress, subdir=subdir, excluded_subdirs=excluded_subdirs):
    
    1730 1737
                 return False
    
    1731 1738
     
    
    1732 1739
             # update weak ref by pointing it to this newly fetched artifact
    
    ... ... @@ -1734,10 +1741,10 @@ class Element(Plugin):
    1734 1741
     
    
    1735 1742
             return True
    
    1736 1743
     
    
    1737
    -    def _pull_weak(self, *, progress=None):
    
    1744
    +    def _pull_weak(self, *, progress=None, subdir=None, excluded_subdirs=None):
    
    1738 1745
             weak_key = self._get_cache_key(strength=_KeyStrength.WEAK)
    
    1739
    -
    
    1740
    -        if not self.__artifacts.pull(self, weak_key, progress=progress):
    
    1746
    +        if not self.__artifacts.pull(self, weak_key, progress=progress, subdir=subdir,
    
    1747
    +                                     excluded_subdirs=excluded_subdirs):
    
    1741 1748
                 return False
    
    1742 1749
     
    
    1743 1750
             # extract strong cache key from this newly fetched artifact
    
    ... ... @@ -1755,17 +1762,17 @@ class Element(Plugin):
    1755 1762
         #
    
    1756 1763
         # Returns: True if the artifact has been downloaded, False otherwise
    
    1757 1764
         #
    
    1758
    -    def _pull(self):
    
    1765
    +    def _pull(self, subdir=None, excluded_subdirs=None):
    
    1759 1766
             context = self._get_context()
    
    1760 1767
     
    
    1761 1768
             def progress(percent, message):
    
    1762 1769
                 self.status(message)
    
    1763 1770
     
    
    1764 1771
             # Attempt to pull artifact without knowing whether it's available
    
    1765
    -        pulled = self._pull_strong(progress=progress)
    
    1772
    +        pulled = self._pull_strong(progress=progress, subdir=subdir, excluded_subdirs=excluded_subdirs)
    
    1766 1773
     
    
    1767 1774
             if not pulled and not self._cached() and not context.get_strict():
    
    1768
    -            pulled = self._pull_weak(progress=progress)
    
    1775
    +            pulled = self._pull_weak(progress=progress, subdir=subdir, excluded_subdirs=excluded_subdirs)
    
    1769 1776
     
    
    1770 1777
             if not pulled:
    
    1771 1778
                 return False
    
    ... ... @@ -1788,10 +1795,14 @@ class Element(Plugin):
    1788 1795
             if not self._cached():
    
    1789 1796
                 return True
    
    1790 1797
     
    
    1791
    -        # Do not push tained artifact
    
    1798
    +        # Do not push tainted artifact
    
    1792 1799
             if self.__get_tainted():
    
    1793 1800
                 return True
    
    1794 1801
     
    
    1802
    +        # Do not push elements that have a dangling buildtree artifact unless element type is
    
    1803
    +        # expected to have an empty buildtree directory
    
    1804
    +        if not self.__artifacts.contains_subdir_artifact(self, self.__strict_cache_key, 'buildtree'):
    
    1805
    +            return True
    
    1795 1806
             return False
    
    1796 1807
     
    
    1797 1808
         # _push():
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]