[Notes] [Git][BuildStream/buildstream][tpollard/494] 10 commits: yaml: Add a cache of parsed and provenanced yaml




Tom Pollard pushed to branch tpollard/494 at BuildStream / buildstream

Commits:

16 changed files:

Changes:

  • NEWS
    @@ -27,6 +27,16 @@ buildstream 1.3.1
       o Generate Docker images from built artifacts using
         `contrib/bst-docker-import` script.
     
    +  o Because an element's `buildtree` is now cached in its artifact, artifact
    +    size has increased significantly in some cases. In *most* cases the
    +    buildtree is not utilised when building targets, so by default bst 'pull'
    +    and 'build' will not fetch buildtrees from remotes. This behaviour can be
    +    overridden with the cli option '--pull-buildtrees' or the user
    +    configuration option 'pullbuildtrees = True'. The override will also add
    +    the buildtree to already cached artifacts. When attempting to populate an
    +    artifactcache server with cached artifacts, only 'complete' elements can
    +    be pushed. If the element is expected to have a populated buildtree then
    +    it must be cached before pushing.
    +
     
     =================
     buildstream 1.1.5
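
    For illustration, a minimal sketch of how the override described above is
    resolved, mirroring the logic this push adds to _stream.py (the function
    name is a hypothetical stand-in, not part of the commit):

        # The user configuration acts as a global override: buildtrees are
        # pulled if either the CLI flag or 'pullbuildtrees' in user config is set.
        def effective_pull_buildtrees(cli_flag: bool, config_option: bool) -> bool:
            return cli_flag or config_option

        assert effective_pull_buildtrees(False, True)   # config alone enables it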
    

  • buildstream/_artifactcache/artifactcache.py
    @@ -426,6 +426,22 @@ class ArtifactCache():
             raise ImplError("Cache '{kind}' does not implement contains()"
                             .format(kind=type(self).__name__))
     
    +    # contains_subdir_artifact():
    +    #
    +    # Check whether an artifact element contains a digest for a subdir
    +    # which is populated in the cache, i.e. non-dangling.
    +    #
    +    # Args:
    +    #     element (Element): The Element to check
    +    #     key (str): The cache key to use
    +    #     subdir (str): The subdir to check
    +    #
    +    # Returns: True if the subdir exists & is populated in the cache, False otherwise
    +    #
    +    def contains_subdir_artifact(self, element, key, subdir):
    +        raise ImplError("Cache '{kind}' does not implement contains_subdir_artifact()"
    +                        .format(kind=type(self).__name__))
    +
         # list_artifacts():
         #
         # List artifacts in this cache in LRU order.
    @@ -551,11 +567,12 @@ class ArtifactCache():
         #     element (Element): The Element whose artifact is to be fetched
         #     key (str): The cache key to use
         #     progress (callable): The progress callback, if any
    +    #     subdir (str): The optional specific subdir to pull
         #
         # Returns:
         #   (bool): True if pull was successful, False if artifact was not available
         #
    -    def pull(self, element, key, *, progress=None):
    +    def pull(self, element, key, *, progress=None, subdir=None, excluded_subdirs=None):
             raise ImplError("Cache '{kind}' does not implement pull()"
                             .format(kind=type(self).__name__))
     
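
    For illustration, a hedged sketch of how a caller can combine
    contains_subdir_artifact() with a requested subdir to decide whether a pull
    is still needed (cf. Element._pull_pending() below; 'artifacts', 'element'
    and 'key' are assumed stand-ins for the real objects):

        def pull_needed(artifacts, element, key, subdir=None):
            # A cached artifact may still have a dangling subdir (e.g. an
            # uncached buildtree), in which case a pull is still pending.
            if subdir is not None:
                return not artifacts.contains_subdir_artifact(element, key, subdir)
            return False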
    

  • buildstream/_artifactcache/cascache.py
    @@ -92,6 +92,16 @@ class CASCache(ArtifactCache):
             # This assumes that the repository doesn't have any dangling pointers
             return os.path.exists(refpath)
     
    +    def contains_subdir_artifact(self, element, key, subdir):
    +        tree = self.resolve_ref(self.get_artifact_fullname(element, key))
    +
    +        # This assumes that the subdir digest is present in the element tree
    +        subdirdigest = self._get_subdir(tree, subdir)
    +        objpath = self.objpath(subdirdigest)
    +
    +        # True if subdir content is cached or if empty as expected
    +        return os.path.exists(objpath)
    +
         def extract(self, element, key):
             ref = self.get_artifact_fullname(element, key)
     
    @@ -228,7 +238,7 @@ class CASCache(ArtifactCache):
                 remotes_for_project = self._remotes[element._get_project()]
                 return any(remote.spec.push for remote in remotes_for_project)
     
    -    def pull(self, element, key, *, progress=None):
    +    def pull(self, element, key, *, progress=None, subdir=None, excluded_subdirs=None):
             ref = self.get_artifact_fullname(element, key)
     
             project = element._get_project()
    @@ -247,8 +257,14 @@ class CASCache(ArtifactCache):
                     tree.hash = response.digest.hash
                     tree.size_bytes = response.digest.size_bytes
     
    -                self._fetch_directory(remote, tree)
    +                # Check if the element artifact is present, if so just fetch the subdir
    +                if subdir and os.path.exists(self.objpath(tree)):
    +                    self._fetch_subdir(remote, tree, subdir)
    +                else:
    +                    # Fetch artifact, excluded_subdirs determined in pullqueue
    +                    self._fetch_directory(remote, tree, excluded_subdirs=excluded_subdirs)
     
    +                # tree is the remote value, so is the same with or without a dangling ref locally
                     self.set_ref(ref, tree)
     
                     element.info("Pulled artifact {} <- {}".format(display_key, remote.spec.url))
    @@ -671,8 +687,10 @@ class CASCache(ArtifactCache):
                              stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
     
             for dirnode in directory.directories:
    -            fullpath = os.path.join(dest, dirnode.name)
    -            self._checkout(fullpath, dirnode.digest)
    +            # Don't try to checkout a dangling ref
    +            if os.path.exists(self.objpath(dirnode.digest)):
    +                fullpath = os.path.join(dest, dirnode.name)
    +                self._checkout(fullpath, dirnode.digest)
     
             for symlinknode in directory.symlinks:
                 # symlink
    @@ -951,10 +969,12 @@ class CASCache(ArtifactCache):
         #     remote (Remote): The remote to use.
         #     dir_digest (Digest): Digest object for the directory to fetch.
         #
    -    def _fetch_directory(self, remote, dir_digest):
    +    def _fetch_directory(self, remote, dir_digest, *, excluded_subdirs=None):
             fetch_queue = [dir_digest]
             fetch_next_queue = []
             batch = _CASBatchRead(remote)
    +        if not excluded_subdirs:
    +            excluded_subdirs = []
     
             while len(fetch_queue) + len(fetch_next_queue) > 0:
                 if len(fetch_queue) == 0:
    @@ -969,8 +989,9 @@ class CASCache(ArtifactCache):
                     directory.ParseFromString(f.read())
     
                 for dirnode in directory.directories:
    -                batch = self._fetch_directory_node(remote, dirnode.digest, batch,
    -                                                   fetch_queue, fetch_next_queue, recursive=True)
    +                if dirnode.name not in excluded_subdirs:
    +                    batch = self._fetch_directory_node(remote, dirnode.digest, batch,
    +                                                       fetch_queue, fetch_next_queue, recursive=True)
     
                 for filenode in directory.files:
                     batch = self._fetch_directory_node(remote, filenode.digest, batch,
    @@ -979,6 +1000,10 @@ class CASCache(ArtifactCache):
             # Fetch final batch
             self._fetch_directory_batch(remote, batch, fetch_queue, fetch_next_queue)
     
    +    def _fetch_subdir(self, remote, tree, subdir):
    +        subdirdigest = self._get_subdir(tree, subdir)
    +        self._fetch_directory(remote, subdirdigest)
    +
         def _fetch_tree(self, remote, digest):
             # download but do not store the Tree object
             with tempfile.NamedTemporaryFile(dir=self.tmpdir) as out:
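
    For illustration, a self-contained sketch of the pruned walk that
    _fetch_directory() now performs: subdirectories named in excluded_subdirs
    are skipped entirely, everything else is visited recursively (plain dicts
    stand in for the CAS Directory protos):

        def walk(tree, excluded_subdirs=None, visited=None):
            excluded_subdirs = excluded_subdirs or []
            visited = visited if visited is not None else []
            for name, child in tree.get("directories", {}).items():
                if name in excluded_subdirs:
                    continue  # prune: never enqueue this subtree
                visited.append(name)
                walk(child, excluded_subdirs, visited)
            return visited

        # Skipping the buildtree subdir, as the new default pull behaviour does
        artifact = {"directories": {"files": {}, "buildtree": {"directories": {}}}}
        assert walk(artifact, excluded_subdirs=["buildtree"]) == ["files"]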
    

  • buildstream/_context.py
    @@ -110,6 +110,9 @@ class Context():
             # Make sure the XDG vars are set in the environment before loading anything
             self._init_xdg()
     
    +        # Whether or not to attempt to pull buildtrees globally
    +        self.pullbuildtrees = False
    +
             # Private variables
             self._cache_key = None
             self._message_handler = None
    @@ -160,7 +163,7 @@ class Context():
             _yaml.node_validate(defaults, [
                 'sourcedir', 'builddir', 'artifactdir', 'logdir',
                 'scheduler', 'artifacts', 'logging', 'projects',
    -            'cache'
    +            'cache', 'pullbuildtrees'
             ])
     
             for directory in ['sourcedir', 'builddir', 'artifactdir', 'logdir']:
    @@ -185,6 +188,9 @@ class Context():
             # Load artifact share configuration
             self.artifact_cache_specs = ArtifactCache.specs_from_config_node(defaults)
     
    +        # Load pull buildtrees configuration
    +        self.pullbuildtrees = _yaml.node_get(defaults, bool, 'pullbuildtrees', default_value=False)
    +
             # Load logging config
             logging = _yaml.node_get(defaults, Mapping, 'logging')
             _yaml.node_validate(logging, [
    

  • buildstream/_frontend/cli.py
    @@ -305,10 +305,12 @@ def init(app, project_name, format_version, element_path, force):
                   help="Allow tracking to cross junction boundaries")
     @click.option('--track-save', default=False, is_flag=True,
                   help="Deprecated: This is ignored")
    +@click.option('--pull-buildtrees', default=False, is_flag=True,
    +              help="Pull buildtrees from a remote cache server")
     @click.argument('elements', nargs=-1,
                     type=click.Path(readable=False))
     @click.pass_obj
    -def build(app, elements, all_, track_, track_save, track_all, track_except, track_cross_junctions):
    +def build(app, elements, all_, track_, track_save, track_all, track_except, track_cross_junctions, pull_buildtrees):
         """Build elements in a pipeline"""
     
         if (track_except or track_cross_junctions) and not (track_ or track_all):
    @@ -327,7 +329,8 @@ def build(app, elements, all_, track_, track_save, track_all, track_except, trac
                              track_targets=track_,
                              track_except=track_except,
                              track_cross_junctions=track_cross_junctions,
    -                         build_all=all_)
    +                         build_all=all_,
    +                         pull_buildtrees=pull_buildtrees)
     
     
     ##################################################################
    @@ -429,10 +432,12 @@ def track(app, elements, deps, except_, cross_junctions):
                   help='The dependency artifacts to pull (default: none)')
     @click.option('--remote', '-r',
                   help="The URL of the remote cache (defaults to the first configured cache)")
    +@click.option('--pull-buildtrees', default=False, is_flag=True,
    +              help="Pull buildtrees from a remote cache server")
     @click.argument('elements', nargs=-1,
                     type=click.Path(readable=False))
     @click.pass_obj
    -def pull(app, elements, deps, remote):
    +def pull(app, elements, deps, remote, pull_buildtrees):
         """Pull a built artifact from the configured remote artifact cache.
     
         By default the artifact will be pulled one of the configured caches
    @@ -446,7 +451,7 @@ def pull(app, elements, deps, remote):
             all:   All dependencies
         """
         with app.initialized(session_name="Pull"):
    -        app.stream.pull(elements, selection=deps, remote=remote)
    +        app.stream.pull(elements, selection=deps, remote=remote, pull_buildtrees=pull_buildtrees)
     
     
     ##################################################################
    

  • buildstream/_loader/loader.py
    @@ -29,6 +29,7 @@ from .. import _yaml
     from ..element import Element
     from .._profile import Topics, profile_start, profile_end
     from .._includes import Includes
    +from .._yamlcache import YamlCache
     
     from .types import Symbol, Dependency
     from .loadelement import LoadElement
    @@ -108,13 +109,19 @@ class Loader():
             #
             deps = []
     
    -        for target in targets:
    -            profile_start(Topics.LOAD_PROJECT, target)
    -            junction, name, loader = self._parse_name(target, rewritable, ticker,
    -                                                      fetch_subprojects=fetch_subprojects)
    -            loader._load_file(name, rewritable, ticker, fetch_subprojects)
    -            deps.append(Dependency(name, junction=junction))
    -            profile_end(Topics.LOAD_PROJECT, target)
    +        # XXX This will need to be changed to the context's top-level project if this method
    +        # is ever used for subprojects
    +        top_dir = self.project.directory
    +
    +        cache_file = YamlCache.get_cache_file(top_dir)
    +        with YamlCache.open(self._context, cache_file) as yaml_cache:
    +            for target in targets:
    +                profile_start(Topics.LOAD_PROJECT, target)
    +                junction, name, loader = self._parse_name(target, rewritable, ticker,
    +                                                          fetch_subprojects=fetch_subprojects)
    +                loader._load_file(name, rewritable, ticker, fetch_subprojects, yaml_cache)
    +                deps.append(Dependency(name, junction=junction))
    +                profile_end(Topics.LOAD_PROJECT, target)
     
             #
             # Now that we've resolve the dependencies, scan them for circular dependencies
    @@ -201,11 +208,12 @@ class Loader():
         #    rewritable (bool): Whether we should load in round trippable mode
         #    ticker (callable): A callback to report loaded filenames to the frontend
         #    fetch_subprojects (bool): Whether to fetch subprojects while loading
    +    #    yaml_cache (YamlCache): A yaml cache
         #
         # Returns:
         #    (LoadElement): A loaded LoadElement
         #
    -    def _load_file(self, filename, rewritable, ticker, fetch_subprojects):
    +    def _load_file(self, filename, rewritable, ticker, fetch_subprojects, yaml_cache=None):
     
             # Silently ignore already loaded files
             if filename in self._elements:
    @@ -218,7 +226,8 @@ class Loader():
             # Load the data and process any conditional statements therein
             fullpath = os.path.join(self._basedir, filename)
             try:
    -            node = _yaml.load(fullpath, shortname=filename, copy_tree=rewritable, project=self.project)
    +            node = _yaml.load(fullpath, shortname=filename, copy_tree=rewritable,
    +                              project=self.project, yaml_cache=yaml_cache)
             except LoadError as e:
                 if e.reason == LoadErrorReason.MISSING_FILE:
                     # If we can't find the file, try to suggest plausible
    @@ -261,13 +270,13 @@ class Loader():
             # Load all dependency files for the new LoadElement
             for dep in element.deps:
                 if dep.junction:
    -                self._load_file(dep.junction, rewritable, ticker, fetch_subprojects)
    +                self._load_file(dep.junction, rewritable, ticker, fetch_subprojects, yaml_cache)
                     loader = self._get_loader(dep.junction, rewritable=rewritable, ticker=ticker,
                                               fetch_subprojects=fetch_subprojects)
                 else:
                     loader = self
     
    -            dep_element = loader._load_file(dep.name, rewritable, ticker, fetch_subprojects)
    +            dep_element = loader._load_file(dep.name, rewritable, ticker, fetch_subprojects, yaml_cache)
     
                 if _yaml.node_get(dep_element.node, str, Symbol.KIND) == 'junction':
                     raise LoadError(LoadErrorReason.INVALID_DATA,
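
    For illustration, a self-contained sketch of the open-use-write pattern the
    loader now follows (JSON stands in for the pickle format YamlCache actually
    uses; open_cache is a hypothetical stand-in for YamlCache.open()):

        import json
        import os
        from contextlib import contextmanager

        @contextmanager
        def open_cache(path):
            cache = {}
            if os.path.exists(path):
                try:
                    with open(path) as f:
                        cache = json.load(f)    # reuse a previous run's cache
                except ValueError:
                    pass                        # corrupt/empty file: start fresh
            yield cache
            with open(path, "w") as f:
                json.dump(cache, f)             # persist when the block exits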
    

  • buildstream/_scheduler/queues/pullqueue.py
    @@ -32,9 +32,20 @@ class PullQueue(Queue):
         complete_name = "Pulled"
         resources = [ResourceType.DOWNLOAD, ResourceType.CACHE]
     
    +    def __init__(self, scheduler, buildtrees=False):
    +        super().__init__(scheduler)
    +
    +        # Current default exclusions on pull
    +        self._excluded_subdirs = ["buildtree"]
    +        self._subdir = None
    +        # If buildtrees are to be pulled, remove the value from the exclusion list
    +        if buildtrees:
    +            self._subdir = "buildtree"
    +            self._excluded_subdirs.remove(self._subdir)
    +
         def process(self, element):
             # returns whether an artifact was downloaded or not
    -        if not element._pull():
    +        if not element._pull(subdir=self._subdir, excluded_subdirs=self._excluded_subdirs):
                 raise SkipJob(self.action_name)
     
         def status(self, element):
    @@ -49,7 +60,7 @@ class PullQueue(Queue):
             if not element._can_query_cache():
                 return QueueStatus.WAIT
     
    -        if element._pull_pending():
    +        if element._pull_pending(subdir=self._subdir):
                 return QueueStatus.READY
             else:
                 return QueueStatus.SKIP
    

  • buildstream/_stream.py
    @@ -160,12 +160,14 @@ class Stream():
         #    track_cross_junctions (bool): Whether tracking should cross junction boundaries
         #    build_all (bool): Whether to build all elements, or only those
         #                      which are required to build the target.
    +    #    pull_buildtrees (bool): Whether to pull buildtrees from a remote cache server
         #
         def build(self, targets, *,
                   track_targets=None,
                   track_except=None,
                   track_cross_junctions=False,
    -              build_all=False):
    +              build_all=False,
    +              pull_buildtrees=False):
     
             if build_all:
                 selection = PipelineSelection.ALL
    @@ -195,7 +197,10 @@ class Stream():
                 self._add_queue(track_queue, track=True)
     
             if self._artifacts.has_fetch_remotes():
    -            self._add_queue(PullQueue(self._scheduler))
    +            # Query if pullbuildtrees has been set globally in user config
    +            if self._context.pullbuildtrees:
    +                pull_buildtrees = True
    +            self._add_queue(PullQueue(self._scheduler, buildtrees=pull_buildtrees))
     
             self._add_queue(FetchQueue(self._scheduler, skip_cached=True))
             self._add_queue(BuildQueue(self._scheduler))
    @@ -295,7 +300,8 @@ class Stream():
         #
         def pull(self, targets, *,
                  selection=PipelineSelection.NONE,
    -             remote=None):
    +             remote=None,
    +             pull_buildtrees=False):
     
             use_config = True
             if remote:
    @@ -310,8 +316,12 @@ class Stream():
             if not self._artifacts.has_fetch_remotes():
                 raise StreamError("No artifact caches available for pulling artifacts")
     
    +        # Query if pullbuildtrees has been set globally in user config
    +        if self._context.pullbuildtrees:
    +            pull_buildtrees = True
    +
             self._pipeline.assert_consistent(elements)
    -        self._add_queue(PullQueue(self._scheduler))
    +        self._add_queue(PullQueue(self._scheduler, buildtrees=pull_buildtrees))
             self._enqueue_plan(elements)
             self._run()
     
    

  • buildstream/_yaml.py
    @@ -183,20 +183,32 @@ class CompositeTypeError(CompositeError):
     #    shortname (str): The filename in shorthand for error reporting (or None)
     #    copy_tree (bool): Whether to make a copy, preserving the original toplevels
     #                      for later serialization
    +#    yaml_cache (YamlCache): A yaml cache to consult rather than parsing
     #
     # Returns (dict): A loaded copy of the YAML file with provenance information
     #
     # Raises: LoadError
     #
    -def load(filename, shortname=None, copy_tree=False, *, project=None):
    +def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache=None):
         if not shortname:
             shortname = filename
     
         file = ProvenanceFile(filename, shortname, project)
     
         try:
    +        data = None
             with open(filename) as f:
    -            return load_data(f, file, copy_tree=copy_tree)
    +            contents = f.read()
    +        if yaml_cache:
    +            data, key = yaml_cache.get(project, filename, contents, copy_tree)
    +
    +        if not data:
    +            data = load_data(contents, file, copy_tree=copy_tree)
    +
    +        if yaml_cache:
    +            yaml_cache.put_from_key(project, filename, key, data)
    +
    +        return data
         except FileNotFoundError as e:
             raise LoadError(LoadErrorReason.MISSING_FILE,
                             "Could not find file at {}".format(filename)) from e
    

  • buildstream/_yamlcache.py
    +#
    +#  Copyright 2018 Bloomberg Finance LP
    +#
    +#  This program is free software; you can redistribute it and/or
    +#  modify it under the terms of the GNU Lesser General Public
    +#  License as published by the Free Software Foundation; either
    +#  version 2 of the License, or (at your option) any later version.
    +#
    +#  This library is distributed in the hope that it will be useful,
    +#  but WITHOUT ANY WARRANTY; without even the implied warranty of
    +#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    +#  Lesser General Public License for more details.
    +#
    +#  You should have received a copy of the GNU Lesser General Public
    +#  License along with this library. If not, see <http://www.gnu.org/licenses/>.
    +#
    +#  Authors:
    +#        Jonathan Maw <jonathan.maw@codethink.co.uk>
    +
    +import os
    +import pickle
    +import hashlib
    +import io
    +import sys
    +
    +from contextlib import contextmanager
    +from collections import namedtuple
    +
    +from ._cachekey import generate_key
    +from ._context import Context
    +from . import utils, _yaml
    +
    +
    +YAML_CACHE_FILENAME = "yaml_cache.pickle"
    +
    +
    +# YamlCache()
    +#
    +# A cache that wraps around the loading of yaml in projects.
    +#
    +# The recommended way to use a YamlCache is:
    +#   with YamlCache.open(context, cachefile) as yamlcache:
    +#     # Load all the yaml
    +#     ...
    +#
    +# Args:
    +#    context (Context): The invocation Context
    +#
    +class YamlCache():
    +
    +    def __init__(self, context):
    +        self._project_caches = {}
    +        self._context = context
    +
    +    ##################
    +    # Public Methods #
    +    ##################
    +
    +    # is_cached():
    +    #
    +    # Checks whether a file is cached.
    +    #
    +    # Args:
    +    #    project (Project): The project this file is in.
    +    #    filepath (str): The path to the file, *relative to the project's directory*.
    +    #
    +    # Returns:
    +    #    (bool): Whether the file is cached.
    +    def is_cached(self, project, filepath):
    +        cache_path = self._get_filepath(project, filepath)
    +        project_name = project.name if project else ""
    +        try:
    +            project_cache = self._project_caches[project_name]
    +            if cache_path in project_cache.elements:
    +                return True
    +        except KeyError:
    +            pass
    +        return False
    +
    +    # open():
    +    #
    +    # Return an instance of the YamlCache which writes to disk when it leaves scope.
    +    #
    +    # Args:
    +    #    context (Context): The context.
    +    #    cachefile (str): The path to the cache file.
    +    #
    +    # Returns:
    +    #    (YamlCache): A YamlCache.
    +    @staticmethod
    +    @contextmanager
    +    def open(context, cachefile):
    +        # Try to load from disk first
    +        cache = None
    +        if os.path.exists(cachefile):
    +            try:
    +                with open(cachefile, "rb") as f:
    +                    cache = BstUnpickler(f, context).load()
    +            except EOFError:
    +                # The file was empty
    +                pass
    +            except pickle.UnpicklingError as e:
    +                sys.stderr.write("Failed to load YamlCache, {}\n".format(e))
    +
    +        # Failed to load from disk, create a new one
    +        if not cache:
    +            cache = YamlCache(context)
    +
    +        yield cache
    +
    +        cache._write(cachefile)
    +
    +    # get_cache_file():
    +    #
    +    # Retrieves a path to the yaml cache file.
    +    #
    +    # Args:
    +    #   top_dir (str): The project's top-level directory
    +    #
    +    # Returns:
    +    #   (str): The path to the cache file
    +    @staticmethod
    +    def get_cache_file(top_dir):
    +        return os.path.join(top_dir, ".bst", YAML_CACHE_FILENAME)
    +
    +    # get():
    +    #
    +    # Gets a parsed file from the cache.
    +    #
    +    # Args:
    +    #    project (Project) or None: The project this file is in, if it exists.
    +    #    filepath (str): The absolute path to the file.
    +    #    contents (str): The contents of the file to be cached
    +    #    copy_tree (bool): Whether the data should make a copy when it's being generated
    +    #                      (i.e. exactly as when called in yaml)
    +    #
    +    # Returns:
    +    #    (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
    +    #    (str):            The key used to look up the parsed yaml in the cache
    +    def get(self, project, filepath, contents, copy_tree):
    +        key = self._calculate_key(contents, copy_tree)
    +        data = self._get(project, filepath, key)
    +        return data, key
    +
    +    # put():
    +    #
    +    # Puts a parsed file into the cache.
    +    #
    +    # Args:
    +    #    project (Project): The project this file is in.
    +    #    filepath (str): The path to the file.
    +    #    contents (str): The contents of the file that has been cached
    +    #    copy_tree (bool): Whether the data should make a copy when it's being generated
    +    #                      (i.e. exactly as when called in yaml)
    +    #    value (decorated dict): The data to put into the cache.
    +    def put(self, project, filepath, contents, copy_tree, value):
    +        key = self._calculate_key(contents, copy_tree)
    +        self.put_from_key(project, filepath, key, value)
    +
    +    # put_from_key():
    +    #
    +    # Put a parsed file into the cache when given a key.
    +    #
    +    # Args:
    +    #    project (Project): The project this file is in.
    +    #    filepath (str): The path to the file.
    +    #    key (str): The key to the file within the cache. Typically, this is the
    +    #               value of `calculate_key()` with the file's unparsed contents
    +    #               and any relevant metadata passed in.
    +    #    value (decorated dict): The data to put into the cache.
    +    def put_from_key(self, project, filepath, key, value):
    +        cache_path = self._get_filepath(project, filepath)
    +        project_name = project.name if project else ""
    +        try:
    +            project_cache = self._project_caches[project_name]
    +        except KeyError:
    +            project_cache = self._project_caches[project_name] = CachedProject({})
    +
    +        project_cache.elements[cache_path] = CachedYaml(key, value)
    +
    +    ###################
    +    # Private Methods #
    +    ###################
    +
    +    # Writes the yaml cache to the specified path.
    +    #
    +    # Args:
    +    #    path (str): The path to the cache file.
    +    def _write(self, path):
    +        parent_dir = os.path.dirname(path)
    +        os.makedirs(parent_dir, exist_ok=True)
    +        with open(path, "wb") as f:
    +            BstPickler(f).dump(self)
    +
    +    # _get_filepath():
    +    #
    +    # Returns a file path relative to a project if passed, or the original path if
    +    # the project is None
    +    #
    +    # Args:
    +    #    project (Project) or None: The project the filepath exists within
    +    #    full_path (str): The path that the returned path is based on
    +    #
    +    # Returns:
    +    #    (str): The path to the file, relative to a project if it exists
    +    def _get_filepath(self, project, full_path):
    +        if project:
    +            assert full_path.startswith(project.directory)
    +            filepath = os.path.relpath(full_path, project.directory)
    +        else:
    +            filepath = full_path
    +        return filepath
    +
    +    # _calculate_key():
    +    #
    +    # Calculates a key for putting into the cache.
    +    #
    +    # Args:
    +    #    (basic object)... : Any number of strictly-ordered basic objects
    +    #
    +    # Returns:
    +    #   (str): A key made out of every arg passed in
    +    @staticmethod
    +    def _calculate_key(*args):
    +        string = pickle.dumps(args)
    +        return hashlib.sha1(string).hexdigest()
    +
    +    # _get():
    +    #
    +    # Gets a parsed file from the cache when given a key.
    +    #
    +    # Args:
    +    #    project (Project): The project this file is in.
    +    #    filepath (str): The path to the file.
    +    #    key (str): The key to the file within the cache. Typically, this is the
    +    #               value of `calculate_key()` with the file's unparsed contents
    +    #               and any relevant metadata passed in.
    +    #
    +    # Returns:
    +    #    (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
    +    def _get(self, project, filepath, key):
    +        cache_path = self._get_filepath(project, filepath)
    +        project_name = project.name if project else ""
    +        try:
    +            project_cache = self._project_caches[project_name]
    +            try:
    +                cachedyaml = project_cache.elements[cache_path]
    +                if cachedyaml._key == key:
    +                    # We've unpickled the YamlCache, but not the specific file
    +                    if cachedyaml._contents is None:
    +                        cachedyaml._contents = BstUnpickler.loads(cachedyaml._pickled_contents, self._context)
    +                    return cachedyaml._contents
    +            except KeyError:
    +                pass
    +        except KeyError:
    +            pass
    +        return None
    +
    +
    +CachedProject = namedtuple('CachedProject', ['elements'])
    +
    +
    +class CachedYaml():
    +    def __init__(self, key, contents):
    +        self._key = key
    +        self.set_contents(contents)
    +
    +    # Sets the contents of the CachedYaml.
    +    #
    +    # Args:
    +    #    contents (provenanced dict): The contents to put in the cache.
    +    #
    +    def set_contents(self, contents):
    +        self._contents = contents
    +        self._pickled_contents = BstPickler.dumps(contents)
    +
    +    # Pickling helper method, prevents 'contents' from being serialised
    +    def __getstate__(self):
    +        data = self.__dict__.copy()
    +        data['_contents'] = None
    +        return data
    +
    +
    +# In _yaml.load, we have a ProvenanceFile that stores the project the file
    +# came from. Projects can't be pickled, but it's always going to be the same
    +# project between invocations (unless the entire project is moved but the
    +# file stayed in the same place)
    +class BstPickler(pickle.Pickler):
    +    def persistent_id(self, obj):
    +        if isinstance(obj, _yaml.ProvenanceFile):
    +            if obj.project:
    +                # ProvenanceFile's project object cannot be stored as it is.
    +                project_tag = obj.project.name
    +                # ProvenanceFile's filename must be stored relative to the
    +                # project, as the project dir may move.
    +                name = os.path.relpath(obj.name, obj.project.directory)
    +            else:
    +                project_tag = None
    +                name = obj.name
    +            return ("ProvenanceFile", name, obj.shortname, project_tag)
    +        elif isinstance(obj, Context):
    +            return ("Context",)
    +        else:
    +            return None
    +
    +    @staticmethod
    +    def dumps(obj):
    +        stream = io.BytesIO()
    +        BstPickler(stream).dump(obj)
    +        stream.seek(0)
    +        return stream.read()
    +
    +
    +class BstUnpickler(pickle.Unpickler):
    +    def __init__(self, file, context):
    +        super().__init__(file)
    +        self._context = context
    +
    +    def persistent_load(self, pid):
    +        if pid[0] == "ProvenanceFile":
    +            _, tagged_name, shortname, project_tag = pid
    +
    +            if project_tag is not None:
    +                project = None
    +                for p in self._context.get_projects():
    +                    if project_tag == p.name:
    +                        project = p
    +                        break
    +
    +                if not project:
    +                    projects = [p.name for p in self._context.get_projects()]
    +                    raise pickle.UnpicklingError("No project with name {} found in {}"
    +                                                 .format(project_tag, projects))
    +
    +                name = os.path.join(project.directory, tagged_name)
    +            else:
    +                project = None
    +                name = tagged_name
    +
    +            return _yaml.ProvenanceFile(name, shortname, project)
    +        elif pid[0] == "Context":
    +            return self._context
    +        else:
    +            raise pickle.UnpicklingError("Unsupported persistent object, {}".format(pid))
    +
    +    @staticmethod
    +    def loads(text, context):
    +        stream = io.BytesIO()
    +        stream.write(bytes(text))
    +        stream.seek(0)
    +        return BstUnpickler(stream, context).load()
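
    For illustration, a self-contained demonstration of the pickle "persistent
    ID" mechanism that BstPickler/BstUnpickler rely on: objects that cannot (or
    should not) be serialised are replaced by a tag on the way out and resolved
    against live state on the way back in (Session is a hypothetical stand-in
    for Context/Project):

        import io
        import pickle

        class Session():
            def __init__(self, name):
                self.name = name

        class TagPickler(pickle.Pickler):
            def persistent_id(self, obj):
                if isinstance(obj, Session):
                    return ("Session", obj.name)  # store a tag, not the object
                return None                       # everything else pickles normally

        class TagUnpickler(pickle.Unpickler):
            def __init__(self, file, sessions):
                super().__init__(file)
                self._sessions = sessions

            def persistent_load(self, pid):
                kind, name = pid
                if kind == "Session":
                    return self._sessions[name]   # resolve tag to the live object
                raise pickle.UnpicklingError("Unsupported persistent object, {}".format(pid))

        live = Session("main")
        buf = io.BytesIO()
        TagPickler(buf).dump({"session": live})
        buf.seek(0)
        restored = TagUnpickler(buf, {"main": live}).load()
        assert restored["session"] is live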

  • buildstream/element.py
    ... ... @@ -1692,18 +1692,26 @@ class Element(Plugin):
    1692 1692
     
    
    1693 1693
         # _pull_pending()
    
    1694 1694
         #
    
    1695
    -    # Check whether the artifact will be pulled.
    
    1695
    +    # Check whether the artifact will be pulled. If the pull operation is to
    
    1696
    +    # include a specific subdir of the element artifact (from cli or user conf)
    
    1697
    +    # then the local cache is queried for the subdirs existence.
    
    1698
    +    #
    
    1699
    +    # Args:
    
    1700
    +    #    subdir (str): Whether the pull has been invoked with a specific subdir set
    
    1696 1701
         #
    
    1697 1702
         # Returns:
    
    1698 1703
         #   (bool): Whether a pull operation is pending
    
    1699 1704
         #
    
    1700
    -    def _pull_pending(self):
    
    1705
    +    def _pull_pending(self, subdir=None):
    
    1701 1706
             if self._get_workspace():
    
    1702 1707
                 # Workspace builds are never pushed to artifact servers
    
    1703 1708
                 return False
    
    1704 1709
     
    
    1705
    -        if self.__strong_cached:
    
    1706
    -            # Artifact already in local cache
    
    1710
    +        if self.__strong_cached and subdir:
    
    1711
    +            # If we've specified a subdir, check if the subdir is cached locally
    
    1712
    +            if self.__artifacts.contains_subdir_artifact(self, self.__strict_cache_key, subdir):
    
    1713
    +                return False
    
    1714
    +        elif self.__strong_cached:
    
    1707 1715
                 return False
    
    1708 1716
     
    
    1709 1717
             # Pull is pending if artifact remote server available
    
    ... ... @@ -1725,11 +1733,10 @@ class Element(Plugin):
    1725 1733
     
    
    1726 1734
             self._update_state()
    
    1727 1735
     
    
    1728
    -    def _pull_strong(self, *, progress=None):
    
    1736
    +    def _pull_strong(self, *, progress=None, subdir=None, excluded_subdirs=None):
    
    1729 1737
             weak_key = self._get_cache_key(strength=_KeyStrength.WEAK)
    
    1730
    -
    
    1731 1738
             key = self.__strict_cache_key
    
    1732
    -        if not self.__artifacts.pull(self, key, progress=progress):
    
    1739
    +        if not self.__artifacts.pull(self, key, progress=progress, subdir=subdir, excluded_subdirs=excluded_subdirs):
    
    1733 1740
                 return False
    
    1734 1741
     
    
    1735 1742
             # update weak ref by pointing it to this newly fetched artifact
    
    ... ... @@ -1737,10 +1744,10 @@ class Element(Plugin):
    1737 1744
     
    
    1738 1745
             return True
    
    1739 1746
     
    
    1740
    -    def _pull_weak(self, *, progress=None):
    
    1747
    +    def _pull_weak(self, *, progress=None, subdir=None, excluded_subdirs=None):
    
    1741 1748
             weak_key = self._get_cache_key(strength=_KeyStrength.WEAK)
    
    1742
    -
    
    1743
    -        if not self.__artifacts.pull(self, weak_key, progress=progress):
    
    1749
    +        if not self.__artifacts.pull(self, weak_key, progress=progress, subdir=subdir,
    
    1750
    +                                     excluded_subdirs=excluded_subdirs):
    
    1744 1751
                 return False
    
    1745 1752
     
    
    1746 1753
             # extract strong cache key from this newly fetched artifact
    
    ... ... @@ -1758,17 +1765,17 @@ class Element(Plugin):
    1758 1765
         #
    
    1759 1766
         # Returns: True if the artifact has been downloaded, False otherwise
    
    1760 1767
         #
    
    1761
    -    def _pull(self):
    
    1768
    +    def _pull(self, subdir=None, excluded_subdirs=None):
    
    1762 1769
             context = self._get_context()
    
    1763 1770
     
    
    1764 1771
             def progress(percent, message):
    
    1765 1772
                 self.status(message)
    
    1766 1773
     
    
    1767 1774
             # Attempt to pull artifact without knowing whether it's available
    
    1768
    -        pulled = self._pull_strong(progress=progress)
    
    1775
    +        pulled = self._pull_strong(progress=progress, subdir=subdir, excluded_subdirs=excluded_subdirs)
    
    1769 1776
     
    
    1770 1777
             if not pulled and not self._cached() and not context.get_strict():
    
    1771
    -            pulled = self._pull_weak(progress=progress)
    
    1778
    +            pulled = self._pull_weak(progress=progress, subdir=subdir, excluded_subdirs=excluded_subdirs)
    
    1772 1779
     
    
    1773 1780
             if not pulled:
    
    1774 1781
                 return False
    
    ... ... @@ -1791,10 +1798,21 @@ class Element(Plugin):
    1791 1798
             if not self._cached():
    
    1792 1799
                 return True
    
    1793 1800
     
    
    1794
    -        # Do not push tained artifact
    
    1801
    +        # Do not push tainted artifact
    
    1795 1802
             if self.__get_tainted():
    
    1796 1803
                 return True
    
    1797 1804
     
    
    1805
    +        # Do not push elements that have a dangling buildtree artifact unless element type is
    
    1806
    +        # expected to have an empty buildtree directory
    
    1807
    +        if not self.__artifacts.contains_subdir_artifact(self, self.__strict_cache_key, 'buildtree'):
    
    1808
    +            return True
    
    1809
    +
    
    1810
    +        # strict_cache_key can't be relied on to be available when running in non strict mode
    
    1811
    +        context = self._get_context()
    
    1812
    +        if not context.get_strict():
    
    1813
    +            if not self.__artifacts.contains_subdir_artifact(self, self.__weak_cache_key, 'buildtree'):
    
    1814
    +                return True
    
    1815
    +
    
    1798 1816
             return False
    
    1799 1817
     
    
    1800 1818
         # _push():
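
    For illustration, a hedged sketch of the push gate added above: an artifact
    is only pushable when its buildtree subdir is complete locally, checked
    against the strict key and, in non-strict mode, the weak key as well
    (skip_push and its parameters are assumed stand-ins):

        def skip_push(artifacts, element, strict_key, weak_key, strict_mode):
            if not artifacts.contains_subdir_artifact(element, strict_key, 'buildtree'):
                return True   # dangling buildtree: the artifact is incomplete
            if not strict_mode:
                # in non-strict mode the weak-keyed artifact must be complete too
                if not artifacts.contains_subdir_artifact(element, weak_key, 'buildtree'):
                    return True
            return False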
    

  • tests/completions/completions.py
    ... ... @@ -103,7 +103,7 @@ def test_commands(cli, cmd, word_idx, expected):
    103 103
         ('bst --no-colors build -', 3, ['--all ', '--track ', '--track-all ',
    
    104 104
                                         '--track-except ',
    
    105 105
                                         '--track-cross-junctions ', '-J ',
    
    106
    -                                    '--track-save ']),
    
    106
    +                                    '--track-save ', '--pull-buildtrees ']),
    
    107 107
     
    
    108 108
         # Test the behavior of completing after an option that has a
    
    109 109
         # parameter that cannot be completed, vs an option that has
    

  • tests/frontend/yamlcache.py
    1
    +import os
    
    2
    +import pytest
    
    3
    +import hashlib
    
    4
    +import tempfile
    
    5
    +from ruamel import yaml
    
    6
    +
    
    7
    +from tests.testutils import cli, generate_junction, create_element_size, create_repo
    
    8
    +from buildstream import _yaml
    
    9
    +from buildstream._yamlcache import YamlCache
    
    10
    +from buildstream._project import Project
    
    11
    +from buildstream._context import Context
    
    12
    +from contextlib import contextmanager
    
    13
    +
    
    14
    +
    
    15
    +def generate_project(tmpdir, ref_storage, with_junction, name="test"):
    
    16
    +    if with_junction == 'junction':
    
    17
    +        subproject_dir = generate_project(
    
    18
    +            tmpdir, ref_storage,
    
    19
    +            'no-junction', name='test-subproject'
    
    20
    +        )
    
    21
    +
    
    22
    +    project_dir = os.path.join(tmpdir, name)
    
    23
    +    os.makedirs(project_dir)
    
    24
    +    # project.conf
    
    25
    +    project_conf_path = os.path.join(project_dir, 'project.conf')
    
    26
    +    elements_path = 'elements'
    
    27
    +    project_conf = {
    
    28
    +        'name': name,
    
    29
    +        'element-path': elements_path,
    
    30
    +        'ref-storage': ref_storage,
    
    31
    +    }
    
    32
    +    _yaml.dump(project_conf, project_conf_path)
    
    33
    +
    
    34
    +    # elements
    
    35
    +    if with_junction == 'junction':
    
    36
    +        junction_name = 'junction.bst'
    
    37
    +        junction_dir = os.path.join(project_dir, elements_path)
    
    38
    +        junction_path = os.path.join(project_dir, elements_path, junction_name)
    
    39
    +        os.makedirs(junction_dir)
    
    40
    +        generate_junction(tmpdir, subproject_dir, junction_path)
    
    41
    +        element_depends = [{'junction': junction_name, 'filename': 'test.bst'}]
    
    42
    +    else:
    
    43
    +        element_depends = []
    
    44
    +
    
    45
    +    element_name = 'test.bst'
    
    46
    +    create_element_size(element_name, project_dir, elements_path, element_depends, 1)
    
    47
    +
    
    48
    +    return project_dir
    
    49
    +
    
    50
    +
    
    51
    +@contextmanager
    
    52
    +def with_yamlcache(project_dir):
    
    53
    +    context = Context()
    
    54
    +    project = Project(project_dir, context)
    
    55
    +    cache_file = YamlCache.get_cache_file(project_dir)
    
    56
    +    with YamlCache.open(context, cache_file) as yamlcache:
    
    57
    +        yield yamlcache, project
    
    58
    +
    
    59
    +
    
    60
    +def yamlcache_key(yamlcache, in_file, copy_tree=False):
    
    61
    +    with open(in_file) as f:
    
    62
    +        key = yamlcache._calculate_key(f.read(), copy_tree)
    
    63
    +    return key
    
    64
    +
    
    65
    +
    
    66
    +def modified_file(input_file, tmpdir):
    
    67
    +    with open(input_file) as f:
    
    68
    +        data = f.read()
    
    69
    +    assert 'variables' not in data
    
    70
    +    data += '\nvariables: {modified: True}\n'
    
    71
    +    _, temppath = tempfile.mkstemp(dir=tmpdir, text=True)
    
    72
    +    with open(temppath, 'w') as f:
    
    73
    +        f.write(data)
    
    74
    +
    
    75
    +    return temppath
    
    76
    +
    
    77
    +
    
    78
    +@pytest.mark.parametrize('ref_storage', ['inline', 'project.refs'])
    
    79
    +@pytest.mark.parametrize('with_junction', ['no-junction', 'junction'])
    
    80
    +@pytest.mark.parametrize('move_project', ['move', 'no-move'])
    
    81
    +def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction, move_project):
    
    82
    +    # Generate the project
    
    83
    +    project = generate_project(str(tmpdir), ref_storage, with_junction)
    
    84
    +    if with_junction == 'junction':
    
    85
    +        result = cli.run(project=project, args=['fetch', '--track', 'junction.bst'])
    
    86
    +        result.assert_success()
    
    87
    +
    
    88
    +    # bst show to put it in the cache
    
    89
    +    result = cli.run(project=project, args=['show', 'test.bst'])
    
    90
    +    result.assert_success()
    
    91
    +
    
    92
    +    element_path = os.path.join(project, 'elements', 'test.bst')
    
    93
    +    with with_yamlcache(project) as (yc, prj):
    
    94
    +        # Check that it's in the cache
    
    95
    +        assert yc.is_cached(prj, element_path)
    
    96
    +
    
    97
    +        # *Absolutely* horrible cache corruption to check it's being used
    
    98
    +        # Modifying the data from the cache is fraught with danger,
    
    99
+        # so instead we load a modified version of the original file
    
    100
    +        temppath = modified_file(element_path, str(tmpdir))
    
    101
    +        contents = _yaml.load(temppath, copy_tree=False, project=prj)
    
    102
    +        key = yamlcache_key(yc, element_path)
    
    103
    +        yc.put_from_key(prj, element_path, key, contents)
    
    104
    +
    
    105
    +    # Show that a variable has been added
    
    106
    +    result = cli.run(project=project, args=['show', '--format', '%{vars}', 'test.bst'])
    
    107
    +    result.assert_success()
    
    108
    +    data = yaml.safe_load(result.output)
    
    109
    +    assert 'modified' in data
    
    110
    +    assert data['modified'] == 'True'
    
    111
    +
    
    112
    +
    
    113
    +@pytest.mark.parametrize('ref_storage', ['inline', 'project.refs'])
    
    114
    +@pytest.mark.parametrize('with_junction', ['junction', 'no-junction'])
    
    115
    +def test_yamlcache_changed_file(cli, tmpdir, ref_storage, with_junction):
    
    116
+    # Scenario: a file is cached, the file is changed, and loading the file (with the cache) returns the new data
    
    117
    +    # inline and junction can only be changed by opening a workspace
    
    118
    +    # Generate the project
    
    119
    +    project = generate_project(str(tmpdir), ref_storage, with_junction)
    
    120
    +    if with_junction == 'junction':
    
    121
    +        result = cli.run(project=project, args=['fetch', '--track', 'junction.bst'])
    
    122
    +        result.assert_success()
    
    123
    +
    
    124
+    # Run bst show to put the parsed file into the yaml cache
    
    125
    +    result = cli.run(project=project, args=['show', 'test.bst'])
    
    126
    +    result.assert_success()
    
    127
    +
    
    128
    +    element_path = os.path.join(project, 'elements', 'test.bst')
    
    129
    +    with with_yamlcache(project) as (yc, prj):
    
    130
    +        # Check that it's in the cache then modify
    
    131
    +        assert yc.is_cached(prj, element_path)
    
    132
    +        with open(element_path, "a") as f:
    
    133
    +            f.write('\nvariables: {modified: True}\n')
    
    134
+        # Load the modified file through the yaml cache, updating the cached copy
    
    135
    +        _yaml.load(element_path, copy_tree=False, project=prj, yaml_cache=yc)
    
    136
    +
    
    137
    +    # Show that a variable has been added
    
    138
    +    result = cli.run(project=project, args=['show', '--format', '%{vars}', 'test.bst'])
    
    139
    +    result.assert_success()
    
    140
    +    data = yaml.safe_load(result.output)
    
    141
    +    assert 'modified' in data
    
    142
    +    assert data['modified'] == 'True'
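
    The round trip these tests rely on can be reduced to a few calls. A
    minimal sketch, assuming the YamlCache API as it appears in the diff
    above (get_cache_file, open, is_cached) and the yaml_cache keyword of
    _yaml.load; the import path for Project and the helper name
    demo_roundtrip are assumptions, not confirmed by this patch:

        from buildstream import _yaml
        from buildstream._context import Context
        from buildstream._project import Project
        from buildstream._yamlcache import YamlCache

        def demo_roundtrip(project_dir, element_path):
            context = Context()
            project = Project(project_dir, context)
            cache_file = YamlCache.get_cache_file(project_dir)
            with YamlCache.open(context, cache_file) as yc:
                # First load parses the file and populates the cache ...
                first = _yaml.load(element_path, copy_tree=False,
                                   project=project, yaml_cache=yc)
                assert yc.is_cached(project, element_path)
                # ... a second load is served from the cache
                second = _yaml.load(element_path, copy_tree=False,
                                    project=project, yaml_cache=yc)
            return first, second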

  • tests/integration/pullbuildtrees.py
    1
    +import os
    
    2
    +import shutil
    
    3
    +import pytest
    
    4
    +
    
    5
    +from tests.testutils import cli_integration as cli, create_artifact_share
    
    6
    +from tests.testutils.integration import assert_contains
    
    7
    +
    
    8
    +
    
    9
    +DATA_DIR = os.path.join(
    
    10
    +    os.path.dirname(os.path.realpath(__file__)),
    
    11
    +    "project"
    
    12
    +)
    
    13
    +
    
    14
    +
    
    15
    +# Remove artifact cache & set cli.config value of pullbuildtrees
    
    16
+# to False, which is the default user configuration
    
    17
    +def default_state(cli, integration_cache, share):
    
    18
    +    shutil.rmtree(os.path.join(integration_cache, 'artifacts'))
    
    19
    +    cli.configure({'pullbuildtrees': False, 'artifacts': {'url': share.repo, 'push': False}})
    
    20
    +
    
    21
    +
    
    22
    +# A test to capture the integration of the pullbuildtrees
    
    23
+# behaviour, which by default does not pull the buildtree
    
    24
    +# directory of an element
    
    25
    +@pytest.mark.integration
    
    26
    +@pytest.mark.datafiles(DATA_DIR)
    
    27
    +def test_pullbuildtrees(cli, tmpdir, datafiles, integration_cache):
    
    28
    +
    
    29
    +    # Ensure artifact cache is purged, as we can't have dangling refs/objects
    
    30
    +    shutil.rmtree(os.path.join(integration_cache, 'artifacts'))
    
    31
    +
    
    32
    +    project = os.path.join(datafiles.dirname, datafiles.basename)
    
    33
    +    element_name = 'autotools/amhello.bst'
    
    34
    +
    
    35
    +    # Create artifact shares for pull & push testing
    
    36
    +    with create_artifact_share(os.path.join(str(tmpdir), 'share1')) as share1,\
    
    37
    +        create_artifact_share(os.path.join(str(tmpdir), 'share2')) as share2:
    
    38
    +        cli.configure({
    
    39
    +            'artifacts': {'url': share1.repo, 'push': True},
    
    40
    +        })
    
    41
    +
    
    42
+        # Build autotools element, check it was pushed, then delete the local cache
    
    43
    +        result = cli.run(project=project, args=['build', element_name])
    
    44
    +        assert result.exit_code == 0
    
    45
    +        assert cli.get_element_state(project, element_name) == 'cached'
    
    46
    +        assert share1.has_artifact('test', element_name, cli.get_element_key(project, element_name))
    
    47
    +        default_state(cli, integration_cache, share1)
    
    48
    +
    
    49
    +        # Pull artifact with default config, assert that pulling again
    
    50
+        # doesn't create a pull job, then assert that pulling with the buildtrees
    
    51
+        # user config set does create a pull job.
    
    52
    +        result = cli.run(project=project, args=['pull', element_name])
    
    53
    +        assert element_name in result.get_pulled_elements()
    
    54
    +        result = cli.run(project=project, args=['pull', element_name])
    
    55
    +        assert element_name not in result.get_pulled_elements()
    
    56
    +        cli.configure({'pullbuildtrees': True})
    
    57
    +        result = cli.run(project=project, args=['pull', element_name])
    
    58
    +        assert element_name in result.get_pulled_elements()
    
    59
    +        default_state(cli, integration_cache, share1)
    
    60
    +
    
    61
    +        # Pull artifact with default config, then assert that pulling
    
    62
    +        # with buildtrees cli flag set creates a pull job.
    
    63
    +        result = cli.run(project=project, args=['pull', element_name])
    
    64
    +        assert element_name in result.get_pulled_elements()
    
    65
    +        result = cli.run(project=project, args=['pull', '--pull-buildtrees', element_name])
    
    66
    +        assert element_name in result.get_pulled_elements()
    
    67
    +        default_state(cli, integration_cache, share1)
    
    68
    +
    
    69
    +        # Pull artifact with pullbuildtrees set in user config, then assert
    
    70
+        # that pulling again with the same user config doesn't create a pull job,
    
    71
+        # nor does pulling with the buildtrees cli flag set.
    
    72
    +        cli.configure({'pullbuildtrees': True})
    
    73
    +        result = cli.run(project=project, args=['pull', element_name])
    
    74
    +        assert element_name in result.get_pulled_elements()
    
    75
    +        result = cli.run(project=project, args=['pull', element_name])
    
    76
    +        assert element_name not in result.get_pulled_elements()
    
    77
    +        result = cli.run(project=project, args=['pull', '--pull-buildtrees', element_name])
    
    78
    +        assert element_name not in result.get_pulled_elements()
    
    79
    +        default_state(cli, integration_cache, share1)
    
    80
    +
    
    81
    +        # Pull artifact with default config and buildtrees cli flag set, then assert
    
    82
    +        # that pulling with pullbuildtrees set in user config doesn't create a pull
    
    83
    +        # job.
    
    84
    +        result = cli.run(project=project, args=['pull', '--pull-buildtrees', element_name])
    
    85
    +        assert element_name in result.get_pulled_elements()
    
    86
    +        cli.configure({'pullbuildtrees': True})
    
    87
    +        result = cli.run(project=project, args=['pull', element_name])
    
    88
    +        assert element_name not in result.get_pulled_elements()
    
    89
    +        default_state(cli, integration_cache, share1)
    
    90
    +
    
    91
    +        # Assert that a partial build element (not containing a populated buildtree dir)
    
    92
    +        # can't be pushed to an artifact share, then assert that a complete build element
    
    93
    +        # can be. This will attempt a partial pull from share1 and then a partial push
    
    94
    +        # to share2
    
    95
    +        result = cli.run(project=project, args=['pull', element_name])
    
    96
    +        assert element_name in result.get_pulled_elements()
    
    97
    +        cli.configure({'artifacts': {'url': share2.repo, 'push': True}})
    
    98
    +        result = cli.run(project=project, args=['push', element_name])
    
    99
    +        assert element_name not in result.get_pushed_elements()
    
    100
    +        assert not share2.has_artifact('test', element_name, cli.get_element_key(project, element_name))
    
    101
    +
    
    102
    +        # Assert that after pulling the missing buildtree the element artifact can be
    
    103
    +        # successfully pushed to the remote. This will attempt to pull the buildtree
    
    104
    +        # from share1 and then a 'complete' push to share2
    
    105
    +        cli.configure({'artifacts': {'url': share1.repo, 'push': False}})
    
    106
    +        result = cli.run(project=project, args=['pull', '--pull-buildtrees', element_name])
    
    107
    +        assert element_name in result.get_pulled_elements()
    
    108
    +        cli.configure({'artifacts': {'url': share2.repo, 'push': True}})
    
    109
    +        result = cli.run(project=project, args=['push', element_name])
    
    110
    +        assert element_name in result.get_pushed_elements()
    
    111
    +        assert share2.has_artifact('test', element_name, cli.get_element_key(project, element_name))
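
    The assertions above encode a simple scheduling rule: a pull job is
    created only when something the current configuration asks for is
    missing from the local cache. A hypothetical helper summarising it
    (pull_job_expected and its arguments are illustrative only, not
    BuildStream API):

        # have_artifact/have_buildtree describe the local cache state, and
        # want_buildtrees is True when 'pullbuildtrees' is set in user
        # config or --pull-buildtrees is passed on the command line.
        def pull_job_expected(have_artifact, have_buildtree, want_buildtrees):
            if not have_artifact:
                return True    # nothing cached locally: always pull
            if want_buildtrees and not have_buildtree:
                return True    # cached, but the wanted buildtree is missing
            return False       # everything requested is already local

        # Mirrors the first scenario above:
        assert pull_job_expected(False, False, False)      # initial pull creates a job
        assert not pull_job_expected(True, False, False)   # repeat pull does not
        assert pull_job_expected(True, False, True)        # enabling buildtrees pulls again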

  • tests/testutils/artifactshare.py
    ... ... @@ -128,7 +128,7 @@ class ArtifactShare():
    128 128
     
    
    129 129
             valid_chars = string.digits + string.ascii_letters + '-._'
    
    130 130
             element_name = ''.join([
    
    131
    -            x if x in valid_chars else '_'
    
    131
    +            x if x in valid_chars else '-'
    
    132 132
                 for x in element_name
    
    133 133
             ])
    
    134 134
             artifact_key = '{0}/{1}/{2}'.format(project_name, element_name, cache_key)
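
    The one-character change above means characters outside the valid set
    now map to '-' instead of '_'. A quick self-contained illustration of
    the effect (the element name is chosen for the example):

        import string

        valid_chars = string.digits + string.ascii_letters + '-._'
        element_name = 'autotools/amhello.bst'   # '/' is not a valid char
        sanitized = ''.join([
            x if x in valid_chars else '-'
            for x in element_name
        ])
        assert sanitized == 'autotools-amhello.bst'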
    

  • tests/yaml/yaml.py
    1 1
     import os
    
    2 2
     import pytest
    
    3
    +import tempfile
    
    3 4
     from collections import Mapping
    
    4 5
     
    
    5 6
     from buildstream import _yaml
    
    6 7
     from buildstream._exceptions import LoadError, LoadErrorReason
    
    8
    +from buildstream._context import Context
    
    9
    +from buildstream._yamlcache import YamlCache
    
    7 10
     
    
    8 11
     DATA_DIR = os.path.join(
    
    9 12
         os.path.dirname(os.path.realpath(__file__)),
    
    ... ... @@ -150,6 +153,21 @@ def test_composite_preserve_originals(datafiles):
    150 153
         assert(_yaml.node_get(orig_extra, str, 'old') == 'new')
    
    151 154
     
    
    152 155
     
    
    156
    +def load_yaml_file(filename, *, cache_path, shortname=None, from_cache='raw'):
    
    157
    +
    
    158
    +    _, temppath = tempfile.mkstemp(dir=os.path.join(cache_path.dirname, cache_path.basename), text=True)
    
    159
    +    context = Context()
    
    160
    +
    
    161
    +    with YamlCache.open(context, temppath) as yc:
    
    162
    +        if from_cache == 'raw':
    
    163
    +            return _yaml.load(filename, shortname)
    
    164
    +        elif from_cache == 'cached':
    
    165
    +            _yaml.load(filename, shortname, yaml_cache=yc)
    
    166
    +            return _yaml.load(filename, shortname, yaml_cache=yc)
    
    167
    +        else:
    
    168
+            assert False, "Unknown from_cache mode: {}".format(from_cache)
    
    169
    +
    
    170
    +
    
    153 171
     # Tests for list composition
    
    154 172
     #
    
    155 173
     # Each test composits a filename on top of basics.yaml, and tests
    
    ... ... @@ -165,6 +183,7 @@ def test_composite_preserve_originals(datafiles):
    165 183
     #    prov_col: The expected provenance column of "mood"
    
    166 184
     #
    
    167 185
     @pytest.mark.datafiles(os.path.join(DATA_DIR))
    
    186
+@pytest.mark.parametrize('caching', ['raw', 'cached'])
    
    168 187
     @pytest.mark.parametrize("filename,index,length,mood,prov_file,prov_line,prov_col", [
    
    169 188
     
    
    170 189
         # Test results of compositing with the (<) prepend directive
    
    ... ... @@ -195,14 +214,15 @@ def test_composite_preserve_originals(datafiles):
    195 214
         ('implicitoverwrite.yaml', 0, 2, 'overwrite1', 'implicitoverwrite.yaml', 4, 8),
    
    196 215
         ('implicitoverwrite.yaml', 1, 2, 'overwrite2', 'implicitoverwrite.yaml', 6, 8),
    
    197 216
     ])
    
    198
    -def test_list_composition(datafiles, filename,
    
    217
    +def test_list_composition(datafiles, filename, tmpdir,
    
    199 218
                               index, length, mood,
    
    200
    -                          prov_file, prov_line, prov_col):
    
    201
    -    base = os.path.join(datafiles.dirname, datafiles.basename, 'basics.yaml')
    
    202
    -    overlay = os.path.join(datafiles.dirname, datafiles.basename, filename)
    
    219
    +                          prov_file, prov_line, prov_col, caching):
    
    220
    +    base_file = os.path.join(datafiles.dirname, datafiles.basename, 'basics.yaml')
    
    221
    +    overlay_file = os.path.join(datafiles.dirname, datafiles.basename, filename)
    
    222
    +
    
    223
    +    base = load_yaml_file(base_file, cache_path=tmpdir, shortname='basics.yaml', from_cache=caching)
    
    224
    +    overlay = load_yaml_file(overlay_file, cache_path=tmpdir, shortname=filename, from_cache=caching)
    
    203 225
     
    
    204
    -    base = _yaml.load(base, shortname='basics.yaml')
    
    205
    -    overlay = _yaml.load(overlay, shortname=filename)
    
    206 226
         _yaml.composite_dict(base, overlay)
    
    207 227
     
    
    208 228
         children = _yaml.node_get(base, list, 'children')
    
    ... ... @@ -254,6 +274,7 @@ def test_list_deletion(datafiles):
    254 274
     #    prov_col: The expected provenance column of "mood"
    
    255 275
     #
    
    256 276
     @pytest.mark.datafiles(os.path.join(DATA_DIR))
    
    277
+@pytest.mark.parametrize('caching', ['raw', 'cached'])
    
    257 278
     @pytest.mark.parametrize("filename1,filename2,index,length,mood,prov_file,prov_line,prov_col", [
    
    258 279
     
    
    259 280
         # Test results of compositing literal list with (>) and then (<)
    
    ... ... @@ -310,9 +331,9 @@ def test_list_deletion(datafiles):
    310 331
         ('listoverwrite.yaml', 'listprepend.yaml', 2, 4, 'overwrite1', 'listoverwrite.yaml', 5, 10),
    
    311 332
         ('listoverwrite.yaml', 'listprepend.yaml', 3, 4, 'overwrite2', 'listoverwrite.yaml', 7, 10),
    
    312 333
     ])
    
    313
    -def test_list_composition_twice(datafiles, filename1, filename2,
    
    334
    +def test_list_composition_twice(datafiles, tmpdir, filename1, filename2,
    
    314 335
                                     index, length, mood,
    
    315
    -                                prov_file, prov_line, prov_col):
    
    336
    +                                prov_file, prov_line, prov_col, caching):
    
    316 337
         file_base = os.path.join(datafiles.dirname, datafiles.basename, 'basics.yaml')
    
    317 338
         file1 = os.path.join(datafiles.dirname, datafiles.basename, filename1)
    
    318 339
         file2 = os.path.join(datafiles.dirname, datafiles.basename, filename2)
    
    ... ... @@ -320,9 +341,9 @@ def test_list_composition_twice(datafiles, filename1, filename2,
    320 341
         #####################
    
    321 342
         # Round 1 - Fight !
    
    322 343
         #####################
    
    323
    -    base = _yaml.load(file_base, shortname='basics.yaml')
    
    324
    -    overlay1 = _yaml.load(file1, shortname=filename1)
    
    325
    -    overlay2 = _yaml.load(file2, shortname=filename2)
    
    344
    +    base = load_yaml_file(file_base, cache_path=tmpdir, shortname='basics.yaml', from_cache=caching)
    
    345
    +    overlay1 = load_yaml_file(file1, cache_path=tmpdir, shortname=filename1, from_cache=caching)
    
    346
    +    overlay2 = load_yaml_file(file2, cache_path=tmpdir, shortname=filename2, from_cache=caching)
    
    326 347
     
    
    327 348
         _yaml.composite_dict(base, overlay1)
    
    328 349
         _yaml.composite_dict(base, overlay2)
    
    ... ... @@ -337,9 +358,9 @@ def test_list_composition_twice(datafiles, filename1, filename2,
    337 358
         #####################
    
    338 359
         # Round 2 - Fight !
    
    339 360
         #####################
    
    340
    -    base = _yaml.load(file_base, shortname='basics.yaml')
    
    341
    -    overlay1 = _yaml.load(file1, shortname=filename1)
    
    342
    -    overlay2 = _yaml.load(file2, shortname=filename2)
    
    361
    +    base = load_yaml_file(file_base, cache_path=tmpdir, shortname='basics.yaml', from_cache=caching)
    
    362
    +    overlay1 = load_yaml_file(file1, cache_path=tmpdir, shortname=filename1, from_cache=caching)
    
    363
    +    overlay2 = load_yaml_file(file2, cache_path=tmpdir, shortname=filename2, from_cache=caching)
    
    343 364
     
    
    344 365
         _yaml.composite_dict(overlay1, overlay2)
    
    345 366
         _yaml.composite_dict(base, overlay1)
    


