[Notes] [Git][BuildStream/buildstream][tiagogomes/issue-573] 17 commits: source.py: Document Source.get_source_fetchers() to return an iterable

Tiago Gomes pushed to branch tiagogomes/issue-573 at BuildStream / buildstream

Commits:

19 changed files:

Changes:

  • buildstream/_artifactcache/artifactcache.py
    @@ -16,6 +16,7 @@
     #
     #  Authors:
     #        Tristan Maat <tristan maat codethink co uk>
    +#        Tiago Gomes <tiago gomes codethink co uk>
     
     import os
     import string

    @@ -85,8 +86,6 @@ class ArtifactCache():
             self.extractdir = os.path.join(context.artifactdir, 'extract')
             self.tmpdir = os.path.join(context.artifactdir, 'tmp')
     
    -        self.estimated_size = None
    -
             self.global_remote_specs = []
             self.project_remote_specs = {}
     

    @@ -228,10 +227,14 @@ class ArtifactCache():
         #
         # Clean the artifact cache as much as possible.
         #
    +    # Returns:
    +    #     (int): Amount of bytes cleaned from the cache
    +    #
         def clean(self):
             artifacts = self.list_artifacts()
    +        cache_size = old_cache_size = self.get_cache_size()
     
    -        while self.calculate_cache_size() >= self.cache_quota - self.cache_lower_threshold:
    +        while cache_size >= self.cache_quota - self.cache_lower_threshold:
                 try:
                     to_remove = artifacts.pop(0)
                 except IndexError:

    @@ -245,7 +248,7 @@ class ArtifactCache():
                               "Please increase the cache-quota in {}."
                               .format(self.context.config_origin or default_conf))
     
    -                if self.calculate_cache_size() > self.cache_quota:
    +                if cache_size > self.cache_quota:
                         raise ArtifactError("Cache too full. Aborting.",
                                             detail=detail,
                                             reason="cache-too-full")

    @@ -255,44 +258,17 @@ class ArtifactCache():
                 key = to_remove.rpartition('/')[2]
                 if key not in self.required_artifacts:
                     size = self.remove(to_remove)
    -                if size:
    -                    self.cache_size -= size
    -
    -        # This should be O(1) if implemented correctly
    -        return self.calculate_cache_size()
    +                cache_size -= size
    +                self._message(MessageType.DEBUG,
    +                              "Removed artifact {} ({})".format(
    +                                  to_remove[:-(len(key) - self.context.log_key_length)],
    +                                  utils._pretty_size(size)))
     
    -    # get_approximate_cache_size()
    -    #
    -    # A cheap method that aims to serve as an upper limit on the
    -    # artifact cache size.
    -    #
    -    # The cache size reported by this function will normally be larger
    -    # than the real cache size, since it is calculated using the
    -    # pre-commit artifact size, but for very small artifacts in
    -    # certain caches additional overhead could cause this to be
    -    # smaller than, but close to, the actual size.
    -    #
    -    # Nonetheless, in practice this should be safe to use as an upper
    -    # limit on the cache size.
    -    #
    -    # If the cache has built-in constant-time size reporting, please
    -    # feel free to override this method with a more accurate
    -    # implementation.
    -    #
    -    # Returns:
    -    #     (int) An approximation of the artifact cache size.
    -    #
    -    def get_approximate_cache_size(self):
    -        # If we don't currently have an estimate, figure out the real
    -        # cache size.
    -        if self.estimated_size is None:
    -            stored_size = self._read_cache_size()
    -            if stored_size is not None:
    -                self.estimated_size = stored_size
    -            else:
    -                self.estimated_size = self.calculate_cache_size()
    +        self._message(MessageType.INFO,
    +                      "New artifact cache size: {}".format(
    +                          utils._pretty_size(cache_size)))
     
    -        return self.estimated_size
    +        return old_cache_size - cache_size
     
         ################################################
         # Abstract methods for subclasses to implement #

    @@ -390,6 +366,10 @@ class ArtifactCache():
         #     content (str): The element's content directory
         #     keys (list): The cache keys to use
         #
    +    # Returns:
    +    #   (int): Bytes required to cache the artifact taking deduplication
    +    #          into account
    +    #
         def commit(self, element, content, keys):
             raise ImplError("Cache '{kind}' does not implement commit()"
                             .format(kind=type(self).__name__))

    @@ -462,6 +442,8 @@ class ArtifactCache():
         #
         # Returns:
         #   (bool): True if pull was successful, False if artifact was not available
    +    #   (int): Bytes required to cache the artifact taking deduplication
    +    #          into account
         #
         def pull(self, element, key, *, progress=None):
             raise ImplError("Cache '{kind}' does not implement pull()"

    @@ -484,8 +466,6 @@ class ArtifactCache():
         #
         # Return the real artifact cache size.
         #
    -    # Implementations should also use this to update estimated_size.
    -    #
         # Returns:
         #
         # (int) The size of the artifact cache.

    @@ -494,6 +474,22 @@ class ArtifactCache():
             raise ImplError("Cache '{kind}' does not implement calculate_cache_size()"
                             .format(kind=type(self).__name__))
     
    +    # get_cache_size()
    +    #
    +    # Return the artifact cache size.
    +    #
    +    # Returns:
    +    #     (int) The size of the artifact cache.
    +    #
    +    def get_cache_size(self):
    +        if self.cache_size is None:
    +            self.cache_size = self._read_cache_size()
    +
    +        if self.cache_size is None:
    +            self.cache_size = self.calculate_cache_size()
    +
    +        return self.cache_size
    +
         ################################################
         #               Local Private Methods          #
         ################################################

    @@ -537,32 +533,13 @@ class ArtifactCache():
     
         # _add_artifact_size()
         #
    -    # Since we cannot keep track of the cache size between threads,
    -    # this method will be called by the main process every time a
    -    # process that added something to the cache finishes.
    -    #
    -    # This will then add the reported size to
    -    # ArtifactCache.estimated_size.
    +    # Since we cannot keep track of the cache size between processes,
    +    # this method will be called by the main process every time a job
    +    # added or removed an artifact from the cache finishes.
         #
         def _add_artifact_size(self, artifact_size):
    -        if not self.estimated_size:
    -            self.estimated_size = self.calculate_cache_size()
    -
    -        self.estimated_size += artifact_size
    -        self._write_cache_size(self.estimated_size)
    -
    -    # _set_cache_size()
    -    #
    -    # Similarly to the above method, when we calculate the actual size
    -    # in a child thread, we can't update it. We instead pass the value
    -    # back to the main thread and update it there.
    -    #
    -    def _set_cache_size(self, cache_size):
    -        self.estimated_size = cache_size
    -
    -        # set_cache_size is called in cleanup, where it may set the cache to None
    -        if self.estimated_size is not None:
    -            self._write_cache_size(self.estimated_size)
    +        self.cache_size = self.get_cache_size() + artifact_size
    +        self._write_cache_size(self.cache_size)
     
         # _write_cache_size()
         #

    @@ -628,7 +605,7 @@ class ArtifactCache():
             stat = os.statvfs(artifactdir_volume)
             available_space = (stat.f_bsize * stat.f_bavail)
     
    -        cache_size = self.get_approximate_cache_size()
    +        cache_size = self.get_cache_size()
     
             # Ensure system has enough storage for the cache_quota
             #
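
    The pattern these hunks establish: the cache size is tracked
    incrementally in the main process, persisted via _read_cache_size()
    and _write_cache_size(), and only recalculated from disk as a last
    resort. A minimal runnable sketch of that flow (stubbed storage
    helpers and an arbitrary quota, not BuildStream code):

        class SizeTrackingCache:
            def __init__(self):
                self.cache_size = None             # lazily initialized
                self.cache_quota = 10 * 1024 ** 3  # 10 GiB, arbitrary

            def calculate_cache_size(self):
                return 0                           # stand-in for the expensive directory walk

            def _read_cache_size(self):
                return None                        # stand-in for reading a persisted size

            def _write_cache_size(self, size):
                pass                               # stand-in for persisting the size

            def get_cache_size(self):
                # Prefer the persisted value, fall back to a full calculation
                if self.cache_size is None:
                    self.cache_size = self._read_cache_size()
                if self.cache_size is None:
                    self.cache_size = self.calculate_cache_size()
                return self.cache_size

            def _add_artifact_size(self, artifact_size):
                # Called in the main process after each job finishes;
                # a cleanup job reports a negative size
                self.cache_size = self.get_cache_size() + artifact_size
                self._write_cache_size(self.cache_size)

        cache = SizeTrackingCache()
        cache._add_artifact_size(4096)        # a build job added 4 KiB
        cache._add_artifact_size(-1024)       # a cleanup job freed 1 KiB
        assert cache.get_cache_size() == 3072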

  • buildstream/_artifactcache/cascache.py
    @@ -16,6 +16,7 @@
     #
     #  Authors:
     #        Jürg Billeter <juerg billeter codethink co uk>
    +#        Tiago Gomes <tiago gomes codethink co uk>
     
     import hashlib
     import itertools

    @@ -95,7 +96,7 @@ class CASCache(ArtifactCache):
     
             with tempfile.TemporaryDirectory(prefix='tmp', dir=self.extractdir) as tmpdir:
                 checkoutdir = os.path.join(tmpdir, ref)
    -            self._checkout(checkoutdir, tree)
    +            self._checkout_tree(checkoutdir, tree)
     
                 os.makedirs(os.path.dirname(dest), exist_ok=True)
                 try:

    @@ -115,12 +116,12 @@ class CASCache(ArtifactCache):
         def commit(self, element, content, keys):
             refs = [self.get_artifact_fullname(element, key) for key in keys]
     
    -        tree = self._create_tree(content)
    +        tree, size = self._create_tree(content)
     
             for ref in refs:
                 self.set_ref(ref, tree)
     
    -        self.cache_size = None
    +        return size
     
         def diff(self, element, key_a, key_b, *, subdir=None):
             ref_a = self.get_artifact_fullname(element, key_a)

    @@ -238,12 +239,12 @@ class CASCache(ArtifactCache):
                     tree.hash = response.digest.hash
                     tree.size_bytes = response.digest.size_bytes
     
    -                self._fetch_directory(remote, tree)
    +                size = self._fetch_tree(remote, tree)
     
                     self.set_ref(ref, tree)
     
                     # no need to pull from additional remotes
    -                return True
    +                return True, size
     
                 except grpc.RpcError as e:
                     if e.code() != grpc.StatusCode.NOT_FOUND:

    @@ -257,7 +258,7 @@ class CASCache(ArtifactCache):
                                 remote.spec.url, element._get_brief_display_key())
                         ))
     
    -        return False
    +        return False, 0
     
         def link_key(self, element, oldkey, newkey):
             oldref = self.get_artifact_fullname(element, oldkey)

    @@ -397,6 +398,7 @@ class CASCache(ArtifactCache):
         #
         # Returns:
         #     (Digest): The digest of the added object
    +    #     (int): The number of bytes required to store the object
         #
         # Either `path` or `buffer` must be passed, but not both.
         #

    @@ -425,22 +427,39 @@ class CASCache(ArtifactCache):
     
                     out.flush()
     
    +                file_size = os.fstat(out.fileno()).st_size
    +
                     digest.hash = h.hexdigest()
    -                digest.size_bytes = os.fstat(out.fileno()).st_size
    +                digest.size_bytes = file_size
     
                     # Place file at final location
                     objpath = self.objpath(digest)
    -                os.makedirs(os.path.dirname(objpath), exist_ok=True)
    +                dirpath = os.path.dirname(objpath)
    +
    +                # Track the increased size on the parent directory caused by
    +                # adding a new entry, as these directories can contain a large
    +                # number of files.
    +                new_dir_size = 0
    +                old_dir_size = 0
    +                try:
    +                    os.makedirs(dirpath)
    +                except FileExistsError:
    +                    old_dir_size = os.stat(dirpath).st_size
    +                else:
    +                    new_dir_size = os.stat(dirpath).st_size
    +
                     os.link(out.name, objpath)
    +                new_dir_size = os.stat(dirpath).st_size - old_dir_size
     
             except FileExistsError as e:
                 # We can ignore the failed link() if the object is already in the repo.
    +            file_size = 0
                 pass
     
             except OSError as e:
                 raise ArtifactError("Failed to hash object: {}".format(e)) from e
     
    -        return digest
    +        return digest, file_size + new_dir_size
     
         # set_ref():
         #

    @@ -449,6 +468,8 @@ class CASCache(ArtifactCache):
         # Args:
         #     ref (str): The name of the ref
         #
    +    # Note: Setting a ref will have a very low overhead on the cache
    +    # size, so we don't track this.
         def set_ref(self, ref, tree):
             refpath = self._refpath(ref)
             os.makedirs(os.path.dirname(refpath), exist_ok=True)

    @@ -488,11 +509,7 @@ class CASCache(ArtifactCache):
                 raise ArtifactError("Attempt to access unavailable artifact: {}".format(e)) from e
     
         def calculate_cache_size(self):
    -        if self.cache_size is None:
    -            self.cache_size = utils._get_dir_size(self.casdir)
    -            self.estimated_size = self.cache_size
    -
    -        return self.cache_size
    +        return utils._get_dir_size(self.casdir)
     
         # list_artifacts():
         #

    @@ -599,7 +616,7 @@ class CASCache(ArtifactCache):
         ################################################
         #             Local Private Methods            #
         ################################################
    -    def _checkout(self, dest, tree):
    +    def _checkout_tree(self, dest, tree):
             os.makedirs(dest, exist_ok=True)
     
             directory = remote_execution_pb2.Directory()

    @@ -618,7 +635,7 @@ class CASCache(ArtifactCache):
     
             for dirnode in directory.directories:
                 fullpath = os.path.join(dest, dirnode.name)
    -            self._checkout(fullpath, dirnode.digest)
    +            self._checkout_tree(fullpath, dirnode.digest)
     
             for symlinknode in directory.symlinks:
                 # symlink

    @@ -630,6 +647,7 @@ class CASCache(ArtifactCache):
     
         def _create_tree(self, path, *, digest=None):
             directory = remote_execution_pb2.Directory()
    +        size = 0
     
             for name in sorted(os.listdir(path)):
                 full_path = os.path.join(path, name)

    @@ -637,11 +655,11 @@ class CASCache(ArtifactCache):
                 if stat.S_ISDIR(mode):
                     dirnode = directory.directories.add()
                     dirnode.name = name
    -                self._create_tree(full_path, digest=dirnode.digest)
    +                size += self._create_tree(full_path, digest=dirnode.digest)[1]
                 elif stat.S_ISREG(mode):
                     filenode = directory.files.add()
                     filenode.name = name
    -                self.add_object(path=full_path, digest=filenode.digest)
    +                size += self.add_object(path=full_path, digest=filenode.digest)[1]
                     filenode.is_executable = (mode & stat.S_IXUSR) == stat.S_IXUSR
                 elif stat.S_ISLNK(mode):
                     symlinknode = directory.symlinks.add()

    @@ -650,7 +668,8 @@ class CASCache(ArtifactCache):
                 else:
                     raise ArtifactError("Unsupported file type for {}".format(full_path))
     
    -        return self.add_object(digest=digest, buffer=directory.SerializeToString())
    +        res = self.add_object(digest=digest, buffer=directory.SerializeToString())
    +        return res[0], res[1] + size
     
         def _get_subdir(self, tree, subdir):
             head, name = os.path.split(subdir)

    @@ -793,11 +812,12 @@ class CASCache(ArtifactCache):
             out.flush()
             assert digest.size_bytes == os.fstat(out.fileno()).st_size
     
    -    def _fetch_directory(self, remote, tree):
    +    def _fetch_tree(self, remote, tree):
    +        size = 0
             objpath = self.objpath(tree)
             if os.path.exists(objpath):
                 # already in local cache
    -            return
    +            return 0
     
             with tempfile.NamedTemporaryFile(dir=self.tmpdir) as out:
                 self._fetch_blob(remote, tree, out)

    @@ -808,7 +828,7 @@ class CASCache(ArtifactCache):
                     directory.ParseFromString(f.read())
     
                 for filenode in directory.files:
    -                fileobjpath = self.objpath(tree)
    +                fileobjpath = self.objpath(filenode.digest)
                     if os.path.exists(fileobjpath):
                         # already in local cache
                         continue

    @@ -816,17 +836,21 @@ class CASCache(ArtifactCache):
                     with tempfile.NamedTemporaryFile(dir=self.tmpdir) as f:
                         self._fetch_blob(remote, filenode.digest, f)
     
    -                    digest = self.add_object(path=f.name)
    +                    digest, obj_size = self.add_object(path=f.name)
    +                    size += obj_size
                         assert digest.hash == filenode.digest.hash
     
                 for dirnode in directory.directories:
    -                self._fetch_directory(remote, dirnode.digest)
    +                size += self._fetch_tree(remote, dirnode.digest)
     
                 # place directory blob only in final location when we've downloaded
                 # all referenced blobs to avoid dangling references in the repository
    -            digest = self.add_object(path=out.name)
    +            digest, obj_size = self.add_object(path=out.name)
    +            size += obj_size
                 assert digest.hash == tree.hash
     
    +            return size
    +
     
     # Represents a single remote CAS cache.
     #

  • buildstream/_artifactcache/casserver.py
    @@ -203,7 +203,7 @@ class _ByteStreamServicer(bytestream_pb2_grpc.ByteStreamServicer):
                             context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
                             return response
                         out.flush()
    -                    digest = self.cas.add_object(path=out.name)
    +                    digest = self.cas.add_object(path=out.name)[0]
                         if digest.hash != client_digest.hash:
                             context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
                             return response

  • buildstream/_context.py
    @@ -119,7 +119,6 @@ class Context():
             self._log_handle = None
             self._log_filename = None
             self.config_cache_quota = 'infinity'
    -        self.artifactdir_volume = None
     
         # load()
         #

  • buildstream/_scheduler/jobs/__init__.py
    @@ -1,3 +1,2 @@
     from .elementjob import ElementJob
    -from .cachesizejob import CacheSizeJob
     from .cleanupjob import CleanupJob

  • buildstream/_scheduler/jobs/cachesizejob.py deleted
    @@ -1,38 +0,0 @@
    -#  Copyright (C) 2018 Codethink Limited
    -#
    -#  This program is free software; you can redistribute it and/or
    -#  modify it under the terms of the GNU Lesser General Public
    -#  License as published by the Free Software Foundation; either
    -#  version 2 of the License, or (at your option) any later version.
    -#
    -#  This library is distributed in the hope that it will be useful,
    -#  but WITHOUT ANY WARRANTY; without even the implied warranty of
    -#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    -#  Lesser General Public License for more details.
    -#
    -#  You should have received a copy of the GNU Lesser General Public
    -#  License along with this library. If not, see <http://www.gnu.org/licenses/>.
    -#
    -#  Author:
    -#        Tristan Daniël Maat <tristan maat codethink co uk>
    -#
    -from .job import Job
    -from ..._platform import Platform
    -
    -
    -class CacheSizeJob(Job):
    -    def __init__(self, *args, complete_cb, **kwargs):
    -        super().__init__(*args, **kwargs)
    -        self._complete_cb = complete_cb
    -        self._cache = Platform._instance.artifactcache
    -
    -    def child_process(self):
    -        return self._cache.calculate_cache_size()
    -
    -    def parent_complete(self, success, result):
    -        self._cache._set_cache_size(result)
    -        if self._complete_cb:
    -            self._complete_cb(result)
    -
    -    def child_process_data(self):
    -        return {}

  • buildstream/_scheduler/jobs/cleanupjob.py
    @@ -21,18 +21,19 @@ from ..._platform import Platform
     
     
     class CleanupJob(Job):
    -    def __init__(self, *args, complete_cb, **kwargs):
    +    def __init__(self, *args, **kwargs):
             super().__init__(*args, **kwargs)
    -        self._complete_cb = complete_cb
             self._cache = Platform._instance.artifactcache
     
         def child_process(self):
             return self._cache.clean()
     
         def parent_complete(self, success, result):
    -        self._cache._set_cache_size(result)
    -        if self._complete_cb:
    -            self._complete_cb()
    +        if success:
    +            # ArtifactCache.clean() returns the number of bytes cleaned.
    +            # We negate the number because the cache size is to be
    +            # decreased.
    +            self._cache._add_artifact_size(result * -1)
     
         def child_process_data(self):
             return {}

  • buildstream/_scheduler/jobs/elementjob.py
    @@ -110,12 +110,10 @@ class ElementJob(Job):
     
             workspace = self._element._get_workspace()
             artifact_size = self._element._get_artifact_size()
    -        cache_size = self._element._get_artifact_cache().calculate_cache_size()
     
             if workspace is not None:
                 data['workspace'] = workspace.to_dict()
             if artifact_size is not None:
                 data['artifact_size'] = artifact_size
    -        data['cache_size'] = cache_size
     
             return data

  • buildstream/_scheduler/queues/buildqueue.py
    @@ -87,19 +87,6 @@ class BuildQueue(Queue):
     
             return QueueStatus.READY
     
    -    def _check_cache_size(self, job, element):
    -        if not job.child_data:
    -            return
    -
    -        artifact_size = job.child_data.get('artifact_size', False)
    -
    -        if artifact_size:
    -            cache = element._get_artifact_cache()
    -            cache._add_artifact_size(artifact_size)
    -
    -            if cache.get_approximate_cache_size() > cache.cache_quota:
    -                self._scheduler._check_cache_size_real()
    -
         def done(self, job, element, result, success):
     
             if success:

  • buildstream/_scheduler/queues/pullqueue.py
    @@ -1,5 +1,5 @@
     #
    -#  Copyright (C) 2016 Codethink Limited
    +#  Copyright (C) 2018 Codethink Limited
     #
     #  This program is free software; you can redistribute it and/or
     #  modify it under the terms of the GNU Lesser General Public

    @@ -52,17 +52,14 @@ class PullQueue(Queue):
             else:
                 return QueueStatus.SKIP
     
    -    def done(self, _, element, result, success):
    +    def done(self, job, element, result, success):
     
             if not success:
                 return False
     
             element._pull_done()
     
    -        # Build jobs will check the "approximate" size first. Since we
    -        # do not get an artifact size from pull jobs, we have to
    -        # actually check the cache size.
    -        self._scheduler._check_cache_size_real()
    +        self._check_cache_size(job, element)
     
             # Element._pull() returns True if it downloaded an artifact,
             # here we want to appear skipped if we did not download.

  • buildstream/_scheduler/queues/queue.py
    @@ -1,5 +1,5 @@
     #
    -#  Copyright (C) 2016 Codethink Limited
    +#  Copyright (C) 2018 Codethink Limited
     #
     #  This program is free software; you can redistribute it and/or
     #  modify it under the terms of the GNU Lesser General Public

    @@ -301,8 +301,6 @@ class Queue():
             # Update values that need to be synchronized in the main task
             # before calling any queue implementation
             self._update_workspaces(element, job)
    -        if job.child_data:
    -            element._get_artifact_cache().cache_size = job.child_data.get('cache_size')
     
             # Give the result of the job to the Queue implementor,
             # and determine if it should be considered as processed

    @@ -360,3 +358,16 @@ class Queue():
             logfile = "{key}-{action}".format(key=key, action=action)
     
             return os.path.join(project.name, element.normal_name, logfile)
    +
    +    def _check_cache_size(self, job, element):
    +        if not job.child_data:
    +            return
    +
    +        artifact_size = job.child_data.get('artifact_size', False)
    +
    +        if artifact_size:
    +            cache = element._get_artifact_cache()
    +            cache._add_artifact_size(artifact_size)
    +
    +            if cache.get_cache_size() > cache.cache_quota:
    +                self._scheduler._run_cache_cleanup()
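
    Moving _check_cache_size() into the Queue base class means build and
    pull queues now share one trigger: apply the job's reported size
    delta in the main process, then schedule a cleanup if the quota is
    exceeded. Roughly (a standalone sketch with hypothetical arguments,
    not the real Queue API):

        def check_cache_size(job_child_data, cache, scheduler):
            # Runs in the main process when a job finishes: child processes
            # cannot update the parent's notion of the cache size directly,
            # so the size delta travels back via the job's child_data.
            if not job_child_data:
                return
            artifact_size = job_child_data.get('artifact_size')
            if artifact_size:
                cache._add_artifact_size(artifact_size)
                if cache.get_cache_size() > cache.cache_quota:
                    scheduler._run_cache_cleanup()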

  • buildstream/_scheduler/scheduler.py
    @@ -1,5 +1,5 @@
     #
    -#  Copyright (C) 2016 Codethink Limited
    +#  Copyright (C) 2018 Codethink Limited
     #
     #  This program is free software; you can redistribute it and/or
     #  modify it under the terms of the GNU Lesser General Public

    @@ -28,8 +28,7 @@ from contextlib import contextmanager
     
     # Local imports
     from .resources import Resources, ResourceType
    -from .jobs import CacheSizeJob, CleanupJob
    -from .._platform import Platform
    +from .jobs import CleanupJob
     
     
     # A decent return code for Scheduler.run()

    @@ -316,24 +315,11 @@ class Scheduler():
             self.schedule_jobs(ready)
             self._sched()
     
    -    def _run_cleanup(self, cache_size):
    -        platform = Platform.get_platform()
    -        if cache_size and cache_size < platform.artifactcache.cache_quota:
    -            return
    -
    -        job = CleanupJob(self, 'cleanup', 'cleanup',
    +    def _run_cache_cleanup(self):
    +        job = CleanupJob(self, 'Cleaning artifact cache', 'cleanup',
                              resources=[ResourceType.CACHE,
                                         ResourceType.PROCESS],
    -                         exclusive_resources=[ResourceType.CACHE],
    -                         complete_cb=None)
    -        self.schedule_jobs([job])
    -
    -    def _check_cache_size_real(self):
    -        job = CacheSizeJob(self, 'cache_size', 'cache_size/cache_size',
    -                           resources=[ResourceType.CACHE,
    -                                      ResourceType.PROCESS],
    -                           exclusive_resources=[ResourceType.CACHE],
    -                           complete_cb=self._run_cleanup)
    +                         exclusive_resources=[ResourceType.CACHE])
             self.schedule_jobs([job])
     
         # _suspend_jobs()

  • buildstream/element.py
    @@ -245,7 +245,7 @@ class Element(Plugin):
             # Collect the composited element configuration and
             # ask the element to configure itself.
             self.__config = self.__extract_config(meta)
    -        self.configure(self.__config)
    +        self._configure(self.__config)
     
             # Extract Sandbox config
             self.__sandbox_config = self.__extract_sandbox_config(meta)

    @@ -1646,8 +1646,8 @@ class Element(Plugin):
                         }), os.path.join(metadir, 'workspaced-dependencies.yaml'))
     
                         with self.timed_activity("Caching artifact"):
    -                        self.__artifact_size = utils._get_dir_size(assembledir)
    -                        self.__artifacts.commit(self, assembledir, self.__get_cache_keys_for_commit())
    +                        self.__artifact_size = self.__artifacts.commit(
    +                            self, assembledir, self.__get_cache_keys_for_commit())
     
                         if collect is not None and collectvdir is None:
                             raise ElementError(

    @@ -1697,31 +1697,31 @@ class Element(Plugin):
             self._update_state()
     
         def _pull_strong(self, *, progress=None):
    -        weak_key = self._get_cache_key(strength=_KeyStrength.WEAK)
    -
             key = self.__strict_cache_key
    -        if not self.__artifacts.pull(self, key, progress=progress):
    -            return False
    +        pulled, self.__artifact_size = self.__artifacts.pull(
    +            self, key, progress=progress)
     
    -        # update weak ref by pointing it to this newly fetched artifact
    -        self.__artifacts.link_key(self, key, weak_key)
    +        if pulled:
    +            # update weak ref by pointing it to this newly fetched artifact
    +            weak_key = self._get_cache_key(strength=_KeyStrength.WEAK)
    +            self.__artifacts.link_key(self, key, weak_key)
     
    -        return True
    +        return pulled
     
         def _pull_weak(self, *, progress=None):
             weak_key = self._get_cache_key(strength=_KeyStrength.WEAK)
    +        pulled, self.__artifact_size = self.__artifacts.pull(
    +            self, weak_key, progress=progress)
     
    -        if not self.__artifacts.pull(self, weak_key, progress=progress):
    -            return False
    -
    -        # extract strong cache key from this newly fetched artifact
    -        self._pull_done()
    +        if pulled:
    +            # extract strong cache key from this newly fetched artifact
    +            self._pull_done()
     
    -        # create tag for strong cache key
    -        key = self._get_cache_key(strength=_KeyStrength.STRONG)
    -        self.__artifacts.link_key(self, weak_key, key)
    +            # create tag for strong cache key
    +            key = self._get_cache_key(strength=_KeyStrength.STRONG)
    +            self.__artifacts.link_key(self, weak_key, key)
     
    -        return True
    +        return pulled
     
         # _pull():
         #

    @@ -1741,13 +1741,12 @@ class Element(Plugin):
             if not pulled and not self._cached() and not context.get_strict():
                 pulled = self._pull_weak(progress=progress)
     
    -        if not pulled:
    -            return False
    +        if pulled:
    +            # Notify successfull download
    +            display_key = self._get_brief_display_key()
    +            self.info("Downloaded artifact {}".format(display_key))
     
    -        # Notify successfull download
    -        display_key = self._get_brief_display_key()
    -        self.info("Downloaded artifact {}".format(display_key))
    -        return True
    +        return pulled
     
         # _skip_push():
         #

  • buildstream/plugin.py
    @@ -179,6 +179,7 @@ class Plugin():
             self.__provenance = provenance  # The Provenance information
             self.__type_tag = type_tag      # The type of plugin (element or source)
             self.__unique_id = _plugin_register(self)  # Unique ID
    +        self.__configuring = False      # Whether we are currently configuring
     
             # Infer the kind identifier
             modulename = type(self).__module__

    @@ -682,7 +683,32 @@ class Plugin():
             else:
                 yield log
     
    +    # _configure():
    +    #
    +    # Calls configure() for the plugin, this must be called by
    +    # the core instead of configure() directly, so that the
    +    # _get_configuring() state is up to date.
    +    #
    +    # Args:
    +    #    node (dict): The loaded configuration dictionary
    +    #
    +    def _configure(self, node):
    +        self.__configuring = True
    +        self.configure(node)
    +        self.__configuring = False
    +
    +    # _get_configuring():
    +    #
    +    # Checks whether the plugin is in the middle of having
    +    # its Plugin.configure() method called
    +    #
    +    # Returns:
    +    #    (bool): Whether we are currently configuring
    +    def _get_configuring(self):
    +        return self.__configuring
    +
         # _preflight():
    +    #
         # Calls preflight() for the plugin, and allows generic preflight
         # checks to be added
         #

    @@ -690,6 +716,7 @@ class Plugin():
         #    SourceError: If it's a Source implementation
         #    ElementError: If it's an Element implementation
         #    ProgramNotFoundError: If a required host tool is not found
    +    #
         def _preflight(self):
             self.preflight()

  • buildstream/plugins/sources/git.py
    @@ -74,6 +74,9 @@ This plugin provides the following configurable warnings:
     
     - 'git:inconsistent-submodule' - A submodule was found to be missing from the underlying git repository.
     
    +This plugin also utilises the following configurable core plugin warnings:
    +
    +- 'ref-not-in-track' - The provided ref was not found in the provided track in the element's git repository.
     """
     
     import os

    @@ -87,6 +90,7 @@ from configparser import RawConfigParser
     
     from buildstream import Source, SourceError, Consistency, SourceFetcher
     from buildstream import utils
    +from buildstream.plugin import CoreWarnings
     
     GIT_MODULES = '.gitmodules'
     

    @@ -100,13 +104,14 @@ INCONSISTENT_SUBMODULE = "inconsistent-submodules"
     #
     class GitMirror(SourceFetcher):
     
    -    def __init__(self, source, path, url, ref):
    +    def __init__(self, source, path, url, ref, *, primary=False):
     
             super().__init__()
             self.source = source
             self.path = path
             self.url = url
             self.ref = ref
    +        self.primary = primary
             self.mirror = os.path.join(source.get_mirror_directory(), utils.url_directory_name(url))
             self.mark_download_url(url)
     

    @@ -124,7 +129,8 @@ class GitMirror(SourceFetcher):
                 # system configured tmpdir is not on the same partition.
                 #
                 with self.source.tempdir() as tmpdir:
    -                url = self.source.translate_url(self.url, alias_override=alias_override)
    +                url = self.source.translate_url(self.url, alias_override=alias_override,
    +                                                primary=self.primary)
                     self.source.call([self.source.host_git, 'clone', '--mirror', '-n', url, tmpdir],
                                      fail="Failed to clone git repository {}".format(url),
                                      fail_temporarily=True)

    @@ -146,7 +152,9 @@ class GitMirror(SourceFetcher):
                                               .format(self.source, url, tmpdir, self.mirror, e)) from e
     
         def _fetch(self, alias_override=None):
    -        url = self.source.translate_url(self.url, alias_override=alias_override)
    +        url = self.source.translate_url(self.url,
    +                                        alias_override=alias_override,
    +                                        primary=self.primary)
     
             if alias_override:
                 remote_name = utils.url_directory_name(alias_override)

    @@ -199,7 +207,7 @@ class GitMirror(SourceFetcher):
                 cwd=self.mirror)
             return output.rstrip('\n')
     
    -    def stage(self, directory):
    +    def stage(self, directory, track=None):
             fullpath = os.path.join(directory, self.path)
     
             # Using --shared here avoids copying the objects into the checkout, in any

    @@ -213,10 +221,14 @@ class GitMirror(SourceFetcher):
                              fail="Failed to checkout git ref {}".format(self.ref),
                              cwd=fullpath)
     
    +        # Check that the user specified ref exists in the track if provided & not already tracked
    +        if track:
    +            self.assert_ref_in_track(fullpath, track)
    +
             # Remove .git dir
             shutil.rmtree(os.path.join(fullpath, ".git"))
     
    -    def init_workspace(self, directory):
    +    def init_workspace(self, directory, track=None):
             fullpath = os.path.join(directory, self.path)
             url = self.source.translate_url(self.url)
     

    @@ -232,6 +244,10 @@ class GitMirror(SourceFetcher):
                              fail="Failed to checkout git ref {}".format(self.ref),
                              cwd=fullpath)
     
    +        # Check that the user specified ref exists in the track if provided & not already tracked
    +        if track:
    +            self.assert_ref_in_track(fullpath, track)
    +
         # List the submodules (path/url tuples) present at the given ref of this repo
         def submodule_list(self):
             modules = "{}:{}".format(self.ref, GIT_MODULES)

    @@ -296,6 +312,28 @@ class GitMirror(SourceFetcher):
     
                 return None
     
    +    # Assert that ref exists in track, if track has been specified.
    +    def assert_ref_in_track(self, fullpath, track):
    +        _, branch = self.source.check_output([self.source.host_git, 'branch', '--list', track,
    +                                              '--contains', self.ref],
    +                                             cwd=fullpath,)
    +        if branch:
    +            return True
    +        else:
    +            _, tag = self.source.check_output([self.source.host_git, 'tag', '--list', track,
    +                                               '--contains', self.ref],
    +                                              cwd=fullpath,)
    +            if tag:
    +                return True
    +
    +        detail = "The ref provided for the element does not exist locally in the provided track branch / tag " + \
    +                 "'{}'.\nYou may wish to track the element to update the ref from '{}' ".format(track, track) + \
    +                 "with `bst track`,\nor examine the upstream at '{}' for the specific ref.".format(self.url)
    +
    +        self.source.warn("{}: expected ref '{}' was not found in given track '{}' for staged repository: '{}'\n"
    +                         .format(self.source, self.ref, track, self.url),
    +                         detail=detail, warning_token=CoreWarnings.REF_NOT_IN_TRACK)
    +
     
     class GitSource(Source):
         # pylint: disable=attribute-defined-outside-init

    @@ -307,7 +345,7 @@ class GitSource(Source):
             self.node_validate(node, config_keys + Source.COMMON_CONFIG_KEYS)
     
             self.original_url = self.node_get_member(node, str, 'url')
    -        self.mirror = GitMirror(self, '', self.original_url, ref)
    +        self.mirror = GitMirror(self, '', self.original_url, ref, primary=True)
             self.tracking = self.node_get_member(node, str, 'track', None)
     
             # At this point we now know if the source has a ref and/or a track.

    @@ -327,12 +365,18 @@ class GitSource(Source):
             for path, _ in self.node_items(modules):
                 submodule = self.node_get_member(modules, Mapping, path)
                 url = self.node_get_member(submodule, str, 'url', None)
    +
    +            # Make sure to mark all URLs that are specified in the configuration
    +            if url:
    +                self.mark_download_url(url, primary=False)
    +
                 self.submodule_overrides[path] = url
                 if 'checkout' in submodule:
                     checkout = self.node_get_member(submodule, bool, 'checkout')
                     self.submodule_checkout_overrides[path] = checkout
     
             self.mark_download_url(self.original_url)
    +        self.tracked = False
     
         def preflight(self):
             # Check if git is installed, get the binary at the same time

    @@ -398,6 +442,8 @@ class GitSource(Source):
                 # Update self.mirror.ref and node.ref from the self.tracking branch
                 ret = self.mirror.latest_commit(self.tracking)
     
    +        # Set tracked attribute, parameter for if self.mirror.assert_ref_in_track is needed
    +        self.tracked = True
             return ret
     
         def init_workspace(self, directory):

    @@ -405,7 +451,7 @@ class GitSource(Source):
             self.refresh_submodules()
     
             with self.timed_activity('Setting up workspace "{}"'.format(directory), silent_nested=True):
    -            self.mirror.init_workspace(directory)
    +            self.mirror.init_workspace(directory, track=(self.tracking if not self.tracked else None))
                 for mirror in self.submodules:
                     mirror.init_workspace(directory)

    @@ -421,7 +467,7 @@ class GitSource(Source):
             # Stage the main repo in the specified directory
             #
             with self.timed_activity("Staging {}".format(self.mirror.url), silent_nested=True):
    -            self.mirror.stage(directory)
    +            self.mirror.stage(directory, track=(self.tracking if not self.tracked else None))
                 for mirror in self.submodules:
                     if mirror.path in self.submodule_checkout_overrides:
                         checkout = self.submodule_checkout_overrides[mirror.path]

  • buildstream/source.py
    ... ... @@ -28,6 +28,18 @@ Abstract Methods
    28 28
     For loading and configuration purposes, Sources must implement the
    
    29 29
     :ref:`Plugin base class abstract methods <core_plugin_abstract_methods>`.
    
    30 30
     
    
    31
    +.. attention::
    
    32
    +
    
    33
    +   In order to ensure that all configuration data is processed at
    
    34
    +   load time, it is important that all URLs have been processed during
    
    35
    +   :func:`Plugin.configure() <buildstream.plugin.Plugin.configure>`.
    
    36
    +
    
    37
    +   Source implementations *must* either call
    
    38
    +   :func:`Source.translate_url() <buildstream.source.Source.translate_url>` or
    
    39
    +   :func:`Source.mark_download_url() <buildstream.source.Source.mark_download_url>`
    
    40
    +   for every URL that has been specified in the configuration during
    
    41
    +   :func:`Plugin.configure() <buildstream.plugin.Plugin.configure>`
    
    42
    +
    
    31 43
     Sources expose the following abstract methods. Unless explicitly mentioned,
    
    32 44
     these methods are mandatory to implement.
    
    33 45
     
    
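
    A minimal sketch of what the attention block above requires, assuming
    hypothetical config keys 'url' and 'mirror-url':

        from buildstream import Source

        class ExampleSource(Source):
            def configure(self, node):
                # translate_url() marks the URL as a side effect, so this one
                # call covers the primary URL.
                self.url = self.translate_url(self.node_get_member(node, str, 'url'))

                # A URL that is only translated later (e.g. inside a
                # SourceFetcher) must still be marked now, as a secondary URL.
                self.mirror_url = self.node_get_member(node, str, 'mirror-url')
                self.mark_download_url(self.mirror_url, primary=False)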
    ... ... @@ -184,6 +196,13 @@ class SourceFetcher():
    184 196
         fetching and substituting aliases.
    
    185 197
     
    
    186 198
         *Since: 1.2*
    
    199
    +
    
    200
    +    .. attention::
    
    201
    +
    
    202
    +       When implementing a SourceFetcher, remember to call
    
    203
    +       :func:`Source.mark_download_url() <buildstream.source.Source.mark_download_url>`
    
    204
    +       for every URL found in the configuration data at
    
    205
    +       :func:`Plugin.configure() <buildstream.plugin.Plugin.configure>` time.
    
    187 206
         """
    
    188 207
         def __init__(self):
    
    189 208
             self.__alias = None
    
    ... ... @@ -206,7 +225,7 @@ class SourceFetcher():
    206 225
             Implementors should raise :class:`.SourceError` if there is some
    
    207 226
             network error or if the source reference could not be matched.
    
    208 227
             """
    
    209
    -        raise ImplError("Source fetcher '{}' does not implement fetch()".format(type(self)))
    
    228
    +        raise ImplError("SourceFetcher '{}' does not implement fetch()".format(type(self)))
    
    210 229
     
    
    211 230
         #############################################################
    
    212 231
         #                       Public Methods                      #
    
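
    A hedged sketch of a SourceFetcher conforming to the attention block above;
    it mirrors the FetchFetcher used in the test changes further down, with the
    download logic itself elided:

        from buildstream import SourceFetcher

        class ExampleFetcher(SourceFetcher):
            def __init__(self, source, url, primary=False):
                super().__init__()
                self.source = source
                self.original_url = url
                self.primary = primary
                # Fetchers are created at Plugin.configure() time, so marking
                # here satisfies the mark-every-URL requirement.
                self.mark_download_url(url)

            def fetch(self, alias_override=None):
                url = self.source.translate_url(self.original_url,
                                                alias_override=alias_override,
                                                primary=self.primary)
                # ... download from url, raising SourceError on failure ...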
    ... ... @@ -277,8 +296,11 @@ class Source(Plugin):
    277 296
             self.__element_kind = meta.element_kind         # The kind of the element owning this source
    
    278 297
             self.__directory = meta.directory               # Staging relative directory
    
    279 298
             self.__consistency = Consistency.INCONSISTENT   # Cached consistency state
    
    299
    +
    
    300
    +        # The alias_override is only set on a re-instantiated Source
    
    280 301
             self.__alias_override = alias_override          # Tuple of alias and its override to use instead
    
    281
    -        self.__expected_alias = None                    # A hacky way to store the first alias used
    
    302
    +        self.__expected_alias = None                    # The primary alias
    
    303
    +        self.__marked_urls = set()                      # Set of marked download URLs
    
    282 304
     
    
    283 305
             # FIXME: Reconstruct a MetaSource from a Source instead of storing it.
    
    284 306
             self.__meta = meta                              # MetaSource stored so we can copy this source later.
    
    ... ... @@ -289,7 +311,7 @@ class Source(Plugin):
    289 311
             self.__config = self.__extract_config(meta)
    
    290 312
             self.__first_pass = meta.first_pass
    
    291 313
     
    
    292
    -        self.configure(self.__config)
    
    314
    +        self._configure(self.__config)
    
    293 315
     
    
    294 316
         COMMON_CONFIG_KEYS = ['kind', 'directory']
    
    295 317
         """Common source config keys
    
    ... ... @@ -351,10 +373,10 @@ class Source(Plugin):
    351 373
             Args:
    
    352 374
                ref (simple object): The internal source reference to set, or ``None``
    
    353 375
                node (dict): The same dictionary which was previously passed
    
    354
    -                        to :func:`~buildstream.source.Source.configure`
    
    376
    +                        to :func:`Plugin.configure() <buildstream.plugin.Plugin.configure>`
    
    355 377
     
    
    356
    -        See :func:`~buildstream.source.Source.get_ref` for a discussion on
    
    357
    -        the *ref* parameter.
    
    378
    +        See :func:`Source.get_ref() <buildstream.source.Source.get_ref>`
    
    379
    +        for a discussion on the *ref* parameter.
    
    358 380
     
    
    359 381
             .. note::
    
    360 382
     
    
    ... ... @@ -384,8 +406,8 @@ class Source(Plugin):
    384 406
             backend store allows one to query for a new ref from symbolic
    
    385 407
             tracking data without downloading, then that is desirable.
    
    386 408
     
    
    387
    -        See :func:`~buildstream.source.Source.get_ref` for a discussion on
    
    388
    -        the *ref* parameter.
    
    409
    +        See :func:`Source.get_ref() <buildstream.source.Source.get_ref>`
    
    410
    +        for a discussion on the *ref* parameter.
    
    389 411
             """
    
    390 412
             # Allow a non implementation
    
    391 413
             return None
    
    ... ... @@ -435,7 +457,7 @@ class Source(Plugin):
    435 457
                :class:`.SourceError`
    
    436 458
     
    
    437 459
             Default implementation is to call
    
    438
    -        :func:`~buildstream.source.Source.stage`.
    
    460
    +        :func:`Source.stage() <buildstream.source.Source.stage>`.
    
    439 461
     
    
    440 462
             Implementors overriding this method should assume that *directory*
    
    441 463
             already exists.
    
    ... ... @@ -453,8 +475,15 @@ class Source(Plugin):
    453 475
             is recommended.
    
    454 476
     
    
    455 477
             Returns:
    
    456
    -           list: A list of SourceFetchers. If SourceFetchers are not supported,
    
    457
    -                 this will be an empty list.
    
    478
    +           iterable: The Source's SourceFetchers, if any.
    
    479
    +
    
    480
    +        .. note::
    
    481
    +
    
    482
    +           This may be implemented as a generator.
    
    483
    +
    
    484
    +           The :func:`SourceFetcher.fetch() <buildstream.source.SourceFetcher.fetch>`
    
    485
    +           method will be called on each returned fetcher in turn,
    
    486
    +           before the next fetcher is consumed.
    
    458 487
     
    
    459 488
             *Since: 1.2*
    
    460 489
             """
    
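
    Given the note above, an implementation may plausibly be written as a
    generator (self.urls and ExampleFetcher are illustrative names):

        def get_source_fetchers(self):
            # Each yielded fetcher is fully fetched before the next one is
            # produced, so later fetchers may rely on data obtained by earlier
            # ones (e.g. git submodules discovered after the main clone).
            for url in self.urls:
                yield ExampleFetcher(self, url)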
    ... ... @@ -477,17 +506,27 @@ class Source(Plugin):
    477 506
             os.makedirs(directory, exist_ok=True)
    
    478 507
             return directory
    
    479 508
     
    
    480
    -    def translate_url(self, url, *, alias_override=None):
    
    509
    +    def translate_url(self, url, *, alias_override=None, primary=True):
    
    481 510
             """Translates the given url which may be specified with an alias
    
    482 511
             into a fully qualified url.
    
    483 512
     
    
    484 513
             Args:
    
    485
    -           url (str): A url, which may be using an alias
    
    514
    +           url (str): A URL, which may be using an alias
    
    486 515
                alias_override (str): Optionally, a URI to override the alias with. (*Since: 1.2*)
    
    516
    +           primary (bool): Whether this is the primary URL for the source. (*Since: 1.2*)
    
    487 517
     
    
    488 518
             Returns:
    
    489
    -           str: The fully qualified url, with aliases resolved
    
    519
    +           str: The fully qualified URL, with aliases resolved
    +
    
    520
    +        .. note::
    
    521
    +
    
    522
    +           This must be called for every URL in the configuration during
    
    523
    +           :func:`Plugin.configure() <buildstream.plugin.Plugin.configure>` if
    
    524
    +           :func:`Source.mark_download_url() <buildstream.source.Source.mark_download_url>`
    
    525
    +           is not called.
    
    490 526
             """
    
    527
    +        # Ensure that the download URL is also marked
    
    528
    +        self.mark_download_url(url, primary=primary)
    
    529
    +
    
    491 530
             # Alias overriding can happen explicitly (by command-line) or
    
    492 531
             # implicitly (the Source being constructed with an __alias_override).
    
    493 532
             if alias_override or self.__alias_override:
    
    ... ... @@ -506,25 +545,55 @@ class Source(Plugin):
    506 545
                             url = override_url + url_body
    
    507 546
                 return url
    
    508 547
             else:
    
    509
    -            # Sneakily store the alias if it hasn't already been stored
    
    510
    -            if not self.__expected_alias and url and utils._ALIAS_SEPARATOR in url:
    
    511
    -                self.mark_download_url(url)
    
    512
    -
    
    513 548
                 project = self._get_project()
    
    514 549
                 return project.translate_url(url, first_pass=self.__first_pass)
    
    515 550
     
    
    516
    -    def mark_download_url(self, url):
    
    551
    +    def mark_download_url(self, url, *, primary=True):
    
    517 552
             """Identifies the URL that this Source uses to download
    
    518 553
     
    
    519
    -        This must be called during :func:`~buildstream.plugin.Plugin.configure` if
    
    520
    -        :func:`~buildstream.source.Source.translate_url` is not called.
    
    521
    -
    
    522 554
             Args:
    
    523
    -           url (str): The url used to download
    
    555
    +           url (str): The URL used to download
    
    556
    +           primary (bool): Whether this is the primary URL for the source
    
    557
    +
    
    558
    +        .. note::
    
    559
    +
    
    560
    +           This must be called for every URL in the configuration during
    
    561
    +           :func:`Plugin.configure() <buildstream.plugin.Plugin.configure>` if
    
    562
    +           :func:`Source.translate_url() <buildstream.source.Source.translate_url>`
    
    563
    +           is not called.
    
    524 564
     
    
    525 565
             *Since: 1.2*
    
    526 566
             """
    
    527
    -        self.__expected_alias = _extract_alias(url)
    
    567
    +        # Only mark the Source-level aliases on the main instance, not on
    
    568
    +        # a reinstantiated instance used for mirroring.
    
    569
    +        if not self.__alias_override:
    
    570
    +            if primary:
    
    571
    +                expected_alias = _extract_alias(url)
    
    572
    +
    
    573
    +                assert (self.__expected_alias is None or
    
    574
    +                        self.__expected_alias == expected_alias), \
    
    575
    +                    "Primary URL marked twice with different URLs"
    
    576
    +
    
    577
    +                self.__expected_alias = expected_alias
    
    578
    +
    
    579
    +        # Enforce proper behaviour of plugins by ensuring that all
    
    580
    +        # aliased URLs have been marked at Plugin.configure() time.
    
    581
    +        #
    
    582
    +        if self._get_configuring():
    
    583
    +            # Record marked URLs while configuring
    
    584
    +            #
    
    585
    +            self.__marked_urls.add(url)
    
    586
    +        else:
    
    587
    +            # If an unknown aliased URL is seen after configuring,
    
    588
    +            # this is an error.
    
    589
    +            #
    
    590
    +            # It is still possible that a URL that was not mentioned
    
    591
    +            # in the element configuration can be marked; this is
    
    592
    +            # the case for git submodules, which might be automatically
    
    593
    +            # discovered.
    
    594
    +            #
    
    595
    +            assert (url in self.__marked_urls or not _extract_alias(url)), \
    
    596
    +                "URL was not seen at configure time: {}".format(url)
    
    528 597
     
    
    529 598
         def get_project_directory(self):
    
    530 599
             """Fetch the project base directory
    

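
    The assertion above still admits URLs that only become known after
    configuration, provided they carry no alias. A hedged sketch of the
    git-submodule case the comment mentions (discover_submodules() is a
    hypothetical helper):

        def refresh_submodules(self):
            for path, url in self.discover_submodules():
                # Submodule URLs are typically full URLs without an alias, so
                # marking them after configure time passes the assertion above.
                self.mark_download_url(url, primary=False)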
  • buildstream/storage/_casbaseddirectory.py
    ... ... @@ -111,7 +111,7 @@ class CasBasedDirectory(Directory):
    111 111
             the parent).
    
    112 112
     
    
    113 113
             """
    
    114
    -        self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
    
    114
    +        self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())[0]
    
    115 115
             if caller:
    
    116 116
                 old_dir = self._find_pb2_entry(caller.filename)
    
    117 117
                 self.cas_cache.add_object(digest=old_dir.digest, buffer=caller.pb2_directory.SerializeToString())
    
    ... ... @@ -130,9 +130,10 @@ class CasBasedDirectory(Directory):
    130 130
                 self.index[entry.name].buildstream_object._recalculate_recursing_down(entry)
    
    131 131
     
    
    132 132
             if parent:
    
    133
    -            self.ref = self.cas_cache.add_object(digest=parent.digest, buffer=self.pb2_directory.SerializeToString())
    
    133
    +            self.ref = self.cas_cache.add_object(digest=parent.digest,
    
    134
    +                                                 buffer=self.pb2_directory.SerializeToString())[0]
    
    134 135
             else:
    
    135
    -            self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
    
    136
    +            self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())[0]
    
    136 137
             # We don't need to do anything more than that; files were already added earlier, and symlinks are
    
    137 138
             # part of the directory structure.
    
    138 139
     
    
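
    The trailing [0] added to each call suggests add_object() now returns a
    tuple with the object digest first; given this branch's cache-size
    accounting, the second element is plausibly the number of bytes added,
    though that is an inference from context. A hedged sketch:

        # Callers that only need the digest index into the tuple:
        digest = cas_cache.add_object(buffer=data)[0]

        # If the (digest, size) reading is right, callers tracking cache
        # growth can unpack both values instead:
        digest, bytes_added = cas_cache.add_object(buffer=data)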

  • tests/frontend/project/sources/fetch_source.py
    ... ... @@ -15,14 +15,17 @@ from buildstream import Source, Consistency, SourceError, SourceFetcher
    15 15
     
    
    16 16
     
    
    17 17
     class FetchFetcher(SourceFetcher):
    
    18
    -    def __init__(self, source, url):
    
    18
    +    def __init__(self, source, url, primary=False):
    
    19 19
             super().__init__()
    
    20 20
             self.source = source
    
    21 21
             self.original_url = url
    
    22
    +        self.primary = primary
    
    22 23
             self.mark_download_url(url)
    
    23 24
     
    
    24 25
         def fetch(self, alias_override=None):
    
    25
    -        url = self.source.translate_url(self.original_url, alias_override=alias_override)
    
    26
    +        url = self.source.translate_url(self.original_url,
    
    27
    +                                        alias_override=alias_override,
    
    28
    +                                        primary=self.primary)
    
    26 29
             with open(self.source.output_file, "a") as f:
    
    27 30
                 success = url in self.source.fetch_succeeds and self.source.fetch_succeeds[url]
    
    28 31
                 message = "Fetch {} {} from {}\n".format(self.original_url,
    
    ... ... @@ -37,12 +40,21 @@ class FetchSource(Source):
    37 40
         # Read config to know which URLs to fetch
    
    38 41
         def configure(self, node):
    
    39 42
             self.original_urls = self.node_get_member(node, list, 'urls')
    
    40
    -        self.fetchers = [FetchFetcher(self, url) for url in self.original_urls]
    
    41 43
             self.output_file = self.node_get_member(node, str, 'output-text')
    
    42 44
             self.fetch_succeeds = {}
    
    43 45
             if 'fetch-succeeds' in node:
    
    44 46
                 self.fetch_succeeds = {x[0]: x[1] for x in self.node_items(node['fetch-succeeds'])}
    
    45 47
     
    
    48
    +        # The first URL is the primary one for this test
    
    49
    +        #
    
    50
    +        primary = True
    
    51
    +        self.fetchers = []
    
    52
    +        for url in self.original_urls:
    
    53
    +            self.mark_download_url(url, primary=primary)
    
    54
    +            fetcher = FetchFetcher(self, url, primary=primary)
    
    55
    +            self.fetchers.append(fetcher)
    
    56
    +            primary = False
    
    57
    +
    
    46 58
         def get_source_fetchers(self):
    
    47 59
             return self.fetchers
    
    48 60
     
    

  • tests/sources/git.py
    ... ... @@ -25,6 +25,7 @@ import pytest
    25 25
     
    
    26 26
     from buildstream._exceptions import ErrorDomain
    
    27 27
     from buildstream import _yaml
    
    28
    +from buildstream.plugin import CoreWarnings
    
    28 29
     
    
    29 30
     from tests.testutils import cli, create_repo
    
    30 31
     from tests.testutils.site import HAVE_GIT
    
    ... ... @@ -408,3 +409,70 @@ def test_submodule_track_no_ref_or_track(cli, tmpdir, datafiles):
    408 409
         result = cli.run(project=project, args=['show', 'target.bst'])
    
    409 410
         result.assert_main_error(ErrorDomain.SOURCE, "missing-track-and-ref")
    
    410 411
         result.assert_task_error(None, None)
    
    412
    +
    
    413
    +
    
    414
    +@pytest.mark.skipif(HAVE_GIT is False, reason="git is not available")
    
    415
    +@pytest.mark.datafiles(os.path.join(DATA_DIR, 'template'))
    
    416
    +def test_ref_not_in_track_warn(cli, tmpdir, datafiles):
    
    417
    +    project = os.path.join(datafiles.dirname, datafiles.basename)
    
    418
    +
    
    419
    +    # Create the repo from 'repofiles' and create a branch without the latest commit
    
    420
    +    repo = create_repo('git', str(tmpdir))
    
    421
    +    ref = repo.create(os.path.join(project, 'repofiles'))
    
    422
    +
    
    423
    +    gitsource = repo.source_config(ref=ref)
    
    424
    +
    
    425
    +    # Overwrite the track value with the added branch
    
    426
    +    gitsource['track'] = 'foo'
    
    427
    +
    
    428
    +    # Write out our test target
    
    429
    +    element = {
    
    430
    +        'kind': 'import',
    
    431
    +        'sources': [
    
    432
    +            gitsource
    
    433
    +        ]
    
    434
    +    }
    
    435
    +    _yaml.dump(element, os.path.join(project, 'target.bst'))
    
    436
    +
    
    437
    +    # Assert the warning is raised, as the ref is not in branch foo.
    
    438
    +    # It is reported as a warning, not an error, when not set as fatal.
    
    439
    +    result = cli.run(project=project, args=['build', 'target.bst'])
    
    440
    +    assert "The ref provided for the element does not exist locally" in result.stderr
    
    441
    +
    
    442
    +
    
    443
    +@pytest.mark.skipif(HAVE_GIT is False, reason="git is not available")
    
    444
    +@pytest.mark.datafiles(os.path.join(DATA_DIR, 'template'))
    
    445
    +def test_ref_not_in_track_warn_error(cli, tmpdir, datafiles):
    
    446
    +    project = os.path.join(datafiles.dirname, datafiles.basename)
    
    447
    +
    
    448
    +    # Add fatal-warnings ref-not-in-track to project.conf
    
    449
    +    project_template = {
    
    450
    +        "name": "foo",
    
    451
    +        "fatal-warnings": [CoreWarnings.REF_NOT_IN_TRACK]
    
    452
    +    }
    
    453
    +
    
    454
    +    _yaml.dump(project_template, os.path.join(project, 'project.conf'))
    
    455
    +
    
    456
    +    # Create the repo from 'repofiles' and create a branch without the latest commit
    
    457
    +    repo = create_repo('git', str(tmpdir))
    
    458
    +    ref = repo.create(os.path.join(project, 'repofiles'))
    
    459
    +
    
    460
    +    gitsource = repo.source_config(ref=ref)
    
    461
    +
    
    462
    +    # Overwrite the track value with the added branch
    
    463
    +    gitsource['track'] = 'foo'
    
    464
    +
    
    465
    +    # Write out our test target
    
    466
    +    element = {
    
    467
    +        'kind': 'import',
    
    468
    +        'sources': [
    
    469
    +            gitsource
    
    470
    +        ]
    
    471
    +    }
    
    472
    +    _yaml.dump(element, os.path.join(project, 'target.bst'))
    
    473
    +
    
    474
    +    # Assert that the build raises a warning that is captured
    
    475
    +    # as a plugin error, due to the warning being set as fatal
    
    476
    +    result = cli.run(project=project, args=['build', 'target.bst'])
    
    477
    +    result.assert_main_error(ErrorDomain.STREAM, None)
    
    478
    +    result.assert_task_error(ErrorDomain.PLUGIN, CoreWarnings.REF_NOT_IN_TRACK)


