Tiago Gomes pushed to branch tiagogomes/issue-573 at BuildStream / buildstream
Commits:
-
e9d75e4d
by Tiago Gomes at 2018-09-01T16:02:38Z
-
2ee790ff
by Tiago Gomes at 2018-09-01T16:02:46Z
-
45b88974
by Tiago Gomes at 2018-09-01T16:02:46Z
-
0b23aa62
by Tiago Gomes at 2018-09-01T16:02:46Z
13 changed files:
- buildstream/_artifactcache/artifactcache.py
- buildstream/_artifactcache/cascache.py
- buildstream/_artifactcache/casserver.py
- buildstream/_scheduler/jobs/__init__.py
- − buildstream/_scheduler/jobs/cachesizejob.py
- buildstream/_scheduler/jobs/cleanupjob.py
- buildstream/_scheduler/jobs/elementjob.py
- buildstream/_scheduler/queues/buildqueue.py
- buildstream/_scheduler/queues/pullqueue.py
- buildstream/_scheduler/queues/queue.py
- buildstream/_scheduler/scheduler.py
- buildstream/element.py
- buildstream/storage/_casbaseddirectory.py
Changes:
| ... | ... | @@ -16,6 +16,7 @@ |
| 16 | 16 |
#
|
| 17 | 17 |
# Authors:
|
| 18 | 18 |
# Tristan Maat <tristan maat codethink co uk>
|
| 19 |
+# Tiago Gomes <tiago gomes codethink co uk>
|
|
| 19 | 20 |
|
| 20 | 21 |
import os
|
| 21 | 22 |
import string
|
| ... | ... | @@ -85,8 +86,6 @@ class ArtifactCache(): |
| 85 | 86 |
self.extractdir = os.path.join(context.artifactdir, 'extract')
|
| 86 | 87 |
self.tmpdir = os.path.join(context.artifactdir, 'tmp')
|
| 87 | 88 |
|
| 88 |
- self.estimated_size = None
|
|
| 89 |
- |
|
| 90 | 89 |
self.global_remote_specs = []
|
| 91 | 90 |
self.project_remote_specs = {}
|
| 92 | 91 |
|
| ... | ... | @@ -228,10 +227,14 @@ class ArtifactCache(): |
| 228 | 227 |
#
|
| 229 | 228 |
# Clean the artifact cache as much as possible.
|
| 230 | 229 |
#
|
| 230 |
+ # Returns:
|
|
| 231 |
+ # (int): Amount of bytes cleaned from the cache
|
|
| 232 |
+ #
|
|
| 231 | 233 |
def clean(self):
|
| 232 | 234 |
artifacts = self.list_artifacts()
|
| 235 |
+ cache_size = old_cache_size = self.get_cache_size()
|
|
| 233 | 236 |
|
| 234 |
- while self.calculate_cache_size() >= self.cache_quota - self.cache_lower_threshold:
|
|
| 237 |
+ while cache_size >= self.cache_quota - self.cache_lower_threshold:
|
|
| 235 | 238 |
try:
|
| 236 | 239 |
to_remove = artifacts.pop(0)
|
| 237 | 240 |
except IndexError:
|
| ... | ... | @@ -245,7 +248,7 @@ class ArtifactCache(): |
| 245 | 248 |
"Please increase the cache-quota in {}."
|
| 246 | 249 |
.format(self.context.config_origin or default_conf))
|
| 247 | 250 |
|
| 248 |
- if self.calculate_cache_size() > self.cache_quota:
|
|
| 251 |
+ if cache_size > self.cache_quota:
|
|
| 249 | 252 |
raise ArtifactError("Cache too full. Aborting.",
|
| 250 | 253 |
detail=detail,
|
| 251 | 254 |
reason="cache-too-full")
|
| ... | ... | @@ -255,44 +258,17 @@ class ArtifactCache(): |
| 255 | 258 |
key = to_remove.rpartition('/')[2]
|
| 256 | 259 |
if key not in self.required_artifacts:
|
| 257 | 260 |
size = self.remove(to_remove)
|
| 258 |
- if size:
|
|
| 259 |
- self.cache_size -= size
|
|
| 261 |
+ cache_size -= size
|
|
| 262 |
+ self._message(MessageType.DEBUG,
|
|
| 263 |
+ "Removed artifact {} ({})".format(
|
|
| 264 |
+ to_remove[:-(len(key) - self.context.log_key_length)],
|
|
| 265 |
+ utils._pretty_size(size)))
|
|
| 260 | 266 |
|
| 261 |
- # This should be O(1) if implemented correctly
|
|
| 262 |
- return self.calculate_cache_size()
|
|
| 267 |
+ self._message(MessageType.INFO,
|
|
| 268 |
+ "New artifact cache size: {}".format(
|
|
| 269 |
+ utils._pretty_size(cache_size)))
|
|
| 263 | 270 |
|
| 264 |
- # get_approximate_cache_size()
|
|
| 265 |
- #
|
|
| 266 |
- # A cheap method that aims to serve as an upper limit on the
|
|
| 267 |
- # artifact cache size.
|
|
| 268 |
- #
|
|
| 269 |
- # The cache size reported by this function will normally be larger
|
|
| 270 |
- # than the real cache size, since it is calculated using the
|
|
| 271 |
- # pre-commit artifact size, but for very small artifacts in
|
|
| 272 |
- # certain caches additional overhead could cause this to be
|
|
| 273 |
- # smaller than, but close to, the actual size.
|
|
| 274 |
- #
|
|
| 275 |
- # Nonetheless, in practice this should be safe to use as an upper
|
|
| 276 |
- # limit on the cache size.
|
|
| 277 |
- #
|
|
| 278 |
- # If the cache has built-in constant-time size reporting, please
|
|
| 279 |
- # feel free to override this method with a more accurate
|
|
| 280 |
- # implementation.
|
|
| 281 |
- #
|
|
| 282 |
- # Returns:
|
|
| 283 |
- # (int) An approximation of the artifact cache size.
|
|
| 284 |
- #
|
|
| 285 |
- def get_approximate_cache_size(self):
|
|
| 286 |
- # If we don't currently have an estimate, figure out the real
|
|
| 287 |
- # cache size.
|
|
| 288 |
- if self.estimated_size is None:
|
|
| 289 |
- stored_size = self._read_cache_size()
|
|
| 290 |
- if stored_size is not None:
|
|
| 291 |
- self.estimated_size = stored_size
|
|
| 292 |
- else:
|
|
| 293 |
- self.estimated_size = self.calculate_cache_size()
|
|
| 294 |
- |
|
| 295 |
- return self.estimated_size
|
|
| 271 |
+ return old_cache_size - cache_size
|
|
| 296 | 272 |
|
| 297 | 273 |
################################################
|
| 298 | 274 |
# Abstract methods for subclasses to implement #
|
| ... | ... | @@ -390,6 +366,10 @@ class ArtifactCache(): |
| 390 | 366 |
# content (str): The element's content directory
|
| 391 | 367 |
# keys (list): The cache keys to use
|
| 392 | 368 |
#
|
| 369 |
+ # Returns:
|
|
| 370 |
+ # (int): Bytes required to cache the artifact taking deduplication
|
|
| 371 |
+ # into account
|
|
| 372 |
+ #
|
|
| 393 | 373 |
def commit(self, element, content, keys):
|
| 394 | 374 |
raise ImplError("Cache '{kind}' does not implement commit()"
|
| 395 | 375 |
.format(kind=type(self).__name__))
|
| ... | ... | @@ -462,6 +442,8 @@ class ArtifactCache(): |
| 462 | 442 |
#
|
| 463 | 443 |
# Returns:
|
| 464 | 444 |
# (bool): True if pull was successful, False if artifact was not available
|
| 445 |
+ # (int): Bytes required to cache the artifact taking deduplication
|
|
| 446 |
+ # into account
|
|
| 465 | 447 |
#
|
| 466 | 448 |
def pull(self, element, key, *, progress=None):
|
| 467 | 449 |
raise ImplError("Cache '{kind}' does not implement pull()"
|
| ... | ... | @@ -484,8 +466,6 @@ class ArtifactCache(): |
| 484 | 466 |
#
|
| 485 | 467 |
# Return the real artifact cache size.
|
| 486 | 468 |
#
|
| 487 |
- # Implementations should also use this to update estimated_size.
|
|
| 488 |
- #
|
|
| 489 | 469 |
# Returns:
|
| 490 | 470 |
#
|
| 491 | 471 |
# (int) The size of the artifact cache.
|
| ... | ... | @@ -494,6 +474,23 @@ class ArtifactCache(): |
| 494 | 474 |
raise ImplError("Cache '{kind}' does not implement calculate_cache_size()"
|
| 495 | 475 |
.format(kind=type(self).__name__))
|
| 496 | 476 |
|
| 477 |
+ # get_cache_size()
|
|
| 478 |
+ #
|
|
| 479 |
+ # Return the artifact cache size.
|
|
| 480 |
+ #
|
|
| 481 |
+ # Returns:
|
|
| 482 |
+ #
|
|
| 483 |
+ # (int) The size of the artifact cache.
|
|
| 484 |
+ #
|
|
| 485 |
+ def get_cache_size(self):
|
|
| 486 |
+ if self.cache_size is None:
|
|
| 487 |
+ self.cache_size = self._read_cache_size()
|
|
| 488 |
+ |
|
| 489 |
+ if self.cache_size is None:
|
|
| 490 |
+ self.cache_size = self.calculate_cache_size()
|
|
| 491 |
+ |
|
| 492 |
+ return self.cache_size
|
|
| 493 |
+ |
|
| 497 | 494 |
################################################
|
| 498 | 495 |
# Local Private Methods #
|
| 499 | 496 |
################################################
|
| ... | ... | @@ -537,32 +534,13 @@ class ArtifactCache(): |
| 537 | 534 |
|
| 538 | 535 |
# _add_artifact_size()
|
| 539 | 536 |
#
|
| 540 |
- # Since we cannot keep track of the cache size between threads,
|
|
| 541 |
- # this method will be called by the main process every time a
|
|
| 542 |
- # process that added something to the cache finishes.
|
|
| 543 |
- #
|
|
| 544 |
- # This will then add the reported size to
|
|
| 545 |
- # ArtifactCache.estimated_size.
|
|
| 537 |
+ # Since we cannot keep track of the cache size between processes,
|
|
| 538 |
+ # this method will be called by the main process every time a job
|
|
| 539 |
+ # that added or removed an artifact from the cache finishes.
|
|
| 546 | 540 |
#
|
| 547 | 541 |
def _add_artifact_size(self, artifact_size):
|
| 548 |
- if not self.estimated_size:
|
|
| 549 |
- self.estimated_size = self.calculate_cache_size()
|
|
| 550 |
- |
|
| 551 |
- self.estimated_size += artifact_size
|
|
| 552 |
- self._write_cache_size(self.estimated_size)
|
|
| 553 |
- |
|
| 554 |
- # _set_cache_size()
|
|
| 555 |
- #
|
|
| 556 |
- # Similarly to the above method, when we calculate the actual size
|
|
| 557 |
- # in a child thread, we can't update it. We instead pass the value
|
|
| 558 |
- # back to the main thread and update it there.
|
|
| 559 |
- #
|
|
| 560 |
- def _set_cache_size(self, cache_size):
|
|
| 561 |
- self.estimated_size = cache_size
|
|
| 562 |
- |
|
| 563 |
- # set_cache_size is called in cleanup, where it may set the cache to None
|
|
| 564 |
- if self.estimated_size is not None:
|
|
| 565 |
- self._write_cache_size(self.estimated_size)
|
|
| 542 |
+ self.cache_size = self.get_cache_size() + artifact_size
|
|
| 543 |
+ self._write_cache_size(self.cache_size)
|
|
| 566 | 544 |
|
| 567 | 545 |
# _write_cache_size()
|
| 568 | 546 |
#
|
| ... | ... | @@ -628,7 +606,7 @@ class ArtifactCache(): |
| 628 | 606 |
stat = os.statvfs(artifactdir_volume)
|
| 629 | 607 |
available_space = (stat.f_bsize * stat.f_bavail)
|
| 630 | 608 |
|
| 631 |
- cache_size = self.get_approximate_cache_size()
|
|
| 609 |
+ cache_size = self.get_cache_size()
|
|
| 632 | 610 |
|
| 633 | 611 |
# Ensure system has enough storage for the cache_quota
|
| 634 | 612 |
#
|
| ... | ... | @@ -16,6 +16,7 @@ |
| 16 | 16 |
#
|
| 17 | 17 |
# Authors:
|
| 18 | 18 |
# Jürg Billeter <juerg billeter codethink co uk>
|
| 19 |
+# Tiago Gomes <tiago gomes codethink co uk>
|
|
| 19 | 20 |
|
| 20 | 21 |
import hashlib
|
| 21 | 22 |
import itertools
|
| ... | ... | @@ -115,12 +116,12 @@ class CASCache(ArtifactCache): |
| 115 | 116 |
def commit(self, element, content, keys):
|
| 116 | 117 |
refs = [self.get_artifact_fullname(element, key) for key in keys]
|
| 117 | 118 |
|
| 118 |
- tree = self._create_tree(content)
|
|
| 119 |
+ tree, size = self._create_tree(content)
|
|
| 119 | 120 |
|
| 120 | 121 |
for ref in refs:
|
| 121 | 122 |
self.set_ref(ref, tree)
|
| 122 | 123 |
|
| 123 |
- self.cache_size = None
|
|
| 124 |
+ return size
|
|
| 124 | 125 |
|
| 125 | 126 |
def diff(self, element, key_a, key_b, *, subdir=None):
|
| 126 | 127 |
ref_a = self.get_artifact_fullname(element, key_a)
|
| ... | ... | @@ -238,12 +239,12 @@ class CASCache(ArtifactCache): |
| 238 | 239 |
tree.hash = response.digest.hash
|
| 239 | 240 |
tree.size_bytes = response.digest.size_bytes
|
| 240 | 241 |
|
| 241 |
- self._fetch_tree(remote, tree)
|
|
| 242 |
+ size = self._fetch_tree(remote, tree)
|
|
| 242 | 243 |
|
| 243 | 244 |
self.set_ref(ref, tree)
|
| 244 | 245 |
|
| 245 | 246 |
# no need to pull from additional remotes
|
| 246 |
- return True
|
|
| 247 |
+ return True, size
|
|
| 247 | 248 |
|
| 248 | 249 |
except grpc.RpcError as e:
|
| 249 | 250 |
if e.code() != grpc.StatusCode.NOT_FOUND:
|
| ... | ... | @@ -257,7 +258,7 @@ class CASCache(ArtifactCache): |
| 257 | 258 |
remote.spec.url, element._get_brief_display_key())
|
| 258 | 259 |
))
|
| 259 | 260 |
|
| 260 |
- return False
|
|
| 261 |
+ return False, 0
|
|
| 261 | 262 |
|
| 262 | 263 |
def link_key(self, element, oldkey, newkey):
|
| 263 | 264 |
oldref = self.get_artifact_fullname(element, oldkey)
|
| ... | ... | @@ -397,6 +398,7 @@ class CASCache(ArtifactCache): |
| 397 | 398 |
#
|
| 398 | 399 |
# Returns:
|
| 399 | 400 |
# (Digest): The digest of the added object
|
| 401 |
+ # (int): The number of bytes required to store the object
|
|
| 400 | 402 |
#
|
| 401 | 403 |
# Either `path` or `buffer` must be passed, but not both.
|
| 402 | 404 |
#
|
| ... | ... | @@ -425,22 +427,39 @@ class CASCache(ArtifactCache): |
| 425 | 427 |
|
| 426 | 428 |
out.flush()
|
| 427 | 429 |
|
| 430 |
+ file_size = os.fstat(out.fileno()).st_size
|
|
| 431 |
+ |
|
| 428 | 432 |
digest.hash = h.hexdigest()
|
| 429 |
- digest.size_bytes = os.fstat(out.fileno()).st_size
|
|
| 433 |
+ digest.size_bytes = file_size
|
|
| 430 | 434 |
|
| 431 | 435 |
# Place file at final location
|
| 432 | 436 |
objpath = self.objpath(digest)
|
| 433 |
- os.makedirs(os.path.dirname(objpath), exist_ok=True)
|
|
| 437 |
+ dirpath = os.path.dirname(objpath)
|
|
| 438 |
+ |
|
| 439 |
+ # Track the increased size on the parent directory caused by
|
|
| 440 |
+ # adding a new entry, as these directories can contain a large
|
|
| 441 |
+ # number of files.
|
|
| 442 |
+ new_dir_size = 0
|
|
| 443 |
+ old_dir_size = 0
|
|
| 444 |
+ try:
|
|
| 445 |
+ os.makedirs(dirpath)
|
|
| 446 |
+ except FileExistsError:
|
|
| 447 |
+ old_dir_size = os.stat(dirpath).st_size
|
|
| 448 |
+ else:
|
|
| 449 |
+ new_dir_size = os.stat(dirpath).st_size
|
|
| 450 |
+ |
|
| 434 | 451 |
os.link(out.name, objpath)
|
| 452 |
+ new_dir_size = os.stat(dirpath).st_size - old_dir_size
|
|
| 435 | 453 |
|
| 436 | 454 |
except FileExistsError as e:
|
| 437 | 455 |
# We can ignore the failed link() if the object is already in the repo.
|
| 456 |
+ file_size = 0
|
|
| 438 | 457 |
pass
|
| 439 | 458 |
|
| 440 | 459 |
except OSError as e:
|
| 441 | 460 |
raise ArtifactError("Failed to hash object: {}".format(e)) from e
|
| 442 | 461 |
|
| 443 |
- return digest
|
|
| 462 |
+ return digest, file_size + new_dir_size
|
|
| 444 | 463 |
|
| 445 | 464 |
# set_ref():
|
| 446 | 465 |
#
|
| ... | ... | @@ -449,6 +468,8 @@ class CASCache(ArtifactCache): |
| 449 | 468 |
# Args:
|
| 450 | 469 |
# ref (str): The name of the ref
|
| 451 | 470 |
#
|
| 471 |
+ # Note: Setting a ref will have a very low overhead on the cache
|
|
| 472 |
+ # size, so we don't track this.
|
|
| 452 | 473 |
def set_ref(self, ref, tree):
|
| 453 | 474 |
refpath = self._refpath(ref)
|
| 454 | 475 |
os.makedirs(os.path.dirname(refpath), exist_ok=True)
|
| ... | ... | @@ -488,11 +509,7 @@ class CASCache(ArtifactCache): |
| 488 | 509 |
raise ArtifactError("Attempt to access unavailable artifact: {}".format(e)) from e
|
| 489 | 510 |
|
| 490 | 511 |
def calculate_cache_size(self):
|
| 491 |
- if self.cache_size is None:
|
|
| 492 |
- self.cache_size = utils._get_dir_size(self.casdir)
|
|
| 493 |
- self.estimated_size = self.cache_size
|
|
| 494 |
- |
|
| 495 |
- return self.cache_size
|
|
| 512 |
+ return utils._get_dir_size(self.casdir)
|
|
| 496 | 513 |
|
| 497 | 514 |
# list_artifacts():
|
| 498 | 515 |
#
|
| ... | ... | @@ -630,6 +647,7 @@ class CASCache(ArtifactCache): |
| 630 | 647 |
|
| 631 | 648 |
def _create_tree(self, path, *, digest=None):
|
| 632 | 649 |
directory = remote_execution_pb2.Directory()
|
| 650 |
+ size = 0
|
|
| 633 | 651 |
|
| 634 | 652 |
for name in sorted(os.listdir(path)):
|
| 635 | 653 |
full_path = os.path.join(path, name)
|
| ... | ... | @@ -637,11 +655,11 @@ class CASCache(ArtifactCache): |
| 637 | 655 |
if stat.S_ISDIR(mode):
|
| 638 | 656 |
dirnode = directory.directories.add()
|
| 639 | 657 |
dirnode.name = name
|
| 640 |
- self._create_tree(full_path, digest=dirnode.digest)
|
|
| 658 |
+ size += self._create_tree(full_path, digest=dirnode.digest)[1]
|
|
| 641 | 659 |
elif stat.S_ISREG(mode):
|
| 642 | 660 |
filenode = directory.files.add()
|
| 643 | 661 |
filenode.name = name
|
| 644 |
- self.add_object(path=full_path, digest=filenode.digest)
|
|
| 662 |
+ size += self.add_object(path=full_path, digest=filenode.digest)[1]
|
|
| 645 | 663 |
filenode.is_executable = (mode & stat.S_IXUSR) == stat.S_IXUSR
|
| 646 | 664 |
elif stat.S_ISLNK(mode):
|
| 647 | 665 |
symlinknode = directory.symlinks.add()
|
| ... | ... | @@ -650,7 +668,8 @@ class CASCache(ArtifactCache): |
| 650 | 668 |
else:
|
| 651 | 669 |
raise ArtifactError("Unsupported file type for {}".format(full_path))
|
| 652 | 670 |
|
| 653 |
- return self.add_object(digest=digest, buffer=directory.SerializeToString())
|
|
| 671 |
+ res = self.add_object(digest=digest, buffer=directory.SerializeToString())
|
|
| 672 |
+ return res[0], res[1] + size
|
|
| 654 | 673 |
|
| 655 | 674 |
def _get_subdir(self, tree, subdir):
|
| 656 | 675 |
head, name = os.path.split(subdir)
|
| ... | ... | @@ -794,10 +813,11 @@ class CASCache(ArtifactCache): |
| 794 | 813 |
assert digest.size_bytes == os.fstat(out.fileno()).st_size
|
| 795 | 814 |
|
| 796 | 815 |
def _fetch_tree(self, remote, tree):
|
| 816 |
+ size = 0
|
|
| 797 | 817 |
objpath = self.objpath(tree)
|
| 798 | 818 |
if os.path.exists(objpath):
|
| 799 | 819 |
# already in local cache
|
| 800 |
- return
|
|
| 820 |
+ return 0
|
|
| 801 | 821 |
|
| 802 | 822 |
with tempfile.NamedTemporaryFile(dir=self.tmpdir) as out:
|
| 803 | 823 |
self._fetch_blob(remote, tree, out)
|
| ... | ... | @@ -816,17 +836,21 @@ class CASCache(ArtifactCache): |
| 816 | 836 |
with tempfile.NamedTemporaryFile(dir=self.tmpdir) as f:
|
| 817 | 837 |
self._fetch_blob(remote, filenode.digest, f)
|
| 818 | 838 |
|
| 819 |
- digest = self.add_object(path=f.name)
|
|
| 839 |
+ digest, obj_size = self.add_object(path=f.name)
|
|
| 840 |
+ size += obj_size
|
|
| 820 | 841 |
assert digest.hash == filenode.digest.hash
|
| 821 | 842 |
|
| 822 | 843 |
for dirnode in directory.directories:
|
| 823 |
- self._fetch_tree(remote, dirnode.digest)
|
|
| 844 |
+ size += self._fetch_tree(remote, dirnode.digest)
|
|
| 824 | 845 |
|
| 825 | 846 |
# place directory blob only in final location when we've downloaded
|
| 826 | 847 |
# all referenced blobs to avoid dangling references in the repository
|
| 827 |
- digest = self.add_object(path=out.name)
|
|
| 848 |
+ digest, obj_size = self.add_object(path=out.name)
|
|
| 849 |
+ size += obj_size
|
|
| 828 | 850 |
assert digest.hash == tree.hash
|
| 829 | 851 |
|
| 852 |
+ return size
|
|
| 853 |
+ |
|
| 830 | 854 |
|
| 831 | 855 |
# Represents a single remote CAS cache.
|
| 832 | 856 |
#
|
| ... | ... | @@ -203,7 +203,7 @@ class _ByteStreamServicer(bytestream_pb2_grpc.ByteStreamServicer): |
| 203 | 203 |
context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
|
| 204 | 204 |
return response
|
| 205 | 205 |
out.flush()
|
| 206 |
- digest = self.cas.add_object(path=out.name)
|
|
| 206 |
+ digest = self.cas.add_object(path=out.name)[0]
|
|
| 207 | 207 |
if digest.hash != client_digest.hash:
|
| 208 | 208 |
context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
|
| 209 | 209 |
return response
|
| 1 | 1 |
from .elementjob import ElementJob
|
| 2 |
-from .cachesizejob import CacheSizeJob
|
|
| 3 | 2 |
from .cleanupjob import CleanupJob
|
| 1 |
-# Copyright (C) 2018 Codethink Limited
|
|
| 2 |
-#
|
|
| 3 |
-# This program is free software; you can redistribute it and/or
|
|
| 4 |
-# modify it under the terms of the GNU Lesser General Public
|
|
| 5 |
-# License as published by the Free Software Foundation; either
|
|
| 6 |
-# version 2 of the License, or (at your option) any later version.
|
|
| 7 |
-#
|
|
| 8 |
-# This library is distributed in the hope that it will be useful,
|
|
| 9 |
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
| 10 |
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
| 11 |
-# Lesser General Public License for more details.
|
|
| 12 |
-#
|
|
| 13 |
-# You should have received a copy of the GNU Lesser General Public
|
|
| 14 |
-# License along with this library. If not, see <http://www.gnu.org/licenses/>.
|
|
| 15 |
-#
|
|
| 16 |
-# Author:
|
|
| 17 |
-# Tristan Daniël Maat <tristan maat codethink co uk>
|
|
| 18 |
-#
|
|
| 19 |
-from .job import Job
|
|
| 20 |
-from ..._platform import Platform
|
|
| 21 |
- |
|
| 22 |
- |
|
| 23 |
-class CacheSizeJob(Job):
|
|
| 24 |
- def __init__(self, *args, complete_cb, **kwargs):
|
|
| 25 |
- super().__init__(*args, **kwargs)
|
|
| 26 |
- self._complete_cb = complete_cb
|
|
| 27 |
- self._cache = Platform._instance.artifactcache
|
|
| 28 |
- |
|
| 29 |
- def child_process(self):
|
|
| 30 |
- return self._cache.calculate_cache_size()
|
|
| 31 |
- |
|
| 32 |
- def parent_complete(self, success, result):
|
|
| 33 |
- self._cache._set_cache_size(result)
|
|
| 34 |
- if self._complete_cb:
|
|
| 35 |
- self._complete_cb(result)
|
|
| 36 |
- |
|
| 37 |
- def child_process_data(self):
|
|
| 38 |
- return {}
|
| ... | ... | @@ -21,18 +21,19 @@ from ..._platform import Platform |
| 21 | 21 |
|
| 22 | 22 |
|
| 23 | 23 |
class CleanupJob(Job):
|
| 24 |
- def __init__(self, *args, complete_cb, **kwargs):
|
|
| 24 |
+ def __init__(self, *args, **kwargs):
|
|
| 25 | 25 |
super().__init__(*args, **kwargs)
|
| 26 |
- self._complete_cb = complete_cb
|
|
| 27 | 26 |
self._cache = Platform._instance.artifactcache
|
| 28 | 27 |
|
| 29 | 28 |
def child_process(self):
|
| 30 | 29 |
return self._cache.clean()
|
| 31 | 30 |
|
| 32 | 31 |
def parent_complete(self, success, result):
|
| 33 |
- self._cache._set_cache_size(result)
|
|
| 34 |
- if self._complete_cb:
|
|
| 35 |
- self._complete_cb()
|
|
| 32 |
+ if success:
|
|
| 33 |
+ # ArtifactCache.clean() returns the number of bytes cleaned.
|
|
| 34 |
+ # We negate the number because the cache size is to be
|
|
| 35 |
+ # decreased.
|
|
| 36 |
+ self._cache._add_artifact_size(result * -1)
|
|
| 36 | 37 |
|
| 37 | 38 |
def child_process_data(self):
|
| 38 | 39 |
return {}
|
| ... | ... | @@ -110,12 +110,10 @@ class ElementJob(Job): |
| 110 | 110 |
|
| 111 | 111 |
workspace = self._element._get_workspace()
|
| 112 | 112 |
artifact_size = self._element._get_artifact_size()
|
| 113 |
- cache_size = self._element._get_artifact_cache().calculate_cache_size()
|
|
| 114 | 113 |
|
| 115 | 114 |
if workspace is not None:
|
| 116 | 115 |
data['workspace'] = workspace.to_dict()
|
| 117 | 116 |
if artifact_size is not None:
|
| 118 | 117 |
data['artifact_size'] = artifact_size
|
| 119 |
- data['cache_size'] = cache_size
|
|
| 120 | 118 |
|
| 121 | 119 |
return data
|
| ... | ... | @@ -87,19 +87,6 @@ class BuildQueue(Queue): |
| 87 | 87 |
|
| 88 | 88 |
return QueueStatus.READY
|
| 89 | 89 |
|
| 90 |
- def _check_cache_size(self, job, element):
|
|
| 91 |
- if not job.child_data:
|
|
| 92 |
- return
|
|
| 93 |
- |
|
| 94 |
- artifact_size = job.child_data.get('artifact_size', False)
|
|
| 95 |
- |
|
| 96 |
- if artifact_size:
|
|
| 97 |
- cache = element._get_artifact_cache()
|
|
| 98 |
- cache._add_artifact_size(artifact_size)
|
|
| 99 |
- |
|
| 100 |
- if cache.get_approximate_cache_size() > cache.cache_quota:
|
|
| 101 |
- self._scheduler._check_cache_size_real()
|
|
| 102 |
- |
|
| 103 | 90 |
def done(self, job, element, result, success):
|
| 104 | 91 |
|
| 105 | 92 |
if success:
|
| 1 | 1 |
#
|
| 2 |
-# Copyright (C) 2016 Codethink Limited
|
|
| 2 |
+# Copyright (C) 2018 Codethink Limited
|
|
| 3 | 3 |
#
|
| 4 | 4 |
# This program is free software; you can redistribute it and/or
|
| 5 | 5 |
# modify it under the terms of the GNU Lesser General Public
|
| ... | ... | @@ -52,17 +52,14 @@ class PullQueue(Queue): |
| 52 | 52 |
else:
|
| 53 | 53 |
return QueueStatus.SKIP
|
| 54 | 54 |
|
| 55 |
- def done(self, _, element, result, success):
|
|
| 55 |
+ def done(self, job, element, result, success):
|
|
| 56 | 56 |
|
| 57 | 57 |
if not success:
|
| 58 | 58 |
return False
|
| 59 | 59 |
|
| 60 | 60 |
element._pull_done()
|
| 61 | 61 |
|
| 62 |
- # Build jobs will check the "approximate" size first. Since we
|
|
| 63 |
- # do not get an artifact size from pull jobs, we have to
|
|
| 64 |
- # actually check the cache size.
|
|
| 65 |
- self._scheduler._check_cache_size_real()
|
|
| 62 |
+ self._check_cache_size(job, element)
|
|
| 66 | 63 |
|
| 67 | 64 |
# Element._pull() returns True if it downloaded an artifact,
|
| 68 | 65 |
# here we want to appear skipped if we did not download.
|
| 1 | 1 |
#
|
| 2 |
-# Copyright (C) 2016 Codethink Limited
|
|
| 2 |
+# Copyright (C) 2018 Codethink Limited
|
|
| 3 | 3 |
#
|
| 4 | 4 |
# This program is free software; you can redistribute it and/or
|
| 5 | 5 |
# modify it under the terms of the GNU Lesser General Public
|
| ... | ... | @@ -301,8 +301,6 @@ class Queue(): |
| 301 | 301 |
# Update values that need to be synchronized in the main task
|
| 302 | 302 |
# before calling any queue implementation
|
| 303 | 303 |
self._update_workspaces(element, job)
|
| 304 |
- if job.child_data:
|
|
| 305 |
- element._get_artifact_cache().cache_size = job.child_data.get('cache_size')
|
|
| 306 | 304 |
|
| 307 | 305 |
# Give the result of the job to the Queue implementor,
|
| 308 | 306 |
# and determine if it should be considered as processed
|
| ... | ... | @@ -360,3 +358,16 @@ class Queue(): |
| 360 | 358 |
logfile = "{key}-{action}".format(key=key, action=action)
|
| 361 | 359 |
|
| 362 | 360 |
return os.path.join(project.name, element.normal_name, logfile)
|
| 361 |
+ |
|
| 362 |
+ def _check_cache_size(self, job, element):
|
|
| 363 |
+ if not job.child_data:
|
|
| 364 |
+ return
|
|
| 365 |
+ |
|
| 366 |
+ artifact_size = job.child_data.get('artifact_size', False)
|
|
| 367 |
+ |
|
| 368 |
+ if artifact_size:
|
|
| 369 |
+ cache = element._get_artifact_cache()
|
|
| 370 |
+ cache._add_artifact_size(artifact_size)
|
|
| 371 |
+ |
|
| 372 |
+ if cache.get_cache_size() > cache.cache_quota:
|
|
| 373 |
+ self._scheduler._run_cache_cleanup()
|
| 1 | 1 |
#
|
| 2 |
-# Copyright (C) 2016 Codethink Limited
|
|
| 2 |
+# Copyright (C) 2018 Codethink Limited
|
|
| 3 | 3 |
#
|
| 4 | 4 |
# This program is free software; you can redistribute it and/or
|
| 5 | 5 |
# modify it under the terms of the GNU Lesser General Public
|
| ... | ... | @@ -28,8 +28,7 @@ from contextlib import contextmanager |
| 28 | 28 |
|
| 29 | 29 |
# Local imports
|
| 30 | 30 |
from .resources import Resources, ResourceType
|
| 31 |
-from .jobs import CacheSizeJob, CleanupJob
|
|
| 32 |
-from .._platform import Platform
|
|
| 31 |
+from .jobs import CleanupJob
|
|
| 33 | 32 |
|
| 34 | 33 |
|
| 35 | 34 |
# A decent return code for Scheduler.run()
|
| ... | ... | @@ -316,24 +315,11 @@ class Scheduler(): |
| 316 | 315 |
self.schedule_jobs(ready)
|
| 317 | 316 |
self._sched()
|
| 318 | 317 |
|
| 319 |
- def _run_cleanup(self, cache_size):
|
|
| 320 |
- platform = Platform.get_platform()
|
|
| 321 |
- if cache_size and cache_size < platform.artifactcache.cache_quota:
|
|
| 322 |
- return
|
|
| 323 |
- |
|
| 324 |
- job = CleanupJob(self, 'cleanup', 'cleanup',
|
|
| 318 |
+ def _run_cache_cleanup(self):
|
|
| 319 |
+ job = CleanupJob(self, 'Cleaning artifact cache', 'cleanup',
|
|
| 325 | 320 |
resources=[ResourceType.CACHE,
|
| 326 | 321 |
ResourceType.PROCESS],
|
| 327 |
- exclusive_resources=[ResourceType.CACHE],
|
|
| 328 |
- complete_cb=None)
|
|
| 329 |
- self.schedule_jobs([job])
|
|
| 330 |
- |
|
| 331 |
- def _check_cache_size_real(self):
|
|
| 332 |
- job = CacheSizeJob(self, 'cache_size', 'cache_size/cache_size',
|
|
| 333 |
- resources=[ResourceType.CACHE,
|
|
| 334 |
- ResourceType.PROCESS],
|
|
| 335 |
- exclusive_resources=[ResourceType.CACHE],
|
|
| 336 |
- complete_cb=self._run_cleanup)
|
|
| 322 |
+ exclusive_resources=[ResourceType.CACHE])
|
|
| 337 | 323 |
self.schedule_jobs([job])
|
| 338 | 324 |
|
| 339 | 325 |
# _suspend_jobs()
|
| ... | ... | @@ -1646,8 +1646,8 @@ class Element(Plugin): |
| 1646 | 1646 |
}), os.path.join(metadir, 'workspaced-dependencies.yaml'))
|
| 1647 | 1647 |
|
| 1648 | 1648 |
with self.timed_activity("Caching artifact"):
|
| 1649 |
- self.__artifact_size = utils._get_dir_size(assembledir)
|
|
| 1650 |
- self.__artifacts.commit(self, assembledir, self.__get_cache_keys_for_commit())
|
|
| 1649 |
+ self.__artifact_size = self.__artifacts.commit(
|
|
| 1650 |
+ self, assembledir, self.__get_cache_keys_for_commit())
|
|
| 1651 | 1651 |
|
| 1652 | 1652 |
if collect is not None and collectvdir is None:
|
| 1653 | 1653 |
raise ElementError(
|
| ... | ... | @@ -1697,31 +1697,31 @@ class Element(Plugin): |
| 1697 | 1697 |
self._update_state()
|
| 1698 | 1698 |
|
| 1699 | 1699 |
def _pull_strong(self, *, progress=None):
|
| 1700 |
- weak_key = self._get_cache_key(strength=_KeyStrength.WEAK)
|
|
| 1701 |
- |
|
| 1702 | 1700 |
key = self.__strict_cache_key
|
| 1703 |
- if not self.__artifacts.pull(self, key, progress=progress):
|
|
| 1704 |
- return False
|
|
| 1701 |
+ pulled, self.__artifact_size = self.__artifacts.pull(
|
|
| 1702 |
+ self, key, progress=progress)
|
|
| 1705 | 1703 |
|
| 1706 |
- # update weak ref by pointing it to this newly fetched artifact
|
|
| 1707 |
- self.__artifacts.link_key(self, key, weak_key)
|
|
| 1704 |
+ if pulled:
|
|
| 1705 |
+ # update weak ref by pointing it to this newly fetched artifact
|
|
| 1706 |
+ weak_key = self._get_cache_key(strength=_KeyStrength.WEAK)
|
|
| 1707 |
+ self.__artifacts.link_key(self, key, weak_key)
|
|
| 1708 | 1708 |
|
| 1709 |
- return True
|
|
| 1709 |
+ return pulled
|
|
| 1710 | 1710 |
|
| 1711 | 1711 |
def _pull_weak(self, *, progress=None):
|
| 1712 | 1712 |
weak_key = self._get_cache_key(strength=_KeyStrength.WEAK)
|
| 1713 |
+ pulled, self.__artifact_size = self.__artifacts.pull(
|
|
| 1714 |
+ self, weak_key, progress=progress)
|
|
| 1713 | 1715 |
|
| 1714 |
- if not self.__artifacts.pull(self, weak_key, progress=progress):
|
|
| 1715 |
- return False
|
|
| 1716 |
- |
|
| 1717 |
- # extract strong cache key from this newly fetched artifact
|
|
| 1718 |
- self._pull_done()
|
|
| 1716 |
+ if pulled:
|
|
| 1717 |
+ # extract strong cache key from this newly fetched artifact
|
|
| 1718 |
+ self._pull_done()
|
|
| 1719 | 1719 |
|
| 1720 |
- # create tag for strong cache key
|
|
| 1721 |
- key = self._get_cache_key(strength=_KeyStrength.STRONG)
|
|
| 1722 |
- self.__artifacts.link_key(self, weak_key, key)
|
|
| 1720 |
+ # create tag for strong cache key
|
|
| 1721 |
+ key = self._get_cache_key(strength=_KeyStrength.STRONG)
|
|
| 1722 |
+ self.__artifacts.link_key(self, weak_key, key)
|
|
| 1723 | 1723 |
|
| 1724 |
- return True
|
|
| 1724 |
+ return pulled
|
|
| 1725 | 1725 |
|
| 1726 | 1726 |
# _pull():
|
| 1727 | 1727 |
#
|
| ... | ... | @@ -1741,13 +1741,12 @@ class Element(Plugin): |
| 1741 | 1741 |
if not pulled and not self._cached() and not context.get_strict():
|
| 1742 | 1742 |
pulled = self._pull_weak(progress=progress)
|
| 1743 | 1743 |
|
| 1744 |
- if not pulled:
|
|
| 1745 |
- return False
|
|
| 1744 |
+ if pulled:
|
|
| 1745 |
+ # Notify successful download
|
|
| 1746 |
+ display_key = self._get_brief_display_key()
|
|
| 1747 |
+ self.info("Downloaded artifact {}".format(display_key))
|
|
| 1746 | 1748 |
|
| 1747 |
- # Notify successful download
|
|
| 1748 |
- display_key = self._get_brief_display_key()
|
|
| 1749 |
- self.info("Downloaded artifact {}".format(display_key))
|
|
| 1750 |
- return True
|
|
| 1749 |
+ return pulled
|
|
| 1751 | 1750 |
|
| 1752 | 1751 |
# _skip_push():
|
| 1753 | 1752 |
#
|
| ... | ... | @@ -111,7 +111,7 @@ class CasBasedDirectory(Directory): |
| 111 | 111 |
the parent).
|
| 112 | 112 |
|
| 113 | 113 |
"""
|
| 114 |
- self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
|
|
| 114 |
+ self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())[0]
|
|
| 115 | 115 |
if caller:
|
| 116 | 116 |
old_dir = self._find_pb2_entry(caller.filename)
|
| 117 | 117 |
self.cas_cache.add_object(digest=old_dir.digest, buffer=caller.pb2_directory.SerializeToString())
|
| ... | ... | @@ -130,9 +130,10 @@ class CasBasedDirectory(Directory): |
| 130 | 130 |
self.index[entry.name].buildstream_object._recalculate_recursing_down(entry)
|
| 131 | 131 |
|
| 132 | 132 |
if parent:
|
| 133 |
- self.ref = self.cas_cache.add_object(digest=parent.digest, buffer=self.pb2_directory.SerializeToString())
|
|
| 133 |
+ self.ref = self.cas_cache.add_object(digest=parent.digest,
|
|
| 134 |
+ buffer=self.pb2_directory.SerializeToString())[0]
|
|
| 134 | 135 |
else:
|
| 135 |
- self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
|
|
| 136 |
+ self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())[0]
|
|
| 136 | 137 |
# We don't need to do anything more than that; files were already added earlier, and symlinks are
|
| 137 | 138 |
# part of the directory structure.
|
| 138 | 139 |
|
