Jonathan Maw pushed to branch jonathan/cache-cache-size at BuildStream / buildstream
Commits:
-
c15cb951
by Tristan Van Berkom at 2018-08-20T10:19:03Z
-
372abed5
by Tristan Van Berkom at 2018-08-20T11:47:53Z
-
1f88a1e9
by Jonathan Maw at 2018-08-20T12:36:02Z
-
d9507a9e
by Jonathan Maw at 2018-08-20T12:36:02Z
-
a9b81dc4
by Jonathan Maw at 2018-08-20T12:36:02Z
-
a67906e6
by Jonathan Maw at 2018-08-20T13:06:51Z
10 changed files:
- buildstream/_artifactcache/__init__.py
- buildstream/_artifactcache/artifactcache.py
- buildstream/_artifactcache/cascache.py
- buildstream/_context.py
- buildstream/_frontend/app.py
- buildstream/_scheduler/queues/buildqueue.py
- buildstream/_scheduler/scheduler.py
- setup.cfg
- + tests/artifactcache/cache_size.py
- tests/testutils/artifactshare.py
Changes:
... | ... | @@ -17,4 +17,4 @@ |
17 | 17 |
# Authors:
|
18 | 18 |
# Tristan Van Berkom <tristan vanberkom codethink co uk>
|
19 | 19 |
|
20 |
-from .artifactcache import ArtifactCache, ArtifactCacheSpec
|
|
20 |
+from .artifactcache import ArtifactCache, ArtifactCacheSpec, CACHE_SIZE_FILE
|
... | ... | @@ -28,6 +28,9 @@ from .. import utils |
28 | 28 |
from .. import _yaml
|
29 | 29 |
|
30 | 30 |
|
31 |
+CACHE_SIZE_FILE = "cache_size"
|
|
32 |
+ |
|
33 |
+ |
|
31 | 34 |
# An ArtifactCacheSpec holds the user configuration for a single remote
|
32 | 35 |
# artifact cache.
|
33 | 36 |
#
|
... | ... | @@ -82,7 +85,6 @@ class ArtifactCache(): |
82 | 85 |
self.extractdir = os.path.join(context.artifactdir, 'extract')
|
83 | 86 |
self.tmpdir = os.path.join(context.artifactdir, 'tmp')
|
84 | 87 |
|
85 |
- self.max_size = context.cache_quota
|
|
86 | 88 |
self.estimated_size = None
|
87 | 89 |
|
88 | 90 |
self.global_remote_specs = []
|
... | ... | @@ -90,6 +92,8 @@ class ArtifactCache(): |
90 | 92 |
|
91 | 93 |
self._local = False
|
92 | 94 |
self.cache_size = None
|
95 |
+ self.cache_quota = None
|
|
96 |
+ self.cache_lower_threshold = None
|
|
93 | 97 |
|
94 | 98 |
os.makedirs(self.extractdir, exist_ok=True)
|
95 | 99 |
os.makedirs(self.tmpdir, exist_ok=True)
|
... | ... | @@ -227,7 +231,7 @@ class ArtifactCache(): |
227 | 231 |
def clean(self):
|
228 | 232 |
artifacts = self.list_artifacts()
|
229 | 233 |
|
230 |
- while self.calculate_cache_size() >= self.context.cache_quota - self.context.cache_lower_threshold:
|
|
234 |
+ while self.calculate_cache_size() >= self.cache_quota - self.cache_lower_threshold:
|
|
231 | 235 |
try:
|
232 | 236 |
to_remove = artifacts.pop(0)
|
233 | 237 |
except IndexError:
|
... | ... | @@ -241,7 +245,7 @@ class ArtifactCache(): |
241 | 245 |
"Please increase the cache-quota in {}."
|
242 | 246 |
.format(self.context.config_origin or default_conf))
|
243 | 247 |
|
244 |
- if self.calculate_cache_size() > self.context.cache_quota:
|
|
248 |
+ if self.calculate_cache_size() > self.cache_quota:
|
|
245 | 249 |
raise ArtifactError("Cache too full. Aborting.",
|
246 | 250 |
detail=detail,
|
247 | 251 |
reason="cache-too-full")
|
... | ... | @@ -282,7 +286,11 @@ class ArtifactCache(): |
282 | 286 |
# If we don't currently have an estimate, figure out the real
|
283 | 287 |
# cache size.
|
284 | 288 |
if self.estimated_size is None:
|
285 |
- self.estimated_size = self.calculate_cache_size()
|
|
289 |
+ stored_size = self._read_cache_size()
|
|
290 |
+ if stored_size is not None:
|
|
291 |
+ self.estimated_size = stored_size
|
|
292 |
+ else:
|
|
293 |
+ self.estimated_size = self.calculate_cache_size()
|
|
286 | 294 |
|
287 | 295 |
return self.estimated_size
|
288 | 296 |
|
... | ... | @@ -541,6 +549,7 @@ class ArtifactCache(): |
541 | 549 |
self.estimated_size = self.calculate_cache_size()
|
542 | 550 |
|
543 | 551 |
self.estimated_size += artifact_size
|
552 |
+ self._write_cache_size(self.estimated_size)
|
|
544 | 553 |
|
545 | 554 |
# _set_cache_size()
|
546 | 555 |
#
|
... | ... | @@ -551,6 +560,109 @@ class ArtifactCache(): |
551 | 560 |
def _set_cache_size(self, cache_size):
|
552 | 561 |
self.estimated_size = cache_size
|
553 | 562 |
|
563 |
+ # set_cache_size is called in cleanup, where it may set the cache to None
|
|
564 |
+ if self.estimated_size is not None:
|
|
565 |
+ self._write_cache_size(self.estimated_size)
|
|
566 |
+ |
|
567 |
+ # _write_cache_size()
|
|
568 |
+ #
|
|
569 |
+ # Writes the given size of the artifact to the cache's size file
|
|
570 |
+ #
|
|
571 |
+ def _write_cache_size(self, size):
|
|
572 |
+ assert isinstance(size, int)
|
|
573 |
+ size_file_path = os.path.join(self.context.artifactdir, CACHE_SIZE_FILE)
|
|
574 |
+ with open(size_file_path, "w") as f:
|
|
575 |
+ f.write(str(size))
|
|
576 |
+ |
|
577 |
+ # _read_cache_size()
|
|
578 |
+ #
|
|
579 |
+ # Reads and returns the size of the artifact cache that's stored in the
|
|
580 |
+ # cache's size file
|
|
581 |
+ #
|
|
582 |
+ def _read_cache_size(self):
|
|
583 |
+ size_file_path = os.path.join(self.context.artifactdir, CACHE_SIZE_FILE)
|
|
584 |
+ |
|
585 |
+ if not os.path.exists(size_file_path):
|
|
586 |
+ return None
|
|
587 |
+ |
|
588 |
+ with open(size_file_path, "r") as f:
|
|
589 |
+ size = f.read()
|
|
590 |
+ |
|
591 |
+ try:
|
|
592 |
+ num_size = int(size)
|
|
593 |
+ except ValueError as e:
|
|
594 |
+ raise ArtifactError("Size '{}' parsed from '{}' was not an integer".format(
|
|
595 |
+ size, size_file_path)) from e
|
|
596 |
+ |
|
597 |
+ return num_size
|
|
598 |
+ |
|
599 |
+ # _calculate_cache_quota()
|
|
600 |
+ #
|
|
601 |
+ # Calculates and sets the cache quota and lower threshold based on the
|
|
602 |
+ # quota set in Context.
|
|
603 |
# It checks that the quota is both a valid expression, and that there is
|
|
604 |
+ # enough disk space to satisfy that quota
|
|
605 |
+ #
|
|
606 |
+ def _calculate_cache_quota(self):
|
|
607 |
+ # Headroom intended to give BuildStream a bit of leeway.
|
|
608 |
+ # This acts as the minimum size of cache_quota and also
|
|
609 |
+ # is taken from the user requested cache_quota.
|
|
610 |
+ #
|
|
611 |
+ if 'BST_TEST_SUITE' in os.environ:
|
|
612 |
+ headroom = 0
|
|
613 |
+ else:
|
|
614 |
+ headroom = 2e9
|
|
615 |
+ |
|
616 |
+ artifactdir_volume = self.context.artifactdir
|
|
617 |
+ while not os.path.exists(artifactdir_volume):
|
|
618 |
+ artifactdir_volume = os.path.dirname(artifactdir_volume)
|
|
619 |
+ |
|
620 |
+ try:
|
|
621 |
+ cache_quota = utils._parse_size(self.context.config_cache_quota, artifactdir_volume)
|
|
622 |
+ except utils.UtilError as e:
|
|
623 |
+ raise LoadError(LoadErrorReason.INVALID_DATA,
|
|
624 |
+ "{}\nPlease specify the value in bytes or as a % of full disk space.\n"
|
|
625 |
+ "\nValid values are, for example: 800M 10G 1T 50%\n"
|
|
626 |
+ .format(str(e))) from e
|
|
627 |
+ |
|
628 |
+ stat = os.statvfs(artifactdir_volume)
|
|
629 |
+ available_space = (stat.f_bsize * stat.f_bavail)
|
|
630 |
+ |
|
631 |
+ cache_size = self.get_approximate_cache_size()
|
|
632 |
+ |
|
633 |
+ # Ensure system has enough storage for the cache_quota
|
|
634 |
+ #
|
|
635 |
+ # If cache_quota is none, set it to the maximum it could possibly be.
|
|
636 |
+ #
|
|
637 |
# Also check that cache_quota is at least as large as our headroom.
|
|
638 |
+ #
|
|
639 |
+ if cache_quota is None: # Infinity, set to max system storage
|
|
640 |
+ cache_quota = cache_size + available_space
|
|
641 |
+ if cache_quota < headroom: # Check minimum
|
|
642 |
+ raise LoadError(LoadErrorReason.INVALID_DATA,
|
|
643 |
+ "Invalid cache quota ({}): ".format(utils._pretty_size(cache_quota)) +
|
|
644 |
+ "BuildStream requires a minimum cache quota of 2G.")
|
|
645 |
+ elif cache_quota > cache_size + available_space: # Check maximum
|
|
646 |
+ raise LoadError(LoadErrorReason.INVALID_DATA,
|
|
647 |
+ ("Your system does not have enough available " +
|
|
648 |
+ "space to support the cache quota specified.\n" +
|
|
649 |
+ "You currently have:\n" +
|
|
650 |
+ "- {used} of cache in use at {local_cache_path}\n" +
|
|
651 |
+ "- {available} of available system storage").format(
|
|
652 |
+ used=utils._pretty_size(cache_size),
|
|
653 |
+ local_cache_path=self.context.artifactdir,
|
|
654 |
+ available=utils._pretty_size(available_space)))
|
|
655 |
+ |
|
656 |
+ # Place a slight headroom (2e9 (2GB) on the cache_quota) into
|
|
657 |
+ # cache_quota to try and avoid exceptions.
|
|
658 |
+ #
|
|
659 |
+ # Of course, we might still end up running out during a build
|
|
660 |
+ # if we end up writing more than 2G, but hey, this stuff is
|
|
661 |
+ # already really fuzzy.
|
|
662 |
+ #
|
|
663 |
+ self.cache_quota = cache_quota - headroom
|
|
664 |
+ self.cache_lower_threshold = self.cache_quota / 2
|
|
665 |
+ |
|
554 | 666 |
|
555 | 667 |
# _configured_remote_artifact_cache_specs():
|
556 | 668 |
#
|
... | ... | @@ -61,6 +61,8 @@ class CASCache(ArtifactCache): |
61 | 61 |
os.makedirs(os.path.join(self.casdir, 'refs', 'heads'), exist_ok=True)
|
62 | 62 |
os.makedirs(os.path.join(self.casdir, 'objects'), exist_ok=True)
|
63 | 63 |
|
64 |
+ self._calculate_cache_quota()
|
|
65 |
+ |
|
64 | 66 |
self._enable_push = enable_push
|
65 | 67 |
|
66 | 68 |
# Per-project list of _CASRemote instances.
|
... | ... | @@ -330,7 +332,7 @@ class CASCache(ArtifactCache): |
330 | 332 |
request.write_offset = offset
|
331 | 333 |
# max. 64 kB chunks
|
332 | 334 |
request.data = f.read(chunk_size)
|
333 |
- request.resource_name = resource_name
|
|
335 |
+ request.resource_name = resource_name # pylint: disable=cell-var-from-loop
|
|
334 | 336 |
request.finish_write = remaining <= 0
|
335 | 337 |
yield request
|
336 | 338 |
offset += chunk_size
|
... | ... | @@ -64,12 +64,6 @@ class Context(): |
64 | 64 |
# The locations from which to push and pull prebuilt artifacts
|
65 | 65 |
self.artifact_cache_specs = []
|
66 | 66 |
|
67 |
- # The artifact cache quota
|
|
68 |
- self.cache_quota = None
|
|
69 |
- |
|
70 |
- # The lower threshold to which we aim to reduce the cache size
|
|
71 |
- self.cache_lower_threshold = None
|
|
72 |
- |
|
73 | 67 |
# The directory to store build logs
|
74 | 68 |
self.logdir = None
|
75 | 69 |
|
... | ... | @@ -124,6 +118,8 @@ class Context(): |
124 | 118 |
self._workspaces = None
|
125 | 119 |
self._log_handle = None
|
126 | 120 |
self._log_filename = None
|
121 |
+ self.config_cache_quota = 'infinity'
|
|
122 |
+ self.artifactdir_volume = None
|
|
127 | 123 |
|
128 | 124 |
# load()
|
129 | 125 |
#
|
... | ... | @@ -183,71 +179,7 @@ class Context(): |
183 | 179 |
cache = _yaml.node_get(defaults, Mapping, 'cache')
|
184 | 180 |
_yaml.node_validate(cache, ['quota'])
|
185 | 181 |
|
186 |
- artifactdir_volume = self.artifactdir
|
|
187 |
- while not os.path.exists(artifactdir_volume):
|
|
188 |
- artifactdir_volume = os.path.dirname(artifactdir_volume)
|
|
189 |
- |
|
190 |
- # We read and parse the cache quota as specified by the user
|
|
191 |
- cache_quota = _yaml.node_get(cache, str, 'quota', default_value='infinity')
|
|
192 |
- try:
|
|
193 |
- cache_quota = utils._parse_size(cache_quota, artifactdir_volume)
|
|
194 |
- except utils.UtilError as e:
|
|
195 |
- raise LoadError(LoadErrorReason.INVALID_DATA,
|
|
196 |
- "{}\nPlease specify the value in bytes or as a % of full disk space.\n"
|
|
197 |
- "\nValid values are, for example: 800M 10G 1T 50%\n"
|
|
198 |
- .format(str(e))) from e
|
|
199 |
- |
|
200 |
- # Headroom intended to give BuildStream a bit of leeway.
|
|
201 |
- # This acts as the minimum size of cache_quota and also
|
|
202 |
- # is taken from the user requested cache_quota.
|
|
203 |
- #
|
|
204 |
- if 'BST_TEST_SUITE' in os.environ:
|
|
205 |
- headroom = 0
|
|
206 |
- else:
|
|
207 |
- headroom = 2e9
|
|
208 |
- |
|
209 |
- stat = os.statvfs(artifactdir_volume)
|
|
210 |
- available_space = (stat.f_bsize * stat.f_bavail)
|
|
211 |
- |
|
212 |
# Again, the artifact directory may not have been created yet
|
|
213 |
- #
|
|
214 |
- if not os.path.exists(self.artifactdir):
|
|
215 |
- cache_size = 0
|
|
216 |
- else:
|
|
217 |
- cache_size = utils._get_dir_size(self.artifactdir)
|
|
218 |
- |
|
219 |
- # Ensure system has enough storage for the cache_quota
|
|
220 |
- #
|
|
221 |
- # If cache_quota is none, set it to the maximum it could possibly be.
|
|
222 |
- #
|
|
223 |
# Also check that cache_quota is at least as large as our headroom.
|
|
224 |
- #
|
|
225 |
- if cache_quota is None: # Infinity, set to max system storage
|
|
226 |
- cache_quota = cache_size + available_space
|
|
227 |
- if cache_quota < headroom: # Check minimum
|
|
228 |
- raise LoadError(LoadErrorReason.INVALID_DATA,
|
|
229 |
- "Invalid cache quota ({}): ".format(utils._pretty_size(cache_quota)) +
|
|
230 |
- "BuildStream requires a minimum cache quota of 2G.")
|
|
231 |
- elif cache_quota > cache_size + available_space: # Check maximum
|
|
232 |
- raise LoadError(LoadErrorReason.INVALID_DATA,
|
|
233 |
- ("Your system does not have enough available " +
|
|
234 |
- "space to support the cache quota specified.\n" +
|
|
235 |
- "You currently have:\n" +
|
|
236 |
- "- {used} of cache in use at {local_cache_path}\n" +
|
|
237 |
- "- {available} of available system storage").format(
|
|
238 |
- used=utils._pretty_size(cache_size),
|
|
239 |
- local_cache_path=self.artifactdir,
|
|
240 |
- available=utils._pretty_size(available_space)))
|
|
241 |
- |
|
242 |
- # Place a slight headroom (2e9 (2GB) on the cache_quota) into
|
|
243 |
- # cache_quota to try and avoid exceptions.
|
|
244 |
- #
|
|
245 |
- # Of course, we might still end up running out during a build
|
|
246 |
- # if we end up writing more than 2G, but hey, this stuff is
|
|
247 |
- # already really fuzzy.
|
|
248 |
- #
|
|
249 |
- self.cache_quota = cache_quota - headroom
|
|
250 |
- self.cache_lower_threshold = self.cache_quota / 2
|
|
182 |
+ self.config_cache_quota = _yaml.node_get(cache, str, 'quota', default_value='infinity')
|
|
251 | 183 |
|
252 | 184 |
# Load artifact share configuration
|
253 | 185 |
self.artifact_cache_specs = ArtifactCache.specs_from_config_node(defaults)
|
... | ... | @@ -198,8 +198,10 @@ class App(): |
198 | 198 |
option_value = self._main_options.get(cli_option)
|
199 | 199 |
if option_value is not None:
|
200 | 200 |
setattr(self.context, context_attr, option_value)
|
201 |
- |
|
202 |
- Platform.create_instance(self.context)
|
|
201 |
+ try:
|
|
202 |
+ Platform.create_instance(self.context)
|
|
203 |
+ except BstError as e:
|
|
204 |
+ self._error_exit(e, "Error instantiating platform")
|
|
203 | 205 |
|
204 | 206 |
# Create the logger right before setting the message handler
|
205 | 207 |
self.logger = LogLine(self.context,
|
... | ... | @@ -97,7 +97,7 @@ class BuildQueue(Queue): |
97 | 97 |
cache = element._get_artifact_cache()
|
98 | 98 |
cache._add_artifact_size(artifact_size)
|
99 | 99 |
|
100 |
- if cache.get_approximate_cache_size() > self._scheduler.context.cache_quota:
|
|
100 |
+ if cache.get_approximate_cache_size() > cache.cache_quota:
|
|
101 | 101 |
self._scheduler._check_cache_size_real()
|
102 | 102 |
|
103 | 103 |
def done(self, job, element, result, success):
|
... | ... | @@ -29,6 +29,7 @@ from contextlib import contextmanager |
29 | 29 |
# Local imports
|
30 | 30 |
from .resources import Resources, ResourceType
|
31 | 31 |
from .jobs import CacheSizeJob, CleanupJob
|
32 |
+from .._platform import Platform
|
|
32 | 33 |
|
33 | 34 |
|
34 | 35 |
# A decent return code for Scheduler.run()
|
... | ... | @@ -316,7 +317,8 @@ class Scheduler(): |
316 | 317 |
self._sched()
|
317 | 318 |
|
318 | 319 |
def _run_cleanup(self, cache_size):
|
319 |
- if cache_size and cache_size < self.context.cache_quota:
|
|
320 |
+ platform = Platform.get_platform()
|
|
321 |
+ if cache_size and cache_size < platform.artifactcache.cache_quota:
|
|
320 | 322 |
return
|
321 | 323 |
|
322 | 324 |
job = CleanupJob(self, 'cleanup', 'cleanup',
|
... | ... | @@ -11,7 +11,7 @@ test=pytest |
11 | 11 |
|
12 | 12 |
[tool:pytest]
|
13 | 13 |
addopts = --verbose --basetemp ./tmp --pep8 --pylint --pylint-rcfile=.pylintrc --cov=buildstream --cov-config .coveragerc
|
14 |
-norecursedirs = integration-cache tmp __pycache__ .eggs
|
|
14 |
+norecursedirs = tests/integration/project integration-cache tmp __pycache__ .eggs
|
|
15 | 15 |
python_files = tests/*/*.py
|
16 | 16 |
pep8maxlinelength = 119
|
17 | 17 |
pep8ignore =
|
1 |
+import os
|
|
2 |
+import pytest
|
|
3 |
+ |
|
4 |
+from buildstream import _yaml
|
|
5 |
+from buildstream._artifactcache import CACHE_SIZE_FILE
|
|
6 |
+ |
|
7 |
+from tests.testutils import cli, create_element_size
|
|
8 |
+ |
|
9 |
+# XXX: Currently lacking:
|
|
10 |
+# * A way to check whether it's faster to read cache size on
|
|
11 |
+# successive invocations.
|
|
12 |
+# * A way to check whether the cache size file has been read.
|
|
13 |
+ |
|
14 |
+ |
|
15 |
+def create_project(project_dir):
|
|
16 |
+ project_file = os.path.join(project_dir, "project.conf")
|
|
17 |
+ project_conf = {
|
|
18 |
+ "name": "test"
|
|
19 |
+ }
|
|
20 |
+ _yaml.dump(project_conf, project_file)
|
|
21 |
+ element_name = "test.bst"
|
|
22 |
+ create_element_size(element_name, project_dir, ".", [], 1024)
|
|
23 |
+ |
|
24 |
+ |
|
25 |
+def test_cache_size_roundtrip(cli, tmpdir):
|
|
26 |
+ # Builds (to put files in the cache), then invokes buildstream again
|
|
27 |
+ # to check nothing breaks
|
|
28 |
+ |
|
29 |
+ # Create project
|
|
30 |
+ project_dir = str(tmpdir)
|
|
31 |
+ create_project(project_dir)
|
|
32 |
+ |
|
33 |
+ # Build, to populate the cache
|
|
34 |
+ res = cli.run(project=project_dir, args=["build", "test.bst"])
|
|
35 |
+ res.assert_success()
|
|
36 |
+ |
|
37 |
+ # Show, to check that nothing breaks while reading cache size
|
|
38 |
+ res = cli.run(project=project_dir, args=["show", "test.bst"])
|
|
39 |
+ res.assert_success()
|
|
40 |
+ |
|
41 |
+ |
|
42 |
+def test_cache_size_write(cli, tmpdir):
|
|
43 |
+ # Builds (to put files in the cache), then checks a number is
|
|
44 |
+ # written to the cache size file.
|
|
45 |
+ |
|
46 |
+ project_dir = str(tmpdir)
|
|
47 |
+ create_project(project_dir)
|
|
48 |
+ |
|
49 |
+ # Artifact cache must be in a known place
|
|
50 |
+ artifactdir = os.path.join(project_dir, "artifacts")
|
|
51 |
+ cli.configure({"artifactdir": artifactdir})
|
|
52 |
+ |
|
53 |
+ # Build, to populate the cache
|
|
54 |
+ res = cli.run(project=project_dir, args=["build", "test.bst"])
|
|
55 |
+ res.assert_success()
|
|
56 |
+ |
|
57 |
+ # Inspect the artifact cache
|
|
58 |
+ sizefile = os.path.join(artifactdir, CACHE_SIZE_FILE)
|
|
59 |
+ assert os.path.isfile(sizefile)
|
|
60 |
+ with open(sizefile, "r") as f:
|
|
61 |
+ size_data = f.read()
|
|
62 |
+ size = int(size_data)
|
... | ... | @@ -140,6 +140,7 @@ class ArtifactShare(): |
140 | 140 |
|
141 | 141 |
return statvfs_result(f_blocks=self.total_space,
|
142 | 142 |
f_bfree=self.free_space - repo_size,
|
143 |
+ f_bavail=self.free_space - repo_size,
|
|
143 | 144 |
f_bsize=1)
|
144 | 145 |
|
145 | 146 |
|
... | ... | @@ -156,4 +157,4 @@ def create_artifact_share(directory, *, total_space=None, free_space=None): |
156 | 157 |
share.close()
|
157 | 158 |
|
158 | 159 |
|
159 |
-statvfs_result = namedtuple('statvfs_result', 'f_blocks f_bfree f_bsize')
|
|
160 |
+statvfs_result = namedtuple('statvfs_result', 'f_blocks f_bfree f_bsize f_bavail')
|