Jim MacArthur pushed to branch jmac/remote_execution_rebase at BuildStream / buildstream
Commits:
- 42aa3999 by William Salmon at 2018-08-07T13:41:02Z
- 2ceb5dec by Will Salmon at 2018-08-07T14:46:36Z
- eee4b674 by Jürg Billeter at 2018-08-07T15:36:35Z
- ea27e389 by Jürg Billeter at 2018-08-07T15:36:35Z
- fa5a59f0 by Jürg Billeter at 2018-08-07T16:48:21Z
- ed653fbc by Chandan Singh at 2018-08-08T00:06:28Z
- 65f382f1 by Chandan Singh at 2018-08-08T10:52:32Z
- 7de520f8 by Jim MacArthur at 2018-08-08T12:52:26Z
- 421c6969 by Jim MacArthur at 2018-08-08T12:52:26Z
- 0aba926c by Jim MacArthur at 2018-08-08T12:52:26Z
- a626f146 by Jim MacArthur at 2018-08-08T12:52:26Z
- b4351094 by Jim MacArthur at 2018-08-08T12:52:26Z
- f0bce7da by Jim MacArthur at 2018-08-08T12:52:26Z
- 45e96e5a by Jim MacArthur at 2018-08-08T12:52:26Z
- 0054727e by Jim MacArthur at 2018-08-08T12:52:26Z
- 3fcdcf3a by Jim MacArthur at 2018-08-08T12:52:26Z
- 197f081d by Jim MacArthur at 2018-08-09T15:29:01Z
- 612a33a2 by Jim MacArthur at 2018-08-09T15:29:12Z
- fe9b657a by Jim MacArthur at 2018-08-14T16:29:31Z
- 47b81e9c by Jim MacArthur at 2018-08-14T16:29:31Z
- 08565f1e by Jim MacArthur at 2018-08-14T16:29:54Z
- 403cd1a8 by Jim MacArthur at 2018-08-14T16:29:54Z
- 5d7751dd by Jim MacArthur at 2018-08-14T16:29:54Z
- cc6319b2 by Jim MacArthur at 2018-08-14T16:29:54Z
25 changed files:
- .gitlab-ci.yml
- buildstream/__init__.py
- buildstream/_artifactcache/artifactcache.py
- buildstream/_artifactcache/cascache.py
- buildstream/_pipeline.py
- buildstream/_platform/linux.py
- buildstream/buildelement.py
- buildstream/element.py
- + buildstream/element_enums.py
- buildstream/plugins/sources/git.py
- buildstream/sandbox/__init__.py
- buildstream/sandbox/_mount.py
- buildstream/sandbox/_sandboxbwrap.py
- buildstream/sandbox/_sandboxchroot.py
- + buildstream/sandbox/_sandboxremote.py
- buildstream/sandbox/sandbox.py
- + buildstream/storage/_casbaseddirectory.py
- buildstream/storage/_filebaseddirectory.py
- buildstream/storage/directory.py
- setup.py
- + tests/sandboxes/storage-test/original/bin/bash
- + tests/sandboxes/storage-test/original/bin/hello
- + tests/sandboxes/storage-test/overlay/bin/bash
- + tests/sandboxes/storage-tests.py
- tests/sources/git.py
Changes:
... | ... | @@ -26,15 +26,6 @@ source_dist: |
26 | 26 |
- tar -ztf dist/*
|
27 | 27 |
- tarball=$(cd dist && echo $(ls *))
|
28 | 28 |
|
29 |
- # Create an installer script
|
|
30 |
- - |
|
|
31 |
- cat > dist/install.sh << EOF
|
|
32 |
- #!/bin/sh
|
|
33 |
- tar -zxf ${tarball}
|
|
34 |
- cd ${tarball%.tar.gz}
|
|
35 |
- pip3 install --no-index .
|
|
36 |
- EOF
|
|
37 |
- |
|
38 | 29 |
# unpack tarball as `dist/buildstream` directory
|
39 | 30 |
- |
|
40 | 31 |
cat > dist/unpack.sh << EOF
|
... | ... | @@ -44,7 +35,6 @@ source_dist: |
44 | 35 |
EOF
|
45 | 36 |
|
46 | 37 |
# Make our helpers executable
|
47 |
- - chmod +x dist/install.sh
|
|
48 | 38 |
- chmod +x dist/unpack.sh
|
49 | 39 |
artifacts:
|
50 | 40 |
paths:
|
... | ... | @@ -30,6 +30,7 @@ if "_BST_COMPLETION" not in os.environ: |
30 | 30 |
from .sandbox import Sandbox, SandboxFlags
|
31 | 31 |
from .plugin import Plugin
|
32 | 32 |
from .source import Source, SourceError, Consistency, SourceFetcher
|
33 |
- from .element import Element, ElementError, Scope
|
|
33 |
+ from .element import Element, ElementError
|
|
34 |
+ from .element_enums import Scope
|
|
34 | 35 |
from .buildelement import BuildElement
|
35 | 36 |
from .scriptelement import ScriptElement
|
... | ... | @@ -21,7 +21,7 @@ import os |
21 | 21 |
import string
|
22 | 22 |
from collections import Mapping, namedtuple
|
23 | 23 |
|
24 |
-from ..element import _KeyStrength
|
|
24 |
+from ..element_enums import _KeyStrength
|
|
25 | 25 |
from .._exceptions import ArtifactError, ImplError, LoadError, LoadErrorReason
|
26 | 26 |
from .._message import Message, MessageType
|
27 | 27 |
from .. import utils
|
... | ... | @@ -213,6 +213,29 @@ class CASCache(ArtifactCache): |
213 | 213 |
remotes_for_project = self._remotes[element._get_project()]
|
214 | 214 |
return any(remote.spec.push for remote in remotes_for_project)
|
215 | 215 |
|
216 |
+ def pull_key(self, key, size_bytes, project):
|
|
217 |
+ """ Pull a single key rather than an artifact.
|
|
218 |
+ Does not update local refs. """
|
|
219 |
+ |
|
220 |
+ for remote in self._remotes[project]:
|
|
221 |
+ try:
|
|
222 |
+ remote.init()
|
|
223 |
+ |
|
224 |
+ tree = remote_execution_pb2.Digest()
|
|
225 |
+ tree.hash = key
|
|
226 |
+ tree.size_bytes = size_bytes
|
|
227 |
+ |
|
228 |
+ self._fetch_directory(remote, tree)
|
|
229 |
+ |
|
230 |
+ # no need to pull from additional remotes
|
|
231 |
+ return True
|
|
232 |
+ |
|
233 |
+ except grpc.RpcError as e:
|
|
234 |
+ if e.code() != grpc.StatusCode.NOT_FOUND:
|
|
235 |
+ raise
|
|
236 |
+ |
|
237 |
+ return False
|
|
238 |
+ |
|
216 | 239 |
def pull(self, element, key, *, progress=None):
|
217 | 240 |
ref = self.get_artifact_fullname(element, key)
|
218 | 241 |
|
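Note on the new pull_key() API above: the (key, size_bytes) pair it takes is exactly a REAPI Digest, and it tries each configured remote in turn. A minimal sketch of the intended call pattern, assuming a configured CASCache and project from the surrounding context (this mirrors what the remote sandbox code later in this push does):

    def fetch_tree(cascache, project, digest):
        # pull_key() returns False when no configured remote has the blob;
        # unlike pull(), it does not create a local ref for what it fetched.
        if not cascache.pull_key(digest.hash, digest.size_bytes, project):
            raise RuntimeError("tree {} not found on any remote".format(digest.hash))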
... | ... | @@ -254,10 +277,93 @@ class CASCache(ArtifactCache): |
254 | 277 |
|
255 | 278 |
self.set_ref(newref, tree)
|
256 | 279 |
|
280 |
+ def _push_refs_to_remote(self, refs, remote, may_have_dependencies):
|
|
281 |
+ skipped_remote = True
|
|
282 |
+ |
|
283 |
+ try:
|
|
284 |
+ for ref in refs:
|
|
285 |
+ tree = self.resolve_ref(ref)
|
|
286 |
+ |
|
287 |
+ # Check whether ref is already on the server in which case
|
|
288 |
+ # there is no need to push the artifact
|
|
289 |
+ try:
|
|
290 |
+ request = buildstream_pb2.GetReferenceRequest()
|
|
291 |
+ request.key = ref
|
|
292 |
+ response = remote.ref_storage.GetReference(request)
|
|
293 |
+ |
|
294 |
+ if response.digest.hash == tree.hash and response.digest.size_bytes == tree.size_bytes:
|
|
295 |
+ # ref is already on the server with the same tree
|
|
296 |
+ continue
|
|
297 |
+ |
|
298 |
+ except grpc.RpcError as e:
|
|
299 |
+ if e.code() != grpc.StatusCode.NOT_FOUND:
|
|
300 |
+ # Intentionally re-raise RpcError for outer except block.
|
|
301 |
+ raise
|
|
302 |
+ |
|
303 |
+ missing_blobs = {}
|
|
304 |
+ required_blobs = self._required_blobs(tree, may_have_dependencies)
|
|
305 |
+ |
|
306 |
+ # Limit size of FindMissingBlobs request
|
|
307 |
+ for required_blobs_group in _grouper(required_blobs, 512):
|
|
308 |
+ request = remote_execution_pb2.FindMissingBlobsRequest()
|
|
309 |
+ |
|
310 |
+ for required_digest in required_blobs_group:
|
|
311 |
+ d = request.blob_digests.add()
|
|
312 |
+ d.hash = required_digest.hash
|
|
313 |
+ d.size_bytes = required_digest.size_bytes
|
|
314 |
+ |
|
315 |
+ response = remote.cas.FindMissingBlobs(request)
|
|
316 |
+ for digest in response.missing_blob_digests:
|
|
317 |
+ d = remote_execution_pb2.Digest()
|
|
318 |
+ d.hash = digest.hash
|
|
319 |
+ d.size_bytes = digest.size_bytes
|
|
320 |
+ missing_blobs[d.hash] = d
|
|
321 |
+ |
|
322 |
+ # Upload any blobs missing on the server
|
|
323 |
+ skipped_remote = False
|
|
324 |
+ for digest in missing_blobs.values():
|
|
325 |
+ def request_stream():
|
|
326 |
+ resource_name = os.path.join(digest.hash, str(digest.size_bytes))
|
|
327 |
+ with open(self.objpath(digest), 'rb') as f:
|
|
328 |
+ assert os.fstat(f.fileno()).st_size == digest.size_bytes
|
|
329 |
+ offset = 0
|
|
330 |
+ finished = False
|
|
331 |
+ remaining = digest.size_bytes
|
|
332 |
+ while not finished:
|
|
333 |
+ chunk_size = min(remaining, 64 * 1024)
|
|
334 |
+ remaining -= chunk_size
|
|
335 |
+ |
|
336 |
+ request = bytestream_pb2.WriteRequest()
|
|
337 |
+ request.write_offset = offset
|
|
338 |
+ # max. 64 kB chunks
|
|
339 |
+ request.data = f.read(chunk_size)
|
|
340 |
+ request.resource_name = resource_name
|
|
341 |
+ request.finish_write = remaining <= 0
|
|
342 |
+ yield request
|
|
343 |
+ offset += chunk_size
|
|
344 |
+ finished = request.finish_write
|
|
345 |
+ response = remote.bytestream.Write(request_stream())
|
|
346 |
+ |
|
347 |
+ request = buildstream_pb2.UpdateReferenceRequest()
|
|
348 |
+ request.keys.append(ref)
|
|
349 |
+ request.digest.hash = tree.hash
|
|
350 |
+ request.digest.size_bytes = tree.size_bytes
|
|
351 |
+ remote.ref_storage.UpdateReference(request)
|
|
352 |
+ |
|
353 |
+ except grpc.RpcError as e:
|
|
354 |
+ if e.code() != grpc.StatusCode.RESOURCE_EXHAUSTED:
|
|
355 |
+ raise ArtifactError("Failed to push artifact {}: {}".format(refs, e), temporary=True) from e
|
|
356 |
+ |
|
357 |
+ return not skipped_remote
|
|
358 |
+ |
|
257 | 359 |
def push(self, element, keys):
|
360 |
+ keys = list(keys)
|
|
258 | 361 |
refs = [self.get_artifact_fullname(element, key) for key in keys]
|
259 | 362 |
|
260 | 363 |
project = element._get_project()
|
364 |
+ return self.push_refs(refs, project, element=element)
|
|
365 |
+ |
|
366 |
+ def push_refs(self, refs, project, may_have_dependencies=True, element=None):
|
|
261 | 367 |
|
262 | 368 |
push_remotes = [r for r in self._remotes[project] if r.spec.push]
|
263 | 369 |
|
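The per-remote push logic is now factored out into _push_refs_to_remote(), and push_refs() becomes an entry point that can push arbitrary refs without an Element. A hedged sketch of the intended call pattern ('cascache' and 'project' are assumed from context; this is how the remote sandbox later in this push uploads its Command and Action blobs):

    def publish_blob(cascache, project, ref):
        # may_have_dependencies=False skips parsing the object as a Directory
        # and walking its children; the single blob is pushed as-is.
        if not cascache.push_refs([ref], project, may_have_dependencies=False):
            # A skipped push is acceptable as long as the remote has the ref.
            if not cascache.verify_key_pushed(ref, project):
                raise RuntimeError("remote does not have ref {}".format(ref))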
... | ... | @@ -265,94 +371,52 @@ class CASCache(ArtifactCache): |
265 | 371 |
|
266 | 372 |
for remote in push_remotes:
|
267 | 373 |
remote.init()
|
268 |
- skipped_remote = True
|
|
269 |
- element.info("Pushing {} -> {}".format(element._get_brief_display_key(), remote.spec.url))
|
|
270 |
- |
|
271 |
- try:
|
|
272 |
- for ref in refs:
|
|
273 |
- tree = self.resolve_ref(ref)
|
|
274 |
- |
|
275 |
- # Check whether ref is already on the server in which case
|
|
276 |
- # there is no need to push the artifact
|
|
277 |
- try:
|
|
278 |
- request = buildstream_pb2.GetReferenceRequest()
|
|
279 |
- request.key = ref
|
|
280 |
- response = remote.ref_storage.GetReference(request)
|
|
281 |
- |
|
282 |
- if response.digest.hash == tree.hash and response.digest.size_bytes == tree.size_bytes:
|
|
283 |
- # ref is already on the server with the same tree
|
|
284 |
- continue
|
|
285 |
- |
|
286 |
- except grpc.RpcError as e:
|
|
287 |
- if e.code() != grpc.StatusCode.NOT_FOUND:
|
|
288 |
- # Intentionally re-raise RpcError for outer except block.
|
|
289 |
- raise
|
|
290 |
- |
|
291 |
- missing_blobs = {}
|
|
292 |
- required_blobs = self._required_blobs(tree)
|
|
293 |
- |
|
294 |
- # Limit size of FindMissingBlobs request
|
|
295 |
- for required_blobs_group in _grouper(required_blobs, 512):
|
|
296 |
- request = remote_execution_pb2.FindMissingBlobsRequest()
|
|
297 |
- |
|
298 |
- for required_digest in required_blobs_group:
|
|
299 |
- d = request.blob_digests.add()
|
|
300 |
- d.hash = required_digest.hash
|
|
301 |
- d.size_bytes = required_digest.size_bytes
|
|
302 |
- |
|
303 |
- response = remote.cas.FindMissingBlobs(request)
|
|
304 |
- for digest in response.missing_blob_digests:
|
|
305 |
- d = remote_execution_pb2.Digest()
|
|
306 |
- d.hash = digest.hash
|
|
307 |
- d.size_bytes = digest.size_bytes
|
|
308 |
- missing_blobs[d.hash] = d
|
|
309 |
- |
|
310 |
- # Upload any blobs missing on the server
|
|
311 |
- skipped_remote = False
|
|
312 |
- for digest in missing_blobs.values():
|
|
313 |
- def request_stream():
|
|
314 |
- resource_name = os.path.join(digest.hash, str(digest.size_bytes))
|
|
315 |
- with open(self.objpath(digest), 'rb') as f:
|
|
316 |
- assert os.fstat(f.fileno()).st_size == digest.size_bytes
|
|
317 |
- offset = 0
|
|
318 |
- finished = False
|
|
319 |
- remaining = digest.size_bytes
|
|
320 |
- while not finished:
|
|
321 |
- chunk_size = min(remaining, 64 * 1024)
|
|
322 |
- remaining -= chunk_size
|
|
323 |
- |
|
324 |
- request = bytestream_pb2.WriteRequest()
|
|
325 |
- request.write_offset = offset
|
|
326 |
- # max. 64 kB chunks
|
|
327 |
- request.data = f.read(chunk_size)
|
|
328 |
- request.resource_name = resource_name
|
|
329 |
- request.finish_write = remaining <= 0
|
|
330 |
- yield request
|
|
331 |
- offset += chunk_size
|
|
332 |
- finished = request.finish_write
|
|
333 |
- response = remote.bytestream.Write(request_stream())
|
|
334 |
- |
|
335 |
- request = buildstream_pb2.UpdateReferenceRequest()
|
|
336 |
- request.keys.append(ref)
|
|
337 |
- request.digest.hash = tree.hash
|
|
338 |
- request.digest.size_bytes = tree.size_bytes
|
|
339 |
- remote.ref_storage.UpdateReference(request)
|
|
340 |
- |
|
341 |
- pushed = True
|
|
342 |
- |
|
343 |
- except grpc.RpcError as e:
|
|
344 |
- if e.code() != grpc.StatusCode.RESOURCE_EXHAUSTED:
|
|
345 |
- raise ArtifactError("Failed to push artifact {}: {}".format(refs, e), temporary=True) from e
|
|
346 |
- |
|
347 |
- if skipped_remote:
|
|
374 |
+ if self._push_refs_to_remote(refs, remote, may_have_dependencies):
|
|
375 |
+ pushed = True
|
|
376 |
+ elif element:
|
|
348 | 377 |
self.context.message(Message(
|
349 | 378 |
None,
|
350 | 379 |
MessageType.SKIPPED,
|
351 | 380 |
"Remote ({}) already has {} cached".format(
|
352 | 381 |
remote.spec.url, element._get_brief_display_key())
|
353 | 382 |
))
|
383 |
+ |
|
354 | 384 |
return pushed
|
355 | 385 |
|
386 |
+ def verify_key_pushed(self, key, project):
|
|
387 |
+ ref = key
|
|
388 |
+ push_remotes = [r for r in self._remotes[project] if r.spec.push]
|
|
389 |
+ |
|
390 |
+ pushed = False
|
|
391 |
+ |
|
392 |
+ for remote in push_remotes:
|
|
393 |
+ remote.init()
|
|
394 |
+ |
|
395 |
+ if self._verify_ref_on_remote(ref, remote):
|
|
396 |
+ pushed = True
|
|
397 |
+ |
|
398 |
+ return pushed
|
|
399 |
+ |
|
400 |
+ def _verify_ref_on_remote(self, ref, remote):
|
|
401 |
+ tree = self.resolve_ref(ref)
|
|
402 |
+ |
|
403 |
+ # Check whether ref is already on the server in which case
|
|
404 |
+ # there is no need to push the artifact
|
|
405 |
+ try:
|
|
406 |
+ request = buildstream_pb2.GetReferenceRequest()
|
|
407 |
+ request.key = ref
|
|
408 |
+ response = remote.ref_storage.GetReference(request)
|
|
409 |
+ |
|
410 |
+ if response.digest.hash == tree.hash and response.digest.size_bytes == tree.size_bytes:
|
|
411 |
+ # ref is already on the server with the same tree
|
|
412 |
+ return True
|
|
413 |
+ |
|
414 |
+ except grpc.RpcError as e:
|
|
415 |
+ if e.code() != grpc.StatusCode.NOT_FOUND:
|
|
416 |
+ raise
|
|
417 |
+ |
|
418 |
+ return False
|
|
419 |
+ |
|
356 | 420 |
################################################
|
357 | 421 |
# API Private Methods #
|
358 | 422 |
################################################
|
... | ... | @@ -726,26 +790,27 @@ class CASCache(ArtifactCache): |
726 | 790 |
#
|
727 | 791 |
q.put(str(e))
|
728 | 792 |
|
729 |
- def _required_blobs(self, tree):
|
|
793 |
+ def _required_blobs(self, tree, may_have_dependencies=True):
|
|
730 | 794 |
# parse directory, and recursively add blobs
|
731 | 795 |
d = remote_execution_pb2.Digest()
|
732 | 796 |
d.hash = tree.hash
|
733 | 797 |
d.size_bytes = tree.size_bytes
|
734 | 798 |
yield d
|
735 | 799 |
|
736 |
- directory = remote_execution_pb2.Directory()
|
|
800 |
+ if may_have_dependencies:
|
|
801 |
+ directory = remote_execution_pb2.Directory()
|
|
737 | 802 |
|
738 |
- with open(self.objpath(tree), 'rb') as f:
|
|
739 |
- directory.ParseFromString(f.read())
|
|
803 |
+ with open(self.objpath(tree), 'rb') as f:
|
|
804 |
+ directory.ParseFromString(f.read())
|
|
740 | 805 |
|
741 |
- for filenode in directory.files:
|
|
742 |
- d = remote_execution_pb2.Digest()
|
|
743 |
- d.hash = filenode.digest.hash
|
|
744 |
- d.size_bytes = filenode.digest.size_bytes
|
|
745 |
- yield d
|
|
806 |
+ for filenode in directory.files:
|
|
807 |
+ d = remote_execution_pb2.Digest()
|
|
808 |
+ d.hash = filenode.digest.hash
|
|
809 |
+ d.size_bytes = filenode.digest.size_bytes
|
|
810 |
+ yield d
|
|
746 | 811 |
|
747 |
- for dirnode in directory.directories:
|
|
748 |
- yield from self._required_blobs(dirnode.digest)
|
|
812 |
+ for dirnode in directory.directories:
|
|
813 |
+ yield from self._required_blobs(dirnode.digest)
|
|
749 | 814 |
|
750 | 815 |
def _fetch_blob(self, remote, digest, out):
|
751 | 816 |
resource_name = os.path.join(digest.hash, str(digest.size_bytes))
|
... | ... | @@ -846,6 +911,9 @@ class _CASRemote(): |
846 | 911 |
|
847 | 912 |
|
848 | 913 |
def _grouper(iterable, n):
|
849 |
- # pylint: disable=stop-iteration-return
|
|
850 | 914 |
while True:
|
851 |
- yield itertools.chain([next(iterable)], itertools.islice(iterable, n - 1))
|
|
915 |
+ try:
|
|
916 |
+ current = next(iterable)
|
|
917 |
+ except StopIteration:
|
|
918 |
+ return
|
|
919 |
+ yield itertools.chain([current], itertools.islice(iterable, n - 1))
|
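The _grouper() change above is a PEP 479 fix: from Python 3.7, a StopIteration escaping a generator body is converted into a RuntimeError, so the bare next(iterable) has to be caught and turned into a plain return. A self-contained illustration of the fixed pattern (the 512-digest batching of FindMissingBlobs requests earlier in this file relies on it):

    import itertools

    def grouper(iterable, n):
        # Yield n-sized chunks of an iterator; the last chunk may be shorter.
        while True:
            try:
                current = next(iterable)
            except StopIteration:
                return          # ends the generator cleanly under PEP 479
            yield itertools.chain([current], itertools.islice(iterable, n - 1))

    for group in grouper(iter(range(5)), 2):
        print(list(group))      # [0, 1], then [2, 3], then [4]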
... | ... | @@ -358,10 +358,24 @@ class Pipeline(): |
358 | 358 |
inconsistent.append(element)
|
359 | 359 |
|
360 | 360 |
if inconsistent:
|
361 |
- detail = "Exact versions are missing for the following elements\n" + \
|
|
362 |
- "Try tracking these elements first with `bst track`\n\n"
|
|
361 |
+ detail = "Exact versions are missing for the following elements:\n\n"
|
|
362 |
+ |
|
363 |
+ missing_track = False
|
|
363 | 364 |
for element in inconsistent:
|
364 |
- detail += " " + element._get_full_name() + "\n"
|
|
365 |
+ detail += " " + element._get_full_name()
|
|
366 |
+ for source in element.sources():
|
|
367 |
+ if not source._get_consistency() and not source.get_ref():
|
|
368 |
+ if hasattr(source, 'tracking') and source.tracking is None:
|
|
369 |
+ detail += ": Source {} is missing ref and track. ".format(source._get_full_name()) + \
|
|
370 |
+ "Please specify a ref or branch/tag to track."
|
|
371 |
+ missing_track = True
|
|
372 |
+ |
|
373 |
+ detail += "\n"
|
|
374 |
+ |
|
375 |
+ if missing_track:
|
|
376 |
+ detail += "\nThen track these elements with `bst track`\n"
|
|
377 |
+ else:
|
|
378 |
+ detail += "\nTry tracking these elements first with `bst track`\n"
|
|
365 | 379 |
raise PipelineError("Inconsistent pipeline", detail=detail, reason="inconsistent-pipeline")
|
366 | 380 |
|
367 | 381 |
#############################################################
|
... | ... | @@ -24,6 +24,7 @@ from .. import utils |
24 | 24 |
from .._artifactcache.cascache import CASCache
|
25 | 25 |
from .._message import Message, MessageType
|
26 | 26 |
from ..sandbox import SandboxBwrap
|
27 |
+from ..sandbox import SandboxRemote
|
|
27 | 28 |
|
28 | 29 |
from . import Platform
|
29 | 30 |
|
... | ... | @@ -46,7 +47,7 @@ class Linux(Platform): |
46 | 47 |
# Inform the bubblewrap sandbox as to whether it can use user namespaces or not
|
47 | 48 |
kwargs['user_ns_available'] = self._user_ns_available
|
48 | 49 |
kwargs['die_with_parent_available'] = self._die_with_parent_available
|
49 |
- return SandboxBwrap(*args, **kwargs)
|
|
50 |
+ return SandboxRemote(*args, **kwargs)
|
|
50 | 51 |
|
51 | 52 |
################################################
|
52 | 53 |
# Private Methods #
|
... | ... | @@ -155,6 +155,9 @@ class BuildElement(Element): |
155 | 155 |
command_dir = build_root
|
156 | 156 |
sandbox.set_work_directory(command_dir)
|
157 | 157 |
|
158 |
+ # Tell sandbox which directory is preserved in the finished artifact
|
|
159 |
+ sandbox.set_output_directory(install_root)
|
|
160 |
+ |
|
158 | 161 |
# Setup environment
|
159 | 162 |
sandbox.set_environment(self.get_environment())
|
160 | 163 |
|
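With set_output_directory(), an element now declares up front which directory the sandbox must preserve; under remote execution this becomes the single output_directories entry of the REAPI Command. A hedged sketch of the resulting set-up sequence (the helper is illustrative; the names follow the BuildElement code above):

    def configure_build_sandbox(sandbox, build_root, install_root, environment):
        sandbox.set_work_directory(build_root)
        # Only this directory is collected into the finished artifact.
        sandbox.set_output_directory(install_root)
        sandbox.set_environment(environment)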
... | ... | @@ -78,7 +78,6 @@ import stat |
78 | 78 |
import copy
|
79 | 79 |
from collections import Mapping, OrderedDict
|
80 | 80 |
from contextlib import contextmanager
|
81 |
-from enum import Enum
|
|
82 | 81 |
import tempfile
|
83 | 82 |
import shutil
|
84 | 83 |
|
... | ... | @@ -97,41 +96,9 @@ from ._platform import Platform |
97 | 96 |
from .sandbox._config import SandboxConfig
|
98 | 97 |
|
99 | 98 |
from .storage.directory import Directory
|
100 |
-from .storage._filebaseddirectory import FileBasedDirectory, VirtualDirectoryError
|
|
101 |
- |
|
102 |
- |
|
103 |
-# _KeyStrength():
|
|
104 |
-#
|
|
105 |
-# Strength of cache key
|
|
106 |
-#
|
|
107 |
-class _KeyStrength(Enum):
|
|
108 |
- |
|
109 |
- # Includes strong cache keys of all build dependencies and their
|
|
110 |
- # runtime dependencies.
|
|
111 |
- STRONG = 1
|
|
112 |
- |
|
113 |
- # Includes names of direct build dependencies but does not include
|
|
114 |
- # cache keys of dependencies.
|
|
115 |
- WEAK = 2
|
|
116 |
- |
|
117 |
- |
|
118 |
-class Scope(Enum):
|
|
119 |
- """Types of scope for a given element"""
|
|
120 |
- |
|
121 |
- ALL = 1
|
|
122 |
- """All elements which the given element depends on, following
|
|
123 |
- all elements required for building. Including the element itself.
|
|
124 |
- """
|
|
125 |
- |
|
126 |
- BUILD = 2
|
|
127 |
- """All elements required for building the element, including their
|
|
128 |
- respective run dependencies. Not including the given element itself.
|
|
129 |
- """
|
|
130 |
- |
|
131 |
- RUN = 3
|
|
132 |
- """All elements required for running the element. Including the element
|
|
133 |
- itself.
|
|
134 |
- """
|
|
99 |
+from .storage._filebaseddirectory import FileBasedDirectory
|
|
100 |
+from .storage.directory import VirtualDirectoryError
|
|
101 |
+from .element_enums import _KeyStrength, Scope
|
|
135 | 102 |
|
136 | 103 |
|
137 | 104 |
class ElementError(BstError):
|
... | ... | @@ -1579,6 +1546,8 @@ class Element(Plugin): |
1579 | 1546 |
finally:
|
1580 | 1547 |
if collect is not None:
|
1581 | 1548 |
try:
|
1549 |
+ # Sandbox will probably have replaced its virtual directory, so get it again
|
|
1550 |
+ sandbox_vroot = sandbox.get_virtual_directory()
|
|
1582 | 1551 |
collectvdir = sandbox_vroot.descend(collect.lstrip(os.sep).split(os.sep))
|
1583 | 1552 |
except VirtualDirectoryError:
|
1584 | 1553 |
# No collect directory existed
|
1 |
+#
|
|
2 |
+# Copyright (C) 2018 Bloomberg LLC
|
|
3 |
+#
|
|
4 |
+# This program is free software; you can redistribute it and/or
|
|
5 |
+# modify it under the terms of the GNU Lesser General Public
|
|
6 |
+# License as published by the Free Software Foundation; either
|
|
7 |
+# version 2 of the License, or (at your option) any later version.
|
|
8 |
+#
|
|
9 |
+# This library is distributed in the hope that it will be useful,
|
|
10 |
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11 |
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
12 |
+# Lesser General Public License for more details.
|
|
13 |
+#
|
|
14 |
+# You should have received a copy of the GNU Lesser General Public
|
|
15 |
+# License along with this library. If not, see <http://www.gnu.org/licenses/>.
|
|
16 |
+#
|
|
17 |
+# Authors:
|
|
18 |
+# Tristan Van Berkom <tristan vanberkom codethink co uk>
|
|
19 |
+# Jim MacArthur <jim macarthur codethink co uk>
|
|
20 |
+ |
|
21 |
+"""
|
|
22 |
+Element - Globally visible enumerations
|
|
23 |
+=======================================
|
|
24 |
+ |
|
25 |
+"""
|
|
26 |
+ |
|
27 |
+from enum import Enum
|
|
28 |
+ |
|
29 |
+ |
|
30 |
+# KeyStrength():
|
|
31 |
+#
|
|
32 |
+# Strength of cache key
|
|
33 |
+#
|
|
34 |
+class _KeyStrength(Enum):
|
|
35 |
+ |
|
36 |
+ # Includes strong cache keys of all build dependencies and their
|
|
37 |
+ # runtime dependencies.
|
|
38 |
+ STRONG = 1
|
|
39 |
+ |
|
40 |
+ # Includes names of direct build dependencies but does not include
|
|
41 |
+ # cache keys of dependencies.
|
|
42 |
+ WEAK = 2
|
|
43 |
+ |
|
44 |
+ |
|
45 |
+class Scope(Enum):
|
|
46 |
+ """Types of scope for a given element"""
|
|
47 |
+ |
|
48 |
+ ALL = 1
|
|
49 |
+ """All elements which the given element depends on, following
|
|
50 |
+ all elements required for building. Including the element itself.
|
|
51 |
+ """
|
|
52 |
+ |
|
53 |
+ BUILD = 2
|
|
54 |
+ """All elements required for building the element, including their
|
|
55 |
+ respective run dependencies. Not including the given element itself.
|
|
56 |
+ """
|
|
57 |
+ |
|
58 |
+ RUN = 3
|
|
59 |
+ """All elements required for running the element. Including the element
|
|
60 |
+ itself.
|
|
61 |
+ """
|
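Moving Scope and _KeyStrength into element_enums.py means other modules no longer need to import element.py just for these enums, and the top-level package still re-exports Scope, so plugin code is unchanged. A small sketch of typical Scope usage (the helper function is hypothetical):

    from buildstream import Scope

    def build_dependency_names(element):
        # Scope.BUILD covers everything needed to build 'element', plus the
        # run-time dependencies of those elements, excluding 'element' itself.
        return [dep.name for dep in element.dependencies(Scope.BUILD)]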
... | ... | @@ -363,6 +363,12 @@ class GitSource(Source): |
363 | 363 |
|
364 | 364 |
# If self.tracking is not specified it's not an error, just silently return
|
365 | 365 |
if not self.tracking:
|
366 |
+ # Is there a better way to check whether a ref is given?
|
|
367 |
+ if self.mirror.ref is None:
|
|
368 |
+ detail = 'Without a tracking branch, the ref cannot be updated. Please ' + \
|
|
369 |
+ 'provide a ref or a branch/tag to track.'
|
|
370 |
+ raise SourceError("{}: No track or ref".format(self),
|
|
371 |
+ detail=detail, reason="track-attempt-no-track")
|
|
366 | 372 |
return None
|
367 | 373 |
|
368 | 374 |
with self.timed_activity("Tracking {} from {}"
|
... | ... | @@ -20,3 +20,4 @@ |
20 | 20 |
from .sandbox import Sandbox, SandboxFlags
|
21 | 21 |
from ._sandboxchroot import SandboxChroot
|
22 | 22 |
from ._sandboxbwrap import SandboxBwrap
|
23 |
+from ._sandboxremote import SandboxRemote
|
... | ... | @@ -32,8 +32,10 @@ from .._fuse import SafeHardlinks |
32 | 32 |
class Mount():
|
33 | 33 |
def __init__(self, sandbox, mount_point, safe_hardlinks):
|
34 | 34 |
scratch_directory = sandbox._get_scratch_directory()
|
35 |
- # Getting external_directory here is acceptable as we're part of the sandbox code.
|
|
36 |
- root_directory = sandbox.get_virtual_directory().external_directory
|
|
35 |
+ # Getting _get_underlying_directory() here is acceptable as
|
|
36 |
+ # we're part of the sandbox code. This will fail if our
|
|
37 |
+ # directory is CAS-based.
|
|
38 |
+ root_directory = sandbox.get_virtual_directory()._get_underlying_directory()
|
|
37 | 39 |
|
38 | 40 |
self.mount_point = mount_point
|
39 | 41 |
self.safe_hardlinks = safe_hardlinks
|
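As the new comment warns, _get_underlying_directory() only makes sense for filesystem-backed directories; a CAS-backed directory has no single on-disk root. A hedged sketch of a guarded access pattern for sandbox-internal code (the helper is illustrative, not part of this push; FileBasedDirectory is imported as in the sandbox modules below):

    def underlying_root(sandbox):
        vdir = sandbox.get_virtual_directory()
        if not isinstance(vdir, FileBasedDirectory):
            raise RuntimeError("this sandbox backend needs a filesystem-backed directory")
        return vdir._get_underlying_directory()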
... | ... | @@ -58,7 +58,7 @@ class SandboxBwrap(Sandbox): |
58 | 58 |
stdout, stderr = self._get_output()
|
59 | 59 |
|
60 | 60 |
# Allowable access to underlying storage as we're part of the sandbox
|
61 |
- root_directory = self.get_virtual_directory().external_directory
|
|
61 |
+ root_directory = self.get_virtual_directory()._get_underlying_directory()
|
|
62 | 62 |
|
63 | 63 |
# Fallback to the sandbox default settings for
|
64 | 64 |
# the cwd and env.
|
... | ... | @@ -243,6 +243,7 @@ class SandboxBwrap(Sandbox): |
243 | 243 |
# a bug, bwrap mounted a tempfs here and when it exits, that better be empty.
|
244 | 244 |
pass
|
245 | 245 |
|
246 |
+ self._vdir.mark_changed()
|
|
246 | 247 |
return exit_code
|
247 | 248 |
|
248 | 249 |
def run_bwrap(self, argv, stdin, stdout, stderr, env, interactive):
|
... | ... | @@ -106,6 +106,7 @@ class SandboxChroot(Sandbox): |
106 | 106 |
status = self.chroot(rootfs, command, stdin, stdout,
|
107 | 107 |
stderr, cwd, env, flags)
|
108 | 108 |
|
109 |
+ self._vdir.mark_changed()
|
|
109 | 110 |
return status
|
110 | 111 |
|
111 | 112 |
# chroot()
|
1 |
+#!/usr/bin/env python3
|
|
2 |
+#
|
|
3 |
+# Copyright (C) 2018 Codethink Limited
|
|
4 |
+#
|
|
5 |
+# This program is free software; you can redistribute it and/or
|
|
6 |
+# modify it under the terms of the GNU Lesser General Public
|
|
7 |
+# License as published by the Free Software Foundation; either
|
|
8 |
+# version 2 of the License, or (at your option) any later version.
|
|
9 |
+#
|
|
10 |
+# This library is distributed in the hope that it will be useful,
|
|
11 |
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12 |
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
13 |
+# Lesser General Public License for more details.
|
|
14 |
+#
|
|
15 |
+# You should have received a copy of the GNU Lesser General Public
|
|
16 |
+# License along with this library. If not, see <http://www.gnu.org/licenses/>.
|
|
17 |
+#
|
|
18 |
+# Authors:
|
|
19 |
+# Jim MacArthur <jim macarthur codethink co uk>
|
|
20 |
+ |
|
21 |
+import os
|
|
22 |
+import time
|
|
23 |
+ |
|
24 |
+import grpc
|
|
25 |
+ |
|
26 |
+from . import Sandbox
|
|
27 |
+from ..storage._filebaseddirectory import FileBasedDirectory
|
|
28 |
+from ..storage._casbaseddirectory import CasBasedDirectory
|
|
29 |
+from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc
|
|
30 |
+from .._protos.google.longrunning import operations_pb2, operations_pb2_grpc
|
|
31 |
+ |
|
32 |
+from .._artifactcache.cascache import CASCache
|
|
33 |
+ |
|
34 |
+ |
|
35 |
+class SandboxError(Exception):
|
|
36 |
+ pass
|
|
37 |
+ |
|
38 |
+ |
|
39 |
+# SandboxRemote()
|
|
40 |
+#
|
|
41 |
+# This isn't really a sandbox; it's a stub which sends all the source
|
|
42 |
+# to a remote server and retrieves the results from it.
|
|
43 |
+#
|
|
44 |
+class SandboxRemote(Sandbox):
|
|
45 |
+ |
|
46 |
+ def __init__(self, *args, **kwargs):
|
|
47 |
+ super().__init__(*args, **kwargs)
|
|
48 |
+ self.user_ns_available = kwargs['user_ns_available']
|
|
49 |
+ self.die_with_parent_available = kwargs['die_with_parent_available']
|
|
50 |
+ self.cascache = None
|
|
51 |
+ |
|
52 |
+ def _get_cascache(self):
|
|
53 |
+ if self.cascache is None:
|
|
54 |
+ self.cascache = CASCache(self._get_context())
|
|
55 |
+ self.cascache.setup_remotes(use_config=True)
|
|
56 |
+ return self.cascache
|
|
57 |
+ |
|
58 |
+ def __run_remote_command(self, cascache, command, input_root_digest, environment):
|
|
59 |
+ |
|
60 |
+ environment_variables = [remote_execution_pb2.Command.
|
|
61 |
+ EnvironmentVariable(name=k, value=v)
|
|
62 |
+ for (k, v) in environment.items()]
|
|
63 |
+ |
|
64 |
+ # Create and send the Command object.
|
|
65 |
+ remote_command = remote_execution_pb2.Command(arguments=command, environment_variables=environment_variables,
|
|
66 |
+ output_files=[],
|
|
67 |
+ output_directories=[self._output_directory],
|
|
68 |
+ platform=None)
|
|
69 |
+ command_digest = cascache.add_object(buffer=remote_command.SerializeToString())
|
|
70 |
+ command_ref = 'worker-command/{}'.format(command_digest.hash)
|
|
71 |
+ cascache.set_ref(command_ref, command_digest)
|
|
72 |
+ |
|
73 |
+ command_push_successful = cascache.push_refs([command_ref], self._get_project(), may_have_dependencies=False)
|
|
74 |
+ if not command_push_successful and not cascache.verify_key_pushed(command_ref, self._get_project()):
|
|
75 |
+ # Command push failed
|
|
76 |
+ return None
|
|
77 |
+ |
|
78 |
+ # Create and send the action.
|
|
79 |
+ |
|
80 |
+ action = remote_execution_pb2.Action(command_digest=command_digest,
|
|
81 |
+ input_root_digest=input_root_digest,
|
|
82 |
+ timeout=None,
|
|
83 |
+ do_not_cache=True)
|
|
84 |
+ |
|
85 |
+ action_digest = cascache.add_object(buffer=action.SerializeToString())
|
|
86 |
+ action_ref = 'worker-action/{}'.format(action_digest.hash)
|
|
87 |
+ cascache.set_ref(action_ref, action_digest)
|
|
88 |
+ action_push_successful = cascache.push_refs([action_ref], self._get_project(), may_have_dependencies=False)
|
|
89 |
+ |
|
90 |
+ if not action_push_successful and not cascache.verify_key_pushed(action_ref, self._get_project()):
|
|
91 |
+ # Action push failed
|
|
92 |
+ return None
|
|
93 |
+ |
|
94 |
+ # Next, try to create a communication channel to the BuildGrid server.
|
|
95 |
+ port = 50051
|
|
96 |
+ channel = grpc.insecure_channel('dekatron.office.codethink.co.uk:{}'.format(port))
|
|
97 |
+ stub = remote_execution_pb2_grpc.ExecutionStub(channel)
|
|
98 |
+ |
|
99 |
+ request = remote_execution_pb2.ExecuteRequest(instance_name='default',
|
|
100 |
+ action_digest=action_digest,
|
|
101 |
+ skip_cache_lookup=True)
|
|
102 |
+ |
|
103 |
+ operation_iterator = stub.Execute(request)
|
|
104 |
+ |
|
105 |
+ for operation in operation_iterator:
|
|
106 |
+ if operation.done:
|
|
107 |
+ break
|
|
108 |
+ # TODO: Do we need a sleep here?
|
|
109 |
+ return operation
|
|
110 |
+ |
|
111 |
+ def process_job_output(self, output_directories, output_files):
|
|
112 |
+ # output_directories is an array of OutputDirectory objects.
|
|
113 |
+ # output_files is an array of OutputFile objects.
|
|
114 |
+ #
|
|
115 |
+ # We only specify one output_directory, so it's an error
|
|
116 |
+ # for there to be any output files or more than one directory at the moment.
|
|
117 |
+ |
|
118 |
+ if output_files:
|
|
119 |
+ raise SandboxError("Output files were returned when we didn't request any.")
|
|
120 |
+ elif len(output_directories) > 1:
|
|
121 |
+ error_text = "More than one output directory was returned from the build server: {}"
|
|
122 |
+ raise SandboxError(error_text.format(output_directories))
|
|
123 |
+ elif len(output_directories) < 1:
|
|
124 |
+ error_text = "No output directory was returned from the build server."
|
|
125 |
+ raise SandboxError(error_text)
|
|
126 |
+ |
|
127 |
+ digest = output_directories[0].tree_digest
|
|
128 |
+ if digest is None or digest.hash is None or digest.hash == "":
|
|
129 |
+ raise SandboxError("Output directory structure had no digest attached.")
|
|
130 |
+ |
|
131 |
+ # Now do a pull to ensure we have the necessary parts.
|
|
132 |
+ cascache = self._get_cascache()
|
|
133 |
+ cascache.pull_key(digest.hash, digest.size_bytes, self._get_project())
|
|
134 |
+ path_components = os.path.split(self._output_directory)
|
|
135 |
+ |
|
136 |
+ # Now what we have is a digest for the output. Once we return, the calling process will
|
|
137 |
+ # attempt to descend into our directory and find that directory, so we need to overwrite
|
|
138 |
+ # that.
|
|
139 |
+ |
|
140 |
+ if not path_components:
|
|
141 |
+ # The artifact wants the whole directory; we could just return the returned hash in its
|
|
142 |
+ # place, but we don't have a means to do that yet.
|
|
143 |
+ raise SandboxError("Unimplemented: Output directory is empty or equal to the sandbox root.")
|
|
144 |
+ |
|
145 |
+ # At the moment, we will get the whole directory back in the first directory argument and we need
|
|
146 |
+ # to replace the sandbox's virtual directory with that. Creating a new virtual directory object
|
|
147 |
+ # from another hash will be interesting, though...
|
|
148 |
+ |
|
149 |
+ new_dir = CasBasedDirectory(self._get_context(), ref=digest)
|
|
150 |
+ self.set_virtual_directory(new_dir)
|
|
151 |
+ |
|
152 |
+ def run(self, command, flags, *, cwd=None, env=None):
|
|
153 |
+ # Upload sources
|
|
154 |
+ upload_vdir = self.get_virtual_directory()
|
|
155 |
+ |
|
156 |
+ if isinstance(upload_vdir, FileBasedDirectory):
|
|
157 |
+ # Make a new temporary directory to put source in
|
|
158 |
+ upload_vdir = CasBasedDirectory(self._get_context(), ref=None)
|
|
159 |
+ upload_vdir.import_files(self.get_virtual_directory()._get_underlying_directory())
|
|
160 |
+ |
|
161 |
+ |
|
162 |
+ # Now, push that key (without necessarily needing a ref) to the remote.
|
|
163 |
+ cascache = self._get_cascache()
|
|
164 |
+ |
|
165 |
+ ref = 'worker-source/{}'.format(upload_vdir.ref.hash)
|
|
166 |
+ upload_vdir._save(ref)
|
|
167 |
+ source_push_successful = cascache.push_refs([ref], self._get_project())
|
|
168 |
+ |
|
169 |
+ # Set up environment and PWD
|
|
170 |
+ if env is None:
|
|
171 |
+ env = self._get_environment()
|
|
172 |
+ if 'PWD' not in env:
|
|
173 |
+ env['PWD'] = self._get_work_directory()
|
|
174 |
+ |
|
175 |
+ # We want command args as a list of strings
|
|
176 |
+ if isinstance(command, str):
|
|
177 |
+ command = [command]
|
|
178 |
+ |
|
179 |
+ # Now transmit the command to execute
|
|
180 |
+ if source_push_successful or cascache.verify_key_pushed(ref, self._get_project()):
|
|
181 |
+ response = self.__run_remote_command(cascache, command, upload_vdir.ref, env)
|
|
182 |
+ |
|
183 |
+ if response is None or response.HasField("error"):
|
|
184 |
+ # Build failed, so return a failure code
|
|
185 |
+ return 1
|
|
186 |
+ else:
|
|
187 |
+ |
|
188 |
+ # At the moment, response can either be an
|
|
189 |
+ # ExecutionResponse containing an ActionResult, or an
|
|
190 |
+ # ActionResult directly.
|
|
191 |
+ executeResponse = remote_execution_pb2.ExecuteResponse()
|
|
192 |
+ if response.response.Is(executeResponse.DESCRIPTOR):
|
|
193 |
+ # Unpack ExecuteResponse and set response to its response
|
|
194 |
+ response.response.Unpack(executeResponse)
|
|
195 |
+ response = executeResponse
|
|
196 |
+ |
|
197 |
+ actionResult = remote_execution_pb2.ActionResult()
|
|
198 |
+ if response.response.Is(actionResult.DESCRIPTOR):
|
|
199 |
+ response.response.Unpack(actionResult)
|
|
200 |
+ self.process_job_output(actionResult.output_directories, actionResult.output_files)
|
|
201 |
+ else:
|
|
202 |
+ raise SandboxError("Received unknown message from server (expected ExecutionResponse).")
|
|
203 |
+ else:
|
|
204 |
+ raise SandboxError("Failed to verify that source has been pushed to the remote artifact cache.")
|
|
205 |
+ return 0
|
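run() above has to cope with the operation result arriving as a protobuf Any that wraps either an ExecuteResponse or an ActionResult, which is why it probes with Is() before calling Unpack(). A self-contained illustration of that pattern, using a well-known protobuf type in place of the REAPI messages:

    from google.protobuf import any_pb2, duration_pb2

    packed = any_pb2.Any()
    packed.Pack(duration_pb2.Duration(seconds=42))

    result = duration_pb2.Duration()
    if packed.Is(duration_pb2.Duration.DESCRIPTOR):  # does the Any hold a Duration?
        packed.Unpack(result)                        # copy the payload out
        print(result.seconds)                        # 42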
... | ... | @@ -31,6 +31,7 @@ See also: :ref:`sandboxing`. |
31 | 31 |
import os
|
32 | 32 |
from .._exceptions import ImplError, BstError
|
33 | 33 |
from ..storage._filebaseddirectory import FileBasedDirectory
|
34 |
+from ..storage._casbaseddirectory import CasBasedDirectory
|
|
34 | 35 |
|
35 | 36 |
|
36 | 37 |
class SandboxFlags():
|
... | ... | @@ -98,13 +99,16 @@ class Sandbox(): |
98 | 99 |
self.__stdout = kwargs['stdout']
|
99 | 100 |
self.__stderr = kwargs['stderr']
|
100 | 101 |
|
101 |
- # Setup the directories. Root should be available to subclasses, hence
|
|
102 |
- # being single-underscore. The others are private to this class.
|
|
102 |
+ # Setup the directories. Root and output_directory should be
|
|
103 |
+ # available to subclasses, hence being single-underscore. The
|
|
104 |
+ # others are private to this class.
|
|
103 | 105 |
self._root = os.path.join(directory, 'root')
|
106 |
+ self._output_directory = None
|
|
104 | 107 |
self.__directory = directory
|
105 | 108 |
self.__scratch = os.path.join(self.__directory, 'scratch')
|
106 | 109 |
for directory_ in [self._root, self.__scratch]:
|
107 | 110 |
os.makedirs(directory_, exist_ok=True)
|
111 |
+ self._vdir = None
|
|
108 | 112 |
|
109 | 113 |
def get_directory(self):
|
110 | 114 |
"""Fetches the sandbox root directory
|
... | ... | @@ -132,15 +136,39 @@ class Sandbox(): |
132 | 136 |
Returns:
|
133 | 137 |
(str): The sandbox root directory
|
134 | 138 |
|
139 |
+ """
|
|
140 |
+ if not self._vdir:
|
|
141 |
+ # BST_CAS_DIRECTORIES is a deliberately hidden environment variable which
|
|
142 |
+ # can be used to switch on CAS-based directories for testing.
|
|
143 |
+ if 'BST_CAS_DIRECTORIES' in os.environ:
|
|
144 |
+ self._vdir = CasBasedDirectory(self.__context, ref=None)
|
|
145 |
+ else:
|
|
146 |
+ self._vdir = FileBasedDirectory(self._root)
|
|
147 |
+ return self._vdir
|
|
148 |
+ |
|
149 |
+ def set_virtual_directory(self, vdir):
|
|
150 |
+ """ Sets virtual directory. Useful after remote execution
|
|
151 |
+ has rewritten the working directory. """
|
|
152 |
+ self._vdir = vdir
|
|
153 |
+ |
|
154 |
+ def get_virtual_toplevel_directory(self):
|
|
155 |
+ """Fetches the sandbox's toplevel directory
|
|
156 |
+ |
|
157 |
+ The toplevel directory contains 'root', 'scratch' and later
|
|
158 |
+ 'artifact' where output is copied to.
|
|
159 |
+ |
|
160 |
+ Returns:
|
|
161 |
+ (str): The sandbox toplevel directory
|
|
162 |
+ |
|
135 | 163 |
"""
|
136 | 164 |
# For now, just create a new Directory every time we're asked
|
137 |
- return FileBasedDirectory(self._root)
|
|
165 |
+ return FileBasedDirectory(self.__directory)
|
|
138 | 166 |
|
139 | 167 |
def set_environment(self, environment):
|
140 | 168 |
"""Sets the environment variables for the sandbox
|
141 | 169 |
|
142 | 170 |
Args:
|
143 |
- directory (dict): The environment variables to use in the sandbox
|
|
171 |
+ environment (dict): The environment variables to use in the sandbox
|
|
144 | 172 |
"""
|
145 | 173 |
self.__env = environment
|
146 | 174 |
|
... | ... | @@ -152,6 +180,15 @@ class Sandbox(): |
152 | 180 |
"""
|
153 | 181 |
self.__cwd = directory
|
154 | 182 |
|
183 |
+ def set_output_directory(self, directory):
|
|
184 |
+ """Sets the output directory - the directory which is preserved
|
|
185 |
+ as an artifact after assembly.
|
|
186 |
+ |
|
187 |
+ Args:
|
|
188 |
+ directory (str): An absolute path within the sandbox
|
|
189 |
+ """
|
|
190 |
+ self._output_directory = directory
|
|
191 |
+ |
|
155 | 192 |
def mark_directory(self, directory, *, artifact=False):
|
156 | 193 |
"""Marks a sandbox directory and ensures it will exist
|
157 | 194 |
|
1 |
+#
|
|
2 |
+# Copyright (C) 2018 Bloomberg LLC
|
|
3 |
+#
|
|
4 |
+# This program is free software; you can redistribute it and/or
|
|
5 |
+# modify it under the terms of the GNU Lesser General Public
|
|
6 |
+# License as published by the Free Software Foundation; either
|
|
7 |
+# version 2 of the License, or (at your option) any later version.
|
|
8 |
+#
|
|
9 |
+# This library is distributed in the hope that it will be useful,
|
|
10 |
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11 |
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
12 |
+# Lesser General Public License for more details.
|
|
13 |
+#
|
|
14 |
+# You should have received a copy of the GNU Lesser General Public
|
|
15 |
+# License along with this library. If not, see <http://www.gnu.org/licenses/>.
|
|
16 |
+#
|
|
17 |
+# Authors:
|
|
18 |
+# Jim MacArthur <jim macarthur codethink co uk>
|
|
19 |
+ |
|
20 |
+"""
|
|
21 |
+CasBasedDirectory
|
|
22 |
+=================
|
|
23 |
+ |
|
24 |
+Implementation of the Directory class which backs onto a Merkle-tree based content
|
|
25 |
+addressable storage system.
|
|
26 |
+ |
|
27 |
+See also: :ref:`sandboxing`.
|
|
28 |
+"""
|
|
29 |
+ |
|
30 |
+from collections import OrderedDict
|
|
31 |
+ |
|
32 |
+import os
|
|
33 |
+import tempfile
|
|
34 |
+import stat
|
|
35 |
+ |
|
36 |
+from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
|
37 |
+from .._exceptions import BstError
|
|
38 |
+from .directory import Directory, VirtualDirectoryError
|
|
39 |
+from ._filebaseddirectory import FileBasedDirectory
|
|
40 |
+from ..utils import FileListResult, safe_copy, list_relative_paths
|
|
41 |
+from .._artifactcache.cascache import CASCache
|
|
42 |
+ |
|
43 |
+ |
|
44 |
+class IndexEntry():
|
|
45 |
+ """ Used in our index of names to objects to store the 'modified' flag
|
|
46 |
+ for directory entries. Because we need both the remote_execution_pb2 object
|
|
47 |
+ and our own Directory object for directory entries, we store both. For files
|
|
48 |
+ and symlinks, only pb_object is used. """
|
|
49 |
+ def __init__(self, pb_object, buildstream_object=None, modified=False):
|
|
50 |
+ self.pb_object = pb_object # Short for 'protocol buffer object'
|
|
51 |
+ self.buildstream_object = buildstream_object
|
|
52 |
+ self.modified = modified
|
|
53 |
+ |
|
54 |
+ |
|
55 |
+# CasBasedDirectory intentionally doesn't call its superclass constructor,
|
|
56 |
+# which is meant to be unimplemented.
|
|
57 |
+# pylint: disable=super-init-not-called
|
|
58 |
+ |
|
59 |
+class CasBasedDirectory(Directory):
|
|
60 |
+ """
|
|
61 |
+ CAS-based directories can have two names; one is a 'common name' which has no effect
|
|
62 |
+ on functionality, and the 'filename'. If a CasBasedDirectory has a parent, then 'filename'
|
|
63 |
+ must be the name of an entry in the parent directory's index which points to this object.
|
|
64 |
+ This is used to inform a parent directory that it must update the given hash for this
|
|
65 |
+ object when this object changes.
|
|
66 |
+ |
|
67 |
+ Typically a top-level CasBasedDirectory will have a common_name and no filename, and
|
|
68 |
+ subdirectories will have a filename and no common_name. common_name can be used to identify
|
|
69 |
+ CasBasedDirectory objects in a log file, since they have no unique position in a file
|
|
70 |
+ system.
|
|
71 |
+ """
|
|
72 |
+ |
|
73 |
+ # Two constants which define the separators used by the remote execution API.
|
|
74 |
+ _pb2_path_sep = "/"
|
|
75 |
+ _pb2_absolute_path_prefix = "/"
|
|
76 |
+ |
|
77 |
+ def __init__(self, context, ref=None, parent=None, common_name="untitled", filename=None):
|
|
78 |
+ self.context = context
|
|
79 |
+ self.cas_directory = os.path.join(context.artifactdir, 'cas')
|
|
80 |
+ self.filename = filename
|
|
81 |
+ self.common_name = common_name
|
|
82 |
+ self.pb2_directory = remote_execution_pb2.Directory()
|
|
83 |
+ self.cas_cache = CASCache(context)
|
|
84 |
+ if ref:
|
|
85 |
+ with open(self.cas_cache.objpath(ref), 'rb') as f:
|
|
86 |
+ self.pb2_directory.ParseFromString(f.read())
|
|
87 |
+ |
|
88 |
+ self.ref = ref
|
|
89 |
+ self.index = OrderedDict()
|
|
90 |
+ self.parent = parent
|
|
91 |
+ self._directory_read = False
|
|
92 |
+ self._populate_index()
|
|
93 |
+ |
|
94 |
+ def _populate_index(self):
|
|
95 |
+ if self._directory_read:
|
|
96 |
+ return
|
|
97 |
+ for entry in self.pb2_directory.directories:
|
|
98 |
+ buildStreamDirectory = CasBasedDirectory(self.context, ref=entry.digest,
|
|
99 |
+ parent=self, filename=entry.name)
|
|
100 |
+ self.index[entry.name] = IndexEntry(entry, buildstream_object=buildStreamDirectory)
|
|
101 |
+ for entry in self.pb2_directory.files:
|
|
102 |
+ self.index[entry.name] = IndexEntry(entry)
|
|
103 |
+ for entry in self.pb2_directory.symlinks:
|
|
104 |
+ self.index[entry.name] = IndexEntry(entry)
|
|
105 |
+ self._directory_read = True
|
|
106 |
+ |
|
107 |
+ def _recalculate_recursing_up(self, caller=None):
|
|
108 |
+ """Recalcuate the hash for this directory and store the results in
|
|
109 |
+ the cache. If this directory has a parent, tell it to
|
|
110 |
+ recalculate (since changing this directory changes an entry in
|
|
111 |
+ the parent).
|
|
112 |
+ |
|
113 |
+ """
|
|
114 |
+ self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
|
|
115 |
+ if caller:
|
|
116 |
+ old_dir = self._find_pb2_entry(caller.filename)
|
|
117 |
+ self.cas_cache.add_object(digest=old_dir.digest, buffer=caller.pb2_directory.SerializeToString())
|
|
118 |
+ if self.parent:
|
|
119 |
+ self.parent._recalculate_recursing_up(self)
|
|
120 |
+ |
|
121 |
+ def _recalculate_recursing_down(self, parent=None):
|
|
122 |
+ """Recalcuate the hash for this directory and any
|
|
123 |
+ subdirectories. Hashes for subdirectories should be calculated
|
|
124 |
+ and stored after a significant operation (e.g. an
|
|
125 |
+ import_files() call) but not after adding each file, as that
|
|
126 |
+ is extremely wasteful.
|
|
127 |
+ |
|
128 |
+ """
|
|
129 |
+ for entry in self.pb2_directory.directories:
|
|
130 |
+ self.index[entry.name].buildstream_object._recalculate_recursing_down(entry)
|
|
131 |
+ |
|
132 |
+ if parent:
|
|
133 |
+ self.ref = self.cas_cache.add_object(digest=parent.digest, buffer=self.pb2_directory.SerializeToString())
|
|
134 |
+ else:
|
|
135 |
+ self.ref = self.cas_cache.add_object(buffer=self.pb2_directory.SerializeToString())
|
|
136 |
+ # We don't need to do anything more than that; files were already added earlier, and symlinks are
|
|
137 |
+ # part of the directory structure.
|
|
138 |
+ |
|
139 |
+ def _find_pb2_entry(self, name):
|
|
140 |
+ if name in self.index:
|
|
141 |
+ return self.index[name].pb_object
|
|
142 |
+ return None
|
|
143 |
+ |
|
144 |
+ def _find_self_in_parent(self):
|
|
145 |
+ assert self.parent is not None
|
|
146 |
+ parent = self.parent
|
|
147 |
+ for (k, v) in parent.index.items():
|
|
148 |
+ if v.buildstream_object == self:
|
|
149 |
+ return k
|
|
150 |
+ return None
|
|
151 |
+ |
|
152 |
+ def _add_directory(self, name):
|
|
153 |
+ if name in self.index:
|
|
154 |
+ newdir = self.index[name].buildstream_object
|
|
155 |
+ if not isinstance(newdir, CasBasedDirectory):
|
|
156 |
+ # TODO: This may not be an actual error; it may actually overwrite it
|
|
157 |
+ raise VirtualDirectoryError("New directory {} in {} would overwrite existing non-directory of type {}"
|
|
158 |
+ .format(name, str(self), type(newdir)))
|
|
159 |
+ dirnode = self._find_pb2_entry(name)
|
|
160 |
+ else:
|
|
161 |
+ newdir = CasBasedDirectory(self.context, parent=self, filename=name)
|
|
162 |
+ dirnode = self.pb2_directory.directories.add()
|
|
163 |
+ |
|
164 |
+ dirnode.name = name
|
|
165 |
+ |
|
166 |
+ # Calculate the hash for an empty directory
|
|
167 |
+ new_directory = remote_execution_pb2.Directory()
|
|
168 |
+ self.cas_cache.add_object(digest=dirnode.digest, buffer=new_directory.SerializeToString())
|
|
169 |
+ self.index[name] = IndexEntry(dirnode, buildstream_object=newdir)
|
|
170 |
+ return newdir
|
|
171 |
+ |
|
172 |
+ def _add_new_file(self, basename, filename):
|
|
173 |
+ filenode = self.pb2_directory.files.add()
|
|
174 |
+ filenode.name = filename
|
|
175 |
+ self.cas_cache.add_object(digest=filenode.digest, path=os.path.join(basename, filename))
|
|
176 |
+ is_executable = os.access(os.path.join(basename, filename), os.X_OK)
|
|
177 |
+ filenode.is_executable = is_executable
|
|
178 |
+ self.index[filename] = IndexEntry(filenode, modified=(filename in self.index))
|
|
179 |
+ |
|
180 |
+ def _add_new_link(self, basename, filename):
|
|
181 |
+ existing_link = self._find_pb2_entry(filename)
|
|
182 |
+ if existing_link:
|
|
183 |
+ symlinknode = existing_link
|
|
184 |
+ else:
|
|
185 |
+ symlinknode = self.pb2_directory.symlinks.add()
|
|
186 |
+ symlinknode.name = filename
|
|
187 |
+ # A symlink node has no digest.
|
|
188 |
+ symlinknode.target = os.readlink(os.path.join(basename, filename))
|
|
189 |
+ self.index[filename] = IndexEntry(symlinknode, modified=(existing_link is not None))
|
|
190 |
+ |
|
191 |
+ def delete_entry(self, name):
|
|
192 |
+ for collection in [self.pb2_directory.files, self.pb2_directory.symlinks, self.pb2_directory.directories]:
|
|
193 |
+ if name in collection:
|
|
194 |
+ collection.remove(name)
|
|
195 |
+ if name in self.index:
|
|
196 |
+ del self.index[name]
|
|
197 |
+ |
|
198 |
+ def descend(self, subdirectory_spec, create=False):
|
|
199 |
+ """Descend one or more levels of directory hierarchy and return a new
|
|
200 |
+ Directory object for that directory.
|
|
201 |
+ |
|
202 |
+ Arguments:
|
|
203 |
+ * subdirectory_spec (list of strings): A list of strings which are all directory
|
|
204 |
+ names.
|
|
205 |
+ * create (boolean): If this is true, the directories will be created if
|
|
206 |
+ they don't already exist.
|
|
207 |
+ |
|
208 |
+ Note: At the moment, creating a directory by descending does
|
|
209 |
+ not update this object in the CAS cache. However, performing
|
|
210 |
+ an import_files() into a subdirectory of any depth obtained by
|
|
211 |
+ descending from this object *will* cause this directory to be
|
|
212 |
+ updated and stored.
|
|
213 |
+ |
|
214 |
+ """
|
|
215 |
+ |
|
216 |
+ # It's very common to send a directory name instead of a list and this causes
|
|
217 |
+ # bizarre errors, so check for it here
|
|
218 |
+ if not isinstance(subdirectory_spec, list):
|
|
219 |
+ subdirectory_spec = [subdirectory_spec]
|
|
220 |
+ |
|
221 |
+ # Because of the way split works, it's common to get a list which begins with
|
|
222 |
+ # an empty string. Detect these and remove them.
|
|
223 |
+ while subdirectory_spec and subdirectory_spec[0] == "":
|
|
224 |
+ subdirectory_spec.pop(0)
|
|
225 |
+ |
|
226 |
+ # Descending into [] returns the same directory.
|
|
227 |
+ if not subdirectory_spec:
|
|
228 |
+ return self
|
|
229 |
+ |
|
230 |
+ if subdirectory_spec[0] in self.index:
|
|
231 |
+ entry = self.index[subdirectory_spec[0]].buildstream_object
|
|
232 |
+ if isinstance(entry, CasBasedDirectory):
|
|
233 |
+ return entry.descend(subdirectory_spec[1:], create)
|
|
234 |
+ else:
|
|
235 |
+ error = "Cannot descend into {}, which is a '{}' in the directory {}"
|
|
236 |
+ raise VirtualDirectoryError(error.format(subdirectory_spec[0],
|
|
237 |
+ type(entry).__name__,
|
|
238 |
+ self))
|
|
239 |
+ else:
|
|
240 |
+ if create:
|
|
241 |
+ newdir = self._add_directory(subdirectory_spec[0])
|
|
242 |
+ return newdir.descend(subdirectory_spec[1:], create)
|
|
243 |
+ else:
|
|
244 |
+ error = "No entry called '{}' found in {}. There are directories called {}."
|
|
245 |
+ directory_list = ",".join([entry.name for entry in self.pb2_directory.directories])
|
|
246 |
+ raise VirtualDirectoryError(error.format(subdirectory_spec[0], str(self),
|
|
247 |
+ directory_list))
|
|
248 |
+ return None
|
|
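descend() expects a list of path components rather than a slash-separated string, though it tolerates a single name. A hedged sketch matching how element.py above locates the collect directory ('/buildstream-install' is just an example path):

    import os

    def descend_path(vdir, path, create=False):
        # "/buildstream-install/usr" -> ["buildstream-install", "usr"];
        # lstrip() drops the leading separator, and descend() would also
        # tolerate the empty first component that split() can produce.
        return vdir.descend(path.lstrip(os.sep).split(os.sep), create=create)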
249 |
+ |
|
250 |
+ def find_root(self):
|
|
251 |
+ """ Finds the root of this directory tree by following 'parent' until there is
|
|
252 |
+ no parent. """
|
|
253 |
+ if self.parent:
|
|
254 |
+ return self.parent.find_root()
|
|
255 |
+ else:
|
|
256 |
+ return self
|
|
257 |
+ |
|
258 |
+ def _resolve_symlink_or_directory(self, name):
|
|
259 |
+ """Used only by _import_files_from_directory. Tries to resolve a
|
|
260 |
+ directory name or symlink name. 'name' must be an entry in this
|
|
261 |
+ directory. It must be a single symlink or directory name, not a path
|
|
262 |
+ separated by path separators. If it's an existing directory name, it
|
|
263 |
+ just returns the Directory object for that. If it's a symlink, it will
|
|
264 |
+ attempt to find the target of the symlink and return that as a
|
|
265 |
+ Directory object.
|
|
266 |
+ |
|
267 |
+ If a symlink target doesn't exist, it will attempt to create it
|
|
268 |
+ as a directory as long as it's within this directory tree.
|
|
269 |
+ """
|
|
270 |
+ |
|
271 |
+ if isinstance(self.index[name].buildstream_object, Directory):
|
|
272 |
+ return self.index[name].buildstream_object
|
|
273 |
+ # OK then, it's a symlink
|
|
274 |
+ symlink = self._find_pb2_entry(name)
|
|
275 |
+ absolute = symlink.target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
|
|
276 |
+ if absolute:
|
|
277 |
+ root = self.find_root()
|
|
278 |
+ else:
|
|
279 |
+ root = self
|
|
280 |
+ directory = root
|
|
281 |
+ components = symlink.target.split(CasBasedDirectory._pb2_path_sep)
|
|
282 |
+ for c in components:
|
|
283 |
+ if c == "..":
|
|
284 |
+ directory = directory.parent
|
|
285 |
+ else:
|
|
286 |
+ directory = directory.descend(c, create=True)
|
|
287 |
+ return directory
|
|
288 |
+ |
|
289 |
+ def _check_replacement(self, name, path_prefix, fileListResult):
|
|
290 |
+ """ Checks whether 'name' exists, and if so, whether we can overwrite it.
|
|
291 |
+ If we can, add the name to 'overwritten_files' and delete the existing entry.
|
|
292 |
+ Returns 'True' if the import should go ahead.
|
|
293 |
+ fileListResult.overwritten and fileListResult.ignore are updated depending
|
|
294 |
+ on the result. """
|
|
295 |
+ existing_entry = self._find_pb2_entry(name)
|
|
296 |
+ relative_pathname = os.path.join(path_prefix, name)
|
|
297 |
+ if existing_entry is None:
|
|
298 |
+ return True
|
|
299 |
+ if (isinstance(existing_entry,
|
|
300 |
+ (remote_execution_pb2.FileNode, remote_execution_pb2.SymlinkNode))):
|
|
301 |
+ fileListResult.overwritten.append(relative_pathname)
|
|
302 |
+ return True
|
|
303 |
+ elif isinstance(existing_entry, remote_execution_pb2.DirectoryNode):
|
|
304 |
+ # If 'name' maps to a DirectoryNode, then there must be an entry in index
|
|
305 |
+ # pointing to another Directory.
|
|
306 |
+ if self.index[name].buildstream_object.is_empty():
|
|
307 |
+ self.delete_entry(name)
|
|
308 |
+ fileListResult.overwritten.append(relative_pathname)
|
|
309 |
+ return True
|
|
310 |
+ else:
|
|
311 |
+ # We can't overwrite a non-empty directory, so we just ignore it.
|
|
312 |
+ fileListResult.ignored.append(relative_pathname)
|
|
313 |
+ return False
|
|
314 |
+ assert False, ("Entry '{}' is not a recognised file/link/directory and not None; it is {}"
|
|
315 |
+ .format(name, type(existing_entry)))
|
|
316 |
+ return False # In case asserts are disabled
|
|
317 |
+ |
|
318 |
+ def _import_directory_recursively(self, directory_name, source_directory, remaining_path, path_prefix):
|
|
319 |
+ """ _import_directory_recursively and _import_files_from_directory will be called alternately
|
|
320 |
+ as a directory tree is descended. """
|
|
321 |
+ if directory_name in self.index:
|
|
322 |
+ subdir = self._resolve_symlink_or_directory(directory_name)
|
|
323 |
+ else:
|
|
324 |
+ subdir = self._add_directory(directory_name)
|
|
325 |
+ new_path_prefix = os.path.join(path_prefix, directory_name)
|
|
326 |
+ subdir_result = subdir._import_files_from_directory(os.path.join(source_directory, directory_name),
|
|
327 |
+ [os.path.sep.join(remaining_path)],
|
|
328 |
+ path_prefix=new_path_prefix)
|
|
329 |
+ return subdir_result
|
|
330 |
+ |
|
+    def _import_files_from_directory(self, source_directory, files, path_prefix=""):
+        """ Imports files from a traditional directory. """
+        result = FileListResult()
+        for entry in sorted(files):
+            split_path = entry.split(os.path.sep)
+            # The actual file on the FS we're importing
+            import_file = os.path.join(source_directory, entry)
+            # The destination filename, relative to the root where the import started
+            relative_pathname = os.path.join(path_prefix, entry)
+            if len(split_path) > 1:
+                directory_name = split_path[0]
+                # Hand this off to the importer for that subdir. This will only do one file -
+                # a better way would be to hand off all the files in this subdir at once.
+                subdir_result = self._import_directory_recursively(directory_name, source_directory,
+                                                                   split_path[1:], path_prefix)
+                result.combine(subdir_result)
+            elif os.path.islink(import_file):
+                if self._check_replacement(entry, path_prefix, result):
+                    self._add_new_link(source_directory, entry)
+                    result.files_written.append(relative_pathname)
+            elif os.path.isdir(import_file):
+                # A plain directory which already exists isn't a problem; just ignore it.
+                if entry not in self.index:
+                    self._add_directory(entry)
+            elif os.path.isfile(import_file):
+                if self._check_replacement(entry, path_prefix, result):
+                    self._add_new_file(source_directory, entry)
+                    result.files_written.append(relative_pathname)
+        return result
+
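One ordering detail in the loop above is worth calling out: os.path.islink() is checked before os.path.isdir() and os.path.isfile(), because the latter two follow symlinks and would otherwise misclassify a link to a directory or file. A quick standalone demonstration (POSIX only):

    import os
    import tempfile

    with tempfile.TemporaryDirectory() as d:
        real = os.path.join(d, "real")
        link = os.path.join(d, "link")
        open(real, "w").close()
        os.symlink(real, link)
        assert os.path.isfile(link)        # isfile() follows the link...
        assert os.path.islink(link)        # ...only islink() tells them apart
        assert not os.path.islink(real)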
+    def _save(self, name):
+        """ Saves this directory into the content cache as a named ref. This function is not
+        currently in use, but may be useful later. """
+        self._recalculate_recursing_up()
+        self._recalculate_recursing_down()
+        (rel_refpath, refname) = os.path.split(name)
+        refdir = os.path.join(self.cas_directory, 'refs', 'heads', rel_refpath)
+        refname = os.path.join(refdir, refname)
+
+        if not os.path.exists(refdir):
+            os.makedirs(refdir)
+        with open(refname, "wb") as f:
+            f.write(self.ref.SerializeToString())
+
+    def import_files(self, external_pathspec, *, files=None,
+                     report_written=True, update_utimes=False,
+                     can_link=False):
+        """Imports some or all files from external_pathspec into this directory.
+
+        Keyword arguments:
+
+        external_pathspec: Either a string containing a pathname, or a
+        Directory object, to use as the source.
+
+        files (list of strings): A list of all the files relative to
+        the external_pathspec to copy. If 'None' is supplied, all
+        files are copied.
+
+        report_written (bool): Return the full list of files
+        written. Defaults to True. If False, only a list of
+        overwritten files is returned.
+
+        update_utimes (bool): Currently ignored, since CAS does not store utimes.
+
+        can_link (bool): Ignored, since hard links do not have any meaning within CAS.
+        """
+        if isinstance(external_pathspec, FileBasedDirectory):
+            source_directory = external_pathspec._get_underlying_directory()
+        elif isinstance(external_pathspec, CasBasedDirectory):
+            # TODO: This transfers from one CAS to another via the
+            # filesystem, which is very inefficient. Alter this so it
+            # transfers refs across directly.
+            with tempfile.TemporaryDirectory(prefix="roundtrip") as tmpdir:
+                external_pathspec.export_files(tmpdir)
+                if files is None:
+                    files = list_relative_paths(tmpdir)
+                result = self._import_files_from_directory(tmpdir, files=files)
+            return result
+        else:
+            source_directory = external_pathspec
+
+        if files is None:
+            files = list_relative_paths(source_directory)
+
+        # TODO: No notice is taken of report_written, update_utimes or can_link.
+        # Current behaviour is to fully populate the report, which is inefficient,
+        # but still correct.
+        result = self._import_files_from_directory(source_directory, files=files)
+
+        # We need to recalculate and store the hashes of all directories both
+        # up and down the tree; we have changed our directory by importing files,
+        # which changes our hash and all our parents' hashes of us. The trees
+        # lower down need to be stored in the CAS as they are not automatically
+        # added during construction.
+        self._recalculate_recursing_down()
+        if self.parent:
+            self.parent._recalculate_recursing_up(self)
+        return result
+
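As a usage sketch, following the construction pattern used by the storage tests later in this diff (the staging path is a placeholder, and a Context configured with an artifactdir is assumed):

    import os
    import tempfile

    from buildstream._context import Context
    from buildstream.storage._casbaseddirectory import CasBasedDirectory

    workdir = tempfile.mkdtemp()
    context = Context()
    context.artifactdir = os.path.join(workdir, "cas")

    vdir = CasBasedDirectory(context)
    # Import a staged tree; the returned FileListResult records which
    # destination paths were written and which were overwritten.
    result = vdir.import_files("/path/to/staging")      # placeholder path
    print(result.files_written)
    print(vdir.list_relative_paths())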
+    def set_deterministic_mtime(self):
+        """ Sets a static modification time for all regular files in this directory.
+        Since we don't store any modification time, we don't need to do anything.
+        """
+        pass
+
+    def set_deterministic_user(self):
+        """ Would set all files in this directory to the current user's euid/egid.
+        Since we don't store any user data either, this is also a no-op.
+        """
+        pass
+
+    def export_files(self, to_directory, *, can_link=False, can_destroy=False):
+        """Copies everything from this into to_directory, which must be the name
+        of a traditional filesystem directory.
+
+        Arguments:
+
+        to_directory (string): a path outside this directory object
+        where the contents will be copied to.
+
+        can_link (bool): Whether we can create hard links in to_directory
+        instead of copying.
+
+        can_destroy (bool): Whether we can destroy elements in this
+        directory to export them (e.g. by renaming them as the
+        target).
+
+        """
+
+        if not os.path.exists(to_directory):
+            os.mkdir(to_directory)
+
+        for entry in self.pb2_directory.directories:
+            if entry.name not in self.index:
+                raise VirtualDirectoryError("CasDir {} contained {} in directories but not in the index"
+                                            .format(str(self), entry.name))
+            if not self._directory_read:
+                raise VirtualDirectoryError("CasDir {} has not been indexed yet".format(str(self)))
+            dest_dir = os.path.join(to_directory, entry.name)
+            if not os.path.exists(dest_dir):
+                os.mkdir(dest_dir)
+            target = self.descend([entry.name])
+            target.export_files(dest_dir)
+        for entry in self.pb2_directory.files:
+            # Extract the entry to a single file
+            dest_name = os.path.join(to_directory, entry.name)
+            src_name = self.cas_cache.objpath(entry.digest)
+            safe_copy(src_name, dest_name)
+            if entry.is_executable:
+                os.chmod(dest_name, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR |
+                         stat.S_IRGRP | stat.S_IXGRP |
+                         stat.S_IROTH | stat.S_IXOTH)
+        for entry in self.pb2_directory.symlinks:
+            src_name = os.path.join(to_directory, entry.name)
+            target_name = entry.target
+            try:
+                os.symlink(target_name, src_name)
+            except FileExistsError as e:
+                raise BstError(("Cannot create a symlink named {} pointing to {}." +
+                                " The original error was: {}").
+                               format(src_name, entry.target, e))
+
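The reverse direction, continuing the hypothetical vdir from the sketch above: export checks the tree out to a real directory, restoring the executable bit on files and recreating symlinks:

    checkout = tempfile.mkdtemp()
    vdir.export_files(checkout)
    # The exported tree now mirrors vdir's contents on the real filesystem.
    print(sorted(os.listdir(checkout)))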
+    def export_to_tar(self, tarfile, destination_dir, mtime=0):
+        raise NotImplementedError()
+
+    def mark_changed(self):
+        """ It should not be possible to externally modify a CAS-based
+        directory at the moment."""
+        raise NotImplementedError()
+
+    def is_empty(self):
+        """ Return true if this directory has no files, subdirectories or links in it.
+        """
+        return len(self.index) == 0
+
+    def _mark_directory_unmodified(self):
+        # Marks all entries in this directory and all child directories as unmodified.
+        for i in self.index.values():
+            i.modified = False
+            if isinstance(i.buildstream_object, CasBasedDirectory):
+                i.buildstream_object._mark_directory_unmodified()
+
+    def _mark_entry_unmodified(self, name):
+        # Marks an entry as unmodified. If the entry is a directory, it will
+        # recursively mark all of its tree as unmodified.
+        self.index[name].modified = False
+        if self.index[name].buildstream_object:
+            self.index[name].buildstream_object._mark_directory_unmodified()
+
+    def mark_unmodified(self):
+        """ Marks all files in this directory (recursively) as unmodified.
+        If we have a parent, we mark our own entry as unmodified in that parent's
+        index.
+        """
+        if self.parent:
+            self.parent._mark_entry_unmodified(self._find_self_in_parent())
+        else:
+            self._mark_directory_unmodified()
+
+    def list_modified_paths(self):
+        """Provide a list of relative paths which have been modified since the
+        last call to mark_unmodified.
+
+        Return value: List(str) - list of modified paths
+        """
+
+        filelist = []
+        for (k, v) in self.index.items():
+            if isinstance(v.buildstream_object, CasBasedDirectory):
+                filelist.extend([k + os.path.sep + x for x in v.buildstream_object.list_modified_paths()])
+            elif isinstance(v.pb_object, remote_execution_pb2.FileNode) and v.modified:
+                filelist.append(k)
+        return filelist
+
+    def list_relative_paths(self):
+        """Provide a list of all relative paths.
+
+        NOTE: This list is not in the same order as utils.list_relative_paths.
+
+        Return value: List(str) - list of all paths
+        """
+
+        filelist = []
+        for (k, v) in self.index.items():
+            if isinstance(v.buildstream_object, CasBasedDirectory):
+                filelist.extend([k + os.path.sep + x for x in v.buildstream_object.list_relative_paths()])
+            elif isinstance(v.pb_object, remote_execution_pb2.FileNode):
+                filelist.append(k)
+        return filelist
+
+    def _get_identifier(self):
+        path = ""
+        if self.parent:
+            path = self.parent._get_identifier()
+        if self.filename:
+            path += "/" + self.filename
+        else:
+            path += "/" + self.common_name
+        return path
+
+    def __str__(self):
+        return "[CAS:{}]".format(self._get_identifier())
+
+    def _get_underlying_directory(self):
+        """ There is no underlying directory for a CAS-backed directory, so
+        raise an exception. """
+        raise VirtualDirectoryError("_get_underlying_directory was called on a CAS-backed directory," +
+                                    " which has no underlying directory.")
buildstream/storage/_filebaseddirectory.py:
@@ -29,25 +29,12 @@ See also: :ref:`sandboxing`.
 
 import os
 import time
-from .._exceptions import BstError, ErrorDomain
-from .directory import Directory
+from .directory import Directory, VirtualDirectoryError
 from ..utils import link_files, copy_files, list_relative_paths, _get_link_mtime, _magic_timestamp
 from ..utils import _set_deterministic_user, _set_deterministic_mtime
 
-
-class VirtualDirectoryError(BstError):
-    """Raised by Directory functions when system calls fail.
-    This will be handled internally by the BuildStream core,
-    if you need to handle this error, then it should be reraised,
-    or either of the :class:`.ElementError` or :class:`.SourceError`
-    exceptions should be raised from this error.
-    """
-    def __init__(self, message, reason=None):
-        super().__init__(message, domain=ErrorDomain.VIRTUAL_FS, reason=reason)
-
-
 # FileBasedDirectory intentionally doesn't call its superclass constructor,
-# which is mean to be unimplemented.
+# which is meant to be unimplemented.
 # pylint: disable=super-init-not-called
 
 
@@ -108,7 +95,8 @@ class FileBasedDirectory(Directory):
         if create:
             new_path = os.path.join(self.external_directory, subdirectory_spec[0])
             os.makedirs(new_path, exist_ok=True)
-            return FileBasedDirectory(new_path).descend(subdirectory_spec[1:], create)
+            self.index[subdirectory_spec[0]] = FileBasedDirectory(new_path).descend(subdirectory_spec[1:], create)
+            return self.index[subdirectory_spec[0]]
         else:
             error = "No entry called '{}' found in the directory rooted at {}"
             raise VirtualDirectoryError(error.format(subdirectory_spec[0], self.external_directory))
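With this change, a subdirectory created by descend() is also remembered in self.index rather than being rebuilt on every call. A minimal sketch (placeholder path; FileBasedDirectory wraps an ordinary directory on disk):

    import tempfile

    from buildstream.storage._filebaseddirectory import FileBasedDirectory

    root = FileBasedDirectory(tempfile.mkdtemp())
    # Creates a/b on disk if missing; the result is cached in root.index,
    # keyed by the first path component.
    sub = root.descend(["a", "b"], create=True)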
@@ -134,8 +122,12 @@ class FileBasedDirectory(Directory):
 
         for f in import_result.files_written:
             os.utime(os.path.join(self.external_directory, f), times=(cur_time, cur_time))
+        self.mark_changed()
         return import_result
 
+    def mark_changed(self):
+        self._directory_read = False
+
     def set_deterministic_mtime(self):
         _set_deterministic_mtime(self.external_directory)
 
@@ -214,3 +206,8 @@ class FileBasedDirectory(Directory):
         # which exposes the sandbox directory; we will have to assume for the time being
         # that people will not abuse __str__.
         return self.external_directory
+
+    def _get_underlying_directory(self) -> str:
+        """ Returns the underlying (real) file system directory this
+        object refers to. """
+        return self.external_directory
buildstream/storage/directory.py:
@@ -31,6 +31,19 @@ See also: :ref:`sandboxing`.
 
 """
 
+from .._exceptions import BstError, ErrorDomain
+
+
+class VirtualDirectoryError(BstError):
+    """Raised by Directory functions when system calls fail.
+
+    This will be handled internally by the BuildStream core; if you
+    need to handle this error, it should be re-raised as (or raised
+    from) either a :class:`.ElementError` or a :class:`.SourceError`.
+    """
+    def __init__(self, message, reason=None):
+        super().__init__(message, domain=ErrorDomain.VIRTUAL_FS, reason=reason)
+
 
 class Directory():
     def __init__(self, external_directory=None):
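Per the docstring, code that can meaningfully handle this error is expected to re-raise it as a plugin-level error rather than let it escape. A hedged sketch of what that could look like inside element code (the helper and its arguments are invented for illustration):

    from buildstream import ElementError
    from buildstream.storage.directory import VirtualDirectoryError

    def stage_tree(element, vdir, destination):
        # Wrap low-level virtual-filesystem failures in the error type
        # that the BuildStream core expects from element plugins.
        try:
            vdir.export_files(destination)
        except VirtualDirectoryError as e:
            raise ElementError("{}: staging failed: {}".format(element, e)) from e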
@@ -111,6 +124,14 @@ class Directory():
         """
         raise NotImplementedError()
 
+    def mark_changed(self):
+        """ Mark this directory as having been changed outside this API. This
+        normally can only happen by calling the Sandbox's `run` method.
+        """
+        raise NotImplementedError()
+
     # Convenience functions
     def is_empty(self):
         """ Return true if this directory has no files, subdirectories or links in it.
setup.py:
@@ -272,6 +272,5 @@ setup(name='BuildStream',
           'pytest-cov >= 2.5.0',
           # Provide option to run tests in parallel, less reliable
           'pytest-xdist',
-          'pytest >= 3.1.0',
-          'pylint >= 1.8 , < 2'],
+          'pytest >= 3.1.0'],
       zip_safe=False)
tests/sandboxes/storage-test/original/bin/bash:
+This is the original /bin/bash.

tests/sandboxes/storage-test/original/bin/hello:
+This is the original /bin/hello.

tests/sandboxes/storage-test/overlay/bin/bash:
+This is the replacement /bin/bash.
\ No newline at end of file
tests/sandboxes/storage-tests.py:
+import os
+import pytest
+
+from buildstream._exceptions import ErrorDomain
+
+from buildstream._context import Context
+from buildstream.storage._casbaseddirectory import CasBasedDirectory
+from buildstream.storage._filebaseddirectory import FileBasedDirectory
+
+DATA_DIR = os.path.join(
+    os.path.dirname(os.path.realpath(__file__)),
+    "storage-test"
+)
+
+
+def setup_backend(backend_class, tmpdir):
+    if backend_class == FileBasedDirectory:
+        return backend_class(os.path.join(tmpdir, "vdir"))
+    else:
+        context = Context()
+        context.artifactdir = os.path.join(tmpdir, "cas")
+        return backend_class(context)
+
+
+@pytest.mark.parametrize("backend", [
+    FileBasedDirectory, CasBasedDirectory])
+@pytest.mark.datafiles(DATA_DIR)
+def test_import(tmpdir, datafiles, backend):
+    original = os.path.join(str(datafiles), "original")
+
+    c = setup_backend(backend, str(tmpdir))
+
+    c.import_files(original)
+
+    assert("bin/bash" in c.list_relative_paths())
+    assert("bin/hello" in c.list_relative_paths())
+
+
+@pytest.mark.parametrize("backend", [
+    FileBasedDirectory, CasBasedDirectory])
+@pytest.mark.datafiles(DATA_DIR)
+def test_modified_file_list(tmpdir, datafiles, backend):
+    original = os.path.join(str(datafiles), "original")
+    overlay = os.path.join(str(datafiles), "overlay")
+
+    c = setup_backend(backend, str(tmpdir))
+
+    c.import_files(original)
+
+    c.mark_unmodified()
+
+    c.import_files(overlay)
+
+    print("List of all paths in imported results: {}".format(c.list_relative_paths()))
+    assert("bin/bash" in c.list_relative_paths())
+    assert("bin/bash" in c.list_modified_paths())
+    assert("bin/hello" not in c.list_modified_paths())
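These tests can also be driven programmatically; a sketch, assuming pytest and the pytest-datafiles plugin (which supplies the datafiles fixture used above) are installed, run from the repository root:

    import pytest

    # Equivalent to `pytest -v tests/sandboxes/storage-tests.py`;
    # an exit code of 0 means both backends passed.
    exit_code = pytest.main(["-v", "tests/sandboxes/storage-tests.py"])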
tests/sources/git.py:
@@ -359,3 +359,45 @@ def test_submodule_track_ignore_inconsistent(cli, tmpdir, datafiles):
 
     # Assert that we are just fine without it, and emit a warning to the user.
     assert "Ignoring inconsistent submodule" in result.stderr
+
+
+@pytest.mark.skipif(HAVE_GIT is False, reason="git is not available")
+@pytest.mark.datafiles(os.path.join(DATA_DIR, 'template'))
+def test_submodule_track_no_ref_or_track(cli, tmpdir, datafiles):
+    project = os.path.join(datafiles.dirname, datafiles.basename)
+
+    # Create the repo from the 'repofiles' subdir
+    repo = create_repo('git', str(tmpdir))
+    ref = repo.create(os.path.join(project, 'repofiles'))
+
+    # Write out our test target, with neither a ref nor a track
+    gitsource = repo.source_config(ref=None)
+    gitsource.pop('track')
+    element = {
+        'kind': 'import',
+        'sources': [
+            gitsource
+        ]
+    }
+
+    _yaml.dump(element, os.path.join(project, 'target.bst'))
+
+    # Tracking should fail, since without a tracking branch there is
+    # nothing to update the ref from
+    result = cli.run(project=project, args=['track', 'target.bst'])
+    result.assert_main_error(ErrorDomain.STREAM, None)
+    result.assert_task_error(ErrorDomain.SOURCE, 'track-attempt-no-track')
+
+    # Assert that the error message tells the user how to fix it
+    assert "FAILURE git source at" in result.stderr
+    assert "Without a tracking branch ref can not be updated. Please " + \
+           "provide a ref or a track." in result.stderr
+
+    # Building should also fail, since the pipeline is inconsistent
+    result = cli.run(project=project, args=['build', 'target.bst'])
+    result.assert_main_error(ErrorDomain.PIPELINE, 'inconsistent-pipeline')
+    result.assert_task_error(None, None)
+
+    # Assert that the failure points the user at tracking
+    assert "Exact versions are missing for the following elements" in result.stderr
+    assert "is missing ref and track." in result.stderr
+    assert "Then track these elements with `bst track`" in result.stderr