Martin Blanchard pushed to branch mablanch/630-remote-execution-reconn at BuildStream / buildstream
Commits:
-
727f2faa
by Tristan Van Berkom at 2018-09-18T07:43:07Z
-
ffa0bb36
by Tristan Van Berkom at 2018-09-18T07:47:44Z
-
f2ae46f8
by Tristan Van Berkom at 2018-09-18T08:14:23Z
-
7b117e40
by Daniel Silverstone at 2018-09-18T08:41:32Z
-
345f5f49
by Daniel Silverstone at 2018-09-18T08:41:32Z
-
e32221b6
by Daniel Silverstone at 2018-09-18T08:45:50Z
-
b587579f
by Tristan Van Berkom at 2018-09-18T09:53:26Z
-
30b41959
by Tristan Van Berkom at 2018-09-18T09:56:45Z
-
41e8dc81
by Tristan Van Berkom at 2018-09-18T09:56:45Z
-
97071b6e
by Tristan Van Berkom at 2018-09-18T10:16:43Z
-
a1c6cba3
by Martin Blanchard at 2018-09-18T13:37:41Z
11 changed files:
- buildstream/_artifactcache/artifactcache.py
- buildstream/_project.py
- buildstream/sandbox/_sandboxremote.py
- tests/artifactcache/config.py
- + tests/artifactcache/missing-certs/certificates/client.crt
- + tests/artifactcache/missing-certs/certificates/client.key
- + tests/artifactcache/missing-certs/element.bst
- tests/artifactcache/pull.py
- + tests/format/option-overrides/element.bst
- + tests/format/option-overrides/project.conf
- + tests/format/optionoverrides.py
Changes:
| ... | ... | @@ -51,7 +51,7 @@ class ArtifactCacheSpec(namedtuple('ArtifactCacheSpec', 'url push server_cert cl |
| 51 | 51 |
url = _yaml.node_get(spec_node, str, 'url')
|
| 52 | 52 |
push = _yaml.node_get(spec_node, bool, 'push', default_value=False)
|
| 53 | 53 |
if not url:
|
| 54 |
- provenance = _yaml.node_get_provenance(spec_node)
|
|
| 54 |
+ provenance = _yaml.node_get_provenance(spec_node, 'url')
|
|
| 55 | 55 |
raise LoadError(LoadErrorReason.INVALID_DATA,
|
| 56 | 56 |
"{}: empty artifact cache URL".format(provenance))
|
| 57 | 57 |
|
| ... | ... | @@ -67,6 +67,16 @@ class ArtifactCacheSpec(namedtuple('ArtifactCacheSpec', 'url push server_cert cl |
| 67 | 67 |
if client_cert and basedir:
|
| 68 | 68 |
client_cert = os.path.join(basedir, client_cert)
|
| 69 | 69 |
|
| 70 |
+ if client_key and not client_cert:
|
|
| 71 |
+ provenance = _yaml.node_get_provenance(spec_node, 'client-key')
|
|
| 72 |
+ raise LoadError(LoadErrorReason.INVALID_DATA,
|
|
| 73 |
+ "{}: 'client-key' was specified without 'client-cert'".format(provenance))
|
|
| 74 |
+ |
|
| 75 |
+ if client_cert and not client_key:
|
|
| 76 |
+ provenance = _yaml.node_get_provenance(spec_node, 'client-cert')
|
|
| 77 |
+ raise LoadError(LoadErrorReason.INVALID_DATA,
|
|
| 78 |
+ "{}: 'client-cert' was specified without 'client-key'".format(provenance))
|
|
| 79 |
+ |
|
| 70 | 80 |
return ArtifactCacheSpec(url, push, server_cert, client_key, client_cert)
|
| 71 | 81 |
|
| 72 | 82 |
|
| ... | ... | @@ -91,6 +101,7 @@ class ArtifactCache(): |
| 91 | 101 |
self._cache_size = None # The current cache size, sometimes it's an estimate
|
| 92 | 102 |
self._cache_quota = None # The cache quota
|
| 93 | 103 |
self._cache_lower_threshold = None # The target cache size for a cleanup
|
| 104 |
+ self._remotes_setup = False # Check to prevent double-setup of remotes
|
|
| 94 | 105 |
|
| 95 | 106 |
os.makedirs(self.extractdir, exist_ok=True)
|
| 96 | 107 |
os.makedirs(self.tmpdir, exist_ok=True)
|
| ... | ... | @@ -143,6 +154,10 @@ class ArtifactCache(): |
| 143 | 154 |
#
|
| 144 | 155 |
def setup_remotes(self, *, use_config=False, remote_url=None):
|
| 145 | 156 |
|
| 157 |
+ # Ensure we do not double-initialise since this can be expensive
|
|
| 158 |
+ assert(not self._remotes_setup)
|
|
| 159 |
+ self._remotes_setup = True
|
|
| 160 |
+ |
|
| 146 | 161 |
# Initialize remote artifact caches. We allow the commandline to override
|
| 147 | 162 |
# the user config in some cases (for example `bst push --remote=...`).
|
| 148 | 163 |
has_remote_caches = False
|
| ... | ... | @@ -598,7 +598,10 @@ class Project(): |
| 598 | 598 |
# any conditionals specified for project option declarations,
|
| 599 | 599 |
# or conditionally specifying the project name; will be ignored.
|
| 600 | 600 |
#
|
| 601 |
+ # Don't forget to also resolve options in the element and source overrides.
|
|
| 601 | 602 |
output.options.process_node(config)
|
| 603 |
+ output.options.process_node(output.element_overrides)
|
|
| 604 |
+ output.options.process_node(output.source_overrides)
|
|
| 602 | 605 |
|
| 603 | 606 |
# Load base variables
|
| 604 | 607 |
output.base_variables = _yaml.node_get(config, Mapping, 'variables')
|
| ... | ... | @@ -27,7 +27,7 @@ from . import Sandbox |
| 27 | 27 |
from ..storage._filebaseddirectory import FileBasedDirectory
|
| 28 | 28 |
from ..storage._casbaseddirectory import CasBasedDirectory
|
| 29 | 29 |
from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc
|
| 30 |
-from .._artifactcache.cascache import CASCache
|
|
| 30 |
+from .._platform import Platform
|
|
| 31 | 31 |
|
| 32 | 32 |
|
| 33 | 33 |
class SandboxError(Exception):
|
| ... | ... | @@ -43,7 +43,6 @@ class SandboxRemote(Sandbox): |
| 43 | 43 |
|
| 44 | 44 |
def __init__(self, *args, **kwargs):
|
| 45 | 45 |
super().__init__(*args, **kwargs)
|
| 46 |
- self.cascache = None
|
|
| 47 | 46 |
|
| 48 | 47 |
url = urlparse(kwargs['server_url'])
|
| 49 | 48 |
if not url.scheme or not url.hostname or not url.port:
|
| ... | ... | @@ -56,12 +55,6 @@ class SandboxRemote(Sandbox): |
| 56 | 55 |
|
| 57 | 56 |
self.server_url = '{}:{}'.format(url.hostname, url.port)
|
| 58 | 57 |
|
| 59 |
- def _get_cascache(self):
|
|
| 60 |
- if self.cascache is None:
|
|
| 61 |
- self.cascache = CASCache(self._get_context())
|
|
| 62 |
- self.cascache.setup_remotes(use_config=True)
|
|
| 63 |
- return self.cascache
|
|
| 64 |
- |
|
| 65 | 58 |
def run_remote_command(self, command, input_root_digest, working_directory, environment):
|
| 66 | 59 |
# Sends an execution request to the remote execution server.
|
| 67 | 60 |
#
|
| ... | ... | @@ -78,13 +71,12 @@ class SandboxRemote(Sandbox): |
| 78 | 71 |
output_files=[],
|
| 79 | 72 |
output_directories=[self._output_directory],
|
| 80 | 73 |
platform=None)
|
| 81 |
- |
|
| 82 |
- cascache = self._get_cascache()
|
|
| 74 |
+ platform = Platform.get_platform()
|
|
| 75 |
+ cascache = platform.artifactcache
|
|
| 83 | 76 |
# Upload the Command message to the remote CAS server
|
| 84 | 77 |
command_digest = cascache.push_message(self._get_project(), remote_command)
|
| 85 | 78 |
if not command_digest or not cascache.verify_digest_pushed(self._get_project(), command_digest):
|
| 86 |
- # Command push failed
|
|
| 87 |
- return None
|
|
| 79 |
+ raise SandboxError("Failed pushing build command to remote CAS.")
|
|
| 88 | 80 |
|
| 89 | 81 |
# Create and send the action.
|
| 90 | 82 |
action = remote_execution_pb2.Action(command_digest=command_digest,
|
| ... | ... | @@ -95,27 +87,53 @@ class SandboxRemote(Sandbox): |
| 95 | 87 |
# Upload the Action message to the remote CAS server
|
| 96 | 88 |
action_digest = cascache.push_message(self._get_project(), action)
|
| 97 | 89 |
if not action_digest or not cascache.verify_digest_pushed(self._get_project(), action_digest):
|
| 98 |
- # Action push failed
|
|
| 99 |
- return None
|
|
| 90 |
+ raise SandboxError("Failed pushing build action to remote CAS.")
|
|
| 100 | 91 |
|
| 101 | 92 |
# Next, try to create a communication channel to the BuildGrid server.
|
| 102 | 93 |
channel = grpc.insecure_channel(self.server_url)
|
| 103 | 94 |
stub = remote_execution_pb2_grpc.ExecutionStub(channel)
|
| 104 | 95 |
request = remote_execution_pb2.ExecuteRequest(action_digest=action_digest,
|
| 105 | 96 |
skip_cache_lookup=False)
|
| 106 |
- try:
|
|
| 107 |
- operation_iterator = stub.Execute(request)
|
|
| 108 |
- except grpc.RpcError:
|
|
| 109 |
- return None
|
|
| 97 |
+ |
|
| 98 |
+ def __run_remote_command(stub, execute_request=None, running_operation=None):
|
|
| 99 |
+ try:
|
|
| 100 |
+ last_operation = None
|
|
| 101 |
+ if execute_request is not None:
|
|
| 102 |
+ operation_iterator = stub.Execute(execute_request)
|
|
| 103 |
+ else:
|
|
| 104 |
+ request = remote_execution_pb2.WaitExecutionRequest(name=running_operation.name)
|
|
| 105 |
+ operation_iterator = stub.WaitExecution(request)
|
|
| 106 |
+ |
|
| 107 |
+ for operation in operation_iterator:
|
|
| 108 |
+ if operation.done:
|
|
| 109 |
+ return operation
|
|
| 110 |
+ else:
|
|
| 111 |
+ last_operation = operation
|
|
| 112 |
+ except grpc.RpcError as e:
|
|
| 113 |
+ status_code = e.code()
|
|
| 114 |
+ if status_code == grpc.StatusCode.UNAVAILABLE:
|
|
| 115 |
+ raise SandboxError("Failed contacting remote execution server at {}."
|
|
| 116 |
+ .format(self.server_url))
|
|
| 117 |
+ |
|
| 118 |
+ elif (status_code == grpc.StatusCode.INVALID_ARGUMENT or
|
|
| 119 |
+ status_code == grpc.StatusCode.FAILED_PRECONDITION or
|
|
| 120 |
+ status_code == grpc.StatusCode.RESOURCE_EXHAUSTED):
|
|
| 121 |
+ raise SandboxError("{} ({}).".format(e.details(), status_code.name))
|
|
| 122 |
+ |
|
| 123 |
+ elif running_operation and status_code == grpc.StatusCode.UNIMPLEMENTED:
|
|
| 124 |
+ raise SandboxError("Failed trying to recover from connection loss: "
|
|
| 125 |
+ "server does not support operation status polling recovery.")
|
|
| 126 |
+ |
|
| 127 |
+ return last_operation
|
|
| 110 | 128 |
|
| 111 | 129 |
operation = None
|
| 112 | 130 |
with self._get_context().timed_activity("Waiting for the remote build to complete"):
|
| 113 |
- # It is advantageous to check operation_iterator.code() is grpc.StatusCode.OK here,
|
|
| 114 |
- # which will check the server is actually contactable. However, calling it when the
|
|
| 115 |
- # server is available seems to cause .code() to hang forever.
|
|
| 116 |
- for operation in operation_iterator:
|
|
| 117 |
- if operation.done:
|
|
| 118 |
- break
|
|
| 131 |
+ operation = __run_remote_command(stub, execute_request=request)
|
|
| 132 |
+ if operation and operation.done:
|
|
| 133 |
+ return operation
|
|
| 134 |
+ |
|
| 135 |
+ while not operation.done:
|
|
| 136 |
+ operation = __run_remote_command(stub, running_operation=operation)
|
|
| 119 | 137 |
|
| 120 | 138 |
return operation
|
| 121 | 139 |
|
| ... | ... | @@ -141,7 +159,8 @@ class SandboxRemote(Sandbox): |
| 141 | 159 |
if tree_digest is None or not tree_digest.hash:
|
| 142 | 160 |
raise SandboxError("Output directory structure had no digest attached.")
|
| 143 | 161 |
|
| 144 |
- cascache = self._get_cascache()
|
|
| 162 |
+ platform = Platform.get_platform()
|
|
| 163 |
+ cascache = platform.artifactcache
|
|
| 145 | 164 |
# Now do a pull to ensure we have the necessary parts.
|
| 146 | 165 |
dir_digest = cascache.pull_tree(self._get_project(), tree_digest)
|
| 147 | 166 |
if dir_digest is None or not dir_digest.hash or not dir_digest.size_bytes:
|
| ... | ... | @@ -176,7 +195,8 @@ class SandboxRemote(Sandbox): |
| 176 | 195 |
|
| 177 | 196 |
upload_vdir.recalculate_hash()
|
| 178 | 197 |
|
| 179 |
- cascache = self._get_cascache()
|
|
| 198 |
+ platform = Platform.get_platform()
|
|
| 199 |
+ cascache = platform.artifactcache
|
|
| 180 | 200 |
# Now, push that key (without necessarily needing a ref) to the remote.
|
| 181 | 201 |
vdir_digest = cascache.push_directory(self._get_project(), upload_vdir)
|
| 182 | 202 |
if not vdir_digest or not cascache.verify_digest_pushed(self._get_project(), vdir_digest):
|
| ... | ... | @@ -201,7 +221,6 @@ class SandboxRemote(Sandbox): |
| 201 | 221 |
|
| 202 | 222 |
if operation is None:
|
| 203 | 223 |
# Failure of remote execution, usually due to an error in BuildStream
|
| 204 |
- # NB This error could be raised in __run_remote_command
|
|
| 205 | 224 |
raise SandboxError("No response returned from server")
|
| 206 | 225 |
|
| 207 | 226 |
assert not operation.HasField('error') and operation.HasField('response')
|
| ... | ... | @@ -9,8 +9,12 @@ from buildstream._context import Context |
| 9 | 9 |
from buildstream._project import Project
|
| 10 | 10 |
from buildstream.utils import _deduplicate
|
| 11 | 11 |
from buildstream import _yaml
|
| 12 |
+from buildstream._exceptions import ErrorDomain, LoadErrorReason
|
|
| 12 | 13 |
|
| 14 |
+from tests.testutils.runcli import cli
|
|
| 13 | 15 |
|
| 16 |
+ |
|
| 17 |
+DATA_DIR = os.path.dirname(os.path.realpath(__file__))
|
|
| 14 | 18 |
cache1 = ArtifactCacheSpec(url='https://example.com/cache1', push=True)
|
| 15 | 19 |
cache2 = ArtifactCacheSpec(url='https://example.com/cache2', push=False)
|
| 16 | 20 |
cache3 = ArtifactCacheSpec(url='https://example.com/cache3', push=False)
|
| ... | ... | @@ -106,3 +110,33 @@ def test_artifact_cache_precedence(tmpdir, override_caches, project_caches, user |
| 106 | 110 |
# Verify that it was correctly read.
|
| 107 | 111 |
expected_cache_specs = list(_deduplicate(itertools.chain(override_caches, project_caches, user_caches)))
|
| 108 | 112 |
assert parsed_cache_specs == expected_cache_specs
|
| 113 |
+ |
|
| 114 |
+ |
|
| 115 |
+# Assert that if either the client key or client cert is specified
|
|
| 116 |
+# without specifying its counterpart, we get a comprehensive LoadError
|
|
| 117 |
+# instead of an unhandled exception.
|
|
| 118 |
+@pytest.mark.datafiles(DATA_DIR)
|
|
| 119 |
+@pytest.mark.parametrize('config_key, config_value', [
|
|
| 120 |
+ ('client-cert', 'client.crt'),
|
|
| 121 |
+ ('client-key', 'client.key')
|
|
| 122 |
+])
|
|
| 123 |
+def test_missing_certs(cli, datafiles, config_key, config_value):
|
|
| 124 |
+ project = os.path.join(datafiles.dirname, datafiles.basename, 'missing-certs')
|
|
| 125 |
+ |
|
| 126 |
+ project_conf = {
|
|
| 127 |
+ 'name': 'test',
|
|
| 128 |
+ |
|
| 129 |
+ 'artifacts': {
|
|
| 130 |
+ 'url': 'https://cache.example.com:12345',
|
|
| 131 |
+ 'push': 'true',
|
|
| 132 |
+ config_key: config_value
|
|
| 133 |
+ }
|
|
| 134 |
+ }
|
|
| 135 |
+ project_conf_file = os.path.join(project, 'project.conf')
|
|
| 136 |
+ _yaml.dump(project_conf, project_conf_file)
|
|
| 137 |
+ |
|
| 138 |
+ # Use `pull` here to ensure we try to initialize the remotes, triggering the error
|
|
| 139 |
+ #
|
|
| 140 |
+ # This does not happen for a simple `bst show`.
|
|
| 141 |
+ result = cli.run(project=project, args=['pull', 'element.bst'])
|
|
| 142 |
+ result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.INVALID_DATA)
|
| 1 |
+kind: autotools
|
| ... | ... | @@ -137,7 +137,6 @@ def _test_pull(user_config_file, project_dir, artifact_dir, |
| 137 | 137 |
|
| 138 | 138 |
# Manually setup the CAS remote
|
| 139 | 139 |
cas.setup_remotes(use_config=True)
|
| 140 |
- cas.initialize_remotes()
|
|
| 141 | 140 |
|
| 142 | 141 |
if cas.has_push_remotes(element=element):
|
| 143 | 142 |
# Push the element's artifact
|
| ... | ... | @@ -274,7 +273,6 @@ def _test_push_tree(user_config_file, project_dir, artifact_dir, artifact_digest |
| 274 | 273 |
|
| 275 | 274 |
# Manually setup the CAS remote
|
| 276 | 275 |
cas.setup_remotes(use_config=True)
|
| 277 |
- cas.initialize_remotes()
|
|
| 278 | 276 |
|
| 279 | 277 |
if cas.has_push_remotes():
|
| 280 | 278 |
directory = remote_execution_pb2.Directory()
|
| ... | ... | @@ -310,7 +308,6 @@ def _test_pull_tree(user_config_file, project_dir, artifact_dir, artifact_digest |
| 310 | 308 |
|
| 311 | 309 |
# Manually setup the CAS remote
|
| 312 | 310 |
cas.setup_remotes(use_config=True)
|
| 313 |
- cas.initialize_remotes()
|
|
| 314 | 311 |
|
| 315 | 312 |
if cas.has_push_remotes():
|
| 316 | 313 |
# Pull the artifact using the Tree object
|
| 1 |
+kind: autotools
|
| 1 |
+# Test case ensuring that we can use options
|
|
| 2 |
+# in the element overrides.
|
|
| 3 |
+#
|
|
| 4 |
+name: test
|
|
| 5 |
+ |
|
| 6 |
+options:
|
|
| 7 |
+ arch:
|
|
| 8 |
+ type: arch
|
|
| 9 |
+ description: architecture
|
|
| 10 |
+ values: [i686, x86_64]
|
|
| 11 |
+ |
|
| 12 |
+elements:
|
|
| 13 |
+ autotools:
|
|
| 14 |
+ variables:
|
|
| 15 |
+ (?):
|
|
| 16 |
+ - arch == 'i686':
|
|
| 17 |
+ conf-global: --host=i686-unknown-linux-gnu
|
|
| 18 |
+ - arch == 'x86_64':
|
|
| 19 |
+ conf-global: --host=x86_64-unknown-linux-gnu
|
| 1 |
+import os
|
|
| 2 |
+import pytest
|
|
| 3 |
+from buildstream import _yaml
|
|
| 4 |
+from tests.testutils.runcli import cli
|
|
| 5 |
+ |
|
| 6 |
+# Project directory
|
|
| 7 |
+DATA_DIR = os.path.dirname(os.path.realpath(__file__))
|
|
| 8 |
+ |
|
| 9 |
+ |
|
| 10 |
+@pytest.mark.datafiles(DATA_DIR)
|
|
| 11 |
+@pytest.mark.parametrize("arch", [('i686'), ('x86_64')])
|
|
| 12 |
+def test_override(cli, datafiles, arch):
|
|
| 13 |
+ project = os.path.join(datafiles.dirname, datafiles.basename, 'option-overrides')
|
|
| 14 |
+ |
|
| 15 |
+ bst_args = ['--option', 'arch', arch]
|
|
| 16 |
+ bst_args += [
|
|
| 17 |
+ 'show',
|
|
| 18 |
+ '--deps', 'none',
|
|
| 19 |
+ '--format', '%{vars}',
|
|
| 20 |
+ 'element.bst'
|
|
| 21 |
+ ]
|
|
| 22 |
+ result = cli.run(project=project, silent=True, args=bst_args)
|
|
| 23 |
+ result.assert_success()
|
|
| 24 |
+ |
|
| 25 |
+ # See the associated project.conf for the expected values
|
|
| 26 |
+ expected_value = '--host={}-unknown-linux-gnu'.format(arch)
|
|
| 27 |
+ |
|
| 28 |
+ loaded = _yaml.load_data(result.output)
|
|
| 29 |
+ assert loaded['conf-global'] == expected_value
|
