Martin Blanchard pushed to branch mablanch/630-remote-execution-reconn at BuildStream / buildstream
Commits:
-
727f2faa
by Tristan Van Berkom at 2018-09-18T07:43:07Z
-
ffa0bb36
by Tristan Van Berkom at 2018-09-18T07:47:44Z
-
f2ae46f8
by Tristan Van Berkom at 2018-09-18T08:14:23Z
-
7b117e40
by Daniel Silverstone at 2018-09-18T08:41:32Z
-
345f5f49
by Daniel Silverstone at 2018-09-18T08:41:32Z
-
e32221b6
by Daniel Silverstone at 2018-09-18T08:45:50Z
-
b587579f
by Tristan Van Berkom at 2018-09-18T09:53:26Z
-
30b41959
by Tristan Van Berkom at 2018-09-18T09:56:45Z
-
41e8dc81
by Tristan Van Berkom at 2018-09-18T09:56:45Z
-
97071b6e
by Tristan Van Berkom at 2018-09-18T10:16:43Z
-
a1c6cba3
by Martin Blanchard at 2018-09-18T13:37:41Z
11 changed files:
- buildstream/_artifactcache/artifactcache.py
- buildstream/_project.py
- buildstream/sandbox/_sandboxremote.py
- tests/artifactcache/config.py
- + tests/artifactcache/missing-certs/certificates/client.crt
- + tests/artifactcache/missing-certs/certificates/client.key
- + tests/artifactcache/missing-certs/element.bst
- tests/artifactcache/pull.py
- + tests/format/option-overrides/element.bst
- + tests/format/option-overrides/project.conf
- + tests/format/optionoverrides.py
Changes:
... | ... | @@ -51,7 +51,7 @@ class ArtifactCacheSpec(namedtuple('ArtifactCacheSpec', 'url push server_cert cl |
51 | 51 |
url = _yaml.node_get(spec_node, str, 'url')
|
52 | 52 |
push = _yaml.node_get(spec_node, bool, 'push', default_value=False)
|
53 | 53 |
if not url:
|
54 |
- provenance = _yaml.node_get_provenance(spec_node)
|
|
54 |
+ provenance = _yaml.node_get_provenance(spec_node, 'url')
|
|
55 | 55 |
raise LoadError(LoadErrorReason.INVALID_DATA,
|
56 | 56 |
"{}: empty artifact cache URL".format(provenance))
|
57 | 57 |
|
... | ... | @@ -67,6 +67,16 @@ class ArtifactCacheSpec(namedtuple('ArtifactCacheSpec', 'url push server_cert cl |
67 | 67 |
if client_cert and basedir:
|
68 | 68 |
client_cert = os.path.join(basedir, client_cert)
|
69 | 69 |
|
70 |
+ if client_key and not client_cert:
|
|
71 |
+ provenance = _yaml.node_get_provenance(spec_node, 'client-key')
|
|
72 |
+ raise LoadError(LoadErrorReason.INVALID_DATA,
|
|
73 |
+ "{}: 'client-key' was specified without 'client-cert'".format(provenance))
|
|
74 |
+ |
|
75 |
+ if client_cert and not client_key:
|
|
76 |
+ provenance = _yaml.node_get_provenance(spec_node, 'client-cert')
|
|
77 |
+ raise LoadError(LoadErrorReason.INVALID_DATA,
|
|
78 |
+ "{}: 'client-cert' was specified without 'client-key'".format(provenance))
|
|
79 |
+ |
|
70 | 80 |
return ArtifactCacheSpec(url, push, server_cert, client_key, client_cert)
|
71 | 81 |
|
72 | 82 |
|
... | ... | @@ -91,6 +101,7 @@ class ArtifactCache(): |
91 | 101 |
self._cache_size = None # The current cache size, sometimes it's an estimate
|
92 | 102 |
self._cache_quota = None # The cache quota
|
93 | 103 |
self._cache_lower_threshold = None # The target cache size for a cleanup
|
104 |
+ self._remotes_setup = False # Check to prevent double-setup of remotes
|
|
94 | 105 |
|
95 | 106 |
os.makedirs(self.extractdir, exist_ok=True)
|
96 | 107 |
os.makedirs(self.tmpdir, exist_ok=True)
|
... | ... | @@ -143,6 +154,10 @@ class ArtifactCache(): |
143 | 154 |
#
|
144 | 155 |
def setup_remotes(self, *, use_config=False, remote_url=None):
|
145 | 156 |
|
157 |
+ # Ensure we do not double-initialise since this can be expensive
|
|
158 |
+ assert(not self._remotes_setup)
|
|
159 |
+ self._remotes_setup = True
|
|
160 |
+ |
|
146 | 161 |
# Initialize remote artifact caches. We allow the commandline to override
|
147 | 162 |
# the user config in some cases (for example `bst push --remote=...`).
|
148 | 163 |
has_remote_caches = False
|
... | ... | @@ -598,7 +598,10 @@ class Project(): |
598 | 598 |
# any conditionals specified for project option declarations,
|
599 | 599 |
# or conditionally specifying the project name; will be ignored.
|
600 | 600 |
#
|
601 |
+ # Don't forget to also resolve options in the element and source overrides.
|
|
601 | 602 |
output.options.process_node(config)
|
603 |
+ output.options.process_node(output.element_overrides)
|
|
604 |
+ output.options.process_node(output.source_overrides)
|
|
602 | 605 |
|
603 | 606 |
# Load base variables
|
604 | 607 |
output.base_variables = _yaml.node_get(config, Mapping, 'variables')
|
... | ... | @@ -27,7 +27,7 @@ from . import Sandbox |
27 | 27 |
from ..storage._filebaseddirectory import FileBasedDirectory
|
28 | 28 |
from ..storage._casbaseddirectory import CasBasedDirectory
|
29 | 29 |
from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc
|
30 |
-from .._artifactcache.cascache import CASCache
|
|
30 |
+from .._platform import Platform
|
|
31 | 31 |
|
32 | 32 |
|
33 | 33 |
class SandboxError(Exception):
|
... | ... | @@ -43,7 +43,6 @@ class SandboxRemote(Sandbox): |
43 | 43 |
|
44 | 44 |
def __init__(self, *args, **kwargs):
|
45 | 45 |
super().__init__(*args, **kwargs)
|
46 |
- self.cascache = None
|
|
47 | 46 |
|
48 | 47 |
url = urlparse(kwargs['server_url'])
|
49 | 48 |
if not url.scheme or not url.hostname or not url.port:
|
... | ... | @@ -56,12 +55,6 @@ class SandboxRemote(Sandbox): |
56 | 55 |
|
57 | 56 |
self.server_url = '{}:{}'.format(url.hostname, url.port)
|
58 | 57 |
|
59 |
- def _get_cascache(self):
|
|
60 |
- if self.cascache is None:
|
|
61 |
- self.cascache = CASCache(self._get_context())
|
|
62 |
- self.cascache.setup_remotes(use_config=True)
|
|
63 |
- return self.cascache
|
|
64 |
- |
|
65 | 58 |
def run_remote_command(self, command, input_root_digest, working_directory, environment):
|
66 | 59 |
# Sends an execution request to the remote execution server.
|
67 | 60 |
#
|
... | ... | @@ -78,13 +71,12 @@ class SandboxRemote(Sandbox): |
78 | 71 |
output_files=[],
|
79 | 72 |
output_directories=[self._output_directory],
|
80 | 73 |
platform=None)
|
81 |
- |
|
82 |
- cascache = self._get_cascache()
|
|
74 |
+ platform = Platform.get_platform()
|
|
75 |
+ cascache = platform.artifactcache
|
|
83 | 76 |
# Upload the Command message to the remote CAS server
|
84 | 77 |
command_digest = cascache.push_message(self._get_project(), remote_command)
|
85 | 78 |
if not command_digest or not cascache.verify_digest_pushed(self._get_project(), command_digest):
|
86 |
- # Command push failed
|
|
87 |
- return None
|
|
79 |
+ raise SandboxError("Failed pushing build command to remote CAS.")
|
|
88 | 80 |
|
89 | 81 |
# Create and send the action.
|
90 | 82 |
action = remote_execution_pb2.Action(command_digest=command_digest,
|
... | ... | @@ -95,27 +87,53 @@ class SandboxRemote(Sandbox): |
95 | 87 |
# Upload the Action message to the remote CAS server
|
96 | 88 |
action_digest = cascache.push_message(self._get_project(), action)
|
97 | 89 |
if not action_digest or not cascache.verify_digest_pushed(self._get_project(), action_digest):
|
98 |
- # Action push failed
|
|
99 |
- return None
|
|
90 |
+ raise SandboxError("Failed pushing build action to remote CAS.")
|
|
100 | 91 |
|
101 | 92 |
# Next, try to create a communication channel to the BuildGrid server.
|
102 | 93 |
channel = grpc.insecure_channel(self.server_url)
|
103 | 94 |
stub = remote_execution_pb2_grpc.ExecutionStub(channel)
|
104 | 95 |
request = remote_execution_pb2.ExecuteRequest(action_digest=action_digest,
|
105 | 96 |
skip_cache_lookup=False)
|
106 |
- try:
|
|
107 |
- operation_iterator = stub.Execute(request)
|
|
108 |
- except grpc.RpcError:
|
|
109 |
- return None
|
|
97 |
+ |
|
98 |
+ def __run_remote_command(stub, execute_request=None, running_operation=None):
|
|
99 |
+ try:
|
|
100 |
+ last_operation = None
|
|
101 |
+ if execute_request is not None:
|
|
102 |
+ operation_iterator = stub.Execute(execute_request)
|
|
103 |
+ else:
|
|
104 |
+ request = remote_execution_pb2.WaitExecutionRequest(name=running_operation.name)
|
|
105 |
+ operation_iterator = stub.WaitExecution(request)
|
|
106 |
+ |
|
107 |
+ for operation in operation_iterator:
|
|
108 |
+ if operation.done:
|
|
109 |
+ return operation
|
|
110 |
+ else:
|
|
111 |
+ last_operation = operation
|
|
112 |
+ except grpc.RpcError as e:
|
|
113 |
+ status_code = e.code()
|
|
114 |
+ if status_code == grpc.StatusCode.UNAVAILABLE:
|
|
115 |
+ raise SandboxError("Failed contacting remote execution server at {}."
|
|
116 |
+ .format(self.server_url))
|
|
117 |
+ |
|
118 |
+ elif (status_code == grpc.StatusCode.INVALID_ARGUMENT or
|
|
119 |
+ status_code == grpc.StatusCode.FAILED_PRECONDITION or
|
|
120 |
+ status_code == grpc.StatusCode.RESOURCE_EXHAUSTED):
|
|
121 |
+ raise SandboxError("{} ({}).".format(e.details(), status_code.name))
|
|
122 |
+ |
|
123 |
+ elif running_operation and status_code == grpc.StatusCode.UNIMPLEMENTED:
|
|
124 |
+ raise SandboxError("Failed trying to recover from connection loss: "
|
|
125 |
+ "server does not support operation status polling recovery.")
|
|
126 |
+ |
|
127 |
+ return last_operation
|
|
110 | 128 |
|
111 | 129 |
operation = None
|
112 | 130 |
with self._get_context().timed_activity("Waiting for the remote build to complete"):
|
113 |
- # It is advantageous to check operation_iterator.code() is grpc.StatusCode.OK here,
|
|
114 |
- # which will check the server is actually contactable. However, calling it when the
|
|
115 |
- # server is available seems to cause .code() to hang forever.
|
|
116 |
- for operation in operation_iterator:
|
|
117 |
- if operation.done:
|
|
118 |
- break
|
|
131 |
+ operation = __run_remote_command(stub, execute_request=request)
|
|
132 |
+ if operation and operation.done:
|
|
133 |
+ return operation
|
|
134 |
+ |
|
135 |
+ while not operation.done:
|
|
136 |
+ operation = __run_remote_command(stub, running_operation=operation)
|
|
119 | 137 |
|
120 | 138 |
return operation
|
121 | 139 |
|
... | ... | @@ -141,7 +159,8 @@ class SandboxRemote(Sandbox): |
141 | 159 |
if tree_digest is None or not tree_digest.hash:
|
142 | 160 |
raise SandboxError("Output directory structure had no digest attached.")
|
143 | 161 |
|
144 |
- cascache = self._get_cascache()
|
|
162 |
+ platform = Platform.get_platform()
|
|
163 |
+ cascache = platform.artifactcache
|
|
145 | 164 |
# Now do a pull to ensure we have the necessary parts.
|
146 | 165 |
dir_digest = cascache.pull_tree(self._get_project(), tree_digest)
|
147 | 166 |
if dir_digest is None or not dir_digest.hash or not dir_digest.size_bytes:
|
... | ... | @@ -176,7 +195,8 @@ class SandboxRemote(Sandbox): |
176 | 195 |
|
177 | 196 |
upload_vdir.recalculate_hash()
|
178 | 197 |
|
179 |
- cascache = self._get_cascache()
|
|
198 |
+ platform = Platform.get_platform()
|
|
199 |
+ cascache = platform.artifactcache
|
|
180 | 200 |
# Now, push that key (without necessarily needing a ref) to the remote.
|
181 | 201 |
vdir_digest = cascache.push_directory(self._get_project(), upload_vdir)
|
182 | 202 |
if not vdir_digest or not cascache.verify_digest_pushed(self._get_project(), vdir_digest):
|
... | ... | @@ -201,7 +221,6 @@ class SandboxRemote(Sandbox): |
201 | 221 |
|
202 | 222 |
if operation is None:
|
203 | 223 |
# Failure of remote execution, usually due to an error in BuildStream
|
204 |
- # NB This error could be raised in __run_remote_command
|
|
205 | 224 |
raise SandboxError("No response returned from server")
|
206 | 225 |
|
207 | 226 |
assert not operation.HasField('error') and operation.HasField('response')
|
... | ... | @@ -9,8 +9,12 @@ from buildstream._context import Context |
9 | 9 |
from buildstream._project import Project
|
10 | 10 |
from buildstream.utils import _deduplicate
|
11 | 11 |
from buildstream import _yaml
|
12 |
+from buildstream._exceptions import ErrorDomain, LoadErrorReason
|
|
12 | 13 |
|
14 |
+from tests.testutils.runcli import cli
|
|
13 | 15 |
|
16 |
+ |
|
17 |
+DATA_DIR = os.path.dirname(os.path.realpath(__file__))
|
|
14 | 18 |
cache1 = ArtifactCacheSpec(url='https://example.com/cache1', push=True)
|
15 | 19 |
cache2 = ArtifactCacheSpec(url='https://example.com/cache2', push=False)
|
16 | 20 |
cache3 = ArtifactCacheSpec(url='https://example.com/cache3', push=False)
|
... | ... | @@ -106,3 +110,33 @@ def test_artifact_cache_precedence(tmpdir, override_caches, project_caches, user |
106 | 110 |
# Verify that it was correctly read.
|
107 | 111 |
expected_cache_specs = list(_deduplicate(itertools.chain(override_caches, project_caches, user_caches)))
|
108 | 112 |
assert parsed_cache_specs == expected_cache_specs
|
113 |
+ |
|
114 |
+ |
|
115 |
+# Assert that if either the client key or client cert is specified
|
|
116 |
+# without specifying its counterpart, we get a comprehensive LoadError
|
|
117 |
+# instead of an unhandled exception.
|
|
118 |
+@pytest.mark.datafiles(DATA_DIR)
|
|
119 |
+@pytest.mark.parametrize('config_key, config_value', [
|
|
120 |
+ ('client-cert', 'client.crt'),
|
|
121 |
+ ('client-key', 'client.key')
|
|
122 |
+])
|
|
123 |
+def test_missing_certs(cli, datafiles, config_key, config_value):
|
|
124 |
+ project = os.path.join(datafiles.dirname, datafiles.basename, 'missing-certs')
|
|
125 |
+ |
|
126 |
+ project_conf = {
|
|
127 |
+ 'name': 'test',
|
|
128 |
+ |
|
129 |
+ 'artifacts': {
|
|
130 |
+ 'url': 'https://cache.example.com:12345',
|
|
131 |
+ 'push': 'true',
|
|
132 |
+ config_key: config_value
|
|
133 |
+ }
|
|
134 |
+ }
|
|
135 |
+ project_conf_file = os.path.join(project, 'project.conf')
|
|
136 |
+ _yaml.dump(project_conf, project_conf_file)
|
|
137 |
+ |
|
138 |
+ # Use `pull` here to ensure we try to initialize the remotes, triggering the error
|
|
139 |
+ #
|
|
140 |
+ # This does not happen for a simple `bst show`.
|
|
141 |
+ result = cli.run(project=project, args=['pull', 'element.bst'])
|
|
142 |
+ result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.INVALID_DATA)
|
1 |
+kind: autotools
|
... | ... | @@ -137,7 +137,6 @@ def _test_pull(user_config_file, project_dir, artifact_dir, |
137 | 137 |
|
138 | 138 |
# Manually setup the CAS remote
|
139 | 139 |
cas.setup_remotes(use_config=True)
|
140 |
- cas.initialize_remotes()
|
|
141 | 140 |
|
142 | 141 |
if cas.has_push_remotes(element=element):
|
143 | 142 |
# Push the element's artifact
|
... | ... | @@ -274,7 +273,6 @@ def _test_push_tree(user_config_file, project_dir, artifact_dir, artifact_digest |
274 | 273 |
|
275 | 274 |
# Manually setup the CAS remote
|
276 | 275 |
cas.setup_remotes(use_config=True)
|
277 |
- cas.initialize_remotes()
|
|
278 | 276 |
|
279 | 277 |
if cas.has_push_remotes():
|
280 | 278 |
directory = remote_execution_pb2.Directory()
|
... | ... | @@ -310,7 +308,6 @@ def _test_pull_tree(user_config_file, project_dir, artifact_dir, artifact_digest |
310 | 308 |
|
311 | 309 |
# Manually setup the CAS remote
|
312 | 310 |
cas.setup_remotes(use_config=True)
|
313 |
- cas.initialize_remotes()
|
|
314 | 311 |
|
315 | 312 |
if cas.has_push_remotes():
|
316 | 313 |
# Pull the artifact using the Tree object
|
1 |
+kind: autotools
|
1 |
+# Test case ensuring that we can use options
|
|
2 |
+# in the element overrides.
|
|
3 |
+#
|
|
4 |
+name: test
|
|
5 |
+ |
|
6 |
+options:
|
|
7 |
+ arch:
|
|
8 |
+ type: arch
|
|
9 |
+ description: architecture
|
|
10 |
+ values: [i686, x86_64]
|
|
11 |
+ |
|
12 |
+elements:
|
|
13 |
+ autotools:
|
|
14 |
+ variables:
|
|
15 |
+ (?):
|
|
16 |
+ - arch == 'i686':
|
|
17 |
+ conf-global: --host=i686-unknown-linux-gnu
|
|
18 |
+ - arch == 'x86_64':
|
|
19 |
+ conf-global: --host=x86_64-unknown-linux-gnu
|
1 |
+import os
|
|
2 |
+import pytest
|
|
3 |
+from buildstream import _yaml
|
|
4 |
+from tests.testutils.runcli import cli
|
|
5 |
+ |
|
6 |
+# Project directory
|
|
7 |
+DATA_DIR = os.path.dirname(os.path.realpath(__file__))
|
|
8 |
+ |
|
9 |
+ |
|
10 |
+@pytest.mark.datafiles(DATA_DIR)
|
|
11 |
+@pytest.mark.parametrize("arch", [('i686'), ('x86_64')])
|
|
12 |
+def test_override(cli, datafiles, arch):
|
|
13 |
+ project = os.path.join(datafiles.dirname, datafiles.basename, 'option-overrides')
|
|
14 |
+ |
|
15 |
+ bst_args = ['--option', 'arch', arch]
|
|
16 |
+ bst_args += [
|
|
17 |
+ 'show',
|
|
18 |
+ '--deps', 'none',
|
|
19 |
+ '--format', '%{vars}',
|
|
20 |
+ 'element.bst'
|
|
21 |
+ ]
|
|
22 |
+ result = cli.run(project=project, silent=True, args=bst_args)
|
|
23 |
+ result.assert_success()
|
|
24 |
+ |
|
25 |
+ # See the associated project.conf for the expected values
|
|
26 |
+ expected_value = '--host={}-unknown-linux-gnu'.format(arch)
|
|
27 |
+ |
|
28 |
+ loaded = _yaml.load_data(result.output)
|
|
29 |
+ assert loaded['conf-global'] == expected_value
|