Jim MacArthur pushed to branch jmac/remote_execution_client at BuildStream / buildstream
Commits:
-
f072a7fe
by Jim MacArthur at 2018-08-21T11:37:40Z
-
bedaaadf
by Jim MacArthur at 2018-08-21T11:37:40Z
-
dcecf1a6
by Jim MacArthur at 2018-08-21T11:37:40Z
-
82751700
by Jim MacArthur at 2018-08-21T11:37:40Z
-
51ffdb1e
by Jim MacArthur at 2018-08-21T11:37:40Z
-
f1a1b42c
by Jim MacArthur at 2018-08-21T11:37:40Z
-
e91c294a
by Jim MacArthur at 2018-08-21T12:09:22Z
-
fa54d424
by Jim MacArthur at 2018-08-21T13:44:32Z
-
d92d13bc
by Jim MacArthur at 2018-08-21T13:46:36Z
-
7ca3af4b
by Jim MacArthur at 2018-08-21T13:47:23Z
-
d4312952
by Jim MacArthur at 2018-08-21T14:19:45Z
-
e74f78eb
by Jim MacArthur at 2018-08-21T14:20:09Z
10 changed files:
- buildstream/_artifactcache/cascache.py
- buildstream/_loader/loadelement.py
- buildstream/_loader/types.py
- buildstream/_project.py
- buildstream/data/projectconfig.yaml
- buildstream/element.py
- buildstream/plugins/elements/autotools.py
- buildstream/sandbox/__init__.py
- + buildstream/sandbox/_sandboxremote.py
- doc/source/format_project.rst
Changes:
... | ... | @@ -326,6 +326,7 @@ class CASCache(ArtifactCache): |
326 | 326 |
uuid_ = uuid.uuid4()
|
327 | 327 |
resource_name = '/'.join(['uploads', str(uuid_), 'blobs',
|
328 | 328 |
digest.hash, str(digest.size_bytes)])
|
329 |
+ |
|
329 | 330 |
def request_stream():
|
330 | 331 |
with open(self.objpath(digest), 'rb') as f:
|
331 | 332 |
assert os.fstat(f.fileno()).st_size == digest.size_bytes
|
... | ... | @@ -71,7 +71,7 @@ class LoadElement(): |
71 | 71 |
'kind', 'depends', 'sources', 'sandbox',
|
72 | 72 |
'variables', 'environment', 'environment-nocache',
|
73 | 73 |
'config', 'public', 'description',
|
74 |
- 'build-depends', 'runtime-depends',
|
|
74 |
+ 'build-depends', 'runtime-depends'
|
|
75 | 75 |
])
|
76 | 76 |
|
77 | 77 |
# Extract the Dependencies
|
... | ... | @@ -41,6 +41,7 @@ class Symbol(): |
41 | 41 |
DIRECTORY = "directory"
|
42 | 42 |
JUNCTION = "junction"
|
43 | 43 |
SANDBOX = "sandbox"
|
44 |
+ REMOTE_EXECUTION = "remote-execution"
|
|
44 | 45 |
|
45 | 46 |
|
46 | 47 |
# Dependency()
|
... | ... | @@ -129,6 +129,7 @@ class Project(): |
129 | 129 |
|
130 | 130 |
self.artifact_cache_specs = None
|
131 | 131 |
self._sandbox = None
|
132 |
+ self._remote_execution = None
|
|
132 | 133 |
self._splits = None
|
133 | 134 |
|
134 | 135 |
self._context.add_project(self)
|
... | ... | @@ -460,7 +461,7 @@ class Project(): |
460 | 461 |
'aliases', 'name',
|
461 | 462 |
'artifacts', 'options',
|
462 | 463 |
'fail-on-overlap', 'shell', 'fatal-warnings',
|
463 |
- 'ref-storage', 'sandbox', 'mirrors'
|
|
464 |
+ 'ref-storage', 'sandbox', 'mirrors', 'remote-execution'
|
|
464 | 465 |
])
|
465 | 466 |
|
466 | 467 |
#
|
... | ... | @@ -478,6 +479,9 @@ class Project(): |
478 | 479 |
# Load sandbox configuration
|
479 | 480 |
self._sandbox = _yaml.node_get(config, Mapping, 'sandbox')
|
480 | 481 |
|
482 |
+ # Load remote execution configuration
|
|
483 |
+ self._remote_execution = _yaml.node_get(config, Mapping, 'remote-execution')
|
|
484 |
+ |
|
481 | 485 |
# Load project split rules
|
482 | 486 |
self._splits = _yaml.node_get(config, Mapping, 'split-rules')
|
483 | 487 |
|
... | ... | @@ -204,3 +204,6 @@ shell: |
204 | 204 |
# Command to run when `bst shell` does not provide a command
|
205 | 205 |
#
|
206 | 206 |
command: [ 'sh', '-i' ]
|
207 |
+ |
|
208 |
+remote-execution:
|
|
209 |
+ url: ""
|
|
\ No newline at end of file |
... | ... | @@ -95,6 +95,7 @@ from . import _site |
95 | 95 |
from ._platform import Platform
|
96 | 96 |
from .plugin import CoreWarnings
|
97 | 97 |
from .sandbox._config import SandboxConfig
|
98 |
+from .sandbox._sandboxremote import SandboxRemote
|
|
98 | 99 |
|
99 | 100 |
from .storage.directory import Directory
|
100 | 101 |
from .storage._filebaseddirectory import FileBasedDirectory
|
... | ... | @@ -250,6 +251,9 @@ class Element(Plugin): |
250 | 251 |
# Extract Sandbox config
|
251 | 252 |
self.__sandbox_config = self.__extract_sandbox_config(meta)
|
252 | 253 |
|
254 |
+ # Extract remote execution URL
|
|
255 |
+ self.__remote_execution_url = self.__extract_remote_execution_config(meta)
|
|
256 |
+ |
|
253 | 257 |
def __lt__(self, other):
|
254 | 258 |
return self.name < other.name
|
255 | 259 |
|
... | ... | @@ -1545,6 +1549,8 @@ class Element(Plugin): |
1545 | 1549 |
finally:
|
1546 | 1550 |
if collect is not None:
|
1547 | 1551 |
try:
|
1552 |
+ # Sandbox will probably have replaced its virtual directory, so get it again
|
|
1553 |
+ sandbox_vroot = sandbox.get_virtual_directory()
|
|
1548 | 1554 |
collectvdir = sandbox_vroot.descend(collect.lstrip(os.sep).split(os.sep))
|
1549 | 1555 |
except VirtualDirectoryError:
|
1550 | 1556 |
# No collect directory existed
|
... | ... | @@ -2117,7 +2123,24 @@ class Element(Plugin): |
2117 | 2123 |
project = self._get_project()
|
2118 | 2124 |
platform = Platform.get_platform()
|
2119 | 2125 |
|
2120 |
- if directory is not None and os.path.exists(directory):
|
|
2126 |
+ if self.__remote_execution_url and self.BST_VIRTUAL_DIRECTORY:
|
|
2127 |
+ if not self.__artifacts.has_push_remotes(element=self):
|
|
2128 |
+ # Give an early warning if remote execution will not work
|
|
2129 |
+ raise ElementError("Artifact {} is configured to use remote execution but has no push remotes. "
|
|
2130 |
+ .format(self.name) +
|
|
2131 |
+ "The remote artifact server(s) may not be correctly configured or contactable.")
|
|
2132 |
+ |
|
2133 |
+ self.info("Using a remote 'sandbox' for artifact {}".format(self.name))
|
|
2134 |
+ sandbox = SandboxRemote(context, project,
|
|
2135 |
+ directory,
|
|
2136 |
+ stdout=stdout,
|
|
2137 |
+ stderr=stderr,
|
|
2138 |
+ config=config,
|
|
2139 |
+ server_url=self.__remote_execution_url,
|
|
2140 |
+ allow_real_directory=False)
|
|
2141 |
+ yield sandbox
|
|
2142 |
+ elif directory is not None and os.path.exists(directory):
|
|
2143 |
+ self.info("Using a local sandbox for artifact {}".format(self.name))
|
|
2121 | 2144 |
sandbox = platform.create_sandbox(context, project,
|
2122 | 2145 |
directory,
|
2123 | 2146 |
stdout=stdout,
|
... | ... | @@ -2289,6 +2312,18 @@ class Element(Plugin): |
2289 | 2312 |
return SandboxConfig(self.node_get_member(sandbox_config, int, 'build-uid'),
|
2290 | 2313 |
self.node_get_member(sandbox_config, int, 'build-gid'))
|
2291 | 2314 |
|
2315 |
+ def __extract_remote_execution_config(self, meta):
|
|
2316 |
+ if self.__is_junction:
|
|
2317 |
+ return ''
|
|
2318 |
+ else:
|
|
2319 |
+ project = self._get_project()
|
|
2320 |
+ project.ensure_fully_loaded()
|
|
2321 |
+ if project._remote_execution:
|
|
2322 |
+ rexec_config = _yaml.node_chain_copy(project._remote_execution)
|
|
2323 |
+ return self.node_get_member(rexec_config, str, 'url')
|
|
2324 |
+ else:
|
|
2325 |
+ return ''
|
|
2326 |
+ |
|
2292 | 2327 |
# This makes a special exception for the split rules, which
|
2293 | 2328 |
# elements may extend but whose defaults are defined in the project.
|
2294 | 2329 |
#
|
... | ... | @@ -57,7 +57,7 @@ from buildstream import BuildElement |
57 | 57 |
|
58 | 58 |
# Element implementation for the 'autotools' kind.
|
59 | 59 |
class AutotoolsElement(BuildElement):
|
60 |
- pass
|
|
60 |
+ BST_VIRTUAL_DIRECTORY = True
|
|
61 | 61 |
|
62 | 62 |
|
63 | 63 |
# Plugin entry point
|
... | ... | @@ -20,3 +20,4 @@ |
20 | 20 |
from .sandbox import Sandbox, SandboxFlags
|
21 | 21 |
from ._sandboxchroot import SandboxChroot
|
22 | 22 |
from ._sandboxbwrap import SandboxBwrap
|
23 |
+from ._sandboxremote import SandboxRemote
|
1 |
+#!/usr/bin/env python3
|
|
2 |
+#
|
|
3 |
+# Copyright (C) 2018 Codethink Limited
|
|
4 |
+#
|
|
5 |
+# This program is free software; you can redistribute it and/or
|
|
6 |
+# modify it under the terms of the GNU Lesser General Public
|
|
7 |
+# License as published by the Free Software Foundation; either
|
|
8 |
+# version 2 of the License, or (at your option) any later version.
|
|
9 |
+#
|
|
10 |
+# This library is distributed in the hope that it will be useful,
|
|
11 |
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12 |
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
13 |
+# Lesser General Public License for more details.
|
|
14 |
+#
|
|
15 |
+# You should have received a copy of the GNU Lesser General Public
|
|
16 |
+# License along with this library. If not, see <http://www.gnu.org/licenses/>.
|
|
17 |
+#
|
|
18 |
+# Authors:
|
|
19 |
+# Jim MacArthur <jim macarthur codethink co uk>
|
|
20 |
+ |
|
21 |
+import os
|
|
22 |
+import re
|
|
23 |
+ |
|
24 |
+import grpc
|
|
25 |
+ |
|
26 |
+from . import Sandbox
|
|
27 |
+from ..storage._filebaseddirectory import FileBasedDirectory
|
|
28 |
+from ..storage._casbaseddirectory import CasBasedDirectory
|
|
29 |
+from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc
|
|
30 |
+ |
|
31 |
+from .._artifactcache.cascache import CASCache
|
|
32 |
+ |
|
33 |
+ |
|
34 |
+class SandboxError(Exception):
|
|
35 |
+ pass
|
|
36 |
+ |
|
37 |
+ |
|
38 |
+# SandboxRemote()
|
|
39 |
+#
|
|
40 |
+# This isn't really a sandbox, it's a stub which sends all the source
|
|
41 |
+# to a remote server and retrieves the results from it.
|
|
42 |
+#
|
|
43 |
+class SandboxRemote(Sandbox):
|
|
44 |
+ |
|
45 |
+ def __init__(self, *args, **kwargs):
|
|
46 |
+ super().__init__(*args, **kwargs)
|
|
47 |
+ self.cascache = None
|
|
48 |
+ self.server_url = kwargs['server_url']
|
|
49 |
+ # Check the format of the url ourselves to save the user from
|
|
50 |
+ # whatever error messages grpc will produce
|
|
51 |
+ m = re.match(r'^(.+):(\d+)$', self.server_url)
|
|
52 |
+ if m is None:
|
|
53 |
+ raise SandboxError("Configured remote URL '{}' does not match the expected layout. "
|
|
54 |
+ .format(self.server_url) +
|
|
55 |
+ "It should be of the form <protocol>://<domain name>:<port>.")
|
|
56 |
+ |
|
57 |
+ def _get_cascache(self):
|
|
58 |
+ if self.cascache is None:
|
|
59 |
+ self.cascache = CASCache(self._get_context())
|
|
60 |
+ self.cascache.setup_remotes(use_config=True)
|
|
61 |
+ return self.cascache
|
|
62 |
+ |
|
63 |
+ def __run_remote_command(self, cascache, command, input_root_digest, environment):
|
|
64 |
+ |
|
65 |
+ environment_variables = [remote_execution_pb2.Command.
|
|
66 |
+ EnvironmentVariable(name=k, value=v)
|
|
67 |
+ for (k, v) in environment.items()]
|
|
68 |
+ |
|
69 |
+ # Create and send the Command object.
|
|
70 |
+ remote_command = remote_execution_pb2.Command(arguments=command, environment_variables=environment_variables,
|
|
71 |
+ output_files=[],
|
|
72 |
+ output_directories=[self._output_directory],
|
|
73 |
+ platform=None)
|
|
74 |
+ command_digest = cascache.add_object(buffer=remote_command.SerializeToString())
|
|
75 |
+ command_ref = 'worker-command/{}'.format(command_digest.hash)
|
|
76 |
+ cascache.set_ref(command_ref, command_digest)
|
|
77 |
+ |
|
78 |
+ command_push_successful = cascache.push_refs([command_ref], self._get_project(), may_have_dependencies=False)
|
|
79 |
+ if not command_push_successful and not cascache.verify_key_pushed(command_ref, self._get_project()):
|
|
80 |
+ # Command push failed
|
|
81 |
+ return None
|
|
82 |
+ |
|
83 |
+ # Create and send the action.
|
|
84 |
+ |
|
85 |
+ action = remote_execution_pb2.Action(command_digest=command_digest,
|
|
86 |
+ input_root_digest=input_root_digest,
|
|
87 |
+ timeout=None,
|
|
88 |
+ do_not_cache=True)
|
|
89 |
+ |
|
90 |
+ action_digest = cascache.add_object(buffer=action.SerializeToString())
|
|
91 |
+ action_ref = 'worker-action/{}'.format(command_digest.hash)
|
|
92 |
+ cascache.set_ref(action_ref, action_digest)
|
|
93 |
+ action_push_successful = cascache.push_refs([action_ref], self._get_project(), may_have_dependencies=False)
|
|
94 |
+ |
|
95 |
+ if not action_push_successful and not cascache.verify_key_pushed(action_ref, self._get_project()):
|
|
96 |
+ # Action push failed
|
|
97 |
+ return None
|
|
98 |
+ |
|
99 |
+ # Next, try to create a communication channel to the BuildGrid server.
|
|
100 |
+ |
|
101 |
+ channel = grpc.insecure_channel(self.server_url)
|
|
102 |
+ stub = remote_execution_pb2_grpc.ExecutionStub(channel)
|
|
103 |
+ request = remote_execution_pb2.ExecuteRequest(instance_name='default',
|
|
104 |
+ action_digest=action_digest,
|
|
105 |
+ skip_cache_lookup=True)
|
|
106 |
+ |
|
107 |
+ operation_iterator = stub.Execute(request)
|
|
108 |
+ operation = None
|
|
109 |
+ with self._get_context().timed_activity("Waiting for the remote build to complete"):
|
|
110 |
+ # It is advantageous to check operation_iterator.code() is grpc.StatusCode.OK here,
|
|
111 |
+ # which will check the server is actually contactable. However, calling it when the
|
|
112 |
+ # server is available seems to cause .code() to hang forever.
|
|
113 |
+ for operation in operation_iterator:
|
|
114 |
+ if operation.done:
|
|
115 |
+ break
|
|
116 |
+ return operation
|
|
117 |
+ |
|
118 |
+ def process_job_output(self, output_directories, output_files):
|
|
119 |
+ # output_directories is an array of OutputDirectory objects.
|
|
120 |
+ # output_files is an array of OutputFile objects.
|
|
121 |
+ #
|
|
122 |
+ # We only specify one output_directory, so it's an error
|
|
123 |
+ # for there to be any output files or more than one directory at the moment.
|
|
124 |
+ |
|
125 |
+ if output_files:
|
|
126 |
+ raise SandboxError("Output files were returned when we didn't request any.")
|
|
127 |
+ elif len(output_directories) > 1:
|
|
128 |
+ error_text = "More than one output directory was returned from the build server: {}"
|
|
129 |
+ raise SandboxError(error_text.format(output_directories))
|
|
130 |
+ elif len(output_directories) < 1: # pylint: disable=len-as-condition
|
|
131 |
+ error_text = "No output directory was returned from the build server."
|
|
132 |
+ raise SandboxError(error_text)
|
|
133 |
+ |
|
134 |
+ digest = output_directories[0].tree_digest
|
|
135 |
+ if digest is None or digest.hash is None or digest.hash == "":
|
|
136 |
+ raise SandboxError("Output directory structure had no digest attached.")
|
|
137 |
+ |
|
138 |
+ # Now do a pull to ensure we have the necessary parts.
|
|
139 |
+ cascache = self._get_cascache()
|
|
140 |
+ cascache.pull_key(digest.hash, digest.size_bytes, self._get_project())
|
|
141 |
+ path_components = os.path.split(self._output_directory)
|
|
142 |
+ |
|
143 |
+ # Now what we have is a digest for the output. Once we return, the calling process will
|
|
144 |
+ # attempt to descend into our directory and find that directory, so we need to overwrite
|
|
145 |
+ # that.
|
|
146 |
+ |
|
147 |
+ if not path_components:
|
|
148 |
+ # The artifact wants the whole directory; we could just return the returned hash in its
|
|
149 |
+ # place, but we don't have a means to do that yet.
|
|
150 |
+ raise SandboxError("Unimplemented: Output directory is empty or equal to the sandbox root.")
|
|
151 |
+ |
|
152 |
+ # At the moment, we will get the whole directory back in the first directory argument and we need
|
|
153 |
+ # to replace the sandbox's virtual directory with that. Creating a new virtual directory object
|
|
154 |
+ # from another hash will be interesting, though...
|
|
155 |
+ |
|
156 |
+ new_dir = CasBasedDirectory(self._get_context(), ref=digest)
|
|
157 |
+ self._set_virtual_directory(new_dir)
|
|
158 |
+ |
|
159 |
+ def run(self, command, flags, *, cwd=None, env=None):
|
|
160 |
+ # Upload sources
|
|
161 |
+ upload_vdir = self.get_virtual_directory()
|
|
162 |
+ |
|
163 |
+ if isinstance(upload_vdir, FileBasedDirectory):
|
|
164 |
+ # Make a new temporary directory to put source in
|
|
165 |
+ upload_vdir = CasBasedDirectory(self._get_context(), ref=None)
|
|
166 |
+ upload_vdir.import_files(self.get_virtual_directory()._get_underlying_directory())
|
|
167 |
+ |
|
168 |
+ # Now, push that key (without necessarily needing a ref) to the remote.
|
|
169 |
+ cascache = self._get_cascache()
|
|
170 |
+ |
|
171 |
+ ref = 'worker-source/{}'.format(upload_vdir.ref.hash)
|
|
172 |
+ upload_vdir._save(ref)
|
|
173 |
+ source_push_successful = cascache.push_refs([ref], self._get_project())
|
|
174 |
+ |
|
175 |
+ # Set up environment and PWD
|
|
176 |
+ if env is None:
|
|
177 |
+ env = self._get_environment()
|
|
178 |
+ if 'PWD' not in env:
|
|
179 |
+ env['PWD'] = self._get_work_directory()
|
|
180 |
+ |
|
181 |
+ # We want command args as a list of strings
|
|
182 |
+ if isinstance(command, str):
|
|
183 |
+ command = [command]
|
|
184 |
+ |
|
185 |
+ # Now transmit the command to execute
|
|
186 |
+ if source_push_successful or cascache.verify_key_pushed(ref, self._get_project()):
|
|
187 |
+ response = self.__run_remote_command(cascache, command, upload_vdir.ref, env)
|
|
188 |
+ |
|
189 |
+ if response is None:
|
|
190 |
+ # Failure of remote execution, usually due to an error in BuildStream
|
|
191 |
+ # NB This error could be raised in __run_remote_command
|
|
192 |
+ raise SandboxError("No response returned from server")
|
|
193 |
+ |
|
194 |
+ assert(response.HasField("error") or response.HasField("response"))
|
|
195 |
+ |
|
196 |
+ if response.HasField("error"):
|
|
197 |
+ # A normal error during the build: the remote execution system
|
|
198 |
+ # has worked correctly but the command failed.
|
|
199 |
+ # response.error also contains 'message' (str) and 'details'
|
|
200 |
+ # (iterator of Any) which we ignore at the moment.
|
|
201 |
+ return response.error.code
|
|
202 |
+ else:
|
|
203 |
+ |
|
204 |
+ # At the moment, response can either be an
|
|
205 |
+ # ExecutionResponse containing an ActionResult, or an
|
|
206 |
+ # ActionResult directly.
|
|
207 |
+ executeResponse = remote_execution_pb2.ExecuteResponse()
|
|
208 |
+ if response.response.Is(executeResponse.DESCRIPTOR):
|
|
209 |
+ # Unpack ExecuteResponse and set response to its response
|
|
210 |
+ response.response.Unpack(executeResponse)
|
|
211 |
+ response = executeResponse
|
|
212 |
+ |
|
213 |
+ actionResult = remote_execution_pb2.ActionResult()
|
|
214 |
+ if response.response.Is(actionResult.DESCRIPTOR):
|
|
215 |
+ response.response.Unpack(actionResult)
|
|
216 |
+ self.process_job_output(actionResult.output_directories, actionResult.output_files)
|
|
217 |
+ else:
|
|
218 |
+ raise SandboxError("Received unknown message from server (expected ExecutionResponse).")
|
|
219 |
+ else:
|
|
220 |
+ raise SandboxError("Failed to verify that source has been pushed to the remote artifact cache.")
|
|
221 |
+ return 0
|
... | ... | @@ -204,6 +204,23 @@ with an artifact share. |
204 | 204 |
You can also specify a list of caches here; earlier entries in the list
|
205 | 205 |
will have higher priority than later ones.
|
206 | 206 |
|
207 |
+Remote execution
|
|
208 |
+~~~~~~~~~~~~~~~~
|
|
209 |
+BuildStream supports remote execution using the Google Remote Execution API
|
|
210 |
+(REAPI). A description of how remote execution works is beyond the scope
|
|
211 |
+of this document, but you can specify a remote server complying with the REAPI
|
|
212 |
+using the `remote-execution` option:
|
|
213 |
+ |
|
214 |
+.. code:: yaml
|
|
215 |
+ |
|
216 |
+ remote-execution:
|
|
217 |
+ |
|
218 |
+ # A url defining a remote execution server
|
|
219 |
+ url: buildserver.example.com:50051
|
|
220 |
+ |
|
221 |
+The url should be a hostname and port separated by ':'. Do not include a protocol.
|
|
222 |
+ |
|
223 |
+The Remote Execution API can be found via https://github.com/bazelbuild/remote-apis.
|
|
207 | 224 |
|
208 | 225 |
.. _project_essentials_mirrors:
|
209 | 226 |
|