Jim MacArthur pushed to branch jmac/remote_execution_rebase at BuildStream / buildstream
Commits:
-
9b3d123f
by Jim MacArthur at 2018-08-02T16:36:10Z
-
10d8ecc7
by Jim MacArthur at 2018-08-02T16:36:25Z
-
f60a84e2
by Jim MacArthur at 2018-08-02T16:38:00Z
-
45df1aaf
by Jim MacArthur at 2018-08-02T16:38:24Z
-
583ebb36
by Jim MacArthur at 2018-08-02T16:38:33Z
-
9fedbbd5
by Jim MacArthur at 2018-08-02T16:38:41Z
-
853c2aed
by Jim MacArthur at 2018-08-02T16:38:55Z
-
cc8206cf
by Jim MacArthur at 2018-08-02T16:39:07Z
-
15459a0f
by Jim MacArthur at 2018-08-03T08:43:10Z
-
80169eaf
by Jim MacArthur at 2018-08-03T08:43:22Z
6 changed files:
- buildstream/_artifactcache/cascache.py
- buildstream/_platform/linux.py
- buildstream/buildelement.py
- buildstream/sandbox/__init__.py
- + buildstream/sandbox/_sandboxremote.py
- buildstream/sandbox/sandbox.py
Changes:
... | ... | @@ -213,6 +213,30 @@ class CASCache(ArtifactCache): |
213 | 213 |
remotes_for_project = self._remotes[element._get_project()]
|
214 | 214 |
return any(remote.spec.push for remote in remotes_for_project)
|
215 | 215 |
|
216 |
+ |
|
217 |
+ def pull_key(self, key, size_bytes, project):
|
|
218 |
+ """ Pull a single key rather than an artifact.
|
|
219 |
+ Does not update local refs. """
|
|
220 |
+ |
|
221 |
+ for remote in self._remotes[project]:
|
|
222 |
+ try:
|
|
223 |
+ remote.init()
|
|
224 |
+ |
|
225 |
+ tree = remote_execution_pb2.Digest()
|
|
226 |
+ tree.hash = key
|
|
227 |
+ tree.size_bytes = size_bytes
|
|
228 |
+ |
|
229 |
+ self._fetch_directory(remote, tree)
|
|
230 |
+ |
|
231 |
+ # no need to pull from additional remotes
|
|
232 |
+ return True
|
|
233 |
+ |
|
234 |
+ except grpc.RpcError as e:
|
|
235 |
+ if e.code() != grpc.StatusCode.NOT_FOUND:
|
|
236 |
+ raise
|
|
237 |
+ |
|
238 |
+ return False
|
|
239 |
+ |
|
216 | 240 |
def pull(self, element, key, *, progress=None):
|
217 | 241 |
ref = self.get_artifact_fullname(element, key)
|
218 | 242 |
|
... | ... | @@ -338,9 +362,9 @@ class CASCache(ArtifactCache): |
338 | 362 |
refs = [self.get_artifact_fullname(element, key) for key in keys]
|
339 | 363 |
|
340 | 364 |
project = element._get_project()
|
341 |
- return self.push_refs(element, refs, project)
|
|
365 |
+ return self.push_refs(refs, project, element=element)
|
|
342 | 366 |
|
343 |
- def push_refs(self, element, refs, project, may_have_dependencies=True):
|
|
367 |
+ def push_refs(self, refs, project, may_have_dependencies=True, element=None):
|
|
344 | 368 |
|
345 | 369 |
push_remotes = [r for r in self._remotes[project] if r.spec.push]
|
346 | 370 |
|
... | ... | @@ -350,7 +374,7 @@ class CASCache(ArtifactCache): |
350 | 374 |
remote.init()
|
351 | 375 |
if self._push_refs_to_remote(refs, remote, may_have_dependencies):
|
352 | 376 |
pushed = True
|
353 |
- else:
|
|
377 |
+ elif element:
|
|
354 | 378 |
self.context.message(Message(
|
355 | 379 |
None,
|
356 | 380 |
MessageType.SKIPPED,
|
... | ... | @@ -374,6 +398,27 @@ class CASCache(ArtifactCache): |
374 | 398 |
|
375 | 399 |
return pushed
|
376 | 400 |
|
401 |
+ def _verify_ref_on_remote(self, ref, remote):
|
|
402 |
+ pushed = False
|
|
403 |
+ tree = self.resolve_ref(ref)
|
|
404 |
+ |
|
405 |
+ # Check whether ref is already on the server in which case
|
|
406 |
+ # there is no need to push the artifact
|
|
407 |
+ try:
|
|
408 |
+ request = buildstream_pb2.GetArtifactRequest()
|
|
409 |
+ request.key = ref
|
|
410 |
+ response = remote.artifact_cache.GetArtifact(request)
|
|
411 |
+ |
|
412 |
+ if response.artifact.hash == tree.hash and response.artifact.size_bytes == tree.size_bytes:
|
|
413 |
+ # ref is already on the server with the same tree
|
|
414 |
+ return True
|
|
415 |
+ |
|
416 |
+ except grpc.RpcError as e:
|
|
417 |
+ if e.code() != grpc.StatusCode.NOT_FOUND:
|
|
418 |
+ raise
|
|
419 |
+ |
|
420 |
+ return False
|
|
421 |
+ |
|
377 | 422 |
################################################
|
378 | 423 |
# API Private Methods #
|
379 | 424 |
################################################
|
... | ... | @@ -24,6 +24,7 @@ from .. import utils |
24 | 24 |
from .._artifactcache.cascache import CASCache
|
25 | 25 |
from .._message import Message, MessageType
|
26 | 26 |
from ..sandbox import SandboxBwrap
|
27 |
+from ..sandbox import SandboxRemote
|
|
27 | 28 |
|
28 | 29 |
from . import Platform
|
29 | 30 |
|
... | ... | @@ -46,7 +47,7 @@ class Linux(Platform): |
46 | 47 |
# Inform the bubblewrap sandbox as to whether it can use user namespaces or not
|
47 | 48 |
kwargs['user_ns_available'] = self._user_ns_available
|
48 | 49 |
kwargs['die_with_parent_available'] = self._die_with_parent_available
|
49 |
- return SandboxBwrap(*args, **kwargs)
|
|
50 |
+ return SandboxRemote(*args, **kwargs)
|
|
50 | 51 |
|
51 | 52 |
################################################
|
52 | 53 |
# Private Methods #
|
... | ... | @@ -155,6 +155,9 @@ class BuildElement(Element): |
155 | 155 |
command_dir = build_root
|
156 | 156 |
sandbox.set_work_directory(command_dir)
|
157 | 157 |
|
158 |
+ # Tell sandbox which directory is preserved in the finished artifact
|
|
159 |
+ sandbox.set_output_directory(install_root)
|
|
160 |
+ |
|
158 | 161 |
# Setup environment
|
159 | 162 |
sandbox.set_environment(self.get_environment())
|
160 | 163 |
|
... | ... | @@ -239,8 +242,18 @@ class BuildElement(Element): |
239 | 242 |
# Note the -e switch to 'sh' means to exit with an error
|
240 | 243 |
# if any untested command fails.
|
241 | 244 |
#
|
242 |
- exitcode = sandbox.run(['sh', '-c', '-e', cmd + '\n'],
|
|
243 |
- SandboxFlags.ROOT_READ_ONLY)
|
|
245 |
+ arguments = ['sh', '-c', '-e']
|
|
246 |
+ change_directory = True
|
|
247 |
+ if change_directory:
|
|
248 |
+ # Prepend a command to change directory.
|
|
249 |
+ # '&&' should be supported by all POSIX shells.
|
|
250 |
+ cmd = 'cd '+self.get_variable('build-root') + ' && ' + cmd
|
|
251 |
+ arguments.append(cmd + '\n')
|
|
252 |
+ |
|
253 |
+ # Because we previously had two methods to transmit build-root,
|
|
254 |
+ # we also set PWD for compatibility.
|
|
255 |
+ exitcode = sandbox.run(arguments, SandboxFlags.ROOT_READ_ONLY,
|
|
256 |
+ env={"PWD": self.get_variable('build-root')})
|
|
244 | 257 |
if exitcode != 0:
|
245 | 258 |
raise ElementError("Command '{}' failed with exitcode {}".format(cmd, exitcode),
|
246 | 259 |
collect=self.get_variable('install-root'))
|
... | ... | @@ -20,3 +20,4 @@ |
20 | 20 |
from .sandbox import Sandbox, SandboxFlags
|
21 | 21 |
from ._sandboxchroot import SandboxChroot
|
22 | 22 |
from ._sandboxbwrap import SandboxBwrap
|
23 |
+from ._sandboxremote import SandboxRemote
|
1 |
+#!/usr/bin/env python3
|
|
2 |
+#
|
|
3 |
+# Copyright (C) 2016 Codethink Limited
|
|
4 |
+#
|
|
5 |
+# This program is free software; you can redistribute it and/or
|
|
6 |
+# modify it under the terms of the GNU Lesser General Public
|
|
7 |
+# License as published by the Free Software Foundation; either
|
|
8 |
+# version 2 of the License, or (at your option) any later version.
|
|
9 |
+#
|
|
10 |
+# This library is distributed in the hope that it will be useful,
|
|
11 |
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12 |
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
13 |
+# Lesser General Public License for more details.
|
|
14 |
+#
|
|
15 |
+# You should have received a copy of the GNU Lesser General Public
|
|
16 |
+# License along with this library. If not, see <http://www.gnu.org/licenses/>.
|
|
17 |
+#
|
|
18 |
+# Authors:
|
|
19 |
+# Andrew Leeming <andrew leeming codethink co uk>
|
|
20 |
+# Tristan Van Berkom <tristan vanberkom codethink co uk>
|
|
21 |
+import os
|
|
22 |
+import sys
|
|
23 |
+import time
|
|
24 |
+import errno
|
|
25 |
+import signal
|
|
26 |
+import subprocess
|
|
27 |
+import shutil
|
|
28 |
+from contextlib import ExitStack
|
|
29 |
+ |
|
30 |
+import grpc
|
|
31 |
+import psutil
|
|
32 |
+ |
|
33 |
+from .. import utils, _signals
|
|
34 |
+from ._mount import MountMap
|
|
35 |
+from . import Sandbox, SandboxFlags
|
|
36 |
+from ..storage._filebaseddirectory import FileBasedDirectory
|
|
37 |
+from ..storage._casbaseddirectory import CasBasedDirectory
|
|
38 |
+from .._protos.google.bytestream import bytestream_pb2, bytestream_pb2_grpc
|
|
39 |
+from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc
|
|
40 |
+from .._protos.buildstream.v2 import buildstream_pb2, buildstream_pb2_grpc
|
|
41 |
+from .._protos.google.longrunning import operations_pb2, operations_pb2_grpc
|
|
42 |
+ |
|
43 |
+from .._artifactcache.cascache import CASCache
|
|
44 |
+ |
|
45 |
+class SandboxError(Exception):
|
|
46 |
+ pass
|
|
47 |
+ |
|
48 |
+# SandboxRemote()
|
|
49 |
+#
|
|
50 |
+# This isn't really a sandbox, it's a stub which sends all the source to a remote server and retrieves the results from it.
|
|
51 |
+#
|
|
52 |
+class SandboxRemote(Sandbox):
|
|
53 |
+ |
|
54 |
+ def __init__(self, *args, **kwargs):
|
|
55 |
+ super().__init__(*args, **kwargs)
|
|
56 |
+ self.user_ns_available = kwargs['user_ns_available']
|
|
57 |
+ self.die_with_parent_available = kwargs['die_with_parent_available']
|
|
58 |
+ self.cascache = None
|
|
59 |
+ |
|
60 |
+ def _get_cascache(self):
|
|
61 |
+ if self.cascache is None:
|
|
62 |
+ self.cascache = CASCache(self._get_context())
|
|
63 |
+ self.cascache.setup_remotes(use_config=True)
|
|
64 |
+ return self.cascache
|
|
65 |
+ |
|
66 |
+ def __run_remote_command(self, cascache, command, input_root_digest, environment):
|
|
67 |
+ environment_variables = []
|
|
68 |
+ for(k,v) in environment.items():
|
|
69 |
+ environment_variables.append(remote_execution_pb2.Command.EnvironmentVariable(name=k, value=v))
|
|
70 |
+ remote_command = remote_execution_pb2.Command(arguments=command, environment_variables = environment_variables)
|
|
71 |
+ |
|
72 |
+ # Serialise this into the cascache...
|
|
73 |
+ command_digest = cascache.add_object(buffer=remote_command.SerializeToString())
|
|
74 |
+ |
|
75 |
+ command_ref = 'worker-command/{}'.format(command_digest.hash)
|
|
76 |
+ cascache.set_ref(command_ref, command_digest)
|
|
77 |
+ |
|
78 |
+ command_push_successful = cascache.push_refs([command_ref], self._get_project(), may_have_dependencies=False)
|
|
79 |
+ if command_push_successful or cascache.verify_key_pushed(command_ref, self._get_project()):
|
|
80 |
+ # Next, try to create a communication channel
|
|
81 |
+ port = 50051
|
|
82 |
+ channel = grpc.insecure_channel('dekatron.office.codethink.co.uk:{}'.format(port))
|
|
83 |
+ stub = remote_execution_pb2_grpc.ExecutionStub(channel)
|
|
84 |
+ ops_stub = operations_pb2_grpc.OperationsStub(channel)
|
|
85 |
+ |
|
86 |
+ # Having done that, create and send the action.
|
|
87 |
+ |
|
88 |
+ action = remote_execution_pb2.Action(command_digest = command_digest,
|
|
89 |
+ input_root_digest = input_root_digest,
|
|
90 |
+ output_files = [],
|
|
91 |
+ output_directories = [self._output_directory],
|
|
92 |
+ platform = None,
|
|
93 |
+ timeout = None,
|
|
94 |
+ do_not_cache = True)
|
|
95 |
+ |
|
96 |
+ request = remote_execution_pb2.ExecuteRequest(instance_name = 'default',
|
|
97 |
+ action = action,
|
|
98 |
+ skip_cache_lookup = True)
|
|
99 |
+ |
|
100 |
+ operation = stub.Execute(request) # Returns Operation
|
|
101 |
+ job_name = operation.name
|
|
102 |
+ else:
|
|
103 |
+ # Command push failed
|
|
104 |
+ return None
|
|
105 |
+ while True:
|
|
106 |
+ # TODO: Timeout
|
|
107 |
+ # Refresh the operation data periodically using the name
|
|
108 |
+ request = operations_pb2.GetOperationRequest(name=job_name)
|
|
109 |
+ operation = ops_stub.GetOperation(request)
|
|
110 |
+ sys.stderr.write("Operation {} is in stage <{}>\n".format(operation.name, operation.metadata))
|
|
111 |
+ sys.stderr.write("......... {} has response <{}>\n".format(operation.name, operation.response))
|
|
112 |
+ time.sleep(1)
|
|
113 |
+ if operation.done:
|
|
114 |
+ break
|
|
115 |
+ return operation
|
|
116 |
+ |
|
117 |
+ """ output_directories is an array of OutputDirectory objects
|
|
118 |
+ output_files is an array of OutputFile objects """
|
|
119 |
+ def process_job_output(self, output_directories, output_files):
|
|
120 |
+ # We only specify one output_directory, so it's an error
|
|
121 |
+ # for there to be any output files or more than one directory at the moment.
|
|
122 |
+ |
|
123 |
+ if len(output_files)>0:
|
|
124 |
+ raise SandboxError("Output files were returned when we didn't request any.")
|
|
125 |
+ if len(output_directories)>1:
|
|
126 |
+ raise SandboxError("More than one output directory was returned from the build server: {}".format(output_directories))
|
|
127 |
+ |
|
128 |
+ digest = output_directories[0].tree_digest
|
|
129 |
+ if digest is None or digest.hash is None or digest.hash=="":
|
|
130 |
+ raise SandboxError("Output directory structure had no digest attached.")
|
|
131 |
+ |
|
132 |
+ # Now do a pull to ensure we have the necessary parts.
|
|
133 |
+ cascache = self._get_cascache()
|
|
134 |
+ cascache.pull_key(digest.hash, digest.size_bytes, self._get_project())
|
|
135 |
+ path_components = os.path.split(self._output_directory)
|
|
136 |
+ |
|
137 |
+ # Now what we have is a digest for the output. Once we return, the calling process will
|
|
138 |
+ # attempt to descend into our directory and find that directory, so we need to overwrite
|
|
139 |
+ # that.
|
|
140 |
+ |
|
141 |
+ if len(path_components)==0:
|
|
142 |
+ # The artifact wants the whole directory; we could just return the returned hash in its
|
|
143 |
+ # place, but we don't have a means to do that yet.
|
|
144 |
+ raise SandboxError("Unimplemented: Output directory is empty or equal to the sandbox root.")
|
|
145 |
+ |
|
146 |
+ # At the moment, we will get the whole directory back in the first directory argument and we need
|
|
147 |
+ # to replace the sandbox's virtual directory with that. Creating a new virtual directory object
|
|
148 |
+ # from another hash will be interesting, though...
|
|
149 |
+ |
|
150 |
+ new_dir = CasBasedDirectory(self._get_context(), ref=digest)
|
|
151 |
+ self.set_virtual_directory(new_dir)
|
|
152 |
+ |
|
153 |
+ def run(self, command, flags, *, cwd=None, env=None):
|
|
154 |
+ stdout, stderr = self._get_output()
|
|
155 |
+ sys.stderr.write("Attempting run with remote sandbox...\n")
|
|
156 |
+ # Upload sources
|
|
157 |
+ upload_vdir = self.get_virtual_directory()
|
|
158 |
+ if isinstance(upload_vdir, FileBasedDirectory):
|
|
159 |
+ # Make a new temporary directory to put source in
|
|
160 |
+ upload_vdir = CasBasedDirectory(self._get_context(), ref=None)
|
|
161 |
+ upload_vdir.import_files(self.get_virtual_directory().get_underlying_directory())
|
|
162 |
+ |
|
163 |
+ # Now, push that key (without necessarily needing a ref) to the remote.
|
|
164 |
+ cascache = self._get_cascache()
|
|
165 |
+ |
|
166 |
+ ref = 'worker-source/{}'.format(upload_vdir.ref.hash)
|
|
167 |
+ upload_vdir._save(ref)
|
|
168 |
+ source_push_successful = cascache.push_refs([ref], self._get_project())
|
|
169 |
+ # Fallback to the sandbox default settings for
|
|
170 |
+ # the cwd and environment.
|
|
171 |
+ |
|
172 |
+ if env is None:
|
|
173 |
+ env = self._get_environment()
|
|
174 |
+ |
|
175 |
+ # We want command args as a list of strings
|
|
176 |
+ if isinstance(command, str):
|
|
177 |
+ command = [command]
|
|
178 |
+ |
|
179 |
+ # Now transmit the command to execute
|
|
180 |
+ if source_push_successful or cascache.verify_key_pushed(ref, self._get_project()):
|
|
181 |
+ response = self.__run_remote_command(cascache, command, upload_vdir.ref, env)
|
|
182 |
+ |
|
183 |
+ if response is None or response.HasField("error"):
|
|
184 |
+ # Build failed, so return a failure code
|
|
185 |
+ return 1
|
|
186 |
+ else:
|
|
187 |
+ |
|
188 |
+ # At the moment, response can either be an ExecuteResponse containing an ActionResult, or an ActionResult directly.
|
|
189 |
+ executeResponse = remote_execution_pb2.ExecuteResponse()
|
|
190 |
+ if response.response.Is(executeResponse.DESCRIPTOR):
|
|
191 |
+ # Unpack ExecuteResponse and set response to its response
|
|
192 |
+ response.response.Unpack(executeResponse)
|
|
193 |
+ response = executeResponse
|
|
194 |
+ |
|
195 |
+ actionResult = remote_execution_pb2.ActionResult()
|
|
196 |
+ if response.response.Is(actionResult.DESCRIPTOR):
|
|
197 |
+ response.response.Unpack(actionResult)
|
|
198 |
+ self.process_job_output(actionResult.output_directories, actionResult.output_files)
|
|
199 |
+ else:
|
|
200 |
+ sys.stderr.write("Received unknown message from server.\n")
|
|
201 |
+ return 1
|
|
202 |
+ else:
|
|
203 |
+ sys.stderr.write("Failed to verify source on remote artifact cache.\n")
|
|
204 |
+ return 1
|
|
205 |
+ # TODO: Pull the results
|
|
206 |
+ sys.stderr.write("Completed remote run with sandbox.\n")
|
|
207 |
+ return 0
|
... | ... | @@ -99,9 +99,11 @@ class Sandbox(): |
99 | 99 |
self.__stdout = kwargs['stdout']
|
100 | 100 |
self.__stderr = kwargs['stderr']
|
101 | 101 |
|
102 |
- # Setup the directories. Root should be available to subclasses, hence
|
|
103 |
- # being single-underscore. The others are private to this class.
|
|
102 |
+ # Setup the directories. Root and output_directory should be
|
|
103 |
+ # available to subclasses, hence being single-underscore. The
|
|
104 |
+ # others are private to this class.
|
|
104 | 105 |
self._root = os.path.join(directory, 'root')
|
106 |
+ self._output_directory = None
|
|
105 | 107 |
self.__directory = directory
|
106 | 108 |
self.__scratch = os.path.join(self.__directory, 'scratch')
|
107 | 109 |
for directory_ in [self._root, self.__scratch]:
|
... | ... | @@ -142,11 +144,29 @@ class Sandbox(): |
142 | 144 |
self._vdir = FileBasedDirectory(self._root)
|
143 | 145 |
return self._vdir
|
144 | 146 |
|
147 |
+ def set_virtual_directory(self, vdir):
|
|
148 |
+ """ Sets virtual directory. Useful after remote execution
|
|
149 |
+ has rewritten the working directory. """
|
|
150 |
+ self.__vdir = vdir
|
|
151 |
+ |
|
152 |
+ def get_virtual_toplevel_directory(self):
|
|
153 |
+ """Fetches the sandbox's toplevel directory
|
|
154 |
+ |
|
155 |
+ The toplevel directory contains 'root', 'scratch' and later
|
|
156 |
+ 'artifact' where output is copied to.
|
|
157 |
+ |
|
158 |
+ Returns:
|
|
159 |
+ (FileBasedDirectory): The sandbox toplevel directory
|
|
160 |
+ |
|
161 |
+ """
|
|
162 |
+ # For now, just create a new Directory every time we're asked
|
|
163 |
+ return FileBasedDirectory(self.__directory)
|
|
164 |
+ |
|
145 | 165 |
def set_environment(self, environment):
|
146 | 166 |
"""Sets the environment variables for the sandbox
|
147 | 167 |
|
148 | 168 |
Args:
|
149 |
- directory (dict): The environment variables to use in the sandbox
|
|
169 |
+ environment (dict): The environment variables to use in the sandbox
|
|
150 | 170 |
"""
|
151 | 171 |
self.__env = environment
|
152 | 172 |
|
... | ... | @@ -158,6 +178,15 @@ class Sandbox(): |
158 | 178 |
"""
|
159 | 179 |
self.__cwd = directory
|
160 | 180 |
|
181 |
+ def set_output_directory(self, directory):
|
|
182 |
+ """Sets the output directory - the directory which is preserved
|
|
183 |
+ as an artifact after assembly.
|
|
184 |
+ |
|
185 |
+ Args:
|
|
186 |
+ directory (str): An absolute path within the sandbox
|
|
187 |
+ """
|
|
188 |
+ self._output_directory = directory
|
|
189 |
+ |
|
161 | 190 |
def mark_directory(self, directory, *, artifact=False):
|
162 | 191 |
"""Marks a sandbox directory and ensures it will exist
|
163 | 192 |
|