... |
... |
@@ -83,8 +83,7 @@ class SandboxRemote(Sandbox): |
83
|
83
|
# Upload the Command message to the remote CAS server
|
84
|
84
|
command_digest = cascache.push_message(self._get_project(), remote_command)
|
85
|
85
|
if not command_digest or not cascache.verify_digest_pushed(self._get_project(), command_digest):
|
86
|
|
- # Command push failed
|
87
|
|
- return None
|
|
86
|
+ raise SandboxError("Failed pushing build command to remote CAS.")
|
88
|
87
|
|
89
|
88
|
# Create and send the action.
|
90
|
89
|
action = remote_execution_pb2.Action(command_digest=command_digest,
|
... |
... |
@@ -95,27 +94,49 @@ class SandboxRemote(Sandbox): |
95
|
94
|
# Upload the Action message to the remote CAS server
|
96
|
95
|
action_digest = cascache.push_message(self._get_project(), action)
|
97
|
96
|
if not action_digest or not cascache.verify_digest_pushed(self._get_project(), action_digest):
|
98
|
|
- # Action push failed
|
99
|
|
- return None
|
|
97
|
+ raise SandboxError("Failed pushing build action to remote CAS.")
|
100
|
98
|
|
101
|
99
|
# Next, try to create a communication channel to the BuildGrid server.
|
102
|
100
|
channel = grpc.insecure_channel(self.server_url)
|
103
|
101
|
stub = remote_execution_pb2_grpc.ExecutionStub(channel)
|
104
|
102
|
request = remote_execution_pb2.ExecuteRequest(action_digest=action_digest,
|
105
|
103
|
skip_cache_lookup=False)
|
106
|
|
- try:
|
107
|
|
- operation_iterator = stub.Execute(request)
|
108
|
|
- except grpc.RpcError:
|
109
|
|
- return None
|
|
104
|
+
|
|
105
|
def __run_remote_command(stub, execute_request=None, running_operation=None):
    """Start a remote execution, or re-attach to a running one, and follow it.

    Args:
        stub: Object providing ``Execute()`` and ``WaitExecution()``, each
            returning an iterable of operation messages.
        execute_request: Request handed to ``stub.Execute()``. When it is
            None the helper re-attaches to ``running_operation`` instead.
        running_operation: Previously received operation; its ``name`` is
            used to build the ``WaitExecutionRequest`` when
            ``execute_request`` is None.

    Returns:
        The first operation of the stream whose ``done`` flag is set.
        If the stream ends before that, the last operation received, or
        None when no operation was received at all.

    Raises:
        SandboxError: If the RPC fails with a ``grpc.RpcError``.
    """
    try:
        last_operation = None
        if execute_request is not None:
            operation_iterator = stub.Execute(execute_request)
        else:
            # Use the caller-supplied operation here: `operation` is the
            # loop variable below and is still unbound at this point.
            request = remote_execution_pb2.WaitExecutionRequest(name=running_operation.name)
            operation_iterator = stub.WaitExecution(request)

        for operation in operation_iterator:
            if operation.done:
                return operation
            # Remember the most recent update so the caller can resume
            # from it if the stream ends early.
            last_operation = operation
    except grpc.RpcError as e:
        status_code = e.code()
        if status_code == grpc.StatusCode.UNAVAILABLE:
            raise SandboxError("Failed contacting remote execution server at {}."
                               .format(self.server_url)) from e
        elif running_operation and status_code == grpc.StatusCode.UNIMPLEMENTED:
            raise SandboxError("Failed trying to recover from connection loss: "
                               "server does not support operation status polling recovery.") from e
        else:
            raise SandboxError("{} ({}).".format(e.details(), status_code.name)) from e

    return last_operation
|
110
|
131
|
|
111
|
132
|
operation = None
|
112
|
133
|
with self._get_context().timed_activity("Waiting for the remote build to complete"):
|
113
|
|
- # It is advantageous to check operation_iterator.code() is grpc.StatusCode.OK here,
|
114
|
|
- # which will check the server is actually contactable. However, calling it when the
|
115
|
|
- # server is available seems to cause .code() to hang forever.
|
116
|
|
- for operation in operation_iterator:
|
117
|
|
- if operation.done:
|
118
|
|
- break
|
|
134
|
+ operation = __run_remote_command(stub, execute_request=request)
|
|
135
|
+ if operation and operation.done:
|
|
136
|
+ return operation
|
|
137
|
+
|
|
138
|
+ while not operation.done:
|
|
139
|
+ operation = __run_remote_command(stub, running_operation=operation)
|
119
|
140
|
|
120
|
141
|
return operation
|
121
|
142
|
|
... |
... |
@@ -201,7 +222,6 @@ class SandboxRemote(Sandbox): |
201
|
222
|
|
202
|
223
|
if operation is None:
|
203
|
224
|
# Failure of remote execution, usually due to an error in BuildStream
|
204
|
|
- # NB This error could be raised in __run_remote_command
|
205
|
225
|
raise SandboxError("No response returned from server")
|
206
|
226
|
|
207
|
227
|
assert not operation.HasField('error') and operation.HasField('response')
|