... |
... |
@@ -76,8 +76,7 @@ class SandboxRemote(Sandbox): |
76
|
76
|
# Upload the Command message to the remote CAS server
|
77
|
77
|
command_digest = cascache.push_message(self._get_project(), remote_command)
|
78
|
78
|
if not command_digest or not cascache.verify_digest_pushed(self._get_project(), command_digest):
|
79
|
|
- # Command push failed
|
80
|
|
- return None
|
|
79
|
+ raise SandboxError("Failed pushing build command to remote CAS.")
|
81
|
80
|
|
82
|
81
|
# Create and send the action.
|
83
|
82
|
action = remote_execution_pb2.Action(command_digest=command_digest,
|
... |
... |
@@ -88,27 +87,57 @@ class SandboxRemote(Sandbox): |
88
|
87
|
# Upload the Action message to the remote CAS server
|
89
|
88
|
action_digest = cascache.push_message(self._get_project(), action)
|
90
|
89
|
if not action_digest or not cascache.verify_digest_pushed(self._get_project(), action_digest):
|
91
|
|
- # Action push failed
|
92
|
|
- return None
|
|
90
|
+ raise SandboxError("Failed pushing build action to remote CAS.")
|
93
|
91
|
|
94
|
92
|
# Next, try to create a communication channel to the BuildGrid server.
|
95
|
93
|
channel = grpc.insecure_channel(self.server_url)
|
96
|
94
|
stub = remote_execution_pb2_grpc.ExecutionStub(channel)
|
97
|
95
|
request = remote_execution_pb2.ExecuteRequest(action_digest=action_digest,
|
98
|
96
|
skip_cache_lookup=False)
|
99
|
|
- try:
|
100
|
|
- operation_iterator = stub.Execute(request)
|
101
|
|
- except grpc.RpcError:
|
102
|
|
- return None
|
|
97
|
+
|
|
98
|
def __run_remote_command(stub, execute_request=None, running_operation=None):
    """Follow a remote execution operation stream until it finishes or breaks.

    When ``execute_request`` is given, a new execution is started through
    ``stub.Execute``. Otherwise the stream of an already running operation
    is re-opened through ``stub.WaitExecution``, using
    ``running_operation.name``; in that case ``running_operation`` must
    not be None.

    NOTE(review): ``self`` is not a parameter here; it is expected to be
    provided by the enclosing method's scope.

    Args:
        stub: Execution service stub exposing ``Execute`` and ``WaitExecution``.
        execute_request: Request used to start a new execution, or None.
        running_operation: Previously received, unfinished operation to
            resume waiting on, or None.

    Returns:
        The first operation reporting ``done``. If the stream ends (or
        fails with a status code not listed below) before that, the last
        operation received, or None when nothing was received at all.

    Raises:
        SandboxError: The server is unavailable, reported one of the
            status codes treated as fatal below, or does not implement
            ``WaitExecution`` while a running operation is being resumed.
            The originating ``grpc.RpcError`` is attached as ``__cause__``.
    """
    last_operation = None
    try:
        if execute_request is not None:
            operation_iterator = stub.Execute(execute_request)
        else:
            request = remote_execution_pb2.WaitExecutionRequest(name=running_operation.name)
            operation_iterator = stub.WaitExecution(request)

        for operation in operation_iterator:
            if operation.done:
                return operation
            # Remember progress so it can be handed back if the stream breaks.
            last_operation = operation

    except grpc.RpcError as e:
        status_code = e.code()
        if status_code == grpc.StatusCode.UNAVAILABLE:
            raise SandboxError("Failed contacting remote execution server at {}."
                               .format(self.server_url)) from e

        if status_code in (grpc.StatusCode.INVALID_ARGUMENT,
                           grpc.StatusCode.FAILED_PRECONDITION,
                           grpc.StatusCode.RESOURCE_EXHAUSTED,
                           grpc.StatusCode.INTERNAL,
                           grpc.StatusCode.DEADLINE_EXCEEDED):
            raise SandboxError("{} ({}).".format(e.details(), status_code.name)) from e

        if running_operation and status_code == grpc.StatusCode.UNIMPLEMENTED:
            raise SandboxError("Failed trying to recover from connection loss: "
                               "server does not support operation status polling recovery.") from e

        # Any other status code is deliberately not raised: fall through and
        # return whatever operation was seen last (possibly None).

    return last_operation
|
103
|
130
|
|
104
|
131
|
operation = None
|
105
|
132
|
with self._get_context().timed_activity("Waiting for the remote build to complete"):
|
106
|
|
- # It is advantageous to check operation_iterator.code() is grpc.StatusCode.OK here,
|
107
|
|
- # which will check the server is actually contactable. However, calling it when the
|
108
|
|
- # server is available seems to cause .code() to hang forever.
|
109
|
|
- for operation in operation_iterator:
|
110
|
|
- if operation.done:
|
111
|
|
- break
|
|
133
|
+ operation = __run_remote_command(stub, execute_request=request)
|
|
134
|
+ if operation is None:
|
|
135
|
+ return None
|
|
136
|
+ elif operation.done:
|
|
137
|
+ return operation
|
|
138
|
+
|
|
139
|
+ while operation is not None and not operation.done:
|
|
140
|
+ operation = __run_remote_command(stub, running_operation=operation)
|
112
|
141
|
|
113
|
142
|
return operation
|
114
|
143
|
|
... |
... |
@@ -192,7 +221,6 @@ class SandboxRemote(Sandbox): |
192
|
221
|
|
193
|
222
|
if operation is None:
|
194
|
223
|
# Failure of remote execution, usually due to an error in BuildStream
|
195
|
|
- # NB This error could be raised in __run_remote_command
|
196
|
224
|
raise SandboxError("No response returned from server")
|
197
|
225
|
|
198
|
226
|
assert not operation.HasField('error') and operation.HasField('response')
|