finn pushed to branch finn/81-precon-fail at BuildGrid / buildgrid
Commits:
-
bff9c19b
by finnball at 2018-09-20T14:30:28Z
-
f4a2e657
by finnball at 2018-09-20T14:30:28Z
-
07e06b4a
by finnball at 2018-09-20T16:21:42Z
-
1113a905
by finnball at 2018-09-20T16:21:42Z
-
1cc0759c
by finnball at 2018-09-20T16:21:42Z
-
920afd04
by finnball at 2018-09-20T16:21:42Z
13 changed files:
- .gitlab-ci.yml
- buildgrid/_app/commands/cmd_cas.py
- buildgrid/_app/commands/cmd_execute.py
- buildgrid/_app/settings/cas.yml
- buildgrid/_app/settings/default.yml
- buildgrid/_app/settings/parser.py
- buildgrid/_app/settings/remote-storage.yml
- buildgrid/server/_exceptions.py
- buildgrid/server/execution/instance.py
- buildgrid/server/execution/service.py
- docs/source/using_internal.rst
- tests/integration/execution_service.py
- tests/integration/operations_service.py
Changes:
... | ... | @@ -33,6 +33,7 @@ before_script: |
33 | 33 |
- ${BGD} server start buildgrid/_app/settings/default.yml &
|
34 | 34 |
- sleep 1 # Allow server to boot
|
35 | 35 |
- ${BGD} bot dummy &
|
36 |
+ - ${BGD} cas upload-dummy
|
|
36 | 37 |
- ${BGD} execute request-dummy --wait-for-completion
|
37 | 38 |
|
38 | 39 |
|
... | ... | @@ -65,6 +65,23 @@ def cli(context, remote, instance_name, client_key, client_cert, server_cert): |
65 | 65 |
context.logger.debug("Starting for remote {}".format(context.remote))
|
66 | 66 |
|
67 | 67 |
|
68 |
+@cli.command('upload-dummy', short_help="Upload a dummy action. Should be used with `execute request-dummy`")
|
|
69 |
+@pass_context
|
|
70 |
+def upload_dummy(context):
|
|
71 |
+ context.logger.info("Uploading dummy action...")
|
|
72 |
+ action = remote_execution_pb2.Action(do_not_cache=True)
|
|
73 |
+ action_digest = create_digest(action.SerializeToString())
|
|
74 |
+ |
|
75 |
+ request = remote_execution_pb2.BatchUpdateBlobsRequest(instance_name=context.instance_name)
|
|
76 |
+ request.requests.add(digest=action_digest,
|
|
77 |
+ data=action.SerializeToString())
|
|
78 |
+ |
|
79 |
+ stub = remote_execution_pb2_grpc.ContentAddressableStorageStub(context.channel)
|
|
80 |
+ response = stub.BatchUpdateBlobs(request)
|
|
81 |
+ |
|
82 |
+ context.logger.info(response)
|
|
83 |
+ |
|
84 |
+ |
|
68 | 85 |
@cli.command('upload-files', short_help="Upload files to the CAS server.")
|
69 | 86 |
@click.argument('files', nargs=-1, type=click.File('rb'), required=True)
|
70 | 87 |
@pass_context
|
... | ... | @@ -76,9 +76,11 @@ def cli(context, remote, instance_name, client_key, client_cert, server_cert): |
76 | 76 |
help="Stream updates until jobs are completed.")
|
77 | 77 |
@pass_context
|
78 | 78 |
def request_dummy(context, number, wait_for_completion):
|
79 |
- action_digest = remote_execution_pb2.Digest()
|
|
80 | 79 |
|
81 | 80 |
context.logger.info("Sending execution request...")
|
81 |
+ action = remote_execution_pb2.Action(do_not_cache=True)
|
|
82 |
+ action_digest = create_digest(action.SerializeToString())
|
|
83 |
+ |
|
82 | 84 |
stub = remote_execution_pb2_grpc.ExecutionStub(context.channel)
|
83 | 85 |
|
84 | 86 |
request = remote_execution_pb2.ExecuteRequest(instance_name=context.instance_name,
|
... | ... | @@ -90,9 +92,18 @@ def request_dummy(context, number, wait_for_completion): |
90 | 92 |
responses.append(stub.Execute(request))
|
91 | 93 |
|
92 | 94 |
for response in responses:
|
95 |
+ |
|
93 | 96 |
if wait_for_completion:
|
97 |
+ result = None
|
|
94 | 98 |
for stream in response:
|
95 |
- context.logger.info(stream)
|
|
99 |
+ result = stream
|
|
100 |
+ context.logger.info(result)
|
|
101 |
+ |
|
102 |
+ if not result.done:
|
|
103 |
+ click.echo("Result did not return True. " +
|
|
104 |
+ "Was the action uploaded to CAS?", err=True)
|
|
105 |
+ sys.exit(-1)
|
|
106 |
+ |
|
96 | 107 |
else:
|
97 | 108 |
context.logger.info(next(response))
|
98 | 109 |
|
... | ... | @@ -17,7 +17,7 @@ instances: |
17 | 17 |
|
18 | 18 |
storages:
|
19 | 19 |
- !disk-storage &main-storage
|
20 |
- path: ~/cas/
|
|
20 |
+ path: !expand-path $HOME/cas/
|
|
21 | 21 |
|
22 | 22 |
services:
|
23 | 23 |
- !cas
|
... | ... | @@ -17,7 +17,7 @@ instances: |
17 | 17 |
|
18 | 18 |
storages:
|
19 | 19 |
- !disk-storage &main-storage
|
20 |
- path: ~/cas/
|
|
20 |
+ path: !expand-path $HOME/cas/
|
|
21 | 21 |
|
22 | 22 |
services:
|
23 | 23 |
- !action-cache &main-action
|
... | ... | @@ -68,12 +68,21 @@ class Channel(YamlFactory): |
68 | 68 |
sys.exit(-1)
|
69 | 69 |
|
70 | 70 |
|
71 |
+class ExpandPath(YamlFactory):
|
|
72 |
+ |
|
73 |
+ yaml_tag = u'!expand-path'
|
|
74 |
+ |
|
75 |
+ def __new__(cls, path):
|
|
76 |
+ path = os.path.expanduser(path)
|
|
77 |
+ path = os.path.expandvars(path)
|
|
78 |
+ return path
|
|
79 |
+ |
|
80 |
+ |
|
71 | 81 |
class Disk(YamlFactory):
|
72 | 82 |
|
73 | 83 |
yaml_tag = u'!disk-storage'
|
74 | 84 |
|
75 | 85 |
def __new__(cls, path):
|
76 |
- path = os.path.expanduser(path)
|
|
77 | 86 |
return DiskStorage(path)
|
78 | 87 |
|
79 | 88 |
|
... | ... | @@ -197,6 +206,7 @@ def _parse_size(size): |
197 | 206 |
def get_parser():
|
198 | 207 |
|
199 | 208 |
yaml.SafeLoader.add_constructor(Channel.yaml_tag, Channel.from_yaml)
|
209 |
+ yaml.SafeLoader.add_constructor(ExpandPath.yaml_tag, ExpandPath.from_yaml)
|
|
200 | 210 |
yaml.SafeLoader.add_constructor(Execution.yaml_tag, Execution.from_yaml)
|
201 | 211 |
yaml.SafeLoader.add_constructor(Action.yaml_tag, Action.from_yaml)
|
202 | 212 |
yaml.SafeLoader.add_constructor(Reference.yaml_tag, Reference.from_yaml)
|
... | ... | @@ -19,10 +19,10 @@ instances: |
19 | 19 |
- !remote-storage &main-storage
|
20 | 20 |
url: "http://localhost:50052"
|
21 | 21 |
instance_name: main
|
22 |
- credentials:
|
|
23 |
- tls-client-key: null
|
|
24 |
- tls-client-cert: null
|
|
25 |
- tls-server-cert: null
|
|
22 |
+# credentials:
|
|
23 |
+# tls-client-key: null
|
|
24 |
+# tls-client-cert: null
|
|
25 |
+# tls-server-cert: null
|
|
26 | 26 |
|
27 | 27 |
services:
|
28 | 28 |
- !action-cache &main-action
|
... | ... | @@ -46,3 +46,12 @@ class OutOfRangeError(BgdError): |
46 | 46 |
|
47 | 47 |
def __init__(self, message, detail=None, reason=None):
|
48 | 48 |
super().__init__(message, detail=detail, domain=ErrorDomain.SERVER, reason=reason)
|
49 |
+ |
|
50 |
+ |
|
51 |
+class FailedPreconditionError(BgdError):
|
|
52 |
+ """ One or more errors occurred in setting up the action requested, such as a missing input
|
|
53 |
+ or command or no worker being available. The client may be able to fix the errors and retry.
|
|
54 |
+ """
|
|
55 |
+ |
|
56 |
+ def __init__(self, message, detail=None, reason=None):
|
|
57 |
+ super().__init__(message, detail=detail, domain=ErrorDomain.SERVER, reason=reason)
|
... | ... | @@ -24,12 +24,12 @@ import logging |
24 | 24 |
from buildgrid._protos.build.bazel.remote.execution.v2.remote_execution_pb2 import Action
|
25 | 25 |
|
26 | 26 |
from ..job import Job
|
27 |
-from .._exceptions import InvalidArgumentError
|
|
27 |
+from .._exceptions import InvalidArgumentError, FailedPreconditionError
|
|
28 | 28 |
|
29 | 29 |
|
30 | 30 |
class ExecutionInstance:
|
31 | 31 |
|
32 |
- def __init__(self, scheduler, storage=None):
|
|
32 |
+ def __init__(self, scheduler, storage):
|
|
33 | 33 |
self.logger = logging.getLogger(__name__)
|
34 | 34 |
self._storage = storage
|
35 | 35 |
self._scheduler = scheduler
|
... | ... | @@ -43,13 +43,12 @@ class ExecutionInstance: |
43 | 43 |
this action.
|
44 | 44 |
"""
|
45 | 45 |
|
46 |
- do_not_cache = False
|
|
47 |
- if self._storage is not None:
|
|
48 |
- action = self._storage.get_message(action_digest, Action)
|
|
49 |
- if action is not None:
|
|
50 |
- do_not_cache = action.do_not_cache
|
|
46 |
+ action = self._storage.get_message(action_digest, Action)
|
|
51 | 47 |
|
52 |
- job = Job(action_digest, do_not_cache, message_queue)
|
|
48 |
+ if not action:
|
|
49 |
+ raise FailedPreconditionError("Could not get action from storage.")
|
|
50 |
+ |
|
51 |
+ job = Job(action_digest, action.do_not_cache, message_queue)
|
|
53 | 52 |
self.logger.info("Operation name: [{}]".format(job.name))
|
54 | 53 |
|
55 | 54 |
self._scheduler.append_job(job, skip_cache_lookup)
|
... | ... | @@ -30,7 +30,7 @@ from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_p |
30 | 30 |
|
31 | 31 |
from buildgrid._protos.google.longrunning import operations_pb2
|
32 | 32 |
|
33 |
-from .._exceptions import InvalidArgumentError
|
|
33 |
+from .._exceptions import InvalidArgumentError, FailedPreconditionError
|
|
34 | 34 |
|
35 | 35 |
|
36 | 36 |
class ExecutionService(remote_execution_pb2_grpc.ExecutionServicer):
|
... | ... | @@ -63,6 +63,12 @@ class ExecutionService(remote_execution_pb2_grpc.ExecutionServicer): |
63 | 63 |
context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
|
64 | 64 |
yield operations_pb2.Operation()
|
65 | 65 |
|
66 |
+ except FailedPreconditionError as e:
|
|
67 |
+ self.logger.error(e)
|
|
68 |
+ context.set_details(str(e))
|
|
69 |
+ context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
|
|
70 |
+ yield operations_pb2.Operation()
|
|
71 |
+ |
|
66 | 72 |
def WaitExecution(self, request, context):
|
67 | 73 |
try:
|
68 | 74 |
names = request.name.split("/")
|
1 |
- |
|
2 | 1 |
.. _internal-client:
|
3 | 2 |
|
3 |
+ |
|
4 | 4 |
Internal client
|
5 | 5 |
===============
|
6 | 6 |
|
... | ... | @@ -19,7 +19,13 @@ In one terminal, start a server: |
19 | 19 |
|
20 | 20 |
bgd server start buildgrid/_app/settings/default.yml
|
21 | 21 |
|
22 |
-In another terminal, send a request for work:
|
|
22 |
+In another terminal, upload an action to CAS:
|
|
23 |
+ |
|
24 |
+.. code-block:: sh
|
|
25 |
+ |
|
26 |
+ bgd cas upload-dummy
|
|
27 |
+ |
|
28 |
+Then send a request for work:
|
|
23 | 29 |
|
24 | 30 |
.. code-block:: sh
|
25 | 31 |
|
... | ... | @@ -28,6 +28,7 @@ import pytest |
28 | 28 |
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
29 | 29 |
from buildgrid._protos.google.longrunning import operations_pb2
|
30 | 30 |
|
31 |
+from buildgrid.utils import create_digest
|
|
31 | 32 |
from buildgrid.server import job
|
32 | 33 |
from buildgrid.server.controller import ExecutionController
|
33 | 34 |
from buildgrid.server.cas.storage import lru_memory_cache
|
... | ... | @@ -37,6 +38,8 @@ from buildgrid.server.execution.service import ExecutionService |
37 | 38 |
|
38 | 39 |
|
39 | 40 |
server = mock.create_autospec(grpc.server)
|
41 |
+action = remote_execution_pb2.Action(do_not_cache=True)
|
|
42 |
+action_digest = create_digest(action.SerializeToString())
|
|
40 | 43 |
|
41 | 44 |
|
42 | 45 |
@pytest.fixture
|
... | ... | @@ -47,12 +50,16 @@ def context(): |
47 | 50 |
|
48 | 51 |
@pytest.fixture(params=["action-cache", "no-action-cache"])
|
49 | 52 |
def controller(request):
|
53 |
+ storage = lru_memory_cache.LRUMemoryCache(1024 * 1024)
|
|
54 |
+ write_session = storage.begin_write(action_digest)
|
|
55 |
+ storage.commit_write(action_digest, write_session)
|
|
56 |
+ |
|
50 | 57 |
if request.param == "action-cache":
|
51 |
- storage = lru_memory_cache.LRUMemoryCache(1024 * 1024)
|
|
52 | 58 |
cache = ActionCache(storage, 50)
|
53 | 59 |
yield ExecutionController(cache, storage)
|
60 |
+ |
|
54 | 61 |
else:
|
55 |
- yield ExecutionController()
|
|
62 |
+ yield ExecutionController(None, storage)
|
|
56 | 63 |
|
57 | 64 |
|
58 | 65 |
# Instance to test
|
... | ... | @@ -66,9 +73,6 @@ def instance(controller): |
66 | 73 |
|
67 | 74 |
@pytest.mark.parametrize("skip_cache_lookup", [True, False])
|
68 | 75 |
def test_execute(skip_cache_lookup, instance, context):
|
69 |
- action_digest = remote_execution_pb2.Digest()
|
|
70 |
- action_digest.hash = 'zhora'
|
|
71 |
- |
|
72 | 76 |
request = remote_execution_pb2.ExecuteRequest(instance_name='',
|
73 | 77 |
action_digest=action_digest,
|
74 | 78 |
skip_cache_lookup=skip_cache_lookup)
|
... | ... | @@ -91,10 +95,16 @@ def test_wrong_execute_instance(instance, context): |
91 | 95 |
context.set_code.assert_called_once_with(grpc.StatusCode.INVALID_ARGUMENT)
|
92 | 96 |
|
93 | 97 |
|
94 |
-def test_wait_execution(instance, controller, context):
|
|
95 |
- action_digest = remote_execution_pb2.Digest()
|
|
96 |
- action_digest.hash = 'zhora'
|
|
98 |
+def test_no_action_digest_in_storage(instance, context):
|
|
99 |
+ request = remote_execution_pb2.ExecuteRequest(instance_name='',
|
|
100 |
+ skip_cache_lookup=True)
|
|
101 |
+ response = instance.Execute(request, context)
|
|
102 |
+ |
|
103 |
+ next(response)
|
|
104 |
+ context.set_code.assert_called_once_with(grpc.StatusCode.FAILED_PRECONDITION)
|
|
97 | 105 |
|
106 |
+ |
|
107 |
+def test_wait_execution(instance, controller, context):
|
|
98 | 108 |
j = job.Job(action_digest, None)
|
99 | 109 |
j._operation.done = True
|
100 | 110 |
|
... | ... | @@ -24,18 +24,21 @@ import grpc |
24 | 24 |
from grpc._server import _Context
|
25 | 25 |
import pytest
|
26 | 26 |
|
27 |
-from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
|
28 |
-from buildgrid._protos.google.longrunning import operations_pb2
|
|
29 |
- |
|
27 |
+from buildgrid.utils import create_digest
|
|
30 | 28 |
from buildgrid.server.controller import ExecutionController
|
31 |
-from buildgrid.server._exceptions import InvalidArgumentError
|
|
32 |
- |
|
29 |
+from buildgrid.server.cas.storage import lru_memory_cache
|
|
33 | 30 |
from buildgrid.server.operations import service
|
34 | 31 |
from buildgrid.server.operations.service import OperationsService
|
32 |
+from buildgrid.server._exceptions import InvalidArgumentError
|
|
33 |
+ |
|
34 |
+from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
|
35 |
+from buildgrid._protos.google.longrunning import operations_pb2
|
|
35 | 36 |
|
36 | 37 |
|
37 | 38 |
server = mock.create_autospec(grpc.server)
|
38 | 39 |
instance_name = "blade"
|
40 |
+action = remote_execution_pb2.Action(do_not_cache=True)
|
|
41 |
+action_digest = create_digest(action.SerializeToString())
|
|
39 | 42 |
|
40 | 43 |
|
41 | 44 |
# Can mock this
|
... | ... | @@ -47,9 +50,6 @@ def context(): |
47 | 50 |
# Requests to make
|
48 | 51 |
@pytest.fixture
|
49 | 52 |
def execute_request():
|
50 |
- action_digest = remote_execution_pb2.Digest()
|
|
51 |
- action_digest.hash = 'zhora'
|
|
52 |
- |
|
53 | 53 |
yield remote_execution_pb2.ExecuteRequest(instance_name='',
|
54 | 54 |
action_digest=action_digest,
|
55 | 55 |
skip_cache_lookup=True)
|
... | ... | @@ -57,7 +57,11 @@ def execute_request(): |
57 | 57 |
|
58 | 58 |
@pytest.fixture
|
59 | 59 |
def controller():
|
60 |
- yield ExecutionController()
|
|
60 |
+ storage = lru_memory_cache.LRUMemoryCache(1024 * 1024)
|
|
61 |
+ write_session = storage.begin_write(action_digest)
|
|
62 |
+ storage.commit_write(action_digest, write_session)
|
|
63 |
+ |
|
64 |
+ yield ExecutionController(None, storage)
|
|
61 | 65 |
|
62 | 66 |
|
63 | 67 |
# Instance to test
|