Martin Blanchard pushed to branch mablanch/77-cas-uploader at BuildGrid / buildgrid
Commits:
- 8632e234 by Martin Blanchard at 2018-09-24T17:01:29Z
- 2ef719ff by Martin Blanchard at 2018-09-24T17:01:37Z
- 1b35f8bd by Martin Blanchard at 2018-09-24T17:01:37Z
- dd4adf29 by Martin Blanchard at 2018-09-24T17:01:37Z
- 336a2229 by Martin Blanchard at 2018-09-24T17:01:37Z
11 changed files:
- buildgrid/_app/commands/cmd_cas.py
- buildgrid/utils.py
- setup.py
- + tests/cas/data/hello.cc
- + tests/cas/data/hello/hello.c
- + tests/cas/data/hello/hello.h
- + tests/cas/data/void
- + tests/cas/test_client.py
- tests/cas/test_storage.py
- + tests/utils/__init__.py
- + tests/utils/cas.py
Changes:
| ... | ... | @@ -21,14 +21,16 @@ Request work to be executed and monitor status of jobs. |
| 21 | 21 |
"""
|
| 22 | 22 |
|
| 23 | 23 |
import logging
|
| 24 |
+import os
|
|
| 24 | 25 |
import sys
|
| 25 | 26 |
from urllib.parse import urlparse
|
| 26 | 27 |
|
| 27 | 28 |
import click
|
| 28 | 29 |
import grpc
|
| 29 | 30 |
|
| 30 |
-from buildgrid.utils import merkle_maker, create_digest
|
|
| 31 |
-from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc
|
|
| 31 |
+from buildgrid.client.cas import upload
|
|
| 32 |
+from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
|
| 33 |
+from buildgrid.utils import merkle_tree_maker
|
|
| 32 | 34 |
|
| 33 | 35 |
from ..cli import pass_context
|
| 34 | 36 |
|
| ... | ... | @@ -68,56 +70,62 @@ def cli(context, remote, instance_name, client_key, client_cert, server_cert): |
| 68 | 70 |
@cli.command('upload-dummy', short_help="Upload a dummy action. Should be used with `execute dummy-request`")
|
| 69 | 71 |
@pass_context
|
| 70 | 72 |
def upload_dummy(context):
|
| 71 |
- context.logger.info("Uploading dummy action...")
|
|
| 72 | 73 |
action = remote_execution_pb2.Action(do_not_cache=True)
|
| 73 |
- action_digest = create_digest(action.SerializeToString())
|
|
| 74 |
+ with upload(context.channel, instance=context.instance_name) as uploader:
|
|
| 75 |
+ action_digest = uploader.put_message(action)
|
|
| 74 | 76 |
|
| 75 |
- request = remote_execution_pb2.BatchUpdateBlobsRequest(instance_name=context.instance_name)
|
|
| 76 |
- request.requests.add(digest=action_digest,
|
|
| 77 |
- data=action.SerializeToString())
|
|
| 78 |
- |
|
| 79 |
- stub = remote_execution_pb2_grpc.ContentAddressableStorageStub(context.channel)
|
|
| 80 |
- response = stub.BatchUpdateBlobs(request)
|
|
| 81 |
- |
|
| 82 |
- context.logger.info(response)
|
|
| 77 |
+ if action_digest.ByteSize():
|
|
| 78 |
+ click.echo('Success: Pushed digest "{}/{}"'
|
|
| 79 |
+ .format(action_digest.hash, action_digest.size_bytes))
|
|
| 80 |
+ else:
|
|
| 81 |
+ click.echo("Error: Failed pushing empty message.", err=True)
|
|
| 83 | 82 |
|
| 84 | 83 |
|
| 85 | 84 |
@cli.command('upload-files', short_help="Upload files to the CAS server.")
|
| 86 |
-@click.argument('files', nargs=-1, type=click.File('rb'), required=True)
|
|
| 85 |
+@click.argument('files', nargs=-1, type=click.Path(exists=True, dir_okay=False), required=True)
|
|
| 87 | 86 |
@pass_context
|
| 88 | 87 |
def upload_files(context, files):
|
| 89 |
- stub = remote_execution_pb2_grpc.ContentAddressableStorageStub(context.channel)
|
|
| 88 |
+ sent_digests, files_map = [], {}
|
|
| 89 |
+ with upload(context.channel, instance=context.instance_name) as uploader:
|
|
| 90 |
+ for file_path in files:
|
|
| 91 |
+ context.logger.debug("Queueing {}".format(file_path))
|
|
| 90 | 92 |
|
| 91 |
- requests = []
|
|
| 92 |
- for file in files:
|
|
| 93 |
- chunk = file.read()
|
|
| 94 |
- requests.append(remote_execution_pb2.BatchUpdateBlobsRequest.Request(
|
|
| 95 |
- digest=create_digest(chunk), data=chunk))
|
|
| 93 |
+ file_digest = uploader.upload_file(file_path, queue=True)
|
|
| 96 | 94 |
|
| 97 |
- request = remote_execution_pb2.BatchUpdateBlobsRequest(instance_name=context.instance_name,
|
|
| 98 |
- requests=requests)
|
|
| 95 |
+ files_map[file_digest.hash] = file_path
|
|
| 96 |
+ sent_digests.append(file_digest)
|
|
| 99 | 97 |
|
| 100 |
- context.logger.info("Sending: {}".format(request))
|
|
| 101 |
- response = stub.BatchUpdateBlobs(request)
|
|
| 102 |
- context.logger.info("Response: {}".format(response))
|
|
| 98 |
+ for file_digest in sent_digests:
|
|
| 99 |
+ file_path = files_map[file_digest.hash]
|
|
| 100 |
+ if os.path.isabs(file_path):
|
|
| 101 |
+ file_path = os.path.relpath(file_path)
|
|
| 102 |
+ if file_digest.ByteSize():
|
|
| 103 |
+ click.echo('Success: Pushed "{}" with digest "{}/{}"'
|
|
| 104 |
+ .format(file_path, file_digest.hash, file_digest.size_bytes))
|
|
| 105 |
+ else:
|
|
| 106 |
+ click.echo('Error: Failed to push "{}"'.format(file_path), err=True)
|
|
| 103 | 107 |
|
| 104 | 108 |
|
| 105 | 109 |
@cli.command('upload-dir', short_help="Upload a directory to the CAS server.")
|
| 106 |
-@click.argument('directory', nargs=1, type=click.Path(), required=True)
|
|
| 110 |
+@click.argument('directory', nargs=1, type=click.Path(exists=True, file_okay=False), required=True)
|
|
| 107 | 111 |
@pass_context
|
| 108 | 112 |
def upload_dir(context, directory):
|
| 109 |
- context.logger.info("Uploading directory to cas")
|
|
| 110 |
- stub = remote_execution_pb2_grpc.ContentAddressableStorageStub(context.channel)
|
|
| 111 |
- |
|
| 112 |
- requests = []
|
|
| 113 |
- |
|
| 114 |
- for chunk, file_digest in merkle_maker(directory):
|
|
| 115 |
- requests.append(remote_execution_pb2.BatchUpdateBlobsRequest.Request(
|
|
| 116 |
- digest=file_digest, data=chunk))
|
|
| 117 |
- |
|
| 118 |
- request = remote_execution_pb2.BatchUpdateBlobsRequest(instance_name=context.instance_name,
|
|
| 119 |
- requests=requests)
|
|
| 120 |
- |
|
| 121 |
- context.logger.info("Request:\n{}".format(request))
|
|
| 122 |
- response = stub.BatchUpdateBlobs(request)
|
|
| 123 |
- context.logger.info("Response:\n{}".format(response))
|
|
| 113 |
+ sent_digests, nodes_map = [], {}
|
|
| 114 |
+ with upload(context.channel, instance=context.instance_name) as uploader:
|
|
| 115 |
+ for node, blob, path in merkle_tree_maker(directory):
|
|
| 116 |
+ context.logger.debug("Queueing {}".format(path))
|
|
| 117 |
+ |
|
| 118 |
+ node_digest = uploader.put_blob(blob, digest=node.digest, queue=True)
|
|
| 119 |
+ |
|
| 120 |
+ nodes_map[node.digest.hash] = path
|
|
| 121 |
+ sent_digests.append(node_digest)
|
|
| 122 |
+ |
|
| 123 |
+ for node_digest in sent_digests:
|
|
| 124 |
+ node_path = nodes_map[node_digest.hash]
|
|
| 125 |
+ if os.path.isabs(node_path):
|
|
| 126 |
+ node_path = os.path.relpath(node_path, start=directory)
|
|
| 127 |
+ if node_digest.ByteSize():
|
|
| 128 |
+ click.echo('Success: Pushed "{}" with digest "{}/{}"'
|
|
| 129 |
+ .format(node_path, node_digest.hash, node_digest.size_bytes))
|
|
| 130 |
+ else:
|
|
| 131 |
+ click.echo('Error: Failed to push "{}"'.format(node_path), err=True)
|
| ... | ... | @@ -15,7 +15,6 @@ |
| 15 | 15 |
|
| 16 | 16 |
from operator import attrgetter
|
| 17 | 17 |
import os
|
| 18 |
-import uuid
|
|
| 19 | 18 |
|
| 20 | 19 |
from buildgrid.settings import HASH
|
| 21 | 20 |
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
| ... | ... | @@ -34,32 +33,6 @@ def gen_fetch_blob(stub, digest, instance_name=""): |
| 34 | 33 |
yield response.data
|
| 35 | 34 |
|
| 36 | 35 |
|
| 37 |
-def gen_write_request_blob(digest_bytes, digest, instance_name=""):
|
|
| 38 |
- """ Generates a bytestream write request
|
|
| 39 |
- """
|
|
| 40 |
- resource_name = os.path.join(instance_name, 'uploads', str(uuid.uuid4()),
|
|
| 41 |
- 'blobs', digest.hash, str(digest.size_bytes))
|
|
| 42 |
- |
|
| 43 |
- offset = 0
|
|
| 44 |
- finished = False
|
|
| 45 |
- remaining = digest.size_bytes
|
|
| 46 |
- |
|
| 47 |
- while not finished:
|
|
| 48 |
- chunk_size = min(remaining, 64 * 1024)
|
|
| 49 |
- remaining -= chunk_size
|
|
| 50 |
- finished = remaining <= 0
|
|
| 51 |
- |
|
| 52 |
- request = bytestream_pb2.WriteRequest()
|
|
| 53 |
- request.resource_name = resource_name
|
|
| 54 |
- request.write_offset = offset
|
|
| 55 |
- request.data = digest_bytes.read(chunk_size)
|
|
| 56 |
- request.finish_write = finished
|
|
| 57 |
- |
|
| 58 |
- yield request
|
|
| 59 |
- |
|
| 60 |
- offset += chunk_size
|
|
| 61 |
- |
|
| 62 |
- |
|
| 63 | 36 |
def write_fetch_directory(root_directory, stub, digest, instance_name=None):
|
| 64 | 37 |
"""Locally replicates a directory from CAS.
|
| 65 | 38 |
|
| ... | ... | @@ -89,6 +89,7 @@ tests_require = [ |
| 89 | 89 |
'coverage == 4.4.0',
|
| 90 | 90 |
'moto',
|
| 91 | 91 |
'pep8',
|
| 92 |
+ 'psutil',
|
|
| 92 | 93 |
'pytest == 3.6.4',
|
| 93 | 94 |
'pytest-cov >= 2.6.0',
|
| 94 | 95 |
'pytest-pep8',
|
| 1 |
+#include <iostream>
|
|
| 2 |
+ |
|
| 3 |
+int main()
|
|
| 4 |
+{
|
|
| 5 |
+ std::cout << "Hello, World!" << std::endl;
|
|
| 6 |
+ return 0;
|
|
| 7 |
+}
|
| 1 |
+#include <stdio.h>
|
|
| 2 |
+ |
|
| 3 |
+#include "hello.h"
|
|
| 4 |
+ |
|
| 5 |
+int main()
|
|
| 6 |
+{
|
|
| 7 |
+ printf("%s\n", HELLO_WORLD);
|
|
| 8 |
+ return 0;
|
|
| 9 |
+}
|
| 1 |
+#define HELLO_WORLD "Hello, World!"
|
| 1 |
+# Copyright (C) 2018 Bloomberg LP
|
|
| 2 |
+#
|
|
| 3 |
+# Licensed under the Apache License, Version 2.0 (the "License");
|
|
| 4 |
+# you may not use this file except in compliance with the License.
|
|
| 5 |
+# You may obtain a copy of the License at
|
|
| 6 |
+#
|
|
| 7 |
+# <http://www.apache.org/licenses/LICENSE-2.0>
|
|
| 8 |
+#
|
|
| 9 |
+# Unless required by applicable law or agreed to in writing, software
|
|
| 10 |
+# distributed under the License is distributed on an "AS IS" BASIS,
|
|
| 11 |
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
| 12 |
+# See the License for the specific language governing permissions and
|
|
| 13 |
+# limitations under the License.
|
|
| 14 |
+ |
|
| 15 |
+# pylint: disable=redefined-outer-name
|
|
| 16 |
+ |
|
| 17 |
+import os
|
|
| 18 |
+ |
|
| 19 |
+import grpc
|
|
| 20 |
+import pytest
|
|
| 21 |
+ |
|
| 22 |
+from buildgrid.client.cas import upload
|
|
| 23 |
+from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
|
| 24 |
+from buildgrid.utils import create_digest
|
|
| 25 |
+ |
|
| 26 |
+from ..utils.cas import serve_cas, run_in_subprocess
|
|
| 27 |
+ |
|
| 28 |
+ |
|
| 29 |
+INSTANCES = ['', 'instance']
|
|
| 30 |
+BLOBS = [(b'',), (b'test-string',), (b'test', b'string')]
|
|
| 31 |
+MESSAGES = [
|
|
| 32 |
+ (remote_execution_pb2.Directory(),),
|
|
| 33 |
+ (remote_execution_pb2.SymlinkNode(name='name', target='target'),),
|
|
| 34 |
+ (remote_execution_pb2.Action(do_not_cache=True),
|
|
| 35 |
+ remote_execution_pb2.ActionResult(exit_code=12))
|
|
| 36 |
+]
|
|
| 37 |
+DATA_DIR = os.path.join(
|
|
| 38 |
+ os.path.dirname(os.path.realpath(__file__)), 'data')
|
|
| 39 |
+FILES = [
|
|
| 40 |
+ (os.path.join(DATA_DIR, 'void'),),
|
|
| 41 |
+ (os.path.join(DATA_DIR, 'hello.cc'),),
|
|
| 42 |
+ (os.path.join(DATA_DIR, 'hello', 'hello.c'),
|
|
| 43 |
+ os.path.join(DATA_DIR, 'hello', 'hello.h'))]
|
|
| 44 |
+DIRECTORIES = [
|
|
| 45 |
+ (os.path.join(DATA_DIR, 'hello'),),
|
|
| 46 |
+ (os.path.join(DATA_DIR, 'hello'), DATA_DIR)]
|
|
| 47 |
+ |
|
| 48 |
+ |
|
| 49 |
+@pytest.mark.parametrize('blobs', BLOBS)
|
|
| 50 |
+@pytest.mark.parametrize('instance', INSTANCES)
|
|
| 51 |
+def test_upload_blob(instance, blobs):
|
|
| 52 |
+ # Actual test function, to be run in a subprocess:
|
|
| 53 |
+ def __test_upload_blob(queue, remote, instance, blobs):
|
|
| 54 |
+ # Open a channel to the remote CAS server:
|
|
| 55 |
+ channel = grpc.insecure_channel(remote)
|
|
| 56 |
+ |
|
| 57 |
+ digests = []
|
|
| 58 |
+ with upload(channel, instance) as uploader:
|
|
| 59 |
+ if len(blobs) > 1:
|
|
| 60 |
+ for blob in blobs:
|
|
| 61 |
+ digest = uploader.put_blob(blob, queue=True)
|
|
| 62 |
+ digests.append(digest.SerializeToString())
|
|
| 63 |
+ else:
|
|
| 64 |
+ digest = uploader.put_blob(blobs[0], queue=False)
|
|
| 65 |
+ digests.append(digest.SerializeToString())
|
|
| 66 |
+ |
|
| 67 |
+ queue.put(digests)
|
|
| 68 |
+ |
|
| 69 |
+ # Start a minimal CAS server in a subprocess:
|
|
| 70 |
+ with serve_cas([instance]) as server:
|
|
| 71 |
+ digests = run_in_subprocess(__test_upload_blob,
|
|
| 72 |
+ server.remote, instance, blobs)
|
|
| 73 |
+ |
|
| 74 |
+ for blob, digest_blob in zip(blobs, digests):
|
|
| 75 |
+ digest = remote_execution_pb2.Digest()
|
|
| 76 |
+ digest.ParseFromString(digest_blob)
|
|
| 77 |
+ |
|
| 78 |
+ assert server.has(digest)
|
|
| 79 |
+ assert server.compare_blobs(digest, blob)
|
|
| 80 |
+ |
|
| 81 |
+ |
|
| 82 |
+@pytest.mark.parametrize('messages', MESSAGES)
|
|
| 83 |
+@pytest.mark.parametrize('instance', INSTANCES)
|
|
| 84 |
+def test_upload_message(instance, messages):
|
|
| 85 |
+ # Actual test function, to be run in a subprocess:
|
|
| 86 |
+ def __test_upload_message(queue, remote, instance, messages):
|
|
| 87 |
+ # Open a channel to the remote CAS server:
|
|
| 88 |
+ channel = grpc.insecure_channel(remote)
|
|
| 89 |
+ |
|
| 90 |
+ digests = []
|
|
| 91 |
+ with upload(channel, instance) as uploader:
|
|
| 92 |
+ if len(messages) > 1:
|
|
| 93 |
+ for message in messages:
|
|
| 94 |
+ digest = uploader.put_message(message, queue=True)
|
|
| 95 |
+ digests.append(digest.SerializeToString())
|
|
| 96 |
+ else:
|
|
| 97 |
+ digest = uploader.put_message(messages[0], queue=False)
|
|
| 98 |
+ digests.append(digest.SerializeToString())
|
|
| 99 |
+ |
|
| 100 |
+ queue.put(digests)
|
|
| 101 |
+ |
|
| 102 |
+ # Start a minimal CAS server in a subprocess:
|
|
| 103 |
+ with serve_cas([instance]) as server:
|
|
| 104 |
+ digests = run_in_subprocess(__test_upload_message,
|
|
| 105 |
+ server.remote, instance, messages)
|
|
| 106 |
+ |
|
| 107 |
+ for message, digest_blob in zip(messages, digests):
|
|
| 108 |
+ digest = remote_execution_pb2.Digest()
|
|
| 109 |
+ digest.ParseFromString(digest_blob)
|
|
| 110 |
+ |
|
| 111 |
+ assert server.has(digest)
|
|
| 112 |
+ assert server.compare_messages(digest, message)
|
|
| 113 |
+ |
|
| 114 |
+ |
|
| 115 |
+@pytest.mark.parametrize('file_paths', FILES)
|
|
| 116 |
+@pytest.mark.parametrize('instance', INSTANCES)
|
|
| 117 |
+def test_upload_file(instance, file_paths):
|
|
| 118 |
+ # Actual test function, to be run in a subprocess:
|
|
| 119 |
+ def __test_upload_file(queue, remote, instance, file_paths):
|
|
| 120 |
+ # Open a channel to the remote CAS server:
|
|
| 121 |
+ channel = grpc.insecure_channel(remote)
|
|
| 122 |
+ |
|
| 123 |
+ digests = []
|
|
| 124 |
+ with upload(channel, instance) as uploader:
|
|
| 125 |
+ if len(file_paths) > 1:
|
|
| 126 |
+ for file_path in file_paths:
|
|
| 127 |
+ digest = uploader.upload_file(file_path, queue=True)
|
|
| 128 |
+ digests.append(digest.SerializeToString())
|
|
| 129 |
+ else:
|
|
| 130 |
+ digest = uploader.upload_file(file_paths[0], queue=False)
|
|
| 131 |
+ digests.append(digest.SerializeToString())
|
|
| 132 |
+ |
|
| 133 |
+ queue.put(digests)
|
|
| 134 |
+ |
|
| 135 |
+ # Start a minimal CAS server in a subprocess:
|
|
| 136 |
+ with serve_cas([instance]) as server:
|
|
| 137 |
+ digests = run_in_subprocess(__test_upload_file,
|
|
| 138 |
+ server.remote, instance, file_paths)
|
|
| 139 |
+ |
|
| 140 |
+ for file_path, digest_blob in zip(file_paths, digests):
|
|
| 141 |
+ digest = remote_execution_pb2.Digest()
|
|
| 142 |
+ digest.ParseFromString(digest_blob)
|
|
| 143 |
+ |
|
| 144 |
+ assert server.has(digest)
|
|
| 145 |
+ assert server.compare_files(digest, file_path)
|
|
| 146 |
+ |
|
| 147 |
+ |
|
| 148 |
+@pytest.mark.parametrize('directory_paths', DIRECTORIES)
|
|
| 149 |
+@pytest.mark.parametrize('instance', INSTANCES)
|
|
| 150 |
+def test_upload_directory(instance, directory_paths):
|
|
| 151 |
+ # Actual test function, to be run in a subprocess:
|
|
| 152 |
+ def __test_upload_directory(queue, remote, instance, directory_paths):
|
|
| 153 |
+ # Open a channel to the remote CAS server:
|
|
| 154 |
+ channel = grpc.insecure_channel(remote)
|
|
| 155 |
+ |
|
| 156 |
+ digests = []
|
|
| 157 |
+ with upload(channel, instance) as uploader:
|
|
| 158 |
+ if len(directory_paths) > 1:
|
|
| 159 |
+ for directory_path in directory_paths:
|
|
| 160 |
+ digest = uploader.upload_directory(directory_path, queue=True)
|
|
| 161 |
+ digests.append(digest.SerializeToString())
|
|
| 162 |
+ else:
|
|
| 163 |
+ digest = uploader.upload_directory(directory_paths[0], queue=False)
|
|
| 164 |
+ digests.append(digest.SerializeToString())
|
|
| 165 |
+ |
|
| 166 |
+ queue.put(digests)
|
|
| 167 |
+ |
|
| 168 |
+ # Start a minimal CAS server in a subprocess:
|
|
| 169 |
+ with serve_cas([instance]) as server:
|
|
| 170 |
+ digests = run_in_subprocess(__test_upload_directory,
|
|
| 171 |
+ server.remote, instance, directory_paths)
|
|
| 172 |
+ |
|
| 173 |
+ for directory_path, digest_blob in zip(directory_paths, digests):
|
|
| 174 |
+ digest = remote_execution_pb2.Digest()
|
|
| 175 |
+ digest.ParseFromString(digest_blob)
|
|
| 176 |
+ |
|
| 177 |
+ assert server.compare_directories(digest, directory_path)
|
|
| 178 |
+ |
|
| 179 |
+ |
|
| 180 |
+@pytest.mark.parametrize('directory_paths', DIRECTORIES)
|
|
| 181 |
+@pytest.mark.parametrize('instance', INSTANCES)
|
|
| 182 |
+def test_upload_tree(instance, directory_paths):
|
|
| 183 |
+ # Actual test function, to be run in a subprocess:
|
|
| 184 |
+ def __test_upload_tree(queue, remote, instance, directory_paths):
|
|
| 185 |
+ # Open a channel to the remote CAS server:
|
|
| 186 |
+ channel = grpc.insecure_channel(remote)
|
|
| 187 |
+ |
|
| 188 |
+ digests = []
|
|
| 189 |
+ with upload(channel, instance) as uploader:
|
|
| 190 |
+ if len(directory_paths) > 1:
|
|
| 191 |
+ for directory_path in directory_paths:
|
|
| 192 |
+ digest = uploader.upload_tree(directory_path, queue=True)
|
|
| 193 |
+ digests.append(digest.SerializeToString())
|
|
| 194 |
+ else:
|
|
| 195 |
+ digest = uploader.upload_tree(directory_paths[0], queue=False)
|
|
| 196 |
+ digests.append(digest.SerializeToString())
|
|
| 197 |
+ |
|
| 198 |
+ queue.put(digests)
|
|
| 199 |
+ |
|
| 200 |
+ # Start a minimal CAS server in a subprocess:
|
|
| 201 |
+ with serve_cas([instance]) as server:
|
|
| 202 |
+ digests = run_in_subprocess(__test_upload_tree,
|
|
| 203 |
+ server.remote, instance, directory_paths)
|
|
| 204 |
+ |
|
| 205 |
+ for directory_path, digest_blob in zip(directory_paths, digests):
|
|
| 206 |
+ digest = remote_execution_pb2.Digest()
|
|
| 207 |
+ digest.ParseFromString(digest_blob)
|
|
| 208 |
+ |
|
| 209 |
+ assert server.has(digest)
|
|
| 210 |
+ |
|
| 211 |
+ tree = remote_execution_pb2.Tree()
|
|
| 212 |
+ tree.ParseFromString(server.get(digest))
|
|
| 213 |
+ |
|
| 214 |
+ directory_digest = create_digest(tree.root.SerializeToString())
|
|
| 215 |
+ |
|
| 216 |
+ assert server.compare_directories(directory_digest, directory_path)
|
| ... | ... | @@ -19,220 +19,286 @@ |
| 19 | 19 |
|
| 20 | 20 |
import tempfile
|
| 21 | 21 |
|
| 22 |
-from unittest import mock
|
|
| 23 |
- |
|
| 24 | 22 |
import boto3
|
| 25 | 23 |
import grpc
|
| 26 |
-from grpc._server import _Context
|
|
| 27 | 24 |
import pytest
|
| 28 | 25 |
from moto import mock_s3
|
| 29 | 26 |
|
| 30 |
-from buildgrid._protos.build.bazel.remote.execution.v2.remote_execution_pb2 import Digest
|
|
| 31 |
-from buildgrid.server.cas import service
|
|
| 32 |
-from buildgrid.server.cas.instance import ByteStreamInstance, ContentAddressableStorageInstance
|
|
| 33 |
-from buildgrid.server.cas.storage import remote
|
|
| 27 |
+from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
|
| 28 |
+from buildgrid.server.cas.storage.remote import RemoteStorage
|
|
| 34 | 29 |
from buildgrid.server.cas.storage.lru_memory_cache import LRUMemoryCache
|
| 35 | 30 |
from buildgrid.server.cas.storage.disk import DiskStorage
|
| 36 | 31 |
from buildgrid.server.cas.storage.s3 import S3Storage
|
| 37 | 32 |
from buildgrid.server.cas.storage.with_cache import WithCacheStorage
|
| 38 | 33 |
from buildgrid.settings import HASH
|
| 39 | 34 |
|
| 35 |
+from ..utils.cas import serve_cas, run_in_subprocess
|
|
| 40 | 36 |
|
| 41 |
-context = mock.create_autospec(_Context)
|
|
| 42 |
-server = mock.create_autospec(grpc.server)
|
|
| 43 |
- |
|
| 44 |
-abc = b"abc"
|
|
| 45 |
-abc_digest = Digest(hash=HASH(abc).hexdigest(), size_bytes=3)
|
|
| 46 |
-defg = b"defg"
|
|
| 47 |
-defg_digest = Digest(hash=HASH(defg).hexdigest(), size_bytes=4)
|
|
| 48 |
-hijk = b"hijk"
|
|
| 49 |
-hijk_digest = Digest(hash=HASH(hijk).hexdigest(), size_bytes=4)
|
|
| 50 |
- |
|
| 51 |
- |
|
| 52 |
-def write(storage, digest, blob):
|
|
| 53 |
- session = storage.begin_write(digest)
|
|
| 54 |
- session.write(blob)
|
|
| 55 |
- storage.commit_write(digest, session)
|
|
| 56 |
- |
|
| 57 |
- |
|
| 58 |
-class MockCASStorage(ByteStreamInstance, ContentAddressableStorageInstance):
|
|
| 59 |
- |
|
| 60 |
- def __init__(self):
|
|
| 61 |
- storage = LRUMemoryCache(256)
|
|
| 62 |
- super().__init__(storage)
|
|
| 63 |
- |
|
| 64 |
- |
|
| 65 |
-# Mock a CAS server with LRUStorage to return "calls" made to it
|
|
| 66 |
-class MockStubServer:
|
|
| 67 |
- |
|
| 68 |
- def __init__(self):
|
|
| 69 |
- instances = {"": MockCASStorage(), "dna": MockCASStorage()}
|
|
| 70 |
- self._requests = []
|
|
| 71 |
- with mock.patch.object(service, 'bytestream_pb2_grpc'):
|
|
| 72 |
- self._bs_service = service.ByteStreamService(server)
|
|
| 73 |
- for k, v in instances.items():
|
|
| 74 |
- self._bs_service.add_instance(k, v)
|
|
| 75 |
- with mock.patch.object(service, 'remote_execution_pb2_grpc'):
|
|
| 76 |
- self._cas_service = service.ContentAddressableStorageService(server)
|
|
| 77 |
- for k, v in instances.items():
|
|
| 78 |
- self._cas_service.add_instance(k, v)
|
|
| 79 |
- |
|
| 80 |
- def Read(self, request):
|
|
| 81 |
- yield from self._bs_service.Read(request, context)
|
|
| 82 |
- |
|
| 83 |
- def Write(self, request):
|
|
| 84 |
- self._requests.append(request)
|
|
| 85 |
- if request.finish_write:
|
|
| 86 |
- response = self._bs_service.Write(self._requests, context)
|
|
| 87 |
- self._requests = []
|
|
| 88 |
- return response
|
|
| 89 |
- |
|
| 90 |
- return None
|
|
| 91 |
- |
|
| 92 |
- def FindMissingBlobs(self, request):
|
|
| 93 |
- return self._cas_service.FindMissingBlobs(request, context)
|
|
| 94 |
- |
|
| 95 |
- def BatchUpdateBlobs(self, request):
|
|
| 96 |
- return self._cas_service.BatchUpdateBlobs(request, context)
|
|
| 97 | 37 |
|
| 38 |
+BLOBS = [(b'abc', b'defg', b'hijk', b'')]
|
|
| 39 |
+BLOBS_DIGESTS = [tuple([remote_execution_pb2.Digest(hash=HASH(blob).hexdigest(),
|
|
| 40 |
+ size_bytes=len(blob)) for blob in blobs])
|
|
| 41 |
+ for blobs in BLOBS]
|
|
| 98 | 42 |
|
| 99 |
-# Instances of MockCASStorage
|
|
| 100 |
-@pytest.fixture(params=["", "dna"])
|
|
| 101 |
-def instance(params):
|
|
| 102 |
- return {params, MockCASStorage()}
|
|
| 103 | 43 |
|
| 104 |
- |
|
| 105 |
-# General tests for all storage providers
|
|
| 106 |
- |
|
| 107 |
- |
|
| 108 |
-@pytest.fixture(params=["lru", "disk", "s3", "lru_disk", "disk_s3", "remote"])
|
|
| 44 |
+@pytest.fixture(params=['lru', 'disk', 's3', 'lru_disk', 'disk_s3', 'remote'])
|
|
| 109 | 45 |
def any_storage(request):
|
| 110 |
- if request.param == "lru":
|
|
| 46 |
+ if request.param == 'lru':
|
|
| 111 | 47 |
yield LRUMemoryCache(256)
|
| 112 |
- elif request.param == "disk":
|
|
| 48 |
+ elif request.param == 'disk':
|
|
| 113 | 49 |
with tempfile.TemporaryDirectory() as path:
|
| 114 | 50 |
yield DiskStorage(path)
|
| 115 |
- elif request.param == "s3":
|
|
| 51 |
+ elif request.param == 's3':
|
|
| 116 | 52 |
with mock_s3():
|
| 117 |
- boto3.resource('s3').create_bucket(Bucket="testing")
|
|
| 118 |
- yield S3Storage("testing")
|
|
| 119 |
- elif request.param == "lru_disk":
|
|
| 53 |
+ boto3.resource('s3').create_bucket(Bucket='testing')
|
|
| 54 |
+ yield S3Storage('testing')
|
|
| 55 |
+ elif request.param == 'lru_disk':
|
|
| 120 | 56 |
# LRU cache with a uselessly small limit, so requests always fall back
|
| 121 | 57 |
with tempfile.TemporaryDirectory() as path:
|
| 122 | 58 |
yield WithCacheStorage(LRUMemoryCache(1), DiskStorage(path))
|
| 123 |
- elif request.param == "disk_s3":
|
|
| 59 |
+ elif request.param == 'disk_s3':
|
|
| 124 | 60 |
# Disk-based cache of S3, but we don't delete files, so requests
|
| 125 | 61 |
# are always handled by the cache
|
| 126 | 62 |
with tempfile.TemporaryDirectory() as path:
|
| 127 | 63 |
with mock_s3():
|
| 128 |
- boto3.resource('s3').create_bucket(Bucket="testing")
|
|
| 129 |
- yield WithCacheStorage(DiskStorage(path), S3Storage("testing"))
|
|
| 130 |
- elif request.param == "remote":
|
|
| 131 |
- with mock.patch.object(remote, 'bytestream_pb2_grpc'):
|
|
| 132 |
- with mock.patch.object(remote, 'remote_execution_pb2_grpc'):
|
|
| 133 |
- mock_server = MockStubServer()
|
|
| 134 |
- storage = remote.RemoteStorage(None, "")
|
|
| 135 |
- storage._stub_bs = mock_server
|
|
| 136 |
- storage._stub_cas = mock_server
|
|
| 137 |
- yield storage
|
|
| 138 |
- |
|
| 139 |
- |
|
| 140 |
-def test_initially_empty(any_storage):
|
|
| 141 |
- assert not any_storage.has_blob(abc_digest)
|
|
| 142 |
- assert not any_storage.has_blob(defg_digest)
|
|
| 143 |
- assert not any_storage.has_blob(hijk_digest)
|
|
| 144 |
- |
|
| 145 |
- |
|
| 146 |
-def test_basic_write_read(any_storage):
|
|
| 147 |
- assert not any_storage.has_blob(abc_digest)
|
|
| 148 |
- write(any_storage, abc_digest, abc)
|
|
| 149 |
- assert any_storage.has_blob(abc_digest)
|
|
| 150 |
- assert any_storage.get_blob(abc_digest).read() == abc
|
|
| 151 |
- |
|
| 152 |
- # Try writing the same digest again (since it's valid to do that)
|
|
| 153 |
- write(any_storage, abc_digest, abc)
|
|
| 154 |
- assert any_storage.has_blob(abc_digest)
|
|
| 155 |
- assert any_storage.get_blob(abc_digest).read() == abc
|
|
| 156 |
- |
|
| 157 |
- |
|
| 158 |
-def test_bulk_write_read(any_storage):
|
|
| 159 |
- missing_digests = any_storage.missing_blobs([abc_digest, defg_digest, hijk_digest])
|
|
| 160 |
- assert len(missing_digests) == 3
|
|
| 161 |
- assert abc_digest in missing_digests
|
|
| 162 |
- assert defg_digest in missing_digests
|
|
| 163 |
- assert hijk_digest in missing_digests
|
|
| 64 |
+ boto3.resource('s3').create_bucket(Bucket='testing')
|
|
| 65 |
+ yield WithCacheStorage(DiskStorage(path), S3Storage('testing'))
|
|
| 66 |
+ elif request.param == 'remote':
|
|
| 67 |
+ with serve_cas(['testing']) as server:
|
|
| 68 |
+ yield server.remote
|
|
| 69 |
+ server = None
|
|
| 164 | 70 |
|
| 165 |
- bulk_update_results = any_storage.bulk_update_blobs([(abc_digest, abc), (defg_digest, defg),
|
|
| 166 |
- (hijk_digest, b'????')])
|
|
| 167 |
- assert len(bulk_update_results) == 3
|
|
| 168 |
- assert bulk_update_results[0].code == 0
|
|
| 169 |
- assert bulk_update_results[1].code == 0
|
|
| 170 |
- assert bulk_update_results[2].code != 0
|
|
| 171 |
- |
|
| 172 |
- missing_digests = any_storage.missing_blobs([abc_digest, defg_digest, hijk_digest])
|
|
| 173 |
- assert missing_digests == [hijk_digest]
|
|
| 174 |
- |
|
| 175 |
- assert any_storage.get_blob(abc_digest).read() == abc
|
|
| 176 |
- assert any_storage.get_blob(defg_digest).read() == defg
|
|
| 177 |
- |
|
| 178 |
- |
|
| 179 |
-def test_nonexistent_read(any_storage):
|
|
| 180 |
- assert any_storage.get_blob(abc_digest) is None
|
|
| 181 | 71 |
|
| 72 |
+def write(storage, digest, blob):
|
|
| 73 |
+ session = storage.begin_write(digest)
|
|
| 74 |
+ session.write(blob)
|
|
| 75 |
+ storage.commit_write(digest, session)
|
|
| 182 | 76 |
|
| 183 |
-# Tests for special behavior of individual storage providers
|
|
| 184 | 77 |
|
| 78 |
+@pytest.mark.parametrize('blobs_digests', zip(BLOBS, BLOBS_DIGESTS))
|
|
| 79 |
+def test_initially_empty(any_storage, blobs_digests):
|
|
| 80 |
+ _, digests = blobs_digests
|
|
| 81 |
+ |
|
| 82 |
+ # Actual test function, failing on assertions:
|
|
| 83 |
+ def __test_initially_empty(any_storage, digests):
|
|
| 84 |
+ for digest in digests:
|
|
| 85 |
+ assert not any_storage.has_blob(digest)
|
|
| 86 |
+ |
|
| 87 |
+ # Helper test function for remote storage, to be run in a subprocess:
|
|
| 88 |
+ def __test_remote_initially_empty(queue, remote, serialized_digests):
|
|
| 89 |
+ channel = grpc.insecure_channel(remote)
|
|
| 90 |
+ remote_storage = RemoteStorage(channel, 'testing')
|
|
| 91 |
+ digests = []
|
|
| 92 |
+ |
|
| 93 |
+ for data in serialized_digests:
|
|
| 94 |
+ digest = remote_execution_pb2.Digest()
|
|
| 95 |
+ digest.ParseFromString(data)
|
|
| 96 |
+ digests.append(digest)
|
|
| 97 |
+ |
|
| 98 |
+ try:
|
|
| 99 |
+ __test_initially_empty(remote_storage, digests)
|
|
| 100 |
+ except AssertionError:
|
|
| 101 |
+ queue.put(False)
|
|
| 102 |
+ else:
|
|
| 103 |
+ queue.put(True)
|
|
| 104 |
+ |
|
| 105 |
+ if isinstance(any_storage, str):
|
|
| 106 |
+ serialized_digests = [digest.SerializeToString() for digest in digests]
|
|
| 107 |
+ assert run_in_subprocess(__test_remote_initially_empty,
|
|
| 108 |
+ any_storage, serialized_digests)
|
|
| 109 |
+ else:
|
|
| 110 |
+ __test_initially_empty(any_storage, digests)
|
|
| 111 |
+ |
|
| 112 |
+ |
|
| 113 |
+@pytest.mark.parametrize('blobs_digests', zip(BLOBS, BLOBS_DIGESTS))
|
|
| 114 |
+def test_basic_write_read(any_storage, blobs_digests):
|
|
| 115 |
+ blobs, digests = blobs_digests
|
|
| 116 |
+ |
|
| 117 |
+ # Actual test function, failing on assertions:
|
|
| 118 |
+ def __test_basic_write_read(any_storage, blobs, digests):
|
|
| 119 |
+ for blob, digest in zip(blobs, digests):
|
|
| 120 |
+ assert not any_storage.has_blob(digest)
|
|
| 121 |
+ write(any_storage, digest, blob)
|
|
| 122 |
+ assert any_storage.has_blob(digest)
|
|
| 123 |
+ assert any_storage.get_blob(digest).read() == blob
|
|
| 124 |
+ |
|
| 125 |
+ # Try writing the same digest again (since it's valid to do that)
|
|
| 126 |
+ write(any_storage, digest, blob)
|
|
| 127 |
+ assert any_storage.has_blob(digest)
|
|
| 128 |
+ assert any_storage.get_blob(digest).read() == blob
|
|
| 129 |
+ |
|
| 130 |
+ # Helper test function for remote storage, to be run in a subprocess:
|
|
| 131 |
+ def __test_remote_basic_write_read(queue, remote, blobs, serialized_digests):
|
|
| 132 |
+ channel = grpc.insecure_channel(remote)
|
|
| 133 |
+ remote_storage = RemoteStorage(channel, 'testing')
|
|
| 134 |
+ digests = []
|
|
| 135 |
+ |
|
| 136 |
+ for data in serialized_digests:
|
|
| 137 |
+ digest = remote_execution_pb2.Digest()
|
|
| 138 |
+ digest.ParseFromString(data)
|
|
| 139 |
+ digests.append(digest)
|
|
| 140 |
+ |
|
| 141 |
+ try:
|
|
| 142 |
+ __test_basic_write_read(remote_storage, blobs, digests)
|
|
| 143 |
+ except AssertionError:
|
|
| 144 |
+ queue.put(False)
|
|
| 145 |
+ else:
|
|
| 146 |
+ queue.put(True)
|
|
| 147 |
+ |
|
| 148 |
+ if isinstance(any_storage, str):
|
|
| 149 |
+ serialized_digests = [digest.SerializeToString() for digest in digests]
|
|
| 150 |
+ assert run_in_subprocess(__test_remote_basic_write_read,
|
|
| 151 |
+ any_storage, blobs, serialized_digests)
|
|
| 152 |
+ else:
|
|
| 153 |
+ __test_basic_write_read(any_storage, blobs, digests)
|
|
| 154 |
+ |
|
| 155 |
+ |
|
| 156 |
+@pytest.mark.parametrize('blobs_digests', zip(BLOBS, BLOBS_DIGESTS))
|
|
| 157 |
+def test_bulk_write_read(any_storage, blobs_digests):
|
|
| 158 |
+ blobs, digests = blobs_digests
|
|
| 159 |
+ |
|
| 160 |
+ # Actual test function, failing on assertions:
|
|
| 161 |
+ def __test_bulk_write_read(any_storage, blobs, digests):
|
|
| 162 |
+ missing_digests = any_storage.missing_blobs(digests)
|
|
| 163 |
+ assert len(missing_digests) == len(digests)
|
|
| 164 |
+ for digest in digests:
|
|
| 165 |
+ assert digest in missing_digests
|
|
| 166 |
+ |
|
| 167 |
+ faulty_blobs = list(blobs)
|
|
| 168 |
+ faulty_blobs[-1] = b'this-is-not-matching'
|
|
| 169 |
+ |
|
| 170 |
+ results = any_storage.bulk_update_blobs(list(zip(digests, faulty_blobs)))
|
|
| 171 |
+ assert len(results) == len(digests)
|
|
| 172 |
+ for result, blob, digest in zip(results[:-1], faulty_blobs[:-1], digests[:-1]):
|
|
| 173 |
+ assert result.code == 0
|
|
| 174 |
+ assert any_storage.get_blob(digest).read() == blob
|
|
| 175 |
+ assert results[-1].code != 0
|
|
| 176 |
+ |
|
| 177 |
+ missing_digests = any_storage.missing_blobs(digests)
|
|
| 178 |
+ assert len(missing_digests) == 1
|
|
| 179 |
+ assert missing_digests[0] == digests[-1]
|
|
| 180 |
+ |
|
| 181 |
+ # Helper test function for remote storage, to be run in a subprocess:
|
|
| 182 |
+ def __test_remote_bulk_write_read(queue, remote, blobs, serialized_digests):
|
|
| 183 |
+ channel = grpc.insecure_channel(remote)
|
|
| 184 |
+ remote_storage = RemoteStorage(channel, 'testing')
|
|
| 185 |
+ digests = []
|
|
| 186 |
+ |
|
| 187 |
+ for data in serialized_digests:
|
|
| 188 |
+ digest = remote_execution_pb2.Digest()
|
|
| 189 |
+ digest.ParseFromString(data)
|
|
| 190 |
+ digests.append(digest)
|
|
| 191 |
+ |
|
| 192 |
+ try:
|
|
| 193 |
+ __test_bulk_write_read(remote_storage, blobs, digests)
|
|
| 194 |
+ except AssertionError:
|
|
| 195 |
+ queue.put(False)
|
|
| 196 |
+ else:
|
|
| 197 |
+ queue.put(True)
|
|
| 198 |
+ |
|
| 199 |
+ if isinstance(any_storage, str):
|
|
| 200 |
+ serialized_digests = [digest.SerializeToString() for digest in digests]
|
|
| 201 |
+ assert run_in_subprocess(__test_remote_bulk_write_read,
|
|
| 202 |
+ any_storage, blobs, serialized_digests)
|
|
| 203 |
+ else:
|
|
| 204 |
+ __test_bulk_write_read(any_storage, blobs, digests)
|
|
| 205 |
+ |
|
| 206 |
+ |
|
| 207 |
+@pytest.mark.parametrize('blobs_digests', zip(BLOBS, BLOBS_DIGESTS))
|
|
| 208 |
+def test_nonexistent_read(any_storage, blobs_digests):
|
|
| 209 |
+ _, digests = blobs_digests
|
|
| 210 |
+ |
|
| 211 |
+ # Actual test function, failing on assertions:
|
|
| 212 |
+ def __test_nonexistent_read(any_storage, digests):
|
|
| 213 |
+ for digest in digests:
|
|
| 214 |
+ assert any_storage.get_blob(digest) is None
|
|
| 215 |
+ |
|
| 216 |
+ # Helper test function for remote storage, to be run in a subprocess:
|
|
| 217 |
+ def __test_remote_nonexistent_read(queue, remote, serialized_digests):
|
|
| 218 |
+ channel = grpc.insecure_channel(remote)
|
|
| 219 |
+ remote_storage = RemoteStorage(channel, 'testing')
|
|
| 220 |
+ digests = []
|
|
| 221 |
+ |
|
| 222 |
+ for data in serialized_digests:
|
|
| 223 |
+ digest = remote_execution_pb2.Digest()
|
|
| 224 |
+ digest.ParseFromString(data)
|
|
| 225 |
+ digests.append(digest)
|
|
| 226 |
+ |
|
| 227 |
+ try:
|
|
| 228 |
+ __test_nonexistent_read(remote_storage, digests)
|
|
| 229 |
+ except AssertionError:
|
|
| 230 |
+ queue.put(False)
|
|
| 231 |
+ else:
|
|
| 232 |
+ queue.put(True)
|
|
| 233 |
+ |
|
| 234 |
+ if isinstance(any_storage, str):
|
|
| 235 |
+ serialized_digests = [digest.SerializeToString() for digest in digests]
|
|
| 236 |
+ assert run_in_subprocess(__test_remote_nonexistent_read,
|
|
| 237 |
+ any_storage, serialized_digests)
|
|
| 238 |
+ else:
|
|
| 239 |
+ __test_nonexistent_read(any_storage, digests)
|
|
| 240 |
+ |
|
| 241 |
+ |
|
| 242 |
+@pytest.mark.parametrize('blobs_digests', [(BLOBS[0], BLOBS_DIGESTS[0])])
|
|
| 243 |
+def test_lru_eviction(blobs_digests):
|
|
| 244 |
+ blobs, digests = blobs_digests
|
|
| 245 |
+ blob1, blob2, blob3, *_ = blobs
|
|
| 246 |
+ digest1, digest2, digest3, *_ = digests
|
|
| 185 | 247 |
|
| 186 |
-def test_lru_eviction():
|
|
| 187 | 248 |
lru = LRUMemoryCache(8)
|
| 188 |
- write(lru, abc_digest, abc)
|
|
| 189 |
- write(lru, defg_digest, defg)
|
|
| 190 |
- assert lru.has_blob(abc_digest)
|
|
| 191 |
- assert lru.has_blob(defg_digest)
|
|
| 192 |
- |
|
| 193 |
- write(lru, hijk_digest, hijk)
|
|
| 194 |
- # Check that the LRU evicted abc (it was written first)
|
|
| 195 |
- assert not lru.has_blob(abc_digest)
|
|
| 196 |
- assert lru.has_blob(defg_digest)
|
|
| 197 |
- assert lru.has_blob(hijk_digest)
|
|
| 198 |
- |
|
| 199 |
- assert lru.get_blob(defg_digest).read() == defg
|
|
| 200 |
- write(lru, abc_digest, abc)
|
|
| 201 |
- # Check that the LRU evicted hijk (since we just read defg)
|
|
| 202 |
- assert lru.has_blob(abc_digest)
|
|
| 203 |
- assert lru.has_blob(defg_digest)
|
|
| 204 |
- assert not lru.has_blob(hijk_digest)
|
|
| 205 |
- |
|
| 206 |
- assert lru.has_blob(defg_digest)
|
|
| 207 |
- write(lru, hijk_digest, abc)
|
|
| 208 |
- # Check that the LRU evicted abc (since we just checked hijk)
|
|
| 209 |
- assert not lru.has_blob(abc_digest)
|
|
| 210 |
- assert lru.has_blob(defg_digest)
|
|
| 211 |
- assert lru.has_blob(hijk_digest)
|
|
| 212 |
- |
|
| 213 |
- |
|
| 214 |
-def test_with_cache():
|
|
| 249 |
+ write(lru, digest1, blob1)
|
|
| 250 |
+ write(lru, digest2, blob2)
|
|
| 251 |
+ assert lru.has_blob(digest1)
|
|
| 252 |
+ assert lru.has_blob(digest2)
|
|
| 253 |
+ |
|
| 254 |
+ write(lru, digest3, blob3)
|
|
| 255 |
+ # Check that the LRU evicted blob1 (it was written first)
|
|
| 256 |
+ assert not lru.has_blob(digest1)
|
|
| 257 |
+ assert lru.has_blob(digest2)
|
|
| 258 |
+ assert lru.has_blob(digest3)
|
|
| 259 |
+ |
|
| 260 |
+ assert lru.get_blob(digest2).read() == blob2
|
|
| 261 |
+ write(lru, digest1, blob1)
|
|
| 262 |
+ # Check that the LRU evicted blob3 (since we just read blob2)
|
|
| 263 |
+ assert lru.has_blob(digest1)
|
|
| 264 |
+ assert lru.has_blob(digest2)
|
|
| 265 |
+ assert not lru.has_blob(digest3)
|
|
| 266 |
+ |
|
| 267 |
+ assert lru.has_blob(digest2)
|
|
| 268 |
+ write(lru, digest3, blob1)
|
|
| 269 |
+ # Check that the LRU evicted blob1 (since we just checked blob3)
|
|
| 270 |
+ assert not lru.has_blob(digest1)
|
|
| 271 |
+ assert lru.has_blob(digest2)
|
|
| 272 |
+ assert lru.has_blob(digest3)
|
|
| 273 |
+ |
|
| 274 |
+ |
|
| 275 |
+@pytest.mark.parametrize('blobs_digests', [(BLOBS[0], BLOBS_DIGESTS[0])])
|
|
| 276 |
+def test_with_cache(blobs_digests):
|
|
| 277 |
+ blobs, digests = blobs_digests
|
|
| 278 |
+ blob1, blob2, blob3, *_ = blobs
|
|
| 279 |
+ digest1, digest2, digest3, *_ = digests
|
|
| 280 |
+ |
|
| 215 | 281 |
cache = LRUMemoryCache(256)
|
| 216 | 282 |
fallback = LRUMemoryCache(256)
|
| 217 | 283 |
with_cache_storage = WithCacheStorage(cache, fallback)
|
| 218 | 284 |
|
| 219 |
- assert not with_cache_storage.has_blob(abc_digest)
|
|
| 220 |
- write(with_cache_storage, abc_digest, abc)
|
|
| 221 |
- assert cache.has_blob(abc_digest)
|
|
| 222 |
- assert fallback.has_blob(abc_digest)
|
|
| 223 |
- assert with_cache_storage.get_blob(abc_digest).read() == abc
|
|
| 285 |
+ assert not with_cache_storage.has_blob(digest1)
|
|
| 286 |
+ write(with_cache_storage, digest1, blob1)
|
|
| 287 |
+ assert cache.has_blob(digest1)
|
|
| 288 |
+ assert fallback.has_blob(digest1)
|
|
| 289 |
+ assert with_cache_storage.get_blob(digest1).read() == blob1
|
|
| 224 | 290 |
|
| 225 | 291 |
# Even if a blob is in cache, we still need to check if the fallback
|
| 226 | 292 |
# has it.
|
| 227 |
- write(cache, defg_digest, defg)
|
|
| 228 |
- assert not with_cache_storage.has_blob(defg_digest)
|
|
| 229 |
- write(fallback, defg_digest, defg)
|
|
| 230 |
- assert with_cache_storage.has_blob(defg_digest)
|
|
| 293 |
+ write(cache, digest2, blob2)
|
|
| 294 |
+ assert not with_cache_storage.has_blob(digest2)
|
|
| 295 |
+ write(fallback, digest2, blob2)
|
|
| 296 |
+ assert with_cache_storage.has_blob(digest2)
|
|
| 231 | 297 |
|
| 232 | 298 |
# When a blob is in the fallback but not the cache, reading it should
|
| 233 | 299 |
# put it into the cache.
|
| 234 |
- write(fallback, hijk_digest, hijk)
|
|
| 235 |
- assert with_cache_storage.get_blob(hijk_digest).read() == hijk
|
|
| 236 |
- assert cache.has_blob(hijk_digest)
|
|
| 237 |
- assert cache.get_blob(hijk_digest).read() == hijk
|
|
| 238 |
- assert cache.has_blob(hijk_digest)
|
|
| 300 |
+ write(fallback, digest3, blob3)
|
|
| 301 |
+ assert with_cache_storage.get_blob(digest3).read() == blob3
|
|
| 302 |
+ assert cache.has_blob(digest3)
|
|
| 303 |
+ assert cache.get_blob(digest3).read() == blob3
|
|
| 304 |
+ assert cache.has_blob(digest3)
|
| 1 |
+# Copyright (C) 2018 Bloomberg LP
|
|
| 2 |
+#
|
|
| 3 |
+# Licensed under the Apache License, Version 2.0 (the "License");
|
|
| 4 |
+# you may not use this file except in compliance with the License.
|
|
| 5 |
+# You may obtain a copy of the License at
|
|
| 6 |
+#
|
|
| 7 |
+# <http://www.apache.org/licenses/LICENSE-2.0>
|
|
| 8 |
+#
|
|
| 9 |
+# Unless required by applicable law or agreed to in writing, software
|
|
| 10 |
+# distributed under the License is distributed on an "AS IS" BASIS,
|
|
| 11 |
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
| 12 |
+# See the License for the specific language governing permissions and
|
|
| 13 |
+# limitations under the License.
|
|
| 14 |
+ |
|
| 15 |
+ |
|
| 16 |
+from concurrent import futures
|
|
| 17 |
+from contextlib import contextmanager
|
|
| 18 |
+import multiprocessing
|
|
| 19 |
+import os
|
|
| 20 |
+import signal
|
|
| 21 |
+import tempfile
|
|
| 22 |
+ |
|
| 23 |
+import grpc
|
|
| 24 |
+import psutil
|
|
| 25 |
+import pytest_cov
|
|
| 26 |
+ |
|
| 27 |
+from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
|
| 28 |
+from buildgrid.server.cas.service import ByteStreamService
|
|
| 29 |
+from buildgrid.server.cas.service import ContentAddressableStorageService
|
|
| 30 |
+from buildgrid.server.cas.instance import ByteStreamInstance
|
|
| 31 |
+from buildgrid.server.cas.instance import ContentAddressableStorageInstance
|
|
| 32 |
+from buildgrid.server.cas.storage.disk import DiskStorage
|
|
| 33 |
+ |
|
| 34 |
+ |
|
| 35 |
+@contextmanager
|
|
| 36 |
+def serve_cas(instances):
|
|
| 37 |
+ server = Server(instances)
|
|
| 38 |
+ try:
|
|
| 39 |
+ yield server
|
|
| 40 |
+ finally:
|
|
| 41 |
+ server.quit()
|
|
| 42 |
+ |
|
| 43 |
+ |
|
| 44 |
+def kill_process_tree(pid):
|
|
| 45 |
+ proc = psutil.Process(pid)
|
|
| 46 |
+ children = proc.children(recursive=True)
|
|
| 47 |
+ |
|
| 48 |
+ def kill_proc(p):
|
|
| 49 |
+ try:
|
|
| 50 |
+ p.kill()
|
|
| 51 |
+ except psutil.AccessDenied:
|
|
| 52 |
+ # Ignore this error, it can happen with
|
|
| 53 |
+ # some setuid bwrap processes.
|
|
| 54 |
+ pass
|
|
| 55 |
+ |
|
| 56 |
+ # Bloody Murder
|
|
| 57 |
+ for child in children:
|
|
| 58 |
+ kill_proc(child)
|
|
| 59 |
+ kill_proc(proc)
|
|
| 60 |
+ |
|
| 61 |
+ |
|
| 62 |
+def run_in_subprocess(function, *arguments):
|
|
| 63 |
+ queue = multiprocessing.Queue()
|
|
| 64 |
+ # Use subprocess to avoid creation of gRPC threads in main process
|
|
| 65 |
+ # See https://github.com/grpc/grpc/blob/master/doc/fork_support.md
|
|
| 66 |
+ process = multiprocessing.Process(target=function,
|
|
| 67 |
+ args=(queue, *arguments))
|
|
| 68 |
+ |
|
| 69 |
+ try:
|
|
| 70 |
+ process.start()
|
|
| 71 |
+ |
|
| 72 |
+ result = queue.get()
|
|
| 73 |
+ process.join()
|
|
| 74 |
+ except KeyboardInterrupt:
|
|
| 75 |
+ kill_process_tree(process.pid)
|
|
| 76 |
+ raise
|
|
| 77 |
+ |
|
| 78 |
+ return result
|
|
| 79 |
+ |
|
| 80 |
+ |
|
| 81 |
+class Server:
|
|
| 82 |
+ |
|
| 83 |
+ def __init__(self, instances):
|
|
| 84 |
+ |
|
| 85 |
+ self.instances = instances
|
|
| 86 |
+ |
|
| 87 |
+ self.__storage_path = tempfile.TemporaryDirectory()
|
|
| 88 |
+ self.__storage = DiskStorage(self.__storage_path.name)
|
|
| 89 |
+ |
|
| 90 |
+ self.__queue = multiprocessing.Queue()
|
|
| 91 |
+ self.__process = multiprocessing.Process(
|
|
| 92 |
+ target=Server.serve,
|
|
| 93 |
+ args=(self.__queue, self.instances, self.__storage_path.name))
|
|
| 94 |
+ self.__process.start()
|
|
| 95 |
+ |
|
| 96 |
+ self.port = self.__queue.get()
|
|
| 97 |
+ self.remote = 'localhost:{}'.format(self.port)
|
|
| 98 |
+ |
|
| 99 |
+ @classmethod
|
|
| 100 |
+ def serve(cls, queue, instances, storage_path):
|
|
| 101 |
+ pytest_cov.embed.cleanup_on_sigterm()
|
|
| 102 |
+ |
|
| 103 |
+ # Use max_workers default from Python 3.5+
|
|
| 104 |
+ max_workers = (os.cpu_count() or 1) * 5
|
|
| 105 |
+ server = grpc.server(futures.ThreadPoolExecutor(max_workers))
|
|
| 106 |
+ port = server.add_insecure_port('localhost:0')
|
|
| 107 |
+ |
|
| 108 |
+ storage = DiskStorage(storage_path)
|
|
| 109 |
+ |
|
| 110 |
+ bs_service = ByteStreamService(server)
|
|
| 111 |
+ cas_service = ContentAddressableStorageService(server)
|
|
| 112 |
+ for name in instances:
|
|
| 113 |
+ bs_service.add_instance(name, ByteStreamInstance(storage))
|
|
| 114 |
+ cas_service.add_instance(name, ContentAddressableStorageInstance(storage))
|
|
| 115 |
+ |
|
| 116 |
+ server.start()
|
|
| 117 |
+ queue.put(port)
|
|
| 118 |
+ |
|
| 119 |
+ signal.pause()
|
|
| 120 |
+ |
|
| 121 |
+ def has(self, digest):
|
|
| 122 |
+ return self.__storage.has_blob(digest)
|
|
| 123 |
+ |
|
| 124 |
+ def get(self, digest):
|
|
| 125 |
+ return self.__storage.get_blob(digest).read()
|
|
| 126 |
+ |
|
| 127 |
+ def compare_blobs(self, digest, blob):
|
|
| 128 |
+ if not self.__storage.has_blob(digest):
|
|
| 129 |
+ return False
|
|
| 130 |
+ |
|
| 131 |
+ stored_blob = self.__storage.get_blob(digest)
|
|
| 132 |
+ stored_blob = stored_blob.read()
|
|
| 133 |
+ |
|
| 134 |
+ return blob == stored_blob
|
|
| 135 |
+ |
|
| 136 |
+ def compare_messages(self, digest, message):
|
|
| 137 |
+ if not self.__storage.has_blob(digest):
|
|
| 138 |
+ return False
|
|
| 139 |
+ |
|
| 140 |
+ message_blob = message.SerializeToString()
|
|
| 141 |
+ |
|
| 142 |
+ stored_blob = self.__storage.get_blob(digest)
|
|
| 143 |
+ stored_blob = stored_blob.read()
|
|
| 144 |
+ |
|
| 145 |
+ return message_blob == stored_blob
|
|
| 146 |
+ |
|
| 147 |
+ def compare_files(self, digest, file_path):
|
|
| 148 |
+ if not self.__storage.has_blob(digest):
|
|
| 149 |
+ return False
|
|
| 150 |
+ |
|
| 151 |
+ with open(file_path, 'rb') as file_bytes:
|
|
| 152 |
+ file_blob = file_bytes.read()
|
|
| 153 |
+ |
|
| 154 |
+ stored_blob = self.__storage.get_blob(digest)
|
|
| 155 |
+ stored_blob = stored_blob.read()
|
|
| 156 |
+ |
|
| 157 |
+ return file_blob == stored_blob
|
|
| 158 |
+ |
|
| 159 |
+ def compare_directories(self, digest, directory_path):
|
|
| 160 |
+ if not self.__storage.has_blob(digest):
|
|
| 161 |
+ return False
|
|
| 162 |
+ elif not os.path.isdir(directory_path):
|
|
| 163 |
+ return False
|
|
| 164 |
+ |
|
| 165 |
+ def __compare_folders(digest, path):
|
|
| 166 |
+ directory = remote_execution_pb2.Directory()
|
|
| 167 |
+ directory.ParseFromString(self.__storage.get_blob(digest).read())
|
|
| 168 |
+ |
|
| 169 |
+ files, directories, symlinks = [], [], []
|
|
| 170 |
+ for entry in os.scandir(path):
|
|
| 171 |
+ if entry.is_file(follow_symlinks=False):
|
|
| 172 |
+ files.append(entry.name)
|
|
| 173 |
+ |
|
| 174 |
+ elif entry.is_dir(follow_symlinks=False):
|
|
| 175 |
+ directories.append(entry.name)
|
|
| 176 |
+ |
|
| 177 |
+ elif os.path.islink(entry.path):
|
|
| 178 |
+ symlinks.append(entry.name)
|
|
| 179 |
+ |
|
| 180 |
+ assert len(files) == len(directory.files)
|
|
| 181 |
+ assert len(directories) == len(directory.directories)
|
|
| 182 |
+ assert len(symlinks) == len(directory.symlinks)
|
|
| 183 |
+ |
|
| 184 |
+ for file_node in directory.files:
|
|
| 185 |
+ file_path = os.path.join(path, file_node.name)
|
|
| 186 |
+ |
|
| 187 |
+ assert file_node.name in files
|
|
| 188 |
+ assert os.path.isfile(file_path)
|
|
| 189 |
+ assert not os.path.islink(file_path)
|
|
| 190 |
+ if file_node.is_executable:
|
|
| 191 |
+ assert os.access(file_path, os.X_OK)
|
|
| 192 |
+ |
|
| 193 |
+ assert self.compare_files(file_node.digest, file_path)
|
|
| 194 |
+ |
|
| 195 |
+ for directory_node in directory.directories:
|
|
| 196 |
+ directory_path = os.path.join(path, directory_node.name)
|
|
| 197 |
+ |
|
| 198 |
+ assert directory_node.name in directories
|
|
| 199 |
+ assert os.path.exists(directory_path)
|
|
| 200 |
+ assert not os.path.islink(directory_path)
|
|
| 201 |
+ |
|
| 202 |
+ assert __compare_folders(directory_node.digest, directory_path)
|
|
| 203 |
+ |
|
| 204 |
+ for symlink_node in directory.symlinks:
|
|
| 205 |
+ symlink_path = os.path.join(path, symlink_node.name)
|
|
| 206 |
+ |
|
| 207 |
+ assert symlink_node.name in symlinks
|
|
| 208 |
+ assert os.path.islink(symlink_path)
|
|
| 209 |
+ assert os.readlink(symlink_path) == symlink_node.target
|
|
| 210 |
+ |
|
| 211 |
+ return True
|
|
| 212 |
+ |
|
| 213 |
+ return __compare_folders(digest, directory_path)
|
|
| 214 |
+ |
|
| 215 |
+ def quit(self):
|
|
| 216 |
+ if self.__process:
|
|
| 217 |
+ self.__process.terminate()
|
|
| 218 |
+ self.__process.join()
|
|
| 219 |
+ |
|
| 220 |
+ self.__storage_path.cleanup()
|
