Martin Blanchard pushed to branch mablanch/77-cas-uploader at BuildGrid / buildgrid
Commits:
-
22f7327c
by Martin Blanchard at 2018-09-21T12:38:12Z
-
16a058fb
by Martin Blanchard at 2018-09-21T12:38:17Z
-
d91cf82c
by Martin Blanchard at 2018-09-21T12:38:17Z
9 changed files:
- setup.py
- + tests/cas/data/hello.cc
- + tests/cas/data/hello/hello.c
- + tests/cas/data/hello/hello.h
- + tests/cas/data/void
- + tests/cas/test_client.py
- tests/cas/test_storage.py
- + tests/utils/__init__.py
- + tests/utils/cas.py
Changes:
... | ... | @@ -89,6 +89,7 @@ tests_require = [ |
89 | 89 |
'coverage == 4.4.0',
|
90 | 90 |
'moto',
|
91 | 91 |
'pep8',
|
92 |
+ 'psutil',
|
|
92 | 93 |
'pytest == 3.6.4',
|
93 | 94 |
'pytest-cov >= 2.6.0',
|
94 | 95 |
'pytest-pep8',
|
1 |
+#include <iostream>
|
|
2 |
+ |
|
3 |
+int main()
|
|
4 |
+{
|
|
5 |
+ std::cout << "Hello, World!" << std::endl;
|
|
6 |
+ return 0;
|
|
7 |
+}
|
1 |
+#include <stdio.h>
|
|
2 |
+ |
|
3 |
+#include "hello.h"
|
|
4 |
+ |
|
5 |
+int main()
|
|
6 |
+{
|
|
7 |
+ printf("%s\n", HELLO_WORLD);
|
|
8 |
+ return 0;
|
|
9 |
+}
|
1 |
+#define HELLO_WORLD "Hello, World!"
|
1 |
+# Copyright (C) 2018 Bloomberg LP
|
|
2 |
+#
|
|
3 |
+# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4 |
+# you may not use this file except in compliance with the License.
|
|
5 |
+# You may obtain a copy of the License at
|
|
6 |
+#
|
|
7 |
+# <http://www.apache.org/licenses/LICENSE-2.0>
|
|
8 |
+#
|
|
9 |
+# Unless required by applicable law or agreed to in writing, software
|
|
10 |
+# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11 |
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12 |
+# See the License for the specific language governing permissions and
|
|
13 |
+# limitations under the License.
|
|
14 |
+ |
|
15 |
+# pylint: disable=redefined-outer-name
|
|
16 |
+ |
|
17 |
+import os
|
|
18 |
+ |
|
19 |
+import grpc
|
|
20 |
+import pytest
|
|
21 |
+ |
|
22 |
+from buildgrid.client.cas import upload
|
|
23 |
+from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
|
24 |
+ |
|
25 |
+from ..utils.cas import serve_cas, run_in_subprocess
|
|
26 |
+ |
|
27 |
+ |
|
28 |
+INTANCES = ['', 'instance']
|
|
29 |
+BLOBS = [(b'',), (b'test-string',), (b'test', b'string')]
|
|
30 |
+MESSAGES = [
|
|
31 |
+ (remote_execution_pb2.Directory(),),
|
|
32 |
+ (remote_execution_pb2.SymlinkNode(name='name', target='target'),),
|
|
33 |
+ (remote_execution_pb2.Action(do_not_cache=True),
|
|
34 |
+ remote_execution_pb2.ActionResult(exit_code=12))
|
|
35 |
+]
|
|
36 |
+DATA_DIR = os.path.join(
|
|
37 |
+ os.path.dirname(os.path.realpath(__file__)), 'data')
|
|
38 |
+FILES = [
|
|
39 |
+ (os.path.join(DATA_DIR, 'void'),),
|
|
40 |
+ (os.path.join(DATA_DIR, 'hello.cc'),),
|
|
41 |
+ (os.path.join(DATA_DIR, 'hello', 'hello.c'),
|
|
42 |
+ os.path.join(DATA_DIR, 'hello', 'hello.h'))]
|
|
43 |
+DIRECTORIES = [
|
|
44 |
+ (os.path.join(DATA_DIR, 'hello'),),
|
|
45 |
+ (os.path.join(DATA_DIR, 'hello'), DATA_DIR)]
|
|
46 |
+ |
|
47 |
+ |
|
48 |
@pytest.mark.parametrize('blobs', BLOBS)
@pytest.mark.parametrize('instance', INTANCES)
def test_upload_blob(instance, blobs):
    """Upload raw blobs with the CAS client and check what the server stored.

    A lone blob is sent with ``queue=False``, several blobs are sent with
    ``queue=True``. Every returned digest must then be present on the server
    and match the blob it was computed from.
    """
    # Actual test function, to be run in a subprocess:
    def __test_upload_blob(queue, remote, instance, blobs):
        # Open a channel to the remote CAS server:
        channel = grpc.insecure_channel(remote)

        # Only request queued uploads when there is more than one blob:
        queued = len(blobs) > 1

        digests = []
        with upload(channel, instance) as uploader:
            for blob in blobs:
                digest = uploader.put_blob(blob, queue=queued)
                # Digests travel back to the parent in serialized form:
                digests.append(digest.SerializeToString())

        queue.put(digests)

    # Start a minimal CAS server in a subprocess:
    with serve_cas([instance]) as server:
        digests = run_in_subprocess(__test_upload_blob,
                                    server.remote, instance, blobs)

        # zip() stops at the shortest input, so a missing digest would
        # otherwise go unnoticed:
        assert len(digests) == len(blobs)

        for blob, digest_blob in zip(blobs, digests):
            digest = remote_execution_pb2.Digest()
            digest.ParseFromString(digest_blob)

            assert server.has(digest)
            assert server.compare_blobs(digest, blob)
|
|
79 |
+ |
|
80 |
+ |
|
81 |
@pytest.mark.parametrize('messages', MESSAGES)
@pytest.mark.parametrize('instance', INTANCES)
def test_upload_message(instance, messages):
    """Upload protobuf messages with the CAS client and check what was stored.

    A lone message is sent with ``queue=False``, several messages are sent
    with ``queue=True``. Every returned digest must then be present on the
    server and match the message it was computed from.
    """
    # Actual test function, to be run in a subprocess:
    def __test_upload_message(queue, remote, instance, messages):
        # Open a channel to the remote CAS server:
        channel = grpc.insecure_channel(remote)

        # Only request queued uploads when there is more than one message:
        queued = len(messages) > 1

        digests = []
        with upload(channel, instance) as uploader:
            for message in messages:
                digest = uploader.put_message(message, queue=queued)
                # Digests travel back to the parent in serialized form:
                digests.append(digest.SerializeToString())

        queue.put(digests)

    # Start a minimal CAS server in a subprocess:
    with serve_cas([instance]) as server:
        digests = run_in_subprocess(__test_upload_message,
                                    server.remote, instance, messages)

        # zip() stops at the shortest input, so a missing digest would
        # otherwise go unnoticed:
        assert len(digests) == len(messages)

        for message, digest_blob in zip(messages, digests):
            digest = remote_execution_pb2.Digest()
            digest.ParseFromString(digest_blob)

            assert server.has(digest)
            assert server.compare_messages(digest, message)
|
|
112 |
+ |
|
113 |
+ |
|
114 |
@pytest.mark.parametrize('file_paths', FILES)
@pytest.mark.parametrize('instance', INTANCES)
def test_upload_file(instance, file_paths):
    """Upload files with the CAS client and check what the server stored.

    A lone file is sent with ``queue=False``, several files are sent with
    ``queue=True``. Every returned digest must then be present on the server
    and match the file it was computed from.
    """
    # Actual test function, to be run in a subprocess:
    def __test_upload_file(queue, remote, instance, file_paths):
        # Open a channel to the remote CAS server:
        channel = grpc.insecure_channel(remote)

        # Only request queued uploads when there is more than one file:
        queued = len(file_paths) > 1

        digests = []
        with upload(channel, instance) as uploader:
            for file_path in file_paths:
                digest = uploader.upload_file(file_path, queue=queued)
                # Digests travel back to the parent in serialized form:
                digests.append(digest.SerializeToString())

        queue.put(digests)

    # Start a minimal CAS server in a subprocess:
    with serve_cas([instance]) as server:
        digests = run_in_subprocess(__test_upload_file,
                                    server.remote, instance, file_paths)

        # zip() stops at the shortest input, so a missing digest would
        # otherwise go unnoticed:
        assert len(digests) == len(file_paths)

        for file_path, digest_blob in zip(file_paths, digests):
            digest = remote_execution_pb2.Digest()
            digest.ParseFromString(digest_blob)

            assert server.has(digest)
            assert server.compare_files(digest, file_path)
|
|
145 |
+ |
|
146 |
+ |
|
147 |
@pytest.mark.parametrize('directory_paths', DIRECTORIES)
@pytest.mark.parametrize('instance', INTANCES)
def test_upload_directory(instance, directory_paths):
    """Upload directories with the CAS client and check what the server stored.

    A lone directory is sent with ``queue=False``, several directories are
    sent with ``queue=True``. Each returned digest is then compared against
    the directory it was computed from.
    """
    # Actual test function, to be run in a subprocess:
    def __test_upload_directory(queue, remote, instance, directory_paths):
        # Open a channel to the remote CAS server:
        channel = grpc.insecure_channel(remote)

        # Only request queued uploads when there is more than one directory:
        queued = len(directory_paths) > 1

        digests = []
        with upload(channel, instance) as uploader:
            for directory_path in directory_paths:
                digest = uploader.upload_directory(directory_path, queue=queued)
                # Digests travel back to the parent in serialized form:
                digests.append(digest.SerializeToString())

        queue.put(digests)

    # Start a minimal CAS server in a subprocess:
    with serve_cas([instance]) as server:
        digests = run_in_subprocess(__test_upload_directory,
                                    server.remote, instance, directory_paths)

        # zip() stops at the shortest input, so a missing digest would
        # otherwise go unnoticed:
        assert len(digests) == len(directory_paths)

        for directory_path, digest_blob in zip(directory_paths, digests):
            digest = remote_execution_pb2.Digest()
            digest.ParseFromString(digest_blob)

            assert server.compare_directories(digest, directory_path)
|
... | ... | @@ -19,220 +19,285 @@ |
19 | 19 |
|
20 | 20 |
import tempfile
|
21 | 21 |
|
22 |
-from unittest import mock
|
|
23 |
- |
|
24 | 22 |
import boto3
|
25 | 23 |
import grpc
|
26 |
-from grpc._server import _Context
|
|
27 | 24 |
import pytest
|
28 | 25 |
from moto import mock_s3
|
29 | 26 |
|
30 |
-from buildgrid._protos.build.bazel.remote.execution.v2.remote_execution_pb2 import Digest
|
|
31 |
-from buildgrid.server.cas import service
|
|
32 |
-from buildgrid.server.cas.instance import ByteStreamInstance, ContentAddressableStorageInstance
|
|
33 |
-from buildgrid.server.cas.storage import remote
|
|
27 |
+from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
|
28 |
+from buildgrid.server.cas.storage.remote import RemoteStorage
|
|
34 | 29 |
from buildgrid.server.cas.storage.lru_memory_cache import LRUMemoryCache
|
35 | 30 |
from buildgrid.server.cas.storage.disk import DiskStorage
|
36 | 31 |
from buildgrid.server.cas.storage.s3 import S3Storage
|
37 | 32 |
from buildgrid.server.cas.storage.with_cache import WithCacheStorage
|
38 | 33 |
from buildgrid.settings import HASH
|
39 | 34 |
|
35 |
+from ..utils.cas import serve_cas, run_in_subprocess
|
|
40 | 36 |
|
41 |
-context = mock.create_autospec(_Context)
|
|
42 |
-server = mock.create_autospec(grpc.server)
|
|
43 |
- |
|
44 |
-abc = b"abc"
|
|
45 |
-abc_digest = Digest(hash=HASH(abc).hexdigest(), size_bytes=3)
|
|
46 |
-defg = b"defg"
|
|
47 |
-defg_digest = Digest(hash=HASH(defg).hexdigest(), size_bytes=4)
|
|
48 |
-hijk = b"hijk"
|
|
49 |
-hijk_digest = Digest(hash=HASH(hijk).hexdigest(), size_bytes=4)
|
|
50 |
- |
|
51 |
- |
|
52 |
-def write(storage, digest, blob):
|
|
53 |
- session = storage.begin_write(digest)
|
|
54 |
- session.write(blob)
|
|
55 |
- storage.commit_write(digest, session)
|
|
56 |
- |
|
57 |
- |
|
58 |
-class MockCASStorage(ByteStreamInstance, ContentAddressableStorageInstance):
|
|
59 |
- |
|
60 |
- def __init__(self):
|
|
61 |
- storage = LRUMemoryCache(256)
|
|
62 |
- super().__init__(storage)
|
|
63 |
- |
|
64 |
- |
|
65 |
-# Mock a CAS server with LRUStorage to return "calls" made to it
|
|
66 |
-class MockStubServer:
|
|
67 |
- |
|
68 |
- def __init__(self):
|
|
69 |
- instances = {"": MockCASStorage(), "dna": MockCASStorage()}
|
|
70 |
- self._requests = []
|
|
71 |
- with mock.patch.object(service, 'bytestream_pb2_grpc'):
|
|
72 |
- self._bs_service = service.ByteStreamService(server)
|
|
73 |
- for k, v in instances.items():
|
|
74 |
- self._bs_service.add_instance(k, v)
|
|
75 |
- with mock.patch.object(service, 'remote_execution_pb2_grpc'):
|
|
76 |
- self._cas_service = service.ContentAddressableStorageService(server)
|
|
77 |
- for k, v in instances.items():
|
|
78 |
- self._cas_service.add_instance(k, v)
|
|
79 |
- |
|
80 |
- def Read(self, request):
|
|
81 |
- yield from self._bs_service.Read(request, context)
|
|
82 |
- |
|
83 |
- def Write(self, request):
|
|
84 |
- self._requests.append(request)
|
|
85 |
- if request.finish_write:
|
|
86 |
- response = self._bs_service.Write(self._requests, context)
|
|
87 |
- self._requests = []
|
|
88 |
- return response
|
|
89 |
- |
|
90 |
- return None
|
|
91 |
- |
|
92 |
- def FindMissingBlobs(self, request):
|
|
93 |
- return self._cas_service.FindMissingBlobs(request, context)
|
|
94 |
- |
|
95 |
- def BatchUpdateBlobs(self, request):
|
|
96 |
- return self._cas_service.BatchUpdateBlobs(request, context)
|
|
97 | 37 |
|
38 |
+BLOBS = [(b'abc', b'defg', b'hijk', b'')]
|
|
39 |
+BLOBS_DIGESTS = [tuple([remote_execution_pb2.Digest(hash=HASH(blob).hexdigest(),
|
|
40 |
+ size_bytes=len(blob)) for blob in blobs])
|
|
41 |
+ for blobs in BLOBS]
|
|
98 | 42 |
|
99 |
-# Instances of MockCASStorage
|
|
100 |
-@pytest.fixture(params=["", "dna"])
|
|
101 |
-def instance(params):
|
|
102 |
- return {params, MockCASStorage()}
|
|
103 | 43 |
|
104 |
- |
|
105 |
-# General tests for all storage providers
|
|
106 |
- |
|
107 |
- |
|
108 |
-@pytest.fixture(params=["lru", "disk", "s3", "lru_disk", "disk_s3", "remote"])
|
|
44 |
+@pytest.fixture(params=['lru', 'disk', 's3', 'lru_disk', 'disk_s3', 'remote'])
|
|
109 | 45 |
def any_storage(request):
|
110 |
- if request.param == "lru":
|
|
46 |
+ if request.param == 'lru':
|
|
111 | 47 |
yield LRUMemoryCache(256)
|
112 |
- elif request.param == "disk":
|
|
48 |
+ elif request.param == 'disk':
|
|
113 | 49 |
with tempfile.TemporaryDirectory() as path:
|
114 | 50 |
yield DiskStorage(path)
|
115 |
- elif request.param == "s3":
|
|
51 |
+ elif request.param == 's3':
|
|
116 | 52 |
with mock_s3():
|
117 |
- boto3.resource('s3').create_bucket(Bucket="testing")
|
|
118 |
- yield S3Storage("testing")
|
|
119 |
- elif request.param == "lru_disk":
|
|
53 |
+ boto3.resource('s3').create_bucket(Bucket='testing')
|
|
54 |
+ yield S3Storage('testing')
|
|
55 |
+ elif request.param == 'lru_disk':
|
|
120 | 56 |
# LRU cache with a uselessly small limit, so requests always fall back
|
121 | 57 |
with tempfile.TemporaryDirectory() as path:
|
122 | 58 |
yield WithCacheStorage(LRUMemoryCache(1), DiskStorage(path))
|
123 |
- elif request.param == "disk_s3":
|
|
59 |
+ elif request.param == 'disk_s3':
|
|
124 | 60 |
# Disk-based cache of S3, but we don't delete files, so requests
|
125 | 61 |
# are always handled by the cache
|
126 | 62 |
with tempfile.TemporaryDirectory() as path:
|
127 | 63 |
with mock_s3():
|
128 |
- boto3.resource('s3').create_bucket(Bucket="testing")
|
|
129 |
- yield WithCacheStorage(DiskStorage(path), S3Storage("testing"))
|
|
130 |
- elif request.param == "remote":
|
|
131 |
- with mock.patch.object(remote, 'bytestream_pb2_grpc'):
|
|
132 |
- with mock.patch.object(remote, 'remote_execution_pb2_grpc'):
|
|
133 |
- mock_server = MockStubServer()
|
|
134 |
- storage = remote.RemoteStorage(None, "")
|
|
135 |
- storage._stub_bs = mock_server
|
|
136 |
- storage._stub_cas = mock_server
|
|
137 |
- yield storage
|
|
138 |
- |
|
139 |
- |
|
140 |
-def test_initially_empty(any_storage):
|
|
141 |
- assert not any_storage.has_blob(abc_digest)
|
|
142 |
- assert not any_storage.has_blob(defg_digest)
|
|
143 |
- assert not any_storage.has_blob(hijk_digest)
|
|
144 |
- |
|
145 |
- |
|
146 |
-def test_basic_write_read(any_storage):
|
|
147 |
- assert not any_storage.has_blob(abc_digest)
|
|
148 |
- write(any_storage, abc_digest, abc)
|
|
149 |
- assert any_storage.has_blob(abc_digest)
|
|
150 |
- assert any_storage.get_blob(abc_digest).read() == abc
|
|
151 |
- |
|
152 |
- # Try writing the same digest again (since it's valid to do that)
|
|
153 |
- write(any_storage, abc_digest, abc)
|
|
154 |
- assert any_storage.has_blob(abc_digest)
|
|
155 |
- assert any_storage.get_blob(abc_digest).read() == abc
|
|
156 |
- |
|
157 |
- |
|
158 |
-def test_bulk_write_read(any_storage):
|
|
159 |
- missing_digests = any_storage.missing_blobs([abc_digest, defg_digest, hijk_digest])
|
|
160 |
- assert len(missing_digests) == 3
|
|
161 |
- assert abc_digest in missing_digests
|
|
162 |
- assert defg_digest in missing_digests
|
|
163 |
- assert hijk_digest in missing_digests
|
|
64 |
+ boto3.resource('s3').create_bucket(Bucket='testing')
|
|
65 |
+ yield WithCacheStorage(DiskStorage(path), S3Storage('testing'))
|
|
66 |
+ elif request.param == 'remote':
|
|
67 |
+ with serve_cas(['testing']) as server:
|
|
68 |
+ yield server.remote
|
|
164 | 69 |
|
165 |
- bulk_update_results = any_storage.bulk_update_blobs([(abc_digest, abc), (defg_digest, defg),
|
|
166 |
- (hijk_digest, b'????')])
|
|
167 |
- assert len(bulk_update_results) == 3
|
|
168 |
- assert bulk_update_results[0].code == 0
|
|
169 |
- assert bulk_update_results[1].code == 0
|
|
170 |
- assert bulk_update_results[2].code != 0
|
|
171 |
- |
|
172 |
- missing_digests = any_storage.missing_blobs([abc_digest, defg_digest, hijk_digest])
|
|
173 |
- assert missing_digests == [hijk_digest]
|
|
174 |
- |
|
175 |
- assert any_storage.get_blob(abc_digest).read() == abc
|
|
176 |
- assert any_storage.get_blob(defg_digest).read() == defg
|
|
177 |
- |
|
178 |
- |
|
179 |
-def test_nonexistent_read(any_storage):
|
|
180 |
- assert any_storage.get_blob(abc_digest) is None
|
|
181 | 70 |
|
71 |
def write(storage, digest, blob):
    """Store `blob` under `digest` using the storage's write-session calls.

    A session is obtained from ``storage.begin_write()``, the blob is written
    into it, and the session is handed back to ``storage.commit_write()``.
    """
    write_session = storage.begin_write(digest)
    write_session.write(blob)

    storage.commit_write(digest, write_session)
|
|
182 | 75 |
|
183 |
-# Tests for special behavior of individual storage providers
|
|
184 | 76 |
|
77 |
@pytest.mark.parametrize('blobs_digests', zip(BLOBS, BLOBS_DIGESTS))
def test_initially_empty(any_storage, blobs_digests):
    """Check that a fresh storage reports none of the test digests as present.

    When the fixture yields a string it is treated as the address of a remote
    CAS server, and the checks run against a `RemoteStorage` in a subprocess.
    """
    _, digests = blobs_digests

    # Actual test function, failing on assertions:
    def __test_initially_empty(any_storage, digests):
        for digest in digests:
            assert not any_storage.has_blob(digest)

    # Helper test function for remote storage, to be run in a subprocess:
    def __test_remote_initially_empty(queue, remote, serialized_digests):
        passed = False
        try:
            channel = grpc.insecure_channel(remote)
            remote_storage = RemoteStorage(channel, 'testing')
            digests = []

            for data in serialized_digests:
                digest = remote_execution_pb2.Digest()
                digest.ParseFromString(data)
                digests.append(digest)

            __test_initially_empty(remote_storage, digests)
            passed = True
        except AssertionError:
            # Reported to the parent as a plain failure.
            pass
        finally:
            # Always post an outcome: the parent waits for a value on the
            # queue, so an unexpected error must not leave it empty.
            queue.put(passed)

    if isinstance(any_storage, str):
        serialized_digests = [digest.SerializeToString() for digest in digests]
        assert run_in_subprocess(__test_remote_initially_empty,
                                 any_storage, serialized_digests)
    else:
        __test_initially_empty(any_storage, digests)
|
|
110 |
+ |
|
111 |
+ |
|
112 |
@pytest.mark.parametrize('blobs_digests', zip(BLOBS, BLOBS_DIGESTS))
def test_basic_write_read(any_storage, blobs_digests):
    """Write each blob, read it back, then write and read it a second time.

    When the fixture yields a string it is treated as the address of a remote
    CAS server, and the checks run against a `RemoteStorage` in a subprocess.
    """
    blobs, digests = blobs_digests

    # Actual test function, failing on assertions:
    def __test_basic_write_read(any_storage, blobs, digests):
        for blob, digest in zip(blobs, digests):
            assert not any_storage.has_blob(digest)
            write(any_storage, digest, blob)
            assert any_storage.has_blob(digest)
            assert any_storage.get_blob(digest).read() == blob

            # Try writing the same digest again (since it's valid to do that)
            write(any_storage, digest, blob)
            assert any_storage.has_blob(digest)
            assert any_storage.get_blob(digest).read() == blob

    # Helper test function for remote storage, to be run in a subprocess:
    def __test_remote_basic_write_read(queue, remote, blobs, serialized_digests):
        passed = False
        try:
            channel = grpc.insecure_channel(remote)
            remote_storage = RemoteStorage(channel, 'testing')
            digests = []

            for data in serialized_digests:
                digest = remote_execution_pb2.Digest()
                digest.ParseFromString(data)
                digests.append(digest)

            __test_basic_write_read(remote_storage, blobs, digests)
            passed = True
        except AssertionError:
            # Reported to the parent as a plain failure.
            pass
        finally:
            # Always post an outcome: the parent waits for a value on the
            # queue, so an unexpected error must not leave it empty.
            queue.put(passed)

    if isinstance(any_storage, str):
        serialized_digests = [digest.SerializeToString() for digest in digests]
        assert run_in_subprocess(__test_remote_basic_write_read,
                                 any_storage, blobs, serialized_digests)
    else:
        __test_basic_write_read(any_storage, blobs, digests)
|
|
153 |
+ |
|
154 |
+ |
|
155 |
@pytest.mark.parametrize('blobs_digests', zip(BLOBS, BLOBS_DIGESTS))
def test_bulk_write_read(any_storage, blobs_digests):
    """Bulk-update blobs where the last one does not match its digest.

    All digests must start out missing. After the update every blob but the
    last must be stored and readable, the last result must carry a non-zero
    code, and only the last digest must still be missing.

    When the fixture yields a string it is treated as the address of a remote
    CAS server, and the checks run against a `RemoteStorage` in a subprocess.
    """
    blobs, digests = blobs_digests

    # Actual test function, failing on assertions:
    def __test_bulk_write_read(any_storage, blobs, digests):
        missing_digests = any_storage.missing_blobs(digests)
        assert len(missing_digests) == len(digests)
        for digest in digests:
            assert digest in missing_digests

        # Replace the last blob so that it no longer matches its digest:
        faulty_blobs = list(blobs)
        faulty_blobs[-1] = b'this-is-not-matching'

        results = any_storage.bulk_update_blobs(list(zip(digests, faulty_blobs)))
        assert len(results) == len(digests)
        for result, blob, digest in zip(results[:-1], faulty_blobs[:-1], digests[:-1]):
            assert result.code == 0
            assert any_storage.get_blob(digest).read() == blob
        assert results[-1].code != 0

        missing_digests = any_storage.missing_blobs(digests)
        assert len(missing_digests) == 1
        assert missing_digests[0] == digests[-1]

    # Helper test function for remote storage, to be run in a subprocess:
    def __test_remote_bulk_write_read(queue, remote, blobs, serialized_digests):
        passed = False
        try:
            channel = grpc.insecure_channel(remote)
            remote_storage = RemoteStorage(channel, 'testing')
            digests = []

            for data in serialized_digests:
                digest = remote_execution_pb2.Digest()
                digest.ParseFromString(data)
                digests.append(digest)

            __test_bulk_write_read(remote_storage, blobs, digests)
            passed = True
        except AssertionError:
            # Reported to the parent as a plain failure.
            pass
        finally:
            # Always post an outcome: the parent waits for a value on the
            # queue, so an unexpected error must not leave it empty.
            queue.put(passed)

    if isinstance(any_storage, str):
        serialized_digests = [digest.SerializeToString() for digest in digests]
        assert run_in_subprocess(__test_remote_bulk_write_read,
                                 any_storage, blobs, serialized_digests)
    else:
        __test_bulk_write_read(any_storage, blobs, digests)
|
|
204 |
+ |
|
205 |
+ |
|
206 |
@pytest.mark.parametrize('blobs_digests', zip(BLOBS, BLOBS_DIGESTS))
def test_nonexistent_read(any_storage, blobs_digests):
    """Check that reading a digest that was never written yields ``None``.

    When the fixture yields a string it is treated as the address of a remote
    CAS server, and the checks run against a `RemoteStorage` in a subprocess.
    """
    _, digests = blobs_digests

    # Actual test function, failing on assertions:
    def __test_nonexistent_read(any_storage, digests):
        for digest in digests:
            assert any_storage.get_blob(digest) is None

    # Helper test function for remote storage, to be run in a subprocess:
    def __test_remote_nonexistent_read(queue, remote, serialized_digests):
        passed = False
        try:
            channel = grpc.insecure_channel(remote)
            remote_storage = RemoteStorage(channel, 'testing')
            digests = []

            for data in serialized_digests:
                digest = remote_execution_pb2.Digest()
                digest.ParseFromString(data)
                digests.append(digest)

            __test_nonexistent_read(remote_storage, digests)
            passed = True
        except AssertionError:
            # Reported to the parent as a plain failure.
            pass
        finally:
            # Always post an outcome: the parent waits for a value on the
            # queue, so an unexpected error must not leave it empty.
            queue.put(passed)

    if isinstance(any_storage, str):
        serialized_digests = [digest.SerializeToString() for digest in digests]
        assert run_in_subprocess(__test_remote_nonexistent_read,
                                 any_storage, serialized_digests)
    else:
        __test_nonexistent_read(any_storage, digests)
|
|
239 |
+ |
|
240 |
+ |
|
241 |
+@pytest.mark.parametrize('blobs_digests', [(BLOBS[0], BLOBS_DIGESTS[0])])
|
|
242 |
+def test_lru_eviction(blobs_digests):
|
|
243 |
+ blobs, digests = blobs_digests
|
|
244 |
+ blob1, blob2, blob3, *_ = blobs
|
|
245 |
+ digest1, digest2, digest3, *_ = digests
|
|
185 | 246 |
|
186 |
-def test_lru_eviction():
|
|
187 | 247 |
lru = LRUMemoryCache(8)
|
188 |
- write(lru, abc_digest, abc)
|
|
189 |
- write(lru, defg_digest, defg)
|
|
190 |
- assert lru.has_blob(abc_digest)
|
|
191 |
- assert lru.has_blob(defg_digest)
|
|
192 |
- |
|
193 |
- write(lru, hijk_digest, hijk)
|
|
194 |
- # Check that the LRU evicted abc (it was written first)
|
|
195 |
- assert not lru.has_blob(abc_digest)
|
|
196 |
- assert lru.has_blob(defg_digest)
|
|
197 |
- assert lru.has_blob(hijk_digest)
|
|
198 |
- |
|
199 |
- assert lru.get_blob(defg_digest).read() == defg
|
|
200 |
- write(lru, abc_digest, abc)
|
|
201 |
- # Check that the LRU evicted hijk (since we just read defg)
|
|
202 |
- assert lru.has_blob(abc_digest)
|
|
203 |
- assert lru.has_blob(defg_digest)
|
|
204 |
- assert not lru.has_blob(hijk_digest)
|
|
205 |
- |
|
206 |
- assert lru.has_blob(defg_digest)
|
|
207 |
- write(lru, hijk_digest, abc)
|
|
208 |
- # Check that the LRU evicted abc (since we just checked hijk)
|
|
209 |
- assert not lru.has_blob(abc_digest)
|
|
210 |
- assert lru.has_blob(defg_digest)
|
|
211 |
- assert lru.has_blob(hijk_digest)
|
|
212 |
- |
|
213 |
- |
|
214 |
-def test_with_cache():
|
|
248 |
+ write(lru, digest1, blob1)
|
|
249 |
+ write(lru, digest2, blob2)
|
|
250 |
+ assert lru.has_blob(digest1)
|
|
251 |
+ assert lru.has_blob(digest2)
|
|
252 |
+ |
|
253 |
+ write(lru, digest3, blob3)
|
|
254 |
+ # Check that the LRU evicted blob1 (it was written first)
|
|
255 |
+ assert not lru.has_blob(digest1)
|
|
256 |
+ assert lru.has_blob(digest2)
|
|
257 |
+ assert lru.has_blob(digest3)
|
|
258 |
+ |
|
259 |
+ assert lru.get_blob(digest2).read() == blob2
|
|
260 |
+ write(lru, digest1, blob1)
|
|
261 |
+ # Check that the LRU evicted blob3 (since we just read blob2)
|
|
262 |
+ assert lru.has_blob(digest1)
|
|
263 |
+ assert lru.has_blob(digest2)
|
|
264 |
+ assert not lru.has_blob(digest3)
|
|
265 |
+ |
|
266 |
+ assert lru.has_blob(digest2)
|
|
267 |
+ write(lru, digest3, blob1)
|
|
268 |
+ # Check that the LRU evicted blob1 (since we just checked blob3)
|
|
269 |
+ assert not lru.has_blob(digest1)
|
|
270 |
+ assert lru.has_blob(digest2)
|
|
271 |
+ assert lru.has_blob(digest3)
|
|
272 |
+ |
|
273 |
+ |
|
274 |
+@pytest.mark.parametrize('blobs_digests', [(BLOBS[0], BLOBS_DIGESTS[0])])
|
|
275 |
+def test_with_cache(blobs_digests):
|
|
276 |
+ blobs, digests = blobs_digests
|
|
277 |
+ blob1, blob2, blob3, *_ = blobs
|
|
278 |
+ digest1, digest2, digest3, *_ = digests
|
|
279 |
+ |
|
215 | 280 |
cache = LRUMemoryCache(256)
|
216 | 281 |
fallback = LRUMemoryCache(256)
|
217 | 282 |
with_cache_storage = WithCacheStorage(cache, fallback)
|
218 | 283 |
|
219 |
- assert not with_cache_storage.has_blob(abc_digest)
|
|
220 |
- write(with_cache_storage, abc_digest, abc)
|
|
221 |
- assert cache.has_blob(abc_digest)
|
|
222 |
- assert fallback.has_blob(abc_digest)
|
|
223 |
- assert with_cache_storage.get_blob(abc_digest).read() == abc
|
|
284 |
+ assert not with_cache_storage.has_blob(digest1)
|
|
285 |
+ write(with_cache_storage, digest1, blob1)
|
|
286 |
+ assert cache.has_blob(digest1)
|
|
287 |
+ assert fallback.has_blob(digest1)
|
|
288 |
+ assert with_cache_storage.get_blob(digest1).read() == blob1
|
|
224 | 289 |
|
225 | 290 |
# Even if a blob is in cache, we still need to check if the fallback
|
226 | 291 |
# has it.
|
227 |
- write(cache, defg_digest, defg)
|
|
228 |
- assert not with_cache_storage.has_blob(defg_digest)
|
|
229 |
- write(fallback, defg_digest, defg)
|
|
230 |
- assert with_cache_storage.has_blob(defg_digest)
|
|
292 |
+ write(cache, digest2, blob2)
|
|
293 |
+ assert not with_cache_storage.has_blob(digest2)
|
|
294 |
+ write(fallback, digest2, blob2)
|
|
295 |
+ assert with_cache_storage.has_blob(digest2)
|
|
231 | 296 |
|
232 | 297 |
# When a blob is in the fallback but not the cache, reading it should
|
233 | 298 |
# put it into the cache.
|
234 |
- write(fallback, hijk_digest, hijk)
|
|
235 |
- assert with_cache_storage.get_blob(hijk_digest).read() == hijk
|
|
236 |
- assert cache.has_blob(hijk_digest)
|
|
237 |
- assert cache.get_blob(hijk_digest).read() == hijk
|
|
238 |
- assert cache.has_blob(hijk_digest)
|
|
299 |
+ write(fallback, digest3, blob3)
|
|
300 |
+ assert with_cache_storage.get_blob(digest3).read() == blob3
|
|
301 |
+ assert cache.has_blob(digest3)
|
|
302 |
+ assert cache.get_blob(digest3).read() == blob3
|
|
303 |
+ assert cache.has_blob(digest3)
|
1 |
+# Copyright (C) 2018 Bloomberg LP
|
|
2 |
+#
|
|
3 |
+# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4 |
+# you may not use this file except in compliance with the License.
|
|
5 |
+# You may obtain a copy of the License at
|
|
6 |
+#
|
|
7 |
+# <http://www.apache.org/licenses/LICENSE-2.0>
|
|
8 |
+#
|
|
9 |
+# Unless required by applicable law or agreed to in writing, software
|
|
10 |
+# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11 |
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12 |
+# See the License for the specific language governing permissions and
|
|
13 |
+# limitations under the License.
|
|
14 |
+ |
|
15 |
+ |
|
16 |
+from concurrent import futures
|
|
17 |
+from contextlib import contextmanager
|
|
18 |
+import multiprocessing
|
|
19 |
+import os
|
|
20 |
+import signal
|
|
21 |
+import tempfile
|
|
22 |
+ |
|
23 |
+import grpc
|
|
24 |
+import psutil
|
|
25 |
+import pytest_cov
|
|
26 |
+ |
|
27 |
+from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
|
28 |
+from buildgrid.server.cas.service import ByteStreamService
|
|
29 |
+from buildgrid.server.cas.service import ContentAddressableStorageService
|
|
30 |
+from buildgrid.server.cas.instance import ByteStreamInstance
|
|
31 |
+from buildgrid.server.cas.instance import ContentAddressableStorageInstance
|
|
32 |
+from buildgrid.server.cas.storage.disk import DiskStorage
|
|
33 |
+ |
|
34 |
+ |
|
35 |
@contextmanager
def serve_cas(instances):
    """Context manager yielding a `Server` built for the given instance names.

    ``quit()`` is called on the server when the ``with`` block is left,
    whether it ends normally or through an exception.
    """
    cas_server = Server(instances)
    try:
        yield cas_server
    finally:
        cas_server.quit()
|
|
42 |
+ |
|
43 |
+ |
|
44 |
def kill_process_tree(pid):
    """Kill the process identified by `pid` together with all its descendants.

    Descendants are killed first, then the process itself. Processes that
    cannot be killed for lack of permission, or that have already exited,
    are skipped silently.
    """
    try:
        proc = psutil.Process(pid)
        children = proc.children(recursive=True)
    except psutil.NoSuchProcess:
        # The process is already gone, nothing is left to kill.
        return

    def kill_proc(p):
        try:
            p.kill()
        except psutil.AccessDenied:
            # Ignore this error, it can happen with
            # some setuid bwrap processes.
            pass
        except psutil.NoSuchProcess:
            # The process exited between being listed and being killed.
            pass

    # Bloody Murder
    for child in children:
        kill_proc(child)
    kill_proc(proc)
|
|
60 |
+ |
|
61 |
+ |
|
62 |
def run_in_subprocess(function, *arguments):
    """Run `function` in a child process and return the value it reports.

    The child is started with ``function(queue, *arguments)`` and is expected
    to put exactly one result on `queue`; that result is returned.

    Raises:
        RuntimeError: if the child exits without putting anything on the
            queue (for instance because it crashed).
        KeyboardInterrupt: re-raised after the child process tree was killed.
    """
    # Imported locally so the block brings its own dependency into scope.
    from queue import Empty

    queue = multiprocessing.Queue()
    # Use subprocess to avoid creation of gRPC threads in main process
    # See https://github.com/grpc/grpc/blob/master/doc/fork_support.md
    process = multiprocessing.Process(target=function,
                                      args=(queue, *arguments))

    try:
        process.start()

        # Poll rather than block forever, so a child that died without
        # reporting a result is noticed.
        while True:
            try:
                result = queue.get(timeout=0.1)
                break
            except Empty:
                if process.is_alive():
                    continue

            # The child is gone: pick up a result it may have posted right
            # before exiting, otherwise give up.
            try:
                result = queue.get(timeout=0.1)
                break
            except Empty:
                raise RuntimeError(
                    'Subprocess exited with code {} without returning a result'
                    .format(process.exitcode))

        process.join()
    except KeyboardInterrupt:
        # pid is None until the process has started, and psutil treats a
        # None pid as the current process, so never pass it on.
        if process.pid is not None:
            kill_process_tree(process.pid)
        raise

    return result
|
|
79 |
+ |
|
80 |
+ |
|
81 |
class Server:
    """CAS and ByteStream gRPC server running in a child process.

    The server process and this object each open their own ``DiskStorage``
    on the same temporary directory, so the ``has``/``compare_*`` helpers
    inspect what the server stored without going through gRPC.

    Attributes set by ``__init__``:
        instances: The instance names the services were registered under.
        port: Port reported back by the server process.
        remote: ``'localhost:<port>'`` address string for clients.
    """

    def __init__(self, instances):
        """Start the server process and wait for it to report its port.

        Args:
            instances: Iterable of instance names to register.
        """
        self.instances = instances

        self.__storage_path = tempfile.TemporaryDirectory()
        self.__storage = DiskStorage(self.__storage_path.name)

        self.__queue = multiprocessing.Queue()
        self.__process = multiprocessing.Process(
            target=Server.serve,
            args=(self.__queue, self.instances, self.__storage_path.name))
        self.__process.start()

        # Blocks until serve() has started the server and put its port.
        self.port = self.__queue.get()
        self.remote = 'localhost:{}'.format(self.port)

    @classmethod
    def serve(cls, queue, instances, storage_path):
        """Entry point of the server process.

        Args:
            queue: Queue on which the bound port is reported.
            instances: Iterable of instance names to register.
            storage_path: Directory backing the ``DiskStorage``.
        """
        # NOTE(review): presumably lets coverage data be saved when the
        # process is terminated by quit() - confirm against pytest-cov docs.
        pytest_cov.embed.cleanup_on_sigterm()

        # Use max_workers default from Python 3.5+
        max_workers = (os.cpu_count() or 1) * 5
        server = grpc.server(futures.ThreadPoolExecutor(max_workers))
        # Port 0: the actual port number is returned by the call.
        port = server.add_insecure_port('localhost:0')

        storage = DiskStorage(storage_path)

        bs_service = ByteStreamService(server)
        cas_service = ContentAddressableStorageService(server)
        for name in instances:
            bs_service.add_instance(name, ByteStreamInstance(storage))
            cas_service.add_instance(name, ContentAddressableStorageInstance(storage))

        server.start()
        queue.put(port)

        # Sleep until a signal arrives (quit() terminates this process).
        signal.pause()

    def __read_blob(self, digest):
        """Return the stored content for ``digest``, or None when absent.

        The reader handed out by the storage is closed once its content
        has been read, so no handle is leaked per comparison.
        """
        if not self.__storage.has_blob(digest):
            return None

        # Assumes the reader is file-like, i.e. it also offers close().
        reader = self.__storage.get_blob(digest)
        try:
            return reader.read()
        finally:
            reader.close()

    def has(self, digest):
        """Return whether the storage holds a blob for ``digest``."""
        return self.__storage.has_blob(digest)

    def compare_blobs(self, digest, blob):
        """Return True when ``digest`` is stored and its content equals ``blob``."""
        stored_blob = self.__read_blob(digest)
        if stored_blob is None:
            return False

        return blob == stored_blob

    def compare_messages(self, digest, message):
        """Return True when ``digest`` is stored and equals ``message`` serialized.

        ``message`` only needs to provide ``SerializeToString()``.
        """
        stored_blob = self.__read_blob(digest)
        if stored_blob is None:
            return False

        return message.SerializeToString() == stored_blob

    def compare_files(self, digest, file_path):
        """Return True when ``digest`` is stored and equals the file's bytes.

        The file at ``file_path`` is only opened when the blob exists.
        """
        stored_blob = self.__read_blob(digest)
        if stored_blob is None:
            return False

        with open(file_path, 'rb') as file_bytes:
            file_blob = file_bytes.read()

        return file_blob == stored_blob

    def compare_directories(self, digest, directory_path):
        """Check a stored ``Directory`` tree against a directory on disk.

        Returns:
            False when ``digest`` is not stored or ``directory_path`` is not
            a directory, True when the whole tree matches.

        Raises:
            AssertionError: On the first mismatch found while walking the
                tree (entry counts, names, kinds, executable bit, file
                content, symlink target, or a missing sub-directory blob).
        """
        if not self.__storage.has_blob(digest):
            return False
        elif not os.path.isdir(directory_path):
            return False

        def _compare_folders(digest, path):
            blob = self.__read_blob(digest)
            assert blob is not None

            directory = remote_execution_pb2.Directory()
            directory.ParseFromString(blob)

            # Classify the on-disk entries without following symlinks.
            files, directories, symlinks = [], [], []
            for entry in os.scandir(path):
                if entry.is_file(follow_symlinks=False):
                    files.append(entry.name)

                elif entry.is_dir(follow_symlinks=False):
                    directories.append(entry.name)

                elif os.path.islink(entry.path):
                    symlinks.append(entry.name)

            assert len(files) == len(directory.files)
            assert len(directories) == len(directory.directories)
            assert len(symlinks) == len(directory.symlinks)

            for file_node in directory.files:
                file_path = os.path.join(path, file_node.name)

                assert file_node.name in files
                assert os.path.isfile(file_path)
                assert not os.path.islink(file_path)
                if file_node.is_executable:
                    assert os.access(file_path, os.X_OK)

                assert self.compare_files(file_node.digest, file_path)

            for directory_node in directory.directories:
                directory_path = os.path.join(path, directory_node.name)

                assert directory_node.name in directories
                assert os.path.exists(directory_path)
                assert not os.path.islink(directory_path)

                assert _compare_folders(directory_node.digest, directory_path)

            for symlink_node in directory.symlinks:
                symlink_path = os.path.join(path, symlink_node.name)

                assert symlink_node.name in symlinks
                assert os.path.islink(symlink_path)
                assert os.readlink(symlink_path) == symlink_node.target

            return True

        return _compare_folders(digest, directory_path)

    def quit(self):
        """Terminate the server process and remove the storage directory."""
        if self.__process:
            self.__process.terminate()
            self.__process.join()

        self.__storage_path.cleanup()
|