[Notes] [Git][BuildGrid/buildgrid][mablanch/77-cas-uploader] 3 commits: tests/utils/cas.py: New CAS server helper




Martin Blanchard pushed to branch mablanch/77-cas-uploader at BuildGrid / buildgrid

Commits:

9 changed files:

Changes:

  • setup.py
    @@ -89,6 +89,7 @@ tests_require = [
         'coverage == 4.4.0',
         'moto',
         'pep8',
    +    'psutil',
         'pytest == 3.6.4',
         'pytest-cov >= 2.6.0',
         'pytest-pep8',
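    (psutil is used by the new CAS server helper in tests/utils/cas.py below,
    to terminate the server's whole process tree on interrupt.)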

  • tests/cas/data/hello.cc
    +#include <iostream>
    +
    +int main()
    +{
    +  std::cout << "Hello, World!" << std::endl;
    +  return 0;
    +}

  • tests/cas/data/hello/hello.c
    +#include <stdio.h>
    +
    +#include "hello.h"
    +
    +int main()
    +{
    +  printf("%s\n", HELLO_WORLD);
    +  return 0;
    +}

  • tests/cas/data/hello/hello.h
    +#define HELLO_WORLD "Hello, World!"

  • tests/cas/data/void
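    (new empty file: gives the upload tests a zero-byte fixture)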

  • tests/cas/test_client.py
    +# Copyright (C) 2018 Bloomberg LP
    +#
    +# Licensed under the Apache License, Version 2.0 (the "License");
    +# you may not use this file except in compliance with the License.
    +# You may obtain a copy of the License at
    +#
    +#  <http://www.apache.org/licenses/LICENSE-2.0>
    +#
    +# Unless required by applicable law or agreed to in writing, software
    +# distributed under the License is distributed on an "AS IS" BASIS,
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    +# See the License for the specific language governing permissions and
    +# limitations under the License.
    +
    +# pylint: disable=redefined-outer-name
    +
    +import os
    +
    +import grpc
    +import pytest
    +
    +from buildgrid.client.cas import upload
    +from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    +from buildgrid.utils import create_digest
    +
    +from ..utils.cas import serve_cas, run_in_subprocess
    +
    +
    +INSTANCES = ['', 'instance']
    +BLOBS = [(b'',), (b'test-string',), (b'test', b'string')]
    +MESSAGES = [
    +    (remote_execution_pb2.Directory(),),
    +    (remote_execution_pb2.SymlinkNode(name='name', target='target'),),
    +    (remote_execution_pb2.Action(do_not_cache=True),
    +     remote_execution_pb2.ActionResult(exit_code=12))
    +]
    +DATA_DIR = os.path.join(
    +    os.path.dirname(os.path.realpath(__file__)), 'data')
    +FILES = [
    +    (os.path.join(DATA_DIR, 'void'),),
    +    (os.path.join(DATA_DIR, 'hello.cc'),),
    +    (os.path.join(DATA_DIR, 'hello', 'hello.c'),
    +     os.path.join(DATA_DIR, 'hello', 'hello.h'))]
    +DIRECTORIES = [
    +    (os.path.join(DATA_DIR, 'hello'),),
    +    (os.path.join(DATA_DIR, 'hello'), DATA_DIR)]
    +
    +
    +@pytest.mark.parametrize('blobs', BLOBS)
    +@pytest.mark.parametrize('instance', INSTANCES)
    +def test_upload_blob(instance, blobs):
    +    # Actual test function, to be run in a subprocess:
    +    def __test_upload_blob(queue, remote, instance, blobs):
    +        # Open a channel to the remote CAS server:
    +        channel = grpc.insecure_channel(remote)
    +
    +        digests = []
    +        with upload(channel, instance) as uploader:
    +            if len(blobs) > 1:
    +                for blob in blobs:
    +                    digest = uploader.put_blob(blob, queue=True)
    +                    digests.append(digest.SerializeToString())
    +            else:
    +                digest = uploader.put_blob(blobs[0], queue=False)
    +                digests.append(digest.SerializeToString())
    +
    +        queue.put(digests)
    +
    +    # Start a minimal CAS server in a subprocess:
    +    with serve_cas([instance]) as server:
    +        digests = run_in_subprocess(__test_upload_blob,
    +                                    server.remote, instance, blobs)
    +
    +        for blob, digest_blob in zip(blobs, digests):
    +            digest = remote_execution_pb2.Digest()
    +            digest.ParseFromString(digest_blob)
    +
    +            assert server.has(digest)
    +            assert server.compare_blobs(digest, blob)
    +
    +
    +@pytest.mark.parametrize('messages', MESSAGES)
    +@pytest.mark.parametrize('instance', INSTANCES)
    +def test_upload_message(instance, messages):
    +    # Actual test function, to be run in a subprocess:
    +    def __test_upload_message(queue, remote, instance, messages):
    +        # Open a channel to the remote CAS server:
    +        channel = grpc.insecure_channel(remote)
    +
    +        digests = []
    +        with upload(channel, instance) as uploader:
    +            if len(messages) > 1:
    +                for message in messages:
    +                    digest = uploader.put_message(message, queue=True)
    +                    digests.append(digest.SerializeToString())
    +            else:
    +                digest = uploader.put_message(messages[0], queue=False)
    +                digests.append(digest.SerializeToString())
    +
    +        queue.put(digests)
    +
    +    # Start a minimal CAS server in a subprocess:
    +    with serve_cas([instance]) as server:
    +        digests = run_in_subprocess(__test_upload_message,
    +                                    server.remote, instance, messages)
    +
    +        for message, digest_blob in zip(messages, digests):
    +            digest = remote_execution_pb2.Digest()
    +            digest.ParseFromString(digest_blob)
    +
    +            assert server.has(digest)
    +            assert server.compare_messages(digest, message)
    +
    +
    +@pytest.mark.parametrize('file_paths', FILES)
    +@pytest.mark.parametrize('instance', INSTANCES)
    +def test_upload_file(instance, file_paths):
    +    # Actual test function, to be run in a subprocess:
    +    def __test_upload_file(queue, remote, instance, file_paths):
    +        # Open a channel to the remote CAS server:
    +        channel = grpc.insecure_channel(remote)
    +
    +        digests = []
    +        with upload(channel, instance) as uploader:
    +            if len(file_paths) > 1:
    +                for file_path in file_paths:
    +                    digest = uploader.upload_file(file_path, queue=True)
    +                    digests.append(digest.SerializeToString())
    +            else:
    +                digest = uploader.upload_file(file_paths[0], queue=False)
    +                digests.append(digest.SerializeToString())
    +
    +        queue.put(digests)
    +
    +    # Start a minimal CAS server in a subprocess:
    +    with serve_cas([instance]) as server:
    +        digests = run_in_subprocess(__test_upload_file,
    +                                    server.remote, instance, file_paths)
    +
    +        for file_path, digest_blob in zip(file_paths, digests):
    +            digest = remote_execution_pb2.Digest()
    +            digest.ParseFromString(digest_blob)
    +
    +            assert server.has(digest)
    +            assert server.compare_files(digest, file_path)
    +
    +
    +@pytest.mark.parametrize('directory_paths', DIRECTORIES)
    +@pytest.mark.parametrize('instance', INSTANCES)
    +def test_upload_directory(instance, directory_paths):
    +    # Actual test function, to be run in a subprocess:
    +    def __test_upload_directory(queue, remote, instance, directory_paths):
    +        # Open a channel to the remote CAS server:
    +        channel = grpc.insecure_channel(remote)
    +
    +        digests = []
    +        with upload(channel, instance) as uploader:
    +            if len(directory_paths) > 1:
    +                for directory_path in directory_paths:
    +                    digest = uploader.upload_directory(directory_path, queue=True)
    +                    digests.append(digest.SerializeToString())
    +            else:
    +                digest = uploader.upload_directory(directory_paths[0], queue=False)
    +                digests.append(digest.SerializeToString())
    +
    +        queue.put(digests)
    +
    +    # Start a minimal CAS server in a subprocess:
    +    with serve_cas([instance]) as server:
    +        digests = run_in_subprocess(__test_upload_directory,
    +                                    server.remote, instance, directory_paths)
    +
    +        for directory_path, digest_blob in zip(directory_paths, digests):
    +            digest = remote_execution_pb2.Digest()
    +            digest.ParseFromString(digest_blob)
    +
    +            assert server.compare_directories(digest, directory_path)
    +
    +
    +@pytest.mark.parametrize('directory_paths', DIRECTORIES)
    +@pytest.mark.parametrize('instance', INSTANCES)
    +def test_upload_tree(instance, directory_paths):
    +    # Actual test function, to be run in a subprocess:
    +    def __test_upload_tree(queue, remote, instance, directory_paths):
    +        # Open a channel to the remote CAS server:
    +        channel = grpc.insecure_channel(remote)
    +
    +        digests = []
    +        with upload(channel, instance) as uploader:
    +            if len(directory_paths) > 1:
    +                for directory_path in directory_paths:
    +                    digest = uploader.upload_tree(directory_path, queue=True)
    +                    digests.append(digest.SerializeToString())
    +            else:
    +                digest = uploader.upload_tree(directory_paths[0], queue=False)
    +                digests.append(digest.SerializeToString())
    +
    +        queue.put(digests)
    +
    +    # Start a minimal CAS server in a subprocess:
    +    with serve_cas([instance]) as server:
    +        digests = run_in_subprocess(__test_upload_tree,
    +                                    server.remote, instance, directory_paths)
    +
    +        for directory_path, digest_blob in zip(directory_paths, digests):
    +            digest = remote_execution_pb2.Digest()
    +            digest.ParseFromString(digest_blob)
    +
    +            assert server.has(digest)
    +
    +            tree = remote_execution_pb2.Tree()
    +            tree.ParseFromString(server.get(digest))
    +
    +            directory_digest = create_digest(tree.root.SerializeToString())
    +
    +            assert server.compare_directories(directory_digest, directory_path)

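    The pattern shared by all five tests above: the gRPC client code runs in
    a subprocess (to avoid creating gRPC threads in the main pytest process,
    see https://github.com/grpc/grpc/blob/master/doc/fork_support.md), and
    results come back through a multiprocessing queue that run_in_subprocess()
    always passes as the first argument. A minimal sketch, using only the
    helpers this push adds in tests/utils/cas.py:

        import grpc

        from ..utils.cas import serve_cas, run_in_subprocess

        def __check(queue, remote):
            # gRPC objects live and die in the child process:
            channel = grpc.insecure_channel(remote)
            queue.put(channel is not None)  # result collected by the parent

        with serve_cas(['instance']) as server:  # CAS server, also a subprocess
            assert run_in_subprocess(__check, server.remote)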
  • tests/cas/test_storage.py
    @@ -19,220 +19,285 @@
     
     import tempfile
     
    -from unittest import mock
    -
     import boto3
     import grpc
    -from grpc._server import _Context
     import pytest
     from moto import mock_s3
     
    -from buildgrid._protos.build.bazel.remote.execution.v2.remote_execution_pb2 import Digest
    -from buildgrid.server.cas import service
    -from buildgrid.server.cas.instance import ByteStreamInstance, ContentAddressableStorageInstance
    -from buildgrid.server.cas.storage import remote
    +from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    +from buildgrid.server.cas.storage.remote import RemoteStorage
     from buildgrid.server.cas.storage.lru_memory_cache import LRUMemoryCache
     from buildgrid.server.cas.storage.disk import DiskStorage
     from buildgrid.server.cas.storage.s3 import S3Storage
     from buildgrid.server.cas.storage.with_cache import WithCacheStorage
     from buildgrid.settings import HASH
     
    +from ..utils.cas import serve_cas, run_in_subprocess
     
    -context = mock.create_autospec(_Context)
    -server = mock.create_autospec(grpc.server)
    -
    -abc = b"abc"
    -abc_digest = Digest(hash=HASH(abc).hexdigest(), size_bytes=3)
    -defg = b"defg"
    -defg_digest = Digest(hash=HASH(defg).hexdigest(), size_bytes=4)
    -hijk = b"hijk"
    -hijk_digest = Digest(hash=HASH(hijk).hexdigest(), size_bytes=4)
    -
    -
    -def write(storage, digest, blob):
    -    session = storage.begin_write(digest)
    -    session.write(blob)
    -    storage.commit_write(digest, session)
    -
    -
    -class MockCASStorage(ByteStreamInstance, ContentAddressableStorageInstance):
    -
    -    def __init__(self):
    -        storage = LRUMemoryCache(256)
    -        super().__init__(storage)
    -
    -
    -# Mock a CAS server with LRUStorage to return "calls" made to it
    -class MockStubServer:
    -
    -    def __init__(self):
    -        instances = {"": MockCASStorage(), "dna": MockCASStorage()}
    -        self._requests = []
    -        with mock.patch.object(service, 'bytestream_pb2_grpc'):
    -            self._bs_service = service.ByteStreamService(server)
    -            for k, v in instances.items():
    -                self._bs_service.add_instance(k, v)
    -        with mock.patch.object(service, 'remote_execution_pb2_grpc'):
    -            self._cas_service = service.ContentAddressableStorageService(server)
    -            for k, v in instances.items():
    -                self._cas_service.add_instance(k, v)
    -
    -    def Read(self, request):
    -        yield from self._bs_service.Read(request, context)
    -
    -    def Write(self, request):
    -        self._requests.append(request)
    -        if request.finish_write:
    -            response = self._bs_service.Write(self._requests, context)
    -            self._requests = []
    -            return response
    -
    -        return None
    -
    -    def FindMissingBlobs(self, request):
    -        return self._cas_service.FindMissingBlobs(request, context)
    -
    -    def BatchUpdateBlobs(self, request):
    -        return self._cas_service.BatchUpdateBlobs(request, context)
     
    +BLOBS = [(b'abc', b'defg', b'hijk', b'')]
    +BLOBS_DIGESTS = [tuple([remote_execution_pb2.Digest(hash=HASH(blob).hexdigest(),
    +                                                    size_bytes=len(blob)) for blob in blobs])
    +                 for blobs in BLOBS]
     
    -# Instances of MockCASStorage
    -@pytest.fixture(params=["", "dna"])
    -def instance(params):
    -    return {params, MockCASStorage()}
     
    -
    -# General tests for all storage providers
    -
    -
    -@pytest.fixture(params=["lru", "disk", "s3", "lru_disk", "disk_s3", "remote"])
    +@pytest.fixture(params=['lru', 'disk', 's3', 'lru_disk', 'disk_s3', 'remote'])
     def any_storage(request):
    -    if request.param == "lru":
    +    if request.param == 'lru':
             yield LRUMemoryCache(256)
    -    elif request.param == "disk":
    +    elif request.param == 'disk':
             with tempfile.TemporaryDirectory() as path:
                 yield DiskStorage(path)
    -    elif request.param == "s3":
    +    elif request.param == 's3':
             with mock_s3():
    -            boto3.resource('s3').create_bucket(Bucket="testing")
    -            yield S3Storage("testing")
    -    elif request.param == "lru_disk":
    +            boto3.resource('s3').create_bucket(Bucket='testing')
    +            yield S3Storage('testing')
    +    elif request.param == 'lru_disk':
             # LRU cache with a uselessly small limit, so requests always fall back
             with tempfile.TemporaryDirectory() as path:
                 yield WithCacheStorage(LRUMemoryCache(1), DiskStorage(path))
    -    elif request.param == "disk_s3":
    +    elif request.param == 'disk_s3':
             # Disk-based cache of S3, but we don't delete files, so requests
             # are always handled by the cache
             with tempfile.TemporaryDirectory() as path:
                 with mock_s3():
    -                boto3.resource('s3').create_bucket(Bucket="testing")
    -                yield WithCacheStorage(DiskStorage(path), S3Storage("testing"))
    -    elif request.param == "remote":
    -        with mock.patch.object(remote, 'bytestream_pb2_grpc'):
    -            with mock.patch.object(remote, 'remote_execution_pb2_grpc'):
    -                mock_server = MockStubServer()
    -                storage = remote.RemoteStorage(None, "")
    -                storage._stub_bs = mock_server
    -                storage._stub_cas = mock_server
    -                yield storage
    -
    -
    -def test_initially_empty(any_storage):
    -    assert not any_storage.has_blob(abc_digest)
    -    assert not any_storage.has_blob(defg_digest)
    -    assert not any_storage.has_blob(hijk_digest)
    -
    -
    -def test_basic_write_read(any_storage):
    -    assert not any_storage.has_blob(abc_digest)
    -    write(any_storage, abc_digest, abc)
    -    assert any_storage.has_blob(abc_digest)
    -    assert any_storage.get_blob(abc_digest).read() == abc
    -
    -    # Try writing the same digest again (since it's valid to do that)
    -    write(any_storage, abc_digest, abc)
    -    assert any_storage.has_blob(abc_digest)
    -    assert any_storage.get_blob(abc_digest).read() == abc
    -
    -
    -def test_bulk_write_read(any_storage):
    -    missing_digests = any_storage.missing_blobs([abc_digest, defg_digest, hijk_digest])
    -    assert len(missing_digests) == 3
    -    assert abc_digest in missing_digests
    -    assert defg_digest in missing_digests
    -    assert hijk_digest in missing_digests
    +                boto3.resource('s3').create_bucket(Bucket='testing')
    +                yield WithCacheStorage(DiskStorage(path), S3Storage('testing'))
    +    elif request.param == 'remote':
    +        with serve_cas(['testing']) as server:
    +            yield server.remote
     
    -    bulk_update_results = any_storage.bulk_update_blobs([(abc_digest, abc), (defg_digest, defg),
    -                                                         (hijk_digest, b'????')])
    -    assert len(bulk_update_results) == 3
    -    assert bulk_update_results[0].code == 0
    -    assert bulk_update_results[1].code == 0
    -    assert bulk_update_results[2].code != 0
    -
    -    missing_digests = any_storage.missing_blobs([abc_digest, defg_digest, hijk_digest])
    -    assert missing_digests == [hijk_digest]
    -
    -    assert any_storage.get_blob(abc_digest).read() == abc
    -    assert any_storage.get_blob(defg_digest).read() == defg
    -
    -
    -def test_nonexistent_read(any_storage):
    -    assert any_storage.get_blob(abc_digest) is None
     
    +def write(storage, digest, blob):
    +    session = storage.begin_write(digest)
    +    session.write(blob)
    +    storage.commit_write(digest, session)
     
    -# Tests for special behavior of individual storage providers
     
    +@pytest.mark.parametrize('blobs_digests', zip(BLOBS, BLOBS_DIGESTS))
    +def test_initially_empty(any_storage, blobs_digests):
    +    _, digests = blobs_digests
    +
    +    # Actual test function, failing on assertions:
    +    def __test_initially_empty(any_storage, digests):
    +        for digest in digests:
    +            assert not any_storage.has_blob(digest)
    +
    +    # Helper test function for remote storage, to be run in a subprocess:
    +    def __test_remote_initially_empty(queue, remote, serialized_digests):
    +        channel = grpc.insecure_channel(remote)
    +        remote_storage = RemoteStorage(channel, 'testing')
    +        digests = []
    +
    +        for data in serialized_digests:
    +            digest = remote_execution_pb2.Digest()
    +            digest.ParseFromString(data)
    +            digests.append(digest)
    +
    +        try:
    +            __test_initially_empty(remote_storage, digests)
    +        except AssertionError:
    +            queue.put(False)
    +        else:
    +            queue.put(True)
    +
    +    if isinstance(any_storage, str):
    +        serialized_digests = [digest.SerializeToString() for digest in digests]
    +        assert run_in_subprocess(__test_remote_initially_empty,
    +                                 any_storage, serialized_digests)
    +    else:
    +        __test_initially_empty(any_storage, digests)
    +
    +
    +@pytest.mark.parametrize('blobs_digests', zip(BLOBS, BLOBS_DIGESTS))
    +def test_basic_write_read(any_storage, blobs_digests):
    +    blobs, digests = blobs_digests
    +
    +    # Actual test function, failing on assertions:
    +    def __test_basic_write_read(any_storage, blobs, digests):
    +        for blob, digest in zip(blobs, digests):
    +            assert not any_storage.has_blob(digest)
    +            write(any_storage, digest, blob)
    +            assert any_storage.has_blob(digest)
    +            assert any_storage.get_blob(digest).read() == blob
    +
    +            # Try writing the same digest again (since it's valid to do that)
    +            write(any_storage, digest, blob)
    +            assert any_storage.has_blob(digest)
    +            assert any_storage.get_blob(digest).read() == blob
    +
    +    # Helper test function for remote storage, to be run in a subprocess:
    +    def __test_remote_basic_write_read(queue, remote, blobs, serialized_digests):
    +        channel = grpc.insecure_channel(remote)
    +        remote_storage = RemoteStorage(channel, 'testing')
    +        digests = []
    +
    +        for data in serialized_digests:
    +            digest = remote_execution_pb2.Digest()
    +            digest.ParseFromString(data)
    +            digests.append(digest)
    +
    +        try:
    +            __test_basic_write_read(remote_storage, blobs, digests)
    +        except AssertionError:
    +            queue.put(False)
    +        else:
    +            queue.put(True)
    +
    +    if isinstance(any_storage, str):
    +        serialized_digests = [digest.SerializeToString() for digest in digests]
    +        assert run_in_subprocess(__test_remote_basic_write_read,
    +                                 any_storage, blobs, serialized_digests)
    +    else:
    +        __test_basic_write_read(any_storage, blobs, digests)
    +
    +
    +@pytest.mark.parametrize('blobs_digests', zip(BLOBS, BLOBS_DIGESTS))
    +def test_bulk_write_read(any_storage, blobs_digests):
    +    blobs, digests = blobs_digests
    +
    +    # Actual test function, failing on assertions:
    +    def __test_bulk_write_read(any_storage, blobs, digests):
    +        missing_digests = any_storage.missing_blobs(digests)
    +        assert len(missing_digests) == len(digests)
    +        for digest in digests:
    +            assert digest in missing_digests
    +
    +        faulty_blobs = list(blobs)
    +        faulty_blobs[-1] = b'this-is-not-matching'
    +
    +        results = any_storage.bulk_update_blobs(list(zip(digests, faulty_blobs)))
    +        assert len(results) == len(digests)
    +        for result, blob, digest in zip(results[:-1], faulty_blobs[:-1], digests[:-1]):
    +            assert result.code == 0
    +            assert any_storage.get_blob(digest).read() == blob
    +        assert results[-1].code != 0
    +
    +        missing_digests = any_storage.missing_blobs(digests)
    +        assert len(missing_digests) == 1
    +        assert missing_digests[0] == digests[-1]
    +
    +    # Helper test function for remote storage, to be run in a subprocess:
    +    def __test_remote_bulk_write_read(queue, remote, blobs, serialized_digests):
    +        channel = grpc.insecure_channel(remote)
    +        remote_storage = RemoteStorage(channel, 'testing')
    +        digests = []
    +
    +        for data in serialized_digests:
    +            digest = remote_execution_pb2.Digest()
    +            digest.ParseFromString(data)
    +            digests.append(digest)
    +
    +        try:
    +            __test_bulk_write_read(remote_storage, blobs, digests)
    +        except AssertionError:
    +            queue.put(False)
    +        else:
    +            queue.put(True)
    +
    +    if isinstance(any_storage, str):
    +        serialized_digests = [digest.SerializeToString() for digest in digests]
    +        assert run_in_subprocess(__test_remote_bulk_write_read,
    +                                 any_storage, blobs, serialized_digests)
    +    else:
    +        __test_bulk_write_read(any_storage, blobs, digests)
    +
    +
    +@pytest.mark.parametrize('blobs_digests', zip(BLOBS, BLOBS_DIGESTS))
    +def test_nonexistent_read(any_storage, blobs_digests):
    +    _, digests = blobs_digests
    +
    +    # Actual test function, failing on assertions:
    +    def __test_nonexistent_read(any_storage, digests):
    +        for digest in digests:
    +            assert any_storage.get_blob(digest) is None
    +
    +    # Helper test function for remote storage, to be run in a subprocess:
    +    def __test_remote_nonexistent_read(queue, remote, serialized_digests):
    +        channel = grpc.insecure_channel(remote)
    +        remote_storage = RemoteStorage(channel, 'testing')
    +        digests = []
    +
    +        for data in serialized_digests:
    +            digest = remote_execution_pb2.Digest()
    +            digest.ParseFromString(data)
    +            digests.append(digest)
    +
    +        try:
    +            __test_nonexistent_read(remote_storage, digests)
    +        except AssertionError:
    +            queue.put(False)
    +        else:
    +            queue.put(True)
    +
    +    if isinstance(any_storage, str):
    +        serialized_digests = [digest.SerializeToString() for digest in digests]
    +        assert run_in_subprocess(__test_remote_nonexistent_read,
    +                                 any_storage, serialized_digests)
    +    else:
    +        __test_nonexistent_read(any_storage, digests)
    +
    +
    +@pytest.mark.parametrize('blobs_digests', [(BLOBS[0], BLOBS_DIGESTS[0])])
    +def test_lru_eviction(blobs_digests):
    +    blobs, digests = blobs_digests
    +    blob1, blob2, blob3, *_ = blobs
    +    digest1, digest2, digest3, *_ = digests
     
    -def test_lru_eviction():
         lru = LRUMemoryCache(8)
    -    write(lru, abc_digest, abc)
    -    write(lru, defg_digest, defg)
    -    assert lru.has_blob(abc_digest)
    -    assert lru.has_blob(defg_digest)
    -
    -    write(lru, hijk_digest, hijk)
    -    # Check that the LRU evicted abc (it was written first)
    -    assert not lru.has_blob(abc_digest)
    -    assert lru.has_blob(defg_digest)
    -    assert lru.has_blob(hijk_digest)
    -
    -    assert lru.get_blob(defg_digest).read() == defg
    -    write(lru, abc_digest, abc)
    -    # Check that the LRU evicted hijk (since we just read defg)
    -    assert lru.has_blob(abc_digest)
    -    assert lru.has_blob(defg_digest)
    -    assert not lru.has_blob(hijk_digest)
    -
    -    assert lru.has_blob(defg_digest)
    -    write(lru, hijk_digest, abc)
    -    # Check that the LRU evicted abc (since we just checked hijk)
    -    assert not lru.has_blob(abc_digest)
    -    assert lru.has_blob(defg_digest)
    -    assert lru.has_blob(hijk_digest)
    -
    -
    -def test_with_cache():
    +    write(lru, digest1, blob1)
    +    write(lru, digest2, blob2)
    +    assert lru.has_blob(digest1)
    +    assert lru.has_blob(digest2)
    +
    +    write(lru, digest3, blob3)
    +    # Check that the LRU evicted blob1 (it was written first)
    +    assert not lru.has_blob(digest1)
    +    assert lru.has_blob(digest2)
    +    assert lru.has_blob(digest3)
    +
    +    assert lru.get_blob(digest2).read() == blob2
    +    write(lru, digest1, blob1)
    +    # Check that the LRU evicted blob3 (since we just read blob2)
    +    assert lru.has_blob(digest1)
    +    assert lru.has_blob(digest2)
    +    assert not lru.has_blob(digest3)
    +
    +    assert lru.has_blob(digest2)
    +    write(lru, digest3, blob1)
    +    # Check that the LRU evicted blob1 (since we just checked blob3)
    +    assert not lru.has_blob(digest1)
    +    assert lru.has_blob(digest2)
    +    assert lru.has_blob(digest3)
    +
    +
    +@pytest.mark.parametrize('blobs_digests', [(BLOBS[0], BLOBS_DIGESTS[0])])
    +def test_with_cache(blobs_digests):
    +    blobs, digests = blobs_digests
    +    blob1, blob2, blob3, *_ = blobs
    +    digest1, digest2, digest3, *_ = digests
    +
         cache = LRUMemoryCache(256)
         fallback = LRUMemoryCache(256)
         with_cache_storage = WithCacheStorage(cache, fallback)
     
    -    assert not with_cache_storage.has_blob(abc_digest)
    -    write(with_cache_storage, abc_digest, abc)
    -    assert cache.has_blob(abc_digest)
    -    assert fallback.has_blob(abc_digest)
    -    assert with_cache_storage.get_blob(abc_digest).read() == abc
    +    assert not with_cache_storage.has_blob(digest1)
    +    write(with_cache_storage, digest1, blob1)
    +    assert cache.has_blob(digest1)
    +    assert fallback.has_blob(digest1)
    +    assert with_cache_storage.get_blob(digest1).read() == blob1
     
         # Even if a blob is in cache, we still need to check if the fallback
         # has it.
    -    write(cache, defg_digest, defg)
    -    assert not with_cache_storage.has_blob(defg_digest)
    -    write(fallback, defg_digest, defg)
    -    assert with_cache_storage.has_blob(defg_digest)
    +    write(cache, digest2, blob2)
    +    assert not with_cache_storage.has_blob(digest2)
    +    write(fallback, digest2, blob2)
    +    assert with_cache_storage.has_blob(digest2)
     
         # When a blob is in the fallback but not the cache, reading it should
         # put it into the cache.
    -    write(fallback, hijk_digest, hijk)
    -    assert with_cache_storage.get_blob(hijk_digest).read() == hijk
    -    assert cache.has_blob(hijk_digest)
    -    assert cache.get_blob(hijk_digest).read() == hijk
    -    assert cache.has_blob(hijk_digest)
    +    write(fallback, digest3, blob3)
    +    assert with_cache_storage.get_blob(digest3).read() == blob3
    +    assert cache.has_blob(digest3)
    +    assert cache.get_blob(digest3).read() == blob3
    +    assert cache.has_blob(digest3)

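    Note the reworked handling of the 'remote' case: the any_storage fixture
    now yields a 'host:port' string instead of a storage object, each test
    detects that with isinstance(any_storage, str) and re-runs its assertions
    in a subprocess against a RemoteStorage client, and digests cross the
    process boundary as serialized protobuf bytes (SerializeToString() on one
    side, ParseFromString() on the other).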
  • tests/utils/__init__.py

  • tests/utils/cas.py
    +# Copyright (C) 2018 Bloomberg LP
    +#
    +# Licensed under the Apache License, Version 2.0 (the "License");
    +# you may not use this file except in compliance with the License.
    +# You may obtain a copy of the License at
    +#
    +#  <http://www.apache.org/licenses/LICENSE-2.0>
    +#
    +# Unless required by applicable law or agreed to in writing, software
    +# distributed under the License is distributed on an "AS IS" BASIS,
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    +# See the License for the specific language governing permissions and
    +# limitations under the License.
    +
    +
    +from concurrent import futures
    +from contextlib import contextmanager
    +import multiprocessing
    +import os
    +import signal
    +import tempfile
    +
    +import grpc
    +import psutil
    +import pytest_cov
    +
    +from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    +from buildgrid.server.cas.service import ByteStreamService
    +from buildgrid.server.cas.service import ContentAddressableStorageService
    +from buildgrid.server.cas.instance import ByteStreamInstance
    +from buildgrid.server.cas.instance import ContentAddressableStorageInstance
    +from buildgrid.server.cas.storage.disk import DiskStorage
    +
    +
    +@contextmanager
    +def serve_cas(instances):
    +    server = Server(instances)
    +    try:
    +        yield server
    +    finally:
    +        server.quit()
    +
    +
    +def kill_process_tree(pid):
    +    proc = psutil.Process(pid)
    +    children = proc.children(recursive=True)
    +
    +    def kill_proc(p):
    +        try:
    +            p.kill()
    +        except psutil.AccessDenied:
    +            # Ignore this error, it can happen with
    +            # some setuid bwrap processes.
    +            pass
    +
    +    # Bloody Murder
    +    for child in children:
    +        kill_proc(child)
    +    kill_proc(proc)
    +
    +
    +def run_in_subprocess(function, *arguments):
    +    queue = multiprocessing.Queue()
    +    # Use subprocess to avoid creation of gRPC threads in main process
    +    # See https://github.com/grpc/grpc/blob/master/doc/fork_support.md
    +    process = multiprocessing.Process(target=function,
    +                                      args=(queue, *arguments))
    +
    +    try:
    +        process.start()
    +
    +        result = queue.get()
    +        process.join()
    +    except KeyboardInterrupt:
    +        kill_process_tree(process.pid)
    +        raise
    +
    +    return result
    +
    +
    +class Server:
    +
    +    def __init__(self, instances):
    +
    +        self.instances = instances
    +
    +        self.__storage_path = tempfile.TemporaryDirectory()
    +        self.__storage = DiskStorage(self.__storage_path.name)
    +
    +        self.__queue = multiprocessing.Queue()
    +        self.__process = multiprocessing.Process(
    +            target=Server.serve,
    +            args=(self.__queue, self.instances, self.__storage_path.name))
    +        self.__process.start()
    +
    +        self.port = self.__queue.get()
    +        self.remote = 'localhost:{}'.format(self.port)
    +
    +    @classmethod
    +    def serve(cls, queue, instances, storage_path):
    +        pytest_cov.embed.cleanup_on_sigterm()
    +
    +        # Use max_workers default from Python 3.5+
    +        max_workers = (os.cpu_count() or 1) * 5
    +        server = grpc.server(futures.ThreadPoolExecutor(max_workers))
    +        port = server.add_insecure_port('localhost:0')
    +
    +        storage = DiskStorage(storage_path)
    +
    +        bs_service = ByteStreamService(server)
    +        cas_service = ContentAddressableStorageService(server)
    +        for name in instances:
    +            bs_service.add_instance(name, ByteStreamInstance(storage))
    +            cas_service.add_instance(name, ContentAddressableStorageInstance(storage))
    +
    +        server.start()
    +        queue.put(port)
    +
    +        signal.pause()
    +
    +    def has(self, digest):
    +        return self.__storage.has_blob(digest)
    +
    +    def get(self, digest):
    +        return self.__storage.get_blob(digest).read()
    +
    +    def compare_blobs(self, digest, blob):
    +        if not self.__storage.has_blob(digest):
    +            return False
    +
    +        stored_blob = self.__storage.get_blob(digest)
    +        stored_blob = stored_blob.read()
    +
    +        return blob == stored_blob
    +
    +    def compare_messages(self, digest, message):
    +        if not self.__storage.has_blob(digest):
    +            return False
    +
    +        message_blob = message.SerializeToString()
    +
    +        stored_blob = self.__storage.get_blob(digest)
    +        stored_blob = stored_blob.read()
    +
    +        return message_blob == stored_blob
    +
    +    def compare_files(self, digest, file_path):
    +        if not self.__storage.has_blob(digest):
    +            return False
    +
    +        with open(file_path, 'rb') as file_bytes:
    +            file_blob = file_bytes.read()
    +
    +        stored_blob = self.__storage.get_blob(digest)
    +        stored_blob = stored_blob.read()
    +
    +        return file_blob == stored_blob
    +
    +    def compare_directories(self, digest, directory_path):
    +        if not self.__storage.has_blob(digest):
    +            return False
    +        elif not os.path.isdir(directory_path):
    +            return False
    +
    +        def __compare_folders(digest, path):
    +            directory = remote_execution_pb2.Directory()
    +            directory.ParseFromString(self.__storage.get_blob(digest).read())
    +
    +            files, directories, symlinks = [], [], []
    +            for entry in os.scandir(path):
    +                if entry.is_file(follow_symlinks=False):
    +                    files.append(entry.name)
    +
    +                elif entry.is_dir(follow_symlinks=False):
    +                    directories.append(entry.name)
    +
    +                elif os.path.islink(entry.path):
    +                    symlinks.append(entry.name)
    +
    +            assert len(files) == len(directory.files)
    +            assert len(directories) == len(directory.directories)
    +            assert len(symlinks) == len(directory.symlinks)
    +
    +            for file_node in directory.files:
    +                file_path = os.path.join(path, file_node.name)
    +
    +                assert file_node.name in files
    +                assert os.path.isfile(file_path)
    +                assert not os.path.islink(file_path)
    +                if file_node.is_executable:
    +                    assert os.access(file_path, os.X_OK)
    +
    +                assert self.compare_files(file_node.digest, file_path)
    +
    +            for directory_node in directory.directories:
    +                directory_path = os.path.join(path, directory_node.name)
    +
    +                assert directory_node.name in directories
    +                assert os.path.exists(directory_path)
    +                assert not os.path.islink(directory_path)
    +
    +                assert __compare_folders(directory_node.digest, directory_path)
    +
    +            for symlink_node in directory.symlinks:
    +                symlink_path = os.path.join(path, symlink_node.name)
    +
    +                assert symlink_node.name in symlinks
    +                assert os.path.islink(symlink_path)
    +                assert os.readlink(symlink_path) == symlink_node.target
    +
    +            return True
    +
    +        return __compare_folders(digest, directory_path)
    +
    +    def quit(self):
    +        if self.__process:
    +            self.__process.terminate()
    +            self.__process.join()
    +
    +        self.__storage_path.cleanup()

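    A minimal usage sketch for the helper above (all names as introduced in
    this commit): serve_cas() spawns a DiskStorage-backed CAS server in a
    subprocess on a random free port, hands back a handle to the test, and
    terminates the server process on exit.

        from tests.utils.cas import serve_cas

        with serve_cas(['']) as server:
            print(server.remote)    # 'localhost:<port>', port picked by grpc
            # server.has(digest), server.get(digest) and the compare_*()
            # methods let assertions inspect the server's disk storage
            # directly from the parent test process.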

