[Notes] [Git][BuildGrid/buildgrid][mablanch/77-cas-uploader] 7 commits: client/cas.py: Rename the message uploading helper




Martin Blanchard pushed to branch mablanch/77-cas-uploader at BuildGrid / buildgrid

Commits:

6 changed files:

Changes:

  • buildgrid/_app/bots/buildbox.py
    @@ -104,7 +104,7 @@ def work_buildbox(context, lease):
                     output_tree = _cas_tree_maker(stub_bytestream, output_digest)
    
                     with upload(context.cas_channel) as cas:
    -                    output_tree_digest = cas.send_message(output_tree)
    +                    output_tree_digest = cas.put_message(output_tree)
    
                     output_directory = remote_execution_pb2.OutputDirectory()
                     output_directory.tree_digest.CopyFrom(output_tree_digest)

  • buildgrid/_app/commands/cmd_cas.py
    @@ -27,8 +27,9 @@ from urllib.parse import urlparse
     import click
     import grpc
    
    -from buildgrid.utils import merkle_maker, create_digest
    +from buildgrid.client.cas import upload
     from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc
    +from buildgrid.utils import merkle_maker
    
     from ..cli import pass_context
    
    @@ -66,27 +67,31 @@ def cli(context, remote, instance_name, client_key, client_cert, server_cert):
    
    
     @cli.command('upload-files', short_help="Upload files to the CAS server.")
    -@click.argument('files', nargs=-1, type=click.File('rb'), required=True)
    +@click.argument('files', nargs=-1, type=click.Path(exists=True, dir_okay=False), required=True)
     @pass_context
     def upload_files(context, files):
    -    stub = remote_execution_pb2_grpc.ContentAddressableStorageStub(context.channel)
    +    sent_digests, file_map = list(), dict()
    +    with upload(context.channel, instance=context.instance_name) as cas:
    +        for file_path in files:
    +            context.logger.info("Queueing {}".format(file_path))
    
    -    requests = []
    -    for file in files:
    -        chunk = file.read()
    -        requests.append(remote_execution_pb2.BatchUpdateBlobsRequest.Request(
    -            digest=create_digest(chunk), data=chunk))
    +            file_digest = cas.upload_file(file_path, queue=True)
    
    -    request = remote_execution_pb2.BatchUpdateBlobsRequest(instance_name=context.instance_name,
    -                                                           requests=requests)
    +            assert file_digest.hash and file_digest.size_bytes
    
    -    context.logger.info("Sending: {}".format(request))
    -    response = stub.BatchUpdateBlobs(request)
    -    context.logger.info("Response: {}".format(response))
    +            file_map[file_digest.hash] = file_path
    +            sent_digests.append(file_digest)
    +
    +    for file_digest in sent_digests:
    +        file_path = file_map[file_digest.hash]
    +        if file_digest.ByteSize():
    +            context.logger.info("{}: {}".format(file_path, file_digest.hash))
    +        else:
    +            context.logger.info("{}: FAILED".format(file_path))
    
    
     @cli.command('upload-dir', short_help="Upload a directory to the CAS server.")
    -@click.argument('directory', nargs=1, type=click.Path(), required=True)
    +@click.argument('directory', nargs=1, type=click.Path(exists=True, file_okay=False), required=True)
     @pass_context
     def upload_dir(context, directory):
         context.logger.info("Uploading directory to cas")
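
The reworked `upload-files` command delegates to the new uploader API from `buildgrid.client.cas`. For context, a minimal standalone sketch of the same flow; the endpoint, instance name and file paths are assumptions for illustration, not part of the commit:

    import grpc

    from buildgrid.client.cas import upload

    channel = grpc.insecure_channel('localhost:50051')  # assumed CAS endpoint

    file_map = {}
    with upload(channel, instance='main') as cas:
        for file_path in ['./a.txt', './b.txt']:
            # upload_file(queue=True) hashes the file right away and batches the
            # actual transfer; the batch is flushed when the context manager exits.
            file_map[file_path] = cas.upload_file(file_path, queue=True)

    for file_path, digest in file_map.items():
        # As in cmd_cas.py above, an empty digest marks a failed upload.
        print(file_path, digest.hash if digest.ByteSize() else 'FAILED')
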

  • buildgrid/_app/commands/cmd_execute.py
    @@ -30,9 +30,10 @@ from urllib.parse import urlparse
     import click
     import grpc
    
    -from buildgrid.utils import merkle_maker, create_digest, write_fetch_blob
    +from buildgrid.client.cas import upload
     from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc
     from buildgrid._protos.google.bytestream import bytestream_pb2_grpc
    +from buildgrid.utils import merkle_maker, write_fetch_blob
    
     from ..cli import pass_context
    
    @@ -119,46 +120,37 @@ def wait_execution(context, operation_name):
     @click.argument('input-root', nargs=1, type=click.Path(), required=True)
     @click.argument('commands', nargs=-1, type=click.STRING, required=True)
     @pass_context
    -def command(context, input_root, commands, output_file, output_directory):
    +def run_command(context, input_root, commands, output_file, output_directory):
         stub = remote_execution_pb2_grpc.ExecutionStub(context.channel)
    
    -    execute_command = remote_execution_pb2.Command()
    +    output_executeables = list()
    +    with upload(context.channel, instance=context.instance_name) as cas:
    +        command = remote_execution_pb2.Command()
    
    -    for arg in commands:
    -        execute_command.arguments.extend([arg])
    +        for arg in commands:
    +            command.arguments.extend([arg])
    
    -    output_executeables = []
    -    for file, is_executeable in output_file:
    -        execute_command.output_files.extend([file])
    -        if is_executeable:
    -            output_executeables.append(file)
    +        for file, is_executeable in output_file:
    +            command.output_files.extend([file])
    +            if is_executeable:
    +                output_executeables.append(file)
    
    -    command_digest = create_digest(execute_command.SerializeToString())
    -    context.logger.info(command_digest)
    +        command_digest = cas.put_message(command, queue=True)
    
    -    # TODO: Check for missing blobs
    -    digest = None
    -    for _, digest in merkle_maker(input_root):
    -        pass
    +        context.logger.info('Sent command: {}'.format(command_digest))
    
    -    action = remote_execution_pb2.Action(command_digest=command_digest,
    -                                         input_root_digest=digest,
    -                                         do_not_cache=True)
    +        # TODO: Check for missing blobs
    +        input_root_digest = None
    +        for _, input_root_digest in merkle_maker(input_root):
    +            pass
    
    -    action_digest = create_digest(action.SerializeToString())
    +        action = remote_execution_pb2.Action(command_digest=command_digest,
    +                                             input_root_digest=input_root_digest,
    +                                             do_not_cache=True)
    
    -    context.logger.info("Sending execution request...")
    -
    -    requests = []
    -    requests.append(remote_execution_pb2.BatchUpdateBlobsRequest.Request(
    -        digest=command_digest, data=execute_command.SerializeToString()))
    -
    -    requests.append(remote_execution_pb2.BatchUpdateBlobsRequest.Request(
    -        digest=action_digest, data=action.SerializeToString()))
    +        action_digest = cas.put_message(action, queue=True)
    
    -    request = remote_execution_pb2.BatchUpdateBlobsRequest(instance_name=context.instance_name,
    -                                                           requests=requests)
    -    remote_execution_pb2_grpc.ContentAddressableStorageStub(context.channel).BatchUpdateBlobs(request)
    +        context.logger.info("Sent action: {}".format(action_digest))
    
         request = remote_execution_pb2.ExecuteRequest(instance_name=context.instance_name,
                                                       action_digest=action_digest,
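
The reworked `run_command` relies on queued uploads: `put_message(..., queue=True)` computes the digest locally and returns it immediately, while the blob itself is only guaranteed to be on the server once the `upload()` context manager exits and flushes. A minimal sketch of that pattern; the channel and instance name are assumptions:

    import grpc

    from buildgrid.client.cas import upload
    from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2

    channel = grpc.insecure_channel('localhost:50051')  # assumed endpoint

    with upload(channel, instance='main') as cas:
        command = remote_execution_pb2.Command()
        command.arguments.extend(['echo', 'hello'])

        # put_message() hashes the serialized message right away, so the digest
        # can be referenced before the queued blob has actually been written.
        command_digest = cas.put_message(command, queue=True)

        action = remote_execution_pb2.Action(command_digest=command_digest,
                                             do_not_cache=True)
        action_digest = cas.put_message(action, queue=True)

    # Leaving the `with` block called close(), which flushed the queued batch;
    # action_digest can now safely be referenced from an ExecuteRequest.
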

  • buildgrid/client/cas.py
    @@ -17,9 +17,29 @@ from contextlib import contextmanager
     import uuid
     import os
    
    -from buildgrid.settings import HASH
    +import grpc
    +
     from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc
     from buildgrid._protos.google.bytestream import bytestream_pb2, bytestream_pb2_grpc
    +from buildgrid._protos.google.rpc import code_pb2
    +from buildgrid.settings import HASH
    +
    +
    +class CallCache:
    +    """Per remote grpc.StatusCode.UNIMPLEMENTED call cache."""
    +    __calls = dict()
    +
    +    @classmethod
    +    def mark_unimplemented(cls, channel, name):
    +        if channel not in cls.__calls:
    +            cls.__calls[channel] = set()
    +        cls.__calls[channel].add(name)
    +
    +    @classmethod
    +    def unimplemented(cls, channel, name):
    +        if channel not in cls.__calls:
    +            return False
    +        return name in cls.__calls[channel]
    
    
     @contextmanager
    @@ -28,7 +48,7 @@ def upload(channel, instance=None, u_uid=None):
         try:
             yield uploader
         finally:
    -        uploader.flush()
    +        uploader.close()
    
    
     class Uploader:
    @@ -47,6 +67,7 @@ class Uploader:
    
         FILE_SIZE_THRESHOLD = 1 * 1024 * 1024
         MAX_REQUEST_SIZE = 2 * 1024 * 1024
    +    MAX_REQUEST_COUNT = 500
    
         def __init__(self, channel, instance=None, u_uid=None):
             """Initializes a new :class:`Uploader` instance.
    @@ -68,8 +89,61 @@ class Uploader:
             self.__cas_stub = remote_execution_pb2_grpc.ContentAddressableStorageStub(self.channel)
    
             self.__requests = dict()
    +        self.__request_count = 0
             self.__request_size = 0
    
    +    # --- Public API ---
    +
    +    def put_blob(self, blob, digest=None, queue=False):
    +        """Stores a blob into the remote CAS server.
    +
    +        If queuing is allowed (`queue=True`), the upload request **may** be
    +        deferred. An explicit call to :method:`flush` can force the request
    +        to be sent immediately (along with the rest of the queued batch).
    +
    +        Args:
    +            blob (bytes): the blob's data.
    +            digest (:obj:`Digest`, optional): the blob's digest.
    +            queue (bool, optional): whether or not the upload request may be
    +                queued and submitted as part of a batch upload request. Defaults
    +                to False.
    +
    +        Returns:
    +            :obj:`Digest`: the sent blob's digest.
    +        """
    +        if not queue or len(blob) > Uploader.FILE_SIZE_THRESHOLD:
    +            blob_digest = self._send_blob(blob)
    +        else:
    +            blob_digest = self._queue_blob(blob)
    +
    +        return blob_digest
    +
    +    def put_message(self, message, digest=None, queue=False):
    +        """Stores a message into the remote CAS server.
    +
    +        If queuing is allowed (`queue=True`), the upload request **may** be
    +        deferred. An explicit call to :method:`flush` can force the request
    +        to be sent immediately (along with the rest of the queued batch).
    +
    +        Args:
    +            message (:obj:`Message`): the message object.
    +            digest (:obj:`Digest`, optional): the message's digest.
    +            queue (bool, optional): whether or not the upload request may be
    +                queued and submitted as part of a batch upload request. Defaults
    +                to False.
    +
    +        Returns:
    +            :obj:`Digest`: the sent message's digest.
    +        """
    +        message_blob = message.SerializeToString()
    +
    +        if not queue or len(message_blob) > Uploader.FILE_SIZE_THRESHOLD:
    +            message_digest = self._send_blob(message_blob)
    +        else:
    +            message_digest = self._queue_blob(message_blob)
    +
    +        return message_digest
    +
         def upload_file(self, file_path, queue=True):
             """Stores a local file into the remote CAS storage.
    
    @@ -79,7 +153,7 @@ class Uploader:
    
             Args:
                 file_path (str): absolute or relative path to a local file.
    -            queue (bool, optional): wheter or not the upload request may be
    +            queue (bool, optional): whether or not the upload request may be
                     queued and submitted as part of a batch upload request. Defaults
                     to True.
    
    @@ -96,11 +170,11 @@ class Uploader:
                 file_bytes = bytes_steam.read()
    
             if not queue or len(file_bytes) > Uploader.FILE_SIZE_THRESHOLD:
    -            blob_digest = self._send_blob(file_bytes)
    +            file_digest = self._send_blob(file_bytes)
             else:
    -            blob_digest = self._queue_blob(file_bytes)
    +            file_digest = self._queue_blob(file_bytes)
    
    -        return blob_digest
    +        return file_digest
    
         def upload_directory(self, directory, queue=True):
             """Stores a :obj:`Directory` into the remote CAS storage.
    @@ -126,50 +200,37 @@ class Uploader:
             else:
                 return self._queue_blob(directory.SerializeToString())
    
    -    def send_message(self, message):
    -        """Stores a message into the remote CAS storage.
    -
    -        Args:
    -            message (:obj:`Message`): a protobuf message object.
    -
    -        Returns:
    -            :obj:`Digest`: The digest of the message.
    -        """
    -        return self._send_blob(message.SerializeToString())
    -
         def flush(self):
             """Ensures any queued request gets sent."""
             if self.__requests:
    -            self._send_batch()
    -
    -    def _queue_blob(self, blob):
    -        """Queues a memory block for later batch upload"""
    -        blob_digest = remote_execution_pb2.Digest()
    -        blob_digest.hash = HASH(blob).hexdigest()
    -        blob_digest.size_bytes = len(blob)
    +            self._send_blob_batch(self.__requests)
    
    -        if self.__request_size + len(blob) > Uploader.MAX_REQUEST_SIZE:
    -            self._send_batch()
    +            self.__requests.clear()
    +            self.__request_count = 0
    +            self.__request_size = 0
    
    -        update_request = remote_execution_pb2.BatchUpdateBlobsRequest.Request()
    -        update_request.digest.CopyFrom(blob_digest)
    -        update_request.data = blob
    +    def close(self):
    +        """Closes the underlying connection stubs.
    
    -        update_request_size = update_request.ByteSize()
    -        if self.__request_size + update_request_size > Uploader.MAX_REQUEST_SIZE:
    -            self._send_batch()
    +        Note:
    +            This will always send pending requests before closing connections,
    +            if any.
    +        """
    +        self.flush()
    
    -        self.__requests[update_request.digest.hash] = update_request
    -        self.__request_size += update_request_size
    +        self.__bytestream_stub = None
    +        self.__cas_stub = None
    
    -        return blob_digest
    +    # --- Private API ---
    
    -    def _send_blob(self, blob):
    +    def _send_blob(self, blob, digest=None):
             """Sends a memory block using ByteStream.Write()"""
             blob_digest = remote_execution_pb2.Digest()
    -        blob_digest.hash = HASH(blob).hexdigest()
    -        blob_digest.size_bytes = len(blob)
    -
    +        if digest is not None:
    +            blob_digest.CopyFrom(digest)
    +        else:
    +            blob_digest.hash = HASH(blob).hexdigest()
    +            blob_digest.size_bytes = len(blob)
             if self.instance_name is not None:
                 resource_name = '/'.join([self.instance_name, 'uploads', self.u_uid, 'blobs',
                                           blob_digest.hash, str(blob_digest.size_bytes)])
    @@ -204,18 +265,64 @@ class Uploader:
    
             return blob_digest
    
    -    def _send_batch(self):
    +    def _queue_blob(self, blob, digest=None):
    +        """Queues a memory block for later batch upload"""
    +        blob_digest = remote_execution_pb2.Digest()
    +        if digest is not None:
    +            blob_digest.CopyFrom(digest)
    +        else:
    +            blob_digest.hash = HASH(blob).hexdigest()
    +            blob_digest.size_bytes = len(blob)
    +
    +        if self.__request_size + blob_digest.size_bytes > Uploader.MAX_REQUEST_SIZE:
    +            self.flush()
    +        elif self.__request_count >= Uploader.MAX_REQUEST_COUNT:
    +            self.flush()
    +
    +        self.__requests[blob_digest.hash] = (blob, blob_digest)
    +        self.__request_count += 1
    +        self.__request_size += blob_digest.size_bytes
    +
    +        return blob_digest
    +
    +    def _send_blob_batch(self, batch):
             """Sends queued data using ContentAddressableStorage.BatchUpdateBlobs()"""
    -        batch_request = remote_execution_pb2.BatchUpdateBlobsRequest()
    -        batch_request.requests.extend(self.__requests.values())
    -        if self.instance_name is not None:
    -            batch_request.instance_name = self.instance_name
    +        batch_fetched = False
    +        written_digests = list()
    
    -        batch_response = self.__cas_stub.BatchUpdateBlobs(batch_request)
    +        # First, try BatchUpdateBlobs(), if not already known to be unimplemented:
    +        if not CallCache.unimplemented(self.channel, 'BatchUpdateBlobs'):
    +            batch_request = remote_execution_pb2.BatchUpdateBlobsRequest()
    +            if self.instance_name is not None:
    +                batch_request.instance_name = self.instance_name
    
    -        for response in batch_response.responses:
    -            assert response.digest.hash in self.__requests
    -            assert response.status.code is 0
    +            for blob, digest in batch.values():
    +                request = batch_request.requests.add()
    +                request.digest.CopyFrom(digest)
    +                request.data = blob
    
    -        self.__requests.clear()
    -        self.__request_size = 0
    +            try:
    +                batch_response = self.__cas_stub.BatchUpdateBlobs(batch_request)
    +                for response in batch_response.responses:
    +                    assert response.digest.hash in batch
    +
    +                    written_digests.append(response.digest)
    +                    if response.status.code != code_pb2.OK:
    +                        response.digest.Clear()
    +
    +                batch_fetched = True
    +
    +            except grpc.RpcError as e:
    +                status_code = e.code()
    +                if status_code == grpc.StatusCode.UNIMPLEMENTED:
    +                    CallCache.mark_unimplemented(self.channel, 'BatchUpdateBlobs')
    +
    +                else:
    +                    assert False
    +
    +        # Fallback to Write() if no BatchUpdateBlobs():
    +        if not batch_fetched:
    +            for blob, digest in batch.values():
    +                written_digests.append(self._send_blob(blob, digest=digest))
    +
    +        return written_digests

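The new `Uploader` public API can also be used without the `upload()` context manager. A minimal sketch under an assumed endpoint and instance name; it only exercises calls introduced by this branch (`put_blob`, `flush`, `close`):

    import grpc

    from buildgrid.client.cas import Uploader

    channel = grpc.insecure_channel('localhost:50051')  # assumed endpoint
    uploader = Uploader(channel, instance='main')

    # Small blobs can be queued for a single BatchUpdateBlobs() round-trip;
    # anything larger than FILE_SIZE_THRESHOLD goes straight through
    # ByteStream.Write(), even with queue=True.
    small_digest = uploader.put_blob(b'hello, CAS', queue=True)
    large_digest = uploader.put_blob(b'\x00' * (2 * 1024 * 1024), queue=True)

    # flush() forces the queued batch out now; close() flushes as well (falling
    # back to per-blob Write() calls if the server reported BatchUpdateBlobs()
    # as UNIMPLEMENTED) and then drops the stubs.
    uploader.flush()
    uploader.close()
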
  • buildgrid/server/cas/storage/remote.py
    @@ -25,9 +25,12 @@ import logging
    
     import grpc
    
    -from buildgrid.utils import gen_fetch_blob, gen_write_request_blob
    +from buildgrid.client.cas import upload
     from buildgrid._protos.google.bytestream import bytestream_pb2_grpc
     from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc
    +from buildgrid._protos.google.rpc import code_pb2
    +from buildgrid._protos.google.rpc import status_pb2
    +from buildgrid.utils import gen_fetch_blob
    
     from .storage_abc import StorageABC
    
    @@ -36,7 +39,10 @@ class RemoteStorage(StorageABC):
    
         def __init__(self, channel, instance_name):
             self.logger = logging.getLogger(__name__)
    -        self._instance_name = instance_name
    +
    +        self.instance_name = instance_name
    +        self.channel = channel
    +
             self._stub_bs = bytestream_pb2_grpc.ByteStreamStub(channel)
             self._stub_cas = remote_execution_pb2_grpc.ContentAddressableStorageStub(channel)
    
    @@ -50,7 +56,7 @@ class RemoteStorage(StorageABC):
                 fetched_data = io.BytesIO()
                 length = 0
    
    -            for data in gen_fetch_blob(self._stub_bs, digest, self._instance_name):
    +            for data in gen_fetch_blob(self._stub_bs, digest, self.instance_name):
                     length += fetched_data.write(data)
    
                 if length:
    @@ -74,13 +80,11 @@ class RemoteStorage(StorageABC):
             return io.BytesIO(digest.SerializeToString())
    
         def commit_write(self, digest, write_session):
    -        write_session.seek(0)
    -
    -        for request in gen_write_request_blob(write_session, digest, self._instance_name):
    -            self._stub_bs.Write(request)
    +        with upload(self.channel, instance=self.instance_name) as cas:
    +            cas.put_blob(write_session.getvalue())
    
         def missing_blobs(self, blobs):
    -        request = remote_execution_pb2.FindMissingBlobsRequest(instance_name=self._instance_name)
    +        request = remote_execution_pb2.FindMissingBlobsRequest(instance_name=self.instance_name)
    
             for blob in blobs:
                 request_digest = request.blob_digests.add()
    @@ -92,19 +96,12 @@ class RemoteStorage(StorageABC):
             return [x for x in response.missing_blob_digests]
    
         def bulk_update_blobs(self, blobs):
    -        request = remote_execution_pb2.BatchUpdateBlobsRequest(instance_name=self._instance_name)
    -
    -        for digest, data in blobs:
    -            reqs = request.requests.add()
    -            reqs.digest.CopyFrom(digest)
    -            reqs.data = data
    -
    -        response = self._stub_cas.BatchUpdateBlobs(request)
    -
    -        responses = response.responses
    +        sent_digests = list()
    +        with upload(self.channel, instance=self.instance_name) as cas:
    +            for digest, blob in blobs:
    +                sent_digests.append(cas.put_blob(blob, digest=digest, queue=True))
    
    -        # Check everything was sent back, even if order changed
    -        assert ([x.digest for x in request.requests].sort(key=lambda x: x.hash)) == \
    -            ([x.digest for x in responses].sort(key=lambda x: x.hash))
    +        assert len(sent_digests) == len(blobs)
    
    -        return [x.status for x in responses]
    +        return [status_pb2.Status(code=code_pb2.OK) if d.ByteSize() > 0
    +                else status_pb2.Status(code=code_pb2.UNKNOWN) for d in sent_digests]

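`bulk_update_blobs()` now reports per-blob results by mapping the digests returned from the uploader to `google.rpc.Status` values, treating an empty (cleared) digest as a failed write. A small illustration of that mapping; the helper name `statuses_from_digests` is hypothetical:

    from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    from buildgrid._protos.google.rpc import code_pb2, status_pb2

    def statuses_from_digests(sent_digests):
        # An empty Digest (ByteSize() == 0) stands for a blob that failed to upload.
        return [status_pb2.Status(code=code_pb2.OK) if digest.ByteSize() > 0
                else status_pb2.Status(code=code_pb2.UNKNOWN)
                for digest in sent_digests]

    written = remote_execution_pb2.Digest(hash='a' * 64, size_bytes=11)
    failed = remote_execution_pb2.Digest()  # cleared digest, i.e. a failed write

    codes = [status.code for status in statuses_from_digests([written, failed])]
    assert codes == [code_pb2.OK, code_pb2.UNKNOWN]
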
  • buildgrid/utils.py
    @@ -15,7 +15,6 @@
    
     from operator import attrgetter
     import os
    -import uuid
    
     from buildgrid.settings import HASH
     from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    @@ -34,32 +33,6 @@ def gen_fetch_blob(stub, digest, instance_name=""):
             yield response.data
    
    
    -def gen_write_request_blob(digest_bytes, digest, instance_name=""):
    -    """ Generates a bytestream write request
    -    """
    -    resource_name = os.path.join(instance_name, 'uploads', str(uuid.uuid4()),
    -                                 'blobs', digest.hash, str(digest.size_bytes))
    -
    -    offset = 0
    -    finished = False
    -    remaining = digest.size_bytes
    -
    -    while not finished:
    -        chunk_size = min(remaining, 64 * 1024)
    -        remaining -= chunk_size
    -        finished = remaining <= 0
    -
    -        request = bytestream_pb2.WriteRequest()
    -        request.resource_name = resource_name
    -        request.write_offset = offset
    -        request.data = digest_bytes.read(chunk_size)
    -        request.finish_write = finished
    -
    -        yield request
    -
    -        offset += chunk_size
    -
    -
     def write_fetch_directory(root_directory, stub, digest, instance_name=None):
         """Locally replicates a directory from CAS.
    
    @@ -280,8 +253,12 @@ def tree_maker(directory_path, cas=None):
         tree.children.extend(child_directories)
         tree.root.CopyFrom(directory)
    
    +    # Ensure that we've uploaded the tree structure first
    +    if cas is not None:
    +        cas.flush()
    +
         if cas is not None:
    -        tree_digest = cas.send_message(tree)
    +        tree_digest = cas.put_message(tree)
         else:
             tree_digest = create_digest(tree.SerializeToString())
    
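
The `tree_maker()` change flushes before storing the `Tree`: child `Directory` blobs may still sit in the uploader's queue, and the `Tree` references them by digest, so they are pushed out first. A sketch of that ordering; the helper name `put_tree` is hypothetical and `cas` is assumed to be a `buildgrid.client.cas.Uploader`:

    from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2

    def put_tree(cas, tree: remote_execution_pb2.Tree):
        cas.flush()                   # send any queued child directory blobs first
        return cas.put_message(tree)  # then store the Tree message itself
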


