[Notes] [Git][BuildGrid/buildgrid][mablanch/61-bazel-support] 6 commits: utils.py: Support symlinks and folders in fetcher helper



Title: GitLab

Martin Blanchard pushed to branch mablanch/61-bazel-support at BuildGrid / buildgrid

Commits:

3 changed files:

Changes:

  • buildgrid/_app/bots/temp_directory.py
    ... ... @@ -19,71 +19,97 @@ import tempfile
    19 19
     
    
    20 20
     from google.protobuf import any_pb2
    
    21 21
     
    
    22
    -from buildgrid.utils import read_file, create_digest, write_fetch_directory, parse_to_pb2_from_fetch
    
    23 22
     from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc
    
    24 23
     from buildgrid._protos.google.bytestream import bytestream_pb2_grpc
    
    24
    +from buildgrid.utils import write_fetch_directory, parse_to_pb2_from_fetch
    
    25
    +from buildgrid.utils import output_file_maker, output_directory_maker
    
    25 26
     
    
    26 27
     
    
    27 28
     def work_temp_directory(context, lease):
    
    28
    -    """ Bot downloads directories and files into a temp directory,
    
    29
    -    then uploads results back to CAS
    
    29
    +    """Executes a lease for a build action, using host tools.
    
    30 30
         """
    
    31 31
     
    
    32
    -    parent = context.parent
    
    32
    +    instance_name = context.parent
    
    33 33
         stub_bytestream = bytestream_pb2_grpc.ByteStreamStub(context.cas_channel)
    
    34 34
     
    
    35 35
         action_digest = remote_execution_pb2.Digest()
    
    36 36
         lease.payload.Unpack(action_digest)
    
    37 37
     
    
    38
    -    action = remote_execution_pb2.Action()
    
    38
    +    action = parse_to_pb2_from_fetch(remote_execution_pb2.Action(),
    
    39
    +                                     stub_bytestream, action_digest, instance_name)
    
    39 40
     
    
    40
    -    action = parse_to_pb2_from_fetch(action, stub_bytestream, action_digest, parent)
    
    41
    +    with tempfile.TemporaryDirectory() as temp_directory:
    
    42
    +        command = parse_to_pb2_from_fetch(remote_execution_pb2.Command(),
    
    43
    +                                          stub_bytestream, action.command_digest, instance_name)
    
    41 44
     
    
    42
    -    with tempfile.TemporaryDirectory() as temp_dir:
    
    45
    +        write_fetch_directory(temp_directory, stub_bytestream,
    
    46
    +                              action.input_root_digest, instance_name)
    
    43 47
     
    
    44
    -        command = remote_execution_pb2.Command()
    
    45
    -        command = parse_to_pb2_from_fetch(command, stub_bytestream, action.command_digest, parent)
    
    46
    -
    
    47
    -        arguments = "cd {} &&".format(temp_dir)
    
    48
    +        environment = os.environ.copy()
    
    49
    +        for variable in command.environment_variables:
    
    50
    +            if variable.name not in ['PATH', 'PWD']:
    
    51
    +                environment[variable.name] = variable.value
    
    48 52
     
    
    53
    +        command_line = list()
    
    49 54
             for argument in command.arguments:
    
    50
    -            arguments += " {}".format(argument)
    
    51
    -
    
    52
    -        context.logger.info(arguments)
    
    53
    -
    
    54
    -        write_fetch_directory(temp_dir, stub_bytestream, action.input_root_digest, parent)
    
    55
    -
    
    56
    -        proc = subprocess.Popen(arguments,
    
    57
    -                                shell=True,
    
    58
    -                                stdin=subprocess.PIPE,
    
    59
    -                                stdout=subprocess.PIPE)
    
    60
    -
    
    61
    -        # TODO: Should return the std_out to the user
    
    62
    -        proc.communicate()
    
    63
    -
    
    64
    -        result = remote_execution_pb2.ActionResult()
    
    65
    -        requests = []
    
    66
    -        for output_file in command.output_files:
    
    67
    -            path = os.path.join(temp_dir, output_file)
    
    68
    -            chunk = read_file(path)
    
    69
    -
    
    70
    -            digest = create_digest(chunk)
    
    71
    -
    
    72
    -            result.output_files.extend([remote_execution_pb2.OutputFile(path=output_file,
    
    73
    -                                                                        digest=digest)])
    
    74
    -
    
    75
    -            requests.append(remote_execution_pb2.BatchUpdateBlobsRequest.Request(
    
    76
    -                digest=digest, data=chunk))
    
    77
    -
    
    78
    -        request = remote_execution_pb2.BatchUpdateBlobsRequest(instance_name=parent,
    
    79
    -                                                               requests=requests)
    
    55
    +            command_line.append(argument.strip())
    
    56
    +
    
    57
    +        working_directory = None
    
    58
    +        if command.working_directory:
    
    59
    +            working_directory = os.path.join(temp_directory,
    
    60
    +                                             command.working_directory)
    
    61
    +            os.makedirs(working_directory, exist_ok=True)
    
    62
    +        else:
    
    63
    +            working_directory = temp_directory
    
    64
    +
    
    65
    +        # Ensure that output files structure exists:
    
    66
    +        for output_path in command.output_files:
    
    67
    +            directory_path = os.path.join(working_directory,
    
    68
    +                                          os.path.dirname(output_path))
    
    69
    +            os.makedirs(directory_path, exist_ok=True)
    
    70
    +
    
    71
    +        process = subprocess.Popen(command_line,
    
    72
    +                                   cwd=working_directory,
    
    73
    +                                   universal_newlines=True,
    
    74
    +                                   env=environment,
    
    75
    +                                   stdin=subprocess.PIPE,
    
    76
    +                                   stdout=subprocess.PIPE)
    
    77
    +        # TODO: Should return the stdout and stderr to the user.
    
    78
    +        process.communicate()
    
    79
    +
    
    80
    +        update_requests = remote_execution_pb2.BatchUpdateBlobsRequest(instance_name=instance_name)
    
    81
    +        action_result = remote_execution_pb2.ActionResult()
    
    82
    +
    
    83
    +        for output_path in command.output_files:
    
    84
    +            file_path = os.path.join(working_directory, output_path)
    
    85
    +            # Missing outputs should simply be omitted in ActionResult:
    
    86
    +            if not os.path.isfile(file_path):
    
    87
    +                continue
    
    88
    +
    
    89
    +            # OutputFile.path should be relative to the working directory:
    
    90
    +            output_file, update_request = output_file_maker(file_path, working_directory)
    
    91
    +
    
    92
    +            action_result.output_files.extend([output_file])
    
    93
    +            update_requests.requests.extend([update_request])
    
    94
    +
    
    95
    +        for output_path in command.output_directories:
    
    96
    +            directory_path = os.path.join(working_directory, output_path)
    
    97
    +            # Missing outputs should simply be omitted in ActionResult:
    
    98
    +            if not os.path.isdir(directory_path):
    
    99
    +                continue
    
    100
    +
    
    101
    +            # OutputDirectory.path should be relative to the working directory:
    
    102
    +            output_directory, update_request = output_directory_maker(directory_path, working_directory)
    
    103
    +
    
    104
    +            action_result.output_directories.extend([output_directory])
    
    105
    +            update_requests.requests.extend(update_request)
    
    80 106
     
    
    81 107
             stub_cas = remote_execution_pb2_grpc.ContentAddressableStorageStub(context.cas_channel)
    
    82
    -        stub_cas.BatchUpdateBlobs(request)
    
    108
    +        stub_cas.BatchUpdateBlobs(update_requests)
    
    83 109
     
    
    84
    -        result_any = any_pb2.Any()
    
    85
    -        result_any.Pack(result)
    
    110
    +        action_result_any = any_pb2.Any()
    
    111
    +        action_result_any.Pack(action_result)
    
    86 112
     
    
    87
    -        lease.result.CopyFrom(result_any)
    
    113
    +        lease.result.CopyFrom(action_result_any)
    
    88 114
     
    
    89 115
         return lease

  • buildgrid/server/execution/execution_service.py
    ... ... @@ -86,6 +86,11 @@ class ExecutionService(remote_execution_pb2_grpc.ExecutionServicer):
    86 86
                 yield operations_pb2.Operation()
    
    87 87
     
    
    88 88
         def _get_instance(self, name):
    
    89
    +        # If client does not support multiple instances, it may omit the
    
    90
    +        # instance name request parameter, so better map our default:
    
    91
    +        if not name and len(self._instances) == 1:
    
    92
    +            name = next(iter(self._instances))
    
    93
    +
    
    89 94
             try:
    
    90 95
                 return self._instances[name]
    
    91 96
     
    

  • buildgrid/utils.py
    ... ... @@ -13,6 +13,7 @@
    13 13
     # limitations under the License.
    
    14 14
     
    
    15 15
     
    
    16
    +from operator import attrgetter
    
    16 17
     import os
    
    17 18
     
    
    18 19
     from buildgrid.settings import HASH
    
    ... ... @@ -31,30 +32,59 @@ def gen_fetch_blob(stub, digest, instance_name=""):
    31 32
             yield response.data
    
    32 33
     
    
    33 34
     
    
    34
    -def write_fetch_directory(directory, stub, digest, instance_name=""):
    
    35
    -    """ Given a directory digest, fetches files and writes them to a directory
    
    35
    +def write_fetch_directory(root_directory, stub, digest, instance_name=None):
    
    36
    +    """Locally replicates a directory from CAS.
    
    37
    +
    
    38
    +    Args:
    
    39
    +        root_directory (str): local directory to populate.
    
    40
    +        stub (): gRPC stub for CAS communication.
    
    41
    +        digest (Digest): digest for the directory to fetch from CAS.
    
    42
    +        instance_name (str, optional): farm instance name to query data from.
    
    36 43
         """
    
    37
    -    # TODO: Extend to symlinks and inner directories
    
    38
    -    # pathlib.Path('/my/directory').mkdir(parents=True, exist_ok=True)
    
    44
    +    if not os.path.isabs(root_directory):
    
    45
    +        root_directory = os.path.abspath(root_directory)
    
    46
    +    if not os.path.exists(root_directory):
    
    47
    +        os.makedirs(root_directory, exist_ok=True)
    
    39 48
     
    
    40
    -    directory_pb2 = remote_execution_pb2.Directory()
    
    41
    -    directory_pb2 = parse_to_pb2_from_fetch(directory_pb2, stub, digest, instance_name)
    
    49
    +    directory = parse_to_pb2_from_fetch(remote_execution_pb2.Directory(),
    
    50
    +                                        stub, digest, instance_name)
    
    51
    +
    
    52
    +    for directory_node in directory.directories:
    
    53
    +        child_path = os.path.join(root_directory, directory_node.name)
    
    54
    +
    
    55
    +        write_fetch_directory(child_path, stub, directory_node.digest, instance_name)
    
    56
    +
    
    57
    +    for file_node in directory.files:
    
    58
    +        child_path = os.path.join(root_directory, file_node.name)
    
    42 59
     
    
    43
    -    for file_node in directory_pb2.files:
    
    44
    -        path = os.path.join(directory, file_node.name)
    
    45
    -        with open(path, 'wb') as f:
    
    46
    -            write_fetch_blob(f, stub, file_node.digest, instance_name)
    
    60
    +        with open(child_path, 'wb') as child_file:
    
    61
    +            write_fetch_blob(child_file, stub, file_node.digest, instance_name)
    
    47 62
     
    
    63
    +    for symlink_node in directory.symlinks:
    
    64
    +        child_path = os.path.join(root_directory, symlink_node.name)
    
    48 65
     
    
    49
    -def write_fetch_blob(out, stub, digest, instance_name=""):
    
    50
    -    """ Given an output buffer, fetches blob and writes to buffer
    
    66
    +        if os.path.isabs(symlink_node.target):
    
    67
    +            continue  # No out of temp-directory links for now.
    
    68
    +        target_path = os.path.join(root_directory, symlink_node.target)
    
    69
    +
    
    70
    +        os.symlink(child_path, target_path)
    
    71
    +
    
    72
    +
    
    73
    +def write_fetch_blob(target_file, stub, digest, instance_name=None):
    
    74
    +    """Extracts a blob from CAS into a local file.
    
    75
    +
    
    76
    +    Args:
    
    77
    +        target_file (str): local file to write.
    
    78
    +        stub (): gRPC stub for CAS communication.
    
    79
    +        digest (Digest): digest for the blob to fetch from CAS.
    
    80
    +        instance_name (str, optional): farm instance name to query data from.
    
    51 81
         """
    
    52 82
     
    
    53 83
         for stream in gen_fetch_blob(stub, digest, instance_name):
    
    54
    -        out.write(stream)
    
    84
    +        target_file.write(stream)
    
    85
    +    target_file.flush()
    
    55 86
     
    
    56
    -    out.flush()
    
    57
    -    assert digest.size_bytes == os.fstat(out.fileno()).st_size
    
    87
    +    assert digest.size_bytes == os.fstat(target_file.fileno()).st_size
    
    58 88
     
    
    59 89
     
    
    60 90
     def parse_to_pb2_from_fetch(pb2, stub, digest, instance_name=""):
    
    ... ... @@ -70,7 +100,15 @@ def parse_to_pb2_from_fetch(pb2, stub, digest, instance_name=""):
    70 100
     
    
    71 101
     
    
    72 102
     def create_digest(bytes_to_digest):
    
    73
    -    """ Creates a hash based on the hex digest and returns the digest
    
    103
    +    """Computes the :obj:`Digest` of a piece of data.
    
    104
    +
    
    105
    +    The :obj:`Digest` of a data is a function of its hash **and** size.
    
    106
    +
    
    107
    +    Args:
    
    108
    +        bytes_to_digest (bytes): byte data to digest.
    
    109
    +
    
    110
    +    Returns:
    
    111
    +        :obj:`Digest`: The gRPC :obj:`Digest` for the given byte data.
    
    74 112
         """
    
    75 113
         return remote_execution_pb2.Digest(hash=HASH(bytes_to_digest).hexdigest(),
    
    76 114
                                            size_bytes=len(bytes_to_digest))
    
    ... ... @@ -107,6 +145,202 @@ def file_maker(file_path, file_digest):
    107 145
                                              is_executable=os.access(file_path, os.X_OK))
    
    108 146
     
    
    109 147
     
    
    110
    -def read_file(read):
    
    111
    -    with open(read, 'rb') as f:
    
    112
    -        return f.read()
    148
    +def directory_maker(directory_path):
    
    149
    +    """Creates a :obj:`Directory` from a local directory.
    
    150
    +
    
    151
    +    Args:
    
    152
    +        directory_path (str): absolute or relative path to a local directory.
    
    153
    +
    
    154
    +    Returns:
    
    155
    +        :obj:`Directory`, list of :obj:`Directory`, list of
    
    156
    +        :obj:`BatchUpdateBlobsRequest`: Tuple of a new gRPC :obj:`Directory` for
    
    157
    +        the directory pointed by `directory_path`, a list of new gRPC
    
    158
    +        :obj:`Directory` for every children of that directory and the
    
    159
    +        corresponding list of :obj:`BatchUpdateBlobsRequest` for CAS upload.
    
    160
    +
    
    161
    +        The :obj:`Directory` children list may come in any order.
    
    162
    +
    
    163
    +        The :obj:`BatchUpdateBlobsRequest` list may come in any order. However,
    
    164
    +        its last element is guaranteed to be the root :obj:`Directory`'s
    
    165
    +        request.
    
    166
    +    """
    
    167
    +    if not os.path.isabs(directory_path):
    
    168
    +        directory_path = os.path.abspath(directory_path)
    
    169
    +
    
    170
    +    child_directories = list()
    
    171
    +    update_requests = list()
    
    172
    +
    
    173
    +    files, directories, symlinks = list(), list(), list()
    
    174
    +    for directory_entry in os.scandir(directory_path):
    
    175
    +        # Create a FileNode and corresponding BatchUpdateBlobsRequest:
    
    176
    +        if directory_entry.is_file(follow_symlinks=False):
    
    177
    +            node_blob = read_file(directory_entry.path)
    
    178
    +            node_digest = create_digest(node_blob)
    
    179
    +
    
    180
    +            node = remote_execution_pb2.FileNode()
    
    181
    +            node.name = directory_entry.name
    
    182
    +            node.digest = node_digest
    
    183
    +            node.is_executable = os.access(directory_entry.path, os.X_OK)
    
    184
    +
    
    185
    +            node_request = remote_execution_pb2.BatchUpdateBlobsRequest.Request(digest=node_digest)
    
    186
    +            node_request.data = node_blob
    
    187
    +
    
    188
    +            update_requests.append(node_request)
    
    189
    +            files.append(node)
    
    190
    +
    
    191
    +        # Create a DirectoryNode and corresponding BatchUpdateBlobsRequest:
    
    192
    +        elif directory_entry.is_dir(follow_symlinks=False):
    
    193
    +            node_directory, node_children, node_requests = directory_maker(directory_entry.path)
    
    194
    +
    
    195
    +            node = remote_execution_pb2.DirectoryNode()
    
    196
    +            node.name = directory_entry.name
    
    197
    +            node.digest = node_requests[-1].digest
    
    198
    +
    
    199
    +            child_directories.extend(node_children)
    
    200
    +            child_directories.append(node_directory)
    
    201
    +            update_requests.extend(node_requests)
    
    202
    +            directories.append(node)
    
    203
    +
    
    204
    +        # Create a SymlinkNode if necessary:
    
    205
    +        elif os.path.islink(directory_entry.path):
    
    206
    +            node_target = os.readlink(directory_entry.path)
    
    207
    +
    
    208
    +            node = remote_execution_pb2.SymlinkNode()
    
    209
    +            node.name = directory_entry.name
    
    210
    +            node.target = node_target
    
    211
    +
    
    212
    +            symlinks.append(node)
    
    213
    +
    
    214
    +    directory = remote_execution_pb2.Directory()
    
    215
    +    directory.files.extend(files.sort(key=attrgetter('name')))
    
    216
    +    directory.directories.extend(directories.sort(key=attrgetter('name')))
    
    217
    +    directory.symlinks.extend(symlinks.sort(key=attrgetter('name')))
    
    218
    +
    
    219
    +    directory_blob = directory.SerializeToString()
    
    220
    +    directory_digest = create_digest(directory_blob)
    
    221
    +
    
    222
    +    update_request = remote_execution_pb2.BatchUpdateBlobsRequest.Request(digest=directory_digest)
    
    223
    +    update_request.data = directory_blob
    
    224
    +
    
    225
    +    update_requests.append(update_request)
    
    226
    +
    
    227
    +    return directory, child_directories, update_requests
    
    228
    +
    
    229
    +
    
    230
    +def read_file(file_path):
    
    231
    +    """Loads raw file content in memory.
    
    232
    +
    
    233
    +    Args:
    
    234
    +        file_path (str): path to the target file.
    
    235
    +
    
    236
    +    Returns:
    
    237
    +        bytes: Raw file's content until EOF.
    
    238
    +
    
    239
    +    Raises:
    
    240
    +        OSError: If `file_path` does not exist or is not readable.
    
    241
    +    """
    
    242
    +    with open(file_path, 'rb') as byte_file:
    
    243
    +        return byte_file.read()
    
    244
    +
    
    245
    +
    
    246
    +def output_file_maker(file_path, input_path):
    
    247
    +    """Creates an :obj:`OutputFile` from a local file.
    
    248
    +
    
    249
    +    `file_path` **must** point inside or be relative to `input_path`.
    
    250
    +
    
    251
    +    Args:
    
    252
    +        file_path (str): absolute or relative path to a local file.
    
    253
    +        input_path (str): absolute or relative path to the input root directory.
    
    254
    +
    
    255
    +    Returns:
    
    256
    +        :obj:`OutputFile`, :obj:`BatchUpdateBlobsRequest`: Tuple of a new gRPC
    
    257
    +        :obj:`OutputFile` object for the file pointed by `file_path` and the
    
    258
    +        corresponding :obj:`BatchUpdateBlobsRequest` for CAS upload.
    
    259
    +    """
    
    260
    +    if not os.path.isabs(file_path):
    
    261
    +        file_path = os.path.abspath(file_path)
    
    262
    +    if not os.path.isabs(input_path):
    
    263
    +        input_path = os.path.abspath(input_path)
    
    264
    +
    
    265
    +    file_blob = read_file(file_path)
    
    266
    +    file_digest = create_digest(file_blob)
    
    267
    +
    
    268
    +    output_file = remote_execution_pb2.OutputFile(digest=file_digest)
    
    269
    +    output_file.path = os.path.relpath(file_path, start=input_path)
    
    270
    +    output_file.is_executable = os.access(file_path, os.X_OK)
    
    271
    +
    
    272
    +    update_request = remote_execution_pb2.BatchUpdateBlobsRequest.Request(digest=file_digest)
    
    273
    +    update_request.data = file_blob
    
    274
    +
    
    275
    +    return output_file, update_request
    
    276
    +
    
    277
    +
    
    278
    +def output_directory_maker(directory_path, working_path):
    
    279
    +    """Creates an :obj:`OutputDirectory` from a local directory.
    
    280
    +
    
    281
    +    `directory_path` **must** point inside or be relative to `working_path`.
    
    282
    +
    
    283
    +    Args:
    
    284
    +        directory_path (str): absolute or relative path to a local directory.
    
    285
    +        working_path (str): absolute or relative path to the working directory.
    
    286
    +
    
    287
    +    Returns:
    
    288
    +        :obj:`OutputDirectory`, :obj:`BatchUpdateBlobsRequest`: Tuple of a new
    
    289
    +        gRPC :obj:`OutputDirectory` for the directory pointed by
    
    290
    +        `directory_path` and the corresponding list of
    
    291
    +        :obj:`BatchUpdateBlobsRequest` for CAS upload.
    
    292
    +    """
    
    293
    +    if not os.path.isabs(directory_path):
    
    294
    +        directory_path = os.path.abspath(directory_path)
    
    295
    +    if not os.path.isabs(working_path):
    
    296
    +        working_path = os.path.abspath(working_path)
    
    297
    +
    
    298
    +    _, update_requests = tree_maker(directory_path)
    
    299
    +
    
    300
    +    output_directory = remote_execution_pb2.OutputDirectory()
    
    301
    +    output_directory.tree_digest = update_requests[-1].digest
    
    302
    +    output_directory.path = os.path.relpath(directory_path, start=working_path)
    
    303
    +
    
    304
    +    output_directory_blob = output_directory.SerializeToString()
    
    305
    +    output_directory_digest = create_digest(output_directory_blob)
    
    306
    +
    
    307
    +    update_request = remote_execution_pb2.BatchUpdateBlobsRequest.Request(digest=output_directory_digest)
    
    308
    +    update_request.data = output_directory_blob
    
    309
    +
    
    310
    +    update_requests.append(update_request)
    
    311
    +
    
    312
    +    return output_directory, update_requests
    
    313
    +
    
    314
    +
    
    315
    +def tree_maker(directory_path):
    
    316
    +    """Creates a :obj:`Tree` from a local directory.
    
    317
    +
    
    318
    +    Args:
    
    319
    +        directory_path (str): absolute or relative path to a local directory.
    
    320
    +
    
    321
    +    Returns:
    
    322
    +        :obj:`Tree`, :obj:`BatchUpdateBlobsRequest`: Tuple of a new
    
    323
    +        gRPC :obj:`Tree` for the directory pointed by `directory_path` and the
    
    324
    +        corresponding list of :obj:`BatchUpdateBlobsRequest` for CAS upload.
    
    325
    +
    
    326
    +        The :obj:`BatchUpdateBlobsRequest` list may come in any order. However,
    
    327
    +        its last element is guaranteed to be the :obj:`Tree`'s request.
    
    328
    +    """
    
    329
    +    if not os.path.isabs(directory_path):
    
    330
    +        directory_path = os.path.abspath(directory_path)
    
    331
    +
    
    332
    +    directory, child_directories, update_requests = directory_maker(directory_path)
    
    333
    +
    
    334
    +    tree = remote_execution_pb2.Tree()
    
    335
    +    tree.children.extend([child_directories])
    
    336
    +    tree.root.CopyFrom(directory)
    
    337
    +
    
    338
    +    tree_blob = tree.SerializeToString()
    
    339
    +    tree_digest = create_digest(tree_blob)
    
    340
    +
    
    341
    +    update_request = remote_execution_pb2.BatchUpdateBlobsRequest.Request(digest=tree_digest)
    
    342
    +    update_request.data = tree_blob
    
    343
    +
    
    344
    +    update_requests.append(update_request)
    
    345
    +
    
    346
    +    return tree, update_requests



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]