[Notes] [Git][BuildStream/buildstream][jmac/remote_execution_client] 12 commits: Add "remote-execution" project configuration option.



Title: GitLab

Jim MacArthur pushed to branch jmac/remote_execution_client at BuildStream / buildstream

Commits:

10 changed files:

Changes:

  • buildstream/_artifactcache/cascache.py
    ... ... @@ -326,6 +326,7 @@ class CASCache(ArtifactCache):
    326 326
                         uuid_ = uuid.uuid4()
    
    327 327
                         resource_name = '/'.join(['uploads', str(uuid_), 'blobs',
    
    328 328
                                                   digest.hash, str(digest.size_bytes)])
    
    329
    +
    
    329 330
                         def request_stream():
    
    330 331
                             with open(self.objpath(digest), 'rb') as f:
    
    331 332
                                 assert os.fstat(f.fileno()).st_size == digest.size_bytes
    

  • buildstream/_loader/loadelement.py
    ... ... @@ -71,7 +71,7 @@ class LoadElement():
    71 71
                 'kind', 'depends', 'sources', 'sandbox',
    
    72 72
                 'variables', 'environment', 'environment-nocache',
    
    73 73
                 'config', 'public', 'description',
    
    74
    -            'build-depends', 'runtime-depends',
    
    74
    +            'build-depends', 'runtime-depends'
    
    75 75
             ])
    
    76 76
     
    
    77 77
             # Extract the Dependencies
    

  • buildstream/_loader/types.py
    ... ... @@ -41,6 +41,7 @@ class Symbol():
    41 41
         DIRECTORY = "directory"
    
    42 42
         JUNCTION = "junction"
    
    43 43
         SANDBOX = "sandbox"
    
    44
    +    REMOTE_EXECUTION = "remote-execution"
    
    44 45
     
    
    45 46
     
    
    46 47
     # Dependency()
    

  • buildstream/_project.py
    ... ... @@ -129,6 +129,7 @@ class Project():
    129 129
     
    
    130 130
             self.artifact_cache_specs = None
    
    131 131
             self._sandbox = None
    
    132
    +        self._remote_execution = None
    
    132 133
             self._splits = None
    
    133 134
     
    
    134 135
             self._context.add_project(self)
    
    ... ... @@ -460,7 +461,7 @@ class Project():
    460 461
                 'aliases', 'name',
    
    461 462
                 'artifacts', 'options',
    
    462 463
                 'fail-on-overlap', 'shell', 'fatal-warnings',
    
    463
    -            'ref-storage', 'sandbox', 'mirrors'
    
    464
    +            'ref-storage', 'sandbox', 'mirrors', 'remote-execution'
    
    464 465
             ])
    
    465 466
     
    
    466 467
             #
    
    ... ... @@ -478,6 +479,9 @@ class Project():
    478 479
             # Load sandbox configuration
    
    479 480
             self._sandbox = _yaml.node_get(config, Mapping, 'sandbox')
    
    480 481
     
    
    482
    +        # Load remote execution configuration
    
    483
    +        self._remote_execution = _yaml.node_get(config, Mapping, 'remote-execution')
    
    484
    +
    
    481 485
             # Load project split rules
    
    482 486
             self._splits = _yaml.node_get(config, Mapping, 'split-rules')
    
    483 487
     
    

  • buildstream/data/projectconfig.yaml
    ... ... @@ -204,3 +204,6 @@ shell:
    204 204
       # Command to run when `bst shell` does not provide a command
    
    205 205
       #
    
    206 206
       command: [ 'sh', '-i' ]
    
    207
    +
    
    208
    +remote-execution:
    
    209
    +  url: ""
    \ No newline at end of file

  • buildstream/element.py
    ... ... @@ -95,6 +95,7 @@ from . import _site
    95 95
     from ._platform import Platform
    
    96 96
     from .plugin import CoreWarnings
    
    97 97
     from .sandbox._config import SandboxConfig
    
    98
    +from .sandbox._sandboxremote import SandboxRemote
    
    98 99
     
    
    99 100
     from .storage.directory import Directory
    
    100 101
     from .storage._filebaseddirectory import FileBasedDirectory
    
    ... ... @@ -250,6 +251,9 @@ class Element(Plugin):
    250 251
             # Extract Sandbox config
    
    251 252
             self.__sandbox_config = self.__extract_sandbox_config(meta)
    
    252 253
     
    
    254
    +        # Extract remote execution URL
    
    255
    +        self.__remote_execution_url = self.__extract_remote_execution_config(meta)
    
    256
    +
    
    253 257
         def __lt__(self, other):
    
    254 258
             return self.name < other.name
    
    255 259
     
    
    ... ... @@ -1545,6 +1549,8 @@ class Element(Plugin):
    1545 1549
                     finally:
    
    1546 1550
                         if collect is not None:
    
    1547 1551
                             try:
    
    1552
    +                            # Sandbox will probably have replaced its virtual directory, so get it again
    
    1553
    +                            sandbox_vroot = sandbox.get_virtual_directory()
    
    1548 1554
                                 collectvdir = sandbox_vroot.descend(collect.lstrip(os.sep).split(os.sep))
    
    1549 1555
                             except VirtualDirectoryError:
    
    1550 1556
                                 # No collect directory existed
    
    ... ... @@ -2117,7 +2123,24 @@ class Element(Plugin):
    2117 2123
             project = self._get_project()
    
    2118 2124
             platform = Platform.get_platform()
    
    2119 2125
     
    
    2120
    -        if directory is not None and os.path.exists(directory):
    
    2126
    +        if self.__remote_execution_url and self.BST_VIRTUAL_DIRECTORY:
    
    2127
    +            if not self.__artifacts.has_push_remotes(element=self):
    
    2128
    +                # Give an early warning if remote execution will not work
    
    2129
    +                raise ElementError("Artifact {} is configured to use remote execution but has no push remotes. "
    
    2130
    +                                   .format(self.name) +
    
    2131
    +                                   "The remote artifact server(s) may not be correctly configured or contactable.")
    
    2132
    +
    
    2133
    +            self.info("Using a remote 'sandbox' for artifact {}".format(self.name))
    
    2134
    +            sandbox = SandboxRemote(context, project,
    
    2135
    +                                    directory,
    
    2136
    +                                    stdout=stdout,
    
    2137
    +                                    stderr=stderr,
    
    2138
    +                                    config=config,
    
    2139
    +                                    server_url=self.__remote_execution_url,
    
    2140
    +                                    allow_real_directory=False)
    
    2141
    +            yield sandbox
    
    2142
    +        elif directory is not None and os.path.exists(directory):
    
    2143
    +            self.info("Using a local sandbox for artifact {}".format(self.name))
    
    2121 2144
                 sandbox = platform.create_sandbox(context, project,
    
    2122 2145
                                                   directory,
    
    2123 2146
                                                   stdout=stdout,
    
    ... ... @@ -2289,6 +2312,18 @@ class Element(Plugin):
    2289 2312
             return SandboxConfig(self.node_get_member(sandbox_config, int, 'build-uid'),
    
    2290 2313
                                  self.node_get_member(sandbox_config, int, 'build-gid'))
    
    2291 2314
     
    
    2315
    +    def __extract_remote_execution_config(self, meta):
    
    2316
    +        if self.__is_junction:
    
    2317
    +            return ''
    
    2318
    +        else:
    
    2319
    +            project = self._get_project()
    
    2320
    +            project.ensure_fully_loaded()
    
    2321
    +            if project._remote_execution:
    
    2322
    +                rexec_config = _yaml.node_chain_copy(project._remote_execution)
    
    2323
    +                return self.node_get_member(rexec_config, str, 'url')
    
    2324
    +            else:
    
    2325
    +                return ''
    
    2326
    +
    
    2292 2327
         # This makes a special exception for the split rules, which
    
    2293 2328
         # elements may extend but whose defaults are defined in the project.
    
    2294 2329
         #
    

  • buildstream/plugins/elements/autotools.py
    ... ... @@ -57,7 +57,7 @@ from buildstream import BuildElement
    57 57
     
    
    58 58
     # Element implementation for the 'autotools' kind.
    
    59 59
     class AutotoolsElement(BuildElement):
    
    60
    -    pass
    
    60
    +    BST_VIRTUAL_DIRECTORY = True
    
    61 61
     
    
    62 62
     
    
    63 63
     # Plugin entry point
    

  • buildstream/sandbox/__init__.py
    ... ... @@ -20,3 +20,4 @@
    20 20
     from .sandbox import Sandbox, SandboxFlags
    
    21 21
     from ._sandboxchroot import SandboxChroot
    
    22 22
     from ._sandboxbwrap import SandboxBwrap
    
    23
    +from ._sandboxremote import SandboxRemote

  • buildstream/sandbox/_sandboxremote.py
    1
    +#!/usr/bin/env python3
    
    2
    +#
    
    3
    +#  Copyright (C) 2018 Codethink Limited
    
    4
    +#
    
    5
    +#  This program is free software; you can redistribute it and/or
    
    6
    +#  modify it under the terms of the GNU Lesser General Public
    
    7
    +#  License as published by the Free Software Foundation; either
    
    8
    +#  version 2 of the License, or (at your option) any later version.
    
    9
    +#
    
    10
    +#  This library is distributed in the hope that it will be useful,
    
    11
    +#  but WITHOUT ANY WARRANTY; without even the implied warranty of
    
    12
    +#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
    
    13
    +#  Lesser General Public License for more details.
    
    14
    +#
    
    15
    +#  You should have received a copy of the GNU Lesser General Public
    
    16
    +#  License along with this library. If not, see <http://www.gnu.org/licenses/>.
    
    17
    +#
    
    18
    +#  Authors:
    
    19
    +#        Jim MacArthur <jim macarthur codethink co uk>
    
    20
    +
    
    21
    +import os
    
    22
    +import re
    
    23
    +
    
    24
    +import grpc
    
    25
    +
    
    26
    +from . import Sandbox
    
    27
    +from ..storage._filebaseddirectory import FileBasedDirectory
    
    28
    +from ..storage._casbaseddirectory import CasBasedDirectory
    
    29
    +from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc
    
    30
    +
    
    31
    +from .._artifactcache.cascache import CASCache
    
    32
    +
    
    33
    +
    
    34
    +class SandboxError(Exception):
    
    35
    +    pass
    
    36
    +
    
    37
    +
    
    38
    +# SandboxRemote()
    
    39
    +#
    
    40
    +# This isn't really a sandbox, it's a stub which sends all the source
    
    41
    +# to a remote server and retrieves the results from it.
    
    42
    +#
    
    43
    +class SandboxRemote(Sandbox):
    
    44
    +
    
    45
    +    def __init__(self, *args, **kwargs):
    
    46
    +        super().__init__(*args, **kwargs)
    
    47
    +        self.cascache = None
    
    48
    +        self.server_url = kwargs['server_url']
    
    49
    +        # Check the format of the url ourselves to save the user from
    
    50
    +        # whatever error messages grpc will produce
    
    51
    +        m = re.match(r'^(.+):(\d+)$', self.server_url)
    
    52
    +        if m is None:
    
    53
    +            raise SandboxError("Configured remote URL '{}' does not match the expected layout. "
    
    54
    +                               .format(self.server_url) +
    
    55
    +                               "It should be of the form <protocol>://<domain name>:<port>.")
    
    56
    +
    
    57
    +    def _get_cascache(self):
    
    58
    +        if self.cascache is None:
    
    59
    +            self.cascache = CASCache(self._get_context())
    
    60
    +            self.cascache.setup_remotes(use_config=True)
    
    61
    +        return self.cascache
    
    62
    +
    
    63
    +    def __run_remote_command(self, cascache, command, input_root_digest, environment):
    
    64
    +
    
    65
    +        environment_variables = [remote_execution_pb2.Command.
    
    66
    +                                 EnvironmentVariable(name=k, value=v)
    
    67
    +                                 for (k, v) in environment.items()]
    
    68
    +
    
    69
    +        # Create and send the Command object.
    
    70
    +        remote_command = remote_execution_pb2.Command(arguments=command, environment_variables=environment_variables,
    
    71
    +                                                      output_files=[],
    
    72
    +                                                      output_directories=[self._output_directory],
    
    73
    +                                                      platform=None)
    
    74
    +        command_digest = cascache.add_object(buffer=remote_command.SerializeToString())
    
    75
    +        command_ref = 'worker-command/{}'.format(command_digest.hash)
    
    76
    +        cascache.set_ref(command_ref, command_digest)
    
    77
    +
    
    78
    +        command_push_successful = cascache.push_refs([command_ref], self._get_project(), may_have_dependencies=False)
    
    79
    +        if not command_push_successful and not cascache.verify_key_pushed(command_ref, self._get_project()):
    
    80
    +            # Command push failed
    
    81
    +            return None
    
    82
    +
    
    83
    +        # Create and send the action.
    
    84
    +
    
    85
    +        action = remote_execution_pb2.Action(command_digest=command_digest,
    
    86
    +                                             input_root_digest=input_root_digest,
    
    87
    +                                             timeout=None,
    
    88
    +                                             do_not_cache=True)
    
    89
    +
    
    90
    +        action_digest = cascache.add_object(buffer=action.SerializeToString())
    
    91
    +        action_ref = 'worker-action/{}'.format(command_digest.hash)
    
    92
    +        cascache.set_ref(action_ref, action_digest)
    
    93
    +        action_push_successful = cascache.push_refs([action_ref], self._get_project(), may_have_dependencies=False)
    
    94
    +
    
    95
    +        if not action_push_successful and not cascache.verify_key_pushed(action_ref, self._get_project()):
    
    96
    +            # Action push failed
    
    97
    +            return None
    
    98
    +
    
    99
    +        # Next, try to create a communication channel to the BuildGrid server.
    
    100
    +
    
    101
    +        channel = grpc.insecure_channel(self.server_url)
    
    102
    +        stub = remote_execution_pb2_grpc.ExecutionStub(channel)
    
    103
    +        request = remote_execution_pb2.ExecuteRequest(instance_name='default',
    
    104
    +                                                      action_digest=action_digest,
    
    105
    +                                                      skip_cache_lookup=True)
    
    106
    +
    
    107
    +        operation_iterator = stub.Execute(request)
    
    108
    +        operation = None
    
    109
    +        with self._get_context().timed_activity("Waiting for the remote build to complete"):
    
    110
    +            # It is advantageous to check operation_iterator.code() is grpc.StatusCode.OK here,
    
    111
    +            # which will check the server is actually contactable. However, calling it when the
    
    112
    +            # server is available seems to cause .code() to hang forever.
    
    113
    +            for operation in operation_iterator:
    
    114
    +                if operation.done:
    
    115
    +                    break
    
    116
    +        return operation
    
    117
    +
    
    118
    +    def process_job_output(self, output_directories, output_files):
    
    119
    +        # output_directories is an array of OutputDirectory objects.
    
    120
    +        # output_files is an array of OutputFile objects.
    
    121
    +        #
    
    122
    +        # We only specify one output_directory, so it's an error
    
    123
    +        # for there to be any output files or more than one directory at the moment.
    
    124
    +
    
    125
    +        if output_files:
    
    126
    +            raise SandboxError("Output files were returned when we didn't request any.")
    
    127
    +        elif len(output_directories) > 1:
    
    128
    +            error_text = "More than one output directory was returned from the build server: {}"
    
    129
    +            raise SandboxError(error_text.format(output_directories))
    
    130
    +        elif len(output_directories) < 1:  # pylint: disable=len-as-condition
    
    131
    +            error_text = "No output directory was returned from the build server."
    
    132
    +            raise SandboxError(error_text)
    
    133
    +
    
    134
    +        digest = output_directories[0].tree_digest
    
    135
    +        if digest is None or digest.hash is None or digest.hash == "":
    
    136
    +            raise SandboxError("Output directory structure had no digest attached.")
    
    137
    +
    
    138
    +        # Now do a pull to ensure we have the necessary parts.
    
    139
    +        cascache = self._get_cascache()
    
    140
    +        cascache.pull_key(digest.hash, digest.size_bytes, self._get_project())
    
    141
    +        path_components = os.path.split(self._output_directory)
    
    142
    +
    
    143
    +        # Now what we have is a digest for the output. Once we return, the calling process will
    
    144
    +        # attempt to descend into our directory and find that directory, so we need to overwrite
    
    145
    +        # that.
    
    146
    +
    
    147
    +        if not path_components:
    
    148
    +            # The artifact wants the whole directory; we could just return the returned hash in its
    
    149
    +            # place, but we don't have a means to do that yet.
    
    150
    +            raise SandboxError("Unimplemented: Output directory is empty or equal to the sandbox root.")
    
    151
    +
    
    152
    +        # At the moment, we will get the whole directory back in the first directory argument and we need
    
    153
    +        # to replace the sandbox's virtual directory with that. Creating a new virtual directory object
    
    154
    +        # from another hash will be interesting, though...
    
    155
    +
    
    156
    +        new_dir = CasBasedDirectory(self._get_context(), ref=digest)
    
    157
    +        self._set_virtual_directory(new_dir)
    
    158
    +
    
    159
    +    def run(self, command, flags, *, cwd=None, env=None):
    
    160
    +        # Upload sources
    
    161
    +        upload_vdir = self.get_virtual_directory()
    
    162
    +
    
    163
    +        if isinstance(upload_vdir, FileBasedDirectory):
    
    164
    +            # Make a new temporary directory to put source in
    
    165
    +            upload_vdir = CasBasedDirectory(self._get_context(), ref=None)
    
    166
    +            upload_vdir.import_files(self.get_virtual_directory()._get_underlying_directory())
    
    167
    +
    
    168
    +        # Now, push that key (without necessarily needing a ref) to the remote.
    
    169
    +        cascache = self._get_cascache()
    
    170
    +
    
    171
    +        ref = 'worker-source/{}'.format(upload_vdir.ref.hash)
    
    172
    +        upload_vdir._save(ref)
    
    173
    +        source_push_successful = cascache.push_refs([ref], self._get_project())
    
    174
    +
    
    175
    +        # Set up environment and PWD
    
    176
    +        if env is None:
    
    177
    +            env = self._get_environment()
    
    178
    +        if 'PWD' not in env:
    
    179
    +            env['PWD'] = self._get_work_directory()
    
    180
    +
    
    181
    +        # We want command args as a list of strings
    
    182
    +        if isinstance(command, str):
    
    183
    +            command = [command]
    
    184
    +
    
    185
    +        # Now transmit the command to execute
    
    186
    +        if source_push_successful or cascache.verify_key_pushed(ref, self._get_project()):
    
    187
    +            response = self.__run_remote_command(cascache, command, upload_vdir.ref, env)
    
    188
    +
    
    189
    +            if response is None:
    
    190
    +                # Failure of remote execution, usually due to an error in BuildStream
    
    191
    +                # NB This error could be raised in __run_remote_command
    
    192
    +                raise SandboxError("No response returned from server")
    
    193
    +
    
    194
    +            assert(response.HasField("error") or response.HasField("response"))
    
    195
    +
    
    196
    +            if response.HasField("error"):
    
    197
    +                # A normal error during the build: the remote execution system
    
    198
    +                # has worked correctly but the command failed.
    
    199
    +                # response.error also contains 'message' (str) and 'details'
    
    200
    +                # (iterator of Any) which we ignore at the moment.
    
    201
    +                return response.error.code
    
    202
    +            else:
    
    203
    +
    
    204
    +                # At the moment, response can either be an
    
    205
    +                # ExecuteResponse containing an ActionResult, or an
    
    206
    +                # ActionResult directly.
    
    207
    +                executeResponse = remote_execution_pb2.ExecuteResponse()
    
    208
    +                if response.response.Is(executeResponse.DESCRIPTOR):
    
    209
    +                    # Unpack ExecuteResponse and set response to its response
    
    210
    +                    response.response.Unpack(executeResponse)
    
    211
    +                    response = executeResponse
    
    212
    +
    
    213
    +                actionResult = remote_execution_pb2.ActionResult()
    
    214
    +                if response.response.Is(actionResult.DESCRIPTOR):
    
    215
    +                    response.response.Unpack(actionResult)
    
    216
    +                    self.process_job_output(actionResult.output_directories, actionResult.output_files)
    
    217
    +                else:
    
    218
    +                    raise SandboxError("Received unknown message from server (expected ExecutionResponse).")
    
    219
    +        else:
    
    220
    +            raise SandboxError("Failed to verify that source has been pushed to the remote artifact cache.")
    
    221
    +        return 0

  • doc/source/format_project.rst
    ... ... @@ -204,6 +204,23 @@ with an artifact share.
    204 204
     You can also specify a list of caches here; earlier entries in the list
    
    205 205
     will have higher priority than later ones.
    
    206 206
     
    
    207
    +Remote execution
    
    208
    +~~~~~~~~~~~~~~~~
    
    209
    +BuildStream supports remote execution using the Google Remote Execution API
    
    210
    +(REAPI). A description of how remote execution works is beyond the scope
    
    211
    +of this document, but you can specify a remote server complying with the REAPI
    
    212
    +using the `remote-execution` option:
    
    213
    +
    
    214
    +.. code:: yaml
    
    215
    +
    
    216
    +  remote-execution:
    
    217
    +
    
    218
    +    # A url defining a remote execution server
    
    219
    +    url: buildserver.example.com:50051
    
    220
    +
    
    221
    +The URL should be a hostname and port separated by ':'. Do not include a protocol prefix (such as 'http://').
    
    222
    +
    
    223
    +The Remote Execution API can be found via https://github.com/bazelbuild/remote-apis.
    
    207 224
     
    
    208 225
     .. _project_essentials_mirrors:
    
    209 226
     
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]