[Notes] [Git][BuildStream/buildstream][mablanch/630-remote-execution-reconn] 11 commits: _project.py: Fix option resolution in element & source overrides



Title: GitLab

Martin Blanchard pushed to branch mablanch/630-remote-execution-reconn at BuildStream / buildstream

Commits:

11 changed files:

Changes:

  • buildstream/_artifactcache/artifactcache.py
    ... ... @@ -51,7 +51,7 @@ class ArtifactCacheSpec(namedtuple('ArtifactCacheSpec', 'url push server_cert cl
    51 51
             url = _yaml.node_get(spec_node, str, 'url')
    
    52 52
             push = _yaml.node_get(spec_node, bool, 'push', default_value=False)
    
    53 53
             if not url:
    
    54
    -            provenance = _yaml.node_get_provenance(spec_node)
    
    54
    +            provenance = _yaml.node_get_provenance(spec_node, 'url')
    
    55 55
                 raise LoadError(LoadErrorReason.INVALID_DATA,
    
    56 56
                                 "{}: empty artifact cache URL".format(provenance))
    
    57 57
     
    
    ... ... @@ -67,6 +67,16 @@ class ArtifactCacheSpec(namedtuple('ArtifactCacheSpec', 'url push server_cert cl
    67 67
             if client_cert and basedir:
    
    68 68
                 client_cert = os.path.join(basedir, client_cert)
    
    69 69
     
    
    70
    +        if client_key and not client_cert:
    
    71
    +            provenance = _yaml.node_get_provenance(spec_node, 'client-key')
    
    72
    +            raise LoadError(LoadErrorReason.INVALID_DATA,
    
    73
    +                            "{}: 'client-key' was specified without 'client-cert'".format(provenance))
    
    74
    +
    
    75
    +        if client_cert and not client_key:
    
    76
    +            provenance = _yaml.node_get_provenance(spec_node, 'client-cert')
    
    77
    +            raise LoadError(LoadErrorReason.INVALID_DATA,
    
    78
    +                            "{}: 'client-cert' was specified without 'client-key'".format(provenance))
    
    79
    +
    
    70 80
             return ArtifactCacheSpec(url, push, server_cert, client_key, client_cert)
    
    71 81
     
    
    72 82
     
    
    ... ... @@ -91,6 +101,7 @@ class ArtifactCache():
    91 101
             self._cache_size = None               # The current cache size, sometimes it's an estimate
    
    92 102
             self._cache_quota = None              # The cache quota
    
    93 103
             self._cache_lower_threshold = None    # The target cache size for a cleanup
    
    104
    +        self._remotes_setup = False           # Check to prevent double-setup of remotes
    
    94 105
     
    
    95 106
             os.makedirs(self.extractdir, exist_ok=True)
    
    96 107
             os.makedirs(self.tmpdir, exist_ok=True)
    
    ... ... @@ -143,6 +154,10 @@ class ArtifactCache():
    143 154
         #
    
    144 155
         def setup_remotes(self, *, use_config=False, remote_url=None):
    
    145 156
     
    
    157
    +        # Ensure we do not double-initialise since this can be expensive
    
    158
    +        assert(not self._remotes_setup)
    
    159
    +        self._remotes_setup = True
    
    160
    +
    
    146 161
             # Initialize remote artifact caches. We allow the commandline to override
    
    147 162
             # the user config in some cases (for example `bst push --remote=...`).
    
    148 163
             has_remote_caches = False
    

  • buildstream/_project.py
    ... ... @@ -598,7 +598,10 @@ class Project():
    598 598
             # any conditionals specified for project option declarations,
    
    599 599
             # or conditionally specifying the project name; will be ignored.
    
    600 600
             #
    
    601
    +        # Don't forget to also resolve options in the element and source overrides.
    
    601 602
             output.options.process_node(config)
    
    603
    +        output.options.process_node(output.element_overrides)
    
    604
    +        output.options.process_node(output.source_overrides)
    
    602 605
     
    
    603 606
             # Load base variables
    
    604 607
             output.base_variables = _yaml.node_get(config, Mapping, 'variables')
    

  • buildstream/sandbox/_sandboxremote.py
    ... ... @@ -27,7 +27,7 @@ from . import Sandbox
    27 27
     from ..storage._filebaseddirectory import FileBasedDirectory
    
    28 28
     from ..storage._casbaseddirectory import CasBasedDirectory
    
    29 29
     from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc
    
    30
    -from .._artifactcache.cascache import CASCache
    
    30
    +from .._platform import Platform
    
    31 31
     
    
    32 32
     
    
    33 33
     class SandboxError(Exception):
    
    ... ... @@ -43,7 +43,6 @@ class SandboxRemote(Sandbox):
    43 43
     
    
    44 44
         def __init__(self, *args, **kwargs):
    
    45 45
             super().__init__(*args, **kwargs)
    
    46
    -        self.cascache = None
    
    47 46
     
    
    48 47
             url = urlparse(kwargs['server_url'])
    
    49 48
             if not url.scheme or not url.hostname or not url.port:
    
    ... ... @@ -56,12 +55,6 @@ class SandboxRemote(Sandbox):
    56 55
     
    
    57 56
             self.server_url = '{}:{}'.format(url.hostname, url.port)
    
    58 57
     
    
    59
    -    def _get_cascache(self):
    
    60
    -        if self.cascache is None:
    
    61
    -            self.cascache = CASCache(self._get_context())
    
    62
    -            self.cascache.setup_remotes(use_config=True)
    
    63
    -        return self.cascache
    
    64
    -
    
    65 58
         def run_remote_command(self, command, input_root_digest, working_directory, environment):
    
    66 59
             # Sends an execution request to the remote execution server.
    
    67 60
             #
    
    ... ... @@ -78,13 +71,12 @@ class SandboxRemote(Sandbox):
    78 71
                                                           output_files=[],
    
    79 72
                                                           output_directories=[self._output_directory],
    
    80 73
                                                           platform=None)
    
    81
    -
    
    82
    -        cascache = self._get_cascache()
    
    74
    +        platform = Platform.get_platform()
    
    75
    +        cascache = platform.artifactcache
    
    83 76
             # Upload the Command message to the remote CAS server
    
    84 77
             command_digest = cascache.push_message(self._get_project(), remote_command)
    
    85 78
             if not command_digest or not cascache.verify_digest_pushed(self._get_project(), command_digest):
    
    86
    -            # Command push failed
    
    87
    -            return None
    
    79
    +            raise SandboxError("Failed pushing build command to remote CAS.")
    
    88 80
     
    
    89 81
             # Create and send the action.
    
    90 82
             action = remote_execution_pb2.Action(command_digest=command_digest,
    
    ... ... @@ -95,27 +87,53 @@ class SandboxRemote(Sandbox):
    95 87
             # Upload the Action message to the remote CAS server
    
    96 88
             action_digest = cascache.push_message(self._get_project(), action)
    
    97 89
             if not action_digest or not cascache.verify_digest_pushed(self._get_project(), action_digest):
    
    98
    -            # Action push failed
    
    99
    -            return None
    
    90
    +            raise SandboxError("Failed pushing build action to remote CAS.")
    
    100 91
     
    
    101 92
             # Next, try to create a communication channel to the BuildGrid server.
    
    102 93
             channel = grpc.insecure_channel(self.server_url)
    
    103 94
             stub = remote_execution_pb2_grpc.ExecutionStub(channel)
    
    104 95
             request = remote_execution_pb2.ExecuteRequest(action_digest=action_digest,
    
    105 96
                                                           skip_cache_lookup=False)
    
    106
    -        try:
    
    107
    -            operation_iterator = stub.Execute(request)
    
    108
    -        except grpc.RpcError:
    
    109
    -            return None
    
    97
    +
    
    98
    +        def __run_remote_command(stub, execute_request=None, running_operation=None):
    
    99
    +            try:
    
    100
    +                last_operation = None
    
    101
    +                if execute_request is not None:
    
    102
    +                    operation_iterator = stub.Execute(execute_request)
    
    103
    +                else:
    
    104
    +                    request = remote_execution_pb2.WaitExecutionRequest(name=running_operation.name)
    
    105
    +                    operation_iterator = stub.WaitExecution(request)
    
    106
    +
    
    107
    +                for operation in operation_iterator:
    
    108
    +                    if operation.done:
    
    109
    +                        return operation
    
    110
    +                    else:
    
    111
    +                        last_operation = operation
    
    112
    +            except grpc.RpcError as e:
    
    113
    +                status_code = e.code()
    
    114
    +                if status_code == grpc.StatusCode.UNAVAILABLE:
    
    115
    +                    raise SandboxError("Failed contacting remote execution server at {}."
    
    116
    +                                       .format(self.server_url))
    
    117
    +
    
    118
    +                elif (status_code == grpc.StatusCode.INVALID_ARGUMENT or
    
    119
    +                      status_code == grpc.StatusCode.FAILED_PRECONDITION or
    
    120
    +                      status_code == grpc.StatusCode.RESOURCE_EXHAUSTED):
    
    121
    +                    raise SandboxError("{} ({}).".format(e.details(), status_code.name))
    
    122
    +
    
    123
    +                elif running_operation and status_code == grpc.StatusCode.UNIMPLEMENTED:
    
    124
    +                    raise SandboxError("Failed trying to recover from connection loss: "
    
    125
    +                                       "server does not support operation status polling recovery.")
    
    126
    +
    
    127
    +            return last_operation
    
    110 128
     
    
    111 129
             operation = None
    
    112 130
             with self._get_context().timed_activity("Waiting for the remote build to complete"):
    
    113
    -            # It is advantageous to check operation_iterator.code() is grpc.StatusCode.OK here,
    
    114
    -            # which will check the server is actually contactable. However, calling it when the
    
    115
    -            # server is available seems to cause .code() to hang forever.
    
    116
    -            for operation in operation_iterator:
    
    117
    -                if operation.done:
    
    118
    -                    break
    
    131
    +            operation = __run_remote_command(stub, execute_request=request)
    
    132
    +            if operation and operation.done:
    
    133
    +                return operation
    
    134
    +
    
    135
    +            while not operation.done:
    
    136
    +                operation = __run_remote_command(stub, running_operation=operation)
    
    119 137
     
    
    120 138
             return operation
    
    121 139
     
    
    ... ... @@ -141,7 +159,8 @@ class SandboxRemote(Sandbox):
    141 159
             if tree_digest is None or not tree_digest.hash:
    
    142 160
                 raise SandboxError("Output directory structure had no digest attached.")
    
    143 161
     
    
    144
    -        cascache = self._get_cascache()
    
    162
    +        platform = Platform.get_platform()
    
    163
    +        cascache = platform.artifactcache
    
    145 164
             # Now do a pull to ensure we have the necessary parts.
    
    146 165
             dir_digest = cascache.pull_tree(self._get_project(), tree_digest)
    
    147 166
             if dir_digest is None or not dir_digest.hash or not dir_digest.size_bytes:
    
    ... ... @@ -176,7 +195,8 @@ class SandboxRemote(Sandbox):
    176 195
     
    
    177 196
             upload_vdir.recalculate_hash()
    
    178 197
     
    
    179
    -        cascache = self._get_cascache()
    
    198
    +        platform = Platform.get_platform()
    
    199
    +        cascache = platform.artifactcache
    
    180 200
             # Now, push that key (without necessarily needing a ref) to the remote.
    
    181 201
             vdir_digest = cascache.push_directory(self._get_project(), upload_vdir)
    
    182 202
             if not vdir_digest or not cascache.verify_digest_pushed(self._get_project(), vdir_digest):
    
    ... ... @@ -201,7 +221,6 @@ class SandboxRemote(Sandbox):
    201 221
     
    
    202 222
             if operation is None:
    
    203 223
                 # Failure of remote execution, usually due to an error in BuildStream
    
    204
    -            # NB This error could be raised in __run_remote_command
    
    205 224
                 raise SandboxError("No response returned from server")
    
    206 225
     
    
    207 226
             assert not operation.HasField('error') and operation.HasField('response')
    

  • tests/artifactcache/config.py
    ... ... @@ -9,8 +9,12 @@ from buildstream._context import Context
    9 9
     from buildstream._project import Project
    
    10 10
     from buildstream.utils import _deduplicate
    
    11 11
     from buildstream import _yaml
    
    12
    +from buildstream._exceptions import ErrorDomain, LoadErrorReason
    
    12 13
     
    
    14
    +from tests.testutils.runcli import cli
    
    13 15
     
    
    16
    +
    
    17
    +DATA_DIR = os.path.dirname(os.path.realpath(__file__))
    
    14 18
     cache1 = ArtifactCacheSpec(url='https://example.com/cache1', push=True)
    
    15 19
     cache2 = ArtifactCacheSpec(url='https://example.com/cache2', push=False)
    
    16 20
     cache3 = ArtifactCacheSpec(url='https://example.com/cache3', push=False)
    
    ... ... @@ -106,3 +110,33 @@ def test_artifact_cache_precedence(tmpdir, override_caches, project_caches, user
    106 110
         # Verify that it was correctly read.
    
    107 111
         expected_cache_specs = list(_deduplicate(itertools.chain(override_caches, project_caches, user_caches)))
    
    108 112
         assert parsed_cache_specs == expected_cache_specs
    
    113
    +
    
    114
    +
    
    115
    +# Assert that if either the client key or client cert is specified
    
    116
    +# without specifying its counterpart, we get a comprehensive LoadError
    
    117
    +# instead of an unhandled exception.
    
    118
    +@pytest.mark.datafiles(DATA_DIR)
    
    119
    +@pytest.mark.parametrize('config_key, config_value', [
    
    120
    +    ('client-cert', 'client.crt'),
    
    121
    +    ('client-key', 'client.key')
    
    122
    +])
    
    123
    +def test_missing_certs(cli, datafiles, config_key, config_value):
    
    124
    +    project = os.path.join(datafiles.dirname, datafiles.basename, 'missing-certs')
    
    125
    +
    
    126
    +    project_conf = {
    
    127
    +        'name': 'test',
    
    128
    +
    
    129
    +        'artifacts': {
    
    130
    +            'url': 'https://cache.example.com:12345',
    
    131
    +            'push': 'true',
    
    132
    +            config_key: config_value
    
    133
    +        }
    
    134
    +    }
    
    135
    +    project_conf_file = os.path.join(project, 'project.conf')
    
    136
    +    _yaml.dump(project_conf, project_conf_file)
    
    137
    +
    
    138
    +    # Use `pull` here to ensure we try to initialize the remotes, triggering the error
    
    139
    +    #
    
    140
    +    # This does not happen for a simple `bst show`.
    
    141
    +    result = cli.run(project=project, args=['pull', 'element.bst'])
    
    142
    +    result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.INVALID_DATA)

  • tests/artifactcache/missing-certs/certificates/client.crt

  • tests/artifactcache/missing-certs/certificates/client.key

  • tests/artifactcache/missing-certs/element.bst
    1
    +kind: autotools

  • tests/artifactcache/pull.py
    ... ... @@ -137,7 +137,6 @@ def _test_pull(user_config_file, project_dir, artifact_dir,
    137 137
     
    
    138 138
         # Manually setup the CAS remote
    
    139 139
         cas.setup_remotes(use_config=True)
    
    140
    -    cas.initialize_remotes()
    
    141 140
     
    
    142 141
         if cas.has_push_remotes(element=element):
    
    143 142
             # Push the element's artifact
    
    ... ... @@ -274,7 +273,6 @@ def _test_push_tree(user_config_file, project_dir, artifact_dir, artifact_digest
    274 273
     
    
    275 274
         # Manually setup the CAS remote
    
    276 275
         cas.setup_remotes(use_config=True)
    
    277
    -    cas.initialize_remotes()
    
    278 276
     
    
    279 277
         if cas.has_push_remotes():
    
    280 278
             directory = remote_execution_pb2.Directory()
    
    ... ... @@ -310,7 +308,6 @@ def _test_pull_tree(user_config_file, project_dir, artifact_dir, artifact_digest
    310 308
     
    
    311 309
         # Manually setup the CAS remote
    
    312 310
         cas.setup_remotes(use_config=True)
    
    313
    -    cas.initialize_remotes()
    
    314 311
     
    
    315 312
         if cas.has_push_remotes():
    
    316 313
             # Pull the artifact using the Tree object
    

  • tests/format/option-overrides/element.bst
    1
    +kind: autotools

  • tests/format/option-overrides/project.conf
    1
    +# Test case ensuring that we can use options
    
    2
    +# in the element overrides.
    
    3
    +#
    
    4
    +name: test
    
    5
    +
    
    6
    +options:
    
    7
    +  arch:
    
    8
    +    type: arch
    
    9
    +    description: architecture
    
    10
    +    values: [i686, x86_64]
    
    11
    +
    
    12
    +elements:
    
    13
    +  autotools:
    
    14
    +    variables:
    
    15
    +      (?):
    
    16
    +      - arch == 'i686':
    
    17
    +          conf-global: --host=i686-unknown-linux-gnu
    
    18
    +      - arch == 'x86_64':
    
    19
    +          conf-global: --host=x86_64-unknown-linux-gnu

  • tests/format/optionoverrides.py
    1
    +import os
    
    2
    +import pytest
    
    3
    +from buildstream import _yaml
    
    4
    +from tests.testutils.runcli import cli
    
    5
    +
    
    6
    +# Project directory
    
    7
    +DATA_DIR = os.path.dirname(os.path.realpath(__file__))
    
    8
    +
    
    9
    +
    
    10
    +@pytest.mark.datafiles(DATA_DIR)
    
    11
    +@pytest.mark.parametrize("arch", [('i686'), ('x86_64')])
    
    12
    +def test_override(cli, datafiles, arch):
    
    13
    +    project = os.path.join(datafiles.dirname, datafiles.basename, 'option-overrides')
    
    14
    +
    
    15
    +    bst_args = ['--option', 'arch', arch]
    
    16
    +    bst_args += [
    
    17
    +        'show',
    
    18
    +        '--deps', 'none',
    
    19
    +        '--format', '%{vars}',
    
    20
    +        'element.bst'
    
    21
    +    ]
    
    22
    +    result = cli.run(project=project, silent=True, args=bst_args)
    
    23
    +    result.assert_success()
    
    24
    +
    
    25
    +    # See the associated project.conf for the expected values
    
    26
    +    expected_value = '--host={}-unknown-linux-gnu'.format(arch)
    
    27
    +
    
    28
    +    loaded = _yaml.load_data(result.output)
    
    29
    +    assert loaded['conf-global'] == expected_value



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]