[Notes] [Git][BuildStream/buildstream][mablanch/630-remote-execution-reconn] 6 commits: _artifactcache: Rename update_atime() to update_mtime()



Title: GitLab

Martin Blanchard pushed to branch mablanch/630-remote-execution-reconn at BuildStream / buildstream

Commits:

4 changed files:

Changes:

  • buildstream/_artifactcache/artifactcache.py
    ... ... @@ -228,7 +228,7 @@ class ArtifactCache():
    228 228
             self._required_elements.update(elements)
    
    229 229
     
    
    230 230
             # For the cache keys which were resolved so far, we bump
    
    231
    -        # the atime of them.
    
    231
    +        # the mtime of them.
    
    232 232
             #
    
    233 233
             # This is just in case we have concurrent instances of
    
    234 234
             # BuildStream running with the same artifact cache, it will
    
    ... ... @@ -240,7 +240,7 @@ class ArtifactCache():
    240 240
                 for key in (strong_key, weak_key):
    
    241 241
                     if key:
    
    242 242
                         try:
    
    243
    -                        self.update_atime(key)
    
    243
    +                        self.update_mtime(element, key)
    
    244 244
                         except ArtifactError:
    
    245 245
                             pass
    
    246 246
     
    
    ... ... @@ -391,15 +391,16 @@ class ArtifactCache():
    391 391
         def preflight(self):
    
    392 392
             pass
    
    393 393
     
    
    394
    -    # update_atime()
    
    394
    +    # update_mtime()
    
    395 395
         #
    
    396
    -    # Update the atime of an artifact.
    
    396
    +    # Update the mtime of an artifact.
    
    397 397
         #
    
    398 398
         # Args:
    
    399
    +    #     element (Element): The Element to update
    
    399 400
         #     key (str): The key of the artifact.
    
    400 401
         #
    
    401
    -    def update_atime(self, key):
    
    402
    -        raise ImplError("Cache '{kind}' does not implement contains()"
    
    402
    +    def update_mtime(self, element, key):
    
    403
    +        raise ImplError("Cache '{kind}' does not implement update_mtime()"
    
    403 404
                             .format(kind=type(self).__name__))
    
    404 405
     
    
    405 406
         # initialize_remotes():
    

  • buildstream/_artifactcache/cascache.py
    ... ... @@ -538,8 +538,9 @@ class CASCache(ArtifactCache):
    538 538
             except FileNotFoundError as e:
    
    539 539
                 raise ArtifactError("Attempt to access unavailable artifact: {}".format(e)) from e
    
    540 540
     
    
    541
    -    def update_atime(self, ref):
    
    541
    +    def update_mtime(self, element, key):
    
    542 542
             try:
    
    543
    +            ref = self.get_artifact_fullname(element, key)
    
    543 544
                 os.utime(self._refpath(ref))
    
    544 545
             except FileNotFoundError as e:
    
    545 546
                 raise ArtifactError("Attempt to access unavailable artifact: {}".format(e)) from e
    

  • buildstream/sandbox/_sandboxremote.py
    ... ... @@ -76,8 +76,7 @@ class SandboxRemote(Sandbox):
    76 76
             # Upload the Command message to the remote CAS server
    
    77 77
             command_digest = cascache.push_message(self._get_project(), remote_command)
    
    78 78
             if not command_digest or not cascache.verify_digest_pushed(self._get_project(), command_digest):
    
    79
    -            # Command push failed
    
    80
    -            return None
    
    79
    +            raise SandboxError("Failed pushing build command to remote CAS.")
    
    81 80
     
    
    82 81
             # Create and send the action.
    
    83 82
             action = remote_execution_pb2.Action(command_digest=command_digest,
    
    ... ... @@ -88,27 +87,57 @@ class SandboxRemote(Sandbox):
    88 87
             # Upload the Action message to the remote CAS server
    
    89 88
             action_digest = cascache.push_message(self._get_project(), action)
    
    90 89
             if not action_digest or not cascache.verify_digest_pushed(self._get_project(), action_digest):
    
    91
    -            # Action push failed
    
    92
    -            return None
    
    90
    +            raise SandboxError("Failed pushing build action to remote CAS.")
    
    93 91
     
    
    94 92
             # Next, try to create a communication channel to the BuildGrid server.
    
    95 93
             channel = grpc.insecure_channel(self.server_url)
    
    96 94
             stub = remote_execution_pb2_grpc.ExecutionStub(channel)
    
    97 95
             request = remote_execution_pb2.ExecuteRequest(action_digest=action_digest,
    
    98 96
                                                           skip_cache_lookup=False)
    
    99
    -        try:
    
    100
    -            operation_iterator = stub.Execute(request)
    
    101
    -        except grpc.RpcError:
    
    102
    -            return None
    
    97
    +
    
    98
    +        def __run_remote_command(stub, execute_request=None, running_operation=None):
    
    99
    +            try:
    
    100
    +                last_operation = None
    
    101
    +                if execute_request is not None:
    
    102
    +                    operation_iterator = stub.Execute(execute_request)
    
    103
    +                else:
    
    104
    +                    request = remote_execution_pb2.WaitExecutionRequest(name=running_operation.name)
    
    105
    +                    operation_iterator = stub.WaitExecution(request)
    
    106
    +
    
    107
    +                for operation in operation_iterator:
    
    108
    +                    if operation.done:
    
    109
    +                        return operation
    
    110
    +                    else:
    
    111
    +                        last_operation = operation
    
    112
    +            except grpc.RpcError as e:
    
    113
    +                status_code = e.code()
    
    114
    +                if status_code == grpc.StatusCode.UNAVAILABLE:
    
    115
    +                    raise SandboxError("Failed contacting remote execution server at {}."
    
    116
    +                                       .format(self.server_url))
    
    117
    +
    
    118
    +                elif status_code in (grpc.StatusCode.INVALID_ARGUMENT,
    
    119
    +                                     grpc.StatusCode.FAILED_PRECONDITION,
    
    120
    +                                     grpc.StatusCode.RESOURCE_EXHAUSTED,
    
    121
    +                                     grpc.StatusCode.INTERNAL,
    
    122
    +                                     grpc.StatusCode.DEADLINE_EXCEEDED):
    
    123
    +                    raise SandboxError("{} ({}).".format(e.details(), status_code.name))
    
    124
    +
    
    125
    +                elif running_operation and status_code == grpc.StatusCode.UNIMPLEMENTED:
    
    126
    +                    raise SandboxError("Failed trying to recover from connection loss: "
    
    127
    +                                       "server does not support operation status polling recovery.")
    
    128
    +
    
    129
    +            return last_operation
    
    103 130
     
    
    104 131
             operation = None
    
    105 132
             with self._get_context().timed_activity("Waiting for the remote build to complete"):
    
    106
    -            # It is advantageous to check operation_iterator.code() is grpc.StatusCode.OK here,
    
    107
    -            # which will check the server is actually contactable. However, calling it when the
    
    108
    -            # server is available seems to cause .code() to hang forever.
    
    109
    -            for operation in operation_iterator:
    
    110
    -                if operation.done:
    
    111
    -                    break
    
    133
    +            operation = __run_remote_command(stub, execute_request=request)
    
    134
    +            if operation is None:
    
    135
    +                return None
    
    136
    +            elif operation.done:
    
    137
    +                return operation
    
    138
    +
    
    139
    +            while operation is not None and not operation.done:
    
    140
    +                operation = __run_remote_command(stub, running_operation=operation)
    
    112 141
     
    
    113 142
             return operation
    
    114 143
     
    
    ... ... @@ -192,7 +221,6 @@ class SandboxRemote(Sandbox):
    192 221
     
    
    193 222
             if operation is None:
    
    194 223
                 # Failure of remote execution, usually due to an error in BuildStream
    
    195
    -            # NB This error could be raised in __run_remote_command
    
    196 224
                 raise SandboxError("No response returned from server")
    
    197 225
     
    
    198 226
             assert not operation.HasField('error') and operation.HasField('response')
    

  • tests/testutils/artifactshare.py
    ... ... @@ -122,9 +122,8 @@ class ArtifactShare():
    122 122
             #       same algo for creating an artifact reference
    
    123 123
             #
    
    124 124
     
    
    125
    -        # Chop off the .bst suffix first
    
    126
    -        assert element_name.endswith('.bst')
    
    127
    -        element_name = element_name[:-4]
    
    125
    +        # Replace path separator and chop off the .bst suffix
    
    126
    +        element_name = os.path.splitext(element_name.replace(os.sep, '-'))[0]
    
    128 127
     
    
    129 128
             valid_chars = string.digits + string.ascii_letters + '-._'
    
    130 129
             element_name = ''.join([
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]