[Notes] [Git][BuildGrid/buildgrid][finn/84-bot-errors] 7 commits: Added HASH_LENGTH to settings file.



Title: GitLab

finn pushed to branch finn/84-bot-errors at BuildGrid / buildgrid

Commits: (the list of the 7 commits was not preserved in this archived copy)

8 changed files:

Changes:

  • buildgrid/_app/bots/buildbox.py
    ... ... @@ -19,9 +19,11 @@ import tempfile
    19 19
     
    
    20 20
     from google.protobuf import any_pb2
    
    21 21
     
    
    22
    +from buildgrid.settings import HASH_LENGTH
    
    22 23
     from buildgrid.client.cas import upload
    
    23 24
     from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    
    24 25
     from buildgrid._protos.google.bytestream import bytestream_pb2_grpc
    
    26
    +from buildgrid._exceptions import BotError
    
    25 27
     from buildgrid.utils import read_file, write_file, parse_to_pb2_from_fetch
    
    26 28
     
    
    27 29
     
    
    ... ... @@ -87,17 +89,30 @@ def work_buildbox(context, lease):
    87 89
     
    
    88 90
                 command_line = subprocess.Popen(command_line,
    
    89 91
                                                 stdin=subprocess.PIPE,
    
    90
    -                                            stdout=subprocess.PIPE)
    
    91
    -            # TODO: Should return the stdout and stderr to the user.
    
    92
    -            command_line.communicate()
    
    92
    +                                            stdout=subprocess.PIPE,
    
    93
    +                                            stderr=subprocess.PIPE)
    
    94
    +            stdout, stderr = command_line.communicate()
    
    95
    +            returncode = command_line.returncode
    
    96
    +            action_result = remote_execution_pb2.ActionResult()
    
    97
    +            # TODO: Upload to CAS or output RAW
    
    98
    +            # For now, just pass raw
    
    99
    +            # https://gitlab.com/BuildGrid/buildgrid/issues/90
    
    100
    +            action_result.stdout_raw = stdout
    
    101
    +            action_result.stderr_raw = stderr
    
    102
    +            action_result.exit_code = returncode
    
    103
    +
    
    104
    +            logger.debug("BuildBox stderr: [{}]".format(stderr.encode()))
    
    105
    +            logger.debug("BuildBox stdout: [{}]".format(stdout.encode()))
    
    106
    +            logger.debug("BuildBox exit code: [{}]".format(returncode))
    
    93 107
     
    
    94 108
                 output_digest = remote_execution_pb2.Digest()
    
    95 109
                 output_digest.ParseFromString(read_file(output_digest_file.name))
    
    96 110
     
    
    97
    -            logger.debug("Output root digest: {}".format(output_digest))
    
    111
    +            logger.debug("Output root digest: [{}]".format(output_digest))
    
    98 112
     
    
    99
    -            if len(output_digest.hash) < 64:
    
    100
    -                logger.warning("Buildbox command failed - no output root digest present.")
    
    113
    +            if len(output_digest.hash) != HASH_LENGTH:
    
    114
    +                raise BotError(stdout,
    
    115
    +                               detail=stderr, reason="Output root digest too small.")
    
    101 116
     
    
    102 117
                 # TODO: Have BuildBox helping us creating the Tree instance here
    
    103 118
                 # See https://gitlab.com/BuildStream/buildbox/issues/7 for details
    
    ... ... @@ -110,7 +125,6 @@ def work_buildbox(context, lease):
    110 125
                 output_directory.tree_digest.CopyFrom(output_tree_digest)
    
    111 126
                 output_directory.path = os.path.relpath(working_directory, start='/')
    
    112 127
     
    
    113
    -            action_result = remote_execution_pb2.ActionResult()
    
    114 128
                 action_result.output_directories.extend([output_directory])
    
    115 129
     
    
    116 130
                 action_result_any = any_pb2.Any()
    

  • buildgrid/_app/bots/temp_directory.py
    ... ... @@ -77,11 +77,23 @@ def work_temp_directory(context, lease):
    77 77
                                        universal_newlines=True,
    
    78 78
                                        env=environment,
    
    79 79
                                        stdin=subprocess.PIPE,
    
    80
    -                                   stdout=subprocess.PIPE)
    
    81
    -        # TODO: Should return the stdout and stderr in the ActionResult.
    
    82
    -        process.communicate()
    
    80
    +                                   stdout=subprocess.PIPE,
    
    81
    +                                   stderr=subprocess.PIPE)
    
    82
    +
    
    83
    +        stdout, stderr = process.communicate()
    
    84
    +        returncode = process.returncode
    
    83 85
     
    
    84 86
             action_result = remote_execution_pb2.ActionResult()
    
    87
    +        # TODO: Upload to CAS or output RAW
    
    88
    +        # For now, just pass raw
    
    89
    +        # https://gitlab.com/BuildGrid/buildgrid/issues/90
    
    90
    +        action_result.stdout_raw = stdout.encode()
    
    91
    +        action_result.stderr_raw = stderr.encode()
    
    92
    +        action_result.exit_code = returncode
    
    93
    +
    
    94
    +        logger.debug("Command stderr: [{}]".format(stderr.encode()))
    
    95
    +        logger.debug("Command stdout: [{}]".format(stdout.encode()))
    
    96
    +        logger.debug("Command exit code: [{}]".format(returncode))
    
    85 97
     
    
    86 98
             with upload(context.cas_channel, instance=instance_name) as cas:
    
    87 99
                 for output_path in command.output_files:
    

  • buildgrid/bot/bot_session.py
    ... ... @@ -12,6 +12,9 @@
    12 12
     # See the License for the specific language governing permissions and
    
    13 13
     # limitations under the License.
    
    14 14
     
    
    15
    +# Disable broad exception catch
    
    16
    +# pylint: disable=broad-except
    
    17
    +
    
    15 18
     
    
    16 19
     """
    
    17 20
     Bot Session
    
    ... ... @@ -23,10 +26,13 @@ import asyncio
    23 26
     import logging
    
    24 27
     import platform
    
    25 28
     import uuid
    
    26
    -
    
    27 29
     from enum import Enum
    
    28 30
     
    
    31
    +import grpc
    
    32
    +
    
    33
    +from buildgrid._protos.google.rpc import code_pb2
    
    29 34
     from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2, worker_pb2
    
    35
    +from buildgrid._exceptions import BotError
    
    30 36
     
    
    31 37
     
    
    32 38
     class BotStatus(Enum):
    
    ... ... @@ -142,9 +148,22 @@ class BotSession:
    142 148
     
    
    143 149
         async def create_work(self, lease):
    
    144 150
             self.logger.debug("Work created: [{}]".format(lease.id))
    
    145
    -
    
    146 151
             loop = asyncio.get_event_loop()
    
    147
    -        lease = await loop.run_in_executor(None, self._work, self._context, lease)
    
    152
    +
    
    153
    +        try:
    
    154
    +            lease = await loop.run_in_executor(None, self._work, self._context, lease)
    
    155
    +
    
    156
    +        except grpc.RpcError as e:
    
    157
    +            self.logger.error("Connection error thrown: [{}]".format(e))
    
    158
    +            lease.status.code = e.code()
    
    159
    +
    
    160
    +        except BotError as e:
    
    161
    +            self.logger.error("Internal bot error thrown: [{}]".format(e))
    
    162
    +            lease.status.code = code_pb2.INTERNAL
    
    163
    +
    
    164
    +        except Exception as e:
    
    165
    +            self.logger.error("Connection error thrown: [{}]".format(e))
    
    166
    +            lease.status.code = code_pb2.INTERNAL
    
    148 167
     
    
    149 168
             self.logger.debug("Work complete: [{}]".format(lease.id))
    
    150 169
             self.lease_completed(lease)
    

  • buildgrid/server/bots/instance.py
    ... ... @@ -117,7 +117,7 @@ class BotsInterface:
    117 117
     
    
    118 118
                 elif client_state == LeaseState.COMPLETED:
    
    119 119
                     self._scheduler.update_job_lease_state(client_lease.id, client_lease.state)
    
    120
    -                self._scheduler.job_complete(client_lease.id, client_lease.result)
    
    120
    +                self._scheduler.job_complete(client_lease.id, client_lease.result, client_lease.status)
    
    121 121
                     return None
    
    122 122
     
    
    123 123
                 else:
    

  • buildgrid/server/job.py
    ... ... @@ -121,10 +121,9 @@ class Job:
    121 121
             self._operation.metadata.CopyFrom(self._pack_any(self.get_operation_meta()))
    
    122 122
             if self.result is not None:
    
    123 123
                 self._operation.done = True
    
    124
    -            action_result = remote_execution_pb2.ActionResult()
    
    125
    -            self.result.Unpack(action_result)
    
    126
    -            response = remote_execution_pb2.ExecuteResponse(result=action_result,
    
    127
    -                                                            cached_result=self.result_cached)
    
    124
    +            response = remote_execution_pb2.ExecuteResponse(result=self.result,
    
    125
    +                                                            cached_result=self.result_cached,
    
    126
    +                                                            status=self.lease.status)
    
    128 127
                 self._operation.response.CopyFrom(self._pack_any(response))
    
    129 128
     
    
    130 129
             return self._operation
    

  • buildgrid/server/scheduler.py
    ... ... @@ -27,6 +27,7 @@ from google.protobuf import any_pb2
    27 27
     
    
    28 28
     
    
    29 29
     from buildgrid.server._exceptions import NotFoundError
    
    30
    +from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    
    30 31
     from buildgrid._protos.google.longrunning import operations_pb2
    
    31 32
     
    
    32 33
     from .job import ExecuteStage, LeaseState
    
    ... ... @@ -82,12 +83,16 @@ class Scheduler:
    82 83
                     job.n_tries += 1
    
    83 84
                     self.queue.appendleft(job)
    
    84 85
     
    
    85
    -    def job_complete(self, name, result):
    
    86
    +    def job_complete(self, name, result, status):
    
    86 87
             job = self.jobs[name]
    
    87
    -        job.result = result
    
    88
    -        job.update_execute_stage(ExecuteStage.COMPLETED)
    
    88
    +        job.lease.status.CopyFrom(status)
    
    89
    +        action_result = remote_execution_pb2.ActionResult()
    
    90
    +        result.Unpack(action_result)
    
    91
    +        job.result = action_result
    
    89 92
             if not job.do_not_cache and self._action_cache is not None:
    
    90
    -            self._action_cache.update_action_result(job.action_digest, result)
    
    93
    +            if not job.lease.status.code:
    
    94
    +                self._action_cache.update_action_result(job.action_digest, result)
    
    95
    +        job.update_execute_stage(ExecuteStage.COMPLETED)
    
    91 96
     
    
    92 97
         def get_operations(self):
    
    93 98
             response = operations_pb2.ListOperationsResponse()
    
    ... ... @@ -112,6 +117,6 @@ class Scheduler:
    112 117
             while self.queue:
    
    113 118
                 job = self.queue.popleft()
    
    114 119
                 job.update_execute_stage(ExecuteStage.EXECUTING)
    
    115
    -            job.lease = job.create_lease()
    
    120
    +            job.create_lease()
    
    116 121
                 job.lease.state = LeaseState.PENDING.value
    
    117 122
                 yield job.lease

  • buildgrid/settings.py
    ... ... @@ -3,3 +3,4 @@ import hashlib
    3 3
     
    
    4 4
     # The hash function that CAS uses
    
    5 5
     HASH = hashlib.sha256
    
    6
    +HASH_LENGTH = HASH().digest_size * 2

  • tests/integration/operations_service.py
    ... ... @@ -33,6 +33,7 @@ from buildgrid.server._exceptions import InvalidArgumentError
    33 33
     
    
    34 34
     from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    
    35 35
     from buildgrid._protos.google.longrunning import operations_pb2
    
    36
    +from buildgrid._protos.google.rpc import status_pb2
    
    36 37
     
    
    37 38
     
    
    38 39
     server = mock.create_autospec(grpc.server)
    
    ... ... @@ -130,8 +131,10 @@ def test_list_operations_with_result(instance, controller, execute_request, cont
    130 131
         output_file = remote_execution_pb2.OutputFile(path='unicorn')
    
    131 132
         action_result.output_files.extend([output_file])
    
    132 133
     
    
    134
    +    controller.operations_instance._scheduler.jobs[response_execute.name].create_lease()
    
    133 135
         controller.operations_instance._scheduler.job_complete(response_execute.name,
    
    134
    -                                                           _pack_any(action_result))
    
    136
    +                                                           _pack_any(action_result),
    
    137
    +                                                           status_pb2.Status())
    
    135 138
     
    
    136 139
         request = operations_pb2.ListOperationsRequest(name=instance_name)
    
    137 140
         response = instance.ListOperations(request, context)
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]