[Notes] [Git][BuildGrid/buildgrid][finn/84-bot-errors] 6 commits: Added HASH_LENGTH to settings file.



Title: GitLab

finn pushed to branch finn/84-bot-errors at BuildGrid / buildgrid

Commits:

6 changed files:

Changes:

  • buildgrid/_app/bots/buildbox.py
    ... ... @@ -19,7 +19,9 @@ import tempfile
    19 19
     
    
    20 20
     from google.protobuf import any_pb2
    
    21 21
     
    
    22
    +from buildgrid.settings import HASH_LENGTH
    
    22 23
     from buildgrid.client.cas import upload
    
    24
    +from buildgrid._exceptions import BotError
    
    23 25
     from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    
    24 26
     from buildgrid._protos.google.bytestream import bytestream_pb2_grpc
    
    25 27
     from buildgrid.utils import read_file, write_file, parse_to_pb2_from_fetch
    
    ... ... @@ -87,17 +89,33 @@ def work_buildbox(context, lease):
    87 89
     
    
    88 90
                 command_line = subprocess.Popen(command_line,
    
    89 91
                                                 stdin=subprocess.PIPE,
    
    90
    -                                            stdout=subprocess.PIPE)
    
    91
    -            # TODO: Should return the stdout and stderr to the user.
    
    92
    -            command_line.communicate()
    
    92
    +                                            stdout=subprocess.PIPE,
    
    93
    +                                            stderr=subprocess.PIPE)
    
    94
    +            stdout, stderr = command_line.communicate()
    
    95
    +            returncode = command_line.returncode
    
    96
    +            action_result = remote_execution_pb2.ActionResult()
    
    97
    +            # TODO: Upload to CAS or output RAW
    
    98
    +            # For now, just pass raw
    
    99
    +            # https://gitlab.com/BuildGrid/buildgrid/issues/90
    
    100
    +            action_result.stdout_raw = stdout
    
    101
    +            action_result.stderr_raw = stderr
    
    102
    +            action_result.exit_code = returncode
    
    103
    +
    
    104
    +            if returncode:
    
    105
    +                # TODO: Upload to CAS or output RAW
    
    106
    +                # For now, just pass raw
    
    107
    +                # https://gitlab.com/BuildGrid/buildgrid/issues/90
    
    108
    +                logger.error("BuildBox error: [{}]".format(stderr))
    
    109
    +                raise BotError(stderr, detail=stdout, reason="Non-zero exit code [{}]".format(returncode))
    
    93 110
     
    
    94 111
                 output_digest = remote_execution_pb2.Digest()
    
    95 112
                 output_digest.ParseFromString(read_file(output_digest_file.name))
    
    96 113
     
    
    97 114
                 logger.debug("Output root digest: {}".format(output_digest))
    
    98 115
     
    
    99
    -            if len(output_digest.hash) < 64:
    
    100
    -                logger.warning("Buildbox command failed - no output root digest present.")
    
    116
    +            if len(output_digest.hash) < HASH_LENGTH:
    
    117
    +                raise BotError("Output hash length too small",
    
    118
    +                               detail=stdout, reason="No output root digest present.")
    
    101 119
     
    
    102 120
                 # TODO: Have BuildBox helping us creating the Tree instance here
    
    103 121
                 # See https://gitlab.com/BuildStream/buildbox/issues/7 for details
    
    ... ... @@ -110,7 +128,6 @@ def work_buildbox(context, lease):
    110 128
                 output_directory.tree_digest.CopyFrom(output_tree_digest)
    
    111 129
                 output_directory.path = os.path.relpath(working_directory, start='/')
    
    112 130
     
    
    113
    -            action_result = remote_execution_pb2.ActionResult()
    
    114 131
                 action_result.output_directories.extend([output_directory])
    
    115 132
     
    
    116 133
                 action_result_any = any_pb2.Any()
    

  • buildgrid/_app/bots/temp_directory.py
    ... ... @@ -77,11 +77,25 @@ def work_temp_directory(context, lease):
    77 77
                                        universal_newlines=True,
    
    78 78
                                        env=environment,
    
    79 79
                                        stdin=subprocess.PIPE,
    
    80
    -                                   stdout=subprocess.PIPE)
    
    81
    -        # TODO: Should return the stdout and stderr in the ActionResult.
    
    82
    -        process.communicate()
    
    80
    +                                   stdout=subprocess.PIPE,
    
    81
    +                                   stderr=subprocess.PIPE)
    
    82
    +        stdout, stderr = process.communicate()
    
    83
    +        returncode = process.returncode
    
    83 84
     
    
    84 85
             action_result = remote_execution_pb2.ActionResult()
    
    86
    +        # TODO: Upload to CAS or output RAW
    
    87
    +        # For now, just pass raw
    
    88
    +        # https://gitlab.com/BuildGrid/buildgrid/issues/90
    
    89
    +        action_result.stdout_raw = stdout
    
    90
    +        action_result.stderr_raw = stderr
    
    91
    +        action_result.exit_code = returncode
    
    92
    +
    
    93
    +        if returncode:
    
    94
    +            # TODO: Upload to CAS or output RAW
    
    95
    +            # For now, just pass raw
    
    96
    +            # https://gitlab.com/BuildGrid/buildgrid/issues/90
    
    97
    +            logger.error("BuildBox error: [{}]".format(stderr))
    
    98
    +            raise BotError(stderr, detail=stdout, reason="Non-zero exit code [{}]".format(returncode))
    
    85 99
     
    
    86 100
             with upload(context.cas_channel, instance=instance_name) as cas:
    
    87 101
                 for output_path in command.output_files:
    

  • buildgrid/bot/bot_session.py
    ... ... @@ -12,6 +12,9 @@
    12 12
     # See the License for the specific language governing permissions and
    
    13 13
     # limitations under the License.
    
    14 14
     
    
    15
    +# Disable broad exception catch
    
    16
    +# pylint: disable=broad-except
    
    17
    +
    
    15 18
     
    
    16 19
     """
    
    17 20
     Bot Session
    
    ... ... @@ -23,10 +26,14 @@ import asyncio
    23 26
     import logging
    
    24 27
     import platform
    
    25 28
     import uuid
    
    26
    -
    
    27 29
     from enum import Enum
    
    28 30
     
    
    31
    +import grpc
    
    32
    +from google.protobuf import any_pb2
    
    33
    +
    
    29 34
     from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2, worker_pb2
    
    35
    +from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    
    36
    +from buildgrid._exceptions import BotError
    
    30 37
     
    
    31 38
     
    
    32 39
     class BotStatus(Enum):
    
    ... ... @@ -142,13 +149,37 @@ class BotSession:
    142 149
     
    
    143 150
         async def create_work(self, lease):
    
    144 151
             self.logger.debug("Work created: [{}]".format(lease.id))
    
    145
    -
    
    152
    +        input_lease = lease
    
    146 153
             loop = asyncio.get_event_loop()
    
    147
    -        lease = await loop.run_in_executor(None, self._work, self._context, lease)
    
    154
    +
    
    155
    +        try:
    
    156
    +            lease = await loop.run_in_executor(None, self._work, self._context, lease)
    
    157
    +
    
    158
    +        except BotError as e:
    
    159
    +            self.logger.error("Bot error thrown: [{}]".format(e))
    
    160
    +            lease = self._lease_error(input_lease, e)
    
    161
    +
    
    162
    +        except grpc.RpcError as e:
    
    163
    +            self.logger.error("Connection error thrown: [{}]".format(e))
    
    164
    +            lease = self._lease_error(input_lease, e)
    
    165
    +
    
    166
    +        except Exception as e:
    
    167
    +            self.logger.error("Connection error thrown: [{}]".format(e))
    
    168
    +            lease = self._lease_error(input_lease, e)
    
    148 169
     
    
    149 170
             self.logger.debug("Work complete: [{}]".format(lease.id))
    
    150 171
             self.lease_completed(lease)
    
    151 172
     
    
    173
    +    def _lease_error(self, lease, error):
    
    174
    +        action_result = remote_execution_pb2.ActionResult()
    
    175
    +        action_result.stderr_raw = str(error)
    
    176
    +        action_result.exit_code = -1
    
    177
    +
    
    178
    +        action_result_any = any_pb2.Any()
    
    179
    +        action_result_any.Pack(action_result)
    
    180
    +        lease.result.CopyFrom(action_result_any)
    
    181
    +        return lease
    
    182
    +
    
    152 183
     
    
    153 184
     class Worker:
    
    154 185
         def __init__(self, properties=None, configs=None):
    

  • buildgrid/server/job.py
    ... ... @@ -21,6 +21,7 @@ from enum import Enum
    21 21
     
    
    22 22
     from google.protobuf import any_pb2
    
    23 23
     
    
    24
    +from buildgrid._protos.google.rpc import code_pb2, status_pb2
    
    24 25
     from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    
    25 26
     from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
    
    26 27
     from buildgrid._protos.google.longrunning import operations_pb2
    
    ... ... @@ -121,10 +122,14 @@ class Job:
    121 122
             self._operation.metadata.CopyFrom(self._pack_any(self.get_operation_meta()))
    
    122 123
             if self.result is not None:
    
    123 124
                 self._operation.done = True
    
    124
    -            action_result = remote_execution_pb2.ActionResult()
    
    125
    -            self.result.Unpack(action_result)
    
    126
    -            response = remote_execution_pb2.ExecuteResponse(result=action_result,
    
    127
    -                                                            cached_result=self.result_cached)
    
    125
    +            status = status_pb2.Status()
    
    126
    +            status.code = code_pb2.OK
    
    127
    +            if self.result.exit_code:
    
    128
    +                status.code = code_pb2.INTERNAL
    
    129
    +
    
    130
    +            response = remote_execution_pb2.ExecuteResponse(result=self.result,
    
    131
    +                                                            cached_result=self.result_cached,
    
    132
    +                                                            status=status)
    
    128 133
                 self._operation.response.CopyFrom(self._pack_any(response))
    
    129 134
     
    
    130 135
             return self._operation
    

  • buildgrid/server/scheduler.py
    ... ... @@ -27,6 +27,7 @@ from google.protobuf import any_pb2
    27 27
     
    
    28 28
     
    
    29 29
     from buildgrid.server._exceptions import NotFoundError
    
    30
    +from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    
    30 31
     from buildgrid._protos.google.longrunning import operations_pb2
    
    31 32
     
    
    32 33
     from .job import ExecuteStage, LeaseState
    
    ... ... @@ -84,10 +85,13 @@ class Scheduler:
    84 85
     
    
    85 86
         def job_complete(self, name, result):
    
    86 87
             job = self.jobs[name]
    
    87
    -        job.result = result
    
    88
    -        job.update_execute_stage(ExecuteStage.COMPLETED)
    
    88
    +        action_result = remote_execution_pb2.ActionResult()
    
    89
    +        result.Unpack(action_result)
    
    90
    +        job.result = action_result
    
    89 91
             if not job.do_not_cache and self._action_cache is not None:
    
    90
    -            self._action_cache.update_action_result(job.action_digest, result)
    
    92
    +            if action_result.exit_code:
    
    93
    +                self._action_cache.update_action_result(job.action_digest, result)
    
    94
    +        job.update_execute_stage(ExecuteStage.COMPLETED)
    
    91 95
     
    
    92 96
         def get_operations(self):
    
    93 97
             response = operations_pb2.ListOperationsResponse()
    

  • buildgrid/settings.py
    ... ... @@ -3,3 +3,4 @@ import hashlib
    3 3
     
    
    4 4
     # The hash function that CAS uses
    
    5 5
     HASH = hashlib.sha256
    
    6
    +HASH_LENGTH = HASH().block_size



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]