[Notes] [Git][BuildGrid/buildgrid][finn/48-cancellation-leases] 13 commits: Catch cancelled operations in service.



Title: GitLab

finn pushed to branch finn/48-cancellation-leases at BuildGrid / buildgrid

Commits:

19 changed files:

Changes:

  • buildgrid/_app/commands/cmd_bot.py
    ... ... @@ -28,8 +28,11 @@ from urllib.parse import urlparse
    28 28
     import click
    
    29 29
     import grpc
    
    30 30
     
    
    31
    -from buildgrid.bot import bot, bot_interface
    
    32
    -from buildgrid.bot.bot_session import BotSession, Device, Worker
    
    31
    +from buildgrid.bot import bot, interface, session
    
    32
    +from buildgrid.bot.hardware.interface import HardwareInterface
    
    33
    +from buildgrid.bot.hardware.device import Device
    
    34
    +from buildgrid.bot.hardware.worker import Worker
    
    35
    +
    
    33 36
     
    
    34 37
     from ..bots import buildbox, dummy, host
    
    35 38
     from ..cli import pass_context
    
    ... ... @@ -123,13 +126,14 @@ def cli(context, parent, update_period, remote, client_key, client_cert, server_
    123 126
         context.logger = logging.getLogger(__name__)
    
    124 127
         context.logger.debug("Starting for remote {}".format(context.remote))
    
    125 128
     
    
    126
    -    interface = bot_interface.BotInterface(context.channel)
    
    129
    +    bot_interface = interface.BotInterface(context.channel)
    
    127 130
     
    
    128 131
         worker = Worker()
    
    129 132
         worker.add_device(Device())
    
    130 133
     
    
    131
    -    bot_session = BotSession(parent, interface)
    
    132
    -    bot_session.add_worker(worker)
    
    134
    +    hardware_interface = HardwareInterface(worker)
    
    135
    +
    
    136
    +    bot_session = session.BotSession(parent, bot_interface, hardware_interface)
    
    133 137
     
    
    134 138
         context.bot_session = bot_session
    
    135 139
     
    
    ... ... @@ -142,8 +146,7 @@ def run_dummy(context):
    142 146
         """
    
    143 147
         try:
    
    144 148
             b = bot.Bot(context.bot_session, context.update_period)
    
    145
    -        b.session(dummy.work_dummy,
    
    146
    -                  context)
    
    149
    +        b.session(dummy.work_dummy, context)
    
    147 150
         except KeyboardInterrupt:
    
    148 151
             pass
    
    149 152
     
    

  • buildgrid/_app/commands/cmd_operation.py
    ... ... @@ -155,6 +155,19 @@ def status(context, operation_name, json):
    155 155
             click.echo(json_format.MessageToJson(operation))
    
    156 156
     
    
    157 157
     
    
    158
    +@cli.command('cancel', short_help="Cancel an operation.")
    
    159
    +@click.argument('operation-name', nargs=1, type=click.STRING, required=True)
    
    160
    +@pass_context
    
    161
    +def cancel(context, operation_name):
    
    162
    +    context.logger.info("Cancelling an operation...")
    
    163
    +    stub = operations_pb2_grpc.OperationsStub(context.channel)
    
    164
    +
    
    165
    +    request = operations_pb2.CancelOperationRequest(name=operation_name)
    
    166
    +
    
    167
    +    stub.CancelOperation(request)
    
    168
    +    context.logger.info("Operation cancelled: [{}]".format(request))
    
    169
    +
    
    170
    +
    
    158 171
     @cli.command('list', short_help="List operations.")
    
    159 172
     @click.option('--json', is_flag=True, show_default=True,
    
    160 173
                   help="Print operations list in JSON format.")
    

  • buildgrid/bot/bot.py
    ... ... @@ -17,7 +17,7 @@
    17 17
     Bot
    
    18 18
     ====
    
    19 19
     
    
    20
    -Creates a bot session.
    
    20
    +Creates a bot session and sends updates to the server.
    
    21 21
     """
    
    22 22
     
    
    23 23
     import asyncio
    
    ... ... @@ -45,6 +45,7 @@ class Bot:
    45 45
                 loop.run_forever()
    
    46 46
             except KeyboardInterrupt:
    
    47 47
                 pass
    
    48
    +
    
    48 49
             finally:
    
    49 50
                 task.cancel()
    
    50 51
                 loop.close()
    

  • buildgrid/bot/bot_session.py deleted
    1
    -# Copyright (C) 2018 Bloomberg LP
    
    2
    -#
    
    3
    -# Licensed under the Apache License, Version 2.0 (the "License");
    
    4
    -# you may not use this file except in compliance with the License.
    
    5
    -# You may obtain a copy of the License at
    
    6
    -#
    
    7
    -#  <http://www.apache.org/licenses/LICENSE-2.0>
    
    8
    -#
    
    9
    -# Unless required by applicable law or agreed to in writing, software
    
    10
    -# distributed under the License is distributed on an "AS IS" BASIS,
    
    11
    -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    
    12
    -# See the License for the specific language governing permissions and
    
    13
    -# limitations under the License.
    
    14
    -
    
    15
    -# Disable broad exception catch
    
    16
    -# pylint: disable=broad-except
    
    17
    -
    
    18
    -
    
    19
    -"""
    
    20
    -Bot Session
    
    21
    -====
    
    22
    -
    
    23
    -Allows connections
    
    24
    -"""
    
    25
    -import asyncio
    
    26
    -import logging
    
    27
    -import platform
    
    28
    -import uuid
    
    29
    -
    
    30
    -import grpc
    
    31
    -
    
    32
    -from buildgrid._enums import BotStatus, LeaseState
    
    33
    -from buildgrid._protos.google.rpc import code_pb2
    
    34
    -from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2, worker_pb2
    
    35
    -from buildgrid._exceptions import BotError
    
    36
    -
    
    37
    -
    
    38
    -class BotSession:
    
    39
    -    def __init__(self, parent, interface):
    
    40
    -        """ Unique bot ID within the farm used to identify this bot
    
    41
    -        Needs to be human readable.
    
    42
    -        All prior sessions with bot_id of same ID are invalidated.
    
    43
    -        If a bot attempts to update an invalid session, it must be rejected and
    
    44
    -        may be put in quarantine.
    
    45
    -        """
    
    46
    -
    
    47
    -        self.logger = logging.getLogger(__name__)
    
    48
    -
    
    49
    -        self._bot_id = '{}.{}'.format(parent, platform.node())
    
    50
    -        self._context = None
    
    51
    -        self._interface = interface
    
    52
    -        self._leases = {}
    
    53
    -        self._name = None
    
    54
    -        self._parent = parent
    
    55
    -        self._status = BotStatus.OK.value
    
    56
    -        self._work = None
    
    57
    -        self._worker = None
    
    58
    -
    
    59
    -    @property
    
    60
    -    def bot_id(self):
    
    61
    -        return self._bot_id
    
    62
    -
    
    63
    -    def add_worker(self, worker):
    
    64
    -        self._worker = worker
    
    65
    -
    
    66
    -    def create_bot_session(self, work, context=None):
    
    67
    -        self.logger.debug("Creating bot session")
    
    68
    -        self._work = work
    
    69
    -        self._context = context
    
    70
    -
    
    71
    -        session = self._interface.create_bot_session(self._parent, self.get_pb2())
    
    72
    -        self._name = session.name
    
    73
    -
    
    74
    -        self.logger.info("Created bot session with name: [{}]".format(self._name))
    
    75
    -
    
    76
    -        for lease in session.leases:
    
    77
    -            self._update_lease_from_server(lease)
    
    78
    -
    
    79
    -    def update_bot_session(self):
    
    80
    -        self.logger.debug("Updating bot session: [{}]".format(self._bot_id))
    
    81
    -        session = self._interface.update_bot_session(self.get_pb2())
    
    82
    -        for k, v in list(self._leases.items()):
    
    83
    -            if v.state == LeaseState.COMPLETED.value:
    
    84
    -                del self._leases[k]
    
    85
    -
    
    86
    -        for lease in session.leases:
    
    87
    -            self._update_lease_from_server(lease)
    
    88
    -
    
    89
    -    def get_pb2(self):
    
    90
    -        leases = list(self._leases.values())
    
    91
    -        if not leases:
    
    92
    -            leases = None
    
    93
    -
    
    94
    -        return bots_pb2.BotSession(worker=self._worker.get_pb2(),
    
    95
    -                                   status=self._status,
    
    96
    -                                   leases=leases,
    
    97
    -                                   bot_id=self._bot_id,
    
    98
    -                                   name=self._name)
    
    99
    -
    
    100
    -    def lease_completed(self, lease):
    
    101
    -        lease.state = LeaseState.COMPLETED.value
    
    102
    -        self._leases[lease.id] = lease
    
    103
    -
    
    104
    -    def _update_lease_from_server(self, lease):
    
    105
    -        """
    
    106
    -        State machine for any recieved updates to the leases.
    
    107
    -        """
    
    108
    -        # TODO: Compare with previous state of lease
    
    109
    -        if lease.state == LeaseState.PENDING.value:
    
    110
    -            lease.state = LeaseState.ACTIVE.value
    
    111
    -            self._leases[lease.id] = lease
    
    112
    -            self.update_bot_session()
    
    113
    -            asyncio.ensure_future(self.create_work(lease))
    
    114
    -
    
    115
    -    async def create_work(self, lease):
    
    116
    -        self.logger.debug("Work created: [{}]".format(lease.id))
    
    117
    -        loop = asyncio.get_event_loop()
    
    118
    -
    
    119
    -        try:
    
    120
    -            lease = await loop.run_in_executor(None, self._work, self._context, lease)
    
    121
    -
    
    122
    -        except grpc.RpcError as e:
    
    123
    -            self.logger.error("RPC error thrown: [{}]".format(e))
    
    124
    -            lease.status.CopyFrom(e.code())
    
    125
    -
    
    126
    -        except BotError as e:
    
    127
    -            self.logger.error("Internal bot error thrown: [{}]".format(e))
    
    128
    -            lease.status.code = code_pb2.INTERNAL
    
    129
    -
    
    130
    -        except Exception as e:
    
    131
    -            self.logger.error("Exception thrown: [{}]".format(e))
    
    132
    -            lease.status.code = code_pb2.INTERNAL
    
    133
    -
    
    134
    -        self.logger.debug("Work complete: [{}]".format(lease.id))
    
    135
    -        self.lease_completed(lease)
    
    136
    -
    
    137
    -
    
    138
    -class Worker:
    
    139
    -    def __init__(self, properties=None, configs=None):
    
    140
    -        self.properties = {}
    
    141
    -        self._configs = {}
    
    142
    -        self._devices = []
    
    143
    -
    
    144
    -        if properties:
    
    145
    -            for k, v in properties.items():
    
    146
    -                if k == 'pool':
    
    147
    -                    self.properties[k] = v
    
    148
    -                else:
    
    149
    -                    raise KeyError('Key not supported: [{}]'.format(k))
    
    150
    -
    
    151
    -        if configs:
    
    152
    -            for k, v in configs.items():
    
    153
    -                if k == 'DockerImage':
    
    154
    -                    self.configs[k] = v
    
    155
    -                else:
    
    156
    -                    raise KeyError('Key not supported: [{}]'.format(k))
    
    157
    -
    
    158
    -    @property
    
    159
    -    def configs(self):
    
    160
    -        return self._configs
    
    161
    -
    
    162
    -    def add_device(self, device):
    
    163
    -        self._devices.append(device)
    
    164
    -
    
    165
    -    def get_pb2(self):
    
    166
    -        devices = [device.get_pb2() for device in self._devices]
    
    167
    -        worker = worker_pb2.Worker(devices=devices)
    
    168
    -        property_message = worker_pb2.Worker.Property()
    
    169
    -        for k, v in self.properties.items():
    
    170
    -            property_message.key = k
    
    171
    -            property_message.value = v
    
    172
    -            worker.properties.extend([property_message])
    
    173
    -
    
    174
    -        config_message = worker_pb2.Worker.Config()
    
    175
    -        for k, v in self.properties.items():
    
    176
    -            property_message.key = k
    
    177
    -            property_message.value = v
    
    178
    -            worker.configs.extend([config_message])
    
    179
    -
    
    180
    -        return worker
    
    181
    -
    
    182
    -
    
    183
    -class Device:
    
    184
    -    def __init__(self, properties=None):
    
    185
    -        """ Creates devices available to the worker
    
    186
    -        The first device is know as the Primary Device - the revice which
    
    187
    -        is running a bit and responsible to actually executing commands.
    
    188
    -        All other devices are known as Attatched Devices and must be controlled
    
    189
    -        by the Primary Device.
    
    190
    -        """
    
    191
    -
    
    192
    -        self._name = str(uuid.uuid4())
    
    193
    -        self._properties = {}
    
    194
    -
    
    195
    -        if properties:
    
    196
    -            for k, v in properties.items():
    
    197
    -                if k == 'os':
    
    198
    -                    self._properties[k] = v
    
    199
    -
    
    200
    -                elif k == 'docker':
    
    201
    -                    if v not in ('True', 'False'):
    
    202
    -                        raise ValueError('Value not supported: [{}]'.format(v))
    
    203
    -                    self._properties[k] = v
    
    204
    -
    
    205
    -                else:
    
    206
    -                    raise KeyError('Key not supported: [{}]'.format(k))
    
    207
    -
    
    208
    -    @property
    
    209
    -    def name(self):
    
    210
    -        return self._name
    
    211
    -
    
    212
    -    @property
    
    213
    -    def properties(self):
    
    214
    -        return self._properties
    
    215
    -
    
    216
    -    def get_pb2(self):
    
    217
    -        device = worker_pb2.Device(handle=self._name)
    
    218
    -        property_message = worker_pb2.Device.Property()
    
    219
    -        for k, v in self._properties.items():
    
    220
    -            property_message.key = k
    
    221
    -            property_message.value = v
    
    222
    -            device.properties.extend([property_message])
    
    223
    -        return device

  • buildgrid/bot/hardware/__init__.py

  • buildgrid/bot/hardware/device.py
    1
    +# Copyright (C) 2018 Bloomberg LP
    
    2
    +#
    
    3
    +# Licensed under the Apache License, Version 2.0 (the "License");
    
    4
    +# you may not use this file except in compliance with the License.
    
    5
    +# You may obtain a copy of the License at
    
    6
    +#
    
    7
    +#  <http://www.apache.org/licenses/LICENSE-2.0>
    
    8
    +#
    
    9
    +# Unless required by applicable law or agreed to in writing, software
    
    10
    +# distributed under the License is distributed on an "AS IS" BASIS,
    
    11
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    
    12
    +# See the License for the specific language governing permissions and
    
    13
    +# limitations under the License.
    
    14
    +
    
    15
    +
    
    16
    +"""
    
    17
    +Device
    
    18
    +======
    
    19
    +
    
    20
    +A device.
    
    21
    +"""
    
    22
    +
    
    23
    +
    
    24
    +import uuid
    
    25
    +from buildgrid._protos.google.devtools.remoteworkers.v1test2 import worker_pb2
    
    26
    +
    
    27
    +class Device:
    
    28
    +
    
    29
    +    def __init__(self, properties=None):
    
    30
    +        """ Creates devices available to the worker
    
    31
    +        The first device is know as the Primary Device - the revice which
    
    32
    +        is running a bit and responsible to actually executing commands.
    
    33
    +        All other devices are known as Attatched Devices and must be controlled
    
    34
    +        by the Primary Device.
    
    35
    +
    
    36
    +        properties (list(dict(string : string))) : Properties of device. Keys may
    
    37
    +        repeated.
    
    38
    +        """
    
    39
    +
    
    40
    +        self._properties = {}
    
    41
    +        self.__property_keys = ['os', 'has-docker']
    
    42
    +        self.__name = str(uuid.uuid4())
    
    43
    +
    
    44
    +        if properties:
    
    45
    +            for prop in properties:
    
    46
    +                self._add_property(prop)
    
    47
    +
    
    48
    +    @property
    
    49
    +    def name(self):
    
    50
    +        return self.__name
    
    51
    +
    
    52
    +    @property
    
    53
    +    def properties(self):
    
    54
    +        return self._properties
    
    55
    +
    
    56
    +    def get_pb2(self):
    
    57
    +        device = worker_pb2.Device(handle=self.__name)
    
    58
    +        for k, v in self._properties.items():
    
    59
    +            for prop in v:
    
    60
    +                property_message = worker_pb2.Device.Property()
    
    61
    +                property_message.key = k
    
    62
    +                property_message.value = prop
    
    63
    +                device.properties.extend([property_message])
    
    64
    +        return device
    
    65
    +
    
    66
    +    def _add_property(self, key, value):
    
    67
    +        if key in self.__property_keys:
    
    68
    +            prop = self._properties.get(key)
    
    69
    +            if not prop:
    
    70
    +                self._properties[key] = [value]
    
    71
    +            else:
    
    72
    +                prop[key].append(value)
    
    73
    +
    
    74
    +        else:
    
    75
    +            raise KeyError('Key not supported: [{}]'.format(key))

  • buildgrid/bot/hardware/interface.py
    1
    +# Copyright (C) 2018 Bloomberg LP
    
    2
    +#
    
    3
    +# Licensed under the Apache License, Version 2.0 (the "License");
    
    4
    +# you may not use this file except in compliance with the License.
    
    5
    +# You may obtain a copy of the License at
    
    6
    +#
    
    7
    +#  <http://www.apache.org/licenses/LICENSE-2.0>
    
    8
    +#
    
    9
    +# Unless required by applicable law or agreed to in writing, software
    
    10
    +# distributed under the License is distributed on an "AS IS" BASIS,
    
    11
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    
    12
    +# See the License for the specific language governing permissions and
    
    13
    +# limitations under the License.
    
    14
    +
    
    15
    +
    
    16
    +"""
    
    17
    +HardwareInterface
    
    18
    +=================
    
    19
    +
    
    20
    +Class to configure hardware and check requirements of leases.
    
    21
    +
    
    22
    +In the future this could also be used to request and display
    
    23
    +the status of hardware.
    
    24
    +"""
    
    25
    +
    
    26
    +
    
    27
    +from buildgrid._exceptions import FailedPreconditionError
    
    28
    +
    
    29
    +
    
    30
    +class HardwareInterface:
    
    31
    +
    
    32
    +    def __init__(self, worker):
    
    33
    +        self._worker = worker
    
    34
    +
    
    35
    +    def configure_hardware(self, lease):
    
    36
    +        """ Can check if the requirements can be met and also
    
    37
    +        in the future, potentially configure the hardware.
    
    38
    +        """
    
    39
    +        worker = self._worker
    
    40
    +        worker_requirements = lease.worker
    
    41
    +
    
    42
    +        for config_requirement in worker_requirements.configs:
    
    43
    +            if config_requirement.key not in worker.configs:
    
    44
    +                raise FailedPreconditionError("Config not supported: [{}]".format(config_requirement))
    
    45
    +
    
    46
    +    def get_worker_pb2(self):
    
    47
    +        return self._worker.get_pb2()

  • buildgrid/bot/hardware/worker.py
    1
    +# Copyright (C) 2018 Bloomberg LP
    
    2
    +#
    
    3
    +# Licensed under the Apache License, Version 2.0 (the "License");
    
    4
    +# you may not use this file except in compliance with the License.
    
    5
    +# You may obtain a copy of the License at
    
    6
    +#
    
    7
    +#  <http://www.apache.org/licenses/LICENSE-2.0>
    
    8
    +#
    
    9
    +# Unless required by applicable law or agreed to in writing, software
    
    10
    +# distributed under the License is distributed on an "AS IS" BASIS,
    
    11
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    
    12
    +# See the License for the specific language governing permissions and
    
    13
    +# limitations under the License.
    
    14
    +
    
    15
    +
    
    16
    +from buildgrid._protos.google.devtools.remoteworkers.v1test2 import worker_pb2
    
    17
    +
    
    18
    +
    
    19
    +class Worker:
    
    20
    +
    
    21
    +    def __init__(self, properties=None, configs=None):
    
    22
    +        self._devices = []
    
    23
    +        self._configs = {}
    
    24
    +        self._properties = {}
    
    25
    +        self.__property_keys = ['pool']
    
    26
    +        self.__config_keys = ['DockerImage']
    
    27
    +
    
    28
    +        if properties:
    
    29
    +            for k, v in properties.items():
    
    30
    +                if k in self.__property_keys:
    
    31
    +                    self._add_properties(k, v)
    
    32
    +
    
    33
    +        if configs:
    
    34
    +            for k, v in configs.items():
    
    35
    +                self._add_config(k, v)
    
    36
    +
    
    37
    +    @property
    
    38
    +    def configs(self):
    
    39
    +        return self._configs
    
    40
    +
    
    41
    +    @property
    
    42
    +    def properties(self):
    
    43
    +        return self._properties
    
    44
    +
    
    45
    +    def add_device(self, device):
    
    46
    +        self._devices.append(device)
    
    47
    +
    
    48
    +    def get_pb2(self):
    
    49
    +        devices = [device.get_pb2() for device in self._devices]
    
    50
    +        worker = worker_pb2.Worker(devices=devices)
    
    51
    +
    
    52
    +        for k, v in self._properties.items():
    
    53
    +            for prop in v:
    
    54
    +                property_message = worker_pb2.Device.Property()
    
    55
    +                property_message.key = k
    
    56
    +                property_message.value = prop
    
    57
    +                device.properties.extend([property_message])
    
    58
    +
    
    59
    +        for k, v in self._configs.items():
    
    60
    +            for cfg in v:
    
    61
    +                config_message = worker_pb2.Worker.Config()
    
    62
    +                config.key = k
    
    63
    +                config_message.value = cfg
    
    64
    +                worker.configs.extend([config_message])
    
    65
    +
    
    66
    +        return worker
    
    67
    +
    
    68
    +    def _add_config(self, key, value):
    
    69
    +        if key in self.__config_keys:
    
    70
    +            cfg = self._configs.get(key)
    
    71
    +            if not cfg:
    
    72
    +                self._configs[key] = [value]
    
    73
    +            else:
    
    74
    +                cfg[key].append(value)
    
    75
    +
    
    76
    +        else:
    
    77
    +            raise KeyError('Key not supported: [{}]'.format(key))
    
    78
    +
    
    79
    +    def _add_property(self, key, value):
    
    80
    +        if key in self.__property_keys:
    
    81
    +            prop = self._properties.get(key)
    
    82
    +            if not prop:
    
    83
    +                self._properties[key] = [value]
    
    84
    +            else:
    
    85
    +                prop[key].append(value)
    
    86
    +
    
    87
    +        else:
    
    88
    +            raise KeyError('Key not supported: [{}]'.format(key))

  • buildgrid/bot/bot_interface.py → buildgrid/bot/interface.py
    ... ... @@ -15,7 +15,7 @@
    15 15
     
    
    16 16
     """
    
    17 17
     Bot Interface
    
    18
    -====
    
    18
    +=============
    
    19 19
     
    
    20 20
     Interface to grpc
    
    21 21
     """
    

  • buildgrid/bot/session.py
    1
    +# Copyright (C) 2018 Bloomberg LP
    
    2
    +#
    
    3
    +# Licensed under the Apache License, Version 2.0 (the "License");
    
    4
    +# you may not use this file except in compliance with the License.
    
    5
    +# You may obtain a copy of the License at
    
    6
    +#
    
    7
    +#  <http://www.apache.org/licenses/LICENSE-2.0>
    
    8
    +#
    
    9
    +# Unless required by applicable law or agreed to in writing, software
    
    10
    +# distributed under the License is distributed on an "AS IS" BASIS,
    
    11
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    
    12
    +# See the License for the specific language governing permissions and
    
    13
    +# limitations under the License.
    
    14
    +
    
    15
    +# Disable broad exception catch
    
    16
    +# pylint: disable=broad-except
    
    17
    +
    
    18
    +
    
    19
    +"""
    
    20
    +Bot Session
    
    21
    +===========
    
    22
    +
    
    23
    +Allows connections
    
    24
    +"""
    
    25
    +import asyncio
    
    26
    +import logging
    
    27
    +import platform
    
    28
    +
    
    29
    +import grpc
    
    30
    +
    
    31
    +from buildgrid._enums import BotStatus, LeaseState
    
    32
    +from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
    
    33
    +from buildgrid._protos.google.rpc import code_pb2
    
    34
    +from buildgrid._exceptions import BotError
    
    35
    +
    
    36
    +from buildgrid._exceptions import FailedPreconditionError
    
    37
    +
    
    38
    +from .tenantmanager import TenantManager
    
    39
    +
    
    40
    +class BotSession:
    
    41
    +    def __init__(self, parent, bots_interface, hardware_interface):
    
    42
    +        """ Unique bot ID within the farm used to identify this bot
    
    43
    +        Needs to be human readable.
    
    44
    +        All prior sessions with bot_id of same ID are invalidated.
    
    45
    +        If a bot attempts to update an invalid session, it must be rejected and
    
    46
    +        may be put in quarantine.
    
    47
    +        """
    
    48
    +
    
    49
    +        self.logger = logging.getLogger(__name__)
    
    50
    +
    
    51
    +        self._bots_interface = bots_interface
    
    52
    +        self._hardware_interface = hardware_interface
    
    53
    +
    
    54
    +        self._status = BotStatus.OK.value
    
    55
    +        self._tenant_manager = TenantManager()
    
    56
    +
    
    57
    +        self.__parent = parent
    
    58
    +        self.__bot_id = '{}.{}'.format(parent, platform.node())
    
    59
    +        self.__name = None
    
    60
    +
    
    61
    +        # Remove these and add to a worker config in the future
    
    62
    +        self._work = None
    
    63
    +        self._context = None
    
    64
    +
    
    65
    +    @property
    
    66
    +    def bot_id(self):
    
    67
    +        return self.__bot_id
    
    68
    +
    
    69
    +    def create_bot_session(self, work, context):
    
    70
    +        # Drop this when properly adding to the work
    
    71
    +        self._work = work
    
    72
    +        self._context = context
    
    73
    +
    
    74
    +        self.logger.debug("Creating bot session")
    
    75
    +
    
    76
    +        session = self._bots_interface.create_bot_session(self.__parent, self.get_pb2())
    
    77
    +        self.__name = session.name
    
    78
    +
    
    79
    +        self.logger.info("Created bot session with name: [{}]".format(self.__name))
    
    80
    +
    
    81
    +        for lease in session.leases:
    
    82
    +            self._register_lease(lease)
    
    83
    +
    
    84
    +    def update_bot_session(self):
    
    85
    +        self.logger.debug("Updating bot session: [{}]".format(self.__bot_id))
    
    86
    +
    
    87
    +        session = self._bots_interface.update_bot_session(self.get_pb2())
    
    88
    +
    
    89
    +        server_ids = []
    
    90
    +
    
    91
    +        for lease in session.leases:
    
    92
    +            server_ids.append(lease.id)
    
    93
    +
    
    94
    +            if lease.status.PENDING:
    
    95
    +                self._register_lease(lease)
    
    96
    +
    
    97
    +            elif lease.status.CANCELLED:
    
    98
    +                self._tenant_manager.cancel_tenancy(lease_id)
    
    99
    +
    
    100
    +        closed_lease_ids = [x for x in self._tenant_manager.get_lease_ids() if x not in server_ids]
    
    101
    +
    
    102
    +        for lease_id in closed_lease_ids:
    
    103
    +            self._tenant_manager.remove_tenant(lease_id)
    
    104
    +
    
    105
    +    def get_pb2(self):
    
    106
    +        return bots_pb2.BotSession(worker=self._hardware_interface.get_worker_pb2(),
    
    107
    +                                   status=self._status,
    
    108
    +                                   leases=self._tenant_manager.get_leases(),
    
    109
    +                                   bot_id=self.__bot_id,
    
    110
    +                                   name=self.__name)
    
    111
    +
    
    112
    def _register_lease(self, lease):
        """Registers a lease received from the server and starts its work.

        A tenancy is created for the lease, the hardware interface is asked
        to configure itself for it, and the work is then handed over to the
        tenant manager. Failures are logged and reported through the lease
        rather than raised.

        Args:
            lease: the lease message to register.
        """
        lease_id = lease.id

        try:
            self._tenant_manager.create_tenancy(lease)

        except KeyError as e:
            # The tenant manager raises KeyError when the lease id is
            # already tracked; nothing else to do for this lease.
            self.logger.error("Cannot register lease: [{}]. Error: {}".format(lease_id, e))
            return

        try:
            self._hardware_interface.configure_hardware(lease)

        except FailedPreconditionError as e:
            # NOTE(review): presumably raised when the worker cannot satisfy
            # the lease requirements - confirm against HardwareInterface.
            self.logger.error("Cannot configure hardware for lease: [{}]. Error: {}".format(lease_id, e))
            # Status codes in `code_pb2` are upper-case enum constants (see
            # the `code_pb2.INTERNAL` / `code_pb2.CANCELLED` usages elsewhere).
            self._tenant_manager.complete_lease(lease_id, status=code_pb2.FAILED_PRECONDITION)

        else:
            self._tenant_manager.create_work(lease_id, self._work, self._context)

  • buildgrid/bot/tenant.py
    1
    +# Copyright (C) 2018 Bloomberg LP
    
    2
    +#
    
    3
    +# Licensed under the Apache License, Version 2.0 (the "License");
    
    4
    +# you may not use this file except in compliance with the License.
    
    5
    +# You may obtain a copy of the License at
    
    6
    +#
    
    7
    +#  <http://www.apache.org/licenses/LICENSE-2.0>
    
    8
    +#
    
    9
    +# Unless required by applicable law or agreed to in writing, software
    
    10
    +# distributed under the License is distributed on an "AS IS" BASIS,
    
    11
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    
    12
    +# See the License for the specific language governing permissions and
    
    13
    +# limitations under the License.
    
    14
    +
    
    15
    +"""
    
    16
    +Tenant
    
    17
    +======
    
    18
    +
    
    19
    +Handles leases and runs leased work.
    
    20
    +"""
    
    21
    +
    
    22
    +
    
    23
import asyncio
from functools import partial
import logging

import grpc

from buildgrid._enums import LeaseState
from buildgrid._exceptions import BotError
from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
from buildgrid._protos.google.rpc import code_pb2
    
    28
    +
    
    29
    +
    
    30
    +class Tenant:
    
    31
    +
    
    32
    +    def __init__(self, lease):
    
    33
    +
    
    34
    +        if lease.state != LeaseState.PENDING:
    
    35
    +            raise ValueError("Lease state not `PENDING`: {}".format(lease.state))
    
    36
    +
    
    37
    +        self.logger = logging.getLogger(__name__)
    
    38
    +        self.lease_finished = False
    
    39
    +
    
    40
    +        self._lease = lease
    
    41
    +
    
    42
    +    @property
    
    43
    +    def lease(self):
    
    44
    +        return self._lease
    
    45
    +
    
    46
    +    def get_state_state(self):
    
    47
    +        return self._lease.state
    
    48
    +
    
    49
    +    def update_lease_state(self, state):
    
    50
    +        self._lease.state = state
    
    51
    +
    
    52
    +    def update_lease_status(self, status):
    
    53
    +        self._lease.status = status
    
    54
    +
    
    55
    +    async def run_work(self, work, context=None, executor=None):
    
    56
    +        self.logger.debug("Work created: [{}]".format(self._lease.id))
    
    57
    +
    
    58
    +        # Ensures if anything happens to the lease during work, we still have a copy.
    
    59
    +        lease = bots_pb2.Lease()
    
    60
    +        lease.CopyFrom(self._lease)
    
    61
    +
    
    62
    +        loop = asyncio.get_event_loop()
    
    63
    +
    
    64
    +        try:
    
    65
    +            lease = await loop.run_in_executor(executor, partial(work, context, self._lease))
    
    66
    +            self._lease.CopyFrom(lease)
    
    67
    +
    
    68
    +        except asyncio.CancelledError as e:
    
    69
    +            self.logger.error("Task cancelled: [{}]".format(e))
    
    70
    +
    
    71
    +        except grpc.RpcError as e:
    
    72
    +            self.logger.error("RPC error thrown: [{}]".format(e))
    
    73
    +            lease.status.CopyFrom(e.code())
    
    74
    +
    
    75
    +        except BotError as e:
    
    76
    +            self.logger.error("Internal bot error thrown: [{}]".format(e))
    
    77
    +            lease.status.code = code_pb2.INTERNAL
    
    78
    +
    
    79
    +        except Exception as e:
    
    80
    +            self.logger.error("Exception thrown: [{}]".format(e))
    
    81
    +            lease.status.code = code_pb2.INTERNAL
    
    82
    +
    
    83
    +        finally:
    
    84
    +            self.logger.debug("Work completed: [{}]".format(lease.id))

  • buildgrid/bot/tenantmanager.py
    1
    +# Copyright (C) 2018 Bloomberg LP
    
    2
    +#
    
    3
    +# Licensed under the Apache License, Version 2.0 (the "License");
    
    4
    +# you may not use this file except in compliance with the License.
    
    5
    +# You may obtain a copy of the License at
    
    6
    +#
    
    7
    +#  <http://www.apache.org/licenses/LICENSE-2.0>
    
    8
    +#
    
    9
    +# Unless required by applicable law or agreed to in writing, software
    
    10
    +# distributed under the License is distributed on an "AS IS" BASIS,
    
    11
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    
    12
    +# See the License for the specific language governing permissions and
    
    13
    +# limitations under the License.
    
    14
    +
    
    15
    +
    
    16
    +"""
    
    17
    +TenantManager
    
    18
    +=============
    
    19
    +
    
    20
    +Looks after leases of work.
    
    21
    +"""
    
    22
    +
    
    23
    +
    
    24
    +import asyncio
    
    25
    +import logging
    
    26
    +from functools import partial
    
    27
    +
    
    28
    +import grpc
    
    29
    +
    
    30
    +from buildgrid._enums import LeaseState
    
    31
    +
    
    32
    +from .tenant import Tenant
    
    33
    +
    
    34
    +class TenantManager:
    
    35
    +
    
    36
    +    def __init__(self):
    
    37
    +        self.logger = logging.getLogger(__name__)
    
    38
    +        self._tenants = {}
    
    39
    +        self._tasks = {}
    
    40
    +
    
    41
    +    def create_tenancy(self, lease):
    
    42
    +        lease_id = lease.id
    
    43
    +
    
    44
    +        if lease_id not in self._tenants:
    
    45
    +            tenant = Tenant(lease, context)
    
    46
    +            self._tenants[lease_id] = tenant
    
    47
    +
    
    48
    +        else:
    
    49
    +            raise KeyError("Lease id already exists: [{}]".format(lease_id))
    
    50
    +
    
    51
    +    def remove_tenant(self, lease_id):
    
    52
    +        state = self.get_lease_state(lease_id)
    
    53
    +        if state != LeaseState.COMPLETED or state != LeaseState.CANCELLED:
    
    54
    +            self.logger.error("Attempting to remove a lease not finished."
    
    55
    +                              "Bot will not remove lease."
    
    56
    +                              "Lease: [{}]".format(self._tenant_manager.get_lease(lease_id)))
    
    57
    +
    
    58
    +        else:
    
    59
    +            del self._tenants[lease_id]
    
    60
    +            del self._tasks[lease_id]
    
    61
    +
    
    62
    +    def get_leases(self):
    
    63
    +        leases = []
    
    64
    +        for _, tenant in self._tenants.items():
    
    65
    +            leases.append(tenant.lease)
    
    66
    +
    
    67
    +        if not leases:
    
    68
    +            return None
    
    69
    +
    
    70
    +        return leases
    
    71
    +
    
    72
    +    def get_lease_ids(self):
    
    73
    +        lease_ids = []
    
    74
    +        for lease_id in self._tenants.keys():
    
    75
    +            lease_ids.append(lease_id)
    
    76
    +
    
    77
    +        if not lease_ids:
    
    78
    +            return []
    
    79
    +
    
    80
    +        return leases
    
    81
    +
    
    82
    +    def get_lease_state(self, lease_id):
    
    83
    +        return self._tenants[lease_id].get_lease_state()
    
    84
    +
    
    85
    +    def complete_lease(self, lease_id, status=None):
    
    86
    +        if status is not None:
    
    87
    +            self._update_lease_status(lease_id, status)
    
    88
    +
    
    89
    +        if self._tenants[lease_id].get_lease_state() != LeaseState.CANCELLED:
    
    90
    +            self._update_lease_state(lease_id, LeaseState.COMPLETED)
    
    91
    +
    
    92
    +    def create_work(self, lease_id, work, context):
    
    93
    +        self._update_lease_state(lease_id, LeaseState.ACTIVE)
    
    94
    +        tenant = self._tenants[lease_id]
    
    95
    +        task = asyncio.ensure_future(tenant.run_work(work, context))
    
    96
    +
    
    97
    +        task.add_done_callback(partial(self.complete_lease(lease_id)))
    
    98
    +
    
    99
    +    def cancel_tenancy(self, lease_id):
    
    100
    +        self._update_lease_state(LeaseState.CANCELLED)
    
    101
    +        self._tasks[lease_id].cancel()
    
    102
    +
    
    103
    +    def _update_lease_state(self, lease_id, state):
    
    104
    +        self._tenants[lease_id].update_lease_state(state)
    
    105
    +
    
    106
    +    def _update_lease_status(self, lease_id, status):
    
    107
    +        self._tenants[lease_id].update_lease_status(status)

  • buildgrid/server/bots/instance.py
    ... ... @@ -80,9 +80,11 @@ class BotsInterface:
    80 80
             self.logger.debug("Updating bot session name={}".format(name))
    
    81 81
             self._check_bot_ids(bot_session.bot_id, name)
    
    82 82
     
    
    83
    -        leases = filter(None, [self.check_states(lease) for lease in bot_session.leases])
    
    83
    +        leases = filter(None, [self._check_lease_state(lease) for lease in bot_session.leases])
    
    84
    +
    
    85
    +        for lease in bot_session.leases:
    
    86
    +            lease.Clear()
    
    84 87
     
    
    85
    -        del bot_session.leases[:]
    
    86 88
             bot_session.leases.extend(leases)
    
    87 89
     
    
    88 90
             # TODO: Send worker capabilities to the scheduler!
    
    ... ... @@ -94,55 +96,22 @@ class BotsInterface:
    94 96
             self._bot_sessions[name] = bot_session
    
    95 97
             return bot_session
    
    96 98
     
    
    97
    -    def check_states(self, client_lease):
    
    98
    -        """ Edge detector for states
    
    99
    -        """
    
    100
    -        # TODO: Handle cancelled states
    
    101
    -        try:
    
    102
    -            server_lease = self._scheduler.get_job_lease(client_lease.id)
    
    103
    -        except KeyError:
    
    104
    -            raise InvalidArgumentError("Lease not found on server: [{}]".format(client_lease))
    
    105
    -
    
    106
    -        server_state = LeaseState(server_lease.state)
    
    107
    -        client_state = LeaseState(client_lease.state)
    
    108
    -
    
    109
    -        if server_state == LeaseState.PENDING:
    
    110
    -
    
    111
    -            if client_state == LeaseState.ACTIVE:
    
    112
    -                self._scheduler.update_job_lease_state(client_lease.id,
    
    113
    -                                                       LeaseState.ACTIVE)
    
    114
    -            elif client_state == LeaseState.COMPLETED:
    
    115
    -                # TODO: Lease was rejected
    
    116
    -                raise NotImplementedError("'Not Accepted' is unsupported")
    
    117
    -            else:
    
    118
    -                raise OutOfSyncError("Server lease: [{}]. Client lease: [{}]".format(server_lease, client_lease))
    
    99
    +    def _check_lease_state(self, lease):
    
    100
    +        # Check for cancelled lease
    
    101
    +        if self._scheduler.get_lease_cancelled(lease.id):
    
    102
    +            return None
    
    119 103
     
    
    120
    -        elif server_state == LeaseState.ACTIVE:
    
    104
    +        # If not cancelled, update the status
    
    105
    +        self._scheduler.update_job_lease(lease)
    
    121 106
     
    
    122
    -            if client_state == LeaseState.ACTIVE:
    
    123
    -                pass
    
    107
    +        lease_state = LeaseState(lease.state)
    
    108
    +        if lease_state == LeaseState.COMPLETED:
    
    109
    +            return None
    
    124 110
     
    
    125
    -            elif client_state == LeaseState.COMPLETED:
    
    126
    -                self._scheduler.update_job_lease_state(client_lease.id,
    
    127
    -                                                       LeaseState.COMPLETED,
    
    128
    -                                                       lease_status=client_lease.status,
    
    129
    -                                                       lease_result=client_lease.result)
    
    130
    -                return None
    
    131
    -
    
    132
    -            else:
    
    133
    -                raise OutOfSyncError("Server lease: [{}]. Client lease: [{}]".format(server_lease, client_lease))
    
    134
    -
    
    135
    -        elif server_state == LeaseState.COMPLETED:
    
    136
    -            raise OutOfSyncError("Server lease: [{}]. Client lease: [{}]".format(server_lease, client_lease))
    
    137
    -
    
    138
    -        elif server_state == LeaseState.CANCELLED:
    
    139
    -            raise NotImplementedError("Cancelled states not supported yet")
    
    140
    -
    
    141
    -        else:
    
    142
    -            # Sould never get here
    
    143
    -            raise OutOfSyncError("State now allowed: {}".format(server_state))
    
    111
    +        elif lease_state == LeaseState.CANCELLED:
    
    112
    +            return None
    
    144 113
     
    
    145
    -        return client_lease
    
    114
    +        return lease
    
    146 115
     
    
    147 116
         def _check_bot_ids(self, bot_id, name=None):
    
    148 117
             """ Checks the ID and the name of the bot.
    

  • buildgrid/server/execution/service.py
    ... ... @@ -26,7 +26,7 @@ from functools import partial
    26 26
     
    
    27 27
     import grpc
    
    28 28
     
    
    29
    -from buildgrid._exceptions import FailedPreconditionError, InvalidArgumentError
    
    29
    +from buildgrid._exceptions import FailedPreconditionError, InvalidArgumentError, CancelledError
    
    30 30
     from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2_grpc
    
    31 31
     from buildgrid._protos.google.longrunning import operations_pb2
    
    32 32
     
    
    ... ... @@ -76,6 +76,12 @@ class ExecutionService(remote_execution_pb2_grpc.ExecutionServicer):
    76 76
                 context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
    
    77 77
                 yield operations_pb2.Operation()
    
    78 78
     
    
    79
    +        except CancelledError as e:
    
    80
    +            self.logger.error(e)
    
    81
    +            context.set_details(str(e))
    
    82
    +            context.set_code(grpc.StatusCode.CANCELLED)
    
    83
    +            yield operations_pb2.Operation()
    
    84
    +
    
    79 85
         def WaitExecution(self, request, context):
    
    80 86
             try:
    
    81 87
                 names = request.name.split("/")
    
    ... ... @@ -106,6 +112,12 @@ class ExecutionService(remote_execution_pb2_grpc.ExecutionServicer):
    106 112
                 context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
    
    107 113
                 yield operations_pb2.Operation()
    
    108 114
     
    
    115
    +        except CancelledError as e:
    
    116
    +            self.logger.error(e)
    
    117
    +            context.set_details(str(e))
    
    118
    +            context.set_code(grpc.StatusCode.CANCELLED)
    
    119
    +            yield operations_pb2.Operation()
    
    120
    +
    
    109 121
         def _get_instance(self, name):
    
    110 122
             try:
    
    111 123
                 return self._instances[name]
    

  • buildgrid/server/job.py
    ... ... @@ -19,6 +19,7 @@ import uuid
    19 19
     from google.protobuf import timestamp_pb2
    
    20 20
     
    
    21 21
     from buildgrid._enums import LeaseState, OperationStage
    
    22
    +from buildgrid._exceptions import CancelledError
    
    22 23
     from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    
    23 24
     from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
    
    24 25
     from buildgrid._protos.google.longrunning import operations_pb2
    
    ... ... @@ -37,10 +38,14 @@ class Job:
    37 38
     
    
    38 39
             self.__execute_response = None
    
    39 40
             self.__operation_metadata = remote_execution_pb2.ExecuteOperationMetadata()
    
    41
    +
    
    40 42
             self.__queued_timestamp = timestamp_pb2.Timestamp()
    
    41 43
             self.__worker_start_timestamp = timestamp_pb2.Timestamp()
    
    42 44
             self.__worker_completed_timestamp = timestamp_pb2.Timestamp()
    
    43 45
     
    
    46
    +        self.__operation_cancelled = False
    
    47
    +        self.__lease_cancelled = False
    
    48
    +
    
    44 49
             self.__operation_metadata.action_digest.CopyFrom(action_digest)
    
    45 50
             self.__operation_metadata.stage = OperationStage.UNKNOWN.value
    
    46 51
     
    
    ... ... @@ -93,6 +98,10 @@ class Job:
    93 98
             else:
    
    94 99
                 return None
    
    95 100
     
    
    101
    +    @property
    
    102
    +    def lease_cancelled(self):
    
    103
    +        return self.__lease_cancelled
    
    104
    +
    
    96 105
         @property
    
    97 106
         def n_tries(self):
    
    98 107
             return self._n_tries
    
    ... ... @@ -131,7 +140,9 @@ class Job:
    131 140
             Only one :class:`Lease` can be emitted for a given job. This method
    
    132 141
             should only be used once, any further calls are ignored.
    
    133 142
             """
    
    134
    -        if self._lease is not None:
    
    143
    +        if self.__operation_cancelled:
    
    144
    +            return None
    
    145
    +        elif self._lease is not None:
    
    135 146
                 return None
    
    136 147
     
    
    137 148
             self._lease = bots_pb2.Lease()
    
    ... ... @@ -189,6 +200,15 @@ class Job:
    189 200
                 self.__execute_response.cached_result = False
    
    190 201
                 self.__execute_response.status.CopyFrom(status)
    
    191 202
     
    
    203
    +    def cancel_lease(self):
    
    204
    +        """Triggers a job's :class:Lease cancellation.
    
    205
    +
    
    206
    +        This will not cancel the job's :class:Operation.
    
    207
    +        """
    
    208
    +        self.__lease_cancelled = True
    
    209
    +        if self._lease is not None:
    
    210
    +            self.update_lease_state(LeaseState.CANCELLED)
    
    211
    +
    
    192 212
         def update_operation_stage(self, stage):
    
    193 213
             """Operates a stage transition for the job's :class:Operation.
    
    194 214
     
    
    ... ... @@ -214,3 +234,20 @@ class Job:
    214 234
     
    
    215 235
             for queue in self._operation_update_queues:
    
    216 236
                 queue.put(self._operation)
    
    237
    +
    
    238
    +    def cancel_operation(self):
    
    239
    +        """Triggers a job's :class:Operation cancellation.
    
    240
    +
    
    241
    +        This will also cancel any job's :class:Lease that may have been issued.
    
    242
    +        """
    
    243
    +        self.__operation_cancelled = True
    
    244
    +        if self._lease is not None:
    
    245
    +            self.cancel_lease()
    
    246
    +
    
    247
    +        self.__execute_response = remote_execution_pb2.ExecuteResponse()
    
    248
    +        self.__execute_response.status.code = code_pb2.CANCELLED
    
    249
    +        self.__execute_response.status.message = "Operation cancelled by client."
    
    250
    +
    
    251
    +        self.update_operation_stage(OperationStage.COMPLETED)
    
    252
    +
    
    253
    +        raise CancelledError("Operation cancelled: {}".format(self._name))

  • buildgrid/server/operations/instance.py
    ... ... @@ -64,6 +64,13 @@ class OperationsInstance:
    64 64
             except KeyError:
    
    65 65
                 raise InvalidArgumentError("Operation name does not exist: [{}]".format(name))
    
    66 66
     
    
    67
    +    def cancel_operation(self, name):
    
    68
    +        try:
    
    69
    +            self._scheduler.cancel_job_operation(name)
    
    70
    +
    
    71
    +        except KeyError:
    
    72
    +            raise InvalidArgumentError("Operation name does not exist: [{}]".format(name))
    
    73
    +
    
    67 74
         def register_message_client(self, name, queue):
    
    68 75
             try:
    
    69 76
                 self._scheduler.register_client(name, queue)
    
    ... ... @@ -84,7 +91,3 @@ class OperationsInstance:
    84 91
                 yield operation
    
    85 92
                 operation = message_queue.get()
    
    86 93
             yield operation
    87
    -
    
    88
    -    def cancel_operation(self, name):
    
    89
    -        # TODO: Cancel leases
    
    90
    -        raise NotImplementedError("Cancelled operations not supported")

  • buildgrid/server/operations/service.py
    ... ... @@ -25,7 +25,7 @@ import grpc
    25 25
     
    
    26 26
     from google.protobuf.empty_pb2 import Empty
    
    27 27
     
    
    28
    -from buildgrid._exceptions import InvalidArgumentError
    
    28
    +from buildgrid._exceptions import CancelledError, InvalidArgumentError
    
    29 29
     from buildgrid._protos.google.longrunning import operations_pb2_grpc, operations_pb2
    
    30 30
     
    
    31 31
     
    
    ... ... @@ -112,10 +112,10 @@ class OperationsService(operations_pb2_grpc.OperationsServicer):
    112 112
                 operation_name = self._parse_operation_name(name)
    
    113 113
                 instance.cancel_operation(operation_name)
    
    114 114
     
    
    115
    -        except NotImplementedError as e:
    
    115
    +        except CancelledError as e:
    
    116 116
                 self.logger.error(e)
    
    117 117
                 context.set_details(str(e))
    
    118
    -            context.set_code(grpc.StatusCode.UNIMPLEMENTED)
    
    118
    +            context.set_code(grpc.StatusCode.CANCELLED)
    
    119 119
     
    
    120 120
             except InvalidArgumentError as e:
    
    121 121
                 self.logger.error(e)
    

  • buildgrid/server/scheduler.py
    ... ... @@ -94,9 +94,12 @@ class Scheduler:
    94 94
             # For now, one lease at a time:
    
    95 95
             lease = job.create_lease()
    
    96 96
     
    
    97
    -        return [lease]
    
    97
    +        if lease:
    
    98
    +            return [lease]
    
    98 99
     
    
    99
    -    def update_job_lease_state(self, job_name, lease_state, lease_status=None, lease_result=None):
    
    100
    +        return None
    
    101
    +
    
    102
    +    def update_job_lease(self, lease):
    
    100 103
             """Requests a state transition for a job's current :class:Lease.
    
    101 104
     
    
    102 105
             Args:
    
    ... ... @@ -107,7 +110,9 @@ class Scheduler:
    107 110
                 lease_result (google.protobuf.Any): the lease execution result, only
    
    108 111
                     required if `lease_state` is `COMPLETED`.
    
    109 112
             """
    
    110
    -        job = self.jobs[job_name]
    
    113
    +
    
    114
    +        job = self.jobs[lease.id]
    
    115
    +        lease_state = LeaseState(lease.state)
    
    111 116
     
    
    112 117
             if lease_state == LeaseState.PENDING:
    
    113 118
                 job.update_lease_state(LeaseState.PENDING)
    
    ... ... @@ -119,7 +124,7 @@ class Scheduler:
    119 124
     
    
    120 125
             elif lease_state == LeaseState.COMPLETED:
    
    121 126
                 job.update_lease_state(LeaseState.COMPLETED,
    
    122
    -                                   status=lease_status, result=lease_result)
    
    127
    +                                   status=lease.status, result=lease.result)
    
    123 128
     
    
    124 129
                 if self._action_cache is not None and not job.do_not_cache:
    
    125 130
                     self._action_cache.update_action_result(job.action_digest, job.action_result)
    
    ... ... @@ -130,6 +135,20 @@ class Scheduler:
    130 135
             """Returns the lease associated to job, if any have been emitted yet."""
    
    131 136
             return self.jobs[job_name].lease
    
    132 137
     
    
    138
    +    def get_job_lease_cancelled(self, job_name):
    
    139
    +        """Returns true if the lease is cancelled"""
    
    140
    +        return self.jobs[job_name].lease_cancelled
    
    141
    +
    
    133 142
         def get_job_operation(self, job_name):
    
    134 143
             """Returns the operation associated to job."""
    
    135 144
             return self.jobs[job_name].operation
    
    145
    +
    
    146
    +    def cancel_job_operation(self, job_name):
    
    147
    +        """"Cancels the underlying operation of a given job.
    
    148
    +
    
    149
    +        This will also cancel any job's lease that may have been issued.
    
    150
    +
    
    151
    +        Args:
    
    152
    +            job_name (str): name of the job holding the operation to cancel.
    
    153
    +        """
    
    154
    +        self.jobs[job_name].cancel_operation()

  • tests/integration/operations_service.py
    ... ... @@ -24,6 +24,7 @@ import grpc
    24 24
     from grpc._server import _Context
    
    25 25
     import pytest
    
    26 26
     
    
    27
    +from buildgrid._enums import OperationStage
    
    27 28
     from buildgrid._exceptions import InvalidArgumentError
    
    28 29
     from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    
    29 30
     from buildgrid._protos.google.longrunning import operations_pb2
    
    ... ... @@ -236,12 +237,26 @@ def test_delete_operation_fail(instance, context):
    236 237
         context.set_code.assert_called_once_with(grpc.StatusCode.INVALID_ARGUMENT)
    
    237 238
     
    
    238 239
     
    
    239
    -def test_cancel_operation(instance, context):
    
    240
    +def test_cancel_operation(instance, controller, execute_request, context):
    
    241
    +    response_execute = controller.execution_instance.execute(execute_request.action_digest,
    
    242
    +                                                             execute_request.skip_cache_lookup)
    
    243
    +
    
    240 244
         request = operations_pb2.CancelOperationRequest()
    
    241
    -    request.name = "{}/{}".format(instance_name, "runner")
    
    245
    +    request.name = "{}/{}".format(instance_name, response_execute.name)
    
    246
    +
    
    242 247
         instance.CancelOperation(request, context)
    
    243 248
     
    
    244
    -    context.set_code.assert_called_once_with(grpc.StatusCode.UNIMPLEMENTED)
    
    249
    +    context.set_code.assert_called_once_with(grpc.StatusCode.CANCELLED)
    
    250
    +
    
    251
    +    request = operations_pb2.ListOperationsRequest(name=instance_name)
    
    252
    +    response = instance.ListOperations(request, context)
    
    253
    +
    
    254
    +    assert len(response.operations) is 1
    
    255
    +
    
    256
    +    for operation in response.operations:
    
    257
    +        operation_metadata = remote_execution_pb2.ExecuteOperationMetadata()
    
    258
    +        operation.metadata.Unpack(operation_metadata)
    
    259
    +        assert operation_metadata.stage == OperationStage.COMPLETED.value
    
    245 260
     
    
    246 261
     
    
    247 262
     def test_cancel_operation_blank(blank_instance, context):
    
    ... ... @@ -249,7 +264,7 @@ def test_cancel_operation_blank(blank_instance, context):
    249 264
         request.name = "runner"
    
    250 265
         blank_instance.CancelOperation(request, context)
    
    251 266
     
    
    252
    -    context.set_code.assert_called_once_with(grpc.StatusCode.UNIMPLEMENTED)
    
    267
    +    context.set_code.assert_called_once_with(grpc.StatusCode.INVALID_ARGUMENT)
    
    253 268
     
    
    254 269
     
    
    255 270
     def test_cancel_operation_instance_fail(instance, context):
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]