[Notes] [Git][BuildGrid/buildgrid][finn/bot-refactor] Refactored bots, simplified asyncio.



Title: GitLab

finnball pushed to branch finn/bot-refactor at BuildGrid / buildgrid

Commits:

8 changed files:

Changes:

  • app/commands/cmd_bot.py
    ... ... @@ -30,10 +30,12 @@ import os
    30 30
     import random
    
    31 31
     import subprocess
    
    32 32
     import tempfile
    
    33
    +import time
    
    33 34
     
    
    34 35
     from pathlib import Path, PurePath
    
    35 36
     
    
    36
    -from buildgrid.bot import bot
    
    37
    +from buildgrid.bot import bot, bot_interface
    
    38
    +from buildgrid.bot.bot_session import BotSession, Device, Worker
    
    37 39
     from buildgrid._exceptions import BotError
    
    38 40
     
    
    39 41
     from ..cli import pass_context
    
    ... ... @@ -45,18 +47,25 @@ from google.protobuf import any_pb2
    45 47
     @click.group(short_help = 'Create a bot client')
    
    46 48
     @click.option('--continuous', is_flag=True)
    
    47 49
     @click.option('--parent', default='bgd_test')
    
    48
    -@click.option('--number-of-leases', default=1)
    
    49 50
     @click.option('--port', default='50051')
    
    50 51
     @click.option('--host', default='localhost')
    
    51 52
     @pass_context
    
    52
    -def cli(context, host, port, number_of_leases, parent, continuous):
    
    53
    +def cli(context, host, port, parent, continuous):
    
    54
    +    channel = grpc.insecure_channel('{}:{}'.format(host, port))
    
    55
    +    interface = bot_interface.BotInterface(channel)
    
    56
    +
    
    53 57
         context.logger = logging.getLogger(__name__)
    
    54 58
         context.logger.info("Starting on port {}".format(port))
    
    55 59
     
    
    56 60
         context.continuous = continuous
    
    57
    -    context.channel = grpc.insecure_channel('{}:{}'.format(host, port))
    
    58
    -    context.number_of_leases = number_of_leases
    
    59
    -    context.parent = parent
    
    61
    +
    
    62
    +    worker = Worker()
    
    63
    +    worker.add_device(Device())
    
    64
    +
    
    65
    +    bot_session = BotSession(parent, interface)
    
    66
    +    bot_session.add_worker(worker)
    
    67
    +
    
    68
    +    context.bot_session = bot_session
    
    60 69
     
    
    61 70
     @cli.command('dummy', short_help='Create a dummy bot session')
    
    62 71
     @pass_context
    
    ... ... @@ -65,16 +74,11 @@ def dummy(context):
    65 74
         Simple dummy client. Creates a session, accepts leases, does fake work and
    
    66 75
         updates the server.
    
    67 76
         """
    
    68
    -
    
    69
    -    context.logger.info("Creating a bot session")
    
    70
    -
    
    71 77
         try:
    
    72
    -        bot.Bot(work=_work_dummy,
    
    73
    -                context=context,
    
    74
    -                channel=context.channel,
    
    75
    -                parent=context.parent,
    
    76
    -                number_of_leases=context.number_of_leases,
    
    77
    -                continuous=context.continuous)
    
    78
    +        b = bot.Bot(context.bot_session)
    
    79
    +        b.session(_work_dummy,
    
    80
    +                  context,
    
    81
    +                  context.continuous)
    
    78 82
     
    
    79 83
         except KeyboardInterrupt:
    
    80 84
             pass
    
    ... ... @@ -88,7 +92,7 @@ def dummy(context):
    88 92
     @click.option('--port', show_default = True, default=11001)
    
    89 93
     @click.option('--remote', show_default = True, default='localhost')
    
    90 94
     @pass_context
    
    91
    -def _work_buildbox(context, remote, port, server_cert, client_key, client_cert, local_cas, fuse_dir):
    
    95
    +def work_buildbox(context, remote, port, server_cert, client_key, client_cert, local_cas, fuse_dir):
    
    92 96
         """
    
    93 97
         Uses BuildBox to run commands.
    
    94 98
         """
    
    ... ... @@ -104,12 +108,14 @@ def _work_buildbox(context, remote, port, server_cert, client_key, client_cert,
    104 108
         context.fuse_dir = fuse_dir
    
    105 109
     
    
    106 110
         try:
    
    107
    -        bot.Bot(work=_work_buildbox,
    
    108
    -                context=context,
    
    109
    -                channel=context.channel,
    
    110
    -                parent=context.parent,
    
    111
    -                number_of_leases=context.number_of_leases,
    
    112
    -                continuous=context.continuous)
    
    111
    +        b = bot.Bot(work=_work_buildbox,
    
    112
    +                    bot_session=context.bot_session,
    
    113
    +                    channel=context.channel,
    
    114
    +                    parent=context.parent)
    
    115
    +
    
    116
    +        b.session(context.parent,
    
    117
    +                  _work_buildbox,
    
    118
    +                  context)
    
    113 119
     
    
    114 120
         except KeyboardInterrupt:
    
    115 121
             pass
    

  • buildgrid/bot/bot.py
    ... ... @@ -23,160 +23,46 @@ Creates a bot session.
    23 23
     """
    
    24 24
     
    
    25 25
     import asyncio
    
    26
    -import inspect
    
    26
    +import collections
    
    27 27
     import logging
    
    28
    -import platform
    
    29
    -import queue
    
    30 28
     import time
    
    31 29
     
    
    32
    -from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2, worker_pb2
    
    33
    -
    
    34
    -from . import bot_interface
    
    30
    +from . import bot_interface, bot_session
    
    31
    +from .bot_session import BotStatus, LeaseState
    
    35 32
     from .._exceptions import BotError
    
    36 33
     
    
    37
    -class Bot(object):
    
    34
    +class Bot:
    
    38 35
         """
    
    39 36
         Creates a local BotSession.
    
    40 37
         """
    
    41 38
     
    
    42
    -    def __init__(self, work, context, channel, parent, number_of_leases, continuous=True):
    
    43
    -        if not inspect.iscoroutinefunction(work):
    
    44
    -            raise BotError("work function must be async")
    
    45
    -
    
    46
    -        print(type(context))
    
    39
    +    UPDATE_PERIOD = 1
    
    47 40
     
    
    48
    -        self.interface = bot_interface.BotInterface(channel)
    
    41
    +    def __init__(self, bot_session):
    
    49 42
             self.logger = logging.getLogger(__name__)
    
    50 43
     
    
    51
    -        self._create_session(parent, number_of_leases)
    
    52
    -        self._work_queue = queue.Queue(maxsize = number_of_leases)
    
    53
    -
    
    54
    -        while continuous:
    
    55
    -            ## TODO: Leases independently finish
    
    56
    -            ## Allow leases to queue finished work independently instead
    
    57
    -            ## of waiting for all to finish
    
    58
    -            futures = [self._do_work(work, context, lease) for lease in self._get_work()]
    
    59
    -            if futures:
    
    60
    -                loop = asyncio.new_event_loop()
    
    61
    -                leases_complete, _ = loop.run_until_complete(asyncio.wait(futures))
    
    62
    -                work_complete = [(lease.result().id, lease.result(),) for lease in leases_complete]
    
    63
    -                self._work_complete(work_complete)
    
    64
    -                loop.close()
    
    65
    -            self._update_bot_session()
    
    66
    -            time.sleep(2)
    
    67
    -
    
    68
    -    @property
    
    69
    -    def bot_session(self):
    
    70
    -        ## Read only, shouldn't have to set any of the variables in here
    
    71
    -        return self._bot_session
    
    72
    -
    
    73
    -    def close_session(self):
    
    74
    -        self.logger.warning("Session closing not yet implemented")
    
    75
    -
    
    76
    -    async def _do_work(self, work, context, lease):
    
    77
    -        """ Work is done here, work function should be asynchronous
    
    78
    -        """
    
    79
    -        self.logger.info("Work found: {}".format(lease.id))
    
    80
    -        lease = await work(context=context, lease=lease)
    
    81
    -        lease.state = bots_pb2.LeaseState.Value('COMPLETED')
    
    82
    -        self.logger.info("Work complete: {}".format(lease.id))
    
    83
    -        return lease
    
    84
    -
    
    85
    -    def _update_bot_session(self):
    
    86
    -        """ Should call the server periodically to inform the server the client
    
    87
    -        has not died.
    
    88
    -        """
    
    89
    -        self.logger.debug("Updating bot session")
    
    90
    -        self._bot_session = self.interface.update_bot_session(self._bot_session)
    
    91
    -        leases_update = ([self._update_lease(lease) for lease in self._bot_session.leases])
    
    92
    -        del self._bot_session.leases[:]
    
    93
    -        self._bot_session.leases.extend(leases_update)
    
    94
    -
    
    95
    -    def _get_work(self):
    
    96
    -        while not self._work_queue.empty():
    
    97
    -            yield self._work_queue.get()
    
    98
    -
    
    99
    -    def _work_complete(self, leases_complete):
    
    100
    -        """ Bot updates itself with any completed work.
    
    101
    -        """
    
    102
    -        # Should really improve this...
    
    103
    -        # Maybe add some call back function sentoff work...
    
    104
    -        leases_active = list(filter(self._lease_active, self._bot_session.leases))
    
    105
    -        leases_not_active = [lease for lease in self._bot_session.leases if not self._lease_active(lease)]
    
    106
    -        del self._bot_session.leases[:]
    
    107
    -        for lease in leases_active:
    
    108
    -            for lease_tuple in leases_complete:
    
    109
    -                if lease.id == lease_tuple[0]:
    
    110
    -                    leases_not_active.extend([lease_tuple[1]])
    
    111
    -        self._bot_session.leases.extend(leases_not_active)
    
    112
    -
    
    113
    -    def _update_lease(self, lease):
    
    114
    -        """
    
    115
    -        State machine for any recieved updates to the leases.
    
    116
    -        """
    
    117
    -        if self._lease_pending(lease):
    
    118
    -            lease.state = bots_pb2.LeaseState.Value('ACTIVE')
    
    119
    -            self._work_queue.put(lease)
    
    120
    -            return lease
    
    121
    -
    
    122
    -        else:
    
    123
    -            return lease
    
    124
    -
    
    125
    -    def _create_session(self, parent, number_of_leases):
    
    126
    -        self.logger.debug("Creating bot session")
    
    127
    -        worker = self._create_worker()
    
    128
    -
    
    129
    -        """ Unique bot ID within the farm used to identify this bot
    
    130
    -        Needs to be human readable.
    
    131
    -        All prior sessions with bot_id of same ID are invalidated.
    
    132
    -        If a bot attempts to update an invalid session, it must be rejected and
    
    133
    -        may be put in quarantine.
    
    134
    -        """
    
    135
    -        bot_id = '{}.{}'.format(parent, platform.node())
    
    136
    -
    
    137
    -        leases = [bots_pb2.Lease() for x in range(number_of_leases)]
    
    138
    -
    
    139
    -        bot_session = bots_pb2.BotSession(worker = worker,
    
    140
    -                                          status = bots_pb2.BotStatus.Value('OK'),
    
    141
    -                                          leases = leases,
    
    142
    -                                          bot_id = bot_id)
    
    143
    -        self._bot_session = self.interface.create_bot_session(parent, bot_session)
    
    144
    -        self.logger.info("Name: {}, Id: {}".format(self._bot_session.name,
    
    145
    -                                                      self._bot_session.bot_id))
    
    146
    -
    
    147
    -    def _create_worker(self):
    
    148
    -        devices = self._create_devices()
    
    149
    -
    
    150
    -        # Contains a list of devices and the connections between them.
    
    151
    -        worker = worker_pb2.Worker(devices = devices)
    
    152
    -
    
    153
    -        """ Keys supported:
    
    154
    -        *pool
    
    155
    -        """
    
    156
    -        worker.Property.key = "pool"
    
    157
    -        worker.Property.value = "all"
    
    158
    -
    
    159
    -        return worker
    
    160
    -
    
    161
    -    def _create_devices(self):
    
    162
    -        """ Creates devices available to the worker
    
    163
    -        The first device is know as the Primary Device - the revice which
    
    164
    -        is running a bit and responsible to actually executing commands.
    
    165
    -        All other devices are known as Attatched Devices and must be controlled
    
    166
    -        by the Primary Device.
    
    167
    -        """
    
    44
    +        self._bot_session = bot_session
    
    168 45
     
    
    169
    -        devices = []
    
    46
    +    def session(self, work, context, continuous = False):
    
    47
    +        loop = asyncio.get_event_loop()
    
    170 48
     
    
    171
    -        for i in range(0, 1): # Append one device for now
    
    172
    -            dev = worker_pb2.Device()
    
    49
    +        self._bot_session.create_bot_session(work, context)
    
    173 50
     
    
    174
    -            devices.append(dev)
    
    51
    +        try:
    
    52
    +            task = asyncio.ensure_future(self._update_bot_session())
    
    53
    +            loop.run_forever()
    
    175 54
     
    
    176
    -        return devices
    
    55
    +        except KeyboardInterrupt:
    
    56
    +            pass
    
    177 57
     
    
    178
    -    def _lease_pending(self, lease):
    
    179
    -        return lease.state == bots_pb2.LeaseState.Value('PENDING')
    
    58
    +        finally:
    
    59
    +            task.cancel()
    
    60
    +            loop.close()
    
    180 61
     
    
    181
    -    def _lease_active(self, lease):
    
    182
    -        return lease.state == bots_pb2.LeaseState.Value('ACTIVE')
    62
    +    async def _update_bot_session(self):
    
    63
    +        while True:
    
    64
    +            """ Calls the server periodically to inform the server the client
    
    65
    +            has not died.
    
    66
    +            """
    
    67
    +            self._bot_session.update_bot_session()
    
    68
    +            await asyncio.sleep(self.UPDATE_PERIOD)

  • buildgrid/bot/bot_interface.py
    ... ... @@ -29,7 +29,7 @@ from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2, bo
    29 29
     
    
    30 30
     from .._exceptions import BotError
    
    31 31
     
    
    32
    -class BotInterface(object):
    
    32
    +class BotInterface:
    
    33 33
         """ Interface handles calls to the server.
    
    34 34
         """
    
    35 35
     
    
    ... ... @@ -39,22 +39,12 @@ class BotInterface(object):
    39 39
             self._stub = bots_pb2_grpc.BotsStub(channel)
    
    40 40
     
    
    41 41
         def create_bot_session(self, parent, bot_session):
    
    42
    -        try:
    
    43
    -            request = bots_pb2.CreateBotSessionRequest(parent = parent,
    
    44
    -                                                       bot_session = bot_session)
    
    45
    -            return self._stub.CreateBotSession(request)
    
    46
    -
    
    47
    -        except Exception as e:
    
    48
    -            self.logger.error(e)
    
    49
    -            raise BotError(e)
    
    42
    +        request = bots_pb2.CreateBotSessionRequest(parent = parent,
    
    43
    +                                                   bot_session = bot_session)
    
    44
    +        return self._stub.CreateBotSession(request)
    
    50 45
     
    
    51 46
         def update_bot_session(self, bot_session, update_mask = None):
    
    52
    -        try:
    
    53
    -            request = bots_pb2.UpdateBotSessionRequest(name = bot_session.name,
    
    54
    -                                                       bot_session = bot_session,
    
    55
    -                                                       update_mask = update_mask)
    
    56
    -            return self._stub.UpdateBotSession(request)
    
    57
    -
    
    58
    -        except Exception as e:
    
    59
    -            self.logger.error(e)
    
    60
    -            raise BotError(e)
    47
    +        request = bots_pb2.UpdateBotSessionRequest(name = bot_session.name,
    
    48
    +                                                   bot_session = bot_session,
    
    49
    +                                                   update_mask = update_mask)
    
    50
    +        return self._stub.UpdateBotSession(request)

  • buildgrid/bot/bot_session.py
    1
    +# Copyright (C) 2018 Bloomberg LP
    
    2
    +#
    
    3
    +# Licensed under the Apache License, Version 2.0 (the "License");
    
    4
    +# you may not use this file except in compliance with the License.
    
    5
    +# You may obtain a copy of the License at
    
    6
    +#
    
    7
    +#  <http://www.apache.org/licenses/LICENSE-2.0>
    
    8
    +#
    
    9
    +# Unless required by applicable law or agreed to in writing, software
    
    10
    +# distributed under the License is distributed on an "AS IS" BASIS,
    
    11
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    
    12
    +# See the License for the specific language governing permissions and
    
    13
    +# limitations under the License.
    
    14
    +
    
    15
    +"""
    
    16
    +Bot Session
    
    17
    +===========
    
    18
    +
    
    19
    +Allows connections
    
    20
    +"""
    
    21
    +import asyncio
    
    22
    +import logging
    
    23
    +import platform
    
    24
    +import uuid
    
    25
    +
    
    26
    +from enum import Enum
    
    27
    +
    
    28
    +from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2, worker_pb2
    
    29
    +
    
    30
    +class BotStatus(Enum):
    
    31
    +    BOT_STATUS_UNSPECIFIED = bots_pb2.BotStatus.Value('BOT_STATUS_UNSPECIFIED')
    
    32
    +    OK                     = bots_pb2.BotStatus.Value('OK')
    
    33
    +    UNHEALTHY              = bots_pb2.BotStatus.Value('UNHEALTHY');
    
    34
    +    HOST_REBOOTING         = bots_pb2.BotStatus.Value('HOST_REBOOTING')
    
    35
    +    BOT_TERMINATING        = bots_pb2.BotStatus.Value('BOT_TERMINATING')
    
    36
    +
    
    37
    +class LeaseState(Enum):
    
    38
    +    LEASE_STATE_UNSPECIFIED = bots_pb2.LeaseState.Value('LEASE_STATE_UNSPECIFIED')
    
    39
    +    PENDING                 = bots_pb2.LeaseState.Value('PENDING')
    
    40
    +    ACTIVE                  = bots_pb2.LeaseState.Value('ACTIVE')
    
    41
    +    COMPLETED               = bots_pb2.LeaseState.Value('COMPLETED')
    
    42
    +    CANCELLED               = bots_pb2.LeaseState.Value('CANCELLED')
    
    43
    +
    
    44
    +
    
    45
    +class BotSession:
    
    46
    +    def __init__(self, parent, interface):
    
    47
    +        """ Unique bot ID within the farm used to identify this bot
    
    48
    +        Needs to be human readable.
    
    49
    +        All prior sessions with bot_id of same ID are invalidated.
    
    50
    +        If a bot attempts to update an invalid session, it must be rejected and
    
    51
    +        may be put in quarantine.
    
    52
    +        """
    
    53
    +
    
    54
    +        self.logger = logging.getLogger(__name__)
    
    55
    +
    
    56
    +        self._bot_id = '{}.{}'.format(parent, platform.node())
    
    57
    +        self._interface = interface
    
    58
    +        self._leases = {}
    
    59
    +        self._name = None
    
    60
    +        self._parent = parent
    
    61
    +        self._status = BotStatus.OK.value
    
    62
    +        self._work = None
    
    63
    +        self._worker = None
    
    64
    +
    
    65
    +    @property
    
    66
    +    def bot_id(self):
    
    67
    +        return self._bot_id
    
    68
    +
    
    69
    +    def add_worker(self, worker):
    
    70
    +        self._worker = worker
    
    71
    +
    
    72
    +    def create_bot_session(self, work, context=None):
    
    73
    +        self.logger.debug("Creating bot session")
    
    74
    +        self._work = work
    
    75
    +        self._context = context
    
    76
    +
    
    77
    +        session = self._interface.create_bot_session(self._parent, self.get_pb2())
    
    78
    +        self._name = session.name
    
    79
    +        self.logger.info("Created bot session with name: {}".format(self._name))
    
    80
    +
    
    81
    +    def update_bot_session(self):
    
    82
    +        session = self._interface.update_bot_session(self.get_pb2())
    
    83
    +        for lease in session.leases:
    
    84
    +            self._update_lease_from_server(lease)
    
    85
    +
    
    86
    +    def get_pb2(self):
    
    87
    +        leases = list(self._leases.values())
    
    88
    +        if not leases:
    
    89
    +            leases = None
    
    90
    +
    
    91
    +        return bots_pb2.BotSession(worker=self._worker.get_pb2(),
    
    92
    +                                   status=self._status,
    
    93
    +                                   leases=leases,
    
    94
    +                                   bot_id=self._bot_id,
    
    95
    +                                   name = self._name)
    
    96
    +
    
    97
    +    def lease_completed(self, lease):
    
    98
    +        lease.state = LeaseState.COMPLETED.value
    
    99
    +        self._leases[lease.id] = lease
    
    100
    +
    
    101
    +    def _update_lease_from_server(self, lease):
    
    102
    +        """
    
    103
    +        State machine for any received updates to the leases.
    
    104
    +        """
    
    105
    +        ## TODO: Compare with previous state of lease
    
    106
    +        lease_bot = self._leases.get(lease.id)
    
    107
    +
    
    108
    +        if lease.state == LeaseState.PENDING.value:
    
    109
    +            lease.state = LeaseState.ACTIVE.value
    
    110
    +            asyncio.ensure_future(self.create_work(lease))
    
    111
    +            self._leases[lease.id] = lease
    
    112
    +
    
    113
    +        elif lease.state == LeaseState.COMPLETED.value and \
    
    114
    +           lease_bot.state == LeaseState.COMPLETED.value:
    
    115
    +            del self._leases[lease.id]
    
    116
    +
    
    117
    +    async def create_work(self, lease):
    
    118
    +        self.logger.debug("Work created: {}".format(lease.id))
    
    119
    +        lease = await self._work(self._context, lease)
    
    120
    +        self.logger.debug("Work complete: {}".format(lease.id))
    
    121
    +        self.lease_completed(lease)
    
    122
    +
    
    123
    +class Worker:
    
    124
    +    def __init__(self, properties=None, configs=None):
    
    125
    +        self.properties = {}
    
    126
    +        self._configs = {}
    
    127
    +        self._devices = []
    
    128
    +
    
    129
    +        if properties:
    
    130
    +            for k, v in properties.items():
    
    131
    +                if k == 'pool':
    
    132
    +                    self.properties[k] = v
    
    133
    +                else:
    
    134
    +                    raise KeyError('Key not supported: {}'.format(k))
    
    135
    +
    
    136
    +        if configs:
    
    137
    +            for k, v in configs.items():
    
    138
    +                if k == 'DockerImage':
    
    139
    +                    self.properties[k] = v
    
    140
    +                else:
    
    141
    +                    raise KeyError('Key not supported: {}'.format(k))
    
    142
    +
    
    143
    +    def add_device(self, device):
    
    144
    +        self._devices.append(device)
    
    145
    +
    
    146
    +    def get_pb2(self):
    
    147
    +        devices = [device.get_pb2() for device in self._devices]
    
    148
    +        worker = worker_pb2.Worker(devices=devices)
    
    149
    +        property_message = worker_pb2.Worker.Property()
    
    150
    +        for k, v in self.properties.items():
    
    151
    +            property_message.key = k
    
    152
    +            property_message.value = v
    
    153
    +            worker.properties.extend([property_message])
    
    154
    +
    
    155
    +        config_message = worker_pb2.Worker.Config()
    
    156
    +        for k, v in self.properties.items():
    
    157
    +            property_message.key = k
    
    158
    +            property_message.value = v
    
    159
    +            worker.configs.extend([config_message])
    
    160
    +
    
    161
    +        return worker
    
    162
    +
    
    163
    +class Device:
    
    164
    +    def __init__(self, properties=None):
    
    165
    +        """ Creates devices available to the worker
    
    166
    +        The first device is known as the Primary Device - the device which
    
    167
    +        is running a bot and responsible for actually executing commands.
    
    168
    +        All other devices are known as Attached Devices and must be controlled
    
    169
    +        by the Primary Device.
    
    170
    +        """
    
    171
    +
    
    172
    +        self._name = str(uuid.uuid4())
    
    173
    +        self._properties = {}
    
    174
    +
    
    175
    +        if properties:
    
    176
    +            for k, v in properties.items():
    
    177
    +                if k == 'os':
    
    178
    +                    self._properties[k] = v
    
    179
    +
    
    180
    +                elif k == 'docker':
    
    181
    +                    if v not in ('True', 'False'):
    
    182
    +                        raise ValueError('Value not supported: {}'.format(v))
    
    183
    +                    self._properties[k] = v
    
    184
    +
    
    185
    +                else:
    
    186
    +                    raise KeyError('Key not supported: {}'.format(k))
    
    187
    +
    
    188
    +    def get_pb2(self):
    
    189
    +        device = worker_pb2.Device(handle=self._name)
    
    190
    +        property_message = worker_pb2.Device.Property()
    
    191
    +        for k, v in self._properties.items():
    
    192
    +            property_message.key = k
    
    193
    +            property_message.value = v
    
    194
    +            device.properties.extend([property_message])
    
    195
    +        return device

  • buildgrid/server/job.py
    ... ... @@ -57,9 +57,9 @@ class Job():
    57 57
             self.lease = None
    
    58 58
             self.logger = logging.getLogger(__name__)
    
    59 59
             self.name = str(uuid.uuid4())
    
    60
    +        self.n_tries = 0
    
    60 61
             self.result = None
    
    61 62
     
    
    62
    -        self._n_tries = 0
    
    63 63
             self._operation = operations_pb2.Operation(name = self.name)
    
    64 64
             self._operation_update_queues = []
    
    65 65
     
    
    ... ... @@ -97,8 +97,7 @@ class Job():
    97 97
             action_digest = self._pack_any(self.action_digest)
    
    98 98
     
    
    99 99
             lease = bots_pb2.Lease(id = self.name,
    
    100
    -                               payload = action_digest,
    
    101
    -                               state = LeaseState.PENDING.value)
    
    100
    +                               payload = action_digest)
    
    102 101
             self.lease = lease
    
    103 102
             return lease
    
    104 103
     
    

  • buildgrid/server/scheduler.py
    ... ... @@ -50,24 +50,19 @@ class Scheduler():
    50 50
             self.queue.append(job)
    
    51 51
     
    
    52 52
         def retry_job(self, name):
    
    53
    -        job = self.jobs[name]
    
    54
    -
    
    55
    -        if job.n_tries >= self.MAX_N_TRIES:
    
    56
    -            # TODO: Decide what to do with these jobs
    
    57
    -            job.update_execute_stage(ExecuteStage.COMPLETED)
    
    58
    -        else:
    
    59
    -            job.update_execute_stage(ExecuteStage.QUEUED)
    
    60
    -            job.n_tries += 1
    
    61
    -            self.queue.appendleft(job)
    
    53
    +        job = self.jobs.get(name)
    
    62 54
     
    
    63
    -        self.jobs[name] = job
    
    55
    +        if job is not None:
    
    56
    +            if job.n_tries >= self.MAX_N_TRIES:
    
    57
    +                # TODO: Decide what to do with these jobs
    
    58
    +                job.update_execute_stage(ExecuteStage.COMPLETED)
    
    59
    +                # TODO: Mark these jobs as done
    
    60
    +            else:
    
    61
    +                job.update_execute_stage(ExecuteStage.QUEUED)
    
    62
    +                job.n_tries += 1
    
    63
    +                self.queue.appendleft(job)
    
    64 64
     
    
    65
    -    def create_job(self):
    
    66
    -        if len(self.queue) > 0:
    
    67
    -            job = self.queue.popleft()
    
    68
    -            job.update_execute_stage(ExecuteStage.EXECUTING)
    
    69
    -            self.jobs[job.name] = job
    
    70
    -            return job
    
    65
    +            self.jobs[name] = job
    
    71 66
     
    
    72 67
         def job_complete(self, name, result):
    
    73 68
             job = self.jobs[name]
    
    ... ... @@ -81,48 +76,13 @@ class Scheduler():
    81 76
                 response.operations.extend([v.get_operation()])
    
    82 77
             return response
    
    83 78
     
    
    84
    -    def update_lease(self, lease):
    
    85
    -        name = lease.id
    
    79
    +    def update_job_lease_state(self, name, state):
    
    86 80
             job = self.jobs.get(name)
    
    87
    -        state = lease.state
    
    88
    -
    
    89
    -        if state   == LeaseState.LEASE_STATE_UNSPECIFIED.value:
    
    90
    -            create_job = self.create_job()
    
    91
    -            if create_job is None:
    
    92
    -                # No job? Return lease.
    
    93
    -                return lease
    
    94
    -            else:
    
    95
    -                job = create_job
    
    96
    -                job.lease = job.create_lease()
    
    97
    -
    
    98
    -        elif state == LeaseState.PENDING.value:
    
    99
    -            job.lease = lease
    
    100
    -
    
    101
    -        elif state == LeaseState.ACTIVE.value:
    
    102
    -            job.lease = lease
    
    103
    -
    
    104
    -        elif state == LeaseState.COMPLETED.value:
    
    105
    -            self.job_complete(job.name, lease.result)
    
    106
    -
    
    107
    -            create_job = self.create_job()
    
    108
    -            if create_job is None:
    
    109
    -                # Docs say not to use this state though if job has
    
    110
    -                # completed and no more jobs, then use this state to stop
    
    111
    -                # job being processed again
    
    112
    -                job.lease = lease
    
    113
    -                job.lease.state = LeaseState.LEASE_STATE_UNSPECIFIED.value
    
    114
    -            else:
    
    115
    -                job = create_job
    
    116
    -                job.lease = job.create_lease()
    
    117
    -
    
    118
    -        elif state == LeaseState.CANCELLED.value:
    
    119
    -            job.lease = lease
    
    120
    -
    
    121
    -        else:
    
    122
    -            raise Exception("Unknown state: {}".format(state))
    
    123
    -
    
    81
    +        job.lease.state = state
    
    124 82
             self.jobs[name] = job
    
    125
    -        return job.lease
    
    83
    +
    
    84
    +    def get_job_lease_state(self, name):
    
    85
    +        return self.jobs[name].lease.state
    
    126 86
     
    
    127 87
         def cancel_session(self, name):
    
    128 88
             job = self.jobs[name]
    
    ... ... @@ -131,6 +91,15 @@ class Scheduler():
    131 91
                state == LeaseState.ACTIVE.value:
    
    132 92
                 self.retry_job(name)
    
    133 93
     
    
    94
    +    def create_leases(self):
    
    95
    +        while len(self.queue) > 0:
    
    96
    +            job = self.queue.popleft()
    
    97
    +            job.update_execute_stage(ExecuteStage.EXECUTING)
    
    98
    +            job.lease = job.create_lease()
    
    99
    +            job.lease.state = LeaseState.PENDING.value
    
    100
    +            self.jobs[job.name] = job
    
    101
    +            yield job.lease
    
    102
    +
    
    134 103
         def _update_execute_stage(self, job, stage):
    
    135 104
             job.update_execute_stage(stage)
    
    136 105
             return job

  • buildgrid/server/worker/bots_interface.py
    ... ... @@ -35,6 +35,7 @@ class BotsInterface():
    35 35
             self.logger = logging.getLogger(__name__)
    
    36 36
     
    
    37 37
             self._bot_ids = {}
    
    38
    +        self._bot_sessions = {}
    
    38 39
             self._scheduler = scheduler
    
    39 40
     
    
    40 41
         def create_bot_session(self, parent, bot_session):
    
    ... ... @@ -59,6 +60,7 @@ class BotsInterface():
    59 60
             bot_session.name = name
    
    60 61
     
    
    61 62
             self._bot_ids[name] = bot_id
    
    63
    +        self._bot_sessions[name] = bot_session
    
    62 64
             self.logger.info("Created bot session name={} with bot_id={}".format(name, bot_id))
    
    63 65
             return bot_session
    
    64 66
     
    
    ... ... @@ -69,13 +71,61 @@ class BotsInterface():
    69 71
             self.logger.debug("Updating bot session name={}".format(name))
    
    70 72
             self._check_bot_ids(bot_session.bot_id, name)
    
    71 73
     
    
    72
    -        leases = [self._scheduler.update_lease(lease) for lease in bot_session.leases]
    
    74
    +        server_session = self._bot_sessions[name]
    
    75
    +
    
    76
    +        leases = [self.check_states(lease) for lease in bot_session.leases]
    
    73 77
     
    
    74 78
             del bot_session.leases[:]
    
    75 79
             bot_session.leases.extend(leases)
    
    76 80
     
    
    81
    +        for lease in self._scheduler.create_leases():
    
    82
    +            bot_session.leases.extend([lease])
    
    83
    +
    
    84
    +        self._bot_sessions[name] = bot_session
    
    77 85
             return bot_session
    
    78 86
     
    
    87
    +    def check_states(self, lease_client):
    
    88
    +        """ Edge detector for states
    
    89
    +        """
    
    90
    +        ## TODO: Handle cancelled states
    
    91
    +        server_state = LeaseState(self._scheduler.get_job_lease_state(lease_client.id))
    
    92
    +        client_state = LeaseState(lease_client.state)
    
    93
    +
    
    94
    +        if server_state == LeaseState.PENDING:
    
    95
    +
    
    96
    +            if client_state == LeaseState.ACTIVE:
    
    97
    +                self._scheduler.update_job_lease_state(lease_client.id, lease_client.state)
    
    98
    +            elif client_state == LeaseState.COMPLETED:
    
    99
    +                # TODO: Lease was rejected
    
    100
    +                raise NotImplementedError("'Not Accepted' is unsupported")
    
    101
    +            else:
    
    102
    +                raise OutofSyncError("Server lease: {}. Client lease: {}".format(lease_server, lease_client))
    
    103
    +
    
    104
    +        elif server_state == LeaseState.ACTIVE:
    
    105
    +
    
    106
    +            if client_state == LeaseState.ACTIVE:
    
    107
    +                pass
    
    108
    +
    
    109
    +            elif client_state == LeaseState.COMPLETED:
    
    110
    +                self._scheduler.job_complete(lease_client.id, lease_client.result)
    
    111
    +                self._scheduler.update_job_lease_state(lease_client.id, lease_client.state)
    
    112
    +
    
    113
    +            else:
    
    114
    +                raise OutofSyncError("Server lease: {}. Client lease: {}".format(lease_server, lease_client))
    
    115
    +
    
    116
    +        elif server_state == LeaseState.COMPLETED:
    
    117
    +            raise OutofSyncError("Server lease: {}. Client lease: {}".format(lease_server, lease_client))
    
    118
    +
    
    119
    +        elif server_state == LeaseState.CANCELLED:
    
    120
    +            raise NotImplementedError("Cancelled states not supported yet")
    
    121
    +
    
    122
    +        else:
    
    123
    +            # Should never get here
    
    124
    +            raise OutofSyncError("State now allowed: {}".format(server_state))
    
    125
    +
    
    126
    +        return lease_client
    
    127
    +
    
    128
    +
    
    79 129
         def _check_bot_ids(self, bot_id, name = None):
    
    80 130
             """ Checks the ID and the name of the bot.
    
    81 131
             """
    
    ... ... @@ -103,7 +153,10 @@ class BotsInterface():
    103 153
                 raise InvalidArgumentError("Bot id does not exist: {}".format(name))
    
    104 154
     
    
    105 155
             self.logger.debug("Attempting to close {} with name: {}".format(bot_id, name))
    
    106
    -        self._scheduler.retry_job(name)
    
    156
    +        for lease in self._bot_sessions[name].leases:
    
    157
    +            if lease.state != LeaseState.COMPLETED.value:
    
    158
    +                self._scheduler.retry_job(lease.id)
    
    159
    +
    
    107 160
             self.logger.debug("Closing bot session: {}".format(name))
    
    108 161
             self._bot_ids.pop(name)
    
    109 162
             self.logger.info("Closed bot {} with name: {}".format(bot_id, name))

  • tests/integration/bot_interface.py
    ... ... @@ -22,6 +22,8 @@ import mock
    22 22
     import pytest
    
    23 23
     import uuid
    
    24 24
     
    
    25
    +from google.devtools.remoteworkers.v1test2 import bots_pb2, worker_pb2
    
    26
    +
    
    25 27
     from buildgrid.bot import bot, bot_interface
    
    26 28
     
    
    27 29
     async def _work_dummy(context, lease):
    
    ... ... @@ -39,16 +41,9 @@ def context():
    39 41
     # GRPC context
    
    40 42
     @pytest.fixture
    
    41 43
     def channel():
    
    42
    -    yield mock.MagicMock(spec = grpc.insecure_channel(''))
    
    44
    +    yield mock.MagicMock(spec = grpc.insecure_channel)
    
    43 45
     
    
    44
    -@pytest.fixture
    
    45
    -def instance(channel):
    
    46
    -    yield bot.Bot(work=_work_dummy,
    
    47
    -                  context=ContextMock(),
    
    48
    -                  channel=channel,
    
    49
    -                  parent='rach',
    
    50
    -                  number_of_leases=1,
    
    51
    -                  continuous=False)
    
    52
    -
    
    53
    -def test_create_job(instance):
    
    54
    -    instance.bot_session()
    46
    +@mock.patch.object(bot.bot_interface, 'bots_pb2', autospec = True)
    
    47
    +@mock.patch.object(bot.bot_interface, 'bots_pb2_grpc', autospec = True)
    
    48
    +def test_me(mock_pb2, mock_pb2_grpc, channel, context):
    
    49
    +    pass



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]