[Notes] [Git][BuildGrid/buildgrid][bloomberg/buildgrid-csande/action-cache] 6 commits: Updated command for new BotSession class



Title: GitLab

finnball pushed to branch bloomberg/buildgrid-csande/action-cache at BuildGrid / buildgrid

Commits:

19 changed files:

Changes:

  • app/commands/cmd_bot.py
    ... ... @@ -33,7 +33,8 @@ import tempfile
    33 33
     
    
    34 34
     from pathlib import Path, PurePath
    
    35 35
     
    
    36
    -from buildgrid.bot import bot
    
    36
    +from buildgrid.bot import bot, bot_interface
    
    37
    +from buildgrid.bot.bot_session import BotSession, Device, Worker
    
    37 38
     from buildgrid._exceptions import BotError
    
    38 39
     
    
    39 40
     from ..cli import pass_context
    
    ... ... @@ -44,17 +45,23 @@ from google.protobuf import any_pb2
    44 45
     
    
    45 46
     @click.group(short_help = 'Create a bot client')
    
    46 47
     @click.option('--parent', default='bgd_test')
    
    47
    -@click.option('--number-of-leases', default=1)
    
    48 48
     @click.option('--port', default='50051')
    
    49 49
     @click.option('--host', default='localhost')
    
    50 50
     @pass_context
    
    51
    -def cli(context, host, port, number_of_leases, parent):
    
    51
    +def cli(context, host, port, parent):
    
    52
    +    channel = grpc.insecure_channel('{}:{}'.format(host, port))
    
    53
    +    interface = bot_interface.BotInterface(channel)
    
    54
    +
    
    52 55
         context.logger = logging.getLogger(__name__)
    
    53 56
         context.logger.info("Starting on port {}".format(port))
    
    54 57
     
    
    55
    -    context.channel = grpc.insecure_channel('{}:{}'.format(host, port))
    
    56
    -    context.number_of_leases = number_of_leases
    
    57
    -    context.parent = parent
    
    58
    +    worker = Worker()
    
    59
    +    worker.add_device(Device())
    
    60
    +
    
    61
    +    bot_session = BotSession(parent, interface)
    
    62
    +    bot_session.add_worker(worker)
    
    63
    +
    
    64
    +    context.bot_session = bot_session
    
    58 65
     
    
    59 66
     @cli.command('dummy', short_help='Create a dummy bot session')
    
    60 67
     @pass_context
    
    ... ... @@ -63,15 +70,10 @@ def dummy(context):
    63 70
         Simple dummy client. Creates a session, accepts leases, does fake work and
    
    64 71
         updates the server.
    
    65 72
         """
    
    66
    -
    
    67
    -    context.logger.info("Creating a bot session")
    
    68
    -
    
    69 73
         try:
    
    70
    -        bot.Bot(work=_work_dummy,
    
    71
    -                context=context,
    
    72
    -                channel=context.channel,
    
    73
    -                parent=context.parent,
    
    74
    -                number_of_leases=context.number_of_leases)
    
    74
    +        b = bot.Bot(context.bot_session)
    
    75
    +        b.session(_work_dummy,
    
    76
    +                  context)
    
    75 77
     
    
    76 78
         except KeyboardInterrupt:
    
    77 79
             pass
    
    ... ... @@ -85,7 +87,7 @@ def dummy(context):
    85 87
     @click.option('--port', show_default = True, default=11001)
    
    86 88
     @click.option('--remote', show_default = True, default='localhost')
    
    87 89
     @pass_context
    
    88
    -def _work_buildbox(context, remote, port, server_cert, client_key, client_cert, local_cas, fuse_dir):
    
    90
    +def work_buildbox(context, remote, port, server_cert, client_key, client_cert, local_cas, fuse_dir):
    
    89 91
         """
    
    90 92
         Uses BuildBox to run commands.
    
    91 93
         """
    
    ... ... @@ -101,11 +103,14 @@ def _work_buildbox(context, remote, port, server_cert, client_key, client_cert,
    101 103
         context.fuse_dir = fuse_dir
    
    102 104
     
    
    103 105
         try:
    
    104
    -        bot.Bot(work=_work_buildbox,
    
    105
    -                context=context,
    
    106
    -                channel=context.channel,
    
    107
    -                parent=context.parent,
    
    108
    -                number_of_leases=context.number_of_leases)
    
    106
    +        b = bot.Bot(work=_work_buildbox,
    
    107
    +                    bot_session=context.bot_session,
    
    108
    +                    channel=context.channel,
    
    109
    +                    parent=context.parent)
    
    110
    +
    
    111
    +        b.session(context.parent,
    
    112
    +                  _work_buildbox,
    
    113
    +                  context)
    
    109 114
     
    
    110 115
         except KeyboardInterrupt:
    
    111 116
             pass
    

  • app/commands/cmd_server.py
    ... ... @@ -28,6 +28,7 @@ import click
    28 28
     import logging
    
    29 29
     
    
    30 30
     from buildgrid.server import build_grid_server
    
    31
    +from buildgrid.server.action_cache import ActionCache
    
    31 32
     from buildgrid.server.cas.storage.disk import DiskStorage
    
    32 33
     from buildgrid.server.cas.storage.lru_memory_cache import LRUMemoryCache
    
    33 34
     from buildgrid.server.cas.storage.s3 import S3Storage
    
    ... ... @@ -43,6 +44,11 @@ def cli(context):
    43 44
     
    
    44 45
     @cli.command('start', short_help='Starts server')
    
    45 46
     @click.option('--port', default='50051')
    
    47
    +@click.option('--max-cached-actions', type=int, default=50,
    
    48
    +              help='Maximum number of actions to keep in the ActionCache.')
    
    49
    +@click.option('--allow-update-action-result/--forbid-update-action-result',
    
    50
    +              'allow_uar', default=True,
    
    51
    +              help='Whether or not to allow clients to manually edit the action cache.')
    
    46 52
     @click.option('--cas',
    
    47 53
                   type=click.Choice(('lru', 's3', 'disk', 'with-cache')),
    
    48 54
                   help='CAS storage type to use.')
    
    ... ... @@ -59,15 +65,22 @@ def cli(context):
    59 65
                   type=click.Path(file_okay=False, dir_okay=True, writable=True),
    
    60 66
                   help='For --cas=disk, the folder to store CAS blobs in.')
    
    61 67
     @pass_context
    
    62
    -def start(context, port, cas, **cas_args):
    
    68
    +def start(context, port, max_cached_actions, allow_uar, cas, **cas_args):
    
    63 69
         context.logger.info("Starting on port {}".format(port))
    
    64 70
     
    
    65 71
         loop = asyncio.get_event_loop()
    
    66 72
     
    
    67 73
         cas_storage = _make_cas_storage(context, cas, cas_args)
    
    68 74
         if cas_storage is None:
    
    69
    -        context.logger.info("Running without CAS")
    
    70
    -    server = build_grid_server.BuildGridServer(port, cas_storage=cas_storage)
    
    75
    +        context.logger.info("Running without CAS - action cache will be unavailable")
    
    76
    +        action_cache = None
    
    77
    +    else:
    
    78
    +        action_cache = ActionCache(cas_storage, max_cached_actions)
    
    79
    +
    
    80
    +    server = build_grid_server.BuildGridServer(port,
    
    81
    +                                               cas_storage=cas_storage,
    
    82
    +                                               action_cache=action_cache,
    
    83
    +                                               allow_update_action_result=allow_uar)
    
    71 84
     
    
    72 85
         try:
    
    73 86
             asyncio.ensure_future(server.start())
    

  • buildgrid/bot/bot.py
    ... ... @@ -23,163 +23,45 @@ Creates a bot session.
    23 23
     """
    
    24 24
     
    
    25 25
     import asyncio
    
    26
    -import inspect
    
    26
    +import collections
    
    27 27
     import logging
    
    28
    -import platform
    
    29
    -import queue
    
    30 28
     import time
    
    31 29
     
    
    32
    -from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2, worker_pb2
    
    33
    -
    
    34
    -from . import bot_interface
    
    30
    +from . import bot_interface, bot_session
    
    31
    +from .bot_session import BotStatus, LeaseState
    
    35 32
     from .._exceptions import BotError
    
    36 33
     
    
    37
    -class Bot(object):
    
    34
    +class Bot:
    
    38 35
         """
    
    39 36
         Creates a local BotSession.
    
    40 37
         """
    
    41 38
     
    
    42
    -    def __init__(self, work, context, channel, parent, number_of_leases):
    
    43
    -        if not inspect.iscoroutinefunction(work):
    
    44
    -            raise BotError("work function must be async")
    
    45
    -
    
    46
    -        self.interface = bot_interface.BotInterface(channel)
    
    39
    +    def __init__(self, bot_session, update_period=1):
    
    47 40
             self.logger = logging.getLogger(__name__)
    
    48 41
     
    
    49
    -        self._create_session(parent, number_of_leases)
    
    50
    -        self._work_queue = queue.Queue(maxsize = number_of_leases)
    
    51
    -
    
    52
    -        try:
    
    53
    -            while True:
    
    54
    -                ## TODO: Leases independently finish
    
    55
    -                ## Allow leases to queue finished work independently instead
    
    56
    -                ## of waiting for all to finish
    
    57
    -                futures = [self._do_work(work, context, lease) for lease in self._get_work()]
    
    58
    -                if futures:
    
    59
    -                    loop = asyncio.new_event_loop()
    
    60
    -                    leases_complete, _ = loop.run_until_complete(asyncio.wait(futures))
    
    61
    -                    work_complete = [(lease.result().id, lease.result(),) for lease in leases_complete]
    
    62
    -                    self._work_complete(work_complete)
    
    63
    -                    loop.close()
    
    64
    -                self._update_bot_session()
    
    65
    -                time.sleep(2)
    
    66
    -        except Exception as e:
    
    67
    -            self.logger.error(e)
    
    68
    -            raise BotError(e)
    
    69
    -
    
    70
    -    @property
    
    71
    -    def bot_session(self):
    
    72
    -        ## Read only, shouldn't have to set any of the variables in here
    
    73
    -        return self._bot_session
    
    74
    -
    
    75
    -    def close_session(self):
    
    76
    -        self.logger.warning("Session closing not yet implemented")
    
    77
    -
    
    78
    -    async def _do_work(self, work, context, lease):
    
    79
    -        """ Work is done here, work function should be asynchronous
    
    80
    -        """
    
    81
    -        self.logger.info("Work found: {}".format(lease.id))
    
    82
    -        lease = await work(context=context, lease=lease)
    
    83
    -        lease.state = bots_pb2.LeaseState.Value('COMPLETED')
    
    84
    -        self.logger.info("Work complete: {}".format(lease.id))
    
    85
    -        return lease
    
    86
    -
    
    87
    -    def _update_bot_session(self):
    
    88
    -        """ Should call the server periodically to inform the server the client
    
    89
    -        has not died.
    
    90
    -        """
    
    91
    -        self.logger.debug("Updating bot session")
    
    92
    -        self._bot_session = self.interface.update_bot_session(self._bot_session)
    
    93
    -        leases_update = ([self._update_lease(lease) for lease in self._bot_session.leases])
    
    94
    -        del self._bot_session.leases[:]
    
    95
    -        self._bot_session.leases.extend(leases_update)
    
    96
    -
    
    97
    -    def _get_work(self):
    
    98
    -        while not self._work_queue.empty():
    
    99
    -            yield self._work_queue.get()
    
    100
    -            self._work_queue.task_done()
    
    101
    -
    
    102
    -    def _work_complete(self, leases_complete):
    
    103
    -        """ Bot updates itself with any completed work.
    
    104
    -        """
    
    105
    -        # Should really improve this...
    
    106
    -        # Maybe add some call back function sentoff work...
    
    107
    -        leases_active = list(filter(self._lease_active, self._bot_session.leases))
    
    108
    -        leases_not_active = [lease for lease in self._bot_session.leases if not self._lease_active(lease)]
    
    109
    -        del self._bot_session.leases[:]
    
    110
    -        for lease in leases_active:
    
    111
    -            for lease_tuple in leases_complete:
    
    112
    -                if lease.id == lease_tuple[0]:
    
    113
    -                    leases_not_active.extend([lease_tuple[1]])
    
    114
    -        self._bot_session.leases.extend(leases_not_active)
    
    115
    -
    
    116
    -    def _update_lease(self, lease):
    
    117
    -        """
    
    118
    -        State machine for any recieved updates to the leases.
    
    119
    -        """
    
    120
    -        if self._lease_pending(lease):
    
    121
    -            lease.state = bots_pb2.LeaseState.Value('ACTIVE')
    
    122
    -            self._work_queue.put(lease)
    
    123
    -            return lease
    
    124
    -
    
    125
    -        else:
    
    126
    -            return lease
    
    127
    -
    
    128
    -    def _create_session(self, parent, number_of_leases):
    
    129
    -        self.logger.debug("Creating bot session")
    
    130
    -        worker = self._create_worker()
    
    42
    +        self._bot_session = bot_session
    
    43
    +        self._update_period = update_period
    
    131 44
     
    
    132
    -        """ Unique bot ID within the farm used to identify this bot
    
    133
    -        Needs to be human readable.
    
    134
    -        All prior sessions with bot_id of same ID are invalidated.
    
    135
    -        If a bot attempts to update an invalid session, it must be rejected and
    
    136
    -        may be put in quarantine.
    
    137
    -        """
    
    138
    -        bot_id = '{}.{}'.format(parent, platform.node())
    
    45
    +    def session(self, work, context):
    
    46
    +        loop = asyncio.get_event_loop()
    
    139 47
     
    
    140
    -        leases = [bots_pb2.Lease() for x in range(number_of_leases)]
    
    48
    +        self._bot_session.create_bot_session(work, context)
    
    141 49
     
    
    142
    -        bot_session = bots_pb2.BotSession(worker = worker,
    
    143
    -                                          status = bots_pb2.BotStatus.Value('OK'),
    
    144
    -                                          leases = leases,
    
    145
    -                                          bot_id = bot_id)
    
    146
    -        self._bot_session = self.interface.create_bot_session(parent, bot_session)
    
    147
    -        self.logger.info("Name: {}, Id: {}".format(self._bot_session.name,
    
    148
    -                                                      self._bot_session.bot_id))
    
    149
    -
    
    150
    -    def _create_worker(self):
    
    151
    -        devices = self._create_devices()
    
    152
    -
    
    153
    -        # Contains a list of devices and the connections between them.
    
    154
    -        worker = worker_pb2.Worker(devices = devices)
    
    155
    -
    
    156
    -        """ Keys supported:
    
    157
    -        *pool
    
    158
    -        """
    
    159
    -        worker.Property.key = "pool"
    
    160
    -        worker.Property.value = "all"
    
    161
    -
    
    162
    -        return worker
    
    163
    -
    
    164
    -    def _create_devices(self):
    
    165
    -        """ Creates devices available to the worker
    
    166
    -        The first device is know as the Primary Device - the revice which
    
    167
    -        is running a bit and responsible to actually executing commands.
    
    168
    -        All other devices are known as Attatched Devices and must be controlled
    
    169
    -        by the Primary Device.
    
    170
    -        """
    
    171
    -
    
    172
    -        devices = []
    
    173
    -
    
    174
    -        for i in range(0, 1): # Append one device for now
    
    175
    -            dev = worker_pb2.Device()
    
    176
    -
    
    177
    -            devices.append(dev)
    
    178
    -
    
    179
    -        return devices
    
    180
    -
    
    181
    -    def _lease_pending(self, lease):
    
    182
    -        return lease.state == bots_pb2.LeaseState.Value('PENDING')
    
    183
    -
    
    184
    -    def _lease_active(self, lease):
    
    185
    -        return lease.state == bots_pb2.LeaseState.Value('ACTIVE')
    50
    +        try:
    
    51
    +            task = asyncio.ensure_future(self._update_bot_session())
    
    52
    +            loop.run_forever()
    
    53
    +
    
    54
    +        except KeyboardInterrupt:
    
    55
    +            pass
    
    56
    +
    
    57
    +        finally:
    
    58
    +            task.cancel()
    
    59
    +            loop.close()
    
    60
    +
    
    61
    +    async def _update_bot_session(self):
    
    62
    +        while True:
    
    63
    +            """ Calls the server periodically to inform the server the client
    
    64
    +            has not died.
    
    65
    +            """
    
    66
    +            self._bot_session.update_bot_session()
    
    67
    +            await asyncio.sleep(self._update_period)

  • buildgrid/bot/bot_interface.py
    ... ... @@ -29,7 +29,7 @@ from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2, bo
    29 29
     
    
    30 30
     from .._exceptions import BotError
    
    31 31
     
    
    32
    -class BotInterface(object):
    
    32
    +class BotInterface:
    
    33 33
         """ Interface handles calls to the server.
    
    34 34
         """
    
    35 35
     
    
    ... ... @@ -39,22 +39,12 @@ class BotInterface(object):
    39 39
             self._stub = bots_pb2_grpc.BotsStub(channel)
    
    40 40
     
    
    41 41
         def create_bot_session(self, parent, bot_session):
    
    42
    -        try:
    
    43
    -            request = bots_pb2.CreateBotSessionRequest(parent = parent,
    
    44
    -                                                       bot_session = bot_session)
    
    45
    -            return self._stub.CreateBotSession(request)
    
    46
    -
    
    47
    -        except Exception as e:
    
    48
    -            self.logger.error(e)
    
    49
    -            raise BotError(e)
    
    42
    +        request = bots_pb2.CreateBotSessionRequest(parent = parent,
    
    43
    +                                                   bot_session = bot_session)
    
    44
    +        return self._stub.CreateBotSession(request)
    
    50 45
     
    
    51 46
         def update_bot_session(self, bot_session, update_mask = None):
    
    52
    -        try:
    
    53
    -            request = bots_pb2.UpdateBotSessionRequest(name = bot_session.name,
    
    54
    -                                                       bot_session = bot_session,
    
    55
    -                                                       update_mask = update_mask)
    
    56
    -            return self._stub.UpdateBotSession(request)
    
    57
    -
    
    58
    -        except Exception as e:
    
    59
    -            self.logger.error(e)
    
    60
    -            raise BotError(e)
    47
    +        request = bots_pb2.UpdateBotSessionRequest(name = bot_session.name,
    
    48
    +                                                   bot_session = bot_session,
    
    49
    +                                                   update_mask = update_mask)
    
    50
    +        return self._stub.UpdateBotSession(request)

  • buildgrid/bot/bot_session.py
    1
    +# Copyright (C) 2018 Bloomberg LP
    
    2
    +#
    
    3
    +# Licensed under the Apache License, Version 2.0 (the "License");
    
    4
    +# you may not use this file except in compliance with the License.
    
    5
    +# You may obtain a copy of the License at
    
    6
    +#
    
    7
    +#  <http://www.apache.org/licenses/LICENSE-2.0>
    
    8
    +#
    
    9
    +# Unless required by applicable law or agreed to in writing, software
    
    10
    +# distributed under the License is distributed on an "AS IS" BASIS,
    
    11
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    
    12
    +# See the License for the specific language governing permissions and
    
    13
    +# limitations under the License.
    
    14
    +
    
    15
    +"""
    
    16
    +Bot Session
    
    17
    +===========
    
    18
    +
    
    19
    +Allows connections
    
    20
    +"""
    
    21
    +import asyncio
    
    22
    +import logging
    
    23
    +import platform
    
    24
    +import uuid
    
    25
    +
    
    26
    +from enum import Enum
    
    27
    +
    
    28
    +from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2, worker_pb2
    
    29
    +
    
    30
    +class BotStatus(Enum):
    
    31
    +    BOT_STATUS_UNSPECIFIED = bots_pb2.BotStatus.Value('BOT_STATUS_UNSPECIFIED')
    
    32
    +    OK                     = bots_pb2.BotStatus.Value('OK')
    
    33
    +    UNHEALTHY              = bots_pb2.BotStatus.Value('UNHEALTHY');
    
    34
    +    HOST_REBOOTING         = bots_pb2.BotStatus.Value('HOST_REBOOTING')
    
    35
    +    BOT_TERMINATING        = bots_pb2.BotStatus.Value('BOT_TERMINATING')
    
    36
    +
    
    37
    +class LeaseState(Enum):
    
    38
    +    LEASE_STATE_UNSPECIFIED = bots_pb2.LeaseState.Value('LEASE_STATE_UNSPECIFIED')
    
    39
    +    PENDING                 = bots_pb2.LeaseState.Value('PENDING')
    
    40
    +    ACTIVE                  = bots_pb2.LeaseState.Value('ACTIVE')
    
    41
    +    COMPLETED               = bots_pb2.LeaseState.Value('COMPLETED')
    
    42
    +    CANCELLED               = bots_pb2.LeaseState.Value('CANCELLED')
    
    43
    +
    
    44
    +
    
    45
    +class BotSession:
    
    46
    +    def __init__(self, parent, interface):
    
    47
    +        """ Unique bot ID within the farm used to identify this bot
    
    48
    +        Needs to be human readable.
    
    49
    +        All prior sessions with bot_id of same ID are invalidated.
    
    50
    +        If a bot attempts to update an invalid session, it must be rejected and
    
    51
    +        may be put in quarantine.
    
    52
    +        """
    
    53
    +
    
    54
    +        self.logger = logging.getLogger(__name__)
    
    55
    +
    
    56
    +        self._bot_id = '{}.{}'.format(parent, platform.node())
    
    57
    +        self._interface = interface
    
    58
    +        self._leases = {}
    
    59
    +        self._name = None
    
    60
    +        self._parent = parent
    
    61
    +        self._status = BotStatus.OK.value
    
    62
    +        self._work = None
    
    63
    +        self._worker = None
    
    64
    +
    
    65
    +    @property
    
    66
    +    def bot_id(self):
    
    67
    +        return self._bot_id
    
    68
    +
    
    69
    +    def add_worker(self, worker):
    
    70
    +        self._worker = worker
    
    71
    +
    
    72
    +    def create_bot_session(self, work, context=None):
    
    73
    +        self.logger.debug("Creating bot session")
    
    74
    +        self._work = work
    
    75
    +        self._context = context
    
    76
    +
    
    77
    +        session = self._interface.create_bot_session(self._parent, self.get_pb2())
    
    78
    +        self._name = session.name
    
    79
    +
    
    80
    +        self.logger.info("Created bot session with name: {}".format(self._name))
    
    81
    +
    
    82
    +        for lease in session.leases:
    
    83
    +            self._update_lease_from_server(lease)
    
    84
    +
    
    85
    +    def update_bot_session(self):
    
    86
    +        session = self._interface.update_bot_session(self.get_pb2())
    
    87
    +        for lease in session.leases:
    
    88
    +            self._update_lease_from_server(lease)
    
    89
    +
    
    90
    +        for k, v in self._leases.items():
    
    91
    +            if v.state == LeaseState.COMPLETED.value:
    
    92
    +                del self._leases[k]
    
    93
    +
    
    94
    +    def get_pb2(self):
    
    95
    +        leases = list(self._leases.values())
    
    96
    +        if not leases:
    
    97
    +            leases = None
    
    98
    +
    
    99
    +        return bots_pb2.BotSession(worker=self._worker.get_pb2(),
    
    100
    +                                   status=self._status,
    
    101
    +                                   leases=leases,
    
    102
    +                                   bot_id=self._bot_id,
    
    103
    +                                   name = self._name)
    
    104
    +
    
    105
    +    def lease_completed(self, lease):
    
    106
    +        lease.state = LeaseState.COMPLETED.value
    
    107
    +        self._leases[lease.id] = lease
    
    108
    +
    
    109
    +    def _update_lease_from_server(self, lease):
    
    110
    +        """
    
    111
    +        State machine for any received updates to the leases.
    
    112
    +        """
    
    113
    +        ## TODO: Compare with previous state of lease
    
    114
    +        lease_bot = self._leases.get(lease.id)
    
    115
    +        if lease.state == LeaseState.PENDING.value:
    
    116
    +            lease.state = LeaseState.ACTIVE.value
    
    117
    +            asyncio.ensure_future(self.create_work(lease))
    
    118
    +            self._leases[lease.id] = lease
    
    119
    +
    
    120
    +    async def create_work(self, lease):
    
    121
    +        self.logger.debug("Work created: {}".format(lease.id))
    
    122
    +        lease = await self._work(self._context, lease)
    
    123
    +        self.logger.debug("Work complete: {}".format(lease.id))
    
    124
    +        self.lease_completed(lease)
    
    125
    +
    
    126
    +class Worker:
    
    127
    +    def __init__(self, properties=None, configs=None):
    
    128
    +        self.properties = {}
    
    129
    +        self._configs = {}
    
    130
    +        self._devices = []
    
    131
    +
    
    132
    +        if properties:
    
    133
    +            for k, v in properties.items():
    
    134
    +                if k == 'pool':
    
    135
    +                    self.properties[k] = v
    
    136
    +                else:
    
    137
    +                    raise KeyError('Key not supported: {}'.format(k))
    
    138
    +
    
    139
    +        if configs:
    
    140
    +            for k, v in configs.items():
    
    141
    +                if k == 'DockerImage':
    
    142
    +                    self.configs[k] = v
    
    143
    +                else:
    
    144
    +                    raise KeyError('Key not supported: {}'.format(k))
    
    145
    +
    
    146
    +    @property
    
    147
    +    def configs(self):
    
    148
    +        return self._configs
    
    149
    +
    
    150
    +    def add_device(self, device):
    
    151
    +        self._devices.append(device)
    
    152
    +
    
    153
    +    def get_pb2(self):
    
    154
    +        devices = [device.get_pb2() for device in self._devices]
    
    155
    +        worker = worker_pb2.Worker(devices=devices)
    
    156
    +        property_message = worker_pb2.Worker.Property()
    
    157
    +        for k, v in self.properties.items():
    
    158
    +            property_message.key = k
    
    159
    +            property_message.value = v
    
    160
    +            worker.properties.extend([property_message])
    
    161
    +
    
    162
    +        config_message = worker_pb2.Worker.Config()
    
    163
    +        for k, v in self.properties.items():
    
    164
    +            property_message.key = k
    
    165
    +            property_message.value = v
    
    166
    +            worker.configs.extend([config_message])
    
    167
    +
    
    168
    +        return worker
    
    169
    +
    
    170
    +class Device:
    
    171
    +    def __init__(self, properties=None):
    
    172
    +        """ Creates devices available to the worker
    
    173
    +        The first device is known as the Primary Device - the device which
    
    174
    +        is running the bot and responsible for actually executing commands.
    
    175
    +        All other devices are known as Attached Devices and must be controlled
    
    176
    +        by the Primary Device.
    
    177
    +        """
    
    178
    +
    
    179
    +        self._name = str(uuid.uuid4())
    
    180
    +        self._properties = {}
    
    181
    +
    
    182
    +        if properties:
    
    183
    +            for k, v in properties.items():
    
    184
    +                if k == 'os':
    
    185
    +                    self._properties[k] = v
    
    186
    +
    
    187
    +                elif k == 'docker':
    
    188
    +                    if v not in ('True', 'False'):
    
    189
    +                        raise ValueError('Value not supported: {}'.format(v))
    
    190
    +                    self._properties[k] = v
    
    191
    +
    
    192
    +                else:
    
    193
    +                    raise KeyError('Key not supported: {}'.format(k))
    
    194
    +
    
    195
    +    @property
    
    196
    +    def name(self):
    
    197
    +        return self._name
    
    198
    +
    
    199
    +    @property
    
    200
    +    def properties(self):
    
    201
    +        return self._properties
    
    202
    +
    
    203
    +
    
    204
    +    def get_pb2(self):
    
    205
    +        device = worker_pb2.Device(handle=self._name)
    
    206
    +        property_message = worker_pb2.Device.Property()
    
    207
    +        for k, v in self._properties.items():
    
    208
    +            property_message.key = k
    
    209
    +            property_message.value = v
    
    210
    +            device.properties.extend([property_message])
    
    211
    +        return device

  • buildgrid/server/action_cache.py
    1
    +# Copyright (C) 2018 Bloomberg LP
    
    2
    +#
    
    3
    +# Licensed under the Apache License, Version 2.0 (the "License");
    
    4
    +# you may not use this file except in compliance with the License.
    
    5
    +# You may obtain a copy of the License at
    
    6
    +#
    
    7
    +#  <http://www.apache.org/licenses/LICENSE-2.0>
    
    8
    +#
    
    9
    +# Unless required by applicable law or agreed to in writing, software
    
    10
    +# distributed under the License is distributed on an "AS IS" BASIS,
    
    11
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    
    12
    +# See the License for the specific language governing permissions and
    
    13
    +# limitations under the License.
    
    14
    +#
    
    15
    +# Authors:
    
    16
    +#        Carter Sande <csande bloomberg net>
    
    17
    +
    
    18
    +"""
    
    19
    +ActionCache
    
    20
    +==================
    
    21
    +
    
    22
    +Implements a simple in-memory action cache.
    
    23
    +"""
    
    24
    +
    
    25
    +import collections
    
    26
    +
    
    27
    +from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2 as re_pb2
    
    28
    +
    
    29
    +class ActionCache:
    
    30
    +
    
    31
    +    def __init__(self, storage, max_cached_actions):
    
    32
    +        self._storage = storage
    
    33
    +        self._max_cached_actions = max_cached_actions
    
    34
    +        self._digest_map = collections.OrderedDict()
    
    35
    +
    
    36
    +    def get_action_result(self, action_digest):
    
    37
    +        """Return the cached ActionResult for the given Action digest, or None
    
    38
    +        if there isn't one.
    
    39
    +        """
    
    40
    +        key = (action_digest.hash, action_digest.size_bytes)
    
    41
    +        if key in self._digest_map:
    
    42
    +            action_result = self._storage.get_message(self._digest_map[key],
    
    43
    +                                                      re_pb2.ActionResult)
    
    44
    +            if action_result is not None:
    
    45
    +                if self._blobs_still_exist(action_result):
    
    46
    +                    self._digest_map.move_to_end(key)
    
    47
    +                    return action_result
    
    48
    +            del self._digest_map[key]
    
    49
    +        return None
    
    50
    +
    
    51
    +    def put_action_result(self, action_digest, action_result):
    
    52
    +        """Add the given ActionResult to the cache for the given Action
    
    53
    +        digest.
    
    54
    +        """
    
    55
    +        if self._max_cached_actions == 0:
    
    56
    +            return
    
    57
    +
    
    58
    +        while len(self._digest_map) >= self._max_cached_actions:
    
    59
    +            self._digest_map.popitem(last=False)
    
    60
    +
    
    61
    +        key = (action_digest.hash, action_digest.size_bytes)
    
    62
    +        action_result_digest = self._storage.put_message(action_result)
    
    63
    +        self._digest_map[key] = action_result_digest
    
    64
    +
    
    65
    +    def _blobs_still_exist(self, action_result):
    
    66
    +        """Return True if all the CAS blobs referenced by the given
    
    67
    +        ActionResult are present in CAS.
    
    68
    +        """
    
    69
    +        blobs_needed = []
    
    70
    +        
    
    71
    +        for output_file in action_result.output_files:
    
    72
    +            blobs_needed.append(output_file.digest)
    
    73
    +        
    
    74
    +        for output_directory in action_result.output_directories:
    
    75
    +            blobs_needed.append(output_directory.tree_digest)
    
    76
    +            tree = self._storage.get_message(output_directory.tree_digest,
    
    77
    +                                             re_pb2.Tree)
    
    78
    +            if tree is None:
    
    79
    +                return False
    
    80
    +            for file_node in tree.root.files:
    
    81
    +                blobs_needed.append(file_node.digest)
    
    82
    +            for child in tree.children:
    
    83
    +                for file_node in child.files:
    
    84
    +                    blobs_needed.append(file_node.digest)
    
    85
    +
    
    86
    +        if action_result.stdout_digest.hash and not action_result.stdout_raw:
    
    87
    +            blobs_needed.append(action_result.stdout_digest)
    
    88
    +        if action_result.stderr_digest.hash and not action_result.stderr_raw:
    
    89
    +            blobs_needed.append(action_result.stderr_digest)
    
    90
    +
    
    91
    +        missing = self._storage.missing_blobs(blobs_needed)
    
    92
    +        return len(missing) == 0

  • buildgrid/server/build_grid_server.py
    ... ... @@ -33,6 +33,7 @@ from buildgrid._protos.google.longrunning import operations_pb2_grpc
    33 33
     
    
    34 34
     from .cas.bytestream_service import ByteStreamService
    
    35 35
     from .cas.content_addressable_storage_service import ContentAddressableStorageService
    
    36
    +from .execution.action_cache_service import ActionCacheService
    
    36 37
     from .execution.execution_service import ExecutionService
    
    37 38
     from .execution.operations_service import OperationsService
    
    38 39
     from .execution.execution_instance import ExecutionInstance
    
    ... ... @@ -42,11 +43,11 @@ from .worker.bots_interface import BotsInterface
    42 43
     
    
    43 44
     class BuildGridServer(object):
    
    44 45
     
    
    45
    -    def __init__(self, port = '50051', max_workers = 10, cas_storage = None):
    
    46
    +    def __init__(self, port = '50051', max_workers = 10, cas_storage = None, action_cache = None, allow_update_action_result = True):
    
    46 47
             port = '[::]:{0}'.format(port)
    
    47
    -        scheduler = Scheduler()
    
    48
    +        scheduler = Scheduler(action_cache)
    
    48 49
             bots_interface = BotsInterface(scheduler)
    
    49
    -        execution_instance = ExecutionInstance(scheduler)
    
    50
    +        execution_instance = ExecutionInstance(scheduler, cas_storage)
    
    50 51
     
    
    51 52
             self._server = grpc.server(futures.ThreadPoolExecutor(max_workers))
    
    52 53
             self._server.add_insecure_port(port)
    
    ... ... @@ -63,6 +64,12 @@ class BuildGridServer(object):
    63 64
                                                                                           self._server)
    
    64 65
                 bytestream_pb2_grpc.add_ByteStreamServicer_to_server(ByteStreamService(cas_storage),
    
    65 66
                                                                      self._server)
    
    67
    +        if action_cache is not None:
    
    68
    +            action_cache_service = ActionCacheService(action_cache,
    
    69
    +                                                      allow_update_action_result)
    
    70
    +            remote_execution_pb2_grpc.add_ActionCacheServicer_to_server(action_cache_service,
    
    71
    +                                                                        self._server)
    
    72
    +
    
    66 73
     
    
    67 74
         async def start(self):
    
    68 75
             self._server.start()
    

  • buildgrid/server/cas/storage/storage_abc.py
    ... ... @@ -24,6 +24,7 @@ The abstract base class for storage providers.
    24 24
     
    
    25 25
     import abc
    
    26 26
     
    
    27
    +from buildgrid._protos.build.bazel.remote.execution.v2.remote_execution_pb2 import Digest
    
    27 28
     from buildgrid._protos.google.rpc.status_pb2 import Status
    
    28 29
     from buildgrid._protos.google.rpc import code_pb2
    
    29 30
     
    
    ... ... @@ -96,3 +97,23 @@ class StorageABC(abc.ABC):
    96 97
                     else:
    
    97 98
                         result.append(Status(code=code_pb2.OK))
    
    98 99
             return result
    
    100
    +
    
    101
    +    def put_message(self, message):
    
    102
    +        """Store the given Protobuf message in CAS, returning its digest."""
    
    103
    +        message_blob = message.SerializeToString()
    
    104
    +        digest = Digest(hash=HASH(message_blob).hexdigest(), size_bytes=len(message_blob))
    
    105
    +        session = self.begin_write(digest)
    
    106
    +        session.write(message_blob)
    
    107
    +        self.commit_write(digest, session)
    
    108
    +        return digest
    
    109
    +
    
    110
    +    def get_message(self, digest, message_type):
    
    111
    +        """Retrieve the Protobuf message with the given digest and type from
    
    112
    +        CAS. If the blob is not present, returns None.
    
    113
    +        """
    
    114
    +        message_blob = self.get_blob(digest)
    
    115
    +        if message_blob is None:
    
    116
    +            return None
    
    117
    +        result = message_type.FromString(message_blob.read())
    
    118
    +        message_blob.close()
    
    119
    +        return result

  • buildgrid/server/execution/action_cache_service.py
    ... ... @@ -19,7 +19,7 @@
    19 19
     ActionCacheService
    
    20 20
     ==================
    
    21 21
     
    
    22
    -Action Cache currently not implemented.
    
    22
    +Allows clients to manually query/update the action cache.
    
    23 23
     """
    
    24 24
     
    
    25 25
     import logging
    
    ... ... @@ -29,14 +29,21 @@ from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_p
    29 29
     
    
    30 30
     class ActionCacheService(remote_execution_pb2_grpc.ActionCacheServicer):
    
    31 31
     
    
    32
    -    def __init__(self, instance):
    
    33
    -        self._instance = instance
    
    32
    +    def __init__(self, action_cache, allow_updates=True):
    
    33
    +        self._action_cache = action_cache
    
    34
    +        self._allow_updates = allow_updates
    
    34 35
             self.logger = logging.getLogger(__name__)
    
    35 36
     
    
    36 37
         def GetActionResult(self, request, context):
    
    37
    -        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
    
    38
    -        return remote_execution_pb2.ActionResult()
    
    38
    +        result = self._action_cache.get_action_result(request.action_digest)
    
    39
    +        if result is None:
    
    40
    +            context.set_code(grpc.StatusCode.NOT_FOUND)
    
    41
    +            return remote_execution_pb2.ActionResult()
    
    42
    +        return result
    
    39 43
     
    
    40 44
         def UpdateActionResult(self, request, context):
    
    41
    -        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
    
    42
    -        return remote_execution_pb2.ActionResult()
    45
    +        if not self._allow_updates:
    
    46
    +            context.set_code(grpc.StatusCode.UNIMPLEMENTED)
    
    47
    +            return remote_execution_pb2.ActionResult()
    
    48
    +        self._action_cache.put_action_result(request.action_digest, request.action_result)
    
    49
    +        return request.action_result

  • buildgrid/server/execution/execution_instance.py
    ... ... @@ -24,14 +24,17 @@ An instance of the Remote Execution Server.
    24 24
     import uuid
    
    25 25
     import logging
    
    26 26
     
    
    27
    +from buildgrid._protos.build.bazel.remote.execution.v2.remote_execution_pb2 import Action
    
    28
    +
    
    27 29
     from ._exceptions import InvalidArgumentError
    
    28 30
     
    
    29 31
     from ..job import Job, ExecuteStage
    
    30 32
     
    
    31 33
     class ExecutionInstance():
    
    32 34
     
    
    33
    -    def __init__(self, scheduler):
    
    35
    +    def __init__(self, scheduler, storage = None):
    
    34 36
             self.logger = logging.getLogger(__name__)
    
    37
    +        self._storage = storage
    
    35 38
             self._scheduler = scheduler
    
    36 39
     
    
    37 40
         def execute(self, action_digest, skip_cache_lookup, message_queue=None):
    
    ... ... @@ -39,13 +42,17 @@ class ExecutionInstance():
    39 42
             Queues an action and creates an Operation instance to be associated with
    
    40 43
             this action.
    
    41 44
             """
    
    42
    -        job = Job(action_digest, message_queue)
    
    45
    +
    
    46
    +        do_not_cache = False
    
    47
    +        if self._storage is not None:
    
    48
    +            action = self._storage.get_message(action_digest, Action)
    
    49
    +            if action is not None:
    
    50
    +                do_not_cache = action.do_not_cache
    
    51
    +
    
    52
    +        job = Job(action_digest, do_not_cache, message_queue)
    
    43 53
             self.logger.info("Operation name: {}".format(job.name))
    
    44 54
     
    
    45
    -        if not skip_cache_lookup:
    
    46
    -            raise NotImplementedError("ActionCache not implemented")
    
    47
    -        else:
    
    48
    -            self._scheduler.append_job(job)
    
    55
    +        self._scheduler.append_job(job, skip_cache_lookup)
    
    49 56
     
    
    50 57
             return job.get_operation()
    
    51 58
     
    

  • buildgrid/server/job.py
    ... ... @@ -18,8 +18,6 @@
    18 18
     import logging
    
    19 19
     import uuid
    
    20 20
     
    
    21
    -import buildgrid._protos.build.bazel.remote.execution.v2.remote_execution_pb2
    
    22
    -
    
    23 21
     from enum import Enum
    
    24 22
     
    
    25 23
     from buildgrid._protos.build.bazel.remote.execution.v2.remote_execution_pb2 import ExecuteOperationMetadata, ExecuteResponse
    
    ... ... @@ -51,12 +49,14 @@ class LeaseState(Enum):
    51 49
     
    
    52 50
     class Job():
    
    53 51
     
    
    54
    -    def __init__(self, action_digest, message_queue=None):
    
    52
    +    def __init__(self, action_digest, do_not_cache=False, message_queue=None):
    
    55 53
             self.lease = None
    
    56 54
             self.logger = logging.getLogger(__name__)
    
    57 55
             self.result = None
    
    56
    +        self.result_cached = False
    
    58 57
     
    
    59 58
             self._action_digest = action_digest
    
    59
    +        self._do_not_cache = do_not_cache
    
    60 60
             self._execute_stage = ExecuteStage.UNKNOWN
    
    61 61
             self._n_tries = 0
    
    62 62
             self._name = str(uuid.uuid4())
    
    ... ... @@ -70,6 +70,14 @@ class Job():
    70 70
         def name(self):
    
    71 71
             return self._name
    
    72 72
     
    
    73
@property
def action_digest(self):
    """Return the digest of the action this job was created for."""
    return self._action_digest
    
    76
    +
    
    77
@property
def do_not_cache(self):
    """Return True when this job's result must not be written to the action cache."""
    return self._do_not_cache
    
    80
    +
    
    73 81
         def check_job_finished(self):
    
    74 82
             if not self._operation_update_queues:
    
    75 83
                 return self._operation.done
    
    ... ... @@ -88,6 +96,7 @@ class Job():
    88 96
                 self._operation.done = True
    
    89 97
                 response = ExecuteResponse()
    
    90 98
                 self.result.Unpack(response.result)
    
    99
    +            response.cached_result = self.result_cached
    
    91 100
                 self._operation.response.CopyFrom(self._pack_any(response))
    
    92 101
     
    
    93 102
             return self._operation
    
    ... ... @@ -95,6 +104,7 @@ class Job():
    95 104
         def get_operation_meta(self):
    
    96 105
             meta = ExecuteOperationMetadata()
    
    97 106
             meta.stage = self._execute_stage.value
    
    107
    +        meta.action_digest.CopyFrom(self._action_digest)
    
    98 108
     
    
    99 109
             return meta
    
    100 110
     
    

  • buildgrid/server/scheduler.py
    ... ... @@ -23,7 +23,9 @@ Schedules jobs.
    23 23
     
    
    24 24
     from collections import deque
    
    25 25
     
    
    26
    +from buildgrid._protos.build.bazel.remote.execution.v2.remote_execution_pb2 import ActionResult
    
    26 27
     from buildgrid._protos.google.longrunning import operations_pb2
    
    28
    +from google.protobuf import any_pb2
    
    27 29
     
    
    28 30
     from .job import ExecuteStage, LeaseState
    
    29 31
     
    
    ... ... @@ -31,7 +33,8 @@ class Scheduler():
    31 33
     
    
    32 34
         MAX_N_TRIES = 5
    
    33 35
     
    
    34
    -    def __init__(self):
    
    36
    +    def __init__(self, action_cache=None):
    
    37
    +        self.action_cache = action_cache
    
    35 38
             self.jobs = {}
    
    36 39
             self.queue = deque()
    
    37 40
     
    
    ... ... @@ -44,36 +47,42 @@ class Scheduler():
    44 47
             if job.check_job_finished():
    
    45 48
                 del self.jobs[name]
    
    46 49
     
    
    47
    -    def append_job(self, job):
    
    48
    -        job.update_execute_stage(ExecuteStage.QUEUED)
    
    50
    +    def append_job(self, job, skip_cache_lookup=False):
    
    49 51
             self.jobs[job.name] = job
    
    52
    +        if self.action_cache is not None and not skip_cache_lookup:
    
    53
    +            cached_result = self.action_cache.get_action_result(job.action_digest)
    
    54
    +            if cached_result is not None:
    
    55
    +                cached_result_any = any_pb2.Any()
    
    56
    +                cached_result_any.Pack(cached_result)
    
    57
    +                job.result = cached_result_any
    
    58
    +                job.result_cached = True
    
    59
    +                job.update_execute_stage(ExecuteStage.COMPLETED)
    
    60
    +                return
    
    50 61
             self.queue.append(job)
    
    62
    +        job.update_execute_stage(ExecuteStage.QUEUED)
    
    51 63
     
    
    52 64
         def retry_job(self, name):
    
    53
    -        job = self.jobs[name]
    
    54
    -
    
    55
    -        if job.n_tries >= self.MAX_N_TRIES:
    
    56
    -            # TODO: Decide what to do with these jobs
    
    57
    -            job.update_execute_stage(ExecuteStage.COMPLETED)
    
    58
    -        else:
    
    59
    -            job.update_execute_stage(ExecuteStage.QUEUED)
    
    60
    -            job.n_tries += 1
    
    61
    -            self.queue.appendleft(job)
    
    65
    +        if job in self.jobs[name]:
    
    66
    +            if job.n_tries >= self.MAX_N_TRIES:
    
    67
    +                # TODO: Decide what to do with these jobs
    
    68
    +                job.update_execute_stage(ExecuteStage.COMPLETED)
    
    69
    +                # TODO: Mark these jobs as done
    
    70
    +            else:
    
    71
    +                job.update_execute_stage(ExecuteStage.QUEUED)
    
    72
    +                job.n_tries += 1
    
    73
    +                self.queue.appendleft(job)
    
    62 74
     
    
    63
    -        self.jobs[name] = job
    
    64
    -
    
    65
    -    def create_job(self):
    
    66
    -        if len(self.queue) > 0:
    
    67
    -            job = self.queue.popleft()
    
    68
    -            job.update_execute_stage(ExecuteStage.EXECUTING)
    
    69
    -            self.jobs[job.name] = job
    
    70
    -            return job
    
    75
    +            self.jobs[name] = job
    
    71 76
     
    
    72 77
         def job_complete(self, name, result):
    
    73 78
             job = self.jobs[name]
    
    74 79
             job.result = result
    
    75 80
             job.update_execute_stage(ExecuteStage.COMPLETED)
    
    76 81
             self.jobs[name] = job
    
    82
    +        if not job.do_not_cache and self.action_cache is not None:
    
    83
    +            action_result = ActionResult()
    
    84
    +            result.Unpack(action_result)
    
    85
    +            self.action_cache.put_action_result(job.action_digest, action_result)
    
    77 86
     
    
    78 87
         def get_operations(self):
    
    79 88
             response = operations_pb2.ListOperationsResponse()
    
    ... ... @@ -81,48 +90,13 @@ class Scheduler():
    81 90
                 response.operations.extend([v.get_operation()])
    
    82 91
             return response
    
    83 92
     
    
    84
    -    def update_lease(self, lease):
    
    85
    -        name = lease.id
    
    93
    +    def update_job_lease_state(self, name, state):
    
    86 94
             job = self.jobs.get(name)
    
    87
    -        state = lease.state
    
    88
    -
    
    89
    -        if state   == LeaseState.LEASE_STATE_UNSPECIFIED.value:
    
    90
    -            create_job = self.create_job()
    
    91
    -            if create_job is None:
    
    92
    -                # No job? Return lease.
    
    93
    -                return lease
    
    94
    -            else:
    
    95
    -                job = create_job
    
    96
    -                job.create_lease()
    
    97
    -
    
    98
    -        elif state == LeaseState.PENDING.value:
    
    99
    -            job.lease = lease
    
    100
    -
    
    101
    -        elif state == LeaseState.ACTIVE.value:
    
    102
    -            job.lease = lease
    
    103
    -
    
    104
    -        elif state == LeaseState.COMPLETED.value:
    
    105
    -            self.job_complete(job.name, lease.result)
    
    106
    -
    
    107
    -            create_job = self.create_job()
    
    108
    -            if create_job is None:
    
    109
    -                # Docs say not to use this state though if job has
    
    110
    -                # completed and no more jobs, then use this state to stop
    
    111
    -                # job being processed again
    
    112
    -                job.lease = lease
    
    113
    -                job.lease.state = LeaseState.LEASE_STATE_UNSPECIFIED.value
    
    114
    -            else:
    
    115
    -                job = create_job
    
    116
    -                job.lease = job.create_lease()
    
    117
    -
    
    118
    -        elif state == LeaseState.CANCELLED.value:
    
    119
    -            job.lease = lease
    
    120
    -
    
    121
    -        else:
    
    122
    -            raise Exception("Unknown state: {}".format(state))
    
    123
    -
    
    95
    +        job.lease.state = state
    
    124 96
             self.jobs[name] = job
    
    125
    -        return job.lease
    
    97
    +
    
    98
    +    def get_job_lease(self, name):
    
    99
    +        return self.jobs[name].lease
    
    126 100
     
    
    127 101
         def cancel_session(self, name):
    
    128 102
             job = self.jobs[name]
    
    ... ... @@ -130,3 +104,12 @@ class Scheduler():
    130 104
             if state == LeaseState.PENDING.value or \
    
    131 105
                state == LeaseState.ACTIVE.value:
    
    132 106
                 self.retry_job(name)
    
    107
    +
    
    108
    +    def create_leases(self):
    
    109
    +        while len(self.queue) > 0:
    
    110
    +            job = self.queue.popleft()
    
    111
    +            job.update_execute_stage(ExecuteStage.EXECUTING)
    
    112
    +            job.lease = job.create_lease()
    
    113
    +            job.lease.state = LeaseState.PENDING.value
    
    114
    +            self.jobs[job.name] = job
    
    115
    +            yield job.lease

  • buildgrid/server/worker/bots_interface.py
    ... ... @@ -35,6 +35,7 @@ class BotsInterface():
    35 35
             self.logger = logging.getLogger(__name__)
    
    36 36
     
    
    37 37
             self._bot_ids = {}
    
    38
    +        self._bot_sessions = {}
    
    38 39
             self._scheduler = scheduler
    
    39 40
     
    
    40 41
         def create_bot_session(self, parent, bot_session):
    
    ... ... @@ -59,7 +60,12 @@ class BotsInterface():
    59 60
             bot_session.name = name
    
    60 61
     
    
    61 62
             self._bot_ids[name] = bot_id
    
    63
    +        self._bot_sessions[name] = bot_session
    
    62 64
             self.logger.info("Created bot session name={} with bot_id={}".format(name, bot_id))
    
    65
    +
    
    66
    +        for lease in self._scheduler.create_leases():
    
    67
    +            bot_session.leases.extend([lease])
    
    68
    +
    
    63 69
             return bot_session
    
    64 70
     
    
    65 71
         def update_bot_session(self, name, bot_session):
    
    ... ... @@ -69,13 +75,66 @@ class BotsInterface():
    69 75
             self.logger.debug("Updating bot session name={}".format(name))
    
    70 76
             self._check_bot_ids(bot_session.bot_id, name)
    
    71 77
     
    
    72
    -        leases = [self._scheduler.update_lease(lease) for lease in bot_session.leases]
    
    78
    +        server_session = self._bot_sessions[name]
    
    79
    +
    
    80
    +        leases = filter(None, [self.check_states(lease) for lease in bot_session.leases])
    
    73 81
     
    
    74 82
             del bot_session.leases[:]
    
    75 83
             bot_session.leases.extend(leases)
    
    76 84
     
    
    85
    +        for lease in self._scheduler.create_leases():
    
    86
    +            bot_session.leases.extend([lease])
    
    87
    +
    
    88
    +        self._bot_sessions[name] = bot_session
    
    77 89
             return bot_session
    
    78 90
     
    
    91
    +    def check_states(self, client_lease):
    
    92
    +        """ Edge detector for states
    
    93
    +        """
    
    94
    +        ## TODO: Handle cancelled states
    
    95
    +        try:
    
    96
    +            server_lease = self._scheduler.get_job_lease(client_lease.id)
    
    97
    +        except KeyError:
    
    98
    +            raise InvalidArgumentError("Lease not found on server: {}".format(client_lease))
    
    99
    +
    
    100
    +        server_state = LeaseState(server_lease.state)
    
    101
    +        client_state = LeaseState(client_lease.state)
    
    102
    +
    
    103
    +        if server_state == LeaseState.PENDING:
    
    104
    +
    
    105
    +            if client_state == LeaseState.ACTIVE:
    
    106
    +                self._scheduler.update_job_lease_state(client_lease.id, client_lease.state)
    
    107
    +            elif client_state == LeaseState.COMPLETED:
    
    108
    +                # TODO: Lease was rejected
    
    109
    +                raise NotImplementedError("'Not Accepted' is unsupported")
    
    110
    +            else:
    
    111
    +                raise OutofSyncError("Server lease: {}. Client lease: {}".format(server_lease, client_lease))
    
    112
    +
    
    113
    +        elif server_state == LeaseState.ACTIVE:
    
    114
    +
    
    115
    +            if client_state == LeaseState.ACTIVE:
    
    116
    +                pass
    
    117
    +
    
    118
    +            elif client_state == LeaseState.COMPLETED:
    
    119
    +                self._scheduler.job_complete(client_lease.id, client_lease.result)
    
    120
    +                self._scheduler.update_job_lease_state(client_lease.id, client_lease.state)
    
    121
    +                return None
    
    122
    +
    
    123
    +            else:
    
    124
    +                raise OutofSyncError("Server lease: {}. Client lease: {}".format(server_lease, client_lease))
    
    125
    +
    
    126
    +        elif server_state == LeaseState.COMPLETED:
    
    127
    +            raise OutofSyncError("Server lease: {}. Client lease: {}".format(server_lease, client_lease))
    
    128
    +
    
    129
    +        elif server_state == LeaseState.CANCELLED:
    
    130
    +            raise NotImplementedError("Cancelled states not supported yet")
    
    131
    +
    
    132
    +        else:
    
    133
    +            # Sould never get here
    
    134
    +            raise OutofSyncError("State now allowed: {}".format(server_state))
    
    135
    +
    
    136
    +        return client_lease
    
    137
    +
    
    79 138
         def _check_bot_ids(self, bot_id, name = None):
    
    80 139
             """ Checks the ID and the name of the bot.
    
    81 140
             """
    
    ... ... @@ -103,7 +162,11 @@ class BotsInterface():
    103 162
                 raise InvalidArgumentError("Bot id does not exist: {}".format(name))
    
    104 163
     
    
    105 164
             self.logger.debug("Attempting to close {} with name: {}".format(bot_id, name))
    
    106
    -        self._scheduler.retry_job(name)
    
    165
    +        for lease in self._bot_sessions[name].leases:
    
    166
    +            if lease.state != LeaseState.COMPLETED.value:
    
    167
    +                # TODO: Be wary here, may need to handle rejected leases in future
    
    168
    +                self._scheduler.retry_job(lease.id)
    
    169
    +
    
    107 170
             self.logger.debug("Closing bot session: {}".format(name))
    
    108 171
             self._bot_ids.pop(name)
    
    109 172
             self.logger.info("Closed bot {} with name: {}".format(bot_id, name))

  • buildgrid/server/worker/bots_service.py
    ... ... @@ -43,7 +43,6 @@ class BotsService(bots_pb2_grpc.BotsServicer):
    43 43
                 self.logger.error(e)
    
    44 44
                 context.set_details(str(e))
    
    45 45
                 context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
    
    46
    -            return bots_pb2.BotSession()
    
    47 46
     
    
    48 47
         def UpdateBotSession(self, request, context):
    
    49 48
             try:
    
    ... ... @@ -53,13 +52,16 @@ class BotsService(bots_pb2_grpc.BotsServicer):
    53 52
                 self.logger.error(e)
    
    54 53
                 context.set_details(str(e))
    
    55 54
                 context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
    
    56
    -            return bots_pb2.BotSession()
    
    57 55
     
    
    58 56
             except OutofSyncError as e:
    
    59 57
                 self.logger.error(e)
    
    60 58
                 context.set_details(str(e))
    
    61 59
                 context.set_code(grpc.StatusCode.DATA_LOSS)
    
    62
    -            return bots_pb2.BotSession()
    
    60
    +
    
    61
    +        except NotImplementedError as e:
    
    62
    +            self.logger.error(e)
    
    63
    +            context.set_details(str(e))
    
    64
    +            context.set_code(grpc.StatusCode.UNIMPLEMENTED)
    
    63 65
     
    
    64 66
         def PostBotEventTemp(self, request, context):
    
    65 67
             context.set_code(grpc.StatusCode.UNIMPLEMENTED)

  • tests/action_cache.py
    1
    +# Copyright (C) 2018 Bloomberg LP
    
    2
    +#
    
    3
    +# Licensed under the Apache License, Version 2.0 (the "License");
    
    4
    +# you may not use this file except in compliance with the License.
    
    5
    +# You may obtain a copy of the License at
    
    6
    +#
    
    7
    +#  <http://www.apache.org/licenses/LICENSE-2.0>
    
    8
    +#
    
    9
    +# Unless required by applicable law or agreed to in writing, software
    
    10
    +# distributed under the License is distributed on an "AS IS" BASIS,
    
    11
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    
    12
    +# See the License for the specific language governing permissions and
    
    13
    +# limitations under the License.
    
    14
    +#
    
    15
    +# Authors:
    
    16
    +#        Carter Sande <csande bloomberg net>
    
    17
    +
    
    18
    +from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    
    19
    +import pytest
    
    20
    +
    
    21
    +from buildgrid.server import action_cache
    
    22
    +from buildgrid.server.cas.storage import lru_memory_cache
    
    23
    +
    
    24
    +@pytest.fixture
    
    25
    +def cas():
    
    26
    +    return lru_memory_cache.LRUMemoryCache(1024 * 1024)
    
    27
    +
    
    28
    +def test_null_action_cache(cas):
    
    29
    +    cache = action_cache.ActionCache(cas, 0)
    
    30
    +
    
    31
    +    action_digest1 = remote_execution_pb2.Digest(hash='alpha', size_bytes=4)
    
    32
    +    dummy_result = remote_execution_pb2.ActionResult()
    
    33
    +
    
    34
    +    cache.put_action_result(action_digest1, dummy_result)
    
    35
    +    assert cache.get_action_result(action_digest1) is None
    
    36
    +
    
    37
    +def test_action_cache_expiry(cas):
    
    38
    +    cache = action_cache.ActionCache(cas, 2)
    
    39
    +
    
    40
    +    action_digest1 = remote_execution_pb2.Digest(hash='alpha', size_bytes=4)
    
    41
    +    action_digest2 = remote_execution_pb2.Digest(hash='bravo', size_bytes=4)
    
    42
    +    action_digest3 = remote_execution_pb2.Digest(hash='charlie', size_bytes=4)
    
    43
    +    dummy_result = remote_execution_pb2.ActionResult()
    
    44
    +
    
    45
    +    cache.put_action_result(action_digest1, dummy_result)
    
    46
    +    cache.put_action_result(action_digest2, dummy_result)
    
    47
    +
    
    48
    +    # Get digest 1 (making 2 the least recently used)
    
    49
    +    assert cache.get_action_result(action_digest1) is not None
    
    50
    +    # Add digest 3 (so 2 gets removed from the cache)
    
    51
    +    cache.put_action_result(action_digest3, dummy_result)
    
    52
    +
    
    53
    +    assert cache.get_action_result(action_digest1) is not None
    
    54
    +    assert cache.get_action_result(action_digest2) is None
    
    55
    +    assert cache.get_action_result(action_digest3) is not None
    
    56
    +
    
    57
    +def test_action_cache_checks_cas(cas):
    
    58
    +    cache = action_cache.ActionCache(cas, 50)
    
    59
    +
    
    60
    +    action_digest1 = remote_execution_pb2.Digest(hash='alpha', size_bytes=4)
    
    61
    +    action_digest2 = remote_execution_pb2.Digest(hash='bravo', size_bytes=4)
    
    62
    +    action_digest3 = remote_execution_pb2.Digest(hash='charlie', size_bytes=4)
    
    63
    +
    
    64
    +    # Create a tree that references digests in CAS
    
    65
    +    sample_digest = cas.put_message(remote_execution_pb2.Command(arguments=["sample"]))
    
    66
    +    tree = remote_execution_pb2.Tree()
    
    67
    +    tree.root.files.add().digest.CopyFrom(sample_digest)
    
    68
    +    tree.children.add().files.add().digest.CopyFrom(sample_digest)
    
    69
    +    tree_digest = cas.put_message(tree)
    
    70
    +
    
    71
    +    # Add an ActionResult that references real digests to the cache
    
    72
    +    action_result1 = remote_execution_pb2.ActionResult()
    
    73
    +    action_result1.output_directories.add().tree_digest.CopyFrom(tree_digest)
    
    74
    +    action_result1.output_files.add().digest.CopyFrom(sample_digest)
    
    75
    +    action_result1.stdout_digest.CopyFrom(sample_digest)
    
    76
    +    action_result1.stderr_digest.CopyFrom(sample_digest)
    
    77
    +    cache.put_action_result(action_digest1, action_result1)
    
    78
    +
    
    79
    +    # Add ActionResults that reference fake digests to the cache
    
    80
    +    action_result2 = remote_execution_pb2.ActionResult()
    
    81
    +    action_result2.output_directories.add().tree_digest.hash = "nonexistent"
    
    82
    +    action_result2.output_directories[0].tree_digest.size_bytes = 8
    
    83
    +    cache.put_action_result(action_digest2, action_result2)
    
    84
    +
    
    85
    +    action_result3 = remote_execution_pb2.ActionResult()
    
    86
    +    action_result3.stdout_digest.hash = "nonexistent"
    
    87
    +    action_result3.stdout_digest.size_bytes = 8
    
    88
    +    cache.put_action_result(action_digest3, action_result3)
    
    89
    +
    
    90
    +    # Verify we can get the first ActionResult but not the others
    
    91
    +    fetched_result1 = cache.get_action_result(action_digest1)
    
    92
    +    assert fetched_result1.output_directories[0].tree_digest.hash == tree_digest.hash
    
    93
    +    assert cache.get_action_result(action_digest2) is None
    
    94
    +    assert cache.get_action_result(action_digest3) is None

  • tests/integration/action_cache_service.py
    ... ... @@ -23,45 +23,47 @@ from unittest import mock
    23 23
     from grpc._server import _Context
    
    24 24
     from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    
    25 25
     
    
    26
    -from buildgrid.server import scheduler
    
    27
    -from buildgrid.server.execution import execution_instance, action_cache_service
    
    26
    +from buildgrid.server import action_cache
    
    27
    +from buildgrid.server.cas.storage import lru_memory_cache
    
    28
    +from buildgrid.server.execution import action_cache_service
    
    28 29
     
    
    29 30
     # Can mock this
    
    30 31
     @pytest.fixture
    
    31 32
     def context():
    
    32 33
         yield mock.MagicMock(spec = _Context)
    
    33 34
     
    
    34
    -# Requests to make
    
    35 35
     @pytest.fixture
    
    36
    -def execute_request():
    
    37
    -    action = remote_execution_pb2.Action()
    
    38
    -    action.command_digest.hash = 'zhora'
    
    39
    -
    
    40
    -    yield remote_execution_pb2.ExecuteRequest(instance_name = '',
    
    41
    -                                              action = action,
    
    42
    -                                              skip_cache_lookup = True)
    
    36
    +def cas():
    
    37
    +    yield lru_memory_cache.LRUMemoryCache(1024 * 1024)
    
    43 38
     
    
    44 39
     @pytest.fixture
    
    45
    -def schedule():
    
    46
    -    yield scheduler.Scheduler()
    
    40
    +def cache(cas):
    
    41
    +    yield action_cache.ActionCache(cas, 50)
    
    47 42
     
    
    48
    -@pytest.fixture
    
    49
    -def execution(schedule):
    
    50
    -    yield execution_instance.ExecutionInstance(schedule)
    
    43
    +def test_simple_action_result(cache, context):
    
    44
    +    service = action_cache_service.ActionCacheService(cache)
    
    45
    +    action_digest = remote_execution_pb2.Digest(hash='sample', size_bytes=4)
    
    51 46
     
    
    52
    -# Instance to test
    
    53
    -@pytest.fixture
    
    54
    -def instance(execution):
    
    55
    -    yield action_cache_service.ActionCacheService(execution)
    
    47
    +    # Check that before adding the ActionResult, attempting to fetch it fails
    
    48
    +    request = remote_execution_pb2.GetActionResultRequest(action_digest=action_digest)
    
    49
    +    service.GetActionResult(request, context)
    
    50
    +    context.set_code.assert_called_once_with(grpc.StatusCode.NOT_FOUND)
    
    56 51
     
    
    57
    -def test_get_action_result(instance, context):
    
    58
    -    request = remote_execution_pb2.GetActionResultRequest()
    
    59
    -    instance.GetActionResult(request, context)
    
    52
    +    # Add an ActionResult to the cache
    
    53
    +    action_result = remote_execution_pb2.ActionResult(stdout_raw=b'example output')
    
    54
    +    request = remote_execution_pb2.UpdateActionResultRequest(action_digest=action_digest,
    
    55
    +                                                             action_result=action_result)
    
    56
    +    service.UpdateActionResult(request, context)
    
    60 57
     
    
    61
    -    context.set_code.assert_called_once_with(grpc.StatusCode.UNIMPLEMENTED)
    
    58
    +    # Check that fetching it now works
    
    59
    +    request = remote_execution_pb2.GetActionResultRequest(action_digest=action_digest)
    
    60
    +    fetched_result = service.GetActionResult(request, context)
    
    61
    +    assert fetched_result.stdout_raw == action_result.stdout_raw
    
    62
    +
    
    63
    +def test_disabled_update_action_result(cache, context):
    
    64
    +    service = action_cache_service.ActionCacheService(cache, False)
    
    62 65
     
    
    63
    -def test_update_action_result(instance, context):
    
    64 66
         request = remote_execution_pb2.UpdateActionResultRequest()
    
    65
    -    instance.UpdateActionResult(request, context)
    
    67
    +    service.UpdateActionResult(request, context)
    
    66 68
     
    
    67 69
         context.set_code.assert_called_once_with(grpc.StatusCode.UNIMPLEMENTED)

  • tests/integration/bot_session.py
    1
    +# Copyright (C) 2018 Bloomberg LP
    
    2
    +#
    
    3
    +# Licensed under the Apache License, Version 2.0 (the "License");
    
    4
    +# you may not use this file except in compliance with the License.
    
    5
    +# You may obtain a copy of the License at
    
    6
    +#
    
    7
    +#  <http://www.apache.org/licenses/LICENSE-2.0>
    
    8
    +#
    
    9
    +# Unless required by applicable law or agreed to in writing, software
    
    10
    +# distributed under the License is distributed on an "AS IS" BASIS,
    
    11
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    
    12
    +# See the License for the specific language governing permissions and
    
    13
    +# limitations under the License.
    
    14
    +
    
    15
    +import grpc
    
    16
    +import pytest
    
    17
    +import uuid
    
    18
    +
    
    19
    +from unittest import mock
    
    20
    +
    
    21
    +from buildgrid.bot import bot_session, bot_interface
    
    22
    +
    
    23
    +@pytest.mark.parametrize("docker_value", ["True", "False"])
    
    24
    +@pytest.mark.parametrize("os_value", ["nexus7", "nexus8"])
    
    25
    +def test_create_device(docker_value, os_value):
    
    26
    +    properties = {'docker' : docker_value, 'os' : os_value}
    
    27
    +    device = bot_session.Device(properties)
    
    28
    +
    
    29
    +    assert uuid.UUID(device.name, version=4)
    
    30
    +    assert properties == device.properties
    
    31
    +
    
    32
    +def test_create_device_key_fail():
    
    33
    +    properties = {'voight' : 'kampff'}
    
    34
    +
    
    35
    +    with pytest.raises(KeyError):
    
    36
    +        device = bot_session.Device(properties)
    
    37
    +
    
    38
    +def test_create_device_value_fail():
    
    39
    +    properties = {'docker' :  True}
    
    40
    +
    
    41
    +    with pytest.raises(ValueError):
    
    42
    +        device = bot_session.Device(properties)
    
    43
    +
    
    44
    +def test_create_worker():
    
    45
    +    properties = {'pool' : 'swim'}
    
    46
    +    configs = {'DockerImage' : 'Windows'}
    
    47
    +    worker = bot_session.Worker(properties, configs)
    
    48
    +
    
    49
    +    assert properties == worker.properties
    
    50
    +    assert configs == worker.configs
    
    51
    +
    
    52
    +    device = bot_session.Device()
    
    53
    +    worker.add_device(device)
    
    54
    +
    
    55
    +    assert worker._devices[0] == device
    
    56
    +
    
    57
    +def test_create_worker_key_fail():
    
    58
    +    properties = {'voight' : 'kampff'}
    
    59
    +    configs = {'voight' : 'kampff'}
    
    60
    +
    
    61
    +    with pytest.raises(KeyError):
    
    62
    +        bot_session.Worker(properties)
    
    63
    +    with pytest.raises(KeyError):
    
    64
    +        bot_session.Worker(configs)

  • tests/integration/bots_service.py
    ... ... @@ -36,6 +36,12 @@ from buildgrid.server.worker import bots_interface, bots_service
    36 36
     def context():
    
    37 37
         yield mock.MagicMock(spec = _Context)
    
    38 38
     
    
    39
    +@pytest.fixture
    
    40
    +def action_job():
    
    41
    +    action_digest = remote_execution_pb2.Digest()
    
    42
    +    j = job.Job(action_digest, None)
    
    43
    +    yield j
    
    44
    +
    
    39 45
     @pytest.fixture
    
    40 46
     def bot_session():
    
    41 47
         bot = bots_pb2.BotSession()
    
    ... ... @@ -101,7 +107,6 @@ def test_update_bot_session_zombie(bot_session, context, instance):
    101 107
     
    
    102 108
         response = instance.UpdateBotSession(request, context)
    
    103 109
     
    
    104
    -    assert isinstance(response, bots_pb2.BotSession)
    
    105 110
         context.set_code.assert_called_once_with(grpc.StatusCode.INVALID_ARGUMENT)
    
    106 111
     
    
    107 112
     def test_update_bot_session_bot_id_fail(bot_session, context, instance):
    
    ... ... @@ -113,62 +118,121 @@ def test_update_bot_session_bot_id_fail(bot_session, context, instance):
    113 118
     
    
    114 119
         context.set_code.assert_called_once_with(grpc.StatusCode.INVALID_ARGUMENT)
    
    115 120
     
    
    116
    -@pytest.mark.parametrize("number_of_leases", [1, 3, 500])
    
    117
    -def test_update_leases(number_of_leases, bot_session, context, instance):
    
    118
    -    leases = [bots_pb2.Lease() for x in range(number_of_leases)]
    
    119
    -    bot_session.leases.extend(leases)
    
    121
    +@pytest.mark.parametrize("number_of_jobs", [0, 1, 3, 500])
    
    122
    +def test_number_of_leases(number_of_jobs, bot_session, context, instance):
    
    120 123
         request = bots_pb2.CreateBotSessionRequest(parent='',
    
    121 124
                                                    bot_session=bot_session)
    
    122
    -    # Simulated the severed binding between client and server
    
    123
    -    bot = copy.deepcopy(instance.CreateBotSession(request, context))
    
    124
    -
    
    125
    -    request = bots_pb2.UpdateBotSessionRequest(name=bot.name,
    
    126
    -                                               bot_session=bot)
    
    125
    +    # Inject work
    
    126
    +    for n in range(0, number_of_jobs):
    
    127
    +        action_digest = remote_execution_pb2.Digest()
    
    128
    +        instance._instance._scheduler.append_job(job.Job(action_digest))
    
    127 129
     
    
    128
    -    response = instance.UpdateBotSession(request, context)
    
    130
    +    response = instance.CreateBotSession(request, context)
    
    129 131
     
    
    132
    +    assert len(response.leases) == number_of_jobs
    
    130 133
         assert isinstance(response, bots_pb2.BotSession)
    
    131
    -    assert len(response.leases) == len(bot.leases)
    
    132
    -    assert bot == response
    
    133 134
     
    
    134 135
     def test_update_leases_with_work(bot_session, context, instance):
    
    135
    -    leases = [bots_pb2.Lease() for x in range(2)]
    
    136
    -    bot_session.leases.extend(leases)
    
    136
    +    request = bots_pb2.CreateBotSessionRequest(parent='',
    
    137
    +                                               bot_session=bot_session)
    
    138
    +    # Inject work
    
    139
    +    action_digest = remote_execution_pb2.Digest(hash = 'gaff')
    
    140
    +    instance._instance._scheduler.append_job(job.Job(action_digest))
    
    141
    +
    
    142
    +    response = instance.CreateBotSession(request, context)
    
    143
    +
    
    144
    +    assert len(response.leases) == 1
    
    145
    +    response_action = remote_execution_pb2.Digest()
    
    146
    +    response.leases[0].payload.Unpack(response_action)
    
    147
    +
    
    148
    +    assert isinstance(response, bots_pb2.BotSession)
    
    149
    +    assert response.leases[0].state == LeaseState.PENDING.value
    
    150
    +    assert uuid.UUID(response.leases[0].id, version=4)
    
    151
    +    assert response_action == action_digest
    
    152
    +
    
    153
    +def test_update_leases_work_complete(bot_session, context, instance):
    
    154
    +    request = bots_pb2.CreateBotSessionRequest(parent='',
    
    155
    +                                               bot_session=bot_session)
    
    156
    +    # Create bot session
    
    157
+    # Simulate the severed binding between client and server
    
    158
    +    response = copy.deepcopy(instance.CreateBotSession(request, context))
    
    159
    +
    
    160
    +    # Inject work
    
    161
    +    action_digest = remote_execution_pb2.Digest(hash = 'gaff')
    
    162
    +    instance._instance._scheduler.append_job(job.Job(action_digest))
    
    163
    +
    
    164
    +    request = bots_pb2.UpdateBotSessionRequest(name=response.name,
    
    165
    +                                               bot_session=response)
    
    166
    +    response = copy.deepcopy(instance.UpdateBotSession(request, context))
    
    167
    +
    
    168
    +    assert response.leases[0].state == LeaseState.PENDING.value
    
    169
    +    response.leases[0].state = LeaseState.ACTIVE.value
    
    170
    +
    
    171
    +    request = bots_pb2.UpdateBotSessionRequest(name=response.name,
    
    172
    +                                               bot_session=response)
    
    173
    +
    
    174
    +    response = copy.deepcopy(instance.UpdateBotSession(request, context))
    
    175
    +
    
    176
    +    response.leases[0].state = LeaseState.COMPLETED.value
    
    177
    +
    
    178
    +    request = bots_pb2.UpdateBotSessionRequest(name=response.name,
    
    179
    +                                               bot_session=response)
    
    180
    +    response = copy.deepcopy(instance.UpdateBotSession(request, context))
    
    181
    +
    
    182
    +    assert len(response.leases) == 0
    
    183
    +
    
    184
    +def test_work_rejected_by_bot(bot_session, context, instance):
    
    185
    +    request = bots_pb2.CreateBotSessionRequest(parent='',
    
    186
    +                                               bot_session=bot_session)
    
    187
    +    # Inject work
    
    188
    +    action_digest = remote_execution_pb2.Digest(hash = 'gaff')
    
    189
    +    instance._instance._scheduler.append_job(job.Job(action_digest))
    
    190
    +
    
    191
+    # Simulate the severed binding between client and server
    
    192
    +    response = copy.deepcopy(instance.CreateBotSession(request, context))
    
    193
    +
    
    194
    +    # Reject work
    
    195
    +    assert response.leases[0].state == LeaseState.PENDING.value
    
    196
    +    response.leases[0].state = LeaseState.COMPLETED.value
    
    197
    +    request = bots_pb2.UpdateBotSessionRequest(name=response.name,
    
    198
    +                                               bot_session=response)
    
    199
    +
    
    200
    +    response = instance.UpdateBotSession(request, context)
    
    201
    +
    
    202
    +    context.set_code.assert_called_once_with(grpc.StatusCode.UNIMPLEMENTED)
    
    137 203
     
    
    138
    -    # Inject some work to be done
    
    139
    -    action = remote_execution_pb2.Action()
    
    140
    -    action.command_digest.hash = 'rick'
    
    141
    -    instance._instance._scheduler.append_job(job.Job(action))
    
    142 204
     
    
    205
    +@pytest.mark.parametrize("state", [ LeaseState.LEASE_STATE_UNSPECIFIED, LeaseState.PENDING])
    
    206
    +def test_work_out_of_sync_from_pending(state, bot_session, context, instance):
    
    143 207
         request = bots_pb2.CreateBotSessionRequest(parent='',
    
    144 208
                                                    bot_session=bot_session)
    
    209
    +    # Inject work
    
    210
    +    action_digest = remote_execution_pb2.Digest(hash = 'gaff')
    
    211
    +    instance._instance._scheduler.append_job(job.Job(action_digest))
    
    145 212
         # Simulated the severed binding between client and server
    
    146 213
         bot = copy.deepcopy(instance.CreateBotSession(request, context))
    
    147 214
     
    
    148 215
         request = bots_pb2.UpdateBotSessionRequest(name=bot.name,
    
    149 216
                                                    bot_session=bot)
    
    150 217
     
    
    151
    -    response = instance.UpdateBotSession(request, context)
    
    152
    -    response_action = remote_execution_pb2.Action()
    
    153
    -    _unpack_any(response.leases[0].payload, response_action)
    
    218
    +    response = copy.deepcopy(instance.UpdateBotSession(request, context))
    
    154 219
     
    
    155
    -    assert isinstance(response, bots_pb2.BotSession)
    
    156
    -    assert response.leases[0].state == LeaseState.PENDING.value
    
    157
    -    assert response.leases[1].state == LeaseState.LEASE_STATE_UNSPECIFIED.value
    
    158
    -    assert uuid.UUID(response.leases[0].id, version=4)
    
    159
    -    assert response_action == action
    
    220
    +    response.leases[0].state = state.value
    
    160 221
     
    
    161
    -def test_update_leases_work_complete(bot_session, context, instance):
    
    162
    -    leases = [bots_pb2.Lease() for x in range(2)]
    
    163
    -    bot_session.leases.extend(leases)
    
    222
    +    request = bots_pb2.UpdateBotSessionRequest(name=response.name,
    
    223
    +                                               bot_session=response)
    
    224
    +
    
    225
    +    response = instance.UpdateBotSession(request, context)
    
    164 226
     
    
    165
    -    # Inject some work to be done
    
    166
    -    action = remote_execution_pb2.Action()
    
    167
    -    action.command_digest.hash = 'rick'
    
    168
    -    instance._instance._scheduler.append_job(job.Job(action))
    
    227
    +    context.set_code.assert_called_once_with(grpc.StatusCode.DATA_LOSS)
    
    169 228
     
    
    229
    +@pytest.mark.parametrize("state", [ LeaseState.LEASE_STATE_UNSPECIFIED, LeaseState.PENDING])
    
    230
    +def test_work_out_of_sync_from_active(state, bot_session, context, instance):
    
    170 231
         request = bots_pb2.CreateBotSessionRequest(parent='',
    
    171 232
                                                    bot_session=bot_session)
    
    233
    +    # Inject work
    
    234
    +    action_digest = remote_execution_pb2.Digest(hash = 'gaff')
    
    235
    +    instance._instance._scheduler.append_job(job.Job(action_digest))
    
    172 236
         # Simulated the severed binding between client and server
    
    173 237
         bot = copy.deepcopy(instance.CreateBotSession(request, context))
    
    174 238
     
    
    ... ... @@ -177,26 +241,75 @@ def test_update_leases_work_complete(bot_session, context, instance):
    177 241
     
    
    178 242
         response = copy.deepcopy(instance.UpdateBotSession(request, context))
    
    179 243
     
    
    180
    -    operation_name = response.leases[0].id
    
    244
    +    response = instance.UpdateBotSession(request, context)
    
    181 245
     
    
    182
    -    assert response.leases[0].state == LeaseState.PENDING.value
    
    183
    -    response.leases[0].state = LeaseState.COMPLETED.value
    
    246
    +    context.set_code.assert_called_once_with(grpc.StatusCode.UNIMPLEMENTED)
    
    247
    +
    
    248
    +@pytest.mark.parametrize("state", [ LeaseState.LEASE_STATE_UNSPECIFIED, LeaseState.PENDING])
    
    249
    +def test_work_out_of_sync_from_pending(state, bot_session, context, instance):
    
    250
    +    request = bots_pb2.CreateBotSessionRequest(parent='',
    
    251
    +                                               bot_session=bot_session)
    
    252
    +    # Inject work
    
    253
    +    action_digest = remote_execution_pb2.Digest(hash = 'gaff')
    
    254
    +    instance._instance._scheduler.append_job(job.Job(action_digest))
    
    255
    +    # Simulated the severed binding between client and server
    
    256
    +    response = copy.deepcopy(instance.CreateBotSession(request, context))
    
    257
    +
    
    258
    +    response.leases[0].state = state.value
    
    259
    +
    
    260
    +    request = bots_pb2.UpdateBotSessionRequest(name=response.name,
    
    261
    +                                               bot_session=response)
    
    262
    +
    
    263
    +    response = instance.UpdateBotSession(request, context)
    
    264
    +
    
    265
    +    context.set_code.assert_called_once_with(grpc.StatusCode.DATA_LOSS)
    
    266
    +
    
    267
    +@pytest.mark.parametrize("state", [ LeaseState.LEASE_STATE_UNSPECIFIED, LeaseState.PENDING])
    
    268
    +def test_work_out_of_sync_from_active(state, bot_session, context, instance):
    
    269
    +    request = bots_pb2.CreateBotSessionRequest(parent='',
    
    270
    +                                               bot_session=bot_session)
    
    271
    +    # Inject work
    
    272
    +    action_digest = remote_execution_pb2.Digest(hash = 'gaff')
    
    273
    +    instance._instance._scheduler.append_job(job.Job(action_digest))
    
    274
    +    # Simulated the severed binding between client and server
    
    275
    +    response = copy.deepcopy(instance.CreateBotSession(request, context))
    
    276
    +
    
    277
    +    response.leases[0].state = LeaseState.ACTIVE.value
    
    278
    +
    
    279
    +    request = copy.deepcopy(bots_pb2.UpdateBotSessionRequest(name=response.name,
    
    280
    +                                                             bot_session=response))
    
    281
    +
    
    282
    +    response = instance.UpdateBotSession(request, context)
    
    283
    +
    
    284
    +    response.leases[0].state = state.value
    
    184 285
     
    
    185 286
         request = bots_pb2.UpdateBotSessionRequest(name=response.name,
    
    186 287
                                                    bot_session=response)
    
    288
    +
    
    289
    +    response = instance.UpdateBotSession(request, context)
    
    290
    +
    
    291
    +    context.set_code.assert_called_once_with(grpc.StatusCode.DATA_LOSS)
    
    292
    +
    
    293
    +def test_work_active_to_active(bot_session, context, instance):
    
    294
    +    request = bots_pb2.CreateBotSessionRequest(parent='',
    
    295
    +                                               bot_session=bot_session)
    
    296
    +    # Inject work
    
    297
    +    action_digest = remote_execution_pb2.Digest(hash = 'gaff')
    
    298
    +    instance._instance._scheduler.append_job(job.Job(action_digest))
    
    187 299
         # Simulated the severed binding between client and server
    
    188
    -    response = copy.deepcopy(instance.UpdateBotSession(request, context))
    
    189
    -    assert isinstance(response, bots_pb2.BotSession)
    
    190
    -    assert instance._instance._scheduler.jobs[operation_name]._execute_stage == ExecuteStage.COMPLETED
    
    300
    +    response = copy.deepcopy(instance.CreateBotSession(request, context))
    
    301
    +
    
    302
    +    response.leases[0].state = LeaseState.ACTIVE.value
    
    303
    +
    
    304
    +    request = bots_pb2.UpdateBotSessionRequest(name=response.name,
    
    305
    +                                               bot_session=response)
    
    306
    +
    
    307
    +    response = instance.UpdateBotSession(request, context)
    
    308
    +
    
    309
    +    assert response.leases[0].state == LeaseState.ACTIVE.value
    
    191 310
     
    
    192 311
     def test_post_bot_event_temp(context, instance):
    
    193 312
         request = bots_pb2.PostBotEventTempRequest()
    
    194 313
         instance.PostBotEventTemp(request, context)
    
    195 314
     
    
    196 315
         context.set_code.assert_called_once_with(grpc.StatusCode.UNIMPLEMENTED)
    197
    -
    
    198
    -def _unpack_any(unpack_from, to):
    
    199
    -    any = any_pb2.Any()
    
    200
    -    any.CopyFrom(unpack_from)
    
    201
    -    any.Unpack(to)
    
    202
    -    return to

  • tests/integration/execution_service.py
    ... ... @@ -26,7 +26,8 @@ from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_p
    26 26
     from buildgrid._protos.google.longrunning import operations_pb2
    
    27 27
     from google.protobuf import any_pb2
    
    28 28
     
    
    29
    -from buildgrid.server import scheduler, job
    
    29
    +from buildgrid.server import action_cache, scheduler, job
    
    30
    +from buildgrid.server.cas.storage import lru_memory_cache
    
    30 31
     from buildgrid.server.execution import execution_instance, execution_service
    
    31 32
     
    
    32 33
     @pytest.fixture
    
    ... ... @@ -34,13 +35,14 @@ def context():
    34 35
         cxt = mock.MagicMock(spec = _Context)
    
    35 36
         yield cxt
    
    36 37
     
    
    37
    -@pytest.fixture
    
    38
    -def schedule():
    
    39
    -    yield scheduler.Scheduler()
    
    40
    -
    
    41
    -@pytest.fixture
    
    42
    -def execution(schedule):
    
    43
    -    yield execution_instance.ExecutionInstance(schedule)
    
    38
    +@pytest.fixture(params=["action-cache", "no-action-cache"])
    
    39
    +def execution(request):
    
    40
    +    if request.param == "action-cache":
    
    41
    +        storage = lru_memory_cache.LRUMemoryCache(1024 * 1024)
    
    42
    +        cache = action_cache.ActionCache(storage, 50)
    
    43
    +        schedule = scheduler.Scheduler(cache)
    
    44
    +        return execution_instance.ExecutionInstance(schedule, storage)
    
    45
    +    return execution_instance.ExecutionInstance(scheduler.Scheduler())
    
    44 46
     
    
    45 47
     # Instance to test
    
    46 48
     @pytest.fixture
    
    ... ... @@ -56,17 +58,15 @@ def test_execute(skip_cache_lookup, instance, context):
    56 58
                                                       action_digest = action_digest,
    
    57 59
                                                       skip_cache_lookup = skip_cache_lookup)
    
    58 60
         response = instance.Execute(request, context)
    
    59
    -    if skip_cache_lookup is False:
    
    60
    -        [r for r in response]
    
    61
    -        context.set_code.assert_called_once_with(grpc.StatusCode.UNIMPLEMENTED)
    
    62
    -    else:
    
    63
    -        result = next(response)
    
    64
    -        assert isinstance(result, operations_pb2.Operation)
    
    65
    -        metadata = remote_execution_pb2.ExecuteOperationMetadata()
    
    66
    -        result.metadata.Unpack(metadata)
    
    67
    -        assert metadata.stage == job.ExecuteStage.QUEUED.value
    
    68
    -        assert uuid.UUID(result.name, version=4)
    
    69
    -        assert result.done is False
    
    61
    +
    
    62
    +    result = next(response)
    
    63
    +    assert isinstance(result, operations_pb2.Operation)
    
    64
    +    metadata = remote_execution_pb2.ExecuteOperationMetadata()
    
    65
    +    result.metadata.Unpack(metadata)
    
    66
    +    assert metadata.stage == job.ExecuteStage.QUEUED.value
    
    67
    +    assert uuid.UUID(result.name, version=4)
    
    68
    +    assert result.done is False
    
    69
    +
    
    70 70
     """
    
    71 71
     def test_wait_execution(instance, context):
    
    72 72
         # TODO: Figure out why next(response) hangs on the .get()
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]