[Notes] [Git][BuildGrid/buildgrid][raoul/126-bot-reconnects] bot: Add logic to reconnect



Title: GitLab

Raoul Hidalgo Charman pushed to branch raoul/126-bot-reconnects at BuildGrid / buildgrid

Commits:

4 changed files:

Changes:

  • buildgrid/_app/commands/cmd_bot.py
    ... ... @@ -143,8 +143,8 @@ def run_dummy(context):
    143 143
         """
    
    144 144
         try:
    
    145 145
             bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
    
    146
    -                                         dummy.work_dummy, context)
    
    147
    -        b = bot.Bot(bot_session, context.update_period)
    
    146
    +                                         dummy.work_dummy, context, context.update_period)
    
    147
    +        b = bot.Bot(bot_session)
    
    148 148
             b.session()
    
    149 149
         except KeyboardInterrupt:
    
    150 150
             pass
    
    ... ... @@ -159,8 +159,8 @@ def run_host_tools(context):
    159 159
         """
    
    160 160
         try:
    
    161 161
             bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
    
    162
    -                                         host.work_host_tools, context)
    
    163
    -        b = bot.Bot(bot_session, context.update_period)
    
    162
    +                                         host.work_host_tools, context, context.update_period)
    
    163
    +        b = bot.Bot(bot_session)
    
    164 164
             b.session()
    
    165 165
         except KeyboardInterrupt:
    
    166 166
             pass
    
    ... ... @@ -181,8 +181,8 @@ def run_buildbox(context, local_cas, fuse_dir):
    181 181
     
    
    182 182
         try:
    
    183 183
             bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
    
    184
    -                                         buildbox.work_buildbox, context)
    
    185
    -        b = bot.Bot(bot_session, context.update_period)
    
    184
    +                                         buildbox.work_buildbox, context, context.update_period)
    
    185
    +        b = bot.Bot(bot_session)
    
    186 186
             b.session()
    
    187 187
         except KeyboardInterrupt:
    
    188 188
             pass

  • buildgrid/bot/bot.py
    ... ... @@ -20,13 +20,10 @@ import logging
    20 20
     class Bot:
    
    21 21
         """Creates a local BotSession."""
    
    22 22
     
    
    23
    -    def __init__(self, bot_session, update_period=1):
    
    24
    -        """
    
    25
    -        """
    
    23
    +    def __init__(self, bot_session):
    
    26 24
             self.__logger = logging.getLogger(__name__)
    
    27 25
     
    
    28 26
             self.__bot_session = bot_session
    
    29
    -        self.__update_period = update_period
    
    30 27
     
    
    31 28
             self.__loop = None
    
    32 29
     
    
    ... ... @@ -37,7 +34,7 @@ class Bot:
    37 34
             self.__bot_session.create_bot_session()
    
    38 35
     
    
    39 36
             try:
    
    40
    -            task = asyncio.ensure_future(self.__update_bot_session())
    
    37
    +            task = asyncio.ensure_future(self.__bot_session.run())
    
    41 38
                 self.__loop.run_until_complete(task)
    
    42 39
     
    
    43 40
             except KeyboardInterrupt:
    
    ... ... @@ -46,16 +43,6 @@ class Bot:
    46 43
             self.__kill_everyone()
    
    47 44
             self.__logger.info("Bot shutdown.")
    
    48 45
     
    
    49
    -    async def __update_bot_session(self):
    
    50
    -        """Calls the server periodically to inform the server the client has not died."""
    
    51
    -        try:
    
    52
    -            while True:
    
    53
    -                self.__bot_session.update_bot_session()
    
    54
    -                await asyncio.sleep(self.__update_period)
    
    55
    -
    
    56
    -        except asyncio.CancelledError:
    
    57
    -            pass
    
    58
    -
    
    59 46
         def __kill_everyone(self):
    
    60 47
             """Cancels and waits for them to stop."""
    
    61 48
             self.__logger.info("Cancelling remaining tasks...")
    

  • buildgrid/bot/interface.py
    ... ... @@ -37,22 +37,25 @@ class BotInterface:
    37 37
             self._stub = bots_pb2_grpc.BotsStub(channel)
    
    38 38
     
    
    39 39
         def create_bot_session(self, parent, bot_session):
    
    40
    +        """ Create bot session request
    
    41
    +        Returns BotSession if correct else a grpc StatusCode
    
    42
    +        """
    
    40 43
             request = bots_pb2.CreateBotSessionRequest(parent=parent,
    
    41 44
                                                        bot_session=bot_session)
    
    42
    -        try:
    
    43
    -            return self._stub.CreateBotSession(request)
    
    44
    -
    
    45
    -        except grpc.RpcError as e:
    
    46
    -            self.__logger.error(e)
    
    47
    -            raise
    
    45
    +        return self._bot_call(self._stub.CreateBotSession, request)
    
    48 46
     
    
    49 47
         def update_bot_session(self, bot_session, update_mask=None):
    
    48
    +        """ Update bot session request
    
    49
    +        Returns BotSession if correct else a grpc StatusCode
    
    50
    +        """
    
    50 51
             request = bots_pb2.UpdateBotSessionRequest(name=bot_session.name,
    
    51 52
                                                        bot_session=bot_session,
    
    52 53
                                                        update_mask=update_mask)
    
    53
    -        try:
    
    54
    -            return self._stub.UpdateBotSession(request)
    
    54
    +        return self._bot_call(self._stub.UpdateBotSession, request)
    
    55 55
     
    
    56
    +    def _bot_call(self, call, request):
    
    57
    +        try:
    
    58
    +            return call(request)
    
    56 59
             except grpc.RpcError as e:
    
    57
    -            self.__logger.error(e)
    
    58
    -            raise
    60
    +            self.__logger.error(e.code())
    
    61
    +            return e.code()

  • buildgrid/bot/session.py
    ... ... @@ -19,8 +19,10 @@ Bot Session
    19 19
     
    
    20 20
     Allows connections
    
    21 21
     """
    
    22
    +import asyncio
    
    22 23
     import logging
    
    23 24
     import platform
    
    25
    +import grpc
    
    24 26
     
    
    25 27
     from buildgrid._enums import BotStatus, LeaseState
    
    26 28
     from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
    
    ... ... @@ -32,7 +34,8 @@ from .tenantmanager import TenantManager
    32 34
     
    
    33 35
     
    
    34 36
     class BotSession:
    
    35
    -    def __init__(self, parent, bots_interface, hardware_interface, work, context=None):
    
    37
    +    def __init__(self, parent, bots_interface, hardware_interface, work,
    
    38
    +                 context=None, update_period=1):
    
    36 39
             """ Unique bot ID within the farm used to identify this bot
    
    37 40
             Needs to be human readable.
    
    38 41
             All prior sessions with bot_id of same ID are invalidated.
    
    ... ... @@ -54,14 +57,34 @@ class BotSession:
    54 57
             self._work = work
    
    55 58
             self._context = context
    
    56 59
     
    
    60
    +        self.__connected = False
    
    61
    +        self.__update_period = update_period
    
    62
    +
    
    57 63
         @property
    
    58 64
         def bot_id(self):
    
    59 65
             return self.__bot_id
    
    60 66
     
    
    67
    +    @property
    
    68
    +    def connected(self):
    
    69
    +        return self.__connected
    
    70
    +
    
    71
    +    async def run(self):
    
    72
    +        while True:
    
    73
    +            if not self.connected:
    
    74
    +                self.create_bot_session()
    
    75
    +            else:
    
    76
    +                self.update_bot_session()
    
    77
    +
    
    78
    +            await asyncio.sleep(self.__update_period)
    
    79
    +
    
    61 80
         def create_bot_session(self):
    
    62 81
             self.__logger.debug("Creating bot session")
    
    63 82
     
    
    64 83
             session = self._bots_interface.create_bot_session(self.__parent, self.get_pb2())
    
    84
    +        if session in list(grpc.StatusCode):
    
    85
    +            self.__connected = False
    
    86
    +            return
    
    87
    +        self.__connected = True
    
    65 88
             self.__name = session.name
    
    66 89
     
    
    67 90
             self.__logger.info("Created bot session with name: [%s]", self.__name)
    
    ... ... @@ -73,6 +96,10 @@ class BotSession:
    73 96
             self.__logger.debug("Updating bot session: [%s]", self.__bot_id)
    
    74 97
     
    
    75 98
             session = self._bots_interface.update_bot_session(self.get_pb2())
    
    99
    +        if session in list(grpc.StatusCode):
    
    100
    +            self.__connected = False
    
    101
    +            return
    
    102
    +        self.__connected = True
    
    76 103
             server_ids = []
    
    77 104
     
    
    78 105
             for lease in session.leases:
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]