[Notes] [Git][BuildGrid/buildgrid][raoul/smarter-bot-calls] bot: add reconnects and waiting on job status



Title: GitLab

Raoul Hidalgo Charman pushed to branch raoul/smarter-bot-calls at BuildGrid / buildgrid

Commits:

4 changed files:

Changes:

  • buildgrid/bot/bot.py
    ... ... @@ -35,7 +35,7 @@ class Bot:
    35 35
             self.__bot_session.create_bot_session()
    
    36 36
     
    
    37 37
             try:
    
    38
    -            task = asyncio.ensure_future(self.__update_bot_session())
    
    38
    +            task = asyncio.ensure_future(self.__bot_session.run())
    
    39 39
                 self.__loop.run_until_complete(task)
    
    40 40
     
    
    41 41
             except KeyboardInterrupt:
    
    ... ... @@ -44,15 +44,6 @@ class Bot:
    44 44
             self.__kill_everyone()
    
    45 45
             self.__logger.info("Bot shutdown.")
    
    46 46
     
    
    47
    -    async def __update_bot_session(self):
    
    48
    -        """Calls the server periodically to inform the server the client has not died."""
    
    49
    -        try:
    
    50
    -            while True:
    
    51
    -                self.__bot_session.update_bot_session()
    
    52
    -
    
    53
    -        except asyncio.CancelledError:
    
    54
    -            pass
    
    55
    -
    
    56 47
         def __kill_everyone(self):
    
    57 48
             """Cancels and waits for them to stop."""
    
    58 49
             self.__logger.info("Cancelling remaining tasks...")
    

  • buildgrid/bot/interface.py
    ... ... @@ -43,22 +43,21 @@ class BotInterface:
    43 43
             return self.__interval
    
    44 44
     
    
    45 45
         def create_bot_session(self, parent, bot_session):
    
    46
    +        """ Creates a bot session returning a grpc StatusCode if it failed """
    
    46 47
             request = bots_pb2.CreateBotSessionRequest(parent=parent,
    
    47 48
                                                        bot_session=bot_session)
    
    48
    -        try:
    
    49
    -            return self._stub.CreateBotSession(request)
    
    50
    -
    
    51
    -        except grpc.RpcError as e:
    
    52
    -            self.__logger.error(e)
    
    53
    -            raise
    
    49
    +        return self._bot_call(self._stub.CreateBotSession, request)
    
    54 50
     
    
    55 51
         def update_bot_session(self, bot_session, update_mask=None):
    
    52
    +        """ Updates a bot session returning a grpc StatusCode if it failed """
    
    56 53
             request = bots_pb2.UpdateBotSessionRequest(name=bot_session.name,
    
    57 54
                                                        bot_session=bot_session,
    
    58 55
                                                        update_mask=update_mask)
    
    59
    -        try:
    
    60
    -            return self._stub.UpdateBotSession(request, timeout=self.interval)
    
    56
    +        return self._bot_call(self._stub.UpdateBotSession, request)
    
    61 57
     
    
    58
    +    def _bot_call(self, call, request):
    
    59
    +        try:
    
    60
    +            return call(request, timeout=self.interval)
    
    62 61
             except grpc.RpcError as e:
    
    63
    -            self.__logger.error(e)
    
    64
    -            raise
    62
    +            self.__logger.error(e.code())
    
    63
    +            return e.code()

  • buildgrid/bot/session.py
    ... ... @@ -19,9 +19,12 @@ Bot Session
    19 19
     
    
    20 20
     Allows connections
    
    21 21
     """
    
    22
    +import asyncio
    
    22 23
     import logging
    
    23 24
     import platform
    
    24 25
     
    
    26
    +from grpc import StatusCode
    
    27
    +
    
    25 28
     from buildgrid._enums import BotStatus, LeaseState
    
    26 29
     from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
    
    27 30
     from buildgrid._protos.google.rpc import code_pb2
    
    ... ... @@ -47,6 +50,8 @@ class BotSession:
    47 50
             self._status = BotStatus.OK.value
    
    48 51
             self._tenant_manager = TenantManager()
    
    49 52
     
    
    53
    +        self.connected = False
    
    54
    +
    
    50 55
             self.__parent = parent
    
    51 56
             self.__bot_id = '{}.{}'.format(parent, platform.node())
    
    52 57
             self.__name = None
    
    ... ... @@ -58,10 +63,33 @@ class BotSession:
    58 63
         def bot_id(self):
    
    59 64
             return self.__bot_id
    
    60 65
     
    
    66
    +    async def run(self):
    
    67
    +        """ Run a bot session
    
    68
    +
    
    69
    +        This connects and reconnects via create bot session and waits on update
    
    70
    +        bot session calls.
    
    71
    +        """
    
    72
    +        self.__logger.debug("Starting bot session")
    
    73
    +        interval = self._bots_interface.interval
    
    74
    +        while True:
    
    75
    +            if not self.connected:
    
    76
    +                self.create_bot_session()
    
    77
    +            else:
    
    78
    +                self.update_bot_session()
    
    79
    +
    
    80
    +            if not self.connected:
    
    81
    +                await asyncio.sleep(interval)
    
    82
    +            else:
    
    83
    +                await self._tenant_manager.wait_on_tenants(interval)
    
    84
    +
    
    61 85
         def create_bot_session(self):
    
    62 86
             self.__logger.debug("Creating bot session")
    
    63 87
     
    
    64 88
             session = self._bots_interface.create_bot_session(self.__parent, self.get_pb2())
    
    89
    +        if session in list(StatusCode):
    
    90
    +            self.connected = False
    
    91
    +            return
    
    92
    +        self.connected = True
    
    65 93
             self.__name = session.name
    
    66 94
     
    
    67 95
             self.__logger.info("Created bot session with name: [%s]", self.__name)
    
    ... ... @@ -73,6 +101,13 @@ class BotSession:
    73 101
             self.__logger.debug("Updating bot session: [%s]", self.__bot_id)
    
    74 102
     
    
    75 103
             session = self._bots_interface.update_bot_session(self.get_pb2())
    
    104
    +        if session == StatusCode.DEADLINE_EXCEEDED:
    
    105
    +            # try to continue to do update session if it passed the timeout
    
    106
    +            return
    
    107
    +        elif session in StatusCode:
    
    108
    +            self.connected = False
    
    109
    +            return
    
    110
    +        self.connected = True
    
    76 111
             server_ids = []
    
    77 112
     
    
    78 113
             for lease in session.leases:
    

  • buildgrid/bot/tenantmanager.py
    ... ... @@ -150,6 +150,13 @@ class TenantManager:
    150 150
             """
    
    151 151
             return self._tenants[lease_id].tenant_completed
    
    152 152
     
    
    153
    +    async def wait_on_tenants(self, timeout):
    
    154
    +        if self._tasks:
    
    155
    +            tasks = self._tasks.values()
    
    156
    +            await asyncio.wait(tasks,
    
    157
    +                               timeout=timeout,
    
    158
    +                               return_when=asyncio.FIRST_COMPLETED)
    
    159
    +
    
    153 160
         def _update_lease_result(self, lease_id, result):
    
    154 161
             """Updates the lease with the result."""
    
    155 162
             self._tenants[lease_id].update_lease_result(result)
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]