Raoul Hidalgo Charman pushed to branch raoul/smarter-bot-calls at BuildGrid / buildgrid
Commits:
- 16fa6d26 by Raoul Hidalgo Charman at 2018-11-30T16:18:59Z
4 changed files:
- buildgrid/bot/bot.py
- buildgrid/bot/interface.py
- buildgrid/bot/session.py
- buildgrid/bot/tenantmanager.py
Changes:
| ... | ... | @@ -35,7 +35,7 @@ class Bot: |
| 35 | 35 |
self.__bot_session.create_bot_session()
|
| 36 | 36 |
|
| 37 | 37 |
try:
|
| 38 |
- task = asyncio.ensure_future(self.__update_bot_session())
|
|
| 38 |
+ task = asyncio.ensure_future(self.__bot_session.run())
|
|
| 39 | 39 |
self.__loop.run_until_complete(task)
|
| 40 | 40 |
|
| 41 | 41 |
except KeyboardInterrupt:
|
| ... | ... | @@ -44,15 +44,6 @@ class Bot: |
| 44 | 44 |
self.__kill_everyone()
|
| 45 | 45 |
self.__logger.info("Bot shutdown.")
|
| 46 | 46 |
|
| 47 |
- async def __update_bot_session(self):
|
|
| 48 |
- """Calls the server periodically to inform the server the client has not died."""
|
|
| 49 |
- try:
|
|
| 50 |
- while True:
|
|
| 51 |
- self.__bot_session.update_bot_session()
|
|
| 52 |
- |
|
| 53 |
- except asyncio.CancelledError:
|
|
| 54 |
- pass
|
|
| 55 |
- |
|
| 56 | 47 |
def __kill_everyone(self):
|
| 57 | 48 |
"""Cancels and waits for them to stop."""
|
| 58 | 49 |
self.__logger.info("Cancelling remaining tasks...")
|
| ... | ... | @@ -43,22 +43,21 @@ class BotInterface: |
| 43 | 43 |
return self.__interval
|
| 44 | 44 |
|
| 45 | 45 |
def create_bot_session(self, parent, bot_session):
|
| 46 |
+ """ Creates a bot session returning a grpc StatusCode if it failed """
|
|
| 46 | 47 |
request = bots_pb2.CreateBotSessionRequest(parent=parent,
|
| 47 | 48 |
bot_session=bot_session)
|
| 48 |
- try:
|
|
| 49 |
- return self._stub.CreateBotSession(request)
|
|
| 50 |
- |
|
| 51 |
- except grpc.RpcError as e:
|
|
| 52 |
- self.__logger.error(e)
|
|
| 53 |
- raise
|
|
| 49 |
+ return self._bot_call(self._stub.CreateBotSession, request)
|
|
| 54 | 50 |
|
| 55 | 51 |
def update_bot_session(self, bot_session, update_mask=None):
|
| 52 |
+ """ Updates a bot session returning a grpc StatusCode if it failed """
|
|
| 56 | 53 |
request = bots_pb2.UpdateBotSessionRequest(name=bot_session.name,
|
| 57 | 54 |
bot_session=bot_session,
|
| 58 | 55 |
update_mask=update_mask)
|
| 59 |
- try:
|
|
| 60 |
- return self._stub.UpdateBotSession(request, timeout=self.interval)
|
|
| 56 |
+ return self._bot_call(self._stub.UpdateBotSession, request)
|
|
| 61 | 57 |
|
| 58 |
+ def _bot_call(self, call, request):
|
|
| 59 |
+ try:
|
|
| 60 |
+ return call(request, timeout=self.interval)
|
|
| 62 | 61 |
except grpc.RpcError as e:
|
| 63 |
- self.__logger.error(e)
|
|
| 64 |
- raise
|
|
| 62 |
+ self.__logger.error(e.code())
|
|
| 63 |
+ return e.code()
|
| ... | ... | @@ -19,9 +19,12 @@ Bot Session |
| 19 | 19 |
|
| 20 | 20 |
Allows connections
|
| 21 | 21 |
"""
|
| 22 |
+import asyncio
|
|
| 22 | 23 |
import logging
|
| 23 | 24 |
import platform
|
| 24 | 25 |
|
| 26 |
+from grpc import StatusCode
|
|
| 27 |
+ |
|
| 25 | 28 |
from buildgrid._enums import BotStatus, LeaseState
|
| 26 | 29 |
from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
|
| 27 | 30 |
from buildgrid._protos.google.rpc import code_pb2
|
| ... | ... | @@ -47,6 +50,8 @@ class BotSession: |
| 47 | 50 |
self._status = BotStatus.OK.value
|
| 48 | 51 |
self._tenant_manager = TenantManager()
|
| 49 | 52 |
|
| 53 |
+ self.connected = False
|
|
| 54 |
+ |
|
| 50 | 55 |
self.__parent = parent
|
| 51 | 56 |
self.__bot_id = '{}.{}'.format(parent, platform.node())
|
| 52 | 57 |
self.__name = None
|
| ... | ... | @@ -58,10 +63,33 @@ class BotSession: |
| 58 | 63 |
def bot_id(self):
|
| 59 | 64 |
return self.__bot_id
|
| 60 | 65 |
|
| 66 |
+ async def run(self):
|
|
| 67 |
+ """ Run a bot session
|
|
| 68 |
+ |
|
| 69 |
+ This connects and reconnects via create bot session and waits on update
|
|
| 70 |
+ bot session calls.
|
|
| 71 |
+ """
|
|
| 72 |
+ self.__logger.debug("Starting bot session")
|
|
| 73 |
+ interval = self._bots_interface.interval
|
|
| 74 |
+ while True:
|
|
| 75 |
+ if not self.connected:
|
|
| 76 |
+ self.create_bot_session()
|
|
| 77 |
+ else:
|
|
| 78 |
+ self.update_bot_session()
|
|
| 79 |
+ |
|
| 80 |
+ if not self.connected:
|
|
| 81 |
+ await asyncio.sleep(interval)
|
|
| 82 |
+ else:
|
|
| 83 |
+ await self._tenant_manager.wait_on_tenants(interval)
|
|
| 84 |
+ |
|
| 61 | 85 |
def create_bot_session(self):
|
| 62 | 86 |
self.__logger.debug("Creating bot session")
|
| 63 | 87 |
|
| 64 | 88 |
session = self._bots_interface.create_bot_session(self.__parent, self.get_pb2())
|
| 89 |
+ if session in list(StatusCode):
|
|
| 90 |
+ self.connected = False
|
|
| 91 |
+ return
|
|
| 92 |
+ self.connected = True
|
|
| 65 | 93 |
self.__name = session.name
|
| 66 | 94 |
|
| 67 | 95 |
self.__logger.info("Created bot session with name: [%s]", self.__name)
|
| ... | ... | @@ -73,6 +101,13 @@ class BotSession: |
| 73 | 101 |
self.__logger.debug("Updating bot session: [%s]", self.__bot_id)
|
| 74 | 102 |
|
| 75 | 103 |
session = self._bots_interface.update_bot_session(self.get_pb2())
|
| 104 |
+ if session == StatusCode.DEADLINE_EXCEEDED:
|
|
| 105 |
+ # try to continue to do update session if it passed the timeout
|
|
| 106 |
+ return
|
|
| 107 |
+ elif session in StatusCode:
|
|
| 108 |
+ self.connected = False
|
|
| 109 |
+ return
|
|
| 110 |
+ self.connected = True
|
|
| 76 | 111 |
server_ids = []
|
| 77 | 112 |
|
| 78 | 113 |
for lease in session.leases:
|
| ... | ... | @@ -150,6 +150,13 @@ class TenantManager: |
| 150 | 150 |
"""
|
| 151 | 151 |
return self._tenants[lease_id].tenant_completed
|
| 152 | 152 |
|
| 153 |
+ async def wait_on_tenants(self, timeout):
|
|
| 154 |
+ if self._tasks:
|
|
| 155 |
+ tasks = self._tasks.values()
|
|
| 156 |
+ await asyncio.wait(tasks,
|
|
| 157 |
+ timeout=timeout,
|
|
| 158 |
+ return_when=asyncio.FIRST_COMPLETED)
|
|
| 159 |
+ |
|
| 153 | 160 |
def _update_lease_result(self, lease_id, result):
|
| 154 | 161 |
"""Updates the lease with the result."""
|
| 155 | 162 |
self._tenants[lease_id].update_lease_result(result)
|
