Raoul Hidalgo Charman pushed to branch raoul/126-bot-reconnects at BuildGrid / buildgrid
Commits:
-
8b2f48d8
by Raoul Hidalgo Charman at 2018-12-06T18:20:52Z
-
b1d07576
by Raoul Hidalgo Charman at 2018-12-06T18:20:56Z
4 changed files:
- buildgrid/_app/commands/cmd_bot.py
- buildgrid/bot/bot.py
- buildgrid/bot/interface.py
- buildgrid/bot/session.py
Changes:
| ... | ... | @@ -141,13 +141,10 @@ def run_dummy(context): |
| 141 | 141 |
"""
|
| 142 | 142 |
Creates a session, accepts leases, does fake work and updates the server.
|
| 143 | 143 |
"""
|
| 144 |
- try:
|
|
| 145 |
- bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
|
|
| 146 |
- dummy.work_dummy, context)
|
|
| 147 |
- b = bot.Bot(bot_session, context.update_period)
|
|
| 148 |
- b.session()
|
|
| 149 |
- except KeyboardInterrupt:
|
|
| 150 |
- pass
|
|
| 144 |
+ bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
|
|
| 145 |
+ dummy.work_dummy, context, context.update_period)
|
|
| 146 |
+ b = bot.Bot(bot_session)
|
|
| 147 |
+ b.session()
|
|
| 151 | 148 |
|
| 152 | 149 |
|
| 153 | 150 |
@cli.command('host-tools', short_help="Runs commands using the host's tools.")
|
| ... | ... | @@ -157,13 +154,10 @@ def run_host_tools(context): |
| 157 | 154 |
Downloads inputs from CAS, runs build commands using host-tools and uploads
|
| 158 | 155 |
result back to CAS.
|
| 159 | 156 |
"""
|
| 160 |
- try:
|
|
| 161 |
- bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
|
|
| 162 |
- host.work_host_tools, context)
|
|
| 163 |
- b = bot.Bot(bot_session, context.update_period)
|
|
| 164 |
- b.session()
|
|
| 165 |
- except KeyboardInterrupt:
|
|
| 166 |
- pass
|
|
| 157 |
+ bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
|
|
| 158 |
+ host.work_host_tools, context, context.update_period)
|
|
| 159 |
+ b = bot.Bot(bot_session)
|
|
| 160 |
+ b.session()
|
|
| 167 | 161 |
|
| 168 | 162 |
|
| 169 | 163 |
@cli.command('buildbox', short_help="Run commands using the BuildBox tool.")
|
| ... | ... | @@ -179,10 +173,7 @@ def run_buildbox(context, local_cas, fuse_dir): |
| 179 | 173 |
context.local_cas = local_cas
|
| 180 | 174 |
context.fuse_dir = fuse_dir
|
| 181 | 175 |
|
| 182 |
- try:
|
|
| 183 |
- bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
|
|
| 184 |
- buildbox.work_buildbox, context)
|
|
| 185 |
- b = bot.Bot(bot_session, context.update_period)
|
|
| 186 |
- b.session()
|
|
| 187 |
- except KeyboardInterrupt:
|
|
| 188 |
- pass
|
|
| 176 |
+ bot_session = session.BotSession(context.parent, context.bot_interface, context.hardware_interface,
|
|
| 177 |
+ buildbox.work_buildbox, context, context.update_period)
|
|
| 178 |
+ b = bot.Bot(bot_session)
|
|
| 179 |
+ b.session()
|
| ... | ... | @@ -20,13 +20,10 @@ import logging |
| 20 | 20 |
class Bot:
|
| 21 | 21 |
"""Creates a local BotSession."""
|
| 22 | 22 |
|
| 23 |
- def __init__(self, bot_session, update_period=1):
|
|
| 24 |
- """
|
|
| 25 |
- """
|
|
| 23 |
+ def __init__(self, bot_session):
|
|
| 26 | 24 |
self.__logger = logging.getLogger(__name__)
|
| 27 | 25 |
|
| 28 | 26 |
self.__bot_session = bot_session
|
| 29 |
- self.__update_period = update_period
|
|
| 30 | 27 |
|
| 31 | 28 |
self.__loop = None
|
| 32 | 29 |
|
| ... | ... | @@ -34,28 +31,16 @@ class Bot: |
| 34 | 31 |
"""Will create a session and periodically call the server."""
|
| 35 | 32 |
|
| 36 | 33 |
self.__loop = asyncio.get_event_loop()
|
| 37 |
- self.__bot_session.create_bot_session()
|
|
| 38 | 34 |
|
| 39 | 35 |
try:
|
| 40 |
- task = asyncio.ensure_future(self.__update_bot_session())
|
|
| 36 |
+ task = asyncio.ensure_future(self.__bot_session.run())
|
|
| 41 | 37 |
self.__loop.run_until_complete(task)
|
| 42 |
- |
|
| 43 | 38 |
except KeyboardInterrupt:
|
| 44 | 39 |
pass
|
| 45 | 40 |
|
| 46 | 41 |
self.__kill_everyone()
|
| 47 | 42 |
self.__logger.info("Bot shutdown.")
|
| 48 | 43 |
|
| 49 |
- async def __update_bot_session(self):
|
|
| 50 |
- """Calls the server periodically to inform the server the client has not died."""
|
|
| 51 |
- try:
|
|
| 52 |
- while True:
|
|
| 53 |
- self.__bot_session.update_bot_session()
|
|
| 54 |
- await asyncio.sleep(self.__update_period)
|
|
| 55 |
- |
|
| 56 |
- except asyncio.CancelledError:
|
|
| 57 |
- pass
|
|
| 58 |
- |
|
| 59 | 44 |
def __kill_everyone(self):
|
| 60 | 45 |
"""Cancels and waits for them to stop."""
|
| 61 | 46 |
self.__logger.info("Cancelling remaining tasks...")
|
| ... | ... | @@ -37,22 +37,25 @@ class BotInterface: |
| 37 | 37 |
self._stub = bots_pb2_grpc.BotsStub(channel)
|
| 38 | 38 |
|
| 39 | 39 |
def create_bot_session(self, parent, bot_session):
|
| 40 |
+ """ Create bot session request
|
|
| 41 |
+ Returns the BotSession on success, otherwise the grpc StatusCode of the failed call.
|
|
| 42 |
+ """
|
|
| 40 | 43 |
request = bots_pb2.CreateBotSessionRequest(parent=parent,
|
| 41 | 44 |
bot_session=bot_session)
|
| 42 |
- try:
|
|
| 43 |
- return self._stub.CreateBotSession(request)
|
|
| 44 |
- |
|
| 45 |
- except grpc.RpcError as e:
|
|
| 46 |
- self.__logger.error(e)
|
|
| 47 |
- raise
|
|
| 45 |
+ return self._bot_call(self._stub.CreateBotSession, request)
|
|
| 48 | 46 |
|
| 49 | 47 |
def update_bot_session(self, bot_session, update_mask=None):
|
| 48 |
+ """ Update bot session request
|
|
| 49 |
+ Returns the updated BotSession on success, otherwise the grpc StatusCode of the failed call.
|
|
| 50 |
+ """
|
|
| 50 | 51 |
request = bots_pb2.UpdateBotSessionRequest(name=bot_session.name,
|
| 51 | 52 |
bot_session=bot_session,
|
| 52 | 53 |
update_mask=update_mask)
|
| 53 |
- try:
|
|
| 54 |
- return self._stub.UpdateBotSession(request)
|
|
| 54 |
+ return self._bot_call(self._stub.UpdateBotSession, request)
|
|
| 55 | 55 |
|
| 56 |
+ def _bot_call(self, call, request):
|
|
| 57 |
+ try:
|
|
| 58 |
+ return call(request)
|
|
| 56 | 59 |
except grpc.RpcError as e:
|
| 57 |
- self.__logger.error(e)
|
|
| 58 |
- raise
|
|
| 60 |
+ self.__logger.error(e.code())
|
|
| 61 |
+ return e.code()
|
| ... | ... | @@ -19,8 +19,10 @@ Bot Session |
| 19 | 19 |
|
| 20 | 20 |
Allows connections
|
| 21 | 21 |
"""
|
| 22 |
+import asyncio
|
|
| 22 | 23 |
import logging
|
| 23 | 24 |
import platform
|
| 25 |
+import grpc
|
|
| 24 | 26 |
|
| 25 | 27 |
from buildgrid._enums import BotStatus, LeaseState
|
| 26 | 28 |
from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
|
| ... | ... | @@ -32,7 +34,8 @@ from .tenantmanager import TenantManager |
| 32 | 34 |
|
| 33 | 35 |
|
| 34 | 36 |
class BotSession:
|
| 35 |
- def __init__(self, parent, bots_interface, hardware_interface, work, context=None):
|
|
| 37 |
+ def __init__(self, parent, bots_interface, hardware_interface, work,
|
|
| 38 |
+ context=None, update_period=1):
|
|
| 36 | 39 |
""" Unique bot ID within the farm used to identify this bot
|
| 37 | 40 |
Needs to be human readable.
|
| 38 | 41 |
All prior sessions with bot_id of same ID are invalidated.
|
| ... | ... | @@ -54,14 +57,37 @@ class BotSession: |
| 54 | 57 |
self._work = work
|
| 55 | 58 |
self._context = context
|
| 56 | 59 |
|
| 60 |
+ self.__connected = False
|
|
| 61 |
+ self.__update_period = update_period
|
|
| 62 |
+ |
|
| 57 | 63 |
@property
|
| 58 | 64 |
def bot_id(self):
|
| 59 | 65 |
return self.__bot_id
|
| 60 | 66 |
|
| 67 |
+ @property
|
|
| 68 |
+ def connected(self):
|
|
| 69 |
+ return self.__connected
|
|
| 70 |
+ |
|
| 71 |
+ async def run(self):
|
|
| 72 |
+ try:
|
|
| 73 |
+ while True:
|
|
| 74 |
+ if not self.connected:
|
|
| 75 |
+ self.create_bot_session()
|
|
| 76 |
+ else:
|
|
| 77 |
+ self.update_bot_session()
|
|
| 78 |
+ |
|
| 79 |
+ await asyncio.sleep(self.__update_period)
|
|
| 80 |
+ except asyncio.CancelledError:
|
|
| 81 |
+ pass
|
|
| 82 |
+ |
|
| 61 | 83 |
def create_bot_session(self):
|
| 62 | 84 |
self.__logger.debug("Creating bot session")
|
| 63 | 85 |
|
| 64 | 86 |
session = self._bots_interface.create_bot_session(self.__parent, self.get_pb2())
|
| 87 |
+ if session in list(grpc.StatusCode):
|
|
| 88 |
+ self.__connected = False
|
|
| 89 |
+ return
|
|
| 90 |
+ self.__connected = True
|
|
| 65 | 91 |
self.__name = session.name
|
| 66 | 92 |
|
| 67 | 93 |
self.__logger.info("Created bot session with name: [%s]", self.__name)
|
| ... | ... | @@ -73,6 +99,10 @@ class BotSession: |
| 73 | 99 |
self.__logger.debug("Updating bot session: [%s]", self.__bot_id)
|
| 74 | 100 |
|
| 75 | 101 |
session = self._bots_interface.update_bot_session(self.get_pb2())
|
| 102 |
+ if session in list(grpc.StatusCode):
|
|
| 103 |
+ self.__connected = False
|
|
| 104 |
+ return
|
|
| 105 |
+ self.__connected = True
|
|
| 76 | 106 |
server_ids = []
|
| 77 | 107 |
|
| 78 | 108 |
for lease in session.leases:
|
