[Notes] [Git][BuildGrid/buildgrid][mablanch/132-gather-state-metrics] 9 commits: server/instance.py: Run monitoring bus in main event loop



Title: GitLab

Martin Blanchard pushed to branch mablanch/132-gather-state-metrics at BuildGrid / buildgrid

Commits:

12 changed files:

Changes:

  • .pylintrc
    ... ... @@ -460,6 +460,7 @@ known-third-party=boto3,
    460 460
                       enchant,
    
    461 461
                       google,
    
    462 462
                       grpc,
    
    463
    +                  janus,
    
    463 464
                       moto,
    
    464 465
                       yaml
    
    465 466
     
    
    ... ... @@ -523,4 +524,4 @@ valid-metaclass-classmethod-first-arg=mcs
    523 524
     
    
    524 525
     # Exceptions that will emit a warning when being caught. Defaults to
    
    525 526
     # "Exception"
    
    526
    -overgeneral-exceptions=Exception
    527
    +overgeneral-exceptions=Exception
    \ No newline at end of file

  • buildgrid/server/bots/instance.py
    ... ... @@ -37,6 +37,10 @@ class BotsInterface:
    37 37
             self._bot_sessions = {}
    
    38 38
             self._scheduler = scheduler
    
    39 39
     
    
    40
    +    @property
    
    41
    +    def scheduler(self):
    
    42
    +        return self._scheduler
    
    43
    +
    
    40 44
         def register_instance_with_server(self, instance_name, server):
    
    41 45
             server.add_bots_interface(self, instance_name)
    
    42 46
     
    

  • buildgrid/server/bots/service.py
    ... ... @@ -23,8 +23,9 @@ import logging
    23 23
     
    
    24 24
     import grpc
    
    25 25
     
    
    26
    -from google.protobuf.empty_pb2 import Empty
    
    26
    +from google.protobuf import empty_pb2, timestamp_pb2
    
    27 27
     
    
    28
    +from buildgrid._enums import BotStatus
    
    28 29
     from buildgrid._exceptions import InvalidArgumentError, OutOfSyncError
    
    29 30
     from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2
    
    30 31
     from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2_grpc
    
    ... ... @@ -32,24 +33,65 @@ from buildgrid._protos.google.devtools.remoteworkers.v1test2 import bots_pb2_grp
    32 33
     
    
    33 34
     class BotsService(bots_pb2_grpc.BotsServicer):
    
    34 35
     
    
    35
    -    def __init__(self, server):
    
    36
    +    def __init__(self, server, monitor=True):
    
    36 37
             self.__logger = logging.getLogger(__name__)
    
    37 38
     
    
    39
    +        self.__bots_by_status = None
    
    40
    +        self.__bots_by_instance = None
    
    41
    +        self.__bots = None
    
    42
    +
    
    38 43
             self._instances = {}
    
    39 44
     
    
    40 45
             bots_pb2_grpc.add_BotsServicer_to_server(self, server)
    
    41 46
     
    
    42
    -    def add_instance(self, name, instance):
    
    43
    -        self._instances[name] = instance
    
    47
    +        self._is_instrumented = monitor
    
    48
    +
    
    49
    +        if self._is_instrumented:
    
    50
    +            self.__bots_by_status = {}
    
    51
    +            self.__bots_by_instance = {}
    
    52
    +            self.__bots = {}
    
    53
    +
    
    54
    +            self.__bots_by_status[BotStatus.OK] = set()
    
    55
    +            self.__bots_by_status[BotStatus.UNHEALTHY] = set()
    
    56
    +            self.__bots_by_status[BotStatus.HOST_REBOOTING] = set()
    
    57
    +            self.__bots_by_status[BotStatus.BOT_TERMINATING] = set()
    
    58
    +
    
    59
    +    # --- Public API ---
    
    60
    +
    
    61
    +    def add_instance(self, instance_name, instance):
    
    62
    +        self._instances[instance_name] = instance
    
    63
    +
    
    64
    +        if self._is_instrumented:
    
    65
    +            self.__bots_by_instance[instance_name] = 0
    
    66
    +
    
    67
    +    # --- Public API: Servicer ---
    
    44 68
     
    
    45 69
         def CreateBotSession(self, request, context):
    
    70
    +        """Handles CreateBotSessionRequest messages.
    
    71
    +
    
    72
    +        Args:
    
    73
    +            request (CreateBotSessionRequest): The incoming RPC request.
    
    74
    +            context (grpc.ServicerContext): Context for the RPC call.
    
    75
    +        """
    
    46 76
             self.__logger.debug("CreateBotSession request from [%s]", context.peer())
    
    47 77
     
    
    78
    +        instance_name = request.parent
    
    79
    +        bot_status = BotStatus(request.bot_session.status)
    
    80
    +        bot_id = request.bot_session.bot_id
    
    81
    +
    
    48 82
             try:
    
    49
    -            parent = request.parent
    
    50
    -            instance = self._get_instance(request.parent)
    
    51
    -            return instance.create_bot_session(parent,
    
    52
    -                                               request.bot_session)
    
    83
    +            instance = self._get_instance(instance_name)
    
    84
    +            bot_session = instance.create_bot_session(instance_name,
    
    85
    +                                                      request.bot_session)
    
    86
    +            now = timestamp_pb2.Timestamp()
    
    87
    +            now.GetCurrentTime()
    
    88
    +
    
    89
    +            if self._is_instrumented:
    
    90
    +                self.__bots[bot_id] = now
    
    91
    +                self.__bots_by_instance[instance_name] += 1
    
    92
    +                self.__bots_by_status[bot_status].add(bot_id)
    
    93
    +
    
    94
    +            return bot_session
    
    53 95
     
    
    54 96
             except InvalidArgumentError as e:
    
    55 97
                 self.__logger.error(e)
    
    ... ... @@ -59,17 +101,36 @@ class BotsService(bots_pb2_grpc.BotsServicer):
    59 101
             return bots_pb2.BotSession()
    
    60 102
     
    
    61 103
         def UpdateBotSession(self, request, context):
    
    104
    +        """Handles UpdateBotSessionRequest messages.
    
    105
    +
    
    106
    +        Args:
    
    107
    +            request (UpdateBotSessionRequest): The incoming RPC request.
    
    108
    +            context (grpc.ServicerContext): Context for the RPC call.
    
    109
    +        """
    
    62 110
             self.__logger.debug("UpdateBotSession request from [%s]", context.peer())
    
    63 111
     
    
    112
    +        names = request.name.split("/")
    
    113
    +        bot_status = BotStatus(request.bot_session.status)
    
    114
    +        bot_id = request.bot_session.bot_id
    
    115
    +
    
    64 116
             try:
    
    65
    -            names = request.name.split("/")
    
    66
    -            # Operation name should be in format:
    
    67
    -            # {instance/name}/{uuid}
    
    68
    -            instance_name = ''.join(names[0:-1])
    
    117
    +            instance_name = '/'.join(names[:-1])
    
    69 118
     
    
    70 119
                 instance = self._get_instance(instance_name)
    
    71
    -            return instance.update_bot_session(request.name,
    
    72
    -                                               request.bot_session)
    
    120
    +            bot_session = instance.update_bot_session(request.name,
    
    121
    +                                                      request.bot_session)
    
    122
    +
    
    123
    +            if self._is_instrumented:
    
    124
    +                self.__bots[bot_id].GetCurrentTime()
    
    125
    +                if bot_id not in self.__bots_by_status[bot_status]:
    
    126
    +                    self.__bots_by_status[BotStatus.OK].discard(bot_id)
    
    127
    +                    self.__bots_by_status[BotStatus.UNHEALTHY].discard(bot_id)
    
    128
    +                    self.__bots_by_status[BotStatus.HOST_REBOOTING].discard(bot_id)
    
    129
    +                    self.__bots_by_status[BotStatus.BOT_TERMINATING].discard(bot_id)
    
    130
    +
    
    131
    +                    self.__bots_by_status[bot_status].add(bot_id)
    
    132
    +
    
    133
    +            return bot_session
    
    73 134
     
    
    74 135
             except InvalidArgumentError as e:
    
    75 136
                 self.__logger.error(e)
    
    ... ... @@ -89,10 +150,46 @@ class BotsService(bots_pb2_grpc.BotsServicer):
    89 150
             return bots_pb2.BotSession()
    
    90 151
     
    
    91 152
         def PostBotEventTemp(self, request, context):
    
    153
    +        """Handles PostBotEventTempRequest messages.
    
    154
    +
    
    155
    +        Args:
    
    156
    +            request (PostBotEventTempRequest): The incoming RPC request.
    
    157
    +            context (grpc.ServicerContext): Context for the RPC call.
    
    158
    +        """
    
    92 159
             self.__logger.debug("PostBotEventTemp request from [%s]", context.peer())
    
    93 160
     
    
    94 161
             context.set_code(grpc.StatusCode.UNIMPLEMENTED)
    
    95
    -        return Empty()
    
    162
    +
    
    163
    +        return empty_pb2.Empty()
    
    164
    +
    
    165
    +    # --- Public API: Monitoring ---
    
    166
    +
    
    167
    +    @property
    
    168
    +    def is_instrumented(self):
    
    169
    +        return self._is_instrumented
    
    170
    +
    
    171
    +    def query_n_bots(self):
    
    172
    +        if self.__bots is not None:
    
    173
    +            return len(self.__bots)
    
    174
    +        return 0
    
    175
    +
    
    176
    +    def query_n_bots_for_instance(self, instance_name):
    
    177
    +        try:
    
    178
    +            if self.__bots_by_instance is not None:
    
    179
    +                return self.__bots_by_instance[instance_name]
    
    180
    +        except KeyError:
    
    181
    +            pass
    
    182
    +        return 0
    
    183
    +
    
    184
    +    def query_n_bots_for_status(self, bot_status):
    
    185
    +        try:
    
    186
    +            if self.__bots_by_status is not None:
    
    187
    +                return len(self.__bots_by_status[bot_status])
    
    188
    +        except KeyError:
    
    189
    +            pass
    
    190
    +        return 0
    
    191
    +
    
    192
    +    # --- Private API ---
    
    96 193
     
    
    97 194
         def _get_instance(self, name):
    
    98 195
             try:
    

  • buildgrid/server/execution/instance.py
    ... ... @@ -35,6 +35,10 @@ class ExecutionInstance:
    35 35
             self._storage = storage
    
    36 36
             self._scheduler = scheduler
    
    37 37
     
    
    38
    +    @property
    
    39
    +    def scheduler(self):
    
    40
    +        return self._scheduler
    
    41
    +
    
    38 42
         def register_instance_with_server(self, instance_name, server):
    
    39 43
             server.add_execution_instance(self, instance_name)
    
    40 44
     
    

  • buildgrid/server/execution/service.py
    ... ... @@ -33,30 +33,84 @@ from buildgrid._protos.google.longrunning import operations_pb2
    33 33
     
    
    34 34
     class ExecutionService(remote_execution_pb2_grpc.ExecutionServicer):
    
    35 35
     
    
    36
    -    def __init__(self, server):
    
    36
    +    def __init__(self, server, monitor=True):
    
    37 37
             self.__logger = logging.getLogger(__name__)
    
    38 38
     
    
    39
    +        self.__peers_by_instance = None
    
    40
    +        self.__peers = None
    
    41
    +
    
    39 42
             self._instances = {}
    
    43
    +
    
    40 44
             remote_execution_pb2_grpc.add_ExecutionServicer_to_server(self, server)
    
    41 45
     
    
    42
    -    def add_instance(self, name, instance):
    
    43
    -        self._instances[name] = instance
    
    46
    +        self._is_instrumented = monitor
    
    47
    +
    
    48
    +        if self._is_instrumented:
    
    49
    +            self.__peers_by_instance = {}
    
    50
    +            self.__peers = {}
    
    51
    +
    
    52
    +    # --- Public API ---
    
    53
    +
    
    54
    +    def add_instance(self, instance_name, instance):
    
    55
    +        """Registers a new servicer instance.
    
    56
    +
    
    57
    +        Args:
    
    58
    +            instance_name (str): The new instance's name.
    
    59
    +            instance (ExecutionInstance): The new instance itself.
    
    60
    +        """
    
    61
    +        self._instances[instance_name] = instance
    
    62
    +
    
    63
    +        if self._is_instrumented:
    
    64
    +            self.__peers_by_instance[instance_name] = set()
    
    65
    +
    
    66
    +    def get_scheduler(self, instance_name):
    
    67
    +        """Retrieves a reference to the scheduler for an instance.
    
    68
    +
    
    69
    +        Args:
    
    70
    +            instance_name (str): The name of the instance to query.
    
    71
    +
    
    72
    +        Returns:
    
    73
    +            Scheduler: A reference to the scheduler for `instance_name`.
    
    74
    +
    
    75
    +        Raises:
    
    76
    +            InvalidArgumentError: If no instance named `instance_name` exists.
    
    77
    +        """
    
    78
    +        instance = self._get_instance(instance_name)
    
    79
    +
    
    80
    +        return instance.scheduler
    
    81
    +
    
    82
    +    # --- Public API: Servicer ---
    
    44 83
     
    
    45 84
         def Execute(self, request, context):
    
    85
    +        """Handles ExecuteRequest messages.
    
    86
    +
    
    87
    +        Args:
    
    88
    +            request (ExecuteRequest): The incoming RPC request.
    
    89
    +            context (grpc.ServicerContext): Context for the RPC call.
    
    90
    +        """
    
    46 91
             self.__logger.debug("Execute request from [%s]", context.peer())
    
    47 92
     
    
    93
    +        instance_name = request.instance_name
    
    94
    +        message_queue = queue.Queue()
    
    95
    +        peer = context.peer()
    
    96
    +
    
    48 97
             try:
    
    49
    -            message_queue = queue.Queue()
    
    50
    -            instance = self._get_instance(request.instance_name)
    
    98
    +            instance = self._get_instance(instance_name)
    
    51 99
                 operation = instance.execute(request.action_digest,
    
    52 100
                                              request.skip_cache_lookup,
    
    53 101
                                              message_queue)
    
    54 102
     
    
    55
    -            context.add_callback(partial(instance.unregister_message_client,
    
    56
    -                                         operation.name, message_queue))
    
    103
    +            context.add_callback(partial(self._rpc_termination_callback,
    
    104
    +                                         peer, instance_name, operation.name, message_queue))
    
    105
    +
    
    106
    +            if self._is_instrumented:
    
    107
    +                if peer not in self.__peers:
    
    108
    +                    self.__peers_by_instance[instance_name].add(peer)
    
    109
    +                    self.__peers[peer] = 1
    
    110
    +                else:
    
    111
    +                    self.__peers[peer] += 1
    
    57 112
     
    
    58
    -            instanced_op_name = "{}/{}".format(request.instance_name,
    
    59
    -                                               operation.name)
    
    113
    +            instanced_op_name = "{}/{}".format(instance_name, operation.name)
    
    60 114
     
    
    61 115
                 self.__logger.info("Operation name: [%s]", instanced_op_name)
    
    62 116
     
    
    ... ... @@ -80,23 +134,37 @@ class ExecutionService(remote_execution_pb2_grpc.ExecutionServicer):
    80 134
                 yield operations_pb2.Operation()
    
    81 135
     
    
    82 136
         def WaitExecution(self, request, context):
    
    137
    +        """Handles WaitExecutionRequest messages.
    
    138
    +
    
    139
    +        Args:
    
    140
    +            request (WaitExecutionRequest): The incoming RPC request.
    
    141
    +            context (grpc.ServicerContext): Context for the RPC call.
    
    142
    +        """
    
    83 143
             self.__logger.debug("WaitExecution request from [%s]", context.peer())
    
    84 144
     
    
    85
    -        try:
    
    86
    -            names = request.name.split("/")
    
    145
    +        names = request.name.split('/')
    
    146
    +        instance_name = '/'.join(names[:-1])
    
    147
    +        operation_name = names[-1]
    
    148
    +        message_queue = queue.Queue()
    
    149
    +        peer = context.peer()
    
    87 150
     
    
    88
    -            # Operation name should be in format:
    
    89
    -            # {instance/name}/{operation_id}
    
    90
    -            instance_name = ''.join(names[0:-1])
    
    151
    +        try:
    
    152
    +            if instance_name != request.instance_name:
    
    153
    +                raise InvalidArgumentError("Invalid operation [{}] for instance [{}]"
    
    154
    +                                            .format(request.name, instance_name))
    
    91 155
     
    
    92
    -            message_queue = queue.Queue()
    
    93
    -            operation_name = names[-1]
    
    94 156
                 instance = self._get_instance(instance_name)
    
    95 157
     
    
    96 158
                 instance.register_message_client(operation_name, message_queue)
    
    159
    +            context.add_callback(partial(self._rpc_termination_callback,
    
    160
    +                                         peer, instance_name, operation_name, message_queue))
    
    97 161
     
    
    98
    -            context.add_callback(partial(instance.unregister_message_client,
    
    99
    -                                         operation_name, message_queue))
    
    162
    +            if self._is_instrumented:
    
    163
    +                if peer not in self.__peers:
    
    164
    +                    self.__peers_by_instance[instance_name].add(peer)
    
    165
    +                    self.__peers[peer] = 1
    
    166
    +                else:
    
    167
    +                    self.__peers[peer] += 1
    
    100 168
     
    
    101 169
                 for operation in instance.stream_operation_updates(message_queue,
    
    102 170
                                                                    operation_name):
    
    ... ... @@ -111,9 +179,42 @@ class ExecutionService(remote_execution_pb2_grpc.ExecutionServicer):
    111 179
                 context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
    
    112 180
                 yield operations_pb2.Operation()
    
    113 181
     
    
    182
    +    # --- Private API ---
    
    183
    +
    
    184
    +    def _rpc_termination_callback(self, peer, instance_name, job_name, message_queue):
    
    185
    +        instance = self._get_instance(instance_name)
    
    186
    +
    
    187
    +        instance.unregister_message_client(job_name, message_queue)
    
    188
    +
    
    189
    +        if self._is_instrumented:
    
    190
    +            if self.__peers[peer] > 1:
    
    191
    +                self.__peers[peer] -= 1
    
    192
    +            else:
    
    193
    +                self.__peers_by_instance[instance_name].remove(peer)
    
    194
    +                del self.__peers[peer]
    
    195
    +
    
    114 196
         def _get_instance(self, name):
    
    115 197
             try:
    
    116 198
                 return self._instances[name]
    
    117 199
     
    
    118 200
             except KeyError:
    
    119 201
                 raise InvalidArgumentError("Instance doesn't exist on server: [{}]".format(name))
    
    202
    +
    
    203
    +    # --- Public API: Monitoring ---
    
    204
    +
    
    205
    +    @property
    
    206
    +    def is_instrumented(self):
    
    207
    +        return self._is_instrumented
    
    208
    +
    
    209
    +    def query_n_clients(self):
    
    210
    +        if self.__peers is not None:
    
    211
    +            return len(self.__peers)
    
    212
    +        return 0
    
    213
    +
    
    214
    +    def query_n_clients_for_instance(self, instance_name):
    
    215
    +        try:
    
    216
    +            if self.__peers_by_instance is not None:
    
    217
    +                return len(self.__peers_by_instance[instance_name])
    
    218
    +        except KeyError:
    
    219
    +            pass
    
    220
    +        return 0

  • buildgrid/server/instance.py
    ... ... @@ -15,18 +15,23 @@
    15 15
     
    
    16 16
     import asyncio
    
    17 17
     from concurrent import futures
    
    18
    +from datetime import timedelta
    
    18 19
     import logging
    
    19 20
     import os
    
    20 21
     import time
    
    21 22
     
    
    22 23
     import grpc
    
    23 24
     
    
    25
    +from buildgrid._enums import MetricRecordDomain, MetricRecordType
    
    26
    +from buildgrid._protos.buildgrid.v2 import monitoring_pb2
    
    24 27
     from buildgrid.server.actioncache.service import ActionCacheService
    
    25 28
     from buildgrid.server.bots.service import BotsService
    
    26 29
     from buildgrid.server.cas.service import ByteStreamService, ContentAddressableStorageService
    
    27 30
     from buildgrid.server.execution.service import ExecutionService
    
    31
    +from buildgrid.server._monitoring import MonitoringBus
    
    28 32
     from buildgrid.server.operations.service import OperationsService
    
    29 33
     from buildgrid.server.referencestorage.service import ReferenceStorageService
    
    34
    +from buildgrid.settings import MONITORING_PERIOD
    
    30 35
     
    
    31 36
     
    
    32 37
     class BuildGridServer:
    
    ... ... @@ -36,7 +41,7 @@ class BuildGridServer:
    36 41
         requisite services.
    
    37 42
         """
    
    38 43
     
    
    39
    -    def __init__(self, max_workers=None):
    
    44
    +    def __init__(self, max_workers=None, monitor=True):
    
    40 45
             """Initializes a new :class:`BuildGridServer` instance.
    
    41 46
     
    
    42 47
             Args:
    
    ... ... @@ -52,6 +57,9 @@ class BuildGridServer:
    52 57
             self.__grpc_server = grpc.server(self.__grpc_executor)
    
    53 58
     
    
    54 59
             self.__main_loop = asyncio.get_event_loop()
    
    60
    +        self.__monitoring_bus = None
    
    61
    +
    
    62
    +        self.__state_monitoring_task = None
    
    55 63
     
    
    56 64
             self._execution_service = None
    
    57 65
             self._bots_service = None
    
    ... ... @@ -61,10 +69,28 @@ class BuildGridServer:
    61 69
             self._cas_service = None
    
    62 70
             self._bytestream_service = None
    
    63 71
     
    
    72
    +        self._schedulers = {}
    
    73
    +        self._instances = set()
    
    74
    +
    
    75
    +        self._is_instrumented = monitor
    
    76
    +
    
    77
    +        if self._is_instrumented:
    
    78
    +            self.__monitoring_bus = MonitoringBus(self.__main_loop)
    
    79
    +
    
    80
    +    # --- Public API ---
    
    81
    +
    
    64 82
         def start(self):
    
    65 83
             """Starts the BuildGrid server.
    
    66 84
             """
    
    67 85
             self.__grpc_server.start()
    
    86
    +
    
    87
    +        if self._is_instrumented:
    
    88
    +            self.__monitoring_bus.start()
    
    89
    +
    
    90
    +            self.__state_monitoring_task = asyncio.ensure_future(
    
    91
    +                self._state_monitoring_worker(period=MONITORING_PERIOD),
    
    92
    +                loop=self.__main_loop)
    
    93
    +
    
    68 94
             self.__main_loop.run_forever()
    
    69 95
     
    
    70 96
         def stop(self, grace=0):
    
    ... ... @@ -73,7 +99,11 @@ class BuildGridServer:
    73 99
             Args:
    
    74 100
                 grace (int, optional): A duration of time in seconds. Defaults to 0.
    
    75 101
             """
    
    76
    -        self.__main_loop.close()
    
    102
    +        if self._is_instrumented:
    
    103
    +            if self.__state_monitoring_task is not None:
    
    104
    +                self.__state_monitoring_task.cancel()
    
    105
    +
    
    106
    +            self.__monitoring_bus.stop()
    
    77 107
     
    
    78 108
             self.__grpc_server.stop(grace)
    
    79 109
     
    
    ... ... @@ -109,9 +139,11 @@ class BuildGridServer:
    109 139
             """
    
    110 140
             if self._execution_service is None:
    
    111 141
                 self._execution_service = ExecutionService(self.__grpc_server)
    
    112
    -
    
    113 142
             self._execution_service.add_instance(instance_name, instance)
    
    114 143
     
    
    144
    +        self._schedulers[instance_name] = instance.scheduler
    
    145
    +        self._instances.add(instance_name)
    
    146
    +
    
    115 147
         def add_bots_interface(self, instance, instance_name):
    
    116 148
             """Adds a :obj:`BotsInterface` to the service.
    
    117 149
     
    
    ... ... @@ -123,9 +155,10 @@ class BuildGridServer:
    123 155
             """
    
    124 156
             if self._bots_service is None:
    
    125 157
                 self._bots_service = BotsService(self.__grpc_server)
    
    126
    -
    
    127 158
             self._bots_service.add_instance(instance_name, instance)
    
    128 159
     
    
    160
    +        self._instances.add(instance_name)
    
    161
    +
    
    129 162
         def add_operations_instance(self, instance, instance_name):
    
    130 163
             """Adds an :obj:`OperationsInstance` to the service.
    
    131 164
     
    
    ... ... @@ -137,7 +170,6 @@ class BuildGridServer:
    137 170
             """
    
    138 171
             if self._operations_service is None:
    
    139 172
                 self._operations_service = OperationsService(self.__grpc_server)
    
    140
    -
    
    141 173
             self._operations_service.add_instance(instance_name, instance)
    
    142 174
     
    
    143 175
         def add_reference_storage_instance(self, instance, instance_name):
    
    ... ... @@ -151,7 +183,6 @@ class BuildGridServer:
    151 183
             """
    
    152 184
             if self._reference_storage_service is None:
    
    153 185
                 self._reference_storage_service = ReferenceStorageService(self.__grpc_server)
    
    154
    -
    
    155 186
             self._reference_storage_service.add_instance(instance_name, instance)
    
    156 187
     
    
    157 188
         def add_action_cache_instance(self, instance, instance_name):
    
    ... ... @@ -165,7 +196,6 @@ class BuildGridServer:
    165 196
             """
    
    166 197
             if self._action_cache_service is None:
    
    167 198
                 self._action_cache_service = ActionCacheService(self.__grpc_server)
    
    168
    -
    
    169 199
             self._action_cache_service.add_instance(instance_name, instance)
    
    170 200
     
    
    171 201
         def add_cas_instance(self, instance, instance_name):
    
    ... ... @@ -179,7 +209,6 @@ class BuildGridServer:
    179 209
             """
    
    180 210
             if self._cas_service is None:
    
    181 211
                 self._cas_service = ContentAddressableStorageService(self.__grpc_server)
    
    182
    -
    
    183 212
             self._cas_service.add_instance(instance_name, instance)
    
    184 213
     
    
    185 214
         def add_bytestream_instance(self, instance, instance_name):
    
    ... ... @@ -193,5 +222,163 @@ class BuildGridServer:
    193 222
             """
    
    194 223
             if self._bytestream_service is None:
    
    195 224
                 self._bytestream_service = ByteStreamService(self.__grpc_server)
    
    196
    -
    
    197 225
             self._bytestream_service.add_instance(instance_name, instance)
    
    226
    +
    
    227
    +    # --- Public API: Monitoring ---
    
    228
    +
    
    229
    +    @property
    
    230
    +    def is_instrumented(self):
    
    231
    +        return self._is_instrumented
    
    232
    +
    
    233
    +    # --- Private API ---
    
    234
    +
    
    235
    +    async def _state_monitoring_worker(self, period=1.0):
    
    236
    +        """Periodically publishes state metrics to the monitoring bus."""
    
    237
    +        async def __state_monitoring_worker():
    
    238
    +            # Emit total clients count record:
    
    239
    +            _, record = self._query_n_clients()
    
    240
    +            await self.__monitoring_bus.send_record(record)
    
    241
    +
    
    242
    +            # Emit total bots count record:
    
    243
    +            _, record = self._query_n_bots()
    
    244
    +            await self.__monitoring_bus.send_record(record)
    
    245
    +
    
    246
    +            queue_times = []
    
    247
    +            # Emits records by instance:
    
    248
    +            for instance_name in self._instances:
    
    249
    +                # Emit instance clients count record:
    
    250
    +                _, record = self._query_n_clients_for_instance(instance_name)
    
    251
    +                await self.__monitoring_bus.send_record(record)
    
    252
    +
    
    253
    +                # Emit instance bots count record:
    
    254
    +                _, record = self._query_n_bots_for_instance(instance_name)
    
    255
    +                await self.__monitoring_bus.send_record(record)
    
    256
    +
    
    257
    +                # Emit instance average queue time record:
    
    258
    +                queue_time, record = self._query_am_queue_time_for_instance(instance_name)
    
    259
    +                await self.__monitoring_bus.send_record(record)
    
    260
    +                if queue_time:
    
    261
    +                    queue_times.append(queue_time)
    
    262
    +
    
    263
    +            # Emit overall average queue time record:
    
    264
    +            if len(queue_times) > 0:
    
    265
    +                am_queue_time = sum(queue_times, timedelta()) / len(queue_times)
    
    266
    +            else:
    
    267
    +                am_queue_time = timedelta()
    
    268
    +            record = self._forge_timer_metric_record(
    
    269
    +                MetricRecordDomain.STATE,
    
    270
    +                'average-queue-time',
    
    271
    +                am_queue_time)
    
    272
    +
    
    273
    +            await self.__monitoring_bus.send_record(record)
    
    274
    +
    
    275
    +            print('---')
    
    276
    +            n_clients = self._execution_service.query_n_clients()
    
    277
    +            n_bots = self._bots_service.query_n_bots()
    
    278
    +            print('Totals: n_clients={}, n_bots={}, am_queue_time={}'
    
    279
    +                  .format(n_clients, n_bots, am_queue_time))
    
    280
    +            print('Per instances:')
    
    281
    +            for instance_name in self._instances:
    
    282
    +                n_clients = self._execution_service.query_n_clients_for_instance(instance_name)
    
    283
    +                n_bots = self._bots_service.query_n_bots_for_instance(instance_name)
    
    284
    +                am_queue_time = self._execution_service.get_scheduler(instance_name).query_am_queue_time()
    
    285
    +                instance_name = instance_name or 'void'
    
    286
    +                print(' - {}: n_clients={}, n_bots={}, am_queue_time={}'
    
    287
    +                      .format(instance_name, n_clients, n_bots, am_queue_time))
    
    288
    +            print('---')
    
    289
    +
    
    290
    +        try:
    
    291
    +            while True:
    
    292
    +                start = time.time()
    
    293
    +                await __state_monitoring_worker()
    
    294
    +
    
    295
    +                end = time.time()
    
    296
    +                await asyncio.sleep(period - (end - start))
    
    297
    +
    
    298
    +        except asyncio.CancelledError:
    
    299
    +            pass
    
    300
    +        except BaseException as e:
    
    301
    +             print(f'__state_monitoring_worker: {e}')
    
    302
    +
    
    303
    +    def _forge_counter_metric_record(self, domain, name, count, extra=None):
    
    304
    +        counter_record = monitoring_pb2.MetricRecord()
    
    305
    +
    
    306
    +        counter_record.creation_timestamp.GetCurrentTime()
    
    307
    +        counter_record.domain = domain.value
    
    308
    +        counter_record.type = MetricRecordType.COUNTER.value
    
    309
    +        counter_record.name = name
    
    310
    +        counter_record.count = count
    
    311
    +        if extra is not None:
    
    312
    +            counter_record.extra.update(extra)
    
    313
    +
    
    314
    +        return record
    
    315
    +
    
    316
    +    def _forge_timer_metric_record(self, domain, name, duration, extra=None):
    
    317
    +        timer_record = monitoring_pb2.MetricRecord()
    
    318
    +
    
    319
    +        timer_record.creation_timestamp.GetCurrentTime()
    
    320
    +        timer_record.domain = domain.value
    
    321
    +        timer_record.type = MetricRecordType.TIMER.value
    
    322
    +        timer_record.name = name
    
    323
    +        timer_record.duration.FromTimedelta(duration)
    
    324
    +        if extra is not None:
    
    325
    +            timer_record.extra.update(extra)
    
    326
    +
    
    327
    +        return timer_record
    
    328
    +
    
    329
    +    def _forge_gauge_metric_record(self, domain, name, value, extra=None):
    
    330
    +        gauge_record = monitoring_pb2.MetricRecord()
    
    331
    +
    
    332
    +        gauge_record.creation_timestamp.GetCurrentTime()
    
    333
    +        gauge_record.domain = domain.value
    
    334
    +        gauge_record.type = MetricRecordType.GAUGE.value
    
    335
    +        gauge_record.name = name
    
    336
    +        gauge_record.value = value
    
    337
    +        if extra is not None:
    
    338
    +            gauge_record.extra.update(extra)
    
    339
    +
    
    340
    +        return gauge_record
    
    341
    +
    
    342
    +    # --- Private API: Monitoring ---
    
    343
    +
    
    344
    +    def _query_n_clients(self):
    
    345
    +        """Queries the number of clients connected."""
    
    346
    +        n_clients = self._execution_service.query_n_clients()
    
    347
    +        gauge_record = self._forge_gauge_metric_record(
    
    348
    +            MetricRecordDomain.STATE, 'clients-count', n_clients)
    
    349
    +
    
    350
    +        return n_clients, gauge_record
    
    351
    +    def _query_n_clients_for_instance(self, instance_name):
    
    352
    +        """Queries the number of clients connected for a given instance"""
    
    353
    +        n_clients = self._execution_service.query_n_clients_for_instance(instance_name)
    
    354
    +        gauge_record = self._forge_gauge_metric_record(
    
    355
    +            MetricRecordDomain.STATE, 'clients-count', n_clients,
    
    356
    +            extra={ 'instance-name': instance_name or 'void' })
    
    357
    +
    
    358
    +        return n_clients, gauge_record
    
    359
    +
    
    360
    +    def _query_n_bots(self):
    
    361
    +        """Queries the number of bots connected."""
    
    362
    +        n_bots = self._bots_service.query_n_bots()
    
    363
    +        gauge_record = self._forge_gauge_metric_record(
    
    364
    +            MetricRecordDomain.STATE, 'bots-count', n_bots)
    
    365
    +
    
    366
    +        return n_bots, gauge_record
    
    367
    +
    
    368
    +    def _query_n_bots_for_instance(self, instance_name):
    
    369
    +        """Queries the number of bots connected for a given instance."""
    
    370
    +        n_bots = self._bots_service.query_n_bots_for_instance(instance_name)
    
    371
    +        gauge_record = self._forge_gauge_metric_record(
    
    372
    +            MetricRecordDomain.STATE, 'bots-count', n_bots,
    
    373
    +            extra={ 'instance-name': instance_name or 'void' })
    
    374
    +
    
    375
    +        return n_bots, gauge_record
    
    376
    +
    
    377
    +    def _query_am_queue_time_for_instance(self, instance_name):
    
    378
    +        """Queries the average job's queue time for a given instance."""
    
    379
    +        am_queue_time = self._schedulers[instance_name].query_am_queue_time()
    
    380
    +        timer_record = self._forge_timer_metric_record(
    
    381
    +            MetricRecordDomain.STATE, 'average-queue-time', am_queue_time,
    
    382
    +            extra={ 'instance-name': instance_name or 'void' })
    
    383
    +
    
    384
    +        return am_queue_time, timer_record

  • buildgrid/server/job.py
    ... ... @@ -13,10 +13,11 @@
    13 13
     # limitations under the License.
    
    14 14
     
    
    15 15
     
    
    16
    +from datetime import datetime
    
    16 17
     import logging
    
    17 18
     import uuid
    
    18 19
     
    
    19
    -from google.protobuf import timestamp_pb2
    
    20
    +from google.protobuf import duration_pb2, timestamp_pb2
    
    20 21
     
    
    21 22
     from buildgrid._enums import LeaseState, OperationStage
    
    22 23
     from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    
    ... ... @@ -37,6 +38,7 @@ class Job:
    37 38
             self.__execute_response = None
    
    38 39
             self.__operation_metadata = remote_execution_pb2.ExecuteOperationMetadata()
    
    39 40
             self.__queued_timestamp = timestamp_pb2.Timestamp()
    
    41
    +        self.__queued_time_duration = duration_pb2.Duration()
    
    40 42
             self.__worker_start_timestamp = timestamp_pb2.Timestamp()
    
    41 43
             self.__worker_completed_timestamp = timestamp_pb2.Timestamp()
    
    42 44
     
    
    ... ... @@ -50,6 +52,8 @@ class Job:
    50 52
             self._operation.done = False
    
    51 53
             self._n_tries = 0
    
    52 54
     
    
    55
    +    # --- Public API ---
    
    56
    +
    
    53 57
         @property
    
    54 58
         def name(self):
    
    55 59
             return self._name
    
    ... ... @@ -179,7 +183,7 @@ class Job:
    179 183
                     result.Unpack(action_result)
    
    180 184
     
    
    181 185
                 action_metadata = action_result.execution_metadata
    
    182
    -            action_metadata.queued_timestamp.CopyFrom(self.__worker_start_timestamp)
    
    186
    +            action_metadata.queued_timestamp.CopyFrom(self.__queued_timestamp)
    
    183 187
                 action_metadata.worker_start_timestamp.CopyFrom(self.__worker_start_timestamp)
    
    184 188
                 action_metadata.worker_completed_timestamp.CopyFrom(self.__worker_completed_timestamp)
    
    185 189
     
    
    ... ... @@ -204,6 +208,10 @@ class Job:
    204 208
                     self.__queued_timestamp.GetCurrentTime()
    
    205 209
                 self._n_tries += 1
    
    206 210
     
    
    211
    +        elif self.__operation_metadata.stage == OperationStage.EXECUTING.value:
    
    212
    +            queue_in, queue_out = self.__queued_timestamp.ToDatetime(), datetime.now()
    
    213
    +            self.__queued_time_duration.FromTimedelta(queue_out - queue_in)
    
    214
    +
    
    207 215
             elif self.__operation_metadata.stage == OperationStage.COMPLETED.value:
    
    208 216
                 if self.__execute_response is not None:
    
    209 217
                     self._operation.response.Pack(self.__execute_response)
    
    ... ... @@ -213,3 +221,11 @@ class Job:
    213 221
     
    
    214 222
             for queue in self._operation_update_queues:
    
    215 223
                 queue.put(self._operation)
    
    224
    +
    
    225
    +    # --- Public API: Monitoring ---
    
    226
    +
    
    227
    +    def query_queue_time(self):
    
    228
    +        return self.__queued_time_duration.ToTimedelta()
    
    229
    +
    
    230
    +    def query_n_retries(self):
    
    231
    +        return self._n_tries - 1 if self._n_tries > 0 else 0

  • buildgrid/server/operations/instance.py
    ... ... @@ -32,6 +32,10 @@ class OperationsInstance:
    32 32
     
    
    33 33
             self._scheduler = scheduler
    
    34 34
     
    
    35
    +    @property
    
    36
    +    def scheduler(self):
    
    37
    +        return self._scheduler
    
    38
    +
    
    35 39
         def register_instance_with_server(self, instance_name, server):
    
    36 40
             server.add_operations_instance(self, instance_name)
    
    37 41
     
    

  • buildgrid/server/operations/service.py
    ... ... @@ -38,8 +38,18 @@ class OperationsService(operations_pb2_grpc.OperationsServicer):
    38 38
     
    
    39 39
             operations_pb2_grpc.add_OperationsServicer_to_server(self, server)
    
    40 40
     
    
    41
    -    def add_instance(self, name, instance):
    
    42
    -        self._instances[name] = instance
    
    41
    +    # --- Public API ---
    
    42
    +
    
    43
    +    def add_instance(self, instance_name, instance):
    
    44
    +        """Registers a new servicer instance.
    
    45
    +
    
    46
    +        Args:
    
    47
    +            instance_name (str): The new instance's name.
    
    48
    +            instance (OperationsInstance): The new instance itself.
    
    49
    +        """
    
    50
    +        self._instances[instance_name] = instance
    
    51
    +
    
    52
    +    # --- Public API: Servicer ---
    
    43 53
     
    
    44 54
         def GetOperation(self, request, context):
    
    45 55
             self.__logger.debug("GetOperation request from [%s]", context.peer())
    
    ... ... @@ -132,6 +142,8 @@ class OperationsService(operations_pb2_grpc.OperationsServicer):
    132 142
     
    
    133 143
             return Empty()
    
    134 144
     
    
    145
    +    # --- Private API ---
    
    146
    +
    
    135 147
         def _parse_instance_name(self, name):
    
    136 148
             """ If the instance name is not blank, 'name' will have the form
    
    137 149
             {instance_name}/{operation_uuid}. Otherwise, it will just be
    

  • buildgrid/server/scheduler.py
    ... ... @@ -20,24 +20,38 @@ Schedules jobs.
    20 20
     """
    
    21 21
     
    
    22 22
     from collections import deque
    
    23
    +from datetime import timedelta
    
    23 24
     import logging
    
    24 25
     
    
    26
    +from buildgrid._enums import LeaseState, OperationStage
    
    25 27
     from buildgrid._exceptions import NotFoundError
    
    26 28
     
    
    27
    -from .job import OperationStage, LeaseState
    
    28
    -
    
    29 29
     
    
    30 30
     class Scheduler:
    
    31 31
     
    
    32 32
         MAX_N_TRIES = 5
    
    33 33
     
    
    34
    -    def __init__(self, action_cache=None):
    
    34
    +    def __init__(self, action_cache=None, monitor=True):
    
    35 35
             self.__logger = logging.getLogger(__name__)
    
    36 36
     
    
    37
    +        self.__queue_times_by_priority = None
    
    38
    +        self.__queue_time_average = None
    
    39
    +        self.__retries_by_error = None
    
    40
    +        self.__retries_count = 0
    
    41
    +
    
    37 42
             self._action_cache = action_cache
    
    38 43
             self.jobs = {}
    
    39 44
             self.queue = deque()
    
    40 45
     
    
    46
    +        self._is_instrumented = monitor
    
    47
    +
    
    48
    +        if self._is_instrumented:
    
    49
    +            self.__queue_time_average = 0, timedelta()
    
    50
    +            self.__queue_times_by_priority = {}
    
    51
    +            self.__retries_by_error = {}
    
    52
    +
    
    53
    +    # --- Public API ---
    
    54
    +
    
    41 55
         def register_client(self, job_name, queue):
    
    42 56
             self.jobs[job_name].register_client(queue)
    
    43 57
     
    
    ... ... @@ -66,18 +80,22 @@ class Scheduler:
    66 80
                 operation_stage = OperationStage.QUEUED
    
    67 81
                 self.queue.append(job)
    
    68 82
     
    
    69
    -        job.update_operation_stage(operation_stage)
    
    83
    +        self._update_job_operation_stage(job.name, operation_stage)
    
    70 84
     
    
    71 85
         def retry_job(self, job_name):
    
    72
    -        if job_name in self.jobs:
    
    73
    -            job = self.jobs[job_name]
    
    74
    -            if job.n_tries >= self.MAX_N_TRIES:
    
    75
    -                # TODO: Decide what to do with these jobs
    
    76
    -                job.update_operation_stage(OperationStage.COMPLETED)
    
    77
    -                # TODO: Mark these jobs as done
    
    78
    -            else:
    
    79
    -                job.update_operation_stage(OperationStage.QUEUED)
    
    80
    -                self.queue.appendleft(job)
    
    86
    +        job = self.jobs[job_name]
    
    87
    +
    
    88
    +        operation_stage = None
    
    89
    +        if job.n_tries >= self.MAX_N_TRIES:
    
    90
    +            # TODO: Decide what to do with these jobs
    
    91
    +            operation_stage = OperationStage.COMPLETED
    
    92
    +            # TODO: Mark these jobs as done
    
    93
    +
    
    94
    +        else:
    
    95
    +            operation_stage = OperationStage.QUEUED
    
    96
    +            self.queue.appendleft(job)
    
    97
    +
    
    98
    +        self._update_job_operation_stage(job_name, operation_stage)
    
    81 99
     
    
    82 100
         def list_jobs(self):
    
    83 101
             return self.jobs.values()
    
    ... ... @@ -112,13 +130,14 @@ class Scheduler:
    112 130
             """
    
    113 131
             job = self.jobs[job_name]
    
    114 132
     
    
    133
    +        operation_stage = None
    
    115 134
             if lease_state == LeaseState.PENDING:
    
    116 135
                 job.update_lease_state(LeaseState.PENDING)
    
    117
    -            job.update_operation_stage(OperationStage.QUEUED)
    
    136
    +            operation_stage = OperationStage.QUEUED
    
    118 137
     
    
    119 138
             elif lease_state == LeaseState.ACTIVE:
    
    120 139
                 job.update_lease_state(LeaseState.ACTIVE)
    
    121
    -            job.update_operation_stage(OperationStage.EXECUTING)
    
    140
    +            operation_stage = OperationStage.EXECUTING
    
    122 141
     
    
    123 142
             elif lease_state == LeaseState.COMPLETED:
    
    124 143
                 job.update_lease_state(LeaseState.COMPLETED,
    
    ... ... @@ -127,7 +146,9 @@ class Scheduler:
    127 146
                 if self._action_cache is not None and not job.do_not_cache:
    
    128 147
                     self._action_cache.update_action_result(job.action_digest, job.action_result)
    
    129 148
     
    
    130
    -            job.update_operation_stage(OperationStage.COMPLETED)
    
    149
    +            operation_stage = OperationStage.COMPLETED
    
    150
    +
    
    151
    +        self._update_job_operation_stage(job_name, operation_stage)
    
    131 152
     
    
    132 153
         def get_job_lease(self, job_name):
    
    133 154
             """Returns the lease associated to job, if any have been emitted yet."""
    
    ... ... @@ -136,3 +157,83 @@ class Scheduler:
    136 157
         def get_job_operation(self, job_name):
    
    137 158
             """Returns the operation associated to job."""
    
    138 159
             return self.jobs[job_name].operation
    
    160
    +
    
    161
    +    # --- Public API: Monitoring ---
    
    162
    +
    
    163
    +    @property
    
    164
    +    def is_instrumented(self):
    
    165
    +        return self._is_instrumented
    
    166
    +
    
    167
    +    def query_n_jobs(self):
    
    168
    +        return len(self.jobs)
    
    169
    +
    
    170
    +    def query_n_operations(self):
    
    171
    +        return len(self.jobs)
    
    172
    +
    
    173
    +    def query_n_operations_by_stage(self):
    
    174
    +        return len(self.jobs)
    
    175
    +
    
    176
    +    def query_n_leases(self):
    
    177
    +        return len(self.jobs)
    
    178
    +
    
    179
    +    def query_n_leases_by_state(self):
    
    180
    +        return len(self.jobs)
    
    181
    +
    
    182
    +    def query_n_retries(self):
    
    183
    +        return self.__retries_count
    
    184
    +
    
    185
    +    def query_n_retries_for_error(self, error_type):
    
    186
    +        try:
    
    187
    +            if self.__retries_by_error is not None:
    
    188
    +                return self.__retries_by_error[error_type]
    
    189
    +        except KeyError:
    
    190
    +            pass
    
    191
    +        return 0
    
    192
    +
    
    193
    +    def query_am_queue_time(self):
    
    194
    +        if self.__queue_time_average is not None:
    
    195
    +            return self.__queue_time_average[1]
    
    196
    +        return 0
    
    197
    +
    
    198
    +    def query_am_queue_time_for_priority(self, priority_level):
    
    199
    +        try:
    
    200
    +            if self.__queue_times_by_priority is not None:
    
    201
    +                return self.__queue_times_by_priority[priority_level]
    
    202
    +        except KeyError:
    
    203
    +            pass
    
    204
    +        return 0
    
    205
    +
    
    206
    +    # --- Private API ---
    
    207
    +
    
    208
    +    def _update_job_operation_stage(self, job_name, operation_stage):
    
    209
    +        """Requests a stage transition for the job's :class:Operations.
    
    210
    +
    
    211
    +        Args:
    
    212
    +            job_name (str): name of the job to query.
    
    213
    +            operation_stage (OperationStage): the stage to transition to.
    
    214
    +        """
    
    215
    +        job = self.jobs[job_name]
    
    216
    +
    
    217
    +        if operation_stage == OperationStage.CACHE_CHECK:
    
    218
    +            job.update_operation_stage(OperationStage.CACHE_CHECK)
    
    219
    +
    
    220
    +        elif operation_stage == OperationStage.QUEUED:
    
    221
    +            job.update_operation_stage(OperationStage.QUEUED)
    
    222
    +
    
    223
    +        elif operation_stage == OperationStage.EXECUTING:
    
    224
    +            job.update_operation_stage(OperationStage.EXECUTING)
    
    225
    +
    
    226
    +        elif operation_stage == OperationStage.COMPLETED:
    
    227
    +            job.update_operation_stage(OperationStage.COMPLETED)
    
    228
    +
    
    229
    +            if self._is_instrumented:
    
    230
    +                average_order, average_time = self.__queue_time_average
    
    231
    +
    
    232
    +                average_order += 1
    
    233
    +                if average_order <= 1:
    
    234
    +                    average_time = job.query_queue_time()
    
    235
    +                else:
    
    236
    +                    queue_time = job.query_queue_time()
    
    237
    +                    average_time = average_time + ((queue_time - average_time) / average_order)
    
    238
    +
    
    239
    +                self.__queue_time_average = average_order, average_time

  • buildgrid/settings.py
    1
    +# Copyright (C) 2018 Bloomberg LP
    
    2
    +#
    
    3
    +# Licensed under the Apache License, Version 2.0 (the "License");
    
    4
    +# you may not use this file except in compliance with the License.
    
    5
    +# You may obtain a copy of the License at
    
    6
    +#
    
    7
    +#  <http://www.apache.org/licenses/LICENSE-2.0>
    
    8
    +#
    
    9
    +# Unless required by applicable law or agreed to in writing, software
    
    10
    +# distributed under the License is distributed on an "AS IS" BASIS,
    
    11
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    
    12
    +# See the License for the specific language governing permissions and
    
    13
    +# limitations under the License.
    
    14
    +
    
    15
    +
    
    1 16
     import hashlib
    
    2 17
     
    
    3 18
     
    
    4
    -# The hash function that CAS uses
    
    19
    +# Hash function used for computing digests:
    
    5 20
     HASH = hashlib.sha256
    
    21
    +
    
    22
    +# Length in bytes of a hash string returned by HASH:
    
    6 23
     HASH_LENGTH = HASH().digest_size * 2
    
    24
    +
    
    25
    +# Period, in seconds, for the monitoring cycle:
    
    26
    +MONITORING_PERIOD = 5.0

  • setup.py
    ... ... @@ -112,13 +112,15 @@ setup(
    112 112
         license="Apache License, Version 2.0",
    
    113 113
         description="A remote execution service",
    
    114 114
         packages=find_packages(),
    
    115
    +    python_requires='>= 3.5.3',  # janus requirement
    
    115 116
         install_requires=[
    
    116
    -        'protobuf',
    
    117
    -        'grpcio',
    
    118
    -        'Click',
    
    119
    -        'PyYAML',
    
    120 117
             'boto3 < 1.8.0',
    
    121 118
             'botocore < 1.11.0',
    
    119
    +        'click',
    
    120
    +        'grpcio',
    
    121
    +        'janus',
    
    122
    +        'protobuf',
    
    123
    +        'pyyaml',
    
    122 124
         ],
    
    123 125
         entry_points={
    
    124 126
             'console_scripts': [
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]