[Notes] [Git][BuildGrid/buildgrid][master] Downloader._write_directory(): request dirs in batches



Title: GitLab

Martin Blanchard pushed to branch master at BuildGrid / buildgrid

Commits:

1 changed file:

Changes:

  • buildgrid/client/cas.py
    ... ... @@ -25,7 +25,7 @@ from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_p
    25 25
     from buildgrid._protos.google.bytestream import bytestream_pb2, bytestream_pb2_grpc
    
    26 26
     from buildgrid._protos.google.rpc import code_pb2
    
    27 27
     from buildgrid.settings import HASH, MAX_REQUEST_SIZE, MAX_REQUEST_COUNT
    
    28
    -from buildgrid.utils import merkle_tree_maker
    
    28
    +from buildgrid.utils import create_digest, merkle_tree_maker
    
    29 29
     
    
    30 30
     # Maximum size for a queueable file:
    
    31 31
     FILE_SIZE_THRESHOLD = 1 * 1024 * 1024
    
    ... ... @@ -418,9 +418,8 @@ class Downloader:
    418 418
                     else:
    
    419 419
                         assert False
    
    420 420
     
    
    421
    -        # TODO: Try with BatchReadBlobs().
    
    422
    -
    
    423
    -        # Fallback to Read() if no GetTree():
    
    421
    +        # If no GetTree(), _write_directory() will use BatchReadBlobs()
    
    422
    +        # if available or Read() if not.
    
    424 423
             if not directory_fetched:
    
    425 424
                 directory = remote_execution_pb2.Directory()
    
    426 425
                 directory.ParseFromString(self._fetch_blob(digest))
    
    ... ... @@ -428,26 +427,61 @@ class Downloader:
    428 427
                 self._write_directory(directory, directory_path,
    
    429 428
                                       root_barrier=directory_path)
    
    430 429
     
    
    431
    -    def _write_directory(self, root_directory, root_path, directories=None, root_barrier=None):
    
    430
    +    def _write_directory(self, root_directory, root_path, directories=None,
    
    431
    +                         root_barrier=None):
    
    432 432
             """Generates a local directory structure"""
    
    433
    +
    
    434
    +        # i) Files:
    
    433 435
             for file_node in root_directory.files:
    
    434 436
                 file_path = os.path.join(root_path, file_node.name)
    
    435 437
     
    
    436
    -            self._queue_file(file_node.digest, file_path, is_executable=file_node.is_executable)
    
    438
    +            self._queue_file(file_node.digest, file_path,
    
    439
    +                             is_executable=file_node.is_executable)
    
    437 440
     
    
    441
    +        # ii) Directories:
    
    442
    +        pending_directory_digests = []
    
    443
    +        pending_directory_paths = {}
    
    438 444
             for directory_node in root_directory.directories:
    
    445
    +            directory_hash = directory_node.digest.hash
    
    446
    +
    
    439 447
                 directory_path = os.path.join(root_path, directory_node.name)
    
    448
    +            os.makedirs(directory_path, exist_ok=True)
    
    449
    +
    
    440 450
                 if directories and directory_node.digest.hash in directories:
    
    441
    -                directory = directories[directory_node.digest.hash]
    
    451
    +                # We already have the directory; just write it:
    
    452
    +                directory = directories[directory_hash]
    
    453
    +
    
    454
    +                self._write_directory(directory, directory_path,
    
    455
    +                                      directories=directories,
    
    456
    +                                      root_barrier=root_barrier)
    
    442 457
                 else:
    
    458
    +                # Gather all the directories that we need to get to
    
    459
    +                # try fetching them in a single batch request:
    
    460
    +                pending_directory_digests.append(directory_node.digest)
    
    461
    +                pending_directory_paths[directory_hash] = directory_path
    
    462
    +
    
    463
    +        if pending_directory_paths:
    
    464
    +            fetched_blobs = self._fetch_blob_batch(pending_directory_digests)
    
    465
    +
    
    466
    +            for directory_blob in fetched_blobs:
    
    443 467
                     directory = remote_execution_pb2.Directory()
    
    444
    -                directory.ParseFromString(self._fetch_blob(directory_node.digest))
    
    468
    +                directory.ParseFromString(directory_blob)
    
    445 469
     
    
    446
    -            os.makedirs(directory_path, exist_ok=True)
    
    470
    +                # Assuming that the server might not return the blobs in
    
    471
    +                # the same order as they were asked for, we read
    
    472
    +                # the hashes of the returned blobs:
    
    473
    +                directory_hash = create_digest(directory_blob).hash
    
    474
    +                # Guarantees for the reply orderings might change in
    
    475
    +                # the specification at some point.
    
    476
    +                # See: github.com/bazelbuild/remote-apis/issues/52
    
    447 477
     
    
    448
    -            self._write_directory(directory, directory_path,
    
    449
    -                                  directories=directories, root_barrier=root_barrier)
    
    478
    +                directory_path = pending_directory_paths[directory_hash]
    
    479
    +
    
    480
    +                self._write_directory(directory, directory_path,
    
    481
    +                                      directories=directories,
    
    482
    +                                      root_barrier=root_barrier)
    
    450 483
     
    
    484
    +        # iii) Symlinks:
    
    451 485
             for symlink_node in root_directory.symlinks:
    
    452 486
                 symlink_path = os.path.join(root_path, symlink_node.name)
    
    453 487
                 if not os.path.isabs(symlink_node.target):
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]