[Notes] [Git][BuildGrid/buildgrid][santi/146-digest-multiple-copies] 2 commits: Add files to the `tests/cas/data/` directory

Santiago Gil pushed to branch santi/146-digest-multiple-copies at BuildGrid / buildgrid

Commits:

e97fdc93

by Santiago Gil at 2019-01-25T09:55:23Z

Add files to the `tests/cas/data/` directory

This allows tests to consider more complex directory structures.

(This is the tests/ directory mentioned in
https://gitlab.com/BuildGrid/buildgrid/issues/146).

4178286c

by Santiago Gil at 2019-01-25T10:01:39Z

Downloader.download_directory(): allow digest to multiple paths

These changes refactor the request queue, adding support for the
scenario where we want to download multiple copies of a file with the
same digest. (Previously only one instance of that file was created,
which resulted in files missing from the output.)

Addresses https://gitlab.com/BuildGrid/buildgrid/issues/146

5 changed files:

buildgrid/client/cas.py
+ tests/cas/data/hello/docs/reference/api.xml
+ tests/cas/data/hello/utils/hello.h
+ tests/cas/data/hello/utils/hello2.sh
tests/cas/test_client.py

Changes:

buildgrid/client/cas.py

@@ -12,10 +12,10 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
+-
 +from collections import namedtuple
  from contextlib import contextmanager
 -import uuid
  import os
 +import uuid
  import grpc
@@ -26,10 +26,11 @@ from buildgrid._protos.google.rpc import code_pb2
  from buildgrid.settings import HASH, MAX_REQUEST_SIZE, MAX_REQUEST_COUNT
  from buildgrid.utils import merkle_tree_maker
+-
  # Maximum size for a queueable file:
  FILE_SIZE_THRESHOLD = 1 * 1024 * 1024
 +_FileRequest = namedtuple('FileRequest', ['digest', 'output_paths'])
++
  class _CallCache:
      """Per remote grpc.StatusCode.UNIMPLEMENTED call cache."""
@@ -339,24 +340,41 @@ class Downloader:
          elif self.__file_request_count >= MAX_REQUEST_COUNT:
              self.flush()
 -        self.__file_requests[digest.hash] = (digest, file_path, is_executable)
 -        self.__file_request_count += 1
 -        self.__file_request_size += digest.ByteSize()
 -        self.__file_response_size += digest.size_bytes
 +        output_path = (file_path, is_executable)
++
 +        # When queueing a file we take into account the cases where
 +        # we might want to download the same digest to different paths.
 +        if digest.hash not in self.__file_requests:
 +            request = _FileRequest(digest=digest, output_paths=[output_path])
 +            self.__file_requests[digest.hash] = request
 -    def _fetch_file_batch(self, batch):
 -        """Sends queued data using ContentAddressableStorage.BatchReadBlobs()"""
 -        batch_digests = [digest for digest, _, _ in batch.values()]
 +            self.__file_request_count += 1
 +            self.__file_request_size += digest.ByteSize()
 +            self.__file_response_size += digest.size_bytes
 +        else:
 +            # We already have that hash queued; we'll fetch the blob
 +            # once and write copies of it:
 +            self.__file_requests[digest.hash].output_paths.append(output_path)
++
 +    def _fetch_file_batch(self, requests):
 +        """Sends queued data using ContentAddressableStorage.BatchReadBlobs().
++
 +        Takes a dictionary (digest.hash, _FileRequest) as input.
 +        """
 +        batch_digests = [request.digest for request in requests.values()]
          batch_blobs = self._fetch_blob_batch(batch_digests)
 -        for (_, file_path, is_executable), file_blob in zip(batch.values(), batch_blobs):
 -            os.makedirs(os.path.dirname(file_path), exist_ok=True)
 +        for file_digest, file_blob in zip(batch_digests, batch_blobs):
 +            output_paths = requests[file_digest.hash].output_paths
++
 +            for file_path, is_executable in output_paths:
 +                os.makedirs(os.path.dirname(file_path), exist_ok=True)
 -            with open(file_path, 'wb') as byte_file:
 -                byte_file.write(file_blob)
 +                with open(file_path, 'wb') as byte_file:
 +                    byte_file.write(file_blob)
 -            if is_executable:
 -                os.chmod(file_path, 0o755)  # rwxr-xr-x / 755
 +                if is_executable:
 +                    os.chmod(file_path, 0o755)  # rwxr-xr-x / 755
      def _fetch_directory(self, digest, directory_path):
          """Fetches a file using ByteStream.GetTree()"""

tests/cas/data/hello/docs/reference/api.xml

tests/cas/data/hello/utils/hello.h

+#define HELLO_WORLD "Hello, World!"

tests/cas/data/hello/utils/hello2.sh

 +#!/bin/bash
++
 +echo "Hello, World!"

tests/cas/test_client.py

@@ -44,12 +44,18 @@ FILES = [
      (os.path.join(DATA_DIR, 'void'),),
      (os.path.join(DATA_DIR, 'hello.cc'),),
      (os.path.join(DATA_DIR, 'hello', 'hello.c'),
 -     os.path.join(DATA_DIR, 'hello', 'hello.h'))]
 +     os.path.join(DATA_DIR, 'hello', 'hello.h'),
 +     os.path.join(DATA_DIR, 'hello', 'hello.sh')),
 +    (os.path.join(DATA_DIR, 'hello', 'docs', 'reference', 'api.xml'),)]
  FOLDERS = [
 -    (os.path.join(DATA_DIR, 'hello'),)]
 -DIRECTORIES = [
 +    (DATA_DIR,),
      (os.path.join(DATA_DIR, 'hello'),),
 -    (os.path.join(DATA_DIR, 'hello'), DATA_DIR)]
 +    (os.path.join(DATA_DIR, 'hello', 'docs'),),
 +    (os.path.join(DATA_DIR, 'hello', 'utils'),),
 +    (os.path.join(DATA_DIR, 'hello', 'docs', 'reference'),)]
 +DIRECTORIES = [
 +    (DATA_DIR,),
 +    (os.path.join(DATA_DIR, 'hello'),)]
  @pytest.mark.parametrize('blobs', BLOBS)

[Notes] [Git][BuildGrid/buildgrid][santi/146-digest-multiple-copies] 2 commits: Add files to the `tests/cas/data/` directory

Santiago Gil pushed to branch santi/146-digest-multiple-copies at BuildGrid / buildgrid

Commits:

5 changed files:

Changes: