Martin Blanchard pushed to branch master at BuildGrid / buildgrid
Commits:
- f12ec81d by Martin Blanchard at 2018-09-06T14:28:35Z
- 32a80c82 by Martin Blanchard at 2018-09-10T12:42:15Z
- b3cc82a0 by Martin Blanchard at 2018-09-10T12:42:17Z
- 2c363d24 by Martin Blanchard at 2018-09-10T12:42:17Z
- 9948d3a7 by Martin Blanchard at 2018-09-10T12:42:17Z
3 changed files:
Changes:
| ... | ... | @@ -19,71 +19,94 @@ import tempfile |
| 19 | 19 |
|
| 20 | 20 |
from google.protobuf import any_pb2
|
| 21 | 21 |
|
| 22 |
-from buildgrid.utils import read_file, create_digest, write_fetch_directory, parse_to_pb2_from_fetch
|
|
| 23 |
-from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc
|
|
| 22 |
+from buildgrid.client.cas import upload
|
|
| 23 |
+from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
|
|
| 24 | 24 |
from buildgrid._protos.google.bytestream import bytestream_pb2_grpc
|
| 25 |
+from buildgrid.utils import write_fetch_directory, parse_to_pb2_from_fetch
|
|
| 26 |
+from buildgrid.utils import output_file_maker, output_directory_maker
|
|
| 25 | 27 |
|
| 26 | 28 |
|
| 27 | 29 |
def work_temp_directory(context, lease):
|
| 28 |
- """ Bot downloads directories and files into a temp directory,
|
|
| 29 |
- then uploads results back to CAS
|
|
| 30 |
+ """Executes a lease for a build action, using host tools.
|
|
| 30 | 31 |
"""
|
| 31 | 32 |
|
| 32 |
- parent = context.parent
|
|
| 33 | 33 |
stub_bytestream = bytestream_pb2_grpc.ByteStreamStub(context.cas_channel)
|
| 34 |
+ instance_name = context.parent
|
|
| 35 |
+ logger = context.logger
|
|
| 34 | 36 |
|
| 35 | 37 |
action_digest = remote_execution_pb2.Digest()
|
| 36 | 38 |
lease.payload.Unpack(action_digest)
|
| 37 | 39 |
|
| 38 |
- action = remote_execution_pb2.Action()
|
|
| 40 |
+ action = parse_to_pb2_from_fetch(remote_execution_pb2.Action(),
|
|
| 41 |
+ stub_bytestream, action_digest, instance_name)
|
|
| 39 | 42 |
|
| 40 |
- action = parse_to_pb2_from_fetch(action, stub_bytestream, action_digest, parent)
|
|
| 43 |
+ with tempfile.TemporaryDirectory() as temp_directory:
|
|
| 44 |
+ command = parse_to_pb2_from_fetch(remote_execution_pb2.Command(),
|
|
| 45 |
+ stub_bytestream, action.command_digest, instance_name)
|
|
| 41 | 46 |
|
| 42 |
- with tempfile.TemporaryDirectory() as temp_dir:
|
|
| 47 |
+ write_fetch_directory(temp_directory, stub_bytestream,
|
|
| 48 |
+ action.input_root_digest, instance_name)
|
|
| 43 | 49 |
|
| 44 |
- command = remote_execution_pb2.Command()
|
|
| 45 |
- command = parse_to_pb2_from_fetch(command, stub_bytestream, action.command_digest, parent)
|
|
| 46 |
- |
|
| 47 |
- arguments = "cd {} &&".format(temp_dir)
|
|
| 50 |
+ environment = os.environ.copy()
|
|
| 51 |
+ for variable in command.environment_variables:
|
|
| 52 |
+ if variable.name not in ['PATH', 'PWD']:
|
|
| 53 |
+ environment[variable.name] = variable.value
|
|
| 48 | 54 |
|
| 55 |
+ command_line = list()
|
|
| 49 | 56 |
for argument in command.arguments:
|
| 50 |
- arguments += " {}".format(argument)
|
|
| 51 |
- |
|
| 52 |
- context.logger.info(arguments)
|
|
| 53 |
- |
|
| 54 |
- write_fetch_directory(temp_dir, stub_bytestream, action.input_root_digest, parent)
|
|
| 55 |
- |
|
| 56 |
- proc = subprocess.Popen(arguments,
|
|
| 57 |
- shell=True,
|
|
| 58 |
- stdin=subprocess.PIPE,
|
|
| 59 |
- stdout=subprocess.PIPE)
|
|
| 60 |
- |
|
| 61 |
- # TODO: Should return the std_out to the user
|
|
| 62 |
- proc.communicate()
|
|
| 63 |
- |
|
| 64 |
- result = remote_execution_pb2.ActionResult()
|
|
| 65 |
- requests = []
|
|
| 66 |
- for output_file in command.output_files:
|
|
| 67 |
- path = os.path.join(temp_dir, output_file)
|
|
| 68 |
- chunk = read_file(path)
|
|
| 69 |
- |
|
| 70 |
- digest = create_digest(chunk)
|
|
| 71 |
- |
|
| 72 |
- result.output_files.extend([remote_execution_pb2.OutputFile(path=output_file,
|
|
| 73 |
- digest=digest)])
|
|
| 74 |
- |
|
| 75 |
- requests.append(remote_execution_pb2.BatchUpdateBlobsRequest.Request(
|
|
| 76 |
- digest=digest, data=chunk))
|
|
| 77 |
- |
|
| 78 |
- request = remote_execution_pb2.BatchUpdateBlobsRequest(instance_name=parent,
|
|
| 79 |
- requests=requests)
|
|
| 80 |
- |
|
| 81 |
- stub_cas = remote_execution_pb2_grpc.ContentAddressableStorageStub(context.cas_channel)
|
|
| 82 |
- stub_cas.BatchUpdateBlobs(request)
|
|
| 83 |
- |
|
| 84 |
- result_any = any_pb2.Any()
|
|
| 85 |
- result_any.Pack(result)
|
|
| 86 |
- |
|
| 87 |
- lease.result.CopyFrom(result_any)
|
|
| 57 |
+ command_line.append(argument.strip())
|
|
| 58 |
+ |
|
| 59 |
+ working_directory = None
|
|
| 60 |
+ if command.working_directory:
|
|
| 61 |
+ working_directory = os.path.join(temp_directory,
|
|
| 62 |
+ command.working_directory)
|
|
| 63 |
+ os.makedirs(working_directory, exist_ok=True)
|
|
| 64 |
+ else:
|
|
| 65 |
+ working_directory = temp_directory
|
|
| 66 |
+ |
|
| 67 |
+ # Ensure that the directory structure for output files exists:
|
|
| 68 |
+ for output_path in command.output_files:
|
|
| 69 |
+ directory_path = os.path.join(working_directory,
|
|
| 70 |
+ os.path.dirname(output_path))
|
|
| 71 |
+ os.makedirs(directory_path, exist_ok=True)
|
|
| 72 |
+ |
|
| 73 |
+ logger.debug(' '.join(command_line))
|
|
| 74 |
+ |
|
| 75 |
+ process = subprocess.Popen(command_line,
|
|
| 76 |
+ cwd=working_directory,
|
|
| 77 |
+ universal_newlines=True,
|
|
| 78 |
+ env=environment,
|
|
| 79 |
+ stdin=subprocess.PIPE,
|
|
| 80 |
+ stdout=subprocess.PIPE)
|
|
| 81 |
+ # TODO: Should return the stdout and stderr in the ActionResult.
|
|
| 82 |
+ process.communicate()
|
|
| 83 |
+ |
|
| 84 |
+ action_result = remote_execution_pb2.ActionResult()
|
|
| 85 |
+ |
|
| 86 |
+ with upload(context.cas_channel, instance=instance_name) as cas:
|
|
| 87 |
+ for output_path in command.output_files:
|
|
| 88 |
+ file_path = os.path.join(working_directory, output_path)
|
|
| 89 |
+ # Missing outputs should simply be omitted in ActionResult:
|
|
| 90 |
+ if not os.path.isfile(file_path):
|
|
| 91 |
+ continue
|
|
| 92 |
+ |
|
| 93 |
+ output_file = output_file_maker(file_path, working_directory, cas=cas)
|
|
| 94 |
+ action_result.output_files.extend([output_file])
|
|
| 95 |
+ |
|
| 96 |
+ for output_path in command.output_directories:
|
|
| 97 |
+ directory_path = os.path.join(working_directory, output_path)
|
|
| 98 |
+ # Missing outputs should simply be omitted in ActionResult:
|
|
| 99 |
+ if not os.path.isdir(directory_path):
|
|
| 100 |
+ continue
|
|
| 101 |
+ |
|
| 102 |
+ # OutputDirectory.path should be relative to the working directory:
|
|
| 103 |
+ output_directory = output_directory_maker(directory_path, working_directory, cas=cas)
|
|
| 104 |
+ |
|
| 105 |
+ action_result.output_directories.extend([output_directory])
|
|
| 106 |
+ |
|
| 107 |
+ action_result_any = any_pb2.Any()
|
|
| 108 |
+ action_result_any.Pack(action_result)
|
|
| 109 |
+ |
|
| 110 |
+ lease.result.CopyFrom(action_result_any)
|
|
| 88 | 111 |
|
| 89 | 112 |
return lease
|
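
For reference, here is a minimal, self-contained sketch of the execution pattern the reworked work_temp_directory() above follows: filter the Command environment variables, pre-create the declared output directories, then run the command from the temporary directory. The command_* values are made-up stand-ins for fields of the fetched Command message, and no CAS traffic is involved::

    import os
    import subprocess
    import tempfile

    # Hypothetical stand-ins for the Command message fields used by the bot code:
    command_arguments = ['sh', '-c', 'echo hello > out/result.txt']
    command_environment = {'CC': 'gcc'}         # Command.environment_variables
    command_output_files = ['out/result.txt']   # Command.output_files

    with tempfile.TemporaryDirectory() as temp_directory:
        # Keep the host PATH/PWD, overlay the remaining variables from the Command:
        environment = os.environ.copy()
        for name, value in command_environment.items():
            if name not in ['PATH', 'PWD']:
                environment[name] = value

        # Ensure the parent directory of every declared output file exists:
        for output_path in command_output_files:
            os.makedirs(os.path.join(temp_directory, os.path.dirname(output_path)),
                        exist_ok=True)

        process = subprocess.Popen(command_arguments,
                                   cwd=temp_directory,
                                   universal_newlines=True,
                                   env=environment,
                                   stdout=subprocess.PIPE)
        stdout, _ = process.communicate()
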
| 1 |
+# Copyright (C) 2018 Bloomberg LP
|
|
| 2 |
+#
|
|
| 3 |
+# Licensed under the Apache License, Version 2.0 (the "License");
|
|
| 4 |
+# you may not use this file except in compliance with the License.
|
|
| 5 |
+# You may obtain a copy of the License at
|
|
| 6 |
+#
|
|
| 7 |
+# <http://www.apache.org/licenses/LICENSE-2.0>
|
|
| 8 |
+#
|
|
| 9 |
+# Unless required by applicable law or agreed to in writing, software
|
|
| 10 |
+# distributed under the License is distributed on an "AS IS" BASIS,
|
|
| 11 |
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
| 12 |
+# See the License for the specific language governing permissions and
|
|
| 13 |
+# limitations under the License.
|
|
| 14 |
+ |
|
| 15 |
+ |
|
| 16 |
+from contextlib import contextmanager
|
|
| 17 |
+import uuid
|
|
| 18 |
+import os
|
|
| 19 |
+ |
|
| 20 |
+from buildgrid.settings import HASH
|
|
| 21 |
+from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2, remote_execution_pb2_grpc
|
|
| 22 |
+from buildgrid._protos.google.bytestream import bytestream_pb2, bytestream_pb2_grpc
|
|
| 23 |
+ |
|
| 24 |
+ |
|
| 25 |
+@contextmanager
|
|
| 26 |
+def upload(channel, instance=None, u_uid=None):
|
|
| 27 |
+ uploader = Uploader(channel, instance=instance, u_uid=u_uid)
|
|
| 28 |
+ try:
|
|
| 29 |
+ yield uploader
|
|
| 30 |
+ finally:
|
|
| 31 |
+ uploader.flush()
|
|
| 32 |
+ |
|
| 33 |
+ |
|
| 34 |
+class Uploader:
|
|
| 35 |
+ """Remote CAS files, directories and messages upload helper.
|
|
| 36 |
+ |
|
| 37 |
+ The :class:`Uploader` class comes with a generator factory function that can
|
|
| 38 |
+ be used together with the `with` statement for context management::
|
|
| 39 |
+ |
|
| 40 |
+ with upload(channel, instance='build') as cas:
|
|
| 41 |
+ cas.upload_file('/path/to/local/file')
|
|
| 42 |
+ |
|
| 43 |
+ Attributes:
|
|
| 44 |
+ FILE_SIZE_THRESHOLD (int): maximum size for a queueable file.
|
|
| 45 |
+ MAX_REQUEST_SIZE (int): maximum size for a single gRPC request.
|
|
| 46 |
+ """
|
|
| 47 |
+ |
|
| 48 |
+ FILE_SIZE_THRESHOLD = 1 * 1024 * 1024
|
|
| 49 |
+ MAX_REQUEST_SIZE = 2 * 1024 * 1024
|
|
| 50 |
+ |
|
| 51 |
+ def __init__(self, channel, instance=None, u_uid=None):
|
|
| 52 |
+ """Initializes a new :class:`Uploader` instance.
|
|
| 53 |
+ |
|
| 54 |
+ Args:
|
|
| 55 |
+ channel (grpc.Channel): A gRPC channel to the CAS endpoint.
|
|
| 56 |
+ instance (str, optional): the targeted instance's name.
|
|
| 57 |
+ u_uid (str, optional): a UUID for CAS transactions.
|
|
| 58 |
+ """
|
|
| 59 |
+ self.channel = channel
|
|
| 60 |
+ |
|
| 61 |
+ self.instance_name = instance
|
|
| 62 |
+ if u_uid is not None:
|
|
| 63 |
+ self.u_uid = u_uid
|
|
| 64 |
+ else:
|
|
| 65 |
+ self.u_uid = str(uuid.uuid4())
|
|
| 66 |
+ |
|
| 67 |
+ self.__bytestream_stub = bytestream_pb2_grpc.ByteStreamStub(self.channel)
|
|
| 68 |
+ self.__cas_stub = remote_execution_pb2_grpc.ContentAddressableStorageStub(self.channel)
|
|
| 69 |
+ |
|
| 70 |
+ self.__requests = dict()
|
|
| 71 |
+ self.__request_size = 0
|
|
| 72 |
+ |
|
| 73 |
+ def upload_file(self, file_path, queue=True):
|
|
| 74 |
+ """Stores a local file into the remote CAS storage.
|
|
| 75 |
+ |
|
| 76 |
+ If queuing is allowed (`queue=True`), the upload request **may** be
|
|
| 77 |
+ deferred. An explicit call to :method:`flush` can force the request to be
|
|
| 78 |
+ sent immediately (along with the rest of the queued batch).
|
|
| 79 |
+ |
|
| 80 |
+ Args:
|
|
| 81 |
+ file_path (str): absolute or relative path to a local file.
|
|
| 82 |
+ queue (bool, optional): whether or not the upload request may be
|
|
| 83 |
+ queued and submitted as part of a batch upload request. Defaults
|
|
| 84 |
+ to True.
|
|
| 85 |
+ |
|
| 86 |
+ Returns:
|
|
| 87 |
+ :obj:`Digest`: The digest of the file's content.
|
|
| 88 |
+ |
|
| 89 |
+ Raises:
|
|
| 90 |
+ OSError: If `file_path` does not exist or is not readable.
|
|
| 91 |
+ """
|
|
| 92 |
+ if not os.path.isabs(file_path):
|
|
| 93 |
+ file_path = os.path.abspath(file_path)
|
|
| 94 |
+ |
|
| 95 |
+ with open(file_path, 'rb') as bytes_stream:
|
|
| 96 |
+ file_bytes = bytes_stream.read()
|
|
| 97 |
+ |
|
| 98 |
+ if not queue or len(file_bytes) > Uploader.FILE_SIZE_THRESHOLD:
|
|
| 99 |
+ blob_digest = self._send_blob(file_bytes)
|
|
| 100 |
+ else:
|
|
| 101 |
+ blob_digest = self._queue_blob(file_bytes)
|
|
| 102 |
+ |
|
| 103 |
+ return blob_digest
|
|
| 104 |
+ |
|
| 105 |
+ def upload_directory(self, directory, queue=True):
|
|
| 106 |
+ """Stores a :obj:`Directory` into the remote CAS storage.
|
|
| 107 |
+ |
|
| 108 |
+ If queuing is allowed (`queue=True`), the upload request **may** be
|
|
| 109 |
+ deferred. An explicit call to :method:`flush` can force the request to be
|
|
| 110 |
+ sent immediately (along with the rest of the queued batch).
|
|
| 111 |
+ |
|
| 112 |
+ Args:
|
|
| 113 |
+ directory (:obj:`Directory`): a :obj:`Directory` object.
|
|
| 114 |
+ queue (bool, optional): whether or not the upload request may be
|
|
| 115 |
+ queued and submitted as part of a batch upload request. Defaults
|
|
| 116 |
+ to True.
|
|
| 117 |
+ |
|
| 118 |
+ Returns:
|
|
| 119 |
+ :obj:`Digest`: The digest of the :obj:`Directory`.
|
|
| 120 |
+ """
|
|
| 121 |
+ if not isinstance(directory, remote_execution_pb2.Directory):
|
|
| 122 |
+ raise TypeError
|
|
| 123 |
+ |
|
| 124 |
+ if not queue:
|
|
| 125 |
+ return self._send_blob(directory.SerializeToString())
|
|
| 126 |
+ else:
|
|
| 127 |
+ return self._queue_blob(directory.SerializeToString())
|
|
| 128 |
+ |
|
| 129 |
+ def send_message(self, message):
|
|
| 130 |
+ """Stores a message into the remote CAS storage.
|
|
| 131 |
+ |
|
| 132 |
+ Args:
|
|
| 133 |
+ message (:obj:`Message`): a protobuf message object.
|
|
| 134 |
+ |
|
| 135 |
+ Returns:
|
|
| 136 |
+ :obj:`Digest`: The digest of the message.
|
|
| 137 |
+ """
|
|
| 138 |
+ return self._send_blob(message.SerializeToString())
|
|
| 139 |
+ |
|
| 140 |
+ def flush(self):
|
|
| 141 |
+ """Ensures any queued request gets sent."""
|
|
| 142 |
+ if self.__requests:
|
|
| 143 |
+ self._send_batch()
|
|
| 144 |
+ |
|
| 145 |
+ def _queue_blob(self, blob):
|
|
| 146 |
+ """Queues a memory block for later batch upload"""
|
|
| 147 |
+ blob_digest = remote_execution_pb2.Digest()
|
|
| 148 |
+ blob_digest.hash = HASH(blob).hexdigest()
|
|
| 149 |
+ blob_digest.size_bytes = len(blob)
|
|
| 150 |
+ |
|
| 151 |
+ if self.__request_size + len(blob) > Uploader.MAX_REQUEST_SIZE:
|
|
| 152 |
+ self._send_batch()
|
|
| 153 |
+ |
|
| 154 |
+ update_request = remote_execution_pb2.BatchUpdateBlobsRequest.Request()
|
|
| 155 |
+ update_request.digest.CopyFrom(blob_digest)
|
|
| 156 |
+ update_request.data = blob
|
|
| 157 |
+ |
|
| 158 |
+ update_request_size = update_request.ByteSize()
|
|
| 159 |
+ if self.__request_size + update_request_size > Uploader.MAX_REQUEST_SIZE:
|
|
| 160 |
+ self._send_batch()
|
|
| 161 |
+ |
|
| 162 |
+ self.__requests[update_request.digest.hash] = update_request
|
|
| 163 |
+ self.__request_size += update_request_size
|
|
| 164 |
+ |
|
| 165 |
+ return blob_digest
|
|
| 166 |
+ |
|
| 167 |
+ def _send_blob(self, blob):
|
|
| 168 |
+ """Sends a memory block using ByteStream.Write()"""
|
|
| 169 |
+ blob_digest = remote_execution_pb2.Digest()
|
|
| 170 |
+ blob_digest.hash = HASH(blob).hexdigest()
|
|
| 171 |
+ blob_digest.size_bytes = len(blob)
|
|
| 172 |
+ |
|
| 173 |
+ if self.instance_name is not None:
|
|
| 174 |
+ resource_name = '/'.join([self.instance_name, 'uploads', self.u_uid, 'blobs',
|
|
| 175 |
+ blob_digest.hash, str(blob_digest.size_bytes)])
|
|
| 176 |
+ else:
|
|
| 177 |
+ resource_name = '/'.join(['uploads', self.u_uid, 'blobs',
|
|
| 178 |
+ blob_digest.hash, str(blob_digest.size_bytes)])
|
|
| 179 |
+ |
|
| 180 |
+ def __write_request_stream(resource, content):
|
|
| 181 |
+ offset = 0
|
|
| 182 |
+ finished = False
|
|
| 183 |
+ remaining = len(content)
|
|
| 184 |
+ while not finished:
|
|
| 185 |
+ chunk_size = min(remaining, 64 * 1024)
|
|
| 186 |
+ remaining -= chunk_size
|
|
| 187 |
+ |
|
| 188 |
+ request = bytestream_pb2.WriteRequest()
|
|
| 189 |
+ request.resource_name = resource
|
|
| 190 |
+ request.data = content[offset:offset + chunk_size]
|
|
| 191 |
+ request.write_offset = offset
|
|
| 192 |
+ request.finish_write = remaining <= 0
|
|
| 193 |
+ |
|
| 194 |
+ yield request
|
|
| 195 |
+ |
|
| 196 |
+ offset += chunk_size
|
|
| 197 |
+ finished = request.finish_write
|
|
| 198 |
+ |
|
| 199 |
+ write_requests = __write_request_stream(resource_name, blob)
|
|
| 200 |
+ # TODO: Handle connection loss/recovery using QueryWriteStatus()
|
|
| 201 |
+ write_response = self.__bytestream_stub.Write(write_requests)
|
|
| 202 |
+ |
|
| 203 |
+ assert write_response.committed_size == blob_digest.size_bytes
|
|
| 204 |
+ |
|
| 205 |
+ return blob_digest
|
|
| 206 |
+ |
|
| 207 |
+ def _send_batch(self):
|
|
| 208 |
+ """Sends queued data using ContentAddressableStorage.BatchUpdateBlobs()"""
|
|
| 209 |
+ batch_request = remote_execution_pb2.BatchUpdateBlobsRequest()
|
|
| 210 |
+ batch_request.requests.extend(self.__requests.values())
|
|
| 211 |
+ if self.instance_name is not None:
|
|
| 212 |
+ batch_request.instance_name = self.instance_name
|
|
| 213 |
+ |
|
| 214 |
+ batch_response = self.__cas_stub.BatchUpdateBlobs(batch_request)
|
|
| 215 |
+ |
|
| 216 |
+ for response in batch_response.responses:
|
|
| 217 |
+ assert response.digest.hash in self.__requests
|
|
| 218 |
+ assert response.status.code == 0
|
|
| 219 |
+ |
|
| 220 |
+ self.__requests.clear()
|
|
| 221 |
+ self.__request_size = 0
|
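
For context, a usage sketch of the uploader API added by this commit. The endpoint address, instance name and file paths below are placeholders, and a CAS server would have to be reachable for the calls to succeed::

    import grpc

    from buildgrid.client.cas import upload
    from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2

    # Placeholder endpoint and instance name:
    channel = grpc.insecure_channel('localhost:50051')

    with upload(channel, instance='main') as cas:
        # Small files are queued and sent together via BatchUpdateBlobs(), at the
        # latest when the context exits; large files or queue=False go through
        # ByteStream.Write() immediately.
        log_digest = cas.upload_file('/tmp/stdout.log')
        blob_digest = cas.upload_file('/tmp/artifact.bin', queue=False)

        # Arbitrary protobuf messages can be stored too, e.g. an empty Directory:
        directory_digest = cas.send_message(remote_execution_pb2.Directory())

    print(log_digest.hash, log_digest.size_bytes)
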
| ... | ... | @@ -13,6 +13,7 @@ |
| 13 | 13 |
# limitations under the License.
|
| 14 | 14 |
|
| 15 | 15 |
|
| 16 |
+from operator import attrgetter
|
|
| 16 | 17 |
import os
|
| 17 | 18 |
|
| 18 | 19 |
from buildgrid.settings import HASH
|
| ... | ... | @@ -31,30 +32,59 @@ def gen_fetch_blob(stub, digest, instance_name=""): |
| 31 | 32 |
yield response.data
|
| 32 | 33 |
|
| 33 | 34 |
|
| 34 |
-def write_fetch_directory(directory, stub, digest, instance_name=""):
|
|
| 35 |
- """ Given a directory digest, fetches files and writes them to a directory
|
|
| 35 |
+def write_fetch_directory(root_directory, stub, digest, instance_name=None):
|
|
| 36 |
+ """Locally replicates a directory from CAS.
|
|
| 37 |
+ |
|
| 38 |
+ Args:
|
|
| 39 |
+ root_directory (str): local directory to populate.
|
|
| 40 |
+ stub (): gRPC stub for CAS communication.
|
|
| 41 |
+ digest (Digest): digest for the directory to fetch from CAS.
|
|
| 42 |
+ instance_name (str, optional): farm instance name to query data from.
|
|
| 36 | 43 |
"""
|
| 37 |
- # TODO: Extend to symlinks and inner directories
|
|
| 38 |
- # pathlib.Path('/my/directory').mkdir(parents=True, exist_ok=True)
|
|
| 44 |
+ if not os.path.isabs(root_directory):
|
|
| 45 |
+ root_directory = os.path.abspath(root_directory)
|
|
| 46 |
+ if not os.path.exists(root_directory):
|
|
| 47 |
+ os.makedirs(root_directory, exist_ok=True)
|
|
| 39 | 48 |
|
| 40 |
- directory_pb2 = remote_execution_pb2.Directory()
|
|
| 41 |
- directory_pb2 = parse_to_pb2_from_fetch(directory_pb2, stub, digest, instance_name)
|
|
| 49 |
+ directory = parse_to_pb2_from_fetch(remote_execution_pb2.Directory(),
|
|
| 50 |
+ stub, digest, instance_name)
|
|
| 51 |
+ |
|
| 52 |
+ for directory_node in directory.directories:
|
|
| 53 |
+ child_path = os.path.join(root_directory, directory_node.name)
|
|
| 54 |
+ |
|
| 55 |
+ write_fetch_directory(child_path, stub, directory_node.digest, instance_name)
|
|
| 56 |
+ |
|
| 57 |
+ for file_node in directory.files:
|
|
| 58 |
+ child_path = os.path.join(root_directory, file_node.name)
|
|
| 59 |
+ |
|
| 60 |
+ with open(child_path, 'wb') as child_file:
|
|
| 61 |
+ write_fetch_blob(child_file, stub, file_node.digest, instance_name)
|
|
| 62 |
+ |
|
| 63 |
+ for symlink_node in directory.symlinks:
|
|
| 64 |
+ child_path = os.path.join(root_directory, symlink_node.name)
|
|
| 65 |
+ |
|
| 66 |
+ if os.path.isabs(symlink_node.target):
|
|
| 67 |
+ continue # No links pointing outside of the temp directory for now.
|
|
| 68 |
+ target_path = os.path.join(root_directory, symlink_node.target)
|
|
| 69 |
+ |
|
| 70 |
+ os.symlink(target_path, child_path)
|
|
| 42 | 71 |
|
| 43 |
- for file_node in directory_pb2.files:
|
|
| 44 |
- path = os.path.join(directory, file_node.name)
|
|
| 45 |
- with open(path, 'wb') as f:
|
|
| 46 |
- write_fetch_blob(f, stub, file_node.digest, instance_name)
|
|
| 47 | 72 |
|
| 73 |
+def write_fetch_blob(target_file, stub, digest, instance_name=None):
|
|
| 74 |
+ """Extracts a blob from CAS into a local file.
|
|
| 48 | 75 |
|
| 49 |
-def write_fetch_blob(out, stub, digest, instance_name=""):
|
|
| 50 |
- """ Given an output buffer, fetches blob and writes to buffer
|
|
| 76 |
+ Args:
|
|
| 77 |
+ target_file (file): open local file to write into.
|
|
| 78 |
+ stub (): gRPC stub for CAS communication.
|
|
| 79 |
+ digest (Digest): digest for the blob to fetch from CAS.
|
|
| 80 |
+ instance_name (str, optional): farm instance name to query data from.
|
|
| 51 | 81 |
"""
|
| 52 | 82 |
|
| 53 | 83 |
for stream in gen_fetch_blob(stub, digest, instance_name):
|
| 54 |
- out.write(stream)
|
|
| 84 |
+ target_file.write(stream)
|
|
| 85 |
+ target_file.flush()
|
|
| 55 | 86 |
|
| 56 |
- out.flush()
|
|
| 57 |
- assert digest.size_bytes == os.fstat(out.fileno()).st_size
|
|
| 87 |
+ assert digest.size_bytes == os.fstat(target_file.fileno()).st_size
|
|
| 58 | 88 |
|
| 59 | 89 |
|
| 60 | 90 |
def parse_to_pb2_from_fetch(pb2, stub, digest, instance_name=""):
|
| ... | ... | @@ -70,7 +100,15 @@ def parse_to_pb2_from_fetch(pb2, stub, digest, instance_name=""): |
| 70 | 100 |
|
| 71 | 101 |
|
| 72 | 102 |
def create_digest(bytes_to_digest):
|
| 73 |
- """ Creates a hash based on the hex digest and returns the digest
|
|
| 103 |
+ """Computes the :obj:`Digest` of a piece of data.
|
|
| 104 |
+ |
|
| 105 |
+ The :obj:`Digest` of a piece of data is a function of its hash **and** size.
|
|
| 106 |
+ |
|
| 107 |
+ Args:
|
|
| 108 |
+ bytes_to_digest (bytes): byte data to digest.
|
|
| 109 |
+ |
|
| 110 |
+ Returns:
|
|
| 111 |
+ :obj:`Digest`: The gRPC :obj:`Digest` for the given byte data.
|
|
| 74 | 112 |
"""
|
| 75 | 113 |
return remote_execution_pb2.Digest(hash=HASH(bytes_to_digest).hexdigest(),
|
| 76 | 114 |
size_bytes=len(bytes_to_digest))
|
| ... | ... | @@ -107,6 +145,200 @@ def file_maker(file_path, file_digest): |
| 107 | 145 |
is_executable=os.access(file_path, os.X_OK))
|
| 108 | 146 |
|
| 109 | 147 |
|
| 110 |
-def read_file(read):
|
|
| 111 |
- with open(read, 'rb') as f:
|
|
| 112 |
- return f.read()
|
|
| 148 |
+def directory_maker(directory_path, child_directories=None, cas=None, upload_directories=True):
|
|
| 149 |
+ """Creates a :obj:`Directory` from a local directory and possibly upload it.
|
|
| 150 |
+ |
|
| 151 |
+ Args:
|
|
| 152 |
+ directory_path (str): absolute or relative path to a local directory.
|
|
| 153 |
+ child_directories (list): output list of children :obj:`Directory`
|
|
| 154 |
+ objects.
|
|
| 155 |
+ cas (:obj:`Uploader`): a CAS client uploader.
|
|
| 156 |
+ upload_directories (bool): whether or not to upload the :obj:`Directory`
|
|
| 157 |
+ objects along with the files.
|
|
| 158 |
+ |
|
| 159 |
+ Returns:
|
|
| 160 |
+ :obj:`Directory`, :obj:`Digest`: Tuple of a new gRPC :obj:`Directory`
|
|
| 161 |
+ for the local directory pointed by `directory_path` and the digest
|
|
| 162 |
+ for that object.
|
|
| 163 |
+ """
|
|
| 164 |
+ if not os.path.isabs(directory_path):
|
|
| 165 |
+ directory_path = os.path.abspath(directory_path)
|
|
| 166 |
+ |
|
| 167 |
+ files, directories, symlinks = list(), list(), list()
|
|
| 168 |
+ for directory_entry in os.scandir(directory_path):
|
|
| 169 |
+ # Create a FileNode and corresponding BatchUpdateBlobsRequest:
|
|
| 170 |
+ if directory_entry.is_file(follow_symlinks=False):
|
|
| 171 |
+ if cas is not None:
|
|
| 172 |
+ node_digest = cas.upload_file(directory_entry.path)
|
|
| 173 |
+ else:
|
|
| 174 |
+ node_digest = create_digest(read_file(directory_entry.path))
|
|
| 175 |
+ |
|
| 176 |
+ node = remote_execution_pb2.FileNode()
|
|
| 177 |
+ node.name = directory_entry.name
|
|
| 178 |
+ node.digest.CopyFrom(node_digest)
|
|
| 179 |
+ node.is_executable = os.access(directory_entry.path, os.X_OK)
|
|
| 180 |
+ |
|
| 181 |
+ files.append(node)
|
|
| 182 |
+ |
|
| 183 |
+ # Create a DirectoryNode and corresponding BatchUpdateBlobsRequest:
|
|
| 184 |
+ elif directory_entry.is_dir(follow_symlinks=False):
|
|
| 185 |
+ _, node_digest = directory_maker(directory_entry.path,
|
|
| 186 |
+ child_directories=child_directories,
|
|
| 187 |
+ upload_directories=upload_directories,
|
|
| 188 |
+ cas=cas)
|
|
| 189 |
+ |
|
| 190 |
+ node = remote_execution_pb2.DirectoryNode()
|
|
| 191 |
+ node.name = directory_entry.name
|
|
| 192 |
+ node.digest.CopyFrom(node_digest)
|
|
| 193 |
+ |
|
| 194 |
+ directories.append(node)
|
|
| 195 |
+ |
|
| 196 |
+ # Create a SymlinkNode if necessary:
|
|
| 197 |
+ elif os.path.islink(directory_entry.path):
|
|
| 198 |
+ node_target = os.readlink(directory_entry.path)
|
|
| 199 |
+ |
|
| 200 |
+ node = remote_execution_pb2.SymlinkNode()
|
|
| 201 |
+ node.name = directory_entry.name
|
|
| 202 |
+ node.target = node_target
|
|
| 203 |
+ |
|
| 204 |
+ symlinks.append(node)
|
|
| 205 |
+ |
|
| 206 |
+ files.sort(key=attrgetter('name'))
|
|
| 207 |
+ directories.sort(key=attrgetter('name'))
|
|
| 208 |
+ symlinks.sort(key=attrgetter('name'))
|
|
| 209 |
+ |
|
| 210 |
+ directory = remote_execution_pb2.Directory()
|
|
| 211 |
+ directory.files.extend(files)
|
|
| 212 |
+ directory.directories.extend(directories)
|
|
| 213 |
+ directory.symlinks.extend(symlinks)
|
|
| 214 |
+ |
|
| 215 |
+ if child_directories is not None:
|
|
| 216 |
+ child_directories.append(directory)
|
|
| 217 |
+ |
|
| 218 |
+ if cas is not None and upload_directories:
|
|
| 219 |
+ directory_digest = cas.upload_directory(directory)
|
|
| 220 |
+ else:
|
|
| 221 |
+ directory_digest = create_digest(directory.SerializeToString())
|
|
| 222 |
+ |
|
| 223 |
+ return directory, directory_digest
|
|
| 224 |
+ |
|
| 225 |
+ |
|
| 226 |
+def tree_maker(directory_path, cas=None):
|
|
| 227 |
+ """Creates a :obj:`Tree` from a local directory and possibly upload it.
|
|
| 228 |
+ |
|
| 229 |
+ If `cas` is specified, the local directory content will be uploaded/stored
|
|
| 230 |
+ in remote CAS (including the :obj:`Tree` message itself).
|
|
| 231 |
+ |
|
| 232 |
+ Args:
|
|
| 233 |
+ directory_path (str): absolute or relative path to a local directory.
|
|
| 234 |
+ cas (:obj:`Uploader`): a CAS client uploader.
|
|
| 235 |
+ |
|
| 236 |
+ Returns:
|
|
| 237 |
+ :obj:`Tree`, :obj:`Digest`: Tuple of a new gRPC :obj:`Tree` for the
|
|
| 238 |
+ local directory pointed by `directory_path` and the digest for that
|
|
| 239 |
+ object.
|
|
| 240 |
+ """
|
|
| 241 |
+ if not os.path.isabs(directory_path):
|
|
| 242 |
+ directory_path = os.path.abspath(directory_path)
|
|
| 243 |
+ |
|
| 244 |
+ child_directories = list()
|
|
| 245 |
+ directory, _ = directory_maker(directory_path,
|
|
| 246 |
+ child_directories=child_directories,
|
|
| 247 |
+ upload_directories=False,
|
|
| 248 |
+ cas=cas)
|
|
| 249 |
+ |
|
| 250 |
+ tree = remote_execution_pb2.Tree()
|
|
| 251 |
+ tree.children.extend(child_directories)
|
|
| 252 |
+ tree.root.CopyFrom(directory)
|
|
| 253 |
+ |
|
| 254 |
+ if cas is not None:
|
|
| 255 |
+ tree_digest = cas.send_message(tree)
|
|
| 256 |
+ else:
|
|
| 257 |
+ tree_digest = create_digest(tree.SerializeToString())
|
|
| 258 |
+ |
|
| 259 |
+ return tree, tree_digest
|
|
| 260 |
+ |
|
| 261 |
+ |
|
| 262 |
+def read_file(file_path):
|
|
| 263 |
+ """Loads raw file content in memory.
|
|
| 264 |
+ |
|
| 265 |
+ Args:
|
|
| 266 |
+ file_path (str): path to the target file.
|
|
| 267 |
+ |
|
| 268 |
+ Returns:
|
|
| 269 |
+ bytes: Raw file content, read until EOF.
|
|
| 270 |
+ |
|
| 271 |
+ Raises:
|
|
| 272 |
+ OSError: If `file_path` does not exist or is not readable.
|
|
| 273 |
+ """
|
|
| 274 |
+ with open(file_path, 'rb') as byte_file:
|
|
| 275 |
+ return byte_file.read()
|
|
| 276 |
+ |
|
| 277 |
+ |
|
| 278 |
+def output_file_maker(file_path, input_path, cas=None):
|
|
| 279 |
+ """Creates an :obj:`OutputFile` from a local file and possibly upload it.
|
|
| 280 |
+ |
|
| 281 |
+ If `cas` is specified, the local file will be uploaded/stored in remote CAS
|
|
| 282 |
+ (the :obj:`OutputFile` message won't).
|
|
| 283 |
+ |
|
| 284 |
+ Note:
|
|
| 285 |
+ `file_path` **must** point inside or be relative to `input_path`.
|
|
| 286 |
+ |
|
| 287 |
+ Args:
|
|
| 288 |
+ file_path (str): absolute or relative path to a local file.
|
|
| 289 |
+ input_path (str): absolute or relative path to the input root directory.
|
|
| 290 |
+ cas (:obj:`Uploader`): a CAS client uploader.
|
|
| 291 |
+ |
|
| 292 |
+ Returns:
|
|
| 293 |
+ :obj:`OutputFile`: a new gRPC :obj:`OutputFile` object for the file
|
|
| 294 |
+ pointed by `file_path`.
|
|
| 295 |
+ """
|
|
| 296 |
+ if not os.path.isabs(file_path):
|
|
| 297 |
+ file_path = os.path.abspath(file_path)
|
|
| 298 |
+ if not os.path.isabs(input_path):
|
|
| 299 |
+ input_path = os.path.abspath(input_path)
|
|
| 300 |
+ |
|
| 301 |
+ if cas is not None:
|
|
| 302 |
+ file_digest = cas.upload_file(file_path)
|
|
| 303 |
+ else:
|
|
| 304 |
+ file_digest = create_digest(read_file(file_path))
|
|
| 305 |
+ |
|
| 306 |
+ output_file = remote_execution_pb2.OutputFile()
|
|
| 307 |
+ output_file.digest.CopyFrom(file_digest)
|
|
| 308 |
+ # OutputFile.path should be relative to the working directory:
|
|
| 309 |
+ output_file.path = os.path.relpath(file_path, start=input_path)
|
|
| 310 |
+ output_file.is_executable = os.access(file_path, os.X_OK)
|
|
| 311 |
+ |
|
| 312 |
+ return output_file
|
|
| 313 |
+ |
|
| 314 |
+ |
|
| 315 |
+def output_directory_maker(directory_path, working_path, cas=None):
|
|
| 316 |
+ """Creates an :obj:`OutputDirectory` from a local directory.
|
|
| 317 |
+ |
|
| 318 |
+ If `cas` is specified, the local directory content will be uploaded/stored
|
|
| 319 |
+ in remote CAS (the :obj:`OutputDirectory` message won't).
|
|
| 320 |
+ |
|
| 321 |
+ Note:
|
|
| 322 |
+ `directory_path` **must** point inside or be relative to `working_path`.
|
|
| 323 |
+ |
|
| 324 |
+ Args:
|
|
| 325 |
+ directory_path (str): absolute or relative path to a local directory.
|
|
| 326 |
+ working_path (str): absolute or relative path to the working directory.
|
|
| 327 |
+ cas (:obj:`Uploader`): a CAS client uploader.
|
|
| 328 |
+ |
|
| 329 |
+ Returns:
|
|
| 330 |
+ :obj:`OutputDirectory`: a new gRPC :obj:`OutputDirectory` for the
|
|
| 331 |
+ directory pointed by `directory_path`.
|
|
| 332 |
+ """
|
|
| 333 |
+ if not os.path.isabs(directory_path):
|
|
| 334 |
+ directory_path = os.path.abspath(directory_path)
|
|
| 335 |
+ if not os.path.isabs(working_path):
|
|
| 336 |
+ working_path = os.path.abspath(working_path)
|
|
| 337 |
+ |
|
| 338 |
+ _, tree_digest = tree_maker(directory_path, cas=cas)
|
|
| 339 |
+ |
|
| 340 |
+ output_directory = remote_execution_pb2.OutputDirectory()
|
|
| 341 |
+ output_directory.tree_digest.CopyFrom(tree_digest)
|
|
| 342 |
+ output_directory.path = os.path.relpath(directory_path, start=working_path)
|
|
| 343 |
+ |
|
| 344 |
+ return output_directory
|
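
To illustrate the new ActionResult helpers, here is a local-only sketch that builds an OutputFile and an OutputDirectory from a scratch directory. With cas=None the helpers only compute digests; passing an Uploader instead would also store the blobs remotely, as the bot code above does. All paths and contents are made up::

    import os
    import tempfile

    from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
    from buildgrid.utils import output_file_maker, output_directory_maker

    with tempfile.TemporaryDirectory() as working_directory:
        # Fake build outputs: one file inside one directory.
        os.makedirs(os.path.join(working_directory, 'out'))
        with open(os.path.join(working_directory, 'out', 'result.txt'), 'w') as result_file:
            result_file.write('hello')

        action_result = remote_execution_pb2.ActionResult()

        output_file = output_file_maker(
            os.path.join(working_directory, 'out', 'result.txt'), working_directory)
        action_result.output_files.extend([output_file])

        output_directory = output_directory_maker(
            os.path.join(working_directory, 'out'), working_directory)
        action_result.output_directories.extend([output_directory])

        print(action_result.output_files[0].path,
              action_result.output_directories[0].tree_digest.hash)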
