Jonathan Maw pushed to branch jonathan/pickle-yaml at BuildStream / buildstream
Commits:
-
9b327eb6
by Ben Brewer at 2018-09-04T14:41:23Z
-
3e67e64a
by Javier Jardón at 2018-09-04T16:43:13Z
-
3409609e
by Daniel Silverstone at 2018-09-04T16:55:51Z
-
7b32e1ec
by Tristan Maat at 2018-09-04T17:20:55Z
-
5adfa6b0
by Jonathan Maw at 2018-09-07T16:46:06Z
7 changed files:
- buildstream/_loader/loadelement.py
- buildstream/_loader/loader.py
- + buildstream/_pickler.py
- buildstream/_project.py
- buildstream/_scheduler/jobs/job.py
- buildstream/_yaml.py
- doc/source/install_artifacts.rst
Changes:
... | ... | @@ -185,6 +185,7 @@ def _extract_depends_from_node(node, *, key=None): |
185 | 185 |
output_deps.append(dependency)
|
186 | 186 |
|
187 | 187 |
# Now delete the field, we don't want it anymore
|
188 |
- del node[key]
|
|
188 |
+ if key in node:
|
|
189 |
+ del node[key]
|
|
189 | 190 |
|
190 | 191 |
return output_deps
|
... | ... | @@ -30,6 +30,7 @@ from ..element import Element |
30 | 30 |
from .._profile import Topics, profile_start, profile_end
|
31 | 31 |
from .._platform import Platform
|
32 | 32 |
from .._includes import Includes
|
33 |
+from .._pickler import YamlCache
|
|
33 | 34 |
|
34 | 35 |
from .types import Symbol, Dependency
|
35 | 36 |
from .loadelement import LoadElement
|
... | ... | @@ -113,7 +114,8 @@ class Loader(): |
113 | 114 |
profile_start(Topics.LOAD_PROJECT, target)
|
114 | 115 |
junction, name, loader = self._parse_name(target, rewritable, ticker,
|
115 | 116 |
fetch_subprojects=fetch_subprojects)
|
116 |
- loader._load_file(name, rewritable, ticker, fetch_subprojects)
|
|
117 |
+ with YamlCache.get_cache(self._context) as yaml_cache:
|
|
118 |
+ loader._load_file(name, rewritable, ticker, fetch_subprojects, yaml_cache)
|
|
117 | 119 |
deps.append(Dependency(name, junction=junction))
|
118 | 120 |
profile_end(Topics.LOAD_PROJECT, target)
|
119 | 121 |
|
... | ... | @@ -202,11 +204,12 @@ class Loader(): |
202 | 204 |
# rewritable (bool): Whether we should load in round trippable mode
|
203 | 205 |
# ticker (callable): A callback to report loaded filenames to the frontend
|
204 | 206 |
# fetch_subprojects (bool): Whether to fetch subprojects while loading
|
207 |
+ # yaml_cache (YamlCache): A yaml cache
|
|
205 | 208 |
#
|
206 | 209 |
# Returns:
|
207 | 210 |
# (LoadElement): A loaded LoadElement
|
208 | 211 |
#
|
209 |
- def _load_file(self, filename, rewritable, ticker, fetch_subprojects):
|
|
212 |
+ def _load_file(self, filename, rewritable, ticker, fetch_subprojects, yaml_cache=None):
|
|
210 | 213 |
|
211 | 214 |
# Silently ignore already loaded files
|
212 | 215 |
if filename in self._elements:
|
... | ... | @@ -219,7 +222,8 @@ class Loader(): |
219 | 222 |
# Load the data and process any conditional statements therein
|
220 | 223 |
fullpath = os.path.join(self._basedir, filename)
|
221 | 224 |
try:
|
222 |
- node = _yaml.load(fullpath, shortname=filename, copy_tree=rewritable, project=self.project)
|
|
225 |
+ node = _yaml.load(fullpath, shortname=filename, copy_tree=rewritable,
|
|
226 |
+ project=self.project, yaml_cache=yaml_cache)
|
|
223 | 227 |
except LoadError as e:
|
224 | 228 |
if e.reason == LoadErrorReason.MISSING_FILE:
|
225 | 229 |
# If we can't find the file, try to suggest plausible
|
... | ... | @@ -262,13 +266,13 @@ class Loader(): |
262 | 266 |
# Load all dependency files for the new LoadElement
|
263 | 267 |
for dep in element.deps:
|
264 | 268 |
if dep.junction:
|
265 |
- self._load_file(dep.junction, rewritable, ticker, fetch_subprojects)
|
|
269 |
+ self._load_file(dep.junction, rewritable, ticker, fetch_subprojects, yaml_cache)
|
|
266 | 270 |
loader = self._get_loader(dep.junction, rewritable=rewritable, ticker=ticker,
|
267 | 271 |
fetch_subprojects=fetch_subprojects)
|
268 | 272 |
else:
|
269 | 273 |
loader = self
|
270 | 274 |
|
271 |
- dep_element = loader._load_file(dep.name, rewritable, ticker, fetch_subprojects)
|
|
275 |
+ dep_element = loader._load_file(dep.name, rewritable, ticker, fetch_subprojects, yaml_cache)
|
|
272 | 276 |
|
273 | 277 |
if _yaml.node_get(dep_element.node, str, Symbol.KIND) == 'junction':
|
274 | 278 |
raise LoadError(LoadErrorReason.INVALID_DATA,
|
1 |
+#
|
|
2 |
+# Copyright 2018 Bloomberg Finance LP
|
|
3 |
+#
|
|
4 |
+# This program is free software; you can redistribute it and/or
|
|
5 |
+# modify it under the terms of the GNU Lesser General Public
|
|
6 |
+# License as published by the Free Software Foundation; either
|
|
7 |
+# version 2 of the License, or (at your option) any later version.
|
|
8 |
+#
|
|
9 |
+# This library is distributed in the hope that it will be useful,
|
|
10 |
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11 |
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
12 |
+# Lesser General Public License for more details.
|
|
13 |
+#
|
|
14 |
+# You should have received a copy of the GNU Lesser General Public
|
|
15 |
+# License along with this library. If not, see <http://www.gnu.org/licenses/>.
|
|
16 |
+#
|
|
17 |
+# Authors:
|
|
18 |
+#        Jonathan Maw <jonathan.maw@codethink.co.uk>
|
|
19 |
+ |
|
20 |
+import os
|
|
21 |
+import pickle
|
|
22 |
+import hashlib
|
|
23 |
+ |
|
24 |
+from contextlib import contextmanager
|
|
25 |
+from collections import namedtuple
|
|
26 |
+ |
|
27 |
+from ._cachekey import generate_key
|
|
28 |
+from . import utils, _yaml
|
|
29 |
+ |
|
30 |
+ |
|
31 |
+YAML_CACHE_FILENAME = "yaml_cache.pickle"
|
|
32 |
+ |
|
33 |
+ |
|
34 |
+# In _yaml.load, we have a ProvenanceFile that stores the project the file
|
|
35 |
+# came from. Projects can't be pickled, but it's always going to be the same
|
|
36 |
+# project between invocations (unless the entire project is moved but the
|
|
37 |
+# file stayed in the same place)
|
|
38 |
+class BstPickler(pickle.Pickler):
|
|
39 |
+ def persistent_id(self, obj):
|
|
40 |
+ # Inline import to break import loop
|
|
41 |
+ from ._project import Project
|
|
42 |
+ if isinstance(obj, Project):
|
|
43 |
+ return ("Project", obj.name)
|
|
44 |
+ else:
|
|
45 |
+ return None
|
|
46 |
+ |
|
47 |
+ |
|
48 |
+class BstUnpickler(pickle.Unpickler):
|
|
49 |
+ def __init__(self, file, context):
|
|
50 |
+ super().__init__(file)
|
|
51 |
+ self._context = context
|
|
52 |
+ |
|
53 |
+ def persistent_load(self, pid):
|
|
54 |
+ type_tag, key_id = pid
|
|
55 |
+ if type_tag == "Project":
|
|
56 |
+ # XXX: This doesn't actually help, we need to load the junction
|
|
57 |
+ # to create the project
|
|
58 |
+ for project in self._context.get_projects():
|
|
59 |
+ if key_id == project.name:
|
|
60 |
+ return project
|
|
61 |
+ |
|
62 |
+ projects = [p.name for p in self._context.get_projects()]
|
|
63 |
+ raise pickle.UnpicklingError("No project with name {} found in {}".format(key_id, projects))
|
|
64 |
+ else:
|
|
65 |
+ raise pickle.UnpicklingError("Unsupported persistent object")
|
|
66 |
+ |
|
67 |
+ |
|
68 |
+CachedProject = namedtuple('CachedProject', ['path', 'project_sum', 'elements'])
|
|
69 |
+CachedYaml = namedtuple('CachedYaml', ['key', 'contents'])
|
|
70 |
+ |
|
71 |
+ |
|
72 |
+class YamlCache():
|
|
73 |
+ |
|
74 |
+ def __init__(self):
|
|
75 |
+ self.project_caches = {}
|
|
76 |
+ |
|
77 |
+ def write(self, path):
|
|
78 |
+ parent_dir = os.path.dirname(path)
|
|
79 |
+ os.makedirs(parent_dir, exist_ok=True)
|
|
80 |
+ with open(path, "wb") as f:
|
|
81 |
+ BstPickler(f).dump(self)
|
|
82 |
+ |
|
83 |
+ def get(self, project, filepath, key):
|
|
84 |
+ if project.name in self.project_caches:
|
|
85 |
+ project_cache = self.project_caches[project.name]
|
|
86 |
+ if filepath in project_cache.elements:
|
|
87 |
+ cachedyaml = project_cache.elements[filepath]
|
|
88 |
+ if cachedyaml.key == key:
|
|
89 |
+ return _yaml.node_copy(cachedyaml.contents)
|
|
90 |
+ return None
|
|
91 |
+ |
|
92 |
+ def put(self, project, filepath, key, value):
|
|
93 |
+ if project.name in self.project_caches:
|
|
94 |
+ # XXX: Needs a check that the project hasn't changed
|
|
95 |
+ project_cache = self.project_caches[project.name]
|
|
96 |
+ else:
|
|
97 |
+ project_cache = self.project_caches[project.name] = CachedProject(project.directory, project.shasum, {})
|
|
98 |
+ |
|
99 |
+ if filepath in project_cache.elements and project_cache.elements[filepath].key == key:
|
|
100 |
+ project_cache.elements[filepath].contents = _yaml.node_copy(value)
|
|
101 |
+ else:
|
|
102 |
+ project_cache.elements[filepath] = CachedYaml(key, _yaml.node_copy(value))
|
|
103 |
+ |
|
104 |
+ @staticmethod
|
|
105 |
+ @contextmanager
|
|
106 |
+ def get_cache(context):
|
|
107 |
+ # Try to load from disk first
|
|
108 |
+ cachefile = YamlCache._get_cache_file(context)
|
|
109 |
+ cache = None
|
|
110 |
+ if os.path.exists(cachefile):
|
|
111 |
+ try:
|
|
112 |
+ with open(cachefile, "rb") as f:
|
|
113 |
+ cache = BstUnpickler(f, context).load()
|
|
114 |
+ except pickle.UnpicklingError as e:
|
|
115 |
+ pass
|
|
116 |
+ |
|
117 |
+ if not cache:
|
|
118 |
+ cache = YamlCache()
|
|
119 |
+ |
|
120 |
+ yield cache
|
|
121 |
+ |
|
122 |
+ cache.write(cachefile)
|
|
123 |
+ |
|
124 |
+ @staticmethod
|
|
125 |
+ def _get_cache_file(context):
|
|
126 |
+ toplevel_project = context.get_toplevel_project()
|
|
127 |
+ return os.path.join(toplevel_project.directory, ".bst", YAML_CACHE_FILENAME)
|
|
128 |
+ |
|
129 |
+ @staticmethod
|
|
130 |
+ def calculate_key(*args):
|
|
131 |
+ string = pickle.dumps(args)
|
|
132 |
+ return hashlib.sha1(string).hexdigest()
|
... | ... | @@ -19,6 +19,7 @@ |
19 | 19 |
#        Tiago Gomes <tiago.gomes@codethink.co.uk>
|
20 | 20 |
|
21 | 21 |
import os
|
22 |
+import hashlib
|
|
22 | 23 |
from collections import Mapping, OrderedDict
|
23 | 24 |
from pluginbase import PluginBase
|
24 | 25 |
from . import utils
|
... | ... | @@ -110,6 +111,7 @@ class Project(): |
110 | 111 |
self.ref_storage = None # ProjectRefStorage setting
|
111 | 112 |
self.base_environment = {} # The base set of environment variables
|
112 | 113 |
self.base_env_nocache = None # The base nocache mask (list) for the environment
|
114 |
+ self.shasum = None # A SHA-1 sum of the project file
|
|
113 | 115 |
|
114 | 116 |
#
|
115 | 117 |
# Private Members
|
... | ... | @@ -381,6 +383,10 @@ class Project(): |
381 | 383 |
|
382 | 384 |
# Load project local config and override the builtin
|
383 | 385 |
try:
|
386 |
+ with open(projectfile, "r") as f:
|
|
387 |
+ contents = f.read()
|
|
388 |
+ self.shasum = hashlib.sha1(contents.encode('utf-8')).hexdigest()
|
|
389 |
+ |
|
384 | 390 |
self._project_conf = _yaml.load(projectfile)
|
385 | 391 |
except LoadError as e:
|
386 | 392 |
# Raise a more specific error here
|
... | ... | @@ -109,7 +109,7 @@ class Job(): |
109 | 109 |
# Private members
|
110 | 110 |
#
|
111 | 111 |
self._scheduler = scheduler # The scheduler
|
112 |
- self._queue = multiprocessing.Queue() # A message passing queue
|
|
112 |
+ self._queue = None # A message passing queue
|
|
113 | 113 |
self._process = None # The Process object
|
114 | 114 |
self._watcher = None # Child process watcher
|
115 | 115 |
self._listening = False # Whether the parent is currently listening
|
... | ... | @@ -130,6 +130,8 @@ class Job(): |
130 | 130 |
#
|
131 | 131 |
def spawn(self):
|
132 | 132 |
|
133 |
+ self._queue = multiprocessing.Queue()
|
|
134 |
+ |
|
133 | 135 |
self._tries += 1
|
134 | 136 |
self._parent_start_listening()
|
135 | 137 |
|
... | ... | @@ -552,6 +554,9 @@ class Job(): |
552 | 554 |
self.parent_complete(returncode == RC_OK, self._result)
|
553 | 555 |
self._scheduler.job_completed(self, returncode == RC_OK)
|
554 | 556 |
|
557 |
+ # Force the deletion of the queue and process objects to try and clean up FDs
|
|
558 |
+ self._queue = self._process = None
|
|
559 |
+ |
|
555 | 560 |
# _parent_process_envelope()
|
556 | 561 |
#
|
557 | 562 |
# Processes a message Envelope deserialized form the message queue.
|
... | ... | @@ -23,11 +23,14 @@ import string |
23 | 23 |
from copy import deepcopy
|
24 | 24 |
from contextlib import ExitStack
|
25 | 25 |
from pathlib import Path
|
26 |
+import hashlib
|
|
27 |
+import os
|
|
26 | 28 |
|
27 | 29 |
from ruamel import yaml
|
28 | 30 |
from ruamel.yaml.representer import SafeRepresenter, RoundTripRepresenter
|
29 | 31 |
from ruamel.yaml.constructor import RoundTripConstructor
|
30 | 32 |
from ._exceptions import LoadError, LoadErrorReason
|
33 |
+from ._platform import Platform
|
|
31 | 34 |
|
32 | 35 |
# This overrides the ruamel constructor to treat everything as a string
|
33 | 36 |
RoundTripConstructor.add_constructor(u'tag:yaml.org,2002:int', RoundTripConstructor.construct_yaml_str)
|
... | ... | @@ -183,12 +186,13 @@ class CompositeTypeError(CompositeError): |
183 | 186 |
# shortname (str): The filename in shorthand for error reporting (or None)
|
184 | 187 |
# copy_tree (bool): Whether to make a copy, preserving the original toplevels
|
185 | 188 |
# for later serialization
|
189 |
+# yaml_cache (YamlCache): A yaml cache to consult rather than parsing
|
|
186 | 190 |
#
|
187 | 191 |
# Returns (dict): A loaded copy of the YAML file with provenance information
|
188 | 192 |
#
|
189 | 193 |
# Raises: LoadError
|
190 | 194 |
#
|
191 |
-def load(filename, shortname=None, copy_tree=False, *, project=None):
|
|
195 |
+def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache=None):
|
|
192 | 196 |
if not shortname:
|
193 | 197 |
shortname = filename
|
194 | 198 |
|
... | ... | @@ -196,7 +200,7 @@ def load(filename, shortname=None, copy_tree=False, *, project=None): |
196 | 200 |
|
197 | 201 |
try:
|
198 | 202 |
with open(filename) as f:
|
199 |
- return load_data(f, file, copy_tree=copy_tree)
|
|
203 |
+ return load_data(f, file, copy_tree=copy_tree, yaml_cache=yaml_cache)
|
|
200 | 204 |
except FileNotFoundError as e:
|
201 | 205 |
raise LoadError(LoadErrorReason.MISSING_FILE,
|
202 | 206 |
"Could not find file at {}".format(filename)) from e
|
... | ... | @@ -208,24 +212,46 @@ def load(filename, shortname=None, copy_tree=False, *, project=None): |
208 | 212 |
|
209 | 213 |
# Like load(), but doesnt require the data to be in a file
|
210 | 214 |
#
|
211 |
-def load_data(data, file=None, copy_tree=False):
|
|
215 |
+def load_data(data, file=None, copy_tree=False, yaml_cache=None):
|
|
212 | 216 |
|
213 |
- try:
|
|
214 |
- contents = yaml.load(data, yaml.loader.RoundTripLoader, preserve_quotes=True)
|
|
215 |
- except (yaml.scanner.ScannerError, yaml.composer.ComposerError, yaml.parser.ParserError) as e:
|
|
216 |
- raise LoadError(LoadErrorReason.INVALID_YAML,
|
|
217 |
- "Malformed YAML:\n\n{}\n\n{}\n".format(e.problem, e.problem_mark)) from e
|
|
218 |
- |
|
219 |
- if not isinstance(contents, dict):
|
|
220 |
- # Special case allowance for None, when the loaded file has only comments in it.
|
|
221 |
- if contents is None:
|
|
222 |
- contents = {}
|
|
223 |
- else:
|
|
217 |
+ if hasattr(data, 'read'):
|
|
218 |
+ file_contents = data.read()
|
|
219 |
+ else:
|
|
220 |
+ file_contents = data
|
|
221 |
+ # Forced to compare sums of contents because elements in junctions are stored in tmpdirs
|
|
222 |
+ decorated_yaml = None
|
|
223 |
+ if yaml_cache:
|
|
224 |
+ assert file
|
|
225 |
+ project = file.project
|
|
226 |
+ filename = os.path.relpath(file.name, project.directory)
|
|
227 |
+ key = yaml_cache.calculate_key(file_contents, copy_tree)
|
|
228 |
+ decorated_yaml = yaml_cache.get(project, filename, key)
|
|
229 |
+ |
|
230 |
+ if not decorated_yaml:
|
|
231 |
+ try:
|
|
232 |
+ contents = yaml.load(file_contents, yaml.loader.RoundTripLoader, preserve_quotes=True)
|
|
233 |
+ except (yaml.scanner.ScannerError, yaml.composer.ComposerError, yaml.parser.ParserError) as e:
|
|
224 | 234 |
raise LoadError(LoadErrorReason.INVALID_YAML,
|
225 |
- "YAML file has content of type '{}' instead of expected type 'dict': {}"
|
|
226 |
- .format(type(contents).__name__, file.name))
|
|
235 |
+ "Malformed YAML:\n\n{}\n\n{}\n".format(e.problem, e.problem_mark)) from e
|
|
227 | 236 |
|
228 |
- return node_decorated_copy(file, contents, copy_tree=copy_tree)
|
|
237 |
+ if not isinstance(contents, dict):
|
|
238 |
+ # Special case allowance for None, when the loaded file has only comments in it.
|
|
239 |
+ if contents is None:
|
|
240 |
+ contents = {}
|
|
241 |
+ else:
|
|
242 |
+ raise LoadError(LoadErrorReason.INVALID_YAML,
|
|
243 |
+ "YAML file has content of type '{}' instead of expected type 'dict': {}"
|
|
244 |
+ .format(type(contents).__name__, file.name))
|
|
245 |
+ |
|
246 |
+ decorated_yaml = node_decorated_copy(file, contents, copy_tree=copy_tree)
|
|
247 |
+ if yaml_cache:
|
|
248 |
+ assert file
|
|
249 |
+ project = file.project
|
|
250 |
+ filename = os.path.relpath(file.name, project.directory)
|
|
251 |
+ key = yaml_cache.calculate_key(file_contents, copy_tree)
|
|
252 |
+ yaml_cache.put(project, filename, key, decorated_yaml)
|
|
253 |
+ |
|
254 |
+ return decorated_yaml
|
|
229 | 255 |
|
230 | 256 |
|
231 | 257 |
# Dumps a previously loaded YAML node to a file
|
... | ... | @@ -161,13 +161,13 @@ Below are two examples of how to run the cache server as a systemd service, one |
161 | 161 |
|
162 | 162 |
[Service]
|
163 | 163 |
Environment="LC_ALL=C.UTF-8"
|
164 |
- ExecStart=/usr/local/bin/bst-artifact-server --port 11001 --server-key {{certs_path}}/privkey.pem --
|
|
165 |
- server-cert {{certs_path}}/fullchain.pem {{artifacts_path}}
|
|
164 |
+ ExecStart=/usr/local/bin/bst-artifact-server --port 11001 --server-key {{certs_path}}/server.key --server-cert {{certs_path}}/server.crt {{artifacts_path}}
|
|
166 | 165 |
User=artifacts
|
167 | 166 |
|
168 | 167 |
[Install]
|
169 | 168 |
WantedBy=multi-user.target
|
170 | 169 |
|
170 |
+.. code:: ini
|
|
171 | 171 |
|
172 | 172 |
#
|
173 | 173 |
# Pull/Push
|
... | ... | @@ -178,9 +178,7 @@ Below are two examples of how to run the cache server as a systemd service, one |
178 | 178 |
|
179 | 179 |
[Service]
|
180 | 180 |
Environment="LC_ALL=C.UTF-8"
|
181 |
- ExecStart=/usr/local/bin/bst-artifact-server --port 11002 --server-key {{certs_path}}/privkey.pem --
|
|
182 |
- server-cert {{certs_path}}/fullchain.pem --client-certs /home/artifacts/authorized.crt --enable-push /
|
|
183 |
- {{artifacts_path}}
|
|
181 |
+ ExecStart=/usr/local/bin/bst-artifact-server --port 11002 --server-key {{certs_path}}/server.key --server-cert {{certs_path}}/server.crt --client-certs {{certs_path}}/authorized.crt --enable-push {{artifacts_path}}
|
|
184 | 182 |
User=artifacts
|
185 | 183 |
|
186 | 184 |
[Install]
|
... | ... | @@ -188,11 +186,16 @@ Below are two examples of how to run the cache server as a systemd service, one |
188 | 186 |
|
189 | 187 |
Here we define when systemd should start the service, which is after the networking stack has been started. We then define how to run the cache with the desired configuration, under the artifacts user. The {{ }} are there to denote where you should change these files to point to your desired locations.
|
190 | 188 |
|
189 |
+For more information on systemd services see:
|
|
190 |
+`Creating Systemd Service Files <https://www.devdungeon.com/content/creating-systemd-service-files>`_.
|
|
191 |
+ |
|
191 | 192 |
User configuration
|
192 | 193 |
~~~~~~~~~~~~~~~~~~
|
193 | 194 |
The user configuration for artifacts is documented with the rest
|
194 | 195 |
of the :ref:`user configuration documentation <user_config>`.
|
195 | 196 |
|
197 |
+Note that for self-signed certificates, the public key fields are mandatory.
|
|
198 |
+ |
|
196 | 199 |
Assuming you have the same setup used in this document, and that your
|
197 | 200 |
host is reachable on the internet as ``artifacts.com`` (for example),
|
198 | 201 |
then a user can use the following user configuration:
|