Jonathan Maw pushed to branch jonathan/pickle-yaml at BuildStream / buildstream
Commits:
5 changed files:
- buildstream/_loader/loadelement.py
- buildstream/_loader/loader.py
- + buildstream/_pickler.py
- buildstream/_project.py
- buildstream/_yaml.py
Changes:
buildstream/_loader/loadelement.py:

@@ -185,6 +185,6 @@ def _extract_depends_from_node(node, *, key=None):
         output_deps.append(dependency)

     # Now delete the field, we dont want it anymore
-    del node[key]
+    node.pop(key, None)

     return output_deps
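The change from del node[key] to node.pop(key, None) presumably makes the deletion tolerant of the key already being absent, which matters once a parsed node can be handed back from a shared cache more than once. A minimal standalone illustration (plain dict, not a BuildStream node):

    node = {"kind": "manual", "depends": ["base.bst"]}
    node.pop("depends", None)   # removes the key
    node.pop("depends", None)   # no-op the second time; del node["depends"] would raise KeyError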
buildstream/_loader/loader.py:

@@ -30,6 +30,7 @@ from ..element import Element
 from .._profile import Topics, profile_start, profile_end
 from .._platform import Platform
 from .._includes import Includes
+from .._pickler import YamlCache

 from .types import Symbol, Dependency
 from .loadelement import LoadElement
@@ -113,7 +114,8 @@ class Loader():
             profile_start(Topics.LOAD_PROJECT, target)
             junction, name, loader = self._parse_name(target, rewritable, ticker,
                                                       fetch_subprojects=fetch_subprojects)
-            loader._load_file(name, rewritable, ticker, fetch_subprojects)
+            with YamlCache.get_cache(self._context) as yaml_cache:
+                loader._load_file(name, rewritable, ticker, fetch_subprojects, yaml_cache)
             deps.append(Dependency(name, junction=junction))
             profile_end(Topics.LOAD_PROJECT, target)

@@ -202,11 +204,12 @@ class Loader():
     #    rewritable (bool): Whether we should load in round trippable mode
     #    ticker (callable): A callback to report loaded filenames to the frontend
     #    fetch_subprojects (bool): Whether to fetch subprojects while loading
+    #    yaml_cache (YamlCache): A yaml cache
     #
     # Returns:
     #    (LoadElement): A loaded LoadElement
     #
-    def _load_file(self, filename, rewritable, ticker, fetch_subprojects):
+    def _load_file(self, filename, rewritable, ticker, fetch_subprojects, yaml_cache=None):

         # Silently ignore already loaded files
         if filename in self._elements:
@@ -219,7 +222,8 @@ class Loader():
         # Load the data and process any conditional statements therein
         fullpath = os.path.join(self._basedir, filename)
         try:
-            node = _yaml.load(fullpath, shortname=filename, copy_tree=rewritable, project=self.project)
+            node = _yaml.load(fullpath, shortname=filename, copy_tree=rewritable,
+                              project=self.project, yaml_cache=yaml_cache)
         except LoadError as e:
             if e.reason == LoadErrorReason.MISSING_FILE:
                 # If we can't find the file, try to suggest plausible
@@ -262,13 +266,13 @@ class Loader():
         # Load all dependency files for the new LoadElement
         for dep in element.deps:
             if dep.junction:
-                self._load_file(dep.junction, rewritable, ticker, fetch_subprojects)
+                self._load_file(dep.junction, rewritable, ticker, fetch_subprojects, yaml_cache)
                 loader = self._get_loader(dep.junction, rewritable=rewritable, ticker=ticker,
                                           fetch_subprojects=fetch_subprojects)
             else:
                 loader = self

-            dep_element = loader._load_file(dep.name, rewritable, ticker, fetch_subprojects)
+            dep_element = loader._load_file(dep.name, rewritable, ticker, fetch_subprojects, yaml_cache)

             if _yaml.node_get(dep_element.node, str, Symbol.KIND) == 'junction':
                 raise LoadError(LoadErrorReason.INVALID_DATA,
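The new YamlCache.get_cache() context manager loads any cache found on disk once, hands the same yaml_cache object to every _load_file() call, and writes the cache back out when the with-block exits. A runnable toy of that shape (not BuildStream code; DictCache and the file name are invented for illustration):

    import json
    import os
    from contextlib import contextmanager

    class DictCache(dict):
        @staticmethod
        @contextmanager
        def get_cache(path="toy_cache.json"):
            cache = DictCache()
            if os.path.exists(path):
                with open(path) as f:
                    cache.update(json.load(f))   # reuse whatever a previous run persisted
            yield cache                          # callers read and update the cache here
            with open(path, "w") as f:
                json.dump(cache, f)              # written back once, when the block exits

    with DictCache.get_cache() as cache:
        cache["elements/base.bst"] = {"kind": "manual"}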
buildstream/_pickler.py (new file):

+#
+# Copyright 2018 Bloomberg Finance LP
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library. If not, see <http://www.gnu.org/licenses/>.
+#
+# Authors:
+#        Jonathan Maw <jonathan maw codethink co uk>
+
+import os
+import pickle
+import hashlib
+import io
+
+from contextlib import contextmanager
+from collections import namedtuple
+
+from ._cachekey import generate_key
+from . import utils, _yaml
+
+
+YAML_CACHE_FILENAME = "yaml_cache.pickle"
+
+
+# In _yaml.load, we have a ProvenanceFile that stores the project the file
+# came from. Projects can't be pickled, but it's always going to be the same
+# project between invocations (unless the entire project is moved but the
+# file stayed in the same place)
+class BstPickler(pickle.Pickler):
+    def persistent_id(self, obj):
+        if isinstance(obj, _yaml.ProvenanceFile):
+            if obj.project:
+                # ProvenanceFile's project object cannot be stored as it is.
+                project_tag = obj.project.name
+                # ProvenanceFile's filename must be stored relative to the
+                # project, as the project dir may move.
+                name = os.path.relpath(obj.name, obj.project.directory)
+            else:
+                project_tag = None
+                name = obj.name
+            return ("ProvenanceFile", name, obj.shortname, project_tag)
+        else:
+            return None
+
+    @staticmethod
+    def dumps(obj):
+        stream = io.BytesIO()
+        BstPickler(stream).dump(obj)
+        stream.seek(0)
+        return stream.read()
+
+
+class BstUnpickler(pickle.Unpickler):
+    def __init__(self, file, context):
+        super().__init__(file)
+        self._context = context
+
+    def persistent_load(self, pid):
+        if pid[0] == "ProvenanceFile":
+            _, tagged_name, shortname, project_tag = pid
+
+            if project_tag is not None:
+                project = None
+                for p in self._context.get_projects():
+                    if project_tag == p.name:
+                        project = p
+                        break
+
+                if not project:
+                    projects = [p.name for p in self._context.get_projects()]
+                    raise pickle.UnpicklingError("No project with name {} found in {}"
+                                                 .format(project_tag, projects))
+
+                name = os.path.join(project.directory, tagged_name)
+            else:
+                project = None
+                name = tagged_name
+
+            return _yaml.ProvenanceFile(name, shortname, project)
+
+        else:
+            raise pickle.UnpicklingError("Unsupported persistent object")
+
+    @staticmethod
+    def loads(text, context):
+        stream = io.BytesIO()
+        stream.write(bytes(text))
+        stream.seek(0)
+        return BstUnpickler(stream, context).load()
+
+
+CachedProject = namedtuple('CachedProject', ['path', 'project_sum', 'elements'])
+
+
+class CachedYaml():
+    def __init__(self, key, contents):
+        self._key = key
+        self.set_contents(contents)
+
+    # Sets the contents of the CachedYaml.
+    #
+    # Args:
+    #    contents (provenanced dict): The contents to put in the cache.
+    #
+    def set_contents(self, contents):
+        self._contents = contents
+        self._pickled_contents = BstPickler.dumps(contents)
+
+    # Pickling helper method, prevents 'contents' from being serialised
+    def __getstate__(self):
+        data = self.__dict__.copy()
+        data['_contents'] = None
+        return data
+
+
+class YamlCache():
+
+    def __init__(self):
+        self._project_caches = {}
+
+    # Writes the yaml cache to the specified path.
+    #
+    # Args:
+    #    path (str): The file to write the cache to.
+    #
+    def write(self, path):
+        parent_dir = os.path.dirname(path)
+        os.makedirs(parent_dir, exist_ok=True)
+        with open(path, "wb") as f:
+            BstPickler(f).dump(self)
+
+    # Gets a parsed file from the cache.
+    #
+    # Args:
+    #    project (Project): The project this file is in.
+    #    filepath (str): The path to the file, *relative to the project's directory*.
+    #    key (str): The key to the file within the cache. Typically, this is the
+    #               value of `calculate_key()` with the file's unparsed contents
+    #               and any relevant metadata passed in.
+    #
+    # Returns:
+    #    (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
+    def get(self, project, filepath, key):
+        if project.name in self._project_caches:
+            project_cache = self._project_caches[project.name]
+            if filepath in project_cache.elements:
+                cachedyaml = project_cache.elements[filepath]
+                if cachedyaml._key == key:
+                    if cachedyaml._contents is None:
+                        cachedyaml._contents = BstUnpickler.loads(cachedyaml._pickled_contents, project._context)
+                    return cachedyaml._contents
+        return None
+
+    # Put a parsed file into the cache.
+    #
+    # Args:
+    #    project (Project): The project this file is in.
+    #    filepath (str): The path to the file, *relative to the project's directory*.
+    #    key (str): The key to the file within the cache. Typically, this is the
+    #               value of `calculate_key()` with the file's unparsed contents
+    #               and any relevant metadata passed in.
+    #    value (decorated dict): The data to put into the cache.
+    def put(self, project, filepath, key, value):
+        if project.name in self._project_caches \
+           and project.shasum == self._project_caches[project.name].project_sum:
+            project_cache = self._project_caches[project.name]
+        else:
+            project_cache = self._project_caches[project.name] = CachedProject(project.directory, project.shasum, {})
+
+        project_cache.elements[filepath] = CachedYaml(key, value)
+
+    # Return an instance of the YamlCache which writes to disk when it leaves scope.
+    #
+    # Args:
+    #    context (Context): The context.
+    #
+    # Returns:
+    #    (YamlCache): A YamlCache.
+    @staticmethod
+    @contextmanager
+    def get_cache(context):
+        # Try to load from disk first
+        cachefile = YamlCache._get_cache_file(context)
+        cache = None
+        if os.path.exists(cachefile):
+            try:
+                with open(cachefile, "rb") as f:
+                    cache = BstUnpickler(f, context).load()
+            except pickle.UnpicklingError as e:
+                pass
+
+        if not cache:
+            cache = YamlCache()
+
+        yield cache
+
+        cache.write(cachefile)
+
+    # Calculates a key for putting into the cache.
+    @staticmethod
+    def calculate_key(*args):
+        string = pickle.dumps(args)
+        return hashlib.sha1(string).hexdigest()
+
+    # Retrieves a path to the yaml cache file.
+    @staticmethod
+    def _get_cache_file(context):
+        toplevel_project = context.get_toplevel_project()
+        return os.path.join(toplevel_project.directory, ".bst", YAML_CACHE_FILENAME)
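For orientation, the cache above keys entries at two levels: per project (a CachedProject whose project_sum guards against project.conf changes) and per file (a CachedYaml whose key is derived from the unparsed contents). A simplified, runnable model of that lookup logic, leaving out the pickling and the real BuildStream objects (project names and contents below are invented):

    import hashlib
    import pickle
    from collections import namedtuple

    CachedProject = namedtuple('CachedProject', ['project_sum', 'elements'])
    caches = {}

    def calculate_key(*args):
        # same idea as YamlCache.calculate_key(): hash the pickled arguments
        return hashlib.sha1(pickle.dumps(args)).hexdigest()

    def put(project_name, project_sum, filepath, key, value):
        cache = caches.get(project_name)
        if cache is None or cache.project_sum != project_sum:
            # project.conf changed (or first use): start a fresh per-project cache
            cache = caches[project_name] = CachedProject(project_sum, {})
        cache.elements[filepath] = (key, value)

    def get(project_name, filepath, key):
        cache = caches.get(project_name)
        if cache and cache.elements.get(filepath, (None, None))[0] == key:
            return cache.elements[filepath][1]
        return None

    key = calculate_key("kind: manual\n", False)
    put("myproject", "abc123", "elements/base.bst", key, {"kind": "manual"})
    assert get("myproject", "elements/base.bst", key) == {"kind": "manual"}
    assert get("myproject", "elements/base.bst", calculate_key("kind: manual\n", True)) is None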
buildstream/_project.py:

@@ -19,6 +19,7 @@
 #        Tiago Gomes <tiago gomes codethink co uk>

 import os
+import hashlib
 from collections import Mapping, OrderedDict
 from pluginbase import PluginBase
 from . import utils
@@ -110,6 +111,7 @@ class Project():
         self.ref_storage = None              # ProjectRefStorage setting
         self.base_environment = {}           # The base set of environment variables
         self.base_env_nocache = None         # The base nocache mask (list) for the environment
+        self.shasum = None                   # A SHA-1 sum of the project file

         #
         # Private Members
@@ -382,6 +384,10 @@ class Project():

         # Load project local config and override the builtin
         try:
+            with open(projectfile, "r") as f:
+                contents = f.read()
+                self.shasum = hashlib.sha1(contents.encode('utf-8')).hexdigest()
+
             self._project_conf = _yaml.load(projectfile)
         except LoadError as e:
             # Raise a more specific error here
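The new shasum records a SHA-1 of the raw project.conf text; YamlCache.put() in _pickler.py above compares it against the project_sum stored in the on-disk cache, so editing project.conf appears to invalidate that project's cached entries. A minimal illustration of the sum itself (the contents string is invented):

    import hashlib

    contents = "name: myproject\n"   # stand-in for the project.conf text read above
    shasum = hashlib.sha1(contents.encode('utf-8')).hexdigest()
    assert len(shasum) == 40         # 40-character hex digest, stable for identical text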
buildstream/_yaml.py:

@@ -23,6 +23,8 @@ import string
 from copy import deepcopy
 from contextlib import ExitStack
 from pathlib import Path
+import hashlib
+import os

 from ruamel import yaml
 from ruamel.yaml.representer import SafeRepresenter, RoundTripRepresenter
@@ -183,12 +185,13 @@ class CompositeTypeError(CompositeError):
 #    shortname (str): The filename in shorthand for error reporting (or None)
 #    copy_tree (bool): Whether to make a copy, preserving the original toplevels
 #                      for later serialization
+#    yaml_cache (YamlCache): A yaml cache to consult rather than parsing
 #
 # Returns (dict): A loaded copy of the YAML file with provenance information
 #
 # Raises: LoadError
 #
-def load(filename, shortname=None, copy_tree=False, *, project=None):
+def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache=None):
     if not shortname:
         shortname = filename

@@ -196,7 +199,7 @@ def load(filename, shortname=None, copy_tree=False, *, project=None):

     try:
         with open(filename) as f:
-            return load_data(f, file, copy_tree=copy_tree)
+            return load_data(f, file, copy_tree=copy_tree, yaml_cache=yaml_cache)
     except FileNotFoundError as e:
         raise LoadError(LoadErrorReason.MISSING_FILE,
                         "Could not find file at {}".format(filename)) from e
@@ -208,24 +211,46 @@ def load(filename, shortname=None, copy_tree=False, *, project=None):

 # Like load(), but doesnt require the data to be in a file
 #
-def load_data(data, file=None, copy_tree=False):
+def load_data(data, file=None, copy_tree=False, yaml_cache=None):

-    try:
-        contents = yaml.load(data, yaml.loader.RoundTripLoader, preserve_quotes=True)
-    except (yaml.scanner.ScannerError, yaml.composer.ComposerError, yaml.parser.ParserError) as e:
-        raise LoadError(LoadErrorReason.INVALID_YAML,
-                        "Malformed YAML:\n\n{}\n\n{}\n".format(e.problem, e.problem_mark)) from e
-
-    if not isinstance(contents, dict):
-        # Special case allowance for None, when the loaded file has only comments in it.
-        if contents is None:
-            contents = {}
-        else:
+    if hasattr(data, 'read'):
+        file_contents = data.read()
+    else:
+        file_contents = data
+    # Forced to compare sums of contents because elements in junctions are stored in tmpdirs
+    decorated_yaml = None
+    if yaml_cache:
+        assert file
+        project = file.project
+        filename = os.path.relpath(file.name, project.directory)
+        key = yaml_cache.calculate_key(file_contents, copy_tree)
+        decorated_yaml = yaml_cache.get(project, filename, key)
+
+    if not decorated_yaml:
+        try:
+            contents = yaml.load(file_contents, yaml.loader.RoundTripLoader, preserve_quotes=True)
+        except (yaml.scanner.ScannerError, yaml.composer.ComposerError, yaml.parser.ParserError) as e:
             raise LoadError(LoadErrorReason.INVALID_YAML,
-                            "YAML file has content of type '{}' instead of expected type 'dict': {}"
-                            .format(type(contents).__name__, file.name))
+                            "Malformed YAML:\n\n{}\n\n{}\n".format(e.problem, e.problem_mark)) from e

-    return node_decorated_copy(file, contents, copy_tree=copy_tree)
+        if not isinstance(contents, dict):
+            # Special case allowance for None, when the loaded file has only comments in it.
+            if contents is None:
+                contents = {}
+            else:
+                raise LoadError(LoadErrorReason.INVALID_YAML,
+                                "YAML file has content of type '{}' instead of expected type 'dict': {}"
+                                .format(type(contents).__name__, file.name))
+
+        decorated_yaml = node_decorated_copy(file, contents, copy_tree=copy_tree)
+        if yaml_cache:
+            assert file
+            project = file.project
+            filename = os.path.relpath(file.name, project.directory)
+            key = yaml_cache.calculate_key(file_contents, copy_tree)
+            yaml_cache.put(project, filename, key, decorated_yaml)
+
+    return decorated_yaml


 # Dumps a previously loaded YAML node to a file
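One consequence of the hasattr(data, 'read') check in the new load_data() is that a stream and a plain string now go through the same path, with the cache key computed from the same text either way. A small runnable check of that assumption (the YAML snippet is invented):

    import io

    def normalize(data):
        # mirrors the branch at the top of the new load_data()
        return data.read() if hasattr(data, 'read') else data

    text = "kind: manual\n"
    assert normalize(text) == normalize(io.StringIO(text))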
|