1
|
1
|
#
|
2
|
2
|
# Copyright (C) 2016 Codethink Limited
|
|
3
|
+# Copyright (C) 2019 Bloomberg L.P.
|
3
|
4
|
#
|
4
|
5
|
# This program is free software; you can redistribute it and/or
|
5
|
6
|
# modify it under the terms of the GNU Lesser General Public
|
... |
... |
@@ -16,15 +17,37 @@ |
16
|
17
|
#
|
17
|
18
|
# Authors:
|
18
|
19
|
# Tristan Van Berkom <tristan vanberkom codethink co uk>
|
|
20
|
+# Daniel Silverstone <daniel silverstone codethink co uk>
|
19
|
21
|
|
20
|
22
|
import re
|
|
23
|
+import sys
|
21
|
24
|
|
22
|
25
|
from ._exceptions import LoadError, LoadErrorReason
|
23
|
26
|
from . import _yaml
|
24
|
27
|
|
25
|
28
|
# Variables are allowed to have dashes here
|
26
|
29
|
#
|
27
|
|
-_VARIABLE_MATCH = r'\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}'
|
|
30
|
+PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+# Throughout this code you will see variables named things like `expstr`.
|
|
34
|
+# These hold data structures called "expansion strings" and are the parsed
|
|
35
|
+# form of the strings which are the input to this subsystem. Strings
|
|
36
|
+# such as "Hello %{name}, how are you?" are parsed into the form:
|
|
37
|
+# (3, ["Hello ", "name", ", how are you?"])
|
|
38
|
+# i.e. a tuple of an integer and a list, where the integer is the cached
|
|
39
|
+# length of the list, and the list consists of one or more strings.
|
|
40
|
+# Strings in even indices of the list (0, 2, 4, etc) are constants which
|
|
41
|
+# are copied into the output of the expansion algorithm. Strings in the
|
|
42
|
+# odd indices (1, 3, 5, etc) are the names of further expansions to make.
|
|
43
|
+# In the example above, first "Hello " is copied, then "name" is expanded
|
|
44
|
+# and so must be another named expansion string passed in to the constructor
|
|
45
|
+# of the Variables class, and whatever is yielded from the expansion of "name"
|
|
46
|
+# is added to the concatenation for the result. Finally ", how are you?" is
|
|
47
|
+# copied in and the whole lot concatenated for return.
|
|
48
|
+#
|
|
49
|
+# To see how strings are parsed, see `_parse_expstr()` after the class, and
|
|
50
|
+# to see how expansion strings are expanded, see `_expand_expstr()` after that.
|
28
|
51
|
|
29
|
52
|
|
30
|
53
|
# The Variables helper object will resolve the variable references in
|
... |
... |
@@ -38,14 +61,15 @@ _VARIABLE_MATCH = r'\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}' |
38
|
61
|
# node (dict): A node loaded and composited with yaml tools
|
39
|
62
|
#
|
40
|
63
|
# Raises:
|
41
|
|
-# LoadError, if unresolved variables occur.
|
|
64
|
+# LoadError, if unresolved variables, or cycles in resolution, occur.
|
42
|
65
|
#
|
43
|
66
|
class Variables():
|
44
|
67
|
|
45
|
68
|
def __init__(self, node):
|
46
|
69
|
|
47
|
70
|
self.original = node
|
48
|
|
- self.variables = self._resolve(node)
|
|
71
|
+ self._expstr_map = self._resolve(node)
|
|
72
|
+ self.flat = self._flatten()
|
49
|
73
|
|
50
|
74
|
# subst():
|
51
|
75
|
#
|
... |
... |
@@ -61,139 +85,167 @@ class Variables(): |
61
|
85
|
# LoadError, if the string contains unresolved variable references.
|
62
|
86
|
#
|
63
|
87
|
def subst(self, string):
|
64
|
|
- substitute, unmatched, _ = self._subst(string, self.variables)
|
65
|
|
- unmatched = list(set(unmatched))
|
66
|
|
- if unmatched:
|
67
|
|
- if len(unmatched) == 1:
|
68
|
|
- message = "Unresolved variable '{var}'".format(var=unmatched[0])
|
69
|
|
- else:
|
70
|
|
- message = "Unresolved variables: "
|
71
|
|
- for unmatch in unmatched:
|
72
|
|
- if unmatched.index(unmatch) > 0:
|
73
|
|
- message += ', '
|
74
|
|
- message += unmatch
|
75
|
|
-
|
76
|
|
- raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, message)
|
77
|
|
-
|
78
|
|
- return substitute
|
79
|
|
-
|
80
|
|
- def _subst(self, string, variables):
|
81
|
|
-
|
82
|
|
- def subst_callback(match):
|
83
|
|
- nonlocal variables
|
84
|
|
- nonlocal unmatched
|
85
|
|
- nonlocal matched
|
86
|
|
-
|
87
|
|
- token = match.group(0)
|
88
|
|
- varname = match.group(1)
|
89
|
|
-
|
90
|
|
- value = _yaml.node_get(variables, str, varname, default_value=None)
|
91
|
|
- if value is not None:
|
92
|
|
- # We have to check if the inner string has variables
|
93
|
|
- # and return unmatches for those
|
94
|
|
- unmatched += re.findall(_VARIABLE_MATCH, value)
|
95
|
|
- matched += [varname]
|
96
|
|
- else:
|
97
|
|
- # Return unmodified token
|
98
|
|
- unmatched += [varname]
|
99
|
|
- value = token
|
100
|
|
-
|
101
|
|
- return value
|
102
|
|
-
|
103
|
|
- matched = []
|
104
|
|
- unmatched = []
|
105
|
|
- replacement = re.sub(_VARIABLE_MATCH, subst_callback, string)
|
106
|
|
-
|
107
|
|
- return (replacement, unmatched, matched)
|
|
88
|
+ expstr = _parse_expstr(string)
|
|
89
|
+
|
|
90
|
+ try:
|
|
91
|
+ return _expand_expstr(self._expstr_map, expstr)
|
|
92
|
+ except KeyError:
|
|
93
|
+ unmatched = []
|
|
94
|
+
|
|
95
|
+ # Look for any unmatched variable names in the expansion string
|
|
96
|
+ for var in expstr[1][1::2]:
|
|
97
|
+ if var not in self._expstr_map:
|
|
98
|
+ unmatched.append(var)
|
|
99
|
+
|
|
100
|
+ if unmatched:
|
|
101
|
+ message = "Unresolved variable{}: {}".format(
|
|
102
|
+ "s" if len(unmatched) > 1 else "",
|
|
103
|
+ ", ".join(unmatched)
|
|
104
|
+ )
|
|
105
|
+
|
|
106
|
+ raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, message)
|
|
107
|
+ # Otherwise, re-raise the KeyError since it clearly came from some
|
|
108
|
+ # other unknowable cause.
|
|
109
|
+ raise
|
108
|
110
|
|
109
|
111
|
# Variable resolving code
|
110
|
112
|
#
|
111
|
|
- # Here we substitute variables for values (resolve variables) repeatedly
|
112
|
|
- # in a dictionary, each time creating a new dictionary until there is no
|
113
|
|
- # more unresolved variables to resolve, or, until resolving further no
|
114
|
|
- # longer resolves anything, in which case we throw an exception.
|
|
113
|
+ # Here we resolve all of our inputs into a dictionary, ready for use
|
|
114
|
+ # in subst()
|
115
|
115
|
def _resolve(self, node):
|
116
|
|
- variables = node
|
117
|
|
-
|
118
|
116
|
# Special case, if notparallel is specified in the variables for this
|
119
|
117
|
# element, then override max-jobs to be 1.
|
120
|
118
|
# Initialize it as a string as all variables are processed as strings.
|
121
|
119
|
#
|
122
|
|
- if _yaml.node_get(variables, bool, 'notparallel', default_value=False):
|
123
|
|
- variables['max-jobs'] = str(1)
|
124
|
|
-
|
125
|
|
- # Resolve the dictionary once, reporting the new dictionary with things
|
126
|
|
- # substituted in it, and reporting unmatched tokens.
|
127
|
|
- #
|
128
|
|
- def resolve_one(variables):
|
129
|
|
- unmatched = []
|
130
|
|
- resolved = {}
|
131
|
|
-
|
132
|
|
- for key, value in _yaml.node_items(variables):
|
133
|
|
-
|
134
|
|
- # Ensure stringness of the value before substitution
|
135
|
|
- value = _yaml.node_get(variables, str, key)
|
136
|
|
-
|
137
|
|
- resolved_var, item_unmatched, matched = self._subst(value, variables)
|
138
|
|
-
|
139
|
|
- if _wrap_variable(key) in resolved_var:
|
140
|
|
- referenced_through = find_recursive_variable(key, matched, variables)
|
|
120
|
+ if _yaml.node_get(node, bool, 'notparallel', default_value=False):
|
|
121
|
+ node['max-jobs'] = str(1)
|
|
122
|
+
|
|
123
|
+ ret = {}
|
|
124
|
+ for key, value in _yaml.node_items(node):
|
|
125
|
+ value = _yaml.node_get(node, str, key)
|
|
126
|
+ ret[sys.intern(key)] = _parse_expstr(value)
|
|
127
|
+ return ret
|
|
128
|
+
|
|
129
|
+ def _check_for_missing(self):
|
|
130
|
+ # First the check for anything unresolvable
|
|
131
|
+ summary = []
|
|
132
|
+ for key, expstr in self._expstr_map.items():
|
|
133
|
+ for var in expstr[1][1::2]:
|
|
134
|
+ if var not in self._expstr_map:
|
|
135
|
+ line = " unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}"
|
|
136
|
+ provenance = _yaml.node_get_provenance(self.original, key)
|
|
137
|
+ summary.append(line.format(unmatched=var, variable=key, provenance=provenance))
|
|
138
|
+ if summary:
|
|
139
|
+ raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE,
|
|
140
|
+ "Failed to resolve one or more variable:\n{}\n".format("\n".join(summary)))
|
|
141
|
+
|
|
142
|
+ def _check_for_cycles(self):
|
|
143
|
+ # And now the cycle checks
|
|
144
|
+ def cycle_check(expstr, visited, cleared):
|
|
145
|
+ for var in expstr[1][1::2]:
|
|
146
|
+ if var in cleared:
|
|
147
|
+ continue
|
|
148
|
+ if var in visited:
|
141
|
149
|
raise LoadError(LoadErrorReason.RECURSIVE_VARIABLE,
|
142
|
|
- "{}: ".format(_yaml.node_get_provenance(variables, key)) +
|
|
150
|
+ "{}: ".format(_yaml.node_get_provenance(self.original, var)) +
|
143
|
151
|
("Variable '{}' expands to contain a reference to itself. " +
|
144
|
|
- "Perhaps '{}' contains '{}").format(key, referenced_through, _wrap_variable(key)))
|
145
|
|
-
|
146
|
|
- resolved[key] = resolved_var
|
147
|
|
- unmatched += item_unmatched
|
148
|
|
-
|
149
|
|
- # Carry over provenance
|
150
|
|
- resolved[_yaml.PROVENANCE_KEY] = variables[_yaml.PROVENANCE_KEY]
|
151
|
|
- return (resolved, unmatched)
|
152
|
|
-
|
153
|
|
- # Resolve it until it's resolved or broken
|
154
|
|
- #
|
155
|
|
- resolved = variables
|
156
|
|
- unmatched = ['dummy']
|
157
|
|
- last_unmatched = ['dummy']
|
158
|
|
- while unmatched:
|
159
|
|
- resolved, unmatched = resolve_one(resolved)
|
160
|
|
-
|
161
|
|
- # Lists of strings can be compared like this
|
162
|
|
- if unmatched == last_unmatched:
|
163
|
|
- # We've got the same result twice without matching everything,
|
164
|
|
- # something is undeclared or cyclic, compose a summary.
|
165
|
|
- #
|
166
|
|
- summary = ''
|
167
|
|
- for unmatch in set(unmatched):
|
168
|
|
- for var, provenance in self._find_references(unmatch):
|
169
|
|
- line = " unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}\n"
|
170
|
|
- summary += line.format(unmatched=unmatch, variable=var, provenance=provenance)
|
171
|
|
-
|
172
|
|
- raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE,
|
173
|
|
- "Failed to resolve one or more variable:\n{}".format(summary))
|
174
|
|
-
|
175
|
|
- last_unmatched = unmatched
|
176
|
|
-
|
177
|
|
- return resolved
|
178
|
|
-
|
179
|
|
- # Helper function to fetch information about the node referring to a variable
|
|
152
|
+ "Perhaps '{}' contains '%{{{}}}").format(var, visited[-1], var))
|
|
153
|
+ visited.append(var)
|
|
154
|
+ cycle_check(self._expstr_map[var], visited, cleared)
|
|
155
|
+ visited.pop()
|
|
156
|
+ cleared.add(var)
|
|
157
|
+
|
|
158
|
+ cleared = set()
|
|
159
|
+ for key, expstr in self._expstr_map.items():
|
|
160
|
+ if key not in cleared:
|
|
161
|
+ cycle_check(expstr, [key], cleared)
|
|
162
|
+
|
|
163
|
+ # _flatten():
|
180
|
164
|
#
|
181
|
|
- def _find_references(self, varname):
|
182
|
|
- fullname = _wrap_variable(varname)
|
183
|
|
- for key, value in _yaml.node_items(self.original):
|
184
|
|
- if fullname in value:
|
185
|
|
- provenance = _yaml.node_get_provenance(self.original, key)
|
186
|
|
- yield (key, provenance)
|
187
|
|
-
|
188
|
|
-
|
189
|
|
-def find_recursive_variable(variable, matched_variables, all_vars):
|
190
|
|
- matched_values = (_yaml.node_get(all_vars, str, key) for key in matched_variables)
|
191
|
|
- for key, value in zip(matched_variables, matched_values):
|
192
|
|
- if _wrap_variable(variable) in value:
|
193
|
|
- return key
|
194
|
|
- # We failed to find a recursive variable
|
195
|
|
- return None
|
196
|
|
-
|
197
|
|
-
|
198
|
|
-def _wrap_variable(var):
|
199
|
|
- return "%{" + var + "}"
|
|
165
|
+ # Turn our dictionary of expansion strings into a flattened dict
|
|
166
|
+ # so that we can run expansions faster in the future
|
|
167
|
+ #
|
|
168
|
+ # Raises:
|
|
169
|
+ # LoadError, if the string contains unresolved variable references or
|
|
170
|
+ # if cycles are detected in the variable references
|
|
171
|
+ #
|
|
172
|
+ def _flatten(self):
|
|
173
|
+ flat = {}
|
|
174
|
+ try:
|
|
175
|
+ for key, expstr in self._expstr_map.items():
|
|
176
|
+ if expstr[0] > 1:
|
|
177
|
+ expstr = (1, [sys.intern(_expand_expstr(self._expstr_map, expstr))])
|
|
178
|
+ self._expstr_map[key] = expstr
|
|
179
|
+ flat[key] = expstr[1][0]
|
|
180
|
+ except KeyError:
|
|
181
|
+ self._check_for_missing()
|
|
182
|
+ raise
|
|
183
|
+ except RecursionError:
|
|
184
|
+ self._check_for_cycles()
|
|
185
|
+ raise
|
|
186
|
+ return flat
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+# Cache for the parsed expansion strings. While this is nominally
|
|
190
|
+# something which might "waste" memory, in reality each of these
|
|
191
|
+# will live as long as the element which uses it, which is the
|
|
192
|
+# vast majority of the memory usage across the execution of BuildStream.
|
|
193
|
+PARSE_CACHE = {
|
|
194
|
+ # Prime the cache with the empty string since otherwise that can
|
|
195
|
+ # cause issues with the parser, complications to which cause slowdown
|
|
196
|
+ "": (1, [""]),
|
|
197
|
+}
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+# Helper to parse a string into an expansion string tuple, caching
|
|
201
|
+# the results so that future parse requests don't need to think about
|
|
202
|
+# the string
|
|
203
|
+def _parse_expstr(instr):
|
|
204
|
+ try:
|
|
205
|
+ return PARSE_CACHE[instr]
|
|
206
|
+ except KeyError:
|
|
207
|
+ # This use of the regex turns a string like "foo %{bar} baz" into
|
|
208
|
+ # a list ["foo ", "bar", " baz"]
|
|
209
|
+ splits = PARSE_EXPANSION.split(instr)
|
|
210
|
+ # If an expansion ends the string, we get an empty string on the end
|
|
211
|
+ # which we can optimise away, making the expansion routines not need
|
|
212
|
+ # a test for this.
|
|
213
|
+ if splits[-1] == '':
|
|
214
|
+ splits = splits[:-1]
|
|
215
|
+ # Cache an interned copy of this. We intern it to try and reduce the
|
|
216
|
+ # memory impact of the cache. It seems odd to cache the list length
|
|
217
|
+ # but this is measurably cheaper than calculating it each time during
|
|
218
|
+ # string expansion.
|
|
219
|
+ PARSE_CACHE[instr] = (len(splits), [sys.intern(s) for s in splits])
|
|
220
|
+ return PARSE_CACHE[instr]
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+# Helper to expand a given top level expansion string tuple in the context
|
|
224
|
+# of the given dictionary of expansion strings.
|
|
225
|
+#
|
|
226
|
+# Note: Will raise KeyError if any expansion is missing
|
|
227
|
+def _expand_expstr(content, topvalue):
|
|
228
|
+ # Short-circuit constant strings
|
|
229
|
+ if topvalue[0] == 1:
|
|
230
|
+ return topvalue[1][0]
|
|
231
|
+
|
|
232
|
+ # Short-circuit strings which are entirely an expansion of another variable
|
|
233
|
+ # e.g. "%{another}"
|
|
234
|
+ if topvalue[0] == 2 and topvalue[1][0] == "":
|
|
235
|
+ return _expand_expstr(content, content[topvalue[1][1]])
|
|
236
|
+
|
|
237
|
+ # Otherwise process fully...
|
|
238
|
+ def internal_expand(value):
|
|
239
|
+ (expansion_len, expansion_bits) = value
|
|
240
|
+ idx = 0
|
|
241
|
+ while idx < expansion_len:
|
|
242
|
+ # First yield any constant string content
|
|
243
|
+ yield expansion_bits[idx]
|
|
244
|
+ idx += 1
|
|
245
|
+ # Now, if there is an expansion variable left to expand, yield
|
|
246
|
+ # the expansion of that variable too
|
|
247
|
+ if idx < expansion_len:
|
|
248
|
+ yield from internal_expand(content[expansion_bits[idx]])
|
|
249
|
+ idx += 1
|
|
250
|
+
|
|
251
|
+ return "".join(internal_expand(topvalue))
|