| 1 | 1 |  #
 | 
| 2 | 2 |  #  Copyright (C) 2016 Codethink Limited
 | 
|  | 3 | +#  Copyright (C) 2019 Bloomberg L.P.
 | 
| 3 | 4 |  #
 | 
| 4 | 5 |  #  This program is free software; you can redistribute it and/or
 | 
| 5 | 6 |  #  modify it under the terms of the GNU Lesser General Public
 | 
| ... | ... | @@ -16,15 +17,17 @@ | 
| 16 | 17 |  #
 | 
| 17 | 18 |  #  Authors:
 | 
| 18 | 19 |  #        Tristan Van Berkom <tristan vanberkom codethink co uk>
 | 
|  | 20 | +#        Daniel Silverstone <daniel silverstone codethink co uk>
 | 
| 19 | 21 |  
 | 
| 20 | 22 |  import re
 | 
|  | 23 | +import sys
 | 
| 21 | 24 |  
 | 
| 22 | 25 |  from ._exceptions import LoadError, LoadErrorReason
 | 
| 23 | 26 |  from . import _yaml
 | 
| 24 | 27 |  
 | 
| 25 | 28 |  # Variables are allowed to have dashes here
 | 
| 26 | 29 |  #
 | 
| 27 |  | -_VARIABLE_MATCH = r'\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}'
 | 
|  | 30 | +PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
 | 
| 28 | 31 |  
 | 
| 29 | 32 |  
 | 
| 30 | 33 |  # The Variables helper object will resolve the variable references in
 | 
| ... | ... | @@ -38,14 +41,15 @@ _VARIABLE_MATCH = r'\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}' | 
| 38 | 41 |  #     node (dict): A node loaded and composited with yaml tools
 | 
| 39 | 42 |  #
 | 
| 40 | 43 |  # Raises:
 | 
| 41 |  | -#     LoadError, if unresolved variables occur.
 | 
|  | 44 | +#     LoadError, if unresolved variables, or cycles in resolution, occur.
 | 
| 42 | 45 |  #
 | 
| 43 | 46 |  class Variables():
 | 
| 44 | 47 |  
 | 
| 45 | 48 |      def __init__(self, node):
 | 
| 46 | 49 |  
 | 
| 47 | 50 |          self.original = node
 | 
| 48 |  | -        self.variables = self._resolve(node)
 | 
|  | 51 | +        self.newexp = self._resolve(node)
 | 
|  | 52 | +        self.flat = self._flatten()
 | 
| 49 | 53 |  
 | 
| 50 | 54 |      # subst():
 | 
| 51 | 55 |      #
 | 
| ... | ... | @@ -61,139 +65,161 @@ class Variables(): | 
| 61 | 65 |      #    LoadError, if the string contains unresolved variable references.
 | 
| 62 | 66 |      #
 | 
| 63 | 67 |      def subst(self, string):
 | 
| 64 |  | -        substitute, unmatched, _ = self._subst(string, self.variables)
 | 
| 65 |  | -        unmatched = list(set(unmatched))
 | 
| 66 |  | -        if unmatched:
 | 
| 67 |  | -            if len(unmatched) == 1:
 | 
| 68 |  | -                message = "Unresolved variable '{var}'".format(var=unmatched[0])
 | 
| 69 |  | -            else:
 | 
| 70 |  | -                message = "Unresolved variables: "
 | 
| 71 |  | -                for unmatch in unmatched:
 | 
| 72 |  | -                    if unmatched.index(unmatch) > 0:
 | 
| 73 |  | -                        message += ', '
 | 
| 74 |  | -                    message += unmatch
 | 
| 75 |  | -
 | 
| 76 |  | -            raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, message)
 | 
| 77 |  | -
 | 
| 78 |  | -        return substitute
 | 
| 79 |  | -
 | 
| 80 |  | -    def _subst(self, string, variables):
 | 
| 81 |  | -
 | 
| 82 |  | -        def subst_callback(match):
 | 
| 83 |  | -            nonlocal variables
 | 
| 84 |  | -            nonlocal unmatched
 | 
| 85 |  | -            nonlocal matched
 | 
| 86 |  | -
 | 
| 87 |  | -            token = match.group(0)
 | 
| 88 |  | -            varname = match.group(1)
 | 
| 89 |  | -
 | 
| 90 |  | -            value = _yaml.node_get(variables, str, varname, default_value=None)
 | 
| 91 |  | -            if value is not None:
 | 
| 92 |  | -                # We have to check if the inner string has variables
 | 
| 93 |  | -                # and return unmatches for those
 | 
| 94 |  | -                unmatched += re.findall(_VARIABLE_MATCH, value)
 | 
| 95 |  | -                matched += [varname]
 | 
| 96 |  | -            else:
 | 
| 97 |  | -                # Return unmodified token
 | 
| 98 |  | -                unmatched += [varname]
 | 
| 99 |  | -                value = token
 | 
| 100 |  | -
 | 
| 101 |  | -            return value
 | 
| 102 |  | -
 | 
| 103 |  | -        matched = []
 | 
| 104 |  | -        unmatched = []
 | 
| 105 |  | -        replacement = re.sub(_VARIABLE_MATCH, subst_callback, string)
 | 
| 106 |  | -
 | 
| 107 |  | -        return (replacement, unmatched, matched)
 | 
|  | 68 | +        exp = _parse_expstr(string)
 | 
|  | 69 | +
 | 
|  | 70 | +        try:
 | 
|  | 71 | +            return _expand_expstr(self.newexp, exp)
 | 
|  | 72 | +        except KeyError:
 | 
|  | 73 | +            unmatched = []
 | 
|  | 74 | +
 | 
|  | 75 | +            for v in exp[1][1::2]:
 | 
|  | 76 | +                if v not in self.newexp:
 | 
|  | 77 | +                    unmatched.append(v)
 | 
|  | 78 | +
 | 
|  | 79 | +            if unmatched:
 | 
|  | 80 | +                if len(unmatched) == 1:
 | 
|  | 81 | +                    message = "Unresolved variable '{var}'".format(var=unmatched[0])
 | 
|  | 82 | +                else:
 | 
|  | 83 | +                    message = "Unresolved variables: "
 | 
|  | 84 | +                    for unmatch in unmatched:
 | 
|  | 85 | +                        if unmatched.index(unmatch) > 0:
 | 
|  | 86 | +                            message += ', '
 | 
|  | 87 | +                            message += unmatch
 | 
|  | 88 | +
 | 
|  | 89 | +                raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, message)
 | 
|  | 90 | +            raise
 | 
| 108 | 91 |  
 | 
| 109 | 92 |      # Variable resolving code
 | 
| 110 | 93 |      #
 | 
| 111 |  | -    # Here we substitute variables for values (resolve variables) repeatedly
 | 
| 112 |  | -    # in a dictionary, each time creating a new dictionary until there is no
 | 
| 113 |  | -    # more unresolved variables to resolve, or, until resolving further no
 | 
| 114 |  | -    # longer resolves anything, in which case we throw an exception.
 | 
|  | 94 | +    # Here we parse all of our inputs into a dictionary of expansion
 | 
|  | 95 | +    # strings, ready for use in subst()
 | 
| 115 | 96 |      def _resolve(self, node):
 | 
| 116 |  | -        variables = node
 | 
| 117 |  | -
 | 
| 118 | 97 |          # Special case, if notparallel is specified in the variables for this
 | 
| 119 | 98 |          # element, then override max-jobs to be 1.
 | 
| 120 | 99 |          # Initialize it as a string as all variables are processed as strings.
 | 
| 121 | 100 |          #
 | 
| 122 |  | -        if _yaml.node_get(variables, bool, 'notparallel', default_value=False):
 | 
| 123 |  | -            variables['max-jobs'] = str(1)
 | 
| 124 |  | -
 | 
| 125 |  | -        # Resolve the dictionary once, reporting the new dictionary with things
 | 
| 126 |  | -        # substituted in it, and reporting unmatched tokens.
 | 
| 127 |  | -        #
 | 
| 128 |  | -        def resolve_one(variables):
 | 
| 129 |  | -            unmatched = []
 | 
| 130 |  | -            resolved = {}
 | 
| 131 |  | -
 | 
| 132 |  | -            for key, value in _yaml.node_items(variables):
 | 
| 133 |  | -
 | 
| 134 |  | -                # Ensure stringness of the value before substitution
 | 
| 135 |  | -                value = _yaml.node_get(variables, str, key)
 | 
| 136 |  | -
 | 
| 137 |  | -                resolved_var, item_unmatched, matched = self._subst(value, variables)
 | 
| 138 |  | -
 | 
| 139 |  | -                if _wrap_variable(key) in resolved_var:
 | 
| 140 |  | -                    referenced_through = find_recursive_variable(key, matched, variables)
 | 
|  | 101 | +        if _yaml.node_get(node, bool, 'notparallel', default_value=False):
 | 
|  | 102 | +            node['max-jobs'] = str(1)
 | 
|  | 103 | +
 | 
|  | 104 | +        ret = {}
 | 
|  | 105 | +        for key, value in _yaml.node_items(node):
 | 
|  | 106 | +            value = _yaml.node_get(node, str, key)
 | 
|  | 107 | +            ret[sys.intern(key)] = _parse_expstr(value)
 | 
|  | 108 | +        return ret
 | 
|  | 109 | +
 | 
|  | 110 | +    def _check_for_missing(self):
 | 
|  | 111 | +        # Check for references to variables which are not declared at all
 | 
|  | 112 | +        summary = []
 | 
|  | 113 | +        for key, expstr in self.newexp.items():
 | 
|  | 114 | +            for var in expstr[1][1::2]:
 | 
|  | 115 | +                if var not in self.newexp:
 | 
|  | 116 | +                    line = "  unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}"
 | 
|  | 117 | +                    provenance = _yaml.node_get_provenance(self.original, key)
 | 
|  | 118 | +                    summary.append(line.format(unmatched=var, variable=key, provenance=provenance))
 | 
|  | 119 | +        if summary:
 | 
|  | 120 | +            raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE,
 | 
|  | 121 | +                            "Failed to resolve one or more variable:\n{}\n".format("\n".join(summary)))
 | 
|  | 122 | +
 | 
|  | 123 | +    def _check_for_cycles(self):
 | 
|  | 124 | +        # Check for cycles in the variable references
 | 
|  | 125 | +        def cycle_check(exp, visited, cleared):
 | 
|  | 126 | +            for var in exp[1][1::2]:
 | 
|  | 127 | +                if var in cleared:
 | 
|  | 128 | +                    continue
 | 
|  | 129 | +                if var in visited:
 | 
| 141 | 130 |                      raise LoadError(LoadErrorReason.RECURSIVE_VARIABLE,
 | 
| 142 |  | -                                    "{}: ".format(_yaml.node_get_provenance(variables, key)) +
 | 
|  | 131 | +                                    "{}: ".format(_yaml.node_get_provenance(self.original, var)) +
 | 
| 143 | 132 |                                      ("Variable '{}' expands to contain a reference to itself. " +
 | 
| 144 |  | -                                     "Perhaps '{}' contains '{}").format(key, referenced_through, _wrap_variable(key)))
 | 
| 145 |  | -
 | 
| 146 |  | -                resolved[key] = resolved_var
 | 
| 147 |  | -                unmatched += item_unmatched
 | 
| 148 |  | -
 | 
| 149 |  | -            # Carry over provenance
 | 
| 150 |  | -            resolved[_yaml.PROVENANCE_KEY] = variables[_yaml.PROVENANCE_KEY]
 | 
| 151 |  | -            return (resolved, unmatched)
 | 
| 152 |  | -
 | 
| 153 |  | -        # Resolve it until it's resolved or broken
 | 
| 154 |  | -        #
 | 
| 155 |  | -        resolved = variables
 | 
| 156 |  | -        unmatched = ['dummy']
 | 
| 157 |  | -        last_unmatched = ['dummy']
 | 
| 158 |  | -        while unmatched:
 | 
| 159 |  | -            resolved, unmatched = resolve_one(resolved)
 | 
| 160 |  | -
 | 
| 161 |  | -            # Lists of strings can be compared like this
 | 
| 162 |  | -            if unmatched == last_unmatched:
 | 
| 163 |  | -                # We've got the same result twice without matching everything,
 | 
| 164 |  | -                # something is undeclared or cyclic, compose a summary.
 | 
| 165 |  | -                #
 | 
| 166 |  | -                summary = ''
 | 
| 167 |  | -                for unmatch in set(unmatched):
 | 
| 168 |  | -                    for var, provenance in self._find_references(unmatch):
 | 
| 169 |  | -                        line = "  unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}\n"
 | 
| 170 |  | -                        summary += line.format(unmatched=unmatch, variable=var, provenance=provenance)
 | 
| 171 |  | -
 | 
| 172 |  | -                raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE,
 | 
| 173 |  | -                                "Failed to resolve one or more variable:\n{}".format(summary))
 | 
| 174 |  | -
 | 
| 175 |  | -            last_unmatched = unmatched
 | 
| 176 |  | -
 | 
| 177 |  | -        return resolved
 | 
| 178 |  | -
 | 
| 179 |  | -    # Helper function to fetch information about the node referring to a variable
 | 
|  | 133 | +                                     "Perhaps '{}' contains '%{{{}}}").format(var, visited[-1], var))
 | 
|  | 134 | +                visited.append(var)
 | 
|  | 135 | +                cycle_check(self.newexp[var], visited, cleared)
 | 
|  | 136 | +                visited.pop()
 | 
|  | 137 | +                cleared.add(var)
 | 
|  | 138 | +
 | 
|  | 139 | +        cleared = set()
 | 
|  | 140 | +        for key, expstr in self.newexp.items():
 | 
|  | 141 | +            if key not in cleared:
 | 
|  | 142 | +                cycle_check(expstr, [key], cleared)
 | 
|  | 143 | +
 | 
|  | 144 | +    # _flatten():
 | 
| 180 | 145 |      #
 | 
| 181 |  | -    def _find_references(self, varname):
 | 
| 182 |  | -        fullname = _wrap_variable(varname)
 | 
| 183 |  | -        for key, value in _yaml.node_items(self.original):
 | 
| 184 |  | -            if fullname in value:
 | 
| 185 |  | -                provenance = _yaml.node_get_provenance(self.original, key)
 | 
| 186 |  | -                yield (key, provenance)
 | 
| 187 |  | -
 | 
| 188 |  | -
 | 
| 189 |  | -def find_recursive_variable(variable, matched_variables, all_vars):
 | 
| 190 |  | -    matched_values = (_yaml.node_get(all_vars, str, key) for key in matched_variables)
 | 
| 191 |  | -    for key, value in zip(matched_variables, matched_values):
 | 
| 192 |  | -        if _wrap_variable(variable) in value:
 | 
| 193 |  | -            return key
 | 
| 194 |  | -    # We failed to find a recursive variable
 | 
| 195 |  | -    return None
 | 
| 196 |  | -
 | 
| 197 |  | -
 | 
| 198 |  | -def _wrap_variable(var):
 | 
| 199 |  | -    return "%{" + var + "}" | 
|  | 146 | +    # Turn our dictionary of expansion strings into a flat dictionary of
 | 
|  | 147 | +    # fully expanded strings, so that we can run expansions faster in the future
 | 
|  | 148 | +    #
 | 
|  | 149 | +    # Raises:
 | 
|  | 150 | +    #    LoadError, if any variable contains unresolved variable references or
 | 
|  | 151 | +    #               if cycles are detected in the variable references
 | 
|  | 152 | +    #
 | 
|  | 153 | +    def _flatten(self):
 | 
|  | 154 | +        flat = {}
 | 
|  | 155 | +        try:
 | 
|  | 156 | +            for key, expstr in self.newexp.items():
 | 
|  | 157 | +                if expstr[0] > 1:
 | 
|  | 158 | +                    expstr = (1, [sys.intern(_expand_expstr(self.newexp, expstr))])
 | 
|  | 159 | +                    self.newexp[key] = expstr
 | 
|  | 160 | +                flat[key] = expstr[1][0]
 | 
|  | 161 | +        except KeyError:
 | 
|  | 162 | +            self._check_for_missing()
 | 
|  | 163 | +            raise
 | 
|  | 164 | +        except RecursionError:
 | 
|  | 165 | +            self._check_for_cycles()
 | 
|  | 166 | +            raise
 | 
|  | 167 | +        return flat
 | 
|  | 168 | +
 | 
|  | 169 | +
 | 
|  | 170 | +# Cache for the parsed expansion strings.  While this is nominally
 | 
|  | 171 | +# something which might "waste" memory, in reality each of these
 | 
|  | 172 | +# will live as long as the element which uses it, which is the
 | 
|  | 173 | +# vast majority of the memory usage across the execution of BuildStream.
 | 
|  | 174 | +PARSE_CACHE = {
 | 
|  | 175 | +    # Prime the cache with the empty string, since the parser would otherwise
 | 
|  | 176 | +    # need a special case for it, and that extra complication would slow it down
 | 
|  | 177 | +    "": (1, [""]),
 | 
|  | 178 | +}
 | 
|  | 179 | +
 | 
|  | 180 | +
 | 
|  | 181 | +# Helper to parse a string into an expansion string tuple, caching
 | 
|  | 182 | +# the results so that future parse requests for the same string do not
 | 
|  | 183 | +# need to parse it again
 | 
|  | 184 | +def _parse_expstr(instr):
 | 
|  | 185 | +    try:
 | 
|  | 186 | +        return PARSE_CACHE[instr]
 | 
|  | 187 | +    except KeyError:
 | 
|  | 188 | +        # This use of the regex turns a string like "foo %{bar} baz" into
 | 
|  | 189 | +        # a list ["foo ", "bar", " baz"]
 | 
|  | 190 | +        splits = PARSE_EXPANSION.split(instr)
 | 
|  | 191 | +        # If an expansion ends the string, we get an empty string on the end
 | 
|  | 192 | +        # which we can optimise away, making the expansion routines not need
 | 
|  | 193 | +        # a test for this.
 | 
|  | 194 | +        if splits[-1] == '':
 | 
|  | 195 | +            splits = splits[:-1]
 | 
|  | 196 | +        # Cache an interned copy of this.  We intern it to try and reduce the
 | 
|  | 197 | +        # memory impact of the cache.  It seems odd to cache the list length
 | 
|  | 198 | +        # but this is measurably cheaper than calculating it each time during
 | 
|  | 199 | +        # string expansion.
 | 
|  | 200 | +        PARSE_CACHE[instr] = (len(splits), [sys.intern(s) for s in splits])
 | 
|  | 201 | +        return PARSE_CACHE[instr]
 | 
|  | 202 | +
 | 
|  | 203 | +
 | 
|  | 204 | +# Helper to expand a given top level expansion string tuple in the context
 | 
|  | 205 | +# of the given dictionary of expansion strings.
 | 
|  | 206 | +#
 | 
|  | 207 | +# Note: Will raise KeyError if any expansion is missing
 | 
|  | 208 | +def _expand_expstr(content, topvalue):
 | 
|  | 209 | +    if topvalue[0] == 1:
 | 
|  | 210 | +        return topvalue[1][0]
 | 
|  | 211 | +
 | 
|  | 212 | +    def __expand(value):
 | 
|  | 213 | +        (expansion_len, expansion_bits) = value
 | 
|  | 214 | +        idx = 0
 | 
|  | 215 | +        while idx < expansion_len:
 | 
|  | 216 | +            # First yield any constant string content
 | 
|  | 217 | +            yield expansion_bits[idx]
 | 
|  | 218 | +            idx += 1
 | 
|  | 219 | +            # Now, if there is an expansion variable left to expand, yield
 | 
|  | 220 | +            # the expansion of that variable too
 | 
|  | 221 | +            if idx < expansion_len:
 | 
|  | 222 | +                yield from __expand(content[expansion_bits[idx]])
 | 
|  | 223 | +            idx += 1
 | 
|  | 224 | +
 | 
|  | 225 | +    return "".join(__expand(topvalue)) |