[Notes] [Git][BuildStream/buildstream][danielsilverstone-ct/variables-rework] Variables: Rework how expansion strings work



Title: GitLab

Daniel Silverstone pushed to branch danielsilverstone-ct/variables-rework at BuildStream / buildstream

Commits:

3 changed files:

Changes:

  • buildstream/_frontend/widget.py
    ... ... @@ -398,7 +398,7 @@ class LogLine(Widget):
    398 398
     
    
    399 399
                 # Variables
    
    400 400
                 if "%{vars" in format_:
    
    401
    -                variables = _yaml.node_sanitize(element._Element__variables.variables)
    
    401
    +                variables = _yaml.node_sanitize(element._Element__variables.flat)
    
    402 402
                     line = p.fmt_subst(
    
    403 403
                         line, 'vars',
    
    404 404
                         yaml.round_trip_dump(variables, default_flow_style=False, allow_unicode=True))
    

  • buildstream/_variables.py
    1 1
     #
    
    2 2
     #  Copyright (C) 2016 Codethink Limited
    
    3
    +#  Copyright (C) 2019 Bloomberg L.P.
    
    3 4
     #
    
    4 5
     #  This program is free software; you can redistribute it and/or
    
    5 6
     #  modify it under the terms of the GNU Lesser General Public
    
    ... ... @@ -16,15 +17,37 @@
    16 17
     #
    
    17 18
     #  Authors:
    
    18 19
     #        Tristan Van Berkom <tristan vanberkom codethink co uk>
    
    20
    +#        Daniel Silverstone <daniel silverstone codethink co uk>
    
    19 21
     
    
    20 22
     import re
    
    23
    +import sys
    
    21 24
     
    
    22 25
     from ._exceptions import LoadError, LoadErrorReason
    
    23 26
     from . import _yaml
    
    24 27
     
    
    25 28
     # Variables are allowed to have dashes here
    
    26 29
     #
    
    27
    -_VARIABLE_MATCH = r'\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}'
    
    30
    +PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
    
    31
    +
    
    32
    +
    
    33
    +# Throughout this code you will see variables named things like `expstr`.
    
    34
    +# These hold data structures called "expansion strings" and are the parsed
    
    35
    +# form of the strings which are the input to this subsystem.  Strings
    
    36
    +# such as "Hello %{name}, how are you?" are parsed into the form:
    
    37
    +# (3, ["Hello ", "name", ", how are you?"])
    
    38
    +# i.e. a tuple of an integer and a list, where the integer is the cached
    
    39
    +# length of the list, and the list consists of one or more strings.
    
    40
    +# Strings in even indices of the list (0, 2, 4, etc) are constants which
    
    41
    +# are copied into the output of the expansion algorithm.  Strings in the
    
    42
    +# odd indices (1, 3, 5, etc) are the names of further expansions to make.
    
    43
    +# In the example above, first "Hello " is copied, then "name" is expanded
    
    44
    +# and so must be another named expansion string passed in to the constructor
    
    45
    +# of the Variables class, and whatever is yielded from the expansion of "name"
    
    46
    +# is added to the concatenation for the result.  Finally ", how are you?" is
    
    47
    +# copied in and the whole lot concatenated for return.
    
    48
    +#
    
    49
    +# To see how strings are parsed, see `_parse_expstr()` after the class, and
    
    50
    +# to see how expansion strings are expanded, see `_expand_expstr()` after that.
    
    28 51
     
    
    29 52
     
    
    30 53
     # The Variables helper object will resolve the variable references in
    
    ... ... @@ -38,14 +61,15 @@ _VARIABLE_MATCH = r'\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}'
    38 61
     #     node (dict): A node loaded and composited with yaml tools
    
    39 62
     #
    
    40 63
     # Raises:
    
    41
    -#     LoadError, if unresolved variables occur.
    
    64
    +#     LoadError, if unresolved variables, or cycles in resolution, occur.
    
    42 65
     #
    
    43 66
     class Variables():
    
    44 67
     
    
    45 68
         def __init__(self, node):
    
    46 69
     
    
    47 70
             self.original = node
    
    48
    -        self.variables = self._resolve(node)
    
    71
    +        self._expstr_map = self._resolve(node)
    
    72
    +        self.flat = self._flatten()
    
    49 73
     
    
    50 74
         # subst():
    
    51 75
         #
    
    ... ... @@ -61,139 +85,167 @@ class Variables():
    61 85
         #    LoadError, if the string contains unresolved variable references.
    
    62 86
         #
    
    63 87
         def subst(self, string):
    
    64
    -        substitute, unmatched, _ = self._subst(string, self.variables)
    
    65
    -        unmatched = list(set(unmatched))
    
    66
    -        if unmatched:
    
    67
    -            if len(unmatched) == 1:
    
    68
    -                message = "Unresolved variable '{var}'".format(var=unmatched[0])
    
    69
    -            else:
    
    70
    -                message = "Unresolved variables: "
    
    71
    -                for unmatch in unmatched:
    
    72
    -                    if unmatched.index(unmatch) > 0:
    
    73
    -                        message += ', '
    
    74
    -                    message += unmatch
    
    75
    -
    
    76
    -            raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, message)
    
    77
    -
    
    78
    -        return substitute
    
    79
    -
    
    80
    -    def _subst(self, string, variables):
    
    81
    -
    
    82
    -        def subst_callback(match):
    
    83
    -            nonlocal variables
    
    84
    -            nonlocal unmatched
    
    85
    -            nonlocal matched
    
    86
    -
    
    87
    -            token = match.group(0)
    
    88
    -            varname = match.group(1)
    
    89
    -
    
    90
    -            value = _yaml.node_get(variables, str, varname, default_value=None)
    
    91
    -            if value is not None:
    
    92
    -                # We have to check if the inner string has variables
    
    93
    -                # and return unmatches for those
    
    94
    -                unmatched += re.findall(_VARIABLE_MATCH, value)
    
    95
    -                matched += [varname]
    
    96
    -            else:
    
    97
    -                # Return unmodified token
    
    98
    -                unmatched += [varname]
    
    99
    -                value = token
    
    100
    -
    
    101
    -            return value
    
    102
    -
    
    103
    -        matched = []
    
    104
    -        unmatched = []
    
    105
    -        replacement = re.sub(_VARIABLE_MATCH, subst_callback, string)
    
    106
    -
    
    107
    -        return (replacement, unmatched, matched)
    
    88
    +        expstr = _parse_expstr(string)
    
    89
    +
    
    90
    +        try:
    
    91
    +            return _expand_expstr(self._expstr_map, expstr)
    
    92
    +        except KeyError:
    
    93
    +            unmatched = []
    
    94
    +
    
    95
    +            # Look for any unmatched variable names in the expansion string
    
    96
    +            for var in expstr[1][1::2]:
    
    97
    +                if var not in self._expstr_map:
    
    98
    +                    unmatched.append(var)
    
    99
    +
    
    100
    +            if unmatched:
    
    101
    +                message = "Unresolved variable{}: {}".format(
    
    102
    +                    "s" if len(unmatched) > 1 else "",
    
    103
    +                    ", ".join(unmatched)
    
    104
    +                )
    
    105
    +
    
    106
    +                raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, message)
    
    107
    +            # Otherwise, re-raise the KeyError since it clearly came from some
    
    108
    +            # other unknowable cause.
    
    109
    +            raise
    
    108 110
     
    
    109 111
         # Variable resolving code
    
    110 112
         #
    
    111
    -    # Here we substitute variables for values (resolve variables) repeatedly
    
    112
    -    # in a dictionary, each time creating a new dictionary until there is no
    
    113
    -    # more unresolved variables to resolve, or, until resolving further no
    
    114
    -    # longer resolves anything, in which case we throw an exception.
    
    113
    +    # Here we resolve all of our inputs into a dictionary, ready for use
    
    114
    +    # in subst()
    
    115 115
         def _resolve(self, node):
    
    116
    -        variables = node
    
    117
    -
    
    118 116
             # Special case, if notparallel is specified in the variables for this
    
    119 117
             # element, then override max-jobs to be 1.
    
    120 118
             # Initialize it as a string as all variables are processed as strings.
    
    121 119
             #
    
    122
    -        if _yaml.node_get(variables, bool, 'notparallel', default_value=False):
    
    123
    -            variables['max-jobs'] = str(1)
    
    124
    -
    
    125
    -        # Resolve the dictionary once, reporting the new dictionary with things
    
    126
    -        # substituted in it, and reporting unmatched tokens.
    
    127
    -        #
    
    128
    -        def resolve_one(variables):
    
    129
    -            unmatched = []
    
    130
    -            resolved = {}
    
    131
    -
    
    132
    -            for key, value in _yaml.node_items(variables):
    
    133
    -
    
    134
    -                # Ensure stringness of the value before substitution
    
    135
    -                value = _yaml.node_get(variables, str, key)
    
    136
    -
    
    137
    -                resolved_var, item_unmatched, matched = self._subst(value, variables)
    
    138
    -
    
    139
    -                if _wrap_variable(key) in resolved_var:
    
    140
    -                    referenced_through = find_recursive_variable(key, matched, variables)
    
    120
    +        if _yaml.node_get(node, bool, 'notparallel', default_value=False):
    
    121
    +            node['max-jobs'] = str(1)
    
    122
    +
    
    123
    +        ret = {}
    
    124
    +        for key, value in _yaml.node_items(node):
    
    125
    +            value = _yaml.node_get(node, str, key)
    
    126
    +            ret[sys.intern(key)] = _parse_expstr(value)
    
    127
    +        return ret
    
    128
    +
    
    129
    +    def _check_for_missing(self):
    
    130
    +        # First the check for anything unresolvable
    
    131
    +        summary = []
    
    132
    +        for key, expstr in self._expstr_map.items():
    
    133
    +            for var in expstr[1][1::2]:
    
    134
    +                if var not in self._expstr_map:
    
    135
    +                    line = "  unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}"
    
    136
    +                    provenance = _yaml.node_get_provenance(self.original, key)
    
    137
    +                    summary.append(line.format(unmatched=var, variable=key, provenance=provenance))
    
    138
    +        if summary:
    
    139
    +            raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE,
    
    140
    +                            "Failed to resolve one or more variable:\n{}\n".format("\n".join(summary)))
    
    141
    +
    
    142
    +    def _check_for_cycles(self):
    
    143
    +        # And now the cycle checks
    
    144
    +        def cycle_check(expstr, visited, cleared):
    
    145
    +            for var in expstr[1][1::2]:
    
    146
    +                if var in cleared:
    
    147
    +                    continue
    
    148
    +                if var in visited:
    
    141 149
                         raise LoadError(LoadErrorReason.RECURSIVE_VARIABLE,
    
    142
    -                                    "{}: ".format(_yaml.node_get_provenance(variables, key)) +
    
    150
    +                                    "{}: ".format(_yaml.node_get_provenance(self.original, var)) +
    
    143 151
                                         ("Variable '{}' expands to contain a reference to itself. " +
    
    144
    -                                     "Perhaps '{}' contains '{}").format(key, referenced_through, _wrap_variable(key)))
    
    145
    -
    
    146
    -                resolved[key] = resolved_var
    
    147
    -                unmatched += item_unmatched
    
    148
    -
    
    149
    -            # Carry over provenance
    
    150
    -            resolved[_yaml.PROVENANCE_KEY] = variables[_yaml.PROVENANCE_KEY]
    
    151
    -            return (resolved, unmatched)
    
    152
    -
    
    153
    -        # Resolve it until it's resolved or broken
    
    154
    -        #
    
    155
    -        resolved = variables
    
    156
    -        unmatched = ['dummy']
    
    157
    -        last_unmatched = ['dummy']
    
    158
    -        while unmatched:
    
    159
    -            resolved, unmatched = resolve_one(resolved)
    
    160
    -
    
    161
    -            # Lists of strings can be compared like this
    
    162
    -            if unmatched == last_unmatched:
    
    163
    -                # We've got the same result twice without matching everything,
    
    164
    -                # something is undeclared or cyclic, compose a summary.
    
    165
    -                #
    
    166
    -                summary = ''
    
    167
    -                for unmatch in set(unmatched):
    
    168
    -                    for var, provenance in self._find_references(unmatch):
    
    169
    -                        line = "  unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}\n"
    
    170
    -                        summary += line.format(unmatched=unmatch, variable=var, provenance=provenance)
    
    171
    -
    
    172
    -                raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE,
    
    173
    -                                "Failed to resolve one or more variable:\n{}".format(summary))
    
    174
    -
    
    175
    -            last_unmatched = unmatched
    
    176
    -
    
    177
    -        return resolved
    
    178
    -
    
    179
    -    # Helper function to fetch information about the node referring to a variable
    
    152
    +                                     "Perhaps '{}' contains '%{{{}}}").format(var, visited[-1], var))
    
    153
    +                visited.append(var)
    
    154
    +                cycle_check(self._expstr_map[var], visited, cleared)
    
    155
    +                visited.pop()
    
    156
    +                cleared.add(var)
    
    157
    +
    
    158
    +        cleared = set()
    
    159
    +        for key, expstr in self._expstr_map.items():
    
    160
    +            if key not in cleared:
    
    161
    +                cycle_check(expstr, [key], cleared)
    
    162
    +
    
    163
    +    # _flatten():
    
    180 164
         #
    
    181
    -    def _find_references(self, varname):
    
    182
    -        fullname = _wrap_variable(varname)
    
    183
    -        for key, value in _yaml.node_items(self.original):
    
    184
    -            if fullname in value:
    
    185
    -                provenance = _yaml.node_get_provenance(self.original, key)
    
    186
    -                yield (key, provenance)
    
    187
    -
    
    188
    -
    
    189
    -def find_recursive_variable(variable, matched_variables, all_vars):
    
    190
    -    matched_values = (_yaml.node_get(all_vars, str, key) for key in matched_variables)
    
    191
    -    for key, value in zip(matched_variables, matched_values):
    
    192
    -        if _wrap_variable(variable) in value:
    
    193
    -            return key
    
    194
    -    # We failed to find a recursive variable
    
    195
    -    return None
    
    196
    -
    
    197
    -
    
    198
    -def _wrap_variable(var):
    
    199
    -    return "%{" + var + "}"
    165
    +    # Turn our dictionary of expansion strings into a flattened dict
    
    166
    +    # so that we can run expansions faster in the future
    
    167
    +    #
    
    168
    +    # Raises:
    
    169
    +    #    LoadError, if the string contains unresolved variable references or
    
    170
    +    #               if cycles are detected in the variable references
    
    171
    +    #
    
    172
    +    def _flatten(self):
    
    173
    +        flat = {}
    
    174
    +        try:
    
    175
    +            for key, expstr in self._expstr_map.items():
    
    176
    +                if expstr[0] > 1:
    
    177
    +                    expstr = (1, [sys.intern(_expand_expstr(self._expstr_map, expstr))])
    
    178
    +                    self._expstr_map[key] = expstr
    
    179
    +                flat[key] = expstr[1][0]
    
    180
    +        except KeyError:
    
    181
    +            self._check_for_missing()
    
    182
    +            raise
    
    183
    +        except RecursionError:
    
    184
    +            self._check_for_cycles()
    
    185
    +            raise
    
    186
    +        return flat
    
    187
    +
    
    188
    +
    
    189
    +# Cache for the parsed expansion strings.  While this is nominally
    
    190
    +# something which might "waste" memory, in reality each of these
    
    191
    +# will live as long as the element which uses it, which is the
    
    192
    +# vast majority of the memory usage across the execution of BuildStream.
    
    193
    +PARSE_CACHE = {
    
    194
    +    # Prime the cache with the empty string since otherwise that can
    
    195
    +    # cause issues with the parser, complications to which cause slowdown
    
    196
    +    "": (1, [""]),
    
    197
    +}
    
    198
    +
    
    199
    +
    
    200
    +# Helper to parse a string into an expansion string tuple, caching
    
    201
    +# the results so that future parse requests don't need to think about
    
    202
    +# the string
    
    203
    +def _parse_expstr(instr):
    
    204
    +    try:
    
    205
    +        return PARSE_CACHE[instr]
    
    206
    +    except KeyError:
    
    207
    +        # This use of the regex turns a string like "foo %{bar} baz" into
    
    208
    +        # a list ["foo ", "bar", " baz"]
    
    209
    +        splits = PARSE_EXPANSION.split(instr)
    
    210
    +        # If an expansion ends the string, we get an empty string on the end
    
    211
    +        # which we can optimise away, making the expansion routines not need
    
    212
    +        # a test for this.
    
    213
    +        if splits[-1] == '':
    
    214
    +            splits = splits[:-1]
    
    215
    +        # Cache an interned copy of this.  We intern it to try and reduce the
    
    216
    +        # memory impact of the cache.  It seems odd to cache the list length
    
    217
    +        # but this is measurably cheaper than calculating it each time during
    
    218
    +        # string expansion.
    
    219
    +        PARSE_CACHE[instr] = (len(splits), [sys.intern(s) for s in splits])
    
    220
    +        return PARSE_CACHE[instr]
    
    221
    +
    
    222
    +
    
    223
    +# Helper to expand a given top level expansion string tuple in the context
    
    224
    +# of the given dictionary of expansion strings.
    
    225
    +#
    
    226
    +# Note: Will raise KeyError if any expansion is missing
    
    227
    +def _expand_expstr(content, topvalue):
    
    228
    +    # Short-circuit constant strings
    
    229
    +    if topvalue[0] == 1:
    
    230
    +        return topvalue[1][0]
    
    231
    +
    
    232
    +    # Short-circuit strings which are entirely an expansion of another variable
    
    233
    +    # e.g. "%{another}"
    
    234
    +    if topvalue[0] == 2 and topvalue[1][0] == "":
    
    235
    +        return _expand_expstr(content, content[topvalue[1][1]])
    
    236
    +
    
    237
    +    # Otherwise process fully...
    
    238
    +    def internal_expand(value):
    
    239
    +        (expansion_len, expansion_bits) = value
    
    240
    +        idx = 0
    
    241
    +        while idx < expansion_len:
    
    242
    +            # First yield any constant string content
    
    243
    +            yield expansion_bits[idx]
    
    244
    +            idx += 1
    
    245
    +            # Now, if there is an expansion variable left to expand, yield
    
    246
    +            # the expansion of that variable too
    
    247
    +            if idx < expansion_len:
    
    248
    +                yield from internal_expand(content[expansion_bits[idx]])
    
    249
    +            idx += 1
    
    250
    +
    
    251
    +    return "".join(internal_expand(topvalue))

  • buildstream/element.py
    ... ... @@ -894,10 +894,7 @@ class Element(Plugin):
    894 894
                (str): The resolved value for *varname*, or None if no
    
    895 895
                variable was declared with the given name.
    
    896 896
             """
    
    897
    -        if varname in self.__variables.variables:
    
    898
    -            return self.__variables.variables[varname]
    
    899
    -
    
    900
    -        return None
    
    897
    +        return self.__variables.flat.get(varname)
    
    901 898
     
    
    902 899
         def batch_prepare_assemble(self, flags, *, collect=None):
    
    903 900
             """ Configure command batching across prepare() and assemble()
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]