[Notes] [Git][BuildStream/buildstream][bschubert/rework-circular-check] 8 commits: _yaml.py: Only retrieve provenance in node_get() when needed



Title: GitLab

Benjamin Schubert pushed to branch bschubert/rework-circular-check at BuildStream / buildstream

Commits:

5 changed files:

Changes:

  • buildstream/_context.py
    ... ... @@ -361,14 +361,17 @@ class Context():
    361 361
         #    (bool): Whether or not to use strict build plan
    
    362 362
         #
    
    363 363
         def get_strict(self):
    
    364
    +        if self._strict_build_plan is None:
    
    365
    +            # Either we're not overridden or we've never worked it out before
    
    366
    +            # so work out if we should be strict, and then cache the result
    
    367
    +            toplevel = self.get_toplevel_project()
    
    368
    +            overrides = self.get_overrides(toplevel.name)
    
    369
    +            self._strict_build_plan = _yaml.node_get(overrides, bool, 'strict', default_value=True)
    
    364 370
     
    
    365 371
             # If it was set by the CLI, it overrides any config
    
    366
    -        if self._strict_build_plan is not None:
    
    367
    -            return self._strict_build_plan
    
    368
    -
    
    369
    -        toplevel = self.get_toplevel_project()
    
    370
    -        overrides = self.get_overrides(toplevel.name)
    
    371
    -        return _yaml.node_get(overrides, bool, 'strict', default_value=True)
    
    372
    +        # Ditto if we've already computed this, then we return the computed
    
    373
    +        # value which we cache here too.
    
    374
    +        return self._strict_build_plan
    
    372 375
     
    
    373 376
         # get_cache_key():
    
    374 377
         #
    

  • buildstream/_loader/loadelement.py
    ... ... @@ -67,14 +67,14 @@ class LoadElement():
    67 67
             self.node = node       # The YAML node
    
    68 68
             self.name = filename   # The element name
    
    69 69
             self.full_name = None  # The element full name (with associated junction)
    
    70
    -        self.deps = None       # The list of Dependency objects
    
    71
    -        self.node_id = next(self._counter)
    
    70
    +        self.dependencies = []
    
    72 71
     
    
    73 72
             #
    
    74 73
             # Private members
    
    75 74
             #
    
    76 75
             self._loader = loader   # The Loader object
    
    77
    -        self._dep_cache = None  # The dependency cache, to speed up depends()
    
    76
    +        self._dep_cache = BitMap()  # The dependency cache, to speed up depends()
    
    77
    +        self._node_id = next(self._counter)
    
    78 78
     
    
    79 79
             #
    
    80 80
             # Initialization
    
    ... ... @@ -94,12 +94,27 @@ class LoadElement():
    94 94
                 'build-depends', 'runtime-depends',
    
    95 95
             ])
    
    96 96
     
    
    97
    -        self.dependencies = []
    
    98
    -
    
    99 97
         @property
    
    100 98
         def junction(self):
    
    101 99
             return self._loader.project.junction
    
    102 100
     
    
    101
    +    # _add_dependency()
    
    102
    +    #
    
    103
    +    # Add an element as a dependency of the current element
    
    104
    +    #
    
    105
    +    # Args:
    
    106
    +    #     dependency (LoadElement): element to add as a dependency
    
    107
    +    #     dep_type (str): type of dependency to add
    
    108
    +    #
    
    109
    +    # Raises:
    
    110
    +    #     LoadError: if adding the current dependency would create a circular
    
    111
    +    #                dependency
    
    112
    +    #
    
    113
    +    def _add_dependency(self, dependency, dep_type):
    
    114
    +        self.dependencies.append(LoadElement.Dependency(dependency, dep_type))
    
    115
    +        self._dep_cache.update(dependency._dep_cache)
    
    116
    +        self._dep_cache.add(dependency._node_id)
    
    117
    +
    
    103 118
         # depends():
    
    104 119
         #
    
    105 120
         # Checks if this element depends on another element, directly
    
    ... ... @@ -112,32 +127,59 @@ class LoadElement():
    112 127
         #    (bool): True if this LoadElement depends on 'other'
    
    113 128
         #
    
    114 129
         def depends(self, other):
    
    115
    -        self._ensure_depends_cache()
    
    116
    -        return other.node_id in self._dep_cache
    
    130
    +        return other._node_id in self._dep_cache
    
    117 131
     
    
    118
    -    ###########################################
    
    119
    -    #            Private Methods              #
    
    120
    -    ###########################################
    
    121
    -    def _ensure_depends_cache(self):
    
    132
    +    # _ensure_no_circular_deps()
    
    133
    +    #
    
    134
    +    # Ensure the node is not part of a dependency cycle
    
    135
    +    #
    
    136
    +    # Raises:
    
    137
    +    #   (LoadError): In case there was a circular dependency
    
    138
    +    #
    
    139
    +    def _ensure_no_circular_deps(self):
    
    140
    +        if self._node_id in self._dep_cache:
    
    141
    +            self._find_circular_deps(set(), set(), [])
    
    122 142
     
    
    123
    -        if self._dep_cache:
    
    143
    +    # _find_circular_deps():
    
    144
    +    #
    
    145
    +    # Detect circular dependencies on LoadElements with
    
    146
    +    # dependencies already resolved.
    
    147
    +    #
    
    148
    +    # Args:
    
    149
    +    #    check_elements (set[LoadElement]): The elements in the chain being checked
    
    150
    +    #    validated (set[LoadElement]): The elements already validated to not have
    
    151
    +    #                                  circular dependencies
    
    152
    +    #    sequence (list[Element]): current sequence of elements that is checked
    
    153
    +    #
    
    154
    +    # Raises:
    
    155
    +    #    (LoadError): In case there was a circular dependency error
    
    156
    +    #
    
    157
    +    def _find_circular_deps(self, check_elements, validated, sequence):
    
    158
    +        # Skip already validated branches
    
    159
    +        if self in validated:
    
    124 160
                 return
    
    125 161
     
    
    126
    -        self._dep_cache = BitMap()
    
    127
    -
    
    162
    +        if self in check_elements:
    
    163
    +            # Create `chain`, the loop of element dependencies from this
    
    164
    +            # element back to itself, by trimming everything before this
    
    165
    +            # element from the sequence under consideration.
    
    166
    +            chain = sequence[sequence.index(self.full_name):]
    
    167
    +            chain.append(self.full_name)
    
    168
    +            raise LoadError(LoadErrorReason.CIRCULAR_DEPENDENCY,
    
    169
    +                            ("Circular dependency detected at element: {}\n" +
    
    170
    +                             "Dependency chain: {}")
    
    171
    +                            .format(self.full_name, " -> ".join(chain)))
    
    172
    +
    
    173
    +        # Push / Check each dependency / Pop
    
    174
    +        check_elements.add(self)
    
    175
    +        sequence.append(self.full_name)
    
    128 176
             for dep in self.dependencies:
    
    129
    -            elt = dep.element
    
    130
    -
    
    131
    -            # Ensure the cache of the element we depend on
    
    132
    -            elt._ensure_depends_cache()
    
    133
    -
    
    134
    -            # We depend on this element
    
    135
    -            self._dep_cache.add(elt.node_id)
    
    136
    -
    
    137
    -            # And we depend on everything this element depends on
    
    138
    -            self._dep_cache.update(elt._dep_cache)
    
    177
    +            dep.element._check_circular_deps(check_elements, validated, sequence)
    
    178
    +        check_elements.remove(self)
    
    179
    +        sequence.pop()
    
    139 180
     
    
    140
    -        self._dep_cache = FrozenBitMap(self._dep_cache)
    
    181
    +        # Eliminate duplicate paths
    
    182
    +        validated.add(self)
    
    141 183
     
    
    142 184
     
    
    143 185
     # _extract_depends_from_node():
    

  • buildstream/_loader/loader.py
    ... ... @@ -127,23 +127,6 @@ class Loader():
    127 127
                     target_elements.append(element)
    
    128 128
                     profile_end(Topics.LOAD_PROJECT, target)
    
    129 129
     
    
    130
    -        #
    
    131
    -        # Now that we've resolve the dependencies, scan them for circular dependencies
    
    132
    -        #
    
    133
    -
    
    134
    -        # Set up a dummy element that depends on all top-level targets
    
    135
    -        # to resolve potential circular dependencies between them
    
    136
    -        dummy_target = LoadElement("", "", self)
    
    137
    -        dummy_target.dependencies.extend(
    
    138
    -            LoadElement.Dependency(element, Symbol.RUNTIME)
    
    139
    -            for element in target_elements
    
    140
    -        )
    
    141
    -
    
    142
    -        profile_key = "_".join(t for t in targets)
    
    143
    -        profile_start(Topics.CIRCULAR_CHECK, profile_key)
    
    144
    -        self._check_circular_deps(dummy_target)
    
    145
    -        profile_end(Topics.CIRCULAR_CHECK, profile_key)
    
    146
    -
    
    147 130
             ret = []
    
    148 131
             #
    
    149 132
             # Sort direct dependencies of elements by their dependency ordering
    
    ... ... @@ -273,58 +256,14 @@ class Loader():
    273 256
                                     "{}: Cannot depend on junction"
    
    274 257
                                     .format(dep.provenance))
    
    275 258
     
    
    276
    -            element.dependencies.append(LoadElement.Dependency(dep_element, dep.dep_type))
    
    259
    +            element._add_dependency(dep_element, dep.dep_type)
    
    277 260
     
    
    278 261
             deps_names = [dep.name for dep in dependencies]
    
    279 262
             self._warn_invalid_elements(deps_names)
    
    280 263
     
    
    281
    -        return element
    
    264
    +        element._ensure_no_circular_deps()
    
    282 265
     
    
    283
    -    # _check_circular_deps():
    
    284
    -    #
    
    285
    -    # Detect circular dependencies on LoadElements with
    
    286
    -    # dependencies already resolved.
    
    287
    -    #
    
    288
    -    # Args:
    
    289
    -    #    element (str): The element to check
    
    290
    -    #
    
    291
    -    # Raises:
    
    292
    -    #    (LoadError): In case there was a circular dependency error
    
    293
    -    #
    
    294
    -    def _check_circular_deps(self, element, check_elements=None, validated=None, sequence=None):
    
    295
    -
    
    296
    -        if check_elements is None:
    
    297
    -            check_elements = {}
    
    298
    -        if validated is None:
    
    299
    -            validated = {}
    
    300
    -        if sequence is None:
    
    301
    -            sequence = []
    
    302
    -
    
    303
    -        # Skip already validated branches
    
    304
    -        if validated.get(element) is not None:
    
    305
    -            return
    
    306
    -
    
    307
    -        if check_elements.get(element) is not None:
    
    308
    -            # Create `chain`, the loop of element dependencies from this
    
    309
    -            # element back to itself, by trimming everything before this
    
    310
    -            # element from the sequence under consideration.
    
    311
    -            chain = sequence[sequence.index(element.full_name):]
    
    312
    -            chain.append(element.full_name)
    
    313
    -            raise LoadError(LoadErrorReason.CIRCULAR_DEPENDENCY,
    
    314
    -                            ("Circular dependency detected at element: {}\n" +
    
    315
    -                             "Dependency chain: {}")
    
    316
    -                            .format(element.full_name, " -> ".join(chain)))
    
    317
    -
    
    318
    -        # Push / Check each dependency / Pop
    
    319
    -        check_elements[element] = True
    
    320
    -        sequence.append(element.full_name)
    
    321
    -        for dep in element.dependencies:
    
    322
    -            dep.element._loader._check_circular_deps(dep.element, check_elements, validated, sequence)
    
    323
    -        del check_elements[element]
    
    324
    -        sequence.pop()
    
    325
    -
    
    326
    -        # Eliminate duplicate paths
    
    327
    -        validated[element] = True
    
    266
    +        return element
    
    328 267
     
    
    329 268
         # _sort_dependencies():
    
    330 269
         #
    

  • buildstream/_profile.py
    ... ... @@ -42,7 +42,6 @@ initialized = False
    42 42
     #
    
    43 43
     # The special 'all' value will enable all profiles.
    
    44 44
     class Topics():
    
    45
    -    CIRCULAR_CHECK = 'circ-dep-check'
    
    46 45
         SORT_DEPENDENCIES = 'sort-deps'
    
    47 46
         LOAD_LOADER = 'load-loader'
    
    48 47
         LOAD_CONTEXT = 'load-context'
    

  • buildstream/_yaml.py
    ... ... @@ -365,8 +365,8 @@ _sentinel = object()
    365 365
     #
    
    366 366
     def node_get(node, expected_type, key, indices=None, *, default_value=_sentinel, allow_none=False):
    
    367 367
         value = node.get(key, default_value)
    
    368
    -    provenance = node_get_provenance(node)
    
    369 368
         if value is _sentinel:
    
    369
    +        provenance = node_get_provenance(node)
    
    370 370
             raise LoadError(LoadErrorReason.INVALID_DATA,
    
    371 371
                             "{}: Dictionary did not contain expected key '{}'".format(provenance, key))
    
    372 372
     
    
    ... ... @@ -914,6 +914,10 @@ RoundTripRepresenter.add_representer(SanitizedDict,
    914 914
                                          SafeRepresenter.represent_dict)
    
    915 915
     
    
    916 916
     
    
    917
    +# Types we can short-circuit in node_sanitize for speed.
    
    918
    +__SANITIZE_SHORT_CIRCUIT_TYPES = (int, float, str, bool, tuple)
    
    919
    +
    
    920
    +
    
    917 921
     # node_sanitize()
    
    918 922
     #
    
    919 923
     # Returnes an alphabetically ordered recursive copy
    
    ... ... @@ -922,9 +926,21 @@ RoundTripRepresenter.add_representer(SanitizedDict,
    922 926
     # Only dicts are ordered, list elements are left in order.
    
    923 927
     #
    
    924 928
     def node_sanitize(node):
    
    929
    +    # Short-circuit None which occurs ca. twice per element
    
    930
    +    if node is None:
    
    931
    +        return node
    
    932
    +
    
    933
    +    node_type = type(node)
    
    934
    +    # Next short-circuit integers, floats, strings, booleans, and tuples
    
    935
    +    if node_type in __SANITIZE_SHORT_CIRCUIT_TYPES:
    
    936
    +        return node
    
    937
    +    # Now short-circuit lists.  Note this is only for the raw list
    
    938
    +    # type, CommentedSeq and others get caught later.
    
    939
    +    elif node_type is list:
    
    940
    +        return [node_sanitize(elt) for elt in node]
    
    925 941
     
    
    926
    -    if isinstance(node, collections.abc.Mapping):
    
    927
    -
    
    942
    +    # Finally ChainMap and dict, and other Mappings need special handling
    
    943
    +    if node_type in (dict, ChainMap) or isinstance(node, collections.Mapping):
    
    928 944
             result = SanitizedDict()
    
    929 945
     
    
    930 946
             key_list = [key for key, _ in node_items(node)]
    
    ... ... @@ -932,10 +948,12 @@ def node_sanitize(node):
    932 948
                 result[key] = node_sanitize(node[key])
    
    933 949
     
    
    934 950
             return result
    
    935
    -
    
    951
    +    # Catch the case of CommentedSeq and friends.  This is more rare and so
    
    952
    +    # we keep complexity down by still using isinstance here.
    
    936 953
         elif isinstance(node, list):
    
    937 954
             return [node_sanitize(elt) for elt in node]
    
    938 955
     
    
    956
    +    # Everything else (such as commented scalars) just gets returned as-is.
    
    939 957
         return node
    
    940 958
     
    
    941 959
     
    
    ... ... @@ -1064,15 +1082,52 @@ class ChainMap(collections.ChainMap):
    1064 1082
                 return default
    
    1065 1083
     
    
    1066 1084
     
    
    1085
    +# Node copying
    
    1086
    +#
    
    1087
    +# Unfortunately we copy nodes a *lot* and `isinstance()` is super-slow when
    
    1088
    +# things from collections.abc get involved.  The result is the following
    
    1089
    +# intricate but substantially faster group of tuples and the use of `in`.
    
    1090
    +#
    
    1091
    +# If any of the {node,list}_{chain_,}_copy routines raise a ValueError
    
    1092
    +# then it's likely additional types need adding to these tuples.
    
    1093
    +
    
    1094
    +# When chaining a copy, these types are skipped since the ChainMap will
    
    1095
    +# retrieve them from the source node when needed.  Other copiers might copy
    
    1096
    +# them, so we call them __QUICK_TYPES.
    
    1097
    +__QUICK_TYPES = (str, bool,
    
    1098
    +                 yaml.scalarstring.PreservedScalarString,
    
    1099
    +                 yaml.scalarstring.SingleQuotedScalarString,
    
    1100
    +                 yaml.scalarstring.DoubleQuotedScalarString)
    
    1101
    +
    
    1102
    +# These types have to be iterated like a dictionary
    
    1103
    +__DICT_TYPES = (dict, ChainMap, yaml.comments.CommentedMap)
    
    1104
    +
    
    1105
    +# These types have to be iterated like a list
    
    1106
    +__LIST_TYPES = (list, yaml.comments.CommentedSeq)
    
    1107
    +
    
    1108
    +# These are the provenance types, which have to be cloned rather than any other
    
    1109
    +# copying tactic.
    
    1110
    +__PROVENANCE_TYPES = (Provenance, DictProvenance, MemberProvenance, ElementProvenance)
    
    1111
    +
    
    1112
    +# These are the directives used to compose lists, we need this because it's
    
    1113
    +# slightly faster during the node_final_assertions checks
    
    1114
    +__NODE_ASSERT_COMPOSITION_DIRECTIVES = ('(>)', '(<)', '(=)')
    
    1115
    +
    
    1116
    +
    
    1067 1117
     def node_chain_copy(source):
    
    1068 1118
         copy = ChainMap({}, source)
    
    1069 1119
         for key, value in source.items():
    
    1070
    -        if isinstance(value, collections.abc.Mapping):
    
    1120
    +        value_type = type(value)
    
    1121
    +        if value_type in __DICT_TYPES:
    
    1071 1122
                 copy[key] = node_chain_copy(value)
    
    1072
    -        elif isinstance(value, list):
    
    1123
    +        elif value_type in __LIST_TYPES:
    
    1073 1124
                 copy[key] = list_chain_copy(value)
    
    1074
    -        elif isinstance(value, Provenance):
    
    1125
    +        elif value_type in __PROVENANCE_TYPES:
    
    1075 1126
                 copy[key] = value.clone()
    
    1127
    +        elif value_type in __QUICK_TYPES:
    
    1128
    +            pass  # No need to copy these, the chainmap deals with it
    
    1129
    +        else:
    
    1130
    +            raise ValueError("Unable to be quick about node_chain_copy of {}".format(value_type))
    
    1076 1131
     
    
    1077 1132
         return copy
    
    1078 1133
     
    
    ... ... @@ -1080,14 +1135,17 @@ def node_chain_copy(source):
    1080 1135
     def list_chain_copy(source):
    
    1081 1136
         copy = []
    
    1082 1137
         for item in source:
    
    1083
    -        if isinstance(item, collections.abc.Mapping):
    
    1138
    +        item_type = type(item)
    
    1139
    +        if item_type in __DICT_TYPES:
    
    1084 1140
                 copy.append(node_chain_copy(item))
    
    1085
    -        elif isinstance(item, list):
    
    1141
    +        elif item_type in __LIST_TYPES:
    
    1086 1142
                 copy.append(list_chain_copy(item))
    
    1087
    -        elif isinstance(item, Provenance):
    
    1143
    +        elif item_type in __PROVENANCE_TYPES:
    
    1088 1144
                 copy.append(item.clone())
    
    1089
    -        else:
    
    1145
    +        elif item_type in __QUICK_TYPES:
    
    1090 1146
                 copy.append(item)
    
    1147
    +        else:  # Fallback
    
    1148
    +            raise ValueError("Unable to be quick about list_chain_copy of {}".format(item_type))
    
    1091 1149
     
    
    1092 1150
         return copy
    
    1093 1151
     
    
    ... ... @@ -1095,14 +1153,17 @@ def list_chain_copy(source):
    1095 1153
     def node_copy(source):
    
    1096 1154
         copy = {}
    
    1097 1155
         for key, value in source.items():
    
    1098
    -        if isinstance(value, collections.abc.Mapping):
    
    1156
    +        value_type = type(value)
    
    1157
    +        if value_type in __DICT_TYPES:
    
    1099 1158
                 copy[key] = node_copy(value)
    
    1100
    -        elif isinstance(value, list):
    
    1159
    +        elif value_type in __LIST_TYPES:
    
    1101 1160
                 copy[key] = list_copy(value)
    
    1102
    -        elif isinstance(value, Provenance):
    
    1161
    +        elif value_type in __PROVENANCE_TYPES:
    
    1103 1162
                 copy[key] = value.clone()
    
    1104
    -        else:
    
    1163
    +        elif value_type in __QUICK_TYPES:
    
    1105 1164
                 copy[key] = value
    
    1165
    +        else:
    
    1166
    +            raise ValueError("Unable to be quick about node_copy of {}".format(value_type))
    
    1106 1167
     
    
    1107 1168
         ensure_provenance(copy)
    
    1108 1169
     
    
    ... ... @@ -1112,14 +1173,17 @@ def node_copy(source):
    1112 1173
     def list_copy(source):
    
    1113 1174
         copy = []
    
    1114 1175
         for item in source:
    
    1115
    -        if isinstance(item, collections.abc.Mapping):
    
    1176
    +        item_type = type(item)
    
    1177
    +        if item_type in __DICT_TYPES:
    
    1116 1178
                 copy.append(node_copy(item))
    
    1117
    -        elif isinstance(item, list):
    
    1179
    +        elif item_type in __LIST_TYPES:
    
    1118 1180
                 copy.append(list_copy(item))
    
    1119
    -        elif isinstance(item, Provenance):
    
    1181
    +        elif item_type in __PROVENANCE_TYPES:
    
    1120 1182
                 copy.append(item.clone())
    
    1121
    -        else:
    
    1183
    +        elif item_type in __QUICK_TYPES:
    
    1122 1184
                 copy.append(item)
    
    1185
    +        else:
    
    1186
    +            raise ValueError("Unable to be quick about list_copy of {}".format(item_type))
    
    1123 1187
     
    
    1124 1188
         return copy
    
    1125 1189
     
    
    ... ... @@ -1142,22 +1206,26 @@ def node_final_assertions(node):
    1142 1206
             # indicates that the user intended to override a list which
    
    1143 1207
             # never existed in the underlying data
    
    1144 1208
             #
    
    1145
    -        if key in ['(>)', '(<)', '(=)']:
    
    1209
    +        if key in __NODE_ASSERT_COMPOSITION_DIRECTIVES:
    
    1146 1210
                 provenance = node_get_provenance(node, key)
    
    1147 1211
                 raise LoadError(LoadErrorReason.TRAILING_LIST_DIRECTIVE,
    
    1148 1212
                                 "{}: Attempt to override non-existing list".format(provenance))
    
    1149 1213
     
    
    1150
    -        if isinstance(value, collections.abc.Mapping):
    
    1214
    +        value_type = type(value)
    
    1215
    +
    
    1216
    +        if value_type in __DICT_TYPES:
    
    1151 1217
                 node_final_assertions(value)
    
    1152
    -        elif isinstance(value, list):
    
    1218
    +        elif value_type in __LIST_TYPES:
    
    1153 1219
                 list_final_assertions(value)
    
    1154 1220
     
    
    1155 1221
     
    
    1156 1222
     def list_final_assertions(values):
    
    1157 1223
         for value in values:
    
    1158
    -        if isinstance(value, collections.abc.Mapping):
    
    1224
    +        value_type = type(value)
    
    1225
    +
    
    1226
    +        if value_type in __DICT_TYPES:
    
    1159 1227
                 node_final_assertions(value)
    
    1160
    -        elif isinstance(value, list):
    
    1228
    +        elif value_type in __LIST_TYPES:
    
    1161 1229
                 list_final_assertions(value)
    
    1162 1230
     
    
    1163 1231
     
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]