[Notes] [Git][BuildStream/buildstream][danielsilverstone-ct/further-optimisations] 5 commits: _yaml.py: Only retrieve provenance in node_get() when needed



Title: GitLab

Daniel Silverstone pushed to branch danielsilverstone-ct/further-optimisations at BuildStream / buildstream

Commits:

3 changed files:

Changes:

  • buildstream/_cachekey.py
    ... ... @@ -40,3 +40,20 @@ def generate_key(value):
    40 40
         ordered = _yaml.node_sanitize(value)
    
    41 41
         string = pickle.dumps(ordered)
    
    42 42
         return hashlib.sha256(string).hexdigest()
    
    43
    +
    
    44
    +
    
    45
    +# generate_key_pre_sanitized()
    
    46
    +#
    
    47
    +# Generate an sha256 hex digest from the given value. The value
    
    48
    +# must be (a) compatible with generate_key() and (b) already have
    
    49
    +# been passed through _yaml.node_sanitize()
    
    50
    +#
    
    51
    +# Args:
    
    52
    +#    value: A sanitized value to get a key for
    
    53
    +#
    
    54
    +# Returns:
    
    55
    +#    (str): An sha256 hex digest of the given value
    
    56
    +#
    
    57
    +def generate_key_pre_sanitized(value):
    
    58
    +    string = pickle.dumps(value)
    
    59
    +    return hashlib.sha256(string).hexdigest()

  • buildstream/_yaml.py
    ... ... @@ -365,8 +365,8 @@ _sentinel = object()
    365 365
     #
    
    366 366
     def node_get(node, expected_type, key, indices=None, *, default_value=_sentinel, allow_none=False):
    
    367 367
         value = node.get(key, default_value)
    
    368
    -    provenance = node_get_provenance(node)
    
    369 368
         if value is _sentinel:
    
    369
    +        provenance = node_get_provenance(node)
    
    370 370
             raise LoadError(LoadErrorReason.INVALID_DATA,
    
    371 371
                             "{}: Dictionary did not contain expected key '{}'".format(provenance, key))
    
    372 372
     
    
    ... ... @@ -922,9 +922,20 @@ RoundTripRepresenter.add_representer(SanitizedDict,
    922 922
     # Only dicts are ordered, list elements are left in order.
    
    923 923
     #
    
    924 924
     def node_sanitize(node):
    
    925
    +    # Short-circuit None which occurs ca. twice per element
    
    926
    +    if node is None:
    
    927
    +        return node
    
    928
    +
    
    929
    +    node_type = type(node)
    
    930
    +    # Next short-circuit integers, floats, strings, booleans, and tuples
    
    931
    +    if node_type in (int, float, str, bool, tuple):
    
    932
    +        return node
    
    933
    +    # Now short-circuit lists
    
    934
    +    elif node_type is list:
    
    935
    +        return [node_sanitize(elt) for elt in node]
    
    925 936
     
    
    926
    -    if isinstance(node, collections.abc.Mapping):
    
    927
    -
    
    937
    +    # Finally ChainMap and dict, and other Mappings need special handling
    
    938
    +    if node_type in (dict, ChainMap) or isinstance(node, collections.Mapping):
    
    928 939
             result = SanitizedDict()
    
    929 940
     
    
    930 941
             key_list = [key for key, _ in node_items(node)]
    
    ... ... @@ -932,10 +943,10 @@ def node_sanitize(node):
    932 943
                 result[key] = node_sanitize(node[key])
    
    933 944
     
    
    934 945
             return result
    
    935
    -
    
    936 946
         elif isinstance(node, list):
    
    937 947
             return [node_sanitize(elt) for elt in node]
    
    938 948
     
    
    949
    +    # Everything else (such as commented scalars) just gets returned as-is.
    
    939 950
         return node
    
    940 951
     
    
    941 952
     
    
    ... ... @@ -1063,16 +1074,48 @@ class ChainMap(collections.ChainMap):
    1063 1074
             except KeyError:
    
    1064 1075
                 return default
    
    1065 1076
     
    
    1077
    +# Node copying
    
    1078
    +#
    
    1079
    +# Unfortunately we copy nodes a *lot* and `isinstance()` is super-slow when
    
    1080
    +# things from collections.abc get involved.  The result is the following
    
    1081
    +# intricate but substantially faster group of tuples and the use of `in`.
    
    1082
    +#
    
    1083
    +# If any of the {node,list}_{chain_,}copy routines raise a ValueError
    
    1084
    +# then it's likely additional types need adding to these tuples.
    
    1085
    +
    
    1086
    +# When chaining a copy, these types are skipped since the ChainMap will
    
    1087
    +# retrieve them from the source node when needed.  Other copiers might copy
    
    1088
    +# them, so we call them __quick_types.
    
    1089
    +__quick_types = (str, bool,
    
    1090
    +                 yaml.scalarstring.PreservedScalarString,
    
    1091
    +                 yaml.scalarstring.SingleQuotedScalarString,
    
    1092
    +                 yaml.scalarstring.DoubleQuotedScalarString)
    
    1093
    +
    
    1094
    +# These types have to be iterated like a dictionary
    
    1095
    +__dict_types = (dict, ChainMap, yaml.comments.CommentedMap)
    
    1096
    +
    
    1097
    +# These types have to be iterated like a list
    
    1098
    +__list_types = (list, yaml.comments.CommentedSeq)
    
    1099
    +
    
    1100
    +# These are the provenance types, which have to be cloned rather than any other
    
    1101
    +# copying tactic.
    
    1102
    +__provenance_types = (Provenance, DictProvenance, MemberProvenance, ElementProvenance)
    
    1103
    +
    
    1066 1104
     
    
    1067 1105
     def node_chain_copy(source):
    
    1068 1106
         copy = ChainMap({}, source)
    
    1069 1107
         for key, value in source.items():
    
    1070
    -        if isinstance(value, collections.abc.Mapping):
    
    1108
    +        value_type = type(value)
    
    1109
    +        if value_type in __dict_types:
    
    1071 1110
                 copy[key] = node_chain_copy(value)
    
    1072
    -        elif isinstance(value, list):
    
    1111
    +        elif value_type in __list_types:
    
    1073 1112
                 copy[key] = list_chain_copy(value)
    
    1074
    -        elif isinstance(value, Provenance):
    
    1113
    +        elif value_type in __provenance_types:
    
    1075 1114
                 copy[key] = value.clone()
    
    1115
    +        elif value_type in __quick_types:
    
    1116
    +            pass  # No need to copy these, the chainmap deals with it
    
    1117
    +        else:
    
    1118
    +            raise ValueError("Unable to be quick about node_chain_copy of {}".format(value_type))
    
    1076 1119
     
    
    1077 1120
         return copy
    
    1078 1121
     
    
    ... ... @@ -1080,14 +1123,17 @@ def node_chain_copy(source):
    1080 1123
     def list_chain_copy(source):
    
    1081 1124
         copy = []
    
    1082 1125
         for item in source:
    
    1083
    -        if isinstance(item, collections.abc.Mapping):
    
    1126
    +        item_type = type(item)
    
    1127
    +        if item_type in __dict_types:
    
    1084 1128
                 copy.append(node_chain_copy(item))
    
    1085
    -        elif isinstance(item, list):
    
    1129
    +        elif item_type in __list_types:
    
    1086 1130
                 copy.append(list_chain_copy(item))
    
    1087
    -        elif isinstance(item, Provenance):
    
    1131
    +        elif item_type in __provenance_types:
    
    1088 1132
                 copy.append(item.clone())
    
    1089
    -        else:
    
    1133
    +        elif item_type in __quick_types:
    
    1090 1134
                 copy.append(item)
    
    1135
    +        else:  # Fallback
    
    1136
    +            raise ValueError("Unable to be quick about list_chain_copy of {}".format(item_type))
    
    1091 1137
     
    
    1092 1138
         return copy
    
    1093 1139
     
    
    ... ... @@ -1095,14 +1141,17 @@ def list_chain_copy(source):
    1095 1141
     def node_copy(source):
    
    1096 1142
         copy = {}
    
    1097 1143
         for key, value in source.items():
    
    1098
    -        if isinstance(value, collections.abc.Mapping):
    
    1144
    +        value_type = type(value)
    
    1145
    +        if value_type in __dict_types:
    
    1099 1146
                 copy[key] = node_copy(value)
    
    1100
    -        elif isinstance(value, list):
    
    1147
    +        elif value_type in __list_types:
    
    1101 1148
                 copy[key] = list_copy(value)
    
    1102
    -        elif isinstance(value, Provenance):
    
    1149
    +        elif value_type in __provenance_types:
    
    1103 1150
                 copy[key] = value.clone()
    
    1104
    -        else:
    
    1151
    +        elif value_type in __quick_types:
    
    1105 1152
                 copy[key] = value
    
    1153
    +        else:
    
    1154
    +            raise ValueError("Unable to be quick about node_copy of {}".format(value_type))
    
    1106 1155
     
    
    1107 1156
         ensure_provenance(copy)
    
    1108 1157
     
    
    ... ... @@ -1112,14 +1161,17 @@ def node_copy(source):
    1112 1161
     def list_copy(source):
    
    1113 1162
         copy = []
    
    1114 1163
         for item in source:
    
    1115
    -        if isinstance(item, collections.abc.Mapping):
    
    1164
    +        item_type = type(item)
    
    1165
    +        if item_type in __dict_types:
    
    1116 1166
                 copy.append(node_copy(item))
    
    1117
    -        elif isinstance(item, list):
    
    1167
    +        elif item_type in __list_types:
    
    1118 1168
                 copy.append(list_copy(item))
    
    1119
    -        elif isinstance(item, Provenance):
    
    1169
    +        elif item_type in __provenance_types:
    
    1120 1170
                 copy.append(item.clone())
    
    1121
    -        else:
    
    1171
    +        elif item_type in __quick_types:
    
    1122 1172
                 copy.append(item)
    
    1173
    +        else:
    
    1174
    +            raise ValueError("Unable to be quick about list_copy of {}".format(item_type))
    
    1123 1175
     
    
    1124 1176
         return copy
    
    1125 1177
     
    

  • buildstream/element.py
    ... ... @@ -2108,11 +2108,14 @@ class Element(Plugin):
    2108 2108
                 }
    
    2109 2109
     
    
    2110 2110
                 self.__cache_key_dict['fatal-warnings'] = sorted(project._fatal_warnings)
    
    2111
    +            self.__cache_key_dict['dependencies'] = []
    
    2112
    +            self.__cache_key_dict = _yaml.node_sanitize(self.__cache_key_dict)
    
    2111 2113
     
    
    2112
    -        cache_key_dict = self.__cache_key_dict.copy()
    
    2113
    -        cache_key_dict['dependencies'] = dependencies
    
    2114
    +        # This replacement is safe since OrderedDict replaces the value,
    
    2115
    +        # leaving its location in the dictionary alone.
    
    2116
    +        self.__cache_key_dict['dependencies'] = _yaml.node_sanitize(dependencies)
    
    2114 2117
     
    
    2115
    -        return _cachekey.generate_key(cache_key_dict)
    
    2118
    +        return _cachekey.generate_key_pre_sanitized(self.__cache_key_dict)
    
    2116 2119
     
    
    2117 2120
         # __can_build_incrementally()
    
    2118 2121
         #
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]