[Notes] [Git][BuildStream/buildstream][jonathan/pickle-yaml] 2 commits: Loader: Make _extract_depends_from_node not 'del' nonexistent fields




Jonathan Maw pushed to branch jonathan/pickle-yaml at BuildStream / buildstream

Commits:

5 changed files:

  • buildstream/_loader/loadelement.py
  • buildstream/_loader/loader.py
  • buildstream/_pickler.py
  • buildstream/_project.py
  • buildstream/_yaml.py

Changes:

  • buildstream/_loader/loadelement.py
    @@ -185,6 +185,6 @@ def _extract_depends_from_node(node, *, key=None):
             output_deps.append(dependency)

         # Now delete the field, we dont want it anymore
    -    del node[key]
    +    node.pop(key, None)

         return output_deps
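    For context, a minimal standalone sketch of the behaviour difference (plain dicts here; the real node is a provenanced dict, and 'depends' is just an example key):

        # 'del' raises KeyError when the field was never present;
        # dict.pop() with a default tolerates the missing field.
        node = {"kind": "autotools"}        # no 'depends' field in this element

        node.pop("depends", None)           # fine, returns None
        try:
            del node["depends"]             # this is what used to crash
        except KeyError:
            pass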
  • buildstream/_loader/loader.py
    @@ -30,6 +30,7 @@ from ..element import Element
     from .._profile import Topics, profile_start, profile_end
     from .._platform import Platform
     from .._includes import Includes
    +from .._pickler import YamlCache

     from .types import Symbol, Dependency
     from .loadelement import LoadElement
    @@ -113,7 +114,8 @@ class Loader():
                 profile_start(Topics.LOAD_PROJECT, target)
                 junction, name, loader = self._parse_name(target, rewritable, ticker,
                                                           fetch_subprojects=fetch_subprojects)
    -            loader._load_file(name, rewritable, ticker, fetch_subprojects)
    +            with YamlCache.get_cache(self._context) as yaml_cache:
    +                loader._load_file(name, rewritable, ticker, fetch_subprojects, yaml_cache)
                 deps.append(Dependency(name, junction=junction))
                 profile_end(Topics.LOAD_PROJECT, target)

    @@ -202,11 +204,12 @@ class Loader():
         #    rewritable (bool): Whether we should load in round trippable mode
         #    ticker (callable): A callback to report loaded filenames to the frontend
         #    fetch_subprojects (bool): Whether to fetch subprojects while loading
    +    #    yaml_cache (YamlCache): A yaml cache
         #
         # Returns:
         #    (LoadElement): A loaded LoadElement
         #
    -    def _load_file(self, filename, rewritable, ticker, fetch_subprojects):
    +    def _load_file(self, filename, rewritable, ticker, fetch_subprojects, yaml_cache=None):

             # Silently ignore already loaded files
             if filename in self._elements:
    @@ -219,7 +222,8 @@ class Loader():
             # Load the data and process any conditional statements therein
             fullpath = os.path.join(self._basedir, filename)
             try:
    -            node = _yaml.load(fullpath, shortname=filename, copy_tree=rewritable, project=self.project)
    +            node = _yaml.load(fullpath, shortname=filename, copy_tree=rewritable,
    +                              project=self.project, yaml_cache=yaml_cache)
             except LoadError as e:
                 if e.reason == LoadErrorReason.MISSING_FILE:
                     # If we can't find the file, try to suggest plausible
    @@ -262,13 +266,13 @@ class Loader():
             # Load all dependency files for the new LoadElement
             for dep in element.deps:
                 if dep.junction:
    -                self._load_file(dep.junction, rewritable, ticker, fetch_subprojects)
    +                self._load_file(dep.junction, rewritable, ticker, fetch_subprojects, yaml_cache)
                     loader = self._get_loader(dep.junction, rewritable=rewritable, ticker=ticker,
                                               fetch_subprojects=fetch_subprojects)
                 else:
                     loader = self

    -            dep_element = loader._load_file(dep.name, rewritable, ticker, fetch_subprojects)
    +            dep_element = loader._load_file(dep.name, rewritable, ticker, fetch_subprojects, yaml_cache)

                 if _yaml.node_get(dep_element.node, str, Symbol.KIND) == 'junction':
                     raise LoadError(LoadErrorReason.INVALID_DATA,
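    Taken together, the calling pattern introduced here looks roughly like the sketch below (not verbatim BuildStream code; `context`, `loader` and the other arguments stand in for the real objects):

        from buildstream._pickler import YamlCache

        # One cache is opened around the whole load; get_cache() writes it
        # back to disk when the 'with' block exits.
        with YamlCache.get_cache(context) as yaml_cache:
            # Every _load_file() call in the load tree receives the same cache,
            # so unchanged .bst files are not re-parsed on the next invocation.
            element = loader._load_file(name, rewritable, ticker,
                                        fetch_subprojects, yaml_cache)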

  • buildstream/_pickler.py
    +#
    +#  Copyright 2018 Bloomberg Finance LP
    +#
    +#  This program is free software; you can redistribute it and/or
    +#  modify it under the terms of the GNU Lesser General Public
    +#  License as published by the Free Software Foundation; either
    +#  version 2 of the License, or (at your option) any later version.
    +#
    +#  This library is distributed in the hope that it will be useful,
    +#  but WITHOUT ANY WARRANTY; without even the implied warranty of
    +#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    +#  Lesser General Public License for more details.
    +#
    +#  You should have received a copy of the GNU Lesser General Public
    +#  License along with this library. If not, see <http://www.gnu.org/licenses/>.
    +#
    +#  Authors:
    +#        Jonathan Maw <jonathan maw codethink co uk>
    +
    +import os
    +import pickle
    +import hashlib
    +import io
    +
    +from contextlib import contextmanager
    +from collections import namedtuple
    +
    +from ._cachekey import generate_key
    +from . import utils, _yaml
    +
    +
    +YAML_CACHE_FILENAME = "yaml_cache.pickle"
    +
    +
    +# In _yaml.load, we have a ProvenanceFile that stores the project the file
    +# came from. Projects can't be pickled, but it's always going to be the same
    +# project between invocations (unless the entire project is moved but the
    +# file stayed in the same place)
    +class BstPickler(pickle.Pickler):
    +    def persistent_id(self, obj):
    +        if isinstance(obj, _yaml.ProvenanceFile):
    +            if obj.project:
    +                # ProvenanceFile's project object cannot be stored as it is.
    +                project_tag = obj.project.name
    +                # ProvenanceFile's filename must be stored relative to the
    +                # project, as the project dir may move.
    +                name = os.path.relpath(obj.name, obj.project.directory)
    +            else:
    +                project_tag = None
    +                name = obj.name
    +            return ("ProvenanceFile", name, obj.shortname, project_tag)
    +        else:
    +            return None
    +
    +    @staticmethod
    +    def dumps(obj):
    +        stream = io.BytesIO()
    +        BstPickler(stream).dump(obj)
    +        stream.seek(0)
    +        return stream.read()
    +
    +
    +class BstUnpickler(pickle.Unpickler):
    +    def __init__(self, file, context):
    +        super().__init__(file)
    +        self._context = context
    +
    +    def persistent_load(self, pid):
    +        if pid[0] == "ProvenanceFile":
    +            _, tagged_name, shortname, project_tag = pid
    +
    +            if project_tag is not None:
    +                project = None
    +                for p in self._context.get_projects():
    +                    if project_tag == p.name:
    +                        project = p
    +                        break
    +
    +                if not project:
    +                    projects = [p.name for p in self._context.get_projects()]
    +                    raise pickle.UnpicklingError("No project with name {} found in {}"
    +                                                 .format(project_tag, projects))
    +
    +                name = os.path.join(project.directory, tagged_name)
    +            else:
    +                project = None
    +                name = tagged_name
    +
    +            return _yaml.ProvenanceFile(name, shortname, project)
    +
    +        else:
    +            raise pickle.UnpicklingError("Unsupported persistent object")
    +
    +    @staticmethod
    +    def loads(text, context):
    +        stream = io.BytesIO()
    +        stream.write(bytes(text))
    +        stream.seek(0)
    +        return BstUnpickler(stream, context).load()
    +
    +
    +CachedProject = namedtuple('CachedProject', ['path', 'project_sum', 'elements'])
    +
    +
    +class CachedYaml():
    +    def __init__(self, key, contents):
    +        self._key = key
    +        self.set_contents(contents)
    +
    +    # Sets the contents of the CachedYaml.
    +    #
    +    # Args:
    +    #    contents (provenanced dict): The contents to put in the cache.
    +    #
    +    def set_contents(self, contents):
    +        self._contents = contents
    +        self._pickled_contents = BstPickler.dumps(contents)
    +
    +    # Pickling helper method, prevents 'contents' from being serialised
    +    def __getstate__(self):
    +        data = self.__dict__.copy()
    +        data['_contents'] = None
    +        return data
    +
    +
    +class YamlCache():
    +
    +    def __init__(self):
    +        self._project_caches = {}
    +
    +    # Writes the yaml cache to the specified path.
    +    #
    +    # Args:
    +    #    path (str): The file to write the cache to.
    +    #
    +    def write(self, path):
    +        parent_dir = os.path.dirname(path)
    +        os.makedirs(parent_dir, exist_ok=True)
    +        with open(path, "wb") as f:
    +            BstPickler(f).dump(self)
    +
    +    # Gets a parsed file from the cache.
    +    #
    +    # Args:
    +    #    project (Project): The project this file is in.
    +    #    filepath (str): The path to the file, *relative to the project's directory*.
    +    #    key (str): The key to the file within the cache. Typically, this is the
    +    #               value of `calculate_key()` with the file's unparsed contents
    +    #               and any relevant metadata passed in.
    +    #
    +    # Returns:
    +    #    (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
    +    def get(self, project, filepath, key):
    +        if project.name in self._project_caches:
    +            project_cache = self._project_caches[project.name]
    +            if filepath in project_cache.elements:
    +                cachedyaml = project_cache.elements[filepath]
    +                if cachedyaml._key == key:
    +                    if cachedyaml._contents is None:
    +                        cachedyaml._contents = BstUnpickler.loads(cachedyaml._pickled_contents, project._context)
    +                    return cachedyaml._contents
    +        return None
    +
    +    # Put a parsed file into the cache.
    +    #
    +    # Args:
    +    #    project (Project): The project this file is in.
    +    #    filepath (str): The path to the file, *relative to the project's directory*.
    +    #    key (str): The key to the file within the cache. Typically, this is the
    +    #               value of `calculate_key()` with the file's unparsed contents
    +    #               and any relevant metadata passed in.
    +    #    value (decorated dict): The data to put into the cache.
    +    def put(self, project, filepath, key, value):
    +        if project.name in self._project_caches \
    +                and project.shasum == self._project_caches[project.name].project_sum:
    +            project_cache = self._project_caches[project.name]
    +        else:
    +            project_cache = self._project_caches[project.name] = CachedProject(project.directory, project.shasum, {})
    +
    +        project_cache.elements[filepath] = CachedYaml(key, value)
    +
    +    # Return an instance of the YamlCache which writes to disk when it leaves scope.
    +    #
    +    # Args:
    +    #    context (Context): The context.
    +    #
    +    # Returns:
    +    #    (YamlCache): A YamlCache.
    +    @staticmethod
    +    @contextmanager
    +    def get_cache(context):
    +        # Try to load from disk first
    +        cachefile = YamlCache._get_cache_file(context)
    +        cache = None
    +        if os.path.exists(cachefile):
    +            try:
    +                with open(cachefile, "rb") as f:
    +                    cache = BstUnpickler(f, context).load()
    +            except pickle.UnpicklingError:
    +                pass
    +
    +        if not cache:
    +            cache = YamlCache()
    +
    +        yield cache
    +
    +        cache.write(cachefile)
    +
    +    # Calculates a key for putting into the cache.
    +    @staticmethod
    +    def calculate_key(*args):
    +        string = pickle.dumps(args)
    +        return hashlib.sha1(string).hexdigest()
    +
    +    # Retrieves a path to the yaml cache file.
    +    @staticmethod
    +    def _get_cache_file(context):
    +        toplevel_project = context.get_toplevel_project()
    +        return os.path.join(toplevel_project.directory, ".bst", YAML_CACHE_FILENAME)
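    The BstPickler/BstUnpickler pair above uses pickle's persistent-id hook so that a ProvenanceFile's un-picklable Project reference survives as a tag. A minimal self-contained sketch of the same mechanism, using a stand-in Token class rather than BuildStream types:

        import io
        import pickle

        class Token:
            def __init__(self, name):
                self.name = name

        class SketchPickler(pickle.Pickler):
            def persistent_id(self, obj):
                if isinstance(obj, Token):
                    return ("Token", obj.name)   # store a tag instead of the object
                return None                      # everything else is pickled normally

        class SketchUnpickler(pickle.Unpickler):
            def persistent_load(self, pid):
                kind, name = pid
                if kind == "Token":
                    return Token(name)           # rebuild the object from its tag
                raise pickle.UnpicklingError("Unsupported persistent object")

        stream = io.BytesIO()
        SketchPickler(stream).dump({"file": Token("element.bst")})
        stream.seek(0)
        assert SketchUnpickler(stream).load()["file"].name == "element.bst"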
  • buildstream/_project.py
    @@ -19,6 +19,7 @@
     #        Tiago Gomes <tiago gomes codethink co uk>

     import os
    +import hashlib
     from collections import Mapping, OrderedDict
     from pluginbase import PluginBase
     from . import utils
    @@ -110,6 +111,7 @@ class Project():
             self.ref_storage = None                  # ProjectRefStorage setting
             self.base_environment = {}               # The base set of environment variables
             self.base_env_nocache = None             # The base nocache mask (list) for the environment
    +        self.shasum = None                       # A SHA-1 sum of the project file
     
             #
             # Private Members
    @@ -382,6 +384,10 @@ class Project():
     
             # Load project local config and override the builtin
             try:
    +            with open(projectfile, "r") as f:
    +                contents = f.read()
    +            self.shasum = hashlib.sha1(contents.encode('utf-8')).hexdigest()
    +
                 self._project_conf = _yaml.load(projectfile)
             except LoadError as e:
                 # Raise a more specific error here
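    A short sketch of what the new shasum amounts to (the path is an example; in BuildStream it is the project.conf path already held in `projectfile`):

        import hashlib

        # SHA-1 of the raw project.conf text; YamlCache.put() compares it with
        # the stored project_sum to decide whether a cached entry is still valid.
        with open("project.conf", "r") as f:
            contents = f.read()
        shasum = hashlib.sha1(contents.encode('utf-8')).hexdigest()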

  • buildstream/_yaml.py
    @@ -23,6 +23,8 @@ import string
     from copy import deepcopy
     from contextlib import ExitStack
     from pathlib import Path
    +import hashlib
    +import os
     
     from ruamel import yaml
     from ruamel.yaml.representer import SafeRepresenter, RoundTripRepresenter
    @@ -183,12 +185,13 @@ class CompositeTypeError(CompositeError):
     #    shortname (str): The filename in shorthand for error reporting (or None)
     #    copy_tree (bool): Whether to make a copy, preserving the original toplevels
     #                      for later serialization
    +#    yaml_cache (YamlCache): A yaml cache to consult rather than parsing
     #
     # Returns (dict): A loaded copy of the YAML file with provenance information
     #
     # Raises: LoadError
     #
    -def load(filename, shortname=None, copy_tree=False, *, project=None):
    +def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache=None):
         if not shortname:
             shortname = filename
     
    @@ -196,7 +199,7 @@ def load(filename, shortname=None, copy_tree=False, *, project=None):
     
         try:
             with open(filename) as f:
    -            return load_data(f, file, copy_tree=copy_tree)
    +            return load_data(f, file, copy_tree=copy_tree, yaml_cache=yaml_cache)
         except FileNotFoundError as e:
             raise LoadError(LoadErrorReason.MISSING_FILE,
                             "Could not find file at {}".format(filename)) from e
    @@ -208,24 +211,46 @@ def load(filename, shortname=None, copy_tree=False, *, project=None):
     
     # Like load(), but doesnt require the data to be in a file
     #
    -def load_data(data, file=None, copy_tree=False):
    +def load_data(data, file=None, copy_tree=False, yaml_cache=None):
     
    -    try:
    -        contents = yaml.load(data, yaml.loader.RoundTripLoader, preserve_quotes=True)
    -    except (yaml.scanner.ScannerError, yaml.composer.ComposerError, yaml.parser.ParserError) as e:
    -        raise LoadError(LoadErrorReason.INVALID_YAML,
    -                        "Malformed YAML:\n\n{}\n\n{}\n".format(e.problem, e.problem_mark)) from e
    -
    -    if not isinstance(contents, dict):
    -        # Special case allowance for None, when the loaded file has only comments in it.
    -        if contents is None:
    -            contents = {}
    -        else:
    +    if hasattr(data, 'read'):
    +        file_contents = data.read()
    +    else:
    +        file_contents = data
    +    # Forced to compare sums of contents because elements in junctions are stored in tmpdirs
    +    decorated_yaml = None
    +    if yaml_cache:
    +        assert file
    +        project = file.project
    +        filename = os.path.relpath(file.name, project.directory)
    +        key = yaml_cache.calculate_key(file_contents, copy_tree)
    +        decorated_yaml = yaml_cache.get(project, filename, key)
    +
    +    if not decorated_yaml:
    +        try:
    +            contents = yaml.load(file_contents, yaml.loader.RoundTripLoader, preserve_quotes=True)
    +        except (yaml.scanner.ScannerError, yaml.composer.ComposerError, yaml.parser.ParserError) as e:
                 raise LoadError(LoadErrorReason.INVALID_YAML,
    -                            "YAML file has content of type '{}' instead of expected type 'dict': {}"
    -                            .format(type(contents).__name__, file.name))
    +                            "Malformed YAML:\n\n{}\n\n{}\n".format(e.problem, e.problem_mark)) from e
     
    -    return node_decorated_copy(file, contents, copy_tree=copy_tree)
    +        if not isinstance(contents, dict):
    +            # Special case allowance for None, when the loaded file has only comments in it.
    +            if contents is None:
    +                contents = {}
    +            else:
    +                raise LoadError(LoadErrorReason.INVALID_YAML,
    +                                "YAML file has content of type '{}' instead of expected type 'dict': {}"
    +                                .format(type(contents).__name__, file.name))
    +
    +        decorated_yaml = node_decorated_copy(file, contents, copy_tree=copy_tree)
    +        if yaml_cache:
    +            assert file
    +            project = file.project
    +            filename = os.path.relpath(file.name, project.directory)
    +            key = yaml_cache.calculate_key(file_contents, copy_tree)
    +            yaml_cache.put(project, filename, key, decorated_yaml)
    +
    +    return decorated_yaml
     
     
     # Dumps a previously loaded YAML node to a file
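    The cache lookup added to load_data() boils down to the flow sketched below (a simplification, not the verbatim code; `parse` stands in for the ruamel round-trip load plus node_decorated_copy()):

        import hashlib
        import os
        import pickle

        # Mirrors YamlCache.calculate_key(): hash the raw text plus the
        # copy_tree flag, so changing either invalidates the cached entry.
        def calculate_key(*args):
            return hashlib.sha1(pickle.dumps(args)).hexdigest()

        def load_with_cache(file_contents, file, copy_tree, yaml_cache, parse):
            filename = os.path.relpath(file.name, file.project.directory)
            key = calculate_key(file_contents, copy_tree)

            node = yaml_cache.get(file.project, filename, key)      # hit: skip parsing
            if node is None:
                node = parse(file_contents)                          # miss: parse...
                yaml_cache.put(file.project, filename, key, node)    # ...then remember it
            return node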


