[Notes] [Git][BuildStream/buildstream][jonathan/pickle-yaml] 5 commits: Improve documentation for artifact cache installation



Title: GitLab

Jonathan Maw pushed to branch jonathan/pickle-yaml at BuildStream / buildstream

Commits:

7 changed files:

Changes:

  • buildstream/_loader/loadelement.py
    ... ... @@ -185,6 +185,7 @@ def _extract_depends_from_node(node, *, key=None):
    185 185
             output_deps.append(dependency)
    
    186 186
     
    
    187 187
         # Now delete the field, we don't want it anymore
    
    188
    -    del node[key]
    
    188
    +    if key in node:
    
    189
    +        del node[key]
    
    189 190
     
    
    190 191
         return output_deps

  • buildstream/_loader/loader.py
    ... ... @@ -30,6 +30,7 @@ from ..element import Element
    30 30
     from .._profile import Topics, profile_start, profile_end
    
    31 31
     from .._platform import Platform
    
    32 32
     from .._includes import Includes
    
    33
    +from .._pickler import YamlCache
    
    33 34
     
    
    34 35
     from .types import Symbol, Dependency
    
    35 36
     from .loadelement import LoadElement
    
    ... ... @@ -113,7 +114,8 @@ class Loader():
    113 114
                 profile_start(Topics.LOAD_PROJECT, target)
    
    114 115
                 junction, name, loader = self._parse_name(target, rewritable, ticker,
    
    115 116
                                                           fetch_subprojects=fetch_subprojects)
    
    116
    -            loader._load_file(name, rewritable, ticker, fetch_subprojects)
    
    117
    +            with YamlCache.get_cache(self._context) as yaml_cache:
    
    118
    +                loader._load_file(name, rewritable, ticker, fetch_subprojects, yaml_cache)
    
    117 119
                 deps.append(Dependency(name, junction=junction))
    
    118 120
                 profile_end(Topics.LOAD_PROJECT, target)
    
    119 121
     
    
    ... ... @@ -202,11 +204,12 @@ class Loader():
    202 204
         #    rewritable (bool): Whether we should load in round trippable mode
    
    203 205
         #    ticker (callable): A callback to report loaded filenames to the frontend
    
    204 206
         #    fetch_subprojects (bool): Whether to fetch subprojects while loading
    
    207
    +    #    yaml_cache (YamlCache): A yaml cache
    
    205 208
         #
    
    206 209
         # Returns:
    
    207 210
         #    (LoadElement): A loaded LoadElement
    
    208 211
         #
    
    209
    -    def _load_file(self, filename, rewritable, ticker, fetch_subprojects):
    
    212
    +    def _load_file(self, filename, rewritable, ticker, fetch_subprojects, yaml_cache=None):
    
    210 213
     
    
    211 214
             # Silently ignore already loaded files
    
    212 215
             if filename in self._elements:
    
    ... ... @@ -219,7 +222,8 @@ class Loader():
    219 222
             # Load the data and process any conditional statements therein
    
    220 223
             fullpath = os.path.join(self._basedir, filename)
    
    221 224
             try:
    
    222
    -            node = _yaml.load(fullpath, shortname=filename, copy_tree=rewritable, project=self.project)
    
    225
    +            node = _yaml.load(fullpath, shortname=filename, copy_tree=rewritable,
    
    226
    +                              project=self.project, yaml_cache=yaml_cache)
    
    223 227
             except LoadError as e:
    
    224 228
                 if e.reason == LoadErrorReason.MISSING_FILE:
    
    225 229
                     # If we can't find the file, try to suggest plausible
    
    ... ... @@ -262,13 +266,13 @@ class Loader():
    262 266
             # Load all dependency files for the new LoadElement
    
    263 267
             for dep in element.deps:
    
    264 268
                 if dep.junction:
    
    265
    -                self._load_file(dep.junction, rewritable, ticker, fetch_subprojects)
    
    269
    +                self._load_file(dep.junction, rewritable, ticker, fetch_subprojects, yaml_cache)
    
    266 270
                     loader = self._get_loader(dep.junction, rewritable=rewritable, ticker=ticker,
    
    267 271
                                               fetch_subprojects=fetch_subprojects)
    
    268 272
                 else:
    
    269 273
                     loader = self
    
    270 274
     
    
    271
    -            dep_element = loader._load_file(dep.name, rewritable, ticker, fetch_subprojects)
    
    275
    +            dep_element = loader._load_file(dep.name, rewritable, ticker, fetch_subprojects, yaml_cache)
    
    272 276
     
    
    273 277
                 if _yaml.node_get(dep_element.node, str, Symbol.KIND) == 'junction':
    
    274 278
                     raise LoadError(LoadErrorReason.INVALID_DATA,
    

  • buildstream/_pickler.py
    1
    +#
    
    2
    +#  Copyright 2018 Bloomberg Finance LP
    
    3
    +#
    
    4
    +#  This program is free software; you can redistribute it and/or
    
    5
    +#  modify it under the terms of the GNU Lesser General Public
    
    6
    +#  License as published by the Free Software Foundation; either
    
    7
    +#  version 2 of the License, or (at your option) any later version.
    
    8
    +#
    
    9
    +#  This library is distributed in the hope that it will be useful,
    
    10
    +#  but WITHOUT ANY WARRANTY; without even the implied warranty of
    
    11
    +#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    
    12
    +#  Lesser General Public License for more details.
    
    13
    +#
    
    14
    +#  You should have received a copy of the GNU Lesser General Public
    
    15
    +#  License along with this library. If not, see <http://www.gnu.org/licenses/>.
    
    16
    +#
    
    17
    +#  Authors:
    
    18
    +#        Jonathan Maw <jonathan maw codethink co uk>
    
    19
    +
    
    20
    +import os
    
    21
    +import pickle
    
    22
    +import hashlib
    
    23
    +
    
    24
    +from contextlib import contextmanager
    
    25
    +from collections import namedtuple
    
    26
    +
    
    27
    +from ._cachekey import generate_key
    
    28
    +from . import utils, _yaml
    
    29
    +
    
    30
    +
    
    31
    +YAML_CACHE_FILENAME = "yaml_cache.pickle"
    
    32
    +
    
    33
    +
    
    34
    +# In _yaml.load, we have a ProvenanceFile that stores the project the file
    
    35
    +# came from. Projects can't be pickled, but it's always going to be the same
    
    36
    +# project between invocations (unless the entire project is moved but the
    
    37
    +# file stayed in the same place)
    
    38
    +class BstPickler(pickle.Pickler):
    
    39
    +    def persistent_id(self, obj):
    
    40
    +        # Inline import to break import loop
    
    41
    +        from ._project import Project
    
    42
    +        if isinstance(obj, Project):
    
    43
    +            return ("Project", obj.name)
    
    44
    +        else:
    
    45
    +            return None
    
    46
    +
    
    47
    +
    
    48
    +class BstUnpickler(pickle.Unpickler):
    
    49
    +    def __init__(self, file, context):
    
    50
    +        super().__init__(file)
    
    51
    +        self._context = context
    
    52
    +
    
    53
    +    def persistent_load(self, pid):
    
    54
    +        type_tag, key_id = pid
    
    55
    +        if type_tag == "Project":
    
    56
    +            # XXX: This doesn't actually help, we need to load the junction
    
    57
    +            #      to create the project
    
    58
    +            for project in self._context.get_projects():
    
    59
    +                if key_id == project.name:
    
    60
    +                    return project
    
    61
    +
    
    62
    +            projects = [p.name for p in self._context.get_projects()]
    
    63
    +            raise pickle.UnpicklingError("No project with name {} found in {}".format(key_id, projects))
    
    64
    +        else:
    
    65
    +            raise pickle.UnpicklingError("Unsupported persistent object")
    
    66
    +
    
    67
    +
    
    68
    +CachedProject = namedtuple('CachedProject', ['path', 'project_sum', 'elements'])
    
    69
    +CachedYaml = namedtuple('CachedYaml', ['key', 'contents'])
    
    70
    +
    
    71
    +
    
    72
    +class YamlCache():
    
    73
    +
    
    74
    +    def __init__(self):
    
    75
    +        self.project_caches = {}
    
    76
    +
    
    77
    +    def write(self, path):
    
    78
    +        parent_dir = os.path.dirname(path)
    
    79
    +        os.makedirs(parent_dir, exist_ok=True)
    
    80
    +        with open(path, "wb") as f:
    
    81
    +            BstPickler(f).dump(self)
    
    82
    +
    
    83
    +    def get(self, project, filepath, key):
    
    84
    +        if project.name in self.project_caches:
    
    85
    +            project_cache = self.project_caches[project.name]
    
    86
    +            if filepath in project_cache.elements:
    
    87
    +                cachedyaml = project_cache.elements[filepath]
    
    88
    +                if cachedyaml.key == key:
    
    89
    +                    return _yaml.node_copy(cachedyaml.contents)
    
    90
    +        return None
    
    91
    +
    
    92
    +    def put(self, project, filepath, key, value):
    
    93
    +        if project.name in self.project_caches:
    
    94
    +            # XXX: Needs a check that the project hasn't changed
    
    95
    +            project_cache = self.project_caches[project.name]
    
    96
    +        else:
    
    97
    +            project_cache = self.project_caches[project.name] = CachedProject(project.directory, project.shasum, {})
    
    98
    +
    
    99
    +        if filepath in project_cache.elements and project_cache.elements[filepath].key == key:
    
    100
    +            project_cache.elements[filepath].contents = _yaml.node_copy(value)
    
    101
    +        else:
    
    102
    +            project_cache.elements[filepath] = CachedYaml(key, _yaml.node_copy(value))
    
    103
    +
    
    104
    +    @staticmethod
    
    105
    +    @contextmanager
    
    106
    +    def get_cache(context):
    
    107
    +        # Try to load from disk first
    
    108
    +        cachefile = YamlCache._get_cache_file(context)
    
    109
    +        cache = None
    
    110
    +        if os.path.exists(cachefile):
    
    111
    +            try:
    
    112
    +                with open(cachefile, "rb") as f:
    
    113
    +                    cache = BstUnpickler(f, context).load()
    
    114
    +            except pickle.UnpicklingError as e:
    
    115
    +                pass
    
    116
    +
    
    117
    +        if not cache:
    
    118
    +            cache = YamlCache()
    
    119
    +
    
    120
    +        yield cache
    
    121
    +
    
    122
    +        cache.write(cachefile)
    
    123
    +
    
    124
    +    @staticmethod
    
    125
    +    def _get_cache_file(context):
    
    126
    +        toplevel_project = context.get_toplevel_project()
    
    127
    +        return os.path.join(toplevel_project.directory, ".bst", YAML_CACHE_FILENAME)
    
    128
    +
    
    129
    +    @staticmethod
    
    130
    +    def calculate_key(*args):
    
    131
    +        string = pickle.dumps(args)
    
    132
    +        return hashlib.sha1(string).hexdigest()

  • buildstream/_project.py
    ... ... @@ -19,6 +19,7 @@
    19 19
     #        Tiago Gomes <tiago gomes codethink co uk>
    
    20 20
     
    
    21 21
     import os
    
    22
    +import hashlib
    
    22 23
     from collections import Mapping, OrderedDict
    
    23 24
     from pluginbase import PluginBase
    
    24 25
     from . import utils
    
    ... ... @@ -110,6 +111,7 @@ class Project():
    110 111
             self.ref_storage = None                  # ProjectRefStorage setting
    
    111 112
             self.base_environment = {}               # The base set of environment variables
    
    112 113
             self.base_env_nocache = None             # The base nocache mask (list) for the environment
    
    114
    +        self.shasum = None                       # A SHA-1 sum of the project file
    
    113 115
     
    
    114 116
             #
    
    115 117
             # Private Members
    
    ... ... @@ -381,6 +383,10 @@ class Project():
    381 383
     
    
    382 384
             # Load project local config and override the builtin
    
    383 385
             try:
    
    386
    +            with open(projectfile, "r") as f:
    
    387
    +                contents = f.read()
    
    388
    +            self.shasum = hashlib.sha1(contents.encode('utf-8')).hexdigest()
    
    389
    +
    
    384 390
                 self._project_conf = _yaml.load(projectfile)
    
    385 391
             except LoadError as e:
    
    386 392
                 # Raise a more specific error here
    

  • buildstream/_scheduler/jobs/job.py
    ... ... @@ -109,7 +109,7 @@ class Job():
    109 109
             # Private members
    
    110 110
             #
    
    111 111
             self._scheduler = scheduler            # The scheduler
    
    112
    -        self._queue = multiprocessing.Queue()  # A message passing queue
    
    112
    +        self._queue = None                     # A message passing queue
    
    113 113
             self._process = None                   # The Process object
    
    114 114
             self._watcher = None                   # Child process watcher
    
    115 115
             self._listening = False                # Whether the parent is currently listening
    
    ... ... @@ -130,6 +130,8 @@ class Job():
    130 130
         #
    
    131 131
         def spawn(self):
    
    132 132
     
    
    133
    +        self._queue = multiprocessing.Queue()
    
    134
    +
    
    133 135
             self._tries += 1
    
    134 136
             self._parent_start_listening()
    
    135 137
     
    
    ... ... @@ -552,6 +554,9 @@ class Job():
    552 554
             self.parent_complete(returncode == RC_OK, self._result)
    
    553 555
             self._scheduler.job_completed(self, returncode == RC_OK)
    
    554 556
     
    
    557
    +        # Force the deletion of the queue and process objects to try and clean up FDs
    
    558
    +        self._queue = self._process = None
    
    559
    +
    
    555 560
         # _parent_process_envelope()
    
    556 561
         #
    
    557 562
         # Processes a message Envelope deserialized from the message queue.
    

  • buildstream/_yaml.py
    ... ... @@ -23,11 +23,14 @@ import string
    23 23
     from copy import deepcopy
    
    24 24
     from contextlib import ExitStack
    
    25 25
     from pathlib import Path
    
    26
    +import hashlib
    
    27
    +import os
    
    26 28
     
    
    27 29
     from ruamel import yaml
    
    28 30
     from ruamel.yaml.representer import SafeRepresenter, RoundTripRepresenter
    
    29 31
     from ruamel.yaml.constructor import RoundTripConstructor
    
    30 32
     from ._exceptions import LoadError, LoadErrorReason
    
    33
    +from ._platform import Platform
    
    31 34
     
    
    32 35
     # This overrides the ruamel constructor to treat everything as a string
    
    33 36
     RoundTripConstructor.add_constructor(u'tag:yaml.org,2002:int', RoundTripConstructor.construct_yaml_str)
    
    ... ... @@ -183,12 +186,13 @@ class CompositeTypeError(CompositeError):
    183 186
     #    shortname (str): The filename in shorthand for error reporting (or None)
    
    184 187
     #    copy_tree (bool): Whether to make a copy, preserving the original toplevels
    
    185 188
     #                      for later serialization
    
    189
    +#    yaml_cache (YamlCache): A yaml cache to consult rather than parsing
    
    186 190
     #
    
    187 191
     # Returns (dict): A loaded copy of the YAML file with provenance information
    
    188 192
     #
    
    189 193
     # Raises: LoadError
    
    190 194
     #
    
    191
    -def load(filename, shortname=None, copy_tree=False, *, project=None):
    
    195
    +def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache=None):
    
    192 196
         if not shortname:
    
    193 197
             shortname = filename
    
    194 198
     
    
    ... ... @@ -196,7 +200,7 @@ def load(filename, shortname=None, copy_tree=False, *, project=None):
    196 200
     
    
    197 201
         try:
    
    198 202
             with open(filename) as f:
    
    199
    -            return load_data(f, file, copy_tree=copy_tree)
    
    203
    +            return load_data(f, file, copy_tree=copy_tree, yaml_cache=yaml_cache)
    
    200 204
         except FileNotFoundError as e:
    
    201 205
             raise LoadError(LoadErrorReason.MISSING_FILE,
    
    202 206
                             "Could not find file at {}".format(filename)) from e
    
    ... ... @@ -208,24 +212,46 @@ def load(filename, shortname=None, copy_tree=False, *, project=None):
    208 212
     
    
    209 213
     # Like load(), but doesn't require the data to be in a file
    
    210 214
     #
    
    211
    -def load_data(data, file=None, copy_tree=False):
    
    215
    +def load_data(data, file=None, copy_tree=False, yaml_cache=None):
    
    212 216
     
    
    213
    -    try:
    
    214
    -        contents = yaml.load(data, yaml.loader.RoundTripLoader, preserve_quotes=True)
    
    215
    -    except (yaml.scanner.ScannerError, yaml.composer.ComposerError, yaml.parser.ParserError) as e:
    
    216
    -        raise LoadError(LoadErrorReason.INVALID_YAML,
    
    217
    -                        "Malformed YAML:\n\n{}\n\n{}\n".format(e.problem, e.problem_mark)) from e
    
    218
    -
    
    219
    -    if not isinstance(contents, dict):
    
    220
    -        # Special case allowance for None, when the loaded file has only comments in it.
    
    221
    -        if contents is None:
    
    222
    -            contents = {}
    
    223
    -        else:
    
    217
    +    if hasattr(data, 'read'):
    
    218
    +        file_contents = data.read()
    
    219
    +    else:
    
    220
    +        file_contents = data
    
    221
    +    # Forced to compare sums of contents because elements in junctions are stored in tmpdirs
    
    222
    +    decorated_yaml = None
    
    223
    +    if yaml_cache:
    
    224
    +        assert file
    
    225
    +        project = file.project
    
    226
    +        filename = os.path.relpath(file.name, project.directory)
    
    227
    +        key = yaml_cache.calculate_key(file_contents, copy_tree)
    
    228
    +        decorated_yaml = yaml_cache.get(project, filename, key)
    
    229
    +
    
    230
    +    if not decorated_yaml:
    
    231
    +        try:
    
    232
    +            contents = yaml.load(file_contents, yaml.loader.RoundTripLoader, preserve_quotes=True)
    
    233
    +        except (yaml.scanner.ScannerError, yaml.composer.ComposerError, yaml.parser.ParserError) as e:
    
    224 234
                 raise LoadError(LoadErrorReason.INVALID_YAML,
    
    225
    -                            "YAML file has content of type '{}' instead of expected type 'dict': {}"
    
    226
    -                            .format(type(contents).__name__, file.name))
    
    235
    +                            "Malformed YAML:\n\n{}\n\n{}\n".format(e.problem, e.problem_mark)) from e
    
    227 236
     
    
    228
    -    return node_decorated_copy(file, contents, copy_tree=copy_tree)
    
    237
    +        if not isinstance(contents, dict):
    
    238
    +            # Special case allowance for None, when the loaded file has only comments in it.
    
    239
    +            if contents is None:
    
    240
    +                contents = {}
    
    241
    +            else:
    
    242
    +                raise LoadError(LoadErrorReason.INVALID_YAML,
    
    243
    +                                "YAML file has content of type '{}' instead of expected type 'dict': {}"
    
    244
    +                                .format(type(contents).__name__, file.name))
    
    245
    +
    
    246
    +        decorated_yaml = node_decorated_copy(file, contents, copy_tree=copy_tree)
    
    247
    +        if yaml_cache:
    
    248
    +            assert file
    
    249
    +            project = file.project
    
    250
    +            filename = os.path.relpath(file.name, project.directory)
    
    251
    +            key = yaml_cache.calculate_key(file_contents, copy_tree)
    
    252
    +            yaml_cache.put(project, filename, key, decorated_yaml)
    
    253
    +
    
    254
    +    return decorated_yaml
    
    229 255
     
    
    230 256
     
    
    231 257
     # Dumps a previously loaded YAML node to a file
    

  • doc/source/install_artifacts.rst
    ... ... @@ -161,13 +161,13 @@ Below are two examples of how to run the cache server as a systemd service, one
    161 161
     
    
    162 162
        [Service]
    
    163 163
        Environment="LC_ALL=C.UTF-8"
    
    164
    -   ExecStart=/usr/local/bin/bst-artifact-server --port 11001 --server-key {{certs_path}}/privkey.pem --
    
    165
    -   server-cert {{certs_path}}/fullchain.pem {{artifacts_path}}
    
    164
    +   ExecStart=/usr/local/bin/bst-artifact-server --port 11001 --server-key {{certs_path}}/server.key --server-cert {{certs_path}}/server.crt {{artifacts_path}}
    
    166 165
        User=artifacts
    
    167 166
     
    
    168 167
        [Install]
    
    169 168
        WantedBy=multi-user.target
    
    170 169
     
    
    170
    +.. code:: ini
    
    171 171
     
    
    172 172
        #
    
    173 173
        # Pull/Push
    
    ... ... @@ -178,9 +178,7 @@ Below are two examples of how to run the cache server as a systemd service, one
    178 178
     
    
    179 179
        [Service]
    
    180 180
        Environment="LC_ALL=C.UTF-8"
    
    181
    -   ExecStart=/usr/local/bin/bst-artifact-server --port 11002 --server-key {{certs_path}}/privkey.pem --
    
    182
    -   server-cert {{certs_path}}/fullchain.pem --client-certs /home/artifacts/authorized.crt --enable-push /
    
    183
    -   {{artifacts_path}}
    
    181
    +   ExecStart=/usr/local/bin/bst-artifact-server --port 11002 --server-key {{certs_path}}/server.key --server-cert {{certs_path}}/server.crt --client-certs {{certs_path}}/authorized.crt --enable-push {{artifacts_path}}
    
    184 182
        User=artifacts
    
    185 183
     
    
    186 184
        [Install]
    
    ... ... @@ -188,11 +186,16 @@ Below are two examples of how to run the cache server as a systemd service, one
    188 186
     
    
    189 187
     Here we define when systemd should start the service, which is after the networking stack has been started. We then define how to run the cache with the desired configuration, under the artifacts user. The {{ }} markers denote where you should change these files to point to your desired locations.
    
    190 188
     
    
    189
    +For more information on systemd services see: 
    
    190
    +`Creating Systemd Service Files <https://www.devdungeon.com/content/creating-systemd-service-files>`_.
    
    191
    +
    
    191 192
     User configuration
    
    192 193
     ~~~~~~~~~~~~~~~~~~
    
    193 194
     The user configuration for artifacts is documented with the rest
    
    194 195
     of the :ref:`user configuration documentation <user_config>`.
    
    195 196
     
    
    197
    +Note that for self-signed certificates, the public key fields are mandatory.
    
    198
    +
    
    196 199
     Assuming you have the same setup used in this document, and that your
    
    197 200
     host is reachable on the internet as ``artifacts.com`` (for example),
    
    198 201
     then a user can use the following user configuration:
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]