Jonathan Maw pushed to branch lachlan/pickle-yaml-test-list-composite at BuildStream / buildstream
Commits:
5 changed files:
- buildstream/_loader/loader.py
- buildstream/_yaml.py
- buildstream/_yamlcache.py
- tests/frontend/yamlcache.py
- tests/yaml/yaml.py
Changes:
| ... | ... | @@ -109,20 +109,19 @@ class Loader(): | 
| 109 | 109 |          #
 | 
| 110 | 110 |          deps = []
 | 
| 111 | 111 |  | 
| 112 | -        for target in targets:
 | |
| 113 | -            profile_start(Topics.LOAD_PROJECT, target)
 | |
| 114 | -            junction, name, loader = self._parse_name(target, rewritable, ticker,
 | |
| 115 | -                                                      fetch_subprojects=fetch_subprojects)
 | |
| 116 | - | |
| 117 | -            # XXX This will need to be changed to the context's top-level project if this method
 | |
| 118 | -            # is ever used for subprojects
 | |
| 119 | -            top_dir = self.project.directory
 | |
| 120 | - | |
| 121 | -            cache_file = YamlCache.get_cache_file(top_dir)
 | |
| 122 | -            with YamlCache.open(self._context, cache_file) as yaml_cache:
 | |
| 112 | +        # XXX This will need to be changed to the context's top-level project if this method
 | |
| 113 | +        # is ever used for subprojects
 | |
| 114 | +        top_dir = self.project.directory
 | |
| 115 | + | |
| 116 | +        cache_file = YamlCache.get_cache_file(top_dir)
 | |
| 117 | +        with YamlCache.open(self._context, cache_file) as yaml_cache:
 | |
| 118 | +            for target in targets:
 | |
| 119 | +                profile_start(Topics.LOAD_PROJECT, target)
 | |
| 120 | +                junction, name, loader = self._parse_name(target, rewritable, ticker,
 | |
| 121 | +                                                          fetch_subprojects=fetch_subprojects)
 | |
| 123 | 122 |                  loader._load_file(name, rewritable, ticker, fetch_subprojects, yaml_cache)
 | 
| 124 | -            deps.append(Dependency(name, junction=junction))
 | |
| 125 | -            profile_end(Topics.LOAD_PROJECT, target)
 | |
| 123 | +                deps.append(Dependency(name, junction=junction))
 | |
| 124 | +                profile_end(Topics.LOAD_PROJECT, target)
 | |
| 126 | 125 |  | 
| 127 | 126 |          #
 | 
| 128 | 127 |          # Now that we've resolve the dependencies, scan them for circular dependencies
 | 
| ... | ... | @@ -200,14 +200,13 @@ def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache= | 
| 200 | 200 |          with open(filename) as f:
 | 
| 201 | 201 |              contents = f.read()
 | 
| 202 | 202 |          if yaml_cache:
 | 
| 203 | -            key = yaml_cache.calculate_key(contents, copy_tree)
 | |
| 204 | -            data = yaml_cache.get(project, filename, key)
 | |
| 203 | +            data, key = yaml_cache.get(project, filename, contents, copy_tree)
 | |
| 205 | 204 |  | 
| 206 | 205 |          if not data:
 | 
| 207 | 206 |              data = load_data(contents, file, copy_tree=copy_tree)
 | 
| 208 | 207 |  | 
| 209 | 208 |          if yaml_cache:
 | 
| 210 | -            yaml_cache.put(project, filename, key, data)
 | |
| 209 | +            yaml_cache.put_from_key(project, filename, key, data)
 | |
| 211 | 210 |  | 
| 212 | 211 |          return data
 | 
| 213 | 212 |      except FileNotFoundError as e:
 | 
| ... | ... | @@ -53,71 +53,20 @@ class YamlCache(): | 
| 53 | 53 |          self._project_caches = {}
 | 
| 54 | 54 |          self._context = context
 | 
| 55 | 55 |  | 
| 56 | -    # Writes the yaml cache to the specified path.
 | |
| 57 | -    #
 | |
| 58 | -    # Args:
 | |
| 59 | -    #    path (str): The path to the cache file.
 | |
| 60 | -    def write(self, path):
 | |
| 61 | -        parent_dir = os.path.dirname(path)
 | |
| 62 | -        os.makedirs(parent_dir, exist_ok=True)
 | |
| 63 | -        with open(path, "wb") as f:
 | |
| 64 | -            BstPickler(f).dump(self)
 | |
| 56 | +    ##################
 | |
| 57 | +    # Public Methods #
 | |
| 58 | +    ##################
 | |
| 65 | 59 |  | 
| 66 | -    # Gets a parsed file from the cache.
 | |
| 60 | +    # is_cached():
 | |
| 67 | 61 |      #
 | 
| 68 | -    # Args:
 | |
| 69 | -    #    project (Project): The project this file is in.
 | |
| 70 | -    #    filepath (str): The path to the file.
 | |
| 71 | -    #    key (str): The key to the file within the cache. Typically, this is the
 | |
| 72 | -    #               value of `calculate_key()` with the file's unparsed contents
 | |
| 73 | -    #               and any relevant metadata passed in.
 | |
| 74 | -    #
 | |
| 75 | -    # Returns:
 | |
| 76 | -    #    (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
 | |
| 77 | -    def get(self, project, filepath, key):
 | |
| 78 | -        cache_path = self._get_filepath(project, filepath)
 | |
| 79 | -        project_name = project.name if project else ""
 | |
| 80 | -        try:
 | |
| 81 | -            project_cache = self._project_caches[project_name]
 | |
| 82 | -            try:
 | |
| 83 | -                cachedyaml = project_cache.elements[cache_path]
 | |
| 84 | -                if cachedyaml._key == key:
 | |
| 85 | -                    # We've unpickled the YamlCache, but not the specific file
 | |
| 86 | -                    if cachedyaml._contents is None:
 | |
| 87 | -                        cachedyaml._contents = BstUnpickler.loads(cachedyaml._pickled_contents, self._context)
 | |
| 88 | -                    return cachedyaml._contents
 | |
| 89 | -            except KeyError:
 | |
| 90 | -                pass
 | |
| 91 | -        except KeyError:
 | |
| 92 | -            pass
 | |
| 93 | -        return None
 | |
| 94 | - | |
| 95 | -    # Put a parsed file into the cache.
 | |
| 62 | +    # Checks whether a file is cached.
 | |
| 96 | 63 |      #
 | 
| 97 | 64 |      # Args:
 | 
| 98 | 65 |      #    project (Project): The project this file is in.
 | 
| 99 | -    #    filepath (str): The path to the file.
 | |
| 100 | -    #    key (str): The key to the file within the cache. Typically, this is the
 | |
| 101 | -    #               value of `calculate_key()` with the file's unparsed contents
 | |
| 102 | -    #               and any relevant metadata passed in.
 | |
| 103 | -    #    value (decorated dict): The data to put into the cache.
 | |
| 104 | -    def put(self, project, filepath, key, value):
 | |
| 105 | -        cache_path = self._get_filepath(project, filepath)
 | |
| 106 | -        project_name = project.name if project else ""
 | |
| 107 | -        try:
 | |
| 108 | -            project_cache = self._project_caches[project_name]
 | |
| 109 | -        except KeyError:
 | |
| 110 | -            project_cache = self._project_caches[project_name] = CachedProject({})
 | |
| 111 | - | |
| 112 | -        project_cache.elements[cache_path] = CachedYaml(key, value)
 | |
| 113 | - | |
| 114 | -    # Checks whether a file is cached
 | |
| 115 | -    # Args:
 | |
| 116 | -    #    project (Project): The project this file is in.
 | |
| 117 | 66 |      #    filepath (str): The path to the file, *relative to the project's directory*.
 | 
| 118 | 67 |      #
 | 
| 119 | 68 |      # Returns:
 | 
| 120 | -    #    (bool): Whether the file is cached
 | |
| 69 | +    #    (bool): Whether the file is cached.
 | |
| 121 | 70 |      def is_cached(self, project, filepath):
 | 
| 122 | 71 |          cache_path = self._get_filepath(project, filepath)
 | 
| 123 | 72 |          project_name = project.name if project else ""
 | 
| ... | ... | @@ -129,14 +78,8 @@ class YamlCache(): | 
| 129 | 78 |              pass
 | 
| 130 | 79 |          return False
 | 
| 131 | 80 |  | 
| 132 | -    def _get_filepath(self, project, full_path):
 | |
| 133 | -        if project:
 | |
| 134 | -            assert full_path.startswith(project.directory)
 | |
| 135 | -            filepath = os.path.relpath(full_path, project.directory)
 | |
| 136 | -        else:
 | |
| 137 | -            filepath = full_path
 | |
| 138 | -        return full_path
 | |
| 139 | - | |
| 81 | +    # open():
 | |
| 82 | +    #
 | |
| 140 | 83 |      # Return an instance of the YamlCache which writes to disk when it leaves scope.
 | 
| 141 | 84 |      #
 | 
| 142 | 85 |      # Args:
 | 
| ... | ... | @@ -154,6 +97,9 @@ class YamlCache(): | 
| 154 | 97 |              try:
 | 
| 155 | 98 |                  with open(cachefile, "rb") as f:
 | 
| 156 | 99 |                      cache = BstUnpickler(f, context).load()
 | 
| 100 | +            except EOFError:
 | |
| 101 | +                # The file was empty
 | |
| 102 | +                pass
 | |
| 157 | 103 |              except pickle.UnpicklingError as e:
 | 
| 158 | 104 |                  sys.stderr.write("Failed to load YamlCache, {}\n".format(e))
 | 
| 159 | 105 |  | 
| ... | ... | @@ -163,18 +109,150 @@ class YamlCache(): | 
| 163 | 109 |  | 
| 164 | 110 |          yield cache
 | 
| 165 | 111 |  | 
| 166 | -        cache.write(cachefile)
 | |
| 112 | +        cache._write(cachefile)
 | |
| 167 | 113 |  | 
| 114 | +    # get_cache_file():
 | |
| 115 | +    #
 | |
| 116 | +    # Retrieves a path to the yaml cache file.
 | |
| 117 | +    #
 | |
| 118 | +    # Returns:
 | |
| 119 | +    #   (str): The path to the cache file
 | |
| 120 | +    @staticmethod
 | |
| 121 | +    def get_cache_file(top_dir):
 | |
| 122 | +        return os.path.join(top_dir, ".bst", YAML_CACHE_FILENAME)
 | |
| 123 | + | |
| 124 | +    # get():
 | |
| 125 | +    #
 | |
| 126 | +    # Gets a parsed file from the cache.
 | |
| 127 | +    #
 | |
| 128 | +    # Args:
 | |
| 129 | +    #    project (Project) or None: The project this file is in, if it exists.
 | |
| 130 | +    #    filepath (str): The absolute path to the file.
 | |
| 131 | +    #    contents (str): The contents of the file to be cached
 | |
| 132 | +    #    copy_tree (bool): Whether the data should make a copy when it's being generated
 | |
| 133 | +    #                      (i.e. exactly as when called in yaml)
 | |
| 134 | +    #
 | |
| 135 | +    # Returns:
 | |
| 136 | +    #    (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
 | |
| 137 | +    #    (str):            The key used to look up the parsed yaml in the cache
 | |
| 138 | +    def get(self, project, filepath, contents, copy_tree):
 | |
| 139 | +        key = self._calculate_key(contents, copy_tree)
 | |
| 140 | +        data = self._get(project, filepath, key)
 | |
| 141 | +        return data, key
 | |
| 142 | + | |
| 143 | +    # put():
 | |
| 144 | +    #
 | |
| 145 | +    # Puts a parsed file into the cache.
 | |
| 146 | +    #
 | |
| 147 | +    # Args:
 | |
| 148 | +    #    project (Project): The project this file is in.
 | |
| 149 | +    #    filepath (str): The path to the file.
 | |
| 150 | +    #    contents (str): The contents of the file that has been cached
 | |
| 151 | +    #    copy_tree (bool): Whether the data should make a copy when it's being generated
 | |
| 152 | +    #                      (i.e. exactly as when called in yaml)
 | |
| 153 | +    #    value (decorated dict): The data to put into the cache.
 | |
| 154 | +    def put(self, project, filepath, contents, copy_tree, value):
 | |
| 155 | +        key = self._calculate_key(contents, copy_tree)
 | |
| 156 | +        self.put_from_key(project, filepath, key, value)
 | |
| 157 | + | |
| 158 | +    # put_from_key():
 | |
| 159 | +    #
 | |
| 160 | +    # Put a parsed file into the cache when given a key.
 | |
| 161 | +    #
 | |
| 162 | +    # Args:
 | |
| 163 | +    #    project (Project): The project this file is in.
 | |
| 164 | +    #    filepath (str): The path to the file.
 | |
| 165 | +    #    key (str): The key to the file within the cache. Typically, this is the
 | |
| 166 | +    #               value of `calculate_key()` with the file's unparsed contents
 | |
| 167 | +    #               and any relevant metadata passed in.
 | |
| 168 | +    #    value (decorated dict): The data to put into the cache.
 | |
| 169 | +    def put_from_key(self, project, filepath, key, value):
 | |
| 170 | +        cache_path = self._get_filepath(project, filepath)
 | |
| 171 | +        project_name = project.name if project else ""
 | |
| 172 | +        try:
 | |
| 173 | +            project_cache = self._project_caches[project_name]
 | |
| 174 | +        except KeyError:
 | |
| 175 | +            project_cache = self._project_caches[project_name] = CachedProject({})
 | |
| 176 | + | |
| 177 | +        project_cache.elements[cache_path] = CachedYaml(key, value)
 | |
| 178 | + | |
| 179 | +    ###################
 | |
| 180 | +    # Private Methods #
 | |
| 181 | +    ###################
 | |
| 182 | + | |
| 183 | +    # Writes the yaml cache to the specified path.
 | |
| 184 | +    #
 | |
| 185 | +    # Args:
 | |
| 186 | +    #    path (str): The path to the cache file.
 | |
| 187 | +    def _write(self, path):
 | |
| 188 | +        parent_dir = os.path.dirname(path)
 | |
| 189 | +        os.makedirs(parent_dir, exist_ok=True)
 | |
| 190 | +        with open(path, "wb") as f:
 | |
| 191 | +            BstPickler(f).dump(self)
 | |
| 192 | + | |
| 193 | +    # _get_filepath():
 | |
| 194 | +    #
 | |
| 195 | +    # Returns a file path relative to a project if passed, or the original path if
 | |
| 196 | +    # the project is None
 | |
| 197 | +    #
 | |
| 198 | +    # Args:
 | |
| 199 | +    #    project (Project) or None: The project the filepath exists within
 | |
| 200 | +    #    full_path (str): The path that the returned path is based on
 | |
| 201 | +    #
 | |
| 202 | +    # Returns:
 | |
| 203 | +    #    (str): The path to the file, relative to a project if it exists
 | |
| 204 | +    def _get_filepath(self, project, full_path):
 | |
| 205 | +        if project:
 | |
| 206 | +            assert full_path.startswith(project.directory)
 | |
| 207 | +            filepath = os.path.relpath(full_path, project.directory)
 | |
| 208 | +        else:
 | |
| 209 | +            filepath = full_path
 | |
| 210 | +        return full_path
 | |
| 211 | + | |
| 212 | +    # _calculate_key():
 | |
| 213 | +    #
 | |
| 168 | 214 |      # Calculates a key for putting into the cache.
 | 
| 215 | +    #
 | |
| 216 | +    # Args:
 | |
| 217 | +    #    (basic object)... : Any number of strictly-ordered basic objects
 | |
| 218 | +    #
 | |
| 219 | +    # Returns:
 | |
| 220 | +    #   (str): A key made out of every arg passed in
 | |
| 169 | 221 |      @staticmethod
 | 
| 170 | -    def calculate_key(*args):
 | |
| 222 | +    def _calculate_key(*args):
 | |
| 171 | 223 |          string = pickle.dumps(args)
 | 
| 172 | 224 |          return hashlib.sha1(string).hexdigest()
 | 
| 173 | 225 |  | 
| 174 | -    # Retrieves a path to the yaml cache file.
 | |
| 175 | -    @staticmethod
 | |
| 176 | -    def get_cache_file(top_dir):
 | |
| 177 | -        return os.path.join(top_dir, ".bst", YAML_CACHE_FILENAME)
 | |
| 226 | +    # _get():
 | |
| 227 | +    #
 | |
| 228 | +    # Gets a parsed file from the cache when given a key.
 | |
| 229 | +    #
 | |
| 230 | +    # Args:
 | |
| 231 | +    #    project (Project): The project this file is in.
 | |
| 232 | +    #    filepath (str): The path to the file.
 | |
| 233 | +    #    key (str): The key to the file within the cache. Typically, this is the
 | |
| 234 | +    #               value of `calculate_key()` with the file's unparsed contents
 | |
| 235 | +    #               and any relevant metadata passed in.
 | |
| 236 | +    #
 | |
| 237 | +    # Returns:
 | |
| 238 | +    #    (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
 | |
| 239 | +    def _get(self, project, filepath, key):
 | |
| 240 | +        cache_path = self._get_filepath(project, filepath)
 | |
| 241 | +        project_name = project.name if project else ""
 | |
| 242 | +        try:
 | |
| 243 | +            project_cache = self._project_caches[project_name]
 | |
| 244 | +            try:
 | |
| 245 | +                cachedyaml = project_cache.elements[cache_path]
 | |
| 246 | +                if cachedyaml._key == key:
 | |
| 247 | +                    # We've unpickled the YamlCache, but not the specific file
 | |
| 248 | +                    if cachedyaml._contents is None:
 | |
| 249 | +                        cachedyaml._contents = BstUnpickler.loads(cachedyaml._pickled_contents, self._context)
 | |
| 250 | +                    return cachedyaml._contents
 | |
| 251 | +            except KeyError:
 | |
| 252 | +                pass
 | |
| 253 | +        except KeyError:
 | |
| 254 | +            pass
 | |
| 255 | +        return None
 | |
| 178 | 256 |  | 
| 179 | 257 |  | 
| 180 | 258 |  CachedProject = namedtuple('CachedProject', ['elements'])
 | 
| ... | ... | @@ -59,7 +59,7 @@ def with_yamlcache(project_dir): | 
| 59 | 59 |  | 
| 60 | 60 |  def yamlcache_key(yamlcache, in_file, copy_tree=False):
 | 
| 61 | 61 |      with open(in_file) as f:
 | 
| 62 | -        key = yamlcache.calculate_key(f.read(), copy_tree)
 | |
| 62 | +        key = yamlcache._calculate_key(f.read(), copy_tree)
 | |
| 63 | 63 |      return key
 | 
| 64 | 64 |  | 
| 65 | 65 |  | 
| ... | ... | @@ -100,7 +100,7 @@ def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction, move_project): | 
| 100 | 100 |          temppath = modified_file(element_path, str(tmpdir))
 | 
| 101 | 101 |          contents = _yaml.load(temppath, copy_tree=False, project=prj)
 | 
| 102 | 102 |          key = yamlcache_key(yc, element_path)
 | 
| 103 | -        yc.put(prj, element_path, key, contents)
 | |
| 103 | +        yc.put_from_key(prj, element_path, key, contents)
 | |
| 104 | 104 |  | 
| 105 | 105 |      # Show that a variable has been added
 | 
| 106 | 106 |      result = cli.run(project=project, args=['show', '--format', '%{vars}', 'test.bst'])
 | 
| ... | ... | @@ -114,13 +114,6 @@ def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction, move_project): | 
| 114 | 114 |  @pytest.mark.parametrize('with_junction', ['junction', 'no-junction'])
 | 
| 115 | 115 |  @pytest.mark.parametrize('move_project', ['move', 'no-move'])
 | 
| 116 | 116 |  def test_yamlcache_changed_file(cli, ref_storage, with_junction, move_project):
 | 
| 117 | +    # i.e. a file is cached, the file is changed, loading the file (with cache) returns new data
 | |
| 117 | 118 |      # inline and junction can only be changed by opening a workspace
 | 
| 118 | 119 |      pass | 
| 119 | - | |
| 120 | - | |
| 121 | -@pytest.mark.parametrize('ref_storage', ['inline', 'project.refs'])
 | |
| 122 | -@pytest.mark.parametrize('with_junction', ['junction', 'no-junction'])
 | |
| 123 | -@pytest.mark.parametrize('move_project', ['move', 'no-move'])
 | |
| 124 | -def test_yamlcache_track_changed(cli, ref_storage, with_junction, move_project):
 | |
| 125 | -    # Skip inline junction, tracking those is forbidden
 | |
| 126 | -    pass | 
| ... | ... | @@ -155,10 +155,10 @@ def test_composite_preserve_originals(datafiles): | 
| 155 | 155 |  | 
| 156 | 156 |  def load_yaml_file(filename, *, cache_path, shortname=None, from_cache='raw'):
 | 
| 157 | 157 |  | 
| 158 | -    temppath = tempfile.mkstemp(dir=str(cache_path), text=True)
 | |
| 158 | +    _, temppath = tempfile.mkstemp(dir=os.path.join(cache_path.dirname, cache_path.basename), text=True)
 | |
| 159 | 159 |      context = Context()
 | 
| 160 | 160 |  | 
| 161 | -    with YamlCache.open(context, str(temppath)) as yc:
 | |
| 161 | +    with YamlCache.open(context, temppath) as yc:
 | |
| 162 | 162 |          if from_cache == 'raw':
 | 
| 163 | 163 |              return _yaml.load(filename, shortname)
 | 
| 164 | 164 |          elif from_cache == 'cached':
 | 
