[Notes] [Git][BuildStream/buildstream][jonathan/yamlcache-no-read] yamlcache: Lookup files in the cache without reading the file



Title: GitLab

Jonathan Maw pushed to branch jonathan/yamlcache-no-read at BuildStream / buildstream

Commits:

2 changed files:

Changes:

  • buildstream/_yaml.py
    ... ... @@ -197,12 +197,12 @@ def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache=
    197 197
     
    
    198 198
         try:
    
    199 199
             data = None
    
    200
    -        with open(filename) as f:
    
    201
    -            contents = f.read()
    
    202
    -        if yaml_cache:
    
    203
    -            data, key = yaml_cache.get(project, filename, contents, copy_tree)
    
    204 200
     
    
    201
    +        if yaml_cache:
    
    202
    +            data, key = yaml_cache.get(project, filename, copy_tree)
    
    205 203
             if not data:
    
    204
    +            with open(filename) as f:
    
    205
    +                contents = f.read()
    
    206 206
                 data = load_data(contents, file, copy_tree=copy_tree)
    
    207 207
                 if yaml_cache:
    
    208 208
                     yaml_cache.put_from_key(project, filename, key, data)
    

  • buildstream/_yamlcache.py
    ... ... @@ -127,15 +127,14 @@ class YamlCache():
    127 127
         # Args:
    
    128 128
         #    project (Project) or None: The project this file is in, if it exists.
    
    129 129
         #    filepath (str): The absolute path to the file.
    
    130
    -    #    contents (str): The contents of the file to be cached
    
    131 130
         #    copy_tree (bool): Whether the data should make a copy when it's being generated
    
    132 131
         #                      (i.e. exactly as when called in yaml)
    
    133 132
         #
    
    134 133
         # Returns:
    
    135 134
         #    (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
    
    136 135
         #    (str):            The key used to look up the parsed yaml in the cache
    
    137
    -    def get(self, project, filepath, contents, copy_tree):
    
    138
    -        key = self._calculate_key(contents, copy_tree)
    
    136
    +    def get(self, project, filepath, copy_tree):
    
    137
    +        key = self._calculate_key(project, filepath, copy_tree)
    
    139 138
             data = self._get(project, filepath, key)
    
    140 139
             return data, key
    
    141 140
     
    
    ... ... @@ -146,12 +145,11 @@ class YamlCache():
    146 145
         # Args:
    
    147 146
         #    project (Project): The project this file is in.
    
    148 147
         #    filepath (str): The path to the file.
    
    149
    -    #    contents (str): The contents of the file that has been cached
    
    150 148
         #    copy_tree (bool): Whether the data should make a copy when it's being generated
    
    151 149
         #                      (i.e. exactly as when called in yaml)
    
    152 150
         #    value (decorated dict): The data to put into the cache.
    
    153
    -    def put(self, project, filepath, contents, copy_tree, value):
    
    154
    -        key = self._calculate_key(contents, copy_tree)
    
    151
    +    def put(self, project, filepath, copy_tree, value):
    
    152
    +        key = self._calculate_key(project, filepath, copy_tree)
    
    155 153
             self.put_from_key(project, filepath, key, value)
    
    156 154
     
    
    157 155
         # put_from_key():
    
    ... ... @@ -213,13 +211,23 @@ class YamlCache():
    213 211
         # Calculates a key for putting into the cache.
    
    214 212
         #
    
    215 213
         # Args:
    
    216
    -    #    (basic object)... : Any number of strictly-ordered basic objects
    
    214
    +    #    project (Project) or None: The project this file is in.
    
    215
    +    #    filepath (str): The path to the file.
    
    216
    +    #    copy_tree (bool): Whether the data should make a copy when it's being generated
    
    217
    +    #                      (i.e. exactly as when called in yaml)
    
    217 218
         #
    
    218 219
         # Returns:
    
    219 220
         #   (str): A key made out of every arg passed in
    
    220 221
         @staticmethod
    
    221
    -    def _calculate_key(*args):
    
    222
    -        string = pickle.dumps(args)
    
    222
    +    def _calculate_key(project, filepath, copy_tree):
    
    223
    +        if project and project.junction:
    
    224
    +            # files in a junction only change if the junction element changes
    
    225
    +            # NOTE: This may change when junction workspaces are revisited/fixed
    
    226
    +            content_key = project.junction._get_cache_key()
    
    227
    +        else:
    
    228
    +            stat = os.stat(filepath)
    
    229
    +            content_key = stat.st_mtime
    
    230
    +        string = pickle.dumps(content_key, copy_tree)
    
    223 231
             return hashlib.sha1(string).hexdigest()
    
    224 232
     
    
    225 233
         # _get():
    



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]