[Notes] [Git][BuildStream/buildstream][jonathan/yamlcache-no-read] yamlcache: Look up files in the cache without reading the file



Title: GitLab

Jonathan Maw pushed to branch jonathan/yamlcache-no-read at BuildStream / buildstream

Commits:

3 changed files:

Changes:

  • buildstream/_yaml.py
    ... ... @@ -197,12 +197,12 @@ def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache=
    197 197
     
    
    198 198
         try:
    
    199 199
             data = None
    
    200
    -        with open(filename) as f:
    
    201
    -            contents = f.read()
    
    202
    -        if yaml_cache:
    
    203
    -            data, key = yaml_cache.get(project, filename, contents, copy_tree)
    
    204 200
     
    
    201
    +        if yaml_cache:
    
    202
    +            data, key = yaml_cache.get(project, filename, copy_tree)
    
    205 203
             if not data:
    
    204
    +            with open(filename) as f:
    
    205
    +                contents = f.read()
    
    206 206
                 data = load_data(contents, file, copy_tree=copy_tree)
    
    207 207
                 if yaml_cache:
    
    208 208
                     yaml_cache.put_from_key(project, filename, key, data)
    

  • buildstream/_yamlcache.py
    ... ... @@ -127,15 +127,14 @@ class YamlCache():
    127 127
         # Args:
    
    128 128
         #    project (Project) or None: The project this file is in, if it exists.
    
    129 129
         #    filepath (str): The absolute path to the file.
    
    130
    -    #    contents (str): The contents of the file to be cached
    
    131 130
         #    copy_tree (bool): Whether the data should make a copy when it's being generated
    
    132 131
         #                      (i.e. exactly as when called in yaml)
    
    133 132
         #
    
    134 133
         # Returns:
    
    135 134
         #    (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
    
    136 135
         #    (str):            The key used to look up the parsed yaml in the cache
    
    137
    -    def get(self, project, filepath, contents, copy_tree):
    
    138
    -        key = self._calculate_key(contents, copy_tree)
    
    136
    +    def get(self, project, filepath, copy_tree):
    
    137
    +        key = self._calculate_key(project, filepath, copy_tree)
    
    139 138
             data = self._get(project, filepath, key)
    
    140 139
             return data, key
    
    141 140
     
    
    ... ... @@ -146,12 +145,11 @@ class YamlCache():
    146 145
         # Args:
    
    147 146
         #    project (Project): The project this file is in.
    
    148 147
         #    filepath (str): The path to the file.
    
    149
    -    #    contents (str): The contents of the file that has been cached
    
    150 148
         #    copy_tree (bool): Whether the data should make a copy when it's being generated
    
    151 149
         #                      (i.e. exactly as when called in yaml)
    
    152 150
         #    value (decorated dict): The data to put into the cache.
    
    153
    -    def put(self, project, filepath, contents, copy_tree, value):
    
    154
    -        key = self._calculate_key(contents, copy_tree)
    
    151
    +    def put(self, project, filepath, copy_tree, value):
    
    152
    +        key = self._calculate_key(project, filepath, copy_tree)
    
    155 153
             self.put_from_key(project, filepath, key, value)
    
    156 154
     
    
    157 155
         # put_from_key():
    
    ... ... @@ -213,13 +211,23 @@ class YamlCache():
    213 211
         # Calculates a key for putting into the cache.
    
    214 212
         #
    
    215 213
         # Args:
    
    216
    -    #    (basic object)... : Any number of strictly-ordered basic objects
    
    214
    +    #    project (Project) or None: The project this file is in.
    
    215
    +    #    filepath (str): The path to the file.
    
    216
    +    #    copy_tree (bool): Whether the data should make a copy when it's being generated
    
    217
    +    #                      (i.e. exactly as when called in yaml)
    
    217 218
         #
    
    218 219
         # Returns:
    
    219 220
         #   (str): A key made out of every arg passed in
    
    220 221
         @staticmethod
    
    221
    -    def _calculate_key(*args):
    
    222
    -        string = pickle.dumps(args)
    
    222
    +    def _calculate_key(project, filepath, copy_tree):
    
    223
    +        if project and project.junction:
    
    224
    +            # files in a junction only change if the junction element changes
    
    225
    +            # NOTE: This may change when junction workspaces are revisited/fixed
    
    226
    +            content_key = project.junction._get_cache_key()
    
    227
    +        else:
    
    228
    +            stat = os.stat(filepath)
    
    229
    +            content_key = stat.st_mtime
    
    230
    +        string = pickle.dumps(content_key, copy_tree)
    
    223 231
             return hashlib.sha1(string).hexdigest()
    
    224 232
     
    
    225 233
         # _get():
    

  • tests/frontend/yamlcache.py
    ... ... @@ -13,10 +13,10 @@ from contextlib import contextmanager
    13 13
     
    
    14 14
     
    
    15 15
     def generate_project(tmpdir, ref_storage, with_junction, name="test"):
    
    16
    -    if with_junction == 'junction':
    
    16
    +    if with_junction:
    
    17 17
             subproject_dir = generate_project(
    
    18 18
                 tmpdir, ref_storage,
    
    19
    -            'no-junction', name='test-subproject'
    
    19
    +            False, name='test-subproject'
    
    20 20
             )
    
    21 21
     
    
    22 22
         project_dir = os.path.join(tmpdir, name)
    
    ... ... @@ -32,7 +32,7 @@ def generate_project(tmpdir, ref_storage, with_junction, name="test"):
    32 32
         _yaml.dump(project_conf, project_conf_path)
    
    33 33
     
    
    34 34
         # elements
    
    35
    -    if with_junction == 'junction':
    
    35
    +    if with_junction:
    
    36 36
             junction_name = 'junction.bst'
    
    37 37
             junction_dir = os.path.join(project_dir, elements_path)
    
    38 38
             junction_path = os.path.join(project_dir, elements_path, junction_name)
    
    ... ... @@ -57,12 +57,6 @@ def with_yamlcache(project_dir):
    57 57
             yield yamlcache, project
    
    58 58
     
    
    59 59
     
    
    60
    -def yamlcache_key(yamlcache, in_file, copy_tree=False):
    
    61
    -    with open(in_file) as f:
    
    62
    -        key = yamlcache._calculate_key(f.read(), copy_tree)
    
    63
    -    return key
    
    64
    -
    
    65
    -
    
    66 60
     def modified_file(input_file, tmpdir):
    
    67 61
         with open(input_file) as f:
    
    68 62
             data = f.read()
    
    ... ... @@ -76,12 +70,13 @@ def modified_file(input_file, tmpdir):
    76 70
     
    
    77 71
     
    
    78 72
     @pytest.mark.parametrize('ref_storage', ['inline', 'project.refs'])
    
    79
    -@pytest.mark.parametrize('with_junction', ['no-junction', 'junction'])
    
    80
    -@pytest.mark.parametrize('move_project', ['move', 'no-move'])
    
    81
    -def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction, move_project):
    
    73
    +@pytest.mark.parametrize('with_junction', [True, False], ids=['junction', 'no-junction'])
    
    74
    +def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction):
    
    82 75
         # Generate the project
    
    83 76
         project = generate_project(str(tmpdir), ref_storage, with_junction)
    
    84
    -    if with_junction == 'junction':
    
    77
    +    element_path = os.path.join(project, 'elements', 'test.bst')
    
    78
    +    element_mtime = 0
    
    79
    +    if with_junction:
    
    85 80
             result = cli.run(project=project, args=['source', 'fetch', '--track', 'junction.bst'])
    
    86 81
             result.assert_success()
    
    87 82
     
    
    ... ... @@ -89,17 +84,14 @@ def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction, move_project):
    89 84
         result = cli.run(project=project, args=['show', 'test.bst'])
    
    90 85
         result.assert_success()
    
    91 86
     
    
    92
    -    element_path = os.path.join(project, 'elements', 'test.bst')
    
    93 87
         with with_yamlcache(project) as (yc, prj):
    
    94 88
             # Check that it's in the cache
    
    95 89
             assert yc.is_cached(prj, element_path)
    
    96 90
     
    
    97
    -        # *Absolutely* horrible cache corruption to check it's being used
    
    98
    -        # Modifying the data from the cache is fraught with danger,
    
    99
    -        # so instead I'll load a modified version of the original file
    
    91
    +        # Modify files in the yaml cache to test whether it's being used
    
    100 92
             temppath = modified_file(element_path, str(tmpdir))
    
    101 93
             contents = _yaml.load(temppath, copy_tree=False, project=prj)
    
    102
    -        key = yamlcache_key(yc, element_path)
    
    94
    +        key = yc._calculate_key(prj, element_path, copy_tree=False)
    
    103 95
             yc.put_from_key(prj, element_path, key, contents)
    
    104 96
     
    
    105 97
         # Show that a variable has been added
    
    ... ... @@ -111,13 +103,13 @@ def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction, move_project):
    111 103
     
    
    112 104
     
    
    113 105
     @pytest.mark.parametrize('ref_storage', ['inline', 'project.refs'])
    
    114
    -@pytest.mark.parametrize('with_junction', ['junction', 'no-junction'])
    
    106
    +@pytest.mark.parametrize('with_junction', [True, False], ids=['junction', 'no-junction'])
    
    115 107
     def test_yamlcache_changed_file(cli, tmpdir, ref_storage, with_junction):
    
    116 108
         # i.e. a file is cached, the file is changed, loading the file (with cache) returns new data
    
    117 109
         # inline and junction can only be changed by opening a workspace
    
    118 110
         # Generate the project
    
    119 111
         project = generate_project(str(tmpdir), ref_storage, with_junction)
    
    120
    -    if with_junction == 'junction':
    
    112
    +    if with_junction:
    
    121 113
             result = cli.run(project=project, args=['source', 'fetch', '--track', 'junction.bst'])
    
    122 114
             result.assert_success()
    
    123 115
     
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]