[Notes] [Git][BuildStream/buildstream][valentindavid/git_shallow_fetch] Fetch git shallow clone when possible



Title: GitLab

Valentin David pushed to branch valentindavid/git_shallow_fetch at BuildStream / buildstream

Commits:

2 changed files:

Changes:

  • buildstream/plugins/sources/git.py
    ... ... @@ -175,11 +175,87 @@ class GitMirror(SourceFetcher):
    175 175
             self.ref = ref
    
    176 176
             self.tags = tags
    
    177 177
             self.primary = primary
    
    178
    +        dirname = utils.url_directory_name(url)
    
    178 179
             self.mirror = os.path.join(source.get_mirror_directory(), utils.url_directory_name(url))
    
    180
    +        self.fetch_mirror = os.path.join(source.get_mirror_directory(), '{}-{}'.format(dirname, ref))
    
    179 181
             self.mark_download_url(url)
    
    180 182
     
    
    183
    +    def ensure_fetchable(self, alias_override=None):
    
    184
    +
    
    185
    +        if os.path.exists(self.mirror):
    
    186
    +            return
    
    187
    +
    
    188
    +        if self.tags:
    
    189
    +            for tag, commit, _ in self.tags:
    
    190
    +                if commit != self.ref:
    
    191
    +                    self.source.status("{}: tag '{}' is not on commit '{}', so a full clone is required"
    
    192
    +                                       .format(self.source, tag, commit))
    
    193
    +                    self.ensure_trackable(alias_override=alias_override)
    
    194
    +                    return
    
    195
    +
    
    196
    +        if os.path.exists(self.fetch_mirror):
    
    197
    +            return
    
    198
    +
    
    199
    +        with self.source.tempdir() as tmpdir:
    
    200
    +            self.source.call([self.source.host_git, 'init', '--bare', tmpdir],
    
    201
    +                             fail="Failed to init git repository",
    
    202
    +                             fail_temporarily=True)
    
    203
    +
    
    204
    +            url = self.source.translate_url(self.url, alias_override=alias_override,
    
    205
    +                                            primary=self.primary)
    
    206
    +
    
    207
    +            self.source.call([self.source.host_git, 'remote', 'add', '--mirror=fetch', 'origin', url],
    
    208
    +                             cwd=tmpdir,
    
    209
    +                             fail="Failed to init git repository",
    
    210
    +                             fail_temporarily=True)
    
    211
    +
    
    212
    +            _, refs = self.source.check_output([self.source.host_git, 'ls-remote', 'origin'],
    
    213
    +                                               cwd=tmpdir,
    
    214
    +                                               fail="Failed to clone git repository {}".format(url),
    
    215
    +                                               fail_temporarily=True)
    
    216
    +
    
    217
    +            advertised = None
    
    218
    +            for ref_line in refs.splitlines():
    
    219
    +                commit, ref = ref_line.split('\t', 1)
    
    220
    +                if ref == 'HEAD':
    
    221
    +                    continue
    
    222
    +                if self.ref == commit:
    
    223
    +                    if ref.endswith('^{}'):
    
    224
    +                        ref = ref[:-3]
    
    225
    +                    advertised = ref
    
    226
    +                    break
    
    227
    +
    
    228
    +            if advertised is None:
    
    229
    +                self.source.status("{}: {} is not advertised on {}, so a full clone is required"
    
    230
    +                                   .format(self.source, self.ref, url))
    
    231
    +
    
    232
    +                self.ensure_trackable(alias_override=alias_override)
    
    233
    +                return
    
    234
    +
    
    235
    +            self.source.call([self.source.host_git, 'fetch', '--depth=1', 'origin', advertised],
    
    236
    +                             cwd=tmpdir,
    
    237
    +                             fail="Failed to fetch repository",
    
    238
    +                             fail_temporarily=True)
    
    239
    +
    
    240
    +            # We need to have a ref to make it clonable
    
    241
    +            self.source.call([self.source.host_git, 'update-ref', 'HEAD', self.ref],
    
    242
    +                             cwd=tmpdir,
    
    243
    +                             fail="Failed to tag HEAD",
    
    244
    +                             fail_temporarily=True)
    
    245
    +
    
    246
    +            try:
    
    247
    +                move_atomic(tmpdir, self.fetch_mirror)
    
    248
    +            except DirectoryExistsError:
    
    249
    +                # Another process was quicker to download this repository.
    
    250
    +                # Let's discard our own
    
    251
    +                self.source.status("{}: Discarding duplicate clone of {}"
    
    252
    +                                   .format(self.source, url))
    
    253
    +            except OSError as e:
    
    254
    +                raise SourceError("{}: Failed to move cloned git repository {} from '{}' to '{}': {}"
    
    255
    +                                  .format(self.source, url, tmpdir, self.fetch_mirror, e)) from e
    
    256
    +
    
    181 257
         # Ensures that the mirror exists
    
    182
    -    def ensure(self, alias_override=None):
    
    258
    +    def ensure_trackable(self, alias_override=None):
    
    183 259
     
    
    184 260
             # Unfortunately, git does not know how to only clone just a specific ref,
    
    185 261
             # so we have to download all of those gigs even if we only need a couple
    
    ... ... @@ -214,18 +290,23 @@ class GitMirror(SourceFetcher):
    214 290
                                             alias_override=alias_override,
    
    215 291
                                             primary=self.primary)
    
    216 292
     
    
    293
    +        if os.path.exists(self.mirror):
    
    294
    +            mirror = self.mirror
    
    295
    +        else:
    
    296
    +            mirror = self.fetch_mirror
    
    297
    +
    
    217 298
             if alias_override:
    
    218 299
                 remote_name = utils.url_directory_name(alias_override)
    
    219 300
                 _, remotes = self.source.check_output(
    
    220 301
                     [self.source.host_git, 'remote'],
    
    221
    -                fail="Failed to retrieve list of remotes in {}".format(self.mirror),
    
    302
    +                fail="Failed to retrieve list of remotes in {}".format(mirror),
    
    222 303
                     cwd=self.mirror
    
    223 304
                 )
    
    224 305
                 if remote_name not in remotes:
    
    225 306
                     self.source.call(
    
    226 307
                         [self.source.host_git, 'remote', 'add', remote_name, url],
    
    227 308
                         fail="Failed to add remote {} with url {}".format(remote_name, url),
    
    228
    -                    cwd=self.mirror
    
    309
    +                    cwd=mirror
    
    229 310
                     )
    
    230 311
             else:
    
    231 312
                 remote_name = "origin"
    
    ... ... @@ -233,7 +314,7 @@ class GitMirror(SourceFetcher):
    233 314
             self.source.call([self.source.host_git, 'fetch', remote_name, '--prune'],
    
    234 315
                              fail="Failed to fetch from remote git repository: {}".format(url),
    
    235 316
                              fail_temporarily=True,
    
    236
    -                         cwd=self.mirror)
    
    317
    +                         cwd=mirror)
    
    237 318
     
    
    238 319
         def fetch(self, alias_override=None):
    
    239 320
             # Resolve the URL for the message
    
    ... ... @@ -244,7 +325,7 @@ class GitMirror(SourceFetcher):
    244 325
             with self.source.timed_activity("Fetching from {}"
    
    245 326
                                             .format(resolved_url),
    
    246 327
                                             silent_nested=True):
    
    247
    -            self.ensure(alias_override)
    
    328
    +            self.ensure_fetchable(alias_override)
    
    248 329
                 if not self.has_ref():
    
    249 330
                     self._fetch(alias_override)
    
    250 331
                 self.assert_ref()
    
    ... ... @@ -253,12 +334,16 @@ class GitMirror(SourceFetcher):
    253 334
             if not self.ref:
    
    254 335
                 return False
    
    255 336
     
    
    256
    -        # If the mirror doesnt exist, we also dont have the ref
    
    257
    -        if not os.path.exists(self.mirror):
    
    258
    -            return False
    
    337
    +        if os.path.exists(self.mirror):
    
    338
    +            mirror = self.mirror
    
    339
    +        else:
    
    340
    +            # If the mirror doesn't exist, we also don't have the ref
    
    341
    +            if not os.path.exists(self.fetch_mirror):
    
    342
    +                return False
    
    343
    +            mirror = self.fetch_mirror
    
    259 344
     
    
    260 345
             # Check if the ref is really there
    
    261
    -        rc = self.source.call([self.source.host_git, 'cat-file', '-t', self.ref], cwd=self.mirror)
    
    346
    +        rc = self.source.call([self.source.host_git, 'cat-file', '-t', self.ref], cwd=mirror)
    
    262 347
             return rc == 0
    
    263 348
     
    
    264 349
         def assert_ref(self):
    
    ... ... @@ -308,11 +393,16 @@ class GitMirror(SourceFetcher):
    308 393
         def stage(self, directory, track=None):
    
    309 394
             fullpath = os.path.join(directory, self.path)
    
    310 395
     
    
    396
    +        if os.path.exists(self.mirror):
    
    397
    +            mirror = self.mirror
    
    398
    +        else:
    
    399
    +            mirror = self.fetch_mirror
    
    400
    +
    
    311 401
             # Using --shared here avoids copying the objects into the checkout, in any
    
    312 402
             # case we're just checking out a specific commit and then removing the .git/
    
    313 403
             # directory.
    
    314
    -        self.source.call([self.source.host_git, 'clone', '--no-checkout', '--shared', self.mirror, fullpath],
    
    315
    -                         fail="Failed to create git mirror {} in directory: {}".format(self.mirror, fullpath),
    
    404
    +        self.source.call([self.source.host_git, 'clone', '--no-checkout', '--shared', mirror, fullpath],
    
    405
    +                         fail="Failed to create git mirror {} in directory: {}".format(mirror, fullpath),
    
    316 406
                              fail_temporarily=True)
    
    317 407
     
    
    318 408
             self.source.call([self.source.host_git, 'checkout', '--force', self.ref],
    
    ... ... @@ -350,9 +440,14 @@ class GitMirror(SourceFetcher):
    350 440
     
    
    351 441
         # List the submodules (path/url tuples) present at the given ref of this repo
    
    352 442
         def submodule_list(self):
    
    443
    +        if os.path.exists(self.mirror):
    
    444
    +            mirror = self.mirror
    
    445
    +        else:
    
    446
    +            mirror = self.fetch_mirror
    
    447
    +
    
    353 448
             modules = "{}:{}".format(self.ref, GIT_MODULES)
    
    354 449
             exit_code, output = self.source.check_output(
    
    355
    -            [self.source.host_git, 'show', modules], cwd=self.mirror)
    
    450
    +            [self.source.host_git, 'show', modules], cwd=mirror)
    
    356 451
     
    
    357 452
             # If git show reports error code 128 here, we take it to mean there is
    
    358 453
             # no .gitmodules file to display for the given revision.
    
    ... ... @@ -380,6 +475,11 @@ class GitMirror(SourceFetcher):
    380 475
         # Fetch the ref which this mirror requires its submodule to have,
    
    381 476
         # at the given ref of this mirror.
    
    382 477
         def submodule_ref(self, submodule, ref=None):
    
    478
    +        if os.path.exists(self.mirror):
    
    479
    +            mirror = self.mirror
    
    480
    +        else:
    
    481
    +            mirror = self.fetch_mirror
    
    482
    +
    
    383 483
             if not ref:
    
    384 484
                 ref = self.ref
    
    385 485
     
    
    ... ... @@ -388,7 +488,7 @@ class GitMirror(SourceFetcher):
    388 488
             _, output = self.source.check_output([self.source.host_git, 'ls-tree', ref, submodule],
    
    389 489
                                                  fail="ls-tree failed for commit {} and submodule: {}".format(
    
    390 490
                                                      ref, submodule),
    
    391
    -                                             cwd=self.mirror)
    
    491
    +                                             cwd=mirror)
    
    392 492
     
    
    393 493
             # read the commit hash from the output
    
    394 494
             fields = output.split()
    
    ... ... @@ -646,7 +746,7 @@ class GitSource(Source):
    646 746
             with self.timed_activity("Tracking {} from {}"
    
    647 747
                                      .format(self.tracking, resolved_url),
    
    648 748
                                      silent_nested=True):
    
    649
    -            self.mirror.ensure()
    
    749
    +            self.mirror.ensure_trackable()
    
    650 750
                 self.mirror._fetch()
    
    651 751
     
    
    652 752
                 # Update self.mirror.ref and node.ref from the self.tracking branch
    
    ... ... @@ -658,6 +758,7 @@ class GitSource(Source):
    658 758
     
    
    659 759
         def init_workspace(self, directory):
    
    660 760
             # XXX: may wish to refactor this as some code dupe with stage()
    
    761
    +        self.mirror.ensure_trackable()
    
    661 762
             self.refresh_submodules()
    
    662 763
     
    
    663 764
             with self.timed_activity('Setting up workspace "{}"'.format(directory), silent_nested=True):
    
    ... ... @@ -702,7 +803,7 @@ class GitSource(Source):
    702 803
     
    
    703 804
             self.refresh_submodules()
    
    704 805
             for mirror in self.submodules:
    
    705
    -            if not os.path.exists(mirror.mirror):
    
    806
    +            if not os.path.exists(mirror.mirror) and not os.path.exists(mirror.fetch_mirror):
    
    706 807
                     return False
    
    707 808
                 if not mirror.has_ref():
    
    708 809
                     return False
    
    ... ... @@ -714,7 +815,7 @@ class GitSource(Source):
    714 815
         # Assumes that we have our mirror and we have the ref which we point to
    
    715 816
         #
    
    716 817
         def refresh_submodules(self):
    
    717
    -        self.mirror.ensure()
    
    818
    +        self.mirror.ensure_fetchable()
    
    718 819
             submodules = []
    
    719 820
     
    
    720 821
             # XXX Here we should issue a warning if either:
    

  • tests/sources/git.py
    ... ... @@ -27,6 +27,7 @@ import subprocess
    27 27
     from buildstream._exceptions import ErrorDomain
    
    28 28
     from buildstream import _yaml
    
    29 29
     from buildstream.plugin import CoreWarnings
    
    30
    +from buildstream.utils import url_directory_name
    
    30 31
     
    
    31 32
     from tests.testutils import cli, create_repo
    
    32 33
     from tests.testutils.site import HAVE_GIT
    
    ... ... @@ -676,3 +677,193 @@ def test_default_do_not_track_tags(cli, tmpdir, datafiles):
    676 677
     
    
    677 678
         element = _yaml.load(element_path)
    
    678 679
         assert 'tags' not in element['sources'][0]
    
    680
    +
    
    681
    +
    
    682
    +@pytest.mark.skipif(HAVE_GIT is False, reason="git is not available")
    
    683
    +@pytest.mark.datafiles(os.path.join(DATA_DIR, 'template'))
    
    684
    +def test_fetch_shallow(cli, tmpdir, datafiles):
    
    685
    +    project = str(datafiles)
    
    686
    +
    
    687
    +    repo = create_repo('git', str(tmpdir))
    
    688
    +    previous_ref = repo.create(os.path.join(project, 'repofiles'))
    
    689
    +
    
    690
    +    file1 = os.path.join(str(tmpdir), 'file1')
    
    691
    +    with open(file1, 'w') as f:
    
    692
    +        f.write('test\n')
    
    693
    +    ref = repo.add_file(file1)
    
    694
    +
    
    695
    +    source_config = repo.source_config(ref=ref)
    
    696
    +
    
    697
    +    # Write out our test target pinned to the latest ref
    
    698
    +    element = {
    
    699
    +        'kind': 'import',
    
    700
    +        'sources': [
    
    701
    +            source_config
    
    702
    +        ]
    
    703
    +    }
    
    704
    +    _yaml.dump(element, os.path.join(project, 'target.bst'))
    
    705
    +
    
    706
    +    sources_dir = os.path.join(str(tmpdir), 'sources')
    
    707
    +    os.makedirs(sources_dir, exist_ok=True)
    
    708
    +    config = {
    
    709
    +        'sourcedir': sources_dir
    
    710
    +    }
    
    711
    +    cli.configure(config)
    
    712
    +
    
    713
    +    result = cli.run(project=project, args=[
    
    714
    +        'fetch', 'target.bst'
    
    715
    +    ])
    
    716
    +    result.assert_success()
    
    717
    +
    
    718
    +    cache_dir_name = url_directory_name(source_config['url'])
    
    719
    +    full_cache_path = os.path.join(sources_dir, 'git', cache_dir_name)
    
    720
    +    shallow_cache_path = os.path.join(sources_dir, 'git', '{}-{}'.format(cache_dir_name, ref))
    
    721
    +
    
    722
    +    assert os.path.exists(shallow_cache_path)
    
    723
    +    assert not os.path.exists(full_cache_path)
    
    724
    +
    
    725
    +    output = subprocess.run(['git', 'log', '--format=format:%H'],
    
    726
    +                            cwd=shallow_cache_path,
    
    727
    +                            stdout=subprocess.PIPE).stdout.decode('ascii')
    
    728
    +    assert output.splitlines() == [ref]
    
    729
    +
    
    730
    +    result = cli.run(project=project, args=[
    
    731
    +        'build', 'target.bst'
    
    732
    +    ])
    
    733
    +    result.assert_success()
    
    734
    +
    
    735
    +    output = subprocess.run(['git', 'log', '--format=format:%H'],
    
    736
    +                            cwd=shallow_cache_path,
    
    737
    +                            stdout=subprocess.PIPE).stdout.decode('ascii')
    
    738
    +    assert output.splitlines() == [ref]
    
    739
    +
    
    740
    +    assert os.path.exists(shallow_cache_path)
    
    741
    +    assert not os.path.exists(full_cache_path)
    
    742
    +
    
    743
    +    result = cli.run(project=project, args=[
    
    744
    +        'track', 'target.bst'
    
    745
    +    ])
    
    746
    +    result.assert_success()
    
    747
    +
    
    748
    +    assert os.path.exists(full_cache_path)
    
    749
    +    output = subprocess.run(['git', 'log', '--format=format:%H'],
    
    750
    +                            cwd=full_cache_path,
    
    751
    +                            stdout=subprocess.PIPE).stdout.decode('ascii')
    
    752
    +    assert output.splitlines() == [ref, previous_ref]
    
    753
    +
    
    754
    +
    
    755
    +@pytest.mark.skipif(HAVE_GIT is False, reason="git is not available")
    
    756
    +@pytest.mark.datafiles(os.path.join(DATA_DIR, 'template'))
    
    757
    +def test_fetch_shallow_not_tagged(cli, tmpdir, datafiles):
    
    758
    +    """When a ref is not tagged and not head of branch on remote we cannot
    
    759
    +    get a shallow clone.  It should automatically get a full clone.
    
    760
    +    """
    
    761
    +
    
    762
    +    project = str(datafiles)
    
    763
    +
    
    764
    +    repo = create_repo('git', str(tmpdir))
    
    765
    +    previous_ref = repo.create(os.path.join(project, 'repofiles'))
    
    766
    +
    
    767
    +    file1 = os.path.join(str(tmpdir), 'file1')
    
    768
    +    with open(file1, 'w') as f:
    
    769
    +        f.write('test\n')
    
    770
    +    ref = repo.add_file(file1)
    
    771
    +
    
    772
    +    source_config = repo.source_config(ref=previous_ref)
    
    773
    +
    
    774
    +    # Write out our test target pinned to the previous ref (not a tag or branch head)
    
    775
    +    element = {
    
    776
    +        'kind': 'import',
    
    777
    +        'sources': [
    
    778
    +            source_config
    
    779
    +        ]
    
    780
    +    }
    
    781
    +    _yaml.dump(element, os.path.join(project, 'target.bst'))
    
    782
    +
    
    783
    +    sources_dir = os.path.join(str(tmpdir), 'sources')
    
    784
    +    os.makedirs(sources_dir, exist_ok=True)
    
    785
    +    config = {
    
    786
    +        'sourcedir': sources_dir
    
    787
    +    }
    
    788
    +    cli.configure(config)
    
    789
    +
    
    790
    +    result = cli.run(project=project, args=[
    
    791
    +        'fetch', 'target.bst'
    
    792
    +    ])
    
    793
    +    result.assert_success()
    
    794
    +
    
    795
    +    cache_dir_name = url_directory_name(source_config['url'])
    
    796
    +    full_cache_path = os.path.join(sources_dir, 'git', cache_dir_name)
    
    797
    +    shallow_cache_path = os.path.join(sources_dir, 'git', '{}-{}'.format(cache_dir_name, previous_ref))
    
    798
    +
    
    799
    +    assert not os.path.exists(shallow_cache_path)
    
    800
    +    assert os.path.exists(full_cache_path)
    
    801
    +
    
    802
    +    output = subprocess.run(['git', 'log', '--format=format:%H'],
    
    803
    +                            cwd=full_cache_path,
    
    804
    +                            stdout=subprocess.PIPE).stdout.decode('ascii')
    
    805
    +    assert output.splitlines() == [ref, previous_ref]
    
    806
    +
    
    807
    +
    
    808
    +@pytest.mark.skipif(HAVE_GIT is False, reason="git is not available")
    
    809
    +@pytest.mark.datafiles(os.path.join(DATA_DIR, 'template'))
    
    810
    +def test_fetch_shallow_workspace_open(cli, tmpdir, datafiles):
    
    811
    +    """
    
    812
    +    Workspaces should get a full clone.
    
    813
    +    """
    
    814
    +    project = str(datafiles)
    
    815
    +
    
    816
    +    repo = create_repo('git', str(tmpdir))
    
    817
    +    previous_ref = repo.create(os.path.join(project, 'repofiles'))
    
    818
    +
    
    819
    +    file1 = os.path.join(str(tmpdir), 'file1')
    
    820
    +    with open(file1, 'w') as f:
    
    821
    +        f.write('test\n')
    
    822
    +    ref = repo.add_file(file1)
    
    823
    +
    
    824
    +    source_config = repo.source_config(ref=ref)
    
    825
    +
    
    826
    +    # Write out our test target pinned to the latest ref
    
    827
    +    element = {
    
    828
    +        'kind': 'import',
    
    829
    +        'sources': [
    
    830
    +            source_config
    
    831
    +        ]
    
    832
    +    }
    
    833
    +    _yaml.dump(element, os.path.join(project, 'target.bst'))
    
    834
    +
    
    835
    +    sources_dir = os.path.join(str(tmpdir), 'sources')
    
    836
    +    os.makedirs(sources_dir, exist_ok=True)
    
    837
    +    config = {
    
    838
    +        'sourcedir': sources_dir
    
    839
    +    }
    
    840
    +    cli.configure(config)
    
    841
    +
    
    842
    +    result = cli.run(project=project, args=[
    
    843
    +        'fetch', 'target.bst'
    
    844
    +    ])
    
    845
    +    result.assert_success()
    
    846
    +
    
    847
    +    cache_dir_name = url_directory_name(source_config['url'])
    
    848
    +    full_cache_path = os.path.join(sources_dir, 'git', cache_dir_name)
    
    849
    +    shallow_cache_path = os.path.join(sources_dir, 'git', '{}-{}'.format(cache_dir_name, ref))
    
    850
    +
    
    851
    +    assert os.path.exists(shallow_cache_path)
    
    852
    +    assert not os.path.exists(full_cache_path)
    
    853
    +
    
    854
    +    output = subprocess.run(['git', 'log', '--format=format:%H'],
    
    855
    +                            cwd=shallow_cache_path,
    
    856
    +                            stdout=subprocess.PIPE).stdout.decode('ascii')
    
    857
    +    assert output.splitlines() == [ref]
    
    858
    +
    
    859
    +    workspace = os.path.join(tmpdir, 'workspace')
    
    860
    +
    
    861
    +    result = cli.run(project=project, args=[
    
    862
    +        'workspace', 'open', 'target.bst', '--directory', workspace
    
    863
    +    ])
    
    864
    +    result.assert_success()
    
    865
    +
    
    866
    +    output = subprocess.run(['git', 'log', '--format=format:%H'],
    
    867
    +                            cwd=workspace,
    
    868
    +                            stdout=subprocess.PIPE).stdout.decode('ascii')
    
    869
    +    assert output.splitlines() == [ref, previous_ref]



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]