[Notes] [Git][BuildStream/buildstream][jmac/cache_artifacts_with_vdir] 6 commits: _casbaseddirectory.py: Convert absolute symlinks to relative



Title: GitLab

Jim MacArthur pushed to branch jmac/cache_artifacts_with_vdir at BuildStream / buildstream

Commits:

6 changed files:

Changes:

  • buildstream/_artifactcache/cascache.py
    ... ... @@ -39,6 +39,7 @@ from .. import utils
    39 39
     from .._exceptions import CASError, LoadError, LoadErrorReason
    
    40 40
     from .. import _yaml
    
    41 41
     
    
    42
    +from ..storage._casbaseddirectory import CasBasedDirectory
    
    42 43
     
    
    43 44
     # The default limit for gRPC messages is 4 MiB.
    
    44 45
     # Limit payload to 1 MiB to leave sufficient headroom for metadata.
    
    ... ... @@ -768,6 +769,9 @@ class CASCache():
    768 769
         #     (Digest): Digest object for the directory added.
    
    769 770
         #
    
    770 771
         def _commit_directory(self, path, *, dir_digest=None):
    
    772
    +        if isinstance(path, CasBasedDirectory):
    
    773
    +            return self.add_object(digest=dir_digest, buffer=path.pb2_directory.SerializeToString())
    
    774
    +
    
    771 775
             directory = remote_execution_pb2.Directory()
    
    772 776
     
    
    773 777
             for name in sorted(os.listdir(path)):
    

  • buildstream/element.py
    ... ... @@ -102,6 +102,7 @@ from .types import _KeyStrength, CoreWarnings
    102 102
     
    
    103 103
     from .storage.directory import Directory
    
    104 104
     from .storage._filebaseddirectory import FileBasedDirectory
    
    105
    +from .storage._casbaseddirectory import CasBasedDirectory
    
    105 106
     from .storage.directory import VirtualDirectoryError
    
    106 107
     
    
    107 108
     
    
    ... ... @@ -1634,35 +1635,38 @@ class Element(Plugin):
    1634 1635
                     # No collect directory existed
    
    1635 1636
                     collectvdir = None
    
    1636 1637
     
    
    1638
    +        assemblevdir = CasBasedDirectory(cas_cache=self._get_context().artifactcache.cas, ref=None)
    
    1639
    +        logsvdir = assemblevdir.descend("logs", create=True)
    
    1640
    +        metavdir = assemblevdir.descend("meta", create=True)
    
    1641
    +
    
    1637 1642
             # Create artifact directory structure
    
    1638 1643
             assembledir = os.path.join(rootdir, 'artifact')
    
    1639
    -        filesdir = os.path.join(assembledir, 'files')
    
    1640 1644
             logsdir = os.path.join(assembledir, 'logs')
    
    1641 1645
             metadir = os.path.join(assembledir, 'meta')
    
    1642
    -        buildtreedir = os.path.join(assembledir, 'buildtree')
    
    1643 1646
             os.mkdir(assembledir)
    
    1644
    -        if collect is not None and collectvdir is not None:
    
    1645
    -            os.mkdir(filesdir)
    
    1646 1647
             os.mkdir(logsdir)
    
    1647 1648
             os.mkdir(metadir)
    
    1648
    -        os.mkdir(buildtreedir)
    
    1649 1649
     
    
    1650
    -        # Hard link files from collect dir to files directory
    
    1651 1650
             if collect is not None and collectvdir is not None:
    
    1652
    -            collectvdir.export_files(filesdir, can_link=True)
    
    1651
    +            if isinstance(collectvdir, CasBasedDirectory):
    
    1652
    +                assemblevdir.fast_directory_import("files", collectvdir)
    
    1653
    +            else:
    
    1654
    +                filesvdir = assemblevdir.descend("files", create=True)
    
    1655
    +                filesvdir.import_files(collectvdir, can_link=True)
    
    1653 1656
     
    
    1657
    +        sandbox_vroot = sandbox.get_virtual_directory()
    
    1654 1658
             try:
    
    1655
    -            sandbox_vroot = sandbox.get_virtual_directory()
    
    1656 1659
                 sandbox_build_dir = sandbox_vroot.descend(
    
    1657 1660
                     self.get_variable('build-root').lstrip(os.sep).split(os.sep))
    
    1658
    -            # Hard link files from build-root dir to buildtreedir directory
    
    1659
    -            sandbox_build_dir.export_files(buildtreedir)
    
    1661
    +            assemblevdir.fast_directory_import("buildtree", sandbox_build_dir)
    
    1660 1662
             except VirtualDirectoryError:
    
    1661 1663
                 # Directory could not be found. Pre-virtual
    
    1662 1664
                 # directory behaviour was to continue silently
    
    1663
    -            # if the directory could not be found.
    
    1664
    -            pass
    
    1665
    +            # if the directory could not be found, but we must create
    
    1666
    +            # the directory.
    
    1667
    +            assemblevdir.descend("buildtree", create=True)
    
    1665 1668
     
    
    1669
    +        # Write some logs out to normal directories: logsdir and metadir
    
    1666 1670
             # Copy build log
    
    1667 1671
             log_filename = self._get_context().get_log_filename()
    
    1668 1672
             self._build_log_path = os.path.join(logsdir, 'build.log')
    
    ... ... @@ -1705,9 +1709,12 @@ class Element(Plugin):
    1705 1709
                 ]
    
    1706 1710
             }), os.path.join(metadir, 'workspaced-dependencies.yaml'))
    
    1707 1711
     
    
    1708
    -        with self.timed_activity("Caching artifact"):
    
    1709
    -            artifact_size = utils._get_dir_size(assembledir)
    
    1710
    -            self.__artifacts.commit(self, assembledir, self.__get_cache_keys_for_commit())
    
    1712
    +        metavdir.import_files(metadir)
    
    1713
    +        logsvdir.import_files(logsdir)
    
    1714
    +
    
    1715
    +        artifact_size = assemblevdir.get_size()
    
    1716
    +        with self.timed_activity("Caching artifact of size {}".format(artifact_size)):
    
    1717
    +            self.__artifacts.commit(self, assemblevdir, self.__get_cache_keys_for_commit())
    
    1711 1718
     
    
    1712 1719
             if collect is not None and collectvdir is None:
    
    1713 1720
                 raise ElementError(
    

  • buildstream/storage/_casbaseddirectory.py
    ... ... @@ -350,10 +350,13 @@ class CasBasedDirectory(Directory):
    350 350
             filenode.is_executable = is_executable
    
    351 351
             self.index[filename] = IndexEntry(filenode, modified=modified or filename in self.index)
    
    352 352
     
    
    353
    -    def _copy_link_from_filesystem(self, basename, filename):
    
    354
    -        self._add_new_link_direct(filename, os.readlink(os.path.join(basename, filename)))
    
    353
    +    def _copy_link_from_filesystem(self, filesystem_path, relative_path, destination_name):
    
    354
    +        """ filesystem_path should be a full path pointing to the source symlink.
    
    355
    +        relative_path should be the path we're importing to, which is used to turn absolute paths into relative ones.
    
    356
    +        destination_name should be the destination name in this directory. """
    
    357
    +        self._add_new_link_direct(relative_path, destination_name, os.readlink(filesystem_path))
    
    355 358
     
    
    356
    -    def _add_new_link_direct(self, name, target):
    
    359
    +    def _add_new_link_direct(self, relative_path, name, target):
    
    357 360
             existing_link = self._find_pb2_entry(name)
    
    358 361
             if existing_link:
    
    359 362
                 symlinknode = existing_link
    
    ... ... @@ -361,8 +364,16 @@ class CasBasedDirectory(Directory):
    361 364
                 symlinknode = self.pb2_directory.symlinks.add()
    
    362 365
             assert isinstance(symlinknode, remote_execution_pb2.SymlinkNode)
    
    363 366
             symlinknode.name = name
    
    364
    -        # A symlink node has no digest.
    
    367
    +
    
    368
    +        absolute = target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
    
    369
    +        if absolute:
    
    370
    +            distance_to_root = len(relative_path.split(CasBasedDirectory._pb2_path_sep))
    
    371
    +            # TODO: Using os.path.join and _pb2_path_sep in the same place is illogical
    
    372
    +            target = os.path.join(CasBasedDirectory._pb2_path_sep.join([".."] * distance_to_root), target[1:])
    
    365 373
             symlinknode.target = target
    
    374
    +
    
    375
    +        # A symlink node has no digest.
    
    376
    +
    
    366 377
             self.index[name] = IndexEntry(symlinknode, modified=(existing_link is not None))
    
    367 378
     
    
    368 379
         def delete_entry(self, name):
    
    ... ... @@ -527,7 +538,7 @@ class CasBasedDirectory(Directory):
    527 538
                     result.combine(subdir_result)
    
    528 539
                 elif os.path.islink(import_file):
    
    529 540
                     if self._check_replacement(entry, path_prefix, result):
    
    530
    -                    self._copy_link_from_filesystem(source_directory, entry)
    
    541
    +                    self._copy_link_from_filesystem(os.path.join(source_directory, entry), path_prefix, entry)
    
    531 542
                         result.files_written.append(relative_pathname)
    
    532 543
                 elif os.path.isdir(import_file):
    
    533 544
                     # A plain directory which already exists isn't a problem; just ignore it.
    
    ... ... @@ -602,7 +613,7 @@ class CasBasedDirectory(Directory):
    602 613
                             self.index[f] = IndexEntry(filenode, modified=True)
    
    603 614
                         else:
    
    604 615
                             assert isinstance(item, remote_execution_pb2.SymlinkNode)
    
    605
    -                        self._add_new_link_direct(name=f, target=item.target)
    
    616
    +                        self._add_new_link_direct(path_prefix, name=f, target=item.target)
    
    606 617
                     else:
    
    607 618
                         result.ignored.append(os.path.join(path_prefix, f))
    
    608 619
             return result
    
    ... ... @@ -637,7 +648,7 @@ class CasBasedDirectory(Directory):
    637 648
                     files = external_pathspec.list_relative_paths()
    
    638 649
     
    
    639 650
             if isinstance(external_pathspec, FileBasedDirectory):
    
    640
    -            source_directory = external_pathspec.get_underlying_directory()
    
    651
    +            source_directory = external_pathspec._get_underlying_directory()
    
    641 652
                 result = self._import_files_from_directory(source_directory, files=files)
    
    642 653
             elif isinstance(external_pathspec, str):
    
    643 654
                 source_directory = external_pathspec
    
    ... ... @@ -836,6 +847,28 @@ class CasBasedDirectory(Directory):
    836 847
             self._recalculate_recursing_up()
    
    837 848
             self._recalculate_recursing_down()
    
    838 849
     
    
    850
    +    def get_size(self):
    
    851
    +        total = len(self.pb2_directory.SerializeToString())
    
    852
    +        for i in self.index.values():
    
    853
    +            if isinstance(i.buildstream_object, CasBasedDirectory):
    
    854
    +                total += i.buildstream_object.get_size()
    
    855
    +            elif isinstance(i.pb_object, remote_execution_pb2.FileNode):
    
    856
    +                src_name = self.cas_cache.objpath(i.pb_object.digest)
    
    857
    +                filesize = os.stat(src_name).st_size
    
    858
    +                total += filesize
    
    859
    +            # Symlink nodes are encoded as part of the directory serialization.
    
    860
    +        return total
    
    861
    +
    
    862
    +    def fast_directory_import(self, dirname, other_directory):
    
    863
    +        assert dirname not in self.index
    
    864
    +        if isinstance(other_directory, CasBasedDirectory):
    
    865
    +            self.index[dirname] = IndexEntry(other_directory.pb_object,
    
    866
    +                                             buildstream_object=other_directory.buildstream_object)
    
    867
    +        else:
    
    868
    +            # Revert to the old method.
    
    869
    +            subdir = self.descend(dirname, create=True)
    
    870
    +            subdir.import_files(other_directory, can_link=True)
    
    871
    +
    
    839 872
         def _get_identifier(self):
    
    840 873
             path = ""
    
    841 874
             if self.parent:
    

  • buildstream/storage/_filebaseddirectory.py
    ... ... @@ -30,6 +30,7 @@ See also: :ref:`sandboxing`.
    30 30
     import os
    
    31 31
     import time
    
    32 32
     from .directory import Directory, VirtualDirectoryError
    
    33
    +from .. import utils
    
    33 34
     from ..utils import link_files, copy_files, list_relative_paths, _get_link_mtime, _magic_timestamp
    
    34 35
     from ..utils import _set_deterministic_user, _set_deterministic_mtime
    
    35 36
     
    
    ... ... @@ -125,6 +126,13 @@ class FileBasedDirectory(Directory):
    125 126
             self._mark_changed()
    
    126 127
             return import_result
    
    127 128
     
    
    129
    +    def fast_directory_import(self, dirname, other_directory):
    
    130
    +        # We can't do a fast import into a FileBasedDirectory, so this
    
    131
    +        # falls back to import_files.
    
    132
    +        assert dirname not in self.index
    
    133
    +        subdir = self.descend(dirname, create=True)
    
    134
    +        subdir.import_files(other_directory, can_link=True)
    
    135
    +
    
    128 136
         def _mark_changed(self):
    
    129 137
             self._directory_read = False
    
    130 138
     
    
    ... ... @@ -201,6 +209,9 @@ class FileBasedDirectory(Directory):
    201 209
     
    
    202 210
             return list_relative_paths(self.external_directory)
    
    203 211
     
    
    212
    +    def get_size(self):
    
    213
    +        return utils._get_dir_size(self.external_directory)
    
    214
    +
    
    204 215
         def __str__(self):
    
    205 216
             # This returns the whole path (since we don't know where the directory started)
    
    206 217
             # which exposes the sandbox directory; we will have to assume for the time being
    

  • buildstream/storage/directory.py
    ... ... @@ -99,6 +99,30 @@ class Directory():
    99 99
     
    
    100 100
             raise NotImplementedError()
    
    101 101
     
    
    102
    +    def fast_directory_import(self, dirname, other_directory):
    
    103
    +        """Import other_directory as a new directory in this one.
    
    104
    +
    
    105
    +        This is a potentially faster method than import_files with
    
    106
    +        fewer options. dirname must not already exist, and all files
    
    107
    +        are imported unconditionally. It is assumed that it is
    
    108
    +        acceptable to use filesystem hard links to files in
    
    109
    +        other_directory. You cannot update utimes or get a
    
    110
    +        FileListResult.
    
    111
    +
    
    112
    +        This only provides a benefit if both this and other_directory
    
    113
    +        are CAS-based directories. In other cases, it will fall back
    
    114
    +        to import_files.
    
    115
    +
    
    116
    +        Args:
    
    117
    +          dirname: The name to call the subdirectory in this
    
    118
    +          directory. This must not already exist in this directory.
    
    119
    +
    
    120
    +          other_directory: The directory to import.
    
    121
    +
    
    122
    +        """
    
    123
    +
    
    124
    +        raise NotImplementedError()
    
    125
    +
    
    102 126
         def export_files(self, to_directory, *, can_link=False, can_destroy=False):
    
    103 127
             """Copies everything from this into to_directory.
    
    104 128
     
    
    ... ... @@ -176,3 +200,9 @@ class Directory():
    176 200
     
    
    177 201
             """
    
    178 202
             raise NotImplementedError()
    
    203
    +
    
    204
    +    def get_size(self):
    
    205
    +        """ Get an approximation of the storage space in bytes used by this directory
    
    206
    +        and all files and subdirectories in it. Storage space varies by implementation
    
    207
    +        and effective space used may be lower than this number due to deduplication. """
    
    208
    +        raise NotImplementedError()

  • tests/storage/virtual_directory_import.py
    ... ... @@ -149,10 +149,10 @@ def resolve_symlinks(path, root):
    149 149
                 if target.startswith(os.path.sep):
    
    150 150
                     # Absolute link - relative to root
    
    151 151
                     location = os.path.join(root, target, tail)
    
    152
    +                return resolve_symlinks(location, root)
    
    152 153
                 else:
    
    153
    -                # Relative link - relative to symlink location
    
    154
    -                location = os.path.join(location, target)
    
    155
    -            return resolve_symlinks(location, root)
    
    154
    +                return resolve_symlinks(os.path.join(os.path.join(*components[:i]), target, tail), root)
    
    155
    +
    
    156 156
         # If we got here, no symlinks were found. Add on the final component and return.
    
    157 157
         location = os.path.join(location, components[-1])
    
    158 158
         return location
    
    ... ... @@ -199,7 +199,13 @@ def _import_test(tmpdir, original, overlay, generator_function, verify_contents=
    199 199
                         pass
    
    200 200
                     else:
    
    201 201
                         assert os.path.islink(realpath)
    
    202
    -                    assert os.readlink(realpath) == content
    
    202
    +                    # We expect all storage to normalise absolute symlinks.
    
    203
    +                    depth = len(path.split(os.path.sep)) - 1
    
    204
    +                    if content.startswith(os.path.sep):
    
    205
    +                        assert os.readlink(realpath) == os.path.join(os.path.sep.join([".."] * depth), content[1:])
    
    206
    +                    else:
    
    207
    +                        assert os.readlink(realpath) == content
    
    208
    +
    
    203 209
                 elif typename == 'D':
    
    204 210
                     # We can't do any more tests than this because it
    
    205 211
                     # depends on things present in the original. Blank
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]