[Notes] [Git][BuildStream/buildstream][jmac/cache_artifacts_with_vdir] 8 commits: Add get_size to CasBasedDirectory



Title: GitLab

Jim MacArthur pushed to branch jmac/cache_artifacts_with_vdir at BuildStream / buildstream

Commits:

5 changed files:

Changes:

  • buildstream/element.py
    ... ... @@ -1638,9 +1638,7 @@ class Element(Plugin):
    1638 1638
                     collectvdir = None
    
    1639 1639
     
    
    1640 1640
             assemblevdir = CasBasedDirectory(cas_cache=self._get_context().artifactcache.cas, ref=None)
    
    1641
    -        filesvdir = assemblevdir.descend("files", create=True)
    
    1642 1641
             logsvdir = assemblevdir.descend("logs", create=True)
    
    1643
    -        buildtreevdir = assemblevdir.descend("buildtree", create=True)
    
    1644 1642
             metavdir = assemblevdir.descend("meta", create=True)
    
    1645 1643
     
    
    1646 1644
             # Create artifact directory structure
    
    ... ... @@ -1652,14 +1650,14 @@ class Element(Plugin):
    1652 1650
             os.mkdir(metadir)
    
    1653 1651
     
    
    1654 1652
             if collect is not None and collectvdir is not None:
    
    1655
    -            filesvdir.import_files(collectvdir, can_link=True)
    
    1656
    -
    
    1653
    +            assemblevdir.fast_directory_import("files", collectvdir)
    
    1657 1654
             try:
    
    1658 1655
                 sandbox_vroot = sandbox.get_virtual_directory()
    
    1659 1656
                 sandbox_build_dir = sandbox_vroot.descend(
    
    1660 1657
                     self.get_variable('build-root').lstrip(os.sep).split(os.sep))
    
    1661
    -            buildtreevdir.import_files(sandbox_build_dir)
    
    1658
    +            assemblevdir.fast_directory_import("buildtree", sandbox_build_dir)
    
    1662 1659
             except VirtualDirectoryError:
    
    1660
    +            assemblevdir.descend("buildtree", create=True)
    
    1663 1661
                 # Directory could not be found. Pre-virtual
    
    1664 1662
                 # directory behaviour was to continue silently
    
    1665 1663
                 # if the directory could not be found.
    
    ... ... @@ -1709,10 +1707,10 @@ class Element(Plugin):
    1709 1707
             }), os.path.join(metadir, 'workspaced-dependencies.yaml'))
    
    1710 1708
     
    
    1711 1709
             metavdir.import_files(metadir)
    
    1710
    +        logsvdir.import_files(logsdir)
    
    1712 1711
     
    
    1713
    -        with self.timed_activity("Caching artifact"):
    
    1714
    -            # TODO: I don't know the artifact size! Can we get _get_dir_size to work for vdirs?
    
    1715
    -            artifact_size = 1024
    
    1712
    +        artifact_size = assemblevdir.get_size()
    
    1713
    +        with self.timed_activity("Caching artifact of size {}".format(artifact_size)):
    
    1716 1714
                 self.__artifacts.commit(self, assemblevdir, self.__get_cache_keys_for_commit())
    
    1717 1715
     
    
    1718 1716
             if collect is not None and collectvdir is None:
    

  • buildstream/storage/_casbaseddirectory.py
    ... ... @@ -350,10 +350,13 @@ class CasBasedDirectory(Directory):
    350 350
             filenode.is_executable = is_executable
    
    351 351
             self.index[filename] = IndexEntry(filenode, modified=modified or filename in self.index)
    
    352 352
     
    
    353
    -    def _copy_link_from_filesystem(self, basename, filename):
    
    354
    -        self._add_new_link_direct(filename, os.readlink(os.path.join(basename, filename)))
    
    353
    +    def _copy_link_from_filesystem(self, filesystem_path, relative_path, destination_name):
    
    354
    +        """ filesystem_path should be a full path pointing to the source symlink.
    
    355
    +        relative_path should be the path we're importing to, which is used to turn absolute paths into relative ones.
    
    356
    +        destination_name should be the destination name in this directory. """
    
    357
    +        self._add_new_link_direct(relative_path, destination_name, os.readlink(filesystem_path))
    
    355 358
     
    
    356
    -    def _add_new_link_direct(self, name, target):
    
    359
    +    def _add_new_link_direct(self, relative_path, name, target):
    
    357 360
             existing_link = self._find_pb2_entry(name)
    
    358 361
             if existing_link:
    
    359 362
                 symlinknode = existing_link
    
    ... ... @@ -361,8 +364,16 @@ class CasBasedDirectory(Directory):
    361 364
                 symlinknode = self.pb2_directory.symlinks.add()
    
    362 365
             assert isinstance(symlinknode, remote_execution_pb2.SymlinkNode)
    
    363 366
             symlinknode.name = name
    
    364
    -        # A symlink node has no digest.
    
    367
    +
    
    368
    +        absolute = target.startswith(CasBasedDirectory._pb2_absolute_path_prefix)
    
    369
    +        if absolute:
    
    370
    +            distance_to_root = len(relative_path.split(CasBasedDirectory._pb2_path_sep))
    
    371
    +            # TODO: Using os.path.join and _pb2_path_sep in the same place is illogical
    
    372
    +            target = os.path.join(CasBasedDirectory._pb2_path_sep.join([".."] * distance_to_root), target[1:])
    
    365 373
             symlinknode.target = target
    
    374
    +
    
    375
    +        # A symlink node has no digest.
    
    376
    +
    
    366 377
             self.index[name] = IndexEntry(symlinknode, modified=(existing_link is not None))
    
    367 378
     
    
    368 379
         def delete_entry(self, name):
    
    ... ... @@ -527,7 +538,7 @@ class CasBasedDirectory(Directory):
    527 538
                     result.combine(subdir_result)
    
    528 539
                 elif os.path.islink(import_file):
    
    529 540
                     if self._check_replacement(entry, path_prefix, result):
    
    530
    -                    self._copy_link_from_filesystem(source_directory, entry)
    
    541
    +                    self._copy_link_from_filesystem(os.path.join(source_directory, entry), path_prefix, entry)
    
    531 542
                         result.files_written.append(relative_pathname)
    
    532 543
                 elif os.path.isdir(import_file):
    
    533 544
                     # A plain directory which already exists isn't a problem; just ignore it.
    
    ... ... @@ -600,11 +611,9 @@ class CasBasedDirectory(Directory):
    600 611
                             filenode = self.pb2_directory.files.add(digest=item.digest, name=f,
    
    601 612
                                                                     is_executable=item.is_executable)
    
    602 613
                             self.index[f] = IndexEntry(filenode, modified=True)
    
    603
    -                        # TODO: No idea if we actually need this (the digest already existed...)
    
    604
    -                        self.cas_cache.add_object(path=os.path.join(path_prefix, f), digest=item.digest)
    
    605 614
                         else:
    
    606 615
                             assert isinstance(item, remote_execution_pb2.SymlinkNode)
    
    607
    -                        self._add_new_link_direct(name=f, target=item.target)
    
    616
    +                        self._add_new_link_direct(path_prefix, name=f, target=item.target)
    
    608 617
                     else:
    
    609 618
                         result.ignored.append(os.path.join(path_prefix, f))
    
    610 619
             return result
    
    ... ... @@ -838,6 +847,28 @@ class CasBasedDirectory(Directory):
    838 847
             self._recalculate_recursing_up()
    
    839 848
             self._recalculate_recursing_down()
    
    840 849
     
    
    850
    +    def get_size(self):
    
    851
    +        total = len(self.pb2_directory.SerializeToString())
    
    852
    +        for i in self.index.values():
    
    853
    +            if isinstance(i.buildstream_object, CasBasedDirectory):
    
    854
    +                total += i.buildstream_object.get_size()
    
    855
    +            elif isinstance(i.pb_object, remote_execution_pb2.FileNode):
    
    856
    +                src_name = self.cas_cache.objpath(i.pb_object.digest)
    
    857
    +                filesize = os.stat(src_name).st_size
    
    858
    +                total += filesize
    
    859
    +            # Symlink nodes are encoded as part of the directory serialization.
    
    860
    +        return total
    
    861
    +
    
    862
    +    def fast_directory_import(self, dirname, other_directory):
    
    863
    +        assert dirname not in self.index
    
    864
    +        if isinstance(other_directory, CasBasedDirectory):
    
    865
    +            self.index[dirname] = IndexEntry(other_directory.pb_object,
    
    866
    +                                             buildstream_object=other_directory.buildstream_object)
    
    867
    +        else:
    
    868
    +            # Revert to the old method.
    
    869
    +            subdir = self.descend(dirname, create=True)
    
    870
    +            subdir.import_files(other_directory, can_link=True)
    
    871
    +
    
    841 872
         def _get_identifier(self):
    
    842 873
             path = ""
    
    843 874
             if self.parent:
    

  • buildstream/storage/_filebaseddirectory.py
    ... ... @@ -125,6 +125,13 @@ class FileBasedDirectory(Directory):
    125 125
             self._mark_changed()
    
    126 126
             return import_result
    
    127 127
     
    
    128
    +    def fast_directory_import(self, dirname, other_directory):
    
    129
    +        # We can't do a fast import into a FileBasedDirectory, so this
    
    130
    +        # falls back to import_files.
    
    131
    +        assert dirname not in self.index
    
    132
    +        subdir = self.descend(dirname, create=True)
    
    133
    +        subdir.import_files(other_directory, can_link=True)
    
    134
    +
    
    128 135
         def _mark_changed(self):
    
    129 136
             self._directory_read = False
    
    130 137
     
    

  • buildstream/storage/directory.py
    ... ... @@ -99,6 +99,30 @@ class Directory():
    99 99
     
    
    100 100
             raise NotImplementedError()
    
    101 101
     
    
    102
    +    def fast_directory_import(self, dirname, other_directory):
    
    103
    +        """Import other_directory as a new directory in this one.
    
    104
    +
    
    105
    +        This is a potentially faster method than import_files with
    
    106
    +        fewer options. dirname must not already exist, and all files
    
    107
    +        are imported unconditionally. It is assumed that it is
    
    108
    +        acceptable to use filesystem hard links to files in
    
    109
    +        other_directory. You cannot update utimes or get a
    
    110
    +        FileListResult.
    
    111
    +
    
    112
    +        This only provides a benefit if both this and other_directory
    
    113
    +        are CAS-based directories. In other cases, it will fall back
    
    114
    +        to import_files.
    
    115
    +
    
    116
    +        Args:
    
    117
    +          dirname: The name to call the subdirectory in this
    
    118
    +          directory. This must not already exist in this directory.
    
    119
    +
    
    120
    +          other_directory: The directory to import.
    
    121
    +
    
    122
    +        """
    
    123
    +
    
    124
    +        raise NotImplementedError()
    
    125
    +
    
    102 126
         def export_files(self, to_directory, *, can_link=False, can_destroy=False):
    
    103 127
             """Copies everything from this into to_directory.
    
    104 128
     
    

  • tests/storage/virtual_directory_import.py
    ... ... @@ -149,10 +149,10 @@ def resolve_symlinks(path, root):
    149 149
                 if target.startswith(os.path.sep):
    
    150 150
                     # Absolute link - relative to root
    
    151 151
                     location = os.path.join(root, target, tail)
    
    152
    +                return resolve_symlinks(location, root)
    
    152 153
                 else:
    
    153
    -                # Relative link - relative to symlink location
    
    154
    -                location = os.path.join(location, target)
    
    155
    -            return resolve_symlinks(location, root)
    
    154
    +                return resolve_symlinks(os.path.join(os.path.join(*components[:i]), target, tail), root)
    
    155
    +
    
    156 156
         # If we got here, no symlinks were found. Add on the final component and return.
    
    157 157
         location = os.path.join(location, components[-1])
    
    158 158
         return location
    
    ... ... @@ -199,7 +199,13 @@ def _import_test(tmpdir, original, overlay, generator_function, verify_contents=
    199 199
                         pass
    
    200 200
                     else:
    
    201 201
                         assert os.path.islink(realpath)
    
    202
    -                    assert os.readlink(realpath) == content
    
    202
    +                    # We expect all storage to normalise absolute symlinks.
    
    203
    +                    depth = len(path.split(os.path.sep)) - 1
    
    204
    +                    if content.startswith(os.path.sep):
    
    205
    +                        assert os.readlink(realpath) == os.path.join(os.path.sep.join([".."] * depth), content[1:])
    
    206
    +                    else:
    
    207
    +                        assert os.readlink(realpath) == content
    
    208
    +
    
    203 209
                 elif typename == 'D':
    
    204 210
                     # We can't do any more tests than this because it
    
    205 211
                     # depends on things present in the original. Blank
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]