[Notes] [Git][BuildStream/buildstream][jonathan/expose-downloadablefilesource] Make DownloadableFileSource clearly define public and private fields



Title: GitLab

Jonathan Maw pushed to branch jonathan/expose-downloadablefilesource at BuildStream / buildstream

Commits:

5 changed files:

Changes:

  • buildstream/downloadablefilesource.py
    ... ... @@ -2,6 +2,13 @@
    2 2
     DownloadableFileSource - Abstract class for downloading files
    
    3 3
     =============================================================
    
    4 4
     A base abstract class for source implementations which download a file.
    
    5
    +
    
    6
    +Derived classes must write their own stage() implementation, using the
    
    7
    +public APIs exposed in this class.
    
    8
    +
    
    9
    +Derived classes must also chain up to the parent method in their get_unique_key()
    
    10
    +implementations.
    
    11
    +
    
    5 12
     """
    
    6 13
     
    
    7 14
     import os
    
    ... ... @@ -19,50 +26,54 @@ class DownloadableFileSource(Source):
    19 26
     
    
    20 27
         COMMON_CONFIG_KEYS = Source.COMMON_CONFIG_KEYS + ['url', 'ref', 'etag']
    
    21 28
     
    
    29
    +    #####################################
    
    30
    +    # Implementations of abstract methods
    
    31
    +    #####################################
    
    32
    +
    
    22 33
         def configure(self, node):
    
    23
    -        self.original_url = self.node_get_member(node, str, 'url')
    
    24
    -        self.ref = self.node_get_member(node, str, 'ref', None)
    
    25
    -        self.url = self.translate_url(self.original_url)
    
    26
    -        self._warn_deprecated_etag(node)
    
    34
    +        self.__original_url = self.node_get_member(node, str, 'url')
    
    35
    +        self.__ref = self.node_get_member(node, str, 'ref', None)
    
    36
    +        self.__url = self.translate_url(self.__original_url)
    
    37
    +        self.__warn_deprecated_etag(node)
    
    27 38
     
    
    28 39
         def preflight(self):
    
    29 40
             return
    
    30 41
     
    
    31 42
         def get_unique_key(self):
    
    32
    -        return [self.original_url, self.ref]
    
    43
    +        return [self.__original_url, self.__ref]
    
    33 44
     
    
    34 45
         def get_consistency(self):
    
    35
    -        if self.ref is None:
    
    46
    +        if self.__ref is None:
    
    36 47
                 return Consistency.INCONSISTENT
    
    37 48
     
    
    38
    -        if os.path.isfile(self._get_mirror_file()):
    
    49
    +        if os.path.isfile(self.get_mirror_file()):
    
    39 50
                 return Consistency.CACHED
    
    40 51
     
    
    41 52
             else:
    
    42 53
                 return Consistency.RESOLVED
    
    43 54
     
    
    44 55
         def load_ref(self, node):
    
    45
    -        self.ref = self.node_get_member(node, str, 'ref', None)
    
    46
    -        self._warn_deprecated_etag(node)
    
    56
    +        self.__ref = self.node_get_member(node, str, 'ref', None)
    
    57
    +        self.__warn_deprecated_etag(node)
    
    47 58
     
    
    48 59
         def get_ref(self):
    
    49
    -        return self.ref
    
    60
    +        return self.__ref
    
    50 61
     
    
    51 62
         def set_ref(self, ref, node):
    
    52
    -        node['ref'] = self.ref = ref
    
    63
    +        node['ref'] = self.__ref = ref
    
    53 64
     
    
    54 65
         def track(self):
    
    55 66
             # there is no 'track' field in the source to determine what/whether
    
    56 67
             # or not to update refs, because tracking a ref is always a conscious
    
    57 68
             # decision by the user.
    
    58
    -        with self.timed_activity("Tracking {}".format(self.url),
    
    69
    +        with self.timed_activity("Tracking {}".format(self.__url),
    
    59 70
                                      silent_nested=True):
    
    60
    -            new_ref = self._ensure_mirror()
    
    71
    +            new_ref = self.ensure_mirror()
    
    61 72
     
    
    62
    -            if self.ref and self.ref != new_ref:
    
    73
    +            if self.__ref and self.__ref != new_ref:
    
    63 74
                     detail = "When tracking, new ref differs from current ref:\n" \
    
    64
    -                    + "  Tracked URL: {}\n".format(self.url) \
    
    65
    -                    + "  Current ref: {}\n".format(self.ref) \
    
    75
    +                    + "  Tracked URL: {}\n".format(self.__url) \
    
    76
    +                    + "  Current ref: {}\n".format(self.__ref) \
    
    66 77
                         + "  New ref: {}\n".format(new_ref)
    
    67 78
                     self.warn("Potential man-in-the-middle attack!", detail=detail)
    
    68 79
     
    
    ... ... @@ -74,49 +85,40 @@ class DownloadableFileSource(Source):
    74 85
             # file to be already cached because Source.fetch() will
    
    75 86
             # not be called if the source is already Consistency.CACHED.
    
    76 87
             #
    
    77
    -        if os.path.isfile(self._get_mirror_file()):
    
    88
    +        if os.path.isfile(self.get_mirror_file()):
    
    78 89
                 return  # pragma: nocover
    
    79 90
     
    
    80 91
             # Download the file, raise hell if the sha256sums don't match,
    
    81 92
             # and mirror the file otherwise.
    
    82
    -        with self.timed_activity("Fetching {}".format(self.url), silent_nested=True):
    
    83
    -            sha256 = self._ensure_mirror()
    
    84
    -            if sha256 != self.ref:
    
    93
    +        with self.timed_activity("Fetching {}".format(self.__url), silent_nested=True):
    
    94
    +            sha256 = self.ensure_mirror()
    
    95
    +            if sha256 != self.__ref:
    
    85 96
                     raise SourceError("File downloaded from {} has sha256sum '{}', not '{}'!"
    
    86
    -                                  .format(self.url, sha256, self.ref))
    
    97
    +                                  .format(self.__url, sha256, self.__ref))
    
    98
    +    ################
    
    99
    +    # Public methods
    
    100
    +    ################
    
    87 101
     
    
    88
    -    def _warn_deprecated_etag(self, node):
    
    89
    -        etag = self.node_get_member(node, str, 'etag', None)
    
    90
    -        if etag:
    
    91
    -            provenance = self.node_provenance(node, member_name='etag')
    
    92
    -            self.warn('{} "etag" is deprecated and ignored.'.format(provenance))
    
    102
    +    def ensure_mirror(self):
    
    103
    +        """Downloads from the url and caches it according to its sha256sum
    
    93 104
     
    
    94
    -    def _get_etag(self, ref):
    
    95
    -        etagfilename = os.path.join(self._get_mirror_dir(), '{}.etag'.format(ref))
    
    96
    -        if os.path.exists(etagfilename):
    
    97
    -            with open(etagfilename, 'r') as etagfile:
    
    98
    -                return etagfile.read()
    
    99
    -
    
    100
    -        return None
    
    105
    +        Returns:
    
    106
    +           (str): The sha256sum of the mirrored file
    
    101 107
     
    
    102
    -    def _store_etag(self, ref, etag):
    
    103
    -        etagfilename = os.path.join(self._get_mirror_dir(), '{}.etag'.format(ref))
    
    104
    -        with utils.save_file_atomic(etagfilename) as etagfile:
    
    105
    -            etagfile.write(etag)
    
    106
    -
    
    107
    -    def _ensure_mirror(self):
    
    108
    -        # Downloads from the url and caches it according to its sha256sum.
    
    108
    +        Raises:
    
    109
    +           :class:`.SourceError`
    
    110
    +        """
    
    109 111
             try:
    
    110 112
                 with self.tempdir() as td:
    
    111
    -                default_name = os.path.basename(self.url)
    
    112
    -                request = urllib.request.Request(self.url)
    
    113
    +                default_name = os.path.basename(self.__url)
    
    114
    +                request = urllib.request.Request(self.__url)
    
    113 115
                     request.add_header('Accept', '*/*')
    
    114 116
     
    
    115 117
                     # We do not use etag in case what we have in cache is
    
    116 118
                     # not matching ref in order to be able to recover from
    
    117 119
                     # corrupted download.
    
    118
    -                if self.ref:
    
    119
    -                    etag = self._get_etag(self.ref)
    
    120
    +                if self.__ref:
    
    121
    +                    etag = self.__get_etag(self.__ref)
    
    120 122
     
    
    121 123
                         # Do not re-download the file if the ETag matches.
    
    122 124
                         if etag and self.get_consistency() == Consistency.CACHED:
    
    ... ... @@ -134,17 +136,17 @@ class DownloadableFileSource(Source):
    134 136
                             shutil.copyfileobj(response, dest)
    
    135 137
     
    
    136 138
                     # Make sure url-specific mirror dir exists.
    
    137
    -                if not os.path.isdir(self._get_mirror_dir()):
    
    138
    -                    os.makedirs(self._get_mirror_dir())
    
    139
    +                if not os.path.isdir(self.__get_mirror_dir()):
    
    140
    +                    os.makedirs(self.__get_mirror_dir())
    
    139 141
     
    
    140 142
                     # Store by sha256sum
    
    141 143
                     sha256 = utils.sha256sum(local_file)
    
    142 144
                     # Even if the file already exists, move the new file over.
    
    143 145
                     # In case the old file was corrupted somehow.
    
    144
    -                os.rename(local_file, self._get_mirror_file(sha256))
    
    146
    +                os.rename(local_file, self.get_mirror_file(sha=sha256))
    
    145 147
     
    
    146 148
                     if etag:
    
    147
    -                    self._store_etag(sha256, etag)
    
    149
    +                    self.__store_etag(sha256, etag)
    
    148 150
                     return sha256
    
    149 151
     
    
    150 152
             except urllib.error.HTTPError as e:
    
    ... ... @@ -152,17 +154,51 @@ class DownloadableFileSource(Source):
    152 154
                     # 304 Not Modified.
    
    153 155
                     # Because we use etag only for matching ref, currently specified ref is what
    
    154 156
                     # we would have downloaded.
    
    155
    -                return self.ref
    
    157
    +                return self.__ref
    
    156 158
                 raise SourceError("{}: Error mirroring {}: {}"
    
    157
    -                              .format(self, self.url, e), temporary=True) from e
    
    159
    +                              .format(self, self.__url, e), temporary=True) from e
    
    158 160
     
    
    159 161
             except (urllib.error.URLError, urllib.error.ContentTooShortError, OSError) as e:
    
    160 162
                 raise SourceError("{}: Error mirroring {}: {}"
    
    161
    -                              .format(self, self.url, e), temporary=True) from e
    
    163
    +                              .format(self, self.__url, e), temporary=True) from e
    
    164
    +
    
    165
    +    def get_mirror_file(self, *, sha=None):
    
    166
    +        """Calculates the path to where this source stores the downloaded file
    
    162 167
     
    
    163
    -    def _get_mirror_dir(self):
    
    168
    +        Users are expected to read the file this points to when staging their source.
    
    169
    +
    
    170
    +        Returns:
    
    171
    +           (str): A path to the file the source should be cached at
    
    172
    +        """
    
    173
    +        return os.path.join(self.__get_mirror_dir(), sha or self.__ref)
    
    174
    +
    
    175
    +    #######################
    
    176
    +    # Local Private methods
    
    177
    +    #######################
    
    178
    +
    
    179
    +    def __get_mirror_dir(self):
    
    180
    +        # Get the directory this source should store things in, for a given URL
    
    164 181
             return os.path.join(self.get_mirror_directory(),
    
    165
    -                            utils.url_directory_name(self.original_url))
    
    182
    +                            utils.url_directory_name(self.__original_url))
    
    183
    +
    
    184
    +    def __warn_deprecated_etag(self, node):
    
    185
    +        # Warn the user if the 'etag' field is being used
    
    186
    +        etag = self.node_get_member(node, str, 'etag', None)
    
    187
    +        if etag:
    
    188
    +            provenance = self.node_provenance(node, member_name='etag')
    
    189
    +            self.warn('{} "etag" is deprecated and ignored.'.format(provenance))
    
    166 190
     
    
    167
    -    def _get_mirror_file(self, sha=None):
    
    168
    -        return os.path.join(self._get_mirror_dir(), sha or self.ref)
    191
    +    def __get_etag(self, ref):
    
    192
    +        # Retrieve the etag's data from disk
    
    193
    +        etagfilename = os.path.join(self.__get_mirror_dir(), '{}.etag'.format(ref))
    
    194
    +        if os.path.exists(etagfilename):
    
    195
    +            with open(etagfilename, 'r') as etagfile:
    
    196
    +                return etagfile.read()
    
    197
    +
    
    198
    +        return None
    
    199
    +
    
    200
    +    def __store_etag(self, ref, etag):
    
    201
    +        # Write the etag's data to disk
    
    202
    +        etagfilename = os.path.join(self.__get_mirror_dir(), '{}.etag'.format(ref))
    
    203
    +        with utils.save_file_atomic(etagfilename) as etagfile:
    
    204
    +            etagfile.write(etag)

  • buildstream/plugins/sources/deb.py
    ... ... @@ -70,7 +70,7 @@ class DebSource(TarSource):
    70 70
         @contextmanager
    
    71 71
         def _get_tar(self):
    
    72 72
             with ExitStack() as context:
    
    73
    -            deb_file = context.enter_context(open(self._get_mirror_file(), 'rb'))
    
    73
    +            deb_file = context.enter_context(open(self.get_mirror_file(), 'rb'))
    
    74 74
                 arpy_archive = arpy.Archive(fileobj=deb_file)
    
    75 75
                 arpy_archive.read_all_headers()
    
    76 76
                 data_tar_arpy = [v for k, v in arpy_archive.archived_files.items() if b"data.tar" in k][0]
    

  • buildstream/plugins/sources/remote.py
    ... ... @@ -64,7 +64,13 @@ class RemoteSource(DownloadableFileSource):
    64 64
         def configure(self, node):
    
    65 65
             super().configure(node)
    
    66 66
     
    
    67
    -        self.filename = self.node_get_member(node, str, 'filename', os.path.basename(self.url))
    
    67
    +        self.filename = self.node_get_member(node, str, 'filename', None)
    
    68
    +        if not self.filename:
    
    69
    +            # url in DownloadableFileSource is private, so we read it again
    
    70
    +            original_url = self.node_get_member(node, str, 'url')
    
    71
    +            url = self.translate_url(original_url)
    
    72
    +            self.filename = os.path.basename(url)
    
    73
    +
    
    68 74
             self.executable = self.node_get_member(node, bool, 'executable', False)
    
    69 75
     
    
    70 76
             if os.sep in self.filename:
    
    ... ... @@ -81,7 +87,7 @@ class RemoteSource(DownloadableFileSource):
    81 87
             dest = os.path.join(directory, self.filename)
    
    82 88
             with self.timed_activity("Staging remote file to {}".format(dest)):
    
    83 89
     
    
    84
    -            utils.safe_copy(self._get_mirror_file(), dest)
    
    90
    +            utils.safe_copy(self.get_mirror_file(), dest)
    
    85 91
     
    
    86 92
                 # To prevent user's umask introducing variability here, explicitly set
    
    87 93
                 # file modes.
    

  • buildstream/plugins/sources/tar.py
    ... ... @@ -71,6 +71,10 @@ class TarSource(DownloadableFileSource):
    71 71
         def configure(self, node):
    
    72 72
             super().configure(node)
    
    73 73
     
    
    74
    +        # url in DownloadableFileSource is private, so we read it again
    
    75
    +        original_url = self.node_get_member(node, str, 'url')
    
    76
    +        self.url = self.translate_url(original_url)
    
    77
    +
    
    74 78
             self.base_dir = self.node_get_member(node, str, 'base-dir', '*') or None
    
    75 79
     
    
    76 80
             self.node_validate(node, DownloadableFileSource.COMMON_CONFIG_KEYS + ['base-dir'])
    
    ... ... @@ -88,7 +92,7 @@ class TarSource(DownloadableFileSource):
    88 92
             assert self.host_lzip
    
    89 93
             with TemporaryFile() as lzip_stdout:
    
    90 94
                 with ExitStack() as context:
    
    91
    -                lzip_file = context.enter_context(open(self._get_mirror_file(), 'r'))
    
    95
    +                lzip_file = context.enter_context(open(self.get_mirror_file(), 'r'))
    
    92 96
                     self.call([self.host_lzip, '-d'],
    
    93 97
                               stdin=lzip_file,
    
    94 98
                               stdout=lzip_stdout)
    
    ... ... @@ -103,7 +107,7 @@ class TarSource(DownloadableFileSource):
    103 107
                     with tarfile.open(fileobj=lzip_dec, mode='r:') as tar:
    
    104 108
                         yield tar
    
    105 109
             else:
    
    106
    -            with tarfile.open(self._get_mirror_file()) as tar:
    
    110
    +            with tarfile.open(self.get_mirror_file()) as tar:
    
    107 111
                     yield tar
    
    108 112
     
    
    109 113
         def stage(self, directory):
    

  • buildstream/plugins/sources/zip.py
    ... ... @@ -84,7 +84,7 @@ class ZipSource(DownloadableFileSource):
    84 84
             noexec_rights = exec_rights & ~(stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
    
    85 85
     
    
    86 86
             try:
    
    87
    -            with zipfile.ZipFile(self._get_mirror_file()) as archive:
    
    87
    +            with zipfile.ZipFile(self.get_mirror_file()) as archive:
    
    88 88
                     base_dir = None
    
    89 89
                     if self.base_dir:
    
    90 90
                         base_dir = self._find_base_dir(archive, self.base_dir)
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]