[Notes] [Git][BuildStream/buildstream][507-some-log-lines-appear-to-be-duplicates] 11 commits: _exceptions.py: Modify BstError API to allow optional retry



Title: GitLab

Tristan Maat pushed to branch 507-some-log-lines-appear-to-be-duplicates at BuildStream / buildstream

Commits:

12 changed files:

Changes:

  • buildstream/_artifactcache/cascache.py
    ... ... @@ -240,7 +240,8 @@ class CASCache(ArtifactCache):
    240 240
     
    
    241 241
                 except grpc.RpcError as e:
    
    242 242
                     if e.code() != grpc.StatusCode.NOT_FOUND:
    
    243
    -                    raise
    
    243
    +                    raise ArtifactError("Failed to pull artifact {}: {}".format(
    
    244
    +                        element._get_brief_display_key(), e)) from e
    
    244 245
     
    
    245 246
             return False
    
    246 247
     
    
    ... ... @@ -285,6 +286,7 @@ class CASCache(ArtifactCache):
    285 286
     
    
    286 287
                         except grpc.RpcError as e:
    
    287 288
                             if e.code() != grpc.StatusCode.NOT_FOUND:
    
    289
    +                            # Intentionally re-raise RpcError for outer except block.
    
    288 290
                                 raise
    
    289 291
     
    
    290 292
                         missing_blobs = {}
    
    ... ... @@ -340,7 +342,7 @@ class CASCache(ArtifactCache):
    340 342
     
    
    341 343
                 except grpc.RpcError as e:
    
    342 344
                     if e.code() != grpc.StatusCode.RESOURCE_EXHAUSTED:
    
    343
    -                    raise ArtifactError("Failed to push artifact {}: {}".format(refs, e)) from e
    
    345
    +                    raise ArtifactError("Failed to push artifact {}: {}".format(refs, e), temporary=True) from e
    
    344 346
     
    
    345 347
             return pushed
    
    346 348
     
    

  • buildstream/_exceptions.py
    ... ... @@ -99,7 +99,7 @@ class ErrorDomain(Enum):
    99 99
     #
    
    100 100
     class BstError(Exception):
    
    101 101
     
    
    102
    -    def __init__(self, message, *, detail=None, domain=None, reason=None):
    
    102
    +    def __init__(self, message, *, detail=None, domain=None, reason=None, temporary=False):
    
    103 103
             global _last_exception
    
    104 104
     
    
    105 105
             super().__init__(message)
    
    ... ... @@ -114,6 +114,11 @@ class BstError(Exception):
    114 114
             #
    
    115 115
             self.sandbox = None
    
    116 116
     
    
    117
    +        # When this exception occurred during the handling of a job, indicate
    
    118
    +        # whether or not there is any point retrying the job.
    
    119
    +        #
    
    120
    +        self.temporary = temporary
    
    121
    +
    
    117 122
             # Error domain and reason
    
    118 123
             #
    
    119 124
             self.domain = domain
    
    ... ... @@ -131,8 +136,8 @@ class BstError(Exception):
    131 136
     # or by the base :class:`.Plugin` element itself.
    
    132 137
     #
    
    133 138
     class PluginError(BstError):
    
    134
    -    def __init__(self, message, reason=None):
    
    135
    -        super().__init__(message, domain=ErrorDomain.PLUGIN, reason=reason)
    
    139
    +    def __init__(self, message, reason=None, temporary=False):
    
    140
    +        super().__init__(message, domain=ErrorDomain.PLUGIN, reason=reason, temporary=False)
    
    136 141
     
    
    137 142
     
    
    138 143
     # LoadErrorReason
    
    ... ... @@ -249,8 +254,8 @@ class SandboxError(BstError):
    249 254
     # Raised when errors are encountered in the artifact caches
    
    250 255
     #
    
    251 256
     class ArtifactError(BstError):
    
    252
    -    def __init__(self, message, *, detail=None, reason=None):
    
    253
    -        super().__init__(message, detail=detail, domain=ErrorDomain.ARTIFACT, reason=reason)
    
    257
    +    def __init__(self, message, *, detail=None, reason=None, temporary=False):
    
    258
    +        super().__init__(message, detail=detail, domain=ErrorDomain.ARTIFACT, reason=reason, temporary=True)
    
    254 259
     
    
    255 260
     
    
    256 261
     # PipelineError
    

  • buildstream/_scheduler/jobs/job.py
    ... ... @@ -35,6 +35,12 @@ from ..._exceptions import ImplError, BstError, set_last_task_error
    35 35
     from ..._message import Message, MessageType, unconditional_messages
    
    36 36
     from ... import _signals, utils
    
    37 37
     
    
    38
    +# Return code values shutdown of job handling child processes
    
    39
    +#
    
    40
    +RC_OK = 0
    
    41
    +RC_FAIL = 1
    
    42
    +RC_PERM_FAIL = 2
    
    43
    +
    
    38 44
     
    
    39 45
     # Used to distinguish between status messages and return values
    
    40 46
     class Envelope():
    
    ... ... @@ -111,6 +117,10 @@ class Job():
    111 117
             self._max_retries = max_retries        # Maximum number of automatic retries
    
    112 118
             self._result = None                    # Return value of child action in the parent
    
    113 119
             self._tries = 0                        # Try count, for retryable jobs
    
    120
    +
    
    121
    +        # If False, a retry will not be attempted regardless of whether _tries is less than _max_retries.
    
    122
    +        #
    
    123
    +        self._retry_flag = True
    
    114 124
             self._logfile = logfile
    
    115 125
             self._task_id = None
    
    116 126
     
    
    ... ... @@ -388,8 +398,9 @@ class Job():
    388 398
                     result = self.child_process()
    
    389 399
                 except BstError as e:
    
    390 400
                     elapsed = datetime.datetime.now() - starttime
    
    401
    +                self._retry_flag = e.temporary
    
    391 402
     
    
    392
    -                if self._tries <= self._max_retries:
    
    403
    +                if self._retry_flag and (self._tries <= self._max_retries):
    
    393 404
                         self.message(MessageType.FAIL,
    
    394 405
                                      "Try #{} failed, retrying".format(self._tries),
    
    395 406
                                      elapsed=elapsed)
    
    ... ... @@ -402,7 +413,10 @@ class Job():
    402 413
     
    
    403 414
                     # Report the exception to the parent (for internal testing purposes)
    
    404 415
                     self._child_send_error(e)
    
    405
    -                self._child_shutdown(1)
    
    416
    +
    
    417
    +                # Set return code based on whether or not the error was temporary.
    
    418
    +                #
    
    419
    +                self._child_shutdown(RC_FAIL if self._retry_flag else RC_PERM_FAIL)
    
    406 420
     
    
    407 421
                 except Exception as e:                        # pylint: disable=broad-except
    
    408 422
     
    
    ... ... @@ -416,7 +430,7 @@ class Job():
    416 430
                     self.message(MessageType.BUG, self.action_name,
    
    417 431
                                  elapsed=elapsed, detail=detail,
    
    418 432
                                  logfile=filename)
    
    419
    -                self._child_shutdown(1)
    
    433
    +                self._child_shutdown(RC_FAIL)
    
    420 434
     
    
    421 435
                 else:
    
    422 436
                     # No exception occurred in the action
    
    ... ... @@ -430,7 +444,7 @@ class Job():
    430 444
                     # Shutdown needs to stay outside of the above context manager,
    
    431 445
                     # make sure we dont try to handle SIGTERM while the process
    
    432 446
                     # is already busy in sys.exit()
    
    433
    -                self._child_shutdown(0)
    
    447
    +                self._child_shutdown(RC_OK)
    
    434 448
     
    
    435 449
         # _child_send_error()
    
    436 450
         #
    
    ... ... @@ -495,7 +509,8 @@ class Job():
    495 509
             message.action_name = self.action_name
    
    496 510
             message.task_id = self._task_id
    
    497 511
     
    
    498
    -        if message.message_type == MessageType.FAIL and self._tries <= self._max_retries:
    
    512
    +        if (message.message_type == MessageType.FAIL and
    
    513
    +                self._tries <= self._max_retries and self._retry_flag):
    
    499 514
                 # Job will be retried, display failures as warnings in the frontend
    
    500 515
                 message.message_type = MessageType.WARN
    
    501 516
     
    
    ... ... @@ -529,12 +544,17 @@ class Job():
    529 544
         def _parent_child_completed(self, pid, returncode):
    
    530 545
             self._parent_shutdown()
    
    531 546
     
    
    532
    -        if returncode != 0 and self._tries <= self._max_retries:
    
    547
    +        # We don't want to retry if we got OK or a permanent fail.
    
    548
    +        # This is set in _child_action but must also be set for the parent.
    
    549
    +        #
    
    550
    +        self._retry_flag = returncode not in (RC_OK, RC_PERM_FAIL)
    
    551
    +
    
    552
    +        if self._retry_flag and (self._tries <= self._max_retries):
    
    533 553
                 self.spawn()
    
    534 554
                 return
    
    535 555
     
    
    536
    -        self.parent_complete(returncode == 0, self._result)
    
    537
    -        self._scheduler.job_completed(self, returncode == 0)
    
    556
    +        self.parent_complete(returncode == RC_OK, self._result)
    
    557
    +        self._scheduler.job_completed(self, returncode == RC_OK)
    
    538 558
     
    
    539 559
         # _parent_process_envelope()
    
    540 560
         #
    

  • buildstream/buildelement.py
    1 1
     #
    
    2 2
     #  Copyright (C) 2016 Codethink Limited
    
    3
    +#  Copyright (C) 2018 Bloomberg Finance LP
    
    3 4
     #
    
    4 5
     #  This program is free software; you can redistribute it and/or
    
    5 6
     #  modify it under the terms of the GNU Lesser General Public
    
    ... ... @@ -204,8 +205,9 @@ class BuildElement(Element):
    204 205
         def prepare(self, sandbox):
    
    205 206
             commands = self.__commands['configure-commands']
    
    206 207
             if commands:
    
    207
    -            for cmd in commands:
    
    208
    -                self.__run_command(sandbox, cmd, 'configure-commands')
    
    208
    +            with self.timed_activity("Running configure-commands"):
    
    209
    +                for cmd in commands:
    
    210
    +                    self.__run_command(sandbox, cmd, 'configure-commands')
    
    209 211
     
    
    210 212
         def generate_script(self):
    
    211 213
             script = ""
    
    ... ... @@ -231,13 +233,12 @@ class BuildElement(Element):
    231 233
             return commands
    
    232 234
     
    
    233 235
         def __run_command(self, sandbox, cmd, cmd_name):
    
    234
    -        with self.timed_activity("Running {}".format(cmd_name)):
    
    235
    -            self.status("Running {}".format(cmd_name), detail=cmd)
    
    236
    -
    
    237
    -            # Note the -e switch to 'sh' means to exit with an error
    
    238
    -            # if any untested command fails.
    
    239
    -            #
    
    240
    -            exitcode = sandbox.run(['sh', '-c', '-e', cmd + '\n'],
    
    241
    -                                   SandboxFlags.ROOT_READ_ONLY)
    
    242
    -            if exitcode != 0:
    
    243
    -                raise ElementError("Command '{}' failed with exitcode {}".format(cmd, exitcode))
    236
    +        self.status("Running {}".format(cmd_name), detail=cmd)
    
    237
    +
    
    238
    +        # Note the -e switch to 'sh' means to exit with an error
    
    239
    +        # if any untested command fails.
    
    240
    +        #
    
    241
    +        exitcode = sandbox.run(['sh', '-c', '-e', cmd + '\n'],
    
    242
    +                               SandboxFlags.ROOT_READ_ONLY)
    
    243
    +        if exitcode != 0:
    
    244
    +            raise ElementError("Command '{}' failed with exitcode {}".format(cmd, exitcode))

  • buildstream/element.py
    ... ... @@ -140,9 +140,10 @@ class ElementError(BstError):
    140 140
            message (str): The error message to report to the user
    
    141 141
            detail (str): A possibly multiline, more detailed error message
    
    142 142
            reason (str): An optional machine readable reason string, used for test cases
    
    143
    +       temporary (bool): An indicator to whether the error may occur if the operation was run again. (*Since: 1.2*)
    
    143 144
         """
    
    144
    -    def __init__(self, message, *, detail=None, reason=None):
    
    145
    -        super().__init__(message, detail=detail, domain=ErrorDomain.ELEMENT, reason=reason)
    
    145
    +    def __init__(self, message, *, detail=None, reason=None, temporary=False):
    
    146
    +        super().__init__(message, detail=detail, domain=ErrorDomain.ELEMENT, reason=reason, temporary=temporary)
    
    146 147
     
    
    147 148
     
    
    148 149
     class Element(Plugin):
    

  • buildstream/plugin.py
    ... ... @@ -478,13 +478,15 @@ class Plugin():
    478 478
                                                silent_nested=silent_nested):
    
    479 479
                 yield
    
    480 480
     
    
    481
    -    def call(self, *popenargs, fail=None, **kwargs):
    
    481
    +    def call(self, *popenargs, fail=None, fail_temporarily=False, **kwargs):
    
    482 482
             """A wrapper for subprocess.call()
    
    483 483
     
    
    484 484
             Args:
    
    485 485
                popenargs (list): Popen() arguments
    
    486 486
                fail (str): A message to display if the process returns
    
    487 487
                            a non zero exit code
    
    488
    +           fail_temporarily (bool): Whether any exceptions should
    
    489
    +                                    be raised as temporary. (*Since: 1.2*)
    
    488 490
                rest_of_args (kwargs): Remaining arguments to subprocess.call()
    
    489 491
     
    
    490 492
             Returns:
    
    ... ... @@ -507,16 +509,18 @@ class Plugin():
    507 509
                   "Failed to download ponies from {}".format(
    
    508 510
                       self.mirror_directory))
    
    509 511
             """
    
    510
    -        exit_code, _ = self.__call(*popenargs, fail=fail, **kwargs)
    
    512
    +        exit_code, _ = self.__call(*popenargs, fail=fail, fail_temporarily=fail_temporarily, **kwargs)
    
    511 513
             return exit_code
    
    512 514
     
    
    513
    -    def check_output(self, *popenargs, fail=None, **kwargs):
    
    515
    +    def check_output(self, *popenargs, fail=None, fail_temporarily=False, **kwargs):
    
    514 516
             """A wrapper for subprocess.check_output()
    
    515 517
     
    
    516 518
             Args:
    
    517 519
                popenargs (list): Popen() arguments
    
    518 520
                fail (str): A message to display if the process returns
    
    519 521
                            a non zero exit code
    
    522
    +           fail_temporarily (bool): Whether any exceptions should
    
    523
    +                                    be raised as temporary. (*Since: 1.2*)
    
    520 524
                rest_of_args (kwargs): Remaining arguments to subprocess.call()
    
    521 525
     
    
    522 526
             Returns:
    
    ... ... @@ -555,7 +559,7 @@ class Plugin():
    555 559
                   raise SourceError(
    
    556 560
                       fmt.format(plugin=self, track=tracking)) from e
    
    557 561
             """
    
    558
    -        return self.__call(*popenargs, collect_stdout=True, fail=fail, **kwargs)
    
    562
    +        return self.__call(*popenargs, collect_stdout=True, fail=fail, fail_temporarily=fail_temporarily, **kwargs)
    
    559 563
     
    
    560 564
         #############################################################
    
    561 565
         #            Private Methods used in BuildStream            #
    
    ... ... @@ -619,7 +623,7 @@ class Plugin():
    619 623
     
    
    620 624
         # Internal subprocess implementation for the call() and check_output() APIs
    
    621 625
         #
    
    622
    -    def __call(self, *popenargs, collect_stdout=False, fail=None, **kwargs):
    
    626
    +    def __call(self, *popenargs, collect_stdout=False, fail=None, fail_temporarily=False, **kwargs):
    
    623 627
     
    
    624 628
             with self._output_file() as output_file:
    
    625 629
                 if 'stdout' not in kwargs:
    
    ... ... @@ -634,7 +638,8 @@ class Plugin():
    634 638
                 exit_code, output = utils._call(*popenargs, **kwargs)
    
    635 639
     
    
    636 640
                 if fail and exit_code:
    
    637
    -                raise PluginError("{plugin}: {message}".format(plugin=self, message=fail))
    
    641
    +                raise PluginError("{plugin}: {message}".format(plugin=self, message=fail),
    
    642
    +                                  temporary=fail_temporarily)
    
    638 643
     
    
    639 644
             return (exit_code, output)
    
    640 645
     
    

  • buildstream/plugins/sources/_downloadablefilesource.py
    ... ... @@ -150,11 +150,11 @@ class DownloadableFileSource(Source):
    150 150
                     # we would have downloaded.
    
    151 151
                     return self.ref
    
    152 152
                 raise SourceError("{}: Error mirroring {}: {}"
    
    153
    -                              .format(self, self.url, e)) from e
    
    153
    +                              .format(self, self.url, e), temporary=True) from e
    
    154 154
     
    
    155 155
             except (urllib.error.URLError, urllib.error.ContentTooShortError, OSError) as e:
    
    156 156
                 raise SourceError("{}: Error mirroring {}: {}"
    
    157
    -                              .format(self, self.url, e)) from e
    
    157
    +                              .format(self, self.url, e), temporary=True) from e
    
    158 158
     
    
    159 159
         def _get_mirror_dir(self):
    
    160 160
             return os.path.join(self.get_mirror_directory(),
    

  • buildstream/plugins/sources/git.py
    ... ... @@ -113,7 +113,8 @@ class GitMirror():
    113 113
                 #
    
    114 114
                 with self.source.tempdir() as tmpdir:
    
    115 115
                     self.source.call([self.source.host_git, 'clone', '--mirror', '-n', self.url, tmpdir],
    
    116
    -                                 fail="Failed to clone git repository {}".format(self.url))
    
    116
    +                                 fail="Failed to clone git repository {}".format(self.url),
    
    117
    +                                 fail_temporarily=True)
    
    117 118
     
    
    118 119
                     try:
    
    119 120
                         shutil.move(tmpdir, self.mirror)
    
    ... ... @@ -124,6 +125,7 @@ class GitMirror():
    124 125
         def fetch(self):
    
    125 126
             self.source.call([self.source.host_git, 'fetch', 'origin', '--prune'],
    
    126 127
                              fail="Failed to fetch from remote git repository: {}".format(self.url),
    
    128
    +                         fail_temporarily=True,
    
    127 129
                              cwd=self.mirror)
    
    128 130
     
    
    129 131
         def has_ref(self):
    
    ... ... @@ -157,7 +159,8 @@ class GitMirror():
    157 159
             # case we're just checking out a specific commit and then removing the .git/
    
    158 160
             # directory.
    
    159 161
             self.source.call([self.source.host_git, 'clone', '--no-checkout', '--shared', self.mirror, fullpath],
    
    160
    -                         fail="Failed to create git mirror {} in directory: {}".format(self.mirror, fullpath))
    
    162
    +                         fail="Failed to create git mirror {} in directory: {}".format(self.mirror, fullpath),
    
    163
    +                         fail_temporarily=True)
    
    161 164
     
    
    162 165
             self.source.call([self.source.host_git, 'checkout', '--force', self.ref],
    
    163 166
                              fail="Failed to checkout git ref {}".format(self.ref),
    
    ... ... @@ -170,7 +173,8 @@ class GitMirror():
    170 173
             fullpath = os.path.join(directory, self.path)
    
    171 174
     
    
    172 175
             self.source.call([self.source.host_git, 'clone', '--no-checkout', self.mirror, fullpath],
    
    173
    -                         fail="Failed to clone git mirror {} in directory: {}".format(self.mirror, fullpath))
    
    176
    +                         fail="Failed to clone git mirror {} in directory: {}".format(self.mirror, fullpath),
    
    177
    +                         fail_temporarily=True)
    
    174 178
     
    
    175 179
             self.source.call([self.source.host_git, 'remote', 'set-url', 'origin', self.url],
    
    176 180
                              fail='Failed to add remote origin "{}"'.format(self.url),
    

  • buildstream/source.py
    ... ... @@ -108,9 +108,10 @@ class SourceError(BstError):
    108 108
            message (str): The breif error description to report to the user
    
    109 109
            detail (str): A possibly multiline, more detailed error message
    
    110 110
            reason (str): An optional machine readable reason string, used for test cases
    
    111
    +       temporary (bool): An indicator to whether the error may occur if the operation was run again. (*Since: 1.2*)
    
    111 112
         """
    
    112
    -    def __init__(self, message, *, detail=None, reason=None):
    
    113
    -        super().__init__(message, detail=detail, domain=ErrorDomain.SOURCE, reason=reason)
    
    113
    +    def __init__(self, message, *, detail=None, reason=None, temporary=False):
    
    114
    +        super().__init__(message, detail=detail, domain=ErrorDomain.SOURCE, reason=reason, temporary=temporary)
    
    114 115
     
    
    115 116
     
    
    116 117
     class Source(Plugin):
    

  • tests/sources/deb.py
    ... ... @@ -45,7 +45,7 @@ def test_no_ref(cli, tmpdir, datafiles):
    45 45
         assert cli.get_element_state(project, 'target.bst') == 'no reference'
    
    46 46
     
    
    47 47
     
    
    48
    -# Test that when I fetch a nonexistent URL, errors are handled gracefully.
    
    48
    +# Test that when I fetch a nonexistent URL, errors are handled gracefully and a retry is performed.
    
    49 49
     @pytest.mark.skipif(HAVE_ARPY is False, reason="arpy is not available")
    
    50 50
     @pytest.mark.datafiles(os.path.join(DATA_DIR, 'fetch'))
    
    51 51
     def test_fetch_bad_url(cli, tmpdir, datafiles):
    
    ... ... @@ -56,6 +56,7 @@ def test_fetch_bad_url(cli, tmpdir, datafiles):
    56 56
         result = cli.run(project=project, args=[
    
    57 57
             'fetch', 'target.bst'
    
    58 58
         ])
    
    59
    +    assert "Try #" in result.stderr
    
    59 60
         result.assert_main_error(ErrorDomain.STREAM, None)
    
    60 61
         result.assert_task_error(ErrorDomain.SOURCE, None)
    
    61 62
     
    

  • tests/sources/tar.py
    ... ... @@ -56,7 +56,7 @@ def test_no_ref(cli, tmpdir, datafiles):
    56 56
         assert cli.get_element_state(project, 'target.bst') == 'no reference'
    
    57 57
     
    
    58 58
     
    
    59
    -# Test that when I fetch a nonexistent URL, errors are handled gracefully.
    
    59
    +# Test that when I fetch a nonexistent URL, errors are handled gracefully and a retry is performed.
    
    60 60
     @pytest.mark.datafiles(os.path.join(DATA_DIR, 'fetch'))
    
    61 61
     def test_fetch_bad_url(cli, tmpdir, datafiles):
    
    62 62
         project = os.path.join(datafiles.dirname, datafiles.basename)
    
    ... ... @@ -66,6 +66,7 @@ def test_fetch_bad_url(cli, tmpdir, datafiles):
    66 66
         result = cli.run(project=project, args=[
    
    67 67
             'fetch', 'target.bst'
    
    68 68
         ])
    
    69
    +    assert "Try #" in result.stderr
    
    69 70
         result.assert_main_error(ErrorDomain.STREAM, None)
    
    70 71
         result.assert_task_error(ErrorDomain.SOURCE, None)
    
    71 72
     
    

  • tests/sources/zip.py
    ... ... @@ -43,7 +43,7 @@ def test_no_ref(cli, tmpdir, datafiles):
    43 43
         assert cli.get_element_state(project, 'target.bst') == 'no reference'
    
    44 44
     
    
    45 45
     
    
    46
    -# Test that when I fetch a nonexistent URL, errors are handled gracefully.
    
    46
    +# Test that when I fetch a nonexistent URL, errors are handled gracefully and a retry is performed.
    
    47 47
     @pytest.mark.datafiles(os.path.join(DATA_DIR, 'fetch'))
    
    48 48
     def test_fetch_bad_url(cli, tmpdir, datafiles):
    
    49 49
         project = os.path.join(datafiles.dirname, datafiles.basename)
    
    ... ... @@ -53,6 +53,7 @@ def test_fetch_bad_url(cli, tmpdir, datafiles):
    53 53
         result = cli.run(project=project, args=[
    
    54 54
             'fetch', 'target.bst'
    
    55 55
         ])
    
    56
    +    assert "Try #" in result.stderr
    
    56 57
         result.assert_main_error(ErrorDomain.STREAM, None)
    
    57 58
         result.assert_task_error(ErrorDomain.SOURCE, None)
    
    58 59
     
    



  • [Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]