[gimp-web/wip/Jehan/fix-ci: 1/3] tools: don't wait forever for long downloads.




commit 00727e0f8b1e5eeae1f06242494249ccd550add5
Author: Jehan <jehan girinstud io>
Date:   Sun May 1 21:32:56 2022 +0200

    tools: don't wait forever for long downloads.
    
    Despite optimizing the script, it was still taking more than an hour on
    most CI jobs… until it took 8 minutes in one random job! After adding more
    debug output, it looks like some downloads are just taking forever, so
    let's not wait for them if they take unreasonably long, and count them as
    errors after a timeout.
    
    Note that even though some mirrors seem consistently slower, there might
    be some bottlenecks on the runner side too. In particular, the one time it
    took only 8 minutes for the whole check, I was using a different runner
    than all the failed ones (for the record, the efficient runner is: #268
    (xncHmCHa) progress.gnome.org).

 tools/downloads/gimp-check-mirrors.py | 45 +++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 18 deletions(-)
---
diff --git a/tools/downloads/gimp-check-mirrors.py b/tools/downloads/gimp-check-mirrors.py
index 1a8aab00..6941d7f7 100755
--- a/tools/downloads/gimp-check-mirrors.py
+++ b/tools/downloads/gimp-check-mirrors.py
@@ -149,8 +149,9 @@ else:
         dgo_uri_local = dgo_uri_local.replace('https://download.gimp.org/mirror/pub/gimp/', '')
         local_uris += [ dgo_uri_local ]
 
-error_count = 0
-check_count = 0
+failed_checks = 0
+error_count   = 0
+check_count   = 0
 
 def get_checksum(local_uri):
   start = time.monotonic()
@@ -196,20 +197,28 @@ with concurrent.futures.ThreadPoolExecutor(max_workers=None) as executor:
   # Print results!
   for local_uri in local_uris:
     print("Checking: {}".format(local_uri))
-    for future in concurrent.futures.as_completed(test_results[local_uri]):
-      uri, success, status, checksum_ok = future.result()
-
-      checksum_text = ''
-      if checksum_ok is not None:
-        if checksum_ok:
-          checksum_text = ' (checksum OK)'
-        else:
-          checksum_text = ' (checksum KO)'
-
-      print(status + ' : ' + uri + checksum_text)
-      sys.stdout.flush()
-      if not success:
-        error_count += 1
+    checked = 0
+    try:
+      # Waiting 10 min which is already a lot!
+      for future in concurrent.futures.as_completed(test_results[local_uri], timeout=600):
+        checked += 1
+        uri, success, status, checksum_ok = future.result()
+
+        checksum_text = ''
+        if checksum_ok is not None:
+          if checksum_ok:
+            checksum_text = ' (checksum OK)'
+          else:
+            checksum_text = ' (checksum KO)'
+
+        print(status + ' : ' + uri + checksum_text)
+        sys.stdout.flush()
+        if not success:
+          error_count += 1
+    except concurrent.futures.TimeoutError:
+      failed = check_count / 3 - checked
+      print("Some downloads took too long. Dropping {} checks.".format(failed))
+      failed_checks += failed
     print()
 
 if origin_executor is not None:
@@ -218,8 +227,8 @@ if origin_executor is not None:
 if error_count == 0:
     sys.exit(os.EX_OK)
 else:
-    sys.stderr.write("{} / {} errors reported.\n".format(error_count, check_count))
+    sys.stderr.write("{} / {} errors reported.\n".format(error_count + failed_checks, check_count))
     # Return negative error count as information error code.
-    sys.exit(- error_count)
+    sys.exit(- error_count - failed_checks)
 
 sys.exit(os.EX_DATAERR)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]