[gimp-web] tools: optimize further the gimp-check-mirrors script.



commit 5a9443680b2b919848f042862f34ed46dfa9c391
Author: Jehan <jehan girinstud io>
Date:   Sun May 1 19:12:28 2022 +0200

    tools: optimize further the gimp-check-mirrors script.
    
    Go further with concurrency improvements:
    - Use a single thread pool for all local URIs. Previously, when
      checking the source tarball, Windows installer and macOS DMG, we had
      to wait for the previous set of checks to finish before starting
      the next one. Now just run them all at once and reorder the results
      at the end when printing.
    - Create a separate thread pool for computing the origin (reference)
      checksums. Otherwise, even when using a single thread pool for the
      comparison checks, we would still have some unneeded delay before
      each new hash is computed.

 tools/downloads/gimp-check-mirrors.py | 108 +++++++++++++++++++++-------------
 1 file changed, 68 insertions(+), 40 deletions(-)
---
diff --git a/tools/downloads/gimp-check-mirrors.py b/tools/downloads/gimp-check-mirrors.py
index 2ba16305..314a8fc7 100755
--- a/tools/downloads/gimp-check-mirrors.py
+++ b/tools/downloads/gimp-check-mirrors.py
@@ -94,7 +94,9 @@ def find_latest():
     return latest
 
 def verify_remote(uri, checksum):
-  success = False
+  success     = False
+  status      = None
+  checksum_ok = None
   try:
     if checksum is not None:
       with requests.get(uri, stream=True) as response:
@@ -106,30 +108,29 @@ def verify_remote(uri, checksum):
         # a reasonable chunk size to process.
         for line in response.iter_content(chunk_size=65536, decode_unicode=False):
           m.update(line)
-        if m.digest() == origin_sum.digest():
-          checksum_text = ' (checksum OK)'
+        if m.digest() == checksum:
+          checksum_ok = True
           success = True
         else:
-          checksum_text = ' (checksum KO)'
-        print(str(response.status_code) + ' : ' + uri + checksum_text)
+          checksum_ok = False
+        status = str(response.status_code)
     else:
       response = requests.head(url=uri, timeout=20, allow_redirects=True)
-      print(str(response.status_code) + ' : ' + uri)
+      status = str(response.status_code)
       if response.status_code == 200:
         success = True
   except requests.exceptions.ConnectionError as error:
-    print('Connection error: ' + uri)
+    status = 'Connection error'
   except requests.exceptions.ConnectTimeout as error:
-    print('Connection timed out: ' + uri)
+    status = 'Connection timed out'
   except requests.exceptions.ReadTimeout as error:
-    print('Read timed out: ' + uri)
+    status = 'Read timed out'
   except requests.exceptions.TooManyRedirects as error:
-    print('Too many redirects: ' + uri)
+    status = 'Too many redirects'
   except OSError as error:
-    print(str(error.strerror) + ' : ' + uri)
-  sys.stdout.flush()
+    status = str(error.strerror)
 
-  return success
+  return uri, success, status, checksum_ok
 
 if len(args.uris) == 0:
     print("No URIs given as argument. Trying to guess the last packages.")
@@ -146,39 +147,66 @@ else:
 error_count = 0
 check_count = 0
 
-for local_uri in local_uris:
-    print("Checking: {}".format(local_uri))
+def get_checksum(local_uri):
+  with requests.get('https://download.gimp.org/pub/gimp/' + local_uri, stream=True) as response:
+    origin_sum = hashlib.sha256()
+    for line in response.iter_content(chunk_size=65536, decode_unicode=False):
+      origin_sum.update(line)
+  return origin_sum.digest()
+
+origin_checksums = {}
+origin_executor  = None
+if args.verify_checksum:
+  origin_executor = concurrent.futures.ThreadPoolExecutor(max_workers=None)
+  for local_uri in local_uris:
+    origin_checksums[local_uri] = origin_executor.submit(get_checksum, local_uri)
+
+# Docs says: "If max_workers is None or not given, it will default
+# to the number of processors on the machine, multiplied by 5",
+# which is fine by us.
+with concurrent.futures.ThreadPoolExecutor(max_workers=None) as executor:
+  test_results = {}
+  for local_uri in local_uris:
+    test_results[local_uri] = []
 
     origin_checksum = None
     if args.verify_checksum:
-      with requests.get('https://download.gimp.org/pub/gimp/' + local_uri, stream=True) as response:
-        origin_sum = hashlib.sha256()
-        for line in response.iter_content(chunk_size=65536, decode_unicode=False):
-          origin_sum.update(line)
-      origin_checksum = origin_sum.digest()
-
-    test_results = []
-
-    # Docs says: "If max_workers is None or not given, it will default
-    # to the number of processors on the machine, multiplied by 5",
-    # which is fine by us.
-    with concurrent.futures.ThreadPoolExecutor(max_workers=None) as executor:
-      with fileinput.input(files=(args.mirrorsfile), mode='r') as f:
-        for line in f:
-          if args.verify_checksum and line.strip() == 'https://download.gimp.org/pub/gimp/':
-            # Passing the main server which is also in the list. It's
-            # our checksum base, we don't need to check it again.
-            continue
-          mirror_uri = line.strip() + local_uri
-          check_count += 1
-
-          future = executor.submit(verify_remote, mirror_uri, origin_checksum)
-          test_results += [future]
-    for future in test_results:
-      if not future.result():
+      origin_checksum = origin_checksums[local_uri].result()
+
+    with fileinput.input(files=(args.mirrorsfile), mode='r') as f:
+      for line in f:
+        if args.verify_checksum and line.strip() == 'https://download.gimp.org/pub/gimp/':
+          # Passing the main server which is also in the list. It's
+          # our checksum base, we don't need to check it again.
+          continue
+        mirror_uri = line.strip() + local_uri
+        check_count += 1
+
+        future = executor.submit(verify_remote, mirror_uri, origin_checksum)
+        test_results[local_uri] += [future]
+
+  # Print results!
+  for local_uri in local_uris:
+    print("Checking: {}".format(local_uri))
+    for future in concurrent.futures.as_completed(test_results[local_uri]):
+      uri, success, status, checksum_ok = future.result()
+
+      checksum_text = ''
+      if checksum_ok is not None:
+        if checksum_ok:
+          checksum_text = ' (checksum OK)'
+        else:
+          checksum_text = ' (checksum KO)'
+
+      print(status + ' : ' + uri + checksum_text)
+      sys.stdout.flush()
+      if not success:
         error_count += 1
     print()
 
+if origin_executor is not None:
+  origin_executor.shutdown()
+
 if error_count == 0:
     sys.exit(os.EX_OK)
 else:


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]