[gimp-web/wip/Jehan/issue-236-new-sponsor-page: 16/25] tools: add --verify-checksum option to gimp-check-mirrors.py.




commit 4b0435b22cd79ae40a2155406aac86c5d50bc139
Author: Jehan <jehan girinstud io>
Date:   Tue Aug 31 16:14:41 2021 +0200

    tools: add --verify-checksum option to gimp-check-mirrors.py.
    
    As the option name implies, it will not only check availability, but
    also compute the checksum of each mirror file to compare it with the
    original. This way, we can randomly check for errors or, worse,
    malevolent changes to our mirrored data.
    
    As expected, it is not the fastest process, so it stays an option (maybe
    a good improvement could be to make the checks multi-threaded,
    downloading several files and computing several checksums at the same
    time).

 tools/downloads/gimp-check-mirrors.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)
---
diff --git a/tools/downloads/gimp-check-mirrors.py b/tools/downloads/gimp-check-mirrors.py
index e097081d..158aa702 100755
--- a/tools/downloads/gimp-check-mirrors.py
+++ b/tools/downloads/gimp-check-mirrors.py
@@ -3,14 +3,18 @@
 import os
 import argparse
 import fileinput
+import hashlib
 import requests
 from urllib.error import HTTPError, URLError
+from urllib.request import urlretrieve, urlopen
 import concurrent.futures
 
 # argparse for mirrorsfile and uri
 parser = argparse.ArgumentParser(description='Check if GIMP download mirrors have a file from 
download.gimp.org.')
 parser.add_argument('--mirrorsfile', metavar='<file>', default=os.path.dirname(__file__) + 
'/downloads.http.txt',
                     help='A file with one download mirror per line, either https:// or http://. Each line is 
expected to point to the equivalent of https://download.gimp.org/pub/gimp/, as some mirrors only have that 
directory structure.')
+parser.add_argument('--verify-checksum', dest='verify_checksum', action='store_true',
+                    help='Whether to do a deep verification by validating identical checksums.')
 parser.add_argument(dest='uris', metavar='<uri>', nargs='+',
                     help='One or more URIs pointing to the file on download.gimp.org, e.g. 
https://download.gimp.org/pub/gimp/v2.10/gimp-2.10.20.tar.bz2')
 
@@ -27,6 +31,10 @@ def load_url(url, timeout):
     with requests.head(url, timeout=timeout) as conn:
         return conn
 
+with urlopen('https://download.gimp.org/pub/gimp/' + dgo_uri_local, timeout = 5.0) as remote_file:
+    origin_sum = hashlib.sha256()
+    origin_sum.update(remote_file.read())
+
 # read mirrors file
 # fileinput.
 #with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
@@ -36,7 +44,18 @@ with fileinput.input(files=(args.mirrorsfile), mode='r') as f:
 
         try:
             response = requests.head(url=mirror_uri, timeout=10, allow_redirects=True)
-            print(str(response.status_code) + ' : ' + mirror_uri)
+
+            checksum_text = ''
+            if args.verify_checksum:
+              with urlopen(mirror_uri, timeout = 5.0) as remote_file:
+                m = hashlib.sha256()
+                m.update(remote_file.read())
+                if m.digest() == origin_sum.digest():
+                  checksum_text = ' (checksum OK)'
+                else:
+                  checksum_text = ' (checksum KO)'
+
+            print(str(response.status_code) + ' : ' + mirror_uri + checksum_text)
         except HTTPError as error:
             print(str(error.code) + ' : ' + mirror_uri)
         except URLError as error:


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]