[gimp-web] tools: improve gimp-check-mirrors.py.



commit d69e11c41385604281c6ae44b14b4f0873a68c8e
Author: Jehan <jehan girinstud io>
Date:   Fri Sep 24 20:07:12 2021 +0200

    tools: improve gimp-check-mirrors.py.
    
    - Make it accept a list of URIs: there was already some logic for this
      in the defined argument, but only the last URI of the list was
      actually checked.
    - Accept no URIs given on the command line: when this happens, we try to
      detect the latest stable version available (based on the download server
      vX.Y/ directories, which we parse with a regexp). We also check whether a
      Windows installer or macOS DMG is available for this version and add
      the latest revisions of these to the list of URIs to check.
    
    Note that we could also use content/gimp_versions.json to determine the
    last versions instead. Actually we should probably do both (in a future
    version of the script) to make sure that the website data is consistent
    with the download server.

 tools/downloads/gimp-check-mirrors.py | 188 ++++++++++++++++++++++++----------
 1 file changed, 134 insertions(+), 54 deletions(-)
---
diff --git a/tools/downloads/gimp-check-mirrors.py b/tools/downloads/gimp-check-mirrors.py
index 208d3f2e..208ab46b 100755
--- a/tools/downloads/gimp-check-mirrors.py
+++ b/tools/downloads/gimp-check-mirrors.py
@@ -2,12 +2,13 @@
 
 import os
 import argparse
+#import concurrent.futures
 import fileinput
 import hashlib
+import re
 import requests
 from urllib.error import HTTPError, URLError
 from urllib.request import urlretrieve, urlopen
-import concurrent.futures
 import sys
 
 # argparse for mirrorsfile and uri
@@ -16,70 +17,149 @@ parser.add_argument('--mirrorsfile', metavar='<file>', default=os.path.dirname(_
                     help='A file with one download mirror per line, either https:// or http://. Each line is 
expected to point to the equivalent of https://download.gimp.org/pub/gimp/, as some mirrors only have that 
directory structure.')
 parser.add_argument('--verify-checksum', dest='verify_checksum', action='store_true',
                     help='Whether to do a deep verification by validating identical checksums.')
-parser.add_argument(dest='uris', metavar='<uri>', nargs='+',
+parser.add_argument(dest='uris', metavar='<uri>', nargs='*',
                     help='One or more URIs pointing to the file on download.gimp.org, e.g. 
https://download.gimp.org/pub/gimp/v2.10/gimp-2.10.20.tar.bz2')
 
 args = parser.parse_args()
 
-# get local path
-for uri in args.uris:
-    dgo_uri = uri
-    dgo_uri_local = dgo_uri.replace('https://download.gimp.org/pub/gimp/', '')
-    dgo_uri_local = dgo_uri_local.replace('https://download.gimp.org/mirror/pub/gimp/', '')
-pass
-
 def load_url(url, timeout):
     with requests.head(url, timeout=timeout) as conn:
         return conn
 
-if args.verify_checksum:
-    with urlopen('https://download.gimp.org/pub/gimp/' + dgo_uri_local, timeout = 5.0) as remote_file:
-        origin_sum = hashlib.sha256()
-        origin_sum.update(remote_file.read())
+def find_latest():
+    # Just parse the HTML with some regexp as it is not xhtml and it
+    # cannot be parsed by a proper XML parser.
+    latest = []
+    base_url = 'https://download.gimp.org/pub/gimp/'
+
+    # Find latest series.
+    major = 0
+    minor = 0
+    html = requests.get(base_url).text
+    pattern = re.compile('<a href="v([0-9]+)\.([0-9]+)/"')
+    for match in pattern.finditer(html):
+      if int(match.group(2)) % 2 == 1:
+        # Ignore development versions.
+        pass
+      elif int(match.group(1)) > major:
+        major = int(match.group(1))
+        minor = int(match.group(2))
+      elif int(match.group(1)) == major and int(match.group(2)) > minor:
+        minor = int(match.group(2))
+
+    # Find latest tarball.
+    micro = 0
+    base_path = 'v{}.{}/'.format(major, minor)
+    html = requests.get('{}{}'.format(base_url, base_path)).text
+    pattern = re.compile('<a href="0.0_LATEST-IS-{}.{}.([0-9]+)"'.format(major, minor))
+    match = pattern.search(html)
+    micro = int(match.group(1))
+
+    latest = [ "{}gimp-{}.{}.{}.tar.bz2".format(base_path, major, minor, micro) ]
+
+    # Find latest Windows installer
+    found = False
+    rev = None
+    path = '{}windows/'.format(base_path)
+    html = requests.get('{}{}'.format(base_url, path)).text
+    pattern = re.compile('<a href="gimp-{}\\.{}\\.{}-setup(-([0-9]+))?\\.exe"'.format(major, minor, micro))
+    for m in pattern.finditer(html):
+      found = True
+      if m.group(2) is not None:
+        if rev is None or rev < int(m.group(2)):
+          rev = int(m.group(2))
+
+    if found:
+      latest += ["{}gimp-{}.{}.{}-setup{}.exe".format(path, major, minor, micro,
+                                                      '-{}'.format(rev) if rev is not None else '')]
+    else:
+      sys.stderr.write("WARNING: no Windows installer.\n\n")
+
+    # Find latest macOS DMG
+    found = False
+    rev = None
+    path = '{}macos/'.format(base_path)
+    html = requests.get('{}{}'.format(base_url, path)).text
+    pattern = re.compile('<a href="gimp-{}\\.{}\\.{}-x86_64(-([0-9]+))?\\.dmg"'.format(major, minor, micro))
+    for m in pattern.finditer(html):
+      found = True
+      if m.group(2) is not None:
+        if rev is None or rev < int(m.group(2)):
+          rev = int(m.group(2))
+
+    if found:
+      latest += ["{}gimp-{}.{}.{}-x86_64{}.dmg".format(path, major, minor, micro,
+                                                       '-{}'.format(rev) if rev is not None else '')]
+    else:
+      sys.stderr.write("WARNING: no macOS DMG package.\n\n")
+
+    return latest
+
+if len(args.uris) == 0:
+    print("No URIs given as argument. Trying to guess the last packages.")
+    local_uris = find_latest()
+else:
+    # get local path
+    local_uris = []
+    for uri in args.uris:
+        dgo_uri = uri
+        dgo_uri_local = dgo_uri.replace('https://download.gimp.org/pub/gimp/', '')
+        dgo_uri_local = dgo_uri_local.replace('https://download.gimp.org/mirror/pub/gimp/', '')
+        local_uris += [ dgo_uri_local ]
 
 error_count = 0
 
-# read mirrors file
-# fileinput.
-#with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
-with fileinput.input(files=(args.mirrorsfile), mode='r') as f:
-    for line in f:
-        mirror_uri = line.strip() + dgo_uri_local
-
-        try:
-            response = requests.head(url=mirror_uri, timeout=10, allow_redirects=True)
-
-            checksum_text = ''
-            if args.verify_checksum:
-              with urlopen(mirror_uri, timeout = 5.0) as remote_file:
-                m = hashlib.sha256()
-                m.update(remote_file.read())
-                if m.digest() == origin_sum.digest():
-                  checksum_text = ' (checksum OK)'
-                else:
-                  checksum_text = ' (checksum KO)'
-                  error_count += 1
-
-            print(str(response.status_code) + ' : ' + mirror_uri + checksum_text)
-            if response.status_code != 200:
+for local_uri in local_uris:
+    print("Checking: {}".format(local_uri))
+
+    if args.verify_checksum:
+        with urlopen('https://download.gimp.org/pub/gimp/' + local_uri, timeout = 5.0) as remote_file:
+            origin_sum = hashlib.sha256()
+            origin_sum.update(remote_file.read())
+
+    # read mirrors file
+    # fileinput.
+    #with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
+    with fileinput.input(files=(args.mirrorsfile), mode='r') as f:
+        for line in f:
+            mirror_uri = line.strip() + local_uri
+
+            try:
+                response = requests.head(url=mirror_uri, timeout=10, allow_redirects=True)
+
+                checksum_text = ''
+                if args.verify_checksum:
+                  with urlopen(mirror_uri, timeout = 5.0) as remote_file:
+                    m = hashlib.sha256()
+                    m.update(remote_file.read())
+                    if m.digest() == origin_sum.digest():
+                      checksum_text = ' (checksum OK)'
+                    else:
+                      checksum_text = ' (checksum KO)'
+                      error_count += 1
+
+                print(str(response.status_code) + ' : ' + mirror_uri + checksum_text)
+                if response.status_code != 200:
+                    error_count += 1
+            except HTTPError as error:
                 error_count += 1
-        except HTTPError as error:
-            error_count += 1
-            print(str(error.code) + ' : ' + mirror_uri)
-        except URLError as error:
-            error_count += 1
-            print(str(error.reason) + ' : ' + mirror_uri)
-        except requests.exceptions.ConnectTimeout as error:
-            error_count += 1
-            print('Timed out: ' + mirror_uri)
-        except OSError as error:
-            error_count += 1
-            print(str(error.strerror) + ' : ' + mirror_uri)
-    if error_count == 0:
-        sys.exit(os.EX_OK)
-    else:
-        sys.stderr.write("{} errors reported.\n".format(error_count))
-        # Return negative error count as information error code.
-        sys.exit(- error_count)
+                print(str(error.code) + ' : ' + mirror_uri)
+            except URLError as error:
+                error_count += 1
+                print(str(error.reason) + ' : ' + mirror_uri)
+            except requests.exceptions.ConnectTimeout as error:
+                error_count += 1
+                print('Timed out: ' + mirror_uri)
+            except OSError as error:
+                error_count += 1
+                print(str(error.strerror) + ' : ' + mirror_uri)
+        print()
+
+if error_count == 0:
+    sys.exit(os.EX_OK)
+else:
+    sys.stderr.write("{} errors reported.\n".format(error_count))
+    # Return negative error count as information error code.
+    sys.exit(- error_count)
 
 sys.exit(os.EX_DATAERR)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]