[gimp-web/wip/Jehan/fix-ci: 2/3] tools: HTTP calls moved to requests module instead of urllib.request.

From: Jehan <jehanp src gnome org>
To: commits-list gnome org
Cc:
Subject: [gimp-web/wip/Jehan/fix-ci: 2/3] tools: HTTP calls moved to requests module instead of urllib.request.
Date: Sun, 1 May 2022 14:13:08 +0000 (UTC)

commit ffe5aece18856cbc02ac97a41fe0b39ed2588891
Author: Jehan <jehan girinstud io>
Date:   Sun May 1 15:37:42 2022 +0200

    tools: HTTP calls moved to requests module instead of urllib.request.
    
    Both modules were used in different places in this script. Let's only
    use one, and since the Python 3 docs recommend the Requests module for
    high-level HTTP calls, let's choose it as our one import.

 tools/downloads/gimp-check-mirrors.py | 39 ++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 17 deletions(-)
---
diff --git a/tools/downloads/gimp-check-mirrors.py b/tools/downloads/gimp-check-mirrors.py
index 2e51e972..8fd31496 100755
--- a/tools/downloads/gimp-check-mirrors.py
+++ b/tools/downloads/gimp-check-mirrors.py
@@ -7,9 +7,6 @@ import fileinput
 import hashlib
 import re
 import requests
-import urllib.request
-from urllib.error import HTTPError, URLError
-from urllib.request import urlretrieve, urlopen
 import sys
 
 # argparse for mirrorsfile and uri
@@ -115,9 +112,10 @@ for local_uri in local_uris:
     print("Checking: {}".format(local_uri))
 
     if args.verify_checksum:
-        with urlopen('https://download.gimp.org/pub/gimp/' + local_uri, timeout = 5.0) as remote_file:
-            origin_sum = hashlib.sha256()
-            origin_sum.update(remote_file.read())
+        with requests.get('https://download.gimp.org/pub/gimp/' + local_uri, stream=True) as response:
+          origin_sum = hashlib.sha256()
+          for line in response.iter_content(chunk_size=65536, decode_unicode=False):
+            origin_sum.update(line)
 
     # read mirrors file
     # fileinput.
@@ -133,31 +131,38 @@ for local_uri in local_uris:
 
             try:
                 if args.verify_checksum:
-                  request = urllib.request.Request(mirror_uri, headers={'User-Agent': 'Mozilla/5.0'})
-                  with urlopen(request, timeout = 5.0) as remote_file:
-                    checksum_text = ''
+                  with requests.get(mirror_uri, stream=True) as response:
                     m = hashlib.sha256()
-                    m.update(remote_file.read())
+                    # I don't think the chunk_size is useful, since the docs
+                    # say that "stream=True will read data as it arrives
+                    # in whatever size the chunks are received" which is
+                    # the ideal way. But if it doesn't, let's use 2**16 as
+                    # a reasonable chunk size to process.
+                    for line in response.iter_content(chunk_size=65536, decode_unicode=False):
+                      m.update(line)
                     if m.digest() == origin_sum.digest():
                       checksum_text = ' (checksum OK)'
                     else:
                       checksum_text = ' (checksum KO)'
                       error_count += 1
-                    print(str(remote_file.status) + ' : ' + mirror_uri + checksum_text)
+                    print(str(response.status_code) + ' : ' + mirror_uri + checksum_text)
                 else:
                   response = requests.head(url=mirror_uri, timeout=20, allow_redirects=True)
                   print(str(response.status_code) + ' : ' + mirror_uri)
                   if response.status_code != 200:
                       error_count += 1
-            except HTTPError as error:
+            except requests.exceptions.ConnectionError as error:
                 error_count += 1
-                print(str(error.code) + ' : ' + mirror_uri)
-            except URLError as error:
-                error_count += 1
-                print(str(error.reason) + ' : ' + mirror_uri)
+                print('Connection error: ' + mirror_uri)
             except requests.exceptions.ConnectTimeout as error:
                 error_count += 1
-                print('Timed out: ' + mirror_uri)
+                print('Connection timed out: ' + mirror_uri)
+            except requests.exceptions.ReadTimeout as error:
+                error_count += 1
+                print('Read timed out: ' + mirror_uri)
+            except requests.exceptions.TooManyRedirects as error:
+                error_count += 1
+                print('Too many redirects: ' + mirror_uri)
             except OSError as error:
                 error_count += 1
                 print(str(error.strerror) + ' : ' + mirror_uri)
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]