gget r134 - trunk/gget
- From: johans svn gnome org
- To: svn-commits-list gnome org
- Subject: gget r134 - trunk/gget
- Date: Sun, 18 Jan 2009 21:16:38 +0000 (UTC)
Author: johans
Date: Sun Jan 18 21:16:38 2009
New Revision: 134
URL: http://svn.gnome.org/viewvc/gget?rev=134&view=rev
Log:
Updated the bundled metalink checker to current SVN and added support for the --header command-line argument.
Modified:
trunk/gget/application.py
trunk/gget/dbus_service.py
trunk/gget/dialogs.py
trunk/gget/download.py
trunk/gget/download_list.py
trunk/gget/download_manager.py
trunk/gget/metalink.py
Modified: trunk/gget/application.py
==============================================================================
--- trunk/gget/application.py (original)
+++ trunk/gget/application.py Sun Jan 18 21:16:38 2009
@@ -43,7 +43,7 @@
class Application:
def run(self):
self.__init_i18n()
- args = self.__get_options()
+ [args, headers] = self.__get_options()
gnome.init(NAME, VERSION)
gtk.gdk.threads_init()
@@ -53,9 +53,12 @@
self.download_manager = DownloadManager()
self.dbus_service = dbus_service.DBusService()
+
+ # If the DBus service is already running, add downloads using it
if not self.dbus_service.register():
for uri in args:
- self.dbus_service.download_manager.AddDownload(uri, os.getcwd())
+ self.dbus_service.download_manager.AddDownload(uri,
+ os.getcwd(), headers)
return 0
self.dbus_service.register_object(dbus_service.DOWNLOAD_MGR_OBJ_PATH,
@@ -77,10 +80,11 @@
for uri in args:
if self.config.ask_for_location:
- add = AddDownloadDialog(uri)
+ add = AddDownloadDialog(uri, headers)
add.dialog.run()
else:
- self.download_list.add_download(uri, self.config.default_folder)
+ self.download_list.add_download(uri,
+ self.config.default_folder, headers)
gtk.main()
@@ -94,20 +98,26 @@
def __get_options(self):
"""Get command line options."""
try:
- opts, args = getopt.getopt(sys.argv[1:], "dh", ["debug", "help"])
+ opts, args = getopt.getopt(sys.argv[1:], "dh", ["debug", "header=",
+ "help"])
except getopt.GetoptError:
opts = []
args = sys.argv[1:]
debug = False
+ headers = {}
for o, a in opts:
if o in ("-d", "--debug"):
debug = True
+ elif o in ("--header"):
+ kv = a.split("=")
+ if (len(kv) == 2):
+ headers[kv[0]] = kv[1]
elif o in ("-h", "--help"):
self.__print_usage()
self.config = config.Configuration(debug)
- return args
+ return [args, headers]
def __print_usage(self):
"""Output usage information and exit."""
Modified: trunk/gget/dbus_service.py
==============================================================================
--- trunk/gget/dbus_service.py (original)
+++ trunk/gget/dbus_service.py Sun Jan 18 21:16:38 2009
@@ -155,22 +155,22 @@
# Methods
- @dbus.service.method(DOWNLOAD_MGR_IFACE, in_signature='ss',
+ @dbus.service.method(DOWNLOAD_MGR_IFACE, in_signature='ssa{ss}',
out_signature='s')
- def AddDownload(self, uri, path):
+ def AddDownload(self, uri, path, headers):
utils.debug_print("Invoked DBus method: %s.%s" % (DOWNLOAD_MGR_IFACE,
"AddDownload"))
r = ""
if self.config.ask_for_location:
gtk.gdk.threads_enter()
- add = dialogs.AddDownloadDialog(uri)
+ add = dialogs.AddDownloadDialog(uri, headers)
if add.dialog.run() == 1:
download = add.download
if download:
r = DOWNLOADS_OBJ_PATH + "/" + download.id
gtk.gdk.threads_leave()
else:
- download = self.download_list.add_download(uri, path)
+ download = self.download_list.add_download(uri, path, headers)
r = DOWNLOADS_OBJ_PATH + "/" + download.id
return r
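For reference, a hedged client-side sketch of calling the widened AddDownload method (in_signature='ssa{ss}') with dbus-python. The bus name, object path, and interface string below are placeholders for illustration only; the real values are the constants defined in dbus_service.py (DOWNLOAD_MGR_IFACE, DOWNLOAD_MGR_OBJ_PATH, etc.).

import dbus

bus = dbus.SessionBus()
# Placeholder names, assumed for this sketch; gget defines the real ones
# as constants in dbus_service.py.
proxy = bus.get_object("org.gnome.GGet", "/org/gnome/GGet/DownloadManager")
manager = dbus.Interface(proxy, dbus_interface="org.gnome.GGet.DownloadManager")

# uri (s), path (s), headers (a{ss}) -> object path of the new download (s)
obj_path = manager.AddDownload("http://example.org/file.iso", "/tmp",
                               {"Referer": "http://example.org/"})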
Modified: trunk/gget/dialogs.py
==============================================================================
--- trunk/gget/dialogs.py (original)
+++ trunk/gget/dialogs.py Sun Jan 18 21:16:38 2009
@@ -61,7 +61,8 @@
gnome.ui.url_show_on_screen(scheme + url, widget.get_screen())
class AddDownloadDialog:
- def __init__(self, uri=""):
+ def __init__(self, uri="", headers={}):
+ self.headers = headers
self.config = config.Configuration()
self.__get_widgets()
@@ -129,7 +130,8 @@
def __add_button_clicked(self, button):
download_list = DownloadList()
self.download = download_list.add_download(self.uri_entry.get_text(),
- self.download_filechooserbutton.get_current_folder())
+ self.download_filechooserbutton.get_current_folder(),
+ self.headers)
self.clipboard.disconnect(self.owner_change_id)
self.dialog.destroy()
Modified: trunk/gget/download.py
==============================================================================
--- trunk/gget/download.py (original)
+++ trunk/gget/download.py Sun Jan 18 21:16:38 2009
@@ -61,7 +61,8 @@
"bitrate": (gobject.SIGNAL_RUN_LAST, None, (float,)),
"status-changed": (gobject.SIGNAL_RUN_LAST, None, (int,))}
- def __init__(self, uri, path, date_started="", date_completed=""):
+ def __init__(self, uri, path, headers={}, date_started="",
+ date_completed=""):
gobject.GObject.__init__(self)
self.config = config.Configuration()
self.dbus_service = dbus_service.DBusService()
@@ -70,6 +71,7 @@
self.file_name = os.path.basename(self.uri)
self.path = path
+ self.headers = headers
if not self.config.ask_for_location:
folder = utils.get_folder_for_extension(uri)
Modified: trunk/gget/download_list.py
==============================================================================
--- trunk/gget/download_list.py (original)
+++ trunk/gget/download_list.py Sun Jan 18 21:16:38 2009
@@ -87,13 +87,13 @@
self.tree.write(file)
file.close()
- def add_download(self, uri, path=None):
+ def add_download(self, uri, path=None, headers={}):
"""Constructs a new download object and adds it to the list and xml
tree."""
if path is None:
path = self.config.default_folder
- download = Download(uri, path)
+ download = Download(uri, path, headers)
self.__append_download(download)
self.__add_download_to_xml(download)
return download
Modified: trunk/gget/download_manager.py
==============================================================================
--- trunk/gget/download_manager.py (original)
+++ trunk/gget/download_manager.py Sun Jan 18 21:16:38 2009
@@ -132,7 +132,8 @@
handlers={"status": download.update,
"bitrate": download.bitrate,
"cancel": download.is_canceled,
- "pause": download.is_paused})
+ "pause": download.is_paused},
+ headers=download.headers)
if not result:
download.set_status(ERROR)
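The headers dict stored on each Download object ends up, via the checker's get() call extended here, as the third argument of urllib2.Request (see the urlopen() and urlhead() hunks in metalink.py below). A minimal stdlib sketch of that pattern, with illustrative values:

import urllib2

# Python 2 stdlib: extra request headers go in the third argument of
# urllib2.Request, exactly as the updated urlopen()/urlhead() do below.
headers = {"Referer": "http://example.org/"}
req = urllib2.Request("http://example.org/file.iso", None, headers)
fp = urllib2.urlopen(req)
data = fp.read()
fp.close()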
Modified: trunk/gget/metalink.py
==============================================================================
--- trunk/gget/metalink.py (original)
+++ trunk/gget/metalink.py Sun Jan 18 21:16:38 2009
@@ -223,6 +223,7 @@
import math
import logging
import re
+import HTMLParser
import time
import subprocess
import StringIO
@@ -258,8 +259,8 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Filename: $URL: https://metalinks.svn.sourceforge.net/svnroot/metalinks/checker/checker.py $
-# Last Updated: $Date: 2008-10-21 05:06:03 +0200 (tis, 21 okt 2008) $
-# Version: $Rev: 270 $
+# Last Updated: $Date: 2009-01-11 10:32:01 +0100 (son, 11 jan 2009) $
+# Version: $Rev: 294 $
# Author(s): Neil McNab
#
# Description:
@@ -315,6 +316,30 @@
_("License") + ": " + _("GNU General Public License, Version 2") + "\n\n" + \
NAME + _(" comes with ABSOLUTELY NO WARRANTY. This is free software, and you are welcome to redistribute it under certain conditions, see LICENSE.txt for details.")
+
+class Webpage(HTMLParser.HTMLParser):
+
+ def __init__(self, *args):
+ self.urls = []
+ self.url = ""
+ HTMLParser.HTMLParser.__init__(self, *args)
+
+ def set_url(self, url):
+ self.url = url
+
+ def handle_starttag(self, tag, attrs):
+ if tag == "a":
+ for item in attrs:
+ if item[0] == "href":
+ url = item[1]
+ if not download.is_remote(item):
+ #fix relative links
+ url = download.path_join(self.url, url)
+ if not url.startswith("mailto:"):
+ self.urls.append(url)
+ #print url
+
+
class Checker:
def __init__(self):
self.threadlist = []
@@ -688,6 +713,7 @@
checker.URLCheck = URLCheck
checker.VERSION = VERSION
checker.WEBSITE = WEBSITE
+checker.Webpage = Webpage
checker._ = _
checker.translate = translate
#!/usr/bin/env python
@@ -716,7 +742,7 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Filename: $URL: https://metalinks.svn.sourceforge.net/svnroot/metalinks/checker/download.py $
-# Last Updated: $Date: 2008-11-02 03:05:35 +0100 (son, 02 nov 2008) $
+# Last Updated: $Date: 2009-01-13 09:45:07 +0100 (tis, 13 jan 2009) $
# Author(s): Neil McNab
#
# Description:
@@ -919,10 +945,10 @@
_ = translate()
-def urlopen(url, data = None, metalink=False):
- #print "URLOPEN:", url
+def urlopen(url, data = None, metalink=False, headers = {}):
+ #print "URLOPEN:", url, headers
url = complete_url(url)
- req = urllib2.Request(url, data)
+ req = urllib2.Request(url, data, headers)
req.add_header('User-agent', USER_AGENT)
req.add_header('Cache-Control', "no-cache")
req.add_header('Pragma', "no-cache")
@@ -938,12 +964,13 @@
return fp
-def urlhead(url, metalink=False):
+def urlhead(url, metalink=False, headers = {}):
'''
raise IOError for example if the URL does not exist
'''
+ #print "URLHEAD:", url, headers
url = complete_url(url)
- req = urllib2.Request(url, None)
+ req = urllib2.Request(url, None, headers)
req.add_header('User-agent', USER_AGENT)
req.add_header('Cache-Control', "no-cache")
req.add_header('Pragma', "no-cache")
@@ -973,7 +1000,7 @@
# install this opener
urllib2.install_opener(opener)
-def get(src, path, checksums = {}, force = False, handlers = {}, segmented = SEGMENTED):
+def get(src, path, checksums = {}, force = False, handlers = {}, segmented = SEGMENTED, headers = {}):
'''
Download a file, decodes metalinks.
First parameter, file to download, URL or file path to download from
@@ -987,18 +1014,18 @@
raise socket.error e.g. "Operation timed out"
'''
if src.endswith(".jigdo"):
- return download_jigdo(src, path, force, handlers, segmented)
+ return download_jigdo(src, path, force, handlers, segmented, headers)
# assume metalink if ends with .metalink
if src.endswith(".metalink"):
- return download_metalink(src, path, force, handlers, segmented)
+ return download_metalink(src, path, force, handlers, segmented, headers)
else:
# not all servers support HEAD where GET is also supported
# also a WindowsError is thrown if a local file does not exist
try:
# add head check for metalink type, if MIME_TYPE or application/xml? treat as metalink
- if urlhead(src, metalink=True)["content-type"].startswith(MIME_TYPE):
+ if urlhead(src, metalink=True, headers = headers)["content-type"].startswith(MIME_TYPE):
print _("Metalink content-type detected.")
- return download_metalink(src, path, force, handlers, segmented)
+ return download_metalink(src, path, force, handlers, segmented, headers)
except:
pass
@@ -1006,13 +1033,13 @@
# parse out filename portion here
filename = os.path.basename(src)
result = download_file(src, os.path.join(path, filename),
- 0, checksums, force, handlers, segmented = segmented)
+ 0, checksums, force, handlers, segmented = segmented, headers = headers)
if result:
return [result]
return False
def download_file(url, local_file, size=0, checksums={}, force = False,
- handlers = {}, segmented = SEGMENTED, chunksums = {}, chunk_size = 0):
+ handlers = {}, segmented = SEGMENTED, chunksums = {}, chunk_size = 0, headers = {}):
'''
url {string->URL} locations of the file
local_file string local file name to save to
@@ -1037,9 +1064,9 @@
fileobj.piecelength = chunk_size
fileobj.add_url(url)
- return download_file_urls(fileobj, force, handlers, segmented)
+ return download_file_urls(fileobj, force, handlers, segmented, headers)
-def download_file_urls(metalinkfile, force = False, handlers = {}, segmented = SEGMENTED):
+def download_file_urls(metalinkfile, force = False, handlers = {}, segmented = SEGMENTED, headers = {}):
'''
Download a file.
MetalinkFile object to download
@@ -1075,7 +1102,7 @@
seg_result = False
if segmented:
- manager = Segment_Manager(metalinkfile)
+ manager = Segment_Manager(metalinkfile, headers)
manager.set_callbacks(handlers)
seg_result = manager.run()
@@ -1084,7 +1111,7 @@
print "\n" + _("Could not download all segments of the file, trying one mirror at a time.")
if (not segmented) or (not seg_result):
- manager = NormalManager(metalinkfile)
+ manager = NormalManager(metalinkfile, headers)
manager.set_callbacks(handlers)
manager.run()
@@ -1156,7 +1183,7 @@
return 0
class NormalManager(Manager):
- def __init__(self, metalinkfile):
+ def __init__(self, metalinkfile, headers = {}):
Manager.__init__(self)
self.local_file = metalinkfile.filename
self.size = metalinkfile.size
@@ -1166,6 +1193,7 @@
self.start_number = 0
self.number = 0
self.count = 1
+ self.headers = headers
def random_start(self):
# do it the old way
@@ -1182,7 +1210,7 @@
self.status = True
remote_file = complete_url(self.urllist[self.number])
- manager = URLManager(remote_file, self.local_file, self.checksums)
+ manager = URLManager(remote_file, self.local_file, self.checksums, self.headers)
manager.set_status_callback(self.status_handler)
manager.set_cancel_callback(self.cancel_handler)
manager.set_pause_callback(self.pause_handler)
@@ -1202,7 +1230,7 @@
return False
class URLManager(Manager):
- def __init__(self, remote_file, filename, checksums = {}):
+ def __init__(self, remote_file, filename, checksums = {}, headers = {}):
'''
modernized replacement for urllib.urlretrieve() for use with proxy
'''
@@ -1220,7 +1248,7 @@
self.data = ThreadSafeFile(filename, 'wb+')
try:
- self.temp = urlopen(remote_file)
+ self.temp = urlopen(remote_file, headers = headers)
except:
self.status = False
self.close_handler()
@@ -1302,7 +1330,7 @@
print "\n" + _("Checksum failed for %s.") % os.path.basename(local_file)
return False
-def download_metalink(src, path, force = False, handlers = {}, segmented = SEGMENTED):
+def download_metalink(src, path, force = False, handlers = {}, segmented = SEGMENTED, headers = {}):
'''
Decode a metalink file, can be local or remote
First parameter, file to download, URL or file path to download from
@@ -1314,7 +1342,7 @@
'''
src = complete_url(src)
try:
- datasource = urlopen(src, metalink=True)
+ datasource = urlopen(src, metalink=True, headers = headers)
except:
return False
@@ -1327,7 +1355,7 @@
if origin != src and origin != "":
print _("Downloading update from %s") % origin
try:
- return download_metalink(origin, path, force, handlers, segmented)
+ return download_metalink(origin, path, force, handlers, segmented, headers)
except: pass
urllist = metalink.files
@@ -1342,7 +1370,7 @@
if OS == None or len(ostag) == 0 or ostag[0].lower() == OS.lower():
if "any" in LANG or len(langtag) == 0 or langtag.lower() in LANG:
- result = download_file_node(filenode, path, force, handlers, segmented)
+ result = download_file_node(filenode, path, force, handlers, segmented, headers)
if result:
results.append(result)
if len(results) == 0:
@@ -1351,7 +1379,7 @@
return results
-def download_jigdo(src, path, force = False, handlers = {}, segmented = SEGMENTED):
+def download_jigdo(src, path, force = False, handlers = {}, segmented = SEGMENTED, headers = {}):
'''
Decode a jigdo file, can be local or remote
First parameter, file to download, URL or file path to download from
@@ -1363,7 +1391,7 @@
'''
newsrc = complete_url(src)
try:
- datasource = urlopen(newsrc, metalink=True)
+ datasource = urlopen(newsrc, metalink=True, headers = headers)
except:
return False
@@ -1372,7 +1400,7 @@
datasource.close()
#print path_join(src, jigdo.template)
- template = get(path_join(src, jigdo.template), path, {"md5": jigdo.template_md5}, force, handlers, segmented)
+ template = get(path_join(src, jigdo.template), path, {"md5": jigdo.template_md5}, force, handlers, segmented, headers)
if not template:
print _("Could not download template file!")
return False
@@ -1385,7 +1413,7 @@
results = []
results.extend(template)
for filenode in urllist:
- result = download_file_node(filenode, path, force, handlers, segmented)
+ result = download_file_node(filenode, path, force, handlers, segmented, headers)
if result:
results.append(result)
if len(results) == 0:
@@ -1396,7 +1424,7 @@
return results
-def convert_jigdo(src):
+def convert_jigdo(src, headers = {}):
'''
Decode a jigdo file, can be local or remote
First parameter, file to download, URL or file path to download from
@@ -1405,7 +1433,7 @@
newsrc = complete_url(src)
try:
- datasource = urlopen(newsrc, metalink=True)
+ datasource = urlopen(newsrc, metalink=True, headers = headers)
except:
return False
@@ -1426,7 +1454,7 @@
return jigdo.generate()
-def download_file_node(item, path, force = False, handler = None, segmented=SEGMENTED):
+def download_file_node(item, path, force = False, handler = None, segmented=SEGMENTED, headers = {}):
'''
Downloads a specific version of a program
First parameter, file XML node
@@ -1460,7 +1488,7 @@
chunksums = {}
chunksums[item.piecetype] = item.pieces
- return download_file_urls(item, force, handler, segmented)
+ return download_file_urls(item, force, handler, segmented, headers)
def complete_url(url):
'''
@@ -1476,14 +1504,14 @@
return "file://" + absfile
return url
-def urlretrieve(url, filename, reporthook = None):
+def urlretrieve(url, filename, reporthook = None, headers = {}):
'''
modernized replacement for urllib.urlretrieve() for use with proxy
'''
block_size = 1024
i = 0
counter = 0
- temp = urlopen(url)
+ temp = urlopen(url, headers = headers)
headers = temp.info()
try:
@@ -1887,9 +1915,10 @@
return self.lock.release()
class Segment_Manager(Manager):
- def __init__(self, metalinkfile):
+ def __init__(self, metalinkfile, headers = {}):
Manager.__init__(self)
-
+
+ self.headers = headers
self.sockets = []
self.chunks = []
self.limit_per_host = LIMIT_PER_HOST
@@ -1952,7 +1981,7 @@
while (status == httplib.MOVED_PERMANENTLY or status == httplib.FOUND) and count < MAX_REDIRECTS:
http = Http_Host(url)
if http.conn != None:
- http.conn.request("HEAD", url)
+ http.conn.request("HEAD", url, headers = self.headers)
try:
response = http.conn.getresponse()
status = response.status
@@ -2081,7 +2110,7 @@
end = self.size
if next.protocol == "http" or next.protocol == "https":
- segment = Http_Host_Segment(next, start, end, self.size, self.get_chunksum(index))
+ segment = Http_Host_Segment(next, start, end, self.size, self.get_chunksum(index), self.headers)
segment.set_cancel_callback(self.cancel_handler)
self.chunks[index] = segment
self.segment_init(index)
@@ -2190,9 +2219,11 @@
#print item.error
if item.error == httplib.MOVED_PERMANENTLY or item.error == httplib.FOUND:
#print "location:", item.location
- newitem = copy.deepcopy(self.urls[item.url])
- newitem.url = item.location
- self.urls[item.location] = newitem
+ try:
+ newitem = copy.deepcopy(self.urls[item.url])
+ newitem.url = item.location
+ self.urls[item.location] = newitem
+ except KeyError: pass
self.filter_urls()
#print "removed %s" % item.url
@@ -2373,7 +2404,7 @@
'''
Base class for various segment protocol types. Not to be used directly.
'''
- def __init__(self, host, start, end, filesize, checksums = {}):
+ def __init__(self, host, start, end, filesize, checksums = {}, headers = {}):
threading.Thread.__init__(self)
self.host = host
self.host.set_active(True)
@@ -2391,6 +2422,7 @@
self.buffer = ""
self.temp = ""
self.cancel_handler = None
+ self.headers = headers
def set_cancel_callback(self, handler):
self.cancel_handler = handler
@@ -2604,7 +2636,8 @@
return
try:
- self.host.conn.request("GET", self.url, "", {"Range": "bytes=%lu-%lu\r\n" % (self.byte_start, self.byte_end - 1)})
+ self.headers.update({"Range": "bytes=%lu-%lu\r\n" % (self.byte_start, self.byte_end - 1)})
+ self.host.conn.request("GET", self.url, "", self.headers)
except:
self.error = _("socket exception")
self.close()
@@ -2815,6 +2848,8 @@
raise socket.error e.g. "Operation timed out"
'''
headers.update(self.headers)
+
+ #print "HTTP REQUEST:", headers
if HTTP_PROXY == "":
urlparts = urlparse.urlsplit(url)
url = urlparts.path + "?" + urlparts.query
@@ -4267,8 +4302,8 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Filename: $URL: https://metalinks.svn.sourceforge.net/svnroot/metalinks/checker/console.py $
-# Last Updated: $Date: 2008-11-25 06:50:27 +0100 (tis, 25 nov 2008) $
-# Version: $Rev: 281 $
+# Last Updated: $Date: 2009-01-13 09:45:07 +0100 (tis, 13 jan 2009) $
+# Version: $Rev: 298 $
# Author(s): Neil McNab
#
# Description:
@@ -4326,9 +4361,10 @@
parser.add_option("--gpg-binary", "-g", dest="gpg", help=_("(optional) Location of gpg binary path if not in the default search path"))
parser.add_option("--convert-jigdo", "-j", action="store_true", dest="jigdo", help=_("Convert Jigdo format file to Metalink"))
parser.add_option("--port", dest="port", help=_("Streaming server port to use (default: No streaming server)"))
+ parser.add_option("--html", dest="html", help=_("Extract links from HTML webpage"))
(options, args) = parser.parse_args()
- if options.filevar == None and len(args) == 0:
+ if options.filevar == None and len(args) == 0 and options.html == None:
parser.print_help()
return
@@ -4360,6 +4396,25 @@
print download.convert_jigdo(args[0])
return
+ if options.html:
+ handle = download.urlopen(options.html)
+ text = handle.read()
+ handle.close()
+
+ page = checker.Webpage()
+ page.set_url(options.html)
+ page.feed(text)
+
+ for item in page.urls:
+ if item.endswith(".metalink"):
+ print "=" * 79
+ print item
+ mcheck = checker.Checker()
+ mcheck.check_metalink(item)
+ results = mcheck.get_results()
+ print_totals(results)
+ return
+
if options.check:
# remove filevar eventually
mcheck = checker.Checker()
@@ -4367,7 +4422,11 @@
results = mcheck.get_results()
print_totals(results)
for item in args:
- results = checker.check_metalink(item)
+ print "=" * 79
+ print item
+ mcheck = checker.Checker()
+ mcheck.check_metalink(item)
+ results = mcheck.get_results()
print_totals(results)
return
@@ -4399,6 +4458,8 @@
mcheck.check_metalink(item)
results = mcheck.get_results()
print_totals(results)
+
+ sys.exit(0)
def print_totals(results):
for key in results.keys():
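For reference, a hedged sketch of driving the new Webpage link extractor directly, much as the new --html console option does above. It assumes the checker namespace object set up in this module (checker.Webpage = Webpage); the URL and HTML string are illustrative, and exact normalisation of relative links depends on the checker's is_remote()/path_join() helpers.

# Minimal sketch (Python 2): collect hrefs from an HTML page with the new
# Webpage parser; mailto: links are skipped, and relative links are meant
# to be resolved against the page URL given to set_url().
page = checker.Webpage()
page.set_url("http://example.org/downloads/")
page.feed('<a href="http://example.org/downloads/foo.metalink">foo</a>'
          '<a href="mailto:someone@example.org">mail</a>')

for url in page.urls:
    if url.endswith(".metalink"):
        print url   # expected: the absolute .metalink link only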