urlfetcher: Switch to requests and urllib2 instead of urlgrabber

urlgrabber is largely dead upstream and isn't going to be ported to
python3 AFAIK. So we will need to move off of it eventually.

Use requests for http handling which is the most common library nowadays,
and just plain old urllib2 for ftp fetching.
This commit is contained in:
Cole Robinson 2015-09-18 18:31:56 -04:00
parent 9a4f15f4ad
commit 5584863d18
3 changed files with 94 additions and 37 deletions

View File

@ -70,6 +70,7 @@ Group: Applications/Emulators
# however varying amounts of functionality will not be enabled.
Requires: libvirt-python >= 0.7.0
Requires: libxml2-python
Requires: python-requests
Requires: python-urlgrabber
Requires: python-ipaddr
Requires: libosinfo >= 0.2.10

View File

@ -250,6 +250,8 @@ def setupLogging(appname, debug_stdout, do_quiet, cli_app=True):
sys.__excepthook__(typ, val, tb)
sys.excepthook = exception_log
logging.getLogger("requests").setLevel(logging.ERROR)
# Log the app command string
logging.debug("Launched with command line: %s", " ".join(sys.argv))

View File

@ -24,6 +24,7 @@ import ftplib
import logging
import os
import re
import requests
import stat
import StringIO
import subprocess
@ -31,8 +32,6 @@ import tempfile
import urllib2
import urlparse
import urlgrabber.grabber as grabber
from .osdict import OSDB
@ -45,6 +44,8 @@ class _URLFetcher(object):
This is a generic base class for fetching/extracting files from
a media source, such as CD ISO, NFS server, or HTTP/FTP server
"""
_block_size = 16384
def __init__(self, location, scratchdir, meter):
self.location = location
self.scratchdir = scratchdir
@ -54,6 +55,7 @@ class _URLFetcher(object):
logging.debug("Using scratchdir=%s", scratchdir)
####################
# Internal helpers #
####################
@ -71,33 +73,49 @@ class _URLFetcher(object):
ret += "/"
return ret + filename
def _writeURLToFileobj(self, urlobj, fileobj):
def _grabURL(self, filename, fileobj):
"""
Write the urlobj contents into the passed python style file object
"""
block_size = 16384
while 1:
buff = urlobj.read(block_size)
if not buff:
break
fileobj.write(buff)
def _grabURL(self, filename):
"""
Return the urlobj from grabber.urlopen
Download the filename from self.location, and write contents to
fileobj
"""
url = self._make_full_url(filename)
base = os.path.basename(filename)
logging.debug("Fetching URI: %s", url)
try:
return grabber.urlopen(url,
progress_obj=self.meter,
text=_("Retrieving file %s...") % base)
urlobj, size = self._grabber(url)
except Exception, e:
raise ValueError(_("Couldn't acquire file %s: %s") %
(url, str(e)))
logging.debug("Fetching URI: %s", url)
self.meter.start(
text=_("Retrieving file %s...") % os.path.basename(filename),
size=size)
total = self._write(urlobj, fileobj)
self.meter.end(total)
def _write(self, urlobj, fileobj):
    """
    Copy everything readable from urlobj into the python file like
    object fileobj, one self._block_size chunk at a time.

    self.meter.update() is called with the running byte count after
    each chunk is written. Returns the total number of bytes written.
    """
    written = 0
    while True:
        chunk = urlobj.read(self._block_size)
        if not chunk:
            # An empty read means the source is exhausted
            return written

        fileobj.write(chunk)
        written += len(chunk)
        self.meter.update(written)
def _grabber(self, url):
    """
    Subclass hook: open the passed URL and hand back a (urlobj, size)
    pair, where urlobj is whatever self._write knows how to consume.

    The base class has no transport of its own, so it always raises
    NotImplementedError.
    """
    raise NotImplementedError("must be implemented in subclass")
##############
# Public API #
##############
@ -125,43 +143,64 @@ class _URLFetcher(object):
Grab the passed filename from self.location and save it to
a temporary file, returning the temp filename
"""
urlobj = self._grabURL(filename)
prefix = "virtinst-" + os.path.basename(filename) + "."
if "VIRTINST_TEST_SUITE" in os.environ:
filename = os.path.join("/tmp", prefix)
fileobj = file(filename, "w+b")
fn = os.path.join("/tmp", prefix)
fileobj = file(fn, "w")
else:
fileobj = tempfile.NamedTemporaryFile(
dir=self.scratchdir, prefix=prefix, delete=False)
filename = fileobj.name
fn = fileobj.name
self._writeURLToFileobj(urlobj, fileobj)
logging.debug("Saved file to " + filename)
return filename
self._grabURL(filename, fileobj)
logging.debug("Saved file to " + fn)
return fn
def acquireFileContent(self, filename):
"""
Grab the passed filename from self.location and return it as a string
"""
fileobj = StringIO.StringIO()
urlobj = self._grabURL(filename)
self._writeURLToFileobj(urlobj, fileobj)
self._grabURL(filename, fileobj)
return fileobj.getvalue()
class _HTTPURLFetcher(_URLFetcher):
def hasFile(self, filename):
"""
We just do a HEAD request to see if the file exists
"""
url = self._make_full_url(filename)
try:
request = urllib2.Request(url)
request.get_method = lambda: "HEAD"
urllib2.urlopen(request)
response = requests.head(url)
response.raise_for_status()
except Exception, e:
logging.debug("HTTP hasFile: didn't find %s: %s", url, str(e))
return False
return True
def _grabber(self, url):
    """
    Open url with a streaming requests GET.

    Returns a (response, size) tuple. response is the requests response
    object that self._write consumes; size is the Content-Length header
    as an int, or None when the header is missing, is not a plain
    decimal number, or is zero.

    Raises whatever requests.get raises on connection problems, and the
    error from response.raise_for_status() for unsuccessful replies.
    """
    response = requests.get(url, stream=True)
    response.raise_for_status()

    # headers.get() returns None when the server sent no Content-Length;
    # calling .isdigit() on that would raise AttributeError and abort an
    # otherwise successful download, so check for it explicitly.
    length = response.headers.get('content-length')
    if length is None or not length.isdigit():
        return response, None

    # A zero length is still reported as "unknown" (None), as before
    return response, int(length) or None
def _write(self, urlobj, fileobj):
    """
    Stream the body of the requests response urlobj into fileobj.

    The requests object doesn't have a file-like read() method, so we
    walk its iter_content() chunks of self._block_size ourselves. The
    meter is updated with the running byte count after every chunk.
    Returns the total number of bytes written.
    """
    written = 0
    chunks = urlobj.iter_content(chunk_size=self._block_size)
    for chunk in chunks:
        fileobj.write(chunk)
        written += len(chunk)
        self.meter.update(written)
    return written
class _FTPURLFetcher(_URLFetcher):
_ftp = None
@ -171,13 +210,23 @@ class _FTPURLFetcher(_URLFetcher):
return
try:
urlret = urlparse.urlparse(self._make_full_url(""))
self._ftp = ftplib.FTP(urlret[1])
server = urlparse.urlparse(self.location)[1]
self._ftp = ftplib.FTP(server)
self._ftp.login()
except Exception, e:
raise ValueError(_("Opening URL %s failed: %s.") %
(self.location, str(e)))
def _grabber(self, url):
    """
    Fetch over FTP: urllib2 supplies the readable object for url, and
    the ftplib connection held in self._ftp is asked for the size of
    the path component of url.

    Returns a (urlobj, size) tuple.
    """
    urlobj = urllib2.urlopen(urllib2.Request(url))
    remote_path = urlparse.urlparse(url)[2]
    return urlobj, self._ftp.size(remote_path)
def cleanupLocation(self):
if not self._ftp:
return
@ -191,15 +240,15 @@ class _FTPURLFetcher(_URLFetcher):
def hasFile(self, filename):
url = self._make_full_url(filename)
urlret = urlparse.urlparse(url)
path = urlparse.urlparse(url)[2]
try:
try:
# If it's a file
self._ftp.size(urlret[2])
self._ftp.size(path)
except ftplib.all_errors:
# If it's a dir
self._ftp.cwd(urlret[2])
self._ftp.cwd(path)
except ftplib.all_errors, e:
logging.debug("FTP hasFile: couldn't access %s: %s",
url, str(e))
@ -219,6 +268,11 @@ class _LocalURLFetcher(_URLFetcher):
logging.debug("local hasFile: Couldn't find %s", url)
return ret
def _grabber(self, url):
    """
    Open the local path url for reading.

    Returns a (urlobj, size) tuple: an open file object for self._write
    to read from, and the size in bytes reported by os.path.getsize.
    Errors from open() or os.path.getsize() propagate to the caller.
    """
    # Use open() rather than the file() builtin, which Python 3 removed,
    # and binary mode so the bytes are read back untranslated.
    urlobj = open(url, "rb")
    size = os.path.getsize(url)
    return urlobj, size
class _MountedURLFetcher(_LocalURLFetcher):
"""