Commit d6c2c583 authored by Julien Muchembled, committed by Xavier Thompson

[feat] download: add support for slapos.libnetworkcache

When specifying an alternate URL as fallback, the main URL is always
used for both downloading & uploading from/to networkcache.
parent 0f1d5830
...@@ -400,6 +400,9 @@ def _get_user_config(): ...@@ -400,6 +400,9 @@ def _get_user_config():
return os.path.join(buildout_home, 'default.cfg') return os.path.join(buildout_home, 'default.cfg')
networkcache_client = None
@commands @commands
class Buildout(DictMixin): class Buildout(DictMixin):
...@@ -699,6 +702,19 @@ class Buildout(DictMixin): ...@@ -699,6 +702,19 @@ class Buildout(DictMixin):
os.chdir(options['directory']) os.chdir(options['directory'])
networkcache_section_name = options.get('networkcache-section')
if networkcache_section_name:
networkcache_section = self[networkcache_section_name]
try:
from slapos.libnetworkcache import NetworkcacheClient
global networkcache_client
networkcache_client = NetworkcacheClient(networkcache_section)
except ImportError:
pass
except Exception:
self._logger.exception(
"Failed to setup Networkcache. Continue without.")
def _buildout_path(self, name): def _buildout_path(self, name):
if '${' in name: if '${' in name:
return name return name
......
...@@ -27,7 +27,7 @@ except ImportError: ...@@ -27,7 +27,7 @@ except ImportError:
# Python 2 # Python 2
from urlparse import urlparse from urlparse import urlparse
from urlparse import urlunparse from urlparse import urlunparse
from urllib2 import Request, urlopen, HTTPError from urllib2 import HTTPError, Request, urlopen
from zc.buildout.easy_install import realpath from zc.buildout.easy_install import realpath
from base64 import b64encode from base64 import b64encode
...@@ -220,27 +220,31 @@ class Download(object): ...@@ -220,27 +220,31 @@ class Download(object):
if not path: if not path:
handle, tmp_path = tempfile.mkstemp(prefix='buildout-') handle, tmp_path = tempfile.mkstemp(prefix='buildout-')
os.close(handle) os.close(handle)
self._download(url, tmp_path, md5sum, alternate_url)
cleanup = False
finally:
if cleanup and tmp_path:
remove(tmp_path)
return tmp_path, not path
def _download(self, url, path, md5sum=None, alternate_url=None):
download_url = url
try:
try: try:
tmp_path, headers = self.urlretrieve(url, tmp_path) self.urlretrieve(url, path)
except HTTPError: except HTTPError:
if not alternate_url: if not alternate_url:
raise raise
self.logger.info('using alternate URL: %s', alternate_url) self.logger.info('using alternate URL: %s', alternate_url)
download_url = alternate_url download_url = alternate_url
tmp_path, headers = self.urlretrieve( self.urlretrieve(alternate_url, path)
alternate_url, tmp_path) if not check_md5sum(path, md5sum):
if not check_md5sum(tmp_path, md5sum): raise ChecksumError('MD5 checksum mismatch downloading %r'
raise ChecksumError( % download_url)
'MD5 checksum mismatch downloading %r' % download_url)
cleanup = False
except IOError as e: except IOError as e:
raise zc.buildout.UserError("Error downloading %s: %s" raise zc.buildout.UserError("Error downloading %s: %s"
% (download_url, e)) % (download_url, e))
finally:
if cleanup and tmp_path:
remove(tmp_path)
return tmp_path, not path
def filename(self, url): def filename(self, url):
"""Determine a file name from a URL according to the configuration. """Determine a file name from a URL according to the configuration.
...@@ -269,29 +273,61 @@ class Download(object): ...@@ -269,29 +273,61 @@ class Download(object):
url_host, url_port = parsed[-2:] url_host, url_port = parsed[-2:]
return '%s:%s' % (url_host, url_port) return '%s:%s' % (url_host, url_port)
def urlretrieve(self, url, tmp_path): def _auth(self, url):
parsed_url = urlparse(url) parsed_url = urlparse(url)
req = url if parsed_url.scheme in ('http', 'https'):
while parsed_url.scheme in ('http', 'https'): # not a loop
auth_host = parsed_url.netloc.rsplit('@', 1) auth_host = parsed_url.netloc.rsplit('@', 1)
if len(auth_host) > 1: if len(auth_host) > 1:
auth = auth_host[0] return (auth_host[0],
url = parsed_url._replace(netloc=auth_host[1]).geturl() parsed_url._replace(netloc=auth_host[1]).geturl())
else: auth = netrc.authenticators(parsed_url.hostname)
auth = netrc.authenticators(parsed_url.hostname) if auth:
if not auth: return '{0}:{2}'.format(*auth), url
break
auth = '{0}:{2}'.format(*auth) def urlretrieve(self, url, tmp_path):
req = Request(url) auth = self._auth(url)
if auth:
req = Request(auth[1])
req.add_header("Authorization", req.add_header("Authorization",
"Basic " + bytes2str(b64encode(str2bytes(auth)))) "Basic " + bytes2str(b64encode(str2bytes(auth[0]))))
break else:
req = url
with closing(urlopen(req)) as src: with closing(urlopen(req)) as src:
with open(tmp_path, 'wb') as dst: with open(tmp_path, 'wb') as dst:
shutil.copyfileobj(src, dst) shutil.copyfileobj(src, dst)
return tmp_path, src.info() return tmp_path, src.info()
class Download(Download):
    """Extend the base ``Download`` with a network cache lookup.

    The override of ``_download`` first tries to fetch the file from the
    network cache client exposed by ``.buildout`` as
    ``networkcache_client``; when that does not yield a file passing
    ``check_md5sum``, it falls back to the parent implementation and then
    offers the downloaded file to the cache.

    The cache key is always derived from ``url`` (never from
    ``alternate_url``), for both the lookup and the upload.
    """

    def _download(self, url, path, md5sum=None, alternate_url=None):
        """Fetch ``url`` into ``path``, trying the network cache first.

        :param url: main URL; also the only input of the cache key.
        :param path: destination file, written in binary mode.
        :param md5sum: value handed to ``check_md5sum`` to validate the
            cached copy, and forwarded to the parent ``_download``.
        :param alternate_url: forwarded unchanged to the parent
            ``_download``; never used to query or fill the cache.
        :raises: whatever the parent ``_download`` raises when the cache
            could not provide the file.
        """
        # NOTE(review): the nesting below was reconstructed from a diff view
        # that dropped all indentation -- confirm against the repository.

        # Read at call time: the module-level ``networkcache_client`` is
        # None until a buildout with a networkcache section assigns it.
        from .buildout import networkcache_client as nc
        # ``while`` is only used as a block that can be left early with
        # ``break``; it never iterates a second time.
        while nc: # not a loop
            if self._auth(url): # do not cache restricted data
                # Clearing ``nc`` also disables the upload at the end.
                nc = None
                break
            key = 'file-urlmd5:' + md5(url.encode()).hexdigest()
            if not nc.tryDownload(key):
                break
            # presumably the client's context manager deals with cache
            # errors -- confirm in slapos.libnetworkcache
            with nc:
                # Only the first entry recorded for this exact URL is used.
                entry = next(nc.select(key, {'url': url}), None)
                if entry is None:
                    err = 'no matching entry'
                else:
                    with closing(nc.download(entry['sha512'])) as src, \
                            open(path, 'wb') as dst:
                        shutil.copyfileobj(src, dst)
                    if check_md5sum(path, md5sum):
                        # Cache hit: nothing to download or upload.
                        return
                    err = 'MD5 checksum mismatch'
                # NOTE(review): placed inside ``with nc:`` so that ``err`` is
                # always bound when this line runs -- confirm placement.
                self.logger.info('Cannot download from network cache: %s', err)
            break
        # Cache miss, unusable cached copy, or cache disabled: regular download.
        super(Download, self)._download(url, path, md5sum, alternate_url)
        # ``key`` is always bound here when ``nc`` is still truthy, because
        # every path that skips its assignment also leaves ``nc`` falsy.
        if nc and nc.tryUpload(key):
            with nc, open(path, 'rb') as f:
                nc.upload(f, key, url=url)
def check_md5sum(path, md5sum): def check_md5sum(path, md5sum):
"""Tell whether the MD5 checksum of the file at path matches. """Tell whether the MD5 checksum of the file at path matches.
......
...@@ -67,8 +67,8 @@ An alternate URL can be used in case of HTTPError with the main one. ...@@ -67,8 +67,8 @@ An alternate URL can be used in case of HTTPError with the main one.
Useful when a version of a resource can only be downloaded with a temporary Useful when a version of a resource can only be downloaded with a temporary
URL as long as it's the last version, and this version is then moved to a URL as long as it's the last version, and this version is then moved to a
permanent place when a newer version is released. In such case, when using permanent place when a newer version is released. In such case, when using
a cache, it's important that the main URL (`url`) is always used as cache key. a cache (in particular networkcache), it's important that the main URL (`url`)
And `alternate_url` shall be the temporary URL. is always used as cache key. And `alternate_url` shall be the temporary URL.
>>> path, is_temp = download(server_url+'not-there', >>> path, is_temp = download(server_url+'not-there',
... alternate_url=server_url+'foo.txt') ... alternate_url=server_url+'foo.txt')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment