Cache pypi index, extensions and recipes using buildout internals.

parent b0bcdf03
......@@ -1193,7 +1193,17 @@ class Buildout(UserDict.DictMixin):
links = self['buildout'].get('find-links', '').split(),
index = self['buildout'].get('index'),
newest=self.newest, allow_hosts=self._allow_hosts,
prefer_final=not self.accept_buildout_test_releases)
prefer_final=not self.accept_buildout_test_releases,
download_cache_url=self.download_cache_url,
download_dir_url=self.download_dir_url,
upload_cache_url=self.upload_cache_url,
upload_dir_url=self.upload_dir_url,
signature_private_key_file=self.signature_private_key_file,
signature_certificate_list=self.signature_certificate_list,
shacache_cert_file=self.shacache_cert_file,
shacache_key_file=self.shacache_key_file,
shadir_cert_file=self.shadir_cert_file,
shadir_key_file=self.shadir_key_file,)
# Clear cache because extensions might now let us read pages we
# couldn't read before.
......
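For orientation, a hedged sketch of where these new keyword arguments end up: buildout forwards them to the easy_install Installer, whose __init__ hunk further below stores them. URLs and paths here are placeholders, not values from this commit; only the keyword names are taken from the hunks.

# Sketch only: constructing an Installer with the networkcache settings.
from zc.buildout.easy_install import Installer

installer = Installer(
    dest='/opt/buildout/eggs',                         # placeholder path
    download_cache_url='http://shacache.example.com',  # content cache (shacache)
    download_dir_url='http://shadir.example.com',      # metadata index (shadir)
    signature_certificate_list=[],                     # [] is turned into None downstream
)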
......@@ -44,7 +44,9 @@ def realpath(path):
from zc.buildout.networkcache import get_filename_from_url, \
upload_network_cached, \
download_network_cached
download_network_cached, \
download_index_network_cached, \
upload_index_network_cached
default_index_url = os.environ.get(
......@@ -199,22 +201,137 @@ def _get_version(executable):
return version
FILE_SCHEME = re.compile('file://', re.I).match
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):',re.I).match
HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I)
from setuptools.package_index import distros_for_url, htmldecode
import urlparse
class AllowHostsPackageIndex(setuptools.package_index.PackageIndex):
"""Will allow urls that are local to the system.
No matter what is allow_hosts.
"""
def __init__(self, index_url, hosts, search_path, python,):
setuptools.package_index.PackageIndex.__init__(self,
index_url, hosts=hosts, search_path=search_path,
python=python)
def url_ok(self, url, fatal=False):
if FILE_SCHEME(url):
return True
return setuptools.package_index.PackageIndex.url_ok(self, url, False)
def obtain(self, requirement, installer=None):
# XXX BEGIN HARDCODE
self._current_requirement = str(requirement)
# XXX END HARDCODE
self.prescan(); self.find_packages(requirement)
for dist in self[requirement.key]:
if dist in requirement:
return dist
self.debug("%s does not match %s", requirement, dist)
return super(setuptools.package_index.PackageIndex, self).obtain(requirement, installer)
def find_packages(self, requirement):
# XXX BEGIN HARDCODE
self._current_requirement = str(requirement)
# XXX END HARDCODE
self.scan_url(self.index_url + requirement.unsafe_name+'/')
if not self.package_pages.get(requirement.key):
# Fall back to safe version of the name
self.scan_url(self.index_url + requirement.project_name+'/')
if not self.package_pages.get(requirement.key):
# We couldn't find the target package, so search the index page too
self.not_found_in_index(requirement)
for url in list(self.package_pages.get(requirement.key,())):
# scan each page that might be related to the desired package
self.scan_url(url)
def process_url(self, url, retrieve=False):
"""Evaluate a URL as a possible download, and maybe retrieve it"""
if url in self.scanned_urls and not retrieve:
return
self.scanned_urls[url] = True
if not URL_SCHEME(url):
self.process_filename(url)
return
else:
dists = list(distros_for_url(url))
if dists:
if not self.url_ok(url):
return
self.debug("Found link: %s", url)
if dists or not retrieve or url in self.fetched_urls:
map(self.add, dists)
return # don't need the actual page
if not self.url_ok(url):
self.fetched_urls[url] = True
return
self.info("Reading %s", url)
self.fetched_urls[url] = True # prevent multiple fetch attempts
# We download the index from the cache only if the egg requirement pins an
# exact version. Rationale: without a pinned version we must not get stuck
# on a stale index; with a pinned version we want the index frozen forever.
download_result = False
if getattr(self, '_current_requirement', None) \
and "==" in self._current_requirement \
and globals().get('_download_dir_url') \
and globals().get('_download_cache_url') \
and globals().get('_signature_certificate_list'):
download_result = download_index_network_cached(_download_dir_url,
_download_cache_url, url, self._current_requirement, logger,
_signature_certificate_list)
if download_result:
# Successfully fetched from the cache: reuse the cached page and base URL.
page, base = download_result
f = None
self.fetched_urls[base] = True
else:
f = self.open_url(url, "Download error: %s -- Some packages may not be found!")
if f is None: return
self.fetched_urls[f.url] = True
if 'html' not in f.headers.get('content-type', '').lower():
f.close() # not html, we can't process it
return
base = f.url # handle redirects
page = f.read()
f.close()
# Upload the page only if the shacache options are set in the module
# globals, the requirement pins an exact version, and the fetch did not 404.
if globals().get('_upload_cache_url') \
and globals().get('_upload_dir_url') \
and getattr(self, '_current_requirement', None) \
and "==" in self._current_requirement \
and getattr(f, 'code', None) != 404:
upload_index_network_cached(_upload_dir_url,
_upload_cache_url, url, base, self._current_requirement, page, logger,
globals().get('_signature_private_key_file'),
globals().get('_shacache_cert_file'),
globals().get('_shacache_key_file'),
globals().get('_shadir_cert_file'),
globals().get('_shadir_key_file'))
if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:
page = self.process_index(url, page)
for match in HREF.finditer(page):
link = urlparse.urljoin(base, htmldecode(match.group(1)))
self.process_url(link)
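The caching decision inside process_url boils down to one predicate; a minimal standalone sketch (the helper name is hypothetical, not part of the patch):

def _index_page_cacheable(requirement_string):
    # Only an exactly pinned requirement ("pkg==1.2") may read or write the
    # frozen cached index page; anything unpinned must fetch a fresh page.
    return bool(requirement_string) and '==' in requirement_string

assert _index_page_cacheable('lxml==2.3.6')
assert not _index_page_cacheable('lxml>=2.3')
assert not _index_page_cacheable(None)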
_indexes = {}
def _get_index(executable, index_url, find_links, allow_hosts=('*',),
path=None):
path=None,):
# If path is None, the index will use sys.path. If you provide an empty
# path ([]), it will complain uselessly about missing index pages for
# packages found in the paths that you expect to use. Therefore, this path
......@@ -228,7 +345,7 @@ def _get_index(executable, index_url, find_links, allow_hosts=('*',),
index_url = default_index_url
index = AllowHostsPackageIndex(
index_url, hosts=allow_hosts, search_path=path,
python=_get_version(executable)
python=_get_version(executable),
)
if find_links:
......@@ -352,6 +469,39 @@ class Installer:
shadir_cert_file=None,
shadir_key_file=None,
):
# XXX Setting shacache options as module-level globals so that recipes
# not aware of them can still use them. I now hate myself.
if download_dir_url:
global _download_dir_url
_download_dir_url = download_dir_url
if download_cache_url:
global _download_cache_url
_download_cache_url = download_cache_url
if upload_dir_url:
global _upload_dir_url
_upload_dir_url = upload_dir_url
if upload_cache_url:
global _upload_cache_url
_upload_cache_url = upload_cache_url
if signature_certificate_list:
global _signature_certificate_list
_signature_certificate_list = signature_certificate_list
if signature_private_key_file:
global _signature_private_key_file
_signature_private_key_file = signature_private_key_file
if shacache_cert_file:
global _shacache_cert_file
_shacache_cert_file = shacache_cert_file
if shacache_key_file:
global _shacache_key_file
_shacache_key_file = shacache_key_file
if shadir_cert_file:
global _shadir_cert_file
_shadir_cert_file = shadir_cert_file
if shadir_key_file:
global _shadir_key_file
_shadir_key_file = shadir_key_file
self._dest = dest
self._allow_hosts = allow_hosts
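Since the settings are published as module-level globals, a recipe that predates this commit can still reach them by introspecting the module. A hedged recipe-side sketch (defensive getattr, because the globals are only bound when buildout actually passed the options):

import zc.buildout.easy_install as easy_install

dir_url = getattr(easy_install, '_download_dir_url', None)
cache_url = getattr(easy_install, '_download_cache_url', None)
certs = getattr(easy_install, '_signature_certificate_list', None)
if dir_url and cache_url and certs is not None:
    pass  # a recipe could now call download_network_cached() with these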
......@@ -414,22 +564,11 @@ class Installer:
self._env = pkg_resources.Environment(path,
python=_get_version(executable))
self._index = _get_index(executable, index, links, self._allow_hosts,
self._path)
self._path,)
if versions is not None:
self._versions = versions
self._download_dir_url = download_dir_url
self._download_cache_url = download_cache_url
self._upload_dir_url = upload_dir_url
self._upload_cache_url = upload_cache_url
self._signature_certificate_list = signature_certificate_list
self._signature_private_key_file = signature_private_key_file
self._shacache_cert_file = shacache_cert_file
self._shacache_key_file = shacache_key_file
self._shadir_cert_file = shadir_cert_file
self._shadir_key_file = shadir_key_file
_allowed_eggs_from_site_packages_regex = None
def allow_site_package_egg(self, name):
if (not self._include_site_packages or
......@@ -731,16 +870,25 @@ class Installer:
filename = get_filename_from_url(dist.location)
new_location = os.path.join(tmp, filename)
if not download_network_cached(self._download_dir_url,
self._download_cache_url, new_location, dist.location, logger,
self._signature_certificate_list):
# Try to download from shacache first. If that fails, download from the
# original location and upload the result to shacache.
downloaded_from_cache = False
if globals().get('_download_dir_url') \
and globals().get('_download_cache_url') \
and globals().get('_signature_certificate_list'):
downloaded_from_cache = download_network_cached(_download_dir_url,
_download_cache_url, new_location, dist.location, logger,
_signature_certificate_list)
if not downloaded_from_cache:
new_location = self._index.download(dist.location, tmp)
if self._upload_cache_url and self._upload_dir_url:
upload_network_cached(self._upload_dir_url,
self._upload_cache_url, dist.location, new_location, logger,
self._signature_private_key_file,
self._shacache_cert_file, self._shacache_key_file,
self._shadir_cert_file, self._shadir_key_file)
# XXX Need a better check for unbound global variables
if globals().get('_upload_cache_url') \
and globals().get('_upload_dir_url'):
upload_network_cached(_upload_dir_url,
_upload_cache_url, dist.location, new_location, logger,
_signature_private_key_file,
_shacache_cert_file, _shacache_key_file,
_shadir_cert_file, _shadir_key_file)
if (download_cache
and (realpath(new_location) == realpath(dist.location))
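The control flow above is a cache-aside pattern; reduced to a sketch with the three steps injected as callables (hypothetical names, runnable as-is):

def fetch_distribution(location, tmp, cache_download, origin_download, cache_upload):
    """Mirror of the branch structure above."""
    if cache_download(location, tmp):       # 1. cache hit: use the shacache copy
        return tmp
    local = origin_download(location, tmp)  # 2. miss: fetch from the origin
    cache_upload(location, local)           # 3. publish for the next run
    return local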
......@@ -854,7 +1002,7 @@ class Installer:
self._links.append(link)
self._index = _get_index(self._executable,
self._index_url, self._links,
self._allow_hosts, self._path)
self._allow_hosts, self._path,)
for dist in dists:
# Check whether we picked a version and, if we did, report it:
......
......@@ -12,12 +12,12 @@
#
##############################################################################
#XXX factor with slapos/grid/networkcache.py and use libnetworkcache helpers
import hashlib
import os
import posixpath
import re
import shutil
import urllib2
import urlparse
import traceback
......@@ -25,7 +25,11 @@ import traceback
try:
try:
from slapos.libnetworkcache import NetworkcacheClient, UploadError, \
DirectoryNotFound
DirectoryNotFound
from slapos.networkcachehelper import \
helper_download_network_cached, \
helper_upload_network_cached_from_file, \
helper_download_network_cached_to_file
except ImportError:
LIBNETWORKCACHE_ENABLED = False
else:
......@@ -68,12 +72,19 @@ def get_directory_key(url):
Check whether the url belongs to pypi:
- if yes, the directory key will be pypi-buildout-urlmd5
- if not, the directory key will be slapos-buildout-urlmd5
# XXX why is that?
"""
urlmd5 = hashlib.md5(url).hexdigest()
if 'pypi' in url:
return 'pypi-buildout-%s' % urlmd5
return 'slapos-buildout-%s' % urlmd5
@fallback_call
def get_index_directory_key(url, requirement):
"""Returns directory hash based on egg requirement.
"""
return 'pypi-index-%s-%s' % (hashlib.md5(url).hexdigest(), requirement)
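A worked example of the two key schemes side by side (Python 2, values illustrative):

import hashlib

url = 'http://pypi.python.org/simple/lxml/'
requirement = 'lxml==2.3.6'
urlmd5 = hashlib.md5(url).hexdigest()

file_key = 'pypi-buildout-%s' % urlmd5                  # distribution files
index_key = 'pypi-index-%s-%s' % (urlmd5, requirement)  # index pages, per pinned requirement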
@fallback_call
def download_network_cached(dir_url, cache_url, path, url, logger,
......@@ -88,56 +99,74 @@ def download_network_cached(dir_url, cache_url, path, url, logger,
if not LIBNETWORKCACHE_ENABLED:
return False
if not(dir_url and cache_url):
return False
if md5sum is None:
md5sum = _get_md5_from_url(url)
directory_key = get_directory_key(url)
url = os.path.basename(url)
if len(signature_certificate_list) == 0:
# convert [] into None in order to call nc nicely
signature_certificate_list = None
try:
nc = NetworkcacheClient(cache_url, dir_url,
signature_certificate_list=signature_certificate_list)
except TypeError:
logger.warning('Incompatible version of networkcache, not using it.')
return False
logger.debug('Trying to download %s from network cache...' % url)
try:
file_descriptor = nc.select(directory_key)
f = open(path, 'w+b')
try:
shutil.copyfileobj(file_descriptor, f)
finally:
f.close()
file_descriptor.close()
logger.info('Downloaded %s from network cache.' % url)
if helper_download_network_cached_to_file(
dir_url=dir_url,
cache_url=cache_url,
signature_certificate_list=signature_certificate_list,
directory_key=directory_key,
path=path):
logger.info('Downloaded %s from network cache.' % url)
if not check_md5sum(path, md5sum):
logger.info('MD5 checksum mismatch downloading %s' % url)
return False
return True
return False
except (IOError, DirectoryNotFound), e:
if isinstance(e, urllib2.HTTPError) and e.code == 404:
logger.debug('%s does not exist in network cache.' % url)
else:
logger.debug('Failed to download from network cache %s: %s' % \
(url, str(e)))
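A hedged usage sketch of the rewritten download path (logger setup and URLs are placeholders; the positional order matches the call site in easy_install.py above):

import logging
logger = logging.getLogger('networkcache')

fetched = download_network_cached(
    'http://shadir.example.com',    # dir_url
    'http://shacache.example.com',  # cache_url
    '/tmp/lxml-2.3.6.tar.gz',       # local destination path
    'http://pypi.python.org/packages/source/l/lxml/lxml-2.3.6.tar.gz',
    logger,
    [])                             # signature_certificate_list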
@fallback_call
def download_index_network_cached(dir_url, cache_url, url, requirement, logger,
signature_certificate_list):
"""
XXX description
Downloads pypi index from a network cache provider
If something fail (providor be offline, or hash_string fail), we ignore
network cached index.
return index if succeeded, False otherwise.
"""
if not LIBNETWORKCACHE_ENABLED:
return False
return True
directory_key = get_index_directory_key(url, requirement)
wanted_metadata_dict = {
'urlmd5': hashlib.md5(url).hexdigest(),
'requirement': requirement,
}
required_key_list = ['base']
result = helper_download_network_cached(dir_url, cache_url,
signature_certificate_list,
directory_key, wanted_metadata_dict, required_key_list)
if result:
file_descriptor, metadata = result
try:
content = file_descriptor.read()
logger.info('Downloaded %s from network cache.' % url)
return content, metadata['base']
except (IOError, DirectoryNotFound), e:
if isinstance(e, urllib2.HTTPError) and e.code == 404:
logger.debug('%s does not exist in network cache.' % url)
else:
logger.debug('Failed to download from network cache %s: %s' % \
(url, str(e)))
return False
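And the index variant; it returns the page content plus the base URL that was recorded at upload time (again a sketch with placeholder values):

import logging
logger = logging.getLogger('networkcache')

result = download_index_network_cached(
    'http://shadir.example.com', 'http://shacache.example.com',
    'http://pypi.python.org/simple/lxml/', 'lxml==2.3.6',
    logger, [])
if result:
    page, base = result  # raw HTML and its base URL, ready for HREF scanning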
@fallback_call
def upload_network_cached(dir_url, cache_url, external_url, path, logger,
signature_private_key_file, shacache_cert_file, shacache_key_file,
shadir_cert_file, shadir_key_file):
"""Upload file to a network cache server"""
# XXX use helper and FACTOR code
if not LIBNETWORKCACHE_ENABLED:
return False
......@@ -187,6 +216,64 @@ def upload_network_cached(dir_url, cache_url, external_url, path, logger,
return True
@fallback_call
def upload_index_network_cached(dir_url, cache_url, external_url, base, requirement, content, logger,
signature_private_key_file, shacache_cert_file, shacache_key_file,
shadir_cert_file, shadir_key_file):
# XXX use helper and FACTOR code
"""Upload content of a web page to a network cache server"""
if not LIBNETWORKCACHE_ENABLED:
return False
if not (dir_url and cache_url):
return False
logger.info('Uploading %s content into network cache.' % external_url)
directory_key = get_index_directory_key(external_url, requirement)
kw = dict(file="file",
base=base,
urlmd5=hashlib.md5(external_url).hexdigest(),
requirement=requirement)
import tempfile
f = tempfile.TemporaryFile()
f.write(content)
f.seek(0)  # rewind so that the upload reads the page from the beginning
# convert '' into None in order to call nc nicely
if not signature_private_key_file:
signature_private_key_file = None
if not shacache_cert_file:
shacache_cert_file = None
if not shacache_key_file:
shacache_key_file = None
if not shadir_cert_file:
shadir_cert_file = None
if not shadir_key_file:
shadir_key_file = None
try:
nc = NetworkcacheClient(cache_url, dir_url,
signature_private_key_file=signature_private_key_file,
shacache_cert_file=shacache_cert_file,
shacache_key_file=shacache_key_file,
shadir_cert_file=shadir_cert_file,
shadir_key_file=shadir_key_file)
except TypeError:
logger.warning('Incompatible version of networkcache, not using it.')
return False
try:
return nc.upload_generic(f, directory_key, **kw)
except (IOError, UploadError), e:
logger.info('Failed to upload index page: %s' % str(e))
return False
finally:
f.close()
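Note the symmetry that makes retrieval work: the kw metadata written here must satisfy what download_index_network_cached asks for. The invariant, spelled out as a runnable check (values illustrative):

uploaded = dict(file='file', base='http://pypi.python.org/simple/lxml/',
                urlmd5='0123456789abcdef0123456789abcdef',
                requirement='lxml==2.3.6')
wanted = {'urlmd5': uploaded['urlmd5'], 'requirement': 'lxml==2.3.6'}
required_key_list = ['base']

assert all(uploaded[k] == v for k, v in wanted.items())
assert all(k in uploaded for k in required_key_list)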
@fallback_call
def get_filename_from_url(url):
......