Cache pypi index, extensions and recipes using buildout internals.

parent b0bcdf03
...@@ -1193,7 +1193,17 @@ class Buildout(UserDict.DictMixin): ...@@ -1193,7 +1193,17 @@ class Buildout(UserDict.DictMixin):
links = self['buildout'].get('find-links', '').split(), links = self['buildout'].get('find-links', '').split(),
index = self['buildout'].get('index'), index = self['buildout'].get('index'),
newest=self.newest, allow_hosts=self._allow_hosts, newest=self.newest, allow_hosts=self._allow_hosts,
prefer_final=not self.accept_buildout_test_releases) prefer_final=not self.accept_buildout_test_releases,
download_cache_url=self.download_cache_url,
download_dir_url=self.download_dir_url,
upload_cache_url=self.upload_cache_url,
upload_dir_url=self.upload_dir_url,
signature_private_key_file=self.signature_private_key_file,
signature_certificate_list=self.signature_certificate_list,
shacache_cert_file=self.shacache_cert_file,
shacache_key_file=self.shacache_key_file,
shadir_cert_file=self.shadir_cert_file,
shadir_key_file=self.shadir_key_file,)
# Clear cache because extensions might now let us read pages we # Clear cache because extensions might now let us read pages we
# couldn't read before. # couldn't read before.
......
...@@ -44,7 +44,9 @@ def realpath(path): ...@@ -44,7 +44,9 @@ def realpath(path):
from zc.buildout.networkcache import get_filename_from_url, \ from zc.buildout.networkcache import get_filename_from_url, \
upload_network_cached, \ upload_network_cached, \
download_network_cached download_network_cached, \
download_index_network_cached, \
upload_index_network_cached
default_index_url = os.environ.get( default_index_url = os.environ.get(
...@@ -199,22 +201,137 @@ def _get_version(executable): ...@@ -199,22 +201,137 @@ def _get_version(executable):
return version return version
FILE_SCHEME = re.compile('file://', re.I).match FILE_SCHEME = re.compile('file://', re.I).match
# Matcher for a leading URL scheme ("http:", "file:", ...). The scheme name
# must be at least two characters long and is captured as group 1.
URL_SCHEME = re.compile(
    '([-+.a-z0-9]{2,}):',
    flags=re.IGNORECASE,
).match

# Pattern locating href attributes; group 1 is the attribute value, which may
# be single-quoted, double-quoted or not quoted at all.
HREF = re.compile(
    """href\\s*=\\s*['"]?([^'"> ]+)""",
    flags=re.IGNORECASE,
)
from setuptools.package_index import distros_for_url, htmldecode
import urlparse
class AllowHostsPackageIndex(setuptools.package_index.PackageIndex): class AllowHostsPackageIndex(setuptools.package_index.PackageIndex):
"""Will allow urls that are local to the system. """Will allow urls that are local to the system.
No matter what is allow_hosts. No matter what is allow_hosts.
""" """
def __init__(self, index_url, hosts, search_path, python):
    """Initialise the index through the setuptools ``PackageIndex``.

    ``index_url`` is passed positionally; ``hosts``, ``search_path`` and
    ``python`` are forwarded as the keyword arguments of the same name.
    """
    base_init = setuptools.package_index.PackageIndex.__init__
    base_init(
        self,
        index_url,
        hosts=hosts,
        search_path=search_path,
        python=python,
    )
def url_ok(self, url, fatal=False):
    """Accept every ``file://`` URL, otherwise ask the base class.

    ``fatal`` is not forwarded: the base ``url_ok`` is always called with
    ``False`` as its last argument.
    """
    is_local = FILE_SCHEME(url)
    if not is_local:
        return setuptools.package_index.PackageIndex.url_ok(self, url, False)
    return True
def obtain(self, requirement, installer=None):
    """Return a distribution from the index that satisfies ``requirement``.

    The requirement string is stored on ``self._current_requirement`` so
    that ``process_url`` can decide whether the network cache may be used.
    The index is then prescanned and searched; the first known distribution
    matching the requirement is returned. When none matches, the lookup is
    delegated to the class following ``PackageIndex`` in the MRO.
    """
    # XXX BEGIN HARDCODE
    self._current_requirement = str(requirement)
    # XXX END HARDCODE
    self.prescan()
    self.find_packages(requirement)
    for dist in self[requirement.key]:
        if dist in requirement:
            return dist
        self.debug("%s does not match %s", requirement, dist)
    # This method replaces PackageIndex.obtain, so skip it in the MRO. The
    # class is referenced through its module because the bare name
    # ``PackageIndex`` is not imported here.
    return super(setuptools.package_index.PackageIndex, self).obtain(
        requirement, installer)
def find_packages(self, requirement):
    """Scan the index pages that may list distributions of ``requirement``.

    The requirement string is recorded on ``self._current_requirement``
    before any page is scanned.
    """
    # XXX BEGIN HARDCODE
    self._current_requirement = str(requirement)
    # XXX END HARDCODE
    key = requirement.key

    def known_pages():
        # Looked up again on every call: scanning may register new pages.
        return self.package_pages.get(key)

    self.scan_url(self.index_url + requirement.unsafe_name + '/')

    if not known_pages():
        # Fall back to safe version of the name
        self.scan_url(self.index_url + requirement.project_name + '/')

    if not known_pages():
        # We couldn't find the target package, so search the index page too
        self.not_found_in_index(requirement)

    # Iterate over a copy of the page collection while scanning each page
    # that might be related to the desired package.
    for page_url in list(self.package_pages.get(key, ())):
        self.scan_url(page_url)
def process_url(self, url, retrieve=False):
    """Evaluate a URL as a possible download, and maybe retrieve it.

    When a page has to be read, the network cache is tried first, but only
    while resolving a requirement pinned with ``==`` and only when the
    download settings (``_download_dir_url``, ``_download_cache_url`` and
    ``_signature_certificate_list``) exist as non-empty module globals.
    A page read from its real location is uploaded to the network cache
    under the same pinning rule when ``_upload_dir_url`` and
    ``_upload_cache_url`` are set. Every link found in the page is then
    processed recursively.
    """
    if url in self.scanned_urls and not retrieve:
        return
    self.scanned_urls[url] = True
    if not URL_SCHEME(url):
        self.process_filename(url)
        return

    dists = list(distros_for_url(url))
    if dists:
        if not self.url_ok(url):
            return
        self.debug("Found link: %s", url)

    if dists or not retrieve or url in self.fetched_urls:
        # Explicit loop: add() is called for its side effect only.
        for dist in dists:
            self.add(dist)
        return  # don't need the actual page

    if not self.url_ok(url):
        self.fetched_urls[url] = True
        return

    self.info("Reading %s", url)
    self.fetched_urls[url] = True  # prevent multiple fetch attempts

    settings = globals()
    requirement = getattr(self, '_current_requirement', None)
    # We download from cache only if we have a specific version for egg
    # requirement.
    # Reason: If no specific version, we don't want to be blocked with an
    # old index. If there is specific version, we want index to freeze
    # forever.
    pinned = bool(requirement) and "==" in requirement

    download_result = False
    download_dir_url = settings.get('_download_dir_url')
    # Both URLs are checked; the cache URL is optional module state and
    # reading it unguarded would fail when it was never set.
    download_cache_url = settings.get('_download_cache_url')
    certificate_list = settings.get('_signature_certificate_list')
    if pinned and download_dir_url and download_cache_url \
            and certificate_list:
        download_result = download_index_network_cached(
            download_dir_url, download_cache_url, url, requirement, logger,
            certificate_list)

    if download_result:
        # Successfully fetched from cache. Hardcode some things...
        page, base = download_result
        f = None
        self.fetched_urls[base] = True
    else:
        f = self.open_url(
            url, "Download error: %s -- Some packages may not be found!")
        if f is None:
            return
        self.fetched_urls[f.url] = True
        if 'html' not in f.headers.get('content-type', '').lower():
            f.close()  # not html, we can't process it
            return

        base = f.url  # handle redirects
        page = f.read()
        f.close()

        # Only a page that was really fetched is uploaded; one that came
        # from the network cache is already stored there. The shacache
        # options must exist in the module globals and be non-empty.
        upload_dir_url = settings.get('_upload_dir_url')
        upload_cache_url = settings.get('_upload_cache_url')
        if upload_cache_url and upload_dir_url and pinned \
                and getattr(f, 'code', None) != 404:
            upload_index_network_cached(
                upload_dir_url, upload_cache_url, url, base, requirement,
                page, logger,
                settings.get('_signature_private_key_file'),
                settings.get('_shacache_cert_file'),
                settings.get('_shacache_key_file'),
                settings.get('_shadir_cert_file'),
                settings.get('_shadir_key_file'))

    # f is None for a cached page, in which case there is no status code.
    if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:
        page = self.process_index(url, page)

    for match in HREF.finditer(page):
        link = urlparse.urljoin(base, htmldecode(match.group(1)))
        self.process_url(link)
_indexes = {} _indexes = {}
def _get_index(executable, index_url, find_links, allow_hosts=('*',), def _get_index(executable, index_url, find_links, allow_hosts=('*',),
path=None): path=None,):
# If path is None, the index will use sys.path. If you provide an empty # If path is None, the index will use sys.path. If you provide an empty
# path ([]), it will complain uselessly about missing index pages for # path ([]), it will complain uselessly about missing index pages for
# packages found in the paths that you expect to use. Therefore, this path # packages found in the paths that you expect to use. Therefore, this path
...@@ -228,7 +345,7 @@ def _get_index(executable, index_url, find_links, allow_hosts=('*',), ...@@ -228,7 +345,7 @@ def _get_index(executable, index_url, find_links, allow_hosts=('*',),
index_url = default_index_url index_url = default_index_url
index = AllowHostsPackageIndex( index = AllowHostsPackageIndex(
index_url, hosts=allow_hosts, search_path=path, index_url, hosts=allow_hosts, search_path=path,
python=_get_version(executable) python=_get_version(executable),
) )
if find_links: if find_links:
...@@ -352,6 +469,39 @@ class Installer: ...@@ -352,6 +469,39 @@ class Installer:
shadir_cert_file=None, shadir_cert_file=None,
shadir_key_file=None, shadir_key_file=None,
): ):
# XXX Setting shacache options to global variables in order to be able
# to use them from recipes not aware of them. I now hate myself.
if download_dir_url:
global _download_dir_url
_download_dir_url = download_dir_url
if download_cache_url:
global _download_cache_url
_download_cache_url = download_cache_url
if upload_dir_url:
global _upload_dir_url
_upload_dir_url = upload_dir_url
if upload_cache_url:
global _upload_cache_url
_upload_cache_url = upload_cache_url
if signature_certificate_list:
global _signature_certificate_list
_signature_certificate_list = signature_certificate_list
if signature_private_key_file:
global _signature_private_key_file
_signature_private_key_file = signature_private_key_file
if shacache_cert_file:
global _shacache_cert_file
_shacache_cert_file = shacache_cert_file
if shacache_key_file:
global _shacache_key_file
_shacache_key_file = shacache_key_file
if shadir_cert_file:
global _shadir_cert_file
_shadir_cert_file = shadir_cert_file
if shadir_key_file:
global _shadir_key_file
_shadir_key_file = shadir_key_file
self._dest = dest self._dest = dest
self._allow_hosts = allow_hosts self._allow_hosts = allow_hosts
...@@ -414,22 +564,11 @@ class Installer: ...@@ -414,22 +564,11 @@ class Installer:
self._env = pkg_resources.Environment(path, self._env = pkg_resources.Environment(path,
python=_get_version(executable)) python=_get_version(executable))
self._index = _get_index(executable, index, links, self._allow_hosts, self._index = _get_index(executable, index, links, self._allow_hosts,
self._path) self._path,)
if versions is not None: if versions is not None:
self._versions = versions self._versions = versions
self._download_dir_url = download_dir_url
self._download_cache_url = download_cache_url
self._upload_dir_url = upload_dir_url
self._upload_cache_url = upload_cache_url
self._signature_certificate_list = signature_certificate_list
self._signature_private_key_file = signature_private_key_file
self._shacache_cert_file = shacache_cert_file
self._shacache_key_file = shacache_key_file
self._shadir_cert_file = shadir_cert_file
self._shadir_key_file = shadir_key_file
_allowed_eggs_from_site_packages_regex = None _allowed_eggs_from_site_packages_regex = None
def allow_site_package_egg(self, name): def allow_site_package_egg(self, name):
if (not self._include_site_packages or if (not self._include_site_packages or
...@@ -731,16 +870,25 @@ class Installer: ...@@ -731,16 +870,25 @@ class Installer:
filename = get_filename_from_url(dist.location) filename = get_filename_from_url(dist.location)
new_location = os.path.join(tmp, filename) new_location = os.path.join(tmp, filename)
if not download_network_cached(self._download_dir_url, # Try to download from shacache first. If not possible, downloads from
self._download_cache_url, new_location, dist.location, logger, # Original location and uploads it to shacache.
self._signature_certificate_list): downloaded_from_cache = False
if globals().get('_download_dir_url') \
and globals().get('_download_dir_url') \
and globals().get('_signature_certificate_list'):
downloaded_from_cache = download_network_cached(_download_dir_url,
_download_cache_url, new_location, dist.location, logger,
_signature_certificate_list)
if not downloaded_from_cache:
new_location = self._index.download(dist.location, tmp) new_location = self._index.download(dist.location, tmp)
if self._upload_cache_url and self._upload_dir_url: # XXX Better check for unbound global variables
upload_network_cached(self._upload_dir_url, if globals().get('_upload_cache_url') \
self._upload_cache_url, dist.location, new_location, logger, and globals().get('_upload_dir_url'):
self._signature_private_key_file, upload_network_cached(_upload_dir_url,
self._shacache_cert_file, self._shacache_key_file, _upload_cache_url, dist.location, new_location, logger,
self._shadir_cert_file, self._shadir_key_file) _signature_private_key_file,
_shacache_cert_file, _shacache_key_file,
_shadir_cert_file, _shadir_key_file)
if (download_cache if (download_cache
and (realpath(new_location) == realpath(dist.location)) and (realpath(new_location) == realpath(dist.location))
...@@ -854,7 +1002,7 @@ class Installer: ...@@ -854,7 +1002,7 @@ class Installer:
self._links.append(link) self._links.append(link)
self._index = _get_index(self._executable, self._index = _get_index(self._executable,
self._index_url, self._links, self._index_url, self._links,
self._allow_hosts, self._path) self._allow_hosts, self._path,)
for dist in dists: for dist in dists:
# Check whether we picked a version and, if we did, report it: # Check whether we picked a version and, if we did, report it:
......
...@@ -12,12 +12,12 @@ ...@@ -12,12 +12,12 @@
# #
############################################################################## ##############################################################################
#XXX factor with slapos/grid/networkcache.py and use libnetworkcache helpers
import hashlib import hashlib
import os import os
import posixpath import posixpath
import re import re
import shutil
import urllib2 import urllib2
import urlparse import urlparse
import traceback import traceback
...@@ -26,6 +26,10 @@ try: ...@@ -26,6 +26,10 @@ try:
try: try:
from slapos.libnetworkcache import NetworkcacheClient, UploadError, \ from slapos.libnetworkcache import NetworkcacheClient, UploadError, \
DirectoryNotFound DirectoryNotFound
from slapos.networkcachehelper import \
helper_download_network_cached, \
helper_upload_network_cached_from_file, \
helper_download_network_cached_to_file
except ImportError: except ImportError:
LIBNETWORKCACHE_ENABLED = False LIBNETWORKCACHE_ENABLED = False
else: else:
...@@ -68,12 +72,19 @@ def get_directory_key(url): ...@@ -68,12 +72,19 @@ def get_directory_key(url):
Basically check if the url belongs to pypi: Basically check if the url belongs to pypi:
- if yes, the directory key will be pypi-buildout-urlmd5 - if yes, the directory key will be pypi-buildout-urlmd5
- if not, the directory key will be slapos-buildout-urlmd5 - if not, the directory key will be slapos-buildout-urlmd5
# XXX why is that?
""" """
urlmd5 = hashlib.md5(url).hexdigest() urlmd5 = hashlib.md5(url).hexdigest()
if 'pypi' in url: if 'pypi' in url:
return 'pypi-buildout-%s' % urlmd5 return 'pypi-buildout-%s' % urlmd5
return 'slapos-buildout-%s' % urlmd5 return 'slapos-buildout-%s' % urlmd5
@fallback_call
def get_index_directory_key(url, requirement):
    """Build the network cache directory key of an index page.

    The key combines the MD5 hex digest of ``url`` with the egg
    ``requirement`` string.
    """
    url_digest = hashlib.md5(url).hexdigest()
    return 'pypi-index-%s-%s' % (url_digest, requirement)
@fallback_call @fallback_call
def download_network_cached(dir_url, cache_url, path, url, logger, def download_network_cached(dir_url, cache_url, path, url, logger,
...@@ -88,41 +99,60 @@ def download_network_cached(dir_url, cache_url, path, url, logger, ...@@ -88,41 +99,60 @@ def download_network_cached(dir_url, cache_url, path, url, logger,
if not LIBNETWORKCACHE_ENABLED: if not LIBNETWORKCACHE_ENABLED:
return False return False
if not(dir_url and cache_url):
return False
if md5sum is None: if md5sum is None:
md5sum = _get_md5_from_url(url) md5sum = _get_md5_from_url(url)
directory_key = get_directory_key(url) directory_key = get_directory_key(url)
url = os.path.basename(url) url = os.path.basename(url)
if len(signature_certificate_list) == 0:
# convert [] into None in order to call nc nicely
signature_certificate_list = None
try:
nc = NetworkcacheClient(cache_url, dir_url,
signature_certificate_list=signature_certificate_list)
except TypeError:
logger.warning('Incompatible version of networkcache, not using it.')
return False
logger.debug('Trying to download %s from network cache...' % url) logger.debug('Trying to download %s from network cache...' % url)
try:
file_descriptor = nc.select(directory_key)
f = open(path, 'w+b') if helper_download_network_cached_to_file(
try: dir_url=dir_url,
shutil.copyfileobj(file_descriptor, f) cache_url=cache_url,
finally: signature_certificate_list=signature_certificate_list,
f.close() directory_key=directory_key,
file_descriptor.close() path=path):
logger.info('Downloaded %s from network cache.' % url) logger.info('Downloaded %s from network cache.' % url)
if not check_md5sum(path, md5sum): if not check_md5sum(path, md5sum):
logger.info('MD5 checksum mismatch downloading %s' % url) logger.info('MD5 checksum mismatch downloading %s' % url)
return False return False
return True
return False
@fallback_call
def download_index_network_cached(dir_url, cache_url, url, requirement, logger,
signature_certificate_list):
"""
XXX description
Downloads a PyPI index page from a network cache provider.
If something fails (the provider is offline, or the hash_string check fails),
the network-cached index is ignored.
Returns the index content and its base URL on success, False otherwise.
"""
if not LIBNETWORKCACHE_ENABLED:
return False
directory_key = get_index_directory_key(url, requirement)
wanted_metadata_dict = {
'urlmd5':hashlib.md5(url).hexdigest(),
'requirement':requirement
}
required_key_list = ['base']
result = helper_download_network_cached(dir_url, cache_url,
signature_certificate_list,
directory_key, wanted_metadata_dict, required_key_list)
if result:
file_descriptor, metadata = result
try:
content = file_descriptor.read()
logger.info('Downloaded %s from network cache.' % url)
return content, metadata['base']
except (IOError, DirectoryNotFound), e: except (IOError, DirectoryNotFound), e:
if isinstance(e, urllib2.HTTPError) and e.code == 404: if isinstance(e, urllib2.HTTPError) and e.code == 404:
logger.debug('%s does not exist in network cache.' % url) logger.debug('%s does not exist in network cache.' % url)
...@@ -130,14 +160,13 @@ def download_network_cached(dir_url, cache_url, path, url, logger, ...@@ -130,14 +160,13 @@ def download_network_cached(dir_url, cache_url, path, url, logger,
logger.debug('Failed to download from network cache %s: %s' % \ logger.debug('Failed to download from network cache %s: %s' % \
(url, str(e))) (url, str(e)))
return False return False
return True
@fallback_call @fallback_call
def upload_network_cached(dir_url, cache_url, external_url, path, logger, def upload_network_cached(dir_url, cache_url, external_url, path, logger,
signature_private_key_file, shacache_cert_file, shacache_key_file, signature_private_key_file, shacache_cert_file, shacache_key_file,
shadir_cert_file, shadir_key_file): shadir_cert_file, shadir_key_file):
"""Upload file to a network cache server""" """Upload file to a network cache server"""
# XXX use helper and FACTOR code
if not LIBNETWORKCACHE_ENABLED: if not LIBNETWORKCACHE_ENABLED:
return False return False
...@@ -187,6 +216,64 @@ def upload_network_cached(dir_url, cache_url, external_url, path, logger, ...@@ -187,6 +216,64 @@ def upload_network_cached(dir_url, cache_url, external_url, path, logger,
return True return True
@fallback_call
def upload_index_network_cached(dir_url, cache_url, external_url, base,
        requirement, content, logger, signature_private_key_file,
        shacache_cert_file, shacache_key_file, shadir_cert_file,
        shadir_key_file):
    """Upload content of a web page to a network cache server.

    ``content`` is stored under the directory key derived from
    ``external_url`` and ``requirement``, together with ``base``, the MD5
    hex digest of ``external_url`` and ``requirement`` as metadata.

    Returns False when libnetworkcache is not available, when ``dir_url``
    or ``cache_url`` is empty, when the client cannot be created with the
    given options, or when the upload fails; otherwise returns the result
    of ``upload_generic``.
    """
    # XXX use helper and FACTOR code
    if not LIBNETWORKCACHE_ENABLED:
        return False

    if not (dir_url and cache_url):
        return False

    logger.info('Uploading %s content into network cache.' % external_url)

    directory_key = get_index_directory_key(external_url, requirement)
    kw = dict(file="file",
              base=base,
              urlmd5=hashlib.md5(external_url).hexdigest(),
              requirement=requirement)

    # The client is created before the temporary file so that nothing is
    # left open when the installed networkcache version is incompatible.
    try:
        # convert '' into None in order to call nc nicely
        nc = NetworkcacheClient(cache_url, dir_url,
            signature_private_key_file=signature_private_key_file or None,
            shacache_cert_file=shacache_cert_file or None,
            shacache_key_file=shacache_key_file or None,
            shadir_cert_file=shadir_cert_file or None,
            shadir_key_file=shadir_key_file or None)
    except TypeError:
        logger.warning('Incompatible version of networkcache, not using it.')
        return False

    import tempfile
    f = tempfile.TemporaryFile()
    try:
        f.write(content)
        # Rewind so that the uploader reads the content from its start.
        f.seek(0)
        return nc.upload_generic(f, directory_key, **kw)
    except (IOError, UploadError) as e:
        logger.info('Fail to upload file. %s' % \
                                                  (str(e)))
        return False
    finally:
        f.close()
@fallback_call @fallback_call
def get_filename_from_url(url): def get_filename_from_url(url):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment