Commit c542669d authored by PJ Eby

Allow explicit selection of Sourceforge mirror(s) with ``--sf-mirrors``, and

further refine download/retry algorithm.
(backport from trunk)

--HG--
branch : setuptools-0.6
extra : convert_revision : svn%3A6015fed2-1504-0410-9fe1-9d1591cc4771/sandbox/branches/setuptools-0.6%4052013
parent 12860312
......@@ -357,11 +357,37 @@ command line.
Note that if you want to allow downloads from Sourceforge, you need to enable
the ``dl.sourceforge.net`` host. All Sourceforge mirror downloads are treated
as if they had this hostname. (If a download attempt from
``dl.sourceforge.net`` fails, it is automatically retried using a randomly
selected mirror IP drawn from the ``sf-mirrors.telecommunity.com`` round-robin
address. The IPs, however, are not checked against the ``--allow-hosts``
mask.)
as if they had this hostname, regardless of which mirror is actually used to
do the downloading. If you want to restrict downloading to specific
Sourceforge hosts, you must use the ``--sf-mirrors`` option to set what hosts
will be substituted for ``dl.sourceforge.net``. See the next section for more
details.
Selecting Your Preferred Sourceforge Mirror(s)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
You can use the ``--sf-mirrors`` option on the command line, or via the
standard `configuration files`_, to select what Sourceforge mirrors you want
downloads to take place from. For example, putting this in your
configuration::
[easy_install]
sf_mirrors = superb-east, superb-west, easynews
Would cause ``easy_install`` to always perform Sourceforge downloads using
``superb-east.dl.sourceforge.net``, ``superb-west.dl.sourceforge.net``, or
``easynews.dl.sourceforge.net``. You can also specify IP addresses or full
domain names.
The actual mirror used will be selected at runtime. If the selected mirror
fails, another mirror will be selected, after eliminating the failed mirror as
a possible choice.
If you don't specify any mirrors, ``easy_install`` will randomly select mirrors
from the list of IP addresses for ``sf-mirrors.telecommunity.com``. And if
the DNS lookup fails, it will simply make a single download attempt to
``dl.sourceforge.net``, without using any mirrors at all.
Installing on Un-networked Machines
......@@ -883,6 +909,32 @@ Command-Line Options
setting for this option in their `configuration files`_, and then manually
override the setting on the command line as needed.
Note that if you wish to allow Sourceforge downloads, you must allow access
to ``dl.sourceforge.net``. You do not have to list individual Sourceforge
mirror addresses, as mirror selection is controlled by the ``--sf-mirrors``
option.
``--sf-mirrors=NAMES`` (New in 0.6c4)
Set the list of Sourceforge mirror sites to use for downloads published by
Sourceforge. EasyInstall will randomly select one for each Sourceforge
download attempt.
Mirror sites can be given by name (e.g. ``easynews``, ``superb-east``,
etc.) or by full hostname/IP address (e.g. ``easynews.dl.sf.net``). Use a
comma to separate mirrors.
If you do not provide any names, EasyInstall will use the list of IP
addresses provided by the ``sf-mirrors.telecommunity.com`` subdomain, which
is automatically updated daily from Sourceforge's UI pages and DNS.
If, due to firewall protections or server failure, it isn't possible to get
the mirror list from ``sf-mirrors.telecommunity.com``, EasyInstall will
attempt to perform all downloads directly from ``dl.sourceforge.net``
without selecting a mirror. (Note, however, that this is extremely
unreliable due to Sourceforge not keeping the ``dl.sourceforge.net`` IP
addresses up to date with their UI! This is why the backup system at
``sf-mirrors.telecommunity.com`` exists.)
``--prefix=DIR`` (New in 0.6a10)
Use the specified directory as a base for computing the default
installation and script directories. On Windows, the resulting default
......@@ -1210,6 +1262,9 @@ Release Notes/Change History
should've returned a 404. Fall back to ``sf-mirrors.telecommunity.com``
round-robin address for SF mirrors if ``dl.sourceforge.net`` doesn't work.
* Allow explicit selection of Sourceforge mirror(s) with ``--sf-mirrors``, and
further refine download/retry algorithm.
0.6c3
* You can once again use "python -m easy_install" with Python 2.4 and above.
......
......@@ -70,6 +70,7 @@ class easy_install(Command):
('editable', 'e', "Install specified packages in editable form"),
('no-deps', 'N', "don't install dependencies"),
('allow-hosts=', 'H', "pattern(s) that hostnames must match"),
('sf-mirrors=', None, "Sourceforge mirror(s) to use"),
]
boolean_options = [
'zip-ok', 'multi-version', 'exclude-scripts', 'upgrade', 'always-copy',
......@@ -79,7 +80,6 @@ class easy_install(Command):
negative_opt = {'always-unzip': 'zip-ok'}
create_index = PackageIndex
def initialize_options(self):
self.zip_ok = None
self.install_dir = self.script_dir = self.exclude_scripts = None
......@@ -90,7 +90,7 @@ class easy_install(Command):
self.optimize = self.record = None
self.upgrade = self.always_copy = self.multi_version = None
self.editable = self.no_deps = self.allow_hosts = None
self.root = self.prefix = self.no_report = None
self.root = self.prefix = self.no_report = self.sf_mirrors = None
# Options not specifiable via command line
self.package_index = None
......@@ -166,10 +166,10 @@ class easy_install(Command):
hosts = [s.strip() for s in self.allow_hosts.split(',')]
else:
hosts = ['*']
if self.package_index is None:
self.package_index = self.create_index(
self.index_url, search_path = self.shadow_path, hosts=hosts
self.index_url, search_path = self.shadow_path, hosts=hosts,
sf_mirrors = self.sf_mirrors
)
self.local_index = Environment(self.shadow_path+sys.path)
......
......@@ -14,7 +14,7 @@ PYPI_MD5 = re.compile(
'<a href="([^"#]+)">([^<]+)</a>\n\s+\\(<a (?:title="MD5 hash"\n\s+)'
'href="[^?]+\?:action=show_md5&amp;digest=([0-9a-f]{32})">md5</a>\\)'
)
SF_DOWNLOAD = 'dl.sourceforge.net'
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):',re.I).match
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()
......@@ -165,7 +165,9 @@ user_agent = "Python-urllib/%s setuptools/%s" % (
class PackageIndex(Environment):
"""A distribution index that scans web pages for download URLs"""
def __init__(self,index_url="http://www.python.org/pypi",hosts=('*',),*args,**kw):
def __init__(self, index_url="http://www.python.org/pypi", hosts=('*',),
sf_mirrors=None, *args, **kw
):
Environment.__init__(self,*args,**kw)
self.index_url = index_url + "/"[:not index_url.endswith('/')]
self.scanned_urls = {}
......@@ -173,6 +175,33 @@ class PackageIndex(Environment):
self.package_pages = {}
self.allows = re.compile('|'.join(map(translate,hosts))).match
self.to_scan = []
if sf_mirrors:
if isinstance(sf_mirrors,str):
self.sf_mirrors = map(str.strip, sf_mirrors.split(','))
else:
self.sf_mirrors = map(str.strip, sf_mirrors)
else:
self.sf_mirrors = ()
def _get_mirrors(self):
    """Return a fresh list of hosts to try for a Sourceforge download.

    Entries of ``self.sf_mirrors`` are used first: empty entries are
    skipped, and an entry containing no ``.`` is treated as a short mirror
    name and expanded by appending ``.dl.sourceforge.net``.

    When that yields nothing, the IP address list reported by DNS for
    ``sf-mirrors.telecommunity.com`` is used instead.  If that lookup
    raises ``socket.error``, the result is just ``[SF_DOWNLOAD]``.
    """
    candidates = []
    for name in self.sf_mirrors:
        if not name:
            continue    # ignore blank entries (e.g. from a trailing comma)
        if '.' not in name:
            # Bare mirror name; qualify it into a full hostname
            name += '.dl.sourceforge.net'
        candidates.append(name)

    if candidates:
        return candidates

    try:
        # gethostbyname_ex() returns (hostname, aliases, ip_addresses);
        # only the trailing address list is of interest here.
        lookup = socket.gethostbyname_ex('sf-mirrors.telecommunity.com')
        candidates.extend(lookup[-1])
    except socket.error:
        # The lookup failed (e.g. a firewall blocking DNS), so fall back
        # to the generic download host alone.
        candidates[:] = [SF_DOWNLOAD]
    return candidates
def process_url(self, url, retrieve=False):
"""Evaluate a URL as a possible download, and maybe retrieve it"""
......@@ -202,7 +231,6 @@ class PackageIndex(Environment):
f = self.open_url(url)
self.fetched_urls[url] = self.fetched_urls[f.url] = True
if 'html' not in f.headers.get('content-type', '').lower():
f.close() # not html, we can't process it
return
......@@ -212,7 +240,6 @@ class PackageIndex(Environment):
f.close()
if url.startswith(self.index_url) and getattr(f,'code',None)!=404:
page = self.process_index(url, page)
for match in HREF.finditer(page):
link = urlparse.urljoin(base, match.group(1))
self.process_url(link)
......@@ -244,6 +271,20 @@ class PackageIndex(Environment):
def process_index(self,url,page):
"""Process the contents of a PyPI page"""
def scan(link):
......@@ -581,27 +622,27 @@ class PackageIndex(Environment):
def _retry_sf_download(self, url, filename):
self.url_ok(url, True) # raises error if not allowed
try:
return self._attempt_download(url, filename)
except (KeyboardInterrupt,SystemExit):
raise
except:
scheme, server, path, param, query, frag = urlparse.urlparse(url)
if server!='dl.sourceforge.net':
raise
mirror = get_sf_ip()
if server == SF_DOWNLOAD:
mirrors = self._get_mirrors()
query = ''
else:
mirrors = [server]
while mirrors or server != SF_DOWNLOAD:
mirror = random.choice(mirrors)
url = urlparse.urlunparse((scheme,mirror,path,param,query,frag))
while _sf_mirrors:
self.warn("Download failed: %s", sys.exc_info()[1])
url = urlparse.urlunparse((scheme, mirror, path, param, '', frag))
try:
return self._attempt_download(url, filename)
except (KeyboardInterrupt,SystemExit):
raise
except:
_sf_mirrors.remove(mirror) # don't retry the same mirror
mirror = get_sf_ip()
if server != SF_DOWNLOAD:
raise
self.warn("Download failed: %s", sys.exc_info()[1])
mirrors.remove(mirror)
raise # fail if no mirror works
......@@ -692,22 +733,9 @@ def fix_sf_url(url):
if server!='prdownloads.sourceforge.net':
return url
return urlparse.urlunparse(
(scheme, 'dl.sourceforge.net', 'sourceforge'+path, param, '', frag)
(scheme, SF_DOWNLOAD, 'sourceforge'+path, param, '', frag)
)
_sf_mirrors = []
def get_sf_ip():
    """Return one randomly chosen entry from the ``_sf_mirrors`` list.

    If ``_sf_mirrors`` is empty it is filled in place first, using the IP
    address list reported by DNS for ``sf-mirrors.telecommunity.com``; if
    that lookup raises ``socket.error`` the list becomes
    ``['dl.sourceforge.net']`` instead.
    """
    if not _sf_mirrors:
        try:
            # gethostbyname_ex() returns (hostname, aliases, ip_addresses)
            addresses = socket.gethostbyname_ex(
                'sf-mirrors.telecommunity.com'
            )[-1]
        except socket.error:
            # The lookup failed (e.g. a firewall blocking DNS), so use the
            # generic download host as the only choice.
            addresses = ['dl.sourceforge.net']
        # Slice-assign so the shared module-level list is updated in place
        _sf_mirrors[:] = addresses
    return random.choice(_sf_mirrors)
def local_open(url):
"""Read a local path, with special support for directories"""
scheme, server, path, param, query, frag = urlparse.urlparse(url)
......@@ -735,4 +763,17 @@ def local_open(url):
# this line is a kludge to keep the trailing blank lines for pje's editor
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment