Commit 8f64fbe5 authored by PJ Eby

Move package index/downloading stuff to setuptools.package_index module.

--HG--
branch : setuptools
extra : convert_revision : svn%3A6015fed2-1504-0410-9fe1-9d1591cc4771/sandbox/trunk/setuptools%4041051
parent 449f3574
......@@ -280,7 +280,8 @@ Known Issues
* All downloads are now managed by the ``PackageIndex`` class (which is now
subclassable and replaceable), so that embedders can more easily override
download logic, give download progress reports, etc. The class has also
been moved to the new ``setuptools.package_index`` module.
* The ``Installer`` class no longer handles downloading, manages a temporary
directory, or tracks the ``zip_ok`` option. Downloading is now handled
......
......@@ -12,311 +12,14 @@ __ http://peak.telecommunity.com/DevCenter/EasyInstall
"""
import sys
import os.path
import pkg_resources
import re
import zipimport
import shutil
import urlparse
import urllib2
import tempfile
import sys, os.path, zipimport, shutil, tempfile
from setuptools.sandbox import run_setup
from setuptools.archive_util import unpack_archive
from distutils.sysconfig import get_python_lib
from pkg_resources import *
HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()
def distros_for_url(url, metadata=None):
"""Yield egg or source distribution objects that might be found at a URL"""
path = urlparse.urlparse(url)[2]
base = urllib2.unquote(path.split('/')[-1])
if base.endswith('.egg'):
dist = Distribution.from_filename(base, metadata)
dist.path = url
yield dist
return # only one, unambiguous interpretation
for ext in EXTENSIONS:
if base.endswith(ext):
base = base[:-len(ext)]
break
else:
return # no extension matched
# Generate alternative interpretations of a source distro name, because
# some package names are ambiguous as to where the name ends and the
# version begins, e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc.
# So, we generate each possible interpretation (e.g. "adns, python-1.1.0",
# "adns-python, 1.1.0", and "adns-python-1.1.0, no version"). In practice,
# the spurious interpretations should be ignored, because in the event
# there's also an "adns" package, the spurious "python-1.1.0" version will
# compare lower than any numeric version number, and is therefore unlikely
# to match a request for it. It's still a potential problem, though, and
# in the long run PyPI and the distutils should go for "safe" names and
# versions in distribution archive names (sdist and bdist).
parts = base.split('-')
for p in range(1,len(parts)+1):
yield Distribution(
url, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]),
distro_type = SOURCE_DIST
)
class PackageIndex(AvailableDistributions):
"""A distribution index that scans web pages for download URLs"""
def __init__(self,index_url="http://www.python.org/pypi",*args,**kw):
AvailableDistributions.__init__(self,*args,**kw)
self.index_url = index_url + "/"[:not index_url.endswith('/')]  # append "/" only if missing
self.scanned_urls = {}
self.fetched_urls = {}
self.package_pages = {}
def scan_url(self, url):
self.process_url(url, True)
def process_url(self, url, retrieve=False):
if url in self.scanned_urls and not retrieve:
return
self.scanned_urls[url] = True
dists = list(distros_for_url(url))
map(self.add, dists)
if dists or not retrieve or url in self.fetched_urls:
# don't need the actual page
return
f = self.open_url(url)
self.fetched_urls[url] = self.fetched_urls[f.url] = True
if 'html' not in f.headers['content-type'].lower():
f.close() # not html, we can't process it
return
base = f.url # handle redirects
page = f.read()
f.close()
if url.startswith(self.index_url):
self.process_index(url, page)
else:
for match in HREF.finditer(page):
link = urlparse.urljoin(base, match.group(1))
self.process_url(link)
def find_packages(self,requirement):
self.scan_url(self.index_url + requirement.distname)
if not self.package_pages.get(requirement.key):
# We couldn't find the target package, so search the index page too
self.scan_url(self.index_url)
for url in self.package_pages.get(requirement.key,()):
# scan each page that might be related to the desired package
self.scan_url(url)
def process_index(self,url,page):
def scan(link):
if link.startswith(self.index_url):
parts = map(
urllib2.unquote, link[len(self.index_url):].split('/')
)
if len(parts)==2:
# it's a package page, sanitize and index it
pkg = safe_name(parts[0])
ver = safe_version(parts[1])
self.package_pages.setdefault(pkg.lower(),{})[link] = True
if url==self.index_url or 'Index of Packages</title>' in page:
# process an index page into the package-page index
for match in HREF.finditer(page):
scan( urlparse.urljoin(url, match.group(1)) )
else:
scan(url) # ensure this page is in the page index
# process individual package page
for tag in ("<th>Home Page", "<th>Download URL"):
pos = page.find(tag)
if pos!=-1:
match = HREF.search(page,pos)
if match:
# Process the found URL
self.scan_url(urlparse.urljoin(url, match.group(1)))
def obtain(self,requirement):
self.find_packages(requirement)
for dist in self.get(requirement.key, ()):
if dist in requirement:
return dist
def download(self, spec, tmpdir):
"""Locate and/or download `spec`, returning a local filename
`spec` may be a ``Requirement`` object, or a string containing a URL,
an existing local filename, or a package/version requirement spec
(i.e. the string form of a ``Requirement`` object).
If necessary, the requirement is searched for in the package index.
If the download is successful, the return value is a local file path,
and it is a subpath of `tmpdir` if the distribution had to be
downloaded. If no matching distribution is found, return ``None``.
Various errors may be raised if a problem occurs during downloading.
"""
if not isinstance(spec,Requirement):
scheme = URL_SCHEME(spec)
if scheme:
# It's a url, download it to tmpdir
return self._download_url(scheme.group(1), spec, tmpdir)
elif os.path.exists(spec):
# Existing file or directory, just return it
return spec
else:
try:
spec = Requirement.parse(spec)
except ValueError:
raise RuntimeError(
"Not a URL, existing file, or requirement spec: %r" %
(spec,)
)
# process a Requirement
dist = self.best_match(spec,[])
if dist is not None:
return self.download(dist.path, tmpdir)
return None
dl_blocksize = 8192
def _download_to(self, url, filename):
# Download the file
fp, tfp = None, None
try:
fp = self.open_url(url)
if isinstance(fp, urllib2.HTTPError):
raise RuntimeError(
"Can't download %s: %s %s" % (url, fp.code,fp.msg)
)
headers = fp.info()
blocknum = 0
bs = self.dl_blocksize
size = -1
if "content-length" in headers:
size = int(headers["Content-Length"])
self.reporthook(url, filename, blocknum, bs, size)
tfp = open(filename,'wb')
while True:
block = fp.read(bs)
if block:
tfp.write(block)
blocknum += 1
self.reporthook(url, filename, blocknum, bs, size)
else:
break
return headers
finally:
if fp: fp.close()
if tfp: tfp.close()
def reporthook(self, url, filename, blocknum, blksize, size):
pass # no-op
def open_url(self, url):
try:
return urllib2.urlopen(url)
except urllib2.HTTPError, v:
return v
except urllib2.URLError, v:
raise RuntimeError("Download error: %s" % v.reason)
def _download_url(self, scheme, url, tmpdir):
# Determine download filename
#
name = filter(None,urlparse.urlparse(url)[2].split('/'))
if name:
name = name[-1]
while '..' in name:
name = name.replace('..','.').replace('\\','_')
else:
name = "__downloaded__" # default if URL has no path contents
filename = os.path.join(tmpdir,name)
# Download the file
#
if scheme=='svn' or scheme.startswith('svn+'):
return self._download_svn(url, filename)
else:
headers = self._download_to(url, filename)
if 'html' in headers['content-type'].lower():
return self._download_html(url, headers, filename, tmpdir)
else:
return filename
def _download_html(self, url, headers, filename, tmpdir):
# Check for a sourceforge URL
sf_url = url.startswith('http://prdownloads.')
file = open(filename)
for line in file:
if line.strip():
# Check for a subversion index page
if re.search(r'<title>Revision \d+:', line):
# it's a subversion index page:
file.close()
os.unlink(filename)
return self._download_svn(url, filename)
# Check for a SourceForge header
elif sf_url:
if re.search(r'^<HTML><HEAD>', line, re.I):
continue # skip first line
elif re.search(r'<TITLE>Select a Mirror for File:',line):
# Sourceforge mirror page
page = file.read()
file.close()
os.unlink(filename)
return self._download_sourceforge(url, page, tmpdir)
break # not an index page
file.close()
raise RuntimeError("Unexpected HTML page found at "+url)
def _download_svn(self, url, filename):
os.system("svn checkout -q %s %s" % (url, filename))
return filename
from setuptools.archive_util import unpack_archive
from setuptools.package_index import PackageIndex
from pkg_resources import *
......@@ -326,37 +29,6 @@ class PackageIndex(AvailableDistributions):
def _download_sourceforge(self, source_url, sf_page, tmpdir):
"""Download package from randomly-selected SourceForge mirror"""
mirror_regex = re.compile(r'HREF=(/.*?\?use_mirror=[^>]*)')
urls = [m.group(1) for m in mirror_regex.finditer(sf_page)]
if not urls:
raise RuntimeError(
"URL looks like a Sourceforge mirror page, but no URLs found"
)
import random
url = urlparse.urljoin(source_url, random.choice(urls))
f = self.open_url(url)
match = re.search(
r'<META HTTP-EQUIV="refresh" content=".*?URL=(.*?)"',
f.read()
)
f.close()
if match:
download_url = match.group(1)
scheme = URL_SCHEME(download_url)
return self._download_url(scheme.group(1), download_url, tmpdir)
else:
raise RuntimeError(
'No META HTTP-EQUIV="refresh" found in Sourceforge page at %s'
% url
)
......@@ -572,8 +244,6 @@ class PthDistributions(AvailableDistributions):
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):',re.I).match
def main(argv, installer_type=Installer, index_type=PackageIndex):
from optparse import OptionParser
......@@ -613,6 +283,8 @@ def main(argv, installer_type=Installer, index_type=PackageIndex):
def alloc_tmp():
if options.tmpdir is None:
return tempfile.mkdtemp(prefix="easy_install-")
......
"""PyPI and direct package downloading"""
import sys, os.path, re, urlparse, urllib2
from pkg_resources import *
HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):',re.I).match
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()
__all__ = [
'PackageIndex', 'distros_for_url',
]
def distros_for_url(url, metadata=None):
"""Yield egg or source distribution objects that might be found at a URL"""
path = urlparse.urlparse(url)[2]
base = urllib2.unquote(path.split('/')[-1])
if base.endswith('.egg'):
dist = Distribution.from_filename(base, metadata)
dist.path = url
yield dist
return # only one, unambiguous interpretation
for ext in EXTENSIONS:
if base.endswith(ext):
base = base[:-len(ext)]
break
else:
return # no extension matched
# Generate alternative interpretations of a source distro name, because
# some package names are ambiguous as to where the name ends and the
# version begins, e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc.
# So, we generate each possible interpretation (e.g. "adns, python-1.1.0",
# "adns-python, 1.1.0", and "adns-python-1.1.0, no version"). In practice,
# the spurious interpretations should be ignored, because in the event
# there's also an "adns" package, the spurious "python-1.1.0" version will
# compare lower than any numeric version number, and is therefore unlikely
# to match a request for it. It's still a potential problem, though, and
# in the long run PyPI and the distutils should go for "safe" names and
# versions in distribution archive names (sdist and bdist).
parts = base.split('-')
for p in range(1,len(parts)+1):
yield Distribution(
url, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]),
distro_type = SOURCE_DIST
)
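
As a concrete illustration of the ambiguity described in the comments above, here is a minimal standalone sketch (not part of the commit) of the splitting loop applied to the "adns-python-1.1.0" example:

# Illustrative only: the (name, version) splits that distros_for_url yields.
base = "adns-python-1.1.0"
parts = base.split('-')
for p in range(1, len(parts) + 1):
    print "name=%r, version=%r" % ('-'.join(parts[:p]), '-'.join(parts[p:]))
# name='adns', version='python-1.1.0'
# name='adns-python', version='1.1.0'
# name='adns-python-1.1.0', version=''
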
class PackageIndex(AvailableDistributions):
"""A distribution index that scans web pages for download URLs"""
def __init__(self,index_url="http://www.python.org/pypi",*args,**kw):
AvailableDistributions.__init__(self,*args,**kw)
self.index_url = index_url + "/"[:not index_url.endswith('/')]  # append "/" only if missing
self.scanned_urls = {}
self.fetched_urls = {}
self.package_pages = {}
def scan_url(self, url):
self.process_url(url, True)
def process_url(self, url, retrieve=False):
if url in self.scanned_urls and not retrieve:
return
self.scanned_urls[url] = True
dists = list(distros_for_url(url))
map(self.add, dists)
if dists or not retrieve or url in self.fetched_urls:
# don't need the actual page
return
f = self.open_url(url)
self.fetched_urls[url] = self.fetched_urls[f.url] = True
if 'html' not in f.headers['content-type'].lower():
f.close() # not html, we can't process it
return
base = f.url # handle redirects
page = f.read()
f.close()
if url.startswith(self.index_url):
self.process_index(url, page)
else:
for match in HREF.finditer(page):
link = urlparse.urljoin(base, match.group(1))
self.process_url(link)
def find_packages(self,requirement):
self.scan_url(self.index_url + requirement.distname)
if not self.package_pages.get(requirement.key):
# We couldn't find the target package, so search the index page too
self.scan_url(self.index_url)
for url in self.package_pages.get(requirement.key,()):
# scan each page that might be related to the desired package
self.scan_url(url)
def process_index(self,url,page):
def scan(link):
if link.startswith(self.index_url):
parts = map(
urllib2.unquote, link[len(self.index_url):].split('/')
)
if len(parts)==2:
# it's a package page, sanitize and index it
pkg = safe_name(parts[0])
ver = safe_version(parts[1])
self.package_pages.setdefault(pkg.lower(),{})[link] = True
if url==self.index_url or 'Index of Packages</title>' in page:
# process an index page into the package-page index
for match in HREF.finditer(page):
scan( urlparse.urljoin(url, match.group(1)) )
else:
scan(url) # ensure this page is in the page index
# process individual package page
for tag in ("<th>Home Page", "<th>Download URL"):
pos = page.find(tag)
if pos!=-1:
match = HREF.search(page,pos)
if match:
# Process the found URL
self.scan_url(urlparse.urljoin(url, match.group(1)))
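
To make the scan() bookkeeping concrete, a hedged sketch with a hypothetical package page URL ("FooBar" is invented for illustration):

# Illustrative only: how scan() decides a link is a package page.
import urllib2
index_url = "http://www.python.org/pypi/"
link = index_url + "FooBar/1.0"
parts = map(urllib2.unquote, link[len(index_url):].split('/'))
assert parts == ['FooBar', '1.0']   # len(parts)==2, so it's a package page
# scan() then records it under safe_name('FooBar').lower(), i.e.:
#   self.package_pages['foobar'][link] = True
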
def obtain(self,requirement):
self.find_packages(requirement)
for dist in self.get(requirement.key, ()):
if dist in requirement:
return dist
def download(self, spec, tmpdir):
"""Locate and/or download `spec`, returning a local filename
`spec` may be a ``Requirement`` object, or a string containing a URL,
an existing local filename, or a package/version requirement spec
(i.e. the string form of a ``Requirement`` object).
If necessary, the requirement is searched for in the package index.
If the download is successful, the return value is a local file path,
and it is a subpath of `tmpdir` if the distribution had to be
downloaded. If no matching distribution is found, return ``None``.
Various errors may be raised if a problem occurs during downloading.
"""
if not isinstance(spec,Requirement):
scheme = URL_SCHEME(spec)
if scheme:
# It's a url, download it to tmpdir
return self._download_url(scheme.group(1), spec, tmpdir)
elif os.path.exists(spec):
# Existing file or directory, just return it
return spec
else:
try:
spec = Requirement.parse(spec)
except ValueError:
raise RuntimeError(
"Not a URL, existing file, or requirement spec: %r" %
(spec,)
)
# process a Requirement
dist = self.best_match(spec,[])
if dist is not None:
return self.download(dist.path, tmpdir)
return None
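
A hedged usage sketch of the three `spec` forms the docstring describes; the package name, URL, and local path are hypothetical, and the first and last calls assume network access:

import tempfile
from setuptools.package_index import PackageIndex

pi = PackageIndex()
tmpdir = tempfile.mkdtemp()
# a direct URL: fetched into tmpdir
path = pi.download("http://example.com/FooBar-1.0.tar.gz", tmpdir)
# an existing local path: returned as-is, no download
path = pi.download("./FooBar-1.0.tar.gz", tmpdir)
# a requirement spec: located via the index, then downloaded
path = pi.download("FooBar>=1.0", tmpdir)   # None if nothing matches
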
dl_blocksize = 8192
def _download_to(self, url, filename):
# Download the file
fp, tfp = None, None
try:
fp = self.open_url(url)
if isinstance(fp, urllib2.HTTPError):
raise RuntimeError(
"Can't download %s: %s %s" % (url, fp.code,fp.msg)
)
headers = fp.info()
blocknum = 0
bs = self.dl_blocksize
size = -1
if "content-length" in headers:
size = int(headers["Content-Length"])
self.reporthook(url, filename, blocknum, bs, size)
tfp = open(filename,'wb')
while True:
block = fp.read(bs)
if block:
tfp.write(block)
blocknum += 1
self.reporthook(url, filename, blocknum, bs, size)
else:
break
return headers
finally:
if fp: fp.close()
if tfp: tfp.close()
def reporthook(self, url, filename, blocknum, blksize, size):
pass # no-op
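
Because reporthook is a deliberate no-op, progress reporting is one of the easiest things for an embedder to override; a minimal sketch (the subclass name is hypothetical):

import sys

class ProgressReportingIndex(PackageIndex):
    """Hypothetical subclass: print download progress via reporthook."""
    def reporthook(self, url, filename, blocknum, blksize, size):
        if size > 0:
            pct = min(100.0, blocknum * blksize * 100.0 / size)
            sys.stdout.write("\r%s: %.1f%%" % (filename, pct))
        else:   # no Content-Length header was sent; total size unknown
            sys.stdout.write("\r%s: %d blocks" % (filename, blocknum))
        sys.stdout.flush()
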
def open_url(self, url):
try:
return urllib2.urlopen(url)
except urllib2.HTTPError, v:
return v
except urllib2.URLError, v:
raise RuntimeError("Download error: %s" % v.reason)
def _download_url(self, scheme, url, tmpdir):
# Determine download filename
#
name = filter(None,urlparse.urlparse(url)[2].split('/'))
if name:
name = name[-1]
while '..' in name:
name = name.replace('..','.').replace('\\','_')
else:
name = "__downloaded__" # default if URL has no path contents
filename = os.path.join(tmpdir,name)
# Download the file
#
if scheme=='svn' or scheme.startswith('svn+'):
return self._download_svn(url, filename)
else:
headers = self._download_to(url, filename)
if 'html' in headers['content-type'].lower():
return self._download_html(url, headers, filename, tmpdir)
else:
return filename
def _download_html(self, url, headers, filename, tmpdir):
# Check for a sourceforge URL
sf_url = url.startswith('http://prdownloads.')
file = open(filename)
for line in file:
if line.strip():
# Check for a subversion index page
if re.search(r'<title>Revision \d+:', line):
# it's a subversion index page:
file.close()
os.unlink(filename)
return self._download_svn(url, filename)
# Check for a SourceForge header
elif sf_url:
if re.search(r'^<HTML><HEAD>', line, re.I):
continue # skip first line
elif re.search(r'<TITLE>Select a Mirror for File:',line):
# Sourceforge mirror page
page = file.read()
file.close()
os.unlink(filename)
return self._download_sourceforge(url, page, tmpdir)
break # not an index page
file.close()
raise RuntimeError("Unexpected HTML page found at "+url)
def _download_svn(self, url, filename):
os.system("svn checkout -q %s %s" % (url, filename))
return filename
def _download_sourceforge(self, source_url, sf_page, tmpdir):
"""Download package from randomly-selected SourceForge mirror"""
mirror_regex = re.compile(r'HREF=(/.*?\?use_mirror=[^>]*)')
urls = [m.group(1) for m in mirror_regex.finditer(sf_page)]
if not urls:
raise RuntimeError(
"URL looks like a Sourceforge mirror page, but no URLs found"
)
import random
url = urlparse.urljoin(source_url, random.choice(urls))
f = self.open_url(url)
match = re.search(
r'<META HTTP-EQUIV="refresh" content=".*?URL=(.*?)"',
f.read()
)
f.close()
if match:
download_url = match.group(1)
scheme = URL_SCHEME(download_url)
return self._download_url(scheme.group(1), download_url, tmpdir)
else:
raise RuntimeError(
'No META HTTP-EQUIV="refresh" found in Sourceforge page at %s'
% url
)
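
Putting the pieces together, a hedged end-to-end sketch of resolving a requirement against the index ("FooBar" is hypothetical; the default index URL reflects PyPI as of this commit):

from pkg_resources import Requirement
from setuptools.package_index import PackageIndex

pi = PackageIndex()
req = Requirement.parse("FooBar>=1.0")
dist = pi.obtain(req)   # find_packages() plus a match check, per the code above
if dist is not None:
    print "found %r at %s" % (dist, dist.path)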