Commit 8618cfa8 authored by PJ Eby's avatar PJ Eby

Fix eager resource extraction. Add eager_resources setup() argument. Add

support for obtaining project-level resources by making get_provider()
accept Requirement objects.

--HG--
branch : setuptools
extra : convert_revision : svn%3A6015fed2-1504-0410-9fe1-9d1591cc4771/sandbox/trunk/setuptools%4041151
parent 68b9a791
......@@ -25,6 +25,7 @@ __all__ = [
'safe_name', 'safe_version', 'run_main', 'BINARY_DIST', 'run_script',
'get_default_cache', 'EmptyProvider', 'empty_provider', 'normalize_path',
'WorkingSet', 'working_set', 'add_activation_listener', 'CHECKOUT_DIST',
'list_resources', 'resource_exists', 'resource_isdir',
]
import sys, os, zipimport, time, re, imp
......@@ -38,7 +39,6 @@ from sets import ImmutableSet
class ResolutionError(Exception):
"""Abstract base for dependency resolution errors"""
......@@ -68,18 +68,18 @@ def register_loader_type(loader_type, provider_factory):
"""
_provider_factories[loader_type] = provider_factory
def get_provider(moduleName):
"""Return an IResourceProvider for the named module"""
def get_provider(moduleOrReq):
"""Return an IResourceProvider for the named module or requirement"""
if isinstance(moduleOrReq,Requirement):
return working_set.find(moduleOrReq) or require(str(moduleOrReq))[0]
try:
module = sys.modules[moduleName]
module = sys.modules[moduleOrReq]
except KeyError:
__import__(moduleName)
module = sys.modules[moduleName]
__import__(moduleOrReq)
module = sys.modules[moduleOrReq]
loader = getattr(module, '__loader__', None)
return _find_adapter(_provider_factories, loader)(module)
def _macosx_vers(_cache=[]):
if not _cache:
info = os.popen('/usr/bin/sw_vers').read().splitlines()
......@@ -627,7 +627,7 @@ class ResourceManager:
def resource_isdir(self, package_name, resource_name):
"""Does the named resource exist in the named package?"""
return get_provider(package_name).resource_isdir(self, resource_name)
return get_provider(package_name).resource_isdir(resource_name)
def resource_filename(self, package_name, resource_name):
"""Return a true filesystem path for specified resource"""
......@@ -648,7 +648,7 @@ class ResourceManager:
)
def list_resources(self, package_name, resource_name):
return get_provider(package_name).resource_listdir(self, resource_name)
return get_provider(package_name).resource_listdir(resource_name)
......@@ -913,8 +913,8 @@ class NullProvider:
register_loader_type(object, NullProvider)
class DefaultProvider(NullProvider):
"""Provides access to package resources in the filesystem"""
class EggProvider(NullProvider):
"""Provider based on a virtual filesystem"""
def __init__(self,module):
NullProvider.__init__(self,module)
......@@ -925,22 +925,28 @@ class DefaultProvider(NullProvider):
# of multiple eggs; that's why we use module_path instead of .archive
path = self.module_path
old = None
self.prefix = []
while path!=old:
if path.lower().endswith('.egg'):
self.egg_name = os.path.basename(path)
self.egg_info = os.path.join(path, 'EGG-INFO')
self.egg_root = path
break
old = path
path, base = os.path.split(path)
self.prefix.append(base)
self.prefix.reverse()
def _has(self, path):
return os.path.exists(path)
class DefaultProvider(EggProvider):
"""Provides access to package resources in the filesystem"""
def _has(self, path):
return os.path.exists(path)
def _isdir(self,path):
return os.path.isdir(path)
......@@ -976,67 +982,63 @@ empty_provider = EmptyProvider()
class ZipProvider(DefaultProvider):
class ZipProvider(EggProvider):
"""Resource support for zips and eggs"""
eagers = None
def __init__(self, module):
DefaultProvider.__init__(self,module)
EggProvider.__init__(self,module)
self.zipinfo = zipimport._zip_directory_cache[self.loader.archive]
self.zip_pre = self.loader.archive+os.sep
def _short_name(self, path):
if path.startswith(self.zip_pre):
return path[len(self.zip_pre):]
return path
def _zipinfo_name(self, fspath):
# Convert a virtual filename (full path to file) into a zipfile subpath
# usable with the zipimport directory cache for our target archive
if fspath.startswith(self.zip_pre):
return fspath[len(self.zip_pre):]
raise AssertionError(
"%s is not a subpath of %s" % (fspath,self.zip_pre)
)
def get_resource_stream(self, manager, resource_name):
return StringIO(self.get_resource_string(manager, resource_name))
def _parts(self,zip_path):
# Convert a zipfile subpath into an egg-relative path part list
fspath = self.zip_pre+zip_path # pseudo-fs path
if fspath.startswith(self.egg_root+os.sep):
return fspath[len(self.egg_root)+1:].split(os.sep)
raise AssertionError(
"%s is not a subpath of %s" % (fspath,self.egg_root)
)
def get_resource_filename(self, manager, resource_name):
if not self.egg_name:
raise NotImplementedError(
"resource_filename() only supported for .egg, not .zip"
)
# no need to lock for extraction, since we use temp names
zip_path = self._resource_to_zip(resource_name)
eagers = self._get_eager_resources()
if resource_name in eagers:
if '/'.join(self._parts(zip_path)) in eagers:
for name in eagers:
self._extract_resource(manager, name)
return self._extract_resource(manager, resource_name)
def _extract_directory(self, manager, resource_name):
if resource_name.endswith('/'):
resource_name = resource_name[:-1]
for resource in self.resource_listdir(resource_name):
last = self._extract_resource(manager, resource_name+'/'+resource)
return os.path.dirname(last) # return the directory path
def _extract_resource(self, manager, resource_name):
if self.resource_isdir(resource_name):
return self._extract_directory(manager, resource_name)
self._extract_resource(manager, self._eager_to_zip(name))
return self._extract_resource(manager, zip_path)
def _extract_resource(self, manager, zip_path):
if zip_path in self._index():
for name in self._index()[zip_path]:
last = self._extract_resource(
manager, os.path.join(zip_path, name)
)
return os.path.dirname(last) # return the extracted directory name
parts = resource_name.split('/')
zip_path = os.path.join(self.module_path, *parts)
zip_stat = self.zipinfo[os.path.join(*self.prefix+parts)]
zip_stat = self.zipinfo[zip_path]
t,d,size = zip_stat[5], zip_stat[6], zip_stat[3]
date_time = (
(d>>9)+1980, (d>>5)&0xF, d&0x1F, # ymd
(t&0xFFFF)>>11, (t>>5)&0x3F, (t&0x1F) * 2, 0, 0, -1 # hms, etc.
)
timestamp = time.mktime(date_time)
real_path = manager.get_cache_path(self.egg_name, self.prefix+parts)
real_path = manager.get_cache_path(self.egg_name, self._parts(zip_path))
if os.path.isfile(real_path):
stat = os.stat(real_path)
......@@ -1060,10 +1062,8 @@ class ZipProvider(DefaultProvider):
# so we're done
return real_path
raise
return real_path
def _get_eager_resources(self):
if self.eagers is None:
eagers = []
......@@ -1077,12 +1077,9 @@ class ZipProvider(DefaultProvider):
try:
return self._dirindex
except AttributeError:
ind = {}; skip = len(self.prefix)
ind = {}
for path in self.zipinfo:
parts = path.split(os.sep)
if parts[:skip] != self.prefix:
continue # only include items under our prefix
parts = parts[skip:] # but don't include prefix in paths
while parts:
parent = '/'.join(parts[:-1])
if parent in ind:
......@@ -1093,26 +1090,26 @@ class ZipProvider(DefaultProvider):
self._dirindex = ind
return ind
def _has(self, path):
return self._short_name(path) in self.zipinfo or self._isdir(path)
def _has(self, fspath):
zip_path = self._zipinfo_name(fspath)
return zip_path in self.zipinfo or zip_path in self._index()
def _isdir(self,path):
return self._dir_name(path) in self._index()
def _isdir(self,fspath):
return self._zipinfo_name(fspath) in self._index()
def _listdir(self,path):
return list(self._index().get(self._dir_name(path), ()))
def _listdir(self,fspath):
return list(self._index().get(self._zipinfo_name(fspath), ()))
def _dir_name(self,path):
if path.startswith(self.module_path+os.sep):
path = path[len(self.module_path+os.sep):]
path = path.replace(os.sep,'/')
if path.endswith('/'): path = path[:-1]
return path
_get = NullProvider._get
def _eager_to_zip(self,resource_name):
return self._zipinfo_name(self._fn(self.egg_root,resource_name))
def _resource_to_zip(self,resource_name):
return self._zipinfo_name(self._fn(self.module_path,resource_name))
register_loader_type(zipimport.zipimporter, ZipProvider)
......@@ -1142,6 +1139,9 @@ register_loader_type(zipimport.zipimporter, ZipProvider)
......
......@@ -180,6 +180,22 @@ unless you need the associated ``setuptools`` feature.
does not contain any code. See the section below on `Namespace Packages`_
for more information.
``eager_resources``
A list of strings naming resources that should be extracted together, if
any of them is needed, or if any C extensions included in the project are
imported. This argument is only useful if the project will be installed as
a zipfile, and there is a need to have all of the listed resources be
extracted to the filesystem *as a unit*. Resources listed here
should be '/'-separated paths, relative to the source root, so to list a
resource ``foo.png`` in package ``bar.baz``, you would include the string
``bar/baz/foo.png`` in this argument.
If you only need to obtain resources one at a time, or you don't have any C
extensions that access other files in the project (such as data files or
shared libraries), you probably do NOT need this argument and shouldn't
mess with it. For more details on how this argument works, see the section
below on `Automatic Resource Extraction`_.
Using ``find_packages()``
-------------------------
......@@ -414,6 +430,7 @@ python.org website.)
__ http://docs.python.org/dist/node11.html
Accessing Data Files at Runtime
-------------------------------
......@@ -432,6 +449,76 @@ a quick example of converting code that uses ``__file__`` to use
.. _Accessing Package Resources: http://peak.telecommunity.com/DevCenter/PythonEggs#accessing-package-resources
Non-Package Data Files
----------------------
The ``distutils`` normally install general "data files" to a platform-specific
location (e.g. ``/usr/share``). This feature is intended to be used for things
like documentation, example configuration files, and the like. ``setuptools``
does not install these data files in a separate location, however. They are
bundled inside the egg file or directory, alongside the Python modules and
packages. The data files can also be accessed using the `Resource Management
API`_, by specifying a ``Requirement`` instead of a package name::
from pkg_resources import Requirement, resource_filename
filename = resource_filename(Requirement.parse("MyProject"),"sample.conf")
The above code will obtain the filename of the "sample.conf" file in the data
root of the "MyProject" distribution.
Note, by the way, that this encapsulation of data files means that you can't
actually install data files to some arbitrary location on a user's machine;
this is a feature, not a bug. You can always include a script in your
distribution that extracts and copies your documentation or data files to
a user-specified location, at their discretion. If you put related data files
in a single directory, you can use ``resource_filename()`` with the directory
name to get a filesystem directory that then can be copied with the ``shutil``
module. (Even if your package is installed as a zipfile, calling
``resource_filename()`` on a directory will return an actual filesystem
directory, whose contents will be that entire subtree of your distribution.)
(Of course, if you're writing a new package, you can just as easily place your
data files or directories inside one of your packages, rather than using the
distutils' approach. However, if you're updating an existing application, it
may be simpler not to change the way it currently specifies these data files.)
Automatic Resource Extraction
-----------------------------
If you are using tools that expect your resources to be "real" files, or your
project includes non-extension native libraries or other files that your C
extensions expect to be able to access, you may need to list those files in
the ``eager_resources`` argument to ``setup()``, so that the files will be
extracted together, whenever a C extension in the project is imported. This
is especially important if your project includes shared libraries *other* than
distutils-built C extensions. Those shared libraries should be listed as
``eager_resources``, because they need to be present in the filesystem when the
C extensions that link to them are used.
The ``pkg_resources`` runtime for compressed packages will automatically
extract *all* C extensions and ``eager_resources`` at the same time, whenever
*any* C extension or eager resource is requested via the ``resource_filename()``
API. (C extensions are imported using ``resource_filename()`` internally.)
This ensures that C extensions will see all of the "real" files that they
expect to see.
Note also that you can list directory resource names in ``eager_resources`` as
well, in which case the directory's contents (including subdirectories) will be
extracted whenever any C extension or eager resource is requested.
Please note that if you're not sure whether you need to use this argument, you
don't! It's really intended to support projects with lots of non-Python
dependencies and as a last resort for crufty projects that can't otherwise
handle being compressed. If your package is pure Python, Python plus data
files, or Python plus C, you really don't need this. You've got to be using
either C or an external program that needs "real" files in your project before
there's any possibility of ``eager_resources`` being relevant to your project.
"Development Mode"
==================
......@@ -1397,13 +1484,31 @@ Release Notes/Change History
latest revision number; it was using the revision number of the directory
containing ``setup.py``, not the highest revision number in the project.
* Added ``eager_resources`` setup argument
* Fixed some problems using ``pkg_resources`` w/PEP 302 loaders other than
``zipimport``.
``zipimport``, and the previously-broken "eager resource" support.
* Fixed ``pkg_resources.resource_exists()`` not working correctly, along with
some other resource API bugs.
* Fixed ``pkg_resources.resource_exists()`` not working correctly.
* Many ``pkg_resources`` API changes and enhancements:
* Resource API functions like ``resource_string()`` that accepted a package
name and resource name, will now also accept a ``Requirement`` object in
place of the package name (to allow access to non-package data files in
an egg).
* ``get_provider()`` will now accept a ``Requirement`` instance or a module
name. If it is given a ``Requirement``, it will return a corresponding
``Distribution`` (by calling ``require()`` if a suitable distribution
isn't already in the working set), rather than returning a metadata and
resource provider for a specific module. (The difference is in how
resource paths are interpreted; supplying a module name means resource
paths will be module-relative, rather than relative to the distribution's
root.)
* ``Distribution`` objects now implement the ``IResourceProvider`` and
``IMetadataProvider`` interfaces, so you don't need to reference the (no
longer available) ``metadata`` attribute to get at these interfaces.
......
......@@ -96,13 +96,13 @@ class egg_info(Command):
finally:
metadata.name, metadata.version = oldname, oldver
self.write_namespace_packages()
self.write_requirements()
self.write_toplevel_names()
self.write_or_delete_dist_arg('namespace_packages')
self.write_or_delete_dist_arg('eager_resources')
if os.path.exists(os.path.join(self.egg_info,'depends.txt')):
log.warn(
"WARNING: 'depends.txt' will not be used by setuptools 0.6!\n"
"WARNING: 'depends.txt' is not used by setuptools 0.6!\n"
"Use the install_requires/extras_require setup() args instead."
)
......@@ -162,18 +162,19 @@ class egg_info(Command):
def write_namespace_packages(self):
nsp = getattr(self.distribution,'namespace_packages',None)
if nsp is None:
def write_or_delete_dist_arg(self, argname, filename=None):
value = getattr(self.distribution, argname, None)
if value is None:
return
filename = os.path.join(self.egg_info,"namespace_packages.txt")
filename = filename or argname+'.txt'
filename = os.path.join(self.egg_info,filename)
if nsp:
if value:
log.info("writing %s", filename)
if not self.dry_run:
f = open(filename, 'wt')
f.write('\n'.join(nsp))
f.write('\n'.join(value))
f.write('\n')
f.close()
......@@ -200,6 +201,5 @@ class egg_info(Command):
......@@ -92,6 +92,7 @@ class Distribution(_Distribution):
self.dist_files = []
self.zip_safe = None
self.namespace_packages = None
self.eager_resources = None
_Distribution.__init__(self,attrs)
if not have_package_data:
from setuptools.command.build_py import build_py
......@@ -120,16 +121,18 @@ class Distribution(_Distribution):
def finalize_options(self):
_Distribution.finalize_options(self)
if self.features:
self._set_global_opts_from_features()
if self.extra_path:
raise DistutilsSetupError(
"The 'extra_path' parameter is not needed when using "
"setuptools. Please remove it from your setup script."
)
try:
list(pkg_resources.parse_requirements(self.install_requires))
except (TypeError,ValueError):
......@@ -137,6 +140,7 @@ class Distribution(_Distribution):
"'install_requires' must be a string or list of strings "
"containing valid project/version requirement specifiers"
)
try:
for k,v in self.extras_require.items():
list(pkg_resources.parse_requirements(v))
......@@ -146,14 +150,19 @@ class Distribution(_Distribution):
"strings or lists of strings containing valid project/version "
"requirement specifiers."
)
if self.namespace_packages is not None:
for attr in 'namespace_packages','eager_resources':
value = getattr(self,attr,None)
if value is not None:
try:
assert ''.join(self.namespace_packages)!=self.namespace_packages
assert ''.join(value)!=value
except (TypeError,ValueError,AttributeError,AssertionError):
raise DistutilsSetupError(
"'namespace_packages' must be a sequence of strings"
"%r must be a list of strings (got %r)" % (attr,value)
)
for nsp in self.namespace_packages:
for nsp in self.namespace_packages or ():
for name in iter_distribution_names(self):
if name.startswith(nsp+'.'): break
else:
......@@ -186,6 +195,14 @@ class Distribution(_Distribution):
self.global_options = self.feature_options = go + self.global_options
self.negative_opt = self.feature_negopt = no
def _finalize_features(self):
"""Add/remove features and resolve dependencies between them"""
......@@ -203,6 +220,30 @@ class Distribution(_Distribution):
feature.exclude_from(self)
self._set_feature(name,0)
def _set_feature(self,name,status):
"""Set feature's inclusion status"""
setattr(self,self._feature_attrname(name),status)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment