Commit cf1cb5ba authored by Jurko Gospodnetić's avatar Jurko Gospodnetić

pkg_resources.Environment.__getitem__() code cleanup

Better commented the code, especially the result caching & lazy distribution
list sorting strategies implemented there. Added several TODO comments
indicating things to look into there in the future.

General code cleanup:
 - stopped reusing the project_name variable for different purposes
 - added an assertion to detect corrupt distribution list caches

--HG--
extra : source : 86ef8f59ef4e1fc253c155b9ca0856497455372d
parent 0f0c8924
......@@ -811,30 +811,70 @@ class Environment(object):
for dist in find_distributions(item):
self.add(dist)
def __getitem__(self,project_name):
def __getitem__(self, project_name):
"""Return a newest-to-oldest list of distributions for `project_name`
Uses case-insensitive `project_name` comparison, assuming all the
project's distributions use their project's name converted to all
lowercase as their key.
"""
# Result caching here serves two purposes:
# 1. Speed up the project_name --> distribution list lookup.
# 2. 'First access' flag indicating the distribution list requires
# sorting before it can be returned to the user.
#TODO: This caching smells like premature optimization. It could be
# that the distribution list lookup speed is not really affected by
# this, in which case the whole cache could be removed and replaced
# with a single 'dist_list_sorted' flag. This seems strongly indicated
# by the fact that this function does not really cache the distribution
# list under the given project name but only under its canonical
# distribution key variant. That means that repeated access using a non
# canonical project name does not get any speedup at all.
try:
return self._cache[project_name]
except KeyError:
project_name = project_name.lower()
if project_name not in self._distmap:
return []
pass
if project_name not in self._cache:
dists = self._cache[project_name] = self._distmap[project_name]
# We expect all distribution keys to contain lower-case characters
# only.
#TODO: See if this expectation can be implemented better, e.g. by using
# some sort of a name --> key conversion function on the Distribution
# class or something similar.
#TODO: This requires all classes derived from Distribution to use
# lower-case only keys even if they do not calculate them from the
# project's name. It might be better to make this function simpler by
# passing it the the exact distribution key as a parameter and have the
# caller convert a `project_name` to its corresponding distribution key
# as needed.
distribution_key = project_name.lower()
try:
dists = self._distmap[distribution_key]
except KeyError:
return []
# Sort the project's distribution list lazily on first access.
if distribution_key not in self._cache:
self._cache[distribution_key] = dists
_sort_dists(dists)
return self._cache[project_name]
return dists
def add(self,dist):
"""Add `dist` if we ``can_add()`` it and it isn't already added"""
def add(self, dist):
"""Add `dist` if we ``can_add()`` it and it has not already been added
"""
if self.can_add(dist) and dist.has_version():
dists = self._distmap.setdefault(dist.key,[])
dists = self._distmap.setdefault(dist.key, [])
if dist not in dists:
dists.append(dist)
if dist.key in self._cache:
_sort_dists(self._cache[dist.key])
cached_dists = self._cache.get(dist.key)
if cached_dists:
# The distribution list has been cached on first access,
# therefore we know it has already been sorted lazily and
# we are expected to keep it in sorted order.
_sort_dists(dists)
assert cached_dists is dists, \
"Distribution list cache corrupt."
def best_match(self, req, working_set, installer=None):
"""Find distribution best matching `req` and usable on `working_set`
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment