Commit a340cb9d authored by Jeremy Hylton

Many small cleanups and simplifications.

_indexedSearch():

    Simplify logic that called _apply_index() for each index in the
    catalog.  The if statement under the comment "Optimization" had
    identical code on either branch.  Perhaps the odd indentation made
    this confusing.  Regardless, remove the conditional.

    Change computation of normalized scores to multiply first, then
    divide.  Use literal 100. to make sure mult and div are floating
    point ops.

searchResults():

    Simplify logic at beginning of searchResults().  The first two
    conditionals depended on kw, so organize the logic to make that
    clearer.

    Write helper method to find "sort-on" and "sort-order" instead of
    duplicating code in searchResults().

    For the case where results are sorted, simplify construction of the
    final LazyCat and make it more efficient to boot.  Instead of using
    a list comprehension and a reduce + lambda to construct the list and
    the total length of the contained lists, do it with one explicit for
    loop that constructs both values.

        Note: I did detailed timing stats on three ways to compute the
        total length of a sequence of sequences.  reduce + lambda was
        the slowest.  For short lists, an explicit for loop is fastest.
        For long lists, reduce(operator.add, map(len, list)) is
        fastest.  The explicit for loop is a big win here, because we've
        got to walk over the elements anyway to undo the Schwartzian
        transform.

Sundry:

Use getattr() with default value of None in preference to hasattr()
followed by getattr().  This gets the same result with half the work.

Changes for consistent and frequent use of whitespace.

Use types.StringType and isinstance() to test for strings.
parent 8b2a64dc
...@@ -29,7 +29,7 @@ from BTrees.IOBTree import IOBTree ...@@ -29,7 +29,7 @@ from BTrees.IOBTree import IOBTree
import BTrees.Length import BTrees.Length
from Products.PluginIndexes.common.randid import randid from Products.PluginIndexes.common.randid import randid
import time, sys import time, sys, types
class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
""" An Object Catalog """ An Object Catalog
...@@ -67,7 +67,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -67,7 +67,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# we instantiate a lexicon to be shared by all text indexes. # we instantiate a lexicon to be shared by all text indexes.
# This may change. # This may change.
if type(vocabulary) is type(''): if isinstance(vocabulary, types.StringType):
self.lexicon = vocabulary self.lexicon = vocabulary
else: else:
self.lexicon = Lexicon() self.lexicon = Lexicon()
...@@ -116,7 +116,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -116,7 +116,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
index._convertBTrees(threshold) index._convertBTrees(threshold)
lexicon=self.lexicon lexicon=self.lexicon
if type(lexicon) is type(''): if isistance(lexicon, types.StringType):
lexicon=getattr(self, lexicon).lexicon lexicon=getattr(self, lexicon).lexicon
lexicon._convertBTrees(threshold) lexicon._convertBTrees(threshold)
...@@ -271,7 +271,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -271,7 +271,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
indexes = self.indexes indexes = self.indexes
if type(index_type) == type(''): if isinstance(index_type, types.StringType):
raise TypeError,"""Catalog addIndex now requires the index type to raise TypeError,"""Catalog addIndex now requires the index type to
be resolved prior to adding; create the proper index in the caller.""" be resolved prior to adding; create the proper index in the caller."""
...@@ -446,12 +446,11 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -446,12 +446,11 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
## on below here... Most of this stuff came from ZTables with tweaks. ## on below here... Most of this stuff came from ZTables with tweaks.
## But I worry about :-) ## But I worry about :-)
def _indexedSearch(self, request , sort_index, append, used, optimize): def _indexedSearch(self, request, sort_index, append, used, optimize):
""" """
Iterate through the indexes, applying the query to each one. Iterate through the indexes, applying the query to each one.
""" """
rs = None # resultset
rs = None # resultset
data = self.data data = self.data
# We can optimize queries by only calling index._apply_index() # We can optimize queries by only calling index._apply_index()
...@@ -468,40 +467,35 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -468,40 +467,35 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# what they are looking for (WYGIWYSF - what you get is what # what they are looking for (WYGIWYSF - what you get is what
# you search for). # you search for).
if hasattr(request,'environ'): # we have a request instance if hasattr(request, 'environ'): # we have a request instance
optimize = 0 optimize = 0
if used is None: used={} if used is None:
used = {}
for i in self.indexes.keys(): for i in self.indexes.keys():
index = self.indexes[i].__of__(self) index = self.indexes[i].__of__(self)
if hasattr(index,'_apply_index'): _apply_index = getattr(index, "_apply_index", None)
if _apply_index is None:
r = None continue
r = _apply_index(request)
# Optimization: we check if there is some work for the index.
#
if optimize and request.has_key(index.getId()) :
r=index._apply_index(request)
else:
r=index._apply_index(request)
if r is not None: if r is not None:
r, u = r r, u = r
for name in u: used[name]=1 for name in u:
w, rs = weightedIntersection(rs, r) used[name] = 1
w, rs = weightedIntersection(rs, r)
#assert rs==None or hasattr(rs, 'values') or hasattr(rs, 'keys') #assert rs==None or hasattr(rs, 'values') or hasattr(rs, 'keys')
if rs is None: if rs is None:
# return everything # return everything
if sort_index is None: if sort_index is None:
rs=data.items() rs = data.items()
append(LazyMap(self.instantiate, rs, len(self))) append(LazyMap(self.instantiate, rs, len(self)))
else: else:
self._build_sorted_results(data,sort_index,append) self._build_sorted_results(data, sort_index, append)
elif rs: elif rs:
# this is reached by having an empty result set (ie non-None) # this is reached by having an empty result set (ie non-None)
# XXX Isn't this reached by having a non-empty, non-None set?
if sort_index is None and hasattr(rs, 'values'): if sort_index is None and hasattr(rs, 'values'):
# having a 'values' means we have a data structure with # having a 'values' means we have a data structure with
# scores. Build a new result set, sort it by score, reverse # scores. Build a new result set, sort it by score, reverse
...@@ -511,12 +505,13 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -511,12 +505,13 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
rs = [] rs = []
for score, key in rset: for score, key in rset:
# compute normalized scores # compute normalized scores
rs.append(( int((score/max)*100), score, key)) rs.append((int(100. * score / max), score, key))
append(LazyMap(self.__getitem__, rs)) append(LazyMap(self.__getitem__, rs))
elif sort_index is None and not hasattr(rs, 'values'): elif sort_index is None and not hasattr(rs, 'values'):
# no scores? Just Lazify. # no scores? Just Lazify.
if hasattr(rs, 'keys'): rs=rs.keys() if hasattr(rs, 'keys'):
rs = rs.keys()
append(LazyMap(self.__getitem__, rs)) append(LazyMap(self.__getitem__, rs))
else: else:
# sort. If there are scores, then this block is not # sort. If there are scores, then this block is not
...@@ -530,7 +525,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -530,7 +525,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
def _build_sorted_results(self,rs,sort_index,append): def _build_sorted_results(self,rs,sort_index,append):
# This function will .append pairs where the first item # This function will .append pairs where the first item
# in the pair is a sort key, and the second item in the # in the pair is a sort key, and the second item in the
# pair is a squence of results which share the same # pair is a sequence of results which share the same
# sort key. Later on the list to which these things # sort key. Later on the list to which these things
# are .append()ed will be .sort()ed, and the first element # are .append()ed will be .sort()ed, and the first element
# of each pair stripped. # of each pair stripped.
...@@ -555,14 +550,15 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -555,14 +550,15 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# keys is much less then the number of results. # keys is much less then the number of results.
intset = _intersection(rs, intset) intset = _intersection(rs, intset)
if intset: if intset:
keys = getattr(intset,'keys',_None) keys = getattr(intset, 'keys', _None)
if keys is not _None: if keys is not _None:
# Is this ever true? # Is this ever true?
intset = keys() intset = keys()
append((k,_lazymap(_self__getitem__, intset))) append((k, _lazymap(_self__getitem__, intset)))
# Note that sort keys are unique. # Note that sort keys are unique.
else: else:
if hasattr(rs, 'keys'): rs=rs.keys() if hasattr(rs, 'keys'):
rs = rs.keys()
_sort_index_keyForDocument = sort_index.keyForDocument _sort_index_keyForDocument = sort_index.keyForDocument
_keyerror = KeyError _keyerror = KeyError
for did in rs: for did in rs:
...@@ -579,72 +575,87 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -579,72 +575,87 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# uniqueness the first element of each pair is # uniqueness the first element of each pair is
# actually a tuple of: # actually a tuple of:
# (real sort key, some unique number) # (real sort key, some unique number)
lm = _lazymap(_self__getitem__,[did]) lm = _lazymap(_self__getitem__, [did])
key = (key,id(lm)) key = key, id(lm)
append((key,lm)) append((key,lm))
def _get_sort_attr(self, attr, kw):
"""Helper function to find sort-on or sort-order."""
# There are three different ways to find the attribute:
# 1. kw[sort-attr]
# 2. self.sort-attr
# 3. kw[sort_attr]
# kw may be a dict or an ExtensionClass MultiMapping, which
# differ in what get() returns with no default value.
name = "sort-%s" % attr
val = kw.get(name, None)
if val is not None:
return val
val = getattr(self, name, None)
if val is not None:
return val
return kw.get("sort_%s" % attr, None)
def searchResults(self, REQUEST=None, used=None, optimize=1, **kw): def searchResults(self, REQUEST=None, used=None, optimize=1, **kw):
# Get search arguments: # Get search arguments:
if REQUEST is None and not kw:
try: REQUEST=self.REQUEST
except AttributeError: pass
if kw: if kw:
if REQUEST: if REQUEST:
m=MultiMapping() m = MultiMapping()
m.push(REQUEST) m.push(REQUEST)
m.push(kw) m.push(kw)
kw=m kw = m
elif REQUEST: kw=REQUEST else:
if REQUEST is None:
try:
REQUEST = self.REQUEST
except AttributeError:
pass
if REQUEST:
kw = REQUEST
# Compute "sort_index", which is a sort index, or none: # Compute "sort_index", which is a sort index, or none:
if kw.has_key('sort-on'): sort_index = self._get_sort_attr("on", kw)
sort_index=kw['sort-on']
elif hasattr(self, 'sort-on'):
sort_index=getattr(self, 'sort-on')
elif kw.has_key('sort_on'):
sort_index=kw['sort_on']
else: sort_index=None
sort_order=''
if sort_index is not None: if sort_index is not None:
if self.indexes.has_key(sort_index): # self.indexes is always a dict, so get() w/ 1 arg works
sort_index=self.indexes[sort_index] sort_index = self.indexes.get(sort_index)
if sort_index is None:
raise CatalogError, ('Unknown sort_on index %s' % sort_index)
else:
if not hasattr(sort_index, 'keyForDocument'): if not hasattr(sort_index, 'keyForDocument'):
raise CatalogError( raise CatalogError(
'The index chosen for sort_on is not capable of being' 'The index chosen for sort_on is not capable of being'
' used as a sort index.' ' used as a sort index.'
) )
else:
raise CatalogError, ('Unknown sort_on index %s' % sort_index)
# Perform searches with indexes and sort_index # Perform searches with indexes and sort_index
r=[] r = []
used = self._indexedSearch(kw, sort_index, r.append, used, optimize)
used=self._indexedSearch(kw, sort_index, r.append, used, optimize)
if not r: if not r:
return LazyCat(r) return LazyCat(r)
# Sort/merge sub-results # Sort/merge sub-results
if len(r)==1: if len(r) == 1:
if sort_index is None: r=r[0] if sort_index is None:
else: r=r[0][1] r = r[0]
else:
r = r[0][1]
else: else:
if sort_index is None: r=LazyCat(r, len(r)) if sort_index is None:
r = LazyCat(r, len(r))
else: else:
r.sort() r.sort()
if kw.has_key('sort-order'): so = self._get_sort_attr("order", kw)
so=kw['sort-order'] if (isinstance(so, types.StringType) and
elif hasattr(self, 'sort-order'):
so=getattr(self, 'sort-order')
elif kw.has_key('sort_order'):
so=kw['sort_order']
else: so=None
if (type(so) is type('') and
so.lower() in ('reverse', 'descending')): so.lower() in ('reverse', 'descending')):
r.reverse() r.reverse()
r= [i[1] for i in r] size = 0
r=LazyCat(r, reduce(lambda x,y: x+len(y), r, 0)) tmp = []
for i in r:
elt = i[1]
tmp.append(elt)
size += len(elt)
r = LazyCat(tmp, size)
return r return r
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment