Commit d6376fb6 authored by Hanno Schlichting's avatar Hanno Schlichting

More catalog work, merge in the non-queryplan and non-btree changes of...

More catalog work, merge in the non-queryplan and non-btree changes of queryplan and optimize more of date range index internals
parent 850cb0a6
......@@ -56,6 +56,17 @@ Restructuring
Features Added
++++++++++++++
- Various optimizations to indexes _apply_index and the catalog's search
method inspired by experimental.catalogqueryplan.
- Added a new ILimitedResultIndex to Products.PluginIndexes and made most
built-in indexes compatible with it. This allows indexes to consider the
already calculated result set inside their own calculations.
- Changed the internals of the DateRangeIndex to always use IITreeSet and do
an inline migration from IISet. Some datum tend to have large number of
documents, for example when using default floor or ceiling dates.
- Added a new reporting tab to `Products.ZCatalog` instances. You can use this
to get an overview of slow catalog queries, as specified by a configurable
threshold value. The reports are per running Zope process.
......
......@@ -157,7 +157,7 @@ class DateIndex(UnIndex, PropertyManager):
return returnStatus
def _apply_index(self, request):
def _apply_index(self, request, resultset=None):
"""Apply the index to query parameters given in the argument
Normalize the 'query' arguments into integer values at minute
......@@ -176,7 +176,7 @@ class DateIndex(UnIndex, PropertyManager):
#experimental code for specifing the operator
operator = record.get( 'operator', self.useOperator )
if not operator in self.operators :
raise RuntimeError, "operator not valid: %s" % operator
raise RuntimeError("operator not valid: %s" % operator)
# depending on the operator we use intersection or union
if operator=="or":
......@@ -223,6 +223,9 @@ class DateIndex(UnIndex, PropertyManager):
if set is not None:
if isinstance(set, int):
set = IISet((set,))
else:
# set can't be bigger than resultset
set = intersection(set, resultset)
r = set_func(r, set)
if isinstance(r, int):
......
......@@ -28,7 +28,6 @@ from BTrees.IIBTree import IISet
from BTrees.IIBTree import IITreeSet
from BTrees.IIBTree import intersection
from BTrees.IIBTree import multiunion
from BTrees.IIBTree import union
from BTrees.IOBTree import IOBTree
from BTrees.Length import Length
from DateTime.DateTime import DateTime
......@@ -242,7 +241,7 @@ class DateRangeIndex(UnIndex):
return tuple( result )
def _apply_index(self, request):
def _apply_index(self, request, resultset=None):
"""
Apply the index to query parameters given in 'request', which
should be a mapping object.
......@@ -265,34 +264,17 @@ class DateRangeIndex(UnIndex):
# Aggregate sets for each bucket separately, to avoid
# large-small union penalties.
#
#until_only = IISet()
#map( until_only.update, self._until_only.values( term ) )
# XXX use multi-union
until_only = multiunion( self._until_only.values( term ) )
#since_only = IISet()
#map( since_only.update, self._since_only.values( None, term ) )
# XXX use multi-union
since_only = multiunion( self._since_only.values( None, term ) )
#until = IISet()
#map( until.update, self._until.values( term ) )
# XXX use multi-union
until = multiunion( self._until.values( term ) )
#since = IISet()
#map( since.update, self._since.values( None, term ) )
# XXX use multi-union
since = multiunion( self._since.values( None, term ) )
bounded = intersection( until, since )
# Total result is bound by resultset
until = intersection(resultset, until)
since = multiunion(self._since.values(None, term))
bounded = intersection(until, since)
# Merge from smallest to largest.
#result = union( self._always, until_only )
result = union( bounded, until_only )
result = union( result, since_only )
#result = union( result, bounded )
result = union( result, self._always )
result = multiunion([bounded, until_only, since_only, self._always])
return result, ( self._since_field, self._until_field )
......@@ -314,8 +296,8 @@ class DateRangeIndex(UnIndex):
if set is None:
self._until_only[ until ] = documentId
else:
if isinstance(set, int):
set = self._until_only[ until ] = IISet((set, documentId))
if isinstance(set, (int, IISet)):
set = self._until_only[until] = IITreeSet((set, documentId))
else:
set.insert( documentId )
elif until is None:
......@@ -324,8 +306,8 @@ class DateRangeIndex(UnIndex):
if set is None:
self._since_only[ since ] = documentId
else:
if isinstance(set, int):
set = self._since_only[ since ] = IISet((set, documentId))
if isinstance(set, (int, IISet)):
set = self._since_only[since] = IITreeSet((set, documentId))
else:
set.insert( documentId )
......@@ -335,8 +317,8 @@ class DateRangeIndex(UnIndex):
if set is None:
self._since[ since ] = documentId
else:
if isinstance(set, int):
set = self._since[ since ] = IISet((set, documentId))
if isinstance(set, (int, IISet)):
set = self._since[since] = IITreeSet((set, documentId))
else:
set.insert( documentId )
......@@ -344,8 +326,8 @@ class DateRangeIndex(UnIndex):
if set is None:
self._until[ until ] = documentId
else:
if isinstance(set, int):
set = self._until[ until ] = IISet((set, documentId))
if isinstance(set, (int, IISet)):
set = self._until[until] = IITreeSet((set, documentId))
else:
set.insert( documentId )
......
......@@ -21,7 +21,7 @@ import sys
from BTrees.IIBTree import intersection
from BTrees.IIBTree import IITreeSet
from BTrees.IIBTree import IISet
from BTrees.IIBTree import union
from BTrees.IIBTree import multiunion
from BTrees.IOBTree import IOBTree
from BTrees.Length import Length
from BTrees.OOBTree import OOBTree
......@@ -31,7 +31,7 @@ from zope.interface import implements
from Products.PluginIndexes.common import safe_callable
from Products.PluginIndexes.common.util import parseIndexRequest
from Products.PluginIndexes.interfaces import IPluggableIndex
from Products.PluginIndexes.interfaces import ILimitedResultIndex
from Products.PluginIndexes.interfaces import ISortIndex
from Products.PluginIndexes.interfaces import IUniqueValueIndex
......@@ -43,7 +43,7 @@ class UnIndex(SimpleItem):
"""Simple forward and reverse index.
"""
implements(IPluggableIndex, IUniqueValueIndex, ISortIndex)
implements(ILimitedResultIndex, IUniqueValueIndex, ISortIndex)
def __init__(
self, id, ignore_ex=None, call_methods=None, extra=None, caller=None):
......@@ -302,7 +302,7 @@ class UnIndex(SimpleItem):
LOG.debug('Attempt to unindex nonexistent document'
' with id %s' % documentId,exc_info=True)
def _apply_index(self, request):
def _apply_index(self, request, resultset=None):
"""Apply the index to query parameters given in the request arg.
The request argument should be a mapping object.
......@@ -348,11 +348,7 @@ class UnIndex(SimpleItem):
# experimental code for specifing the operator
operator = record.get('operator',self.useOperator)
if not operator in self.operators :
raise RuntimeError,"operator not valid: %s" % escape(operator)
# depending on the operator we use intersection or union
if operator=="or": set_func = union
else: set_func = intersection
raise RuntimeError("operator not valid: %s" % escape(operator))
# Range parameter
range_parm = record.get('range',None)
......@@ -375,24 +371,84 @@ class UnIndex(SimpleItem):
if 'max' in opr_args: hi = max(record.keys)
else: hi = None
if hi:
setlist = index.items(lo,hi)
setlist = index.values(lo,hi)
else:
setlist = index.values(lo)
# If we only use one key, intersect and return immediately
if len(setlist) == 1:
result = setlist[0]
if isinstance(result, int):
result = IISet((result,))
return result, (self.id,)
if operator == 'or':
tmp = []
for s in setlist:
if isinstance(s, int):
s = IISet((s,))
tmp.append(s)
r = multiunion(tmp)
else:
setlist = index.items(lo)
# For intersection, sort with smallest data set first
tmp = []
for s in setlist:
if isinstance(s, int):
s = IISet((s,))
tmp.append(s)
if len(tmp) > 2:
setlist = sorted(tmp, key=len)
else:
setlist = tmp
r = resultset
for s in setlist:
# the result is bound by the resultset
r = intersection(r, s)
for k, set in setlist:
if isinstance(set, int):
set = IISet((set,))
r = set_func(r, set)
else: # not a range search
for key in record.keys:
set=index.get(key, None)
if set is None:
set = IISet(())
elif isinstance(set, int):
set = IISet((set,))
r = set_func(r, set)
if isinstance(r, int): r=IISet((r,))
# Filter duplicates
setlist = []
for k in record.keys:
s = index.get(k, None)
# If None, try to bail early
if s is None:
if operator == 'or':
# If union, we can't possibly get a bigger result
continue
# If intersection, we can't possibly get a smaller result
return IISet(), (self.id,)
elif isinstance(s, int):
s = IISet((s,))
setlist.append(s)
# If we only use one key return immediately
if len(setlist) == 1:
result = setlist[0]
if isinstance(result, int):
result = IISet((result,))
return result, (self.id,)
if operator == 'or':
# If we already get a small result set passed in, intersecting
# the various indexes with it and doing the union later is
# faster than creating a multiunion first.
if resultset is not None and len(resultset) < 200:
smalllist = []
for s in setlist:
smalllist.append(intersection(resultset, s))
r = multiunion(smalllist)
else:
r = multiunion(setlist)
else:
# For intersection, sort with smallest data set first
if len(setlist) > 2:
setlist = sorted(setlist, key=len)
r = resultset
for s in setlist:
r = intersection(r, s)
if isinstance(r, int):
r = IISet((r, ))
if r is None:
return IISet(), (self.id,)
else:
......
......@@ -85,6 +85,15 @@ class IPluggableIndex(Interface):
"""Empty the index"""
class ILimitedResultIndex(IPluggableIndex):
def _apply_index(request, resultset=None):
"""Same as IPluggableIndex' _apply_index method. The additional
resultset argument contains the resultset, as already calculated by
ZCatalog's search method.
"""
class IUniqueValueIndex(IPluggableIndex):
"""An index which can return lists of unique values contained in it"""
......
......@@ -22,6 +22,7 @@ import Acquisition
import ExtensionClass
from Missing import MV
from Persistence import Persistent
from Products.PluginIndexes.interfaces import ILimitedResultIndex
import BTrees.Length
from BTrees.IIBTree import intersection, weightedIntersection, IISet
......@@ -475,6 +476,10 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
query[iid] = value
return query
def _sorted_search_indexes(self, query):
# Simple implementation doing no ordering.
return self.indexes.keys()
def search(self, query, sort_index=None, reverse=0, limit=None, merge=1):
"""Iterate through the indexes, applying the query to each one. If
merge is true then return a lazy result set (sorted if appropriate)
......@@ -497,22 +502,41 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# Canonicalize the request into a sensible query before passing it on
query = self.make_query(query)
query_keys = query.keys()
cr = self.getCatalogReport(query)
cr.start()
for i in self.indexes.keys():
for i in self._sorted_search_indexes(query):
if i not in query_keys:
# Do not ask indexes to restrict the result, which aren't
# part of the query
continue
index = self.getIndex(i)
_apply_index = getattr(index, "_apply_index", None)
if _apply_index is None:
continue
limit_result = False
if ILimitedResultIndex.providedBy(index):
limit_result = True
cr.split(i)
if limit_result:
r = _apply_index(query, rs)
else:
r = _apply_index(query)
cr.split(i)
if r is not None:
r, u = r
# Short circuit if empty result
# BBB: We can remove the "r is not None" check in Zope 2.14
# once we don't need to support the "return everything" case
# anymore
if r is not None and not r:
return LazyCat([])
w, rs = weightedIntersection(rs, r)
if not rs:
break
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment