Commit e6b5d0c3 authored by Jim Fulton's avatar Jim Fulton

Merged changes from Catalog-BTrees-Integration branch.

parent 22eec3b7
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE. DAMAGE.
$Id: ThreadLock.c,v 1.7 1999/02/19 16:10:05 jim Exp $ $Id: ThreadLock.c,v 1.8 2001/03/15 13:16:21 jim Exp $
If you have questions regarding this software, If you have questions regarding this software,
contact: contact:
...@@ -46,7 +46,7 @@ ...@@ -46,7 +46,7 @@
*/ */
static char ThreadLock_module_documentation[] = static char ThreadLock_module_documentation[] =
"" ""
"\n$Id: ThreadLock.c,v 1.7 1999/02/19 16:10:05 jim Exp $" "\n$Id: ThreadLock.c,v 1.8 2001/03/15 13:16:21 jim Exp $"
; ;
#include "Python.h" #include "Python.h"
...@@ -93,8 +93,9 @@ typedef struct { ...@@ -93,8 +93,9 @@ typedef struct {
staticforward PyTypeObject ThreadLockType; staticforward PyTypeObject ThreadLockType;
static int static int
cacquire(ThreadLockObject *self) cacquire(ThreadLockObject *self, int wait)
{ {
int acquired = 1;
#ifdef WITH_THREAD #ifdef WITH_THREAD
long id = get_thread_ident(); long id = get_thread_ident();
#else #else
...@@ -113,19 +114,26 @@ cacquire(ThreadLockObject *self) ...@@ -113,19 +114,26 @@ cacquire(ThreadLockObject *self)
{ {
#ifdef WITH_THREAD #ifdef WITH_THREAD
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
acquire_lock(self->lock, 1); acquired = acquire_lock(self->lock, wait ? WAIT_LOCK : NOWAIT_LOCK);
Py_END_ALLOW_THREADS Py_END_ALLOW_THREADS
#endif #endif
if (acquired)
{
self->count=0; self->count=0;
self->id=id; self->id=id;
} }
return 0; }
return acquired;
} }
static PyObject * static PyObject *
acquire(ThreadLockObject *self, PyObject *args) acquire(ThreadLockObject *self, PyObject *args)
{ {
if(cacquire(self) < 0) return NULL; int wait = -1, acquired;
if (! PyArg_ParseTuple(args, "|i", &wait)) return NULL;
acquired=cacquire(self, wait);
if(acquired < 0) return NULL;
if (wait >= 0) return PyInt_FromLong(acquired);
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
} }
...@@ -138,6 +146,7 @@ crelease(ThreadLockObject *self) ...@@ -138,6 +146,7 @@ crelease(ThreadLockObject *self)
#else #else
long id = 1; long id = 1;
#endif #endif
if(self->count >= 0 && self->id==id) if(self->count >= 0 && self->id==id)
{ {
/* Somebody has locked me. It is either the current thread or /* Somebody has locked me. It is either the current thread or
...@@ -161,6 +170,7 @@ crelease(ThreadLockObject *self) ...@@ -161,6 +170,7 @@ crelease(ThreadLockObject *self)
static PyObject * static PyObject *
release(ThreadLockObject *self, PyObject *args) release(ThreadLockObject *self, PyObject *args)
{ {
if (! PyArg_ParseTuple(args, "")) return NULL;
if(crelease(self) < 0) return NULL; if(crelease(self) < 0) return NULL;
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
...@@ -172,7 +182,7 @@ call_method(ThreadLockObject *self, PyObject *args) ...@@ -172,7 +182,7 @@ call_method(ThreadLockObject *self, PyObject *args)
PyObject *f, *a=0, *k=0; PyObject *f, *a=0, *k=0;
UNLESS(PyArg_ParseTuple(args,"OO|O",&f, &a, &k)) return NULL; UNLESS(PyArg_ParseTuple(args,"OO|O",&f, &a, &k)) return NULL;
if(cacquire(self) < 0) return NULL; if(cacquire(self, -1) < 0) return NULL;
f=PyEval_CallObjectWithKeywords(f,a,k); f=PyEval_CallObjectWithKeywords(f,a,k);
if(crelease(self) < 0) if(crelease(self) < 0)
{ {
...@@ -189,7 +199,7 @@ static struct PyMethodDef ThreadLock_methods[] = { ...@@ -189,7 +199,7 @@ static struct PyMethodDef ThreadLock_methods[] = {
"Acquire the lock, call the function, and then release the lock.\n" "Acquire the lock, call the function, and then release the lock.\n"
}, },
{"acquire", (PyCFunction)acquire, 1, {"acquire", (PyCFunction)acquire, 1,
"acquire() -- Acquire a lock, taking the thread ID into account" "acquire([wait]) -- Acquire a lock, taking the thread ID into account"
}, },
{"release", (PyCFunction)release, 1, {"release", (PyCFunction)release, 1,
"release() -- Release a lock, taking the thread ID into account" "release() -- Release a lock, taking the thread ID into account"
...@@ -296,7 +306,7 @@ void ...@@ -296,7 +306,7 @@ void
initThreadLock() initThreadLock()
{ {
PyObject *m, *d; PyObject *m, *d;
char *rev="$Revision: 1.7 $"; char *rev="$Revision: 1.8 $";
m = Py_InitModule4("ThreadLock", Module_methods, m = Py_InitModule4("ThreadLock", Module_methods,
ThreadLock_module_documentation, ThreadLock_module_documentation,
......
...@@ -86,9 +86,6 @@ ...@@ -86,9 +86,6 @@
from Persistence import Persistent from Persistence import Persistent
import Acquisition import Acquisition
import ExtensionClass import ExtensionClass
import BTree, OIBTree, IOBTree, IIBTree
IIBucket=IIBTree.Bucket
from intSet import intSet
from SearchIndex import UnIndex, UnTextIndex, UnKeywordIndex, Query from SearchIndex import UnIndex, UnTextIndex, UnKeywordIndex, Query
from SearchIndex.Lexicon import Lexicon from SearchIndex.Lexicon import Lexicon
import regex, pdb import regex, pdb
...@@ -101,14 +98,13 @@ from zLOG import LOG, ERROR ...@@ -101,14 +98,13 @@ from zLOG import LOG, ERROR
from Lazy import LazyMap, LazyFilter, LazyCat from Lazy import LazyMap, LazyFilter, LazyCat
from CatalogBrains import AbstractCatalogBrain, NoBrainer from CatalogBrains import AbstractCatalogBrain, NoBrainer
from BTrees.IIBTree import intersection, weightedIntersection
from BTrees.OIBTree import OIBTree
from BTrees.IOBTree import IOBTree
import BTrees.Length
from SearchIndex.randid import randid
import time import time
class KWMultiMapping(MultiMapping):
def has_key(self, name):
try:
r=self[name]
return 1
except KeyError:
return 0
def orify(seq, def orify(seq,
query_map={ query_map={
...@@ -118,7 +114,7 @@ def orify(seq, ...@@ -118,7 +114,7 @@ def orify(seq,
subqueries=[] subqueries=[]
for q in seq: for q in seq:
try: q=query_map[type(q)](q) try: q=query_map[type(q)](q)
except: q=Query.Cmp(q) except KeyError: q=Query.Cmp(q)
subqueries.append(q) subqueries.append(q)
return apply(Query.Or,tuple(subqueries)) return apply(Query.Or,tuple(subqueries))
...@@ -152,9 +148,8 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -152,9 +148,8 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# object unique identifier to the rid, and self.paths is a # object unique identifier to the rid, and self.paths is a
# mapping of the rid to the unique identifier. # mapping of the rid to the unique identifier.
self.data = BTree.BTree() # mapping of rid to meta_data self.__len__=BTrees.Length.Length()
self.uids = OIBTree.BTree() # mapping of uid to rid self.clear()
self.paths = IOBTree.BTree() # mapping of rid to uid
# indexes can share a lexicon or have a private copy. Here, # indexes can share a lexicon or have a private copy. Here,
# we instantiate a lexicon to be shared by all text indexes. # we instantiate a lexicon to be shared by all text indexes.
...@@ -163,7 +158,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -163,7 +158,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
if type(vocabulary) is type(''): if type(vocabulary) is type(''):
self.lexicon = vocabulary self.lexicon = vocabulary
else: else:
#ack!
self.lexicon = Lexicon() self.lexicon = Lexicon()
if brains is not None: if brains is not None:
...@@ -171,6 +165,52 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -171,6 +165,52 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
self.updateBrains() self.updateBrains()
def clear(self):
""" clear catalog """
self.data = IOBTree() # mapping of rid to meta_data
self.uids = OIBTree() # mapping of uid to rid
self.paths = IOBTree() # mapping of rid to uid
# convert old-style Catalog object to new in-place
try: self.__len__.set(0)
except AttributeError: self.__len__=BTrees.Length.Length()
for x in self.indexes.values():
x.clear()
def _convertBTrees(self, threshold=200):
from BTrees.convert import convert
if type(self.data) is not IOBTree:
data=self.data
self.data=IOBTree()
convert(data, self.data, threshold)
uids=self.uids
self.uids=OIBTree()
convert(uids, self.uids, threshold)
paths=self.paths
self.paths=IOBTree()
convert(paths, self.paths, threshold)
self.__len__=BTrees.Length.Length()
for index in self.indexes.values():
index._convertBTrees(threshold)
lexicon=self.lexicon
if type(lexicon) is type(''):
lexicon=getattr(self, lexicon).lexicon
lexicon._convertBTrees(threshold)
def __len__(self):
# NOTE, this is never called for new catalogs, since
# each instance overrides this.
return len(self.data)
def updateBrains(self): def updateBrains(self):
self.useBrains(self._v_brains) self.useBrains(self._v_brains)
...@@ -213,7 +253,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -213,7 +253,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
scopy = self.schema.copy() scopy = self.schema.copy()
# it is useful for our brains to know these things
scopy['data_record_id_']=len(self.schema.keys()) scopy['data_record_id_']=len(self.schema.keys())
scopy['data_record_score_']=len(self.schema.keys())+1 scopy['data_record_score_']=len(self.schema.keys())+1
scopy['data_record_normalized_score_']=len(self.schema.keys())+2 scopy['data_record_normalized_score_']=len(self.schema.keys())+2
...@@ -345,33 +384,54 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -345,33 +384,54 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
'uid' is the unique Catalog identifier for this object 'uid' is the unique Catalog identifier for this object
""" """
data = self.data
if self.uids.has_key(uid):
index = self.uids[uid]
elif data:
index = data.keys()[-1] + 1 # find the next available unique id
self.uids[uid] = index
self.paths[index] = uid
else:
index = 0
self.uids[uid] = index
self.paths[index] = uid
data = self.data
# meta_data is stored as a tuple for efficiency # meta_data is stored as a tuple for efficiency
newDataRecord = self.recordify(object) newDataRecord = self.recordify(object)
oldDataRecord = data.get(index, None)
# Now we need to compare the tuples before we update them! index=self.uids.get(uid, None)
if oldDataRecord is not None: if index is not None:
for i in range(len(newDataRecord)): # old data
if newDataRecord[i] != oldDataRecord[i]:
if data.get(index, 0) != newDataRecord:
# Update the meta-data, if necessary
data[index] = newDataRecord data[index] = newDataRecord
break
else: else:
# new data
if type(data) is IOBTree:
# New style, get radom id
index=getattr(self, '_v_nextid', 0)
if index%4000 == 0: index = randid()
while not data.insert(index, newDataRecord):
index=randid()
# We want ids to be somewhat random, but there are
# advantages for having some ids generated
# sequentially when many catalog updates are done at
# once, such as when reindexing or bulk indexing.
# We allocate ids sequentially using a volatile base,
# so different threads get different bases. This
# further reduces conflict and reduces churn in
# here and it result sets when bulk indexing.
self._v_nextid=index+1
else:
if data:
# find the next available unique id
index = data.keys()[-1] + 1
else:
index=0
data[index] = newDataRecord data[index] = newDataRecord
try: self.__len__.change(1)
except AttributeError: pass # No managed length (old-style)
self.uids[uid] = index
self.paths[index] = uid
total = 0 total = 0
for x in self.indexes.values(): for x in self.indexes.values():
## tricky! indexes need to acquire now, and because they ## tricky! indexes need to acquire now, and because they
...@@ -418,6 +478,10 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -418,6 +478,10 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
LOG('Catalog', ERROR, ('uncatalogObject unsuccessfully ' LOG('Catalog', ERROR, ('uncatalogObject unsuccessfully '
'attempted to delete rid %s ' 'attempted to delete rid %s '
'from paths or data btree.' % rid)) 'from paths or data btree.' % rid))
else:
try: self.__len__.change(-1)
except AttributeError: pass # No managed length
del uids[uid] del uids[uid]
self.data = data self.data = data
else: else:
...@@ -425,15 +489,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -425,15 +489,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
'attempted to uncatalog an object ' 'attempted to uncatalog an object '
'with a uid of %s. ' % uid)) 'with a uid of %s. ' % uid))
def clear(self):
""" clear catalog """
self.data = BTree.BTree()
self.uids = OIBTree.BTree()
self.paths = IOBTree.BTree()
for x in self.indexes.values():
x.clear()
def uniqueValuesFor(self, name): def uniqueValuesFor(self, name):
""" return unique values for FieldIndex name """ """ return unique values for FieldIndex name """
...@@ -441,26 +496,16 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -441,26 +496,16 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
def hasuid(self, uid): def hasuid(self, uid):
""" return the rid if catalog contains an object with uid """ """ return the rid if catalog contains an object with uid """
if self.uids.has_key(uid): return self.uids.get(uid)
return self.uids[uid]
else:
return None
def recordify(self, object): def recordify(self, object):
""" turns an object into a record tuple """ """ turns an object into a record tuple """
record = [] record = []
# the unique id is allways the first element # the unique id is allways the first element
for x in self.names: for x in self.names:
try: attr=getattr(object, x, MV)
attr = getattr(object, x) if(attr is not MV and callable(attr)): attr=attr()
if(callable(attr)):
attr = attr()
except:
attr = MV
record.append(attr) record.append(attr)
return tuple(record) return tuple(record)
def instantiate(self, record): def instantiate(self, record):
...@@ -485,12 +530,9 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -485,12 +530,9 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
## Searching engine. You don't really have to worry about what goes ## Searching engine. You don't really have to worry about what goes
## on below here... Most of this stuff came from ZTables with tweaks. ## on below here... Most of this stuff came from ZTables with tweaks.
def _indexedSearch(self, args, sort_index, append, used, def _indexedSearch(self, args, sort_index, append, used):
IIBType=type(IIBucket()), intSType=type(intSet())):
""" """
Iterate through the indexes, applying the query to each one. Iterate through the indexes, applying the query to each one.
Do some magic to join result sets. Be intelligent about
handling intSets and IIBuckets.
""" """
rs=None rs=None
...@@ -498,7 +540,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -498,7 +540,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
if used is None: used={} if used is None: used={}
for i in self.indexes.keys(): for i in self.indexes.keys():
try:
index = self.indexes[i].__of__(self) index = self.indexes[i].__of__(self)
if hasattr(index,'_apply_index'): if hasattr(index,'_apply_index'):
r=index._apply_index(args) r=index._apply_index(args)
...@@ -506,74 +547,66 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -506,74 +547,66 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
r, u = r r, u = r
for name in u: for name in u:
used[name]=1 used[name]=1
if rs is None: w, rs = weightedIntersection(rs, r)
rs = r
else:
# you can't intersect an IIBucket into an
# intSet, but you can go the other way
# around. Make sure we're facing the
# right direction...
if type(rs) is intSType and type(r) is IIBType:
rs=r.intersection(rs)
else:
rs=rs.intersection(r)
except:
return used
#assert rs==None or hasattr(rs, 'values') or hasattr(rs, 'keys')
if rs is None: if rs is None:
# return everything
if sort_index is None: if sort_index is None:
rs=data.items() rs=data.items()
append(LazyMap(self.instantiate, rs)) append(LazyMap(self.instantiate, rs, len(self)))
else: else:
try: try:
for k, intset in sort_index._index.items(): for k, intset in sort_index.items():
append((k,LazyMap(self.__getitem__, intset))) append((k,LazyMap(self.__getitem__, intset)))
except AttributeError: except AttributeError:
raise ValueError, "Incorrect index name passed as " \ raise ValueError, (
"'sort_on' parameter. Note that you may only " \ "Incorrect index name passed as"
"sort on values for which there is a matching " \ " 'sort_on' parameter. Note that you may only"
"index available." " sort on values for which there is a matching"
" index available.")
elif rs: elif rs:
if sort_index is None and type(rs) is IIBType: # this is reached by having an empty result set (ie non-None)
# then there is score information. Build a new result if sort_index is None and hasattr(rs, 'values'):
# set, sort it by score, reverse it, compute the # having a 'values' means we have a data structure with
# normalized score, and Lazify it. # scores. Build a new result set, sort it by score, reverse
rset = [] # it, compute the normalized score, and Lazify it.
for key, score in rs.items(): rset = rs.byValue(0) # sort it by score
rset.append((score, key))
rset.sort()
rset.reverse()
max = float(rset[0][0]) max = float(rset[0][0])
rs = [] rs = []
for score, key in rset: for score, key in rset:
# compute normalized scores
rs.append(( int((score/max)*100), score, key)) rs.append(( int((score/max)*100), score, key))
append(LazyMap(self.__getitem__, rs)) append(LazyMap(self.__getitem__, rs))
elif sort_index is None and type(rs) is intSType: elif sort_index is None and not hasattr(rs, 'values'):
# no scores? Just Lazify. # no scores? Just Lazify.
if hasattr(rs, 'keys'): rs=rs.keys()
append(LazyMap(self.__getitem__, rs)) append(LazyMap(self.__getitem__, rs))
else: else:
# sort. If there are scores, then this block is not # sort. If there are scores, then this block is not
# reached, therefor 'sort-on' does not happen in the # reached, therefor 'sort-on' does not happen in the
# context of text index query. This should probably # context of text index query. This should probably
# sort by relevance first, then the 'sort-on' attribute. # sort by relevance first, then the 'sort-on' attribute.
if len(rs)>len(sort_index._index): if ((len(rs) / 4) > len(sort_index)):
for k, intset in sort_index._index.items(): # if the sorted index has a quarter as many keys as
if type(rs) is IIBType: # the result set
intset=rs.intersection(intset) for k, intset in sort_index.items():
# Since we still have an IIBucket, let's convert # We have an index that has a set of values for
# it to its set of keys # each sort key, so we interset with each set and
intset=intset.keys() # get a sorted sequence of the intersections.
else:
intset=intset.intersection(rs) # This only makes sense if the number of
# keys is much less then the number of results.
intset = intersection(rs, intset)
if intset: if intset:
if hasattr(intset, 'keys'): intset=intset.keys()
append((k,LazyMap(self.__getitem__, intset))) append((k,LazyMap(self.__getitem__, intset)))
else: else:
if type(rs) is IIBType: if hasattr(rs, 'keys'): rs=rs.keys()
rs=rs.keys() for did in rs:
for r in rs: append((sort_index.keyForDocument(did),
append((sort_index._unindex[r], LazyMap(self.__getitem__,[did])))
LazyMap(self.__getitem__,[r])))
return used return used
...@@ -587,10 +620,10 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -587,10 +620,10 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# Get search arguments: # Get search arguments:
if REQUEST is None and not kw: if REQUEST is None and not kw:
try: REQUEST=self.REQUEST try: REQUEST=self.REQUEST
except: pass except AttributeError: pass
if kw: if kw:
if REQUEST: if REQUEST:
m=KWMultiMapping() m=MultiMapping()
m.push(REQUEST) m.push(REQUEST)
m.push(kw) m.push(kw)
kw=m kw=m
...@@ -599,7 +632,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -599,7 +632,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# Make sure batch size is set # Make sure batch size is set
if REQUEST and not REQUEST.has_key('batch_size'): if REQUEST and not REQUEST.has_key('batch_size'):
try: batch_size=self.default_batch_size try: batch_size=self.default_batch_size
except: batch_size=20 except AttributeError: batch_size=20
REQUEST['batch_size']=batch_size REQUEST['batch_size']=batch_size
# Compute "sort_index", which is a sort index, or none: # Compute "sort_index", which is a sort index, or none:
...@@ -611,8 +644,10 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -611,8 +644,10 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
sort_index=kw['sort_on'] sort_index=kw['sort_on']
else: sort_index=None else: sort_index=None
sort_order='' sort_order=''
if sort_index is not None and sort_index in self.indexes.keys(): if sort_index is not None and self.indexes.has_key(sort_index):
sort_index=self.indexes[sort_index] sort_index=self.indexes[sort_index]
if not hasattr(sort_index, 'keyForDocument'):
raise CatalogError('Invalid sort index')
# Perform searches with indexes and sort_index # Perform searches with indexes and sort_index
r=[] r=[]
...@@ -645,9 +680,4 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -645,9 +680,4 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
__call__ = searchResults __call__ = searchResults
class CatalogError(Exception): pass
...@@ -109,7 +109,7 @@ class AbstractCatalogBrain(Record.Record, Acquisition.Implicit): ...@@ -109,7 +109,7 @@ class AbstractCatalogBrain(Record.Record, Acquisition.Implicit):
def getObject(self, REQUEST=None): def getObject(self, REQUEST=None):
"""Try to return the object for this record""" """Try to return the object for this record"""
try: try:
obj = self.aq_parent.restrictedTraverse(self.getPath()) obj = self.aq_parent.unrestrictedTraverse(self.getPath())
if not obj: if not obj:
if REQUEST is None: if REQUEST is None:
REQUEST = self.REQUEST REQUEST = self.REQUEST
......
...@@ -82,8 +82,8 @@ ...@@ -82,8 +82,8 @@
# attributions are listed in the accompanying credits file. # attributions are listed in the accompanying credits file.
# #
############################################################################## ##############################################################################
__doc__='''$Id: Lazy.py,v 1.3 2001/01/15 16:29:23 petrilli Exp $''' __doc__='''$Id: Lazy.py,v 1.4 2001/03/15 13:16:23 jim Exp $'''
__version__='$Revision: 1.3 $'[11:-2] __version__='$Revision: 1.4 $'[11:-2]
class Lazy: class Lazy:
...@@ -148,11 +148,12 @@ class LazyCat(Lazy): ...@@ -148,11 +148,12 @@ class LazyCat(Lazy):
# Lazy concatenation of one or more sequences. Should be handy # Lazy concatenation of one or more sequences. Should be handy
# for accessing small parts of big searches. # for accessing small parts of big searches.
def __init__(self, sequences): def __init__(self, sequences, length=None):
self._seq=sequences self._seq=sequences
self._data=[] self._data=[]
self._sindex=0 self._sindex=0
self._eindex=-1 self._eindex=-1
if length is not None: self._len=length
def __getitem__(self,index): def __getitem__(self,index):
...@@ -194,11 +195,12 @@ class LazyMap(Lazy): ...@@ -194,11 +195,12 @@ class LazyMap(Lazy):
# Act like a sequence, but get data from a filtering process. # Act like a sequence, but get data from a filtering process.
# Don't access data until necessary # Don't access data until necessary
def __init__(self,func,seq): def __init__(self, func, seq, length=None):
self._seq=seq self._seq=seq
self._len=len(seq)
self._data=[] self._data=[]
self._func=func self._func=func
if length is not None: self._len=length
else: self._len = len(seq)
def __getitem__(self,index): def __getitem__(self,index):
...@@ -229,7 +231,7 @@ class LazyFilter(Lazy): ...@@ -229,7 +231,7 @@ class LazyFilter(Lazy):
# Act like a sequence, but get data from a filtering process. # Act like a sequence, but get data from a filtering process.
# Don't access data until necessary # Don't access data until necessary
def __init__(self,test,seq): def __init__(self, test, seq):
self._seq=seq self._seq=seq
self._data=[] self._data=[]
self._eindex=-1 self._eindex=-1
...@@ -270,7 +272,7 @@ class LazyMop(Lazy): ...@@ -270,7 +272,7 @@ class LazyMop(Lazy):
# Act like a sequence, but get data from a filtering process. # Act like a sequence, but get data from a filtering process.
# Don't access data until necessary # Don't access data until necessary
def __init__(self,test,seq): def __init__(self, test, seq):
self._seq=seq self._seq=seq
self._data=[] self._data=[]
self._eindex=-1 self._eindex=-1
......
...@@ -112,7 +112,7 @@ class Vocabulary(Item, Persistent, Implicit, ...@@ -112,7 +112,7 @@ class Vocabulary(Item, Persistent, Implicit,
AccessControl.Role.RoleManager, AccessControl.Role.RoleManager,
): ):
""" """
A Vocabulary is a user managable relization of a Lexicon object. A Vocabulary is a user-managable realization of a Lexicon object.
""" """
...@@ -151,7 +151,7 @@ class Vocabulary(Item, Persistent, Implicit, ...@@ -151,7 +151,7 @@ class Vocabulary(Item, Persistent, Implicit,
""" create the lexicon to manage... """ """ create the lexicon to manage... """
self.id = id self.id = id
self.title = title self.title = title
self.globbing = globbing self.globbing = not not globbing
if globbing: if globbing:
self.lexicon = GlobbingLexicon.GlobbingLexicon() self.lexicon = GlobbingLexicon.GlobbingLexicon()
......
...@@ -97,14 +97,15 @@ from Persistence import Persistent ...@@ -97,14 +97,15 @@ from Persistence import Persistent
from DocumentTemplate.DT_Util import InstanceDict, TemplateDict from DocumentTemplate.DT_Util import InstanceDict, TemplateDict
from DocumentTemplate.DT_Util import Eval, expr_globals from DocumentTemplate.DT_Util import Eval, expr_globals
from AccessControl.Permission import name_trans from AccessControl.Permission import name_trans
from Catalog import Catalog, orify from Catalog import Catalog, orify, CatalogError
from SearchIndex import UnIndex, UnTextIndex from SearchIndex import UnIndex, UnTextIndex
from Vocabulary import Vocabulary from Vocabulary import Vocabulary
import IOBTree
from Shared.DC.ZRDB.TM import TM from Shared.DC.ZRDB.TM import TM
from AccessControl import getSecurityManager from AccessControl import getSecurityManager
from zLOG import LOG, ERROR from zLOG import LOG, ERROR
StringType=type('')
manage_addZCatalogForm=DTMLFile('dtml/addZCatalog',globals()) manage_addZCatalogForm=DTMLFile('dtml/addZCatalog',globals())
def manage_addZCatalog(self, id, title, def manage_addZCatalog(self, id, title,
...@@ -225,7 +226,6 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -225,7 +226,6 @@ class ZCatalog(Folder, Persistent, Implicit):
def __init__(self, id, title='', vocab_id=None, container=None): def __init__(self, id, title='', vocab_id=None, container=None):
self.id=id self.id=id
self.title=title self.title=title
self.vocab_id = vocab_id
self.threshold = 10000 self.threshold = 10000
self._v_total = 0 self._v_total = 0
...@@ -233,11 +233,11 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -233,11 +233,11 @@ class ZCatalog(Folder, Persistent, Implicit):
if vocab_id is None: if vocab_id is None:
v = Vocabulary('Vocabulary', 'Vocabulary', globbing=1) v = Vocabulary('Vocabulary', 'Vocabulary', globbing=1)
self._setObject('Vocabulary', v) self._setObject('Vocabulary', v)
v = 'Vocabulary' self.vocab_id = 'Vocabulary'
else: else:
v = vocab_id self.vocab_id = vocab_id
self._catalog = Catalog(vocabulary=v) self._catalog = Catalog(vocabulary=self.vocab_id)
self._catalog.addColumn('id') self._catalog.addColumn('id')
self._catalog.addIndex('id', 'FieldIndex') self._catalog.addIndex('id', 'FieldIndex')
...@@ -254,6 +254,7 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -254,6 +254,7 @@ class ZCatalog(Folder, Persistent, Implicit):
self._catalog.addColumn('summary') self._catalog.addColumn('summary')
self._catalog.addIndex('PrincipiaSearchSource', 'TextIndex') self._catalog.addIndex('PrincipiaSearchSource', 'TextIndex')
def __len__(self): return len(self._catalog)
def getVocabulary(self): def getVocabulary(self):
""" more ack! """ """ more ack! """
...@@ -406,8 +407,20 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -406,8 +407,20 @@ class ZCatalog(Folder, Persistent, Implicit):
RESPONSE.redirect(URL1 + '/manage_catalogIndexes?manage_tabs_message=Index%20Deleted') RESPONSE.redirect(URL1 + '/manage_catalogIndexes?manage_tabs_message=Index%20Deleted')
def catalog_object(self, obj, uid): def catalog_object(self, obj, uid=None):
""" wrapper around catalog """ """ wrapper around catalog """
if uid is None:
try: uid = obj.getPhysicalPath
except AttributeError:
raise CatalogError(
"A cataloged object must support the 'getPhysicalPath' "
"method if no unique id is provided when cataloging"
)
else: uid=string.join(uid(), '/')
elif type(uid) is not StringType:
raise CatalogError('The object unique id must be a string.')
self._catalog.catalogObject(obj, uid, None) self._catalog.catalogObject(obj, uid, None)
# None passed in to catalogObject as third argument indicates # None passed in to catalogObject as third argument indicates
# that we shouldn't try to commit subtransactions within any # that we shouldn't try to commit subtransactions within any
...@@ -433,7 +446,7 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -433,7 +446,7 @@ class ZCatalog(Folder, Persistent, Implicit):
# exceeded within the boundaries of the current transaction. # exceeded within the boundaries of the current transaction.
if self._v_total > self.threshold: if self._v_total > self.threshold:
get_transaction().commit(1) get_transaction().commit(1)
self._p_jar.cacheFullSweep(1) self._p_jar.cacheFullSweep(3)
self._v_total = 0 self._v_total = 0
def uncatalog_object(self, uid): def uncatalog_object(self, uid):
...@@ -527,7 +540,7 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -527,7 +540,7 @@ class ZCatalog(Folder, Persistent, Implicit):
if hasattr(self, '_product_meta_types'): pmt=self._product_meta_types if hasattr(self, '_product_meta_types'): pmt=self._product_meta_types
elif hasattr(self, 'aq_acquire'): elif hasattr(self, 'aq_acquire'):
try: pmt=self.aq_acquire('_product_meta_types') try: pmt=self.aq_acquire('_product_meta_types')
except: pass except AttributeError: pass
return self.meta_types+Products.meta_types+pmt return self.meta_types+Products.meta_types+pmt
def valid_roles(self): def valid_roles(self):
...@@ -659,7 +672,7 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -659,7 +672,7 @@ class ZCatalog(Folder, Persistent, Implicit):
if string.find(path, script) != 0: if string.find(path, script) != 0:
path='%s/%s' % (script, path) path='%s/%s' % (script, path)
try: return REQUEST.resolve_url(path) try: return REQUEST.resolve_url(path)
except: return None except: pass
def resolve_path(self, path): def resolve_path(self, path):
""" """
...@@ -668,10 +681,8 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -668,10 +681,8 @@ class ZCatalog(Folder, Persistent, Implicit):
style url. If no object is found, None is returned. style url. If no object is found, None is returned.
No exceptions are raised. No exceptions are raised.
""" """
try: try: return self.unrestrictedTraverse(path)
return self.unrestrictedTraverse(path) except: pass
except:
return None
def manage_normalize_paths(self, REQUEST): def manage_normalize_paths(self, REQUEST):
"""Ensure that all catalog paths are full physical paths """Ensure that all catalog paths are full physical paths
...@@ -713,6 +724,16 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -713,6 +724,16 @@ class ZCatalog(Folder, Persistent, Implicit):
'%s unchanged.' % (len(fixed), len(removed), unchanged), '%s unchanged.' % (len(fixed), len(removed), unchanged),
action='./manage_main') action='./manage_main')
def manage_convertBTrees(self, threshold=200):
    """Convert the catalog's data structures to use BTrees package"""
    # Time the conversion: tt is wall-clock seconds, ct is CPU seconds.
    tt=time.time()
    ct=time.clock()
    self._catalog._convertBTrees(threshold
                                 *1 # force threshold to be an int
                                 )
    tt=time.time()-tt
    ct=time.clock()-ct
    return 'Finished conversion in %s seconds (%s cpu)' % (tt, ct)
Globals.default__class_init__(ZCatalog) Globals.default__class_init__(ZCatalog)
......
...@@ -37,8 +37,6 @@ that have one or more keywords specified in a search query. ...@@ -37,8 +37,6 @@ that have one or more keywords specified in a search query.
<div class="list-item">Index Name</div></td> <div class="list-item">Index Name</div></td>
<td width="20%" align="left" valign="top"> <td width="20%" align="left" valign="top">
<div class="list-item">Index Type</div></td> <div class="list-item">Index Type</div></td>
<td width="15%" align="left" valign="top">
<div class="list-item">Size</div></td>
</tr> </tr>
</dtml-if> </dtml-if>
<dtml-if name="sequence-odd"><tr class="row-normal"> <dtml-if name="sequence-odd"><tr class="row-normal">
...@@ -49,11 +47,11 @@ that have one or more keywords specified in a search query. ...@@ -49,11 +47,11 @@ that have one or more keywords specified in a search query.
</td> </td>
<td width="60%" align="left" valign="top"> <td width="60%" align="left" valign="top">
<div class="list-item"> <div class="list-item">
<a href="" target="_index_info_&dtml-id;">&dtml-id;</a></div></td> &dtml-id;
</div>
</td>
<td width="20%" align="left" valign="top"> <td width="20%" align="left" valign="top">
<div class="list-item">&dtml-meta_type;</div></td> <div class="list-item">&dtml-meta_type;</div></td>
<td width="15%" align="left" valign="top"><div class="list-item"
><dtml-var expr="_.len(_['sequence-item'])" thousands_commas>
</div> </div>
</td> </td>
</tr> </tr>
......
# Making tests a package makes debugging easier.
import rfc822,mailbox,cPickle,string
class Keywords:
    """Helper that reads rfc822 messages from a Unix mailbox file and
    collects purely-alphabetic words found in the subject headers.
    Used only by the catalog test suite.
    """

    def __init__(self):
        # accumulated list of unique keywords
        self.kw = []

    def build(self, mbox, limit):
        """Collect at most `limit` keywords from mailbox file `mbox`
        and pickle the resulting list to data/keywords."""
        box = mailbox.UnixMailbox(open(mbox))
        message = box.next()
        while message and len(self.kw) < limit:
            tokens = string.split(message.dict.get("subject"), ' ')
            for token in tokens:
                alphabetic = 1
                for ch in token:
                    if not ch in string.letters:
                        alphabetic = 0
                if alphabetic == 1 and not token in self.kw:
                    self.kw.append(token)
            message = box.next()
        pickler = cPickle.Pickler(open('data/keywords', 'w'))
        pickler.dump(self.kw)

    def reload(self):
        """Restore the keyword list previously pickled by build()."""
        unpickler = cPickle.Unpickler(open('data/keywords', 'r'))
        self.kw = unpickler.load()

    def keywords(self):
        """Return the collected keyword list."""
        return self.kw
#!/usr/bin/env python1.5
"""
Testsuite for testing Catalogs
$Id: testCatalog.py,v 1.2 2001/03/15 13:16:24 jim Exp $
Andreas Jung, andreas@digicool.com
$Log: testCatalog.py,v $
Revision 1.2 2001/03/15 13:16:24 jim
Merged changes from Catalog-BTrees-Integration branch.
Revision 1.1.4.11 2001/03/14 18:43:16 andreas
rearranged source code
Revision 1.1.4.10 2001/03/14 15:12:24 andreas
minor changes
Revision 1.1.4.9 2001/03/13 22:45:07 andreas
yet another try/except clause (zope mbox file seems to contain some sloppy
messages)
Revision 1.1.4.8 2001/03/13 22:04:20 andreas
added try/except while reading and parsing the mbox file
Revision 1.1.4.7 2001/03/13 16:51:07 andreas
code cleanup
Revision 1.1.4.6 2001/03/13 14:37:40 andreas
prelimary version for integration into the Zope testsuites
Revision 1.1.4.5 2001/03/11 22:33:40 andreas
commit
Revision 1.1.2.23 2001/03/09 16:06:10 andreas
integrated chris unittestCatalog.py
Revision 1.1.2.22 2001/03/09 15:05:28 andreas
rewrote testUpdates()
Revision 1.1.2.21 2001/03/08 18:42:28 andreas
fixed typo
Revision 1.1.4.4 2001/03/08 12:14:27 andreas
minor changes
Revision 1.1.2.20 2001/03/07 14:58:40 andreas
*** empty log message ***
Revision 1.1.2.19 2001/03/07 14:07:51 andreas
Code cleanup
Revision 1.1.2.18 2001/03/07 12:46:32 andreas
added advanced tests
Revision 1.1.2.17 2001/03/07 10:28:27 andreas
reworked version now using the new thread dispatcher
Revision 1.1.2.16 2001/03/05 15:14:51 andreas
- minor changes in testing catalog/uncatalogObject
- tests must now be started in the lib/python directory
- older input sets are no longer valid (must be recreated)
"""
import os,sys
sys.path.insert(0,'.')
try:
import Testing
except ImportError:
sys.path[0] = "../../.."
import Testing
os.environ['STUPID_LOG_FILE']= "debug.log"
here = os.getcwd()
import Zope
import ZODB, ZODB.FileStorage
from Products.ZCatalog import Catalog,ZCatalog,Vocabulary
import Persistence
import ExtensionClass
from Testing import dispatcher
import keywords
from zLOG import LOG
from SearchIndex.UnIndex import UnIndex
from SearchIndex.UnTextIndex import UnTextIndex
from SearchIndex.UnKeywordIndex import UnKeywordIndex
from SearchIndex.Lexicon import Lexicon
import getopt,whrandom,time,string,mailbox,rfc822
from Testing import unittest
# maximum number of files to read for the test suite
maxFiles = 1000
# maximum number of threads for the stress tests
numThreads = 4
# number of iterations for searches
searchIterations = 1000
# number of iterations for catalog/uncatalog operations
updateIterations = 100
# input mailbox file (overridable via the TESTCATALOG_MBOX env var)
mbox = os.environ.get("TESTCATALOG_MBOX","/usr/home/andreas/zope.mbox")
# secondary mailbox used for the incremental-indexing test
mbox2 = "/usr/home/andreas/python.mbox"
# data directory; filled in by main()/get_tests() from maxFiles
dataDir = ""
#
# Don't change anything below
#
class testZODB:
    """Thin convenience wrapper around a FileStorage-backed ZODB."""

    def __init__(self, file="data/work/Data.fs", open=1):
        # NOTE: `open` shadows the builtin; name kept for interface
        # compatibility with existing callers.
        storage = ZODB.FileStorage.FileStorage(file)
        self.db = ZODB.DB(storage)
        if open == 1:
            self.connection = self.db.open()
            self.root = self.connection.root()

    def write(self, name, obj):
        """Store obj under `name` in the root object and commit."""
        self.root[name] = obj
        get_transaction().commit()

    def read(self, name):
        """Return the object stored under `name`."""
        return self.root[name]

    def __del__(self):
        self.db.close()
class testCatalog(Persistence.Persistent,unittest.TestCase):
    """ Wrapper around the catalog stuff """
    def __init__(self,mboxname,maxfiles):
        # message-ids of all messages that were catalogued successfully
        self.msg_ids = []
        self.num_files = 0
        # unique words seen in subject headers
        self.keywords = []
        self.maxfiles = maxfiles
        self._vocabulary = Vocabulary.Vocabulary('Vocabulary',
                                                 'Vocabulary', globbing=1)
        self._catalog = Catalog.Catalog()
        self._catalog.addIndex('to', 'TextIndex')
        self._catalog.addIndex('sender', 'TextIndex')
        self._catalog.addIndex('subject', 'TextIndex')
        self._catalog.addIndex('content', 'TextIndex')
        self._catalog.addIndex('file_id', 'TextIndex')
        self._catalog.addColumn('file_id')
        self._catalog.addIndex('length', 'FieldIndex')
        self._catalog.addColumn('length')
        self._catalog.addIndex('date', 'FieldIndex')
        self._catalog.addIndex('keywords', "KeywordIndex")
        self.build_catalog(mboxname)

    def build_catalog(self,mboxname):
        # Read messages until maxfiles have been catalogued; messages that
        # fail to catalogue (sloppy/broken mails in the mbox) are skipped.
        mb = mailbox.UnixMailbox(open(mboxname,"r"))
        i = 0
        msg = mb.next()
        while msg and self.num_files<self.maxfiles:
            try:
                self.catMessage(msg)
                self.msg_ids.append(msg.dict["message-id"])
            except:
                # broken message: skip it and carry on with the next one
                msg = mb.next()
                continue
            msg = mb.next()
            self.num_files = self.num_files + 1
            if self.num_files % 100==0: print self.num_files
            try:
                # NOTE(review): subject words are taken from the *next*
                # message here (msg was already advanced above) — confirm
                # against the original file's indentation.
                sub = string.split(msg.dict.get("subject",""))
            except:
                msg = mb.next()
                continue
            for s in sub:
                if not s in self.keywords: self.keywords.append(s)
        self._catalog.aq_parent = None

    def catMessage(self,m):
        # index one message under its message-id
        self._catalog.catalogObject( testMessage(m) ,
                                     m.dict["message-id"] )

    def uncatMessage(self,uid):
        self._catalog.uncatalogObject( uid )
class testMessage(ExtensionClass.Base):
    """Indexable wrapper around a single rfc822 message."""

    def __init__(self, msg, modify_doc=0):
        headers = msg.dict
        self.sender = headers.get("from", "")
        self.subject = headers.get("subject", "")
        self.to = headers.get("to", "")
        self.content = str(msg)
        self.keywords = string.split(self.subject, " ")
        if modify_doc != 0:
            # reverse every keyword to simulate a modified document
            self.keywords = map(self.reverse, self.keywords)
        self.file_id = headers.get("message-id", "")
        self.length = len(str(msg))
        raw_date = headers.get("date", "")
        try:
            self.date = time.mktime(rfc822.parsedate(raw_date)[:9])
        except:
            # messages with unparsable dates simply get no date attribute
            pass

    def reverse(self, s):
        """Return the string s reversed."""
        chars = list(s)
        chars.reverse()
        return string.join(chars, "")

    def __del__(self):
        pass
class BuildEnv(dispatcher.Dispatcher,unittest.TestCase):
    """ build environment """
    def __init__(self,func,*args,**kw):
        unittest.TestCase.__init__(self,func,args,kw)
        dispatcher.Dispatcher.__init__(self,func)
        self.init_phase = 0
        self.setlog( open("dispatcher.log","a") )
        self.logn('treads=%d searchiterations=%d' %
                  (numThreads,searchIterations))
        self.logn('updateiterations=%d maxfiles=%d' %
                  (updateIterations,maxFiles))

    #############################################################
    # Build up ZODB
    #############################################################

    def buildTestEnvironment(self,args,kw):
        # run funcTestEnvironment once through the thread dispatcher
        self.init_phase = 1
        self.dispatcher("funcTestEnvironment",("funcTestEnvironment",1,args,kw))

    def funcTestEnvironment(self,dataDir,maxFiles):
        # Parse the mailbox, catalogue up to maxFiles messages, and write
        # the resulting catalog plus a keywords pickle into dataDir.
        env = self.th_setup()
        if not os.path.exists(dataDir): os.makedirs(dataDir)
        os.system("rm -f %s/*" % dataDir)
        zodb = testZODB("%s/Data_orig.fs" % dataDir)
        print "parsing and reading mailbox file %s....please wait" % mbox
        tc = testCatalog( mbox,maxFiles )
        print "writing Catalog to ZODB"
        zodb.write("catalog" , tc)
        print "Creating keywords file"
        kw = keywords.Keywords()
        kw.build(mbox,1000)
        print tc.num_files, "files read"
        print "Initalization complete"
        self.th_teardown(env)
class testSearches(dispatcher.Dispatcher,unittest.TestCase):
""" test searches """
def __init__(self,func,*args,**kw):
unittest.TestCase.__init__(self,func,args,kw)
dispatcher.Dispatcher.__init__(self,func)
self.init_phase = 0
self.setlog( open("dispatcher.log","a") )
def setUp(self):
os.system("rm -fr data/work")
if not os.path.exists("data/work"): os.makedirs("data/work")
assert os.system("cp %s/Data_orig.fs data/work/Data.fs" % dataDir)==0, \
"Error while replicating original data"
self.zodb = testZODB("data/work/Data.fs",open=0)
self.threads = {}
self.init_zodb_size = self.zodb_size()
kw = keywords.Keywords()
kw.reload()
self.keywords = kw.keywords()
self.logn("-" * 80)
self.logn('treads=%d searchiterations=%d' %
(numThreads,searchIterations))
self.logn('updateiterations=%d maxfiles=%d' %
(updateIterations,maxFiles))
def tearDown(self):
self.log_zodb_size("before",self.init_zodb_size)
self.log_zodb_size("after ",self.zodb_size())
del self.zodb
self.zodb = self.catalog = None
def log_zodb_size(self,s,n):
self.logn("Size of ZODB (data/work/Data.fs) %s test : %s" % (s,n) )
def zodb_size(self):
return self.size2size(os.stat("data/work/Data.fs")[6])
def size2size(self,n):
import math
if n <1024.0: return "%8.3lf Bytes" % n
if n <1024.0*1024.0: return "%8.3lf KB" % (1.0*n/1024.0)
if n <1024.0*1024.0*1024.0: return "%8.3lf MB" % (1.0*n/1024.0/1024.0)
#############################################################
# Fulltext test
#############################################################
def testFulltextIndex(self,args,kw):
""" benchmark FulltextIndex """
self.dispatcher('funcFulltextIndex' ,
('funcFulltextIndex', kw["numThreads"] , () , {} ) )
def funcFulltextIndex(self,*args):
""" benchmark FulltextIndex """
cat,msg_ids = self.get_catalog()
env = self.th_setup()
for kw in self.keywords:
res = cat.searchResults( {"content" : kw } )
self.th_teardown(env)
#############################################################
# Field index test
#############################################################
def testFieldIndex(self,args,kw):
""" benchmark field index"""
self.dispatcher('funcFieldIndex' ,
('funcFieldIndex',kw["numThreads"] , () , {} ) )
def funcFieldIndex(self,*args):
""" benchmark FieldIndex """
cat,msg_ids = self.get_catalog()
env = self.th_setup()
for i in range(0,searchIterations):
res = cat.searchResults( {"length" : i } )
for r in res:
assert i==r.length , "%s should have size %d but is %s" % \
(r.file_id,i,r.length)
self.th_teardown(env)
#############################################################
# Keyword index test
#############################################################
def testKeywordIndex(self,args,kw):
""" benchmark Keyword index"""
self.dispatcher('funcKeywordIndex' ,
('funcKeywordIndex', kw["numThreads"] , () , {} ) )
def funcKeywordIndex(self,*args):
""" benchmark KeywordIndex """
cat,msg_ids = self.get_catalog()
env = self.th_setup()
for kw in self.keywords:
res = cat.searchResults( {"subject" : kw } )
# assert len(res) != 0 , "Search result for keyword '%s' is empty" % kw
self.th_teardown(env)
#############################################################
# Field range index test
#############################################################
def testFieldRangeIndex(self,args,kw):
""" benchmark field range index"""
self.dispatcher('funcFieldRangeIndex' ,
('funcFieldRangeIndex', kw["numThreads"] , () , {} ) )
def funcFieldRangeIndex(self,*args):
""" benchmark FieldRangeIndex """
cat,msg_ids = self.get_catalog()
env = self.th_setup()
rg = []
for i in range(searchIterations):
m = whrandom.randint(0,10000)
n = m + 200
rg.append((m,n))
for i in range(searchIterations):
for r in cat.searchResults( {"length" : rg[i],"length_usage" : "range:min:max" } ):
size = r.length
assert rg[i][0]<=size and size<=rg[i][1] , \
"Filesize of %s is out of range (%d,%d) %d" % (r.file_id,rg[i][0],rg[i][1],size)
self.th_teardown(env)
#############################################################
# Keyword + range index test
#############################################################
def testKeywordRangeIndex(self,args,kw):
""" benchmark Keyword range index"""
self.dispatcher('funcKeywordRangeIndex' ,
('funcKeywordRangeIndex', kw["numThreads"] , () , {} ) )
def funcKeywordRangeIndex(self,*args):
""" benchmark Keyword & IndexRange search """
cat,msg_ids = self.get_catalog()
rg = []
for i in range(len(self.keywords)):
m = whrandom.randint(0,10000)
n = m + 200
rg.append(m,n)
env = self.th_setup()
results = []
for i in range(len(self.keywords)):
results.append( cat.searchResults( {"keywords":self.keywords[i],
"length" : rg[i],
"length_usage" : "range:min:max" } )
)
self.th_teardown(env)
#############################################################
# Test full reindexing
#############################################################
def testUpdates(self,args,kw):
""" benchmark concurrent catalog/uncatalog operations """
self.dispatcher("testUpdates" ,
("funcUpdates", kw["numThreads"] , args, kw ))
def funcUpdates(self,*args,**kw):
""" benchmark concurrent catalog/uncatalog operations """
uncat_conflicts = cat_conflicts = 0
cat,msg_ids = self.get_catalog()
msgs = self.setupUpdatesMethod(kw["numUpdates"])
keys = msgs.keys()
rdgen = whrandom.whrandom()
rdgen.seed(int(time.time()) % 256,int(time.time()) % 256,int(time.time()) % 256)
env = self.th_setup()
for i in range(len(keys)):
r = rdgen.randint(0,len(msgs)-1)
mid = keys[r]
obj = msgs[mid]
try:
cat.uncatalogObject(mid)
if kw.get("commit",1)==1:
get_transaction().commit()
time.sleep(0.1)
except ZODB.POSException.ConflictError:
uncat_conflicts = uncat_conflicts + 1
try:
cat.catalogObject(obj,mid)
if kw.get("commit",1)==1:
get_transaction().commit()
time.sleep(0.1)
except ZODB.POSException.ConflictError:
cat_conflicts = cat_conflicts + 1
try:
get_transaction().commit()
except: pass
self.th_teardown(env,cat_conflicts=cat_conflicts,uncat_conflicts=uncat_conflicts)
def setupUpdatesMethod(self,numUpdates):
""" this method prepares a datastructure for the updates test.
we are reading the first n mails from the primary mailbox.
they are used for the update test
"""
i = 0
dict = {}
mb = mailbox.UnixMailbox(open(mbox,"r"))
msg = mb.next()
while msg and i<numUpdates:
obj = testMessage(msg)
mid = msg.dict["message-id"]
dict[mid] = obj
msg = mb.next()
i = i+1
return dict
#############################################################
# Test full reindexing
#############################################################
def testReindexing(self,args,kw):
""" test reindexing of existing data """
self.dispatcher("testReindexing" ,
("funcReindexing",kw["numThreads"] , (mbox,1000) , {} ))
def testReindexingAndModify(self,args,kw):
""" test reindexing of existing data but with modifications"""
self.dispatcher("testReindexing" ,
("funcReindexing",kw["numThreads"] , (mbox,1000,1) , {} ))
def funcReindexing(self,mbox,numfiles=100,modify_doc=0):
""" test reindexing of existing data """
cat_conflicts = 0
cat,msg_ids = self.get_catalog()
env = self.th_setup()
mb = mailbox.UnixMailbox(open(mbox,"r"))
i = 0
msg = mb.next()
while msg and i<numfiles:
obj = testMessage(msg,modify_doc)
mid = msg.dict["message-id"]
try:
cat.catalogObject(obj,mid)
get_transaction().commit()
except:
cat_conflicts = cat_conflicts + 1
msg = mb.next()
i = i+1
if i%100==0: print i
self.th_teardown(env,cat_conflicts=cat_conflicts)
#############################################################
# Test full reindexing
#############################################################
def testIncrementalIndexing(self,args,kw):
""" testing incremental indexing """
self.dispatcher("testIncrementalIndexing" ,
("funcReindexing",kw["numThreads"], (mbox2,1000) , {}))
def get_catalog(self):
""" return a catalog object """
# depended we are running in multithreaded mode we must take
# care how threads open the ZODB
connection = self.zodb.db.open()
root = connection.root()
cat = root["catalog"]._catalog
msg_ids = root['catalog'].msg_ids
return cat,msg_ids
################################################################################
# Stuff of Chris
################################################################################
class CatalogBase:
    """Shared fixture: each test gets a fresh globbing vocabulary and an
    empty Catalog, torn down afterwards."""

    def setUp(self):
        self._vocabulary = Vocabulary.Vocabulary(
            'Vocabulary', 'Vocabulary', globbing=1)
        self._catalog = Catalog.Catalog()

    def tearDown(self):
        self._vocabulary = None
        self._catalog = None
class TestAddDelColumn(CatalogBase, unittest.TestCase):
    """Check adding and removing metadata columns on a Catalog."""

    def checkAdd(self):
        self._catalog.addColumn('id')
        assert self._catalog.schema.has_key('id') == 1, 'add column failed'

    def checkAddBad(self):
        # column names starting with '_' are reserved and must be rejected
        try:
            self._catalog.addColumn('_id')
        except:
            pass
        else:
            # bug fix: raise a real exception object instead of a string
            # exception (deprecated in Python 2 and later removed)
            raise AssertionError('invalid metadata column check failed')

    def checkDel(self):
        self._catalog.addColumn('id')
        self._catalog.delColumn('id')
        assert self._catalog.schema.has_key('id') != 1, 'del column failed'
class TestAddDelIndexes(CatalogBase, unittest.TestCase):
    """Check that each supported index type can be added and removed."""

    def checkAddFieldIndex(self):
        self._catalog.addIndex('id', 'FieldIndex')
        idx = self._catalog.indexes['id']
        assert type(idx) is type(UnIndex('id')),\
               'add field index failed'

    def checkAddTextIndex(self):
        self._catalog.addIndex('id', 'TextIndex')
        idx = self._catalog.indexes['id']
        assert type(idx) is type(UnTextIndex('id', None, None, Lexicon())),\
               'add text index failed'

    def checkAddKeywordIndex(self):
        self._catalog.addIndex('id', 'KeywordIndex')
        idx = self._catalog.indexes['id']
        assert type(idx) is type(UnKeywordIndex('id')), 'add kw index failed'

    def checkDelFieldIndex(self):
        self._catalog.addIndex('id', 'FieldIndex')
        self._catalog.delIndex('id')
        assert not self._catalog.indexes.has_key('id'), 'del index failed'

    def checkDelTextIndex(self):
        self._catalog.addIndex('id', 'TextIndex')
        self._catalog.delIndex('id')
        assert not self._catalog.indexes.has_key('id'), 'del index failed'

    def checkDelKeywordIndex(self):
        self._catalog.addIndex('id', 'KeywordIndex')
        self._catalog.delIndex('id')
        assert not self._catalog.indexes.has_key('id'), 'del index failed'
class TestSimultaneousAddAndRead(CatalogBase, unittest.TestCase):
    # Placeholder: concurrent add/read coverage has not been written yet.
    def checkMultiThread(self):
        pass
class TestZCatalogObject(unittest.TestCase):
    """Exercise ZCatalog instantiation with and without an explicit
    vocabulary object."""

    def checkInstantiateWithoutVocab(self):
        reference = Vocabulary.Vocabulary('Vocabulary', 'Vocabulary', globbing=1)
        catalog = ZCatalog.ZCatalog('acatalog')
        # a default vocabulary must be created automatically
        assert hasattr(catalog, 'Vocabulary')
        assert catalog.getVocabulary().__class__ == reference.__class__

    def checkInstantiateWithGlobbingVocab(self):
        vocab = Vocabulary.Vocabulary('Vocabulary', 'Vocabulary', globbing=1)
        catalog = ZCatalog.ZCatalog('acatalog', vocab_id='vocab')
        catalog._setObject('vocab', vocab)
        assert catalog.getVocabulary() == vocab

    def checkInstantiateWithNormalVocab(self):
        vocab = Vocabulary.Vocabulary('Vocabulary', 'Vocabulary', globbing=0)
        catalog = ZCatalog.ZCatalog('acatalog', vocab_id='vocab')
        catalog._setObject('vocab', vocab)
        assert catalog.getVocabulary() == vocab
class TestCatalogObject(unittest.TestCase):
    # Catalogs `upper` dummy objects and checks search, uncatalog and
    # uniqueValuesFor behaviour across all three index types.
    def setUp(self):
        self._vocabulary = Vocabulary.Vocabulary('Vocabulary','Vocabulary',
                                                 globbing=1)
        self._catalog = Catalog.Catalog()
        # method-backed columns/indexes
        self._catalog.addIndex('col1', 'FieldIndex')
        self._catalog.addIndex('col2', 'TextIndex')
        self._catalog.addIndex('col3', 'KeywordIndex')
        self._catalog.addColumn('col1')
        self._catalog.addColumn('col2')
        self._catalog.addColumn('col3')
        # attribute-backed columns/indexes
        self._catalog.addIndex('att1', 'FieldIndex')
        self._catalog.addIndex('att2', 'TextIndex')
        self._catalog.addIndex('att3', 'KeywordIndex')
        self._catalog.addColumn('att1')
        self._catalog.addColumn('att2')
        self._catalog.addColumn('att3')
        self._catalog.addColumn('num')
        self.upper = 1000

        class dummy(ExtensionClass.Base):
            att1 = 'att1'
            att2 = 'att2'
            att3 = ['att3']
            def __init__(self, num):
                self.num = num
            def col1(self):
                return 'col1'
            def col2(self):
                return 'col2'
            def col3(self):
                return ['col3']

        for x in range(0, self.upper):
            self._catalog.catalogObject(dummy(x), `x`)
        self._catalog.aq_parent = dummy('foo') # fake out acquisition

    def tearDown(self):
        self._vocabulary = self._catalog = None

    def checkResultLength(self):
        # an empty query returns every catalogued object
        upper = self.upper
        a = self._catalog()
        assert len(a) == upper, 'length should be %s, its %s'%(upper, len(a))

    def checkFieldIndexLength(self):
        a = self._catalog(att1='att1')
        assert len(a) == self.upper, 'should be %s, but is %s' % (self.upper,
                                                                  len(a))

    def checkTextIndexLength(self):
        a = self._catalog(att2='att2')
        assert len(a) == self.upper, 'should be %s, but is %s' % (self.upper,
                                                                  len(a))

    def checkKeywordIndexLength(self):
        a = self._catalog(att3='att3')
        assert len(a) == self.upper, 'should be %s, but is %s' % (self.upper,
                                                                  len(a))

    def checkUncatalogFieldIndex(self):
        self.uncatalog()
        a = self._catalog(att1='att1')
        assert len(a) == 0, 'len: %s' % (len(a))

    def checkUncatalogTextIndex(self):
        self.uncatalog()
        a = self._catalog(att2='att2')
        assert len(a) == 0, 'len: %s' % (len(a))

    def checkUncatalogKeywordIndex(self):
        self.uncatalog()
        a = self._catalog(att3='att3')
        assert len(a) == 0, 'len: %s'%(len(a))

    def checkBadUncatalog(self):
        # uncataloguing an unknown uid must not raise
        try:
            self._catalog.uncatalogObject('asdasdasd')
        except:
            assert 1==2, 'uncatalogObject raised exception on bad uid'

    def checkUniqueValuesForLength(self):
        a = self._catalog.uniqueValuesFor('att1')
        assert len(a) == 1, 'bad number of unique values %s' % str(a)

    def checkUniqueValuesForContent(self):
        a = self._catalog.uniqueValuesFor('att1')
        assert a[0] == 'att1', 'bad content %s' % str(a[0])

    def uncatalog(self):
        # helper: remove every object catalogued by setUp
        for x in range(0, self.upper):
            self._catalog.uncatalogObject(`x`)
class objRS(ExtensionClass.Base):
    """Minimal indexable object carrying a single number attribute."""

    def __init__(self, num):
        self.number = num
class testRS(unittest.TestCase):
    # Range-search stress test: catalogs 50000 objects with random numbers
    # and verifies range queries only return in-range hits.
    def setUp(self):
        self._vocabulary = Vocabulary.Vocabulary('Vocabulary','Vocabulary', globbing=1)
        self._catalog = Catalog.Catalog()
        self._catalog.addIndex('number',  'FieldIndex')
        self._catalog.addColumn('number')
        for i in range(50000):
            if i%1000==0: print i
            obj = objRS(whrandom.randint(0,20000))
            self._catalog.catalogObject(obj,i)
        self._catalog.aq_parent = objRS(200)

    def testRangeSearch(self):
        for i in range(1000000):
            m = whrandom.randint(0,20000)
            n = m + 1000
            for r in self._catalog.searchResults( {"number" : (m,n) ,
                                                   "length_usage" : "range:min:max" }
                                                  ):
                size = r.number
                assert m<=size and size<=n , "%d vs [%d,%d]" % (r.number,m,n)
def usage(program):
    # Print command-line help for this script.
    print "Usage: "
    print
    print "initalize the test catalog: %s -i -f <maximum number files to use> " % program
    print "to run the basic tests: %s -b -f <maximum number files to use> " % program
    print "to run the advanced tests: %s -a -f <maximum number files to use> " % program
def main():
    # Parse command-line options and run the selected test group.
    # -f sets maxFiles; -i/-b/-a/-x/-p select init/bench1/bench2/exp/basic.
    global dataDir,maxFiles
    opts,args = getopt.getopt(sys.argv[1:],"hiabf:xp",['help'])
    opts.sort()
    optsLst = map(lambda x: x[0],opts)
    if optsLst==[]: usage(os.path.basename(sys.argv[0])); sys.exit(0)
    for k,v in opts:
        if k in ['-h','--help'] : usage(os.path.basename(sys.argv[0])); sys.exit(0)
        if k == "-f": maxFiles = string.atoi(v)
    # data directory is keyed by the number of files used
    dataDir = os.path.join("data",str(maxFiles))
    if '-i' in optsLst:
        unittest.TextTestRunner().run(get_tests('init'))
    if '-b' in optsLst:
        unittest.TextTestRunner().run(get_tests('bench1'))
    if '-a' in optsLst:
        unittest.TextTestRunner().run(get_tests('bench2'))
    if '-x' in optsLst:
        unittest.TextTestRunner().run(get_tests('exp'))
    if '-p' in optsLst:
        unittest.TextTestRunner().run(test_suite())
def test_suite():
    # Entry point used by the Zope test framework: run the 'basic' set.
    return get_tests('basic')
def get_tests(what):
    """Return the TestSuite for test group `what`.

    `what` is one of 'basic', 'init', 'bench1', 'bench2' or 'exp'.
    For 'basic' the global dataDir/maxFiles are forced to the small
    fixed data set.
    """
    global dataDir,maxFiles
    if what=='basic':
        maxFiles = 100
        dataDir = 'data/%d' % maxFiles
    ts_cm= (
        unittest.makeSuite(TestAddDelIndexes, 'check'),
        unittest.makeSuite(TestCatalogObject, 'check'),
        unittest.makeSuite(TestAddDelColumn, 'check'),
        unittest.makeSuite(TestZCatalogObject, 'check')
        )
    t_aj = (
        BuildEnv('buildTestEnvironment',dataDir,maxFiles),
        testSearches("testFulltextIndex",numThreads=1),
        testSearches("testFieldIndex",numThreads= 1),
        testSearches("testFieldRangeIndex",numThreads=1),
        testSearches("testKeywordIndex",numThreads= 1),
        testSearches("testKeywordRangeIndex",numThreads= 1)
        )
    bench1_tests = (
        testSearches("testFulltextIndex",numThreads=1),
        testSearches("testFulltextIndex",numThreads= 4),
        testSearches("testFieldIndex",numThreads= 1),
        testSearches("testFieldIndex",numThreads= 4),
        testSearches("testFieldRangeIndex",numThreads=1),
        testSearches("testFieldRangeIndex",numThreads= 4),
        testSearches("testKeywordIndex",numThreads= 1),
        testSearches("testKeywordIndex",numThreads= 4),
        testSearches("testKeywordRangeIndex",numThreads= 1),
        testSearches("testKeywordRangeIndex",numThreads=4)
        )
    bench2_tests = (
        testSearches("testReindexing",numThreads=1),
        testSearches("testIncrementalIndexing",numThreads=1),
        testSearches("testUpdates",numThreads=2,numUpdates=200),
        testSearches("testUpdates",numThreads=4,numUpdates=200)
        )
    exp_tests = (
#        testRS("testRangeSearch"),
#        testSearches("testReindexing",numThreads=1),
        testSearches("testReindexingAndModify",numThreads=1),
#        testSearches("testUpdates",numThreads=10,numUpdates=100),
        )
    init_tests = (
        BuildEnv("buildTestEnvironment",dataDir,maxFiles) ,
        )
    # explicit name -> tests mapping instead of the old eval('%s_tests')
    suites = {'init': init_tests, 'bench1': bench1_tests,
              'bench2': bench2_tests, 'exp': exp_tests}
    if what=='basic':
        ts = unittest.TestSuite(ts_cm)
        for x in t_aj: ts.addTest(x)
        return ts
    else:
        ts = unittest.TestSuite()
        for x in suites[what]: ts.addTest(x)
        return ts
def pdebug():
    # NOTE(review): dead code -- shadowed by the second pdebug() defined
    # below; as written it only builds the suite without entering pdb.
    import pdb
    test_suite()
def debug():
    # Run the suite via debug() so failures propagate to the debugger.
    test_suite().debug()
def pdebug():
    # Run the debug() entry point under the pdb debugger.
    import pdb
    pdb.run('debug()')
if __name__ == '__main__':
main()
import os, sys
sys.path.insert(0, '.')
try:
import Testing
os.environ['SOFTWARE_HOME']=os.environ.get('SOFTWARE_HOME', '.')
except ImportError:
sys.path[0]='../../..'
import Testing
os.environ['SOFTWARE_HOME']='../../..'
os.environ['INSTANCE_HOME']=os.environ.get(
'INSTANCE_HOME',
os.path.join(os.environ['SOFTWARE_HOME'],'..','..')
)
os.environ['STUPID_LOG_FILE']=os.path.join(os.environ['INSTANCE_HOME'],'var',
'debug.log')
here = os.getcwd()
import Zope
import mailbox, time, httplib
from string import strip, find, split, lower, atoi, join
from urllib import quote
from Products.ZCatalog import ZCatalog
from unittest import TestCase, TestSuite, JUnitTextTestRunner,\
VerboseTextTestRunner, makeSuite
from Testing.makerequest import makerequest
TextTestRunner = VerboseTextTestRunner
class TestTimeIndex(TestCase):
    # Timing tests: loads mails from zope.mbox into a fresh ZCatalog and
    # measures bulk indexing, incremental indexing and subcommit behaviour.
    def setUp(self):
        self.app = makerequest(Zope.app())
        try: self.app._delObject('catalogtest')
        except AttributeError: pass
        self.app.manage_addFolder('catalogtest')
        zcatalog = ZCatalog.ZCatalog('catalog', 'a catalog')
        self.app.catalogtest._setObject('catalog', zcatalog)
        c = self.app.catalogtest.catalog
        # drop any leftover indexes before re-adding them
        for x in ('title', 'to', 'from', 'date', 'raw'):
            try: c.manage_delIndexes([x])
            except: pass
        c.manage_addIndex('title', 'TextIndex')
        c.manage_addIndex('to', 'TextIndex')
        c.manage_addIndex('from', 'TextIndex')
        c.manage_addIndex('date', 'FieldIndex')
        c.manage_addIndex('raw', 'TextIndex')

    def tearDown(self):
        try: self.app._delObject('catalogtest')
        except AttributeError: pass
        try:
            self.app._p_jar._db.pack()
            self.app._p_jar.close()
        except AttributeError: pass
        self.app = None
        del self.app

    def checkTimeBulkIndex(self):
        print
        c = self.app.catalogtest.catalog
        t = time.time()
        loadmail(self.app.catalogtest, 'zopemail',
                 os.path.join(here, 'zope.mbox'), 500)
        get_transaction().commit()
        loadtime = time.time() - t
        out("loading data took %s seconds.. " % loadtime)
        t = time.time()
        req = self.app.REQUEST
        parents = [self.app.catalogtest.catalog,
                   self.app.catalogtest, self.app]
        req['PARENTS'] = parents
        rsp = self.app.REQUEST.RESPONSE
        url1 = ''
        c.manage_catalogFoundItems(req, rsp, url1, url1,
                                   obj_metatypes=['DTML Document'])
        indextime = time.time() - t
        out("bulk index took %s seconds.. " % indextime)
        out("total time for load and index was %s seconds.. "
            % (loadtime + indextime))

    def checkTimeIncrementalIndexAndQuery(self):
        print
        c = self.app.catalogtest.catalog
        t = time.time()
        max = 500
        m = loadmail(self.app.catalogtest, 'zopemail',
                     os.path.join(here, 'zope.mbox'), max, c)
        get_transaction().commit()
        total = time.time() - t
        out("total time for load and index was %s seconds.. " % total)
        t = time.time()
        rs = c() # empty query should return all
        assert len(rs) == max, len(rs)
        dates = m['date']
        froms = m['from']
        tos = m['to']
        titles = m['title']
        assert len(c({'date':'foobarfoo'})) == 0 # should return no results
        for x in dates:
            assert len(c({'date':x})) == 1 # each date should be fieldindexed
        assert len(c({'from':'a'})) == 0 # should be caught by splitter
        assert len(c({'raw':'chris'})) != 0
        assert len(c({'raw':'gghdjkasjdsda'})) == 0
        assert c({'PrincipiaSearchSource':'the*'})

    def checkTimeSubcommit(self):
        print
        for x in (None,100,500,1000,10000):
            out("testing subcommit at theshhold of %s" % x)
            # NOTE(review): indentation reconstructed -- for x=None the
            # framework's own setUp() has already run, and a threshold of
            # None presumably disables subcommits; confirm against the
            # original file.
            if x is not None:
                self.setUp()
            c = self.app.catalogtest.catalog
            c.threshold = x
            get_transaction().commit()
            t = time.time()
            loadmail(self.app.catalogtest, 'zopemail',
                     os.path.join(here, 'zope.mbox'), 500, c)
            get_transaction().commit()
            total = time.time() - t
            out("total time with subcommit thresh %s was %s seconds.. "
                % (x,total))
            self.tearDown()
# utility
def loadmail(folder, name, mbox, max=None, catalog=None):
    """
    creates a folder inside object 'folder' named 'name', opens
    filename 'mbox' and adds 'max' mail messages as DTML documents to
    the ZODB inside the folder named 'name'.  If 'catalog' (which
    should be a ZCatalog object) is passed in, call catalog_object on it
    with the document while we're iterating.  If 'max' is not None,
    only do 'max' messages, else do all messages in the mbox archive.

    Returns a dict mapping 'date'/'from'/'to'/'title' to lists of the
    header values seen.
    """
    m = {'date':[],'from':[],'to':[],'title':[]}
    folder.manage_addFolder(name)
    folder=getattr(folder, name)
    mb=mailbox.UnixMailbox(open(mbox))
    i=0
    every=100
    message=mb.next()
    while message:
        # bucket the documents into subfolders of `every` messages each
        part = `i/every * 100`
        try:
            dest = getattr(folder, part)
        except AttributeError:
            folder.manage_addFolder(part)
            dest = getattr(folder, part)
        dest.manage_addDTMLDocument(str(i), file=message.fp.read())
        doc=getattr(dest, str(i))
        i=i+1
        # copy interesting headers onto the document as properties
        for h in message.headers:
            h=strip(h)
            l=find(h,':')
            if l <= 0: continue
            name=lower(h[:l])
            if name=='subject': name='title'
            h=strip(h[l+1:])
            type='string'
            # NOTE: both branches below are disabled ('if 0'); all
            # properties are currently added as strings.
            if 0 and name=='date': type='date'
            elif 0:
                try: atoi(h)
                except: pass
                else: type=int
            if name=='title':
                doc.manage_changeProperties(title=h)
                m[name].append(h)
            elif name in ('to', 'from', 'date'):
                try: doc.manage_addProperty(name, h, type)
                except: pass
                m[name].append(h)
        if catalog:
            path = join(doc.getPhysicalPath(), '/')
            catalog.catalog_object(doc, path)
        if max is not None:
            if i >= max: break
        message=mb.next()
    return m
def out(s):
    # Indent progress output beneath the test runner's own output.
    print " %s" % s
def test_suite():
    """Assemble the timing checks into a single test suite."""
    checks = makeSuite(TestTimeIndex, 'check')
    return TestSuite((checks,))
def main():
mb = os.path.join(here, 'zope.mbox')
if not os.path.isfile(mb):
print "do you want to get the zope.mbox file from lists.zope.org?"
print "it's required for testing (98MB, ~ 30mins on fast conn)"
print "it's also available at korak:/home/chrism/zope.mbox"
print "-- type 'Y' or 'N'"
a = raw_input()
if lower(a[:1]) == 'y':
server = 'lists.zope.org:80'
method = '/pipermail/zope.mbox/zope.mbox'
h = httplib.HTTP(server)
h.putrequest('GET', method)
h.putheader('User-Agent', 'silly')
h.putheader('Accept', 'text/html')
h.putheader('Accept', 'text/plain')
h.putheader('Host', server)
h.endheaders()
errcode, errmsg, headers = h.getreply()
if errcode != 200:
f = h.getfile()
data = f.read()
print data
raise "Error reading from host %s" % server
f = h.getfile()
out=open(mb,'w')
print "this is going to take a while..."
print "downloading mbox from %s" % server
while 1:
l = f.readline()
if not l: break
out.write(l)
alltests=test_suite()
runner = TextTestRunner()
runner.run(alltests)
def debug():
    """Run the suite in debug mode so failures drop into the debugger."""
    suite = test_suite()
    suite.debug()
# Command-line dispatch: ``python <thisfile> <name>`` invokes the
# module-level callable of that name (e.g. main, debug, test_suite);
# with no argument, main() is run.
if __name__=='__main__':
    if len(sys.argv) > 1:
        globals()[sys.argv[1]]()
    else:
        main()
...@@ -85,18 +85,15 @@ ...@@ -85,18 +85,15 @@
from Lexicon import Lexicon from Lexicon import Lexicon
from Splitter import Splitter from Splitter import Splitter
from intSet import intSet
from UnTextIndex import Or from UnTextIndex import Or
import re, string import re, string
import OIBTree, BTree, IOBTree, IIBTree
# Short cuts for common data containers
OIBTree = OIBTree.BTree # Object -> Integer
OOBTree = BTree.BTree # Object -> Object
IOBTree = IOBTree.BTree # Integer -> Object
IIBucket = IIBTree.Bucket # Integer -> Integer
from BTrees.IIBTree import IISet, union, IITreeSet
from BTrees.OIBTree import OIBTree
from BTrees.IOBTree import IOBTree
from BTrees.OOBTree import OOBTree
from randid import randid
class GlobbingLexicon(Lexicon): class GlobbingLexicon(Lexicon):
"""Lexicon which supports basic globbing function ('*' and '?'). """Lexicon which supports basic globbing function ('*' and '?').
...@@ -127,11 +124,24 @@ class GlobbingLexicon(Lexicon): ...@@ -127,11 +124,24 @@ class GlobbingLexicon(Lexicon):
def __init__(self): def __init__(self):
self.counter = 0 # word id counter XXX self.clear()
def clear(self):
self._lexicon = OIBTree() self._lexicon = OIBTree()
self._inverseLex = IOBTree() self._inverseLex = IOBTree()
self._digrams = OOBTree() self._digrams = OOBTree()
def _convertBTrees(self, threshold=200):
Lexicon._convertBTrees(self, threshold)
if type(self._digrams) is OOBTree: return
from BTrees.convert import convert
_digrams=self._digrams
self._digrams=OOBTree()
self._digrams._p_jar=self._p_jar
convert(_digrams, self._digrams, threshold, IITreeSet)
def createDigrams(self, word): def createDigrams(self, word):
"""Returns a list with the set of digrams in the word.""" """Returns a list with the set of digrams in the word."""
...@@ -139,8 +149,8 @@ class GlobbingLexicon(Lexicon): ...@@ -139,8 +149,8 @@ class GlobbingLexicon(Lexicon):
digrams.append(self.eow + word[0]) # Mark the beginning digrams.append(self.eow + word[0]) # Mark the beginning
for i in range(len(word)): for i in range(1,len(word)):
digrams.append(word[i:i+2]) digrams.append(word[i-1:i+1])
digrams[-1] = digrams[-1] + self.eow # Mark the end digrams[-1] = digrams[-1] + self.eow # Mark the end
...@@ -157,6 +167,8 @@ class GlobbingLexicon(Lexicon): ...@@ -157,6 +167,8 @@ class GlobbingLexicon(Lexicon):
set = getWordId # Kludge for old code set = getWordId # Kludge for old code
def getWord(self, wid):
return self._inverseLex.get(wid, None)
def assignWordId(self, word): def assignWordId(self, word):
"""Assigns a new word id to the provided word, and return it.""" """Assigns a new word id to the provided word, and return it."""
...@@ -166,19 +178,34 @@ class GlobbingLexicon(Lexicon): ...@@ -166,19 +178,34 @@ class GlobbingLexicon(Lexicon):
if self._lexicon.has_key(word): if self._lexicon.has_key(word):
return self._lexicon[word] return self._lexicon[word]
# First we go ahead and put the forward and reverse maps in.
self._lexicon[word] = self.counter # Get word id. BBB Backward compat pain.
self._inverseLex[self.counter] = word inverse=self._inverseLex
try: insert=inverse.insert
except AttributeError:
# we have an "old" BTree object
if inverse:
wid=inverse.keys()[-1]+1
else:
self._inverseLex=IOBTree()
wid=1
inverse[wid] = word
else:
# we have a "new" IOBTree object
wid=randid()
while not inverse.insert(wid, word):
wid=randid()
self._lexicon[word] = wid
# Now take all the digrams and insert them into the digram map. # Now take all the digrams and insert them into the digram map.
for digram in self.createDigrams(word): for digram in self.createDigrams(word):
set = self._digrams.get(digram) set = self._digrams.get(digram, None)
if set is None: if set is None:
self._digrams[digram] = set = intSet() self._digrams[digram] = set = IISet()
set.insert(self.counter) set.insert(wid)
self.counter = self.counter + 1 return wid
return self.counter - 1 # Adjust for the previous increment
def get(self, pattern): def get(self, pattern):
...@@ -208,14 +235,11 @@ class GlobbingLexicon(Lexicon): ...@@ -208,14 +235,11 @@ class GlobbingLexicon(Lexicon):
return (result, ) return (result, )
## now get all of the intsets that contain the result digrams ## now get all of the intsets that contain the result digrams
result = IIBucket() result = None
for digram in digrams: for digram in digrams:
if self._digrams.has_key(digram): result=union(result, self._digrams.get(digram, None))
matchSet = self._digrams[digram]
if matchSet is not None:
result = IIBucket().union(matchSet)
if len(result) == 0: if not result:
return () return ()
else: else:
## now we have narrowed the list of possible candidates ## now we have narrowed the list of possible candidates
...@@ -227,10 +251,10 @@ class GlobbingLexicon(Lexicon): ...@@ -227,10 +251,10 @@ class GlobbingLexicon(Lexicon):
expr = re.compile(self.createRegex(pattern)) expr = re.compile(self.createRegex(pattern))
words = [] words = []
hits = [] hits = IISet()
for x in result.keys(): for x in result:
if expr.match(self._inverseLex[x]): if expr.match(self._inverseLex[x]):
hits.append(x) hits.insert(x)
return hits return hits
...@@ -242,7 +266,6 @@ class GlobbingLexicon(Lexicon): ...@@ -242,7 +266,6 @@ class GlobbingLexicon(Lexicon):
def query_hook(self, q): def query_hook(self, q):
"""expand wildcards""" """expand wildcards"""
words = [] words = []
wids = []
for w in q: for w in q:
if ( (self.multi_wc in w) or if ( (self.multi_wc in w) or
(self.single_wc in w) ): (self.single_wc in w) ):
...@@ -286,3 +309,5 @@ class GlobbingLexicon(Lexicon): ...@@ -286,3 +309,5 @@ class GlobbingLexicon(Lexicon):
r'()&|!@#$%^{}\<>') r'()&|!@#$%^{}\<>')
return "%s$" % result return "%s$" % result
...@@ -84,11 +84,11 @@ ...@@ -84,11 +84,11 @@
############################################################################## ##############################################################################
"""Simple column indices""" """Simple column indices"""
__version__='$Revision: 1.27 $'[11:-2] __version__='$Revision: 1.28 $'[11:-2]
from Persistence import Persistent from Persistence import Persistent
from BTree import BTree from BTrees.OOBTree import OOBTree
from intSet import intSet from BTrees.IIBTree import IITreeSet
import operator import operator
from Missing import MV from Missing import MV
import string import string
...@@ -135,7 +135,7 @@ class Index(Persistent): ...@@ -135,7 +135,7 @@ class Index(Persistent):
self.id = id self.id = id
self.ignore_ex=ignore_ex self.ignore_ex=ignore_ex
self.call_methods=call_methods self.call_methods=call_methods
self._index = BTree() self._index = OOBTree()
self._reindex() self._reindex()
else: else:
...@@ -176,7 +176,7 @@ class Index(Persistent): ...@@ -176,7 +176,7 @@ class Index(Persistent):
def clear(self): def clear(self):
self._index = BTree() self._index = OOBTree()
def _reindex(self, start=0): def _reindex(self, start=0):
...@@ -200,7 +200,7 @@ class Index(Persistent): ...@@ -200,7 +200,7 @@ class Index(Persistent):
if k is None or k == MV: continue if k is None or k == MV: continue
set=get(k) set=get(k)
if set is None: index[k] = set = intSet() if set is None: index[k] = set = IITreeSet()
set.insert(i) set.insert(i)
...@@ -225,7 +225,7 @@ class Index(Persistent): ...@@ -225,7 +225,7 @@ class Index(Persistent):
return return
set = index.get(k) set = index.get(k)
if set is None: index[k] = set = intSet() if set is None: index[k] = set = IITreeSet()
set.insert(i) set.insert(i)
...@@ -301,8 +301,7 @@ class Index(Persistent): ...@@ -301,8 +301,7 @@ class Index(Persistent):
if hi: setlist = index.items(lo,hi) if hi: setlist = index.items(lo,hi)
else: setlist = index.items(lo) else: setlist = index.items(lo)
for k,set in setlist: for k,set in setlist:
if r is None: r = set w, r = weightedUnion(r, set)
else: r = r.union(set)
except KeyError: pass except KeyError: pass
else: #not a range else: #not a range
get = index.get get = index.get
...@@ -310,11 +309,10 @@ class Index(Persistent): ...@@ -310,11 +309,10 @@ class Index(Persistent):
if key: anyTrue = 1 if key: anyTrue = 1
set=get(key) set=get(key)
if set is not None: if set is not None:
if r is None: r = set w, r = weightedUnion(r, set)
else: r = r.union(set)
if r is None: if r is None:
if anyTrue: r=intSet() if anyTrue: r=IISet()
else: return None else: return None
return r, (id,) return r, (id,)
......
...@@ -92,11 +92,12 @@ mapping. ...@@ -92,11 +92,12 @@ mapping.
from Splitter import Splitter from Splitter import Splitter
from Persistence import Persistent from Persistence import Persistent
from Acquisition import Implicit from Acquisition import Implicit
import OIBTree, BTree
OIBTree=OIBTree.BTree
OOBTree=BTree.BTree
import re
from BTrees.OIBTree import OIBTree
from BTrees.IOBTree import IOBTree
from BTrees.IIBTree import IISet, IITreeSet
from randid import randid
class Lexicon(Persistent, Implicit): class Lexicon(Persistent, Implicit):
"""Maps words to word ids and then some """Maps words to word ids and then some
...@@ -112,13 +113,38 @@ class Lexicon(Persistent, Implicit): ...@@ -112,13 +113,38 @@ class Lexicon(Persistent, Implicit):
stop_syn={} stop_syn={}
def __init__(self, stop_syn=None): def __init__(self, stop_syn=None):
self._lexicon = OIBTree() self.clear()
self.counter = 0
if stop_syn is None: if stop_syn is None:
self.stop_syn = {} self.stop_syn = {}
else: else:
self.stop_syn = stop_syn self.stop_syn = stop_syn
def clear(self):
self._lexicon = OIBTree()
self._inverseLex = IOBTree()
def _convertBTrees(self, threshold=200):
if (type(self._lexicon) is OIBTree and
type(getattr(self, '_inverseLex', None)) is IOBTree):
return
from BTrees.convert import convert
lexicon=self._lexicon
self._lexicon=OIBTree()
self._lexicon._p_jar=self._p_jar
convert(lexicon, self._lexicon, threshold)
try:
inverseLex=self._inverseLex
self._inverseLex=IOBTree()
except AttributeError:
# older lexicons didn't have an inverse lexicon
self._inverseLex=IOBTree()
inverseLex=self._inverseLex
self._inverseLex._p_jar=self._p_jar
convert(inverseLex, self._inverseLex, threshold)
def set_stop_syn(self, stop_syn): def set_stop_syn(self, stop_syn):
""" pass in a mapping of stopwords and synonyms. Format is: """ pass in a mapping of stopwords and synonyms. Format is:
...@@ -135,13 +161,16 @@ class Lexicon(Persistent, Implicit): ...@@ -135,13 +161,16 @@ class Lexicon(Persistent, Implicit):
def getWordId(self, word): def getWordId(self, word):
""" return the word id of 'word' """ """ return the word id of 'word' """
if self._lexicon.has_key(word): wid=self._lexicon.get(word, None)
return self._lexicon[word] if wid is None:
else: wid=self.assignWordId(word)
return self.assignWordId(word) return wid
set = getWordId set = getWordId
def getWord(self, wid):
""" post-2.3.1b2 method, will not work with unconverted lexicons """
return self._inverseLex.get(wid, None)
def assignWordId(self, word): def assignWordId(self, word):
"""Assigns a new word id to the provided word and returns it.""" """Assigns a new word id to the provided word and returns it."""
...@@ -149,17 +178,29 @@ class Lexicon(Persistent, Implicit): ...@@ -149,17 +178,29 @@ class Lexicon(Persistent, Implicit):
if self._lexicon.has_key(word): if self._lexicon.has_key(word):
return self._lexicon[word] return self._lexicon[word]
if not hasattr(self, 'counter'):
self.counter = 0 try: inverse=self._inverseLex
self._lexicon[intern(word)] = self.counter except AttributeError:
self.counter = self.counter + 1 # woops, old lexicom wo wids
return self.counter - 1 inverse=self._inverseLex=IOBTree()
for word, wid in self._lexicon.items():
inverse[wid]=word
wid=randid()
while not inverse.insert(wid, word):
wid=randid()
self._lexicon[intern(word)] = wid
return wid
def get(self, key, default=None): def get(self, key, default=None):
"""Return the matched word against the key.""" """Return the matched word against the key."""
return [self._lexicon.get(key, default)] r=IISet()
wid=self._lexicon.get(key, default)
if wid is not None: r.insert(wid)
return r
def __getitem__(self, key): def __getitem__(self, key):
return self.get(key) return self.get(key)
...@@ -176,21 +217,6 @@ class Lexicon(Persistent, Implicit): ...@@ -176,21 +217,6 @@ class Lexicon(Persistent, Implicit):
return Splitter(astring, words) return Splitter(astring, words)
def grep(self, query):
"""
regular expression search through the lexicon
he he.
Do not use unless you know what your doing!!!
"""
expr = re.compile(query)
hits = []
for x in self._lexicon.keys():
if expr.search(x):
hits.append(x)
return hits
def query_hook(self, q): def query_hook(self, q):
""" we don't want to modify the query cuz we're dumb """ """ we don't want to modify the query cuz we're dumb """
return q return q
......
...@@ -83,18 +83,33 @@ ...@@ -83,18 +83,33 @@
# #
############################################################################## ##############################################################################
from BTrees.IIBTree import IIBucket
from BTrees.IIBTree import weightedIntersection, weightedUnion, difference
from BTrees.OOBTree import OOSet, union
class ResultList: class ResultList:
def __init__(self, d, words, index, TupleType=type(())): def __init__(self, d, words, index, TupleType=type(())):
self._index = index self._index = index
if type(words) is not OOSet: words=OOSet(words)
self._words = words self._words = words
if (type(d) is TupleType): self._dict = { d[0] : d[1] }
else: self._dict = d
def __len__(self): return len(self._dict) if (type(d) is TupleType):
d = IIBucket((d,))
elif type(d) is not IIBucket:
d = IIBucket(d)
self._dict=d
self.__getitem__=d.__getitem__
try: self.__nonzero__=d.__nonzero__
except: pass
self.get=d.get
def __nonzero__(self):
return not not self._dict
def __getitem__(self, key): return self._dict[key] def bucket(self): return self._dict
def keys(self): return self._dict.keys() def keys(self): return self._dict.keys()
...@@ -103,42 +118,29 @@ class ResultList: ...@@ -103,42 +118,29 @@ class ResultList:
def items(self): return self._dict.items() def items(self): return self._dict.items()
def __and__(self, x): def __and__(self, x):
result = {} return self.__class__(
dict = self._dict weightedIntersection(self._dict, x._dict)[1],
xdict = x._dict union(self._words, x._words),
xhas = xdict.has_key self._index,
for id, score in dict.items(): )
if xhas(id): result[id] = xdict[id]+score
return self.__class__(result, self._words+x._words, self._index)
def and_not(self, x): def and_not(self, x):
result = {} return self.__class__(
dict = self._dict difference(self._dict, x._dict),
xdict = x._dict self._words,
xhas = xdict.has_key self._index,
for id, score in dict.items(): )
if not xhas(id): result[id] = score
return self.__class__(result, self._words, self._index)
def __or__(self, x): def __or__(self, x):
result = {} return self.__class__(
dict = self._dict weightedUnion(self._dict, x._dict)[1],
has = dict.has_key union(self._words, x._words),
xdict = x._dict self._index,
xhas = xdict.has_key )
for id, score in dict.items():
if xhas(id): result[id] = xdict[id]+score
else: result[id] = score
for id, score in xdict.items():
if not has(id): result[id] = score
return self.__class__(result, self._words+x._words, self._index) return self.__class__(result, self._words+x._words, self._index)
def near(self, x): def near(self, x):
result = {} result = IIBucket
dict = self._dict dict = self._dict
xdict = x._dict xdict = x._dict
xhas = xdict.has_key xhas = xdict.has_key
...@@ -160,5 +162,6 @@ class ResultList: ...@@ -160,5 +162,6 @@ class ResultList:
else: score = (score+xdict[id])/d else: score = (score+xdict[id])/d
result[id] = score result[id] = score
return self.__class__(result, self._words+x._words, self._index) return self.__class__(
result, union(self._words, x._words), self._index)
...@@ -202,13 +202,13 @@ Notes on a new text index design ...@@ -202,13 +202,13 @@ Notes on a new text index design
space. space.
""" """
__version__='$Revision: 1.25 $'[11:-2] __version__='$Revision: 1.26 $'[11:-2]
#XXX I strongly suspect that this is broken, but I'm not going to fix it. :(
from Globals import Persistent from Globals import Persistent
import BTree, IIBTree from BTrees.OOBTree import OOBTree
BTree=BTree.BTree from BTrees.IIBTree import IISet, IIBucket
IIBTree=IIBTree.Bucket
from intSet import intSet
import operator import operator
from Splitter import Splitter from Splitter import Splitter
from string import strip from string import strip
...@@ -250,7 +250,7 @@ class TextIndex(Persistent): ...@@ -250,7 +250,7 @@ class TextIndex(Persistent):
self.id=id self.id=id
self.ignore_ex=ignore_ex self.ignore_ex=ignore_ex
self.call_methods=call_methods self.call_methods=call_methods
self._index=BTree() self._index=OOBTree() #XXX Is this really an IOBTree?
self._syn=stop_word_dict self._syn=stop_word_dict
self._reindex() self._reindex()
else: else:
...@@ -261,7 +261,7 @@ class TextIndex(Persistent): ...@@ -261,7 +261,7 @@ class TextIndex(Persistent):
def clear(self): def clear(self):
self._index = BTree() self._index = OOBTree()
def positions(self, docid, words): def positions(self, docid, words):
...@@ -366,7 +366,7 @@ class TextIndex(Persistent): ...@@ -366,7 +366,7 @@ class TextIndex(Persistent):
index[word] = r index[word] = r
elif type(r) is dictType: elif type(r) is dictType:
if len(r) > 4: if len(r) > 4:
b = IIBTree() b = IIBucket()
for k, v in r.items(): b[k] = v for k, v in r.items(): b[k] = v
r = b r = b
r[id] = score r[id] = score
...@@ -440,7 +440,7 @@ class TextIndex(Persistent): ...@@ -440,7 +440,7 @@ class TextIndex(Persistent):
for key in keys: for key in keys:
key = strip(key) key = strip(key)
if not key: continue if not key: continue
rr = intSet() rr = IISet()
try: try:
for i,score in query(key,self).items(): for i,score in query(key,self).items():
if score: rr.insert(i) if score: rr.insert(i)
...@@ -451,5 +451,5 @@ class TextIndex(Persistent): ...@@ -451,5 +451,5 @@ class TextIndex(Persistent):
r = r.intersection(rr) r = r.intersection(rr)
if r is not None: return r, (id,) if r is not None: return r, (id,)
return intSet(), (id,) return IISet(), (id,)
...@@ -85,21 +85,25 @@ ...@@ -85,21 +85,25 @@
"""Simple column indices""" """Simple column indices"""
__version__='$Revision: 1.25 $'[11:-2] __version__='$Revision: 1.26 $'[11:-2]
from Globals import Persistent from Globals import Persistent
from Acquisition import Implicit from Acquisition import Implicit
import BTree import BTree
import IOBTree import IOBTree
from intSet import intSet
import operator import operator
from Missing import MV
import string, pdb import string, pdb
from zLOG import LOG, ERROR from zLOG import LOG, ERROR
from types import * from types import *
from BTrees.OOBTree import OOBTree
from BTrees.IOBTree import IOBTree
from BTrees.IIBTree import IITreeSet, IISet, union
import BTrees.Length
import sys
_marker = []
def nonEmpty(s): def nonEmpty(s):
"returns true if a non-empty string or any other (nonstring) type" "returns true if a non-empty string or any other (nonstring) type"
...@@ -115,7 +119,7 @@ class UnIndex(Persistent, Implicit): ...@@ -115,7 +119,7 @@ class UnIndex(Persistent, Implicit):
meta_type = 'Field Index' meta_type = 'Field Index'
def __init__(self, id=None, ignore_ex=None, call_methods=None): def __init__(self, id, ignore_ex=None, call_methods=None):
"""Create an unindex """Create an unindex
UnIndexes are indexes that contain two index components, the UnIndexes are indexes that contain two index components, the
...@@ -123,6 +127,11 @@ class UnIndex(Persistent, Implicit): ...@@ -123,6 +127,11 @@ class UnIndex(Persistent, Implicit):
index. The inverted index is so that objects can be unindexed index. The inverted index is so that objects can be unindexed
even when the old value of the object is not known. even when the old value of the object is not known.
e.g.
self._index = {datum:[documentId1, documentId2]}
self._unindex = {documentId:datum}
The arguments are: The arguments are:
'id' -- the name of the item attribute to index. This is 'id' -- the name of the item attribute to index. This is
...@@ -138,23 +147,53 @@ class UnIndex(Persistent, Implicit): ...@@ -138,23 +147,53 @@ class UnIndex(Persistent, Implicit):
uninded methods for this to work. uninded methods for this to work.
""" """
######################################################################
# For b/w compatability, have to allow __init__ calls with zero args
if not id==ignore_ex==call_methods==None:
self.id = id self.id = id
self.ignore_ex=ignore_ex # currently unimplimented self.ignore_ex=ignore_ex # currently unimplimented
self.call_methods=call_methods self.call_methods=call_methods
self._index = BTree.BTree()
self._unindex = IOBTree.BTree()
else: self.__len__=BTrees.Length.Length() # see __len__ method docstring
pass self.clear()
def clear(self):
# inplace opportunistic conversion from old-style to new style BTrees
try: self.__len__.set(0)
except AttributeError: self.__len__=BTrees.Length.Length()
self._index = OOBTree()
self._unindex = IOBTree()
def _convertBTrees(self, threshold=200):
if type(self._index) is OOBTree: return
from BTrees.convert import convert
_index=self._index
self._index=OOBTree()
def convertSet(s, IITreeSet=IITreeSet):
if len(s) == 1:
try: return s[0] # convert to int
except: pass # This is just an optimization.
return IITreeSet(s)
convert(_index, self._index, threshold, convertSet)
_unindex=self._unindex
self._unindex=IOBTree()
convert(_unindex, self._unindex, threshold)
self.__len__=BTrees.Length.Length()
def __nonzero__(self):
return not not self._unindex
def __len__(self): def __len__(self):
return len(self._unindex) """Return the number of objects indexed.
This method is only called for indexes which have "old" BTrees,
and the *only* reason that UnIndexes maintain a __len__ is for
the searching code in the catalog during sorting.
"""
return len(self._unindex)
def histogram(self): def histogram(self):
"""Return a mapping which provides a histogram of the number of """Return a mapping which provides a histogram of the number of
...@@ -173,31 +212,39 @@ class UnIndex(Persistent, Implicit): ...@@ -173,31 +212,39 @@ class UnIndex(Persistent, Implicit):
return self._unindex.keys() return self._unindex.keys()
def getEntryForObject(self, documentId, default=MV): def getEntryForObject(self, documentId, default=_marker):
"""Takes a document ID and returns all the information we have """Takes a document ID and returns all the information we have
on that specific object.""" on that specific object."""
if default is not MV: if default is _marker:
return self._unindex.get(documentId, default)
else:
return self._unindex.get(documentId) return self._unindex.get(documentId)
else:
return self._unindex.get(documentId, default)
def removeForwardIndexEntry(self, entry, documentId): def removeForwardIndexEntry(self, entry, documentId):
"""Take the entry provided and remove any reference to documentId """Take the entry provided and remove any reference to documentId
in its entry in the index.""" in its entry in the index."""
global _marker
indexRow = self._index.get(entry, MV) indexRow = self._index.get(entry, _marker)
if indexRow is not MV: if indexRow is not _marker:
try: try:
indexRow.remove(documentId) indexRow.remove(documentId)
if len(indexRow) == 0: if not indexRow:
del self._index[entry]
try: self.__len__.change(-1)
except AttributeError: pass # pre-BTrees-module instance
except AttributeError:
# index row is an int
del self._index[entry] del self._index[entry]
try: self.__len__.change(-1)
except AttributeError: pass # pre-BTrees-module instance
except: except:
LOG(self.__class__.__name__, ERROR, LOG(self.__class__.__name__, ERROR,
('unindex_object could not remove ' ('unindex_object could not remove '
'integer id %s from index %s. This ' 'documentId %s from index %s. This '
'should not happen.' 'should not happen.'
% (str(documentId), str(self.id)))) % (str(documentId), str(self.id))), '',
sys.exc_info())
else: else:
LOG(self.__class__.__name__, ERROR, LOG(self.__class__.__name__, ERROR,
('unindex_object tried to retrieve set %s ' ('unindex_object tried to retrieve set %s '
...@@ -210,20 +257,25 @@ class UnIndex(Persistent, Implicit): ...@@ -210,20 +257,25 @@ class UnIndex(Persistent, Implicit):
in the forward index. in the forward index.
This will also deal with creating the entire row if necessary.""" This will also deal with creating the entire row if necessary."""
global _marker
indexRow = self._index.get(entry, MV) indexRow = self._index.get(entry, _marker)
# Make sure there's actually a row there already. If not, create # Make sure there's actually a row there already. If not, create
# an IntSet and stuff it in first. # an IntSet and stuff it in first.
if indexRow is MV: if indexRow is _marker:
self._index[entry] = intSet() self._index[entry] = documentId
indexRow = self._index[entry] try: self.__len__.change(1)
indexRow.insert(documentId) except AttributeError: pass # pre-BTrees-module instance
else:
try: indexRow.insert(documentId)
except AttributeError:
# index row is an int
indexRow=IITreeSet((indexRow, documentId))
self._index[entry] = indexRow
def index_object(self, documentId, obj, threshold=None): def index_object(self, documentId, obj, threshold=None):
""" index and object 'obj' with integer id 'documentId'""" """ index and object 'obj' with integer id 'documentId'"""
global _marker
returnStatus = 0 returnStatus = 0
# First we need to see if there's anything interesting to look at # First we need to see if there's anything interesting to look at
...@@ -235,14 +287,16 @@ class UnIndex(Persistent, Implicit): ...@@ -235,14 +287,16 @@ class UnIndex(Persistent, Implicit):
if callable(datum): if callable(datum):
datum = datum() datum = datum()
except AttributeError: except AttributeError:
datum = MV datum = _marker
# We don't want to do anything that we don't have to here, so we'll # We don't want to do anything that we don't have to here, so we'll
# check to see if the new and existing information is the same. # check to see if the new and existing information is the same.
oldDatum = self._unindex.get(documentId, MV) oldDatum = self._unindex.get(documentId, _marker)
if not datum == oldDatum: if datum != oldDatum:
if oldDatum is not MV: if oldDatum is not _marker:
self.removeForwardIndexEntry(oldDatum, documentId) self.removeForwardIndexEntry(oldDatum, documentId)
if datum is not _marker:
self.insertForwardIndexEntry(datum, documentId) self.insertForwardIndexEntry(datum, documentId)
self._unindex[documentId] = datum self._unindex[documentId] = datum
...@@ -250,21 +304,24 @@ class UnIndex(Persistent, Implicit): ...@@ -250,21 +304,24 @@ class UnIndex(Persistent, Implicit):
return returnStatus return returnStatus
def unindex_object(self, documentId): def unindex_object(self, documentId):
""" Unindex the object with integer id 'documentId' and don't """ Unindex the object with integer id 'documentId' and don't
raise an exception if we fail """ raise an exception if we fail """
unindexRecord = self._unindex.get(documentId, None) global _marker
if unindexRecord is None: unindexRecord = self._unindex.get(documentId, _marker)
if unindexRecord is _marker:
return None return None
self.removeForwardIndexEntry(unindexRecord, documentId) self.removeForwardIndexEntry(unindexRecord, documentId)
try:
del self._unindex[documentId] del self._unindex[documentId]
except:
LOG('UnIndex', ERROR, 'Attempt to unindex nonexistent document'
' with id %s' % documentId)
def _apply_index(self, request, cid='', type=type, None=None):
def _apply_index(self, request, cid=''):
"""Apply the index to query parameters given in the argument, """Apply the index to query parameters given in the argument,
request request
...@@ -301,6 +358,7 @@ class UnIndex(Persistent, Implicit): ...@@ -301,6 +358,7 @@ class UnIndex(Persistent, Implicit):
r = None r = None
anyTrue = 0 anyTrue = 0
opr = None opr = None
IntType=type(1)
if request.has_key(id+'_usage'): if request.has_key(id+'_usage'):
# see if any usage params are sent to field # see if any usage params are sent to field
...@@ -321,10 +379,7 @@ class UnIndex(Persistent, Implicit): ...@@ -321,10 +379,7 @@ class UnIndex(Persistent, Implicit):
setlist = index.items(lo) setlist = index.items(lo)
for k, set in setlist: for k, set in setlist:
if r is None: r = union(r, set)
r = set
else:
r = r.union(set)
except KeyError: except KeyError:
pass pass
...@@ -334,16 +389,18 @@ class UnIndex(Persistent, Implicit): ...@@ -334,16 +389,18 @@ class UnIndex(Persistent, Implicit):
for key in keys: for key in keys:
if nonEmpty(key): if nonEmpty(key):
anyTrue = 1 anyTrue = 1
set=get(key) set=get(key, None)
if set is not None: if set is not None:
if r is None: r = union(r, set)
r = set
else: if type(r) is IntType: r=IISet((r,))
r = r.union(set) if r:
return r, (id,)
if r is None: if r is None:
if anyTrue: if anyTrue:
r=intSet() r=IISet()
else: else:
return None return None
...@@ -369,8 +426,9 @@ class UnIndex(Persistent, Implicit): ...@@ -369,8 +426,9 @@ class UnIndex(Persistent, Implicit):
name = self.id name = self.id
elif name != self.id: elif name != self.id:
return [] return []
if not withLengths: return tuple( if not withLengths: return tuple(
filter(nonEmpty,self._index.keys()) filter(nonEmpty, self._index.keys())
) )
else: else:
rl=[] rl=[]
...@@ -379,10 +437,8 @@ class UnIndex(Persistent, Implicit): ...@@ -379,10 +437,8 @@ class UnIndex(Persistent, Implicit):
else: rl.append((i, len(self._index[i]))) else: rl.append((i, len(self._index[i])))
return tuple(rl) return tuple(rl)
def keyForDocument(self, id):
return self._unindex(id)
def clear(self): def items(self): return self._index.items()
self._index = BTree.BTree()
self._unindex = IOBTree.BTree()
...@@ -83,10 +83,10 @@ ...@@ -83,10 +83,10 @@
# #
############################################################################## ##############################################################################
from UnIndex import UnIndex, MV, intSet from UnIndex import UnIndex
from zLOG import LOG, ERROR from zLOG import LOG, ERROR
from Missing import MV from types import StringType
from types import * from BTrees.OOBTree import OOSet, difference
class UnKeywordIndex(UnIndex): class UnKeywordIndex(UnIndex):
...@@ -111,69 +111,54 @@ class UnKeywordIndex(UnIndex): ...@@ -111,69 +111,54 @@ class UnKeywordIndex(UnIndex):
# self.id is the name of the index, which is also the name of the # self.id is the name of the index, which is also the name of the
# attribute we're interested in. If the attribute is callable, # attribute we're interested in. If the attribute is callable,
# we'll do so. # we'll do so.
try: newKeywords = getattr(obj, self.id, None)
newKeywords = getattr(obj, self.id)
if callable(newKeywords): if callable(newKeywords):
newKeywords = newKeywords() newKeywords = newKeywords()
except AttributeError:
newKeywords = MV
if type(newKeywords) is StringType: if type(newKeywords) is StringType:
newKeywords = (newKeywords, ) newKeywords = (newKeywords, )
if newKeywords is None:
self.unindex_object(documentId)
return 0
# Now comes the fun part, we need to figure out what's changed # Now comes the fun part, we need to figure out what's changed
# if anything from the previous record. # if anything from the previous record.
oldKeywords = self._unindex.get(documentId, MV) oldKeywords = self._unindex.get(documentId, None)
if newKeywords is MV: if oldKeywords is None:
self.unindex_object(documentId)
return 0
elif oldKeywords is MV:
try: try:
for kw in newKeywords: for kw in newKeywords:
self.insertForwardIndexEntry(kw, documentId) self.insertForwardIndexEntry(kw, documentId)
except TypeError: except TypeError:
return 0 return 0
else: else:
# We need the old keywords to be a mapping so we can manipulate if type(oldKeywords) is not OOSet: oldKeywords=OOSet(oldKeywords)
# them more easily. newKeywords=OOSet(newKeywords)
tmp = {} self.unindex_objectKeywords(
try: documentId, difference(oldKeywords, newKeywords))
for kw in oldKeywords: for kw in difference(newKeywords, oldKeywords):
tmp[kw] = None
oldKeywords = tmp
# Now we're going to go through the new keywords,
# and add those that aren't already indexed. If
# they are already indexed, just delete them from
# the list.
for kw in newKeywords:
if oldKeywords.has_key(kw):
del oldKeywords[kw]
else:
self.insertForwardIndexEntry(kw, documentId) self.insertForwardIndexEntry(kw, documentId)
# Now whatever is left in oldKeywords are keywords self._unindex[documentId] = list(newKeywords)
# that we no longer have, and need to be removed
# from the indexes.
for kw in oldKeywords.keys():
self.removeForwardIndexEntry(kw, documentId)
except TypeError:
return 0
self._unindex[documentId] = newKeywords[:] # Make a copy
return 1 return 1
def unindex_object(self, documentId): def unindex_objectKeywords(self, documentId, keywords):
""" carefully unindex the object with integer id 'documentId'""" """ carefully unindex the object with integer id 'documentId'"""
keywords = self._unindex.get(documentId, MV) if keywords is not None:
if keywords is MV:
return None
for kw in keywords: for kw in keywords:
self.removeForwardIndexEntry(kw, documentId) self.removeForwardIndexEntry(kw, documentId)
def unindex_object(self, documentId):
""" carefully unindex the object with integer id 'documentId'"""
keywords = self._unindex.get(documentId, None)
self.unindex_objectKeywords(documentId, keywords)
try:
del self._unindex[documentId] del self._unindex[documentId]
except KeyError:
LOG('UnKeywordIndex', ERROR, 'Attempt to unindex nonexistent'
' document id %s' % documentId)
...@@ -91,14 +91,11 @@ undo information so that objects can be unindexed when the old value ...@@ -91,14 +91,11 @@ undo information so that objects can be unindexed when the old value
is no longer known. is no longer known.
""" """
__version__ = '$Revision: 1.37 $'[11:-2] __version__ = '$Revision: 1.38 $'[11:-2]
import BTree, IIBTree, IOBTree, OIBTree
import string, regex, regsub, ts_regex import string, regex, regsub, ts_regex
import operator import operator
from intSet import intSet
from Globals import Persistent from Globals import Persistent
from Acquisition import Implicit from Acquisition import Implicit
from Splitter import Splitter from Splitter import Splitter
...@@ -107,10 +104,11 @@ from Lexicon import Lexicon ...@@ -107,10 +104,11 @@ from Lexicon import Lexicon
from ResultList import ResultList from ResultList import ResultList
from types import * from types import *
BTree = BTree.BTree # Regular generic BTree from BTrees.IOBTree import IOBTree
IOBTree = IOBTree.BTree # Integer -> Object from BTrees.OIBTree import OIBTree
IIBucket = IIBTree.Bucket # Integer -> Integer from BTrees.IIBTree import IIBTree, IIBucket, IISet, IITreeSet
OIBTree = OIBTree.BTree # Object -> Integer from BTrees.IIBTree import difference, weightedIntersection
AndNot = 'andnot' AndNot = 'andnot'
And = 'and' And = 'and'
...@@ -141,7 +139,7 @@ class UnTextIndex(Persistent, Implicit): ...@@ -141,7 +139,7 @@ class UnTextIndex(Persistent, Implicit):
meta_type = 'Text Index' meta_type = 'Text Index'
def __init__(self, id=None, ignore_ex=None, def __init__(self, id, ignore_ex=None,
call_methods=None, lexicon=None): call_methods=None, lexicon=None):
"""Create an index """Create an index
...@@ -159,15 +157,11 @@ class UnTextIndex(Persistent, Implicit): ...@@ -159,15 +157,11 @@ class UnTextIndex(Persistent, Implicit):
'lexicon' is the lexicon object to specify, if None, the 'lexicon' is the lexicon object to specify, if None, the
index will use a private lexicon.""" index will use a private lexicon."""
if not id == ignore_ex == call_methods == None:
self.id = id self.id = id
self.ignore_ex = ignore_ex self.ignore_ex = ignore_ex
self.call_methods = call_methods self.call_methods = call_methods
self._index = IOBTree()
self._unindex = IOBTree()
else: self.clear()
pass
if lexicon is None: if lexicon is None:
## if no lexicon is provided, create a default one ## if no lexicon is provided, create a default one
...@@ -185,32 +179,55 @@ class UnTextIndex(Persistent, Implicit): ...@@ -185,32 +179,55 @@ class UnTextIndex(Persistent, Implicit):
in this way, but I don't see too much of a problem with it.""" in this way, but I don't see too much of a problem with it."""
if type(vocab_id) is not StringType: if type(vocab_id) is not StringType:
vocab = vocab_id return vocab_id
else: else:
vocab = getattr(self, vocab_id) vocab = getattr(self, vocab_id)
return vocab.lexicon return vocab.lexicon
def __nonzero__(self):
return not not self._unindex
def __len__(self): # Too expensive
"""Return the number of objects indexed.""" #def __len__(self):
# """Return the number of objects indexed."""
return len(self._unindex) # return len(self._unindex)
def clear(self): def clear(self):
"""Reinitialize the text index.""" """Reinitialize the text index."""
self._index = IOBTree() self._index = IOBTree()
self._unindex = IOBTree() self._unindex = IOBTree()
def _convertBTrees(self, threshold=200):
if type(self._index) is IOBTree: return
from BTrees.convert import convert
_index=self._index
self._index=IOBTree()
def histogram(self): def convertScores(scores,
type=type, TupleType=TupleType, IIBTree=IIBTree
):
if type(scores) is not TupleType and type(scores) is not IIBTree():
scores=IIBTree(scores)
return scores
convert(_index, self._index, threshold, convertScores)
_unindex=self._unindex
self._unindex=IOBTree()
convert(_unindex, self._unindex, threshold)
def histogram(self, type=type, TupleType=type(())):
"""Return a mapping which provides a histogram of the number of """Return a mapping which provides a histogram of the number of
elements found at each point in the index.""" elements found at each point in the index."""
histogram = {} histogram = IIBucket()
for (key, value) in self._index.items(): for (key, value) in self._index.items():
entry = len(value) if type(value) is TupleType: entry=1
else: entry = len(value)
histogram[entry] = histogram.get(entry, 0) + 1 histogram[entry] = histogram.get(entry, 0) + 1
return histogram return histogram
...@@ -227,13 +244,8 @@ class UnTextIndex(Persistent, Implicit): ...@@ -227,13 +244,8 @@ class UnTextIndex(Persistent, Implicit):
if results is None: if results is None:
return default return default
else: else:
# Now that we've got them, let's resolve out the word return tuple(map(self.getLexicon(self._lexicon).getWord,
# references results))
resolved = []
for (word, wordId) in wordMap:
if wordId in results:
resolved.append(word)
return tuple(resolved)
def insertForwardIndexEntry(self, entry, documentId, score=1): def insertForwardIndexEntry(self, entry, documentId, score=1):
...@@ -247,7 +259,8 @@ class UnTextIndex(Persistent, Implicit): ...@@ -247,7 +259,8 @@ class UnTextIndex(Persistent, Implicit):
5+ bucket. 5+ bucket.
""" """
indexRow = self._index.get(entry, None) index=self._index
indexRow = index.get(entry, None)
if indexRow is not None: if indexRow is not None:
if type(indexRow) is TupleType: if type(indexRow) is TupleType:
...@@ -260,78 +273,29 @@ class UnTextIndex(Persistent, Implicit): ...@@ -260,78 +273,29 @@ class UnTextIndex(Persistent, Implicit):
if indexRow[0] == documentId: if indexRow[0] == documentId:
if indexRow[1] != score: if indexRow[1] != score:
indexRow = (documentId, score) indexRow = (documentId, score)
index[entry] = indexRow
else: else:
indexRow = { indexRow[0]: indexRow[1] } indexRow={
indexRow[documentId] = score indexRow[0]: indexRow[1],
self._index[entry] = indexRow documentId: score,
elif type(indexRow) is DictType: }
if indexRow.has_key(documentId): index[entry] = indexRow
if indexRow[documentId] == score:
return 1 # No need to update
elif len(indexRow) > 4:
# We have a mapping (dictionary), but it has
# grown too large, so we'll convert it to a
# bucket.
newRow = IIBucket()
for (k, v) in indexRow.items():
newRow[k] = v
indexRow = newRow
indexRow[documentId] = score
self._index[entry] = indexRow
else: else:
if indexRow.get(documentId, -1) != score:
# score changed (or new entry)
if type(indexRow) is DictType:
indexRow[documentId] = score indexRow[documentId] = score
if len(indexRow) > 3:
# Big enough to give it's own database record
indexRow=IIBTree(indexRow)
index[entry] = indexRow
else: else:
# We've got a IIBucket already.
if indexRow.has_key(documentId):
if indexRow[documentId] == score:
return 1
indexRow[documentId] = score indexRow[documentId] = score
else: else:
# We don't have any information at this point, so we'll # We don't have any information at this point, so we'll
# put our first entry in, and use a tuple to save space # put our first entry in, and use a tuple to save space
self._index[entry] = (documentId, score) index[entry] = (documentId, score)
return 1
def insertReverseIndexEntry(self, entry, documentId):
"""Insert the correct entry into the reverse indexes for future
unindexing."""
newRow = self._unindex.get(documentId, [])
if newRow:
# Catch cases where we don't need to modify anything
if entry in newRow:
return 1
newRow.append(entry)
self._unindex[documentId] = newRow
def removeReverseEntry(self, entry, documentId):
"""Removes a single entry from the reverse index."""
newRow = self._unindex.get(documentId, [])
if newRow:
try:
newRow.remove(entry)
except ValueError:
pass # We don't have it, this is bad
self._unindex[documentId] = newRow
def removeForwardEntry(self, entry, documentId):
"""Remove a single entry from the forward index."""
currentRow = self._index.get(entry, None)
if type(currentRow) is TupleType:
del self._index[entry]
elif currentRow is not None:
try:
del self._index[entry][documentId]
except (KeyError, IndexError, TypeError):
LOG('UnTextIndex', ERROR,
'unindex_object tried to unindex nonexistent'
' document %s' % str(documentId))
def index_object(self, documentId, obj, threshold=None): def index_object(self, documentId, obj, threshold=None):
""" Index an object: """ Index an object:
...@@ -354,49 +318,58 @@ class UnTextIndex(Persistent, Implicit): ...@@ -354,49 +318,58 @@ class UnTextIndex(Persistent, Implicit):
except AttributeError: except AttributeError:
return 0 return 0
lexicon = self.getLexicon(self._lexicon)
splitter=lexicon.Splitter
sourceWords = self.getLexicon(self._lexicon).Splitter(source) wordScores = OIBTree()
wordList = OIBTree()
last = None last = None
# Run through the words and score them # Run through the words and score them
for word in sourceWords: for word in splitter(source):
if word[0] == '\"': if word[0] == '\"':
last = self.subindex(word[1:-1], wordList, last = self._subindex(word[1:-1], wordScores, last, splitter)
wordList.has_key, last) # XXX
else:
if wordList.has_key(word):
if word != last:
wordList[word] = wordList[word]+1
else: else:
wordList[word] = 1 if word==last: continue
last=word
wordScores[word]=wordScores.get(word,0)+1
lexicon = self.getLexicon(self._lexicon) # Convert scores to use wids:
currentWordIds = self._unindex.get(documentId, []) widScores=IIBucket()
wordCount = 0 getWid=lexicon.getWordId
for word, score in wordScores.items():
widScores[getWid(word)]=score
del wordScores
currentWids=IISet(self._unindex.get(documentId, []))
# First deal with deleted words # Get rid of document words that are no longer indexed
# To do this, the first thing we have to do is convert the self.unindex_objectWids(documentId, difference(currentWids, widScores))
# existing words to words, from wordIDS
wordListAsIds = OIBTree()
for word, score in wordList.items():
wordListAsIds[lexicon.getWordId(word)] = score
for word in currentWordIds: # Now index the words. Note that the new xIBTrees are clever
if not wordListAsIds.has_key(word): # enough to do nothing when there isn't a change. Woo hoo.
self.removeForwardEntry(word, documentId) insert=self.insertForwardIndexEntry
for wid, score in widScores.items():
insert(wid, documentId, score)
#import pdb; pdb.set_trace() # Save the unindexing info if it's changed:
# Now we can deal with new/updated entries wids=widScores.keys()
for wordId, score in wordListAsIds.items(): if wids != currentWids.keys():
self.insertForwardIndexEntry(wordId, documentId, score) self._unindex[documentId]=wids
self.insertReverseIndexEntry(wordId, documentId)
wordCount = wordCount + 1
# Return the number of words you indexed return len(wids)
return wordCount
def _subindex(self, source, wordScores, last, splitter):
"""Recursively handle multi-word synonyms"""
for word in splitter(source):
if word[0] == '\"':
last = self._subindex(word[1:-1], wordScores, last, splitter)
else:
if word==last: continue
last=word
wordScores[word]=wordScores.get(word,0)+1
return last
def unindex_object(self, i): def unindex_object(self, i):
""" carefully unindex document with integer id 'i' from the text """ carefully unindex document with integer id 'i' from the text
...@@ -404,21 +377,41 @@ class UnTextIndex(Persistent, Implicit): ...@@ -404,21 +377,41 @@ class UnTextIndex(Persistent, Implicit):
index = self._index index = self._index
unindex = self._unindex unindex = self._unindex
val = unindex.get(i, None) wids = unindex.get(i, None)
if val is not None: if wids is not None:
for n in val: self.unindex_objectWids(i, wids)
v = index.get(n, None) del unindex[i]
if type(v) is TupleType:
del index[n] def unindex_objectWids(self, i, wids):
elif v is not None: """ carefully unindex document with integer id 'i' from the text
index and do not fail if it does not exist """
index = self._index
get=index.get
for wid in wids:
widScores = get(wid, None)
if widScores is None:
LOG('UnTextIndex', ERROR,
'unindex_object tried to unindex nonexistent'
' document, wid %s, %s' % (i,wid))
continue
if type(widScores) is TupleType:
del index[wid]
else:
try: try:
del index[n][i] del widScores[i]
if widScores:
if type(widScores) is DictType:
if len(widScores) == 1:
# convert to tuple
widScores = widScores.items()[0]
index[wid]=widScores
else:
del index[wid]
except (KeyError, IndexError, TypeError): except (KeyError, IndexError, TypeError):
LOG('UnTextIndex', ERROR, LOG('UnTextIndex', ERROR,
'unindex_object tried to unindex nonexistent' 'unindex_object tried to unindex nonexistent'
' document %s' % str(i)) ' document %s' % str(i))
del unindex[i]
def __getitem__(self, word): def __getitem__(self, word):
"""Return an InvertedIndex-style result "list" """Return an InvertedIndex-style result "list"
...@@ -442,12 +435,13 @@ class UnTextIndex(Persistent, Implicit): ...@@ -442,12 +435,13 @@ class UnTextIndex(Persistent, Implicit):
if splitSource[:1] == '"' and splitSource[-1:] == '"': if splitSource[:1] == '"' and splitSource[-1:] == '"':
return self[splitSource] return self[splitSource]
r = self._index.get( wids=self.getLexicon(self._lexicon).get(splitSource)
self.getLexicon(self._lexicon).get(splitSource)[0], if wids:
None) r = self._index.get(wids[0], None)
if r is None: if r is None:
r = {} r = {}
else:
r={}
return ResultList(r, (splitSource,), self) return ResultList(r, (splitSource,), self)
...@@ -486,6 +480,7 @@ class UnTextIndex(Persistent, Implicit): ...@@ -486,6 +480,7 @@ class UnTextIndex(Persistent, Implicit):
if not keys or not string.strip(keys): if not keys or not string.strip(keys):
return None return None
keys = [keys] keys = [keys]
r = None r = None
for key in keys: for key in keys:
...@@ -493,21 +488,12 @@ class UnTextIndex(Persistent, Implicit): ...@@ -493,21 +488,12 @@ class UnTextIndex(Persistent, Implicit):
if not key: if not key:
continue continue
rr = IIBucket() b = self.query(key).bucket()
try: w, r = weightedIntersection(r, b)
for i, score in self.query(key).items():
if score:
rr[i] = score
except KeyError:
pass
if r is None:
r = rr
else:
# Note that we *and*/*narrow* multiple search terms.
r = r.intersection(rr)
if r is not None: if r is not None:
return r, (self.id,) return r, (self.id,)
return (IIBucket(), (self.id,)) return (IIBucket(), (self.id,))
...@@ -533,19 +519,6 @@ class UnTextIndex(Persistent, Implicit): ...@@ -533,19 +519,6 @@ class UnTextIndex(Persistent, Implicit):
return r return r
def _subindex(self, isrc, d, old, last):
src = self.getLexicon(self._lexicon).Splitter(isrc)
for s in src:
if s[0] == '\"':
last = self.subindex(s[1:-1],d,old,last)
else:
if old(s):
if s != last: d[s] = d[s]+1
else: d[s] = 1
return last
def query(self, s, default_operator=Or, ws=(string.whitespace,)): def query(self, s, default_operator=Or, ws=(string.whitespace,)):
""" This is called by TextIndexes. A 'query term' which is a """ This is called by TextIndexes. A 'query term' which is a
...@@ -565,7 +538,6 @@ class UnTextIndex(Persistent, Implicit): ...@@ -565,7 +538,6 @@ class UnTextIndex(Persistent, Implicit):
## For example, substitute wildcards, or translate words into ## For example, substitute wildcards, or translate words into
## various languages. ## various languages.
q = self.getLexicon(self._lexicon).query_hook(q) q = self.getLexicon(self._lexicon).query_hook(q)
# do some more parsing # do some more parsing
q = parse2(q, default_operator) q = parse2(q, default_operator)
......
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
#############################################################################
import random

def randid(randint=random.randint, choice=random.choice, signs=(-1,1)):
    """Return a random nonzero integer suitable for use as an object id.

    randint, choice -- injectable RNG primitives, defaulting to the
        standard 'random' module (fix: the original bound these from the
        long-obsolete 'whrandom' module, removed in Python 2.6; 'random'
        offers the same API and semantics).
    signs -- the candidate signs for the result.

    The magnitude lies in [1, 2000000000], so 0 is never returned.
    """
    return choice(signs)*randint(1,2000000000)

# Keep the module namespace clean, as the original did with whrandom;
# the defaults above were already bound at def time.
del random
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
import sys
try: import ZODB
except:
import os
sys.path.insert(0, os.getcwd())
sys.path.insert(0, '../..')
import ZODB
import unittest
from SearchIndex.Splitter import Splitter
class TestSplitter(unittest.TestCase):
    """Exercise the word-extraction rules of SearchIndex.Splitter."""

    def testSplitNormalText(self):
        """Ordinary words come back in order (stop word 'a' is dropped)."""
        words = map(None, Splitter('this is a long string of words'))
        assert words == ['this', 'is', 'long', 'string', 'of', 'words']

    def testDropNumeric(self):
        """Purely numeric tokens are discarded."""
        words = map(None, Splitter('123 456 789 foobar without you nothing'))
        assert words == ['foobar', 'without', 'you', 'nothing'], words

    def testDropSingleLetterWords(self):
        """Single-letter words such as 'I' are discarded."""
        words = map(None, Splitter('without you I nothing'))
        assert words == ['without', 'you', 'nothing'], words

    def testSplitOnNonAlpha(self):
        """Words are split at non-alphabetic characters."""
        words = map(None, Splitter('without you I\'m nothing'))
        assert words == ['without', 'you', 'nothing'], words
def test_suite():
    """Collect every test* method of TestSplitter into a suite."""
    suite = unittest.makeSuite(TestSplitter, 'test')
    return suite
def main():
    """Run the full suite with the default text-mode runner."""
    runner = unittest.TextTestRunner()
    runner.run(test_suite())
def debug():
    """Run the suite in debug mode so test exceptions propagate."""
    suite = test_suite()
    suite.debug()
def pdebug():
    """Step through the debug() run under the pdb debugger."""
    import pdb
    pdb.run('debug()')
if __name__=='__main__':
    # Allow 'python <script> debug' (etc.) to dispatch to any module-level
    # callable by name; with no argument, run the suite normally.
    args = sys.argv[1:]
    if args:
        globals()[args[0]]()
    else:
        main()
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
import os, sys
sys.path.insert(0, os.getcwd())
try: import unittest
except:
sys.path[0]=os.path.join(sys.path[0],'..','..')
import unittest
import ZODB
from SearchIndex.UnKeywordIndex import UnKeywordIndex
class Dummy:
    """Stand-in document whose foo() method yields its keyword values."""

    def __init__( self, foo ):
        # Stash the keyword value(s) this dummy document carries.
        self._kw = foo

    def foo( self ):
        """Return the keyword value(s) handed to the constructor."""
        return self._kw

    def __str__( self ):
        return '<Dummy: %s>' % self._kw

    __repr__ = __str__
class TestCase( unittest.TestCase ):
    """
        Test KeywordIndex objects.
    """

    def setUp( self ):
        """Build a fresh index, sample documents, and canned queries."""
        self._index = UnKeywordIndex( 'foo' )
        self._marker = []
        # (documentId, object) pairs; note that document 3 repeats 'a'
        # and document 7 carries the falsy integer keyword 0 on purpose.
        self._values = [ ( 0, Dummy( ['a'] ) )
                       , ( 1, Dummy( ['a','b'] ) )
                       , ( 2, Dummy( ['a','b','c'] ) )
                       , ( 3, Dummy( ['a','b','c', 'a'] ) )
                       , ( 4, Dummy( ['a', 'b', 'c', 'd'] ) )
                       , ( 5, Dummy( ['a', 'b', 'c', 'e'] ) )
                       , ( 6, Dummy( ['a', 'b', 'c', 'e', 'f'] ))
                       , ( 7, Dummy( [0] ) )
                       ]
        self._noop_req  = { 'bar': 123 }
        self._all_req = { 'foo': ['a'] }
        self._some_req = { 'foo': ['e'] }
        self._overlap_req = { 'foo': ['c', 'e'] }
        self._string_req = {'foo': 'a'}
        self._zero_req  = { 'foo': [0] }

    def tearDown( self ):
        """Nothing to clean up; the index lives only in memory."""

    def _populateIndex( self ):
        # Index every sample document under its id.
        for k, v in self._values:
            self._index.index_object( k, v )

    def _checkApply( self, req, expectedValues ):
        """Apply 'req' and verify exactly the expected documents match."""
        result, used = self._index._apply_index( req )
        assert used == ( 'foo', )
        try:
            length = len(result)
        except:
            # Some result types (e.g. IIBucket) may not support len();
            # fall back to counting keys.
            result = result.keys()
            length = len(result)
        assert length == len( expectedValues ), \
            '%s | %s' % ( list( result ),
                          map(lambda x: x[0], expectedValues ) )
        for k, v in expectedValues:
            assert k in result

    def testEmpty( self ):
        """An empty index matches nothing and tolerates bogus ids."""
        assert len( self._index ) == 0
        assert len( self._index.referencedObjects() ) == 0
        assert self._index.getEntryForObject( 1234 ) is None
        assert ( self._index.getEntryForObject( 1234, self._marker )
                   is self._marker ), self._index.getEntryForObject(1234)
        self._index.unindex_object( 1234 ) # nothrow
        assert self._index.hasUniqueValuesFor( 'foo' )
        assert not self._index.hasUniqueValuesFor( 'bar' )
        assert len( self._index.uniqueValues( 'foo' ) ) == 0
        assert self._index._apply_index( self._noop_req ) is None
        self._checkApply( self._all_req, [] )
        self._checkApply( self._some_req, [] )
        self._checkApply( self._overlap_req, [] )
        self._checkApply( self._string_req, [] )

    def testPopulated( self ):
        """Index all sample documents, then verify lookups and queries."""
        self._populateIndex()
        values = self._values

        #assert len( self._index ) == len( values )
        assert len( self._index.referencedObjects() ) == len( values )

        assert self._index.getEntryForObject( 1234 ) is None
        assert ( self._index.getEntryForObject( 1234, self._marker )
                   is self._marker )
        self._index.unindex_object( 1234 ) # nothrow

        for k, v in values:
            assert self._index.getEntryForObject( k ) == v.foo()

        # BUG FIX: the original wrote 'assert (expr, msg)', which asserts
        # a non-empty tuple and can never fail.  Assert the comparison
        # itself; the unique values are a,b,c,d,e,f,0 -- one fewer than
        # the number of documents because 'a' repeats.
        assert len( self._index.uniqueValues( 'foo' ) ) == len( values ) - 1, \
               len( values ) - 1
        assert self._index._apply_index( self._noop_req ) is None

        self._checkApply( self._all_req, values[:-1])
        self._checkApply( self._some_req, values[ 5:7 ] )
        self._checkApply( self._overlap_req, values[2:7] )
        self._checkApply( self._string_req, values[:-1] )

    def testZero( self ):
        """The falsy keyword 0 must still be indexed and queryable."""
        self._populateIndex()
        values = self._values
        self._checkApply( self._zero_req, values[ -1: ] )
        assert 0 in self._index.uniqueValues( 'foo' )
def test_suite():
    """Assemble every TestCase test into a single suite."""
    suite = unittest.makeSuite( TestCase )
    return suite

if __name__ == '__main__':
    runner = unittest.TextTestRunner()
    runner.run( test_suite() )
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
import sys, os
sys.path.insert(0, os.getcwd())
try: import unittest
except:
sys.path[0]=os.path.join(sys.path[0],'..','..')
import unittest
class Dummy:
    """Minimal stand-in object whose attributes come from keyword args."""
    def __init__(self, **kw):
        for name, value in kw.items():
            setattr(self, name, value)
import zLOG
def log_write(subsystem, severity, summary, detail, error):
    # Replacement for zLOG.log_write used during the tests: any log
    # entry at PROBLEM severity or above is turned into a test failure.
    if severity >= zLOG.PROBLEM:
        assert 0, "%s(%s): %s" % (subsystem, severity, summary)
# Monkey-patch the hook so all zLOG traffic flows through the check above.
zLOG.log_write=log_write
import ZODB, ZODB.DemoStorage, ZODB.FileStorage
import SearchIndex.UnTextIndex
import SearchIndex.GlobbingLexicon
class Tests(unittest.TestCase):
    """Tests for SearchIndex.UnTextIndex, transient and ZODB-persistent."""

    def setUp(self):
        # Fresh in-memory text index over the 'text' attribute, plus a
        # sample document to feed it.
        self.index=SearchIndex.UnTextIndex.UnTextIndex('text')
        self.doc=Dummy(text='this is the time, when all good zopes')

    def dbopen(self):
        # Open (creating on first use) a FileStorage-backed database and
        # return the persistent UnTextIndex stored at root['index'].
        n = 'fs_tmp__%s' % os.getpid()
        s = ZODB.FileStorage.FileStorage(n)
        db=self.db=ZODB.DB(s)
        self.jar=db.open()
        if not self.jar.root().has_key('index'):
            self.jar.root()['index']=SearchIndex.UnTextIndex.UnTextIndex('text')
            get_transaction().commit()
        return self.jar.root()['index']

    def dbclose(self):
        # Tear down the connection and database opened by dbopen().
        self.jar.close()
        self.db.close()
        del self.jar
        del self.db

    def tearDown(self):
        get_transaction().abort()
        if hasattr(self, 'jar'):
            self.dbclose()
        # NOTE(review): POSIX-only cleanup of the FileStorage temp files.
        os.system('rm -f fs_tmp__*')

    def checkSimpleAddDelete(self):
        "Check that we can add and delete an object without error"
        self.index.index_object(0, self.doc)
        self.index.index_object(1, self.doc)
        self.doc.text='spam is good, spam is fine, span span span'
        self.index.index_object(0, self.doc)
        self.index.unindex_object(0)

    def checkPersistentUpdate1(self):
        "Check simple persistent indexing"
        index=self.dbopen()

        self.doc.text='this is the time, when all good zopes'
        index.index_object(0, self.doc)
        get_transaction().commit()

        self.doc.text='time waits for no one'
        index.index_object(1, self.doc)
        get_transaction().commit()
        self.dbclose()

        # Re-open to be sure we query what was actually persisted.
        index=self.dbopen()

        r = index._apply_index({})
        assert r==None

        r = index._apply_index({'text': 'python'})
        assert len(r) == 2 and r[1]==('text',), 'incorrectly not used'
        assert not r[0], "should have no results"

        r = index._apply_index({'text': 'time'})
        r=list(r[0].keys())
        assert r == [0,1], r

    def checkPersistentUpdate2(self):
        "Check less simple persistent indexing"
        index=self.dbopen()

        self.doc.text='this is the time, when all good zopes'
        index.index_object(0, self.doc)
        get_transaction().commit()

        self.doc.text='time waits for no one'
        index.index_object(1, self.doc)
        get_transaction().commit()

        self.doc.text='the next task is to test'
        index.index_object(3, self.doc)
        get_transaction().commit()

        self.doc.text='time time'
        index.index_object(2, self.doc)
        get_transaction().commit()
        self.dbclose()

        index=self.dbopen()

        r = index._apply_index({})
        assert r==None

        r = index._apply_index({'text': 'python'})
        assert len(r) == 2 and r[1]==('text',), 'incorrectly not used'
        assert not r[0], "should have no results"

        r = index._apply_index({'text': 'time'})
        r=list(r[0].keys())
        assert r == [0,1,2], r

    # Sample documents shared by the globbing / boolean query checks.
    sample_texts = [
        """This is the time for all good men to come to
        the aid of their country""",
        """ask not what your country can do for you,
        ask what you can do for your country""",
        """Man, I can't wait to get to Montross!""",
        """Zope Public License (ZPL) Version 1.0""",
        """Copyright (c) Digital Creations. All rights reserved.""",
        """This license has been certified as Open Source(tm).""",
        """I hope I get to work on time""",
        ]

    def checkGlobQuery(self):
        "Check a glob query"
        index=self.dbopen()
        # Swap in a GlobbingLexicon so wildcard patterns are supported.
        index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()

        for i in range(len(self.sample_texts)):
            self.doc.text=self.sample_texts[i]
            index.index_object(i, self.doc)
            get_transaction().commit()

        self.dbclose()

        index=self.dbopen()

        r = index._apply_index({'text':'m*n'})
        r=list(r[0].keys())
        assert r == [0,2], r

    def checkAndQuery(self):
        "Check an AND query"
        index=self.dbopen()
        index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()

        for i in range(len(self.sample_texts)):
            self.doc.text=self.sample_texts[i]
            index.index_object(i, self.doc)
            get_transaction().commit()

        self.dbclose()

        index=self.dbopen()

        r = index._apply_index({'text':'time and country'})
        r=list(r[0].keys())
        assert r == [0,], r

    def checkOrQuery(self):
        "Check an OR query"
        index=self.dbopen()
        index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()

        for i in range(len(self.sample_texts)):
            self.doc.text=self.sample_texts[i]
            index.index_object(i, self.doc)
            get_transaction().commit()

        self.dbclose()

        index=self.dbopen()

        r = index._apply_index({'text':'time or country'})
        r=list(r[0].keys())
        assert r == [0,1,6], r
def test_suite():
    # Collect every 'check*' method of Tests into one suite.
    return unittest.makeSuite(Tests, 'check')

def main():
    # Run the suite under the plain text runner.
    runner = unittest.TextTestRunner()
    runner.run(test_suite())

def debug():
    # Run without result collection so errors surface directly.
    test_suite().debug()

def pdebug():
    # Step through debug() under the pdb debugger.
    import pdb
    pdb.run('debug()')

if __name__=='__main__':
    # Allow 'python test.py main|debug|pdebug'; default is main().
    if len(sys.argv) > 1:
        globals()[sys.argv[1]]()
    else:
        main()
...@@ -82,7 +82,16 @@ ...@@ -82,7 +82,16 @@
# attributions are listed in the accompanying credits file. # attributions are listed in the accompanying credits file.
# #
############################################################################## ##############################################################################
import Zope
import sys
sys.path.insert(0, '.')
try:
import Testing
except ImportError:
sys.path[0] = '../../'
import Testing
import ZODB
import unittest import unittest
from SearchIndex.UnIndex import UnIndex from SearchIndex.UnIndex import UnIndex
...@@ -117,7 +126,7 @@ class TestCase( unittest.TestCase ): ...@@ -117,7 +126,7 @@ class TestCase( unittest.TestCase ):
, ( 5, Dummy( 'abce' ) ) , ( 5, Dummy( 'abce' ) )
, ( 6, Dummy( 'abce' ) ) , ( 6, Dummy( 'abce' ) )
, ( 7, Dummy( 0 ) ) # Collector #1959 , ( 7, Dummy( 0 ) ) # Collector #1959
] , ( 8, Dummy(None) )]
self._forward = {} self._forward = {}
self._backward = {} self._backward = {}
for k, v in self._values: for k, v in self._values:
...@@ -137,6 +146,7 @@ class TestCase( unittest.TestCase ): ...@@ -137,6 +146,7 @@ class TestCase( unittest.TestCase ):
, 'foo_usage': 'range:min:max' , 'foo_usage': 'range:min:max'
} }
self._zero_req = { 'foo': 0 } self._zero_req = { 'foo': 0 }
self._none_req = { 'foo': None }
def tearDown( self ): def tearDown( self ):
...@@ -149,6 +159,8 @@ class TestCase( unittest.TestCase ): ...@@ -149,6 +159,8 @@ class TestCase( unittest.TestCase ):
def _checkApply( self, req, expectedValues ): def _checkApply( self, req, expectedValues ):
result, used = self._index._apply_index( req ) result, used = self._index._apply_index( req )
if hasattr(result, 'keys'):
result = result.keys()
assert used == ( 'foo', ) assert used == ( 'foo', )
assert len( result ) == len( expectedValues ), \ assert len( result ) == len( expectedValues ), \
'%s | %s' % ( map( None, result ), expectedValues ) '%s | %s' % ( map( None, result ), expectedValues )
...@@ -177,10 +189,11 @@ class TestCase( unittest.TestCase ): ...@@ -177,10 +189,11 @@ class TestCase( unittest.TestCase ):
self._checkApply( self._range_req, [] ) self._checkApply( self._range_req, [] )
def testPopulated( self ): def testPopulated( self ):
""" Test a populated FieldIndex """
self._populateIndex() self._populateIndex()
values = self._values values = self._values
assert len( self._index ) == len( values ) assert len( self._index ) == len( values )-1 #'abce' is duplicate
assert len( self._index.referencedObjects() ) == len( values ) assert len( self._index.referencedObjects() ) == len( values )
assert self._index.getEntryForObject( 1234 ) is None assert self._index.getEntryForObject( 1234 ) is None
...@@ -195,21 +208,62 @@ class TestCase( unittest.TestCase ): ...@@ -195,21 +208,62 @@ class TestCase( unittest.TestCase ):
assert self._index._apply_index( self._noop_req ) is None assert self._index._apply_index( self._noop_req ) is None
self._checkApply( self._request, values[ -3:-1 ] ) self._checkApply( self._request, values[ -4:-2 ] )
self._checkApply( self._min_req, values[ 2:-1 ] ) self._checkApply( self._min_req, values[ 2:-2 ] )
self._checkApply( self._max_req, values[ :3 ] + values[ -1: ] ) self._checkApply( self._max_req, values[ :3 ] + values[ -2: ] )
self._checkApply( self._range_req, values[ 2:5 ] ) self._checkApply( self._range_req, values[ 2:5 ] )
def testZero( self ): def testZero( self ):
""" Make sure 0 gets indexed """
self._populateIndex() self._populateIndex()
values = self._values values = self._values
self._checkApply( self._zero_req, values[ -1: ] ) self._checkApply( self._zero_req, values[ -2:-1 ] )
assert 0 in self._index.uniqueValues( 'foo' ) assert 0 in self._index.uniqueValues( 'foo' )
def testNone(self):
""" make sure None gets indexed """
self._populateIndex()
values = self._values
self._checkApply(self._none_req, values[-1:])
assert None in self._index.uniqueValues('foo')
def testRange(self):
"""Test a range search"""
index = UnIndex( 'foo' )
for i in range(100):
index.index_object(i, Dummy(i%10))
r=index._apply_index({
'foo_usage': 'range:min:max',
'foo': [-99, 3]})
assert tuple(r[1])==('foo',), r[1]
r=list(r[0].keys())
expect=[
0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23, 30, 31, 32, 33,
40, 41, 42, 43, 50, 51, 52, 53, 60, 61, 62, 63, 70, 71, 72, 73,
80, 81, 82, 83, 90, 91, 92, 93
]
assert r==expect, r
def test_suite(): def test_suite():
return unittest.makeSuite( TestCase ) return unittest.makeSuite( TestCase )
def debug():
return test_suite().debug()
if __name__ == '__main__': def pdebug():
import pdb
pdb.run('debug()')
def main():
unittest.TextTestRunner().run( test_suite() ) unittest.TextTestRunner().run( test_suite() )
if __name__ == '__main__':
if len(sys.argv) > 1:
globals()[sys.argv[1]]()
else:
main()
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
"""
Set up testing environment
$Id: __init__.py,v 1.2 2001/03/15 13:16:25 jim Exp $
"""
import os, sys
# Walk upward from the current directory until ZODB becomes importable;
# that directory is then used as the default SOFTWARE_HOME for the
# test run.
startfrom = head = os.getcwd()
while 1:
    sys.path[0]=startfrom
    try:
        import ZODB
    except ImportError:
        head = os.path.split(startfrom)[0]
        if head == '':
            # Reached the filesystem root without finding ZODB.
            # (Bug fix: this used to raise a *string* exception, which
            # modern Pythons reject outright.)
            raise ImportError("Couldn't import ZODB")
        startfrom = head
        continue
    else:
        break

# Provide SOFTWARE_HOME/INSTANCE_HOME defaults only when the
# environment doesn't already define them.
os.environ['SOFTWARE_HOME']=os.environ.get('SOFTWARE_HOME', startfrom)
os.environ['INSTANCE_HOME']=os.environ.get(
    'INSTANCE_HOME',
    os.path.join(os.environ['SOFTWARE_HOME'],'..','..')
    )
#!/usr/bin/env python1.5
# Dispatcher for usage inside Zope test environment
# Digital Creations
__version__ = '$Id: dispatcher.py,v 1.2 2001/03/15 13:16:25 jim Exp $'
import os,sys,re,string
import threading,time,commands,profile
class Dispatcher:
    """
    a multi-purpose thread dispatcher

    Starts a number of worker threads per registered function, waits
    for them all to finish, and reports per-thread runtimes plus the
    process memory usage sampled while the threads ran.
    """

    def __init__(self,func=''):
        self.fp = sys.stderr             # log destination, see setlog()
        self.f_startup = []              # hook names run before each worker
        self.f_teardown = []             # hook names run after each worker
        self.lastlog = ""                # last line written (dedup)
        self.lock = threading.Lock()     # guards th_data / runtime
        self.func = func
        self.profiling = 0
        # Bug fix: the old code always did getattr(self, self.func) and
        # crashed with an AttributeError when func was '' (the default).
        if func:
            self.doc = getattr(self,func).__doc__
        else:
            self.doc = None

    def setlog(self,fp):
        """ redirect log output to the file-like object fp """
        self.fp = fp

    def log(self,s):
        """ write s to the log, suppressing immediate duplicates """
        if s==self.lastlog: return
        self.fp.write(s)
        self.fp.flush()
        self.lastlog=s

    def logn(self,s):
        """ like log() but appends a newline """
        if s==self.lastlog: return
        self.fp.write(s + '\n')
        self.fp.flush()
        self.lastlog=s

    def profiling_on(self):
        """ enable profiling.  (Bug fix: 'self' was missing from the
            signature, so this method was uncallable on an instance.) """
        self.profiling = 1

    def profiling_off(self):
        """ disable profiling.  (Bug fix: 'self' was missing.) """
        self.profiling = 0

    def dispatcher(self,name='', *params):
        """ dispatcher for threads

        The dispatcher expects one or several tupels:
        (functionname, number of threads to start , args, keyword args)
        """
        # -1 is a sentinel; it is removed again before reporting.
        self.mem_usage = [-1]
        mem_watcher = threading.Thread(None,self.mem_watcher,name='memwatcher')
        mem_watcher.start()

        self.start_test = time.time()
        self.name = name
        self.th_data = {}
        self.runtime = {}
        self._threads = []
        s2s=self.s2s

        # Create all worker threads first, then start them together.
        for func,numthreads,args,kw in params:
            for i in range(0,numthreads):
                kw['t_func'] = func
                th = threading.Thread(None,self.worker,
                                      name="TH_%s_%03d" % (func,i),
                                      args=args,kwargs=kw)
                self._threads.append(th)

        for th in self._threads: th.start()

        # Block until every worker (and then the watcher) has finished.
        while threading.activeCount() > 1: time.sleep(1)

        self.logn('ID: %s ' % self.name)
        self.logn('FUNC: %s ' % self.func)
        self.logn('DOC: %s ' % self.doc)
        self.logn('Args: %s' % params)

        for th in self._threads:
            self.logn( '%-30s ........................ %9.3f sec' % (th.getName(), self.runtime[th.getName()]) )
            for k,v in self.th_data[th.getName()].items():
                self.logn ('%-30s %-15s = %s' % (' ',k,v) )

        self.logn("")
        self.logn('Complete running time: %9.3f sec' % (time.time()-self.start_test) )
        if len(self.mem_usage)>1: self.mem_usage.remove(-1)
        self.logn( "Memory: start: %s, end: %s, low: %s, high: %s" % \
            (s2s(self.mem_usage[0]),s2s(self.mem_usage[-1]),s2s(min(self.mem_usage)), s2s(max(self.mem_usage))))
        self.logn('')

    def worker(self,*args,**kw):
        """ thread body: run startup hooks, the target function and
            finally the teardown hooks """
        for func in self.f_startup: getattr(self,func)()
        t_func = getattr(self,kw['t_func'])
        del kw['t_func']
        apply(t_func,args,kw)
        for func in self.f_teardown: getattr(self,func)()

    def th_setup(self):
        """ initialize thread with some environment data """
        env = {'start': time.time()
               }
        return env

    def th_teardown(self,env,**kw):
        """ famous last actions of thread """
        # Serialize updates of the shared result dictionaries.
        self.lock.acquire()
        self.th_data[ threading.currentThread().getName() ] = kw
        self.runtime [ threading.currentThread().getName() ] = time.time() - env['start']
        self.lock.release()

    def getmem(self):
        """ try to determine the current memory usage """
        if not sys.platform in ['linux2']: return None
        cmd = '/bin/ps --no-headers -o pid,vsize --pid %s' % os.getpid()
        outp = commands.getoutput(cmd)
        pid,vsize = filter(lambda x: x!="" , string.split(outp," ") )
        # /proc/<pid>/statm field 0 is total program size in 4 KB pages.
        data = open("/proc/%d/statm" % os.getpid()).read()
        fields = re.split(" ",data)
        mem = string.atoi(fields[0]) * 4096
        return mem

    def mem_watcher(self):
        """ thread for watching memory usage """
        running = 1
        while running ==1:
            self.mem_usage.append( self.getmem() )
            time.sleep(1)
            # Stop once only the main thread and this watcher remain.
            if threading.activeCount() == 2: running = 0

    def register_startup(self,func):
        """ register the name of a method to run before each worker """
        self.f_startup.append(func)

    def register_teardown(self,func):
        """ register the name of a method to run after each worker """
        self.f_teardown.append(func)

    def s2s(self,n):
        """ format a byte count as a human-readable string """
        if n <1024.0: return "%8.3lf Bytes" % n
        if n <1024.0*1024.0: return "%8.3lf KB" % (1.0*n/1024.0)
        if n <1024.0*1024.0*1024.0: return "%8.3lf MB" % (1.0*n/1024.0/1024.0)
        else: return n
if __name__=="__main__":
    # Manual smoke test: print current process memory usage.
    # NOTE(review): getmem() returns None on anything but linux2.
    d=Dispatcher()
    print d.getmem()
    pass
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
"""
Facilitates unit tests which requires an acquirable REQUEST from
ZODB objects
Usage:
import makerequest
app = makerequest.makerequest(Zope.app())
$Id: makerequest.py,v 1.2 2001/03/15 13:16:25 jim Exp $
"""
import os
from os import environ
from sys import stdin
from ZPublisher.HTTPRequest import HTTPRequest
from ZPublisher.HTTPResponse import HTTPResponse
from ZPublisher.BaseRequest import RequestContainer
def makerequest(app):
    """Wrap *app* so a fake GET REQUEST is acquirable from it.

    Builds an HTTPRequest from a copy of the process environment.
    (The previous version wrote the fake SERVER_* values straight into
    os.environ, leaking them into the whole process; using a copy
    produces the same request without that side effect.)
    """
    resp = HTTPResponse()
    env = environ.copy()
    env['SERVER_NAME'] = 'foo'
    env['SERVER_PORT'] = '80'
    env['REQUEST_METHOD'] = 'GET'
    req = HTTPRequest(stdin, env, resp)
    return app.__of__(RequestContainer(REQUEST = req))
#!/usr/bin/env python
"""
Python unit testing framework, based on Erich Gamma's JUnit and Kent Beck's
Smalltalk testing framework.
Further information is available in the bundled documentation, and from
http://pyunit.sourceforge.net/
This module contains the core framework classes that form the basis of
specific test cases and suites (TestCase, TestSuite etc.), and also a
text-based utility class for running the tests and reporting the results
(TextTestRunner).
Copyright (c) 1999, 2000, 2001 Steve Purcell
This module is free software, and you may redistribute it and/or modify
it under the same terms as Python itself, so long as this copyright message
and disclaimer are retained in their original form.
IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
THIS CODE, EVEN IF THE AUTHOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE. THE CODE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS,
AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
"""
__author__ = "Steve Purcell"
__email__ = "stephen_purcell@yahoo.com"
__version__ = "$Revision: 1.2 $"[11:-2]
import time
import sys
import traceback
import string
import os
##############################################################################
# A platform-specific concession to help the code work for JPython users
##############################################################################
# Detect JPython by its platform string ('java*' / 'jdk*').  The
# string-module functions are used deliberately for compatibility with
# very old Python versions.
plat = string.lower(sys.platform)
_isJPython = string.find(plat, 'java') >= 0 or string.find(plat, 'jdk') >= 0
del plat
##############################################################################
# Test framework core
##############################################################################
class TestResult:
    """Holder for test result information.

    Test results are automatically managed by the TestCase and TestSuite
    classes, and do not need to be explicitly manipulated by writers of tests.

    Each instance holds the total number of tests run, and collections of
    failures and errors that occurred among those test runs. The collections
    contain tuples of (testcase, exceptioninfo), where exceptioninfo is a
    tuple of values as returned by sys.exc_info().
    """
    def __init__(self,args=(),kw=None):
        # Bug fix: 'kw' previously defaulted to a shared mutable {}
        # (the classic mutable-default pitfall); a fresh dict is now
        # created per instance.
        if kw is None:
            kw = {}
        self.failures = []    # [(testcase, exc_info), ...]
        self.errors = []      # [(testcase, exc_info), ...]
        self.testsRun = 0
        self.shouldStop = 0
        self.__args = args
        self.__kw = kw

    def startTest(self, test):
        "Called when the given test is about to be run"
        self.testsRun = self.testsRun + 1

    def stopTest(self, test):
        "Called when the given test has been run"
        pass

    def addError(self, test, err):
        "Called when an error has occurred"
        self.errors.append((test, err))

    def addFailure(self, test, err):
        "Called when a failure has occurred"
        self.failures.append((test, err))

    def wasSuccessful(self):
        "Tells whether or not this result was a success"
        return len(self.failures) == len(self.errors) == 0

    def stop(self):
        "Indicates that the tests should be aborted"
        self.shouldStop = 1

    def __repr__(self):
        return "<%s run=%i errors=%i failures=%i>" % \
               (self.__class__, self.testsRun, len(self.errors),
                len(self.failures))
class TestCase:
"""A class whose instances are single test cases.
Test authors should subclass TestCase for their own tests. Construction
and deconstruction of the test's environment ('fixture') can be
implemented by overriding the 'setUp' and 'tearDown' methods respectively.
By default, the test code itself should be placed in a method named
'runTest'.
If the fixture may be used for many test cases, create as
many test methods as are needed. When instantiating such a TestCase
subclass, specify in the constructor arguments the name of the test method
that the instance is to execute.
If it is necessary to override the __init__ method, the base class
__init__ method must always be called.
"""
def __init__(self, methodName='runTest',*args,**kw):
"""Create an instance of the class that will use the named test
method when executed. Raises a ValueError if the instance does
not have a method with the specified name.
"""
try:
self.__testMethodName = methodName
testMethod = getattr(self, methodName)
self.__testMethodDoc = testMethod.__doc__
except AttributeError:
raise ValueError, "no such test method in %s: %s" % \
(self.__class__, methodName)
self.__args = args
self.__kw = kw
def setUp(self):
"Hook method for setting up the test fixture before exercising it."
pass
def tearDown(self):
"Hook method for deconstructing the test fixture after testing it."
pass
def countTestCases(self):
return 1
def defaultTestResult(self):
return TestResult(self.__args,self.__kw)
def shortDescription(self):
"""Returns a one-line description of the test, or None if no
description has been provided.
The default implementation of this method returns the first line of
the specified test method's docstring.
"""
doc = self.__testMethodDoc
return doc and string.strip(string.split(doc, "\n")[0]) or None
def id(self):
return "%s.%s" % (self.__class__, self.__testMethodName)
def __str__(self):
return "%s (%s)" % (self.__testMethodName, self.__class__)
def __repr__(self):
return "<%s testMethod=%s>" % \
(self.__class__, self.__testMethodName)
def run(self, result=None):
return self(result)
def __call__(self, result=None):
if result is None: result = self.defaultTestResult()
result.startTest(self)
testMethod = getattr(self, self.__testMethodName)
try:
try:
self.setUp()
except:
result.addError(self,self.__exc_info())
return
try:
apply(testMethod,self.__args,self.__kw)
except AssertionError, e:
result.addFailure(self,self.__exc_info())
except:
result.addError(self,self.__exc_info())
try:
self.tearDown()
except:
result.addError(self,self.__exc_info())
finally:
result.stopTest(self)
def debug(self):
"""Run the test without collecting errors in a TestResult"""
self.setUp()
getattr(self, self.__testMethodName)()
self.tearDown()
def assert_(self, expr, msg=None):
"""Equivalent of built-in 'assert', but is not optimised out when
__debug__ is false.
"""
if not expr:
raise AssertionError, msg
failUnless = assert_
def failIf(self, expr, msg=None):
"Fail the test if the expression is true."
apply(self.assert_,(not expr,msg))
def assertRaises(self, excClass, callableObj, *args, **kwargs):
    """Assert that an exception of class excClass is thrown
    by callableObj when invoked with arguments args and keyword
    arguments kwargs. If a different type of exception is
    thrown, it will not be caught, and the test case will be
    deemed to have suffered an error, exactly as for an
    unexpected exception.
    """
    try:
        apply(callableObj, args, kwargs)
    except excClass:
        # The expected exception was raised: the assertion holds.
        return
    else:
        # Nothing was raised: fail, naming the exception we expected.
        if hasattr(excClass,'__name__'): excName = excClass.__name__
        else: excName = str(excClass)
        raise AssertionError, excName
def assertEqual(self, first, second, msg=None):
    """Fail, via assert_, unless the two values compare equal with '=='."""
    default = '%s != %s' % (first, second)
    self.assert_(first == second, msg or default)
def fail(self, msg=None):
    """Fail immediately, with the given message."""
    # AssertionError is what __call__ records as a test "failure".
    raise AssertionError, msg
def __exc_info(self):
    """Return sys.exc_info() with the top traceback frame trimmed.

    The outermost frame belongs to this module's own test-running
    machinery and only adds noise to failure reports; drop it whenever a
    deeper frame exists.
    """
    exctype, excvalue, tb = sys.exc_info()
    if tb.tb_next is None:
        return (exctype, excvalue, tb)
    return (exctype, excvalue, tb.tb_next)
class TestSuite:
    """A composite test made up of a number of TestCases (or other suites).

    Create an instance, add test case instances with addTest()/addTests(),
    then hand the suite to a runner such as TextTestRunner.  Member tests
    run in insertion order and their results are aggregated.  Subclasses
    must remember to call this constructor.
    """
    def __init__(self, tests=()):
        self._tests = []
        self.addTests(tests)

    def __repr__(self):
        return "<%s tests=%s>" % (self.__class__, self._tests)

    __str__ = __repr__

    def countTestCases(self):
        """Sum of the test counts of every member."""
        total = 0
        for member in self._tests:
            total = total + member.countTestCases()
        return total

    def addTest(self, test):
        """Append a single test (TestCase or TestSuite) to this suite."""
        self._tests.append(test)

    def addTests(self, tests):
        """Append each test from an iterable of tests."""
        for member in tests:
            self.addTest(member)

    def run(self, result):
        """Run every member test, recording into *result*; returns *result*."""
        return self(result)

    def __call__(self, result):
        for member in self._tests:
            # Honour a runner's request (e.g. Ctrl-C) to stop early.
            if result.shouldStop:
                break
            member(result)
        return result

    def debug(self):
        """Run the tests without collecting errors in a TestResult."""
        for member in self._tests:
            member.debug()
class FunctionTestCase(TestCase):
    """A test case wrapping a bare test function.

    Useful for slipping pre-existing test functions into the PyUnit
    framework.  Optional set-up and tidy-up callables may be supplied; as
    with TestCase, the tidy-up ('tearDown') function is always invoked
    when the set-up ('setUp') function has run successfully.
    """
    def __init__(self, testFunc, setUp=None, tearDown=None,
                 description=None):
        TestCase.__init__(self)
        self.__setUpFunc = setUp
        self.__tearDownFunc = tearDown
        self.__testFunc = testFunc
        self.__description = description

    def setUp(self):
        if self.__setUpFunc is not None:
            self.__setUpFunc()

    def tearDown(self):
        if self.__tearDownFunc is not None:
            self.__tearDownFunc()

    def runTest(self):
        self.__testFunc()

    def id(self):
        return self.__testFunc.__name__

    def __str__(self):
        return "%s (%s)" % (self.__class__, self.__testFunc.__name__)

    def __repr__(self):
        return "<%s testFunc=%s>" % (self.__class__, self.__testFunc)

    def shortDescription(self):
        # An explicit description wins; otherwise fall back to the first
        # line of the wrapped function's docstring, if any.
        if self.__description is not None:
            return self.__description
        doc = self.__testFunc.__doc__
        return doc and string.strip(string.split(doc, "\n")[0]) or None
##############################################################################
# Convenience functions
##############################################################################
def getTestCaseNames(testCaseClass, prefix, sortUsing=cmp):
    """Collect the names of test methods in a class and its bases.

    A name qualifies when it starts with *prefix*.  The combined list is
    sorted with *sortUsing* (a cmp-style function) when one is given.
    Used by makeSuite().
    """
    names = [n for n in dir(testCaseClass) if n[:len(prefix)] == prefix]
    for base in testCaseClass.__bases__:
        # Recurse unsorted; a single sort at the end is enough.
        names = names + getTestCaseNames(base, prefix, sortUsing=None)
    if sortUsing:
        names.sort(sortUsing)
    return names
def makeSuite(testCaseClass, prefix='test', sortUsing=cmp, suiteClass=TestSuite):
    """Build a suite with one case per matching test method.

    Methods of *testCaseClass* whose names begin with *prefix* are
    instantiated as individual cases, ordered by *sortUsing* (a cmp-style
    function, defaulting to 'cmp').
    """
    names = getTestCaseNames(testCaseClass, prefix, sortUsing)
    cases = [testCaseClass(n) for n in names]
    return suiteClass(cases)
def findTestCases(module, prefix='test', sortUsing=cmp, suiteClass=TestSuite):
    """Build a suite from every TestCase subclass found in *module*."""
    import types
    suites = []
    for name in dir(module):
        candidate = getattr(module, name)
        if type(candidate) == types.ClassType and issubclass(candidate, TestCase):
            suites.append(makeSuite(candidate, prefix=prefix,
                                    sortUsing=sortUsing, suiteClass=suiteClass))
    return suiteClass(suites)
def createTestInstance(name, module=None, suiteClass=TestSuite):
    """Finds tests by their name, optionally only within the given module.

    Return the newly-constructed test, ready to run. If the name contains a ':'
    then the portion of the name after the colon is used to find a specific
    test case within the test case class named before the colon.

    Examples:
     findTest('examples.listtests.suite')
        -- returns result of calling 'suite'
     findTest('examples.listtests.ListTestCase:checkAppend')
        -- returns result of calling ListTestCase('checkAppend')
     findTest('examples.listtests.ListTestCase:check-')
        -- returns result of calling makeSuite(ListTestCase, prefix="check")
    """
    # Split off an optional ':casename' / ':prefix-' suffix.
    spec = string.split(name, ':')
    if len(spec) > 2: raise ValueError, "illegal test name: %s" % name
    if len(spec) == 1:
        testName = spec[0]
        caseName = None
    else:
        testName, caseName = spec
    parts = string.split(testName, '.')
    if module is None:
        # Without an explicit module, the name must carry at least
        # 'module.attribute'.  __import__ returns the top-level package,
        # so walk the remaining dotted parts with getattr below.
        if len(parts) < 2:
            raise ValueError, "incomplete test name: %s" % name
        constructor = __import__(string.join(parts[:-1],'.'))
        parts = parts[1:]
    else:
        constructor = module
    for part in parts:
        constructor = getattr(constructor, part)
    if not callable(constructor):
        raise ValueError, "%s is not a callable object" % constructor
    if caseName:
        if caseName[-1] == '-':
            # A trailing '-' means "all test methods with this prefix".
            prefix = caseName[:-1]
            if not prefix:
                raise ValueError, "prefix too short: %s" % name
            test = makeSuite(constructor, prefix=prefix, suiteClass=suiteClass)
        else:
            test = constructor(caseName)
    else:
        test = constructor()
    # Whatever was constructed must quack like a test (duck-typed check).
    if not hasattr(test,"countTestCases"):
        raise TypeError, \
              "object %s found with spec %s is not a test" % (test, name)
    return test
##############################################################################
# Text UI
##############################################################################
class _WritelnDecorator:
    """Wrap a file-like object, adding a handy 'writeln' method."""
    def __init__(self, stream):
        self.stream = stream
        # Under JPython, ask the Java runtime for the platform line
        # separator instead of trusting os.linesep.
        if _isJPython:
            import java.lang.System
            self.linesep = java.lang.System.getProperty("line.separator")
        else:
            self.linesep = os.linesep

    def __getattr__(self, attr):
        # Anything not defined here is delegated to the wrapped stream.
        return getattr(self.stream, attr)

    def writeln(self, *args):
        if args:
            apply(self.write, args)
        self.write(self.linesep)
class _JUnitTextTestResult(TestResult):
    """A test result class that can print formatted text results to a stream.

    Used by JUnitTextTestRunner; the output approximates JUnit's 'textui'
    runner, writing one progress character per test.
    """
    def __init__(self, stream):
        self.stream = stream
        TestResult.__init__(self)

    def addError(self, test, error):
        TestResult.addError(self, test, error)
        self.stream.write('E')
        self.stream.flush()
        # A Ctrl-C should end the whole run, not just this one test.
        if error[0] is KeyboardInterrupt:
            self.shouldStop = 1

    def addFailure(self, test, error):
        TestResult.addFailure(self, test, error)
        self.stream.write('F')
        self.stream.flush()

    def startTest(self, test):
        TestResult.startTest(self, test)
        self.stream.write('.')
        self.stream.flush()

    def printNumberedErrors(self, errFlavour, errors):
        """Print a numbered, formatted traceback for each recorded problem."""
        if not errors:
            return
        if len(errors) == 1:
            self.stream.writeln("There was 1 %s:" % errFlavour)
        else:
            self.stream.writeln("There were %i %ss:" %
                                (len(errors), errFlavour))
        number = 1
        for test, error in errors:
            text = string.join(apply(traceback.format_exception, error), "")
            self.stream.writeln("%i) %s" % (number, test))
            self.stream.writeln(text)
            number = number + 1

    def printErrors(self):
        self.printNumberedErrors("error", self.errors)

    def printFailures(self):
        self.printNumberedErrors("failure", self.failures)

    def printHeader(self):
        self.stream.writeln()
        if self.wasSuccessful():
            self.stream.writeln("OK (%i tests)" % self.testsRun)
        else:
            self.stream.writeln("!!!FAILURES!!!")
            self.stream.writeln("Test Results")
            self.stream.writeln()
            self.stream.writeln("Run: %i ; Failures: %i ; Errors: %i" %
                                (self.testsRun, len(self.failures),
                                 len(self.errors)))

    def printResult(self):
        """Print the summary header followed by all errors and failures."""
        self.printHeader()
        self.printErrors()
        self.printFailures()
class JUnitTextTestRunner:
    """Run tests and display results in JUnit 'textui'-style text.

    This test runner may be removed in a future version of PyUnit.
    """
    def __init__(self, stream=sys.stderr):
        self.stream = _WritelnDecorator(stream)

    def run(self, test):
        "Run the given test case or test suite."
        result = _JUnitTextTestResult(self.stream)
        started = time.time()
        test(result)
        finished = time.time()
        self.stream.writeln()
        self.stream.writeln("Time: %.3fs" % float(finished - started))
        result.printResult()
        return result
##############################################################################
# Verbose text UI
##############################################################################
class _VerboseTextTestResult(TestResult):
    """A test result class that narrates each test to a stream as it runs.

    Used by VerboseTextTestRunner.
    """
    def __init__(self, stream, descriptions):
        TestResult.__init__(self)
        self.stream = stream
        self.lastFailure = None          # most recent test that failed/errored
        self.descriptions = descriptions # prefer shortDescription() labels?

    def startTest(self, test):
        TestResult.startTest(self, test)
        if self.descriptions:
            label = test.shortDescription() or str(test)
        else:
            label = str(test)
        self.stream.write(label)
        self.stream.write(" ... ")

    def stopTest(self, test):
        TestResult.stopTest(self, test)
        # Only report "ok" when no failure/error was recorded for this test.
        if self.lastFailure is not test:
            self.stream.writeln("ok")

    def addError(self, test, err):
        TestResult.addError(self, test, err)
        self._printError("ERROR", test, err)
        self.lastFailure = test
        # A Ctrl-C should end the whole run, not just this one test.
        if err[0] is KeyboardInterrupt:
            self.shouldStop = 1

    def addFailure(self, test, err):
        TestResult.addFailure(self, test, err)
        self._printError("FAIL", test, err)
        self.lastFailure = test

    def _printError(self, flavour, test, err):
        """Write a tab-indented banner and traceback for one problem."""
        separator1 = "\t" + '=' * 70
        separator2 = "\t" + '-' * 70
        if self.lastFailure is not test:
            self.stream.writeln()
        self.stream.writeln(separator1)
        self.stream.writeln("\t%s" % flavour)
        self.stream.writeln(separator2)
        for line in apply(traceback.format_exception, err):
            # format_exception lines end in "\n"; drop the trailing empty
            # piece so each printed line gets exactly one tab prefix.
            for part in string.split(line, "\n")[:-1]:
                self.stream.writeln("\t%s" % part)
        self.stream.writeln(separator1)
class VerboseTextTestRunner:
    """A test runner class that displays results in textual form.

    It prints out the names of tests as they are run, errors as they
    occur, and a summary of the results at the end of the test run.

    stream       -- file-like object the report is written to (default:
                    sys.stderr)
    descriptions -- when true, label tests with shortDescription() where
                    available
    """
    def __init__(self, stream=sys.stderr, descriptions=1):
        self.stream = _WritelnDecorator(stream)
        self.descriptions = descriptions

    def run(self, test):
        "Run the given test case or test suite."
        result = _VerboseTextTestResult(self.stream, self.descriptions)
        startTime = time.time()
        test(result)
        stopTime = time.time()
        timeTaken = float(stopTime - startTime)
        self.stream.writeln("-" * 78)
        run = result.testsRun
        # Pluralize whenever run != 1: "1 test" but "0 tests"/"2 tests".
        # (The previous `run > 1` check wrongly printed "Ran 0 test".)
        self.stream.writeln("Ran %d test%s in %.3fs" %
                            (run, run != 1 and "s" or "", timeTaken))
        self.stream.writeln()
        if not result.wasSuccessful():
            self.stream.write("FAILED (")
            failed, errored = map(len, (result.failures, result.errors))
            if failed:
                self.stream.write("failures=%d" % failed)
            if errored:
                if failed:
                    self.stream.write(", ")
                self.stream.write("errors=%d" % errored)
            self.stream.writeln(")")
        else:
            self.stream.writeln("OK")
        return result
# Which flavour of TextTestRunner is the default?  The verbose flavour
# (the JUnit-style runner may be removed in a future version of PyUnit).
TextTestRunner = VerboseTextTestRunner
##############################################################################
# Facilities for running tests from the command line
##############################################################################
class TestProgram:
    """A command-line program that runs a set of tests; this is primarily
    for making test modules conveniently executable.

    Constructing an instance parses argv and runs the tests immediately;
    the process exits with status 0 on success, 1 on test failure, and 2
    on a usage error.
    """
    USAGE = """\
Usage: %(progName)s [-h|--help] [test[:(casename|prefix-)]] [...]
Examples:
%(progName)s - run default set of tests
%(progName)s MyTestSuite - run suite 'MyTestSuite'
%(progName)s MyTestCase:checkSomething - run MyTestCase.checkSomething
%(progName)s MyTestCase:check- - run all 'check*' test methods
in MyTestCase
"""
    def __init__(self, module='__main__', defaultTest=None,
                 argv=None, testRunner=None, suiteClass=TestSuite):
        # *module* may be given by dotted name; import it and walk down
        # to the leaf attribute (mirrors __import__'s top-level return).
        if type(module) == type(''):
            self.module = __import__(module)
            for part in string.split(module,'.')[1:]:
                self.module = getattr(self.module, part)
        else:
            self.module = module
        if argv is None:
            argv = sys.argv
        self.defaultTest = defaultTest
        self.testRunner = testRunner
        self.suiteClass = suiteClass
        self.progName = os.path.basename(argv[0])
        self.parseArgs(argv)
        self.runTests()

    def usageExit(self, msg=None):
        # Print usage (optionally preceded by an error message) and exit
        # with status 2, the conventional usage-error code.
        if msg: print msg
        print self.USAGE % self.__dict__
        sys.exit(2)

    def parseArgs(self, argv):
        """Parse command-line options and build self.test accordingly."""
        import getopt
        try:
            options, args = getopt.getopt(argv[1:], 'hH', ['help'])
            opts = {}
            for opt, value in options:
                if opt in ('-h','-H','--help'):
                    self.usageExit()
            if len(args) == 0 and self.defaultTest is None:
                # No test names and no default: run every TestCase
                # subclass found in the target module.
                self.test = findTestCases(self.module,
                                          suiteClass=self.suiteClass)
                return
            if len(args) > 0:
                self.testNames = args
            else:
                self.testNames = (self.defaultTest,)
            self.createTests()
        except getopt.error, msg:
            self.usageExit(msg)

    def createTests(self):
        # Resolve each requested name into a runnable test instance and
        # wrap the lot in a single suite.
        tests = []
        for testName in self.testNames:
            tests.append(createTestInstance(testName, self.module,
                                            suiteClass=self.suiteClass))
        self.test = self.suiteClass(tests)

    def runTests(self):
        # Exit status reflects the outcome: 0 on success, 1 otherwise.
        if self.testRunner is None:
            self.testRunner = TextTestRunner()
        result = self.testRunner.run(self.test)
        sys.exit(not result.wasSuccessful())
main = TestProgram
##############################################################################
# Executing this module from the command line
##############################################################################
if __name__ == "__main__":
    # NOTE(review): module=None bypasses the string-import branch of
    # TestProgram.__init__, so test names are presumably expected on the
    # command line — confirm against TestProgram.parseArgs.
    main(module=None)
...@@ -30,7 +30,7 @@ SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. ...@@ -30,7 +30,7 @@ SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
""" """
__author__ = "Steve Purcell (stephen_purcell@yahoo.com)" __author__ = "Steve Purcell (stephen_purcell@yahoo.com)"
__version__ = "$Revision: 1.20 $"[11:-2] __version__ = "$Revision: 1.1.4.1 $"[11:-2]
import time import time
import sys import sys
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment