Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
e6b5d0c3
Commit
e6b5d0c3
authored
Mar 15, 2001
by
Jim Fulton
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Merged changes from Catalog-BTrees-Integration branch.
parent
22eec3b7
Changes
30
Show whitespace changes
Inline
Side-by-side
Showing
30 changed files
with
3919 additions
and
594 deletions
+3919
-594
lib/Components/ExtensionClass/src/ThreadLock.c
lib/Components/ExtensionClass/src/ThreadLock.c
+21
-11
lib/python/Products/ZCatalog/Catalog.py
lib/python/Products/ZCatalog/Catalog.py
+157
-127
lib/python/Products/ZCatalog/CatalogBrains.py
lib/python/Products/ZCatalog/CatalogBrains.py
+1
-1
lib/python/Products/ZCatalog/Lazy.py
lib/python/Products/ZCatalog/Lazy.py
+9
-7
lib/python/Products/ZCatalog/Vocabulary.py
lib/python/Products/ZCatalog/Vocabulary.py
+2
-2
lib/python/Products/ZCatalog/ZCatalog.py
lib/python/Products/ZCatalog/ZCatalog.py
+36
-15
lib/python/Products/ZCatalog/dtml/catalogIndexes.dtml
lib/python/Products/ZCatalog/dtml/catalogIndexes.dtml
+3
-5
lib/python/Products/ZCatalog/tests/__init__.py
lib/python/Products/ZCatalog/tests/__init__.py
+1
-0
lib/python/Products/ZCatalog/tests/keywords.py
lib/python/Products/ZCatalog/tests/keywords.py
+41
-0
lib/python/Products/ZCatalog/tests/testCatalog.py
lib/python/Products/ZCatalog/tests/testCatalog.py
+998
-0
lib/python/Products/ZCatalog/tests/testCatalogTiming.py
lib/python/Products/ZCatalog/tests/testCatalogTiming.py
+237
-0
lib/python/SearchIndex/GlobbingLexicon.py
lib/python/SearchIndex/GlobbingLexicon.py
+55
-30
lib/python/SearchIndex/Index.py
lib/python/SearchIndex/Index.py
+10
-12
lib/python/SearchIndex/Lexicon.py
lib/python/SearchIndex/Lexicon.py
+61
-35
lib/python/SearchIndex/ResultList.py
lib/python/SearchIndex/ResultList.py
+38
-35
lib/python/SearchIndex/TextIndex.py
lib/python/SearchIndex/TextIndex.py
+10
-10
lib/python/SearchIndex/UnIndex.py
lib/python/SearchIndex/UnIndex.py
+119
-63
lib/python/SearchIndex/UnKeywordIndex.py
lib/python/SearchIndex/UnKeywordIndex.py
+33
-48
lib/python/SearchIndex/UnTextIndex.py
lib/python/SearchIndex/UnTextIndex.py
+156
-184
lib/python/SearchIndex/randid.py
lib/python/SearchIndex/randid.py
+91
-0
lib/python/SearchIndex/tests/__init__.py
lib/python/SearchIndex/tests/__init__.py
+1
-0
lib/python/SearchIndex/tests/testSplitter.py
lib/python/SearchIndex/tests/testSplitter.py
+141
-0
lib/python/SearchIndex/tests/testUnKeywordIndex.py
lib/python/SearchIndex/tests/testUnKeywordIndex.py
+214
-0
lib/python/SearchIndex/tests/testUnTextIndex.py
lib/python/SearchIndex/tests/testUnTextIndex.py
+293
-0
lib/python/SearchIndex/tests/test_UnIndex.py
lib/python/SearchIndex/tests/test_UnIndex.py
+62
-8
lib/python/Testing/__init__.py
lib/python/Testing/__init__.py
+113
-0
lib/python/Testing/dispatcher.py
lib/python/Testing/dispatcher.py
+181
-0
lib/python/Testing/makerequest.py
lib/python/Testing/makerequest.py
+111
-0
lib/python/Testing/unittest.py
lib/python/Testing/unittest.py
+723
-0
lib/python/unittest.py
lib/python/unittest.py
+1
-1
No files found.
lib/Components/ExtensionClass/src/ThreadLock.c
View file @
e6b5d0c3
...
...
@@ -33,7 +33,7 @@
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
$Id: ThreadLock.c,v 1.
7 1999/02/19 16:10:05
jim Exp $
$Id: ThreadLock.c,v 1.
8 2001/03/15 13:16:21
jim Exp $
If you have questions regarding this software,
contact:
...
...
@@ -46,7 +46,7 @@
*/
static
char
ThreadLock_module_documentation
[]
=
""
"
\n
$Id: ThreadLock.c,v 1.
7 1999/02/19 16:10:05
jim Exp $"
"
\n
$Id: ThreadLock.c,v 1.
8 2001/03/15 13:16:21
jim Exp $"
;
#include "Python.h"
...
...
@@ -93,8 +93,9 @@ typedef struct {
staticforward
PyTypeObject
ThreadLockType
;
static
int
cacquire
(
ThreadLockObject
*
self
)
cacquire
(
ThreadLockObject
*
self
,
int
wait
)
{
int
acquired
=
1
;
#ifdef WITH_THREAD
long
id
=
get_thread_ident
();
#else
...
...
@@ -113,19 +114,26 @@ cacquire(ThreadLockObject *self)
{
#ifdef WITH_THREAD
Py_BEGIN_ALLOW_THREADS
acquire
_lock
(
self
->
lock
,
1
);
acquire
d
=
acquire_lock
(
self
->
lock
,
wait
?
WAIT_LOCK
:
NOWAIT_LOCK
);
Py_END_ALLOW_THREADS
#endif
if
(
acquired
)
{
self
->
count
=
0
;
self
->
id
=
id
;
}
return
0
;
}
return
acquired
;
}
static
PyObject
*
acquire
(
ThreadLockObject
*
self
,
PyObject
*
args
)
{
if
(
cacquire
(
self
)
<
0
)
return
NULL
;
int
wait
=
-
1
,
acquired
;
if
(
!
PyArg_ParseTuple
(
args
,
"|i"
,
&
wait
))
return
NULL
;
acquired
=
cacquire
(
self
,
wait
);
if
(
acquired
<
0
)
return
NULL
;
if
(
wait
>=
0
)
return
PyInt_FromLong
(
acquired
);
Py_INCREF
(
Py_None
);
return
Py_None
;
}
...
...
@@ -138,6 +146,7 @@ crelease(ThreadLockObject *self)
#else
long
id
=
1
;
#endif
if
(
self
->
count
>=
0
&&
self
->
id
==
id
)
{
/* Somebody has locked me. It is either the current thread or
...
...
@@ -161,6 +170,7 @@ crelease(ThreadLockObject *self)
static
PyObject
*
release
(
ThreadLockObject
*
self
,
PyObject
*
args
)
{
if
(
!
PyArg_ParseTuple
(
args
,
""
))
return
NULL
;
if
(
crelease
(
self
)
<
0
)
return
NULL
;
Py_INCREF
(
Py_None
);
return
Py_None
;
...
...
@@ -172,7 +182,7 @@ call_method(ThreadLockObject *self, PyObject *args)
PyObject
*
f
,
*
a
=
0
,
*
k
=
0
;
UNLESS
(
PyArg_ParseTuple
(
args
,
"OO|O"
,
&
f
,
&
a
,
&
k
))
return
NULL
;
if
(
cacquire
(
self
)
<
0
)
return
NULL
;
if
(
cacquire
(
self
,
-
1
)
<
0
)
return
NULL
;
f
=
PyEval_CallObjectWithKeywords
(
f
,
a
,
k
);
if
(
crelease
(
self
)
<
0
)
{
...
...
@@ -189,7 +199,7 @@ static struct PyMethodDef ThreadLock_methods[] = {
"Acquire the lock, call the function, and then release the lock.
\n
"
},
{
"acquire"
,
(
PyCFunction
)
acquire
,
1
,
"acquire() -- Acquire a lock, taking the thread ID into account"
"acquire(
[wait]
) -- Acquire a lock, taking the thread ID into account"
},
{
"release"
,
(
PyCFunction
)
release
,
1
,
"release() -- Release a lock, taking the thread ID into account"
...
...
@@ -296,7 +306,7 @@ void
initThreadLock
()
{
PyObject
*
m
,
*
d
;
char
*
rev
=
"$Revision: 1.
7
$"
;
char
*
rev
=
"$Revision: 1.
8
$"
;
m
=
Py_InitModule4
(
"ThreadLock"
,
Module_methods
,
ThreadLock_module_documentation
,
...
...
lib/python/Products/ZCatalog/Catalog.py
View file @
e6b5d0c3
...
...
@@ -86,9 +86,6 @@
from
Persistence
import
Persistent
import
Acquisition
import
ExtensionClass
import
BTree
,
OIBTree
,
IOBTree
,
IIBTree
IIBucket
=
IIBTree
.
Bucket
from
intSet
import
intSet
from
SearchIndex
import
UnIndex
,
UnTextIndex
,
UnKeywordIndex
,
Query
from
SearchIndex.Lexicon
import
Lexicon
import
regex
,
pdb
...
...
@@ -101,14 +98,13 @@ from zLOG import LOG, ERROR
from
Lazy
import
LazyMap
,
LazyFilter
,
LazyCat
from
CatalogBrains
import
AbstractCatalogBrain
,
NoBrainer
from
BTrees.IIBTree
import
intersection
,
weightedIntersection
from
BTrees.OIBTree
import
OIBTree
from
BTrees.IOBTree
import
IOBTree
import
BTrees.Length
from
SearchIndex.randid
import
randid
import
time
class
KWMultiMapping
(
MultiMapping
):
def
has_key
(
self
,
name
):
try
:
r
=
self
[
name
]
return
1
except
KeyError
:
return
0
def
orify
(
seq
,
query_map
=
{
...
...
@@ -118,7 +114,7 @@ def orify(seq,
subqueries
=
[]
for
q
in
seq
:
try
:
q
=
query_map
[
type
(
q
)](
q
)
except
:
q
=
Query
.
Cmp
(
q
)
except
KeyError
:
q
=
Query
.
Cmp
(
q
)
subqueries
.
append
(
q
)
return
apply
(
Query
.
Or
,
tuple
(
subqueries
))
...
...
@@ -152,9 +148,8 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# object unique identifier to the rid, and self.paths is a
# mapping of the rid to the unique identifier.
self
.
data
=
BTree
.
BTree
()
# mapping of rid to meta_data
self
.
uids
=
OIBTree
.
BTree
()
# mapping of uid to rid
self
.
paths
=
IOBTree
.
BTree
()
# mapping of rid to uid
self
.
__len__
=
BTrees
.
Length
.
Length
()
self
.
clear
()
# indexes can share a lexicon or have a private copy. Here,
# we instantiate a lexicon to be shared by all text indexes.
...
...
@@ -163,7 +158,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
if
type
(
vocabulary
)
is
type
(
''
):
self
.
lexicon
=
vocabulary
else
:
#ack!
self
.
lexicon
=
Lexicon
()
if
brains
is
not
None
:
...
...
@@ -171,6 +165,52 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
self
.
updateBrains
()
def
clear
(
self
):
""" clear catalog """
self
.
data
=
IOBTree
()
# mapping of rid to meta_data
self
.
uids
=
OIBTree
()
# mapping of uid to rid
self
.
paths
=
IOBTree
()
# mapping of rid to uid
# convert old-style Catalog object to new in-place
try
:
self
.
__len__
.
set
(
0
)
except
AttributeError
:
self
.
__len__
=
BTrees
.
Length
.
Length
()
for
x
in
self
.
indexes
.
values
():
x
.
clear
()
def
_convertBTrees
(
self
,
threshold
=
200
):
from
BTrees.convert
import
convert
if
type
(
self
.
data
)
is
not
IOBTree
:
data
=
self
.
data
self
.
data
=
IOBTree
()
convert
(
data
,
self
.
data
,
threshold
)
uids
=
self
.
uids
self
.
uids
=
OIBTree
()
convert
(
uids
,
self
.
uids
,
threshold
)
paths
=
self
.
paths
self
.
paths
=
IOBTree
()
convert
(
paths
,
self
.
paths
,
threshold
)
self
.
__len__
=
BTrees
.
Length
.
Length
()
for
index
in
self
.
indexes
.
values
():
index
.
_convertBTrees
(
threshold
)
lexicon
=
self
.
lexicon
if
type
(
lexicon
)
is
type
(
''
):
lexicon
=
getattr
(
self
,
lexicon
).
lexicon
lexicon
.
_convertBTrees
(
threshold
)
def
__len__
(
self
):
# NOTE, this is never called for new catalogs, since
# each instance overrides this.
return
len
(
self
.
data
)
def
updateBrains
(
self
):
self
.
useBrains
(
self
.
_v_brains
)
...
...
@@ -213,7 +253,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
scopy
=
self
.
schema
.
copy
()
# it is useful for our brains to know these things
scopy
[
'data_record_id_'
]
=
len
(
self
.
schema
.
keys
())
scopy
[
'data_record_score_'
]
=
len
(
self
.
schema
.
keys
())
+
1
scopy
[
'data_record_normalized_score_'
]
=
len
(
self
.
schema
.
keys
())
+
2
...
...
@@ -345,33 +384,54 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
'uid' is the unique Catalog identifier for this object
"""
data
=
self
.
data
if
self
.
uids
.
has_key
(
uid
):
index
=
self
.
uids
[
uid
]
elif
data
:
index
=
data
.
keys
()[
-
1
]
+
1
# find the next available unique id
self
.
uids
[
uid
]
=
index
self
.
paths
[
index
]
=
uid
else
:
index
=
0
self
.
uids
[
uid
]
=
index
self
.
paths
[
index
]
=
uid
data
=
self
.
data
# meta_data is stored as a tuple for efficiency
newDataRecord
=
self
.
recordify
(
object
)
oldDataRecord
=
data
.
get
(
index
,
None
)
# Now we need to compare the tuples before we update them!
if
oldDataRecord
is
not
None
:
for
i
in
range
(
len
(
newDataRecord
)):
if
newDataRecord
[
i
]
!=
oldDataRecord
[
i
]:
index
=
self
.
uids
.
get
(
uid
,
None
)
if
index
is
not
None
:
# old data
if
data
.
get
(
index
,
0
)
!=
newDataRecord
:
# Update the meta-data, if necessary
data
[
index
]
=
newDataRecord
break
else
:
# new data
if
type
(
data
)
is
IOBTree
:
# New style, get radom id
index
=
getattr
(
self
,
'_v_nextid'
,
0
)
if
index
%
4000
==
0
:
index
=
randid
()
while
not
data
.
insert
(
index
,
newDataRecord
):
index
=
randid
()
# We want ids to be somewhat random, but there are
# advantages for having some ids generated
# sequentially when many catalog updates are done at
# once, such as when reindexing or bulk indexing.
# We allocate ids sequentially using a volatile base,
# so different threads get different bases. This
# further reduces conflict and reduces churn in
# here and it result sets when bulk indexing.
self
.
_v_nextid
=
index
+
1
else
:
if
data
:
# find the next available unique id
index
=
data
.
keys
()[
-
1
]
+
1
else
:
index
=
0
data
[
index
]
=
newDataRecord
try
:
self
.
__len__
.
change
(
1
)
except
AttributeError
:
pass
# No managed length (old-style)
self
.
uids
[
uid
]
=
index
self
.
paths
[
index
]
=
uid
total
=
0
for
x
in
self
.
indexes
.
values
():
## tricky! indexes need to acquire now, and because they
...
...
@@ -418,6 +478,10 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
LOG
(
'Catalog'
,
ERROR
,
(
'uncatalogObject unsuccessfully '
'attempted to delete rid %s '
'from paths or data btree.'
%
rid
))
else
:
try
:
self
.
__len__
.
change
(
-
1
)
except
AttributeError
:
pass
# No managed length
del
uids
[
uid
]
self
.
data
=
data
else
:
...
...
@@ -425,15 +489,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
'attempted to uncatalog an object '
'with a uid of %s. '
%
uid
))
def
clear
(
self
):
""" clear catalog """
self
.
data
=
BTree
.
BTree
()
self
.
uids
=
OIBTree
.
BTree
()
self
.
paths
=
IOBTree
.
BTree
()
for
x
in
self
.
indexes
.
values
():
x
.
clear
()
def
uniqueValuesFor
(
self
,
name
):
""" return unique values for FieldIndex name """
...
...
@@ -441,26 +496,16 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
def
hasuid
(
self
,
uid
):
""" return the rid if catalog contains an object with uid """
if
self
.
uids
.
has_key
(
uid
):
return
self
.
uids
[
uid
]
else
:
return
None
return
self
.
uids
.
get
(
uid
)
def
recordify
(
self
,
object
):
""" turns an object into a record tuple """
record
=
[]
# the unique id is allways the first element
for
x
in
self
.
names
:
try
:
attr
=
getattr
(
object
,
x
)
if
(
callable
(
attr
)):
attr
=
attr
()
except
:
attr
=
MV
attr
=
getattr
(
object
,
x
,
MV
)
if
(
attr
is
not
MV
and
callable
(
attr
)):
attr
=
attr
()
record
.
append
(
attr
)
return
tuple
(
record
)
def
instantiate
(
self
,
record
):
...
...
@@ -485,12 +530,9 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
## Searching engine. You don't really have to worry about what goes
## on below here... Most of this stuff came from ZTables with tweaks.
def
_indexedSearch
(
self
,
args
,
sort_index
,
append
,
used
,
IIBType
=
type
(
IIBucket
()),
intSType
=
type
(
intSet
())):
def
_indexedSearch
(
self
,
args
,
sort_index
,
append
,
used
):
"""
Iterate through the indexes, applying the query to each one.
Do some magic to join result sets. Be intelligent about
handling intSets and IIBuckets.
"""
rs
=
None
...
...
@@ -498,7 +540,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
if
used
is
None
:
used
=
{}
for
i
in
self
.
indexes
.
keys
():
try
:
index
=
self
.
indexes
[
i
].
__of__
(
self
)
if
hasattr
(
index
,
'_apply_index'
):
r
=
index
.
_apply_index
(
args
)
...
...
@@ -506,74 +547,66 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
r
,
u
=
r
for
name
in
u
:
used
[
name
]
=
1
if
rs
is
None
:
rs
=
r
else
:
# you can't intersect an IIBucket into an
# intSet, but you can go the other way
# around. Make sure we're facing the
# right direction...
if
type
(
rs
)
is
intSType
and
type
(
r
)
is
IIBType
:
rs
=
r
.
intersection
(
rs
)
else
:
rs
=
rs
.
intersection
(
r
)
except
:
return
used
w
,
rs
=
weightedIntersection
(
rs
,
r
)
#assert rs==None or hasattr(rs, 'values') or hasattr(rs, 'keys')
if
rs
is
None
:
# return everything
if
sort_index
is
None
:
rs
=
data
.
items
()
append
(
LazyMap
(
self
.
instantiate
,
rs
))
append
(
LazyMap
(
self
.
instantiate
,
rs
,
len
(
self
)
))
else
:
try
:
for
k
,
intset
in
sort_index
.
_index
.
items
():
for
k
,
intset
in
sort_index
.
items
():
append
((
k
,
LazyMap
(
self
.
__getitem__
,
intset
)))
except
AttributeError
:
raise
ValueError
,
"Incorrect index name passed as "
\
"'sort_on' parameter. Note that you may only "
\
"sort on values for which there is a matching "
\
"index available."
raise
ValueError
,
(
"Incorrect index name passed as"
" 'sort_on' parameter. Note that you may only"
" sort on values for which there is a matching"
" index available."
)
elif
rs
:
if
sort_index
is
None
and
type
(
rs
)
is
IIBType
:
# then there is score information. Build a new result
# set, sort it by score, reverse it, compute the
# normalized score, and Lazify it.
rset
=
[]
for
key
,
score
in
rs
.
items
():
rset
.
append
((
score
,
key
))
rset
.
sort
()
rset
.
reverse
()
# this is reached by having an empty result set (ie non-None)
if
sort_index
is
None
and
hasattr
(
rs
,
'values'
):
# having a 'values' means we have a data structure with
# scores. Build a new result set, sort it by score, reverse
# it, compute the normalized score, and Lazify it.
rset
=
rs
.
byValue
(
0
)
# sort it by score
max
=
float
(
rset
[
0
][
0
])
rs
=
[]
for
score
,
key
in
rset
:
# compute normalized scores
rs
.
append
((
int
((
score
/
max
)
*
100
),
score
,
key
))
append
(
LazyMap
(
self
.
__getitem__
,
rs
))
elif
sort_index
is
None
and
type
(
rs
)
is
intSType
:
elif
sort_index
is
None
and
not
hasattr
(
rs
,
'values'
)
:
# no scores? Just Lazify.
if
hasattr
(
rs
,
'keys'
):
rs
=
rs
.
keys
()
append
(
LazyMap
(
self
.
__getitem__
,
rs
))
else
:
# sort. If there are scores, then this block is not
# reached, therefor 'sort-on' does not happen in the
# context of text index query. This should probably
# sort by relevance first, then the 'sort-on' attribute.
if
len
(
rs
)
>
len
(
sort_index
.
_index
):
for
k
,
intset
in
sort_index
.
_index
.
items
():
if
type
(
rs
)
is
IIBType
:
intset
=
rs
.
intersection
(
intset
)
# Since we still have an IIBucket, let's convert
# it to its set of keys
intset
=
intset
.
keys
()
else
:
intset
=
intset
.
intersection
(
rs
)
if
((
len
(
rs
)
/
4
)
>
len
(
sort_index
)):
# if the sorted index has a quarter as many keys as
# the result set
for
k
,
intset
in
sort_index
.
items
():
# We have an index that has a set of values for
# each sort key, so we interset with each set and
# get a sorted sequence of the intersections.
# This only makes sense if the number of
# keys is much less then the number of results.
intset
=
intersection
(
rs
,
intset
)
if
intset
:
if
hasattr
(
intset
,
'keys'
):
intset
=
intset
.
keys
()
append
((
k
,
LazyMap
(
self
.
__getitem__
,
intset
)))
else
:
if
type
(
rs
)
is
IIBType
:
rs
=
rs
.
keys
()
for
r
in
rs
:
append
((
sort_index
.
_unindex
[
r
],
LazyMap
(
self
.
__getitem__
,[
r
])))
if
hasattr
(
rs
,
'keys'
):
rs
=
rs
.
keys
()
for
did
in
rs
:
append
((
sort_index
.
keyForDocument
(
did
),
LazyMap
(
self
.
__getitem__
,[
did
])))
return
used
...
...
@@ -587,10 +620,10 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# Get search arguments:
if
REQUEST
is
None
and
not
kw
:
try
:
REQUEST
=
self
.
REQUEST
except
:
pass
except
AttributeError
:
pass
if
kw
:
if
REQUEST
:
m
=
KW
MultiMapping
()
m
=
MultiMapping
()
m
.
push
(
REQUEST
)
m
.
push
(
kw
)
kw
=
m
...
...
@@ -599,7 +632,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# Make sure batch size is set
if
REQUEST
and
not
REQUEST
.
has_key
(
'batch_size'
):
try
:
batch_size
=
self
.
default_batch_size
except
:
batch_size
=
20
except
AttributeError
:
batch_size
=
20
REQUEST
[
'batch_size'
]
=
batch_size
# Compute "sort_index", which is a sort index, or none:
...
...
@@ -611,8 +644,10 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
sort_index
=
kw
[
'sort_on'
]
else
:
sort_index
=
None
sort_order
=
''
if
sort_index
is
not
None
and
s
ort_index
in
self
.
indexes
.
keys
(
):
if
sort_index
is
not
None
and
s
elf
.
indexes
.
has_key
(
sort_index
):
sort_index
=
self
.
indexes
[
sort_index
]
if
not
hasattr
(
sort_index
,
'keyForDocument'
):
raise
CatalogError
(
'Invalid sort index'
)
# Perform searches with indexes and sort_index
r
=
[]
...
...
@@ -645,9 +680,4 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
__call__
=
searchResults
class
CatalogError
(
Exception
):
pass
lib/python/Products/ZCatalog/CatalogBrains.py
View file @
e6b5d0c3
...
...
@@ -109,7 +109,7 @@ class AbstractCatalogBrain(Record.Record, Acquisition.Implicit):
def
getObject
(
self
,
REQUEST
=
None
):
"""Try to return the object for this record"""
try
:
obj
=
self
.
aq_parent
.
restrictedTraverse
(
self
.
getPath
())
obj
=
self
.
aq_parent
.
un
restrictedTraverse
(
self
.
getPath
())
if
not
obj
:
if
REQUEST
is
None
:
REQUEST
=
self
.
REQUEST
...
...
lib/python/Products/ZCatalog/Lazy.py
View file @
e6b5d0c3
...
...
@@ -82,8 +82,8 @@
# attributions are listed in the accompanying credits file.
#
##############################################################################
__doc__
=
'''$Id: Lazy.py,v 1.
3 2001/01/15 16:29:23 petrilli
Exp $'''
__version__
=
'$Revision: 1.
3
$'
[
11
:
-
2
]
__doc__
=
'''$Id: Lazy.py,v 1.
4 2001/03/15 13:16:23 jim
Exp $'''
__version__
=
'$Revision: 1.
4
$'
[
11
:
-
2
]
class
Lazy
:
...
...
@@ -148,11 +148,12 @@ class LazyCat(Lazy):
# Lazy concatenation of one or more sequences. Should be handy
# for accessing small parts of big searches.
def
__init__
(
self
,
sequences
):
def
__init__
(
self
,
sequences
,
length
=
None
):
self
.
_seq
=
sequences
self
.
_data
=
[]
self
.
_sindex
=
0
self
.
_eindex
=-
1
if
length
is
not
None
:
self
.
_len
=
length
def
__getitem__
(
self
,
index
):
...
...
@@ -194,11 +195,12 @@ class LazyMap(Lazy):
# Act like a sequence, but get data from a filtering process.
# Don't access data until necessary
def
__init__
(
self
,
func
,
seq
):
def
__init__
(
self
,
func
,
seq
,
length
=
None
):
self
.
_seq
=
seq
self
.
_len
=
len
(
seq
)
self
.
_data
=
[]
self
.
_func
=
func
if
length
is
not
None
:
self
.
_len
=
length
else
:
self
.
_len
=
len
(
seq
)
def
__getitem__
(
self
,
index
):
...
...
@@ -229,7 +231,7 @@ class LazyFilter(Lazy):
# Act like a sequence, but get data from a filtering process.
# Don't access data until necessary
def
__init__
(
self
,
test
,
seq
):
def
__init__
(
self
,
test
,
seq
):
self
.
_seq
=
seq
self
.
_data
=
[]
self
.
_eindex
=-
1
...
...
@@ -270,7 +272,7 @@ class LazyMop(Lazy):
# Act like a sequence, but get data from a filtering process.
# Don't access data until necessary
def
__init__
(
self
,
test
,
seq
):
def
__init__
(
self
,
test
,
seq
):
self
.
_seq
=
seq
self
.
_data
=
[]
self
.
_eindex
=-
1
...
...
lib/python/Products/ZCatalog/Vocabulary.py
View file @
e6b5d0c3
...
...
@@ -112,7 +112,7 @@ class Vocabulary(Item, Persistent, Implicit,
AccessControl
.
Role
.
RoleManager
,
):
"""
A Vocabulary is a user
managable re
lization of a Lexicon object.
A Vocabulary is a user
-managable rea
lization of a Lexicon object.
"""
...
...
@@ -151,7 +151,7 @@ class Vocabulary(Item, Persistent, Implicit,
""" create the lexicon to manage... """
self
.
id
=
id
self
.
title
=
title
self
.
globbing
=
globbing
self
.
globbing
=
not
not
globbing
if
globbing
:
self
.
lexicon
=
GlobbingLexicon
.
GlobbingLexicon
()
...
...
lib/python/Products/ZCatalog/ZCatalog.py
View file @
e6b5d0c3
...
...
@@ -97,14 +97,15 @@ from Persistence import Persistent
from
DocumentTemplate.DT_Util
import
InstanceDict
,
TemplateDict
from
DocumentTemplate.DT_Util
import
Eval
,
expr_globals
from
AccessControl.Permission
import
name_trans
from
Catalog
import
Catalog
,
orify
from
Catalog
import
Catalog
,
orify
,
CatalogError
from
SearchIndex
import
UnIndex
,
UnTextIndex
from
Vocabulary
import
Vocabulary
import
IOBTree
from
Shared.DC.ZRDB.TM
import
TM
from
AccessControl
import
getSecurityManager
from
zLOG
import
LOG
,
ERROR
StringType
=
type
(
''
)
manage_addZCatalogForm
=
DTMLFile
(
'dtml/addZCatalog'
,
globals
())
def
manage_addZCatalog
(
self
,
id
,
title
,
...
...
@@ -225,7 +226,6 @@ class ZCatalog(Folder, Persistent, Implicit):
def
__init__
(
self
,
id
,
title
=
''
,
vocab_id
=
None
,
container
=
None
):
self
.
id
=
id
self
.
title
=
title
self
.
vocab_id
=
vocab_id
self
.
threshold
=
10000
self
.
_v_total
=
0
...
...
@@ -233,11 +233,11 @@ class ZCatalog(Folder, Persistent, Implicit):
if
vocab_id
is
None
:
v
=
Vocabulary
(
'Vocabulary'
,
'Vocabulary'
,
globbing
=
1
)
self
.
_setObject
(
'Vocabulary'
,
v
)
v
=
'Vocabulary'
self
.
vocab_id
=
'Vocabulary'
else
:
v
=
vocab_id
self
.
vocab_id
=
vocab_id
self
.
_catalog
=
Catalog
(
vocabulary
=
v
)
self
.
_catalog
=
Catalog
(
vocabulary
=
self
.
vocab_id
)
self
.
_catalog
.
addColumn
(
'id'
)
self
.
_catalog
.
addIndex
(
'id'
,
'FieldIndex'
)
...
...
@@ -254,6 +254,7 @@ class ZCatalog(Folder, Persistent, Implicit):
self
.
_catalog
.
addColumn
(
'summary'
)
self
.
_catalog
.
addIndex
(
'PrincipiaSearchSource'
,
'TextIndex'
)
def
__len__
(
self
):
return
len
(
self
.
_catalog
)
def
getVocabulary
(
self
):
""" more ack! """
...
...
@@ -406,8 +407,20 @@ class ZCatalog(Folder, Persistent, Implicit):
RESPONSE
.
redirect
(
URL1
+
'/manage_catalogIndexes?manage_tabs_message=Index%20Deleted'
)
def
catalog_object
(
self
,
obj
,
uid
):
def
catalog_object
(
self
,
obj
,
uid
=
None
):
""" wrapper around catalog """
if
uid
is
None
:
try
:
uid
=
obj
.
getPhysicalPath
except
AttributeError
:
raise
CatalogError
(
"A cataloged object must support the 'getPhysicalPath' "
"method if no unique id is provided when cataloging"
)
else
:
uid
=
string
.
join
(
uid
(),
'/'
)
elif
type
(
uid
)
is
not
StringType
:
raise
CatalogError
(
'The object unique id must be a string.'
)
self
.
_catalog
.
catalogObject
(
obj
,
uid
,
None
)
# None passed in to catalogObject as third argument indicates
# that we shouldn't try to commit subtransactions within any
...
...
@@ -433,7 +446,7 @@ class ZCatalog(Folder, Persistent, Implicit):
# exceeded within the boundaries of the current transaction.
if
self
.
_v_total
>
self
.
threshold
:
get_transaction
().
commit
(
1
)
self
.
_p_jar
.
cacheFullSweep
(
1
)
self
.
_p_jar
.
cacheFullSweep
(
3
)
self
.
_v_total
=
0
def
uncatalog_object
(
self
,
uid
):
...
...
@@ -527,7 +540,7 @@ class ZCatalog(Folder, Persistent, Implicit):
if
hasattr
(
self
,
'_product_meta_types'
):
pmt
=
self
.
_product_meta_types
elif
hasattr
(
self
,
'aq_acquire'
):
try
:
pmt
=
self
.
aq_acquire
(
'_product_meta_types'
)
except
:
pass
except
AttributeError
:
pass
return
self
.
meta_types
+
Products
.
meta_types
+
pmt
def
valid_roles
(
self
):
...
...
@@ -659,7 +672,7 @@ class ZCatalog(Folder, Persistent, Implicit):
if
string
.
find
(
path
,
script
)
!=
0
:
path
=
'%s/%s'
%
(
script
,
path
)
try
:
return
REQUEST
.
resolve_url
(
path
)
except
:
return
None
except
:
pass
def
resolve_path
(
self
,
path
):
"""
...
...
@@ -668,10 +681,8 @@ class ZCatalog(Folder, Persistent, Implicit):
style url. If no object is found, None is returned.
No exceptions are raised.
"""
try
:
return
self
.
unrestrictedTraverse
(
path
)
except
:
return
None
try
:
return
self
.
unrestrictedTraverse
(
path
)
except
:
pass
def
manage_normalize_paths
(
self
,
REQUEST
):
"""Ensure that all catalog paths are full physical paths
...
...
@@ -713,6 +724,16 @@ class ZCatalog(Folder, Persistent, Implicit):
'%s unchanged.'
%
(
len
(
fixed
),
len
(
removed
),
unchanged
),
action
=
'./manage_main'
)
def
manage_convertBTrees
(
self
,
threshold
=
200
):
"""Convert the catalog's data structures to use BTrees package"""
tt
=
time
.
time
()
ct
=
time
.
clock
()
self
.
_catalog
.
_convertBTrees
(
threshold
*
1
#make sure ints an int)
)
tt
=
time
.
time
()
-
tt
ct
=
time
.
clock
()
-
ct
return
'Finished conversion in %s seconds (%s cpu)'
%
(
tt
,
ct
)
Globals
.
default__class_init__
(
ZCatalog
)
...
...
lib/python/Products/ZCatalog/dtml/catalogIndexes.dtml
View file @
e6b5d0c3
...
...
@@ -37,8 +37,6 @@ that have one or more keywords specified in a search query.
<div class="list-item">Index Name</div></td>
<td width="20%" align="left" valign="top">
<div class="list-item">Index Type</div></td>
<td width="15%" align="left" valign="top">
<div class="list-item">Size</div></td>
</tr>
</dtml-if>
<dtml-if name="sequence-odd"><tr class="row-normal">
...
...
@@ -49,11 +47,11 @@ that have one or more keywords specified in a search query.
</td>
<td width="60%" align="left" valign="top">
<div class="list-item">
<a href="" target="_index_info_&dtml-id;">&dtml-id;</a></div></td>
&dtml-id;
</div>
</td>
<td width="20%" align="left" valign="top">
<div class="list-item">&dtml-meta_type;</div></td>
<td width="15%" align="left" valign="top"><div class="list-item"
><dtml-var expr="_.len(_['sequence-item'])" thousands_commas>
</div>
</td>
</tr>
...
...
lib/python/Products/ZCatalog/tests/__init__.py
0 → 100644
View file @
e6b5d0c3
# Making tests a package makes debugging easier.
lib/python/Products/ZCatalog/tests/keywords.py
0 → 100644
View file @
e6b5d0c3
import
rfc822
,
mailbox
,
cPickle
,
string
class Keywords:
    """Read a list of RFC 822 messages from a Unix mailbox and collect the
    purely-alphabetic words of each Subject header.  Used only as a test
    fixture: build() persists the word list with cPickle so later test runs
    can reload() it without re-parsing the mailbox.
    """

    def __init__(self):
        # Accumulated unique subject words, in first-seen order.
        self.kw = []

    def build(self, mbox, limit):
        """Scan up to `limit` keywords out of mailbox file `mbox` and
        pickle the resulting list to data/keywords.
        """
        mb = mailbox.UnixMailbox(open(mbox))
        msg = mb.next()
        while msg and len(self.kw) < limit:
            # BUG FIX: messages without a Subject header made .get() return
            # None, and string.split(None, ' ') raised TypeError.  Default
            # to the empty string instead.
            sub = string.split(msg.dict.get("subject", ""), ' ')
            for f in sub:
                # Skip empty tokens (consecutive blanks / missing subject);
                # previously "" could slip through the letters check below.
                if not f:
                    continue
                ok = 1
                for c in f:
                    if not c in string.letters:
                        ok = 0
                if ok == 1 and not f in self.kw:
                    self.kw.append(f)
            msg = mb.next()
        P = cPickle.Pickler(open('data/keywords', 'w'))
        P.dump(self.kw)

    def reload(self):
        """Restore the keyword list previously written by build()."""
        P = cPickle.Unpickler(open('data/keywords', 'r'))
        self.kw = P.load()

    def keywords(self):
        """Return the (possibly empty) keyword list."""
        return self.kw
lib/python/Products/ZCatalog/tests/testCatalog.py
0 → 100755
View file @
e6b5d0c3
#!/usr/bin/env python1.5
"""
Testsuite for testing Catalogs
$Id: testCatalog.py,v 1.2 2001/03/15 13:16:24 jim Exp $
Andreas Jung, andreas@digicool.com
$Log: testCatalog.py,v $
Revision 1.2 2001/03/15 13:16:24 jim
Merged changes from Catalog-BTrees-Integration branch.
Revision 1.1.4.11 2001/03/14 18:43:16 andreas
rearranged source code
Revision 1.1.4.10 2001/03/14 15:12:24 andreas
minor changes
Revision 1.1.4.9 2001/03/13 22:45:07 andreas
yet another try/except clause (zope mbox file seems to contain some sloppy
messages)
Revision 1.1.4.8 2001/03/13 22:04:20 andreas
added try/except while reading and parsing the mbox file
Revision 1.1.4.7 2001/03/13 16:51:07 andreas
code cleanup
Revision 1.1.4.6 2001/03/13 14:37:40 andreas
prelimary version for integration into the Zope testsuites
Revision 1.1.4.5 2001/03/11 22:33:40 andreas
commit
Revision 1.1.2.23 2001/03/09 16:06:10 andreas
integrated chris unittestCatalog.py
Revision 1.1.2.22 2001/03/09 15:05:28 andreas
rewrote testUpdates()
Revision 1.1.2.21 2001/03/08 18:42:28 andreas
fixed typo
Revision 1.1.4.4 2001/03/08 12:14:27 andreas
minor changes
Revision 1.1.2.20 2001/03/07 14:58:40 andreas
*** empty log message ***
Revision 1.1.2.19 2001/03/07 14:07:51 andreas
Code cleanup
Revision 1.1.2.18 2001/03/07 12:46:32 andreas
added advanced tests
Revision 1.1.2.17 2001/03/07 10:28:27 andreas
reworked version now using the new thread dispatcher
Revision 1.1.2.16 2001/03/05 15:14:51 andreas
- minor changes in testing catalog/uncatalogObject
- tests must now be started in the lib/python directory
- older input sets are no longer valid (must be recreated)
"""
import
os
,
sys
sys
.
path
.
insert
(
0
,
'.'
)
try
:
import
Testing
except
ImportError
:
sys
.
path
[
0
]
=
"../../.."
import
Testing
os
.
environ
[
'STUPID_LOG_FILE'
]
=
"debug.log"
here
=
os
.
getcwd
()
import
Zope
import
ZODB
,
ZODB
.
FileStorage
from
Products.ZCatalog
import
Catalog
,
ZCatalog
,
Vocabulary
import
Persistence
import
ExtensionClass
from
Testing
import
dispatcher
import
keywords
from
zLOG
import
LOG
from
SearchIndex.UnIndex
import
UnIndex
from
SearchIndex.UnTextIndex
import
UnTextIndex
from
SearchIndex.UnKeywordIndex
import
UnKeywordIndex
from
SearchIndex.Lexicon
import
Lexicon
import
getopt
,
whrandom
,
time
,
string
,
mailbox
,
rfc822
from
Testing
import
unittest
# maximum number of files to read for the test suite
maxFiles
=
1000
# maximum number of threads for stress tests
numThreads
=
4
# number of iterations for searches
searchIterations
=
1000
# number of iterations for catalog/uncatalog operations
updateIterations
=
100
# input mailbox file
mbox
=
os
.
environ
.
get
(
"TESTCATALOG_MBOX"
,
"/usr/home/andreas/zope.mbox"
)
mbox2
=
"/usr/home/andreas/python.mbox"
dataDir
=
""
#
# Don't change anything below
#
class testZODB:
    """ some wrapper stuff around ZODB """
    # Thin convenience wrapper: owns a FileStorage-backed DB and, optionally,
    # one open connection whose root mapping is exposed as self.root.

    def __init__(self, file="data/work/Data.fs", open=1):
        storage = ZODB.FileStorage.FileStorage(file)
        self.db = ZODB.DB(storage)
        if open == 1:
            # Open immediately and cache the root object for read/write.
            self.connection = self.db.open()
            self.root = self.connection.root()

    def write(self, name, obj):
        # Store obj under `name` in the root mapping and commit at once.
        self.root[name] = obj
        get_transaction().commit()

    def read(self, name):
        # Fetch a previously stored object; KeyError if absent.
        return self.root[name]

    def __del__(self):
        # Best-effort close of the underlying database on GC.
        self.db.close()
class
testCatalog
(
Persistence
.
Persistent
,
unittest
.
TestCase
):
""" Wrapper around the catalog stuff """
def
__init__
(
self
,
mboxname
,
maxfiles
):
self
.
msg_ids
=
[]
self
.
num_files
=
0
self
.
keywords
=
[]
self
.
maxfiles
=
maxfiles
self
.
_vocabulary
=
Vocabulary
.
Vocabulary
(
'Vocabulary'
,
'Vocabulary'
,
globbing
=
1
)
self
.
_catalog
=
Catalog
.
Catalog
()
self
.
_catalog
.
addIndex
(
'to'
,
'TextIndex'
)
self
.
_catalog
.
addIndex
(
'sender'
,
'TextIndex'
)
self
.
_catalog
.
addIndex
(
'subject'
,
'TextIndex'
)
self
.
_catalog
.
addIndex
(
'content'
,
'TextIndex'
)
self
.
_catalog
.
addIndex
(
'file_id'
,
'TextIndex'
)
self
.
_catalog
.
addColumn
(
'file_id'
)
self
.
_catalog
.
addIndex
(
'length'
,
'FieldIndex'
)
self
.
_catalog
.
addColumn
(
'length'
)
self
.
_catalog
.
addIndex
(
'date'
,
'FieldIndex'
)
self
.
_catalog
.
addIndex
(
'keywords'
,
"KeywordIndex"
)
self
.
build_catalog
(
mboxname
)
def
build_catalog
(
self
,
mboxname
):
mb
=
mailbox
.
UnixMailbox
(
open
(
mboxname
,
"r"
))
i
=
0
msg
=
mb
.
next
()
while
msg
and
self
.
num_files
<
self
.
maxfiles
:
try
:
self
.
catMessage
(
msg
)
self
.
msg_ids
.
append
(
msg
.
dict
[
"message-id"
])
except
:
msg
=
mb
.
next
()
continue
msg
=
mb
.
next
()
self
.
num_files
=
self
.
num_files
+
1
if
self
.
num_files
%
100
==
0
:
print
self
.
num_files
try
:
sub
=
string
.
split
(
msg
.
dict
.
get
(
"subject"
,
""
))
except
:
msg
=
mb
.
next
()
continue
for
s
in
sub
:
if
not
s
in
self
.
keywords
:
self
.
keywords
.
append
(
s
)
self
.
_catalog
.
aq_parent
=
None
def
catMessage
(
self
,
m
):
self
.
_catalog
.
catalogObject
(
testMessage
(
m
)
,
m
.
dict
[
"message-id"
]
)
def
uncatMessage
(
self
,
uid
):
self
.
_catalog
.
uncatalogObject
(
uid
)
class testMessage(ExtensionClass.Base):
    """Lightweight indexable stand-in for one rfc822 message: exposes the
    attributes (sender, subject, to, content, keywords, file_id, length,
    date) that the test catalog's indexes read.
    """

    def __init__(self, msg, modify_doc=0):
        self.sender = msg.dict.get("from", "")
        self.subject = msg.dict.get("subject", "")
        self.to = msg.dict.get("to", "")
        self.content = str(msg)
        self.keywords = string.split(self.subject, " ")
        if modify_doc != 0:
            # Reversed keywords simulate a modified document for the
            # reindexing tests.
            self.keywords = map(self.reverse, self.keywords)
        self.file_id = msg.dict.get("message-id", "")
        self.length = len(str(msg))
        date = msg.dict.get("date", "")
        # BUG FIX: on unparseable/missing Date headers self.date was never
        # assigned, leaving the attribute absent for the 'date' FieldIndex.
        # Default to 0 so every instance has a date value.
        self.date = 0
        try:
            self.date = time.mktime(rfc822.parsedate(date)[:9])
        except:
            pass

    def reverse(self, s):
        # Return s with its characters reversed.
        l = list(s)
        l.reverse()
        return string.join(l, "")

    def __del__(self):
        pass
class
BuildEnv
(
dispatcher
.
Dispatcher
,
unittest
.
TestCase
):
""" build environment """
def
__init__
(
self
,
func
,
*
args
,
**
kw
):
unittest
.
TestCase
.
__init__
(
self
,
func
,
args
,
kw
)
dispatcher
.
Dispatcher
.
__init__
(
self
,
func
)
self
.
init_phase
=
0
self
.
setlog
(
open
(
"dispatcher.log"
,
"a"
)
)
self
.
logn
(
'treads=%d searchiterations=%d'
%
(
numThreads
,
searchIterations
))
self
.
logn
(
'updateiterations=%d maxfiles=%d'
%
(
updateIterations
,
maxFiles
))
#############################################################
# Build up ZODB
#############################################################
def
buildTestEnvironment
(
self
,
args
,
kw
):
self
.
init_phase
=
1
self
.
dispatcher
(
"funcTestEnvironment"
,(
"funcTestEnvironment"
,
1
,
args
,
kw
))
def
funcTestEnvironment
(
self
,
dataDir
,
maxFiles
):
env
=
self
.
th_setup
()
if
not
os
.
path
.
exists
(
dataDir
):
os
.
makedirs
(
dataDir
)
os
.
system
(
"rm -f %s/*"
%
dataDir
)
zodb
=
testZODB
(
"%s/Data_orig.fs"
%
dataDir
)
print
"parsing and reading mailbox file %s....please wait"
%
mbox
tc
=
testCatalog
(
mbox
,
maxFiles
)
print
"writing Catalog to ZODB"
zodb
.
write
(
"catalog"
,
tc
)
print
"Creating keywords file"
kw
=
keywords
.
Keywords
()
kw
.
build
(
mbox
,
1000
)
print
tc
.
num_files
,
"files read"
print
"Initalization complete"
self
.
th_teardown
(
env
)
class
testSearches
(
dispatcher
.
Dispatcher
,
unittest
.
TestCase
):
""" test searches """
def
__init__
(
self
,
func
,
*
args
,
**
kw
):
unittest
.
TestCase
.
__init__
(
self
,
func
,
args
,
kw
)
dispatcher
.
Dispatcher
.
__init__
(
self
,
func
)
self
.
init_phase
=
0
self
.
setlog
(
open
(
"dispatcher.log"
,
"a"
)
)
def
setUp
(
self
):
os
.
system
(
"rm -fr data/work"
)
if
not
os
.
path
.
exists
(
"data/work"
):
os
.
makedirs
(
"data/work"
)
assert
os
.
system
(
"cp %s/Data_orig.fs data/work/Data.fs"
%
dataDir
)
==
0
,
\
"Error while replicating original data"
self
.
zodb
=
testZODB
(
"data/work/Data.fs"
,
open
=
0
)
self
.
threads
=
{}
self
.
init_zodb_size
=
self
.
zodb_size
()
kw
=
keywords
.
Keywords
()
kw
.
reload
()
self
.
keywords
=
kw
.
keywords
()
self
.
logn
(
"-"
*
80
)
self
.
logn
(
'treads=%d searchiterations=%d'
%
(
numThreads
,
searchIterations
))
self
.
logn
(
'updateiterations=%d maxfiles=%d'
%
(
updateIterations
,
maxFiles
))
def
tearDown
(
self
):
self
.
log_zodb_size
(
"before"
,
self
.
init_zodb_size
)
self
.
log_zodb_size
(
"after "
,
self
.
zodb_size
())
del
self
.
zodb
self
.
zodb
=
self
.
catalog
=
None
def
log_zodb_size
(
self
,
s
,
n
):
self
.
logn
(
"Size of ZODB (data/work/Data.fs) %s test : %s"
%
(
s
,
n
)
)
def
zodb_size
(
self
):
return
self
.
size2size
(
os
.
stat
(
"data/work/Data.fs"
)[
6
])
def
size2size
(
self
,
n
):
import
math
if
n
<
1024.0
:
return
"%8.3lf Bytes"
%
n
if
n
<
1024.0
*
1024.0
:
return
"%8.3lf KB"
%
(
1.0
*
n
/
1024.0
)
if
n
<
1024.0
*
1024.0
*
1024.0
:
return
"%8.3lf MB"
%
(
1.0
*
n
/
1024.0
/
1024.0
)
#############################################################
# Fulltext test
#############################################################
def
testFulltextIndex
(
self
,
args
,
kw
):
""" benchmark FulltextIndex """
self
.
dispatcher
(
'funcFulltextIndex'
,
(
'funcFulltextIndex'
,
kw
[
"numThreads"
]
,
()
,
{}
)
)
def
funcFulltextIndex
(
self
,
*
args
):
""" benchmark FulltextIndex """
cat
,
msg_ids
=
self
.
get_catalog
()
env
=
self
.
th_setup
()
for
kw
in
self
.
keywords
:
res
=
cat
.
searchResults
(
{
"content"
:
kw
}
)
self
.
th_teardown
(
env
)
#############################################################
# Field index test
#############################################################
def
testFieldIndex
(
self
,
args
,
kw
):
""" benchmark field index"""
self
.
dispatcher
(
'funcFieldIndex'
,
(
'funcFieldIndex'
,
kw
[
"numThreads"
]
,
()
,
{}
)
)
def
funcFieldIndex
(
self
,
*
args
):
""" benchmark FieldIndex """
cat
,
msg_ids
=
self
.
get_catalog
()
env
=
self
.
th_setup
()
for
i
in
range
(
0
,
searchIterations
):
res
=
cat
.
searchResults
(
{
"length"
:
i
}
)
for
r
in
res
:
assert
i
==
r
.
length
,
"%s should have size %d but is %s"
%
\
(
r
.
file_id
,
i
,
r
.
length
)
self
.
th_teardown
(
env
)
#############################################################
# Keyword index test
#############################################################
def
testKeywordIndex
(
self
,
args
,
kw
):
""" benchmark Keyword index"""
self
.
dispatcher
(
'funcKeywordIndex'
,
(
'funcKeywordIndex'
,
kw
[
"numThreads"
]
,
()
,
{}
)
)
def
funcKeywordIndex
(
self
,
*
args
):
""" benchmark KeywordIndex """
cat
,
msg_ids
=
self
.
get_catalog
()
env
=
self
.
th_setup
()
for
kw
in
self
.
keywords
:
res
=
cat
.
searchResults
(
{
"subject"
:
kw
}
)
# assert len(res) != 0 , "Search result for keyword '%s' is empty" % kw
self
.
th_teardown
(
env
)
#############################################################
# Field range index test
#############################################################
def
testFieldRangeIndex
(
self
,
args
,
kw
):
""" benchmark field range index"""
self
.
dispatcher
(
'funcFieldRangeIndex'
,
(
'funcFieldRangeIndex'
,
kw
[
"numThreads"
]
,
()
,
{}
)
)
def
funcFieldRangeIndex
(
self
,
*
args
):
""" benchmark FieldRangeIndex """
cat
,
msg_ids
=
self
.
get_catalog
()
env
=
self
.
th_setup
()
rg
=
[]
for
i
in
range
(
searchIterations
):
m
=
whrandom
.
randint
(
0
,
10000
)
n
=
m
+
200
rg
.
append
((
m
,
n
))
for
i
in
range
(
searchIterations
):
for
r
in
cat
.
searchResults
(
{
"length"
:
rg
[
i
],
"length_usage"
:
"range:min:max"
}
):
size
=
r
.
length
assert
rg
[
i
][
0
]
<=
size
and
size
<=
rg
[
i
][
1
]
,
\
"Filesize of %s is out of range (%d,%d) %d"
%
(
r
.
file_id
,
rg
[
i
][
0
],
rg
[
i
][
1
],
size
)
self
.
th_teardown
(
env
)
#############################################################
# Keyword + range index test
#############################################################
def
testKeywordRangeIndex
(
self
,
args
,
kw
):
""" benchmark Keyword range index"""
self
.
dispatcher
(
'funcKeywordRangeIndex'
,
(
'funcKeywordRangeIndex'
,
kw
[
"numThreads"
]
,
()
,
{}
)
)
def
funcKeywordRangeIndex
(
self
,
*
args
):
""" benchmark Keyword & IndexRange search """
cat
,
msg_ids
=
self
.
get_catalog
()
rg
=
[]
for
i
in
range
(
len
(
self
.
keywords
)):
m
=
whrandom
.
randint
(
0
,
10000
)
n
=
m
+
200
rg
.
append
(
m
,
n
)
env
=
self
.
th_setup
()
results
=
[]
for
i
in
range
(
len
(
self
.
keywords
)):
results
.
append
(
cat
.
searchResults
(
{
"keywords"
:
self
.
keywords
[
i
],
"length"
:
rg
[
i
],
"length_usage"
:
"range:min:max"
}
)
)
self
.
th_teardown
(
env
)
#############################################################
# Test full reindexing
#############################################################
def
testUpdates
(
self
,
args
,
kw
):
""" benchmark concurrent catalog/uncatalog operations """
self
.
dispatcher
(
"testUpdates"
,
(
"funcUpdates"
,
kw
[
"numThreads"
]
,
args
,
kw
))
def
funcUpdates
(
self
,
*
args
,
**
kw
):
""" benchmark concurrent catalog/uncatalog operations """
uncat_conflicts
=
cat_conflicts
=
0
cat
,
msg_ids
=
self
.
get_catalog
()
msgs
=
self
.
setupUpdatesMethod
(
kw
[
"numUpdates"
])
keys
=
msgs
.
keys
()
rdgen
=
whrandom
.
whrandom
()
rdgen
.
seed
(
int
(
time
.
time
())
%
256
,
int
(
time
.
time
())
%
256
,
int
(
time
.
time
())
%
256
)
env
=
self
.
th_setup
()
for
i
in
range
(
len
(
keys
)):
r
=
rdgen
.
randint
(
0
,
len
(
msgs
)
-
1
)
mid
=
keys
[
r
]
obj
=
msgs
[
mid
]
try
:
cat
.
uncatalogObject
(
mid
)
if
kw
.
get
(
"commit"
,
1
)
==
1
:
get_transaction
().
commit
()
time
.
sleep
(
0.1
)
except
ZODB
.
POSException
.
ConflictError
:
uncat_conflicts
=
uncat_conflicts
+
1
try
:
cat
.
catalogObject
(
obj
,
mid
)
if
kw
.
get
(
"commit"
,
1
)
==
1
:
get_transaction
().
commit
()
time
.
sleep
(
0.1
)
except
ZODB
.
POSException
.
ConflictError
:
cat_conflicts
=
cat_conflicts
+
1
try
:
get_transaction
().
commit
()
except
:
pass
self
.
th_teardown
(
env
,
cat_conflicts
=
cat_conflicts
,
uncat_conflicts
=
uncat_conflicts
)
def
setupUpdatesMethod
(
self
,
numUpdates
):
""" this method prepares a datastructure for the updates test.
we are reading the first n mails from the primary mailbox.
they are used for the update test
"""
i
=
0
dict
=
{}
mb
=
mailbox
.
UnixMailbox
(
open
(
mbox
,
"r"
))
msg
=
mb
.
next
()
while
msg
and
i
<
numUpdates
:
obj
=
testMessage
(
msg
)
mid
=
msg
.
dict
[
"message-id"
]
dict
[
mid
]
=
obj
msg
=
mb
.
next
()
i
=
i
+
1
return
dict
#############################################################
# Test full reindexing
#############################################################
def
testReindexing
(
self
,
args
,
kw
):
""" test reindexing of existing data """
self
.
dispatcher
(
"testReindexing"
,
(
"funcReindexing"
,
kw
[
"numThreads"
]
,
(
mbox
,
1000
)
,
{}
))
def
testReindexingAndModify
(
self
,
args
,
kw
):
""" test reindexing of existing data but with modifications"""
self
.
dispatcher
(
"testReindexing"
,
(
"funcReindexing"
,
kw
[
"numThreads"
]
,
(
mbox
,
1000
,
1
)
,
{}
))
def
funcReindexing
(
self
,
mbox
,
numfiles
=
100
,
modify_doc
=
0
):
""" test reindexing of existing data """
cat_conflicts
=
0
cat
,
msg_ids
=
self
.
get_catalog
()
env
=
self
.
th_setup
()
mb
=
mailbox
.
UnixMailbox
(
open
(
mbox
,
"r"
))
i
=
0
msg
=
mb
.
next
()
while
msg
and
i
<
numfiles
:
obj
=
testMessage
(
msg
,
modify_doc
)
mid
=
msg
.
dict
[
"message-id"
]
try
:
cat
.
catalogObject
(
obj
,
mid
)
get_transaction
().
commit
()
except
:
cat_conflicts
=
cat_conflicts
+
1
msg
=
mb
.
next
()
i
=
i
+
1
if
i
%
100
==
0
:
print
i
self
.
th_teardown
(
env
,
cat_conflicts
=
cat_conflicts
)
#############################################################
# Test full reindexing
#############################################################
def
testIncrementalIndexing
(
self
,
args
,
kw
):
""" testing incremental indexing """
self
.
dispatcher
(
"testIncrementalIndexing"
,
(
"funcReindexing"
,
kw
[
"numThreads"
],
(
mbox2
,
1000
)
,
{}))
def
get_catalog
(
self
):
""" return a catalog object """
# depended we are running in multithreaded mode we must take
# care how threads open the ZODB
connection
=
self
.
zodb
.
db
.
open
()
root
=
connection
.
root
()
cat
=
root
[
"catalog"
].
_catalog
msg_ids
=
root
[
'catalog'
].
msg_ids
return
cat
,
msg_ids
################################################################################
# Stuff of Chris
################################################################################
class CatalogBase:
    # Shared fixture mixin: gives each test a fresh globbing Vocabulary and
    # an empty Catalog, and drops both afterwards.

    def setUp(self):
        self._vocabulary = Vocabulary.Vocabulary(
            'Vocabulary', 'Vocabulary', globbing=1)
        self._catalog = Catalog.Catalog()

    def tearDown(self):
        self._vocabulary = self._catalog = None
class TestAddDelColumn(CatalogBase, unittest.TestCase):
    # Metadata-column management on a bare Catalog.

    def checkAdd(self):
        # Adding a column must register it in the schema.
        self._catalog.addColumn('id')
        assert self._catalog.schema.has_key('id') == 1, 'add column failed'

    def checkAddBad(self):
        # Underscore-prefixed column names must be rejected.
        try:
            self._catalog.addColumn('_id')
        except:
            pass
        else:
            raise 'invalid metadata column check failed'

    def checkDel(self):
        # Deleting a column must remove it from the schema again.
        self._catalog.addColumn('id')
        self._catalog.delColumn('id')
        assert self._catalog.schema.has_key('id') != 1, 'del column failed'
class TestAddDelIndexes(CatalogBase, unittest.TestCase):
    # Index management: each addIndex must create the right SearchIndex
    # class, and delIndex must remove it.

    def checkAddFieldIndex(self):
        self._catalog.addIndex('id', 'FieldIndex')
        assert type(self._catalog.indexes['id']) is type(UnIndex('id')), \
            'add field index failed'

    def checkAddTextIndex(self):
        self._catalog.addIndex('id', 'TextIndex')
        idx = self._catalog.indexes['id']
        assert type(idx) is type(UnTextIndex('id', None, None, Lexicon())), \
            'add text index failed'

    def checkAddKeywordIndex(self):
        self._catalog.addIndex('id', 'KeywordIndex')
        idx = self._catalog.indexes['id']
        assert type(idx) is type(UnKeywordIndex('id')), 'add kw index failed'

    def checkDelFieldIndex(self):
        self._catalog.addIndex('id', 'FieldIndex')
        self._catalog.delIndex('id')
        assert self._catalog.indexes.has_key('id') != 1, 'del index failed'

    def checkDelTextIndex(self):
        self._catalog.addIndex('id', 'TextIndex')
        self._catalog.delIndex('id')
        assert self._catalog.indexes.has_key('id') != 1, 'del index failed'

    def checkDelKeywordIndex(self):
        self._catalog.addIndex('id', 'KeywordIndex')
        self._catalog.delIndex('id')
        assert self._catalog.indexes.has_key('id') != 1, 'del index failed'
class TestSimultaneousAddAndRead(CatalogBase, unittest.TestCase):
    # Placeholder for a concurrent add/read test; not implemented yet.

    def checkMultiThread(self):
        pass
class TestZCatalogObject(unittest.TestCase):
    # ZCatalog construction: with no vocab_id it must create its own
    # Vocabulary; with vocab_id it must use the supplied one.

    def checkInstantiateWithoutVocab(self):
        v = Vocabulary.Vocabulary('Vocabulary', 'Vocabulary', globbing=1)
        zc = ZCatalog.ZCatalog('acatalog')
        assert hasattr(zc, 'Vocabulary')
        assert zc.getVocabulary().__class__ == v.__class__

    def checkInstantiateWithGlobbingVocab(self):
        v = Vocabulary.Vocabulary('Vocabulary', 'Vocabulary', globbing=1)
        zc = ZCatalog.ZCatalog('acatalog', vocab_id='vocab')
        zc._setObject('vocab', v)
        assert zc.getVocabulary() == v

    def checkInstantiateWithNormalVocab(self):
        v = Vocabulary.Vocabulary('Vocabulary', 'Vocabulary', globbing=0)
        zc = ZCatalog.ZCatalog('acatalog', vocab_id='vocab')
        zc._setObject('vocab', v)
        assert zc.getVocabulary() == v
class
TestCatalogObject
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
_vocabulary
=
Vocabulary
.
Vocabulary
(
'Vocabulary'
,
'Vocabulary'
,
globbing
=
1
)
self
.
_catalog
=
Catalog
.
Catalog
()
self
.
_catalog
.
addIndex
(
'col1'
,
'FieldIndex'
)
self
.
_catalog
.
addIndex
(
'col2'
,
'TextIndex'
)
self
.
_catalog
.
addIndex
(
'col3'
,
'KeywordIndex'
)
self
.
_catalog
.
addColumn
(
'col1'
)
self
.
_catalog
.
addColumn
(
'col2'
)
self
.
_catalog
.
addColumn
(
'col3'
)
self
.
_catalog
.
addIndex
(
'att1'
,
'FieldIndex'
)
self
.
_catalog
.
addIndex
(
'att2'
,
'TextIndex'
)
self
.
_catalog
.
addIndex
(
'att3'
,
'KeywordIndex'
)
self
.
_catalog
.
addColumn
(
'att1'
)
self
.
_catalog
.
addColumn
(
'att2'
)
self
.
_catalog
.
addColumn
(
'att3'
)
self
.
_catalog
.
addColumn
(
'num'
)
self
.
upper
=
1000
class
dummy
(
ExtensionClass
.
Base
):
att1
=
'att1'
att2
=
'att2'
att3
=
[
'att3'
]
def
__init__
(
self
,
num
):
self
.
num
=
num
def
col1
(
self
):
return
'col1'
def
col2
(
self
):
return
'col2'
def
col3
(
self
):
return
[
'col3'
]
for
x
in
range
(
0
,
self
.
upper
):
self
.
_catalog
.
catalogObject
(
dummy
(
x
),
`x`
)
self
.
_catalog
.
aq_parent
=
dummy
(
'foo'
)
# fake out acquisition
def
tearDown
(
self
):
self
.
_vocabulary
=
self
.
_catalog
=
None
def
checkResultLength
(
self
):
upper
=
self
.
upper
a
=
self
.
_catalog
()
assert
len
(
a
)
==
upper
,
'length should be %s, its %s'
%
(
upper
,
len
(
a
))
def
checkFieldIndexLength
(
self
):
a
=
self
.
_catalog
(
att1
=
'att1'
)
assert
len
(
a
)
==
self
.
upper
,
'should be %s, but is %s'
%
(
self
.
upper
,
len
(
a
))
def
checkTextIndexLength
(
self
):
a
=
self
.
_catalog
(
att2
=
'att2'
)
assert
len
(
a
)
==
self
.
upper
,
'should be %s, but is %s'
%
(
self
.
upper
,
len
(
a
))
def
checkKeywordIndexLength
(
self
):
a
=
self
.
_catalog
(
att3
=
'att3'
)
assert
len
(
a
)
==
self
.
upper
,
'should be %s, but is %s'
%
(
self
.
upper
,
len
(
a
))
def
checkUncatalogFieldIndex
(
self
):
self
.
uncatalog
()
a
=
self
.
_catalog
(
att1
=
'att1'
)
assert
len
(
a
)
==
0
,
'len: %s'
%
(
len
(
a
))
def
checkUncatalogTextIndex
(
self
):
self
.
uncatalog
()
a
=
self
.
_catalog
(
att2
=
'att2'
)
assert
len
(
a
)
==
0
,
'len: %s'
%
(
len
(
a
))
def
checkUncatalogKeywordIndex
(
self
):
self
.
uncatalog
()
a
=
self
.
_catalog
(
att3
=
'att3'
)
assert
len
(
a
)
==
0
,
'len: %s'
%
(
len
(
a
))
def
checkBadUncatalog
(
self
):
try
:
self
.
_catalog
.
uncatalogObject
(
'asdasdasd'
)
except
:
assert
1
==
2
,
'uncatalogObject raised exception on bad uid'
def
checkUniqueValuesForLength
(
self
):
a
=
self
.
_catalog
.
uniqueValuesFor
(
'att1'
)
assert
len
(
a
)
==
1
,
'bad number of unique values %s'
%
str
(
a
)
def
checkUniqueValuesForContent
(
self
):
a
=
self
.
_catalog
.
uniqueValuesFor
(
'att1'
)
assert
a
[
0
]
==
'att1'
,
'bad content %s'
%
str
(
a
[
0
])
def
uncatalog
(
self
):
for
x
in
range
(
0
,
self
.
upper
):
self
.
_catalog
.
uncatalogObject
(
`x`
)
class objRS(ExtensionClass.Base):
    # Trivial indexable object carrying one integer, used by the
    # range-search stress test below.

    def __init__(self, num):
        self.number = num
class
testRS
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
_vocabulary
=
Vocabulary
.
Vocabulary
(
'Vocabulary'
,
'Vocabulary'
,
globbing
=
1
)
self
.
_catalog
=
Catalog
.
Catalog
()
self
.
_catalog
.
addIndex
(
'number'
,
'FieldIndex'
)
self
.
_catalog
.
addColumn
(
'number'
)
for
i
in
range
(
50000
):
if
i
%
1000
==
0
:
print
i
obj
=
objRS
(
whrandom
.
randint
(
0
,
20000
))
self
.
_catalog
.
catalogObject
(
obj
,
i
)
self
.
_catalog
.
aq_parent
=
objRS
(
200
)
def
testRangeSearch
(
self
):
for
i
in
range
(
1000000
):
m
=
whrandom
.
randint
(
0
,
20000
)
n
=
m
+
1000
for
r
in
self
.
_catalog
.
searchResults
(
{
"number"
:
(
m
,
n
)
,
"length_usage"
:
"range:min:max"
}
):
size
=
r
.
number
assert
m
<=
size
and
size
<=
n
,
"%d vs [%d,%d]"
%
(
r
.
number
,
m
,
n
)
def
usage
(
program
):
print
"Usage: "
print
print
"initalize the test catalog: %s -i -f <maximum number files to use> "
%
program
print
"to run the basic tests: %s -b -f <maximum number files to use> "
%
program
print
"to run the advanced tests: %s -a -f <maximum number files to use> "
%
program
def main():
    """Parse command-line options and run the selected test groups."""
    global dataDir, maxFiles
    opts, args = getopt.getopt(sys.argv[1:], "hiabf:xp", ['help'])
    opts.sort()
    optsLst = map(lambda x: x[0], opts)
    if optsLst == []:
        # No options at all: show help and quit.
        usage(os.path.basename(sys.argv[0])); sys.exit(0)
    for k, v in opts:
        if k in ['-h', '--help']:
            usage(os.path.basename(sys.argv[0])); sys.exit(0)
        if k == "-f":
            maxFiles = string.atoi(v)
    # The data directory is keyed by the mail count so differently sized
    # input sets can coexist.
    dataDir = os.path.join("data", str(maxFiles))
    if '-i' in optsLst:
        unittest.TextTestRunner().run(get_tests('init'))
    if '-b' in optsLst:
        unittest.TextTestRunner().run(get_tests('bench1'))
    if '-a' in optsLst:
        unittest.TextTestRunner().run(get_tests('bench2'))
    if '-x' in optsLst:
        unittest.TextTestRunner().run(get_tests('exp'))
    if '-p' in optsLst:
        unittest.TextTestRunner().run(test_suite())
def test_suite():
    # Standard Zope test entry point: just the basic suite.
    return get_tests('basic')
def get_tests(what):
    """Assemble the requested test group: 'basic', 'init', 'bench1',
    'bench2' or 'exp'."""
    global dataDir, maxFiles
    if what == 'basic':
        # The basic suite runs against a small, fixed-size input set.
        maxFiles = 100
        dataDir = 'data/%d' % maxFiles

    # Chris' unit tests.
    ts_cm = (
        unittest.makeSuite(TestAddDelIndexes, 'check'),
        unittest.makeSuite(TestCatalogObject, 'check'),
        unittest.makeSuite(TestAddDelColumn, 'check'),
        unittest.makeSuite(TestZCatalogObject, 'check'),
    )
    # Andreas' single-threaded benchmark pass, preceded by environment build.
    t_aj = (
        BuildEnv('buildTestEnvironment', dataDir, maxFiles),
        testSearches("testFulltextIndex", numThreads=1),
        testSearches("testFieldIndex", numThreads=1),
        testSearches("testFieldRangeIndex", numThreads=1),
        testSearches("testKeywordIndex", numThreads=1),
        testSearches("testKeywordRangeIndex", numThreads=1),
    )
    # Search benchmarks, 1 and 4 threads each.
    bench1_tests = (
        testSearches("testFulltextIndex", numThreads=1),
        testSearches("testFulltextIndex", numThreads=4),
        testSearches("testFieldIndex", numThreads=1),
        testSearches("testFieldIndex", numThreads=4),
        testSearches("testFieldRangeIndex", numThreads=1),
        testSearches("testFieldRangeIndex", numThreads=4),
        testSearches("testKeywordIndex", numThreads=1),
        testSearches("testKeywordIndex", numThreads=4),
        testSearches("testKeywordRangeIndex", numThreads=1),
        testSearches("testKeywordRangeIndex", numThreads=4),
    )
    # Indexing/update benchmarks.
    bench2_tests = (
        testSearches("testReindexing", numThreads=1),
        testSearches("testIncrementalIndexing", numThreads=1),
        testSearches("testUpdates", numThreads=2, numUpdates=200),
        testSearches("testUpdates", numThreads=4, numUpdates=200),
    )
    # Experimental tests.
    exp_tests = (
        # testRS("testRangeSearch"),
        # testSearches("testReindexing",numThreads=1),
        testSearches("testReindexingAndModify", numThreads=1),
        # testSearches("testUpdates",numThreads=10,numUpdates=100),
    )
    init_tests = (
        BuildEnv("buildTestEnvironment", dataDir, maxFiles),
    )

    if what == 'basic':
        ts = unittest.TestSuite(ts_cm)
        for x in t_aj:
            ts.addTest(x)
        return ts
    # Any other group name selects the correspondingly named tuple above.
    ts = unittest.TestSuite()
    for x in eval('%s_tests' % what):
        ts.addTest(x)
    return ts
def debug():
    """Run the basic suite under unittest's debug mode."""
    test_suite().debug()


def pdebug():
    """Run debug() under the pdb debugger."""
    # BUG FIX: pdebug was defined twice; the earlier definition (which
    # imported pdb and merely built the suite without running anything)
    # was dead code, silently shadowed by this one.  Consolidated to the
    # single effective definition.
    import pdb
    pdb.run('debug()')


if __name__ == '__main__':
    main()
lib/python/Products/ZCatalog/tests/testCatalogTiming.py
0 → 100644
View file @
e6b5d0c3
import
os
,
sys
sys
.
path
.
insert
(
0
,
'.'
)
try
:
import
Testing
os
.
environ
[
'SOFTWARE_HOME'
]
=
os
.
environ
.
get
(
'SOFTWARE_HOME'
,
'.'
)
except
ImportError
:
sys
.
path
[
0
]
=
'../../..'
import
Testing
os
.
environ
[
'SOFTWARE_HOME'
]
=
'../../..'
os
.
environ
[
'INSTANCE_HOME'
]
=
os
.
environ
.
get
(
'INSTANCE_HOME'
,
os
.
path
.
join
(
os
.
environ
[
'SOFTWARE_HOME'
],
'..'
,
'..'
)
)
os
.
environ
[
'STUPID_LOG_FILE'
]
=
os
.
path
.
join
(
os
.
environ
[
'INSTANCE_HOME'
],
'var'
,
'debug.log'
)
here
=
os
.
getcwd
()
import
Zope
import
mailbox
,
time
,
httplib
from
string
import
strip
,
find
,
split
,
lower
,
atoi
,
join
from
urllib
import
quote
from
Products.ZCatalog
import
ZCatalog
from
unittest
import
TestCase
,
TestSuite
,
JUnitTextTestRunner
,
\
VerboseTextTestRunner
,
makeSuite
from
Testing.makerequest
import
makerequest
TextTestRunner
=
VerboseTextTestRunner
class TestTimeIndex(TestCase):
    # Timing harness: builds a ZCatalog inside a scratch folder of the live
    # Zope app and measures bulk indexing, incremental indexing + querying,
    # and subtransaction-commit thresholds against zope.mbox.

    def setUp(self):
        self.app = makerequest(Zope.app())
        try:
            self.app._delObject('catalogtest')
        except AttributeError:
            pass
        self.app.manage_addFolder('catalogtest')
        zcatalog = ZCatalog.ZCatalog('catalog', 'a catalog')
        self.app.catalogtest._setObject('catalog', zcatalog)
        c = self.app.catalogtest.catalog
        # Drop any leftover indexes from a previous run, then recreate.
        for x in ('title', 'to', 'from', 'date', 'raw'):
            try:
                c.manage_delIndexes([x])
            except:
                pass
        c.manage_addIndex('title', 'TextIndex')
        c.manage_addIndex('to', 'TextIndex')
        c.manage_addIndex('from', 'TextIndex')
        c.manage_addIndex('date', 'FieldIndex')
        c.manage_addIndex('raw', 'TextIndex')

    def tearDown(self):
        try:
            self.app._delObject('catalogtest')
        except AttributeError:
            pass
        try:
            # Pack and close so repeated runs don't bloat the storage.
            self.app._p_jar._db.pack()
            self.app._p_jar.close()
        except AttributeError:
            pass
        self.app = None
        del self.app

    def checkTimeBulkIndex(self):
        print
        c = self.app.catalogtest.catalog
        t = time.time()
        loadmail(self.app.catalogtest, 'zopemail',
                 os.path.join(here, 'zope.mbox'), 500)
        get_transaction().commit()
        loadtime = time.time() - t
        out("loading data took %s seconds.. " % loadtime)
        t = time.time()
        # Fake enough of a request for manage_catalogFoundItems.
        req = self.app.REQUEST
        parents = [self.app.catalogtest.catalog,
                   self.app.catalogtest, self.app]
        req['PARENTS'] = parents
        rsp = self.app.REQUEST.RESPONSE
        url1 = ''
        c.manage_catalogFoundItems(req, rsp, url1, url1,
                                   obj_metatypes=['DTML Document'])
        indextime = time.time() - t
        out("bulk index took %s seconds.. " % indextime)
        out("total time for load and index was %s seconds.. " %
            (loadtime + indextime))

    def checkTimeIncrementalIndexAndQuery(self):
        print
        c = self.app.catalogtest.catalog
        t = time.time()
        max = 500
        m = loadmail(self.app.catalogtest, 'zopemail',
                     os.path.join(here, 'zope.mbox'), max, c)
        get_transaction().commit()
        total = time.time() - t
        out("total time for load and index was %s seconds.. " % total)
        t = time.time()
        rs = c()  # empty query should return all
        assert len(rs) == max, len(rs)
        dates = m['date']
        froms = m['from']
        tos = m['to']
        titles = m['title']
        assert len(c({'date': 'foobarfoo'})) == 0  # should return no results
        for x in dates:
            assert len(c({'date': x})) == 1  # each date should be fieldindexed
        assert len(c({'from': 'a'})) == 0  # should be caught by splitter
        assert len(c({'raw': 'chris'})) != 0
        assert len(c({'raw': 'gghdjkasjdsda'})) == 0
        assert c({'PrincipiaSearchSource': 'the*'})

    def checkTimeSubcommit(self):
        print
        for x in (None, 100, 500, 1000, 10000):
            out("testing subcommit at theshhold of %s" % x)
            if x is not None:
                # First pass reuses the framework's setUp; later passes
                # rebuild the fixture explicitly.
                self.setUp()
            c = self.app.catalogtest.catalog
            c.threshold = x
            get_transaction().commit()
            t = time.time()
            loadmail(self.app.catalogtest, 'zopemail',
                     os.path.join(here, 'zope.mbox'), 500, c)
            get_transaction().commit()
            total = time.time() - t
            out("total time with subcommit thresh %s was %s seconds.. " %
                (x, total))
            self.tearDown()
# utility
def
loadmail
(
folder
,
name
,
mbox
,
max
=
None
,
catalog
=
None
):
"""
creates a folder inside object 'folder' named 'name', opens
filename 'mbox' and adds 'max' mail messages as DTML documents to
the ZODB inside the folder named 'name'. If 'catalog' (which
should be a ZCatalog object) is passed in, call catalog_object on it
with the document while we're iterating. If 'max' is not None,
only do 'max' messages, else do all messages in the mbox archive.
"""
m
=
{
'date'
:[],
'from'
:[],
'to'
:[],
'title'
:[]}
folder
.
manage_addFolder
(
name
)
folder
=
getattr
(
folder
,
name
)
mb
=
mailbox
.
UnixMailbox
(
open
(
mbox
))
i
=
0
every
=
100
message
=
mb
.
next
()
while
message
:
part
=
`i/every * 100`
try
:
dest
=
getattr
(
folder
,
part
)
except
AttributeError
:
folder
.
manage_addFolder
(
part
)
dest
=
getattr
(
folder
,
part
)
dest
.
manage_addDTMLDocument
(
str
(
i
),
file
=
message
.
fp
.
read
())
doc
=
getattr
(
dest
,
str
(
i
))
i
=
i
+
1
for
h
in
message
.
headers
:
h
=
strip
(
h
)
l
=
find
(
h
,
':'
)
if
l
<=
0
:
continue
name
=
lower
(
h
[:
l
])
if
name
==
'subject'
:
name
=
'title'
h
=
strip
(
h
[
l
+
1
:])
type
=
'string'
if
0
and
name
==
'date'
:
type
=
'date'
elif
0
:
try
:
atoi
(
h
)
except
:
pass
else
:
type
=
int
if
name
==
'title'
:
doc
.
manage_changeProperties
(
title
=
h
)
m
[
name
].
append
(
h
)
elif
name
in
(
'to'
,
'from'
,
'date'
):
try
:
doc
.
manage_addProperty
(
name
,
h
,
type
)
except
:
pass
m
[
name
].
append
(
h
)
if
catalog
:
path
=
join
(
doc
.
getPhysicalPath
(),
'/'
)
catalog
.
catalog_object
(
doc
,
path
)
if
max
is
not
None
:
if
i
>=
max
:
break
message
=
mb
.
next
()
return
m
def
out
(
s
):
print
" %s"
%
s
def test_suite():
    # All 'check*' methods of the timing test case, as one suite.
    timing_suite = makeSuite(TestTimeIndex, 'check')
    return TestSuite((timing_suite,))
def
main
():
mb
=
os
.
path
.
join
(
here
,
'zope.mbox'
)
if
not
os
.
path
.
isfile
(
mb
):
print
"do you want to get the zope.mbox file from lists.zope.org?"
print
"it's required for testing (98MB, ~ 30mins on fast conn)"
print
"it's also available at korak:/home/chrism/zope.mbox"
print
"-- type 'Y' or 'N'"
a
=
raw_input
()
if
lower
(
a
[:
1
])
==
'y'
:
server
=
'lists.zope.org:80'
method
=
'/pipermail/zope.mbox/zope.mbox'
h
=
httplib
.
HTTP
(
server
)
h
.
putrequest
(
'GET'
,
method
)
h
.
putheader
(
'User-Agent'
,
'silly'
)
h
.
putheader
(
'Accept'
,
'text/html'
)
h
.
putheader
(
'Accept'
,
'text/plain'
)
h
.
putheader
(
'Host'
,
server
)
h
.
endheaders
()
errcode
,
errmsg
,
headers
=
h
.
getreply
()
if
errcode
!=
200
:
f
=
h
.
getfile
()
data
=
f
.
read
()
print
data
raise
"Error reading from host %s"
%
server
f
=
h
.
getfile
()
out
=
open
(
mb
,
'w'
)
print
"this is going to take a while..."
print
"downloading mbox from %s"
%
server
while
1
:
l
=
f
.
readline
()
if
not
l
:
break
out
.
write
(
l
)
alltests
=
test_suite
()
runner
=
TextTestRunner
()
runner
.
run
(
alltests
)
def debug():
    # Run the timing suite in unittest debug mode (errors drop into pdb).
    test_suite().debug()


if __name__ == '__main__':
    # With an argument, call the named module function (e.g. 'debug');
    # otherwise run main().
    if len(sys.argv) > 1:
        globals()[sys.argv[1]]()
    else:
        main()
lib/python/SearchIndex/GlobbingLexicon.py
View file @
e6b5d0c3
...
...
@@ -85,18 +85,15 @@
from
Lexicon
import
Lexicon
from
Splitter
import
Splitter
from
intSet
import
intSet
from
UnTextIndex
import
Or
import
re
,
string
import
OIBTree
,
BTree
,
IOBTree
,
IIBTree
# Short cuts for common data containers
OIBTree
=
OIBTree
.
BTree
# Object -> Integer
OOBTree
=
BTree
.
BTree
# Object -> Object
IOBTree
=
IOBTree
.
BTree
# Integer -> Object
IIBucket
=
IIBTree
.
Bucket
# Integer -> Integer
from
BTrees.IIBTree
import
IISet
,
union
,
IITreeSet
from
BTrees.OIBTree
import
OIBTree
from
BTrees.IOBTree
import
IOBTree
from
BTrees.OOBTree
import
OOBTree
from
randid
import
randid
class
GlobbingLexicon
(
Lexicon
):
"""Lexicon which supports basic globbing function ('*' and '?').
...
...
@@ -127,11 +124,24 @@ class GlobbingLexicon(Lexicon):
def
__init__
(
self
):
self
.
counter
=
0
# word id counter XXX
self
.
clear
()
def
clear
(
self
):
self
.
_lexicon
=
OIBTree
()
self
.
_inverseLex
=
IOBTree
()
self
.
_digrams
=
OOBTree
()
def
_convertBTrees
(
self
,
threshold
=
200
):
Lexicon
.
_convertBTrees
(
self
,
threshold
)
if
type
(
self
.
_digrams
)
is
OOBTree
:
return
from
BTrees.convert
import
convert
_digrams
=
self
.
_digrams
self
.
_digrams
=
OOBTree
()
self
.
_digrams
.
_p_jar
=
self
.
_p_jar
convert
(
_digrams
,
self
.
_digrams
,
threshold
,
IITreeSet
)
def
createDigrams
(
self
,
word
):
"""Returns a list with the set of digrams in the word."""
...
...
@@ -139,8 +149,8 @@ class GlobbingLexicon(Lexicon):
digrams
.
append
(
self
.
eow
+
word
[
0
])
# Mark the beginning
for
i
in
range
(
len
(
word
)):
digrams
.
append
(
word
[
i
:
i
+
2
])
for
i
in
range
(
1
,
len
(
word
)):
digrams
.
append
(
word
[
i
-
1
:
i
+
1
])
digrams
[
-
1
]
=
digrams
[
-
1
]
+
self
.
eow
# Mark the end
...
...
@@ -157,6 +167,8 @@ class GlobbingLexicon(Lexicon):
set
=
getWordId
# Kludge for old code
def
getWord
(
self
,
wid
):
return
self
.
_inverseLex
.
get
(
wid
,
None
)
def
assignWordId
(
self
,
word
):
"""Assigns a new word id to the provided word, and return it."""
...
...
@@ -166,19 +178,34 @@ class GlobbingLexicon(Lexicon):
if
self
.
_lexicon
.
has_key
(
word
):
return
self
.
_lexicon
[
word
]
# First we go ahead and put the forward and reverse maps in.
self
.
_lexicon
[
word
]
=
self
.
counter
self
.
_inverseLex
[
self
.
counter
]
=
word
# Get word id. BBB Backward compat pain.
inverse
=
self
.
_inverseLex
try
:
insert
=
inverse
.
insert
except
AttributeError
:
# we have an "old" BTree object
if
inverse
:
wid
=
inverse
.
keys
()[
-
1
]
+
1
else
:
self
.
_inverseLex
=
IOBTree
()
wid
=
1
inverse
[
wid
]
=
word
else
:
# we have a "new" IOBTree object
wid
=
randid
()
while
not
inverse
.
insert
(
wid
,
word
):
wid
=
randid
()
self
.
_lexicon
[
word
]
=
wid
# Now take all the digrams and insert them into the digram map.
for
digram
in
self
.
createDigrams
(
word
):
set
=
self
.
_digrams
.
get
(
digram
)
set
=
self
.
_digrams
.
get
(
digram
,
None
)
if
set
is
None
:
self
.
_digrams
[
digram
]
=
set
=
int
Set
()
set
.
insert
(
self
.
counter
)
self
.
_digrams
[
digram
]
=
set
=
II
Set
()
set
.
insert
(
wid
)
self
.
counter
=
self
.
counter
+
1
return
self
.
counter
-
1
# Adjust for the previous increment
return
wid
def
get
(
self
,
pattern
):
...
...
@@ -208,14 +235,11 @@ class GlobbingLexicon(Lexicon):
return
(
result
,
)
## now get all of the intsets that contain the result digrams
result
=
IIBucket
()
result
=
None
for
digram
in
digrams
:
if
self
.
_digrams
.
has_key
(
digram
):
matchSet
=
self
.
_digrams
[
digram
]
if
matchSet
is
not
None
:
result
=
IIBucket
().
union
(
matchSet
)
result
=
union
(
result
,
self
.
_digrams
.
get
(
digram
,
None
))
if
len
(
result
)
==
0
:
if
not
result
:
return
()
else
:
## now we have narrowed the list of possible candidates
...
...
@@ -227,10 +251,10 @@ class GlobbingLexicon(Lexicon):
expr
=
re
.
compile
(
self
.
createRegex
(
pattern
))
words
=
[]
hits
=
[]
for
x
in
result
.
keys
()
:
hits
=
IISet
()
for
x
in
result
:
if
expr
.
match
(
self
.
_inverseLex
[
x
]):
hits
.
append
(
x
)
hits
.
insert
(
x
)
return
hits
...
...
@@ -242,7 +266,6 @@ class GlobbingLexicon(Lexicon):
def
query_hook
(
self
,
q
):
"""expand wildcards"""
words
=
[]
wids
=
[]
for
w
in
q
:
if
(
(
self
.
multi_wc
in
w
)
or
(
self
.
single_wc
in
w
)
):
...
...
@@ -286,3 +309,5 @@ class GlobbingLexicon(Lexicon):
r'()&|!@#$%^{}\
<>
')
return "%s$" % result
lib/python/SearchIndex/Index.py
View file @
e6b5d0c3
...
...
@@ -84,11 +84,11 @@
##############################################################################
"""Simple column indices"""
__version__
=
'$Revision: 1.2
7
$'
[
11
:
-
2
]
__version__
=
'$Revision: 1.2
8
$'
[
11
:
-
2
]
from
Persistence
import
Persistent
from
BTree
import
BTree
from
intSet
import
int
Set
from
BTree
s.OOBTree
import
OO
BTree
from
BTrees.IIBTree
import
IITree
Set
import
operator
from
Missing
import
MV
import
string
...
...
@@ -135,7 +135,7 @@ class Index(Persistent):
self
.
id
=
id
self
.
ignore_ex
=
ignore_ex
self
.
call_methods
=
call_methods
self
.
_index
=
BTree
()
self
.
_index
=
OO
BTree
()
self
.
_reindex
()
else
:
...
...
@@ -176,7 +176,7 @@ class Index(Persistent):
def
clear
(
self
):
self
.
_index
=
BTree
()
self
.
_index
=
OO
BTree
()
def
_reindex
(
self
,
start
=
0
):
...
...
@@ -200,7 +200,7 @@ class Index(Persistent):
if
k
is
None
or
k
==
MV
:
continue
set
=
get
(
k
)
if
set
is
None
:
index
[
k
]
=
set
=
int
Set
()
if
set
is
None
:
index
[
k
]
=
set
=
IITree
Set
()
set
.
insert
(
i
)
...
...
@@ -225,7 +225,7 @@ class Index(Persistent):
return
set
=
index
.
get
(
k
)
if
set
is
None
:
index
[
k
]
=
set
=
int
Set
()
if
set
is
None
:
index
[
k
]
=
set
=
IITree
Set
()
set
.
insert
(
i
)
...
...
@@ -301,8 +301,7 @@ class Index(Persistent):
if
hi
:
setlist
=
index
.
items
(
lo
,
hi
)
else
:
setlist
=
index
.
items
(
lo
)
for
k
,
set
in
setlist
:
if
r
is
None
:
r
=
set
else
:
r
=
r
.
union
(
set
)
w
,
r
=
weightedUnion
(
r
,
set
)
except
KeyError
:
pass
else
:
#not a range
get
=
index
.
get
...
...
@@ -310,11 +309,10 @@ class Index(Persistent):
if
key
:
anyTrue
=
1
set
=
get
(
key
)
if
set
is
not
None
:
if
r
is
None
:
r
=
set
else
:
r
=
r
.
union
(
set
)
w
,
r
=
weightedUnion
(
r
,
set
)
if
r
is
None
:
if
anyTrue
:
r
=
int
Set
()
if
anyTrue
:
r
=
II
Set
()
else
:
return
None
return
r
,
(
id
,)
...
...
lib/python/SearchIndex/Lexicon.py
View file @
e6b5d0c3
...
...
@@ -92,11 +92,12 @@ mapping.
from
Splitter
import
Splitter
from
Persistence
import
Persistent
from
Acquisition
import
Implicit
import
OIBTree
,
BTree
OIBTree
=
OIBTree
.
BTree
OOBTree
=
BTree
.
BTree
import
re
from
BTrees.OIBTree
import
OIBTree
from
BTrees.IOBTree
import
IOBTree
from
BTrees.IIBTree
import
IISet
,
IITreeSet
from
randid
import
randid
class
Lexicon
(
Persistent
,
Implicit
):
"""Maps words to word ids and then some
...
...
@@ -112,13 +113,38 @@ class Lexicon(Persistent, Implicit):
stop_syn
=
{}
def
__init__
(
self
,
stop_syn
=
None
):
self
.
_lexicon
=
OIBTree
()
self
.
counter
=
0
self
.
clear
()
if
stop_syn
is
None
:
self
.
stop_syn
=
{}
else
:
self
.
stop_syn
=
stop_syn
def
clear
(
self
):
self
.
_lexicon
=
OIBTree
()
self
.
_inverseLex
=
IOBTree
()
def
_convertBTrees
(
self
,
threshold
=
200
):
if
(
type
(
self
.
_lexicon
)
is
OIBTree
and
type
(
getattr
(
self
,
'_inverseLex'
,
None
))
is
IOBTree
):
return
from
BTrees.convert
import
convert
lexicon
=
self
.
_lexicon
self
.
_lexicon
=
OIBTree
()
self
.
_lexicon
.
_p_jar
=
self
.
_p_jar
convert
(
lexicon
,
self
.
_lexicon
,
threshold
)
try
:
inverseLex
=
self
.
_inverseLex
self
.
_inverseLex
=
IOBTree
()
except
AttributeError
:
# older lexicons didn't have an inverse lexicon
self
.
_inverseLex
=
IOBTree
()
inverseLex
=
self
.
_inverseLex
self
.
_inverseLex
.
_p_jar
=
self
.
_p_jar
convert
(
inverseLex
,
self
.
_inverseLex
,
threshold
)
def
set_stop_syn
(
self
,
stop_syn
):
""" pass in a mapping of stopwords and synonyms. Format is:
...
...
@@ -135,13 +161,16 @@ class Lexicon(Persistent, Implicit):
def
getWordId
(
self
,
word
):
""" return the word id of 'word' """
if
self
.
_lexicon
.
has_key
(
word
):
return
self
.
_lexicon
[
word
]
else
:
return
self
.
assignWordId
(
word
)
wid
=
self
.
_lexicon
.
get
(
word
,
None
)
if
wid
is
None
:
wid
=
self
.
assignWordId
(
word
)
return
wid
set
=
getWordId
def
getWord
(
self
,
wid
):
""" post-2.3.1b2 method, will not work with unconverted lexicons """
return
self
.
_inverseLex
.
get
(
wid
,
None
)
def
assignWordId
(
self
,
word
):
"""Assigns a new word id to the provided word and returns it."""
...
...
@@ -149,17 +178,29 @@ class Lexicon(Persistent, Implicit):
if
self
.
_lexicon
.
has_key
(
word
):
return
self
.
_lexicon
[
word
]
if
not
hasattr
(
self
,
'counter'
):
self
.
counter
=
0
self
.
_lexicon
[
intern
(
word
)]
=
self
.
counter
self
.
counter
=
self
.
counter
+
1
return
self
.
counter
-
1
try
:
inverse
=
self
.
_inverseLex
except
AttributeError
:
# woops, old lexicom wo wids
inverse
=
self
.
_inverseLex
=
IOBTree
()
for
word
,
wid
in
self
.
_lexicon
.
items
():
inverse
[
wid
]
=
word
wid
=
randid
()
while
not
inverse
.
insert
(
wid
,
word
):
wid
=
randid
()
self
.
_lexicon
[
intern
(
word
)]
=
wid
return
wid
def
get
(
self
,
key
,
default
=
None
):
"""Return the matched word against the key."""
return
[
self
.
_lexicon
.
get
(
key
,
default
)]
r
=
IISet
()
wid
=
self
.
_lexicon
.
get
(
key
,
default
)
if
wid
is
not
None
:
r
.
insert
(
wid
)
return
r
def
__getitem__
(
self
,
key
):
return
self
.
get
(
key
)
...
...
@@ -176,21 +217,6 @@ class Lexicon(Persistent, Implicit):
return
Splitter
(
astring
,
words
)
def
grep
(
self
,
query
):
"""
regular expression search through the lexicon
he he.
Do not use unless you know what your doing!!!
"""
expr
=
re
.
compile
(
query
)
hits
=
[]
for
x
in
self
.
_lexicon
.
keys
():
if
expr
.
search
(
x
):
hits
.
append
(
x
)
return
hits
def
query_hook
(
self
,
q
):
""" we don't want to modify the query cuz we're dumb """
return
q
...
...
lib/python/SearchIndex/ResultList.py
View file @
e6b5d0c3
...
...
@@ -83,18 +83,33 @@
#
##############################################################################
from
BTrees.IIBTree
import
IIBucket
from
BTrees.IIBTree
import
weightedIntersection
,
weightedUnion
,
difference
from
BTrees.OOBTree
import
OOSet
,
union
class
ResultList
:
def
__init__
(
self
,
d
,
words
,
index
,
TupleType
=
type
(())):
self
.
_index
=
index
if
type
(
words
)
is
not
OOSet
:
words
=
OOSet
(
words
)
self
.
_words
=
words
if
(
type
(
d
)
is
TupleType
):
self
.
_dict
=
{
d
[
0
]
:
d
[
1
]
}
else
:
self
.
_dict
=
d
def
__len__
(
self
):
return
len
(
self
.
_dict
)
if
(
type
(
d
)
is
TupleType
):
d
=
IIBucket
((
d
,))
elif
type
(
d
)
is
not
IIBucket
:
d
=
IIBucket
(
d
)
self
.
_dict
=
d
self
.
__getitem__
=
d
.
__getitem__
try
:
self
.
__nonzero__
=
d
.
__nonzero__
except
:
pass
self
.
get
=
d
.
get
def
__nonzero__
(
self
):
return
not
not
self
.
_dict
def
__getitem__
(
self
,
key
):
return
self
.
_dict
[
key
]
def
bucket
(
self
):
return
self
.
_dict
def
keys
(
self
):
return
self
.
_dict
.
keys
()
...
...
@@ -103,42 +118,29 @@ class ResultList:
def
items
(
self
):
return
self
.
_dict
.
items
()
def
__and__
(
self
,
x
):
result
=
{}
dict
=
self
.
_dict
xdict
=
x
.
_dict
xhas
=
xdict
.
has_key
for
id
,
score
in
dict
.
items
():
if
xhas
(
id
):
result
[
id
]
=
xdict
[
id
]
+
score
return
self
.
__class__
(
result
,
self
.
_words
+
x
.
_words
,
self
.
_index
)
return
self
.
__class__
(
weightedIntersection
(
self
.
_dict
,
x
.
_dict
)[
1
],
union
(
self
.
_words
,
x
.
_words
),
self
.
_index
,
)
def
and_not
(
self
,
x
):
result
=
{}
dict
=
self
.
_dict
xdict
=
x
.
_dict
xhas
=
xdict
.
has_key
for
id
,
score
in
dict
.
items
():
if
not
xhas
(
id
):
result
[
id
]
=
score
return
self
.
__class__
(
result
,
self
.
_words
,
self
.
_index
)
return
self
.
__class__
(
difference
(
self
.
_dict
,
x
.
_dict
),
self
.
_words
,
self
.
_index
,
)
def
__or__
(
self
,
x
):
result
=
{}
dict
=
self
.
_dict
has
=
dict
.
has_key
xdict
=
x
.
_dict
xhas
=
xdict
.
has_key
for
id
,
score
in
dict
.
items
():
if
xhas
(
id
):
result
[
id
]
=
xdict
[
id
]
+
score
else
:
result
[
id
]
=
score
for
id
,
score
in
xdict
.
items
():
if
not
has
(
id
):
result
[
id
]
=
score
return
self
.
__class__
(
weightedUnion
(
self
.
_dict
,
x
.
_dict
)[
1
],
union
(
self
.
_words
,
x
.
_words
),
self
.
_index
,
)
return
self
.
__class__
(
result
,
self
.
_words
+
x
.
_words
,
self
.
_index
)
def
near
(
self
,
x
):
result
=
{}
result
=
IIBucket
dict
=
self
.
_dict
xdict
=
x
.
_dict
xhas
=
xdict
.
has_key
...
...
@@ -160,5 +162,6 @@ class ResultList:
else
:
score
=
(
score
+
xdict
[
id
])
/
d
result
[
id
]
=
score
return
self
.
__class__
(
result
,
self
.
_words
+
x
.
_words
,
self
.
_index
)
return
self
.
__class__
(
result
,
union
(
self
.
_words
,
x
.
_words
),
self
.
_index
)
lib/python/SearchIndex/TextIndex.py
View file @
e6b5d0c3
...
...
@@ -202,13 +202,13 @@ Notes on a new text index design
space.
"""
__version__
=
'$Revision: 1.25 $'
[
11
:
-
2
]
__version__
=
'$Revision: 1.26 $'
[
11
:
-
2
]
#XXX I strongly suspect that this is broken, but I'm not going to fix it. :(
from
Globals
import
Persistent
import
BTree
,
IIBTree
BTree
=
BTree
.
BTree
IIBTree
=
IIBTree
.
Bucket
from
intSet
import
intSet
from
BTrees.OOBTree
import
OOBTree
from
BTrees.IIBTree
import
IISet
,
IIBucket
import
operator
from
Splitter
import
Splitter
from
string
import
strip
...
...
@@ -250,7 +250,7 @@ class TextIndex(Persistent):
self
.
id
=
id
self
.
ignore_ex
=
ignore_ex
self
.
call_methods
=
call_methods
self
.
_index
=
BTree
()
self
.
_index
=
OOBTree
()
#XXX Is this really an IOBTree?
self
.
_syn
=
stop_word_dict
self
.
_reindex
()
else
:
...
...
@@ -261,7 +261,7 @@ class TextIndex(Persistent):
def
clear
(
self
):
self
.
_index
=
BTree
()
self
.
_index
=
OO
BTree
()
def
positions
(
self
,
docid
,
words
):
...
...
@@ -366,7 +366,7 @@ class TextIndex(Persistent):
index
[
word
]
=
r
elif
type
(
r
)
is
dictType
:
if
len
(
r
)
>
4
:
b
=
IIB
Tree
()
b
=
IIB
ucket
()
for
k
,
v
in
r
.
items
():
b
[
k
]
=
v
r
=
b
r
[
id
]
=
score
...
...
@@ -440,7 +440,7 @@ class TextIndex(Persistent):
for
key
in
keys
:
key
=
strip
(
key
)
if
not
key
:
continue
rr
=
int
Set
()
rr
=
II
Set
()
try
:
for
i
,
score
in
query
(
key
,
self
).
items
():
if
score
:
rr
.
insert
(
i
)
...
...
@@ -451,5 +451,5 @@ class TextIndex(Persistent):
r
=
r
.
intersection
(
rr
)
if
r
is
not
None
:
return
r
,
(
id
,)
return
int
Set
(),
(
id
,)
return
II
Set
(),
(
id
,)
lib/python/SearchIndex/UnIndex.py
View file @
e6b5d0c3
...
...
@@ -85,21 +85,25 @@
"""Simple column indices"""
__version__
=
'$Revision: 1.25 $'
[
11
:
-
2
]
__version__
=
'$Revision: 1.26 $'
[
11
:
-
2
]
from
Globals
import
Persistent
from
Acquisition
import
Implicit
import
BTree
import
IOBTree
from
intSet
import
intSet
import
operator
from
Missing
import
MV
import
string
,
pdb
from
zLOG
import
LOG
,
ERROR
from
types
import
*
from
BTrees.OOBTree
import
OOBTree
from
BTrees.IOBTree
import
IOBTree
from
BTrees.IIBTree
import
IITreeSet
,
IISet
,
union
import
BTrees.Length
import
sys
_marker
=
[]
def
nonEmpty
(
s
):
"returns true if a non-empty string or any other (nonstring) type"
...
...
@@ -115,7 +119,7 @@ class UnIndex(Persistent, Implicit):
meta_type
=
'Field Index'
def
__init__
(
self
,
id
=
None
,
ignore_ex
=
None
,
call_methods
=
None
):
def
__init__
(
self
,
id
,
ignore_ex
=
None
,
call_methods
=
None
):
"""Create an unindex
UnIndexes are indexes that contain two index components, the
...
...
@@ -123,6 +127,11 @@ class UnIndex(Persistent, Implicit):
index. The inverted index is so that objects can be unindexed
even when the old value of the object is not known.
e.g.
self._index = {datum:[documentId1, documentId2]}
self._unindex = {documentId:datum}
The arguments are:
'id' -- the name of the item attribute to index. This is
...
...
@@ -138,23 +147,53 @@ class UnIndex(Persistent, Implicit):
uninded methods for this to work.
"""
######################################################################
# For b/w compatability, have to allow __init__ calls with zero args
if
not
id
==
ignore_ex
==
call_methods
==
None
:
self
.
id
=
id
self
.
ignore_ex
=
ignore_ex
# currently unimplimented
self
.
call_methods
=
call_methods
self
.
_index
=
BTree
.
BTree
()
self
.
_unindex
=
IOBTree
.
BTree
()
else
:
pass
self
.
__len__
=
BTrees
.
Length
.
Length
()
# see __len__ method docstring
self
.
clear
()
def
clear
(
self
):
# inplace opportunistic conversion from old-style to new style BTrees
try
:
self
.
__len__
.
set
(
0
)
except
AttributeError
:
self
.
__len__
=
BTrees
.
Length
.
Length
()
self
.
_index
=
OOBTree
()
self
.
_unindex
=
IOBTree
()
def
_convertBTrees
(
self
,
threshold
=
200
):
if
type
(
self
.
_index
)
is
OOBTree
:
return
from
BTrees.convert
import
convert
_index
=
self
.
_index
self
.
_index
=
OOBTree
()
def
convertSet
(
s
,
IITreeSet
=
IITreeSet
):
if
len
(
s
)
==
1
:
try
:
return
s
[
0
]
# convert to int
except
:
pass
# This is just an optimization.
return
IITreeSet
(
s
)
convert
(
_index
,
self
.
_index
,
threshold
,
convertSet
)
_unindex
=
self
.
_unindex
self
.
_unindex
=
IOBTree
()
convert
(
_unindex
,
self
.
_unindex
,
threshold
)
self
.
__len__
=
BTrees
.
Length
.
Length
()
def
__nonzero__
(
self
):
return
not
not
self
.
_unindex
def
__len__
(
self
):
return
len
(
self
.
_unindex
)
"""Return the number of objects indexed.
This method is only called for indexes which have "old" BTrees,
and the *only* reason that UnIndexes maintain a __len__ is for
the searching code in the catalog during sorting.
"""
return
len
(
self
.
_unindex
)
def
histogram
(
self
):
"""Return a mapping which provides a histogram of the number of
...
...
@@ -173,31 +212,39 @@ class UnIndex(Persistent, Implicit):
return
self
.
_unindex
.
keys
()
def
getEntryForObject
(
self
,
documentId
,
default
=
MV
):
def
getEntryForObject
(
self
,
documentId
,
default
=
_marker
):
"""Takes a document ID and returns all the information we have
on that specific object."""
if
default
is
not
MV
:
return
self
.
_unindex
.
get
(
documentId
,
default
)
else
:
if
default
is
_marker
:
return
self
.
_unindex
.
get
(
documentId
)
else
:
return
self
.
_unindex
.
get
(
documentId
,
default
)
def
removeForwardIndexEntry
(
self
,
entry
,
documentId
):
"""Take the entry provided and remove any reference to documentId
in its entry in the index."""
indexRow
=
self
.
_index
.
get
(
entry
,
MV
)
if
indexRow
is
not
MV
:
global
_marker
indexRow
=
self
.
_index
.
get
(
entry
,
_marker
)
if
indexRow
is
not
_marker
:
try
:
indexRow
.
remove
(
documentId
)
if
len
(
indexRow
)
==
0
:
if
not
indexRow
:
del
self
.
_index
[
entry
]
try
:
self
.
__len__
.
change
(
-
1
)
except
AttributeError
:
pass
# pre-BTrees-module instance
except
AttributeError
:
# index row is an int
del
self
.
_index
[
entry
]
try
:
self
.
__len__
.
change
(
-
1
)
except
AttributeError
:
pass
# pre-BTrees-module instance
except
:
LOG
(
self
.
__class__
.
__name__
,
ERROR
,
(
'unindex_object could not remove '
'
integer i
d %s from index %s. This '
'
documentI
d %s from index %s. This '
'should not happen.'
%
(
str
(
documentId
),
str
(
self
.
id
))))
%
(
str
(
documentId
),
str
(
self
.
id
))),
''
,
sys
.
exc_info
())
else
:
LOG
(
self
.
__class__
.
__name__
,
ERROR
,
(
'unindex_object tried to retrieve set %s '
...
...
@@ -210,20 +257,25 @@ class UnIndex(Persistent, Implicit):
in the forward index.
This will also deal with creating the entire row if necessary."""
indexRow
=
self
.
_index
.
get
(
entry
,
MV
)
global
_marker
indexRow
=
self
.
_index
.
get
(
entry
,
_marker
)
# Make sure there's actually a row there already. If not, create
# an IntSet and stuff it in first.
if
indexRow
is
MV
:
self
.
_index
[
entry
]
=
intSet
()
indexRow
=
self
.
_index
[
entry
]
indexRow
.
insert
(
documentId
)
if
indexRow
is
_marker
:
self
.
_index
[
entry
]
=
documentId
try
:
self
.
__len__
.
change
(
1
)
except
AttributeError
:
pass
# pre-BTrees-module instance
else
:
try
:
indexRow
.
insert
(
documentId
)
except
AttributeError
:
# index row is an int
indexRow
=
IITreeSet
((
indexRow
,
documentId
))
self
.
_index
[
entry
]
=
indexRow
def
index_object
(
self
,
documentId
,
obj
,
threshold
=
None
):
""" index and object 'obj' with integer id 'documentId'"""
global
_marker
returnStatus
=
0
# First we need to see if there's anything interesting to look at
...
...
@@ -235,14 +287,16 @@ class UnIndex(Persistent, Implicit):
if
callable
(
datum
):
datum
=
datum
()
except
AttributeError
:
datum
=
MV
datum
=
_marker
# We don't want to do anything that we don't have to here, so we'll
# check to see if the new and existing information is the same.
oldDatum
=
self
.
_unindex
.
get
(
documentId
,
MV
)
if
not
datum
=
=
oldDatum
:
if
oldDatum
is
not
MV
:
oldDatum
=
self
.
_unindex
.
get
(
documentId
,
_marker
)
if
datum
!
=
oldDatum
:
if
oldDatum
is
not
_marker
:
self
.
removeForwardIndexEntry
(
oldDatum
,
documentId
)
if
datum
is
not
_marker
:
self
.
insertForwardIndexEntry
(
datum
,
documentId
)
self
.
_unindex
[
documentId
]
=
datum
...
...
@@ -250,21 +304,24 @@ class UnIndex(Persistent, Implicit):
return
returnStatus
def
unindex_object
(
self
,
documentId
):
""" Unindex the object with integer id 'documentId' and don't
raise an exception if we fail """
unindexRecord
=
self
.
_unindex
.
get
(
documentId
,
None
)
if
unindexRecord
is
None
:
global
_marker
unindexRecord
=
self
.
_unindex
.
get
(
documentId
,
_marker
)
if
unindexRecord
is
_marker
:
return
None
self
.
removeForwardIndexEntry
(
unindexRecord
,
documentId
)
try
:
del
self
.
_unindex
[
documentId
]
except
:
LOG
(
'UnIndex'
,
ERROR
,
'Attempt to unindex nonexistent document'
' with id %s'
%
documentId
)
def
_apply_index
(
self
,
request
,
cid
=
''
):
def
_apply_index
(
self
,
request
,
cid
=
''
,
type
=
type
,
None
=
None
):
"""Apply the index to query parameters given in the argument,
request
...
...
@@ -301,6 +358,7 @@ class UnIndex(Persistent, Implicit):
r
=
None
anyTrue
=
0
opr
=
None
IntType
=
type
(
1
)
if
request
.
has_key
(
id
+
'_usage'
):
# see if any usage params are sent to field
...
...
@@ -321,10 +379,7 @@ class UnIndex(Persistent, Implicit):
setlist
=
index
.
items
(
lo
)
for
k
,
set
in
setlist
:
if
r
is
None
:
r
=
set
else
:
r
=
r
.
union
(
set
)
r
=
union
(
r
,
set
)
except
KeyError
:
pass
...
...
@@ -334,16 +389,18 @@ class UnIndex(Persistent, Implicit):
for
key
in
keys
:
if
nonEmpty
(
key
):
anyTrue
=
1
set
=
get
(
key
)
set
=
get
(
key
,
None
)
if
set
is
not
None
:
if
r
is
None
:
r
=
set
else
:
r
=
r
.
union
(
set
)
r
=
union
(
r
,
set
)
if
type
(
r
)
is
IntType
:
r
=
IISet
((
r
,))
if
r
:
return
r
,
(
id
,)
if
r
is
None
:
if
anyTrue
:
r
=
int
Set
()
r
=
II
Set
()
else
:
return
None
...
...
@@ -369,8 +426,9 @@ class UnIndex(Persistent, Implicit):
name
=
self
.
id
elif
name
!=
self
.
id
:
return
[]
if
not
withLengths
:
return
tuple
(
filter
(
nonEmpty
,
self
.
_index
.
keys
())
filter
(
nonEmpty
,
self
.
_index
.
keys
())
)
else
:
rl
=
[]
...
...
@@ -379,10 +437,8 @@ class UnIndex(Persistent, Implicit):
else
:
rl
.
append
((
i
,
len
(
self
.
_index
[
i
])))
return
tuple
(
rl
)
def
keyForDocument
(
self
,
id
):
return
self
.
_unindex
(
id
)
def
clear
(
self
):
self
.
_index
=
BTree
.
BTree
()
self
.
_unindex
=
IOBTree
.
BTree
()
def
items
(
self
):
return
self
.
_index
.
items
()
lib/python/SearchIndex/UnKeywordIndex.py
View file @
e6b5d0c3
...
...
@@ -83,10 +83,10 @@
#
##############################################################################
from
UnIndex
import
UnIndex
,
MV
,
intSet
from
UnIndex
import
UnIndex
from
zLOG
import
LOG
,
ERROR
from
Missing
import
MV
from
types
import
*
from
types
import
StringType
from
BTrees.OOBTree
import
OOSet
,
difference
class
UnKeywordIndex
(
UnIndex
):
...
...
@@ -111,69 +111,54 @@ class UnKeywordIndex(UnIndex):
# self.id is the name of the index, which is also the name of the
# attribute we're interested in. If the attribute is callable,
# we'll do so.
try
:
newKeywords
=
getattr
(
obj
,
self
.
id
)
newKeywords
=
getattr
(
obj
,
self
.
id
,
None
)
if
callable
(
newKeywords
):
newKeywords
=
newKeywords
()
except
AttributeError
:
newKeywords
=
MV
if
type
(
newKeywords
)
is
StringType
:
newKeywords
=
(
newKeywords
,
)
if
newKeywords
is
None
:
self
.
unindex_object
(
documentId
)
return
0
# Now comes the fun part, we need to figure out what's changed
# if anything from the previous record.
oldKeywords
=
self
.
_unindex
.
get
(
documentId
,
MV
)
oldKeywords
=
self
.
_unindex
.
get
(
documentId
,
None
)
if
newKeywords
is
MV
:
self
.
unindex_object
(
documentId
)
return
0
elif
oldKeywords
is
MV
:
if
oldKeywords
is
None
:
try
:
for
kw
in
newKeywords
:
self
.
insertForwardIndexEntry
(
kw
,
documentId
)
except
TypeError
:
return
0
else
:
# We need the old keywords to be a mapping so we can manipulate
# them more easily.
tmp
=
{}
try
:
for
kw
in
oldKeywords
:
tmp
[
kw
]
=
None
oldKeywords
=
tmp
# Now we're going to go through the new keywords,
# and add those that aren't already indexed. If
# they are already indexed, just delete them from
# the list.
for
kw
in
newKeywords
:
if
oldKeywords
.
has_key
(
kw
):
del
oldKeywords
[
kw
]
else
:
if
type
(
oldKeywords
)
is
not
OOSet
:
oldKeywords
=
OOSet
(
oldKeywords
)
newKeywords
=
OOSet
(
newKeywords
)
self
.
unindex_objectKeywords
(
documentId
,
difference
(
oldKeywords
,
newKeywords
))
for
kw
in
difference
(
newKeywords
,
oldKeywords
):
self
.
insertForwardIndexEntry
(
kw
,
documentId
)
# Now whatever is left in oldKeywords are keywords
# that we no longer have, and need to be removed
# from the indexes.
for
kw
in
oldKeywords
.
keys
():
self
.
removeForwardIndexEntry
(
kw
,
documentId
)
except
TypeError
:
return
0
self
.
_unindex
[
documentId
]
=
newKeywords
[:]
# Make a copy
self
.
_unindex
[
documentId
]
=
list
(
newKeywords
)
return
1
def
unindex_object
(
self
,
documentId
):
def
unindex_object
Keywords
(
self
,
documentId
,
keywords
):
""" carefully unindex the object with integer id 'documentId'"""
keywords
=
self
.
_unindex
.
get
(
documentId
,
MV
)
if
keywords
is
MV
:
return
None
if
keywords
is
not
None
:
for
kw
in
keywords
:
self
.
removeForwardIndexEntry
(
kw
,
documentId
)
def
unindex_object
(
self
,
documentId
):
""" carefully unindex the object with integer id 'documentId'"""
keywords
=
self
.
_unindex
.
get
(
documentId
,
None
)
self
.
unindex_objectKeywords
(
documentId
,
keywords
)
try
:
del
self
.
_unindex
[
documentId
]
except
KeyError
:
LOG
(
'UnKeywordIndex'
,
ERROR
,
'Attempt to unindex nonexistent'
' document id %s'
%
documentId
)
lib/python/SearchIndex/UnTextIndex.py
View file @
e6b5d0c3
...
...
@@ -91,14 +91,11 @@ undo information so that objects can be unindexed when the old value
is no longer known.
"""
__version__
=
'$Revision: 1.3
7
$'
[
11
:
-
2
]
__version__
=
'$Revision: 1.3
8
$'
[
11
:
-
2
]
import
BTree
,
IIBTree
,
IOBTree
,
OIBTree
import
string
,
regex
,
regsub
,
ts_regex
import
operator
from
intSet
import
intSet
from
Globals
import
Persistent
from
Acquisition
import
Implicit
from
Splitter
import
Splitter
...
...
@@ -107,10 +104,11 @@ from Lexicon import Lexicon
from
ResultList
import
ResultList
from
types
import
*
BTree
=
BTree
.
BTree
# Regular generic BTree
IOBTree
=
IOBTree
.
BTree
# Integer -> Object
IIBucket
=
IIBTree
.
Bucket
# Integer -> Integer
OIBTree
=
OIBTree
.
BTree
# Object -> Integer
from
BTrees.IOBTree
import
IOBTree
from
BTrees.OIBTree
import
OIBTree
from
BTrees.IIBTree
import
IIBTree
,
IIBucket
,
IISet
,
IITreeSet
from
BTrees.IIBTree
import
difference
,
weightedIntersection
AndNot
=
'andnot'
And
=
'and'
...
...
@@ -141,7 +139,7 @@ class UnTextIndex(Persistent, Implicit):
meta_type
=
'Text Index'
def
__init__
(
self
,
id
=
None
,
ignore_ex
=
None
,
def
__init__
(
self
,
id
,
ignore_ex
=
None
,
call_methods
=
None
,
lexicon
=
None
):
"""Create an index
...
...
@@ -159,15 +157,11 @@ class UnTextIndex(Persistent, Implicit):
'lexicon' is the lexicon object to specify, if None, the
index will use a private lexicon."""
if
not
id
==
ignore_ex
==
call_methods
==
None
:
self
.
id
=
id
self
.
ignore_ex
=
ignore_ex
self
.
call_methods
=
call_methods
self
.
_index
=
IOBTree
()
self
.
_unindex
=
IOBTree
()
else
:
pass
self
.
clear
()
if
lexicon
is
None
:
## if no lexicon is provided, create a default one
...
...
@@ -185,32 +179,55 @@ class UnTextIndex(Persistent, Implicit):
in this way, but I don't see too much of a problem with it."""
if
type
(
vocab_id
)
is
not
StringType
:
vocab
=
vocab_id
return
vocab_id
else
:
vocab
=
getattr
(
self
,
vocab_id
)
return
vocab
.
lexicon
def
__nonzero__
(
self
):
return
not
not
self
.
_unindex
def
__len__
(
self
):
"""Return the number of objects indexed."""
return
len
(
self
.
_unindex
)
# Too expensive
#def __len__(self):
# """Return the number of objects indexed."""
#
return len(self._unindex)
def
clear
(
self
):
"""Reinitialize the text index."""
self
.
_index
=
IOBTree
()
self
.
_unindex
=
IOBTree
()
def
_convertBTrees
(
self
,
threshold
=
200
):
if
type
(
self
.
_index
)
is
IOBTree
:
return
from
BTrees.convert
import
convert
_index
=
self
.
_index
self
.
_index
=
IOBTree
()
def
histogram
(
self
):
def
convertScores
(
scores
,
type
=
type
,
TupleType
=
TupleType
,
IIBTree
=
IIBTree
):
if
type
(
scores
)
is
not
TupleType
and
type
(
scores
)
is
not
IIBTree
():
scores
=
IIBTree
(
scores
)
return
scores
convert
(
_index
,
self
.
_index
,
threshold
,
convertScores
)
_unindex
=
self
.
_unindex
self
.
_unindex
=
IOBTree
()
convert
(
_unindex
,
self
.
_unindex
,
threshold
)
def
histogram
(
self
,
type
=
type
,
TupleType
=
type
(())):
"""Return a mapping which provides a histogram of the number of
elements found at each point in the index."""
histogram
=
{}
histogram
=
IIBucket
()
for
(
key
,
value
)
in
self
.
_index
.
items
():
entry
=
len
(
value
)
if
type
(
value
)
is
TupleType
:
entry
=
1
else
:
entry
=
len
(
value
)
histogram
[
entry
]
=
histogram
.
get
(
entry
,
0
)
+
1
return
histogram
...
...
@@ -227,13 +244,8 @@ class UnTextIndex(Persistent, Implicit):
if
results
is
None
:
return
default
else
:
# Now that we've got them, let's resolve out the word
# references
resolved
=
[]
for
(
word
,
wordId
)
in
wordMap
:
if
wordId
in
results
:
resolved
.
append
(
word
)
return
tuple
(
resolved
)
return
tuple
(
map
(
self
.
getLexicon
(
self
.
_lexicon
).
getWord
,
results
))
def
insertForwardIndexEntry
(
self
,
entry
,
documentId
,
score
=
1
):
...
...
@@ -247,7 +259,8 @@ class UnTextIndex(Persistent, Implicit):
5+ bucket.
"""
indexRow
=
self
.
_index
.
get
(
entry
,
None
)
index
=
self
.
_index
indexRow
=
index
.
get
(
entry
,
None
)
if
indexRow
is
not
None
:
if
type
(
indexRow
)
is
TupleType
:
...
...
@@ -260,78 +273,29 @@ class UnTextIndex(Persistent, Implicit):
if
indexRow
[
0
]
==
documentId
:
if
indexRow
[
1
]
!=
score
:
indexRow
=
(
documentId
,
score
)
index
[
entry
]
=
indexRow
else
:
indexRow
=
{
indexRow
[
0
]:
indexRow
[
1
]
}
indexRow
[
documentId
]
=
score
self
.
_index
[
entry
]
=
indexRow
elif
type
(
indexRow
)
is
DictType
:
if
indexRow
.
has_key
(
documentId
):
if
indexRow
[
documentId
]
==
score
:
return
1
# No need to update
elif
len
(
indexRow
)
>
4
:
# We have a mapping (dictionary), but it has
# grown too large, so we'll convert it to a
# bucket.
newRow
=
IIBucket
()
for
(
k
,
v
)
in
indexRow
.
items
():
newRow
[
k
]
=
v
indexRow
=
newRow
indexRow
[
documentId
]
=
score
self
.
_index
[
entry
]
=
indexRow
indexRow
=
{
indexRow
[
0
]:
indexRow
[
1
],
documentId
:
score
,
}
index
[
entry
]
=
indexRow
else
:
if
indexRow
.
get
(
documentId
,
-
1
)
!=
score
:
# score changed (or new entry)
if
type
(
indexRow
)
is
DictType
:
indexRow
[
documentId
]
=
score
if
len
(
indexRow
)
>
3
:
# Big enough to give it's own database record
indexRow
=
IIBTree
(
indexRow
)
index
[
entry
]
=
indexRow
else
:
# We've got a IIBucket already.
if
indexRow
.
has_key
(
documentId
):
if
indexRow
[
documentId
]
==
score
:
return
1
indexRow
[
documentId
]
=
score
else
:
# We don't have any information at this point, so we'll
# put our first entry in, and use a tuple to save space
self
.
_index
[
entry
]
=
(
documentId
,
score
)
return
1
def
insertReverseIndexEntry
(
self
,
entry
,
documentId
):
"""Insert the correct entry into the reverse indexes for future
unindexing."""
newRow
=
self
.
_unindex
.
get
(
documentId
,
[])
if
newRow
:
# Catch cases where we don't need to modify anything
if
entry
in
newRow
:
return
1
newRow
.
append
(
entry
)
self
.
_unindex
[
documentId
]
=
newRow
def
removeReverseEntry
(
self
,
entry
,
documentId
):
"""Removes a single entry from the reverse index."""
newRow
=
self
.
_unindex
.
get
(
documentId
,
[])
if
newRow
:
try
:
newRow
.
remove
(
entry
)
except
ValueError
:
pass
# We don't have it, this is bad
self
.
_unindex
[
documentId
]
=
newRow
def
removeForwardEntry
(
self
,
entry
,
documentId
):
"""Remove a single entry from the forward index."""
currentRow
=
self
.
_index
.
get
(
entry
,
None
)
if
type
(
currentRow
)
is
TupleType
:
del
self
.
_index
[
entry
]
elif
currentRow
is
not
None
:
try
:
del
self
.
_index
[
entry
][
documentId
]
except
(
KeyError
,
IndexError
,
TypeError
):
LOG
(
'UnTextIndex'
,
ERROR
,
'unindex_object tried to unindex nonexistent'
' document %s'
%
str
(
documentId
))
index
[
entry
]
=
(
documentId
,
score
)
def
index_object
(
self
,
documentId
,
obj
,
threshold
=
None
):
""" Index an object:
...
...
@@ -354,49 +318,58 @@ class UnTextIndex(Persistent, Implicit):
except
AttributeError
:
return
0
lexicon
=
self
.
getLexicon
(
self
.
_lexicon
)
splitter
=
lexicon
.
Splitter
sourceWords
=
self
.
getLexicon
(
self
.
_lexicon
).
Splitter
(
source
)
wordList
=
OIBTree
()
wordScores
=
OIBTree
()
last
=
None
# Run through the words and score them
for
word
in
s
ourceWords
:
for
word
in
s
plitter
(
source
)
:
if
word
[
0
]
==
'
\
"
'
:
last
=
self
.
subindex
(
word
[
1
:
-
1
],
wordList
,
wordList
.
has_key
,
last
)
# XXX
else
:
if
wordList
.
has_key
(
word
):
if
word
!=
last
:
wordList
[
word
]
=
wordList
[
word
]
+
1
last
=
self
.
_subindex
(
word
[
1
:
-
1
],
wordScores
,
last
,
splitter
)
else
:
wordList
[
word
]
=
1
if
word
==
last
:
continue
last
=
word
wordScores
[
word
]
=
wordScores
.
get
(
word
,
0
)
+
1
lexicon
=
self
.
getLexicon
(
self
.
_lexicon
)
currentWordIds
=
self
.
_unindex
.
get
(
documentId
,
[])
wordCount
=
0
# Convert scores to use wids:
widScores
=
IIBucket
()
getWid
=
lexicon
.
getWordId
for
word
,
score
in
wordScores
.
items
():
widScores
[
getWid
(
word
)]
=
score
del
wordScores
currentWids
=
IISet
(
self
.
_unindex
.
get
(
documentId
,
[]))
# First deal with deleted words
# To do this, the first thing we have to do is convert the
# existing words to words, from wordIDS
wordListAsIds
=
OIBTree
()
for
word
,
score
in
wordList
.
items
():
wordListAsIds
[
lexicon
.
getWordId
(
word
)]
=
score
# Get rid of document words that are no longer indexed
self
.
unindex_objectWids
(
documentId
,
difference
(
currentWids
,
widScores
))
for
word
in
currentWordIds
:
if
not
wordListAsIds
.
has_key
(
word
):
self
.
removeForwardEntry
(
word
,
documentId
)
# Now index the words. Note that the new xIBTrees are clever
# enough to do nothing when there isn't a change. Woo hoo.
insert
=
self
.
insertForwardIndexEntry
for
wid
,
score
in
widScores
.
items
():
insert
(
wid
,
documentId
,
score
)
#import pdb; pdb.set_trace()
# Now we can deal with new/updated entries
for
wordId
,
score
in
wordListAsIds
.
items
():
self
.
insertForwardIndexEntry
(
wordId
,
documentId
,
score
)
self
.
insertReverseIndexEntry
(
wordId
,
documentId
)
wordCount
=
wordCount
+
1
# Save the unindexing info if it's changed:
wids
=
widScores
.
keys
()
if
wids
!=
currentWids
.
keys
():
self
.
_unindex
[
documentId
]
=
wids
# Return the number of words you indexed
return
wordCount
return
len
(
wids
)
def
_subindex
(
self
,
source
,
wordScores
,
last
,
splitter
):
"""Recursively handle multi-word synonyms"""
for
word
in
splitter
(
source
):
if
word
[
0
]
==
'
\
"
'
:
last
=
self
.
_subindex
(
word
[
1
:
-
1
],
wordScores
,
last
,
splitter
)
else
:
if
word
==
last
:
continue
last
=
word
wordScores
[
word
]
=
wordScores
.
get
(
word
,
0
)
+
1
return
last
def
unindex_object
(
self
,
i
):
""" carefully unindex document with integer id 'i' from the text
...
...
@@ -404,21 +377,41 @@ class UnTextIndex(Persistent, Implicit):
index
=
self
.
_index
unindex
=
self
.
_unindex
val
=
unindex
.
get
(
i
,
None
)
if
val
is
not
None
:
for
n
in
val
:
v
=
index
.
get
(
n
,
None
)
if
type
(
v
)
is
TupleType
:
del
index
[
n
]
elif
v
is
not
None
:
wids
=
unindex
.
get
(
i
,
None
)
if
wids
is
not
None
:
self
.
unindex_objectWids
(
i
,
wids
)
del
unindex
[
i
]
def
unindex_objectWids
(
self
,
i
,
wids
):
""" carefully unindex document with integer id 'i' from the text
index and do not fail if it does not exist """
index
=
self
.
_index
get
=
index
.
get
for
wid
in
wids
:
widScores
=
get
(
wid
,
None
)
if
widScores
is
None
:
LOG
(
'UnTextIndex'
,
ERROR
,
'unindex_object tried to unindex nonexistent'
' document, wid %s, %s'
%
(
i
,
wid
))
continue
if
type
(
widScores
)
is
TupleType
:
del
index
[
wid
]
else
:
try
:
del
index
[
n
][
i
]
del
widScores
[
i
]
if
widScores
:
if
type
(
widScores
)
is
DictType
:
if
len
(
widScores
)
==
1
:
# convert to tuple
widScores
=
widScores
.
items
()[
0
]
index
[
wid
]
=
widScores
else
:
del
index
[
wid
]
except
(
KeyError
,
IndexError
,
TypeError
):
LOG
(
'UnTextIndex'
,
ERROR
,
'unindex_object tried to unindex nonexistent'
' document %s'
%
str
(
i
))
del
unindex
[
i
]
def
__getitem__
(
self
,
word
):
"""Return an InvertedIndex-style result "list"
...
...
@@ -442,12 +435,13 @@ class UnTextIndex(Persistent, Implicit):
if
splitSource
[:
1
]
==
'"'
and
splitSource
[
-
1
:]
==
'"'
:
return
self
[
splitSource
]
r
=
self
.
_index
.
get
(
self
.
getLexicon
(
self
.
_lexicon
).
get
(
splitSource
)[
0
],
None
)
wids
=
self
.
getLexicon
(
self
.
_lexicon
).
get
(
splitSource
)
if
wids
:
r
=
self
.
_index
.
get
(
wids
[
0
],
None
)
if
r
is
None
:
r
=
{}
else
:
r
=
{}
return
ResultList
(
r
,
(
splitSource
,),
self
)
...
...
@@ -486,6 +480,7 @@ class UnTextIndex(Persistent, Implicit):
if
not
keys
or
not
string
.
strip
(
keys
):
return
None
keys
=
[
keys
]
r
=
None
for
key
in
keys
:
...
...
@@ -493,21 +488,12 @@ class UnTextIndex(Persistent, Implicit):
if
not
key
:
continue
rr
=
IIBucket
()
try
:
for
i
,
score
in
self
.
query
(
key
).
items
():
if
score
:
rr
[
i
]
=
score
except
KeyError
:
pass
if
r
is
None
:
r
=
rr
else
:
# Note that we *and*/*narrow* multiple search terms.
r
=
r
.
intersection
(
rr
)
b
=
self
.
query
(
key
).
bucket
()
w
,
r
=
weightedIntersection
(
r
,
b
)
if
r
is
not
None
:
return
r
,
(
self
.
id
,)
return
(
IIBucket
(),
(
self
.
id
,))
...
...
@@ -533,19 +519,6 @@ class UnTextIndex(Persistent, Implicit):
return
r
def
_subindex
(
self
,
isrc
,
d
,
old
,
last
):
src
=
self
.
getLexicon
(
self
.
_lexicon
).
Splitter
(
isrc
)
for
s
in
src
:
if
s
[
0
]
==
'
\
"
'
:
last
=
self
.
subindex
(
s
[
1
:
-
1
],
d
,
old
,
last
)
else
:
if
old
(
s
):
if
s
!=
last
:
d
[
s
]
=
d
[
s
]
+
1
else
:
d
[
s
]
=
1
return
last
def
query
(
self
,
s
,
default_operator
=
Or
,
ws
=
(
string
.
whitespace
,)):
""" This is called by TextIndexes. A 'query term' which is a
...
...
@@ -565,7 +538,6 @@ class UnTextIndex(Persistent, Implicit):
## For example, substitute wildcards, or translate words into
## various languages.
q
=
self
.
getLexicon
(
self
.
_lexicon
).
query_hook
(
q
)
# do some more parsing
q
=
parse2
(
q
,
default_operator
)
...
...
lib/python/SearchIndex/randid.py
0 → 100644
View file @
e6b5d0c3
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
#############################################################################
import
whrandom
def randid(randint=whrandom.randint, choice=whrandom.choice, signs=(-1, 1)):
    """Return a random nonzero id in +/-[1, 2000000000].

    The random callables are bound as defaults at definition time so the
    module-level `whrandom` name can be deleted afterwards.
    """
    sign = choice(signs)
    return sign * randint(1, 2000000000)
del
whrandom
lib/python/SearchIndex/tests/__init__.py
0 → 100644
View file @
e6b5d0c3
# This helps debugging.
lib/python/SearchIndex/tests/testSplitter.py
0 → 100644
View file @
e6b5d0c3
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
import
sys
try
:
import
ZODB
except
:
import
os
sys
.
path
.
insert
(
0
,
os
.
getcwd
())
sys
.
path
.
insert
(
0
,
'../..'
)
import
ZODB
import
unittest
from
SearchIndex.Splitter
import
Splitter
class TestSplitter(unittest.TestCase):
    # Exercises the SearchIndex Splitter's tokenization rules.
    # NOTE: `map(None, splitter)` is the Python 2 idiom for materializing
    # an iterable into a list.

    def testSplitNormalText(self):
        # Ordinary words come back in input order; the single-letter
        # word 'a' is expected to be dropped (see expected list).
        text = 'this is a long string of words'
        a = Splitter(text)
        r = map(None, a)
        assert r == ['this', 'is', 'long', 'string', 'of', 'words']

    def testDropNumeric(self):
        # Purely numeric tokens are expected to be discarded.
        text = '123 456 789 foobar without you nothing'
        a = Splitter(text)
        r = map(None, a)
        assert r == ['foobar', 'without', 'you', 'nothing'], r

    def testDropSingleLetterWords(self):
        # Single-letter words ('I') are expected to be discarded.
        text = 'without you I nothing'
        a = Splitter(text)
        r = map(None, a)
        assert r == ['without', 'you', 'nothing'], r

    def testSplitOnNonAlpha(self):
        # The apostrophe splits "I'm" into 'I' and 'm', both of which are
        # single-letter and therefore dropped.
        text = 'without you I\'m nothing'
        a = Splitter(text)
        r = map(None, a)
        assert r == ['without', 'you', 'nothing'], r
def test_suite():
    """Build the suite of all 'test*' methods on TestSplitter."""
    return unittest.makeSuite(TestSplitter, 'test')


def main():
    """Run the suite with a text-mode runner."""
    runner = unittest.TextTestRunner()
    runner.run(test_suite())


def debug():
    """Run the suite in debug mode so failures drop into the debugger."""
    test_suite().debug()


def pdebug():
    """Run the debug entry point under pdb."""
    import pdb
    pdb.run('debug()')


if __name__ == '__main__':
    if len(sys.argv) > 1:
        # Allow `python testSplitter.py debug` etc.: dispatch by name.
        globals()[sys.argv[1]]()
    else:
        main()
lib/python/SearchIndex/tests/testUnKeywordIndex.py
0 → 100644
View file @
e6b5d0c3
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
import
os
,
sys
sys
.
path
.
insert
(
0
,
os
.
getcwd
())
try
:
import
unittest
except
:
sys
.
path
[
0
]
=
os
.
path
.
join
(
sys
.
path
[
0
],
'..'
,
'..'
)
import
unittest
import
ZODB
from
SearchIndex.UnKeywordIndex
import
UnKeywordIndex
class Dummy:
    """Trivial document stub: wraps a single value exposed via foo()."""

    def __init__(self, foo):
        self._foo = foo

    def foo(self):
        # Accessor the keyword index calls to obtain the indexed value.
        return self._foo

    def __repr__(self):
        return '<Dummy: %s>' % self._foo

    __str__ = __repr__
class TestCase(unittest.TestCase):
    """
    Test KeywordIndex objects.
    """

    def setUp(self):
        """Build a fresh UnKeywordIndex over 'foo' plus canned documents
        and query dictionaries used by the tests below."""
        self._index = UnKeywordIndex('foo')
        self._marker = []
        # (documentId, document) pairs; the last one indexes the value 0
        # to prove falsy keywords are handled.
        self._values = [(0, Dummy(['a'])),
                        (1, Dummy(['a', 'b'])),
                        (2, Dummy(['a', 'b', 'c'])),
                        (3, Dummy(['a', 'b', 'c', 'a'])),
                        (4, Dummy(['a', 'b', 'c', 'd'])),
                        (5, Dummy(['a', 'b', 'c', 'e'])),
                        (6, Dummy(['a', 'b', 'c', 'e', 'f'])),
                        (7, Dummy([0]))]
        self._noop_req = {'bar': 123}          # no 'foo' key: index unused
        self._all_req = {'foo': ['a']}         # matches every doc but the last
        self._some_req = {'foo': ['e']}        # matches docs 5 and 6
        self._overlap_req = {'foo': ['c', 'e']}  # union: docs 2..6
        self._string_req = {'foo': 'a'}        # bare string instead of list
        self._zero_req = {'foo': [0]}          # falsy keyword

    def tearDown(self):
        """
        """

    def _populateIndex(self):
        # Index every canned document.
        for k, v in self._values:
            self._index.index_object(k, v)

    def _checkApply(self, req, expectedValues):
        """Apply `req` and assert the result holds exactly the ids of
        `expectedValues`."""
        result, used = self._index._apply_index(req)
        assert used == ('foo', )
        try:
            length = len(result)
        except:
            # Result may be a mapping-like object without len(); fall
            # back to its keys.
            result = result.keys()
            length = len(result)
        assert length == len(expectedValues), \
            '%s | %s' % (map(None, result),
                         map(lambda x: x[0], expectedValues))
        for k, v in expectedValues:
            assert k in result

    def testEmpty(self):
        # A freshly created index contains nothing and answers every
        # query with an empty result.
        assert len(self._index) == 0
        assert len(self._index.referencedObjects()) == 0
        assert self._index.getEntryForObject(1234) is None
        assert (self._index.getEntryForObject(1234, self._marker)
                is self._marker), self._index.getEntryForObject(1234)
        self._index.unindex_object(1234)  # nothrow
        assert self._index.hasUniqueValuesFor('foo')
        assert not self._index.hasUniqueValuesFor('bar')
        assert len(self._index.uniqueValues('foo')) == 0
        assert self._index._apply_index(self._noop_req) is None
        self._checkApply(self._all_req, [])
        self._checkApply(self._some_req, [])
        self._checkApply(self._overlap_req, [])
        self._checkApply(self._string_req, [])

    def testPopulated(self):
        self._populateIndex()
        values = self._values
        #assert len( self._index ) == len( values )
        assert len(self._index.referencedObjects()) == len(values)
        assert self._index.getEntryForObject(1234) is None
        assert (self._index.getEntryForObject(1234, self._marker)
                is self._marker)
        self._index.unindex_object(1234)  # nothrow
        for k, v in values:
            assert self._index.getEntryForObject(k) == v.foo()
        # BUG FIX: this assert was written as `assert (expr, msg)` --
        # asserting a non-empty tuple, which is always true and so never
        # fired.  ('a'..'f' plus 0 gives len(values)-1 unique keywords.)
        assert len(self._index.uniqueValues('foo')) == len(values) - 1, \
            len(values) - 1
        assert self._index._apply_index(self._noop_req) is None
        self._checkApply(self._all_req, values[:-1])
        self._checkApply(self._some_req, values[5:7])
        self._checkApply(self._overlap_req, values[2:7])
        self._checkApply(self._string_req, values[:-1])

    def testZero(self):
        # Make sure the falsy keyword 0 is indexed and searchable.
        self._populateIndex()
        values = self._values
        self._checkApply(self._zero_req, values[-1:])
        assert 0 in self._index.uniqueValues('foo')
def test_suite():
    """Assemble the default suite of TestCase's test* methods."""
    suite = unittest.makeSuite(TestCase)
    return suite


if __name__ == '__main__':
    runner = unittest.TextTestRunner()
    runner.run(test_suite())
lib/python/SearchIndex/tests/testUnTextIndex.py
0 → 100644
View file @
e6b5d0c3
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
import
sys
,
os
sys
.
path
.
insert
(
0
,
os
.
getcwd
())
try
:
import
unittest
except
:
sys
.
path
[
0
]
=
os
.
path
.
join
(
sys
.
path
[
0
],
'..'
,
'..'
)
import
unittest
class Dummy:
    """Attribute bag: every keyword argument becomes an instance
    attribute (e.g. Dummy(text='...') yields an object with .text)."""

    def __init__(self, **kw):
        for name, value in kw.items():
            setattr(self, name, value)
import
zLOG
def log_write(subsystem, severity, summary, detail, error):
    """Strict zLOG writer for tests: any message at PROBLEM severity or
    worse fails the test run immediately."""
    if severity < zLOG.PROBLEM:
        return
    assert 0, "%s(%s): %s" % (subsystem, severity, summary)


# Route all zLOG output through the strict writer above.
zLOG.log_write = log_write
import
ZODB
,
ZODB
.
DemoStorage
,
ZODB
.
FileStorage
import
SearchIndex.UnTextIndex
import
SearchIndex.GlobbingLexicon
class Tests(unittest.TestCase):
    # Exercises UnTextIndex both in-memory and persisted through a ZODB
    # FileStorage.  Uses Python 2 / ZODB 2 era APIs (has_key, the
    # get_transaction() builtin).

    def setUp(self):
        # Fresh in-memory index over the 'text' attribute, plus a
        # mutable dummy document whose .text the tests rewrite.
        self.index = SearchIndex.UnTextIndex.UnTextIndex('text')
        self.doc = Dummy(text='this is the time, when all good zopes')

    def dbopen(self):
        # Open (creating on first use) a FileStorage-backed database and
        # return the persistent UnTextIndex stored at root()['index'].
        n = 'fs_tmp__%s' % os.getpid()
        s = ZODB.FileStorage.FileStorage(n)
        db = self.db = ZODB.DB(s)
        self.jar = db.open()
        if not self.jar.root().has_key('index'):
            self.jar.root()['index'] = \
                SearchIndex.UnTextIndex.UnTextIndex('text')
            get_transaction().commit()
        return self.jar.root()['index']

    def dbclose(self):
        # Close connection and database; delete the attributes so
        # tearDown's hasattr() check knows the db is no longer open.
        self.jar.close()
        self.db.close()
        del self.jar
        del self.db

    def tearDown(self):
        # Abort any pending transaction, close the db if a test opened
        # one, and remove the temporary FileStorage files.
        get_transaction().abort()
        if hasattr(self, 'jar'):
            self.dbclose()
        os.system('rm -f fs_tmp__*')

    def checkSimpleAddDelete(self):
        "Check that we can add and delete an object without error"
        self.index.index_object(0, self.doc)
        self.index.index_object(1, self.doc)
        self.doc.text = 'spam is good, spam is fine, span span span'
        self.index.index_object(0, self.doc)  # reindex doc 0 with new text
        self.index.unindex_object(0)

    def checkPersistentUpdate1(self):
        "Check simple persistent indexing"
        index = self.dbopen()

        self.doc.text = 'this is the time, when all good zopes'
        index.index_object(0, self.doc)
        get_transaction().commit()

        self.doc.text = 'time waits for no one'
        index.index_object(1, self.doc)
        get_transaction().commit()
        self.dbclose()

        # Reopen to prove the indexed state survived a commit/close cycle.
        index = self.dbopen()

        r = index._apply_index({})
        assert r == None  # request lacks a 'text' key -> index unused

        r = index._apply_index({'text': 'python'})
        assert len(r) == 2 and r[1] == ('text',), 'incorrectly not used'
        assert not r[0], "should have no results"

        r = index._apply_index({'text': 'time'})
        r = list(r[0].keys())
        assert r == [0, 1], r

    def checkPersistentUpdate2(self):
        "Check less simple persistent indexing"
        index = self.dbopen()

        self.doc.text = 'this is the time, when all good zopes'
        index.index_object(0, self.doc)
        get_transaction().commit()

        self.doc.text = 'time waits for no one'
        index.index_object(1, self.doc)
        get_transaction().commit()

        self.doc.text = 'the next task is to test'
        index.index_object(3, self.doc)
        get_transaction().commit()

        self.doc.text = 'time time'
        index.index_object(2, self.doc)
        get_transaction().commit()
        self.dbclose()

        index = self.dbopen()

        r = index._apply_index({})
        assert r == None

        r = index._apply_index({'text': 'python'})
        assert len(r) == 2 and r[1] == ('text',), 'incorrectly not used'
        assert not r[0], "should have no results"

        # Docs 0, 1 and 2 contain 'time'; doc 3 does not.
        r = index._apply_index({'text': 'time'})
        r = list(r[0].keys())
        assert r == [0, 1, 2], r

    # Corpus for the glob/boolean query checks below.
    sample_texts = [
        """This is the time for all good men to come to
        the aid of their country""",
        """ask not what your country can do for you,
        ask what you can do for your country""",
        """Man, I can't wait to get to Montross!""",
        """Zope Public License (ZPL) Version 1.0""",
        """Copyright (c) Digital Creations. All rights reserved.""",
        """This license has been certified as Open Source(tm).""",
        """I hope I get to work on time""",
        ]

    def checkGlobQuery(self):
        "Check a glob query"
        index = self.dbopen()
        # Globbing requires the GlobbingLexicon instead of the default.
        index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()
        for i in range(len(self.sample_texts)):
            self.doc.text = self.sample_texts[i]
            index.index_object(i, self.doc)
            get_transaction().commit()
        self.dbclose()

        index = self.dbopen()

        # 'm*n' is expected to match texts 0 and 2 (per the assert).
        r = index._apply_index({'text': 'm*n'})
        r = list(r[0].keys())
        assert r == [0, 2], r

    def checkAndQuery(self):
        "Check an AND query"
        index = self.dbopen()
        index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()
        for i in range(len(self.sample_texts)):
            self.doc.text = self.sample_texts[i]
            index.index_object(i, self.doc)
            get_transaction().commit()
        self.dbclose()

        index = self.dbopen()

        # Only text 0 contains both 'time' and 'country'.
        r = index._apply_index({'text': 'time and country'})
        r = list(r[0].keys())
        assert r == [0,], r

    def checkOrQuery(self):
        "Check an OR query"
        index = self.dbopen()
        index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()
        for i in range(len(self.sample_texts)):
            self.doc.text = self.sample_texts[i]
            index.index_object(i, self.doc)
            get_transaction().commit()
        self.dbclose()

        index = self.dbopen()

        # Texts 0, 1 and 6 contain 'time' or 'country'.
        r = index._apply_index({'text': 'time or country'})
        r = list(r[0].keys())
        assert r == [0, 1, 6], r
def test_suite():
    """Collect the suite of all 'check*' methods on Tests."""
    return unittest.makeSuite(Tests, 'check')


def main():
    """Run the suite with a text-mode runner."""
    runner = unittest.TextTestRunner()
    runner.run(test_suite())


def debug():
    """Run the suite in debug mode so failures drop into the debugger."""
    test_suite().debug()


def pdebug():
    """Run the debug entry point under pdb."""
    import pdb
    pdb.run('debug()')


if __name__ == '__main__':
    if len(sys.argv) > 1:
        # Allow `python testUnTextIndex.py debug` etc.: dispatch by name.
        globals()[sys.argv[1]]()
    else:
        main()
lib/python/SearchIndex/tests/test_UnIndex.py
View file @
e6b5d0c3
...
...
@@ -82,7 +82,16 @@
# attributions are listed in the accompanying credits file.
#
##############################################################################
import
Zope
import
sys
sys
.
path
.
insert
(
0
,
'.'
)
try
:
import
Testing
except
ImportError
:
sys
.
path
[
0
]
=
'../../'
import
Testing
import
ZODB
import
unittest
from
SearchIndex.UnIndex
import
UnIndex
...
...
@@ -117,7 +126,7 @@ class TestCase( unittest.TestCase ):
,
(
5
,
Dummy
(
'abce'
)
)
,
(
6
,
Dummy
(
'abce'
)
)
,
(
7
,
Dummy
(
0
)
)
# Collector #1959
]
,
(
8
,
Dummy
(
None
)
)
]
self
.
_forward
=
{}
self
.
_backward
=
{}
for
k
,
v
in
self
.
_values
:
...
...
@@ -137,6 +146,7 @@ class TestCase( unittest.TestCase ):
,
'foo_usage'
:
'range:min:max'
}
self
.
_zero_req
=
{
'foo'
:
0
}
self
.
_none_req
=
{
'foo'
:
None
}
def
tearDown
(
self
):
...
...
@@ -149,6 +159,8 @@ class TestCase( unittest.TestCase ):
def
_checkApply
(
self
,
req
,
expectedValues
):
result
,
used
=
self
.
_index
.
_apply_index
(
req
)
if
hasattr
(
result
,
'keys'
):
result
=
result
.
keys
()
assert
used
==
(
'foo'
,
)
assert
len
(
result
)
==
len
(
expectedValues
),
\
'%s | %s'
%
(
map
(
None
,
result
),
expectedValues
)
...
...
@@ -177,10 +189,11 @@ class TestCase( unittest.TestCase ):
self
.
_checkApply
(
self
.
_range_req
,
[]
)
def
testPopulated
(
self
):
""" Test a populated FieldIndex """
self
.
_populateIndex
()
values
=
self
.
_values
assert
len
(
self
.
_index
)
==
len
(
values
)
assert
len
(
self
.
_index
)
==
len
(
values
)
-
1
#'abce' is duplicate
assert
len
(
self
.
_index
.
referencedObjects
()
)
==
len
(
values
)
assert
self
.
_index
.
getEntryForObject
(
1234
)
is
None
...
...
@@ -195,21 +208,62 @@ class TestCase( unittest.TestCase ):
assert
self
.
_index
.
_apply_index
(
self
.
_noop_req
)
is
None
self
.
_checkApply
(
self
.
_request
,
values
[
-
3
:
-
1
]
)
self
.
_checkApply
(
self
.
_min_req
,
values
[
2
:
-
1
]
)
self
.
_checkApply
(
self
.
_max_req
,
values
[
:
3
]
+
values
[
-
1
:
]
)
self
.
_checkApply
(
self
.
_request
,
values
[
-
4
:
-
2
]
)
self
.
_checkApply
(
self
.
_min_req
,
values
[
2
:
-
2
]
)
self
.
_checkApply
(
self
.
_max_req
,
values
[
:
3
]
+
values
[
-
2
:
]
)
self
.
_checkApply
(
self
.
_range_req
,
values
[
2
:
5
]
)
def
testZero
(
self
):
""" Make sure 0 gets indexed """
self
.
_populateIndex
()
values
=
self
.
_values
self
.
_checkApply
(
self
.
_zero_req
,
values
[
-
1
:
]
)
self
.
_checkApply
(
self
.
_zero_req
,
values
[
-
2
:
-
1
]
)
assert
0
in
self
.
_index
.
uniqueValues
(
'foo'
)
def
testNone
(
self
):
""" make sure None gets indexed """
self
.
_populateIndex
()
values
=
self
.
_values
self
.
_checkApply
(
self
.
_none_req
,
values
[
-
1
:])
assert
None
in
self
.
_index
.
uniqueValues
(
'foo'
)
def
testRange
(
self
):
"""Test a range search"""
index
=
UnIndex
(
'foo'
)
for
i
in
range
(
100
):
index
.
index_object
(
i
,
Dummy
(
i
%
10
))
r
=
index
.
_apply_index
({
'foo_usage'
:
'range:min:max'
,
'foo'
:
[
-
99
,
3
]})
assert
tuple
(
r
[
1
])
==
(
'foo'
,),
r
[
1
]
r
=
list
(
r
[
0
].
keys
())
expect
=
[
0
,
1
,
2
,
3
,
10
,
11
,
12
,
13
,
20
,
21
,
22
,
23
,
30
,
31
,
32
,
33
,
40
,
41
,
42
,
43
,
50
,
51
,
52
,
53
,
60
,
61
,
62
,
63
,
70
,
71
,
72
,
73
,
80
,
81
,
82
,
83
,
90
,
91
,
92
,
93
]
assert
r
==
expect
,
r
def test_suite():
    # All 'test*' methods of TestCase, in one suite.
    suite = unittest.makeSuite(TestCase)
    return suite
def debug():
    # Run without a TestResult so exceptions propagate to the caller.
    suite = test_suite()
    return suite.debug()
if
__name__
==
'__main__'
:
def pdebug():
    # Run the debug suite under the pdb debugger.
    import pdb
    pdb.run('debug()')
def main():
    # Text-mode run of the whole suite.
    runner = unittest.TextTestRunner()
    runner.run(test_suite())
if __name__ == '__main__':
    # Dispatch to a named entry point (main/debug/pdebug) if one was
    # given on the command line; default to main().
    if len(sys.argv) < 2:
        main()
    else:
        globals()[sys.argv[1]]()
lib/python/Testing/__init__.py
0 → 100644
View file @
e6b5d0c3
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
"""
Set up testing environment
$Id: __init__.py,v 1.2 2001/03/15 13:16:25 jim Exp $
"""
import
os
,
sys
startfrom
=
head
=
os
.
getcwd
()
while
1
:
sys
.
path
[
0
]
=
startfrom
try
:
import
ZODB
except
ImportError
:
head
=
os
.
path
.
split
(
startfrom
)[
0
]
if
head
==
''
:
raise
"Couldn't import ZODB"
startfrom
=
head
continue
else
:
break
os
.
environ
[
'SOFTWARE_HOME'
]
=
os
.
environ
.
get
(
'SOFTWARE_HOME'
,
startfrom
)
os
.
environ
[
'INSTANCE_HOME'
]
=
os
.
environ
.
get
(
'INSTANCE_HOME'
,
os
.
path
.
join
(
os
.
environ
[
'SOFTWARE_HOME'
],
'..'
,
'..'
)
)
lib/python/Testing/dispatcher.py
0 → 100644
View file @
e6b5d0c3
#!/usr/bin/env python1.5
# Dispatcher for usage inside Zope test environment
# Digital Creations
__version__
=
'$Id: dispatcher.py,v 1.2 2001/03/15 13:16:25 jim Exp $'
import
os
,
sys
,
re
,
string
import
threading
,
time
,
commands
,
profile
class Dispatcher:
    """A multi-purpose thread dispatcher.

    Runs a named test method in several threads at once, watches memory
    consumption while the threads run, and prints per-thread timing and
    per-thread data when everything has finished.
    """

    def __init__(self, func=''):
        # Log destination; replaceable via setlog().
        self.fp = sys.stderr
        # Names of methods run at thread start / end; see
        # register_startup() / register_teardown().
        self.f_startup = []
        self.f_teardown = []
        self.lastlog = ""
        # Serializes writes to th_data / runtime from worker threads.
        self.lock = threading.Lock()
        self.func = func
        self.profiling = 0
        # Docstring of the dispatched method, for the final report.
        # BUGFIX: getattr(self, '') raised AttributeError when the
        # default func='' was used; fall back to None instead.
        if self.func:
            self.doc = getattr(self, self.func).__doc__
        else:
            self.doc = None

    def setlog(self, fp):
        """Redirect log output to the file-like object fp."""
        self.fp = fp

    def log(self, s):
        """Write s to the log, suppressing consecutive duplicates."""
        if s == self.lastlog:
            return
        self.fp.write(s)
        self.fp.flush()
        self.lastlog = s

    def logn(self, s):
        """Like log(), but appends a newline."""
        if s == self.lastlog:
            return
        self.fp.write(s + '\n')
        self.fp.flush()
        self.lastlog = s

    # BUGFIX: both toggles were defined without 'self', so calling them
    # on an instance raised TypeError (and 'self' inside was unbound).
    def profiling_on(self):
        self.profiling = 1

    def profiling_off(self):
        self.profiling = 0

    def dispatcher(self, name='', *params):
        """ dispatcher for threads

        The dispatcher expects one or several tuples:
        (functionname, number of threads to start, args, keyword args)
        """
        # Sentinel entry so min()/max() below never see an empty list.
        self.mem_usage = [-1]
        mem_watcher = threading.Thread(None, self.mem_watcher,
                                       name='memwatcher')
        mem_watcher.start()
        self.start_test = time.time()
        self.name = name
        self.th_data = {}
        self.runtime = {}
        self._threads = []
        s2s = self.s2s
        # Create all worker threads first, then start them together.
        for func, numthreads, args, kw in params:
            for i in range(0, numthreads):
                kw['t_func'] = func
                th = threading.Thread(None, self.worker,
                                      name="TH_%s_%03d" % (func, i),
                                      args=args, kwargs=kw)
                self._threads.append(th)
        for th in self._threads:
            th.start()
        # Wait until only the main thread (and mem_watcher's exit path)
        # remains.
        while threading.activeCount() > 1:
            time.sleep(1)
        # Final report: identification, per-thread timing and data.
        self.logn('ID: %s ' % self.name)
        self.logn('FUNC: %s ' % self.func)
        self.logn('DOC: %s ' % self.doc)
        self.logn('Args: %s' % params)
        for th in self._threads:
            self.logn('%-30s ........................ %9.3f sec' %
                      (th.getName(), self.runtime[th.getName()]))
            for k, v in self.th_data[th.getName()].items():
                self.logn('%-30s %-15s = %s' % (' ', k, v))
        self.logn("")
        self.logn('Complete running time: %9.3f sec' %
                  (time.time() - self.start_test))
        # Drop the -1 sentinel before summarizing memory samples.
        if len(self.mem_usage) > 1:
            self.mem_usage.remove(-1)
        self.logn("Memory: start: %s, end: %s, low: %s, high: %s" % \
                  (s2s(self.mem_usage[0]), s2s(self.mem_usage[-1]),
                   s2s(min(self.mem_usage)), s2s(max(self.mem_usage))))
        self.logn('')

    def worker(self, *args, **kw):
        """Thread body: startup hooks, the target method, teardown hooks."""
        for name in self.f_startup:
            getattr(self, name)()
        target = getattr(self, kw['t_func'])
        del kw['t_func']  # not an argument of the target itself
        apply(target, args, kw)
        for name in self.f_teardown:
            getattr(self, name)()

    def th_setup(self):
        """ initialize thread with some environment data """
        env = {'start': time.time()}
        return env

    def th_teardown(self, env, **kw):
        """ famous last actions of thread """
        self.lock.acquire()
        self.th_data[threading.currentThread().getName()] = kw
        self.runtime[threading.currentThread().getName()] = \
            time.time() - env['start']
        self.lock.release()

    def getmem(self):
        """ try to determine the current memory usage """
        # Linux-only: read the resident page count from /proc/<pid>/statm.
        if not sys.platform in ['linux2']:
            return None
        cmd = '/bin/ps --no-headers -o pid,vsize --pid %s' % os.getpid()
        outp = commands.getoutput(cmd)
        pid, vsize = filter(lambda x: x != "", string.split(outp, " "))
        data = open("/proc/%d/statm" % os.getpid()).read()
        fields = re.split(" ", data)
        mem = string.atoi(fields[0]) * 4096  # pages -> bytes (4K pages)
        return mem

    def mem_watcher(self):
        """ thread for watching memory usage """
        running = 1
        while running == 1:
            self.mem_usage.append(self.getmem())
            time.sleep(1)
            # Stop once only this watcher and the main thread are left.
            if threading.activeCount() == 2:
                running = 0

    def register_startup(self, func):
        """Run method named func in every worker before its target."""
        self.f_startup.append(func)

    def register_teardown(self, func):
        """Run method named func in every worker after its target."""
        self.f_teardown.append(func)

    def s2s(self, n):
        """Format a byte count n as a human-readable string.

        Values of a gigabyte or more are returned unformatted.
        """
        if n < 1024.0:
            return "%8.3lf Bytes" % n
        if n < 1024.0 * 1024.0:
            return "%8.3lf KB" % (1.0 * n / 1024.0)
        if n < 1024.0 * 1024.0 * 1024.0:
            return "%8.3lf MB" % (1.0 * n / 1024.0 / 1024.0)
        else:
            return n
if
__name__
==
"__main__"
:
d
=
Dispatcher
()
print
d
.
getmem
()
pass
lib/python/Testing/makerequest.py
0 → 100644
View file @
e6b5d0c3
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
"""
Facilitates unit tests which requires an acquirable REQUEST from
ZODB objects
Usage:
import makerequest
app = makerequest.makerequest(Zope.app())
$Id: makerequest.py,v 1.2 2001/03/15 13:16:25 jim Exp $
"""
import
os
from
os
import
environ
from
sys
import
stdin
from
ZPublisher.HTTPRequest
import
HTTPRequest
from
ZPublisher.HTTPResponse
import
HTTPResponse
from
ZPublisher.BaseRequest
import
RequestContainer
def makerequest(app):
    """Wrap app so an HTTP REQUEST is acquirable from it.

    Builds a minimal GET request from the process environment and
    stdin, and returns app wrapped in a RequestContainer holding it.
    """
    response = HTTPResponse()
    environ['SERVER_NAME'] = 'foo'
    environ['SERVER_PORT'] = '80'
    environ['REQUEST_METHOD'] = 'GET'
    request = HTTPRequest(stdin, environ, response)
    return app.__of__(RequestContainer(REQUEST=request))
lib/python/Testing/unittest.py
0 → 100755
View file @
e6b5d0c3
#!/usr/bin/env python
"""
Python unit testing framework, based on Erich Gamma's JUnit and Kent Beck's
Smalltalk testing framework.
Further information is available in the bundled documentation, and from
http://pyunit.sourceforge.net/
This module contains the core framework classes that form the basis of
specific test cases and suites (TestCase, TestSuite etc.), and also a
text-based utility class for running the tests and reporting the results
(TextTestRunner).
Copyright (c) 1999, 2000, 2001 Steve Purcell
This module is free software, and you may redistribute it and/or modify
it under the same terms as Python itself, so long as this copyright message
and disclaimer are retained in their original form.
IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
THIS CODE, EVEN IF THE AUTHOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE. THE CODE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS,
AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
"""
__author__
=
"Steve Purcell"
__email__
=
"stephen_purcell@yahoo.com"
__version__
=
"$Revision: 1.2 $"
[
11
:
-
2
]
import
time
import
sys
import
traceback
import
string
import
os
##############################################################################
# A platform-specific concession to help the code work for JPython users
##############################################################################
plat = string.lower(sys.platform)
# JPython reports 'java...' or '...jdk...' in sys.platform.
_isJPython = string.find(plat, 'java') >= 0 or string.find(plat, 'jdk') >= 0
del plat  # temporary; keep the module namespace clean
##############################################################################
# Test framework core
##############################################################################
class TestResult:
    """Accumulates the outcome of test runs.

    TestCase and TestSuite manage instances of this class automatically;
    test authors do not normally touch it.  An instance records how many
    tests were started and keeps (testcase, exceptioninfo) tuples for
    every failure and error, where exceptioninfo is a tuple of values as
    returned by sys.exc_info().
    """
    def __init__(self, args=(), kw={}):
        self.failures = []
        self.errors = []
        self.testsRun = 0
        self.shouldStop = 0
        self.__args = args
        self.__kw = kw

    def startTest(self, test):
        "Called when the given test is about to be run"
        self.testsRun = self.testsRun + 1

    def stopTest(self, test):
        "Called when the given test has been run"
        pass

    def addError(self, test, err):
        "Called when an error has occurred"
        self.errors.append((test, err))

    def addFailure(self, test, err):
        "Called when a failure has occurred"
        self.failures.append((test, err))

    def wasSuccessful(self):
        "Tells whether or not this result was a success"
        return not self.failures and not self.errors

    def stop(self):
        "Indicates that the tests should be aborted"
        self.shouldStop = 1

    def __repr__(self):
        return "<%s run=%i errors=%i failures=%i>" % (
            self.__class__, self.testsRun,
            len(self.errors), len(self.failures))
class TestCase:
    """A class whose instances are single test cases.

    Test authors should subclass TestCase for their own tests. Construction
    and deconstruction of the test's environment ('fixture') can be
    implemented by overriding the 'setUp' and 'tearDown' methods respectively.

    By default, the test code itself should be placed in a method named
    'runTest'.

    If the fixture may be used for many test cases, create as
    many test methods as are needed. When instantiating such a TestCase
    subclass, specify in the constructor arguments the name of the test method
    that the instance is to execute.

    If it is necessary to override the __init__ method, the base class
    __init__ method must always be called.
    """
    def __init__(self, methodName='runTest', *args, **kw):
        """Create an instance of the class that will use the named test
        method when executed. Raises a ValueError if the instance does
        not have a method with the specified name.
        """
        try:
            self.__testMethodName = methodName
            testMethod = getattr(self, methodName)
            self.__testMethodDoc = testMethod.__doc__
        except AttributeError:
            raise ValueError, "no such test method in %s: %s" % \
                  (self.__class__, methodName)
        # Extra positional/keyword arguments are forwarded to the test
        # method when it is invoked by __call__.
        self.__args = args
        self.__kw = kw

    def setUp(self):
        "Hook method for setting up the test fixture before exercising it."
        pass

    def tearDown(self):
        "Hook method for deconstructing the test fixture after testing it."
        pass

    def countTestCases(self):
        # A TestCase always represents exactly one test.
        return 1

    def defaultTestResult(self):
        # Result object used when run()/__call__ receives none.
        return TestResult(self.__args, self.__kw)

    def shortDescription(self):
        """Returns a one-line description of the test, or None if no
        description has been provided.

        The default implementation of this method returns the first line of
        the specified test method's docstring.
        """
        doc = self.__testMethodDoc
        return doc and string.strip(string.split(doc, "\n")[0]) or None

    def id(self):
        return "%s.%s" % (self.__class__, self.__testMethodName)

    def __str__(self):
        return "%s (%s)" % (self.__testMethodName, self.__class__)

    def __repr__(self):
        return "<%s testMethod=%s>" % \
               (self.__class__, self.__testMethodName)

    def run(self, result=None):
        return self(result)

    def __call__(self, result=None):
        if result is None: result = self.defaultTestResult()
        result.startTest(self)
        testMethod = getattr(self, self.__testMethodName)
        try:
            try:
                self.setUp()
            except:
                # A broken fixture counts as an error and skips the test.
                result.addError(self, self.__exc_info())
                return
            try:
                apply(testMethod, self.__args, self.__kw)
            except AssertionError, e:
                # Failed assertions are reported as failures ...
                result.addFailure(self, self.__exc_info())
            except:
                # ... any other exception is an error.
                result.addError(self, self.__exc_info())
            # tearDown runs whenever setUp succeeded, even after failure.
            try:
                self.tearDown()
            except:
                result.addError(self, self.__exc_info())
        finally:
            result.stopTest(self)

    def debug(self):
        """Run the test without collecting errors in a TestResult"""
        self.setUp()
        getattr(self, self.__testMethodName)()
        self.tearDown()

    def assert_(self, expr, msg=None):
        """Equivalent of built-in 'assert', but is not optimised out when
        __debug__ is false.
        """
        if not expr:
            raise AssertionError, msg

    failUnless = assert_

    def failIf(self, expr, msg=None):
        "Fail the test if the expression is true."
        apply(self.assert_,(not expr, msg))

    def assertRaises(self, excClass, callableObj, *args, **kwargs):
        """Assert that an exception of class excClass is thrown
        by callableObj when invoked with arguments args and keyword
        arguments kwargs. If a different type of exception is
        thrown, it will not be caught, and the test case will be
        deemed to have suffered an error, exactly as for an
        unexpected exception.
        """
        try:
            apply(callableObj, args, kwargs)
        except excClass:
            return
        else:
            if hasattr(excClass, '__name__'): excName = excClass.__name__
            else: excName = str(excClass)
            raise AssertionError, excName

    def assertEqual(self, first, second, msg=None):
        """Assert that the two objects are equal as determined by the '=='
        operator.
        """
        self.assert_((first == second), msg or '%s != %s' % (first, second))

    def fail(self, msg=None):
        """Fail immediately, with the given message."""
        raise AssertionError, msg

    def __exc_info(self):
        """Return a version of sys.exc_info() with the traceback frame
        minimised; usually the top level of the traceback frame is not
        needed.
        """
        exctype, excvalue, tb = sys.exc_info()
        newtb = tb.tb_next
        if newtb is None:
            return (exctype, excvalue, tb)
        return (exctype, excvalue, newtb)
class TestSuite:
    """A composite test made up of a number of TestCases.

    Create an instance of TestSuite, then add test case instances.  When
    all tests have been added, the suite can be passed to a test runner
    such as TextTestRunner.  Individual tests run in the order in which
    they were added and the results are aggregated.  Subclasses must not
    forget to call the base class constructor.
    """
    def __init__(self, tests=()):
        self._tests = []
        self.addTests(tests)

    def __repr__(self):
        return "<%s tests=%s>" % (self.__class__, self._tests)
    __str__ = __repr__

    def countTestCases(self):
        # Sum the counts of all contained tests (nested suites recurse).
        total = 0
        for t in self._tests:
            total = total + t.countTestCases()
        return total

    def addTest(self, test):
        self._tests.append(test)

    def addTests(self, tests):
        for t in tests:
            self.addTest(t)

    def run(self, result):
        return self(result)

    def __call__(self, result):
        # Honor an abort request between tests.
        for t in self._tests:
            if result.shouldStop:
                break
            t(result)
        return result

    def debug(self):
        """Run the tests without collecting errors in a TestResult"""
        for t in self._tests:
            t.debug()
class FunctionTestCase(TestCase):
    """A test case that wraps a free-standing test function.

    Useful for slipping pre-existing test functions into the PyUnit
    framework.  Optional set-up and tidy-up callables can be supplied;
    as with TestCase, the tidy-up ('tearDown') function runs whenever
    the set-up ('setUp') function ran successfully.
    """
    def __init__(self, testFunc, setUp=None, tearDown=None,
                 description=None):
        TestCase.__init__(self)
        self.__testFunc = testFunc
        self.__setUpFunc = setUp
        self.__tearDownFunc = tearDown
        self.__description = description

    def setUp(self):
        if self.__setUpFunc is None:
            return
        self.__setUpFunc()

    def tearDown(self):
        if self.__tearDownFunc is None:
            return
        self.__tearDownFunc()

    def runTest(self):
        self.__testFunc()

    def id(self):
        return self.__testFunc.__name__

    def __str__(self):
        return "%s (%s)" % (self.__class__, self.__testFunc.__name__)

    def __repr__(self):
        return "<%s testFunc=%s>" % (self.__class__, self.__testFunc)

    def shortDescription(self):
        # An explicit description wins; otherwise fall back to the first
        # line of the wrapped function's docstring.
        if self.__description is not None:
            return self.__description
        doc = self.__testFunc.__doc__
        return doc and string.strip(string.split(doc, "\n")[0]) or None
##############################################################################
# Convenience functions
##############################################################################
def getTestCaseNames(testCaseClass, prefix, sortUsing=cmp):
    """Extracts all the names of functions in the given test case class
    and its base classes that start with the given prefix. This is used
    by makeSuite().
    """
    names = []
    for attr in dir(testCaseClass):
        if attr[:len(prefix)] == prefix:
            names.append(attr)
    # Recurse into base classes; duplicates are tolerated, matching the
    # historical behaviour.
    for baseclass in testCaseClass.__bases__:
        names = names + getTestCaseNames(baseclass, prefix, sortUsing=None)
    if sortUsing:
        names.sort(sortUsing)
    return names
def makeSuite(testCaseClass, prefix='test', sortUsing=cmp,
              suiteClass=TestSuite):
    """Returns a TestSuite instance built from all of the test functions
    in the given test case class whose names begin with the given
    prefix. The cases are sorted by their function names using the
    supplied comparison function, which defaults to 'cmp'.
    """
    cases = []
    for name in getTestCaseNames(testCaseClass, prefix, sortUsing):
        cases.append(testCaseClass(name))
    return suiteClass(cases)
def findTestCases(module, prefix='test', sortUsing=cmp,
                  suiteClass=TestSuite):
    """Return a suite containing a sub-suite for every TestCase
    subclass found in the given module, built via makeSuite() with the
    same prefix / sort / suite-class settings.
    """
    import types
    suites = []
    for name in dir(module):
        candidate = getattr(module, name)
        # Only classic classes deriving from TestCase are collected.
        if type(candidate) == types.ClassType \
           and issubclass(candidate, TestCase):
            suites.append(makeSuite(candidate, prefix=prefix,
                                    sortUsing=sortUsing,
                                    suiteClass=suiteClass))
    return suiteClass(suites)
def createTestInstance(name, module=None, suiteClass=TestSuite):
    """Finds tests by their name, optionally only within the given module.

    Return the newly-constructed test, ready to run. If the name contains a ':'
    then the portion of the name after the colon is used to find a specific
    test case within the test case class named before the colon.

    Examples:
     findTest('examples.listtests.suite')
        -- returns result of calling 'suite'
     findTest('examples.listtests.ListTestCase:checkAppend')
        -- returns result of calling ListTestCase('checkAppend')
     findTest('examples.listtests.ListTestCase:check-')
        -- returns result of calling makeSuite(ListTestCase, prefix="check")
    """
    # Split off the optional ':casename' or ':prefix-' suffix.
    spec = string.split(name, ':')
    if len(spec) > 2: raise ValueError, "illegal test name: %s" % name
    if len(spec) == 1:
        testName = spec[0]
        caseName = None
    else:
        testName, caseName = spec
    parts = string.split(testName, '.')
    if module is None:
        # No module given: the dotted name must be at least
        # 'module.attribute', so the module portion can be imported.
        if len(parts) < 2:
            raise ValueError, "incomplete test name: %s" % name
        constructor = __import__(string.join(parts[:-1], '.'))
        # __import__ returns the top-level package; the attribute walk
        # below therefore starts from the first sub-name.
        parts = parts[1:]
    else:
        constructor = module
    # Walk the remaining dotted names down to the target object.
    for part in parts:
        constructor = getattr(constructor, part)
    if not callable(constructor):
        raise ValueError, "%s is not a callable object" % constructor
    if caseName:
        if caseName[-1] == '-':
            # A trailing '-' selects all test methods with this prefix.
            prefix = caseName[:-1]
            if not prefix:
                raise ValueError, "prefix too short: %s" % name
            test = makeSuite(constructor, prefix=prefix,
                             suiteClass=suiteClass)
        else:
            # Otherwise the case name selects a single test method.
            test = constructor(caseName)
    else:
        test = constructor()
    # Sanity check: whatever was built must look like a test object
    # (both TestCase and TestSuite provide countTestCases).
    if not hasattr(test, "countTestCases"):
        raise TypeError, \
              "object %s found with spec %s is not a test" % (test, name)
    return test
##############################################################################
# Text UI
##############################################################################
class _WritelnDecorator:
    """Used to decorate file-like objects with a handy 'writeln' method"""

    def __init__(self, stream):
        # Wrap the stream and remember the platform line separator so
        # that writeln() emits the right end-of-line sequence.
        self.stream = stream
        if not _isJPython:
            self.linesep = os.linesep
        else:
            import java.lang.System
            self.linesep = java.lang.System.getProperty("line.separator")

    def __getattr__(self, name):
        # Delegate everything else (write, flush, ...) to the stream.
        return getattr(self.stream, name)

    def writeln(self, *args):
        # Write the given arguments, if any, then a line separator.
        if args:
            apply(self.write, args)
        self.write(self.linesep)
class _JUnitTextTestResult(TestResult):
    """A test result class that can print formatted text results to a stream.

    Used by JUnitTextTestRunner.
    """

    def __init__(self, stream):
        self.stream = stream
        TestResult.__init__(self)

    def addError(self, test, error):
        """Record the error, echo 'E'; a KeyboardInterrupt aborts the run."""
        TestResult.addError(self, test, error)
        self.stream.write('E')
        self.stream.flush()
        if error[0] is KeyboardInterrupt:
            self.shouldStop = 1

    def addFailure(self, test, error):
        """Record the failure and echo 'F'."""
        TestResult.addFailure(self, test, error)
        self.stream.write('F')
        self.stream.flush()

    def startTest(self, test):
        """Echo a '.' as each test starts."""
        TestResult.startTest(self, test)
        self.stream.write('.')
        self.stream.flush()

    def printNumberedErrors(self, errFlavour, errors):
        """Print a numbered list of the given (test, error) pairs."""
        if not errors:
            return
        if len(errors) == 1:
            self.stream.writeln("There was 1 %s:" % errFlavour)
        else:
            self.stream.writeln("There were %i %ss:"
                                % (len(errors), errFlavour))
        number = 0
        for test, error in errors:
            number = number + 1
            tracebackText = string.join(
                apply(traceback.format_exception, error), "")
            self.stream.writeln("%i) %s" % (number, test))
            self.stream.writeln(tracebackText)

    def printErrors(self):
        """Print the numbered list of recorded errors."""
        self.printNumberedErrors("error", self.errors)

    def printFailures(self):
        """Print the numbered list of recorded failures."""
        self.printNumberedErrors("failure", self.failures)

    def printHeader(self):
        """Print the summary banner: OK, or the failure/error counts."""
        self.stream.writeln()
        if self.wasSuccessful():
            self.stream.writeln("OK (%i tests)" % self.testsRun)
        else:
            self.stream.writeln("!!!FAILURES!!!")
            self.stream.writeln("Test Results")
            self.stream.writeln()
            self.stream.writeln("Run: %i ; Failures: %i ; Errors: %i"
                                % (self.testsRun,
                                   len(self.failures),
                                   len(self.errors)))

    def printResult(self):
        """Print the full report: header, then errors, then failures."""
        self.printHeader()
        self.printErrors()
        self.printFailures()
class JUnitTextTestRunner:
    """A test runner class that displays results in textual form.

    The display format approximates that of JUnit's 'textui' test
    runner. This test runner may be removed in a future version of
    PyUnit.
    """

    def __init__(self, stream=sys.stderr):
        self.stream = _WritelnDecorator(stream)

    def run(self, test):
        "Run the given test case or test suite."
        result = _JUnitTextTestResult(self.stream)
        started = time.time()
        test(result)
        stopped = time.time()
        self.stream.writeln()
        self.stream.writeln("Time: %.3fs" % float(stopped - started))
        result.printResult()
        return result
##############################################################################
# Verbose text UI
##############################################################################
class _VerboseTextTestResult(TestResult):
    """A test result class that can print formatted text results to a stream.

    Used by VerboseTextTestRunner.
    """
    def __init__(self, stream, descriptions):
        # descriptions: when true, startTest() prints the test's short
        # description (if any) instead of its str() form.
        TestResult.__init__(self)
        self.stream = stream
        # Last test for which an error/failure was printed; lets
        # stopTest() decide whether to finish the line with "ok".
        self.lastFailure = None
        self.descriptions = descriptions

    def startTest(self, test):
        # Announce the test and leave the line open (" ... ") so the
        # outcome can be appended later.
        TestResult.startTest(self, test)
        if self.descriptions:
            self.stream.write(test.shortDescription() or str(test))
        else:
            self.stream.write(str(test))
        self.stream.write(" ... ")

    def stopTest(self, test):
        # Print "ok" only if no error/failure was reported for this test.
        TestResult.stopTest(self, test)
        if self.lastFailure is not test:
            self.stream.writeln("ok")

    def addError(self, test, err):
        TestResult.addError(self, test, err)
        self._printError("ERROR", test, err)
        self.lastFailure = test
        # A KeyboardInterrupt inside a test aborts the whole run.
        if err[0] is KeyboardInterrupt:
            self.shouldStop = 1

    def addFailure(self, test, err):
        TestResult.addFailure(self, test, err)
        self._printError("FAIL", test, err)
        self.lastFailure = test

    def _printError(self, flavour, test, err):
        # Print a banner, the flavour (ERROR/FAIL) and the formatted
        # traceback, each line prefixed with a tab.
        # NOTE(review): errLines is never used -- looks like leftover
        # scaffolding; confirm before removing.
        errLines = []
        separator1 = "\t" + '=' * 70
        separator2 = "\t" + '-' * 70
        # Skip reprinting if a report was already emitted for this test
        # (e.g. a test reported as both an error and a failure).
        if not self.lastFailure is test:
            self.stream.writeln()
            self.stream.writeln(separator1)
            self.stream.writeln("\t%s" % flavour)
            self.stream.writeln(separator2)
            for line in apply(traceback.format_exception, err):
                # format_exception entries may span several lines; split
                # and drop the empty piece after the trailing newline.
                for l in string.split(line, "\n")[:-1]:
                    self.stream.writeln("\t%s" % l)
            self.stream.writeln(separator1)
class VerboseTextTestRunner:
    """A test runner class that displays results in textual form.

    It prints out the names of tests as they are run, errors as they
    occur, and a summary of the results at the end of the test run.
    """

    def __init__(self, stream=sys.stderr, descriptions=1):
        # descriptions: pass-through flag for _VerboseTextTestResult
        # (print short descriptions instead of str(test) when true).
        self.stream = _WritelnDecorator(stream)
        self.descriptions = descriptions

    def run(self, test):
        "Run the given test case or test suite."
        result = _VerboseTextTestResult(self.stream, self.descriptions)
        startTime = time.time()
        test(result)
        stopTime = time.time()
        timeTaken = float(stopTime - startTime)
        self.stream.writeln("-" * 78)
        run = result.testsRun
        # Bug fix: pluralize for any count other than exactly one, so
        # zero tests prints "Ran 0 tests" (was 'run > 1', which printed
        # "Ran 0 test").
        self.stream.writeln("Ran %d test%s in %.3fs"
                            % (run, run != 1 and "s" or "", timeTaken))
        self.stream.writeln()
        if not result.wasSuccessful():
            self.stream.write("FAILED (")
            failed, errored = map(len, (result.failures, result.errors))
            if failed:
                self.stream.write("failures=%d" % failed)
            if errored:
                if failed: self.stream.write(", ")
                self.stream.write("errors=%d" % errored)
            self.stream.writeln(")")
        else:
            self.stream.writeln("OK")
        return result
# Which flavour of TextTestRunner is the default?
# 'TextTestRunner' is the name callers import; it currently points at
# the verbose implementation.
TextTestRunner = VerboseTextTestRunner
##############################################################################
# Facilities for running tests from the command line
##############################################################################
class TestProgram:
    """A command-line program that runs a set of tests; this is primarily
       for making test modules conveniently executable.
    """
    # %(progName)s is interpolated from self.__dict__ by usageExit().
    USAGE = """\
Usage: %(progName)s [-h|--help] [test[:(casename|prefix-)]] [...]

Examples:
  %(progName)s                               - run default set of tests
  %(progName)s MyTestSuite                   - run suite 'MyTestSuite'
  %(progName)s MyTestCase:checkSomething     - run MyTestCase.checkSomething
  %(progName)s MyTestCase:check-             - run all 'check*' test methods
                                               in MyTestCase
"""
    def __init__(self, module='__main__', defaultTest=None,
                 argv=None, testRunner=None, suiteClass=TestSuite):
        # module may be a module object or a (possibly dotted) module
        # name; a name is imported and then walked attribute by
        # attribute down to the target submodule.
        if type(module) == type(''):
            self.module = __import__(module)
            for part in string.split(module, '.')[1:]:
                self.module = getattr(self.module, part)
        else:
            self.module = module
        if argv is None:
            argv = sys.argv
        self.defaultTest = defaultTest
        self.testRunner = testRunner
        self.suiteClass = suiteClass
        self.progName = os.path.basename(argv[0])
        # Note: constructing a TestProgram runs the tests immediately;
        # runTests() ends with sys.exit(), so __init__ does not return
        # on a normal run.
        self.parseArgs(argv)
        self.runTests()

    def usageExit(self, msg=None):
        # Print an optional message plus the usage text, then exit(2).
        if msg: print msg
        print self.USAGE % self.__dict__
        sys.exit(2)

    def parseArgs(self, argv):
        # Build self.test from the command-line arguments; any getopt
        # error is reported via usageExit().
        import getopt
        try:
            options, args = getopt.getopt(argv[1:], 'hH', ['help'])
            opts = {}
            for opt, value in options:
                if opt in ('-h','-H','--help'):
                    self.usageExit()
            if len(args) == 0 and self.defaultTest is None:
                # No names given and no default test: collect every
                # TestCase subclass found in the target module.
                self.test = findTestCases(self.module,
                                          suiteClass=self.suiteClass)
                return
            if len(args) > 0:
                self.testNames = args
            else:
                self.testNames = (self.defaultTest,)
            self.createTests()
        except getopt.error, msg:
            self.usageExit(msg)

    def createTests(self):
        # Resolve each requested name into a runnable test and wrap
        # them all in one suite.
        tests = []
        for testName in self.testNames:
            tests.append(createTestInstance(testName, self.module,
                                            suiteClass=self.suiteClass))
        self.test = self.suiteClass(tests)

    def runTests(self):
        # Run with the configured runner (default TextTestRunner) and
        # exit the process: status 0 on success, 1 otherwise.
        if self.testRunner is None:
            self.testRunner = TextTestRunner()
        result = self.testRunner.run(self.test)
        sys.exit(not result.wasSuccessful())
# Conventional entry point: invoking main(...) constructs a TestProgram,
# which parses arguments, runs the tests and exits the process.
main = TestProgram
##############################################################################
# Executing this module from the command line
##############################################################################
if __name__ == "__main__":
    # module=None: require fully-qualified test names on the command
    # line instead of searching a default module for tests.
    main(module=None)
lib/python/unittest.py
View file @
e6b5d0c3
...
...
@@ -30,7 +30,7 @@ SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
"""
__author__
=
"Steve Purcell (stephen_purcell@yahoo.com)"
__version__
=
"$Revision: 1.
20
$"
[
11
:
-
2
]
__version__
=
"$Revision: 1.
1.4.1
$"
[
11
:
-
2
]
import
time
import
sys
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment