Merging into trunk.

a543f5d7 · Chris McDonough · 5f71029e · a543f5d7 · a543f5d7 · a543f5d7
Commit a543f5d7 authored Apr 17, 2001 by Chris McDonough
3 changed files
--- a/lib/python/Products/ZCatalog/Catalog.py
+++ b/lib/python/Products/ZCatalog/Catalog.py
@@ -545,7 +545,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
                    for name in u:
                        used[name]=1
                    w, rs = weightedIntersection(rs, r)
        #assert rs==None or hasattr(rs, 'values') or hasattr(rs, 'keys')
        if rs is None:
            # return everything
@@ -635,17 +635,25 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
        # Compute "sort_index", which is a sort index, or none:
        if kw.has_key('sort-on'):
            sort_index=kw['sort-on']
+            del kw['sort-on']
        elif hasattr(self, 'sort-on'):
            sort_index=getattr(self, 'sort-on')
        elif kw.has_key('sort_on'):
            sort_index=kw['sort_on']
+            del kw['sort_on']
        else: sort_index=None
        sort_order=''
-        if sort_index is not None and self.indexes.has_key(sort_index):
+        if sort_index is not None:
-            sort_index=self.indexes[sort_index]
+            if self.indexes.has_key(sort_index):
-            if not hasattr(sort_index, 'keyForDocument'):
+                sort_index=self.indexes[sort_index]
-                raise CatalogError('Invalid sort index')
+                if not hasattr(sort_index, 'keyForDocument'):
+                    raise CatalogError(
+                        'The index chosen for sort_on is not capable of being'
+                        ' used as a sort index.'
+                        )
+            else:
+                raise CatalogError, ('Unknown sort_on index %s' % sort_index)
        # Perform searches with indexes and sort_index
        r=[]
        used=self._indexedSearch(kw, sort_index, r.append, used)

--- a/lib/python/Products/ZCatalog/tests/testCatalog.py
+++ b/lib/python/Products/ZCatalog/tests/testCatalog.py
@@ -2,11 +2,23 @@
 """
    Testsuite for testing Catalogs
-    $Id: testCatalog.py,v 1.4 2001/04/05 16:15:36 chrism Exp $
+    $Id: testCatalog.py,v 1.5 2001/04/17 17:08:13 chrism Exp $
    Andreas Jung, andreas@digicool.com
    $Log: testCatalog.py,v $
+    Revision 1.5  2001/04/17 17:08:13  chrism
+    Merging into trunk.
+    Revision 1.1.6.5  2001/04/17 17:01:21  chrism
+    More tests.
+    Revision 1.1.6.4.2.1  2001/04/17 06:39:45  chrism
+    added further tests for catalog object in test_suite.
+    Revision 1.1.6.4  2001/04/05 16:18:05  chrism
+    Added test for empty mapping returns all.
    Revision 1.4  2001/04/05 16:15:36  chrism
    added test for empty mapping returns all.
@@ -91,6 +103,7 @@ here = os.getcwd()
 import Zope
 import ZODB, ZODB.FileStorage
 from Products.ZCatalog import Catalog,ZCatalog,Vocabulary
+from Products.ZCatalog.Catalog import CatalogError
 import Persistence
 import ExtensionClass
 from Testing import dispatcher
@@ -121,8 +134,7 @@ updateIterations = 100
 # input mailbox file
 mbox   = os.environ.get("TESTCATALOG_MBOX","/usr/home/andreas/zope.mbox")
-mbox2  = "/usr/home/andreas/python.mbox"
+mbox2  = os.environ.get("TESTCATALOG_MBOX2", "/usr/home/andreas/python.mbox")
 dataDir = ""
@@ -720,11 +732,15 @@ class TestAddDelIndexes(CatalogBase, unittest.TestCase):
        self._catalog.delIndex('id')
        assert self._catalog.indexes.has_key('id') != 1, 'del index failed'
-class TestSimultaneousAddAndRead(CatalogBase, unittest.TestCase):
-    def checkMultiThread(self):
-        pass
 class TestZCatalogObject(unittest.TestCase):
+    def setUp(self):
+        class dummy(ExtensionClass.Base):
+            pass
+        self.dummy = dummy()
+    def tearDown(self):
+        self.dummy = None
    def checkInstantiateWithoutVocab(self):
        v = Vocabulary.Vocabulary('Vocabulary', 'Vocabulary', globbing=1)
        zc = ZCatalog.ZCatalog('acatalog')
@@ -732,15 +748,19 @@ class TestZCatalogObject(unittest.TestCase):
        assert zc.getVocabulary().__class__ == v.__class__
    def checkInstantiateWithGlobbingVocab(self):
+        dummy = self.dummy
        v = Vocabulary.Vocabulary('Vocabulary', 'Vocabulary', globbing=1)
-        zc = ZCatalog.ZCatalog('acatalog', vocab_id='vocab')
+        dummy.v = v
-        zc._setObject('vocab', v)
+        zc = ZCatalog.ZCatalog('acatalog', vocab_id='v', container=dummy)
+        zc = zc.__of__(dummy)
        assert zc.getVocabulary() == v
    def checkInstantiateWithNormalVocab(self):
+        dummy = self.dummy
        v = Vocabulary.Vocabulary('Vocabulary', 'Vocabulary', globbing=0)
-        zc = ZCatalog.ZCatalog('acatalog', vocab_id='vocab')
+        dummy.v = v
-        zc._setObject('vocab', v)
+        zc = ZCatalog.ZCatalog('acatalog', vocab_id='v', container=dummy)
+        zc = zc.__of__(dummy)
        assert zc.getVocabulary() == v
 class TestCatalogObject(unittest.TestCase):
@@ -758,11 +778,12 @@ class TestCatalogObject(unittest.TestCase):
        self._catalog.addIndex('att1', 'FieldIndex')
        self._catalog.addIndex('att2', 'TextIndex')
        self._catalog.addIndex('att3', 'KeywordIndex')
+        self._catalog.addIndex('num', 'FieldIndex')
        self._catalog.addColumn('att1') 
        self._catalog.addColumn('att2')
        self._catalog.addColumn('att3')
        self._catalog.addColumn('num')
        self.upper = 1000
        class dummy(ExtensionClass.Base):
            att1 = 'att1'
@@ -779,6 +800,8 @@ class TestCatalogObject(unittest.TestCase):
            def col3(self):
                return ['col3']
        for x in range(0, self.upper):
            self._catalog.catalogObject(dummy(x), `x`)
        self._catalog.aq_parent = dummy('foo') # fake out acquisition
@@ -855,6 +878,60 @@ class TestCatalogObject(unittest.TestCase):
        for x in range(0, self.upper):
            self._catalog.uncatalogObject(`x`)
+    def checkGoodSortIndex(self):
+        upper = self.upper
+        a = self._catalog(sort_on='num')
+        assert len(a) == upper, 'length should be %s, its %s'%(upper, len(a))
+        for x in range(self.upper):
+            assert a[x].num == x, x
+    def checkBadSortIndex(self):
+        self.assertRaises(CatalogError, self.badsortindex)
+    def badsortindex(self):
+        a = self._catalog(sort_on='foofaraw')
+    def checkWrongKindOfIndexForSort(self):
+        self.assertRaises(CatalogError, self.wrongsortindex)
+    def wrongsortindex(self):
+        a = self._catalog(sort_on='att2')
+    def checkTextIndexQWithSortOn(self):
+        upper = self.upper
+        a = self._catalog(sort_on='num', att2='att2')
+        assert len(a) == upper, 'length should be %s, its %s'%(upper, len(a))
+        for x in range(self.upper):
+            assert a[x].num == x, x
+    def checkTextIndexQWithoutSortOn(self):
+        upper = self.upper
+        a = self._catalog(att2='att2')
+        assert len(a) == upper, 'length should be %s, its %s'%(upper, len(a))
+        for x in range(self.upper):
+            assert a[x].data_record_score_ == 1, a[x].data_record_score_
+    def checkKeywordIndexWithMinRange(self):
+        a = self._catalog(att3='att', att3_usage='range:min')
+        assert len(a) == self.upper
+    def checkKeywordIndexWithMaxRange(self):
+        a = self._catalog(att3='att35', att3_usage='range:max')
+        assert len(a) == self.upper
+    def checkKeywordIndexWithMinMaxRangeCorrectSyntax(self):
+        a = self._catalog(att3=['att', 'att35'], att3_usage='range:min:max')
+        assert len(a) == self.upper
+    def checkKeywordIndexWithMinMaxRangeWrongSyntax(self):
+        "checkKeywordIndex with min/max range wrong syntax - known to fail"
+        a = self._catalog(att3=['att'], att3_usage='range:min:max')
+        assert len(a) == self.upper
+    def checkCombinedTextandKeywordQuery(self):
+        a = self._catalog(att3='att3', att2='att2')
+        assert len(a) == self.upper
 class objRS(ExtensionClass.Base):
    def __init__(self,num):
@@ -997,7 +1074,7 @@ def get_tests(what):
    if what=='basic':    
        ts = unittest.TestSuite(ts_cm)
-        for x in t_aj: ts.addTest(x)
+#        for x in t_aj: ts.addTest(x)
        return ts
    else:

--- a/lib/python/SearchIndex/UnIndex.py
+++ b/lib/python/SearchIndex/UnIndex.py
@@ -85,18 +85,17 @@
 """Simple column indices"""
-__version__='$Revision: 1.28 $'[11:-2]
+__version__='$Revision: 1.29 $'[11:-2]
 from Globals import Persistent
 from Acquisition import Implicit
 import BTree
 import IOBTree
-import operator
+import string
-import string, pdb
 from zLOG import LOG, ERROR
-from types import *
+from types import StringType, ListType, IntType, TupleType
-from BTrees.OOBTree import OOBTree
+from BTrees.OOBTree import OOBTree, OOSet
 from BTrees.IOBTree import IOBTree
 from BTrees.IIBTree import IITreeSet, IISet, union
 import BTrees.Length
@@ -105,15 +104,6 @@ import sys
 _marker = []
-def nonEmpty(s):
-    "returns true if a non-empty string or any other (nonstring) type"
-    if type(s) is StringType:
-        if s: return 1
-        else: return 0
-    else:
-        return 1
 class UnIndex(Persistent, Implicit):
    """UnIndex object interface"""
@@ -132,6 +122,10 @@ class UnIndex(Persistent, Implicit):
        self._index = {datum:[documentId1, documentId2]}
        self._unindex = {documentId:datum}
+        If any item in self._index has a length-one value, the value is an
+        integer, and not a set.  There are special cases in the code to deal
+        with this.
        The arguments are:
          'id' -- the name of the item attribute to index.  This is
@@ -207,8 +201,12 @@ class UnIndex(Persistent, Implicit):
        elements found at each point in the index."""
        histogram = {}
-        for (key, value) in self._index.items():
+        for item in self._index.items():
-            entry = len(value)
+            if type(item) is IntType:
+                entry = 1 # "set" length is 1
+            else:
+                key, value = item
+                entry = len(value)
            histogram[entry] = histogram.get(entry, 0) + 1
        return histogram
@@ -329,28 +327,45 @@ class UnIndex(Persistent, Implicit):
                ' with id %s' % documentId)
    def _apply_index(self, request, cid='', type=type, None=None): 
-        """Apply the index to query parameters given in the argument,
+        """Apply the index to query parameters given in the request arg.
-        request
+        The request argument should be a mapping object.
+        If the request does not have a key which matches the "id" of
+        the index instance, then None is returned.
+        If the request *does* have a key which matches the "id" of
+        the index instance, one of a few things can happen:
-        The argument should be a mapping object.
+          - if the value is a blank string, None is returned (in
+            order to support requests from web forms, where a blank
+            string cannot be told apart from a field left empty).
-        If the request does not contain the needed parameters, then
+          - if the value is a nonblank string, turn the value into
-        None is returned.
+            a single-element sequence, and proceed.
+          - if the value is a sequence, return a union search.
        If the request contains a parameter with the name of the
        column + '_usage', it is sniffed for information on how to
        handle applying the index.
-        Otherwise two objects are returned.  The first object is a
+        If None is not returned as a result of the abovementioned
+        constraints, two objects are returned.  The first object is a
        ResultSet containing the record numbers of the matching
        records.  The second object is a tuple containing the names of
        all data fields used.
+        FAQ answer:  to search a Field Index for documents that
+        have a blank string as their value, wrap the request value
+        in a tuple, like so: request = {'id':('',)}
        """
        id = self.id              #name of the column
        cidid = "%s/%s" % (cid,id)
+        # NOTE: the purpose of the "cid/id" (cidid) request key is unclear - chrism
        if request.has_key(cidid):
            keys = request[cidid]
        elif request.has_key(id):
@@ -359,60 +374,47 @@ class UnIndex(Persistent, Implicit):
            return None
        if type(keys) not in (ListType, TupleType):
-            keys = [keys]
+            if keys == '':
+                return None
+            else:
+                keys = [keys]
        index = self._index
        r = None
-        anyTrue = 0
        opr = None
-        IntType=type(1)
        if request.has_key(id+'_usage'):
            # see if any usage params are sent to field
            opr=string.split(string.lower(request[id+"_usage"]),':')
            opr, opr_args=opr[0], opr[1:]
-        if opr=="range":
+        if opr=="range":   # range search
            if 'min' in opr_args: lo = min(keys)
            else: lo = None
            if 'max' in opr_args: hi = max(keys)
            else: hi = None
+            if hi:
+                setlist = index.items(lo,hi)
+            else:
+                setlist = index.items(lo)
-            anyTrue=1
+            for k, set in setlist:
-            try:
+                if type(set) is IntType:
-                if hi:
+                    set = IISet((set,))
-                    setlist = index.items(lo,hi)
+                r = union(r, set)
-                else:
+        else: # not a range search
-                    setlist = index.items(lo)
-                for k, set in setlist:
-                    r = union(r, set)
-            except KeyError:
-                pass
-        else:           #not a range
-            get = index.get
            for key in keys:
-                if nonEmpty(key):
+                set=index.get(key, None)
-                    anyTrue = 1
-                set=get(key, None)
                if set is not None:
+                    if type(set) is IntType:
+                        set = IISet((set,))
                    r = union(r, set)
-        if type(r) is IntType: r=IISet((r,))
+        if type(r) is IntType:  r=IISet((r,))
-        if r:
-            return r, (id,)
        if r is None:
-            if anyTrue:
+            return IISet(), (id,)
-                r=IISet()
+        else:
-            else:
+            return r, (id,)
-                return None
-        return r, (id,)
    def hasUniqueValuesFor(self, name):
        ' has unique values for column NAME '
@@ -434,18 +436,27 @@ class UnIndex(Persistent, Implicit):
        elif name != self.id:
            return []
-        if not withLengths: return tuple(
+        if not withLengths:
-            filter(nonEmpty, self._index.keys())
+            return tuple(self._index.keys())
-            )
        else: 
            rl=[]
            for i in self._index.keys():
-                if not nonEmpty(i): continue
+                set = self._index[i]
-                else: rl.append((i, len(self._index[i])))
+                if type(set) is IntType:
+                    l = 1
+                else:
+                    l = len(set)
+                rl.append((i, l))
            return tuple(rl)
    def keyForDocument(self, id):
        return self._unindex[id]
-    def items(self): return self._index.items()
+    def items(self):
+        items = []
+        for k,v in self._index.items():
+            if type(v) is IntType:
+                v = IISet((v,))
+            items.append((k, v))
+        return items