Commit 59d506e2 authored by Michel Pelletier's avatar Michel Pelletier

Added first cut at relevance ranking. In addition to having a
data_record_id_ attribute, brains now also have a data_record_score_
attribute. Currently, the score is 1 for objects returned from field
indexes, and search term frequency for text indexes.
parent ab7dc0c6
...@@ -85,7 +85,9 @@ ...@@ -85,7 +85,9 @@
from Persistence import Persistent from Persistence import Persistent
import Acquisition import Acquisition
import BTree, OIBTree, IOBTree import BTree, OIBTree, IOBTree, IIBTree
IIBucket=IIBTree.Bucket
from intSet import intSet
from SearchIndex import UnIndex, UnTextIndex, UnKeywordIndex, Query from SearchIndex import UnIndex, UnTextIndex, UnKeywordIndex, Query
import regex, pdb import regex, pdb
from string import lower from string import lower
...@@ -152,12 +154,22 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -152,12 +154,22 @@ class Catalog(Persistent, Acquisition.Implicit):
self.useBrains(self._v_brains) self.useBrains(self._v_brains)
def __getitem__(self, index, ttype=type(())):
    """Return a catalog "brain" (an instance of self._v_result_class, as
    installed by useBrains) for the record addressed by *index*.

    *index* is either a bare record id, or a ``(score, key)`` tuple as
    produced by relevance-ranked text-index searches.  The brain is
    acquisition-wrapped in this catalog's container and annotated with:

      data_record_id_    -- the record's id in self.data
      data_record_score_ -- the relevance score; 1 for plain id lookups
                            (i.e. results coming from field indexes)

    ``ttype`` is a default-argument cache of the tuple type and is not
    meant to be supplied by callers.
    """
    if type(index) is ttype:
        # Text-index result: index is a (score, key) pair.
        score, key = index
        r = self._v_result_class(self.data[key]).__of__(self.aq_parent)
        r.data_record_score_ = score
        r.data_record_id_ = key
    else:
        # Field-index result: index is the record id itself.
        r = self._v_result_class(self.data[index]).__of__(self.aq_parent)
        r.data_record_id_ = index
        # BUG FIX: the original assigned data_record_id_ twice, clobbering
        # the id with 1.  The second assignment was meant to set the score
        # (field-index hits score a constant 1, matching the commit intent
        # and the data_record_score_ schema column added in useBrains).
        r.data_record_score_ = 1
    return r
def __setstate__(self, state): def __setstate__(self, state):
...@@ -180,6 +192,7 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -180,6 +192,7 @@ class Catalog(Persistent, Acquisition.Implicit):
scopy = self.schema.copy() scopy = self.schema.copy()
scopy['data_record_id_']=len(self.schema.keys()) scopy['data_record_id_']=len(self.schema.keys())
scopy['data_record_score_']=len(self.schema.keys())+1
mybrains.__record_schema__ = scopy mybrains.__record_schema__ = scopy
self._v_brains = brains self._v_brains = brains
...@@ -385,7 +398,8 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -385,7 +398,8 @@ class Catalog(Persistent, Acquisition.Implicit):
## Searching engine ## Searching engine
def _indexedSearch(self, args, sort_index, append, used): def _indexedSearch(self, args, sort_index, append, used,
IIBType=type(IIBucket()), intSType=type(intSet())):
rs=None rs=None
data=self.data data=self.data
...@@ -397,10 +411,16 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -397,10 +411,16 @@ class Catalog(Persistent, Acquisition.Implicit):
if hasattr(index,'_apply_index'): if hasattr(index,'_apply_index'):
r=index._apply_index(args) r=index._apply_index(args)
if r is not None: if r is not None:
r,u=r r, u = r
for name in u: used[name]=1 for name in u:
if rs is None: rs=r used[name]=1
else: rs=rs.intersection(r) if rs is None:
rs = r
else:
if type(rs) is intSType and type(r) is IIBType:
rs=r.intersection(rs)
else:
rs=rs.intersection(r)
except: except:
return used return used
...@@ -412,7 +432,15 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -412,7 +432,15 @@ class Catalog(Persistent, Acquisition.Implicit):
for k, intset in sort_index._index.items(): for k, intset in sort_index._index.items():
append((k,LazyMap(self.__getitem__, intset))) append((k,LazyMap(self.__getitem__, intset)))
elif rs: elif rs:
if sort_index is None: if type(rs) is IIBType:
rset = []
for key, score in rs.items():
rset.append((score, key))
rset.sort()
rset.reverse()
append(LazyMap(self.__getitem__, rset))
elif sort_index is None and type(rs) is intSType:
append(LazyMap(self.__getitem__, rs)) append(LazyMap(self.__getitem__, rs))
else: else:
for k, intset in sort_index._index.items(): for k, intset in sort_index._index.items():
......
...@@ -87,13 +87,13 @@ ...@@ -87,13 +87,13 @@
""" """
__version__='$Revision: 1.10 $'[11:-2] __version__='$Revision: 1.11 $'[11:-2]
from Globals import Persistent from Globals import Persistent
import BTree, IIBTree, IOBTree, OIBTree import BTree, IIBTree, IOBTree, OIBTree
BTree=BTree.BTree BTree=BTree.BTree
IOBTree=IOBTree.BTree IOBTree=IOBTree.BTree
IIBTree=IIBTree.Bucket IIBucket=IIBTree.Bucket
OIBTree=OIBTree.BTree OIBTree=OIBTree.BTree
from intSet import intSet from intSet import intSet
import operator import operator
...@@ -219,7 +219,7 @@ class UnTextIndex(Persistent): ...@@ -219,7 +219,7 @@ class UnTextIndex(Persistent):
elif type(r) is dictType: elif type(r) is dictType:
if len(r) > 4: if len(r) > 4:
b = IIBTree() b = IIBucket()
for k, v in r.items(): b[k] = v for k, v in r.items(): b[k] = v
r = b r = b
r[i] = score r[i] = score
...@@ -311,27 +311,35 @@ class UnTextIndex(Persistent): ...@@ -311,27 +311,35 @@ class UnTextIndex(Persistent):
if request.has_key(id): if request.has_key(id):
keys = request[id] keys = request[id]
else: return None else:
return None
if type(keys) is type(''): if type(keys) is type(''):
if not keys or not strip(keys): return None if not keys or not strip(keys):
return None
keys = [keys] keys = [keys]
r = None r = None
for key in keys: for key in keys:
key = strip(key) key = strip(key)
if not key: continue if not key:
rr = intSet() continue
rr = IIBucket()
try: try:
for i,score in query(key,self).items(): for i, score in query(key,self).items():
if score: rr.insert(i) if score:
except KeyError: pass rr[i] = score
if r is None: r = rr except KeyError:
pass
if r is None:
r = rr
else: else:
# Note that we *and*/*narrow* multiple search terms. # Note that we *and*/*narrow* multiple search terms.
r = r.intersection(rr) r = r.intersection(rr)
if r is not None: return r, (id,) if r is not None:
return intSet(), (id,) return r, (id,)
return IIBucket(), (id,)
class ResultList: class ResultList:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment