Commit 59d506e2 authored by Michel Pelletier's avatar Michel Pelletier

Added first cut at relevance ranking. In addition to having a
data_record_id_ attribute, brains now also have a data_record_score_
attribute. Currently, the score is 1 for objects returned from field
indexes, and search term frequency for text indexes.
parent ab7dc0c6
...@@ -85,7 +85,9 @@ ...@@ -85,7 +85,9 @@
from Persistence import Persistent from Persistence import Persistent
import Acquisition import Acquisition
import BTree, OIBTree, IOBTree import BTree, OIBTree, IOBTree, IIBTree
IIBucket=IIBTree.Bucket
from intSet import intSet
from SearchIndex import UnIndex, UnTextIndex, UnKeywordIndex, Query from SearchIndex import UnIndex, UnTextIndex, UnKeywordIndex, Query
import regex, pdb import regex, pdb
from string import lower from string import lower
...@@ -152,12 +154,22 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -152,12 +154,22 @@ class Catalog(Persistent, Acquisition.Implicit):
self.useBrains(self._v_brains) self.useBrains(self._v_brains)
def __getitem__(self, index, ttype=type(())):
    """Return a catalog "brain" (an instance of self._v_result_class, as
    installed by useBrains) for the record addressed by *index*.

    *index* is either a bare record id, or a ``(score, key)`` tuple as
    produced by relevance-ranked text-index searches.  The brain is
    acquisition-wrapped in this catalog's container and annotated with:

      data_record_id_    -- the record's id in self.data
      data_record_score_ -- the relevance score; 1 for plain id lookups
                            (i.e. results coming from field indexes)

    ``ttype`` is a default-argument cache of the tuple type and is not
    meant to be supplied by callers.
    """
    if type(index) is ttype:
        # Text-index result: index is a (score, key) pair.
        score, key = index
        r = self._v_result_class(self.data[key]).__of__(self.aq_parent)
        r.data_record_score_ = score
        r.data_record_id_ = key
    else:
        # Field-index result: index is the record id itself.
        r = self._v_result_class(self.data[index]).__of__(self.aq_parent)
        r.data_record_id_ = index
        # BUG FIX: the original assigned data_record_id_ twice, clobbering
        # the id with 1.  The second assignment was meant to set the score
        # (field-index hits score a constant 1, matching the commit intent
        # and the data_record_score_ schema column added in useBrains).
        r.data_record_score_ = 1
    return r
def __setstate__(self, state): def __setstate__(self, state):
...@@ -180,6 +192,7 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -180,6 +192,7 @@ class Catalog(Persistent, Acquisition.Implicit):
scopy = self.schema.copy() scopy = self.schema.copy()
scopy['data_record_id_']=len(self.schema.keys()) scopy['data_record_id_']=len(self.schema.keys())
scopy['data_record_score_']=len(self.schema.keys())+1
mybrains.__record_schema__ = scopy mybrains.__record_schema__ = scopy
self._v_brains = brains self._v_brains = brains
...@@ -385,7 +398,8 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -385,7 +398,8 @@ class Catalog(Persistent, Acquisition.Implicit):
## Searching engine ## Searching engine
def _indexedSearch(self, args, sort_index, append, used): def _indexedSearch(self, args, sort_index, append, used,
IIBType=type(IIBucket()), intSType=type(intSet())):
rs=None rs=None
data=self.data data=self.data
...@@ -397,10 +411,16 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -397,10 +411,16 @@ class Catalog(Persistent, Acquisition.Implicit):
if hasattr(index,'_apply_index'): if hasattr(index,'_apply_index'):
r=index._apply_index(args) r=index._apply_index(args)
if r is not None: if r is not None:
r,u=r r, u = r
for name in u: used[name]=1 for name in u:
if rs is None: rs=r used[name]=1
else: rs=rs.intersection(r) if rs is None:
rs = r
else:
if type(rs) is intSType and type(r) is IIBType:
rs=r.intersection(rs)
else:
rs=rs.intersection(r)
except: except:
return used return used
...@@ -412,7 +432,15 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -412,7 +432,15 @@ class Catalog(Persistent, Acquisition.Implicit):
for k, intset in sort_index._index.items(): for k, intset in sort_index._index.items():
append((k,LazyMap(self.__getitem__, intset))) append((k,LazyMap(self.__getitem__, intset)))
elif rs: elif rs:
if sort_index is None: if type(rs) is IIBType:
rset = []
for key, score in rs.items():
rset.append((score, key))
rset.sort()
rset.reverse()
append(LazyMap(self.__getitem__, rset))
elif sort_index is None and type(rs) is intSType:
append(LazyMap(self.__getitem__, rs)) append(LazyMap(self.__getitem__, rs))
else: else:
for k, intset in sort_index._index.items(): for k, intset in sort_index._index.items():
......
...@@ -87,13 +87,13 @@ ...@@ -87,13 +87,13 @@
""" """
__version__='$Revision: 1.10 $'[11:-2] __version__='$Revision: 1.11 $'[11:-2]
from Globals import Persistent from Globals import Persistent
import BTree, IIBTree, IOBTree, OIBTree import BTree, IIBTree, IOBTree, OIBTree
BTree=BTree.BTree BTree=BTree.BTree
IOBTree=IOBTree.BTree IOBTree=IOBTree.BTree
IIBTree=IIBTree.Bucket IIBucket=IIBTree.Bucket
OIBTree=OIBTree.BTree OIBTree=OIBTree.BTree
from intSet import intSet from intSet import intSet
import operator import operator
...@@ -219,7 +219,7 @@ class UnTextIndex(Persistent): ...@@ -219,7 +219,7 @@ class UnTextIndex(Persistent):
elif type(r) is dictType: elif type(r) is dictType:
if len(r) > 4: if len(r) > 4:
b = IIBTree() b = IIBucket()
for k, v in r.items(): b[k] = v for k, v in r.items(): b[k] = v
r = b r = b
r[i] = score r[i] = score
...@@ -311,27 +311,35 @@ class UnTextIndex(Persistent): ...@@ -311,27 +311,35 @@ class UnTextIndex(Persistent):
if request.has_key(id): if request.has_key(id):
keys = request[id] keys = request[id]
else: return None else:
return None
if type(keys) is type(''): if type(keys) is type(''):
if not keys or not strip(keys): return None if not keys or not strip(keys):
return None
keys = [keys] keys = [keys]
r = None r = None
for key in keys: for key in keys:
key = strip(key) key = strip(key)
if not key: continue if not key:
rr = intSet() continue
rr = IIBucket()
try: try:
for i,score in query(key,self).items(): for i, score in query(key,self).items():
if score: rr.insert(i) if score:
except KeyError: pass rr[i] = score
if r is None: r = rr except KeyError:
pass
if r is None:
r = rr
else: else:
# Note that we *and*/*narrow* multiple search terms. # Note that we *and*/*narrow* multiple search terms.
r = r.intersection(rr) r = r.intersection(rr)
if r is not None: return r, (id,) if r is not None:
return intSet(), (id,) return r, (id,)
return IIBucket(), (id,)
class ResultList: class ResultList:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment