Commit 0bc58a79 authored by Michel Pelletier's avatar Michel Pelletier

Somebody get me a bucket.

I overdocumented.
parent abb87ad3
...@@ -89,6 +89,7 @@ import BTree, OIBTree, IOBTree, IIBTree ...@@ -89,6 +89,7 @@ import BTree, OIBTree, IOBTree, IIBTree
IIBucket=IIBTree.Bucket IIBucket=IIBTree.Bucket
from intSet import intSet from intSet import intSet
from SearchIndex import UnIndex, UnTextIndex, UnKeywordIndex, Query from SearchIndex import UnIndex, UnTextIndex, UnKeywordIndex, Query
from SearchIndex.Lexicon import Lexicon
import regex, pdb import regex, pdb
from string import lower from string import lower
import Record import Record
...@@ -124,7 +125,8 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -124,7 +125,8 @@ class Catalog(Persistent, Acquisition.Implicit):
(references in the metadata) that satisfy a search query. (references in the metadata) that satisfy a search query.
This class is not Zope specific, and can be used in any python This class is not Zope specific, and can be used in any python
program to build catalogs of objects. program to build catalogs of objects. Note that it does require
the objects to be Persistent, and thus must be used with ZODB3.
""" """
_v_brains = NoBrainer _v_brains = NoBrainer
...@@ -149,22 +151,30 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -149,22 +151,30 @@ class Catalog(Persistent, Acquisition.Implicit):
self.uids = OIBTree.BTree() # mapping of uid to rid self.uids = OIBTree.BTree() # mapping of uid to rid
self.paths = IOBTree.BTree() # mapping of rid to uid self.paths = IOBTree.BTree() # mapping of rid to uid
# indexes can share a lexicon or have a private copy. Here,
# we instantiate a lexicon to be shared by all text indexes.
# This may change.
self.lexicon = Lexicon()
if brains is not None: if brains is not None:
self._v_brains = brains self._v_brains = brains
self.useBrains(self._v_brains) self.useBrains(self._v_brains)
def __getitem__(self, index, ttype=type(())): def __getitem__(self, index, ttype=type(())):
""" Returns instances of self._v_brains, or whatever is passed """
Returns instances of self._v_brains, or whatever is passed
into self.useBrains. into self.useBrains.
""" """
if type(index) is ttype: if type(index) is ttype:
# then it contains a score...
normalized_score, score, key = index normalized_score, score, key = index
r=self._v_result_class(self.data[key]).__of__(self.aq_parent) r=self._v_result_class(self.data[key]).__of__(self.aq_parent)
r.data_record_id_ = key r.data_record_id_ = key
r.data_record_score_ = score r.data_record_score_ = score
r.data_record_normalized_score_ = normalized_score r.data_record_normalized_score_ = normalized_score
else: else:
# otherwise no score, set all scores to 1
r=self._v_result_class(self.data[index]).__of__(self.aq_parent) r=self._v_result_class(self.data[index]).__of__(self.aq_parent)
r.data_record_id_ = index r.data_record_id_ = index
r.data_record_score_ = 1 r.data_record_score_ = 1
...@@ -172,11 +182,12 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -172,11 +182,12 @@ class Catalog(Persistent, Acquisition.Implicit):
return r return r
def __setstate__(self, state): def __setstate__(self, state):
""" initialize your brains. This method is called when the
catalog is first activated (from the persistent storage) """
Persistent.__setstate__(self, state) Persistent.__setstate__(self, state)
self.useBrains(self._v_brains) self.useBrains(self._v_brains)
def useBrains(self, brains): def useBrains(self, brains):
""" Sets up the Catalog to return an object (ala ZTables) that """ Sets up the Catalog to return an object (ala ZTables) that
is created on the fly from the tuple stored in the self.data is created on the fly from the tuple stored in the self.data
Btree. Btree.
...@@ -190,6 +201,7 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -190,6 +201,7 @@ class Catalog(Persistent, Acquisition.Implicit):
scopy={} scopy={}
scopy = self.schema.copy() scopy = self.schema.copy()
# it is useful for our brains to know these things
scopy['data_record_id_']=len(self.schema.keys()) scopy['data_record_id_']=len(self.schema.keys())
scopy['data_record_score_']=len(self.schema.keys())+1 scopy['data_record_score_']=len(self.schema.keys())+1
scopy['data_record_normalized_score_']=len(self.schema.keys())+2 scopy['data_record_normalized_score_']=len(self.schema.keys())+2
...@@ -200,7 +212,9 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -200,7 +212,9 @@ class Catalog(Persistent, Acquisition.Implicit):
self._v_result_class=mybrains self._v_result_class=mybrains
def addColumn(self, name, default_value=None): def addColumn(self, name, default_value=None):
""" adds a row to the meta data schema """ """
adds a row to the meta data schema
"""
schema = self.schema schema = self.schema
names = list(self.names) names = list(self.names)
...@@ -226,12 +240,15 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -226,12 +240,15 @@ class Catalog(Persistent, Acquisition.Implicit):
self.names = tuple(names) self.names = tuple(names)
self.schema = schema self.schema = schema
# new column? update the brain
self.useBrains(self._v_brains) self.useBrains(self._v_brains)
self.__changed__(1) #why? self.__changed__(1) #why?
def delColumn(self, name): def delColumn(self, name):
""" deletes a row from the meta data schema """ """
deletes a row from the meta data schema
"""
names = list(self.names) names = list(self.names)
_index = names.index(name) _index = names.index(name)
...@@ -249,6 +266,7 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -249,6 +266,7 @@ class Catalog(Persistent, Acquisition.Implicit):
self.schema = schema self.schema = schema
self.names = tuple(names) self.names = tuple(names)
# update the brain
self.useBrains(self._v_brains) self.useBrains(self._v_brains)
# remove the column value from each record # remove the column value from each record
...@@ -258,15 +276,25 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -258,15 +276,25 @@ class Catalog(Persistent, Acquisition.Implicit):
self.data[key] = tuple(rec) self.data[key] = tuple(rec)
def addIndex(self, name, type): def addIndex(self, name, type):
""" adds an index """ """
adds an index
Currently supported are 'FieldIndexes', 'TextIndexs' and
'KeywordIndexes'.
"""
if self.indexes.has_key(name): if self.indexes.has_key(name):
raise 'Index Exists', 'The index specified allready exists' raise 'Index Exists', 'The index specified allready exists'
# this is currently a succesion of hacks. Indexes should be
# pluggable and managable
indexes = self.indexes indexes = self.indexes
if type == 'FieldIndex': if type == 'FieldIndex':
indexes[name] = UnIndex.UnIndex(name) indexes[name] = UnIndex.UnIndex(name)
elif type == 'TextIndex': elif type == 'TextIndex':
indexes[name] = UnTextIndex.UnTextIndex(name) indexes[name] = UnTextIndex.UnTextIndex(name, None, None,
self.lexicon)
elif type == 'KeywordIndex': elif type == 'KeywordIndex':
indexes[name] = UnKeywordIndex.UnKeywordIndex(name) indexes[name] = UnKeywordIndex.UnKeywordIndex(name)
...@@ -299,7 +327,7 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -299,7 +327,7 @@ class Catalog(Persistent, Acquisition.Implicit):
if self.uids.has_key(uid): if self.uids.has_key(uid):
i = self.uids[uid] i = self.uids[uid]
elif data: elif data:
i = data.keys()[-1] + 1 # find the next available rid i = data.keys()[-1] + 1 # find the next available unique id
else: else:
i = 0 i = 0
...@@ -340,10 +368,6 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -340,10 +368,6 @@ class Catalog(Persistent, Acquisition.Implicit):
except KeyError: except KeyError:
pass #fugedaboudit pass #fugedaboudit
# I think if your data gets out of sync due to crashes or
# ZClass problems, some items might not be here. The try's
# catch any inconsistencies and lets 'Update Catalog' sanify
# the situation
del self.data[rid] del self.data[rid]
del self.uids[uid] del self.uids[uid]
del self.paths[rid] del self.paths[rid]
...@@ -364,7 +388,7 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -364,7 +388,7 @@ class Catalog(Persistent, Acquisition.Implicit):
def hasuid(self, uid): def hasuid(self, uid):
""" return the rid if catalog contains an object with uid """ """ return the rid if catalog contains an object with uid """
if uid in self.uids.keys(): if self.uids.has_key(uid):
return self.uids[uid] return self.uids[uid]
else: else:
return None return None
...@@ -392,15 +416,22 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -392,15 +416,22 @@ class Catalog(Persistent, Acquisition.Implicit):
r.data_record_id_ = record[0] r.data_record_id_ = record[0]
return r.__of__(self) return r.__of__(self)
## Searching engine
## Searching engine. You don't really have to worry about what goes
## on below here...
def _indexedSearch(self, args, sort_index, append, used, def _indexedSearch(self, args, sort_index, append, used,
IIBType=type(IIBucket()), intSType=type(intSet())): IIBType=type(IIBucket()), intSType=type(intSet())):
"""
Iterate through the indexes, applying the query to each one.
Do some magic to join result sets. Be intelligent about
handling intSets and IIBuckets.
"""
## import pdb ## import pdb
## pdb.set_trace() ## pdb.set_trace()
## I use this so much I'm just leaving it commented out ## I use this so much I'm just leaving it commented out -michel
rs=None rs=None
data=self.data data=self.data
...@@ -418,6 +449,10 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -418,6 +449,10 @@ class Catalog(Persistent, Acquisition.Implicit):
if rs is None: if rs is None:
rs = r rs = r
else: else:
# you can't intersect an IIBucket into an
# intSet, but you can go the other way
# around. Make sure we're facing the
# right direction...
if type(rs) is intSType and type(r) is IIBType: if type(rs) is intSType and type(r) is IIBType:
rs=r.intersection(rs) rs=r.intersection(rs)
else: else:
...@@ -434,6 +469,9 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -434,6 +469,9 @@ class Catalog(Persistent, Acquisition.Implicit):
append((k,LazyMap(self.__getitem__, intset))) append((k,LazyMap(self.__getitem__, intset)))
elif rs: elif rs:
if type(rs) is IIBType: if type(rs) is IIBType:
# then there is score information. Build a new result
# set, sort it by score, reverse it, compute the
# normalized score, and Lazify it.
rset = [] rset = []
for key, score in rs.items(): for key, score in rs.items():
rset.append((score, key)) rset.append((score, key))
...@@ -446,8 +484,13 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -446,8 +484,13 @@ class Catalog(Persistent, Acquisition.Implicit):
append(LazyMap(self.__getitem__, rs)) append(LazyMap(self.__getitem__, rs))
elif sort_index is None and type(rs) is intSType: elif sort_index is None and type(rs) is intSType:
# no scores? Just Lazify.
append(LazyMap(self.__getitem__, rs)) append(LazyMap(self.__getitem__, rs))
else: else:
# sort. If there are scores, then this block is not
# reached, therefor 'sort-on' does not happen in the
# context of text index query. This should probably
# sort by relevance first, then the 'sort-on' attribute.
for k, intset in sort_index._index.items(): for k, intset in sort_index._index.items():
intset=intset.intersection(rs) intset=intset.intersection(rs)
if intset: if intset:
...@@ -462,12 +505,10 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -462,12 +505,10 @@ class Catalog(Persistent, Acquisition.Implicit):
type(''): Query.String, type(''): Query.String,
}, **kw): }, **kw):
#################################################################
# Get search arguments: # Get search arguments:
if REQUEST is None and not kw: if REQUEST is None and not kw:
try: REQUEST=self.REQUEST try: REQUEST=self.REQUEST
except: pass except: pass
if kw: if kw:
if REQUEST: if REQUEST:
m=KWMultiMapping() m=KWMultiMapping()
...@@ -476,15 +517,12 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -476,15 +517,12 @@ class Catalog(Persistent, Acquisition.Implicit):
kw=m kw=m
elif REQUEST: kw=REQUEST elif REQUEST: kw=REQUEST
#################################################################
# Make sure batch size is set # Make sure batch size is set
if REQUEST and not REQUEST.has_key('batch_size'): if REQUEST and not REQUEST.has_key('batch_size'):
try: batch_size=self.default_batch_size try: batch_size=self.default_batch_size
except: batch_size=20 except: batch_size=20
REQUEST['batch_size']=batch_size REQUEST['batch_size']=batch_size
#################################################################
# Compute "sort_index", which is a sort index, or none: # Compute "sort_index", which is a sort index, or none:
if kw.has_key('sort-on'): if kw.has_key('sort-on'):
sort_index=kw['sort-on'] sort_index=kw['sort-on']
...@@ -497,13 +535,11 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -497,13 +535,11 @@ class Catalog(Persistent, Acquisition.Implicit):
if sort_index is not None and sort_index in self.indexes.keys(): if sort_index is not None and sort_index in self.indexes.keys():
sort_index=self.indexes[sort_index] sort_index=self.indexes[sort_index]
#################################################################
# Perform searches with indexes and sort_index # Perform searches with indexes and sort_index
r=[] r=[]
used=self._indexedSearch(kw, sort_index, r.append, used) used=self._indexedSearch(kw, sort_index, r.append, used)
if not r: return r if not r: return r
#################################################################
# Sort/merge sub-results # Sort/merge sub-results
if len(r)==1: if len(r)==1:
if sort_index is None: r=r[0] if sort_index is None: r=r[0]
......
ZCatalog ZCatalog
Notes for Zope 2.0
Catalog now comes with the Zope core, there is no need to install it.
This is the ZCatalog product for the Z Object Publishing This is the ZCatalog product for the Z Object Publishing
Environment. Environment.
......
...@@ -469,3 +469,4 @@ Globals.default__class_init__(ZCatalog) ...@@ -469,3 +469,4 @@ Globals.default__class_init__(ZCatalog)
...@@ -8,11 +8,21 @@ ...@@ -8,11 +8,21 @@
<p> This list defines what indexes the Catalog will contain. When <p> This list defines what indexes the Catalog will contain. When
objects get cataloged, the values of any attributes they may have objects get cataloged, the values of any attributes they may have
which match an index in this list will get indexed. Indexes come in which match an index in this list will get indexed. Indexes come in
two flavors, TextIndexes and FieldIndexes. TextIndexes break text up three flavors, Text Indexes, Field Indexes and Keyword Indexes.</p>
into individual words, and are often refered to as 'full text
indexes'. Field indexes treat the value of an objects attribute <p><b>Text Indexes</b> break text up into individual words, and are
atomically, and can be used, for example, to track only a certain often refered to as <b>full-text indexes</b>. Text indexes sort results by
subset of object values, such as 'meta_type'.</p> <b>score</b> meaning they return 'hits' in order from the most
relevant to the lest relevant.</p>
<p><b>Field Indexes</b> treat the value of an objects attribute atomically,
and can be used, for example, to track only a certain subset of object
values, such as 'meta_type'.</p>
<p><b>Keyword Indexes</b> index a sequence of objects that act as
'keywords' for an object. A Keyword Index will return any objects
that have one or more keywords specified in a search query.</p>
<form action="<!--#var URL1-->"> <form action="<!--#var URL1-->">
......
...@@ -21,32 +21,35 @@ ...@@ -21,32 +21,35 @@
transaction, you <b>must</b> disable subtransactions, they are not transaction, you <b>must</b> disable subtransactions, they are not
compatible with ZSQL Methods.</p> compatible with ZSQL Methods.</p>
<h3>Subtransactions are <font color="red"> <h3>Subtransactions are
<dtml-if threshold> <dtml-if threshold>
<b>Enabled</b> <font color="green"><b>Enabled</b></font>
<dtml-else> <dtml-else>
<b>Disabled</b> <font color="red"><b>Disabled</b></font>
</dtml-if></h3><br></font> </dtml-if></h3>
<form action="." method=POST> <form action="." method=POST>
<dtml-if threshold> <dtml-if threshold>
<input type=submit name="manage_subbingToggle:method" value="Disable"> <input type=submit name="manage_subbingToggle:method"
value="Disable"> Subtransactions
<dtml-else> <dtml-else>
<input type=submit name="manage_subbingToggle:method" value="Enable"> <input type=submit name="manage_subbingToggle:method"
value="Enable"> Subtransactions
</dtml-if> </dtml-if>
</form> </form>
<form action="manage_edit" method=POST> <form action="manage_edit" method=POST>
<dtml-if threshold> <dtml-if threshold>
<p>The Subtransaction threshold is the number of words the catalog
will index before it commits a subtransaction. If this number is low,
the Catalog will take much to index but consume much less memory. If
this number is higher, the Catalog will index quickly but consume much
more memory.</p>
<p>The Subtransaction threshold is the number of words the catalog
will index before it commits a subtransaction. If this number
is low, the Catalog will take longer to index but consume less
memory. If this number is higher, the Catalog will index
quickly but consume much more memory.</p>
Subtransaction threshold: <input name="threshold:int" value="<!--#var Subtransaction threshold: <input name="threshold:int" value="<!--#var
threshold html_quote-->"><br> <input type=submit value=" Change "> threshold html_quote-->"><br> <input type=submit value=" Change ">
</dtml-if> </dtml-if>
</form> </form>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment