Commit ab7dc0c6 authored by Michel Pelletier

Added KeywordIndexes, and optimized subtransaction processing to kick
the cache. Modified UI to include CPU time and enable/disable subtransactions.
parent 67235350
...@@ -86,7 +86,7 @@ ...@@ -86,7 +86,7 @@
from Persistence import Persistent from Persistence import Persistent
import Acquisition import Acquisition
import BTree, OIBTree, IOBTree import BTree, OIBTree, IOBTree
from SearchIndex import UnIndex, UnTextIndex, Query from SearchIndex import UnIndex, UnTextIndex, UnKeywordIndex, Query
import regex, pdb import regex, pdb
from string import lower from string import lower
import Record import Record
...@@ -253,6 +253,8 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -253,6 +253,8 @@ class Catalog(Persistent, Acquisition.Implicit):
indexes[name] = UnIndex.UnIndex(name) indexes[name] = UnIndex.UnIndex(name)
elif type == 'TextIndex': elif type == 'TextIndex':
indexes[name] = UnTextIndex.UnTextIndex(name) indexes[name] = UnTextIndex.UnTextIndex(name)
elif type == 'KeywordIndex':
indexes[name] = UnKeywordIndex.UnKeywordIndex(name)
self.indexes = indexes self.indexes = indexes
...@@ -282,7 +284,7 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -282,7 +284,7 @@ class Catalog(Persistent, Acquisition.Implicit):
data = self.data data = self.data
if uid in self.uids.keys(): if self.uids.has_key(uid):
i = self.uids[uid] i = self.uids[uid]
elif data: elif data:
i = data.keys()[-1] + 1 # find the next available rid i = data.keys()[-1] + 1 # find the next available rid
...@@ -299,7 +301,6 @@ class Catalog(Persistent, Acquisition.Implicit): ...@@ -299,7 +301,6 @@ class Catalog(Persistent, Acquisition.Implicit):
for x in self.indexes.values(): for x in self.indexes.values():
if hasattr(x, 'index_object'): if hasattr(x, 'index_object'):
blah = x.index_object(i, object, threshold) blah = x.index_object(i, object, threshold)
__traceback_info__=(`total`, `blah`)
total = total + blah total = total + blah
self.data = data self.data = data
......
...@@ -90,7 +90,7 @@ from OFS.Folder import Folder ...@@ -90,7 +90,7 @@ from OFS.Folder import Folder
from OFS.FindSupport import FindSupport from OFS.FindSupport import FindSupport
from DateTime import DateTime from DateTime import DateTime
from SearchIndex import Query from SearchIndex import Query
import string, regex, urlparse, urllib, os, sys import string, regex, urlparse, urllib, os, sys, time
import Products import Products
from Acquisition import Implicit from Acquisition import Implicit
from Persistence import Persistent from Persistence import Persistent
...@@ -163,12 +163,12 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit): ...@@ -163,12 +163,12 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit):
'manage_addIndex', 'manage_delIndexes', 'manage_main',], 'manage_addIndex', 'manage_delIndexes', 'manage_main',],
['Manager']), ['Manager']),
('Search ZCatalog', ('Search ZCatalog',
['searchResults', '__call__', 'uniqueValuesFor', ['searchResults', '__call__', 'uniqueValuesFor',
'getpath', 'schema', 'indexes', 'index_objects', 'getpath', 'schema', 'indexes', 'index_objects',
'all_meta_types', 'valid_roles', 'resolve_url', 'all_meta_types', 'valid_roles', 'resolve_url',
'getobject'], 'getobject'],
['Anonymous', 'Manager']), ['Anonymous', 'Manager']),
) )
...@@ -180,14 +180,14 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit): ...@@ -180,14 +180,14 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit):
manage_catalogStatus = HTMLFile('catalogStatus', globals()) manage_catalogStatus = HTMLFile('catalogStatus', globals())
threshold=1000 threshold=10000
_v_total=0 _v_total=0
def __init__(self,id,title=''): def __init__(self,id,title=''):
self.id=id self.id=id
self.title=title self.title=title
self.threshold = 1000 self.threshold = 10000
self._v_total = 0 self._v_total = 0
self._catalog = Catalog() self._catalog = Catalog()
...@@ -214,6 +214,16 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit): ...@@ -214,6 +214,16 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit):
RESPONSE.redirect(URL1 + '/manage_main?manage_tabs_message=Catalog%20Changed') RESPONSE.redirect(URL1 + '/manage_main?manage_tabs_message=Catalog%20Changed')
# Toggle subtransaction support for this catalog and return to the
# status page.  A truthy threshold is cleared to None; otherwise it is
# set to 10000.  The caller is then redirected to manage_catalogStatus
# under URL1.  REQUEST is accepted but not used in the body.
def manage_subbingToggle(self, REQUEST, RESPONSE, URL1):
""" toggle subtransactions """
if self.threshold:
# currently on (non-zero threshold): turn off
self.threshold = None
else:
# currently off (None or 0): turn on with a threshold of 10000
self.threshold = 10000
RESPONSE.redirect(URL1 + '/manage_catalogStatus?manage_tabs_message=Catalog%20Changed')
def manage_catalogObject(self, REQUEST, RESPONSE, URL1, urls=None): def manage_catalogObject(self, REQUEST, RESPONSE, URL1, urls=None):
""" index all Zope objects that 'urls' point to """ """ index all Zope objects that 'urls' point to """
if urls: if urls:
...@@ -238,6 +248,9 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit): ...@@ -238,6 +248,9 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit):
def manage_catalogReindex(self, REQUEST, RESPONSE, URL1): def manage_catalogReindex(self, REQUEST, RESPONSE, URL1):
""" clear the catalog, then re-index everything """ """ clear the catalog, then re-index everything """
elapse = time.time()
c_elapse = time.clock()
paths = tuple(self._catalog.paths.values()) paths = tuple(self._catalog.paths.values())
self._catalog.clear() self._catalog.clear()
...@@ -245,8 +258,13 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit): ...@@ -245,8 +258,13 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit):
obj = self.resolve_url(p, REQUEST) obj = self.resolve_url(p, REQUEST)
if obj is not None: if obj is not None:
self.catalog_object(obj, p) self.catalog_object(obj, p)
RESPONSE.redirect(URL1 + '/manage_catalogView?manage_tabs_message=Catalog%20Updated') elapse = time.time() - elapse
c_elapse = time.clock() - c_elapse
RESPONSE.redirect(URL1 + '/manage_catalogView?manage_tabs_message=' +
urllib.quote('Catalog Updated<br>Total time: %s<br>Total CPU time: %s' % (`elapse`, `c_elapse`)))
def manage_catalogClear(self, REQUEST, RESPONSE, URL1): def manage_catalogClear(self, REQUEST, RESPONSE, URL1):
""" clears the whole enchelada """ """ clears the whole enchelada """
...@@ -264,6 +282,11 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit): ...@@ -264,6 +282,11 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit):
""" Find object according to search criteria and Catalog them """ Find object according to search criteria and Catalog them
""" """
elapse = time.time()
c_elapse = time.clock()
words = 0
results = self.ZopeFind(REQUEST.PARENTS[1], results = self.ZopeFind(REQUEST.PARENTS[1],
obj_metatypes=obj_metatypes, obj_metatypes=obj_metatypes,
obj_ids=obj_ids, obj_ids=obj_ids,
...@@ -282,7 +305,11 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit): ...@@ -282,7 +305,11 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit):
REQUEST.script)[1][1:] REQUEST.script)[1][1:]
self.catalog_object(n[1], abs_path) self.catalog_object(n[1], abs_path)
RESPONSE.redirect(URL1 + '/manage_catalogView?manage_tabs_message=Catalog%20Updated') elapse = time.time() - elapse
c_elapse = time.clock() - c_elapse
RESPONSE.redirect(URL1 + '/manage_catalogView?manage_tabs_message=' +
urllib.quote('Catalog Updated<br>Total time: %s<br>Total CPU time: %s' % (`elapse`, `c_elapse`)))
def manage_addColumn(self, name, REQUEST, RESPONSE, URL1): def manage_addColumn(self, name, REQUEST, RESPONSE, URL1):
...@@ -316,10 +343,14 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit): ...@@ -316,10 +343,14 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit):
""" wrapper around catalog """ """ wrapper around catalog """
self._v_total = (self._v_total + self._v_total = (self._v_total +
self._catalog.catalogObject(obj, uid, self.threshold)) self._catalog.catalogObject(obj, uid, self.threshold))
if self._v_total > self.threshold: if self.threshold is not None:
get_transaction().commit(1) if self._v_total > self.threshold:
self._v_total = 0 # commit a subtransaction
get_transaction().commit(1)
# kick the cache
self._p_jar.cacheFullSweep(1)
self._v_total = 0
def uncatalog_object(self, uid): def uncatalog_object(self, uid):
""" wrapper around catalog """ """ wrapper around catalog """
......
...@@ -29,7 +29,8 @@ Add Index: <input name="name"> <br> ...@@ -29,7 +29,8 @@ Add Index: <input name="name"> <br>
of Index Type: <select name="type"> of Index Type: <select name="type">
<option value="TextIndex">TextIndex</option> <option value="TextIndex">TextIndex</option>
<option value="FieldIndex">FieldIndex</options> <option value="FieldIndex">FieldIndex</option>
<option value="KeywordIndex">KeywordIndex</option>
</select> </select>
<input name="manage_addIndex:method" type=submit value=" Add "> <input name="manage_addIndex:method" type=submit value=" Add ">
</form> </form>
......
<HTML> <HTML>
<HEAD> <HEAD>
<TITLE>View Catalog Records</TITLE> <TITLE>View Catalog Records</TITLE>
</HEAD> </HEAD>
<BODY BGCOLOR="#FFFFFF" LINK="#000099" VLINK="#555555"> <BODY BGCOLOR="#FFFFFF" LINK="#000099" VLINK="#555555">
<!--#var manage_tabs--> <!--#var manage_tabs-->
<p>The Subtransaction threshold is the number of words the catalog <p> Subtransactions allow Zope to commit small parts of a
will index before it commits a subtransaction. If this number is low, transaction over a period of time instead of all at once. For
the Catalog will take much longer to index but consume much less ZCatalog, this means using subtransactions can significantly
memory. If this number is higher, the Catalog will index quickly but reduce the memory requirements needed to index huge amounts of
consume much more memory.</p> text all at once.</p>
<form action="manage_edit" method=POST>
Subtransaction threshold: <input name=threshold value="<!--#var <p> If enabled, subtransactions will reduce the memory
threshold html_quote-->"><br> requirements of ZCatalog, but <em>at the expense of speed</em>.
<input type=submit value=" Change "> If you choose to enable subtransactions, you can adjust how often
</form> ZCatalog commits a subtransaction by adjusting the
<b>threshold</b> below.</p>
<ul> <p> If you are using ZCatalog and ZSQL Methods in the same
<!--#in index_objects--> transaction, you <b>must</b> disable subtransactions, they are not
<li> compatible with ZSQL Methods.</p>
<!--#var "_.len(_['sequence-item'])"--> object are indexed in <b><!--#var "_['sequence-item'].id"--></b></li>
<!--#/in--> <h3>Subtransactions are <font color="red">
</ul> <dtml-if threshold>
<b>Enabled</b>
</BODY></HTML> <dtml-else>
<b>Disabled</b>
</dtml-if></font></h3><br>
<form action="." method=POST>
<dtml-if threshold>
<input type=submit name="manage_subbingToggle:method" value="Disable">
<dtml-else>
<input type=submit name="manage_subbingToggle:method" value="Enable">
</dtml-if>
</form>
<form action="manage_edit" method=POST>
<dtml-if threshold>
<p>The Subtransaction threshold is the number of words the catalog
will index before it commits a subtransaction. If this number is low,
the Catalog will take much longer to index but consume much less memory. If
this number is higher, the Catalog will index quickly but consume much
more memory.</p>
Subtransaction threshold: <input name="threshold:int" value="<!--#var
threshold html_quote-->"><br> <input type=submit value=" Change ">
</dtml-if>
</form>
<hr width=75%>
<h3>Index Status</h3>
<ul>
<!--#in index_objects-->
<li>
<!--#var "_.len(_['sequence-item'])"-->
objects are indexed in <b><!--#var "_['sequence-item'].id"--></b>
</li>
<!--#/in-->
</ul>
</BODY>
</HTML>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment