Commit ab7dc0c6 authored by Michel Pelletier

Added KeywordIndexes, and optimized subtransaction processing to kick
the cache. Modified UI to include CPU time and enable/disable subtransactions.
parent 67235350
......@@ -86,7 +86,7 @@
from Persistence import Persistent
import Acquisition
import BTree, OIBTree, IOBTree
from SearchIndex import UnIndex, UnTextIndex, Query
from SearchIndex import UnIndex, UnTextIndex, UnKeywordIndex, Query
import regex, pdb
from string import lower
import Record
......@@ -253,6 +253,8 @@ class Catalog(Persistent, Acquisition.Implicit):
indexes[name] = UnIndex.UnIndex(name)
elif type == 'TextIndex':
indexes[name] = UnTextIndex.UnTextIndex(name)
elif type == 'KeywordIndex':
indexes[name] = UnKeywordIndex.UnKeywordIndex(name)
self.indexes = indexes
......@@ -282,7 +284,7 @@ class Catalog(Persistent, Acquisition.Implicit):
data = self.data
if uid in self.uids.keys():
if self.uids.has_key(uid):
i = self.uids[uid]
elif data:
i = data.keys()[-1] + 1 # find the next available rid
......@@ -299,7 +301,6 @@ class Catalog(Persistent, Acquisition.Implicit):
for x in self.indexes.values():
if hasattr(x, 'index_object'):
blah = x.index_object(i, object, threshold)
__traceback_info__=(`total`, `blah`)
total = total + blah
self.data = data
......
......@@ -90,7 +90,7 @@ from OFS.Folder import Folder
from OFS.FindSupport import FindSupport
from DateTime import DateTime
from SearchIndex import Query
import string, regex, urlparse, urllib, os, sys
import string, regex, urlparse, urllib, os, sys, time
import Products
from Acquisition import Implicit
from Persistence import Persistent
......@@ -163,12 +163,12 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit):
'manage_addIndex', 'manage_delIndexes', 'manage_main',],
['Manager']),
('Search ZCatalog',
['searchResults', '__call__', 'uniqueValuesFor',
'getpath', 'schema', 'indexes', 'index_objects',
'all_meta_types', 'valid_roles', 'resolve_url',
'getobject'],
['Anonymous', 'Manager']),
('Search ZCatalog',
['searchResults', '__call__', 'uniqueValuesFor',
'getpath', 'schema', 'indexes', 'index_objects',
'all_meta_types', 'valid_roles', 'resolve_url',
'getobject'],
['Anonymous', 'Manager']),
)
......@@ -180,14 +180,14 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit):
manage_catalogStatus = HTMLFile('catalogStatus', globals())
threshold=1000
threshold=10000
_v_total=0
def __init__(self,id,title=''):
self.id=id
self.title=title
self.threshold = 1000
self.threshold = 10000
self._v_total = 0
self._catalog = Catalog()
......@@ -214,6 +214,16 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit):
RESPONSE.redirect(URL1 + '/manage_main?manage_tabs_message=Catalog%20Changed')
def manage_subbingToggle(self, REQUEST, RESPONSE, URL1):
""" toggle subtransactions """
# NOTE(review): indentation was stripped from this listing; the lines
# below are the method body and the if/else branches.
# A truthy threshold means subtransactions are currently enabled, so
# setting it to None switches them off.
if self.threshold:
self.threshold = None
else:
# Re-enable using 10000, the same value the class uses as its default.
self.threshold = 10000
# Send the browser to manage_catalogStatus with a "Catalog Changed" message.
RESPONSE.redirect(URL1 + '/manage_catalogStatus?manage_tabs_message=Catalog%20Changed')
def manage_catalogObject(self, REQUEST, RESPONSE, URL1, urls=None):
""" index all Zope objects that 'urls' point to """
if urls:
......@@ -238,6 +248,9 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit):
def manage_catalogReindex(self, REQUEST, RESPONSE, URL1):
""" clear the catalog, then re-index everything """
elapse = time.time()
c_elapse = time.clock()
paths = tuple(self._catalog.paths.values())
self._catalog.clear()
......@@ -245,8 +258,13 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit):
obj = self.resolve_url(p, REQUEST)
if obj is not None:
self.catalog_object(obj, p)
RESPONSE.redirect(URL1 + '/manage_catalogView?manage_tabs_message=Catalog%20Updated')
elapse = time.time() - elapse
c_elapse = time.clock() - c_elapse
RESPONSE.redirect(URL1 + '/manage_catalogView?manage_tabs_message=' +
urllib.quote('Catalog Updated<br>Total time: %s<br>Total CPU time: %s' % (`elapse`, `c_elapse`)))
def manage_catalogClear(self, REQUEST, RESPONSE, URL1):
""" clears the whole enchelada """
......@@ -264,6 +282,11 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit):
""" Find object according to search criteria and Catalog them
"""
elapse = time.time()
c_elapse = time.clock()
words = 0
results = self.ZopeFind(REQUEST.PARENTS[1],
obj_metatypes=obj_metatypes,
obj_ids=obj_ids,
......@@ -282,7 +305,11 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit):
REQUEST.script)[1][1:]
self.catalog_object(n[1], abs_path)
RESPONSE.redirect(URL1 + '/manage_catalogView?manage_tabs_message=Catalog%20Updated')
elapse = time.time() - elapse
c_elapse = time.clock() - c_elapse
RESPONSE.redirect(URL1 + '/manage_catalogView?manage_tabs_message=' +
urllib.quote('Catalog Updated<br>Total time: %s<br>Total CPU time: %s' % (`elapse`, `c_elapse`)))
def manage_addColumn(self, name, REQUEST, RESPONSE, URL1):
......@@ -316,10 +343,14 @@ class ZCatalog(Folder, FindSupport, Persistent, Implicit):
""" wrapper around catalog """
self._v_total = (self._v_total +
self._catalog.catalogObject(obj, uid, self.threshold))
if self._v_total > self.threshold:
get_transaction().commit(1)
self._v_total = 0
if self.threshold is not None:
if self._v_total > self.threshold:
# commit a subtransaction
get_transaction().commit(1)
# kick the cache
self._p_jar.cacheFullSweep(1)
self._v_total = 0
def uncatalog_object(self, uid):
""" wrapper around catalog """
......
......@@ -29,7 +29,8 @@ Add Index: <input name="name"> <br>
of Index Type: <select name="type">
<option value="TextIndex">TextIndex</option>
<option value="FieldIndex">FieldIndex</options>
<option value="FieldIndex">FieldIndex</option>
<option value="KeywordIndex">KeywordIndex</option>
</select>
<input name="manage_addIndex:method" type=submit value=" Add ">
</form>
......
<HTML>
<HEAD>
<TITLE>View Catalog Records</TITLE>
</HEAD>
<BODY BGCOLOR="#FFFFFF" LINK="#000099" VLINK="#555555">
<!--#var manage_tabs-->
<p>The Subtransaction threshold is the number of words the catalog
will index before it commits a subtransaction. If this number is low,
the Catalog will take much longer to index but consume much less
memory. If this number is higher, the Catalog will index quickly but
consume much more memory.</p>
<form action="manage_edit" method=POST>
Subtransaction threshold: <input name=threshold value="<!--#var
threshold html_quote-->"><br>
<input type=submit value=" Change ">
</form>
<ul>
<!--#in index_objects-->
<li>
<!--#var "_.len(_['sequence-item'])"--> object are indexed in <b><!--#var "_['sequence-item'].id"--></b></li>
<!--#/in-->
</ul>
</BODY></HTML>
<HEAD>
<TITLE>View Catalog Records</TITLE>
</HEAD>
<BODY BGCOLOR="#FFFFFF" LINK="#000099" VLINK="#555555">
<!--#var manage_tabs-->
<p> Subtransactions allow Zope to commit small parts of a
transaction over a period of time instead of all at once. For
ZCatalog, this means using subtransactions can significantly
reduce the memory requirements needed to index huge amounts of
text all at once.</p>
<p> If enabled, subtransactions will reduce the memory
requirements of ZCatalog, but <em>at the expense of speed</em>.
If you choose to enable subtransactions, you can adjust how often
ZCatalog commits a subtransaction by adjusting the
<b>threshold</b> below.</p>
<p> If you are using ZCatalog and ZSQL Methods in the same
transaction, you <b>must</b> disable subtransactions; they are not
compatible with ZSQL Methods.</p>
<h3>Subtransactions are <font color="red">
<dtml-if threshold>
<b>Enabled</b>
<dtml-else>
<b>Disabled</b>
</dtml-if></font></h3><br>
<form action="." method=POST>
<dtml-if threshold>
<input type=submit name="manage_subbingToggle:method" value="Disable">
<dtml-else>
<input type=submit name="manage_subbingToggle:method" value="Enable">
</dtml-if>
</form>
<form action="manage_edit" method=POST>
<dtml-if threshold>
<p>The Subtransaction threshold is the number of words the catalog
will index before it commits a subtransaction. If this number is low,
the Catalog will take much longer to index but consume much less memory. If
this number is higher, the Catalog will index quickly but consume much
more memory.</p>
Subtransaction threshold: <input name="threshold:int" value="<!--#var
threshold html_quote-->"><br> <input type=submit value=" Change ">
</dtml-if>
</form>
<hr width=75%>
<h3>Index Status</h3>
<ul>
<!--#in index_objects-->
<li>
<!--#var "_.len(_['sequence-item'])"-->
objects are indexed in <b><!--#var "_['sequence-item'].id"--></b>
</li>
<!--#/in-->
</ul>
</BODY>
</HTML>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment