Commit e6b5d0c3 authored by Jim Fulton's avatar Jim Fulton

Merged changes from Catalog-BTrees-Integration branch.

parent 22eec3b7
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE. DAMAGE.
$Id: ThreadLock.c,v 1.7 1999/02/19 16:10:05 jim Exp $ $Id: ThreadLock.c,v 1.8 2001/03/15 13:16:21 jim Exp $
If you have questions regarding this software, If you have questions regarding this software,
contact: contact:
...@@ -46,7 +46,7 @@ ...@@ -46,7 +46,7 @@
*/ */
static char ThreadLock_module_documentation[] = static char ThreadLock_module_documentation[] =
"" ""
"\n$Id: ThreadLock.c,v 1.7 1999/02/19 16:10:05 jim Exp $" "\n$Id: ThreadLock.c,v 1.8 2001/03/15 13:16:21 jim Exp $"
; ;
#include "Python.h" #include "Python.h"
...@@ -93,8 +93,9 @@ typedef struct { ...@@ -93,8 +93,9 @@ typedef struct {
staticforward PyTypeObject ThreadLockType; staticforward PyTypeObject ThreadLockType;
static int static int
cacquire(ThreadLockObject *self) cacquire(ThreadLockObject *self, int wait)
{ {
int acquired = 1;
#ifdef WITH_THREAD #ifdef WITH_THREAD
long id = get_thread_ident(); long id = get_thread_ident();
#else #else
...@@ -113,19 +114,26 @@ cacquire(ThreadLockObject *self) ...@@ -113,19 +114,26 @@ cacquire(ThreadLockObject *self)
{ {
#ifdef WITH_THREAD #ifdef WITH_THREAD
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
acquire_lock(self->lock, 1); acquired = acquire_lock(self->lock, wait ? WAIT_LOCK : NOWAIT_LOCK);
Py_END_ALLOW_THREADS Py_END_ALLOW_THREADS
#endif #endif
self->count=0; if (acquired)
self->id=id; {
self->count=0;
self->id=id;
}
} }
return 0; return acquired;
} }
static PyObject * static PyObject *
acquire(ThreadLockObject *self, PyObject *args) acquire(ThreadLockObject *self, PyObject *args)
{ {
if(cacquire(self) < 0) return NULL; int wait = -1, acquired;
if (! PyArg_ParseTuple(args, "|i", &wait)) return NULL;
acquired=cacquire(self, wait);
if(acquired < 0) return NULL;
if (wait >= 0) return PyInt_FromLong(acquired);
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
} }
...@@ -138,6 +146,7 @@ crelease(ThreadLockObject *self) ...@@ -138,6 +146,7 @@ crelease(ThreadLockObject *self)
#else #else
long id = 1; long id = 1;
#endif #endif
if(self->count >= 0 && self->id==id) if(self->count >= 0 && self->id==id)
{ {
/* Somebody has locked me. It is either the current thread or /* Somebody has locked me. It is either the current thread or
...@@ -161,6 +170,7 @@ crelease(ThreadLockObject *self) ...@@ -161,6 +170,7 @@ crelease(ThreadLockObject *self)
static PyObject * static PyObject *
release(ThreadLockObject *self, PyObject *args) release(ThreadLockObject *self, PyObject *args)
{ {
if (! PyArg_ParseTuple(args, "")) return NULL;
if(crelease(self) < 0) return NULL; if(crelease(self) < 0) return NULL;
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
...@@ -172,7 +182,7 @@ call_method(ThreadLockObject *self, PyObject *args) ...@@ -172,7 +182,7 @@ call_method(ThreadLockObject *self, PyObject *args)
PyObject *f, *a=0, *k=0; PyObject *f, *a=0, *k=0;
UNLESS(PyArg_ParseTuple(args,"OO|O",&f, &a, &k)) return NULL; UNLESS(PyArg_ParseTuple(args,"OO|O",&f, &a, &k)) return NULL;
if(cacquire(self) < 0) return NULL; if(cacquire(self, -1) < 0) return NULL;
f=PyEval_CallObjectWithKeywords(f,a,k); f=PyEval_CallObjectWithKeywords(f,a,k);
if(crelease(self) < 0) if(crelease(self) < 0)
{ {
...@@ -189,7 +199,7 @@ static struct PyMethodDef ThreadLock_methods[] = { ...@@ -189,7 +199,7 @@ static struct PyMethodDef ThreadLock_methods[] = {
"Acquire the lock, call the function, and then release the lock.\n" "Acquire the lock, call the function, and then release the lock.\n"
}, },
{"acquire", (PyCFunction)acquire, 1, {"acquire", (PyCFunction)acquire, 1,
"acquire() -- Acquire a lock, taking the thread ID into account" "acquire([wait]) -- Acquire a lock, taking the thread ID into account"
}, },
{"release", (PyCFunction)release, 1, {"release", (PyCFunction)release, 1,
"release() -- Release a lock, taking the thread ID into account" "release() -- Release a lock, taking the thread ID into account"
...@@ -296,7 +306,7 @@ void ...@@ -296,7 +306,7 @@ void
initThreadLock() initThreadLock()
{ {
PyObject *m, *d; PyObject *m, *d;
char *rev="$Revision: 1.7 $"; char *rev="$Revision: 1.8 $";
m = Py_InitModule4("ThreadLock", Module_methods, m = Py_InitModule4("ThreadLock", Module_methods,
ThreadLock_module_documentation, ThreadLock_module_documentation,
......
This diff is collapsed.
...@@ -109,7 +109,7 @@ class AbstractCatalogBrain(Record.Record, Acquisition.Implicit): ...@@ -109,7 +109,7 @@ class AbstractCatalogBrain(Record.Record, Acquisition.Implicit):
def getObject(self, REQUEST=None): def getObject(self, REQUEST=None):
"""Try to return the object for this record""" """Try to return the object for this record"""
try: try:
obj = self.aq_parent.restrictedTraverse(self.getPath()) obj = self.aq_parent.unrestrictedTraverse(self.getPath())
if not obj: if not obj:
if REQUEST is None: if REQUEST is None:
REQUEST = self.REQUEST REQUEST = self.REQUEST
......
...@@ -82,8 +82,8 @@ ...@@ -82,8 +82,8 @@
# attributions are listed in the accompanying credits file. # attributions are listed in the accompanying credits file.
# #
############################################################################## ##############################################################################
__doc__='''$Id: Lazy.py,v 1.3 2001/01/15 16:29:23 petrilli Exp $''' __doc__='''$Id: Lazy.py,v 1.4 2001/03/15 13:16:23 jim Exp $'''
__version__='$Revision: 1.3 $'[11:-2] __version__='$Revision: 1.4 $'[11:-2]
class Lazy: class Lazy:
...@@ -148,11 +148,12 @@ class LazyCat(Lazy): ...@@ -148,11 +148,12 @@ class LazyCat(Lazy):
# Lazy concatenation of one or more sequences. Should be handy # Lazy concatenation of one or more sequences. Should be handy
# for accessing small parts of big searches. # for accessing small parts of big searches.
def __init__(self, sequences): def __init__(self, sequences, length=None):
self._seq=sequences self._seq=sequences
self._data=[] self._data=[]
self._sindex=0 self._sindex=0
self._eindex=-1 self._eindex=-1
if length is not None: self._len=length
def __getitem__(self,index): def __getitem__(self,index):
...@@ -194,11 +195,12 @@ class LazyMap(Lazy): ...@@ -194,11 +195,12 @@ class LazyMap(Lazy):
# Act like a sequence, but get data from a filtering process. # Act like a sequence, but get data from a filtering process.
# Don't access data until necessary # Don't access data until necessary
def __init__(self,func,seq): def __init__(self, func, seq, length=None):
self._seq=seq self._seq=seq
self._len=len(seq)
self._data=[] self._data=[]
self._func=func self._func=func
if length is not None: self._len=length
else: self._len = len(seq)
def __getitem__(self,index): def __getitem__(self,index):
...@@ -229,7 +231,7 @@ class LazyFilter(Lazy): ...@@ -229,7 +231,7 @@ class LazyFilter(Lazy):
# Act like a sequence, but get data from a filtering process. # Act like a sequence, but get data from a filtering process.
# Don't access data until necessary # Don't access data until necessary
def __init__(self,test,seq): def __init__(self, test, seq):
self._seq=seq self._seq=seq
self._data=[] self._data=[]
self._eindex=-1 self._eindex=-1
...@@ -270,7 +272,7 @@ class LazyMop(Lazy): ...@@ -270,7 +272,7 @@ class LazyMop(Lazy):
# Act like a sequence, but get data from a filtering process. # Act like a sequence, but get data from a filtering process.
# Don't access data until necessary # Don't access data until necessary
def __init__(self,test,seq): def __init__(self, test, seq):
self._seq=seq self._seq=seq
self._data=[] self._data=[]
self._eindex=-1 self._eindex=-1
......
...@@ -112,7 +112,7 @@ class Vocabulary(Item, Persistent, Implicit, ...@@ -112,7 +112,7 @@ class Vocabulary(Item, Persistent, Implicit,
AccessControl.Role.RoleManager, AccessControl.Role.RoleManager,
): ):
""" """
A Vocabulary is a user managable relization of a Lexicon object. A Vocabulary is a user-managable realization of a Lexicon object.
""" """
...@@ -151,7 +151,7 @@ class Vocabulary(Item, Persistent, Implicit, ...@@ -151,7 +151,7 @@ class Vocabulary(Item, Persistent, Implicit,
""" create the lexicon to manage... """ """ create the lexicon to manage... """
self.id = id self.id = id
self.title = title self.title = title
self.globbing = globbing self.globbing = not not globbing
if globbing: if globbing:
self.lexicon = GlobbingLexicon.GlobbingLexicon() self.lexicon = GlobbingLexicon.GlobbingLexicon()
......
...@@ -97,14 +97,15 @@ from Persistence import Persistent ...@@ -97,14 +97,15 @@ from Persistence import Persistent
from DocumentTemplate.DT_Util import InstanceDict, TemplateDict from DocumentTemplate.DT_Util import InstanceDict, TemplateDict
from DocumentTemplate.DT_Util import Eval, expr_globals from DocumentTemplate.DT_Util import Eval, expr_globals
from AccessControl.Permission import name_trans from AccessControl.Permission import name_trans
from Catalog import Catalog, orify from Catalog import Catalog, orify, CatalogError
from SearchIndex import UnIndex, UnTextIndex from SearchIndex import UnIndex, UnTextIndex
from Vocabulary import Vocabulary from Vocabulary import Vocabulary
import IOBTree
from Shared.DC.ZRDB.TM import TM from Shared.DC.ZRDB.TM import TM
from AccessControl import getSecurityManager from AccessControl import getSecurityManager
from zLOG import LOG, ERROR from zLOG import LOG, ERROR
StringType=type('')
manage_addZCatalogForm=DTMLFile('dtml/addZCatalog',globals()) manage_addZCatalogForm=DTMLFile('dtml/addZCatalog',globals())
def manage_addZCatalog(self, id, title, def manage_addZCatalog(self, id, title,
...@@ -225,7 +226,6 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -225,7 +226,6 @@ class ZCatalog(Folder, Persistent, Implicit):
def __init__(self, id, title='', vocab_id=None, container=None): def __init__(self, id, title='', vocab_id=None, container=None):
self.id=id self.id=id
self.title=title self.title=title
self.vocab_id = vocab_id
self.threshold = 10000 self.threshold = 10000
self._v_total = 0 self._v_total = 0
...@@ -233,11 +233,11 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -233,11 +233,11 @@ class ZCatalog(Folder, Persistent, Implicit):
if vocab_id is None: if vocab_id is None:
v = Vocabulary('Vocabulary', 'Vocabulary', globbing=1) v = Vocabulary('Vocabulary', 'Vocabulary', globbing=1)
self._setObject('Vocabulary', v) self._setObject('Vocabulary', v)
v = 'Vocabulary' self.vocab_id = 'Vocabulary'
else: else:
v = vocab_id self.vocab_id = vocab_id
self._catalog = Catalog(vocabulary=v) self._catalog = Catalog(vocabulary=self.vocab_id)
self._catalog.addColumn('id') self._catalog.addColumn('id')
self._catalog.addIndex('id', 'FieldIndex') self._catalog.addIndex('id', 'FieldIndex')
...@@ -254,6 +254,7 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -254,6 +254,7 @@ class ZCatalog(Folder, Persistent, Implicit):
self._catalog.addColumn('summary') self._catalog.addColumn('summary')
self._catalog.addIndex('PrincipiaSearchSource', 'TextIndex') self._catalog.addIndex('PrincipiaSearchSource', 'TextIndex')
def __len__(self): return len(self._catalog)
def getVocabulary(self): def getVocabulary(self):
""" more ack! """ """ more ack! """
...@@ -406,8 +407,20 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -406,8 +407,20 @@ class ZCatalog(Folder, Persistent, Implicit):
RESPONSE.redirect(URL1 + '/manage_catalogIndexes?manage_tabs_message=Index%20Deleted') RESPONSE.redirect(URL1 + '/manage_catalogIndexes?manage_tabs_message=Index%20Deleted')
def catalog_object(self, obj, uid): def catalog_object(self, obj, uid=None):
""" wrapper around catalog """ """ wrapper around catalog """
if uid is None:
try: uid = obj.getPhysicalPath
except AttributeError:
raise CatalogError(
"A cataloged object must support the 'getPhysicalPath' "
"method if no unique id is provided when cataloging"
)
else: uid=string.join(uid(), '/')
elif type(uid) is not StringType:
raise CatalogError('The object unique id must be a string.')
self._catalog.catalogObject(obj, uid, None) self._catalog.catalogObject(obj, uid, None)
# None passed in to catalogObject as third argument indicates # None passed in to catalogObject as third argument indicates
# that we shouldn't try to commit subtransactions within any # that we shouldn't try to commit subtransactions within any
...@@ -433,7 +446,7 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -433,7 +446,7 @@ class ZCatalog(Folder, Persistent, Implicit):
# exceeded within the boundaries of the current transaction. # exceeded within the boundaries of the current transaction.
if self._v_total > self.threshold: if self._v_total > self.threshold:
get_transaction().commit(1) get_transaction().commit(1)
self._p_jar.cacheFullSweep(1) self._p_jar.cacheFullSweep(3)
self._v_total = 0 self._v_total = 0
def uncatalog_object(self, uid): def uncatalog_object(self, uid):
...@@ -527,7 +540,7 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -527,7 +540,7 @@ class ZCatalog(Folder, Persistent, Implicit):
if hasattr(self, '_product_meta_types'): pmt=self._product_meta_types if hasattr(self, '_product_meta_types'): pmt=self._product_meta_types
elif hasattr(self, 'aq_acquire'): elif hasattr(self, 'aq_acquire'):
try: pmt=self.aq_acquire('_product_meta_types') try: pmt=self.aq_acquire('_product_meta_types')
except: pass except AttributeError: pass
return self.meta_types+Products.meta_types+pmt return self.meta_types+Products.meta_types+pmt
def valid_roles(self): def valid_roles(self):
...@@ -658,8 +671,8 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -658,8 +671,8 @@ class ZCatalog(Folder, Persistent, Implicit):
script=REQUEST.script script=REQUEST.script
if string.find(path, script) != 0: if string.find(path, script) != 0:
path='%s/%s' % (script, path) path='%s/%s' % (script, path)
try: return REQUEST.resolve_url(path) try: return REQUEST.resolve_url(path)
except: return None except: pass
def resolve_path(self, path): def resolve_path(self, path):
""" """
...@@ -668,10 +681,8 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -668,10 +681,8 @@ class ZCatalog(Folder, Persistent, Implicit):
style url. If no object is found, None is returned. style url. If no object is found, None is returned.
No exceptions are raised. No exceptions are raised.
""" """
try: try: return self.unrestrictedTraverse(path)
return self.unrestrictedTraverse(path) except: pass
except:
return None
def manage_normalize_paths(self, REQUEST): def manage_normalize_paths(self, REQUEST):
"""Ensure that all catalog paths are full physical paths """Ensure that all catalog paths are full physical paths
...@@ -713,6 +724,16 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -713,6 +724,16 @@ class ZCatalog(Folder, Persistent, Implicit):
'%s unchanged.' % (len(fixed), len(removed), unchanged), '%s unchanged.' % (len(fixed), len(removed), unchanged),
action='./manage_main') action='./manage_main')
def manage_convertBTrees(self, threshold=200):
"""Convert the catalog's data structures to use BTrees package"""
tt=time.time()
ct=time.clock()
self._catalog._convertBTrees(threshold
*1 #make sure ints an int)
)
tt=time.time()-tt
ct=time.clock()-ct
return 'Finished conversion in %s seconds (%s cpu)' % (tt, ct)
Globals.default__class_init__(ZCatalog) Globals.default__class_init__(ZCatalog)
......
...@@ -37,8 +37,6 @@ that have one or more keywords specified in a search query. ...@@ -37,8 +37,6 @@ that have one or more keywords specified in a search query.
<div class="list-item">Index Name</div></td> <div class="list-item">Index Name</div></td>
<td width="20%" align="left" valign="top"> <td width="20%" align="left" valign="top">
<div class="list-item">Index Type</div></td> <div class="list-item">Index Type</div></td>
<td width="15%" align="left" valign="top">
<div class="list-item">Size</div></td>
</tr> </tr>
</dtml-if> </dtml-if>
<dtml-if name="sequence-odd"><tr class="row-normal"> <dtml-if name="sequence-odd"><tr class="row-normal">
...@@ -49,11 +47,11 @@ that have one or more keywords specified in a search query. ...@@ -49,11 +47,11 @@ that have one or more keywords specified in a search query.
</td> </td>
<td width="60%" align="left" valign="top"> <td width="60%" align="left" valign="top">
<div class="list-item"> <div class="list-item">
<a href="" target="_index_info_&dtml-id;">&dtml-id;</a></div></td> &dtml-id;
</div>
</td>
<td width="20%" align="left" valign="top"> <td width="20%" align="left" valign="top">
<div class="list-item">&dtml-meta_type;</div></td> <div class="list-item">&dtml-meta_type;</div></td>
<td width="15%" align="left" valign="top"><div class="list-item"
><dtml-var expr="_.len(_['sequence-item'])" thousands_commas>
</div> </div>
</td> </td>
</tr> </tr>
......
# Making tests a package makes debugging easier.
import rfc822,mailbox,cPickle,string
class Keywords:
    """Collect distinct words from the Subject headers of a Unix mailbox.

    This is a helper for building test data only.  The collected words are
    kept in ``self.kw`` (a list, in first-seen order) and can be saved to
    and restored from the pickle file 'data/keywords'.
    """

    def __init__(self):
        # Distinct keywords, in the order they were first encountered.
        self.kw = []

    def build(self,mbox,limit):
        """Read the mailbox file `mbox` and pickle the keywords found.

        Each Subject header is split on single spaces; a piece is kept
        when every character is in string.letters and it is not already
        in the list.  `limit` is only checked between messages, so the
        final list may hold more than `limit` words.  The result is
        written to 'data/keywords'.
        """
        mbox_file = open(mbox)
        try:
            mb = mailbox.UnixMailbox(mbox_file)
            msg = mb.next()
            while msg and len(self.kw) < limit:
                subject = msg.dict.get("subject")
                # get() gives None when the message has no subject entry;
                # splitting None would raise, so such messages are skipped.
                if subject is not None:
                    for word in string.split(subject, ' '):
                        ok = 1
                        for c in word:
                            if not c in string.letters: ok=0
                        if ok==1 and not word in self.kw:
                            self.kw.append(word)
                msg = mb.next()
        finally:
            mbox_file.close()

        # Close the pickle file explicitly so the data is flushed to disk
        # before anybody calls reload().
        pickle_file = open('data/keywords','w')
        try:
            cPickle.Pickler(pickle_file).dump(self.kw)
        finally:
            pickle_file.close()

    def reload(self):
        """Replace the keyword list with the one pickled in 'data/keywords'."""
        pickle_file = open('data/keywords','r')
        try:
            self.kw = cPickle.Unpickler(pickle_file).load()
        finally:
            pickle_file.close()

    def keywords(self):
        """Return the keyword list itself (not a copy)."""
        return self.kw
This diff is collapsed.
import os, sys

# Look for the Testing package in the current directory first.
sys.path.insert(0, '.')
try:
    import Testing
    # Testing was importable from here: keep any SOFTWARE_HOME already set
    # in the environment, otherwise use the current directory.
    os.environ['SOFTWARE_HOME']=os.environ.get('SOFTWARE_HOME', '.')
except ImportError:
    # Otherwise retry from three directories up and force SOFTWARE_HOME to
    # that location (a second ImportError here is not caught).
    sys.path[0]='../../..'
    import Testing
    os.environ['SOFTWARE_HOME']='../../..'

# INSTANCE_HOME defaults to two levels above SOFTWARE_HOME unless the
# environment already provides one.
os.environ['INSTANCE_HOME']=os.environ.get(
    'INSTANCE_HOME',
    os.path.join(os.environ['SOFTWARE_HOME'],'..','..')
    )

# Point STUPID_LOG_FILE at var/debug.log under INSTANCE_HOME.
os.environ['STUPID_LOG_FILE']=os.path.join(os.environ['INSTANCE_HOME'],'var',
                                           'debug.log')
# Directory the script was started from; 'zope.mbox' is looked up here.
here = os.getcwd()

# NOTE(review): Zope is imported only after the environment variables above
# are set -- presumably it reads them at import time; confirm before
# reordering these imports.
import Zope
import mailbox, time, httplib
from string import strip, find, split, lower, atoi, join
from urllib import quote
from Products.ZCatalog import ZCatalog
from unittest import TestCase, TestSuite, JUnitTextTestRunner,\
     VerboseTextTestRunner, makeSuite
from Testing.makerequest import makerequest

# Use the verbose runner under the plain TextTestRunner name (see main()).
TextTestRunner = VerboseTextTestRunner
class TestTimeIndex(TestCase):
    """Timing checks for loading mail into a ZCatalog.

    Every check loads messages from 'zope.mbox' (in the start directory)
    with loadmail() and reports elapsed wall-clock time through out().
    """

    def setUp(self):
        """Create folder 'catalogtest' holding a ZCatalog named 'catalog'."""
        self.app = makerequest(Zope.app())
        # Drop a folder left behind by an earlier run; an AttributeError
        # (presumably raised when there is none -- confirm) is ignored.
        try: self.app._delObject('catalogtest')
        except AttributeError: pass
        self.app.manage_addFolder('catalogtest')
        zcatalog = ZCatalog.ZCatalog('catalog', 'a catalog')
        self.app.catalogtest._setObject('catalog', zcatalog)
        c = self.app.catalogtest.catalog
        # Best effort: delete each of these indexes (any error is ignored)
        # so that they can be added below with the wanted index types.
        for x in ('title', 'to', 'from', 'date', 'raw'):
            try: c.manage_delIndexes([x])
            except: pass
        c.manage_addIndex('title', 'TextIndex')
        c.manage_addIndex('to', 'TextIndex')
        c.manage_addIndex('from', 'TextIndex')
        c.manage_addIndex('date', 'FieldIndex')
        c.manage_addIndex('raw', 'TextIndex')

    def tearDown(self):
        """Delete the test folder, pack and close the connection, drop self.app.

        AttributeErrors are ignored in both steps, so calling this a second
        time (after self.app has been deleted) does not fail.
        """
        try: self.app._delObject('catalogtest')
        except AttributeError: pass
        try:
            self.app._p_jar._db.pack()
            self.app._p_jar.close()
        except AttributeError: pass
        self.app = None
        del self.app

    def checkTimeBulkIndex(self):
        """Time loading 500 messages, then cataloging them in one bulk pass."""
        print
        c = self.app.catalogtest.catalog
        t = time.time()
        # No catalog is passed to loadmail, so the documents are only
        # created here; they are indexed by manage_catalogFoundItems below.
        loadmail(self.app.catalogtest, 'zopemail',
                 os.path.join(here, 'zope.mbox'), 500)
        get_transaction().commit()
        loadtime = time.time() - t
        out("loading data took %s seconds.. " % loadtime)
        t = time.time()
        req = self.app.REQUEST
        # PARENTS is set by hand on the request: catalog, its folder, the app.
        parents = [self.app.catalogtest.catalog,
                   self.app.catalogtest, self.app]
        req['PARENTS'] = parents
        rsp = self.app.REQUEST.RESPONSE
        url1 = ''
        c.manage_catalogFoundItems(req, rsp, url1, url1,
                                   obj_metatypes=['DTML Document'])
        indextime = time.time() - t
        out("bulk index took %s seconds.. " % indextime)
        out("total time for load and index was %s seconds.. "
            % (loadtime + indextime))

    def checkTimeIncrementalIndexAndQuery(self):
        """Time loading with per-document cataloging, then sanity-check queries."""
        print
        c = self.app.catalogtest.catalog
        t = time.time()
        max = 500
        # Passing the catalog makes loadmail catalog each document as it
        # is created; m maps header names to the values that were seen.
        m = loadmail(self.app.catalogtest, 'zopemail',
                     os.path.join(here, 'zope.mbox'), max, c)
        get_transaction().commit()
        total = time.time() - t
        out("total time for load and index was %s seconds.. " % total)
        t = time.time()
        rs = c() # empty query should return all
        assert len(rs) == max, len(rs)
        dates = m['date']
        froms = m['from']
        tos =m['to']
        titles = m['title']
        assert len(c({'date':'foobarfoo'})) == 0 # should return no results
        for x in dates:
            assert len(c({'date':x})) == 1 # each date should be fieldindexed
        assert len(c({'from':'a'})) == 0 # should be caught by splitter
        assert len(c({'raw':'chris'})) != 0
        assert len(c({'raw':'gghdjkasjdsda'})) == 0
        assert c({'PrincipiaSearchSource':'the*'})

    def checkTimeSubcommit(self):
        """Time the incremental load once per catalog threshold value."""
        print
        for x in (None,100,500,1000,10000):
            out("testing subcommit at theshhold of %s" % x)
            # The first pass (x is None) runs on the fixture as it already
            # is; every pass ends with tearDown(), so later passes have to
            # build a fresh fixture first.
            if x is not None:
                self.setUp()
            c = self.app.catalogtest.catalog
            c.threshold = x
            get_transaction().commit()
            t = time.time()
            loadmail(self.app.catalogtest, 'zopemail',
                     os.path.join(here, 'zope.mbox'), 500, c)
            get_transaction().commit()
            total = time.time() - t
            out("total time with subcommit thresh %s was %s seconds.. "
                % (x,total))
            self.tearDown()
# utility
def loadmail(folder, name, mbox, max=None, catalog=None):
    """
    Load messages from the Unix mailbox file 'mbox' as DTML documents.

    A folder called 'name' is created inside 'folder'.  Messages are
    numbered from 0 and stored in sub-folders of 100 documents each,
    named '0', '100', '200', ...  The Subject header becomes the
    document title; the To, From and Date headers are added as string
    properties.

    If 'catalog' is not None, catalog_object() is called on it for each
    document, using the document's physical path joined with '/' as the
    unique id.  If 'max' is not None, loading stops once 'max' messages
    have been added; otherwise the whole mailbox is read.

    Returns a dictionary with the keys 'date', 'from', 'to' and 'title',
    each mapping to the list of header values seen, in reading order.
    """
    seen = {'date':[],'from':[],'to':[],'title':[]}
    folder.manage_addFolder(name)
    top = getattr(folder, name)
    per_folder = 100
    count = 0
    mbox_file = open(mbox)
    try:
        box = mailbox.UnixMailbox(mbox_file)
        message = box.next()
        while message:
            # Sub-folder id for this document: '0', '100', '200', ...
            bucket = repr(count/per_folder * 100)
            try:
                dest = getattr(top, bucket)
            except AttributeError:
                top.manage_addFolder(bucket)
                dest = getattr(top, bucket)
            doc_id = str(count)
            dest.manage_addDTMLDocument(doc_id, file=message.fp.read())
            doc = getattr(dest, doc_id)
            count = count + 1
            for header in message.headers:
                header = strip(header)
                colon = find(header, ':')
                # No colon, or a colon in first position: not a usable
                # "Name: value" line.
                if colon <= 0: continue
                key = lower(header[:colon])
                if key == 'subject': key = 'title'
                value = strip(header[colon+1:])
                if key == 'title':
                    doc.manage_changeProperties(title=value)
                elif key in ('to', 'from', 'date'):
                    # Best effort: a failure to add the property (for
                    # instance because the header is repeated) is ignored,
                    # the value is still recorded below.
                    try: doc.manage_addProperty(key, value, 'string')
                    except: pass
                else:
                    continue
                seen[key].append(value)
            # Compare against None rather than truth-testing: a catalog
            # object that defines __len__ is false while it is empty, and
            # would then never receive its first document.
            if catalog is not None:
                path = join(doc.getPhysicalPath(), '/')
                catalog.catalog_object(doc, path)
            if max is not None:
                if count >= max: break
            message = box.next()
    finally:
        mbox_file.close()
    return seen
def out(s):
    """Print `s` on its own line, formatted through " %s"."""
    print " %s" % s
def test_suite():
    """Return a TestSuite wrapping the suite that makeSuite builds from
    TestTimeIndex with the method-name prefix 'check'."""
    return TestSuite((makeSuite(TestTimeIndex, 'check'),))
def main():
mb = os.path.join(here, 'zope.mbox')
if not os.path.isfile(mb):
print "do you want to get the zope.mbox file from lists.zope.org?"
print "it's required for testing (98MB, ~ 30mins on fast conn)"
print "it's also available at korak:/home/chrism/zope.mbox"
print "-- type 'Y' or 'N'"
a = raw_input()
if lower(a[:1]) == 'y':
server = 'lists.zope.org:80'
method = '/pipermail/zope.mbox/zope.mbox'
h = httplib.HTTP(server)
h.putrequest('GET', method)
h.putheader('User-Agent', 'silly')
h.putheader('Accept', 'text/html')
h.putheader('Accept', 'text/plain')
h.putheader('Host', server)
h.endheaders()
errcode, errmsg, headers = h.getreply()
if errcode != 200:
f = h.getfile()
data = f.read()
print data
raise "Error reading from host %s" % server
f = h.getfile()
out=open(mb,'w')
print "this is going to take a while..."
print "downloading mbox from %s" % server
while 1:
l = f.readline()
if not l: break
out.write(l)
alltests=test_suite()
runner = TextTestRunner()
runner.run(alltests)
def debug():
    """Build the suite with test_suite() and call its debug() method."""
    suite = test_suite()
    suite.debug()
if __name__=='__main__':
    # Without arguments run main(); otherwise the first argument names the
    # module-level callable to invoke (for example "debug").
    if len(sys.argv) <= 1:
        main()
    else:
        globals()[sys.argv[1]]()
...@@ -85,18 +85,15 @@ ...@@ -85,18 +85,15 @@
from Lexicon import Lexicon from Lexicon import Lexicon
from Splitter import Splitter from Splitter import Splitter
from intSet import intSet
from UnTextIndex import Or from UnTextIndex import Or
import re, string import re, string
import OIBTree, BTree, IOBTree, IIBTree
# Short cuts for common data containers
OIBTree = OIBTree.BTree # Object -> Integer
OOBTree = BTree.BTree # Object -> Object
IOBTree = IOBTree.BTree # Integer -> Object
IIBucket = IIBTree.Bucket # Integer -> Integer
from BTrees.IIBTree import IISet, union, IITreeSet
from BTrees.OIBTree import OIBTree
from BTrees.IOBTree import IOBTree
from BTrees.OOBTree import OOBTree
from randid import randid
class GlobbingLexicon(Lexicon): class GlobbingLexicon(Lexicon):
"""Lexicon which supports basic globbing function ('*' and '?'). """Lexicon which supports basic globbing function ('*' and '?').
...@@ -127,11 +124,24 @@ class GlobbingLexicon(Lexicon): ...@@ -127,11 +124,24 @@ class GlobbingLexicon(Lexicon):
def __init__(self): def __init__(self):
self.counter = 0 # word id counter XXX self.clear()
def clear(self):
self._lexicon = OIBTree() self._lexicon = OIBTree()
self._inverseLex = IOBTree() self._inverseLex = IOBTree()
self._digrams = OOBTree() self._digrams = OOBTree()
def _convertBTrees(self, threshold=200):
Lexicon._convertBTrees(self, threshold)
if type(self._digrams) is OOBTree: return
from BTrees.convert import convert
_digrams=self._digrams
self._digrams=OOBTree()
self._digrams._p_jar=self._p_jar
convert(_digrams, self._digrams, threshold, IITreeSet)
def createDigrams(self, word): def createDigrams(self, word):
"""Returns a list with the set of digrams in the word.""" """Returns a list with the set of digrams in the word."""
...@@ -139,8 +149,8 @@ class GlobbingLexicon(Lexicon): ...@@ -139,8 +149,8 @@ class GlobbingLexicon(Lexicon):
digrams.append(self.eow + word[0]) # Mark the beginning digrams.append(self.eow + word[0]) # Mark the beginning
for i in range(len(word)): for i in range(1,len(word)):
digrams.append(word[i:i+2]) digrams.append(word[i-1:i+1])
digrams[-1] = digrams[-1] + self.eow # Mark the end digrams[-1] = digrams[-1] + self.eow # Mark the end
...@@ -157,6 +167,8 @@ class GlobbingLexicon(Lexicon): ...@@ -157,6 +167,8 @@ class GlobbingLexicon(Lexicon):
set = getWordId # Kludge for old code set = getWordId # Kludge for old code
def getWord(self, wid):
return self._inverseLex.get(wid, None)
def assignWordId(self, word): def assignWordId(self, word):
"""Assigns a new word id to the provided word, and return it.""" """Assigns a new word id to the provided word, and return it."""
...@@ -165,20 +177,35 @@ class GlobbingLexicon(Lexicon): ...@@ -165,20 +177,35 @@ class GlobbingLexicon(Lexicon):
# return it. # return it.
if self._lexicon.has_key(word): if self._lexicon.has_key(word):
return self._lexicon[word] return self._lexicon[word]
# First we go ahead and put the forward and reverse maps in.
self._lexicon[word] = self.counter # Get word id. BBB Backward compat pain.
self._inverseLex[self.counter] = word inverse=self._inverseLex
try: insert=inverse.insert
except AttributeError:
# we have an "old" BTree object
if inverse:
wid=inverse.keys()[-1]+1
else:
self._inverseLex=IOBTree()
wid=1
inverse[wid] = word
else:
# we have a "new" IOBTree object
wid=randid()
while not inverse.insert(wid, word):
wid=randid()
self._lexicon[word] = wid
# Now take all the digrams and insert them into the digram map. # Now take all the digrams and insert them into the digram map.
for digram in self.createDigrams(word): for digram in self.createDigrams(word):
set = self._digrams.get(digram) set = self._digrams.get(digram, None)
if set is None: if set is None:
self._digrams[digram] = set = intSet() self._digrams[digram] = set = IISet()
set.insert(self.counter) set.insert(wid)
self.counter = self.counter + 1 return wid
return self.counter - 1 # Adjust for the previous increment
def get(self, pattern): def get(self, pattern):
...@@ -208,14 +235,11 @@ class GlobbingLexicon(Lexicon): ...@@ -208,14 +235,11 @@ class GlobbingLexicon(Lexicon):
return (result, ) return (result, )
## now get all of the intsets that contain the result digrams ## now get all of the intsets that contain the result digrams
result = IIBucket() result = None
for digram in digrams: for digram in digrams:
if self._digrams.has_key(digram): result=union(result, self._digrams.get(digram, None))
matchSet = self._digrams[digram]
if matchSet is not None:
result = IIBucket().union(matchSet)
if len(result) == 0: if not result:
return () return ()
else: else:
## now we have narrowed the list of possible candidates ## now we have narrowed the list of possible candidates
...@@ -227,10 +251,10 @@ class GlobbingLexicon(Lexicon): ...@@ -227,10 +251,10 @@ class GlobbingLexicon(Lexicon):
expr = re.compile(self.createRegex(pattern)) expr = re.compile(self.createRegex(pattern))
words = [] words = []
hits = [] hits = IISet()
for x in result.keys(): for x in result:
if expr.match(self._inverseLex[x]): if expr.match(self._inverseLex[x]):
hits.append(x) hits.insert(x)
return hits return hits
...@@ -242,7 +266,6 @@ class GlobbingLexicon(Lexicon): ...@@ -242,7 +266,6 @@ class GlobbingLexicon(Lexicon):
def query_hook(self, q): def query_hook(self, q):
"""expand wildcards""" """expand wildcards"""
words = [] words = []
wids = []
for w in q: for w in q:
if ( (self.multi_wc in w) or if ( (self.multi_wc in w) or
(self.single_wc in w) ): (self.single_wc in w) ):
...@@ -286,3 +309,5 @@ class GlobbingLexicon(Lexicon): ...@@ -286,3 +309,5 @@ class GlobbingLexicon(Lexicon):
r'()&|!@#$%^{}\<>') r'()&|!@#$%^{}\<>')
return "%s$" % result return "%s$" % result
...@@ -84,11 +84,11 @@ ...@@ -84,11 +84,11 @@
############################################################################## ##############################################################################
"""Simple column indices""" """Simple column indices"""
__version__='$Revision: 1.27 $'[11:-2] __version__='$Revision: 1.28 $'[11:-2]
from Persistence import Persistent from Persistence import Persistent
from BTree import BTree from BTrees.OOBTree import OOBTree
from intSet import intSet from BTrees.IIBTree import IITreeSet
import operator import operator
from Missing import MV from Missing import MV
import string import string
...@@ -135,7 +135,7 @@ class Index(Persistent): ...@@ -135,7 +135,7 @@ class Index(Persistent):
self.id = id self.id = id
self.ignore_ex=ignore_ex self.ignore_ex=ignore_ex
self.call_methods=call_methods self.call_methods=call_methods
self._index = BTree() self._index = OOBTree()
self._reindex() self._reindex()
else: else:
...@@ -176,7 +176,7 @@ class Index(Persistent): ...@@ -176,7 +176,7 @@ class Index(Persistent):
def clear(self): def clear(self):
self._index = BTree() self._index = OOBTree()
def _reindex(self, start=0): def _reindex(self, start=0):
...@@ -200,7 +200,7 @@ class Index(Persistent): ...@@ -200,7 +200,7 @@ class Index(Persistent):
if k is None or k == MV: continue if k is None or k == MV: continue
set=get(k) set=get(k)
if set is None: index[k] = set = intSet() if set is None: index[k] = set = IITreeSet()
set.insert(i) set.insert(i)
...@@ -225,7 +225,7 @@ class Index(Persistent): ...@@ -225,7 +225,7 @@ class Index(Persistent):
return return
set = index.get(k) set = index.get(k)
if set is None: index[k] = set = intSet() if set is None: index[k] = set = IITreeSet()
set.insert(i) set.insert(i)
...@@ -301,8 +301,7 @@ class Index(Persistent): ...@@ -301,8 +301,7 @@ class Index(Persistent):
if hi: setlist = index.items(lo,hi) if hi: setlist = index.items(lo,hi)
else: setlist = index.items(lo) else: setlist = index.items(lo)
for k,set in setlist: for k,set in setlist:
if r is None: r = set w, r = weightedUnion(r, set)
else: r = r.union(set)
except KeyError: pass except KeyError: pass
else: #not a range else: #not a range
get = index.get get = index.get
...@@ -310,11 +309,10 @@ class Index(Persistent): ...@@ -310,11 +309,10 @@ class Index(Persistent):
if key: anyTrue = 1 if key: anyTrue = 1
set=get(key) set=get(key)
if set is not None: if set is not None:
if r is None: r = set w, r = weightedUnion(r, set)
else: r = r.union(set)
if r is None: if r is None:
if anyTrue: r=intSet() if anyTrue: r=IISet()
else: return None else: return None
return r, (id,) return r, (id,)
......
...@@ -92,11 +92,12 @@ mapping. ...@@ -92,11 +92,12 @@ mapping.
from Splitter import Splitter from Splitter import Splitter
from Persistence import Persistent from Persistence import Persistent
from Acquisition import Implicit from Acquisition import Implicit
import OIBTree, BTree
OIBTree=OIBTree.BTree
OOBTree=BTree.BTree
import re
from BTrees.OIBTree import OIBTree
from BTrees.IOBTree import IOBTree
from BTrees.IIBTree import IISet, IITreeSet
from randid import randid
class Lexicon(Persistent, Implicit): class Lexicon(Persistent, Implicit):
"""Maps words to word ids and then some """Maps words to word ids and then some
...@@ -112,13 +113,38 @@ class Lexicon(Persistent, Implicit): ...@@ -112,13 +113,38 @@ class Lexicon(Persistent, Implicit):
stop_syn={} stop_syn={}
def __init__(self, stop_syn=None): def __init__(self, stop_syn=None):
self._lexicon = OIBTree() self.clear()
self.counter = 0
if stop_syn is None: if stop_syn is None:
self.stop_syn = {} self.stop_syn = {}
else: else:
self.stop_syn = stop_syn self.stop_syn = stop_syn
def clear(self):
    """Empty the lexicon by installing two brand-new trees.

    '_lexicon' is keyed by word and holds word ids; '_inverseLex' is
    keyed by word id and holds words.
    """
    self._inverseLex = IOBTree()
    self._lexicon = OIBTree()
def _convertBTrees(self, threshold=200):
    """Rebuild the lexicon mappings as OIBTree / IOBTree objects.

    Nothing happens when '_lexicon' already is an OIBTree and
    '_inverseLex' already is an IOBTree.  Otherwise each mapping is
    replaced by a fresh tree that shares this object's '_p_jar', and
    the previous contents are copied across with
    BTrees.convert.convert, which receives 'threshold' unchanged.
    """
    lexicon_is_new = type(self._lexicon) is OIBTree
    if lexicon_is_new and type(getattr(self, '_inverseLex', None)) is IOBTree:
        return

    from BTrees.convert import convert

    # Word -> word id mapping.
    old_lexicon = self._lexicon
    new_lexicon = self._lexicon = OIBTree()
    new_lexicon._p_jar = self._p_jar
    convert(old_lexicon, new_lexicon, threshold)

    # Word id -> word mapping.
    try:
        old_inverse = self._inverseLex
    except AttributeError:
        # older lexicons didn't have an inverse lexicon; source and
        # destination are then the same empty tree, so nothing is copied
        new_inverse = self._inverseLex = IOBTree()
        old_inverse = new_inverse
    else:
        new_inverse = self._inverseLex = IOBTree()

    new_inverse._p_jar = self._p_jar
    convert(old_inverse, new_inverse, threshold)
def set_stop_syn(self, stop_syn): def set_stop_syn(self, stop_syn):
""" pass in a mapping of stopwords and synonyms. Format is: """ pass in a mapping of stopwords and synonyms. Format is:
...@@ -135,31 +161,46 @@ class Lexicon(Persistent, Implicit): ...@@ -135,31 +161,46 @@ class Lexicon(Persistent, Implicit):
def getWordId(self, word): def getWordId(self, word):
""" return the word id of 'word' """ """ return the word id of 'word' """
if self._lexicon.has_key(word): wid=self._lexicon.get(word, None)
return self._lexicon[word] if wid is None:
else: wid=self.assignWordId(word)
return self.assignWordId(word) return wid
set = getWordId set = getWordId
def getWord(self, wid):
    """Return the word recorded for word id 'wid', or None if there is none.

    post-2.3.1b2 method, will not work with unconverted lexicons,
    because the answer is read from the '_inverseLex' mapping.
    """
    inverse = self._inverseLex
    return inverse.get(wid, None)
def assignWordId(self, word): def assignWordId(self, word):
"""Assigns a new word id to the provided word and returns it.""" """Assigns a new word id to the provided word and returns it."""
# First make sure it's not already in there # First make sure it's not already in there
if self._lexicon.has_key(word): if self._lexicon.has_key(word):
return self._lexicon[word] return self._lexicon[word]
if not hasattr(self, 'counter'):
self.counter = 0 try: inverse=self._inverseLex
self._lexicon[intern(word)] = self.counter except AttributeError:
self.counter = self.counter + 1 # woops, old lexicom wo wids
return self.counter - 1 inverse=self._inverseLex=IOBTree()
for word, wid in self._lexicon.items():
inverse[wid]=word
wid=randid()
while not inverse.insert(wid, word):
wid=randid()
self._lexicon[intern(word)] = wid
return wid
def get(self, key, default=None): def get(self, key, default=None):
"""Return the matched word against the key.""" """Return the matched word against the key."""
return [self._lexicon.get(key, default)] r=IISet()
wid=self._lexicon.get(key, default)
if wid is not None: r.insert(wid)
return r
def __getitem__(self, key): def __getitem__(self, key):
return self.get(key) return self.get(key)
...@@ -176,21 +217,6 @@ class Lexicon(Persistent, Implicit): ...@@ -176,21 +217,6 @@ class Lexicon(Persistent, Implicit):
return Splitter(astring, words) return Splitter(astring, words)
def grep(self, query):
"""
regular expression search through the lexicon
he he.
Do not use unless you know what your doing!!!
"""
expr = re.compile(query)
hits = []
for x in self._lexicon.keys():
if expr.search(x):
hits.append(x)
return hits
def query_hook(self, q): def query_hook(self, q):
""" we don't want to modify the query cuz we're dumb """ """ we don't want to modify the query cuz we're dumb """
return q return q
......
...@@ -83,18 +83,33 @@ ...@@ -83,18 +83,33 @@
# #
############################################################################## ##############################################################################
from BTrees.IIBTree import IIBucket
from BTrees.IIBTree import weightedIntersection, weightedUnion, difference
from BTrees.OOBTree import OOSet, union
class ResultList: class ResultList:
def __init__(self, d, words, index, TupleType=type(())): def __init__(self, d, words, index, TupleType=type(())):
self._index = index self._index = index
if type(words) is not OOSet: words=OOSet(words)
self._words = words self._words = words
if (type(d) is TupleType): self._dict = { d[0] : d[1] }
else: self._dict = d if (type(d) is TupleType):
d = IIBucket((d,))
def __len__(self): return len(self._dict) elif type(d) is not IIBucket:
d = IIBucket(d)
self._dict=d
self.__getitem__=d.__getitem__
try: self.__nonzero__=d.__nonzero__
except: pass
self.get=d.get
def __getitem__(self, key): return self._dict[key] def __nonzero__(self):
return not not self._dict
def bucket(self): return self._dict
def keys(self): return self._dict.keys() def keys(self): return self._dict.keys()
...@@ -103,42 +118,29 @@ class ResultList: ...@@ -103,42 +118,29 @@ class ResultList:
def items(self): return self._dict.items() def items(self): return self._dict.items()
def __and__(self, x): def __and__(self, x):
result = {} return self.__class__(
dict = self._dict weightedIntersection(self._dict, x._dict)[1],
xdict = x._dict union(self._words, x._words),
xhas = xdict.has_key self._index,
for id, score in dict.items(): )
if xhas(id): result[id] = xdict[id]+score
return self.__class__(result, self._words+x._words, self._index)
def and_not(self, x): def and_not(self, x):
result = {} return self.__class__(
dict = self._dict difference(self._dict, x._dict),
xdict = x._dict self._words,
xhas = xdict.has_key self._index,
for id, score in dict.items(): )
if not xhas(id): result[id] = score
return self.__class__(result, self._words, self._index)
def __or__(self, x): def __or__(self, x):
result = {} return self.__class__(
dict = self._dict weightedUnion(self._dict, x._dict)[1],
has = dict.has_key union(self._words, x._words),
xdict = x._dict self._index,
xhas = xdict.has_key )
for id, score in dict.items():
if xhas(id): result[id] = xdict[id]+score
else: result[id] = score
for id, score in xdict.items():
if not has(id): result[id] = score
return self.__class__(result, self._words+x._words, self._index) return self.__class__(result, self._words+x._words, self._index)
def near(self, x): def near(self, x):
result = {} result = IIBucket
dict = self._dict dict = self._dict
xdict = x._dict xdict = x._dict
xhas = xdict.has_key xhas = xdict.has_key
...@@ -160,5 +162,6 @@ class ResultList: ...@@ -160,5 +162,6 @@ class ResultList:
else: score = (score+xdict[id])/d else: score = (score+xdict[id])/d
result[id] = score result[id] = score
return self.__class__(result, self._words+x._words, self._index) return self.__class__(
result, union(self._words, x._words), self._index)
...@@ -202,13 +202,13 @@ Notes on a new text index design ...@@ -202,13 +202,13 @@ Notes on a new text index design
space. space.
""" """
__version__='$Revision: 1.25 $'[11:-2] __version__='$Revision: 1.26 $'[11:-2]
#XXX I strongly suspect that this is broken, but I'm not going to fix it. :(
from Globals import Persistent from Globals import Persistent
import BTree, IIBTree from BTrees.OOBTree import OOBTree
BTree=BTree.BTree from BTrees.IIBTree import IISet, IIBucket
IIBTree=IIBTree.Bucket
from intSet import intSet
import operator import operator
from Splitter import Splitter from Splitter import Splitter
from string import strip from string import strip
...@@ -250,7 +250,7 @@ class TextIndex(Persistent): ...@@ -250,7 +250,7 @@ class TextIndex(Persistent):
self.id=id self.id=id
self.ignore_ex=ignore_ex self.ignore_ex=ignore_ex
self.call_methods=call_methods self.call_methods=call_methods
self._index=BTree() self._index=OOBTree() #XXX Is this really an IOBTree?
self._syn=stop_word_dict self._syn=stop_word_dict
self._reindex() self._reindex()
else: else:
...@@ -261,7 +261,7 @@ class TextIndex(Persistent): ...@@ -261,7 +261,7 @@ class TextIndex(Persistent):
def clear(self): def clear(self):
self._index = BTree() self._index = OOBTree()
def positions(self, docid, words): def positions(self, docid, words):
...@@ -366,7 +366,7 @@ class TextIndex(Persistent): ...@@ -366,7 +366,7 @@ class TextIndex(Persistent):
index[word] = r index[word] = r
elif type(r) is dictType: elif type(r) is dictType:
if len(r) > 4: if len(r) > 4:
b = IIBTree() b = IIBucket()
for k, v in r.items(): b[k] = v for k, v in r.items(): b[k] = v
r = b r = b
r[id] = score r[id] = score
...@@ -440,7 +440,7 @@ class TextIndex(Persistent): ...@@ -440,7 +440,7 @@ class TextIndex(Persistent):
for key in keys: for key in keys:
key = strip(key) key = strip(key)
if not key: continue if not key: continue
rr = intSet() rr = IISet()
try: try:
for i,score in query(key,self).items(): for i,score in query(key,self).items():
if score: rr.insert(i) if score: rr.insert(i)
...@@ -451,5 +451,5 @@ class TextIndex(Persistent): ...@@ -451,5 +451,5 @@ class TextIndex(Persistent):
r = r.intersection(rr) r = r.intersection(rr)
if r is not None: return r, (id,) if r is not None: return r, (id,)
return intSet(), (id,) return IISet(), (id,)
This diff is collapsed.
...@@ -83,10 +83,10 @@ ...@@ -83,10 +83,10 @@
# #
############################################################################## ##############################################################################
from UnIndex import UnIndex, MV, intSet from UnIndex import UnIndex
from zLOG import LOG, ERROR from zLOG import LOG, ERROR
from Missing import MV from types import StringType
from types import * from BTrees.OOBTree import OOSet, difference
class UnKeywordIndex(UnIndex): class UnKeywordIndex(UnIndex):
...@@ -111,69 +111,54 @@ class UnKeywordIndex(UnIndex): ...@@ -111,69 +111,54 @@ class UnKeywordIndex(UnIndex):
# self.id is the name of the index, which is also the name of the # self.id is the name of the index, which is also the name of the
# attribute we're interested in. If the attribute is callable, # attribute we're interested in. If the attribute is callable,
# we'll do so. # we'll do so.
try: newKeywords = getattr(obj, self.id, None)
newKeywords = getattr(obj, self.id) if callable(newKeywords):
if callable(newKeywords): newKeywords = newKeywords()
newKeywords = newKeywords()
except AttributeError:
newKeywords = MV
if type(newKeywords) is StringType: if type(newKeywords) is StringType:
newKeywords = (newKeywords, ) newKeywords = (newKeywords, )
if newKeywords is None:
self.unindex_object(documentId)
return 0
# Now comes the fun part, we need to figure out what's changed # Now comes the fun part, we need to figure out what's changed
# if anything from the previous record. # if anything from the previous record.
oldKeywords = self._unindex.get(documentId, MV) oldKeywords = self._unindex.get(documentId, None)
if newKeywords is MV: if oldKeywords is None:
self.unindex_object(documentId)
return 0
elif oldKeywords is MV:
try: try:
for kw in newKeywords: for kw in newKeywords:
self.insertForwardIndexEntry(kw, documentId) self.insertForwardIndexEntry(kw, documentId)
except TypeError: except TypeError:
return 0 return 0
else: else:
# We need the old keywords to be a mapping so we can manipulate if type(oldKeywords) is not OOSet: oldKeywords=OOSet(oldKeywords)
# them more easily. newKeywords=OOSet(newKeywords)
tmp = {} self.unindex_objectKeywords(
try: documentId, difference(oldKeywords, newKeywords))
for kw in oldKeywords: for kw in difference(newKeywords, oldKeywords):
tmp[kw] = None self.insertForwardIndexEntry(kw, documentId)
oldKeywords = tmp
# Now we're going to go through the new keywords,
# and add those that aren't already indexed. If
# they are already indexed, just delete them from
# the list.
for kw in newKeywords:
if oldKeywords.has_key(kw):
del oldKeywords[kw]
else:
self.insertForwardIndexEntry(kw, documentId)
# Now whatever is left in oldKeywords are keywords
# that we no longer have, and need to be removed
# from the indexes.
for kw in oldKeywords.keys():
self.removeForwardIndexEntry(kw, documentId)
except TypeError:
return 0
self._unindex[documentId] = newKeywords[:] # Make a copy self._unindex[documentId] = list(newKeywords)
return 1 return 1
def unindex_object(self, documentId): def unindex_objectKeywords(self, documentId, keywords):
""" carefully unindex the object with integer id 'documentId'""" """ carefully unindex the object with integer id 'documentId'"""
keywords = self._unindex.get(documentId, MV) if keywords is not None:
if keywords is MV: for kw in keywords:
return None self.removeForwardIndexEntry(kw, documentId)
for kw in keywords:
self.removeForwardIndexEntry(kw, documentId)
del self._unindex[documentId] def unindex_object(self, documentId):
""" carefully unindex the object with integer id 'documentId'"""
keywords = self._unindex.get(documentId, None)
self.unindex_objectKeywords(documentId, keywords)
try:
del self._unindex[documentId]
except KeyError:
LOG('UnKeywordIndex', ERROR, 'Attempt to unindex nonexistent'
' document id %s' % documentId)
This diff is collapsed.
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
#############################################################################
# 'whrandom' is deprecated and absent from later Python releases; the
# 'random' module provides the same randint() and choice() functions.
import random

def randid(randint=random.randint, choice=random.choice, signs=(-1,1)):
    """Return a random non-zero integer id.

    The magnitude is drawn with randint(1, 2000000000) and multiplied
    by one element of 'signs' picked with choice(); with the default
    signs the result lies in [-2000000000, -1] or [1, 2000000000].

    The callables are bound as default arguments when the function is
    defined, so the function keeps working after the module name is
    deleted below.
    """
    return choice(signs)*randint(1,2000000000)

# Keep the module namespace clean, as the original did with 'whrandom'.
del random
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
import sys
try: import ZODB
except:
import os
sys.path.insert(0, os.getcwd())
sys.path.insert(0, '../..')
import ZODB
import unittest
from SearchIndex.Splitter import Splitter
class TestSplitter(unittest.TestCase):
    """Checks of the words that Splitter yields for short English phrases.

    unittest assertion methods are used instead of bare 'assert'
    statements so the checks still run under 'python -O' and every
    failure reports both the actual and the expected word list.
    """

    def testSplitNormalText(self):
        # The one-letter word 'a' is not expected in the output.
        text = 'this is a long string of words'
        r = list(Splitter(text))
        self.assertEqual(r, ['this', 'is', 'long', 'string', 'of', 'words'])

    def testDropNumeric(self):
        # Purely numeric tokens are not expected in the output.
        text = '123 456 789 foobar without you nothing'
        r = list(Splitter(text))
        self.assertEqual(r, ['foobar', 'without', 'you', 'nothing'])

    def testDropSingleLetterWords(self):
        # The one-letter word 'I' is not expected in the output.
        text = 'without you I nothing'
        r = list(Splitter(text))
        self.assertEqual(r, ['without', 'you', 'nothing'])

    def testSplitOnNonAlpha(self):
        # "I'm" is expected to vanish entirely from the output.
        text = 'without you I\'m nothing'
        r = list(Splitter(text))
        self.assertEqual(r, ['without', 'you', 'nothing'])
def test_suite():
    """Build a suite from the TestSplitter methods selected by the 'test' prefix."""
    suite = unittest.makeSuite(TestSplitter, 'test')
    return suite
def main():
    """Run the suite returned by test_suite() with a text test runner."""
    runner = unittest.TextTestRunner()
    runner.run(test_suite())
def debug():
    """Run the suite through its debug() method instead of a test runner."""
    suite = test_suite()
    suite.debug()
def pdebug():
    """Execute debug() under control of the pdb debugger."""
    # pdb is only needed for this entry point, so it is imported here.
    import pdb
    statement = 'debug()'
    pdb.run(statement)
if __name__ == '__main__':
    # Without an argument run the tests; otherwise the first argument
    # names the module-level function to call (e.g. debug or pdebug).
    if len(sys.argv) <= 1:
        main()
    else:
        globals()[sys.argv[1]]()
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
import os, sys
sys.path.insert(0, os.getcwd())
try: import unittest
except:
sys.path[0]=os.path.join(sys.path[0],'..','..')
import unittest
import ZODB
from SearchIndex.UnKeywordIndex import UnKeywordIndex
class Dummy:
    """Stand-in document whose foo() method returns the value it was built with."""

    def __init__( self, foo ):
        # Stored privately; exposed only through the foo() method.
        self._foo = foo

    def __str__( self ):
        return '<Dummy: %s>' % self._foo

    # repr() and str() render the object identically.
    __repr__ = __str__

    def foo( self ):
        """Return the value passed to the constructor."""
        return self._foo
class TestCase( unittest.TestCase ):
    """
        Test KeywordIndex objects.
    """

    def setUp( self ):
        """
            Create an empty index on 'foo', the sample documents and
            the request mappings used by the tests.
        """
        self._index = UnKeywordIndex( 'foo' )
        # Unique object used as a "not found" default.
        self._marker = []
        # ( document id, document ) pairs; the last one holds only the
        # keyword 0.
        self._values = [ ( 0, Dummy( ['a'] ) )
                       , ( 1, Dummy( ['a','b'] ) )
                       , ( 2, Dummy( ['a','b','c'] ) )
                       , ( 3, Dummy( ['a','b','c', 'a'] ) )
                       , ( 4, Dummy( ['a', 'b', 'c', 'd'] ) )
                       , ( 5, Dummy( ['a', 'b', 'c', 'e'] ) )
                       , ( 6, Dummy( ['a', 'b', 'c', 'e', 'f'] ))
                       , ( 7, Dummy( [0] ) )
                       ]
        self._noop_req = { 'bar': 123 }
        self._all_req = { 'foo': ['a'] }
        self._some_req = { 'foo': ['e'] }
        self._overlap_req = { 'foo': ['c', 'e'] }
        self._string_req = {'foo': 'a'}
        self._zero_req = { 'foo': [0] }

    def tearDown( self ):
        """
            Nothing to clean up.
        """

    def _populateIndex( self ):
        """
            Index every sample document under its document id.
        """
        for k, v in self._values:
            self._index.index_object( k, v )

    def _checkApply( self, req, expectedValues ):
        """
            Apply 'req' to the index and check that exactly the ids of
            'expectedValues' ( a list of ( id, document ) pairs ) match.
        """
        result, used = self._index._apply_index( req )
        assert used == ( 'foo', )

        try:
            length = len(result)
        except:
            # The result does not support len(); fall back to its keys.
            result = result.keys()
            length = len(result)

        assert length == len( expectedValues ), \
          '%s | %s' % ( map( None, result ),
                        map(lambda x: x[0], expectedValues ))

        for k, v in expectedValues:
            assert k in result

    def testEmpty( self ):
        assert len( self._index ) == 0
        assert len( self._index.referencedObjects() ) == 0

        assert self._index.getEntryForObject( 1234 ) is None
        assert ( self._index.getEntryForObject( 1234, self._marker )
                  is self._marker ), self._index.getEntryForObject(1234)
        self._index.unindex_object( 1234 ) # nothrow

        assert self._index.hasUniqueValuesFor( 'foo' )
        assert not self._index.hasUniqueValuesFor( 'bar' )
        assert len( self._index.uniqueValues( 'foo' ) ) == 0

        assert self._index._apply_index( self._noop_req ) is None

        self._checkApply( self._all_req, [] )
        self._checkApply( self._some_req, [] )
        self._checkApply( self._overlap_req, [] )
        self._checkApply( self._string_req, [] )

    def testPopulated( self ):
        self._populateIndex()
        values = self._values

        #assert len( self._index ) == len( values )
        assert len( self._index.referencedObjects() ) == len( values )

        assert self._index.getEntryForObject( 1234 ) is None
        assert ( self._index.getEntryForObject( 1234, self._marker )
                  is self._marker )
        self._index.unindex_object( 1234 ) # nothrow

        for k, v in values:
            assert self._index.getEntryForObject( k ) == v.foo()

        # This used to be written as 'assert ( condition, message )',
        # which tests a non-empty tuple and therefore could never fail.
        # The condition and the failure message are now separate.
        assert len( self._index.uniqueValues( 'foo' ) ) == len( values )-1, \
               len( values )-1

        assert self._index._apply_index( self._noop_req ) is None

        self._checkApply( self._all_req, values[:-1])
        self._checkApply( self._some_req, values[ 5:7 ] )
        self._checkApply( self._overlap_req, values[2:7] )
        self._checkApply( self._string_req, values[:-1] )

    def testZero( self ):
        # The keyword 0 must be indexed like any other keyword.
        self._populateIndex()
        values = self._values
        self._checkApply( self._zero_req, values[ -1: ] )
        assert 0 in self._index.uniqueValues( 'foo' )
def test_suite():
    """Build a suite from the test methods of TestCase."""
    suite = unittest.makeSuite( TestCase )
    return suite
if __name__ == '__main__':
    # Executed as a script: run the whole suite with the text runner.
    unittest.TextTestRunner().run(test_suite())
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
import sys, os
sys.path.insert(0, os.getcwd())
try: import unittest
except:
sys.path[0]=os.path.join(sys.path[0],'..','..')
import unittest
class Dummy:
    """Bare attribute holder: every keyword argument becomes an attribute."""

    def __init__(self, **kw):
        # Copy each keyword onto the instance under the same name.
        for name, value in kw.items():
            setattr(self, name, value)
import zLOG
def log_write(subsystem, severity, summary, detail, error):
    """Replacement zLOG writer that turns serious log entries into failures.

    Any entry whose severity is at least zLOG.PROBLEM raises AssertionError
    carrying the subsystem, severity and summary; less severe entries are
    ignored.  ``detail`` and ``error`` are accepted but not used.
    """
    if severity >= zLOG.PROBLEM:
        # Raise explicitly instead of ``assert 0`` so the check still fires
        # when Python runs with assertions disabled (-O).
        raise AssertionError("%s(%s): %s" % (subsystem, severity, summary))
zLOG.log_write=log_write
import ZODB, ZODB.DemoStorage, ZODB.FileStorage
import SearchIndex.UnTextIndex
import SearchIndex.GlobbingLexicon
class Tests(unittest.TestCase):
    """Checks for UnTextIndex, in memory and stored in a FileStorage database.

    Test methods are named ``check*``; helper methods deliberately use other
    names so they are not collected as tests.
    """

    # Documents indexed under ids 0..6 (list order) by the query checks.
    sample_texts = [
        "This is the time for all good men to come to\n"
        "the aid of their country",
        "ask not what your country can do for you,\n"
        "ask what you can do for your country",
        "Man, I can't wait to get to Montross!",
        "Zope Public License (ZPL) Version 1.0",
        "Copyright (c) Digital Creations. All rights reserved.",
        "This license has been certified as Open Source(tm).",
        "I hope I get to work on time",
        ]

    def setUp(self):
        self.index=SearchIndex.UnTextIndex.UnTextIndex('text')
        self.doc=Dummy(text='this is the time, when all good zopes')

    def dbopen(self):
        """Open the per-process FileStorage and return its 'index' object.

        The index is created and committed the first time the database is
        opened.  The DB and connection are kept on self.db / self.jar so
        that dbclose() and tearDown() can release them.
        """
        n = 'fs_tmp__%s' % os.getpid()
        s = ZODB.FileStorage.FileStorage(n)
        db=self.db=ZODB.DB(s)
        self.jar=db.open()
        if not self.jar.root().has_key('index'):
            self.jar.root()['index']=SearchIndex.UnTextIndex.UnTextIndex('text')
            get_transaction().commit()
        return self.jar.root()['index']

    def dbclose(self):
        """Close the connection and database opened by dbopen()."""
        self.jar.close()
        self.db.close()
        del self.jar
        del self.db

    def tearDown(self):
        get_transaction().abort()
        if hasattr(self, 'jar'):
            self.dbclose()
        # Remove the scratch storage files without spawning a shell, so the
        # cleanup also works where ``rm`` is unavailable.  Like ``rm -f``,
        # files that cannot be removed are skipped silently.
        import glob
        for name in glob.glob('fs_tmp__*'):
            try:
                os.remove(name)
            except OSError:
                pass

    def _checkNoMatchQueries(self, index):
        """Assert the behaviour of an unrelated query and of a no-hit query."""
        r = index._apply_index({})
        assert r is None
        r = index._apply_index({'text': 'python'})
        assert len(r) == 2 and r[1]==('text',), 'incorrectly not used'
        assert not r[0], "should have no results"

    def _indexSampleTexts(self):
        """Index sample_texts using a GlobbingLexicon, then reopen the database.

        Each document is committed in its own transaction.  Returns the
        index object obtained from the reopened database.
        """
        index=self.dbopen()
        index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()
        for i in range(len(self.sample_texts)):
            self.doc.text=self.sample_texts[i]
            index.index_object(i, self.doc)
            get_transaction().commit()
        self.dbclose()
        return self.dbopen()

    def checkSimpleAddDelete(self):
        "Check that we can add and delete an object without error"
        self.index.index_object(0, self.doc)
        self.index.index_object(1, self.doc)
        self.doc.text='spam is good, spam is fine, span span span'
        # Re-index id 0 with new text, then drop it again.
        self.index.index_object(0, self.doc)
        self.index.unindex_object(0)

    def checkPersistentUpdate1(self):
        "Check simple persistent indexing"
        index=self.dbopen()

        self.doc.text='this is the time, when all good zopes'
        index.index_object(0, self.doc)
        get_transaction().commit()

        self.doc.text='time waits for no one'
        index.index_object(1, self.doc)
        get_transaction().commit()

        # Query through a freshly opened database.
        self.dbclose()
        index=self.dbopen()

        self._checkNoMatchQueries(index)

        r = index._apply_index({'text': 'time'})
        r=list(r[0].keys())
        assert r == [0,1], r

    def checkPersistentUpdate2(self):
        "Check less simple persistent indexing"
        index=self.dbopen()

        self.doc.text='this is the time, when all good zopes'
        index.index_object(0, self.doc)
        get_transaction().commit()

        self.doc.text='time waits for no one'
        index.index_object(1, self.doc)
        get_transaction().commit()

        # Document ids are deliberately added out of order (3 before 2).
        self.doc.text='the next task is to test'
        index.index_object(3, self.doc)
        get_transaction().commit()

        self.doc.text='time time'
        index.index_object(2, self.doc)
        get_transaction().commit()

        # Query through a freshly opened database.
        self.dbclose()
        index=self.dbopen()

        self._checkNoMatchQueries(index)

        r = index._apply_index({'text': 'time'})
        r=list(r[0].keys())
        assert r == [0,1,2], r

    def checkGlobQuery(self):
        "Check a glob query"
        index=self._indexSampleTexts()
        r = index._apply_index({'text':'m*n'})
        r=list(r[0].keys())
        assert r == [0,2], r

    def checkAndQuery(self):
        "Check an AND query"
        index=self._indexSampleTexts()
        r = index._apply_index({'text':'time and country'})
        r=list(r[0].keys())
        assert r == [0,], r

    def checkOrQuery(self):
        "Check an OR query"
        index=self._indexSampleTexts()
        r = index._apply_index({'text':'time or country'})
        r=list(r[0].keys())
        assert r == [0,1,6], r
def test_suite():
    """Build a suite from every method of Tests whose name starts with 'check'."""
    suite = unittest.makeSuite(Tests, 'check')
    return suite
def main():
    """Run the suite returned by test_suite() with a text test runner."""
    runner = unittest.TextTestRunner()
    runner.run(test_suite())
def debug():
    """Call debug() on the suite returned by test_suite()."""
    suite = test_suite()
    suite.debug()
def pdebug():
    """Execute the statement 'debug()' under control of pdb."""
    import pdb
    statement = 'debug()'
    pdb.run(statement)
if __name__=='__main__':
    # With a command-line argument, call the module-level callable of that
    # name (e.g. debug or pdebug); without one, run main().
    entry = main
    if len(sys.argv) > 1:
        entry = globals()[sys.argv[1]]
    entry()
...@@ -82,7 +82,16 @@ ...@@ -82,7 +82,16 @@
# attributions are listed in the accompanying credits file. # attributions are listed in the accompanying credits file.
# #
############################################################################## ##############################################################################
import Zope
import sys
sys.path.insert(0, '.')
try:
import Testing
except ImportError:
sys.path[0] = '../../'
import Testing
import ZODB
import unittest import unittest
from SearchIndex.UnIndex import UnIndex from SearchIndex.UnIndex import UnIndex
...@@ -117,7 +126,7 @@ class TestCase( unittest.TestCase ): ...@@ -117,7 +126,7 @@ class TestCase( unittest.TestCase ):
, ( 5, Dummy( 'abce' ) ) , ( 5, Dummy( 'abce' ) )
, ( 6, Dummy( 'abce' ) ) , ( 6, Dummy( 'abce' ) )
, ( 7, Dummy( 0 ) ) # Collector #1959 , ( 7, Dummy( 0 ) ) # Collector #1959
] , ( 8, Dummy(None) )]
self._forward = {} self._forward = {}
self._backward = {} self._backward = {}
for k, v in self._values: for k, v in self._values:
...@@ -137,6 +146,7 @@ class TestCase( unittest.TestCase ): ...@@ -137,6 +146,7 @@ class TestCase( unittest.TestCase ):
, 'foo_usage': 'range:min:max' , 'foo_usage': 'range:min:max'
} }
self._zero_req = { 'foo': 0 } self._zero_req = { 'foo': 0 }
self._none_req = { 'foo': None }
def tearDown( self ): def tearDown( self ):
...@@ -149,6 +159,8 @@ class TestCase( unittest.TestCase ): ...@@ -149,6 +159,8 @@ class TestCase( unittest.TestCase ):
def _checkApply( self, req, expectedValues ): def _checkApply( self, req, expectedValues ):
result, used = self._index._apply_index( req ) result, used = self._index._apply_index( req )
if hasattr(result, 'keys'):
result = result.keys()
assert used == ( 'foo', ) assert used == ( 'foo', )
assert len( result ) == len( expectedValues ), \ assert len( result ) == len( expectedValues ), \
'%s | %s' % ( map( None, result ), expectedValues ) '%s | %s' % ( map( None, result ), expectedValues )
...@@ -177,10 +189,11 @@ class TestCase( unittest.TestCase ): ...@@ -177,10 +189,11 @@ class TestCase( unittest.TestCase ):
self._checkApply( self._range_req, [] ) self._checkApply( self._range_req, [] )
def testPopulated( self ): def testPopulated( self ):
""" Test a populated FieldIndex """
self._populateIndex() self._populateIndex()
values = self._values values = self._values
assert len( self._index ) == len( values ) assert len( self._index ) == len( values )-1 #'abce' is duplicate
assert len( self._index.referencedObjects() ) == len( values ) assert len( self._index.referencedObjects() ) == len( values )
assert self._index.getEntryForObject( 1234 ) is None assert self._index.getEntryForObject( 1234 ) is None
...@@ -195,21 +208,62 @@ class TestCase( unittest.TestCase ): ...@@ -195,21 +208,62 @@ class TestCase( unittest.TestCase ):
assert self._index._apply_index( self._noop_req ) is None assert self._index._apply_index( self._noop_req ) is None
self._checkApply( self._request, values[ -3:-1 ] ) self._checkApply( self._request, values[ -4:-2 ] )
self._checkApply( self._min_req, values[ 2:-1 ] ) self._checkApply( self._min_req, values[ 2:-2 ] )
self._checkApply( self._max_req, values[ :3 ] + values[ -1: ] ) self._checkApply( self._max_req, values[ :3 ] + values[ -2: ] )
self._checkApply( self._range_req, values[ 2:5 ] ) self._checkApply( self._range_req, values[ 2:5 ] )
def testZero( self ): def testZero( self ):
""" Make sure 0 gets indexed """
self._populateIndex() self._populateIndex()
values = self._values values = self._values
self._checkApply( self._zero_req, values[ -1: ] ) self._checkApply( self._zero_req, values[ -2:-1 ] )
assert 0 in self._index.uniqueValues( 'foo' ) assert 0 in self._index.uniqueValues( 'foo' )
def testNone(self):
    """ make sure None gets indexed """
    self._populateIndex()
    # The None-valued entry is the last item of self._values.
    expected = self._values[-1:]
    self._checkApply(self._none_req, expected)
    unique = self._index.uniqueValues('foo')
    assert None in unique
def testRange(self):
    """Test a range search"""
    index = UnIndex( 'foo' )
    # Ids 0..99 are indexed under the value id % 10.
    for i in range(100):
        index.index_object(i, Dummy(i%10))

    query = {
        'foo_usage': 'range:min:max',
        'foo': [-99, 3]}
    r=index._apply_index(query)

    assert tuple(r[1])==('foo',), r[1]
    r=list(r[0].keys())

    # Expected hits are the ids whose last digit is 0..3, in ascending order.
    expect=[]
    for tens in range(0, 100, 10):
        for unit in range(4):
            expect.append(tens + unit)

    assert r==expect, r
def test_suite(): def test_suite():
return unittest.makeSuite( TestCase ) return unittest.makeSuite( TestCase )
def debug():
    """Call debug() on the suite from test_suite() and return its result."""
    suite = test_suite()
    return suite.debug()
if __name__ == '__main__': def pdebug():
import pdb
pdb.run('debug()')
def main():
unittest.TextTestRunner().run( test_suite() ) unittest.TextTestRunner().run( test_suite() )
if __name__ == '__main__':
    # With a command-line argument, call the module-level callable of that
    # name; without one, run main().
    entry = main
    if len(sys.argv) > 1:
        entry = globals()[sys.argv[1]]
    entry()
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
"""
Set up testing environment
$Id: __init__.py,v 1.2 2001/03/15 13:16:25 jim Exp $
"""
import os, sys
startfrom = head = os.getcwd()

# Walk upwards from the current directory, trying each directory as
# sys.path[0], until ZODB can be imported.
while 1:
    sys.path[0]=startfrom
    try:
        import ZODB
    except ImportError:
        head = os.path.split(startfrom)[0]
        # os.path.split() of a filesystem root returns the root itself, not
        # '', so also stop when the parent no longer differs; otherwise the
        # loop would never end when ZODB cannot be found.
        if head == '' or head == startfrom:
            raise ImportError("Couldn't import ZODB")
        startfrom = head
        continue
    else:
        break

# Values already present in the environment win; otherwise SOFTWARE_HOME is
# the directory ZODB was imported from and INSTANCE_HOME is two levels above
# SOFTWARE_HOME.
os.environ['SOFTWARE_HOME']=os.environ.get('SOFTWARE_HOME', startfrom)
os.environ['INSTANCE_HOME']=os.environ.get(
    'INSTANCE_HOME',
    os.path.join(os.environ['SOFTWARE_HOME'],'..','..')
    )
#!/usr/bin/env python1.5
# Dispatcher for usage inside Zope test environment
# Digital Creations
__version__ = '$Id: dispatcher.py,v 1.2 2001/03/15 13:16:25 jim Exp $'
import os,sys,re,string
import threading,time,commands,profile
class Dispatcher:
    """
    a multi-purpose thread dispatcher

    Test functions are looked up by name on the instance (see dispatcher()
    and worker()), so they are presumably supplied as methods of a subclass.
    """

    def __init__(self,func=''):
        """Create a dispatcher.

        func -- optional name of an attribute of this object; its docstring
                is stored in self.doc and printed in the report.  With the
                default '' no lookup is made and self.doc is None.
        """
        self.fp = sys.stderr
        self.f_startup = []
        self.f_teardown = []
        self.lastlog = ""
        self.lock = threading.Lock()
        self.func = func
        self.profiling = 0
        if func:
            self.doc = getattr(self,func).__doc__
        else:
            # getattr(self, '') would raise AttributeError.
            self.doc = None

    def setlog(self,fp):
        """Send log output to the file-like object fp (default: sys.stderr)."""
        self.fp = fp

    def log(self,s):
        """Write s to the log unless it equals the previously logged string."""
        if s==self.lastlog: return
        self.fp.write(s)
        self.fp.flush()
        self.lastlog=s

    def logn(self,s):
        """Like log(), but a newline is appended to the output."""
        if s==self.lastlog: return
        self.fp.write(s + '\n')
        self.fp.flush()
        self.lastlog=s

    def profiling_on(self):
        """Set the profiling flag."""
        self.profiling = 1

    def profiling_off(self):
        """Clear the profiling flag."""
        self.profiling = 0

    def dispatcher(self,name='', *params):
        """ dispatcher for threads
        The dispatcher expects one or several tuples:
        (functionname, number of threads to start , args, keyword args)

        All threads are created first, then started together.  The call
        polls once a second until no other thread is alive and then writes
        a report (per-thread runtime and data, total time, memory usage)
        to the log.
        """
        # -1 is a placeholder; it is dropped once a real sample exists.
        self.mem_usage = [-1]
        mem_watcher = threading.Thread(None,self.mem_watcher,name='memwatcher')
        mem_watcher.start()

        self.start_test = time.time()
        self.name = name
        self.th_data = {}
        self.runtime = {}
        self._threads = []
        s2s=self.s2s

        for func,numthreads,args,kw in params:
            # Fail before any worker starts if the function does not exist.
            getattr(self,func)
            for i in range(0,numthreads):
                # Give each thread its own copy: the caller's dict is left
                # untouched and specs sharing one dict cannot overwrite each
                # other's 't_func' entry.
                thread_kw = kw.copy()
                thread_kw['t_func'] = func
                th = threading.Thread(None,self.worker,
                                      name="TH_%s_%03d" % (func,i),
                                      args=args,kwargs=thread_kw)
                self._threads.append(th)

        for th in self._threads: th.start()
        while threading.activeCount() > 1: time.sleep(1)

        self.logn('ID: %s ' % self.name)
        self.logn('FUNC: %s ' % self.func)
        self.logn('DOC: %s ' % self.doc)
        # params is a tuple: wrap it so it is formatted as one value.
        self.logn('Args: %s' % (params,))

        for th in self._threads:
            th_name = th.getName()
            # Entries exist only for threads that called th_teardown().
            elapsed = self.runtime.get(th_name)
            if elapsed is None:
                self.logn( '%-30s ........................ (no runtime recorded)' % th_name )
            else:
                self.logn( '%-30s ........................ %9.3f sec' % (th_name, elapsed) )
            for k,v in self.th_data.get(th_name, {}).items():
                self.logn ('%-30s %-15s = %s' % (' ',k,v) )

        self.logn("")
        self.logn('Complete running time: %9.3f sec' % (time.time()-self.start_test) )
        if len(self.mem_usage)>1: self.mem_usage.remove(-1)
        usage = self.mem_usage
        self.logn( "Memory: start: %s, end: %s, low: %s, high: %s" %
                   (s2s(usage[0]), s2s(usage[-1]), s2s(min(usage)), s2s(max(usage))) )
        self.logn('')

    def worker(self,*args,**kw):
        """Thread body: startup hooks, the function named kw['t_func'], teardown hooks.

        The 't_func' entry is removed before the remaining keywords are
        passed on to the function.
        """
        for func in self.f_startup: getattr(self,func)()

        t_func = getattr(self,kw['t_func'])
        del kw['t_func']
        apply(t_func,args,kw)

        for func in self.f_teardown: getattr(self,func)()

    def th_setup(self):
        """ initialize thread with some environment data """
        env = {'start': time.time()
              }
        return env

    def th_teardown(self,env,**kw):
        """ famous last actions of thread

        Stores the keyword arguments and the time elapsed since
        env['start'] under the current thread's name.
        """
        self.lock.acquire()
        try:
            th_name = threading.currentThread().getName()
            self.th_data[th_name] = kw
            self.runtime[th_name] = time.time() - env['start']
        finally:
            self.lock.release()

    def getmem(self):
        """ try to determine the current memory usage

        Returns None on platforms other than Linux.  Otherwise returns the
        first field of /proc/<pid>/statm multiplied by 4096.
        """
        # Covers both 'linux2' and the plain 'linux' of newer interpreters.
        if sys.platform[:5] != 'linux': return None
        f = open("/proc/%d/statm" % os.getpid())
        try:
            data = f.read()
        finally:
            f.close()
        fields = re.split(" ",data)
        # NOTE(review): assumes the field counts 4096-byte pages -- confirm.
        mem = int(fields[0]) * 4096
        return mem

    def mem_watcher(self):
        """ thread for watching memory usage

        Samples getmem() once a second until only this thread and one other
        thread remain alive.
        """
        running = 1
        while running ==1:
            sample = self.getmem()
            # getmem() yields None where it is unsupported; skip those so the
            # report can still format and compare the collected values.
            if sample is not None:
                self.mem_usage.append( sample )
            time.sleep(1)
            if threading.activeCount() == 2: running = 0

    def register_startup(self,func):
        """Register the name of a method each worker calls before its function."""
        self.f_startup.append(func)

    def register_teardown(self,func):
        """Register the name of a method each worker calls after its function."""
        self.f_teardown.append(func)

    def s2s(self,n):
        """Format the byte count n as a string in Bytes, KB, MB or GB."""
        if n <1024.0: return "%8.3lf Bytes" % n
        if n <1024.0*1024.0: return "%8.3lf KB" % (1.0*n/1024.0)
        if n <1024.0*1024.0*1024.0: return "%8.3lf MB" % (1.0*n/1024.0/1024.0)
        # Always return a string, also for one gigabyte and above.
        return "%8.3lf GB" % (1.0*n/1024.0/1024.0/1024.0)
if __name__=="__main__":
    # Quick manual check: print the current memory usage.
    # __init__ looks up the docstring of the named attribute, so pass the
    # name of an existing method.
    d=Dispatcher('getmem')
    print(d.getmem())
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
"""
Facilitates unit tests which requires an acquirable REQUEST from
ZODB objects
Usage:
import makerequest
app = makerequest.makerequest(Zope.app())
$Id: makerequest.py,v 1.2 2001/03/15 13:16:25 jim Exp $
"""
import os
from os import environ
from sys import stdin
from ZPublisher.HTTPRequest import HTTPRequest
from ZPublisher.HTTPResponse import HTTPResponse
from ZPublisher.BaseRequest import RequestContainer
def makerequest(app):
    """Return app wrapped in a RequestContainer that carries a REQUEST.

    SERVER_NAME, SERVER_PORT and REQUEST_METHOD are set in os.environ, and
    the request is built from stdin, that environment and a new
    HTTPResponse.
    """
    response = HTTPResponse()
    settings = (
        ('SERVER_NAME', 'foo'),
        ('SERVER_PORT', '80'),
        ('REQUEST_METHOD', 'GET'),
        )
    for key, value in settings:
        environ[key] = value
    request = HTTPRequest(stdin, environ, response)
    container = RequestContainer(REQUEST = request)
    return app.__of__(container)
This diff is collapsed.
...@@ -30,7 +30,7 @@ SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. ...@@ -30,7 +30,7 @@ SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
""" """
__author__ = "Steve Purcell (stephen_purcell@yahoo.com)" __author__ = "Steve Purcell (stephen_purcell@yahoo.com)"
__version__ = "$Revision: 1.20 $"[11:-2] __version__ = "$Revision: 1.1.4.1 $"[11:-2]
import time import time
import sys import sys
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment