Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
e6b5d0c3
Commit
e6b5d0c3
authored
Mar 15, 2001
by
Jim Fulton
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Merged changes from Catalog-BTrees-Integration branch.
parent
22eec3b7
Changes
30
Show whitespace changes
Inline
Side-by-side
Showing
30 changed files
with
3919 additions
and
594 deletions
+3919
-594
lib/Components/ExtensionClass/src/ThreadLock.c
lib/Components/ExtensionClass/src/ThreadLock.c
+21
-11
lib/python/Products/ZCatalog/Catalog.py
lib/python/Products/ZCatalog/Catalog.py
+157
-127
lib/python/Products/ZCatalog/CatalogBrains.py
lib/python/Products/ZCatalog/CatalogBrains.py
+1
-1
lib/python/Products/ZCatalog/Lazy.py
lib/python/Products/ZCatalog/Lazy.py
+9
-7
lib/python/Products/ZCatalog/Vocabulary.py
lib/python/Products/ZCatalog/Vocabulary.py
+2
-2
lib/python/Products/ZCatalog/ZCatalog.py
lib/python/Products/ZCatalog/ZCatalog.py
+36
-15
lib/python/Products/ZCatalog/dtml/catalogIndexes.dtml
lib/python/Products/ZCatalog/dtml/catalogIndexes.dtml
+3
-5
lib/python/Products/ZCatalog/tests/__init__.py
lib/python/Products/ZCatalog/tests/__init__.py
+1
-0
lib/python/Products/ZCatalog/tests/keywords.py
lib/python/Products/ZCatalog/tests/keywords.py
+41
-0
lib/python/Products/ZCatalog/tests/testCatalog.py
lib/python/Products/ZCatalog/tests/testCatalog.py
+998
-0
lib/python/Products/ZCatalog/tests/testCatalogTiming.py
lib/python/Products/ZCatalog/tests/testCatalogTiming.py
+237
-0
lib/python/SearchIndex/GlobbingLexicon.py
lib/python/SearchIndex/GlobbingLexicon.py
+55
-30
lib/python/SearchIndex/Index.py
lib/python/SearchIndex/Index.py
+10
-12
lib/python/SearchIndex/Lexicon.py
lib/python/SearchIndex/Lexicon.py
+61
-35
lib/python/SearchIndex/ResultList.py
lib/python/SearchIndex/ResultList.py
+38
-35
lib/python/SearchIndex/TextIndex.py
lib/python/SearchIndex/TextIndex.py
+10
-10
lib/python/SearchIndex/UnIndex.py
lib/python/SearchIndex/UnIndex.py
+119
-63
lib/python/SearchIndex/UnKeywordIndex.py
lib/python/SearchIndex/UnKeywordIndex.py
+33
-48
lib/python/SearchIndex/UnTextIndex.py
lib/python/SearchIndex/UnTextIndex.py
+156
-184
lib/python/SearchIndex/randid.py
lib/python/SearchIndex/randid.py
+91
-0
lib/python/SearchIndex/tests/__init__.py
lib/python/SearchIndex/tests/__init__.py
+1
-0
lib/python/SearchIndex/tests/testSplitter.py
lib/python/SearchIndex/tests/testSplitter.py
+141
-0
lib/python/SearchIndex/tests/testUnKeywordIndex.py
lib/python/SearchIndex/tests/testUnKeywordIndex.py
+214
-0
lib/python/SearchIndex/tests/testUnTextIndex.py
lib/python/SearchIndex/tests/testUnTextIndex.py
+293
-0
lib/python/SearchIndex/tests/test_UnIndex.py
lib/python/SearchIndex/tests/test_UnIndex.py
+62
-8
lib/python/Testing/__init__.py
lib/python/Testing/__init__.py
+113
-0
lib/python/Testing/dispatcher.py
lib/python/Testing/dispatcher.py
+181
-0
lib/python/Testing/makerequest.py
lib/python/Testing/makerequest.py
+111
-0
lib/python/Testing/unittest.py
lib/python/Testing/unittest.py
+723
-0
lib/python/unittest.py
lib/python/unittest.py
+1
-1
No files found.
lib/Components/ExtensionClass/src/ThreadLock.c
View file @
e6b5d0c3
...
...
@@ -33,7 +33,7 @@
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
$Id: ThreadLock.c,v 1.
7 1999/02/19 16:10:05
jim Exp $
$Id: ThreadLock.c,v 1.
8 2001/03/15 13:16:21
jim Exp $
If you have questions regarding this software,
contact:
...
...
@@ -46,7 +46,7 @@
*/
static
char
ThreadLock_module_documentation
[]
=
""
"
\n
$Id: ThreadLock.c,v 1.
7 1999/02/19 16:10:05
jim Exp $"
"
\n
$Id: ThreadLock.c,v 1.
8 2001/03/15 13:16:21
jim Exp $"
;
#include "Python.h"
...
...
@@ -93,8 +93,9 @@ typedef struct {
staticforward
PyTypeObject
ThreadLockType
;
static
int
cacquire
(
ThreadLockObject
*
self
)
cacquire
(
ThreadLockObject
*
self
,
int
wait
)
{
int
acquired
=
1
;
#ifdef WITH_THREAD
long
id
=
get_thread_ident
();
#else
...
...
@@ -113,19 +114,26 @@ cacquire(ThreadLockObject *self)
{
#ifdef WITH_THREAD
Py_BEGIN_ALLOW_THREADS
acquire
_lock
(
self
->
lock
,
1
);
acquire
d
=
acquire_lock
(
self
->
lock
,
wait
?
WAIT_LOCK
:
NOWAIT_LOCK
);
Py_END_ALLOW_THREADS
#endif
if
(
acquired
)
{
self
->
count
=
0
;
self
->
id
=
id
;
}
return
0
;
}
return
acquired
;
}
static
PyObject
*
acquire
(
ThreadLockObject
*
self
,
PyObject
*
args
)
{
if
(
cacquire
(
self
)
<
0
)
return
NULL
;
int
wait
=
-
1
,
acquired
;
if
(
!
PyArg_ParseTuple
(
args
,
"|i"
,
&
wait
))
return
NULL
;
acquired
=
cacquire
(
self
,
wait
);
if
(
acquired
<
0
)
return
NULL
;
if
(
wait
>=
0
)
return
PyInt_FromLong
(
acquired
);
Py_INCREF
(
Py_None
);
return
Py_None
;
}
...
...
@@ -138,6 +146,7 @@ crelease(ThreadLockObject *self)
#else
long
id
=
1
;
#endif
if
(
self
->
count
>=
0
&&
self
->
id
==
id
)
{
/* Somebody has locked me. It is either the current thread or
...
...
@@ -161,6 +170,7 @@ crelease(ThreadLockObject *self)
static
PyObject
*
release
(
ThreadLockObject
*
self
,
PyObject
*
args
)
{
if
(
!
PyArg_ParseTuple
(
args
,
""
))
return
NULL
;
if
(
crelease
(
self
)
<
0
)
return
NULL
;
Py_INCREF
(
Py_None
);
return
Py_None
;
...
...
@@ -172,7 +182,7 @@ call_method(ThreadLockObject *self, PyObject *args)
PyObject
*
f
,
*
a
=
0
,
*
k
=
0
;
UNLESS
(
PyArg_ParseTuple
(
args
,
"OO|O"
,
&
f
,
&
a
,
&
k
))
return
NULL
;
if
(
cacquire
(
self
)
<
0
)
return
NULL
;
if
(
cacquire
(
self
,
-
1
)
<
0
)
return
NULL
;
f
=
PyEval_CallObjectWithKeywords
(
f
,
a
,
k
);
if
(
crelease
(
self
)
<
0
)
{
...
...
@@ -189,7 +199,7 @@ static struct PyMethodDef ThreadLock_methods[] = {
"Acquire the lock, call the function, and then release the lock.
\n
"
},
{
"acquire"
,
(
PyCFunction
)
acquire
,
1
,
"acquire() -- Acquire a lock, taking the thread ID into account"
"acquire(
[wait]
) -- Acquire a lock, taking the thread ID into account"
},
{
"release"
,
(
PyCFunction
)
release
,
1
,
"release() -- Release a lock, taking the thread ID into account"
...
...
@@ -296,7 +306,7 @@ void
initThreadLock
()
{
PyObject
*
m
,
*
d
;
char
*
rev
=
"$Revision: 1.
7
$"
;
char
*
rev
=
"$Revision: 1.
8
$"
;
m
=
Py_InitModule4
(
"ThreadLock"
,
Module_methods
,
ThreadLock_module_documentation
,
...
...
lib/python/Products/ZCatalog/Catalog.py
View file @
e6b5d0c3
...
...
@@ -86,9 +86,6 @@
from
Persistence
import
Persistent
import
Acquisition
import
ExtensionClass
import
BTree
,
OIBTree
,
IOBTree
,
IIBTree
IIBucket
=
IIBTree
.
Bucket
from
intSet
import
intSet
from
SearchIndex
import
UnIndex
,
UnTextIndex
,
UnKeywordIndex
,
Query
from
SearchIndex.Lexicon
import
Lexicon
import
regex
,
pdb
...
...
@@ -101,14 +98,13 @@ from zLOG import LOG, ERROR
from
Lazy
import
LazyMap
,
LazyFilter
,
LazyCat
from
CatalogBrains
import
AbstractCatalogBrain
,
NoBrainer
from
BTrees.IIBTree
import
intersection
,
weightedIntersection
from
BTrees.OIBTree
import
OIBTree
from
BTrees.IOBTree
import
IOBTree
import
BTrees.Length
from
SearchIndex.randid
import
randid
import
time
class
KWMultiMapping
(
MultiMapping
):
def
has_key
(
self
,
name
):
try
:
r
=
self
[
name
]
return
1
except
KeyError
:
return
0
def
orify
(
seq
,
query_map
=
{
...
...
@@ -118,7 +114,7 @@ def orify(seq,
subqueries
=
[]
for
q
in
seq
:
try
:
q
=
query_map
[
type
(
q
)](
q
)
except
:
q
=
Query
.
Cmp
(
q
)
except
KeyError
:
q
=
Query
.
Cmp
(
q
)
subqueries
.
append
(
q
)
return
apply
(
Query
.
Or
,
tuple
(
subqueries
))
...
...
@@ -152,9 +148,8 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# object unique identifier to the rid, and self.paths is a
# mapping of the rid to the unique identifier.
self
.
data
=
BTree
.
BTree
()
# mapping of rid to meta_data
self
.
uids
=
OIBTree
.
BTree
()
# mapping of uid to rid
self
.
paths
=
IOBTree
.
BTree
()
# mapping of rid to uid
self
.
__len__
=
BTrees
.
Length
.
Length
()
self
.
clear
()
# indexes can share a lexicon or have a private copy. Here,
# we instantiate a lexicon to be shared by all text indexes.
...
...
@@ -163,7 +158,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
if
type
(
vocabulary
)
is
type
(
''
):
self
.
lexicon
=
vocabulary
else
:
#ack!
self
.
lexicon
=
Lexicon
()
if
brains
is
not
None
:
...
...
@@ -171,6 +165,52 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
self
.
updateBrains
()
def
clear
(
self
):
""" clear catalog """
self
.
data
=
IOBTree
()
# mapping of rid to meta_data
self
.
uids
=
OIBTree
()
# mapping of uid to rid
self
.
paths
=
IOBTree
()
# mapping of rid to uid
# convert old-style Catalog object to new in-place
try
:
self
.
__len__
.
set
(
0
)
except
AttributeError
:
self
.
__len__
=
BTrees
.
Length
.
Length
()
for
x
in
self
.
indexes
.
values
():
x
.
clear
()
def
_convertBTrees
(
self
,
threshold
=
200
):
from
BTrees.convert
import
convert
if
type
(
self
.
data
)
is
not
IOBTree
:
data
=
self
.
data
self
.
data
=
IOBTree
()
convert
(
data
,
self
.
data
,
threshold
)
uids
=
self
.
uids
self
.
uids
=
OIBTree
()
convert
(
uids
,
self
.
uids
,
threshold
)
paths
=
self
.
paths
self
.
paths
=
IOBTree
()
convert
(
paths
,
self
.
paths
,
threshold
)
self
.
__len__
=
BTrees
.
Length
.
Length
()
for
index
in
self
.
indexes
.
values
():
index
.
_convertBTrees
(
threshold
)
lexicon
=
self
.
lexicon
if
type
(
lexicon
)
is
type
(
''
):
lexicon
=
getattr
(
self
,
lexicon
).
lexicon
lexicon
.
_convertBTrees
(
threshold
)
def
__len__
(
self
):
# NOTE, this is never called for new catalogs, since
# each instance overrides this.
return
len
(
self
.
data
)
def
updateBrains
(
self
):
self
.
useBrains
(
self
.
_v_brains
)
...
...
@@ -213,7 +253,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
scopy
=
self
.
schema
.
copy
()
# it is useful for our brains to know these things
scopy
[
'data_record_id_'
]
=
len
(
self
.
schema
.
keys
())
scopy
[
'data_record_score_'
]
=
len
(
self
.
schema
.
keys
())
+
1
scopy
[
'data_record_normalized_score_'
]
=
len
(
self
.
schema
.
keys
())
+
2
...
...
@@ -345,33 +384,54 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
'uid' is the unique Catalog identifier for this object
"""
data
=
self
.
data
if
self
.
uids
.
has_key
(
uid
):
index
=
self
.
uids
[
uid
]
elif
data
:
index
=
data
.
keys
()[
-
1
]
+
1
# find the next available unique id
self
.
uids
[
uid
]
=
index
self
.
paths
[
index
]
=
uid
else
:
index
=
0
self
.
uids
[
uid
]
=
index
self
.
paths
[
index
]
=
uid
data
=
self
.
data
# meta_data is stored as a tuple for efficiency
newDataRecord
=
self
.
recordify
(
object
)
oldDataRecord
=
data
.
get
(
index
,
None
)
# Now we need to compare the tuples before we update them!
if
oldDataRecord
is
not
None
:
for
i
in
range
(
len
(
newDataRecord
)):
if
newDataRecord
[
i
]
!=
oldDataRecord
[
i
]:
index
=
self
.
uids
.
get
(
uid
,
None
)
if
index
is
not
None
:
# old data
if
data
.
get
(
index
,
0
)
!=
newDataRecord
:
# Update the meta-data, if necessary
data
[
index
]
=
newDataRecord
break
else
:
# new data
if
type
(
data
)
is
IOBTree
:
# New style, get radom id
index
=
getattr
(
self
,
'_v_nextid'
,
0
)
if
index
%
4000
==
0
:
index
=
randid
()
while
not
data
.
insert
(
index
,
newDataRecord
):
index
=
randid
()
# We want ids to be somewhat random, but there are
# advantages for having some ids generated
# sequentially when many catalog updates are done at
# once, such as when reindexing or bulk indexing.
# We allocate ids sequentially using a volatile base,
# so different threads get different bases. This
# further reduces conflict and reduces churn in
# here and it result sets when bulk indexing.
self
.
_v_nextid
=
index
+
1
else
:
if
data
:
# find the next available unique id
index
=
data
.
keys
()[
-
1
]
+
1
else
:
index
=
0
data
[
index
]
=
newDataRecord
try
:
self
.
__len__
.
change
(
1
)
except
AttributeError
:
pass
# No managed length (old-style)
self
.
uids
[
uid
]
=
index
self
.
paths
[
index
]
=
uid
total
=
0
for
x
in
self
.
indexes
.
values
():
## tricky! indexes need to acquire now, and because they
...
...
@@ -418,6 +478,10 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
LOG
(
'Catalog'
,
ERROR
,
(
'uncatalogObject unsuccessfully '
'attempted to delete rid %s '
'from paths or data btree.'
%
rid
))
else
:
try
:
self
.
__len__
.
change
(
-
1
)
except
AttributeError
:
pass
# No managed length
del
uids
[
uid
]
self
.
data
=
data
else
:
...
...
@@ -425,15 +489,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
'attempted to uncatalog an object '
'with a uid of %s. '
%
uid
))
def
clear
(
self
):
""" clear catalog """
self
.
data
=
BTree
.
BTree
()
self
.
uids
=
OIBTree
.
BTree
()
self
.
paths
=
IOBTree
.
BTree
()
for
x
in
self
.
indexes
.
values
():
x
.
clear
()
def
uniqueValuesFor
(
self
,
name
):
""" return unique values for FieldIndex name """
...
...
@@ -441,26 +496,16 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
def
hasuid
(
self
,
uid
):
""" return the rid if catalog contains an object with uid """
if
self
.
uids
.
has_key
(
uid
):
return
self
.
uids
[
uid
]
else
:
return
None
return
self
.
uids
.
get
(
uid
)
def
recordify
(
self
,
object
):
""" turns an object into a record tuple """
record
=
[]
# the unique id is allways the first element
for
x
in
self
.
names
:
try
:
attr
=
getattr
(
object
,
x
)
if
(
callable
(
attr
)):
attr
=
attr
()
except
:
attr
=
MV
attr
=
getattr
(
object
,
x
,
MV
)
if
(
attr
is
not
MV
and
callable
(
attr
)):
attr
=
attr
()
record
.
append
(
attr
)
return
tuple
(
record
)
def
instantiate
(
self
,
record
):
...
...
@@ -485,12 +530,9 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
## Searching engine. You don't really have to worry about what goes
## on below here... Most of this stuff came from ZTables with tweaks.
def
_indexedSearch
(
self
,
args
,
sort_index
,
append
,
used
,
IIBType
=
type
(
IIBucket
()),
intSType
=
type
(
intSet
())):
def
_indexedSearch
(
self
,
args
,
sort_index
,
append
,
used
):
"""
Iterate through the indexes, applying the query to each one.
Do some magic to join result sets. Be intelligent about
handling intSets and IIBuckets.
"""
rs
=
None
...
...
@@ -498,7 +540,6 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
if
used
is
None
:
used
=
{}
for
i
in
self
.
indexes
.
keys
():
try
:
index
=
self
.
indexes
[
i
].
__of__
(
self
)
if
hasattr
(
index
,
'_apply_index'
):
r
=
index
.
_apply_index
(
args
)
...
...
@@ -506,74 +547,66 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
r
,
u
=
r
for
name
in
u
:
used
[
name
]
=
1
if
rs
is
None
:
rs
=
r
else
:
# you can't intersect an IIBucket into an
# intSet, but you can go the other way
# around. Make sure we're facing the
# right direction...
if
type
(
rs
)
is
intSType
and
type
(
r
)
is
IIBType
:
rs
=
r
.
intersection
(
rs
)
else
:
rs
=
rs
.
intersection
(
r
)
except
:
return
used
w
,
rs
=
weightedIntersection
(
rs
,
r
)
#assert rs==None or hasattr(rs, 'values') or hasattr(rs, 'keys')
if
rs
is
None
:
# return everything
if
sort_index
is
None
:
rs
=
data
.
items
()
append
(
LazyMap
(
self
.
instantiate
,
rs
))
append
(
LazyMap
(
self
.
instantiate
,
rs
,
len
(
self
)
))
else
:
try
:
for
k
,
intset
in
sort_index
.
_index
.
items
():
for
k
,
intset
in
sort_index
.
items
():
append
((
k
,
LazyMap
(
self
.
__getitem__
,
intset
)))
except
AttributeError
:
raise
ValueError
,
"Incorrect index name passed as "
\
"'sort_on' parameter. Note that you may only "
\
"sort on values for which there is a matching "
\
"index available."
raise
ValueError
,
(
"Incorrect index name passed as"
" 'sort_on' parameter. Note that you may only"
" sort on values for which there is a matching"
" index available."
)
elif
rs
:
if
sort_index
is
None
and
type
(
rs
)
is
IIBType
:
# then there is score information. Build a new result
# set, sort it by score, reverse it, compute the
# normalized score, and Lazify it.
rset
=
[]
for
key
,
score
in
rs
.
items
():
rset
.
append
((
score
,
key
))
rset
.
sort
()
rset
.
reverse
()
# this is reached by having an empty result set (ie non-None)
if
sort_index
is
None
and
hasattr
(
rs
,
'values'
):
# having a 'values' means we have a data structure with
# scores. Build a new result set, sort it by score, reverse
# it, compute the normalized score, and Lazify it.
rset
=
rs
.
byValue
(
0
)
# sort it by score
max
=
float
(
rset
[
0
][
0
])
rs
=
[]
for
score
,
key
in
rset
:
# compute normalized scores
rs
.
append
((
int
((
score
/
max
)
*
100
),
score
,
key
))
append
(
LazyMap
(
self
.
__getitem__
,
rs
))
elif
sort_index
is
None
and
type
(
rs
)
is
intSType
:
elif
sort_index
is
None
and
not
hasattr
(
rs
,
'values'
)
:
# no scores? Just Lazify.
if
hasattr
(
rs
,
'keys'
):
rs
=
rs
.
keys
()
append
(
LazyMap
(
self
.
__getitem__
,
rs
))
else
:
# sort. If there are scores, then this block is not
# reached, therefor 'sort-on' does not happen in the
# context of text index query. This should probably
# sort by relevance first, then the 'sort-on' attribute.
if
len
(
rs
)
>
len
(
sort_index
.
_index
):
for
k
,
intset
in
sort_index
.
_index
.
items
():
if
type
(
rs
)
is
IIBType
:
intset
=
rs
.
intersection
(
intset
)
# Since we still have an IIBucket, let's convert
# it to its set of keys
intset
=
intset
.
keys
()
else
:
intset
=
intset
.
intersection
(
rs
)
if
((
len
(
rs
)
/
4
)
>
len
(
sort_index
)):
# if the sorted index has a quarter as many keys as
# the result set
for
k
,
intset
in
sort_index
.
items
():
# We have an index that has a set of values for
# each sort key, so we interset with each set and
# get a sorted sequence of the intersections.
# This only makes sense if the number of
# keys is much less then the number of results.
intset
=
intersection
(
rs
,
intset
)
if
intset
:
if
hasattr
(
intset
,
'keys'
):
intset
=
intset
.
keys
()
append
((
k
,
LazyMap
(
self
.
__getitem__
,
intset
)))
else
:
if
type
(
rs
)
is
IIBType
:
rs
=
rs
.
keys
()
for
r
in
rs
:
append
((
sort_index
.
_unindex
[
r
],
LazyMap
(
self
.
__getitem__
,[
r
])))
if
hasattr
(
rs
,
'keys'
):
rs
=
rs
.
keys
()
for
did
in
rs
:
append
((
sort_index
.
keyForDocument
(
did
),
LazyMap
(
self
.
__getitem__
,[
did
])))
return
used
...
...
@@ -587,10 +620,10 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# Get search arguments:
if
REQUEST
is
None
and
not
kw
:
try
:
REQUEST
=
self
.
REQUEST
except
:
pass
except
AttributeError
:
pass
if
kw
:
if
REQUEST
:
m
=
KW
MultiMapping
()
m
=
MultiMapping
()
m
.
push
(
REQUEST
)
m
.
push
(
kw
)
kw
=
m
...
...
@@ -599,7 +632,7 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# Make sure batch size is set
if
REQUEST
and
not
REQUEST
.
has_key
(
'batch_size'
):
try
:
batch_size
=
self
.
default_batch_size
except
:
batch_size
=
20
except
AttributeError
:
batch_size
=
20
REQUEST
[
'batch_size'
]
=
batch_size
# Compute "sort_index", which is a sort index, or none:
...
...
@@ -611,8 +644,10 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
sort_index
=
kw
[
'sort_on'
]
else
:
sort_index
=
None
sort_order
=
''
if
sort_index
is
not
None
and
s
ort_index
in
self
.
indexes
.
keys
(
):
if
sort_index
is
not
None
and
s
elf
.
indexes
.
has_key
(
sort_index
):
sort_index
=
self
.
indexes
[
sort_index
]
if
not
hasattr
(
sort_index
,
'keyForDocument'
):
raise
CatalogError
(
'Invalid sort index'
)
# Perform searches with indexes and sort_index
r
=
[]
...
...
@@ -645,9 +680,4 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
__call__
=
searchResults
class
CatalogError
(
Exception
):
pass
lib/python/Products/ZCatalog/CatalogBrains.py
View file @
e6b5d0c3
...
...
@@ -109,7 +109,7 @@ class AbstractCatalogBrain(Record.Record, Acquisition.Implicit):
def
getObject
(
self
,
REQUEST
=
None
):
"""Try to return the object for this record"""
try
:
obj
=
self
.
aq_parent
.
restrictedTraverse
(
self
.
getPath
())
obj
=
self
.
aq_parent
.
un
restrictedTraverse
(
self
.
getPath
())
if
not
obj
:
if
REQUEST
is
None
:
REQUEST
=
self
.
REQUEST
...
...
lib/python/Products/ZCatalog/Lazy.py
View file @
e6b5d0c3
...
...
@@ -82,8 +82,8 @@
# attributions are listed in the accompanying credits file.
#
##############################################################################
__doc__
=
'''$Id: Lazy.py,v 1.
3 2001/01/15 16:29:23 petrilli
Exp $'''
__version__
=
'$Revision: 1.
3
$'
[
11
:
-
2
]
__doc__
=
'''$Id: Lazy.py,v 1.
4 2001/03/15 13:16:23 jim
Exp $'''
__version__
=
'$Revision: 1.
4
$'
[
11
:
-
2
]
class
Lazy
:
...
...
@@ -148,11 +148,12 @@ class LazyCat(Lazy):
# Lazy concatenation of one or more sequences. Should be handy
# for accessing small parts of big searches.
def
__init__
(
self
,
sequences
):
def
__init__
(
self
,
sequences
,
length
=
None
):
self
.
_seq
=
sequences
self
.
_data
=
[]
self
.
_sindex
=
0
self
.
_eindex
=-
1
if
length
is
not
None
:
self
.
_len
=
length
def
__getitem__
(
self
,
index
):
...
...
@@ -194,11 +195,12 @@ class LazyMap(Lazy):
# Act like a sequence, but get data from a filtering process.
# Don't access data until necessary
def
__init__
(
self
,
func
,
seq
):
def
__init__
(
self
,
func
,
seq
,
length
=
None
):
self
.
_seq
=
seq
self
.
_len
=
len
(
seq
)
self
.
_data
=
[]
self
.
_func
=
func
if
length
is
not
None
:
self
.
_len
=
length
else
:
self
.
_len
=
len
(
seq
)
def
__getitem__
(
self
,
index
):
...
...
@@ -229,7 +231,7 @@ class LazyFilter(Lazy):
# Act like a sequence, but get data from a filtering process.
# Don't access data until necessary
def
__init__
(
self
,
test
,
seq
):
def
__init__
(
self
,
test
,
seq
):
self
.
_seq
=
seq
self
.
_data
=
[]
self
.
_eindex
=-
1
...
...
@@ -270,7 +272,7 @@ class LazyMop(Lazy):
# Act like a sequence, but get data from a filtering process.
# Don't access data until necessary
def
__init__
(
self
,
test
,
seq
):
def
__init__
(
self
,
test
,
seq
):
self
.
_seq
=
seq
self
.
_data
=
[]
self
.
_eindex
=-
1
...
...
lib/python/Products/ZCatalog/Vocabulary.py
View file @
e6b5d0c3
...
...
@@ -112,7 +112,7 @@ class Vocabulary(Item, Persistent, Implicit,
AccessControl
.
Role
.
RoleManager
,
):
"""
A Vocabulary is a user
managable re
lization of a Lexicon object.
A Vocabulary is a user
-managable rea
lization of a Lexicon object.
"""
...
...
@@ -151,7 +151,7 @@ class Vocabulary(Item, Persistent, Implicit,
""" create the lexicon to manage... """
self
.
id
=
id
self
.
title
=
title
self
.
globbing
=
globbing
self
.
globbing
=
not
not
globbing
if
globbing
:
self
.
lexicon
=
GlobbingLexicon
.
GlobbingLexicon
()
...
...
lib/python/Products/ZCatalog/ZCatalog.py
View file @
e6b5d0c3
...
...
@@ -97,14 +97,15 @@ from Persistence import Persistent
from
DocumentTemplate.DT_Util
import
InstanceDict
,
TemplateDict
from
DocumentTemplate.DT_Util
import
Eval
,
expr_globals
from
AccessControl.Permission
import
name_trans
from
Catalog
import
Catalog
,
orify
from
Catalog
import
Catalog
,
orify
,
CatalogError
from
SearchIndex
import
UnIndex
,
UnTextIndex
from
Vocabulary
import
Vocabulary
import
IOBTree
from
Shared.DC.ZRDB.TM
import
TM
from
AccessControl
import
getSecurityManager
from
zLOG
import
LOG
,
ERROR
StringType
=
type
(
''
)
manage_addZCatalogForm
=
DTMLFile
(
'dtml/addZCatalog'
,
globals
())
def
manage_addZCatalog
(
self
,
id
,
title
,
...
...
@@ -225,7 +226,6 @@ class ZCatalog(Folder, Persistent, Implicit):
def
__init__
(
self
,
id
,
title
=
''
,
vocab_id
=
None
,
container
=
None
):
self
.
id
=
id
self
.
title
=
title
self
.
vocab_id
=
vocab_id
self
.
threshold
=
10000
self
.
_v_total
=
0
...
...
@@ -233,11 +233,11 @@ class ZCatalog(Folder, Persistent, Implicit):
if
vocab_id
is
None
:
v
=
Vocabulary
(
'Vocabulary'
,
'Vocabulary'
,
globbing
=
1
)
self
.
_setObject
(
'Vocabulary'
,
v
)
v
=
'Vocabulary'
self
.
vocab_id
=
'Vocabulary'
else
:
v
=
vocab_id
self
.
vocab_id
=
vocab_id
self
.
_catalog
=
Catalog
(
vocabulary
=
v
)
self
.
_catalog
=
Catalog
(
vocabulary
=
self
.
vocab_id
)
self
.
_catalog
.
addColumn
(
'id'
)
self
.
_catalog
.
addIndex
(
'id'
,
'FieldIndex'
)
...
...
@@ -254,6 +254,7 @@ class ZCatalog(Folder, Persistent, Implicit):
self
.
_catalog
.
addColumn
(
'summary'
)
self
.
_catalog
.
addIndex
(
'PrincipiaSearchSource'
,
'TextIndex'
)
def
__len__
(
self
):
return
len
(
self
.
_catalog
)
def
getVocabulary
(
self
):
""" more ack! """
...
...
@@ -406,8 +407,20 @@ class ZCatalog(Folder, Persistent, Implicit):
RESPONSE
.
redirect
(
URL1
+
'/manage_catalogIndexes?manage_tabs_message=Index%20Deleted'
)
def
catalog_object
(
self
,
obj
,
uid
):
def
catalog_object
(
self
,
obj
,
uid
=
None
):
""" wrapper around catalog """
if
uid
is
None
:
try
:
uid
=
obj
.
getPhysicalPath
except
AttributeError
:
raise
CatalogError
(
"A cataloged object must support the 'getPhysicalPath' "
"method if no unique id is provided when cataloging"
)
else
:
uid
=
string
.
join
(
uid
(),
'/'
)
elif
type
(
uid
)
is
not
StringType
:
raise
CatalogError
(
'The object unique id must be a string.'
)
self
.
_catalog
.
catalogObject
(
obj
,
uid
,
None
)
# None passed in to catalogObject as third argument indicates
# that we shouldn't try to commit subtransactions within any
...
...
@@ -433,7 +446,7 @@ class ZCatalog(Folder, Persistent, Implicit):
# exceeded within the boundaries of the current transaction.
if
self
.
_v_total
>
self
.
threshold
:
get_transaction
().
commit
(
1
)
self
.
_p_jar
.
cacheFullSweep
(
1
)
self
.
_p_jar
.
cacheFullSweep
(
3
)
self
.
_v_total
=
0
def
uncatalog_object
(
self
,
uid
):
...
...
@@ -527,7 +540,7 @@ class ZCatalog(Folder, Persistent, Implicit):
if
hasattr
(
self
,
'_product_meta_types'
):
pmt
=
self
.
_product_meta_types
elif
hasattr
(
self
,
'aq_acquire'
):
try
:
pmt
=
self
.
aq_acquire
(
'_product_meta_types'
)
except
:
pass
except
AttributeError
:
pass
return
self
.
meta_types
+
Products
.
meta_types
+
pmt
def
valid_roles
(
self
):
...
...
@@ -659,7 +672,7 @@ class ZCatalog(Folder, Persistent, Implicit):
if
string
.
find
(
path
,
script
)
!=
0
:
path
=
'%s/%s'
%
(
script
,
path
)
try
:
return
REQUEST
.
resolve_url
(
path
)
except
:
return
None
except
:
pass
def
resolve_path
(
self
,
path
):
"""
...
...
@@ -668,10 +681,8 @@ class ZCatalog(Folder, Persistent, Implicit):
style url. If no object is found, None is returned.
No exceptions are raised.
"""
try
:
return
self
.
unrestrictedTraverse
(
path
)
except
:
return
None
try
:
return
self
.
unrestrictedTraverse
(
path
)
except
:
pass
def
manage_normalize_paths
(
self
,
REQUEST
):
"""Ensure that all catalog paths are full physical paths
...
...
@@ -713,6 +724,16 @@ class ZCatalog(Folder, Persistent, Implicit):
'%s unchanged.'
%
(
len
(
fixed
),
len
(
removed
),
unchanged
),
action
=
'./manage_main'
)
def
manage_convertBTrees
(
self
,
threshold
=
200
):
"""Convert the catalog's data structures to use BTrees package"""
tt
=
time
.
time
()
ct
=
time
.
clock
()
self
.
_catalog
.
_convertBTrees
(
threshold
*
1
#make sure ints an int)
)
tt
=
time
.
time
()
-
tt
ct
=
time
.
clock
()
-
ct
return
'Finished conversion in %s seconds (%s cpu)'
%
(
tt
,
ct
)
Globals
.
default__class_init__
(
ZCatalog
)
...
...
lib/python/Products/ZCatalog/dtml/catalogIndexes.dtml
View file @
e6b5d0c3
...
...
@@ -37,8 +37,6 @@ that have one or more keywords specified in a search query.
<div class="list-item">Index Name</div></td>
<td width="20%" align="left" valign="top">
<div class="list-item">Index Type</div></td>
<td width="15%" align="left" valign="top">
<div class="list-item">Size</div></td>
</tr>
</dtml-if>
<dtml-if name="sequence-odd"><tr class="row-normal">
...
...
@@ -49,11 +47,11 @@ that have one or more keywords specified in a search query.
</td>
<td width="60%" align="left" valign="top">
<div class="list-item">
<a href="" target="_index_info_&dtml-id;">&dtml-id;</a></div></td>
&dtml-id;
</div>
</td>
<td width="20%" align="left" valign="top">
<div class="list-item">&dtml-meta_type;</div></td>
<td width="15%" align="left" valign="top"><div class="list-item"
><dtml-var expr="_.len(_['sequence-item'])" thousands_commas>
</div>
</td>
</tr>
...
...
lib/python/Products/ZCatalog/tests/__init__.py
0 → 100644
View file @
e6b5d0c3
# Making tests a package makes debugging easier.
lib/python/Products/ZCatalog/tests/keywords.py
0 → 100644
View file @
e6b5d0c3
import
rfc822
,
mailbox
,
cPickle
,
string
class Keywords:
    """Read a list of RFC 822 messages from a Unix mailbox and collect the
    purely-alphabetic words of each Subject header.  Used only as a test
    fixture: build() persists the word list with cPickle so later test runs
    can reload() it without re-parsing the mailbox.
    """

    def __init__(self):
        # Accumulated unique subject words, in first-seen order.
        self.kw = []

    def build(self, mbox, limit):
        """Scan up to `limit` keywords out of mailbox file `mbox` and
        pickle the resulting list to data/keywords.
        """
        mb = mailbox.UnixMailbox(open(mbox))
        msg = mb.next()
        while msg and len(self.kw) < limit:
            # BUG FIX: messages without a Subject header made .get() return
            # None, and string.split(None, ' ') raised TypeError.  Default
            # to the empty string instead.
            sub = string.split(msg.dict.get("subject", ""), ' ')
            for f in sub:
                # Skip empty tokens (consecutive blanks / missing subject);
                # previously "" could slip through the letters check below.
                if not f:
                    continue
                ok = 1
                for c in f:
                    if not c in string.letters:
                        ok = 0
                if ok == 1 and not f in self.kw:
                    self.kw.append(f)
            msg = mb.next()
        P = cPickle.Pickler(open('data/keywords', 'w'))
        P.dump(self.kw)

    def reload(self):
        """Restore the keyword list previously written by build()."""
        P = cPickle.Unpickler(open('data/keywords', 'r'))
        self.kw = P.load()

    def keywords(self):
        """Return the (possibly empty) keyword list."""
        return self.kw
lib/python/Products/ZCatalog/tests/testCatalog.py
0 → 100755
View file @
e6b5d0c3
#!/usr/bin/env python1.5
"""
Testsuite for testing Catalogs
$Id: testCatalog.py,v 1.2 2001/03/15 13:16:24 jim Exp $
Andreas Jung, andreas@digicool.com
$Log: testCatalog.py,v $
Revision 1.2 2001/03/15 13:16:24 jim
Merged changes from Catalog-BTrees-Integration branch.
Revision 1.1.4.11 2001/03/14 18:43:16 andreas
rearranged source code
Revision 1.1.4.10 2001/03/14 15:12:24 andreas
minor changes
Revision 1.1.4.9 2001/03/13 22:45:07 andreas
yet another try/except clause (zope mbox file seems to contain some sloppy
messages)
Revision 1.1.4.8 2001/03/13 22:04:20 andreas
added try/except while reading and parsing the mbox file
Revision 1.1.4.7 2001/03/13 16:51:07 andreas
code cleanup
Revision 1.1.4.6 2001/03/13 14:37:40 andreas
prelimary version for integration into the Zope testsuites
Revision 1.1.4.5 2001/03/11 22:33:40 andreas
commit
Revision 1.1.2.23 2001/03/09 16:06:10 andreas
integrated chris unittestCatalog.py
Revision 1.1.2.22 2001/03/09 15:05:28 andreas
rewrote testUpdates()
Revision 1.1.2.21 2001/03/08 18:42:28 andreas
fixed typo
Revision 1.1.4.4 2001/03/08 12:14:27 andreas
minor changes
Revision 1.1.2.20 2001/03/07 14:58:40 andreas
*** empty log message ***
Revision 1.1.2.19 2001/03/07 14:07:51 andreas
Code cleanup
Revision 1.1.2.18 2001/03/07 12:46:32 andreas
added advanced tests
Revision 1.1.2.17 2001/03/07 10:28:27 andreas
reworked version now using the new thread dispatcher
Revision 1.1.2.16 2001/03/05 15:14:51 andreas
- minor changes in testing catalog/uncatalogObject
- tests must now be started in the lib/python directory
- older input sets are no longer valid (must be recreated)
"""
import
os
,
sys
sys
.
path
.
insert
(
0
,
'.'
)
try
:
import
Testing
except
ImportError
:
sys
.
path
[
0
]
=
"../../.."
import
Testing
os
.
environ
[
'STUPID_LOG_FILE'
]
=
"debug.log"
here
=
os
.
getcwd
()
import
Zope
import
ZODB
,
ZODB
.
FileStorage
from
Products.ZCatalog
import
Catalog
,
ZCatalog
,
Vocabulary
import
Persistence
import
ExtensionClass
from
Testing
import
dispatcher
import
keywords
from
zLOG
import
LOG
from
SearchIndex.UnIndex
import
UnIndex
from
SearchIndex.UnTextIndex
import
UnTextIndex
from
SearchIndex.UnKeywordIndex
import
UnKeywordIndex
from
SearchIndex.Lexicon
import
Lexicon
import
getopt
,
whrandom
,
time
,
string
,
mailbox
,
rfc822
from
Testing
import
unittest
# maximum number of files to read for the test suite
maxFiles
=
1000
# maximum number of threads for stress tests
numThreads
=
4
# number of iterations for searches
searchIterations
=
1000
# number of iterations for catalog/uncatalog operations
updateIterations
=
100
# input mailbox file
mbox
=
os
.
environ
.
get
(
"TESTCATALOG_MBOX"
,
"/usr/home/andreas/zope.mbox"
)
mbox2
=
"/usr/home/andreas/python.mbox"
dataDir
=
""
#
# Don't change anything below
#
class testZODB:
    """ some wrapper stuff around ZODB """
    # Thin convenience wrapper: owns a FileStorage-backed DB and, optionally,
    # one open connection whose root mapping is exposed as self.root.

    def __init__(self, file="data/work/Data.fs", open=1):
        storage = ZODB.FileStorage.FileStorage(file)
        self.db = ZODB.DB(storage)
        if open == 1:
            # Open immediately and cache the root object for read/write.
            self.connection = self.db.open()
            self.root = self.connection.root()

    def write(self, name, obj):
        # Store obj under `name` in the root mapping and commit at once.
        self.root[name] = obj
        get_transaction().commit()

    def read(self, name):
        # Fetch a previously stored object; KeyError if absent.
        return self.root[name]

    def __del__(self):
        # Best-effort close of the underlying database on GC.
        self.db.close()
class
testCatalog
(
Persistence
.
Persistent
,
unittest
.
TestCase
):
""" Wrapper around the catalog stuff """
def
__init__
(
self
,
mboxname
,
maxfiles
):
self
.
msg_ids
=
[]
self
.
num_files
=
0
self
.
keywords
=
[]
self
.
maxfiles
=
maxfiles
self
.
_vocabulary
=
Vocabulary
.
Vocabulary
(
'Vocabulary'
,
'Vocabulary'
,
globbing
=
1
)
self
.
_catalog
=
Catalog
.
Catalog
()
self
.
_catalog
.
addIndex
(
'to'
,
'TextIndex'
)
self
.
_catalog
.
addIndex
(
'sender'
,
'TextIndex'
)
self
.
_catalog
.
addIndex
(
'subject'
,
'TextIndex'
)
self
.
_catalog
.
addIndex
(
'content'
,
'TextIndex'
)
self
.
_catalog
.
addIndex
(
'file_id'
,
'TextIndex'
)
self
.
_catalog
.
addColumn
(
'file_id'
)
self
.
_catalog
.
addIndex
(
'length'
,
'FieldIndex'
)
self
.
_catalog
.
addColumn
(
'length'
)
self
.
_catalog
.
addIndex
(
'date'
,
'FieldIndex'
)
self
.
_catalog
.
addIndex
(
'keywords'
,
"KeywordIndex"
)
self
.
build_catalog
(
mboxname
)
def
build_catalog
(
self
,
mboxname
):
mb
=
mailbox
.
UnixMailbox
(
open
(
mboxname
,
"r"
))
i
=
0
msg
=
mb
.
next
()
while
msg
and
self
.
num_files
<
self
.
maxfiles
:
try
:
self
.
catMessage
(
msg
)
self
.
msg_ids
.
append
(
msg
.
dict
[
"message-id"
])
except
:
msg
=
mb
.
next
()
continue
msg
=
mb
.
next
()
self
.
num_files
=
self
.
num_files
+
1
if
self
.
num_files
%
100
==
0
:
print
self
.
num_files
try
:
sub
=
string
.
split
(
msg
.
dict
.
get
(
"subject"
,
""
))
except
:
msg
=
mb
.
next
()
continue
for
s
in
sub
:
if
not
s
in
self
.
keywords
:
self
.
keywords
.
append
(
s
)
self
.
_catalog
.
aq_parent
=
None
def
catMessage
(
self
,
m
):
self
.
_catalog
.
catalogObject
(
testMessage
(
m
)
,
m
.
dict
[
"message-id"
]
)
def
uncatMessage
(
self
,
uid
):
self
.
_catalog
.
uncatalogObject
(
uid
)
class testMessage(ExtensionClass.Base):
    """Lightweight indexable stand-in for one rfc822 message: exposes the
    attributes (sender, subject, to, content, keywords, file_id, length,
    date) that the test catalog's indexes read.
    """

    def __init__(self, msg, modify_doc=0):
        self.sender = msg.dict.get("from", "")
        self.subject = msg.dict.get("subject", "")
        self.to = msg.dict.get("to", "")
        self.content = str(msg)
        self.keywords = string.split(self.subject, " ")
        if modify_doc != 0:
            # Reversed keywords simulate a modified document for the
            # reindexing tests.
            self.keywords = map(self.reverse, self.keywords)
        self.file_id = msg.dict.get("message-id", "")
        self.length = len(str(msg))
        date = msg.dict.get("date", "")
        # BUG FIX: on unparseable/missing Date headers self.date was never
        # assigned, leaving the attribute absent for the 'date' FieldIndex.
        # Default to 0 so every instance has a date value.
        self.date = 0
        try:
            self.date = time.mktime(rfc822.parsedate(date)[:9])
        except:
            pass

    def reverse(self, s):
        # Return s with its characters reversed.
        l = list(s)
        l.reverse()
        return string.join(l, "")

    def __del__(self):
        pass
class
BuildEnv
(
dispatcher
.
Dispatcher
,
unittest
.
TestCase
):
""" build environment """
def
__init__
(
self
,
func
,
*
args
,
**
kw
):
unittest
.
TestCase
.
__init__
(
self
,
func
,
args
,
kw
)
dispatcher
.
Dispatcher
.
__init__
(
self
,
func
)
self
.
init_phase
=
0
self
.
setlog
(
open
(
"dispatcher.log"
,
"a"
)
)
self
.
logn
(
'treads=%d searchiterations=%d'
%
(
numThreads
,
searchIterations
))
self
.
logn
(
'updateiterations=%d maxfiles=%d'
%
(
updateIterations
,
maxFiles
))
#############################################################
# Build up ZODB
#############################################################
def
buildTestEnvironment
(
self
,
args
,
kw
):
self
.
init_phase
=
1
self
.
dispatcher
(
"funcTestEnvironment"
,(
"funcTestEnvironment"
,
1
,
args
,
kw
))
def
funcTestEnvironment
(
self
,
dataDir
,
maxFiles
):
env
=
self
.
th_setup
()
if
not
os
.
path
.
exists
(
dataDir
):
os
.
makedirs
(
dataDir
)
os
.
system
(
"rm -f %s/*"
%
dataDir
)
zodb
=
testZODB
(
"%s/Data_orig.fs"
%
dataDir
)
print
"parsing and reading mailbox file %s....please wait"
%
mbox
tc
=
testCatalog
(
mbox
,
maxFiles
)
print
"writing Catalog to ZODB"
zodb
.
write
(
"catalog"
,
tc
)
print
"Creating keywords file"
kw
=
keywords
.
Keywords
()
kw
.
build
(
mbox
,
1000
)
print
tc
.
num_files
,
"files read"
print
"Initalization complete"
self
.
th_teardown
(
env
)
class
testSearches
(
dispatcher
.
Dispatcher
,
unittest
.
TestCase
):
""" test searches """
def
__init__
(
self
,
func
,
*
args
,
**
kw
):
unittest
.
TestCase
.
__init__
(
self
,
func
,
args
,
kw
)
dispatcher
.
Dispatcher
.
__init__
(
self
,
func
)
self
.
init_phase
=
0
self
.
setlog
(
open
(
"dispatcher.log"
,
"a"
)
)
def
setUp
(
self
):
os
.
system
(
"rm -fr data/work"
)
if
not
os
.
path
.
exists
(
"data/work"
):
os
.
makedirs
(
"data/work"
)
assert
os
.
system
(
"cp %s/Data_orig.fs data/work/Data.fs"
%
dataDir
)
==
0
,
\
"Error while replicating original data"
self
.
zodb
=
testZODB
(
"data/work/Data.fs"
,
open
=
0
)
self
.
threads
=
{}
self
.
init_zodb_size
=
self
.
zodb_size
()
kw
=
keywords
.
Keywords
()
kw
.
reload
()
self
.
keywords
=
kw
.
keywords
()
self
.
logn
(
"-"
*
80
)
self
.
logn
(
'treads=%d searchiterations=%d'
%
(
numThreads
,
searchIterations
))
self
.
logn
(
'updateiterations=%d maxfiles=%d'
%
(
updateIterations
,
maxFiles
))
def
tearDown
(
self
):
self
.
log_zodb_size
(
"before"
,
self
.
init_zodb_size
)
self
.
log_zodb_size
(
"after "
,
self
.
zodb_size
())
del
self
.
zodb
self
.
zodb
=
self
.
catalog
=
None
def
log_zodb_size
(
self
,
s
,
n
):
self
.
logn
(
"Size of ZODB (data/work/Data.fs) %s test : %s"
%
(
s
,
n
)
)
def
zodb_size
(
self
):
return
self
.
size2size
(
os
.
stat
(
"data/work/Data.fs"
)[
6
])
def
size2size
(
self
,
n
):
import
math
if
n
<
1024.0
:
return
"%8.3lf Bytes"
%
n
if
n
<
1024.0
*
1024.0
:
return
"%8.3lf KB"
%
(
1.0
*
n
/
1024.0
)
if
n
<
1024.0
*
1024.0
*
1024.0
:
return
"%8.3lf MB"
%
(
1.0
*
n
/
1024.0
/
1024.0
)
#############################################################
# Fulltext test
#############################################################
def
testFulltextIndex
(
self
,
args
,
kw
):
""" benchmark FulltextIndex """
self
.
dispatcher
(
'funcFulltextIndex'
,
(
'funcFulltextIndex'
,
kw
[
"numThreads"
]
,
()
,
{}
)
)
def
funcFulltextIndex
(
self
,
*
args
):
""" benchmark FulltextIndex """
cat
,
msg_ids
=
self
.
get_catalog
()
env
=
self
.
th_setup
()
for
kw
in
self
.
keywords
:
res
=
cat
.
searchResults
(
{
"content"
:
kw
}
)
self
.
th_teardown
(
env
)
#############################################################
# Field index test
#############################################################
def
testFieldIndex
(
self
,
args
,
kw
):
""" benchmark field index"""
self
.
dispatcher
(
'funcFieldIndex'
,
(
'funcFieldIndex'
,
kw
[
"numThreads"
]
,
()
,
{}
)
)
def
funcFieldIndex
(
self
,
*
args
):
""" benchmark FieldIndex """
cat
,
msg_ids
=
self
.
get_catalog
()
env
=
self
.
th_setup
()
for
i
in
range
(
0
,
searchIterations
):
res
=
cat
.
searchResults
(
{
"length"
:
i
}
)
for
r
in
res
:
assert
i
==
r
.
length
,
"%s should have size %d but is %s"
%
\
(
r
.
file_id
,
i
,
r
.
length
)
self
.
th_teardown
(
env
)
#############################################################
# Keyword index test
#############################################################
def
testKeywordIndex
(
self
,
args
,
kw
):
""" benchmark Keyword index"""
self
.
dispatcher
(
'funcKeywordIndex'
,
(
'funcKeywordIndex'
,
kw
[
"numThreads"
]
,
()
,
{}
)
)
def
funcKeywordIndex
(
self
,
*
args
):
""" benchmark KeywordIndex """
cat
,
msg_ids
=
self
.
get_catalog
()
env
=
self
.
th_setup
()
for
kw
in
self
.
keywords
:
res
=
cat
.
searchResults
(
{
"subject"
:
kw
}
)
# assert len(res) != 0 , "Search result for keyword '%s' is empty" % kw
self
.
th_teardown
(
env
)
#############################################################
# Field range index test
#############################################################
def
testFieldRangeIndex
(
self
,
args
,
kw
):
""" benchmark field range index"""
self
.
dispatcher
(
'funcFieldRangeIndex'
,
(
'funcFieldRangeIndex'
,
kw
[
"numThreads"
]
,
()
,
{}
)
)
def
funcFieldRangeIndex
(
self
,
*
args
):
""" benchmark FieldRangeIndex """
cat
,
msg_ids
=
self
.
get_catalog
()
env
=
self
.
th_setup
()
rg
=
[]
for
i
in
range
(
searchIterations
):
m
=
whrandom
.
randint
(
0
,
10000
)
n
=
m
+
200
rg
.
append
((
m
,
n
))
for
i
in
range
(
searchIterations
):
for
r
in
cat
.
searchResults
(
{
"length"
:
rg
[
i
],
"length_usage"
:
"range:min:max"
}
):
size
=
r
.
length
assert
rg
[
i
][
0
]
<=
size
and
size
<=
rg
[
i
][
1
]
,
\
"Filesize of %s is out of range (%d,%d) %d"
%
(
r
.
file_id
,
rg
[
i
][
0
],
rg
[
i
][
1
],
size
)
self
.
th_teardown
(
env
)
#############################################################
# Keyword + range index test
#############################################################
def
testKeywordRangeIndex
(
self
,
args
,
kw
):
""" benchmark Keyword range index"""
self
.
dispatcher
(
'funcKeywordRangeIndex'
,
(
'funcKeywordRangeIndex'
,
kw
[
"numThreads"
]
,
()
,
{}
)
)
def
funcKeywordRangeIndex
(
self
,
*
args
):
""" benchmark Keyword & IndexRange search """
cat
,
msg_ids
=
self
.
get_catalog
()
rg
=
[]
for
i
in
range
(
len
(
self
.
keywords
)):
m
=
whrandom
.
randint
(
0
,
10000
)
n
=
m
+
200
rg
.
append
(
m
,
n
)
env
=
self
.
th_setup
()
results
=
[]
for
i
in
range
(
len
(
self
.
keywords
)):
results
.
append
(
cat
.
searchResults
(
{
"keywords"
:
self
.
keywords
[
i
],
"length"
:
rg
[
i
],
"length_usage"
:
"range:min:max"
}
)
)
self
.
th_teardown
(
env
)
#############################################################
# Test full reindexing
#############################################################
def
testUpdates
(
self
,
args
,
kw
):
""" benchmark concurrent catalog/uncatalog operations """
self
.
dispatcher
(
"testUpdates"
,
(
"funcUpdates"
,
kw
[
"numThreads"
]
,
args
,
kw
))
def
funcUpdates
(
self
,
*
args
,
**
kw
):
""" benchmark concurrent catalog/uncatalog operations """
uncat_conflicts
=
cat_conflicts
=
0
cat
,
msg_ids
=
self
.
get_catalog
()
msgs
=
self
.
setupUpdatesMethod
(
kw
[
"numUpdates"
])
keys
=
msgs
.
keys
()
rdgen
=
whrandom
.
whrandom
()
rdgen
.
seed
(
int
(
time
.
time
())
%
256
,
int
(
time
.
time
())
%
256
,
int
(
time
.
time
())
%
256
)
env
=
self
.
th_setup
()
for
i
in
range
(
len
(
keys
)):
r
=
rdgen
.
randint
(
0
,
len
(
msgs
)
-
1
)
mid
=
keys
[
r
]
obj
=
msgs
[
mid
]
try
:
cat
.
uncatalogObject
(
mid
)
if
kw
.
get
(
"commit"
,
1
)
==
1
:
get_transaction
().
commit
()
time
.
sleep
(
0.1
)
except
ZODB
.
POSException
.
ConflictError
:
uncat_conflicts
=
uncat_conflicts
+
1
try
:
cat
.
catalogObject
(
obj
,
mid
)
if
kw
.
get
(
"commit"
,
1
)
==
1
:
get_transaction
().
commit
()
time
.
sleep
(
0.1
)
except
ZODB
.
POSException
.
ConflictError
:
cat_conflicts
=
cat_conflicts
+
1
try
:
get_transaction
().
commit
()
except
:
pass
self
.
th_teardown
(
env
,
cat_conflicts
=
cat_conflicts
,
uncat_conflicts
=
uncat_conflicts
)
def
setupUpdatesMethod
(
self
,
numUpdates
):
""" this method prepares a datastructure for the updates test.
we are reading the first n mails from the primary mailbox.
they are used for the update test
"""
i
=
0
dict
=
{}
mb
=
mailbox
.
UnixMailbox
(
open
(
mbox
,
"r"
))
msg
=
mb
.
next
()
while
msg
and
i
<
numUpdates
:
obj
=
testMessage
(
msg
)
mid
=
msg
.
dict
[
"message-id"
]
dict
[
mid
]
=
obj
msg
=
mb
.
next
()
i
=
i
+
1
return
dict
#############################################################
# Test full reindexing
#############################################################
def
testReindexing
(
self
,
args
,
kw
):
""" test reindexing of existing data """
self
.
dispatcher
(
"testReindexing"
,
(
"funcReindexing"
,
kw
[
"numThreads"
]
,
(
mbox
,
1000
)
,
{}
))
def
testReindexingAndModify
(
self
,
args
,
kw
):
""" test reindexing of existing data but with modifications"""
self
.
dispatcher
(
"testReindexing"
,
(
"funcReindexing"
,
kw
[
"numThreads"
]
,
(
mbox
,
1000
,
1
)
,
{}
))
def
funcReindexing
(
self
,
mbox
,
numfiles
=
100
,
modify_doc
=
0
):
""" test reindexing of existing data """
cat_conflicts
=
0
cat
,
msg_ids
=
self
.
get_catalog
()
env
=
self
.
th_setup
()
mb
=
mailbox
.
UnixMailbox
(
open
(
mbox
,
"r"
))
i
=
0
msg
=
mb
.
next
()
while
msg
and
i
<
numfiles
:
obj
=
testMessage
(
msg
,
modify_doc
)
mid
=
msg
.
dict
[
"message-id"
]
try
:
cat
.
catalogObject
(
obj
,
mid
)
get_transaction
().
commit
()
except
:
cat_conflicts
=
cat_conflicts
+
1
msg
=
mb
.
next
()
i
=
i
+
1
if
i
%
100
==
0
:
print
i
self
.
th_teardown
(
env
,
cat_conflicts
=
cat_conflicts
)
#############################################################
# Test full reindexing
#############################################################
def
testIncrementalIndexing
(
self
,
args
,
kw
):
""" testing incremental indexing """
self
.
dispatcher
(
"testIncrementalIndexing"
,
(
"funcReindexing"
,
kw
[
"numThreads"
],
(
mbox2
,
1000
)
,
{}))
def
get_catalog
(
self
):
""" return a catalog object """
# depended we are running in multithreaded mode we must take
# care how threads open the ZODB
connection
=
self
.
zodb
.
db
.
open
()
root
=
connection
.
root
()
cat
=
root
[
"catalog"
].
_catalog
msg_ids
=
root
[
'catalog'
].
msg_ids
return
cat
,
msg_ids
################################################################################
# Stuff of Chris
################################################################################
class CatalogBase:
    # Shared fixture mixin: gives each test a fresh globbing Vocabulary and
    # an empty Catalog, and drops both afterwards.

    def setUp(self):
        self._vocabulary = Vocabulary.Vocabulary(
            'Vocabulary', 'Vocabulary', globbing=1)
        self._catalog = Catalog.Catalog()

    def tearDown(self):
        self._vocabulary = self._catalog = None
class TestAddDelColumn(CatalogBase, unittest.TestCase):
    # Metadata-column management on a bare Catalog.

    def checkAdd(self):
        # Adding a column must register it in the schema.
        self._catalog.addColumn('id')
        assert self._catalog.schema.has_key('id') == 1, 'add column failed'

    def checkAddBad(self):
        # Underscore-prefixed column names must be rejected.
        try:
            self._catalog.addColumn('_id')
        except:
            pass
        else:
            raise 'invalid metadata column check failed'

    def checkDel(self):
        # Deleting a column must remove it from the schema again.
        self._catalog.addColumn('id')
        self._catalog.delColumn('id')
        assert self._catalog.schema.has_key('id') != 1, 'del column failed'
class TestAddDelIndexes(CatalogBase, unittest.TestCase):
    # Index management: each addIndex must create the right SearchIndex
    # class, and delIndex must remove it.

    def checkAddFieldIndex(self):
        self._catalog.addIndex('id', 'FieldIndex')
        assert type(self._catalog.indexes['id']) is type(UnIndex('id')), \
            'add field index failed'

    def checkAddTextIndex(self):
        self._catalog.addIndex('id', 'TextIndex')
        idx = self._catalog.indexes['id']
        assert type(idx) is type(UnTextIndex('id', None, None, Lexicon())), \
            'add text index failed'

    def checkAddKeywordIndex(self):
        self._catalog.addIndex('id', 'KeywordIndex')
        idx = self._catalog.indexes['id']
        assert type(idx) is type(UnKeywordIndex('id')), 'add kw index failed'

    def checkDelFieldIndex(self):
        self._catalog.addIndex('id', 'FieldIndex')
        self._catalog.delIndex('id')
        assert self._catalog.indexes.has_key('id') != 1, 'del index failed'

    def checkDelTextIndex(self):
        self._catalog.addIndex('id', 'TextIndex')
        self._catalog.delIndex('id')
        assert self._catalog.indexes.has_key('id') != 1, 'del index failed'

    def checkDelKeywordIndex(self):
        self._catalog.addIndex('id', 'KeywordIndex')
        self._catalog.delIndex('id')
        assert self._catalog.indexes.has_key('id') != 1, 'del index failed'
class TestSimultaneousAddAndRead(CatalogBase, unittest.TestCase):
    # Placeholder for a concurrent add/read test; not implemented yet.

    def checkMultiThread(self):
        pass
class TestZCatalogObject(unittest.TestCase):
    # ZCatalog construction: with no vocab_id it must create its own
    # Vocabulary; with vocab_id it must use the supplied one.

    def checkInstantiateWithoutVocab(self):
        v = Vocabulary.Vocabulary('Vocabulary', 'Vocabulary', globbing=1)
        zc = ZCatalog.ZCatalog('acatalog')
        assert hasattr(zc, 'Vocabulary')
        assert zc.getVocabulary().__class__ == v.__class__

    def checkInstantiateWithGlobbingVocab(self):
        v = Vocabulary.Vocabulary('Vocabulary', 'Vocabulary', globbing=1)
        zc = ZCatalog.ZCatalog('acatalog', vocab_id='vocab')
        zc._setObject('vocab', v)
        assert zc.getVocabulary() == v

    def checkInstantiateWithNormalVocab(self):
        v = Vocabulary.Vocabulary('Vocabulary', 'Vocabulary', globbing=0)
        zc = ZCatalog.ZCatalog('acatalog', vocab_id='vocab')
        zc._setObject('vocab', v)
        assert zc.getVocabulary() == v
class
TestCatalogObject
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
_vocabulary
=
Vocabulary
.
Vocabulary
(
'Vocabulary'
,
'Vocabulary'
,
globbing
=
1
)
self
.
_catalog
=
Catalog
.
Catalog
()
self
.
_catalog
.
addIndex
(
'col1'
,
'FieldIndex'
)
self
.
_catalog
.
addIndex
(
'col2'
,
'TextIndex'
)
self
.
_catalog
.
addIndex
(
'col3'
,
'KeywordIndex'
)
self
.
_catalog
.
addColumn
(
'col1'
)
self
.
_catalog
.
addColumn
(
'col2'
)
self
.
_catalog
.
addColumn
(
'col3'
)
self
.
_catalog
.
addIndex
(
'att1'
,
'FieldIndex'
)
self
.
_catalog
.
addIndex
(
'att2'
,
'TextIndex'
)
self
.
_catalog
.
addIndex
(
'att3'
,
'KeywordIndex'
)
self
.
_catalog
.
addColumn
(
'att1'
)
self
.
_catalog
.
addColumn
(
'att2'
)
self
.
_catalog
.
addColumn
(
'att3'
)
self
.
_catalog
.
addColumn
(
'num'
)
self
.
upper
=
1000
class
dummy
(
ExtensionClass
.
Base
):
att1
=
'att1'
att2
=
'att2'
att3
=
[
'att3'
]
def
__init__
(
self
,
num
):
self
.
num
=
num
def
col1
(
self
):
return
'col1'
def
col2
(
self
):
return
'col2'
def
col3
(
self
):
return
[
'col3'
]
for
x
in
range
(
0
,
self
.
upper
):
self
.
_catalog
.
catalogObject
(
dummy
(
x
),
`x`
)
self
.
_catalog
.
aq_parent
=
dummy
(
'foo'
)
# fake out acquisition
def
tearDown
(
self
):
self
.
_vocabulary
=
self
.
_catalog
=
None
def
checkResultLength
(
self
):
upper
=
self
.
upper
a
=
self
.
_catalog
()
assert
len
(
a
)
==
upper
,
'length should be %s, its %s'
%
(
upper
,
len
(
a
))
def
checkFieldIndexLength
(
self
):
a
=
self
.
_catalog
(
att1
=
'att1'
)
assert
len
(
a
)
==
self
.
upper
,
'should be %s, but is %s'
%
(
self
.
upper
,
len
(
a
))
def
checkTextIndexLength
(
self
):
a
=
self
.
_catalog
(
att2
=
'att2'
)
assert
len
(
a
)
==
self
.
upper
,
'should be %s, but is %s'
%
(
self
.
upper
,
len
(
a
))
def
checkKeywordIndexLength
(
self
):
a
=
self
.
_catalog
(
att3
=
'att3'
)
assert
len
(
a
)
==
self
.
upper
,
'should be %s, but is %s'
%
(
self
.
upper
,
len
(
a
))
def
checkUncatalogFieldIndex
(
self
):
self
.
uncatalog
()
a
=
self
.
_catalog
(
att1
=
'att1'
)
assert
len
(
a
)
==
0
,
'len: %s'
%
(
len
(
a
))
def
checkUncatalogTextIndex
(
self
):
self
.
uncatalog
()
a
=
self
.
_catalog
(
att2
=
'att2'
)
assert
len
(
a
)
==
0
,
'len: %s'
%
(
len
(
a
))
def
checkUncatalogKeywordIndex
(
self
):
self
.
uncatalog
()
a
=
self
.
_catalog
(
att3
=
'att3'
)
assert
len
(
a
)
==
0
,
'len: %s'
%
(
len
(
a
))
def
checkBadUncatalog
(
self
):
try
:
self
.
_catalog
.
uncatalogObject
(
'asdasdasd'
)
except
:
assert
1
==
2
,
'uncatalogObject raised exception on bad uid'
def
checkUniqueValuesForLength
(
self
):
a
=
self
.
_catalog
.
uniqueValuesFor
(
'att1'
)
assert
len
(
a
)
==
1
,
'bad number of unique values %s'
%
str
(
a
)
def
checkUniqueValuesForContent
(
self
):
a
=
self
.
_catalog
.
uniqueValuesFor
(
'att1'
)
assert
a
[
0
]
==
'att1'
,
'bad content %s'
%
str
(
a
[
0
])
def
uncatalog
(
self
):
for
x
in
range
(
0
,
self
.
upper
):
self
.
_catalog
.
uncatalogObject
(
`x`
)
class objRS(ExtensionClass.Base):
    # Trivial indexable object carrying one integer, used by the
    # range-search stress test below.

    def __init__(self, num):
        self.number = num
class
testRS
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
_vocabulary
=
Vocabulary
.
Vocabulary
(
'Vocabulary'
,
'Vocabulary'
,
globbing
=
1
)
self
.
_catalog
=
Catalog
.
Catalog
()
self
.
_catalog
.
addIndex
(
'number'
,
'FieldIndex'
)
self
.
_catalog
.
addColumn
(
'number'
)
for
i
in
range
(
50000
):
if
i
%
1000
==
0
:
print
i
obj
=
objRS
(
whrandom
.
randint
(
0
,
20000
))
self
.
_catalog
.
catalogObject
(
obj
,
i
)
self
.
_catalog
.
aq_parent
=
objRS
(
200
)
def
testRangeSearch
(
self
):
for
i
in
range
(
1000000
):
m
=
whrandom
.
randint
(
0
,
20000
)
n
=
m
+
1000
for
r
in
self
.
_catalog
.
searchResults
(
{
"number"
:
(
m
,
n
)
,
"length_usage"
:
"range:min:max"
}
):
size
=
r
.
number
assert
m
<=
size
and
size
<=
n
,
"%d vs [%d,%d]"
%
(
r
.
number
,
m
,
n
)
def
usage
(
program
):
print
"Usage: "
print
print
"initalize the test catalog: %s -i -f <maximum number files to use> "
%
program
print
"to run the basic tests: %s -b -f <maximum number files to use> "
%
program
print
"to run the advanced tests: %s -a -f <maximum number files to use> "
%
program
def main():
    """Parse command-line options and run the selected test groups."""
    global dataDir, maxFiles
    opts, args = getopt.getopt(sys.argv[1:], "hiabf:xp", ['help'])
    opts.sort()
    optsLst = map(lambda x: x[0], opts)
    if optsLst == []:
        # No options at all: show help and quit.
        usage(os.path.basename(sys.argv[0])); sys.exit(0)
    for k, v in opts:
        if k in ['-h', '--help']:
            usage(os.path.basename(sys.argv[0])); sys.exit(0)
        if k == "-f":
            maxFiles = string.atoi(v)
    # The data directory is keyed by the mail count so differently sized
    # input sets can coexist.
    dataDir = os.path.join("data", str(maxFiles))
    if '-i' in optsLst:
        unittest.TextTestRunner().run(get_tests('init'))
    if '-b' in optsLst:
        unittest.TextTestRunner().run(get_tests('bench1'))
    if '-a' in optsLst:
        unittest.TextTestRunner().run(get_tests('bench2'))
    if '-x' in optsLst:
        unittest.TextTestRunner().run(get_tests('exp'))
    if '-p' in optsLst:
        unittest.TextTestRunner().run(test_suite())
def test_suite():
    # Standard Zope test entry point: just the basic suite.
    return get_tests('basic')
def get_tests(what):
    """Assemble the requested test group: 'basic', 'init', 'bench1',
    'bench2' or 'exp'."""
    global dataDir, maxFiles
    if what == 'basic':
        # The basic suite runs against a small, fixed-size input set.
        maxFiles = 100
        dataDir = 'data/%d' % maxFiles

    # Chris' unit tests.
    ts_cm = (
        unittest.makeSuite(TestAddDelIndexes, 'check'),
        unittest.makeSuite(TestCatalogObject, 'check'),
        unittest.makeSuite(TestAddDelColumn, 'check'),
        unittest.makeSuite(TestZCatalogObject, 'check'),
    )
    # Andreas' single-threaded benchmark pass, preceded by environment build.
    t_aj = (
        BuildEnv('buildTestEnvironment', dataDir, maxFiles),
        testSearches("testFulltextIndex", numThreads=1),
        testSearches("testFieldIndex", numThreads=1),
        testSearches("testFieldRangeIndex", numThreads=1),
        testSearches("testKeywordIndex", numThreads=1),
        testSearches("testKeywordRangeIndex", numThreads=1),
    )
    # Search benchmarks, 1 and 4 threads each.
    bench1_tests = (
        testSearches("testFulltextIndex", numThreads=1),
        testSearches("testFulltextIndex", numThreads=4),
        testSearches("testFieldIndex", numThreads=1),
        testSearches("testFieldIndex", numThreads=4),
        testSearches("testFieldRangeIndex", numThreads=1),
        testSearches("testFieldRangeIndex", numThreads=4),
        testSearches("testKeywordIndex", numThreads=1),
        testSearches("testKeywordIndex", numThreads=4),
        testSearches("testKeywordRangeIndex", numThreads=1),
        testSearches("testKeywordRangeIndex", numThreads=4),
    )
    # Indexing/update benchmarks.
    bench2_tests = (
        testSearches("testReindexing", numThreads=1),
        testSearches("testIncrementalIndexing", numThreads=1),
        testSearches("testUpdates", numThreads=2, numUpdates=200),
        testSearches("testUpdates", numThreads=4, numUpdates=200),
    )
    # Experimental tests.
    exp_tests = (
        # testRS("testRangeSearch"),
        # testSearches("testReindexing",numThreads=1),
        testSearches("testReindexingAndModify", numThreads=1),
        # testSearches("testUpdates",numThreads=10,numUpdates=100),
    )
    init_tests = (
        BuildEnv("buildTestEnvironment", dataDir, maxFiles),
    )

    if what == 'basic':
        ts = unittest.TestSuite(ts_cm)
        for x in t_aj:
            ts.addTest(x)
        return ts
    # Any other group name selects the correspondingly named tuple above.
    ts = unittest.TestSuite()
    for x in eval('%s_tests' % what):
        ts.addTest(x)
    return ts
def debug():
    """Run the basic suite under unittest's debug mode."""
    test_suite().debug()


def pdebug():
    """Run debug() under the pdb debugger."""
    # BUG FIX: pdebug was defined twice; the earlier definition (which
    # imported pdb and merely built the suite without running anything)
    # was dead code, silently shadowed by this one.  Consolidated to the
    # single effective definition.
    import pdb
    pdb.run('debug()')


if __name__ == '__main__':
    main()
lib/python/Products/ZCatalog/tests/testCatalogTiming.py
0 → 100644
View file @
e6b5d0c3
import
os
,
sys
sys
.
path
.
insert
(
0
,
'.'
)
try
:
import
Testing
os
.
environ
[
'SOFTWARE_HOME'
]
=
os
.
environ
.
get
(
'SOFTWARE_HOME'
,
'.'
)
except
ImportError
:
sys
.
path
[
0
]
=
'../../..'
import
Testing
os
.
environ
[
'SOFTWARE_HOME'
]
=
'../../..'
os
.
environ
[
'INSTANCE_HOME'
]
=
os
.
environ
.
get
(
'INSTANCE_HOME'
,
os
.
path
.
join
(
os
.
environ
[
'SOFTWARE_HOME'
],
'..'
,
'..'
)
)
os
.
environ
[
'STUPID_LOG_FILE'
]
=
os
.
path
.
join
(
os
.
environ
[
'INSTANCE_HOME'
],
'var'
,
'debug.log'
)
here
=
os
.
getcwd
()
import
Zope
import
mailbox
,
time
,
httplib
from
string
import
strip
,
find
,
split
,
lower
,
atoi
,
join
from
urllib
import
quote
from
Products.ZCatalog
import
ZCatalog
from
unittest
import
TestCase
,
TestSuite
,
JUnitTextTestRunner
,
\
VerboseTextTestRunner
,
makeSuite
from
Testing.makerequest
import
makerequest
TextTestRunner
=
VerboseTextTestRunner
class TestTimeIndex(TestCase):
    # Timing harness: builds a ZCatalog inside a scratch folder of the live
    # Zope app and measures bulk indexing, incremental indexing + querying,
    # and subtransaction-commit thresholds against zope.mbox.

    def setUp(self):
        self.app = makerequest(Zope.app())
        try:
            self.app._delObject('catalogtest')
        except AttributeError:
            pass
        self.app.manage_addFolder('catalogtest')
        zcatalog = ZCatalog.ZCatalog('catalog', 'a catalog')
        self.app.catalogtest._setObject('catalog', zcatalog)
        c = self.app.catalogtest.catalog
        # Drop any leftover indexes from a previous run, then recreate.
        for x in ('title', 'to', 'from', 'date', 'raw'):
            try:
                c.manage_delIndexes([x])
            except:
                pass
        c.manage_addIndex('title', 'TextIndex')
        c.manage_addIndex('to', 'TextIndex')
        c.manage_addIndex('from', 'TextIndex')
        c.manage_addIndex('date', 'FieldIndex')
        c.manage_addIndex('raw', 'TextIndex')

    def tearDown(self):
        try:
            self.app._delObject('catalogtest')
        except AttributeError:
            pass
        try:
            # Pack and close so repeated runs don't bloat the storage.
            self.app._p_jar._db.pack()
            self.app._p_jar.close()
        except AttributeError:
            pass
        self.app = None
        del self.app

    def checkTimeBulkIndex(self):
        print
        c = self.app.catalogtest.catalog
        t = time.time()
        loadmail(self.app.catalogtest, 'zopemail',
                 os.path.join(here, 'zope.mbox'), 500)
        get_transaction().commit()
        loadtime = time.time() - t
        out("loading data took %s seconds.. " % loadtime)
        t = time.time()
        # Fake enough of a request for manage_catalogFoundItems.
        req = self.app.REQUEST
        parents = [self.app.catalogtest.catalog,
                   self.app.catalogtest, self.app]
        req['PARENTS'] = parents
        rsp = self.app.REQUEST.RESPONSE
        url1 = ''
        c.manage_catalogFoundItems(req, rsp, url1, url1,
                                   obj_metatypes=['DTML Document'])
        indextime = time.time() - t
        out("bulk index took %s seconds.. " % indextime)
        out("total time for load and index was %s seconds.. " %
            (loadtime + indextime))

    def checkTimeIncrementalIndexAndQuery(self):
        print
        c = self.app.catalogtest.catalog
        t = time.time()
        max = 500
        m = loadmail(self.app.catalogtest, 'zopemail',
                     os.path.join(here, 'zope.mbox'), max, c)
        get_transaction().commit()
        total = time.time() - t
        out("total time for load and index was %s seconds.. " % total)
        t = time.time()
        rs = c()  # empty query should return all
        assert len(rs) == max, len(rs)
        dates = m['date']
        froms = m['from']
        tos = m['to']
        titles = m['title']
        assert len(c({'date': 'foobarfoo'})) == 0  # should return no results
        for x in dates:
            assert len(c({'date': x})) == 1  # each date should be fieldindexed
        assert len(c({'from': 'a'})) == 0  # should be caught by splitter
        assert len(c({'raw': 'chris'})) != 0
        assert len(c({'raw': 'gghdjkasjdsda'})) == 0
        assert c({'PrincipiaSearchSource': 'the*'})

    def checkTimeSubcommit(self):
        print
        for x in (None, 100, 500, 1000, 10000):
            out("testing subcommit at theshhold of %s" % x)
            if x is not None:
                # First pass reuses the framework's setUp; later passes
                # rebuild the fixture explicitly.
                self.setUp()
            c = self.app.catalogtest.catalog
            c.threshold = x
            get_transaction().commit()
            t = time.time()
            loadmail(self.app.catalogtest, 'zopemail',
                     os.path.join(here, 'zope.mbox'), 500, c)
            get_transaction().commit()
            total = time.time() - t
            out("total time with subcommit thresh %s was %s seconds.. " %
                (x, total))
            self.tearDown()
# utility
def
loadmail
(
folder
,
name
,
mbox
,
max
=
None
,
catalog
=
None
):
"""
creates a folder inside object 'folder' named 'name', opens
filename 'mbox' and adds 'max' mail messages as DTML documents to
the ZODB inside the folder named 'name'. If 'catalog' (which
should be a ZCatalog object) is passed in, call catalog_object on it
with the document while we're iterating. If 'max' is not None,
only do 'max' messages, else do all messages in the mbox archive.
"""
m
=
{
'date'
:[],
'from'
:[],
'to'
:[],
'title'
:[]}
folder
.
manage_addFolder
(
name
)
folder
=
getattr
(
folder
,
name
)
mb
=
mailbox
.
UnixMailbox
(
open
(
mbox
))
i
=
0
every
=
100
message
=
mb
.
next
()
while
message
:
part
=
`i/every * 100`
try
:
dest
=
getattr
(
folder
,
part
)
except
AttributeError
:
folder
.
manage_addFolder
(
part
)
dest
=
getattr
(
folder
,
part
)
dest
.
manage_addDTMLDocument
(
str
(
i
),
file
=
message
.
fp
.
read
())
doc
=
getattr
(
dest
,
str
(
i
))
i
=
i
+
1
for
h
in
message
.
headers
:
h
=
strip
(
h
)
l
=
find
(
h
,
':'
)
if
l
<=
0
:
continue
name
=
lower
(
h
[:
l
])
if
name
==
'subject'
:
name
=
'title'
h
=
strip
(
h
[
l
+
1
:])
type
=
'string'
if
0
and
name
==
'date'
:
type
=
'date'
elif
0
:
try
:
atoi
(
h
)
except
:
pass
else
:
type
=
int
if
name
==
'title'
:
doc
.
manage_changeProperties
(
title
=
h
)
m
[
name
].
append
(
h
)
elif
name
in
(
'to'
,
'from'
,
'date'
):
try
:
doc
.
manage_addProperty
(
name
,
h
,
type
)
except
:
pass
m
[
name
].
append
(
h
)
if
catalog
:
path
=
join
(
doc
.
getPhysicalPath
(),
'/'
)
catalog
.
catalog_object
(
doc
,
path
)
if
max
is
not
None
:
if
i
>=
max
:
break
message
=
mb
.
next
()
return
m
def
out
(
s
):
print
" %s"
%
s
def test_suite():
    # All 'check*' methods of the timing test case, as one suite.
    timing_suite = makeSuite(TestTimeIndex, 'check')
    return TestSuite((timing_suite,))
def
main
():
mb
=
os
.
path
.
join
(
here
,
'zope.mbox'
)
if
not
os
.
path
.
isfile
(
mb
):
print
"do you want to get the zope.mbox file from lists.zope.org?"
print
"it's required for testing (98MB, ~ 30mins on fast conn)"
print
"it's also available at korak:/home/chrism/zope.mbox"
print
"-- type 'Y' or 'N'"
a
=
raw_input
()
if
lower
(
a
[:
1
])
==
'y'
:
server
=
'lists.zope.org:80'
method
=
'/pipermail/zope.mbox/zope.mbox'
h
=
httplib
.
HTTP
(
server
)
h
.
putrequest
(
'GET'
,
method
)
h
.
putheader
(
'User-Agent'
,
'silly'
)
h
.
putheader
(
'Accept'
,
'text/html'
)
h
.
putheader
(
'Accept'
,
'text/plain'
)
h
.
putheader
(
'Host'
,
server
)
h
.
endheaders
()
errcode
,
errmsg
,
headers
=
h
.
getreply
()
if
errcode
!=
200
:
f
=
h
.
getfile
()
data
=
f
.
read
()
print
data
raise
"Error reading from host %s"
%
server
f
=
h
.
getfile
()
out
=
open
(
mb
,
'w'
)
print
"this is going to take a while..."
print
"downloading mbox from %s"
%
server
while
1
:
l
=
f
.
readline
()
if
not
l
:
break
out
.
write
(
l
)
alltests
=
test_suite
()
runner
=
TextTestRunner
()
runner
.
run
(
alltests
)
def debug():
    # Run the timing suite in unittest debug mode (errors drop into pdb).
    test_suite().debug()


if __name__ == '__main__':
    # With an argument, call the named module function (e.g. 'debug');
    # otherwise run main().
    if len(sys.argv) > 1:
        globals()[sys.argv[1]]()
    else:
        main()
lib/python/SearchIndex/GlobbingLexicon.py
View file @
e6b5d0c3
...
...
@@ -85,18 +85,15 @@
from
Lexicon
import
Lexicon
from
Splitter
import
Splitter
from
intSet
import
intSet
from
UnTextIndex
import
Or
import
re
,
string
import
OIBTree
,
BTree
,
IOBTree
,
IIBTree
# Short cuts for common data containers
OIBTree
=
OIBTree
.
BTree
# Object -> Integer
OOBTree
=
BTree
.
BTree
# Object -> Object
IOBTree
=
IOBTree
.
BTree
# Integer -> Object
IIBucket
=
IIBTree
.
Bucket
# Integer -> Integer
from
BTrees.IIBTree
import
IISet
,
union
,
IITreeSet
from
BTrees.OIBTree
import
OIBTree
from
BTrees.IOBTree
import
IOBTree
from
BTrees.OOBTree
import
OOBTree
from
randid
import
randid
class
GlobbingLexicon
(
Lexicon
):
"""Lexicon which supports basic globbing function ('*' and '?').
...
...
@@ -127,11 +124,24 @@ class GlobbingLexicon(Lexicon):
def
__init__
(
self
):
self
.
counter
=
0
# word id counter XXX
self
.
clear
()
def
clear
(
self
):
self
.
_lexicon
=
OIBTree
()
self
.
_inverseLex
=
IOBTree
()
self
.
_digrams
=
OOBTree
()
def
_convertBTrees
(
self
,
threshold
=
200
):
Lexicon
.
_convertBTrees
(
self
,
threshold
)
if
type
(
self
.
_digrams
)
is
OOBTree
:
return
from
BTrees.convert
import
convert
_digrams
=
self
.
_digrams
self
.
_digrams
=
OOBTree
()
self
.
_digrams
.
_p_jar
=
self
.
_p_jar
convert
(
_digrams
,
self
.
_digrams
,
threshold
,
IITreeSet
)
def
createDigrams
(
self
,
word
):
"""Returns a list with the set of digrams in the word."""
...
...
@@ -139,8 +149,8 @@ class GlobbingLexicon(Lexicon):
digrams
.
append
(
self
.
eow
+
word
[
0
])
# Mark the beginning
for
i
in
range
(
len
(
word
)):
digrams
.
append
(
word
[
i
:
i
+
2
])
for
i
in
range
(
1
,
len
(
word
)):
digrams
.
append
(
word
[
i
-
1
:
i
+
1
])
digrams
[
-
1
]
=
digrams
[
-
1
]
+
self
.
eow
# Mark the end
...
...
@@ -157,6 +167,8 @@ class GlobbingLexicon(Lexicon):
set
=
getWordId
# Kludge for old code
def
getWord
(
self
,
wid
):
return
self
.
_inverseLex
.
get
(
wid
,
None
)
def
assignWordId
(
self
,
word
):
"""Assigns a new word id to the provided word, and return it."""
...
...
@@ -166,19 +178,34 @@ class GlobbingLexicon(Lexicon):
if
self
.
_lexicon
.
has_key
(
word
):
return
self
.
_lexicon
[
word
]
# First we go ahead and put the forward and reverse maps in.
self
.
_lexicon
[
word
]
=
self
.
counter
self
.
_inverseLex
[
self
.
counter
]
=
word
# Get word id. BBB Backward compat pain.
inverse
=
self
.
_inverseLex
try
:
insert
=
inverse
.
insert
except
AttributeError
:
# we have an "old" BTree object
if
inverse
:
wid
=
inverse
.
keys
()[
-
1
]
+
1
else
:
self
.
_inverseLex
=
IOBTree
()
wid
=
1
inverse
[
wid
]
=
word
else
:
# we have a "new" IOBTree object
wid
=
randid
()
while
not
inverse
.
insert
(
wid
,
word
):
wid
=
randid
()
self
.
_lexicon
[
word
]
=
wid
# Now take all the digrams and insert them into the digram map.
for
digram
in
self
.
createDigrams
(
word
):
set
=
self
.
_digrams
.
get
(
digram
)
set
=
self
.
_digrams
.
get
(
digram
,
None
)
if
set
is
None
:
self
.
_digrams
[
digram
]
=
set
=
int
Set
()
set
.
insert
(
self
.
counter
)
self
.
_digrams
[
digram
]
=
set
=
II
Set
()
set
.
insert
(
wid
)
self
.
counter
=
self
.
counter
+
1
return
self
.
counter
-
1
# Adjust for the previous increment
return
wid
def
get
(
self
,
pattern
):
...
...
@@ -208,14 +235,11 @@ class GlobbingLexicon(Lexicon):
return
(
result
,
)
## now get all of the intsets that contain the result digrams
result
=
IIBucket
()
result
=
None
for
digram
in
digrams
:
if
self
.
_digrams
.
has_key
(
digram
):
matchSet
=
self
.
_digrams
[
digram
]
if
matchSet
is
not
None
:
result
=
IIBucket
().
union
(
matchSet
)
result
=
union
(
result
,
self
.
_digrams
.
get
(
digram
,
None
))
if
len
(
result
)
==
0
:
if
not
result
:
return
()
else
:
## now we have narrowed the list of possible candidates
...
...
@@ -227,10 +251,10 @@ class GlobbingLexicon(Lexicon):
expr
=
re
.
compile
(
self
.
createRegex
(
pattern
))
words
=
[]
hits
=
[]
for
x
in
result
.
keys
()
:
hits
=
IISet
()
for
x
in
result
:
if
expr
.
match
(
self
.
_inverseLex
[
x
]):
hits
.
append
(
x
)
hits
.
insert
(
x
)
return
hits
...
...
@@ -242,7 +266,6 @@ class GlobbingLexicon(Lexicon):
def
query_hook
(
self
,
q
):
"""expand wildcards"""
words
=
[]
wids
=
[]
for
w
in
q
:
if
(
(
self
.
multi_wc
in
w
)
or
(
self
.
single_wc
in
w
)
):
...
...
@@ -286,3 +309,5 @@ class GlobbingLexicon(Lexicon):
r'()&|!@#$%^{}\
<>
')
return "%s$" % result
lib/python/SearchIndex/Index.py
View file @
e6b5d0c3
...
...
@@ -84,11 +84,11 @@
##############################################################################
"""Simple column indices"""
__version__
=
'$Revision: 1.2
7
$'
[
11
:
-
2
]
__version__
=
'$Revision: 1.2
8
$'
[
11
:
-
2
]
from
Persistence
import
Persistent
from
BTree
import
BTree
from
intSet
import
int
Set
from
BTree
s.OOBTree
import
OO
BTree
from
BTrees.IIBTree
import
IITree
Set
import
operator
from
Missing
import
MV
import
string
...
...
@@ -135,7 +135,7 @@ class Index(Persistent):
self
.
id
=
id
self
.
ignore_ex
=
ignore_ex
self
.
call_methods
=
call_methods
self
.
_index
=
BTree
()
self
.
_index
=
OO
BTree
()
self
.
_reindex
()
else
:
...
...
@@ -176,7 +176,7 @@ class Index(Persistent):
def
clear
(
self
):
self
.
_index
=
BTree
()
self
.
_index
=
OO
BTree
()
def
_reindex
(
self
,
start
=
0
):
...
...
@@ -200,7 +200,7 @@ class Index(Persistent):
if
k
is
None
or
k
==
MV
:
continue
set
=
get
(
k
)
if
set
is
None
:
index
[
k
]
=
set
=
int
Set
()
if
set
is
None
:
index
[
k
]
=
set
=
IITree
Set
()
set
.
insert
(
i
)
...
...
@@ -225,7 +225,7 @@ class Index(Persistent):
return
set
=
index
.
get
(
k
)
if
set
is
None
:
index
[
k
]
=
set
=
int
Set
()
if
set
is
None
:
index
[
k
]
=
set
=
IITree
Set
()
set
.
insert
(
i
)
...
...
@@ -301,8 +301,7 @@ class Index(Persistent):
if
hi
:
setlist
=
index
.
items
(
lo
,
hi
)
else
:
setlist
=
index
.
items
(
lo
)
for
k
,
set
in
setlist
:
if
r
is
None
:
r
=
set
else
:
r
=
r
.
union
(
set
)
w
,
r
=
weightedUnion
(
r
,
set
)
except
KeyError
:
pass
else
:
#not a range
get
=
index
.
get
...
...
@@ -310,11 +309,10 @@ class Index(Persistent):
if
key
:
anyTrue
=
1
set
=
get
(
key
)
if
set
is
not
None
:
if
r
is
None
:
r
=
set
else
:
r
=
r
.
union
(
set
)
w
,
r
=
weightedUnion
(
r
,
set
)
if
r
is
None
:
if
anyTrue
:
r
=
int
Set
()
if
anyTrue
:
r
=
II
Set
()
else
:
return
None
return
r
,
(
id
,)
...
...
lib/python/SearchIndex/Lexicon.py
View file @
e6b5d0c3
...
...
@@ -92,11 +92,12 @@ mapping.
from
Splitter
import
Splitter
from
Persistence
import
Persistent
from
Acquisition
import
Implicit
import
OIBTree
,
BTree
OIBTree
=
OIBTree
.
BTree
OOBTree
=
BTree
.
BTree
import
re
from
BTrees.OIBTree
import
OIBTree
from
BTrees.IOBTree
import
IOBTree
from
BTrees.IIBTree
import
IISet
,
IITreeSet
from
randid
import
randid
class
Lexicon
(
Persistent
,
Implicit
):
"""Maps words to word ids and then some
...
...
@@ -112,13 +113,38 @@ class Lexicon(Persistent, Implicit):
stop_syn
=
{}
def
__init__
(
self
,
stop_syn
=
None
):
self
.
_lexicon
=
OIBTree
()
self
.
counter
=
0
self
.
clear
()
if
stop_syn
is
None
:
self
.
stop_syn
=
{}
else
:
self
.
stop_syn
=
stop_syn
def
clear
(
self
):
self
.
_lexicon
=
OIBTree
()
self
.
_inverseLex
=
IOBTree
()
def
_convertBTrees
(
self
,
threshold
=
200
):
if
(
type
(
self
.
_lexicon
)
is
OIBTree
and
type
(
getattr
(
self
,
'_inverseLex'
,
None
))
is
IOBTree
):
return
from
BTrees.convert
import
convert
lexicon
=
self
.
_lexicon
self
.
_lexicon
=
OIBTree
()
self
.
_lexicon
.
_p_jar
=
self
.
_p_jar
convert
(
lexicon
,
self
.
_lexicon
,
threshold
)
try
:
inverseLex
=
self
.
_inverseLex
self
.
_inverseLex
=
IOBTree
()
except
AttributeError
:
# older lexicons didn't have an inverse lexicon
self
.
_inverseLex
=
IOBTree
()
inverseLex
=
self
.
_inverseLex
self
.
_inverseLex
.
_p_jar
=
self
.
_p_jar
convert
(
inverseLex
,
self
.
_inverseLex
,
threshold
)
def
set_stop_syn
(
self
,
stop_syn
):
""" pass in a mapping of stopwords and synonyms. Format is:
...
...
@@ -135,13 +161,16 @@ class Lexicon(Persistent, Implicit):
def
getWordId
(
self
,
word
):
""" return the word id of 'word' """
if
self
.
_lexicon
.
has_key
(
word
):
return
self
.
_lexicon
[
word
]
else
:
return
self
.
assignWordId
(
word
)
wid
=
self
.
_lexicon
.
get
(
word
,
None
)
if
wid
is
None
:
wid
=
self
.
assignWordId
(
word
)
return
wid
set
=
getWordId
def
getWord
(
self
,
wid
):
""" post-2.3.1b2 method, will not work with unconverted lexicons """
return
self
.
_inverseLex
.
get
(
wid
,
None
)
def
assignWordId
(
self
,
word
):
"""Assigns a new word id to the provided word and returns it."""
...
...
@@ -149,17 +178,29 @@ class Lexicon(Persistent, Implicit):
if
self
.
_lexicon
.
has_key
(
word
):
return
self
.
_lexicon
[
word
]
if
not
hasattr
(
self
,
'counter'
):
self
.
counter
=
0
self
.
_lexicon
[
intern
(
word
)]
=
self
.
counter
self
.
counter
=
self
.
counter
+
1
return
self
.
counter
-
1
try
:
inverse
=
self
.
_inverseLex
except
AttributeError
:
# woops, old lexicom wo wids
inverse
=
self
.
_inverseLex
=
IOBTree
()
for
word
,
wid
in
self
.
_lexicon
.
items
():
inverse
[
wid
]
=
word
wid
=
randid
()
while
not
inverse
.
insert
(
wid
,
word
):
wid
=
randid
()
self
.
_lexicon
[
intern
(
word
)]
=
wid
return
wid
def
get
(
self
,
key
,
default
=
None
):
"""Return the matched word against the key."""
return
[
self
.
_lexicon
.
get
(
key
,
default
)]
r
=
IISet
()
wid
=
self
.
_lexicon
.
get
(
key
,
default
)
if
wid
is
not
None
:
r
.
insert
(
wid
)
return
r
def
__getitem__
(
self
,
key
):
return
self
.
get
(
key
)
...
...
@@ -176,21 +217,6 @@ class Lexicon(Persistent, Implicit):
return
Splitter
(
astring
,
words
)
def
grep
(
self
,
query
):
"""
regular expression search through the lexicon
he he.
Do not use unless you know what your doing!!!
"""
expr
=
re
.
compile
(
query
)
hits
=
[]
for
x
in
self
.
_lexicon
.
keys
():
if
expr
.
search
(
x
):
hits
.
append
(
x
)
return
hits
def
query_hook
(
self
,
q
):
""" we don't want to modify the query cuz we're dumb """
return
q
...
...
lib/python/SearchIndex/ResultList.py
View file @
e6b5d0c3
...
...
@@ -83,18 +83,33 @@
#
##############################################################################
from
BTrees.IIBTree
import
IIBucket
from
BTrees.IIBTree
import
weightedIntersection
,
weightedUnion
,
difference
from
BTrees.OOBTree
import
OOSet
,
union
class
ResultList
:
def
__init__
(
self
,
d
,
words
,
index
,
TupleType
=
type
(())):
self
.
_index
=
index
if
type
(
words
)
is
not
OOSet
:
words
=
OOSet
(
words
)
self
.
_words
=
words
if
(
type
(
d
)
is
TupleType
):
self
.
_dict
=
{
d
[
0
]
:
d
[
1
]
}
else
:
self
.
_dict
=
d
def
__len__
(
self
):
return
len
(
self
.
_dict
)
if
(
type
(
d
)
is
TupleType
):
d
=
IIBucket
((
d
,))
elif
type
(
d
)
is
not
IIBucket
:
d
=
IIBucket
(
d
)
self
.
_dict
=
d
self
.
__getitem__
=
d
.
__getitem__
try
:
self
.
__nonzero__
=
d
.
__nonzero__
except
:
pass
self
.
get
=
d
.
get
def
__nonzero__
(
self
):
return
not
not
self
.
_dict
def
__getitem__
(
self
,
key
):
return
self
.
_dict
[
key
]
def
bucket
(
self
):
return
self
.
_dict
def
keys
(
self
):
return
self
.
_dict
.
keys
()
...
...
@@ -103,42 +118,29 @@ class ResultList:
def
items
(
self
):
return
self
.
_dict
.
items
()
def
__and__
(
self
,
x
):
result
=
{}
dict
=
self
.
_dict
xdict
=
x
.
_dict
xhas
=
xdict
.
has_key
for
id
,
score
in
dict
.
items
():
if
xhas
(
id
):
result
[
id
]
=
xdict
[
id
]
+
score
return
self
.
__class__
(
result
,
self
.
_words
+
x
.
_words
,
self
.
_index
)
return
self
.
__class__
(
weightedIntersection
(
self
.
_dict
,
x
.
_dict
)[
1
],
union
(
self
.
_words
,
x
.
_words
),
self
.
_index
,
)
def
and_not
(
self
,
x
):
result
=
{}
dict
=
self
.
_dict
xdict
=
x
.
_dict
xhas
=
xdict
.
has_key
for
id
,
score
in
dict
.
items
():
if
not
xhas
(
id
):
result
[
id
]
=
score
return
self
.
__class__
(
result
,
self
.
_words
,
self
.
_index
)
return
self
.
__class__
(
difference
(
self
.
_dict
,
x
.
_dict
),
self
.
_words
,
self
.
_index
,
)
def
__or__
(
self
,
x
):
result
=
{}
dict
=
self
.
_dict
has
=
dict
.
has_key
xdict
=
x
.
_dict
xhas
=
xdict
.
has_key
for
id
,
score
in
dict
.
items
():
if
xhas
(
id
):
result
[
id
]
=
xdict
[
id
]
+
score
else
:
result
[
id
]
=
score
for
id
,
score
in
xdict
.
items
():
if
not
has
(
id
):
result
[
id
]
=
score
return
self
.
__class__
(
weightedUnion
(
self
.
_dict
,
x
.
_dict
)[
1
],
union
(
self
.
_words
,
x
.
_words
),
self
.
_index
,
)
return
self
.
__class__
(
result
,
self
.
_words
+
x
.
_words
,
self
.
_index
)
def
near
(
self
,
x
):
result
=
{}
result
=
IIBucket
dict
=
self
.
_dict
xdict
=
x
.
_dict
xhas
=
xdict
.
has_key
...
...
@@ -160,5 +162,6 @@ class ResultList:
else
:
score
=
(
score
+
xdict
[
id
])
/
d
result
[
id
]
=
score
return
self
.
__class__
(
result
,
self
.
_words
+
x
.
_words
,
self
.
_index
)
return
self
.
__class__
(
result
,
union
(
self
.
_words
,
x
.
_words
),
self
.
_index
)
lib/python/SearchIndex/TextIndex.py
View file @
e6b5d0c3
...
...
@@ -202,13 +202,13 @@ Notes on a new text index design
space.
"""
__version__
=
'$Revision: 1.25 $'
[
11
:
-
2
]
__version__
=
'$Revision: 1.26 $'
[
11
:
-
2
]
#XXX I strongly suspect that this is broken, but I'm not going to fix it. :(
from
Globals
import
Persistent
import
BTree
,
IIBTree
BTree
=
BTree
.
BTree
IIBTree
=
IIBTree
.
Bucket
from
intSet
import
intSet
from
BTrees.OOBTree
import
OOBTree
from
BTrees.IIBTree
import
IISet
,
IIBucket
import
operator
from
Splitter
import
Splitter
from
string
import
strip
...
...
@@ -250,7 +250,7 @@ class TextIndex(Persistent):
self
.
id
=
id
self
.
ignore_ex
=
ignore_ex
self
.
call_methods
=
call_methods
self
.
_index
=
BTree
()
self
.
_index
=
OOBTree
()
#XXX Is this really an IOBTree?
self
.
_syn
=
stop_word_dict
self
.
_reindex
()
else
:
...
...
@@ -261,7 +261,7 @@ class TextIndex(Persistent):
def
clear
(
self
):
self
.
_index
=
BTree
()
self
.
_index
=
OO
BTree
()
def
positions
(
self
,
docid
,
words
):
...
...
@@ -366,7 +366,7 @@ class TextIndex(Persistent):
index
[
word
]
=
r
elif
type
(
r
)
is
dictType
:
if
len
(
r
)
>
4
:
b
=
IIB
Tree
()
b
=
IIB
ucket
()
for
k
,
v
in
r
.
items
():
b
[
k
]
=
v
r
=
b
r
[
id
]
=
score
...
...
@@ -440,7 +440,7 @@ class TextIndex(Persistent):
for
key
in
keys
:
key
=
strip
(
key
)
if
not
key
:
continue
rr
=
int
Set
()
rr
=
II
Set
()
try
:
for
i
,
score
in
query
(
key
,
self
).
items
():
if
score
:
rr
.
insert
(
i
)
...
...
@@ -451,5 +451,5 @@ class TextIndex(Persistent):
r
=
r
.
intersection
(
rr
)
if
r
is
not
None
:
return
r
,
(
id
,)
return
int
Set
(),
(
id
,)
return
II
Set
(),
(
id
,)
lib/python/SearchIndex/UnIndex.py
View file @
e6b5d0c3
...
...
@@ -85,21 +85,25 @@
"""Simple column indices"""
__version__
=
'$Revision: 1.25 $'
[
11
:
-
2
]
__version__
=
'$Revision: 1.26 $'
[
11
:
-
2
]
from
Globals
import
Persistent
from
Acquisition
import
Implicit
import
BTree
import
IOBTree
from
intSet
import
intSet
import
operator
from
Missing
import
MV
import
string
,
pdb
from
zLOG
import
LOG
,
ERROR
from
types
import
*
from
BTrees.OOBTree
import
OOBTree
from
BTrees.IOBTree
import
IOBTree
from
BTrees.IIBTree
import
IITreeSet
,
IISet
,
union
import
BTrees.Length
import
sys
_marker
=
[]
def
nonEmpty
(
s
):
"returns true if a non-empty string or any other (nonstring) type"
...
...
@@ -115,7 +119,7 @@ class UnIndex(Persistent, Implicit):
meta_type
=
'Field Index'
def
__init__
(
self
,
id
=
None
,
ignore_ex
=
None
,
call_methods
=
None
):
def
__init__
(
self
,
id
,
ignore_ex
=
None
,
call_methods
=
None
):
"""Create an unindex
UnIndexes are indexes that contain two index components, the
...
...
@@ -123,6 +127,11 @@ class UnIndex(Persistent, Implicit):
index. The inverted index is so that objects can be unindexed
even when the old value of the object is not known.
e.g.
self._index = {datum:[documentId1, documentId2]}
self._unindex = {documentId:datum}
The arguments are:
'id' -- the name of the item attribute to index. This is
...
...
@@ -138,23 +147,53 @@ class UnIndex(Persistent, Implicit):
uninded methods for this to work.
"""
######################################################################
# For b/w compatability, have to allow __init__ calls with zero args
if
not
id
==
ignore_ex
==
call_methods
==
None
:
self
.
id
=
id
self
.
ignore_ex
=
ignore_ex
# currently unimplimented
self
.
call_methods
=
call_methods
self
.
_index
=
BTree
.
BTree
()
self
.
_unindex
=
IOBTree
.
BTree
()
else
:
pass
self
.
__len__
=
BTrees
.
Length
.
Length
()
# see __len__ method docstring
self
.
clear
()
def
clear
(
self
):
# inplace opportunistic conversion from old-style to new style BTrees
try
:
self
.
__len__
.
set
(
0
)
except
AttributeError
:
self
.
__len__
=
BTrees
.
Length
.
Length
()
self
.
_index
=
OOBTree
()
self
.
_unindex
=
IOBTree
()
def
_convertBTrees
(
self
,
threshold
=
200
):
if
type
(
self
.
_index
)
is
OOBTree
:
return
from
BTrees.convert
import
convert
_index
=
self
.
_index
self
.
_index
=
OOBTree
()
def
convertSet
(
s
,
IITreeSet
=
IITreeSet
):
if
len
(
s
)
==
1
:
try
:
return
s
[
0
]
# convert to int
except
:
pass
# This is just an optimization.
return
IITreeSet
(
s
)
convert
(
_index
,
self
.
_index
,
threshold
,
convertSet
)
_unindex
=
self
.
_unindex
self
.
_unindex
=
IOBTree
()
convert
(
_unindex
,
self
.
_unindex
,
threshold
)
self
.
__len__
=
BTrees
.
Length
.
Length
()
def
__nonzero__
(
self
):
return
not
not
self
.
_unindex
def
__len__
(
self
):
return
len
(
self
.
_unindex
)
"""Return the number of objects indexed.
This method is only called for indexes which have "old" BTrees,
and the *only* reason that UnIndexes maintain a __len__ is for
the searching code in the catalog during sorting.
"""
return
len
(
self
.
_unindex
)
def
histogram
(
self
):
"""Return a mapping which provides a histogram of the number of
...
...
@@ -173,31 +212,39 @@ class UnIndex(Persistent, Implicit):
return
self
.
_unindex
.
keys
()
def
getEntryForObject
(
self
,
documentId
,
default
=
MV
):
def
getEntryForObject
(
self
,
documentId
,
default
=
_marker
):
"""Takes a document ID and returns all the information we have
on that specific object."""
if
default
is
not
MV
:
return
self
.
_unindex
.
get
(
documentId
,
default
)
else
:
if
default
is
_marker
:
return
self
.
_unindex
.
get
(
documentId
)
else
:
return
self
.
_unindex
.
get
(
documentId
,
default
)
def
removeForwardIndexEntry
(
self
,
entry
,
documentId
):
"""Take the entry provided and remove any reference to documentId
in its entry in the index."""
indexRow
=
self
.
_index
.
get
(
entry
,
MV
)
if
indexRow
is
not
MV
:
global
_marker
indexRow
=
self
.
_index
.
get
(
entry
,
_marker
)
if
indexRow
is
not
_marker
:
try
:
indexRow
.
remove
(
documentId
)
if
len
(
indexRow
)
==
0
:
if
not
indexRow
:
del
self
.
_index
[
entry
]
try
:
self
.
__len__
.
change
(
-
1
)
except
AttributeError
:
pass
# pre-BTrees-module instance
except
AttributeError
:
# index row is an int
del
self
.
_index
[
entry
]
try
:
self
.
__len__
.
change
(
-
1
)
except
AttributeError
:
pass
# pre-BTrees-module instance
except
:
LOG
(
self
.
__class__
.
__name__
,
ERROR
,
(
'unindex_object could not remove '
'
integer i
d %s from index %s. This '
'
documentI
d %s from index %s. This '
'should not happen.'
%
(
str
(
documentId
),
str
(
self
.
id
))))
%
(
str
(
documentId
),
str
(
self
.
id
))),
''
,
sys
.
exc_info
())
else
:
LOG
(
self
.
__class__
.
__name__
,
ERROR
,
(
'unindex_object tried to retrieve set %s '
...
...
@@ -210,20 +257,25 @@ class UnIndex(Persistent, Implicit):
in the forward index.
This will also deal with creating the entire row if necessary."""
indexRow
=
self
.
_index
.
get
(
entry
,
MV
)
global
_marker
indexRow
=
self
.
_index
.
get
(
entry
,
_marker
)
# Make sure there's actually a row there already. If not, create
# an IntSet and stuff it in first.
if
indexRow
is
MV
:
self
.
_index
[
entry
]
=
intSet
()
indexRow
=
self
.
_index
[
entry
]
indexRow
.
insert
(
documentId
)
if
indexRow
is
_marker
:
self
.
_index
[
entry
]
=
documentId
try
:
self
.
__len__
.
change
(
1
)
except
AttributeError
:
pass
# pre-BTrees-module instance
else
:
try
:
indexRow
.
insert
(
documentId
)
except
AttributeError
:
# index row is an int
indexRow
=
IITreeSet
((
indexRow
,
documentId
))
self
.
_index
[
entry
]
=
indexRow
def
index_object
(
self
,
documentId
,
obj
,
threshold
=
None
):
""" index and object 'obj' with integer id 'documentId'"""
global
_marker
returnStatus
=
0
# First we need to see if there's anything interesting to look at
...
...
@@ -235,14 +287,16 @@ class UnIndex(Persistent, Implicit):
if
callable
(
datum
):
datum
=
datum
()
except
AttributeError
:
datum
=
MV
datum
=
_marker
# We don't want to do anything that we don't have to here, so we'll
# check to see if the new and existing information is the same.
oldDatum
=
self
.
_unindex
.
get
(
documentId
,
MV
)
if
not
datum
=
=
oldDatum
:
if
oldDatum
is
not
MV
:
oldDatum
=
self
.
_unindex
.
get
(
documentId
,
_marker
)
if
datum
!
=
oldDatum
:
if
oldDatum
is
not
_marker
:
self
.
removeForwardIndexEntry
(
oldDatum
,
documentId
)
if
datum
is
not
_marker
:
self
.
insertForwardIndexEntry
(
datum
,
documentId
)
self
.
_unindex
[
documentId
]
=
datum
...
...
@@ -250,21 +304,24 @@ class UnIndex(Persistent, Implicit):
return
returnStatus
def
unindex_object
(
self
,
documentId
):
""" Unindex the object with integer id 'documentId' and don't
raise an exception if we fail """
unindexRecord
=
self
.
_unindex
.
get
(
documentId
,
None
)
if
unindexRecord
is
None
:
global
_marker
unindexRecord
=
self
.
_unindex
.
get
(
documentId
,
_marker
)
if
unindexRecord
is
_marker
:
return
None
self
.
removeForwardIndexEntry
(
unindexRecord
,
documentId
)
try
:
del
self
.
_unindex
[
documentId
]
except
:
LOG
(
'UnIndex'
,
ERROR
,
'Attempt to unindex nonexistent document'
' with id %s'
%
documentId
)
def
_apply_index
(
self
,
request
,
cid
=
''
):
def
_apply_index
(
self
,
request
,
cid
=
''
,
type
=
type
,
None
=
None
):
"""Apply the index to query parameters given in the argument,
request
...
...
@@ -301,6 +358,7 @@ class UnIndex(Persistent, Implicit):
r
=
None
anyTrue
=
0
opr
=
None
IntType
=
type
(
1
)
if
request
.
has_key
(
id
+
'_usage'
):
# see if any usage params are sent to field
...
...
@@ -321,10 +379,7 @@ class UnIndex(Persistent, Implicit):
setlist
=
index
.
items
(
lo
)
for
k
,
set
in
setlist
:
if
r
is
None
:
r
=
set
else
:
r
=
r
.
union
(
set
)
r
=
union
(
r
,
set
)
except
KeyError
:
pass
...
...
@@ -334,16 +389,18 @@ class UnIndex(Persistent, Implicit):
for
key
in
keys
:
if
nonEmpty
(
key
):
anyTrue
=
1
set
=
get
(
key
)
set
=
get
(
key
,
None
)
if
set
is
not
None
:
if
r
is
None
:
r
=
set
else
:
r
=
r
.
union
(
set
)
r
=
union
(
r
,
set
)
if
type
(
r
)
is
IntType
:
r
=
IISet
((
r
,))
if
r
:
return
r
,
(
id
,)
if
r
is
None
:
if
anyTrue
:
r
=
int
Set
()
r
=
II
Set
()
else
:
return
None
...
...
@@ -369,8 +426,9 @@ class UnIndex(Persistent, Implicit):
name
=
self
.
id
elif
name
!=
self
.
id
:
return
[]
if
not
withLengths
:
return
tuple
(
filter
(
nonEmpty
,
self
.
_index
.
keys
())
filter
(
nonEmpty
,
self
.
_index
.
keys
())
)
else
:
rl
=
[]
...
...
@@ -379,10 +437,8 @@ class UnIndex(Persistent, Implicit):
else
:
rl
.
append
((
i
,
len
(
self
.
_index
[
i
])))
return
tuple
(
rl
)
def
keyForDocument
(
self
,
id
):
return
self
.
_unindex
(
id
)
def
clear
(
self
):
self
.
_index
=
BTree
.
BTree
()
self
.
_unindex
=
IOBTree
.
BTree
()
def
items
(
self
):
return
self
.
_index
.
items
()
lib/python/SearchIndex/UnKeywordIndex.py
View file @
e6b5d0c3
...
...
@@ -83,10 +83,10 @@
#
##############################################################################
from
UnIndex
import
UnIndex
,
MV
,
intSet
from
UnIndex
import
UnIndex
from
zLOG
import
LOG
,
ERROR
from
Missing
import
MV
from
types
import
*
from
types
import
StringType
from
BTrees.OOBTree
import
OOSet
,
difference
class
UnKeywordIndex
(
UnIndex
):
...
...
@@ -111,69 +111,54 @@ class UnKeywordIndex(UnIndex):
# self.id is the name of the index, which is also the name of the
# attribute we're interested in. If the attribute is callable,
# we'll do so.
try
:
newKeywords
=
getattr
(
obj
,
self
.
id
)
newKeywords
=
getattr
(
obj
,
self
.
id
,
None
)
if
callable
(
newKeywords
):
newKeywords
=
newKeywords
()
except
AttributeError
:
newKeywords
=
MV
if
type
(
newKeywords
)
is
StringType
:
newKeywords
=
(
newKeywords
,
)
if
newKeywords
is
None
:
self
.
unindex_object
(
documentId
)
return
0
# Now comes the fun part, we need to figure out what's changed
# if anything from the previous record.
oldKeywords
=
self
.
_unindex
.
get
(
documentId
,
MV
)
oldKeywords
=
self
.
_unindex
.
get
(
documentId
,
None
)
if
newKeywords
is
MV
:
self
.
unindex_object
(
documentId
)
return
0
elif
oldKeywords
is
MV
:
if
oldKeywords
is
None
:
try
:
for
kw
in
newKeywords
:
self
.
insertForwardIndexEntry
(
kw
,
documentId
)
except
TypeError
:
return
0
else
:
# We need the old keywords to be a mapping so we can manipulate
# them more easily.
tmp
=
{}
try
:
for
kw
in
oldKeywords
:
tmp
[
kw
]
=
None
oldKeywords
=
tmp
# Now we're going to go through the new keywords,
# and add those that aren't already indexed. If
# they are already indexed, just delete them from
# the list.
for
kw
in
newKeywords
:
if
oldKeywords
.
has_key
(
kw
):
del
oldKeywords
[
kw
]
else
:
if
type
(
oldKeywords
)
is
not
OOSet
:
oldKeywords
=
OOSet
(
oldKeywords
)
newKeywords
=
OOSet
(
newKeywords
)
self
.
unindex_objectKeywords
(
documentId
,
difference
(
oldKeywords
,
newKeywords
))
for
kw
in
difference
(
newKeywords
,
oldKeywords
):
self
.
insertForwardIndexEntry
(
kw
,
documentId
)
# Now whatever is left in oldKeywords are keywords
# that we no longer have, and need to be removed
# from the indexes.
for
kw
in
oldKeywords
.
keys
():
self
.
removeForwardIndexEntry
(
kw
,
documentId
)
except
TypeError
:
return
0
self
.
_unindex
[
documentId
]
=
newKeywords
[:]
# Make a copy
self
.
_unindex
[
documentId
]
=
list
(
newKeywords
)
return
1
def
unindex_object
(
self
,
documentId
):
def
unindex_object
Keywords
(
self
,
documentId
,
keywords
):
""" carefully unindex the object with integer id 'documentId'"""
keywords
=
self
.
_unindex
.
get
(
documentId
,
MV
)
if
keywords
is
MV
:
return
None
if
keywords
is
not
None
:
for
kw
in
keywords
:
self
.
removeForwardIndexEntry
(
kw
,
documentId
)
def
unindex_object
(
self
,
documentId
):
""" carefully unindex the object with integer id 'documentId'"""
keywords
=
self
.
_unindex
.
get
(
documentId
,
None
)
self
.
unindex_objectKeywords
(
documentId
,
keywords
)
try
:
del
self
.
_unindex
[
documentId
]
except
KeyError
:
LOG
(
'UnKeywordIndex'
,
ERROR
,
'Attempt to unindex nonexistent'
' document id %s'
%
documentId
)
lib/python/SearchIndex/UnTextIndex.py
View file @
e6b5d0c3
...
...
@@ -91,14 +91,11 @@ undo information so that objects can be unindexed when the old value
is no longer known.
"""
__version__
=
'$Revision: 1.3
7
$'
[
11
:
-
2
]
__version__
=
'$Revision: 1.3
8
$'
[
11
:
-
2
]
import
BTree
,
IIBTree
,
IOBTree
,
OIBTree
import
string
,
regex
,
regsub
,
ts_regex
import
operator
from
intSet
import
intSet
from
Globals
import
Persistent
from
Acquisition
import
Implicit
from
Splitter
import
Splitter
...
...
@@ -107,10 +104,11 @@ from Lexicon import Lexicon
from
ResultList
import
ResultList
from
types
import
*
BTree
=
BTree
.
BTree
# Regular generic BTree
IOBTree
=
IOBTree
.
BTree
# Integer -> Object
IIBucket
=
IIBTree
.
Bucket
# Integer -> Integer
OIBTree
=
OIBTree
.
BTree
# Object -> Integer
from
BTrees.IOBTree
import
IOBTree
from
BTrees.OIBTree
import
OIBTree
from
BTrees.IIBTree
import
IIBTree
,
IIBucket
,
IISet
,
IITreeSet
from
BTrees.IIBTree
import
difference
,
weightedIntersection
AndNot
=
'andnot'
And
=
'and'
...
...
@@ -141,7 +139,7 @@ class UnTextIndex(Persistent, Implicit):
meta_type
=
'Text Index'
def
__init__
(
self
,
id
=
None
,
ignore_ex
=
None
,
def
__init__
(
self
,
id
,
ignore_ex
=
None
,
call_methods
=
None
,
lexicon
=
None
):
"""Create an index
...
...
@@ -159,15 +157,11 @@ class UnTextIndex(Persistent, Implicit):
'lexicon' is the lexicon object to specify, if None, the
index will use a private lexicon."""
if
not
id
==
ignore_ex
==
call_methods
==
None
:
self
.
id
=
id
self
.
ignore_ex
=
ignore_ex
self
.
call_methods
=
call_methods
self
.
_index
=
IOBTree
()
self
.
_unindex
=
IOBTree
()
else
:
pass
self
.
clear
()
if
lexicon
is
None
:
## if no lexicon is provided, create a default one
...
...
@@ -185,32 +179,55 @@ class UnTextIndex(Persistent, Implicit):
in this way, but I don't see too much of a problem with it."""
if
type
(
vocab_id
)
is
not
StringType
:
vocab
=
vocab_id
return
vocab_id
else
:
vocab
=
getattr
(
self
,
vocab_id
)
return
vocab
.
lexicon
def
__nonzero__
(
self
):
return
not
not
self
.
_unindex
def
__len__
(
self
):
"""Return the number of objects indexed."""
return
len
(
self
.
_unindex
)
# Too expensive
#def __len__(self):
# """Return the number of objects indexed."""
#
return len(self._unindex)
def
clear
(
self
):
"""Reinitialize the text index."""
self
.
_index
=
IOBTree
()
self
.
_unindex
=
IOBTree
()
def
_convertBTrees
(
self
,
threshold
=
200
):
if
type
(
self
.
_index
)
is
IOBTree
:
return
from
BTrees.convert
import
convert
_index
=
self
.
_index
self
.
_index
=
IOBTree
()
def
histogram
(
self
):
def
convertScores
(
scores
,
type
=
type
,
TupleType
=
TupleType
,
IIBTree
=
IIBTree
):
if
type
(
scores
)
is
not
TupleType
and
type
(
scores
)
is
not
IIBTree
():
scores
=
IIBTree
(
scores
)
return
scores
convert
(
_index
,
self
.
_index
,
threshold
,
convertScores
)
_unindex
=
self
.
_unindex
self
.
_unindex
=
IOBTree
()
convert
(
_unindex
,
self
.
_unindex
,
threshold
)
def
histogram
(
self
,
type
=
type
,
TupleType
=
type
(())):
"""Return a mapping which provides a histogram of the number of
elements found at each point in the index."""
histogram
=
{}
histogram
=
IIBucket
()
for
(
key
,
value
)
in
self
.
_index
.
items
():
entry
=
len
(
value
)
if
type
(
value
)
is
TupleType
:
entry
=
1
else
:
entry
=
len
(
value
)
histogram
[
entry
]
=
histogram
.
get
(
entry
,
0
)
+
1
return
histogram
...
...
@@ -227,13 +244,8 @@ class UnTextIndex(Persistent, Implicit):
if
results
is
None
:
return
default
else
:
# Now that we've got them, let's resolve out the word
# references
resolved
=
[]
for
(
word
,
wordId
)
in
wordMap
:
if
wordId
in
results
:
resolved
.
append
(
word
)
return
tuple
(
resolved
)
return
tuple
(
map
(
self
.
getLexicon
(
self
.
_lexicon
).
getWord
,
results
))
def
insertForwardIndexEntry
(
self
,
entry
,
documentId
,
score
=
1
):
...
...
@@ -247,7 +259,8 @@ class UnTextIndex(Persistent, Implicit):
5+ bucket.
"""
indexRow
=
self
.
_index
.
get
(
entry
,
None
)
index
=
self
.
_index
indexRow
=
index
.
get
(
entry
,
None
)
if
indexRow
is
not
None
:
if
type
(
indexRow
)
is
TupleType
:
...
...
@@ -260,78 +273,29 @@ class UnTextIndex(Persistent, Implicit):
if
indexRow
[
0
]
==
documentId
:
if
indexRow
[
1
]
!=
score
:
indexRow
=
(
documentId
,
score
)
index
[
entry
]
=
indexRow
else
:
indexRow
=
{
indexRow
[
0
]:
indexRow
[
1
]
}
indexRow
[
documentId
]
=
score
self
.
_index
[
entry
]
=
indexRow
elif
type
(
indexRow
)
is
DictType
:
if
indexRow
.
has_key
(
documentId
):
if
indexRow
[
documentId
]
==
score
:
return
1
# No need to update
elif
len
(
indexRow
)
>
4
:
# We have a mapping (dictionary), but it has
# grown too large, so we'll convert it to a
# bucket.
newRow
=
IIBucket
()
for
(
k
,
v
)
in
indexRow
.
items
():
newRow
[
k
]
=
v
indexRow
=
newRow
indexRow
[
documentId
]
=
score
self
.
_index
[
entry
]
=
indexRow
indexRow
=
{
indexRow
[
0
]:
indexRow
[
1
],
documentId
:
score
,
}
index
[
entry
]
=
indexRow
else
:
if
indexRow
.
get
(
documentId
,
-
1
)
!=
score
:
# score changed (or new entry)
if
type
(
indexRow
)
is
DictType
:
indexRow
[
documentId
]
=
score
if
len
(
indexRow
)
>
3
:
# Big enough to give it's own database record
indexRow
=
IIBTree
(
indexRow
)
index
[
entry
]
=
indexRow
else
:
# We've got a IIBucket already.
if
indexRow
.
has_key
(
documentId
):
if
indexRow
[
documentId
]
==
score
:
return
1
indexRow
[
documentId
]
=
score
else
:
# We don't have any information at this point, so we'll
# put our first entry in, and use a tuple to save space
self
.
_index
[
entry
]
=
(
documentId
,
score
)
return
1
def
insertReverseIndexEntry
(
self
,
entry
,
documentId
):
"""Insert the correct entry into the reverse indexes for future
unindexing."""
newRow
=
self
.
_unindex
.
get
(
documentId
,
[])
if
newRow
:
# Catch cases where we don't need to modify anything
if
entry
in
newRow
:
return
1
newRow
.
append
(
entry
)
self
.
_unindex
[
documentId
]
=
newRow
def
removeReverseEntry
(
self
,
entry
,
documentId
):
"""Removes a single entry from the reverse index."""
newRow
=
self
.
_unindex
.
get
(
documentId
,
[])
if
newRow
:
try
:
newRow
.
remove
(
entry
)
except
ValueError
:
pass
# We don't have it, this is bad
self
.
_unindex
[
documentId
]
=
newRow
def
removeForwardEntry
(
self
,
entry
,
documentId
):
"""Remove a single entry from the forward index."""
currentRow
=
self
.
_index
.
get
(
entry
,
None
)
if
type
(
currentRow
)
is
TupleType
:
del
self
.
_index
[
entry
]
elif
currentRow
is
not
None
:
try
:
del
self
.
_index
[
entry
][
documentId
]
except
(
KeyError
,
IndexError
,
TypeError
):
LOG
(
'UnTextIndex'
,
ERROR
,
'unindex_object tried to unindex nonexistent'
' document %s'
%
str
(
documentId
))
index
[
entry
]
=
(
documentId
,
score
)
def
index_object
(
self
,
documentId
,
obj
,
threshold
=
None
):
""" Index an object:
...
...
@@ -354,49 +318,58 @@ class UnTextIndex(Persistent, Implicit):
except
AttributeError
:
return
0
lexicon
=
self
.
getLexicon
(
self
.
_lexicon
)
splitter
=
lexicon
.
Splitter
sourceWords
=
self
.
getLexicon
(
self
.
_lexicon
).
Splitter
(
source
)
wordList
=
OIBTree
()
wordScores
=
OIBTree
()
last
=
None
# Run through the words and score them
for
word
in
s
ourceWords
:
for
word
in
s
plitter
(
source
)
:
if
word
[
0
]
==
'
\
"
'
:
last
=
self
.
subindex
(
word
[
1
:
-
1
],
wordList
,
wordList
.
has_key
,
last
)
# XXX
else
:
if
wordList
.
has_key
(
word
):
if
word
!=
last
:
wordList
[
word
]
=
wordList
[
word
]
+
1
last
=
self
.
_subindex
(
word
[
1
:
-
1
],
wordScores
,
last
,
splitter
)
else
:
wordList
[
word
]
=
1
if
word
==
last
:
continue
last
=
word
wordScores
[
word
]
=
wordScores
.
get
(
word
,
0
)
+
1
lexicon
=
self
.
getLexicon
(
self
.
_lexicon
)
currentWordIds
=
self
.
_unindex
.
get
(
documentId
,
[])
wordCount
=
0
# Convert scores to use wids:
widScores
=
IIBucket
()
getWid
=
lexicon
.
getWordId
for
word
,
score
in
wordScores
.
items
():
widScores
[
getWid
(
word
)]
=
score
del
wordScores
currentWids
=
IISet
(
self
.
_unindex
.
get
(
documentId
,
[]))
# First deal with deleted words
# To do this, the first thing we have to do is convert the
# existing words to words, from wordIDS
wordListAsIds
=
OIBTree
()
for
word
,
score
in
wordList
.
items
():
wordListAsIds
[
lexicon
.
getWordId
(
word
)]
=
score
# Get rid of document words that are no longer indexed
self
.
unindex_objectWids
(
documentId
,
difference
(
currentWids
,
widScores
))
for
word
in
currentWordIds
:
if
not
wordListAsIds
.
has_key
(
word
):
self
.
removeForwardEntry
(
word
,
documentId
)
# Now index the words. Note that the new xIBTrees are clever
# enough to do nothing when there isn't a change. Woo hoo.
insert
=
self
.
insertForwardIndexEntry
for
wid
,
score
in
widScores
.
items
():
insert
(
wid
,
documentId
,
score
)
#import pdb; pdb.set_trace()
# Now we can deal with new/updated entries
for
wordId
,
score
in
wordListAsIds
.
items
():
self
.
insertForwardIndexEntry
(
wordId
,
documentId
,
score
)
self
.
insertReverseIndexEntry
(
wordId
,
documentId
)
wordCount
=
wordCount
+
1
# Save the unindexing info if it's changed:
wids
=
widScores
.
keys
()
if
wids
!=
currentWids
.
keys
():
self
.
_unindex
[
documentId
]
=
wids
# Return the number of words you indexed
return
wordCount
return
len
(
wids
)
def
_subindex
(
self
,
source
,
wordScores
,
last
,
splitter
):
"""Recursively handle multi-word synonyms"""
for
word
in
splitter
(
source
):
if
word
[
0
]
==
'
\
"
'
:
last
=
self
.
_subindex
(
word
[
1
:
-
1
],
wordScores
,
last
,
splitter
)
else
:
if
word
==
last
:
continue
last
=
word
wordScores
[
word
]
=
wordScores
.
get
(
word
,
0
)
+
1
return
last
def
unindex_object
(
self
,
i
):
""" carefully unindex document with integer id 'i' from the text
...
...
@@ -404,21 +377,41 @@ class UnTextIndex(Persistent, Implicit):
index
=
self
.
_index
unindex
=
self
.
_unindex
val
=
unindex
.
get
(
i
,
None
)
if
val
is
not
None
:
for
n
in
val
:
v
=
index
.
get
(
n
,
None
)
if
type
(
v
)
is
TupleType
:
del
index
[
n
]
elif
v
is
not
None
:
wids
=
unindex
.
get
(
i
,
None
)
if
wids
is
not
None
:
self
.
unindex_objectWids
(
i
,
wids
)
del
unindex
[
i
]
def
unindex_objectWids
(
self
,
i
,
wids
):
""" carefully unindex document with integer id 'i' from the text
index and do not fail if it does not exist """
index
=
self
.
_index
get
=
index
.
get
for
wid
in
wids
:
widScores
=
get
(
wid
,
None
)
if
widScores
is
None
:
LOG
(
'UnTextIndex'
,
ERROR
,
'unindex_object tried to unindex nonexistent'
' document, wid %s, %s'
%
(
i
,
wid
))
continue
if
type
(
widScores
)
is
TupleType
:
del
index
[
wid
]
else
:
try
:
del
index
[
n
][
i
]
del
widScores
[
i
]
if
widScores
:
if
type
(
widScores
)
is
DictType
:
if
len
(
widScores
)
==
1
:
# convert to tuple
widScores
=
widScores
.
items
()[
0
]
index
[
wid
]
=
widScores
else
:
del
index
[
wid
]
except
(
KeyError
,
IndexError
,
TypeError
):
LOG
(
'UnTextIndex'
,
ERROR
,
'unindex_object tried to unindex nonexistent'
' document %s'
%
str
(
i
))
del
unindex
[
i
]
def
__getitem__
(
self
,
word
):
"""Return an InvertedIndex-style result "list"
...
...
@@ -442,12 +435,13 @@ class UnTextIndex(Persistent, Implicit):
if
splitSource
[:
1
]
==
'"'
and
splitSource
[
-
1
:]
==
'"'
:
return
self
[
splitSource
]
r
=
self
.
_index
.
get
(
self
.
getLexicon
(
self
.
_lexicon
).
get
(
splitSource
)[
0
],
None
)
wids
=
self
.
getLexicon
(
self
.
_lexicon
).
get
(
splitSource
)
if
wids
:
r
=
self
.
_index
.
get
(
wids
[
0
],
None
)
if
r
is
None
:
r
=
{}
else
:
r
=
{}
return
ResultList
(
r
,
(
splitSource
,),
self
)
...
...
@@ -486,6 +480,7 @@ class UnTextIndex(Persistent, Implicit):
if
not
keys
or
not
string
.
strip
(
keys
):
return
None
keys
=
[
keys
]
r
=
None
for
key
in
keys
:
...
...
@@ -493,21 +488,12 @@ class UnTextIndex(Persistent, Implicit):
if
not
key
:
continue
rr
=
IIBucket
()
try
:
for
i
,
score
in
self
.
query
(
key
).
items
():
if
score
:
rr
[
i
]
=
score
except
KeyError
:
pass
if
r
is
None
:
r
=
rr
else
:
# Note that we *and*/*narrow* multiple search terms.
r
=
r
.
intersection
(
rr
)
b
=
self
.
query
(
key
).
bucket
()
w
,
r
=
weightedIntersection
(
r
,
b
)
if
r
is
not
None
:
return
r
,
(
self
.
id
,)
return
(
IIBucket
(),
(
self
.
id
,))
...
...
@@ -533,19 +519,6 @@ class UnTextIndex(Persistent, Implicit):
return
r
def
_subindex
(
self
,
isrc
,
d
,
old
,
last
):
src
=
self
.
getLexicon
(
self
.
_lexicon
).
Splitter
(
isrc
)
for
s
in
src
:
if
s
[
0
]
==
'
\
"
'
:
last
=
self
.
subindex
(
s
[
1
:
-
1
],
d
,
old
,
last
)
else
:
if
old
(
s
):
if
s
!=
last
:
d
[
s
]
=
d
[
s
]
+
1
else
:
d
[
s
]
=
1
return
last
def
query
(
self
,
s
,
default_operator
=
Or
,
ws
=
(
string
.
whitespace
,)):
""" This is called by TextIndexes. A 'query term' which is a
...
...
@@ -565,7 +538,6 @@ class UnTextIndex(Persistent, Implicit):
## For example, substitute wildcards, or translate words into
## various languages.
q
=
self
.
getLexicon
(
self
.
_lexicon
).
query_hook
(
q
)
# do some more parsing
q
=
parse2
(
q
,
default_operator
)
...
...
lib/python/SearchIndex/randid.py
0 → 100644
View file @
e6b5d0c3
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
#############################################################################
import
whrandom
def randid(randint=whrandom.randint, choice=whrandom.choice, signs=(-1, 1)):
    """Return a random nonzero id in +/-[1, 2000000000].

    The random callables are bound as defaults at definition time so the
    module-level `whrandom` name can be deleted afterwards.
    """
    sign = choice(signs)
    return sign * randint(1, 2000000000)
del
whrandom
lib/python/SearchIndex/tests/__init__.py
0 → 100644
View file @
e6b5d0c3
# This helps debugging.
lib/python/SearchIndex/tests/testSplitter.py
0 → 100644
View file @
e6b5d0c3
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
import
sys
try
:
import
ZODB
except
:
import
os
sys
.
path
.
insert
(
0
,
os
.
getcwd
())
sys
.
path
.
insert
(
0
,
'../..'
)
import
ZODB
import
unittest
from
SearchIndex.Splitter
import
Splitter
class TestSplitter(unittest.TestCase):
    # Exercises the SearchIndex Splitter's tokenization rules.
    # NOTE: `map(None, splitter)` is the Python 2 idiom for materializing
    # an iterable into a list.

    def testSplitNormalText(self):
        # Ordinary words come back in input order; the single-letter
        # word 'a' is expected to be dropped (see expected list).
        text = 'this is a long string of words'
        a = Splitter(text)
        r = map(None, a)
        assert r == ['this', 'is', 'long', 'string', 'of', 'words']

    def testDropNumeric(self):
        # Purely numeric tokens are expected to be discarded.
        text = '123 456 789 foobar without you nothing'
        a = Splitter(text)
        r = map(None, a)
        assert r == ['foobar', 'without', 'you', 'nothing'], r

    def testDropSingleLetterWords(self):
        # Single-letter words ('I') are expected to be discarded.
        text = 'without you I nothing'
        a = Splitter(text)
        r = map(None, a)
        assert r == ['without', 'you', 'nothing'], r

    def testSplitOnNonAlpha(self):
        # The apostrophe splits "I'm" into 'I' and 'm', both of which are
        # single-letter and therefore dropped.
        text = 'without you I\'m nothing'
        a = Splitter(text)
        r = map(None, a)
        assert r == ['without', 'you', 'nothing'], r
def test_suite():
    """Build the suite of all 'test*' methods on TestSplitter."""
    return unittest.makeSuite(TestSplitter, 'test')


def main():
    """Run the suite with a text-mode runner."""
    runner = unittest.TextTestRunner()
    runner.run(test_suite())


def debug():
    """Run the suite in debug mode so failures drop into the debugger."""
    test_suite().debug()


def pdebug():
    """Run the debug entry point under pdb."""
    import pdb
    pdb.run('debug()')


if __name__ == '__main__':
    if len(sys.argv) > 1:
        # Allow `python testSplitter.py debug` etc.: dispatch by name.
        globals()[sys.argv[1]]()
    else:
        main()
lib/python/SearchIndex/tests/testUnKeywordIndex.py
0 → 100644
View file @
e6b5d0c3
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
import
os
,
sys
sys
.
path
.
insert
(
0
,
os
.
getcwd
())
try
:
import
unittest
except
:
sys
.
path
[
0
]
=
os
.
path
.
join
(
sys
.
path
[
0
],
'..'
,
'..'
)
import
unittest
import
ZODB
from
SearchIndex.UnKeywordIndex
import
UnKeywordIndex
class Dummy:
    """Trivial document stub: wraps a single value exposed via foo()."""

    def __init__(self, foo):
        self._foo = foo

    def foo(self):
        # Accessor the keyword index calls to obtain the indexed value.
        return self._foo

    def __repr__(self):
        return '<Dummy: %s>' % self._foo

    __str__ = __repr__
class TestCase(unittest.TestCase):
    """
    Test KeywordIndex objects.
    """

    def setUp(self):
        """Build a fresh UnKeywordIndex over 'foo' plus canned documents
        and query dictionaries used by the tests below."""
        self._index = UnKeywordIndex('foo')
        self._marker = []
        # (documentId, document) pairs; the last one indexes the value 0
        # to prove falsy keywords are handled.
        self._values = [(0, Dummy(['a'])),
                        (1, Dummy(['a', 'b'])),
                        (2, Dummy(['a', 'b', 'c'])),
                        (3, Dummy(['a', 'b', 'c', 'a'])),
                        (4, Dummy(['a', 'b', 'c', 'd'])),
                        (5, Dummy(['a', 'b', 'c', 'e'])),
                        (6, Dummy(['a', 'b', 'c', 'e', 'f'])),
                        (7, Dummy([0]))]
        self._noop_req = {'bar': 123}          # no 'foo' key: index unused
        self._all_req = {'foo': ['a']}         # matches every doc but the last
        self._some_req = {'foo': ['e']}        # matches docs 5 and 6
        self._overlap_req = {'foo': ['c', 'e']}  # union: docs 2..6
        self._string_req = {'foo': 'a'}        # bare string instead of list
        self._zero_req = {'foo': [0]}          # falsy keyword

    def tearDown(self):
        """
        """

    def _populateIndex(self):
        # Index every canned document.
        for k, v in self._values:
            self._index.index_object(k, v)

    def _checkApply(self, req, expectedValues):
        """Apply `req` and assert the result holds exactly the ids of
        `expectedValues`."""
        result, used = self._index._apply_index(req)
        assert used == ('foo', )
        try:
            length = len(result)
        except:
            # Result may be a mapping-like object without len(); fall
            # back to its keys.
            result = result.keys()
            length = len(result)
        assert length == len(expectedValues), \
            '%s | %s' % (map(None, result),
                         map(lambda x: x[0], expectedValues))
        for k, v in expectedValues:
            assert k in result

    def testEmpty(self):
        # A freshly created index contains nothing and answers every
        # query with an empty result.
        assert len(self._index) == 0
        assert len(self._index.referencedObjects()) == 0
        assert self._index.getEntryForObject(1234) is None
        assert (self._index.getEntryForObject(1234, self._marker)
                is self._marker), self._index.getEntryForObject(1234)
        self._index.unindex_object(1234)  # nothrow
        assert self._index.hasUniqueValuesFor('foo')
        assert not self._index.hasUniqueValuesFor('bar')
        assert len(self._index.uniqueValues('foo')) == 0
        assert self._index._apply_index(self._noop_req) is None
        self._checkApply(self._all_req, [])
        self._checkApply(self._some_req, [])
        self._checkApply(self._overlap_req, [])
        self._checkApply(self._string_req, [])

    def testPopulated(self):
        self._populateIndex()
        values = self._values
        #assert len( self._index ) == len( values )
        assert len(self._index.referencedObjects()) == len(values)
        assert self._index.getEntryForObject(1234) is None
        assert (self._index.getEntryForObject(1234, self._marker)
                is self._marker)
        self._index.unindex_object(1234)  # nothrow
        for k, v in values:
            assert self._index.getEntryForObject(k) == v.foo()
        # BUG FIX: this assert was written as `assert (expr, msg)` --
        # asserting a non-empty tuple, which is always true and so never
        # fired.  ('a'..'f' plus 0 gives len(values)-1 unique keywords.)
        assert len(self._index.uniqueValues('foo')) == len(values) - 1, \
            len(values) - 1
        assert self._index._apply_index(self._noop_req) is None
        self._checkApply(self._all_req, values[:-1])
        self._checkApply(self._some_req, values[5:7])
        self._checkApply(self._overlap_req, values[2:7])
        self._checkApply(self._string_req, values[:-1])

    def testZero(self):
        # Make sure the falsy keyword 0 is indexed and searchable.
        self._populateIndex()
        values = self._values
        self._checkApply(self._zero_req, values[-1:])
        assert 0 in self._index.uniqueValues('foo')
def test_suite():
    """Assemble the default suite of TestCase's test* methods."""
    suite = unittest.makeSuite(TestCase)
    return suite


if __name__ == '__main__':
    runner = unittest.TextTestRunner()
    runner.run(test_suite())
lib/python/SearchIndex/tests/testUnTextIndex.py
0 → 100644
View file @
e6b5d0c3
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
import
sys
,
os
sys
.
path
.
insert
(
0
,
os
.
getcwd
())
try
:
import
unittest
except
:
sys
.
path
[
0
]
=
os
.
path
.
join
(
sys
.
path
[
0
],
'..'
,
'..'
)
import
unittest
class Dummy:
    """Attribute bag: every keyword argument becomes an instance
    attribute (e.g. Dummy(text='...') yields an object with .text)."""

    def __init__(self, **kw):
        for name, value in kw.items():
            setattr(self, name, value)
import
zLOG
def log_write(subsystem, severity, summary, detail, error):
    """Strict zLOG writer for tests: any message at PROBLEM severity or
    worse fails the test run immediately."""
    if severity < zLOG.PROBLEM:
        return
    assert 0, "%s(%s): %s" % (subsystem, severity, summary)


# Route all zLOG output through the strict writer above.
zLOG.log_write = log_write
import
ZODB
,
ZODB
.
DemoStorage
,
ZODB
.
FileStorage
import
SearchIndex.UnTextIndex
import
SearchIndex.GlobbingLexicon
class Tests(unittest.TestCase):
    # Exercises UnTextIndex both in-memory and persisted through a ZODB
    # FileStorage.  Uses Python 2 / ZODB 2 era APIs (has_key, the
    # get_transaction() builtin).

    def setUp(self):
        # Fresh in-memory index over the 'text' attribute, plus a
        # mutable dummy document whose .text the tests rewrite.
        self.index = SearchIndex.UnTextIndex.UnTextIndex('text')
        self.doc = Dummy(text='this is the time, when all good zopes')

    def dbopen(self):
        # Open (creating on first use) a FileStorage-backed database and
        # return the persistent UnTextIndex stored at root()['index'].
        n = 'fs_tmp__%s' % os.getpid()
        s = ZODB.FileStorage.FileStorage(n)
        db = self.db = ZODB.DB(s)
        self.jar = db.open()
        if not self.jar.root().has_key('index'):
            self.jar.root()['index'] = \
                SearchIndex.UnTextIndex.UnTextIndex('text')
            get_transaction().commit()
        return self.jar.root()['index']

    def dbclose(self):
        # Close connection and database; delete the attributes so
        # tearDown's hasattr() check knows the db is no longer open.
        self.jar.close()
        self.db.close()
        del self.jar
        del self.db

    def tearDown(self):
        # Abort any pending transaction, close the db if a test opened
        # one, and remove the temporary FileStorage files.
        get_transaction().abort()
        if hasattr(self, 'jar'):
            self.dbclose()
        os.system('rm -f fs_tmp__*')

    def checkSimpleAddDelete(self):
        "Check that we can add and delete an object without error"
        self.index.index_object(0, self.doc)
        self.index.index_object(1, self.doc)
        self.doc.text = 'spam is good, spam is fine, span span span'
        self.index.index_object(0, self.doc)  # reindex doc 0 with new text
        self.index.unindex_object(0)

    def checkPersistentUpdate1(self):
        "Check simple persistent indexing"
        index = self.dbopen()

        self.doc.text = 'this is the time, when all good zopes'
        index.index_object(0, self.doc)
        get_transaction().commit()

        self.doc.text = 'time waits for no one'
        index.index_object(1, self.doc)
        get_transaction().commit()
        self.dbclose()

        # Reopen to prove the indexed state survived a commit/close cycle.
        index = self.dbopen()

        r = index._apply_index({})
        assert r == None  # request lacks a 'text' key -> index unused

        r = index._apply_index({'text': 'python'})
        assert len(r) == 2 and r[1] == ('text',), 'incorrectly not used'
        assert not r[0], "should have no results"

        r = index._apply_index({'text': 'time'})
        r = list(r[0].keys())
        assert r == [0, 1], r

    def checkPersistentUpdate2(self):
        "Check less simple persistent indexing"
        index = self.dbopen()

        self.doc.text = 'this is the time, when all good zopes'
        index.index_object(0, self.doc)
        get_transaction().commit()

        self.doc.text = 'time waits for no one'
        index.index_object(1, self.doc)
        get_transaction().commit()

        self.doc.text = 'the next task is to test'
        index.index_object(3, self.doc)
        get_transaction().commit()

        self.doc.text = 'time time'
        index.index_object(2, self.doc)
        get_transaction().commit()
        self.dbclose()

        index = self.dbopen()

        r = index._apply_index({})
        assert r == None

        r = index._apply_index({'text': 'python'})
        assert len(r) == 2 and r[1] == ('text',), 'incorrectly not used'
        assert not r[0], "should have no results"

        # Docs 0, 1 and 2 contain 'time'; doc 3 does not.
        r = index._apply_index({'text': 'time'})
        r = list(r[0].keys())
        assert r == [0, 1, 2], r

    # Corpus for the glob/boolean query checks below.
    sample_texts = [
        """This is the time for all good men to come to
        the aid of their country""",
        """ask not what your country can do for you,
        ask what you can do for your country""",
        """Man, I can't wait to get to Montross!""",
        """Zope Public License (ZPL) Version 1.0""",
        """Copyright (c) Digital Creations. All rights reserved.""",
        """This license has been certified as Open Source(tm).""",
        """I hope I get to work on time""",
        ]

    def checkGlobQuery(self):
        "Check a glob query"
        index = self.dbopen()
        # Globbing requires the GlobbingLexicon instead of the default.
        index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()
        for i in range(len(self.sample_texts)):
            self.doc.text = self.sample_texts[i]
            index.index_object(i, self.doc)
            get_transaction().commit()
        self.dbclose()

        index = self.dbopen()

        # 'm*n' is expected to match texts 0 and 2 (per the assert).
        r = index._apply_index({'text': 'm*n'})
        r = list(r[0].keys())
        assert r == [0, 2], r

    def checkAndQuery(self):
        "Check an AND query"
        index = self.dbopen()
        index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()
        for i in range(len(self.sample_texts)):
            self.doc.text = self.sample_texts[i]
            index.index_object(i, self.doc)
            get_transaction().commit()
        self.dbclose()

        index = self.dbopen()

        # Only text 0 contains both 'time' and 'country'.
        r = index._apply_index({'text': 'time and country'})
        r = list(r[0].keys())
        assert r == [0,], r

    def checkOrQuery(self):
        "Check an OR query"
        index = self.dbopen()
        index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()
        for i in range(len(self.sample_texts)):
            self.doc.text = self.sample_texts[i]
            index.index_object(i, self.doc)
            get_transaction().commit()
        self.dbclose()

        index = self.dbopen()

        # Texts 0, 1 and 6 contain 'time' or 'country'.
        r = index._apply_index({'text': 'time or country'})
        r = list(r[0].keys())
        assert r == [0, 1, 6], r
def test_suite():
    """Collect the suite of all 'check*' methods on Tests."""
    return unittest.makeSuite(Tests, 'check')


def main():
    """Run the suite with a text-mode runner."""
    runner = unittest.TextTestRunner()
    runner.run(test_suite())


def debug():
    """Run the suite in debug mode so failures drop into the debugger."""
    test_suite().debug()


def pdebug():
    """Run the debug entry point under pdb."""
    import pdb
    pdb.run('debug()')


if __name__ == '__main__':
    if len(sys.argv) > 1:
        # Allow `python testUnTextIndex.py debug` etc.: dispatch by name.
        globals()[sys.argv[1]]()
    else:
        main()
lib/python/SearchIndex/tests/test_UnIndex.py
View file @
e6b5d0c3
...
...
@@ -82,7 +82,16 @@
# attributions are listed in the accompanying credits file.
#
##############################################################################
import
Zope
import
sys
sys
.
path
.
insert
(
0
,
'.'
)
try
:
import
Testing
except
ImportError
:
sys
.
path
[
0
]
=
'../../'
import
Testing
import
ZODB
import
unittest
from
SearchIndex.UnIndex
import
UnIndex
...
...
@@ -117,7 +126,7 @@ class TestCase( unittest.TestCase ):
,
(
5
,
Dummy
(
'abce'
)
)
,
(
6
,
Dummy
(
'abce'
)
)
,
(
7
,
Dummy
(
0
)
)
# Collector #1959
]
,
(
8
,
Dummy
(
None
)
)
]
self
.
_forward
=
{}
self
.
_backward
=
{}
for
k
,
v
in
self
.
_values
:
...
...
@@ -137,6 +146,7 @@ class TestCase( unittest.TestCase ):
,
'foo_usage'
:
'range:min:max'
}
self
.
_zero_req
=
{
'foo'
:
0
}
self
.
_none_req
=
{
'foo'
:
None
}
def
tearDown
(
self
):
...
...
@@ -149,6 +159,8 @@ class TestCase( unittest.TestCase ):
def
_checkApply
(
self
,
req
,
expectedValues
):
result
,
used
=
self
.
_index
.
_apply_index
(
req
)
if
hasattr
(
result
,
'keys'
):
result
=
result
.
keys
()
assert
used
==
(
'foo'
,
)
assert
len
(
result
)
==
len
(
expectedValues
),
\
'%s | %s'
%
(
map
(
None
,
result
),
expectedValues
)
...
...
@@ -177,10 +189,11 @@ class TestCase( unittest.TestCase ):
self
.
_checkApply
(
self
.
_range_req
,
[]
)
def
testPopulated
(
self
):
""" Test a populated FieldIndex """
self
.
_populateIndex
()
values
=
self
.
_values
assert
len
(
self
.
_index
)
==
len
(
values
)
assert
len
(
self
.
_index
)
==
len
(
values
)
-
1
#'abce' is duplicate
assert
len
(
self
.
_index
.
referencedObjects
()
)
==
len
(
values
)
assert
self
.
_index
.
getEntryForObject
(
1234
)
is
None
...
...
@@ -195,21 +208,62 @@ class TestCase( unittest.TestCase ):
assert
self
.
_index
.
_apply_index
(
self
.
_noop_req
)
is
None
self
.
_checkApply
(
self
.
_request
,
values
[
-
3
:
-
1
]
)
self
.
_checkApply
(
self
.
_min_req
,
values
[
2
:
-
1
]
)
self
.
_checkApply
(
self
.
_max_req
,
values
[
:
3
]
+
values
[
-
1
:
]
)
self
.
_checkApply
(
self
.
_request
,
values
[
-
4
:
-
2
]
)
self
.
_checkApply
(
self
.
_min_req
,
values
[
2
:
-
2
]
)
self
.
_checkApply
(
self
.
_max_req
,
values
[
:
3
]
+
values
[
-
2
:
]
)
self
.
_checkApply
(
self
.
_range_req
,
values
[
2
:
5
]
)
def
testZero
(
self
):
""" Make sure 0 gets indexed """
self
.
_populateIndex
()
values
=
self
.
_values
self
.
_checkApply
(
self
.
_zero_req
,
values
[
-
1
:
]
)
self
.
_checkApply
(
self
.
_zero_req
,
values
[
-
2
:
-
1
]
)
assert
0
in
self
.
_index
.
uniqueValues
(
'foo'
)
def
testNone
(
self
):
""" make sure None gets indexed """
self
.
_populateIndex
()
values
=
self
.
_values
self
.
_checkApply
(
self
.
_none_req
,
values
[
-
1
:])
assert
None
in
self
.
_index
.
uniqueValues
(
'foo'
)
def
testRange
(
self
):
"""Test a range search"""
index
=
UnIndex
(
'foo'
)
for
i
in
range
(
100
):
index
.
index_object
(
i
,
Dummy
(
i
%
10
))
r
=
index
.
_apply_index
({
'foo_usage'
:
'range:min:max'
,
'foo'
:
[
-
99
,
3
]})
assert
tuple
(
r
[
1
])
==
(
'foo'
,),
r
[
1
]
r
=
list
(
r
[
0
].
keys
())
expect
=
[
0
,
1
,
2
,
3
,
10
,
11
,
12
,
13
,
20
,
21
,
22
,
23
,
30
,
31
,
32
,
33
,
40
,
41
,
42
,
43
,
50
,
51
,
52
,
53
,
60
,
61
,
62
,
63
,
70
,
71
,
72
,
73
,
80
,
81
,
82
,
83
,
90
,
91
,
92
,
93
]
assert
r
==
expect
,
r
def test_suite():
    # All 'test*' methods of TestCase, in one suite.
    suite = unittest.makeSuite(TestCase)
    return suite
def debug():
    # Run without a TestResult so exceptions propagate to the caller.
    suite = test_suite()
    return suite.debug()
if
__name__
==
'__main__'
:
def pdebug():
    # Run the debug suite under the pdb debugger.
    import pdb
    pdb.run('debug()')
def main():
    # Text-mode run of the whole suite.
    runner = unittest.TextTestRunner()
    runner.run(test_suite())
if __name__ == '__main__':
    # Dispatch to a named entry point (main/debug/pdebug) if one was
    # given on the command line; default to main().
    if len(sys.argv) < 2:
        main()
    else:
        globals()[sys.argv[1]]()
lib/python/Testing/__init__.py
0 → 100644
View file @
e6b5d0c3
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
"""
Set up testing environment
$Id: __init__.py,v 1.2 2001/03/15 13:16:25 jim Exp $
"""
import
os
,
sys
startfrom
=
head
=
os
.
getcwd
()
while
1
:
sys
.
path
[
0
]
=
startfrom
try
:
import
ZODB
except
ImportError
:
head
=
os
.
path
.
split
(
startfrom
)[
0
]
if
head
==
''
:
raise
"Couldn't import ZODB"
startfrom
=
head
continue
else
:
break
os
.
environ
[
'SOFTWARE_HOME'
]
=
os
.
environ
.
get
(
'SOFTWARE_HOME'
,
startfrom
)
os
.
environ
[
'INSTANCE_HOME'
]
=
os
.
environ
.
get
(
'INSTANCE_HOME'
,
os
.
path
.
join
(
os
.
environ
[
'SOFTWARE_HOME'
],
'..'
,
'..'
)
)
lib/python/Testing/dispatcher.py
0 → 100644
View file @
e6b5d0c3
#!/usr/bin/env python1.5
# Dispatcher for usage inside Zope test environment
# Digital Creations
__version__
=
'$Id: dispatcher.py,v 1.2 2001/03/15 13:16:25 jim Exp $'
import
os
,
sys
,
re
,
string
import
threading
,
time
,
commands
,
profile
class Dispatcher:
    """A multi-purpose thread dispatcher.

    Runs a named test method in several threads at once, watches memory
    consumption while the threads run, and prints per-thread timing and
    per-thread data when everything has finished.
    """

    def __init__(self, func=''):
        # Log destination; replaceable via setlog().
        self.fp = sys.stderr
        # Names of methods run at thread start / end; see
        # register_startup() / register_teardown().
        self.f_startup = []
        self.f_teardown = []
        self.lastlog = ""
        # Serializes writes to th_data / runtime from worker threads.
        self.lock = threading.Lock()
        self.func = func
        self.profiling = 0
        # Docstring of the dispatched method, for the final report.
        # BUGFIX: getattr(self, '') raised AttributeError when the
        # default func='' was used; fall back to None instead.
        if self.func:
            self.doc = getattr(self, self.func).__doc__
        else:
            self.doc = None

    def setlog(self, fp):
        """Redirect log output to the file-like object fp."""
        self.fp = fp

    def log(self, s):
        """Write s to the log, suppressing consecutive duplicates."""
        if s == self.lastlog:
            return
        self.fp.write(s)
        self.fp.flush()
        self.lastlog = s

    def logn(self, s):
        """Like log(), but appends a newline."""
        if s == self.lastlog:
            return
        self.fp.write(s + '\n')
        self.fp.flush()
        self.lastlog = s

    # BUGFIX: both toggles were defined without 'self', so calling them
    # on an instance raised TypeError (and 'self' inside was unbound).
    def profiling_on(self):
        self.profiling = 1

    def profiling_off(self):
        self.profiling = 0

    def dispatcher(self, name='', *params):
        """ dispatcher for threads

        The dispatcher expects one or several tuples:
        (functionname, number of threads to start, args, keyword args)
        """
        # Sentinel entry so min()/max() below never see an empty list.
        self.mem_usage = [-1]
        mem_watcher = threading.Thread(None, self.mem_watcher,
                                       name='memwatcher')
        mem_watcher.start()
        self.start_test = time.time()
        self.name = name
        self.th_data = {}
        self.runtime = {}
        self._threads = []
        s2s = self.s2s
        # Create all worker threads first, then start them together.
        for func, numthreads, args, kw in params:
            for i in range(0, numthreads):
                kw['t_func'] = func
                th = threading.Thread(None, self.worker,
                                      name="TH_%s_%03d" % (func, i),
                                      args=args, kwargs=kw)
                self._threads.append(th)
        for th in self._threads:
            th.start()
        # Wait until only the main thread (and mem_watcher's exit path)
        # remains.
        while threading.activeCount() > 1:
            time.sleep(1)
        # Final report: identification, per-thread timing and data.
        self.logn('ID: %s ' % self.name)
        self.logn('FUNC: %s ' % self.func)
        self.logn('DOC: %s ' % self.doc)
        self.logn('Args: %s' % params)
        for th in self._threads:
            self.logn('%-30s ........................ %9.3f sec' %
                      (th.getName(), self.runtime[th.getName()]))
            for k, v in self.th_data[th.getName()].items():
                self.logn('%-30s %-15s = %s' % (' ', k, v))
        self.logn("")
        self.logn('Complete running time: %9.3f sec' %
                  (time.time() - self.start_test))
        # Drop the -1 sentinel before summarizing memory samples.
        if len(self.mem_usage) > 1:
            self.mem_usage.remove(-1)
        self.logn("Memory: start: %s, end: %s, low: %s, high: %s" % \
                  (s2s(self.mem_usage[0]), s2s(self.mem_usage[-1]),
                   s2s(min(self.mem_usage)), s2s(max(self.mem_usage))))
        self.logn('')

    def worker(self, *args, **kw):
        """Thread body: startup hooks, the target method, teardown hooks."""
        for name in self.f_startup:
            getattr(self, name)()
        target = getattr(self, kw['t_func'])
        del kw['t_func']  # not an argument of the target itself
        apply(target, args, kw)
        for name in self.f_teardown:
            getattr(self, name)()

    def th_setup(self):
        """ initialize thread with some environment data """
        env = {'start': time.time()}
        return env

    def th_teardown(self, env, **kw):
        """ famous last actions of thread """
        self.lock.acquire()
        self.th_data[threading.currentThread().getName()] = kw
        self.runtime[threading.currentThread().getName()] = \
            time.time() - env['start']
        self.lock.release()

    def getmem(self):
        """ try to determine the current memory usage """
        # Linux-only: read the resident page count from /proc/<pid>/statm.
        if not sys.platform in ['linux2']:
            return None
        cmd = '/bin/ps --no-headers -o pid,vsize --pid %s' % os.getpid()
        outp = commands.getoutput(cmd)
        pid, vsize = filter(lambda x: x != "", string.split(outp, " "))
        data = open("/proc/%d/statm" % os.getpid()).read()
        fields = re.split(" ", data)
        mem = string.atoi(fields[0]) * 4096  # pages -> bytes (4K pages)
        return mem

    def mem_watcher(self):
        """ thread for watching memory usage """
        running = 1
        while running == 1:
            self.mem_usage.append(self.getmem())
            time.sleep(1)
            # Stop once only this watcher and the main thread are left.
            if threading.activeCount() == 2:
                running = 0

    def register_startup(self, func):
        """Run method named func in every worker before its target."""
        self.f_startup.append(func)

    def register_teardown(self, func):
        """Run method named func in every worker after its target."""
        self.f_teardown.append(func)

    def s2s(self, n):
        """Format a byte count n as a human-readable string.

        Values of a gigabyte or more are returned unformatted.
        """
        if n < 1024.0:
            return "%8.3lf Bytes" % n
        if n < 1024.0 * 1024.0:
            return "%8.3lf KB" % (1.0 * n / 1024.0)
        if n < 1024.0 * 1024.0 * 1024.0:
            return "%8.3lf MB" % (1.0 * n / 1024.0 / 1024.0)
        else:
            return n
if
__name__
==
"__main__"
:
d
=
Dispatcher
()
print
d
.
getmem
()
pass
lib/python/Testing/makerequest.py
0 → 100644
View file @
e6b5d0c3
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
"""
Facilitates unit tests which requires an acquirable REQUEST from
ZODB objects
Usage:
import makerequest
app = makerequest.makerequest(Zope.app())
$Id: makerequest.py,v 1.2 2001/03/15 13:16:25 jim Exp $
"""
import
os
from
os
import
environ
from
sys
import
stdin
from
ZPublisher.HTTPRequest
import
HTTPRequest
from
ZPublisher.HTTPResponse
import
HTTPResponse
from
ZPublisher.BaseRequest
import
RequestContainer
def makerequest(app):
    """Wrap app so an HTTP REQUEST is acquirable from it.

    Builds a minimal GET request from the process environment and
    stdin, and returns app wrapped in a RequestContainer holding it.
    """
    response = HTTPResponse()
    environ['SERVER_NAME'] = 'foo'
    environ['SERVER_PORT'] = '80'
    environ['REQUEST_METHOD'] = 'GET'
    request = HTTPRequest(stdin, environ, response)
    return app.__of__(RequestContainer(REQUEST=request))
lib/python/Testing/unittest.py
0 → 100755
View file @
e6b5d0c3
#!/usr/bin/env python
"""
Python unit testing framework, based on Erich Gamma's JUnit and Kent Beck's
Smalltalk testing framework.
Further information is available in the bundled documentation, and from
http://pyunit.sourceforge.net/
This module contains the core framework classes that form the basis of
specific test cases and suites (TestCase, TestSuite etc.), and also a
text-based utility class for running the tests and reporting the results
(TextTestRunner).
Copyright (c) 1999, 2000, 2001 Steve Purcell
This module is free software, and you may redistribute it and/or modify
it under the same terms as Python itself, so long as this copyright message
and disclaimer are retained in their original form.
IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
THIS CODE, EVEN IF THE AUTHOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE. THE CODE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS,
AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
"""
__author__
=
"Steve Purcell"
__email__
=
"stephen_purcell@yahoo.com"
__version__
=
"$Revision: 1.2 $"
[
11
:
-
2
]
import
time
import
sys
import
traceback
import
string
import
os
##############################################################################
# A platform-specific concession to help the code work for JPython users
##############################################################################
plat = string.lower(sys.platform)
# JPython reports 'java...' or '...jdk...' in sys.platform.
_isJPython = string.find(plat, 'java') >= 0 or string.find(plat, 'jdk') >= 0
del plat  # temporary; keep the module namespace clean
##############################################################################
# Test framework core
##############################################################################
class TestResult:
    """Accumulates the outcome of test runs.

    TestCase and TestSuite manage instances of this class automatically;
    test authors do not normally touch it.  An instance records how many
    tests were started and keeps (testcase, exceptioninfo) tuples for
    every failure and error, where exceptioninfo is a tuple of values as
    returned by sys.exc_info().
    """
    def __init__(self, args=(), kw={}):
        self.failures = []
        self.errors = []
        self.testsRun = 0
        self.shouldStop = 0
        self.__args = args
        self.__kw = kw

    def startTest(self, test):
        "Called when the given test is about to be run"
        self.testsRun = self.testsRun + 1

    def stopTest(self, test):
        "Called when the given test has been run"
        pass

    def addError(self, test, err):
        "Called when an error has occurred"
        self.errors.append((test, err))

    def addFailure(self, test, err):
        "Called when a failure has occurred"
        self.failures.append((test, err))

    def wasSuccessful(self):
        "Tells whether or not this result was a success"
        return not self.failures and not self.errors

    def stop(self):
        "Indicates that the tests should be aborted"
        self.shouldStop = 1

    def __repr__(self):
        return "<%s run=%i errors=%i failures=%i>" % (
            self.__class__, self.testsRun,
            len(self.errors), len(self.failures))
class TestCase:
    """A class whose instances are single test cases.

    Test authors should subclass TestCase for their own tests. Construction
    and deconstruction of the test's environment ('fixture') can be
    implemented by overriding the 'setUp' and 'tearDown' methods respectively.

    By default, the test code itself should be placed in a method named
    'runTest'.

    If the fixture may be used for many test cases, create as
    many test methods as are needed. When instantiating such a TestCase
    subclass, specify in the constructor arguments the name of the test method
    that the instance is to execute.

    If it is necessary to override the __init__ method, the base class
    __init__ method must always be called.
    """
    def __init__(self, methodName='runTest', *args, **kw):
        """Create an instance of the class that will use the named test
        method when executed. Raises a ValueError if the instance does
        not have a method with the specified name.
        """
        try:
            self.__testMethodName = methodName
            testMethod = getattr(self, methodName)
            self.__testMethodDoc = testMethod.__doc__
        except AttributeError:
            raise ValueError, "no such test method in %s: %s" % \
                  (self.__class__, methodName)
        # Extra positional/keyword arguments are forwarded to the test
        # method when it is invoked by __call__.
        self.__args = args
        self.__kw = kw

    def setUp(self):
        "Hook method for setting up the test fixture before exercising it."
        pass

    def tearDown(self):
        "Hook method for deconstructing the test fixture after testing it."
        pass

    def countTestCases(self):
        # A TestCase always represents exactly one test.
        return 1

    def defaultTestResult(self):
        # Result object used when run()/__call__ receives none.
        return TestResult(self.__args, self.__kw)

    def shortDescription(self):
        """Returns a one-line description of the test, or None if no
        description has been provided.

        The default implementation of this method returns the first line of
        the specified test method's docstring.
        """
        doc = self.__testMethodDoc
        return doc and string.strip(string.split(doc, "\n")[0]) or None

    def id(self):
        return "%s.%s" % (self.__class__, self.__testMethodName)

    def __str__(self):
        return "%s (%s)" % (self.__testMethodName, self.__class__)

    def __repr__(self):
        return "<%s testMethod=%s>" % \
               (self.__class__, self.__testMethodName)

    def run(self, result=None):
        return self(result)

    def __call__(self, result=None):
        if result is None: result = self.defaultTestResult()
        result.startTest(self)
        testMethod = getattr(self, self.__testMethodName)
        try:
            try:
                self.setUp()
            except:
                # A broken fixture counts as an error and skips the test.
                result.addError(self, self.__exc_info())
                return
            try:
                apply(testMethod, self.__args, self.__kw)
            except AssertionError, e:
                # Failed assertions are reported as failures ...
                result.addFailure(self, self.__exc_info())
            except:
                # ... any other exception is an error.
                result.addError(self, self.__exc_info())
            # tearDown runs whenever setUp succeeded, even after failure.
            try:
                self.tearDown()
            except:
                result.addError(self, self.__exc_info())
        finally:
            result.stopTest(self)

    def debug(self):
        """Run the test without collecting errors in a TestResult"""
        self.setUp()
        getattr(self, self.__testMethodName)()
        self.tearDown()

    def assert_(self, expr, msg=None):
        """Equivalent of built-in 'assert', but is not optimised out when
        __debug__ is false.
        """
        if not expr:
            raise AssertionError, msg

    failUnless = assert_

    def failIf(self, expr, msg=None):
        "Fail the test if the expression is true."
        apply(self.assert_,(not expr, msg))

    def assertRaises(self, excClass, callableObj, *args, **kwargs):
        """Assert that an exception of class excClass is thrown
        by callableObj when invoked with arguments args and keyword
        arguments kwargs. If a different type of exception is
        thrown, it will not be caught, and the test case will be
        deemed to have suffered an error, exactly as for an
        unexpected exception.
        """
        try:
            apply(callableObj, args, kwargs)
        except excClass:
            return
        else:
            if hasattr(excClass, '__name__'): excName = excClass.__name__
            else: excName = str(excClass)
            raise AssertionError, excName

    def assertEqual(self, first, second, msg=None):
        """Assert that the two objects are equal as determined by the '=='
        operator.
        """
        self.assert_((first == second), msg or '%s != %s' % (first, second))

    def fail(self, msg=None):
        """Fail immediately, with the given message."""
        raise AssertionError, msg

    def __exc_info(self):
        """Return a version of sys.exc_info() with the traceback frame
        minimised; usually the top level of the traceback frame is not
        needed.
        """
        exctype, excvalue, tb = sys.exc_info()
        newtb = tb.tb_next
        if newtb is None:
            return (exctype, excvalue, tb)
        return (exctype, excvalue, newtb)
class TestSuite:
    """A composite test made up of a number of TestCases.

    Create an instance of TestSuite, then add test case instances.  When
    all tests have been added, the suite can be passed to a test runner
    such as TextTestRunner.  Individual tests run in the order in which
    they were added and the results are aggregated.  Subclasses must not
    forget to call the base class constructor.
    """
    def __init__(self, tests=()):
        self._tests = []
        self.addTests(tests)

    def __repr__(self):
        return "<%s tests=%s>" % (self.__class__, self._tests)
    __str__ = __repr__

    def countTestCases(self):
        # Sum the counts of all contained tests (nested suites recurse).
        total = 0
        for t in self._tests:
            total = total + t.countTestCases()
        return total

    def addTest(self, test):
        self._tests.append(test)

    def addTests(self, tests):
        for t in tests:
            self.addTest(t)

    def run(self, result):
        return self(result)

    def __call__(self, result):
        # Honor an abort request between tests.
        for t in self._tests:
            if result.shouldStop:
                break
            t(result)
        return result

    def debug(self):
        """Run the tests without collecting errors in a TestResult"""
        for t in self._tests:
            t.debug()
class FunctionTestCase(TestCase):
    """A test case that wraps a free-standing test function.

    Useful for slipping pre-existing test functions into the PyUnit
    framework.  Optional set-up and tidy-up callables can be supplied;
    as with TestCase, the tidy-up ('tearDown') function runs whenever
    the set-up ('setUp') function ran successfully.
    """
    def __init__(self, testFunc, setUp=None, tearDown=None,
                 description=None):
        TestCase.__init__(self)
        self.__testFunc = testFunc
        self.__setUpFunc = setUp
        self.__tearDownFunc = tearDown
        self.__description = description

    def setUp(self):
        if self.__setUpFunc is None:
            return
        self.__setUpFunc()

    def tearDown(self):
        if self.__tearDownFunc is None:
            return
        self.__tearDownFunc()

    def runTest(self):
        self.__testFunc()

    def id(self):
        return self.__testFunc.__name__

    def __str__(self):
        return "%s (%s)" % (self.__class__, self.__testFunc.__name__)

    def __repr__(self):
        return "<%s testFunc=%s>" % (self.__class__, self.__testFunc)

    def shortDescription(self):
        # An explicit description wins; otherwise fall back to the first
        # line of the wrapped function's docstring.
        if self.__description is not None:
            return self.__description
        doc = self.__testFunc.__doc__
        return doc and string.strip(string.split(doc, "\n")[0]) or None
##############################################################################
# Convenience functions
##############################################################################
def getTestCaseNames(testCaseClass, prefix, sortUsing=cmp):
    """Extracts all the names of functions in the given test case class
    and its base classes that start with the given prefix. This is used
    by makeSuite().
    """
    names = []
    for attr in dir(testCaseClass):
        if attr[:len(prefix)] == prefix:
            names.append(attr)
    # Recurse into base classes; duplicates are tolerated, matching the
    # historical behaviour.
    for baseclass in testCaseClass.__bases__:
        names = names + getTestCaseNames(baseclass, prefix, sortUsing=None)
    if sortUsing:
        names.sort(sortUsing)
    return names
def makeSuite(testCaseClass, prefix='test', sortUsing=cmp,
              suiteClass=TestSuite):
    """Returns a TestSuite instance built from all of the test functions
    in the given test case class whose names begin with the given
    prefix. The cases are sorted by their function names using the
    supplied comparison function, which defaults to 'cmp'.
    """
    cases = []
    for name in getTestCaseNames(testCaseClass, prefix, sortUsing):
        cases.append(testCaseClass(name))
    return suiteClass(cases)
def findTestCases(module, prefix='test', sortUsing=cmp,
                  suiteClass=TestSuite):
    """Return a suite containing a sub-suite for every TestCase
    subclass found in the given module, built via makeSuite() with the
    same prefix / sort / suite-class settings.
    """
    import types
    suites = []
    for name in dir(module):
        candidate = getattr(module, name)
        # Only classic classes deriving from TestCase are collected.
        if type(candidate) == types.ClassType \
           and issubclass(candidate, TestCase):
            suites.append(makeSuite(candidate, prefix=prefix,
                                    sortUsing=sortUsing,
                                    suiteClass=suiteClass))
    return suiteClass(suites)
def createTestInstance(name, module=None, suiteClass=TestSuite):
    """Finds tests by their name, optionally only within the given module.

    Return the newly-constructed test, ready to run. If the name contains a ':'
    then the portion of the name after the colon is used to find a specific
    test case within the test case class named before the colon.

    Examples:
     findTest('examples.listtests.suite')
        -- returns result of calling 'suite'
     findTest('examples.listtests.ListTestCase:checkAppend')
        -- returns result of calling ListTestCase('checkAppend')
     findTest('examples.listtests.ListTestCase:check-')
        -- returns result of calling makeSuite(ListTestCase, prefix="check")
    """
    # Split off the optional ':casename' or ':prefix-' suffix.
    spec = string.split(name, ':')
    if len(spec) > 2: raise ValueError, "illegal test name: %s" % name
    if len(spec) == 1:
        testName = spec[0]
        caseName = None
    else:
        testName, caseName = spec
    parts = string.split(testName, '.')
    if module is None:
        # No module given: the dotted name must be at least
        # 'module.attribute', so the module portion can be imported.
        if len(parts) < 2:
            raise ValueError, "incomplete test name: %s" % name
        constructor = __import__(string.join(parts[:-1], '.'))
        # __import__ returns the top-level package; the attribute walk
        # below therefore starts from the first sub-name.
        parts = parts[1:]
    else:
        constructor = module
    # Walk the remaining dotted names down to the target object.
    for part in parts:
        constructor = getattr(constructor, part)
    if not callable(constructor):
        raise ValueError, "%s is not a callable object" % constructor
    if caseName:
        if caseName[-1] == '-':
            # A trailing '-' selects all test methods with this prefix.
            prefix = caseName[:-1]
            if not prefix:
                raise ValueError, "prefix too short: %s" % name
            test = makeSuite(constructor, prefix=prefix,
                             suiteClass=suiteClass)
        else:
            # Otherwise the case name selects a single test method.
            test = constructor(caseName)
    else:
        test = constructor()
    # Sanity check: whatever was built must look like a test object
    # (both TestCase and TestSuite provide countTestCases).
    if not hasattr(test, "countTestCases"):
        raise TypeError, \
              "object %s found with spec %s is not a test" % (test, name)
    return test
##############################################################################
# Text UI
##############################################################################
class _WritelnDecorator:
    """Used to decorate file-like objects with a handy 'writeln' method"""

    def __init__(self, stream):
        # Wrap the stream and remember the platform line separator so
        # that writeln() emits the right end-of-line sequence.
        self.stream = stream
        if not _isJPython:
            self.linesep = os.linesep
        else:
            import java.lang.System
            self.linesep = java.lang.System.getProperty("line.separator")

    def __getattr__(self, name):
        # Delegate everything else (write, flush, ...) to the stream.
        return getattr(self.stream, name)

    def writeln(self, *args):
        # Write the given arguments, if any, then a line separator.
        if args:
            apply(self.write, args)
        self.write(self.linesep)
class _JUnitTextTestResult(TestResult):
    """A test result class that can print formatted text results to a stream.

    Used by JUnitTextTestRunner.
    """

    def __init__(self, stream):
        self.stream = stream
        TestResult.__init__(self)

    def addError(self, test, error):
        """Record the error, echo 'E'; a KeyboardInterrupt aborts the run."""
        TestResult.addError(self, test, error)
        self.stream.write('E')
        self.stream.flush()
        if error[0] is KeyboardInterrupt:
            self.shouldStop = 1

    def addFailure(self, test, error):
        """Record the failure and echo 'F'."""
        TestResult.addFailure(self, test, error)
        self.stream.write('F')
        self.stream.flush()

    def startTest(self, test):
        """Echo a '.' as each test starts."""
        TestResult.startTest(self, test)
        self.stream.write('.')
        self.stream.flush()

    def printNumberedErrors(self, errFlavour, errors):
        """Print a numbered list of the given (test, error) pairs."""
        if not errors:
            return
        if len(errors) == 1:
            self.stream.writeln("There was 1 %s:" % errFlavour)
        else:
            self.stream.writeln("There were %i %ss:"
                                % (len(errors), errFlavour))
        number = 0
        for test, error in errors:
            number = number + 1
            tracebackText = string.join(
                apply(traceback.format_exception, error), "")
            self.stream.writeln("%i) %s" % (number, test))
            self.stream.writeln(tracebackText)

    def printErrors(self):
        """Print the numbered list of recorded errors."""
        self.printNumberedErrors("error", self.errors)

    def printFailures(self):
        """Print the numbered list of recorded failures."""
        self.printNumberedErrors("failure", self.failures)

    def printHeader(self):
        """Print the summary banner: OK, or the failure/error counts."""
        self.stream.writeln()
        if self.wasSuccessful():
            self.stream.writeln("OK (%i tests)" % self.testsRun)
        else:
            self.stream.writeln("!!!FAILURES!!!")
            self.stream.writeln("Test Results")
            self.stream.writeln()
            self.stream.writeln("Run: %i ; Failures: %i ; Errors: %i"
                                % (self.testsRun,
                                   len(self.failures),
                                   len(self.errors)))

    def printResult(self):
        """Print the full report: header, then errors, then failures."""
        self.printHeader()
        self.printErrors()
        self.printFailures()
class JUnitTextTestRunner:
    """A test runner class that displays results in textual form.

    The display format approximates that of JUnit's 'textui' test
    runner. This test runner may be removed in a future version of
    PyUnit.
    """

    def __init__(self, stream=sys.stderr):
        self.stream = _WritelnDecorator(stream)

    def run(self, test):
        "Run the given test case or test suite."
        result = _JUnitTextTestResult(self.stream)
        started = time.time()
        test(result)
        stopped = time.time()
        self.stream.writeln()
        self.stream.writeln("Time: %.3fs" % float(stopped - started))
        result.printResult()
        return result
##############################################################################
# Verbose text UI
##############################################################################
class _VerboseTextTestResult(TestResult):
    """A test result class that can print formatted text results to a stream.

    Used by VerboseTextTestRunner.
    """
    def __init__(self, stream, descriptions):
        # descriptions: when true, startTest() prints the test's short
        # description (if any) instead of its str() form.
        TestResult.__init__(self)
        self.stream = stream
        # Last test for which an error/failure was printed; lets
        # stopTest() decide whether to finish the line with "ok".
        self.lastFailure = None
        self.descriptions = descriptions

    def startTest(self, test):
        # Announce the test and leave the line open (" ... ") so the
        # outcome can be appended later.
        TestResult.startTest(self, test)
        if self.descriptions:
            self.stream.write(test.shortDescription() or str(test))
        else:
            self.stream.write(str(test))
        self.stream.write(" ... ")

    def stopTest(self, test):
        # Print "ok" only if no error/failure was reported for this test.
        TestResult.stopTest(self, test)
        if self.lastFailure is not test:
            self.stream.writeln("ok")

    def addError(self, test, err):
        TestResult.addError(self, test, err)
        self._printError("ERROR", test, err)
        self.lastFailure = test
        # A KeyboardInterrupt inside a test aborts the whole run.
        if err[0] is KeyboardInterrupt:
            self.shouldStop = 1

    def addFailure(self, test, err):
        TestResult.addFailure(self, test, err)
        self._printError("FAIL", test, err)
        self.lastFailure = test

    def _printError(self, flavour, test, err):
        # Print a banner, the flavour (ERROR/FAIL) and the formatted
        # traceback, each line prefixed with a tab.
        # NOTE(review): errLines is never used -- looks like leftover
        # scaffolding; confirm before removing.
        errLines = []
        separator1 = "\t" + '=' * 70
        separator2 = "\t" + '-' * 70
        # Skip reprinting if a report was already emitted for this test
        # (e.g. a test reported as both an error and a failure).
        if not self.lastFailure is test:
            self.stream.writeln()
            self.stream.writeln(separator1)
            self.stream.writeln("\t%s" % flavour)
            self.stream.writeln(separator2)
            for line in apply(traceback.format_exception, err):
                # format_exception entries may span several lines; split
                # and drop the empty piece after the trailing newline.
                for l in string.split(line, "\n")[:-1]:
                    self.stream.writeln("\t%s" % l)
            self.stream.writeln(separator1)
class VerboseTextTestRunner:
    """A test runner class that displays results in textual form.

    It prints out the names of tests as they are run, errors as they
    occur, and a summary of the results at the end of the test run.
    """

    def __init__(self, stream=sys.stderr, descriptions=1):
        # descriptions: pass-through flag for _VerboseTextTestResult
        # (print short descriptions instead of str(test) when true).
        self.stream = _WritelnDecorator(stream)
        self.descriptions = descriptions

    def run(self, test):
        "Run the given test case or test suite."
        result = _VerboseTextTestResult(self.stream, self.descriptions)
        startTime = time.time()
        test(result)
        stopTime = time.time()
        timeTaken = float(stopTime - startTime)
        self.stream.writeln("-" * 78)
        run = result.testsRun
        # Bug fix: pluralize for any count other than exactly one, so
        # zero tests prints "Ran 0 tests" (was 'run > 1', which printed
        # "Ran 0 test").
        self.stream.writeln("Ran %d test%s in %.3fs"
                            % (run, run != 1 and "s" or "", timeTaken))
        self.stream.writeln()
        if not result.wasSuccessful():
            self.stream.write("FAILED (")
            failed, errored = map(len, (result.failures, result.errors))
            if failed:
                self.stream.write("failures=%d" % failed)
            if errored:
                if failed: self.stream.write(", ")
                self.stream.write("errors=%d" % errored)
            self.stream.writeln(")")
        else:
            self.stream.writeln("OK")
        return result
# Which flavour of TextTestRunner is the default?
# 'TextTestRunner' is the name callers import; it currently points at
# the verbose implementation.
TextTestRunner = VerboseTextTestRunner
##############################################################################
# Facilities for running tests from the command line
##############################################################################
class TestProgram:
    """A command-line program that runs a set of tests; this is primarily
       for making test modules conveniently executable.
    """
    # %(progName)s is interpolated from self.__dict__ by usageExit().
    USAGE = """\
Usage: %(progName)s [-h|--help] [test[:(casename|prefix-)]] [...]

Examples:
  %(progName)s                               - run default set of tests
  %(progName)s MyTestSuite                   - run suite 'MyTestSuite'
  %(progName)s MyTestCase:checkSomething     - run MyTestCase.checkSomething
  %(progName)s MyTestCase:check-             - run all 'check*' test methods
                                               in MyTestCase
"""
    def __init__(self, module='__main__', defaultTest=None,
                 argv=None, testRunner=None, suiteClass=TestSuite):
        # module may be a module object or a (possibly dotted) module
        # name; a name is imported and then walked attribute by
        # attribute down to the target submodule.
        if type(module) == type(''):
            self.module = __import__(module)
            for part in string.split(module, '.')[1:]:
                self.module = getattr(self.module, part)
        else:
            self.module = module
        if argv is None:
            argv = sys.argv
        self.defaultTest = defaultTest
        self.testRunner = testRunner
        self.suiteClass = suiteClass
        self.progName = os.path.basename(argv[0])
        # Note: constructing a TestProgram runs the tests immediately;
        # runTests() ends with sys.exit(), so __init__ does not return
        # on a normal run.
        self.parseArgs(argv)
        self.runTests()

    def usageExit(self, msg=None):
        # Print an optional message plus the usage text, then exit(2).
        if msg: print msg
        print self.USAGE % self.__dict__
        sys.exit(2)

    def parseArgs(self, argv):
        # Build self.test from the command-line arguments; any getopt
        # error is reported via usageExit().
        import getopt
        try:
            options, args = getopt.getopt(argv[1:], 'hH', ['help'])
            opts = {}
            for opt, value in options:
                if opt in ('-h','-H','--help'):
                    self.usageExit()
            if len(args) == 0 and self.defaultTest is None:
                # No names given and no default test: collect every
                # TestCase subclass found in the target module.
                self.test = findTestCases(self.module,
                                          suiteClass=self.suiteClass)
                return
            if len(args) > 0:
                self.testNames = args
            else:
                self.testNames = (self.defaultTest,)
            self.createTests()
        except getopt.error, msg:
            self.usageExit(msg)

    def createTests(self):
        # Resolve each requested name into a runnable test and wrap
        # them all in one suite.
        tests = []
        for testName in self.testNames:
            tests.append(createTestInstance(testName, self.module,
                                            suiteClass=self.suiteClass))
        self.test = self.suiteClass(tests)

    def runTests(self):
        # Run with the configured runner (default TextTestRunner) and
        # exit the process: status 0 on success, 1 otherwise.
        if self.testRunner is None:
            self.testRunner = TextTestRunner()
        result = self.testRunner.run(self.test)
        sys.exit(not result.wasSuccessful())
# Conventional entry point: invoking main(...) constructs a TestProgram,
# which parses arguments, runs the tests and exits the process.
main = TestProgram
##############################################################################
# Executing this module from the command line
##############################################################################
if __name__ == "__main__":
    # module=None: require fully-qualified test names on the command
    # line instead of searching a default module for tests.
    main(module=None)
lib/python/unittest.py
View file @
e6b5d0c3
...
...
@@ -30,7 +30,7 @@ SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
"""
__author__
=
"Steve Purcell (stephen_purcell@yahoo.com)"
__version__
=
"$Revision: 1.
20
$"
[
11
:
-
2
]
__version__
=
"$Revision: 1.
1.4.1
$"
[
11
:
-
2
]
import
time
import
sys
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment