Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
81625f82
Commit
81625f82
authored
Jan 23, 2001
by
Christopher Petrilli
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Merging in Catalog changes for the lexicon.
parent
381d6e48
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
153 additions
and
116 deletions
+153
-116
doc/CHANGES.txt
doc/CHANGES.txt
+9
-0
lib/python/Products/ZCatalog/ZCatalog.py
lib/python/Products/ZCatalog/ZCatalog.py
+26
-8
lib/python/SearchIndex/GlobbingLexicon.py
lib/python/SearchIndex/GlobbingLexicon.py
+29
-50
lib/python/SearchIndex/UnTextIndex.py
lib/python/SearchIndex/UnTextIndex.py
+89
-58
No files found.
doc/CHANGES.txt
View file @
81625f82
...
...
@@ -19,6 +19,15 @@ Zope changes
hook to create PythonScripts (for MIMEtype 'text/x-python')
and DTMLMethods (for other 'text' MIMEtypes) (Collector #998).
Bugs Fixed
- Mechanisms in the underbelly of the Catalog and Globbing
Lexicon (which is the default for all new Catalogs) have been
overhauled, giving substantial performance increases. On
simple queries, performance should double (or more) in many
situations, whereas with globbed queries it may increase by
substantially more.
Zope 2.3.0 beta 1
Features Added
...
...
lib/python/Products/ZCatalog/ZCatalog.py
View file @
81625f82
...
...
@@ -101,9 +101,9 @@ from Catalog import Catalog, orify
from
SearchIndex
import
UnIndex
,
UnTextIndex
from
Vocabulary
import
Vocabulary
import
IOBTree
from
Shared.DC.ZRDB.TM
import
TM
from
AccessControl
import
getSecurityManager
manage_addZCatalogForm
=
DTMLFile
(
'dtml/addZCatalog'
,
globals
())
def
manage_addZCatalog
(
self
,
id
,
title
,
vocab_id
=
None
,
REQUEST
=
None
):
...
...
@@ -217,7 +217,8 @@ class ZCatalog(Folder, Persistent, Implicit):
threshold
=
10000
_v_total
=
0
_v_transaction
=
None
def
__init__
(
self
,
id
,
title
=
''
,
vocab_id
=
None
,
container
=
None
):
self
.
id
=
id
self
.
title
=
title
...
...
@@ -401,14 +402,31 @@ class ZCatalog(Folder, Persistent, Implicit):
def
catalog_object
(
self
,
obj
,
uid
):
""" wrapper around catalog """
self
.
_v_total
=
(
self
.
_v_total
+
self
.
_catalog
.
catalogObject
(
obj
,
uid
,
self
.
threshold
))
self
.
_catalog
.
catalogObject
(
obj
,
uid
,
None
)
# None passed in to catalogObject as third argument indicates
# that we shouldn't try to commit subtransactions within any
# indexing code. We throw away the result of the call to
# catalogObject (which is a word count), because it's
# worthless to us here.
if
self
.
threshold
is
not
None
:
# figure out whether or not to commit a subtransaction.
t
=
id
(
get_transaction
())
if
t
!=
self
.
_v_transaction
:
self
.
_v_total
=
0
self
.
_v_transaction
=
t
self
.
_v_total
=
self
.
_v_total
+
1
# increment the _v_total counter for this thread only and get
# a reference to the current transaction.
# the _v_total counter is zeroed if we notice that we're in
# a different transaction than the last one that came by.
# self.threshold represents the number of times that
# catalog_object needs to be called in order for the catalog
# to commit a subtransaction. The semantics here mean that
# we should commit a subtransaction if our threshold is
# exceeded within the boundaries of the current transaction.
if
self
.
_v_total
>
self
.
threshold
:
# commit a subtransaction
get_transaction
().
commit
(
1
)
# kick the cache, this may be overkill but ya never know
self
.
_p_jar
.
cacheFullSweep
(
1
)
self
.
_v_total
=
0
...
...
@@ -692,7 +710,7 @@ class ZCatalog(Folder, Persistent, Implicit):
'%s unchanged.'
%
(
len
(
fixed
),
len
(
removed
),
unchanged
),
action
=
'./manage_main'
)
Globals
.
default__class_init__
(
ZCatalog
)
...
...
lib/python/SearchIndex/GlobbingLexicon.py
View file @
81625f82
...
...
@@ -83,28 +83,22 @@
#
##############################################################################
import
string
,
regex
,
ts_regex
import
regsub
from
Lexicon
import
Lexicon
__doc__
=
""" Lexicon object that supports
"""
from
Lexicon
import
Lexicon
from
Splitter
import
Splitter
from
Persistence
import
Persistent
from
Acquisition
import
Implicit
import
OIBTree
,
BTree
,
IOBTree
from
intSet
import
intSet
OIBTree
=
OIBTree
.
BTree
OOBTree
=
BTree
.
BTree
IOBTree
=
IOBTree
.
BTree
import
re
from
UnTextIndex
import
Or
import
re
,
time
import
OIBTree
,
BTree
,
IOBTree
,
IIBTree
OIBTree
=
OIBTree
.
BTree
# Object -> Integer
OOBTree
=
BTree
.
BTree
# Object -> Object
IOBTree
=
IOBTree
.
BTree
# Integer -> Object
IIBucket
=
IIBTree
.
Bucket
# Integer -> Integer
import
pdb
class
GlobbingLexicon
(
Lexicon
):
"""
...
...
@@ -155,7 +149,6 @@ class GlobbingLexicon(Lexicon):
set
.
insert
(
self
.
counter
)
self
.
_digrams
=
_digrams
counter
=
self
.
counter
self
.
counter
=
self
.
counter
+
1
return
counter
...
...
@@ -163,14 +156,14 @@ class GlobbingLexicon(Lexicon):
def
get
(
self
,
pattern
):
""" Query the lexicon for words matching a pattern.
"""
wc_set
=
[
self
.
multi_wc
,
self
.
single_wc
]
digrams
=
[]
globbing
=
0
for
i
in
range
(
len
(
pattern
)):
if
pattern
[
i
]
in
wc_set
:
globbing
=
1
continue
if
i
==
0
:
...
...
@@ -184,21 +177,19 @@ class GlobbingLexicon(Lexicon):
except
IndexError
:
digrams
.
append
(
(
pattern
[
i
]
+
self
.
eow
)
)
if
not
globbing
:
result
=
self
.
_lexicon
.
get
(
pattern
,
())
return
(
result
,
)
## now get all of the intsets that contain the result digrams
result
=
None
result
=
IIBucket
()
for
digram
in
digrams
:
if
self
.
_digrams
.
has_key
(
digram
):
set
=
self
.
_digrams
[
digram
]
if
set
is
not
None
:
if
result
is
None
:
result
=
set
else
:
result
.
intersection
(
set
)
if
result
is
None
:
matchSet
=
self
.
_digrams
[
digram
]
if
matchSet
is
not
None
:
result
=
IIBucket
().
union
(
matchSet
)
if
len
(
result
)
==
0
:
return
()
else
:
## now we have narrowed the list of possible candidates
...
...
@@ -211,10 +202,9 @@ class GlobbingLexicon(Lexicon):
expr
=
re
.
compile
(
self
.
translate
(
pattern
))
words
=
[]
hits
=
[]
for
x
in
result
:
if
expr
.
sear
ch
(
self
.
_inverseLex
[
x
]):
for
x
in
result
.
keys
()
:
if
expr
.
mat
ch
(
self
.
_inverseLex
[
x
]):
hits
.
append
(
x
)
return
hits
def
__getitem__
(
self
,
word
):
...
...
@@ -226,14 +216,15 @@ class GlobbingLexicon(Lexicon):
"""
words
=
[]
wids
=
[]
for
w
in
q
:
if
(
(
self
.
multi_wc
in
w
)
or
(
self
.
single_wc
in
w
)
):
(
self
.
single_wc
in
w
)
):
wids
=
self
.
get
(
w
)
for
wid
in
wids
:
if
words
:
words
.
append
(
Or
)
words
.
append
(
self
.
_inverseLex
[
wid
]
)
words
.
append
(
wid
)
else
:
words
.
append
(
w
)
...
...
@@ -262,19 +253,7 @@ class GlobbingLexicon(Lexicon):
if
c
==
self
.
multi_wc
:
res
=
res
+
'.*'
elif
c
==
self
.
single_wc
:
res
=
res
+
'.'
res
=
res
+
'.
?
'
else
:
res
=
res
+
re
.
escape
(
c
)
return
res
+
"$"
return
res
+
'$'
lib/python/SearchIndex/UnTextIndex.py
View file @
81625f82
...
...
@@ -92,7 +92,7 @@ is no longer known.
"""
__version__
=
'$Revision: 1.3
3
$'
[
11
:
-
2
]
__version__
=
'$Revision: 1.3
4
$'
[
11
:
-
2
]
from
Globals
import
Persistent
...
...
@@ -368,24 +368,44 @@ class UnTextIndex(Persistent, Implicit):
def
__getitem__
(
self
,
word
):
"""Return an InvertedIndex-style result "list"
"""
src
=
tuple
(
self
.
getLexicon
(
self
.
_lexicon
).
Splitter
(
word
))
if
not
src
:
return
ResultList
({},
(
word
,),
self
)
if
len
(
src
)
==
1
:
src
=
src
[
0
]
if
src
[:
1
]
==
'"'
and
src
[
-
1
:]
==
'"'
:
return
self
[
src
]
r
=
self
.
_index
.
get
(
self
.
getLexicon
(
self
.
_lexicon
).
get
(
src
)[
0
],
None
)
if
r
is
None
:
r
=
{}
return
ResultList
(
r
,
(
src
,),
self
)
r
=
None
for
word
in
src
:
rr
=
self
[
word
]
if
r
is
None
:
r
=
rr
else
:
r
=
r
.
near
(
rr
)
return
r
Note that this differentiates between being passed an Integer
and a String. Strings are looked up in the lexicon, whereas
Integers are assumed to be resolved word ids. """
if
type
(
word
)
is
IntType
:
# We have a word ID
result
=
self
.
_index
.
get
(
word
,
{})
return
ResultList
(
result
,
(
word
,),
self
)
else
:
splitSource
=
tuple
(
self
.
getLexicon
(
self
.
_lexicon
).
Splitter
(
word
))
if
not
splitSource
:
return
ResultList
({},
(
word
,),
self
)
if
len
(
splitSource
)
==
1
:
splitSource
=
splitSource
[
0
]
if
splitSource
[:
1
]
==
'"'
and
splitSource
[
-
1
:]
==
'"'
:
return
self
[
splitSource
]
r
=
self
.
_index
.
get
(
self
.
getLexicon
(
self
.
_lexicon
).
get
(
splitSource
)[
0
],
None
)
if
r
is
None
:
r
=
{}
return
ResultList
(
r
,
(
splitSource
,),
self
)
r
=
None
for
word
in
splitSource
:
rr
=
self
[
word
]
if
r
is
None
:
r
=
rr
else
:
r
=
r
.
near
(
rr
)
return
r
def
_apply_index
(
self
,
request
,
cid
=
''
):
...
...
@@ -482,13 +502,11 @@ class UnTextIndex(Persistent, Implicit):
whole thing is 'evaluated'
"""
# First replace any occurrences of " and not " with " andnot "
s
=
ts_regex
.
gsub
(
'[%s]+[aA][nN][dD][%s]*[nN][oO][tT][%s]+'
%
(
ws
*
3
),
' andnot '
,
s
)
# do some parsing
q
=
parse
(
s
)
...
...
@@ -496,7 +514,7 @@ class UnTextIndex(Persistent, Implicit):
## For example, substitute wildcards, or translate words into
## various languages.
q
=
self
.
getLexicon
(
self
.
_lexicon
).
query_hook
(
q
)
# do some more parsing
q
=
parse2
(
q
,
default_operator
)
...
...
@@ -509,65 +527,78 @@ class UnTextIndex(Persistent, Implicit):
try
:
left
=
q
[
i
-
1
]
right
=
q
[
i
+
1
]
except
IndexError
:
raise
QueryError
,
"Malformed query"
t
=
type
(
left
)
if
t
is
ListType
:
left
=
evaluate
(
left
,
self
)
elif
t
is
StringType
:
left
=
self
[
left
]
t
=
type
(
right
)
if
t
is
ListType
:
right
=
evaluate
(
right
,
self
)
elif
t
is
StringType
:
right
=
self
[
right
]
except
IndexError
:
raise
QueryError
,
"Malformed query"
operandType
=
type
(
left
)
if
operandType
is
IntType
:
left
=
self
[
left
]
elif
operandType
is
StringType
:
left
=
self
[
left
]
elif
operandType
is
ListType
:
left
=
evaluate
(
left
,
self
)
operandType
=
type
(
right
)
if
operandType
is
IntType
:
right
=
self
[
right
]
elif
operandType
is
StringType
:
right
=
self
[
right
]
elif
operandType
is
ListType
:
right
=
evaluate
(
right
,
self
)
return
(
left
,
right
)
def
evaluate
(
self
,
q
):
def
evaluate
(
self
,
q
uery
):
'''Evaluate a parsed query'''
## import pdb
## pdb.set_trace()
if
(
len
(
q
)
==
1
):
if
(
type
(
q
[
0
])
is
ListType
):
return
evaluate
(
q
[
0
],
self
)
return
self
[
q
[
0
]]
# There are two options if the query passed in is only one
# item. It means either it's an embedded query, in which case
# we'll recursively evaluate, otherwise it's nothing for us
# to evaluate, and we just get the results and return them.
if
(
len
(
query
)
==
1
):
if
(
type
(
query
[
0
])
is
ListType
):
return
evaluate
(
query
[
0
],
self
)
return
self
[
query
[
0
]]
# __getitem__
# Now we need to loop through the query and expand out
# operators. They are currently evaluated in the following
# order: AndNot -> And -> Or -> Near
i
=
0
while
(
i
<
len
(
q
)):
if
q
[
i
]
is
AndNot
:
left
,
right
=
self
.
get_operands
(
q
,
i
)
while
(
i
<
len
(
q
uery
)):
if
q
uery
[
i
]
is
AndNot
:
left
,
right
=
self
.
get_operands
(
q
uery
,
i
)
val
=
left
.
and_not
(
right
)
q
[(
i
-
1
)
:
(
i
+
2
)]
=
[
val
]
q
uery
[(
i
-
1
)
:
(
i
+
2
)]
=
[
val
]
else
:
i
=
i
+
1
i
=
0
while
(
i
<
len
(
q
)):
if
q
[
i
]
is
And
:
left
,
right
=
self
.
get_operands
(
q
,
i
)
while
(
i
<
len
(
q
uery
)):
if
q
uery
[
i
]
is
And
:
left
,
right
=
self
.
get_operands
(
q
uery
,
i
)
val
=
left
&
right
q
[(
i
-
1
)
:
(
i
+
2
)]
=
[
val
]
q
uery
[(
i
-
1
)
:
(
i
+
2
)]
=
[
val
]
else
:
i
=
i
+
1
i
=
0
while
(
i
<
len
(
q
)):
if
q
[
i
]
is
Or
:
left
,
right
=
self
.
get_operands
(
q
,
i
)
while
(
i
<
len
(
q
uery
)):
if
q
uery
[
i
]
is
Or
:
left
,
right
=
self
.
get_operands
(
q
uery
,
i
)
val
=
left
|
right
q
[(
i
-
1
)
:
(
i
+
2
)]
=
[
val
]
q
uery
[(
i
-
1
)
:
(
i
+
2
)]
=
[
val
]
else
:
i
=
i
+
1
i
=
0
while
(
i
<
len
(
q
)):
if
q
[
i
]
is
Near
:
left
,
right
=
self
.
get_operands
(
q
,
i
)
while
(
i
<
len
(
q
uery
)):
if
q
uery
[
i
]
is
Near
:
left
,
right
=
self
.
get_operands
(
q
uery
,
i
)
val
=
left
.
near
(
right
)
q
[(
i
-
1
)
:
(
i
+
2
)]
=
[
val
]
q
uery
[(
i
-
1
)
:
(
i
+
2
)]
=
[
val
]
else
:
i
=
i
+
1
if
(
len
(
q
)
!=
1
):
raise
QueryError
,
"Malformed query"
if
(
len
(
q
uery
)
!=
1
):
raise
QueryError
,
"Malformed query"
return
q
[
0
]
return
q
uery
[
0
]
def
parse
(
s
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment