Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
63651bb3
Commit
63651bb3
authored
Jan 31, 2000
by
Michel Pelletier
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
partial searching
parent
d03616e0
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
365 additions
and
47 deletions
+365
-47
lib/python/Products/ZCatalog/ZCatalog.py
lib/python/Products/ZCatalog/ZCatalog.py
+53
-13
lib/python/SearchIndex/GlobbingLexicon.py
lib/python/SearchIndex/GlobbingLexicon.py
+252
-0
lib/python/SearchIndex/Lexicon.py
lib/python/SearchIndex/Lexicon.py
+31
-10
lib/python/SearchIndex/UnIndex.py
lib/python/SearchIndex/UnIndex.py
+3
-2
lib/python/SearchIndex/UnTextIndex.py
lib/python/SearchIndex/UnTextIndex.py
+26
-22
No files found.
lib/python/Products/ZCatalog/ZCatalog.py
View file @
63651bb3
...
...
@@ -99,17 +99,46 @@ from DocumentTemplate.DT_Util import Eval, expr_globals
from
AccessControl.Permission
import
name_trans
from
Catalog
import
Catalog
,
orify
from
SearchIndex
import
UnIndex
,
UnTextIndex
from
Vocabulary
import
Vocabulary
import
IOBTree
manage_addZCatalogForm
=
HTMLFile
(
'addZCatalog'
,
globals
())
def
manage_addZCatalog
(
self
,
id
,
title
,
REQUEST
=
None
):
def
manage_addZCatalog
(
self
,
id
,
title
,
vocab
=
''
,
vocab_id
=
''
,
REQUEST
=
None
):
"""Add a ZCatalog object
"""
c
=
ZCatalog
(
id
,
title
)
self
.
_setObject
(
id
,
c
)
c
=
ZCatalog
(
id
,
title
,
vocab
,
vocab_id
,
self
)
self
.
_setObject
(
id
,
c
)
if
REQUEST
is
not
None
:
return
self
.
manage_main
(
self
,
REQUEST
)
return
self
.
manage_main
(
self
,
REQUEST
)
def VocabularyIDs(self):
    """Return a sorted list of (id, id) pairs for acquirable vocabularies.

    Starting at 'self' and following 'aq_parent' upwards, every container
    that offers 'objectValues' is scanned.  An object counts as a
    vocabulary when it has a true '_isAVocabulary' attribute and an 'id';
    each distinct id is reported once.  Adapted from ZSQLMethods.
    """
    found = {}
    seen = found.has_key
    StringType = type('')

    container = self
    while container is not None:
        if hasattr(container, 'objectValues'):
            for ob in container.objectValues():
                if not (hasattr(ob, '_isAVocabulary') and ob._isAVocabulary
                        and hasattr(ob, 'id')):
                    continue
                vid = ob.id
                # 'id' may be a method rather than a plain string.
                if type(vid) is not StringType:
                    vid = vid()
                if seen(vid):
                    continue
                # NOTE(review): the title_and_id() label is computed but
                # never stored; only the id is recorded, so both members
                # of each returned pair are the id.  Confirm whether the
                # label was meant to be the first member.
                if hasattr(ob, 'title_and_id'):
                    ob.title_and_id()
                found[vid] = vid
        # Climb one level, or stop when there is no parent left.
        if hasattr(container, 'aq_parent'):
            container = container.aq_parent
        else:
            container = None

    pairs = map(lambda item: (item[1], item[0]), found.items())
    pairs.sort()
    return pairs
class
ZCatalog
(
Folder
,
Persistent
,
Implicit
):
...
...
@@ -191,13 +220,22 @@ class ZCatalog(Folder, Persistent, Implicit):
threshold
=
10000
_v_total
=
0
def
__init__
(
self
,
id
,
title
=
''
):
def
__init__
(
self
,
id
,
title
=
''
,
vocab
=
0
,
vocab_id
=
''
,
container
=
None
):
self
.
id
=
id
self
.
title
=
title
self
.
vocab_id
=
vocab_id
self
.
threshold
=
10000
self
.
_v_total
=
0
self
.
_catalog
=
Catalog
()
if
not
vocab
:
v
=
Vocabulary
(
'Vocabulary'
,
'Vocabulary'
,
globbing
=
1
)
self
.
_setObject
(
'Vocabulary'
,
v
)
v
=
'Vocabulary'
else
:
v
=
vocab_id
self
.
_catalog
=
Catalog
(
vocabulary
=
v
)
self
.
_catalog
.
addColumn
(
'id'
)
self
.
_catalog
.
addIndex
(
'id'
,
'FieldIndex'
)
...
...
@@ -215,6 +253,11 @@ class ZCatalog(Folder, Persistent, Implicit):
self
.
_catalog
.
addIndex
(
'PrincipiaSearchSource'
,
'TextIndex'
)
def getVocabulary(self):
    """Return the attribute of this catalog named by 'self.vocab_id'."""
    vocab_id = self.vocab_id
    return getattr(self, vocab_id)
def
manage_edit
(
self
,
RESPONSE
,
URL1
,
threshold
=
1000
,
REQUEST
=
None
):
""" edit the catalog """
self
.
threshold
=
threshold
...
...
@@ -359,7 +402,7 @@ class ZCatalog(Folder, Persistent, Implicit):
if
self
.
_v_total
>
self
.
threshold
:
# commit a subtransaction
get_transaction
().
commit
(
1
)
# kick the cache
# kick the cache
, this may be overkill but ya never know
self
.
_p_jar
.
cacheFullSweep
(
1
)
self
.
_v_total
=
0
...
...
@@ -545,10 +588,7 @@ class ZCatalog(Folder, Persistent, Implicit):
)
):
if
apply_func
:
if
apply_path
:
apply_func
(
ob
,
(
apply_path
+
'/'
+
p
))
else
:
apply_func
(
ob
,
p
)
else
:
add_result
((
p
,
ob
))
dflag
=
0
...
...
lib/python/SearchIndex/GlobbingLexicon.py
0 → 100644
View file @
63651bb3
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
import
string
,
regex
,
ts_regex
import
regsub
from
Lexicon
import
Lexicon
__doc__
=
""" Lexicon object that supports
"""
from
Splitter
import
Splitter
from
Persistence
import
Persistent
from
Acquisition
import
Implicit
import
OIBTree
,
BTree
,
IOBTree
from
intSet
import
intSet
OIBTree
=
OIBTree
.
BTree
OOBTree
=
BTree
.
BTree
IOBTree
=
IOBTree
.
BTree
import
re
class GlobbingLexicon(Lexicon):
    """Lexicon that supports wildcard ("globbing") word lookups.

    Besides the word -> id and id -> word mappings, every stored word is
    broken into digrams (pairs of adjacent characters).  The first and
    last characters are additionally paired with the 'eow' marker, so the
    word "ab" is recorded under "$a", "ab" and "b$".  Each digram maps to
    an intSet of the ids of the words containing it; 'query' uses these
    sets to narrow the candidates before confirming each one with a
    regular expression.
    """

    multi_wc = '*'     # translated to the regular expression '.*'
    single_wc = '?'    # translated to the regular expression '.'
    eow = '$'          # marks the beginning and the end of a word

    def __init__(self):
        self.counter = 0                # id handed to the next new word
        self._lexicon = OIBTree()       # word -> word id
        self._inverseLex = IOBTree()    # word id -> word
        self._digrams = OOBTree()       # digram -> intSet of word ids

    def set(self, word):
        """Return the word id of 'word', adding the word if it is new.

        The id returned for a new word is the id it is stored under, so
        the first and all later calls for the same word agree.
        """
        if self._lexicon.has_key(word):
            return self._lexicon[word]

        word = intern(word)
        # Work out the digrams before touching any mapping so that a
        # failure cannot leave the lexicon half-updated.
        digrams = self._word_digrams(word)

        wid = self.counter
        self._lexicon[word] = wid
        self._inverseLex[wid] = word

        _digrams = self._digrams
        for digram in digrams:
            wids = _digrams.get(digram)
            if wids is None:
                _digrams[digram] = wids = intSet()
            wids.insert(wid)
        # Kept from the original; presumably the re-assignment makes the
        # persistence machinery register the change -- confirm.
        self._digrams = _digrams

        self.counter = wid + 1
        return wid

    def _word_digrams(self, word):
        """Return the list of digrams under which 'word' is recorded.

        An empty word has no digrams.
        """
        if not word:
            return []
        eow = self.eow
        digrams = [eow + word[0]]               # mark the beginning
        for i in range(len(word) - 1):
            digrams.append(word[i:i+2])
        digrams.append(word[-1] + eow)          # mark the end
        return digrams

    def _pattern_digrams(self, pattern):
        """Return the digrams every word matching 'pattern' must contain.

        Only literal characters contribute: a literal first character
        gives a beginning-of-word digram, a literal last character an
        end-of-word digram, and two adjacent literals give an ordinary
        digram.  Wildcards never appear in the result.
        """
        wc_set = (self.multi_wc, self.single_wc)
        eow = self.eow
        last = len(pattern) - 1
        digrams = []
        for i in range(len(pattern)):
            c = pattern[i]
            if c in wc_set:
                continue
            if i == 0:
                digrams.append(eow + c)
            if i == last:
                digrams.append(c + eow)
            elif pattern[i+1] not in wc_set:
                digrams.append(c + pattern[i+1])
        return digrams

    def query(self, pattern):
        """Return the ids of the words matching 'pattern'.

        'pattern' may contain 'multi_wc' and 'single_wc' wildcards.  The
        result is a list of word ids, or an empty tuple when the digram
        lookup leaves no candidates.  As before, a pattern that yields no
        digram at all (for example "*" or "?a?") returns an empty tuple.
        """
        candidates = None
        for digram in self._pattern_digrams(pattern):
            wids = self._digrams.get(digram)
            if wids is None:
                # No stored word contains a digram the pattern requires.
                return ()
            if candidates is None:
                candidates = wids
            else:
                # Keep the intersection; the original discarded the value
                # returned here, so candidates were never narrowed.
                candidates = candidates.intersection(wids)

        if candidates is None:
            return ()

        ## The candidates contain all of the pattern's digrams, but
        ## possibly in the wrong order or position, so each one is
        ## checked against the whole pattern.  'match' anchors at the
        ## start of the word ('translate' anchors the end); 'search'
        ## would let a pattern such as "?at" match inside "flat".
        expr = re.compile(self.translate(pattern))
        hits = []
        for wid in candidates:
            if expr.match(self._inverseLex[wid]):
                hits.append(wid)
        return hits

    def __getitem__(self, word):
        """Return the result of query(word)."""
        return self.query(word)

    def translate(self, pat):
        """Translate a PATTERN to a regular expression.

        'multi_wc' becomes '.*', 'single_wc' becomes '.', every other
        character is escaped, and the result is anchored with '$'.

        There is no way to quote meta-characters.
        """
        res = ''
        for c in pat:
            if c == self.multi_wc:
                res = res + '.*'
            elif c == self.single_wc:
                res = res + '.'
            else:
                res = res + re.escape(c)
        return res + "$"
lib/python/SearchIndex/Lexicon.py
View file @
63651bb3
...
...
@@ -113,15 +113,8 @@ class Lexicon(Persistent, Implicit):
"""
def
__init__
(
self
,
globbish
=
None
):
def
__init__
(
self
):
self
.
_lexicon
=
OIBTree
()
if
globbish
:
self
.
_ngrams
=
OOBTree
()
self
.
counter
=
0
def
__getitem__
(
self
,
key
):
""" overload mapping behavior """
return
self
.
_lexicon
[
key
]
def
set
(
self
,
word
):
""" return the word id of 'word' """
...
...
@@ -134,19 +127,23 @@ class Lexicon(Persistent, Implicit):
self
.
counter
=
self
.
counter
+
1
return
self
.
counter
def
get
(
self
,
key
):
""" """
return
self
.
_lexicon
[
key
]
def
__len__
(
self
):
return
len
(
self
.
_lexicon
)
def
Splitter
(
self
,
astring
,
words
):
""" wrap the splitter """
return
Splitter
(
astring
,
words
)
def
grep
(
self
,
query
):
"""
regular expression search through the lexicon
he he.
Do not use unless you know what your doing!!!
"""
expr
=
re
.
compile
(
query
)
hits
=
[]
...
...
@@ -155,6 +152,12 @@ class Lexicon(Persistent, Implicit):
hits
.
append
(
x
)
return
hits
AndNot
=
'andnot'
And
=
'and'
Or
=
'or'
...
...
@@ -166,9 +169,27 @@ def query(s, index, default_operator = Or,
# First replace any occurrences of " and not " with " andnot "
s
=
ts_regex
.
gsub
(
'[%s]+and[%s]*not[%s]+'
%
(
ws
*
3
),
' andnot '
,
s
)
q
=
parse
(
s
)
q
=
parse_wc
(
q
,
index
)
q
=
parse2
(
q
,
default_operator
)
return
evaluate
(
q
,
index
)
def parse_wc(q, index):
    '''Expand the wildcard terms of query list 'q' into matching words.

    The lexicon is obtained through index.getLexicon(index._lexicon).
    Terms without a wildcard are copied unchanged.  A term containing the
    lexicon's 'multi_wc' or 'single_wc' is replaced by the words that
    lex.query() finds for it; each such word is preceded by 'Or' unless
    it is the very first item of the result.
    '''
    lex = index.getLexicon(index._lexicon)

    expanded = []
    for term in q:
        if not ((lex.multi_wc in term) or (lex.single_wc in term)):
            expanded.append(term)
            continue
        for wid in lex.query(term):
            # NOTE(review): expansions are spliced flat into the result,
            # so 'Or' also ends up between an earlier plain term and the
            # first expansion -- confirm this is intended.
            if expanded:
                expanded.append(Or)
            expanded.append(lex._inverseLex[wid])
    return expanded
def
parse
(
s
):
'''Parse parentheses and quotes'''
l
=
[]
...
...
lib/python/SearchIndex/UnIndex.py
View file @
63651bb3
...
...
@@ -84,9 +84,10 @@
##############################################################################
"""Simple column indices"""
__version__
=
'$Revision: 1.
9
$'
[
11
:
-
2
]
__version__
=
'$Revision: 1.
10
$'
[
11
:
-
2
]
from
Globals
import
Persistent
from
Acquisition
import
Implicit
import
BTree
import
IOBTree
from
intSet
import
intSet
...
...
@@ -107,7 +108,7 @@ def nonEmpty(s):
return
1
class
UnIndex
(
Persistent
):
class
UnIndex
(
Persistent
,
Implicit
):
"""UnIndex object interface"""
def
__init__
(
self
,
id
=
None
,
ignore_ex
=
None
,
call_methods
=
None
):
...
...
lib/python/SearchIndex/UnTextIndex.py
View file @
63651bb3
...
...
@@ -92,10 +92,11 @@ is no longer known.
"""
__version__
=
'$Revision: 1.1
8
$'
[
11
:
-
2
]
__version__
=
'$Revision: 1.1
9
$'
[
11
:
-
2
]
from
Globals
import
Persistent
import
BTree
,
IIBTree
,
IOBTree
,
OIBTree
from
Acquisition
import
Implicit
BTree
=
BTree
.
BTree
IOBTree
=
IOBTree
.
BTree
IIBucket
=
IIBTree
.
Bucket
...
...
@@ -110,7 +111,7 @@ import string, regex, regsub, pdb
from
Lexicon
import
Lexicon
,
query
,
stop_word_dict
from
ResultList
import
ResultList
class
UnTextIndex
(
Persistent
):
class
UnTextIndex
(
Persistent
,
Implicit
):
def
__init__
(
self
,
id
=
None
,
ignore_ex
=
None
,
call_methods
=
None
,
lexicon
=
None
):
...
...
@@ -161,7 +162,18 @@ class UnTextIndex(Persistent):
if
lexicon
is
None
:
self
.
_lexicon
=
Lexicon
()
else
:
self
.
_lexicon
=
lexicon
self
.
_lexicon
=
lexicon
def getLexicon(self, vocab_id):
    """Return the 'lexicon' of the vocabulary named 'vocab_id'.

    The vocabulary is fetched as the attribute 'vocab_id' of this index.
    This is a bit of a hack: indexes have been made acquirers so that
    they can acquire a vocabulary object from the Zope object system.
    Indexes were probably never intended to participate in this way, but
    it does not appear to cause much of a problem.
    """
    return getattr(self, vocab_id).lexicon
def
__len__
(
self
):
...
...
@@ -213,7 +225,7 @@ class UnTextIndex(Persistent):
## The Splitter should now be european compliant at least.
## Someone should test this.
src
=
self
.
_lexicon
.
Splitter
(
k
,
self
.
_syn
)
src
=
self
.
getLexicon
(
self
.
_lexicon
)
.
Splitter
(
k
,
self
.
_syn
)
## This returns a tuple of stemmed words. Stopwords have been
## stripped.
...
...
@@ -226,7 +238,7 @@ class UnTextIndex(Persistent):
index
=
self
.
_index
unindex
=
self
.
_unindex
lexicon
=
self
.
_lexicon
lexicon
=
self
.
getLexicon
(
self
.
_lexicon
)
get
=
index
.
get
unindex
[
i
]
=
[]
times
=
0
...
...
@@ -297,28 +309,20 @@ class UnTextIndex(Persistent):
def
__getitem__
(
self
,
word
):
"""Return an InvertedIndex-style result "list"
"""
src
=
tuple
(
self
.
_lexicon
.
Splitter
(
word
,
self
.
_syn
))
if
not
src
:
return
ResultList
({},
(
word
,),
self
)
src
=
tuple
(
self
.
getLexicon
(
self
.
_lexicon
).
Splitter
(
word
,
self
.
_syn
))
if
not
src
:
return
ResultList
({},
(
word
,),
self
)
if
len
(
src
)
==
1
:
src
=
src
[
0
]
if
src
[:
1
]
==
'"'
and
src
[
-
1
:]
==
'"'
:
return
self
[
src
]
r
=
self
.
_index
.
get
(
self
.
_lexicon
[
word
],
None
)
if
r
is
None
:
r
=
{}
if
src
[:
1
]
==
'"'
and
src
[
-
1
:]
==
'"'
:
return
self
[
src
]
r
=
self
.
_index
.
get
(
self
.
getLexicon
(
self
.
_lexicon
)[
word
][
0
],
None
)
if
r
is
None
:
r
=
{}
return
ResultList
(
r
,
(
word
,),
self
)
r
=
None
for
word
in
src
:
rr
=
self
[
word
]
if
r
is
None
:
r
=
rr
else
:
r
=
r
.
near
(
rr
)
if
r
is
None
:
r
=
rr
else
:
r
=
r
.
near
(
rr
)
return
r
...
...
@@ -393,13 +397,13 @@ class UnTextIndex(Persistent):
r
=
[]
for
word
in
words
:
r
=
r
+
self
.
_lexicon
.
Splitter
(
doc
,
self
.
_syn
).
indexes
(
word
)
r
=
r
+
self
.
getLexicon
(
self
.
_lexicon
)
.
Splitter
(
doc
,
self
.
_syn
).
indexes
(
word
)
return
r
def
_subindex
(
self
,
isrc
,
d
,
old
,
last
):
src
=
self
.
_l
exicon
.
Splitter
(
isrc
,
self
.
_syn
)
src
=
self
.
getL
exicon
.
Splitter
(
isrc
,
self
.
_syn
)
for
s
in
src
:
if
s
[
0
]
==
'
\
"
'
:
last
=
self
.
subindex
(
s
[
1
:
-
1
],
d
,
old
,
last
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment