Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
3095fce7
Commit
3095fce7
authored
Apr 14, 2004
by
Andreas Jung
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
removed 'SearchIndex' package
parent
2dc887a3
Changes
16
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
2 additions
and
3076 deletions
+2
-3076
doc/CHANGES.txt
doc/CHANGES.txt
+2
-0
lib/python/SearchIndex/.testinfo
lib/python/SearchIndex/.testinfo
+0
-1
lib/python/SearchIndex/GlobbingLexicon.py
lib/python/SearchIndex/GlobbingLexicon.py
+0
-245
lib/python/SearchIndex/Index.py
lib/python/SearchIndex/Index.py
+0
-248
lib/python/SearchIndex/Lexicon.py
lib/python/SearchIndex/Lexicon.py
+0
-202
lib/python/SearchIndex/PluggableIndex.py
lib/python/SearchIndex/PluggableIndex.py
+0
-74
lib/python/SearchIndex/README.txt
lib/python/SearchIndex/README.txt
+0
-2
lib/python/SearchIndex/ResultList.py
lib/python/SearchIndex/ResultList.py
+0
-94
lib/python/SearchIndex/Setup
lib/python/SearchIndex/Setup
+0
-2
lib/python/SearchIndex/Splitter.c
lib/python/SearchIndex/Splitter.c
+0
-427
lib/python/SearchIndex/TextIndex.py
lib/python/SearchIndex/TextIndex.py
+0
-563
lib/python/SearchIndex/UnIndex.py
lib/python/SearchIndex/UnIndex.py
+0
-393
lib/python/SearchIndex/UnKeywordIndex.py
lib/python/SearchIndex/UnKeywordIndex.py
+0
-92
lib/python/SearchIndex/UnTextIndex.py
lib/python/SearchIndex/UnTextIndex.py
+0
-689
lib/python/SearchIndex/__init__.py
lib/python/SearchIndex/__init__.py
+0
-25
lib/python/SearchIndex/randid.py
lib/python/SearchIndex/randid.py
+0
-19
No files found.
doc/CHANGES.txt
View file @
3095fce7
...
...
@@ -24,6 +24,8 @@ Zope Changes
Features added
- The obsolete 'SearchIndex' package has been removed
- Traversal now supports a "post traversal hook" that gets run
after traversal has finished and the security context is established.
...
...
lib/python/SearchIndex/.testinfo
deleted
100644 → 0
View file @
2dc887a3
# Nothing to see here (deprecated module).
lib/python/SearchIndex/GlobbingLexicon.py
deleted
100644 → 0
View file @
2dc887a3
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
#############################################################################
from
Lexicon
import
Lexicon
from
Splitter
import
Splitter
from
UnTextIndex
import
Or
import
re
,
string
from
BTrees.IIBTree
import
IISet
,
union
,
IITreeSet
from
BTrees.OIBTree
import
OIBTree
from
BTrees.IOBTree
import
IOBTree
from
BTrees.OOBTree
import
OOBTree
from
randid
import
randid
class GlobbingLexicon(Lexicon):
    """Lexicon which supports basic globbing function ('*' and '?').

    This lexicon keeps several data structures around that are useful
    for searching. They are:

      '_lexicon' -- Contains the mapping from word => word_id

      '_inverseLex' -- Contains the mapping from word_id => word

      '_digrams' -- Contains a mapping from digram => set of word_ids

    Before going further, it is necessary to understand what a digram is,
    as it is a core component of the structure of this lexicon.  A digram
    is a two-letter sequence in a word.  For example, the word 'zope'
    would be converted into the digrams::

      ['$z', 'zo', 'op', 'pe', 'e$']

    where the '$' is a word marker.  It is used at the beginning and end
    of the words.  Those digrams are significant.
    """

    multi_wc = '*'
    single_wc = '?'
    eow = '$'

    def __init__(self):
        self.clear()

    def clear(self):
        """Reset the lexicon, the inverse lexicon and the digram map."""
        self._lexicon = OIBTree()
        self._inverseLex = IOBTree()
        self._digrams = OOBTree()

    def _convertBTrees(self, threshold=200):
        """Convert old-style trees to the current BTree types.

        The base class converts '_lexicon' and '_inverseLex'; this
        method additionally converts '_digrams' unless it already is
        an OOBTree.
        """
        Lexicon._convertBTrees(self, threshold)
        if type(self._digrams) is OOBTree:
            return

        from BTrees.convert import convert

        old_digrams = self._digrams
        self._digrams = OOBTree()
        self._digrams._p_jar = self._p_jar
        convert(old_digrams, self._digrams, threshold, IITreeSet)

    def createDigrams(self, word):
        """Returns a list with the set of digrams in the word."""
        # Pad with the word marker on both sides, then take every
        # adjacent pair: 'zope' -> '$zope$' -> ['$z', 'zo', ..., 'e$'].
        padded = self.eow + word + self.eow
        return [padded[i:i + 2] for i in range(len(padded) - 1)]

    def getWordId(self, word):
        """Provided 'word', return the matching integer word id.

        A new id is assigned if the word is not in the lexicon yet.
        """
        if self._lexicon.has_key(word):
            return self._lexicon[word]
        return self.assignWordId(word)

    set = getWordId                     # Kludge for old code

    def getWord(self, wid):
        """Return the word stored for 'wid', or None if there is none."""
        return self._inverseLex.get(wid, None)

    def assignWordId(self, word):
        """Assigns a new word id to the provided word, and return it."""
        # Double check it's not in the lexicon already, and if it is, just
        # return it.
        if self._lexicon.has_key(word):
            return self._lexicon[word]

        # Get word id. BBB Backward compat pain.
        inverse = self._inverseLex
        try:
            insert = inverse.insert
        except AttributeError:
            # we have an "old" BTree object
            if inverse:
                wid = inverse.keys()[-1] + 1
            else:
                # Rebind the local as well: the original only replaced
                # the attribute and then stored the word in the old,
                # discarded object, losing the wid -> word mapping.
                inverse = self._inverseLex = IOBTree()
                wid = 1
            inverse[wid] = word
        else:
            # we have a "new" IOBTree object; insert() returns a false
            # value when the key is already taken, so retry on collision.
            wid = randid()
            while not insert(wid, word):
                wid = randid()

        self._lexicon[word] = wid

        # Now take all the digrams and insert them into the digram map.
        for digram in self.createDigrams(word):
            wids = self._digrams.get(digram, None)
            if wids is None:
                self._digrams[digram] = wids = IISet()
            wids.insert(wid)

        return wid

    def get(self, pattern):
        """Query the lexicon for words matching a pattern.

        Without wildcards the result is a tuple holding the single
        matching word id, or an empty tuple.  With wildcards the result
        is an IISet of the ids of all matching words, or an empty tuple
        when no candidate is found.
        """
        wildcards = (self.multi_wc, self.single_wc)
        digrams = []
        globbing = 0
        last = len(pattern) - 1

        for i in range(len(pattern)):
            char = pattern[i]
            if char in wildcards:
                globbing = 1
                continue

            if i == 0:
                digrams.append(self.eow + char)

            # The lookahead is guarded so that a one-character pattern
            # no longer raises IndexError, and digrams containing a
            # wildcard are never generated.
            if i == last:
                digrams.append(char + self.eow)
            elif pattern[i + 1] not in wildcards:
                digrams.append(char + pattern[i + 1])

        if not globbing:
            result = self._lexicon.get(pattern, None)
            if result is None:
                return ()
            return (result, )

        ## now get all of the intsets that contain the result digrams
        result = None
        for digram in digrams:
            result = union(result, self._digrams.get(digram, None))

        if not result:
            return ()

        ## now we have narrowed the list of possible candidates
        ## down to those words which contain digrams.  However,
        ## some words may have been returned that match digrams,
        ## but do not match 'pattern'.  This is because some words
        ## may contain all matching digrams, but in the wrong
        ## order.
        expr = re.compile(self.createRegex(pattern))
        hits = IISet()
        for wid in result:
            if expr.match(self._inverseLex[wid]):
                hits.insert(wid)
        return hits

    def __getitem__(self, word):
        """Same as get(word)."""
        return self.get(word)

    def query_hook(self, q):
        """expand wildcards

        'q' is a (possibly nested) list of query elements.  Every
        element containing a wildcard is replaced, in place, by a list
        of the matching word ids separated by 'Or'.  Returns 'q'.
        """
        ListType = type([])
        for i in range(len(q) - 1, -1, -1):
            e = q[i]
            if isinstance(e, ListType):
                self.query_hook(e)
            elif (self.multi_wc in e) or (self.single_wc in e):
                words = []
                for wid in self.get(e):
                    if words:
                        words.append(Or)
                    words.append(wid)
                if not words:
                    # if words is empty, return something that will make
                    # textindex's __getitem__ return an empty result list
                    words.append('')
                q[i] = words
        return q

    def Splitter(self, astring, words=None):
        """ wrap the splitter """
        ## don't do anything, less efficient but there's not much
        ## sense in stemming a globbing lexicon.
        return Splitter(astring)

    def createRegex(self, pat):
        """Translate a PATTERN to a regular expression.

        There is no way to quote meta-characters.
        """
        # Remove characters that are meaningful in a regex.  Filtering
        # character by character gives the same result as the old
        # string.translate() deletion for plain strings, without being
        # restricted to them.
        # NOTE(review): '[', ']', '+' and similar are not stripped and
        # still reach re.compile() -- confirm whether that is intended.
        meta = r'()&|!@#$%^{}\<>.'
        result = ''.join([c for c in pat if c not in meta])

        # First, deal with multi-character globbing
        result = result.replace('*', '.*')

        # Next, we need to deal with single-character globbing
        result = result.replace('?', '.')

        return "%s$" % result
lib/python/SearchIndex/Index.py
deleted
100644 → 0
View file @
2dc887a3
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Simple column indices"""
__version__
=
'$Revision: 1.31 $'
[
11
:
-
2
]
from
Persistence
import
Persistent
from
BTrees.OOBTree
import
OOBTree
from
BTrees.IIBTree
import
IITreeSet
import
operator
from
Missing
import
MV
import
string
# Type objects used for exact type checks in this module.
ListType = list
StringType = str
def nonEmpty(s):
    "returns true if a non-empty string or any other (nonstring) type"
    # Only exact strings can count as empty; everything else passes.
    if type(s) is not StringType:
        return 1
    if s:
        return 1
    return 0
class Index(Persistent):
    """Index object interface"""

    isDeprecatedIndex = 1

    def __init__(self, data=None, schema=None, id=None,
                 ignore_ex=None, call_methods=None):
        """Create an index

        The arguments are:

          'data' -- a mapping from integer object ids to objects or
          records,

          'schema' -- a mapping from item name to index into data
          records.  If 'data' is a mapping to objects, then schema
          should be 'None'.

          'id' -- the name of the item attribute to index.  This is
          either an attribute name or a record key.

          'ignore_ex', 'call_methods' -- stored on the instance;
          'call_methods' makes index_item/unindex_item call the value
          they looked up.
        """
        ######################################################################
        # For b/w compatibility, have to allow __init__ calls with zero args:
        # nothing is set up when every argument was left at None.
        args = (data, schema, id, ignore_ex, call_methods)
        if [arg for arg in args if arg is not None]:
            self._data = data
            self._schema = schema
            self.id = id
            self.ignore_ex = ignore_ex
            self.call_methods = call_methods
            self._index = OOBTree()
            self._reindex()

    # for b/w compatibility
    _init = __init__

    def dpHasUniqueValuesFor(self, name):
        ' has unique values for column NAME '
        if name == self.id:
            return 1
        return 0

    def dpUniqueValues(self, name=None, withLengths=0):
        """\
        returns the unique values for name

        if withLengths is true, returns a sequence of
        tuples of (value, length)
        """
        if name is None:
            name = self.id
        elif name != self.id:
            return []

        if not withLengths:
            return tuple(filter(nonEmpty, self._index.keys()))

        rl = []
        for key in self._index.keys():
            if nonEmpty(key):
                rl.append((key, len(self._index[key])))
        return tuple(rl)

    def clear(self):
        """Replace the index with an empty OOBTree."""
        self._index = OOBTree()

    def _reindex(self, start=0):
        """Recompute index data for data with ids >= start."""
        index = self._index
        get = index.get

        if not start:
            index.clear()

        # Objects are read with getattr(); records are read by position,
        # with the schema translating the item name into that position.
        name = self.id
        if self._schema is None:
            f = getattr
        else:
            f = operator.__getitem__
            name = self._schema[name]

        for i, row in self._data.items(start):
            k = f(row, name)

            if k is None or k == MV:
                continue

            docs = get(k)
            if docs is None:
                index[k] = docs = IITreeSet()
            docs.insert(i)

    def index_item(self, i, obj=None):
        """Add the item with integer id 'i' to the index.

        If 'obj' is not given the item is looked up in the data
        mapping.  Items whose value cannot be read, or whose value is
        None or the missing value, are silently skipped.
        """
        index = self._index

        name = self.id
        if (self._schema is None) or (obj is not None):
            f = getattr
        else:
            f = operator.__getitem__
            name = self._schema[name]

        if obj is None:
            obj = self._data[i]

        try:
            k = f(obj, name)
        except:
            # Deliberately best-effort: any failure to read the value
            # means the item is not indexed.
            return
        if self.call_methods:
            k = k()
        if k is None or k == MV:
            return

        docs = index.get(k)
        if docs is None:
            index[k] = docs = IITreeSet()
        docs.insert(i)

    def unindex_item(self, i, obj=None):
        """Remove the item with integer id 'i' from the index.

        If 'obj' is not given the item is looked up in the data
        mapping.  Items whose value cannot be read, or whose value is
        None or the missing value, are silently skipped.
        """
        index = self._index

        # NOTE(review): unlike index_item(), an explicitly passed 'obj'
        # is still read by schema position when a schema is set --
        # confirm whether that asymmetry is intended.
        name = self.id
        if self._schema is None:
            f = getattr
        else:
            f = operator.__getitem__
            name = self._schema[name]

        if obj is None:
            obj = self._data[i]

        try:
            k = f(obj, name)
        except:
            # Deliberately best-effort, mirroring index_item().
            return
        if self.call_methods:
            k = k()
        if k is None or k == MV:
            return

        docs = index.get(k)
        if docs is not None:
            docs.remove(i)

    def _apply_index(self, request, cid=''):
        """Apply the index to query parameters given in the argument,
        request

        The argument should be a mapping object.

        If the request does not contain the needed parameters, then
        None is returned.

        If the request contains a parameter with the name of the
        column + '_usage', it is sniffed for information on how to
        handle applying the index.

        Otherwise two objects are returned.  The first object is a
        ResultSet containing the record numbers of the matching
        records.  The second object is a tuple containing the names of
        all data fields used.
        """
        # The module-level imports only bring in IITreeSet, so the two
        # names used below were undefined; import them here.
        from BTrees.IIBTree import IISet, weightedUnion

        name = self.id              # name of the column

        cidid = "%s/%s" % (cid, name)
        has_key = request.has_key
        if has_key(cidid):
            keys = request[cidid]
        elif has_key(name):
            keys = request[name]
        else:
            return None

        if type(keys) is not ListType:
            keys = [keys]
        index = self._index
        r = None
        anyTrue = 0
        opr = None

        if has_key(name + '_usage'):
            # see if any usage params are sent to field
            usage = request[name + "_usage"].lower().split(':')
            opr, opr_args = usage[0], usage[1:]

        if opr == "range":
            if 'min' in opr_args:
                lo = min(keys)
            else:
                lo = None
            if 'max' in opr_args:
                hi = max(keys)
            else:
                hi = None

            anyTrue = 1
            try:
                # A false upper bound is treated as "no upper bound".
                if hi:
                    setlist = index.items(lo, hi)
                else:
                    setlist = index.items(lo)
                for k, docs in setlist:
                    w, r = weightedUnion(r, docs)
            except KeyError:
                pass
        else:
            # not a range
            get = index.get
            for key in keys:
                if key:
                    anyTrue = 1
                docs = get(key)
                if docs is not None:
                    w, r = weightedUnion(r, docs)

        if r is None:
            # Only false keys were given: the query does not constrain
            # this index at all.
            if not anyTrue:
                return None
            r = IISet()

        return r, (name,)
lib/python/SearchIndex/Lexicon.py
deleted
100644 → 0
View file @
2dc887a3
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
__doc__
=
""" Module breaks out Zope specific methods and behavior. In
addition, provides the Lexicon class which defines a word to integer
mapping.
"""
from
Splitter
import
Splitter
from
Persistence
import
Persistent
from
Acquisition
import
Implicit
from
BTrees.OIBTree
import
OIBTree
from
BTrees.IOBTree
import
IOBTree
from
BTrees.IIBTree
import
IISet
,
IITreeSet
from
randid
import
randid
class Lexicon(Persistent, Implicit):
    """Maps words to word ids and then some

    The Lexicon object is an attempt to abstract vocabularies out of
    Text indexes.  This abstraction is not totally cooked yet, this
    module still includes the parser for the 'Text Index Query
    Language' and a few other hacks.
    """

    # default for older objects
    stop_syn = {}

    def __init__(self, stop_syn=None):
        self.clear()
        if stop_syn is None:
            self.stop_syn = {}
        else:
            self.stop_syn = stop_syn

    def clear(self):
        """Reset the lexicon and the inverse lexicon."""
        self._lexicon = OIBTree()
        self._inverseLex = IOBTree()

    def _convertBTrees(self, threshold=200):
        """Convert '_lexicon' and '_inverseLex' to the current BTree
        types, unless both already have them."""
        if (type(self._lexicon) is OIBTree and
            type(getattr(self, '_inverseLex', None)) is IOBTree):
            return

        from BTrees.convert import convert

        lexicon = self._lexicon
        self._lexicon = OIBTree()
        self._lexicon._p_jar = self._p_jar
        convert(lexicon, self._lexicon, threshold)

        try:
            inverseLex = self._inverseLex
            self._inverseLex = IOBTree()
        except AttributeError:
            # older lexicons didn't have an inverse lexicon
            self._inverseLex = IOBTree()
            inverseLex = self._inverseLex

        self._inverseLex._p_jar = self._p_jar
        convert(inverseLex, self._inverseLex, threshold)

    def set_stop_syn(self, stop_syn):
        """ pass in a mapping of stopwords and synonyms.  Format is:

        {'word' : [syn1, syn2, ..., synx]}

        Vocabularies do not necessarily need to implement this if their
        splitters do not support stemming or stopping.
        """
        self.stop_syn = stop_syn

    def getWordId(self, word):
        """ return the word id of 'word', assigning one if needed """
        wid = self._lexicon.get(word, None)
        if wid is None:
            wid = self.assignWordId(word)
        return wid

    set = getWordId

    def getWord(self, wid):
        """ post-2.3.1b2 method, will not work with unconverted lexicons """
        return self._inverseLex.get(wid, None)

    def assignWordId(self, word):
        """Assigns a new word id to the provided word and returns it."""
        # First make sure it's not already in there
        if self._lexicon.has_key(word):
            return self._lexicon[word]

        try:
            inverse = self._inverseLex
        except AttributeError:
            # woops, old lexicon without wids: rebuild the inverse
            # mapping.  The loop variables must not be called 'word'
            # and 'wid' -- the original reused those names and thereby
            # replaced the word being added with the last word of the
            # existing lexicon.
            inverse = self._inverseLex = IOBTree()
            for known_word, known_wid in self._lexicon.items():
                inverse[known_wid] = known_word

        # insert() returns a false value when the id is already taken,
        # so keep drawing random ids until one is free.
        wid = randid()
        while not inverse.insert(wid, word):
            wid = randid()

        self._lexicon[intern(word)] = wid
        return wid

    def get(self, key, default=None):
        """Return the matched word against the key.

        The result is an IISet holding the word id stored for 'key'
        (or 'default' when the key is unknown and 'default' is not
        None); otherwise the set is empty.
        """
        r = IISet()
        wid = self._lexicon.get(key, default)
        if wid is not None:
            r.insert(wid)
        return r

    def __getitem__(self, key):
        return self.get(key)

    def __len__(self):
        return len(self._lexicon)

    def Splitter(self, astring, words=None):
        """ wrap the splitter """
        if words is None:
            words = self.stop_syn
        return Splitter(astring, words)

    def query_hook(self, q):
        """ we don't want to modify the query cuz we're dumb """
        return q
# Default English stop words.  The tuple is kept exactly as it was,
# including the entries that appear twice ('am', 'per', 're').
stop_words = (
    'am', 'ii', 'iii', 'per', 'po', 're', 'a', 'about', 'above', 'across',
    'after', 'afterwards', 'again', 'against', 'all', 'almost', 'alone',
    'along', 'already', 'also', 'although', 'always', 'am', 'among',
    'amongst', 'amoungst', 'amount', 'an', 'and', 'another', 'any',
    'anyhow', 'anyone', 'anything', 'anyway', 'anywhere', 'are', 'around',
    'as', 'at', 'back', 'be', 'became', 'because', 'become', 'becomes',
    'becoming', 'been', 'before', 'beforehand', 'behind', 'being',
    'below', 'beside', 'besides', 'between', 'beyond', 'bill', 'both',
    'bottom', 'but', 'by', 'can', 'cannot', 'cant', 'con', 'could',
    'couldnt', 'cry', 'describe', 'detail', 'do', 'done', 'down', 'due',
    'during', 'each', 'eg', 'eight', 'either', 'eleven', 'else',
    'elsewhere', 'empty', 'enough', 'even', 'ever', 'every', 'everyone',
    'everything', 'everywhere', 'except', 'few', 'fifteen', 'fifty',
    'fill', 'find', 'fire', 'first', 'five', 'for', 'former', 'formerly',
    'forty', 'found', 'four', 'from', 'front', 'full', 'further', 'get',
    'give', 'go', 'had', 'has', 'hasnt', 'have', 'he', 'hence', 'her',
    'here', 'hereafter', 'hereby', 'herein', 'hereupon', 'hers',
    'herself', 'him', 'himself', 'his', 'how', 'however', 'hundred', 'i',
    'ie', 'if', 'in', 'inc', 'indeed', 'interest', 'into', 'is', 'it',
    'its', 'itself', 'keep', 'last', 'latter', 'latterly', 'least',
    'less', 'made', 'many', 'may', 'me', 'meanwhile', 'might', 'mill',
    'mine', 'more', 'moreover', 'most', 'mostly', 'move', 'much', 'must',
    'my', 'myself', 'name', 'namely', 'neither', 'never', 'nevertheless',
    'next', 'nine', 'no', 'nobody', 'none', 'noone', 'nor', 'not',
    'nothing', 'now', 'nowhere', 'of', 'off', 'often', 'on', 'once',
    'one', 'only', 'onto', 'or', 'other', 'others', 'otherwise', 'our',
    'ours', 'ourselves', 'out', 'over', 'own', 'per', 'perhaps',
    'please', 'pre', 'put', 'rather', 're', 'same', 'see', 'seem',
    'seemed', 'seeming', 'seems', 'serious', 'several', 'she', 'should',
    'show', 'side', 'since', 'sincere', 'six', 'sixty', 'so', 'some',
    'somehow', 'someone', 'something', 'sometime', 'sometimes',
    'somewhere', 'still', 'such', 'take', 'ten', 'than', 'that', 'the',
    'their', 'them', 'themselves', 'then', 'thence', 'there',
    'thereafter', 'thereby', 'therefore', 'therein', 'thereupon',
    'these', 'they', 'thick', 'thin', 'third', 'this', 'those', 'though',
    'three', 'through', 'throughout', 'thru', 'thus', 'to', 'together',
    'too', 'toward', 'towards', 'twelve', 'twenty', 'two', 'un', 'under',
    'until', 'up', 'upon', 'us', 'very', 'via', 'was', 'we', 'well',
    'were', 'what', 'whatever', 'when', 'whence', 'whenever', 'where',
    'whereafter', 'whereas', 'whereby', 'wherein', 'whereupon',
    'wherever', 'whether', 'which', 'while', 'whither', 'who', 'whoever',
    'whole', 'whom', 'whose', 'why', 'will', 'with', 'within', 'without',
    'would', 'yet', 'you', 'your', 'yours', 'yourself', 'yourselves',
    )

# Maps every stop word to None.  dict.fromkeys() builds the same
# mapping as the former loop without leaving its loop variable behind
# as a module-level name.
stop_word_dict = dict.fromkeys(stop_words)
lib/python/SearchIndex/PluggableIndex.py
deleted
100644 → 0
View file @
2dc887a3
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Pluggable Index Base Class """
__version__
=
'$Revision: 1.4 $'
[
11
:
-
2
]
import
Interface
class PluggableIndex:
    """Base pluggable index class

    Every method here is a stub that does nothing and returns None;
    the docstrings describe what an implementation is expected to do.
    """

    def getEntryForObject(self, documentId, default=None):
        """Get all information contained for a specific object by
        documentId"""

    def index_object(self, documentId, obj, threshold=None):
        """Index an object:

        'documentId' is the integer ID of the document

        'obj' is the object to be indexed

        'threshold' is the number of words to process between committing
        subtransactions.  If None, subtransactions are disabled"""

    def unindex_object(self, documentId):
        """Remove the documentId from the index"""

    def uniqueValues(self, name=None, withLengths=0):
        """Returns the unique values for name.

        If 'withLengths' is true, returns a sequence of tuples of
        (value, length)"""

    def _apply_index(self, request, cid=''):
        """Apply the index to query parameters given in the argument,
        request.

        The argument should be a mapping object.

        If the request does not contain the needed parameters, then None
        is returned.

        If the request contains a parameter with the name of the column
        + "_usage", it is sniffed for information on how to handle
        applying the index.

        Otherwise two objects are returned.  The first object is a
        ResultSet containing the record numbers of the matching records.
        The second object is a tuple containing the names of all data
        fields used."""
# Derive an interface object from the PluggableIndex class and record it
# on the class as the interface it implements.
PluggableIndexInterface = Interface.impliedInterface(PluggableIndex)

PluggableIndex.__implements__ = PluggableIndexInterface
lib/python/SearchIndex/README.txt
deleted
100644 → 0
View file @
2dc887a3
The SearchIndex package is deprecated since Zope 2.4
Instead use the re-factored modules in Products/PluginIndexes.
lib/python/SearchIndex/ResultList.py
deleted
100644 → 0
View file @
2dc887a3
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
from
BTrees.IIBTree
import
IIBucket
from
BTrees.IIBTree
import
weightedIntersection
,
weightedUnion
,
difference
from
BTrees.OOBTree
import
OOSet
,
union
class ResultList:
    """Scored search result: a bucket of document id => score, the
    set of words that produced it and the index it came from."""

    def __init__(self, d, words, index, TupleType=type(())):
        """Create a result list.

        'd' -- the scores: an IIBucket, a single (id, score) tuple, or
        anything an IIBucket can be built from.

        'words' -- the words behind the result; turned into an OOSet
        unless it already is one.

        'index' -- the index; near() calls its positions() method.
        """
        self._index = index

        if type(words) is not OOSet:
            words = OOSet(words)
        self._words = words

        if type(d) is TupleType:
            d = IIBucket((d,))
        elif type(d) is not IIBucket:
            d = IIBucket(d)

        self._dict = d
        # Delegate the mapping protocol straight to the bucket.
        self.__getitem__ = d.__getitem__
        try:
            self.__nonzero__ = d.__nonzero__
        except AttributeError:
            # The bucket has no __nonzero__; the method defined on the
            # class below is used instead.
            pass
        self.get = d.get

    def __nonzero__(self):
        return not not self._dict

    def bucket(self):
        """Return the underlying id => score bucket."""
        return self._dict

    def keys(self):
        return self._dict.keys()

    def has_key(self, key):
        return self._dict.has_key(key)

    def items(self):
        return self._dict.items()

    def __and__(self, x):
        """Documents present in both results; the word sets are merged."""
        return self.__class__(
            weightedIntersection(self._dict, x._dict)[1],
            union(self._words, x._words),
            self._index,
            )

    def and_not(self, x):
        """Documents present in this result but not in 'x'."""
        return self.__class__(
            difference(self._dict, x._dict),
            self._words,
            self._index,
            )

    def __or__(self, x):
        """Documents present in either result; the word sets are merged."""
        # The original had a second, unreachable return after this one
        # that referred to an undefined name; it has been dropped.
        return self.__class__(
            weightedUnion(self._dict, x._dict)[1],
            union(self._words, x._words),
            self._index,
            )

    def near(self, x):
        """Documents present in both results, scored by how close the
        words of the two results occur to each other."""
        result = IIBucket()
        scores = self._dict
        xscores = x._dict
        xhas = xscores.has_key
        positions = self._index.positions
        for docid, score in scores.items():
            if not xhas(docid):
                continue
            # Tag every word position with the side it belongs to
            # (0 = self, 1 = x) and walk them in position order.
            p = ([(pos, 0) for pos in positions(docid, self._words)] +
                 [(pos, 1) for pos in positions(docid, x._words)])
            p.sort()
            d = lp = 9999   # smallest distance found; lp marks "none"
            li = None
            lsrc = None
            for i, src in p:
                # Compare by value: the original used identity tests,
                # which treat equal large ints as different and could
                # then yield d == 0 and a ZeroDivisionError below.
                if li is not None and i != li and src != lsrc:
                    d = min(d, i - li)
                li = i
                lsrc = src
            if d == lp:
                score = min(score, xscores[docid])  # synonyms
            else:
                # Explicit floor division: same result as '/' on ints.
                score = (score + xscores[docid]) // d
            result[docid] = score

        return self.__class__(
            result, union(self._words, x._words), self._index)
lib/python/SearchIndex/Setup
deleted
100644 → 0
View file @
2dc887a3
*shared*
Splitter Splitter.c
lib/python/SearchIndex/Splitter.c
deleted
100644 → 0
View file @
2dc887a3
This diff is collapsed.
Click to expand it.
lib/python/SearchIndex/TextIndex.py
deleted
100644 → 0
View file @
2dc887a3
This diff is collapsed.
Click to expand it.
lib/python/SearchIndex/UnIndex.py
deleted
100644 → 0
View file @
2dc887a3
This diff is collapsed.
Click to expand it.
lib/python/SearchIndex/UnKeywordIndex.py
deleted
100644 → 0
View file @
2dc887a3
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
from
UnIndex
import
UnIndex
from
zLOG
import
LOG
,
ERROR
from
types
import
StringType
from
BTrees.OOBTree
import
OOSet
,
difference
class UnKeywordIndex(UnIndex):

    meta_type = 'Keyword Index'

    # Like an UnIndex only it indexes sequences of items
    #
    # Searches match any keyword.
    #
    # This should have an _apply_index that returns a relevance score
    #
    # (This text followed 'meta_type' as a bare string, so it never was
    # the class docstring; it is a comment here so that __doc__ stays
    # unchanged.)

    def index_object(self, documentId, obj, threshold=None):
        """ index an object 'obj' with integer id 'documentId'

        Ideally, we've been passed a sequence of some sort that we
        can iterate over.  If however, we haven't, we should do something
        useful with the results.  In the case of a string, this means
        indexing the entire string as a keyword.

        Returns 0 if the keywords of a new document raise TypeError
        while being indexed, 1 otherwise."""
        # self.id is the name of the index, which is also the name of
        # the attribute we're interested in.  If the attribute is
        # callable, we call it.
        keywords = getattr(obj, self.id, ())
        if callable(keywords):
            keywords = keywords()

        if type(keywords) is StringType:
            keywords = (keywords, )

        previous = self._unindex.get(documentId, None)

        if previous is None:
            # we've got a new document, let's not futz around.
            try:
                for keyword in keywords:
                    self.insertForwardIndexEntry(keyword, documentId)
                self._unindex[documentId] = list(keywords)
            except TypeError:
                return 0
            return 1

        # we have an existing entry for this document, and we need
        # to figure out if any of the keywords have actually changed
        if type(previous) is not OOSet:
            previous = OOSet(previous)
        current = OOSet(keywords)
        removed = difference(previous, current)
        added = difference(current, previous)
        if removed or added:
            # if we've got forward or reverse changes
            self._unindex[documentId] = list(current)
            if removed:
                self.unindex_objectKeywords(documentId, removed)
            for keyword in added:
                self.insertForwardIndexEntry(keyword, documentId)
        return 1

    def unindex_objectKeywords(self, documentId, keywords):
        """ remove the forward index entries of 'documentId' for each of
        'keywords'; does nothing when 'keywords' is None"""
        if keywords is None:
            return
        for keyword in keywords:
            self.removeForwardIndexEntry(keyword, documentId)

    def unindex_object(self, documentId):
        """ carefully unindex the object with integer id 'documentId'"""
        self.unindex_objectKeywords(
            documentId, self._unindex.get(documentId, None))
        try:
            del self._unindex[documentId]
        except KeyError:
            message = ('Attempt to unindex nonexistent document id %s'
                       % documentId)
            LOG('UnKeywordIndex', ERROR, message)
lib/python/SearchIndex/UnTextIndex.py
deleted
100644 → 0
View file @
2dc887a3
This diff is collapsed.
Click to expand it.
lib/python/SearchIndex/__init__.py
deleted
100644 → 0
View file @
2dc887a3
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
__doc__
=
'''Collected utilities to support database indexing.
$Id: __init__.py,v 1.10 2002/08/14 21:46:24 mj Exp $'''
__version__
=
'$Revision: 1.10 $'
[
11
:
-
2
]
import
warnings
# Emitted once, when the package is first imported.
warnings.warn(
    "The usage of the SearchIndex package is deprecated since "
    "Zope 2.4.\n"
    "This package is only kept for backwards compatibility for a while\n"
    "and will go away in a future release.\n"
    "\n"
    "Please use instead the re-factored modules in Products/PluginIndexes.\n",
    DeprecationWarning)
lib/python/SearchIndex/randid.py
deleted
100644 → 0
View file @
2dc887a3
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
#############################################################################
# 'whrandom' is deprecated and absent from later Python releases; the
# 'random' module offers the same randint()/choice() functions.
import random


def randid(randint=random.randint, choice=random.choice, signs=(-1, 1)):
    """Return a random non-zero integer id.

    The magnitude is drawn from 1..2000000000 (both ends included) and
    the sign is picked from 'signs'.  The defaults bind the random
    functions at definition time, which is why the module itself can
    be deleted from the namespace below.
    """
    return choice(signs) * randint(1, 2000000000)


del random
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment