Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
3d88c027
Commit
3d88c027
authored
Jul 30, 2001
by
Evan Simpson
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Merge TextIndex fixes from 2.4 branch
parent
233671d4
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
629 additions
and
403 deletions
+629
-403
lib/python/Products/PluginIndexes/TextIndex/GlobbingLexicon.py
...ython/Products/PluginIndexes/TextIndex/GlobbingLexicon.py
+31
-26
lib/python/Products/PluginIndexes/TextIndex/TextIndex.py
lib/python/Products/PluginIndexes/TextIndex/TextIndex.py
+130
-125
lib/python/Products/PluginIndexes/TextIndex/tests/testTextIndex.py
...n/Products/PluginIndexes/TextIndex/tests/testTextIndex.py
+313
-0
lib/python/SearchIndex/GlobbingLexicon.py
lib/python/SearchIndex/GlobbingLexicon.py
+23
-19
lib/python/SearchIndex/UnTextIndex.py
lib/python/SearchIndex/UnTextIndex.py
+100
-100
lib/python/SearchIndex/tests/testUnTextIndex.py
lib/python/SearchIndex/tests/testUnTextIndex.py
+32
-133
No files found.
lib/python/Products/PluginIndexes/TextIndex/GlobbingLexicon.py
View file @
3d88c027
...
...
@@ -85,7 +85,7 @@
from
Lexicon
import
Lexicon
import
Splitter
from
Products.PluginIndexes.TextIndex.TextIndex
import
Or
from
TextIndex
import
Or
,
Op
import
re
,
string
...
...
@@ -147,14 +147,12 @@ class GlobbingLexicon(Lexicon):
def
createDigrams
(
self
,
word
):
"""Returns a list with the set of digrams in the word."""
digrams
=
[]
digrams
.
append
(
self
.
eow
+
word
[
0
])
# Mark the beginning
for
i
in
range
(
1
,
len
(
word
)):
digrams
.
append
(
word
[
i
-
1
:
i
+
1
])
digrams
=
list
(
word
)
digrams
.
append
(
self
.
eow
)
last
=
self
.
eow
digrams
[
-
1
]
=
digrams
[
-
1
]
+
self
.
eow
# Mark the end
for
i
in
range
(
len
(
digrams
)):
last
,
digrams
[
i
]
=
digrams
[
i
],
last
+
digrams
[
i
]
return
digrams
...
...
@@ -269,21 +267,30 @@ class GlobbingLexicon(Lexicon):
def
query_hook
(
self
,
q
):
"""expand wildcards"""
words
=
[]
for
w
in
q
:
if
(
(
self
.
multi_wc
in
w
)
or
(
self
.
single_wc
in
w
)
):
wids
=
self
.
get
(
w
)
ListType
=
type
([])
i
=
len
(
q
)
-
1
while
i
>=
0
:
e
=
q
[
i
]
if
isinstance
(
e
,
ListType
):
self
.
query_hook
(
e
)
elif
isinstance
(
e
,
Op
):
pass
elif
(
(
self
.
multi_wc
in
e
)
or
(
self
.
single_wc
in
e
)
):
wids
=
self
.
get
(
e
)
words
=
[]
for
wid
in
wids
:
if
words
:
words
.
append
(
Or
)
words
.
append
(
wid
)
else
:
words
.
append
(
w
)
if
not
words
:
# if words is empty, return something that will make
# textindex's __getitem__ return an empty result list
words
.
append
(
''
)
q
[
i
]
=
words
i
=
i
-
1
# if words is empty, return something that will make textindex's
# __getitem__ return an empty result list
return
words
or
[
''
]
return
q
def
Splitter
(
self
,
astring
,
words
=
None
):
""" wrap the splitter """
...
...
@@ -300,18 +307,16 @@ class GlobbingLexicon(Lexicon):
There is no way to quote meta-characters.
"""
# Remove characters that are meaningful in a regex
transTable
=
string
.
maketrans
(
""
,
""
)
result
=
string
.
translate
(
pat
,
transTable
,
r'()&|!@#$%^{}\
<>.
')
# First, deal with mu
tl
i-character globbing
result
=
string
.
replace
(
pa
t
,
'*'
,
'.*'
)
# First, deal with mu
lt
i-character globbing
result = string.replace(
resul
t, '
*
', '
.
*
')
# Next, we need to deal with single-character globbing
result
=
string
.
replace
(
result
,
'?'
,
'.?'
)
# Now, we need to remove all of the characters that
# are forbidden.
result
=
string
.
translate
(
result
,
transTable
,
r'()&|!@#$%^{}\
<>
')
result = string.replace(result, '
?
', '
.
')
return "%s$" % result
...
...
lib/python/Products/PluginIndexes/TextIndex/TextIndex.py
View file @
3d88c027
...
...
@@ -85,13 +85,9 @@
"""Text Index
The TextIndex falls under the 'I didnt have a better name for it'
excuse. It is an 'Un' Text index because it stores a little bit of
undo information so that objects can be unindexed when the old value
is no longer known.
"""
__version__
=
'$Revision: 1.
9
$'
[
11
:
-
2
]
__version__
=
'$Revision: 1.
10
$'
[
11
:
-
2
]
import
string
,
re
...
...
@@ -113,12 +109,21 @@ from Lexicon import Lexicon
from
types
import
*
AndNot
=
'andnot'
And
=
'and'
Or
=
'or'
Near
=
'...'
class
Op
:
def
__init__
(
self
,
name
):
self
.
name
=
name
def
__repr__
(
self
):
return
self
.
name
__str__
=
__repr__
AndNot
=
Op
(
'andnot'
)
And
=
Op
(
'and'
)
Or
=
Op
(
'or'
)
Near
=
Op
(
'...'
)
QueryError
=
'TextIndex.QueryError'
operator_dict
=
{
'andnot'
:
AndNot
,
'and'
:
And
,
'or'
:
Or
,
'...'
:
Near
,
'near'
:
Near
,
AndNot
:
AndNot
,
And
:
And
,
Or
:
Or
,
Near
:
Near
}
class
TextIndex
(
PluggableIndex
.
PluggableIndex
,
Persistent
,
Implicit
,
SimpleItem
):
...
...
@@ -176,8 +181,6 @@ class TextIndex(PluggableIndex.PluggableIndex, Persistent,
# Default text index operator (should be visible to ZMI)
self
.
operators
=
{
'andnot'
:
AndNot
,
'and'
:
And
,
'near'
:
Near
,
'or'
:
Or
}
self
.
useOperator
=
'or'
self
.
clear
()
...
...
@@ -508,10 +511,7 @@ class TextIndex(PluggableIndex.PluggableIndex, Persistent,
# Changed for 2.4
# We use the default operator that can be managed via the ZMI
query_operator
=
record
.
get
(
'operator'
,
self
.
useOperator
)
if
not
query_operator
in
self
.
operators
.
keys
():
raise
exceptions
.
RuntimeError
,
"Invalid operator '%s' for a TextIndex"
\
%
query_operator
qop
=
record
.
get
(
'operator'
,
self
.
useOperator
)
# We keep this for pre-2.4 compatibility
# This stinking code should go away somewhere. A global
...
...
@@ -520,10 +520,16 @@ class TextIndex(PluggableIndex.PluggableIndex, Persistent,
# should be specified on a per-index base
if
request
.
has_key
(
'textindex_operator'
):
query_operator
=
request
[
'textindex_operator'
]
warnings
.
warn
(
"The usage of the 'textindex_operator' is no longer recommended.
\
n
"
\
"Please use a mapping object and the 'operator' to specify the operator"
)
qop
=
request
[
'textindex_operator'
]
warnings
.
warn
(
"The usage of the 'textindex_operator' "
"is no longer recommended.
\
n
"
"Please use a mapping object and the "
"'operator' key to specify the operator."
)
query_operator
=
operator_dict
.
get
(
qop
)
if
query_operator
is
None
:
raise
exceptions
.
RuntimeError
,
(
"Invalid operator '%s' "
"for a TextIndex"
%
qop
)
r
=
None
for
key
in
record
.
keys
:
...
...
@@ -572,29 +578,37 @@ class TextIndex(PluggableIndex.PluggableIndex, Persistent,
def
query
(
self
,
s
,
default_operator
=
Or
,
ws
=
(
string
.
whitespace
,)):
""" This is called by TextIndexes. A 'query term' which is a
string 's' is passed in, along with an index object. s is
parsed, then the wildcards are parsed, then something is
parsed again, then the whole thing is 'evaluated'. """
def
query
(
self
,
s
,
default_operator
=
Or
):
""" Evaluate a query string.
Convert the query string into a data structure of nested lists
and strings, based on the grouping of whitespace-separated
strings by parentheses and quotes. The 'Near' operator is
inserted between the strings of a quoted group.
The Lexicon is given the opportunity to transform the
data structure. Stemming, wildcards, and translation are
possible Lexicon services.
Finally, the query list is normalized so that it and every
sub-list consist of non-operator strings or lists separated
by operators. This list is evaluated.
"""
# First replace any occurrences of " and not " with " andnot "
s
=
re
.
sub
(
'[%s]+[aA][nN][dD][%s]*[nN][oO][tT][%s]+'
%
(
ws
*
3
),
' andnot '
,
s
)
s
=
re
.
sub
(
'(?i)
\
s+
a
nd
\
s*
n
ot
\
s+
'
, '
andnot
', s)
#
do some parsing
#
Parse parentheses and quotes
q = parse(s)
## here, we give lexicons a chance to transform the query.
## For example, substitute wildcards, or translate words into
## various languages.
# Allow the Lexicon to process the query
q = self.getLexicon().query_hook(q)
# do some more parsing
# Insert the default operator between any two search terms not
# already joined by an operator.
q = parse2(q, default_operator)
#
#
evaluate the final 'expression'
# evaluate the final '
expression
'
return self.evaluate(q)
...
...
@@ -629,22 +643,20 @@ class TextIndex(PluggableIndex.PluggableIndex, Persistent,
def evaluate(self, query):
"""Evaluate a parsed query"""
# There are two options if the query passed in is only one
# item. It means either it's an embedded query, in which case
# we'll recursively evaluate, otherwise it's nothing for us
# to evaluate, and we just get the results and return them.
if
(
len
(
query
)
==
1
):
if
(
type
(
query
[
0
])
is
ListType
):
return
self
.
evaluate
(
query
[
0
])
# Strip off meaningless layers
while isinstance(query, ListType) and len(query) == 1:
query = query[0]
return
self
[
query
[
0
]]
# __getitem__
# If it'
s
not
a
list
,
assume
a
string
or
number
if
not
isinstance
(
query
,
ListType
):
return
self
[
query
]
# Now we need to loop through the query and
expand out
# Now we need to loop through the query and
reduce
# operators. They are currently evaluated in the following
# order: AndNot
e
-> And -> Or -> Near
# order: AndNot -> And -> Or -> Near
i
=
0
while
(
i
<
len
(
query
)):
if
query
[
i
]
==
AndNot
:
if
query
[
i
]
is
AndNot
:
left
,
right
=
self
.
get_operands
(
query
,
i
)
val
=
left
.
and_not
(
right
)
query
[(
i
-
1
)
:
(
i
+
2
)]
=
[
val
]
...
...
@@ -652,7 +664,7 @@ class TextIndex(PluggableIndex.PluggableIndex, Persistent,
i
=
0
while
(
i
<
len
(
query
)):
if
query
[
i
]
==
And
:
if
query
[
i
]
is
And
:
left
,
right
=
self
.
get_operands
(
query
,
i
)
val
=
left
&
right
query
[(
i
-
1
)
:
(
i
+
2
)]
=
[
val
]
...
...
@@ -660,7 +672,7 @@ class TextIndex(PluggableIndex.PluggableIndex, Persistent,
i
=
0
while
(
i
<
len
(
query
)):
if
query
[
i
]
==
Or
:
if
query
[
i
]
is
Or
:
left
,
right
=
self
.
get_operands
(
query
,
i
)
val
=
left
|
right
query
[(
i
-
1
)
:
(
i
+
2
)]
=
[
val
]
...
...
@@ -668,14 +680,15 @@ class TextIndex(PluggableIndex.PluggableIndex, Persistent,
i
=
0
while
(
i
<
len
(
query
)):
if
query
[
i
]
==
Near
:
if
query
[
i
]
is
Near
:
left
,
right
=
self
.
get_operands
(
query
,
i
)
val
=
left
.
near
(
right
)
query
[(
i
-
1
)
:
(
i
+
2
)]
=
[
val
]
else
:
i
=
i
+
1
if
(
len
(
query
)
!=
1
):
raise
QueryError
,
"Malformed query"
if
(
len
(
query
)
!=
1
):
import
pdb
;
pdb
.
set_trace
()
raise
QueryError
,
"Malformed query"
return
query
[
0
]
...
...
@@ -706,101 +719,93 @@ def parse(s):
l
=
[]
tmp
=
string
.
lower
(
s
)
while
(
1
):
p
=
parens
(
tmp
)
if
(
p
is
None
):
# No parentheses found. Look for quotes then exit.
l
=
l
+
quotes
(
tmp
)
break
else
:
# Look for quotes in the section of the string before
# the parentheses, then parse the string inside the parens
l
=
l
+
quotes
(
tmp
[:(
p
[
0
]
-
1
)])
l
.
append
(
parse
(
tmp
[
p
[
0
]
:
p
[
1
]]))
p
=
parens
(
tmp
)
while
p
is
not
None
:
# Look for quotes in the section of the string before
# the parentheses, then parse the string inside the parens
l
=
l
+
quotes
(
p
[
0
])
l
.
append
(
parse
(
p
[
1
]))
# continue looking through the rest of the string
tmp
=
tmp
[(
p
[
1
]
+
1
):]
# continue looking through the rest of the string
tmp
=
p
[
2
]
p
=
parens
(
tmp
)
return
l
return
l
+
quotes
(
tmp
)
def
parse2
(
q
,
default_operator
,
operator_dict
=
{
AndNot
:
AndNot
,
And
:
And
,
Or
:
Or
,
Near
:
Near
}):
def
parse2
(
q
,
default_operator
,
operator_dict
=
operator_dict
):
"""Find operators and operands"""
i
=
0
isop
=
operator_dict
.
has_key
while
(
i
<
len
(
q
)):
if
(
type
(
q
[
i
])
is
ListType
):
q
[
i
]
=
parse2
(
q
[
i
],
default_operator
)
# every other item, starting with the first, should be an operand
if
((
i
%
2
)
!=
0
):
# This word should be an operator; if it is not, splice in
# the default operator.
if
type
(
q
[
i
])
is
not
ListType
and
isop
(
q
[
i
]):
q
[
i
]
=
operator_dict
[
q
[
i
]]
else
:
q
[
i
:
i
]
=
[
default_operator
]
i
=
i
+
1
i
=
len
(
q
)
-
1
while
i
>=
0
:
e
=
q
[
i
]
if
isinstance
(
e
,
ListType
):
q
[
i
]
=
parse2
(
e
,
default_operator
)
if
i
%
2
:
q
.
insert
(
i
,
default_operator
)
elif
i
%
2
:
# This element should be an operator
if
isop
(
e
):
# Ensure that it is identical, not merely equal.
q
[
i
]
=
operator_dict
[
e
]
else
:
# Insert the default operator.
q
.
insert
(
i
,
default_operator
)
i
=
i
-
1
return
q
def
parens
(
s
,
parens_re
=
re
.
compile
(
'[
\
(
\
)]'
).
search
):
index
=
open_index
=
paren_count
=
0
while
1
:
mo
=
parens_re
(
s
,
index
)
if
mo
is
None
:
break
def
parens
(
s
,
parens_re
=
re
.
compile
(
'[()]'
).
search
):
mo
=
parens_re
(
s
)
if
mo
is
None
:
return
open_index
=
mo
.
start
(
0
)
+
1
paren_count
=
0
while
mo
is
not
None
:
index
=
mo
.
start
(
0
)
if
s
[
index
]
==
'('
:
paren_count
=
paren_count
+
1
if
open_index
==
0
:
open_index
=
index
+
1
else
:
paren_count
=
paren_count
-
1
if
paren_count
==
0
:
return
(
s
[:
open_index
-
1
],
s
[
open_index
:
index
],
s
[
index
+
1
:])
if
paren_count
<
0
:
break
mo
=
parens_re
(
s
,
index
+
1
)
if
paren_count
==
0
:
return
open_index
,
index
else
:
index
=
index
+
1
if
paren_count
==
0
:
# No parentheses Found
return
None
else
:
raise
QueryError
,
"Mismatched parentheses"
raise
QueryError
,
"Mismatched parentheses"
def
quotes
(
s
,
ws
=
(
string
.
whitespace
,)):
# split up quoted regions
splitted
=
re
.
split
(
'[%s]*
\
"
[%s]*'
%
(
ws
*
2
),
s
)
split
=
string
.
split
if
(
len
(
splitted
)
>
1
):
if
((
len
(
splitted
)
%
2
)
==
0
):
raise
QueryError
,
"Mismatched quotes"
def
quotes
(
s
):
split
=
string
.
split
if
'"'
not
in
s
:
return
split
(
s
)
for
i
in
range
(
1
,
len
(
splitted
),
2
):
# split the quoted region into words
splitted
[
i
]
=
filter
(
None
,
split
(
splitted
[
i
]))
# put the Proximity operator in between quoted words
for
j
in
range
(
1
,
len
(
splitted
[
i
])):
splitted
[
i
][
j
:
j
]
=
[
Near
]
for
i
in
range
(
len
(
splitted
)
-
1
,
-
1
,
-
2
):
# split the non-quoted region into words
splitted
[
i
:
i
+
1
]
=
filter
(
None
,
split
(
splitted
[
i
]))
splitted
=
filter
(
None
,
splitted
)
else
:
# No quotes, so just split the string into words
splitted
=
filter
(
None
,
split
(
s
))
return
splitted
# split up quoted regions
splitted
=
re
.
split
(
'
\
s*
\
"
\
s*
'
, s)
if (len(splitted) % 2) == 0: raise QueryError, "Mismatched quotes"
for i in range(1,len(splitted),2):
# split the quoted region into words
words = splitted[i] = split(splitted[i])
# put the Proximity operator in between quoted words
j = len(words) - 1
while j > 0:
words.insert(j, Near)
j = j - 1
i = len(splitted) - 1
while i >= 0:
# split the non-quoted region into words
splitted[i:i+1] = split(splitted[i])
i = i - 2
return filter(None, splitted)
manage_addTextIndexForm = DTMLFile('
dtml
/
addTextIndex
', globals())
...
...
lib/python/Products/PluginIndexes/TextIndex/tests/testTextIndex.py
0 → 100644
View file @
3d88c027
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
import
sys
,
os
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
sys
.
path
[
0
],
'..'
))
sys
.
path
.
insert
(
0
,
os
.
getcwd
())
try
:
import
unittest
except
:
sys
.
path
[
0
]
=
os
.
path
.
join
(
sys
.
path
[
0
],
'..'
,
'..'
,
'..'
)
import
unittest
print
sys
.
path
class
Dummy
:
def
__init__
(
self
,
**
kw
):
self
.
__dict__
.
update
(
kw
)
import
zLOG
def
log_write
(
subsystem
,
severity
,
summary
,
detail
,
error
):
if
severity
>=
zLOG
.
PROBLEM
:
assert
0
,
"%s(%s): %s"
%
(
subsystem
,
severity
,
summary
)
zLOG
.
log_write
=
log_write
import
ZODB
,
ZODB
.
DemoStorage
,
ZODB
.
FileStorage
import
TextIndex
import
GlobbingLexicon
class
Tests
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
index
=
TextIndex
.
TextIndex
(
'text'
)
self
.
doc
=
Dummy
(
text
=
'this is the time, when all good zopes'
)
def
dbopen
(
self
):
n
=
'fs_tmp__%s'
%
os
.
getpid
()
s
=
ZODB
.
FileStorage
.
FileStorage
(
n
)
db
=
self
.
db
=
ZODB
.
DB
(
s
)
self
.
jar
=
db
.
open
()
if
not
self
.
jar
.
root
().
has_key
(
'index'
):
self
.
jar
.
root
()[
'index'
]
=
TextIndex
.
TextIndex
(
'text'
)
get_transaction
().
commit
()
return
self
.
jar
.
root
()[
'index'
]
def
dbclose
(
self
):
self
.
jar
.
close
()
self
.
db
.
close
()
del
self
.
jar
del
self
.
db
def
tearDown
(
self
):
get_transaction
().
abort
()
if
hasattr
(
self
,
'jar'
):
self
.
dbclose
()
os
.
system
(
'rm -f fs_tmp__*'
)
def
checkSimpleAddDelete
(
self
):
"Check that we can add and delete an object without error"
self
.
index
.
index_object
(
0
,
self
.
doc
)
self
.
index
.
index_object
(
1
,
self
.
doc
)
self
.
doc
.
text
=
'spam is good, spam is fine, span span span'
self
.
index
.
index_object
(
0
,
self
.
doc
)
self
.
index
.
unindex_object
(
0
)
def
checkPersistentUpdate1
(
self
):
"Check simple persistent indexing"
index
=
self
.
dbopen
()
self
.
doc
.
text
=
'this is the time, when all good zopes'
index
.
index_object
(
0
,
self
.
doc
)
get_transaction
().
commit
()
self
.
doc
.
text
=
'time waits for no one'
index
.
index_object
(
1
,
self
.
doc
)
get_transaction
().
commit
()
self
.
dbclose
()
index
=
self
.
dbopen
()
r
=
index
.
_apply_index
({})
assert
r
==
None
r
=
index
.
_apply_index
({
'text'
:
'python'
})
assert
len
(
r
)
==
2
and
r
[
1
]
==
(
'text'
,),
'incorrectly not used'
assert
not
r
[
0
],
"should have no results"
r
=
index
.
_apply_index
({
'text'
:
'time'
})
r
=
list
(
r
[
0
].
keys
())
assert
r
==
[
0
,
1
],
r
def
checkPersistentUpdate2
(
self
):
"Check less simple persistent indexing"
index
=
self
.
dbopen
()
self
.
doc
.
text
=
'this is the time, when all good zopes'
index
.
index_object
(
0
,
self
.
doc
)
get_transaction
().
commit
()
self
.
doc
.
text
=
'time waits for no one'
index
.
index_object
(
1
,
self
.
doc
)
get_transaction
().
commit
()
self
.
doc
.
text
=
'the next task is to test'
index
.
index_object
(
3
,
self
.
doc
)
get_transaction
().
commit
()
self
.
doc
.
text
=
'time time'
index
.
index_object
(
2
,
self
.
doc
)
get_transaction
().
commit
()
self
.
dbclose
()
index
=
self
.
dbopen
()
r
=
index
.
_apply_index
({})
assert
r
==
None
r
=
index
.
_apply_index
({
'text'
:
'python'
})
assert
len
(
r
)
==
2
and
r
[
1
]
==
(
'text'
,),
'incorrectly not used'
assert
not
r
[
0
],
"should have no results"
r
=
index
.
_apply_index
({
'text'
:
'time'
})
r
=
list
(
r
[
0
].
keys
())
assert
r
==
[
0
,
1
,
2
],
r
sample_texts
=
[
"""This is the time for all good men to come to
the aid of their country"""
,
"""ask not what your country can do for you,
ask what you can do for your country"""
,
"""Man, I can't wait to get to Montross!"""
,
"""Zope Public License (ZPL) Version 1.0"""
,
"""Copyright (c) Digital Creations. All rights reserved."""
,
"""This license has been certified as Open Source(tm)."""
,
"""I hope I get to work on time"""
,
]
def
globTest
(
self
,
qmap
,
rlist
):
"Check a glob query"
index
=
self
.
dbopen
()
index
.
_lexicon
=
GlobbingLexicon
.
GlobbingLexicon
()
for
i
in
range
(
len
(
self
.
sample_texts
)):
self
.
doc
.
text
=
self
.
sample_texts
[
i
]
index
.
index_object
(
i
,
self
.
doc
)
get_transaction
().
commit
()
self
.
dbclose
()
index
=
self
.
dbopen
()
r
=
list
(
index
.
_apply_index
(
qmap
)[
0
].
keys
())
assert
r
==
rlist
,
r
return
index
.
_apply_index
def
checkStarQuery
(
self
):
"Check a star query"
self
.
globTest
({
'text'
:
'm*n'
},
[
0
,
2
])
def
checkAndQuery
(
self
):
"Check an AND query"
self
.
globTest
({
'text'
:
'time and country'
},
[
0
,])
def
checkOrQuery
(
self
):
"Check an OR query"
self
.
globTest
({
'text'
:
'time or country'
},
[
0
,
1
,
6
])
def
checkDefOrQuery
(
self
):
"Check a default OR query"
self
.
globTest
({
'text'
:
'time country'
},
[
0
,
1
,
6
])
def
checkNearQuery
(
self
):
"""Check a NEAR query.. (NOTE:ACTUALLY AN 'AND' TEST!!)"""
# NEAR never worked, so Zopes post-2.3.1b3 define near to mean AND
self
.
globTest
({
'text'
:
'time ... country'
},
[
0
,])
def
checkQuotesQuery
(
self
):
"""Check a quoted query"""
ai
=
self
.
globTest
({
'text'
:
'"This is the time"'
},
[
0
,])
r
=
list
(
ai
({
'text'
:
'"now is the time"'
})[
0
].
keys
())
assert
r
==
[],
r
def
checkAndNotQuery
(
self
):
"Check an ANDNOT query"
self
.
globTest
({
'text'
:
'time and not country'
},
[
6
,])
def
checkParenMatchingQuery
(
self
):
"Check a query with parens"
ai
=
self
.
globTest
({
'text'
:
'(time and country) men'
},
[
0
,])
r
=
list
(
ai
({
'text'
:
'(time and not country) or men'
})[
0
].
keys
())
assert
r
==
[
0
,
6
],
r
def
checkTextIndexOperatorQuery
(
self
):
"Check a query with 'operator' in the request"
self
.
globTest
({
'text'
:
{
'query'
:
'time men'
,
'operator'
:
'and'
}},
[
0
,])
def
checkNonExistentWord
(
self
):
""" Check for nonexistent word """
self
.
globTest
({
'text'
:
'zop'
},
[])
def
checkComplexQuery1
(
self
):
""" Check complex query 1 """
self
.
globTest
({
'text'
:
'((?ount* or get) and not wait) '
'"been *ert*"'
},
[
0
,
1
,
5
,
6
])
def
test_suite
():
return
unittest
.
makeSuite
(
Tests
,
'check'
)
def
main
():
unittest
.
TextTestRunner
().
run
(
test_suite
())
def
debug
():
test_suite
().
debug
()
def
pdebug
():
import
pdb
pdb
.
run
(
'debug()'
)
if
__name__
==
'__main__'
:
if
len
(
sys
.
argv
)
>
1
:
globals
()[
sys
.
argv
[
1
]]()
else
:
main
()
lib/python/SearchIndex/GlobbingLexicon.py
View file @
3d88c027
...
...
@@ -267,21 +267,28 @@ class GlobbingLexicon(Lexicon):
def
query_hook
(
self
,
q
):
"""expand wildcards"""
words
=
[]
for
w
in
q
:
if
(
(
self
.
multi_wc
in
w
)
or
(
self
.
single_wc
in
w
)
):
wids
=
self
.
get
(
w
)
ListType
=
type
([])
i
=
len
(
q
)
-
1
while
i
>=
0
:
e
=
q
[
i
]
if
isinstance
(
e
,
ListType
):
self
.
query_hook
(
e
)
elif
(
(
self
.
multi_wc
in
e
)
or
(
self
.
single_wc
in
e
)
):
wids
=
self
.
get
(
e
)
words
=
[]
for
wid
in
wids
:
if
words
:
words
.
append
(
Or
)
words
.
append
(
wid
)
else
:
words
.
append
(
w
)
if
not
words
:
# if words is empty, return something that will make
# textindex's __getitem__ return an empty result list
words
.
append
(
''
)
q
[
i
]
=
words
i
=
i
-
1
# if words is empty, return something that will make textindex's
# __getitem__ return an empty result list
return
words
or
[
''
]
return
q
def
Splitter
(
self
,
astring
,
words
=
None
):
""" wrap the splitter """
...
...
@@ -298,19 +305,16 @@ class GlobbingLexicon(Lexicon):
There is no way to quote meta-characters.
"""
# Remove characters that are meaningful in a regex
transTable
=
string
.
maketrans
(
""
,
""
)
result
=
string
.
translate
(
pat
,
transTable
,
r'()&|!@#$%^{}\
<>.
')
# First, deal with mu
tl
i-character globbing
result
=
string
.
replace
(
pa
t
,
'*'
,
'.*'
)
# First, deal with mu
lt
i-character globbing
result = string.replace(
resul
t, '
*
', '
.
*
')
# Next, we need to deal with single-character globbing
result
=
string
.
replace
(
result
,
'?'
,
'.?'
)
# Now, we need to remove all of the characters that
# are forbidden.
result
=
string
.
translate
(
result
,
transTable
,
r'()&|!@#$%^{}\
<>
')
result = string.replace(result, '
?
', '
.
')
return "%s$" % result
lib/python/SearchIndex/UnTextIndex.py
View file @
3d88c027
...
...
@@ -91,7 +91,7 @@ undo information so that objects can be unindexed when the old value
is no longer known.
"""
__version__
=
'$Revision: 1.
49
$'
[
11
:
-
2
]
__version__
=
'$Revision: 1.
50
$'
[
11
:
-
2
]
import
string
,
re
...
...
@@ -428,7 +428,7 @@ class UnTextIndex(Persistent, Implicit):
and a String. Strings are looked up in the lexicon, whereas
Integers are assumed to be resolved word ids. """
if
type
(
word
)
is
IntType
:
if
isinstance
(
word
,
IntType
)
:
# We have a word ID
result
=
self
.
_index
.
get
(
word
,
{})
return
ResultList
(
result
,
(
word
,),
self
)
...
...
@@ -440,7 +440,7 @@ class UnTextIndex(Persistent, Implicit):
if
len
(
splitSource
)
==
1
:
splitSource
=
splitSource
[
0
]
if
splitSource
[:
1
]
==
'"'
and
splitSource
[
-
1
:]
==
'"'
:
if
splitSource
[:
1
]
==
splitSource
[
-
1
:]
==
'"'
:
return
self
[
splitSource
]
wids
=
self
.
getLexicon
(
self
.
_lexicon
).
get
(
splitSource
)
...
...
@@ -551,28 +551,37 @@ class UnTextIndex(Persistent, Implicit):
def
query
(
self
,
s
,
default_operator
=
Or
,
ws
=
(
string
.
whitespace
,)):
""" This is called by TextIndexes. A 'query term' which is a
string 's' is passed in, along with an index object. s is
parsed, then the wildcards are parsed, then something is
parsed again, then the whole thing is 'evaluated'. """
def
query
(
self
,
s
,
default_operator
=
Or
):
""" Evaluate a query string.
Convert the query string into a data structure of nested lists
and strings, based on the grouping of whitespace-separated
strings by parentheses and quotes. The 'Near' operator is
inserted between the strings of a quoted group.
The Lexicon is given the opportunity to transform the
data structure. Stemming, wildcards, and translation are
possible Lexicon services.
Finally, the query list is normalized so that it and every
sub-list consist of non-operator strings or lists separated
by operators. This list is evaluated.
"""
# First replace any occurrences of " and not " with " andnot "
s
=
re
.
sub
(
'[%s]+[aA][nN][dD][%s]*[nN][oO][tT][%s]+'
%
(
ws
*
3
),
' andnot '
,
s
)
s
=
re
.
sub
(
'(?i)
\
s+
a
nd
\
s*
n
ot
\
s+
'
, '
andnot
', s)
#
do some parsing
#
Parse parentheses and quotes
q = parse(s)
## here, we give lexicons a chance to transform the query.
## For example, substitute wildcards, or translate words into
## various languages.
# Allow the Lexicon to process the query
q = self.getLexicon(self._lexicon).query_hook(q)
# do some more parsing
# Insert the default operator between any two search terms not
# already joined by an operator.
q = parse2(q, default_operator)
#
#
evaluate the final 'expression'
# evaluate the final '
expression
'
return self.evaluate(q)
...
...
@@ -605,19 +614,17 @@ class UnTextIndex(Persistent, Implicit):
def evaluate(self, query):
"""Evaluate a parsed query"""
# There are two options if the query passed in is only one
# item. It means either it's an embedded query, in which case
# we'll recursively evaluate, otherwise it's nothing for us
# to evaluate, and we just get the results and return them.
if
(
len
(
query
)
==
1
):
if
(
type
(
query
[
0
])
is
ListType
):
return
self
.
evaluate
(
query
[
0
])
# Strip off meaningless layers
while isinstance(query, ListType) and len(query) == 1:
query = query[0]
return
self
[
query
[
0
]]
# __getitem__
# If it'
s
not
a
list
,
assume
a
string
or
number
if
not
isinstance
(
query
,
ListType
):
return
self
[
query
]
# Now we need to loop through the query and
expand out
# Now we need to loop through the query and
reduce
# operators. They are currently evaluated in the following
# order: AndNot
e
-> And -> Or -> Near
# order: AndNot -> And -> Or -> Near
i
=
0
while
(
i
<
len
(
query
)):
if
query
[
i
]
is
AndNot
:
...
...
@@ -660,98 +667,91 @@ def parse(s):
l
=
[]
tmp
=
string
.
lower
(
s
)
while
(
1
):
p
=
parens
(
tmp
)
p
=
parens
(
tmp
)
while
p
is
not
None
:
# Look for quotes in the section of the string before
# the parentheses, then parse the string inside the parens
l
=
l
+
quotes
(
p
[
0
])
l
.
append
(
parse
(
p
[
1
]))
if
(
p
is
None
):
# No parentheses found. Look for quotes then exit.
l
=
l
+
quotes
(
tmp
)
break
else
:
# Look for quotes in the section of the string before
# the parentheses, then parse the string inside the parens
l
=
l
+
quotes
(
tmp
[:(
p
[
0
]
-
1
)])
l
.
append
(
parse
(
tmp
[
p
[
0
]
:
p
[
1
]]))
# continue looking through the rest of the string
tmp
=
tmp
[(
p
[
1
]
+
1
):]
# continue looking through the rest of the string
tmp
=
p
[
2
]
p
=
parens
(
tmp
)
return
l
return
l
+
quotes
(
tmp
)
def
parse2
(
q
,
default_operator
,
operator_dict
=
{
AndNot
:
AndNot
,
And
:
And
,
Or
:
Or
,
Near
:
Near
}):
"""Find operators and operands"""
i
=
0
isop
=
operator_dict
.
has_key
while
(
i
<
len
(
q
)):
if
(
type
(
q
[
i
])
is
ListType
):
q
[
i
]
=
parse2
(
q
[
i
],
default_operator
)
# every other item, starting with the first, should be an operand
if
((
i
%
2
)
!=
0
):
# This word should be an operator; if it is not, splice in
# the default operator.
if
type
(
q
[
i
])
is
not
ListType
and
isop
(
q
[
i
]):
q
[
i
]
=
operator_dict
[
q
[
i
]]
else
:
q
[
i
:
i
]
=
[
default_operator
]
i
=
i
+
1
i
=
len
(
q
)
-
1
while
i
>=
0
:
e
=
q
[
i
]
if
isinstance
(
e
,
ListType
):
q
[
i
]
=
parse2
(
e
,
default_operator
)
if
i
%
2
:
q
.
insert
(
i
,
default_operator
)
elif
i
%
2
:
# This element should be an operator
if
isop
(
e
):
# Ensure that it is identical, not merely equal.
q
[
i
]
=
operator_dict
[
e
]
else
:
# Insert the default operator.
q
.
insert
(
i
,
default_operator
)
i
=
i
-
1
return
q
def
parens
(
s
,
parens_re
=
re
.
compile
(
'[
\
(
\
)]'
).
search
):
index
=
open_index
=
paren_count
=
0
while
1
:
mo
=
parens_re
(
s
,
index
)
if
mo
is
None
:
break
def
parens
(
s
,
parens_re
=
re
.
compile
(
'[()]'
).
search
):
mo
=
parens_re
(
s
)
if
mo
is
None
:
return
open_index
=
mo
.
start
(
0
)
+
1
paren_count
=
0
while
mo
is
not
None
:
index
=
mo
.
start
(
0
)
if
s
[
index
]
==
'('
:
paren_count
=
paren_count
+
1
if
open_index
==
0
:
open_index
=
index
+
1
else
:
paren_count
=
paren_count
-
1
if
paren_count
==
0
:
return
(
s
[:
open_index
-
1
],
s
[
open_index
:
index
],
s
[
index
+
1
:])
if
paren_count
<
0
:
break
mo
=
parens_re
(
s
,
index
+
1
)
if
paren_count
==
0
:
return
open_index
,
index
else
:
index
=
index
+
1
if
paren_count
==
0
:
# No parentheses Found
return
None
else
:
raise
QueryError
,
"Mismatched parentheses"
raise
QueryError
,
"Mismatched parentheses"
def
quotes
(
s
,
ws
=
(
string
.
whitespace
,)):
# split up quoted regions
splitted
=
re
.
split
(
'[%s]*
\
"
[%s]*'
%
(
ws
*
2
),
s
)
split
=
string
.
split
if
(
len
(
splitted
)
>
1
):
if
((
len
(
splitted
)
%
2
)
==
0
):
raise
QueryError
,
"Mismatched quotes"
def
quotes
(
s
):
split
=
string
.
split
if
'"'
not
in
s
:
return
split
(
s
)
for
i
in
range
(
1
,
len
(
splitted
),
2
):
# split the quoted region into words
splitted
[
i
]
=
filter
(
None
,
split
(
splitted
[
i
]))
# put the Proximity operator in between quoted words
for
j
in
range
(
1
,
len
(
splitted
[
i
])):
splitted
[
i
][
j
:
j
]
=
[
Near
]
for
i
in
range
(
len
(
splitted
)
-
1
,
-
1
,
-
2
):
# split the non-quoted region into words
splitted
[
i
:
i
+
1
]
=
filter
(
None
,
split
(
splitted
[
i
]))
splitted
=
filter
(
None
,
splitted
)
else
:
# No quotes, so just split the string into words
splitted
=
filter
(
None
,
split
(
s
))
return
splitted
# split up quoted regions
splitted
=
re
.
split
(
'
\
s*
\
"
\
s*
'
, s)
if (len(splitted) % 2) == 0: raise QueryError, "Mismatched quotes"
for i in range(1,len(splitted),2):
# split the quoted region into words
words = splitted[i] = split(splitted[i])
# put the Proximity operator in between quoted words
j = len(words) - 1
while j > 0:
words.insert(j, Near)
j = j - 1
i = len(splitted) - 1
while i >= 0:
# split the non-quoted region into words
splitted[i:i+1] = split(splitted[i])
i = i - 2
return filter(None, splitted)
lib/python/SearchIndex/tests/testUnTextIndex.py
View file @
3d88c027
...
...
@@ -217,8 +217,8 @@ class Tests(unittest.TestCase):
"""This license has been certified as Open Source(tm)."""
,
"""I hope I get to work on time"""
,
]
def
checkGlobQuery
(
self
):
def
globTest
(
self
,
qmap
,
rlist
):
"Check a glob query"
index
=
self
.
dbopen
()
index
.
_lexicon
=
SearchIndex
.
GlobbingLexicon
.
GlobbingLexicon
()
...
...
@@ -232,162 +232,61 @@ class Tests(unittest.TestCase):
index
=
self
.
dbopen
()
r
=
index
.
_apply_index
({
'text'
:
'm*n'
})
r
=
list
(
r
[
0
].
keys
())
assert
r
==
[
0
,
2
],
r
r
=
list
(
index
.
_apply_index
(
qmap
)[
0
].
keys
())
assert
r
==
rlist
,
r
return
index
.
_apply_index
def
checkStarQuery
(
self
):
"Check a star query"
self
.
globTest
({
'text'
:
'm*n'
},
[
0
,
2
])
def
checkAndQuery
(
self
):
"Check an AND query"
index
=
self
.
dbopen
()
index
.
_lexicon
=
SearchIndex
.
GlobbingLexicon
.
GlobbingLexicon
()
for
i
in
range
(
len
(
self
.
sample_texts
)):
self
.
doc
.
text
=
self
.
sample_texts
[
i
]
index
.
index_object
(
i
,
self
.
doc
)
get_transaction
().
commit
()
self
.
dbclose
()
index
=
self
.
dbopen
()
r
=
index
.
_apply_index
({
'text'
:
'time and country'
})
r
=
list
(
r
[
0
].
keys
())
assert
r
==
[
0
,],
r
self
.
globTest
({
'text'
:
'time and country'
},
[
0
,])
def
checkOrQuery
(
self
):
"Check an OR query"
index
=
self
.
dbopen
()
index
.
_lexicon
=
SearchIndex
.
GlobbingLexicon
.
GlobbingLexicon
()
for
i
in
range
(
len
(
self
.
sample_texts
)):
self
.
doc
.
text
=
self
.
sample_texts
[
i
]
index
.
index_object
(
i
,
self
.
doc
)
get_transaction
().
commit
()
self
.
dbclose
()
self
.
globTest
({
'text'
:
'time or country'
},
[
0
,
1
,
6
])
index
=
self
.
dbopen
()
r
=
index
.
_apply_index
({
'text'
:
'time or country'
})
r
=
list
(
r
[
0
].
keys
())
assert
r
==
[
0
,
1
,
6
],
r
def
checkDefOrQuery
(
self
):
"Check a default OR query"
self
.
globTest
({
'text'
:
'time country'
},
[
0
,
1
,
6
])
def
checkNearQuery
(
self
):
"""Check a NEAR query.. (NOTE:ACTUALLY AN 'OR' TEST!!)"""
# NEAR never worked, so Zopes post-2.3.1b3 define near to mean OR
index
=
self
.
dbopen
()
index
.
_lexicon
=
SearchIndex
.
GlobbingLexicon
.
GlobbingLexicon
()
for
i
in
range
(
len
(
self
.
sample_texts
)):
self
.
doc
.
text
=
self
.
sample_texts
[
i
]
index
.
index_object
(
i
,
self
.
doc
)
get_transaction
().
commit
()
"""Check a NEAR query.. (NOTE:ACTUALLY AN 'AND' TEST!!)"""
# NEAR never worked, so Zopes post-2.3.1b3 define near to mean AND
self
.
globTest
({
'text'
:
'time ... country'
},
[
0
,])
self
.
dbclose
()
index
=
self
.
dbopen
(
)
def
checkQuotesQuery
(
self
):
"""Check a quoted query"""
ai
=
self
.
globTest
({
'text'
:
'"This is the time"'
},
[
0
,]
)
r
=
index
.
_apply_index
({
'text'
:
'time near country'
})
r
=
list
(
r
[
0
].
keys
())
assert
r
==
[
0
,
1
,
6
],
r
r
=
list
(
ai
({
'text'
:
'"now is the time"'
})[
0
].
keys
())
assert
r
==
[],
r
def
checkAndNotQuery
(
self
):
"Check an ANDNOT query"
index
=
self
.
dbopen
()
index
.
_lexicon
=
SearchIndex
.
GlobbingLexicon
.
GlobbingLexicon
()
for
i
in
range
(
len
(
self
.
sample_texts
)):
self
.
doc
.
text
=
self
.
sample_texts
[
i
]
index
.
index_object
(
i
,
self
.
doc
)
get_transaction
().
commit
()
self
.
dbclose
()
index
=
self
.
dbopen
()
r
=
index
.
_apply_index
({
'text'
:
'time and not country'
})
r
=
list
(
r
[
0
].
keys
())
assert
r
==
[
6
],
r
self
.
globTest
({
'text'
:
'time and not country'
},
[
6
,])
def
checkParenMatchingQuery
(
self
):
"Check a query with parens"
index
=
self
.
dbopen
()
index
.
_lexicon
=
SearchIndex
.
GlobbingLexicon
.
GlobbingLexicon
()
ai
=
self
.
globTest
({
'text'
:
'(time and country) men'
},
[
0
,])
for
i
in
range
(
len
(
self
.
sample_texts
)):
self
.
doc
.
text
=
self
.
sample_texts
[
i
]
index
.
index_object
(
i
,
self
.
doc
)
get_transaction
().
commit
()
self
.
dbclose
()
index
=
self
.
dbopen
()
r
=
index
.
_apply_index
({
'text'
:
'(time and country) men'
})
r
=
list
(
r
[
0
].
keys
())
assert
r
==
[
0
],
r
r
=
index
.
_apply_index
({
'text'
:
'(time and not country) or men'
})
r
=
list
(
r
[
0
].
keys
())
r
=
list
(
ai
({
'text'
:
'(time and not country) or men'
})[
0
].
keys
())
assert
r
==
[
0
,
6
],
r
def
checkQuoteMatchingQuery
(
self
):
"Check a query with quotes.. this is known to fail under 2.3.1b3-"
index
=
self
.
dbopen
()
index
.
_lexicon
=
SearchIndex
.
GlobbingLexicon
.
GlobbingLexicon
()
for
i
in
range
(
len
(
self
.
sample_texts
)):
self
.
doc
.
text
=
self
.
sample_texts
[
i
]
index
.
index_object
(
i
,
self
.
doc
)
get_transaction
().
commit
()
self
.
dbclose
()
index
=
self
.
dbopen
()
r
=
index
.
_apply_index
({
'text'
:
'"This is the time"'
})
r
=
list
(
r
[
0
].
keys
())
assert
r
==
[
0
],
r
r
=
index
.
_apply_index
({
'text'
:
'"now is the time"'
})
r
=
list
(
r
[
0
].
keys
())
assert
r
==
[],
r
def
checkTextIndexOperatorQuery
(
self
):
"Check a query with 'textindex_operator' in the request"
index
=
self
.
dbopen
()
index
.
_lexicon
=
SearchIndex
.
GlobbingLexicon
.
GlobbingLexicon
()
for
i
in
range
(
len
(
self
.
sample_texts
)):
self
.
doc
.
text
=
self
.
sample_texts
[
i
]
index
.
index_object
(
i
,
self
.
doc
)
get_transaction
().
commit
()
self
.
dbclose
()
index
=
self
.
dbopen
()
r
=
index
.
_apply_index
({
'text'
:
'time men'
,
'textindex_operator'
:
'and'
})
r
=
list
(
r
[
0
].
keys
())
assert
r
==
[
0
],
r
self
.
globTest
({
'text'
:
'time men'
,
'textindex_operator'
:
'and'
},
[
0
,])
def
checkNonExistentWord
(
self
):
""" Check for nonexistent word """
index
=
self
.
dbopen
()
index
.
_lexicon
=
SearchIndex
.
GlobbingLexicon
.
GlobbingLexicon
()
for
i
in
range
(
len
(
self
.
sample_texts
)):
self
.
doc
.
text
=
self
.
sample_texts
[
i
]
index
.
index_object
(
i
,
self
.
doc
)
get_transaction
().
commit
()
self
.
dbclose
()
index
=
self
.
dbopen
()
r
=
index
.
_apply_index
({
'text'
:
'zop'
})
r
=
list
(
r
[
0
].
keys
())
assert
r
==
[],
r
self
.
globTest
({
'text'
:
'zop'
},
[])
def
checkComplexQuery1
(
self
):
""" Check complex query 1 """
self
.
globTest
({
'text'
:
'((?ount* or get) and not wait) '
'"been *ert*"'
},
[
0
,
1
,
5
,
6
])
def
test_suite
():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment