Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
b01e161f
Commit
b01e161f
authored
Jul 03, 2009
by
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
- removed deprecated TextIndex
parent
0aa3f2ea
Changes
32
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
32 changed files
with
2 additions
and
4008 deletions
+2
-4008
setup.py
setup.py
+0
-15
src/Products/PluginIndexes/README.txt
src/Products/PluginIndexes/README.txt
+0
-26
src/Products/PluginIndexes/TextIndex/GlobbingLexicon.py
src/Products/PluginIndexes/TextIndex/GlobbingLexicon.py
+0
-269
src/Products/PluginIndexes/TextIndex/Lexicon.py
src/Products/PluginIndexes/TextIndex/Lexicon.py
+0
-220
src/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/SETUP.cfg
...nIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/SETUP.cfg
+0
-3
src/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/__init__.py
...ndexes/TextIndex/Splitter/ISO_8859_1_Splitter/__init__.py
+0
-4
src/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/src/ISO_8859_1_Splitter.c
...ex/Splitter/ISO_8859_1_Splitter/src/ISO_8859_1_Splitter.c
+0
-593
src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/SETUP.cfg
...luginIndexes/TextIndex/Splitter/UnicodeSplitter/SETUP.cfg
+0
-3
src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/__init__.py
...ginIndexes/TextIndex/Splitter/UnicodeSplitter/__init__.py
+0
-1
src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/src/UnicodeSplitter.c
.../TextIndex/Splitter/UnicodeSplitter/src/UnicodeSplitter.c
+0
-429
src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/tests/__init__.py
...exes/TextIndex/Splitter/UnicodeSplitter/tests/__init__.py
+0
-1
src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/tests/testUnicodeSplitter.py
...dex/Splitter/UnicodeSplitter/tests/testUnicodeSplitter.py
+0
-75
src/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/SETUP.cfg
...s/PluginIndexes/TextIndex/Splitter/ZopeSplitter/SETUP.cfg
+0
-3
src/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/__init__.py
...PluginIndexes/TextIndex/Splitter/ZopeSplitter/__init__.py
+0
-4
src/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/src/ZopeSplitter.c
...ndexes/TextIndex/Splitter/ZopeSplitter/src/ZopeSplitter.c
+0
-529
src/Products/PluginIndexes/TextIndex/Splitter/__init__.py
src/Products/PluginIndexes/TextIndex/Splitter/__init__.py
+0
-33
src/Products/PluginIndexes/TextIndex/Splitter/setup.py
src/Products/PluginIndexes/TextIndex/Splitter/setup.py
+0
-23
src/Products/PluginIndexes/TextIndex/TextIndex.py
src/Products/PluginIndexes/TextIndex/TextIndex.py
+0
-760
src/Products/PluginIndexes/TextIndex/Vocabulary.py
src/Products/PluginIndexes/TextIndex/Vocabulary.py
+0
-138
src/Products/PluginIndexes/TextIndex/__init__.py
src/Products/PluginIndexes/TextIndex/__init__.py
+0
-7
src/Products/PluginIndexes/TextIndex/dtml/addTextIndex.dtml
src/Products/PluginIndexes/TextIndex/dtml/addTextIndex.dtml
+0
-86
src/Products/PluginIndexes/TextIndex/dtml/addVocabulary.dtml
src/Products/PluginIndexes/TextIndex/dtml/addVocabulary.dtml
+0
-112
src/Products/PluginIndexes/TextIndex/dtml/manageTextIndex.dtml
...roducts/PluginIndexes/TextIndex/dtml/manageTextIndex.dtml
+0
-79
src/Products/PluginIndexes/TextIndex/dtml/manageVocabulary.dtml
...oducts/PluginIndexes/TextIndex/dtml/manageVocabulary.dtml
+0
-17
src/Products/PluginIndexes/TextIndex/dtml/manage_vocab.dtml
src/Products/PluginIndexes/TextIndex/dtml/manage_vocab.dtml
+0
-117
src/Products/PluginIndexes/TextIndex/dtml/vocab_query.dtml
src/Products/PluginIndexes/TextIndex/dtml/vocab_query.dtml
+0
-12
src/Products/PluginIndexes/TextIndex/help/TextIndex_searchResults.stx
.../PluginIndexes/TextIndex/help/TextIndex_searchResults.stx
+0
-23
src/Products/PluginIndexes/TextIndex/tests/__init__.py
src/Products/PluginIndexes/TextIndex/tests/__init__.py
+0
-15
src/Products/PluginIndexes/TextIndex/tests/testSplitter.py
src/Products/PluginIndexes/TextIndex/tests/testSplitter.py
+0
-89
src/Products/PluginIndexes/TextIndex/tests/testTextIndex.py
src/Products/PluginIndexes/TextIndex/tests/testTextIndex.py
+0
-277
src/Products/PluginIndexes/__init__.py
src/Products/PluginIndexes/__init__.py
+1
-14
src/Products/PluginIndexes/interfaces.py
src/Products/PluginIndexes/interfaces.py
+1
-31
No files found.
setup.py
View file @
b01e161f
...
...
@@ -75,21 +75,6 @@ params = dict(name='Zope2',
sources
=
[
'src/initgroups/_initgroups.c'
]),
# indexes
Extension
(
name
=
'Products.PluginIndexes.TextIndex.Splitter.'
'ZopeSplitter.ZopeSplitter'
,
sources
=
[
'src/Products/PluginIndexes/TextIndex/Splitter/'
'ZopeSplitter/src/ZopeSplitter.c'
]),
Extension
(
name
=
'Products.PluginIndexes.TextIndex.Splitter.'
'ISO_8859_1_Splitter.ISO_8859_1_Splitter'
,
sources
=
[
'src/Products/PluginIndexes/TextIndex/Splitter/'
'ISO_8859_1_Splitter/src/ISO_8859_1_Splitter.c'
]),
Extension
(
name
=
'Products.PluginIndexes.TextIndex.Splitter.'
'UnicodeSplitter.UnicodeSplitter'
,
sources
=
[
'src/Products/PluginIndexes/TextIndex/Splitter/'
'UnicodeSplitter/src/UnicodeSplitter.c'
]),
Extension
(
name
=
'Products.ZCTextIndex.stopper'
,
sources
=
[
'src/Products/ZCTextIndex/stopper.c'
]),
...
...
src/Products/PluginIndexes/README.txt
View file @
b01e161f
...
...
@@ -47,34 +47,8 @@ Changes to Indexes:
- new index type
Changes to TextIndex:
- The ZMI allows selecting a different vocabulary. To use a vocabulary other
than the ZCatalog's default vocabulary 'Vocabulary', you must create a new
Vocabulary through the ZMI of the ZCatalog. After creating the vocabulary, you
can choose it on the ZMI management screen of the text index.
- the default operator might be overridden by specifying a new one
as 'operator' (see below)
- removed the direct dependency on the Splitter module. The Splitter is now
acquired from the vocabulary in use
- usage of the 'textindex_operator' is deprecated
- lots of internal rework
Changes to Vocabulary:
- added Splitter selection to the add form
Changes to ZCatalog
- Vocabulary.py moved to Products/PluginIndexes/TextIndex. A wrapper
for backward compatibility is in place
- added ZCatalogIndexes.py to provide access to indexes with pluggable
index interface
...
...
src/Products/PluginIndexes/TextIndex/GlobbingLexicon.py
deleted
100644 → 0
View file @
0aa3f2ea
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
#############################################################################
import
re
import
string
from
BTrees.IIBTree
import
IISet
,
union
,
IITreeSet
from
BTrees.OIBTree
import
OIBTree
from
BTrees.IOBTree
import
IOBTree
from
BTrees.OOBTree
import
OOBTree
from
Products.PluginIndexes.common.randid
import
randid
from
Products.PluginIndexes.TextIndex.TextIndex
import
Op
from
Products.PluginIndexes.TextIndex.TextIndex
import
Or
from
Products.PluginIndexes.TextIndex.Lexicon
import
Lexicon
from
Products.PluginIndexes.TextIndex.Splitter
import
getSplitter
class GlobbingLexicon(Lexicon):
    """Lexicon which supports basic globbing function ('*' and '?').

    This lexicon keeps several data structures around that are useful
    for searching. They are:

      '_lexicon' -- Contains the mapping from word => word_id

      '_inverseLex' -- Contains the mapping from word_id => word

      '_digrams' -- Contains a mapping from digram => set of word_ids

    Before going further, it is necessary to understand what a digram is,
    as it is a core component of the structure of this lexicon.  A digram
    is a two-letter sequence in a word.  For example, the word 'zope'
    would be converted into the digrams::

      ['$z', 'zo', 'op', 'pe', 'e$']

    where the '$' is a word marker.  It is used at the beginning and end
    of the words.  Those digrams are significant.
    """

    # NOTE(review): methods that had no docstring are documented with
    # comments only.  In Zope 2 the presence of a docstring can make a
    # method publishable through the web -- confirm before adding any.

    multi_wc = '*'
    single_wc = '?'
    eow = '$'

    def __init__(self, useSplitter=None, extra=None):
        # 'useSplitter' is the splitter name handed to getSplitter();
        # 'extra' is stored as-is and read by Splitter() for its
        # splitterSingleChars / splitterIndexNumbers / splitterCasefolding
        # attributes.
        self.clear()
        self.useSplitter = useSplitter
        self.splitterParams = extra
        self.SplitterFunc = getSplitter(self.useSplitter)

    def clear(self):
        # Reset all three mappings to fresh, empty BTrees.
        self._lexicon = OIBTree()
        self._inverseLex = IOBTree()
        self._digrams = OOBTree()

    def _convertBTrees(self, threshold=200):
        # Convert the base-class mappings first, then the digram map
        # unless it already is an OOBTree.
        Lexicon._convertBTrees(self, threshold)
        if type(self._digrams) is OOBTree:
            return

        from BTrees.convert import convert

        old_digrams = self._digrams
        self._digrams = OOBTree()
        self._digrams._p_jar = self._p_jar
        convert(old_digrams, self._digrams, threshold, IITreeSet)

    def createDigrams(self, word):
        """Returns a list with the set of digrams in the word."""
        # Use the class-level end-of-word marker so indexing agrees with
        # get(), which builds its query digrams from self.eow as well.
        marked = self.eow + word + self.eow
        return [marked[i:i + 2] for i in range(len(marked) - 1)]

    def getWordId(self, word):
        """Provided 'word', return the matching integer word id."""
        if self._lexicon.has_key(word):
            return self._lexicon[word]
        return self.assignWordId(word)

    set = getWordId  # Kludge for old code

    def getWord(self, wid):
        # Reverse lookup: word for 'wid', or None when the id is unknown.
        return self._inverseLex.get(wid, None)

    def assignWordId(self, word):
        """Assigns a new word id to the provided word, and return it."""
        # Double check it's not in the lexicon already, and if it is, just
        # return it.
        if self._lexicon.has_key(word):
            return self._lexicon[word]

        # Get word id. BBB Backward compat pain.
        inverse = self._inverseLex
        try:
            insert = inverse.insert
        except AttributeError:
            # we have an "old" BTree object
            if inverse:
                wid = inverse.keys()[-1] + 1
            else:
                # Rebind the local name as well, so the word is stored in
                # the new mapping and not in the object being replaced.
                inverse = self._inverseLex = IOBTree()
                wid = 1
            inverse[wid] = word
        else:
            # we have a "new" IOBTree object: draw random ids until one
            # is accepted by insert()
            wid = randid()
            while not insert(wid, word):
                wid = randid()

        self._lexicon[word] = wid

        # Now take all the digrams and insert them into the digram map.
        for digram in self.createDigrams(word):
            wids = self._digrams.get(digram, None)
            if wids is None:
                self._digrams[digram] = wids = IISet()
            wids.insert(wid)

        return wid

    def get(self, pattern):
        """ Query the lexicon for words matching a pattern.

        Returns an empty tuple when nothing matches, a one-element tuple
        holding the word id for a pattern without wildcards, and an IISet
        of word ids for a globbing pattern.
        """
        # single word pattern produce a slicing problem below.
        # Because the splitter throws away single characters we can
        # return an empty tuple here.
        if len(pattern) == 1:
            return ()

        wildcards = [self.multi_wc, self.single_wc]
        digrams = []
        globbing = 0
        for i, ch in enumerate(pattern):
            if ch in wildcards:
                globbing = 1
                continue

            if i == 0:
                digrams.insert(i, self.eow + ch)
                digrams.append(ch + pattern[i + 1])
            else:
                try:
                    if pattern[i + 1] not in wildcards:
                        digrams.append(ch + pattern[i + 1])
                except IndexError:
                    # last character of the pattern: close with the marker
                    digrams.append(ch + self.eow)

        if not globbing:
            result = self._lexicon.get(pattern, None)
            if result is None:
                return ()
            return (result,)

        ## now get all of the intsets that contain the result digrams
        result = None
        for digram in digrams:
            result = union(result, self._digrams.get(digram, None))

        if not result:
            return ()

        ## 'result' now holds every word that contains at least one of
        ## the digrams.  Some of those words do not match 'pattern'
        ## (for example because the digrams occur in the wrong order),
        ## so each candidate is checked against the regular expression.
        expr = re.compile(self.createRegex(pattern))
        hits = IISet()
        for wid in result:
            if expr.match(self._inverseLex[wid]):
                hits.insert(wid)
        return hits

    def __getitem__(self, word):
        """ """
        return self.get(word)

    def query_hook(self, q):
        """expand wildcards"""
        ListType = type([])
        i = len(q) - 1
        while i >= 0:
            e = q[i]
            if isinstance(e, ListType):
                self.query_hook(e)
            elif isinstance(e, Op):
                pass
            elif (self.multi_wc in e) or (self.single_wc in e):
                # Replace the wildcard term by the matching word ids,
                # joined with the Or operator.
                words = []
                for wid in self.get(e):
                    if words:
                        words.append(Or)
                    words.append(wid)
                if not words:
                    # if words is empty, return something that will make
                    # textindex's __getitem__ return an empty result list
                    words.append('')
                q[i] = words
            i = i - 1

        return q

    def Splitter(self, astring, words=None, encoding="latin1"):
        """ wrap the splitter """
        ## don't do anything, less efficient but there's not much
        ## sense in stemming a globbing lexicon.
        try:
            return self.SplitterFunc(
                astring,
                words,
                encoding=encoding,
                singlechar=self.splitterParams.splitterSingleChars,
                indexnumbers=self.splitterParams.splitterIndexNumbers,
                casefolding=self.splitterParams.splitterCasefolding)
        except Exception:
            # Best-effort fallback to the plain two-argument call (for
            # instance when no splitter parameters are set).  Unlike a
            # bare 'except', this lets KeyboardInterrupt and SystemExit
            # propagate.
            return self.SplitterFunc(astring, words)

    def createRegex(self, pat):
        """Translate a PATTERN to a regular expression.

        There is no way to quote meta-characters.
        """
        # Remove characters that are meaningful in a regex.  Joining on
        # pat[:0] keeps the result the same string type as 'pat'.
        meta_chars = r'()&|!@#$%^{}\<>.'
        result = pat[:0].join([ch for ch in pat if ch not in meta_chars])

        # First, deal with multi-character globbing
        result = result.replace('*', '.*')

        # Next, we need to deal with single-character globbing
        result = result.replace('?', '.')

        return "%s$" % result
src/Products/PluginIndexes/TextIndex/Lexicon.py
deleted
100644 → 0
View file @
0aa3f2ea
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
__doc__
=
""" Module breaks out Zope specific methods and behavior. In
addition, provides the Lexicon class which defines a word to integer
mapping.
"""
from
Acquisition
import
Implicit
from
BTrees.OIBTree
import
OIBTree
from
BTrees.IOBTree
import
IOBTree
from
BTrees.IIBTree
import
IISet
from
BTrees.IIBTree
import
IITreeSet
from
Persistence
import
Persistent
from
Products.PluginIndexes.common.randid
import
randid
from
Products.PluginIndexes.TextIndex.Splitter
import
getSplitter
from
Products.PluginIndexes.TextIndex.Splitter
import
splitterNames
from
types
import
StringType
class Lexicon(Persistent, Implicit):
    """Maps words to word ids and then some

    The Lexicon object is an attempt to abstract vocabularies out of
    Text indexes.  This abstraction is not totally cooked yet, this
    module still includes the parser for the 'Text Index Query
    Language' and a few other hacks.
    """

    # NOTE(review): methods that had no docstring are documented with
    # comments only.  In Zope 2 the presence of a docstring can make a
    # method publishable through the web -- confirm before adding any.

    # default for older objects
    stop_syn = {}

    def __init__(self, stop_syn=None, useSplitter=None, extra=None):
        # 'stop_syn' is the stopword/synonym mapping, 'useSplitter' a
        # splitter name (the first registered name is used when it is
        # empty) and 'extra' the parameter object read by Splitter().
        self.clear()
        if stop_syn is None:
            self.stop_syn = {}
        else:
            self.stop_syn = stop_syn

        self.useSplitter = splitterNames[0]
        if useSplitter:
            self.useSplitter = useSplitter
        self.splitterParams = extra
        self.SplitterFunc = getSplitter(self.useSplitter)

    def clear(self):
        # Reset both mappings to fresh, empty BTrees.
        self._lexicon = OIBTree()
        self._inverseLex = IOBTree()

    def _convertBTrees(self, threshold=200):
        # Nothing to do when both mappings already have the current types.
        if (type(self._lexicon) is OIBTree and
                type(getattr(self, '_inverseLex', None)) is IOBTree):
            return

        from BTrees.convert import convert

        lexicon = self._lexicon
        self._lexicon = OIBTree()
        self._lexicon._p_jar = self._p_jar
        convert(lexicon, self._lexicon, threshold)

        try:
            inverseLex = self._inverseLex
            self._inverseLex = IOBTree()
        except AttributeError:
            # older lexicons didn't have an inverse lexicon
            self._inverseLex = IOBTree()
            inverseLex = self._inverseLex

        self._inverseLex._p_jar = self._p_jar
        convert(inverseLex, self._inverseLex, threshold)

    def set_stop_syn(self, stop_syn):
        """ pass in a mapping of stopwords and synonyms.  Format is:

        {'word' : [syn1, syn2, ..., synx]}

        Vocabularies do not necessarily need to implement this if their
        splitters do not support stemming or stopping.
        """
        self.stop_syn = stop_syn

    def getWordId(self, word):
        """ return the word id of 'word' """
        wid = self._lexicon.get(word, None)
        if wid is None:
            wid = self.assignWordId(word)
        return wid

    set = getWordId

    def getWord(self, wid):
        """ post-2.3.1b2 method, will not work with unconverted lexicons """
        return self._inverseLex.get(wid, None)

    def assignWordId(self, word):
        """Assigns a new word id to the provided word and returns it."""
        # First make sure it's not already in there
        if self._lexicon.has_key(word):
            return self._lexicon[word]

        try:
            inverse = self._inverseLex
        except AttributeError:
            # old lexicon without an inverse mapping: rebuild it.  The
            # loop uses its own names so the 'word' argument is still
            # intact for the insert below.
            inverse = self._inverseLex = IOBTree()
            for known_word, known_wid in self._lexicon.items():
                inverse[known_wid] = known_word

        # Draw random ids until insert() accepts one.
        wid = randid()
        while not inverse.insert(wid, word):
            wid = randid()

        if isinstance(word, str):
            self._lexicon[intern(word)] = wid
        else:
            self._lexicon[word] = wid

        return wid

    def get(self, key, default=None):
        """Return the matched word against the key."""
        # The result is always an IISet: empty when the key is unknown
        # (and no default was given), otherwise holding the single id.
        r = IISet()
        wid = self._lexicon.get(key, default)
        if wid is not None:
            r.insert(wid)
        return r

    def __getitem__(self, key):
        return self.get(key)

    def __len__(self):
        return len(self._lexicon)

    def Splitter(self, astring, words=None, encoding="latin1"):
        """ wrap the splitter """
        if words is None:
            words = self.stop_syn

        try:
            return self.SplitterFunc(
                astring,
                words,
                encoding=encoding,
                singlechar=self.splitterParams.splitterSingleChars,
                indexnumbers=self.splitterParams.splitterIndexNumbers,
                casefolding=self.splitterParams.splitterCasefolding)
        except Exception:
            # Best-effort fallback to the plain two-argument call (for
            # instance when no splitter parameters are set).  Unlike a
            # bare 'except', this lets KeyboardInterrupt and SystemExit
            # propagate.
            return self.SplitterFunc(astring, words)

    def query_hook(self, q):
        """ we don't want to modify the query cuz we're dumb """
        return q
# Stop words, in their original order.  'am', 'per' and 're' each appear
# twice; the duplicates collapse when the lookup dict is built below.
stop_words = (
    'am', 'ii', 'iii', 'per', 'po', 're',
    'a', 'about', 'above', 'across', 'after', 'afterwards',
    'again', 'against', 'all', 'almost', 'alone', 'along',
    'already', 'also', 'although', 'always', 'am', 'among',
    'amongst', 'amoungst', 'amount', 'an', 'and', 'another',
    'any', 'anyhow', 'anyone', 'anything', 'anyway', 'anywhere',
    'are', 'around', 'as', 'at', 'back', 'be',
    'became', 'because', 'become', 'becomes', 'becoming', 'been',
    'before', 'beforehand', 'behind', 'being', 'below', 'beside',
    'besides', 'between', 'beyond', 'bill', 'both', 'bottom',
    'but', 'by', 'can', 'cannot', 'cant', 'con',
    'could', 'couldnt', 'cry', 'describe', 'detail', 'do',
    'done', 'down', 'due', 'during', 'each', 'eg',
    'eight', 'either', 'eleven', 'else', 'elsewhere', 'empty',
    'enough', 'even', 'ever', 'every', 'everyone', 'everything',
    'everywhere', 'except', 'few', 'fifteen', 'fifty', 'fill',
    'find', 'fire', 'first', 'five', 'for', 'former',
    'formerly', 'forty', 'found', 'four', 'from', 'front',
    'full', 'further', 'get', 'give', 'go', 'had',
    'has', 'hasnt', 'have', 'he', 'hence', 'her',
    'here', 'hereafter', 'hereby', 'herein', 'hereupon', 'hers',
    'herself', 'him', 'himself', 'his', 'how', 'however',
    'hundred', 'i', 'ie', 'if', 'in', 'inc',
    'indeed', 'interest', 'into', 'is', 'it', 'its',
    'itself', 'keep', 'last', 'latter', 'latterly', 'least',
    'less', 'made', 'many', 'may', 'me', 'meanwhile',
    'might', 'mill', 'mine', 'more', 'moreover', 'most',
    'mostly', 'move', 'much', 'must', 'my', 'myself',
    'name', 'namely', 'neither', 'never', 'nevertheless', 'next',
    'nine', 'no', 'nobody', 'none', 'noone', 'nor',
    'not', 'nothing', 'now', 'nowhere', 'of', 'off',
    'often', 'on', 'once', 'one', 'only', 'onto',
    'or', 'other', 'others', 'otherwise', 'our', 'ours',
    'ourselves', 'out', 'over', 'own', 'per', 'perhaps',
    'please', 'pre', 'put', 'rather', 're', 'same',
    'see', 'seem', 'seemed', 'seeming', 'seems', 'serious',
    'several', 'she', 'should', 'show', 'side', 'since',
    'sincere', 'six', 'sixty', 'so', 'some', 'somehow',
    'someone', 'something', 'sometime', 'sometimes', 'somewhere', 'still',
    'such', 'take', 'ten', 'than', 'that', 'the',
    'their', 'them', 'themselves', 'then', 'thence', 'there',
    'thereafter', 'thereby', 'therefore', 'therein', 'thereupon', 'these',
    'they', 'thick', 'thin', 'third', 'this', 'those',
    'though', 'three', 'through', 'throughout', 'thru', 'thus',
    'to', 'together', 'too', 'toward', 'towards', 'twelve',
    'twenty', 'two', 'un', 'under', 'until', 'up',
    'upon', 'us', 'very', 'via', 'was', 'we',
    'well', 'were', 'what', 'whatever', 'when', 'whence',
    'whenever', 'where', 'whereafter', 'whereas', 'whereby', 'wherein',
    'whereupon', 'wherever', 'whether', 'which', 'while', 'whither',
    'who', 'whoever', 'whole', 'whom', 'whose', 'why',
    'will', 'with', 'within', 'without', 'would', 'yet',
    'you', 'your', 'yours', 'yourself', 'yourselves',
)

# Lookup table keyed by stop word; every value is None.
stop_word_dict = {}
for word in stop_words:
    stop_word_dict.setdefault(word, None)
src/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/SETUP.cfg
deleted
100644 → 0
View file @
0aa3f2ea
<extension ISO_8859_1_Splitter>
source src/ISO_8859_1_Splitter.c
</extension>
src/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/__init__.py
deleted
100644 → 0
View file @
0aa3f2ea
from
ISO_8859_1_Splitter
import
ISO_8859_1_Splitter
def Splitter(txt, stopwords=None, encoding='latin1'):
    """Split *txt* with ISO_8859_1_Splitter, passing *stopwords* through.

    *encoding* is accepted but not forwarded to the underlying splitter.
    """
    splitter = ISO_8859_1_Splitter(txt, stopwords)
    return splitter
src/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/src/ISO_8859_1_Splitter.c
deleted
100644 → 0
View file @
0aa3f2ea
This diff is collapsed.
Click to expand it.
src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/SETUP.cfg
deleted
100644 → 0
View file @
0aa3f2ea
<extension UnicodeSplitter>
source src/UnicodeSplitter.c
</extension>
src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/__init__.py
deleted
100644 → 0
View file @
0aa3f2ea
from
UnicodeSplitter
import
UnicodeSplitter
as
Splitter
src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/src/UnicodeSplitter.c
deleted
100644 → 0
View file @
0aa3f2ea
This diff is collapsed.
Click to expand it.
src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/tests/__init__.py
deleted
100644 → 0
View file @
0aa3f2ea
# Nothing to see here.
src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/tests/testUnicodeSplitter.py
deleted
100644 → 0
View file @
0aa3f2ea
# -*- coding: ISO-8859-1 -*-
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
import
os
,
sys
,
unittest
from
Products.PluginIndexes.TextIndex.Splitter.UnicodeSplitter.UnicodeSplitter
\
import
UnicodeSplitter
class UnicodeSplitterTests(unittest.TestCase):
    """Tests for the UnicodeSplitter: plain splitting and stopwords."""

    def setUp(self):
        # The Latin-1 umlauts are written as hex escapes so the byte
        # string does not depend on the encoding of this source file.
        texts = (
            'The quick brown fox jumps over the lazy dog',
            'Bei den dreit\xe4gigen Angriffen seien auch bis'
            ' auf einen alle Flugpl\xe4tze der Taliban zerst\xf6rt worden',
        )

        # Pair each text with the expected result: its lower-cased,
        # space-separated words decoded from Latin-1.
        self.testdata = []
        for t in texts:
            uniLst = [unicode(x, 'latin1') for x in t.lower().split(' ')]
            self.testdata.append((t, uniLst))

    def testSimpleSplit(self):
        """ testing splitter functionality """
        for t, expected in self.testdata:
            fields = list(UnicodeSplitter(t))
            # assertEqual (not a bare assert) so the check still runs
            # when Python is started with -O.
            self.assertEqual(fields, expected,
                             "%s vs %s" % (fields, expected))

    def testStopwords(self):
        """ testing splitter with stopwords """
        text = 'The quick brown fox jumps over The lazy dog'
        # 'the' maps to None and is expected to be dropped; 'dog' maps
        # to 'cat' and is expected to be replaced.
        expected = [u'quick', u'brown', u'fox', u'jumps', u'over', u'lazy',
                    u'cat']
        sw_dict = {'the': None, 'dog': 'cat'}
        splitter = UnicodeSplitter(text, sw_dict)
        fields = list(splitter)
        self.assertEqual(fields, expected)
        self.assertEqual(splitter.indexes('jumps'), [3])
def test_suite():
    """Return a suite holding every test method of UnicodeSplitterTests."""
    loader = unittest.defaultTestLoader
    return loader.loadTestsFromTestCase(UnicodeSplitterTests)
def debug():
    """Run the suite through its debug() method and return the result."""
    suite = test_suite()
    return suite.debug()
def pdebug():
    """Run debug() under the control of the pdb debugger."""
    import pdb

    statement = 'debug()'
    pdb.run(statement)
def main():
    """Run the test suite with a text test runner."""
    runner = unittest.TextTestRunner()
    runner.run(test_suite())
if __name__ == '__main__':
    # An optional first command-line argument names the module-level
    # callable to run; without one the suite is run through main().
    if len(sys.argv) <= 1:
        main()
    else:
        globals()[sys.argv[1]]()
src/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/SETUP.cfg
deleted
100644 → 0
View file @
0aa3f2ea
<extension ZopeSplitter>
source src/ZopeSplitter.c
</extension>
src/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/__init__.py
deleted
100644 → 0
View file @
0aa3f2ea
from
ZopeSplitter
import
ZopeSplitter
def Splitter(txt, stopwords=None, encoding="latin1"):
    """Split *txt* with ZopeSplitter, passing *stopwords* through.

    *stopwords* defaults to a new empty dict on every call, so calls
    never share one mutable default object.  *encoding* is accepted but
    not forwarded to the underlying splitter.
    """
    if stopwords is None:
        stopwords = {}
    return ZopeSplitter(txt, stopwords)
src/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/src/ZopeSplitter.c
deleted
100644 → 0
View file @
0aa3f2ea
This diff is collapsed.
Click to expand it.
src/Products/PluginIndexes/TextIndex/Splitter/__init__.py
deleted
100644 → 0
View file @
0aa3f2ea
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
#############################################################################
import
os
,
sys
,
exceptions
# Registered splitters as (name, description) pairs.
availableSplitters = (
    ("ZopeSplitter", "Zope Default Splitter"),
    ("ISO_8859_1_Splitter", "Werner Strobls ISO-8859-1 Splitter"),
    ("UnicodeSplitter", "Unicode-aware splitter"),
)

# Just the names, in registration order; the first one is the default.
splitterNames = list(pair[0] for pair in availableSplitters)
def getSplitter(name=None):
    """Return the splitter object registered under *name*.

    An empty or missing *name* selects the first entry of
    ``splitterNames``.  The object is the attribute called *name* in the
    module ``<name>.<name>``.

    Raises RuntimeError when a non-empty *name* is not listed in
    ``splitterNames``.
    """
    if not name:
        name = splitterNames[0]
    elif name not in splitterNames:
        raise RuntimeError("No such splitter '%s'" % name)

    # Equivalent of "from <name>.<name> import <name>", done with
    # __import__ instead of exec'ing a string-built statement.  The
    # module globals are passed so the import is resolved from the same
    # place as an import statement written in this module.
    module = __import__("%s.%s" % (name, name), globals(), locals(), [name])
    return getattr(module, name)
src/Products/PluginIndexes/TextIndex/Splitter/setup.py
deleted
100644 → 0
View file @
0aa3f2ea
#!/usr/bin/env python
from
distutils.core
import
setup
,
Extension
import
os
,
exceptions
,
commands
,
sys
# Placeholders for extra compiler flags, linker flags and libraries.
# All three are empty and none is passed to setup() below.
CFLAGS = []
LFLAGS = []
LIBS = []

# One C extension module per splitter.
setup(
    name="Splitter",
    version="1.0",
    description="Splitters for Zope 2.5",
    author="Andreas Jung",
    author_email="andreas@zope.com",
    url="http://www.zope.org/...",
    ext_modules=[
        Extension("ZopeSplitter",
                  ['ZopeSplitter/src/ZopeSplitter.c']),
        Extension("ISO_8859_1_Splitter",
                  ['ISO_8859_1_Splitter/src/ISO_8859_1_Splitter.c']),
        Extension("UnicodeSplitter",
                  ['UnicodeSplitter/src/UnicodeSplitter.c']),
    ]
)
src/Products/PluginIndexes/TextIndex/TextIndex.py
deleted
100644 → 0
View file @
0aa3f2ea
This diff is collapsed.
Click to expand it.
src/Products/PluginIndexes/TextIndex/Vocabulary.py
deleted
100644 → 0
View file @
0aa3f2ea
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Vocabulary for deprecated text index.
$Id$
"""
from
AccessControl.Permissions
import
manage_vocabulary
from
AccessControl.Permissions
import
query_vocabulary
from
AccessControl.Role
import
RoleManager
from
AccessControl.SecurityInfo
import
ClassSecurityInfo
from
Acquisition
import
Implicit
from
App.class_init
import
InitializeClass
from
App.Dialogs
import
MessageDialog
from
App.special_dtml
import
DTMLFile
from
Persistence
import
Persistent
from
OFS.SimpleItem
import
Item
from
zope.interface
import
implements
from
Products.PluginIndexes.interfaces
import
IVocabulary
from
Products.PluginIndexes.TextIndex
import
Lexicon
,
GlobbingLexicon
from
Products.PluginIndexes.TextIndex.Lexicon
import
stop_word_dict
from
Products.PluginIndexes.TextIndex
import
Splitter
# Add form for Vocabulary objects, loaded from the 'dtml/addVocabulary'
# template.
manage_addVocabularyForm = DTMLFile('dtml/addVocabulary', globals())
def manage_addVocabulary(self, id, title, globbing=None, extra=None,
                         splitter='', REQUEST=None):
    """Add a Vocabulary object

    Creates a Vocabulary from the arguments and stores it on 'self'
    under 'id'.  When a REQUEST is given, the 'manage_main' view of
    'self' is rendered and returned; otherwise the result is None.
    """
    id = str(id)
    title = str(title)

    # Any true 'globbing' value is passed on as 1.
    if globbing:
        globbing = 1

    vocabulary = Vocabulary(id, title, globbing, splitter, extra)
    self._setObject(id, vocabulary)

    if REQUEST is None:
        return None
    return self.manage_main(self, REQUEST, update_menu=1)
class _extra:
    # Plain attribute container.  Vocabulary.__init__ fills an instance
    # with the splitterIndexNumbers / splitterSingleChars /
    # splitterCasefolding defaults when no 'extra' object is supplied.
    pass
class Vocabulary(Item, Persistent, Implicit, RoleManager):
    """A Vocabulary is a user-manageable realization of a Lexicon object.
    """

    # NOTE(review): methods that had no docstring are documented with
    # comments only.  In Zope 2 the presence of a docstring can make a
    # method publishable through the web -- confirm before adding any.

    implements(IVocabulary)

    security = ClassSecurityInfo()
    security.setPermissionDefault(manage_vocabulary, ('Manager',))
    security.setPermissionDefault(query_vocabulary, ('Anonymous', 'Manager',))

    meta_type = "Vocabulary"
    _isAVocabulary = 1

    # Management tabs: the two vocabulary-specific ones first, then the
    # tabs of Item and RoleManager.
    manage_options = (
        (
            {'label': 'Vocabulary',
             'action': 'manage_main',
             'help': ('ZCatalog', 'Vocabulary_Vocabulary.stx')},
            {'label': 'Query',
             'action': 'manage_query',
             'help': ('ZCatalog', 'Vocabulary_Query.stx')},
        )
        + Item.manage_options
        + RoleManager.manage_options
    )

    security.declareProtected(manage_vocabulary, 'manage_main')
    manage_main = DTMLFile('dtml/manage_vocab', globals())

    security.declareProtected(manage_vocabulary, 'manage_query')
    manage_query = DTMLFile('dtml/vocab_query', globals())

    def __init__(self, id, title='', globbing=None, splitter=None,
                 extra=None):
        """ create the lexicon to manage... """
        self.id = id
        self.title = title
        self.globbing = bool(globbing)

        # Use the first registered splitter unless one was requested.
        default_splitter = Splitter.splitterNames[0]
        self.useSplitter = splitter or default_splitter

        if not extra:
            # No parameters supplied: use the default splitter settings.
            extra = _extra()
            extra.splitterIndexNumbers = 0
            extra.splitterSingleChars = 0
            extra.splitterCasefolding = 1

        if globbing:
            lexicon = GlobbingLexicon.GlobbingLexicon(
                useSplitter=self.useSplitter, extra=extra)
        else:
            lexicon = Lexicon.Lexicon(
                stop_word_dict, useSplitter=self.useSplitter, extra=extra)
        self.lexicon = lexicon

    def getLexicon(self):
        # Return the lexicon object created in __init__.
        return self.lexicon

    security.declareProtected(query_vocabulary, 'query')
    def query(self, pattern):
        """ """
        # One entry per id returned by the lexicon: the stored word for a
        # globbing vocabulary, the pattern itself otherwise.  The list is
        # returned in its string form.
        result = []
        for wid in self.lexicon.get(pattern):
            if not self.globbing:
                result.append(pattern)
            else:
                result.append(self.lexicon._inverseLex[wid])
        return str(result)

    def manage_insert(self, word='', URL1=None, RESPONSE=None):
        """ doc string """
        self.insert(word)

        if RESPONSE:
            target = URL1 + '/manage_main'
            RESPONSE.redirect(target)

    def manage_stop_syn(self, stop_syn, REQUEST=None):
        # Does nothing.
        pass

    def insert(self, word=''):
        # Register 'word' through the lexicon's set() method.
        self.lexicon.set(word)

    def words(self):
        # Return the (word, word id) items of the lexicon mapping.
        return self.lexicon._lexicon.items()

InitializeClass(Vocabulary)
src/Products/PluginIndexes/TextIndex/__init__.py
deleted
100644 → 0
View file @
0aa3f2ea
# empty comment for winzip and friends
import
warnings
# Emit the deprecation notice when this package is imported.
# stacklevel=2 attributes the warning to the importing code rather than
# to this module.
warnings.warn('Using TextIndex is deprecated (will be removed in Zope '
              '2.12). Use ZCTextIndex instead.',
              DeprecationWarning,
              stacklevel=2)
src/Products/PluginIndexes/TextIndex/dtml/addTextIndex.dtml
deleted
100644 → 0
View file @
0aa3f2ea
<dtml-var manage_page_header>
<dtml-var "manage_form_title(this(), _,
form_title='Add TextIndex',
)">
<p class="form-help">
<strong>Note:</strong>
TextIndex is deprecated and has been replaced by ZCTextIndex. Consider
using ZCTextIndex instead.
</p>
<p class="form-help">
<strong>Text Indexes</strong> break text up into individual words, and
are often referred to as full-text indexes. Text indexes
sort results by score, meaning they return hits in order
from the most relevant to the least relevant.
</p>
<form action="manage_addTextIndex" method="post" enctype="multipart/form-data">
<table cellspacing="0" cellpadding="2" border="0">
<tr>
<td align="left" valign="top">
<div class="form-label">
Id
</div>
</td>
<td align="left" valign="top">
<input type="text" name="id" size="40" />
</td>
</tr>
<tr>
<td align="left" valign="top">
<div class="form-label">
Vocabulary
</div>
</td>
<td>
<dtml-let vocabs="superValues('Vocabulary')">
<dtml-if vocabs>
<select name="extra.vocabulary:record">
<dtml-in expr="superValues('Vocabulary')">
<option value="&dtml-id;">
&dtml-id; <dtml-var title fmt="(%s)" null html_quote>
</option>
</dtml-in>
</select>
<dtml-else>
<em class="std-text">Create a Vocabulary object first.</em>
</dtml-if>
</dtml-let>
</td>
</tr>
<tr>
<td align="left" valign="top">
<div class="form-optional">
Type
</div>
</td>
<td align="left" valign="top">
TextIndex
</td>
</tr>
<tr>
<td align="left" valign="top">
</td>
<td align="left" valign="top">
<div class="form-element">
<input class="form-element" type="submit" name="submit"
value=" Add " />
</div>
</td>
</tr>
</table>
</form>
<dtml-var manage_page_footer>
src/Products/PluginIndexes/TextIndex/dtml/addVocabulary.dtml
deleted
100644 → 0
View file @
0aa3f2ea
<dtml-var manage_page_header>
<dtml-var "manage_form_title(this(), _,
form_title='Add Vocabulary',
)">
<FORM ACTION="manage_addVocabulary" METHOD="POST">
<table cellspacing="0" cellpadding="2" border="0">
<tr>
<td align="left" valign="top">
<div class="form-label">
Id
</div>
</td>
<td align="left" valign="top">
<input type="text" name="id" size="40" />
</td>
</tr>
<tr>
<td align="left" valign="top">
<div class="form-optional">
Title
</div>
</td>
<td align="left" valign="top">
<input type="text" name="title" size="40" />
</td>
</tr>
<dtml-if availableSplitters>
<tr>
<td align="left" valign="top">
<div class="form-optional">
Splitter
</div>
</td>
<td align="left" valign="top">
<select name="splitter">
<dtml-in availableSplitters>
<option value="&dtml-sequence-key;">&dtml-sequence-item;
</dtml-in>
</select>
</td>
</tr>
</dtml-if>
<tr>
<td align="left" valign="top">
<div class="form-label">
Index numbers
</div>
</td>
<td align="left" valign="top">
<select name="extra.splitterIndexNumbers:record:int">
<option value="0" selected>no
<option value="1">yes
</select>
</td>
</tr>
<tr>
<td align="left" valign="top">
<div class="form-label">
Index single characters
</div>
</td>
<td align="left" valign="top">
<select name="extra.splitterSingleChars:record:int" >
<option value="0" selected>no
<option value="1">yes
</select>
</td>
</tr>
<tr>
<td align="left" valign="top">
<div class="form-label">
Case-insensitive
</div>
</td>
<td align="left" valign="top">
<select name="extra.splitterCasefolding:record:int">
<option value="0" >no
<option value="1" selected>yes
</select>
</td>
</tr>
<tr>
<td align="left" valign="top">
<div class="form-label">
globbing?
</div>
</td>
<td align="left" valign="top">
<input type="checkbox" name="globbing" />
</td>
</tr>
<tr>
<td align="left" valign="top">
</td>
<td align="left" valign="top">
<div class="form-element">
<input class="form-element" type="submit" name="submit"
value=" Add " />
</div>
</td>
</tr>
</table>
</form>
<dtml-var manage_page_footer>
src/Products/PluginIndexes/TextIndex/dtml/manageTextIndex.dtml
deleted
100644 → 0
View file @
0aa3f2ea
<dtml-var manage_page_header>
<dtml-var manage_tabs>
<p class="form-help">
<form method="post" action="manage_setPreferences">
<table border="0" cellspacing="2" cellpadding="2">
<tr>
<th align="left" width="20%">Vocabulary to use</th>
<td align="left">
<select name="vocabulary">
<dtml-in "superValues('Vocabulary')">
<dtml-if "getId()==vocabulary_id">
<option value="&dtml-id;" selected>
&dtml-id; <dtml-var title fmt="(%s)" null html_quote>
</option>
<dtml-else>
<option value="&dtml-id;">
&dtml-id; <dtml-var title fmt="(%s)" null html_quote>
</option>
</dtml-if>
</dtml-in>
</select>
</td>
<td>
<em>Warning:</em> changing the vocabulary only makes sense after
creating the index and before indexing any objects. The index will be cleared
if you change the vocabulary after objects have been indexed.
</td>
</tr>
<dtml-comment>
<tr>
<th align="left">Splitter</th>
<td>
<select name="splitter">
<dtml-in availableSplitters>
<dtml-if "_.getitem('sequence-key')==useSplitter">
<option value="&dtml-sequence-key;" selected>&dtml-sequence-item;
<dtml-else>
<option value="&dtml-sequence-key;">&dtml-sequence-item;
</dtml-if>
</dtml-in>
</select>
</td>
</tr>
<tr>
<th align="left">Default text operator</th>
<td>
<select name="text_operator">
<dtml-in "operators.keys()">
<dtml-if "_.getitem('sequence-item')==useOperator">
<option value="&dtml-sequence-item;" selected>&dtml-sequence-item;
<dtml-else>
<option value="&dtml-sequence-item;">&dtml-sequence-item;
</dtml-if>
</dtml-in>
</select>
</td>
</tr>
</dtml-comment>
<tr>
<td colspan="3">
<input type="submit" value="Save changes">
</td>
</tr>
</table>
</form>
<dtml-var manage_page_footer>
src/Products/PluginIndexes/TextIndex/dtml/manageVocabulary.dtml
deleted
100644 → 0
View file @
0aa3f2ea
<dtml-var manage_page_header>
<dtml-var "manage_form_title(this(), _,
form_title='Manage vocabulary of text index',
help_topic='addIndex.stx'
)">
<dtml-var "getLexicon('Vocabulary')">
<form action="manage_addTextIndex" method="post" enctype="multipart/form-data">
<table cellspacing="0" cellpadding="2" border="0">
</table>
</form>
<dtml-var manage_page_footer>
src/Products/PluginIndexes/TextIndex/dtml/manage_vocab.dtml
deleted
100644 → 0
View file @
0aa3f2ea
<dtml-call "RESPONSE.setHeader('content-type','text/html; charset=utf-8')">
<dtml-var manage_page_header>
<dtml-var manage_tabs>
<p class="form-text">
<dtml-let lexicon="getLexicon()">
<dtml-try>
<dtml-let x="lexicon.multi_wc"></dtml-let>
Globbing is <em>enabled</em>
<dtml-except>
Globbing is <em>disabled</em>
</dtml-try>
<dtml-if useSplitter>
, Splitter is <em>&dtml-useSplitter;</em>
</dtml-if>
<dtml-try>
, Index number=<dtml-var "lexicon.splitterParams.splitterIndexNumbers">
, Case-insensitive=<dtml-var "lexicon.splitterParams.splitterCasefolding">
, Index single characters=<dtml-var "lexicon.splitterParams.splitterSingleChars">
<dtml-except>
</dtml-try>
</dtml-let>
</p>
<dtml-if words>
<p class="form-text">
&dtml-id; contains <em><dtml-var words fmt=collection-length thousands_commas></em>
word(s).
</p>
<dtml-in words previous size=20 start=query_start >
<span class="list-nav">
<a href="&dtml-URL;?query_start=&dtml-previous-sequence-start-number;">
[Previous <dtml-var previous-sequence-size> entries]
</a>
</span>
</dtml-in>
<dtml-in words next size=20 start=query_start >
<span class="list-nav">
<a href="&dtml-URL;?query_start=&dtml-next-sequence-start-number;">
[Next <dtml-var next-sequence-size> entries]
</a>
</span>
</dtml-in>
<table width="100%" cellspacing="0" cellpadding="2" border="0">
<dtml-in words size=20 start=query_start >
<dtml-if name="sequence-start">
<tr class="list-header">
<td width="80%" align="left" valign="top">
<div class="list-item">Word</div></td>
<td width="20%" align="left" valign="top">
<div class="list-item">Word ID</div></td>
</tr>
</dtml-if>
<dtml-if name="sequence-odd"><tr class="row-normal">
<dtml-else><tr class="row-hilite"></dtml-if>
<td valign="top" align="left">
<div class="form-text">
<dtml-if "_.same_type(_['sequence-key'], 'x')">
&dtml-sequence-key;
<dtml-else>
<dtml-var "_['sequence-key'].encode('utf-8')" html_quote>
</dtml-if>
</div>
</td>
<td valign="top" align="left">
<div class="form-text">&dtml-sequence-item;</div>
</td>
</tr>
</dtml-in>
</table>
<dtml-in words previous size=20 start=query_start >
<div class="list-nav">
<a href="&dtml-URL;?query_start=&dtml-previous-sequence-start-number;">
[Previous <dtml-var previous-sequence-size> entries]
</a>
</div>
</dtml-in>
<dtml-in words next size=20 start=query_start >
<div class="list-nav">
<a href="&dtml-URL;?query_start=&dtml-next-sequence-start-number;">
[Next <dtml-var next-sequence-size> entries]
</a>
</div>
</dtml-in>
<dtml-else>
<p class="form-text">
There are no words in the Vocabulary.
</p>
</dtml-if>
<dtml-var manage_page_footer>
src/Products/PluginIndexes/TextIndex/dtml/vocab_query.dtml
deleted
100644 → 0
View file @
0aa3f2ea
<dtml-var manage_page_header>
<dtml-var manage_tabs>
<form action="query" method=POST>
<input type="text" name="pattern" size="20">
<div class="form-element">
<input class="form-element" type="submit" name="submit" value="Query">
</div>
</form>
<dtml-var manage_page_footer>
src/Products/PluginIndexes/TextIndex/help/TextIndex_searchResults.stx
deleted
100644 → 0
View file @
0aa3f2ea
ZCatalog - searchResults: specifying parameters for a search query
The searchResults() method of the ZCatalog accepts parameters that
define a query to be made on that catalog. A query can either be
passed as keyword argument to searchResults(), as a mapping, or as
part of a Zope REQUEST object, typically from HTML forms.
The index of the catalog to query is either the name of the
keyword argument, a key in a mapping, or an attribute of a record
object.
Attributes of record objects
'query' -- either a sequence of objects or a single value to be
passed as query to the index (mandatory)
'operator' -- specifies the combination of search results when
query is a sequence of values. (optional, default: 'or').
Allowed values:
'and', 'or', 'andnot', 'near'
src/Products/PluginIndexes/TextIndex/tests/__init__.py
deleted
100644 → 0
View file @
0aa3f2ea
##############################################################################
#
# Copyright (c) 2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
# This file is needed to make this a package.
src/Products/PluginIndexes/TextIndex/tests/testSplitter.py
deleted
100644 → 0
View file @
0aa3f2ea
# -*- coding: ISO-8859-1 -*-
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
import
os
,
sys
import
unittest
,
locale
from
Products.PluginIndexes.TextIndex
import
Splitter
class TestCase(unittest.TestCase):
    """
    Test our Splitters
    """

    def setUp(self):
        # Pairs of (input text, expected list of lower-cased words).
        # NOTE(review): the second sample looks like German text whose
        # non-ASCII letters are missing (the file declares ISO-8859-1);
        # confirm the literals against the original file.
        self.testdata = (
            ('The quick brown fox jumps over the lazy dog',
             ['the', 'quick', 'brown', 'fox', 'jumps', 'over', 'the',
              'lazy', 'dog']),
            ('fters sterreichische herber berfall da rger verrgert',
             ['fters', 'sterreichische', 'herber', 'berfall', 'da', 'rger',
              'verrgert']),
        )

    def tearDown(self):
        """
        """

    def testAvailableSplitters(self):
        "Test available splitters"
        # unittest assertions are used instead of bare ``assert`` so the
        # checks still run when Python is started with -O.
        self.assertTrue(len(Splitter.availableSplitters) > 0)
        self.assertTrue(len(Splitter.splitterNames) > 0)
        self.assertEqual(len(Splitter.availableSplitters),
                         len(Splitter.splitterNames))

    def _test(self, sp_name, text, splitted):
        # Split `text` with the splitter registered as `sp_name` and
        # compare the resulting word list with `splitted`.
        splitter = Splitter.getSplitter(sp_name)
        result = list(splitter(text))
        self.assertEqual(result, splitted,
                         "%s: %s vs %s" % (sp_name, result, splitted))

#    def testZopeSplitter(self):
#        """test ZopeSplitter (this test is known to fail because it does not support ISO stuff) """
#
#        for text,splitted in self.testdata:
#            self._test("ZopeSplitter",text,splitted)

    def testISOSplitter(self):
        """test ISOSplitter"""
        for text, splitted in self.testdata:
            self._test("ISO_8859_1_Splitter", text, splitted)
def test_suite():
    """Return a suite built from TestCase with unittest.makeSuite."""
    return unittest.makeSuite(TestCase)
def debug():
    """Call debug() on the suite returned by test_suite()."""
    return test_suite().debug()
def pdebug():
    """Execute the statement 'debug()' through pdb.run."""
    import pdb
    pdb.run('debug()')
def main():
    """Run the suite from test_suite() with a TextTestRunner."""
    unittest.TextTestRunner().run(test_suite())
if __name__ == '__main__':
    if len(sys.argv) > 1:
        # The first command-line argument names a module-level callable
        # (for example debug or pdebug) to invoke instead of main().
        globals()[sys.argv[1]]()
    else:
        main()
src/Products/PluginIndexes/TextIndex/tests/testTextIndex.py
deleted
100644 → 0
View file @
0aa3f2ea
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""TextIndex unit tests.
$Id$
"""
import
unittest
import
Testing
import
Zope2
Zope2
.
startup
()
import
ZODB
from
ZODB.MappingStorage
import
MappingStorage
import
transaction
from
Products.PluginIndexes.TextIndex
import
TextIndex
from
Products.PluginIndexes.TextIndex
import
GlobbingLexicon
class Dummy:
    """Minimal stand-in for a document: wraps one piece of text."""

    def __init__(self, text):
        self._text = text

    def text(self):
        """Return the text given to the constructor."""
        return self._text

    def __str__(self):
        label = '<Dummy: %s>' % self._text
        return label

    # repr() and str() produce the same label.
    __repr__ = __str__
class Tests(unittest.TestCase):
    """TextIndex tests, both in memory and persisted in a ZODB.

    Test methods carry ``#`` comments instead of docstrings so that
    unittest keeps reporting them by method name.
    """

    # Lazily created by dbopen(); released again in tearDown().
    db = None
    jar = None

    def setUp(self):
        self.index = TextIndex.TextIndex('text')
        self.doc = Dummy(text='this is the time, when all good zopes')

    def dbopen(self):
        # Open a connection to the (lazily created) MappingStorage
        # database and return the index stored under root()['index'],
        # creating and committing it on first use.  Raises RuntimeError
        # when the previous connection has not been closed.
        if self.db is None:
            self.db = ZODB.DB(MappingStorage())
        db = self.db
        if self.jar is not None:
            # Call form of raise: valid on Python 2 and Python 3 alike.
            raise RuntimeError('test needs to dbclose() before dbopen()')
        jar = db.open()
        self.jar = jar
        root = jar.root()
        if 'index' not in root:
            root['index'] = TextIndex.TextIndex('text')
            transaction.commit()
        return jar.root()['index']

    def dbclose(self):
        self.jar.close()
        self.jar = None

    def tearDown(self):
        transaction.abort()
        if self.jar is not None:
            self.dbclose()
        if self.db is not None:
            self.db.close()
            self.db = None

    def _assertTimeQueries(self, index, expected):
        # Shared checks of the persistence tests: an empty request yields
        # None, 'python' yields an empty result that names the 'text'
        # index, and 'time' yields the document ids in `expected`.
        r = index._apply_index({})
        self.assertTrue(r is None, r)

        r = index._apply_index({'text': 'python'})
        self.assertTrue(len(r) == 2 and r[1] == ('text',),
                        'incorrectly not used')
        self.assertFalse(r[0], "should have no results")

        r = index._apply_index({'text': 'time'})
        r = list(r[0].keys())
        self.assertEqual(r, expected, r)

    def test_z3interfaces(self):
        from Products.PluginIndexes.interfaces import IPluggableIndex
        from Products.PluginIndexes.interfaces import ITextIndex
        from Products.PluginIndexes.TextIndex.TextIndex import TextIndex
        from zope.interface.verify import verifyClass

        verifyClass(IPluggableIndex, TextIndex)
        verifyClass(ITextIndex, TextIndex)

    def test_SimpleAddDelete(self):
        self.index.index_object(0, self.doc)
        self.index.index_object(1, self.doc)
        # Assigning doc.text shadows Dummy.text() with a plain string on
        # the instance.
        self.doc.text = 'spam is good, spam is fine, span span span'
        self.index.index_object(0, self.doc)
        self.index.unindex_object(0)

    def test_PersistentUpdate1(self):
        # Check simple persistent indexing
        index = self.dbopen()

        self.doc.text = 'this is the time, when all good zopes'
        index.index_object(0, self.doc)
        transaction.commit()

        self.doc.text = 'time waits for no one'
        index.index_object(1, self.doc)
        transaction.commit()
        self.dbclose()

        index = self.dbopen()
        self._assertTimeQueries(index, [0, 1])

    def test_PersistentUpdate2(self):
        # Check less simple persistent indexing
        index = self.dbopen()

        self.doc.text = 'this is the time, when all good zopes'
        index.index_object(0, self.doc)
        transaction.commit()

        self.doc.text = 'time waits for no one'
        index.index_object(1, self.doc)
        transaction.commit()

        self.doc.text = 'the next task is to test'
        index.index_object(3, self.doc)
        transaction.commit()

        self.doc.text = 'time time'
        index.index_object(2, self.doc)
        transaction.commit()
        self.dbclose()

        index = self.dbopen()
        self._assertTimeQueries(index, [0, 1, 2])

    sample_texts = [
        """This is the time for all good men to come to
the aid of their country""",
        """ask not what your country can do for you,
ask what you can do for your country""",
        """Man, I can't wait to get to Montross!""",
        """Zope Public License (ZPL) Version 1.0""",
        """Copyright (c) Digital Creations. All rights reserved.""",
        """This license has been certified as Open Source(tm).""",
        """I hope I get to work on time""",
    ]

    def globTest(self, qmap, rlist):
        "Check a glob query"
        # Index every sample text with a GlobbingLexicon, reopen the
        # database, and check that `qmap` matches exactly the document
        # ids in `rlist`.  The bound _apply_index of the reopened index
        # is returned so callers can issue follow-up queries.
        index = self.dbopen()
        index._lexicon = GlobbingLexicon.GlobbingLexicon()

        for i, sample in enumerate(self.sample_texts):
            self.doc.text = sample
            index.index_object(i, self.doc)
            transaction.commit()

        self.dbclose()

        index = self.dbopen()

        r = list(index._apply_index(qmap)[0].keys())
        self.assertEqual(r, rlist, r)

        return index._apply_index

    def test_StarQuery(self):
        self.globTest({'text': 'm*n'}, [0, 2])

    def test_AndQuery(self):
        self.globTest({'text': 'time and country'}, [0])

    def test_OrQuery(self):
        self.globTest({'text': 'time or country'}, [0, 1, 6])

    def test_DefaultOrQuery(self):
        self.globTest({'text': 'time country'}, [0, 1, 6])

    def test_NearQuery(self):
        # Check a NEAR query.. (NOTE:ACTUALLY AN 'AND' TEST!!)
        # NEAR never worked, so Zopes post-2.3.1b3 define near to mean AND
        self.globTest({'text': 'time ... country'}, [0])

    def test_QuotesQuery(self):
        ai = self.globTest({'text': '"This is the time"'}, [0])

        r = list(ai({'text': '"now is the time"'})[0].keys())
        self.assertEqual(r, [], r)

    def test_AndNotQuery(self):
        self.globTest({'text': 'time and not country'}, [6])

    def test_ParenMatchingQuery(self):
        ai = self.globTest({'text': '(time and country) men'}, [0])

        r = list(ai({'text': '(time and not country) or men'})[0].keys())
        self.assertEqual(r, [0, 6], r)

    def test_TextIndexOperatorQuery(self):
        self.globTest({'text': {'query': 'time men', 'operator': 'and'}},
                      [0])

    def test_NonExistentWord(self):
        self.globTest({'text': 'zop'}, [])

    def test_ComplexQuery1(self):
        self.globTest({'text': '((?ount* or get) and not wait) '
                               '"been *ert*"'}, [0, 1, 5, 6])

    # same tests, unicode strings
    def test_StarQueryUnicode(self):
        self.globTest({'text': u'm*n'}, [0, 2])

    def test_AndQueryUnicode(self):
        self.globTest({'text': u'time and country'}, [0])

    def test_OrQueryUnicode(self):
        self.globTest({'text': u'time or country'}, [0, 1, 6])

    def test_DefaultOrQueryUnicode(self):
        self.globTest({'text': u'time country'}, [0, 1, 6])

    def test_NearQueryUnicode(self):
        # Check a NEAR query.. (NOTE:ACTUALLY AN 'AND' TEST!!) (unicode)
        # NEAR never worked, so Zopes post-2.3.1b3 define near to mean AND
        self.globTest({'text': u'time ... country'}, [0])

    def test_QuotesQueryUnicode(self):
        ai = self.globTest({'text': u'"This is the time"'}, [0])

        r = list(ai({'text': '"now is the time"'})[0].keys())
        self.assertEqual(r, [], r)

    def test_AndNotQueryUnicode(self):
        self.globTest({'text': u'time and not country'}, [6])

    def test_ParenMatchingQueryUnicode(self):
        ai = self.globTest({'text': u'(time and country) men'}, [0])

        r = list(ai({'text': u'(time and not country) or men'})[0].keys())
        self.assertEqual(r, [0, 6], r)

    def test_TextIndexOperatorQueryUnicode(self):
        self.globTest({'text': {u'query': u'time men', 'operator': 'and'}},
                      [0])

    def test_NonExistentWordUnicode(self):
        self.globTest({'text': u'zop'}, [])

    def test_ComplexQuery1Unicode(self):
        self.globTest({'text': u'((?ount* or get) and not wait) '
                               '"been *ert*"'}, [0, 1, 5, 6])
def test_suite():
    """Return a suite built from the Tests class with unittest.makeSuite."""
    return unittest.makeSuite(Tests)
if __name__ == '__main__':
    # Run as a script: unittest.main resolves the name 'test_suite' in
    # this module to find the tests to run.
    unittest.main(defaultTest='test_suite')
src/Products/PluginIndexes/__init__.py
View file @
b01e161f
...
...
@@ -21,20 +21,7 @@ import DateRangeIndex.DateRangeIndex
from
Products.PluginIndexes.common
import
ResultList
from
Products.PluginIndexes.common
import
UnIndex
# BBB: TextIndex is deprecated but we don't want the warning to appear here
import
warnings
warnings
.
filterwarnings
(
'ignore'
,
message
=
'^Using TextIndex'
,
append
=
1
)
try
:
import
TextIndex.TextIndex
finally
:
del
warnings
.
filters
[
-
1
]
try
:
del
__warningregistry__
except
NameError
:
pass
_indexes
=
(
'TextIndex'
,
'KeywordIndex'
,
_indexes
=
(
'KeywordIndex'
,
'FieldIndex'
,
'PathIndex'
,
'TopicIndex'
,
...
...
src/Products/PluginIndexes/interfaces.py
View file @
b01e161f
...
...
@@ -160,38 +160,8 @@ class IPathIndex(Interface):
"""
class IVocabulary(Interface):
    """A Vocabulary is a user-manageable realization of a Lexicon object.
    """
class ITextIndex(Interface):
    """Full-text index.

    There is a ZCatalog UML model that sheds some light on what is
    going on here.  '_index' is a BTree which maps word ids to a mapping
    from document id to score.  Something like:

      {'bob' : {1 : 5, 2 : 3, 42 : 9}}
      {'uncle' : {1 : 1}}

    The '_unindex' attribute is a mapping from document id to word
    ids.  This mapping allows the catalog to unindex an object:

      {42 : ('bob', 'is', 'your', 'uncle')}

    This isn't exactly how things are represented in memory, many
    optimizations happen along the way.
    """

    def getLexicon(vocab_id=None):
        """Get the Lexicon in use.
        """
class
IFilteredSet
(
Interface
):
"""A pre-calculated result list based on an expression.
"""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment