Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
b8c3a39b
Commit
b8c3a39b
authored
Mar 25, 2000
by
Michel Pelletier
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added stop word interface to Lexicon.
parent
8e6e5acb
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
73 additions
and
16 deletions
+73
-16
lib/python/Products/ZCatalog/Vocabulary.py
lib/python/Products/ZCatalog/Vocabulary.py
+8
-3
lib/python/Products/ZCatalog/edit_stop_syn.dtml
lib/python/Products/ZCatalog/edit_stop_syn.dtml
+17
-0
lib/python/SearchIndex/GlobbingLexicon.py
lib/python/SearchIndex/GlobbingLexicon.py
+8
-0
lib/python/SearchIndex/Lexicon.py
lib/python/SearchIndex/Lexicon.py
+22
-3
lib/python/SearchIndex/UnTextIndex.py
lib/python/SearchIndex/UnTextIndex.py
+18
-10
No files found.
lib/python/Products/ZCatalog/Vocabulary.py
View file @
b8c3a39b
...
...
@@ -91,7 +91,7 @@ from Persistence import Persistent
from
OFS.SimpleItem
import
Item
from
SearchIndex
import
Lexicon
,
GlobbingLexicon
from
VocabularyInterface
import
VocabularyInterface
from
SearchIndex.Lexicon
import
stop_word_dict
manage_addVocabularyForm
=
HTMLFile
(
'addVocabulary'
,
globals
())
...
...
@@ -113,7 +113,6 @@ class Vocabulary(Item, Persistent, Implicit):
meta_type
=
"Vocabulary"
_isAVocabulary
=
1
__extends__
=
(
VocabularyInterface
,)
manage_options
=
(
...
...
@@ -137,6 +136,7 @@ class Vocabulary(Item, Persistent, Implicit):
[
'Anonymous'
,
'Manager'
]),
)
## manage_main = HTMLFile('vocab_manage_main', globals())
manage_vocabulary
=
HTMLFile
(
'manage_vocab'
,
globals
())
...
...
@@ -151,7 +151,7 @@ class Vocabulary(Item, Persistent, Implicit):
if
globbing
:
self
.
lexicon
=
GlobbingLexicon
.
GlobbingLexicon
()
else
:
self
.
lexicon
=
Lexicon
.
Lexicon
()
self
.
lexicon
=
Lexicon
.
Lexicon
(
stop_word_dict
)
def
query
(
self
,
pattern
):
""" """
...
...
@@ -171,6 +171,11 @@ class Vocabulary(Item, Persistent, Implicit):
if
RESPONSE
:
RESPONSE
.
redirect
(
URL1
+
'/manage_vocabulary'
)
def
manage_stop_syn
(
self
,
stop_syn
,
REQUEST
=
None
):
pass
def
insert
(
self
,
word
=
''
):
self
.
lexicon
.
set
(
word
)
...
...
lib/python/Products/ZCatalog/edit_stop_syn.dtml
0 → 100644
View file @
b8c3a39b
<html>
<head>
<title>
Edit
<dtml-var
title_or_id
></title>
</head>
<body
bgcolor=
"#ffffff"
link=
"#000099"
vlink=
"#555555"
alink=
"#77003b"
>
<dtml-var
manage_tabs
>
<form
action=
"manage_stop_syn"
method=
"POST"
>
<textarea
name=
"stop_syn:lines"
>
</textarea>
</form>
<br>
</body>
</html>
lib/python/SearchIndex/GlobbingLexicon.py
View file @
b8c3a39b
...
...
@@ -239,6 +239,14 @@ class GlobbingLexicon(Lexicon):
return
words
def
Splitter
(
self
,
astring
,
words
=
None
):
""" wrap the splitter """
## don't do anything, less efficient but there's not much
## sense in stemming a globbing lexicon.
return
Splitter
(
astring
)
def
translate
(
self
,
pat
):
"""Translate a PATTERN to a regular expression.
...
...
lib/python/SearchIndex/Lexicon.py
View file @
b8c3a39b
...
...
@@ -113,11 +113,27 @@ class Lexicon(Persistent, Implicit):
"""
counter
=
0
def
__init__
(
self
):
def
__init__
(
self
,
stop_syn
=
None
):
self
.
_lexicon
=
OIBTree
()
self
.
counter
=
0
if
stop_syn
is
None
:
self
.
stop_syn
=
{}
else
:
self
.
stop_syn
=
{}
def
set_stop_syn
(
selfb
,
stop_syn
):
""" pass in a mapping of stopwords and synonyms. Format is:
{'word' : [syn1, syn2, ..., synx]}
Vocabularies do not necesarily need to implement this if their
splitters do not support stemming or stoping.
"""
self
.
stop_syn
=
stop_syn
def
set
(
self
,
word
):
""" return the word id of 'word' """
...
...
@@ -142,8 +158,11 @@ class Lexicon(Persistent, Implicit):
def
__len__
(
self
):
return
len
(
self
.
_lexicon
)
def
Splitter
(
self
,
astring
,
words
):
def
Splitter
(
self
,
astring
,
words
=
None
):
""" wrap the splitter """
if
words
is
None
:
word
=
self
.
stop_syn
return
Splitter
(
astring
,
words
)
def
grep
(
self
,
query
):
...
...
lib/python/SearchIndex/UnTextIndex.py
View file @
b8c3a39b
...
...
@@ -92,7 +92,7 @@ is no longer known.
"""
__version__
=
'$Revision: 1.2
1
$'
[
11
:
-
2
]
__version__
=
'$Revision: 1.2
2
$'
[
11
:
-
2
]
from
Globals
import
Persistent
import
BTree
,
IIBTree
,
IOBTree
,
OIBTree
...
...
@@ -164,7 +164,6 @@ class UnTextIndex(Persistent, Implicit):
self
.
call_methods
=
call_methods
self
.
_index
=
IOBTree
()
self
.
_unindex
=
IOBTree
()
self
.
_syn
=
stop_word_dict
else
:
pass
...
...
@@ -177,6 +176,11 @@ class UnTextIndex(Persistent, Implicit):
self
.
_lexicon
=
lexicon
def
__setstate
(
self
,
state
):
Persistent
.
__setstate__
(
self
,
state
)
if
hasattr
(
self
,
'_syn'
):
del
self
.
_syn
def
getLexicon
(
self
,
vocab_id
):
""" bit of a hack, indexes have been made acquirers so that
...
...
@@ -194,10 +198,10 @@ class UnTextIndex(Persistent, Implicit):
def
__len__
(
self
):
return
len
(
self
.
_unindex
)
def
__setstate__
(
self
,
state
):
Persistent
.
__setstate__
(
self
,
state
)
if
not
hasattr
(
self
,
'_lexicon'
):
self
.
_lexicon
=
Lexicon
()
##
def __setstate__(self, state):
##
Persistent.__setstate__(self, state)
##
if not hasattr(self, '_lexicon'):
##
self._lexicon = Lexicon()
def
clear
(
self
):
...
...
@@ -240,7 +244,11 @@ class UnTextIndex(Persistent, Implicit):
## The Splitter should now be european compliant at least.
## Someone should test this.
src
=
self
.
getLexicon
(
self
.
_lexicon
).
Splitter
(
k
,
self
.
_syn
)
## import pdb
## pdb.set_trace()
src
=
self
.
getLexicon
(
self
.
_lexicon
).
Splitter
(
k
)
## This returns a tuple of stemmed words. Stopwords have been
## stripped.
...
...
@@ -324,7 +332,7 @@ class UnTextIndex(Persistent, Implicit):
def
__getitem__
(
self
,
word
):
"""Return an InvertedIndex-style result "list"
"""
src
=
tuple
(
self
.
getLexicon
(
self
.
_lexicon
).
Splitter
(
word
,
self
.
_syn
))
src
=
tuple
(
self
.
getLexicon
(
self
.
_lexicon
).
Splitter
(
word
))
if
not
src
:
return
ResultList
({},
(
word
,),
self
)
if
len
(
src
)
==
1
:
src
=
src
[
0
]
...
...
@@ -412,13 +420,13 @@ class UnTextIndex(Persistent, Implicit):
r
=
[]
for
word
in
words
:
r
=
r
+
self
.
getLexicon
(
self
.
_lexicon
).
Splitter
(
doc
,
self
.
_syn
).
indexes
(
word
)
r
=
r
+
self
.
getLexicon
(
self
.
_lexicon
).
Splitter
(
doc
).
indexes
(
word
)
return
r
def
_subindex
(
self
,
isrc
,
d
,
old
,
last
):
src
=
self
.
getLexicon
(
self
.
_lexicon
).
Splitter
(
isrc
,
self
.
_syn
)
src
=
self
.
getLexicon
(
self
.
_lexicon
).
Splitter
(
isrc
)
for
s
in
src
:
if
s
[
0
]
==
'
\
"
'
:
last
=
self
.
subindex
(
s
[
1
:
-
1
],
d
,
old
,
last
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment