Commit 023c1df8 authored by Casey Duncan

Fix for issue #505

ZCTextIndex is now associated by path to its lexicon. After replacing a lexicon used by an index, clear the index to make it use the new lexicon.
parent 477d8bc1
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
"""Plug in text index for ZCatalog with relevance ranking.""" """Plug in text index for ZCatalog with relevance ranking."""
from cgi import escape from cgi import escape
from types import TupleType
import ZODB import ZODB
from Persistence import Persistent from Persistence import Persistent
...@@ -69,18 +70,19 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -69,18 +70,19 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
# via the silly "extra" record. # via the silly "extra" record.
self._fieldname = field_name or getattr(extra, 'doc_attr', '') or id self._fieldname = field_name or getattr(extra, 'doc_attr', '') or id
lexicon_id = lexicon_id or extra.lexicon_id lexicon_id = lexicon_id or extra.lexicon_id
lexicon = getattr(caller, lexicon_id, None) lexicon = getattr(caller, lexicon_id, None)
if lexicon is None: if lexicon is None:
raise LookupError, 'Lexicon "%s" not found' % escape(lexicon_id) raise LookupError, 'Lexicon "%s" not found' % escape(lexicon_id)
if not ILexicon.isImplementedBy(lexicon): if not ILexicon.isImplementedBy(lexicon):
raise ValueError, \ raise ValueError('Object "%s" does not implement '
'Object "%s" does not implement ZCTextIndex Lexicon interface' \ 'ZCTextIndex Lexicon interface'
% lexicon.getId() % lexicon.getId())
self.lexicon = lexicon self.lexicon_path = lexicon.getPhysicalPath()
self._v_lexicon = lexicon
if index_factory is None: if index_factory is None:
if extra.index_type not in index_types.keys(): if extra.index_type not in index_types.keys():
...@@ -91,7 +93,31 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -91,7 +93,31 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
else: else:
self._index_factory = index_factory self._index_factory = index_factory
self.clear() self.index = self._index_factory(self.getLexicon())
## Private Methods ##
security.declarePrivate('getLexicon')
def getLexicon(self):
    """Return the lexicon used by this index.

    The lexicon is located through ``self.lexicon_path`` and cached in the
    ``_v_lexicon`` attribute, so the traversal only happens when no cached
    reference is present.

    Older instances stored the lexicon directly in a ``lexicon``
    attribute.  When such an attribute is found it is converted to a
    path: the lexicon is looked up by id on ``self.aq_parent``, its
    physical path is recorded in ``lexicon_path`` and the direct
    reference is deleted.

    Raises:
        TypeError: if the object found at ``lexicon_path`` does not
            implement the ``ILexicon`` interface.
    """
    # Check the unwrapped object only.  This class mixes in
    # Acquisition.Implicit, so a plain hasattr(self, 'lexicon') would also
    # succeed for an unrelated object named 'lexicon' acquired from a
    # container, and the migration below would then fail on
    # ``del self.lexicon`` or bind the index to the wrong object.
    base = getattr(self, 'aq_base', self)
    if hasattr(base, 'lexicon'):
        # Fix up old ZCTextIndexes by removing direct lexicon ref
        # and changing it to a path
        lexicon = getattr(self.aq_parent, self.lexicon.getId())
        self.lexicon_path = lexicon.getPhysicalPath()
        del self.lexicon
    try:
        return self._v_lexicon
    except AttributeError:
        # No cached reference: resolve the stored path and verify the
        # target really is a lexicon before caching it.
        lexicon = self.unrestrictedTraverse(self.lexicon_path)
        if not ILexicon.isImplementedBy(lexicon):
            raise TypeError('Object "%s" is not a ZCTextIndex Lexicon'
                            % lexicon.getId())
        self._v_lexicon = lexicon
        return lexicon
## External methods not in the Pluggable Index API ## ## External methods not in the Pluggable Index API ##
...@@ -103,7 +129,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -103,7 +129,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
The num results is the total number of results before trimming The num results is the total number of results before trimming
to the nbest results. to the nbest results.
""" """
tree = QueryParser(self.lexicon).parseQuery(query) tree = QueryParser(self.getLexicon()).parseQuery(query)
results = tree.executeQuery(self.index) results = tree.executeQuery(self.index)
if results is None: if results is None:
return [], 0 return [], 0
...@@ -144,7 +170,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -144,7 +170,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
query_str = ' '.join(record.keys) query_str = ' '.join(record.keys)
if not query_str: if not query_str:
return None return None
tree = QueryParser(self.lexicon).parseQuery(query_str) tree = QueryParser(self.getLexicon()).parseQuery(query_str)
results = tree.executeQuery(self.index) results = tree.executeQuery(self.index)
return results, (self._fieldname,) return results, (self._fieldname,)
...@@ -154,7 +180,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -154,7 +180,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
word_ids = self.index.get_words(documentId) word_ids = self.index.get_words(documentId)
except KeyError: except KeyError:
return default return default
get_word = self.lexicon.get_word get_word = self.getLexicon().get_word
return [get_word(wid) for wid in word_ids] return [get_word(wid) for wid in word_ids]
def uniqueValues(self, name=None, withLengths=0): def uniqueValues(self, name=None, withLengths=0):
...@@ -168,7 +194,13 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -168,7 +194,13 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
def clear(self): def clear(self):
"""reinitialize the index (but not the lexicon)""" """reinitialize the index (but not the lexicon)"""
self.index = self._index_factory(self.lexicon) try:
# Remove the cached reference to the lexicon
# So that it is refreshed
del self._v_lexicon
except (AttributeError, KeyError):
pass
self.index = self._index_factory(self.getLexicon())
## User Interface Methods ## ## User Interface Methods ##
...@@ -182,9 +214,13 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -182,9 +214,13 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
"""Return indexed attribute name""" """Return indexed attribute name"""
return self._fieldname return self._fieldname
def getLexiconId(self): def getLexiconPath(self):
"""Return the id of the lexicon used by the index""" """Return the path of the lexicon used by the index"""
return self.lexicon.getId() try:
self.getLexicon() # Make sure the path is set
return '/'.join(self.lexicon_path)
except KeyError:
return
InitializeClass(ZCTextIndex) InitializeClass(ZCTextIndex)
......
...@@ -11,11 +11,16 @@ ...@@ -11,11 +11,16 @@
</p> </p>
<p class="form-help"> <p class="form-help">
ZCTextIndex Lexicon used: ZCTextIndex Lexicon used:
<em><dtml-var getLexiconId></em> <dtml-if getLexiconPath>
<a href="<dtml-var getLexiconPath>/manage_main"
><dtml-var getLexiconPath></a>
<dtml-else>
<em>(Lexicon Not Found)</em>
</dtml-if>
</p> </p>
<p class="form-help"> <p class="form-help">
<em>Note:</em> You cannot change the lexicon assigned to a ZCTextIndex. <em>Note:</em> The lexicon assigned to the index cannot be changed. To replace
To use another lexicon, delete this index and create a new one that the existing lexicon, create a new lexicon in the same place and clear the
uses the desired lexicon. index. This will make the index use the replacement lexicon.
</p> </p>
<dtml-var manage_page_footer> <dtml-var manage_page_footer>
...@@ -13,18 +13,19 @@ ...@@ -13,18 +13,19 @@
############################################################################## ##############################################################################
from Interface.Verify import verifyClass from Interface.Verify import verifyClass
import Acquisition
from Products.PluginIndexes.common.PluggableIndex import \ from Products.PluginIndexes.common.PluggableIndex import \
PluggableIndexInterface PluggableIndexInterface
from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex, PLexicon
from Products.ZCTextIndex.tests import \ from Products.ZCTextIndex.tests import \
testIndex, testQueryEngine, testQueryParser testIndex, testQueryEngine, testQueryParser
from Products.ZCTextIndex.BaseIndex import \ from Products.ZCTextIndex.BaseIndex import \
scaled_int, SCALE_FACTOR, inverse_doc_frequency scaled_int, SCALE_FACTOR, inverse_doc_frequency
from Products.ZCTextIndex.CosineIndex import CosineIndex from Products.ZCTextIndex.CosineIndex import CosineIndex
from Products.ZCTextIndex.OkapiIndex import OkapiIndex from Products.ZCTextIndex.OkapiIndex import OkapiIndex
from Products.ZCTextIndex.Lexicon import Lexicon, Splitter from Products.ZCTextIndex.Lexicon import Splitter
from Products.ZCTextIndex.Lexicon import CaseNormalizer, StopWordRemover from Products.ZCTextIndex.Lexicon import CaseNormalizer, StopWordRemover
from Products.ZCTextIndex.QueryParser import QueryParser from Products.ZCTextIndex.QueryParser import QueryParser
from Products.ZCTextIndex.StopDict import get_stopdict from Products.ZCTextIndex.StopDict import get_stopdict
...@@ -37,12 +38,17 @@ class Indexable: ...@@ -37,12 +38,17 @@ class Indexable:
def __init__(self, text): def __init__(self, text):
self.text = text self.text = text
class LexiconHolder: class LexiconHolder(Acquisition.Implicit):
def __init__(self, lexicon): def __init__(self, lexicon):
self.lexicon = lexicon self.lexicon = lexicon
class Extra: def getPhysicalPath(self):
pass return ('',) # Pretend to be the root
def dummyUnrestrictedTraverse(self, path):
if path == ('', 'lexicon',):
return self.lexicon
raise 'NotFound', path
# The tests classes below create a ZCTextIndex(). Then they create # The tests classes below create a ZCTextIndex(). Then they create
# instance variables that point to the internal components used by # instance variables that point to the internal components used by
...@@ -95,12 +101,17 @@ text = [ ...@@ -95,12 +101,17 @@ text = [
class ZCIndexTestsBase: class ZCIndexTestsBase:
def setUp(self): def setUp(self):
extra = Extra() self.lexicon = PLexicon('lexicon', '',
extra.doc_attr = 'text' Splitter(),
extra.lexicon_id = 'lexicon' CaseNormalizer(),
self.lexicon = Lexicon(Splitter(), CaseNormalizer(), StopWordRemover()) StopWordRemover())
caller = LexiconHolder(self.lexicon) caller = LexiconHolder(self.lexicon)
self.zc_index = ZCTextIndex('name', extra, caller, self.IndexFactory) self.zc_index = ZCTextIndex('name',
None,
caller,
self.IndexFactory,
'text',
'lexicon')
self.index = self.zc_index.index self.index = self.zc_index.index
def parserFailure(self, query): def parserFailure(self, query):
...@@ -454,13 +465,18 @@ class QueryTestsBase(testQueryEngine.TestQueryEngine, ...@@ -454,13 +465,18 @@ class QueryTestsBase(testQueryEngine.TestQueryEngine,
docs = ["foo bar ham", "bar ham", "foo ham", "ham"] docs = ["foo bar ham", "bar ham", "foo ham", "ham"]
def setUp(self): def setUp(self):
extra = Extra() self.lexicon = PLexicon('lexicon', '',
extra.doc_attr = 'text' Splitter(),
extra.lexicon_id = 'lexicon' CaseNormalizer(),
self.lexicon = Lexicon(Splitter(), CaseNormalizer(),
StopWordRemover()) StopWordRemover())
caller = LexiconHolder(self.lexicon) caller = LexiconHolder(self.lexicon)
self.zc_index = ZCTextIndex('name', extra, caller, self.IndexFactory)
self.zc_index = ZCTextIndex('name',
None,
caller,
self.IndexFactory,
'text',
'lexicon')
self.parser = QueryParser(self.lexicon) self.parser = QueryParser(self.lexicon)
self.index = self.zc_index.index self.index = self.zc_index.index
self.add_docs() self.add_docs()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment