Commit b98b6471 authored by Tim Peters

Whitespace normalization.

parent 80cc084f
...@@ -37,8 +37,8 @@ class HTMLWordSplitter: ...@@ -37,8 +37,8 @@ class HTMLWordSplitter:
for pat in remove: for pat in remove:
text = re.sub(pat, " ", text) text = re.sub(pat, " ", text)
return re.findall(wordpat, text) return re.findall(wordpat, text)
element_factory.registerFactory('Word Splitter', element_factory.registerFactory('Word Splitter',
'HTML aware splitter', 'HTML aware splitter',
HTMLWordSplitter) HTMLWordSplitter)
......
...@@ -19,21 +19,21 @@ class IPipelineElementFactory(Interface): ...@@ -19,21 +19,21 @@ class IPipelineElementFactory(Interface):
def registerFactory(group, name, factory): def registerFactory(group, name, factory):
"""Registers a pipeline factory by name and element group. """Registers a pipeline factory by name and element group.
Each name can be registered only once for a given group. Duplicate Each name can be registered only once for a given group. Duplicate
registrations will raise a ValueError registrations will raise a ValueError
""" """
def getFactoryGroups(): def getFactoryGroups():
"""Returns a sorted list of element group names """Returns a sorted list of element group names
""" """
def getFactoryNames(group): def getFactoryNames(group):
"""Returns a sorted list of registered pipeline factory names """Returns a sorted list of registered pipeline factory names
in the specified element group in the specified element group
""" """
def instantiate(group, name): def instantiate(group, name):
"""Instantiates a pipeline element by group and name. If name is not """Instantiates a pipeline element by group and name. If name is not
registered raise a KeyError. registered raise a KeyError.
""" """
...@@ -170,22 +170,22 @@ class Splitter: ...@@ -170,22 +170,22 @@ class Splitter:
for s in lst: for s in lst:
result += self.rxGlob.findall(s) result += self.rxGlob.findall(s)
return result return result
element_factory.registerFactory('Word Splitter', element_factory.registerFactory('Word Splitter',
'Whitespace splitter', 'Whitespace splitter',
Splitter) Splitter)
class CaseNormalizer: class CaseNormalizer:
def process(self, lst): def process(self, lst):
return [w.lower() for w in lst] return [w.lower() for w in lst]
element_factory.registerFactory('Case Normalizer', element_factory.registerFactory('Case Normalizer',
'Case Normalizer', 'Case Normalizer',
CaseNormalizer) CaseNormalizer)
element_factory.registerFactory('Stop Words', element_factory.registerFactory('Stop Words',
' Don\'t remove stop words', ' Don\'t remove stop words',
None) None)
class StopWordRemover: class StopWordRemover:
...@@ -202,8 +202,8 @@ class StopWordRemover: ...@@ -202,8 +202,8 @@ class StopWordRemover:
def process(self, lst): def process(self, lst):
return self._process(self.dict, lst) return self._process(self.dict, lst)
element_factory.registerFactory('Stop Words', element_factory.registerFactory('Stop Words',
'Remove listed stop words only', 'Remove listed stop words only',
StopWordRemover) StopWordRemover)
class StopWordAndSingleCharRemover(StopWordRemover): class StopWordAndSingleCharRemover(StopWordRemover):
...@@ -211,7 +211,7 @@ class StopWordAndSingleCharRemover(StopWordRemover): ...@@ -211,7 +211,7 @@ class StopWordAndSingleCharRemover(StopWordRemover):
dict = get_stopdict().copy() dict = get_stopdict().copy()
for c in range(255): for c in range(255):
dict[chr(c)] = None dict[chr(c)] = None
element_factory.registerFactory('Stop Words', element_factory.registerFactory('Stop Words',
'Remove listed and single char words', 'Remove listed and single char words',
StopWordAndSingleCharRemover) StopWordAndSingleCharRemover)
...@@ -14,36 +14,36 @@ ...@@ -14,36 +14,36 @@
from Products.ZCTextIndex.IPipelineElementFactory \ from Products.ZCTextIndex.IPipelineElementFactory \
import IPipelineElementFactory import IPipelineElementFactory
class PipelineElementFactory: class PipelineElementFactory:
__implements__ = IPipelineElementFactory __implements__ = IPipelineElementFactory
def __init__(self): def __init__(self):
self._groups = {} self._groups = {}
def registerFactory(self, group, name, factory): def registerFactory(self, group, name, factory):
if self._groups.has_key(group) and \ if self._groups.has_key(group) and \
self._groups[group].has_key(name): self._groups[group].has_key(name):
raise ValueError('ZCTextIndex lexicon element "%s" ' raise ValueError('ZCTextIndex lexicon element "%s" '
'already registered in group "%s"' 'already registered in group "%s"'
% (name, group)) % (name, group))
elements = self._groups.get(group) elements = self._groups.get(group)
if elements is None: if elements is None:
elements = self._groups[group] = {} elements = self._groups[group] = {}
elements[name] = factory elements[name] = factory
def getFactoryGroups(self): def getFactoryGroups(self):
groups = self._groups.keys() groups = self._groups.keys()
groups.sort() groups.sort()
return groups return groups
def getFactoryNames(self, group): def getFactoryNames(self, group):
names = self._groups[group].keys() names = self._groups[group].keys()
names.sort() names.sort()
return names return names
def instantiate(self, group, name): def instantiate(self, group, name):
factory = self._groups[group][name] factory = self._groups[group][name]
if factory is not None: if factory is not None:
......
...@@ -35,7 +35,7 @@ from PipelineFactory import element_factory ...@@ -35,7 +35,7 @@ from PipelineFactory import element_factory
from Products.ZCTextIndex.CosineIndex import CosineIndex from Products.ZCTextIndex.CosineIndex import CosineIndex
from Products.ZCTextIndex.OkapiIndex import OkapiIndex from Products.ZCTextIndex.OkapiIndex import OkapiIndex
index_types = {'Okapi BM25 Rank':OkapiIndex, index_types = {'Okapi BM25 Rank':OkapiIndex,
'Cosine Measure':CosineIndex} 'Cosine Measure':CosineIndex}
class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
...@@ -77,7 +77,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -77,7 +77,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
self._index_type = extra.index_type self._index_type = extra.index_type
else: else:
self._index_factory = index_factory self._index_factory = index_factory
self.clear() self.clear()
## External methods not in the Pluggable Index API ## ## External methods not in the Pluggable Index API ##
...@@ -157,7 +157,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -157,7 +157,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
## User Interface Methods ## ## User Interface Methods ##
manage_main = DTMLFile('dtml/manageZCTextIndex', globals()) manage_main = DTMLFile('dtml/manageZCTextIndex', globals())
def getIndexType(self): def getIndexType(self):
"""Return index type string""" """Return index type string"""
return getattr(self, '_index_type', self._index_factory.__name__) return getattr(self, '_index_type', self._index_factory.__name__)
...@@ -176,10 +176,10 @@ manage_addLexiconForm = DTMLFile('dtml/addLexicon', globals()) ...@@ -176,10 +176,10 @@ manage_addLexiconForm = DTMLFile('dtml/addLexicon', globals())
def manage_addLexicon(self, id, title='', elements=[], REQUEST=None): def manage_addLexicon(self, id, title='', elements=[], REQUEST=None):
"""Add ZCTextIndex Lexicon""" """Add ZCTextIndex Lexicon"""
pipeline = [] pipeline = []
for el_record in elements: for el_record in elements:
if not hasattr(el_record, 'name'): if not hasattr(el_record, 'name'):
continue # Skip over records that only specify element group continue # Skip over records that only specify element group
element = element_factory.instantiate(el_record.group, el_record.name) element = element_factory.instantiate(el_record.group, el_record.name)
if element is not None: if element is not None:
...@@ -199,7 +199,7 @@ class PLexicon(Lexicon, Acquisition.Implicit, SimpleItem): ...@@ -199,7 +199,7 @@ class PLexicon(Lexicon, Acquisition.Implicit, SimpleItem):
"""Lexicon for ZCTextIndex""" """Lexicon for ZCTextIndex"""
meta_type = 'ZCTextIndex Lexicon' meta_type = 'ZCTextIndex Lexicon'
manage_options = ({'label':'Overview', 'action':'manage_main'},) + \ manage_options = ({'label':'Overview', 'action':'manage_main'},) + \
SimpleItem.manage_options SimpleItem.manage_options
...@@ -207,13 +207,13 @@ class PLexicon(Lexicon, Acquisition.Implicit, SimpleItem): ...@@ -207,13 +207,13 @@ class PLexicon(Lexicon, Acquisition.Implicit, SimpleItem):
self.id = str(id) self.id = str(id)
self.title = str(title) self.title = str(title)
PLexicon.inheritedAttribute('__init__')(self, *pipeline) PLexicon.inheritedAttribute('__init__')(self, *pipeline)
## User Interface Methods ## ## User Interface Methods ##
def getPipelineNames(self): def getPipelineNames(self):
"""Return list of names of pipeline element classes""" """Return list of names of pipeline element classes"""
return [element.__class__.__name__ for element in self._pipeline] return [element.__class__.__name__ for element in self._pipeline]
manage_main = DTMLFile('dtml/manageLexicon', globals()) manage_main = DTMLFile('dtml/manageLexicon', globals())
InitializeClass(PLexicon) InitializeClass(PLexicon)
...@@ -39,15 +39,14 @@ def initialize(context): ...@@ -39,15 +39,14 @@ def initialize(context):
getElementGroups, getElementNames), getElementGroups, getElementNames),
icon='www/lexicon.gif' icon='www/lexicon.gif'
) )
## Functions below are for use in the ZMI constructor forms ## ## Functions below are for use in the ZMI constructor forms ##
def getElementGroups(self): def getElementGroups(self):
return element_factory.getFactoryGroups() return element_factory.getFactoryGroups()
def getElementNames(self, group): def getElementNames(self, group):
return element_factory.getFactoryNames(group) return element_factory.getFactoryNames(group)
def getIndexTypes(self): def getIndexTypes(self):
return ZCTextIndex.index_types.keys() return ZCTextIndex.index_types.keys()
...@@ -54,7 +54,7 @@ def main(rt): ...@@ -54,7 +54,7 @@ def main(rt):
chooser = NBest(10) chooser = NBest(10)
chooser.addmany(b.items()) chooser.addmany(b.items())
results = chooser.getbest() results = chooser.getbest()
else: else:
try: try:
for _ in ITERS: for _ in ITERS:
......
...@@ -17,32 +17,32 @@ from Products.ZCTextIndex.IPipelineElement import IPipelineElement ...@@ -17,32 +17,32 @@ from Products.ZCTextIndex.IPipelineElement import IPipelineElement
from Products.ZCTextIndex.PipelineFactory import PipelineElementFactory from Products.ZCTextIndex.PipelineFactory import PipelineElementFactory
class NullPipelineElement: class NullPipelineElement:
__implements__ = IPipelineElement __implements__ = IPipelineElement
def process(source): def process(source):
pass pass
class PipelineFactoryTest(TestCase): class PipelineFactoryTest(TestCase):
def setUp(self): def setUp(self):
self.huey = NullPipelineElement() self.huey = NullPipelineElement()
self.dooey = NullPipelineElement() self.dooey = NullPipelineElement()
self.louie = NullPipelineElement() self.louie = NullPipelineElement()
self.daffy = NullPipelineElement() self.daffy = NullPipelineElement()
def testPipeline(self): def testPipeline(self):
pf = PipelineElementFactory() pf = PipelineElementFactory()
pf.registerFactory('donald', 'huey', self.huey) pf.registerFactory('donald', 'huey', self.huey)
pf.registerFactory('donald', 'dooey', self.dooey) pf.registerFactory('donald', 'dooey', self.dooey)
pf.registerFactory('donald', 'louie', self.louie) pf.registerFactory('donald', 'louie', self.louie)
pf.registerFactory('looney', 'daffy', self.daffy) pf.registerFactory('looney', 'daffy', self.daffy)
self.assertRaises(ValueError, pf.registerFactory,'donald', 'huey', self.assertRaises(ValueError, pf.registerFactory,'donald', 'huey',
self.huey) self.huey)
self.assertEqual(pf.getFactoryGroups(), ['donald', 'looney']) self.assertEqual(pf.getFactoryGroups(), ['donald', 'looney'])
self.assertEqual(pf.getFactoryNames('donald'), self.assertEqual(pf.getFactoryNames('donald'),
['dooey', 'huey', 'louie']) ['dooey', 'huey', 'louie'])
def test_suite(): def test_suite():
return makeSuite(PipelineFactoryTest) return makeSuite(PipelineFactoryTest)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment