- removed deprecated TextIndex

cb45cbcc · adc7e054 · cb45cbcc · cb45cbcc · cb45cbcc · adc7e054
Commit cb45cbcc authored Jul 03, 2009 by
33 changed files
--- a/doc/CHANGES.rst
+++ b/doc/CHANGES.rst
@@ -19,6 +19,8 @@ Features Added
 Restructuring
 +++++++++++++
+- PluginIndexes: Removed deprecated TextIndex.
 - HelpSys now uses ZCTextIndex instead of the deprecated TextIndex. Please
  update your Zope databases by deleting the Product registrations in the
  Control Panel and restarting Zope.

--- a/setup.py
+++ b/setup.py
@@ -75,21 +75,6 @@ params = dict(name='Zope2',
            sources=['src/initgroups/_initgroups.c']),
      # indexes
-      Extension(
-            name='Products.PluginIndexes.TextIndex.Splitter.'
-                 'ZopeSplitter.ZopeSplitter',
-            sources=['src/Products/PluginIndexes/TextIndex/Splitter/'
-                     'ZopeSplitter/src/ZopeSplitter.c']),
-      Extension(
-            name='Products.PluginIndexes.TextIndex.Splitter.'
-                 'ISO_8859_1_Splitter.ISO_8859_1_Splitter',
-            sources=['src/Products/PluginIndexes/TextIndex/Splitter/'
-                     'ISO_8859_1_Splitter/src/ISO_8859_1_Splitter.c']),
-      Extension(
-            name='Products.PluginIndexes.TextIndex.Splitter.'
-                 'UnicodeSplitter.UnicodeSplitter',
-            sources=['src/Products/PluginIndexes/TextIndex/Splitter/'
-                     'UnicodeSplitter/src/UnicodeSplitter.c']),
      Extension(
            name='Products.ZCTextIndex.stopper',
            sources=['src/Products/ZCTextIndex/stopper.c']),

--- a/src/Products/PluginIndexes/README.txt
+++ b/src/Products/PluginIndexes/README.txt
@@ -47,34 +47,8 @@ Changes to Indexes:
  - new index type
- Changes to TextIndex:
-  - ZMI allows to select a different vocabulary. To use a vocabulary different
-    from the ZCatalogs default vocabulary 'Vocabulary' you must create a new
-    Vocabulary through the ZMI of the ZCatalog. After creating the vocabulary you
-    can choose the vocabulary on the ZMI management screen for the text index.
-  - the default operator might be overridden by specifying a new one
-    as 'operator' (see below)
-  - removed direct dependency from Splitter module. Splitter is now
-    acquired from used vocabulary
-  - usage of the 'textindex_operator' is deprecated
-  - lots of internal rework
-Changes to Vocabulary:
-  - added Splitter selection on the add formular
 Changes to ZCatalog
-  - Vocabulary.py moved to Products/PluginIndexes/TextIndex. A wrapper 
-    for backward compatibility is in place
  - added ZCatalogIndexes.py to provide access to indexes with pluggable
    index interface

--- a/src/Products/PluginIndexes/TextIndex/GlobbingLexicon.py
+++ b/src/Products/PluginIndexes/TextIndex/GlobbingLexicon.py
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-#############################################################################
-import re
-import string
-from BTrees.IIBTree import IISet, union, IITreeSet
-from BTrees.OIBTree import OIBTree
-from BTrees.IOBTree import IOBTree
-from BTrees.OOBTree import OOBTree
-from Products.PluginIndexes.common.randid import randid
-from Products.PluginIndexes.TextIndex.TextIndex import Op
-from Products.PluginIndexes.TextIndex.TextIndex import Or
-from Products.PluginIndexes.TextIndex.Lexicon import Lexicon
-from Products.PluginIndexes.TextIndex.Splitter import getSplitter
-class GlobbingLexicon(Lexicon):
-    """Lexicon which supports basic globbing function ('*' and '?').
-    This lexicon keeps several data structures around that are useful
-    for searching. They are:
-      '_lexicon' -- Contains the mapping from word => word_id
-      '_inverseLex' -- Contains the mapping from word_id => word
-      '_digrams' -- Contains a mapping from digram => word_id
-    Before going further, it is necessary to understand what a digram is,
-    as it is a core component of the structure of this lexicon.  A digram
-    is a two-letter sequence in a word.  For example, the word 'zope'
-    would be converted into the digrams::
-      ['$z', 'zo', 'op', 'pe', 'e$']
-    where the '$' is a word marker.  It is used at the beginning and end
-    of the words.  Those digrams are significant.
-    """
-    multi_wc = '*'
-    single_wc = '?'
-    eow = '$'
-    def __init__(self,useSplitter=None,extra=None):
-        self.clear()
-        self.useSplitter = useSplitter
-        self.splitterParams = extra
-        self.SplitterFunc = getSplitter(self.useSplitter)
-    def clear(self):
-        self._lexicon = OIBTree()
-        self._inverseLex = IOBTree()
-        self._digrams = OOBTree()
-    def _convertBTrees(self, threshold=200):
-        Lexicon._convertBTrees(self, threshold)
-        if type(self._digrams) is OOBTree: return
-        from BTrees.convert import convert
-        _digrams=self._digrams
-        self._digrams=OOBTree()
-        self._digrams._p_jar=self._p_jar
-        convert(_digrams, self._digrams, threshold, IITreeSet)
-    def createDigrams(self, word):
-        """Returns a list with the set of digrams in the word."""
-        word = '$'+word+'$'
-        return [ word[i:i+2] for i in range(len(word)-1)]
-    def getWordId(self, word):
-        """Provided 'word', return the matching integer word id."""
-        if self._lexicon.has_key(word):
-            return self._lexicon[word]
-        else:
-            return self.assignWordId(word)
-    set = getWordId                     # Kludge for old code
-    def getWord(self, wid):
-        return self._inverseLex.get(wid, None)
-    def assignWordId(self, word):
-        """Assigns a new word id to the provided word, and return it."""
-        # Double check it's not in the lexicon already, and if it is, just
-        # return it.
-        if self._lexicon.has_key(word):
-            return self._lexicon[word]
-        # Get word id. BBB Backward compat pain.
-        inverse=self._inverseLex
-        try: insert=inverse.insert
-        except AttributeError:
-            # we have an "old" BTree object
-            if inverse:
-                wid=inverse.keys()[-1]+1
-            else:
-                self._inverseLex=IOBTree()
-                wid=1
-            inverse[wid] = word
-        else:
-            # we have a "new" IOBTree object
-            wid=randid()
-            while not inverse.insert(wid, word):
-                wid=randid()
-        self._lexicon[word] = wid
-        # Now take all the digrams and insert them into the digram map.
-        for digram in self.createDigrams(word):
-            set = self._digrams.get(digram, None)
-            if set is None:
-                self._digrams[digram] = set = IISet()
-            set.insert(wid)
-        return wid
-    def get(self, pattern):
-        """ Query the lexicon for words matching a pattern."""
-        # single word pattern  produce a slicing problem below.
-        # Because the splitter throws away single characters we can
-        # return an empty tuple here.
-        if len(pattern)==1: return ()
-        wc_set = [self.multi_wc, self.single_wc]
-        digrams = []
-        globbing = 0
-        for i in range(len(pattern)):
-            if pattern[i] in wc_set:
-                globbing = 1
-                continue
-            if i == 0:
-                digrams.insert(i, (self.eow + pattern[i]) )
-                digrams.append((pattern[i] + pattern[i+1]))
-            else:
-                try:
-                    if pattern[i+1] not in wc_set:
-                        digrams.append( pattern[i] + pattern[i+1] )
-                except IndexError:
-                    digrams.append( (pattern[i] + self.eow) )
-        if not globbing:
-            result =  self._lexicon.get(pattern, None)
-            if result is None:
-                return ()
-            return (result, )
-        ## now get all of the intsets that contain the result digrams
-        result = None
-        for digram in digrams:
-            result=union(result, self._digrams.get(digram, None))
-        if not result:
-            return ()
-        else:
-            ## now we have narrowed the list of possible candidates
-            ## down to those words which contain digrams.  However,
-            ## some words may have been returned that match digrams,
-            ## but do not match 'pattern'.  This is because some words
-            ## may contain all matching digrams, but in the wrong
-            ## order.
-            expr = re.compile(self.createRegex(pattern))
-            words = []
-            hits = IISet()
-            for x in result:
-                if expr.match(self._inverseLex[x]):
-                    hits.insert(x)
-            return hits
-    def __getitem__(self, word):
-        """ """
-        return self.get(word)
-    def query_hook(self, q):
-        """expand wildcards"""
-        ListType = type([])
-        i = len(q) - 1
-        while i >= 0:
-            e = q[i]
-            if isinstance(e, ListType):
-                self.query_hook(e)
-            elif isinstance(e, Op):
-                pass
-            elif ( (self.multi_wc in e) or
-                   (self.single_wc in e) ):
-                wids = self.get(e)
-                words = []
-                for wid in wids:
-                    if words:
-                        words.append(Or)
-                    words.append(wid)
-                if not words:
-                    # if words is empty, return something that will make
-                    # textindex's __getitem__ return an empty result list
-                    words.append('')
-                q[i] = words
-            i = i - 1
-        return q
-    def Splitter(self, astring, words=None, encoding="latin1"):
-        """ wrap the splitter """
-        ## don't do anything, less efficient but there's not much
-        ## sense in stemming a globbing lexicon.
-        try:
-            return self.SplitterFunc(
-                    astring,
-                    words,
-                    encoding=encoding,
-                    singlechar=self.splitterParams.splitterSingleChars,
-                    indexnumbers=self.splitterParams.splitterIndexNumbers,
-                    casefolding=self.splitterParams.splitterCasefolding
-                    )
-        except:
-            return self.SplitterFunc(astring, words)
-    def createRegex(self, pat):
-        """Translate a PATTERN to a regular expression.
-        There is no way to quote meta-characters.
-        """
-        # Remove characters that are meaningful in a regex
-        if not isinstance(pat, unicode):
-            transTable = string.maketrans("", "")
-            result = string.translate(pat, transTable,
-                                      r'()&|!@#$%^{}\<>.')
-        else:
-            transTable={}
-            for ch in r'()&|!@#$%^{}\<>.':
-                transTable[ord(ch)]=None
-            result=pat.translate(transTable)
-        # First, deal with multi-character globbing
-        result = result.replace( '*', '.*')
-        # Next, we need to deal with single-character globbing
-        result = result.replace( '?', '.')
-        return "%s$" % result
--- a/src/Products/PluginIndexes/TextIndex/Lexicon.py
+++ b/src/Products/PluginIndexes/TextIndex/Lexicon.py
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-__doc__=""" Module breaks out Zope specific methods and behavior.  In
-addition, provides the Lexicon class which defines a word to integer
-mapping.
-"""
-from Acquisition import Implicit
-from BTrees.OIBTree import OIBTree
-from BTrees.IOBTree import IOBTree
-from BTrees.IIBTree import IISet
-from BTrees.IIBTree import IITreeSet
-from Persistence import Persistent
-from Products.PluginIndexes.common.randid import randid
-from Products.PluginIndexes.TextIndex.Splitter import getSplitter
-from Products.PluginIndexes.TextIndex.Splitter import splitterNames
-from types import StringType
-class Lexicon(Persistent, Implicit):
-    """Maps words to word ids and then some
-    The Lexicon object is an attempt to abstract vocabularies out of
-    Text indexes.  This abstraction is not totally cooked yet, this
-    module still includes the parser for the 'Text Index Query
-    Language' and a few other hacks.
-    """
-    # default for older objects
-    stop_syn={}
-    def __init__(self, stop_syn=None,useSplitter=None,extra=None):
-        self.clear()
-        if stop_syn is None:
-            self.stop_syn = {}
-        else:
-            self.stop_syn = stop_syn
-        self.useSplitter = splitterNames[0]
-        if useSplitter: self.useSplitter=useSplitter
-        self.splitterParams = extra
-        self.SplitterFunc = getSplitter(self.useSplitter)
-    def clear(self):
-        self._lexicon = OIBTree()
-        self._inverseLex = IOBTree()
-    def _convertBTrees(self, threshold=200):
-        if (type(self._lexicon) is OIBTree and
-            type(getattr(self, '_inverseLex', None)) is IOBTree):
-            return
-        from BTrees.convert import convert
-        lexicon=self._lexicon
-        self._lexicon=OIBTree()
-        self._lexicon._p_jar=self._p_jar
-        convert(lexicon, self._lexicon, threshold)
-        try:
-            inverseLex=self._inverseLex
-            self._inverseLex=IOBTree()
-        except AttributeError:
-            # older lexicons didn't have an inverse lexicon
-            self._inverseLex=IOBTree()
-            inverseLex=self._inverseLex
-        self._inverseLex._p_jar=self._p_jar
-        convert(inverseLex, self._inverseLex, threshold)
-    def set_stop_syn(self, stop_syn):
-        """ pass in a mapping of stopwords and synonyms.  Format is:
-        {'word' : [syn1, syn2, ..., synx]}
-        Vocabularies do not necesarily need to implement this if their
-        splitters do not support stemming or stoping.
-        """
-        self.stop_syn = stop_syn
-    def getWordId(self, word):
-        """ return the word id of 'word' """
-        wid=self._lexicon.get(word, None)
-        if wid is None:
-            wid=self.assignWordId(word)
-        return wid
-    set = getWordId
-    def getWord(self, wid):
-        """ post-2.3.1b2 method, will not work with unconverted lexicons """
-        return self._inverseLex.get(wid, None)
-    def assignWordId(self, word):
-        """Assigns a new word id to the provided word and returns it."""
-        # First make sure it's not already in there
-        if self._lexicon.has_key(word):
-            return self._lexicon[word]
-        try: inverse=self._inverseLex
-        except AttributeError:
-            # woops, old lexicom wo wids
-            inverse=self._inverseLex=IOBTree()
-            for word, wid in self._lexicon.items():
-                inverse[wid]=word
-        wid=randid()
-        while not inverse.insert(wid, word):
-            wid=randid()
-        if isinstance(word, str):
-            self._lexicon[intern(word)] = wid
-        else:
-            self._lexicon[word] = wid
-        return wid
-    def get(self, key, default=None):
-        """Return the matched word against the key."""
-        r=IISet()
-        wid=self._lexicon.get(key, default)
-        if wid is not None: r.insert(wid)
-        return r
-    def __getitem__(self, key):
-        return self.get(key)
-    def __len__(self):
-        return len(self._lexicon)
-    def Splitter(self, astring, words=None, encoding = "latin1"):
-        """ wrap the splitter """
-        if words is None: words = self.stop_syn
-        try:
-            return self.SplitterFunc(
-                    astring,
-                    words,
-                    encoding=encoding,
-                    singlechar=self.splitterParams.splitterSingleChars,
-                    indexnumbers=self.splitterParams.splitterIndexNumbers,
-                    casefolding=self.splitterParams.splitterCasefolding
-                    )
-        except:
-            return self.SplitterFunc(astring, words)
-    def query_hook(self, q):
-        """ we don't want to modify the query cuz we're dumb """
-        return q
-stop_words=(
-    'am', 'ii', 'iii', 'per', 'po', 're', 'a', 'about', 'above', 'across',
-    'after', 'afterwards', 'again', 'against', 'all', 'almost', 'alone',
-    'along', 'already', 'also', 'although', 'always', 'am', 'among',
-    'amongst', 'amoungst', 'amount', 'an', 'and', 'another', 'any',
-    'anyhow', 'anyone', 'anything', 'anyway', 'anywhere', 'are', 'around',
-    'as', 'at', 'back', 'be', 'became', 'because', 'become', 'becomes',
-    'becoming', 'been', 'before', 'beforehand', 'behind', 'being',
-    'below', 'beside', 'besides', 'between', 'beyond', 'bill', 'both',
-    'bottom', 'but', 'by', 'can', 'cannot', 'cant', 'con', 'could',
-    'couldnt', 'cry', 'describe', 'detail', 'do', 'done', 'down', 'due',
-    'during', 'each', 'eg', 'eight', 'either', 'eleven', 'else',
-    'elsewhere', 'empty', 'enough', 'even', 'ever', 'every', 'everyone',
-    'everything', 'everywhere', 'except', 'few', 'fifteen', 'fifty',
-    'fill', 'find', 'fire', 'first', 'five', 'for', 'former', 'formerly',
-    'forty', 'found', 'four', 'from', 'front', 'full', 'further', 'get',
-    'give', 'go', 'had', 'has', 'hasnt', 'have', 'he', 'hence', 'her',
-    'here', 'hereafter', 'hereby', 'herein', 'hereupon', 'hers',
-    'herself', 'him', 'himself', 'his', 'how', 'however', 'hundred', 'i',
-    'ie', 'if', 'in', 'inc', 'indeed', 'interest', 'into', 'is', 'it',
-    'its', 'itself', 'keep', 'last', 'latter', 'latterly', 'least',
-    'less', 'made', 'many', 'may', 'me', 'meanwhile', 'might', 'mill',
-    'mine', 'more', 'moreover', 'most', 'mostly', 'move', 'much', 'must',
-    'my', 'myself', 'name', 'namely', 'neither', 'never', 'nevertheless',
-    'next', 'nine', 'no', 'nobody', 'none', 'noone', 'nor', 'not',
-    'nothing', 'now', 'nowhere', 'of', 'off', 'often', 'on', 'once',
-    'one', 'only', 'onto', 'or', 'other', 'others', 'otherwise', 'our',
-    'ours', 'ourselves', 'out', 'over', 'own', 'per', 'perhaps',
-    'please', 'pre', 'put', 'rather', 're', 'same', 'see', 'seem',
-    'seemed', 'seeming', 'seems', 'serious', 'several', 'she', 'should',
-    'show', 'side', 'since', 'sincere', 'six', 'sixty', 'so', 'some',
-    'somehow', 'someone', 'something', 'sometime', 'sometimes',
-    'somewhere', 'still', 'such', 'take', 'ten', 'than', 'that', 'the',
-    'their', 'them', 'themselves', 'then', 'thence', 'there',
-    'thereafter', 'thereby', 'therefore', 'therein', 'thereupon', 'these',
-    'they', 'thick', 'thin', 'third', 'this', 'those', 'though', 'three',
-    'through', 'throughout', 'thru', 'thus', 'to', 'together', 'too',
-    'toward', 'towards', 'twelve', 'twenty', 'two', 'un', 'under',
-    'until', 'up', 'upon', 'us', 'very', 'via', 'was', 'we', 'well',
-    'were', 'what', 'whatever', 'when', 'whence', 'whenever', 'where',
-    'whereafter', 'whereas', 'whereby', 'wherein', 'whereupon',
-    'wherever', 'whether', 'which', 'while', 'whither', 'who', 'whoever',
-    'whole', 'whom', 'whose', 'why', 'will', 'with', 'within', 'without',
-    'would', 'yet', 'you', 'your', 'yours', 'yourself', 'yourselves',
-    )
-stop_word_dict={}
-for word in stop_words: stop_word_dict[word]=None
--- a/src/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/SETUP.cfg
+++ b/src/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/SETUP.cfg
-<extension ISO_8859_1_Splitter>
-  source src/ISO_8859_1_Splitter.c
-</extension>
--- a/src/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/__init__.py
+++ b/src/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/__init__.py
-from ISO_8859_1_Splitter import ISO_8859_1_Splitter
-def Splitter(txt,stopwords=None,encoding='latin1'):
-    return ISO_8859_1_Splitter(txt,stopwords)
--- a/src/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/src/ISO_8859_1_Splitter.c
+++ b/src/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/src/ISO_8859_1_Splitter.c
-/*****************************************************************************
-  Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
-  This software is subject to the provisions of the Zope Public License,
-  Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-  THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-  WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-  WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-  FOR A PARTICULAR PURPOSE
- ****************************************************************************/
-#include "Python.h"
-#include <ctype.h>
-#define ASSIGN(V,E) {PyObject *__e; __e=(E); Py_XDECREF(V); (V)=__e;}
-#define UNLESS(E) if(!(E))
-#define UNLESS_ASSIGN(V,E) ASSIGN(V,E) UNLESS(V)
-#define UPPERCASE "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-#define LOWERCASE "abcdefghijklmnopqrstuvwxyz"
-#define DIGITSETC "0123456789-"
-static unsigned char letdig[256];
-static unsigned char trtolower[256];
-typedef struct
-{
-    PyObject_HEAD
-    PyObject *text, *synstop;
-    char *here, *end;
-    int index;
-    int allow_single_chars;
-    int index_numbers;
-    int max_len;
-    int casefolding;
-}
-Splitter;
-static PyObject *next_word(Splitter *,char **,char **);
-static int myisalnum(int c)
-{
-    return letdig[(unsigned char)c];
-}
-static int mytolower(int c)
-{
-    return trtolower[(unsigned char)c];
-}
-static int myisspace(int c)
-{
-    if (myisalnum(c))
-        return 0;
-    return isspace(c);
-}
-static void initSplitterTrtabs(void)
-{
-    int i;
-    static int initialized=0;
-    if (initialized)
-        return;
-    initialized=1;
-    for (i=0;i<256;i++) {
-        letdig[i]=0;
-        trtolower[i]=i;
-    }
-    for (i=0;i<sizeof(UPPERCASE);i++) {
-        trtolower[(unsigned char)UPPERCASE[i]]=LOWERCASE[i];
-        letdig[(unsigned char)LOWERCASE[i]]=1;
-        letdig[(unsigned char)UPPERCASE[i]]=1;
-    }
-    for (i=0;i<sizeof(DIGITSETC);i++) {
-        letdig[(unsigned char)DIGITSETC[i]]=1;
-    }
-}
-static void
-Splitter_reset(Splitter *self)
-{
-    self->here = PyString_AsString(self->text);
-    self->index = -1;
-}
-static void
-Splitter_dealloc(Splitter *self)
-{
-    Py_XDECREF(self->text);
-    Py_XDECREF(self->synstop);
-    PyObject_DEL(self);
-}
-static int
-Splitter_length(Splitter *self)
-{
-    PyObject *res=0;
-    Splitter_reset(self);
-    while(1) {
-        UNLESS_ASSIGN(res,next_word(self,NULL,NULL)) return -1;
-        UNLESS(PyString_Check(res)) {
-            Py_DECREF(res);
-            break;
-        }
-    }
-    return self->index+1;
-}
-static PyObject *
-Splitter_split(Splitter*self)
-{
-    PyObject *list=NULL,*word=NULL;
-    UNLESS(list = PyList_New(0)) return NULL;
-    Splitter_reset(self);
-    while (1) {
-        Py_XDECREF(word);
-        UNLESS(word = next_word(self,NULL,NULL)) return NULL;
-        if (word == Py_None) {
-            return list;
-        }
-        PyList_Append(list,word);
-    }
-    return list;
-}
-static PyObject *
-Splitter_concat(Splitter *self, PyObject *other)
-{
-    PyErr_SetString(PyExc_TypeError, "Cannot concatenate Splitters.");
-    return NULL;
-}
-static PyObject *
-Splitter_repeat(Splitter *self, long n)
-{
-    PyErr_SetString(PyExc_TypeError, "Cannot repeat Splitters.");
-    return NULL;
-}
-/*
-  Map an input word to an output word by applying standard
-  filtering/mapping words, including synonyms/stop words.
-  Input is a word.
-  Output is:
-     None -- The word is a stop word
-     sometext -- A replacement for the word
- */
-static PyObject *
-check_synstop(Splitter *self, PyObject *word)
-{
-    PyObject *value;
-    char *cword;
-    int len;
-    cword = PyString_AsString(word);
-    len = PyString_Size(word) - 1;
-    len = PyString_Size(word);
-    if(len < 2 && ! self->allow_single_chars)	/* Single-letter words are stop words! */
-    {
-        Py_INCREF(Py_None);
-        return Py_None;
-    }
-    /*************************************************************
-      Test whether a word has any letters.                       *
-                                                                 */
-    for (; --len >= 0 && ! isalpha((unsigned char)cword[len]); )
-        ;
-    if (len < 0 && ! self->index_numbers) {
-        Py_INCREF(Py_None);
-        return Py_None;
-    }
-    /*
-     * If no letters, treat it as a stop word.
-     *************************************************************/
-    Py_INCREF(word);
-    if (self->synstop == NULL)
-        return word;
-    while ((value = PyObject_GetItem(self->synstop, word)) &&
-            PyString_Check(value)) {
-        ASSIGN(word,value);
-        if(len++ > 100)
-            break;	/* Avoid infinite recurssion */
-    }
-    if (value == NULL) {
-        PyErr_Clear();
-        return word;
-    }
-    return value;		/* Which must be None! */
-}
-static PyObject *
-next_word(Splitter *self, char **startpos, char **endpos)
-{
-    char wbuf[256];
-    char *end, *here, *b;
-    int i = 0, c;
-    PyObject *pyword, *res;
-    here=self->here;
-    end=self->end;
-    b=wbuf;
-    while (here < end) {
-        /* skip hyphens */
-        if ((i > 0) && (*here == '-')) {
-            here++;
-            while (myisspace(*here) && (here < end))
-                here++;
-            continue;
-        }
-        if (self->casefolding)
-            c=mytolower(*here);
-        else
-            c = (*here);
-        /* Check to see if this character is part of a word */
-        if(myisalnum((unsigned char)c) || c=='/') { /* Found a word character */
-            if(startpos && i==0)
-                *startpos=here;
-            if(i++ < self->max_len)
-                *b++ = c;
-        } else if (i != 0) { /* We've found the end of a word */
-            if(i >= self->max_len)
-                i=self->max_len; /* "stem" the long word */
-            UNLESS(pyword = PyString_FromStringAndSize(wbuf, i)) {
-                self->here=here;
-                return NULL;
-            }
-            UNLESS(res = check_synstop(self, pyword)) {
-                self->here=here;
-                Py_DECREF(pyword);
-                return NULL;
-            }
-            if (res != Py_None) {
-                if(endpos)
-                    *endpos=here;
-                self->here=here;
-                Py_DECREF(pyword);
-                self->index++;
-                return res;
-            }
-            /* The word is a stopword, so ignore it */
-            Py_DECREF(res);
-            Py_DECREF(pyword);
-            i = 0;
-            b=wbuf;
-        }
-        here++;
-    }
-    self->here=here;
-    /* We've reached the end of the string */
-    if(i >= self->max_len)
-        i=self->max_len; /* "stem" the long word */
-    if (i == 0) {
-        /* No words */
-        self->here=here;
-        Py_INCREF(Py_None);
-        return Py_None;
-    }
-    UNLESS(pyword = PyString_FromStringAndSize(wbuf, i)) return NULL;
-    if(endpos)
-        *endpos=here;
-    res = check_synstop(self, pyword);
-    Py_DECREF(pyword);
-    if(PyString_Check(res))
-        self->index++;
-    return res;
-}
-static PyObject *
-Splitter_item(Splitter *self, int i)
-{
-    PyObject *word = NULL;
-    if (i <= self->index)
-        Splitter_reset(self);
-    while(self->index < i) {
-        Py_XDECREF(word);
-        UNLESS(word = next_word(self,NULL,NULL)) return NULL;
-        if (word == Py_None) {
-            Py_DECREF(word);
-            PyErr_SetString(PyExc_IndexError,
-                            "Splitter index out of range");
-            return NULL;
-        }
-    }
-    return word;
-}
-static PyObject *
-Splitter_slice(Splitter *self, int i, int j)
-{
-    PyErr_SetString(PyExc_TypeError, "Cannot slice Splitters.");
-    return NULL;
-}
-static PySequenceMethods Splitter_as_sequence = {
-            (inquiry)Splitter_length,        /*sq_length*/
-            (binaryfunc)Splitter_concat,     /*sq_concat*/
-            (intargfunc)Splitter_repeat,     /*sq_repeat*/
-            (intargfunc)Splitter_item,       /*sq_item*/
-            (intintargfunc)Splitter_slice,   /*sq_slice*/
-            (intobjargproc)0,                    /*sq_ass_item*/
-            (intintobjargproc)0,                 /*sq_ass_slice*/
-        };
-static PyObject *
-Splitter_pos(Splitter *self, PyObject *args)
-{
-    char *start, *end, *ctext;
-    PyObject *res;
-    int i;
-    UNLESS(PyArg_Parse(args, "i", &i)) return NULL;
-    if (i <= self->index)
-        Splitter_reset(self);
-    while(self->index < i) {
-        UNLESS(res=next_word(self, &start, &end)) return NULL;
-        if(PyString_Check(res)) {
-            self->index++;
-            Py_DECREF(res);
-            continue;
-        }
-        Py_DECREF(res);
-        PyErr_SetString(PyExc_IndexError, "Splitter index out of range");
-        return NULL;
-    }
-    ctext=PyString_AsString(self->text);
-    return Py_BuildValue("(ii)", start - ctext, end - ctext);
-}
-static PyObject *
-Splitter_indexes(Splitter *self, PyObject *args)
-{
-    PyObject *word, *r, *w=0, *index=0;
-    int i=0;
-    UNLESS(PyArg_ParseTuple(args,"O",&word)) return NULL;
-    UNLESS(r=PyList_New(0)) return NULL;
-    UNLESS(word=check_synstop(self, word)) goto err;
-    Splitter_reset(self);
-    while(1) {
-        UNLESS_ASSIGN(w,next_word(self, NULL, NULL)) goto err;
-        UNLESS(PyString_Check(w)) break;
-        if(PyObject_Compare(word,w)==0) {
-            UNLESS_ASSIGN(index,PyInt_FromLong(i)) goto err;
-            if(PyList_Append(r,index) < 0)
-                goto err;
-        }
-        i++;
-    }
-    Py_XDECREF(w);
-    Py_XDECREF(index);
-    return r;
-err:
-    Py_DECREF(r);
-    Py_XDECREF(index);
-    return NULL;
-}
-static struct PyMethodDef Splitter_methods[] =
-    {
-        { "split", (PyCFunction)Splitter_split, 0,
-            "split() -- Split the string in one run"
-        },
-        { "pos", (PyCFunction)Splitter_pos, 0,
-            "pos(index) -- Return the starting and ending position of a token"
-        },
-        { "indexes", (PyCFunction)Splitter_indexes, METH_VARARGS,
-          "indexes(word) -- Return al list of the indexes of word in the sequence",
-        },
-        { NULL, NULL }		/* sentinel */
-    };
-static PyObject *
-Splitter_getattr(Splitter *self, char *name)
-{
-    return Py_FindMethod(Splitter_methods, (PyObject *)self, name);
-}
-static char SplitterType__doc__[] = "";
-static PyTypeObject SplitterType = {
-                                       PyObject_HEAD_INIT(NULL)
-                                       0,                                 /*ob_size*/
-                                       "Splitter",                    /*tp_name*/
-                                       sizeof(Splitter),              /*tp_basicsize*/
-                                       0,                                 /*tp_itemsize*/
-                                       /* methods */
-                                       (destructor)Splitter_dealloc,  /*tp_dealloc*/
-                                       (printfunc)0,                      /*tp_print*/
-                                       (getattrfunc)Splitter_getattr, /*tp_getattr*/
-                                       (setattrfunc)0,                    /*tp_setattr*/
-                                       (cmpfunc)0,                        /*tp_compare*/
-                                       (reprfunc)0,                       /*tp_repr*/
-                                       0,                                 /*tp_as_number*/
-                                       &Splitter_as_sequence,         /*tp_as_sequence*/
-                                       0,                                 /*tp_as_mapping*/
-                                       (hashfunc)0,                       /*tp_hash*/
-                                       (ternaryfunc)0,                    /*tp_call*/
-                                       (reprfunc)0,                       /*tp_str*/
-                                       /* Space for future expansion */
-                                       0L,0L,0L,0L,
-                                       SplitterType__doc__ /* Documentation string */
-                                   };
-static char *splitter_args[]={"doc","synstop","encoding","singlechar","indexnumbers","maxlen","casefolding",NULL};
-static PyObject *
-get_Splitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
-{
-    Splitter *self;
-    PyObject *doc, *synstop = NULL;
-    char * encoding="latin1";
-    int single_char = 0;
-    int index_numbers = 0;
-    int max_len=64;
-    int casefolding=1;
-    UNLESS(PyArg_ParseTupleAndKeywords(args,keywds,"O|Osiiii",splitter_args,&doc,&synstop,&encoding,&single_char,&index_numbers,&max_len,&casefolding)) return NULL;
-    if (index_numbers<0 || index_numbers>1) {
-        PyErr_SetString(PyExc_ValueError,"indexnumbers must be 0 or 1");
-        return NULL;
-    }
-    if (casefolding<0 || casefolding>1) {
-        PyErr_SetString(PyExc_ValueError,"casefolding must be 0 or 1");
-        return NULL;
-    }
-    if (single_char<0 || single_char>1) {
-        PyErr_SetString(PyExc_ValueError,"singlechar must be 0 or 1");
-        return NULL;
-    }
-    if (max_len<1 || max_len>128) {
-        PyErr_SetString(PyExc_ValueError,"maxlen must be between 1 and 128");
-        return NULL;
-    }
-    UNLESS(self = PyObject_NEW(Splitter, &SplitterType)) return NULL;
-    if(synstop) {
-        self->synstop=synstop;
-        Py_INCREF(synstop);
-    } else
-        self->synstop=NULL;
-    UNLESS(self->text = PyObject_Str(doc)) goto err;
-    UNLESS(self->here=PyString_AsString(self->text)) goto err;
-    self->end = self->here + PyString_Size(self->text);
-    self->allow_single_chars    = single_char;
-    self->index_numbers         = index_numbers;
-    self->max_len               = max_len;
-    self->casefolding           = casefolding;
-    self->index = -1;
-    return (PyObject*)self;
-err:
-    Py_DECREF(self);
-    return NULL;
-}
-static struct PyMethodDef Splitter_module_methods[] =
-    {
-        { "ISO_8859_1_Splitter", (PyCFunction)get_Splitter, METH_VARARGS|METH_KEYWORDS,
-          "ISO_8859_1_Splitter(doc[,synstop][,encoding][,singlechar][,indexnumbers][,maxlen][,casefolding]) -- Return a word splitter"
-        },
-        { NULL, NULL }
-    };
-static char Splitter_module_documentation[] =
-    "Parse source strings into sequences of words\n"
-    "\n"
-    "for use in an inverted index\n"
-    "\n"
-    "$Id$\n"
-    ;
-void
-initISO_8859_1_Splitter(void)
-{
-    PyObject *m;
-    /* Create the module and add the functions */
-    initSplitterTrtabs();
-    m = Py_InitModule4("ISO_8859_1_Splitter", Splitter_module_methods,
-                       Splitter_module_documentation,
-                       (PyObject*)NULL,PYTHON_API_VERSION);
-}
--- a/src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/SETUP.cfg
+++ b/src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/SETUP.cfg
-<extension UnicodeSplitter>
-  source src/UnicodeSplitter.c
-</extension>
--- a/src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/__init__.py
+++ b/src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/__init__.py
-from UnicodeSplitter import UnicodeSplitter as Splitter
--- a/src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/src/UnicodeSplitter.c
+++ b/src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/src/UnicodeSplitter.c
-/*****************************************************************************
-  Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
-  This software is subject to the provisions of the Zope Public License,
-  Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-  THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-  WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-  WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-  FOR A PARTICULAR PURPOSE
- ****************************************************************************/
-#include "Python.h"
-#ifndef min
-#define min(a,b) ((a)<(b)?(a):(b))
-#endif
-typedef struct
-{
-    PyObject_HEAD
-    PyObject *list;
-    PyObject *synstop;
-    int max_len;
-    int allow_single_chars;
-    int index_numbers;
-    int casefolding;
-}
-Splitter;
-static
-PyUnicodeObject *prepareString(Splitter *self, PyUnicodeObject *o);
-static PyObject *checkSynword(Splitter *self, PyObject *word)
-{
-    /* Always returns a borrowed reference */
-    PyObject *value;
-    if (PyUnicode_GetSize(word)==1 && ! self->allow_single_chars) {
-        Py_INCREF(Py_None);
-        return Py_None;
-    }
-    if (self->synstop) {
-        value = PyDict_GetItem(self->synstop,word);
-        if (value != NULL) {
-          return value;
-        }
-    }
-    return word;
-}
-static void
-Splitter_dealloc(Splitter *self)
-{
-    Py_XDECREF(self->list);
-    Py_XDECREF(self->synstop);
-    PyObject_DEL(self);
-}
-static int
-Splitter_length(Splitter *self)
-{
-    return PyList_Size(self->list);
-}
-static PyObject *
-Splitter_concat(Splitter *self, PyObject *other)
-{
-    PyErr_SetString(PyExc_TypeError, "Cannot concatenate Splitters.");
-    return NULL;
-}
-static PyObject *
-Splitter_repeat(Splitter *self, long n)
-{
-    PyErr_SetString(PyExc_TypeError, "Cannot repeat Splitters.");
-    return NULL;
-}
-static PyObject *
-Splitter_item(Splitter *self, int i)
-{
-  PyObject *item;
-  item = PyList_GetItem(self->list, i);
-  Py_XINCREF(item);  /* Promote borrowed ref unless exception */
-  return item;
-}
-static PyObject * 
-Splitter_split(Splitter *self) {
-    Py_INCREF(self->list);
-    return self->list;
-}
-static PyObject *
-Splitter_indexes(Splitter *self, PyObject *args)
-{
-    int i=0, size;
-    PyObject *word=NULL,*item=NULL,*r=NULL,*index=NULL;
-    if (! (PyArg_ParseTuple(args,"O",&word))) return NULL;
-    if (! (r=PyList_New(0))) return NULL;
-    size = PyList_Size(self->list);
-    for (i=0;i<size;i++) {
-        item=PyList_GET_ITEM(self->list,i);
-        if (PyUnicode_Compare(word,item)==0) {
-            index=PyInt_FromLong(i);
-            if(!index) return NULL;
-            PyList_Append(r,index);
-        }
-    }
-    return r;
-}
-static PyObject *
-Splitter_slice(Splitter *self, int i, int j)
-{
-    PyErr_SetString(PyExc_TypeError, "Cannot slice Splitters.");
-    return NULL;
-}
-static PySequenceMethods Splitter_as_sequence = {
-    (inquiry)Splitter_length,        /*sq_length*/
-    (binaryfunc)Splitter_concat,     /*sq_concat*/
-    (intargfunc)Splitter_repeat,     /*sq_repeat*/
-    (intargfunc)Splitter_item,       /*sq_item*/
-    (intintargfunc)Splitter_slice,   /*sq_slice*/
-    (intobjargproc)0,                    /*sq_ass_item*/
-    (intintobjargproc)0,                 /*sq_ass_slice*/
-};
-static struct PyMethodDef Splitter_methods[] =
-    {
-        { "split", (PyCFunction) Splitter_split, 0,
-          "split() -- Split string in one run" },
-        { "indexes", (PyCFunction)Splitter_indexes, METH_VARARGS,
-          "indexes(word) -- Return a list of the indexes of word in the sequence",
-        },
-        { NULL, NULL }		/* sentinel */
-    };
-static PyObject *
-Splitter_getattr(Splitter *self, char *name)
-{
-    return Py_FindMethod(Splitter_methods, (PyObject *)self, name);
-}
-static char SplitterType__doc__[] = "";
-static PyTypeObject SplitterType = {
-    PyObject_HEAD_INIT(NULL)
-    0,                                 /*ob_size*/
-    "Splitter",                    /*tp_name*/
-    sizeof(Splitter),              /*tp_basicsize*/
-    0,                                 /*tp_itemsize*/
-    /* methods */
-    (destructor)Splitter_dealloc,  /*tp_dealloc*/
-    (printfunc)0,                      /*tp_print*/
-    (getattrfunc)Splitter_getattr, /*tp_getattr*/
-    (setattrfunc)0,                    /*tp_setattr*/
-    (cmpfunc)0,                        /*tp_compare*/
-    (reprfunc)0,                       /*tp_repr*/
-    0,                                 /*tp_as_number*/
-    &Splitter_as_sequence,         /*tp_as_sequence*/
-    0,                                 /*tp_as_mapping*/
-    (hashfunc)0,                       /*tp_hash*/
-    (ternaryfunc)0,                    /*tp_call*/
-    (reprfunc)0,                       /*tp_str*/
-    /* Space for future expansion */
-    0L,0L,0L,0L,
-    SplitterType__doc__ /* Documentation string */
-};
-static int splitUnicodeString(Splitter *self,PyUnicodeObject *doc)
-{
-    PyObject *word,*synword;
-    PyUnicodeObject * doc1;
-    Py_UNICODE *s;
-    int len = doc->length;
-    int inside_word=0;
-    int i=0;
-    int start=0;
-    doc1 = prepareString(self,doc);
-    if (doc1 == NULL)
-      return -1;
-    s=doc1->str;
-    self->list = PyList_New(0);
-    for (i = 0; i < len; s++, i++) {
-        register Py_UNICODE ch;
-        ch = *s;
-        if (!inside_word) {
-            if (self->index_numbers) {
-                if (Py_UNICODE_ISALNUM(ch)) {
-                    inside_word=1;
-                    start = i;
-                }
-            } else {
-                if (Py_UNICODE_ISALPHA(ch)) {
-                    inside_word=1;
-                    start = i;
-                }
-            }
-        } else {
-            if (!(Py_UNICODE_ISALNUM(ch) || ch=='/' || ch=='_' || ch=='-')) {
-                inside_word = 0;
-                word = PySequence_GetSlice((PyObject *)doc1,start,
-                                           min(i, start + self->max_len));
-                if (word==NULL)
-                  goto err;
-                synword = checkSynword(self,word);
-                if (synword != Py_None) {
-                  PyList_Append(self->list,synword);
-                }
-                start =  0;
-#ifdef DEBUG
-                PyObject_Print(word,stdout,0);
-                fflush(stdout);
-#endif
-                Py_DECREF(word);
-            }
-        }
-    }
-    if (inside_word) {
-        word = PySequence_GetSlice((PyObject *)doc1,start,
-                                   min(len, start + self->max_len));
-        if (word==NULL)
-          goto err;
-        synword = checkSynword(self,word);
-        if (synword != Py_None) {
-          PyList_Append(self->list,synword);
-        }
-        Py_DECREF(word);
-    }
-#ifdef DEBUG
-    PyObject_Print(self->list,stdout,0);
-    fflush(stdout);
-#endif
-    Py_DECREF(doc1);
-    return 1;
- err:
-    Py_DECREF(doc1);
-    return -1;
-}
-static
-void fixlower(PyUnicodeObject *self)
-{
-    int len = self->length;
-    Py_UNICODE *s = self->str;
-    while (len-- > 0) {
-        register Py_UNICODE ch;
-        ch = Py_UNICODE_TOLOWER(*s);
-        if (ch != *s) *s = ch;
-        s++;
-    }
-}
-static
-PyUnicodeObject *prepareString(Splitter *self,PyUnicodeObject *o)
-{
-    PyUnicodeObject *u;
-    u = (PyUnicodeObject*) PyUnicode_FromUnicode(o->str, o->length);
-    if (u != NULL){
-        if (self->casefolding)
-          fixlower(u);
-    }
-    return  u;
-}
-static char *splitter_args[]={"doc","synstop","encoding","indexnumbers","singlechar","maxlen","casefolding",NULL};
-static PyObject *
-newSplitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
-{
-    Splitter *self=NULL;
-    PyObject *doc=NULL, *unicodedoc=NULL,*synstop=NULL;
-    char *encoding = "latin1";
-    int index_numbers = 0;
-    int max_len=64;
-    int single_char = 0;
-    int casefolding=1;
-    if (! (PyArg_ParseTupleAndKeywords(args,keywds,"O|Osiiii",splitter_args,&doc,&synstop,&encoding,&index_numbers,&single_char,&max_len,&casefolding))) return NULL;
-#ifdef DEBUG
-    puts("got text");
-    PyObject_Print(doc,stdout,0);
-    fflush(stdout);
-#endif
-    if (index_numbers<0 || index_numbers>1) {
-        PyErr_SetString(PyExc_ValueError,"indexnumbers must be 0 or 1");
-        return NULL;
-    }
-    if (casefolding<0 || casefolding>1) {
-        PyErr_SetString(PyExc_ValueError,"casefolding must be 0 or 1");
-        return NULL;
-    }
-    if (single_char<0 || single_char>1) {
-        PyErr_SetString(PyExc_ValueError,"singlechar must be 0 or 1");
-        return NULL;
-    }
-    if (max_len<1 || max_len>128) {
-        PyErr_SetString(PyExc_ValueError,"maxlen must be between 1 and 128");
-        return NULL;
-    }
-    if (PyString_Check(doc)) {
-        unicodedoc = PyUnicode_FromEncodedObject(doc,encoding,"strict");
-        if (unicodedoc ==NULL) {
-            PyErr_SetString(PyExc_UnicodeError, "Problem converting encoded string");
-            return NULL;
-        }
-    } else if( PyUnicode_Check(doc)) {
-        unicodedoc = doc;
-        Py_INCREF(unicodedoc);
-    } else {
-        PyErr_SetString(PyExc_TypeError, "first argument is neither string nor unicode.");
-        return NULL;
-    }
-    if (! (self = PyObject_NEW(Splitter, &SplitterType))) return NULL;
-    if (synstop) {
-        self->synstop = synstop;
-        Py_INCREF(synstop);
-    } else  self->synstop=NULL;
-    self->index_numbers      = index_numbers;
-    self->max_len            = max_len;
-    self->allow_single_chars = single_char;
-    self->casefolding        = casefolding;
-    if ((splitUnicodeString(self,(PyUnicodeObject *)unicodedoc)) < 0)
-      goto err;
-    Py_DECREF(unicodedoc);
-    return (PyObject*)self;
-err:
-    Py_DECREF(self);
-    Py_DECREF(unicodedoc);
-    return NULL;
-}
-static struct PyMethodDef Splitter_module_methods[] =
-    {
-        { "UnicodeSplitter", (PyCFunction)newSplitter,
-          METH_VARARGS|METH_KEYWORDS,
-          "UnicodeSplitter(doc[,synstop][,encoding='latin1'][,indexnumbers][,maxlen][,singlechar][,casefolding]) "
-          "-- Return a word splitter"
-        },
-        { NULL, NULL }
-    };
-static char Splitter_module_documentation[] =
-    "Parse source (unicode) string into sequences of words\n"
-    "\n"
-    "for use in an inverted index\n"
-    "\n"
-    "$Id$\n"
-    ;
-void
-initUnicodeSplitter(void)
-{
-    PyObject *m, *d;
-    char *rev="$Revision: 1.16 $";
-    /* Create the module and add the functions */
-    m = Py_InitModule4("UnicodeSplitter", Splitter_module_methods,
-                       Splitter_module_documentation,
-                       (PyObject*)NULL,PYTHON_API_VERSION);
-    /* Add some symbolic constants to the module */
-    d = PyModule_GetDict(m);
-    PyDict_SetItemString(d, "__version__",
-                         PyString_FromStringAndSize(rev+11,strlen(rev+11)-2));
-    if (PyErr_Occurred()) Py_FatalError("can't initialize module Splitter");
-}
--- a/src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/tests/__init__.py
+++ b/src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/tests/__init__.py
-# Nothing to see here.
--- a/src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/tests/testUnicodeSplitter.py
+++ b/src/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/tests/testUnicodeSplitter.py
-# -*- coding: ISO-8859-1 -*-
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-import os,sys,unittest
-from Products.PluginIndexes.TextIndex.Splitter.UnicodeSplitter.UnicodeSplitter \
-     import UnicodeSplitter
-class UnicodeSplitterTests(unittest.TestCase):
-    def setUp(self):
-        texts = ('The quick brown fox jumps over the lazy dog',
-                 'Bei den dreitgigen Angriffen seien auch bis'
-                 ' auf einen alle Flugpltze der Taliban zerstrt worden',
-            )
-        self.testdata = []
-        for t in texts:
-            uniLst = [unicode(x,'latin1') for x in t.lower().split(' ')]
-            self.testdata.append( (t, uniLst) )
-    def testSimpleSplit(self):
-        """ testing splitter functionality """
-        for t,expected in self.testdata:
-            fields = list(UnicodeSplitter(t))
-            assert fields == expected, "%s vs %s" % (fields,expected)
-        return 0
-    def testStopwords(self):
-        """ testing splitter with stopwords """
-        text = 'The quick brown fox jumps over The lazy dog'
-        expected = [ u'quick',u'brown',u'fox',u'jumps',u'over',u'lazy',u'cat']
-        sw_dict = {'the':None,'dog':'cat'}
-        splitter = UnicodeSplitter(text,sw_dict)
-        fields = list(splitter)
-        self.assertEquals(fields, expected)
-        self.assertEquals(splitter.indexes('jumps'), [3])
-def test_suite():
-    return unittest.makeSuite(UnicodeSplitterTests)
-def debug():
-    return test_suite().debug()
-def pdebug():
-    import pdb
-    pdb.run('debug()')
-def main():
-    unittest.TextTestRunner().run( test_suite() )
-if __name__ == '__main__':
-    if len(sys.argv) > 1:
-        globals()[sys.argv[1]]()
-    else:
-        main()
--- a/src/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/SETUP.cfg
+++ b/src/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/SETUP.cfg
-<extension ZopeSplitter>
-  source src/ZopeSplitter.c
-</extension>
--- a/src/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/__init__.py
+++ b/src/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/__init__.py
-from ZopeSplitter import ZopeSplitter
-def Splitter(txt,stopwords={},encoding="latin1"):
-    return ZopeSplitter(txt,stopwords)
--- a/src/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/src/ZopeSplitter.c
+++ b/src/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/src/ZopeSplitter.c
-/*****************************************************************************
-  Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
-  This software is subject to the provisions of the Zope Public License,
-  Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-  THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-  WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-  WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-  FOR A PARTICULAR PURPOSE
- ****************************************************************************/
-#include "Python.h"
-#include <ctype.h>
-#define ASSIGN(V,E) {PyObject *__e; __e=(E); Py_XDECREF(V); (V)=__e;}
-#define UNLESS(E) if(!(E))
-#define UNLESS_ASSIGN(V,E) ASSIGN(V,E) UNLESS(V)
-typedef struct
-{
-    PyObject_HEAD
-    PyObject *text, *synstop;
-    char *here, *end;
-    int index;
-    int allow_single_chars;
-    int index_numbers;
-    int max_len;
-    int casefolding;
-}
-Splitter;
-static PyObject *next_word(Splitter *, char **, char **);
-static void
-Splitter_reset(Splitter *self)
-{
-    self->here = PyString_AsString(self->text);
-    self->index = -1;
-}
-static void
-Splitter_dealloc(Splitter *self)
-{
-    Py_XDECREF(self->text);
-    Py_XDECREF(self->synstop);
-    PyObject_DEL(self);
-}
-static int
-Splitter_length(Splitter *self)
-{
-    PyObject *res=0;
-    Splitter_reset(self);
-    while(1) {
-        UNLESS_ASSIGN(res,next_word(self,NULL,NULL)) return -1;
-        UNLESS(PyString_Check(res)) {
-            Py_DECREF(res);
-            break;
-        }
-    }
-    return self->index+1;
-}
-static PyObject *
-Splitter_concat(Splitter *self, PyObject *other)
-{
-    PyErr_SetString(PyExc_TypeError, "Cannot concatenate Splitters.");
-    return NULL;
-}
-static PyObject *
-Splitter_repeat(Splitter *self, long n)
-{
-    PyErr_SetString(PyExc_TypeError, "Cannot repeat Splitters.");
-    return NULL;
-}
-/*
-  Map an input word to an output word by applying standard
-  filtering/mapping words, including synonyms/stop words.
-  Input is a word.
-  Output is:
-     None -- The word is a stop word
-     sometext -- A replacement for the word
- */
-static PyObject *
-check_synstop(Splitter *self, PyObject *word)
-{
-    PyObject *value;
-    char *cword;
-    int len;
-    cword = PyString_AS_STRING(word);
-    len = PyString_GET_SIZE(word);
-    if (len < 2 && !self->allow_single_chars)	
-    /* Single-letter words are stop words! */
-    {
-        Py_INCREF(Py_None);
-        return Py_None;
-    }
-    /*************************************************************
-      Test whether a word has any letters.                       *
-                                                                 */
-    for (; --len >= 0 && ! isalpha((unsigned char)cword[len]); )
-        ;
-    if (len < 0 && ! self->index_numbers) {
-        Py_INCREF(Py_None);
-        return Py_None;
-    }
-    /*
-     * If no letters, treat it as a stop word.
-     *************************************************************/
-    Py_INCREF(word);
-    if (self->synstop == NULL)
-        return word;
-    len = 0;
-    while ((value = PyObject_GetItem(self->synstop, word)) &&
-            PyString_Check(value)) {
-	Py_DECREF(word);
-	word = value;
-        if (len++ > 100)
-            break;	/* Avoid infinite recurssion */
-    }
-    if (value == NULL) {
-        PyErr_Clear();
-        return word;
-    }
-    return value;		/* Which must be None! */
-}
-static PyObject *
-next_word(Splitter *self, char **startpos, char **endpos)
-{
-    char wbuf[256];
-    char *end, *here, *b;
-    int i = 0, c;
-    PyObject *pyword, *res;
-    here=self->here;
-    end=self->end;
-    b=wbuf;
-    while (here < end) {
-        /* skip hyphens */
-        if ((i > 0) && (*here == '-')) {
-            here++;
-            while (isspace((unsigned char) *here) && (here < end))
-                here++;
-            continue;
-        }
-        if (self->casefolding) 
-            c = tolower((unsigned char) *here);
-        else
-            c = (unsigned char) *here;
-        /* Check to see if this character is part of a word */
-        if (isalnum((unsigned char)c) || c == '/' || c == '_') { 
-            /* Found a word character */
-            if (startpos && i == 0)
-                *startpos = here;
-            if (i++ < self->max_len)
-                *b++ = c;
-        } else if (i != 0) { /* We've found the end of a word */
-            if (i >= self->max_len)
-                i =self->max_len; /* "stem" the long word */
-            UNLESS(pyword = PyString_FromStringAndSize(wbuf, i)) {
-                self->here = here;
-                return NULL;
-            }
-            UNLESS(res = check_synstop(self, pyword)) {
-                self->here = here;
-                Py_DECREF(pyword);
-                return NULL;
-            }
-            if (res != Py_None) {
-                if (endpos)
-                    *endpos = here;
-                self->here = here;
-                Py_DECREF(pyword);
-                self->index++;
-                return res;
-            }
-            /* The word is a stopword, so ignore it */
-            Py_DECREF(res);
-            Py_DECREF(pyword);
-            i = 0;
-            b = wbuf;
-        }
-        here++;
-    }
-    self->here=here;
-    /* We've reached the end of the string */
-    if (i >= self->max_len)
-        i = self->max_len; /* "stem" the long word */
-    if (i == 0) {
-        /* No words */
-        self->here=here;
-        Py_INCREF(Py_None);
-        return Py_None;
-    }
-    UNLESS(pyword = PyString_FromStringAndSize(wbuf, i)) return NULL;
-    if(endpos)
-        *endpos=here;
-    res = check_synstop(self, pyword);
-    Py_DECREF(pyword);
-    if (PyString_Check(res))
-        self->index++;
-    return res;
-}
-static PyObject *
-Splitter_item(Splitter *self, int i)
-{
-    PyObject *word = NULL;
-    if (i <= self->index)
-        Splitter_reset(self);
-    while(self->index < i) {
-        Py_XDECREF(word);
-        UNLESS(word = next_word(self,NULL,NULL)) return NULL;
-        if (word == Py_None) {
-            Py_DECREF(word);
-            PyErr_SetString(PyExc_IndexError,
-                            "Splitter index out of range");
-            return NULL;
-        }
-    }
-    return word;
-}
-static PyObject *
-Splitter_split(Splitter*self)
-{
-    PyObject *list=NULL,*word=NULL;
-    UNLESS(list = PyList_New(0)) return NULL;
-    Splitter_reset(self);
-    while (1) {
-        Py_XDECREF(word);
-        UNLESS(word = next_word(self, NULL, NULL)) return NULL;
-        if (word == Py_None) {
-            return list;
-        }
-        PyList_Append(list,word);
-    }
-    return list;
-}
-static PyObject *
-Splitter_slice(Splitter *self, int i, int j)
-{
-    PyErr_SetString(PyExc_TypeError, "Cannot slice Splitters.");
-    return NULL;
-}
-static PySequenceMethods Splitter_as_sequence = {
-    (inquiry)Splitter_length,        /*sq_length*/
-    (binaryfunc)Splitter_concat,     /*sq_concat*/
-    (intargfunc)Splitter_repeat,     /*sq_repeat*/
-    (intargfunc)Splitter_item,       /*sq_item*/
-    (intintargfunc)Splitter_slice,   /*sq_slice*/
-    (intobjargproc)0,                    /*sq_ass_item*/
-    (intintobjargproc)0,                 /*sq_ass_slice*/
-};
-static PyObject *
-Splitter_pos(Splitter *self, PyObject *args)
-{
-    char *start, *end, *ctext;
-    PyObject *res;
-    int i;
-    UNLESS(PyArg_Parse(args, "i", &i)) return NULL;
-    if (i <= self->index)
-        Splitter_reset(self);
-    while(self->index < i) {
-        UNLESS(res=next_word(self, &start, &end)) return NULL;
-        if(PyString_Check(res)) {
-            self->index++;
-            Py_DECREF(res);
-            continue;
-        }
-        Py_DECREF(res);
-        PyErr_SetString(PyExc_IndexError, "Splitter index out of range");
-        return NULL;
-    }
-    ctext=PyString_AsString(self->text);
-    return Py_BuildValue("(ii)", start - ctext, end - ctext);
-}
-static PyObject *
-Splitter_indexes(Splitter *self, PyObject *args)
-{
-    PyObject *word, *r, *w=0, *index=0;
-    int i=0;
-    UNLESS(PyArg_ParseTuple(args,"O",&word)) return NULL;
-    UNLESS(r=PyList_New(0)) return NULL;
-    UNLESS(word=check_synstop(self, word)) goto err;
-    Splitter_reset(self);
-    while(1) {
-        UNLESS_ASSIGN(w,next_word(self, NULL, NULL)) goto err;
-        UNLESS(PyString_Check(w)) break;
-        if(PyObject_Compare(word,w)==0) {
-            UNLESS_ASSIGN(index,PyInt_FromLong(i)) goto err;
-            if(PyList_Append(r,index) < 0)
-                goto err;
-        }
-        i++;
-    }
-    Py_XDECREF(w);
-    Py_XDECREF(index);
-    return r;
-err:
-    Py_DECREF(r);
-    Py_XDECREF(index);
-    return NULL;
-}
-static struct PyMethodDef Splitter_methods[] =
-    {
-        { "split", (PyCFunction)Splitter_split, 0,
-            "split() -- Split complete string in one run"
-        },
-        { "pos", (PyCFunction)Splitter_pos, 0,
-          "pos(index) -- Return the starting and ending position of a token"
-        },
-        { "indexes", (PyCFunction)Splitter_indexes, METH_VARARGS,
-          "indexes(word) -- Return a list of the indexes of word in the sequence",
-        },
-        { NULL, NULL }		/* sentinel */
-    };
-static PyObject *
-Splitter_getattr(Splitter *self, char *name)
-{
-    return Py_FindMethod(Splitter_methods, (PyObject *)self, name);
-}
-static char SplitterType__doc__[] = "";
-static PyTypeObject SplitterType = {
-    PyObject_HEAD_INIT(NULL)
-    0,                                 /*ob_size*/
-    "Splitter",                    /*tp_name*/
-    sizeof(Splitter),              /*tp_basicsize*/
-    0,                                 /*tp_itemsize*/
-    /* methods */
-    (destructor)Splitter_dealloc,  /*tp_dealloc*/
-    (printfunc)0,                      /*tp_print*/
-    (getattrfunc)Splitter_getattr, /*tp_getattr*/
-    (setattrfunc)0,                    /*tp_setattr*/
-    (cmpfunc)0,                        /*tp_compare*/
-    (reprfunc)0,                       /*tp_repr*/
-    0,                                 /*tp_as_number*/
-    &Splitter_as_sequence,         /*tp_as_sequence*/
-    0,                                 /*tp_as_mapping*/
-    (hashfunc)0,                       /*tp_hash*/
-    (ternaryfunc)0,                    /*tp_call*/
-    (reprfunc)0,                       /*tp_str*/
-    /* Space for future expansion */
-    0L,0L,0L,0L,
-    SplitterType__doc__ /* Documentation string */
-};
-static char *splitter_args[]={"doc","synstop","encoding","singlechar","indexnumbers","maxlen","casefolding",NULL};
-static PyObject *
-get_Splitter(PyObject *modinfo, PyObject *args,PyObject * keywds)
-{
-    Splitter *self;
-    PyObject *doc, *synstop = NULL;
-    char *encoding = "latin1";
-    int single_char = 0;
-    int index_numbers = 0;
-    int max_len= 64;
-    int casefolding = 1;
-    UNLESS(PyArg_ParseTupleAndKeywords(args,keywds,"O|Osiiii",splitter_args,
-                                       &doc,
-                                       &synstop,
-                                       &encoding,
-                                       &single_char,
-                                       &index_numbers,
-                                       &max_len,
-                                       &casefolding
-                                    )) return NULL;
-    if (index_numbers<0 || index_numbers>1) {
-        PyErr_SetString(PyExc_ValueError,"indexnumbers must be 0 or 1");
-        return NULL;
-    }
-    if (casefolding<0 || casefolding>1) {
-        PyErr_SetString(PyExc_ValueError,"casefolding must be 0 or 1");
-        return NULL;
-    }
-    if (single_char<0 || single_char>1) {
-        PyErr_SetString(PyExc_ValueError,"singlechar must be 0 or 1");
-        return NULL;
-    }
-    if (max_len<1 || max_len>128) {
-        PyErr_SetString(PyExc_ValueError,"maxlen must be between 1 and 128");
-        return NULL;
-    }
-    UNLESS(self = PyObject_NEW(Splitter, &SplitterType)) return NULL;
-    if(synstop) {
-        self->synstop=synstop;
-        Py_INCREF(synstop);
-    } else
-        self->synstop=NULL;
-    UNLESS(self->text = PyObject_Str(doc)) goto err;
-    UNLESS(self->here = PyString_AS_STRING(self->text)) goto err;
-    self->end = self->here + PyString_GET_SIZE(self->text);
-    self->index = -1;
-    self->allow_single_chars = single_char;
-    self->index_numbers      = index_numbers;
-    self->max_len            = max_len;
-    self->casefolding        = casefolding;
-    return (PyObject*)self;
-err:
-    Py_DECREF(self);
-    return NULL;
-}
-static struct PyMethodDef Splitter_module_methods[] =
-    {
-        { "ZopeSplitter", (PyCFunction)get_Splitter, METH_VARARGS|METH_KEYWORDS,
-            "ZopeSplitter(doc[,synstop][,encoding][,singlechar][,indexnumbers][,maxlen][,casefolding]) -- Return a word splitter"
-        },
-        { NULL, NULL }
-    };
-static char Splitter_module_documentation[] =
-    "Parse source strings into sequences of words\n"
-    "\n"
-    "for use in an inverted index\n"
-    "\n"
-    "$Id$\n"
-    ;
-void
-initZopeSplitter(void)
-{
-    /* Create the module and add the functions */
-    Py_InitModule4("ZopeSplitter", Splitter_module_methods,
-		   Splitter_module_documentation, NULL, PYTHON_API_VERSION);
-}
--- a/src/Products/PluginIndexes/TextIndex/Splitter/__init__.py
+++ b/src/Products/PluginIndexes/TextIndex/Splitter/__init__.py
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-#############################################################################
-import os,sys,exceptions
-availableSplitters = (
-  ("ZopeSplitter" , "Zope Default Splitter"),
-  ("ISO_8859_1_Splitter" , "Werner Strobls ISO-8859-1 Splitter"),
-  ("UnicodeSplitter" , "Unicode-aware splitter")
-)
-splitterNames = map(lambda x: x[0],availableSplitters)
-def getSplitter(name=None):
-    if not name in splitterNames and name:
-        raise exceptions.RuntimeError, "No such splitter '%s'" % name
-    if not name: name = splitterNames[0]
-    if not vars().has_key(name):
-        exec( "from %s.%s import %s" % (name,name,name))
-    return vars()[name]
--- a/src/Products/PluginIndexes/TextIndex/Splitter/setup.py
+++ b/src/Products/PluginIndexes/TextIndex/Splitter/setup.py
-#!/usr/bin/env python
-from distutils.core import setup,Extension
-import os,exceptions,commands,sys
-CFLAGS = []
-LFLAGS = []
-LIBS=[]
-setup (name = "Splitter",
-    version = "1.0",
-    description = "Splitters for Zope 2.5",
-    author = "Andreas Jung",
-    author_email = "andreas@zope.com",
-    url = "http://www.zope.org/...",
-    ext_modules=[
-        Extension("ZopeSplitter",['ZopeSplitter/src/ZopeSplitter.c']), \
-        Extension("ISO_8859_1_Splitter",['ISO_8859_1_Splitter/src/ISO_8859_1_Splitter.c']),   \
-        Extension("UnicodeSplitter",['UnicodeSplitter/src/UnicodeSplitter.c'])   \
-        ]
-      )
--- a/src/Products/PluginIndexes/TextIndex/TextIndex.py
+++ b/src/Products/PluginIndexes/TextIndex/TextIndex.py
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Deprecated text index. Please use ZCTextIndex instead.
-$Id$
-"""
-from cgi import escape
-from logging import getLogger
-import operator
-import re
-import warnings
-from types import *
-from Acquisition import Implicit
-from App.special_dtml import DTMLFile
-from OFS.SimpleItem import SimpleItem
-from BTrees.IIBTree import difference
-from BTrees.IIBTree import IIBTree
-from BTrees.IIBTree import IIBucket
-from BTrees.IIBTree import IISet
-from BTrees.IIBTree import weightedIntersection
-from BTrees.IOBTree import IOBTree
-from BTrees.OIBTree import OIBTree
-from Persistence import Persistent
-from zope.interface import implements
-from Products.PluginIndexes.common import safe_callable
-from Products.PluginIndexes.common.ResultList import ResultList
-from Products.PluginIndexes.common.util import parseIndexRequest
-from Products.PluginIndexes.interfaces import IPluggableIndex
-from Products.PluginIndexes.interfaces import ITextIndex
-from Products.PluginIndexes.TextIndex.Lexicon import Lexicon
-LOG = getLogger('TextIndex')
-class Op:
-    def __init__(self, name):
-        self.name = name
-    def __repr__(self):
-        return self.name
-    __str__ = __repr__
-AndNot      = Op('andnot')
-And         = Op('and')
-Or          = Op('or')
-Near        = Op('...')
-QueryError  = 'TextIndex.QueryError'
-operator_dict = {'andnot': AndNot, 'and': And, 'or': Or,
-                 '...': Near, 'near': Near,
-                 AndNot: AndNot, And: And, Or: Or, Near: Near}
-class TextIndex(Persistent, Implicit, SimpleItem):
-    """Full-text index.
-    There is a ZCatalog UML model that sheds some light on what is
-    going on here.  '_index' is a BTree which maps word ids to mapping
-    from document id to score.  Something like:
-      {'bob' : {1 : 5, 2 : 3, 42 : 9}}
-      {'uncle' : {1 : 1}}
-    The '_unindex' attribute is a mapping from document id to word
-    ids.  This mapping allows the catalog to unindex an object:
-      {42 : ('bob', 'is', 'your', 'uncle')
-    This isn't exactly how things are represented in memory, many
-    optimizations happen along the way.
-    """
-    implements(ITextIndex, IPluggableIndex)
-    meta_type='TextIndex'
-    query_options = ('query', 'operator')
-    manage_options= (
-        {'label': 'Settings',
-         'action': 'manage_main',
-         'help': ('TextIndex','TextIndex_Settings.stx')},
-    )
-    def __init__(self, id, ignore_ex=None, call_methods=None, lexicon=None,
-                 caller=None, extra=None):
-        """Create an index
-        The arguments are:
-          'id' -- the name of the item attribute to index.  This is
-          either an attribute name or a record key.
-          'ignore_ex' -- Tells the indexer to ignore exceptions that
-          are rasied when indexing an object.
-          'call_methods' -- Tells the indexer to call methods instead
-          of getattr or getitem to get an attribute.
-          'lexicon' is the lexicon object to specify, if None, the
-          index will use a private lexicon.
-          'caller' -- instance that created the index (maybe None)
-          'extra'  -- Record to keep additional parameters
-        """
-        self.id             = id
-        self.ignore_ex      = ignore_ex
-        self.call_methods   = call_methods
-        self.catalog        = caller
-        # Default text index operator (should be visible to ZMI)
-        self.useOperator  = 'or'
-        if extra:   self.vocabulary_id = extra.vocabulary
-        else:       self.vocabulary_id = "Vocabulary"
-        self._lexicon = None
-        self.clear()
-        if lexicon is not None:
-            # We need to hold a reference to the lexicon, since we can't
-            # really change lexicons.
-            self._lexicon = lexicon
-            self.vocabulary_id = '__userdefined__'
-    def getId(self):
-        return self.id
-    def getLexicon(self, vocab_id=None):
-        """Get the Lexicon in use.
-        """
-        if self._lexicon is None:
-            ## if no lexicon is provided, create a default one
-            try:
-                if self.catalog is None:
-                    self.catalog = self.aq_inner.aq_parent.aq_base
-                self._lexicon = getattr(self.catalog,self.vocabulary_id).getLexicon()
-            except:
-                self._lexicon = Lexicon()
-                self.vocabulary_id = '__intern__'
-        return self._lexicon
-    def __nonzero__(self):
-        return not not self._unindex
-    def clear(self):
-        """Reinitialize the text index."""
-        self._index   = IOBTree()
-        self._unindex = IOBTree()
-        if self.getLexicon() and self.vocabulary_id=='__userdefined__':
-            self.getLexicon().clear()
-        self._lexicon = None
-    def _convertBTrees(self, threshold=200):
-        if type(self._lexicon) is type(''):
-            # Turn the name reference into a hard reference.
-            self._lexicon=self.getLexicon()
-        if type(self._index) is IOBTree: return
-        from BTrees.convert import convert
-        _index=self._index
-        self._index=IOBTree()
-        def convertScores(scores,
-                          type=type,
-                          IIBTree=IIBTree
-                          ):
-            if type(scores) is not tuple and type(scores) is not IIBTree():
-                scores=IIBTree(scores)
-            return scores
-        convert(_index, self._index, threshold, convertScores)
-        _unindex=self._unindex
-        self._unindex=IOBTree()
-        convert(_unindex, self._unindex, threshold)
-    def histogram(self, type=type):
-        """Return a mapping which provides a histogram of the number of
-        elements found at each point in the index."""
-        histogram = IIBucket()
-        for (key, value) in self._index.items():
-            if type(value) is tuple:
-                entry = 1
-            else:
-                entry = len(value)
-            histogram[entry] = histogram.get(entry, 0) + 1
-        return histogram
-    def getEntryForObject(self, rid, default=None):
-        """Get all information contained for a specific object.
-        This takes the objects record ID as it's main argument."""
-        results = self._unindex.get(rid, None)
-        if results is None:
-            return default
-        else:
-            return tuple(map(self.getLexicon().getWord,
-                             results))
-    def insertForwardIndexEntry(self, entry, documentId, score=1):
-        """Uses the information provided to update the indexes.
-        The basic logic for choice of data structure is based on
-        the number of entries as follows:
-            1      tuple
-            2-3    dictionary
-            4+     bucket.
-        """
-        index=self._index
-        indexRow = index.get(entry, None)
-        if indexRow is not None:
-            if type(indexRow) is tuple:
-                # Tuples are only used for rows which have only
-                # a single entry.  Since we now need more, we'll
-                # promote it to a mapping object (dictionary).
-                # First, make sure we're not already in it, if so
-                # update the score if necessary.
-                if indexRow[0] == documentId:
-                    if indexRow[1] != score:
-                        indexRow = (documentId, score)
-                        index[entry] = indexRow
-                else:
-                    indexRow={
-                        indexRow[0]: indexRow[1],
-                        documentId: score,
-                        }
-                    index[entry] = indexRow
-            else:
-                if indexRow.get(documentId, -1) != score:
-                    # score changed (or new entry)
-                    if type(indexRow) is dict:
-                        indexRow[documentId] = score
-                        if len(indexRow) > 3:
-                            # Big enough to give it's own database record
-                            indexRow=IIBTree(indexRow)
-                        index[entry] = indexRow
-                    else:
-                        indexRow[documentId] = score
-        else:
-            # We don't have any information at this point, so we'll
-            # put our first entry in, and use a tuple to save space
-            index[entry] = (documentId, score)
-    def index_object(self, documentId, obj, threshold=None):
-        """ Index an object:
-        'documentId' is the integer id of the document
-        'obj' is the object to be indexed
-        'threshold' is the number of words to process between
-        commiting subtransactions.  If 'None' subtransactions are
-        disabled. """
-        # sniff the object for our 'id', the 'document source' of the
-        # index is this attribute.  If it smells callable, call it.
-        try:
-            source = getattr(obj, self.id)
-            if safe_callable(source):
-                source = source()
-            if not isinstance(source, unicode):
-                source = str(source)
-        except (AttributeError, TypeError):
-            return 0
-        # sniff the object for 'id'+'_encoding'
-        try:
-            encoding = getattr(obj, self.id+'_encoding')
-            if safe_callable(encoding ):
-                encoding = str(encoding())
-            else:
-                encoding = str(encoding)
-        except (AttributeError, TypeError):
-            encoding = 'latin1'
-        lexicon = self.getLexicon()
-        splitter = lexicon.Splitter
-        wordScores = OIBTree()
-        last = None
-        # Run through the words and score them
-        for word in list(splitter(source,encoding=encoding)):
-            if word[0] == '\"':
-                last = self._subindex(word[1:-1], wordScores, last, splitter)
-            else:
-                if word==last: continue
-                last=word
-                wordScores[word]=wordScores.get(word,0)+1
-        # Convert scores to use wids:
-        widScores=IIBucket()
-        getWid=lexicon.getWordId
-        for word, score in wordScores.items():
-            widScores[getWid(word)]=score
-        del wordScores
-        currentWids=IISet(self._unindex.get(documentId, []))
-        # Get rid of document words that are no longer indexed
-        self.unindex_objectWids(documentId, difference(currentWids, widScores))
-        # Now index the words. Note that the new xIBTrees are clever
-        # enough to do nothing when there isn't a change. Woo hoo.
-        insert=self.insertForwardIndexEntry
-        for wid, score in widScores.items():
-            insert(wid, documentId, score)
-        # Save the unindexing info if it's changed:
-        wids=widScores.keys()
-        if wids != currentWids.keys():
-            self._unindex[documentId]=wids
-        return len(wids)
-    def _subindex(self, source, wordScores, last, splitter):
-        """Recursively handle multi-word synonyms"""
-        for word in splitter(source):
-            if word[0] == '\"':
-                last = self._subindex(word[1:-1], wordScores, last, splitter)
-            else:
-                if word==last: continue
-                last=word
-                wordScores[word]=wordScores.get(word,0)+1
-        return last
-    def unindex_object(self, i):
-        """ carefully unindex document with integer id 'i' from the text
-        index and do not fail if it does not exist """
-        index = self._index
-        unindex = self._unindex
-        wids = unindex.get(i, None)
-        if wids is not None:
-            self.unindex_objectWids(i, wids)
-            del unindex[i]
-    def unindex_objectWids(self, i, wids):
-        """ carefully unindex document with integer id 'i' from the text
-        index and do not fail if it does not exist """
-        index = self._index
-        get=index.get
-        for wid in wids:
-            widScores = get(wid, None)
-            if widScores is None:
-                LOG.error('unindex_object tried to unindex nonexistent'
-                          ' document, wid  %s, %s' % (i,wid))
-                continue
-            if type(widScores) is tuple:
-                del index[wid]
-            else:
-                try:
-                    del widScores[i]
-                    if widScores:
-                        if type(widScores) is dict:
-                            if len(widScores) == 1:
-                                # convert to tuple
-                                widScores = widScores.items()[0]
-                            index[wid]=widScores
-                    else:
-                        del index[wid]
-                except (KeyError, IndexError, TypeError):
-                    LOG.error('unindex_object tried to unindex nonexistent'
-                              ' document %s' % str(i))
-    def __getitem__(self, word):
-        """Return an InvertedIndex-style result "list"
-        Note that this differentiates between being passed an Integer
-        and a String.  Strings are looked up in the lexicon, whereas
-        Integers are assumed to be resolved word ids. """
-        if type(word) is IntType:
-            # We have a word ID
-            result = self._index.get(word, {})
-            return ResultList(result, (word,), self)
-        else:
-            splitSource = tuple(self.getLexicon().Splitter(word))
-            if not splitSource:
-                return ResultList({}, (word,), self)
-            if len(splitSource) == 1:
-                splitSource = splitSource[0]
-                if splitSource[:1] == '"' and splitSource[-1:] == '"':
-                    return self[splitSource]
-                wids=self.getLexicon().get(splitSource)
-                if wids:
-                    r = self._index.get(wids[0], None)
-                    if r is None:
-                        r = {}
-                else:
-                    r={}
-                return ResultList(r, (splitSource,), self)
-            r = None
-            for word in splitSource:
-                rr = self[word]
-                if r is None:
-                    r = rr
-                else:
-                    r = r.near(rr)
-            return r
-    def _apply_index(self, request):
-        """ Apply the index to query parameters given in the argument,
-        request
-        The argument should be a mapping object.
-        If the request does not contain the needed parameters, then
-        None is returned.
-        Otherwise two objects are returned.  The first object is a
-        ResultSet containing the record numbers of the matching
-        records.  The second object is a tuple containing the names of
-        all data fields used.
-        """
-        record = parseIndexRequest(request, self.id, self.query_options)
-        if record.keys is None:
-            return None
-        # Changed for 2.4
-        # We use the default operator that can me managed via the ZMI
-        qop = record.get('operator', self.useOperator)
-        # We keep this for pre-2.4 compatibility
-        # This stinking code should go away somewhere. A global
-        # textindex_operator makes no sense when using multiple
-        # text indexes inside a catalog. An index operator should
-        # should be specified on a per-index base
-        if request.has_key('textindex_operator'):
-            qop = request['textindex_operator']
-            warnings.warn("The usage of the 'textindex_operator' "
-                          "is no longer recommended.\n"
-                          "Please use a mapping object and the "
-                          "'operator' key to specify the operator.")
-        query_operator = operator_dict.get(qop)
-        if query_operator is None:
-            raise exceptions.RuntimeError, ("Invalid operator '%s' "
-                                            "for a TextIndex" % escape(qop))
-        r = None
-        for key in record.keys:
-            key = key.strip()
-            if not key:
-                continue
-            b = self.query(key, query_operator).bucket()
-            w, r = weightedIntersection(r, b)
-        if r is not None:
-            return r, (self.id,)
-        return (IIBucket(), (self.id,))
-    def positions(self, docid, words,
-                  # This was never tested: obj
-                  ):
-        """Return the positions in the document for the given document
-        id of the word, word."""
-        return [1]
-        #################################################################
-        # The code below here is broken and requires an API change to fix
-        # it. Waaaaa.
-        if self._schema is None:
-            f = getattr
-        else:
-            f = operator.__getitem__
-            id = self._schema[self.id]
-        if self.call_methods:
-            doc = str(f(obj, self.id)())
-        else:
-            doc = str(f(obj, self.id))
-        r = []
-        for word in words:
-            r = r+self.getLexicon().Splitter(doc).indexes(word)
-        return r
-    def query(self, s, default_operator=Or):
-        """ Evaluate a query string.
-        Convert the query string into a data structure of nested lists
-        and strings, based on the grouping of whitespace-separated
-        strings by parentheses and quotes.  The 'Near' operator is
-        inserted between the strings of a quoted group.
-        The Lexicon is given the opportunity to transform the
-        data structure.  Stemming, wildcards, and translation are
-        possible Lexicon services.
-        Finally, the query list is normalized so that it and every
-        sub-list consist of non-operator strings or lists separated
-        by operators. This list is evaluated.
-        """
-        # First replace any occurences of " and not " with " andnot "
-        s = re.sub('(?i)\s+and\s*not\s+', ' andnot ', s)
-        # Parse parentheses and quotes
-        q = parse(s)
-        # Allow the Lexicon to process the query
-        q = self.getLexicon().query_hook(q)
-        # Insert the default operator between any two search terms not
-        # already joined by an operator.
-        q = parse2(q, default_operator)
-        # evalute the final 'expression'
-        return self.evaluate(q)
-    def get_operands(self, q, i):
-        """Evaluate and return the left and right operands for an operator"""
-        try:
-            left  = q[i - 1]
-            right = q[i + 1]
-        except IndexError:
-            raise QueryError, "Malformed query"
-        operandType = type(left)
-        if operandType is IntType:
-            left = self[left]
-        elif isinstance(left,str) or isinstance(left,unicode):
-            left = self[left]
-        elif operandType is list:
-            left = self.evaluate(left)
-        operandType = type(right)
-        if operandType is IntType:
-            right = self[right]
-        elif isinstance(right,str) or isinstance(right,unicode):
-            right = self[right]
-        elif operandType is list:
-            right = self.evaluate(right)
-        return (left, right)
-    def evaluate(self, query):
-        """Evaluate a parsed query"""
-        # Strip off meaningless layers
-        while isinstance(query, list) and len(query) == 1:
-            query = query[0]
-        # If it's not a list, assume a string or number
-        if not isinstance(query, list):
-            return self[query]
-        # Now we need to loop through the query and reduce
-        # operators.  They are currently evaluated in the following
-        # order: AndNot -> And -> Or -> Near
-        i = 0
-        while (i < len(query)):
-            if query[i] is AndNot:
-                left, right = self.get_operands(query, i)
-                val = left.and_not(right)
-                query[(i - 1) : (i + 2)] = [ val ]
-            else: i = i + 1
-        i = 0
-        while (i < len(query)):
-            if query[i] is And:
-                left, right = self.get_operands(query, i)
-                val = left & right
-                query[(i - 1) : (i + 2)] = [ val ]
-            else: i = i + 1
-        i = 0
-        while (i < len(query)):
-            if query[i] is Or:
-                left, right = self.get_operands(query, i)
-                val = left | right
-                query[(i - 1) : (i + 2)] = [ val ]
-            else: i = i + 1
-        i = 0
-        while (i < len(query)):
-            if query[i] is Near:
-                left, right = self.get_operands(query, i)
-                val = left.near(right)
-                query[(i - 1) : (i + 2)] = [ val ]
-            else: i = i + 1
-        if (len(query) != 1):
-            raise QueryError, "Malformed query"
-        return query[0]
-    def getIndexSourceNames(self):
-        """ return name of indexed attributes """
-        return (self.id, )
-    def numObjects(self):
-        """ return number of index objects """
-        return len(self._index)
-    def manage_setPreferences(self,vocabulary,
-                               REQUEST=None,RESPONSE=None,URL2=None):
-        """ preferences of TextIndex """
-        if self.vocabulary_id != vocabulary:
-            self.clear()
-            self.vocabulary_id    = vocabulary
-        if RESPONSE:
-            RESPONSE.redirect(URL2 + '/manage_main?manage_tabs_message=Preferences%20saved')
-    manage = manage_main = DTMLFile("dtml/manageTextIndex",globals())
-    manage_main._setName('manage_main')
-    manage_vocabulary = DTMLFile("dtml/manageVocabulary",globals())
-def parse(s):
-    """Parse parentheses and quotes"""
-    l = []
-    tmp = s.lower()
-    p = parens(tmp)
-    while p is not None:
-        # Look for quotes in the section of the string before
-        # the parentheses, then parse the string inside the parens
-        l = l + quotes(p[0])
-        l.append(parse(p[1]))
-        # continue looking through the rest of the string
-        tmp = p[2]
-        p = parens(tmp)
-    return l + quotes(tmp)
-def parse2(q, default_operator, operator_dict=operator_dict):
-    """Find operators and operands"""
-    isop = operator_dict.has_key
-    i = 0
-    while i < len(q):
-        e = q[i]
-        if isinstance(e, list):
-            q[i] = parse2(e, default_operator)
-            if i % 2:
-                q.insert(i, default_operator)
-                i = i + 1
-        elif i % 2:
-            # This element should be an operator
-            if isop(e):
-                # Ensure that it is identical, not merely equal.
-                q[i] = operator_dict[e]
-            else:
-                # Insert the default operator.
-                q.insert(i, default_operator)
-                i = i + 1
-        i = i + 1
-    return q
-def parens(s, parens_re=re.compile('[()]').search):
-    mo = parens_re(s)
-    if mo is None:
-        return
-    open_index = mo.start(0) + 1
-    paren_count = 0
-    while mo is not None:
-        index = mo.start(0)
-        if s[index] == '(':
-            paren_count = paren_count + 1
-        else:
-            paren_count = paren_count - 1
-            if paren_count == 0:
-                return (s[:open_index - 1], s[open_index:index],
-                        s[index + 1:])
-            if paren_count < 0:
-                break
-        mo = parens_re(s, index + 1)
-    raise QueryError, "Mismatched parentheses"
-def quotes(s):
-    if '"' not in s:
-        return s.split()
-    # split up quoted regions
-    splitted = re.split('\s*\"\s*', s)
-    if (len(splitted) % 2) == 0: raise QueryError, "Mismatched quotes"
-    for i in range(1,len(splitted),2):
-        # split the quoted region into words
-        words = splitted[i] = splitted[i].split()
-        # put the Proxmity operator in between quoted words
-        j = len(words) - 1
-        while j > 0:
-            words.insert(j, Near)
-            j = j - 1
-    i = len(splitted) - 1
-    while i >= 0:
-        # split the non-quoted region into words
-        splitted[i:i+1] = splitted[i].split()
-        i = i - 2
-    return filter(None, splitted)
-manage_addTextIndexForm = DTMLFile('dtml/addTextIndex', globals())
-def manage_addTextIndex(self, id, extra=None, REQUEST=None, RESPONSE=None, URL3=None):
-    """Add a text index"""
-    return self.manage_addIndex(id, 'TextIndex', extra, REQUEST, RESPONSE, URL3)
--- a/src/Products/PluginIndexes/TextIndex/Vocabulary.py
+++ b/src/Products/PluginIndexes/TextIndex/Vocabulary.py
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Vocabulary for deprecated text index.
-$Id$
-"""
-from AccessControl.Permissions import manage_vocabulary
-from AccessControl.Permissions import query_vocabulary
-from AccessControl.Role import RoleManager
-from AccessControl.SecurityInfo import ClassSecurityInfo
-from Acquisition import Implicit
-from App.class_init import InitializeClass
-from App.Dialogs import MessageDialog
-from App.special_dtml import DTMLFile
-from Persistence import Persistent
-from OFS.SimpleItem import Item
-from zope.interface import implements
-from Products.PluginIndexes.interfaces import IVocabulary
-from Products.PluginIndexes.TextIndex import Lexicon, GlobbingLexicon
-from Products.PluginIndexes.TextIndex.Lexicon import stop_word_dict
-from Products.PluginIndexes.TextIndex import Splitter
-manage_addVocabularyForm=DTMLFile('dtml/addVocabulary',globals())
-def manage_addVocabulary(self, id, title, globbing=None, extra=None,
-                         splitter='', REQUEST=None):
-    """Add a Vocabulary object
-    """
-    id=str(id)
-    title=str(title)
-    if globbing: globbing=1
-    c=Vocabulary(id, title, globbing,splitter,extra)
-    self._setObject(id, c)
-    if REQUEST is not None:
-        return self.manage_main(self,REQUEST,update_menu=1)
-class _extra: pass
-class Vocabulary(Item, Persistent, Implicit, RoleManager):
-    """A Vocabulary is a user-managable realization of a Lexicon object.
-    """
-    implements(IVocabulary)
-    security = ClassSecurityInfo()
-    security.setPermissionDefault(manage_vocabulary, ('Manager',))
-    security.setPermissionDefault(query_vocabulary, ('Anonymous', 'Manager',))
-    meta_type = "Vocabulary"
-    _isAVocabulary = 1
-    manage_options=(
-        (
-        {'label': 'Vocabulary', 'action': 'manage_main',
-         'help' : ('ZCatalog', 'Vocabulary_Vocabulary.stx')},
-        {'label': 'Query', 'action': 'manage_query',
-         'help': ('ZCatalog', 'Vocabulary_Query.stx')},
-        )
-        + Item.manage_options
-        + RoleManager.manage_options
-        )
-    security.declareProtected(manage_vocabulary, 'manage_main')
-    manage_main = DTMLFile('dtml/manage_vocab', globals())
-    security.declareProtected(manage_vocabulary, 'manage_query')
-    manage_query = DTMLFile('dtml/vocab_query', globals())
-    def __init__(self, id, title='', globbing=None,splitter=None,extra=None):
-        """ create the lexicon to manage... """
-        self.id = id
-        self.title = title
-        self.globbing = not not globbing
-        self.useSplitter = Splitter.splitterNames[0]
-        if splitter:
-            self.useSplitter = splitter
-        if not extra:
-            extra = _extra()
-            extra.splitterIndexNumbers = 0
-            extra.splitterSingleChars  = 0
-            extra.splitterCasefolding  = 1
-        if globbing:
-            self.lexicon = GlobbingLexicon.GlobbingLexicon(
-                                useSplitter=self.useSplitter,extra=extra)
-        else:
-            self.lexicon = Lexicon.Lexicon(stop_word_dict,
-                                useSplitter=self.useSplitter,extra=extra)
-    def getLexicon(self):
-        return self.lexicon
-    security.declareProtected(query_vocabulary, 'query')
-    def query(self, pattern):
-        """ """
-        result = []
-        for x in self.lexicon.get(pattern):
-            if self.globbing:
-                result.append(self.lexicon._inverseLex[x])
-            else:
-                result.append(pattern)
-        return str(result)
-    def manage_insert(self, word='', URL1=None, RESPONSE=None):
-        """ doc string """
-        self.insert(word)
-        if RESPONSE:
-            RESPONSE.redirect(URL1 + '/manage_main')
-    def manage_stop_syn(self, stop_syn, REQUEST=None):
-        pass
-    def insert(self, word=''):
-        self.lexicon.set(word)
-    def words(self):
-        return self.lexicon._lexicon.items()
-InitializeClass(Vocabulary)
--- a/src/Products/PluginIndexes/TextIndex/__init__.py
+++ b/src/Products/PluginIndexes/TextIndex/__init__.py
-# empty comment for winzip and friends
-import warnings
-warnings.warn('Using TextIndex is deprecated (will be removed in Zope '
-              '2.12). Use ZCTextIndex instead.',
-              DeprecationWarning,
-              stacklevel=2) 
--- a/src/Products/PluginIndexes/TextIndex/dtml/addTextIndex.dtml
+++ b/src/Products/PluginIndexes/TextIndex/dtml/addTextIndex.dtml
-<dtml-var manage_page_header>
-<dtml-var "manage_form_title(this(), _,
-           form_title='Add TextIndex',
-	   )">
-<p class="form-help">
-  <strong>Note:</strong>
-  TextIndex is deprecated. It has been replaced by ZCTextIndex. Consider
-  using it instead
-</p>
-<p class="form-help">
-<strong>Text Indexes</strong> break text up into individual words, and 
-are often referred to as full-text indexes. Text indexes 
-sort results by score meaning they return hits in order 
-from the most relevant to the lest relevant.
-</p>
-<form action="manage_addTextIndex" method="post" enctype="multipart/form-data">
-<table cellspacing="0" cellpadding="2" border="0">
-  <tr>
-    <td align="left" valign="top">
-    <div class="form-label">
-    Id
-    </div>
-    </td>
-    <td align="left" valign="top">
-    <input type="text" name="id" size="40" />
-    </td>
-  </tr>
-  <tr>
-    <td align="left" valign="top">
-    <div class="form-label">
-    Vocabulary
-    </div>
-    </td>
-    <td>
-    <dtml-let vocabs="superValues('Vocabulary')">
-      <dtml-if vocabs>
-        <select name="extra.vocabulary:record">  
-          <dtml-in expr="superValues('Vocabulary')">
-            <option value="&dtml-id;">
-              &dtml-id; <dtml-var title fmt="(%s)" null html_quote>
-            </option>
-          </dtml-in>
-        </select>
-      <dtml-else>
-        <em class="std-text">Create a Vocabulary object first.</em>
-      </dtml-if>
-    </dtml-let>
-    </td> 
-  </tr>
-  <tr>
-    <td align="left" valign="top">
-    <div class="form-optional">
-    Type
-    </div>
-    </td>
-    <td align="left" valign="top">
-    TextIndex
-    </td>
-  </tr>
-  <tr>
-    <td align="left" valign="top">
-    </td>
-    <td align="left" valign="top">
-    <div class="form-element">
-    <input class="form-element" type="submit" name="submit" 
-     value=" Add " /> 
-    </div>
-    </td>
-  </tr>
-</table>
-</form>
-<dtml-var manage_page_footer>
--- a/src/Products/PluginIndexes/TextIndex/dtml/addVocabulary.dtml
+++ b/src/Products/PluginIndexes/TextIndex/dtml/addVocabulary.dtml
-<dtml-var manage_page_header>
-<dtml-var "manage_form_title(this(), _,
-           form_title='Add Vocabulary',
-	   )">
-<FORM ACTION="manage_addVocabulary" METHOD="POST">
-<table cellspacing="0" cellpadding="2" border="0">
-  <tr>
-    <td align="left" valign="top">
-    <div class="form-label">
-    Id
-    </div>
-    </td>
-    <td align="left" valign="top">
-    <input type="text" name="id" size="40" />
-    </td>
-  </tr>
-  <tr>
-    <td align="left" valign="top">
-    <div class="form-optional">
-    Title
-    </div>
-    </td>
-    <td align="left" valign="top">
-    <input type="text" name="title" size="40" />
-    </td>
-  </tr>
-  <dtml-if availableSplitters>
-   <tr>
-     <td align="left" valign="top">
-     <div class="form-optional">
-     Splitter
-     </div>
-     </td>
-     <td align="left" valign="top">
-     <select name="splitter"> 
-       <dtml-in availableSplitters>
-         <option value="&dtml-sequence-key;">&dtml-sequence-item;
-       </dtml-in>
-     </select>
-     </td>
-   </tr>
-  </dtml-if>
-  <tr>
-    <td align="left" valign="top">
-    <div class="form-label">
-    Index numbers
-    </td>
-    <td align="left" valign="top">
-    <select name="extra.splitterIndexNumbers:record:int">
-     <option value="0" selected>no
-     <option value="1">yes
-    </select>
-    </td>
-  </tr>
-  <tr>
-    <td align="left" valign="top">
-    <div class="form-label">
-    Index single characters 
-    </td>
-    <td align="left" valign="top">
-    <select name="extra.splitterSingleChars:record:int" >
-     <option value="0" selected>no
-     <option value="1">yes
-    </select>
-    </td>
-  </tr>
-  <tr>
-    <td align="left" valign="top">
-    <div class="form-label">
-    Case-insensitive
-    </td>
-    <td align="left" valign="top">
-    <select name="extra.splitterCasefolding:record:int">
-     <option value="0" >no
-     <option value="1"selected>yes
-    </select>
-    </td>
-  </tr>
-  <tr>
-    <td align="left" valign="top">
-    <div class="form-label">
-    globbing?
-    </td>
-    <td align="left" valign="top">
-    <input type="checkbox" name="globbing" />
-    </td>
-  </tr>
-  <tr>
-    <td align="left" valign="top">
-    </td>
-    <td align="left" valign="top">
-    <div class="form-element">
-    <input class="form-element" type="submit" name="submit" 
-     value=" Add " /> 
-    </div>
-    </td>
-  </tr>
-</table>
-</form>
-<dtml-var manage_page_footer>
--- a/src/Products/PluginIndexes/TextIndex/dtml/manageTextIndex.dtml
+++ b/src/Products/PluginIndexes/TextIndex/dtml/manageTextIndex.dtml
-<dtml-var manage_page_header>
-<dtml-var manage_tabs>
-<p class="form-help">
-<form method="post" action="manage_setPreferences">
-<table border="0" cellspacing="2" cellpadding="2">
-<tr>
-  <th align="left" width="20%">Vocabulary to use</th>
-  <td align="left">
-    <select name="vocabulary">  
-      <dtml-in "superValues('Vocabulary')">
-        <dtml-if "getId()==vocabulary_id">
-          <option value="&dtml-id;" selected>
-            &dtml-id; <dtml-var title fmt="(%s)" null html_quote>
-          </option> 
-        <dtml-else>
-          <option value="&dtml-id;">
-            &dtml-id; <dtml-var title fmt="(%s)" null html_quote>
-          </option> 
-        </dtml-if>
-      </dtml-in>
-    </select>
- </td>
- <td>
-   <em>Warning:</em> changing the vocabulary makes only sense when after 
-   creating the index and before indexing any objects. The index will be cleared
-   when you change the vocabulary after indexing objects.
- </td>
-</tr> 
-<dtml-comment>
-<tr>
-  <th align="left">Splitter</th>
-  <td>
-    <select name="splitter"> 
-      <dtml-in availableSplitters>
-        <dtml-if "_.getitem('sequence-key')==useSplitter"> 
-           <option value="&dtml-sequence-key;" selected>&dtml-sequence-item;
-        <dtml-else>
-           <option value="&dtml-sequence-key;">&dtml-sequence-item;
-        </dtml-if>
-      </dtml-in>
-    </select>
-  </td>
-</tr>
-<tr>
-  <th align="left">Default text operator</th>
-  <td>
-    <select name="text_operator"> 
-      <dtml-in "operators.keys()">
-        <dtml-if "_.getitem('sequence-item')==useOperator"> 
-           <option value="&dtml-sequence-item;" selected>&dtml-sequence-item;
-        <dtml-else>
-           <option value="&dtml-sequence-item;">&dtml-sequence-item;
-        </dtml-if>
-      </dtml-in>
-    </select>
-  </td>
-</tr>
-</dtml-comment>
-<tr>
-  <td colspan="3">
-    <input type="submit" value="Save changes">
-  </td>
-</tr>
-</table>
-</form>
-<dtml-var manage_page_footer>
--- a/src/Products/PluginIndexes/TextIndex/dtml/manageVocabulary.dtml
+++ b/src/Products/PluginIndexes/TextIndex/dtml/manageVocabulary.dtml
-<dtml-var manage_page_header>
-<dtml-var "manage_form_title(this(), _,
-           form_title='Manage vocabulary of text index',
-           help_topic='addIndex.stx'
-	   )">
-<dtml-var "getLexicon('Vocabulary')">
-<form action="manage_addTextIndex" method="post" enctype="multipart/form-data">
-<table cellspacing="0" cellpadding="2" border="0">
-</table>
-</form>
-<dtml-var manage_page_footer>
--- a/src/Products/PluginIndexes/TextIndex/dtml/manage_vocab.dtml
+++ b/src/Products/PluginIndexes/TextIndex/dtml/manage_vocab.dtml
-<dtml-call "RESPONSE.setHeader('content-type','text/html; charset: utf-8')">
-<dtml-var manage_page_header>
-<dtml-var manage_tabs>
-<p class="form-text">
-  <dtml-let lexicon="getLexicon()">
-    <dtml-try> 
-      <dtml-let x="lexicon.multi_wc"></dtml-let>
-      Globbing is <em>enabled</em>
-      <dtml-except>
-      Globbing is <em>disabled</em>
-    </dtml-try>
-    <dtml-if useSplitter>
-     , Splitter is <em>&dtml-useSplitter;</em>   
-    </dtml-if>
-    <dtml-try>
-     , Index number=<dtml-var "lexicon.splitterParams.splitterIndexNumbers">
-     , Case-insensitve=<dtml-var "lexicon.splitterParams.splitterCasefolding">
-     , Index single characters=<dtml-var "lexicon.splitterParams.splitterSingleChars">
-    <dtml-except>
-    </dtml-try>
-  </dtml-let>
-</p>
-<dtml-if words>
-<p class="form-text">
-&dtml-id; contains <em><dtml-var words fmt=collection-length thousands_commas></em>
- word(s).
-</p>
-<dtml-in words previous size=20 start=query_start >
-  <span class="list-nav">
-  <a href="&dtml-URL;?query_start=&dtml-previous-sequence-start-number;">
-    [Previous <dtml-var previous-sequence-size> entries]
-  </a>
-  </span>
-</dtml-in>
-<dtml-in words next size=20 start=query_start >
-  <span class="list-nav">
-  <a href="&dtml-URL;?query_start=&dtml-next-sequence-start-number;">
-    [Next <dtml-var next-sequence-size> entries]
-  </a>
-  </span>
-</dtml-in>
-<table width="100%" cellspacing="0" cellpadding="2" border="0">
-  <dtml-in words size=20 start=query_start >
-    <dtml-if name="sequence-start">
-      <tr class="list-header">
-        <td width="80%" align="left" valign="top">
-          <div class="list-item">Word</div></td>
-        <td width="20%" align="left" valign="top">
-          <div class="list-item">Word ID</div></td>
-      </tr>
-    </dtml-if>
-  <dtml-if name="sequence-odd"><tr class="row-normal">
-  <dtml-else><tr class="row-hilite"></dtml-if>
-    <td valign="top" align="left">
-    <div class="form-text">
-      <dtml-if "_.same_type(_['sequence-key'], 'x')">
-        &dtml-sequence-key;
-      <dtml-else>
-        <dtml-var "_['sequence-key'].encode('utf-8')" html_quote>
-      </dtml-if>
-      </div>
-    </td>
-    <td valign="top" align="left">
-    <div class="form-text">&dtml-sequence-item;</div>
-    </td>
-  </tr>
-  </dtml-in>
-</table>
-<dtml-in words previous size=20 start=query_start >
-  <div class="list-nav">
-  <a href="&dtml-URL;?query_start=&dtml-previous-sequence-start-number;">
-    [Previous <dtml-var previous-sequence-size> entries]
-  </a>
-  </div>
-</dtml-in>
-<dtml-in words next size=20 start=query_start >
-  <div class="list-nav">
-  <a href="&dtml-URL;?query_start=&dtml-next-sequence-start-number;">
-    [Next <dtml-var next-sequence-size> entries]
-  </a>
-  </div>
-</dtml-in>
-<dtml-else>
-<p class="form-text">
-There are no words in the Vocabulary.
-</p>
-</dtml-if>
-<dtml-var manage_page_footer>
--- a/src/Products/PluginIndexes/TextIndex/dtml/vocab_query.dtml
+++ b/src/Products/PluginIndexes/TextIndex/dtml/vocab_query.dtml
-<dtml-var manage_page_header>
-<dtml-var manage_tabs>
-<form action="query" method=POST>
-  <input type="text" name="pattern" size="20">
-  <div class="form-element">
-  <input class="form-element" type="submit" name="submit" value="Query">
-  </div>
-</form>
-<dtml-var manage_page_footer>
--- a/src/Products/PluginIndexes/TextIndex/help/TextIndex_searchResults.stx
+++ b/src/Products/PluginIndexes/TextIndex/help/TextIndex_searchResults.stx
-ZCatalog - searchResults: specifying parameters for a search query
-    The searchResults() method of the ZCatalog accepts parameters that
-    define a query to be made on that catalog.  A query can either be
-    passed as keyword argument to searchResults(), as a mapping, or as
-    part of a Zope REQUEST object, typically from HTML forms.
-    The index of the catalog to query is either the name of the
-    keyword argument, a key in a mapping, or an attribute of a record
-    object.
-    Attributes of record objects
-      'query' -- either a sequence of objects or a single value to be
-      passed as query to the index (mandatory)
-      'operator' -- specifies the combination of search results when
-      query is a sequence of values. (optional, default: 'or').
-        Allowed values:
-         'and', 'or', 'andnot', 'near' 
--- a/src/Products/PluginIndexes/TextIndex/tests/__init__.py
+++ b/src/Products/PluginIndexes/TextIndex/tests/__init__.py
-##############################################################################
-#
-# Copyright (c) 2003 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-# This file is needed to make this a package.
--- a/src/Products/PluginIndexes/TextIndex/tests/testSplitter.py
+++ b/src/Products/PluginIndexes/TextIndex/tests/testSplitter.py
-# -*- coding: ISO-8859-1 -*-
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-import os,sys
-import unittest,locale
-from Products.PluginIndexes.TextIndex import Splitter
-class TestCase( unittest.TestCase ):
-    """
-        Test our Splitters
-    """
-    def setUp( self ):
-        self.testdata = (
-        ('The quick brown fox jumps over the lazy dog',
-          ['the','quick','brown','fox','jumps','over','the','lazy','dog']),
-        (  'fters   sterreichische   herber   berfall   da   rger   verrgert',
-          ['fters','sterreichische','herber','berfall','da','rger','verrgert'])
-        )
-        pass
-    def tearDown( self ):
-        """
-        """
-    def testAvailableSplitters( self ):
-        "Test available splitters"
-        assert len(Splitter.availableSplitters) >0
-        assert len(Splitter.splitterNames)>0
-        assert len(Splitter.availableSplitters)==len(Splitter.splitterNames)
-    def _test(self,sp_name,text,splitted):
-        splitter = Splitter.getSplitter(sp_name)
-        result = list(splitter(text))
-        assert result==splitted, "%s: %s vs %s" % (sp_name,result,splitted)
-#    def testZopeSplitter(self):
-#        """test ZopeSplitter (this test is known to fail because it does not support ISO stuff) """
-#
-#        for text,splitted in self.testdata:
-#            self._test("ZopeSplitter",text,splitted)
-    def testISOSplitter(self):
-        """test ISOSplitter"""
-        for text,splitted in self.testdata:
-            self._test("ISO_8859_1_Splitter",text,splitted)
-def test_suite():
-    return unittest.makeSuite( TestCase )
-def debug():
-    return test_suite().debug()
-def pdebug():
-    import pdb
-    pdb.run('debug()')
-def main():
-    unittest.TextTestRunner().run( test_suite() )
-if __name__ == '__main__':
-    if len(sys.argv) > 1:
-        globals()[sys.argv[1]]()
-    else:
-        main()
--- a/src/Products/PluginIndexes/TextIndex/tests/testTextIndex.py
+++ b/src/Products/PluginIndexes/TextIndex/tests/testTextIndex.py
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""TextIndex unit tests.
-$Id$
-"""
-import unittest
-import Testing
-import Zope2
-Zope2.startup()
-import ZODB
-from ZODB.MappingStorage import MappingStorage
-import transaction
-from Products.PluginIndexes.TextIndex import TextIndex
-from Products.PluginIndexes.TextIndex import GlobbingLexicon
-class Dummy:
-    def __init__( self, text ):
-        self._text = text
-    def text( self ):
-        return self._text
-    def __str__( self ):
-        return '<Dummy: %s>' % self._text
-    __repr__ = __str__
-class Tests(unittest.TestCase):
-    db = None
-    jar = None
-    def setUp(self):
-        self.index=TextIndex.TextIndex('text')
-        self.doc=Dummy(text='this is the time, when all good zopes')
-    def dbopen(self):
-        if self.db is None:
-            s = MappingStorage()
-            self.db = ZODB.DB(s)
-        db = self.db
-        if self.jar is not None:
-            raise RuntimeError, 'test needs to dbclose() before dbopen()'
-        jar = db.open()
-        self.jar = jar
-        if not jar.root().has_key('index'):
-            jar.root()['index'] = TextIndex.TextIndex('text')
-            transaction.commit()
-        return jar.root()['index']
-    def dbclose(self):
-        self.jar.close()
-        self.jar = None
-    def tearDown(self):
-        transaction.abort()
-        if self.jar is not None:
-            self.dbclose()
-        if self.db is not None:
-            self.db.close()
-            self.db = None
-    def test_z3interfaces(self):
-        from Products.PluginIndexes.interfaces import IPluggableIndex
-        from Products.PluginIndexes.interfaces import ITextIndex
-        from Products.PluginIndexes.TextIndex.TextIndex import TextIndex
-        from zope.interface.verify import verifyClass
-        verifyClass(IPluggableIndex, TextIndex)
-        verifyClass(ITextIndex, TextIndex)
-    def test_SimpleAddDelete(self):
-        self.index.index_object(0, self.doc)
-        self.index.index_object(1, self.doc)
-        self.doc.text='spam is good, spam is fine, span span span'
-        self.index.index_object(0, self.doc)
-        self.index.unindex_object(0)
-    def test_PersistentUpdate1(self):
-        # Check simple persistent indexing
-        index=self.dbopen()
-        self.doc.text='this is the time, when all good zopes'
-        index.index_object(0, self.doc)
-        transaction.commit()
-        self.doc.text='time waits for no one'
-        index.index_object(1, self.doc)
-        transaction.commit()
-        self.dbclose()
-        index=self.dbopen()
-        r = index._apply_index({})
-        assert r==None
-        r = index._apply_index({'text': 'python'})
-        assert len(r) == 2 and r[1]==('text',), 'incorrectly not used'
-        assert not r[0], "should have no results"
-        r = index._apply_index({'text': 'time'})
-        r=list(r[0].keys())
-        assert  r == [0,1], r
-    def test_PersistentUpdate2(self):
-        # Check less simple persistent indexing
-        index=self.dbopen()
-        self.doc.text='this is the time, when all good zopes'
-        index.index_object(0, self.doc)
-        transaction.commit()
-        self.doc.text='time waits for no one'
-        index.index_object(1, self.doc)
-        transaction.commit()
-        self.doc.text='the next task is to test'
-        index.index_object(3, self.doc)
-        transaction.commit()
-        self.doc.text='time time'
-        index.index_object(2, self.doc)
-        transaction.commit()
-        self.dbclose()
-        index=self.dbopen()
-        r = index._apply_index({})
-        assert r==None
-        r = index._apply_index({'text': 'python'})
-        assert len(r) == 2 and r[1]==('text',), 'incorrectly not used'
-        assert not r[0], "should have no results"
-        r = index._apply_index({'text': 'time'})
-        r=list(r[0].keys())
-        assert  r == [0,1,2], r
-    sample_texts = [
-        """This is the time for all good men to come to
-        the aid of their country""",
-        """ask not what your country can do for you,
-        ask what you can do for your country""",
-        """Man, I can't wait to get to Montross!""",
-        """Zope Public License (ZPL) Version 1.0""",
-        """Copyright (c) Digital Creations.  All rights reserved.""",
-        """This license has been certified as Open Source(tm).""",
-        """I hope I get to work on time""",
-        ]
-    def globTest(self, qmap, rlist):
-        "Check a glob query"
-        index=self.dbopen()
-        index._lexicon = GlobbingLexicon.GlobbingLexicon()
-        for i in range(len(self.sample_texts)):
-            self.doc.text=self.sample_texts[i]
-            index.index_object(i, self.doc)
-            transaction.commit()
-        self.dbclose()
-        index=self.dbopen()
-        r = list(index._apply_index(qmap)[0].keys())
-        assert  r == rlist, r
-        return index._apply_index
-    def test_StarQuery(self):
-        self.globTest({'text':'m*n'}, [0,2])
-    def test_AndQuery(self):
-        self.globTest({'text':'time and country'}, [0,])
-    def test_OrQuery(self):
-        self.globTest({'text':'time or country'}, [0,1,6])
-    def test_DefaultOrQuery(self):
-        self.globTest({'text':'time country'}, [0,1,6])
-    def test_NearQuery(self):
-        # Check a NEAR query.. (NOTE:ACTUALLY AN 'AND' TEST!!)
-        # NEAR never worked, so Zopes post-2.3.1b3 define near to mean AND
-        self.globTest({'text':'time ... country'}, [0,])
-    def test_QuotesQuery(self):
-        ai = self.globTest({'text':'"This is the time"'}, [0,])
-        r = list(ai({'text':'"now is the time"'})[0].keys())
-        assert  r == [], r
-    def test_AndNotQuery(self):
-        self.globTest({'text':'time and not country'}, [6,])
-    def test_ParenMatchingQuery(self):
-        ai = self.globTest({'text':'(time and country) men'}, [0,])
-        r = list(ai({'text':'(time and not country) or men'})[0].keys())
-        assert  r == [0, 6], r
-    def test_TextIndexOperatorQuery(self):
-        self.globTest({'text': {'query': 'time men', 'operator':'and'}}, [0,])
-    def test_NonExistentWord(self):
-        self.globTest({'text':'zop'}, [])
-    def test_ComplexQuery1(self):
-        self.globTest({'text':'((?ount* or get) and not wait) '
-                       '"been *ert*"'}, [0, 1, 5, 6])
-    # same tests, unicode strings
-    def test_StarQueryUnicode(self):
-        self.globTest({'text':u'm*n'}, [0,2])
-    def test_AndQueryUnicode(self):
-        self.globTest({'text':u'time and country'}, [0,])
-    def test_OrQueryUnicode(self):
-        self.globTest({'text':u'time or country'}, [0,1,6])
-    def test_DefaultOrQueryUnicode(self):
-        self.globTest({'text':u'time country'}, [0,1,6])
-    def test_NearQueryUnicode(self):
-        # Check a NEAR query.. (NOTE:ACTUALLY AN 'AND' TEST!!) (unicode)
-        # NEAR never worked, so Zopes post-2.3.1b3 define near to mean AND
-        self.globTest({'text':u'time ... country'}, [0,])
-    def test_QuotesQueryUnicode(self):
-        ai = self.globTest({'text':u'"This is the time"'}, [0,])
-        r = list(ai({'text':'"now is the time"'})[0].keys())
-        assert  r == [], r
-    def test_AndNotQueryUnicode(self):
-        self.globTest({'text':u'time and not country'}, [6,])
-    def test_ParenMatchingQueryUnicode(self):
-        ai = self.globTest({'text':u'(time and country) men'}, [0,])
-        r = list(ai({'text':u'(time and not country) or men'})[0].keys())
-        assert  r == [0, 6], r
-    def test_TextIndexOperatorQueryUnicode(self):
-        self.globTest({'text': {u'query': u'time men', 'operator':'and'}},
-                      [0,])
-    def test_NonExistentWordUnicode(self):
-        self.globTest({'text':u'zop'}, [])
-    def test_ComplexQuery1Unicode(self):
-        self.globTest({'text':u'((?ount* or get) and not wait) '
-                       '"been *ert*"'}, [0, 1, 5, 6])
-def test_suite():
-    return unittest.makeSuite(Tests)
-if __name__=='__main__':
-    unittest.main(defaultTest='test_suite')
--- a/src/Products/PluginIndexes/__init__.py
+++ b/src/Products/PluginIndexes/__init__.py
@@ -21,20 +21,7 @@ import DateRangeIndex.DateRangeIndex
 from Products.PluginIndexes.common import ResultList
 from Products.PluginIndexes.common import UnIndex
-# BBB: TextIndex is deprecated but we don't want the warning to appear here
+_indexes =  ('KeywordIndex',
-import warnings
-warnings.filterwarnings('ignore', message='^Using TextIndex', append=1)
-try:
-    import TextIndex.TextIndex
-finally:
-    del warnings.filters[-1]
-    try:
-        del __warningregistry__
-    except NameError:
-        pass
-_indexes =  ('TextIndex',
-             'KeywordIndex',
             'FieldIndex',
             'PathIndex',
             'TopicIndex',

--- a/src/Products/PluginIndexes/interfaces.py
+++ b/src/Products/PluginIndexes/interfaces.py
@@ -160,38 +160,8 @@ class IPathIndex(Interface):
    """
-class IVocabulary(Interface):
-    """A Vocabulary is a user-managable realization of a Lexicon object.
-    """
-class ITextIndex(Interface):
-    """Full-text index.
-    There is a ZCatalog UML model that sheds some light on what is
-    going on here.  '_index' is a BTree which maps word ids to mapping
-    from document id to score.  Something like:
-      {'bob' : {1 : 5, 2 : 3, 42 : 9}}
-      {'uncle' : {1 : 1}}
-    The '_unindex' attribute is a mapping from document id to word
-    ids.  This mapping allows the catalog to unindex an object:
-      {42 : ('bob', 'is', 'your', 'uncle')
-    This isn't exactly how things are represented in memory, many
-    optimizations happen along the way.
-    """
-    def getLexicon(vocab_id=None):
-        """Get the Lexicon in use.
-        """
 class IFilteredSet(Interface):
    """A pre-calculated result list based on an expression.
    """