Commit 3d88c027 authored by Evan Simpson's avatar Evan Simpson

Merge TextIndex fixes from 2.4 branch

parent 233671d4
......@@ -85,7 +85,7 @@
from Lexicon import Lexicon
import Splitter
from Products.PluginIndexes.TextIndex.TextIndex import Or
from TextIndex import Or, Op
import re, string
......@@ -147,14 +147,12 @@ class GlobbingLexicon(Lexicon):
def createDigrams(self, word):
    """Returns a list with the set of digrams in the word.

    Each letter is paired with its predecessor; self.eow marks the
    word boundary, so 'ab' yields [eow+'a', 'ab', 'b'+eow].
    """
    # Merge fix: only the merged (2.4-branch) implementation is kept;
    # the stale pre-merge loop that was interleaved here is removed.
    digrams = list(word)
    digrams.append(self.eow)        # Mark the end
    last = self.eow                 # Mark the beginning
    for i in range(len(digrams)):
        # Pair each character with the one before it.
        last, digrams[i] = digrams[i], last + digrams[i]
    return digrams
......@@ -269,21 +267,30 @@ class GlobbingLexicon(Lexicon):
def query_hook(self, q):
    """Expand wildcard terms in the parsed query list, in place.

    Lists are recursed into; Op instances are left alone; any term
    containing a wildcard is replaced by the Or-joined list of
    matching word ids.  Returns q.
    """
    # Merge fix: keep only the merged in-place rewrite; the stale
    # pre-merge accumulate-into-words loop is removed.
    ListType = type([])
    i = len(q) - 1
    while i >= 0:
        e = q[i]
        if isinstance(e, ListType):
            # Recurse into sub-queries (parenthesized groups).
            self.query_hook(e)
        elif isinstance(e, Op):
            # Operators are not words; leave them untouched.
            pass
        elif ( (self.multi_wc in e) or
               (self.single_wc in e) ):
            wids = self.get(e)
            words = []
            for wid in wids:
                if words:
                    words.append(Or)
                words.append(wid)
            if not words:
                # if words is empty, return something that will make
                # textindex's __getitem__ return an empty result list
                words.append('')
            q[i] = words
        i = i - 1
    return q
def Splitter(self, astring, words=None):
""" wrap the splitter """
......@@ -300,18 +307,16 @@ class GlobbingLexicon(Lexicon):
There is no way to quote meta-characters.
"""
# Remove characters that are meaningful in a regex
transTable = string.maketrans("", "")
result = string.translate(pat, transTable,
r'()&|!@#$%^{}\<>.')
# First, deal with mutli-character globbing
result = string.replace(pat, '*', '.*')
# First, deal with multi-character globbing
result = string.replace(result, '*', '.*')
# Next, we need to deal with single-character globbing
result = string.replace(result, '?', '.?')
# Now, we need to remove all of the characters that
# are forbidden.
result = string.translate(result, transTable,
r'()&|!@#$%^{}\<>')
result = string.replace(result, '?', '.')
return "%s$" % result
......
......@@ -85,13 +85,9 @@
"""Text Index
The TextIndex falls under the 'I didnt have a better name for it'
excuse. It is an 'Un' Text index because it stores a little bit of
undo information so that objects can be unindexed when the old value
is no longer known.
"""
__version__ = '$Revision: 1.9 $'[11:-2]
__version__ = '$Revision: 1.10 $'[11:-2]
import string, re
......@@ -113,12 +109,21 @@ from Lexicon import Lexicon
from types import *
AndNot = 'andnot'
And = 'and'
Or = 'or'
Near = '...'
class Op:
    """A query-operator token.

    Instances are compared by identity in the evaluator, so each
    operator ('and', 'or', ...) is a module-level singleton.  The
    string form of an operator is simply its name.
    """

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return self.name

    def __str__(self):
        return self.name
AndNot = Op('andnot')
And = Op('and')
Or = Op('or')
Near = Op('...')
QueryError = 'TextIndex.QueryError'
operator_dict = {'andnot': AndNot, 'and': And, 'or': Or,
'...': Near, 'near': Near,
AndNot: AndNot, And: And, Or: Or, Near: Near}
class TextIndex(PluggableIndex.PluggableIndex, Persistent,
Implicit, SimpleItem):
......@@ -176,8 +181,6 @@ class TextIndex(PluggableIndex.PluggableIndex, Persistent,
# Default text index operator (should be visible to ZMI)
self.operators = { 'andnot':AndNot, 'and':And,
'near':Near, 'or':Or }
self.useOperator = 'or'
self.clear()
......@@ -508,10 +511,7 @@ class TextIndex(PluggableIndex.PluggableIndex, Persistent,
# Changed for 2.4
# We use the default operator that can me managed via the ZMI
query_operator = record.get('operator',self.useOperator)
if not query_operator in self.operators.keys():
raise exceptions.RuntimeError,"Invalid operator '%s' for a TextIndex"\
% query_operator
qop = record.get('operator', self.useOperator)
# We keep this for pre-2.4 compatibility
# This stinking code should go away somewhere. A global
......@@ -520,10 +520,16 @@ class TextIndex(PluggableIndex.PluggableIndex, Persistent,
# should be specified on a per-index base
if request.has_key('textindex_operator'):
query_operator = request['textindex_operator']
warnings.warn("The usage of the 'textindex_operator' is no longer recommended.\n"\
"Please use a mapping object and the 'operator' to specify the operator")
qop = request['textindex_operator']
warnings.warn("The usage of the 'textindex_operator' "
"is no longer recommended.\n"
"Please use a mapping object and the "
"'operator' key to specify the operator.")
query_operator = operator_dict.get(qop)
if query_operator is None:
raise exceptions.RuntimeError, ("Invalid operator '%s' "
"for a TextIndex" % qop)
r = None
for key in record.keys:
......@@ -572,29 +578,37 @@ class TextIndex(PluggableIndex.PluggableIndex, Persistent,
def query(self, s, default_operator=Or):
    """ Evaluate a query string.

    Convert the query string into a data structure of nested lists
    and strings, based on the grouping of whitespace-separated
    strings by parentheses and quotes.  The 'Near' operator is
    inserted between the strings of a quoted group.

    The Lexicon is given the opportunity to transform the
    data structure.  Stemming, wildcards, and translation are
    possible Lexicon services.

    Finally, the query list is normalized so that it and every
    sub-list consist of non-operator strings or lists separated
    by operators.  This list is evaluated.
    """
    # First replace any occurrences of " and not " with " andnot "
    s = re.sub('(?i)\s+and\s*not\s+', ' andnot ', s)

    # Parse parentheses and quotes
    q = parse(s)

    # Allow the Lexicon to process the query
    q = self.getLexicon().query_hook(q)

    # Insert the default operator between any two search terms not
    # already joined by an operator.
    q = parse2(q, default_operator)

    # Evaluate the final 'expression'
    return self.evaluate(q)
......@@ -629,22 +643,20 @@ class TextIndex(PluggableIndex.PluggableIndex, Persistent,
def evaluate(self, query):
"""Evaluate a parsed query"""
# There are two options if the query passed in is only one
# item. It means either it's an embedded query, in which case
# we'll recursively evaluate, other wise it's nothing for us
# to evaluate, and we just get the results and return them.
if (len(query) == 1):
if (type(query[0]) is ListType):
return self.evaluate(query[0])
# Strip off meaningless layers
while isinstance(query, ListType) and len(query) == 1:
query = query[0]
return self[query[0]] # __getitem__
# If it's not a list, assume a string or number
if not isinstance(query, ListType):
return self[query]
# Now we need to loop through the query and expand out
# Now we need to loop through the query and reduce
# operators. They are currently evaluated in the following
# order: AndNote -> And -> Or -> Near
# order: AndNot -> And -> Or -> Near
i = 0
while (i < len(query)):
if query[i] == AndNot:
if query[i] is AndNot:
left, right = self.get_operands(query, i)
val = left.and_not(right)
query[(i - 1) : (i + 2)] = [ val ]
......@@ -652,7 +664,7 @@ class TextIndex(PluggableIndex.PluggableIndex, Persistent,
i = 0
while (i < len(query)):
if query[i] == And:
if query[i] is And:
left, right = self.get_operands(query, i)
val = left & right
query[(i - 1) : (i + 2)] = [ val ]
......@@ -660,7 +672,7 @@ class TextIndex(PluggableIndex.PluggableIndex, Persistent,
i = 0
while (i < len(query)):
if query[i] == Or:
if query[i] is Or:
left, right = self.get_operands(query, i)
val = left | right
query[(i - 1) : (i + 2)] = [ val ]
......@@ -668,14 +680,15 @@ class TextIndex(PluggableIndex.PluggableIndex, Persistent,
i = 0
while (i < len(query)):
if query[i] == Near:
if query[i] is Near:
left, right = self.get_operands(query, i)
val = left.near(right)
query[(i - 1) : (i + 2)] = [ val ]
else: i = i + 1
if (len(query) != 1): raise QueryError, "Malformed query"
if (len(query) != 1):
import pdb; pdb.set_trace()
raise QueryError, "Malformed query"
return query[0]
......@@ -706,101 +719,93 @@ def parse(s):
def parse(s):
    """Parse a query string into a nested list structure.

    The string is lowercased; parenthesized groups become nested
    lists (via recursion) and quoted groups are handled by quotes().
    """
    # Merge fix: keep only the merged implementation, which uses the
    # (before, inside, after) tuple returned by the new parens().
    l = []
    tmp = string.lower(s)
    p = parens(tmp)
    while p is not None:
        # Look for quotes in the section of the string before
        # the parentheses, then parse the string inside the parens
        l = l + quotes(p[0])
        l.append(parse(p[1]))
        # continue looking through the rest of the string
        tmp = p[2]
        p = parens(tmp)
    return l + quotes(tmp)
def parse2(q, default_operator, operator_dict=operator_dict):
    """Find operators and operands.

    Normalize q (in place) so that operands occupy the even positions
    and operators the odd ones, inserting default_operator wherever
    two operands are adjacent.  Operator strings are canonicalized to
    the module's Op singletons via operator_dict.
    """
    isop = operator_dict.has_key
    # Walk backwards so insertions do not disturb unvisited indices.
    i = len(q) - 1
    while i >= 0:
        e = q[i]
        if isinstance(e, ListType):
            q[i] = parse2(e, default_operator)
            if i % 2:
                # A sub-query in an operator slot: splice in the default.
                q.insert(i, default_operator)
        elif i % 2:
            # This element should be an operator
            if isop(e):
                # Ensure that it is identical, not merely equal.
                q[i] = operator_dict[e]
            else:
                # Insert the default operator.
                q.insert(i, default_operator)
        i = i - 1
    return q
def parens(s, parens_re=re.compile('[\(\)]').search):
index = open_index = paren_count = 0
while 1:
mo = parens_re(s, index)
if mo is None : break
def parens(s, parens_re=re.compile('[()]').search):
mo = parens_re(s)
if mo is None:
return
open_index = mo.start(0) + 1
paren_count = 0
while mo is not None:
index = mo.start(0)
if s[index] == '(':
paren_count = paren_count + 1
if open_index == 0 : open_index = index + 1
else:
paren_count = paren_count - 1
if paren_count == 0:
return (s[:open_index - 1], s[open_index:index],
s[index + 1:])
if paren_count < 0:
break
mo = parens_re(s, index + 1)
if paren_count == 0:
return open_index, index
else:
index = index + 1
if paren_count == 0: # No parentheses Found
return None
else:
raise QueryError, "Mismatched parentheses"
raise QueryError, "Mismatched parentheses"
def quotes(s, ws=(string.whitespace,)):
# split up quoted regions
splitted = re.split( '[%s]*\"[%s]*' % (ws * 2),s)
split=string.split
if (len(splitted) > 1):
if ((len(splitted) % 2) == 0): raise QueryError, "Mismatched quotes"
def quotes(s):
split=string.split
if '"' not in s:
return split(s)
for i in range(1,len(splitted),2):
# split the quoted region into words
splitted[i] = filter(None, split(splitted[i]))
# put the Proxmity operator in between quoted words
for j in range(1, len(splitted[i])):
splitted[i][j : j] = [ Near ]
for i in range(len(splitted)-1,-1,-2):
# split the non-quoted region into words
splitted[i:i+1] = filter(None, split(splitted[i]))
splitted = filter(None, splitted)
else:
# No quotes, so just split the string into words
splitted = filter(None, split(s))
return splitted
# split up quoted regions
splitted = re.split('\s*\"\s*', s)
if (len(splitted) % 2) == 0: raise QueryError, "Mismatched quotes"
for i in range(1,len(splitted),2):
# split the quoted region into words
words = splitted[i] = split(splitted[i])
# put the Proxmity operator in between quoted words
j = len(words) - 1
while j > 0:
words.insert(j, Near)
j = j - 1
i = len(splitted) - 1
while i >= 0:
# split the non-quoted region into words
splitted[i:i+1] = split(splitted[i])
i = i - 2
return filter(None, splitted)
manage_addTextIndexForm = DTMLFile('dtml/addTextIndex', globals())
......
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations. All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
#
#
# Disclaimer
#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
import sys, os
sys.path.insert(0, os.path.join(sys.path[0],'..'))
sys.path.insert(0, os.getcwd())
try: import unittest
except:
sys.path[0]=os.path.join(sys.path[0],'..','..', '..')
import unittest
print sys.path
class Dummy:
    """Test helper: a bare object whose attributes are supplied as
    keyword arguments, e.g. Dummy(text='...')."""

    def __init__(self, **kw):
        for attr_name, attr_value in kw.items():
            setattr(self, attr_name, attr_value)
import zLOG
# Replace zLOG's log hook so that any log entry at PROBLEM severity or
# worse turns into a test failure instead of passing silently.
def log_write(subsystem, severity, summary, detail, error):
    # Lower-severity messages are ignored; serious ones abort the test.
    if severity >= zLOG.PROBLEM:
        assert 0, "%s(%s): %s" % (subsystem, severity, summary)
zLOG.log_write=log_write
import ZODB, ZODB.DemoStorage, ZODB.FileStorage
import TextIndex
import GlobbingLexicon
class Tests(unittest.TestCase):
    """Tests for TextIndex, both transient and ZODB-persistent, and for
    glob/boolean query parsing via a GlobbingLexicon."""

    def setUp(self):
        # Fresh, unpersisted index plus a sample document for each test.
        self.index=TextIndex.TextIndex('text')
        self.doc=Dummy(text='this is the time, when all good zopes')

    def dbopen(self):
        # Open (creating on first use) a FileStorage-backed database and
        # return the persistent TextIndex stored under root()['index'].
        n = 'fs_tmp__%s' % os.getpid()
        s = ZODB.FileStorage.FileStorage(n)
        db=self.db=ZODB.DB(s)
        self.jar=db.open()
        if not self.jar.root().has_key('index'):
            self.jar.root()['index']=TextIndex.TextIndex('text')
            get_transaction().commit()
        return self.jar.root()['index']

    def dbclose(self):
        # Close the connection and database opened by dbopen().
        self.jar.close()
        self.db.close()
        del self.jar
        del self.db

    def tearDown(self):
        get_transaction().abort()
        if hasattr(self, 'jar'):
            self.dbclose()
        # Remove the temporary FileStorage files created by dbopen().
        os.system('rm -f fs_tmp__*')

    def checkSimpleAddDelete(self):
        "Check that we can add and delete an object without error"
        self.index.index_object(0, self.doc)
        self.index.index_object(1, self.doc)
        self.doc.text='spam is good, spam is fine, span span span'
        self.index.index_object(0, self.doc)
        self.index.unindex_object(0)

    def checkPersistentUpdate1(self):
        "Check simple persistent indexing"
        index=self.dbopen()

        self.doc.text='this is the time, when all good zopes'
        index.index_object(0, self.doc)
        get_transaction().commit()

        self.doc.text='time waits for no one'
        index.index_object(1, self.doc)
        get_transaction().commit()
        self.dbclose()

        # Reopen to make sure the indexed data actually persisted.
        index=self.dbopen()

        # An empty request must leave the index unused.
        r = index._apply_index({})
        assert r==None

        r = index._apply_index({'text': 'python'})
        assert len(r) == 2 and r[1]==('text',), 'incorrectly not used'
        assert not r[0], "should have no results"

        r = index._apply_index({'text': 'time'})
        r=list(r[0].keys())
        assert r == [0,1], r

    def checkPersistentUpdate2(self):
        "Check less simple persistent indexing"
        index=self.dbopen()

        self.doc.text='this is the time, when all good zopes'
        index.index_object(0, self.doc)
        get_transaction().commit()

        self.doc.text='time waits for no one'
        index.index_object(1, self.doc)
        get_transaction().commit()

        self.doc.text='the next task is to test'
        index.index_object(3, self.doc)
        get_transaction().commit()

        self.doc.text='time time'
        index.index_object(2, self.doc)
        get_transaction().commit()
        self.dbclose()

        # Reopen to make sure the indexed data actually persisted.
        index=self.dbopen()

        r = index._apply_index({})
        assert r==None

        r = index._apply_index({'text': 'python'})
        assert len(r) == 2 and r[1]==('text',), 'incorrectly not used'
        assert not r[0], "should have no results"

        r = index._apply_index({'text': 'time'})
        r=list(r[0].keys())
        assert r == [0,1,2], r

    # Fixture documents; list index == document id used by globTest.
    sample_texts = [
        """This is the time for all good men to come to
        the aid of their country""",
        """ask not what your country can do for you,
        ask what you can do for your country""",
        """Man, I can't wait to get to Montross!""",
        """Zope Public License (ZPL) Version 1.0""",
        """Copyright (c) Digital Creations. All rights reserved.""",
        """This license has been certified as Open Source(tm).""",
        """I hope I get to work on time""",
        ]

    def globTest(self, qmap, rlist):
        "Check a glob query"
        # Index the sample texts under a GlobbingLexicon, reopen the
        # database, and check that qmap returns exactly rlist.
        index=self.dbopen()
        index._lexicon = GlobbingLexicon.GlobbingLexicon()

        for i in range(len(self.sample_texts)):
            self.doc.text=self.sample_texts[i]
            index.index_object(i, self.doc)
            get_transaction().commit()

        self.dbclose()

        index=self.dbopen()

        r = list(index._apply_index(qmap)[0].keys())
        assert r == rlist, r

        # Returned so callers can issue further queries on the same index.
        return index._apply_index

    def checkStarQuery(self):
        "Check a star query"
        self.globTest({'text':'m*n'}, [0,2])

    def checkAndQuery(self):
        "Check an AND query"
        self.globTest({'text':'time and country'}, [0,])

    def checkOrQuery(self):
        "Check an OR query"
        self.globTest({'text':'time or country'}, [0,1,6])

    def checkDefOrQuery(self):
        "Check a default OR query"
        self.globTest({'text':'time country'}, [0,1,6])

    def checkNearQuery(self):
        """Check a NEAR query.. (NOTE:ACTUALLY AN 'AND' TEST!!)"""
        # NEAR never worked, so Zopes post-2.3.1b3 define near to mean AND
        self.globTest({'text':'time ... country'}, [0,])

    def checkQuotesQuery(self):
        """Check a quoted query"""
        ai = self.globTest({'text':'"This is the time"'}, [0,])

        r = list(ai({'text':'"now is the time"'})[0].keys())
        assert r == [], r

    def checkAndNotQuery(self):
        "Check an ANDNOT query"
        self.globTest({'text':'time and not country'}, [6,])

    def checkParenMatchingQuery(self):
        "Check a query with parens"
        ai = self.globTest({'text':'(time and country) men'}, [0,])

        r = list(ai({'text':'(time and not country) or men'})[0].keys())
        assert r == [0, 6], r

    def checkTextIndexOperatorQuery(self):
        "Check a query with 'operator' in the request"
        self.globTest({'text': {'query': 'time men', 'operator':'and'}}, [0,])

    def checkNonExistentWord(self):
        """ Check for nonexistent word """
        self.globTest({'text':'zop'}, [])

    def checkComplexQuery1(self):
        """ Check complex query 1 """
        self.globTest({'text':'((?ount* or get) and not wait) '
                       '"been *ert*"'}, [0, 1, 5, 6])
def test_suite():
    # Collect every Tests method whose name starts with 'check'.
    return unittest.makeSuite(Tests, 'check')

def main():
    # Run the suite with the plain text runner.
    unittest.TextTestRunner().run(test_suite())

def debug():
    # Run the suite without catching exceptions (for post-mortem work).
    test_suite().debug()

def pdebug():
    # Run debug() under the pdb debugger.
    import pdb
    pdb.run('debug()')
if __name__=='__main__':
    # Allow choosing an entry point from the command line:
    # e.g. "python testTextIndex.py debug"; default is main().
    if len(sys.argv) > 1:
        globals()[sys.argv[1]]()
    else:
        main()
......@@ -267,21 +267,28 @@ class GlobbingLexicon(Lexicon):
def query_hook(self, q):
    """Expand wildcard terms in the parsed query list, in place.

    Lists are recursed into; any term containing a wildcard is
    replaced by the Or-joined list of matching word ids.  Returns q.
    """
    # Merge fix: keep only the merged in-place rewrite; the stale
    # pre-merge accumulate-into-words loop is removed.
    ListType = type([])
    i = len(q) - 1
    while i >= 0:
        e = q[i]
        if isinstance(e, ListType):
            # Recurse into sub-queries (parenthesized groups).
            self.query_hook(e)
        elif ( (self.multi_wc in e) or
               (self.single_wc in e) ):
            wids = self.get(e)
            words = []
            for wid in wids:
                if words:
                    words.append(Or)
                words.append(wid)
            if not words:
                # if words is empty, return something that will make
                # textindex's __getitem__ return an empty result list
                words.append('')
            q[i] = words
        i = i - 1
    return q
def Splitter(self, astring, words=None):
""" wrap the splitter """
......@@ -298,19 +305,16 @@ class GlobbingLexicon(Lexicon):
There is no way to quote meta-characters.
"""
# Remove characters that are meaningful in a regex
transTable = string.maketrans("", "")
result = string.translate(pat, transTable,
r'()&|!@#$%^{}\<>.')
# First, deal with mutli-character globbing
result = string.replace(pat, '*', '.*')
# First, deal with multi-character globbing
result = string.replace(result, '*', '.*')
# Next, we need to deal with single-character globbing
result = string.replace(result, '?', '.?')
# Now, we need to remove all of the characters that
# are forbidden.
result = string.translate(result, transTable,
r'()&|!@#$%^{}\<>')
result = string.replace(result, '?', '.')
return "%s$" % result
......@@ -91,7 +91,7 @@ undo information so that objects can be unindexed when the old value
is no longer known.
"""
__version__ = '$Revision: 1.49 $'[11:-2]
__version__ = '$Revision: 1.50 $'[11:-2]
import string, re
......@@ -428,7 +428,7 @@ class UnTextIndex(Persistent, Implicit):
and a String. Strings are looked up in the lexicon, whereas
Integers are assumed to be resolved word ids. """
if type(word) is IntType:
if isinstance(word, IntType):
# We have a word ID
result = self._index.get(word, {})
return ResultList(result, (word,), self)
......@@ -440,7 +440,7 @@ class UnTextIndex(Persistent, Implicit):
if len(splitSource) == 1:
splitSource = splitSource[0]
if splitSource[:1] == '"' and splitSource[-1:] == '"':
if splitSource[:1] == splitSource[-1:] == '"':
return self[splitSource]
wids=self.getLexicon(self._lexicon).get(splitSource)
......@@ -551,28 +551,37 @@ class UnTextIndex(Persistent, Implicit):
def query(self, s, default_operator=Or):
    """ Evaluate a query string.

    Convert the query string into a data structure of nested lists
    and strings, based on the grouping of whitespace-separated
    strings by parentheses and quotes.  The 'Near' operator is
    inserted between the strings of a quoted group.

    The Lexicon is given the opportunity to transform the
    data structure.  Stemming, wildcards, and translation are
    possible Lexicon services.

    Finally, the query list is normalized so that it and every
    sub-list consist of non-operator strings or lists separated
    by operators.  This list is evaluated.
    """
    # First replace any occurrences of " and not " with " andnot "
    s = re.sub('(?i)\s+and\s*not\s+', ' andnot ', s)

    # Parse parentheses and quotes
    q = parse(s)

    # Allow the Lexicon to process the query
    q = self.getLexicon(self._lexicon).query_hook(q)

    # Insert the default operator between any two search terms not
    # already joined by an operator.
    q = parse2(q, default_operator)

    # Evaluate the final 'expression'
    return self.evaluate(q)
......@@ -605,19 +614,17 @@ class UnTextIndex(Persistent, Implicit):
def evaluate(self, query):
"""Evaluate a parsed query"""
# There are two options if the query passed in is only one
# item. It means either it's an embedded query, in which case
# we'll recursively evaluate, other wise it's nothing for us
# to evaluate, and we just get the results and return them.
if (len(query) == 1):
if (type(query[0]) is ListType):
return self.evaluate(query[0])
# Strip off meaningless layers
while isinstance(query, ListType) and len(query) == 1:
query = query[0]
return self[query[0]] # __getitem__
# If it's not a list, assume a string or number
if not isinstance(query, ListType):
return self[query]
# Now we need to loop through the query and expand out
# Now we need to loop through the query and reduce
# operators. They are currently evaluated in the following
# order: AndNote -> And -> Or -> Near
# order: AndNot -> And -> Or -> Near
i = 0
while (i < len(query)):
if query[i] is AndNot:
......@@ -660,98 +667,91 @@ def parse(s):
def parse(s):
    """Parse a query string into a nested list structure.

    The string is lowercased; parenthesized groups become nested
    lists (via recursion) and quoted groups are handled by quotes().
    """
    # Merge fix: keep only the merged implementation, which uses the
    # (before, inside, after) tuple returned by the new parens().
    l = []
    tmp = string.lower(s)
    p = parens(tmp)
    while p is not None:
        # Look for quotes in the section of the string before
        # the parentheses, then parse the string inside the parens
        l = l + quotes(p[0])
        l.append(parse(p[1]))
        # continue looking through the rest of the string
        tmp = p[2]
        p = parens(tmp)
    return l + quotes(tmp)
def parse2(q, default_operator,
           operator_dict={AndNot: AndNot, And: And, Or: Or, Near: Near}):
    """Find operators and operands.

    Normalize q (in place) so that operands occupy the even positions
    and operators the odd ones, inserting default_operator wherever
    two operands are adjacent.  Recognized operators are canonicalized
    through operator_dict (read-only default; identity mapping here).
    """
    isop = operator_dict.has_key
    # Walk backwards so insertions do not disturb unvisited indices.
    i = len(q) - 1
    while i >= 0:
        e = q[i]
        if isinstance(e, ListType):
            q[i] = parse2(e, default_operator)
            if i % 2:
                # A sub-query in an operator slot: splice in the default.
                q.insert(i, default_operator)
        elif i % 2:
            # This element should be an operator
            if isop(e):
                # Ensure that it is identical, not merely equal.
                q[i] = operator_dict[e]
            else:
                # Insert the default operator.
                q.insert(i, default_operator)
        i = i - 1
    return q
def parens(s, parens_re=re.compile('[\(\)]').search):
index = open_index = paren_count = 0
while 1:
mo = parens_re(s, index)
if mo is None : break
def parens(s, parens_re=re.compile('[()]').search):
mo = parens_re(s)
if mo is None:
return
open_index = mo.start(0) + 1
paren_count = 0
while mo is not None:
index = mo.start(0)
if s[index] == '(':
paren_count = paren_count + 1
if open_index == 0 : open_index = index + 1
else:
paren_count = paren_count - 1
if paren_count == 0:
return (s[:open_index - 1], s[open_index:index],
s[index + 1:])
if paren_count < 0:
break
mo = parens_re(s, index + 1)
if paren_count == 0:
return open_index, index
else:
index = index + 1
if paren_count == 0: # No parentheses Found
return None
else:
raise QueryError, "Mismatched parentheses"
raise QueryError, "Mismatched parentheses"
def quotes(s, ws=(string.whitespace,)):
# split up quoted regions
splitted = re.split( '[%s]*\"[%s]*' % (ws * 2),s)
split=string.split
if (len(splitted) > 1):
if ((len(splitted) % 2) == 0): raise QueryError, "Mismatched quotes"
def quotes(s):
split=string.split
if '"' not in s:
return split(s)
for i in range(1,len(splitted),2):
# split the quoted region into words
splitted[i] = filter(None, split(splitted[i]))
# put the Proxmity operator in between quoted words
for j in range(1, len(splitted[i])):
splitted[i][j : j] = [ Near ]
for i in range(len(splitted)-1,-1,-2):
# split the non-quoted region into words
splitted[i:i+1] = filter(None, split(splitted[i]))
splitted = filter(None, splitted)
else:
# No quotes, so just split the string into words
splitted = filter(None, split(s))
return splitted
# split up quoted regions
splitted = re.split('\s*\"\s*', s)
if (len(splitted) % 2) == 0: raise QueryError, "Mismatched quotes"
for i in range(1,len(splitted),2):
# split the quoted region into words
words = splitted[i] = split(splitted[i])
# put the Proxmity operator in between quoted words
j = len(words) - 1
while j > 0:
words.insert(j, Near)
j = j - 1
i = len(splitted) - 1
while i >= 0:
# split the non-quoted region into words
splitted[i:i+1] = split(splitted[i])
i = i - 2
return filter(None, splitted)
......@@ -217,8 +217,8 @@ class Tests(unittest.TestCase):
"""This license has been certified as Open Source(tm).""",
"""I hope I get to work on time""",
]
def checkGlobQuery(self):
def globTest(self, qmap, rlist):
"Check a glob query"
index=self.dbopen()
index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()
......@@ -232,162 +232,61 @@ class Tests(unittest.TestCase):
index=self.dbopen()
r = index._apply_index({'text':'m*n'})
r=list(r[0].keys())
assert r == [0,2], r
r = list(index._apply_index(qmap)[0].keys())
assert r == rlist, r
return index._apply_index
def checkStarQuery(self):
"Check a star query"
self.globTest({'text':'m*n'}, [0,2])
def checkAndQuery(self):
"Check an AND query"
index=self.dbopen()
index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()
for i in range(len(self.sample_texts)):
self.doc.text=self.sample_texts[i]
index.index_object(i, self.doc)
get_transaction().commit()
self.dbclose()
index=self.dbopen()
r = index._apply_index({'text':'time and country'})
r=list(r[0].keys())
assert r == [0,], r
self.globTest({'text':'time and country'}, [0,])
def checkOrQuery(self):
"Check an OR query"
index=self.dbopen()
index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()
for i in range(len(self.sample_texts)):
self.doc.text=self.sample_texts[i]
index.index_object(i, self.doc)
get_transaction().commit()
self.dbclose()
self.globTest({'text':'time or country'}, [0,1,6])
index=self.dbopen()
r = index._apply_index({'text':'time or country'})
r=list(r[0].keys())
assert r == [0,1,6], r
def checkDefOrQuery(self):
"Check a default OR query"
self.globTest({'text':'time country'}, [0,1,6])
def checkNearQuery(self):
"""Check a NEAR query.. (NOTE:ACTUALLY AN 'OR' TEST!!)"""
# NEAR never worked, so Zopes post-2.3.1b3 define near to mean OR
index=self.dbopen()
index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()
for i in range(len(self.sample_texts)):
self.doc.text=self.sample_texts[i]
index.index_object(i, self.doc)
get_transaction().commit()
"""Check a NEAR query.. (NOTE:ACTUALLY AN 'AND' TEST!!)"""
# NEAR never worked, so Zopes post-2.3.1b3 define near to mean AND
self.globTest({'text':'time ... country'}, [0,])
self.dbclose()
index=self.dbopen()
def checkQuotesQuery(self):
"""Check a quoted query"""
ai = self.globTest({'text':'"This is the time"'}, [0,])
r = index._apply_index({'text':'time near country'})
r=list(r[0].keys())
assert r == [0,1,6], r
r = list(ai({'text':'"now is the time"'})[0].keys())
assert r == [], r
def checkAndNotQuery(self):
"Check an ANDNOT query"
index=self.dbopen()
index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()
for i in range(len(self.sample_texts)):
self.doc.text=self.sample_texts[i]
index.index_object(i, self.doc)
get_transaction().commit()
self.dbclose()
index=self.dbopen()
r = index._apply_index({'text':'time and not country'})
r=list(r[0].keys())
assert r == [6], r
self.globTest({'text':'time and not country'}, [6,])
def checkParenMatchingQuery(self):
"Check a query with parens"
index=self.dbopen()
index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()
ai = self.globTest({'text':'(time and country) men'}, [0,])
for i in range(len(self.sample_texts)):
self.doc.text=self.sample_texts[i]
index.index_object(i, self.doc)
get_transaction().commit()
self.dbclose()
index=self.dbopen()
r = index._apply_index({'text':'(time and country) men'})
r=list(r[0].keys())
assert r == [0], r
r = index._apply_index({'text':'(time and not country) or men'})
r=list(r[0].keys())
r = list(ai({'text':'(time and not country) or men'})[0].keys())
assert r == [0, 6], r
def checkQuoteMatchingQuery(self):
"Check a query with quotes.. this is known to fail under 2.3.1b3-"
index=self.dbopen()
index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()
for i in range(len(self.sample_texts)):
self.doc.text=self.sample_texts[i]
index.index_object(i, self.doc)
get_transaction().commit()
self.dbclose()
index=self.dbopen()
r = index._apply_index({'text':'"This is the time"'})
r=list(r[0].keys())
assert r == [0], r
r = index._apply_index({'text':'"now is the time"'})
r=list(r[0].keys())
assert r == [], r
def checkTextIndexOperatorQuery(self):
"Check a query with 'textindex_operator' in the request"
index=self.dbopen()
index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()
for i in range(len(self.sample_texts)):
self.doc.text=self.sample_texts[i]
index.index_object(i, self.doc)
get_transaction().commit()
self.dbclose()
index=self.dbopen()
r = index._apply_index({'text':'time men','textindex_operator':'and'})
r=list(r[0].keys())
assert r == [0], r
self.globTest({'text':'time men', 'textindex_operator':'and'}, [0,])
def checkNonExistentWord(self):
""" Check for nonexistent word """
index=self.dbopen()
index._lexicon = SearchIndex.GlobbingLexicon.GlobbingLexicon()
for i in range(len(self.sample_texts)):
self.doc.text=self.sample_texts[i]
index.index_object(i, self.doc)
get_transaction().commit()
self.dbclose()
index=self.dbopen()
r = index._apply_index({'text':'zop'})
r=list(r[0].keys())
assert r == [], r
self.globTest({'text':'zop'}, [])
def checkComplexQuery1(self):
""" Check complex query 1 """
self.globTest({'text':'((?ount* or get) and not wait) '
'"been *ert*"'}, [0, 1, 5, 6])
def test_suite():
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment