Commit ca1d97b2 authored by Andreas Jung

- Collector #250: applied several patches for TextIndex for better
  unicode support for the GlobbingLexicon
parent f4a0d46e
...@@ -35,6 +35,9 @@ Zope Changes ...@@ -35,6 +35,9 @@ Zope Changes
to display unicode strings in the vocabulary properly (now using to display unicode strings in the vocabulary properly (now using
UTF-8 encoding for display purposes) UTF-8 encoding for display purposes)
- Collector #250: applied several patches for TextIndex for better
unicode support for the GlobbingLexicon
Zope 2.5.1 beta 1 Zope 2.5.1 beta 1
......
...@@ -26,6 +26,7 @@ from randid import randid ...@@ -26,6 +26,7 @@ from randid import randid
from Products.PluginIndexes.TextIndex.TextIndex import Or from Products.PluginIndexes.TextIndex.TextIndex import Or
from Products.PluginIndexes.TextIndex.TextIndex import Op from Products.PluginIndexes.TextIndex.TextIndex import Op
from types import UnicodeType
class GlobbingLexicon(Lexicon): class GlobbingLexicon(Lexicon):
"""Lexicon which supports basic globbing function ('*' and '?'). """Lexicon which supports basic globbing function ('*' and '?').
...@@ -250,9 +251,15 @@ class GlobbingLexicon(Lexicon): ...@@ -250,9 +251,15 @@ class GlobbingLexicon(Lexicon):
""" """
# Remove characters that are meaningful in a regex # Remove characters that are meaningful in a regex
if not isinstance(pat, UnicodeType):
transTable = string.maketrans("", "") transTable = string.maketrans("", "")
result = string.translate(pat, transTable, result = string.translate(pat, transTable,
r'()&|!@#$%^{}\<>.') r'()&|!@#$%^{}\<>.')
else:
transTable={}
for ch in r'()&|!@#$%^{}\<>.':
transTable[ord(ch)]=None
result=pat.translate(transTable)
# First, deal with multi-character globbing # First, deal with multi-character globbing
result = result.replace( '*', '.*') result = result.replace( '*', '.*')
......
...@@ -222,6 +222,59 @@ class Tests(unittest.TestCase): ...@@ -222,6 +222,59 @@ class Tests(unittest.TestCase):
self.globTest({'text':'((?ount* or get) and not wait) ' self.globTest({'text':'((?ount* or get) and not wait) '
'"been *ert*"'}, [0, 1, 5, 6]) '"been *ert*"'}, [0, 1, 5, 6])
# same tests, unicode strings
def checkStarQueryUnicode(self):
    """Star ('*') glob query given as a unicode string; expects ids 0 and 2."""
    query = {'text': u'm*n'}
    expected_ids = [0, 2]
    self.globTest(query, expected_ids)
def checkAndQueryUnicode(self):
    """Explicit AND query given as a unicode string; expects id 0 only."""
    query = {'text': u'time and country'}
    expected_ids = [0]
    self.globTest(query, expected_ids)
def checkOrQueryUnicode(self):
    """Explicit OR query given as a unicode string; expects ids 0, 1 and 6."""
    query = {'text': u'time or country'}
    expected_ids = [0, 1, 6]
    self.globTest(query, expected_ids)
def checkDefOrQueryUnicode(self):
    """Two bare words (implicit OR) as a unicode string; expects ids 0, 1 and 6."""
    query = {'text': u'time country'}
    expected_ids = [0, 1, 6]
    self.globTest(query, expected_ids)
def checkNearQueryUnicode(self):
    """NEAR ('...') query as a unicode string -- in practice an AND test.

    Expects id 0 only, the same result as the explicit AND query.
    """
    # NEAR never worked, so Zope releases after 2.3.1b3 define NEAR to
    # mean AND; this test therefore checks AND semantics.
    query = {'text': u'time ... country'}
    expected_ids = [0]
    self.globTest(query, expected_ids)
def checkQuotesQueryUnicode(self):
    """Check quoted (phrase) queries given as unicode strings.

    The phrase "This is the time" is expected to yield id 0 only; the
    phrase "now is the time" is expected to yield no ids at all.
    """
    ai = self.globTest({'text': u'"This is the time"'}, [0,])
    # Query the object returned by globTest again.  The query is a unicode
    # literal (it used to be a byte string) so that this second lookup
    # also goes through the unicode path this test is meant to cover.
    r = list(ai({'text': u'"now is the time"'})[0].keys())
    assert r == [], r
def checkAndNotQueryUnicode(self):
    """AND NOT query given as a unicode string; expects id 6 only."""
    query = {'text': u'time and not country'}
    expected_ids = [6]
    self.globTest(query, expected_ids)
def checkParenMatchingQueryUnicode(self):
    """Parenthesised sub-expressions in unicode queries.

    First runs u'(time and country) men' expecting id 0, then reuses the
    object returned by globTest for a second parenthesised query whose
    result keys must be exactly [0, 6].
    """
    first_query = {'text': u'(time and country) men'}
    ai = self.globTest(first_query, [0])

    second_query = {'text': u'(time and not country) or men'}
    result_set = ai(second_query)[0]
    r = list(result_set.keys())
    assert r == [0, 6], r
def checkTextIndexOperatorQueryUnicode(self):
    """Query passed as a mapping with an explicit 'operator' entry (unicode).

    Expects id 0 only for u'time men' combined with operator 'and'.
    """
    text_spec = {u'query': u'time men', 'operator': 'and'}
    expected_ids = [0]
    self.globTest({'text': text_spec}, expected_ids)
def checkNonExistentWordUnicode(self):
    """Unicode query for the word u'zop'; expects an empty result."""
    query = {'text': u'zop'}
    expected_ids = []
    self.globTest(query, expected_ids)
def checkComplexQuery1Unicode(self):
    """Complex unicode query mixing globs, parentheses, operators and a phrase.

    Expects ids 0, 1, 5 and 6.
    """
    # Adjacent literals are joined at compile time into a single query string.
    query_text = (u'((?ount* or get) and not wait) '
                  '"been *ert*"')
    expected_ids = [0, 1, 5, 6]
    self.globTest({'text': query_text}, expected_ids)
def test_suite(): def test_suite():
return unittest.makeSuite(Tests, 'check') return unittest.makeSuite(Tests, 'check')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment