Commit d882ae0a authored by Vincent Pelletier's avatar Vincent Pelletier

Fix quoted string & search key nesting.

It is not practical, as it is not easily possible to nest parsers.
This fix stops unquoting strings in the lexer, and postpones it to the syntax
tree walking code.
Add tests showing search key nesting works with different unquoting rules.

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@40389 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 7e29fdc9
......@@ -52,7 +52,7 @@ from interfaces.query_catalog import ISearchKeyCatalog
from zope.interface.verify import verifyClass
from zope.interface import implements
from SearchText import isAdvancedSearchText
from SearchText import isAdvancedSearchText, dequote
# Try to import ActiveObject in order to make SQLCatalog active
try:
......@@ -2062,8 +2062,13 @@ class Catalog(Folder,
@profiler_decorator
def _buildQueryFromAbstractSyntaxTreeNode(self, node, search_key):
if search_key.dequoteParsedText():
_dequote = dequote
else:
_dequote = lambda x: x
if node.isLeaf():
result = search_key.buildQuery(node.getValue(), comparison_operator=node.getComparisonOperator())
result = search_key.buildQuery(_dequote(node.getValue()),
comparison_operator=node.getComparisonOperator())
elif node.isColumn():
result = self.buildQueryFromAbstractSyntaxTreeNode(node.getSubNode(), node.getColumnName())
else:
......@@ -2072,7 +2077,8 @@ class Catalog(Folder,
append = query_list.append
for subnode in node.getNodeList():
if subnode.isLeaf():
value_dict.setdefault(subnode.getComparisonOperator(), []).append(subnode.getValue())
value_dict.setdefault(subnode.getComparisonOperator(),
[]).append(_dequote(subnode.getValue()))
else:
subquery = self._buildQueryFromAbstractSyntaxTreeNode(subnode, search_key)
if subquery is not None:
......
......@@ -30,7 +30,7 @@
from SearchKey import SearchKey
from Products.ZSQLCatalog.Query.SimpleQuery import SimpleQuery
from Products.ZSQLCatalog.SearchText import FullText_parse
from Products.ZSQLCatalog.SearchText import parse
from Products.ZSQLCatalog.interfaces.search_key import ISearchKey
from zope.interface.verify import verifyClass
from Products.ZSQLCatalog.SQLCatalog import profiler_decorator
......@@ -46,10 +46,13 @@ class FullTextKey(SearchKey):
get_operator_from_value = False
def parseSearchText(self, value, is_column):
return FullText_parse(value, is_column)
return parse(value, is_column)
def dequoteParsedText(self):
return False
def _renderValueAsSearchText(self, value, operator):
return operator.asSearchText(value)
return '(%s)' % (value, )
@profiler_decorator
def _processSearchValue(self, search_value, logical_operator,
......
......@@ -375,5 +375,8 @@ class SearchKey(object):
def parseSearchText(self, value, is_column):
return None
def dequoteParsedText(self):
return True
verifyClass(ISearchKey, SearchKey)
......@@ -27,7 +27,7 @@
from SearchKey import SearchKey
from Products.ZSQLCatalog.Query.SimpleQuery import SimpleQuery
from Products.ZSQLCatalog.SearchText import FullText_parse
from Products.ZSQLCatalog.SearchText import parse
from Products.ZSQLCatalog.interfaces.search_key import ISearchKey
from zope.interface.verify import verifyClass
from Products.ZSQLCatalog.SQLCatalog import profiler_decorator
......@@ -40,10 +40,13 @@ class SphinxSEFullTextKey(SearchKey):
get_operator_from_value = False
def parseSearchText(self, value, is_column):
return FullText_parse(value, is_column)
return parse(value, is_column)
def dequoteParsedText(self):
return False
def _renderValueAsSearchText(self, value, operator):
return operator.asSearchText(value)
return '(%s)' % (value, )
@profiler_decorator
def _buildQuery(self, operator_value_dict, logical_operator, parsed, group):
......
##############################################################################
#
# Copyright (c) 2010 Nexedi SA and Contributors. All Rights Reserved.
# Vincent Pelletier <vincent@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from lexer import update_docstrings
from AdvancedSearchTextParser import ValueNode, NotNode, LogicalNode
from AdvancedSearchTextParser import ColumnNode, AdvancedSearchTextParser
# Variant of AdvancedSearchTextParser in which STRING tokens are handled by
# the parent's t_WORD rule instead of its t_STRING rule (see t_STRING below).
# Every other token rule simply delegates to the parent implementation.
#
# NOTE(review): the t_* methods below deliberately carry no docstrings of
# their own; update_docstrings() is applied to this class right after its
# definition, presumably to copy the token patterns from the parent's
# docstrings - confirm in lexer.py before adding any docstring here.
class FullTextSearchTextParser(AdvancedSearchTextParser):
    # IMPORTANT:
    # In short: Don't remove any token definition below even if they look
    # useless.
    # In detail: The lex methods below are redefined here because of ply nice
    # feature of prioritizing tokens using the *line* *number* at which they
    # are defined. As we inherit those methods from another class from another
    # file (which doesn't match this file's content, of course) we must redefine
    # wrapper methods to enforce token priority. Kudos to ply for so much
    # customisable behaviour. Not.
    # Consequence: keep the definitions below in this exact order.
    def t_LEFT_PARENTHESE(self, t):
        return AdvancedSearchTextParser.t_LEFT_PARENTHESE(self, t)
    def t_RIGHT_PARENTHESE(self, t):
        return AdvancedSearchTextParser.t_RIGHT_PARENTHESE(self, t)
    def t_OPERATOR(self, t):
        return AdvancedSearchTextParser.t_OPERATOR(self, t)
    def t_STRING(self, t):
        # Here is the only difference between AdvancedSearchTextParser and this
        # class: strings are kept escaped (ie, they are considered as WORDs).
        # Note that this calls the parent's t_WORD, not its t_STRING.
        return AdvancedSearchTextParser.t_WORD(self, t)
    def t_COLUMN(self, t):
        return AdvancedSearchTextParser.t_COLUMN(self, t)
    def t_OR(self, t):
        return AdvancedSearchTextParser.t_OR(self, t)
    def t_AND(self, t):
        return AdvancedSearchTextParser.t_AND(self, t)
    def t_NOT(self, t):
        return AdvancedSearchTextParser.t_NOT(self, t)
    def t_WORD(self, t):
        return AdvancedSearchTextParser.t_WORD(self, t)
update_docstrings(FullTextSearchTextParser)
......@@ -31,7 +31,6 @@
import threading
from AdvancedSearchTextDetector import AdvancedSearchTextDetector
from AdvancedSearchTextParser import AdvancedSearchTextParser
from FullTextSearchTextParser import FullTextSearchTextParser
from lexer import ParserOrLexerError
try:
from Products.ZSQLCatalog.SQLCatalog import profiler_decorator
......@@ -73,7 +72,6 @@ class ParserPool(object):
parser_pool = ParserPool()
DETECTOR_ID = parser_pool.register(AdvancedSearchTextDetector)
PARSER_ID = parser_pool.register(AdvancedSearchTextParser)
FULLTEXT_PARSER_ID = parser_pool.register(FullTextSearchTextParser)
def safeParsingDecorator(func):
"""
......@@ -105,11 +103,6 @@ def parse(input, is_column, *args, **kw):
result = None
return result
@profiler_decorator
@safeParsingDecorator
def FullText_parse(input, is_column, *args, **kw):
return parser_pool.get(FULLTEXT_PARSER_ID)(input, is_column, *args, **kw)
if __name__ == '__main__':
class Query:
def __init__(self, column, value, comparison_operator='='):
......
from SearchTextParser import parse, isAdvancedSearchText, FullText_parse
from SearchTextParser import parse, isAdvancedSearchText
def dequote(value):
    r"""Strip the surrounding double quotes from value and unescape inner quotes.

    value (string)
      Text to dequote.

    If value is enclosed in a pair of double quotes and the closing quote is
    not itself escaped, return the enclosed text with every \" sequence
    replaced by a bare double quote. Any other backslash sequence (including
    an escaped backslash) is kept verbatim, backslash included.
    In every other case value is returned unchanged.

    Raises ValueError when the enclosed text contains an unescaped double
    quote (value is made of several quoted substrings).
    Raises AssertionError when value is not a string.
    """
    # Resolve the string base type on both Python 2 and Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    assert isinstance(value, string_types), value
    if len(value) < 2 or value[0] != '"' or value[-1] != '"':
        return value
    content = value[1:-1]
    # The closing quote is escaped only when it is preceded by an odd number
    # of backslashes: an even run is made of escaped backslashes only.
    # Checking just the last character would wrongly leave values such as
    # "a\\" quoted.
    trailing_backslash_count = len(content) - len(content.rstrip('\\'))
    if trailing_backslash_count % 2:
        return value
    escaped = False
    value_list = []
    append = value_list.append
    for char in content:
        if escaped:
            escaped = False
            if char != '"':
                # Only \" is unescaped, other sequences keep their backslash.
                append('\\')
        elif char == '\\':
            escaped = True
            continue
        elif char == '"':
            raise ValueError('Cannot dequote substrings.')
        append(char)
    # Thanks to the parity check above, no escape can be pending here.
    return ''.join(value_list)
......@@ -104,22 +104,6 @@ class lexer(object):
def t_STRING(self, t):
r'"(\\.|[^\\"])*"'
# Unescape value and strip surrounding quotes
value_list = []
append = value_list.append
escaped = False
for char in t.value[1:-1]:
if escaped:
escaped = False
if char != '"':
append('\\')
else:
if char == '\\':
escaped = True
continue
append(char)
assert not escaped
t.value = ''.join(value_list)
return t
def t_COLUMN(self, t):
......
......@@ -212,7 +212,8 @@ class TestSQLCatalog(unittest.TestCase):
def assertCatalogRaises(self, exception, kw):
self.assertRaises(exception, self._catalog, src__=1, query_table='foo', **kw)
def catalog(self, reference_tree, kw, check_search_text=True):
def catalog(self, reference_tree, kw, check_search_text=True,
check_select_expression=True):
reference_param_dict = self._catalog._queryResults(query_table='foo', **kw)
query = self._catalog.buildQuery(kw)
self.assertEqual(reference_tree, query)
......@@ -220,6 +221,9 @@ class TestSQLCatalog(unittest.TestCase):
if check_search_text:
# XXX: should "keyword" always be used for search text searches?
search_text_param_dict = self._catalog._queryResults(query_table='foo', keyword=search_text)
if not check_select_expression:
search_text_param_dict.pop('select_expression')
reference_param_dict.pop('select_expression')
self.assertEqual(reference_param_dict, search_text_param_dict,
'Query: %r\nSearchText: %r\nReference: %r\nSecond rendering: %r' % \
(query, search_text, reference_param_dict, search_text_param_dict))
......@@ -519,7 +523,7 @@ class TestSQLCatalog(unittest.TestCase):
{'fulltext': 'a+b'})
self.catalog(ReferenceQuery(ReferenceQuery(operator='match_boolean',
fulltext=MatchList(['a +b', '+b a'])), operator='and'),
{'fulltext': 'a +b'})
{'fulltext': 'a +b'}, check_search_text=False)
self.catalog(ReferenceQuery(ReferenceQuery(
ReferenceQuery(operator='=', uid='foo'),
ReferenceQuery(operator='match_boolean',
......@@ -534,6 +538,20 @@ class TestSQLCatalog(unittest.TestCase):
self.catalog(ReferenceQuery(ReferenceQuery(operator='match',
fulltext='"foo" bar "baz"'), operator='and'),
{'fulltext': '"foo" bar "baz"'})
# ...But each column must follow rules defined in configured SearchKey for
# that column (in this case: quotes must be stripped).
ref_query = ReferenceQuery(ReferenceQuery(ReferenceQuery(operator='match',
fulltext='"foo" bar'), ReferenceQuery(operator='=',
default='hoge \"pon'), operator='and'), operator='and')
self.catalog(ref_query, {
'keyword': 'default:"hoge \\"pon" AND fulltext:("foo" bar)'})
self.catalog(ref_query, {
'fulltext': '"foo" bar AND default:"hoge \\"pon"'})
ref_query = ReferenceQuery(ReferenceQuery(ReferenceQuery(operator='match',
fulltext='"\\"foo\\" bar"'), ReferenceQuery(operator='=',
default='hoge \"pon'), operator='and'), operator='and')
self.catalog(ref_query, {
'keyword': 'default:"hoge \\"pon" AND fulltext:"\\"foo\\" bar"'})
def test_DefaultKeyTextRendering(self):
self.catalog(ReferenceQuery(ReferenceQuery(operator='like', default='a% b'), operator='and'),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment