Fix quoted string & search key nesting.

It is not practical, as it is not easily possible to nest parsers. This fix stops unquoting strings in the lexer, and postpones it to the syntax tree walking code. Add tests showing search key nesting works with different unquoting rules. git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@40389 20353a03-c40f-0410-a6d1-a30d3c3de9de

Fix quoted string & search key nesting.
It is not practical, as it is not easily possible to nest parsers. This fix stops unquoting strings in the lexer, and postpones it to the syntax tree walking code. Add tests showing search key nesting works with different unquoting rules. git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@40389 20353a03-c40f-0410-a6d1-a30d3c3de9de
d882ae0a · Vincent Pelletier · 7e29fdc9 · d882ae0a · d882ae0a · d882ae0a
Commit d882ae0a authored Nov 18, 2010 by Vincent Pelletier
9 changed files
--- a/product/ZSQLCatalog/SQLCatalog.py
+++ b/product/ZSQLCatalog/SQLCatalog.py
@@ -52,7 +52,7 @@ from interfaces.query_catalog import ISearchKeyCatalog
 from zope.interface.verify import verifyClass
 from zope.interface import implements

-from SearchText import isAdvancedSearchText
+from SearchText import isAdvancedSearchText, dequote

 # Try to import ActiveObject in order to make SQLCatalog active
 try:
@@ -2062,8 +2062,13 @@ class Catalog(Folder,

  @profiler_decorator
  def _buildQueryFromAbstractSyntaxTreeNode(self, node, search_key):
+    if search_key.dequoteParsedText():
+      _dequote = dequote
+    else:
+      _dequote = lambda x: x
    if node.isLeaf():
-      result = search_key.buildQuery(node.getValue(), comparison_operator=node.getComparisonOperator())
+      result = search_key.buildQuery(_dequote(node.getValue()),
+        comparison_operator=node.getComparisonOperator())
    elif node.isColumn():
      result = self.buildQueryFromAbstractSyntaxTreeNode(node.getSubNode(), node.getColumnName())
    else:
@@ -2072,7 +2077,8 @@ class Catalog(Folder,
      append = query_list.append
      for subnode in node.getNodeList():
        if subnode.isLeaf():
-          value_dict.setdefault(subnode.getComparisonOperator(), []).append(subnode.getValue())
+          value_dict.setdefault(subnode.getComparisonOperator(),
+            []).append(_dequote(subnode.getValue()))
        else:
          subquery = self._buildQueryFromAbstractSyntaxTreeNode(subnode, search_key)
          if subquery is not None:

--- a/product/ZSQLCatalog/SearchKey/FullTextKey.py
+++ b/product/ZSQLCatalog/SearchKey/FullTextKey.py
@@ -30,7 +30,7 @@

 from SearchKey import SearchKey
 from Products.ZSQLCatalog.Query.SimpleQuery import SimpleQuery
-from Products.ZSQLCatalog.SearchText import FullText_parse
+from Products.ZSQLCatalog.SearchText import parse
 from Products.ZSQLCatalog.interfaces.search_key import ISearchKey
 from zope.interface.verify import verifyClass
 from Products.ZSQLCatalog.SQLCatalog import profiler_decorator
@@ -46,10 +46,13 @@ class FullTextKey(SearchKey):
  get_operator_from_value = False

  def parseSearchText(self, value, is_column):
-    return FullText_parse(value, is_column)
+    return parse(value, is_column)
+
+  def dequoteParsedText(self):  # Consulted by Catalog._buildQueryFromAbstractSyntaxTreeNode to decide whether parsed values go through dequote().
+    return False  # False: parsed values are handed to buildQuery unchanged, quotes included

  def _renderValueAsSearchText(self, value, operator):
-    return operator.asSearchText(value)
+    return '(%s)' % (value, )

  @profiler_decorator
  def _processSearchValue(self, search_value, logical_operator,

--- a/product/ZSQLCatalog/SearchKey/SearchKey.py
+++ b/product/ZSQLCatalog/SearchKey/SearchKey.py
@@ -375,5 +375,8 @@ class SearchKey(object):
  def parseSearchText(self, value, is_column):
    return None

+  def dequoteParsedText(self):  # Consulted by Catalog._buildQueryFromAbstractSyntaxTreeNode to decide whether parsed values go through dequote().
+    return True  # base-class default: parsed values are dequoted before buildQuery
+
 verifyClass(ISearchKey, SearchKey)

--- a/product/ZSQLCatalog/SearchKey/SphinxSEFullTextKey.py
+++ b/product/ZSQLCatalog/SearchKey/SphinxSEFullTextKey.py
@@ -27,7 +27,7 @@

 from SearchKey import SearchKey
 from Products.ZSQLCatalog.Query.SimpleQuery import SimpleQuery
-from Products.ZSQLCatalog.SearchText import FullText_parse
+from Products.ZSQLCatalog.SearchText import parse
 from Products.ZSQLCatalog.interfaces.search_key import ISearchKey
 from zope.interface.verify import verifyClass
 from Products.ZSQLCatalog.SQLCatalog import profiler_decorator
@@ -40,10 +40,13 @@ class SphinxSEFullTextKey(SearchKey):
  get_operator_from_value = False

  def parseSearchText(self, value, is_column):
-    return FullText_parse(value, is_column)
+    return parse(value, is_column)
+
+  def dequoteParsedText(self):  # Consulted by Catalog._buildQueryFromAbstractSyntaxTreeNode to decide whether parsed values go through dequote().
+    return False  # False: parsed values are handed to buildQuery unchanged, quotes included

  def _renderValueAsSearchText(self, value, operator):
-    return operator.asSearchText(value)
+    return '(%s)' % (value, )

  @profiler_decorator
  def _buildQuery(self, operator_value_dict, logical_operator, parsed, group):

--- a/product/ZSQLCatalog/SearchText/FullTextSearchTextParser.py
+++ b/product/ZSQLCatalog/SearchText/FullTextSearchTextParser.py
-##############################################################################
-#
-# Copyright (c) 2010 Nexedi SA and Contributors. All Rights Reserved.
-#                    Vincent Pelletier <vincent@nexedi.com>
-#
-# WARNING: This program as such is intended to be used by professional
-# programmers who take the whole responsability of assessing all potential
-# consequences resulting from its eventual inadequacies and bugs
-# End users who are looking for a ready-to-use solution with commercial
-# garantees and support are strongly adviced to contract a Free Software
-# Service Company
-#
-# This program is Free Software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-#
-##############################################################################
-
-from lexer import update_docstrings
-from AdvancedSearchTextParser import ValueNode, NotNode, LogicalNode
-from AdvancedSearchTextParser import ColumnNode, AdvancedSearchTextParser
-
-class FullTextSearchTextParser(AdvancedSearchTextParser):
-
-  # IMPORTANT:
-  # In short: Don't remove any token definition below even if they look
-  # useless.
-  # In detail: The lex methods below are redefined here because of ply nice
-  # feature of prioritizing tokens using the *line* *number* at which they
-  # are defined. As we inherit those methods from another class from another
-  # file (which doesn't match this file's content, of course) we must redefine
-  # wrapper methods to enforce token priority. Kudos to ply for so much
-  # customisable behaviour. Not.
-
-  def t_LEFT_PARENTHESE(self, t):
-    return AdvancedSearchTextParser.t_LEFT_PARENTHESE(self, t)
-
-  def t_RIGHT_PARENTHESE(self, t):
-    return AdvancedSearchTextParser.t_RIGHT_PARENTHESE(self, t)
-
-  def t_OPERATOR(self, t):
-    return AdvancedSearchTextParser.t_OPERATOR(self, t)
-
-  def t_STRING(self, t):
-    # Here is the only difference between AdvancedSearchTextParser and this
-    # class: strings are kept escaped (ie, they are considered as WORDs).
-    return AdvancedSearchTextParser.t_WORD(self, t)
-
-  def t_COLUMN(self, t):
-    return AdvancedSearchTextParser.t_COLUMN(self, t)
-
-  def t_OR(self, t):
-    return AdvancedSearchTextParser.t_OR(self, t)
-
-  def t_AND(self, t):
-    return AdvancedSearchTextParser.t_AND(self, t)
-
-  def t_NOT(self, t):
-    return AdvancedSearchTextParser.t_NOT(self, t)
-
-  def t_WORD(self, t):
-    return AdvancedSearchTextParser.t_WORD(self, t)
-
-update_docstrings(FullTextSearchTextParser)
-
--- a/product/ZSQLCatalog/SearchText/SearchTextParser.py
+++ b/product/ZSQLCatalog/SearchText/SearchTextParser.py
@@ -31,7 +31,6 @@
 import threading
 from AdvancedSearchTextDetector import AdvancedSearchTextDetector
 from AdvancedSearchTextParser import AdvancedSearchTextParser
-from FullTextSearchTextParser import FullTextSearchTextParser
 from lexer import ParserOrLexerError
 try:
  from Products.ZSQLCatalog.SQLCatalog import profiler_decorator
@@ -73,7 +72,6 @@ class ParserPool(object):
 parser_pool = ParserPool()
 DETECTOR_ID = parser_pool.register(AdvancedSearchTextDetector)
 PARSER_ID = parser_pool.register(AdvancedSearchTextParser)
-FULLTEXT_PARSER_ID = parser_pool.register(FullTextSearchTextParser)

 def safeParsingDecorator(func):
  """
@@ -105,11 +103,6 @@ def parse(input, is_column, *args, **kw):
    result = None
  return result

-@profiler_decorator
-@safeParsingDecorator
-def FullText_parse(input, is_column, *args, **kw):
-  return parser_pool.get(FULLTEXT_PARSER_ID)(input, is_column, *args, **kw)
-
 if __name__ == '__main__':
  class Query:
    def __init__(self, column, value, comparison_operator='='):

--- a/product/ZSQLCatalog/SearchText/__init__.py
+++ b/product/ZSQLCatalog/SearchText/__init__.py
-from SearchTextParser import parse, isAdvancedSearchText, FullText_parse
+from SearchTextParser import parse, isAdvancedSearchText
+
+def dequote(value):  # Strip the enclosing double quotes from a quoted string and turn \" into "; any other value is returned unchanged.
+  assert isinstance(value, basestring), value  # non-string input fails here (only while assertions are enabled)
+  if len(value) >= 2 and value[0] == value[-1] == '"' and value[-2] != '\\':  # NOTE(review): a quoted value ending in an escaped backslash (backslash, backslash, quote) also fails this guard and is returned still quoted - confirm intended
+    escaped = False  # True while the previous character was a backslash not yet emitted
+    value_list = []
+    append = value_list.append
+    for char in value[1:-1]:  # walk the text between the enclosing quotes
+      if escaped:
+        escaped = False
+        if char != '"':
+          append('\\')  # the backslash was not escaping a quote: emit it as-is
+      else:
+        if char == '\\':
+          escaped = True
+          continue  # hold the backslash back until the next character is seen
+        if char == '"':
+          raise ValueError('Cannot dequote substrings.')  # unescaped quote inside the value
+      append(char)
+    assert not escaped  # cannot trigger: the guard above excludes a backslash as last inner character
+    value = ''.join(value_list)
+  return value

--- a/product/ZSQLCatalog/SearchText/lexer.py
+++ b/product/ZSQLCatalog/SearchText/lexer.py
@@ -104,22 +104,6 @@ class lexer(object):

  def t_STRING(self, t):
    r'"(\\.|[^\\"])*"'
-    # Unescape value and strip surrounding quotes
-    value_list = []
-    append = value_list.append
-    escaped = False
-    for char in t.value[1:-1]:
-      if escaped:
-        escaped = False
-        if char != '"':
-          append('\\')
-      else:
-        if char == '\\':
-          escaped = True
-          continue
-      append(char)
-    assert not escaped
-    t.value = ''.join(value_list)
    return t

  def t_COLUMN(self, t):

--- a/product/ZSQLCatalog/tests/testSQLCatalog.py
+++ b/product/ZSQLCatalog/tests/testSQLCatalog.py
@@ -212,7 +212,8 @@ class TestSQLCatalog(unittest.TestCase):
  def assertCatalogRaises(self, exception, kw):
    self.assertRaises(exception, self._catalog, src__=1, query_table='foo', **kw)

-  def catalog(self, reference_tree, kw, check_search_text=True):
+  def catalog(self, reference_tree, kw, check_search_text=True,
+      check_select_expression=True):
    reference_param_dict = self._catalog._queryResults(query_table='foo', **kw)
    query = self._catalog.buildQuery(kw)
    self.assertEqual(reference_tree, query)
@@ -220,6 +221,9 @@ class TestSQLCatalog(unittest.TestCase):
    if check_search_text:
      # XXX: sould "keyword" be always used for search text searches ?
      search_text_param_dict = self._catalog._queryResults(query_table='foo', keyword=search_text)
+      if not check_select_expression:
+        search_text_param_dict.pop('select_expression')
+        reference_param_dict.pop('select_expression')
      self.assertEqual(reference_param_dict, search_text_param_dict,
          'Query: %r\nSearchText: %r\nReference: %r\nSecond rendering: %r' % \
                       (query, search_text, reference_param_dict, search_text_param_dict))
@@ -519,7 +523,7 @@ class TestSQLCatalog(unittest.TestCase):
                 {'fulltext': 'a+b'})
    self.catalog(ReferenceQuery(ReferenceQuery(operator='match_boolean',
      fulltext=MatchList(['a +b', '+b a'])), operator='and'),
-                 {'fulltext': 'a +b'})
+                 {'fulltext': 'a +b'}, check_search_text=False)
    self.catalog(ReferenceQuery(ReferenceQuery(
        ReferenceQuery(operator='=', uid='foo'),
        ReferenceQuery(operator='match_boolean',
@@ -534,6 +538,20 @@ class TestSQLCatalog(unittest.TestCase):
    self.catalog(ReferenceQuery(ReferenceQuery(operator='match',
      fulltext='"foo" bar "baz"'), operator='and'),
      {'fulltext': '"foo" bar "baz"'})
+    # ...But each column must follow rules defined in configured SearchKey for
+    # that column (in this case: quotes must be stripped).
+    ref_query = ReferenceQuery(ReferenceQuery(ReferenceQuery(operator='match',
+      fulltext='"foo" bar'), ReferenceQuery(operator='=',
+      default='hoge \"pon'), operator='and'), operator='and')
+    self.catalog(ref_query, {
+      'keyword': 'default:"hoge \\"pon" AND fulltext:("foo" bar)'})
+    self.catalog(ref_query, {
+      'fulltext': '"foo" bar AND default:"hoge \\"pon"'})
+    ref_query = ReferenceQuery(ReferenceQuery(ReferenceQuery(operator='match',
+      fulltext='"\\"foo\\" bar"'), ReferenceQuery(operator='=',
+      default='hoge \"pon'), operator='and'), operator='and')
+    self.catalog(ref_query, {
+      'keyword': 'default:"hoge \\"pon" AND fulltext:"\\"foo\\" bar"'})

  def test_DefaultKeyTextRendering(self):
    self.catalog(ReferenceQuery(ReferenceQuery(operator='like', default='a% b'), operator='and'),