Commit 97fb950a authored by Vincent Pelletier's avatar Vincent Pelletier

Replace DocumentSearch.py extension by Base_parseSearchString restricted...

Replace DocumentSearch.py extension by Base_parseSearchString restricted script using catalog API to access expression parser.

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@26878 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 30326e34
##############################################################################
#
# Copyright (c) 2006-2007 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
"""
RULES
Single arguments:
- arg:value translates into arg='value' in query
- quotes are cleared
- if value contains spaces, punctuation or anything else it has to be put in quotes
- file is source_reference (original file name)
- language, version, reference
Multiple arguments:
- arg:xxx works the same way
- arg:(xxx,yyy) ORs both
- arg:all translates into empty tuple, which implies all available values
- state (simulation_state), type (portal_type)
Everything else is treated as searchabletext
"""
# XXX score:
# pythonicity: high
# obfuscation level: brain-twisting
# how to customize:
# (1) think for two hours
# (2) type for 20 seconds
import re
import sys
sys.path.append('/usr/lib/zope/lib/python/')
from DateTime import DateTime
def dateRangeProc(s):
    """
    Turn a relative age such as '3m' into a 'creation_from' criterion.

    s -- text that followed 'created:'. It must start with a whole number
         immediately followed by a unit letter: 'w' (counted as 7 days),
         'm' (30 days) or 'y' (365 days). Whatever follows the unit letter
         is ignored, so '3mth' is read as '3m'.

    Returns ('creation_from', DateTime() - number_of_days), or an empty
    tuple when s does not start with <digits><unit>.
    """
    # \d+ (not \d) so that ages of ten units or more are understood too.
    m = re.match(r'(\d+)([wmy]).*', s)
    if m is None:
        # Explicit test instead of relying on an AttributeError from
        # m.groups(); the former "except AttributeError, IndexError:" only
        # caught AttributeError and is not valid syntax on Python 3.
        return ()
    count, unit = m.groups()
    days_per_unit = {'w': 7, 'm': 30, 'y': 365}
    return ('creation_from', DateTime() - int(count) * days_per_unit[unit])
# parsing defined here
# Tokenizer: matches "key:value" where value is double-quoted, a
# parenthesised list, or a bare run of word characters and ( ) , / - .
# The pattern is one capturing group, so r.split() returns the matched
# tokens as well as the text found between them.
r = re.compile('(\w+:"[^"]+"|\w+:\([^)]+\)|\w+:[\(\),\w/\-.]+)')


def filetyper(s):
    # filetype:doc -> source_reference '%.doc'
    return ('source_reference', '%%.%s' % s)


def filestripper(s):
    # file:"my file.doc" -> source_reference without the double quotes
    return ('source_reference', s.replace('"', ''))


def state(s):
    return ('simulation_state', parsestates(s))


def type(s):
    # Shadows the builtin on purpose: the name is part of this module.
    return ('portal_type', parsestates(s))


# Search keyword -> handler. A callable handler returns a (key, value) pair;
# a plain string is the key under which the raw value is stored.
paramsmap = {
    'file': filestripper,
    'type': type,
    'reference': 'reference',
    'filetype': filetyper,
    'state': state,
    'language': 'language',
    'version': 'version',
    'created': dateRangeProc,
}
def parsestates(s):
    """
    Interpret the value given to a multi-valued keyword (state, type).

    'all'      -> ()            (empty tuple)
    '(a,"b")'  -> ['a', 'b']    (quotes removed, empty items skipped)
    '"a"'      -> 'a'           (quotes removed)
    """
    def unquote(text):
        return text.replace('"', '').replace("'", "")

    if s == 'all':
        return ()
    # Indexing is kept rather than startswith/endswith: an empty string
    # raises IndexError here, exactly as it always did.
    parenthesised = s[0] == '(' and s[-1] == ')'
    if not parenthesised:
        return unquote(s)
    values = []
    for item in s[1:-1].split(','):
        if item != '':
            values.append(unquote(item))
    return values
def analyze(params):
    """
    Build the callback that files one piece of a split search string.

    params -- dict receiving the search criteria. Its 'searchabletext'
              entry is reset to '' here and grown by the callback.

    Returns a one-argument function. Given a piece of text it either
    appends it to params['searchabletext'] (no colon in the piece) or
    stores a criterion derived from "key:value" using paramsmap.
    """
    params['searchabletext'] = ''

    def cutter(s):
        ss = s.split(':')
        if len(ss) == 1:
            # Plain text: appended untouched, surrounding spaces included.
            params['searchabletext'] += ss[0]
            return
        if len(ss) != 2:
            # NOTE(review): pieces holding more than one colon are dropped,
            # as they always were - confirm this is intended.
            return
        key, value = ss
        handler = paramsmap.get(key)
        if callable(handler):
            try:
                ps = handler(value)
                params[ps[0]] = ps[1]
            except IndexError:
                # The handler had nothing to offer (empty value, or an
                # empty tuple returned): skip this criterion.
                return
        elif handler is not None:
            # Known keyword mapped to a plain criterion name.
            params[handler] = value
        else:
            # Unknown keyword: passed through under its own name.
            params[key] = value
    return cutter
def parseSearchString(searchstring):
    """
    Parse a user search string into a dict of search criteria.

    searchstring -- free text mixed with "key:value" tokens.

    Returns a dict that always holds 'searchabletext' (the free text with
    leading and trailing whitespace removed) plus one entry per criterion
    recognised in the string.
    """
    params = {}
    cutter = analyze(params)
    # Explicit loop: map() would be lazy on Python 3 and never call cutter.
    for piece in r.split(searchstring):
        cutter(piece)
    params['searchabletext'] = params['searchabletext'].strip()
    return params
if __name__ == '__main__':
    # Manual smoke test. Other inputs that were used while developing:
    #searchstring='byle cisnie zego file:"ble ble.doc" filetype:doc type:Text poza tym reference:abc-def'
    #searchstring='byle "cisnie zego" state:draft file:"ble ble.doc" type:("Site","Text") poza tym reference:abc-def dupa:kwas/zbita'
    searchstring = 'byleco created:3mth'
    # Call form of print: same output on Python 2, valid on Python 3.
    print(parseSearchString(searchstring))
......@@ -3,27 +3,266 @@
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<tuple>
<global name="ExternalMethod" module="Products.ExternalMethod.ExternalMethod"/>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
<tuple/>
</tuple>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_function</string> </key>
<value> <string>parseSearchString</string> </value>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_module</string> </key>
<value> <string>DocumentSearch</string> </value>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_body</string> </key>
<value> <string encoding="cdata"><![CDATA[
"""\n
Make SQLCatalog parse given search string and generate an Abstract Syntax Tree.\n
Then, walk this tree and categorize criterion by type (and their alias, see code).\n
"""\n
from DateTime import DateTime\n
\n
def render_filetype_list(filetype_list):\n
# Returns a new list where every entry is prefixed with "%.",\n
# e.g. "doc" becomes "%.doc".\n
return [\'%%.%s\' % (x, ) for x in filetype_list]\n
\n
def render_state_list(state_list):\n
# Note: also used to render type list\n
# Returns a new list holding every entry of state_list except "all",\n
# so a list made only of "all" comes back empty.\n
result = []\n
append = result.append\n
for state in state_list:\n
if state != \'all\':\n
append(state)\n
return result\n
\n
def render_date_range(date_range_list):\n
# Converts entries made of digits followed by "w", "m" or "y" into\n
# "now minus duration * multiplicator". Entries for which no\n
# multiplicator could be determined add nothing to the result.\n
result = []\n
append = result.append\n
now = DateTime()\n
for date_range in date_range_list:\n
# XXX: original version used a regex, but we can\'t import\n
# "re" module here, so fallback on hand-crafted parsing.\n
# Original regex: \'(\\d)([wmy]).*\'\n
# State meaning:\n
# 0: we expect only decimals\n
# 1: we expect one of \'w\', \'m\', or \'y\'\n
state = 0\n
duration_char_list = []\n
# Stays None unless a known unit letter follows at least one digit.\n
multiplicator = None\n
for char in date_range:\n
if state == 0:\n
if \'0\' <= char <= \'9\':\n
duration_char_list.append(char)\n
else:\n
state = 1\n
if state == 1:\n
# A unit letter only counts when digits were collected before it.\n
if len(duration_char_list):\n
# NOTE(review): multiplicators look like days per unit - confirm\n
# that DateTime subtraction is expressed in days.\n
if char == \'w\':\n
multiplicator = 7\n
elif char == \'m\':\n
multiplicator = 30\n
elif char == \'y\':\n
multiplicator = 365\n
break\n
if multiplicator is not None:\n
duration = int(\'\'.join(duration_char_list))\n
append(now - duration * multiplicator)\n
return result\n
\n
# Maps a criterion name to a (next alias, value list renderer) pair.\n
# A next alias of True marks a name that is final; any other value is the\n
# name to look up next. A renderer of None leaves the values untouched.\n
criterion_alias_dict = {\n
\'state\': (\'simulation_state\', render_state_list),\n
\'type\': (\'portal_type\', render_state_list),\n
\'filetype\': (\'source_reference\', render_filetype_list),\n
\'file\': (\'source_reference\', None),\n
\'created\': (\'creation_from\', render_date_range),\n
\'simulation_state\': (True, None),\n
\'language\': (True, None),\n
\'version\': (True, None),\n
\'reference\': (True, None),\n
\'portal_type\': (True, None),\n
\'source_reference\': (True, None),\n
\'creation_from\': (True, None),\n
\'searchabletext\': (True, None),\n
}\n
\n
# Name used for values without a column, and for names missing above.\n
DEFAULT_CRITERION_ALIAS = \'searchabletext\'\n
\n
def resolveCriterion(criterion_alias, criterion_value_list):\n
initial_criterion_alias = criterion_alias\n
# XXX: should be a set\n
seen_alias_dict = {} # Protection against endless loops\n
while True:\n
next_alias, value_list_renderer = criterion_alias_dict.get(criterion_alias, (DEFAULT_CRITERION_ALIAS, None))\n
if value_list_renderer is not None:\n
criterion_value_list = value_list_renderer(criterion_value_list)\n
if next_alias is True:\n
break\n
seen_alias_dict[criterion_alias] = None\n
if next_alias in seen_alias_dict:\n
raise Exeption, \'Endless alias loop detected: lookup of %r reached alias %r twice\' % (initial_criterion_alias, next_alias)\n
criterion_alias = next_alias\n
return criterion_alias, criterion_value_list\n
\n
def recurseSyntaxNode(node, criterion=DEFAULT_CRITERION_ALIAS):\n
if node.isColumn():\n
result = recurseSyntaxNode(node.getSubNode(), criterion=node.getColumnName())\n
else:\n
result = {}\n
if node.isLeaf():\n
result[criterion] = [node.getValue()]\n
else:\n
for subnode in node.getNodeList():\n
for criterion, value_list in recurseSyntaxNode(subnode, criterion=criterion).items():\n
result.setdefault(criterion, []).extend(value_list)\n
return result\n
\n
def acceptAllColumns(column):\n
# Passed as is_valid to parseSearchText: answers True for any column.\n
return True\n
\n
# Have the catalog parse the search string, accepting any column name.\n
node = context.getPortalObject().portal_catalog.getSQLCatalog().parseSearchText(searchstring, search_key=\'FullTextKey\', is_valid=acceptAllColumns)\n
result = {}\n
if node is None:\n
# No syntax tree: the whole string is used as full text.\n
result[\'searchabletext\'] = searchstring\n
else:\n
# Gather values per criterion, then resolve aliases; several aliases may\n
# end up under the same final name, hence setdefault/extend.\n
for criterion, value_list in recurseSyntaxNode(node).items():\n
criterion, value_list = resolveCriterion(criterion, value_list)\n
result.setdefault(criterion, []).extend(value_list)\n
# Drop criteria left without any value.\n
filtered_result = {}\n
for criterion, value_list in result.items():\n
if len(value_list) > 0:\n
filtered_result[criterion] = value_list\n
result = filtered_result\n
# Full text becomes one space-joined string; any criterion holding a\n
# single value is stored as that value instead of a one-item list.\n
for criterion, value_list in result.items():\n
# XXX: yuck\n
if criterion == \'searchabletext\':\n
result[\'searchabletext\'] = \' \'.join(value_list)\n
if len(value_list) == 1:\n
result[criterion] = value_list[0]\n
# Make sure the searchabletext key is always present.\n
if \'searchabletext\' not in result:\n
result[\'searchabletext\'] = \'\'\n
return result\n
]]></string> </value>
</item>
<item>
<key> <string>_code</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>searchstring</string> </value>
</item>
<item>
<key> <string>errors</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>func_code</string> </key>
<value>
<object>
<klass>
<global name="FuncCode" module="Shared.DC.Scripts.Signature"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>co_argcount</string> </key>
<value> <int>1</int> </value>
</item>
<item>
<key> <string>co_varnames</string> </key>
<value>
<tuple>
<string>searchstring</string>
<string>DateTime</string>
<string>render_filetype_list</string>
<string>render_state_list</string>
<string>render_date_range</string>
<string>None</string>
<string>True</string>
<string>criterion_alias_dict</string>
<string>DEFAULT_CRITERION_ALIAS</string>
<string>resolveCriterion</string>
<string>recurseSyntaxNode</string>
<string>acceptAllColumns</string>
<string>_getattr_</string>
<string>context</string>
<string>node</string>
<string>result</string>
<string>_write_</string>
<string>_getiter_</string>
<string>criterion</string>
<string>value_list</string>
<string>filtered_result</string>
<string>len</string>
<string>_getitem_</string>
</tuple>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>func_defaults</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>Base_parseSearchString</string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string></string> </value>
<key> <string>warnings</string> </key>
<value>
<tuple/>
</value>
</item>
</dictionary>
</pickle>
......
979
\ No newline at end of file
980
\ No newline at end of file
DocumentSearch
DocumentExtraction
DocumentManagement
DocumentSecurity
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment