Commit 762baa8c authored by Jean-Paul Smets's avatar Jean-Paul Smets

Renamed extension files. Fixed bug in document extraction (whenever text is a...

Renamed extension files. Fixed bug in document extraction (whenever text is a data stream rather than a string)

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@14369 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent cc4a136f
##############################################################################
#
# Copyright (c) 2006-2007 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
import string, re
# Characters we need to strip from a word before we see if it matches, and
# from the search words to eliminate boolean mode characters.
redundant_chars='"\'.:;,-+<>()*~'
......@@ -64,18 +91,22 @@ def generateParts(context,text,sw,tags,trail,maxlines):
yield par # return the last marked part
def cutFound(context, txt, sw, tags, trail, maxlines):
  """
    Returns an excerpt of text found in the txt string

    txt is coerced with str(), stripped of its <script> and <head> blocks
    and of every remaining markup tag, whitespace-normalised and roughly
    tokenized. The tokens are handed to generateParts together with
    context, sw, tags, trail and maxlines, and the parts it yields are
    returned as a list. tags is also stored on the FoundWord class.
  """
  # txt is not always a plain string, so coerce it before applying
  # regular expressions to it
  txt = str(txt)
  # initialize class
  FoundWord.tags = tags
  # strip html tags (in case it is a web page - we show result without formatting)
  flags = re.DOTALL | re.IGNORECASE
  # script and head blocks are dropped together with their content; this has
  # to happen before the generic tag pattern below, which only removes the
  # tags themselves. Opening tags may carry attributes.
  txt = re.compile(r'<script\b[^>]*>.*?</script>', flags).sub('', txt)
  txt = re.compile(r'<head\b[^>]*>.*?</head>', flags).sub('', txt)
  txt = re.compile(r'<([^>]+)>', flags).sub('', txt)
  # collapse every run of whitespace into a single space
  txt = re.compile(r'\s+').sub(' ', txt)
  txt = txt.replace('-', ' - ') # to find hyphenated occurrences
  text = ' '.join(txt.split('\n')).split(' ') # very rough tokenization
  return list(generateParts(context, text, sw, tags, trail, maxlines))
......
##############################################################################
#
# Copyright (c) 2006-2007 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
import zipfile, cStringIO, re
import xmlrpclib, base64
from Products.CMFCore.utils import getToolByName
......@@ -74,4 +101,28 @@ def getLastWorkflowDate(self, state_name='simulation_state', state=('released','
return ch['time']
return 0
# vim: syntax=python shiftwidth=2
#############################################################################
# Mail management
def findAddress(txt):
  """
    find email address in a string

    Returns the first substring of txt that looks like an email address
    (letters, digits, '.', '-' and '_' on both sides of an '@'), or None
    if txt is empty, None or contains no such substring.
  """
  if not txt:
    return None
  validchars = r'0-9A-Za-z.\-_'
  # The last character must be a letter or a digit, so that punctuation
  # following the address in a sentence ("... john@example.com.") is not
  # returned as part of it.
  r = re.compile(r'[%s]+@[%s]*[0-9A-Za-z]' % (validchars, validchars))
  m = r.search(txt)
  if m is None:
    return None
  return m.group()
def extractParams(txt):
  """
    extract parameters given in mail body
    We assume that parameters are given as lines of the format:
    name:value

    txt is split on any whitespace, so several name:value pairs on the
    same line are all picked up, while "name: value" (with a space) is
    ignored. name may contain word characters only; value may also
    contain '/'.

    Returns a dict mapping each name to its value; when a name appears
    several times the last value wins. Empty or None txt gives {}.
  """
  if not txt:
    return {}
  r = re.compile(r'^([\w_]+):([\w_/]+)$')
  res = {}
  for token in txt.split():
    m = r.match(token)
    if m is not None:
      res[m.group(1)] = m.group(2)
  return res
##############################################################################
#
# Copyright (c) 2006-2007 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
"""
RULES
......
def asSecurityGroupId(self, **kw):
  """
    Generate a security group id, or a list of user names, from categories.

    Keyword arguments:
    category_order -- list of base_categories we want to use to generate
                      the group id
    other keys     -- keys should be base categories, values should be the
                      corresponding relative url (obtained by
                      getBaseCategory()) or a list of such relative urls

    Example call : self.ERP5TypeSecurity_asGroupId(category_order=('site', 'group', 'function'),
                     site='france/lille', group='nexedi', function='accounting/accountant')
    This will generate a string like 'LIL_NXD_ACT' where "LIL", "NXD" and
    "ACT" are the codification of respectively "france/lille", "nexedi" and
    "accounting/accountant" categories

    ERP5Type_asSecurityGroupId can also return a list of users whenever a
    category points to a Person instance. This is useful to implement user
    based local role assignments

    Raises ValueError if one of the given categories does not exist.
  """
  code_list = []
  user_list = []

  # sort the category list lexicographically
  # this prevents us to choose the exact order we want,
  # but also prevents some human mistake to break everything by creating
  # site_function instead of function_site
  category_order = kw.get('category_order', None)
  if category_order in (None, ''):
    category_order = []
  else:
    category_order = list(category_order)
    category_order.sort()

  for base_category in category_order:
    if base_category not in kw:
      continue
    category_list = kw[base_category]
    # a single relative url is accepted in place of a list
    if isinstance(category_list, str):
      category_list = [category_list]
    for category in category_list:
      category_path = '%s/%s' % (base_category, category)
      category_object = self.portal_categories.getCategoryValue(category_path)
      if category_object in (None, ''):
        # string exceptions are no longer supported by Python, so a real
        # exception carrying the former exception name is raised instead
        raise ValueError(
          "SecurityRoleDefinitionError: Category '%s' doesn't exist"
          % (category_path))
      portal_type = category_object.getPortalType()
      if portal_type == 'Person':
        # We define a person here
        user_name = category_object.getReference()
        if user_name is not None:
          user_list.append(user_name)
      elif portal_type == 'Project':
        # We use the project reference as a group
        # (the title is passed to getReference as its argument)
        category_code = category_object.getReference(category_object.getTitle())
        code_list.append(category_code)
      else:
        # We define a group item here
        category_code = category_object.getCodification() or category_object.getId()
        code_list.append(category_code)

  # Return a list of users or a single group
  if user_list:
    return user_list
  return '_'.join(code_list)
import re
def findAddress(txt):
  """
    find email address in a string

    Returns the first substring of txt that looks like an email address
    (letters, digits, '.', '-' and '_' on both sides of an '@'), or None
    if txt is empty, None or contains no such substring.
  """
  if not txt:
    return None
  validchars = r'0-9A-Za-z.\-_'
  # The last character must be a letter or a digit, so that punctuation
  # following the address in a sentence ("... john@example.com.") is not
  # returned as part of it.
  r = re.compile(r'[%s]+@[%s]*[0-9A-Za-z]' % (validchars, validchars))
  m = r.search(txt)
  if m is None:
    return None
  return m.group()
def extractParams(txt):
  """
    extract parameters given in mail body
    We assume that parameters are given as lines of the format:
    name:value

    txt is split on any whitespace, so several name:value pairs on the
    same line are all picked up, while "name: value" (with a space) is
    ignored. name may contain word characters only; value may also
    contain '/'.

    Returns a dict mapping each name to its value; when a name appears
    several times the last value wins. Empty or None txt gives {}.
  """
  if not txt:
    return {}
  r = re.compile(r'^([\w_]+):([\w_/]+)$')
  res = {}
  for token in txt.split():
    m = r.match(token)
    if m is not None:
      res[m.group(1)] = m.group(2)
  return res
# vim: shiftwidth=2
......@@ -24,7 +24,7 @@
</item>
<item>
<key> <string>_module</string> </key>
<value> <string>documentUtils</string> </value>
<value> <string>DocumentManagement</string> </value>
</item>
<item>
<key> <string>_owner</string> </key>
......
......@@ -24,7 +24,7 @@
</item>
<item>
<key> <string>_module</string> </key>
<value> <string>mailUtils</string> </value>
<value> <string>DocumentManagement</string> </value>
</item>
<item>
<key> <string>_owner</string> </key>
......
......@@ -24,7 +24,7 @@
</item>
<item>
<key> <string>_module</string> </key>
<value> <string>cutFound</string> </value>
<value> <string>DocumentExtraction</string> </value>
</item>
<item>
<key> <string>id</string> </key>
......
......@@ -24,7 +24,7 @@
</item>
<item>
<key> <string>_module</string> </key>
<value> <string>mailUtils</string> </value>
<value> <string>DocumentManagement</string> </value>
</item>
<item>
<key> <string>id</string> </key>
......
......@@ -24,7 +24,7 @@
</item>
<item>
<key> <string>_module</string> </key>
<value> <string>searchUtils</string> </value>
<value> <string>DocumentSearch</string> </value>
</item>
<item>
<key> <string>id</string> </key>
......
searchUtils
mailUtils
cutFound
asSecurityGroupId
documentUtils
\ No newline at end of file
DocumentSearch
DocumentExtraction
DocumentManagement
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment