Commit af458503 authored by Bartek Górny

Assigning properties from document content upon ingestion;

Portal_type extraction from document content if not supplied.

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@11212 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent f4d7e2d9
import zipfile, cStringIO, re
# Matches "name###value" markers: group 1 is a run of word characters,
# group 2 a run of word characters and slashes.
rx_atr=re.compile(r'([\w]+)###([\w/]+)')
def extractContent(data):
  """Return the raw 'content.xml' member of a zip-packaged document.

  data -- the complete file content as a string.

  Returns the content of the 'content.xml' member, or '' when data is
  not a zip archive or the archive has no such member, so that callers
  can fall back to another way of examining the file instead of failing.
  """
  cs=cStringIO.StringIO()
  cs.write(data)
  # nested try blocks (rather than try/except/finally) keep this
  # valid on old Python 2 interpreters
  try:
    try:
      z=zipfile.ZipFile(cs)
    except zipfile.BadZipfile:
      return ''
    try:
      try:
        return z.read('content.xml')
      except KeyError:
        # a valid zip archive which simply has no content.xml member
        return ''
    finally:
      z.close()
  finally:
    # always release the in-memory buffer, also when reading fails
    cs.close()
def getAttrFromContent(data):
  """Return a dictionary of the name###value pairs found in the content.

  The text returned by extractContent(data) is scanned with rx_atr; each
  match contributes one name -> value entry, a later match for the same
  name replacing an earlier one.
  """
  content=extractContent(data)
  pairs=rx_atr.findall(content)
  return dict(pairs)
def getDoctypeFromContent(data):
  """Return the value of the 'doctype' attribute found in the content.

  Returns None when getAttrFromContent(data) yields no 'doctype' entry.
  """
  return getAttrFromContent(data).get('doctype')
# vim: syntax=python shiftwidth=2
import re
def findAddress(txt):
  """Return the first e-mail-like address found in txt, or None.

  An address is one or more of the characters 0-9, A-Z, a-z, '.', '-'
  and '_' on each side of an '@' sign.
  """
  # raw string so that the backslash reaches the regex engine untouched
  validchars=r'0-9A-Za-z.\-_'
  r=re.compile('[%s]+@[%s]+' % (validchars,validchars))
  m=r.search(txt)
  if m is None:
    return None
  return m.group()
......
......@@ -68,14 +68,19 @@
</item>
<item>
<key> <string>_body</string> </key>
<value> <string># determine content type\n
ctype=context.content_type_registry.findTypeName(fname,None,None)\n
context.log(fname,ctype)\n
if ctype is None:\n
<value> <string># determine content type if not given\n
# first try from content\n
if not doctype:\n
doctype=context.Document_getDoctypeFromContent(data)\n
# then from filename\n
if not doctype:\n
doctype=context.content_type_registry.findTypeName(fname,None,None)\n
context.log(fname,doctype)\n
if not doctype:\n
raise Exception("content type for file %s not registered" % fname)\n
\n
# create content\n
ob=context.document_module.newContent(portal_type=ctype)\n
ob=context.document_module.newContent(portal_type=doctype)\n
ob.manage_upload(data)\n
ob.DMS_ingestFile(fname,data)\n
return ob\n
......@@ -95,7 +100,7 @@ return ob\n
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>fname,data</string> </value>
<value> <string>fname,data, doctype=None</string> </value>
</item>
<item>
<key> <string>errors</string> </key>
......@@ -115,7 +120,7 @@ return ob\n
<dictionary>
<item>
<key> <string>co_argcount</string> </key>
<value> <int>2</int> </value>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>co_varnames</string> </key>
......@@ -123,10 +128,10 @@ return ob\n
<tuple>
<string>fname</string>
<string>data</string>
<string>doctype</string>
<string>_getattr_</string>
<string>context</string>
<string>None</string>
<string>ctype</string>
<string>Exception</string>
<string>ob</string>
</tuple>
......@@ -140,7 +145,9 @@ return ob\n
<item>
<key> <string>func_defaults</string> </key>
<value>
<tuple>
<none/>
</tuple>
</value>
</item>
<item>
......
......@@ -85,6 +85,7 @@ if senderemail is None:\n
return printed\n
\n
# find sender\n
context.log(senderemail)\n
r=context.portal_catalog(portal_type=\'Email\',url_string=senderemail)\n
if len(r)==0:\n
print noSenderMsg\n
......@@ -99,6 +100,7 @@ context.log(script.getId(),\'ok, this address belongs to \'+person.getRelativeUr
try:\n
for fname,data in theMail[\'attachments\'].items():\n
obj=context.DMS_createObjectFromFile(fname,data)\n
obj.setGroup(person.getSubordinationValue().getGroup())\n
obj.manage_setLocalRoles(person.getReference(),[\'Owner\',])\n
context.DMS_notifyByEmail(address=senderemail,event=\'ingest\',object=obj)\n
except Exception,e:\n
......
......@@ -72,10 +72,20 @@
# it can be run any time\n
# is meant to extract metadata from file name and contents\n
\n
if data is None:\n
if hasattr(context,\'oo_data\'):\n
data=context.oo_data\n
else:\n
if hasattr(context,\'data\'):\n
data=data\n
if data is not None:\n
context.setPropertyListFromContent(data)\n
\n
if fname is None:\n
fname=context.getSourceReference()\n
else:\n
context.setSourceReference(fname)\n
\n
if fname is not None:\n
context.setPropertyListFromFilename(fname)\n
context.guessMimeType(fname)\n
......@@ -137,9 +147,9 @@ if fname is not None:\n
<string>fname</string>
<string>data</string>
<string>None</string>
<string>_getattr_</string>
<string>context</string>
<string>hasattr</string>
<string>context</string>
<string>_getattr_</string>
</tuple>
</value>
</item>
......
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<tuple>
<tuple>
<string>Products.ExternalMethod.ExternalMethod</string>
<string>ExternalMethod</string>
</tuple>
<none/>
</tuple>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>__ac_local_roles__</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>_function</string> </key>
<value> <string>getDoctypeFromContent</string> </value>
</item>
<item>
<key> <string>_module</string> </key>
<value> <string>documentUtils</string> </value>
</item>
<item>
<key> <string>_owner</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>Document_getDoctypeFromContent</string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string></string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
317
\ No newline at end of file
321
\ No newline at end of file
......@@ -3,3 +3,4 @@ searchUtils
mailUtils
cutFound
asSecurityGroupId
documentUtils
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment