Commit 42b9e4ae authored by Jean-Paul Smets's avatar Jean-Paul Smets

The enclosed changes fix various issues in the previous implementation. It...

The enclosed changes fix various issues in the previous implementation. They break WebDAV, though (wait for the next commit to get it back).

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@13439 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 14cf3c06
...@@ -179,6 +179,21 @@ class Document(XMLObject): ...@@ -179,6 +179,21 @@ class Document(XMLObject):
searchable text, explicit relations, implicit relations, searchable text, explicit relations, implicit relations,
metadata, versions, languages, etc. metadata, versions, languages, etc.
Documents may either store their content directly or
cache content which is retrieved from a specified URL.
The second case is often referred to as "External Document".
Standalone "External Documents" may be created by specifying
a URL to the contribution tool which is in charge of initiating
the download process and selecting the appropriate document type.
Groups of "External Documents" may also be generated from
so-called "External Source" (refer to ExternalSource class
for more information).
External Documents may be downloaded once or at
regular intervals. The latter can be useful to update the content
of an external source. Previous versions may be stored
in place or kept in a separate file.
There are currently two types of Document subclasses: There are currently two types of Document subclasses:
* File for binary file based documents. File * File for binary file based documents. File
...@@ -188,7 +203,10 @@ class Document(XMLObject): ...@@ -188,7 +203,10 @@ class Document(XMLObject):
* TextDocument for text based documents. TextDocument * TextDocument for text based documents. TextDocument
has subclasses such as Wiki to implement specific has subclasses such as Wiki to implement specific
methods. methods. TextDocument itself has a subclass
(XSLTDocument) which provides XSLT based analysis
and transformation of XML content based on XSLT
templates.
Document classes which implement conversion should use Document classes which implement conversion should use
the ConversionCacheMixin class so that converted values are the ConversionCacheMixin class so that converted values are
...@@ -372,6 +390,13 @@ class Document(XMLObject): ...@@ -372,6 +390,13 @@ class Document(XMLObject):
""" """
pass pass
security.declareProtected(Permissions.View, 'asText')
def asText(self):
"""
Converts the content of the document to a textual representation.
"""
return self.convert('text')
security.declareProtected(Permissions.View, 'getSearchableText') security.declareProtected(Permissions.View, 'getSearchableText')
def getSearchableText(self, md=None): def getSearchableText(self, md=None):
""" """
...@@ -787,7 +812,8 @@ class Document(XMLObject): ...@@ -787,7 +812,8 @@ class Document(XMLObject):
kw = {} kw = {}
for id in self.propertyIds(): for id in self.propertyIds():
# We should not consider file data # We should not consider file data
if id is not 'data' and self.hasProperty(id): if id not in ('data', 'categories_list', 'uid', 'id', 'text_content', ) \
and self.hasProperty(id):
kw[id] = self.getProperty(id) kw[id] = self.getProperty(id)
self._backup_input = kw # We could use volatile and pass kw in activate self._backup_input = kw # We could use volatile and pass kw in activate
# if we are garanteed that _backup_input does not # if we are garanteed that _backup_input does not
...@@ -843,6 +869,7 @@ class Document(XMLObject): ...@@ -843,6 +869,7 @@ class Document(XMLObject):
del(kw['portal_type']) del(kw['portal_type'])
except KeyError: except KeyError:
pass pass
self.edit(**kw) self.edit(**kw)
# Finish in second stage # Finish in second stage
......
...@@ -27,9 +27,9 @@ ...@@ -27,9 +27,9 @@
############################################################################## ##############################################################################
import cStringIO import cStringIO
import pdb
import re import re
import string import string
import urllib2
from AccessControl import ClassSecurityInfo, getSecurityManager from AccessControl import ClassSecurityInfo, getSecurityManager
from Globals import InitializeClass, DTMLFile from Globals import InitializeClass, DTMLFile
...@@ -41,8 +41,6 @@ from zLOG import LOG ...@@ -41,8 +41,6 @@ from zLOG import LOG
from DateTime import DateTime from DateTime import DateTime
from Acquisition import aq_base from Acquisition import aq_base
NO_DISCOVER_METADATA_KEY = '_v_no_discover_metadata'
USER_NAME_KEY = '_v_document_user_login'
TEMP_NEW_OBJECT_KEY = '_v_new_object' TEMP_NEW_OBJECT_KEY = '_v_new_object'
_marker = [] # Create a new marker object. _marker = [] # Create a new marker object.
...@@ -50,16 +48,17 @@ _marker = [] # Create a new marker object. ...@@ -50,16 +48,17 @@ _marker = [] # Create a new marker object.
class ContributionTool(BaseTool): class ContributionTool(BaseTool):
""" """
ContributionTool provides an abstraction layer to unify the contribution ContributionTool provides an abstraction layer to unify the contribution
of documents into an ERP5Site. of documents into an ERP5 Site.
ContributionTool is configured in portal_types in ContributionTool needs to be configured in portal_types (allowed contents) so
such way that it can store Text, Spreadsheet, PDF, etc. that it can store Text, Spreadsheet, PDF, etc.
The method to use is portal_contributions.newContent, which should receive The main method of ContributionTool is newContent. This method can
either a portal type or a file name from which type can be derived or a file from which be provided various parameters from which the portal type and document
content type can be derived, otherwise it will fail. metadata can be derived.
Configuration Scripts: Configuration Scripts:
- ContributionTool_getPropertyDictFromFileName: receives file name and a - ContributionTool_getPropertyDictFromFileName: receives file name and a
dict derived from filename by regular expression, and does any necesary dict derived from filename by regular expression, and does any necesary
operations (e.g. mapping document type id onto a real portal_type). operations (e.g. mapping document type id onto a real portal_type).
...@@ -68,7 +67,6 @@ class ContributionTool(BaseTool): ...@@ -68,7 +67,6 @@ class ContributionTool(BaseTool):
id = 'portal_contributions' id = 'portal_contributions'
meta_type = 'ERP5 Contribution Tool' meta_type = 'ERP5 Contribution Tool'
portal_type = 'Contribution Tool' portal_type = 'Contribution Tool'
allowed_types = ('File', 'Image', 'Text') # XXX Is this really needed ?
# Declarative Security # Declarative Security
security = ClassSecurityInfo() security = ClassSecurityInfo()
...@@ -77,17 +75,17 @@ class ContributionTool(BaseTool): ...@@ -77,17 +75,17 @@ class ContributionTool(BaseTool):
manage_overview = DTMLFile( 'explainContributionTool', _dtmldir ) manage_overview = DTMLFile( 'explainContributionTool', _dtmldir )
security.declarePrivate('findTypeName') security.declarePrivate('findTypeName')
def findTypeName(self, file_name, ob): def findTypeName(self, file_name, document):
""" """
Finds the appropriate portal type based on the file name Finds the appropriate portal type based on the file name
or if necessary the content of ob or if necessary the content of the document.
""" """
portal_type = None portal_type = None
# We should only consider those portal_types which share the # We should only consider those portal_types which share the
# same meta_type with the current object # same meta_type with the current object
valid_portal_type_list = [] valid_portal_type_list = []
for pt in self.portal_types.objectValues(): for pt in self.portal_types.objectValues():
if pt.meta_type == ob.meta_type: if pt.meta_type == document.meta_type:
valid_portal_type_list.append(pt.id) valid_portal_type_list.append(pt.id)
# Check if the filename tells which portal_type this is # Check if the filename tells which portal_type this is
...@@ -100,7 +98,7 @@ class ContributionTool(BaseTool): ...@@ -100,7 +98,7 @@ class ContributionTool(BaseTool):
# to check which of the candidates is suitable # to check which of the candidates is suitable
if portal_type is None: if portal_type is None:
# The document is now responsible of telling all its properties # The document is now responsible of telling all its properties
portal_type = ob.getPropertyDictFromContent().get('portal_type', None) portal_type = document.getPropertyDictFromContent().get('portal_type', None)
if portal_type is not None: if portal_type is not None:
# we check if it matches the candidate list, if there were any # we check if it matches the candidate list, if there were any
if len(portal_type_list)>1 and portal_type not in portal_type_list: if len(portal_type_list)>1 and portal_type not in portal_type_list:
...@@ -113,8 +111,8 @@ class ContributionTool(BaseTool): ...@@ -113,8 +111,8 @@ class ContributionTool(BaseTool):
if portal_type is None: if portal_type is None:
# We can not do anything anymore # We can not do anything anymore
return ob.portal_type #return document.portal_type # XXX Wrong
#return None return None
if portal_type not in valid_portal_type_list: if portal_type not in valid_portal_type_list:
# We will not be able to migrate ob to portal_type # We will not be able to migrate ob to portal_type
...@@ -124,7 +122,7 @@ class ContributionTool(BaseTool): ...@@ -124,7 +122,7 @@ class ContributionTool(BaseTool):
return portal_type return portal_type
security.declareProtected(Permissions.AddPortalContent, 'newContent') security.declareProtected(Permissions.AddPortalContent, 'newContent')
def newContent(self, id=None, portal_type=None, def newContent(self, id=None, portal_type=None, url=None,
discover_metadata=1, temp_object=0, discover_metadata=1, temp_object=0,
user_login=None, **kw): user_login=None, **kw):
""" """
...@@ -134,7 +132,8 @@ class ContributionTool(BaseTool): ...@@ -134,7 +132,8 @@ class ContributionTool(BaseTool):
the content. the content.
user_login is the name under which the content will be created user_login is the name under which the content will be created
XXX - Is this a security hole ? XXX - this is a security hole which needs to be fixed by
making sure only Manager can use this parameter
NOTE: NOTE:
We always generate ID. So, we must prevent using the one We always generate ID. So, we must prevent using the one
...@@ -147,20 +146,32 @@ class ContributionTool(BaseTool): ...@@ -147,20 +146,32 @@ class ContributionTool(BaseTool):
# Try to find the file_name # Try to find the file_name
file_name = None file_name = None
# check if file was provided mime_type = None
file = kw.get('file', None) if url is None:
if file is not None: # check if file was provided
file_name = file.filename file = kw.get('file', None)
if file is not None:
file_name = file.filename
else:
# some channels supply data and file-name separately
# this is the case for example for email ingestion
# in this case, we build a file wrapper for it
data = kw.get('data', None)
if data is not None:
file_name = kw.get('file_name', None)
if file_name is not None:
file = cStringIO.StringIO()
file.write(data)
file.seek(0)
else: else:
# some channels supply data and file name separately # build a new file from the url
# we have to build an object file = urllib2.urlopen(url)
data = kw.get('data', None) file_name = url.split('/')[-1]
if data is not None: if hasattr(file, 'headers'):
file_name = kw.get('file_name', None) headers = file.headers
if file_name is not None: if hasattr(headers, 'type'):
file = cStringIO.StringIO() mime_type = headers.type
file.write(data) kw['file'] = file
file.seek(0)
# If the portal_type was provided, we can go faster # If the portal_type was provided, we can go faster
if portal_type is not None and portal_type != '': if portal_type is not None and portal_type != '':
...@@ -178,11 +189,11 @@ class ContributionTool(BaseTool): ...@@ -178,11 +189,11 @@ class ContributionTool(BaseTool):
raise ValueError, "could not determine portal type" raise ValueError, "could not determine portal type"
# So we will simulate WebDAV to get an empty object # So we will simulate WebDAV to get an empty object
# with PUT_factory # with PUT_factory - we provide the mime_type as
ob = self.PUT_factory( file_name, None, None ) # parameter
ob = self.PUT_factory( file_name, mime_type, None )
# Raise an error if we could not guess the portal type # Raise an error if we could not guess the portal type
# XXX Maybe we should try to pass the typ param
if ob is None: if ob is None:
raise ValueError, "Could not determine the document type" raise ValueError, "Could not determine the document type"
...@@ -197,9 +208,7 @@ class ContributionTool(BaseTool): ...@@ -197,9 +208,7 @@ class ContributionTool(BaseTool):
BaseTool._delObject(self, file_name) BaseTool._delObject(self, file_name)
# Move the document to where it belongs # Move the document to where it belongs
if not discover_metadata: setattr(self, NO_DISCOVER_METADATA_KEY, 1) document = self._setObject(file_name, ob, user_login=user_login)
setattr(ob, USER_NAME_KEY, user_login)
document = self._setObject(file_name, ob)
# Time to empty the cache # Time to empty the cache
if hasattr(self, '_v_document_cache'): if hasattr(self, '_v_document_cache'):
...@@ -209,7 +218,7 @@ class ContributionTool(BaseTool): ...@@ -209,7 +218,7 @@ class ContributionTool(BaseTool):
# Reindex it and return the document # Reindex it and return the document
# XXX seems we have to commit now, otherwise it is not reindexed properly later # XXX seems we have to commit now, otherwise it is not reindexed properly later
# dunno why # dunno why
get_transaction().commit() get_transaction().commit() # XXX-JPS - WHAT IS THIS ?????????????????????
document.reindexObject() document.reindexObject()
return document return document
...@@ -241,9 +250,9 @@ class ContributionTool(BaseTool): ...@@ -241,9 +250,9 @@ class ContributionTool(BaseTool):
method = self._getTypeBasedMethod('getPropertyDictFromFileName', method = self._getTypeBasedMethod('getPropertyDictFromFileName',
fallback_script_id = 'ContributionTool_getPropertyDictFromFileName') fallback_script_id = 'ContributionTool_getPropertyDictFromFileName')
property_dict = method(file_name, property_dict) property_dict = method(file_name, property_dict)
if property_dict.has_key('portal_type'): if property_dict.has_key('portal_type') and property_dict['portal_type']:
# we have to return portal_type as a tuple # we have to return portal_type as a tuple
# because we can allow for having multiple types (candidates) # because we should allow for having multiple candidate types
property_dict['portal_type'] = (property_dict['portal_type'],) property_dict['portal_type'] = (property_dict['portal_type'],)
else: else:
# we have to find candidates by file extenstion # we have to find candidates by file extenstion
...@@ -279,30 +288,41 @@ class ContributionTool(BaseTool): ...@@ -279,30 +288,41 @@ class ContributionTool(BaseTool):
Refer to: NullResource.PUT Refer to: NullResource.PUT
""" """
# Find the portal type based on file name and content # _setObject is called by constructInstance at a time
# We provide ob in the context of self to make sure scripting is possible # when the object has no portal_type defined yet. It
portal_type = self.findTypeName(name, ob.__of__(self)) # will be removed later on. We can safely store the
if portal_type is None: # document inside us at this stage. Else we
raise TypeError, "Unable to determine portal type" # must find out where to store it.
if not ob.__dict__.has_key('portal_type'):
# We know the portal_type, let us find the module BaseTool._setObject(self, name, ob)
module = self.getDefaultModule(portal_type) document = self[name]
else:
# Set the object on the module and fix the portal_type and id # We give the system a last chance to analyse the
new_id = module.generateNewId() # portal_type based on the document content
ob.portal_type = portal_type # (ex. a Memo is a kind of Text which can be identified
ob.id = new_id # by the fact it includes some specific content)
module._setObject(new_id, ob) portal_type = self.findTypeName(name, ob.__of__(self))
if portal_type is None: portal_type = ob.portal_type
# We can now discover metadata unless NO_DISCOVER_METADATA_KEY was set on ob ob._setPortalTypeName(portal_type) # This is redundant with finishConstruction
document = module[new_id] # but necessary to move objects to appropriate
user_login = getattr(self, USER_NAME_KEY, None) # location based on their content. Since the
if not getattr(ob, NO_DISCOVER_METADATA_KEY, 0): document.discoverMetadata(file_name=name, user_login=user_login) # object is already constructed here, we
# can safely change its portal_type
# Keep the document close to us # Now we know the portal_type, let us find the module
if not hasattr(self, '_v_document_cache'): # to which we should move the document to
self._v_document_cache = {} module = self.getDefaultModule(ob.portal_type)
self._v_document_cache[name] = document.getRelativeUrl() new_id = module.generateNewId()
ob.id = new_id
module._setObject(new_id, ob)
# We can now discover metadata
document = module[new_id]
document.discoverMetadata(file_name=name, user_login=user_login)
# Keep the document close to us
if not hasattr(self, '_v_document_cache'):
self._v_document_cache = {}
self._v_document_cache[name] = document.getRelativeUrl()
# Return document to newContent method # Return document to newContent method
return document return document
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment