Commit 37d5b105 authored by Yusei Tahara

Refactored dms ingestion system.


git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@18459 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent e274d767
...@@ -98,6 +98,8 @@ class SnapshotMixin: ...@@ -98,6 +98,8 @@ class SnapshotMixin:
class ConversionError(Exception):pass class ConversionError(Exception):pass
class NotConvertedError(Exception):pass
class ConversionCacheMixin: class ConversionCacheMixin:
""" """
This class provides a generic API to store in the ZODB This class provides a generic API to store in the ZODB
...@@ -921,7 +923,7 @@ class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin): ...@@ -921,7 +923,7 @@ class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin):
returns properties which should be set on the document returns properties which should be set on the document
""" """
if not self.hasBaseData(): if not self.hasBaseData():
self.convertToBaseFormat() raise NotConvertedError
method = self._getTypeBasedMethod('getPropertyDictFromContent', method = self._getTypeBasedMethod('getPropertyDictFromContent',
fallback_script_id='Document_getPropertyDictFromContent') fallback_script_id='Document_getPropertyDictFromContent')
return method() return method()
...@@ -1165,26 +1167,15 @@ class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin): ...@@ -1165,26 +1167,15 @@ class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin):
Use accessors (getBaseData, setBaseData, hasBaseData, etc.) Use accessors (getBaseData, setBaseData, hasBaseData, etc.)
""" """
try: try:
msg = self._convertToBaseFormat() # Call implemetation method message = self._convertToBaseFormat() # Call implemetation method
self.clearConversionCache() # Conversion cache is now invalid self.clearConversionCache() # Conversion cache is now invalid
if msg is None: if message is None:
msg = 'Converted to %s.' % self.getBaseContentType() # XXX Need to translate.
self.convertFile(comment=msg) # Invoke workflow method message = 'Converted to %s.' % self.getBaseContentType()
except NotImplementedError:# we don't do any workflow action if nothing has been done self.convertFile(comment=message) # Invoke workflow method
msg = '' except NotImplementedError:
except ConversionError, e: message = ''
msg = 'Problem: %s' % (str(e) or 'undefined.') return message
#self.processFile(comment=msg)
raise ConversionError, msg
except Fault, e:
msg = 'Problem: %s' % (repr(e) or 'undefined.')
#self.processFile(comment=msg)
raise Fault, msg
except socket.error, e:
msg = 'Problem: %s' % (repr(e) or 'undefined.')
#self.processFile(comment=msg)
raise socket.error, msg
return msg
def _convertToBaseFormat(self): def _convertToBaseFormat(self):
""" """
...@@ -1204,6 +1195,14 @@ class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin): ...@@ -1204,6 +1195,14 @@ class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin):
""" """
raise NotImplementedError raise NotImplementedError
security.declareProtected(Permissions.ModifyPortalContent, 'isSupportBaseDataConversion')
def isSupportBaseDataConversion(self):
  """
    Public interface telling whether this document supports conversion
    to base format.

    This generic implementation always returns False; it can be
    overridden in subclasses.
  """
  return False
def convertFile(self, **kw): def convertFile(self, **kw):
""" """
Workflow transition invoked when conversion occurs. Workflow transition invoked when conversion occurs.
......
...@@ -458,7 +458,14 @@ class ContributionTool(BaseTool): ...@@ -458,7 +458,14 @@ class ContributionTool(BaseTool):
# If we need to discoverMetadata synchronously, it must # If we need to discoverMetadata synchronously, it must
# be for user interface and should thus be handled by # be for user interface and should thus be handled by
# ZODB scripts # ZODB scripts
document.activate().discoverMetadata(file_name=name, user_login=user_login)
# XXX The converting state only applies to documents that need to be
# converted to base format.
portal_workflow = self.getPortalObject().portal_workflow
if 'processing_status_workflow' in portal_workflow.getChainFor(document):
document.processFile() # move to converting state.
document.activate().Document_convertToBaseFormatAndDiscoverMetadata(
file_name=name, user_login=user_login)
else: else:
if document.isExternalDocument(): if document.isExternalDocument():
document = existing_document document = existing_document
......
...@@ -38,7 +38,9 @@ from Products.CMFCore.utils import getToolByName, _setCacheHeaders ...@@ -38,7 +38,9 @@ from Products.CMFCore.utils import getToolByName, _setCacheHeaders
from Products.ERP5Type import Permissions, PropertySheet, Constraint, Interface from Products.ERP5Type import Permissions, PropertySheet, Constraint, Interface
from Products.ERP5Type.Cache import CachingMethod from Products.ERP5Type.Cache import CachingMethod
from Products.ERP5.Document.File import File from Products.ERP5.Document.File import File
from Products.ERP5.Document.Document import ConversionCacheMixin, ConversionError from Products.ERP5.Document.Document import ConversionCacheMixin
from Products.ERP5.Document.Document import ConversionError
from Products.ERP5.Document.Document import NotConvertedError
from Products.ERP5.Document.File import _unpackData from Products.ERP5.Document.File import _unpackData
from zLOG import LOG, ERROR from zLOG import LOG, ERROR
...@@ -139,6 +141,13 @@ class OOoDocument(File, ConversionCacheMixin): ...@@ -139,6 +141,13 @@ class OOoDocument(File, ConversionCacheMixin):
rx_strip = re.compile('<[^>]*?>', re.DOTALL|re.MULTILINE) rx_strip = re.compile('<[^>]*?>', re.DOTALL|re.MULTILINE)
rx_compr = re.compile('\s+') rx_compr = re.compile('\s+')
security.declareProtected(Permissions.ModifyPortalContent, 'isSupportBaseDataConversion')
def isSupportBaseDataConversion(self):
  """
    An OOoDocument needs to be converted to base format, so this
    always returns True.
  """
  return True
def _setFile(self, data, precondition=None): def _setFile(self, data, precondition=None):
File._setFile(self, data, precondition=precondition) File._setFile(self, data, precondition=precondition)
if self.hasBaseData(): if self.hasBaseData():
...@@ -170,7 +179,7 @@ class OOoDocument(File, ConversionCacheMixin): ...@@ -170,7 +179,7 @@ class OOoDocument(File, ConversionCacheMixin):
return File.index_html(self, REQUEST, RESPONSE) return File.index_html(self, REQUEST, RESPONSE)
# Make sure file is converted to base format # Make sure file is converted to base format
if not self.hasBaseData(): if not self.hasBaseData():
self.convertToBaseFormat() raise NotConvertedError
# Else try to convert the document and return it # Else try to convert the document and return it
mime, result = self.convert(format=format, display=display, **kw) mime, result = self.convert(format=format, display=display, **kw)
if not mime: if not mime:
...@@ -215,7 +224,7 @@ class OOoDocument(File, ConversionCacheMixin): ...@@ -215,7 +224,7 @@ class OOoDocument(File, ConversionCacheMixin):
to provide an extensive list of conversion formats. to provide an extensive list of conversion formats.
""" """
if not self.hasBaseData(): if not self.hasBaseData():
self.convertToBaseFormat() raise NotConvertedError
def cached_getTargetFormatItemList(content_type): def cached_getTargetFormatItemList(content_type):
server_proxy = self._mkProxy() server_proxy = self._mkProxy()
...@@ -288,7 +297,7 @@ class OOoDocument(File, ConversionCacheMixin): ...@@ -288,7 +297,7 @@ class OOoDocument(File, ConversionCacheMixin):
Communicates with server to convert a file Communicates with server to convert a file
""" """
if not self.hasBaseData(): if not self.hasBaseData():
self.convertToBaseFormat() raise NotConvertedError
if format == 'text-content': if format == 'text-content':
# Extract text from the ODF file # Extract text from the ODF file
cs = cStringIO.StringIO() cs = cStringIO.StringIO()
...@@ -333,7 +342,8 @@ class OOoDocument(File, ConversionCacheMixin): ...@@ -333,7 +342,8 @@ class OOoDocument(File, ConversionCacheMixin):
is_html = 0 is_html = 0
original_format = format original_format = format
if format == 'base-data': if format == 'base-data':
if not self.hasBaseData(): self.convertToBaseFormat() if not self.hasBaseData():
raise NotConvertedError
return self.getBaseContentType(), self.getBaseData() return self.getBaseContentType(), self.getBaseData()
if format == 'pdf': if format == 'pdf':
format_list = [x for x in self.getTargetFormatList() format_list = [x for x in self.getTargetFormatList()
...@@ -360,7 +370,7 @@ class OOoDocument(File, ConversionCacheMixin): ...@@ -360,7 +370,7 @@ class OOoDocument(File, ConversionCacheMixin):
raise ConversionError("OOoDocument: target format %s is not supported" % format) raise ConversionError("OOoDocument: target format %s is not supported" % format)
# Check if we have already a base conversion # Check if we have already a base conversion
if not self.hasBaseData(): if not self.hasBaseData():
self.convertToBaseFormat() raise NotConvertedError
# Return converted file # Return converted file
if display is None or original_format not in STANDARD_IMAGE_FORMAT_LIST: if display is None or original_format not in STANDARD_IMAGE_FORMAT_LIST:
has_format = self.hasConversion(format=format) has_format = self.hasConversion(format=format)
...@@ -482,24 +492,26 @@ class OOoDocument(File, ConversionCacheMixin): ...@@ -482,24 +492,26 @@ class OOoDocument(File, ConversionCacheMixin):
security.declareProtected(Permissions.ModifyPortalContent, security.declareProtected(Permissions.ModifyPortalContent,
'updateBaseMetadata') 'updateBaseMetadata')
def updateBaseMetadata(self, *arg, **kw): def updateBaseMetadata(self, **kw):
""" """
Updates metadata information in the converted OOo document Updates metadata information in the converted OOo document
based on the values provided by the user. This is implemented based on the values provided by the user. This is implemented
through the invocation of the conversion server. through the invocation of the conversion server.
""" """
data = self.getBaseData() if not self.hasBaseData():
if data in ('', None): raise NotConvertedError
raise ValueError, "OOoDocument: BaseData is empty. Document is not converted yet."
self.clearConversionCache()
server_proxy = self._mkProxy() server_proxy = self._mkProxy()
response_code, response_dict, response_message = \ response_code, response_dict, response_message = \
server_proxy.run_setmetadata(self.getId(), server_proxy.run_setmetadata(self.getId(),
enc(_unpackData(data)), enc(_unpackData(self.getBaseData())),
kw) kw)
if response_code == 200: if response_code == 200:
# successful meta data extraction # successful meta data extraction
self._setBaseData(dec(response_dict['data'])) self._setBaseData(dec(response_dict['data']))
self.updateFileMetadata() # record in workflow history # XXX must put appropriate comments.
else: else:
# Explicitly raise the exception! # Explicitly raise the exception!
raise ConversionError("OOoDocument: error getting document metadata %s:%s" raise ConversionError("OOoDocument: error getting document metadata %s:%s"
......
...@@ -444,6 +444,8 @@ class TestDocument(ERP5TypeTestCase, ZopeTestCase.Functional): ...@@ -444,6 +444,8 @@ class TestDocument(ERP5TypeTestCase, ZopeTestCase.Functional):
doc.edit(file=makeFileUpload('import_data_list.ods')) doc.edit(file=makeFileUpload('import_data_list.ods'))
doc.publish() doc.publish()
get_transaction().commit() get_transaction().commit()
self.tic()
get_transaction().commit()
uf = self.portal.acl_users uf = self.portal.acl_users
uf._doAddUser('member_user2', 'secret', ['Member'], []) uf._doAddUser('member_user2', 'secret', ['Member'], [])
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment