From 37d5b105672aeddb2aa90232d9700dc83e6e171d Mon Sep 17 00:00:00 2001
From: Yusei Tahara <yusei@nexedi.com>
Date: Fri, 21 Dec 2007 09:11:14 +0000
Subject: [PATCH] Refactored dms ingestion system.

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@18459 20353a03-c40f-0410-a6d1-a30d3c3de9de
---
 product/ERP5/Document/Document.py       | 39 ++++++++++++-------------
 product/ERP5/Tool/ContributionTool.py   |  9 +++++-
 product/ERP5OOo/Document/OOoDocument.py | 34 ++++++++++++++-------
 product/ERP5OOo/tests/testDms.py        |  2 ++
 4 files changed, 52 insertions(+), 32 deletions(-)

diff --git a/product/ERP5/Document/Document.py b/product/ERP5/Document/Document.py
index b9efcd7e2b..ccba05aecc 100644
--- a/product/ERP5/Document/Document.py
+++ b/product/ERP5/Document/Document.py
@@ -98,6 +98,8 @@ class SnapshotMixin:
 
 class ConversionError(Exception):pass
 
+class NotConvertedError(Exception):pass
+
 class ConversionCacheMixin:
   """
     This class provides a generic API to store in the ZODB
@@ -921,7 +923,7 @@ class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin):
       returns properties which should be set on the document
     """
     if not self.hasBaseData():
-      self.convertToBaseFormat()
+      raise NotConvertedError
     method = self._getTypeBasedMethod('getPropertyDictFromContent',
         fallback_script_id='Document_getPropertyDictFromContent')
     return method()
@@ -1165,26 +1167,15 @@ class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin):
       Use accessors (getBaseData, setBaseData, hasBaseData, etc.)
     """
     try:
-      msg = self._convertToBaseFormat() # Call implemetation method
+      message = self._convertToBaseFormat() # Call implemetation method
       self.clearConversionCache() # Conversion cache is now invalid
-      if msg is None:
-        msg = 'Converted to %s.' % self.getBaseContentType()
-      self.convertFile(comment=msg) # Invoke workflow method
-    except NotImplementedError:# we don't do any workflow action if nothing has been done
-      msg = '' 
-    except ConversionError, e:
-      msg = 'Problem: %s' % (str(e) or 'undefined.')
-      #self.processFile(comment=msg)
-      raise ConversionError, msg
-    except Fault, e:
-      msg = 'Problem: %s' % (repr(e) or 'undefined.')
-      #self.processFile(comment=msg)
-      raise Fault, msg
-    except socket.error, e:
-      msg = 'Problem: %s' % (repr(e) or 'undefined.')
-      #self.processFile(comment=msg)
-      raise socket.error, msg
-    return msg
+      if message is None:
+        # XXX Need to translate.
+        message = 'Converted to %s.' % self.getBaseContentType()
+      self.convertFile(comment=message) # Invoke workflow method
+    except NotImplementedError:
+      message = ''
+    return message
 
   def _convertToBaseFormat(self):
     """
@@ -1204,6 +1195,14 @@ class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin):
     """
     raise NotImplementedError
 
+  security.declareProtected(Permissions.ModifyPortalContent, 'isSupportBaseDataConversion')
+  def isSupportBaseDataConversion(self):
+    """
+    Public interface to check whether this document supports conversion
+    to base format. Can be overridden in subclasses.
+    """
+    return False
+
   def convertFile(self, **kw):
     """
     Workflow transition invoked when conversion occurs.
diff --git a/product/ERP5/Tool/ContributionTool.py b/product/ERP5/Tool/ContributionTool.py
index 0b2cb5caae..efe03c90e7 100644
--- a/product/ERP5/Tool/ContributionTool.py
+++ b/product/ERP5/Tool/ContributionTool.py
@@ -458,7 +458,14 @@ class ContributionTool(BaseTool):
           # If we need to discoverMetadata synchronously, it must
           # be for user interface and should thus be handled by
           # ZODB scripts
-          document.activate().discoverMetadata(file_name=name, user_login=user_login)
+
+          # XXX The converting state is only for documents which need to be
+          # converted to base format.
+          portal_workflow = self.getPortalObject().portal_workflow
+          if 'processing_status_workflow' in portal_workflow.getChainFor(document):
+            document.processFile() # move to converting state.
+          document.activate().Document_convertToBaseFormatAndDiscoverMetadata(
+            file_name=name, user_login=user_login)
       else:
         if document.isExternalDocument():
           document = existing_document
diff --git a/product/ERP5OOo/Document/OOoDocument.py b/product/ERP5OOo/Document/OOoDocument.py
index b588cc72d9..7d008abaab 100644
--- a/product/ERP5OOo/Document/OOoDocument.py
+++ b/product/ERP5OOo/Document/OOoDocument.py
@@ -38,7 +38,9 @@ from Products.CMFCore.utils import getToolByName, _setCacheHeaders
 from Products.ERP5Type import Permissions, PropertySheet, Constraint, Interface
 from Products.ERP5Type.Cache import CachingMethod
 from Products.ERP5.Document.File import File
-from Products.ERP5.Document.Document import ConversionCacheMixin, ConversionError
+from Products.ERP5.Document.Document import ConversionCacheMixin
+from Products.ERP5.Document.Document import ConversionError
+from Products.ERP5.Document.Document import NotConvertedError
 from Products.ERP5.Document.File import _unpackData
 from zLOG import LOG, ERROR
 
@@ -139,6 +141,13 @@ class OOoDocument(File, ConversionCacheMixin):
   rx_strip = re.compile('<[^>]*?>', re.DOTALL|re.MULTILINE)
   rx_compr = re.compile('\s+')
 
+  security.declareProtected(Permissions.ModifyPortalContent, 'isSupportBaseDataConversion')
+  def isSupportBaseDataConversion(self):
+    """
+    OOoDocument needs conversion to base format.
+    """
+    return True
+
   def _setFile(self, data, precondition=None):
     File._setFile(self, data, precondition=precondition)
     if self.hasBaseData():
@@ -170,7 +179,7 @@ class OOoDocument(File, ConversionCacheMixin):
       return File.index_html(self, REQUEST, RESPONSE)
     # Make sure file is converted to base format
     if not self.hasBaseData():
-      self.convertToBaseFormat()
+      raise NotConvertedError
     # Else try to convert the document and return it
     mime, result = self.convert(format=format, display=display, **kw)
     if not mime:
@@ -215,7 +224,7 @@ class OOoDocument(File, ConversionCacheMixin):
       to provide an extensive list of conversion formats.
     """
     if not self.hasBaseData():
-      self.convertToBaseFormat()
+      raise NotConvertedError
 
     def cached_getTargetFormatItemList(content_type):
       server_proxy = self._mkProxy()
@@ -288,7 +297,7 @@ class OOoDocument(File, ConversionCacheMixin):
       Communicates with server to convert a file 
     """
     if not self.hasBaseData():
-      self.convertToBaseFormat()
+      raise NotConvertedError
     if format == 'text-content':
       # Extract text from the ODF file
       cs = cStringIO.StringIO()
@@ -333,7 +342,8 @@ class OOoDocument(File, ConversionCacheMixin):
     is_html = 0
     original_format = format
     if format == 'base-data':
-      if not self.hasBaseData(): self.convertToBaseFormat()
+      if not self.hasBaseData():
+        raise NotConvertedError
       return self.getBaseContentType(), self.getBaseData()
     if format == 'pdf':
       format_list = [x for x in self.getTargetFormatList()
@@ -360,7 +370,7 @@ class OOoDocument(File, ConversionCacheMixin):
       raise ConversionError("OOoDocument: target format %s is not supported" % format)
     # Check if we have already a base conversion
     if not self.hasBaseData():
-      self.convertToBaseFormat()
+      raise NotConvertedError
     # Return converted file
     if display is None or original_format not in STANDARD_IMAGE_FORMAT_LIST:
       has_format = self.hasConversion(format=format)
@@ -482,24 +492,26 @@ class OOoDocument(File, ConversionCacheMixin):
 
   security.declareProtected(Permissions.ModifyPortalContent,
                             'updateBaseMetadata')
-  def updateBaseMetadata(self, *arg, **kw):
+  def updateBaseMetadata(self, **kw):
     """
       Updates metadata information in the converted OOo document
       based on the values provided by the user. This is implemented
       through the invocation of the conversion server.
     """
-    data = self.getBaseData()
-    if data in ('', None):
-      raise ValueError, "OOoDocument: BaseData is empty. Document is not converted yet."
+    if not self.hasBaseData():
+      raise NotConvertedError
+
+    self.clearConversionCache()
 
     server_proxy = self._mkProxy()
     response_code, response_dict, response_message = \
           server_proxy.run_setmetadata(self.getId(),
-                                       enc(_unpackData(data)),
+                                       enc(_unpackData(self.getBaseData())),
                                        kw)
     if response_code == 200:
       # successful meta data extraction
       self._setBaseData(dec(response_dict['data']))
+      self.updateFileMetadata() # record in workflow history # XXX must put appropriate comments.
     else:
       # Explicitly raise the exception!
       raise ConversionError("OOoDocument: error getting document metadata %s:%s"
diff --git a/product/ERP5OOo/tests/testDms.py b/product/ERP5OOo/tests/testDms.py
index ae7469e955..4fdf7139f2 100644
--- a/product/ERP5OOo/tests/testDms.py
+++ b/product/ERP5OOo/tests/testDms.py
@@ -444,6 +444,8 @@ class TestDocument(ERP5TypeTestCase, ZopeTestCase.Functional):
     doc.edit(file=makeFileUpload('import_data_list.ods'))
     doc.publish()
     get_transaction().commit()
+    self.tic()
+    get_transaction().commit()
 
     uf = self.portal.acl_users
     uf._doAddUser('member_user2', 'secret', ['Member'], [])
-- 
2.30.9