-Remove Conversion API in Document.py as it is now in mixin/convertable

-Modified to include methods that return the allowed target item list for conversion. git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@34092 20353a03-c40f-0410-a6d1-a30d3c3de9de

-Remove Conversion API in Document.py as it is now in mixin/convertable
-Modified to include methods that return the allowed target item list for conversion. git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@34092 20353a03-c40f-0410-a6d1-a30d3c3de9de
ceca5d61 · Mame Coumba Sall · 6993cc96 · ceca5d61 · ceca5d61 · ceca5d61
Commit ceca5d61 authored Mar 25, 2010 by Mame Coumba Sall
4 changed files
--- a/product/ERP5/Document/Document.py
+++ b/product/ERP5/Document/Document.py
@@ -56,6 +56,12 @@ from Products.PythonScripts.Utility import allow_class
 # Mixin Import
 from Products.ERP5.mixin.cached_convertable import CachedConvertableMixin
+from Products.ERP5.mixin.convertable import ConvertableMixin
+from Products.ERP5.mixin.text_convertable import TextConvertableMixin
+from Products.ERP5.mixin.base_convertable import BaseConvertableMixin
+from Products.ERP5.mixin.html_convertable import HTMLConvertableMixin
+from Products.ERP5.mixin.metadata_discoverable import MetadataDiscoverableMixin
+from Products.ERP5.mixin.document import DocumentMixin
 _MARKER = []
 VALID_ORDER_KEY_LIST = ('user_login', 'content', 'file_name', 'input')
@@ -306,7 +312,8 @@ class UpdateMixIn:
    return method()
-class Document(PermanentURLMixIn, XMLObject, UrlMixIn, CachedConvertableMixin, SnapshotMixin, UpdateMixIn):
+class Document(PermanentURLMixIn, XMLObject, UrlMixIn, ConvertableMixin, TextConvertableMixin,HTMLConvertableMixin,
+          DocumentMixin, BaseConvertableMixin, MetadataDiscoverableMixin, CachedConvertableMixin, SnapshotMixin, UpdateMixIn):
  """Document is an abstract class with all methods related to document
  management in ERP5. This includes searchable text, explicit relations,
  implicit relations, metadata, versions, languages, etc.
@@ -1077,267 +1084,6 @@ class Document(PermanentURLMixIn, XMLObject, UrlMixIn, CachedConvertableMixin, S
    method = self._getTypeBasedMethod('finishIngestion', fallback_script_id='Document_finishIngestion')
    return method()
-  # Conversion methods
-  security.declareProtected(Permissions.AccessContentsInformation, 'convert')
-  def convert(self, format, **kw):
-    """
-      Main content conversion function, returns result which should
-      be returned and stored in cache.
-      format - the format specied in the form of an extension
-      string (ex. jpeg, html, text, txt, etc.)
-      **kw can be various things - e.g. resolution
-      Default implementation returns an empty string (html, text)
-      or raises an error.
-      TODO:
-      - implement guards API so that conversion to certain
-        formats require certain permission
-    """
-    if format == 'html':
-      return 'text/html', '' # XXX - Why ?
-    if format in ('text', 'txt'):
-      return 'text/plain', '' # XXX - Why ?
-    raise NotImplementedError
-  security.declareProtected(Permissions.View, 'asSubjectText')
-  def asSubjectText(self, **kw):
-    """
-      Converts the subject of the document to a textual representation.
-    """
-    subject = self.getSubject()
-    if not subject:
-      # XXX not sure if this fallback is a good idea.
-      subject = self.getTitle()
-    if subject is None:
-      subject = ''
-    return str(subject)
-  security.declareProtected(Permissions.View, 'asText')
-  def asText(self, **kw):
-    """
-      Converts the content of the document to a textual representation.
-    """
-    kw['format'] = 'txt'
-    mime, data = self.convert(**kw)
-    return str(data)
-  security.declareProtected(Permissions.View, 'asEntireHTML')
-  def asEntireHTML(self, **kw):
-    """
-      Returns a complete HTML representation of the document
-      (with body tags, etc.). Adds if necessary a base
-      tag so that the document can be displayed in an iframe
-      or standalone.
-      Actual conversion is delegated to _asHTML
-    """
-    html = self._asHTML(**kw)
-    if self.getUrlString():
-      # If a URL is defined, add the base tag
-      # if base is defined yet.
-      html = str(html)
-      if not html.find('<base') >= 0:
-        base = '<base href="%s">' % self.getContentBaseURL()
-        html = html.replace('<head>', '<head>%s' % base)
-      self.setConversion(html, mime='text/html', format='base-html')
-    return html
-  security.declarePrivate('_asHTML')
-  def _asHTML(self, **kw):
-    """
-      A private method which converts to HTML. This method
-      is the one to override in subclasses.
-    """
-    if not self.hasBaseData():
-      raise ConversionError('This document has not been processed yet.')
-    try:
-      # FIXME: no substitution may occur in this case.
-      mime, data = self.getConversion(format='base-html')
-      return data
-    except KeyError:
-      kw['format'] = 'html'
-      mime, html = self.convert(**kw)
-      return html
-  security.declareProtected(Permissions.View, 'asStrippedHTML')
-  def asStrippedHTML(self, **kw):
-    """
-      Returns a stripped HTML representation of the document
-      (without html and body tags, etc.) which can be used to inline
-      a preview of the document.
-    """
-    if not self.hasBaseData():
-      return ''
-    try:
-      # FIXME: no substitution may occur in this case.
-      mime, data = self.getConversion(format='stripped-html')
-      return data
-    except KeyError:
-      kw['format'] = 'html'
-      mime, html = self.convert(**kw)
-      return self._stripHTML(str(html))
-  def _guessEncoding(self, string):
-    """
-      Try to guess the encoding for this string.
-      Returns None if no encoding can be guessed.
-    """
-    try:
-      import chardet
-    except ImportError:
-      return None
-    return chardet.detect(string).get('encoding', None)
-  def _stripHTML(self, html, charset=None):
-    """
-      A private method which can be reused by subclasses
-      to strip HTML content
-    """
-    body_list = re.findall(self.body_parser, str(html))
-    if len(body_list):
-      stripped_html = body_list[0]
-    else:
-      stripped_html = html
-    # find charset and convert to utf-8
-    charset_list = self.charset_parser.findall(str(html)) # XXX - Not efficient if this
-                                         # is datastream instance but hard to do better
-    if charset and not charset_list:
-      # Use optional parameter is we can not find encoding in HTML
-      charset_list = [charset]
-    if charset_list and charset_list[0] not in ('utf-8', 'UTF-8'):
-      try:
-        stripped_html = unicode(str(stripped_html),
-                                charset_list[0]).encode('utf-8')
-      except (UnicodeDecodeError, LookupError):
-        return str(stripped_html)
-    return stripped_html
-  def _safeHTML(self, html, format='text/x-html-safe', charset=None):
-    """
-      A private method to strip HTML content in safe mode,
-      w/o emmbed javascript, forms and any external plugins imports.
-      This should be used when we do not trust the user (Anonymous)
-      who push data into database.
-      - html: content to strip
-      - format: destination format
-      - charset: charset used to encode string. Take precedence
-      on charset values found in html string
-    """
-    portal = self.getPortalObject()
-    if charset is None:
-      # find charset
-      charset_list = self.charset_parser.findall(html)
-      if charset_list:
-        charset = charset_list[0]
-    if charset and charset not in ('utf-8', 'UTF-8'):
-      try:
-        safe_html_string = html.decode(charset).encode('utf-8')
-      except (UnicodeDecodeError, LookupError):
-        pass
-      else:
-        charset = 'utf-8' # Override charset if convertion succeeds
-    transform_tool = getToolByName(portal, 'portal_transforms')
-    safe_html_string = transform_tool.convertToData(format, html,
-                                                    encoding=charset)
-    return safe_html_string
-  security.declareProtected(Permissions.AccessContentsInformation, 'getContentInformation')
-  def getContentInformation(self):
-    """
-    Returns the content information from the HTML conversion.
-    The default implementation tries to build a dictionnary
-    from the HTML conversion of the document and extract
-    the document title.
-    """
-    result = {}
-    html = self.asEntireHTML()
-    if not html: return result
-    title_list = re.findall(self.title_parser, str(html))
-    if title_list:
-      result['title'] = title_list[0]
-    return result
-  # Base format support
-  security.declareProtected(Permissions.ModifyPortalContent, 'convertToBaseFormat')
-  def convertToBaseFormat(self, **kw):
-    """
-      Converts the content of the document to a base format
-      which is later used for all conversions. This method
-      is common to all kinds of documents and handles
-      exceptions in a unified way.
-      Implementation is delegated to _convertToBaseFormat which
-      must be overloaded by subclasses of Document which
-      need a base format.
-      convertToBaseFormat is called upon file upload, document
-      ingestion by the processing_status_workflow.
-      NOTE: the data of the base format conversion should be stored
-      using the base_data property. Refer to Document.py propertysheet.
-      Use accessors (getBaseData, setBaseData, hasBaseData, etc.)
-    """
-    if getattr(self, 'hasData', None) is not None and not self.hasData():
-      # Empty document cannot be converted
-      return
-    try:
-      message = self._convertToBaseFormat() # Call implemetation method
-      self.clearConversionCache() # Conversion cache is now invalid
-      if message is None:
-        # XXX Need to translate.
-        message = 'Converted to %s.' % self.getBaseContentType()
-      self.convertFile(comment=message) # Invoke workflow method
-    except NotImplementedError:
-      message = ''
-    return message
-  def _convertToBaseFormat(self):
-    """
-    """
-    raise NotImplementedError
-  security.declareProtected(Permissions.AccessContentsInformation,
-                            'isSupportBaseDataConversion')
-  def isSupportBaseDataConversion(self):
-    """
-    """
-    return False
-  def convertFile(self, **kw): # XXX - It it really useful to explicitly define ?
-    """
-    Workflow transition invoked when conversion occurs.
-    """
-  convertFile = WorkflowMethod(convertFile)
-  security.declareProtected(Permissions.AccessContentsInformation,
-                            'getMetadataMappingDict')
-  def getMetadataMappingDict(self):
-    """
-    Return a dict of metadata mapping used to update base metadata of the
-    document
-    """
-    try:
-      method = self._getTypeBasedMethod('getMetadataMappingDict')
-    except KeyError, AttributeError:
-      method = None
-    if method is not None:
-      return method()
-    else:
-      return {}
-  security.declareProtected(Permissions.ModifyPortalContent, 'updateBaseMetadata')
-  def updateBaseMetadata(self, **kw):
-    """
-    Update the base format data with the latest properties entered
-    by the user. For example, if title is changed in ERP5 interface,
-    the base format file should be updated accordingly.
-    Default implementation does nothing. Refer to OOoDocument class
-    for an example of implementation.
-    """
-    pass
  # Transformation API
  security.declareProtected(Permissions.ModifyPortalContent, 'populateContent')
  def populateContent(self):

--- a/product/ERP5/Document/Image.py
+++ b/product/ERP5/Document/Image.py
@@ -53,6 +53,8 @@ except ImportError:
 from zLOG import LOG, WARNING
 from Products.CMFCore.utils import getToolByName
+#Mixin import
+from Products.ERP5.mixin.convertable import ConvertableMixin
 default_displays_id_list = ('nano', 'micro', 'thumbnail',
                            'xsmall', 'small', 'medium',
@@ -60,7 +62,7 @@ default_displays_id_list = ('nano', 'micro', 'thumbnail',
 default_formats = ['jpg', 'jpeg', 'png', 'gif', 'pnm', 'ppm']
-class Image(File, OFSImage):
+class Image(File, OFSImage, ConvertableMixin):
  """
    An Image is a File which contains image data. It supports
    various conversions of format, size, resolution through
@@ -322,11 +324,36 @@ class Image(File, OFSImage):
    return mime_type, result
  # Conversion API
+  security.declareProtected(Permissions.View, 'getAllowedTargetItemList')
+  def getAllowedTargetItemList(self):
+    import commands
+    import re
+    import os
+    new_result = []
+    filename = os.path.abspath(self.getSourceReference())
+    result = commands.getstatusoutput('convert -list format %s ' % self.getSourceReference())
+    new_list = re.split('\n',result[1])
+    allowed = []
+    for new_str in new_list:
+      test_str = new_str.lstrip()
+      pattern = re.compile(r'''([A-z]+[*]?\s+[A-z]+\s+[rw+-]+\s+[A-z]+\s+[A-z]+\D+[A-z]+)''',re.VERBOSE)
+      if re.match(pattern,test_str):
+        new_result.append(test_str)
+    len_new_result = len(new_result)
+    for i in range(0,len_new_result):
+      allowed.append(list((new_result[i].split()[1].lower(),' '.join(new_result[i].split()[3:])))) 
+    return [(y, x) for x, y in allowed]
  security.declareProtected(Permissions.AccessContentsInformation, 'convert')
  def convert(self, format, display=None, quality=75, resolution=None, frame=None, **kw):
    """
    Implementation of conversion for Image files
    """
+    # Raise an error if the format is not permitted
+    if not self.isTargetFormatPermitted(format):
+      raise Unauthorized("User does not have enough permission to access document"
+				     " in %s format" % (format or 'original'))
    if format in ('text', 'txt', 'html', 'base_html', 'stripped-html'):
      try:
        return self.getConversion(format=format)

--- a/product/ERP5/Document/PDFDocument.py
+++ b/product/ERP5/Document/PDFDocument.py
@@ -37,7 +37,13 @@ from Products.ERP5.Document.Image import Image
 from Products.ERP5.Document.Document import ConversionError
 from Products.ERP5.mixin.cached_convertable import CachedConvertableMixin
-class PDFDocument(Image, CachedConvertableMixin):
+from zLOG import LOG, WARNING
+# Mixin import
+from Products.ERP5.mixin.convertable import ConvertableMixin
+class PDFDocument(Image, ConvertableMixin, CachedConvertableMixin):
  """
  PDFDocument is a subclass of Image which is able to
  extract text content from a PDF file either as text
@@ -98,6 +104,11 @@ class PDFDocument(Image, CachedConvertableMixin):
                            resolution=resolution, frame=frame)
  # Conversion API
+  security.declareProtected(Permissions.View, 'getAllowedTargetItemList')
+  def getAllowedTargetItemList(self):
+    return Image.getAllowedTargetItemList(self) + \
+       [('Text', 'txt'),('Plain Text','text'), ('HTML Document', 'html')]
  security.declareProtected(Permissions.AccessContentsInformation, 'convert')
  def convert(self, format, **kw):
    """

--- a/product/ERP5/Document/TextDocument.py
+++ b/product/ERP5/Document/TextDocument.py
@@ -43,9 +43,12 @@ try:
 except ImportError:
  from Products.ERP5Type.patches.string import Template
+# Mixin import
+from Products.ERP5.mixin.convertable import ConvertableMixin
 DEFAULT_TEXT_FORMAT = 'text/html'
-class TextDocument(Document, TextContent):
+class TextDocument(Document, TextContent, ConvertableMixin):
    """
        A Document contains text which can be formatted using
        *Structured Text* or *HTML*. Text can be automatically translated
@@ -146,6 +149,10 @@ class TextDocument(Document, TextContent):
      if format is None:
        # The default is to use ERP5 Forms to render the page
        return self.view()
+      # Raise an error if the format is not permitted
+      if not self.isTargetFormatPermitted(format):
+	raise Unauthorized("User does not have enough permission to access document"
+				" in %s format" % (format or 'original'))
      mime, data = self.convert(format=format) 
      RESPONSE.setHeader('Content-Length', len(str(data))) # XXX - Not efficient 
                                                           # if datastream instance
@@ -201,11 +208,24 @@ class TextDocument(Document, TextContent):
      return self._substituteTextContent(subject, safe_substitute=safe_substitute,
                                         **substitution_method_parameter_dict)
+    security.declareProtected(Permissions.View, 'getAllowedTargetItemList')
+    def getAllowedTargetItemList(self):
+      mime_type = getToolByName(self, 'mimetypes_registry')
+      allowed=[]
+      for extension in mime_type.extensions:
+        allowed.append((mime_type.extensions[extension].name(),extension))
+      return [(y, x) for x, y in allowed]
    security.declareProtected(Permissions.AccessContentsInformation, 'convert')
    def convert(self, format, substitution_method_parameter_dict=None, safe_substitute=True, **kw):
      """
        Convert text using portal_transforms or oood
      """
+      # Raise an error if the format is not permitted
+      if not self.isTargetFormatPermitted(format):
+	raise Unauthorized("User does not have enough permission to access document"
+					 " in %s format" % (format or 'original'))
      # Accelerate rendering in Web mode
      _setCacheHeaders(_ViewEmulator().__of__(self), {'format' : format})
      # Return the raw content