Code review and refactoring based on Document API.

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@13631 20353a03-c40f-0410-a6d1-a30d3c3de9de

Code review and refactoring based on Document API.
git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@13631 20353a03-c40f-0410-a6d1-a30d3c3de9de
fb493724 · Jean-Paul Smets · d7067b8b · fb493724 · fb493724 · fb493724
Commit fb493724 authored Mar 26, 2007 by Jean-Paul Smets
4 changed files
--- a/product/ERP5/Document/Document.py
+++ b/product/ERP5/Document/Document.py
@@ -42,6 +42,9 @@ from Products.ERP5Type.WebDAVSupport import TextContent
 from Products.ERP5Type.Message import Message
 from Products.ERP5Type.Utils import convertToUpperCase, convertToMixedCase
 from Products.ERP5.Document.Url import UrlMixIn
+from Products.ERP5.Tool.ContributionTool import MAX_REPEAT
+from zLOG import LOG
 _MARKER = []
 VALID_ORDER_KEY_LIST = ('user_login', 'content', 'file_name', 'input')
@@ -51,6 +54,47 @@ def makeSortedTuple(kw):
  items.sort()
  return tuple(items)
+class SnapshotMixin:
+  """
+    This class provides a generic API to store in the ZODB
+    PDF snapshots of objects and documents with the
+    goal to keep a facsimile copy of documents as they
+    were at a given date.
+  """
+  # Declarative security
+  security = ClassSecurityInfo()
+  security.declareObjectProtected(Permissions.AccessContentsInformation)
+  security.declareProtected(Permissions.ModifyPortalContent, 'createSnapshot')
+  def createSnapshot(self):
+    """
+      Create a snapshot (PDF). This is the normal way to modifiy
+      snapshot_data. Once a snapshot is taken, a new snapshot
+      can not be taken.
+      NOTE: use getSnapshotData and hasSnapshotData accessors
+      to access a snapshot.
+      NOTE2: implementation of createSnapshot should probably
+      be delegated to a types base method since this it
+      is configuration dependent.
+    """
+    if self.hasSnapshotData():
+      raise ConversionError('This document already has a snapshot.')
+    self._setSnapshotData(self.convert(format='pdf'))
+  security.declareProtected(Permissions.ManagePortal, 'deleteSnapshot')
+  def deleteSnapshot(self):
+    """
+      Deletes the snapshot - in theory this should never be done.
+      It is there for programmers and system administrators.
+    """
+    try:
+      del(self.snapshot_data)
+    except AttributeError:
+      pass
 class ConversionError(Exception):pass
 class ConversionCacheMixin:
@@ -107,8 +151,7 @@ class ConversionCacheMixin:
  security.declareProtected(Permissions.View, 'getCacheTime')
  def getCacheTime(self, **format):
    """
-      Checks when 
+      Checks when if ever was the file produced
-if ever was the file produced
    """
    self.updateConversionCache()
    return self._cached_time.get(makeSortedTuple(format), 0)
@@ -148,7 +191,7 @@ if ever was the file produced
    tformat = makeSortedTuple(format)
    return self._cached_mime.get(tformat, ''), self._cached_data.get(tformat, '')
-  security.declareProtected(Permissions.View, 'getConversionCacheInfo')
+  security.declareProtected(Permissions.ViewManagementScreens, 'getConversionCacheInfo')
  def getConversionCacheInfo(self):
    """
    Get cache details as string (for debugging)
@@ -173,7 +216,7 @@ if ever was the file produced
    return s
-class Document(XMLObject, UrlMixIn):
+class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin):
  """
      Document is an abstract class with all methods
      related to document management in ERP5. This includes
@@ -239,7 +282,7 @@ class Document(XMLObject, UrlMixIn):
      input      -   data supplied with http request or set on the object during (2) (e.g.
                     discovered from email text)
-      file_name  -    data which might be encoded in file name
+      file_name  -   data which might be encoded in file name
      user_login -   information about user who is contributing the file
      content    -   data which might be derived from document content
@@ -335,6 +378,8 @@ class Document(XMLObject, UrlMixIn):
  # Regular expressions
  href_parser = re.compile('<a[^>]*href=[\'"](.*?)[\'"]',re.IGNORECASE)
+  body_parser = re.compile('<body[^>]*>(.*?)</body>', re.IGNORECASE + re.DOTALL)
+  title_parser = re.compile('<title[^>]*>(.*?)</title>', re.IGNORECASE + re.DOTALL)
  # Declarative security
  security = ClassSecurityInfo()
@@ -347,25 +392,25 @@ class Document(XMLObject, UrlMixIn):
                    , PropertySheet.DublinCore
                    , PropertySheet.Version
                    , PropertySheet.Document
-                    , PropertySheet.Snapshot
                    , PropertySheet.ExternalDocument
                    , PropertySheet.Url
                    , PropertySheet.Periodicity
+                    , PropertySheet.Snapshot
                    )
  # Declarative interfaces
  __implements__ = ()
-  searchable_property_list = ('title', 'description', 'id', 'reference',
+  searchable_property_list = ('asText', 'title', 'description', 'id', 'reference',
-                              'version', 'short_title', 'keyword',
+                              'version', 'short_title',
-                              'subject', 'source_reference', 'source_project_title')
+                              'subject', 'source_reference', 'source_project_title',)
  data = '' # some day this will be in property sheets
  base_format = 'base storage format'
  ### Content processing methods
  security.declareProtected(Permissions.View, 'index_html')
-  def index_html(self, REQUEST, RESPONSE, format=None, force=0, **kw):
+  def index_html(self, REQUEST, RESPONSE, format=None, **kw):
    """
      We follow here the standard Zope API for files and images
      and extend it to support format conversion. The idea
@@ -382,16 +427,14 @@ class Document(XMLObject, UrlMixIn):
      Should return appropriate format (calling convert
      if necessary) and set headers.
-      format - the format specied in the form of an extension
+      format -- the format specied in the form of an extension
      string (ex. jpeg, html, text, txt, etc.)
-      force - convert doc even if it has a cached version which seems to be up2date
-      **kw can be various things - e.g. resolution
+      **kw -- can be various things - e.g. resolution
      TODO:
      - implement guards API so that conversion to certain
        formats require certain permission
-      - force parameter should be somehow restricted
-        to prevent denial of service attack
    """
    pass
@@ -421,6 +464,7 @@ class Document(XMLObject, UrlMixIn):
      else:
        val = list(val)
      return val
    searchable_text = reduce(add, map(lambda x: getPropertyListOrValue(x),
                                                self.searchable_property_list))
    searchable_text = ' '.join(searchable_text)
@@ -429,23 +473,30 @@ class Document(XMLObject, UrlMixIn):
  # Compatibility with CMF Catalog
  SearchableText = getSearchableText
+  security.declareProtected(Permissions.AccessContentsInformation, 'isExternalDocument')
+  def isExternalDocument(self):
+    """
+    Return true if this document was obtained from an external source
+    """
+    return bool(self.getUrlString())
  ### Relation getters
+  security.declareProtected(Permissions.View, 'getSearchableReferenceList')
  def getSearchableReferenceList(self):
    """
-      Public Method
      This method returns a list of dictionaries which can
      be used to find objects by reference. It uses for
      that a regular expression defined at system level
      preferences.
    """
-    text = self.getSearchableText()
+    text = self.getSearchableText() # XXX getSearchableText or asText ?
    regexp = self.portal_preferences.getPreferredDocumentReferenceRegularExpression()
    try:
      rx_search = re.compile(regexp)
    except TypeError: # no regexp in preference
-      self.log('please set document reference regexp in preferences')
+      LOG('ERP5/Document/Document.getSearchableReferenceList', 0,
-      return []
+          'Document regular expression must be set in portal preferences')
+      return ()
    res = rx_search.finditer(text)
    res = [(r.group(), r.groupdict()) for r in res]
    return res
@@ -785,8 +836,8 @@ class Document(XMLObject, UrlMixIn):
      Based on the document content, find out as many properties as needed.
      returns properties which should be set on the document
    """
-    # XXX this method should first make sure we have text content
+    if not self.hasBaseData():
-    # or do a conversion
+      self.convertToBaseFormat()
    method = self._getTypeBasedMethod('getPropertyDictFromContent',
        fallback_script_id='Document_getPropertyDictFromContent')
    return method()
@@ -821,7 +872,7 @@ class Document(XMLObject, UrlMixIn):
  ### Metadata disovery and ingestion methods
  security.declareProtected(Permissions.ModifyPortalContent, 'discoverMetadata')
-  def discoverMetadata(self, file_name, user_login=None): # XXX Was filename always there ? at least make it optional
+  def discoverMetadata(self, file_name=None, user_login=None):
    """
      This is the main metadata discovery function - controls the process
      of discovering data from various sources. The discovery itself is
@@ -843,88 +894,39 @@ class Document(XMLObject, UrlMixIn):
    order_list.reverse()
    # Start with everything until content - build a dictionary according to the order
-    content_index = order_list.index('content')
    kw = {}
-    first_list = order_list[:content_index]
+    for order_id in order_list:
-    for order_id in first_list:
      if order_id not in VALID_ORDER_KEY_LIST:
        # Prevent security attack or bad preferences
        raise AttributeError, "%s is not in valid order key list" % order_id
      method_id = 'getPropertyDictFrom%s' % convertToUpperCase(order_id)
      method = getattr(self, method_id)
      if order_id == 'file_name':
-        result = method(file_name)
+        if file_name is not None: result = method(file_name)
      elif order_id == 'user_login':
-        result = method(user_login)
+        if user_login is not None: result = method(user_login)
      else:
        result = method()
      if result is not None:
+        # LOG('discoverMetadata %s' % order_id, 0, repr(result))
        kw.update(result)
-    # Edit content
+    # Prepare the content edit parameters - portal_type should not be changed
-    try:
-      del(kw['portal_type'])
-    except KeyError:
-      pass
-    self.edit(**kw)
-    # Finish in second stage
-    self.activate().finishMetadataDiscovery(user_login=user_login)
-  security.declareProtected(Permissions.ModifyPortalContent, 'finishMetadataDiscovery')
-  def finishMetadataDiscovery(self, user_login):
-    """
-      This is called by portal_activities, to leave time-consuming procedures
-      for later. It converts what needs conversion to base, and
-      does things that can be done only after it is converted).
-    """
-    self.convertToBaseFormat()
-    # Get the order from preferences
-    # Preference is made of a sequence of 'user_login', 'content', 'file_name', 'input'
-    method = self._getTypeBasedMethod('getPreferredDocumentMetadataDiscoveryOrderList', 
-        fallback_script_id = 'Document_getPreferredDocumentMetadataDiscoveryOrderList')
-    order_list = list(method())
-    order_list.reverse()
-    # do content and everything after content
-    content_index = order_list.index('content')
-    second_list = order_list[content_index:]
-    kw = {}
-    for order_id in second_list:
-      if order_id not in VALID_ORDER_KEY_LIST:
-        # Prevent security attack or bad preferences
-        raise AttributeError, "%s is not in valid order key list" % order_id
-      method_id = 'getPropertyDictFrom%s' % convertToUpperCase(order_id)
-      method = getattr(self, method_id)
-      if order_id == 'file_name':
-        result = method(self.getSourceReference())
-      elif order_id == 'user_login':
-        result = method(user_login)
-      else:
-        result = method()
-      if result is not None:
-        kw.update(result)
-    # Edit content
    try:
      del(kw['portal_type'])
    except KeyError:
      pass
-    self.edit(**kw)
-    # Erase backup attributes
+    self._edit(**kw) # Try not to invoke an automatic transition here
-    if hasattr(self, '_backup_input'):
+    self.finishIngestion() # Finish ingestion by calling method
-      delattr(self, '_backup_input')
+    self.reindexObject()
-    # Finish ingestion by calling method
-    self.finishIngestion()
  security.declareProtected(Permissions.ModifyPortalContent, 'finishIngestion')
  def finishIngestion(self):
    """
-      Finish the ingestion process by calling the appropriate script
+      Finish the ingestion process by calling the appropriate script. This
+      script can for example allocate a reference number automatically if
+      no reference was defined.
    """
    return self._getTypeBasedMethod('finishIngestion',
        fallback_script_id='Document_finishIngestion')
@@ -939,39 +941,86 @@ class Document(XMLObject, UrlMixIn):
      string (ex. jpeg, html, text, txt, etc.)
      **kw can be various things - e.g. resolution
+      Default implementation returns an empty string (html, text)
+      or raises an error.
      TODO:
      - implement guards API so that conversion to certain
        formats require certain permission
    """
-    pass
+    if format == 'html':
+      return 'text/html', ''
+    if format in ('text', 'txt'):
+      return 'text/plain', ''
+    raise NotImplementedError
  security.declareProtected(Permissions.View, 'asText')
  def asText(self):
    """
      Converts the content of the document to a textual representation.
    """
-    return self.convert(format='txt')
+    mime, data = self.convert(format='txt')
+    return data
  security.declareProtected(Permissions.View, 'asHTML')
  def asHTML(self):
    """
      Returns a complete HTML representation of the document
-      (with body tags, etc.).
+      (with body tags, etc.). Adds if necessary a base
-    """
+      tag so that the document can be displayed in an iframe
-    return self.convert(format='html')
+      or standalone.
+    """
+    if self.hasConversion(format='base-html'):
+      mime, data = self.getConversion(format='base-html')
+      return data
+    mime, html = self.convert(format='html')
+    if self.getUrlString():
+      # If a URL is defined, add the base tag
+      # if base is defined yet.
+      html = str(html)
+      if not html.find('<base') >= 0:
+        base = '<base href="%s">' % self.getContentBaseURL()
+        html = html.replace('<head>', '<head>%s' % base)
+      # We do not implement cache yet since it increases ZODB
+      # for probably no reason. More research needed
+      # self.setConversion(html, mime='text/html', format='base-html')
+    return html
  security.declareProtected(Permissions.View, 'asStrippedHTML')
  def asStrippedHTML(self):
    """
-      Returns a stipped HTML representation of the document
+      Returns a stripped HTML representation of the document
-      (without body tags, etc.) which can be used to inline
+      (without html and body tags, etc.) which can be used to inline
      a preview of the document.
    """
-    return self.convert(format='html')
+    if self.hasConversion(format='stripped-html'):
+      mime, data = self.getConversion(format='stripped-html')
+      return data
+    mime, html = self.convert(format='html')
+    body_list = re.findall(self.body_parser, str(html))
+    if len(body_list):
+      return body_list[0]
+    return html
+  security.declareProtected(Permissions.AccessContentsInformation, 'getContentInformation')
+  def getContentInformation(self):
+    """
+    Returns the content information from the HTML conversion.
+    The default implementation tries to build a dictionnary
+    from the HTML conversion of the document and extract
+    the document title.
+    """
+    result = {}
+    html = self.asHTML()
+    if not html: return result
+    title_list = re.findall(self.title_parser, str(html))
+    if title_list:
+      result['title'] = title_list[0]
+    return result
  # Base format support
  security.declareProtected(Permissions.ModifyPortalContent, 'convertToBaseFormat')
-  def convertToBaseFormat(self, REQUEST=None):
+  def convertToBaseFormat(self):
    """
      Converts the content of the document to a base format
      which is later used for all conversions. This method
@@ -990,7 +1039,8 @@ class Document(XMLObject, UrlMixIn):
      Use accessors (getBaseData, setBaseData, hasBaseData, etc.)
    """
    try:
-      msg = self._convertToBaseFormat()
+      msg = self._convertToBaseFormat() # Call implemetation method
+      self.clearConversionCache() # Conversion cache is now invalid
      if msg is None:
        msg = 'Converted to %s.' % self.base_format
      self.convertFile(comment=msg) # Invoke workflow method
@@ -1025,36 +1075,6 @@ class Document(XMLObject, UrlMixIn):
    """
    raise NotImplementedError
-  # Snapshot methods - XXX since this can be useful beyond
-  # documents, it should be moved to MixIn class which may
-  # be used, for example, to take a snapshot of an invoice.
-  security.declareProtected(Permissions.ModifyPortalContent, 'createSnapshot')
-  def createSnapshot(self):
-    """
-      Create a snapshot (PDF). This is the normal way to modifiy
-      snapshot_data. Once a snapshot is taken, a new snapshot
-      can not be taken.
-      NOTE: use getSnapshotData and hasSnapshotData accessors
-      to access a snapshot.
-      NOTE2: implementation of createSnapshot should probably
-      be delegated to a types base method since this it
-      is configuration dependent.
-    """
-    pass
-  security.declareProtected(Permissions.ManagePortal, 'deleteSnapshot')
-  def deleteSnapshot(self):
-    """
-      Deletes the snapshot - in theory this should never be done.
-      It is there for programmers and system administrators.
-    """
-    try:
-      del(self.snapshot_data)
-    except AttributeError:
-      pass
  # Transformation API
  security.declareProtected(Permissions.ModifyPortalContent, 'populateContent')
  def populateContent(self):
@@ -1091,12 +1111,12 @@ class Document(XMLObject, UrlMixIn):
    return re.findall(self.href_parser, str(html_content))
  security.declareProtected(Permissions.ModifyPortalContent, 'updateContentFromURL')
-  def updateContentFromURL(self):
+  def updateContentFromURL(self, repeat=MAX_REPEAT, crawling_depth=0):
    """
      Download and update content of this document from its source URL.
      Implementation is handled by ContributionTool.
    """
-    self.portal_contributions.updateContentFromURL(self)
+    self.portal_contributions.updateContentFromURL(self, repeat=repeat, crawling_depth=crawling_depth)
  security.declareProtected(Permissions.ModifyPortalContent, 'crawlContent')
  def crawlContent(self):
@@ -1111,7 +1131,7 @@ class Document(XMLObject, UrlMixIn):
      Returns the content base URL based on the actual content or
      on its URL.
    """
-    # XXX TODO - try to retrive base URL from content
+    # XXX TODO - try to retrieve base URL from content
    # If no base_url defined, define the base URL from our URL
    base_url = self.asURL()
    base_url_list = base_url.split('/')
@@ -1123,7 +1143,7 @@ class Document(XMLObject, UrlMixIn):
  def getNextAlarmDate(self):
    """
    This method is only there to have something to test.
-    Serious refactory of Alarm, Periodicity and CalendarPeriod
+    Serious refactoring of Alarm, Periodicity and CalendarPeriod
    classes is needed.
    """
-    return DateTime() + .01
+    return DateTime() + .1
--- a/product/ERP5/Document/File.py
+++ b/product/ERP5/Document/File.py
@@ -42,18 +42,6 @@ from DateTime import DateTime
 mimetypes.init()
-rs=[]
-rs.append(re.compile('<HEAD>.*</HEAD>',re.DOTALL|re.MULTILINE|re.IGNORECASE))
-rs.append(re.compile('<!DOCTYPE[^>]*>'))
-rs.append(re.compile('<.?(HTML|BODY)[^>]*>',re.DOTALL|re.MULTILINE|re.IGNORECASE))
-def stripHtml(txt): # XXX-JPS to be moved to TextDocument
-  for r in rs:
-    txt=r.sub('',txt)
-  return txt
 class File(Document, CMFFile, ConversionCacheMixin):
  """
      A File can contain raw data which can be uploaded and downloaded.
@@ -86,15 +74,18 @@ class File(Document, CMFFile, ConversionCacheMixin):
  # Default Properties
  property_sheets = ( PropertySheet.Base
-    , PropertySheet.CategoryCore
+                    , PropertySheet.XMLObject
-    , PropertySheet.DublinCore
+                    , PropertySheet.CategoryCore
-    , PropertySheet.Version
+                    , PropertySheet.DublinCore
-    , PropertySheet.Reference
+                    , PropertySheet.Version
-    , PropertySheet.Document
+                    , PropertySheet.Reference
-    , PropertySheet.Data
+                    , PropertySheet.Document
+                    , PropertySheet.Data
+                    , PropertySheet.ExternalDocument
+                    , PropertySheet.Url
+                    , PropertySheet.Periodicity
    )
  # Declarative interfaces
  #__implements__ = ( , )
@@ -152,7 +143,7 @@ class File(Document, CMFFile, ConversionCacheMixin):
    self.reindexObject()
  security.declarePrivate('_unpackData')
-  def _unpackData(self,data):
+  def _unpackData(self, data):
    """
    Unpack Pdata into string
    """
@@ -178,11 +169,9 @@ class File(Document, CMFFile, ConversionCacheMixin):
    return content_type
  security.declareProtected(Permissions.ModifyPortalContent,'PUT')
-  def PUT(self,REQUEST,RESPONSE):
+  def PUT(self, REQUEST, RESPONSE):
    self.clearConversionCache()
-    CMFFile.PUT(self,REQUEST,RESPONSE)
+    CMFFile.PUT(self, REQUEST, RESPONSE)
-    self.discoverMetadata(fname=self.getId())
  # DAV Support
  index_html = CMFFile.index_html 
@@ -191,5 +180,3 @@ class File(Document, CMFFile, ConversionCacheMixin):
  manage_FTPget = CMFFile.manage_FTPget
  manage_FTPlist = CMFFile.manage_FTPlist
  manage_FTPstat = CMFFile.manage_FTPstat
-# vim: syntax=python shiftwidth=2 
--- a/product/ERP5/Document/Image.py
+++ b/product/ERP5/Document/Image.py
@@ -94,12 +94,16 @@ class Image(File, OFSImage):
  # Default Properties
  property_sheets = ( PropertySheet.Base
+                    , PropertySheet.XMLObject
                    , PropertySheet.CategoryCore
                    , PropertySheet.DublinCore
                    , PropertySheet.Version
                    , PropertySheet.Reference
                    , PropertySheet.Document
                    , PropertySheet.Data
+                    , PropertySheet.ExternalDocument
+                    , PropertySheet.Url
+                    , PropertySheet.Periodicity
                    )
  #
@@ -188,7 +192,7 @@ class Image(File, OFSImage):
          if not self.hasConversion(display=display, format=format,
                                    quality=quality, resolution=resolution):
              # Generate photo on-the-fly
-              self._makeDisplayPhoto(display, 1, format=format, quality=quality, resolution=resolution)
+              self._makeDisplayPhoto(display, format=format, quality=quality, resolution=resolution)
          mime, image = self.getConversion(display=display, format=format,
                                     quality=quality ,resolution=resolution)
          width, height = (image.width, image.height)
@@ -290,10 +294,11 @@ class Image(File, OFSImage):
          if not self.hasConversion(display=display, format=format,
                                    quality=quality,resolution=resolution):
              # Generate photo on-the-fly
-              self._makeDisplayPhoto(display, 1, format=format, quality=quality,resolution=resolution)
+              self._makeDisplayPhoto(display, format=format, quality=quality,resolution=resolution)
          # Return resized image
          mime, image = self.getConversion(display=display, format=format,
                                     quality=quality ,resolution=resolution)
+          RESPONSE.setHeader('Content-Type', mime)
          return image.index_html(REQUEST, RESPONSE)
      # Return original image
@@ -307,7 +312,6 @@ class Image(File, OFSImage):
  def _resize(self, display, width, height, quality=75, format='', resolution=None):
      """Resize and resample photo."""
      newimg = StringIO()
-      os.putenv('TMPDIR', '/tmp') # because if we run zope as root, we have /root/tmp here and convert goes crazy
      if sys.platform == 'win32':
          from win32pipe import popen2
@@ -324,8 +328,9 @@ class Image(File, OFSImage):
            imgout, imgin = popen2('convert -quality %s -geometry %sx%s - -'
                                  % (quality, width, height))
          else:
-            LOG('Resolution',0,str(resolution))
+            # LOG('Resolution',0,str(resolution))
-            cmd = 'convert -density %sx%s -quality %s -geometry %sx%s - -' % (resolution, resolution, quality, width, height)
+            cmd = 'convert -density %sx%s -quality %s -geometry %sx%s - -' % (resolution,
+                                                        resolution, quality, width, height)
            imgout, imgin = popen2(cmd)
      imgin.write(str(self.getData()))
@@ -357,9 +362,9 @@ class Image(File, OFSImage):
                                                                 quality=quality,resolution=resolution))
      return image
-  def _makeDisplayPhoto(self, display, force=0, format='', quality=75, resolution=None):
+  def _makeDisplayPhoto(self, display, format='', quality=75, resolution=None):
      """Create given display."""
-      if not self.hasConversion(display=display, format=format, quality=quality,resolution=resolution) or force:
+      if not self.hasConversion(display=display, format=format, quality=quality,resolution=resolution):
          image = self._getDisplayPhoto(display, format=format, quality=quality, resolution=resolution)
          self.setConversion(image,  mime=image.content_type,
                                     display=display, format=format,

--- a/product/ERP5/Document/TextDocument.py
+++ b/product/ERP5/Document/TextDocument.py
@@ -125,7 +125,11 @@ class TextDocument(Document, TextContent):
      if format is None:
        # The default is to use ERP5 Forms to render the page
        return self.view()
-      return self.convert(format=format)
+      mime, data = self.convert(format=format) 
+      RESPONSE.setHeader('Content-Length', len(data))
+      RESPONSE.setHeader('Content-Type', mime)
+      RESPONSE.setHeader('Accept-Ranges', 'bytes')
+      return data
    security.declareProtected(Permissions.View, 'convert')
    def convert(self, format, **kw):
@@ -136,30 +140,14 @@ class TextDocument(Document, TextContent):
      _setCacheHeaders(self, {'format' : format})
      # Return the raw content
      if format == 'raw':
-        return self.getTextContent()
+        return 'text/plain', self.getTextContent()
      mime_type = getToolByName(self, 'mimetypes_registry').lookupExtension('name.%s' % format)
      src_mimetype = self.getTextFormat()
      if not src_mimetype.startswith('text/'):
        src_mimetype = 'text/%s' % src_mimetype
-      return getToolByName(self, 'portal_transforms').convertTo(mime_type,
+      return mime_type, getToolByName(self, 'portal_transforms').convertTo(mime_type,
                           self.getTextContent(), object=self, mimetype=src_mimetype)
    def __call__(self):
      _setCacheHeaders(self, {})
      return Document.__call__(self)
-    ### Content indexing methods
-    security.declareProtected(Permissions.View, 'getSearchableText')
-    def getSearchableText(self, md=None):
-        """\
-        Used by the catalog for basic full text indexing
-        We should try to do some kind of file conversion here so that getTextContent
-        returns something more readable.
-        """
-        searchable_text = "%s %s %s %s %s" %  (self.getTitle(), self.getShortTitle(),
-                                               self.getDescription(),
-                                               self.getId(), self.getTextContent())
-        return searchable_text
-    # Compatibility with CMF Catalog / CPS sites
-    SearchableText = getSearchableText # XXX-JPS - Here wa have a security issue - ask seb what to do