TextDocument.py 10.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
##############################################################################
#
# Copyright (c) 2002 Nexedi SARL and Contributors. All Rights Reserved.
#                    Jean-Paul Smets-Solanes <jp@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
##############################################################################

from AccessControl import ClassSecurityInfo
30
from Products.ERP5Type.Base import WorkflowMethod
31
from Products.CMFCore.utils import getToolByName
32
from Products.CMFCore.utils import _setCacheHeaders, _ViewEmulator
33 34 35
from Products.ERP5Type import Permissions, PropertySheet, Constraint, Interface
from Products.ERP5.Document.Document import Document
from Products.ERP5Type.WebDAVSupport import TextContent
Ivan Tyagov's avatar
Ivan Tyagov committed
36
from Products.CMFDefault.utils import isHTMLSafe
Jean-Paul Smets's avatar
Jean-Paul Smets committed
37 38
import re

39 40
DEFAULT_TEXT_FORMAT = 'text/html'

41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
class TextDocument(Document, TextContent):
    """
        A Document contains text which can be formatted using
        *Structured Text* or *HTML*. Text can be automatically translated
        through the use of 'message catalogs'.

        Document inherits from XMLObject and can
        be synchronized across multiple sites.

        Version Management: the notion of version depends on the
        type of application. For example, in the case (1) of Transformation
        (BOM), all versions are considered as equal and may be kept
        indefinitely for both archive and usage purpose. In the case (2)
        of Person data, the new version replaces the previous one
        in place and is not needed for archive. In the case (3) of
        a web page, the new version replaces the previous one,
        the previous one being kept in place for archive.

        Subcontent: documents may include subcontent (files, images, etc.)
        so that publication of rich content can be path independent.
    """

    meta_type = 'ERP5 Text Document'
    portal_type = 'Text Document'
    add_permission = Permissions.AddPortalContent
    isPortalContent = 1
    isRADContent = 1
68
    isDocument = 1
69 70 71 72 73 74 75 76 77 78 79 80

    # Declarative security
    security = ClassSecurityInfo()
    security.declareObjectProtected(Permissions.AccessContentsInformation)

    # Declarative properties
    property_sheets = ( PropertySheet.Base
                      , PropertySheet.XMLObject
                      , PropertySheet.CategoryCore
                      , PropertySheet.DublinCore
                      , PropertySheet.Version
                      , PropertySheet.Document
Jean-Paul Smets's avatar
Jean-Paul Smets committed
81 82 83
                      , PropertySheet.Snapshot
                      , PropertySheet.ExternalDocument
                      , PropertySheet.Url
84 85 86 87 88 89
                      , PropertySheet.TextDocument
                      )

    # Declarative interfaces
    __implements__ = ()

90 91 92 93 94 95 96
    # Explicit inheritance
    security.declareProtected(Permissions.ModifyPortalContent, 'PUT')
    PUT = TextContent.PUT # We have a security issue here with Zope < 2.8

    security.declareProtected(Permissions.View, 'manage_FTPget')
    manage_FTPget = TextContent.manage_FTPget

97 98 99 100 101 102 103 104 105 106 107 108 109
    # File handling
    security.declarePrivate( '_edit' )
    def _edit(self, **kw):
      """
        Edit the document, optionally taking its text from an uploaded file.

        If a 'file' keyword is given, its whole content is read and handed
        to self.handleText() in order to detect the text format. The content
        and the detected format are then used as defaults for the
        'text_content' and 'text_format' keywords (values passed explicitly
        by the caller take precedence) and 'file' itself is not forwarded.

        All remaining keywords are passed to Document._edit().
      """
      if 'file' in kw:
        uploaded_file = kw.pop('file')
        text_content = uploaded_file.read()
        # handleText returns (headers, body, format); only the format is
        # needed here, the text is stored exactly as it was read.
        _headers, _body, text_format = self.handleText(text=text_content)
        kw.setdefault('text_format', text_format)
        kw.setdefault('text_content', text_content)
      # No HTML safety check (isHTMLSafe) is done here on purpose: a
      # TextDocument instance may contain something else than HTML, so
      # rejecting content with "illegal" HTML tags at this level was
      # disabled.
      Document._edit(self, **kw)
122 123 124 125

    security.declareProtected( Permissions.ModifyPortalContent, 'edit' )
    edit = WorkflowMethod( _edit )
    
126 127 128 129 130 131 132 133 134 135 136 137 138 139
    # Default Display
    security.declareProtected(Permissions.View, 'index_html')
    def index_html(self, REQUEST, RESPONSE, format=None, **kw):
      """
        Default display of the document.

        Unlike images and files, a text document is by default shown
        through the standard ERP5 machinery (self.view()), so that it
        gets the whole layout of a Web Site.

        When a format parameter is provided, the text content is converted
        with self.convert() and the converted data is returned, after
        setting the Content-Length, Content-Type and Accept-Ranges headers
        on RESPONSE.
      """
      if format is not None:
        mime, data = self.convert(format=format)
        # XXX - Not efficient if data is a datastream instance
        content_length = len(str(data))
        RESPONSE.setHeader('Content-Length', content_length)
        RESPONSE.setHeader('Content-Type', mime)
        RESPONSE.setHeader('Accept-Ranges', 'bytes')
        return data
      # The default is to use ERP5 Forms to render the page
      return self.view()
Jean-Paul Smets's avatar
Jean-Paul Smets committed
146 147 148 149

    security.declareProtected(Permissions.View, 'convert')
    def convert(self, format, **kw):
      """
        Convert text using portal_transforms or oood

        format -- 'raw' to get the text content unchanged, otherwise a
                  file extension which is resolved to a mime type through
                  the mimetypes_registry tool.
        kw     -- extra keywords, only forwarded to self._asFormat() when
                  the source text is HTML.

        Always returns a (mime_type, data) tuple:
          - 'raw' format: ('text/plain', text content)
          - no text content set: (mime_type, '')
          - HTML source: data is produced by self._asFormat() (oood)
          - any other source: data is produced by portal_transforms
      """
      # Accelerate rendering in Web mode
      _setCacheHeaders(_ViewEmulator().__of__(self), {'format' : format})
      # Return the raw content
      if format == 'raw':
        return 'text/plain', self.getTextContent()
      mime_type = getToolByName(self, 'mimetypes_registry').lookupExtension('name.%s' % format)
      src_mimetype = self.getTextFormat(DEFAULT_TEXT_FORMAT)
      if not src_mimetype.startswith('text/'):
        src_mimetype = 'text/%s' % src_mimetype
      # check if document has set text_content and convert if necessary
      text_content = self.getTextContent()
      if text_content is None:
        # text_content is not set, return empty string instead of None
        return mime_type, ''
      if src_mimetype == 'text/html':
        # Works with oood
        kw['REQUEST'] = self.REQUEST
        # _asFormat returns the converted data only: pair it with the mime
        # type so that callers can unpack the result like in every other
        # branch (index_html does "mime, data = self.convert(...)").
        data = self._asFormat(text_content, format, src_mimetype, **kw)
        return mime_type, data
      portal_transforms = getToolByName(self, 'portal_transforms')
      return mime_type, portal_transforms.convertTo(mime_type,
                                                    text_content,
                                                    object = self,
                                                    mimetype = src_mimetype)
177

178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
    def _asFormat(self, ooo, format, src_mimetype, REQUEST=None, batch_mode=0):
      """
        Transform text_content into ODF or many output format supported by oood

        ooo          -- the text content to convert
        format       -- the target format, passed to the convert method of
                        a temporary OOo document
        src_mimetype -- mime type of ooo, set as base_content_type of the
                        temporary document
        REQUEST      -- optional request; when provided (and not in batch
                        mode) Content-type and Content-disposition headers
                        are set on its RESPONSE
        batch_mode   -- if true, no response header is set and 'pdf' is
                        converted like any other format

        Returns the converted data only (not the mime type).

        Raises ValueError if 'pdf' is requested outside of batch mode and
        the temporary document does not offer exactly one target format
        ending with 'pdf'.
      """
      # Create a temp OOoDocument
      from Products.ERP5Type.Document import newTempOOoDocument
      title = self.title_or_id()
      tmp_ooo = newTempOOoDocument(self, title)
      tmp_ooo.edit(base_data=ooo,
                   fname=title,
                   source_reference=title,
                   base_content_type=src_mimetype,)
      tmp_ooo.oo_data = ooo
      # Convert it into ODT
      tmp_ooo.convert('odt')
      if format == 'pdf' and not batch_mode:
        # Slightly different implementation
        # now convert it to pdf
        tgts = [x[1] for x in tmp_ooo.getTargetFormatItemList()
                if x[1].endswith('pdf')]
        if len(tgts) != 1:
          # REQUEST is optional: only touch the response when there is one,
          # so that the ValueError below is what the caller actually gets.
          if REQUEST is not None:
            REQUEST.RESPONSE.setHeader('Content-type', 'text/html')
            REQUEST.RESPONSE.setHeader('Content-disposition', 'inline;filename=%s.pdf' % title)
          if tgts:
            raise ValueError('multiple pdf formats found - this shouldnt happen')
          raise ValueError('no pdf format found')
        fmt = tgts[0]
        # Apply transformation in output format
        mime, data = tmp_ooo.convert(fmt)
        if REQUEST is not None:
          REQUEST.RESPONSE.setHeader('Content-type', 'application/pdf')
          REQUEST.RESPONSE.setHeader('Content-disposition', 'attachment;filename=%s.pdf' % title)
        return data
      # Apply transformation in output format
      mime, data = tmp_ooo.convert(format)
      if REQUEST is not None and not batch_mode:
        REQUEST.RESPONSE.setHeader('Content-type', mime)
        REQUEST.RESPONSE.setHeader('Content-disposition', 'attachment;filename=%s.%s' % (title, format))
        # FIXME the above lines should return zip format when html was requested
      return data

Jean-Paul Smets's avatar
Jean-Paul Smets committed
220
    def __call__(self):
      """
        Set the cache headers for this document (no extra keywords),
        then delegate the call to Document.__call__().
      """
      view_emulator = _ViewEmulator().__of__(self)
      _setCacheHeaders(view_emulator, {})
      return Document.__call__(self)
223 224 225 226 227 228 229

    security.declareProtected(Permissions.AccessContentsInformation, 'getContentBaseURL')
    def getContentBaseURL(self):
      """
        Returns the content base URL based on the actual content
        (in HTML).

        The HTML version of the document (self._asHTML()) is searched
        with the self.base_parser pattern and the first match is returned.
        If nothing matches, the value computed by
        Document.getContentBaseURL() is returned instead.
      """
      html_text = str(self._asHTML())
      found_base_list = re.findall(self.base_parser, html_text)
      if not found_base_list:
        return Document.getContentBaseURL(self)
      return found_base_list[0]
235

236
    security.declareProtected(Permissions.AccessContentsInformation, 'hasBaseData')
237
    def hasBaseData(self):
      """
        A TextDocument stores its data in the "text_content" property.
        Since there is no such thing as base_data in a TextDocument,
        having base_data is equivalent to having some text_content:
        the result of self.hasTextContent() is returned.
      """
      has_text_content = self.hasTextContent()
      return has_text_content