OOoDocument.py 17.7 KB
Newer Older
Bartek Górny's avatar
Bartek Górny committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
##############################################################################
#
# Copyright (c) 2002-2006 Nexedi SARL and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
##############################################################################

28
import xmlrpclib
Jean-Paul Smets's avatar
Jean-Paul Smets committed
29
from xmlrpclib import Fault
30 31 32 33
import base64
import re
import zipfile
import cStringIO
Jean-Paul Smets's avatar
Jean-Paul Smets committed
34
import socket
35 36
from DateTime import DateTime

Bartek Górny's avatar
Bartek Górny committed
37 38 39 40 41 42 43
from AccessControl import ClassSecurityInfo
from OFS.Image import Pdata
from Products.CMFCore.utils import getToolByName
from Products.ERP5Type import Permissions, PropertySheet, Constraint, Interface
from Products.ERP5Type.Message import Message
from Products.ERP5Type.Cache import CachingMethod
from Products.ERP5Type.XMLObject import XMLObject
44 45
from Products.ERP5.Document.File import File, stripHtml
from Products.ERP5.Document.Document import ConversionCacheMixin
46
from Products.CMFCore.utils import getToolByName
47
from Products.DCWorkflow.DCWorkflow import ValidationFailed
Bartek Górny's avatar
Bartek Górny committed
48 49 50 51

enc=base64.encodestring
dec=base64.decodestring

52 53 54
_MARKER = []


55
class ConversionError(Exception):
  """
    Raised when a document can not be converted: unsupported target
    format, missing base (OOo) file, missing conversion server
    coordinates or a fault reported by the conversion server.
  """
Bartek Górny's avatar
Bartek Górny committed
56

57

58
class OOoDocument(File, ConversionCacheMixin):
Bartek Górny's avatar
Bartek Górny committed
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
  """
    A file document able to convert OOo compatible files to
    any OOo supported format, to capture metadata and to
    update metadata in OOo documents.

    This class can be used:

    - to create an OOo document database with powerful indexing (r/o)
      and metadata handling (r/w) features (ex. change title in ERP5 ->
      title is changed in OOo document)

    - to massively convert MS Office documents to OOo format

    - to easily keep snapshots (in PDF and/or OOo format) of OOo documents
      generated from OOo templates

    This class may be used in the future:

    - to create editable OOo templates (ex. by adding tags in WYSIWYG mode
      and using tags to make document dynamic - ask kevin for more info)

    - to automatically sign / encrypt OOo documents based on user

    - to automatically sign / encrypt PDF generated from OOo documents based on user

    This class should not be used:

    - to store files in formats not supported by OOo

    - to stored pure images (use Image for that)

    - as a general file conversion system (use portal_transforms for that)
Jean-Paul Smets's avatar
Jean-Paul Smets committed
91 92 93

    TODO:
    - better permissions
Bartek Górny's avatar
Bartek Górny committed
94 95 96 97 98 99 100 101
  """
  # CMF Type Definition
  meta_type = 'ERP5 OOo Document'
  portal_type = 'OOo Document'
  isPortalContent = 1
  isRADContent = 1

  # Global variables
102 103
  snapshot = None
  oo_data = None
Bartek Górny's avatar
Bartek Górny committed
104 105 106 107 108 109 110 111 112 113 114

  # Declarative security
  security = ClassSecurityInfo()
  security.declareObjectProtected(Permissions.AccessContentsInformation)

  # Default Properties
  property_sheets = ( PropertySheet.Base
                    , PropertySheet.CategoryCore
                    , PropertySheet.DublinCore
                    , PropertySheet.Version
                    , PropertySheet.Reference
115
                    , PropertySheet.TextDocument
116
                    , PropertySheet.Document
Bartek Górny's avatar
Bartek Górny committed
117 118
                    )

119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
  _properties =  (
   # XXX-JPS mime_type should be guessed if possible for the stored file.
   # In any case, it should be named differently because the name
   # is too unclear. Moreover, the usefulness of this property is
   # doubtful besides download of converted file. It would be acceptable
   # for me that this property is stored as an internal property
   # or, better, in the conversion workflow attributes.
   #
   # Properties are meant for "original document" information,
   # not for calculated attributes.
      { 'id'          : 'mime_type',
        'description' : 'mime type of the converted OOo file stored',
        'type'        : 'string',
        'mode'        : ''},
  )

135 136
  base_format = 'Open Document Format'

137
  # regexps for stripping xml from docs
138 139
  rx_strip = re.compile('<[^>]*?>', re.DOTALL|re.MULTILINE)
  rx_compr = re.compile('\s+')
140

141
  searchable_property_list = File.searchable_property_list + ('text_content', ) # XXX - good idea - should'n this be made more general ?
142

143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
  def index_html(self, REQUEST, RESPONSE, format=None, force=0):
    """
      Publishing entry point: returns the document data and sets the
      matching HTTP headers on RESPONSE.

      format -- a single target format string, or None to get the
                base (OOo) file as stored on the object
      force  -- if true, the base conversion is redone and the
                target format is regenerated

      A ConversionError raised while converting is left to propagate.
    """
    self.log(format, force)
    missing_base = not self.hasOOFile()
    if missing_base or force:
      self.convertToBase()
    if format is not None:
      # convert() returns a (mime type, data) pair
      mime, result = self.convert(format=format, force=force)
    else:
      result = self.getOOFile()
      mime = self.getMimeType()
      self.log(mime)
    #RESPONSE.setHeader('Last-Modified', rfc1123_date(self._p_mtime)) XXX to be implemented
    RESPONSE.setHeader('Content-Type', mime)
    #RESPONSE.setHeader('Content-Length', self.size) XXX to be implemented
    RESPONSE.setHeader('Accept-Ranges', 'bytes')
    # XXX here we should find out extension for this mime type and append to filename
    RESPONSE.setBase(None)
    return result

173
  def _getServerCoordinate(self):
    """
      Reads the OOo conversion server address and port number
      from portal_preferences and returns them as a tuple
      (address, port number).

      Raises ConversionError if either preference is None.
    """
    preference_tool = getToolByName(self, 'portal_preferences')
    address = preference_tool.getPreferredOoodocServerAddress()
    port_number = preference_tool.getPreferredOoodocServerPortNumber()
    for coordinate in (address, port_number):
      if coordinate is None:
        raise ConversionError('You should set conversion server coordinates in preferences')
    return address, port_number
Bartek Górny's avatar
Bartek Górny committed
184 185

  def _mkProxy(self):
    """
      Returns an XML-RPC proxy (None values allowed) pointing at
      the conversion server configured in preferences.
    """
    server_url = 'http://%s:%d' % self._getServerCoordinate()
    return xmlrpclib.ServerProxy(server_url, allow_none=True)

189
  def returnMessage(self, msg, code=0):
    """
      Wraps msg into a translatable Message (domain 'ui') and
      returns the tuple (code, message).

      code > 0 indicates a problem. Callers distinguish a message
      from returned data by checking whether the result is a tuple.

      XXX - This is an error. UI translation is the responsibility
      of skins (scripts or page templates).
    """
    return (code, Message(domain='ui', message=msg))
Bartek Górny's avatar
Bartek Górny committed
199 200 201 202 203 204 205 206 207 208 209

  security.declareProtected(Permissions.AccessContentsInformation, 'getTargetFormatItemList')
  def getTargetFormatItemList(self):
    """
      Returns the list of formats this document can be converted to,
      as [label, format] pairs (suitable for a listfield in ERP5Form).

      The list is asked from the conversion server for the content
      type of this document, and the answer is cached under the
      'erp5_ui_short' cache factory.

      XXX - to be implemented better (with extended API to conversion server)
    """
    def cached_getTargetFormatItemList(content_type):
      server_proxy = self._mkProxy()
      allowed = server_proxy.getAllowedTargets(content_type)
      # the server answers (format, label) pairs; listfields want (label, format)
      return [[label, target] for target, label in allowed]

    cached_getTargetFormatItemList = CachingMethod(
        cached_getTargetFormatItemList,
        id="OOoDocument_getTargetFormatItemList",
        cache_factory='erp5_ui_short')
    return cached_getTargetFormatItemList(self.getContentType())
Bartek Górny's avatar
Bartek Górny committed
219

220
  security.declareProtected(Permissions.AccessContentsInformation, 'getTargetFormatList')
Bartek Górny's avatar
Bartek Górny committed
221 222 223 224 225 226
  def getTargetFormatList(self):
    """
      Returns a list of acceptable formats for conversion
    """
    return map(lambda x: x[0], self.getTargetFormatItemList())

227
  security.declareProtected(Permissions.ModifyPortalContent, 'reset')
228
  def reset(self):
    """
      Puts the object back into a non-converted state, as if it was
      brand new: the conversion cache is cleared, the base (OOo) data
      is dropped and the 'process' workflow action is triggered.

      XXX-JPS more explicit name needed
    """
    self.clearConversionCache()
    self.oo_data = None
    # returnMessage gives (code, message); only the message text is used
    comment = str(self.returnMessage('new')[1])
    workflow_tool = getToolByName(self, 'portal_workflow')
    workflow_tool.doActionFor(self, 'process', comment=comment)
240

Bartek Górny's avatar
Bartek Górny committed
241 242 243
  security.declareProtected(Permissions.ModifyPortalContent,'isAllowed')
  def isAllowed(self, format):
    """
244 245
      Checks if the current document can be converted
      into the specified format.
Jean-Paul Smets's avatar
Jean-Paul Smets committed
246 247

      XXX-JPS more explicit name needed
Bartek Górny's avatar
Bartek Górny committed
248
    """
249
    allowed = self.getTargetFormatItemList()
Bartek Górny's avatar
Bartek Górny committed
250
    if allowed is None: return False
251
    return (format in [x[1] for x in allowed])
Bartek Górny's avatar
Bartek Górny committed
252 253

  security.declareProtected(Permissions.ModifyPortalContent,'editMetadata')
254
  def editMetadata(self, newmeta):
    """
      Sends the base (OOo) file together with newmeta to the
      conversion server (run_setmetadata), stores the file returned
      by the server as the new base file and updates the metadata
      of the ERP5 object from the server answer.

      Always returns True.
    """
    server_proxy = self._mkProxy()
    title = self.getTitle()
    encoded_file = enc(self._unpackData(self.oo_data))
    response = server_proxy.run_setmetadata(title, encoded_file, newmeta)
    self.oo_data = Pdata(dec(response['data']))
    self._setMetaData(response['meta'])
    return True # XXX why return ? - why not?

266
  security.declarePrivate('_convertToBase')
267
  def _convertToBase(self):
    """
      Sends the original file to the conversion server (run_convert)
      and stores the result on the object as the base (OOo) file.
      The text content is then extracted from the new base file and
      the metadata returned by the server is applied to the object.
    """
    server_proxy = self._mkProxy()
    source_reference = self.getSourceReference()
    encoded_file = enc(self._unpackData(self.data))
    response = server_proxy.run_convert(source_reference, encoded_file)
    self.oo_data = Pdata(dec(response['data']))
    # the text can only be extracted once oo_data has been stored
    self.setTextContent(self.extractTextContent())
    self._setMetaData(response['meta'])
Bartek Górny's avatar
Bartek Górny committed
280

281 282 283
  security.declareProtected(Permissions.View,'extractTextContent')
  def extractTextContent(self):
    """
      Extracts plain text from the base (OOo) file in the simplest
      possible way: content.xml is read from the zip archive, every
      XML tag is replaced by a space and runs of whitespace are
      compressed into a single space.

      Returns the resulting string. Errors raised while opening the
      archive or reading content.xml propagate to the caller; the
      buffer and the archive are closed in every case.
    """
    buf = cStringIO.StringIO()
    try:
      buf.write(self._unpackData(self.oo_data))
      archive = zipfile.ZipFile(buf)
      try:
        xml = archive.read('content.xml')
      finally:
        archive.close()
    finally:
      buf.close()
    text = self.rx_strip.sub(" ", xml) # strip xml
    return self.rx_compr.sub(" ", text) # compress multiple spaces

296

Bartek Górny's avatar
Bartek Górny committed
297 298 299
  security.declarePrivate('_setMetaData')
  def _setMetaData(self,meta):
    """
      Sets metadata properties of the ERP5 object (title, subject,
      description and, if provided, content type) from the meta
      dictionary.

      Unicode values are encoded to utf-8; values of any other type
      are used unchanged. The dictionary passed by the caller is
      not modified.

      XXX - please double check that some properties
      are not already defined in the Document class (which is used
      for Web Page in ERP5)

      XXX - it would be quite nice if the metadata structure
            could also support user fields in OOo
            (user fields are so useful actually...)
            XXX - I think it does (BG)
    """
    # work on a copy so that the caller's dictionary is left untouched
    encoded = {}
    for key, value in meta.items():
      if isinstance(value, unicode):
        value = value.encode('utf-8')
      encoded[key] = value
    self.setTitle(encoded.get('title', ''))
    # a missing or empty keywords entry gives an empty subject list
    self.setSubject((encoded.get('keywords') or '').split())
    self.setDescription(encoded.get('description', ''))
    #self.setLanguage(meta.get('language',''))
    mime_type = encoded.get('MIMEType', False)
    if mime_type:
      self.setContentType(mime_type)
    #self.setReference(meta.get('reference',''))
Bartek Górny's avatar
Bartek Górny committed
320

321
  security.declareProtected(Permissions.View, 'getOOFile')
322
  def getOOFile(self):
Bartek Górny's avatar
Bartek Górny committed
323
    """
324
      Return the converted OOo document.
Bartek Górny's avatar
Bartek Górny committed
325

326 327 328
      XXX - use a propertysheet for this instead. We have a type
            called data in property sheet. Look at File implementation
      XXX - doesn't seem to be there...
Bartek Górny's avatar
Bartek Górny committed
329
    """
330
    data = self.oo_data
Bartek Górny's avatar
Bartek Górny committed
331 332
    return data

333
  security.declareProtected(Permissions.View, 'hasOOFile')
334
  def hasOOFile(self):
Bartek Górny's avatar
Bartek Górny committed
335
    """
336
      Checks whether we have an OOo converted file
Bartek Górny's avatar
Bartek Górny committed
337
    """
338 339 340
    _marker = []
    if getattr(self, 'oo_data',_marker) is not _marker: # XXX - use propertysheet accessors
      return getattr(self, 'oo_data') is not None
Bartek Górny's avatar
Bartek Górny committed
341 342
    return False

343
  security.declareProtected(Permissions.View, 'hasSnapshot')
Bartek Górny's avatar
Bartek Górny committed
344 345
  def hasSnapshot(self):
    """
346
      Checks whether we have a snapshot.
Bartek Górny's avatar
Bartek Górny committed
347
    """
348 349 350
    _marker = []
    if getattr(self, 'snapshot', _marker) is not _marker: # XXX - use propertysheet accessors
      return getattr(self, 'snapshot') is not None
Bartek Górny's avatar
Bartek Górny committed
351 352 353 354 355
    return False

  security.declareProtected(Permissions.ModifyPortalContent,'createSnapshot')
  def createSnapshot(self,REQUEST=None):
    """
356
      Create a PDF snapshot
Bartek Górny's avatar
Bartek Górny committed
357

358 359 360
      XXX - we should not create a snapshot if some error happened at conversion
            is this checked ?
      XXX - error at conversion raises an exception, so it should be ok
Bartek Górny's avatar
Bartek Górny committed
361 362 363
    """
    if self.hasSnapshot():
      if REQUEST is not None:
364
        return self.returnMessage('already has a snapshot', 1)
365
      raise ConversionError('already has a snapshot')
Bartek Górny's avatar
Bartek Górny committed
366
    # making snapshot
367
    # we have to figure out which pdf format to use
368 369 370
    tgts = [x[1] for x in self.getTargetFormatItemList() if x[1].endswith('pdf')]
    if len(tgts) > 1:
      return self.returnMessage('multiple pdf formats found - this shouldnt happen', 2)
371
    if len(tgts)==0:
372
      return self.returnMessage('no pdf format found',1)
373
    fmt = tgts[0]
374
    self.makeFile(fmt)
375
    self.snapshot = Pdata(self._unpackData(self.getConversion(format = fmt)[1]))
Bartek Górny's avatar
Bartek Górny committed
376 377 378
    return self.returnMessage('snapshot created')

  security.declareProtected(Permissions.View,'getSnapshot')
379
  def getSnapshot(self, REQUEST=None):
Bartek Górny's avatar
Bartek Górny committed
380
    """
381
      Returns the snapshot.
Bartek Górny's avatar
Bartek Górny committed
382 383 384
    """
    if not self.hasSnapshot():
      self.createSnapshot()
385
    return self.snapshot
Bartek Górny's avatar
Bartek Górny committed
386 387 388 389

  security.declareProtected(Permissions.ManagePortal,'deleteSnapshot')
  def deleteSnapshot(self):
    """
390
      Deletes the snapshot - in theory this should never be done
Bartek Górny's avatar
Bartek Górny committed
391 392 393 394 395 396
    """
    try:
      del(self.snapshot)
    except AttributeError:
      pass

397
  def getHtmlRepresentation(self):
    """
      Returns a simplified html version of the document, to display.

      The document is converted to the first available target
      format starting with 'html'; the result is read as a zip
      archive and the first member whose name ends with 'html'
      is passed through stripHtml.

      If no html target format is available, the string
      'no html representation available' is returned. If the
      archive holds no html member, stripHtml is applied to the
      text 'could not extract anything'.

      The buffer and the archive are closed even if unpacking
      or reading fails.
    """
    # we have to figure out which html format to use
    html_format_list = [item[1] for item in self.getTargetFormatItemList()
                        if item[1].startswith('html')]
    if not html_format_list:
      return 'no html representation available'
    # convert() returns a (mime type, data) pair
    mime, data = self.convert(html_format_list[0])
    html = 'could not extract anything'
    buf = cStringIO.StringIO()
    try:
      buf.write(self._unpackData(data))
      archive = zipfile.ZipFile(buf)
      try:
        for member_name in archive.namelist():
          if member_name.endswith('html'):
            html = archive.read(member_name)
            break
      finally:
        archive.close()
    finally:
      buf.close()
    return stripHtml(html)
419

420 421
  security.declareProtected(Permissions.View, 'convert')
  def convert(self, format, REQUEST=None, force=0):
Bartek Górny's avatar
Bartek Górny committed
422
    """
423 424 425
      Get file in a given format.
      Runs makeFile to make sure we have the requested version cached,
      then returns from cache.
Bartek Górny's avatar
Bartek Górny committed
426
    """
427 428 429
    # first check if we have base
    if not self.hasOOFile():
      self.convertToBase()
Bartek Górny's avatar
Bartek Górny committed
430
    if not self.isAllowed(format):
431 432 433 434
      if REQUEST is not None:
        return self.returnMessage('can not convert to ' + format + ' for some reason',1)
      else:
        raise ConversionError, 'can not convert to ' + format + ' for some reason'
Bartek Górny's avatar
Bartek Górny committed
435
    try:
436 437
      # make if necessary, return from cache
      self.makeFile(format, force)
438 439
      return self.getConversion(format = format)
    except ConversionError,e:
440 441 442
      if REQUEST is not None:
        return self.returnMessage(str(e), 2)
      raise
Bartek Górny's avatar
Bartek Górny committed
443

444 445
  security.declareProtected(Permissions.View, 'isFileChanged')
  def isFileChanged(self, format):
Bartek Górny's avatar
Bartek Górny committed
446
    """
447 448
      Checks whether the file was converted (or uploaded) after last generation of
      the target format
Bartek Górny's avatar
Bartek Górny committed
449
    """
450
    return not self.hasConversion(format=format)
Bartek Górny's avatar
Bartek Górny committed
451

452 453
  security.declareProtected(Permissions.ModifyPortalContent, 'makeFile')
  def makeFile(self, format, force=0, REQUEST=None, **kw):
Bartek Górny's avatar
Bartek Górny committed
454
    """
455 456 457 458 459 460 461
      This method implement the file conversion cache:
        * check if the format is supported
        * check date of last conversion to OOo, compare with date of last
        * if necessary, create new file and cache
        * update file generation time

      Fails silently if we have an up to date version.
Bartek Górny's avatar
Bartek Górny committed
462

463 464
      TODO:
        * support of images in html conversion (as subobjects for example)
Jean-Paul Smets's avatar
Jean-Paul Smets committed
465 466

      XXX-JPS more explicit name needed for method
Bartek Górny's avatar
Bartek Górny committed
467 468
    """
    if not self.isAllowed(format):
469
      errstr = '%s format is not supported' % format
Bartek Górny's avatar
Bartek Górny committed
470
      if REQUEST is not None:
471
        return self.returnMessage(errstr, 2)
472
      raise ConversionError(errstr)
473
    if not self.hasOOFile():
Bartek Górny's avatar
Bartek Górny committed
474
      if REQUEST is not None:
475
        return self.returnMessage('needs conversion', 1)
476
      raise ConversionError('needs conversion')
477
    if self.isFileChanged(format) or force:
Bartek Górny's avatar
Bartek Górny committed
478
      try:
479
        mime, data = self._makeFile(format)
480
        self.setConversion(data, mime, format = format)
481
        #self._p_changed = 1 # XXX not sure it is necessary
482
      except xmlrpclib.Fault, e:
Bartek Górny's avatar
Bartek Górny committed
483
        if REQUEST is not None:
484
          return self.returnMessage('Problem: %s' % str(e), 2)
Bartek Górny's avatar
Bartek Górny committed
485
        else:
486 487
          raise ConversionError(str(e))
      self.updateConversion(format = format)
Bartek Górny's avatar
Bartek Górny committed
488 489 490 491
      if REQUEST is not None:
        return self.returnMessage('%s created' % format)
    else:
      if REQUEST is not None:
492
        return self.returnMessage('%s file is up to date' % format, 1)
Bartek Górny's avatar
Bartek Górny committed
493 494 495 496

  security.declarePrivate('_makeFile')
  def _makeFile(self,format):
    """
      Asks the conversion server (run_generate) to produce the
      base (OOo) file in the given format.

      Returns a tuple (mime type, data as Pdata).
    """
    server_proxy = self._mkProxy()
    source_reference = self.getSourceReference()
    encoded_file = enc(self._unpackData(self.oo_data))
    response = server_proxy.run_generate(source_reference, encoded_file, None, format)
    return response['mime'], Pdata(dec(response['data']))
Bartek Górny's avatar
Bartek Górny committed
503

504
  # make sure to call the right edit methods
505 506
  _edit = File._edit
  edit = File.edit
507 508 509

  # BG copied from File in case
  security.declareProtected('FTP access', 'manage_FTPget', 'manage_FTPstat', 'manage_FTPlist')
510 511 512
  manage_FTPget = File.manage_FTPget
  manage_FTPlist = File.manage_FTPlist
  manage_FTPstat = File.manage_FTPstat
513 514


Aurel's avatar
Aurel committed
515
# vim: syntax=python shiftwidth=2
Bartek Górny's avatar
Bartek Górny committed
516