OOoDocument.py 23.5 KB
Newer Older
1
# -*- coding: utf-8 -*-
Bartek Górny's avatar
Bartek Górny committed
2 3 4
##############################################################################
#
# Copyright (c) 2002-2006 Nexedi SARL and Contributors. All Rights Reserved.
5
# Copyright (c) 2006-2007 Nexedi SA and Contributors. All Rights Reserved.
Bartek Górny's avatar
Bartek Górny committed
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
##############################################################################

30
import xmlrpclib, base64, re, zipfile, cStringIO
31
from warnings import warn
32
from xmlrpclib import Fault
33 34
from xmlrpclib import Transport
from xmlrpclib import SafeTransport
Bartek Górny's avatar
Bartek Górny committed
35
from AccessControl import ClassSecurityInfo
36
from AccessControl import Unauthorized
Bartek Górny's avatar
Bartek Górny committed
37
from OFS.Image import Pdata
38
from OFS.Image import File as OFSFile
39 40 41 42
try:
    from OFS.content_types import guess_content_type
except ImportError:
    from zope.contenttype import guess_content_type
43 44
from Products.CMFCore.utils import getToolByName, _setCacheHeaders,\
    _ViewEmulator
45
from Products.ERP5Type import Permissions, PropertySheet, Constraint
Bartek Górny's avatar
Bartek Górny committed
46
from Products.ERP5Type.Cache import CachingMethod
47
from Products.ERP5.Document.File import File
48
from Products.ERP5.Document.Document import PermanentURLMixIn
49 50
from Products.ERP5.Document.Document import ConversionError
from Products.ERP5.Document.Document import NotConvertedError
51
from zLOG import LOG, ERROR
52

53 54 55
# Mixin Import
from Products.ERP5.mixin.cached_convertable import CachedConvertableMixin

Bartek Górny's avatar
Bartek Górny committed
56 57 58
enc=base64.encodestring
dec=base64.decodestring

59
_MARKER = []
60
STANDARD_IMAGE_FORMAT_LIST = ('png', 'jpg', 'gif', 'tiff', )
61

62 63 64 65 66 67
class TimeoutTransport(SafeTransport):
  """A xmlrpc transport with configurable timeout.

  timeout -- value passed to settimeout() on the connection socket once the
             request headers have been sent; a false value (None, 0) leaves
             the socket untouched.
  scheme  -- 'http' opens connections through Transport.make_connection,
             any other value through SafeTransport.make_connection.
  """
  def __init__(self, timeout=None, scheme='http'):
    self._timeout = timeout
    self._scheme = scheme
    # On Python 2.6, .__init__() of Transport and SafeTransport must be called
    # to set up the ._use_datetime attribute.
    # sigh... too bad we can't use super() here, as SafeTransport is not
    # a new-style class (as of Python 2.4 to 2.6)
    # remove the getattr below when we drop support for Python 2.4
    super__init__ = getattr(SafeTransport, '__init__', lambda self: None)
    super__init__(self)

  def send_content(self, connection, request_body):
    """Send the request headers and body, applying the timeout in between.
    """
    connection.putheader("Content-Type", "text/xml")
    connection.putheader("Content-Length", str(len(request_body)))
    connection.endheaders()
    if self._timeout:
      # Older xmlrpclib hands over an httplib.HTTP wrapper which keeps the
      # real connection in ._conn, while Python 2.7 hands over the
      # HTTPConnection itself: support both instead of failing with
      # AttributeError on the latter.
      http_connection = getattr(connection, '_conn', connection)
      http_connection.sock.settimeout(self._timeout)
    if request_body:
      connection.send(request_body)

  def make_connection(self, h):
    """Open a plain or secure connection to h depending on the scheme.
    """
    if self._scheme == 'http':
      return Transport.make_connection(self, h)
    return SafeTransport.make_connection(self, h)


91
class OOoDocument(PermanentURLMixIn, File, CachedConvertableMixin):
Bartek Górny's avatar
Bartek Górny committed
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
  """
    A file document able to convert OOo compatible files to
    any OOo supported format, to capture metadata and to
    update metadata in OOo documents.

    This class can be used:

    - to create an OOo document database with powerful indexing (r/o)
      and metadata handling (r/w) features (ex. change title in ERP5 ->
      title is changed in OOo document)

    - to massively convert MS Office documents to OOo format

    - to easily keep snapshots (in PDF and/or OOo format) of OOo documents
      generated from OOo templates

    This class may be used in the future:

    - to create editable OOo templates (ex. by adding tags in WYSIWYG mode
      and using tags to make document dynamic - ask kevin for more info)

    - to automatically sign / encrypt OOo documents based on user

    - to automatically sign / encrypt PDF generated from OOo documents based on user

    This class should not be used:

    - to store files in formats not supported by OOo

    - to store pure images (use Image for that)

    - as a general file conversion system (use portal_transforms for that)
Jean-Paul Smets's avatar
Jean-Paul Smets committed
124 125 126

    TODO:
    - better permissions
Bartek Górny's avatar
Bartek Górny committed
127 128 129 130 131
  """
  # CMF Type Definition
  meta_type = 'ERP5 OOo Document'
  portal_type = 'OOo Document'

  searchable_property_list = (
    'asText', 'title', 'description', 'id', 'reference',
    'version', 'short_title',
    'subject', 'source_reference', 'source_project_title',
  )

  # Declarative security
  security = ClassSecurityInfo()
  security.declareObjectProtected(Permissions.AccessContentsInformation)

  # Default Properties
  property_sheets = (
    PropertySheet.Base,
    PropertySheet.XMLObject,
    PropertySheet.Reference,
    PropertySheet.CategoryCore,
    PropertySheet.DublinCore,
    PropertySheet.Version,
    PropertySheet.Document,
    PropertySheet.Snapshot,
    PropertySheet.ExternalDocument,
    PropertySheet.Url,
    PropertySheet.Periodicity,
    PropertySheet.SortIndex,
  )

155
  # regular expressions for stripping xml from ODF documents
156 157
  # rx_strip matches any single XML tag, rx_compr any run of whitespace.
  # Raw strings keep the backslash of '\s' from being read as a string escape.
  rx_strip = re.compile(r'<[^>]*?>', re.DOTALL|re.MULTILINE)
  rx_compr = re.compile(r'\s+')
158

159 160
  security.declareProtected(Permissions.AccessContentsInformation,
                            'isSupportBaseDataConversion')
161 162 163 164 165 166
  def isSupportBaseDataConversion(self):
    """
    Tell callers that an OOoDocument has to be converted to its
    base format: always returns True.
    """
    return True

167 168 169 170
  def _setFile(self, data, precondition=None):
    """
    Store the file through File._setFile, then drop the 'base_data'
    attribute if the document reports having base data.
    """
    File._setFile(self, data, precondition=precondition)
    if not self.hasBaseData():
      return
    # This is a hack - XXX - new accessor needed to delete properties
    try:
      del self.base_data
    except AttributeError:
      # Nothing stored on this instance: nothing to remove.
      pass
175

176
  security.declareProtected(Permissions.View, 'index_html')
177
  def index_html(self, REQUEST, RESPONSE, format=None, display=None, **kw):
    """
      Default renderer with conversion support. Format is
      a string. The list of available formats can be obtained
      by calling getTargetFormatItemList.

      Without format, the original file is returned as an attachment.
      With a format, the document is converted and the result returned;
      it is sent as an attachment unless the format is one of
      STANDARD_IMAGE_FORMAT_LIST.

      Raises Unauthorized if the type based permission script rejects
      the format, and NotConvertedError if a conversion is requested
      while the document has no base data.
    """
    # Accelerate rendering in Web mode
    _setCacheHeaders(_ViewEmulator().__of__(self), {'format' : format})

    # Verify that the format is acceptable (from permission point of view)
    method = self._getTypeBasedMethod('checkConversionFormatPermission',
        fallback_script_id = 'Document_checkConversionFormatPermission')
    if not method(format=format):
      raise Unauthorized("OOoDocument: user does not have enough permission to access document"
                         " in %s format" % (format or 'original'))

    # The download name is the source reference if any, else the id
    filename = self.getSourceReference()
    if filename is None:
      filename = self.getId()

    # Return the original file by default
    if format is None:
      RESPONSE.setHeader('Content-Disposition',
                         'attachment; filename="%s"' % filename)
      return File.index_html(self, REQUEST, RESPONSE)
    # Make sure file is converted to base format
    if not self.hasBaseData():
      raise NotConvertedError
    # Else try to convert the document and return it
    mime, result = self.convert(format=format, display=display, **kw)
    # Replace the last extension by the target format. A name without any
    # dot is kept entirely, otherwise the download would be named ".<format>".
    if '.' in filename:
      base_name = filename.rsplit('.', 1)[0]
    else:
      base_name = filename
    converted_filename = '%s.%s' % (base_name, format)
    if not mime:
      mime = getToolByName(self, 'mimetypes_registry').lookupExtension('name.%s' % format)
    RESPONSE.setHeader('Content-Length', len(result))
    RESPONSE.setHeader('Content-Type', mime)
    RESPONSE.setHeader('Accept-Ranges', 'bytes')
    if format not in STANDARD_IMAGE_FORMAT_LIST:
      RESPONSE.setHeader('Content-Disposition',
                         'attachment; filename="%s"' % converted_filename)
    return result
217

218
  # Format conversion implementation
219
  def _getServerCoordinate(self):
    """
      Return the (address, port) pair of the oood conversion server
      read from portal_preferences.

      Raises ConversionError if either value is '' or None.
    """
    preference_tool = getToolByName(self, 'portal_preferences')
    address = preference_tool.getPreferredOoodocServerAddress()
    port = preference_tool.getPreferredOoodocServerPortNumber()
    undefined_value_list = ('', None)
    if address in undefined_value_list or port in undefined_value_list:
      raise ConversionError('OOoDocument: can not proceed with conversion:'
            ' conversion server host and port is not defined in preferences')
    return address, port
Bartek Górny's avatar
Bartek Górny committed
231 232 233

  def _mkProxy(self):
    """
      Build and return the XML-RPC proxy used to talk to the
      conversion server (plain http, 360 seconds timeout).
    """
    address, port = self._getServerCoordinate()
    server_url = 'http://%s:%d' % (address, port)
    transport = TimeoutTransport(timeout=360, scheme='http')
    return xmlrpclib.ServerProxy(server_url,
                                 allow_none=True,
                                 transport=transport)
Bartek Górny's avatar
Bartek Górny committed
241

242 243
  security.declareProtected(Permissions.AccessContentsInformation,
                            'getTargetFormatItemList')
Bartek Górny's avatar
Bartek Górny committed
244 245 246 247 248
  def getTargetFormatItemList(self):
    """
      Returns a list of acceptable formats for conversion
      in the form of tuples (for listfield in ERP5Form)

      NOTE: it is the responsibility of the conversion server
      to provide an extensive list of conversion formats.

      The list is cached per base content type. Raises
      NotConvertedError if the document has no base data and
      ConversionError if the server answers with a code other than 200.
    """
    if not self.hasBaseData():
      raise NotConvertedError

    def cached_getTargetFormatItemList(content_type):
      server_proxy = self._mkProxy()
      try:
        allowed_target_item_list = server_proxy.getAllowedTargetItemList(
                                                      content_type)
        try:
          response_code, response_dict, response_message = \
                                             allowed_target_item_list
        except ValueError:
          # Compatibility with older oood where getAllowedTargetItemList only
          # returned response_dict
          response_code, response_dict, response_message = \
                         200, dict(response_data=allowed_target_item_list), ''

        if response_code == 200:
          allowed = response_dict['response_data']
        else:
          # This is very temporary code - XXX needs to be changed
          # so that the system can retry
          raise ConversionError("OOoDocument: can not get list of allowed acceptable"
                                " formats for conversion: %s (%s)" % (
                                      response_code, response_message))

      except Fault:
        # The server rejected getAllowedTargetItemList: fall back to the
        # older getAllowedTargets call.
        allowed = server_proxy.getAllowedTargets(content_type)
        warn('Your oood version is too old, using old method '
            'getAllowedTargets instead of getAllowedTargetItemList',
             DeprecationWarning)

      # tuple order is reversed to be compatible with ERP5 Form
      return [(y, x) for x, y in allowed]

    # Cache valid format list
    cached_getTargetFormatItemList = CachingMethod(
                                cached_getTargetFormatItemList,
                                id="OOoDocument_getTargetFormatItemList",
                                cache_factory='erp5_ui_medium')

    return cached_getTargetFormatItemList(self.getBaseContentType())

295 296
  security.declareProtected(Permissions.AccessContentsInformation,
                            'getTargetFormatTitleList')
297
  def getTargetFormatTitleList(self):
    """
      Returns the first element of every item of
      getTargetFormatItemList, as a list.
    """
    return [item[0] for item in self.getTargetFormatItemList()]

303 304
  security.declareProtected(Permissions.AccessContentsInformation,
                            'getTargetFormatList')
305
  def getTargetFormatList(self):
    """
      Returns the second element of every item of
      getTargetFormatItemList, as a list. Those values are the
      format identifiers which convert compares against.
    """
    return [item[1] for item in self.getTargetFormatItemList()]
Bartek Górny's avatar
Bartek Górny committed
310

311 312
  security.declareProtected(Permissions.ModifyPortalContent,
                            'isTargetFormatAllowed')
313
  def isTargetFormatAllowed(self, format):
    """
      Returns true if format is one of the formats listed by
      getTargetFormatList for the current document.
    """
    allowed_format_list = self.getTargetFormatList()
    return format in allowed_format_list

  security.declarePrivate('_convert')
  def _convert(self, format):
    """
      Communicates with server to convert a file

      format -- target format passed to the conversion server. The special
                value 'text-content' is handled locally: content.xml is read
                from the base data (a zip archive) and stripped of its tags.

      Returns a (mime type, data) tuple. Raises NotConvertedError if the
      document has no base data and ConversionError if the server reports
      a failure.
    """
    if not self.hasBaseData():
      raise NotConvertedError
    if format == 'text-content':
      # Extract text from the ODF file
      cs = cStringIO.StringIO()
      try:
        cs.write(str(self.getBaseData()))
        z = zipfile.ZipFile(cs)
        try:
          s = z.read('content.xml')
        finally:
          z.close()
      finally:
        # Release the buffer even if the archive can not be read
        cs.close()
      s = self.rx_strip.sub(" ", s) # strip xml
      s = self.rx_compr.sub(" ", s) # compress multiple spaces
      return 'text/plain', s
    server_proxy = self._mkProxy()
    orig_format = self.getBaseContentType()
    generate_result = server_proxy.run_generate(self.getId(),
                                       enc(str(self.getBaseData())),
                                       None,
                                       format,
                                       orig_format)
    try:
      response_code, response_dict, response_message = generate_result
    except ValueError:
      # This is for backward compatibility with older oood version returning
      # only response_dict
      response_code, response_dict, response_message = 200, generate_result, ''

    if response_code != 200:
      # Report the server failure explicitly rather than failing later with
      # a KeyError on a response which carries no converted data.
      raise ConversionError(
                "OOoDocument: Error converting document to %s format %s:%s"
                           % (format, response_code, response_message))
    return response_dict['mime'], Pdata(dec(response_dict['data']))
354

355
  # Conversion API
356
  security.declareProtected(Permissions.AccessContentsInformation, 'convert')
357
  def convert(self, format, display=None, **kw):
    """Convert the document to the given format.

    If a conversion is already stored for this format, it is returned
    directly, otherwise the conversion is stored for the next time.

    format  -- requested format. 'base-data' returns the base data as is;
               'pdf', 'html', the text formats and the formats of
               STANDARD_IMAGE_FORMAT_LIST are mapped to one of the formats
               listed by getTargetFormatList.
    display -- only used for formats of STANDARD_IMAGE_FORMAT_LIST, in which
               case it is passed to the conversion of a temporary Image and
               used as part of the conversion cache key.
    kw      -- accepted for compatibility, not used here.

    Returns a (mime type, data) tuple. Raises NotConvertedError if the
    document has no base data and ConversionError if no suitable target
    format is available.
    """
    #XXX if document is empty, stop to try to convert.
    #XXX but I don't know what is a appropriate mime-type.(Yusei)
    if self.get_size() == 0:
      return 'text/plain', ''

    def getFirstTargetFormat(accept):
      # First format listed by the server for which accept() is true, or None
      for target_format in self.getTargetFormatList():
        if accept(target_format):
          return target_format
      return None

    def getFirstPdfFormat():
      # Same as above for PDF, which is mandatory wherever it is requested:
      # fail with an explicit error instead of an IndexError.
      pdf_format = getFirstTargetFormat(lambda x: x.endswith('pdf'))
      if pdf_format is None:
        raise ConversionError(
          "OOoDocument: target format %s is not supported" % original_format)
      return pdf_format

    # Make sure we can support html and pdf by default
    is_html = 0
    requires_pdf_first = 0
    original_format = format
    if format == 'base-data':
      if not self.hasBaseData():
        raise NotConvertedError
      return self.getBaseContentType(), str(self.getBaseData())
    if format == 'pdf':
      format = getFirstPdfFormat()
    elif format in STANDARD_IMAGE_FORMAT_LIST:
      format = getFirstTargetFormat(lambda x: x.endswith(original_format))
      if format is None:
        # We must first make a PDF
        requires_pdf_first = 1
        format = getFirstPdfFormat()
    elif format == 'html':
      format = getFirstTargetFormat(
                    lambda x: x.startswith('html') or x.endswith('html'))
      if format is None:
        raise ConversionError(
          "OOoDocument: target format %s is not supported" % original_format)
      is_html = 1
    elif format in ('txt', 'text', 'text-content'):
      format_list = self.getTargetFormatList()
      # if possible, we try to get utf8 text. ('enc.txt' will encode to utf8)
      if 'enc.txt' in format_list:
        format = 'enc.txt'
      elif format not in format_list:
        #Text conversion is not supported by oood, do it in other way
        if not self.hasConversion(format=original_format):
          #Do real conversion for text
          mime, data = self._convert(format='text-content')
          self.setConversion(data, mime, format=original_format)
          return mime, data
        return self.getConversion(format=original_format)
    # Raise an error if the format is not supported
    if not self.isTargetFormatAllowed(format):
      raise ConversionError("OOoDocument: target format %s is not supported" % format)
    # Check if we have already a base conversion
    if not self.hasBaseData():
      raise NotConvertedError

    # Key of the stored conversion: always the format requested by the
    # caller, plus the display for image formats when one was given.
    conversion_kw = {'format': original_format}
    if display is not None and original_format in STANDARD_IMAGE_FORMAT_LIST:
      conversion_kw['display'] = display

    if not self.hasConversion(**conversion_kw):
      # Do real conversion
      mime, data = self._convert(format)
      if is_html:
        # Extra processing required since
        # we receive a zip file
        cs = cStringIO.StringIO()
        try:
          cs.write(str(data))
          z = zipfile.ZipFile(cs) # A disk file would be more RAM efficient
          try:
            for f in z.infolist():
              fn = f.filename
              if fn.endswith('html'):
                if self.getPortalType() == 'Presentation'\
                      and not (fn.find('impr') >= 0):
                  continue
                data = z.read(fn)
                break
            mime = 'text/html'
            # Maybe some parts should be asynchronous for better usability
            self._populateConversionCacheWithHTML(zip_file=z)
          finally:
            z.close()
        finally:
          cs.close()
      if requires_pdf_first or 'display' in conversion_kw:
        # The image itself is produced by a temporary Image document, either
        # from the intermediate PDF or to apply the requested display.
        temp_image = self.portal_contributions.newContent(
                                       portal_type='Image',
                                       temp_object=1)
        temp_image._setData(data)
        mime, data = temp_image.convert(original_format, display=display)
      self.setConversion(data, mime, **conversion_kw)
    # Return converted file
    return self.getConversion(**conversion_kw)
475

476 477 478 479 480 481 482
  security.declareProtected(Permissions.View, 'asTextContent')
  def asTextContent(self):
    """
      Return (mime type, text) where the text is obtained by stripping
      the XML of the document (the 'text-content' conversion). The result
      is stored as the 'txt' conversion and served from there afterwards.
    """
    if self.hasConversion(format='txt'):
      return self.getConversion(format='txt')
    text_mime, text_data = self._convert(format='text-content')
    self.setConversion(text_data, text_mime, format='txt')
    return text_mime, text_data
488

489
  security.declareProtected(Permissions.ModifyPortalContent,
490 491
                            '_populateConversionCacheWithHTML')
  def _populateConversionCacheWithHTML(self, zip_file=None):
    """
    Extract content from the ODF zip file and populate the document.
    Optional parameter zip_file prevents from converting content twice.

    Every member of the archive is stored as a conversion with format
    '_embedded' and its name as file_name. When zip_file is not given, the
    document is converted to the first HTML target format and the archive
    opened here is closed before returning, even on error.

    Raises ConversionError if zip_file is not given and the server lists
    no HTML target format.
    """
    archive_file = None
    if zip_file is None:
      format_list = [x for x in self.getTargetFormatList()
                                if x.startswith('html') or x.endswith('html')]
      if not format_list:
        # Explicit error instead of an IndexError on format_list[0]
        raise ConversionError(
          "OOoDocument: target format %s is not supported" % 'html')
      format = format_list[0]
      mime, data = self._convert(format)
      archive_file = cStringIO.StringIO()
      archive_file.write(str(data))
      zip_file = zipfile.ZipFile(archive_file)
    try:
      for f in zip_file.infolist():
        file_name = f.filename
        document = self.get(file_name, None)
        if document is not None:
          self.manage_delObjects([file_name]) # For compatibility with old implementation
        if file_name.endswith('html'):
          mime = 'text/html'
          data = zip_file.read(file_name)
        else:
          mime = guess_content_type(file_name)[0]
          data = Pdata(zip_file.read(file_name))
        self.setConversion(data, mime=mime, format='_embedded', file_name=file_name)
    finally:
      # Only close what was opened here: a zip_file given by the caller
      # remains under the responsibility of the caller.
      if archive_file is not None:
        zip_file.close()
        archive_file.close()

523
  def _getExtensibleContent(self, request, name):
    """
    Serve name from the '_embedded' conversions as an OFS File wrapped in
    the parent of the document; on KeyError, defer to PermanentURLMixIn.
    """
    try:
      embedded_mime, embedded_data = self.getConversion(format='_embedded',
                                                        file_name=name)
      embedded_file = OFSFile(name, name, embedded_data,
                              content_type=embedded_mime)
      return embedded_file.__of__(self.aq_parent)
    except KeyError:
      return PermanentURLMixIn._getExtensibleContent(self, request, name)
529

530
  # Base format implementation
531 532 533 534 535 536
  security.declareProtected(Permissions.AccessContentsInformation, 'hasBaseData')
  def hasBaseData(self):
    """
      Since OOo documents are converted to a base format, simply
      return what the default accessor _baseHasBaseData reports.
    """
    return self._baseHasBaseData()
538

539 540
  security.declarePrivate('_convertToBaseFormat')
  def _convertToBaseFormat(self):
    """
      Ask the conversion server to convert the original file, then store
      on the document the returned data as base data, the returned
      metadata, and the base content type if the metadata provide a
      'MIMEType'.

      Raises ConversionError if the server does not answer with code 200.
    """
    server_proxy = self._mkProxy()
    document_name = self.getSourceReference() or self.getId()
    encoded_data = enc(str(self.getData()))
    response_code, response_dict, response_message = server_proxy.run_convert(
                                      document_name,
                                      encoded_data)
    if response_code != 200:
      # Explicitly raise the exception!
      raise ConversionError(
                "OOoDocument: Error converting document to base format %s:%s:"
                                       % (response_code, response_message))
    # successfully converted document
    self._setBaseData(dec(response_dict['data']))
    metadata = response_dict['meta']
    self._base_metadata = metadata
    mime_type = metadata.get('MIMEType', None)
    if mime_type is not None:
      self._setBaseContentType(mime_type)
Bartek Górny's avatar
Bartek Górny committed
562

563 564
  security.declareProtected(Permissions.AccessContentsInformation,
                            'getContentInformation')
565
  def getContentInformation(self):
    """
      Return the metadata stored in _base_metadata by the conversion
      to base format, or an empty dictionary if there is none.
    """
    metadata = getattr(self, '_base_metadata', {})
    return metadata
Bartek Górny's avatar
Bartek Górny committed
571

572 573
  security.declareProtected(Permissions.ModifyPortalContent,
                            'updateBaseMetadata')
574
  def updateBaseMetadata(self, **kw):
    """
      Send the base data and the metadata given as keyword arguments to the
      conversion server, and store the document it returns as the new base
      data. The conversion cache is cleared before calling the server.

      Raises NotConvertedError if the document has no base data and
      ConversionError if the server does not answer with code 200.
    """
    if not self.hasBaseData():
      raise NotConvertedError

    self.clearConversionCache()

    server_proxy = self._mkProxy()
    server_response = server_proxy.run_setmetadata(self.getId(),
                                                   enc(str(self.getBaseData())),
                                                   kw)
    response_code, response_dict, response_message = server_response
    if response_code != 200:
      # Explicitly raise the exception!
      raise ConversionError("OOoDocument: error getting document metadata %s:%s"
                        % (response_code, response_message))
    # the server accepted the metadata and returned the updated document
    self._setBaseData(dec(response_dict['data']))
    self.updateFileMetadata() # record in workflow history # XXX must put appropriate comments.