Commit fb9a0d60 authored by Arnaud Fontaine's avatar Arnaud Fontaine

Introduce HTML to PDF Transform through Conversion Server (currently using wkhtmltopdf) (MR !955).

Conversion Server code is no longer bound to OOo, as emphasized by:
  * Renaming of Preference Properties ooodoc_server* to document_conversion_server*.
  * Conversion Server exceptions are already defined in Document.py.
  * Conversion Server also handles video/audio/... conversions.

Thus, refactor the code to connect to Conversion Server by moving it from
Products.ERP5OOo.Document to Products.ERP5.Document.Document (while keeping
backward compatibility):
  * Renamed:
    + OOoServerProxy => DocumentConversionServerProxy
    + OOO_SERVER_PROXY_TIMEOUT => DOCUMENT_CONVERSION_SERVER_PROXY_TIMEOUT
    + OOO_SERVER_RETRY => DOCUMENT_CONVERSION_SERVER_RETRY
  * Moved:
    + enc
    + dec
    + global_server_proxy_uri_failure_time

Also, introduce erp5.module.TransformLib (in erp5_core, as all Transforms currently
live there even though they probably should not) to define DocumentConversionServerTransform;
it will also hold the libtransforms content once that is moved to ZODB Components.

Note: Ideally, OOOdCommandTransform should inherit from DocumentConversionServerTransform
but wkhtmltopdf Handler on Cloudooo side is a hack only implemented in Manager.convertFile()
whereas OOOdCommandTransform still uses legacy Manager.run_generate(), so leave it as it is
to avoid breaking things (this will be addressed in a separate MR).

/reviewed-on !955
parent 4c25f638
......@@ -28,7 +28,7 @@
# Cloudooo uses zip= argument, which is also a python builtin
# pylint: disable=redefined-builtin
from Products.ERP5OOo.Document.OOoDocument import OOoServerProxy
from Products.ERP5.Document.Document import DocumentConversionServerProxy
from base64 import b64encode, b64decode
from zExceptions import Unauthorized
......@@ -45,7 +45,7 @@ def convertDocumentByConversionServer(
if REQUEST is not None:
raise Unauthorized
proxy = OOoServerProxy(self)
proxy = DocumentConversionServerProxy(self)
return b64decode(
proxy.convertFile(
b64encode(data),
......
......@@ -28,7 +28,7 @@
##############################################################################
import re
from zLOG import LOG
from zLOG import LOG, WARNING
from AccessControl import ClassSecurityInfo
from Acquisition import aq_base
from Products.ERP5Type.Accessor.Constant import PropertyGetter as ConstantGetter
......@@ -79,6 +79,133 @@ class DocumentProxyError(Exception):pass
class NotConvertedError(Exception):pass
allow_class(NotConvertedError)
import base64
# Short aliases for the base64 encode/decode functions
enc = base64.encodestring
dec = base64.decodestring
# Fallback timeout for the conversion server transport, used when the
# preference tool returns no value
# (presumably expressed in seconds -- TODO confirm against TimeoutTransport)
DOCUMENT_CONVERSION_SERVER_PROXY_TIMEOUT = 360
# Fallback number of extra passes over the servers which failed with a
# network error, used when the preference tool returns no value
DOCUMENT_CONVERSION_SERVER_RETRY = 0
# Maps a conversion server URI to the time (as int) of its last failure, so
# that the servers whose last failure is the oldest are tried first
global_server_proxy_uri_failure_time = {}
from Products.CMFCore.utils import getToolByName
from functools import partial
from xmlrpclib import Fault, ServerProxy, ProtocolError
from AccessControl import Unauthorized
from Products.ERP5Type.ConnectionPlugin.TimeoutTransport import TimeoutTransport
from socket import error as SocketError
from DateTime import DateTime
class DocumentConversionServerProxy():
  """
  xmlrpc-like ServerProxy object adapted for conversion server

  One ServerProxy is created for each conversion server URL found in the
  preferences. Any attribute which is not found on the instance is turned
  into a remote call (see __getattr__) dispatched to these servers, one
  after the other, until one of them returns a successful result.
  """
  def __init__(self, context):
    """
    Build the list of (uri, ServerProxy) from the preferences

    context -- object from which portal_preferences is looked up

    Raise ConversionError if no conversion server is defined in the
    preferences or if one of the URLs is neither http:// nor https://
    """
    self._serverproxy_list = []
    preference_tool = getToolByName(context, 'portal_preferences')
    self._ooo_server_retry = (
      preference_tool.getPreferredDocumentConversionServerRetry() or
      DOCUMENT_CONVERSION_SERVER_RETRY)
    uri_list = preference_tool.getPreferredDocumentConversionServerUrlList()
    if not uri_list:
      # Backward compatibility: fall back on the deprecated address and
      # port number preferences
      address = preference_tool.getPreferredOoodocServerAddress()
      port = preference_tool.getPreferredOoodocServerPortNumber()
      if not (address and port):
        raise ConversionError('OOoDocument: cannot proceed with conversion:'
                              ' conversion server url is not defined in preferences')

      LOG('Document', WARNING, 'PreferredOoodocServer{Address,PortNumber}' +
          ' are DEPRECATED please use PreferredDocumentServerUrl instead', error=True)

      uri_list = ['%s://%s:%s' % ('http', address, port)]

    timeout = (preference_tool.getPreferredOoodocServerTimeout() or
               DOCUMENT_CONVERSION_SERVER_PROXY_TIMEOUT)
    for uri in uri_list:
      if uri.startswith("http://"):
        scheme = "http"
      elif uri.startswith("https://"):
        scheme = "https"
      else:
        raise ConversionError('OOoDocument: cannot proceed with conversion:'
                              ' preferred conversion server url is invalid')

      transport = TimeoutTransport(timeout=timeout, scheme=scheme)
      self._serverproxy_list.append(
        (uri, ServerProxy(uri, allow_none=True, transport=transport)))

  def _proxy_function(self, func_name, *args, **kw):
    """
    Call func_name(*args, **kw) on the servers until one of them succeeds

    Return the result of the first server answering successfully. If no
    server does, then, by order of priority:
      * return the first error result received from a server,
      * raise ConversionError if there was any protocol error,
      * raise socket.error if there was any socket error,
      * raise the first xmlrpc Fault received.
    Return None if there is nothing at all to report (no server).
    """
    result_error_set_list = []
    protocol_error_list = []
    socket_error_list = []
    fault_error_list = []
    count = 0
    serverproxy_list = self._serverproxy_list
    # we have list of tuple (uri, ServerProxy()). Sort by uri having oldest failure
    serverproxy_list.sort(key=lambda x: global_server_proxy_uri_failure_time.get(x[0], 0))
    while True:
      # Only servers failing with a network error are tried again
      retry_server_list = []
      for uri, server_proxy in serverproxy_list:
        func = getattr(server_proxy, func_name)
        try:
          # Cloudooo return result in (200 or 402, dict(), '') format or just based type
          # 402 for error and 200 for ok
          result_set = func(*args, **kw)
        except SocketError as e:
          message = 'Socket Error: %s' % (repr(e) or 'undefined.')
          socket_error_list.append(message)
          retry_server_list.append((uri, server_proxy))
        except ProtocolError as e:
          # Network issue
          message = "%s: %s %s" % (e.url, e.errcode, e.errmsg)
          if e.errcode == -1:
            message = "%s: Connection refused" % (e.url)
          protocol_error_list.append(message)
          retry_server_list.append((uri, server_proxy))
        except Fault as e:
          # Return not supported data types
          fault_error_list.append(e)
        else:
          try:
            response_code, response_dict, response_message = result_set
          except (TypeError, ValueError):
            # Compatibility for old oood, result is based type, like string.
            # TypeError covers based types which cannot be unpacked at all
            # (such as a boolean).
            response_code = 200

          if response_code == 200:
            return result_set
          # If error, try next one
          result_error_set_list.append(result_set)

        # Still there ? this means we had no result,
        # avoid using same server again
        global_server_proxy_uri_failure_time[uri] = int(DateTime())

      # All servers are failed
      if count == self._ooo_server_retry or not retry_server_list:
        break
      count += 1
      serverproxy_list = retry_server_list

    # Check error type
    # Return only one error result for compatibility
    if result_error_set_list:
      return result_error_set_list[0]

    if protocol_error_list:
      raise ConversionError("Protocol error while contacting OOo conversion: "
                            "%s" % (','.join(protocol_error_list)))
    if socket_error_list:
      raise SocketError("%s" % (','.join(socket_error_list)))
    if fault_error_list:
      raise fault_error_list[0]

  def __getattr__(self, attr):
    """
    Return a callable which calls the remote method named attr through
    _proxy_function()
    """
    return partial(self._proxy_function, attr)
from Products.ERP5.mixin.extensible_traversable import DocumentExtensibleTraversableMixin
class Document(DocumentExtensibleTraversableMixin, XMLObject, UrlMixin,
......
# -*- coding: utf-8 -*-
from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implements
from erp5.component.module.TransformLib import DocumentConversionServerTransform
class TransformHtmlToPdf(DocumentConversionServerTransform):
  """
  HTML to PDF Transform relying on the document conversion server
  """
  implements(ITransform)

  __name__ = 'html_to_pdf'
  inputs = ('text/html',)
  output = 'application/pdf'

  def _getFormatFromMimetype(self, mimetype):
    """
    Return 'html' for the text/html MIME type and 'pdf' for anything else
    """
    # XXX: mimetypes.guess_extension() may return either '.htm' or '.html'
    #      for text/html, but the former is not supported by wkhtmltopdf
    #      Handler (https://lab.nexedi.com/nexedi/cloudooo/merge_requests/20)
    if mimetype == 'text/html':
      return 'html'
    return 'pdf'

  def convert(self, *args, **kwargs):
    """
    Same as DocumentConversionServerTransform.convert(), except that
    conversion_kw defaults to {'encoding': 'utf-8'} when not given
    """
    # wkhtmltopdf handler currently requires conversion_kw (hack in convertFile())...
    kwargs.setdefault('conversion_kw', {'encoding': 'utf-8'})
    return DocumentConversionServerTransform.convert(self, *args, **kwargs)
def register():
  """
  Return a new TransformHtmlToPdf instance
  """
  transform = TransformHtmlToPdf()
  return transform
\ No newline at end of file
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="Document Component" module="erp5.portal_type"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_recorded_property_dict</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAI=</string> </persistent>
</value>
</item>
<item>
<key> <string>default_reference</string> </key>
<value> <string>TransformHtmlToPdf</string> </value>
</item>
<item>
<key> <string>description</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>document.erp5.TransformHtmlToPdf</string> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>Document Component</string> </value>
</item>
<item>
<key> <string>sid</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>text_content_error_message</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>text_content_warning_message</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>version</string> </key>
<value> <string>erp5</string> </value>
</item>
<item>
<key> <string>workflow_history</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAM=</string> </persistent>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="2" aka="AAAAAAAAAAI=">
<pickle>
<global name="PersistentMapping" module="Persistence.mapping"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary/>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="3" aka="AAAAAAAAAAM=">
<pickle>
<global name="PersistentMapping" module="Persistence.mapping"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary>
<item>
<key> <string>component_validation_workflow</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAQ=</string> </persistent>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="4" aka="AAAAAAAAAAQ=">
<pickle>
<global name="WorkflowHistoryList" module="Products.ERP5Type.Workflow"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_log</string> </key>
<value>
<list>
<dictionary>
<item>
<key> <string>action</string> </key>
<value> <string>validate</string> </value>
</item>
<item>
<key> <string>validation_state</string> </key>
<value> <string>validated</string> </value>
</item>
</dictionary>
</list>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
# -*- coding: utf-8 -*-
from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implements
from Products.ERP5.Document.Document import DocumentConversionServerProxy, ConversionError, enc, dec
class DocumentConversionServerTransform:
  """
  Transformer using Conversion Server

  Subclasses are expected to set __name__, inputs and output.
  """
  implements(ITransform)

  # Name of the Transform as registered in portal_transforms
  __name__ = None
  # Tuple of source MIME types
  inputs = ()
  # Destination MIME type
  output = ''

  def __init__(self, name=None):
    """
    name -- if given, override the class-level name of the Transform
    """
    if name is not None:
      self.__name__ = name

  def name(self):
    """
    Return the name of the Transform
    """
    return self.__name__

  def _getAllowedSourceMimetypeFromConversionServer(self, server_proxy):
    """
    Return the first MIME type of self.inputs which is found among the
    MIME types returned by server_proxy.getAllowedConversionFormatList()
    for this very MIME type, or None if there is no such MIME type
    """
    for mimetype in self.inputs:
      for allowed_mimetype, _ in server_proxy.getAllowedConversionFormatList(mimetype):
        if mimetype == allowed_mimetype:
          return mimetype

    return None

  def _getFormatFromMimetype(self, mimetype):
    """
    Return the extension (without leading dot) guessed from the MIME type

    Raise ConversionError if no extension can be guessed.

    XXX: This should not be done here but Conversion Server API to get
         supported Format/Extension is deprecated (topic under discussion)
    """
    import mimetypes
    extension = mimetypes.guess_extension(mimetype)
    if extension is None:
      raise ConversionError("Could not guess extension from mimetype '%s'" % mimetype)

    return extension.split('.', 1)[1]

  def convert(self, orig, data, context=None, **kwargs):
    """
    Convert orig through the Conversion Server and store the result in data

    orig -- source content, sent base64-encoded to the Conversion Server
    data -- object receiving the converted content through setData()
    context -- given to DocumentConversionServerProxy to find the server
    kwargs -- 'zip', 'refresh' and 'conversion_kw' are forwarded to
              convertFile() when given

    Return data. Raise ConversionError if none of self.inputs is allowed
    on the Conversion Server or if a format cannot be guessed.
    """
    server_proxy = DocumentConversionServerProxy(context)

    source_mimetype = self._getAllowedSourceMimetypeFromConversionServer(server_proxy)
    if source_mimetype is None:
      # self.inputs is a tuple: it must be wrapped, otherwise the % operator
      # takes it as the list of arguments and raises TypeError for any
      # number of MIME types other than one
      raise ConversionError(
        "Format(s) not allowed on Conversion Server %r" % (self.inputs,))

    source_format = self._getFormatFromMimetype(source_mimetype)
    destination_format = self._getFormatFromMimetype(self.output)

    data.setData(dec(server_proxy.convertFile(
      enc(orig),
      source_format,
      destination_format,
      # Default values are ConversionServer default ones
      kwargs.get('zip', False),
      kwargs.get('refresh', False),
      kwargs.get('conversion_kw', {}))))

    return data
\ No newline at end of file
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="Module Component" module="erp5.portal_type"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_recorded_property_dict</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAI=</string> </persistent>
</value>
</item>
<item>
<key> <string>default_reference</string> </key>
<value> <string>TransformLib</string> </value>
</item>
<item>
<key> <string>description</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>module.erp5.TransformLib</string> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>Module Component</string> </value>
</item>
<item>
<key> <string>sid</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>text_content_error_message</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>text_content_warning_message</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>version</string> </key>
<value> <string>erp5</string> </value>
</item>
<item>
<key> <string>workflow_history</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAM=</string> </persistent>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="2" aka="AAAAAAAAAAI=">
<pickle>
<global name="PersistentMapping" module="Persistence.mapping"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary/>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="3" aka="AAAAAAAAAAM=">
<pickle>
<global name="PersistentMapping" module="Persistence.mapping"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary>
<item>
<key> <string>component_validation_workflow</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAQ=</string> </persistent>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="4" aka="AAAAAAAAAAQ=">
<pickle>
<global name="WorkflowHistoryList" module="Products.ERP5Type.Workflow"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_log</string> </key>
<value>
<list>
<dictionary>
<item>
<key> <string>action</string> </key>
<value> <string>validate</string> </value>
</item>
<item>
<key> <string>validation_state</string> </key>
<value> <string>validated</string> </value>
</item>
</dictionary>
</list>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="Transform" module="Products.PortalTransforms.Transform"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_config</string> </key>
<value>
<object>
<klass>
<global id="1.1" name="UserDict" module="UserDict"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>__allow_access_to_unprotected_subobjects__</string> </key>
<value> <int>1</int> </value>
</item>
<item>
<key> <string>data</string> </key>
<value>
<dictionary/>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_config_metadata</string> </key>
<value>
<object>
<klass> <reference id="1.1"/> </klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary/>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>html_to_pdf</string> </value>
</item>
<item>
<key> <string>inputs</string> </key>
<value>
<tuple>
<string>text/html</string>
</tuple>
</value>
</item>
<item>
<key> <string>module</string> </key>
<value> <string>erp5.component.document.TransformHtmlToPdf</string> </value>
</item>
<item>
<key> <string>output</string> </key>
<value> <string>application/pdf</string> </value>
</item>
<item>
<key> <string>output_encoding</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string></string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
......@@ -6,6 +6,7 @@ document.erp5.TransformDocToDocx
document.erp5.TransformDocxToDocy
document.erp5.TransformDocyToDocx
document.erp5.TransformDocxToOdt
document.erp5.TransformHtmlToPdf
document.erp5.TransformXlsxToOds
document.erp5.TransformXlsxToXlsy
document.erp5.TransformXlsyToXlsx
......
module.erp5.TransformLib
\ No newline at end of file
......@@ -27,13 +27,10 @@
#
##############################################################################
import xmlrpclib, base64, re, zipfile, cStringIO
import re, zipfile, cStringIO
from warnings import warn
from xmlrpclib import Fault, ServerProxy, ProtocolError
from AccessControl import ClassSecurityInfo
from AccessControl import Unauthorized
from OFS.Image import Pdata
from OFS.Image import File as OFSFile
from zope.contenttype import guess_content_type
from Products.CMFCore.utils import getToolByName
from Products.ERP5Type import Permissions, PropertySheet
......@@ -41,140 +38,22 @@ from Products.ERP5Type.Cache import CachingMethod
from Products.ERP5.Document.File import File
from Products.ERP5.Document.Document import Document, \
VALID_IMAGE_FORMAT_LIST, ConversionError, NotConvertedError
from Products.ERP5.Document.Image import getDefaultImageQuality
from Products.ERP5Type.Utils import fill_args_from_request
from zLOG import LOG, WARNING, ERROR
from functools import partial
# Mixin Import
from Products.ERP5.mixin.base_convertable import BaseConvertableFileMixin
from Products.ERP5.mixin.text_convertable import TextConvertableMixin
from Products.ERP5.mixin.extensible_traversable import OOoDocumentExtensibleTraversableMixin
from DateTime import DateTime
# connection plugins
from Products.ERP5Type.ConnectionPlugin.TimeoutTransport import TimeoutTransport
from socket import error as SocketError
enc=base64.encodestring
dec=base64.decodestring
EMBEDDED_FORMAT = '_embedded'
OOO_SERVER_PROXY_TIMEOUT = 360
OOO_SERVER_RETRY = 0
# store time (as int) where we had last failure in order
# to try using proxy server that worked the most recently
global_server_proxy_uri_failure_time = {}
class OOoServerProxy():
"""
xmlrpc-like ServerProxy object adapted for OOo conversion server
"""
def __init__(self, context):
self._serverproxy_list = []
preference_tool = getToolByName(context, 'portal_preferences')
self._ooo_server_retry = preference_tool.getPreferredDocumentConversionServerRetry() or OOO_SERVER_RETRY
uri_list = preference_tool.getPreferredDocumentConversionServerUrlList()
if not uri_list:
address = preference_tool.getPreferredOoodocServerAddress()
port = preference_tool.getPreferredOoodocServerPortNumber()
if not (address and port):
raise ConversionError('OOoDocument: cannot proceed with conversion:'
' conversion server url is not defined in preferences')
LOG('OOoDocument', WARNING, 'PreferredOoodocServer{Address,PortNumber}' + \
' are DEPRECATED please use PreferredDocumentServerUrl instead', error=True)
uri_list = ['%s://%s:%s' % ('http', address, port)]
timeout = preference_tool.getPreferredOoodocServerTimeout() \
or OOO_SERVER_PROXY_TIMEOUT
for uri in uri_list:
if uri.startswith("http://"):
scheme = "http"
elif uri.startswith("https://"):
scheme = "https"