Commit a985974c authored by Tristan Cavelier's avatar Tristan Cavelier Committed by Cédric Le Ninivin

Add support of OnlyOffice document (as ziplike file)

Originally, the y format (docy, xlsy, ppty) is a file extension that is used by an OnlyOffice document.

An OnlyOffice document is composed of:

- a file with extension in (docy, xlsy, ppty)
- a separate media folder containing media files (Ex: media/image1.png)

Several problems : 

- Two documents cannot be in the same folder
- How to send / get from Cloudooo to ERP5 ?
- The document cannot be stored easily in ERP5 (embedded files? separate images linked to the document by predecessors?)
- How to download an OnlyOffice document from ERP5 ? How to load it on OfficeJS ?

To solve these problems, we embed all the files composing the document into one zip (like docx, odt, ...):

- Two documents can be in the same folder
- We send / get from Cloudooo as usual with portal transform
- The document can be stored in File data in ERP5 like any other docx document.
- document.getData allows to download the document data and JSZip can be used to extract easily the body in OfficeJS.

To keep backward compatibility, a docy can be : 
- the OnlyOffice document body (`startsWith("DOCY;v2;")`)
- a zip file (`startsWith("PK\x03\x04")`) containing one file at the root of the archive + an optional media folder

/reviewed-on nexedi/cloudooo!8
parents da030887 f6eb36a4
##############################################################################
#
# Copyright (c) 2009-2011 Nexedi SA and Contributors. All Rights Reserved.
# Gabriel M. Monnerat <gabriel@tiolive.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from xml.etree import ElementTree
from subprocess import Popen, PIPE
from tempfile import NamedTemporaryFile, mktemp
import sys
import os
from zope.interface import implements
from cloudooo.interfaces.handler import IHandler
from cloudooo.file import File
from cloudooo.util import logger, zipTree, unzip
# File-format identifiers. They are kept as strings because the handler
# writes them verbatim as the text of XML elements (m_nFormatFrom /
# m_nFormatTo) in the x2t configuration file.
AVS_OFFICESTUDIO_FILE_UNKNOWN = "0"

# Office Open XML and PDF formats
AVS_OFFICESTUDIO_FILE_DOCUMENT_DOCX = "65"
AVS_OFFICESTUDIO_FILE_PRESENTATION_PPTX = "129"
AVS_OFFICESTUDIO_FILE_PRESENTATION_PPSX = "132"
AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLSX = "257"
AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_PDF = "513"

# "Teamlab" formats
AVS_OFFICESTUDIO_FILE_TEAMLAB_DOCY = "4097"
AVS_OFFICESTUDIO_FILE_TEAMLAB_XLSY = "4098"
AVS_OFFICESTUDIO_FILE_TEAMLAB_PPTY = "4099"

# "Canvas" formats: these are the codes actually used for docy/xlsy/ppty
AVS_OFFICESTUDIO_FILE_CANVAS_WORD = "8193"
AVS_OFFICESTUDIO_FILE_CANVAS_SPREADSHEET = "8194"
AVS_OFFICESTUDIO_FILE_CANVAS_PRESENTATION = "8195"

# Other containers
AVS_OFFICESTUDIO_FILE_OTHER_HTMLZIP = "2051"
AVS_OFFICESTUDIO_FILE_OTHER_ZIP = "2057"

# Maps a file extension handled by this module to its format identifier.
format_code_map = {
    "docy": AVS_OFFICESTUDIO_FILE_CANVAS_WORD,
    "docx": AVS_OFFICESTUDIO_FILE_DOCUMENT_DOCX,
    "xlsy": AVS_OFFICESTUDIO_FILE_CANVAS_SPREADSHEET,
    "xlsx": AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLSX,
    "ppty": AVS_OFFICESTUDIO_FILE_CANVAS_PRESENTATION,
    "pptx": AVS_OFFICESTUDIO_FILE_PRESENTATION_PPTX,
}

# Extensions of the OnlyOffice "y" formats.
yformat_tuple = ("docy", "xlsy", "ppty")
class Handler(object):
    """x2t Handler converts documents between OOXML and OnlyOffice y formats.

    The supported formats are the keys of ``format_code_map``. A document in
    a y format (docy, xlsy, ppty) is exchanged as a zip archive holding the
    document body at the root of the archive plus an optional ``media``
    folder; a bare (non zipped) body is also accepted as input.
    """
    implements(IHandler)

    def __init__(self, base_folder_url, data, source_format, **kw):
        """
        base_folder_url(string)
          The requested url for data base folder
        data(string)
          The opened and read file into a string
        source_format(string)
          The source format of the input file
        env(dict, optional keyword)
          The environment given to the x2t subprocess (empty by default)
        """
        self.base_folder_url = base_folder_url
        self.file = File(base_folder_url, data, source_format)
        self.environment = kw.get("env", {})

    def convert(self, destination_format=None, **kw):
        """Convert the input file and return the content of the result.

        destination_format(string)
          One of the keys of ``format_code_map``

        Raises KeyError when the source or destination format is not in
        ``format_code_map``, ValueError when a zipped y document does not
        hold exactly one file at its root, and RuntimeError when x2t exits
        with a non-zero code. The working files are trashed in every case.
        """
        source_format = self.file.source_format
        logger.debug("x2t convert: %s > %s" % (source_format, destination_format))
        try:
            in_format = format_code_map[source_format]
            out_format = format_code_map[destination_format]
            root_dir = self.file.directory_name
            final_file_name = os.path.join(root_dir, "document.%s" % destination_format)
            input_file_name = self.file.getUrl()
            output_file_name = final_file_name
            config_file_name = os.path.join(root_dir, "config.xml")

            if source_format in yformat_tuple and self._isZipFile(input_file_name):
                input_file_name = self._extractBody(
                    input_file_name, os.path.join(root_dir, "input"))
            # else: a y document that is not zipped is the bare body, which
            # x2t can read directly from the original file.

            if destination_format in yformat_tuple:
                # x2t writes the body in output_dir (the zipTree call below
                # also looks for a "media" folder there); everything is
                # packed into final_file_name afterwards.
                output_dir = os.path.join(root_dir, "output")
                os.mkdir(output_dir)
                output_file_name = os.path.join(output_dir, "body.txt")

            self._writeConfig(config_file_name, {
                # Other parameters seen in x2t configurations, unused here:
                # m_sKey, m_bPaid, m_bEmbeddedFonts, m_bFromChanges,
                # m_sFontDir, m_sThemeDir
                'm_sFileFrom': input_file_name,
                'm_nFormatFrom': in_format,
                'm_sFileTo': output_file_name,
                'm_nFormatTo': out_format,
            })
            self._runX2t(config_file_name)

            if destination_format in yformat_tuple:
                zipTree(
                    final_file_name,
                    (output_file_name, ""),
                    # zipTree silently ignores this path when it is missing
                    (os.path.join(os.path.dirname(output_file_name), "media"), ""),
                )
            self.file.reload(final_file_name)
            return self.file.getContent()
        finally:
            # Always release the working files, even when the conversion fails.
            self.file.trash()

    def _isZipFile(self, path):
        """Tell if the file at `path` starts with the zip local header magic."""
        with open(path, "rb") as candidate:
            return candidate.read(4) == b"PK\x03\x04"

    def _extractBody(self, archive_path, input_dir):
        """Unzip a y document in `input_dir` and return the path of its body.

        The body is the only file expected at the root of the archive.
        """
        os.mkdir(input_dir)
        unzip(archive_path, input_dir)
        # The first item yielded by os.walk describes input_dir itself.
        root_file_list = next(os.walk(input_dir))[2]
        if len(root_file_list) != 1:
            raise ValueError(
                "x2t: expected exactly one file at the root of the archive,"
                " found %d" % len(root_file_list))
        return os.path.join(input_dir, root_file_list[0])

    def _writeConfig(self, config_file_name, config):
        """Write the x2t XML configuration: one element per `config` item."""
        root = ElementTree.Element('root')
        for key, value in config.items():
            ElementTree.SubElement(root, key).text = value
        # Given a file name, ElementTree opens and closes the file itself.
        ElementTree.ElementTree(root).write(
            config_file_name, encoding='utf-8', xml_declaration=True,
            default_namespace=None, method="xml")

    def _runX2t(self, config_file_name):
        """Run the x2t binary on the configuration file.

        Raises RuntimeError, with the outputs of the process and the
        configuration, when the exit code is not 0.
        """
        command = ["x2t", config_file_name]
        p = Popen(
            command,
            stdout=PIPE,
            stderr=PIPE,
            close_fds=True,
            env=self.environment,
        )
        stdout, stderr = p.communicate()
        if p.returncode != 0:
            with open(config_file_name) as config_file:
                config_xml = config_file.read()
            raise RuntimeError("x2t: exit code %d != 0\n+ %s\n> stdout: %s\n> stderr: %s@ x2t xml:\n%s" % (p.returncode, " ".join(command), stdout, stderr, " " + config_xml.replace("\n", "\n ")))

    def getMetadata(self, base_document=False):
        """Should return a dictionary with all metadata of the document.

        Not implemented for this handler: always raises NotImplementedError.
        """
        raise NotImplementedError

    def setMetadata(self, metadata=None):
        """Should return the document with new metadata.

        Keyword arguments:
        metadata -- expected a dictionary with metadata.

        Not implemented for this handler: always raises NotImplementedError.
        """
        raise NotImplementedError
##############################################################################
#
# Copyright (c) 2009-2010 Nexedi SA and Contributors. All Rights Reserved.
# Gabriel M. Monnerat <gabriel@tiolive.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
import magic
import os.path
from zipfile import ZipFile
from cStringIO import StringIO
from cloudooo.handler.x2t.handler import Handler
from cloudooo.tests.handlerTestCase import HandlerTestCase, make_suite
class TestHandler(HandlerTestCase):
    """Conversion tests of the x2t Handler between x and y formats."""

    def afterSetUp(self):
        # Environment given to the subprocess started by the handler
        self.kw = dict(env=dict(PATH=self.env_path))

    def _readData(self, name):
        """Return the raw content of a fixture of the data folder."""
        # Fixtures are binary documents: read them as bytes and close the file.
        with open(os.path.join("data", name), "rb") as data_file:
            return data_file.read()

    def _convert(self, data, source_format, destination_format):
        """Convert `data` with a new Handler and return the result."""
        handler = Handler(self.tmp_url, data, source_format, **self.kw)
        return handler.convert(destination_format)

    def _checkYData(self, y_data, body_prefix, member_list=()):
        """Check the body prefix and the members of a zipped y document."""
        with ZipFile(StringIO(y_data)) as y_zip:
            y_body_data = y_zip.open("body.txt").read()
            name_list = y_zip.namelist()
        self.assertTrue(
            y_body_data.startswith(body_prefix),
            "%r... does not start with %r" % (y_body_data[:20], body_prefix))
        for member in member_list:
            self.assertIn(member, name_list)

    def testConvertXlsx(self):
        """Test conversion of xlsx to xlsy and back"""
        y_data = self._convert(self._readData("test.xlsx"), "xlsx", "xlsy")
        self._checkYData(y_data, "XLSY;v2;5883;")
        x_data = self._convert(y_data, "xlsy", "xlsx")
        # magic inspired by https://github.com/minad/mimemagic/pull/19/files
        self.assertIn("xl/", x_data[:2000])

    def testConvertXlsy(self):
        """Test conversion of xlsy to xlsx and back"""
        x_data = self._convert(self._readData("test.xlsy"), "xlsy", "xlsx")
        self.assertIn("xl/", x_data[:2000])
        y_data = self._convert(x_data, "xlsx", "xlsy")
        self._checkYData(y_data, "XLSY;v2;10579;")

    def testConvertDocx(self):
        """Test conversion of docx to docy and back"""
        y_data = self._convert(self._readData("test_with_image.docx"), "docx", "docy")
        self._checkYData(y_data, "DOCY;v5;2795;", ("media/image1.png",))
        x_data = self._convert(y_data, "docy", "docx")
        # magic inspired by https://github.com/minad/mimemagic/pull/19/files
        self.assertIn("word/", x_data[:2000])

    def testConvertDocy(self):
        """Test conversion of docy to docx and back"""
        x_data = self._convert(self._readData("test_with_image.docy"), "docy", "docx")
        self.assertIn("word/", x_data[:2000])
        y_data = self._convert(x_data, "docx", "docy")
        self._checkYData(y_data, "DOCY;v5;7519;", ("media/image1.png",))

    def testgetMetadataFromImage(self):
        """Test getMetadata not implemented for yformats"""
        handler = Handler(self.tmp_url, "", "xlsy", **self.kw)
        self.assertRaises(NotImplementedError, handler.getMetadata)

    def testsetMetadata(self):
        """Test setMetadata not implemented for yformats"""
        handler = Handler(self.tmp_url, "", "xlsy", **self.kw)
        self.assertRaises(NotImplementedError, handler.setMetadata)
def test_suite():
    """Return the suite that make_suite builds for TestHandler."""
    suite = make_suite(TestHandler)
    return suite
##############################################################################
#
# Copyright (c) 2016 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from os.path import join
from cloudooo.tests.cloudoooTestCase import TestCase, make_suite
class TestServer(TestCase):
    """Test XmlRpc Server. Needs cloudooo server started"""

    def ConversionScenarioList(self):
        """Return the conversions to run.

        Each item is (file path, source format, destination format,
        expected content type of the result).
        """
        return [
            (join('data', 'test.xlsx'), "xlsx", "xlsy", "application/zip"),
            (join('data', 'test.xlsy'), "xlsy", "xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
            (join('data', 'test_with_image.docx'), "docx", "docy", "application/zip"),
            # Here, the content type should be application/vnd.openxmlformats-officedocument.wordprocessingml.document
            # but "magic" seems to not detect it correctly. However, the document can be read correctly by LibreOffice.
            (join('data', 'test_with_image.docy'), "docy", "docx", "application/octet-stream"),
        ]

    # NOTE(review): the method name looks copied from another test module;
    # it is kept unchanged so that the test identifier stays stable.
    def testConvertHtmltoPdf(self):
        """Converts xlsx,docx to their y format and y to x"""
        self.runConversionList(self.ConversionScenarioList())

    # NOTE(review): no test visible in this module calls this method --
    # confirm whether the base TestCase uses it.
    def FaultConversionScenarioList(self):
        """Return (data, source format, destination format) items expected to fail."""
        # The fixture is a binary document: read it as bytes and close the file.
        with open(join('data', 'test.xlsx'), 'rb') as xlsx_file:
            xlsx_data = xlsx_file.read()
        return [
            # Test to verify if server fail when a empty string is sent
            ('', '', ''),
            # Try convert one xlsx for a invalid format
            (xlsx_data, 'xlsx', 'xyz'),
        ]
def test_suite():
    """Return the suite that make_suite builds for TestServer."""
    suite = make_suite(TestServer)
    return suite
......@@ -56,6 +56,7 @@ application/vnd.oasis.opendocument.database
application/vnd.oasis.opendocument.image odi
application/vnd.ms-word.document.macroEnabled.12 docm
application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
application/x-asc-text docy
application/vnd.openxmlformats-officedocument.wordprocessingml.template dotx
application/vnd.ms-powerpoint.template.macroEnabled.12 potm
application/vnd.openxmlformats-officedocument.presentationml.template potx
......@@ -64,10 +65,12 @@ application/vnd.ms-powerpoint.slideshow.macroEnabled.12
application/vnd.openxmlformats-officedocument.presentationml.slideshow ppsx
application/vnd.ms-powerpoint.presentation.macroEnabled.12 pptm
application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
application/x-asc-presentation ppty
application/vnd.ms-excel.addin.macroEnabled.12 xlam
application/vnd.ms-excel.sheet.binary.macroEnabled.12 xlsb
application/vnd.ms-excel.sheet.macroEnabled.12 xlsm
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
application/x-asc-spreadsheet xlsy
application/vnd.ms-excel.template.macroEnabled.12 xltm
application/vnd.openxmlformats-officedocument.spreadsheetml.template xltx
application/x-msmetafile wmf
......
......@@ -29,6 +29,8 @@
import logging
import mimetypes
import pkg_resources
import os
from zipfile import ZipFile, ZIP_DEFLATED
logger = logging.getLogger('Cloudooo')
......@@ -98,3 +100,36 @@ def convertStringToBool(string):
return False
else:
return None
def zipTree(destination, *tree_path_list):
    """Archive files and directory trees into a deflated zip.

    destination may be a path or a StringIO
    tree_path_list is a list that may contain a path or a couple
    (path, archive_root). The item is stored in the archive as
    archive_root/basename(path); for a bare path, archive_root is the
    directory name of the path.

    Directories are archived recursively and keep their structure.
    Symbolic links and paths that are neither a file nor a directory
    (including missing ones) are ignored.

    Returns destination.
    """
    def archive(zfile, path, archive_root):
        archive_name = os.path.join(archive_root, os.path.basename(path))
        if os.path.islink(path):
            return  # XXX logger.warn("zipTree: symlink %r ignored\n" % path)
        if os.path.isdir(path):
            # Write the directory entry once, then recurse so that each
            # level of the tree keeps its own archive name.
            zfile.write(path, archive_name)
            for entry in sorted(os.listdir(path)):
                archive(zfile, os.path.join(path, entry), archive_name)
        elif os.path.isfile(path):
            zfile.write(path, archive_name)
        # else: XXX logger.warn("zipTree: unknown %r ignored\n" % path)

    # The context manager closes the archive even if a write fails.
    with ZipFile(destination, "w", ZIP_DEFLATED) as zfile:
        for tree_path in tree_path_list:
            if isinstance(tree_path, tuple):
                path, archive_root = tree_path
            else:
                path, archive_root = tree_path, os.path.dirname(tree_path)
            archive(zfile, path, archive_root)
    return destination
def unzip(source, destination):
    """Extract every member of the zip `source` into the `destination` folder.

    source is given to ZipFile, so it may be a path or a file-like object.
    The archive is closed even when the extraction fails.
    """
    with ZipFile(source) as archive:
        archive.extractall(destination)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment