Commit 7f44ed79 authored by Boris Kocherov

Merge branch 'nexedi'

parents 9a104b68 c0752dba
include README.txt
include CHANGES.txt
include README.rst
include CHANGELOG.rst
recursive-include cloudooo *
......@@ -53,7 +53,7 @@ class TestHandler(HandlerTestCase):
metadata = handler.getMetadata()
self.assertEquals(metadata.get("Compression"), "Zip")
self.assertEquals(metadata.get("Colorspace"), "sRGB")
self.assertEquals(metadata.get("Matte color"), "grey74")
self.assertEquals(metadata.get("Alpha color"), "grey74")
def testsetMetadata(self):
""" Test if metadata are inserted correclty """
......
......@@ -108,7 +108,12 @@ class UnoConverter(object):
elif filter_name == "impress_pdf_Export":
property = PropertyValue('FilterData', 0,
uno.Any('[]com.sun.star.beans.PropertyValue',
(PropertyValue('ExportNotesPages', 0, True, 0),),), 0)
(PropertyValue('ExportNotesPages', 0, True, 0),
PropertyValue('SelectPdfVersion', 0, 1, 0),),), 0)
elif "pdf_Export" in filter_name :
property = PropertyValue('FilterData', 0,
uno.Any('[]com.sun.star.beans.PropertyValue',
(PropertyValue('SelectPdfVersion', 0, 1, 0),),), 0)
elif filter_name in ("draw_html_Export", "HTML (StarCalc)"):
property = PropertyValue('FilterData', 0,
uno.Any('[]com.sun.star.beans.PropertyValue',
......
......@@ -35,11 +35,17 @@ except ImportError:
import helper_util
from getopt import getopt, GetoptError
# python3 support
try:
basestring
except NameError:
basestring = str
try:
long
except NameError:
long = int
__doc__ = """
usage: unomimemapper [options]
......@@ -76,7 +82,7 @@ class UnoMimemapper(object):
for obj in iter(element_list):
if obj.Name in ignore_name_list:
continue
if not isinstance(obj.Value, (bool, int, basestring, tuple)):
if not isinstance(obj.Value, (bool, int, long, basestring, tuple)):
continue
element_dict[obj.Name] = obj.Value
service_dict[name] = element_dict
......
......@@ -136,10 +136,10 @@ class MimeMapper(object):
filter_dict, type_dict = json.loads(stdout)
ooo_disable_filter_list = kw.get("ooo_disable_filter_list") or [] + [
'writer_jpg_Export', # Seems not working from cloudooo in Libre Office 4.3.3.2
'writer_png_Export', # Seems not working from cloudooo in Libre Office 4.3.3.2
'draw_eps_Export', # Seems not working from cloudooo in Libre Office 5.0.0.5
'impress_eps_Export', # Seems not working from cloudooo in Libre Office 5.0.0.5
# 'writer_jpg_Export', # Seems not working from cloudooo in Libre Office 4.3.3.2
# 'writer_png_Export', # Seems not working from cloudooo in Libre Office 4.3.3.2
# 'draw_eps_Export', # Seems not working from cloudooo in Libre Office 5.0.0.5
# 'impress_eps_Export', # Seems not working from cloudooo in Libre Office 5.0.0.5
]
ooo_disable_filter_name_list = kw.get("ooo_disable_filter_name_list") or [] + [
'Text', # Use 'Text - Choose Encoding' instead
......
......@@ -36,10 +36,10 @@ text_expected_tuple = (
('docx', 'Office Open XML Text'),
('fodt', 'Flat XML ODF Text Document'),
('html', 'HTML Document (Writer)'),
# ('jpg', 'JPEG - Joint Photographic Experts Group'),
('jpg', 'JPEG - Joint Photographic Experts Group'),
('odt', 'ODF Text Document'),
('pdf', 'PDF - Portable Document Format'),
# ('png', 'PNG - Portable Network Graphic'),
('png', 'PNG - Portable Network Graphic'),
('rtf', 'Rich Text'),
('txt', 'Text - Choose Encoding'),
('docy', 'Docy'),
......@@ -51,7 +51,7 @@ global_expected_tuple = (
drawing_expected_tuple = (
('bmp', 'BMP - Windows Bitmap'),
('emf', 'EMF - Enhanced Metafile'),
# ('eps', 'EPS - Encapsulated PostScript'),
('eps', 'EPS - Encapsulated PostScript'),
('fodg', 'Flat XML ODF Drawing'),
('gif', 'GIF - Graphics Interchange Format'),
('html', 'HTML Document (Draw)'),
......@@ -84,7 +84,7 @@ web_expected_tuple = (
presentation_expected_tuple = (
('bmp', 'BMP - Windows Bitmap'),
('emf', 'EMF - Enhanced Metafile'),
# ('eps', 'EPS - Encapsulated PostScript'),
('eps', 'EPS - Encapsulated PostScript'),
('fodp', 'Flat XML ODF Presentation'),
('gif', 'GIF - Graphics Interchange Format'),
('html', 'HTML Document (Impress)'),
......@@ -119,8 +119,10 @@ spreadsheet_expected_tuple = (
('html', 'HTML Document (Calc)'),
('ods', 'ODF Spreadsheet'),
('pdf', 'PDF - Portable Document Format'),
('png', 'PNG - Portable Network Graphic'),
('slk', 'SYLK'),
('xls', 'Microsoft Excel 97-2003'),
('xlsm', 'Microsoft Excel 2007-2016 XML (macro enabled)'),
('ms.xlsx', 'Microsoft Excel 2007-2013 XML'),
('xlsx', 'Office Open XML Spreadsheet'),
('xlsy', 'Xlsy'),
......
......@@ -480,25 +480,23 @@ class TestServer(TestCase):
"""Test if getImageItemList can get the list of images items from odt file"""
data = encodestring(open("./data/granulate_test.odt").read())
image_list = self.proxy.getImageItemList(data, "odt")
self.assertEquals([['10000000000000C80000009C76245A92.jpg', ''],
['10000201000000C80000004EE2BCEED0.png', 'TioLive Logo'],
['10000201000000C80000004EE2BCEED0.png', ''],
['2000004F00004233000013707E7DE37A.svm', 'Python Logo'],
['10000201000000C80000004EE2BCEED0.png',
'Again TioLive Logo']],
image_list)
self.assertEquals([['10000000000000C80000009CBF079A6E41EE290C.jpg', ''],
['10000201000000C80000004EF26C99A54A61B987.png', 'TioLive Logo'],
['10000201000000C80000004EF26C99A54A61B987.png', ''],
['2000004F0000423300001370ADF6545B2997B448.svm', 'Python Logo'],
['10000201000000C80000004EF26C99A54A61B987.png', 'Again TioLive Logo']],
image_list)
def testGetImageItemListFromDoc(self):
"""Test if getImageItemList can get the list of images items from doc file"""
data = encodestring(open("./data/granulate_test.doc").read())
image_list = self.proxy.getImageItemList(data, "doc")
self.assertEquals([['10000000000000C80000009C76245A92.jpg', ''],
['10000201000000C80000004EE2BCEED0.png', 'TioLive Logo'],
['10000201000000C80000004EE2BCEED0.png', ''],
['2000031600004233000013706A5EA1C8.wmf', 'Python Logo'],
['10000201000000C80000004EE2BCEED0.png',
'Again TioLive Logo']],
image_list)
self.assertEquals([['10000000000000C80000009CBF079A6E41EE290C.jpg', ''],
['10000201000000C80000004EF26C99A54A61B987.png', 'TioLive Logo'],
['10000201000000C80000004EF26C99A54A61B987.png', ''],
['2000031600004233000013702113A0E70B910778.wmf', 'Python Logo'],
['10000201000000C80000004EF26C99A54A61B987.png', 'Again TioLive Logo']],
image_list)
def testGetImageFromOdt(self):
"""Test if getImage can get a image from odt file after zip"""
......@@ -516,7 +514,7 @@ class TestServer(TestCase):
#so compare with the server return.
data_odt = self.proxy.convertFile(data, 'doc', 'odt', False)
zip = ZipFile(StringIO(decodestring(data_odt)))
image_id = '10000000000000C80000009C76245A92.jpg'
image_id = '10000000000000C80000009CBF079A6E41EE290C.jpg'
original_image = zip.read('Pictures/%s' % image_id)
geted_image = decodestring(self.proxy.getImage(data, image_id, "doc"))
self.assertEquals(original_image, geted_image)
......
##############################################################################
#
# Copyright (c) 2016 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from zope.interface import implements
from cloudooo.interfaces.handler import IHandler
from cloudooo.file import File
from cloudooo.util import logger
from subprocess import Popen, PIPE
from tempfile import mktemp
from os.path import basename
from base64 import b64decode
def keyNameToOption(key_name, prefix=""):
    """Build a command line long option from a keyword-style name.

    Every underscore of ``key_name`` becomes a dash, and the result is
    preceded by ``--`` and ``prefix``.
    Ex: keyNameToOption("external_links", prefix="disable-")
        result : "--disable-external-links"
    """
    dashed_name = key_name.replace("_", "-")
    return "".join(("--", prefix, dashed_name))
class Handler(object):
"""wkhtmltopdf Handler is used to convert HTML documents with wkhtmltopdf."""
implements(IHandler)
def __init__(self, base_folder_url, data, source_format, **kw):
    """Keep the source document and the subprocess environment.

    base_folder_url(string)
      Folder handed to ``File`` together with the document.
    data(string)
      Content of the source document.
    source_format(string)
      Format of the source document.
    env(dict, optional keyword)
      Environment used to run the converter, empty when not given.
    """
    environment = kw.get("env", {})
    self.base_folder_url = base_folder_url
    self.file = File(base_folder_url, data, source_format)
    self.environment = environment
def makeTempFile(self, destination_format=None):
path = mktemp(
suffix='.%s' % destination_format,
dir=self.file.directory_name,
)
return path
def convertPathToUrl(self, path):
if path.startswith("/"):
return "file://" + path
raise ValueError("path %r is not absolute" % path)
def convert(self, destination_format=None, **kw):
    """Convert the source document with wkhtmltopdf and return the result.

    destination_format(string)
      Extension given to the generated file.
    **kw
      Conversion options, turned into command line arguments by
      makeWkhtmltopdfCommandList.

    The document working directory is trashed whether the conversion
    succeeds or fails.
    """
    logger.debug("wkhtmltopdf convert: %s > %s" % (self.file.source_format, destination_format))
    try:
        # Everything that may create files in the working directory lives in
        # the try block, so that a failure does not leave them behind.
        output_path = self.makeTempFile(destination_format)
        command = self.makeWkhtmltopdfCommandList(
            self.convertPathToUrl(self.file.getUrl()),
            output_path,
            conversion_kw=kw,
        )
        process = Popen(
            command,
            stdout=PIPE,
            stderr=PIPE,
            close_fds=True,
            env=self.environment,
            cwd=self.file.directory_name,
        )
        stdout, stderr = process.communicate()
        # The exit status is only reported, not enforced: a missing output
        # file still makes the reload below fail, as it did before.
        if process.returncode != 0:
            logger.debug("wkhtmltopdf returned %s: %s" % (process.returncode, stderr))
        self.file.reload(output_path)
        return self.file.getContent()
    finally:
        self.file.trash()
def getMetadata(self, base_document=False):
"""Returns a dictionary with all metadata of document.
along with the metadata.
"""
return NotImplementedError
def setMetadata(self, metadata={}):
"""Returns image with new metadata.
Keyword arguments:
metadata -- expected an dictionary with metadata.
"""
raise NotImplementedError
def makeSwitchOptionList(self, allowed_option_list, option_dict):
"""
A switch option is enable if it exists.
Ex: for : --grayscale
option_dict : {"grayscale": True}
result : ["--grayscale"]
"""
option_list = []
for option_name in allowed_option_list:
value = option_dict.get(option_name)
if value:
option_list.append(keyNameToOption(option_name))
return option_list
def makeNoPrefixedOptionList(self, allowed_option_list, option_dict):
"""
A "no" prefixed option is an option that if disable contains a
"no" prefix.
Ex: for : --images (and --no-images)
option_dict : {"images": False}
result : ["--no-images"]
"""
option_list = []
for option_name in allowed_option_list:
value = option_dict.get(option_name)
if value is not None:
option_list.append(keyNameToOption(option_name, prefix="" if value else "no-"))
return option_list
def makeEnablePrefixedOptionList(self, allowed_option_list, option_dict):
"""
An "enable" prefixed option is an option that if enable contains a
"enable" prefix else contains a "disable" prefix.
Ex: for : --enable-external-links (and --disable-external-links)
option_dict : {"enable_external_links": False}
result : ["--disable-external-links"]
"""
option_list = []
for option_name in allowed_option_list:
value = option_dict.get(option_name)
if value is not None:
if value:
option_list.append(keyNameToOption(option_name))
else:
option_list.append(keyNameToOption(option_name[7:], prefix="disable-"))
return option_list
def makeIncludeInPrefixedOptionList(self, allowed_option_list, option_dict):
"""
An "include-in" prefixed option is an option that if enable contains a
"include-in" prefix else contains a "exclude-from" prefix.
Ex: for : --include-in-outline (and --exclude-from-outline)
option_dict : {"include_in_outline": False}
result : ["--exclude-from-outline"]
"""
option_list = []
for option_name in allowed_option_list:
value = option_dict.get(option_name)
if value is not None:
if value:
option_list.append(keyNameToOption(option_name))
else:
option_list.append(keyNameToOption(option_name[11:], prefix="exclude-from-"))
return option_list
def makeOneStringArgumentOptionList(self, allowed_option_list, option_dict):
"""
A one-string-argument option is a option that require an argument
which is a string.
Ex: for : --title <text>
option_dict : {"title": "Hello World!"}
result : ["--title", "Hello World!"]
"""
option_list = []
for option_name in allowed_option_list:
value = option_dict.get(option_name)
if value is not None:
option_list += [keyNameToOption(option_name), str(value)]
return option_list
def makeRepeatableOneStringArgumentOptionList(self, allowed_option_list, option_dict):
"""
A repeatable one-string-argument option is a option that require one
string argument, this option can be set several times.
Ex: for : --allow <path>
option_dict : {"allow_list": ["a", "b"]}
result : ["--allow", "a", "--allow", "b"]
"""
option_list = []
for option_name in allowed_option_list:
value_list = option_dict.get(option_name)
if value_list:
for value in value_list:
option_list += [keyNameToOption(option_name[:-5]), str(value)]
return option_list
def makeRepeatableTwoStringArgumentOptionList(self, allowed_option_list, option_dict):
"""
A repeatable two-string-argument option is a option that require two
string arguments, this option can be set several times.
Ex: for : --cookie <name> <value>
option_dict : {"cookie_list": [("a", "b"), ("c", "d")]}
result : ["--cookie", "a", "b", "--cookie", "c", "d"]
"""
option_list = []
for option_name in allowed_option_list:
tuple_list = option_dict.get(option_name)
if tuple_list:
for name, value in tuple_list:
option_list += [keyNameToOption(option_name[:-5]), str(name), str(value)]
return option_list
def makeDataUrlArgumentOptionList(self, allowed_option_list, option_dict,
url_type="url", destination_format=None,
use_switch=True):
"""
A data-file-argument option is a option that require an url argument.
Here, we don't want option value to be an url but data, so that
we can put the data to a temp file an use it's url as option value.
Ex: for : --user-style-sheet <url> (and url_type="url")
option_dict : {"user_style_sheet_data": b64encode("body { background-color: black; }")}
result : ["--user-style-sheet", "file:///tmp/tmp.XYZ.css"]
Ex: for : --checkbox-svg <path> (and url_type="path")
option_dict : {"checkbox_svg_data": b64encode("<svg>....</svg>")}
result : ["--checkbox-svg", "/tmp/tmp.XYZ.svg"]
Ex: for : --xsl-style-sheet <file> (and url_type="file")
option_dict : {"xsl_style_sheet_data": b64encode("table { border: none; }")}
result : ["--xsl-style-sheet", "tmp.XYZ.css"]
"""
option_list = []
for option_name in allowed_option_list:
value = option_dict.get(option_name)
if value is not None:
# creates a tmp file in the directory which will be trashed
path = self.makeTempFile(destination_format=destination_format)
open(path, "wb").write(b64decode(value))
if url_type == "url":
path = self.convertPathToUrl(path)
elif url_type == "file":
path = basename(path)
if use_switch:
option_list += [keyNameToOption(option_name[:-5]), path]
else:
option_list.append(path)
return option_list
def makeDataPathArgumentOptionList(self, *args, **kw):
return self.makeDataUrlArgumentOptionList(*args, url_type="path", **kw)
def makeDataFileArgumentOptionList(self, *args, **kw):
return self.makeDataUrlArgumentOptionList(*args, url_type="file", **kw)
def makeRepeatableDataUrlArgumentOptionList(self, allowed_option_list,
option_dict, **kw):
option_list = []
for option_name in allowed_option_list:
data_list = option_dict.get(option_name)
if data_list:
for data in data_list:
option_name = option_name[:-5]
option_list += self.makeDataUrlArgumentOptionList([
option_name,
], {option_name: data}, **kw)
return option_list
def makeWkhtmltopdfCommandList(self, *args, **kw):
# http://wkhtmltopdf.org/usage/wkhtmltopdf.txt
conversion_kw = kw.get("conversion_kw", {})
command = ["wkhtmltopdf"]
# Global Options
command += self.makeNoPrefixedOptionList(["collate"], conversion_kw)
command += self.makeSwitchOptionList([
#"extended-help",
"grayscale",
#"help",
#"htmldoc",
#"licence",
"lowquality",
#"manpage",
"no_pdf_compression",
#"quiet", # we decide
#"read_args_from_stdin", # only for several command line at a time
#"readme",
#"version",
], conversion_kw)
command += self.makeOneStringArgumentOptionList([
#"cookie_jar", # no cookie jar
"copies",
"dpi",
"image_dpi",
"image_quality",
"margin_bottom",
"margin_left",
"margin_right",
"margin_top",
"orientation",
"page_height",
"page_size",
"page_width",
"title",
], conversion_kw)
# Outline Options
command += self.makeNoPrefixedOptionList(["outline"], conversion_kw)
#"dump_default_toc_xsl",
command += self.makeOneStringArgumentOptionList([
#"dump_outline",
"outline_depth",
], conversion_kw)
# Page Options
command += self.makeNoPrefixedOptionList([
"background",
"custom_header_propagation",
"images",
"print_media_type",
#"debug_javascript", # we decide
#"stop_slow_scripts", # we decide
], conversion_kw)
command += self.makeEnablePrefixedOptionList([
"enable_external_links",
"enable_forms",
"enable_internal_links",
"enable_javascript",
#"enable_local_file_access", # we decide
#"enable_plugins",
"enable_smart_shrinking",
"enable_toc_back_links",
], conversion_kw)
command += ["--disable-local-file-access"]
command += self.makeIncludeInPrefixedOptionList([
"include_in_outline",
], conversion_kw)
command += self.makeSwitchOptionList(["default_header"], conversion_kw)
command += self.makeOneStringArgumentOptionList([
#"cache_dir", # we decide
"encoding",
"javascript_delay",
"load_error_handling",
"load_media_error_handling",
"minimum_font_size",
"page_offset",
#"password", # too dangerous
#"proxy", # we decide
#"username", # too dangerous
"viewport_size",
"window_status",
"zoom",
], conversion_kw)
#"allow", # we decide
command += self.makeDataPathArgumentOptionList([
# <option_name>_data
"checkbox_checked_svg_data",
"checkbox_svg_data",
"radiobutton_checked_svg_data",
"radiobutton_svg_data",
], conversion_kw, destination_format="svg")
command += self.makeDataUrlArgumentOptionList([
"user_style_sheet_data",
], conversion_kw, destination_format="css")
#"run_script_list", # too dangerous, fills --run-script
command += self.makeRepeatableTwoStringArgumentOptionList([
# <option_name>_list
"cookie_list",
"custom_header_list",
#"post_list",
#"post_file_list",
], conversion_kw)
# Headers and Footer Options
command += self.makeNoPrefixedOptionList([
"footer_line",
"header_line",
], conversion_kw)
command += self.makeOneStringArgumentOptionList([
"footer_center",
"footer_font_name",
"footer_font_size",
"footer_left",
"footer_right",
"footer_spacing",
"header_center",
"header_font_name",
"header_font_size",
"header_left",
"header_right", # there's a --top option (not documented)
# may be we can do header_right_top option
"header_spacing",
], conversion_kw)
command += self.makeDataUrlArgumentOptionList([
# <option_name>_data
"footer_html_data",
"header_html_data",
], conversion_kw, destination_format="html")
command += self.makeRepeatableTwoStringArgumentOptionList([
"replace",
], conversion_kw)
# Custom Options
command += self.makeRepeatableDataUrlArgumentOptionList([
"before_toc_data_list",
], conversion_kw, destination_format="html", use_switch=False)
# TOC Options
value = conversion_kw.get("toc")
if value:
command += ["toc"]
command += self.makeEnablePrefixedOptionList([
"enable_dotted_lines",
"enable_toc_links",
], conversion_kw)
command += self.makeOneStringArgumentOptionList([
"toc_header_text",
"toc_level_indentation",
"toc_text_size_shrink",
], conversion_kw)
command += self.makeDataFileArgumentOptionList([
"xsl_style_sheet_data",
], conversion_kw, destination_format="xsl")
# Custom Options
command += self.makeRepeatableDataUrlArgumentOptionList([
"after_toc_data_list",
"before_body_data_list",
], conversion_kw, destination_format="html", use_switch=False)
command += args[:-1] # input_url
command += self.makeRepeatableDataUrlArgumentOptionList([
"after_body_data_list",
], conversion_kw, destination_format="html", use_switch=False)
command += args[-1:] # output_path
return command
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:outline="http://wkhtmltopdf.org/outline"
xmlns="http://www.w3.org/1999/xhtml">
<xsl:output method="html" encoding="utf-8" indent="yes" doctype-system="about:legacy-compat"/>
<!-- Root template: renders the outline as an HTML page holding the table
     of contents. The "count" parameter defaults to 0 and is handed to the
     item template below. -->
<xsl:template match="outline:outline">
<xsl:param name="count" select="0" />
<html>
<head>
<title>Table of Contents</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<style>
html, body {
overflow-x: initial !important;
}
</style>
</head>
<body>
<p class="ci-document-faux-h1">Table of Contents</p>
<div class="ci-document-table-of-content">
<p class="ci-document-helper"></p>
<ol>
<!-- Only the items nested in a first level item are listed here; the first
     level items themselves get no entry. -->
<xsl:apply-templates select="outline:item/outline:item">
<xsl:with-param name="count" select="$count" />
</xsl:apply-templates>
</ol>
</div>
</body>
</html>
</xsl:template>
<!-- Item template: renders one outline entry as a list item (link, title
     and page number when the title is not empty) followed by the nested
     entries. Items called with count = 0 get the
     "ci-document-table-of-content-lead-item" class. The recursive calls
     below pass no "count", so nested items presumably always take the
     "otherwise" branch (to be confirmed with the XSLT processor in use). -->
<xsl:template match="outline:item">
<xsl:param name="count" />
<xsl:choose>
<xsl:when test="number($count)=0">
<li class="ci-document-table-of-content-lead-item">
<xsl:if test="@title!=''">
<div>
<a>
<xsl:if test="@link">
<xsl:attribute name="href"><xsl:value-of select="@link"/></xsl:attribute>
</xsl:if>
<xsl:if test="@backLink">
<xsl:attribute name="name"><xsl:value-of select="@backLink"/></xsl:attribute>
</xsl:if>
<xsl:value-of select="@title" />
</a>
<span><xsl:value-of select="@page" /> </span>
</div>
</xsl:if>
<ol>
<xsl:comment>added to prevent self-closing tags in QtXmlPatterns</xsl:comment>
<xsl:apply-templates select="outline:item"></xsl:apply-templates>
</ol>
</li>
</xsl:when>
<xsl:otherwise>
<li>
<xsl:if test="@title!=''">
<div>
<a>
<xsl:if test="@link">
<xsl:attribute name="href"><xsl:value-of select="@link"/></xsl:attribute>
</xsl:if>
<xsl:if test="@backLink">
<xsl:attribute name="name"><xsl:value-of select="@backLink"/></xsl:attribute>
</xsl:if>
<xsl:value-of select="@title" />
</a>
<span><xsl:value-of select="@page" /> </span>
</div>
</xsl:if>
<ol>
<xsl:comment>added to prevent self-closing tags in QtXmlPatterns</xsl:comment>
<xsl:apply-templates select="outline:item"></xsl:apply-templates>
</ol>
</li>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
</xsl:stylesheet>
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>Test Title With Opacity Style</title>
</head>
<body>
<p style="opacity:0.5;">Hello</p>
</body>
</html>
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>Test Title With PNG Data URL</title>
</head>
<body>
<p>Hello</p>
<img src="" />
</body>
</html>
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>Test Title With Script</title>
<script>
window.addEventListener("load", function () {
document.querySelector("p").textContent = atob("V29ybGQ=");
});
</script>
</head>
<body>
<p>Hello</p>
</body>
</html>
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>Test Title With Table of Content</title>
</head>
<body>
<h1>Title</h1>
<p>Description paragraph</p>
<h2>Sub Title</h2>
<p>Subject paragraph</p>
<h2>Sub Title number 2</h2>
<p>Again subject paragraph</p>
</body>
</html>
##############################################################################
#
# Copyright (c) 2016 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from base64 import b64encode
import magic
from cloudooo.handler.wkhtmltopdf.handler import Handler
from cloudooo.tests.handlerTestCase import HandlerTestCase, make_suite
class TestHandler(HandlerTestCase):
    """Tests of the wkhtmltopdf handler, which converts html to pdf."""

    def afterSetUp(self):
        # environment given to the handler, so that wkhtmltopdf can be found
        self.kw = dict(env=dict(PATH=self.env_path))

    def _testBase(self, html_path, **conversion_kw):
        """Convert the html file to pdf and check the result mimetype.

        conversion_kw is passed as is to Handler.convert.
        """
        # read through a context manager so the fixture file gets closed
        with open(html_path) as html_file:
            html_data = html_file.read()
        handler = Handler(self.tmp_url, html_data, "html", **self.kw)
        pdf_file = handler.convert("pdf", **conversion_kw)
        mime = magic.Magic(mime=True)
        pdf_mimetype = mime.from_buffer(pdf_file)
        self.assertEqual("application/pdf", pdf_mimetype)

    def testConvertHtmlWithPngDataUrlToPdf(self):
        """Test conversion of html with png data url to pdf"""
        self._testBase("data/test_with_png_dataurl.html")

    def testConvertHtmlWithScriptToPdf(self):
        """Test conversion of html with script to pdf"""
        self._testBase("data/test_with_script.html")

    def testConvertHtmlWithOpacityStyleToPdf(self):
        """Test conversion of html with opacity style to pdf

        Opacity style in a web pages causes Segmentation Fault only if wkhtmltopdf
        is not connected to a graphical service like Xorg.
        """
        self._testBase("data/test_with_opacity_style.html")

    # TODO: def testConvertHtmlWithHeaderAndFooter(self):

    def testConvertHtmlWithTableOfContent(self):
        """Test conversion of html with an additional table of content"""
        with open("data/test_toc.xsl") as xsl_file:
            xsl_data = xsl_file.read()
        self._testBase(
            "data/test_with_toc.html",
            toc=True,
            xsl_style_sheet_data=b64encode(xsl_data),
        )
        # XXX how to check for table of content presence ?

    def testsetMetadata(self):
        """Test that setMetadata is not implemented by this handler"""
        handler = Handler(self.tmp_url, "", "png", **self.kw)
        self.assertRaises(NotImplementedError, handler.setMetadata)
def test_suite():
    """Return the suite made of the TestHandler tests."""
    suite = make_suite(TestHandler)
    return suite
##############################################################################
#
# Copyright (c) 2016 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from os.path import join
from cloudooo.tests.cloudoooTestCase import TestCase, make_suite
class TestServer(TestCase):
    """Test XmlRpc Server. Needs cloudooo server started"""

    def ConversionScenarioList(self):
        """Return (path, source format, destination format, expected
        mimetype) tuples of conversions expected to succeed."""
        return [
            (join('data', 'test_with_png_dataurl.html'), "html", "pdf", "application/pdf"),
            (join('data', 'test_with_script.html'), "html", "pdf", "application/pdf"),
        ]

    def testConvertHtmltoPdf(self):
        """Converts html to pdf"""
        self.runConversionList(self.ConversionScenarioList())

    def FaultConversionScenarioList(self):
        """Return (data, source format, destination format) tuples of
        conversions expected to fail."""
        # read through a context manager so the fixture file gets closed
        with open(join('data', 'test_with_png_dataurl.html')) as html_file:
            html_data = html_file.read()
        return [
            # Test to verify if server fail when a empty string is sent
            ('', '', ''),
            # Try convert one html for a invalid format
            (html_data, 'html', 'xyz'),
        ]
def test_suite():
    """Return the suite made of the TestServer tests."""
    suite = make_suite(TestServer)
    return suite
......@@ -26,16 +26,16 @@
#
##############################################################################
from xml.etree import ElementTree
from os.path import join, dirname, realpath
from subprocess import Popen, PIPE
from tempfile import NamedTemporaryFile
from tempfile import NamedTemporaryFile, mktemp
import sys
import os
from zope.interface import implements
from cloudooo.interfaces.handler import IHandler
from cloudooo.file import File
from cloudooo.util import logger
from cloudooo.util import logger, zipTree, unzip
AVS_OFFICESTUDIO_FILE_UNKNOWN = "0"
AVS_OFFICESTUDIO_FILE_DOCUMENT_DOCX = "65"
......@@ -52,7 +52,7 @@ AVS_OFFICESTUDIO_FILE_CANVAS_PRESENTATION = "8195"
AVS_OFFICESTUDIO_FILE_OTHER_HTMLZIP = "2051"
AVS_OFFICESTUDIO_FILE_OTHER_ZIP = "2057"
Ext2Formats = {
format_code_map = {
"docy": AVS_OFFICESTUDIO_FILE_CANVAS_WORD,
"docx": AVS_OFFICESTUDIO_FILE_DOCUMENT_DOCX,
"xlsy": AVS_OFFICESTUDIO_FILE_CANVAS_SPREADSHEET,
......@@ -61,11 +61,6 @@ Ext2Formats = {
"pptx": AVS_OFFICESTUDIO_FILE_PRESENTATION_PPTX,
}
dir_name = dirname(realpath(sys.argv[0]))
#dir_name = join(dirname(realpath(__file__)), 'bin')
converter_bin = join(dir_name, 'x2t')
converter_lib_dirname = join(dir_name, 'lib')
yformat_map = {
'docy': 'docx',
'xlsy': 'xlsx',
......@@ -78,9 +73,13 @@ yformat_service_map = {
'ppty': 'com.sun.star.presentation.PresentationDocument',
}
yformat_tuple = ("docy", "xlsy", "ppty")
class Handler(object):
"""ImageMagic Handler is used to handler images."""
"""
X2T Handler is used to convert Microsoft Office 2007 documents to OnlyOffice
documents.
"""
implements(IHandler)
......@@ -91,55 +90,83 @@ class Handler(object):
data(string)
The opened and readed file into a string
source_format(string)
The source format of the inputed file"""
The source format of the inputed file
"""
self.base_folder_url = base_folder_url
self.file = File(base_folder_url, data, source_format)
self.environment = kw.get("env", {})
self.environment['LD_LIBRARY_PATH'] = converter_lib_dirname
def convert(self, destination_format=None, **kw):
    """Convert the input file to `destination_format` with the x2t binary.

    The source and destination formats are looked up in `format_code_map`.
    When the source is one of `yformat_tuple`, the input is first unzipped
    and the single top-level file of the archive is given to x2t.  When the
    destination is one of `yformat_tuple`, x2t writes "body.txt" into an
    output directory which is then zipped together with its "media"
    sub-directory (if any) to build the final document.

    Keyword arguments:
    destination_format -- extension of the wanted format (a key of
                          format_code_map)

    Returns the content of the converted document.
    Raises KeyError when a format is not in format_code_map and
    RuntimeError when x2t exits with a non-zero code.
    """
    source_format = self.file.source_format
    logger.debug("x2t convert: %s > %s" % (source_format, destination_format))
    try:
        # init vars and xml configuration file
        in_format = format_code_map[source_format]
        out_format = format_code_map[destination_format]
        root_dir = self.file.directory_name
        input_dir = os.path.join(root_dir, "input")
        output_dir = os.path.join(root_dir, "output")
        final_file_name = os.path.join(root_dir, "document.%s" % destination_format)
        input_file_name = self.file.getUrl()
        output_file_name = final_file_name
        config_file_name = os.path.join(root_dir, "config.xml")

        if source_format in yformat_tuple:
            os.mkdir(input_dir)
            unzip(self.file.getUrl(), input_dir)
            # Only the first os.walk() step (top level of input_dir) is used;
            # the unpacking raises ValueError unless it holds exactly one file.
            for _, _, files in os.walk(input_dir):
                input_file_name, = files
                break
            input_file_name = os.path.join(input_dir, input_file_name)
        if destination_format in yformat_tuple:
            os.mkdir(output_dir)
            output_file_name = os.path.join(output_dir, "body.txt")

        config = {
            # 'm_sKey': 'from',
            'm_sFileFrom': input_file_name,
            'm_nFormatFrom': in_format,
            'm_sFileTo': output_file_name,
            'm_nFormatTo': out_format,
            # 'm_bPaid': 'true',
            # 'm_bEmbeddedFonts': 'false',
            # 'm_bFromChanges': 'false',
            # 'm_sFontDir': '/usr/share/fonts',
            # 'm_sThemeDir': '/var/www/onlyoffice/documentserver/FileConverterService/presentationthemes',
        }
        root = ElementTree.Element('root')
        for key, value in config.items():
            ElementTree.SubElement(root, key).text = value
        # Passing the path lets ElementTree open and close the file itself,
        # so no handle is left open when serialisation fails.
        ElementTree.ElementTree(root).write(
            config_file_name,
            encoding='utf-8',
            xml_declaration=True,
            default_namespace=None,
            method="xml",
        )

        # run conversion binary
        command = ["x2t", config_file_name]
        p = Popen(
            command,
            stdout=PIPE,
            stderr=PIPE,
            close_fds=True,
            env=self.environment,
        )
        stdout, stderr = p.communicate()
        if p.returncode != 0:
            with open(config_file_name) as config_file:
                config_xml = config_file.read()
            raise RuntimeError("x2t: exit code %d != 0\n+ %s\n> stdout: %s\n> stderr: %s@ x2t xml:\n%s" % (
                p.returncode,
                " ".join(command),
                stdout,
                stderr,
                "  " + config_xml.replace("\n", "\n  "),
            ))

        if destination_format in yformat_tuple:
            # zipTree silently ignores the "media" entry when x2t produced none.
            zipTree(
                final_file_name,
                (output_file_name, ""),
                (os.path.join(os.path.dirname(output_file_name), "media"), ""),
            )

        self.file.reload(final_file_name)
        return self.file.getContent()
    finally:
        # Release the working files on failure as well as on success.
        self.file.trash()
def getMetadata(self, base_document=False):
"""Returns a dictionary with all metadata of document.
......
##############################################################################
#
# Copyright (c) 2009-2010 Nexedi SA and Contributors. All Rights Reserved.
# Gabriel M. Monnerat <gabriel@tiolive.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
import magic
import os.path
from zipfile import ZipFile
from cStringIO import StringIO
from cloudooo.handler.x2t.handler import Handler
from cloudooo.tests.handlerTestCase import HandlerTestCase, make_suite
class TestHandler(HandlerTestCase):
    """Round-trip conversion tests for the x2t handler (x formats <-> y formats)."""

    def afterSetUp(self):
        self.kw = dict(env=dict(PATH=self.env_path))

    def _readFile(self, path):
        """Return the raw content of `path`, closing the file afterwards."""
        # Office documents are binary: read them in binary mode.
        with open(path, "rb") as data_file:
            return data_file.read()

    def _convert(self, data, source_format, destination_format):
        """Run one conversion through a fresh Handler and return the result."""
        handler = Handler(self.tmp_url, data, source_format, **self.kw)
        return handler.convert(destination_format)

    def _readBody(self, y_data):
        """Return the "body.txt" member of a zipped y-format document."""
        y_zip = ZipFile(StringIO(y_data))
        try:
            return y_zip.read("body.txt")
        finally:
            y_zip.close()

    def _assertHasMember(self, y_data, member_name):
        """Check that `member_name` exists in the zipped y-format document."""
        y_zip = ZipFile(StringIO(y_data))
        try:
            # getinfo raises KeyError when the member is missing
            y_zip.getinfo(member_name)
        finally:
            y_zip.close()

    def testConvertXlsx(self):
        """Test conversion of xlsx to xlsy and back"""
        y_data = self._convert(self._readFile("data/test.xlsx"), "xlsx", "xlsy")
        y_body_data = self._readBody(y_data)
        self.assertTrue(y_body_data.startswith("XLSY;v2;5883;"), "%r... does not start with 'XLSY;v2;5883;'" % (y_body_data[:20],))
        x_data = self._convert(y_data, "xlsy", "xlsx")
        # magic inspired by https://github.com/minad/mimemagic/pull/19/files
        self.assertIn("xl/", x_data[:2000])

    def testConvertXlsy(self):
        """Test conversion of xlsy to xlsx and back"""
        x_data = self._convert(self._readFile("data/test.xlsy"), "xlsy", "xlsx")
        self.assertIn("xl/", x_data[:2000])
        y_data = self._convert(x_data, "xlsx", "xlsy")
        y_body_data = self._readBody(y_data)
        self.assertTrue(y_body_data.startswith("XLSY;v2;10579;"), "%r... does not start with 'XLSY;v2;10579;'" % (y_body_data[:20],))

    def testConvertDocx(self):
        """Test conversion of docx to docy and back"""
        y_data = self._convert(self._readFile("data/test_with_image.docx"), "docx", "docy")
        y_body_data = self._readBody(y_data)
        self.assertTrue(y_body_data.startswith("DOCY;v5;2795;"), "%r... does not start with 'DOCY;v5;2795;'" % (y_body_data[:20],))
        self._assertHasMember(y_data, "media/image1.png")
        x_data = self._convert(y_data, "docy", "docx")
        # magic inspired by https://github.com/minad/mimemagic/pull/19/files
        self.assertIn("word/", x_data[:2000])

    def testConvertDocy(self):
        """Test conversion of docy to docx and back"""
        x_data = self._convert(self._readFile("data/test_with_image.docy"), "docy", "docx")
        self.assertIn("word/", x_data[:2000])
        y_data = self._convert(x_data, "docx", "docy")
        y_body_data = self._readBody(y_data)
        self.assertTrue(y_body_data.startswith("DOCY;v5;7519;"), "%r... does not start with 'DOCY;v5;7519;'" % (y_body_data[:20],))
        self._assertHasMember(y_data, "media/image1.png")

    def testgetMetadataFromImage(self):
        """Test getMetadata not implemented for yformats"""
        handler = Handler(self.tmp_url, "", "xlsy", **self.kw)
        self.assertRaises(NotImplementedError, handler.getMetadata)

    def testsetMetadata(self):
        """Test setMetadata not implemented for yformats"""
        handler = Handler(self.tmp_url, "", "xlsy", **self.kw)
        self.assertRaises(NotImplementedError, handler.setMetadata)
def test_suite():
    """Build the suite holding every TestHandler test."""
    handler_suite = make_suite(TestHandler)
    return handler_suite
##############################################################################
#
# Copyright (c) 2016 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from os.path import join
from cloudooo.tests.cloudoooTestCase import TestCase, make_suite
class TestServer(TestCase):
    """Test XmlRpc Server. Needs cloudooo server started"""

    def ConversionScenarioList(self):
        """Return (path, source format, destination format, expected mime type) tuples."""
        return [
            (join('data', 'test.xlsx'), "xlsx", "xlsy", "application/zip"),
            (join('data', 'test.xlsy'), "xlsy", "xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
            (join('data', 'test_with_image.docx'), "docx", "docy", "application/zip"),
            # Here, the content type should be application/vnd.openxmlformats-officedocument.wordprocessingml.document
            # but "magic" seems to not detect it correctly. However, the document can be read correctly by LibreOffice.
            (join('data', 'test_with_image.docy'), "docy", "docx", "application/octet-stream"),
        ]

    def testConvertHtmltoPdf(self):
        """Converts xlsx,docx to their y format and y to x"""
        # NOTE(review): the method name mentions HTML and PDF, but the
        # scenarios above only cover xlsx/xlsy and docx/docy. Name kept so
        # existing test ids stay stable.
        self.runConversionList(self.ConversionScenarioList())

    def FaultConversionScenarioList(self):
        """Return (data, source format, destination format) tuples expected to fail."""
        # Read the binary fixture in binary mode and close it right away.
        with open(join('data', 'test.xlsx'), 'rb') as xlsx_file:
            xlsx_data = xlsx_file.read()
        return [
            # Test to verify if server fail when a empty string is sent
            ('', '', ''),
            # Try convert one xlsx for a invalid format
            (xlsx_data, 'xlsx', 'xyz'),
        ]
def test_suite():
    """Build the suite holding every TestServer test."""
    server_suite = make_suite(TestServer)
    return server_suite
......@@ -73,7 +73,7 @@ class Manager(object):
self.handler_dict = self.kw.pop("handler_dict")
def convertFile(self, file, source_format, destination_format, zip=False,
refresh=False):
refresh=False, conversion_kw={}):
"""Returns the converted file in the given format.
Keywords arguments:
file -- File as string in base64
......@@ -92,7 +92,7 @@ class Manager(object):
decodestring(file),
source_format,
**self.kw)
decode_data = handler.convert(destination_format)
decode_data = handler.convert(destination_format, **conversion_kw)
return encodestring(decode_data)
def updateFileMetadata(self, file, source_format, metadata_dict):
......
......@@ -56,6 +56,7 @@ application/vnd.oasis.opendocument.database
application/vnd.oasis.opendocument.image odi
application/vnd.ms-word.document.macroEnabled.12 docm
application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
application/x-asc-text docy
application/vnd.openxmlformats-officedocument.wordprocessingml.template dotx
application/vnd.ms-powerpoint.template.macroEnabled.12 potm
application/vnd.openxmlformats-officedocument.presentationml.template potx
......@@ -64,10 +65,12 @@ application/vnd.ms-powerpoint.slideshow.macroEnabled.12
application/vnd.openxmlformats-officedocument.presentationml.slideshow ppsx
application/vnd.ms-powerpoint.presentation.macroEnabled.12 pptm
application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
application/x-asc-presentation ppty
application/vnd.ms-excel.addin.macroEnabled.12 xlam
application/vnd.ms-excel.sheet.binary.macroEnabled.12 xlsb
application/vnd.ms-excel.sheet.macroEnabled.12 xlsm
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
application/x-asc-spreadsheet xlsy
application/vnd.ms-excel.template.macroEnabled.12 xltm
application/vnd.openxmlformats-officedocument.spreadsheetml.template xltx
application/x-msmetafile wmf
......
......@@ -29,6 +29,8 @@
import logging
import mimetypes
import pkg_resources
import os
from zipfile import ZipFile, ZIP_DEFLATED
logger = logging.getLogger('Cloudooo')
......@@ -98,3 +100,36 @@ def convertStringToBool(string):
return False
else:
return None
def zipTree(destination, *tree_path_list):
    """
    Write a deflated zip archive to destination from files and directory trees.

    destination may be a path or a StringIO
    tree_path_list is a list that may contain a path or a couple(path, archive_root)

    For a couple, the entry is stored as archive_root/basename(path); for a
    bare path, the directory part of the path is used as archive_root.
    Directories are archived recursively and keep their nested layout.
    Symlinks and paths that are neither a file nor a directory (including
    missing paths) are silently ignored.

    Returns destination.
    """
    def archive(zfile, path, archive_root):
        archive_name = os.path.join(archive_root, os.path.basename(path))
        if os.path.islink(path):
            pass  # XXX logger.warn("zipTree: symlink %r ignored\n" % path)
        elif os.path.isdir(path):
            # Store the directory entry itself, then recurse so that every
            # level keeps its own name instead of being flattened.
            zfile.write(path, archive_name)
            for entry in sorted(os.listdir(path)):
                archive(zfile, os.path.join(path, entry), archive_name)
        elif os.path.isfile(path):
            zfile.write(path, archive_name)
        else:
            pass  # XXX logger.warn("zipTree: unknown %r ignored\n" % path)

    # The context manager closes the archive even when a write fails.
    with ZipFile(destination, "w", ZIP_DEFLATED) as zfile:
        for tree_path in tree_path_list:
            if isinstance(tree_path, tuple):
                archive(zfile, *tree_path)
            else:
                archive(zfile, tree_path, os.path.dirname(tree_path))
    return destination
def unzip(source, destination):
    """
    Extract every member of the zip archive source into the directory
    destination.

    source is passed to ZipFile as is.
    """
    # The context manager closes the archive even when extraction fails.
    with ZipFile(source) as archive:
        archive.extractall(destination)
......@@ -6,7 +6,7 @@ version = '1.2.5-dev'
def read(name):
    """Return the whole content of the file `name`, closing it afterwards."""
    with open(name) as source_file:
        return source_file.read()
long_description = (read('README.txt') + '\n' + read('CHANGES.txt'))
long_description = (read('README.rst') + '\n' + read('CHANGELOG.rst'))
install_requires = [
# -*- Extra requirements: -*-
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment