Commit b255c894 authored by Julien Muchembled

PortalTransforms: merge upstream 2.0

This fixes test_20_reStructuredText partially.

Conflicts:
	Products/PortalTransforms/TransformEngine.py
	Products/PortalTransforms/libtransforms/commandtransform.py
	Products/PortalTransforms/transforms/safe_html.py
	Products/PortalTransforms/utils.py

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@41726 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 4adafd42
This diff is collapsed.
......@@ -3,10 +3,16 @@
from time import time
from Acquisition import aq_base
_marker = object()
class Cache:
def __init__(self, context, _id='_v_transform_cache'):
self.context = context
def __init__(self, obj, context=None, _id='_v_transform_cache'):
self.obj = obj
if context is None:
self.context = obj
else:
self.context = context
self._id =_id
def _genCacheKey(self, identifier, *args):
......@@ -17,17 +23,19 @@ class Cache:
key = key.replace('+', '_')
key = key.replace('-', '_')
key = key.replace(' ', '_')
if hasattr(aq_base(self.context), 'absolute_url'):
return key, self.context.absolute_url()
return key
def setCache(self, key, value):
"""cache a value indexed by key"""
if not value.isCacheable():
return
context = self.context
obj = self.obj
key = self._genCacheKey(key)
if getattr(aq_base(context), self._id, None) is None:
setattr(context, self._id, {})
getattr(context, self._id)[key] = (time(), value)
if getattr(aq_base(obj), self._id, None) is None:
setattr(obj, self._id, {})
getattr(obj, self._id)[key] = (time(), value)
return key
def getCache(self, key):
......@@ -36,9 +44,9 @@ class Cache:
return None if not present
else return a tuple (time spent in cache, value)
"""
context = self.context
obj = self.obj
key = self._genCacheKey(key)
dict = getattr(context, self._id, None)
dict = getattr(obj, self._id, None)
if dict is None :
return None
try:
......@@ -46,18 +54,18 @@ class Cache:
return time() - orig_time, value
except TypeError:
return None
def purgeCache(self, key=None):
"""Remove cache
"""
context = self.context
obj = self.obj
id = self._id
if not shasattr(context, id):
if getattr(obj, id, _marker) is _marker:
return
if key is None:
delattr(context, id)
delattr(obj, id)
else:
cache = getattr(context, id)
cache = getattr(obj, id)
key = self._genCacheKey(key)
if cache.has_key(key):
del cache[key]
......@@ -87,28 +87,30 @@ class popentransform:
def convert(self, data, cache, **kwargs):
command = "%s %s" % (self.binary, self.binaryArgs)
if not self.useStdin:
tmpfile, tmpname = tempfile.mkstemp(text=False) # create tmp
os.write(tmpfile, data) # write data to tmp using a file descriptor
os.close(tmpfile) # close it so the other process can read it
command = command % { 'infile' : tmpname } # apply tmp name to command
cin, couterr = os.popen4(command, 'b')
if self.useStdin:
cin.write(str(data))
status = cin.close()
out = self.getData(couterr)
couterr.close()
if not self.useStdin:
# remove tmp file
os.unlink(tmpname)
cache.setData(out)
return cache
tmpname = None
try:
if not self.useStdin:
tmpfile, tmpname = tempfile.mkstemp(text=False) # create tmp
os.write(tmpfile, data) # write data to tmp using a file descriptor
os.close(tmpfile) # close it so the other process can read it
command = command % { 'infile' : tmpname } # apply tmp name to command
cin, couterr = os.popen4(command, 'b')
if self.useStdin:
cin.write(str(data))
status = cin.close()
out = self.getData(couterr)
couterr.close()
cache.setData(out)
return cache
finally:
if not self.useStdin and tmpname is not None:
# remove tmp file
os.unlink(tmpname)
from subprocess import Popen, PIPE
import shlex
......
import re
import os
import sys
from sgmllib import SGMLParser
from sgmllib import SGMLParser, SGMLParseError
try:
# Need to be imported before win32api to avoid dll loading
......@@ -207,7 +207,26 @@ class StrippingParser( SGMLParser ):
self.result = "%s</%s>" % (self.result, tag)
remTag = '</%s>' % tag
def parse_declaration(self, i):
    """Parse the markup declaration that starts at ``self.rawdata[i]``.

    Fix handling of CDATA sections: they are copied verbatim to the
    output instead of being handed to SGMLParser. Code borrowed from
    BeautifulSoup.

    Returns the index in ``self.rawdata`` at which parsing resumes.
    """
    rawdata = self.rawdata
    if rawdata[i:i+9] == '<![CDATA[':
        k = rawdata.find(']]>', i)
        if k == -1:
            # Unterminated section: treat the rest of the input as CDATA.
            k = len(rawdata)
        data = rawdata[i+9:k]
        # This parser accumulates its output in a plain string (closing
        # tags are added by string formatting), so concatenate rather
        # than calling append(), which a string does not have.
        self.result = "%s<![CDATA[%s]]>" % (self.result, data)
        return k + 3
    try:
        return SGMLParser.parse_declaration(self, i)
    except SGMLParseError:
        # Unparseable declaration: emit the remaining input untouched
        # and report it as fully consumed.
        toHandle = rawdata[i:]
        self.result = "%s%s" % (self.result, toHandle)
        return i + len(toHandle)
def scrubHTML( html ):
""" Strip illegal HTML tags from string text. """
......
## Testing Markdown
`code` and _italic_ and *bold* and even a [link](http://plone.org).
Fööbär
......@@ -15,6 +15,10 @@
</tr>
</table>
<p>This is a text used as a blind text.</p>
<div><![CDATA[
Some CDATA text.
]]>
</div>
<ul>
<li>A sample list item1</li>
<li>A sample list item2</li>
......
......@@ -3,4 +3,5 @@
<h2> Testing Markdown </h2>
<p> <code>code</code> and <em>italic</em> and <em>bold</em> and even a <a href="http://plone.org">link</a>.
</p>
<p>Fööbär</p>
<h2 class="title">Heading 1</h2>
<p>Some text.</p>
<div class="section">
<h3><a id="heading-2" name="heading-2">Heading 2</a></h3>
<p>Some text, bla ble bli blo blu. Yes, i know this is <a class="reference" href="http://www.example.com">Stupid</a>.</p>
<div class="section" id="heading-2">
<h3>Heading 2</h3>
<p>Some text, bla ble bli blo blu. Yes, i know this is<a class="reference external" href="http://www.example.com">Stupid</a>.</p>
</div>
<h2 class="title">Title</h2>
<h3 class="subtitle">Subtitle</h3>
<p>This is a test document to make sure subtitle gets the right heading.</p>
<div class="section">
<h3><a id="now-the-real-heading" name="now-the-real-heading">Now the real heading</a></h3>
<div class="section" id="now-the-real-heading">
<h3>Now the real heading</h3>
<p>The brown fox jumped over the lazy dog.</p>
<div class="section">
<h4><a id="with-a-subheading" name="with-a-subheading">With a subheading</a></h4>
<p>Some text, bla ble bli blo blu. Yes, i know this is <a class="reference" href="http://www.example.com">Stupid</a>.</p>
<div class="section" id="with-a-subheading">
<h4>With a subheading</h4>
<p>Some text, bla ble bli blo blu. Yes, i know this is<a class="reference external" href="http://www.example.com">Stupid</a>.</p>
</div>
</div>
......@@ -6,6 +6,10 @@
</tr>
</table>
<p>This is a text used as a blind text.</p>
<div><![CDATA[
Some CDATA text.
]]>
</div>
<ul>
<li>A sample list item1</li>
<li>A sample list item2</li>
......
......@@ -67,6 +67,15 @@ class DummyHtmlFilter2(BaseTransform):
data.setData("<div class='dummy'>%s</div>" % orig)
return data
class QuxToVHost(DummyHtmlFilter1):
    """Dummy filter whose output depends on the request's SERVER_URL.

    Every occurrence of 'qux' in the input is replaced by the
    SERVER_URL found on the context's REQUEST.
    """
    __name__ = 'qux_to_vhost'

    def convert(self, orig, data, context, **kwargs):
        server_url = context.REQUEST['SERVER_URL']
        converted = re.sub('qux', server_url, orig)
        data.setData(converted)
        return data
class TransformNoIO(BaseTransform):
implements(ITransform)
......@@ -223,6 +232,52 @@ class TestEngine(ATSiteTestCase):
out = self.engine.convertTo(mt, other_data, mimetype=mt, object=self)
self.failUnlessEqual(out.getData(), other_data, out.getData())
def testCacheWithVHost(self):
    """Ensure that the transform cache key includes virtual
    hosting so that transforms which are dependent on the virtual
    hosting don't get invalid data from the cache. This happens,
    for example, in the resolve UID functionality used by visual
    editors."""
    mt = 'text/x-html-safe'
    self.engine.registerTransform(QuxToVHost())
    self.engine.manage_addPolicy(mt, ['qux_to_vhost'])
    data = '<a href="qux">vhost link</a>'

    def check(obj, expected):
        # Convert with the folder as context and compare the output.
        out = self.engine.convertTo(
            mt, data, mimetype='text/html', object=obj,
            context=self.folder)
        self.failUnlessEqual(out.getData(), expected, out.getData())

    nohost = '<a href="http://nohost">vhost link</a>'
    check(self.folder, nohost)
    # Test when object is not a context
    check(self, nohost)

    # Change the virtual hosting
    self.folder.REQUEST['SERVER_URL'] = 'http://otherhost'
    otherhost = '<a href="http://otherhost">vhost link</a>'
    check(self.folder, otherhost)
    # Test when object is not a context
    check(self, otherhost)
def test_suite():
from unittest import TestSuite, makeSuite
......
......@@ -16,6 +16,87 @@ class TestGraph(ATSiteTestCase):
out = self.engine.convertTo('text/plain', data, filename=FILE_PATH)
self.failUnless(out.getData())
def testFindPath(self):
originalMap = self.engine._mtmap
"""
The dummy map used for this test corresponds to a graph
depicted in ASCII art below :
+---+
| |
| v
+-->1<-->2-->4-->6<--7
^ ^ |
| | |
v | |
3<---+ |
^ |
| |
v |
5<-------+
"""
# we need a DummyTransform class
class DT:
def __init__(self, name):
self._name = name
def name(self):
return self._name
dummyMap1 = {
'1': { '1': [DT('transform1-1')],
'2': [DT('transform1-2')],
'3': [DT('transform1-3')]},
'2': { '1': [DT('transform2-1')],
'3': [DT('transform2-3')],
'4': [DT('transform2-4')]},
'3': { '1': [DT('transform3-1')],
'2': [DT('transform3-2')],
'5': [DT('transform3-5')]},
'4': { '5': [DT('transform4-5')],
'6': [DT('transform4-6')]},
'5': { '3': [DT('transform5-3')]},
'7': { '6': [DT('transform7-6')]}
}
expectedPathes = {
'1-1': [],
'1-2': ['transform1-2'],
'1-3': ['transform1-3'],
'1-4': ['transform1-2', 'transform2-4'],
'1-5': ['transform1-3', 'transform3-5'],
'1-6': ['transform1-2', 'transform2-4', 'transform4-6'],
'1-7': None,
'2-1': ['transform2-1'],
'2-2': [],
'2-4': ['transform2-4'],
'4-2': ['transform4-5', 'transform5-3', 'transform3-2'],
'5-3': ['transform5-3']
}
self.engine._mtmap = dummyMap1
for orig in ['1','2','3','4','5','6','7']:
for target in ['1','2','3','4','5','6','7']:
# build the name of the path
pathName = orig + '-' + target
# do we have any expectation for this path ?
if pathName in expectedPathes.keys():
# we do. Here is the expected shortest path
expectedPath = expectedPathes[pathName]
# what's the shortest path according to the engine ?
gotPath = self.engine._findPath(orig,target)
# just keep the name of the transforms, please
if gotPath is not None:
gotPath = [transform.name() for transform in gotPath]
# this must be the same as in our expectation
self.assertEquals(expectedPath, gotPath)
self.engine._mtmap = originalMap
def testFindPathWithEmptyTransform(self):
""" _findPath should not throw "index out of range" when dealing with
empty transforms list
"""
dummyMap = {'1': {'2': []}}
self.engine._mtmap = dummyMap
self.engine._findPath('1','2')
def testIdentity(self):
orig = 'Some text'
converted = self.engine.convertTo(
......
import os
import logging
from Testing import ZopeTestCase
from Products.Archetypes.tests.atsitetestcase import ATSiteTestCase
from Products.CMFCore.utils import getToolByName
from utils import input_file_path, output_file_path, normalize_html,\
load, matching_inputs
from Products.PortalTransforms.data import datastream
from Products.PortalTransforms.interfaces import IDataStream
from Products.PortalTransforms.interfaces import idatastream
from Products.MimetypesRegistry.MimeTypesTool import MimeTypesTool
from Products.PortalTransforms.TransformEngine import TransformTool
from Products.PortalTransforms.libtransforms.utils import MissingBinary
from Products.PortalTransforms.transforms.image_to_gif import image_to_gif
......@@ -24,7 +21,6 @@ from Products.PortalTransforms.transforms.textile_to_html import HAS_TEXTILE
from Products.PortalTransforms.transforms.markdown_to_html import HAS_MARKDOWN
from os.path import exists
import sys
# we have to set locale because lynx output is locale sensitive !
os.environ['LC_ALL'] = 'C'
logger = logging.getLogger('PortalTransforms')
......@@ -59,9 +55,11 @@ class TransformTest(ATSiteTestCase):
got = self.normalize(got)
output.close()
self.assertEquals(got, expected,
got_start = got.strip()[:30]
expected_start = expected.strip()[:30]
self.assertEquals(got_start, expected_start,
'[%s]\n\n!=\n\n[%s]\n\nIN %s(%s)' % (
got, expected, self.transform.name(), self.input))
got_start, expected_start, self.transform.name(), self.input))
self.assertEquals(self.subobjects, len(res_data.getSubObjects()),
'%s\n\n!=\n\n%s\n\nIN %s(%s)' % (
self.subobjects, len(res_data.getSubObjects()),
......@@ -70,13 +68,13 @@ class TransformTest(ATSiteTestCase):
def testSame(self):
try:
self.do_convert(filename=self.input)
except MissingBinary, e:
except MissingBinary:
pass
def testSameNoFilename(self):
try:
self.do_convert()
except MissingBinary, e:
except MissingBinary:
pass
def __repr__(self):
......@@ -86,12 +84,13 @@ class PILTransformsTest(ATSiteTestCase):
def afterSetUp(self):
ATSiteTestCase.afterSetUp(self)
self.pt = self.portal.portal_transforms
self.mimetypes_registry = getToolByName(self.portal, 'mimetypes_registry')
def test_image_to_bmp(self):
self.pt.registerTransform(image_to_bmp())
imgFile = open(input_file_path('logo.jpg'), 'rb')
data = imgFile.read()
self.failUnlessEqual(self.portal.mimetypes_registry.classify(data),'image/jpeg')
self.failUnlessEqual(self.mimetypes_registry.classify(data),'image/jpeg')
data = self.pt.convertTo(target_mimetype='image/x-ms-bmp',orig=data)
self.failUnlessEqual(data.getMetadata()['mimetype'], 'image/x-ms-bmp')
......@@ -99,7 +98,7 @@ class PILTransformsTest(ATSiteTestCase):
self.pt.registerTransform(image_to_gif())
imgFile = open(input_file_path('logo.png'), 'rb')
data = imgFile.read()
self.failUnlessEqual(self.portal.mimetypes_registry.classify(data),'image/png')
self.failUnlessEqual(self.mimetypes_registry.classify(data),'image/png')
data = self.pt.convertTo(target_mimetype='image/gif',orig=data)
self.failUnlessEqual(data.getMetadata()['mimetype'], 'image/gif')
......@@ -107,7 +106,7 @@ class PILTransformsTest(ATSiteTestCase):
self.pt.registerTransform(image_to_jpeg())
imgFile = open(input_file_path('logo.gif'), 'rb')
data = imgFile.read()
self.failUnlessEqual(self.portal.mimetypes_registry.classify(data),'image/gif')
self.failUnlessEqual(self.mimetypes_registry.classify(data),'image/gif')
data = self.pt.convertTo(target_mimetype='image/jpeg',orig=data)
self.failUnlessEqual(data.getMetadata()['mimetype'], 'image/jpeg')
......@@ -115,7 +114,7 @@ class PILTransformsTest(ATSiteTestCase):
self.pt.registerTransform(image_to_png())
imgFile = open(input_file_path('logo.jpg'), 'rb')
data = imgFile.read()
self.failUnlessEqual(self.portal.mimetypes_registry.classify(data),'image/jpeg')
self.failUnlessEqual(self.mimetypes_registry.classify(data),'image/jpeg')
data = self.pt.convertTo(target_mimetype='image/png',orig=data)
self.failUnlessEqual(data.getMetadata()['mimetype'], 'image/png')
......@@ -123,7 +122,7 @@ class PILTransformsTest(ATSiteTestCase):
self.pt.registerTransform(image_to_pcx())
imgFile = open(input_file_path('logo.gif'), 'rb')
data = imgFile.read()
self.failUnlessEqual(self.portal.mimetypes_registry.classify(data),'image/gif')
self.failUnlessEqual(self.mimetypes_registry.classify(data),'image/gif')
data = self.pt.convertTo(target_mimetype='image/pcx',orig=data)
self.failUnlessEqual(data.getMetadata()['mimetype'], 'image/pcx')
......@@ -131,7 +130,7 @@ class PILTransformsTest(ATSiteTestCase):
self.pt.registerTransform(image_to_ppm())
imgFile = open(input_file_path('logo.png'), 'rb')
data = imgFile.read()
self.failUnlessEqual(self.portal.mimetypes_registry.classify(data),'image/png')
self.failUnlessEqual(self.mimetypes_registry.classify(data),'image/png')
data = self.pt.convertTo(target_mimetype='image/x-portable-pixmap',orig=data)
self.failUnlessEqual(data.getMetadata()['mimetype'], 'image/x-portable-pixmap')
......@@ -139,7 +138,7 @@ class PILTransformsTest(ATSiteTestCase):
self.pt.registerTransform(image_to_tiff())
imgFile = open(input_file_path('logo.jpg'), 'rb')
data = imgFile.read()
self.failUnlessEqual(self.portal.mimetypes_registry.classify(data),'image/jpeg')
self.failUnlessEqual(self.mimetypes_registry.classify(data),'image/jpeg')
data = self.pt.convertTo(target_mimetype='image/tiff',orig=data)
self.failUnlessEqual(data.getMetadata()['mimetype'], 'image/tiff')
......
......@@ -5,6 +5,7 @@ from sys import modules
from os.path import join, abspath, dirname, basename
def normalize_html(s):
s = re.sub(r"&nbsp;", " ", s)
s = re.sub(r"\s+", " ", s)
s = re.sub(r"(?s)\s+<", "<", s)
s = re.sub(r"(?s)>\s+", ">", s)
......
"""
Uses the http://www.freewisdom.org/projects/python-markdown/ module to do its handy work
author: Tom Lazar <tom@tomster.org> at the archipelago sprint 2006
Uses the http://www.freewisdom.org/projects/python-markdown/ module
Author: Tom Lazar <tom@tomster.org> at the archipelago sprint 2006
"""
import os
from zope.interface import implements
from Products.CMFDefault.utils import bodyfinder
from Products.PortalTransforms.interfaces import ITransform
from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
from Products.PortalTransforms.libtransforms.utils import bin_search
from Products.PortalTransforms.libtransforms.utils import sansext
from Products.PortalTransforms.utils import log
try:
......@@ -23,7 +16,7 @@ except ImportError:
log('markdown_to_html: Could not import python-markdown.')
else:
HAS_MARKDOWN = True
class markdown:
implements(ITransform)
......@@ -37,11 +30,16 @@ class markdown:
def convert(self, orig, data, **kwargs):
if HAS_MARKDOWN:
html = markdown_transformer.markdown(orig)
# markdown expects unicode input:
orig = unicode(orig.decode('utf-8'))
# PortalTransforms, however expects a string as result,
# so we encode the unicode result back to UTF8:
html = markdown_transformer.markdown(orig).encode('utf-8')
else:
html = orig
data.setData(html)
return data
def register():
    """Return a new instance of the markdown transform."""
    transform = markdown()
    return transform
import re, tempfile
import os, os.path
from Products.PortalTransforms.libtransforms.utils import bin_search, \
sansext, bodyfinder, scrubHTML
import os
from Products.PortalTransforms.libtransforms.utils import bodyfinder, scrubHTML
from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
class document(commandtransform):
......
......@@ -31,7 +31,7 @@ VALID_TAGS['ins'] = 1
VALID_TAGS['del'] = 1
VALID_TAGS['q'] = 1
VALID_TAGS['map'] = 1
VALID_TAGS['area'] = 1
VALID_TAGS['area'] = 0
VALID_TAGS['abbr'] = 1
VALID_TAGS['acronym'] = 1
VALID_TAGS['var'] = 1
......@@ -71,6 +71,10 @@ VALID_TAGS['source'] = 1
VALID_TAGS['time'] = 1
VALID_TAGS['video'] = 1
# add some tags to nasty. These should also probably be backported to CMFDefault.
NASTY_TAGS['style'] = 1 # this helps improve Word HTML cleanup.
NASTY_TAGS['meta'] = 1 # allowed by parsers, but can cause unexpected behavior
msg_pat = """
<div class="system-message">
......@@ -203,7 +207,7 @@ class StrippingParser(HTMLParser):
if not self.raise_error: continue
else: raise IllegalHTML, 'Script event "%s" not allowed.' % k
elif v is None:
self.result.append(' %s' % (k,))
self.result.append(' %s' % k)
elif remove_script and hasScript(v):
if not self.raise_error: continue
else: raise IllegalHTML, 'Script URI "%s" not allowed.' % v
......@@ -238,6 +242,26 @@ class StrippingParser(HTMLParser):
self.result.append('</%s>' % tag)
#remTag = '</%s>' % tag
def parse_declaration(self, i):
    """Fix handling of CDATA sections. Code borrowed from BeautifulSoup.

    CDATA sections are appended verbatim to the result; any other
    declaration is delegated to HTMLParser. Returns the index at which
    parsing resumes.
    """
    source = self.rawdata
    if source[i:i+9] != '<![CDATA[':
        try:
            return HTMLParser.parse_declaration(self, i)
        except HTMLParseError:
            # Unparseable declaration: pass the rest of the input through.
            remainder = source[i:]
            self.result.append(remainder)
            return i + len(remainder)
    end = source.find(']]>', i)
    if end == -1:
        # No terminator: the section runs to the end of the input.
        end = len(source)
    self.result.append("<![CDATA[%s]]>" % source[i+9:end])
    return end + 3
def getResult(self):
return ''.join(self.result)
......@@ -262,13 +286,13 @@ def scrubHTML(html, valid=VALID_TAGS, nasty=NASTY_TAGS,
class SafeHTML:
"""Simple transform which uses CMFDefault functions to
clean potentially bad tags.
clean potentially bad tags.
Tags must explicit be allowed in valid_tags to pass. Only
the tags themself are removed, not their contents. If tags
are removed and in nasty_tags, they are removed with
all of their contents.
all of their contents.
Objects will not be transformed again with changed settings.
You need to clear the cache by e.g.
1.) restarting your zope or
......@@ -291,6 +315,10 @@ class SafeHTML:
'output': self.output,
'valid_tags': VALID_TAGS,
'nasty_tags': NASTY_TAGS,
'stripped_attributes': ['lang','valign','halign','border','frame','rules','cellspacing','cellpadding','bgcolor'],
'stripped_combinations': {'table th td': 'width height'},
'style_whitelist': ['text-align', 'list-style-type', 'float'],
'class_blacklist': [],
'remove_javascript': 1,
'disable_transform': 0,
'default_encoding': 'utf-8',
......@@ -310,6 +338,19 @@ class SafeHTML:
'everything they contain (like applet, object). ' +
'They are only deleted if they are not marked as valid_tags.',
('tag', 'value')),
'stripped_attributes': ('list',
'stripped_attributes',
'These attributes are stripped from any tag.'),
'stripped_combinations' : ('dict',
'stripped_combinations',
'These attributes are stripped from any tag.',
('tag', 'value')),
'style_whitelist': ('list',
'style_whitelist',
'These CSS styles are allowed in style attributes.'),
'class_blacklist': ('list',
'class_blacklist',
'These class names are not allowed in class attributes.'),
'remove_javascript' : ("int",
'remove_javascript',
'1 to remove javascript attributes that begin with on (e.g. onClick) ' +
......@@ -355,7 +396,9 @@ class SafeHTML:
repaired = 0
while True:
try:
orig = scrubHTML(
# Do 2 passes. This provides more reliable filtering of certain
# malicious HTML (cf upstream commit svn10522).
for repeat in range(2): orig = scrubHTML(
orig,
valid=self.config.get('valid_tags', {}),
nasty=self.config.get('nasty_tags', {}),
......@@ -366,6 +409,8 @@ class SafeHTML:
data.setData(msg_pat % ("Error", str(inst)))
break
except HTMLParseError:
if repeat:
raise # try to repair only on first pass
# ouch !
# HTMLParser is not able to parse very dirty HTML string
if not repaired:
......
......@@ -45,20 +45,23 @@ class word_to_html:
def convert(self, data, cache, **kwargs):
orig_file = 'unknown.doc'
doc = None
try:
doc = document(orig_file, data)
doc.convert()
html = doc.html()
doc = document(orig_file, data)
doc.convert()
html = doc.html()
path, images = doc.subObjects(doc.tmpdir)
objects = {}
if images:
doc.fixImages(path, images, objects)
path, images = doc.subObjects(doc.tmpdir)
objects = {}
if images:
doc.fixImages(path, images, objects)
doc.cleanDir(doc.tmpdir)
cache.setData(html)
cache.setSubObjects(objects)
return cache
cache.setData(html)
cache.setSubObjects(objects)
return cache
finally:
if doc is not None:
doc.cleanDir(doc.tmpdir)
def register():
    """Return a new instance of the word_to_html transform."""
    transform = word_to_html()
    return transform
......@@ -6,37 +6,37 @@ from Products.PortalTransforms.libtransforms.utils import bin_search, MissingBin
COMMAND_CONFIGS = (
('lynx_dump', '.html',
{'binary_path' : 'lynx',
'command_line' : '-dump %s',
'command_line' : '-dump %(input)s',
'inputs' : ('text/html',),
'output' : 'text/plain',
}),
('tidy_html', '.html',
{'binary_path' : 'tidy',
'command_line' : '%s',
'command_line' : '%(input)s',
'inputs' : ('text/html',),
'output' : 'text/html',
}),
('rtf_to_html', None,
{'binary_path' : 'unrtf',
'command_line' : '%s',
'command_line' : '%(input)s',
'inputs' : ('application/rtf',),
'output' : 'text/html',
}),
('ppt_to_html', None,
{'binary_path' : 'ppthtml',
'command_line' : '%s',
'command_line' : '%(input)s',
'inputs' : ('application/vnd.ms-powerpoint',),
'output' : 'text/html',
}),
('excel_to_html', None,
{'binary_path' : 'xlhtml',
'command_line' : '-nh -a %s',
'command_line' : '-nh -a %(input)s',
'inputs' : ('application/vnd.ms-excel',),
'output' : 'text/html',
}),
('ps_to_text', None,
{'binary_path' : 'ps2ascii',
'command_line' : '%s',
'command_line' : '%(input)s',
'inputs' : ('application/postscript',),
'output' : 'text/plain',
}),
......
......@@ -8,10 +8,10 @@ class TransformException(Exception):
FB_REGISTRY = None
# logging function
from zLOG import LOG, INFO
from zLOG import LOG, DEBUG
#logger = logging.getLogger('PortalTransforms')
def log(message, severity=INFO):
def log(message, severity=DEBUG):
LOG('PortalTransforms', severity, message)
#logger.log(severity, message)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment