Commit 8f4c7667 authored by Nicolas Delaby

Better support for flawed HTML in safe_html

Check that the declared encoding is supported by Python before using it.
parent 4269d424
......@@ -1726,6 +1726,26 @@ document.write('<sc'+'ript type="text/javascript" src="
'Even BeautifulSoup is not able to parse such HTML')
def test_safeHTML_unknown_codec(self):
"""Some HTML documents declare unknown codecs.
web_page_portal_type = 'Web Page'
module = self.portal.getDefaultModule(web_page_portal_type)
web_page = module.newContent(portal_type=web_page_portal_type)
html_content = """
<meta http-equiv="Content-Type" content="text/html; charset=unicode" />
<body><p> blablabla</p></body>
safe_html = web_page.convert('html')[1]
self.assertTrue('unicode' not in safe_html)
self.assertTrue('utf-8' in safe_html)
def test_parallel_conversion(self):
"""Check that the conversion engine is able to fill in the
cache without overwriting previous conversions
......@@ -3,6 +3,7 @@ from zLOG import ERROR
from HTMLParser import HTMLParser, HTMLParseError
import re
from cgi import escape
import codecs
from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implements
......@@ -224,7 +225,14 @@ class StrippingParser(HTMLParser):
self.default_encoding and self.default_encoding not in v:
match =
if match is not None:
self.original_charset ='charset')
charset ='charset')
except LookupError:
# If a codec is not known by Python, it is better
# to prevent its usage
charset = None
self.original_charset = charset
v = charset_parser.sub(
CharsetReplacer(self.default_encoding), v)
self.result.append(' %s="%s"' % (k, escape(v, True)))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment