Commit 8f4c7667 authored by Nicolas Delaby

Better support for flawed html in safe_html

Check that the declared encoding is supported by Python before using it.
parent 4269d424
......@@ -1726,6 +1726,26 @@ document.write('<sc'+'ript type="text/javascript" src="http://somosite.bg/utb.ph
expectedFailure(self.fail)(
'Even BeautifulSoup is not able to parse such HTML')
def test_safeHTML_unknown_codec(self):
  """Convert a Web Page whose meta tag declares the charset 'unicode'.

  The html conversion output must no longer contain 'unicode'
  and must contain 'utf-8'.
  """
  portal_type = 'Web Page'
  # Page declaring "charset=unicode" in its Content-Type meta tag.
  html_content = """
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=unicode" />
<title>BLa</title>
</head>
<body><p> blablabla</p></body>
</html>"""
  web_page_module = self.portal.getDefaultModule(portal_type)
  document = web_page_module.newContent(portal_type=portal_type)
  document.edit(text_content=html_content)
  # convert() result is indexed at 1 to get the converted html text.
  conversion = document.convert('html')
  safe_html = conversion[1]
  self.assertFalse('unicode' in safe_html)
  self.assertTrue('utf-8' in safe_html)
def test_parallel_conversion(self):
"""Check that conversion engine is able to fill in
cache without overwrite previous conversion
......
......@@ -3,6 +3,7 @@ from zLOG import ERROR
from HTMLParser import HTMLParser, HTMLParseError
import re
from cgi import escape
import codecs
from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implements
......@@ -224,7 +225,14 @@ class StrippingParser(HTMLParser):
self.default_encoding and self.default_encoding not in v:
match = charset_parser.search(v)
if match is not None:
self.original_charset = match.group('charset')
charset = match.group('charset')
try:
codecs.lookup(charset)
except LookupError:
# If a codec is not known by python, it is better
# to prevent its usage
charset = None
self.original_charset = charset
v = charset_parser.sub(
CharsetReplacer(self.default_encoding), v)
self.result.append(' %s="%s"' % (k, escape(v, True)))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment