Commit 554a3b82 authored by Victor Stinner

Issue #6662: Fix parsing of malformatted charref (&#bad;)

parent f0757a29
......@@ -175,6 +175,9 @@ class HTMLParser(markupbase.ParserBase):
i = self.updatepos(i, k)
continue
else:
if ";" in rawdata[i:]: #bail by consuming &#
self.handle_data(rawdata[0:2])
i = self.updatepos(i, 2)
break
elif startswith('&', i):
match = entityref.match(rawdata, i)
......
......@@ -313,6 +313,13 @@ DOCTYPE html [
("starttag", "html", [("foo", u"\u20AC&aa&unsupported;")])
])
def test_malformatted_charref(self):
    # Regression test for issue #6662: "&#bad;" starts like a numeric
    # character reference but is not a valid one.  The expected event
    # list below requires the text to come back unchanged as a single
    # data event between the <p> start tag and the </p> end tag.
    self._run_check("<p>&#bad;</p>", [
        ("starttag", "p", []),
        ("data", "&#bad;"),
        ("endtag", "p"),
    ])
def test_main():
test_support.run_unittest(HTMLParserTestCase)
......
......@@ -29,6 +29,8 @@ C-API
Library
-------
- Issue #6662: Fix parsing of malformatted charref (&#bad;)
- Issue #8016: Add the CP858 codec.
- Issue #3924: Ignore cookies with invalid "version" field in cookielib.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment