Commit 7b66b12f authored by Victor Stinner

Merged revisions 81504 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/branches/py3k

................
  r81504 | victor.stinner | 2010-05-24 23:46:25 +0200 (lun., 24 mai 2010) | 13 lines

  Recorded merge of revisions 81500-81501 via svnmerge from
  svn+ssh://pythondev@svn.python.org/python/trunk

  ........
    r81500 | victor.stinner | 2010-05-24 23:33:24 +0200 (lun., 24 mai 2010) | 2 lines

    Issue #6662: Fix parsing of malformatted charref (&#bad;)
  ........
    r81501 | victor.stinner | 2010-05-24 23:37:28 +0200 (lun., 24 mai 2010) | 2 lines

    Add the author of the last fix (Issue #6662)
  ........
................
parent 45c5695c
...@@ -175,6 +175,9 @@ class HTMLParser(_markupbase.ParserBase): ...@@ -175,6 +175,9 @@ class HTMLParser(_markupbase.ParserBase):
i = self.updatepos(i, k) i = self.updatepos(i, k)
continue continue
else: else:
if ";" in rawdata[i:]: #bail by consuming &#
self.handle_data(rawdata[0:2])
i = self.updatepos(i, 2)
break break
elif startswith('&', i): elif startswith('&', i):
match = entityref.match(rawdata, i) match = entityref.match(rawdata, i)
......
...@@ -136,6 +136,13 @@ text ...@@ -136,6 +136,13 @@ text
("data", "\n"), ("data", "\n"),
]) ])
def test_malformatted_charref(self):
self._run_check("<p>&#bad;</p>", [
("starttag", "p", []),
("data", "&#bad;"),
("endtag", "p"),
])
def test_unclosed_entityref(self): def test_unclosed_entityref(self):
self._run_check("&entityref foo", [ self._run_check("&entityref foo", [
("entityref", "entityref"), ("entityref", "entityref"),
......
...@@ -846,3 +846,4 @@ Siebren van der Zee ...@@ -846,3 +846,4 @@ Siebren van der Zee
Uwe Zessin Uwe Zessin
Tarek Ziadé Tarek Ziadé
Peter Åstrand Peter Åstrand
Fredrik Håård
...@@ -54,6 +54,9 @@ C-API ...@@ -54,6 +54,9 @@ C-API
Library Library
------- -------
- Issue #6662: Fix parsing of malformed charref (&#bad;), patch written by
Fredrik Håård.
- Issue #6268: Fix seek() method of codecs.open(), don't read or write the BOM - Issue #6268: Fix seek() method of codecs.open(), don't read or write the BOM
twice after seek(0). Fix also reset() method of codecs, UTF-16, UTF-32 and twice after seek(0). Fix also reset() method of codecs, UTF-16, UTF-32 and
StreamWriter classes. StreamWriter classes.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment