Commit 91ec2e8a authored by Ezio Melotti

#13273: merge with 3.2.

parents 455036fd f50ffa94
...@@ -30,7 +30,7 @@ attrfind = re.compile( ...@@ -30,7 +30,7 @@ attrfind = re.compile(
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*' r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?') r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
attrfind_tolerant = re.compile( attrfind_tolerant = re.compile(
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*' r',?\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
r'(\'[^\']*\'|"[^"]*"|[^>\s]*))?') r'(\'[^\']*\'|"[^"]*"|[^>\s]*))?')
locatestarttagend = re.compile(r""" locatestarttagend = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
...@@ -277,12 +277,11 @@ class HTMLParser(_markupbase.ParserBase): ...@@ -277,12 +277,11 @@ class HTMLParser(_markupbase.ParserBase):
assert match, 'unexpected call to parse_starttag()' assert match, 'unexpected call to parse_starttag()'
k = match.end() k = match.end()
self.lasttag = tag = rawdata[i+1:k].lower() self.lasttag = tag = rawdata[i+1:k].lower()
while k < endpos: while k < endpos:
if self.strict: if self.strict:
m = attrfind.match(rawdata, k) m = attrfind.match(rawdata, k)
else: else:
m = attrfind_tolerant.search(rawdata, k) m = attrfind_tolerant.match(rawdata, k)
if not m: if not m:
break break
attrname, rest, attrvalue = m.group(1, 2, 3) attrname, rest, attrvalue = m.group(1, 2, 3)
......
...@@ -373,6 +373,39 @@ class HTMLParserTolerantTestCase(TestCaseBase): ...@@ -373,6 +373,39 @@ class HTMLParserTolerantTestCase(TestCaseBase):
[('action', 'bogus|&#()value')])], [('action', 'bogus|&#()value')])],
collector = self.collector) collector = self.collector)
def test_issue13273(self):
    # Regression check for issue #13273: the opening <div> carries an
    # empty attribute value followed by a space before '>', and every
    # tag after it must still be reported as its own event.
    source = ('<div style="" ><b>The <a href="some_url">rain</a> '
              '<br /> in <span>Spain</span></b></div>')
    events = [
        ('starttag', 'div', [('style', '')]),
        ('starttag', 'b', []),
        ('data', 'The '),
        ('starttag', 'a', [('href', 'some_url')]),
        ('data', 'rain'),
        ('endtag', 'a'),
        ('data', ' '),
        ('startendtag', 'br', []),
        ('data', ' in '),
        ('starttag', 'span', []),
        ('data', 'Spain'),
        ('endtag', 'span'),
        ('endtag', 'b'),
        ('endtag', 'div'),
    ]
    self._run_check(source, events, collector=self.collector)
def test_issue13273_2(self):
    # Second case for issue #13273: a stray comma separates the two
    # attributes of <div>; both attributes are expected in the event and
    # the tags that follow must still be reported individually.
    source = '<div style="", foo = "bar" ><b>The <a href="some_url">rain</a>'
    events = [
        ('starttag', 'div', [('style', ''), ('foo', 'bar')]),
        ('starttag', 'b', []),
        ('data', 'The '),
        ('starttag', 'a', [('href', 'some_url')]),
        ('data', 'rain'),
        ('endtag', 'a'),
    ]
    self._run_check(source, events, collector=self.collector)
def test_unescape_function(self): def test_unescape_function(self):
p = html.parser.HTMLParser() p = html.parser.HTMLParser()
self.assertEqual(p.unescape('&#bad;'),'&#bad;') self.assertEqual(p.unescape('&#bad;'),'&#bad;')
......
...@@ -341,6 +341,9 @@ Core and Builtins ...@@ -341,6 +341,9 @@ Core and Builtins
Library Library
------- -------
- Issue #13273: fix a bug that prevented HTMLParser from properly detecting
some tags when strict=False.
- Issue #11183: Add finer-grained exceptions to the ssl module, so that - Issue #11183: Add finer-grained exceptions to the ssl module, so that
you don't have to inspect the exception's attributes in the common case. you don't have to inspect the exception's attributes in the common case.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment