Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
91ec2e8a
Commit
91ec2e8a
authored
Oct 28, 2011
by
Ezio Melotti
Browse files
Options
Browse Files
Download
Plain Diff
#13273: merge with 3.2.
parents
455036fd
f50ffa94
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
38 additions
and
3 deletions
+38
-3
Lib/html/parser.py
Lib/html/parser.py
+2
-3
Lib/test/test_htmlparser.py
Lib/test/test_htmlparser.py
+33
-0
Misc/NEWS
Misc/NEWS
+3
-0
No files found.
Lib/html/parser.py
View file @
91ec2e8a
...
...
@@ -30,7 +30,7 @@ attrfind = re.compile(
r'
\
s
*
([
a
-
zA
-
Z_
][
-
.:
a
-
zA
-
Z_0
-
9
]
*
)(
\
s
*=
\
s
*
'
r'
(
\
'[^
\
'
]*
\
'
|"[^"]*"|[^
\
s
"
\
'
=<>`]*))?'
)
attrfind_tolerant
=
re
.
compile
(
r'\
s*([
a-zA-Z_][-.:a-zA-Z_0-9]*)(\
s*=
\s*'
r'
,?
\
s*([
a-zA-Z_][-.:a-zA-Z_0-9]*)(\
s*=
\s*'
r'(\'[^\']*\'|"[^"]*"|[^>\
s]*))?
')
locatestarttagend = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
...
...
@@ -277,12 +277,11 @@ class HTMLParser(_markupbase.ParserBase):
assert
match
,
'unexpected call to parse_starttag()'
k
=
match
.
end
()
self
.
lasttag
=
tag
=
rawdata
[
i
+
1
:
k
].
lower
()
while
k
<
endpos
:
if
self
.
strict
:
m
=
attrfind
.
match
(
rawdata
,
k
)
else
:
m
=
attrfind_tolerant
.
sear
ch
(
rawdata
,
k
)
m
=
attrfind_tolerant
.
mat
ch
(
rawdata
,
k
)
if
not
m
:
break
attrname
,
rest
,
attrvalue
=
m
.
group
(
1
,
2
,
3
)
...
...
Lib/test/test_htmlparser.py
View file @
91ec2e8a
...
...
@@ -373,6 +373,39 @@ class HTMLParserTolerantTestCase(TestCaseBase):
[(
'action'
,
'bogus|&#()value'
)])],
collector
=
self
.
collector
)
def
test_issue13273
(
self
):
html
=
(
'<div style="" ><b>The <a href="some_url">rain</a> '
'<br /> in <span>Spain</span></b></div>'
)
expected
=
[
(
'starttag'
,
'div'
,
[(
'style'
,
''
)]),
(
'starttag'
,
'b'
,
[]),
(
'data'
,
'The '
),
(
'starttag'
,
'a'
,
[(
'href'
,
'some_url'
)]),
(
'data'
,
'rain'
),
(
'endtag'
,
'a'
),
(
'data'
,
' '
),
(
'startendtag'
,
'br'
,
[]),
(
'data'
,
' in '
),
(
'starttag'
,
'span'
,
[]),
(
'data'
,
'Spain'
),
(
'endtag'
,
'span'
),
(
'endtag'
,
'b'
),
(
'endtag'
,
'div'
)
]
self
.
_run_check
(
html
,
expected
,
collector
=
self
.
collector
)
def
test_issue13273_2
(
self
):
html
=
'<div style="", foo = "bar" ><b>The <a href="some_url">rain</a>'
expected
=
[
(
'starttag'
,
'div'
,
[(
'style'
,
''
),
(
'foo'
,
'bar'
)]),
(
'starttag'
,
'b'
,
[]),
(
'data'
,
'The '
),
(
'starttag'
,
'a'
,
[(
'href'
,
'some_url'
)]),
(
'data'
,
'rain'
),
(
'endtag'
,
'a'
),
]
self
.
_run_check
(
html
,
expected
,
collector
=
self
.
collector
)
def
test_unescape_function
(
self
):
p
=
html
.
parser
.
HTMLParser
()
self
.
assertEqual
(
p
.
unescape
(
'&#bad;'
),
'&#bad;'
)
...
...
Misc/NEWS
View file @
91ec2e8a
...
...
@@ -341,6 +341,9 @@ Core and Builtins
Library
-------
-
Issue
#
13273
:
fix
a
bug
that
prevented
HTMLParser
to
properly
detect
some
tags
when
strict
=
False
.
-
Issue
#
11183
:
Add
finer
-
grained
exceptions
to
the
ssl
module
,
so
that
you
don
't have to inspect the exception'
s
attributes
in
the
common
case
.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment