Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
dea6c21a
Commit
dea6c21a
authored
Apr 18, 2012
by
Ezio Melotti
Browse files
Options
Browse Files
Download
Plain Diff
#14538: merge with 3.2.
parents
b0b22423
0780b6bc
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing 3 changed files with 16 additions and 3 deletions
+16
-3
Lib/html/parser.py
Lib/html/parser.py
+3
-3
Lib/test/test_htmlparser.py
Lib/test/test_htmlparser.py
+10
-0
Misc/NEWS
Misc/NEWS
+3
-0
No files found.
Lib/html/parser.py
View file @
dea6c21a
...
@@ -22,7 +22,7 @@ charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
...
@@ -22,7 +22,7 @@ charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
starttagopen
=
re
.
compile
(
'<[a-zA-Z]'
)
starttagopen
=
re
.
compile
(
'<[a-zA-Z]'
)
piclose
=
re
.
compile
(
'>'
)
piclose
=
re
.
compile
(
'>'
)
commentclose
=
re
.
compile
(
r'--\
s*>
')
commentclose
=
re
.
compile
(
r'--\
s*>
')
tagfind = re.compile('
[
a
-
zA
-
Z
][
-
.
a
-
zA
-
Z0
-
9
:
_
]
*
')
tagfind = re.compile('
([
a
-
zA
-
Z
][
-
.
a
-
zA
-
Z0
-
9
:
_
]
*
)(
?
:
\
s
|/
(
?!
>
))
*
')
# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
tagfind_tolerant = re.compile('
[
a
-
zA
-
Z
][
^
\
t
\
n
\
r
\
f
/>
\
x00
]
*
')
tagfind_tolerant = re.compile('
[
a
-
zA
-
Z
][
^
\
t
\
n
\
r
\
f
/>
\
x00
]
*
')
...
@@ -36,7 +36,7 @@ attrfind = re.compile(
...
@@ -36,7 +36,7 @@ attrfind = re.compile(
r'\
s*([
a-zA-Z_][-.:a-zA-Z_0-9]*)(\
s*=
\s*'
r'\
s*([
a-zA-Z_][-.:a-zA-Z_0-9]*)(\
s*=
\s*'
r'(\'[^\']*\'|"[^"]*"|[^\
s
"\'=<>`]*))?'
)
r'(\'[^\']*\'|"[^"]*"|[^\
s
"\'=<>`]*))?'
)
attrfind_tolerant
=
re
.
compile
(
attrfind_tolerant
=
re
.
compile
(
r'
[\
s/]*
((?<=[
\'"
\
s/])[^
\
s/>][^
\
s/=>]*)(
\
s*=+
\
s*
'
r'((?<=[\'"\
s/])[^
\s/>][^\
s/=>]*)(
\s*=+\
s*
'
r'
(
\
'[^
\
'
]*
\
'
|"[^"]*"|(?![
\
'
"])[^>
\
s]*))?(?:
\
s|/(?!>))*'
)
r'
(
\
'[^
\
'
]*
\
'
|"[^"]*"|(?![
\
'
"])[^>
\
s]*))?(?:
\
s|/(?!>))*'
)
locatestarttagend
=
re
.
compile
(
r"""
locatestarttagend
=
re
.
compile
(
r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
...
@@ -327,7 +327,7 @@ class HTMLParser(_markupbase.ParserBase):
...
@@ -327,7 +327,7 @@ class HTMLParser(_markupbase.ParserBase):
match
=
tagfind
.
match
(
rawdata
,
i
+
1
)
match
=
tagfind
.
match
(
rawdata
,
i
+
1
)
assert
match
,
'unexpected call to parse_starttag()'
assert
match
,
'unexpected call to parse_starttag()'
k
=
match
.
end
()
k
=
match
.
end
()
self
.
lasttag
=
tag
=
rawdata
[
i
+
1
:
k
]
.
lower
()
self
.
lasttag
=
tag
=
match
.
group
(
1
)
.
lower
()
while
k
<
endpos
:
while
k
<
endpos
:
if
self
.
strict
:
if
self
.
strict
:
m
=
attrfind
.
match
(
rawdata
,
k
)
m
=
attrfind
.
match
(
rawdata
,
k
)
...
...
Lib/test/test_htmlparser.py
View file @
dea6c21a
...
@@ -409,6 +409,16 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
...
@@ -409,6 +409,16 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
(
'starttag'
,
'a'
,
[(
'foo'
,
None
),
(
'='
,
None
),
(
'bar'
,
None
)])
(
'starttag'
,
'a'
,
[(
'foo'
,
None
),
(
'='
,
None
),
(
'bar'
,
None
)])
]
]
self
.
_run_check
(
html
,
expected
)
self
.
_run_check
(
html
,
expected
)
#see issue #14538
html
=
(
'<meta><meta / ><meta // ><meta / / >'
'<meta/><meta /><meta //><meta//>'
)
expected
=
[
(
'starttag'
,
'meta'
,
[]),
(
'starttag'
,
'meta'
,
[]),
(
'starttag'
,
'meta'
,
[]),
(
'starttag'
,
'meta'
,
[]),
(
'startendtag'
,
'meta'
,
[]),
(
'startendtag'
,
'meta'
,
[]),
(
'startendtag'
,
'meta'
,
[]),
(
'startendtag'
,
'meta'
,
[]),
]
self
.
_run_check
(
html
,
expected
)
def
test_declaration_junk_chars
(
self
):
def
test_declaration_junk_chars
(
self
):
self
.
_run_check
(
"<!DOCTYPE foo $ >"
,
[(
'decl'
,
'DOCTYPE foo $ '
)])
self
.
_run_check
(
"<!DOCTYPE foo $ >"
,
[(
'decl'
,
'DOCTYPE foo $ '
)])
...
...
Misc/NEWS
View file @
dea6c21a
...
@@ -58,6 +58,9 @@ Library
...
@@ -58,6 +58,9 @@ Library
- Issue #14087: multiprocessing: add Condition.wait_for(). Patch by sbt.
- Issue #14087: multiprocessing: add Condition.wait_for(). Patch by sbt.
- Issue #14538: HTMLParser can now parse correctly start tags that contain
  a bare '/'.
- Issue #14452: SysLogHandler no longer inserts a UTF-8 BOM into the message.
- Issue #14452: SysLogHandler no longer inserts a UTF-8 BOM into the message.
- Issue #14386: Expose the dict_proxy internal type as types.MappingProxyType.
- Issue #14386: Expose the dict_proxy internal type as types.MappingProxyType.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment