Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
176630ec
Commit
176630ec
authored
Feb 10, 2012
by
Ezio Melotti
Browse files
Options
Browse Files
Download
Plain Diff
#13960: merge with 3.2.
parents
9f90a731
fa3702dc
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
58 additions
and
2 deletions
+58
-2
Lib/html/parser.py
Lib/html/parser.py
+24
-1
Lib/test/test_htmlparser.py
Lib/test/test_htmlparser.py
+30
-0
Misc/NEWS
Misc/NEWS
+4
-1
No files found.
Lib/html/parser.py
View file @
176630ec
...
...
@@ -184,7 +184,17 @@ class HTMLParser(_markupbase.ParserBase):
elif startswith("
<
?
", i):
k = self.parse_pi(i)
elif startswith("
<
!
", i):
k = self.parse_declaration(i)
# this might fail with things like <! not a comment > or
# <! -- space before '--' -->. When strict is True an
# error is raised, when it's False they will be considered
# as bogus comments and parsed (see parse_bogus_comment).
if self.strict:
k = self.parse_declaration(i)
else:
try:
k = self.parse_declaration(i)
except HTMLParseError:
k = self.parse_bogus_comment(i)
elif (i + 1) < n:
self.handle_data("
<
")
k = i + 1
...
...
@@ -256,6 +266,19 @@ class HTMLParser(_markupbase.ParserBase):
i
=
self
.
updatepos
(
i
,
n
)
self
.
rawdata
=
rawdata
[
i
:]
# Internal -- parse bogus comment, return end index or -1 if not terminated
# see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
def parse_bogus_comment(self, i, report=1):
    """Parse a bogus comment that starts at ``self.rawdata[i]``.

    Everything between the leading ``'<!'`` and the first following
    ``'>'`` is treated as the comment text.

    Parameters:
        i: index in ``self.rawdata`` where the ``'<!'`` is expected.
        report: when true, the comment text is passed to
            ``self.handle_comment``; when false the text is only skipped.

    Returns:
        The index just past the closing ``'>'``, or -1 when no ``'>'`` is
        found after the opening ``'<!'`` (the construct is not terminated
        yet).
    """
    rawdata = self.rawdata
    if rawdata[i:i+2] != '<!':
        # Name this method (not parse_comment) so the report points at the
        # real caller.  NOTE(review): self.error() presumably raises, so
        # the code below is not expected to run in this case -- confirm.
        self.error('unexpected call to parse_bogus_comment()')
    # The search starts after '<!' so the opening marker is never matched.
    pos = rawdata.find('>', i+2)
    if pos == -1:
        return -1
    if report:
        self.handle_comment(rawdata[i+2:pos])
    return pos + 1
# Internal -- parse processing instr, return end or -1 if not terminated
def
parse_pi
(
self
,
i
):
rawdata
=
self
.
rawdata
...
...
Lib/test/test_htmlparser.py
View file @
176630ec
...
...
@@ -323,6 +323,23 @@ DOCTYPE html [
(
"endtag"
,
element_lower
)],
collector
=
Collector
())
def test_comments(self):
    """Check a run of ``<!-- ... -->`` comments against the expected events.

    Each entry pairs a piece of markup with the text expected in the
    matching ``('comment', text)`` event; the pieces are joined into one
    document and handed to ``self._run_check`` in the same order.
    """
    cases = [
        ("<!-- I'm a valid comment -->", " I'm a valid comment "),
        ('<!--me too!-->', 'me too!'),
        ('<!------>', '--'),
        ('<!---->', ''),
        ('<!----I have many hyphens---->', '--I have many hyphens--'),
        ('<!-- I have a > in the middle -->', ' I have a > in the middle '),
        ('<!-- and I have -- in the middle! -->',
         ' and I have -- in the middle! '),
    ]
    html = ''.join(markup for markup, _ in cases)
    expected = [('comment', text) for _, text in cases]
    self._run_check(html, expected)
def
test_condcoms
(
self
):
html
=
(
'<!--[if IE & !(lte IE 8)]>aren
\
'
t<![endif]-->'
'<!--[if IE 8]>condcoms<![endif]-->'
...
...
@@ -426,6 +443,19 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
# see #12888
self
.
assertEqual
(
p
.
unescape
(
'{ '
*
1050
),
'{ '
*
1050
)
def test_broken_comments(self):
    """Check malformed ``<!...>`` constructs against the expected events.

    Each entry pairs a piece of markup with the text expected in the
    matching ``('comment', text)`` event; the pieces are joined into one
    document and handed to ``self._run_check`` in the same order.
    """
    cases = [
        ('<! not really a comment >', ' not really a comment '),
        ('<! not a comment either -->', ' not a comment either --'),
        ('<! -- close enough -->', ' -- close enough --'),
        ('<!!! another bogus comment !!!>', '!! another bogus comment !!!'),
    ]
    html = ''.join(markup for markup, _ in cases)
    expected = [('comment', text) for _, text in cases]
    self._run_check(html, expected)
def
test_broken_condcoms
(
self
):
# these condcoms are missing the '--' after '<!' and before the '>'
html
=
(
'<![if !(IE)]>broken condcom<![endif]>'
...
...
Misc/NEWS
View file @
176630ec
...
...
@@ -466,6 +466,9 @@ Core and Builtins
Library
-------
- Issue #13960: HTMLParser is now able to handle broken comments when
  strict=False.
- Issue #13921: Undocument and clean up sqlite3.OptimizedUnicode,
  which is obsolete in Python 3.x. It's now aliased to str for
  backwards compatibility.
...
...
@@ -498,7 +501,7 @@ Library
- Issue #10881: Fix test_site failure with OS X framework builds.
- Issue #964437 Make IDLE help window non-modal.
- Issue #964437: Make IDLE help window non-modal.
Patch by Guilherme Polo and Roger Serwy.
- Issue #13734: Add os.fwalk(), a directory walking function yielding file
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment