Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
304261e8
Commit
304261e8
authored
Nov 18, 2011
by
Ezio Melotti
Browse files
Options
Browse Files
Download
Plain Diff
#13358: merge with 3.2.
parents
def4728f
15cb4892
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
27 additions
and
4 deletions
+27
-4
Lib/html/parser.py
Lib/html/parser.py
+4
-3
Lib/test/test_htmlparser.py
Lib/test/test_htmlparser.py
+21
-1
Misc/NEWS
Misc/NEWS
+2
-0
No files found.
Lib/html/parser.py
View file @
304261e8
...
...
@@ -14,7 +14,6 @@ import re
# Regular expressions used for parsing
interesting_normal = re.compile('[&<]')
interesting_cdata = re.compile(r'<(/|\Z)')
incomplete = re.compile('&[a-zA-Z#]')
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
...
...
@@ -149,8 +148,8 @@ class HTMLParser(_markupbase.ParserBase):
return self.__starttag_text
def set_cdata_mode(self, elem):
self.interesting = interesting_cdata
self.cdata_elem = elem.lower()
self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
def clear_cdata_mode(self):
self.interesting = interesting_normal
...
...
@@ -168,6 +167,8 @@ class HTMLParser(_markupbase.ParserBase):
if match:
j = match.start()
else:
if self.cdata_elem:
break
j = n
if i < j: self.handle_data(rawdata[i:j])
i = self.updatepos(i, j)
...
...
@@ -250,7 +251,7 @@ class HTMLParser(_markupbase.ParserBase):
else:
assert 0, "interesting.search() lied"
# end while
if end and i < n:
if end and i < n and not self.cdata_elem:
self.handle_data(rawdata[i:n])
i = self.updatepos(i, n)
self.rawdata = rawdata[i:]
...
...
Lib/test/test_htmlparser.py
View file @
304261e8
...
...
@@ -301,7 +301,27 @@ DOCTYPE html [
("data", content),
("endtag", element_lower)])
    def test_cdata_with_closing_tags(self):
        # see issue #13358
        # make sure that HTMLParser calls handle_data only once for each CDATA.
        # The normal event collector normalizes the events in get_events,
        # so we override it to return the original list of events.
        class Collector(EventCollector):
            def get_events(self):
                return self.events

        content = """<!-- not a comment --> &not-an-entity-ref;
<a href="" /> </p><p> <span></span></style>
'</script' + '>'"""
        for element in [' script', 'script ', ' script ',
                        '\nscript', 'script\n', '\nscript\n']:
            element_lower = element.lower().strip()
            s = '<script>{content}</{element}>'.format(element=element,
                                                       content=content)
            self._run_check(s, [("starttag", element_lower, []),
                                ("data", content),
                                ("endtag", element_lower)],
                            collector=Collector())
class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
...
...
Misc/NEWS
View file @
304261e8
...
...
@@ -377,6 +377,8 @@ Core and Builtins
Library
-------
- Issue #13358: HTMLParser now calls handle_data only once for each CDATA.

- Issue #4147: minidom's toprettyxml no longer adds whitespace around a text
  node when it is the only child of an element. Initial patch by Dan
  Kenigsberg.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment