Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
6537be7f
Commit
6537be7f
authored
Apr 07, 2011
by
Ezio Melotti
Browse files
Options
Browse Files
Download
Plain Diff
#7311: merge with 3.2.
parents
cece8cfe
2e3607c1
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
20 additions
and
1 deletion
+20
-1
Lib/html/parser.py
Lib/html/parser.py
+1
-1
Lib/test/test_htmlparser.py
Lib/test/test_htmlparser.py
+17
-0
Misc/NEWS
Misc/NEWS
+2
-0
No files found.
Lib/html/parser.py
View file @
6537be7f
...
...
@@ -28,7 +28,7 @@ tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
# make it correctly strict without breaking backward compatibility.
attrfind = re.compile(
r'
\
s
*
([
a
-
zA
-
Z_
][
-
.:
a
-
zA
-
Z_0
-
9
]
*
)(
\
s
*=
\
s
*
'
r'
(
\
'[^
\
'
]*
\
'
|"[^"]*"|[
-a-zA-Z0-9./,:;+*%?!&$
\
(
\
)_#=~@
]*))?'
)
r'
(
\
'[^
\
'
]*
\
'
|"[^"]*"|[
^
\
s
"
\
'
=<>`
]*))?'
)
attrfind_tolerant
=
re
.
compile
(
r'\
s*([
a-zA-Z_][-.:a-zA-Z_0-9]*)(\
s*=
\s*'
r'(\'[^\']*\'|"[^"]*"|[^>\
s]*))?
')
...
...
Lib/test/test_htmlparser.py
View file @
6537be7f
...
...
@@ -217,6 +217,23 @@ DOCTYPE html [
(
"starttag"
,
"a"
,
[(
"href"
,
"mailto:xyz@example.com"
)]),
])
def
test_attr_nonascii
(
self
):
# see issue 7311
self
.
_run_check
(
"<img src=/foo/bar.png alt=
\
u4e2d
\
u6587
>"
,
[
(
"starttag"
,
"img"
,
[(
"src"
,
"/foo/bar.png"
),
(
"alt"
,
"
\
u4e2d
\
u6587
"
)]),
])
self
.
_run_check
(
"<a title='
\
u30c6
\
u30b9
\
u30c8
' "
"href='
\
u30c6
\
u30b9
\
u30c8
.html'>"
,
[
(
"starttag"
,
"a"
,
[(
"title"
,
"
\
u30c6
\
u30b9
\
u30c8
"
),
(
"href"
,
"
\
u30c6
\
u30b9
\
u30c8
.html"
)]),
])
self
.
_run_check
(
'<a title="
\
u30c6
\
u30b9
\
u30c8
" '
'href="
\
u30c6
\
u30b9
\
u30c8
.html">'
,
[
(
"starttag"
,
"a"
,
[(
"title"
,
"
\
u30c6
\
u30b9
\
u30c8
"
),
(
"href"
,
"
\
u30c6
\
u30b9
\
u30c8
.html"
)]),
])
def
test_attr_entity_replacement
(
self
):
self
.
_run_check
(
"""<a b='&><"''>"""
,
[
(
"starttag"
,
"a"
,
[(
"b"
,
"&><
\
"
'"
)]),
...
...
Misc/NEWS
View file @
6537be7f
...
...
@@ -94,6 +94,8 @@ Core and Builtins
Library
-------
- Issue #7311: fix html.parser to accept non-ASCII attribute values.
- Issue #11605: email.parser.BytesFeedParser was incorrectly converting multipart
subparts with an 8bit CTE into unicode instead of preserving the bytes.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment