Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
16e6f7de
Commit
16e6f7de
authored
Mar 07, 2019
by
Steve Dower
Committed by
GitHub
Mar 07, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bpo-36216: Add check for characters in netloc that normalize to separators (GH-12201)
parent
1f58f4fa
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
61 additions
and
0 deletions
+61
-0
Doc/library/urllib.parse.rst
Doc/library/urllib.parse.rst
+18
-0
Lib/test/test_urlparse.py
Lib/test/test_urlparse.py
+23
-0
Lib/urllib/parse.py
Lib/urllib/parse.py
+17
-0
Misc/NEWS.d/next/Security/2019-03-06-09-38-40.bpo-36216.6q1m4a.rst
....d/next/Security/2019-03-06-09-38-40.bpo-36216.6q1m4a.rst
+3
-0
No files found.
Doc/library/urllib.parse.rst
View file @
16e6f7de
...
...
@@ -124,6 +124,11 @@ or on combining URL components into a URL string.
Unmatched square brackets in the :attr:`netloc` attribute will raise a
:exc:`ValueError`.
Characters in the :attr:`netloc` attribute that decompose under NFKC
normalization (as used by the IDNA encoding) into any of ``/``, ``?``,
``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
normalized (decomposed) before it is parsed, no error will be raised,
because the resulting ASCII separators are then split in the usual way.
.. versionchanged:: 3.2
Added IPv6 URL parsing capabilities.
...
...
@@ -136,6 +141,10 @@ or on combining URL components into a URL string.
Out-of-range port numbers now raise :exc:`ValueError`, instead of
returning :const:`None`.
.. versionchanged:: 3.8
Characters that affect netloc parsing under NFKC normalization will
now raise :exc:`ValueError`.
.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None)
...
...
@@ -259,10 +268,19 @@ or on combining URL components into a URL string.
Unmatched square brackets in the :attr:`netloc` attribute will raise a
:exc:`ValueError`.
Characters in the :attr:`netloc` attribute that decompose under NFKC
normalization (as used by the IDNA encoding) into any of ``/``, ``?``,
``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
normalized (decomposed) before it is parsed, no error will be raised,
because the resulting ASCII separators are then split in the usual way.
.. versionchanged:: 3.6
Out-of-range port numbers now raise :exc:`ValueError`, instead of
returning :const:`None`.
.. versionchanged:: 3.8
Characters that affect netloc parsing under NFKC normalization will
now raise :exc:`ValueError`.
.. function:: urlunsplit(parts)
...
...
Lib/test/test_urlparse.py
View file @
16e6f7de
import
sys
import
unicodedata
import
unittest
import
urllib.parse
...
...
@@ -994,6 +996,27 @@ class UrlParseTestCase(unittest.TestCase):
expected
.
append
(
name
)
self
.
assertCountEqual
(
urllib
.
parse
.
__all__
,
expected
)
def test_urlsplit_normalization(self):
    # The netloc must never contain a URL separator, not even one that
    # only shows up once the text has been normalized.  Collect EVERY
    # non-ASCII character whose decomposition mentions a separator and
    # verify that urlsplit() rejects each of them.
    illegal_chars = '/:#?@'
    hex_chars = set()
    for separator in illegal_chars:
        hex_chars.add('{:04X}'.format(ord(separator)))

    denorm_chars = []
    # NOTE: range() stops before sys.maxunicode, so the very last code
    # point is not examined.
    for codepoint in range(128, sys.maxunicode):
        candidate = chr(codepoint)
        # decomposition() yields space-separated fields, e.g. a tag
        # followed by four-digit hex code points; an empty string means
        # the character has no decomposition.
        fields = unicodedata.decomposition(candidate).split()
        if hex_chars.isdisjoint(fields):
            continue
        if candidate in illegal_chars:
            continue
        denorm_chars.append(candidate)

    # Sanity check: the scan must have found the well-known offenders.
    for expected in ('\u2100', '\uFF03'):
        self.assertIn(expected, denorm_chars)

    for scheme in ("http", "https", "ftp"):
        for c in denorm_chars:
            url = "{}://netloc{}false.netloc/path".format(scheme, c)
            label = '{:04X}'.format(ord(c))
            with self.subTest(url=url, char=label), \
                    self.assertRaises(ValueError):
                urllib.parse.urlsplit(url)
class
Utility_Tests
(
unittest
.
TestCase
):
"""Testcase to test the various utility functions in the urllib."""
...
...
Lib/urllib/parse.py
View file @
16e6f7de
...
...
@@ -396,6 +396,21 @@ def _splitnetloc(url, start=0):
delim
=
min
(
delim
,
wdelim
)
# use earliest delim position
return
url
[
start
:
delim
],
url
[
delim
:]
# return (domain, rest)
def
_checknetloc
(
netloc
):
if
not
netloc
or
netloc
.
isascii
():
return
# looking for characters like \u2100 that expand to 'a/c'
# IDNA uses NFKC equivalence, so normalize for this check
import
unicodedata
netloc2
=
unicodedata
.
normalize
(
'NFKC'
,
netloc
)
if
netloc
==
netloc2
:
return
_
,
_
,
netloc
=
netloc
.
rpartition
(
'@'
)
# anything to the left of '@' is okay
for
c
in
'/?#@:'
:
if
c
in
netloc2
:
raise
ValueError
(
"netloc '"
+
netloc2
+
"' contains invalid "
+
"characters under NFKC normalization"
)
def
urlsplit
(
url
,
scheme
=
''
,
allow_fragments
=
True
):
"""Parse a URL into 5 components:
<scheme>://<netloc>/<path>?<query>#<fragment>
...
...
@@ -424,6 +439,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
url
,
fragment
=
url
.
split
(
'#'
,
1
)
if
'?'
in
url
:
url
,
query
=
url
.
split
(
'?'
,
1
)
_checknetloc
(
netloc
)
v
=
SplitResult
(
'http'
,
netloc
,
url
,
query
,
fragment
)
_parse_cache
[
key
]
=
v
return
_coerce_result
(
v
)
...
...
@@ -447,6 +463,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
url
,
fragment
=
url
.
split
(
'#'
,
1
)
if
'?'
in
url
:
url
,
query
=
url
.
split
(
'?'
,
1
)
_checknetloc
(
netloc
)
v
=
SplitResult
(
scheme
,
netloc
,
url
,
query
,
fragment
)
_parse_cache
[
key
]
=
v
return
_coerce_result
(
v
)
...
...
Misc/NEWS.d/next/Security/2019-03-06-09-38-40.bpo-36216.6q1m4a.rst
0 → 100644
View file @
16e6f7de
Changes urlsplit() to raise ValueError when the URL contains characters that
decompose under IDNA encoding (NFKC-normalization) into characters that
affect how the URL is parsed.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment