Commit 90e01e50 authored by postmasters's avatar postmasters Committed by Victor Stinner

urllib: Simplify splithost by calling into urlparse. (#1849)

The current regex based splitting produces a wrong result. For example::

  http://abc#@def

Web browsers parse that URL as ``http://abc/#@def``, that is, the host
is ``abc``, the path is ``/``, and the fragment is ``#@def``.
parent 5cc7ac24
...@@ -755,28 +755,35 @@ class UrlParseTestCase(unittest.TestCase): ...@@ -755,28 +755,35 @@ class UrlParseTestCase(unittest.TestCase):
def test_parse_fragments(self): def test_parse_fragments(self):
# Exercise the allow_fragments parameter of urlparse() and urlsplit() # Exercise the allow_fragments parameter of urlparse() and urlsplit()
tests = ( tests = (
("http:#frag", "path"), ("http:#frag", "path", "frag"),
("//example.net#frag", "path"), ("//example.net#frag", "path", "frag"),
("index.html#frag", "path"), ("index.html#frag", "path", "frag"),
(";a=b#frag", "params"), (";a=b#frag", "params", "frag"),
("?a=b#frag", "query"), ("?a=b#frag", "query", "frag"),
("#frag", "path"), ("#frag", "path", "frag"),
("abc#@frag", "path", "@frag"),
("//abc#@frag", "path", "@frag"),
("//abc:80#@frag", "path", "@frag"),
("//abc#@frag:80", "path", "@frag:80"),
) )
for url, attr in tests: for url, attr, expected_frag in tests:
for func in (urllib.parse.urlparse, urllib.parse.urlsplit): for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
if attr == "params" and func is urllib.parse.urlsplit: if attr == "params" and func is urllib.parse.urlsplit:
attr = "path" attr = "path"
with self.subTest(url=url, function=func): with self.subTest(url=url, function=func):
result = func(url, allow_fragments=False) result = func(url, allow_fragments=False)
self.assertEqual(result.fragment, "") self.assertEqual(result.fragment, "")
self.assertTrue(getattr(result, attr).endswith("#frag")) self.assertTrue(
getattr(result, attr).endswith("#" + expected_frag))
self.assertEqual(func(url, "", False).fragment, "") self.assertEqual(func(url, "", False).fragment, "")
result = func(url, allow_fragments=True) result = func(url, allow_fragments=True)
self.assertEqual(result.fragment, "frag") self.assertEqual(result.fragment, expected_frag)
self.assertFalse(getattr(result, attr).endswith("frag")) self.assertFalse(
self.assertEqual(func(url, "", True).fragment, "frag") getattr(result, attr).endswith(expected_frag))
self.assertEqual(func(url).fragment, "frag") self.assertEqual(func(url, "", True).fragment,
expected_frag)
self.assertEqual(func(url).fragment, expected_frag)
def test_mixed_types_rejected(self): def test_mixed_types_rejected(self):
# Several functions that process either strings or ASCII encoded bytes # Several functions that process either strings or ASCII encoded bytes
...@@ -983,6 +990,26 @@ class Utility_Tests(unittest.TestCase): ...@@ -983,6 +990,26 @@ class Utility_Tests(unittest.TestCase):
self.assertEqual(splithost('/foo/bar/baz.html'), self.assertEqual(splithost('/foo/bar/baz.html'),
(None, '/foo/bar/baz.html')) (None, '/foo/bar/baz.html'))
# bpo-30500: # starts a fragment.
self.assertEqual(splithost('//127.0.0.1#@host.com'),
('127.0.0.1', '/#@host.com'))
self.assertEqual(splithost('//127.0.0.1#@host.com:80'),
('127.0.0.1', '/#@host.com:80'))
self.assertEqual(splithost('//127.0.0.1:80#@host.com'),
('127.0.0.1:80', '/#@host.com'))
# Empty host is returned as empty string.
self.assertEqual(splithost("///file"),
('', '/file'))
# Trailing semicolon, question mark and hash symbol are kept.
self.assertEqual(splithost("//example.net/file;"),
('example.net', '/file;'))
self.assertEqual(splithost("//example.net/file?"),
('example.net', '/file?'))
self.assertEqual(splithost("//example.net/file#"),
('example.net', '/file#'))
def test_splituser(self): def test_splituser(self):
splituser = urllib.parse.splituser splituser = urllib.parse.splituser
self.assertEqual(splituser('User:Pass@www.python.org:080'), self.assertEqual(splituser('User:Pass@www.python.org:080'),
......
...@@ -947,7 +947,7 @@ def splithost(url): ...@@ -947,7 +947,7 @@ def splithost(url):
"""splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
global _hostprog global _hostprog
if _hostprog is None: if _hostprog is None:
_hostprog = re.compile('//([^/?]*)(.*)', re.DOTALL) _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)
match = _hostprog.match(url) match = _hostprog.match(url)
if match: if match:
......
...@@ -1091,6 +1091,7 @@ Max Neunhöffer ...@@ -1091,6 +1091,7 @@ Max Neunhöffer
Anthon van der Neut Anthon van der Neut
George Neville-Neil George Neville-Neil
Hieu Nguyen Hieu Nguyen
Nam Nguyen
Johannes Nicolai Johannes Nicolai
Samuel Nicolary Samuel Nicolary
Jonathan Niehof Jonathan Niehof
......
...@@ -12,7 +12,7 @@ Core and Builtins ...@@ -12,7 +12,7 @@ Core and Builtins
- bpo-30682: Removed a too-strict assertion that failed for certain f-strings, - bpo-30682: Removed a too-strict assertion that failed for certain f-strings,
such as eval("f'\\\n'") and eval("f'\\\r'"). such as eval("f'\\\n'") and eval("f'\\\r'").
- bpo-30501: The compiler now produces more optimal code for complex condition - bpo-30501: The compiler now produces more optimal code for complex condition
expressions in the "if", "while" and "assert" statement, the "if" expression, expressions in the "if", "while" and "assert" statement, the "if" expression,
and generator expressions and comprehensions. and generator expressions and comprehensions.
...@@ -365,6 +365,11 @@ Extension Modules ...@@ -365,6 +365,11 @@ Extension Modules
Library Library
------- -------
- [Security] bpo-30500: Fix urllib.parse.splithost() to correctly parse
fragments. For example, ``splithost('http://127.0.0.1#@evil.com/')`` now
correctly returns the ``127.0.0.1`` host, instead of treating ``@evil.com``
as the host in an authentification (``login@host``).
- bpo-30038: Fix race condition between signal delivery and wakeup file - bpo-30038: Fix race condition between signal delivery and wakeup file
descriptor. Patch by Nathaniel Smith. descriptor. Patch by Nathaniel Smith.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment