Commit d4324bac authored by Victor Stinner, committed by GitHub

bpo-30500: urllib: Simplify splithost by calling into urlparse. (#1849) (#2294)

The current regex-based splitting produces a wrong result. For example::

  http://abc#@def

Web browsers parse that URL as ``http://abc/#@def``, that is, the host
is ``abc``, the path is ``/``, and the fragment is ``@def``.
(cherry picked from commit 90e01e50)
parent b39a7481
......@@ -879,6 +879,26 @@ class Utility_Tests(unittest.TestCase):
self.assertEqual(splithost('/foo/bar/baz.html'),
(None, '/foo/bar/baz.html'))
# bpo-30500: # starts a fragment.
self.assertEqual(splithost('//127.0.0.1#@host.com'),
('127.0.0.1', '/#@host.com'))
self.assertEqual(splithost('//127.0.0.1#@host.com:80'),
('127.0.0.1', '/#@host.com:80'))
self.assertEqual(splithost('//127.0.0.1:80#@host.com'),
('127.0.0.1:80', '/#@host.com'))
# Empty host is returned as empty string.
self.assertEqual(splithost("///file"),
('', '/file'))
# Trailing semicolon, question mark and hash symbol are kept.
self.assertEqual(splithost("//example.net/file;"),
('example.net', '/file;'))
self.assertEqual(splithost("//example.net/file?"),
('example.net', '/file?'))
self.assertEqual(splithost("//example.net/file#"),
('example.net', '/file#'))
def test_splituser(self):
splituser = urllib.splituser
self.assertEqual(splituser('User:Pass@www.python.org:080'),
......
......@@ -1093,8 +1093,7 @@ def splithost(url):
"""splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
global _hostprog
if _hostprog is None:
import re
_hostprog = re.compile('^//([^/?]*)(.*)$')
_hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)
match = _hostprog.match(url)
if match:
......
......@@ -993,6 +993,7 @@ Chad Netzer
Max Neunhöffer
George Neville-Neil
Hieu Nguyen
Nam Nguyen
Johannes Nicolai
Samuel Nicolary
Jonathan Niehof
......
......@@ -52,6 +52,11 @@ Extension Modules
Library
-------
- [Security] bpo-30500: Fix urllib.splithost() to correctly parse
fragments. For example, ``splithost('//127.0.0.1#@evil.com/')`` now
correctly returns the ``127.0.0.1`` host, instead of treating ``@evil.com``
as the host in an authentication component (``login@host``).
- [Security] bpo-29591: Update expat copy from 2.1.1 to 2.2.0 to get fixes
of CVE-2016-0718 and CVE-2016-4472. See
https://sourceforge.net/p/expat/bugs/537/ for more information.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment