Commit 1be320eb authored by Senthil Kumaran

Issue #9374 - Generic parsing of query and fragment portions of URLs for any scheme

parent 8d886046
...@@ -636,11 +636,20 @@ class UrlParseTestCase(unittest.TestCase): ...@@ -636,11 +636,20 @@ class UrlParseTestCase(unittest.TestCase):
('s3', 'foo.com', '/stuff', '', '', '')) ('s3', 'foo.com', '/stuff', '', '', ''))
self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"), self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
('x-newscheme', 'foo.com', '/stuff', '', '', '')) ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
# And for bytes... # And for bytes...
self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"), self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
(b's3', b'foo.com', b'/stuff', b'', b'', b'')) (b's3', b'foo.com', b'/stuff', b'', b'', b''))
self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"), self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
(b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b'')) (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
(b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
(b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
def test_mixed_types_rejected(self): def test_mixed_types_rejected(self):
# Several functions that process either strings or ASCII encoded bytes # Several functions that process either strings or ASCII encoded bytes
......
...@@ -44,16 +44,9 @@ uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', ...@@ -44,16 +44,9 @@ uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
'imap', 'wais', 'file', 'mms', 'https', 'shttp', 'imap', 'wais', 'file', 'mms', 'https', 'shttp',
'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh'] 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh']
non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
'mms', '', 'sftp'] 'mms', '', 'sftp']
uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
'nntp', 'wais', 'https', 'shttp', 'snews',
'file', 'prospero', '']
# Characters valid in scheme names # Characters valid in scheme names
scheme_chars = ('abcdefghijklmnopqrstuvwxyz' scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
...@@ -357,9 +350,9 @@ def urlsplit(url, scheme='', allow_fragments=True): ...@@ -357,9 +350,9 @@ def urlsplit(url, scheme='', allow_fragments=True):
if (('[' in netloc and ']' not in netloc) or if (('[' in netloc and ']' not in netloc) or
(']' in netloc and '[' not in netloc)): (']' in netloc and '[' not in netloc)):
raise ValueError("Invalid IPv6 URL") raise ValueError("Invalid IPv6 URL")
if allow_fragments and scheme in uses_fragment and '#' in url: if allow_fragments and '#' in url:
url, fragment = url.split('#', 1) url, fragment = url.split('#', 1)
if scheme in uses_query and '?' in url: if '?' in url:
url, query = url.split('?', 1) url, query = url.split('?', 1)
v = SplitResult(scheme, netloc, url, query, fragment) v = SplitResult(scheme, netloc, url, query, fragment)
_parse_cache[key] = v _parse_cache[key] = v
......
...@@ -63,6 +63,9 @@ Core and Builtins ...@@ -63,6 +63,9 @@ Core and Builtins
Library Library
------- -------
- Issue #9374: Generic parsing of query and fragment portions of URLs for any
scheme, as supported by both RFC 3986 and RFC 2396.
- Issue #14798: Fix the functions in pyclbr to raise an ImportError - Issue #14798: Fix the functions in pyclbr to raise an ImportError
when the first part of a dotted name is not a package. Patch by when the first part of a dotted name is not a package. Patch by
Xavier de Gaye. Xavier de Gaye.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment