Commit aac6957f authored by Senthil Kumaran

#17403: urllib.parse.robotparser normalizes the urls before adding to ruleline.

This helps in handling certain types of invalid urls in a conservative manner.
parent 048a05ee
...@@ -160,6 +160,7 @@ class RuleLine: ...@@ -160,6 +160,7 @@ class RuleLine:
if path == '' and not allowance: if path == '' and not allowance:
# an empty value means allow all # an empty value means allow all
allowance = True allowance = True
path = urlparse.urlunparse(urlparse.urlparse(path))
self.path = urllib.quote(path) self.path = urllib.quote(path)
self.allowance = allowance self.allowance = allowance
......
...@@ -228,6 +228,18 @@ bad = ['/some/path'] ...@@ -228,6 +228,18 @@ bad = ['/some/path']
RobotTest(15, doc, good, bad) RobotTest(15, doc, good, bad)
# 16. Empty query (issue #17403). Normalizing the url first.
doc = """
User-agent: *
Allow: /some/path?
Disallow: /another/path?
"""
good = ['/some/path?']
bad = ['/another/path?']
RobotTest(16, doc, good, bad)
class NetworkTestCase(unittest.TestCase): class NetworkTestCase(unittest.TestCase):
......
...@@ -15,6 +15,10 @@ Core and Builtins ...@@ -15,6 +15,10 @@ Core and Builtins
Library Library
------- -------
- Issue #17403: urllib.parse.robotparser normalizes the urls before adding to
ruleline. This helps in handling certain types of invalid urls in a
conservative manner. Patch contributed by Mher Movsisyan.
- Implement inequality on weakref.WeakSet. - Implement inequality on weakref.WeakSet.
- Issue #17981: Closed socket on error in SysLogHandler. - Issue #17981: Closed socket on error in SysLogHandler.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment