Commit e825642e authored by Senthil Kumaran's avatar Senthil Kumaran

issue20753 - robotparser tests should not rely upon external resource when not required.

Specifically, it was relying on a URL which gave HTTP 403 and used it to assert
its methods; this change undoes that and provides a local HTTP server with
similar properties.

Patch contributed by Vajrasky Kok.
parent b9da8449
...@@ -4,6 +4,9 @@ import urllib.robotparser ...@@ -4,6 +4,9 @@ import urllib.robotparser
from urllib.error import URLError, HTTPError from urllib.error import URLError, HTTPError
from urllib.request import urlopen from urllib.request import urlopen
from test import support from test import support
import threading
from http.server import BaseHTTPRequestHandler, HTTPServer
class RobotTestCase(unittest.TestCase): class RobotTestCase(unittest.TestCase):
def __init__(self, index=None, parser=None, url=None, good=None, agent=None): def __init__(self, index=None, parser=None, url=None, good=None, agent=None):
...@@ -247,33 +250,51 @@ bad = ['/another/path?'] ...@@ -247,33 +250,51 @@ bad = ['/another/path?']
RobotTest(16, doc, good, bad) RobotTest(16, doc, good, bad)
class RobotHandler(BaseHTTPRequestHandler):
    """Request handler that refuses every request with HTTP 403.

    Emulates a password-protected site locally, so the robotparser test
    no longer depends on an external (and flaky) network resource.
    """

    def do_GET(self):
        # Any GET — including /robots.txt — is answered with 403 Forbidden,
        # mimicking a site that hides its robots.txt behind authentication.
        self.send_error(403, "Forbidden access")

    def log_message(self, format, *args):
        # Silence the default per-request stderr logging to keep test
        # output clean.
        pass
class PasswordProtectedSiteTestCase(unittest.TestCase):
    """Check robotparser against a local server that always returns 403.

    Replaces the old network-dependent test against mueblesmoraleda.com:
    when robots.txt cannot be fetched because the site forbids access,
    can_fetch() must deny everything.
    """

    def setUp(self):
        # Bind to an ephemeral port (0) so parallel test runs don't clash.
        self.server = HTTPServer((support.HOST, 0), RobotHandler)

        self.t = threading.Thread(
            name='HTTPServer serving',
            target=self.server.serve_forever,
            # Short poll interval to make the test finish quickly.
            # Time between requests is short enough that we won't wake
            # up spuriously too many times.
            kwargs={'poll_interval':0.01})
        self.t.daemon = True  # In case this function raises.
        self.t.start()

    def tearDown(self):
        # Stop serve_forever(), wait for the worker thread, then release
        # the listening socket.
        self.server.shutdown()
        self.t.join()
        self.server.server_close()

    def runTest(self):
        # Entry point used when this case is added directly to a suite.
        self.testPasswordProtectedSite()

    def testPasswordProtectedSite(self):
        addr = self.server.server_address
        url = 'http://' + support.HOST + ':' + str(addr[1])
        robots_url = url + "/robots.txt"
        parser = urllib.robotparser.RobotFileParser()
        parser.set_url(url)
        parser.read()
        # robots.txt answered 403, so access must be disallowed for all.
        self.assertFalse(parser.can_fetch("*", robots_url))

    def __str__(self):
        return '%s' % self.__class__.__name__
@unittest.skip('does not handle the gzip encoding delivered by pydotorg') @unittest.skip('does not handle the gzip encoding delivered by pydotorg')
def testPythonOrg(self): def testPythonOrg(self):
...@@ -288,6 +309,7 @@ class NetworkTestCase(unittest.TestCase): ...@@ -288,6 +309,7 @@ class NetworkTestCase(unittest.TestCase):
def load_tests(loader, suite, pattern):
    """Assemble the module's test suite.

    Combines the network test case, the table-driven robot tests
    accumulated in ``tests``, and the local password-protected-site case.
    """
    suite = unittest.makeSuite(NetworkTestCase)
    suite.addTest(tests)
    suite.addTest(PasswordProtectedSiteTestCase())
    return suite
if __name__=='__main__': if __name__=='__main__':
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment