Commit 1410206b authored by Hye-Shik Chang's avatar Hye-Shik Chang

Fix a bug where robotparser exhausts memory when the server responds

in HTTP/0.9, caused by a mismatch between httplib.LineAndFileWrapper and
urllib.addbase.
parent 9d036da9
......@@ -1181,7 +1181,9 @@ class LineAndFileWrapper:
self.readlines = self._file.readlines
def read(self, amt=None):
assert not self._line_consumed and self._line_left
if self._line_consumed:
return self._file.read(amt)
assert self._line_left
if amt is None or amt > self._line_left:
s = self._line[self._line_offset:]
self._done()
......@@ -1201,11 +1203,17 @@ class LineAndFileWrapper:
return s
def readline(self):
    """Return the next line, serving the buffered line before the file.

    While the buffered line has not been consumed, the unread remainder
    of ``self._line`` (from ``self._line_offset`` onwards) is returned and
    ``self._done()`` is called.  Once ``self._line_consumed`` is true,
    the call is forwarded to the wrapped file's own ``readline()``.
    """
    if not self._line_consumed:
        # Invariant: an unconsumed buffered line still has bytes left.
        assert self._line_left
        pending = self._line[self._line_offset:]
        # presumably flips the wrapper over to the underlying file --
        # _done() is defined outside this view; confirm there.
        self._done()
        return pending
    return self._file.readline()
def readlines(self, size=None):
if self._line_consumed:
return self._file.readlines(size)
assert self._line_left
L = [self._line[self._line_offset:]]
self._done()
if size is None:
......
"""Regresssion tests for urllib"""
import urllib
import httplib
import unittest
from test import test_support
import os
import mimetools
import StringIO
def hexescape(char):
"""Escape char as RFC 2396 specifies"""
......@@ -88,6 +90,37 @@ class urlopen_FileTests(unittest.TestCase):
for line in self.returned_obj.__iter__():
self.assertEqual(line, self.text)
class urlopen_HttpTests(unittest.TestCase):
    """Test urlopen() opening a fake http connection."""

    def fakehttp(self, fakedata):
        """Make httplib.HTTP use a connection whose socket yields fakedata.

        The connection class installed on ``httplib.HTTP`` builds a
        StringIO-backed socket in ``connect()``, so no network access takes
        place.  Undo the patch with ``unfakehttp()``.
        """

        class FakeSocket(StringIO.StringIO):
            # Stand-in socket: outgoing data is discarded, and the object
            # itself is handed back as the "file" returned by makefile().

            def sendall(self, str):
                pass

            def makefile(self, mode, name):
                return self

            def read(self, amt=None):
                # After close() report end-of-data with an empty string.
                if not self.closed:
                    return StringIO.StringIO.read(self, amt)
                return ''

            def readline(self, length=None):
                # After close() report end-of-data with an empty string.
                if not self.closed:
                    return StringIO.StringIO.readline(self, length)
                return ''

        class FakeHTTPConnection(httplib.HTTPConnection):
            def connect(self):
                self.sock = FakeSocket(fakedata)

        # The real connection class must still be installed at this point.
        assert httplib.HTTP._connection_class == httplib.HTTPConnection
        httplib.HTTP._connection_class = FakeHTTPConnection

    def unfakehttp(self):
        """Put httplib.HTTPConnection back as httplib.HTTP's connection class."""
        httplib.HTTP._connection_class = httplib.HTTPConnection

    def test_read(self):
        """A body with no status line is readable via readline(), then ''."""
        self.fakehttp('Hello!')
        try:
            response = urllib.urlopen("http://python.org/")
            first_line = response.readline()
            self.assertEqual(first_line, 'Hello!')
            second_line = response.readline()
            self.assertEqual(second_line, '')
        finally:
            # Always restore the real connection class, even on failure.
            self.unfakehttp()
class urlretrieve_FileTests(unittest.TestCase):
"""Test urllib.urlretrieve() on local files"""
......@@ -410,6 +443,7 @@ class Pathname_Tests(unittest.TestCase):
def test_main():
test_support.run_unittest(
urlopen_FileTests,
urlopen_HttpTests,
urlretrieve_FileTests,
QuotingTests,
UnquotingTests,
......
......@@ -322,6 +322,8 @@ Extension modules
Library
-------
- urllib.urlopen().readline() now handles HTTP/0.9 correctly.
- refactored site.py into functions. Also wrote regression tests for the
module.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment