Commit dbe0982b authored by Serhiy Storchaka

Issue #8260: The read(), readline() and readlines() methods of

codecs.StreamReader returned incomplete data when called after
readline() or read(size).  Based on patch by Amaury Forgeot d'Arc.
parent 0742cae3
...@@ -475,15 +475,12 @@ class StreamReader(Codec): ...@@ -475,15 +475,12 @@ class StreamReader(Codec):
# read until we get the required number of characters (if available) # read until we get the required number of characters (if available)
while True: while True:
# can the request be satisfied from the character buffer? # can the request be satisfied from the character buffer?
if chars < 0: if chars >= 0:
if size < 0:
if self.charbuffer:
break
elif len(self.charbuffer) >= size:
break
else:
if len(self.charbuffer) >= chars: if len(self.charbuffer) >= chars:
break break
elif size >= 0:
if len(self.charbuffer) >= size:
break
# we need more data # we need more data
if size < 0: if size < 0:
newdata = self.stream.read() newdata = self.stream.read()
...@@ -491,6 +488,8 @@ class StreamReader(Codec): ...@@ -491,6 +488,8 @@ class StreamReader(Codec):
newdata = self.stream.read(size) newdata = self.stream.read(size)
# decode bytes (those remaining from the last call included) # decode bytes (those remaining from the last call included)
data = self.bytebuffer + newdata data = self.bytebuffer + newdata
if not data:
break
try: try:
newchars, decodedbytes = self.decode(data, self.errors) newchars, decodedbytes = self.decode(data, self.errors)
except UnicodeDecodeError as exc: except UnicodeDecodeError as exc:
......
...@@ -175,6 +175,40 @@ class ReadTest(MixInCheckStateHandling): ...@@ -175,6 +175,40 @@ class ReadTest(MixInCheckStateHandling):
size*"a", size*"a",
) )
# Regression test for mixing partial reads with full reads on a codec
# stream reader: after a readline() or read(size, chars) call, a following
# read() or readlines() is expected to return all of the remaining text.
# NOTE(review): leading indentation is missing in this view; the nesting
# (method body, nested getreader() body) is assumed -- confirm in the file.
def test_mixed_readline_and_read(self):
# Sample text whose lines end in "\n", "\r\n", "\r" and no terminator.
lines = ["Humpty Dumpty sat on a wall,\n",
"Humpty Dumpty had a great fall.\r\n",
"All the king's horses and all the king's men\r",
"Couldn't put Humpty together again."]
data = ''.join(lines)
# Build a new reader for self.encoding over an in-memory copy of the
# encoded sample text; every scenario below starts from a fresh reader.
def getreader():
stream = io.BytesIO(data.encode(self.encoding))
return codecs.getreader(self.encoding)(stream)
# Issue #8260: Test readline() followed by read()
f = getreader()
self.assertEqual(f.readline(), lines[0])
# read() with no arguments must return everything after the first line.
self.assertEqual(f.read(), ''.join(lines[1:]))
# Once the data is exhausted, a further read() must return ''.
self.assertEqual(f.read(), '')
# Issue #16636: Test readline() followed by readlines()
f = getreader()
self.assertEqual(f.readline(), lines[0])
self.assertEqual(f.readlines(), lines[1:])
self.assertEqual(f.read(), '')
# Test read() followed by read()
f = getreader()
# chars=5 limits the returned text to five characters (data[:5]).
self.assertEqual(f.read(size=40, chars=5), data[:5])
# The unbounded read() must then return the rest, starting at data[5:].
self.assertEqual(f.read(), data[5:])
self.assertEqual(f.read(), '')
# Issue #12446: Test read() followed by readlines()
f = getreader()
self.assertEqual(f.read(size=40, chars=5), data[:5])
# Expected result: the unread tail of the first line, then the other lines.
self.assertEqual(f.readlines(), [lines[0][5:]] + lines[1:])
self.assertEqual(f.read(), '')
def test_bug1175396(self): def test_bug1175396(self):
s = [ s = [
'<%!--===================================================\r\n', '<%!--===================================================\r\n',
...@@ -2370,8 +2404,6 @@ class TransformCodecTest(unittest.TestCase): ...@@ -2370,8 +2404,6 @@ class TransformCodecTest(unittest.TestCase):
def test_readline(self): def test_readline(self):
for encoding in bytes_transform_encodings: for encoding in bytes_transform_encodings:
if encoding in ['uu_codec', 'zlib_codec']:
continue
with self.subTest(encoding=encoding): with self.subTest(encoding=encoding):
sin = codecs.encode(b"\x80", encoding) sin = codecs.encode(b"\x80", encoding)
reader = codecs.getreader(encoding)(io.BytesIO(sin)) reader = codecs.getreader(encoding)(io.BytesIO(sin))
......
...@@ -36,6 +36,10 @@ Core and Builtins ...@@ -36,6 +36,10 @@ Core and Builtins
Library Library
------- -------
- Issue #8260: The read(), readline() and readlines() methods of
codecs.StreamReader returned incomplete data when called after
readline() or read(size). Based on patch by Amaury Forgeot d'Arc.
- Issue #20105: the codec exception chaining now correctly sets the - Issue #20105: the codec exception chaining now correctly sets the
traceback of the original exception as its __traceback__ attribute. traceback of the original exception as its __traceback__ attribute.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment