Commit 00b56185 authored by Antoine Pitrou

#2523: binary buffered reading is quadratic

parent db902aa5
...@@ -893,8 +893,12 @@ class BufferedReader(_BufferedIOMixin): ...@@ -893,8 +893,12 @@ class BufferedReader(_BufferedIOMixin):
""" """
raw._checkReadable() raw._checkReadable()
_BufferedIOMixin.__init__(self, raw) _BufferedIOMixin.__init__(self, raw)
self._read_buf = b""
self.buffer_size = buffer_size self.buffer_size = buffer_size
self._reset_read_buf()
def _reset_read_buf(self):
self._read_buf = b""
self._read_pos = 0
def read(self, n=None): def read(self, n=None):
"""Read n bytes. """Read n bytes.
...@@ -904,25 +908,50 @@ class BufferedReader(_BufferedIOMixin): ...@@ -904,25 +908,50 @@ class BufferedReader(_BufferedIOMixin):
mode. If n is negative, read until EOF or until read() would mode. If n is negative, read until EOF or until read() would
block. block.
""" """
if n is None:
n = -1
nodata_val = b"" nodata_val = b""
while n < 0 or len(self._read_buf) < n: empty_values = (b"", None)
to_read = max(self.buffer_size, buf = self._read_buf
n if n is not None else 2*len(self._read_buf)) pos = self._read_pos
current = self.raw.read(to_read)
if current in (b"", None): # Special case for when the number of bytes to read is unspecified.
nodata_val = current if n is None or n == -1:
self._reset_read_buf()
chunks = [buf[pos:]] # Strip the consumed bytes.
current_size = 0
while True:
# Read until EOF or until read() would block.
chunk = self.raw.read()
if chunk in empty_values:
nodata_val = chunk
break
current_size += len(chunk)
chunks.append(chunk)
return b"".join(chunks) or nodata_val
# The number of bytes to read is specified, return at most n bytes.
avail = len(buf) - pos # Length of the available buffered data.
if n <= avail:
# Fast path: the data to read is fully buffered.
self._read_pos += n
return buf[pos:pos+n]
# Slow path: read from the stream until enough bytes are read,
# or until an EOF occurs or until read() would block.
chunks = [buf[pos:]]
wanted = max(self.buffer_size, n)
while avail < n:
chunk = self.raw.read(wanted)
if chunk in empty_values:
nodata_val = chunk
break break
self._read_buf += current avail += len(chunk)
if self._read_buf: chunks.append(chunk)
if n < 0: # n is more than avail only when an EOF occurred or when
n = len(self._read_buf) # read() would have blocked.
out = self._read_buf[:n] n = min(n, avail)
self._read_buf = self._read_buf[n:] out = b"".join(chunks)
else: self._read_buf = out[n:] # Save the extra data in the buffer.
out = nodata_val self._read_pos = 0
return out return out[:n] if out else nodata_val
def peek(self, n=0): def peek(self, n=0):
"""Returns buffered bytes without advancing the position. """Returns buffered bytes without advancing the position.
...@@ -932,13 +961,14 @@ class BufferedReader(_BufferedIOMixin): ...@@ -932,13 +961,14 @@ class BufferedReader(_BufferedIOMixin):
than self.buffer_size. than self.buffer_size.
""" """
want = min(n, self.buffer_size) want = min(n, self.buffer_size)
have = len(self._read_buf) have = len(self._read_buf) - self._read_pos
if have < want: if have < want:
to_read = self.buffer_size - have to_read = self.buffer_size - have
current = self.raw.read(to_read) current = self.raw.read(to_read)
if current: if current:
self._read_buf += current self._read_buf = self._read_buf[self._read_pos:] + current
return self._read_buf self._read_pos = 0
return self._read_buf[self._read_pos:]
def read1(self, n): def read1(self, n):
"""Reads up to n bytes, with at most one read() system call.""" """Reads up to n bytes, with at most one read() system call."""
...@@ -947,16 +977,16 @@ class BufferedReader(_BufferedIOMixin): ...@@ -947,16 +977,16 @@ class BufferedReader(_BufferedIOMixin):
if n <= 0: if n <= 0:
return b"" return b""
self.peek(1) self.peek(1)
return self.read(min(n, len(self._read_buf))) return self.read(min(n, len(self._read_buf) - self._read_pos))
def tell(self): def tell(self):
return self.raw.tell() - len(self._read_buf) return self.raw.tell() - len(self._read_buf) + self._read_pos
def seek(self, pos, whence=0): def seek(self, pos, whence=0):
if whence == 1: if whence == 1:
pos -= len(self._read_buf) pos -= len(self._read_buf) - self._read_pos
pos = self.raw.seek(pos, whence) pos = self.raw.seek(pos, whence)
self._read_buf = b"" self._reset_read_buf()
return pos return pos
...@@ -1125,14 +1155,14 @@ class BufferedRandom(BufferedWriter, BufferedReader): ...@@ -1125,14 +1155,14 @@ class BufferedRandom(BufferedWriter, BufferedReader):
# First do the raw seek, then empty the read buffer, so that # First do the raw seek, then empty the read buffer, so that
# if the raw seek fails, we don't lose buffered data forever. # if the raw seek fails, we don't lose buffered data forever.
pos = self.raw.seek(pos, whence) pos = self.raw.seek(pos, whence)
self._read_buf = b"" self._reset_read_buf()
return pos return pos
def tell(self): def tell(self):
if (self._write_buf): if self._write_buf:
return self.raw.tell() + len(self._write_buf) return self.raw.tell() + len(self._write_buf)
else: else:
return self.raw.tell() - len(self._read_buf) return BufferedReader.tell(self)
def truncate(self, pos=None): def truncate(self, pos=None):
if pos is None: if pos is None:
...@@ -1161,8 +1191,9 @@ class BufferedRandom(BufferedWriter, BufferedReader): ...@@ -1161,8 +1191,9 @@ class BufferedRandom(BufferedWriter, BufferedReader):
def write(self, b): def write(self, b):
if self._read_buf: if self._read_buf:
self.raw.seek(-len(self._read_buf), 1) # Undo readahead # Undo readahead
self._read_buf = b"" self.raw.seek(self._read_pos - len(self._read_buf), 1)
self._reset_read_buf()
return BufferedWriter.write(self, b) return BufferedWriter.write(self, b)
......
...@@ -22,6 +22,9 @@ Library ...@@ -22,6 +22,9 @@ Library
file name rather than a ZipInfo instance, so files are extracted with file name rather than a ZipInfo instance, so files are extracted with
mode 0600 rather than 000 under Unix. mode 0600 rather than 000 under Unix.
- Issue #2523: Fix quadratic behaviour when read()ing a binary file without
asking for a specific length.
What's new in Python 3.0b2? What's new in Python 3.0b2?
=========================== ===========================
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment