Commit 12a08c47, authored by hajoscher, committed by INADA Naoki

bpo-34010: Fix tarfile read performance regression (GH-8020)

During a buffered read, collect the chunks in a list and join them once instead of repeatedly extending a bytes object.
This is how it was done before it was changed in commit b506dc32.
parent 97ae32c9
...@@ -525,7 +525,7 @@ class _Stream: ...@@ -525,7 +525,7 @@ class _Stream:
if not buf: if not buf:
break break
t.append(buf) t.append(buf)
buf = "".join(t) buf = b"".join(t)
else: else:
buf = self._read(size) buf = self._read(size)
self.pos += len(buf) self.pos += len(buf)
...@@ -538,6 +538,7 @@ class _Stream: ...@@ -538,6 +538,7 @@ class _Stream:
return self.__read(size) return self.__read(size)
c = len(self.dbuf) c = len(self.dbuf)
t = [self.dbuf]
while c < size: while c < size:
buf = self.__read(self.bufsize) buf = self.__read(self.bufsize)
if not buf: if not buf:
...@@ -546,26 +547,27 @@ class _Stream: ...@@ -546,26 +547,27 @@ class _Stream:
buf = self.cmp.decompress(buf) buf = self.cmp.decompress(buf)
except self.exception: except self.exception:
raise ReadError("invalid compressed data") raise ReadError("invalid compressed data")
self.dbuf += buf t.append(buf)
c += len(buf) c += len(buf)
buf = self.dbuf[:size] t = b"".join(t)
self.dbuf = self.dbuf[size:] self.dbuf = t[size:]
return buf return t[:size]
def __read(self, size): def __read(self, size):
"""Return size bytes from stream. If internal buffer is empty, """Return size bytes from stream. If internal buffer is empty,
read another block from the stream. read another block from the stream.
""" """
c = len(self.buf) c = len(self.buf)
t = [self.buf]
while c < size: while c < size:
buf = self.fileobj.read(self.bufsize) buf = self.fileobj.read(self.bufsize)
if not buf: if not buf:
break break
self.buf += buf t.append(buf)
c += len(buf) c += len(buf)
buf = self.buf[:size] t = b"".join(t)
self.buf = self.buf[size:] self.buf = t[size:]
return buf return t[:size]
# class _Stream # class _Stream
class _StreamProxy(object): class _StreamProxy(object):
......
Fixed a performance regression for reading streams with tarfile. The
buffered read should use a list, instead of appending to a bytes object.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment