Commit 04bedfa3 authored by Łukasz Langa

Issue #27199: TarFile expose copyfileobj bufsize to improve throughput

Patch by Jason Fried.
parent f5781958
...@@ -228,21 +228,21 @@ def calc_chksums(buf): ...@@ -228,21 +228,21 @@ def calc_chksums(buf):
signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf)) signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
return unsigned_chksum, signed_chksum return unsigned_chksum, signed_chksum
def copyfileobj(src, dst, length=None, exception=OSError): def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
"""Copy length bytes from fileobj src to fileobj dst. """Copy length bytes from fileobj src to fileobj dst.
If length is None, copy the entire content. If length is None, copy the entire content.
""" """
bufsize = bufsize or 16 * 1024
if length == 0: if length == 0:
return return
if length is None: if length is None:
shutil.copyfileobj(src, dst) shutil.copyfileobj(src, dst, bufsize)
return return
BUFSIZE = 16 * 1024 blocks, remainder = divmod(length, bufsize)
blocks, remainder = divmod(length, BUFSIZE)
for b in range(blocks): for b in range(blocks):
buf = src.read(BUFSIZE) buf = src.read(bufsize)
if len(buf) < BUFSIZE: if len(buf) < bufsize:
raise exception("unexpected end of data") raise exception("unexpected end of data")
dst.write(buf) dst.write(buf)
...@@ -1403,7 +1403,8 @@ class TarFile(object): ...@@ -1403,7 +1403,8 @@ class TarFile(object):
def __init__(self, name=None, mode="r", fileobj=None, format=None, def __init__(self, name=None, mode="r", fileobj=None, format=None,
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None): errors="surrogateescape", pax_headers=None, debug=None,
errorlevel=None, copybufsize=None):
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
read from an existing archive, 'a' to append data to an existing read from an existing archive, 'a' to append data to an existing
file or 'w' to create a new file overwriting an existing one. `mode' file or 'w' to create a new file overwriting an existing one. `mode'
...@@ -1459,6 +1460,7 @@ class TarFile(object): ...@@ -1459,6 +1460,7 @@ class TarFile(object):
self.errorlevel = errorlevel self.errorlevel = errorlevel
# Init datastructures. # Init datastructures.
self.copybufsize = copybufsize
self.closed = False self.closed = False
self.members = [] # list of members as TarInfo objects self.members = [] # list of members as TarInfo objects
self._loaded = False # flag if all members have been read self._loaded = False # flag if all members have been read
...@@ -1558,7 +1560,7 @@ class TarFile(object): ...@@ -1558,7 +1560,7 @@ class TarFile(object):
saved_pos = fileobj.tell() saved_pos = fileobj.tell()
try: try:
return func(name, "r", fileobj, **kwargs) return func(name, "r", fileobj, **kwargs)
except (ReadError, CompressionError) as e: except (ReadError, CompressionError):
if fileobj is not None: if fileobj is not None:
fileobj.seek(saved_pos) fileobj.seek(saved_pos)
continue continue
...@@ -1963,10 +1965,10 @@ class TarFile(object): ...@@ -1963,10 +1965,10 @@ class TarFile(object):
buf = tarinfo.tobuf(self.format, self.encoding, self.errors) buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
self.fileobj.write(buf) self.fileobj.write(buf)
self.offset += len(buf) self.offset += len(buf)
bufsize=self.copybufsize
# If there's data to follow, append it. # If there's data to follow, append it.
if fileobj is not None: if fileobj is not None:
copyfileobj(fileobj, self.fileobj, tarinfo.size) copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
if remainder > 0: if remainder > 0:
self.fileobj.write(NUL * (BLOCKSIZE - remainder)) self.fileobj.write(NUL * (BLOCKSIZE - remainder))
...@@ -2148,15 +2150,16 @@ class TarFile(object): ...@@ -2148,15 +2150,16 @@ class TarFile(object):
""" """
source = self.fileobj source = self.fileobj
source.seek(tarinfo.offset_data) source.seek(tarinfo.offset_data)
bufsize = self.copybufsize
with bltn_open(targetpath, "wb") as target: with bltn_open(targetpath, "wb") as target:
if tarinfo.sparse is not None: if tarinfo.sparse is not None:
for offset, size in tarinfo.sparse: for offset, size in tarinfo.sparse:
target.seek(offset) target.seek(offset)
copyfileobj(source, target, size, ReadError) copyfileobj(source, target, size, ReadError, bufsize)
target.seek(tarinfo.size) target.seek(tarinfo.size)
target.truncate() target.truncate()
else: else:
copyfileobj(source, target, tarinfo.size, ReadError) copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
def makeunknown(self, tarinfo, targetpath): def makeunknown(self, tarinfo, targetpath):
"""Make a file from a TarInfo object with an unknown type """Make a file from a TarInfo object with an unknown type
...@@ -2235,7 +2238,7 @@ class TarFile(object): ...@@ -2235,7 +2238,7 @@ class TarFile(object):
os.lchown(targetpath, u, g) os.lchown(targetpath, u, g)
else: else:
os.chown(targetpath, u, g) os.chown(targetpath, u, g)
except OSError as e: except OSError:
raise ExtractError("could not change owner") raise ExtractError("could not change owner")
def chmod(self, tarinfo, targetpath): def chmod(self, tarinfo, targetpath):
...@@ -2244,7 +2247,7 @@ class TarFile(object): ...@@ -2244,7 +2247,7 @@ class TarFile(object):
if hasattr(os, 'chmod'): if hasattr(os, 'chmod'):
try: try:
os.chmod(targetpath, tarinfo.mode) os.chmod(targetpath, tarinfo.mode)
except OSError as e: except OSError:
raise ExtractError("could not change mode") raise ExtractError("could not change mode")
def utime(self, tarinfo, targetpath): def utime(self, tarinfo, targetpath):
...@@ -2254,7 +2257,7 @@ class TarFile(object): ...@@ -2254,7 +2257,7 @@ class TarFile(object):
return return
try: try:
os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
except OSError as e: except OSError:
raise ExtractError("could not change modification time") raise ExtractError("could not change modification time")
#-------------------------------------------------------------------------- #--------------------------------------------------------------------------
......
...@@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1 ...@@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #27199: In tarfile, expose copyfileobj bufsize to improve throughput.
Patch by Jason Fried.
- Issue #27948: In f-strings, only allow backslashes inside the braces - Issue #27948: In f-strings, only allow backslashes inside the braces
(where the expressions are). This is a breaking change from the 3.6 (where the expressions are). This is a breaking change from the 3.6
alpha releases, where backslashes are allowed anywhere in an alpha releases, where backslashes are allowed anywhere in an
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment