Commit 6baa5027 authored by Lars Gustäbel

Patch #1230446: tarfile.py: fix ExFileObject so that read() and tell()
work correctly together with readline().

Will backport to 2.5.
parent 55c54a2f
...@@ -628,140 +628,194 @@ class _BZ2Proxy(object): ...@@ -628,140 +628,194 @@ class _BZ2Proxy(object):
#------------------------ #------------------------
# Extraction file object # Extraction file object
#------------------------ #------------------------
class ExFileObject(object): class _FileInFile(object):
"""File-like object for reading an archive member. """A thin wrapper around an existing file object that
Is returned by TarFile.extractfile(). Support for provides a part of its data as an individual file
sparse files included. object.
""" """
def __init__(self, tarfile, tarinfo): def __init__(self, fileobj, offset, size, sparse=None):
self.fileobj = tarfile.fileobj self.fileobj = fileobj
self.name = tarinfo.name self.offset = offset
self.mode = "r" self.size = size
self.closed = False self.sparse = sparse
self.offset = tarinfo.offset_data self.position = 0
self.size = tarinfo.size
self.pos = 0L
self.linebuffer = ""
if tarinfo.issparse():
self.sparse = tarinfo.sparse
self.read = self._readsparse
else:
self.read = self._readnormal
def __read(self, size): def tell(self):
"""Overloadable read method. """Return the current file position.
""" """
return self.fileobj.read(size) return self.position
def readline(self, size=-1): def seek(self, position):
"""Read a line with approx. size. If size is negative, """Seek to a position in the file.
read a whole line. readline() and read() must not
be mixed up (!).
""" """
if size < 0: self.position = position
size = sys.maxint
nl = self.linebuffer.find("\n") def read(self, size=None):
if nl >= 0: """Read data from the file.
nl = min(nl, size) """
if size is None:
size = self.size - self.position
else: else:
size -= len(self.linebuffer) size = min(size, self.size - self.position)
while (nl < 0 and size > 0):
buf = self.read(min(size, 100))
if not buf:
break
self.linebuffer += buf
size -= len(buf)
nl = self.linebuffer.find("\n")
if nl == -1:
s = self.linebuffer
self.linebuffer = ""
return s
buf = self.linebuffer[:nl]
self.linebuffer = self.linebuffer[nl + 1:]
while buf[-1:] == "\r":
buf = buf[:-1]
return buf + "\n"
def readlines(self): if self.sparse is None:
"""Return a list with all (following) lines. return self.readnormal(size)
""" else:
result = [] return self.readsparse(size)
while True:
line = self.readline()
if not line: break
result.append(line)
return result
def _readnormal(self, size=None): def readnormal(self, size):
"""Read operation for regular files. """Read operation for regular files.
""" """
if self.closed: self.fileobj.seek(self.offset + self.position)
raise ValueError("file is closed") self.position += size
self.fileobj.seek(self.offset + self.pos) return self.fileobj.read(size)
bytesleft = self.size - self.pos
if size is None:
bytestoread = bytesleft
else:
bytestoread = min(size, bytesleft)
self.pos += bytestoread
return self.__read(bytestoread)
def _readsparse(self, size=None): def readsparse(self, size):
"""Read operation for sparse files. """Read operation for sparse files.
""" """
if self.closed:
raise ValueError("file is closed")
if size is None:
size = self.size - self.pos
data = [] data = []
while size > 0: while size > 0:
buf = self._readsparsesection(size) buf = self.readsparsesection(size)
if not buf: if not buf:
break break
size -= len(buf) size -= len(buf)
data.append(buf) data.append(buf)
return "".join(data) return "".join(data)
def _readsparsesection(self, size): def readsparsesection(self, size):
"""Read a single section of a sparse file. """Read a single section of a sparse file.
""" """
section = self.sparse.find(self.pos) section = self.sparse.find(self.position)
if section is None: if section is None:
return "" return ""
toread = min(size, section.offset + section.size - self.pos) size = min(size, section.offset + section.size - self.position)
if isinstance(section, _data): if isinstance(section, _data):
realpos = section.realpos + self.pos - section.offset realpos = section.realpos + self.position - section.offset
self.pos += toread
self.fileobj.seek(self.offset + realpos) self.fileobj.seek(self.offset + realpos)
return self.__read(toread) self.position += size
return self.fileobj.read(size)
else: else:
self.pos += toread self.position += size
return NUL * toread return NUL * size
#class _FileInFile
class ExFileObject(object):
"""File-like object for reading an archive member.
Is returned by TarFile.extractfile().
"""
blocksize = 1024
def __init__(self, tarfile, tarinfo):
self.fileobj = _FileInFile(tarfile.fileobj,
tarinfo.offset_data,
tarinfo.size,
getattr(tarinfo, "sparse", None))
self.name = tarinfo.name
self.mode = "r"
self.closed = False
self.size = tarinfo.size
self.position = 0
self.buffer = ""
def read(self, size=None):
"""Read at most size bytes from the file. If size is not
present or None, read all data until EOF is reached.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
buf = ""
if self.buffer:
if size is None:
buf = self.buffer
self.buffer = ""
else:
buf = self.buffer[:size]
self.buffer = self.buffer[size:]
if size is None:
buf += self.fileobj.read()
else:
buf += self.fileobj.read(size - len(buf))
self.position += len(buf)
return buf
def readline(self, size=-1):
"""Read one entire line from the file. If size is present
and non-negative, return a string with at most that
size, which may be an incomplete line.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
if "\n" in self.buffer:
pos = self.buffer.find("\n") + 1
else:
buffers = [self.buffer]
while True:
buf = self.fileobj.read(self.blocksize)
buffers.append(buf)
if not buf or "\n" in buf:
self.buffer = "".join(buffers)
pos = self.buffer.find("\n") + 1
if pos == 0:
# no newline found.
pos = len(self.buffer)
break
if size != -1:
pos = min(size, pos)
buf = self.buffer[:pos]
self.buffer = self.buffer[pos:]
self.position += len(buf)
return buf
def readlines(self):
"""Return a list with all remaining lines.
"""
result = []
while True:
line = self.readline()
if not line: break
result.append(line)
return result
def tell(self): def tell(self):
"""Return the current file position. """Return the current file position.
""" """
return self.pos if self.closed:
raise ValueError("I/O operation on closed file")
return self.position
def seek(self, pos, whence=0): def seek(self, pos, whence=os.SEEK_SET):
"""Seek to a position in the file. """Seek to a position in the file.
""" """
self.linebuffer = "" if self.closed:
if whence == 0: raise ValueError("I/O operation on closed file")
self.pos = min(max(pos, 0), self.size)
if whence == 1: if whence == os.SEEK_SET:
self.position = min(max(pos, 0), self.size)
elif whence == os.SEEK_CUR:
if pos < 0: if pos < 0:
self.pos = max(self.pos + pos, 0) self.position = max(self.position + pos, 0)
else: else:
self.pos = min(self.pos + pos, self.size) self.position = min(self.position + pos, self.size)
if whence == 2: elif whence == os.SEEK_END:
self.pos = max(min(self.size + pos, self.size), 0) self.position = max(min(self.size + pos, self.size), 0)
else:
raise ValueError("Invalid argument")
self.buffer = ""
self.fileobj.seek(self.position)
def close(self): def close(self):
"""Close the file object. """Close the file object.
...@@ -769,20 +823,13 @@ class ExFileObject(object): ...@@ -769,20 +823,13 @@ class ExFileObject(object):
self.closed = True self.closed = True
def __iter__(self): def __iter__(self):
"""Get an iterator over the file object. """Get an iterator over the file's lines.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
return self
def next(self):
"""Get the next item from the file iterator.
""" """
result = self.readline() while True:
if not result: line = self.readline()
raise StopIteration if not line:
return result break
yield line
#class ExFileObject #class ExFileObject
#------------------ #------------------
......
...@@ -110,7 +110,7 @@ class ReadTest(BaseTest): ...@@ -110,7 +110,7 @@ class ReadTest(BaseTest):
"""Test seek() method of _FileObject, incl. random reading. """Test seek() method of _FileObject, incl. random reading.
""" """
if self.sep != "|": if self.sep != "|":
filename = "0-REGTYPE" filename = "0-REGTYPE-TEXT"
self.tar.extract(filename, dirname()) self.tar.extract(filename, dirname())
f = open(os.path.join(dirname(), filename), "rb") f = open(os.path.join(dirname(), filename), "rb")
data = f.read() data = f.read()
...@@ -149,6 +149,16 @@ class ReadTest(BaseTest): ...@@ -149,6 +149,16 @@ class ReadTest(BaseTest):
s2 = fobj.readlines() s2 = fobj.readlines()
self.assert_(s1 == s2, self.assert_(s1 == s2,
"readlines() after seek failed") "readlines() after seek failed")
fobj.seek(0)
self.assert_(len(fobj.readline()) == fobj.tell(),
"tell() after readline() failed")
fobj.seek(512)
self.assert_(len(fobj.readline()) + 512 == fobj.tell(),
"tell() after seek() and readline() failed")
fobj.seek(0)
line = fobj.readline()
self.assert_(fobj.read() == data[len(line):],
"read() after readline() failed")
fobj.close() fobj.close()
def test_old_dirtype(self): def test_old_dirtype(self):
......
...@@ -103,6 +103,9 @@ Core and builtins ...@@ -103,6 +103,9 @@ Core and builtins
Library Library
------- -------
- Patch #1230446: tarfile.py: fix ExFileObject so that read() and tell()
work correctly together with readline().
- Patch #1484695: The tarfile module now raises a HeaderError exception - Patch #1484695: The tarfile module now raises a HeaderError exception
if a buffer given to frombuf() is invalid. if a buffer given to frombuf() is invalid.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment