Commit 1c38546e authored by Nadeem Vawda

Issue #12646: Add an 'eof' attribute to zlib.Decompress.

This will make it easier to detect truncated input streams.

Also, make zlib's error messages more consistent.
parent 74b6abf6
...@@ -152,7 +152,7 @@ Compression objects support the following methods: ...@@ -152,7 +152,7 @@ Compression objects support the following methods:
compress a set of data that share a common initial prefix. compress a set of data that share a common initial prefix.
Decompression objects support the following methods, and two attributes: Decompression objects support the following methods and attributes:
.. attribute:: Decompress.unused_data .. attribute:: Decompress.unused_data
...@@ -162,13 +162,6 @@ Decompression objects support the following methods, and two attributes: ...@@ -162,13 +162,6 @@ Decompression objects support the following methods, and two attributes:
available. If the whole bytestring turned out to contain compressed data, this is available. If the whole bytestring turned out to contain compressed data, this is
``b""``, an empty bytes object. ``b""``, an empty bytes object.
The only way to determine where a bytestring of compressed data ends is by actually
decompressing it. This means that when compressed data is contained as part of a
larger file, you can only find the end of it by reading data and feeding it
followed by some non-empty bytestring into a decompression object's
:meth:`decompress` method until the :attr:`unused_data` attribute is no longer
empty.
.. attribute:: Decompress.unconsumed_tail .. attribute:: Decompress.unconsumed_tail
...@@ -179,6 +172,17 @@ Decompression objects support the following methods, and two attributes: ...@@ -179,6 +172,17 @@ Decompression objects support the following methods, and two attributes:
:meth:`decompress` method call in order to get correct output. :meth:`decompress` method call in order to get correct output.
.. attribute:: Decompress.eof
A boolean indicating whether the end of the compressed data stream has been
reached.
This makes it possible to distinguish between a properly formed compressed
stream and an incomplete or truncated one.
.. versionadded:: 3.3
.. method:: Decompress.decompress(data[, max_length]) .. method:: Decompress.decompress(data[, max_length])
Decompress *data*, returning a bytes object containing the uncompressed data Decompress *data*, returning a bytes object containing the uncompressed data
......
...@@ -447,6 +447,26 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase): ...@@ -447,6 +447,26 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
y += dco.flush() y += dco.flush()
self.assertEqual(y, b'foo') self.assertEqual(y, b'foo')
def test_decompress_eof(self):
# Check that eof becomes true only once the complete stream has been fed in.
x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E' # 'foo'
dco = zlib.decompressobj()
# Before any data has been supplied, eof must be false.
self.assertFalse(dco.eof)
# Feed everything except the last 5 bytes: the stream is still incomplete.
dco.decompress(x[:-5])
self.assertFalse(dco.eof)
# Supplying the remaining 5 bytes completes the stream.
dco.decompress(x[-5:])
self.assertTrue(dco.eof)
# Calling flush() after the end of the stream must leave eof set.
dco.flush()
self.assertTrue(dco.eof)
def test_decompress_eof_incomplete_stream(self):
# Check that eof stays false when the input is cut short, even after flush().
x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E' # 'foo'
dco = zlib.decompressobj()
# Before any data has been supplied, eof must be false.
self.assertFalse(dco.eof)
# Only feed the stream minus its last 5 bytes; the rest is never supplied.
dco.decompress(x[:-5])
self.assertFalse(dco.eof)
# flush() on a truncated stream must not report end-of-stream.
dco.flush()
self.assertFalse(dco.eof)
if hasattr(zlib.compressobj(), "copy"): if hasattr(zlib.compressobj(), "copy"):
def test_compresscopy(self): def test_compresscopy(self):
# Test copying a compression object # Test copying a compression object
......
...@@ -254,6 +254,9 @@ Core and Builtins ...@@ -254,6 +254,9 @@ Core and Builtins
Library Library
------- -------
- Issue #12646: Add an 'eof' attribute to zlib.Decompress, to make it easier to
detect truncated input streams.
- Issue #11513: Fix exception handling in ``tarfile.TarFile.gzopen()`` when - Issue #11513: Fix exception handling in ``tarfile.TarFile.gzopen()`` when
the file cannot be opened. the file cannot be opened.
......
...@@ -43,6 +43,7 @@ typedef struct ...@@ -43,6 +43,7 @@ typedef struct
z_stream zst; z_stream zst;
PyObject *unused_data; PyObject *unused_data;
PyObject *unconsumed_tail; PyObject *unconsumed_tail;
char eof;
int is_initialised; int is_initialised;
#ifdef WITH_THREAD #ifdef WITH_THREAD
PyThread_type_lock lock; PyThread_type_lock lock;
...@@ -89,6 +90,7 @@ newcompobject(PyTypeObject *type) ...@@ -89,6 +90,7 @@ newcompobject(PyTypeObject *type)
self = PyObject_New(compobject, type); self = PyObject_New(compobject, type);
if (self == NULL) if (self == NULL)
return NULL; return NULL;
self->eof = 0;
self->is_initialised = 0; self->is_initialised = 0;
self->unused_data = PyBytes_FromStringAndSize("", 0); self->unused_data = PyBytes_FromStringAndSize("", 0);
if (self->unused_data == NULL) { if (self->unused_data == NULL) {
...@@ -291,7 +293,7 @@ PyZlib_decompress(PyObject *self, PyObject *args) ...@@ -291,7 +293,7 @@ PyZlib_decompress(PyObject *self, PyObject *args)
err = inflateEnd(&zst); err = inflateEnd(&zst);
if (err != Z_OK) { if (err != Z_OK) {
zlib_error(zst, err, "while finishing data decompression"); zlib_error(zst, err, "while finishing decompression");
goto error; goto error;
} }
...@@ -476,7 +478,7 @@ PyZlib_objcompress(compobject *self, PyObject *args) ...@@ -476,7 +478,7 @@ PyZlib_objcompress(compobject *self, PyObject *args)
*/ */
if (err != Z_OK && err != Z_BUF_ERROR) { if (err != Z_OK && err != Z_BUF_ERROR) {
zlib_error(self->zst, err, "while compressing"); zlib_error(self->zst, err, "while compressing data");
Py_DECREF(RetVal); Py_DECREF(RetVal);
RetVal = NULL; RetVal = NULL;
goto error; goto error;
...@@ -611,12 +613,13 @@ PyZlib_objdecompress(compobject *self, PyObject *args) ...@@ -611,12 +613,13 @@ PyZlib_objdecompress(compobject *self, PyObject *args)
Py_DECREF(RetVal); Py_DECREF(RetVal);
goto error; goto error;
} }
self->eof = 1;
/* We will only get Z_BUF_ERROR if the output buffer was full /* We will only get Z_BUF_ERROR if the output buffer was full
but there wasn't more output when we tried again, so it is but there wasn't more output when we tried again, so it is
not an error condition. not an error condition.
*/ */
} else if (err != Z_OK && err != Z_BUF_ERROR) { } else if (err != Z_OK && err != Z_BUF_ERROR) {
zlib_error(self->zst, err, "while decompressing"); zlib_error(self->zst, err, "while decompressing data");
Py_DECREF(RetVal); Py_DECREF(RetVal);
RetVal = NULL; RetVal = NULL;
goto error; goto error;
...@@ -697,7 +700,7 @@ PyZlib_flush(compobject *self, PyObject *args) ...@@ -697,7 +700,7 @@ PyZlib_flush(compobject *self, PyObject *args)
if (err == Z_STREAM_END && flushmode == Z_FINISH) { if (err == Z_STREAM_END && flushmode == Z_FINISH) {
err = deflateEnd(&(self->zst)); err = deflateEnd(&(self->zst));
if (err != Z_OK) { if (err != Z_OK) {
zlib_error(self->zst, err, "from deflateEnd()"); zlib_error(self->zst, err, "while finishing compression");
Py_DECREF(RetVal); Py_DECREF(RetVal);
RetVal = NULL; RetVal = NULL;
goto error; goto error;
...@@ -765,6 +768,7 @@ PyZlib_copy(compobject *self) ...@@ -765,6 +768,7 @@ PyZlib_copy(compobject *self)
Py_XDECREF(retval->unconsumed_tail); Py_XDECREF(retval->unconsumed_tail);
retval->unused_data = self->unused_data; retval->unused_data = self->unused_data;
retval->unconsumed_tail = self->unconsumed_tail; retval->unconsumed_tail = self->unconsumed_tail;
retval->eof = self->eof;
/* Mark it as being initialized */ /* Mark it as being initialized */
retval->is_initialised = 1; retval->is_initialised = 1;
...@@ -816,6 +820,7 @@ PyZlib_uncopy(compobject *self) ...@@ -816,6 +820,7 @@ PyZlib_uncopy(compobject *self)
Py_XDECREF(retval->unconsumed_tail); Py_XDECREF(retval->unconsumed_tail);
retval->unused_data = self->unused_data; retval->unused_data = self->unused_data;
retval->unconsumed_tail = self->unconsumed_tail; retval->unconsumed_tail = self->unconsumed_tail;
retval->eof = self->eof;
/* Mark it as being initialized */ /* Mark it as being initialized */
retval->is_initialised = 1; retval->is_initialised = 1;
...@@ -885,10 +890,11 @@ PyZlib_unflush(compobject *self, PyObject *args) ...@@ -885,10 +890,11 @@ PyZlib_unflush(compobject *self, PyObject *args)
various data structures. Note we should only get Z_STREAM_END when various data structures. Note we should only get Z_STREAM_END when
flushmode is Z_FINISH */ flushmode is Z_FINISH */
if (err == Z_STREAM_END) { if (err == Z_STREAM_END) {
err = inflateEnd(&(self->zst)); self->eof = 1;
self->is_initialised = 0; self->is_initialised = 0;
err = inflateEnd(&(self->zst));
if (err != Z_OK) { if (err != Z_OK) {
zlib_error(self->zst, err, "from inflateEnd()"); zlib_error(self->zst, err, "while finishing decompression");
Py_DECREF(retval); Py_DECREF(retval);
retval = NULL; retval = NULL;
goto error; goto error;
...@@ -936,6 +942,7 @@ static PyMethodDef Decomp_methods[] = ...@@ -936,6 +942,7 @@ static PyMethodDef Decomp_methods[] =
static PyMemberDef Decomp_members[] = { static PyMemberDef Decomp_members[] = {
{"unused_data", T_OBJECT, COMP_OFF(unused_data), READONLY}, {"unused_data", T_OBJECT, COMP_OFF(unused_data), READONLY},
{"unconsumed_tail", T_OBJECT, COMP_OFF(unconsumed_tail), READONLY}, {"unconsumed_tail", T_OBJECT, COMP_OFF(unconsumed_tail), READONLY},
{"eof", T_BOOL, COMP_OFF(eof), READONLY},
{NULL}, {NULL},
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment