Commit 252f4dc6 authored by Nadeem Vawda

Issue #16350, part 2: Set unused_data (and unconsumed_tail) correctly in decompressobj().flush().

Additionally, fix a bug where a MemoryError in allocating a bytes object could
leave the decompressor object in an invalid state (with its unconsumed_tail
member being NULL).

Patch by Serhiy Storchaka.
parent 67f089f8
...@@ -429,15 +429,27 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase): ...@@ -429,15 +429,27 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
def test_decompress_unused_data(self): def test_decompress_unused_data(self):
# Repeated calls to decompress() after EOF should accumulate data in # Repeated calls to decompress() after EOF should accumulate data in
# dco.unused_data, instead of just storing the arg to the last call. # dco.unused_data, instead of just storing the arg to the last call.
x = zlib.compress(HAMLET_SCENE) + HAMLET_SCENE source = b'abcdefghijklmnopqrstuvwxyz'
for step in 1, 2, 100: remainder = b'0123456789'
dco = zlib.decompressobj() y = zlib.compress(source)
data = b''.join(dco.decompress(x[i : i + step]) x = y + remainder
for i in range(0, len(x), step)) for maxlen in 0, 1000:
data += dco.flush() for step in 1, 2, len(y), len(x):
dco = zlib.decompressobj()
self.assertEqual(data, HAMLET_SCENE) data = b''
self.assertEqual(dco.unused_data, HAMLET_SCENE) for i in range(0, len(x), step):
if i < len(y):
self.assertEqual(dco.unused_data, b'')
if maxlen == 0:
data += dco.decompress(x[i : i + step])
self.assertEqual(dco.unconsumed_tail, b'')
else:
data += dco.decompress(
dco.unconsumed_tail + x[i : i + step], maxlen)
data += dco.flush()
self.assertEqual(data, source)
self.assertEqual(dco.unconsumed_tail, b'')
self.assertEqual(dco.unused_data, remainder)
if hasattr(zlib.decompressobj(), "copy"): if hasattr(zlib.decompressobj(), "copy"):
def test_decompresscopy(self): def test_decompresscopy(self):
......
...@@ -140,9 +140,11 @@ Core and Builtins ...@@ -140,9 +140,11 @@ Core and Builtins
Library Library
------- -------
- Issue #16350: zlib.Decompress.decompress() now accumulates data from - Issue #16350: zlib.decompressobj().decompress() now accumulates data from
successive calls after EOF in unused_data, instead of only saving the argument successive calls after EOF in unused_data, instead of only saving the argument
to the last call. Patch by Serhiy Storchaka. to the last call. decompressobj().flush() now correctly sets unused_data and
unconsumed_tail. A bug in the handling of MemoryError when setting the
unconsumed_tail attribute has also been fixed. Patch by Serhiy Storchaka.
- Issue #12759: sre_parse now raises a proper error when the name of the group - Issue #12759: sre_parse now raises a proper error when the name of the group
is missing. Initial patch by Serhiy Storchaka. is missing. Initial patch by Serhiy Storchaka.
......
...@@ -467,6 +467,49 @@ PyZlib_objcompress(compobject *self, PyObject *args) ...@@ -467,6 +467,49 @@ PyZlib_objcompress(compobject *self, PyObject *args)
return RetVal; return RetVal;
} }
/* Stash the input that is still pending in self->zst after a decompression
   step.  Shared by objdecompress() and unflush().

   err is the zlib status code supplied by the caller.  When it is
   Z_STREAM_END, pending input is appended to self->unused_data and the
   pending count is reset to zero.  After that, self->unconsumed_tail is
   rebuilt from whatever input is still pending; this empties it when nothing
   is pending and it was previously non-empty.

   Returns 0 on success and -1 on failure (size overflow or a failed string
   allocation).  Each attribute is swapped only after its replacement has been
   allocated, so a failure never leaves either of them NULL. */
static int
save_unconsumed_input(compobject *self, int err)
{
    PyObject *tail;

    if (err == Z_STREAM_END && self->zst.avail_in > 0) {
        /* Input remains past the end of the compressed stream: build a new
           string made of the current unused_data followed by that input. */
        Py_ssize_t prev_len = PyString_GET_SIZE(self->unused_data);
        Py_ssize_t total_len;
        PyObject *joined;
        char *dest;

        /* Refuse to build a string whose length would not fit in
           Py_ssize_t. */
        if (self->zst.avail_in > PY_SSIZE_T_MAX - prev_len) {
            PyErr_NoMemory();
            return -1;
        }
        total_len = prev_len + self->zst.avail_in;
        joined = PyString_FromStringAndSize(NULL, total_len);
        if (joined == NULL)
            return -1;

        dest = PyString_AS_STRING(joined);
        Py_MEMCPY(dest, PyString_AS_STRING(self->unused_data), prev_len);
        Py_MEMCPY(dest + prev_len, self->zst.next_in, self->zst.avail_in);

        Py_DECREF(self->unused_data);
        self->unused_data = joined;
        /* Everything pending now lives in unused_data. */
        self->zst.avail_in = 0;
    }

    /* No pending input and an already-empty unconsumed_tail: nothing to do. */
    if (self->zst.avail_in == 0 &&
        PyString_GET_SIZE(self->unconsumed_tail) == 0)
        return 0;

    /* Two situations end up here:
       1. The output limit was reached, so the pending input is kept in
          unconsumed_tail.
       2. All input was consumed, so a stale unconsumed_tail is replaced by
          an empty string. */
    tail = PyString_FromStringAndSize((char *)self->zst.next_in,
                                      self->zst.avail_in);
    if (tail == NULL)
        return -1;
    Py_DECREF(self->unconsumed_tail);
    self->unconsumed_tail = tail;
    return 0;
}
PyDoc_STRVAR(decomp_decompress__doc__, PyDoc_STRVAR(decomp_decompress__doc__,
"decompress(data, max_length) -- Return a string containing the decompressed\n" "decompress(data, max_length) -- Return a string containing the decompressed\n"
"version of the data.\n" "version of the data.\n"
...@@ -541,60 +584,20 @@ PyZlib_objdecompress(compobject *self, PyObject *args) ...@@ -541,60 +584,20 @@ PyZlib_objdecompress(compobject *self, PyObject *args)
Py_END_ALLOW_THREADS Py_END_ALLOW_THREADS
} }
if(max_length) { if (save_unconsumed_input(self, err) < 0) {
/* Not all of the compressed data could be accommodated in a buffer of
the specified size. Return the unconsumed tail in an attribute. */
Py_DECREF(self->unconsumed_tail);
self->unconsumed_tail = PyString_FromStringAndSize((char *)self->zst.next_in,
self->zst.avail_in);
}
else if (PyString_GET_SIZE(self->unconsumed_tail) > 0) {
/* All of the compressed data was consumed. Clear unconsumed_tail. */
Py_DECREF(self->unconsumed_tail);
self->unconsumed_tail = PyString_FromStringAndSize("", 0);
}
if(!self->unconsumed_tail) {
Py_DECREF(RetVal); Py_DECREF(RetVal);
RetVal = NULL; RetVal = NULL;
goto error; goto error;
} }
/* The end of the compressed data has been reached, so set the /* This is the logical place to call inflateEnd, but the old behaviour of
unused_data attribute to a string containing the remainder of the only calling it on flush() is preserved. */
data in the string. Note that this is also a logical place to call
inflateEnd, but the old behaviour of only calling it on flush() is if (err != Z_STREAM_END && err != Z_OK && err != Z_BUF_ERROR) {
preserved.
*/
if (err == Z_STREAM_END) {
if (self->zst.avail_in > 0) {
/* Append the leftover data to the existing value of unused_data. */
Py_ssize_t old_size = PyString_GET_SIZE(self->unused_data);
Py_ssize_t new_size = old_size + self->zst.avail_in;
PyObject *new_data;
if (new_size <= old_size) { /* Check for overflow. */
PyErr_NoMemory();
Py_DECREF(RetVal);
RetVal = NULL;
goto error;
}
new_data = PyString_FromStringAndSize(NULL, new_size);
if (new_data == NULL) {
Py_DECREF(RetVal);
RetVal = NULL;
goto error;
}
Py_MEMCPY(PyString_AS_STRING(new_data),
PyString_AS_STRING(self->unused_data), old_size);
Py_MEMCPY(PyString_AS_STRING(new_data) + old_size,
self->zst.next_in, self->zst.avail_in);
Py_DECREF(self->unused_data);
self->unused_data = new_data;
}
/* We will only get Z_BUF_ERROR if the output buffer was full /* We will only get Z_BUF_ERROR if the output buffer was full
but there wasn't more output when we tried again, so it is but there wasn't more output when we tried again, so it is
not an error condition. not an error condition.
*/ */
} else if (err != Z_OK && err != Z_BUF_ERROR) {
zlib_error(self->zst, err, "while decompressing"); zlib_error(self->zst, err, "while decompressing");
Py_DECREF(RetVal); Py_DECREF(RetVal);
RetVal = NULL; RetVal = NULL;
...@@ -848,6 +851,12 @@ PyZlib_unflush(compobject *self, PyObject *args) ...@@ -848,6 +851,12 @@ PyZlib_unflush(compobject *self, PyObject *args)
Py_END_ALLOW_THREADS Py_END_ALLOW_THREADS
} }
if (save_unconsumed_input(self, err) < 0) {
Py_DECREF(retval);
retval = NULL;
goto error;
}
/* If flushmode is Z_FINISH, we also have to call deflateEnd() to free /* If flushmode is Z_FINISH, we also have to call deflateEnd() to free
various data structures. Note we should only get Z_STREAM_END when various data structures. Note we should only get Z_STREAM_END when
flushmode is Z_FINISH */ flushmode is Z_FINISH */
...@@ -861,6 +870,7 @@ PyZlib_unflush(compobject *self, PyObject *args) ...@@ -861,6 +870,7 @@ PyZlib_unflush(compobject *self, PyObject *args)
goto error; goto error;
} }
} }
_PyString_Resize(&retval, self->zst.total_out - start_total_out); _PyString_Resize(&retval, self->zst.total_out - start_total_out);
error: error:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment