Commit 4ffd05d7 authored by animalize's avatar animalize Committed by Gregory P. Smith

bpo-21872: Fix lzma library decompressing data incompletely (GH-14048)

* 1. add test case with wrong behavior
* 2. fix bug when max_length == -1
* 3. allow b"" as valid input data for decompress_buf()
* 4. when max_length >= 0, let the needs_input mechanism work
* add more asserts to test case
parent 2f1b8575
This diff is collapsed.
Fix the :mod:`lzma` module decompressing data incompletely. When decompressing
a FORMAT_ALONE file that doesn't have an end marker, sometimes the last one to
several dozen bytes were not output. Patch by Ma Lin.
...@@ -872,9 +872,6 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length) ...@@ -872,9 +872,6 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length)
PyObject *result; PyObject *result;
lzma_stream *lzs = &d->lzs; lzma_stream *lzs = &d->lzs;
if (lzs->avail_in == 0)
return PyBytes_FromStringAndSize(NULL, 0);
if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE) if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE); result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
else else
...@@ -891,7 +888,10 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length) ...@@ -891,7 +888,10 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length)
Py_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
lzret = lzma_code(lzs, LZMA_RUN); lzret = lzma_code(lzs, LZMA_RUN);
data_size = (char *)lzs->next_out - PyBytes_AS_STRING(result); data_size = (char *)lzs->next_out - PyBytes_AS_STRING(result);
if (lzret == LZMA_BUF_ERROR && lzs->avail_in == 0 && lzs->avail_out > 0)
lzret = LZMA_OK; /* That wasn't a real error */
Py_END_ALLOW_THREADS Py_END_ALLOW_THREADS
if (catch_lzma_error(lzret)) if (catch_lzma_error(lzret))
goto error; goto error;
if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK)
...@@ -899,15 +899,19 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length) ...@@ -899,15 +899,19 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length)
if (lzret == LZMA_STREAM_END) { if (lzret == LZMA_STREAM_END) {
d->eof = 1; d->eof = 1;
break; break;
} else if (lzs->avail_in == 0) {
break;
} else if (lzs->avail_out == 0) { } else if (lzs->avail_out == 0) {
/* Need to check lzs->avail_out before lzs->avail_in.
Maybe lzs's internal state still has a few bytes that
can be output; grow the output buffer and continue
if max_length < 0. */
if (data_size == max_length) if (data_size == max_length)
break; break;
if (grow_buffer(&result, max_length) == -1) if (grow_buffer(&result, max_length) == -1)
goto error; goto error;
lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size; lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
lzs->avail_out = PyBytes_GET_SIZE(result) - data_size; lzs->avail_out = PyBytes_GET_SIZE(result) - data_size;
} else if (lzs->avail_in == 0) {
break;
} }
} }
if (data_size != PyBytes_GET_SIZE(result)) if (data_size != PyBytes_GET_SIZE(result))
...@@ -990,8 +994,20 @@ decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length) ...@@ -990,8 +994,20 @@ decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
} }
else if (lzs->avail_in == 0) { else if (lzs->avail_in == 0) {
lzs->next_in = NULL; lzs->next_in = NULL;
if (lzs->avail_out == 0) {
/* (avail_in==0 && avail_out==0)
Maybe lzs's internal state still has a few bytes that can
be output; try to output them next time. */
d->needs_input = 0;
/* if max_length < 0, lzs->avail_out always > 0 */
assert(max_length >= 0);
} else {
/* Input buffer exhausted, output buffer has space. */
d->needs_input = 1; d->needs_input = 1;
} }
}
else { else {
d->needs_input = 0; d->needs_input = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment