Commit e99e9776 authored by Martin Panter's avatar Martin Panter

Issue #25626: Change zlib to accept Py_ssize_t and cap to UINT_MAX

The underlying zlib library stores sizes in “unsigned int”. The corresponding
Python parameters are all sizes of buffers filled in by zlib, so it is okay
to reduce higher values to the UINT_MAX internal cap. OverflowError is still
raised for sizes that do not fit in Py_ssize_t.

Sizes are now limited to Py_ssize_t rather than unsigned long, because Python
byte strings cannot be larger than Py_ssize_t. Previously this could result
in a SystemError on 32-bit platforms.

This resolves a regression in the gzip module when reading more than UINT_MAX
or LONG_MAX bytes in one call, introduced by revision 62723172412c.
parent d13cade3
...@@ -1249,18 +1249,18 @@ Here's the simplest example of a custom converter, from ``Modules/zlibmodule.c`` ...@@ -1249,18 +1249,18 @@ Here's the simplest example of a custom converter, from ``Modules/zlibmodule.c``
/*[python input] /*[python input]
class uint_converter(CConverter): class capped_uint_converter(CConverter):
type = 'unsigned int' type = 'unsigned int'
converter = 'uint_converter' converter = 'capped_uint_converter'
[python start generated code]*/ [python start generated code]*/
/*[python end generated code: checksum=da39a3ee5e6b4b0d3255bfef95601890afd80709]*/ /*[python end generated code: output=da39a3ee5e6b4b0d input=35521e4e733823c7]*/
This block adds a converter to Argument Clinic named ``uint``. Parameters This block adds a converter to Argument Clinic named ``capped_uint``. Parameters
declared as ``uint`` will be declared as type ``unsigned int``, and will declared as ``capped_uint`` will be declared as type ``unsigned int``, and will
be parsed by the ``'O&'`` format unit, which will call the ``uint_converter`` be parsed by the ``'O&'`` format unit, which will call the
converter function. ``capped_uint_converter`` converter function. ``capped_uint`` variables
``uint`` variables automatically support default values. automatically support default values.
More sophisticated custom converters can insert custom C code to More sophisticated custom converters can insert custom C code to
handle initialization and cleanup. handle initialization and cleanup.
......
...@@ -1608,12 +1608,15 @@ class _MemoryWatchdog: ...@@ -1608,12 +1608,15 @@ class _MemoryWatchdog:
def bigmemtest(size, memuse, dry_run=True): def bigmemtest(size, memuse, dry_run=True):
"""Decorator for bigmem tests. """Decorator for bigmem tests.
'minsize' is the minimum useful size for the test (in arbitrary, 'size' is a requested size for the test (in arbitrary, test-interpreted
test-interpreted units.) 'memuse' is the number of 'bytes per size' for units.) 'memuse' is the number of bytes per unit for the test, or a good
the test, or a good estimate of it. estimate of it. For example, a test that needs two byte buffers, of 4 GiB
each, could be decorated with @bigmemtest(size=_4G, memuse=2).
if 'dry_run' is False, it means the test doesn't support dummy runs
when -M is not specified. The 'size' argument is normally passed to the decorated test method as an
extra argument. If 'dry_run' is true, the value passed to the test method
may be less than the requested value. If 'dry_run' is false, it means the
test doesn't support dummy runs when -M is not specified.
""" """
def decorator(f): def decorator(f):
def wrapper(self): def wrapper(self):
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
import unittest import unittest
from test import support from test import support
from test.support import bigmemtest, _4G
import os import os
import io import io
import struct import struct
...@@ -116,6 +117,14 @@ class TestGzip(BaseTest): ...@@ -116,6 +117,14 @@ class TestGzip(BaseTest):
self.assertEqual(f.tell(), nread) self.assertEqual(f.tell(), nread)
self.assertEqual(b''.join(blocks), data1 * 50) self.assertEqual(b''.join(blocks), data1 * 50)
@bigmemtest(size=_4G, memuse=1)
def test_read_large(self, size):
# Read chunk size over UINT_MAX should be supported, despite zlib's
# limitation per low-level call
compressed = gzip.compress(data1, compresslevel=1)
f = gzip.GzipFile(fileobj=io.BytesIO(compressed), mode='rb')
self.assertEqual(f.read(size), data1)
def test_io_on_closed_object(self): def test_io_on_closed_object(self):
# Test that I/O operations on closed GzipFile objects raise a # Test that I/O operations on closed GzipFile objects raise a
# ValueError, just like the corresponding functions on file objects. # ValueError, just like the corresponding functions on file objects.
......
...@@ -122,11 +122,17 @@ class ExceptionTestCase(unittest.TestCase): ...@@ -122,11 +122,17 @@ class ExceptionTestCase(unittest.TestCase):
self.assertRaises(ValueError, zlib.decompressobj().flush, 0) self.assertRaises(ValueError, zlib.decompressobj().flush, 0)
self.assertRaises(ValueError, zlib.decompressobj().flush, -1) self.assertRaises(ValueError, zlib.decompressobj().flush, -1)
@support.cpython_only
def test_overflow(self):
with self.assertRaisesRegex(OverflowError, 'int too large'):
zlib.decompress(b'', 15, sys.maxsize + 1)
with self.assertRaisesRegex(OverflowError, 'int too large'):
zlib.decompressobj().flush(sys.maxsize + 1)
class BaseCompressTestCase(object): class BaseCompressTestCase(object):
def check_big_compress_buffer(self, size, compress_func): def check_big_compress_buffer(self, size, compress_func):
_1M = 1024 * 1024 _1M = 1024 * 1024
fmt = "%%0%dx" % (2 * _1M)
# Generate 10MB worth of random, and expand it by repeating it. # Generate 10MB worth of random, and expand it by repeating it.
# The assumption is that zlib's memory is not big enough to exploit # The assumption is that zlib's memory is not big enough to exploit
# such spread out redundancy. # such spread out redundancy.
...@@ -196,6 +202,18 @@ class CompressTestCase(BaseCompressTestCase, unittest.TestCase): ...@@ -196,6 +202,18 @@ class CompressTestCase(BaseCompressTestCase, unittest.TestCase):
finally: finally:
data = None data = None
@bigmemtest(size=_4G, memuse=1)
def test_large_bufsize(self, size):
# Test decompress(bufsize) parameter greater than the internal limit
data = HAMLET_SCENE * 10
compressed = zlib.compress(data, 1)
self.assertEqual(zlib.decompress(compressed, 15, size), data)
def test_custom_bufsize(self):
data = HAMLET_SCENE * 10
compressed = zlib.compress(data, 1)
self.assertEqual(zlib.decompress(compressed, 15, CustomInt()), data)
class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase): class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
# Test compression object # Test compression object
...@@ -364,6 +382,21 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase): ...@@ -364,6 +382,21 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
self.assertRaises(ValueError, dco.decompress, b"", -1) self.assertRaises(ValueError, dco.decompress, b"", -1)
self.assertEqual(b'', dco.unconsumed_tail) self.assertEqual(b'', dco.unconsumed_tail)
def test_maxlen_large(self):
# Sizes up to sys.maxsize should be accepted, although zlib is
# internally limited to expressing sizes with unsigned int
data = HAMLET_SCENE * 10
self.assertGreater(len(data), zlib.DEF_BUF_SIZE)
compressed = zlib.compress(data, 1)
dco = zlib.decompressobj()
self.assertEqual(dco.decompress(compressed, sys.maxsize), data)
def test_maxlen_custom(self):
data = HAMLET_SCENE * 10
compressed = zlib.compress(data, 1)
dco = zlib.decompressobj()
self.assertEqual(dco.decompress(compressed, CustomInt()), data[:100])
def test_clear_unconsumed_tail(self): def test_clear_unconsumed_tail(self):
# Issue #12050: calling decompress() without providing max_length # Issue #12050: calling decompress() without providing max_length
# should clear the unconsumed_tail attribute. # should clear the unconsumed_tail attribute.
...@@ -537,6 +570,22 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase): ...@@ -537,6 +570,22 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
data = zlib.compress(input2) data = zlib.compress(input2)
self.assertEqual(dco.flush(), input1[1:]) self.assertEqual(dco.flush(), input1[1:])
@bigmemtest(size=_4G, memuse=1)
def test_flush_large_length(self, size):
# Test flush(length) parameter greater than internal limit UINT_MAX
input = HAMLET_SCENE * 10
data = zlib.compress(input, 1)
dco = zlib.decompressobj()
dco.decompress(data, 1)
self.assertEqual(dco.flush(size), input[1:])
def test_flush_custom_length(self):
input = HAMLET_SCENE * 10
data = zlib.compress(input, 1)
dco = zlib.decompressobj()
dco.decompress(data, 1)
self.assertEqual(dco.flush(CustomInt()), input[1:])
@requires_Compress_copy @requires_Compress_copy
def test_compresscopy(self): def test_compresscopy(self):
# Test copying a compression object # Test copying a compression object
...@@ -725,5 +774,10 @@ LAERTES ...@@ -725,5 +774,10 @@ LAERTES
""" """
class CustomInt:
def __int__(self):
return 100
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -77,6 +77,13 @@ Core and Builtins ...@@ -77,6 +77,13 @@ Core and Builtins
Library Library
------- -------
- Issue #25626: Change three zlib functions to accept sizes that fit in
Py_ssize_t, but internally cap those sizes to UINT_MAX. This resolves a
regression in 3.5 where GzipFile.read() failed to read chunks larger than 2
or 4 GiB. The change affects the zlib.Decompress.decompress() max_length
parameter, the zlib.decompress() bufsize parameter, and the
zlib.Decompress.flush() length parameter.
- Issue #25583: Avoid incorrect errors raised by os.makedirs(exist_ok=True) - Issue #25583: Avoid incorrect errors raised by os.makedirs(exist_ok=True)
when the OS gives priority to errors such as EACCES over EEXIST. when the OS gives priority to errors such as EACCES over EEXIST.
......
...@@ -68,7 +68,7 @@ zlib_decompress(PyModuleDef *module, PyObject *args) ...@@ -68,7 +68,7 @@ zlib_decompress(PyModuleDef *module, PyObject *args)
unsigned int bufsize = DEF_BUF_SIZE; unsigned int bufsize = DEF_BUF_SIZE;
if (!PyArg_ParseTuple(args, "y*|iO&:decompress", if (!PyArg_ParseTuple(args, "y*|iO&:decompress",
&data, &wbits, uint_converter, &bufsize)) &data, &wbits, capped_uint_converter, &bufsize))
goto exit; goto exit;
return_value = zlib_decompress_impl(module, &data, wbits, bufsize); return_value = zlib_decompress_impl(module, &data, wbits, bufsize);
...@@ -242,7 +242,7 @@ zlib_Decompress_decompress(compobject *self, PyObject *args) ...@@ -242,7 +242,7 @@ zlib_Decompress_decompress(compobject *self, PyObject *args)
unsigned int max_length = 0; unsigned int max_length = 0;
if (!PyArg_ParseTuple(args, "y*|O&:decompress", if (!PyArg_ParseTuple(args, "y*|O&:decompress",
&data, uint_converter, &max_length)) &data, capped_uint_converter, &max_length))
goto exit; goto exit;
return_value = zlib_Decompress_decompress_impl(self, &data, max_length); return_value = zlib_Decompress_decompress_impl(self, &data, max_length);
...@@ -353,7 +353,7 @@ zlib_Decompress_flush(compobject *self, PyObject *args) ...@@ -353,7 +353,7 @@ zlib_Decompress_flush(compobject *self, PyObject *args)
unsigned int length = DEF_BUF_SIZE; unsigned int length = DEF_BUF_SIZE;
if (!PyArg_ParseTuple(args, "|O&:flush", if (!PyArg_ParseTuple(args, "|O&:flush",
uint_converter, &length)) capped_uint_converter, &length))
goto exit; goto exit;
return_value = zlib_Decompress_flush_impl(self, length); return_value = zlib_Decompress_flush_impl(self, length);
...@@ -438,4 +438,4 @@ exit: ...@@ -438,4 +438,4 @@ exit:
#ifndef ZLIB_COMPRESS_COPY_METHODDEF #ifndef ZLIB_COMPRESS_COPY_METHODDEF
#define ZLIB_COMPRESS_COPY_METHODDEF #define ZLIB_COMPRESS_COPY_METHODDEF
#endif /* !defined(ZLIB_COMPRESS_COPY_METHODDEF) */ #endif /* !defined(ZLIB_COMPRESS_COPY_METHODDEF) */
/*[clinic end generated code: output=56ed1147bbbb4788 input=a9049054013a1b77]*/ /*[clinic end generated code: output=7734aec079550bc8 input=a9049054013a1b77]*/
...@@ -226,42 +226,42 @@ zlib_compress_impl(PyModuleDef *module, Py_buffer *bytes, int level) ...@@ -226,42 +226,42 @@ zlib_compress_impl(PyModuleDef *module, Py_buffer *bytes, int level)
/*[python input] /*[python input]
class uint_converter(CConverter): class capped_uint_converter(CConverter):
type = 'unsigned int' type = 'unsigned int'
converter = 'uint_converter' converter = 'capped_uint_converter'
c_ignored_default = "0" c_ignored_default = "0"
[python start generated code]*/ [python start generated code]*/
/*[python end generated code: output=da39a3ee5e6b4b0d input=22263855f7a3ebfd]*/ /*[python end generated code: output=da39a3ee5e6b4b0d input=35521e4e733823c7]*/
static int static int
uint_converter(PyObject *obj, void *ptr) capped_uint_converter(PyObject *obj, void *ptr)
{ {
long val; PyObject *long_obj;
unsigned long uval; Py_ssize_t val;
val = PyLong_AsLong(obj); long_obj = (PyObject *)_PyLong_FromNbInt(obj);
if (long_obj == NULL) {
return 0;
}
val = PyLong_AsSsize_t(long_obj);
Py_DECREF(long_obj);
if (val == -1 && PyErr_Occurred()) { if (val == -1 && PyErr_Occurred()) {
uval = PyLong_AsUnsignedLong(obj);
if (uval == (unsigned long)-1 && PyErr_Occurred())
return 0; return 0;
} }
else {
if (val < 0) { if (val < 0) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"value must be positive"); "value must be positive");
return 0; return 0;
} }
uval = (unsigned long)val;
}
if (uval > UINT_MAX) { if ((size_t)val > UINT_MAX) {
PyErr_SetString(PyExc_OverflowError, *(unsigned int *)ptr = UINT_MAX;
"Python int too large for C unsigned int"); }
return 0; else {
*(unsigned int *)ptr = Py_SAFE_DOWNCAST(val, Py_ssize_t,
unsigned int);
} }
*(unsigned int *)ptr = Py_SAFE_DOWNCAST(uval, unsigned long, unsigned int);
return 1; return 1;
} }
...@@ -272,7 +272,7 @@ zlib.decompress ...@@ -272,7 +272,7 @@ zlib.decompress
Compressed data. Compressed data.
wbits: int(c_default="MAX_WBITS") = MAX_WBITS wbits: int(c_default="MAX_WBITS") = MAX_WBITS
The window buffer size. The window buffer size.
bufsize: uint(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE bufsize: capped_uint(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE
The initial output buffer size. The initial output buffer size.
/ /
...@@ -282,7 +282,7 @@ Returns a bytes object containing the uncompressed data. ...@@ -282,7 +282,7 @@ Returns a bytes object containing the uncompressed data.
static PyObject * static PyObject *
zlib_decompress_impl(PyModuleDef *module, Py_buffer *data, int wbits, zlib_decompress_impl(PyModuleDef *module, Py_buffer *data, int wbits,
unsigned int bufsize) unsigned int bufsize)
/*[clinic end generated code: output=444d0987f3429574 input=0f4b9abb7103f50e]*/ /*[clinic end generated code: output=444d0987f3429574 input=da095118b3243b27]*/
{ {
PyObject *result_str = NULL; PyObject *result_str = NULL;
Byte *input; Byte *input;
...@@ -691,7 +691,7 @@ zlib.Decompress.decompress ...@@ -691,7 +691,7 @@ zlib.Decompress.decompress
data: Py_buffer data: Py_buffer
The binary data to decompress. The binary data to decompress.
max_length: uint = 0 max_length: capped_uint = 0
The maximum allowable length of the decompressed data. The maximum allowable length of the decompressed data.
Unconsumed input data will be stored in Unconsumed input data will be stored in
the unconsumed_tail attribute. the unconsumed_tail attribute.
...@@ -707,7 +707,7 @@ Call the flush() method to clear these buffers. ...@@ -707,7 +707,7 @@ Call the flush() method to clear these buffers.
static PyObject * static PyObject *
zlib_Decompress_decompress_impl(compobject *self, Py_buffer *data, zlib_Decompress_decompress_impl(compobject *self, Py_buffer *data,
unsigned int max_length) unsigned int max_length)
/*[clinic end generated code: output=b82e2a2c19f5fe7b input=02cfc047377cec86]*/ /*[clinic end generated code: output=b82e2a2c19f5fe7b input=68b6508ab07c2cf0]*/
{ {
int err; int err;
unsigned int old_length, length = DEF_BUF_SIZE; unsigned int old_length, length = DEF_BUF_SIZE;
...@@ -1048,7 +1048,7 @@ error: ...@@ -1048,7 +1048,7 @@ error:
/*[clinic input] /*[clinic input]
zlib.Decompress.flush zlib.Decompress.flush
length: uint(c_default="DEF_BUF_SIZE") = zlib.DEF_BUF_SIZE length: capped_uint(c_default="DEF_BUF_SIZE") = zlib.DEF_BUF_SIZE
the initial size of the output buffer. the initial size of the output buffer.
/ /
...@@ -1057,7 +1057,7 @@ Return a bytes object containing any remaining decompressed data. ...@@ -1057,7 +1057,7 @@ Return a bytes object containing any remaining decompressed data.
static PyObject * static PyObject *
zlib_Decompress_flush_impl(compobject *self, unsigned int length) zlib_Decompress_flush_impl(compobject *self, unsigned int length)
/*[clinic end generated code: output=db6fb753ab698e22 input=1580956505978993]*/ /*[clinic end generated code: output=db6fb753ab698e22 input=1bb961eb21b62aa0]*/
{ {
int err; int err;
unsigned int new_length; unsigned int new_length;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment