Commit 7b98f54a authored by Sam Sneddon, committed by GitHub

Avoid integer overflow when decoding bytes/charptr (GH-3535)

Fixes GH-3534.
parent 090809da
......@@ -518,9 +518,9 @@ static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
if (stop < 0)
stop += length;
}
-length = stop - start;
-if (unlikely(length <= 0))
+if (unlikely(stop <= start))
return PyUnicode_FromUnicode(NULL, 0);
+length = stop - start;
cstring += start;
if (decode_func) {
return decode_func(cstring, length, errors);
......@@ -554,9 +554,9 @@ static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes(
}
if (stop > length)
stop = length;
-length = stop - start;
-if (unlikely(length <= 0))
+if (unlikely(stop <= start))
return PyUnicode_FromUnicode(NULL, 0);
+length = stop - start;
cstring += start;
if (decode_func) {
return decode_func(cstring, length, errors);
......
cimport cython
cdef extern from *:
cdef Py_ssize_t PY_SSIZE_T_MIN
cdef Py_ssize_t PY_SSIZE_T_MAX
SSIZE_T_MAX = PY_SSIZE_T_MAX
SSIZE_T_MIN = PY_SSIZE_T_MIN
b_a = b'a'
b_b = b'b'
......@@ -114,6 +122,14 @@ def bytes_decode(bytes s, start=None, stop=None):
<BLANKLINE>
>>> print(bytes_decode(s, -300, -500))
<BLANKLINE>
>>> print(bytes_decode(s, SSIZE_T_MIN, SSIZE_T_MIN))
<BLANKLINE>
>>> print(bytes_decode(s, SSIZE_T_MIN, SSIZE_T_MAX))
abaab
>>> print(bytes_decode(s, SSIZE_T_MAX, SSIZE_T_MIN))
<BLANKLINE>
>>> print(bytes_decode(s, SSIZE_T_MAX, SSIZE_T_MAX))
<BLANKLINE>
>>> s[:'test'] # doctest: +ELLIPSIS
Traceback (most recent call last):
......
cimport cython
cdef extern from *:
cdef Py_ssize_t PY_SSIZE_T_MIN
cdef Py_ssize_t PY_SSIZE_T_MAX
############################################################
# tests for char* slicing
......@@ -118,6 +123,19 @@ def slice_charptr_dynamic_bounds_non_name():
(cstring+1)[:].decode('UTF-8'),
(cstring+1)[return1():return5()].decode('UTF-8'))
# Test added by this commit: slices `cstring` (defined outside this hunk) with
# bounds at the extremes of Py_ssize_t and decodes each slice as UTF-8.
# The doctest expects the full 'abcABCqtp' for [PY_SSIZE_T_MIN:9] and an empty
# string for the three slices whose stop does not lie after their start.
# The two decorators require a "//PythonCapiCallNode" path to exist and no
# "//AttributeNode" path to exist in the compiled tree.
# NOTE(review): presumably this checks that .decode() was turned into a direct
# C-API call rather than an attribute lookup — confirm against Cython's test
# decorator documentation.
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
def slice_charptr_decode_large_bounds():
"""
>>> print(str(slice_charptr_decode_large_bounds()).replace("u'", "'"))
('abcABCqtp', '', '', '')
"""
# Order of results: [MIN:9], [MAX:MIN], [MIN:MIN], [MAX:MAX].
return (cstring[PY_SSIZE_T_MIN:9].decode('UTF-8'),
cstring[PY_SSIZE_T_MAX:PY_SSIZE_T_MIN].decode('UTF-8'),
cstring[PY_SSIZE_T_MIN:PY_SSIZE_T_MIN].decode('UTF-8'),
cstring[PY_SSIZE_T_MAX:PY_SSIZE_T_MAX].decode('UTF-8'))
cdef return1(): return 1
cdef return3(): return 3
cdef return4(): return 4
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment