Commit 788fb733 authored by Stefan Behnel

Optimise internal bounds checks like "0 <= i <= limit" using a single unsigned comparison.

See https://bugs.python.org/issue28397
parent d5aef036
...@@ -857,7 +857,7 @@ if (unlikely(__pyx_memoryview_slice_memviewslice( ...@@ -857,7 +857,7 @@ if (unlikely(__pyx_memoryview_slice_memviewslice(
if ({{wraparound}} && (__pyx_tmp_idx < 0)) if ({{wraparound}} && (__pyx_tmp_idx < 0))
__pyx_tmp_idx += __pyx_tmp_shape; __pyx_tmp_idx += __pyx_tmp_shape;
if ({{boundscheck}} && (__pyx_tmp_idx < 0 || __pyx_tmp_idx >= __pyx_tmp_shape)) { if ({{boundscheck}} && !__Pyx_is_valid_index(__pyx_tmp_idx, __pyx_tmp_shape)) {
{{if not have_gil}} {{if not have_gil}}
#ifdef WITH_THREAD #ifdef WITH_THREAD
PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure(); PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();
......
...@@ -398,7 +398,7 @@ static CYTHON_INLINE PyObject *__Pyx_GetItemInt_{{type}}_Fast(PyObject *o, Py_ss ...@@ -398,7 +398,7 @@ static CYTHON_INLINE PyObject *__Pyx_GetItemInt_{{type}}_Fast(PyObject *o, Py_ss
if (wraparound & unlikely(i < 0)) { if (wraparound & unlikely(i < 0)) {
wrapped_i += Py{{type}}_GET_SIZE(o); wrapped_i += Py{{type}}_GET_SIZE(o);
} }
if ((!boundscheck) || likely((0 <= wrapped_i) & (wrapped_i < Py{{type}}_GET_SIZE(o)))) { if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, Py{{type}}_GET_SIZE(o)))) {
PyObject *r = Py{{type}}_GET_ITEM(o, wrapped_i); PyObject *r = Py{{type}}_GET_ITEM(o, wrapped_i);
Py_INCREF(r); Py_INCREF(r);
return r; return r;
...@@ -416,7 +416,7 @@ static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, ...@@ -416,7 +416,7 @@ static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i,
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS
if (is_list || PyList_CheckExact(o)) { if (is_list || PyList_CheckExact(o)) {
Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o); Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o);
if ((!boundscheck) || (likely((n >= 0) & (n < PyList_GET_SIZE(o))))) { if ((!boundscheck) || (likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o))))) {
PyObject *r = PyList_GET_ITEM(o, n); PyObject *r = PyList_GET_ITEM(o, n);
Py_INCREF(r); Py_INCREF(r);
return r; return r;
...@@ -424,7 +424,7 @@ static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, ...@@ -424,7 +424,7 @@ static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i,
} }
else if (PyTuple_CheckExact(o)) { else if (PyTuple_CheckExact(o)) {
Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o); Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o);
if ((!boundscheck) || likely((n >= 0) & (n < PyTuple_GET_SIZE(o)))) { if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyTuple_GET_SIZE(o)))) {
PyObject *r = PyTuple_GET_ITEM(o, n); PyObject *r = PyTuple_GET_ITEM(o, n);
Py_INCREF(r); Py_INCREF(r);
return r; return r;
...@@ -482,7 +482,7 @@ static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObje ...@@ -482,7 +482,7 @@ static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObje
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS
if (is_list || PyList_CheckExact(o)) { if (is_list || PyList_CheckExact(o)) {
Py_ssize_t n = (!wraparound) ? i : ((likely(i >= 0)) ? i : i + PyList_GET_SIZE(o)); Py_ssize_t n = (!wraparound) ? i : ((likely(i >= 0)) ? i : i + PyList_GET_SIZE(o));
if ((!boundscheck) || likely((n >= 0) & (n < PyList_GET_SIZE(o)))) { if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o)))) {
PyObject* old = PyList_GET_ITEM(o, n); PyObject* old = PyList_GET_ITEM(o, n);
Py_INCREF(v); Py_INCREF(v);
PyList_SET_ITEM(o, n, v); PyList_SET_ITEM(o, n, v);
......
...@@ -165,7 +165,7 @@ static PyObject* __Pyx__PyList_PopIndex(PyObject* L, PyObject* py_ix, Py_ssize_t ...@@ -165,7 +165,7 @@ static PyObject* __Pyx__PyList_PopIndex(PyObject* L, PyObject* py_ix, Py_ssize_t
if (cix < 0) { if (cix < 0) {
cix += size; cix += size;
} }
if (likely(0 <= cix && cix < size)) { if (likely(__Pyx_is_valid_index(cix, size))) {
PyObject* v = PyList_GET_ITEM(L, cix); PyObject* v = PyList_GET_ITEM(L, cix);
Py_SIZE(L) -= 1; Py_SIZE(L) -= 1;
size -= 1; size -= 1;
......
...@@ -331,7 +331,7 @@ static CYTHON_INLINE int __Pyx_GetItemInt_ByteArray_Fast(PyObject* string, Py_ss ...@@ -331,7 +331,7 @@ static CYTHON_INLINE int __Pyx_GetItemInt_ByteArray_Fast(PyObject* string, Py_ss
if (wraparound | boundscheck) { if (wraparound | boundscheck) {
length = PyByteArray_GET_SIZE(string); length = PyByteArray_GET_SIZE(string);
if (wraparound & unlikely(i < 0)) i += length; if (wraparound & unlikely(i < 0)) i += length;
if ((!boundscheck) || likely((0 <= i) & (i < length))) { if ((!boundscheck) || likely(__Pyx_is_valid_index(i, length))) {
return (unsigned char) (PyByteArray_AS_STRING(string)[i]); return (unsigned char) (PyByteArray_AS_STRING(string)[i]);
} else { } else {
PyErr_SetString(PyExc_IndexError, "bytearray index out of range"); PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
...@@ -361,7 +361,7 @@ static CYTHON_INLINE int __Pyx_SetItemInt_ByteArray_Fast(PyObject* string, Py_ss ...@@ -361,7 +361,7 @@ static CYTHON_INLINE int __Pyx_SetItemInt_ByteArray_Fast(PyObject* string, Py_ss
if (wraparound | boundscheck) { if (wraparound | boundscheck) {
length = PyByteArray_GET_SIZE(string); length = PyByteArray_GET_SIZE(string);
if (wraparound & unlikely(i < 0)) i += length; if (wraparound & unlikely(i < 0)) i += length;
if ((!boundscheck) || likely((0 <= i) & (i < length))) { if ((!boundscheck) || likely(__Pyx_is_valid_index(i, length))) {
PyByteArray_AS_STRING(string)[i] = (char) v; PyByteArray_AS_STRING(string)[i] = (char) v;
return 0; return 0;
} else { } else {
...@@ -394,7 +394,7 @@ static CYTHON_INLINE Py_UCS4 __Pyx_GetItemInt_Unicode_Fast(PyObject* ustring, Py ...@@ -394,7 +394,7 @@ static CYTHON_INLINE Py_UCS4 __Pyx_GetItemInt_Unicode_Fast(PyObject* ustring, Py
if (wraparound | boundscheck) { if (wraparound | boundscheck) {
length = __Pyx_PyUnicode_GET_LENGTH(ustring); length = __Pyx_PyUnicode_GET_LENGTH(ustring);
if (wraparound & unlikely(i < 0)) i += length; if (wraparound & unlikely(i < 0)) i += length;
if ((!boundscheck) || likely((0 <= i) & (i < length))) { if ((!boundscheck) || likely(__Pyx_is_valid_index(i, length))) {
return __Pyx_PyUnicode_READ_CHAR(ustring, i); return __Pyx_PyUnicode_READ_CHAR(ustring, i);
} else { } else {
PyErr_SetString(PyExc_IndexError, "string index out of range"); PyErr_SetString(PyExc_IndexError, "string index out of range");
...@@ -752,15 +752,15 @@ static CYTHON_INLINE char __Pyx_PyBytes_GetItemInt(PyObject* bytes, Py_ssize_t i ...@@ -752,15 +752,15 @@ static CYTHON_INLINE char __Pyx_PyBytes_GetItemInt(PyObject* bytes, Py_ssize_t i
/////////////// bytes_index /////////////// /////////////// bytes_index ///////////////
static CYTHON_INLINE char __Pyx_PyBytes_GetItemInt(PyObject* bytes, Py_ssize_t index, int check_bounds) { static CYTHON_INLINE char __Pyx_PyBytes_GetItemInt(PyObject* bytes, Py_ssize_t index, int check_bounds) {
if (index < 0)
index += PyBytes_GET_SIZE(bytes);
if (check_bounds) { if (check_bounds) {
Py_ssize_t size = PyBytes_GET_SIZE(bytes); Py_ssize_t size = PyBytes_GET_SIZE(bytes);
if (unlikely(index >= size) | ((index < 0) & unlikely(index < -size))) { if (unlikely(!__Pyx_is_valid_index(index, size))) {
PyErr_SetString(PyExc_IndexError, "string index out of range"); PyErr_SetString(PyExc_IndexError, "string index out of range");
return (char) -1; return (char) -1;
} }
} }
if (index < 0)
index += PyBytes_GET_SIZE(bytes);
return PyBytes_AS_STRING(bytes)[index]; return PyBytes_AS_STRING(bytes)[index];
} }
...@@ -990,7 +990,7 @@ static CYTHON_INLINE int __Pyx_PyByteArray_AppendObject(PyObject* bytearray, PyO ...@@ -990,7 +990,7 @@ static CYTHON_INLINE int __Pyx_PyByteArray_AppendObject(PyObject* bytearray, PyO
{ {
// CPython calls PyNumber_Index() internally // CPython calls PyNumber_Index() internally
ival = __Pyx_PyIndex_AsSsize_t(value); ival = __Pyx_PyIndex_AsSsize_t(value);
if (unlikely((ival < 0) | (ival > 255))) { if (unlikely(!__Pyx_is_valid_index(ival, 256))) {
if (ival == -1 && PyErr_Occurred()) if (ival == -1 && PyErr_Occurred())
return -1; return -1;
goto bad_range; goto bad_range;
...@@ -1012,7 +1012,7 @@ static CYTHON_INLINE int __Pyx_PyByteArray_Append(PyObject* bytearray, int value ...@@ -1012,7 +1012,7 @@ static CYTHON_INLINE int __Pyx_PyByteArray_Append(PyObject* bytearray, int value
static CYTHON_INLINE int __Pyx_PyByteArray_Append(PyObject* bytearray, int value) { static CYTHON_INLINE int __Pyx_PyByteArray_Append(PyObject* bytearray, int value) {
PyObject *pyval, *retval; PyObject *pyval, *retval;
#if CYTHON_COMPILING_IN_CPYTHON #if CYTHON_COMPILING_IN_CPYTHON
if (likely((value >= 0) & (value <= 255))) { if (likely(__Pyx_is_valid_index(value, 256))) {
Py_ssize_t n = Py_SIZE(bytearray); Py_ssize_t n = Py_SIZE(bytearray);
if (likely(n != PY_SSIZE_T_MAX)) { if (likely(n != PY_SSIZE_T_MAX)) {
if (unlikely(PyByteArray_Resize(bytearray, n + 1) < 0)) if (unlikely(PyByteArray_Resize(bytearray, n + 1) < 0))
......
...@@ -16,6 +16,14 @@ ...@@ -16,6 +16,14 @@
(is_signed || likely(v < (type)PY_SSIZE_T_MAX || \ (is_signed || likely(v < (type)PY_SSIZE_T_MAX || \
v == (type)PY_SSIZE_T_MAX))) ) v == (type)PY_SSIZE_T_MAX))) )
static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) {
    // Range test "0 <= i < limit" done with one comparison instead of two.
    // Both operands are reinterpreted as unsigned, so a negative "i" turns
    // into a very large value and fails the "<" test on its own.
    // NOTE(review): this presumes "limit" is a non-negative size - confirm at call sites.
    // Technique: Section 14.2 "Bounds Checking" in
    // https://www.agner.org/optimize/optimizing_cpp.pdf
    // Discussion: https://bugs.python.org/issue28397
    const size_t unsigned_index = (size_t) i;
    const size_t unsigned_limit = (size_t) limit;
    return unsigned_index < unsigned_limit;
}
// fast and unsafe abs(Py_ssize_t) that ignores the overflow for (-PY_SSIZE_T_MAX-1) // fast and unsafe abs(Py_ssize_t) that ignores the overflow for (-PY_SSIZE_T_MAX-1)
#if defined (__cplusplus) && __cplusplus >= 201103L #if defined (__cplusplus) && __cplusplus >= 201103L
#include <cstdlib> #include <cstdlib>
...@@ -529,18 +537,23 @@ static Py_UCS4 __Pyx__PyObject_AsPy_UCS4(PyObject*); ...@@ -529,18 +537,23 @@ static Py_UCS4 __Pyx__PyObject_AsPy_UCS4(PyObject*);
/////////////// ObjectAsUCS4 /////////////// /////////////// ObjectAsUCS4 ///////////////
static Py_UCS4 __Pyx__PyObject_AsPy_UCS4(PyObject* x) { static Py_UCS4 __Pyx__PyObject_AsPy_UCS4_raise_error(long ival) {
long ival; if (ival < 0) {
ival = __Pyx_PyInt_As_long(x);
if (unlikely(ival < 0)) {
if (!PyErr_Occurred()) if (!PyErr_Occurred())
PyErr_SetString(PyExc_OverflowError, PyErr_SetString(PyExc_OverflowError,
"cannot convert negative value to Py_UCS4"); "cannot convert negative value to Py_UCS4");
return (Py_UCS4)-1; } else {
} else if (unlikely(ival > 1114111)) {
PyErr_SetString(PyExc_OverflowError, PyErr_SetString(PyExc_OverflowError,
"value too large to convert to Py_UCS4"); "value too large to convert to Py_UCS4");
}
return (Py_UCS4)-1; return (Py_UCS4)-1;
}
static Py_UCS4 __Pyx__PyObject_AsPy_UCS4(PyObject* x) {
long ival;
ival = __Pyx_PyInt_As_long(x);
if (unlikely(!__Pyx_is_valid_index(ival, 1114111 + 1))) {
return __Pyx__PyObject_AsPy_UCS4_raise_error(ival);
} }
return (Py_UCS4)ival; return (Py_UCS4)ival;
} }
...@@ -582,14 +595,16 @@ static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject* x) { ...@@ -582,14 +595,16 @@ static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject* x) {
#endif #endif
ival = __Pyx_PyInt_As_long(x); ival = __Pyx_PyInt_As_long(x);
} }
if (unlikely(ival < 0)) { if (unlikely(!__Pyx_is_valid_index(ival, maxval + 1))) {
if (ival < 0) {
if (!PyErr_Occurred()) if (!PyErr_Occurred())
PyErr_SetString(PyExc_OverflowError, PyErr_SetString(PyExc_OverflowError,
"cannot convert negative value to Py_UNICODE"); "cannot convert negative value to Py_UNICODE");
return (Py_UNICODE)-1; return (Py_UNICODE)-1;
} else if (unlikely(ival > maxval)) { } else {
PyErr_SetString(PyExc_OverflowError, PyErr_SetString(PyExc_OverflowError,
"value too large to convert to Py_UNICODE"); "value too large to convert to Py_UNICODE");
}
return (Py_UNICODE)-1; return (Py_UNICODE)-1;
} }
return (Py_UNICODE)ival; return (Py_UNICODE)ival;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment