Commit f1748bb8 authored by Sam Gross's avatar Sam Gross Committed by GitHub

[0.29] Use atomic reference counting in MemoryView in more cases (GH-4912) (GH-4915)

This fixes a few issues in MemoryView_C.c to allow atomic reference
counting to be used in more cases.

 - Enable GNU atomics for `__GNUC__` >= 5. Previously, GCC 5.0, 6.0, X.0
   versions used lock-based reference counting due to an incorrect preprocessor check.

 - Typo in `__GNUC_PATCHLEVEL__` macro (missing underscores)

 - Enable atomics in MSVC and fix returned values. InterlockedExchangeAdd
   returns the *initial* value (like __sync_fetch_and_add).
   InterlockedIncrement returned the *resulting* value (post increment),
   which would have been incorrect if MSVC atomics had been enabled.

Also avoids allocating a lock in MemoryView when atomics are available,
which additionally fixes a thread-safety issue in the "nogil" CPython fork.

* Use _InterlockedExchangeAdd intrinsic

The InterlockedExchangeSubtract function isn't available in older
versions of MSVC, while InterlockedExchangeAdd is available since
Windows XP.

The intrinsic variant (with the underscore prefix) avoids needing to
include the entire Windows.h header.

* Only use MSVC atomics when compiling for the "nogil" CPython fork
  to prevent potential breakage of existing Windows setups.
parent d5835270
......@@ -23,6 +23,7 @@ cdef extern from "<string.h>":
void *memset(void *b, int c, size_t len)
cdef extern from *:
bint CYTHON_ATOMICS
int __Pyx_GetBuffer(object, Py_buffer *, int) except -1
void __Pyx_ReleaseBuffer(Py_buffer *)
......@@ -351,6 +352,7 @@ cdef class memoryview(object):
(<__pyx_buffer *> &self.view).obj = Py_None
Py_INCREF(Py_None)
if not CYTHON_ATOMICS:
global __pyx_memoryview_thread_locks_used
if __pyx_memoryview_thread_locks_used < THREAD_LOCKS_PREALLOCATED:
self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]
......
......@@ -26,36 +26,29 @@ typedef struct {
#endif
#define __pyx_atomic_int_type int
// todo: Portland pgcc, maybe OS X's OSAtomicIncrement32,
// libatomic + autotools-like distutils support? Such a pain...
#if CYTHON_ATOMICS && __GNUC__ >= 4 && (__GNUC_MINOR__ > 1 || \
(__GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL >= 2)) && \
!defined(__i386__)
#if CYTHON_ATOMICS && (__GNUC__ >= 5 || (__GNUC__ == 4 && \
(__GNUC_MINOR__ > 1 || \
(__GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ >= 2))))
/* gcc >= 4.1.2 */
#define __pyx_atomic_incr_aligned(value, lock) __sync_fetch_and_add(value, 1)
#define __pyx_atomic_decr_aligned(value, lock) __sync_fetch_and_sub(value, 1)
#define __pyx_atomic_incr_aligned(value) __sync_fetch_and_add(value, 1)
#define __pyx_atomic_decr_aligned(value) __sync_fetch_and_sub(value, 1)
#ifdef __PYX_DEBUG_ATOMICS
#warning "Using GNU atomics"
#endif
#elif CYTHON_ATOMICS && defined(_MSC_VER) && 0
#elif CYTHON_ATOMICS && defined(_MSC_VER) && CYTHON_COMPILING_IN_NOGIL
/* msvc */
#include <Windows.h>
#include <intrin.h>
#undef __pyx_atomic_int_type
#define __pyx_atomic_int_type LONG
#define __pyx_atomic_incr_aligned(value, lock) InterlockedIncrement(value)
#define __pyx_atomic_decr_aligned(value, lock) InterlockedDecrement(value)
#define __pyx_atomic_int_type long
#pragma intrinsic (_InterlockedExchangeAdd)
#define __pyx_atomic_incr_aligned(value) _InterlockedExchangeAdd(value, 1)
#define __pyx_atomic_decr_aligned(value) _InterlockedExchangeAdd(value, -1)
#ifdef __PYX_DEBUG_ATOMICS
#pragma message ("Using MSVC atomics")
#endif
#elif CYTHON_ATOMICS && (defined(__ICC) || defined(__INTEL_COMPILER)) && 0
#define __pyx_atomic_incr_aligned(value, lock) _InterlockedIncrement(value)
#define __pyx_atomic_decr_aligned(value, lock) _InterlockedDecrement(value)
#ifdef __PYX_DEBUG_ATOMICS
#warning "Using Intel atomics"
#endif
#else
#undef CYTHON_ATOMICS
#define CYTHON_ATOMICS 0
......@@ -69,9 +62,9 @@ typedef volatile __pyx_atomic_int_type __pyx_atomic_int;
#if CYTHON_ATOMICS
#define __pyx_add_acquisition_count(memview) \
__pyx_atomic_incr_aligned(__pyx_get_slice_count_pointer(memview), memview->lock)
__pyx_atomic_incr_aligned(__pyx_get_slice_count_pointer(memview))
#define __pyx_sub_acquisition_count(memview) \
__pyx_atomic_decr_aligned(__pyx_get_slice_count_pointer(memview), memview->lock)
__pyx_atomic_decr_aligned(__pyx_get_slice_count_pointer(memview))
#else
#define __pyx_add_acquisition_count(memview) \
__pyx_add_acquisition_count_locked(__pyx_get_slice_count_pointer(memview), memview->lock)
......
......@@ -24,8 +24,8 @@ def foo(dtype_t[:] a, dtype_t_out[:, :] b):
_WARNINGS = """
20:10: 'cpdef_method' redeclared
31:10: 'cpdef_cname_method' redeclared
446:72: Argument evaluation order in C function call is undefined and may not be as expected
446:72: Argument evaluation order in C function call is undefined and may not be as expected
749:34: Argument evaluation order in C function call is undefined and may not be as expected
749:34: Argument evaluation order in C function call is undefined and may not be as expected
448:72: Argument evaluation order in C function call is undefined and may not be as expected
448:72: Argument evaluation order in C function call is undefined and may not be as expected
751:34: Argument evaluation order in C function call is undefined and may not be as expected
751:34: Argument evaluation order in C function call is undefined and may not be as expected
"""
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment