Commit fd2a6fab authored by Kirill Smelkov's avatar Kirill Smelkov

libgolang: Fix globals atexit race condition of ~refptr vs access from another thread

Until now we were defining errors like this:

    const error ErrSomething  = errors::New("abc");

However there is a problem here: the runtime will call ErrSomething
destructor on program exit, and for refptr<T> the destructor does 2
things: a) decrefs the object, and b) switches embedded pointer to NULL;

If another thread is still running both "a" and "b" can become race
conditions if that T2 uses ErrSomething via e.g. just

    ...
    if (err == ErrSomething)
        ...

Here is, for example TSAN report about ErrPyStopped usage when accessed by timer thread:

    WARNING: ThreadSanitizer: data race (pid=4224)
     Read of size 8 at 0x7f0d6840e150 by thread T92:
       #0 golang::refptr<golang::_error>::refptr(golang::refptr<golang::_error> const&) golang/libgolang.h:525 (liblibpyxruntime.so.0.1+0x4edc)
       #1 golang::pyx::runtime::PyFunc::operator()() const golang/runtime/libpyxruntime.cpp:222 (liblibpyxruntime.so.0.1+0x423d)
       #2 std::_Function_handler<void (), golang::pyx::runtime::PyFunc>::_M_invoke(std::_Any_data const&) /usr/include/c++/8/bits/std_function.h:297 (_time.so+0x1382b)
       #3 std::function<void ()>::operator()() const /usr/include/c++/8/bits/std_function.h:687 (liblibgolang.so.0.1+0x2ede6)
       #4 golang::time::_Timer::_fire(double, int) golang/time.cpp:197 (liblibgolang.so.0.1+0x48cbe)
       #5 operator() golang/time.cpp:171 (liblibgolang.so.0.1+0x4894c)
       #6 __invoke_impl<void, golang::time::_Timer::reset(double)::<lambda(int)>&, int&> /usr/include/c++/8/bits/invoke.h:60 (liblibgolang.so.0.1+0x4a142)
       #7 __invoke<golang::time::_Timer::reset(double)::<lambda(int)>&, int&> /usr/include/c++/8/bits/invoke.h:95 (liblibgolang.so.0.1+0x4a080)
       #8 __call<void, 0> /usr/include/c++/8/functional:400 (liblibgolang.so.0.1+0x49fcc)
       #9 operator()<> /usr/include/c++/8/functional:484 (liblibgolang.so.0.1+0x49cee)
       #10 _M_invoke /usr/include/c++/8/bits/std_function.h:297 (liblibgolang.so.0.1+0x49858)
       #11 std::function<void ()>::operator()() const /usr/include/c++/8/bits/std_function.h:687 (liblibgolang.so.0.1+0x2ede6)
       #12 operator() golang/libgolang.h:326 (liblibgolang.so.0.1+0x48fa1)
       #13 _FUN golang/libgolang.h:324 (liblibgolang.so.0.1+0x49004)
       #14 <null> <null> (python2+0x194d13)

     Previous write of size 8 at 0x7f0d6840e150 by main thread:
       #0 golang::refptr<golang::_error>::~refptr() golang/libgolang.h:510 (liblibpyxruntime.so.0.1+0x4d39)
       #1 at_exit_wrapper ../../../../src/libsanitizer/tsan/tsan_interceptors.cc:389 (libtsan.so.0+0x28693)

     As if synchronized via sleep:
       #0 nanosleep ../../../../src/libsanitizer/tsan/tsan_interceptors.cc:366 (libtsan.so.0+0x49960)
       #1 __pyx_f_6golang_7runtime_15_runtime_thread_nanosleep golang/runtime/_runtime_thread.c:1702 (_runtime_thread.so+0x48f2)
       #2 _tasknanosleep golang/runtime/libgolang.cpp:1237 (liblibgolang.so.0.1+0x2cca7)
       #3 golang::time::sleep(double) golang/runtime/libgolang.cpp:1251 (liblibgolang.so.0.1+0x2ce0b)
       #4 golang::time::_Timer::_fire(double, int) golang/time.cpp:179 (liblibgolang.so.0.1+0x48bd6)
       #5 operator() golang/time.cpp:171 (liblibgolang.so.0.1+0x4894c)
       #6 __invoke_impl<void, golang::time::_Timer::reset(double)::<lambda(int)>&, int&> /usr/include/c++/8/bits/invoke.h:60 (liblibgolang.so.0.1+0x4a142)
       #7 __invoke<golang::time::_Timer::reset(double)::<lambda(int)>&, int&> /usr/include/c++/8/bits/invoke.h:95 (liblibgolang.so.0.1+0x4a080)
       #8 __call<void, 0> /usr/include/c++/8/functional:400 (liblibgolang.so.0.1+0x49fcc)
       #9 operator()<> /usr/include/c++/8/functional:484 (liblibgolang.so.0.1+0x49cee)
       #10 _M_invoke /usr/include/c++/8/bits/std_function.h:297 (liblibgolang.so.0.1+0x49858)
       #11 std::function<void ()>::operator()() const /usr/include/c++/8/bits/std_function.h:687 (liblibgolang.so.0.1+0x2ede6)
       #12 operator() golang/libgolang.h:326 (liblibgolang.so.0.1+0x48fa1)
       #13 _FUN golang/libgolang.h:324 (liblibgolang.so.0.1+0x49004)
       #14 <null> <null> (python2+0x194d13)

     Location is global 'golang::pyx::runtime::ErrPyStopped' of size 8 at 0x7f0d6840e150 (liblibpyxruntime.so.0.1+0x000000009150)

     Thread T92 (tid=7834, running) created by thread T15 at:
       #0 pthread_create ../../../../src/libsanitizer/tsan/tsan_interceptors.cc:915 (libtsan.so.0+0x2be1b)
       #1 PyThread_start_new_thread <null> (python2+0x194ccf)
       #2 _taskgo golang/runtime/libgolang.cpp:124 (liblibgolang.so.0.1+0x288e7)
       #3 go<golang::time::_Timer::reset(double)::<lambda(int)>, int> golang/libgolang.h:324 (liblibgolang.so.0.1+0x490b7)
       #4 golang::time::_Timer::reset(double) golang/time.cpp:170 (liblibgolang.so.0.1+0x48b36)
       #5 __pyx_f_6golang_5_time_timer_reset_pyexc golang/_time.cpp:3271 (_time.so+0xa506)
       #6 __pyx_pf_6golang_5_time_7PyTimer_6reset golang/_time.cpp:2824 (_time.so+0x97d8)
       #7 __pyx_pw_6golang_5_time_7PyTimer_7reset golang/_time.cpp:2789 (_time.so+0x9751)
       #8 PyEval_EvalFrameEx <null> (python2+0xf27f7)
       #9 std::_Function_handler<void (), golang::pyx::runtime::PyFunc>::_M_invoke(std::_Any_data const&) /usr/include/c++/8/bits/std_function.h:297 (_time.so+0x1382b)
       #10 std::function<void ()>::operator()() const /usr/include/c++/8/bits/std_function.h:687 (liblibgolang.so.0.1+0x2ede6)
       #11 golang::time::_Timer::_fire(double, int) golang/time.cpp:197 (liblibgolang.so.0.1+0x48cbe)
       #12 operator() golang/time.cpp:171 (liblibgolang.so.0.1+0x4894c)
       #13 __invoke_impl<void, golang::time::_Timer::reset(double)::<lambda(int)>&, int&> /usr/include/c++/8/bits/invoke.h:60 (liblibgolang.so.0.1+0x4a142)
       #14 __invoke<golang::time::_Timer::reset(double)::<lambda(int)>&, int&> /usr/include/c++/8/bits/invoke.h:95 (liblibgolang.so.0.1+0x4a080)
       #15 __call<void, 0> /usr/include/c++/8/functional:400 (liblibgolang.so.0.1+0x49fcc)
       #16 operator()<> /usr/include/c++/8/functional:484 (liblibgolang.so.0.1+0x49cee)
       #17 _M_invoke /usr/include/c++/8/bits/std_function.h:297 (liblibgolang.so.0.1+0x49858)
       #18 std::function<void ()>::operator()() const /usr/include/c++/8/bits/std_function.h:687 (liblibgolang.so.0.1+0x2ede6)
       #19 operator() golang/libgolang.h:326 (liblibgolang.so.0.1+0x48fa1)
       #20 _FUN golang/libgolang.h:324 (liblibgolang.so.0.1+0x49004)
       #21 <null> <null> (python2+0x194d13)

    SUMMARY: ThreadSanitizer: data race golang/libgolang.h:525 in golang::refptr<golang::_error>::refptr(golang::refptr<golang::_error> const&)

-> Fix it by not deallocating the object, nor clearing the pointer on smart pointer destruction.

To do so in ergonomic way introduce another template type global<X>,
which can be used instead of X and change types of all global error
variables from `const error` to `const global<error>`. Don't change pxd
to offload pyx users from thinking about global/not-global aspect.

Top-level documentation is TODO.
parent 33cf3113
......@@ -226,6 +226,7 @@ cdef extern from * nogil:
extern void _test_select_inplace();
extern void _test_defer();
extern void _test_refptr();
extern void _test_global();
extern void _test_sync_once_cpp();
"""
void _test_chan_cpp_refcount() except +topyexc
......@@ -238,6 +239,7 @@ cdef extern from * nogil:
void _test_select_inplace() except +topyexc
void _test_defer() except +topyexc
void _test_refptr() except +topyexc
void _test_global() except +topyexc
void _test_sync_once_cpp() except +topyexc
def test_chan_cpp_refcount():
with nogil:
......@@ -269,6 +271,9 @@ def test_defer():
def test_refptr():
with nogil:
_test_refptr()
def test_global():
with nogil:
_test_global()
def test_sync_once_cpp(): # TODO move -> _sync_test.pyx
with nogil:
_test_sync_once_cpp()
......
......@@ -65,8 +65,8 @@ Context background() {
}
const error canceled = errors::New("context canceled");
const error deadlineExceeded = errors::New("deadline exceeded");
const global<error> canceled = errors::New("context canceled");
const global<error> deadlineExceeded = errors::New("deadline exceeded");
// _BaseCtx is the common base for Contexts implemented in this package.
......
......@@ -66,10 +66,10 @@ typedef refptr<_Context> Context;
LIBGOLANG_API Context background();
// canceled is the error returned by Context.err when context is canceled.
extern LIBGOLANG_API const error canceled;
extern LIBGOLANG_API const global<error> canceled;
// deadlineExceeded is the error returned by Context.err when time goes past context's deadline.
extern LIBGOLANG_API const error deadlineExceeded;
extern LIBGOLANG_API const global<error> deadlineExceeded;
// with_cancel creates new context that can be canceled on its own.
//
......
......@@ -481,6 +481,7 @@ private:
template<typename T> class refptr;
template<typename T> refptr<T> adoptref(T *_obj);
template<typename T> refptr<T> newref (T *_obj);
template<typename R> class global;
// refptr<T> is smart pointer to T which manages T lifetime via reference counting.
//
......@@ -492,6 +493,9 @@ template<typename T>
class refptr {
T *_obj;
typedef T obj_type;
friend global<refptr<T>>;
public:
// nil if not explicitly initialized
inline refptr() { _obj = NULL; }
......@@ -564,6 +568,77 @@ public:
inline T *_ptr() const { return _obj; }
};
// global<refptr<T>> is like refptr<T> but does not deallocate the object on pointer destruction.
//
// The sole reason for global to exist is to avoid race condition on program exit
// when global refptr<T> pointer is destructed, but another thread that
// continues to run, accesses the pointer.
//
// global<X> should be interoperable where X is used, for example:
//
// const global<error> ErrSomething = errors::New("abc")
// error err = ...;
// if (err == ErrSomething)
// ...
template<typename R>
class global {
// implementation note:
// - don't use base parent for refptr and global with common functionality
// in parent, because e.g. copy-constructors cannot-be inherited.
// - don't use _refptr<T, bool global> template not to make compiler error
// messages longer for common refptr<T> case.
//
// -> just mimic refptr<T> functionality with a bit of duplication.
typedef typename R::obj_type T;
T *_obj;
public:
// global does not release reference to its object, nor clears ._obj
inline ~global() {}
// cast global<refptr<T>> -> to refptr<T>
operator refptr<T> () const {
return newref<T>(_obj);
}
// nil if not explicitly initialized
inline global() { _obj = NULL; }
// init from refptr<T>
inline global(const refptr<T>& from) {
_obj = from._obj;
if (_obj != NULL)
_obj->incref();
}
// = nil
inline global(nullptr_t) { _obj = NULL; }
inline global& operator=(nullptr_t) {
if (_obj != NULL)
_obj->decref();
_obj = NULL;
return *this;
}
// copy - no need due to refptr<T> cast
// move - no need due to refptr<T> cast
// compare wrt nil
inline bool operator==(nullptr_t) const { return (_obj == NULL); }
inline bool operator!=(nullptr_t) const { return (_obj != NULL); }
// compare wrt refptr
inline bool operator==(const refptr<T>& p2) const { return (_obj == p2._obj); }
inline bool operator!=(const refptr<T>& p2) const { return (_obj != p2._obj); }
// dereference, so that e.g. p->method() automatically works as p._obj->method().
inline T* operator-> () const { return _obj; }
inline T& operator* () const { return *_obj; }
// access to raw pointer
inline T *_ptr() const { return _obj; }
};
// adoptref wraps raw T* pointer into refptr<T> and transfers object ownership to it.
//
// The object is assumed to have reference 1 initially.
......
......@@ -43,7 +43,7 @@ namespace pyx {
namespace runtime {
// ErrPyStopped indicates that Python interpreter is stopped.
extern LIBPYXRUNTIME_API const error ErrPyStopped;
extern LIBPYXRUNTIME_API const global<error> ErrPyStopped;
// PyError wraps Python exception into error.
// PyError can be used from nogil code.
......
......@@ -588,6 +588,99 @@ void _test_refptr() {
// p goes out of scope and destroys obj
}
void _test_global() {
global<refptr<MyObj>> g;
ASSERT(g == NULL);
ASSERT(!(g != NULL));
ASSERT(g._ptr() == NULL);
MyObj *obj = new MyObj();
refptr<MyObj> p = adoptref(obj);
ASSERT(obj->refcnt() == 1);
obj->i = 3;
ASSERT(g._ptr() == NULL);
ASSERT(p._ptr() == obj);
ASSERT(!(g == p));
ASSERT(!(p == g));
ASSERT(g != p);
ASSERT(p != g);
// copy = global <- refptr
g = p;
ASSERT(obj->refcnt() == 2);
ASSERT(g._ptr() == obj);
ASSERT(p._ptr() == obj);
ASSERT(!(g == NULL));
ASSERT(g != NULL);
ASSERT(g == p);
ASSERT(p == g);
ASSERT(!(g != p));
ASSERT(!(p != g));
ASSERT(g->i == 3); // ->
g->i = 4;
ASSERT(obj->i == 4);
ASSERT((*g).i == 4); // *
(*g).i = 3;
ASSERT(obj->i == 3);
// global = nil - obj reference is released
ASSERT(obj->refcnt() == 2);
g = NULL;
ASSERT(obj->refcnt() == 1);
ASSERT(g._ptr() == NULL);
// copy ctor global <- refptr
{
global<refptr<MyObj>> h(p);
ASSERT(obj->refcnt() == 2);
ASSERT(g._ptr() == NULL);
ASSERT(h._ptr() == obj);
ASSERT(p._ptr() == obj);
ASSERT(!(h == g));
ASSERT(!(g == h));
ASSERT(h == p);
ASSERT(p == h);
ASSERT(h != g);
ASSERT(g != h);
ASSERT(!(h != p));
ASSERT(!(p != h));
// h goes out of scope, but obj reference is _not_ released
}
ASSERT(obj->refcnt() == 2); // NOTE _not_ 1
// reinit g again
g = p;
ASSERT(obj->refcnt() == 3);
ASSERT(g._ptr() == obj);
ASSERT(p._ptr() == obj);
// copy ctor refptr <- global
{
refptr<MyObj> q(g);
ASSERT(obj->refcnt() == 4);
ASSERT(g._ptr() == obj);
ASSERT(p._ptr() == obj);
ASSERT(q._ptr() == obj);
// q goes out of scope - obj decref'ed
}
ASSERT(obj->refcnt() == 3);
// copy = refptr <- global
{
refptr<MyObj> q;
q = g;
ASSERT(obj->refcnt() == 4);
ASSERT(g._ptr() == obj);
ASSERT(p._ptr() == obj);
ASSERT(q._ptr() == obj);
// q goes out of scope - obj decref'ed
}
ASSERT(obj->refcnt() == 3);
}
// ---- sync:: ----
......
......@@ -86,7 +86,7 @@ static tuple<PyGILState_STATE, bool> pygil_ensure() {
}
// errors
const error ErrPyStopped = errors::New("Python interpreter is stopped");
const global<error> ErrPyStopped = errors::New("Python interpreter is stopped");
error PyErr_Fetch() {
PyObject *pyexc_type, *pyexc_value, *pyexc_tb;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment