Commit b2e6142c authored by Jason Madden's avatar Jason Madden

Fix an occasional crash in monkey patched 3.6/test_socket.py

First the crash report:

```
  testInterruptedRecvmsgTimeout (__main__.InterruptedRecvTimeoutTest) ... Traceback (most recent call last):
    File "/home/travis/build/gevent/gevent/src/gevent/_ffi/loop.py", line 83, in python_callback
      def python_callback(self, handle, revents):
    File "/home/travis/build/gevent/gevent/src/greentest/3.6/test_socket.py", line 3698, in <lambda>
      lambda signum, frame: 1 / 0)
  ZeroDivisionError: division by zero
  Fri Dec 15 16:09:36 2017
  ok
  testInterruptedSendTimeout (__main__.InterruptedSendTimeoutTest) ... Fatal Python error: ffi.from_handle() detected that the address passed points to garbage. If it is really the result of ffi.new_handle(), then the Python object has already been garbage collected

  Thread 0x00002b26d4c01700 (most recent call first):
    File "/home/travis/build/gevent/gevent/src/gevent/_threading.py", line 152 in wait
    File "/home/travis/build/gevent/gevent/src/gevent/_threading.py", line 436 in get
    File "/home/travis/build/gevent/gevent/src/gevent/threadpool.py", line 200 in _worker

  Thread 0x00002b26d4a00700 (most recent call first):
    File "/home/travis/build/gevent/gevent/src/gevent/_threading.py", line 152 in wait
    File "/home/travis/build/gevent/gevent/src/gevent/_threading.py", line 436 in get
    File "/home/travis/build/gevent/gevent/src/gevent/threadpool.py", line 200 in _worker

  Current thread 0x00002b26cc7b2a00 (most recent call first):
    File "/home/travis/build/gevent/gevent/src/gevent/_ffi/loop.py", line 178 in python_stop
    File "/home/travis/build/gevent/gevent/src/gevent/libuv/loop.py", line 289 in run
    File "/home/travis/build/gevent/gevent/src/gevent/hub.py", line 688 in run
```

The main problem turned out to be the way in which libuv reports
signals at random times, much worse than the way libev does. This led
to callback functions unexpectedly returning 0, because that's the
default for an unhandled exception or an onerror handler that returns
None. That in turn led to calls to python_stop that we weren't
expecting using a handle that had already been deallocated.

The fix is to both be defensive about what handles we try to use, and
to be more explicit about our error returns.
parent a0bda58b
...@@ -29,6 +29,12 @@ ...@@ -29,6 +29,12 @@
# useless-suppression: the only way to avoid repeating it for specific statements everywhere that we # useless-suppression: the only way to avoid repeating it for specific statements everywhere that we
# do Py2/Py3 stuff is to put it here. Sadly this means that we might get better but not realize it. # do Py2/Py3 stuff is to put it here. Sadly this means that we might get better but not realize it.
# duplicate-code: Yeah, the compatibility ssl modules are much the same # duplicate-code: Yeah, the compatibility ssl modules are much the same
# In pylint 1.8.0, inconsistent-return-statements are created for the wrong reasons.
# This code raises it, even though there's only one return (the implicit 'return None' is presumably
# what triggers it):
# def foo():
# if baz:
# return 1
disable=wrong-import-position, disable=wrong-import-position,
wrong-import-order, wrong-import-order,
missing-docstring, missing-docstring,
...@@ -44,9 +50,10 @@ disable=wrong-import-position, ...@@ -44,9 +50,10 @@ disable=wrong-import-position,
cyclic-import, cyclic-import,
too-many-arguments, too-many-arguments,
redefined-builtin, redefined-builtin,
useless-suppression, useless-suppression,
duplicate-code, duplicate-code,
undefined-all-variable undefined-all-variable,
inconsistent-return-statements
[FORMAT] [FORMAT]
......
...@@ -2,7 +2,7 @@ setuptools ...@@ -2,7 +2,7 @@ setuptools
wheel wheel
cython>=0.27.3 cython>=0.27.3
greenlet>=0.4.10 greenlet>=0.4.10
pylint>=1.7.1 pylint>=1.8.0
prospector[with_pyroma] prospector[with_pyroma]
coverage>=4.0 coverage>=4.0
coveralls>=1.0 coveralls>=1.0
......
...@@ -17,8 +17,17 @@ def _pid_dbg(*args, **kwargs): ...@@ -17,8 +17,17 @@ def _pid_dbg(*args, **kwargs):
print(os.getpid(), *args, **kwargs) print(os.getpid(), *args, **kwargs)
GEVENT_DEBUG = 0 GEVENT_DEBUG = 0
CRITICAL = 1
ERROR = 3
DEBUG = 5
TRACE = 9 TRACE = 9
if os.getenv('GEVENT_DEBUG') == 'trace': if os.getenv("GEVENT_DEBUG") == 'critical':
GEVENT_DEBUG = CRITICAL
elif os.getenv("GEVENT_DEBUG") == 'error':
GEVENT_DEBUG = ERROR
elif os.getenv('GEVENT_DEBUG') == 'debug':
GEVENT_DEBUG = DEBUG
elif os.getenv('GEVENT_DEBUG') == 'trace':
_dbg = _pid_dbg _dbg = _pid_dbg
GEVENT_DEBUG = TRACE GEVENT_DEBUG = TRACE
...@@ -11,6 +11,7 @@ from weakref import ref as WeakRef ...@@ -11,6 +11,7 @@ from weakref import ref as WeakRef
from gevent._ffi import _dbg from gevent._ffi import _dbg
from gevent._ffi import GEVENT_DEBUG from gevent._ffi import GEVENT_DEBUG
from gevent._ffi import TRACE from gevent._ffi import TRACE
from gevent._ffi import CRITICAL
from gevent._ffi.callback import callback from gevent._ffi.callback import callback
__all__ = [ __all__ = [
...@@ -62,23 +63,18 @@ EVENTS = GEVENT_CORE_EVENTS = _EVENTSType() ...@@ -62,23 +63,18 @@ EVENTS = GEVENT_CORE_EVENTS = _EVENTSType()
#### ####
class _Callbacks(object): class _Callbacks(object):
if GEVENT_DEBUG >= TRACE:
def __init__(self, ffi):
self.ffi = ffi
self.callbacks = []
def from_handle(self, handle): # pylint:disable=method-hidden def __init__(self, ffi):
_dbg("Getting from handle", handle) self.ffi = ffi
traceback.print_stack() self.callbacks = []
x = self.ffi.from_handle(handle) if GEVENT_DEBUG < TRACE:
_dbg("Got from handle", handle, x) self.from_handle = ffi.from_handle
return x
else: def from_handle(self, handle): # pylint:disable=method-hidden
def __init__(self, ffi): _dbg("Getting from handle", handle)
self.ffi = ffi x = self.ffi.from_handle(handle)
self.callbacks = [] _dbg("Got from handle", handle, x)
self.from_handle = self.ffi.from_handle return x
def python_callback(self, handle, revents): def python_callback(self, handle, revents):
""" """
...@@ -88,21 +84,33 @@ class _Callbacks(object): ...@@ -88,21 +84,33 @@ class _Callbacks(object):
An exception occurred during the callback and you must call An exception occurred during the callback and you must call
:func:`_python_handle_error` to deal with it. The Python watcher :func:`_python_handle_error` to deal with it. The Python watcher
object will have the exception tuple saved in ``_exc_info``. object will have the exception tuple saved in ``_exc_info``.
- 0
Everything went according to plan. You should check to see if the libev
watcher is still active, and call :func:`_python_stop` if so. This will
clean up the memory.
- 1 - 1
Everything went according to plan. You should check to see if the libev
watcher is still active, and call :func:`python_stop` if it is not. This will
clean up the memory. Finding the watcher still active at the event loop level,
but not having stopped itself at the gevent level is a buggy scenario and
shouldn't happen.
- 2
Everything went according to plan, but the watcher has already Everything went according to plan, but the watcher has already
been stopped. Its memory may no longer be valid. been stopped. Its memory may no longer be valid.
This function should never return 0, as that's the default value that
Python exceptions will produce.
""" """
orig_ffi_watcher = None orig_ffi_watcher = None
try: try:
# Even dereferencing the handle needs to be inside the try/except; # Even dereferencing the handle needs to be inside the try/except;
# if we don't return normally (e.g., a signal) then we wind up going # if we don't return normally (e.g., a signal) then we wind up going
# to the 'onerror' handler, which # to the 'onerror' handler (unhandled_onerror), which
# is not what we want; that can permanently wedge the loop depending # is not what we want; that can permanently wedge the loop depending
# on which callback was executing # on which callback was executing.
# XXX: See comments in that function. We may be able to restart and do better?
if not handle:
# Hmm, a NULL handle. That's not supposed to happen.
# We can easily get into a loop if we deref it and allow that
# to raise.
_dbg("python_callback got null handle")
return 1
the_watcher = self.from_handle(handle) the_watcher = self.from_handle(handle)
orig_ffi_watcher = the_watcher._watcher orig_ffi_watcher = the_watcher._watcher
args = the_watcher.args args = the_watcher.args
...@@ -114,8 +122,9 @@ class _Callbacks(object): ...@@ -114,8 +122,9 @@ class _Callbacks(object):
args = (revents, ) + args[1:] args = (revents, ) + args[1:]
the_watcher.callback(*args) the_watcher.callback(*args)
except: # pylint:disable=bare-except except: # pylint:disable=bare-except
_dbg("Got exception servicing watcher with handle", handle)
# It's possible for ``the_watcher`` to be undefined (UnboundLocalError) # It's possible for ``the_watcher`` to be undefined (UnboundLocalError)
# if we threw an exception on the line that created that variable. # if we threw an exception (signal) on the line that created that variable.
# This is typically the case with a signal under libuv # This is typically the case with a signal under libuv
try: try:
the_watcher the_watcher
...@@ -132,11 +141,17 @@ class _Callbacks(object): ...@@ -132,11 +141,17 @@ class _Callbacks(object):
# It didn't stop itself, *and* it didn't stop itself, reset # It didn't stop itself, *and* it didn't stop itself, reset
# its watcher, and start itself again. libuv's io watchers MAY # its watcher, and start itself again. libuv's io watchers MAY
# do that. # do that.
_dbg("The watcher has not stopped itself", the_watcher) # The normal, expected scenario when we find the watcher still
return 0 # in the keepaliveset is that it is still active at the event loop
return 1 # It stopped itself # level, so we don't expect that python_stop gets called.
_dbg("The watcher has not stopped itself, possibly still active", the_watcher)
return 1
return 2 # it stopped itself
def python_handle_error(self, handle, _revents): def python_handle_error(self, handle, _revents):
_dbg("Handling error for handle", handle)
if not handle:
return
try: try:
watcher = self.from_handle(handle) watcher = self.from_handle(handle)
exc_info = watcher._exc_info exc_info = watcher._exc_info
...@@ -167,14 +182,44 @@ class _Callbacks(object): ...@@ -167,14 +182,44 @@ class _Callbacks(object):
def unhandled_onerror(self, t, v, tb): def unhandled_onerror(self, t, v, tb):
# This is supposed to be called for signals, etc. # This is supposed to be called for signals, etc.
# This is the onerror= value for CFFI.
# If we return None, C will get a value of 0/NULL;
# if we raise, CFFI will print the exception and then
# return 0/NULL; (unless error= was configured)
# If things go as planned, we return the value that asks
# C to call back and check on if the watcher needs to be closed or
# not.
# XXX: TODO: Could this cause events to be lost? Maybe we need to return
# a value that causes the C loop to try the callback again?
# at least for signals under libuv, which are delivered at very odd times.
# Hopefully the event still shows up when we poll the next time.
watcher = None
if tb is not None: if tb is not None:
handle = tb.tb_frame.f_locals['handle'] handle = tb.tb_frame.f_locals['handle']
watcher = self.from_handle(handle) if handle: # handle could be NULL
watcher = self.from_handle(handle)
if watcher is not None:
watcher.loop._check_callback_handle_error(t, v, tb) watcher.loop._check_callback_handle_error(t, v, tb)
return 1
else: else:
raise v raise v
def python_stop(self, handle): def python_stop(self, handle):
if not handle:
print(
"WARNING: gevent: Unable to dereference handle; not stopping watcher. "
"Native resources may leak. This is most likely a bug in gevent.",
file=sys.stderr)
# The alternative is to crash with no helpful information
# NOTE: Raising exceptions here does nothing, they're swallowed by CFFI.
# Since the C level passed in a null pointer, even dereferencing the handle
# will just produce some exceptions.
if GEVENT_DEBUG < CRITICAL:
return
import pdb; pdb.set_trace()
_dbg("python_stop: stopping watcher with handle", handle)
watcher = self.from_handle(handle) watcher = self.from_handle(handle)
watcher.stop() watcher.stop()
...@@ -512,7 +557,7 @@ class AbstractLoop(object): ...@@ -512,7 +557,7 @@ class AbstractLoop(object):
def fork(self, ref=True, priority=None): def fork(self, ref=True, priority=None):
return self._watchers.fork(self, ref, priority) return self._watchers.fork(self, ref, priority)
def async(self, ref=True, priority=None): def async(self, ref=True, priority=None): # XXX: Yeah, we know. pylint:disable=assign-to-new-keyword
return self._watchers.async(self, ref, priority) return self._watchers.async(self, ref, priority)
if sys.platform != "win32": if sys.platform != "win32":
......
...@@ -30,7 +30,7 @@ static void _gevent_generic_callback(struct ev_loop* loop, ...@@ -30,7 +30,7 @@ static void _gevent_generic_callback(struct ev_loop* loop,
// and allowing memory to be freed // and allowing memory to be freed
python_handle_error(handle, revents); python_handle_error(handle, revents);
break; break;
case 0: case 1:
// Code to stop the event. Note that if python_callback // Code to stop the event. Note that if python_callback
// has disposed of the last reference to the handle, // has disposed of the last reference to the handle,
// `watcher` could now be invalid/disposed memory! // `watcher` could now be invalid/disposed memory!
...@@ -38,8 +38,16 @@ static void _gevent_generic_callback(struct ev_loop* loop, ...@@ -38,8 +38,16 @@ static void _gevent_generic_callback(struct ev_loop* loop,
python_stop(handle); python_stop(handle);
} }
break; break;
default: case 2:
assert(cb_result == 1);
// watcher is already stopped and dead, nothing to do. // watcher is already stopped and dead, nothing to do.
break;
default:
fprintf(stderr,
"WARNING: gevent: Unexpected return value %d from Python callback "
"for watcher %p and handle %d\n",
cb_result,
watcher, handle);
// XXX: Possible leaking of resources here? Should we be
// closing the watcher?
} }
} }
...@@ -11,7 +11,9 @@ static void (*gevent_noop)(void* handle) = &_gevent_noop; ...@@ -11,7 +11,9 @@ static void (*gevent_noop)(void* handle) = &_gevent_noop;
static void _gevent_generic_callback1(void* vwatcher, int arg) static void _gevent_generic_callback1(void* vwatcher, int arg)
{ {
const uv_handle_t* watcher = (uv_handle_t*)vwatcher; const uv_handle_t* watcher = (uv_handle_t*)vwatcher;
const void* handle = watcher->data; // Python code may set this to NULL. // Python code may set this to NULL or even change it
// out from under us, which would tend to break things.
void* handle = watcher->data;
const int cb_result = python_callback(handle, arg); const int cb_result = python_callback(handle, arg);
switch(cb_result) { switch(cb_result) {
case -1: case -1:
...@@ -20,62 +22,89 @@ static void _gevent_generic_callback1(void* vwatcher, int arg) ...@@ -20,62 +22,89 @@ static void _gevent_generic_callback1(void* vwatcher, int arg)
// and allowing memory to be freed // and allowing memory to be freed
python_handle_error(handle, arg); python_handle_error(handle, arg);
break; break;
case 0: case 1:
// Code to stop the event. Note that if python_callback // Code to stop the event IF NEEDED. Note that if python_callback
// has disposed of the last reference to the handle, // has disposed of the last reference to the handle,
// `watcher` could now be invalid/disposed memory! // `watcher` could now be invalid/disposed memory!
if (!uv_is_active(watcher)) { if (!uv_is_active(watcher)) {
python_stop(handle); if (watcher->data != handle) {
if (watcher->data) {
// If Python set the data to NULL, then they
// expected to be stopped. That's fine.
// Otherwise, something weird happened.
fprintf(stderr,
"WARNING: gevent: watcher handle changed in callback "
"from %p to %p for watcher at %p of type %d\n",
handle, watcher->data, watcher, watcher->type);
// There's a very good chance that the object the
// handle referred to has been changed and/or the
// old handle has been deallocated (most common), so
// passing the old handle will crash. Instead we
// pass a sigil to let python distinguish this case.
python_stop(NULL);
}
}
else {
python_stop(handle);
}
} }
break; break;
default: case 2:
assert(cb_result == 1);
// watcher is already stopped and dead, nothing to do. // watcher is already stopped and dead, nothing to do.
break;
default:
fprintf(stderr,
"WARNING: gevent: Unexpected return value %d from Python callback "
"for watcher %p (of type %d) and handle %p\n",
cb_result,
watcher, watcher->type, handle);
// XXX: Possible leaking of resources here? Should we be
// closing the watcher?
} }
} }
static void _gevent_generic_callback0(void* handle) static void _gevent_generic_callback0(void* handle)
{ {
_gevent_generic_callback1(handle, 0); _gevent_generic_callback1(handle, 0);
} }
static void _gevent_poll_callback2(void* handle, int status, int events) static void _gevent_poll_callback2(void* handle, int status, int events)
{ {
_gevent_generic_callback1(handle, status < 0 ? status : events); _gevent_generic_callback1(handle, status < 0 ? status : events);
} }
static void _gevent_fs_event_callback3(void* handle, const char* filename, int events, int status) static void _gevent_fs_event_callback3(void* handle, const char* filename, int events, int status)
{ {
_gevent_generic_callback1(handle, status < 0 ? status : events); _gevent_generic_callback1(handle, status < 0 ? status : events);
} }
typedef struct _gevent_fs_poll_s { typedef struct _gevent_fs_poll_s {
uv_fs_poll_t handle; uv_fs_poll_t handle;
uv_stat_t curr; uv_stat_t curr;
uv_stat_t prev; uv_stat_t prev;
} gevent_fs_poll_t; } gevent_fs_poll_t;
static void _gevent_fs_poll_callback3(void* handlep, int status, const uv_stat_t* prev, const uv_stat_t* curr) static void _gevent_fs_poll_callback3(void* handlep, int status, const uv_stat_t* prev, const uv_stat_t* curr)
{ {
// stat pointers are valid for this callback only. // stat pointers are valid for this callback only.
// if given, copy them into our structure, where they can be reached // if given, copy them into our structure, where they can be reached
// from python, just like libev's watcher does, before calling // from python, just like libev's watcher does, before calling
// the callback. // the callback.
// The callback is invoked with status < 0 if path does not exist // The callback is invoked with status < 0 if path does not exist
// or is inaccessible. The watcher is not stopped but your // or is inaccessible. The watcher is not stopped but your
// callback is not called again until something changes (e.g. when // callback is not called again until something changes (e.g. when
// the file is created or the error reason changes). // the file is created or the error reason changes).
// In that case the fields will be 0 in curr/prev. // In that case the fields will be 0 in curr/prev.
gevent_fs_poll_t* handle = (gevent_fs_poll_t*)handlep; gevent_fs_poll_t* handle = (gevent_fs_poll_t*)handlep;
assert(status == 0); assert(status == 0);
handle->curr = *curr; handle->curr = *curr;
handle->prev = *prev; handle->prev = *prev;
_gevent_generic_callback1((uv_handle_t*)handle, 0); _gevent_generic_callback1((uv_handle_t*)handle, 0);
} }
...@@ -372,7 +372,7 @@ class io(_base.IoMixin, watcher): ...@@ -372,7 +372,7 @@ class io(_base.IoMixin, watcher):
if w is not None and w.callback is not None: if w is not None and w.callback is not None:
# There's still a reference to it, and it's started, # There's still a reference to it, and it's started,
# so we can't stop. # so we can't stop.
continue return
# If we get here, nothing was started # If we get here, nothing was started
# so we can take ourself out of the polling set # so we can take ourself out of the polling set
self.stop() self.stop()
...@@ -523,7 +523,7 @@ class child(_SimulatedWithAsyncMixin, ...@@ -523,7 +523,7 @@ class child(_SimulatedWithAsyncMixin,
self._async.send() self._async.send()
class async(_base.AsyncMixin, watcher): class async(_base.AsyncMixin, watcher): # XXX: Yeah, we know pylint:disable=assign-to-new-keyword
def _watcher_ffi_init(self, args): def _watcher_ffi_init(self, args):
# It's dangerous to have a raw, non-initted struct # It's dangerous to have a raw, non-initted struct
......
...@@ -307,6 +307,9 @@ def main(): ...@@ -307,6 +307,9 @@ def main():
if 'PYTHONFAULTHANDLER' not in os.environ: if 'PYTHONFAULTHANDLER' not in os.environ:
os.environ['PYTHONFAULTHANDLER'] = 'true' os.environ['PYTHONFAULTHANDLER'] = 'true'
if 'GEVENT_DEBUG' not in os.environ:
os.environ['GEVENT_DEBUG'] = 'error'
tests = discover(options.tests, options.ignore, coverage) tests = discover(options.tests, options.ignore, coverage)
if options.discover: if options.discover:
for cmd, options in tests: for cmd, options in tests:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment