Commit b3537301 authored by Jason Madden, committed by GitHub

Merge pull request #1120 from gevent/faster-stack

Speed up Greenlet creation on CPython
parents b84e1bd9 8d9ed09b
...@@ -87,13 +87,15 @@ ...@@ -87,13 +87,15 @@
- Greenlet objects now keep track of their spawning parent greenlet - Greenlet objects now keep track of their spawning parent greenlet
and the code location that spawned them, in addition to maintaining and the code location that spawned them, in addition to maintaining
a "spawn tree local" mapping. Based on a proposal from PayPal and a "spawn tree local" mapping. This adds some runtime overhead in
comments by Mahmoud Hashemi and Kurt Rose. See :issue:`755` and relative terms, but absolute numbers are still relatively small.
:pr:`1115`. As always, feedback is appreciated. Based on a proposal from PayPal and comments by Mahmoud Hashemi and
Kurt Rose. See :issue:`755` and :pr:`1115`. As always, feedback is
appreciated.
- The :mod:`gevent.greenlet` module is now compiled with Cython to - The :mod:`gevent.greenlet` module is now compiled with Cython to
offset any performance decrease due to :issue:`755`. Please open offset any performance decrease due to :issue:`755`. Please open
issues for any compatibility concerns. See :pr:`1115`. issues for any compatibility concerns. See :pr:`1115` and :pr:`1120`.
- Greenlet objects now have a ``minimal_ident`` property. It functions - Greenlet objects now have a ``minimal_ident`` property. It functions
similarly to ``Thread.ident`` or ``id`` by uniquely identifying the similarly to ``Thread.ident`` or ``id`` by uniquely identifying the
......
...@@ -4,6 +4,8 @@ cimport cython ...@@ -4,6 +4,8 @@ cimport cython
from gevent.__ident cimport IdentRegistry from gevent.__ident cimport IdentRegistry
cdef bint _greenlet_imported cdef bint _greenlet_imported
cdef bint _PYPY cdef bint _PYPY
cdef sys_getframe
cdef sys_exc_info
cdef extern from "greenlet/greenlet.h": cdef extern from "greenlet/greenlet.h":
...@@ -25,6 +27,23 @@ cdef inline void greenlet_init(): ...@@ -25,6 +27,23 @@ cdef inline void greenlet_init():
PyGreenlet_Import() PyGreenlet_Import()
_greenlet_imported = True _greenlet_imported = True
# Declare CPython's code and frame objects as external extension types so
# that Cython code can use CodeType/FrameType as static types and read the
# declared struct fields (f_code, f_lineno) of a frame.
cdef extern from "Python.h":
ctypedef class types.CodeType [object PyCodeObject]:
pass
cdef extern from "frameobject.h":
ctypedef class types.FrameType [object PyFrameObject]:
cdef CodeType f_code
cdef int f_lineno
# We can't declare this in the object, because it's
# allowed to be NULL, and Cython can't handle that.
# We have to go through the python machinery to get a
# proper None instead.
# cdef FrameType f_back
cdef void _init() cdef void _init()
cdef class SpawnedLink: cdef class SpawnedLink:
...@@ -42,18 +61,18 @@ cdef class FailureSpawnedLink(SpawnedLink): ...@@ -42,18 +61,18 @@ cdef class FailureSpawnedLink(SpawnedLink):
@cython.final @cython.final
@cython.internal @cython.internal
cdef class _Frame: cdef class _Frame:
cdef readonly object f_code cdef readonly CodeType f_code
cdef readonly int f_lineno cdef readonly int f_lineno
cdef public _Frame f_back cdef readonly _Frame f_back
@cython.final @cython.final
@cython.locals( @cython.locals(frames=list,frame=FrameType)
previous=_Frame, cdef inline list _extract_stack(int limit)
first=_Frame,
next_frame=_Frame)
cdef _Frame _extract_stack(int limit, _Frame f_back)
@cython.final
@cython.locals(previous=_Frame, frame=tuple, f=_Frame)
cdef _Frame _Frame_from_list(list frames)
cdef class Greenlet(greenlet): cdef class Greenlet(greenlet):
...@@ -61,7 +80,10 @@ cdef class Greenlet(greenlet): ...@@ -61,7 +80,10 @@ cdef class Greenlet(greenlet):
cdef readonly args cdef readonly args
cdef readonly object spawning_greenlet cdef readonly object spawning_greenlet
cdef public dict spawn_tree_locals cdef public dict spawn_tree_locals
cdef readonly _Frame spawning_stack
# This is accessed with getattr() dynamically so it
# must be visible to Python
cdef readonly list _spawning_stack_frames
cdef list _links cdef list _links
cdef tuple _exc_info cdef tuple _exc_info
......
...@@ -2,13 +2,15 @@ ...@@ -2,13 +2,15 @@
# cython: auto_pickle=False,embedsignature=True,always_allow_keywords=False # cython: auto_pickle=False,embedsignature=True,always_allow_keywords=False
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import sys from sys import _getframe as sys_getframe
from sys import exc_info as sys_exc_info
from weakref import ref as wref from weakref import ref as wref
from greenlet import greenlet from greenlet import greenlet
from gevent._compat import reraise from gevent._compat import reraise
from gevent._compat import PYPY as _PYPY
from gevent._tblib import dump_traceback from gevent._tblib import dump_traceback
from gevent._tblib import load_traceback from gevent._tblib import load_traceback
from gevent.hub import GreenletExit from gevent.hub import GreenletExit
...@@ -18,8 +20,7 @@ from gevent.hub import get_hub ...@@ -18,8 +20,7 @@ from gevent.hub import get_hub
from gevent.hub import iwait from gevent.hub import iwait
from gevent.hub import wait from gevent.hub import wait
from gevent.timeout import Timeout from gevent.timeout import Timeout
from gevent._util import Lazy
_PYPY = hasattr(sys, 'pypy_version_info')
__all__ = [ __all__ = [
...@@ -108,26 +109,28 @@ class _Frame(object): ...@@ -108,26 +109,28 @@ class _Frame(object):
self.f_lineno = f_lineno self.f_lineno = f_lineno
self.f_back = None self.f_back = None
f_globals = property(lambda _self: None) @property
def f_globals(self):
return None
def _extract_stack(limit, f_back): def _Frame_from_list(frames):
previous = None previous = None
frame = sys._getframe() for frame in reversed(frames):
first = None f = _Frame(*frame)
f.f_back = previous
previous = f
return previous
first = previous = _Frame(frame.f_code, frame.f_lineno) def _extract_stack(limit):
limit -= 1 frame = sys_getframe()
frame = frame.f_back frames = []
while limit and frame is not None: while limit and frame is not None:
limit -= 1 limit -= 1
next_frame = _Frame(frame.f_code, frame.f_lineno) frames.append((frame.f_code, frame.f_lineno))
previous.f_back = next_frame
previous = next_frame
frame = frame.f_back frame = frame.f_back
previous.f_back = f_back return frames
return first
_greenlet__init__ = greenlet.__init__ _greenlet__init__ = greenlet.__init__
...@@ -274,9 +277,16 @@ class Greenlet(greenlet): ...@@ -274,9 +277,16 @@ class Greenlet(greenlet):
# Its children get separate locals. # Its children get separate locals.
spawner.spawn_tree_locals = self.spawn_tree_locals spawner.spawn_tree_locals = self.spawn_tree_locals
self._spawning_stack_frames = _extract_stack(self.spawning_stack_limit)
self._spawning_stack_frames.extend(getattr(spawner, '_spawning_stack_frames', []))
self.spawning_stack = _extract_stack(self.spawning_stack_limit, @Lazy
getattr(spawner, 'spawning_stack', None)) def spawning_stack(self):
# Store this in the __dict__. We don't use it from the C
# code. It's tempting to discard _spawning_stack_frames
# after this, but child greenlets may still be created
# that need it.
return _Frame_from_list(self._spawning_stack_frames)
def _get_minimal_ident(self): def _get_minimal_ident(self):
reg = self.parent.ident_registry reg = self.parent.ident_registry
...@@ -476,9 +486,9 @@ class Greenlet(greenlet): ...@@ -476,9 +486,9 @@ class Greenlet(greenlet):
.. versionadded:: 1.1 .. versionadded:: 1.1
""" """
exc_info = self._exc_info ei = self._exc_info
if exc_info is not None and exc_info[0] is not None: if ei is not None and ei[0] is not None:
return (exc_info[0], exc_info[1], load_traceback(exc_info[2])) return (ei[0], ei[1], load_traceback(ei[2]))
def throw(self, *args): def throw(self, *args):
"""Immediately switch into the greenlet and raise an exception in it. """Immediately switch into the greenlet and raise an exception in it.
...@@ -708,7 +718,7 @@ class Greenlet(greenlet): ...@@ -708,7 +718,7 @@ class Greenlet(greenlet):
try: try:
result = self._run(*self.args, **self.kwargs) result = self._run(*self.args, **self.kwargs)
except: # pylint:disable=bare-except except: # pylint:disable=bare-except
self._report_error(sys.exc_info()) self._report_error(sys_exc_info())
return return
self._report_result(result) self._report_result(result)
finally: finally:
...@@ -801,7 +811,7 @@ class Greenlet(greenlet): ...@@ -801,7 +811,7 @@ class Greenlet(greenlet):
try: try:
link(self) link(self)
except: # pylint:disable=bare-except except: # pylint:disable=bare-except
self.parent.handle_error((link, self), *sys.exc_info()) self.parent.handle_error((link, self), *sys_exc_info())
class _dummy_event(object): class _dummy_event(object):
...@@ -828,7 +838,7 @@ def _kill(glet, exception, waiter): ...@@ -828,7 +838,7 @@ def _kill(glet, exception, waiter):
glet.throw(exception) glet.throw(exception)
except: # pylint:disable=bare-except except: # pylint:disable=bare-except
# XXX do we need this here? # XXX do we need this here?
glet.parent.handle_error(glet, *sys.exc_info()) glet.parent.handle_error(glet, *sys_exc_info())
if waiter is not None: if waiter is not None:
waiter.switch() waiter.switch()
...@@ -863,7 +873,7 @@ def _killall3(greenlets, exception, waiter): ...@@ -863,7 +873,7 @@ def _killall3(greenlets, exception, waiter):
try: try:
g.throw(exception) g.throw(exception)
except: # pylint:disable=bare-except except: # pylint:disable=bare-except
g.parent.handle_error(g, *sys.exc_info()) g.parent.handle_error(g, *sys_exc_info())
if not g.dead: if not g.dead:
diehards.append(g) diehards.append(g)
waiter.switch(diehards) waiter.switch(diehards)
...@@ -875,7 +885,7 @@ def _killall(greenlets, exception): ...@@ -875,7 +885,7 @@ def _killall(greenlets, exception):
try: try:
g.throw(exception) g.throw(exception)
except: # pylint:disable=bare-except except: # pylint:disable=bare-except
g.parent.handle_error(g, *sys.exc_info()) g.parent.handle_error(g, *sys_exc_info())
def killall(greenlets, exception=GreenletExit, block=True, timeout=None): def killall(greenlets, exception=GreenletExit, block=True, timeout=None):
......
"""Benchmarking spawn() performance. """Benchmarking spawn() performance.
""" """
from __future__ import print_function, absolute_import, division from __future__ import print_function, absolute_import, division
import sys
import os import perf
import time
try: try:
xrange xrange
except NameError: except NameError:
xrange = range xrange = range
if hasattr(time, "perf_counter"):
curtime = time.perf_counter # 3.3
elif sys.platform.startswith('win'):
curtime = time.clock
else:
curtime = time.time
N = 100000 N = 10000
counter = 0 counter = 0
def incr(sleep, **_kwargs): def incr(sleep, **_kwargs):
global counter global counter
counter += 1 counter += 1
sleep(0)
def noop(_p): def noop(_p):
pass pass
class Options(object):
    """Settings handed to each ``bench_*`` function.

    ``sleep`` and ``join`` select which phases get timed: a truthy ``sleep``
    enables the ``sleep(0)`` phase, and a truthy ``join`` enables the join
    phase (a benchmark may replace it with the callable to invoke). Any
    remaining keyword arguments are kept in ``kwargs`` and forwarded to every
    spawned function.
    """

    # Filled in by the timing wrappers with the number of repetitions to run.
    loops = None

    # TODO: Add back an argument for that
    eventlet_hub = None

    def __init__(self, sleep, join, **kwargs):
        self.sleep = sleep
        self.join = join
        self.kwargs = kwargs
class Times(object):
    """Elapsed time of each phase of one benchmark run.

    ``sleep_duration`` and ``join_duration`` default to ``-1``, the marker
    used when that phase was not measured.
    """

    def __init__(self, spawn_duration, sleep_duration=-1, join_duration=-1):
        self.join_duration = join_duration
        self.sleep_duration = sleep_duration
        self.spawn_duration = spawn_duration
def _report(name, delta):
    """Print the cost of phase *name* as microseconds per greenlet.

    *delta* is the elapsed time in seconds for all ``N`` greenlets.
    """
    microseconds_each = delta * 1000000.0 / N
    print('%8s: %3.2f microseconds per greenlet' % (name, microseconds_each))
def test(spawn, sleep, kwargs): def _test(spawn, sleep, options):
start = curtime() global counter
counter = 0
before_spawn = perf.perf_counter()
for _ in xrange(N): for _ in xrange(N):
spawn(incr, sleep, **kwargs) spawn(incr, sleep, **options.kwargs)
_report('spawning', curtime() - start)
assert counter == 0, counter before_sleep = perf.perf_counter()
start = curtime() if options.sleep:
sleep(0) assert counter == 0, counter
_report('sleep(0)', curtime() - start) sleep(0)
assert counter == N, (counter, N) after_sleep = perf.perf_counter()
assert counter == N, (counter, N)
else:
after_sleep = before_sleep
if options.join:
before_join = perf.perf_counter()
options.join()
after_join = perf.perf_counter()
join_duration = after_join - before_join
else:
join_duration = -1
return Times(before_sleep - before_spawn,
after_sleep - before_sleep,
join_duration)
def test(spawn, sleep, options):
    """Run ``_test`` ``options.loops`` times and total each phase's duration.

    :param spawn: Callable used to start each greenlet (or equivalent).
    :param sleep: Callable used to yield to the spawned greenlets.
    :param options: An ``Options`` instance; ``options.loops`` is the number
        of repetitions to run.
    :return: A ``Times`` whose fields are the sums over all repetitions.
    """
    all_times = [_test(spawn, sleep, options)
                 for _ in xrange(options.loops)]

    spawn_duration = sum(x.spawn_duration for x in all_times)
    sleep_duration = sum(x.sleep_duration for x in all_times)
    # ``_test`` reports a join duration of -1 when no join was performed;
    # those runs must not contribute to the total. The previous code summed
    # ``sleep_duration`` here and compared the ``Times`` object itself to -1,
    # so the "join" figure was really the sleep figure.
    join_duration = sum(x.join_duration for x in all_times
                        if x.join_duration != -1)

    return Times(spawn_duration, sleep_duration, join_duration)
def bench_none(options): def bench_none(options):
kwargs = options.kwargs options.sleep = False
start = curtime() def spawn(f, sleep, **kwargs):
for _ in xrange(N): return f(sleep, **kwargs)
incr(noop, **kwargs) from time import sleep
assert counter == N, (counter, N) return test(spawn,
_report('noop', curtime() - start) sleep,
options)
def bench_gevent(options): def bench_gevent(options):
import gevent
print('using gevent from %s' % gevent.__file__)
from gevent import spawn, sleep from gevent import spawn, sleep
test(spawn, sleep, options.kwargs) return test(spawn, sleep, options)
def bench_geventraw(options): def bench_geventraw(options):
import gevent
print('using gevent from %s' % gevent.__file__)
from gevent import sleep, spawn_raw from gevent import sleep, spawn_raw
test(spawn_raw, sleep, options.kwargs) return test(spawn_raw, sleep, options)
def bench_geventpool(options): def bench_geventpool(options):
import gevent
print('using gevent from %s' % gevent.__file__)
from gevent import sleep from gevent import sleep
from gevent.pool import Pool from gevent.pool import Pool
p = Pool() p = Pool()
test(p.spawn, sleep, options.kwargs) if options.join:
start = curtime() options.join = p.join
times = test(p.spawn, sleep, options)
p.join() return times
_report('joining', curtime() - start)
def bench_eventlet(options): def bench_eventlet(options):
try:
import eventlet
except ImportError:
if options.ignore_import_errors:
return
raise
print('using eventlet from %s' % eventlet.__file__)
from eventlet import spawn, sleep from eventlet import spawn, sleep
from eventlet.hubs import use_hub from eventlet.hubs import use_hub
if options.eventlet_hub is not None: if options.eventlet_hub is not None:
use_hub(options.eventlet_hub) use_hub(options.eventlet_hub)
test(spawn, sleep, options.kwargs) return test(spawn, sleep, options)
# Run every benchmark named by all() as a separate command built from
# sys.executable and this file: first a plain pass, then a second pass with
# --with-kwargs. Each command line is printed before it is run, and a
# non-zero os.system() result is recorded and reported as "<cmd> failed".
# NOTE(review): indentation was lost in this view; the nesting of the
# trailing print('') and "if error:" lines should be confirmed against the
# original file. Presumably sys.exit(1) runs once, after both passes.
def bench_all():
from time import sleep
error = 0
names = sorted(all())
# First pass: no extra keyword arguments; pauses briefly before each command.
for func in names:
cmd = '%s %s %s --ignore-import-errors' % (sys.executable, __file__, func)
print(cmd)
sys.stdout.flush()
sleep(0.01)
if os.system(cmd):
error = 1
print('%s failed' % cmd)
print('')
# Second pass: the same benchmarks, this time with --with-kwargs.
for func in names:
cmd = '%s %s --with-kwargs %s --ignore-import-errors' % (sys.executable, __file__, func)
print(cmd)
sys.stdout.flush()
if os.system(cmd):
error = 1
print('%s failed' % cmd)
print('')
if error:
sys.exit(1)
def all(): def all():
result = [x for x in globals() if x.startswith('bench_') and x != 'bench_all'] result = [x for x in globals() if x.startswith('bench_') and x != 'bench_all']
try: result.sort()
result.sort(key=lambda x: globals()[x].func_code.co_firstlineno)
except AttributeError:
result.sort(key=lambda x: globals()[x].__code__.co_firstlineno)
result = [x.replace('bench_', '') for x in result] result = [x.replace('bench_', '') for x in result]
return result return result
def all_functions():
    """Return the ``bench_*`` callables for every name reported by ``all()``.

    The result is in the same order as the names ``all()`` returns.
    """
    module_namespace = globals()
    return [module_namespace['bench_%s' % name] for name in all()]
def main(): def main():
import argparse def worker_cmd(cmd, args):
parser = argparse.ArgumentParser() cmd.extend(args.benchmark)
parser.add_argument('--with-kwargs', default=False, action='store_true')
parser.add_argument('--eventlet-hub') runner = perf.Runner(add_cmdline_args=worker_cmd)
parser.add_argument('--ignore-import-errors', action='store_true') runner.argparser.add_argument('benchmark',
parser.add_argument('benchmark', choices=all() + ['all']) nargs='*',
options = parser.parse_args() default='all',
if options.with_kwargs: choices=all() + ['all'])
options.kwargs = {'foo': 1, 'bar': 'hello'}
else: def spawn_time(loops, func, options):
options.kwargs = {} options.loops = loops
if options.benchmark == 'all': times = func(options)
bench_all() return times.spawn_duration
def sleep_time(loops, func, options):
options.loops = loops
times = func(options)
return times.sleep_duration
def join_time(loops, func, options):
options.loops = loops
times = func(options)
return times.join_duration
args = runner.parse_args()
if 'all' in args.benchmark or args.benchmark == 'all':
args.benchmark = ['all']
names = all()
else: else:
function = globals()['bench_' + options.benchmark] names = args.benchmark
function(options)
names = sorted(set(names))
for name in names:
runner.bench_time_func(name + ' spawn',
spawn_time,
globals()['bench_' + name],
Options(False, False),
inner_loops=N)
if name != 'none':
runner.bench_time_func(name + ' sleep',
sleep_time,
globals()['bench_' + name],
Options(True, False),
inner_loops=N)
if 'geventpool' in names:
runner.bench_time_func('geventpool join',
join_time,
bench_geventpool,
Options(True, True),
inner_loops=N)
for name in names:
runner.bench_time_func(name + ' spawn kwarg',
spawn_time,
globals()['bench_' + name],
Options(False, False, foo=1, bar='hello'),
inner_loops=N)
if __name__ == '__main__': if __name__ == '__main__':
main() main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment