Commit 760a080c authored by Jason Madden

Speed up Greenlet creation on CPython

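This is done in two ways: store ``(f_code, f_lineno)`` tuples instead of
``_Frame`` objects when capturing the spawning stack (the ``_Frame``
chain is now built lazily from those tuples), and use direct C-level
access to two attributes of the CPython frame objects, ``f_code`` and
``f_lineno``.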

Benchmarks:

+------------------------+-----------------+------------------------------+
| Benchmark              | spawn_27_master | spawn_27_tuple2              |
+========================+=================+==============================+
| eventlet sleep         | 9.12 us         | 8.77 us: 1.04x faster (-4%)  |
+------------------------+-----------------+------------------------------+
| gevent spawn           | 14.5 us         | 13.2 us: 1.10x faster (-9%)  |
+------------------------+-----------------+------------------------------+
| gevent sleep           | 1.63 us         | 1.86 us: 1.14x slower (+14%) |
+------------------------+-----------------+------------------------------+
| geventpool spawn       | 30.4 us         | 23.6 us: 1.29x faster (-22%) |
+------------------------+-----------------+------------------------------+
| geventpool sleep       | 4.30 us         | 4.55 us: 1.06x slower (+6%)  |
+------------------------+-----------------+------------------------------+
| geventpool join        | 1.70 us         | 1.83 us: 1.08x slower (+8%)  |
+------------------------+-----------------+------------------------------+
| gevent spawn kwarg     | 16.5 us         | 13.5 us: 1.22x faster (-18%) |
+------------------------+-----------------+------------------------------+
| geventpool spawn kwarg | 30.5 us         | 23.9 us: 1.27x faster (-22%) |
+------------------------+-----------------+------------------------------+

Not significant (7): eventlet spawn; geventraw spawn; geventraw sleep;
none spawn; eventlet spawn kwarg; geventraw spawn kwarg; none spawn
kwarg

+------------------------+-----------------+------------------------------+
| Benchmark              | spawn_36_master | spawn_36_tuple2              |
+========================+=================+==============================+
| gevent spawn           | 13.2 us         | 11.9 us: 1.12x faster (-10%) |
+------------------------+-----------------+------------------------------+
| gevent sleep           | 1.71 us         | 1.90 us: 1.11x slower (+11%) |
+------------------------+-----------------+------------------------------+
| geventpool spawn       | 19.9 us         | 15.9 us: 1.25x faster (-20%) |
+------------------------+-----------------+------------------------------+
| geventpool sleep       | 3.54 us         | 3.75 us: 1.06x slower (+6%)  |
+------------------------+-----------------+------------------------------+
| geventpool spawn kwarg | 20.3 us         | 15.9 us: 1.27x faster (-22%) |
+------------------------+-----------------+------------------------------+
| geventraw spawn kwarg  | 5.80 us         | 6.10 us: 1.05x slower (+5%)  |
+------------------------+-----------------+------------------------------+

Not significant (9): eventlet spawn; eventlet sleep; geventraw spawn;
geventraw sleep; none spawn; geventpool join; eventlet spawn kwarg;
gevent spawn kwarg; none spawn kwarg

+------------------+-------------------+------------------------------+
| Benchmark        | spawn_pypy_master | spawn_pypy_tuple2            |
+==================+===================+==============================+
| eventlet spawn   | 30.5 us           | 28.9 us: 1.05x faster (-5%)  |
+------------------+-------------------+------------------------------+
| eventlet sleep   | 3.39 us           | 3.19 us: 1.06x faster (-6%)  |
+------------------+-------------------+------------------------------+
| gevent spawn     | 9.89 us           | 17.2 us: 1.73x slower (+73%) |
+------------------+-------------------+------------------------------+
| gevent sleep     | 3.14 us           | 3.99 us: 1.27x slower (+27%) |
+------------------+-------------------+------------------------------+
| geventpool spawn | 12.3 us           | 20.1 us: 1.63x slower (+63%) |
+------------------+-------------------+------------------------------+

Not significant (1): geventpool sleep

+------------------------+---------------+-------------------------------+
| Benchmark              | spawn_36_13a1 | spawn_36_tuple2               |
+========================+===============+===============================+
| eventlet spawn         | 14.0 us       | 13.2 us: 1.06x faster (-6%)   |
+------------------------+---------------+-------------------------------+
| gevent spawn           | 4.25 us       | 11.9 us: 2.79x slower (+179%) |
+------------------------+---------------+-------------------------------+
| gevent sleep           | 2.78 us       | 1.90 us: 1.46x faster (-32%)  |
+------------------------+---------------+-------------------------------+
| geventpool spawn       | 10.4 us       | 15.9 us: 1.52x slower (+52%)  |
+------------------------+---------------+-------------------------------+
| geventpool sleep       | 5.52 us       | 3.75 us: 1.47x faster (-32%)  |
+------------------------+---------------+-------------------------------+
| geventraw spawn        | 2.56 us       | 5.09 us: 1.99x slower (+99%)  |
+------------------------+---------------+-------------------------------+
| geventraw sleep        | 738 ns        | 838 ns: 1.14x slower (+14%)   |
+------------------------+---------------+-------------------------------+
| geventpool join        | 3.94 us       | 1.75 us: 2.25x faster (-56%)  |
+------------------------+---------------+-------------------------------+
| gevent spawn kwarg     | 5.50 us       | 12.1 us: 2.19x slower (+119%) |
+------------------------+---------------+-------------------------------+
| geventpool spawn kwarg | 11.3 us       | 15.9 us: 1.41x slower (+41%)  |
+------------------------+---------------+-------------------------------+
| geventraw spawn kwarg  | 3.90 us       | 6.10 us: 1.56x slower (+56%)  |
+------------------------+---------------+-------------------------------+

Not significant (4): eventlet sleep; none spawn; eventlet spawn kwarg; none spawn kwarg

The eventlet, sleep, join and raw tests serve as controls, so we can see
that there's up to ~10% variance between most runs anyway.

CPython 3.6 shows the least variance, so those 10-20% improvement
numbers are probably fairly close to the real effect of this change.

PyPy sadly gets *slower* with this change for reasons that are utterly
unclear.

Compared to 1.3a1 (last benchmark) we're still up to 2-3x slower.

Creation of a raw greenlet shows 2.66us on CPython 3.6.4 vs the 3.65us
I reported in #755.
parent 64f76d9c
......@@ -4,6 +4,8 @@ cimport cython
from gevent.__ident cimport IdentRegistry
cdef bint _greenlet_imported
cdef bint _PYPY
cdef sys_getframe
cdef sys_exc_info
cdef extern from "greenlet/greenlet.h":
......@@ -25,6 +27,23 @@ cdef inline void greenlet_init():
PyGreenlet_Import()
_greenlet_imported = True
cdef extern from "Python.h":
ctypedef class types.CodeType [object PyCodeObject]:
pass
cdef extern from "frameobject.h":
ctypedef class types.FrameType [object PyFrameObject]:
cdef CodeType f_code
cdef int f_lineno
# We can't declare this in the object, because it's
# allowed to be NULL, and Cython can't handle that.
# We have to go through the python machinery to get a
# proper None instead.
# cdef FrameType f_back
cdef void _init()
cdef class SpawnedLink:
......@@ -42,18 +61,18 @@ cdef class FailureSpawnedLink(SpawnedLink):
@cython.final
@cython.internal
cdef class _Frame:
cdef readonly object f_code
cdef readonly CodeType f_code
cdef readonly int f_lineno
cdef public _Frame f_back
cdef readonly _Frame f_back
@cython.final
@cython.locals(
previous=_Frame,
first=_Frame,
next_frame=_Frame)
cdef _Frame _extract_stack(int limit, _Frame f_back)
@cython.locals(frames=list,frame=FrameType)
cdef inline list _extract_stack(int limit)
@cython.final
@cython.locals(previous=_Frame, frame=tuple, f=_Frame)
cdef _Frame _Frame_from_list(list frames)
cdef class Greenlet(greenlet):
......@@ -61,7 +80,10 @@ cdef class Greenlet(greenlet):
cdef readonly args
cdef readonly object spawning_greenlet
cdef public dict spawn_tree_locals
cdef readonly _Frame spawning_stack
# This is accessed with getattr() dynamically so it
# must be visible to Python
cdef readonly list _spawning_stack_frames
cdef list _links
cdef tuple _exc_info
......
......@@ -2,13 +2,15 @@
# cython: auto_pickle=False,embedsignature=True,always_allow_keywords=False
from __future__ import absolute_import, print_function, division
import sys
from sys import _getframe as sys_getframe
from sys import exc_info as sys_exc_info
from weakref import ref as wref
from greenlet import greenlet
from gevent._compat import reraise
from gevent._compat import PYPY as _PYPY
from gevent._tblib import dump_traceback
from gevent._tblib import load_traceback
from gevent.hub import GreenletExit
......@@ -18,8 +20,7 @@ from gevent.hub import get_hub
from gevent.hub import iwait
from gevent.hub import wait
from gevent.timeout import Timeout
_PYPY = hasattr(sys, 'pypy_version_info')
from gevent._util import Lazy
__all__ = [
......@@ -108,26 +109,28 @@ class _Frame(object):
self.f_lineno = f_lineno
self.f_back = None
f_globals = property(lambda _self: None)
@property
def f_globals(self):
return None
def _extract_stack(limit, f_back):
def _Frame_from_list(frames):
previous = None
frame = sys._getframe()
first = None
for frame in reversed(frames):
f = _Frame(*frame)
f.f_back = previous
previous = f
return previous
first = previous = _Frame(frame.f_code, frame.f_lineno)
limit -= 1
frame = frame.f_back
def _extract_stack(limit):
frame = sys_getframe()
frames = []
while limit and frame is not None:
limit -= 1
next_frame = _Frame(frame.f_code, frame.f_lineno)
previous.f_back = next_frame
previous = next_frame
frames.append((frame.f_code, frame.f_lineno))
frame = frame.f_back
previous.f_back = f_back
return first
return frames
_greenlet__init__ = greenlet.__init__
......@@ -274,9 +277,16 @@ class Greenlet(greenlet):
# Its children get separate locals.
spawner.spawn_tree_locals = self.spawn_tree_locals
self._spawning_stack_frames = _extract_stack(self.spawning_stack_limit)
self._spawning_stack_frames.extend(getattr(spawner, '_spawning_stack_frames', []))
self.spawning_stack = _extract_stack(self.spawning_stack_limit,
getattr(spawner, 'spawning_stack', None))
@Lazy
def spawning_stack(self):
# Store this in the __dict__. We don't use it from the C
# code. It's tempting to discard _spawning_stack_frames
# after this, but child greenlets may still be created
# that need it.
return _Frame_from_list(self._spawning_stack_frames)
def _get_minimal_ident(self):
reg = self.parent.ident_registry
......@@ -476,9 +486,9 @@ class Greenlet(greenlet):
.. versionadded:: 1.1
"""
exc_info = self._exc_info
if exc_info is not None and exc_info[0] is not None:
return (exc_info[0], exc_info[1], load_traceback(exc_info[2]))
ei = self._exc_info
if ei is not None and ei[0] is not None:
return (ei[0], ei[1], load_traceback(ei[2]))
def throw(self, *args):
"""Immediately switch into the greenlet and raise an exception in it.
......@@ -708,7 +718,7 @@ class Greenlet(greenlet):
try:
result = self._run(*self.args, **self.kwargs)
except: # pylint:disable=bare-except
self._report_error(sys.exc_info())
self._report_error(sys_exc_info())
return
self._report_result(result)
finally:
......@@ -801,7 +811,7 @@ class Greenlet(greenlet):
try:
link(self)
except: # pylint:disable=bare-except
self.parent.handle_error((link, self), *sys.exc_info())
self.parent.handle_error((link, self), *sys_exc_info())
class _dummy_event(object):
......@@ -828,7 +838,7 @@ def _kill(glet, exception, waiter):
glet.throw(exception)
except: # pylint:disable=bare-except
# XXX do we need this here?
glet.parent.handle_error(glet, *sys.exc_info())
glet.parent.handle_error(glet, *sys_exc_info())
if waiter is not None:
waiter.switch()
......@@ -863,7 +873,7 @@ def _killall3(greenlets, exception, waiter):
try:
g.throw(exception)
except: # pylint:disable=bare-except
g.parent.handle_error(g, *sys.exc_info())
g.parent.handle_error(g, *sys_exc_info())
if not g.dead:
diehards.append(g)
waiter.switch(diehards)
......@@ -875,7 +885,7 @@ def _killall(greenlets, exception):
try:
g.throw(exception)
except: # pylint:disable=bare-except
g.parent.handle_error(g, *sys.exc_info())
g.parent.handle_error(g, *sys_exc_info())
def killall(greenlets, exception=GreenletExit, block=True, timeout=None):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment