Commit 54211e32 authored by Jeroen Demeyer, committed by Stefan Behnel

Support METH_FASTCALL for Cython functions (GH-3101)

parent ff8e254e
...@@ -3111,11 +3111,9 @@ class DefNode(FuncDefNode): ...@@ -3111,11 +3111,9 @@ class DefNode(FuncDefNode):
# METH_VARARGS to METH_FASTCALL # METH_VARARGS to METH_FASTCALL
# 2. Special methods like __call__ always use the METH_VARARGS # 2. Special methods like __call__ always use the METH_VARARGS
# calling convention # calling convention
# 3. For the moment, CyFunctions do not support METH_FASTCALL
mf = sig.method_flags() mf = sig.method_flags()
if (mf and TypeSlots.method_varargs in mf and if mf and TypeSlots.method_varargs in mf and not self.entry.is_special:
not self.entry.is_special and not self.is_cyfunction): # 3. If the function uses the full args tuple, it's more
# 4. If the function uses the full args tuple, it's more
# efficient to use METH_VARARGS. This happens when the function # efficient to use METH_VARARGS. This happens when the function
# takes *args but no other positional arguments (apart from # takes *args but no other positional arguments (apart from
# possibly self). We don't do the analogous check for keyword # possibly self). We don't do the analogous check for keyword
......
...@@ -1939,9 +1939,6 @@ if VALUE is not None: ...@@ -1939,9 +1939,6 @@ if VALUE is not None:
rhs.binding = True rhs.binding = True
node.is_cyfunction = rhs.binding node.is_cyfunction = rhs.binding
if rhs.binding:
# For the moment, CyFunctions do not support METH_FASTCALL
node.entry.signature.use_fastcall = False
return self._create_assignment(node, rhs, env) return self._create_assignment(node, rhs, env)
def _create_assignment(self, def_node, rhs, env): def _create_assignment(self, def_node, rhs, env):
......
...@@ -20,6 +20,9 @@ ...@@ -20,6 +20,9 @@
typedef struct { typedef struct {
PyCFunctionObject func; PyCFunctionObject func;
#if CYTHON_BACKPORT_VECTORCALL
__pyx_vectorcallfunc func_vectorcall;
#endif
#if PY_VERSION_HEX < 0x030500A0 #if PY_VERSION_HEX < 0x030500A0
PyObject *func_weakreflist; PyObject *func_weakreflist;
#endif #endif
...@@ -71,10 +74,22 @@ static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m, ...@@ -71,10 +74,22 @@ static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m,
static int __pyx_CyFunction_init(void); static int __pyx_CyFunction_init(void);
#if CYTHON_METH_FASTCALL
// Vectorcall entry points for CyFunction objects. Each one handles a
// different calling convention (ml_flags) of the wrapped PyMethodDef.
static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames);
static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames);
static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames);
// Accessor (usable as an lvalue) for the vectorcall function pointer of a
// CyFunction: with the backport it is the separate "func_vectorcall" struct
// member, otherwise the "vectorcall" member of the embedded "func" struct.
#if CYTHON_BACKPORT_VECTORCALL
#define __Pyx_CyFunction_func_vectorcall(f) (((__pyx_CyFunctionObject*)f)->func_vectorcall)
#else
#define __Pyx_CyFunction_func_vectorcall(f) (((__pyx_CyFunctionObject*)f)->func.vectorcall)
#endif
#endif
//////////////////// CythonFunction //////////////////// //////////////////// CythonFunction ////////////////////
//@substitute: naming //@substitute: naming
//@requires: CommonStructures.c::FetchCommonType //@requires: CommonStructures.c::FetchCommonType
//@requires: ObjectHandling.c::PyMethodNew //@requires: ObjectHandling.c::PyMethodNew
//@requires: ObjectHandling.c::PyVectorcallFastCallDict
#include <structmember.h> #include <structmember.h>
...@@ -477,6 +492,27 @@ static PyObject *__Pyx_CyFunction_New(PyTypeObject *type, PyMethodDef *ml, int f ...@@ -477,6 +492,27 @@ static PyObject *__Pyx_CyFunction_New(PyTypeObject *type, PyMethodDef *ml, int f
op->defaults_kwdict = NULL; op->defaults_kwdict = NULL;
op->defaults_getter = NULL; op->defaults_getter = NULL;
op->func_annotations = NULL; op->func_annotations = NULL;
#if CYTHON_METH_FASTCALL
switch (ml->ml_flags & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS)) {
case METH_NOARGS:
__Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_NOARGS;
break;
case METH_O:
__Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_O;
break;
// case METH_FASTCALL is not used
case METH_FASTCALL | METH_KEYWORDS:
__Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS;
break;
// case METH_VARARGS is not used
case METH_VARARGS | METH_KEYWORDS:
__Pyx_CyFunction_func_vectorcall(op) = NULL;
break;
default:
PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction");
return NULL;
}
#endif
PyObject_GC_Track(op); PyObject_GC_Track(op);
return (PyObject *) op; return (PyObject *) op;
} }
...@@ -609,10 +645,7 @@ static PyObject * __Pyx_CyFunction_CallMethod(PyObject *func, PyObject *self, Py ...@@ -609,10 +645,7 @@ static PyObject * __Pyx_CyFunction_CallMethod(PyObject *func, PyObject *self, Py
} }
break; break;
default: default:
PyErr_SetString(PyExc_SystemError, "Bad call flags in " PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction");
"__Pyx_CyFunction_Call. METH_OLDARGS is no "
"longer supported!");
return NULL; return NULL;
} }
PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments", PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments",
...@@ -627,6 +660,16 @@ static CYTHON_INLINE PyObject *__Pyx_CyFunction_Call(PyObject *func, PyObject *a ...@@ -627,6 +660,16 @@ static CYTHON_INLINE PyObject *__Pyx_CyFunction_Call(PyObject *func, PyObject *a
static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, PyObject *kw) { static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, PyObject *kw) {
PyObject *result; PyObject *result;
__pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *) func; __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *) func;
#if CYTHON_METH_FASTCALL
/* Prefer vectorcall if available. This is not the typical case, as
* CPython would normally use vectorcall directly instead of tp_call. */
__pyx_vectorcallfunc vc = __Pyx_CyFunction_func_vectorcall(cyfunc);
if (vc) {
return __Pyx_PyVectorcall_FastCallDict(func, vc, &PyTuple_GET_ITEM(args, 0), PyTuple_GET_SIZE(args), kw);
}
#endif
if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) {
Py_ssize_t argc; Py_ssize_t argc;
PyObject *new_args; PyObject *new_args;
...@@ -652,19 +695,142 @@ static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, P ...@@ -652,19 +695,142 @@ static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, P
return result; return result;
} }
#if CYTHON_METH_FASTCALL
// Validate the arguments of a vectorcall and decide where "self" comes from.
// Any non-empty kwnames tuple is rejected with a TypeError, so callers that
// accept keyword arguments must pass kwnames=NULL to skip that check.
// Result:
//    1: "self" is the first positional argument, args[0]
//    0: "self" is cyfunc->func.m_self
//   -1: an exception has been set
static CYTHON_INLINE int __Pyx_CyFunction_Vectorcall_CheckArgs(__pyx_CyFunctionObject *cyfunc, Py_ssize_t nargs, PyObject *kwnames)
{
    // Functions flagged CCLASS but not STATICMETHOD take "self" from args[0].
    const int self_in_args =
        (cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD);

    // The missing-self check comes first, before keywords are looked at.
    if (self_in_args && unlikely(nargs < 1)) {
        PyErr_Format(PyExc_TypeError, "%.200s() needs an argument",
                     cyfunc->func.m_ml->ml_name);
        return -1;
    }

    if (unlikely(kwnames) && PyTuple_GET_SIZE(kwnames)) {
        PyErr_Format(PyExc_TypeError,
                     "%.200s() takes no keyword arguments", cyfunc->func.m_ml->ml_name);
        return -1;
    }

    return self_in_args;
}
// Vectorcall entry point that invokes the wrapped ml_meth as (self, NULL).
// Keyword arguments are rejected, and once "self" has been taken out no
// positional arguments may remain. Returns the result of ml_meth, or NULL
// with an exception set.
static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames)
{
    __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func;
    PyMethodDef* def = cyfunc->func.m_ml;
    PyObject *self;
    int self_mode;
#if CYTHON_BACKPORT_VECTORCALL
    Py_ssize_t nargs = (Py_ssize_t)nargsf;
#else
    Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
#endif

    self_mode = __Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames);
    if (self_mode == 1) {
        // "self" is the first positional argument: consume it.
        self = args[0];
        args += 1;
        nargs -= 1;
    } else if (self_mode == 0) {
        self = cyfunc->func.m_self;
    } else {
        // CheckArgs has already set the exception.
        return NULL;
    }

    if (unlikely(nargs != 0)) {
        PyErr_Format(PyExc_TypeError,
                     "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)",
                     def->ml_name, nargs);
        return NULL;
    }
    return def->ml_meth(self, NULL);
}
// Vectorcall entry point that invokes the wrapped ml_meth as (self, arg).
// Keyword arguments are rejected, and once "self" has been taken out exactly
// one positional argument must remain. Returns the result of ml_meth, or
// NULL with an exception set.
static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames)
{
    __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func;
    PyMethodDef* def = cyfunc->func.m_ml;
    PyObject *self;
    int self_mode;
#if CYTHON_BACKPORT_VECTORCALL
    Py_ssize_t nargs = (Py_ssize_t)nargsf;
#else
    Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
#endif

    self_mode = __Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames);
    if (self_mode == 1) {
        // "self" is the first positional argument: consume it.
        self = args[0];
        args += 1;
        nargs -= 1;
    } else if (self_mode == 0) {
        self = cyfunc->func.m_self;
    } else {
        // CheckArgs has already set the exception.
        return NULL;
    }

    if (unlikely(nargs != 1)) {
        PyErr_Format(PyExc_TypeError,
                     "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)",
                     def->ml_name, nargs);
        return NULL;
    }
    return def->ml_meth(self, args[0]);
}
// Vectorcall entry point that invokes the wrapped ml_meth as
// (self, args, nargs, kwnames). The keyword names are not checked here
// (CheckArgs gets kwnames=NULL); they are passed to the wrapped function
// unchanged. Returns the result of ml_meth, or NULL with an exception set.
static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames)
{
    __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func;
    PyMethodDef* def = cyfunc->func.m_ml;
    PyObject *self;
    int self_mode;
#if CYTHON_BACKPORT_VECTORCALL
    Py_ssize_t nargs = (Py_ssize_t)nargsf;
#else
    Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
#endif

    self_mode = __Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL);
    if (self_mode == 1) {
        // "self" is the first positional argument: consume it.
        self = args[0];
        args += 1;
        nargs -= 1;
    } else if (self_mode == 0) {
        self = cyfunc->func.m_self;
    } else {
        // CheckArgs has already set the exception.
        return NULL;
    }

    // ml_meth is stored with the generic PyCFunction type; cast it via
    // void(*)(void) to the signature that is actually called.
    return ((_PyCFunctionFastWithKeywords)(void(*)(void))def->ml_meth)(self, args, nargs, kwnames);
}
#endif
static PyTypeObject __pyx_CyFunctionType_type = { static PyTypeObject __pyx_CyFunctionType_type = {
PyVarObject_HEAD_INIT(0, 0) PyVarObject_HEAD_INIT(0, 0)
"cython_function_or_method", /*tp_name*/ "cython_function_or_method", /*tp_name*/
sizeof(__pyx_CyFunctionObject), /*tp_basicsize*/ sizeof(__pyx_CyFunctionObject), /*tp_basicsize*/
0, /*tp_itemsize*/ 0, /*tp_itemsize*/
(destructor) __Pyx_CyFunction_dealloc, /*tp_dealloc*/ (destructor) __Pyx_CyFunction_dealloc, /*tp_dealloc*/
#if !CYTHON_METH_FASTCALL
0, /*tp_print*/ 0, /*tp_print*/
#elif CYTHON_BACKPORT_VECTORCALL
(printfunc)offsetof(__pyx_CyFunctionObject, func_vectorcall), /*tp_vectorcall_offset backported into tp_print*/
#else
offsetof(__pyx_CyFunctionObject, func.vectorcall), /*tp_vectorcall_offset*/
#endif
0, /*tp_getattr*/ 0, /*tp_getattr*/
0, /*tp_setattr*/ 0, /*tp_setattr*/
#if PY_MAJOR_VERSION < 3 #if PY_MAJOR_VERSION < 3
0, /*tp_compare*/ 0, /*tp_compare*/
#else #else
0, /*reserved*/ 0, /*tp_as_async*/
#endif #endif
(reprfunc) __Pyx_CyFunction_repr, /*tp_repr*/ (reprfunc) __Pyx_CyFunction_repr, /*tp_repr*/
0, /*tp_as_number*/ 0, /*tp_as_number*/
...@@ -678,6 +844,9 @@ static PyTypeObject __pyx_CyFunctionType_type = { ...@@ -678,6 +844,9 @@ static PyTypeObject __pyx_CyFunctionType_type = {
0, /*tp_as_buffer*/ 0, /*tp_as_buffer*/
#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR #ifdef Py_TPFLAGS_METHOD_DESCRIPTOR
Py_TPFLAGS_METHOD_DESCRIPTOR | Py_TPFLAGS_METHOD_DESCRIPTOR |
#endif
#ifdef _Py_TPFLAGS_HAVE_VECTORCALL
_Py_TPFLAGS_HAVE_VECTORCALL |
#endif #endif
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
0, /*tp_doc*/ 0, /*tp_doc*/
...@@ -1145,7 +1314,7 @@ static PyTypeObject __pyx_FusedFunctionType_type = { ...@@ -1145,7 +1314,7 @@ static PyTypeObject __pyx_FusedFunctionType_type = {
#if PY_MAJOR_VERSION < 3 #if PY_MAJOR_VERSION < 3
0, /*tp_compare*/ 0, /*tp_compare*/
#else #else
0, /*reserved*/ 0, /*tp_as_async*/
#endif #endif
0, /*tp_repr*/ 0, /*tp_repr*/
0, /*tp_as_number*/ 0, /*tp_as_number*/
......
...@@ -211,6 +211,9 @@ ...@@ -211,6 +211,9 @@
#define CYTHON_VECTORCALL (CYTHON_FAST_PYCCALL && PY_VERSION_HEX >= 0x030800B1) #define CYTHON_VECTORCALL (CYTHON_FAST_PYCCALL && PY_VERSION_HEX >= 0x030800B1)
#endif #endif
/* Whether to use METH_FASTCALL with a fake backported implementation of vectorcall */
#define CYTHON_BACKPORT_VECTORCALL (CYTHON_METH_FASTCALL && PY_VERSION_HEX < 0x030800B1)
#if CYTHON_USE_PYLONG_INTERNALS #if CYTHON_USE_PYLONG_INTERNALS
#include "longintrepr.h" #include "longintrepr.h"
/* These short defines can easily conflict with other code */ /* These short defines can easily conflict with other code */
...@@ -469,6 +472,13 @@ class __Pyx_FakeReference { ...@@ -469,6 +472,13 @@ class __Pyx_FakeReference {
#define __Pyx_PyCFunction_FastCallWithKeywords PyCFunctionWithKeywords #define __Pyx_PyCFunction_FastCallWithKeywords PyCFunctionWithKeywords
#endif #endif
/* Function pointer type of a vectorcall implementation: an alias for
 * "vectorcallfunc" when CYTHON_VECTORCALL is set, otherwise (for the
 * backport) a typedef declared here. Not defined if neither is set. */
#if CYTHON_VECTORCALL
#define __pyx_vectorcallfunc vectorcallfunc
#elif CYTHON_BACKPORT_VECTORCALL
typedef PyObject *(*__pyx_vectorcallfunc)(PyObject *callable, PyObject *const *args,
size_t nargsf, PyObject *kwnames);
#endif
#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc) #if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc)
#define PyObject_Malloc(s) PyMem_Malloc(s) #define PyObject_Malloc(s) PyMem_Malloc(s)
#define PyObject_Free(p) PyMem_Free(p) #define PyObject_Free(p) PyMem_Free(p)
......
...@@ -1945,6 +1945,11 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCall(PyObject *func, PyObject ...@@ -1945,6 +1945,11 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCall(PyObject *func, PyObject
if (f) { if (f) {
return f(func, args, nargs, NULL); return f(func, args, nargs, NULL);
} }
#elif __Pyx_CyFunction_USED && CYTHON_BACKPORT_VECTORCALL
if (Py_TYPE(func) == __pyx_CyFunctionType) {
__pyx_vectorcallfunc f = __Pyx_CyFunction_func_vectorcall(func);
if (f) return f(func, args, nargs, NULL);
}
#endif #endif
if (nargs == 0) { if (nargs == 0) {
...@@ -2310,6 +2315,79 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) { ...@@ -2310,6 +2315,79 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) {
} }
/////////////// PyVectorcallFastCallDict.proto ///////////////
#if CYTHON_METH_FASTCALL
// Call "func" through the vectorcall function "vc" with positional arguments
// args[0..nargs-1] and keyword arguments given as a dict "kw" (may be NULL).
static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, Py_ssize_t nargs, PyObject *kw);
#endif
/////////////// PyVectorcallFastCallDict ///////////////
#if CYTHON_METH_FASTCALL
// Slow path of __Pyx_PyVectorcall_FastCallDict() when kw is non-empty.
//
// Converts the keyword dict "kw" into the vectorcall layout: a temporary
// array holding the "nargs" positional arguments followed by the keyword
// values, plus a tuple "kwnames" with the matching keyword names. It then
// calls vc(func, array, nargs, kwnames).
//
// Returns a new reference to the call result, or NULL with an exception set
// (MemoryError on allocation failure, TypeError if a key is not a string).
static PyObject *__Pyx_PyVectorcall_FastCallDict_kw(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, Py_ssize_t nargs, PyObject *kw)
{
    // Code based on _PyObject_FastCallDict() and _PyStack_UnpackDict() from CPython
    PyObject *res = NULL;
    PyObject *kwnames;
    PyObject **newargs;
    PyObject **kwvalues;
    Py_ssize_t i, pos;
    PyObject *key, *value;
    unsigned long keys_are_strings;
    Py_ssize_t nkw = PyDict_GET_SIZE(kw);

    // Refuse argument counts whose byte size would overflow the
    // multiplication in the allocation below.
    if (unlikely((size_t)nargs > (size_t)PY_SSIZE_T_MAX / sizeof(args[0]) - (size_t)nkw)) {
        PyErr_NoMemory();
        return NULL;
    }

    // Copy positional arguments (borrowed references, as in "args")
    newargs = (PyObject **)PyMem_Malloc((size_t)(nargs + nkw) * sizeof(args[0]));
    if (unlikely(newargs == NULL)) {
        PyErr_NoMemory();
        return NULL;
    }
    for (i = 0; i < nargs; i++) newargs[i] = args[i];

    // Copy keyword arguments
    kwnames = PyTuple_New(nkw);
    if (unlikely(kwnames == NULL)) {
        PyMem_Free(newargs);
        return NULL;
    }
    kwvalues = newargs + nargs;
    pos = i = 0;
    // AND together the type flags of all keys: the unicode-subclass bit
    // survives only if every key has it set.
    keys_are_strings = Py_TPFLAGS_UNICODE_SUBCLASS;
    while (PyDict_Next(kw, &pos, &key, &value)) {
        keys_are_strings &= Py_TYPE(key)->tp_flags;
        // Own a reference to each name (stolen by the tuple) and each value
        // (released in the cleanup section) for the duration of the call.
        Py_INCREF(key);
        Py_INCREF(value);
        PyTuple_SET_ITEM(kwnames, i, key);
        kwvalues[i] = value;
        i++;
    }
    if (unlikely(!keys_are_strings)) {
        PyErr_SetString(PyExc_TypeError, "keywords must be strings");
        goto cleanup;
    }

    // The actual call
    res = vc(func, newargs, nargs, kwnames);

cleanup:
    // Reached on success and on the "keywords must be strings" error: at this
    // point the tuple and all nkw value slots are fully populated.
    Py_DECREF(kwnames);
    for (i = 0; i < nkw; i++)
        Py_DECREF(kwvalues[i]);
    PyMem_Free(newargs);
    return res;
}
// Call "func" through its vectorcall function "vc", taking keyword arguments
// as a dict. A NULL or empty "kw" is forwarded as kwnames=NULL without any
// conversion; only a non-empty dict goes through the slower unpacking helper.
static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, Py_ssize_t nargs, PyObject *kw)
{
    if (unlikely(kw != NULL) && PyDict_GET_SIZE(kw) != 0) {
        return __Pyx_PyVectorcall_FastCallDict_kw(func, vc, args, nargs, kw);
    }
    return vc(func, args, nargs, NULL);
}
#endif
/////////////// MatrixMultiply.proto /////////////// /////////////// MatrixMultiply.proto ///////////////
#if PY_VERSION_HEX >= 0x03050000 #if PY_VERSION_HEX >= 0x03050000
......
...@@ -68,17 +68,14 @@ cdef class SelfCast: ...@@ -68,17 +68,14 @@ cdef class SelfCast:
cdef extern from *: cdef extern from *:
int PyCFunction_Check(op)
int PyCFunction_GET_FLAGS(op) int PyCFunction_GET_FLAGS(op)
def has_fastcall(meth): def has_fastcall(meth):
""" """
Given a builtin_function_or_method ``meth``, return whether it uses Given a builtin_function_or_method or cyfunction ``meth``,
``METH_FASTCALL``. return whether it uses ``METH_FASTCALL``.
""" """
if not PyCFunction_Check(meth):
raise TypeError("not a builtin_function_or_method")
# Hardcode METH_FASTCALL constant equal to 0x80 for simplicity # Hardcode METH_FASTCALL constant equal to 0x80 for simplicity
return bool(PyCFunction_GET_FLAGS(meth) & 0x80) return bool(PyCFunction_GET_FLAGS(meth) & 0x80)
...@@ -100,6 +97,13 @@ def fastcall_function(**kw): ...@@ -100,6 +97,13 @@ def fastcall_function(**kw):
""" """
return kw return kw
@cython.binding(True)
def fastcall_cyfunction(**kw):
    """
    Module-level function compiled with ``binding=True``; the doctest
    checks that it is called using METH_FASTCALL.

    >>> assert_fastcall(fastcall_cyfunction)
    """
    return kw
cdef class Dummy: cdef class Dummy:
@cython.binding(False) @cython.binding(False)
def fastcall_method(self, x, *args, **kw): def fastcall_method(self, x, *args, **kw):
...@@ -107,3 +111,18 @@ cdef class Dummy: ...@@ -107,3 +111,18 @@ cdef class Dummy:
>>> assert_fastcall(Dummy().fastcall_method) >>> assert_fastcall(Dummy().fastcall_method)
""" """
return tuple(args) + tuple(kw) return tuple(args) + tuple(kw)
cdef class CyDummy:
    # cdef class whose method is compiled with binding=True.
    @cython.binding(True)
    def fastcall_method(self, x, *args, **kw):
        """
        Return the extra positional arguments followed by the keyword names.

        >>> assert_fastcall(CyDummy.fastcall_method)
        """
        extra_positional = tuple(args)
        keyword_names = tuple(kw)
        return extra_positional + keyword_names
class PyDummy:
    # Regular Python class; its method is checked for METH_FASTCALL as well.
    def fastcall_method(self, x, *args, **kw):
        """
        Return the extra positional arguments followed by the keyword names.

        >>> assert_fastcall(PyDummy.fastcall_method)
        """
        extra_positional = tuple(args)
        keyword_names = tuple(kw)
        return extra_positional + keyword_names
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment