Commit 677efaa6 authored by Stefan Behnel

optimise 1-arg/no-args calls to PyCFunction by avoiding tuple packing if the function allows it

--HG--
extra : transplant_source : %E7%F6%F6%0F%80%3A%B2%BDs%7E%8D%80p%2AYX%3B%08%DBL
parent e9b8307e
...@@ -56,6 +56,8 @@ Features added ...@@ -56,6 +56,8 @@ Features added
Optimizations Optimizations
------------- -------------
* Simple calls to C implemented Python functions/methods are faster.
* The "and"/"or" operators try to avoid unnecessary coercions of their * The "and"/"or" operators try to avoid unnecessary coercions of their
arguments. They now evaluate the truth value of each argument arguments. They now evaluate the truth value of each argument
independently and only coerce the final result of the whole expression independently and only coerce the final result of the whole expression
...@@ -71,8 +73,6 @@ Optimizations ...@@ -71,8 +73,6 @@ Optimizations
* Calls to ``slice()`` are translated to a straight C-API call. * Calls to ``slice()`` are translated to a straight C-API call.
* Simple Python method calls are about 10% faster.
Bugs fixed Bugs fixed
---------- ----------
......
...@@ -4701,15 +4701,24 @@ class SimpleCallNode(CallNode): ...@@ -4701,15 +4701,24 @@ class SimpleCallNode(CallNode):
if self.function.entry and self.function.entry.utility_code: if self.function.entry and self.function.entry.utility_code:
code.globalstate.use_utility_code(self.function.entry.utility_code) code.globalstate.use_utility_code(self.function.entry.utility_code)
if func_type.is_pyobject: if func_type.is_pyobject:
arg_code = self.arg_tuple.py_result() if func_type is not type_type and not self.arg_tuple.args and self.arg_tuple.is_literal:
code.globalstate.use_utility_code(UtilityCode.load_cached( code.globalstate.use_utility_code(UtilityCode.load_cached(
"PyObjectCall", "ObjectHandling.c")) "PyObjectCallNoArg", "ObjectHandling.c"))
code.putln( code.putln(
"%s = __Pyx_PyObject_Call(%s, %s, NULL); %s" % ( "%s = __Pyx_PyObject_CallNoArg(%s); %s" % (
self.result(), self.result(),
self.function.py_result(), self.function.py_result(),
arg_code, code.error_goto_if_null(self.result(), self.pos)))
code.error_goto_if_null(self.result(), self.pos))) else:
arg_code = self.arg_tuple.py_result()
code.globalstate.use_utility_code(UtilityCode.load_cached(
"PyObjectCall", "ObjectHandling.c"))
code.putln(
"%s = __Pyx_PyObject_Call(%s, %s, NULL); %s" % (
self.result(),
self.function.py_result(),
arg_code,
code.error_goto_if_null(self.result(), self.pos)))
code.put_gotref(self.py_result()) code.put_gotref(self.py_result())
elif func_type.is_cfunction: elif func_type.is_cfunction:
if self.has_optional_args: if self.has_optional_args:
...@@ -4800,14 +4809,14 @@ class PyMethodCallNode(SimpleCallNode): ...@@ -4800,14 +4809,14 @@ class PyMethodCallNode(SimpleCallNode):
self.allocate_temp_result(code) self.allocate_temp_result(code)
self.function.generate_evaluation_code(code) self.function.generate_evaluation_code(code)
assert self.arg_tuple.mult_factor is None
args = self.arg_tuple.args args = self.arg_tuple.args
for arg in args: for arg in args:
arg.generate_evaluation_code(code) arg.generate_evaluation_code(code)
self_arg = code.funcstate.allocate_temp(py_object_type, manage_ref=True) self_arg = code.funcstate.allocate_temp(py_object_type, manage_ref=bool(args))
function = code.funcstate.allocate_temp(py_object_type, manage_ref=False) function = code.funcstate.allocate_temp(py_object_type, manage_ref=False)
arg_offset = code.funcstate.allocate_temp(PyrexTypes.c_py_ssize_t_type, manage_ref=False) arg_offset = code.funcstate.allocate_temp(PyrexTypes.c_py_ssize_t_type, manage_ref=False)
args_tuple = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
code.putln("%s = 0;" % arg_offset) code.putln("%s = 0;" % arg_offset)
code.putln("%s = %s;" % (function, self.function.py_result())) code.putln("%s = %s;" % (function, self.function.py_result()))
...@@ -4818,47 +4827,72 @@ class PyMethodCallNode(SimpleCallNode): ...@@ -4818,47 +4827,72 @@ class PyMethodCallNode(SimpleCallNode):
# the following is always true in Py3 (kept only for safety), # the following is always true in Py3 (kept only for safety),
# but is false for unbound methods in Py2 # but is false for unbound methods in Py2
code.putln("if (likely(%s)) {" % self_arg) code.putln("if (likely(%s)) {" % self_arg)
code.put_incref(self_arg, py_object_type) if args:
code.put_incref(self_arg, py_object_type)
code.putln("%s = PyMethod_GET_FUNCTION(%s);" % (function, function)) code.putln("%s = PyMethod_GET_FUNCTION(%s);" % (function, function))
code.putln("%s = 1;" % arg_offset) code.putln("%s = 1;" % arg_offset)
code.putln("}") code.putln("}")
code.putln("}") code.putln("}")
code.putln("%s = PyTuple_New(%d+%s); %s" % ( if not args:
args_tuple, len(args), arg_offset, # fastest special case: try to avoid tuple creation
code.error_goto_if_null(args_tuple, self.pos))) code.putln("if (%s == 1) {" % arg_offset)
code.put_gotref(args_tuple) code.funcstate.release_temp(arg_offset)
code.globalstate.use_utility_code(
UtilityCode.load_cached("PyObjectCallOneArg", "ObjectHandling.c"))
code.putln(
"%s = __Pyx_PyObject_CallOneArg(%s, %s); %s" % (
self.result(),
function, self_arg,
code.error_goto_if_null(self.result(), self.pos)))
code.funcstate.release_temp(self_arg) # borrowed ref in this case
code.putln("} else {")
code.globalstate.use_utility_code(
UtilityCode.load_cached("PyObjectCallNoArg", "ObjectHandling.c"))
code.putln(
"%s = __Pyx_PyObject_CallNoArg(%s); %s" % (
self.result(),
function,
code.error_goto_if_null(self.result(), self.pos)))
code.putln("}")
code.put_gotref(self.py_result())
else:
args_tuple = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
code.putln("%s = PyTuple_New(%d+%s); %s" % (
args_tuple, len(args), arg_offset,
code.error_goto_if_null(args_tuple, self.pos)))
code.put_gotref(args_tuple)
code.putln("if (%s == 1) {" % arg_offset) code.putln("if (%s == 1) {" % arg_offset)
code.putln("PyTuple_SET_ITEM(%s, 0, %s); __Pyx_GIVEREF(%s); %s = NULL;" % ( code.putln("PyTuple_SET_ITEM(%s, 0, %s); __Pyx_GIVEREF(%s); %s = NULL;" % (
args_tuple, self_arg, self_arg, self_arg)) args_tuple, self_arg, self_arg, self_arg)) # stealing owned ref in this case
code.funcstate.release_temp(self_arg) code.funcstate.release_temp(self_arg)
code.putln("}") code.putln("}")
for i, arg in enumerate(args): for i, arg in enumerate(args):
arg.make_owned_reference(code) arg.make_owned_reference(code)
code.putln("PyTuple_SET_ITEM(%s, %d+%s, %s);" % ( code.putln("PyTuple_SET_ITEM(%s, %d+%s, %s);" % (
args_tuple, i, arg_offset, arg.py_result())) args_tuple, i, arg_offset, arg.py_result()))
code.put_giveref(arg.py_result()) code.put_giveref(arg.py_result())
code.funcstate.release_temp(arg_offset) code.funcstate.release_temp(arg_offset)
for arg in args: for arg in args:
arg.generate_post_assignment_code(code) arg.generate_post_assignment_code(code)
arg.free_temps(code) arg.free_temps(code)
code.globalstate.use_utility_code( code.globalstate.use_utility_code(
UtilityCode.load_cached("PyObjectCall", "ObjectHandling.c")) UtilityCode.load_cached("PyObjectCall", "ObjectHandling.c"))
code.putln( code.putln(
"%s = __Pyx_PyObject_Call(%s, %s, NULL); %s" % ( "%s = __Pyx_PyObject_Call(%s, %s, NULL); %s" % (
self.result(), self.result(),
function, args_tuple, function, args_tuple,
code.error_goto_if_null(self.result(), self.pos))) code.error_goto_if_null(self.result(), self.pos)))
code.put_gotref(self.py_result()) code.put_gotref(self.py_result())
code.put_decref_clear(args_tuple, py_object_type) code.put_decref_clear(args_tuple, py_object_type)
code.funcstate.release_temp(args_tuple) code.funcstate.release_temp(args_tuple)
code.funcstate.release_temp(function)
code.funcstate.release_temp(function)
self.function.generate_disposal_code(code) self.function.generate_disposal_code(code)
self.function.free_temps(code) self.function.free_temps(code)
......
...@@ -3754,7 +3754,7 @@ class FinalOptimizePhase(Visitor.CythonTransform): ...@@ -3754,7 +3754,7 @@ class FinalOptimizePhase(Visitor.CythonTransform):
Replace likely Python method calls by a specialised PyMethodCallNode. Replace likely Python method calls by a specialised PyMethodCallNode.
""" """
self.visitchildren(node) self.visitchildren(node)
if node.function.type.is_cfunction and isinstance(node.function, ExprNodes.NameNode): if node.function.type.is_cfunction and node.function.is_name:
if node.function.name == 'isinstance' and len(node.args) == 2: if node.function.name == 'isinstance' and len(node.args) == 2:
type_arg = node.args[1] type_arg = node.args[1]
if type_arg.type.is_builtin_type and type_arg.type.name == 'type': if type_arg.type.is_builtin_type and type_arg.type.name == 'type':
...@@ -3763,11 +3763,11 @@ class FinalOptimizePhase(Visitor.CythonTransform): ...@@ -3763,11 +3763,11 @@ class FinalOptimizePhase(Visitor.CythonTransform):
node.function.type = node.function.entry.type node.function.type = node.function.entry.type
PyTypeObjectPtr = PyrexTypes.CPtrType(cython_scope.lookup('PyTypeObject').type) PyTypeObjectPtr = PyrexTypes.CPtrType(cython_scope.lookup('PyTypeObject').type)
node.args[1] = ExprNodes.CastNode(node.args[1], PyTypeObjectPtr) node.args[1] = ExprNodes.CastNode(node.args[1], PyTypeObjectPtr)
elif node.function.type.is_pyobject: elif node.function.type.is_pyobject and node.function.type is not Builtin.type_type:
# we could do it for all calls, but attributes are most likely to result in a method call # we could do it for all calls, but attributes are most likely to result in a method call
if node.function.is_attribute: if node.function.is_attribute:
if isinstance(node.arg_tuple, ExprNodes.TupleNode) and not ( if isinstance(node.arg_tuple, ExprNodes.TupleNode) and not (
node.arg_tuple.is_literal or node.arg_tuple.mult_factor): node.arg_tuple.mult_factor or (node.arg_tuple.is_literal and node.arg_tuple.args)):
node = ExprNodes.PyMethodCallNode.from_node( node = ExprNodes.PyMethodCallNode.from_node(
node, function=node.function, arg_tuple=node.arg_tuple, type=node.type) node, function=node.function, arg_tuple=node.arg_tuple, type=node.type)
return node return node
......
...@@ -1158,6 +1158,90 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg ...@@ -1158,6 +1158,90 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg
#endif #endif
/////////////// PyObjectCallOneArg.proto ///////////////
// Calls "func" with "arg" as its only positional argument and returns the
// result of the call, or NULL on failure.
static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); /*proto*/
/////////////// PyObjectCallOneArg ///////////////
//@requires: PyObjectCall
#if CYTHON_COMPILING_IN_CPYTHON
// Call "func" with exactly one positional argument, "arg".
//
// When "func" is a PyCFunction (or a CyFunction, if __Pyx_CyFunction_USED is
// defined) whose flags include METH_O, its C implementation is invoked
// directly as cfunc(self, arg), so no argument tuple has to be built.
// Any other callable gets "arg" packed into a temporary 1-tuple that is
// passed to __Pyx_PyObject_Call() and released again afterwards.
//
// Returns the result of the call, or NULL on failure.  The caller keeps its
// own reference to "arg" in both cases.
static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) {
    PyObject *self, *result;
    PyCFunction cfunc;
    if (!(PyCFunction_Check(func)
#ifdef __Pyx_CyFunction_USED
            || PyObject_TypeCheck(func, __pyx_CyFunctionType)
#endif
            ) || !(PyCFunction_GET_FLAGS(func) & METH_O)) {
        // generic case: go through the normal tuple based call protocol
        PyObject* args = PyTuple_Pack(1, arg);
        if (unlikely(!args)) return NULL;
        result = __Pyx_PyObject_Call(func, args, NULL);
        Py_DECREF(args);
        return result;
    }
    // fast and simple case we are optimising for
    cfunc = PyCFunction_GET_FUNCTION(func);
    self = PyCFunction_GET_SELF(func);
    // the direct C call is still guarded by the recursion check
    if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object")))
        return NULL;
    result = cfunc(self, arg);
    Py_LeaveRecursiveCall();
    // a NULL result must always come with an exception, so raise one if the
    // called function failed to set it
    if (unlikely(!result) && unlikely(!PyErr_Occurred())) {
        PyErr_SetString(
            PyExc_SystemError,
            "NULL result without error in PyObject_Call");
    }
    return result;
}
#else
// Generic fallback used when not compiling for CPython: pack "arg" into a
// temporary 1-tuple and call "func" with it.
// Returns the result of the call, or NULL on failure.
static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) {
    PyObject *result;
    PyObject *args = PyTuple_Pack(1, arg);
    if (unlikely(!args)) return NULL;
    result = __Pyx_PyObject_Call(func, args, NULL);
    // the call does not take over our reference to the argument tuple,
    // so it must be released here to avoid leaking one tuple per call
    Py_DECREF(args);
    return result;
}
#endif
/////////////// PyObjectCallNoArg.proto ///////////////
//@requires: PyObjectCall
//@substitute: naming
// Calls "func" without arguments.
// CPython builds get a real function; everywhere else this is just a macro
// that forwards to __Pyx_PyObject_Call() with $empty_tuple as the arguments
// (presumably a shared empty tuple, given the name and the "naming"
// substitution requested for this section - confirm against the template).
#if CYTHON_COMPILING_IN_CPYTHON
static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); /*proto*/
#else
#define __Pyx_PyObject_CallNoArg(func) __Pyx_PyObject_Call(func, $empty_tuple, NULL)
#endif
/////////////// PyObjectCallNoArg ///////////////
//@requires: PyObjectCall
//@substitute: naming
#if CYTHON_COMPILING_IN_CPYTHON
// Call "func" without any arguments.
//
// A PyCFunction whose flags include METH_NOARGS is invoked directly through
// its C function pointer as meth(self, NULL).  Every other callable is
// handed to __Pyx_PyObject_Call() together with $empty_tuple.
//
// Returns the result of the call, or NULL on failure.
static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) {
    PyObject *bound_self, *retval;
    PyCFunction meth;

    if (PyCFunction_Check(func) && (PyCFunction_GET_FLAGS(func) & METH_NOARGS)) {
        // fast path: direct C call, still guarded by the recursion check
        meth = PyCFunction_GET_FUNCTION(func);
        bound_self = PyCFunction_GET_SELF(func);
        if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) {
            return NULL;
        }
        retval = meth(bound_self, NULL);
        Py_LeaveRecursiveCall();
        // a NULL result must always come with an exception, so raise one
        // if the called function failed to set it
        if (unlikely(!retval) && unlikely(!PyErr_Occurred())) {
            PyErr_SetString(
                PyExc_SystemError,
                "NULL result without error in PyObject_Call");
        }
        return retval;
    }

    // generic case: normal call protocol with an empty argument tuple
    return __Pyx_PyObject_Call(func, $empty_tuple, NULL);
}
#endif
/////////////// MatrixMultiply.proto /////////////// /////////////// MatrixMultiply.proto ///////////////
#if PY_VERSION_HEX >= 0x03050000 #if PY_VERSION_HEX >= 0x03050000
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment