Commit 7a83089c authored by Fredrik Lundh

needforspeed: backed out the Py_LOCAL-isation of ceval; the massive inlining
killed performance on certain Intel boxes, and the "aggressive" macro itself
gives most of the benefits on others.
parent 2d23d5bf
...@@ -137,13 +137,17 @@ typedef Py_intptr_t Py_ssize_t; ...@@ -137,13 +137,17 @@ typedef Py_intptr_t Py_ssize_t;
# endif # endif
#endif #endif
/* PY_LOCAL can be used instead of static to get the fastest possible calling /* Py_LOCAL can be used instead of static to get the fastest possible calling
* convention for functions that are local to a given module. It also enables * convention for functions that are local to a given module.
* inlining, where suitable.
* *
* If PY_LOCAL_AGGRESSIVE is defined before python.h is included, a more * Py_LOCAL_INLINE does the same thing, and also explicitly requests inlining,
* "aggressive" inlining is enabled. This may lead to code bloat, and may * for platforms that support that.
* slow things down for those reasons. Use with care. *
* If PY_LOCAL_AGGRESSIVE is defined before Python.h is included, more
* "aggressive" inlining/optimization is enabled for the entire module. This
* may lead to code bloat, and may slow things down for those reasons. It may
* also lead to errors, if the code relies on pointer aliasing. Use with
* care.
* *
* NOTE: You can only use this for functions that are entirely local to a * NOTE: You can only use this for functions that are entirely local to a
* module; functions that are exported via method tables, callbacks, etc, * module; functions that are exported via method tables, callbacks, etc,
...@@ -160,11 +164,14 @@ typedef Py_intptr_t Py_ssize_t; ...@@ -160,11 +164,14 @@ typedef Py_intptr_t Py_ssize_t;
/* ignore warnings if the compiler decides not to inline a function */ /* ignore warnings if the compiler decides not to inline a function */
#pragma warning(disable: 4710) #pragma warning(disable: 4710)
/* fastest possible local call under MSVC */ /* fastest possible local call under MSVC */
#define Py_LOCAL(type) static __inline type __fastcall #define Py_LOCAL(type) static type __fastcall
#define Py_LOCAL_INLINE(type) static __inline type __fastcall
#elif defined(USE_INLINE) #elif defined(USE_INLINE)
#define Py_LOCAL(type) static inline type #define Py_LOCAL(type) static type
#define Py_LOCAL_INLINE(type) static inline type
#else #else
#define Py_LOCAL(type) static type #define Py_LOCAL(type) static type
#define Py_LOCAL_INLINE(type) static type
#endif #endif
#include <stdlib.h> #include <stdlib.h>
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
XXX document it! XXX document it!
*/ */
/* enable more aggressive local inlining (platform dependent) */ /* enable more aggressive intra-module optimizations, where available */
#define PY_LOCAL_AGGRESSIVE #define PY_LOCAL_AGGRESSIVE
#include "Python.h" #include "Python.h"
...@@ -19,11 +19,6 @@ ...@@ -19,11 +19,6 @@
#include <ctype.h> #include <ctype.h>
#if defined(_MSC_VER)
/* enable more aggressive optimization for visual studio */
#pragma optimize("agtw", on)
#endif
#ifndef WITH_TSC #ifndef WITH_TSC
#define READ_TIMESTAMP(var) #define READ_TIMESTAMP(var)
...@@ -38,7 +33,7 @@ typedef unsigned long long uint64; ...@@ -38,7 +33,7 @@ typedef unsigned long long uint64;
#define READ_TIMESTAMP(var) ppc_getcounter(&var) #define READ_TIMESTAMP(var) ppc_getcounter(&var)
Py_LOCAL(void) static void
ppc_getcounter(uint64 *v) ppc_getcounter(uint64 *v)
{ {
register unsigned long tbu, tb, tbu2; register unsigned long tbu, tb, tbu2;
...@@ -91,44 +86,44 @@ typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *); ...@@ -91,44 +86,44 @@ typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *);
/* Forward declarations */ /* Forward declarations */
#ifdef WITH_TSC #ifdef WITH_TSC
Py_LOCAL(PyObject *) call_function(PyObject ***, int, uint64*, uint64*); static PyObject * call_function(PyObject ***, int, uint64*, uint64*);
#else #else
Py_LOCAL(PyObject *) call_function(PyObject ***, int); static PyObject * call_function(PyObject ***, int);
#endif #endif
Py_LOCAL(PyObject *) fast_function(PyObject *, PyObject ***, int, int, int); static PyObject * fast_function(PyObject *, PyObject ***, int, int, int);
Py_LOCAL(PyObject *) do_call(PyObject *, PyObject ***, int, int); static PyObject * do_call(PyObject *, PyObject ***, int, int);
Py_LOCAL(PyObject *) ext_do_call(PyObject *, PyObject ***, int, int, int); static PyObject * ext_do_call(PyObject *, PyObject ***, int, int, int);
Py_LOCAL(PyObject *) update_keyword_args(PyObject *, int, PyObject ***,PyObject *); static PyObject * update_keyword_args(PyObject *, int, PyObject ***,PyObject *);
Py_LOCAL(PyObject *) update_star_args(int, int, PyObject *, PyObject ***); static PyObject * update_star_args(int, int, PyObject *, PyObject ***);
Py_LOCAL(PyObject *) load_args(PyObject ***, int); static PyObject * load_args(PyObject ***, int);
#define CALL_FLAG_VAR 1 #define CALL_FLAG_VAR 1
#define CALL_FLAG_KW 2 #define CALL_FLAG_KW 2
#ifdef LLTRACE #ifdef LLTRACE
static int lltrace; static int lltrace;
Py_LOCAL(int) prtrace(PyObject *, char *); static int prtrace(PyObject *, char *);
#endif #endif
Py_LOCAL(int) call_trace(Py_tracefunc, PyObject *, PyFrameObject *, static int call_trace(Py_tracefunc, PyObject *, PyFrameObject *,
int, PyObject *); int, PyObject *);
Py_LOCAL(void) call_trace_protected(Py_tracefunc, PyObject *, static void call_trace_protected(Py_tracefunc, PyObject *,
PyFrameObject *, int, PyObject *); PyFrameObject *, int, PyObject *);
Py_LOCAL(void) call_exc_trace(Py_tracefunc, PyObject *, PyFrameObject *); static void call_exc_trace(Py_tracefunc, PyObject *, PyFrameObject *);
Py_LOCAL(int) maybe_call_line_trace(Py_tracefunc, PyObject *, static int maybe_call_line_trace(Py_tracefunc, PyObject *,
PyFrameObject *, int *, int *, int *); PyFrameObject *, int *, int *, int *);
Py_LOCAL(PyObject *) apply_slice(PyObject *, PyObject *, PyObject *); static PyObject * apply_slice(PyObject *, PyObject *, PyObject *);
Py_LOCAL(int) assign_slice(PyObject *, PyObject *, static int assign_slice(PyObject *, PyObject *,
PyObject *, PyObject *); PyObject *, PyObject *);
Py_LOCAL(PyObject *) cmp_outcome(int, PyObject *, PyObject *); static PyObject * cmp_outcome(int, PyObject *, PyObject *);
Py_LOCAL(PyObject *) import_from(PyObject *, PyObject *); static PyObject * import_from(PyObject *, PyObject *);
Py_LOCAL(int) import_all_from(PyObject *, PyObject *); static int import_all_from(PyObject *, PyObject *);
Py_LOCAL(PyObject *) build_class(PyObject *, PyObject *, PyObject *); static PyObject * build_class(PyObject *, PyObject *, PyObject *);
Py_LOCAL(int) exec_statement(PyFrameObject *, static int exec_statement(PyFrameObject *,
PyObject *, PyObject *, PyObject *); PyObject *, PyObject *, PyObject *);
Py_LOCAL(void) set_exc_info(PyThreadState *, PyObject *, PyObject *, PyObject *); static void set_exc_info(PyThreadState *, PyObject *, PyObject *, PyObject *);
Py_LOCAL(void) reset_exc_info(PyThreadState *); static void reset_exc_info(PyThreadState *);
Py_LOCAL(void) format_exc_check_arg(PyObject *, char *, PyObject *); static void format_exc_check_arg(PyObject *, char *, PyObject *);
Py_LOCAL(PyObject *) string_concatenate(PyObject *, PyObject *, static PyObject * string_concatenate(PyObject *, PyObject *,
PyFrameObject *, unsigned char *); PyFrameObject *, unsigned char *);
#define NAME_ERROR_MSG \ #define NAME_ERROR_MSG \
...@@ -484,8 +479,8 @@ enum why_code { ...@@ -484,8 +479,8 @@ enum why_code {
WHY_YIELD = 0x0040 /* 'yield' operator */ WHY_YIELD = 0x0040 /* 'yield' operator */
}; };
Py_LOCAL(enum why_code) do_raise(PyObject *, PyObject *, PyObject *); static enum why_code do_raise(PyObject *, PyObject *, PyObject *);
Py_LOCAL(int) unpack_iterable(PyObject *, int, PyObject **); static int unpack_iterable(PyObject *, int, PyObject **);
/* for manipulating the thread switch and periodic "stuff" - used to be /* for manipulating the thread switch and periodic "stuff" - used to be
per thread, now just a pair o' globals */ per thread, now just a pair o' globals */
...@@ -2902,7 +2897,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals, ...@@ -2902,7 +2897,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
*/ */
Py_LOCAL(void) static void
set_exc_info(PyThreadState *tstate, set_exc_info(PyThreadState *tstate,
PyObject *type, PyObject *value, PyObject *tb) PyObject *type, PyObject *value, PyObject *tb)
{ {
...@@ -2947,7 +2942,7 @@ set_exc_info(PyThreadState *tstate, ...@@ -2947,7 +2942,7 @@ set_exc_info(PyThreadState *tstate,
PySys_SetObject("exc_traceback", tb); PySys_SetObject("exc_traceback", tb);
} }
Py_LOCAL(void) static void
reset_exc_info(PyThreadState *tstate) reset_exc_info(PyThreadState *tstate)
{ {
PyFrameObject *frame; PyFrameObject *frame;
...@@ -2994,7 +2989,7 @@ reset_exc_info(PyThreadState *tstate) ...@@ -2994,7 +2989,7 @@ reset_exc_info(PyThreadState *tstate)
/* Logic for the raise statement (too complicated for inlining). /* Logic for the raise statement (too complicated for inlining).
This *consumes* a reference count to each of its arguments. */ This *consumes* a reference count to each of its arguments. */
Py_LOCAL(enum why_code) static enum why_code
do_raise(PyObject *type, PyObject *value, PyObject *tb) do_raise(PyObject *type, PyObject *value, PyObject *tb)
{ {
if (type == NULL) { if (type == NULL) {
...@@ -3103,7 +3098,7 @@ do_raise(PyObject *type, PyObject *value, PyObject *tb) ...@@ -3103,7 +3098,7 @@ do_raise(PyObject *type, PyObject *value, PyObject *tb)
/* Iterate v argcnt times and store the results on the stack (via decreasing /* Iterate v argcnt times and store the results on the stack (via decreasing
sp). Return 1 for success, 0 if error. */ sp). Return 1 for success, 0 if error. */
Py_LOCAL(int) static int
unpack_iterable(PyObject *v, int argcnt, PyObject **sp) unpack_iterable(PyObject *v, int argcnt, PyObject **sp)
{ {
int i = 0; int i = 0;
...@@ -3150,7 +3145,7 @@ Error: ...@@ -3150,7 +3145,7 @@ Error:
#ifdef LLTRACE #ifdef LLTRACE
Py_LOCAL(int) static int
prtrace(PyObject *v, char *str) prtrace(PyObject *v, char *str)
{ {
printf("%s ", str); printf("%s ", str);
...@@ -3161,7 +3156,7 @@ prtrace(PyObject *v, char *str) ...@@ -3161,7 +3156,7 @@ prtrace(PyObject *v, char *str)
} }
#endif #endif
Py_LOCAL(void) static void
call_exc_trace(Py_tracefunc func, PyObject *self, PyFrameObject *f) call_exc_trace(Py_tracefunc func, PyObject *self, PyFrameObject *f)
{ {
PyObject *type, *value, *traceback, *arg; PyObject *type, *value, *traceback, *arg;
...@@ -3187,7 +3182,7 @@ call_exc_trace(Py_tracefunc func, PyObject *self, PyFrameObject *f) ...@@ -3187,7 +3182,7 @@ call_exc_trace(Py_tracefunc func, PyObject *self, PyFrameObject *f)
} }
} }
Py_LOCAL(void) static void
call_trace_protected(Py_tracefunc func, PyObject *obj, PyFrameObject *frame, call_trace_protected(Py_tracefunc func, PyObject *obj, PyFrameObject *frame,
int what, PyObject *arg) int what, PyObject *arg)
{ {
...@@ -3204,7 +3199,7 @@ call_trace_protected(Py_tracefunc func, PyObject *obj, PyFrameObject *frame, ...@@ -3204,7 +3199,7 @@ call_trace_protected(Py_tracefunc func, PyObject *obj, PyFrameObject *frame,
} }
} }
Py_LOCAL(int) static int
call_trace(Py_tracefunc func, PyObject *obj, PyFrameObject *frame, call_trace(Py_tracefunc func, PyObject *obj, PyFrameObject *frame,
int what, PyObject *arg) int what, PyObject *arg)
{ {
...@@ -3239,7 +3234,7 @@ _PyEval_CallTracing(PyObject *func, PyObject *args) ...@@ -3239,7 +3234,7 @@ _PyEval_CallTracing(PyObject *func, PyObject *args)
return result; return result;
} }
Py_LOCAL(int) static int
maybe_call_line_trace(Py_tracefunc func, PyObject *obj, maybe_call_line_trace(Py_tracefunc func, PyObject *obj,
PyFrameObject *frame, int *instr_lb, int *instr_ub, PyFrameObject *frame, int *instr_lb, int *instr_ub,
int *instr_prev) int *instr_prev)
...@@ -3467,7 +3462,7 @@ PyEval_GetFuncDesc(PyObject *func) ...@@ -3467,7 +3462,7 @@ PyEval_GetFuncDesc(PyObject *func)
} }
} }
Py_LOCAL(void) static void
err_args(PyObject *func, int flags, int nargs) err_args(PyObject *func, int flags, int nargs)
{ {
if (flags & METH_NOARGS) if (flags & METH_NOARGS)
...@@ -3514,7 +3509,7 @@ if (tstate->use_tracing && tstate->c_profilefunc) { \ ...@@ -3514,7 +3509,7 @@ if (tstate->use_tracing && tstate->c_profilefunc) { \
x = call; \ x = call; \
} }
Py_LOCAL(PyObject *) static PyObject *
call_function(PyObject ***pp_stack, int oparg call_function(PyObject ***pp_stack, int oparg
#ifdef WITH_TSC #ifdef WITH_TSC
, uint64* pintr0, uint64* pintr1 , uint64* pintr0, uint64* pintr1
...@@ -3605,7 +3600,7 @@ call_function(PyObject ***pp_stack, int oparg ...@@ -3605,7 +3600,7 @@ call_function(PyObject ***pp_stack, int oparg
done before evaluating the frame. done before evaluating the frame.
*/ */
Py_LOCAL(PyObject *) static PyObject *
fast_function(PyObject *func, PyObject ***pp_stack, int n, int na, int nk) fast_function(PyObject *func, PyObject ***pp_stack, int n, int na, int nk)
{ {
PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func);
...@@ -3658,7 +3653,7 @@ fast_function(PyObject *func, PyObject ***pp_stack, int n, int na, int nk) ...@@ -3658,7 +3653,7 @@ fast_function(PyObject *func, PyObject ***pp_stack, int n, int na, int nk)
PyFunction_GET_CLOSURE(func)); PyFunction_GET_CLOSURE(func));
} }
Py_LOCAL(PyObject *) static PyObject *
update_keyword_args(PyObject *orig_kwdict, int nk, PyObject ***pp_stack, update_keyword_args(PyObject *orig_kwdict, int nk, PyObject ***pp_stack,
PyObject *func) PyObject *func)
{ {
...@@ -3698,7 +3693,7 @@ update_keyword_args(PyObject *orig_kwdict, int nk, PyObject ***pp_stack, ...@@ -3698,7 +3693,7 @@ update_keyword_args(PyObject *orig_kwdict, int nk, PyObject ***pp_stack,
return kwdict; return kwdict;
} }
Py_LOCAL(PyObject *) static PyObject *
update_star_args(int nstack, int nstar, PyObject *stararg, update_star_args(int nstack, int nstar, PyObject *stararg,
PyObject ***pp_stack) PyObject ***pp_stack)
{ {
...@@ -3723,7 +3718,7 @@ update_star_args(int nstack, int nstar, PyObject *stararg, ...@@ -3723,7 +3718,7 @@ update_star_args(int nstack, int nstar, PyObject *stararg,
return callargs; return callargs;
} }
Py_LOCAL(PyObject *) static PyObject *
load_args(PyObject ***pp_stack, int na) load_args(PyObject ***pp_stack, int na)
{ {
PyObject *args = PyTuple_New(na); PyObject *args = PyTuple_New(na);
...@@ -3738,7 +3733,7 @@ load_args(PyObject ***pp_stack, int na) ...@@ -3738,7 +3733,7 @@ load_args(PyObject ***pp_stack, int na)
return args; return args;
} }
Py_LOCAL(PyObject *) static PyObject *
do_call(PyObject *func, PyObject ***pp_stack, int na, int nk) do_call(PyObject *func, PyObject ***pp_stack, int na, int nk)
{ {
PyObject *callargs = NULL; PyObject *callargs = NULL;
...@@ -3774,7 +3769,7 @@ do_call(PyObject *func, PyObject ***pp_stack, int na, int nk) ...@@ -3774,7 +3769,7 @@ do_call(PyObject *func, PyObject ***pp_stack, int na, int nk)
return result; return result;
} }
Py_LOCAL(PyObject *) static PyObject *
ext_do_call(PyObject *func, PyObject ***pp_stack, int flags, int na, int nk) ext_do_call(PyObject *func, PyObject ***pp_stack, int flags, int na, int nk)
{ {
int nstar = 0; int nstar = 0;
...@@ -3886,7 +3881,7 @@ _PyEval_SliceIndex(PyObject *v, Py_ssize_t *pi) ...@@ -3886,7 +3881,7 @@ _PyEval_SliceIndex(PyObject *v, Py_ssize_t *pi)
PyType_HasFeature((x)->ob_type, Py_TPFLAGS_HAVE_INDEX) \ PyType_HasFeature((x)->ob_type, Py_TPFLAGS_HAVE_INDEX) \
&& (x)->ob_type->tp_as_number->nb_index)) && (x)->ob_type->tp_as_number->nb_index))
Py_LOCAL(PyObject *) static PyObject *
apply_slice(PyObject *u, PyObject *v, PyObject *w) /* return u[v:w] */ apply_slice(PyObject *u, PyObject *v, PyObject *w) /* return u[v:w] */
{ {
PyTypeObject *tp = u->ob_type; PyTypeObject *tp = u->ob_type;
...@@ -3912,7 +3907,7 @@ apply_slice(PyObject *u, PyObject *v, PyObject *w) /* return u[v:w] */ ...@@ -3912,7 +3907,7 @@ apply_slice(PyObject *u, PyObject *v, PyObject *w) /* return u[v:w] */
} }
} }
Py_LOCAL(int) static int
assign_slice(PyObject *u, PyObject *v, PyObject *w, PyObject *x) assign_slice(PyObject *u, PyObject *v, PyObject *w, PyObject *x)
/* u[v:w] = x */ /* u[v:w] = x */
{ {
...@@ -3946,7 +3941,7 @@ assign_slice(PyObject *u, PyObject *v, PyObject *w, PyObject *x) ...@@ -3946,7 +3941,7 @@ assign_slice(PyObject *u, PyObject *v, PyObject *w, PyObject *x)
} }
} }
Py_LOCAL(PyObject *) static PyObject *
cmp_outcome(int op, register PyObject *v, register PyObject *w) cmp_outcome(int op, register PyObject *v, register PyObject *w)
{ {
int res = 0; int res = 0;
...@@ -3979,7 +3974,7 @@ cmp_outcome(int op, register PyObject *v, register PyObject *w) ...@@ -3979,7 +3974,7 @@ cmp_outcome(int op, register PyObject *v, register PyObject *w)
return v; return v;
} }
Py_LOCAL(PyObject *) static PyObject *
import_from(PyObject *v, PyObject *name) import_from(PyObject *v, PyObject *name)
{ {
PyObject *x; PyObject *x;
...@@ -3993,7 +3988,7 @@ import_from(PyObject *v, PyObject *name) ...@@ -3993,7 +3988,7 @@ import_from(PyObject *v, PyObject *name)
return x; return x;
} }
Py_LOCAL(int) static int
import_all_from(PyObject *locals, PyObject *v) import_all_from(PyObject *locals, PyObject *v)
{ {
PyObject *all = PyObject_GetAttrString(v, "__all__"); PyObject *all = PyObject_GetAttrString(v, "__all__");
...@@ -4050,7 +4045,7 @@ import_all_from(PyObject *locals, PyObject *v) ...@@ -4050,7 +4045,7 @@ import_all_from(PyObject *locals, PyObject *v)
return err; return err;
} }
Py_LOCAL(PyObject *) static PyObject *
build_class(PyObject *methods, PyObject *bases, PyObject *name) build_class(PyObject *methods, PyObject *bases, PyObject *name)
{ {
PyObject *metaclass = NULL, *result, *base; PyObject *metaclass = NULL, *result, *base;
...@@ -4102,7 +4097,7 @@ build_class(PyObject *methods, PyObject *bases, PyObject *name) ...@@ -4102,7 +4097,7 @@ build_class(PyObject *methods, PyObject *bases, PyObject *name)
return result; return result;
} }
Py_LOCAL(int) static int
exec_statement(PyFrameObject *f, PyObject *prog, PyObject *globals, exec_statement(PyFrameObject *f, PyObject *prog, PyObject *globals,
PyObject *locals) PyObject *locals)
{ {
...@@ -4198,7 +4193,7 @@ exec_statement(PyFrameObject *f, PyObject *prog, PyObject *globals, ...@@ -4198,7 +4193,7 @@ exec_statement(PyFrameObject *f, PyObject *prog, PyObject *globals,
return 0; return 0;
} }
Py_LOCAL(void) static void
format_exc_check_arg(PyObject *exc, char *format_str, PyObject *obj) format_exc_check_arg(PyObject *exc, char *format_str, PyObject *obj)
{ {
char *obj_str; char *obj_str;
...@@ -4213,7 +4208,7 @@ format_exc_check_arg(PyObject *exc, char *format_str, PyObject *obj) ...@@ -4213,7 +4208,7 @@ format_exc_check_arg(PyObject *exc, char *format_str, PyObject *obj)
PyErr_Format(exc, format_str, obj_str); PyErr_Format(exc, format_str, obj_str);
} }
Py_LOCAL(PyObject *) static PyObject *
string_concatenate(PyObject *v, PyObject *w, string_concatenate(PyObject *v, PyObject *w,
PyFrameObject *f, unsigned char *next_instr) PyFrameObject *f, unsigned char *next_instr)
{ {
...@@ -4288,7 +4283,7 @@ string_concatenate(PyObject *v, PyObject *w, ...@@ -4288,7 +4283,7 @@ string_concatenate(PyObject *v, PyObject *w,
#ifdef DYNAMIC_EXECUTION_PROFILE #ifdef DYNAMIC_EXECUTION_PROFILE
Py_LOCAL(PyObject *) static PyObject *
getarray(long a[256]) getarray(long a[256])
{ {
int i; int i;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment