Boxiang Sun / Pyston · Commits
Commit df9ec460
Authored Jun 23, 2015 by Kevin Modzelewski

Merge pull request #631 from kmod/perf4

more rewriting

Parents: 9415d7c1, 73fc53c9
Showing 5 changed files with 170 additions and 17 deletions (+170 −17)
src/asm_writing/icinfo.cpp     +4    −0
src/asm_writing/rewriter.h     +1    −1
src/runtime/objmodel.cpp       +153  −16
test/tests/weakref1.py         +6    −0
test/tests/weakref_proxy.py    +6    −0
src/asm_writing/icinfo.cpp
...
...
@@ -290,6 +290,10 @@ bool ICInfo::shouldAttempt() {
        retry_in--;
        return false;
    }

    // Note(kmod): in some pathological deeply-recursive cases, it's important that we set the
    // retry counter even if we attempt it again.  We could probably handle this by setting
    // the backoff to 0 on commit, and then setting the retry to the backoff here.

    return !isMegamorphic();
}
...
...
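A side note on the Note(kmod) comment above: the mechanism it refers to is exponential backoff on IC rewrite attempts. The sketch below only illustrates that scheme — `retry_in` appears in the hunk, while `retry_backoff` and the on-commit reset are assumptions taken from the comment's wording, not code from this commit.

// Illustrative sketch only; not Pyston's actual ICInfo.
struct ICRetryState {
    int retry_in = 0;        // calls to skip before the next rewrite attempt
    int retry_backoff = 1;   // hypothetical: how long the next wait will be

    bool shouldAttempt(bool is_megamorphic) {
        if (retry_in) {
            retry_in--;
            return false;
        }
        // Charge for this attempt up front, so deeply-recursive re-entry
        // doesn't get to attempt a rewrite on every call.
        retry_in = retry_backoff;
        retry_backoff *= 2;
        return !is_megamorphic;
    }

    // The "setting the backoff to 0 on commit" idea from the comment:
    // a successful rewrite could reset the backoff so the next failure starts cheap.
    void onCommit() { retry_backoff = 1; }
};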
src/asm_writing/rewriter.h
...
...
@@ -321,7 +321,7 @@ private:
    // Loads the constant into any register or if already in a register just return it
    assembler::Register loadConst(uint64_t val, Location otherThan = Location::any());

-   llvm::DenseMap<uint64_t, RewriterVar*> constToVar;
+   std::unordered_map<uint64_t, RewriterVar*> constToVar;
};
...
...
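For context on the changed member: judging by its name and type, `constToVar` maps a 64-bit constant to the RewriterVar that already holds it, presumably so repeated loads of the same constant can be deduplicated. A rough sketch of that caching pattern, with simplified stand-in types rather than the real Rewriter API:

#include <cstdint>
#include <unordered_map>

// Stand-in type; the real RewriterVar lives in rewriter.h.
struct RewriterVar { uint64_t value; };

struct MiniRewriter {
    std::unordered_map<uint64_t, RewriterVar*> constToVar;

    // Sketch of the idea: return the existing var for this constant if we
    // already materialized it, otherwise create one and remember it.
    RewriterVar* loadConst(uint64_t val) {
        auto it = constToVar.find(val);
        if (it != constToVar.end())
            return it->second;
        RewriterVar* var = new RewriterVar{ val };  // the real rewriter allocates from its own arena
        constToVar[val] = var;
        return var;
    }
};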
src/runtime/objmodel.cpp
...
...
@@ -2579,6 +2579,31 @@ extern "C" void dumpEx(void* p, int levels) {
        }
    }

    if (isSubclass(b->cls, function_cls)) {
        BoxedFunction* f = static_cast<BoxedFunction*>(b);
        CLFunction* cl = f->f;

        if (cl->source) {
            printf("User-defined function '%s'\n", cl->source->getName().c_str());
        } else {
            printf("A builtin function\n");
        }

        printf("Has %ld function versions\n", cl->versions.size());
        for (CompiledFunction* cf : cl->versions) {
            if (cf->is_interpreted) {
                printf("[interpreted]\n");
            } else {
                bool got_name;
                std::string name = g.func_addr_registry.getFuncNameAtAddress(cf->code, true, &got_name);
                if (got_name)
                    printf("%s\n", name.c_str());
                else
                    printf("%p\n", cf->code);
            }
        }
    }

    if (isSubclass(b->cls, module_cls)) {
        printf("The '%s' module\n", static_cast<BoxedModule*>(b)->name().c_str());
    }
...
...
@@ -2862,6 +2887,16 @@ static inline Box*& getArg(int idx, Box*& arg1, Box*& arg2, Box*& arg3, Box** ar
    return args[idx - 3];
}

static inline RewriterVar* getArg(int idx, CallRewriteArgs* rewrite_args) {
    if (idx == 0)
        return rewrite_args->arg1;
    if (idx == 1)
        return rewrite_args->arg2;
    if (idx == 2)
        return rewrite_args->arg3;
    return rewrite_args->args->getAttr(sizeof(Box*) * (idx - 3));
}

static StatCounter slowpath_pickversion("slowpath_pickversion");
static CompiledFunction* pickVersion(CLFunction* f, int num_output_args, Box* oarg1, Box* oarg2, Box* oarg3,
                                     Box** oargs) {
...
...
@@ -2979,14 +3014,22 @@ static KeywordDest placeKeyword(const ParamNames& param_names, llvm::SmallVector
    }
}

static Box* _callFuncHelper(BoxedFunctionBase* func, ArgPassSpec argspec, Box* arg1, Box* arg2, Box* arg3,
                            void** extra_args) {
    Box** args = (Box**)extra_args[0];
    auto keyword_names = (const std::vector<BoxedString*>*)extra_args[1];
    return callFunc(func, NULL, argspec, arg1, arg2, arg3, args, keyword_names);
}

static StatCounter slowpath_callfunc("slowpath_callfunc");
static StatCounter slowpath_callfunc_slowpath("slowpath_callfunc_slowpath");

Box* callFunc(BoxedFunctionBase* func, CallRewriteArgs* rewrite_args, ArgPassSpec argspec, Box* arg1, Box* arg2,
              Box* arg3, Box** args, const std::vector<BoxedString*>* keyword_names) {
#if STAT_TIMERS
    StatTimer::assertActive();
    STAT_TIMER(t0, "us_timer_slowpath_callFunc", 0);
#endif

    /*
     * Procedure:
     * - First match up positional arguments; any extra go to varargs.  error if too many.
...
@@ -3003,15 +3046,14 @@ Box* callFunc(BoxedFunctionBase* func, CallRewriteArgs* rewrite_args, ArgPassSpe
int
num_output_args
=
f
->
numReceivedArgs
();
int
num_passed_args
=
argspec
.
totalPassed
();
if
(
argspec
.
has_starargs
||
argspec
.
has_kwargs
||
f
->
isGenerator
())
{
rewrite_args
=
NULL
;
REWRITE_ABORTED
(
""
);
}
// These could be handled:
if
(
argspec
.
num_keywords
)
{
rewrite_args
=
NULL
;
REWRITE_ABORTED
(
""
);
if
(
num_passed_args
>=
1
)
assert
(
gc
::
isValidGCObject
(
arg1
)
||
!
arg1
);
if
(
num_passed_args
>=
2
)
assert
(
gc
::
isValidGCObject
(
arg2
)
||
!
arg2
);
if
(
num_passed_args
>=
3
)
assert
(
gc
::
isValidGCObject
(
arg3
)
||
!
arg3
);
for
(
int
i
=
3
;
i
<
num_passed_args
;
i
++
)
{
assert
(
gc
::
isValidGCObject
(
args
[
i
-
3
])
||
args
[
i
-
3
]
==
NULL
);
}
// TODO Should we guard on the CLFunction or the BoxedFunctionBase?
...
...
@@ -3038,13 +3080,93 @@ Box* callFunc(BoxedFunctionBase* func, CallRewriteArgs* rewrite_args, ArgPassSpe
    // Fast path: if it's a simple-enough call, we don't have to do anything special.  On a simple
    // django-admin test this covers something like 93% of all calls to callFunc.
    if (!f->isGenerator()) {
-       if (argspec.num_keywords == 0 && !argspec.has_starargs && !argspec.has_kwargs
-           && argspec.num_args == f->num_args && !f->takes_varargs && !f->takes_kwargs) {
-           return callCLFunc(f, rewrite_args, argspec.num_args, closure, NULL, func->globals, arg1, arg2, arg3,
-                             args);
+       if (argspec.num_keywords == 0 && argspec.has_starargs == f->takes_varargs && !argspec.has_kwargs
+           && !f->takes_kwargs && argspec.num_args == f->num_args) {
+           // If the caller passed starargs, we can only pass those directly to the callee if it's a tuple,
+           // since otherwise modifications by the callee would be visible to the caller (hence why varargs
+           // received by the caller are always tuples).
+           // This is why we can't pass kwargs here.
+           if (argspec.has_starargs) {
+               Box* given_varargs = getArg(argspec.num_args + argspec.num_keywords, arg1, arg2, arg3, args);
+               if (given_varargs->cls == tuple_cls) {
+                   if (rewrite_args) {
+                       getArg(argspec.num_args + argspec.num_keywords, rewrite_args)
+                           ->addAttrGuard(offsetof(Box, cls), (intptr_t)tuple_cls);
+                   }
+                   return callCLFunc(f, rewrite_args, argspec.num_args + argspec.has_starargs + argspec.has_kwargs,
+                                     closure, NULL, func->globals, arg1, arg2, arg3, args);
+               }
+           } else {
+               return callCLFunc(f, rewrite_args, argspec.num_args + argspec.has_starargs + argspec.has_kwargs,
+                                 closure, NULL, func->globals, arg1, arg2, arg3, args);
+           }
        }
    }

+   slowpath_callfunc_slowpath.log();
+
+   if (argspec.has_starargs || argspec.has_kwargs || argspec.num_keywords || f->isGenerator()) {
+       // These are the cases that we won't be able to rewrite.
+       // So instead, just rewrite them to be a call to callFunc, which helps a little bit.
+       // TODO we should extract the rest of this function from the end of this block,
+       // put it in a different function, and have the rewrites target that.
+
+       // Note(kmod): I tried moving this section to runtimeCallInternal, ie to the place that calls
+       // callFunc.  The thought was that this would let us apply this same optimization to other
+       // internal callables.  It ended up hurting perf slightly; my theory is that it's because if
+       // an internal callable failed, it's better to call the non-internal version since it's lower
+       // overhead.
+
+       // To investigate the cases where we can't rewrite, enable this block.
+       // This will also log the times that we call into callFunc directly from a rewrite.
+#if 0
+       char buf[80];
+       snprintf(buf, sizeof(buf), "zzz_aborted_%d_args_%d_%d_%d_%d_params_%d_%d_%d_%d", f->isGenerator(),
+                argspec.num_args, argspec.num_keywords, argspec.has_starargs, argspec.has_kwargs, f->num_args,
+                f->num_defaults, f->takes_varargs, f->takes_kwargs);
+       uint64_t* counter = Stats::getStatCounter(buf);
+       Stats::log(counter);
+#endif
+
+       if (rewrite_args) {
+           Rewriter* rewriter = rewrite_args->rewriter;
+           // rewriter->trap();
+
+           RewriterVar* args_array = rewriter->allocate(2);
+           if (num_passed_args >= 4) {
+               RELEASE_ASSERT(rewrite_args->args, "");
+               args_array->setAttr(0, rewrite_args->args);
+           }
+           if (argspec.num_keywords)
+               args_array->setAttr(8, rewriter->loadConst((intptr_t)keyword_names));
+           else
+               args_array->setAttr(8, rewriter->loadConst(0));
+
+           RewriterVar::SmallVector arg_vec;
+           arg_vec.push_back(rewrite_args->obj);
+           arg_vec.push_back(rewriter->loadConst(argspec.asInt(), Location::forArg(1)));
+           if (num_passed_args >= 1)
+               arg_vec.push_back(rewrite_args->arg1);
+           else
+               arg_vec.push_back(rewriter->loadConst(0, Location::forArg(2)));
+           if (num_passed_args >= 2)
+               arg_vec.push_back(rewrite_args->arg2);
+           else
+               arg_vec.push_back(rewriter->loadConst(0, Location::forArg(3)));
+           if (num_passed_args >= 3)
+               arg_vec.push_back(rewrite_args->arg3);
+           else
+               arg_vec.push_back(rewriter->loadConst(0, Location::forArg(4)));
+           arg_vec.push_back(args_array);
+           for (auto v : arg_vec)
+               assert(v);
+           RewriterVar* r_rtn = rewriter->call(true, (void*)_callFuncHelper, arg_vec);
+
+           rewrite_args->out_success = true;
+           rewrite_args->out_rtn = r_rtn;
+           rewrite_args = NULL;
+       }
+   }
+
    if (rewrite_args) {
        // We might have trouble if we have more output args than input args,
        // such as if we need more space to pass defaults.
...
...
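One detail of the new fast path worth spelling out: when starargs are forwarded, the rewrite stays valid only while the starargs object is a tuple, so it installs an attribute guard on the object's `cls` field. The snippet below is a conceptual rendering of what that guard checks (stand-in types; the real guard is machine code emitted into the IC, not a function call):

#include <cstddef>
#include <cstdint>

// Stand-in types; the real Box/BoxedClass live in the Pyston runtime.
struct BoxedClass {};
struct Box { BoxedClass* cls; };

// Conceptual equivalent of addAttrGuard(offsetof(Box, cls), (intptr_t)tuple_cls):
// read the pointer-sized field at the given offset and compare it to the expected value.
// In the generated IC a mismatch branches to the slowpath instead of returning false.
static bool attrGuardHolds(Box* obj, size_t offset, intptr_t expected) {
    intptr_t actual = *(intptr_t*)((char*)obj + offset);
    return actual == expected;
}

// Usage matching the diff: attrGuardHolds(given_varargs, offsetof(Box, cls), (intptr_t)tuple_cls)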
@@ -3598,8 +3720,23 @@ extern "C" Box* runtimeCall(Box* obj, ArgPassSpec argspec, Box* arg1, Box* arg2,
#if 0 && STAT_TIMERS
    static uint64_t* st_id = Stats::getStatCounter("us_timer_slowpath_runtimecall_patchable");
    static uint64_t* st_id_nopatch = Stats::getStatCounter("us_timer_slowpath_runtimecall_nopatch");
-   bool havepatch = (bool)getICInfo(__builtin_extract_return_addr(__builtin_return_address(0)));
-   ScopedStatTimer st(havepatch ? st_id : st_id_nopatch, 10);
+   static uint64_t* st_id_megamorphic = Stats::getStatCounter("us_timer_slowpath_runtimecall_megamorphic");
+   ICInfo* icinfo = getICInfo(__builtin_extract_return_addr(__builtin_return_address(0)));
+   uint64_t* counter;
+   if (!icinfo)
+       counter = st_id_nopatch;
+   else if (icinfo->isMegamorphic())
+       counter = st_id_megamorphic;
+   else {
+       counter = Stats::getStatCounter("us_timer_slowpath_runtimecall_patchable_" + std::string(obj->cls->tp_name));
+   }
+   ScopedStatTimer st(counter, 10);
+
+   static int n = 0;
+   n++;
+   if (n == 57261)
+       printf("");
#endif

    if (rewriter.get()) {
...
@@ -4821,7 +4958,7 @@ Box* typeCallInternal(BoxedFunctionBase* f, CallRewriteArgs* rewrite_args, ArgPa
    // this is ok with not using StlCompatAllocator since we will manually register these objects with the GC
    static std::vector<Box*> allowable_news;
    if (allowable_news.empty()) {
-       for (BoxedClass* allowed_cls : { object_cls, enumerate_cls, xrange_cls }) {
+       for (BoxedClass* allowed_cls : { object_cls, enumerate_cls, xrange_cls, tuple_cls, list_cls, dict_cls }) {
            auto new_obj = typeLookup(allowed_cls, new_str, NULL);
            gc::registerPermanentRoot(new_obj);
            allowable_news.push_back(new_obj);
...
...
test/tests/weakref1.py
...
...
@@ -19,4 +19,10 @@ def fact(n):
w = doStuff()
fact(10)

# try to clear some memory
def recurse(f, n):
    if n:
        return recurse(f, n - 1)
    return f()
recurse(gc.collect, 50)

gc.collect()
test/tests/weakref_proxy.py
...
...
@@ -21,6 +21,12 @@ def getWR():
wr = getWR()
fact(100)

# try to clear some memory
def recurse(f, n):
    if n:
        return recurse(f, n - 1)
    return f()
recurse(gc.collect, 50)

gc.collect()

try:
...
...