Commit 2c33f2ee authored by Kevin Modzelewski's avatar Kevin Modzelewski

Merge pull request #548 from kmod/perf

Perf investigations
parents fb499bc7 9d0f85c6
......@@ -949,12 +949,16 @@ $(CMAKE_SETUP_RELEASE):
@mkdir -p $(CMAKE_DIR_RELEASE)
cd $(CMAKE_DIR_RELEASE); CC='clang' CXX='clang++' cmake -GNinja $(HOME)/pyston -DCMAKE_BUILD_TYPE=Release
# Shared modules (i.e. extension modules that get built by running pyston on setup.py) that we will ask CMake
# to build. You can set this to empty to allow builds to continue even if self-hosting the sharedmods would fail.
CMAKE_SHAREDMODS := sharedmods ext_pyston
.PHONY: pyston_dbg pyston_release
pyston_dbg: $(CMAKE_SETUP_DBG)
$(NINJA) -C $(HOME)/pyston-build-dbg pyston copy_stdlib copy_libpyston sharedmods ext_pyston ext_cpython $(NINJAFLAGS)
$(NINJA) -C $(HOME)/pyston-build-dbg pyston copy_stdlib copy_libpyston $(CMAKE_SHAREDMODS) ext_cpython $(NINJAFLAGS)
ln -sf $(HOME)/pyston-build-dbg/pyston pyston_dbg
pyston_release: $(CMAKE_SETUP_RELEASE)
$(NINJA) -C $(HOME)/pyston-build-release pyston copy_stdlib copy_libpyston sharedmods ext_pyston ext_cpython $(NINJAFLAGS)
$(NINJA) -C $(HOME)/pyston-build-release pyston copy_stdlib copy_libpyston $(CMAKE_SHAREDMODS) ext_cpython $(NINJAFLAGS)
ln -sf $(HOME)/pyston-build-release/pyston pyston_release
endif
CMAKE_DIR_GCC := $(HOME)/pyston-build-gcc
......@@ -1016,35 +1020,35 @@ check$1 test$1: $(PYTHON_EXE_DEPS) pyston$1 $(CHECK_DEPS)
.PHONY: run$1 dbg$1
run$1: pyston$1 $$(RUN_DEPS)
PYTHONPATH=test/test_extension ./pyston$1 $$(ARGS)
PYTHONPATH=test/test_extension:$${PYTHONPATH} ./pyston$1 $$(ARGS)
dbg$1: pyston$1 $$(RUN_DEPS)
PYTHONPATH=test/test_extension zsh -c 'ulimit -m $$(MAX_DBG_MEM_KB); $$(GDB) $$(GDB_CMDS) --args ./pyston$1 $$(ARGS)'
PYTHONPATH=test/test_extension:$${PYTHONPATH} zsh -c 'ulimit -m $$(MAX_DBG_MEM_KB); $$(GDB) $$(GDB_CMDS) --args ./pyston$1 $$(ARGS)'
nosearch_run$1_%: %.py pyston$1 $$(RUN_DEPS)
$(VERB) PYTHONPATH=test/test_extension zsh -c 'ulimit -m $$(MAX_MEM_KB); time ./pyston$1 $$(ARGS) $$<'
$(VERB) PYTHONPATH=test/test_extension:$${PYTHONPATH} zsh -c 'ulimit -m $$(MAX_MEM_KB); time ./pyston$1 $$(ARGS) $$<'
$$(call make_search,run$1_%)
nosearch_dbg$1_%: %.py pyston$1 $$(RUN_DEPS)
$(VERB) PYTHONPATH=test/test_extension zsh -c 'ulimit -m $$(MAX_DBG_MEM_KB); $$(GDB) $$(GDB_CMDS) --args ./pyston$1 $$(ARGS) $$<'
$(VERB) PYTHONPATH=test/test_extension:$${PYTHONPATH} zsh -c 'ulimit -m $$(MAX_DBG_MEM_KB); $$(GDB) $$(GDB_CMDS) --args ./pyston$1 $$(ARGS) $$<'
$$(call make_search,dbg$1_%)
ifneq ($$(ENABLE_VALGRIND),0)
nosearch_memcheck$1_%: %.py pyston$1 $$(RUN_DEPS)
PYTHONPATH=test/test_extension $$(VALGRIND) --tool=memcheck --leak-check=no --db-attach=yes ./pyston$1 $$(ARGS) $$<
PYTHONPATH=test/test_extension:$${PYTHONPATH} $$(VALGRIND) --tool=memcheck --leak-check=no --db-attach=yes ./pyston$1 $$(ARGS) $$<
$$(call make_search,memcheck$1_%)
nosearch_memcheck_gdb$1_%: %.py pyston$1 $$(RUN_DEPS)
set +e; PYTHONPATH=test/test_extension $$(VALGRIND) -v -v -v -v -v --tool=memcheck --leak-check=no --track-origins=yes --vgdb=yes --vgdb-error=0 ./pyston$1 $$(ARGS) $$< & export PID=$$$$! ; \
set +e; PYTHONPATH=test/test_extension:$${PYTHONPATH} $$(VALGRIND) -v -v -v -v -v --tool=memcheck --leak-check=no --track-origins=yes --vgdb=yes --vgdb-error=0 ./pyston$1 $$(ARGS) $$< & export PID=$$$$! ; \
$$(GDB) --ex "set confirm off" --ex "target remote | $$(DEPS_DIR)/valgrind-3.10.0-install/bin/vgdb" --ex "continue" --ex "bt" ./pyston$1; kill -9 $$$$PID
$$(call make_search,memcheck_gdb$1_%)
nosearch_memleaks$1_%: %.py pyston$1 $$(RUN_DEPS)
PYTHONPATH=test/test_extension $$(VALGRIND) --tool=memcheck --leak-check=full --leak-resolution=low --show-reachable=yes ./pyston$1 $$(ARGS) $$<
PYTHONPATH=test/test_extension:$${PYTHONPATH} $$(VALGRIND) --tool=memcheck --leak-check=full --leak-resolution=low --show-reachable=yes ./pyston$1 $$(ARGS) $$<
$$(call make_search,memleaks$1_%)
nosearch_cachegrind$1_%: %.py pyston$1 $$(RUN_DEPS)
PYTHONPATH=test/test_extension $$(VALGRIND) --tool=cachegrind ./pyston$1 $$(ARGS) $$<
PYTHONPATH=test/test_extension:$${PYTHONPATH} $$(VALGRIND) --tool=cachegrind ./pyston$1 $$(ARGS) $$<
$$(call make_search,cachegrind$1_%)
endif
.PHONY: perf$1_%
nosearch_perf$1_%: %.py pyston$1
PYTHONPATH=test/test_extension perf record -g -- ./pyston$1 -q -p $$(ARGS) $$<
PYTHONPATH=test/test_extension:$${PYTHONPATH} perf record -g -- ./pyston$1 -q -p $$(ARGS) $$<
@$(MAKE) perf_report
$$(call make_search,perf$1_%)
......@@ -1204,17 +1208,25 @@ sharedmods: $(SHAREDMODS_OBJS)
.PHONY: ext_pyston
ext_pyston: $(TEST_EXT_MODULE_OBJS)
# Makefile hackery: we can build test extensions with any build configuration of pyston,
# so try to guess one that will end up being built anyway, and use that as the dependency.
# Choose the pyston binary used to build the extensions: the release build when any
# command-line goal mentions "release" or "perf", the debug build otherwise.
BUILD_PY := $(if $(findstring release,$(MAKECMDGOALS))$(findstring perf,$(MAKECMDGOALS)),pyston_release,pyston_dbg)
# Hack: we want to generate multiple targets from a single rule, run the rule only if the
# dependencies have been updated, and run it only once for all the targets.
# So tell make how to generate the first extension module only, and declare that the remaining
# ones simply depend on the first one.
$(firstword $(TEST_EXT_MODULE_OBJS)): $(TEST_EXT_MODULE_SRCS) | pyston_dbg
$(VERB) cd $(TEST_DIR)/test_extension; time ../../pyston_dbg setup.py build
$(firstword $(TEST_EXT_MODULE_OBJS)): $(TEST_EXT_MODULE_SRCS) | $(BUILD_PY)
$(VERB) cd $(TEST_DIR)/test_extension; time ../../$(BUILD_PY) setup.py build
$(VERB) cd $(TEST_DIR)/test_extension; ln -sf $(TEST_EXT_MODULE_NAMES:%=build/lib.linux2-2.7/%.pyston.so) .
$(VERB) touch -c $(TEST_EXT_MODULE_OBJS)
$(wordlist 2,9999,$(TEST_EXT_MODULE_OBJS)): $(firstword $(TEST_EXT_MODULE_OBJS))
$(firstword $(SHAREDMODS_OBJS)): $(SHAREDMODS_SRCS) | pyston_dbg
$(VERB) cd $(TEST_DIR)/test_extension; time ../../pyston_dbg ../../from_cpython/setup.py build --build-lib ../../lib_pyston
$(firstword $(SHAREDMODS_OBJS)): $(SHAREDMODS_SRCS) | $(BUILD_PY)
$(VERB) cd $(TEST_DIR)/test_extension; time ../../$(BUILD_PY) ../../from_cpython/setup.py build --build-lib ../../lib_pyston
$(VERB) touch -c $(SHAREDMODS_OBJS)
$(wordlist 2,9999,$(SHAREDMODS_OBJS)): $(firstword $(SHAREDMODS_OBJS))
......
# Allocation stress loop: evaluates an empty dict literal 10,000,000 times and
# discards each result, then runs once at import time via the call below.
def f():
for i in xrange(10000000):
{}
f()
import os
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), "../test/integration/django"))
from django.template.base import Origin, Template, Context, TemplateDoesNotExist
from django.conf import settings
from django.apps import apps
......
......@@ -290,6 +290,10 @@ bool ICInfo::shouldAttempt() {
retry_in--;
return false;
}
return times_rewritten < MEGAMORPHIC_THRESHOLD;
return !isMegamorphic();
}
// An inline cache counts as megamorphic once it has been rewritten at least
// MEGAMORPHIC_THRESHOLD times.
bool ICInfo::isMegamorphic() {
    const bool below_threshold = times_rewritten < MEGAMORPHIC_THRESHOLD;
    return !below_threshold;
}
}
......@@ -120,6 +120,7 @@ public:
void clear(ICSlotInfo* entry);
bool shouldAttempt();
bool isMegamorphic();
friend class ICSlotRewrite;
};
......
......@@ -746,8 +746,8 @@ void Rewriter::abort() {
finished = true;
rewrite->abort();
static StatCounter rewriter_aborts("rewriter_aborts");
rewriter_aborts.log();
static StatCounter ic_rewrites_aborted("ic_rewrites_aborted");
ic_rewrites_aborted.log();
}
void RewriterVar::bumpUse() {
......@@ -786,10 +786,10 @@ void Rewriter::commit() {
assert(!finished);
initPhaseEmitting();
static StatCounter rewriter_assemblyfail("rewriter_assemblyfail");
static StatCounter ic_rewrites_aborted_assemblyfail("ic_rewrites_aborted_assemblyfail");
auto on_assemblyfail = [&]() {
rewriter_assemblyfail.log();
ic_rewrites_aborted_assemblyfail.log();
this->abort();
};
......@@ -971,8 +971,8 @@ void Rewriter::commit() {
finished = true;
static StatCounter rewriter_commits("rewriter_commits");
rewriter_commits.log();
static StatCounter ic_rewrites_committed("ic_rewrites_committed");
ic_rewrites_committed.log();
}
bool Rewriter::finishAssembly(ICSlotInfo* picked_slot, int continue_offset) {
......@@ -1404,8 +1404,8 @@ Rewriter::Rewriter(ICSlotRewrite* rewrite, int num_args, const std::vector<int>&
args.push_back(var);
}
static StatCounter rewriter_starts("rewriter_starts");
rewriter_starts.log();
static StatCounter ic_rewrites_starts("ic_rewrites");
ic_rewrites_starts.log();
static StatCounter rewriter_spillsavoided("rewriter_spillsavoided");
// Calculate the list of live-ins based off the live-outs list,
......@@ -1457,22 +1457,28 @@ Rewriter* Rewriter::createRewriter(void* rtn_addr, int num_args, const char* deb
assert(!getICInfo(rtn_addr));
}
static StatCounter rewriter_attempts("rewriter_attempts");
rewriter_attempts.log();
static StatCounter ic_attempts("ic_attempts");
static StatCounter ic_attempts_nopatch("ic_attempts_nopatch");
static StatCounter ic_attempts_skipped("ic_attempts_skipped");
static StatCounter ic_attempts_skipped_megamorphic("ic_attempts_skipped_megamorphic");
static StatCounter ic_attempts_started("ic_attempts_started");
static StatCounter rewriter_nopatch("rewriter_nopatch");
static StatCounter rewriter_skipped("rewriter_skipped");
ic_attempts.log();
if (!ic) {
rewriter_nopatch.log();
ic_attempts_nopatch.log();
return NULL;
}
if (!ic->shouldAttempt()) {
rewriter_skipped.log();
ic_attempts_skipped.log();
if (ic->isMegamorphic())
ic_attempts_skipped_megamorphic.log();
return NULL;
}
ic_attempts_started.log();
return new Rewriter(ic->startRewrite(debug_name), num_args, ic->getLiveOuts());
}
......@@ -1641,6 +1647,8 @@ std::pair<uint8_t*, uint8_t*> initializePatchpoint3(void* slowpath_func, uint8_t
assembler::Assembler _a(start_addr, slowpath_start - start_addr);
//_a.trap();
if (slowpath_start - start_addr > 20)
_a.jmp(assembler::JumpDestination::fromStart(slowpath_start - start_addr));
_a.fillWithNops();
assembler::Assembler assem(slowpath_start, end_addr - slowpath_start);
......
......@@ -213,9 +213,16 @@ CompiledFunction* compileFunction(CLFunction* f, FunctionSpecialization* spec, E
ss << "\033[34;1mDoing OSR-entry partial compile of " << source->fn << ":" << name
<< ", starting with backedge to block " << entry_descriptor->backedge->target->idx;
}
ss << " at effort level " << (int)effort;
ss << " at effort level " << (int)effort << '\n';
if (entry_descriptor && VERBOSITY("irgen") >= 2) {
for (const auto& p : entry_descriptor->args) {
ss << p.first.str() << ": " << p.second->debugName() << '\n';
}
}
ss << "\033[0m";
printf("%s\n", ss.str().c_str());
printf("%s", ss.str().c_str());
}
#ifndef NDEBUG
......
......@@ -518,10 +518,12 @@ extern "C" PyObject* PystonType_GenericAlloc(BoxedClass* cls, Py_ssize_t nitems)
#if STAT_ALLOCATIONS
#define ALLOC_STATS(cls) \
std::string per_name_alloc_name = "alloc." + std::string(cls->tp_name); \
std::string per_name_allocsize_name = "allocsize." + std::string(cls->tp_name); \
Stats::log(Stats::getStatId(per_name_alloc_name)); \
Stats::log(Stats::getStatId(per_name_allocsize_name), size);
if (cls->tp_name) { \
std::string per_name_alloc_name = "alloc." + std::string(cls->tp_name); \
std::string per_name_allocsize_name = "allocsize." + std::string(cls->tp_name); \
Stats::log(Stats::getStatId(per_name_alloc_name)); \
Stats::log(Stats::getStatId(per_name_allocsize_name), size); \
}
#define ALLOC_STATS_VAR(cls) \
if (cls->tp_name) { \
std::string per_name_alloc_name = "alloc." + std::string(cls->tp_name); \
......@@ -550,7 +552,6 @@ extern "C" PyObject* PystonType_GenericAlloc(BoxedClass* cls, Py_ssize_t nitems)
// asserts in the 1-arg operator new function:
#define DEFAULT_CLASS_SIMPLE(default_cls) \
void* operator new(size_t size, BoxedClass * cls) __attribute__((visibility("default"))) { \
ALLOC_STATS(cls); \
return Box::operator new(size, cls); \
} \
void* operator new(size_t size) __attribute__((visibility("default"))) { \
......@@ -601,7 +602,6 @@ extern "C" PyObject* PystonType_GenericAlloc(BoxedClass* cls, Py_ssize_t nitems)
} \
\
void* operator new(size_t size, BoxedClass * cls, size_t nitems) __attribute__((visibility("default"))) { \
ALLOC_STATS_VAR(default_cls) \
assert(cls->tp_itemsize == itemsize); \
return BoxVar::operator new(size, cls, nitems); \
} \
......
......@@ -28,7 +28,18 @@ namespace pyston {
namespace gc {
static StatCounter gc_alloc_bytes("zzz_gc_alloc_bytes");
#if STAT_ALLOCATIONS
static StatCounter gc_alloc_bytes("gc_alloc_bytes");
// Per-kind allocation byte counters; gc_alloc() indexes this array with (int)kind_id.
// NOTE(review): the entry order presumably mirrors the GCKind enum values, with "???" standing
// in for value 0 -- confirm against the enum before adding or reordering entries.
// The empty trailing "//" comments presumably stop the formatter from joining the entries.
static StatCounter gc_alloc_bytes_typed[] = {
StatCounter("gc_alloc_bytes_???"), //
StatCounter("gc_alloc_bytes_python"), //
StatCounter("gc_alloc_bytes_conservative"), //
StatCounter("gc_alloc_bytes_precise"), //
StatCounter("gc_alloc_bytes_untracked"), //
StatCounter("gc_alloc_bytes_hidden_class"), //
};
#endif
extern "C" inline void* gc_alloc(size_t bytes, GCKind kind_id) {
STAT_TIMER(t0, "us_timer_gc_alloc");
size_t alloc_bytes = bytes + sizeof(GCAllocation);
......@@ -94,6 +105,7 @@ extern "C" inline void* gc_alloc(size_t bytes, GCKind kind_id) {
#if STAT_ALLOCATIONS
gc_alloc_bytes.log(bytes);
gc_alloc_bytes_typed[(int)kind_id].log(bytes);
#endif
return r;
......
......@@ -111,9 +111,12 @@ static unsigned bytesAllocatedSinceCollection;
static __thread unsigned thread_bytesAllocatedSinceCollection;
#define ALLOCBYTES_PER_COLLECTION 10000000
static StatCounter gc_registered_bytes("gc_registered_bytes");
void registerGCManagedBytes(size_t bytes) {
thread_bytesAllocatedSinceCollection += bytes;
if (unlikely(thread_bytesAllocatedSinceCollection > ALLOCBYTES_PER_COLLECTION / 4)) {
gc_registered_bytes.log(thread_bytesAllocatedSinceCollection);
bytesAllocatedSinceCollection += thread_bytesAllocatedSinceCollection;
thread_bytesAllocatedSinceCollection = 0;
......
......@@ -180,6 +180,8 @@ extern "C" PyObject* _PyObject_New(PyTypeObject* tp) noexcept {
// Analogue of PyType_GenericNew
void* BoxVar::operator new(size_t size, BoxedClass* cls, size_t nitems) {
ALLOC_STATS_VAR(cls);
assert(cls);
ASSERT(cls->tp_basicsize >= size, "%s", cls->tp_name);
assert(cls->tp_itemsize > 0);
......@@ -191,6 +193,8 @@ void* BoxVar::operator new(size_t size, BoxedClass* cls, size_t nitems) {
}
void* Box::operator new(size_t size, BoxedClass* cls) {
ALLOC_STATS(cls);
assert(cls);
ASSERT(cls->tp_basicsize >= size, "%s", cls->tp_name);
assert(cls->tp_itemsize == 0);
......
......@@ -50,7 +50,7 @@ def lookupAsHeapAddr(n):
while True:
l = _heap_proc.stdout.readline()
if l.startswith("Pyston v0.2"):
if l.startswith("Pyston v"):
break
_heap_proc.stdin.write("dumpAddr(%d)\nprint '!!!!'\n" % n)
......@@ -136,11 +136,16 @@ equivalent to '--heap-map-args ./pyston_release -i BENCHMARK'.
addr = l.split(':')[0]
count = counts.pop(addr.strip(), 0)
m = re.search("movabs \\$0x([0-9a-f]+),", l)
extra = ""
m = re.search("movabs \\$0x([0-9a-f]{4,}),", l)
if m:
n = int(m.group(1), 16)
extra = lookupConstant(n)
m = re.search("mov \\$0x([0-9a-f]{4,}),", l)
if m:
n = int(m.group(1), 16)
extra = lookupConstant(n)
if args.collapse_nops and l.endswith("\tnop"):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment