Commit 2537d743 authored by Marius Wachtler's avatar Marius Wachtler

New context switching code for generators

This is a huge speed improvement for generators:
fasta.py now takes 8 seconds instead of 18 seconds.
parent a3a12bb6
cmake_minimum_required(VERSION 2.8)
project(pyston)
project(pyston C CXX ASM)
include(ExternalProject)
......
......@@ -289,6 +289,7 @@ STDLIB_SRCS := $(wildcard src/runtime/inline/*.cpp)
SRCS := $(MAIN_SRCS) $(STDLIB_SRCS)
STDLIB_OBJS := stdlib.bc.o stdlib.stripped.bc.o
STDLIB_RELEASE_OBJS := stdlib.release.bc.o
ASM_SRCS := $(wildcard src/runtime/*.S)
STDMODULE_SRCS := errnomodule.c shamodule.c sha256module.c sha512module.c _math.c mathmodule.c md5.c md5module.c _randommodule.c _sre.c operator.c binascii.c pwdmodule.c posixmodule.c _struct.c datetimemodule.c _functoolsmodule.c _collectionsmodule.c itertoolsmodule.c resource.c signalmodule.c selectmodule.c fcntlmodule.c timemodule.c arraymodule.c $(EXTRA_STDMODULE_SRCS)
STDOBJECT_SRCS := structseq.c capsule.c stringobject.c $(EXTRA_STDOBJECT_SRCS)
......@@ -297,10 +298,10 @@ FROM_CPYTHON_SRCS := $(addprefix from_cpython/Modules/,$(STDMODULE_SRCS)) $(addp
# The stdlib objects have slightly longer dependency chains,
# so put them first in the list:
OBJS := $(STDLIB_OBJS) $(SRCS:.cpp=.o) $(FROM_CPYTHON_SRCS:.c=.o)
ASTPRINT_OBJS := $(STDLIB_OBJS) $(BASE_SRCS:.cpp=.o) $(FROM_CPYTHON_SRCS:.c=.o)
PROFILE_OBJS := $(STDLIB_RELEASE_OBJS) $(MAIN_SRCS:.cpp=.prof.o) $(STDLIB_SRCS:.cpp=.release.o) $(FROM_CPYTHON_SRCS:.c=.release.o)
OPT_OBJS := $(STDLIB_RELEASE_OBJS) $(SRCS:.cpp=.release.o) $(FROM_CPYTHON_SRCS:.c=.release.o)
OBJS := $(STDLIB_OBJS) $(SRCS:.cpp=.o) $(FROM_CPYTHON_SRCS:.c=.o) $(ASM_SRCS)
ASTPRINT_OBJS := $(STDLIB_OBJS) $(BASE_SRCS:.cpp=.o) $(FROM_CPYTHON_SRCS:.c=.o) $(ASM_SRCS)
PROFILE_OBJS := $(STDLIB_RELEASE_OBJS) $(MAIN_SRCS:.cpp=.prof.o) $(STDLIB_SRCS:.cpp=.release.o) $(FROM_CPYTHON_SRCS:.c=.release.o) $(ASM_SRCS)
OPT_OBJS := $(STDLIB_RELEASE_OBJS) $(SRCS:.cpp=.release.o) $(FROM_CPYTHON_SRCS:.c=.release.o) $(ASM_SRCS)
OPTIONAL_SRCS := src/codegen/profiling/oprofile.cpp src/codegen/profiling/pprof.cpp
TOOL_SRCS := $(wildcard $(TOOLS_DIR)/*.cpp)
......
......@@ -18,7 +18,7 @@ file(GLOB CODEGEN_SRCS codegen/*.cpp)
file(GLOB CORE_SRCS core/*.cpp)
file(GLOB GC_SRCS gc/*.cpp)
file(GLOB RUNTIME_BUILTIN_MODULES_SRCS runtime/builtin_modules/*.cpp)
file(GLOB RUNTIME_SRCS runtime/*.cpp)
file(GLOB RUNTIME_SRCS runtime/*.cpp runtime/*.S)
if(ENABLE_GPERFTOOLS)
set(OPTIONAL_SRCS ${OPTIONAL_SRCS} codegen/profiling/pprof.cpp)
......
......@@ -29,6 +29,7 @@
#include "codegen/irgen/hooks.h"
#include "codegen/stackmaps.h"
#include "core/util.h"
#include "runtime/ctxswitching.h"
#include "runtime/generator.h"
#include "runtime/traceback.h"
#include "runtime/types.h"
......@@ -390,11 +391,21 @@ public:
if ((unw_word_t)generatorEntry <= ip && ip < generator_entry_end) {
// for generators continue unwinding in the context in which the generator got called
static_assert(sizeof(ucontext_t) == sizeof(unw_context_t), "");
unw_word_t bp;
unw_get_reg(&this->cursor, UNW_TDEP_BP, &bp);
ucontext_t* remote_ctx = getReturnContextForGeneratorFrame((void*)bp);
unw_init_local(&cursor, remote_ctx);
Context* remote_ctx = getReturnContextForGeneratorFrame((void*)bp);
// Set up the unw_context_t struct from the information we have; this seems to be enough to make unwinding work.
memset(&ctx, 0, sizeof(ctx));
ctx.uc_mcontext.gregs[REG_R12] = remote_ctx->r12;
ctx.uc_mcontext.gregs[REG_R13] = remote_ctx->r13;
ctx.uc_mcontext.gregs[REG_R14] = remote_ctx->r14;
ctx.uc_mcontext.gregs[REG_R15] = remote_ctx->r15;
ctx.uc_mcontext.gregs[REG_RBX] = remote_ctx->rbx;
ctx.uc_mcontext.gregs[REG_RBP] = remote_ctx->rbp;
ctx.uc_mcontext.gregs[REG_RIP] = remote_ctx->rip;
ctx.uc_mcontext.gregs[REG_RSP] = (greg_t)remote_ctx;
unw_init_local(&cursor, &ctx);
}
// keep unwinding
......
// Copyright (c) 2014-2015 Dropbox, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// These functions are influenced by Boost.Context, written by Oliver Kowalke.
// Context* makeContext(void* stack_top, void (*start_func)(intptr_t))
//
// Builds the initial Context for a fresh stack and returns a pointer to it.
//   rdi = stack_top:  top of the new stack; the Context is placed below this address
//   rsi = start_func: stored in the ip slot, so the first swapContext() into this context jumps to it
//   rax = returned Context*, located 64 bytes below stack_top
// Only the ip slot is written here; the six register slots keep whatever the stack memory already held.
// Once swapContext() has popped the 56-byte Context, the stack pointer ends up at stack_top - 8.
.text
.globl makeContext
.type makeContext,@function
.align 16
makeContext:
leaq -64(%rdi), %rax // reserve room for the initial Context (56 bytes) plus 8 bytes of alignment padding
movq %rsi, 48(%rax) // context->ip = start_func (ip is the 7th 8-byte slot, hence offset 48)
ret // return the Context*, i.e. the new adjusted top of stack
.size makeContext,.-makeContext
// void swapContext(Context** old_context, Context* new_context, intptr_t arg)
//
// Saves the current execution state on the current stack and continues at new_context.
//   rdi = old_context: receives the stack pointer value that identifies the saved state
//   rsi = new_context: Context to resume (built by makeContext or saved by an earlier swapContext)
//   rdx = arg:         copied into rdi before jumping to the target
// The six pushes below, together with the return address that is already on the stack at entry,
// form the sequence r12, r13, r14, r15, rbx, rbp, ip (lowest address first). This is the same
// sequence that is popped from new_context at the end, so a saved state can later be passed
// back in as new_context and execution then continues right after the original call.
.globl swapContext
.type swapContext,@function
.align 16
swapContext:
pushq %rbp // save the registers, in reverse order of the Context layout
pushq %rbx
pushq %r15
pushq %r14
pushq %r13
pushq %r12 // pushed last, so r12 ends up at the lowest address (offset 0)
movq %rsp, (%rdi) // *old_context = current SP, which now points at the saved registers
movq %rsi, %rsp // switch stacks: SP = new_context
movq %rdx, %rdi // arg becomes the first argument when the target is a fresh start_func
popq %r12 // restore the registers stored in new_context
popq %r13
popq %r14
popq %r15
popq %rbx
popq %rbp
popq %r8 // r8 = new_context->ip
jmp *%r8 // continue execution at context->ip
.size swapContext,.-swapContext
.section .note.GNU-stack,"",%progbits // we don't need executable stack
// Copyright (c) 2014-2015 Dropbox, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PYSTON_RUNTIME_CTXSWITCHING_H
#define PYSTON_RUNTIME_CTXSWITCHING_H
#include <cstdint>
namespace pyston {

// Saved execution state used by makeContext()/swapContext(), which are implemented in assembly.
// The members are laid out in the order in which swapContext() pops them from the stack:
// the registers r12-r15, rbx and rbp, followed by the address at which execution continues.
struct Context {
    int64_t r12;
    int64_t r13;
    int64_t r14;
    int64_t r15;
    int64_t rbx;
    int64_t rbp;
    int64_t rip;
};
// The assembly addresses the members by fixed offsets, so the struct must be exactly seven 8-byte slots.
static_assert(sizeof(Context) == 7 * sizeof(int64_t), "");

extern "C" {
// Places an initial Context just below stack_top, stores start_func in its rip member,
// and returns a pointer to that Context.
Context* makeContext(void* stack_top, void (*start_func)(intptr_t));

// Saves the current state, writes its address to *old_context, and resumes new_context.
// arg is passed on in the first-argument register, so a context fresh from makeContext()
// receives it as the parameter of its start_func.
void swapContext(Context** old_context, Context* new_context, intptr_t arg);
}

} // namespace pyston
#endif
......@@ -25,6 +25,7 @@
#include "core/stats.h"
#include "core/types.h"
#include "gc/collector.h"
#include "runtime/ctxswitching.h"
#include "runtime/objmodel.h"
#include "runtime/types.h"
#include "runtime/util.h"
......@@ -58,17 +59,17 @@ public:
}
};
ucontext* getReturnContextForGeneratorFrame(void* frame_addr) {
Context* getReturnContextForGeneratorFrame(void* frame_addr) {
BoxedGenerator* generator = s_generator_map[frame_addr];
assert(generator);
return &generator->returnContext;
return generator->returnContext;
}
void generatorEntry(BoxedGenerator* g) {
assert(g->cls == generator_cls);
assert(g->function->cls == function_cls);
threading::pushGenerator(g, g->stack_begin, (void*)g->returnContext.uc_mcontext.gregs[REG_RSP]);
threading::pushGenerator(g, g->stack_begin, g->returnContext);
try {
RegisterHelper context_registerer(g, __builtin_frame_address(0));
......@@ -86,7 +87,7 @@ void generatorEntry(BoxedGenerator* g) {
// we returned from the body of the generator. next/send/throw will notify the caller
g->entryExited = true;
threading::popGenerator();
swapcontext(&g->context, &g->returnContext);
swapContext(&g->context, g->returnContext, 0);
}
Box* generatorIter(Box* s) {
......@@ -106,7 +107,7 @@ Box* generatorSend(Box* s, Box* v) {
self->returnValue = v;
self->running = true;
swapcontext(&self->returnContext, &self->context);
swapContext(&self->returnContext, self->context, (intptr_t)self);
self->running = false;
// propagate exception to the caller
......@@ -150,8 +151,8 @@ extern "C" Box* yield(BoxedGenerator* obj, Box* value) {
self->returnValue = value;
threading::popGenerator();
swapcontext(&self->context, &self->returnContext);
threading::pushGenerator(obj, obj->stack_begin, (void*)obj->returnContext.uc_mcontext.gregs[REG_RSP]);
swapContext(&self->context, self->returnContext, 0);
threading::pushGenerator(obj, obj->stack_begin, obj->returnContext);
// if the generator receives an exception from the caller we have to throw it
if (self->exception.type) {
......@@ -172,7 +173,7 @@ extern "C" BoxedGenerator* createGenerator(BoxedFunctionBase* function, Box* arg
extern "C" BoxedGenerator::BoxedGenerator(BoxedFunctionBase* function, Box* arg1, Box* arg2, Box* arg3, Box** args)
: function(function), arg1(arg1), arg2(arg2), arg3(arg3), args(nullptr), entryExited(false), running(false),
returnValue(nullptr), exception(nullptr, nullptr, nullptr) {
returnValue(nullptr), exception(nullptr, nullptr, nullptr), context(nullptr), returnContext(nullptr) {
giveAttr("__name__", boxString(function->f->source->getName()));
......@@ -183,9 +184,6 @@ extern "C" BoxedGenerator::BoxedGenerator(BoxedFunctionBase* function, Box* arg1
memcpy(&this->args->elts[0], args, numArgs * sizeof(Box*));
}
getcontext(&context);
context.uc_link = 0;
uint64_t stack_low = next_stack_addr;
uint64_t stack_high = stack_low + MAX_STACK_SIZE;
next_stack_addr = stack_high;
......@@ -198,9 +196,6 @@ extern "C" BoxedGenerator::BoxedGenerator(BoxedFunctionBase* function, Box* arg1
MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0);
assert(p == initial_stack_limit);
context.uc_stack.ss_sp = initial_stack_limit;
context.uc_stack.ss_size = INITIAL_STACK_SIZE;
// Create an inaccessible redzone so that the generator stack won't grow indefinitely.
// Looks like it throws a SIGBUS if we reach the redzone; it's unclear if that's better
// or worse than being able to consume all available memory.
......@@ -219,7 +214,9 @@ extern "C" BoxedGenerator::BoxedGenerator(BoxedFunctionBase* function, Box* arg1
#error "implement me"
#endif
makecontext(&context, (void (*)(void))generatorEntry, 1, this);
assert(((intptr_t)stack_begin & (~(intptr_t)(0xF))) == (intptr_t)stack_begin && "stack must be aligned");
context = makeContext(stack_begin, (void (*)(intptr_t))generatorEntry);
}
extern "C" void generatorGCHandler(GCVisitor* v, Box* b) {
......@@ -251,10 +248,8 @@ extern "C" void generatorGCHandler(GCVisitor* v, Box* b) {
v->visitPotentialRange((void**)&g->returnContext,
((void**)&g->returnContext) + sizeof(g->returnContext) / sizeof(void*));
} else {
v->visitPotentialRange((void**)&g->context, ((void**)&g->context) + sizeof(g->context) / sizeof(void*));
#if STACK_GROWS_DOWN
v->visitPotentialRange((void**)g->context.uc_mcontext.gregs[REG_RSP], (void**)g->stack_begin);
v->visitPotentialRange((void**)g->context, (void**)g->stack_begin);
#endif
}
}
......
......@@ -18,15 +18,16 @@
#include "core/types.h"
#include "runtime/types.h"
struct ucontext;
namespace pyston {
struct Context;
extern BoxedClass* generator_cls;
void setupGenerator();
void generatorEntry(BoxedGenerator* g);
ucontext* getReturnContextForGeneratorFrame(void* frame_addr);
Context* getReturnContextForGeneratorFrame(void* frame_addr);
extern "C" Box* yield(BoxedGenerator* obj, Box* value);
extern "C" BoxedGenerator* createGenerator(BoxedFunctionBase* function, Box* arg1, Box* arg2, Box* arg3, Box** args);
......
......@@ -602,7 +602,7 @@ public:
Box* returnValue;
ExcInfo exception;
ucontext_t context, returnContext;
struct Context* context, *returnContext;
void* stack_begin;
BoxedGenerator(BoxedFunctionBase* function, Box* arg1, Box* arg2, Box* arg3, Box** args);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment