Commit 73140839 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Merge pull request #557 from kmod/pyc

Make our pyc handling more robust
parents 6775ce65 57858b67
......@@ -221,7 +221,7 @@ add_test(NAME pyston_defaults COMMAND ${PYTHON_EXE} ${CMAKE_SOURCE_DIR}/tools/te
add_test(NAME pyston_defaults_cpython_tests COMMAND ${PYTHON_EXE} ${CMAKE_SOURCE_DIR}/tools/tester.py -R ./pyston -j${TEST_THREADS} -a=-S -k --exit-code-only --skip-failing -t30 ${CMAKE_SOURCE_DIR}/test/cpython)
add_test(NAME pyston_defaults_integration_tests COMMAND ${PYTHON_EXE} ${CMAKE_SOURCE_DIR}/tools/tester.py -R ./pyston -j${TEST_THREADS} -a=-S -k --exit-code-only --skip-failing -t300 ${CMAKE_SOURCE_DIR}/test/integration)
add_test(NAME pyston_max_compilation_tier COMMAND ${PYTHON_EXE} ${CMAKE_SOURCE_DIR}/tools/tester.py -R ./pyston -j${TEST_THREADS} -a=-O -a=-S -k ${CMAKE_SOURCE_DIR}/test/tests)
add_test(NAME pyston_old_parser COMMAND ${PYTHON_EXE} ${CMAKE_SOURCE_DIR}/tools/tester.py -a=-x -R ./pyston -j1 -a=-n -a=-S -k ${CMAKE_SOURCE_DIR}/test/tests)
add_test(NAME pyston_old_parser COMMAND ${PYTHON_EXE} ${CMAKE_SOURCE_DIR}/tools/tester.py -a=-x -R ./pyston -j${TEST_THREADS} -a=-n -a=-S -k ${CMAKE_SOURCE_DIR}/test/tests)
# format
file(GLOB_RECURSE FORMAT_FILES ${CMAKE_SOURCE_DIR}/src/*.h ${CMAKE_SOURCE_DIR}/src/*.cpp)
......
......@@ -42,7 +42,10 @@ private:
static const int BUFSIZE = 1024;
char buf[BUFSIZE];
int start, end;
// exactly one of these should be set and valid:
FILE* fp;
std::vector<char> data;
InternedStringPool* intern_pool;
......@@ -54,6 +57,7 @@ private:
public:
void fill() {
if (fp) {
memmove(buf, buf + start, end - start);
end -= start;
start = 0;
......@@ -61,15 +65,23 @@ public:
if (VERBOSITY("parsing") >= 3)
printf("filled, now at %d-%d\n", start, end);
}
}
// Constructs a reader that pulls its bytes from the stdio stream `fp`.
// The in-memory `data` vector is left empty: exactly one of `fp` / `data` is meant to be in use.
// (Defined once; a second definition with the same signature would be a redefinition error.)
BufferedReader(FILE* fp) : start(0), end(0), fp(fp), data(), intern_pool(NULL) {}
// Constructs a reader over an in-memory byte vector instead of a stream (`fp` is set to NULL).
// Reading starts at index `start_offset` and `end` is set to the size of the vector.
// Note: `end(data.size())` refers to the constructor parameter; this is safe because `end` is
// declared before the `data` member and is therefore initialized before the parameter is moved from.
BufferedReader(std::vector<char> data, int start_offset = 0)
: start(start_offset), end(data.size()), fp(NULL), data(std::move(data)), intern_pool(NULL) {}
// Returns the number of bytes between the current position (`start`) and the end of the
// currently available data (`end`).
int bytesBuffered() { return (end - start); }
// Consumes and returns the next byte, advancing `start` by one.
// Aborts with "premature eof" if no byte is available after ensure(1).
// NOTE(review): ensure() is defined outside this view; presumably it refills the buffer - confirm.
uint8_t readByte() {
    ensure(1);
    RELEASE_ASSERT(end > start, "premature eof");

    // Stream-backed readers index the fixed buffer; vector-backed readers index `data` directly.
    const int pos = start++;
    return fp ? buf[pos] : data[pos];
}
// Reads two bytes and combines them into a 16-bit value, first byte read = high byte.
uint16_t readShort() {
    // The two reads must be separate statements: the operands of `|` may be evaluated in either
    // order, so calling readByte() twice inside one expression can legally swap the bytes.
    const uint16_t high = readByte();
    const uint16_t low = readByte();
    return static_cast<uint16_t>((high << 8) | low);
}
// Reads four bytes and combines them into a 32-bit value, first short read = high half.
uint32_t readUInt() {
    // Sequence the two reads explicitly (operand evaluation order of `|` is unspecified), and
    // widen to uint32_t before shifting so the shift is not performed on a promoted signed int.
    const uint32_t high = readShort();
    const uint32_t low = readShort();
    return (high << 16) | low;
}
......@@ -988,7 +1000,7 @@ AST_Module* parse_file(const char* fn) {
if (ENABLE_PYPA_PARSER) {
AST_Module* rtn = pypa_parse(fn);
assert(rtn);
RELEASE_ASSERT(rtn, "unknown parse error");
return rtn;
}
......@@ -1014,17 +1026,17 @@ AST_Module* parse_file(const char* fn) {
const char* getMagic() {
if (ENABLE_PYPA_PARSER)
return "a\ncK";
return "a\ncL";
else
return "a\nck";
return "a\ncl";
}
// Sizes of the fields at the start of a cache file, in on-disk order:
// magic string, payload length, checksum; the serialized payload follows.
#define MAGIC_STRING_LENGTH 4
// The payload length is stored as a raw `int`.
#define LENGTH_LENGTH sizeof(int)
// The checksum is a single byte (it is read and written as a uint8_t).
// Defined exactly once: a second, conflicting definition would silently change the header size.
#define CHECKSUM_LENGTH 1
// Outcome of _reparse(); caching_parse_file() inspects it when no module was produced.
enum class ParseResult {
// Returned by _reparse() after it has written and closed the cache file.
SUCCESS,
// Parsing failed; caching_parse_file() returns NULL when it sees this.
FAILURE,
// caching_parse_file() falls back to parse_file(fn) when it sees this.
// NOTE(review): presumably returned when the cache file cannot be opened for writing; the
// code that returns it is not visible here - confirm.
PYC_UNWRITABLE,
};
static ParseResult _reparse(const char* fn, const std::string& cache_fn, AST_Module*& module) {
......@@ -1037,17 +1049,22 @@ static ParseResult _reparse(const char* fn, const std::string& cache_fn, AST_Mod
int checksum_start = ftell(cache_fp);
int bytes_written = -1;
// Currently just use the length as the checksum
static_assert(sizeof(bytes_written) >= CHECKSUM_LENGTH, "");
fwrite(&bytes_written, 1, CHECKSUM_LENGTH, cache_fp);
static_assert(sizeof(bytes_written) == LENGTH_LENGTH, "");
fwrite(&bytes_written, 1, LENGTH_LENGTH, cache_fp);
bytes_written = 0;
uint8_t checksum = -1;
static_assert(sizeof(checksum) == CHECKSUM_LENGTH, "");
fwrite(&checksum, 1, CHECKSUM_LENGTH, cache_fp);
checksum = 0;
if (ENABLE_PYPA_PARSER) {
module = pypa_parse(fn);
if (!module)
return ParseResult::FAILURE;
bytes_written += serializeAST(module, cache_fp);
RELEASE_ASSERT(module, "unknown parse error");
auto p = serializeAST(module, cache_fp);
checksum = p.second;
bytes_written += p.first;
} else {
FILE* parser = popen(getParserCommandLine(fn).c_str(), "r");
char buf[80];
......@@ -1057,13 +1074,18 @@ static ParseResult _reparse(const char* fn, const std::string& cache_fn, AST_Mod
break;
bytes_written += nread;
fwrite(buf, 1, nread, cache_fp);
for (int i = 0; i < nread; i++) {
checksum ^= buf[i];
}
}
int code = pclose(parser);
assert(code == 0);
}
fseek(cache_fp, checksum_start, SEEK_SET);
fwrite(&bytes_written, 1, CHECKSUM_LENGTH, cache_fp);
fwrite(&bytes_written, 1, LENGTH_LENGTH, cache_fp);
fwrite(&checksum, 1, CHECKSUM_LENGTH, cache_fp);
fclose(cache_fp);
return ParseResult::SUCCESS;
......@@ -1092,26 +1114,45 @@ AST_Module* caching_parse_file(const char* fn) {
if (mod)
return mod;
if (result == ParseResult::FAILURE)
return NULL;
if (result == ParseResult::PYC_UNWRITABLE)
return parse_file(fn);
code = stat(cache_fn.c_str(), &cache_stat);
assert(code == 0);
if (code != 0)
return parse_file(fn);
}
std::vector<char> file_data;
int tries = 0;
while (true) {
FILE* fp = fopen(cache_fn.c_str(), "r");
assert(fp);
bool good = (bool)fp;
if (good) {
char buf[1024];
while (true) {
bool good = true;
int read = fread(buf, 1, 1024, fp);
for (int i = 0; i < read; i++)
file_data.push_back(buf[i]);
if (read == 0) {
if (ferror(fp))
good = false;
break;
}
}
fclose(fp);
fp = NULL;
}
if (file_data.size() < MAGIC_STRING_LENGTH + LENGTH_LENGTH + CHECKSUM_LENGTH)
good = false;
if (good) {
char buf[MAGIC_STRING_LENGTH];
int read = fread(buf, 1, MAGIC_STRING_LENGTH, fp);
if (read != MAGIC_STRING_LENGTH || strncmp(buf, getMagic(), MAGIC_STRING_LENGTH) != 0) {
if (strncmp(&file_data[0], getMagic(), MAGIC_STRING_LENGTH) != 0) {
if (VERBOSITY()) {
printf("Warning: corrupt or non-Pyston .pyc file found; ignoring\n");
}
......@@ -1120,54 +1161,71 @@ AST_Module* caching_parse_file(const char* fn) {
}
if (good) {
int length = 0;
fseek(fp, MAGIC_STRING_LENGTH, SEEK_SET);
static_assert(sizeof(length) >= CHECKSUM_LENGTH, "");
int read = fread(&length, 1, CHECKSUM_LENGTH, fp);
int length;
static_assert(sizeof(length) == LENGTH_LENGTH, "");
length = *reinterpret_cast<int*>(&file_data[MAGIC_STRING_LENGTH]);
int expected_total_length = MAGIC_STRING_LENGTH + CHECKSUM_LENGTH + length;
int expected_total_length = MAGIC_STRING_LENGTH + LENGTH_LENGTH + CHECKSUM_LENGTH + length;
if (read != CHECKSUM_LENGTH || expected_total_length != cache_stat.st_size) {
if (expected_total_length != file_data.size()) {
if (VERBOSITY()) {
printf("Warning: truncated .pyc file found; ignoring\n");
}
good = false;
} else {
RELEASE_ASSERT(length > 0 && length < 10 * 1048576, "invalid file length: %d (file size is %ld)",
length, file_data.size());
}
}
if (good) {
uint8_t checksum;
static_assert(sizeof(checksum) == CHECKSUM_LENGTH, "");
checksum = *reinterpret_cast<uint8_t*>(&file_data[MAGIC_STRING_LENGTH + LENGTH_LENGTH]);
for (int i = MAGIC_STRING_LENGTH + LENGTH_LENGTH + CHECKSUM_LENGTH; i < file_data.size(); i++) {
checksum ^= file_data[i];
}
if (checksum != 0) {
if (VERBOSITY())
printf("pyc checksum failed!\n");
good = false;
}
}
if (good) {
std::unique_ptr<BufferedReader> reader(
new BufferedReader(file_data, MAGIC_STRING_LENGTH + LENGTH_LENGTH + CHECKSUM_LENGTH));
AST* rtn = readASTMisc(reader.get());
reader->fill();
if (rtn && reader->bytesBuffered() == 0) {
assert(rtn->type == AST_TYPE::Module);
return ast_cast<AST_Module>(rtn);
}
good = false;
}
assert(!good);
tries++;
RELEASE_ASSERT(tries <= 5, "repeatedly failing to parse file");
if (!good) {
fclose(fp);
assert(!fp);
file_data.clear();
AST_Module* mod = 0;
auto result = _reparse(fn, cache_fn, mod);
if (mod)
return mod;
if (result == ParseResult::FAILURE)
return NULL;
if (result == ParseResult::PYC_UNWRITABLE)
return parse_file(fn);
code = stat(cache_fn.c_str(), &cache_stat);
assert(code == 0);
fp = fopen(cache_fn.c_str(), "r");
assert(fp);
} else {
break;
if (code != 0)
return parse_file(fn);
}
}
BufferedReader* reader = new BufferedReader(fp);
AST* rtn = readASTMisc(reader);
reader->fill();
assert(reader->bytesBuffered() == 0);
delete reader;
fclose(fp);
assert(rtn->type == AST_TYPE::Module);
return ast_cast<AST_Module>(rtn);
}
}
......@@ -24,34 +24,42 @@ namespace {
class SerializeASTVisitor : public ASTVisitor {
private:
FILE* file;
uint8_t checksum;
public:
static unsigned int write(AST_Module* module, FILE* file) {
static std::pair<unsigned int, uint8_t> write(AST_Module* module, FILE* file) {
SerializeASTVisitor visitor(file);
unsigned long start_pos = ftell(file);
visitor.writeASTMisc(module);
return ftell(file) - start_pos;
return std::make_pair(ftell(file) - start_pos, visitor.checksum);
}
private:
SerializeASTVisitor(FILE* file) : file(file) {}
SerializeASTVisitor(FILE* file) : file(file), checksum(0) {}
virtual ~SerializeASTVisitor() {}
void writeByte(uint8_t v) { fwrite(&v, 1, sizeof(v), file); }
// Writes a single byte to `file` and folds it into the running XOR checksum.
void writeByte(uint8_t v) {
fwrite(&v, 1, sizeof(v), file);
// XOR-accumulate every byte written so the total can be stored alongside the data.
checksum ^= v;
}
// Writes `v` as two bytes, most-significant byte first (big-endian).
void writeShort(uint16_t v) {
    // Emit only through writeByte(): each byte is written exactly once, is included in the
    // checksum, and the on-disk order does not depend on the host's endianness.
    for (int i = 1; i >= 0; i--) {
        writeByte((v >> (i * 8)) & 0xff);
    }
}
// Writes `v` as four bytes, most-significant byte first (big-endian).
void writeUInt(uint32_t v) {
    // Emit only through writeByte(): each byte is written exactly once, is included in the
    // checksum, and the on-disk order does not depend on the host's endianness.
    for (int i = 3; i >= 0; i--) {
        writeByte((v >> (i * 8)) & 0xff);
    }
}
// Writes `v` as eight bytes, most-significant byte first (big-endian).
void writeULL(uint64_t v) {
    // Emit only through writeByte(): each byte is written exactly once, is included in the
    // checksum, and the on-disk order does not depend on the host's endianness.
    for (int i = 7; i >= 0; i--) {
        writeByte((v >> (i * 8)) & 0xff);
    }
}
void writeDouble(double v) {
......@@ -65,6 +73,9 @@ private:
// Writes a length-prefixed string: the size via writeShort(), then the raw characters.
// The characters are written straight to `file`, so they are folded into the checksum here.
// NOTE(review): writeShort() takes a uint16_t, so the length prefix is truncated to 16 bits.
void writeString(const std::string& v) {
    const size_t len = v.size();
    writeShort(len);
    fwrite(v.c_str(), 1, len, file);
    for (const char ch : v)
        checksum ^= ch;
}
void writeString(const InternedString v) { writeString(v.str()); }
......@@ -537,7 +548,7 @@ private:
};
}
unsigned long serializeAST(AST_Module* module, FILE* file) {
std::pair<unsigned long, uint8_t> serializeAST(AST_Module* module, FILE* file) {
return SerializeASTVisitor::write(module, file);
}
}
......@@ -15,11 +15,13 @@
#ifndef PYSTON_CODEGEN_SERIALIZEAST_H
#define PYSTON_CODEGEN_SERIALIZEAST_H
#include <cstdint>
#include <cstdio>
#include <utility>
namespace pyston {
class AST_Module;
unsigned long serializeAST(AST_Module* module, FILE* file);
std::pair<unsigned long, uint8_t> serializeAST(AST_Module* module, FILE* file);
}
#endif // PYSTON_CODEGEN_SERIALIZEAST_H
......@@ -351,6 +351,17 @@ void disableGC() {
static int ncollections = 0;
static bool should_not_reenter_gc = false;
// Marks the start of a section in which a GC is not expected to happen.
// Aborts if such a section is already active, so these sections cannot be nested.
// NOTE(review): the code that checks the flag during a collection is not visible here.
void startGCUnexpectedRegion() {
RELEASE_ASSERT(!should_not_reenter_gc, "");
should_not_reenter_gc = true;
}
// Marks the end of a section in which a GC is not expected to happen.
// Aborts if no such section is currently active (i.e. there was no matching start call).
void endGCUnexpectedRegion() {
RELEASE_ASSERT(should_not_reenter_gc, "");
should_not_reenter_gc = false;
}
void runCollection() {
static StatCounter sc("gc_collections");
sc.log();
......
......@@ -61,6 +61,12 @@ void enableGC();
// These are mostly for debugging:
bool isValidGCObject(void* p);
bool isNonheapRoot(void* p);
// Debugging/validation helpers: if a GC should not happen in certain sections (ex during unwinding),
// use these functions to mark that. This is different from disableGC/enableGC, since it causes an
// assert rather than delaying of the next GC.
void startGCUnexpectedRegion();
void endGCUnexpectedRegion();
}
}
......
......@@ -61,9 +61,14 @@ static bool unbuffered = false;
static const char* argv0;
static int pipefds[2];
static void signal_parent_watcher() {
char buf[1];
int r = write(pipefds[1], buf, 1);
RELEASE_ASSERT(r == 1, "");
// Send our current PID to the parent, in case we forked.
union {
char buf[4];
int pid;
};
pid = getpid();
int r = write(pipefds[1], buf, 4);
RELEASE_ASSERT(r == 4, "");
while (true) {
sleep(1);
......@@ -109,18 +114,35 @@ static void enableGdbSegfaultWatcher() {
}
while (true) {
char buf[1];
int r = read(pipefds[0], buf, 1);
union {
char buf[4];
int died_child_pid;
};
int r = read(pipefds[0], buf, 4);
if (r == 1) {
fprintf(stderr, "Parent process woken up by child; collecting backtrace and killing child\n");
if (r > 0) {
RELEASE_ASSERT(r == 4, "%d", r);
fprintf(stderr, "Parent process woken up by child %d; collecting backtrace and killing child\n",
died_child_pid);
char pidbuf[20];
snprintf(pidbuf, sizeof(pidbuf), "%d", gdb_child_pid);
snprintf(pidbuf, sizeof(pidbuf), "%d", died_child_pid);
close(STDOUT_FILENO);
dup2(STDERR_FILENO, STDOUT_FILENO);
if (gdb_child_pid != died_child_pid) {
// If the non-direct-child died, we want to backtrace the one that signalled us,
// but we want to make sure to kill the original child.
char origpid_buf[30];
snprintf(origpid_buf, sizeof(origpid_buf), "attach %d", gdb_child_pid);
r = execlp("gdb", "gdb", "-p", pidbuf, argv0, "-batch", "-ex", "set pagination 0", "-ex",
"thread apply all bt", "-ex", "kill", "-ex", origpid_buf, "-ex", "kill", "-ex",
"quit -11", NULL);
} else {
r = execlp("gdb", "gdb", "-p", pidbuf, argv0, "-batch", "-ex", "set pagination 0", "-ex",
"thread apply all bt", "-ex", "kill", "-ex", "quit -11", NULL);
}
RELEASE_ASSERT(0, "%d %d %s", r, errno, strerror(errno));
}
......
......@@ -89,7 +89,9 @@ struct ExcData {
assert(this);
assert(canary == CANARY_VALUE);
assert(exc.type && exc.value && exc.traceback);
assert(gc::isValidGCObject(exc.type) && gc::isValidGCObject(exc.value) && gc::isValidGCObject(exc.traceback));
ASSERT(gc::isValidGCObject(exc.type), "%p", exc.type);
ASSERT(gc::isValidGCObject(exc.value), "%p", exc.value);
ASSERT(gc::isValidGCObject(exc.traceback), "%p", exc.traceback);
assert(this == &exception_ferry);
}
};
......@@ -601,9 +603,6 @@ static inline void unwind_loop(const ExcData* exc_data) {
// The unwinder entry-point.
static void unwind(const ExcData* exc) {
exc->check();
if (exc->exc.value->hasattr("magic_break")) {
(void)(0 == 0);
}
unwind_loop(exc);
// unwind_loop returned, couldn't find any handler. ruh-roh.
panic();
......@@ -677,6 +676,7 @@ extern "C" void* __cxa_allocate_exception(size_t size) noexcept {
// Takes the value that resume() sent us in RAX, and returns a pointer to the exception object actually thrown. In our
// case, these are the same, and should always be &pyston::exception_ferry.
extern "C" void* __cxa_begin_catch(void* exc_obj_in) noexcept {
pyston::gc::endGCUnexpectedRegion();
assert(exc_obj_in);
pyston::us_unwind_resume_catch.log(pyston::per_thread_resume_catch_timer.end());
......@@ -699,6 +699,7 @@ extern "C" void __cxa_end_catch() {
extern "C" std::type_info EXCINFO_TYPE_INFO;
extern "C" void __cxa_throw(void* exc_obj, std::type_info* tinfo, void (*dtor)(void*)) {
pyston::gc::startGCUnexpectedRegion();
assert(!pyston::in_cleanup_code);
assert(exc_obj);
RELEASE_ASSERT(tinfo == &EXCINFO_TYPE_INFO, "can't throw a non-ExcInfo value! type info: %p", tinfo);
......@@ -706,7 +707,9 @@ extern "C" void __cxa_throw(void* exc_obj, std::type_info* tinfo, void (*dtor)(v
if (VERBOSITY("cxx_unwind"))
printf("***** __cxa_throw() *****\n");
pyston::unwind((const pyston::ExcData*)exc_obj);
const pyston::ExcData* exc_data = (const pyston::ExcData*)exc_obj;
exc_data->check();
pyston::unwind(exc_data);
}
extern "C" void* __cxa_get_exception_ptr(void* exc_obj_in) noexcept {
......
......@@ -43,6 +43,7 @@ Box* createAndRunModule(const std::string& name, const std::string& fn) {
BoxedModule* module = createModule(name, fn.c_str());
AST_Module* ast = caching_parse_file(fn.c_str());
assert(ast);
try {
compileAndRunModule(ast, module);
} catch (ExcInfo e) {
......@@ -67,6 +68,7 @@ static Box* createAndRunModule(const std::string& name, const std::string& fn, c
module->setattr(path_str, path_list, NULL);
AST_Module* ast = caching_parse_file(fn.c_str());
assert(ast);
try {
compileAndRunModule(ast, module);
} catch (ExcInfo e) {
......
"""
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum
"""
# skip-if: '-x' in EXTRA_JIT_ARGS
# - too slow
# Note: CPython doesn't pass this test
import os
import sys
import multiprocessing
def worker():
    """Repeatedly re-import pyc_import_target so that its cache file is re-read each time.

    Runs in a child process while the parent deletes and corrupts the .pyc file.
    """
    global done
    for i in xrange(1000):
        # Drop the cached module so the following import has to load it again.
        del sys.modules["pyc_import_target"]
        import pyc_import_target

    done = True
# Import once up front; the assert below then checks that the cache file exists.
import pyc_import_target
path = os.path.join(os.path.dirname(__file__), "pyc_import_target.pyc")
assert os.path.exists(path)

TEST_THREADS = 3

l = []
for i in xrange(TEST_THREADS):
    p = multiprocessing.Process(target=worker)
    p.start()
    l.append(p)

idx = 0
while l:
    p = l.pop()
    # While the worker keeps importing, alternately delete and corrupt the cache file.
    while p.is_alive():
        for i in xrange(100):
            if os.path.exists(path):
                os.remove(path)

        for i in xrange(100):
            if os.path.exists(path):
                # "r+" is the standard read/update mode; "rw+" is not a defined mode string.
                with open(path, "r+") as f:
                    f.write(chr(i) * 100)
                    f.truncate(200)

    p.join()
    # A worker that crashed on a bad cache file exits with a non-zero code.
    assert p.exitcode == 0, p.exitcode
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment