Commit 1e98809f authored by Chris Toshok's avatar Chris Toshok

Merge pull request #395 from undingen/pypa_cache

Cache pypa parsed AST to disk
parents 7b9c4682 9dbdf752
......@@ -25,6 +25,7 @@
#include "llvm/Support/Path.h"
#include "codegen/pypa-parser.h"
#include "codegen/serialize_ast.h"
#include "core/ast.h"
#include "core/options.h"
#include "core/stats.h"
......@@ -993,22 +994,27 @@ AST_Module* parse_file(const char* fn) {
return ast_cast<AST_Module>(rtn);
}
#define MAGIC_STRING "a\ncj"
// Returns the magic string for the currently selected parser: one spelling
// when ENABLE_PYPA_PARSER is set and another (differing only in the case of the
// final letter) when it is not. Callers use the first MAGIC_STRING_LENGTH
// bytes of the result.
const char* getMagic() {
    return ENABLE_PYPA_PARSER ? "a\ncJ" : "a\ncj";
}
#define MAGIC_STRING_LENGTH 4
#define CHECKSUM_LENGTH 4
// Outcome reported by _reparse() to its callers.
enum class ParseResult {
// The source was parsed (no failure was reported).
SUCCESS,
// Parsing did not produce a module; callers return NULL in this case.
FAILURE,
// The cache file could not be opened for writing; callers fall back to
// parse_file() without caching.
PYC_UNWRITABLE,
};
static ParseResult _reparse(const char* fn, const std::string& cache_fn) {
static ParseResult _reparse(const char* fn, const std::string& cache_fn, AST_Module*& module) {
FILE* cache_fp = fopen(cache_fn.c_str(), "w");
if (!cache_fp)
return ParseResult::PYC_UNWRITABLE;
FILE* parser = popen(getParserCommandLine(fn).c_str(), "r");
fwrite(MAGIC_STRING, 1, MAGIC_STRING_LENGTH, cache_fp);
fwrite(getMagic(), 1, MAGIC_STRING_LENGTH, cache_fp);
int checksum_start = ftell(cache_fp);
......@@ -1018,17 +1024,25 @@ static ParseResult _reparse(const char* fn, const std::string& cache_fn) {
fwrite(&bytes_written, 1, CHECKSUM_LENGTH, cache_fp);
bytes_written = 0;
char buf[80];
while (true) {
int nread = fread(buf, 1, 80, parser);
if (nread == 0)
break;
bytes_written += nread;
fwrite(buf, 1, nread, cache_fp);
}
int code = pclose(parser);
assert(code == 0);
if (ENABLE_PYPA_PARSER) {
module = pypa_parse(fn);
if (!module)
return ParseResult::FAILURE;
bytes_written += serializeAST(module, cache_fp);
} else {
FILE* parser = popen(getParserCommandLine(fn).c_str(), "r");
char buf[80];
while (true) {
int nread = fread(buf, 1, 80, parser);
if (nread == 0)
break;
bytes_written += nread;
fwrite(buf, 1, nread, cache_fp);
}
int code = pclose(parser);
assert(code == 0);
}
fseek(cache_fp, checksum_start, SEEK_SET);
fwrite(&bytes_written, 1, CHECKSUM_LENGTH, cache_fp);
......@@ -1041,11 +1055,9 @@ static ParseResult _reparse(const char* fn, const std::string& cache_fn) {
// it's not a huge deal right now, but this caching version can significantly cut down
// on the startup time (40ms -> 10ms).
AST_Module* caching_parse_file(const char* fn) {
static StatCounter us_parsing("us_parsing");
Timer _t("parsing");
if (ENABLE_PYPA_PARSER) {
return pypa_parse(fn);
}
_t.setExitCallback([](long t) { us_parsing.log(t); });
int code;
std::string cache_fn = std::string(fn) + "c";
......@@ -1056,7 +1068,14 @@ AST_Module* caching_parse_file(const char* fn) {
code = stat(cache_fn.c_str(), &cache_stat);
if (code != 0 || cache_stat.st_mtime < source_stat.st_mtime
|| (cache_stat.st_mtime == source_stat.st_mtime && cache_stat.st_mtim.tv_nsec < source_stat.st_mtim.tv_nsec)) {
auto result = _reparse(fn, cache_fn);
AST_Module* mod = 0;
auto result = _reparse(fn, cache_fn, mod);
if (mod)
return mod;
if (result == ParseResult::FAILURE)
return NULL;
if (result == ParseResult::PYC_UNWRITABLE)
return parse_file(fn);
......@@ -1073,7 +1092,7 @@ AST_Module* caching_parse_file(const char* fn) {
if (good) {
char buf[MAGIC_STRING_LENGTH];
int read = fread(buf, 1, MAGIC_STRING_LENGTH, fp);
if (read != MAGIC_STRING_LENGTH || strncmp(buf, MAGIC_STRING, MAGIC_STRING_LENGTH) != 0) {
if (read != MAGIC_STRING_LENGTH || strncmp(buf, getMagic(), MAGIC_STRING_LENGTH) != 0) {
if (VERBOSITY()) {
printf("Warning: corrupt or non-Pyston .pyc file found; ignoring\n");
}
......@@ -1099,7 +1118,14 @@ AST_Module* caching_parse_file(const char* fn) {
if (!good) {
fclose(fp);
auto result = _reparse(fn, cache_fn);
AST_Module* mod = 0;
auto result = _reparse(fn, cache_fn, mod);
if (mod)
return mod;
if (result == ParseResult::FAILURE)
return NULL;
if (result == ParseResult::PYC_UNWRITABLE)
return parse_file(fn);
......@@ -1123,10 +1149,6 @@ AST_Module* caching_parse_file(const char* fn) {
assert(rtn->type == AST_TYPE::Module);
long us = _t.end();
static StatCounter us_parsing("us_parsing");
us_parsing.log(us);
return ast_cast<AST_Module>(rtn);
}
}
......@@ -25,6 +25,7 @@
#include <sys/stat.h>
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/SwapByteOrder.h"
#include "core/ast.h"
#include "core/options.h"
......@@ -43,6 +44,7 @@ namespace pyston {
// Copies the source position (line and column) of the pypa node `a` onto the
// Pyston AST node `t`. Columns are asserted to be below 100000.
void location(AST* t, pypa::Ast& a) {
    const auto column = a.column;
    t->lineno = a.line;
    assert(column < 100000);
    t->col_offset = column;
}
......@@ -635,8 +637,12 @@ struct stmt_dispatcher {
ptr->body = readItem(e.body, interned_strings);
if (e.globals)
ptr->globals = readItem(e.globals, interned_strings);
else
ptr->globals = NULL;
if (e.locals)
ptr->locals = readItem(e.locals, interned_strings);
else
ptr->locals = NULL;
return ptr;
}
......@@ -801,6 +807,7 @@ struct stmt_dispatcher {
AST_Expr* ptr = new AST_Expr();
location(ptr, d);
AST_Str* str = new AST_Str();
location(str, d);
ptr->value = str;
str->str_type = d.unicode ? AST_Str::UNICODE : AST_Str::STR;
str->str_data = d.doc;
......
......@@ -15,6 +15,8 @@
#ifndef PYSTON_CODEGEN_PYPAPARSER_H
#define PYSTON_CODEGEN_PYPAPARSER_H
#include <cstdio>
namespace pyston {
class AST_Module;
AST_Module* pypa_parse(char const* file_path);
......
// Copyright (c) 2014-2015 Dropbox, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "codegen/serialize_ast.h"
#include "llvm/Support/SwapByteOrder.h"
#include "core/ast.h"
namespace pyston {
namespace {
class SerializeASTVisitor : public ASTVisitor {
private:
FILE* file;
public:
static unsigned int write(AST_Module* module, FILE* file) {
SerializeASTVisitor visitor(file);
unsigned long start_pos = ftell(file);
visitor.writeASTMisc(module);
return ftell(file) - start_pos;
}
private:
SerializeASTVisitor(FILE* file) : file(file) {}
virtual ~SerializeASTVisitor() {}
void writeByte(uint8_t v) { fwrite(&v, 1, sizeof(v), file); }
void writeShort(uint16_t v) {
v = llvm::sys::getSwappedBytes(v); // TODO: assumes little endian machine
fwrite(&v, 1, sizeof(v), file);
}
void writeUInt(uint32_t v) {
v = llvm::sys::getSwappedBytes(v); // TODO: assumes little endian machine
fwrite(&v, 1, sizeof(v), file);
}
void writeULL(uint64_t v) {
v = llvm::sys::getSwappedBytes(v); // TODO: assumes little endian machine
fwrite(&v, 1, sizeof(v), file);
}
void writeDouble(double v) {
union {
double v;
uint64_t u;
} u{.v = v };
writeULL(u.u);
}
void writeString(const std::string& v) {
writeShort(v.size());
fwrite(v.c_str(), 1, v.size(), file);
}
void writeString(const InternedString v) { writeString(v.str()); }
void writeStringVector(const std::vector<InternedString>& vec) {
writeShort(vec.size());
for (auto&& e : vec) {
writeString(e);
}
}
void writeExpr(AST_expr* e) {
if (!e) {
writeByte(0x00);
} else {
writeByte(e->type);
writeByte(0xae); // check byte
e->accept(this);
}
}
void writeExprVector(const std::vector<AST_expr*>& vec) {
writeShort(vec.size());
for (auto* e : vec) {
writeExpr(e);
}
}
void writeStmt(AST_stmt* e) {
writeByte(e->type);
writeByte(0xae); // check byte
e->accept(this);
}
void writeStmtVector(const std::vector<AST_stmt*>& vec) {
writeShort(vec.size());
for (auto* e : vec) {
writeStmt(e);
}
}
void writeColOffset(uint32_t v) {
assert(v < 100000 || v == -1);
writeULL(v == -1 ? 0 : v);
}
void writeLineno(uint64_t v) { writeULL(v); }
void writeASTMisc(AST* e) {
writeByte(e->type);
writeByte(0xae); // check byte
switch (e->type) {
case AST_TYPE::alias:
case AST_TYPE::arguments:
case AST_TYPE::comprehension:
case AST_TYPE::ExceptHandler:
case AST_TYPE::keyword:
case AST_TYPE::Module:
return e->accept(this);
default:
assert(0);
}
}
template <class T> void writeMiscVector(std::vector<T*>& vec) {
writeShort(vec.size());
for (auto&& e : vec) {
writeASTMisc(e);
}
}
virtual bool visit_alias(AST_alias* node) {
writeString(node->asname);
writeString(node->name);
return true;
}
virtual bool visit_arguments(AST_arguments* node) {
writeExprVector(node->args);
writeExprVector(node->defaults);
writeString(node->kwarg);
writeString(node->vararg);
return true;
}
virtual bool visit_assert(AST_Assert* node) {
writeColOffset(node->col_offset);
writeLineno(node->lineno);
writeExpr(node->msg);
writeExpr(node->test);
return true;
}
virtual bool visit_assign(AST_Assign* node) {
writeColOffset(node->col_offset);
writeLineno(node->lineno);
writeExprVector(node->targets);
writeExpr(node->value);
return true;
}
virtual bool visit_augassign(AST_AugAssign* node) {
writeColOffset(node->col_offset);
writeLineno(node->lineno);
writeByte(node->op_type);
writeExpr(node->target);
writeExpr(node->value);
return true;
}
virtual bool visit_attribute(AST_Attribute* node) {
writeString(node->attr);
writeColOffset(node->col_offset);
writeByte(node->ctx_type);
writeLineno(node->lineno);
writeExpr(node->value);
return true;
}
virtual bool visit_binop(AST_BinOp* node) {
writeColOffset(node->col_offset);
writeExpr(node->left);
writeLineno(node->lineno);
writeByte(node->op_type);
writeExpr(node->right);
return true;
}
virtual bool visit_boolop(AST_BoolOp* node) {
writeColOffset(node->col_offset);
writeLineno(node->lineno);
writeByte(node->op_type);
writeExprVector(node->values);
return true;
}
virtual bool visit_break(AST_Break* node) {
writeColOffset(node->col_offset);
writeLineno(node->lineno);
return true;
}
virtual bool visit_call(AST_Call* node) {
writeExprVector(node->args);
writeColOffset(node->col_offset);
writeExpr(node->func);
writeMiscVector(node->keywords);
writeExpr(node->kwargs);
writeLineno(node->lineno);
writeExpr(node->starargs);
return true;
}
virtual bool visit_compare(AST_Compare* node) {
writeColOffset(node->col_offset);
writeExprVector(node->comparators);
writeExpr(node->left);
writeLineno(node->lineno);
writeShort(node->comparators.size());
for (auto& e : node->ops) {
writeByte(e);
}
return true;
}
virtual bool visit_comprehension(AST_comprehension* node) {
writeExprVector(node->ifs);
writeExpr(node->iter);
writeExpr(node->target);
return true;
}
virtual bool visit_classdef(AST_ClassDef* node) {
writeExprVector(node->bases);
writeStmtVector(node->body);
writeColOffset(node->col_offset);
writeExprVector(node->decorator_list);
writeLineno(node->lineno);
writeString(node->name);
return true;
}
virtual bool visit_continue(AST_Continue* node) {
writeColOffset(node->col_offset);
writeLineno(node->lineno);
return true;
}
virtual bool visit_delete(AST_Delete* node) {
writeColOffset(node->col_offset);
writeLineno(node->lineno);
writeExprVector(node->targets);
return true;
}
virtual bool visit_dict(AST_Dict* node) {
writeColOffset(node->col_offset);
writeExprVector(node->keys);
writeLineno(node->lineno);
writeExprVector(node->values);
return true;
}
virtual bool visit_dictcomp(AST_DictComp* node) {
writeColOffset(node->col_offset);
writeMiscVector(node->generators);
writeExpr(node->key);
writeLineno(node->lineno);
writeExpr(node->value);
return true;
}
virtual bool visit_excepthandler(AST_ExceptHandler* node) {
writeStmtVector(node->body);
writeColOffset(node->col_offset);
writeLineno(node->lineno);
writeExpr(node->name);
writeExpr(node->type);
return true;
}
virtual bool visit_exec(AST_Exec* node) {
writeExpr(node->body);
writeColOffset(node->col_offset);
writeExpr(node->globals);
writeLineno(node->lineno);
writeExpr(node->locals);
return true;
}
virtual bool visit_expr(AST_Expr* node) {
writeColOffset(node->col_offset);
writeLineno(node->lineno);
writeExpr(node->value);
return true;
}
virtual bool visit_for(AST_For* node) {
writeStmtVector(node->body);
writeColOffset(node->col_offset);
writeExpr(node->iter);
writeLineno(node->lineno);
writeStmtVector(node->orelse);
writeExpr(node->target);
return true;
}
virtual bool visit_functiondef(AST_FunctionDef* node) {
writeASTMisc(node->args);
writeStmtVector(node->body);
writeColOffset(node->col_offset);
writeExprVector(node->decorator_list);
writeLineno(node->lineno);
writeString(node->name);
return true;
}
virtual bool visit_generatorexp(AST_GeneratorExp* node) {
writeColOffset(node->col_offset);
writeExpr(node->elt);
writeMiscVector(node->generators);
writeLineno(node->lineno);
return true;
}
virtual bool visit_global(AST_Global* node) {
writeColOffset(node->col_offset);
writeLineno(node->lineno);
writeStringVector(node->names);
return true;
}
virtual bool visit_if(AST_If* node) {
writeStmtVector(node->body);
writeColOffset(node->col_offset);
writeLineno(node->lineno);
writeStmtVector(node->orelse);
writeExpr(node->test);
return true;
}
virtual bool visit_ifexp(AST_IfExp* node) {
writeExpr(node->body);
writeColOffset(node->col_offset);
writeLineno(node->lineno);
writeExpr(node->orelse);
writeExpr(node->test);
return true;
}
virtual bool visit_import(AST_Import* node) {
writeColOffset(node->col_offset);
writeLineno(node->lineno);
writeMiscVector(node->names);
return true;
}
virtual bool visit_importfrom(AST_ImportFrom* node) {
writeColOffset(node->col_offset);
writeULL(node->level);
writeLineno(node->lineno);
writeString(node->module);
writeMiscVector(node->names);
return true;
}
virtual bool visit_index(AST_Index* node) {
writeExpr(node->value);
return true;
}
virtual bool visit_keyword(AST_keyword* node) {
writeString(node->arg);
writeExpr(node->value);
return true;
}
virtual bool visit_lambda(AST_Lambda* node) {
writeASTMisc(node->args);
writeExpr(node->body);
writeColOffset(node->col_offset);
writeLineno(node->lineno);
return true;
}
virtual bool visit_list(AST_List* node) {
writeColOffset(node->col_offset);
writeByte(node->ctx_type);
writeExprVector(node->elts);
writeLineno(node->lineno);
return true;
}
virtual bool visit_listcomp(AST_ListComp* node) {
writeColOffset(node->col_offset);
writeExpr(node->elt);
writeMiscVector(node->generators);
writeLineno(node->lineno);
return true;
}
virtual bool visit_module(AST_Module* node) {
writeStmtVector(node->body);
return true;
}
virtual bool visit_name(AST_Name* node) {
writeColOffset(node->col_offset);
writeByte(node->ctx_type);
writeString(node->id);
writeLineno(node->lineno);
return true;
}
virtual bool visit_num(AST_Num* node) {
writeByte(node->num_type);
writeColOffset(node->col_offset);
writeLineno(node->lineno);
if (node->num_type == AST_Num::INT) {
writeULL(node->n_int);
} else if (node->num_type == AST_Num::LONG) {
writeString(node->n_long);
} else if (node->num_type == AST_Num::FLOAT) {
writeDouble(node->n_float);
} else if (node->num_type == AST_Num::COMPLEX) {
writeDouble(node->n_float);
} else {
RELEASE_ASSERT(0, "%d", node->num_type);
}
return true;
}
virtual bool visit_pass(AST_Pass* node) {
writeColOffset(node->col_offset);
writeLineno(node->lineno);
return true;
}
virtual bool visit_print(AST_Print* node) {
writeColOffset(node->col_offset);
writeExpr(node->dest);
writeLineno(node->lineno);
writeByte(node->nl);
writeExprVector(node->values);
return true;
}
virtual bool visit_raise(AST_Raise* node) {
// "arg0" "arg1" "arg2" are called "type", "inst", and "tback" in the python ast,
// so that's the order we have to write them:
writeColOffset(node->col_offset);
writeExpr(node->arg1 /*inst*/);
writeLineno(node->lineno);
writeExpr(node->arg2 /*tback*/);
writeExpr(node->arg0 /*type*/);
return true;
}
virtual bool visit_repr(AST_Repr* node) {
writeColOffset(node->col_offset);
writeLineno(node->lineno);
writeExpr(node->value);
return true;
}
virtual bool visit_return(AST_Return* node) {
writeColOffset(node->col_offset);
writeLineno(node->lineno);
writeExpr(node->value);
return true;
}
virtual bool visit_set(AST_Set* node) {
writeColOffset(node->col_offset);
writeExprVector(node->elts);
writeLineno(node->lineno);
return true;
}
virtual bool visit_setcomp(AST_SetComp* node) {
writeColOffset(node->col_offset);
writeExpr(node->elt);
writeMiscVector(node->generators);
writeLineno(node->lineno);
return true;
}
virtual bool visit_slice(AST_Slice* node) {
writeExpr(node->lower);
writeExpr(node->step);
writeExpr(node->upper);
return true;
}
virtual bool visit_str(AST_Str* node) {
writeByte(node->str_type);
writeColOffset(node->col_offset);
writeLineno(node->lineno);
if (node->str_type == AST_Str::STR) {
writeString(node->str_data);
} else if (node->str_type == AST_Str::UNICODE) {
writeString(node->str_data);
} else {
RELEASE_ASSERT(0, "%d", node->str_type);
}
return true;
}
virtual bool visit_subscript(AST_Subscript* node) {
writeColOffset(node->col_offset);
writeByte(node->ctx_type);
writeLineno(node->lineno);
writeExpr(node->slice);
writeExpr(node->value);
return true;
}
virtual bool visit_tryexcept(AST_TryExcept* node) {
writeStmtVector(node->body);
writeColOffset(node->col_offset);
writeMiscVector(node->handlers);
writeLineno(node->lineno);
writeStmtVector(node->orelse);
return true;
}
virtual bool visit_tryfinally(AST_TryFinally* node) {
writeStmtVector(node->body);
writeColOffset(node->col_offset);
writeStmtVector(node->finalbody);
writeLineno(node->lineno);
return true;
}
virtual bool visit_tuple(AST_Tuple* node) {
writeColOffset(node->col_offset);
writeByte(node->ctx_type);
writeExprVector(node->elts);
writeLineno(node->lineno);
return true;
}
virtual bool visit_unaryop(AST_UnaryOp* node) {
writeColOffset(node->col_offset);
writeLineno(node->lineno);
writeByte(node->op_type);
writeExpr(node->operand);
return true;
}
virtual bool visit_while(AST_While* node) {
writeStmtVector(node->body);
writeColOffset(node->col_offset);
writeLineno(node->lineno);
writeStmtVector(node->orelse);
writeExpr(node->test);
return true;
}
virtual bool visit_with(AST_With* node) {
writeStmtVector(node->body);
writeColOffset(node->col_offset);
writeExpr(node->context_expr);
writeLineno(node->lineno);
writeExpr(node->optional_vars);
return true;
}
virtual bool visit_yield(AST_Yield* node) {
writeColOffset(node->col_offset);
writeLineno(node->lineno);
writeExpr(node->value);
return true;
}
};
}
// Public entry point: serializes `module` into `file` and reports how many
// bytes the visitor says it wrote.
unsigned long serializeAST(AST_Module* module, FILE* file) {
    const unsigned long bytes_written = SerializeASTVisitor::write(module, file);
    return bytes_written;
}
}
// Copyright (c) 2014-2015 Dropbox, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PYSTON_CODEGEN_SERIALIZEAST_H
#define PYSTON_CODEGEN_SERIALIZEAST_H
#include <cstdio>
namespace pyston {
class AST_Module;
// Serializes `module` into `file`, starting at the file's current position.
// Returns the number of bytes written, measured as the change in file position.
unsigned long serializeAST(AST_Module* module, FILE* file);
}
#endif // PYSTON_CODEGEN_SERIALIZEAST_H
......@@ -77,7 +77,11 @@ long Timer::end() {
}
// If the timer has not been ended yet, ends it now and passes the value
// returned by end() to the exit callback, when one has been set.
//
// end() must only be called from inside the `!ended` guard: an unconditional
// call ahead of the guard would finish the timer first, so the guarded branch
// (and with it the exit callback) would presumably never run.
Timer::~Timer() {
    if (!ended) {
        const long t = end();
        if (exit_callback)
            exit_callback(t);
    }
}
bool startswith(const std::string& s, const std::string& pattern) {
......
......@@ -31,11 +31,14 @@ private:
const char* desc;
long min_usec;
bool ended;
std::function<void(long)> exit_callback;
public:
Timer(const char* desc = NULL, long min_usec = -1);
~Timer();
void setExitCallback(std::function<void(long)> _exit_callback) { exit_callback = _exit_callback; }
void restart(const char* newdesc, long new_min_usec);
void restart(const char* newdesc = NULL);
......
......@@ -180,9 +180,9 @@ static int main(int argc, char** argv) {
// if the user invoked `pyston -c command`
if (command != NULL) {
main_module = createModule("__main__", "<string>");
AST_Module* m = parse_string(command);
try {
main_module = createModule("__main__", "<string>");
AST_Module* m = parse_string(command);
compileAndRunModule(m, main_module);
} catch (ExcInfo e) {
int retcode = 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment