Commit 9dbdf752 authored by Marius Wachtler's avatar Marius Wachtler

Cache pypa parsed AST to disk

Before this change, 'import pip' with -I took about 2.5 sec on the second run;
with this change it takes about 1.5 sec.
parent 0583e2e4
......@@ -25,6 +25,7 @@
#include "llvm/Support/Path.h"
#include "codegen/pypa-parser.h"
#include "codegen/serialize_ast.h"
#include "core/ast.h"
#include "core/options.h"
#include "core/stats.h"
......@@ -991,22 +992,27 @@ AST_Module* parse_file(const char* fn) {
return ast_cast<AST_Module>(rtn);
}
#define MAGIC_STRING "a\ncj"
// Returns the MAGIC_STRING_LENGTH-byte magic header used for the on-disk parse cache.
// The value depends on ENABLE_PYPA_PARSER ("a\ncJ" when it is set, "a\ncj" otherwise),
// so a cache file written under one setting does not match the magic expected
// under the other.
const char* getMagic() {
    return ENABLE_PYPA_PARSER ? "a\ncJ" : "a\ncj";
}
#define MAGIC_STRING_LENGTH 4
#define CHECKSUM_LENGTH 4
// Outcome reported by _reparse() to caching_parse_file().
enum class ParseResult {
    // No error was reported. NOTE(review): the return site is outside this view;
    // presumably the cache file was rewritten -- confirm.
    SUCCESS = 0,
    // The pypa parser produced no module; the caller returns NULL.
    FAILURE = 1,
    // The cache file could not be opened for writing; the caller falls back to
    // parse_file() without caching.
    PYC_UNWRITABLE = 2,
};
static ParseResult _reparse(const char* fn, const std::string& cache_fn) {
static ParseResult _reparse(const char* fn, const std::string& cache_fn, AST_Module*& module) {
FILE* cache_fp = fopen(cache_fn.c_str(), "w");
if (!cache_fp)
return ParseResult::PYC_UNWRITABLE;
FILE* parser = popen(getParserCommandLine(fn).c_str(), "r");
fwrite(MAGIC_STRING, 1, MAGIC_STRING_LENGTH, cache_fp);
fwrite(getMagic(), 1, MAGIC_STRING_LENGTH, cache_fp);
int checksum_start = ftell(cache_fp);
......@@ -1016,17 +1022,25 @@ static ParseResult _reparse(const char* fn, const std::string& cache_fn) {
fwrite(&bytes_written, 1, CHECKSUM_LENGTH, cache_fp);
bytes_written = 0;
char buf[80];
while (true) {
int nread = fread(buf, 1, 80, parser);
if (nread == 0)
break;
bytes_written += nread;
fwrite(buf, 1, nread, cache_fp);
}
int code = pclose(parser);
assert(code == 0);
if (ENABLE_PYPA_PARSER) {
module = pypa_parse(fn);
if (!module)
return ParseResult::FAILURE;
bytes_written += serializeAST(module, cache_fp);
} else {
FILE* parser = popen(getParserCommandLine(fn).c_str(), "r");
char buf[80];
while (true) {
int nread = fread(buf, 1, 80, parser);
if (nread == 0)
break;
bytes_written += nread;
fwrite(buf, 1, nread, cache_fp);
}
int code = pclose(parser);
assert(code == 0);
}
fseek(cache_fp, checksum_start, SEEK_SET);
fwrite(&bytes_written, 1, CHECKSUM_LENGTH, cache_fp);
......@@ -1039,11 +1053,9 @@ static ParseResult _reparse(const char* fn, const std::string& cache_fn) {
// it's not a huge deal right now, but this caching version can significantly cut down
// on the startup time (40ms -> 10ms).
AST_Module* caching_parse_file(const char* fn) {
static StatCounter us_parsing("us_parsing");
Timer _t("parsing");
if (ENABLE_PYPA_PARSER) {
return pypa_parse(fn);
}
_t.setExitCallback([](long t) { us_parsing.log(t); });
int code;
std::string cache_fn = std::string(fn) + "c";
......@@ -1054,7 +1066,14 @@ AST_Module* caching_parse_file(const char* fn) {
code = stat(cache_fn.c_str(), &cache_stat);
if (code != 0 || cache_stat.st_mtime < source_stat.st_mtime
|| (cache_stat.st_mtime == source_stat.st_mtime && cache_stat.st_mtim.tv_nsec < source_stat.st_mtim.tv_nsec)) {
auto result = _reparse(fn, cache_fn);
AST_Module* mod = 0;
auto result = _reparse(fn, cache_fn, mod);
if (mod)
return mod;
if (result == ParseResult::FAILURE)
return NULL;
if (result == ParseResult::PYC_UNWRITABLE)
return parse_file(fn);
......@@ -1071,7 +1090,7 @@ AST_Module* caching_parse_file(const char* fn) {
if (good) {
char buf[MAGIC_STRING_LENGTH];
int read = fread(buf, 1, MAGIC_STRING_LENGTH, fp);
if (read != MAGIC_STRING_LENGTH || strncmp(buf, MAGIC_STRING, MAGIC_STRING_LENGTH) != 0) {
if (read != MAGIC_STRING_LENGTH || strncmp(buf, getMagic(), MAGIC_STRING_LENGTH) != 0) {
if (VERBOSITY()) {
printf("Warning: corrupt or non-Pyston .pyc file found; ignoring\n");
}
......@@ -1097,7 +1116,14 @@ AST_Module* caching_parse_file(const char* fn) {
if (!good) {
fclose(fp);
auto result = _reparse(fn, cache_fn);
AST_Module* mod = 0;
auto result = _reparse(fn, cache_fn, mod);
if (mod)
return mod;
if (result == ParseResult::FAILURE)
return NULL;
if (result == ParseResult::PYC_UNWRITABLE)
return parse_file(fn);
......@@ -1121,10 +1147,6 @@ AST_Module* caching_parse_file(const char* fn) {
assert(rtn->type == AST_TYPE::Module);
long us = _t.end();
static StatCounter us_parsing("us_parsing");
us_parsing.log(us);
return ast_cast<AST_Module>(rtn);
}
}
......@@ -25,6 +25,7 @@
#include <sys/stat.h>
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/SwapByteOrder.h"
#include "core/ast.h"
#include "core/options.h"
......@@ -43,6 +44,7 @@ namespace pyston {
// Copies the source position stored on the pypa node `a` (line and column)
// onto the Pyston AST node `t` (lineno and col_offset).
void location(AST* t, pypa::Ast& a) {
    const auto& column = a.column;

    t->lineno = a.line;
    // Sanity check: the column is expected to stay below 100000.
    assert(column < 100000);
    t->col_offset = column;
}
......@@ -635,8 +637,12 @@ struct stmt_dispatcher {
ptr->body = readItem(e.body, interned_strings);
if (e.globals)
ptr->globals = readItem(e.globals, interned_strings);
else
ptr->globals = NULL;
if (e.locals)
ptr->locals = readItem(e.locals, interned_strings);
else
ptr->locals = NULL;
return ptr;
}
......@@ -801,6 +807,7 @@ struct stmt_dispatcher {
AST_Expr* ptr = new AST_Expr();
location(ptr, d);
AST_Str* str = new AST_Str();
location(str, d);
ptr->value = str;
str->str_type = d.unicode ? AST_Str::UNICODE : AST_Str::STR;
str->str_data = d.doc;
......
......@@ -15,6 +15,8 @@
#ifndef PYSTON_CODEGEN_PYPAPARSER_H
#define PYSTON_CODEGEN_PYPAPARSER_H
#include <cstdio>
namespace pyston {
class AST_Module;
AST_Module* pypa_parse(char const* file_path);
......
This diff is collapsed.
// Copyright (c) 2014-2015 Dropbox, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PYSTON_CODEGEN_SERIALIZEAST_H
#define PYSTON_CODEGEN_SERIALIZEAST_H
#include <cstdio>
namespace pyston {
// Forward declaration only; this header does not need the full AST definition.
class AST_Module;
// Serializes `module` to the already-open stream `file`.
// The caching parser adds the return value to its running count of bytes written,
// so it is presumably the number of bytes emitted -- TODO confirm against the implementation.
unsigned long serializeAST(AST_Module* module, FILE* file);
}
#endif // PYSTON_CODEGEN_SERIALIZEAST_H
......@@ -77,7 +77,11 @@ long Timer::end() {
}
// Destructor: if the timer has not been ended explicitly, end it now and pass
// the value returned by end() to the exit callback, when one is registered.
//
// end() must only be called inside the `!ended` guard. An unconditional end()
// before the guard would (assuming end() marks the timer as ended -- its body is
// not visible here) make the guard always false and the callback would never run.
Timer::~Timer() {
    if (!ended) {
        long t = end();
        if (exit_callback)
            exit_callback(t);
    }
}
bool startswith(const std::string& s, const std::string& pattern) {
......
......@@ -31,11 +31,14 @@ private:
const char* desc;
long min_usec;
bool ended;
std::function<void(long)> exit_callback;
public:
Timer(const char* desc = NULL, long min_usec = -1);
~Timer();
// Registers a callback for the destructor to run: ~Timer passes it the value
// returned by end() when the timer has not already been ended.
void setExitCallback(std::function<void(long)> _exit_callback) { exit_callback = _exit_callback; }
void restart(const char* newdesc, long new_min_usec);
void restart(const char* newdesc = NULL);
......
......@@ -180,9 +180,9 @@ static int main(int argc, char** argv) {
// if the user invoked `pyston -c command`
if (command != NULL) {
main_module = createModule("__main__", "<string>");
AST_Module* m = parse_string(command);
try {
main_module = createModule("__main__", "<string>");
AST_Module* m = parse_string(command);
compileAndRunModule(m, main_module);
} catch (ExcInfo e) {
int retcode = 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment