Commit a5dc1b1b, authored by Marius Wachtler, committed by GitHub

Merge pull request #1268 from Daetalus/cpython_exe_parser

Remove the "CPython executable parser"
parents 5f562c46 bfe06380
......@@ -320,9 +320,6 @@ add_executable(pyston $<TARGET_OBJECTS:PYSTON_MAIN_OBJECT> $<TARGET_OBJECTS:PYST
target_link_libraries(pyston -Wl,--whole-archive stdlib -Wl,--no-whole-archive pthread m z readline sqlite3 gmp mpfr ssl crypto unwind pypa liblz4 double-conversion util ${LLVM_LIBS} ${LIBLZMA_LIBRARIES} ${OPTIONAL_LIBRARIES} ${CMAKE_BINARY_DIR}/jemalloc/lib/libjemalloc.a)
add_dependencies(pyston libjemalloc)
# Copy src/codegen/parse_ast.py into the build directory once pyston has been
# (re)built. copy_if_different leaves the destination untouched when the
# contents already match. VERBATIM makes argument escaping identical on every
# platform/generator.
add_custom_command(
  TARGET pyston POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E copy_if_different
          ${CMAKE_SOURCE_DIR}/src/codegen/parse_ast.py
          ${CMAKE_BINARY_DIR}/src/codegen/parse_ast.py
  VERBATIM)
# `astcompare` runs tools/astprint_test.sh; the `astprint` target is brought up
# to date first. VERBATIM keeps command-line escaping consistent across
# platforms/generators.
add_custom_target(astcompare
  COMMAND ${CMAKE_SOURCE_DIR}/tools/astprint_test.sh
  DEPENDS astprint
  COMMENT "Running libpypa vs CPython AST result comparison test"
  VERBATIM)
......@@ -342,9 +339,9 @@ endmacro()
# tests testname directory arguments
add_pyston_test(defaults tests --order-by-mtime -t50)
add_pyston_test(force_llvm tests -a=-n -a=-X -t90)
add_pyston_test(force_llvm tests -a=-n -a=-x -t90)
# Only register the max-compilation-tier run for Release builds.
# The expansion is quoted so that an empty/unset CMAKE_BUILD_TYPE compares as
# the empty string instead of producing a malformed `if( STREQUAL "Release")`.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
  add_pyston_test(max_compilation_tier tests -a=-O -a=-X -t50)
  add_pyston_test(max_compilation_tier tests -a=-O -a=-x -t50)
endif()
add_pyston_test(defaults cpython --exit-code-only --skip-failing -t100)
add_pyston_test(defaults integration --exit-code-only --skip-failing -t900)
......
......@@ -777,7 +777,7 @@ check$1 test$1: $(PYTHON_EXE_DEPS) pyston$1
@# we pass -I to cpython tests and skip failing ones because they are sloooow otherwise
$(PYTHON) $(TOOLS_DIR)/tester.py -R pyston$1 -j$(TEST_THREADS) -a=-S -k --exit-code-only --skip-failing -t50 $(TEST_DIR)/cpython $(ARGS)
$(PYTHON) $(TOOLS_DIR)/tester.py -R pyston$1 -j$(TEST_THREADS) -k -a=-S --exit-code-only --skip-failing -t600 $(TEST_DIR)/integration $(ARGS)
$(PYTHON) $(TOOLS_DIR)/tester.py -a=-X -R pyston$1 -j$(TEST_THREADS) -a=-n -a=-S -t50 -k $(TESTS_DIR) $(ARGS)
$(PYTHON) $(TOOLS_DIR)/tester.py -a=-x -R pyston$1 -j$(TEST_THREADS) -a=-n -a=-S -t50 -k $(TESTS_DIR) $(ARGS)
$(PYTHON) $(TOOLS_DIR)/tester.py -R pyston$1 -j$(TEST_THREADS) -a=-O -a=-S -k $(TESTS_DIR) $(ARGS)
.PHONY: run$1 dbg$1
......
......@@ -132,7 +132,7 @@ Pyston-specific flags:
<dd>Use a stripped stdlib. When running pyston_dbg, the default is to use a stdlib with full debugging symbols enabled. Passing -r changes this behavior to load a slimmer, stripped stdlib.</dd>
<dt>-x</dt>
<dd>Disable the pypa parser.</dd>
<dd>Enable the pypa parser.</dd>
Standard Python flags:
<dt>-i</dt>
......
import _ast
import struct
import sys
from types import NoneType
def _print_str(s, f):
assert len(s) < 2**32
f.write(struct.pack(">L", len(s)))
f.write(s)
# Maps each `_ast` node class to the integer that convert() emits as the
# node's one-byte type tag.  Tag 0 is not listed here: convert() writes it
# for a missing (None) node.
# The numbering has gaps: 15 and 43 are registered separately after this
# literal, and 36 is not assigned to any class.
# NOTE(review): these ids presumably have to match the reader of the
# serialized stream -- confirm before renumbering anything.
TYPE_MAP = {
_ast.alias: 1,
_ast.arguments: 2,
_ast.Assert: 3,
_ast.Assign: 4,
_ast.Attribute: 5,
_ast.AugAssign: 6,
_ast.BinOp: 7,
_ast.BoolOp: 8,
_ast.Call: 9,
_ast.ClassDef: 10,
_ast.Compare: 11,
_ast.comprehension: 12,
_ast.Delete: 13,
_ast.Dict: 14,
_ast.Exec: 16,
_ast.ExceptHandler: 17,
_ast.ExtSlice: 18,
_ast.Expr: 19,
_ast.For: 20,
_ast.FunctionDef: 21,
_ast.GeneratorExp: 22,
_ast.Global: 23,
_ast.If: 24,
_ast.IfExp: 25,
_ast.Import: 26,
_ast.ImportFrom: 27,
_ast.Index: 28,
_ast.keyword: 29,
_ast.Lambda: 30,
_ast.List: 31,
_ast.ListComp: 32,
_ast.Module: 33,
_ast.Num: 34,
_ast.Name: 35,
_ast.Pass: 37,
_ast.Pow: 38,
_ast.Print: 39,
_ast.Raise: 40,
_ast.Repr: 41,
_ast.Return: 42,
_ast.Slice: 44,
_ast.Str: 45,
_ast.Subscript: 46,
_ast.TryExcept: 47,
_ast.TryFinally: 48,
_ast.Tuple: 49,
_ast.UnaryOp: 50,
_ast.With: 51,
_ast.While: 52,
_ast.Yield: 53,
# Expression contexts, operators and comparison kinds share the same id space
# as statement/expression nodes.
_ast.Store: 54,
_ast.Load: 55,
_ast.Param: 56,
_ast.Not: 57,
_ast.In: 58,
_ast.Is: 59,
_ast.IsNot: 60,
_ast.Or: 61,
_ast.And: 62,
_ast.Eq: 63,
_ast.NotEq: 64,
_ast.NotIn: 65,
_ast.GtE: 66,
_ast.Gt: 67,
_ast.Mod: 68,
_ast.Add: 69,
_ast.Continue: 70,
_ast.Lt: 71,
_ast.LtE: 72,
_ast.Break: 73,
_ast.Sub: 74,
_ast.Del: 75,
_ast.Mult: 76,
_ast.Div: 77,
_ast.USub: 78,
_ast.BitAnd: 79,
_ast.BitOr: 80,
_ast.BitXor: 81,
_ast.RShift: 82,
_ast.LShift: 83,
_ast.Invert: 84,
_ast.UAdd: 85,
_ast.FloorDiv: 86,
_ast.Ellipsis: 87,
_ast.Expression: 88,
_ast.SetComp: 89,
}
# The DictComp and Set node classes are only registered when running on
# Python 2.7 or newer; they fill the ids 15 and 43 left free in TYPE_MAP.
if sys.version_info >= (2, 7):
    TYPE_MAP.update({_ast.DictComp: 15, _ast.Set: 43})
def convert(n, f):
    """Serialize the AST node ``n`` (or ``None``) to the stream ``f``.

    Layout written for a node:
      * one byte: the node's id from TYPE_MAP (0 when ``n`` is None);
      * nothing more for None and for operator/context/boolop/cmpop/unaryop
        nodes;
      * otherwise the marker byte ``\\xae``, an extra kind byte for Num and
        Str nodes, and then every public attribute of the node in sorted
        name order, encoded according to its Python type.

    Raises ``Exception`` for a Num/Str payload or an attribute value whose
    type has no encoding.
    """
    assert n is None or isinstance(n, _ast.AST), repr(n)

    type_idx = TYPE_MAP[type(n)] if n else 0
    f.write(struct.pack(">B", type_idx))
    if n is None:
        return

    # These node kinds carry no fields; the type byte alone describes them.
    tag_only = (_ast.operator, _ast.expr_context, _ast.boolop, _ast.cmpop, _ast.unaryop)
    if isinstance(n, tag_only):
        return

    f.write('\xae')

    # Numeric literals get a byte telling which Python number type follows.
    if isinstance(n, _ast.Num):
        number = n.n
        if isinstance(number, int):
            f.write('\x10')
        elif isinstance(number, long):
            f.write('\x30')
        elif isinstance(number, float):
            f.write('\x20')
        elif isinstance(number, complex):
            f.write('\x40')
        else:
            raise Exception(type(number))

    # String literals get a byte distinguishing str from unicode.
    if isinstance(n, _ast.Str):
        text = n.s
        if isinstance(text, str):
            f.write('\x10')
        elif isinstance(text, unicode):
            f.write('\x20')
        else:
            raise Exception(type(text))

    attrs = n.__dict__
    for k in sorted(attrs):
        if k.startswith('_'):
            continue

        v = attrs[k]
        # Optional names are written as an empty string rather than as a
        # missing node.
        if v is None and k in ("vararg", "kwarg", "asname", "module"):
            v = ""

        if isinstance(v, list):
            assert len(v) < 2**16
            f.write(struct.pack(">H", len(v)))
            if isinstance(n, _ast.Global):
                # Global.names is a list of plain strings, not AST nodes.
                assert k == "names"
                emit = _print_str
            else:
                emit = convert
            for el in v:
                emit(el, f)
        elif isinstance(v, str):
            _print_str(v, f)
        elif isinstance(v, unicode):
            _print_str(v.encode("utf8"), f)
        elif isinstance(v, bool):
            # Must be tested before int: bool is a subclass of int.
            f.write(struct.pack("B", v))
        elif isinstance(v, int):
            f.write(struct.pack(">q", v))
        elif isinstance(v, long):
            _print_str(str(v), f)
        elif isinstance(v, float):
            f.write(struct.pack(">d", v))
        elif isinstance(v, complex):
            # Complex constants can only be pure imaginary
            # (e.g., in 1+0j, 1 and 0j are separate literals)
            assert v.real == 0.0
            f.write(struct.pack(">d", v.imag))
        elif v is None or isinstance(v, _ast.AST):
            convert(v, f)
        else:
            raise Exception((n, k, repr(v)))
if __name__ == "__main__":
    # Command-line entry point: parse the Python source file named by the
    # first argument and write its serialized AST to stdout.
    fn = sys.argv[1]

    # Read through a context manager so the file handle is closed right away
    # instead of being left for the garbage collector.
    with open(fn) as src:
        s = src.read()

    # PyCF_ONLY_AST makes compile() return the AST instead of a code object.
    m = compile(s, fn, "exec", _ast.PyCF_ONLY_AST)
    convert(m, sys.stdout)
......@@ -998,119 +998,53 @@ AST* readASTMisc(BufferedReader* reader) {
}
}
// Quotes `arg` so that the shell treats it as a single literal word: the value
// is wrapped in single quotes and every embedded ' is rewritten as '\''.
static std::string quoteForShell(const std::string& arg) {
    std::string quoted("'");
    for (char c : arg) {
        if (c == '\'')
            quoted += "'\\''";
        else
            quoted += c;
    }
    quoted += '\'';
    return quoted;
}

// Builds the shell command line that runs src/codegen/parse_ast.py (located
// relative to the directory of the running executable) on the file `fn`.
// The result is meant to be handed to popen(), so both paths are shell-quoted;
// otherwise a space or shell metacharacter in either path would split or
// alter the command.
static std::string getParserCommandLine(const char* fn) {
    llvm::SmallString<128> parse_ast_fn;
    // TODO supposed to pass argv0, main_addr to this function:
    parse_ast_fn = llvm::sys::fs::getMainExecutable(NULL, NULL);
    assert(parse_ast_fn.size() && "could not find the path to the pyston src dir");

    // Start by removing the binary name, because the "pyston" binary will break the logic below
    llvm::sys::path::remove_filename(parse_ast_fn);
    llvm::sys::path::append(parse_ast_fn, "src/codegen/parse_ast.py");

    // We may be running in an environment where "python" resolves to pyston (ex in
    // a virtualenv), so try to hard code the path to CPython.
    // This should probably be a configure-time check?
    return std::string("/usr/bin/python -S ") + quoteForShell(parse_ast_fn.str().str()) + " " + quoteForShell(fn);
}
AST_Module* parse_string(const char* code, FutureFlags inherited_flags) {
inherited_flags &= ~(CO_NESTED | CO_FUTURE_DIVISION);
if (ENABLE_CPYTHON_PARSER) {
PyCompilerFlags cf;
cf.cf_flags = inherited_flags;
ArenaWrapper arena;
assert(arena);
const char* fn = "<string>";
mod_ty mod = PyParser_ASTFromString(code, fn, Py_file_input, &cf, arena);
if (!mod)
throwCAPIException();
assert(mod->kind != Interactive_kind);
auto rtn = static_cast<AST_Module*>(cpythonToPystonAST(mod, fn));
return rtn;
}
if (ENABLE_PYPA_PARSER || inherited_flags) {
if (ENABLE_PYPA_PARSER) {
AST_Module* rtn = pypa_parse_string(code, inherited_flags);
RELEASE_ASSERT(rtn, "unknown parse error (possibly: '%s'?)", strerror(errno));
return rtn;
}
RELEASE_ASSERT(!inherited_flags, "the old cpython parser doesn't support specifying initial future flags");
int size = strlen(code);
char buf[] = "pystontmp_XXXXXX";
char* tmpdir = mkdtemp(buf);
assert(tmpdir);
std::string tmp = std::string(tmpdir) + "/in.py";
if (VERBOSITY() >= 3) {
printf("writing %d bytes to %s\n", size, tmp.c_str());
}
{
FileHandle f(tmp.c_str(), "w");
fwrite(code, 1, size, f);
fputc('\n', f);
}
AST_Module* m = parse_file(tmp.c_str(), inherited_flags);
removeDirectoryIfExists(tmpdir);
return m;
PyCompilerFlags cf;
cf.cf_flags = inherited_flags;
ArenaWrapper arena;
assert(arena);
const char* fn = "<string>";
mod_ty mod = PyParser_ASTFromString(code, fn, Py_file_input, &cf, arena);
if (!mod)
throwCAPIException();
assert(mod->kind != Interactive_kind);
auto rtn = static_cast<AST_Module*>(cpythonToPystonAST(mod, fn));
return rtn;
}
AST_Module* parse_file(const char* fn, FutureFlags inherited_flags) {
Timer _t("parsing");
if (ENABLE_CPYTHON_PARSER) {
FileHandle fp(fn, "r");
PyCompilerFlags cf;
cf.cf_flags = inherited_flags;
ArenaWrapper arena;
assert(arena);
mod_ty mod = PyParser_ASTFromFile(fp, fn, Py_file_input, 0, 0, &cf, NULL, arena);
if (!mod)
throwCAPIException();
assert(mod->kind != Interactive_kind);
auto rtn = static_cast<AST_Module*>(cpythonToPystonAST(mod, fn));
return rtn;
}
if (ENABLE_PYPA_PARSER) {
AST_Module* rtn = pypa_parse(fn, inherited_flags);
RELEASE_ASSERT(rtn, "unknown parse error (possibly: '%s'?)", strerror(errno));
return rtn;
}
FILE* fp = popen(getParserCommandLine(fn).c_str(), "r");
BufferedReader* reader = new BufferedReader(fp);
AST* rtn = readASTMisc(reader);
reader->fill();
ASSERT(reader->bytesBuffered() == 0, "%d", reader->bytesBuffered());
delete reader;
int code = pclose(fp);
assert(code == 0);
assert(rtn->type == AST_TYPE::Module);
long us = _t.end();
static StatCounter us_parsing("us_parsing");
us_parsing.log(us);
return ast_cast<AST_Module>(rtn);
FileHandle fp(fn, "r");
PyCompilerFlags cf;
cf.cf_flags = inherited_flags;
ArenaWrapper arena;
assert(arena);
mod_ty mod = PyParser_ASTFromFile(fp, fn, Py_file_input, 0, 0, &cf, NULL, arena);
if (!mod)
throwCAPIException();
assert(mod->kind != Interactive_kind);
auto rtn = static_cast<AST_Module*>(cpythonToPystonAST(mod, fn));
return rtn;
}
const char* getMagic() {
if (ENABLE_CPYTHON_PARSER)
return "a\nCQ";
else if (ENABLE_PYPA_PARSER)
if (ENABLE_PYPA_PARSER)
return "a\ncQ";
else
return "a\ncq";
return "a\nCQ";
}
#define MAGIC_STRING_LENGTH 4
......@@ -1150,51 +1084,29 @@ static std::vector<char> _reparse(const char* fn, const std::string& cache_fn, A
file_data.insert(file_data.end(), (char*)&checksum, (char*)&checksum + CHECKSUM_LENGTH);
checksum = 0;
if (ENABLE_CPYTHON_PARSER || ENABLE_PYPA_PARSER || inherited_flags) {
if (ENABLE_CPYTHON_PARSER) {
FileHandle fp(fn, "r");
PyCompilerFlags cf;
cf.cf_flags = inherited_flags;
ArenaWrapper arena;
assert(arena);
mod_ty mod = PyParser_ASTFromFile(fp, fn, Py_file_input, 0, 0, &cf, NULL, arena);
if (!mod)
throwCAPIException();
assert(mod->kind != Interactive_kind);
module = static_cast<AST_Module*>(cpythonToPystonAST(mod, fn));
} else {
module = pypa_parse(fn, inherited_flags);
RELEASE_ASSERT(module, "unknown parse error");
}
if (!cache_fp)
return std::vector<char>();
auto p = serializeAST(module, cache_fp);
checksum = p.second;
bytes_written += p.first;
if (ENABLE_PYPA_PARSER) {
module = pypa_parse(fn, inherited_flags);
RELEASE_ASSERT(module, "unknown parse error");
} else {
RELEASE_ASSERT(!inherited_flags, "the old cpython parser doesn't support specifying initial future flags");
FILE* parser = popen(getParserCommandLine(fn).c_str(), "r");
char buf[80];
while (true) {
int nread = fread(buf, 1, 80, parser);
if (nread == 0)
break;
bytes_written += nread;
if (cache_fp)
fwrite(buf, 1, nread, cache_fp);
file_data.insert(file_data.end(), buf, buf + nread);
for (int i = 0; i < nread; i++) {
checksum ^= buf[i];
}
}
int code = pclose(parser);
assert(code == 0);
FileHandle fp(fn, "r");
PyCompilerFlags cf;
cf.cf_flags = inherited_flags;
ArenaWrapper arena;
assert(arena);
mod_ty mod = PyParser_ASTFromFile(fp, fn, Py_file_input, 0, 0, &cf, NULL, arena);
if (!mod)
throwCAPIException();
assert(mod->kind != Interactive_kind);
module = static_cast<AST_Module*>(cpythonToPystonAST(mod, fn));
}
if (!cache_fp)
return std::vector<char>();
auto p = serializeAST(module, cache_fp);
checksum = p.second;
bytes_written += p.first;
fseek(cache_fp, checksum_start, SEEK_SET);
if (cache_fp)
fwrite(&bytes_written, 1, LENGTH_LENGTH, cache_fp);
......
......@@ -38,8 +38,7 @@ bool PROFILE = false;
bool DUMPJIT = false;
bool TRAP = false;
bool USE_STRIPPED_STDLIB = true; // always true
bool ENABLE_PYPA_PARSER = true;
bool ENABLE_CPYTHON_PARSER = true;
bool ENABLE_PYPA_PARSER = false;
bool USE_REGALLOC_BASIC = false;
bool PAUSE_AT_ABORT = false;
bool ENABLE_TRACEBACKS = true;
......
......@@ -36,8 +36,8 @@ extern int SPECULATION_THRESHOLD;
extern int MAX_OBJECT_CACHE_ENTRIES;
extern bool SHOW_DISASM, FORCE_INTERPRETER, FORCE_OPTIMIZE, PROFILE, DUMPJIT, TRAP, USE_STRIPPED_STDLIB,
CONTINUE_AFTER_FATAL, ENABLE_INTERPRETER, ENABLE_BASELINEJIT, ENABLE_PYPA_PARSER, ENABLE_CPYTHON_PARSER,
USE_REGALLOC_BASIC, PAUSE_AT_ABORT, ENABLE_TRACEBACKS, FORCE_LLVM_CAPI_CALLS, FORCE_LLVM_CAPI_THROWS;
CONTINUE_AFTER_FATAL, ENABLE_INTERPRETER, ENABLE_BASELINEJIT, ENABLE_PYPA_PARSER, USE_REGALLOC_BASIC,
PAUSE_AT_ABORT, ENABLE_TRACEBACKS, FORCE_LLVM_CAPI_CALLS, FORCE_LLVM_CAPI_THROWS;
extern bool LOG_IC_ASSEMBLY, LOG_BJIT_ASSEMBLY;
......
......@@ -220,9 +220,7 @@ int handleArg(char code) {
} else if (code == 'b') {
USE_REGALLOC_BASIC = false;
} else if (code == 'x') {
ENABLE_PYPA_PARSER = false;
} else if (code == 'X') {
ENABLE_CPYTHON_PARSER = false;
ENABLE_PYPA_PARSER = true;
} else if (code == 'E') {
Py_IgnoreEnvironmentFlag = 1;
} else if (code == 'P') {
......
# fail-if: '-x' in EXTRA_JIT_ARGS
# - we don't get syntax errors through the old parser correctly
try:
exec ";"
print "worked?"
except SyntaxError:
pass
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment