Commit 5e3d338a authored by Eli Bendersky

Issue #19655: Replace the ASDL parser carried with CPython

The new parser does not rely on Spark (which is now removed from our repo),
uses modern 3.x idioms and is significantly smaller and simpler.

It generates exactly the same AST files (.h and .c), so in practice no builds
should be affected.
parent 732ac654
...@@ -57,6 +57,11 @@ Core and Builtins ...@@ -57,6 +57,11 @@ Core and Builtins
- Issue #19995: %c, %o, %x, and %X now raise TypeError on non-integer input. - Issue #19995: %c, %o, %x, and %X now raise TypeError on non-integer input.
- Issue #19655: The ASDL parser - used by the build process to generate code for
managing the Python AST in C - was rewritten. The new parser is self-contained
and no longer requires carrying along the spark.py parser-generator library;
spark.py was removed from the source base.
- Issue #12546: Allow \x00 to be used as a fill character when using str, int, - Issue #12546: Allow \x00 to be used as a fill character when using str, int,
float, and complex __format__ methods. float, and complex __format__ methods.
......
This diff is collapsed.
#! /usr/bin/env python #! /usr/bin/env python
"""Generate C code from an ASDL description.""" """Generate C code from an ASDL description."""
# TO DO
# handle fields that have a type but no name
import os, sys import os, sys
import asdl import asdl
...@@ -14,12 +11,8 @@ MAX_COL = 80 ...@@ -14,12 +11,8 @@ MAX_COL = 80
def get_c_type(name): def get_c_type(name):
"""Return a string for the C name of the type. """Return a string for the C name of the type.
This function special cases the default types provided by asdl: This function special cases the default types provided by asdl.
identifier, string, int.
""" """
# XXX ack! need to figure out where Id is useful and where string
if isinstance(name, asdl.Id):
name = name.value
if name in asdl.builtin_types: if name in asdl.builtin_types:
return name return name
else: else:
...@@ -144,7 +137,7 @@ class TypeDefVisitor(EmitVisitor): ...@@ -144,7 +137,7 @@ class TypeDefVisitor(EmitVisitor):
class StructVisitor(EmitVisitor): class StructVisitor(EmitVisitor):
"""Visitor to generate typdefs for AST.""" """Visitor to generate typedefs for AST."""
def visitModule(self, mod): def visitModule(self, mod):
for dfn in mod.dfns: for dfn in mod.dfns:
...@@ -188,9 +181,6 @@ class StructVisitor(EmitVisitor): ...@@ -188,9 +181,6 @@ class StructVisitor(EmitVisitor):
self.visit(f, depth + 1) self.visit(f, depth + 1)
self.emit("} %s;" % cons.name, depth) self.emit("} %s;" % cons.name, depth)
self.emit("", depth) self.emit("", depth)
else:
# XXX not sure what I want here, nothing is probably fine
pass
def visitField(self, field, depth): def visitField(self, field, depth):
# XXX need to lookup field.type, because it might be something # XXX need to lookup field.type, because it might be something
...@@ -198,7 +188,7 @@ class StructVisitor(EmitVisitor): ...@@ -198,7 +188,7 @@ class StructVisitor(EmitVisitor):
ctype = get_c_type(field.type) ctype = get_c_type(field.type)
name = field.name name = field.name
if field.seq: if field.seq:
if field.type.value in ('cmpop',): if field.type == 'cmpop':
self.emit("asdl_int_seq *%(name)s;" % locals(), depth) self.emit("asdl_int_seq *%(name)s;" % locals(), depth)
else: else:
self.emit("asdl_seq *%(name)s;" % locals(), depth) self.emit("asdl_seq *%(name)s;" % locals(), depth)
...@@ -253,7 +243,7 @@ class PrototypeVisitor(EmitVisitor): ...@@ -253,7 +243,7 @@ class PrototypeVisitor(EmitVisitor):
name = f.name name = f.name
# XXX should extend get_c_type() to handle this # XXX should extend get_c_type() to handle this
if f.seq: if f.seq:
if f.type.value in ('cmpop',): if f.type == 'cmpop':
ctype = "asdl_int_seq *" ctype = "asdl_int_seq *"
else: else:
ctype = "asdl_seq *" ctype = "asdl_seq *"
...@@ -437,7 +427,7 @@ class Obj2ModVisitor(PickleVisitor): ...@@ -437,7 +427,7 @@ class Obj2ModVisitor(PickleVisitor):
self.emit("", 0) self.emit("", 0)
for f in t.fields: for f in t.fields:
self.visitField(f, t.name, sum=sum, depth=2) self.visitField(f, t.name, sum=sum, depth=2)
args = [f.name.value for f in t.fields] + [a.name.value for a in sum.attributes] args = [f.name for f in t.fields] + [a.name for a in sum.attributes]
self.emit("*out = %s(%s);" % (t.name, self.buildArgs(args)), 2) self.emit("*out = %s(%s);" % (t.name, self.buildArgs(args)), 2)
self.emit("if (*out == NULL) goto failed;", 2) self.emit("if (*out == NULL) goto failed;", 2)
self.emit("return 0;", 2) self.emit("return 0;", 2)
...@@ -465,7 +455,7 @@ class Obj2ModVisitor(PickleVisitor): ...@@ -465,7 +455,7 @@ class Obj2ModVisitor(PickleVisitor):
self.emit("", 0) self.emit("", 0)
for f in prod.fields: for f in prod.fields:
self.visitField(f, name, prod=prod, depth=1) self.visitField(f, name, prod=prod, depth=1)
args = [f.name.value for f in prod.fields] args = [f.name for f in prod.fields]
self.emit("*out = %s(%s);" % (name, self.buildArgs(args)), 1) self.emit("*out = %s(%s);" % (name, self.buildArgs(args)), 1)
self.emit("return 0;", 1) self.emit("return 0;", 1)
self.emit("failed:", 0) self.emit("failed:", 0)
...@@ -487,7 +477,7 @@ class Obj2ModVisitor(PickleVisitor): ...@@ -487,7 +477,7 @@ class Obj2ModVisitor(PickleVisitor):
def isSimpleSum(self, field): def isSimpleSum(self, field):
# XXX can the members of this list be determined automatically? # XXX can the members of this list be determined automatically?
return field.type.value in ('expr_context', 'boolop', 'operator', return field.type in ('expr_context', 'boolop', 'operator',
'unaryop', 'cmpop') 'unaryop', 'cmpop')
def isNumeric(self, field): def isNumeric(self, field):
...@@ -960,7 +950,7 @@ static int exists_not_none(PyObject *obj, _Py_Identifier *id) ...@@ -960,7 +950,7 @@ static int exists_not_none(PyObject *obj, _Py_Identifier *id)
def visitProduct(self, prod, name): def visitProduct(self, prod, name):
if prod.fields: if prod.fields:
fields = name.value+"_fields" fields = name+"_fields"
else: else:
fields = "NULL" fields = "NULL"
self.emit('%s_type = make_type("%s", &AST_type, %s, %d);' % self.emit('%s_type = make_type("%s", &AST_type, %s, %d);' %
...@@ -987,7 +977,7 @@ static int exists_not_none(PyObject *obj, _Py_Identifier *id) ...@@ -987,7 +977,7 @@ static int exists_not_none(PyObject *obj, _Py_Identifier *id)
def visitConstructor(self, cons, name, simple): def visitConstructor(self, cons, name, simple):
if cons.fields: if cons.fields:
fields = cons.name.value+"_fields" fields = cons.name+"_fields"
else: else:
fields = "NULL" fields = "NULL"
self.emit('%s_type = make_type("%s", %s_type, %s, %d);' % self.emit('%s_type = make_type("%s", %s_type, %s, %d);' %
...@@ -1170,7 +1160,7 @@ class ObjVisitor(PickleVisitor): ...@@ -1170,7 +1160,7 @@ class ObjVisitor(PickleVisitor):
def set(self, field, value, depth): def set(self, field, value, depth):
if field.seq: if field.seq:
# XXX should really check for is_simple, but that requires a symbol table # XXX should really check for is_simple, but that requires a symbol table
if field.type.value == "cmpop": if field.type == "cmpop":
# While the sequence elements are stored as void*, # While the sequence elements are stored as void*,
# ast2obj_cmpop expects an enum # ast2obj_cmpop expects an enum
self.emit("{", depth) self.emit("{", depth)
...@@ -1249,12 +1239,15 @@ class ChainOfVisitors: ...@@ -1249,12 +1239,15 @@ class ChainOfVisitors:
common_msg = "/* File automatically generated by %s. */\n\n" common_msg = "/* File automatically generated by %s. */\n\n"
def main(srcfile): def main(srcfile, dump_module=False):
argv0 = sys.argv[0] argv0 = sys.argv[0]
components = argv0.split(os.sep) components = argv0.split(os.sep)
argv0 = os.sep.join(components[-2:]) argv0 = os.sep.join(components[-2:])
auto_gen_msg = common_msg % argv0 auto_gen_msg = common_msg % argv0
mod = asdl.parse(srcfile) mod = asdl.parse(srcfile)
if dump_module:
print('Parsed Module:')
print(mod)
if not asdl.check(mod): if not asdl.check(mod):
sys.exit(1) sys.exit(1)
if INC_DIR: if INC_DIR:
...@@ -1301,16 +1294,19 @@ if __name__ == "__main__": ...@@ -1301,16 +1294,19 @@ if __name__ == "__main__":
INC_DIR = '' INC_DIR = ''
SRC_DIR = '' SRC_DIR = ''
opts, args = getopt.getopt(sys.argv[1:], "h:c:") dump_module = False
if len(opts) != 1: opts, args = getopt.getopt(sys.argv[1:], "dh:c:")
sys.stdout.write("Must specify exactly one output file\n")
sys.exit(1)
for o, v in opts: for o, v in opts:
if o == '-h': if o == '-h':
INC_DIR = v INC_DIR = v
if o == '-c': if o == '-c':
SRC_DIR = v SRC_DIR = v
if len(args) != 1: if o == '-d':
sys.stdout.write("Must specify single input file\n") dump_module = True
if INC_DIR and SRC_DIR:
print('Must specify exactly one output file')
sys.exit(1)
elif len(args) != 1:
print('Must specify single input file')
sys.exit(1) sys.exit(1)
main(args[0]) main(args[0], dump_module)
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment