Commit b3c569ce authored by Jeremy Hylton's avatar Jeremy Hylton

The compiler package is now part of the standard library.

Remove all these files.  All except astgen.py are moved to Lib/compiler.
parent f6cc07cf
"""Package for parsing and compiling Python source code
There are several functions defined at the top level that are imported
from modules contained in the package.
parse(buf, mode="exec") -> AST
Converts a string containing Python source code to an abstract
syntax tree (AST). The AST is defined in compiler.ast.
parseFile(path) -> AST
The same as parse(open(path))
walk(ast, visitor, verbose=None)
Does a pre-order walk over the ast using the visitor instance.
See compiler.visitor for details.
compile(source, filename, mode, flags=None, dont_inherit=None)
Returns a code object. A replacement for the builtin compile() function.
compileFile(filename)
Generates a .pyc file by compilining filename.
"""
from transformer import parse, parseFile
from visitor import walk
from pycodegen import compile, compileFile
This diff is collapsed.
"""Generate ast module from specification
This script generates the ast module from a simple specification,
which makes it easy to accomodate changes in the grammar. This
approach would be quite reasonable if the grammar changed often.
Instead, it is rather complex to generate the appropriate code. And
the Node interface has changed more often than the grammar.
"""
import fileinput
import getopt
import re
import sys
from StringIO import StringIO
SPEC = "ast.txt"
COMMA = ", "
def load_boilerplate(file):
f = open(file)
buf = f.read()
f.close()
i = buf.find('### ''PROLOGUE')
j = buf.find('### ''EPILOGUE')
pro = buf[i+12:j].strip()
epi = buf[j+12:].strip()
return pro, epi
def strip_default(arg):
"""Return the argname from an 'arg = default' string"""
i = arg.find('=')
if i == -1:
return arg
t = arg[:i].strip()
return t
P_NODE = 1
P_OTHER = 2
P_NESTED = 3
P_NONE = 4
class NodeInfo:
"""Each instance describes a specific AST node"""
def __init__(self, name, args):
self.name = name
self.args = args.strip()
self.argnames = self.get_argnames()
self.argprops = self.get_argprops()
self.nargs = len(self.argnames)
self.init = []
def get_argnames(self):
if '(' in self.args:
i = self.args.find('(')
j = self.args.rfind(')')
args = self.args[i+1:j]
else:
args = self.args
return [strip_default(arg.strip())
for arg in args.split(',') if arg]
def get_argprops(self):
"""Each argument can have a property like '*' or '!'
XXX This method modifies the argnames in place!
"""
d = {}
hardest_arg = P_NODE
for i in range(len(self.argnames)):
arg = self.argnames[i]
if arg.endswith('*'):
arg = self.argnames[i] = arg[:-1]
d[arg] = P_OTHER
hardest_arg = max(hardest_arg, P_OTHER)
elif arg.endswith('!'):
arg = self.argnames[i] = arg[:-1]
d[arg] = P_NESTED
hardest_arg = max(hardest_arg, P_NESTED)
elif arg.endswith('&'):
arg = self.argnames[i] = arg[:-1]
d[arg] = P_NONE
hardest_arg = max(hardest_arg, P_NONE)
else:
d[arg] = P_NODE
self.hardest_arg = hardest_arg
if hardest_arg > P_NODE:
self.args = self.args.replace('*', '')
self.args = self.args.replace('!', '')
self.args = self.args.replace('&', '')
return d
def gen_source(self):
buf = StringIO()
print >> buf, "class %s(Node):" % self.name
print >> buf, ' nodes["%s"] = "%s"' % (self.name.lower(), self.name)
self._gen_init(buf)
print >> buf
self._gen_getChildren(buf)
print >> buf
self._gen_getChildNodes(buf)
print >> buf
self._gen_repr(buf)
buf.seek(0, 0)
return buf.read()
def _gen_init(self, buf):
print >> buf, " def __init__(self, %s):" % self.args
if self.argnames:
for name in self.argnames:
print >> buf, " self.%s = %s" % (name, name)
else:
print >> buf, " pass"
if self.init:
print >> buf, "".join([" " + line for line in self.init])
def _gen_getChildren(self, buf):
print >> buf, " def getChildren(self):"
if len(self.argnames) == 0:
print >> buf, " return ()"
else:
if self.hardest_arg < P_NESTED:
clist = COMMA.join(["self.%s" % c
for c in self.argnames])
if self.nargs == 1:
print >> buf, " return %s," % clist
else:
print >> buf, " return %s" % clist
else:
print >> buf, " children = []"
template = " children.%s(%sself.%s%s)"
for name in self.argnames:
if self.argprops[name] == P_NESTED:
print >> buf, template % ("extend", "flatten(",
name, ")")
else:
print >> buf, template % ("append", "", name, "")
print >> buf, " return tuple(children)"
def _gen_getChildNodes(self, buf):
print >> buf, " def getChildNodes(self):"
if len(self.argnames) == 0:
print >> buf, " return ()"
else:
if self.hardest_arg < P_NESTED:
clist = ["self.%s" % c
for c in self.argnames
if self.argprops[c] == P_NODE]
if len(clist) == 0:
print >> buf, " return ()"
elif len(clist) == 1:
print >> buf, " return %s," % clist[0]
else:
print >> buf, " return %s" % COMMA.join(clist)
else:
print >> buf, " nodes = []"
template = " nodes.%s(%sself.%s%s)"
for name in self.argnames:
if self.argprops[name] == P_NONE:
tmp = (" if self.%s is not None:"
" nodes.append(self.%s)")
print >> buf, tmp % (name, name)
elif self.argprops[name] == P_NESTED:
print >> buf, template % ("extend", "flatten_nodes(",
name, ")")
elif self.argprops[name] == P_NODE:
print >> buf, template % ("append", "", name, "")
print >> buf, " return tuple(nodes)"
def _gen_repr(self, buf):
print >> buf, " def __repr__(self):"
if self.argnames:
fmt = COMMA.join(["%s"] * self.nargs)
if '(' in self.args:
fmt = '(%s)' % fmt
vals = ["repr(self.%s)" % name for name in self.argnames]
vals = COMMA.join(vals)
if self.nargs == 1:
vals = vals + ","
print >> buf, ' return "%s(%s)" %% (%s)' % \
(self.name, fmt, vals)
else:
print >> buf, ' return "%s()"' % self.name
rx_init = re.compile('init\((.*)\):')
def parse_spec(file):
classes = {}
cur = None
for line in fileinput.input(file):
if line.strip().startswith('#'):
continue
mo = rx_init.search(line)
if mo is None:
if cur is None:
# a normal entry
try:
name, args = line.split(':')
except ValueError:
continue
classes[name] = NodeInfo(name, args)
cur = None
else:
# some code for the __init__ method
cur.init.append(line)
else:
# some extra code for a Node's __init__ method
name = mo.group(1)
cur = classes[name]
return classes.values()
def main():
prologue, epilogue = load_boilerplate(sys.argv[-1])
print prologue
print
classes = parse_spec(SPEC)
for info in classes:
print info.gen_source()
print epilogue
if __name__ == "__main__":
main()
sys.exit(0)
### PROLOGUE
"""Python abstract syntax node definitions
This file is automatically generated.
"""
from types import TupleType, ListType
from consts import CO_VARARGS, CO_VARKEYWORDS
def flatten(list):
l = []
for elt in list:
t = type(elt)
if t is TupleType or t is ListType:
for elt2 in flatten(elt):
l.append(elt2)
else:
l.append(elt)
return l
def flatten_nodes(list):
return [n for n in flatten(list) if isinstance(n, Node)]
def asList(nodes):
l = []
for item in nodes:
if hasattr(item, "asList"):
l.append(item.asList())
else:
t = type(item)
if t is TupleType or t is ListType:
l.append(tuple(asList(item)))
else:
l.append(item)
return l
nodes = {}
class Node: # an abstract base class
lineno = None # provide a lineno for nodes that don't have one
def getType(self):
pass # implemented by subclass
def getChildren(self):
pass # implemented by subclasses
def asList(self):
return tuple(asList(self.getChildren()))
def getChildNodes(self):
pass # implemented by subclasses
class EmptyNode(Node):
pass
### EPILOGUE
klasses = globals()
for k in nodes.keys():
nodes[k] = klasses[nodes[k]]
# operation flags
OP_ASSIGN = 'OP_ASSIGN'
OP_DELETE = 'OP_DELETE'
OP_APPLY = 'OP_APPLY'
SC_LOCAL = 1
SC_GLOBAL = 2
SC_FREE = 3
SC_CELL = 4
SC_UNKNOWN = 5
CO_OPTIMIZED = 0x0001
CO_NEWLOCALS = 0x0002
CO_VARARGS = 0x0004
CO_VARKEYWORDS = 0x0008
CO_NESTED = 0x0010
CO_GENERATOR = 0x0020
CO_GENERATOR_ALLOWED = 0x1000
CO_FUTURE_DIVISION = 0x2000
"""Parser for future statements
"""
from compiler import ast, walk
def is_future(stmt):
"""Return true if statement is a well-formed future statement"""
if not isinstance(stmt, ast.From):
return 0
if stmt.modname == "__future__":
return 1
else:
return 0
class FutureParser:
features = ("nested_scopes", "generators", "division")
def __init__(self):
self.found = {} # set
def visitModule(self, node):
stmt = node.node
for s in stmt.nodes:
if not self.check_stmt(s):
break
def check_stmt(self, stmt):
if is_future(stmt):
for name, asname in stmt.names:
if name in self.features:
self.found[name] = 1
else:
raise SyntaxError, \
"future feature %s is not defined" % name
stmt.valid_future = 1
return 1
return 0
def get_features(self):
"""Return list of features enabled by future statements"""
return self.found.keys()
class BadFutureParser:
"""Check for invalid future statements"""
def visitFrom(self, node):
if hasattr(node, 'valid_future'):
return
if node.modname != "__future__":
return
raise SyntaxError, "invalid future statement"
def find_futures(node):
p1 = FutureParser()
p2 = BadFutureParser()
walk(node, p1)
walk(node, p2)
return p1.get_features()
if __name__ == "__main__":
import sys
from compiler import parseFile, walk
for file in sys.argv[1:]:
print file
tree = parseFile(file)
v = FutureParser()
walk(tree, v)
print v.found
print
import types
def flatten(tup):
elts = []
for elt in tup:
if type(elt) == types.TupleType:
elts = elts + flatten(elt)
else:
elts.append(elt)
return elts
class Set:
def __init__(self):
self.elts = {}
def __len__(self):
return len(self.elts)
def __contains__(self, elt):
return self.elts.has_key(elt)
def add(self, elt):
self.elts[elt] = elt
def elements(self):
return self.elts.keys()
def has_elt(self, elt):
return self.elts.has_key(elt)
def remove(self, elt):
del self.elts[elt]
def copy(self):
c = Set()
c.elts.update(self.elts)
return c
class Stack:
def __init__(self):
self.stack = []
self.pop = self.stack.pop
def __len__(self):
return len(self.stack)
def push(self, elt):
self.stack.append(elt)
def top(self):
return self.stack[-1]
def __getitem__(self, index): # needed by visitContinue()
return self.stack[index]
MANGLE_LEN = 256 # magic constant from compile.c
def mangle(name, klass):
if not name.startswith('__'):
return name
if len(name) + 2 >= MANGLE_LEN:
return name
if name.endswith('__'):
return name
try:
i = 0
while klass[i] == '_':
i = i + 1
except IndexError:
return name
klass = klass[i:]
tlen = len(klass) + len(name)
if tlen > MANGLE_LEN:
klass = klass[:MANGLE_LEN-tlen]
return "_%s%s" % (klass, name)
def set_filename(filename, tree):
"""Set the filename attribute to filename on every node in tree"""
worklist = [tree]
while worklist:
node = worklist.pop(0)
node.filename = filename
worklist.extend(node.getChildNodes())
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
"""Check for errs in the AST.
The Python parser does not catch all syntax errors. Others, like
assignments with invalid targets, are caught in the code generation
phase.
The compiler package catches some errors in the transformer module.
But it seems clearer to write checkers that use the AST to detect
errors.
"""
from compiler import ast, walk
def check(tree, multi=None):
v = SyntaxErrorChecker(multi)
walk(tree, v)
return v.errors
class SyntaxErrorChecker:
"""A visitor to find syntax errors in the AST."""
def __init__(self, multi=None):
"""Create new visitor object.
If optional argument multi is not None, then print messages
for each error rather than raising a SyntaxError for the
first.
"""
self.multi = multi
self.errors = 0
def error(self, node, msg):
self.errors = self.errors + 1
if self.multi is not None:
print "%s:%s: %s" % (node.filename, node.lineno, msg)
else:
raise SyntaxError, "%s (%s:%s)" % (msg, node.filename, node.lineno)
def visitAssign(self, node):
# the transformer module handles many of these
for target in node.nodes:
pass
## if isinstance(target, ast.AssList):
## if target.lineno is None:
## target.lineno = node.lineno
## self.error(target, "can't assign to list comprehension")
This diff is collapsed.
from compiler import ast
# XXX should probably rename ASTVisitor to ASTWalker
# XXX can it be made even more generic?
class ASTVisitor:
"""Performs a depth-first walk of the AST
The ASTVisitor will walk the AST, performing either a preorder or
postorder traversal depending on which method is called.
methods:
preorder(tree, visitor)
postorder(tree, visitor)
tree: an instance of ast.Node
visitor: an instance with visitXXX methods
The ASTVisitor is responsible for walking over the tree in the
correct order. For each node, it checks the visitor argument for
a method named 'visitNodeType' where NodeType is the name of the
node's class, e.g. Class. If the method exists, it is called
with the node as its sole argument.
The visitor method for a particular node type can control how
child nodes are visited during a preorder walk. (It can't control
the order during a postorder walk, because it is called _after_
the walk has occurred.) The ASTVisitor modifies the visitor
argument by adding a visit method to the visitor; this method can
be used to visit a particular child node. If the visitor method
returns a true value, the ASTVisitor will not traverse the child
nodes.
XXX The interface for controlling the preorder walk needs to be
re-considered. The current interface is convenient for visitors
that mostly let the ASTVisitor do everything. For something like
a code generator, where you want to walk to occur in a specific
order, it's a pain to add "return 1" to the end of each method.
"""
VERBOSE = 0
def __init__(self):
self.node = None
self._cache = {}
def default(self, node, *args):
for child in node.getChildNodes():
self.dispatch(child, *args)
def dispatch(self, node, *args):
self.node = node
klass = node.__class__
meth = self._cache.get(klass, None)
if meth is None:
className = klass.__name__
meth = getattr(self.visitor, 'visit' + className, self.default)
self._cache[klass] = meth
## if self.VERBOSE > 0:
## className = klass.__name__
## if self.VERBOSE == 1:
## if meth == 0:
## print "dispatch", className
## else:
## print "dispatch", className, (meth and meth.__name__ or '')
return meth(node, *args)
def preorder(self, tree, visitor, *args):
"""Do preorder walk of tree using visitor"""
self.visitor = visitor
visitor.visit = self.dispatch
self.dispatch(tree, *args) # XXX *args make sense?
class ExampleASTVisitor(ASTVisitor):
"""Prints examples of the nodes that aren't visited
This visitor-driver is only useful for development, when it's
helpful to develop a visitor incremently, and get feedback on what
you still have to do.
"""
examples = {}
def dispatch(self, node, *args):
self.node = node
meth = self._cache.get(node.__class__, None)
className = node.__class__.__name__
if meth is None:
meth = getattr(self.visitor, 'visit' + className, 0)
self._cache[node.__class__] = meth
if self.VERBOSE > 1:
print "dispatch", className, (meth and meth.__name__ or '')
if meth:
meth(node, *args)
elif self.VERBOSE > 0:
klass = node.__class__
if not self.examples.has_key(klass):
self.examples[klass] = klass
print
print self.visitor
print klass
for attr in dir(node):
if attr[0] != '_':
print "\t", "%-12.12s" % attr, getattr(node, attr)
print
return self.default(node, *args)
# XXX this is an API change
_walker = ASTVisitor
def walk(tree, visitor, walker=None, verbose=None):
if walker is None:
walker = _walker()
if verbose is not None:
walker.VERBOSE = verbose
walker.preorder(tree, visitor)
return walker.visitor
def dumpNode(node):
print node.__class__
for attr in dir(node):
if attr[0] != '_':
print "\t", "%-10.10s" % attr, getattr(node, attr)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment