Commit 0a6f9547 authored by Guido van Rossum

Another big update, fixing all known bugs related to nesting functions

and classes.  Also add a mini main program that dumps the results for
a given file or module.
parent 4b2030fe
"""Parse a Python file and retrieve classes and methods. """Parse a Python module and describe its classes and methods.
Parse enough of a Python file to recognize class and method Parse enough of a Python file to recognize imports and class and
definitions and to find out the superclasses of a class. method definitions, and to find out the superclasses of a class.
The interface consists of a single function: The interface consists of a single function:
readmodule_ex(module [, path[, inpackage]]) readmodule_ex(module [, path])
module is the name of a Python module, path is an optional list of where module is the name of a Python module, and path is an optional
directories where the module is to be searched. If present, path is list of directories where the module is to be searched. If present,
prepended to the system search path sys.path. (inpackage is used path is prepended to the system search path sys.path. The return
internally to search for a submodule of a package.) value is a dictionary. The keys of the dictionary are the names of
The return value is a dictionary. The keys of the dictionary are the classes defined in the module (including classes that are defined
the names of the classes defined in the module (including classes via the from XXX import YYY construct). The values are class
that are defined via the from XXX import YYY construct). The values instances of the class Class defined here. One special key/value pair
are class instances of the class Class defined here. is present for packages: the key '__path__' has a list as its value
which contains the package search path.
A class is described by the class Class in this module. Instances A class is described by the class Class in this module. Instances
of this class have the following instance variables: of this class have the following instance variables:
...@@ -36,21 +37,12 @@ Instances of this class have the following instance variables: ...@@ -36,21 +37,12 @@ Instances of this class have the following instance variables:
name -- the name of the class name -- the name of the class
file -- the file in which the class was defined file -- the file in which the class was defined
lineno -- the line in the file on which the class statement occurred lineno -- the line in the file on which the class statement occurred
BUGS
- Nested classes and functions can confuse it.
PACKAGE CAVEAT
- When you call readmodule_ex for a package, dict['__path__'] is a
list, which may confuse older class browsers. (readmodule filters
these out though.)
""" """
import sys import sys
import imp import imp
import tokenize # Python tokenizer import tokenize # Python tokenizer
from token import NAME from token import NAME, DEDENT, NEWLINE
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"] __all__ = ["readmodule", "readmodule_ex", "Class", "Function"]
...@@ -86,14 +78,14 @@ def readmodule(module, path=[]): ...@@ -86,14 +78,14 @@ def readmodule(module, path=[]):
Call readmodule_ex() and then only keep Class objects from the Call readmodule_ex() and then only keep Class objects from the
resulting dictionary.''' resulting dictionary.'''
dict = readmodule_ex(module, path) dict = _readmodule(module, path)
res = {} res = {}
for key, value in dict.items(): for key, value in dict.items():
if isinstance(value, Class): if isinstance(value, Class):
res[key] = value res[key] = value
return res return res
def readmodule_ex(module, path=[], inpackage=None): def readmodule_ex(module, path=[]):
'''Read a module file and return a dictionary of classes. '''Read a module file and return a dictionary of classes.
Search for MODULE in PATH and sys.path, read and parse the Search for MODULE in PATH and sys.path, read and parse the
...@@ -105,7 +97,10 @@ def readmodule_ex(module, path=[], inpackage=None): ...@@ -105,7 +97,10 @@ def readmodule_ex(module, path=[], inpackage=None):
package search path; otherwise, we are searching for a top-level package search path; otherwise, we are searching for a top-level
module, and PATH is combined with sys.path. module, and PATH is combined with sys.path.
''' '''
return _readmodule(module, path)
def _readmodule(module, path, inpackage=None):
'''Do the hard work for readmodule[_ex].'''
# Compute the full module name (prepending inpackage if set) # Compute the full module name (prepending inpackage if set)
if inpackage: if inpackage:
fullmodule = "%s.%s" % (inpackage, module) fullmodule = "%s.%s" % (inpackage, module)
...@@ -129,10 +124,10 @@ def readmodule_ex(module, path=[], inpackage=None): ...@@ -129,10 +124,10 @@ def readmodule_ex(module, path=[], inpackage=None):
if i >= 0: if i >= 0:
package = module[:i] package = module[:i]
submodule = module[i+1:] submodule = module[i+1:]
parent = readmodule_ex(package, path, inpackage) parent = _readmodule(package, path, inpackage)
if inpackage: if inpackage:
package = "%s.%s" % (inpackage, package) package = "%s.%s" % (inpackage, package)
return readmodule_ex(submodule, parent['__path__'], package) return _readmodule(submodule, parent['__path__'], package)
# Search the path for the module # Search the path for the module
f = None f = None
...@@ -150,36 +145,42 @@ def readmodule_ex(module, path=[], inpackage=None): ...@@ -150,36 +145,42 @@ def readmodule_ex(module, path=[], inpackage=None):
f.close() f.close()
return dict return dict
classstack = [] # stack of (class, indent) pairs stack = [] # stack of (class, indent) pairs
g = tokenize.generate_tokens(f.readline) g = tokenize.generate_tokens(f.readline)
try: try:
for tokentype, token, start, end, line in g: for tokentype, token, start, end, line in g:
if token == 'def': if tokentype == DEDENT:
lineno, thisindent = start
# close nested classes and defs
while stack and stack[-1][1] >= thisindent:
del stack[-1]
elif token == 'def':
lineno, thisindent = start lineno, thisindent = start
# close previous nested classes and defs
while stack and stack[-1][1] >= thisindent:
del stack[-1]
tokentype, meth_name, start, end, line = g.next() tokentype, meth_name, start, end, line = g.next()
if tokentype != NAME: if tokentype != NAME:
continue # Syntax error continue # Syntax error
# close all classes indented at least as much if stack:
while classstack and \ cur_class = stack[-1][0]
classstack[-1][1] >= thisindent: if isinstance(cur_class, Class):
del classstack[-1] # it's a method
if classstack:
# it's a class method
cur_class = classstack[-1][0]
cur_class._addmethod(meth_name, lineno) cur_class._addmethod(meth_name, lineno)
# else it's a nested def
else: else:
# it's a function # it's a function
dict[meth_name] = Function(module, meth_name, file, lineno) dict[meth_name] = Function(module, meth_name, file, lineno)
stack.append((None, thisindent)) # Marker for nested fns
elif token == 'class': elif token == 'class':
lineno, thisindent = start lineno, thisindent = start
# close previous nested classes and defs
while stack and stack[-1][1] >= thisindent:
del stack[-1]
tokentype, class_name, start, end, line = g.next() tokentype, class_name, start, end, line = g.next()
if tokentype != NAME: if tokentype != NAME:
continue # Syntax error continue # Syntax error
# close all classes indented at least as much
while classstack and \
classstack[-1][1] >= thisindent:
del classstack[-1]
# parse what follows the class name # parse what follows the class name
tokentype, token, start, end, line = g.next() tokentype, token, start, end, line = g.next()
inherit = None inherit = None
...@@ -208,6 +209,7 @@ def readmodule_ex(module, path=[], inpackage=None): ...@@ -208,6 +209,7 @@ def readmodule_ex(module, path=[], inpackage=None):
if c in d: if c in d:
n = d[c] n = d[c]
names.append(n) names.append(n)
super = []
if token == '(': if token == '(':
level += 1 level += 1
elif token == ')': elif token == ')':
...@@ -220,20 +222,21 @@ def readmodule_ex(module, path=[], inpackage=None): ...@@ -220,20 +222,21 @@ def readmodule_ex(module, path=[], inpackage=None):
super.append(token) super.append(token)
inherit = names inherit = names
cur_class = Class(module, class_name, inherit, file, lineno) cur_class = Class(module, class_name, inherit, file, lineno)
if not stack:
dict[class_name] = cur_class dict[class_name] = cur_class
classstack.append((cur_class, thisindent)) stack.append((cur_class, thisindent))
elif token == 'import' and start[1] == 0: elif token == 'import' and start[1] == 0:
modules = _getnamelist(g) modules = _getnamelist(g)
for mod, mod2 in modules: for mod, mod2 in modules:
try: try:
# Recursively read the imported module # Recursively read the imported module
if not inpackage: if not inpackage:
readmodule_ex(mod, path) _readmodule(mod, path)
else: else:
try: try:
readmodule_ex(mod, path, inpackage) _readmodule(mod, path, inpackage)
except ImportError: except ImportError:
readmodule_ex(mod) _readmodule(mod, [])
except: except:
# If we can't find or parse the imported module, # If we can't find or parse the imported module,
# too bad -- don't die here. # too bad -- don't die here.
...@@ -245,7 +248,7 @@ def readmodule_ex(module, path=[], inpackage=None): ...@@ -245,7 +248,7 @@ def readmodule_ex(module, path=[], inpackage=None):
names = _getnamelist(g) names = _getnamelist(g)
try: try:
# Recursively read the imported module # Recursively read the imported module
d = readmodule_ex(mod, path, inpackage) d = _readmodule(mod, path, inpackage)
except: except:
# If we can't find or parse the imported module, # If we can't find or parse the imported module,
# too bad -- don't die here. # too bad -- don't die here.
...@@ -256,11 +259,9 @@ def readmodule_ex(module, path=[], inpackage=None): ...@@ -256,11 +259,9 @@ def readmodule_ex(module, path=[], inpackage=None):
if n in d: if n in d:
dict[n2 or n] = d[n] dict[n2 or n] = d[n]
elif n == '*': elif n == '*':
# only add a name if not already there (to mimic # don't add names that start with _
# what Python does internally) also don't add
# names that start with _
for n in d: for n in d:
if n[0] != '_' and not n in dict: if n[0] != '_':
dict[n] = d[n] dict[n] = d[n]
except StopIteration: except StopIteration:
pass pass
...@@ -306,3 +307,32 @@ def _getname(g): ...@@ -306,3 +307,32 @@ def _getname(g):
break break
parts.append(token) parts.append(token)
return (".".join(parts), token) return (".".join(parts), token)
def _main():
# Main program for testing.
import os
mod = sys.argv[1]
if os.path.exists(mod):
path = [os.path.dirname(mod)]
mod = os.path.basename(mod)
if mod.lower().endswith(".py"):
mod = mod[:-3]
else:
path = []
dict = readmodule_ex(mod, path)
objs = dict.values()
objs.sort(lambda a, b: cmp(getattr(a, 'lineno', 0),
getattr(b, 'lineno', 0)))
for obj in objs:
if isinstance(obj, Class):
print "class", obj.name, obj.super, obj.lineno
methods = obj.methods.items()
methods.sort(lambda a, b: cmp(a[1], b[1]))
for name, lineno in methods:
if name != "__path__":
print " def", name, lineno
elif isinstance(obj, Function):
print "def", obj.name, obj.lineno
if __name__ == "__main__":
_main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment