Commit c7a4d66e authored by Guido van Rossum

Tim Peters: Taught it more "real Python" rules without slowing it
appreciably.  Triple-quoted strings no longer confuse it, nor nested
classes or defs, nor comments starting in column 1.  Chews thru
Tkinter.py in < 3 seconds for me; doctest.py no longer confuses it; no
longer missing methods in PyShell.py; etc.  Also captures defs
starting in column 1 now, but ignores them; an interface should be
added so that IDLE's class browser can show the top-level functions
too.
parent 49f0a958
...@@ -29,10 +29,14 @@ are recognized and imported modules are scanned as well, this ...@@ -29,10 +29,14 @@ are recognized and imported modules are scanned as well, this
shouldn't happen often. shouldn't happen often.
BUGS BUGS
Continuation lines are not dealt with at all and strings may confuse Continuation lines are not dealt with at all.
the hell out of the parser, but it usually works. While triple-quoted strings won't confuse it, lines that look like
Nested classes are not recognized. def, class, import or "from ... import" stmts inside backslash-continued
Nested defs may be mistaken for class methods.''' # ' <-- bow to font lock single-quoted strings are treated like code. The expense of stopping
that isn't worth it.
Code that doesn't pass tabnanny or python -t will confuse it, unless
you set the module-level TABWIDTH variable (default 8) to the correct tab width
for the file.''' # ' <-- bow to font lock
import os import os
import sys import sys
...@@ -40,39 +44,35 @@ import imp ...@@ -40,39 +44,35 @@ import imp
import re import re
import string import string
TABWIDTH = 8
_getnext = re.compile(r""" _getnext = re.compile(r"""
## String slows it down by more than a factor of 2 (not because the (?P<String>
## string regexp is slow, but because there are often a lot of strings, \""" [^"\\]* (?:
## which means the regexp has to get called that many more times). (?: \\. | "(?!"") )
## (?P<String> [^"\\]*
## " [^"\\\n]* (?: \\. [^"\\\n]* )* " )*
## \"""
## | ' [^'\\\n]* (?: \\. [^'\\\n]* )* '
## | ''' [^'\\]* (?:
## | \""" [^"\\]* (?: (?: \\. | '(?!'') )
## (?: \\. | "(?!"") ) [^'\\]*
## [^"\\]* )*
## )* '''
## \""" )
##
## | ''' [^'\\]* (?: | (?P<Method>
## (?: \\. | '(?!'') ) ^
## [^'\\]* (?P<MethodIndent> [ \t]* )
## )* def [ \t]+
## '''
## )
##
##| (?P<Method>
(?P<Method>
# dicey trick: assume a def not at top level is a method
^ [ \t]+ def [ \t]+
(?P<MethodName> [a-zA-Z_] \w* ) (?P<MethodName> [a-zA-Z_] \w* )
[ \t]* \( [ \t]* \(
) )
| (?P<Class> | (?P<Class>
# lightly questionable: assume only top-level classes count ^
^ class [ \t]+ (?P<ClassIndent> [ \t]* )
class [ \t]+
(?P<ClassName> [a-zA-Z_] \w* ) (?P<ClassName> [a-zA-Z_] \w* )
[ \t]* [ \t]*
(?P<ClassSupers> \( [^)\n]* \) )? (?P<ClassSupers> \( [^)\n]* \) )?
...@@ -96,11 +96,6 @@ _getnext = re.compile(r""" ...@@ -96,11 +96,6 @@ _getnext = re.compile(r"""
import [ \t]+ import [ \t]+
(?P<ImportFromList> [^#;\n]+ ) (?P<ImportFromList> [^#;\n]+ )
) )
| (?P<AtTopLevel>
# cheap trick: anything other than ws in first column
^ \S
)
""", re.VERBOSE | re.DOTALL | re.MULTILINE).search """, re.VERBOSE | re.DOTALL | re.MULTILINE).search
_modules = {} # cache of modules we've seen _modules = {} # cache of modules we've seen
...@@ -169,10 +164,10 @@ def readmodule(module, path=[], inpackage=0): ...@@ -169,10 +164,10 @@ def readmodule(module, path=[], inpackage=0):
_modules[module] = dict _modules[module] = dict
return dict return dict
cur_class = None
dict = {} dict = {}
_modules[module] = dict _modules[module] = dict
imports = [] imports = []
classstack = [] # stack of (class, indent) pairs
src = f.read() src = f.read()
f.close() f.close()
...@@ -191,26 +186,33 @@ def readmodule(module, path=[], inpackage=0): ...@@ -191,26 +186,33 @@ def readmodule(module, path=[], inpackage=0):
break break
start, i = m.span() start, i = m.span()
if m.start("AtTopLevel") >= 0: if m.start("Method") >= 0:
# end of class definition # found a method definition or function
cur_class = None thisindent = _indent(m.group("MethodIndent"))
# close all classes indented at least as much
## elif m.start("String") >= 0: while classstack and \
## pass classstack[-1][1] >= thisindent:
del classstack[-1]
elif m.start("Method") >= 0: if classstack:
# found a method definition
if cur_class:
# and we know the class it belongs to # and we know the class it belongs to
meth_name = m.group("MethodName") meth_name = m.group("MethodName")
lineno = lineno + \ lineno = lineno + \
countnl(src, '\n', countnl(src, '\n',
last_lineno_pos, start) last_lineno_pos, start)
last_lineno_pos = start last_lineno_pos = start
cur_class = classstack[-1][0]
cur_class._addmethod(meth_name, lineno) cur_class._addmethod(meth_name, lineno)
elif m.start("String") >= 0:
pass
elif m.start("Class") >= 0: elif m.start("Class") >= 0:
# we found a class definition # we found a class definition
thisindent = _indent(m.group("ClassIndent"))
# close all classes indented at least as much
while classstack and \
classstack[-1][1] >= thisindent:
del classstack[-1]
lineno = lineno + \ lineno = lineno + \
countnl(src, '\n', last_lineno_pos, start) countnl(src, '\n', last_lineno_pos, start)
last_lineno_pos = start last_lineno_pos = start
...@@ -245,6 +247,7 @@ def readmodule(module, path=[], inpackage=0): ...@@ -245,6 +247,7 @@ def readmodule(module, path=[], inpackage=0):
cur_class = Class(module, class_name, inherit, cur_class = Class(module, class_name, inherit,
file, lineno) file, lineno)
dict[class_name] = cur_class dict[class_name] = cur_class
classstack.append((cur_class, thisindent))
elif m.start("Import") >= 0: elif m.start("Import") >= 0:
# import module # import module
...@@ -287,3 +290,6 @@ def readmodule(module, path=[], inpackage=0): ...@@ -287,3 +290,6 @@ def readmodule(module, path=[], inpackage=0):
assert 0, "regexp _getnext found something unexpected" assert 0, "regexp _getnext found something unexpected"
return dict return dict
def _indent(ws, _expandtabs=string.expandtabs):
    """Return the column width of the whitespace string ws.

    Tabs in ws are expanded according to the module-level TABWIDTH
    setting before the length is measured, so the result can be compared
    against the indentation of other lines.  The _expandtabs parameter is
    a default-argument binding of string.expandtabs; the calls visible in
    this file pass only ws.
    """
    expanded = _expandtabs(ws, TABWIDTH)
    return len(expanded)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment