Commit b0871cac authored by Benjamin Peterson's avatar Benjamin Peterson

Merged revisions 85510 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/branches/py3k

................
  r85510 | benjamin.peterson | 2010-10-14 18:00:04 -0500 (Thu, 14 Oct 2010) | 61 lines

  Merged revisions 83852-83853,83857,84042,84216,84274-84276,84375,85388,85478,85506-85508 via svnmerge from
  svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3

  ........
    r83852 | benjamin.peterson | 2010-08-08 15:45:44 -0500 (Sun, 08 Aug 2010) | 1 line

    wrap with parens
  ........
    r83853 | benjamin.peterson | 2010-08-08 15:46:31 -0500 (Sun, 08 Aug 2010) | 1 line

    use parens
  ........
    r83857 | benjamin.peterson | 2010-08-08 15:59:49 -0500 (Sun, 08 Aug 2010) | 1 line

    things which use touch_import should be pre order
  ........
    r84042 | george.boutsioukis | 2010-08-14 16:10:19 -0500 (Sat, 14 Aug 2010) | 2 lines

    This revision incorporates into the 2to3 tool the new, faster, tree matching algorithm developed during a GSOC project. The algorithm resides in the two added modules, btm_matcher and btm_utils. New code has been added to drive the new matching process in refactor.py and a few minor changes were made in other modules. A BM_compatible flag(False by default) has been added in fixer_base and it is set to True in most of the current fixers.
  ........
    r84216 | benjamin.peterson | 2010-08-19 16:44:05 -0500 (Thu, 19 Aug 2010) | 1 line

    allow star_expr in testlist_gexp
  ........
    r84274 | benjamin.peterson | 2010-08-22 18:40:46 -0500 (Sun, 22 Aug 2010) | 1 line

    wrap long line
  ........
    r84275 | benjamin.peterson | 2010-08-22 18:42:22 -0500 (Sun, 22 Aug 2010) | 1 line

    cleanup
  ........
    r84276 | benjamin.peterson | 2010-08-22 18:51:01 -0500 (Sun, 22 Aug 2010) | 1 line

    when there's a None value and a traceback, don't call type with it #9661
  ........
    r84375 | george.boutsioukis | 2010-08-31 08:38:53 -0500 (Tue, 31 Aug 2010) | 3 lines

    Idiomatic code changes & stylistic issues fixed in the BottomMatcher module. Thanks to Benjamin Peterson for taking the time to review the code.
  ........
    r85388 | benjamin.peterson | 2010-10-12 17:27:44 -0500 (Tue, 12 Oct 2010) | 1 line

    fix urllib fixer with multiple as imports on a line #10069
  ........
    r85478 | benjamin.peterson | 2010-10-14 08:09:56 -0500 (Thu, 14 Oct 2010) | 1 line

    stop abusing docstrings
  ........
    r85506 | benjamin.peterson | 2010-10-14 17:45:19 -0500 (Thu, 14 Oct 2010) | 1 line

    kill sibling import
  ........
    r85507 | benjamin.peterson | 2010-10-14 17:54:15 -0500 (Thu, 14 Oct 2010) | 1 line

    remove trailing whitespace
  ........
    r85508 | benjamin.peterson | 2010-10-14 17:55:28 -0500 (Thu, 14 Oct 2010) | 1 line

    typo
  ........
................
parent ea5d827b
......@@ -128,7 +128,7 @@ atom: ('(' [yield_expr|testlist_gexp] ')' |
'`' testlist1 '`' |
NAME | NUMBER | STRING+ | '.' '.' '.')
listmaker: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
testlist_gexp: test ( comp_for | (',' (test|star_expr))* [','] )
testlist_gexp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
lambdef: 'lambda' [varargslist] ':' test
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
......
"""A bottom-up tree matching algorithm implementation meant to speed
up 2to3's matching process. After the tree patterns are reduced to
their rarest linear path, a linear Aho-Corasick automaton is
created. The linear automaton traverses the linear paths from the
leaves to the root of the AST and returns a set of nodes for further
matching. This reduces significantly the number of candidate nodes."""
__author__ = "George Boutsioukis <gboutsioukis@gmail.com>"
import logging
import itertools
from collections import defaultdict
from . import pytree
from .btm_utils import reduce_tree
class BMNode(object):
"""Class for a node of the Aho-Corasick automaton used in matching"""
count = itertools.count()
def __init__(self):
self.transition_table = {}
self.fixers = []
self.id = next(BMNode.count)
self.content = ''
class BottomMatcher(object):
"""The main matcher class. After instantiating the patterns should
be added using the add_fixer method"""
def __init__(self):
self.match = set()
self.root = BMNode()
self.nodes = [self.root]
self.fixers = []
self.logger = logging.getLogger("RefactoringTool")
def add_fixer(self, fixer):
"""Reduces a fixer's pattern tree to a linear path and adds it
to the matcher(a common Aho-Corasick automaton). The fixer is
appended on the matching states and called when they are
reached"""
self.fixers.append(fixer)
tree = reduce_tree(fixer.pattern_tree)
linear = tree.get_linear_subpattern()
match_nodes = self.add(linear, start=self.root)
for match_node in match_nodes:
match_node.fixers.append(fixer)
def add(self, pattern, start):
"Recursively adds a linear pattern to the AC automaton"
#print("adding pattern", pattern, "to", start)
if not pattern:
#print("empty pattern")
return [start]
if isinstance(pattern[0], tuple):
#alternatives
#print("alternatives")
match_nodes = []
for alternative in pattern[0]:
#add all alternatives, and add the rest of the pattern
#to each end node
end_nodes = self.add(alternative, start=start)
for end in end_nodes:
match_nodes.extend(self.add(pattern[1:], end))
return match_nodes
else:
#single token
#not last
if pattern[0] not in start.transition_table:
#transition did not exist, create new
next_node = BMNode()
start.transition_table[pattern[0]] = next_node
else:
#transition exists already, follow
next_node = start.transition_table[pattern[0]]
if pattern[1:]:
end_nodes = self.add(pattern[1:], start=next_node)
else:
end_nodes = [next_node]
return end_nodes
def run(self, leaves):
"""The main interface with the bottom matcher. The tree is
traversed from the bottom using the constructed
automaton. Nodes are only checked once as the tree is
retraversed. When the automaton fails, we give it one more
shot(in case the above tree matches as a whole with the
rejected leaf), then we break for the next leaf. There is the
special case of multiple arguments(see code comments) where we
recheck the nodes
Args:
The leaves of the AST tree to be matched
Returns:
A dictionary of node matches with fixers as the keys
"""
current_ac_node = self.root
results = defaultdict(list)
for leaf in leaves:
current_ast_node = leaf
while current_ast_node:
current_ast_node.was_checked = True
for child in current_ast_node.children:
# multiple statements, recheck
if isinstance(child, pytree.Leaf) and child.value == ";":
current_ast_node.was_checked = False
break
if current_ast_node.type == 1:
#name
node_token = current_ast_node.value
else:
node_token = current_ast_node.type
if node_token in current_ac_node.transition_table:
#token matches
current_ac_node = current_ac_node.transition_table[node_token]
for fixer in current_ac_node.fixers:
if not fixer in results:
results[fixer] = []
results[fixer].append(current_ast_node)
else:
#matching failed, reset automaton
current_ac_node = self.root
if (current_ast_node.parent is not None
and current_ast_node.parent.was_checked):
#the rest of the tree upwards has been checked, next leaf
break
#recheck the rejected node once from the root
if node_token in current_ac_node.transition_table:
#token matches
current_ac_node = current_ac_node.transition_table[node_token]
for fixer in current_ac_node.fixers:
if not fixer in results.keys():
results[fixer] = []
results[fixer].append(current_ast_node)
current_ast_node = current_ast_node.parent
return results
def print_ac(self):
"Prints a graphviz diagram of the BM automaton(for debugging)"
print("digraph g{")
def print_node(node):
for subnode_key in node.transition_table.keys():
subnode = node.transition_table[subnode_key]
print("%d -> %d [label=%s] //%s" %
(node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))
if subnode_key == 1:
print(subnode.content)
print_node(subnode)
print_node(self.root)
print("}")
# taken from pytree.py for debugging; only used by print_ac
_type_reprs = {}
def type_repr(type_num):
global _type_reprs
if not _type_reprs:
from .pygram import python_symbols
# printing tokens is possible but not as useful
# from .pgen2 import token // token.__dict__.items():
for name, val in python_symbols.__dict__.items():
if type(val) == int: _type_reprs[val] = name
return _type_reprs.setdefault(type_num, type_num)
"Utility functions used by the btm_matcher module"
from . import pytree
from .pgen2 import grammar, token
from .pygram import pattern_symbols, python_symbols
syms = pattern_symbols
pysyms = python_symbols
tokens = grammar.opmap
token_labels = token
TYPE_ANY = -1
TYPE_ALTERNATIVES = -2
TYPE_GROUP = -3
class MinNode(object):
"""This class serves as an intermediate representation of the
pattern tree during the conversion to sets of leaf-to-root
subpatterns"""
def __init__(self, type=None, name=None):
self.type = type
self.name = name
self.children = []
self.leaf = False
self.parent = None
self.alternatives = []
self.group = []
def __repr__(self):
return str(self.type) + ' ' + str(self.name)
def leaf_to_root(self):
"""Internal method. Returns a characteristic path of the
pattern tree. This method must be run for all leaves until the
linear subpatterns are merged into a single"""
node = self
subp = []
while node:
if node.type == TYPE_ALTERNATIVES:
node.alternatives.append(subp)
if len(node.alternatives) == len(node.children):
#last alternative
subp = [tuple(node.alternatives)]
node.alternatives = []
node = node.parent
continue
else:
node = node.parent
subp = None
break
if node.type == TYPE_GROUP:
node.group.append(subp)
#probably should check the number of leaves
if len(node.group) == len(node.children):
subp = get_characteristic_subpattern(node.group)
node.group = []
node = node.parent
continue
else:
node = node.parent
subp = None
break
if node.type == token_labels.NAME and node.name:
#in case of type=name, use the name instead
subp.append(node.name)
else:
subp.append(node.type)
node = node.parent
return subp
def get_linear_subpattern(self):
"""Drives the leaf_to_root method. The reason that
leaf_to_root must be run multiple times is because we need to
reject 'group' matches; for example the alternative form
(a | b c) creates a group [b c] that needs to be matched. Since
matching multiple linear patterns overcomes the automaton's
capabilities, leaf_to_root merges each group into a single
choice based on 'characteristic'ity,
i.e. (a|b c) -> (a|b) if b more characteristic than c
Returns: The most 'characteristic'(as defined by
get_characteristic_subpattern) path for the compiled pattern
tree.
"""
for l in self.leaves():
subp = l.leaf_to_root()
if subp:
return subp
def leaves(self):
"Generator that returns the leaves of the tree"
for child in self.children:
for x in child.leaves():
yield x
if not self.children:
yield self
def reduce_tree(node, parent=None):
"""
Internal function. Reduces a compiled pattern tree to an
intermediate representation suitable for feeding the
automaton. This also trims off any optional pattern elements(like
[a], a*).
"""
new_node = None
#switch on the node type
if node.type == syms.Matcher:
#skip
node = node.children[0]
if node.type == syms.Alternatives :
#2 cases
if len(node.children) <= 2:
#just a single 'Alternative', skip this node
new_node = reduce_tree(node.children[0], parent)
else:
#real alternatives
new_node = MinNode(type=TYPE_ALTERNATIVES)
#skip odd children('|' tokens)
for child in node.children:
if node.children.index(child)%2:
continue
reduced = reduce_tree(child, new_node)
if reduced is not None:
new_node.children.append(reduced)
elif node.type == syms.Alternative:
if len(node.children) > 1:
new_node = MinNode(type=TYPE_GROUP)
for child in node.children:
reduced = reduce_tree(child, new_node)
if reduced:
new_node.children.append(reduced)
if not new_node.children:
# delete the group if all of the children were reduced to None
new_node = None
else:
new_node = reduce_tree(node.children[0], parent)
elif node.type == syms.Unit:
if (isinstance(node.children[0], pytree.Leaf) and
node.children[0].value == '('):
#skip parentheses
return reduce_tree(node.children[1], parent)
if ((isinstance(node.children[0], pytree.Leaf) and
node.children[0].value == '[')
or
(len(node.children)>1 and
hasattr(node.children[1], "value") and
node.children[1].value == '[')):
#skip whole unit if its optional
return None
leaf = True
details_node = None
alternatives_node = None
has_repeater = False
repeater_node = None
has_variable_name = False
for child in node.children:
if child.type == syms.Details:
leaf = False
details_node = child
elif child.type == syms.Repeater:
has_repeater = True
repeater_node = child
elif child.type == syms.Alternatives:
alternatives_node = child
if hasattr(child, 'value') and child.value == '=': # variable name
has_variable_name = True
#skip variable name
if has_variable_name:
#skip variable name, '='
name_leaf = node.children[2]
if hasattr(name_leaf, 'value') and name_leaf.value == '(':
# skip parenthesis
name_leaf = node.children[3]
else:
name_leaf = node.children[0]
#set node type
if name_leaf.type == token_labels.NAME:
#(python) non-name or wildcard
if name_leaf.value == 'any':
new_node = MinNode(type=TYPE_ANY)
else:
if hasattr(token_labels, name_leaf.value):
new_node = MinNode(type=getattr(token_labels, name_leaf.value))
else:
new_node = MinNode(type=getattr(pysyms, name_leaf.value))
elif name_leaf.type == token_labels.STRING:
#(python) name or character; remove the apostrophes from
#the string value
name = name_leaf.value.strip("'")
if name in tokens:
new_node = MinNode(type=tokens[name])
else:
new_node = MinNode(type=token_labels.NAME, name=name)
elif name_leaf.type == syms.Alternatives:
new_node = reduce_tree(alternatives_node, parent)
#handle repeaters
if has_repeater:
if repeater_node.children[0].value == '*':
#reduce to None
new_node = None
elif repeater_node.children[0].value == '+':
#reduce to a single occurence i.e. do nothing
pass
else:
#TODO: handle {min, max} repeaters
raise NotImplementedError
pass
#add children
if details_node and new_node is not None:
for child in details_node.children[1:-1]:
#skip '<', '>' markers
reduced = reduce_tree(child, new_node)
if reduced is not None:
new_node.children.append(reduced)
if new_node:
new_node.parent = parent
return new_node
def get_characteristic_subpattern(subpatterns):
"""Picks the most characteristic from a list of linear patterns
Current order used is:
names > common_names > common_chars
"""
if not isinstance(subpatterns, list):
return subpatterns
if len(subpatterns)==1:
return subpatterns[0]
# first pick out the ones containing variable names
subpatterns_with_names = []
subpatterns_with_common_names = []
common_names = ['in', 'for', 'if' , 'not', 'None']
subpatterns_with_common_chars = []
common_chars = "[]().,:"
for subpattern in subpatterns:
if any(rec_test(subpattern, lambda x: type(x) is str)):
if any(rec_test(subpattern,
lambda x: isinstance(x, str) and x in common_chars)):
subpatterns_with_common_chars.append(subpattern)
elif any(rec_test(subpattern,
lambda x: isinstance(x, str) and x in common_names)):
subpatterns_with_common_names.append(subpattern)
else:
subpatterns_with_names.append(subpattern)
if subpatterns_with_names:
subpatterns = subpatterns_with_names
elif subpatterns_with_common_names:
subpatterns = subpatterns_with_common_names
elif subpatterns_with_common_chars:
subpatterns = subpatterns_with_common_chars
# of the remaining subpatterns pick out the longest one
return max(subpatterns, key=len)
def rec_test(sequence, test_func):
"""Tests test_func on all items of sequence and items of included
sub-iterables"""
for x in sequence:
if isinstance(x, (list, tuple)):
for y in rec_test(x, test_func):
yield y
else:
yield test_func(x)
......@@ -24,6 +24,7 @@ class BaseFix(object):
PATTERN = None # Most subclasses should override with a string literal
pattern = None # Compiled pattern, set by compile_pattern()
pattern_tree = None # Tree representation of the pattern
options = None # Options object passed to initializer
filename = None # The filename (set by set_filename)
logger = None # A logger (set by set_filename)
......@@ -36,6 +37,12 @@ class BaseFix(object):
_accept_type = None # [Advanced and not public] This tells RefactoringTool
# which node type to accept when there's not a pattern.
keep_line_order = False # For the bottom matcher: match with the
# original line order
BM_compatible = False # Compatibility with the bottom matching
# module; every fixer should set this
# manually
# Shortcut for access to Python grammar symbols
syms = pygram.python_symbols
......@@ -58,7 +65,9 @@ class BaseFix(object):
self.{pattern,PATTERN} in .match().
"""
if self.PATTERN is not None:
self.pattern = PatternCompiler().compile_pattern(self.PATTERN)
PC = PatternCompiler()
self.pattern, self.pattern_tree = PC.compile_pattern(self.PATTERN,
with_tree=True)
def set_filename(self, filename):
"""Set the filename, and a logger derived from it.
......
......@@ -295,8 +295,8 @@ def touch_import(package, name, node):
""" Works like `does_tree_import` but adds an import statement
if it was not imported. """
def is_import_stmt(node):
return node.type == syms.simple_stmt and node.children and \
is_import(node.children[0])
return (node.type == syms.simple_stmt and node.children and
is_import(node.children[0]))
root = find_root(node)
......@@ -319,8 +319,8 @@ def touch_import(package, name, node):
# if that also fails, we stick to the beginning of the file
if insert_pos == 0:
for idx, node in enumerate(root.children):
if node.type == syms.simple_stmt and node.children and \
node.children[0].type == token.STRING:
if (node.type == syms.simple_stmt and node.children and
node.children[0].type == token.STRING):
insert_pos = idx + 1
break
......
......@@ -12,6 +12,7 @@ from .. import fixer_base
from ..fixer_util import Call, Comma, parenthesize
class FixApply(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
power< 'apply'
......
......@@ -6,6 +6,7 @@ from .. import fixer_base
from ..fixer_util import Name
class FixBasestring(fixer_base.BaseFix):
BM_compatible = True
PATTERN = "'basestring'"
......
......@@ -9,6 +9,7 @@ from ..fixer_util import Name
class FixBuffer(fixer_base.BaseFix):
BM_compatible = True
explicit = True # The user must ask for this fixer
......
......@@ -11,6 +11,9 @@ from lib2to3 import fixer_base
from lib2to3.fixer_util import Call, Name, String, Attr, touch_import
class FixCallable(fixer_base.BaseFix):
BM_compatible = True
order = "pre"
# Ignore callable(*args) or use of keywords.
# Either could be a hint that the builtin callable() is not being used.
......
......@@ -40,6 +40,8 @@ iter_exempt = fixer_util.consuming_calls | set(["iter"])
class FixDict(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
power< head=any+
trailer< '.' method=('keys'|'items'|'values'|
......
......@@ -34,6 +34,7 @@ def find_excepts(nodes):
yield (n, nodes[i+2])
class FixExcept(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
try_stmt< 'try' ':' (simple_stmt | suite)
......
......@@ -16,6 +16,7 @@ from ..fixer_util import Comma, Name, Call
class FixExec(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
exec_stmt< 'exec' a=any 'in' b=any [',' c=any] >
......
......@@ -13,6 +13,7 @@ from ..fixer_util import (Comma, Name, Call, LParen, RParen, Dot, Node,
class FixExecfile(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
power< 'execfile' trailer< '(' arglist< filename=any [',' globals=any [',' locals=any ] ] > ')' > >
......
......@@ -9,6 +9,8 @@ from lib2to3.fixer_util import Name, Attr, Call, Comma, Newline, syms
class FixExitfunc(fixer_base.BaseFix):
keep_line_order = True
BM_compatible = True
PATTERN = """
(
......
......@@ -19,6 +19,7 @@ from .. import fixer_base
from ..fixer_util import Name, Call, ListComp, in_special_context
class FixFilter(fixer_base.ConditionalFix):
BM_compatible = True
PATTERN = """
filter_lambda=power<
......
......@@ -7,6 +7,8 @@ from ..fixer_util import Name
class FixFuncattrs(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
power< any+ trailer< '.' attr=('func_closure' | 'func_doc' | 'func_globals'
| 'func_name' | 'func_defaults' | 'func_code'
......
......@@ -9,6 +9,8 @@ from .. import fixer_base
from ..fixer_util import BlankLine
class FixFuture(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """import_from< 'from' module_name="__future__" 'import' any >"""
# This should be run last -- some things check for the import
......
......@@ -8,6 +8,7 @@ from .. import fixer_base
from ..fixer_util import Name
class FixGetcwdu(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
power< 'os' trailer< dot='.' name='getcwdu' > any* >
......
......@@ -37,6 +37,7 @@ from ..fixer_util import Name, parenthesize
class FixHasKey(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
anchor=power<
......
......@@ -35,7 +35,6 @@ CMP = "(n='!=' | '==' | 'is' | n=comp_op< 'is' 'not' >)"
TYPE = "power< 'type' trailer< '(' x=any ')' > >"
class FixIdioms(fixer_base.BaseFix):
explicit = True # The user must ask for this fixer
PATTERN = r"""
......
......@@ -36,6 +36,7 @@ def traverse_imports(names):
class FixImport(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
import_from< 'from' imp=any 'import' ['('] any [')'] >
......
......@@ -84,6 +84,8 @@ def build_pattern(mapping=MAPPING):
class FixImports(fixer_base.BaseFix):
BM_compatible = True
keep_line_order = True
# This is overridden in fix_imports2.
mapping = MAPPING
......
......@@ -11,7 +11,7 @@ context = patcomp.compile_pattern("power< 'eval' trailer< '(' any ')' > >")
class FixInput(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
power< 'input' args=trailer< '(' [any] ')' > >
"""
......
......@@ -12,6 +12,8 @@ from ..fixer_util import Name, Attr, touch_import
class FixIntern(fixer_base.BaseFix):
BM_compatible = True
order = "pre"
PATTERN = """
power< 'intern'
......
......@@ -14,7 +14,7 @@ from ..fixer_util import token
class FixIsinstance(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
power<
'isinstance'
......
......@@ -12,6 +12,7 @@ from .. import fixer_base
from ..fixer_util import Name
class FixItertools(fixer_base.BaseFix):
BM_compatible = True
it_funcs = "('imap'|'ifilter'|'izip'|'ifilterfalse')"
PATTERN = """
power< it='itertools'
......
......@@ -6,6 +6,7 @@ from lib2to3.fixer_util import BlankLine, syms, token
class FixItertoolsImports(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
import_from< 'from' 'itertools' 'import' imports=any >
""" %(locals())
......
......@@ -10,7 +10,7 @@ from lib2to3.fixer_util import is_probably_builtin
class FixLong(fixer_base.BaseFix):
BM_compatible = True
PATTERN = "'long'"
def transform(self, node, results):
......
......@@ -26,6 +26,7 @@ from ..fixer_util import Name, Call, ListComp, in_special_context
from ..pygram import python_symbols as syms
class FixMap(fixer_base.ConditionalFix):
BM_compatible = True
PATTERN = """
map_none=power<
......
......@@ -143,6 +143,7 @@ def fixup_indent(suite):
class FixMetaclass(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
classdef<any*>
......
......@@ -13,6 +13,7 @@ MAP = {
}
class FixMethodattrs(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
power< any+ trailer< '.' attr=('im_func' | 'im_self' | 'im_class') > any* >
"""
......
......@@ -15,6 +15,7 @@ bind_warning = "Calls to builtin next() possibly shadowed by global binding"
class FixNext(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
power< base=any+ trailer< '.' attr='next' > trailer< '(' ')' > >
|
......
......@@ -6,6 +6,7 @@ from .. import fixer_base
from ..fixer_util import Name, syms
class FixNonzero(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
classdef< 'class' any+ ':'
suite< any*
......
......@@ -16,7 +16,16 @@ from lib2to3 import fixer_base
from lib2to3.fixer_util import Call, Name, String, touch_import
def invocation(s):
def dec(f):
f.invocation = s
return f
return dec
class FixOperator(fixer_base.BaseFix):
BM_compatible = True
order = "pre"
methods = """
method=('isCallable'|'sequenceIncludes'
......@@ -36,34 +45,34 @@ class FixOperator(fixer_base.BaseFix):
if method is not None:
return method(node, results)
@invocation("operator.contains(%s)")
def _sequenceIncludes(self, node, results):
"""operator.contains(%s)"""
return self._handle_rename(node, results, "contains")
@invocation("hasattr(%s, '__call__')")
def _isCallable(self, node, results):
"""hasattr(%s, '__call__')"""
obj = results["obj"]
args = [obj.clone(), String(", "), String("'__call__'")]
return Call(Name("hasattr"), args, prefix=node.prefix)
@invocation("operator.mul(%s)")
def _repeat(self, node, results):
"""operator.mul(%s)"""
return self._handle_rename(node, results, "mul")
@invocation("operator.imul(%s)")
def _irepeat(self, node, results):
"""operator.imul(%s)"""
return self._handle_rename(node, results, "imul")
@invocation("isinstance(%s, collections.Sequence)")
def _isSequenceType(self, node, results):
"""isinstance(%s, collections.Sequence)"""
return self._handle_type2abc(node, results, "collections", "Sequence")
@invocation("isinstance(%s, collections.Mapping)")
def _isMappingType(self, node, results):
"""isinstance(%s, collections.Mapping)"""
return self._handle_type2abc(node, results, "collections", "Mapping")
@invocation("isinstance(%s, numbers.Number)")
def _isNumberType(self, node, results):
"""isinstance(%s, numbers.Number)"""
return self._handle_type2abc(node, results, "numbers", "Number")
def _handle_rename(self, node, results, name):
......@@ -84,6 +93,6 @@ class FixOperator(fixer_base.BaseFix):
return method
else:
sub = (str(results["obj"]),)
invocation_str = str(method.__doc__) % sub
invocation_str = method.invocation % sub
self.warning(node, "You should use '%s' here." % invocation_str)
return None
......@@ -10,6 +10,8 @@ from ..fixer_util import LParen, RParen
# XXX This doesn't support nested for loops like [x for x in 1, 2 for x in 1, 2]
class FixParen(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
atom< ('[' | '(')
(listmaker< any
......
......@@ -28,6 +28,8 @@ parend_expr = patcomp.compile_pattern(
class FixPrint(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
simple_stmt< any* bare='print' any* > | print_stmt
"""
......
......@@ -4,6 +4,7 @@ raise -> raise
raise E -> raise E
raise E, V -> raise E(V)
raise E, V, T -> raise E(V).with_traceback(T)
raise E, None, T -> raise E.with_traceback(T)
raise (((E, E'), E''), E'''), V -> raise E(V)
raise "foo", V, T -> warns about string exceptions
......@@ -29,6 +30,7 @@ from ..fixer_util import Name, Call, Attr, ArgList, is_tuple
class FixRaise(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
raise_stmt< 'raise' exc=any [',' val=any [',' tb=any]] >
"""
......@@ -37,8 +39,9 @@ class FixRaise(fixer_base.BaseFix):
syms = self.syms
exc = results["exc"].clone()
if exc.type is token.STRING:
self.cannot_convert(node, "Python 3 does not support string exceptions")
if exc.type == token.STRING:
msg = "Python 3 does not support string exceptions"
self.cannot_convert(node, msg)
return
# Python 2 supports
......@@ -71,7 +74,12 @@ class FixRaise(fixer_base.BaseFix):
tb = results["tb"].clone()
tb.prefix = ""
e = Call(exc, args)
e = exc
# If there's a traceback and None is passed as the value, then don't
# add a call, since the user probably just wants to add a
# traceback. See issue #9661.
if val.type != token.NAME or val.value != "None":
e = Call(exc, args)
with_tb = Attr(e, Name('with_traceback')) + [ArgList([tb])]
new = pytree.Node(syms.simple_stmt, [Name("raise")] + with_tb)
new.prefix = node.prefix
......
......@@ -7,6 +7,7 @@ from ..fixer_util import Name
class FixRawInput(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
power< name='raw_input' trailer< '(' [any] ')' > any* >
"""
......
......@@ -14,6 +14,9 @@ from lib2to3.fixer_util import touch_import
class FixReduce(fixer_base.BaseFix):
BM_compatible = True
order = "pre"
PATTERN = """
power< 'reduce'
trailer< '('
......
......@@ -40,6 +40,7 @@ def build_pattern():
class FixRenames(fixer_base.BaseFix):
BM_compatible = True
PATTERN = "|".join(build_pattern())
order = "pre" # Pre-order tree traversal
......
......@@ -10,6 +10,7 @@ from ..fixer_util import Call, Name, parenthesize
class FixRepr(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
atom < '`' expr=any '`' >
"""
......
......@@ -11,6 +11,7 @@ from lib2to3.fixer_util import token, syms
class FixSetLiteral(fixer_base.BaseFix):
BM_compatible = True
explicit = True
PATTERN = """power< 'set' trailer< '('
......
......@@ -9,7 +9,7 @@ from ..fixer_util import Name
class FixStandarderror(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
'StandardError'
"""
......
......@@ -14,6 +14,7 @@ from ..fixer_util import Attr, Call, Name, Number, Subscript, Node, syms
class FixSysExc(fixer_base.BaseFix):
# This order matches the ordering of sys.exc_info().
exc_info = ["exc_type", "exc_value", "exc_traceback"]
BM_compatible = True
PATTERN = """
power< 'sys' trailer< dot='.' attribute=(%s) > >
""" % '|'.join("'%s'" % e for e in exc_info)
......
......@@ -14,7 +14,7 @@ from .. import fixer_base
from ..fixer_util import Name, Call, ArgList, Attr, is_tuple
class FixThrow(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
power< any trailer< '.' 'throw' >
trailer< '(' args=arglist< exc=any ',' val=any [',' tb=any] > ')' >
......
......@@ -29,6 +29,10 @@ def is_docstring(stmt):
stmt.children[0].type == token.STRING
class FixTupleParams(fixer_base.BaseFix):
run_order = 4 #use a lower order since lambda is part of other
#patterns
BM_compatible = True
PATTERN = """
funcdef< 'def' any parameters< '(' args=any ')' >
['->' any] ':' suite=any+ >
......
......@@ -52,7 +52,7 @@ _TYPE_MAPPING = {
_pats = ["power< 'types' trailer< '.' name='%s' > >" % t for t in _TYPE_MAPPING]
class FixTypes(fixer_base.BaseFix):
BM_compatible = True
PATTERN = '|'.join(_pats)
def transform(self, node, results):
......
......@@ -10,7 +10,7 @@ _mapping = {"unichr" : "chr", "unicode" : "str"}
_literal_re = re.compile(r"[uU][rR]?[\'\"]")
class FixUnicode(fixer_base.BaseFix):
BM_compatible = True
PATTERN = "STRING | 'unicode' | 'unichr'"
def transform(self, node, results):
......
......@@ -8,7 +8,7 @@
from lib2to3.fixes.fix_imports import alternates, FixImports
from lib2to3 import fixer_base
from lib2to3.fixer_util import (Name, Comma, FromImport, Newline,
find_indentation)
find_indentation, Node, syms)
MAPPING = {"urllib": [
("urllib.request",
......@@ -121,26 +121,37 @@ class FixUrllib(FixImports):
mod_dict = {}
members = results["members"]
for member in members:
member = member.value
# we only care about the actual members
if member != ",":
if member.type == syms.import_as_name:
as_name = member.children[2].value
member_name = member.children[0].value
else:
member_name = member.value
as_name = None
if member_name != ",":
for change in MAPPING[mod_member.value]:
if member in change[1]:
if change[0] in mod_dict:
mod_dict[change[0]].append(member)
else:
mod_dict[change[0]] = [member]
if member_name in change[1]:
if change[0] not in mod_dict:
modules.append(change[0])
mod_dict.setdefault(change[0], []).append(member)
new_nodes = []
indentation = find_indentation(node)
first = True
def handle_name(name, prefix):
if name.type == syms.import_as_name:
kids = [Name(name.children[0].value, prefix=prefix),
name.children[1].clone(),
name.children[2].clone()]
return [Node(syms.import_as_name, kids)]
return [Name(name.value, prefix=prefix)]
for module in modules:
elts = mod_dict[module]
names = []
for elt in elts[:-1]:
names.extend([Name(elt, prefix=pref), Comma()])
names.append(Name(elts[-1], prefix=pref))
names.extend(handle_name(elt, pref))
names.append(Comma())
names.extend(handle_name(elts[-1], pref))
new = FromImport(module, names)
if not first or node.parent.prefix.endswith(indentation):
new.prefix = indentation
......
......@@ -10,7 +10,7 @@ from .. import patcomp
class FixXrange(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
power<
(name='range'|name='xrange') trailer< '(' args=any ')' >
......
......@@ -9,6 +9,7 @@ from ..fixer_util import Name
class FixXreadlines(fixer_base.BaseFix):
BM_compatible = True
PATTERN = """
power< call=any+ trailer< '.' 'xreadlines' > trailer< '(' ')' > >
|
......
......@@ -13,6 +13,7 @@ from ..fixer_util import Name, Call, in_special_context
class FixZip(fixer_base.ConditionalFix):
BM_compatible = True
PATTERN = """
power< 'zip' args=trailer< '(' [any] ')' >
>
......
......@@ -52,14 +52,17 @@ class PatternCompiler(object):
self.pysyms = pygram.python_symbols
self.driver = driver.Driver(self.grammar, convert=pattern_convert)
def compile_pattern(self, input, debug=False):
def compile_pattern(self, input, debug=False, with_tree=False):
"""Compiles a pattern string to a nested pytree.*Pattern object."""
tokens = tokenize_wrapper(input)
try:
root = self.driver.parse_tokens(tokens, debug=debug)
except parse.ParseError as e:
raise PatternSyntaxError(str(e))
return self.compile_node(root)
if with_tree:
return self.compile_node(root), root
else:
return self.compile_node(root)
def compile_node(self, node):
"""Compiles a node, recursively.
......
......@@ -13,6 +13,8 @@ from . import pytree
# The grammar file
_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt")
_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
"PatternGrammar.txt")
class Symbols(object):
......@@ -33,3 +35,6 @@ python_symbols = Symbols(python_grammar)
python_grammar_no_print_statement = python_grammar.copy()
del python_grammar_no_print_statement.keywords["print"]
pattern_grammar = driver.load_grammar(_PATTERN_GRAMMAR_FILE)
pattern_symbols = Symbols(pattern_grammar)
......@@ -16,7 +16,6 @@ import sys
import warnings
from io import StringIO
HUGE = 0x7FFFFFFF # maximum repeat count, default max
_type_reprs = {}
......@@ -30,7 +29,6 @@ def type_repr(type_num):
if type(val) == int: _type_reprs[val] = name
return _type_reprs.setdefault(type_num, type_num)
class Base(object):
"""
......@@ -47,6 +45,7 @@ class Base(object):
parent = None # Parent node pointer, or None
children = () # Tuple of subnodes
was_changed = False
was_checked = False
def __new__(cls, *args, **kwds):
"""Constructor that prevents Base from being instantiated."""
......@@ -213,6 +212,16 @@ class Base(object):
return None
return self.parent.children[i-1]
def leaves(self):
for child in self.children:
for x in child.leaves():
yield x
def depth(self):
if self.parent is None:
return 0
return 1 + self.parent.depth()
def get_suffix(self):
"""
Return the string immediately following the invocant node. This is
......@@ -227,12 +236,14 @@ class Base(object):
def __str__(self):
return str(self).encode("ascii")
class Node(Base):
"""Concrete implementation for interior nodes."""
def __init__(self, type, children, context=None, prefix=None):
def __init__(self,type, children,
context=None,
prefix=None,
fixers_applied=None):
"""
Initializer.
......@@ -249,6 +260,10 @@ class Node(Base):
ch.parent = self
if prefix is not None:
self.prefix = prefix
if fixers_applied:
self.fixers_applied = fixers_applied[:]
else:
self.fixers_applied = None
def __repr__(self):
"""Return a canonical string representation."""
......@@ -273,7 +288,8 @@ class Node(Base):
def clone(self):
"""Return a cloned (deep) copy of self."""
return Node(self.type, [ch.clone() for ch in self.children])
return Node(self.type, [ch.clone() for ch in self.children],
fixers_applied=self.fixers_applied)
def post_order(self):
"""Return a post-order iterator for the tree."""
......@@ -341,7 +357,10 @@ class Leaf(Base):
lineno = 0 # Line where this token starts in the input
column = 0 # Column where this token tarts in the input
def __init__(self, type, value, context=None, prefix=None):
def __init__(self, type, value,
context=None,
prefix=None,
fixers_applied=[]):
"""
Initializer.
......@@ -355,6 +374,7 @@ class Leaf(Base):
self.value = value
if prefix is not None:
self._prefix = prefix
self.fixers_applied = fixers_applied[:]
def __repr__(self):
"""Return a canonical string representation."""
......@@ -380,7 +400,11 @@ class Leaf(Base):
def clone(self):
"""Return a cloned (deep) copy of self."""
return Leaf(self.type, self.value,
(self.prefix, (self.lineno, self.column)))
(self.prefix, (self.lineno, self.column)),
fixers_applied=self.fixers_applied)
def leaves(self):
yield self
def post_order(self):
"""Return a post-order iterator for the tree."""
......
......@@ -24,7 +24,10 @@ from itertools import chain
# Local imports
from .pgen2 import driver, tokenize, token
from .fixer_util import find_root
from . import pytree, pygram
from . import btm_utils as bu
from . import btm_matcher as bm
def get_all_fix_names(fixer_pkg, remove_prefix=True):
......@@ -201,11 +204,28 @@ class RefactoringTool(object):
logger=self.logger)
self.pre_order, self.post_order = self.get_fixers()
self.pre_order_heads = _get_headnode_dict(self.pre_order)
self.post_order_heads = _get_headnode_dict(self.post_order)
self.files = [] # List of files that were or should be modified
self.BM = bm.BottomMatcher()
self.bmi_pre_order = [] # Bottom Matcher incompatible fixers
self.bmi_post_order = []
for fixer in chain(self.post_order, self.pre_order):
if fixer.BM_compatible:
self.BM.add_fixer(fixer)
# remove fixers that will be handled by the bottom-up
# matcher
elif fixer in self.pre_order:
self.bmi_pre_order.append(fixer)
elif fixer in self.post_order:
self.bmi_post_order.append(fixer)
self.bmi_pre_order_heads = _get_headnode_dict(self.bmi_pre_order)
self.bmi_post_order_heads = _get_headnode_dict(self.bmi_post_order)
def get_fixers(self):
"""Inspects the options to load the requested patterns and handlers.
......@@ -268,6 +288,7 @@ class RefactoringTool(object):
def refactor(self, items, write=False, doctests_only=False):
"""Refactor a list of files and directories."""
for dir_or_file in items:
if os.path.isdir(dir_or_file):
self.refactor_dir(dir_or_file, write, doctests_only)
......@@ -378,6 +399,10 @@ class RefactoringTool(object):
def refactor_tree(self, tree, name):
"""Refactors a parse tree (modifying the tree in place).
For compatible patterns the bottom matcher module is
used. Otherwise the tree is traversed node-to-node for
matches.
Args:
tree: a pytree.Node instance representing the root of the tree
to be refactored.
......@@ -386,11 +411,65 @@ class RefactoringTool(object):
Returns:
True if the tree was modified, False otherwise.
"""
for fixer in chain(self.pre_order, self.post_order):
fixer.start_tree(tree, name)
self.traverse_by(self.pre_order_heads, tree.pre_order())
self.traverse_by(self.post_order_heads, tree.post_order())
#use traditional matching for the incompatible fixers
self.traverse_by(self.bmi_pre_order_heads, tree.pre_order())
self.traverse_by(self.bmi_post_order_heads, tree.post_order())
# obtain a set of candidate nodes
match_set = self.BM.run(tree.leaves())
while any(match_set.values()):
for fixer in self.BM.fixers:
if fixer in match_set and match_set[fixer]:
#sort by depth; apply fixers from bottom(of the AST) to top
match_set[fixer].sort(key=pytree.Base.depth, reverse=True)
if fixer.keep_line_order:
#some fixers(eg fix_imports) must be applied
#with the original file's line order
match_set[fixer].sort(key=pytree.Base.get_lineno)
for node in list(match_set[fixer]):
if node in match_set[fixer]:
match_set[fixer].remove(node)
try:
find_root(node)
except AssertionError:
# this node has been cut off from a
# previous transformation ; skip
continue
if node.fixers_applied and fixer in node.fixers_applied:
# do not apply the same fixer again
continue
results = fixer.match(node)
if results:
new = fixer.transform(node, results)
if new is not None:
node.replace(new)
#new.fixers_applied.append(fixer)
for node in new.post_order():
# do not apply the fixer again to
# this or any subnode
if not node.fixers_applied:
node.fixers_applied = []
node.fixers_applied.append(fixer)
# update the original match set for
# the added code
new_matches = self.BM.run(new.leaves())
for fxr in new_matches:
if not fxr in match_set:
match_set[fxr]=[]
match_set[fxr].extend(new_matches[fxr])
for fixer in chain(self.pre_order, self.post_order):
fixer.finish_tree(tree, name)
......
# coding: utf-8
print "BOM BOOM!"
......@@ -868,6 +868,11 @@ class Test_raise(FixerTestCase):
raise Exception(5).with_traceback(6) # foo"""
self.check(b, a)
def test_None_value(self):
b = """raise Exception(5), None, tb"""
a = """raise Exception(5).with_traceback(tb)"""
self.check(b, a)
def test_tuple_value(self):
b = """raise Exception, (5, 6, 7)"""
a = """raise Exception(5, 6, 7)"""
......@@ -1812,6 +1817,9 @@ class Test_urllib(FixerTestCase):
b = "from %s import %s as foo_bar" % (old, member)
a = "from %s import %s as foo_bar" % (new, member)
self.check(b, a)
b = "from %s import %s as blah, %s" % (old, member, member)
a = "from %s import %s as blah, %s" % (new, member, member)
self.check(b, a)
def test_star(self):
for old in self.modules:
......
......@@ -178,6 +178,27 @@ class TestNodes(support.TestCase):
self.assertEqual(str(n1), "foo**bar")
self.assertTrue(isinstance(n1.children, list))
def test_leaves(self):
l1 = pytree.Leaf(100, "foo")
l2 = pytree.Leaf(100, "bar")
l3 = pytree.Leaf(100, "fooey")
n2 = pytree.Node(1000, [l1, l2])
n3 = pytree.Node(1000, [l3])
n1 = pytree.Node(1000, [n2, n3])
self.assertEqual(list(n1.leaves()), [l1, l2, l3])
def test_depth(self):
l1 = pytree.Leaf(100, "foo")
l2 = pytree.Leaf(100, "bar")
n2 = pytree.Node(1000, [l1, l2])
n3 = pytree.Node(1000, [])
n1 = pytree.Node(1000, [n2, n3])
self.assertEqual(l1.depth(), 2)
self.assertEqual(n3.depth(), 1)
self.assertEqual(n1.depth(), 0)
def test_post_order(self):
l1 = pytree.Leaf(100, "foo")
l2 = pytree.Leaf(100, "bar")
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment