Commit 3e99fdee authored by Serhiy Storchaka

Issue #26881: The modulefinder module now supports extended opcode arguments.

parents ce41287e 02d9f5e5
......@@ -284,31 +284,17 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
"""
labels = findlabels(code)
extended_arg = 0
starts_line = None
free = None
# enumerate() is not an option, since we sometimes process
# multiple elements on a single pass through the loop
n = len(code)
i = 0
while i < n:
op = code[i]
offset = i
for offset, op, arg in _unpack_opargs(code):
if linestarts is not None:
starts_line = linestarts.get(i, None)
starts_line = linestarts.get(offset, None)
if starts_line is not None:
starts_line += line_offset
is_jump_target = i in labels
i = i+1
arg = None
is_jump_target = offset in labels
argval = None
argrepr = ''
if op >= HAVE_ARGUMENT:
arg = code[i] + code[i+1]*256 + extended_arg
extended_arg = 0
i = i+2
if op == EXTENDED_ARG:
extended_arg = arg*65536
if arg is not None:
# Set argval to the dereferenced value of the argument when
# available, and argrepr to the string representation of argval.
# _disassemble_bytes needs the string repr of the
......@@ -319,7 +305,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
elif op in hasname:
argval, argrepr = _get_name_info(arg, names)
elif op in hasjrel:
argval = i + arg
argval = offset + 3 + arg
argrepr = "to " + repr(argval)
elif op in haslocal:
argval, argrepr = _get_name_info(arg, varnames)
......@@ -329,7 +315,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
elif op in hasfree:
argval, argrepr = _get_name_info(arg, cells)
elif op in hasnargs:
argrepr = "%d positional, %d keyword pair" % (code[i-2], code[i-1])
argrepr = "%d positional, %d keyword pair" % (arg%256, arg//256)
yield Instruction(opname[op], op,
arg, argval, argrepr,
offset, starts_line, is_jump_target)
......@@ -365,26 +351,37 @@ def _disassemble_str(source, *, file=None):
disco = disassemble # XXX For backwards compatibility
def findlabels(code):
"""Detect all offsets in a byte code which are jump targets.
Return the list of offsets.
"""
labels = []
def _unpack_opargs(code):
# enumerate() is not an option, since we sometimes process
# multiple elements on a single pass through the loop
extended_arg = 0
n = len(code)
i = 0
while i < n:
op = code[i]
offset = i
i = i+1
arg = None
if op >= HAVE_ARGUMENT:
arg = code[i] + code[i+1]*256
arg = code[i] + code[i+1]*256 + extended_arg
extended_arg = 0
i = i+2
if op == EXTENDED_ARG:
extended_arg = arg*65536
yield (offset, op, arg)
def findlabels(code):
"""Detect all offsets in a byte code which are jump targets.
Return the list of offsets.
"""
labels = []
for offset, op, arg in _unpack_opargs(code):
if arg is not None:
label = -1
if op in hasjrel:
label = i+arg
label = offset + 3 + arg
elif op in hasjabs:
label = arg
if label >= 0:
......
......@@ -13,13 +13,12 @@ with warnings.catch_warnings():
warnings.simplefilter('ignore', DeprecationWarning)
import imp
# XXX Clean up once str8's cstor matches bytes.
LOAD_CONST = bytes([dis.opmap['LOAD_CONST']])
IMPORT_NAME = bytes([dis.opmap['IMPORT_NAME']])
STORE_NAME = bytes([dis.opmap['STORE_NAME']])
STORE_GLOBAL = bytes([dis.opmap['STORE_GLOBAL']])
LOAD_CONST = dis.opmap['LOAD_CONST']
IMPORT_NAME = dis.opmap['IMPORT_NAME']
STORE_NAME = dis.opmap['STORE_NAME']
STORE_GLOBAL = dis.opmap['STORE_GLOBAL']
STORE_OPS = STORE_NAME, STORE_GLOBAL
HAVE_ARGUMENT = bytes([dis.HAVE_ARGUMENT])
EXTENDED_ARG = dis.EXTENDED_ARG
# Modulefinder does a good job at simulating Python's, but it can not
# handle __path__ modifications packages make at runtime. Therefore there
......@@ -337,38 +336,30 @@ class ModuleFinder:
fullname = name + "." + sub
self._add_badmodule(fullname, caller)
def scan_opcodes_25(self, co,
unpack = struct.unpack):
def scan_opcodes(self, co):
# Scan the code, and yield 'interesting' opcode combinations
# Python 2.5 version (has absolute and relative imports)
code = co.co_code
names = co.co_names
consts = co.co_consts
LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME
while code:
c = bytes([code[0]])
if c in STORE_OPS:
oparg, = unpack('<H', code[1:3])
opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code)
if op != EXTENDED_ARG]
for i, (op, oparg) in enumerate(opargs):
if op in STORE_OPS:
yield "store", (names[oparg],)
code = code[3:]
continue
if code[:9:3] == LOAD_LOAD_AND_IMPORT:
oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9])
level = consts[oparg_1]
if (op == IMPORT_NAME and i >= 2
and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST):
level = consts[opargs[i-2][1]]
fromlist = consts[opargs[i-1][1]]
if level == 0: # absolute import
yield "absolute_import", (consts[oparg_2], names[oparg_3])
yield "absolute_import", (fromlist, names[oparg])
else: # relative import
yield "relative_import", (level, consts[oparg_2], names[oparg_3])
code = code[9:]
yield "relative_import", (level, fromlist, names[oparg])
continue
if c >= HAVE_ARGUMENT:
code = code[3:]
else:
code = code[1:]
def scan_code(self, co, m):
code = co.co_code
scanner = self.scan_opcodes_25
scanner = self.scan_opcodes
for what, args in scanner(co):
if what == "store":
name, = args
......
......@@ -319,6 +319,19 @@ class ModuleFinderTest(unittest.TestCase):
expected = "co_filename %r changed to %r" % (old_path, new_path)
self.assertIn(expected, output)
def test_extended_opargs(self):
extended_opargs_test = [
"a",
["a", "b"],
[], [],
"""\
a.py
%r
import b
b.py
""" % list(range(2**16))] # 2**16 constants
self._do_test(extended_opargs_test)
if __name__ == "__main__":
unittest.main()
......@@ -268,6 +268,8 @@ Core and Builtins
Library
-------
- Issue #26881: The modulefinder module now supports extended opcode arguments.
- Issue #23815: Fixed crashes related to directly created instances of types in
_tkinter and curses.panel modules.
......@@ -277,6 +279,8 @@ Library
- Issue #26873: xmlrpc now raises ResponseError on unsupported type tags
instead of silently returning an incorrect result.
- Issue #26881: modulefinder now works with bytecode with extended args.
- Issue #26915: The __contains__ methods in the collections ABCs now check
for identity before checking equality. This better matches the behavior
of the concrete classes, allows sensible handling of NaNs, and makes it
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment