Commit d490f32b authored by Serhiy Storchaka

Issue #26647: Python interpreter now uses 16-bit wordcode instead of bytecode.

Patch by Demur Rumed.
parent 92ba6865
......@@ -31,9 +31,9 @@ the following command can be used to display the disassembly of
>>> dis.dis(myfunc)
2 0 LOAD_GLOBAL 0 (len)
3 LOAD_FAST 0 (alist)
6 CALL_FUNCTION 1
9 RETURN_VALUE
2 LOAD_FAST 0 (alist)
4 CALL_FUNCTION 1
6 RETURN_VALUE
(The "2" is a line number).
......@@ -682,8 +682,7 @@ iterations of the loop.
.. XXX explain the WHY stuff!
All of the following opcodes expect arguments. An argument is two bytes, with
the more significant byte last.
All of the following opcodes use their arguments.
.. opcode:: STORE_NAME (namei)
......
......@@ -79,9 +79,9 @@ class PythonValuesTestCase(unittest.TestCase):
continue
items.append((entry.name.decode("ascii"), entry.size))
expected = [("__hello__", 161),
("__phello__", -161),
("__phello__.spam", 161),
expected = [("__hello__", 139),
("__phello__", -139),
("__phello__.spam", 139),
]
self.assertEqual(items, expected, "PyImport_FrozenModules example "
"in Doc/library/ctypes.rst may be out of date")
......
......@@ -285,7 +285,6 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
"""
labels = findlabels(code)
starts_line = None
free = None
for offset, op, arg in _unpack_opargs(code):
if linestarts is not None:
starts_line = linestarts.get(offset, None)
......@@ -296,7 +295,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
argrepr = ''
if arg is not None:
# Set argval to the dereferenced value of the argument when
# availabe, and argrepr to the string representation of argval.
# available, and argrepr to the string representation of argval.
# _disassemble_bytes needs the string repr of the
# raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
argval = arg
......@@ -305,7 +304,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
elif op in hasname:
argval, argrepr = _get_name_info(arg, names)
elif op in hasjrel:
argval = offset + 3 + arg
argval = offset + 2 + arg
argrepr = "to " + repr(argval)
elif op in haslocal:
argval, argrepr = _get_name_info(arg, varnames)
......@@ -352,23 +351,15 @@ def _disassemble_str(source, *, file=None):
disco = disassemble # XXX For backwards compatibility
def _unpack_opargs(code):
# enumerate() is not an option, since we sometimes process
# multiple elements on a single pass through the loop
extended_arg = 0
n = len(code)
i = 0
while i < n:
for i in range(0, len(code), 2):
op = code[i]
offset = i
i = i+1
arg = None
if op >= HAVE_ARGUMENT:
arg = code[i] + code[i+1]*256 + extended_arg
extended_arg = 0
i = i+2
if op == EXTENDED_ARG:
extended_arg = arg*65536
yield (offset, op, arg)
arg = code[i+1] | extended_arg
extended_arg = (arg << 8) if op == EXTENDED_ARG else 0
else:
arg = None
yield (i, op, arg)
def findlabels(code):
"""Detect all offsets in a byte code which are jump targets.
......@@ -379,14 +370,14 @@ def findlabels(code):
labels = []
for offset, op, arg in _unpack_opargs(code):
if arg is not None:
label = -1
if op in hasjrel:
label = offset + 3 + arg
label = offset + 2 + arg
elif op in hasjabs:
label = arg
if label >= 0:
if label not in labels:
labels.append(label)
else:
continue
if label not in labels:
labels.append(label)
return labels
def findlinestarts(code):
......
......@@ -225,6 +225,7 @@ _code_type = type(_write_atomic.__code__)
# Python 3.5b2 3350 (add GET_YIELD_FROM_ITER opcode #24400)
# Python 3.6a0 3360 (add FORMAT_VALUE opcode #25483)
# Python 3.6a0 3361 (lineno delta of code.co_lnotab becomes signed)
# Python 3.6a0 3370 (16 bit wordcode)
#
# MAGIC must change whenever the bytecode emitted by the compiler may no
# longer be understood by older implementations of the eval loop (usually
......@@ -233,7 +234,7 @@ _code_type = type(_write_atomic.__code__)
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
# in PC/launcher.c must also be updated.
MAGIC_NUMBER = (3361).to_bytes(2, 'little') + b'\r\n'
MAGIC_NUMBER = (3370).to_bytes(2, 'little') + b'\r\n'
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c
_PYCACHE = '__pycache__'
......
This diff is collapsed.
......@@ -10,6 +10,9 @@ What's New in Python 3.6.0 alpha 2
Core and Builtins
-----------------
- Issue #26647: Python interpreter now uses 16-bit wordcode instead of bytecode.
Patch by Demur Rumed.
- Issue #23275: Allow assigning to an empty target list in round brackets:
() = iterable.
......
......@@ -189,7 +189,7 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno)
memset(blockstack, '\0', sizeof(blockstack));
memset(in_finally, '\0', sizeof(in_finally));
blockstack_top = 0;
for (addr = 0; addr < code_len; addr++) {
for (addr = 0; addr < code_len; addr += 2) {
unsigned char op = code[addr];
switch (op) {
case SETUP_LOOP:
......@@ -251,10 +251,6 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno)
}
}
}
if (op >= HAVE_ARGUMENT) {
addr += 2;
}
}
/* Verify that the blockstack tracking code didn't get lost. */
......@@ -277,7 +273,7 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno)
* can tell whether the jump goes into any blocks without coming out
* again - in that case we raise an exception below. */
delta_iblock = 0;
for (addr = min_addr; addr < max_addr; addr++) {
for (addr = min_addr; addr < max_addr; addr += 2) {
unsigned char op = code[addr];
switch (op) {
case SETUP_LOOP:
......@@ -294,10 +290,6 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno)
}
min_delta_iblock = Py_MIN(min_delta_iblock, delta_iblock);
if (op >= HAVE_ARGUMENT) {
addr += 2;
}
}
/* Derive the absolute iblock values from the deltas. */
......
......@@ -277,7 +277,7 @@ _PyGen_yf(PyGenObject *gen)
PyObject *bytecode = f->f_code->co_code;
unsigned char *code = (unsigned char *)PyBytes_AS_STRING(bytecode);
if (code[f->f_lasti + 1] != YIELD_FROM)
if (code[f->f_lasti + 2] != YIELD_FROM)
return NULL;
yf = f->f_stacktop[-1];
Py_INCREF(yf);
......@@ -376,7 +376,7 @@ gen_throw(PyGenObject *gen, PyObject *args)
assert(ret == yf);
Py_DECREF(ret);
/* Termination repetition of YIELD_FROM */
gen->gi_frame->f_lasti++;
gen->gi_frame->f_lasti += 2;
if (_PyGen_FetchStopIterationValue(&val) == 0) {
ret = gen_send_ex(gen, val, 0, 0);
Py_DECREF(val);
......
......@@ -1089,7 +1089,7 @@ static PYC_MAGIC magic_values[] = {
{ 3190, 3230, L"3.3" },
{ 3250, 3310, L"3.4" },
{ 3320, 3350, L"3.5" },
{ 3360, 3361, L"3.6" },
{ 3360, 3370, L"3.6" },
{ 0 }
};
......
......@@ -209,6 +209,7 @@
<ClInclude Include="..\Python\condvar.h" />
<ClInclude Include="..\Python\importdl.h" />
<ClInclude Include="..\Python\thread_nt.h" />
<ClInclude Include="..\Python\wordcode_helpers.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\Modules\_bisectmodule.c" />
......
......@@ -420,6 +420,9 @@
<ClInclude Include="..\Python\thread_nt.h">
<Filter>Python</Filter>
</ClInclude>
<ClInclude Include="..\Python\wordcode_helpers.h">
<Filter>Python</Filter>
</ClInclude>
<ClInclude Include="..\Python\condvar.h">
<Filter>Python</Filter>
</ClInclude>
......
This diff is collapsed.
......@@ -29,6 +29,7 @@
#include "code.h"
#include "symtable.h"
#include "opcode.h"
#include "wordcode_helpers.h"
#define DEFAULT_BLOCK_SIZE 16
#define DEFAULT_BLOCKS 8
......@@ -43,7 +44,6 @@
struct instr {
unsigned i_jabs : 1;
unsigned i_jrel : 1;
unsigned i_hasarg : 1;
unsigned char i_opcode;
int i_oparg;
struct basicblock_ *i_target; /* target block (if jump instruction) */
......@@ -1080,13 +1080,14 @@ compiler_addop(struct compiler *c, int opcode)
basicblock *b;
struct instr *i;
int off;
assert(!HAS_ARG(opcode));
off = compiler_next_instr(c, c->u->u_curblock);
if (off < 0)
return 0;
b = c->u->u_curblock;
i = &b->b_instr[off];
i->i_opcode = opcode;
i->i_hasarg = 0;
i->i_oparg = 0;
if (opcode == RETURN_VALUE)
b->b_return = 1;
compiler_set_lineno(c, off);
......@@ -1168,8 +1169,9 @@ compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg)
Limit to 32-bit signed C int (rather than INT_MAX) for portability.
The argument of a concrete bytecode instruction is limited to 16-bit.
EXTENDED_ARG is used for 32-bit arguments. */
The argument of a concrete bytecode instruction is limited to 8-bit.
EXTENDED_ARG is used for 16, 24, and 32-bit arguments. */
assert(HAS_ARG(opcode));
assert(0 <= oparg && oparg <= 2147483647);
off = compiler_next_instr(c, c->u->u_curblock);
......@@ -1178,7 +1180,6 @@ compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg)
i = &c->u->u_curblock->b_instr[off];
i->i_opcode = opcode;
i->i_oparg = Py_SAFE_DOWNCAST(oparg, Py_ssize_t, int);
i->i_hasarg = 1;
compiler_set_lineno(c, off);
return 1;
}
......@@ -1189,6 +1190,7 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute)
struct instr *i;
int off;
assert(HAS_ARG(opcode));
assert(b != NULL);
off = compiler_next_instr(c, c->u->u_curblock);
if (off < 0)
......@@ -1196,7 +1198,6 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute)
i = &c->u->u_curblock->b_instr[off];
i->i_opcode = opcode;
i->i_target = b;
i->i_hasarg = 1;
if (absolute)
i->i_jabs = 1;
else
......@@ -4397,18 +4398,6 @@ assemble_free(struct assembler *a)
PyObject_Free(a->a_postorder);
}
/* Return the size of a basic block in bytes. */
static int
instrsize(struct instr *instr)
{
if (!instr->i_hasarg)
return 1; /* 1 byte for the opcode*/
if (instr->i_oparg > 0xffff)
return 6; /* 1 (opcode) + 1 (EXTENDED_ARG opcode) + 2 (oparg) + 2(oparg extended) */
return 3; /* 1 (opcode) + 2 (oparg) */
}
static int
blocksize(basicblock *b)
{
......@@ -4416,7 +4405,7 @@ blocksize(basicblock *b)
int size = 0;
for (i = 0; i < b->b_iused; i++)
size += instrsize(&b->b_instr[i]);
size += instrsize(b->b_instr[i].i_oparg);
return size;
}
......@@ -4536,15 +4525,12 @@ assemble_lnotab(struct assembler *a, struct instr *i)
static int
assemble_emit(struct assembler *a, struct instr *i)
{
int size, arg = 0, ext = 0;
int size, arg = 0;
Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode);
char *code;
size = instrsize(i);
if (i->i_hasarg) {
arg = i->i_oparg;
ext = arg >> 16;
}
arg = i->i_oparg;
size = instrsize(arg);
if (i->i_lineno && !assemble_lnotab(a, i))
return 0;
if (a->a_offset + size >= len) {
......@@ -4555,19 +4541,7 @@ assemble_emit(struct assembler *a, struct instr *i)
}
code = PyBytes_AS_STRING(a->a_bytecode) + a->a_offset;
a->a_offset += size;
if (size == 6) {
assert(i->i_hasarg);
*code++ = (char)EXTENDED_ARG;
*code++ = ext & 0xff;
*code++ = ext >> 8;
arg &= 0xffff;
}
*code++ = i->i_opcode;
if (i->i_hasarg) {
assert(size == 3 || size == 6);
*code++ = arg & 0xff;
*code++ = arg >> 8;
}
write_op_arg((unsigned char*)code, i->i_opcode, arg, size);
return 1;
}
......@@ -4575,7 +4549,7 @@ static void
assemble_jump_offsets(struct assembler *a, struct compiler *c)
{
basicblock *b;
int bsize, totsize, extended_arg_count = 0, last_extended_arg_count;
int bsize, totsize, extended_arg_recompile;
int i;
/* Compute the size of each block and fixup jump args.
......@@ -4588,27 +4562,26 @@ assemble_jump_offsets(struct assembler *a, struct compiler *c)
b->b_offset = totsize;
totsize += bsize;
}
last_extended_arg_count = extended_arg_count;
extended_arg_count = 0;
extended_arg_recompile = 0;
for (b = c->u->u_blocks; b != NULL; b = b->b_list) {
bsize = b->b_offset;
for (i = 0; i < b->b_iused; i++) {
struct instr *instr = &b->b_instr[i];
int isize = instrsize(instr->i_oparg);
/* Relative jumps are computed relative to
the instruction pointer after fetching
the jump instruction.
*/
bsize += instrsize(instr);
if (instr->i_jabs)
bsize += isize;
if (instr->i_jabs || instr->i_jrel) {
instr->i_oparg = instr->i_target->b_offset;
else if (instr->i_jrel) {
int delta = instr->i_target->b_offset - bsize;
instr->i_oparg = delta;
if (instr->i_jrel) {
instr->i_oparg -= bsize;
}
if (instrsize(instr->i_oparg) != isize) {
extended_arg_recompile = 1;
}
}
else
continue;
if (instr->i_oparg > 0xffff)
extended_arg_count++;
}
}
......@@ -4618,7 +4591,7 @@ assemble_jump_offsets(struct assembler *a, struct compiler *c)
The issue is that in the first loop blocksize() is called
which calls instrsize() which requires i_oparg be set
appropriately. There is a bootstrap problem because
appropriately. There is a bootstrap problem because
i_oparg is calculated in the second loop above.
So we loop until we stop seeing new EXTENDED_ARGs.
......@@ -4626,7 +4599,7 @@ assemble_jump_offsets(struct assembler *a, struct compiler *c)
ones in jump instructions. So this should converge
fairly quickly.
*/
} while (last_extended_arg_count != extended_arg_count);
} while (extended_arg_recompile);
}
static PyObject *
......@@ -4772,9 +4745,9 @@ dump_instr(const struct instr *i)
char arg[128];
*arg = '\0';
if (i->i_hasarg)
if (HAS_ARG(i->i_opcode)) {
sprintf(arg, "arg: %d ", i->i_oparg);
}
fprintf(stderr, "line: %d, opcode: %d %s%s%s\n",
i->i_lineno, i->i_opcode, arg, jabs, jrel);
}
......
......@@ -14,17 +14,15 @@
the appropriate bytes from M___main__.c. */
static unsigned char M___hello__[] = {
99,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,
0,64,0,0,0,115,20,0,0,0,100,2,0,90,1,0,
101,2,0,100,0,0,131,1,0,1,100,1,0,83,40,3,
0,0,0,117,12,0,0,0,72,101,108,108,111,32,119,111,
114,108,100,33,78,84,40,3,0,0,0,117,4,0,0,0,
84,114,117,101,117,11,0,0,0,105,110,105,116,105,97,108,
105,122,101,100,117,5,0,0,0,112,114,105,110,116,40,0,
0,0,0,40,0,0,0,0,40,0,0,0,0,117,7,0,
0,0,102,108,97,103,46,112,121,117,8,0,0,0,60,109,
111,100,117,108,101,62,1,0,0,0,115,2,0,0,0,6,
1,
227,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,
0,64,0,0,0,115,16,0,0,0,100,0,90,0,101,1,
100,1,131,1,1,0,100,2,83,0,41,3,84,122,12,72,
101,108,108,111,32,119,111,114,108,100,33,78,41,2,218,11,
105,110,105,116,105,97,108,105,122,101,100,218,5,112,114,105,
110,116,169,0,114,3,0,0,0,114,3,0,0,0,250,22,
46,47,84,111,111,108,115,47,102,114,101,101,122,101,47,102,
108,97,103,46,112,121,218,8,60,109,111,100,117,108,101,62,
1,0,0,0,115,2,0,0,0,4,1,
};
#define SIZE (int)sizeof(M___hello__)
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/* This file contains code shared by the compiler and the peephole
optimizer.
*/
/* Return the minimum number of bytes needed to encode an instruction whose
   argument is oparg, counting any EXTENDED_ARG prefixes.

   Every unit (the instruction itself and each prefix) occupies 2 bytes.
   One prefix is required for each byte of oparg above the lowest one, so
   the result is always 2, 4, 6 or 8. */
static int
instrsize(unsigned int oparg)
{
    /* Each comparison yields 0 or 1; their sum is the number of
       EXTENDED_ARG prefixes required. */
    int prefixes = (oparg > 0xff) + (oparg > 0xffff) + (oparg > 0xffffff);

    return 2 * (prefixes + 1);
}
/* Write the opcode/oparg pair into codestr using exactly ilen bytes.

   codestr must point at the position of the first EXTENDED_ARG prefix
   (or of the instruction itself when ilen is 2).  ilen must be 2, 4, 6
   or 8; each 2 bytes beyond the first pair emit one EXTENDED_ARG prefix
   carrying the next more significant byte of oparg, most significant
   first.  The final pair holds opcode and the low byte of oparg.

   Any other ilen trips an assertion; when assertions are compiled out
   nothing is written. */
static void
write_op_arg(unsigned char *codestr, unsigned char opcode,
             unsigned int oparg, int ilen)
{
    int shift;

    if (ilen != 2 && ilen != 4 && ilen != 6 && ilen != 8) {
        assert(0);
        return;
    }

    /* ilen 8 -> shifts 24, 16, 8; ilen 6 -> 16, 8; ilen 4 -> 8;
       ilen 2 -> no prefix at all. */
    for (shift = 4 * (ilen - 2); shift > 0; shift -= 8) {
        *codestr++ = EXTENDED_ARG;
        *codestr++ = (oparg >> shift) & 0xff;
    }

    codestr[0] = opcode;
    codestr[1] = oparg & 0xff;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment