Issue #26647: Python interpreter now uses 16-bit wordcode instead of bytecode.

Patch by Demur Rumed.

Issue #26647: Python interpreter now uses 16-bit wordcode instead of bytecode.
Patch by Demur Rumed.
d490f32b · Serhiy Storchaka · 92ba6865 · d490f32b · d490f32b · d490f32b
Commit d490f32b authored May 24, 2016 by Serhiy Storchaka
18 changed files
--- a/Doc/library/dis.rst
+++ b/Doc/library/dis.rst
@@ -31,9 +31,9 @@ the following command can be used to display the disassembly of

   >>> dis.dis(myfunc)
     2           0 LOAD_GLOBAL              0 (len)
-                 3 LOAD_FAST                0 (alist)
-                 6 CALL_FUNCTION            1
-                 9 RETURN_VALUE
+                 2 LOAD_FAST                0 (alist)
+                 4 CALL_FUNCTION            1
+                 6 RETURN_VALUE

 (The "2" is a line number).

@@ -682,8 +682,7 @@ iterations of the loop.
   .. XXX explain the WHY stuff!


-All of the following opcodes expect arguments.  An argument is two bytes, with
-the more significant byte last.
+All of the following opcodes use their arguments.

 .. opcode:: STORE_NAME (namei)


--- a/Lib/ctypes/test/test_values.py
+++ b/Lib/ctypes/test/test_values.py
@@ -79,9 +79,9 @@ class PythonValuesTestCase(unittest.TestCase):
                continue
            items.append((entry.name.decode("ascii"), entry.size))

-        expected = [("__hello__", 161),
-                    ("__phello__", -161),
-                    ("__phello__.spam", 161),
+        expected = [("__hello__", 139),
+                    ("__phello__", -139),
+                    ("__phello__.spam", 139),
                    ]
        self.assertEqual(items, expected, "PyImport_FrozenModules example "
            "in Doc/library/ctypes.rst may be out of date")

--- a/Lib/dis.py
+++ b/Lib/dis.py
@@ -285,7 +285,6 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
    """
    labels = findlabels(code)
    starts_line = None
-    free = None
    for offset, op, arg in _unpack_opargs(code):
        if linestarts is not None:
            starts_line = linestarts.get(offset, None)
@@ -296,7 +295,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
        argrepr = ''
        if arg is not None:
            #  Set argval to the dereferenced value of the argument when
-            #  availabe, and argrepr to the string representation of argval.
+            #  available, and argrepr to the string representation of argval.
            #    _disassemble_bytes needs the string repr of the
            #    raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
            argval = arg
@@ -305,7 +304,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
            elif op in hasname:
                argval, argrepr = _get_name_info(arg, names)
            elif op in hasjrel:
-                argval = offset + 3 + arg
+                argval = offset + 2 + arg
                argrepr = "to " + repr(argval)
            elif op in haslocal:
                argval, argrepr = _get_name_info(arg, varnames)
@@ -352,23 +351,15 @@ def _disassemble_str(source, *, file=None):
 disco = disassemble                     # XXX For backwards compatibility

 def _unpack_opargs(code):
-    # enumerate() is not an option, since we sometimes process
-    # multiple elements on a single pass through the loop
    extended_arg = 0
-    n = len(code)
-    i = 0
-    while i < n:
+    for i in range(0, len(code), 2):
        op = code[i]
-        offset = i
-        i = i+1
-        arg = None
        if op >= HAVE_ARGUMENT:
-            arg = code[i] + code[i+1]*256 + extended_arg
-            extended_arg = 0
-            i = i+2
-            if op == EXTENDED_ARG:
-                extended_arg = arg*65536
-        yield (offset, op, arg)
+            arg = code[i+1] | extended_arg
+            extended_arg = (arg << 8) if op == EXTENDED_ARG else 0
+        else:
+            arg = None
+        yield (i, op, arg)

 def findlabels(code):
    """Detect all offsets in a byte code which are jump targets.
@@ -379,14 +370,14 @@ def findlabels(code):
    labels = []
    for offset, op, arg in _unpack_opargs(code):
        if arg is not None:
-            label = -1
            if op in hasjrel:
-                label = offset + 3 + arg
+                label = offset + 2 + arg
            elif op in hasjabs:
                label = arg
-            if label >= 0:
-                if label not in labels:
-                    labels.append(label)
+            else:
+                continue
+            if label not in labels:
+                labels.append(label)
    return labels

 def findlinestarts(code):

--- a/Lib/importlib/_bootstrap_external.py
+++ b/Lib/importlib/_bootstrap_external.py
@@ -225,6 +225,7 @@ _code_type = type(_write_atomic.__code__)
 #     Python 3.5b2  3350 (add GET_YIELD_FROM_ITER opcode #24400)
 #     Python 3.6a0  3360 (add FORMAT_VALUE opcode #25483
 #     Python 3.6a0  3361 (lineno delta of code.co_lnotab becomes signed)
+#     Python 3.6a0  3370 (16 bit wordcode)
 #
 # MAGIC must change whenever the bytecode emitted by the compiler may no
 # longer be understood by older implementations of the eval loop (usually
@@ -233,7 +234,7 @@ _code_type = type(_write_atomic.__code__)
 # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
 # in PC/launcher.c must also be updated.

-MAGIC_NUMBER = (3361).to_bytes(2, 'little') + b'\r\n'
+MAGIC_NUMBER = (3370).to_bytes(2, 'little') + b'\r\n'
 _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little')  # For import.c

 _PYCACHE = '__pycache__'

--- a/Lib/test/test_dis.py
+++ b/Lib/test/test_dis.py
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.6.0 alpha 2
 Core and Builtins
 -----------------

+- Issue #26647: Python interpreter now uses 16-bit wordcode instead of bytecode.
+  Patch by Demur Rumed.
+
 - Issue #23275: Allow assigning to an empty target list in round brackets:
  () = iterable.


--- a/Objects/frameobject.c
+++ b/Objects/frameobject.c
@@ -189,7 +189,7 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno)
    memset(blockstack, '\0', sizeof(blockstack));
    memset(in_finally, '\0', sizeof(in_finally));
    blockstack_top = 0;
-    for (addr = 0; addr < code_len; addr++) {
+    for (addr = 0; addr < code_len; addr += 2) {
        unsigned char op = code[addr];
        switch (op) {
        case SETUP_LOOP:
@@ -251,10 +251,6 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno)
                }
            }
        }
-
-        if (op >= HAVE_ARGUMENT) {
-            addr += 2;
-        }
    }

    /* Verify that the blockstack tracking code didn't get lost. */
@@ -277,7 +273,7 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno)
     * can tell whether the jump goes into any blocks without coming out
     * again - in that case we raise an exception below. */
    delta_iblock = 0;
-    for (addr = min_addr; addr < max_addr; addr++) {
+    for (addr = min_addr; addr < max_addr; addr += 2) {
        unsigned char op = code[addr];
        switch (op) {
        case SETUP_LOOP:
@@ -294,10 +290,6 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno)
        }

        min_delta_iblock = Py_MIN(min_delta_iblock, delta_iblock);
-
-        if (op >= HAVE_ARGUMENT) {
-            addr += 2;
-        }
    }

    /* Derive the absolute iblock values from the deltas. */

--- a/Objects/genobject.c
+++ b/Objects/genobject.c
@@ -277,7 +277,7 @@ _PyGen_yf(PyGenObject *gen)
        PyObject *bytecode = f->f_code->co_code;
        unsigned char *code = (unsigned char *)PyBytes_AS_STRING(bytecode);

-        if (code[f->f_lasti + 1] != YIELD_FROM)
+        if (code[f->f_lasti + 2] != YIELD_FROM)
            return NULL;
        yf = f->f_stacktop[-1];
        Py_INCREF(yf);
@@ -376,7 +376,7 @@ gen_throw(PyGenObject *gen, PyObject *args)
            assert(ret == yf);
            Py_DECREF(ret);
            /* Termination repetition of YIELD_FROM */
-            gen->gi_frame->f_lasti++;
+            gen->gi_frame->f_lasti += 2;
            if (_PyGen_FetchStopIterationValue(&val) == 0) {
                ret = gen_send_ex(gen, val, 0, 0);
                Py_DECREF(val);

--- a/PC/launcher.c
+++ b/PC/launcher.c
@@ -1089,7 +1089,7 @@ static PYC_MAGIC magic_values[] = {
    { 3190, 3230, L"3.3" },
    { 3250, 3310, L"3.4" },
    { 3320, 3350, L"3.5" },
-    { 3360, 3361, L"3.6" },
+    { 3360, 3370, L"3.6" },
    { 0 }
 };


--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -209,6 +209,7 @@
    <ClInclude Include="..\Python\condvar.h" />
    <ClInclude Include="..\Python\importdl.h" />
    <ClInclude Include="..\Python\thread_nt.h" />
+    <ClInclude Include="..\Python\wordcode_helpers.h" />
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="..\Modules\_bisectmodule.c" />

--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -420,6 +420,9 @@
    <ClInclude Include="..\Python\thread_nt.h">
      <Filter>Python</Filter>
    </ClInclude>
+    <ClInclude Include="..\Python\wordcode_helpers.h">
+      <Filter>Python</Filter>
+    </ClInclude>
    <ClInclude Include="..\Python\condvar.h">
      <Filter>Python</Filter>
    </ClInclude>

--- a/Python/ceval.c
+++ b/Python/ceval.c
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -29,6 +29,7 @@
 #include "code.h"
 #include "symtable.h"
 #include "opcode.h"
+#include "wordcode_helpers.h"

 #define DEFAULT_BLOCK_SIZE 16
 #define DEFAULT_BLOCKS 8
@@ -43,7 +44,6 @@
 struct instr {
    unsigned i_jabs : 1;
    unsigned i_jrel : 1;
-    unsigned i_hasarg : 1;
    unsigned char i_opcode;
    int i_oparg;
    struct basicblock_ *i_target; /* target block (if jump instruction) */
@@ -1080,13 +1080,14 @@ compiler_addop(struct compiler *c, int opcode)
    basicblock *b;
    struct instr *i;
    int off;
+    assert(!HAS_ARG(opcode));
    off = compiler_next_instr(c, c->u->u_curblock);
    if (off < 0)
        return 0;
    b = c->u->u_curblock;
    i = &b->b_instr[off];
    i->i_opcode = opcode;
-    i->i_hasarg = 0;
+    i->i_oparg = 0;
    if (opcode == RETURN_VALUE)
        b->b_return = 1;
    compiler_set_lineno(c, off);
@@ -1168,8 +1169,9 @@ compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg)

       Limit to 32-bit signed C int (rather than INT_MAX) for portability.

-       The argument of a concrete bytecode instruction is limited to 16-bit.
-       EXTENDED_ARG is used for 32-bit arguments. */
+       The argument of a concrete bytecode instruction is limited to 8-bit.
+       EXTENDED_ARG is used for 16, 24, and 32-bit arguments. */
+    assert(HAS_ARG(opcode));
    assert(0 <= oparg && oparg <= 2147483647);

    off = compiler_next_instr(c, c->u->u_curblock);
@@ -1178,7 +1180,6 @@ compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg)
    i = &c->u->u_curblock->b_instr[off];
    i->i_opcode = opcode;
    i->i_oparg = Py_SAFE_DOWNCAST(oparg, Py_ssize_t, int);
-    i->i_hasarg = 1;
    compiler_set_lineno(c, off);
    return 1;
 }
@@ -1189,6 +1190,7 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute)
    struct instr *i;
    int off;

+    assert(HAS_ARG(opcode));
    assert(b != NULL);
    off = compiler_next_instr(c, c->u->u_curblock);
    if (off < 0)
@@ -1196,7 +1198,6 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute)
    i = &c->u->u_curblock->b_instr[off];
    i->i_opcode = opcode;
    i->i_target = b;
-    i->i_hasarg = 1;
    if (absolute)
        i->i_jabs = 1;
    else
@@ -4397,18 +4398,6 @@ assemble_free(struct assembler *a)
        PyObject_Free(a->a_postorder);
 }

-/* Return the size of a basic block in bytes. */
-
-static int
-instrsize(struct instr *instr)
-{
-    if (!instr->i_hasarg)
-        return 1;               /* 1 byte for the opcode*/
-    if (instr->i_oparg > 0xffff)
-        return 6;               /* 1 (opcode) + 1 (EXTENDED_ARG opcode) + 2 (oparg) + 2(oparg extended) */
-    return 3;                   /* 1 (opcode) + 2 (oparg) */
-}
-
 static int
 blocksize(basicblock *b)
 {
@@ -4416,7 +4405,7 @@ blocksize(basicblock *b)
    int size = 0;

    for (i = 0; i < b->b_iused; i++)
-        size += instrsize(&b->b_instr[i]);
+        size += instrsize(b->b_instr[i].i_oparg);
    return size;
 }

@@ -4536,15 +4525,12 @@ assemble_lnotab(struct assembler *a, struct instr *i)
 static int
 assemble_emit(struct assembler *a, struct instr *i)
 {
-    int size, arg = 0, ext = 0;
+    int size, arg = 0;
    Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode);
    char *code;

-    size = instrsize(i);
-    if (i->i_hasarg) {
-        arg = i->i_oparg;
-        ext = arg >> 16;
-    }
+    arg = i->i_oparg;
+    size = instrsize(arg);
    if (i->i_lineno && !assemble_lnotab(a, i))
        return 0;
    if (a->a_offset + size >= len) {
@@ -4555,19 +4541,7 @@ assemble_emit(struct assembler *a, struct instr *i)
    }
    code = PyBytes_AS_STRING(a->a_bytecode) + a->a_offset;
    a->a_offset += size;
-    if (size == 6) {
-        assert(i->i_hasarg);
-        *code++ = (char)EXTENDED_ARG;
-        *code++ = ext & 0xff;
-        *code++ = ext >> 8;
-        arg &= 0xffff;
-    }
-    *code++ = i->i_opcode;
-    if (i->i_hasarg) {
-        assert(size == 3 || size == 6);
-        *code++ = arg & 0xff;
-        *code++ = arg >> 8;
-    }
+    write_op_arg((unsigned char*)code, i->i_opcode, arg, size);
    return 1;
 }

@@ -4575,7 +4549,7 @@ static void
 assemble_jump_offsets(struct assembler *a, struct compiler *c)
 {
    basicblock *b;
-    int bsize, totsize, extended_arg_count = 0, last_extended_arg_count;
+    int bsize, totsize, extended_arg_recompile;
    int i;

    /* Compute the size of each block and fixup jump args.
@@ -4588,27 +4562,26 @@ assemble_jump_offsets(struct assembler *a, struct compiler *c)
            b->b_offset = totsize;
            totsize += bsize;
        }
-        last_extended_arg_count = extended_arg_count;
-        extended_arg_count = 0;
+        extended_arg_recompile = 0;
        for (b = c->u->u_blocks; b != NULL; b = b->b_list) {
            bsize = b->b_offset;
            for (i = 0; i < b->b_iused; i++) {
                struct instr *instr = &b->b_instr[i];
+                int isize = instrsize(instr->i_oparg);
                /* Relative jumps are computed relative to
                   the instruction pointer after fetching
                   the jump instruction.
                */
-                bsize += instrsize(instr);
-                if (instr->i_jabs)
+                bsize += isize;
+                if (instr->i_jabs || instr->i_jrel) {
                    instr->i_oparg = instr->i_target->b_offset;
-                else if (instr->i_jrel) {
-                    int delta = instr->i_target->b_offset - bsize;
-                    instr->i_oparg = delta;
+                    if (instr->i_jrel) {
+                        instr->i_oparg -= bsize;
+                    }
+                    if (instrsize(instr->i_oparg) != isize) {
+                        extended_arg_recompile = 1;
+                    }
                }
-                else
-                    continue;
-                if (instr->i_oparg > 0xffff)
-                    extended_arg_count++;
            }
        }

@@ -4618,7 +4591,7 @@ assemble_jump_offsets(struct assembler *a, struct compiler *c)

        The issue is that in the first loop blocksize() is called
        which calls instrsize() which requires i_oparg be set
-        appropriately.          There is a bootstrap problem because
+        appropriately. There is a bootstrap problem because
        i_oparg is calculated in the second loop above.

        So we loop until we stop seeing new EXTENDED_ARGs.
@@ -4626,7 +4599,7 @@ assemble_jump_offsets(struct assembler *a, struct compiler *c)
        ones in jump instructions.  So this should converge
        fairly quickly.
    */
-    } while (last_extended_arg_count != extended_arg_count);
+    } while (extended_arg_recompile);
 }

 static PyObject *
@@ -4772,9 +4745,9 @@ dump_instr(const struct instr *i)
    char arg[128];

    *arg = '\0';
-    if (i->i_hasarg)
+    if (HAS_ARG(i->i_opcode)) {
        sprintf(arg, "arg: %d ", i->i_oparg);
-
+    }
    fprintf(stderr, "line: %d, opcode: %d %s%s%s\n",
                    i->i_lineno, i->i_opcode, arg, jabs, jrel);
 }

--- a/Python/frozen.c
+++ b/Python/frozen.c
@@ -14,17 +14,15 @@
   the appropriate bytes from M___main__.c. */

 static unsigned char M___hello__[] = {
-    99,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,
-    0,64,0,0,0,115,20,0,0,0,100,2,0,90,1,0,
-    101,2,0,100,0,0,131,1,0,1,100,1,0,83,40,3,
-    0,0,0,117,12,0,0,0,72,101,108,108,111,32,119,111,
-    114,108,100,33,78,84,40,3,0,0,0,117,4,0,0,0,
-    84,114,117,101,117,11,0,0,0,105,110,105,116,105,97,108,
-    105,122,101,100,117,5,0,0,0,112,114,105,110,116,40,0,
-    0,0,0,40,0,0,0,0,40,0,0,0,0,117,7,0,
-    0,0,102,108,97,103,46,112,121,117,8,0,0,0,60,109,
-    111,100,117,108,101,62,1,0,0,0,115,2,0,0,0,6,
-    1,
+    227,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,
+    0,64,0,0,0,115,16,0,0,0,100,0,90,0,101,1,
+    100,1,131,1,1,0,100,2,83,0,41,3,84,122,12,72,
+    101,108,108,111,32,119,111,114,108,100,33,78,41,2,218,11,
+    105,110,105,116,105,97,108,105,122,101,100,218,5,112,114,105,
+    110,116,169,0,114,3,0,0,0,114,3,0,0,0,250,22,
+    46,47,84,111,111,108,115,47,102,114,101,101,122,101,47,102,
+    108,97,103,46,112,121,218,8,60,109,111,100,117,108,101,62,
+    1,0,0,0,115,2,0,0,0,4,1,
 };

 #define SIZE (int)sizeof(M___hello__)

--- a/Python/importlib.h
+++ b/Python/importlib.h
--- a/Python/importlib_external.h
+++ b/Python/importlib_external.h
--- a/Python/peephole.c
+++ b/Python/peephole.c
--- a/Python/wordcode_helpers.h
+++ b/Python/wordcode_helpers.h
+/* This file contains code shared by the compiler and the peephole
+   optimizer.
+ */
+
+/* Return the minimum number of bytes needed to encode an instruction whose
+   argument is oparg, including any EXTENDED_ARG prefixes: 2 bytes when oparg
+   fits in 8 bits, 4 for 16 bits, 6 for 24 bits, and 8 otherwise. */
+static int
+instrsize(unsigned int oparg)
+{
+    return oparg <= 0xff ? 2 :
+        oparg <= 0xffff ? 4 :
+        oparg <= 0xffffff ? 6 :
+        8;
+}
+
+/* Write the opcode/oparg pair at codestr using exactly ilen bytes, where
+   ilen is 2, 4, 6 or 8.  For ilen > 2 the instruction is preceded by
+   EXTENDED_ARG code units that carry the higher-order bytes of oparg, most
+   significant first; codestr should be pointed at the desired location of
+   the first EXTENDED_ARG.  The final two bytes are opcode and the low byte
+   of oparg.  The switch cases fall through on purpose, so entering at a
+   larger ilen emits every smaller piece as well.  Any other ilen reaches
+   assert(0). */
+static void
+write_op_arg(unsigned char *codestr, unsigned char opcode,
+    unsigned int oparg, int ilen)
+{
+    switch (ilen) {
+        case 8:
+            *codestr++ = EXTENDED_ARG;
+            *codestr++ = (oparg >> 24) & 0xff;  /* bits 24-31; fall through */
+        case 6:
+            *codestr++ = EXTENDED_ARG;
+            *codestr++ = (oparg >> 16) & 0xff;  /* bits 16-23; fall through */
+        case 4:
+            *codestr++ = EXTENDED_ARG;
+            *codestr++ = (oparg >> 8) & 0xff;  /* bits 8-15; fall through */
+        case 2:
+            *codestr++ = opcode;
+            *codestr++ = oparg & 0xff;  /* low byte travels with the real opcode */
+            break;
+        default:
+            assert(0);  /* ilen was not 2, 4, 6 or 8 */
+    }
+}