Commit a78c7954 authored by Eric V. Smith

Issue 25483: Add an opcode to make f-string formatting more robust.

parent 2753a096
...@@ -206,6 +206,14 @@ PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *); ...@@ -206,6 +206,14 @@ PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *);
PyAPI_FUNC(void) _PyEval_SignalAsyncExc(void); PyAPI_FUNC(void) _PyEval_SignalAsyncExc(void);
#endif #endif
/* Masks and values used by FORMAT_VALUE opcode.

   The opcode argument packs two independent fields:

   - FVC_* (low two bits, extracted with FVC_MASK): which conversion, if
     any, is applied to the value before it is formatted.  FVC_NONE means
     no conversion; FVC_STR, FVC_REPR and FVC_ASCII correspond to the
     f-string conversions !s, !r and !a.

   - FVS_* (bit 0x4, extracted with FVS_MASK): whether a format spec was
     pushed on the stack above the value.  FVS_HAVE_SPEC means it was. */
#define FVC_MASK 0x3
#define FVC_NONE 0x0
#define FVC_STR 0x1
#define FVC_REPR 0x2
#define FVC_ASCII 0x3
#define FVS_MASK 0x4
#define FVS_HAVE_SPEC 0x4
#ifdef __cplusplus #ifdef __cplusplus
} }
......
...@@ -122,6 +122,7 @@ extern "C" { ...@@ -122,6 +122,7 @@ extern "C" {
#define BUILD_TUPLE_UNPACK 152 #define BUILD_TUPLE_UNPACK 152
#define BUILD_SET_UNPACK 153 #define BUILD_SET_UNPACK 153
#define SETUP_ASYNC_WITH 154 #define SETUP_ASYNC_WITH 154
#define FORMAT_VALUE 155
/* EXCEPT_HANDLER is a special, implicit block type which is created when /* EXCEPT_HANDLER is a special, implicit block type which is created when
entering an except handler. It is not an opcode but we define it here entering an except handler. It is not an opcode but we define it here
......
...@@ -223,12 +223,13 @@ _code_type = type(_write_atomic.__code__) ...@@ -223,12 +223,13 @@ _code_type = type(_write_atomic.__code__)
# Python 3.5b1 3330 (PEP 448: Additional Unpacking Generalizations) # Python 3.5b1 3330 (PEP 448: Additional Unpacking Generalizations)
# Python 3.5b2 3340 (fix dictionary display evaluation order #11205) # Python 3.5b2 3340 (fix dictionary display evaluation order #11205)
# Python 3.5b2 3350 (add GET_YIELD_FROM_ITER opcode #24400) # Python 3.5b2 3350 (add GET_YIELD_FROM_ITER opcode #24400)
# Python 3.6a0 3360 (add FORMAT_VALUE opcode #25483)
# #
# MAGIC must change whenever the bytecode emitted by the compiler may no # MAGIC must change whenever the bytecode emitted by the compiler may no
# longer be understood by older implementations of the eval loop (usually # longer be understood by older implementations of the eval loop (usually
# due to the addition of new opcodes). # due to the addition of new opcodes).
MAGIC_NUMBER = (3350).to_bytes(2, 'little') + b'\r\n' MAGIC_NUMBER = (3360).to_bytes(2, 'little') + b'\r\n'
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c
_PYCACHE = '__pycache__' _PYCACHE = '__pycache__'
......
...@@ -214,4 +214,6 @@ def_op('BUILD_MAP_UNPACK_WITH_CALL', 151) ...@@ -214,4 +214,6 @@ def_op('BUILD_MAP_UNPACK_WITH_CALL', 151)
def_op('BUILD_TUPLE_UNPACK', 152) def_op('BUILD_TUPLE_UNPACK', 152)
def_op('BUILD_SET_UNPACK', 153) def_op('BUILD_SET_UNPACK', 153)
def_op('FORMAT_VALUE', 155)
del def_op, name_op, jrel_op, jabs_op del def_op, name_op, jrel_op, jabs_op
...@@ -3363,6 +3363,63 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) ...@@ -3363,6 +3363,63 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
DISPATCH(); DISPATCH();
} }
TARGET(FORMAT_VALUE) {
    /* Handles f-string value formatting.

       Stack on entry:  ..., value [, fmt_spec]
       Stack on exit:   ..., formatted_result

       oparg & FVC_MASK selects an optional conversion (str, repr or
       ascii) applied to value first; (oparg & FVS_MASK) says whether
       a fmt_spec was pushed above value.

       Reference ownership: fmt_spec is popped, so this block owns it
       and must release it on every path.  value is only peeked at
       with TOP(); the stack slot keeps owning that reference until a
       replacement is installed with SET_TOP().  This matters on the
       error paths: the error handling (outside this block) is
       expected to release whatever is still on the value stack, so
       the slot must never refer to an object whose reference we have
       already dropped. */
    PyObject *result;
    PyObject *fmt_spec;
    PyObject *value;
    PyObject *(*conv_fn)(PyObject *);
    int which_conversion = oparg & FVC_MASK;
    int have_fmt_spec = (oparg & FVS_MASK) == FVS_HAVE_SPEC;

    fmt_spec = have_fmt_spec ? POP() : NULL;
    value = TOP();  /* Borrowed from the stack slot. */

    /* See if any conversion is specified. */
    switch (which_conversion) {
    case FVC_STR:   conv_fn = PyObject_Str;   break;
    case FVC_REPR:  conv_fn = PyObject_Repr;  break;
    case FVC_ASCII: conv_fn = PyObject_ASCII; break;

    /* Must be 0 (meaning no conversion), since only four
       values are allowed by (oparg & FVC_MASK). */
    default:        conv_fn = NULL;           break;
    }

    /* If there's a conversion function, call it and replace
       value with that result. Otherwise, just use value,
       without conversion. */
    if (conv_fn) {
        result = conv_fn(value);
        if (!result) {
            /* value is still owned by the stack slot; only the
               popped fmt_spec is ours to release. */
            Py_XDECREF(fmt_spec);
            goto error;
        }
        /* Install the converted object in the stack slot before
           releasing the original, so the slot never dangles. */
        SET_TOP(result);
        Py_DECREF(value);
        value = result;
    }

    /* If value is a unicode object, and there's no fmt_spec,
       then we know the result of format(value) is value
       itself. In that case, skip calling format(). I plan to
       move this optimization into PyObject_Format()
       itself. */
    if (PyUnicode_CheckExact(value) && fmt_spec == NULL) {
        /* Nothing to do: value is already on top of the stack. */
    } else {
        /* Actually call format(). */
        result = PyObject_Format(value, fmt_spec);
        Py_XDECREF(fmt_spec);
        if (!result) {
            /* value stays on the stack, still owned by its slot. */
            goto error;
        }
        SET_TOP(result);
        Py_DECREF(value);
    }

    DISPATCH();
}
TARGET(EXTENDED_ARG) { TARGET(EXTENDED_ARG) {
opcode = NEXTOP(); opcode = NEXTOP();
oparg = oparg<<16 | NEXTARG(); oparg = oparg<<16 | NEXTARG();
......
...@@ -1067,6 +1067,10 @@ PyCompile_OpcodeStackEffect(int opcode, int oparg) ...@@ -1067,6 +1067,10 @@ PyCompile_OpcodeStackEffect(int opcode, int oparg)
return 1; return 1;
case GET_YIELD_FROM_ITER: case GET_YIELD_FROM_ITER:
return 0; return 0;
case FORMAT_VALUE:
/* If there's a fmt_spec on the stack, we go from 2->1,
else 1->1. */
return (oparg & FVS_MASK) == FVS_HAVE_SPEC ? -1 : 0;
default: default:
return PY_INVALID_STACK_EFFECT; return PY_INVALID_STACK_EFFECT;
} }
...@@ -3241,83 +3245,47 @@ compiler_joined_str(struct compiler *c, expr_ty e) ...@@ -3241,83 +3245,47 @@ compiler_joined_str(struct compiler *c, expr_ty e)
return 1; return 1;
} }
/* Note that this code uses the builtin functions format(), str(), /* Used to implement f-strings. Format a single value. */
repr(), and ascii(). You can break this code, or make it do odd
things, by redefining those functions. */
static int static int
compiler_formatted_value(struct compiler *c, expr_ty e) compiler_formatted_value(struct compiler *c, expr_ty e)
{ {
PyObject *conversion_name = NULL; /* Our oparg encodes 2 pieces of information: the conversion
character, and whether or not a format_spec was provided.
static PyObject *format_string;
static PyObject *str_string; Convert the conversion char to 2 bits:
static PyObject *repr_string; None: 000 0x0 FVC_NONE
static PyObject *ascii_string; !s : 001 0x1 FVC_STR
!r : 010 0x2 FVC_REPR
if (!format_string) { !a : 011 0x3 FVC_ASCII
format_string = PyUnicode_InternFromString("format");
if (!format_string) next bit is whether or not we have a format spec:
return 0; yes : 100 0x4
} no : 000 0x0
*/
if (!str_string) {
str_string = PyUnicode_InternFromString("str");
if (!str_string)
return 0;
}
if (!repr_string) {
repr_string = PyUnicode_InternFromString("repr");
if (!repr_string)
return 0;
}
if (!ascii_string) {
ascii_string = PyUnicode_InternFromString("ascii");
if (!ascii_string)
return 0;
}
ADDOP_NAME(c, LOAD_GLOBAL, format_string, names); int oparg;
/* If needed, convert via str, repr, or ascii. */ /* Evaluate the expression to be formatted. */
if (e->v.FormattedValue.conversion != -1) {
switch (e->v.FormattedValue.conversion) {
case 's':
conversion_name = str_string;
break;
case 'r':
conversion_name = repr_string;
break;
case 'a':
conversion_name = ascii_string;
break;
default:
PyErr_SetString(PyExc_SystemError,
"Unrecognized conversion character");
return 0;
}
ADDOP_NAME(c, LOAD_GLOBAL, conversion_name, names);
}
/* Evaluate the value. */
VISIT(c, expr, e->v.FormattedValue.value); VISIT(c, expr, e->v.FormattedValue.value);
/* If needed, convert via str, repr, or ascii. */ switch (e->v.FormattedValue.conversion) {
if (conversion_name) { case 's': oparg = FVC_STR; break;
/* Call the function we previously pushed. */ case 'r': oparg = FVC_REPR; break;
ADDOP_I(c, CALL_FUNCTION, 1); case 'a': oparg = FVC_ASCII; break;
case -1: oparg = FVC_NONE; break;
default:
PyErr_SetString(PyExc_SystemError,
"Unrecognized conversion character");
return 0;
} }
/* If we have a format spec, use format(value, format_spec). Otherwise,
use the single argument form. */
if (e->v.FormattedValue.format_spec) { if (e->v.FormattedValue.format_spec) {
/* Evaluate the format spec, and update our opcode arg. */
VISIT(c, expr, e->v.FormattedValue.format_spec); VISIT(c, expr, e->v.FormattedValue.format_spec);
ADDOP_I(c, CALL_FUNCTION, 2); oparg |= FVS_HAVE_SPEC;
} else {
/* No format spec specified, call format(value). */
ADDOP_I(c, CALL_FUNCTION, 1);
} }
/* And push our opcode and oparg */
ADDOP_I(c, FORMAT_VALUE, oparg);
return 1; return 1;
} }
......
This diff is collapsed.
...@@ -154,7 +154,7 @@ static void *opcode_targets[256] = { ...@@ -154,7 +154,7 @@ static void *opcode_targets[256] = {
&&TARGET_BUILD_TUPLE_UNPACK, &&TARGET_BUILD_TUPLE_UNPACK,
&&TARGET_BUILD_SET_UNPACK, &&TARGET_BUILD_SET_UNPACK,
&&TARGET_SETUP_ASYNC_WITH, &&TARGET_SETUP_ASYNC_WITH,
&&_unknown_opcode, &&TARGET_FORMAT_VALUE,
&&_unknown_opcode, &&_unknown_opcode,
&&_unknown_opcode, &&_unknown_opcode,
&&_unknown_opcode, &&_unknown_opcode,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment