Commit a78c7954 authored by Eric V. Smith

Issue 25483: Add an opcode to make f-string formatting more robust.

parent 2753a096
......@@ -206,6 +206,14 @@ PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *);
PyAPI_FUNC(void) _PyEval_SignalAsyncExc(void);
#endif
/* Masks and values used by FORMAT_VALUE opcode. */
/* The oparg of FORMAT_VALUE packs two fields.  (oparg & FVC_MASK) selects
   the conversion applied to the value before formatting: */
#define FVC_MASK 0x3
/* FVC_NONE: no conversion requested. */
#define FVC_NONE 0x0
/* FVC_STR: the '!s' conversion. */
#define FVC_STR 0x1
/* FVC_REPR: the '!r' conversion. */
#define FVC_REPR 0x2
/* FVC_ASCII: the '!a' conversion. */
#define FVC_ASCII 0x3
/* (oparg & FVS_MASK) == FVS_HAVE_SPEC means a format spec was provided
   in addition to the value. */
#define FVS_MASK 0x4
#define FVS_HAVE_SPEC 0x4
#ifdef __cplusplus
}
......
......@@ -122,6 +122,7 @@ extern "C" {
#define BUILD_TUPLE_UNPACK 152
#define BUILD_SET_UNPACK 153
#define SETUP_ASYNC_WITH 154
#define FORMAT_VALUE 155
/* EXCEPT_HANDLER is a special, implicit block type which is created when
entering an except handler. It is not an opcode but we define it here
......
......@@ -223,12 +223,13 @@ _code_type = type(_write_atomic.__code__)
# Python 3.5b1 3330 (PEP 448: Additional Unpacking Generalizations)
# Python 3.5b2 3340 (fix dictionary display evaluation order #11205)
# Python 3.5b2 3350 (add GET_YIELD_FROM_ITER opcode #24400)
# Python 3.6a0 3360 (add FORMAT_VALUE opcode #25483)
#
# MAGIC must change whenever the bytecode emitted by the compiler may no
# longer be understood by older implementations of the eval loop (usually
# due to the addition of new opcodes).
# 3360: Python 3.6a0, FORMAT_VALUE opcode added (#25483).
MAGIC_NUMBER = (3360).to_bytes(2, 'little') + b'\r\n'
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c
_PYCACHE = '__pycache__'
......
......@@ -214,4 +214,6 @@ def_op('BUILD_MAP_UNPACK_WITH_CALL', 151)
def_op('BUILD_TUPLE_UNPACK', 152)
def_op('BUILD_SET_UNPACK', 153)
def_op('FORMAT_VALUE', 155)
del def_op, name_op, jrel_op, jabs_op
......@@ -3363,6 +3363,63 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
DISPATCH();
}
TARGET(FORMAT_VALUE) {
    /* Handles f-string value formatting.

       oparg & FVC_MASK selects an optional conversion (str, repr or
       ascii) applied to the value first.  If
       (oparg & FVS_MASK) == FVS_HAVE_SPEC, a format spec sits on the
       stack above the value.  The operands are consumed and the
       formatted result is pushed in their place. */
    PyObject *result;
    PyObject *fmt_spec;
    PyObject *value;
    PyObject *(*conv_fn)(PyObject *);
    int which_conversion = oparg & FVC_MASK;
    int have_fmt_spec = (oparg & FVS_MASK) == FVS_HAVE_SPEC;

    /* Pop both operands so that this block owns their references.
       Every failure path below releases them before jumping to
       error; if value were left on the stack (TOP()), the stack
       would keep a pointer to an already-released object and any
       later stack cleanup would release it a second time. */
    fmt_spec = have_fmt_spec ? POP() : NULL;
    value = POP();

    /* See if any conversion is specified. */
    switch (which_conversion) {
    case FVC_STR:   conv_fn = PyObject_Str;   break;
    case FVC_REPR:  conv_fn = PyObject_Repr;  break;
    case FVC_ASCII: conv_fn = PyObject_ASCII; break;

    /* Must be 0 (meaning no conversion), since only four
       values are allowed by (oparg & FVC_MASK). */
    default:        conv_fn = NULL;           break;
    }

    /* If there's a conversion function, call it and replace
       value with that result. Otherwise, just use value,
       without conversion. */
    if (conv_fn != NULL) {
        result = conv_fn(value);
        Py_DECREF(value);
        if (result == NULL) {
            Py_XDECREF(fmt_spec);
            goto error;
        }
        value = result;
    }

    /* If value is a unicode object, and there's no fmt_spec,
       then we know the result of format(value) is value
       itself. In that case, skip calling format(). */
    if (PyUnicode_CheckExact(value) && fmt_spec == NULL) {
        /* Do nothing, just transfer ownership to result. */
        result = value;
    } else {
        /* Actually call format(). */
        result = PyObject_Format(value, fmt_spec);
        Py_DECREF(value);
        Py_XDECREF(fmt_spec);
        if (result == NULL) {
            goto error;
        }
    }

    /* Only a fully valid, owned result ever reaches the stack. */
    PUSH(result);
    DISPATCH();
}
TARGET(EXTENDED_ARG) {
opcode = NEXTOP();
oparg = oparg<<16 | NEXTARG();
......
......@@ -1067,6 +1067,10 @@ PyCompile_OpcodeStackEffect(int opcode, int oparg)
return 1;
case GET_YIELD_FROM_ITER:
return 0;
case FORMAT_VALUE:
/* If there's a fmt_spec on the stack, we go from 2->1,
else 1->1. */
return (oparg & FVS_MASK) == FVS_HAVE_SPEC ? -1 : 0;
default:
return PY_INVALID_STACK_EFFECT;
}
......@@ -3241,83 +3245,47 @@ compiler_joined_str(struct compiler *c, expr_ty e)
return 1;
}
/* Used to implement f-strings. Format a single value.

   Emits code that evaluates the expression, then (if present) the format
   spec, and finally a single FORMAT_VALUE instruction whose oparg tells
   the eval loop which conversion to apply and whether a format spec was
   pushed.  Returns 1 on success and 0 on failure; an unrecognized
   conversion character sets SystemError. */
static int
compiler_formatted_value(struct compiler *c, expr_ty e)
{
    /* Our oparg encodes 2 pieces of information: the conversion
       character, and whether or not a format_spec was provided.

       Convert the conversion char to 2 bits:
       None: 000  0x0  FVC_NONE
       !s  : 001  0x1  FVC_STR
       !r  : 010  0x2  FVC_REPR
       !a  : 011  0x3  FVC_ASCII

       next bit is whether or not we have a format spec:
       yes : 100  0x4
       no  : 000  0x0
    */
    int oparg;

    /* Evaluate the expression to be formatted. */
    VISIT(c, expr, e->v.FormattedValue.value);

    switch (e->v.FormattedValue.conversion) {
    case 's': oparg = FVC_STR;   break;
    case 'r': oparg = FVC_REPR;  break;
    case 'a': oparg = FVC_ASCII; break;
    case -1:  oparg = FVC_NONE;  break;
    default:
        PyErr_SetString(PyExc_SystemError,
                        "Unrecognized conversion character");
        return 0;
    }

    if (e->v.FormattedValue.format_spec) {
        /* Evaluate the format spec, and update our opcode arg.  The
           flag must be set exactly when a spec has been pushed, because
           FORMAT_VALUE pops a spec only when FVS_HAVE_SPEC is present. */
        VISIT(c, expr, e->v.FormattedValue.format_spec);
        oparg |= FVS_HAVE_SPEC;
    }

    /* And push our opcode and oparg */
    ADDOP_I(c, FORMAT_VALUE, oparg);
    return 1;
}
......
This diff is collapsed.
......@@ -154,7 +154,7 @@ static void *opcode_targets[256] = {
&&TARGET_BUILD_TUPLE_UNPACK,
&&TARGET_BUILD_SET_UNPACK,
&&TARGET_SETUP_ASYNC_WITH,
&&_unknown_opcode,
&&TARGET_FORMAT_VALUE,
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment