Commit 54f5611f authored by Eric V. Smith

Issue 27948: Allow backslashes in the literal string portion of f-strings, but...

Issue 27948: Allow backslashes in the literal string portion of f-strings, but not in the expressions. Also, require expressions to begin and end with literal curly braces.
parent 29ea6a90
...@@ -1060,7 +1060,7 @@ class HTTPConnection: ...@@ -1060,7 +1060,7 @@ class HTTPConnection:
if encode_chunked and self._http_vsn == 11: if encode_chunked and self._http_vsn == 11:
# chunked encoding # chunked encoding
chunk = f'{len(chunk):X}''\r\n'.encode('ascii') + chunk \ chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
+ b'\r\n' + b'\r\n'
self.send(chunk) self.send(chunk)
......
...@@ -280,6 +280,6 @@ class saved_test_environment: ...@@ -280,6 +280,6 @@ class saved_test_environment:
print(f"Warning -- {name} was modified by {self.testname}", print(f"Warning -- {name} was modified by {self.testname}",
file=sys.stderr, flush=True) file=sys.stderr, flush=True)
if self.verbose > 1: if self.verbose > 1:
print(f" Before: {original}""\n"f" After: {current} ", print(f" Before: {original}\n After: {current} ",
file=sys.stderr, flush=True) file=sys.stderr, flush=True)
return False return False
...@@ -735,11 +735,11 @@ class FaultHandlerTests(unittest.TestCase): ...@@ -735,11 +735,11 @@ class FaultHandlerTests(unittest.TestCase):
('EXCEPTION_INT_DIVIDE_BY_ZERO', 'int divide by zero'), ('EXCEPTION_INT_DIVIDE_BY_ZERO', 'int divide by zero'),
('EXCEPTION_STACK_OVERFLOW', 'stack overflow'), ('EXCEPTION_STACK_OVERFLOW', 'stack overflow'),
): ):
self.check_windows_exception(""" self.check_windows_exception(f"""
import faulthandler import faulthandler
faulthandler.enable() faulthandler.enable()
faulthandler._raise_exception(faulthandler._{exc}) faulthandler._raise_exception(faulthandler._{exc})
""".format(exc=exc), """,
3, 3,
name) name)
......
...@@ -119,6 +119,14 @@ f'{a * x()}'""" ...@@ -119,6 +119,14 @@ f'{a * x()}'"""
self.assertEqual(f'a}}', 'a}') self.assertEqual(f'a}}', 'a}')
self.assertEqual(f'}}b', '}b') self.assertEqual(f'}}b', '}b')
self.assertEqual(f'a}}b', 'a}b') self.assertEqual(f'a}}b', 'a}b')
self.assertEqual(f'{{}}', '{}')
self.assertEqual(f'a{{}}', 'a{}')
self.assertEqual(f'{{b}}', '{b}')
self.assertEqual(f'{{}}c', '{}c')
self.assertEqual(f'a{{b}}', 'a{b}')
self.assertEqual(f'a{{}}c', 'a{}c')
self.assertEqual(f'{{b}}c', '{b}c')
self.assertEqual(f'a{{b}}c', 'a{b}c')
self.assertEqual(f'{{{10}', '{10') self.assertEqual(f'{{{10}', '{10')
self.assertEqual(f'}}{10}', '}10') self.assertEqual(f'}}{10}', '}10')
...@@ -302,56 +310,79 @@ f'{a * x()}'""" ...@@ -302,56 +310,79 @@ f'{a * x()}'"""
["f'{\n}'", ["f'{\n}'",
]) ])
def test_no_backslashes(self): def test_backslashes_in_string_part(self):
# See issue 27921 self.assertEqual(f'\t', '\t')
self.assertEqual(r'\t', '\\t')
# These should work, but currently don't self.assertEqual(rf'\t', '\\t')
self.assertAllRaise(SyntaxError, 'backslashes not allowed', self.assertEqual(f'{2}\t', '2\t')
[r"f'\t'", self.assertEqual(f'{2}\t{3}', '2\t3')
r"f'{2}\t'", self.assertEqual(f'\t{3}', '\t3')
r"f'{2}\t{3}'",
r"f'\t{3}'", self.assertEqual(f'\u0394', '\u0394')
self.assertEqual(r'\u0394', '\\u0394')
r"f'\N{GREEK CAPITAL LETTER DELTA}'", self.assertEqual(rf'\u0394', '\\u0394')
r"f'{2}\N{GREEK CAPITAL LETTER DELTA}'", self.assertEqual(f'{2}\u0394', '2\u0394')
r"f'{2}\N{GREEK CAPITAL LETTER DELTA}{3}'", self.assertEqual(f'{2}\u0394{3}', '2\u03943')
r"f'\N{GREEK CAPITAL LETTER DELTA}{3}'", self.assertEqual(f'\u0394{3}', '\u03943')
r"f'\u0394'", self.assertEqual(f'\U00000394', '\u0394')
r"f'{2}\u0394'", self.assertEqual(r'\U00000394', '\\U00000394')
r"f'{2}\u0394{3}'", self.assertEqual(rf'\U00000394', '\\U00000394')
r"f'\u0394{3}'", self.assertEqual(f'{2}\U00000394', '2\u0394')
self.assertEqual(f'{2}\U00000394{3}', '2\u03943')
r"f'\U00000394'", self.assertEqual(f'\U00000394{3}', '\u03943')
r"f'{2}\U00000394'",
r"f'{2}\U00000394{3}'", self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}', '\u0394')
r"f'\U00000394{3}'", self.assertEqual(f'{2}\N{GREEK CAPITAL LETTER DELTA}', '2\u0394')
self.assertEqual(f'{2}\N{GREEK CAPITAL LETTER DELTA}{3}', '2\u03943')
r"f'\x20'", self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}{3}', '\u03943')
r"f'{2}\x20'", self.assertEqual(f'2\N{GREEK CAPITAL LETTER DELTA}', '2\u0394')
r"f'{2}\x20{3}'", self.assertEqual(f'2\N{GREEK CAPITAL LETTER DELTA}3', '2\u03943')
r"f'\x20{3}'", self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}3', '\u03943')
r"f'2\x20'", self.assertEqual(f'\x20', ' ')
r"f'2\x203'", self.assertEqual(r'\x20', '\\x20')
r"f'2\x203'", self.assertEqual(rf'\x20', '\\x20')
self.assertEqual(f'{2}\x20', '2 ')
self.assertEqual(f'{2}\x20{3}', '2 3')
self.assertEqual(f'\x20{3}', ' 3')
self.assertEqual(f'2\x20', '2 ')
self.assertEqual(f'2\x203', '2 3')
self.assertEqual(f'\x203', ' 3')
def test_misformed_unicode_character_name(self):
# These tests are needed because unicode names are parsed
# differently inside f-strings.
self.assertAllRaise(SyntaxError, r"\(unicode error\) 'unicodeescape' codec can't decode bytes in position .*: malformed \\N character escape",
[r"f'\N'",
r"f'\N{'",
r"f'\N{GREEK CAPITAL LETTER DELTA'",
# Here are the non-f-string versions,
# which should give the same errors.
r"'\N'",
r"'\N{'",
r"'\N{GREEK CAPITAL LETTER DELTA'",
]) ])
# And these don't work now, and shouldn't work in the future. def test_no_backslashes_in_expression_part(self):
self.assertAllRaise(SyntaxError, 'backslashes not allowed', self.assertAllRaise(SyntaxError, 'f-string expression part cannot include a backslash',
[r"f'{\'a\'}'", [r"f'{\'a\'}'",
r"f'{\t3}'", r"f'{\t3}'",
r"f'{\}'",
r"rf'{\'a\'}'",
r"rf'{\t3}'",
r"rf'{\}'",
r"""rf'{"\N{LEFT CURLY BRACKET}"}'""",
]) ])
# add this when backslashes are allowed again. see issue 27921 def test_no_escapes_for_braces(self):
# these test will be needed because unicode names will be parsed # \x7b is '{'. Make sure it doesn't start an expression.
# differently once backslashes are allowed inside expressions self.assertEqual(f'\x7b2}}', '{2}')
## def test_misformed_unicode_character_name(self): self.assertEqual(f'\x7b2', '{2')
## self.assertAllRaise(SyntaxError, 'xx', self.assertEqual(f'\u007b2', '{2')
## [r"f'\N'", self.assertEqual(f'\N{LEFT CURLY BRACKET}2\N{RIGHT CURLY BRACKET}', '{2}')
## [r"f'\N{'",
## [r"f'\N{GREEK CAPITAL LETTER DELTA'",
## ])
def test_newlines_in_expressions(self): def test_newlines_in_expressions(self):
self.assertEqual(f'{0}', '0') self.assertEqual(f'{0}', '0')
...@@ -509,6 +540,14 @@ f'{a * x()}'""" ...@@ -509,6 +540,14 @@ f'{a * x()}'"""
"ruf''", "ruf''",
"FUR''", "FUR''",
"Fur''", "Fur''",
"fb''",
"fB''",
"Fb''",
"FB''",
"bf''",
"bF''",
"Bf''",
"BF''",
]) ])
def test_leading_trailing_spaces(self): def test_leading_trailing_spaces(self):
...@@ -551,8 +590,8 @@ f'{a * x()}'""" ...@@ -551,8 +590,8 @@ f'{a * x()}'"""
self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character', self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character',
["f'{3!g}'", ["f'{3!g}'",
"f'{3!A}'", "f'{3!A}'",
"f'{3!A}'", "f'{3!3}'",
"f'{3!A}'", "f'{3!G}'",
"f'{3!!}'", "f'{3!!}'",
"f'{3!:}'", "f'{3!:}'",
"f'{3! s}'", # no space before conversion char "f'{3! s}'", # no space before conversion char
...@@ -601,6 +640,7 @@ f'{a * x()}'""" ...@@ -601,6 +640,7 @@ f'{a * x()}'"""
"f'{3!s:3'", "f'{3!s:3'",
"f'x{'", "f'x{'",
"f'x{x'", "f'x{x'",
"f'{x'",
"f'{3:s'", "f'{3:s'",
"f'{{{'", "f'{{{'",
"f'{{}}{'", "f'{{}}{'",
......
...@@ -285,12 +285,12 @@ class DirectoryTestCase(ASTTestCase): ...@@ -285,12 +285,12 @@ class DirectoryTestCase(ASTTestCase):
if test.support.verbose: if test.support.verbose:
print('Testing %s' % filename) print('Testing %s' % filename)
# it's very much a hack that I'm skipping these files, but # Some f-strings are not correctly round-tripped by
# I can't figure out why they fail. I'll fix it when I # Tools/parser/unparse.py. See issue 28002 for details.
# address issue #27948. # We need to skip files that contain such f-strings.
if os.path.basename(filename) in ('test_fstring.py', 'test_traceback.py'): if os.path.basename(filename) in ('test_fstring.py', ):
if test.support.verbose: if test.support.verbose:
print(f'Skipping {filename}: see issue 27921') print(f'Skipping {filename}: see issue 28002')
continue continue
with self.subTest(filename=filename): with self.subTest(filename=filename):
......
...@@ -326,13 +326,13 @@ class TracebackFormatTests(unittest.TestCase): ...@@ -326,13 +326,13 @@ class TracebackFormatTests(unittest.TestCase):
lineno_f = f.__code__.co_firstlineno lineno_f = f.__code__.co_firstlineno
result_f = ( result_f = (
'Traceback (most recent call last):\n' 'Traceback (most recent call last):\n'
f' File "{__file__}", line {lineno_f+5}, in _check_recursive_traceback_display''\n' f' File "{__file__}", line {lineno_f+5}, in _check_recursive_traceback_display\n'
' f()\n' ' f()\n'
f' File "{__file__}", line {lineno_f+1}, in f''\n' f' File "{__file__}", line {lineno_f+1}, in f\n'
' f()\n' ' f()\n'
f' File "{__file__}", line {lineno_f+1}, in f''\n' f' File "{__file__}", line {lineno_f+1}, in f\n'
' f()\n' ' f()\n'
f' File "{__file__}", line {lineno_f+1}, in f''\n' f' File "{__file__}", line {lineno_f+1}, in f\n'
' f()\n' ' f()\n'
# XXX: The following line changes depending on whether the tests # XXX: The following line changes depending on whether the tests
# are run through the interactive interpreter or with -m # are run through the interactive interpreter or with -m
...@@ -371,20 +371,20 @@ class TracebackFormatTests(unittest.TestCase): ...@@ -371,20 +371,20 @@ class TracebackFormatTests(unittest.TestCase):
lineno_g = g.__code__.co_firstlineno lineno_g = g.__code__.co_firstlineno
result_g = ( result_g = (
f' File "{__file__}", line {lineno_g+2}, in g''\n' f' File "{__file__}", line {lineno_g+2}, in g\n'
' return g(count-1)\n' ' return g(count-1)\n'
f' File "{__file__}", line {lineno_g+2}, in g''\n' f' File "{__file__}", line {lineno_g+2}, in g\n'
' return g(count-1)\n' ' return g(count-1)\n'
f' File "{__file__}", line {lineno_g+2}, in g''\n' f' File "{__file__}", line {lineno_g+2}, in g\n'
' return g(count-1)\n' ' return g(count-1)\n'
' [Previous line repeated 6 more times]\n' ' [Previous line repeated 6 more times]\n'
f' File "{__file__}", line {lineno_g+3}, in g''\n' f' File "{__file__}", line {lineno_g+3}, in g\n'
' raise ValueError\n' ' raise ValueError\n'
'ValueError\n' 'ValueError\n'
) )
tb_line = ( tb_line = (
'Traceback (most recent call last):\n' 'Traceback (most recent call last):\n'
f' File "{__file__}", line {lineno_g+7}, in _check_recursive_traceback_display''\n' f' File "{__file__}", line {lineno_g+7}, in _check_recursive_traceback_display\n'
' g()\n' ' g()\n'
) )
expected = (tb_line + result_g).splitlines() expected = (tb_line + result_g).splitlines()
...@@ -408,16 +408,16 @@ class TracebackFormatTests(unittest.TestCase): ...@@ -408,16 +408,16 @@ class TracebackFormatTests(unittest.TestCase):
lineno_h = h.__code__.co_firstlineno lineno_h = h.__code__.co_firstlineno
result_h = ( result_h = (
'Traceback (most recent call last):\n' 'Traceback (most recent call last):\n'
f' File "{__file__}", line {lineno_h+7}, in _check_recursive_traceback_display''\n' f' File "{__file__}", line {lineno_h+7}, in _check_recursive_traceback_display\n'
' h()\n' ' h()\n'
f' File "{__file__}", line {lineno_h+2}, in h''\n' f' File "{__file__}", line {lineno_h+2}, in h\n'
' return h(count-1)\n' ' return h(count-1)\n'
f' File "{__file__}", line {lineno_h+2}, in h''\n' f' File "{__file__}", line {lineno_h+2}, in h\n'
' return h(count-1)\n' ' return h(count-1)\n'
f' File "{__file__}", line {lineno_h+2}, in h''\n' f' File "{__file__}", line {lineno_h+2}, in h\n'
' return h(count-1)\n' ' return h(count-1)\n'
' [Previous line repeated 6 more times]\n' ' [Previous line repeated 6 more times]\n'
f' File "{__file__}", line {lineno_h+3}, in h''\n' f' File "{__file__}", line {lineno_h+3}, in h\n'
' g()\n' ' g()\n'
) )
expected = (result_h + result_g).splitlines() expected = (result_h + result_g).splitlines()
......
...@@ -402,7 +402,7 @@ class StackSummary(list): ...@@ -402,7 +402,7 @@ class StackSummary(list):
count += 1 count += 1
else: else:
if count > 3: if count > 3:
result.append(f' [Previous line repeated {count-3} more times]'+'\n') result.append(f' [Previous line repeated {count-3} more times]\n')
last_file = frame.filename last_file = frame.filename
last_line = frame.lineno last_line = frame.lineno
last_name = frame.name last_name = frame.name
...@@ -419,7 +419,7 @@ class StackSummary(list): ...@@ -419,7 +419,7 @@ class StackSummary(list):
row.append(' {name} = {value}\n'.format(name=name, value=value)) row.append(' {name} = {value}\n'.format(name=name, value=value))
result.append(''.join(row)) result.append(''.join(row))
if count > 3: if count > 3:
result.append(f' [Previous line repeated {count-3} more times]'+'\n') result.append(f' [Previous line repeated {count-3} more times]\n')
return result return result
......
...@@ -10,6 +10,13 @@ What's New in Python 3.6.0 beta 1 ...@@ -10,6 +10,13 @@ What's New in Python 3.6.0 beta 1
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #27948: In f-strings, only allow backslashes outside the braces
(in the literal string portion); they are not allowed inside the braces
(where the expressions are). This is a breaking change from the 3.6
alpha releases, where backslashes are allowed anywhere in an
f-string. Also, require that expressions inside f-strings be
enclosed within literal braces, and not escapes like
f'\x7b"hi"\x7d'.
- Issue #28046: Remove platform-specific directories from sys.path. - Issue #28046: Remove platform-specific directories from sys.path.
- Issue #25758: Prevents zipimport from unnecessarily encoding a filename - Issue #25758: Prevents zipimport from unnecessarily encoding a filename
...@@ -56,11 +63,6 @@ Core and Builtins ...@@ -56,11 +63,6 @@ Core and Builtins
- Issue #27355: Removed support for Windows CE. It was never finished, - Issue #27355: Removed support for Windows CE. It was never finished,
and Windows CE is no longer a relevant platform for Python. and Windows CE is no longer a relevant platform for Python.
- Issue #27921: Disallow backslashes in f-strings. This is a temporary
restriction: in beta 2, backslashes will only be disallowed inside
the braces (where the expressions are). This is a breaking change
from the 3.6 alpha releases.
- Implement PEP 523. - Implement PEP 523.
- Issue #27870: A left shift of zero by a large integer no longer attempts - Issue #27870: A left shift of zero by a large integer no longer attempts
......
...@@ -4155,141 +4155,74 @@ decode_unicode_with_escapes(struct compiling *c, const char *s, size_t len) ...@@ -4155,141 +4155,74 @@ decode_unicode_with_escapes(struct compiling *c, const char *s, size_t len)
return v; return v;
} }
/* Compile this expression in to an expr_ty. We know that we can /* Compile this expression in to an expr_ty. Add parens around the
temporarily modify the character before the start of this string expression, in order to allow leading spaces in the expression. */
(it's '{'), and we know we can temporarily modify the character
after this string (it is a '}'). Leverage this to create a
sub-string with enough room for us to add parens around the
expression. This is to allow strings with embedded newlines, for
example. */
static expr_ty static expr_ty
fstring_compile_expr(PyObject *str, Py_ssize_t expr_start, fstring_compile_expr(const char *expr_start, const char *expr_end,
Py_ssize_t expr_end, struct compiling *c, const node *n) struct compiling *c, const node *n)
{ {
int all_whitespace = 1;
int kind;
void *data;
PyCompilerFlags cf; PyCompilerFlags cf;
mod_ty mod; mod_ty mod;
char *utf_expr; char *str;
PyObject *o;
Py_ssize_t len;
Py_ssize_t i; Py_ssize_t i;
Py_UCS4 end_ch = -1;
int all_whitespace;
PyObject *sub = NULL;
/* We only decref sub if we allocated it with a PyUnicode_Substring.
decref_sub records that. */
int decref_sub = 0;
assert(str);
assert(expr_start >= 0 && expr_start < PyUnicode_GET_LENGTH(str));
assert(expr_end >= 0 && expr_end < PyUnicode_GET_LENGTH(str));
assert(expr_end >= expr_start); assert(expr_end >= expr_start);
assert(*(expr_start-1) == '{');
/* There has to be at least one character on each side of the assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':');
expression inside this str. This will have been caught before
we're called. */ /* We know there are no escapes here, because backslashes are not allowed,
assert(expr_start >= 1); and we know it's utf-8 encoded (per PEP 263). But, in order to check
assert(expr_end <= PyUnicode_GET_LENGTH(str)-1); that each char is not whitespace, we need to decode it to unicode.
Which is unfortunate, but such is life. */
/* If the substring is all whitespace, it's an error. We need to
catch this here, and not when we call PyParser_ASTFromString, /* If the substring is all whitespace, it's an error. We need to catch
because turning the expression '' in to '()' would go from this here, and not when we call PyParser_ASTFromString, because turning
being invalid to valid. */ the expression '' in to '()' would go from being invalid to valid. */
/* Note that this code says an empty string is all /* Note that this code says an empty string is all whitespace. That's
whitespace. That's important. There's a test for it: f'{}'. */ important. There's a test for it: f'{}'. */
all_whitespace = 1; o = PyUnicode_DecodeUTF8(expr_start, expr_end-expr_start, NULL);
for (i = expr_start; i < expr_end; i++) { if (o == NULL)
if (!Py_UNICODE_ISSPACE(PyUnicode_READ_CHAR(str, i))) { return NULL;
len = PyUnicode_GET_LENGTH(o);
kind = PyUnicode_KIND(o);
data = PyUnicode_DATA(o);
for (i = 0; i < len; i++) {
if (!Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) {
all_whitespace = 0; all_whitespace = 0;
break; break;
} }
} }
Py_DECREF(o);
if (all_whitespace) { if (all_whitespace) {
ast_error(c, n, "f-string: empty expression not allowed"); ast_error(c, n, "f-string: empty expression not allowed");
goto error; return NULL;
} }
/* If the substring will be the entire source string, we can't use /* Reuse len to be the length of the utf-8 input string. */
PyUnicode_Substring, since it will return another reference to len = expr_end - expr_start;
our original string. Because we're modifying the string in /* Allocate 3 extra bytes: open paren, close paren, null byte. */
place, that's a no-no. So, detect that case and just use our str = PyMem_RawMalloc(len + 3);
string directly. */ if (str == NULL)
return NULL;
if (expr_start-1 == 0 && expr_end+1 == PyUnicode_GET_LENGTH(str)) {
/* If str is well formed, then the first and last chars must
be '{' and '}', respectively. But, if there's a syntax
error, for example f'{3!', then the last char won't be a
closing brace. So, remember the last character we read in
order for us to restore it. */
end_ch = PyUnicode_ReadChar(str, expr_end-expr_start+1);
assert(end_ch != (Py_UCS4)-1);
/* In all cases, however, start_ch must be '{'. */
assert(PyUnicode_ReadChar(str, 0) == '{');
sub = str;
} else {
/* Create a substring object. It must be a new object, with
refcount==1, so that we can modify it. */
sub = PyUnicode_Substring(str, expr_start-1, expr_end+1);
if (!sub)
goto error;
assert(sub != str); /* Make sure it's a new string. */
decref_sub = 1; /* Remember to deallocate it on error. */
}
/* Put () around the expression. */ str[0] = '(';
if (PyUnicode_WriteChar(sub, 0, '(') < 0 || memcpy(str+1, expr_start, len);
PyUnicode_WriteChar(sub, expr_end-expr_start+1, ')') < 0) str[len+1] = ')';
goto error; str[len+2] = 0;
/* No need to free the memory returned here: it's managed by the
string. */
utf_expr = PyUnicode_AsUTF8(sub);
if (!utf_expr)
goto error;
cf.cf_flags = PyCF_ONLY_AST; cf.cf_flags = PyCF_ONLY_AST;
mod = PyParser_ASTFromString(utf_expr, "<fstring>", mod = PyParser_ASTFromString(str, "<fstring>",
Py_eval_input, &cf, c->c_arena); Py_eval_input, &cf, c->c_arena);
PyMem_RawFree(str);
if (!mod) if (!mod)
goto error;
if (sub != str)
/* Clear instead of decref in case we ever modify this code to change
the error handling: this is safest because the XDECREF won't try
and decref it when it's NULL. */
/* No need to restore the chars in sub, since we know it's getting
ready to get deleted (refcount must be 1, since we got a new string
in PyUnicode_Substring). */
Py_CLEAR(sub);
else {
assert(!decref_sub);
assert(end_ch != (Py_UCS4)-1);
/* Restore str, which we earlier modified directly. */
if (PyUnicode_WriteChar(str, 0, '{') < 0 ||
PyUnicode_WriteChar(str, expr_end-expr_start+1, end_ch) < 0)
goto error;
}
return mod->v.Expression.body;
error:
/* Only decref sub if it was the result of a call to SubString. */
if (decref_sub)
Py_XDECREF(sub);
if (end_ch != (Py_UCS4)-1) {
/* We only get here if we modified str. Make sure that's the
case: str will be equal to sub. */
if (str == sub) {
/* Don't check the error, because we've already set the
error state (that's why we're in 'error', after
all). */
PyUnicode_WriteChar(str, 0, '{');
PyUnicode_WriteChar(str, expr_end-expr_start+1, end_ch);
}
}
return NULL; return NULL;
return mod->v.Expression.body;
} }
/* Return -1 on error. /* Return -1 on error.
...@@ -4301,35 +4234,38 @@ error: ...@@ -4301,35 +4234,38 @@ error:
doubled braces. doubled braces.
*/ */
static int static int
fstring_find_literal(PyObject *str, Py_ssize_t *ofs, PyObject **literal, fstring_find_literal(const char **str, const char *end, int raw,
int recurse_lvl, struct compiling *c, const node *n) PyObject **literal, int recurse_lvl,
struct compiling *c, const node *n)
{ {
/* Get any literal string. It ends when we hit an un-doubled brace, or the /* Get any literal string. It ends when we hit an un-doubled left
end of the string. */ brace (which isn't part of a unicode name escape such as
"\N{EULER CONSTANT}"), or the end of the string. */
Py_ssize_t literal_start, literal_end; const char *literal_start = *str;
const char *literal_end;
int in_named_escape = 0;
int result = 0; int result = 0;
enum PyUnicode_Kind kind = PyUnicode_KIND(str);
void *data = PyUnicode_DATA(str);
assert(*literal == NULL); assert(*literal == NULL);
for (; *str < end; (*str)++) {
literal_start = *ofs; char ch = **str;
for (; *ofs < PyUnicode_GET_LENGTH(str); *ofs += 1) { if (!in_named_escape && ch == '{' && (*str)-literal_start >= 2 &&
Py_UCS4 ch = PyUnicode_READ(kind, data, *ofs); *(*str-2) == '\\' && *(*str-1) == 'N') {
if (ch == '{' || ch == '}') { in_named_escape = 1;
} else if (in_named_escape && ch == '}') {
in_named_escape = 0;
} else if (ch == '{' || ch == '}') {
/* Check for doubled braces, but only at the top level. If /* Check for doubled braces, but only at the top level. If
we checked at every level, then f'{0:{3}}' would fail we checked at every level, then f'{0:{3}}' would fail
with the two closing braces. */ with the two closing braces. */
if (recurse_lvl == 0) { if (recurse_lvl == 0) {
if (*ofs + 1 < PyUnicode_GET_LENGTH(str) && if (*str+1 < end && *(*str+1) == ch) {
PyUnicode_READ(kind, data, *ofs + 1) == ch) {
/* We're going to tell the caller that the literal ends /* We're going to tell the caller that the literal ends
here, but that they should continue scanning. But also here, but that they should continue scanning. But also
skip over the second brace when we resume scanning. */ skip over the second brace when we resume scanning. */
literal_end = *ofs + 1; literal_end = *str+1;
*ofs += 2; *str += 2;
result = 1; result = 1;
goto done; goto done;
} }
...@@ -4341,34 +4277,36 @@ fstring_find_literal(PyObject *str, Py_ssize_t *ofs, PyObject **literal, ...@@ -4341,34 +4277,36 @@ fstring_find_literal(PyObject *str, Py_ssize_t *ofs, PyObject **literal,
return -1; return -1;
} }
} }
/* We're either at a '{', which means we're starting another /* We're either at a '{', which means we're starting another
expression; or a '}', which means we're at the end of this expression; or a '}', which means we're at the end of this
f-string (for a nested format_spec). */ f-string (for a nested format_spec). */
break; break;
} }
} }
literal_end = *ofs; literal_end = *str;
assert(*str <= end);
assert(*ofs == PyUnicode_GET_LENGTH(str) || assert(*str == end || **str == '{' || **str == '}');
PyUnicode_READ(kind, data, *ofs) == '{' ||
PyUnicode_READ(kind, data, *ofs) == '}');
done: done:
if (literal_start != literal_end) { if (literal_start != literal_end) {
*literal = PyUnicode_Substring(str, literal_start, literal_end); if (raw)
*literal = PyUnicode_DecodeUTF8Stateful(literal_start,
literal_end-literal_start,
NULL, NULL);
else
*literal = decode_unicode_with_escapes(c, literal_start,
literal_end-literal_start);
if (!*literal) if (!*literal)
return -1; return -1;
} }
return result; return result;
} }
/* Forward declaration because parsing is recursive. */ /* Forward declaration because parsing is recursive. */
static expr_ty static expr_ty
fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
struct compiling *c, const node *n); struct compiling *c, const node *n);
/* Parse the f-string str, starting at ofs. We know *ofs starts an /* Parse the f-string at *str, ending at end. We know *str starts an
expression (so it must be a '{'). Returns the FormattedValue node, expression (so it must be a '{'). Returns the FormattedValue node,
which includes the expression, conversion character, and which includes the expression, conversion character, and
format_spec expression. format_spec expression.
...@@ -4379,23 +4317,20 @@ fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, ...@@ -4379,23 +4317,20 @@ fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
find the end of all valid ones. Any errors inside the expression find the end of all valid ones. Any errors inside the expression
will be caught when we parse it later. */ will be caught when we parse it later. */
static int static int
fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
expr_ty *expression, struct compiling *c, const node *n) expr_ty *expression, struct compiling *c, const node *n)
{ {
/* Return -1 on error, else 0. */ /* Return -1 on error, else 0. */
Py_ssize_t expr_start; const char *expr_start;
Py_ssize_t expr_end; const char *expr_end;
expr_ty simple_expression; expr_ty simple_expression;
expr_ty format_spec = NULL; /* Optional format specifier. */ expr_ty format_spec = NULL; /* Optional format specifier. */
Py_UCS4 conversion = -1; /* The conversion char. -1 if not specified. */ char conversion = -1; /* The conversion char. -1 if not specified. */
enum PyUnicode_Kind kind = PyUnicode_KIND(str);
void *data = PyUnicode_DATA(str);
/* 0 if we're not in a string, else the quote char we're trying to /* 0 if we're not in a string, else the quote char we're trying to
match (single or double quote). */ match (single or double quote). */
Py_UCS4 quote_char = 0; char quote_char = 0;
/* If we're inside a string, 1=normal, 3=triple-quoted. */ /* If we're inside a string, 1=normal, 3=triple-quoted. */
int string_type = 0; int string_type = 0;
...@@ -4412,22 +4347,30 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, ...@@ -4412,22 +4347,30 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
/* The first char must be a left brace, or we wouldn't have gotten /* The first char must be a left brace, or we wouldn't have gotten
here. Skip over it. */ here. Skip over it. */
assert(PyUnicode_READ(kind, data, *ofs) == '{'); assert(**str == '{');
*ofs += 1; *str += 1;
expr_start = *ofs; expr_start = *str;
for (; *ofs < PyUnicode_GET_LENGTH(str); *ofs += 1) { for (; *str < end; (*str)++) {
Py_UCS4 ch; char ch;
/* Loop invariants. */ /* Loop invariants. */
assert(nested_depth >= 0); assert(nested_depth >= 0);
assert(*ofs >= expr_start); assert(*str >= expr_start && *str < end);
if (quote_char) if (quote_char)
assert(string_type == 1 || string_type == 3); assert(string_type == 1 || string_type == 3);
else else
assert(string_type == 0); assert(string_type == 0);
ch = PyUnicode_READ(kind, data, *ofs); ch = **str;
/* Nowhere inside an expression is a backslash allowed. */
if (ch == '\\') {
/* Error: can't include a backslash character, inside
parens or strings or not. */
ast_error(c, n, "f-string expression part "
"cannot include a backslash");
return -1;
}
if (quote_char) { if (quote_char) {
/* We're inside a string. See if we're at the end. */ /* We're inside a string. See if we're at the end. */
/* This code needs to implement the same non-error logic /* This code needs to implement the same non-error logic
...@@ -4443,11 +4386,9 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, ...@@ -4443,11 +4386,9 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
/* Does this match the string_type (single or triple /* Does this match the string_type (single or triple
quoted)? */ quoted)? */
if (string_type == 3) { if (string_type == 3) {
if (*ofs+2 < PyUnicode_GET_LENGTH(str) && if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
PyUnicode_READ(kind, data, *ofs+1) == ch &&
PyUnicode_READ(kind, data, *ofs+2) == ch) {
/* We're at the end of a triple quoted string. */ /* We're at the end of a triple quoted string. */
*ofs += 2; *str += 2;
string_type = 0; string_type = 0;
quote_char = 0; quote_char = 0;
continue; continue;
...@@ -4459,21 +4400,11 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, ...@@ -4459,21 +4400,11 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
continue; continue;
} }
} }
/* We're inside a string, and not finished with the
string. If this is a backslash, skip the next char (it
might be an end quote that needs skipping). Otherwise,
just consume this character normally. */
if (ch == '\\' && *ofs+1 < PyUnicode_GET_LENGTH(str)) {
/* Just skip the next char, whatever it is. */
*ofs += 1;
}
} else if (ch == '\'' || ch == '"') { } else if (ch == '\'' || ch == '"') {
/* Is this a triple quoted string? */ /* Is this a triple quoted string? */
if (*ofs+2 < PyUnicode_GET_LENGTH(str) && if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
PyUnicode_READ(kind, data, *ofs+1) == ch &&
PyUnicode_READ(kind, data, *ofs+2) == ch) {
string_type = 3; string_type = 3;
*ofs += 2; *str += 2;
} else { } else {
/* Start of a normal string. */ /* Start of a normal string. */
string_type = 1; string_type = 1;
...@@ -4495,18 +4426,17 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, ...@@ -4495,18 +4426,17 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
/* First, test for the special case of "!=". Since '=' is /* First, test for the special case of "!=". Since '=' is
not an allowed conversion character, nothing is lost in not an allowed conversion character, nothing is lost in
this test. */ this test. */
if (ch == '!' && *ofs+1 < PyUnicode_GET_LENGTH(str) && if (ch == '!' && *str+1 < end && *(*str+1) == '=') {
PyUnicode_READ(kind, data, *ofs+1) == '=')
/* This isn't a conversion character, just continue. */ /* This isn't a conversion character, just continue. */
continue; continue;
}
/* Normal way out of this loop. */ /* Normal way out of this loop. */
break; break;
} else { } else {
/* Just consume this char and loop around. */ /* Just consume this char and loop around. */
} }
} }
expr_end = *ofs; expr_end = *str;
/* If we leave this loop in a string or with mismatched parens, we /* If we leave this loop in a string or with mismatched parens, we
don't care. We'll get a syntax error when compiling the don't care. We'll get a syntax error when compiling the
expression. But, we can produce a better error message, so expression. But, we can produce a better error message, so
...@@ -4520,24 +4450,24 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, ...@@ -4520,24 +4450,24 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
return -1; return -1;
} }
if (*ofs >= PyUnicode_GET_LENGTH(str)) if (*str >= end)
goto unexpected_end_of_string; goto unexpected_end_of_string;
/* Compile the expression as soon as possible, so we show errors /* Compile the expression as soon as possible, so we show errors
related to the expression before errors related to the related to the expression before errors related to the
conversion or format_spec. */ conversion or format_spec. */
simple_expression = fstring_compile_expr(str, expr_start, expr_end, c, n); simple_expression = fstring_compile_expr(expr_start, expr_end, c, n);
if (!simple_expression) if (!simple_expression)
return -1; return -1;
/* Check for a conversion char, if present. */ /* Check for a conversion char, if present. */
if (PyUnicode_READ(kind, data, *ofs) == '!') { if (**str == '!') {
*ofs += 1; *str += 1;
if (*ofs >= PyUnicode_GET_LENGTH(str)) if (*str >= end)
goto unexpected_end_of_string; goto unexpected_end_of_string;
conversion = PyUnicode_READ(kind, data, *ofs); conversion = **str;
*ofs += 1; *str += 1;
/* Validate the conversion. */ /* Validate the conversion. */
if (!(conversion == 's' || conversion == 'r' if (!(conversion == 's' || conversion == 'r'
...@@ -4549,30 +4479,29 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, ...@@ -4549,30 +4479,29 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
} }
/* Check for the format spec, if present. */ /* Check for the format spec, if present. */
if (*ofs >= PyUnicode_GET_LENGTH(str)) if (*str >= end)
goto unexpected_end_of_string; goto unexpected_end_of_string;
if (PyUnicode_READ(kind, data, *ofs) == ':') { if (**str == ':') {
*ofs += 1; *str += 1;
if (*ofs >= PyUnicode_GET_LENGTH(str)) if (*str >= end)
goto unexpected_end_of_string; goto unexpected_end_of_string;
/* Parse the format spec. */ /* Parse the format spec. */
format_spec = fstring_parse(str, ofs, recurse_lvl+1, c, n); format_spec = fstring_parse(str, end, raw, recurse_lvl+1, c, n);
if (!format_spec) if (!format_spec)
return -1; return -1;
} }
if (*ofs >= PyUnicode_GET_LENGTH(str) || if (*str >= end || **str != '}')
PyUnicode_READ(kind, data, *ofs) != '}')
goto unexpected_end_of_string; goto unexpected_end_of_string;
/* We're at a right brace. Consume it. */ /* We're at a right brace. Consume it. */
assert(*ofs < PyUnicode_GET_LENGTH(str)); assert(*str < end);
assert(PyUnicode_READ(kind, data, *ofs) == '}'); assert(**str == '}');
*ofs += 1; *str += 1;
/* And now create the FormattedValue node that represents this entire /* And now create the FormattedValue node that represents this
expression with the conversion and format spec. */ entire expression with the conversion and format spec. */
*expression = FormattedValue(simple_expression, (int)conversion, *expression = FormattedValue(simple_expression, (int)conversion,
format_spec, LINENO(n), n->n_col_offset, format_spec, LINENO(n), n->n_col_offset,
c->c_arena); c->c_arena);
...@@ -4610,8 +4539,9 @@ unexpected_end_of_string: ...@@ -4610,8 +4539,9 @@ unexpected_end_of_string:
we're finished. we're finished.
*/ */
static int static int
fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, fstring_find_literal_and_expr(const char **str, const char *end, int raw,
PyObject **literal, expr_ty *expression, int recurse_lvl, PyObject **literal,
expr_ty *expression,
struct compiling *c, const node *n) struct compiling *c, const node *n)
{ {
int result; int result;
...@@ -4619,7 +4549,7 @@ fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, ...@@ -4619,7 +4549,7 @@ fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
assert(*literal == NULL && *expression == NULL); assert(*literal == NULL && *expression == NULL);
/* Get any literal string. */ /* Get any literal string. */
result = fstring_find_literal(str, ofs, literal, recurse_lvl, c, n); result = fstring_find_literal(str, end, raw, literal, recurse_lvl, c, n);
if (result < 0) if (result < 0)
goto error; goto error;
...@@ -4629,10 +4559,7 @@ fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, ...@@ -4629,10 +4559,7 @@ fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
/* We have a literal, but don't look at the expression. */ /* We have a literal, but don't look at the expression. */
return 1; return 1;
assert(*ofs <= PyUnicode_GET_LENGTH(str)); if (*str >= end || **str == '}')
if (*ofs >= PyUnicode_GET_LENGTH(str) ||
PyUnicode_READ_CHAR(str, *ofs) == '}')
/* We're at the end of the string or the end of a nested /* We're at the end of the string or the end of a nested
f-string: no expression. The top-level error case where we f-string: no expression. The top-level error case where we
expect to be at the end of the string but we're at a '}' is expect to be at the end of the string but we're at a '}' is
...@@ -4640,10 +4567,9 @@ fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, ...@@ -4640,10 +4567,9 @@ fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
return 0; return 0;
/* We must now be the start of an expression, on a '{'. */ /* We must now be the start of an expression, on a '{'. */
assert(*ofs < PyUnicode_GET_LENGTH(str) && assert(**str == '{');
PyUnicode_READ_CHAR(str, *ofs) == '{');
if (fstring_find_expr(str, ofs, recurse_lvl, expression, c, n) < 0) if (fstring_find_expr(str, end, raw, recurse_lvl, expression, c, n) < 0)
goto error; goto error;
return 0; return 0;
...@@ -4852,13 +4778,11 @@ FstringParser_ConcatAndDel(FstringParser *state, PyObject *str) ...@@ -4852,13 +4778,11 @@ FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
return 0; return 0;
} }
/* Parse an f-string. The f-string is in str, starting at ofs, with no 'f' /* Parse an f-string. The f-string is in *str to end, with no
or quotes. str is not decref'd, since we don't know if it's used elsewhere. 'f' or quotes. */
And if we're only looking at a part of a string, then decref'ing is
definitely not the right thing to do! */
static int static int
FstringParser_ConcatFstring(FstringParser *state, PyObject *str, FstringParser_ConcatFstring(FstringParser *state, const char **str,
Py_ssize_t *ofs, int recurse_lvl, const char *end, int raw, int recurse_lvl,
struct compiling *c, const node *n) struct compiling *c, const node *n)
{ {
FstringParser_check_invariants(state); FstringParser_check_invariants(state);
...@@ -4872,7 +4796,7 @@ FstringParser_ConcatFstring(FstringParser *state, PyObject *str, ...@@ -4872,7 +4796,7 @@ FstringParser_ConcatFstring(FstringParser *state, PyObject *str,
expression, literal will be NULL. If we're at the end of expression, literal will be NULL. If we're at the end of
the f-string, expression will be NULL (unless result == 1, the f-string, expression will be NULL (unless result == 1,
see below). */ see below). */
int result = fstring_find_literal_and_expr(str, ofs, recurse_lvl, int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl,
&literal, &expression, &literal, &expression,
c, n); c, n);
if (result < 0) if (result < 0)
...@@ -4925,16 +4849,14 @@ FstringParser_ConcatFstring(FstringParser *state, PyObject *str, ...@@ -4925,16 +4849,14 @@ FstringParser_ConcatFstring(FstringParser *state, PyObject *str,
return -1; return -1;
} }
assert(*ofs <= PyUnicode_GET_LENGTH(str));
/* If recurse_lvl is zero, then we must be at the end of the /* If recurse_lvl is zero, then we must be at the end of the
string. Otherwise, we must be at a right brace. */ string. Otherwise, we must be at a right brace. */
if (recurse_lvl == 0 && *ofs < PyUnicode_GET_LENGTH(str)) { if (recurse_lvl == 0 && *str < end-1) {
ast_error(c, n, "f-string: unexpected end of string"); ast_error(c, n, "f-string: unexpected end of string");
return -1; return -1;
} }
if (recurse_lvl != 0 && PyUnicode_READ_CHAR(str, *ofs) != '}') { if (recurse_lvl != 0 && **str != '}') {
ast_error(c, n, "f-string: expecting '}'"); ast_error(c, n, "f-string: expecting '}'");
return -1; return -1;
} }
...@@ -4991,17 +4913,17 @@ error: ...@@ -4991,17 +4913,17 @@ error:
return NULL; return NULL;
} }
/* Given an f-string (with no 'f' or quotes) that's in str starting at /* Given an f-string (with no 'f' or quotes) that's in *str and ends
ofs, parse it into an expr_ty. Return NULL on error. Does not at end, parse it into an expr_ty. Return NULL on error. Adjust
decref str. */ str to point past the parsed portion. */
static expr_ty static expr_ty
fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
struct compiling *c, const node *n) struct compiling *c, const node *n)
{ {
FstringParser state; FstringParser state;
FstringParser_Init(&state); FstringParser_Init(&state);
if (FstringParser_ConcatFstring(&state, str, ofs, recurse_lvl, if (FstringParser_ConcatFstring(&state, str, end, raw, recurse_lvl,
c, n) < 0) { c, n) < 0) {
FstringParser_Dealloc(&state); FstringParser_Dealloc(&state);
return NULL; return NULL;
...@@ -5012,19 +4934,25 @@ fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl, ...@@ -5012,19 +4934,25 @@ fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
/* n is a Python string literal, including the bracketing quote /* n is a Python string literal, including the bracketing quote
characters, and r, b, u, &/or f prefixes (if any), and embedded characters, and r, b, u, &/or f prefixes (if any), and embedded
escape sequences (if any). parsestr parses it, and returns the escape sequences (if any). parsestr parses it, and sets *result to
decoded Python string object. If the string is an f-string, set decoded Python string object. If the string is an f-string, set
*fmode and return the unparsed string object. *fstr and *fstrlen to the unparsed string object. Return 0 if no
errors occurred.
*/ */
static PyObject * static int
parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode) parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode,
PyObject **result, const char **fstr, Py_ssize_t *fstrlen)
{ {
size_t len; size_t len;
const char *s = STR(n); const char *s = STR(n);
int quote = Py_CHARMASK(*s); int quote = Py_CHARMASK(*s);
int rawmode = 0; int fmode = 0;
*bytesmode = 0;
*rawmode = 0;
*result = NULL;
*fstr = NULL;
if (Py_ISALPHA(quote)) { if (Py_ISALPHA(quote)) {
while (!*bytesmode || !rawmode) { while (!*bytesmode || !*rawmode) {
if (quote == 'b' || quote == 'B') { if (quote == 'b' || quote == 'B') {
quote = *++s; quote = *++s;
*bytesmode = 1; *bytesmode = 1;
...@@ -5034,24 +4962,24 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode) ...@@ -5034,24 +4962,24 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
} }
else if (quote == 'r' || quote == 'R') { else if (quote == 'r' || quote == 'R') {
quote = *++s; quote = *++s;
rawmode = 1; *rawmode = 1;
} }
else if (quote == 'f' || quote == 'F') { else if (quote == 'f' || quote == 'F') {
quote = *++s; quote = *++s;
*fmode = 1; fmode = 1;
} }
else { else {
break; break;
} }
} }
} }
if (*fmode && *bytesmode) { if (fmode && *bytesmode) {
PyErr_BadInternalCall(); PyErr_BadInternalCall();
return NULL; return -1;
} }
if (quote != '\'' && quote != '\"') { if (quote != '\'' && quote != '\"') {
PyErr_BadInternalCall(); PyErr_BadInternalCall();
return NULL; return -1;
} }
/* Skip the leading quote char. */ /* Skip the leading quote char. */
s++; s++;
...@@ -5059,12 +4987,12 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode) ...@@ -5059,12 +4987,12 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
if (len > INT_MAX) { if (len > INT_MAX) {
PyErr_SetString(PyExc_OverflowError, PyErr_SetString(PyExc_OverflowError,
"string to parse is too long"); "string to parse is too long");
return NULL; return -1;
} }
if (s[--len] != quote) { if (s[--len] != quote) {
/* Last quote char must match the first. */ /* Last quote char must match the first. */
PyErr_BadInternalCall(); PyErr_BadInternalCall();
return NULL; return -1;
} }
if (len >= 4 && s[0] == quote && s[1] == quote) { if (len >= 4 && s[0] == quote && s[1] == quote) {
/* A triple quoted string. We've already skipped one quote at /* A triple quoted string. We've already skipped one quote at
...@@ -5075,21 +5003,21 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode) ...@@ -5075,21 +5003,21 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
/* And check that the last two match. */ /* And check that the last two match. */
if (s[--len] != quote || s[--len] != quote) { if (s[--len] != quote || s[--len] != quote) {
PyErr_BadInternalCall(); PyErr_BadInternalCall();
return NULL; return -1;
} }
} }
/* Temporary hack: if this is an f-string, no backslashes are allowed. */ if (fmode) {
/* See issue 27921. */ /* Just return the bytes. The caller will parse the resulting
if (*fmode && strchr(s, '\\') != NULL) { string. */
/* Syntax error. At a later date fix this so it only checks for *fstr = s;
backslashes within the braces. */ *fstrlen = len;
ast_error(c, n, "backslashes not allowed in f-strings"); return 0;
return NULL;
} }
/* Not an f-string. */
/* Avoid invoking escape decoding routines if possible. */ /* Avoid invoking escape decoding routines if possible. */
rawmode = rawmode || strchr(s, '\\') == NULL; *rawmode = *rawmode || strchr(s, '\\') == NULL;
if (*bytesmode) { if (*bytesmode) {
/* Disallow non-ASCII characters. */ /* Disallow non-ASCII characters. */
const char *ch; const char *ch;
...@@ -5097,19 +5025,20 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode) ...@@ -5097,19 +5025,20 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
if (Py_CHARMASK(*ch) >= 0x80) { if (Py_CHARMASK(*ch) >= 0x80) {
ast_error(c, n, "bytes can only contain ASCII " ast_error(c, n, "bytes can only contain ASCII "
"literal characters."); "literal characters.");
return NULL; return -1;
} }
} }
if (rawmode) if (*rawmode)
return PyBytes_FromStringAndSize(s, len); *result = PyBytes_FromStringAndSize(s, len);
else else
return PyBytes_DecodeEscape(s, len, NULL, /* ignored */ 0, NULL); *result = PyBytes_DecodeEscape(s, len, NULL, /* ignored */ 0, NULL);
} else { } else {
if (rawmode) if (*rawmode)
return PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL); *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
else else
return decode_unicode_with_escapes(c, s, len); *result = decode_unicode_with_escapes(c, s, len);
} }
return *result == NULL ? -1 : 0;
} }
/* Accepts a STRING+ atom, and produces an expr_ty node. Run through /* Accepts a STRING+ atom, and produces an expr_ty node. Run through
...@@ -5131,13 +5060,15 @@ parsestrplus(struct compiling *c, const node *n) ...@@ -5131,13 +5060,15 @@ parsestrplus(struct compiling *c, const node *n)
FstringParser_Init(&state); FstringParser_Init(&state);
for (i = 0; i < NCH(n); i++) { for (i = 0; i < NCH(n); i++) {
int this_bytesmode = 0; int this_bytesmode;
int this_fmode = 0; int this_rawmode;
PyObject *s; PyObject *s;
const char *fstr;
Py_ssize_t fstrlen = -1; /* Silence a compiler warning. */
REQ(CHILD(n, i), STRING); REQ(CHILD(n, i), STRING);
s = parsestr(c, CHILD(n, i), &this_bytesmode, &this_fmode); if (parsestr(c, CHILD(n, i), &this_bytesmode, &this_rawmode, &s,
if (!s) &fstr, &fstrlen) != 0)
goto error; goto error;
/* Check that we're not mixing bytes with unicode. */ /* Check that we're not mixing bytes with unicode. */
...@@ -5148,8 +5079,20 @@ parsestrplus(struct compiling *c, const node *n) ...@@ -5148,8 +5079,20 @@ parsestrplus(struct compiling *c, const node *n)
} }
bytesmode = this_bytesmode; bytesmode = this_bytesmode;
assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s)); if (fstr != NULL) {
int result;
assert(s == NULL && !bytesmode);
/* This is an f-string. Parse and concatenate it. */
result = FstringParser_ConcatFstring(&state, &fstr, fstr+fstrlen,
this_rawmode, 0, c, n);
if (result < 0)
goto error;
} else {
assert(bytesmode ? PyBytes_CheckExact(s) :
PyUnicode_CheckExact(s));
/* A string or byte string. */
assert(s != NULL && fstr == NULL);
if (bytesmode) { if (bytesmode) {
/* For bytes, concat as we go. */ /* For bytes, concat as we go. */
if (i == 0) { if (i == 0) {
...@@ -5160,19 +5103,14 @@ parsestrplus(struct compiling *c, const node *n) ...@@ -5160,19 +5103,14 @@ parsestrplus(struct compiling *c, const node *n)
if (!bytes_str) if (!bytes_str)
goto error; goto error;
} }
} else if (this_fmode) {
/* This is an f-string. Concatenate and decref it. */
Py_ssize_t ofs = 0;
int result = FstringParser_ConcatFstring(&state, s, &ofs, 0, c, n);
Py_DECREF(s);
if (result < 0)
goto error;
} else { } else {
assert(s != NULL && fstr == NULL);
/* This is a regular string. Concatenate it. */ /* This is a regular string. Concatenate it. */
if (FstringParser_ConcatAndDel(&state, s) < 0) if (FstringParser_ConcatAndDel(&state, s) < 0)
goto error; goto error;
} }
} }
}
if (bytesmode) { if (bytesmode) {
/* Just return the bytes object and we're done. */ /* Just return the bytes object and we're done. */
if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0) if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment