Issue 27948: Allow backslashes in the literal string portion of f-strings, but...

Issue 27948: Allow backslashes in the literal string portion of f-strings, but not in the expressions. Also, require expressions to begin and end with literal curly braces.

Issue 27948: Allow backslashes in the literal string portion of f-strings, but...
Issue 27948: Allow backslashes in the literal string portion of f-strings, but not in the expressions. Also, require expressions to begin and end with literal curly braces.
54f5611f · Eric V. Smith · 29ea6a90 · 54f5611f · 54f5611f · 54f5611f
Commit 54f5611f authored Sep 09, 2016 by Eric V. Smith
9 changed files
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -1060,7 +1060,7 @@ class HTTPConnection:
                if encode_chunked and self._http_vsn == 11:
                    # chunked encoding
-                    chunk = f'{len(chunk):X}''\r\n'.encode('ascii') + chunk \
+                    chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
                        + b'\r\n'
                self.send(chunk)

--- a/Lib/test/libregrtest/save_env.py
+++ b/Lib/test/libregrtest/save_env.py
@@ -280,6 +280,6 @@ class saved_test_environment:
                    print(f"Warning -- {name} was modified by {self.testname}",
                          file=sys.stderr, flush=True)
                    if self.verbose > 1:
-                        print(f"  Before: {original}""\n"f"  After:  {current} ",
+                        print(f"  Before: {original}\n  After:  {current} ",
                              file=sys.stderr, flush=True)
        return False
--- a/Lib/test/test_faulthandler.py
+++ b/Lib/test/test_faulthandler.py
@@ -735,11 +735,11 @@ class FaultHandlerTests(unittest.TestCase):
            ('EXCEPTION_INT_DIVIDE_BY_ZERO', 'int divide by zero'),
            ('EXCEPTION_STACK_OVERFLOW', 'stack overflow'),
        ):
-            self.check_windows_exception("""
+            self.check_windows_exception(f"""
                import faulthandler
                faulthandler.enable()
                faulthandler._raise_exception(faulthandler._{exc})
-                """.format(exc=exc),
+                """,
                3,
                name)

--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -119,6 +119,14 @@ f'{a * x()}'"""
        self.assertEqual(f'a}}', 'a}')
        self.assertEqual(f'}}b', '}b')
        self.assertEqual(f'a}}b', 'a}b')
+        self.assertEqual(f'{{}}', '{}')
+        self.assertEqual(f'a{{}}', 'a{}')
+        self.assertEqual(f'{{b}}', '{b}')
+        self.assertEqual(f'{{}}c', '{}c')
+        self.assertEqual(f'a{{b}}', 'a{b}')
+        self.assertEqual(f'a{{}}c', 'a{}c')
+        self.assertEqual(f'{{b}}c', '{b}c')
+        self.assertEqual(f'a{{b}}c', 'a{b}c')
        self.assertEqual(f'{{{10}', '{10')
        self.assertEqual(f'}}{10}', '}10')
@@ -302,56 +310,79 @@ f'{a * x()}'"""
                            ["f'{\n}'",
                             ])
-    def test_no_backslashes(self):
+    def test_backslashes_in_string_part(self):
-        # See issue 27921
+        self.assertEqual(f'\t', '\t')
+        self.assertEqual(r'\t', '\\t')
-        # These should work, but currently don't
+        self.assertEqual(rf'\t', '\\t')
-        self.assertAllRaise(SyntaxError, 'backslashes not allowed',
+        self.assertEqual(f'{2}\t', '2\t')
-                            [r"f'\t'",
+        self.assertEqual(f'{2}\t{3}', '2\t3')
-                             r"f'{2}\t'",
+        self.assertEqual(f'\t{3}', '\t3')
-                             r"f'{2}\t{3}'",
-                             r"f'\t{3}'",
+        self.assertEqual(f'\u0394', '\u0394')
+        self.assertEqual(r'\u0394', '\\u0394')
-                             r"f'\N{GREEK CAPITAL LETTER DELTA}'",
+        self.assertEqual(rf'\u0394', '\\u0394')
-                             r"f'{2}\N{GREEK CAPITAL LETTER DELTA}'",
+        self.assertEqual(f'{2}\u0394', '2\u0394')
-                             r"f'{2}\N{GREEK CAPITAL LETTER DELTA}{3}'",
+        self.assertEqual(f'{2}\u0394{3}', '2\u03943')
-                             r"f'\N{GREEK CAPITAL LETTER DELTA}{3}'",
+        self.assertEqual(f'\u0394{3}', '\u03943')
-                             r"f'\u0394'",
+        self.assertEqual(f'\U00000394', '\u0394')
-                             r"f'{2}\u0394'",
+        self.assertEqual(r'\U00000394', '\\U00000394')
-                             r"f'{2}\u0394{3}'",
+        self.assertEqual(rf'\U00000394', '\\U00000394')
-                             r"f'\u0394{3}'",
+        self.assertEqual(f'{2}\U00000394', '2\u0394')
+        self.assertEqual(f'{2}\U00000394{3}', '2\u03943')
-                             r"f'\U00000394'",
+        self.assertEqual(f'\U00000394{3}', '\u03943')
-                             r"f'{2}\U00000394'",
-                             r"f'{2}\U00000394{3}'",
+        self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}', '\u0394')
-                             r"f'\U00000394{3}'",
+        self.assertEqual(f'{2}\N{GREEK CAPITAL LETTER DELTA}', '2\u0394')
+        self.assertEqual(f'{2}\N{GREEK CAPITAL LETTER DELTA}{3}', '2\u03943')
-                             r"f'\x20'",
+        self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}{3}', '\u03943')
-                             r"f'{2}\x20'",
+        self.assertEqual(f'2\N{GREEK CAPITAL LETTER DELTA}', '2\u0394')
-                             r"f'{2}\x20{3}'",
+        self.assertEqual(f'2\N{GREEK CAPITAL LETTER DELTA}3', '2\u03943')
-                             r"f'\x20{3}'",
+        self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}3', '\u03943')
-                             r"f'2\x20'",
+        self.assertEqual(f'\x20', ' ')
-                             r"f'2\x203'",
+        self.assertEqual(r'\x20', '\\x20')
-                             r"f'2\x203'",
+        self.assertEqual(rf'\x20', '\\x20')
+        self.assertEqual(f'{2}\x20', '2 ')
+        self.assertEqual(f'{2}\x20{3}', '2 3')
+        self.assertEqual(f'\x20{3}', ' 3')
+        self.assertEqual(f'2\x20', '2 ')
+        self.assertEqual(f'2\x203', '2 3')
+        self.assertEqual(f'\x203', ' 3')
+    def test_misformed_unicode_character_name(self):
+        # These tests are needed because unicode names are parsed
+        # differently inside f-strings.
+        self.assertAllRaise(SyntaxError, r"\(unicode error\) 'unicodeescape' codec can't decode bytes in position .*: malformed \\N character escape",
+                            [r"f'\N'",
+                             r"f'\N{'",
+                             r"f'\N{GREEK CAPITAL LETTER DELTA'",
+                             # Here are the non-f-string versions,
+                             #  which should give the same errors.
+                             r"'\N'",
+                             r"'\N{'",
+                             r"'\N{GREEK CAPITAL LETTER DELTA'",
                             ])
-        # And these don't work now, and shouldn't work in the future.
+    def test_no_backslashes_in_expression_part(self):
-        self.assertAllRaise(SyntaxError, 'backslashes not allowed',
+        self.assertAllRaise(SyntaxError, 'f-string expression part cannot include a backslash',
                            [r"f'{\'a\'}'",
                             r"f'{\t3}'",
+                             r"f'{\}'",
+                             r"rf'{\'a\'}'",
+                             r"rf'{\t3}'",
+                             r"rf'{\}'",
+                             r"""rf'{"\N{LEFT CURLY BRACKET}"}'""",
                             ])
-    # add this when backslashes are allowed again. see issue 27921
+    def test_no_escapes_for_braces(self):
-    # these test will be needed because unicode names will be parsed
+        # \x7b is '{'.  Make sure it doesn't start an expression.
-    # differently once backslashes are allowed inside expressions
+        self.assertEqual(f'\x7b2}}', '{2}')
-    ## def test_misformed_unicode_character_name(self):
+        self.assertEqual(f'\x7b2', '{2')
-    ##     self.assertAllRaise(SyntaxError, 'xx',
+        self.assertEqual(f'\u007b2', '{2')
-    ##                         [r"f'\N'",
+        self.assertEqual(f'\N{LEFT CURLY BRACKET}2\N{RIGHT CURLY BRACKET}', '{2}')
-    ##                         [r"f'\N{'",
-    ##                         [r"f'\N{GREEK CAPITAL LETTER DELTA'",
-    ##                          ])
    def test_newlines_in_expressions(self):
        self.assertEqual(f'{0}', '0')
@@ -509,6 +540,14 @@ f'{a * x()}'"""
                             "ruf''",
                             "FUR''",
                             "Fur''",
+                             "fb''",
+                             "fB''",
+                             "Fb''",
+                             "FB''",
+                             "bf''",
+                             "bF''",
+                             "Bf''",
+                             "BF''",
                             ])
    def test_leading_trailing_spaces(self):
@@ -551,8 +590,8 @@ f'{a * x()}'"""
        self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character',
                            ["f'{3!g}'",
                             "f'{3!A}'",
-                             "f'{3!A}'",
+                             "f'{3!3}'",
-                             "f'{3!A}'",
+                             "f'{3!G}'",
                             "f'{3!!}'",
                             "f'{3!:}'",
                             "f'{3! s}'",  # no space before conversion char
@@ -601,6 +640,7 @@ f'{a * x()}'"""
                             "f'{3!s:3'",
                             "f'x{'",
                             "f'x{x'",
+                             "f'{x'",
                             "f'{3:s'",
                             "f'{{{'",
                             "f'{{}}{'",

--- a/Lib/test/test_tools/test_unparse.py
+++ b/Lib/test/test_tools/test_unparse.py
@@ -285,12 +285,12 @@ class DirectoryTestCase(ASTTestCase):
            if test.support.verbose:
                print('Testing %s' % filename)
-            # it's very much a hack that I'm skipping these files, but
+            # Some f-strings are not correctly round-tripped by
-            #  I can't figure out why they fail. I'll fix it when I
+            #  Tools/parser/unparse.py.  See issue 28002 for details.
-            #  address issue #27948.
+            #  We need to skip files that contain such f-strings.
-            if os.path.basename(filename) in ('test_fstring.py', 'test_traceback.py'):
+            if os.path.basename(filename) in ('test_fstring.py', ):
                if test.support.verbose:
-                    print(f'Skipping {filename}: see issue 27921')
+                    print(f'Skipping {filename}: see issue 28002')
                continue
            with self.subTest(filename=filename):

--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -326,13 +326,13 @@ class TracebackFormatTests(unittest.TestCase):
        lineno_f = f.__code__.co_firstlineno
        result_f = (
            'Traceback (most recent call last):\n'
-            f'  File "{__file__}", line {lineno_f+5}, in _check_recursive_traceback_display''\n'
+            f'  File "{__file__}", line {lineno_f+5}, in _check_recursive_traceback_display\n'
            '    f()\n'
-            f'  File "{__file__}", line {lineno_f+1}, in f''\n'
+            f'  File "{__file__}", line {lineno_f+1}, in f\n'
            '    f()\n'
-            f'  File "{__file__}", line {lineno_f+1}, in f''\n'
+            f'  File "{__file__}", line {lineno_f+1}, in f\n'
            '    f()\n'
-            f'  File "{__file__}", line {lineno_f+1}, in f''\n'
+            f'  File "{__file__}", line {lineno_f+1}, in f\n'
            '    f()\n'
            # XXX: The following line changes depending on whether the tests
            # are run through the interactive interpreter or with -m
@@ -371,20 +371,20 @@ class TracebackFormatTests(unittest.TestCase):
        lineno_g = g.__code__.co_firstlineno
        result_g = (
-            f'  File "{__file__}", line {lineno_g+2}, in g''\n'
+            f'  File "{__file__}", line {lineno_g+2}, in g\n'
            '    return g(count-1)\n'
-            f'  File "{__file__}", line {lineno_g+2}, in g''\n'
+            f'  File "{__file__}", line {lineno_g+2}, in g\n'
            '    return g(count-1)\n'
-            f'  File "{__file__}", line {lineno_g+2}, in g''\n'
+            f'  File "{__file__}", line {lineno_g+2}, in g\n'
            '    return g(count-1)\n'
            '  [Previous line repeated 6 more times]\n'
-            f'  File "{__file__}", line {lineno_g+3}, in g''\n'
+            f'  File "{__file__}", line {lineno_g+3}, in g\n'
            '    raise ValueError\n'
            'ValueError\n'
        )
        tb_line = (
            'Traceback (most recent call last):\n'
-            f'  File "{__file__}", line {lineno_g+7}, in _check_recursive_traceback_display''\n'
+            f'  File "{__file__}", line {lineno_g+7}, in _check_recursive_traceback_display\n'
            '    g()\n'
        )
        expected = (tb_line + result_g).splitlines()
@@ -408,16 +408,16 @@ class TracebackFormatTests(unittest.TestCase):
        lineno_h = h.__code__.co_firstlineno
        result_h = (
            'Traceback (most recent call last):\n'
-            f'  File "{__file__}", line {lineno_h+7}, in _check_recursive_traceback_display''\n'
+            f'  File "{__file__}", line {lineno_h+7}, in _check_recursive_traceback_display\n'
            '    h()\n'
-            f'  File "{__file__}", line {lineno_h+2}, in h''\n'
+            f'  File "{__file__}", line {lineno_h+2}, in h\n'
            '    return h(count-1)\n'
-            f'  File "{__file__}", line {lineno_h+2}, in h''\n'
+            f'  File "{__file__}", line {lineno_h+2}, in h\n'
            '    return h(count-1)\n'
-            f'  File "{__file__}", line {lineno_h+2}, in h''\n'
+            f'  File "{__file__}", line {lineno_h+2}, in h\n'
            '    return h(count-1)\n'
            '  [Previous line repeated 6 more times]\n'
-            f'  File "{__file__}", line {lineno_h+3}, in h''\n'
+            f'  File "{__file__}", line {lineno_h+3}, in h\n'
            '    g()\n'
        )
        expected = (result_h + result_g).splitlines()

--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -402,7 +402,7 @@ class StackSummary(list):
                count += 1
            else:
                if count > 3:
-                    result.append(f'  [Previous line repeated {count-3} more times]'+'\n')
+                    result.append(f'  [Previous line repeated {count-3} more times]\n')
                last_file = frame.filename
                last_line = frame.lineno
                last_name = frame.name
@@ -419,7 +419,7 @@ class StackSummary(list):
                    row.append('    {name} = {value}\n'.format(name=name, value=value))
            result.append(''.join(row))
        if count > 3:
-            result.append(f'  [Previous line repeated {count-3} more times]'+'\n')
+            result.append(f'  [Previous line repeated {count-3} more times]\n')
        return result

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,13 @@ What's New in Python 3.6.0 beta 1
 Core and Builtins
 -----------------
+- Issue #27948: In f-strings, allow backslashes in the literal string
+  portion, but not inside the braces (where the expressions are).  This
+  is a breaking change from the 3.6 alpha releases, where backslashes
+  are allowed anywhere in an f-string.  Also, require that expressions
+  inside f-strings be enclosed within literal braces, and not escapes
+  like f'\x7b"hi"\x7d'.
 - Issue #28046: Remove platform-specific directories from sys.path.
 - Issue #25758: Prevents zipimport from unnecessarily encoding a filename
@@ -56,11 +63,6 @@ Core and Builtins
 - Issue #27355: Removed support for Windows CE.  It was never finished,
  and Windows CE is no longer a relevant platform for Python.
- Issue #27921: Disallow backslashes in f-strings. This is a temporary
-  restriction: in beta 2, backslashes will only be disallowed inside
-  the braces (where the expressions are). This is a breaking change
-  from the 3.6 alpha releases.
 - Implement PEP 523.
 - Issue #27870: A left shift of zero by a large integer no longer attempts

--- a/Python/ast.c
+++ b/Python/ast.c
@@ -4155,141 +4155,74 @@ decode_unicode_with_escapes(struct compiling *c, const char *s, size_t len)
    return v;
 }
-/* Compile this expression in to an expr_ty. We know that we can
+/* Compile this expression into an expr_ty.  Add parens around the
-   temporarily modify the character before the start of this string
+   expression, in order to allow leading spaces in the expression. */
-   (it's '{'), and we know we can temporarily modify the character
-   after this string (it is a '}').  Leverage this to create a
-   sub-string with enough room for us to add parens around the
-   expression. This is to allow strings with embedded newlines, for
-   example. */
 static expr_ty
-fstring_compile_expr(PyObject *str, Py_ssize_t expr_start,
+fstring_compile_expr(const char *expr_start, const char *expr_end,
-                     Py_ssize_t expr_end, struct compiling *c, const node *n)
+                     struct compiling *c, const node *n)
 {
+    int all_whitespace = 1;
+    int kind;
+    void *data;
    PyCompilerFlags cf;
    mod_ty mod;
-    char *utf_expr;
+    char *str;
+    PyObject *o;
+    Py_ssize_t len;
    Py_ssize_t i;
-    Py_UCS4 end_ch = -1;
-    int all_whitespace;
-    PyObject *sub = NULL;
-    /* We only decref sub if we allocated it with a PyUnicode_Substring.
-       decref_sub records that. */
-    int decref_sub = 0;
-    assert(str);
-    assert(expr_start >= 0 && expr_start < PyUnicode_GET_LENGTH(str));
-    assert(expr_end >= 0 && expr_end < PyUnicode_GET_LENGTH(str));
    assert(expr_end >= expr_start);
+    assert(*(expr_start-1) == '{');
-    /* There has to be at least one character on each side of the
+    assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':');
-       expression inside this str. This will have been caught before
-       we're called. */
+    /* We know there are no escapes here, because backslashes are not allowed,
-    assert(expr_start >= 1);
+       and we know it's utf-8 encoded (per PEP 263).  But, in order to check
-    assert(expr_end <= PyUnicode_GET_LENGTH(str)-1);
+       that each char is not whitespace, we need to decode it to unicode.
+       Which is unfortunate, but such is life. */
-    /* If the substring is all whitespace, it's an error. We need to
-        catch this here, and not when we call PyParser_ASTFromString,
+    /* If the substring is all whitespace, it's an error.  We need to catch
-        because turning the expression '' in to '()' would go from
+       this here, and not when we call PyParser_ASTFromString, because turning
-        being invalid to valid. */
+       the expression '' into '()' would go from being invalid to valid. */
-    /* Note that this code says an empty string is all
+    /* Note that this code says an empty string is all whitespace.  That's
-        whitespace. That's important. There's a test for it: f'{}'. */
+       important.  There's a test for it: f'{}'. */
-    all_whitespace = 1;
+    o = PyUnicode_DecodeUTF8(expr_start, expr_end-expr_start, NULL);
-    for (i = expr_start; i < expr_end; i++) {
+    if (o == NULL)
-        if (!Py_UNICODE_ISSPACE(PyUnicode_READ_CHAR(str, i))) {
+        return NULL;
+    len = PyUnicode_GET_LENGTH(o);
+    kind = PyUnicode_KIND(o);
+    data = PyUnicode_DATA(o);
+    for (i = 0; i < len; i++) {
+        if (!Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) {
            all_whitespace = 0;
            break;
        }
    }
+    Py_DECREF(o);
    if (all_whitespace) {
        ast_error(c, n, "f-string: empty expression not allowed");
-        goto error;
+        return NULL;
    }
-    /* If the substring will be the entire source string, we can't use
+    /* Reuse len to be the length of the utf-8 input string. */
-        PyUnicode_Substring, since it will return another reference to
+    len = expr_end - expr_start;
-        our original string. Because we're modifying the string in
+    /* Allocate 3 extra bytes: open paren, close paren, null byte. */
-        place, that's a no-no. So, detect that case and just use our
+    str = PyMem_RawMalloc(len + 3);
-        string directly. */
+    if (str == NULL)
+        return NULL;
-    if (expr_start-1 == 0 && expr_end+1 == PyUnicode_GET_LENGTH(str)) {
-        /* If str is well formed, then the first and last chars must
-           be '{' and '}', respectively. But, if there's a syntax
-           error, for example f'{3!', then the last char won't be a
-           closing brace. So, remember the last character we read in
-           order for us to restore it. */
-        end_ch = PyUnicode_ReadChar(str, expr_end-expr_start+1);
-        assert(end_ch != (Py_UCS4)-1);
-        /* In all cases, however, start_ch must be '{'. */
-        assert(PyUnicode_ReadChar(str, 0) == '{');
-        sub = str;
-    } else {
-        /* Create a substring object. It must be a new object, with
-           refcount==1, so that we can modify it. */
-        sub = PyUnicode_Substring(str, expr_start-1, expr_end+1);
-        if (!sub)
-            goto error;
-        assert(sub != str);  /* Make sure it's a new string. */
-        decref_sub = 1;      /* Remember to deallocate it on error. */
-    }
-    /* Put () around the expression. */
+    str[0] = '(';
-    if (PyUnicode_WriteChar(sub, 0, '(') < 0 ||
+    memcpy(str+1, expr_start, len);
-        PyUnicode_WriteChar(sub, expr_end-expr_start+1, ')') < 0)
+    str[len+1] = ')';
-        goto error;
+    str[len+2] = 0;
-    /* No need to free the memory returned here: it's managed by the
-       string. */
-    utf_expr = PyUnicode_AsUTF8(sub);
-    if (!utf_expr)
-        goto error;
    cf.cf_flags = PyCF_ONLY_AST;
-    mod = PyParser_ASTFromString(utf_expr, "<fstring>",
+    mod = PyParser_ASTFromString(str, "<fstring>",
                                 Py_eval_input, &cf, c->c_arena);
+    PyMem_RawFree(str);
    if (!mod)
-        goto error;
-    if (sub != str)
-        /* Clear instead of decref in case we ever modify this code to change
-           the error handling: this is safest because the XDECREF won't try
-           and decref it when it's NULL. */
-        /* No need to restore the chars in sub, since we know it's getting
-           ready to get deleted (refcount must be 1, since we got a new string
-           in PyUnicode_Substring). */
-        Py_CLEAR(sub);
-    else {
-        assert(!decref_sub);
-        assert(end_ch != (Py_UCS4)-1);
-        /* Restore str, which we earlier modified directly. */
-        if (PyUnicode_WriteChar(str, 0, '{') < 0 ||
-            PyUnicode_WriteChar(str, expr_end-expr_start+1, end_ch) < 0)
-            goto error;
-    }
-    return mod->v.Expression.body;
-error:
-    /* Only decref sub if it was the result of a call to SubString. */
-    if (decref_sub)
-        Py_XDECREF(sub);
-    if (end_ch != (Py_UCS4)-1) {
-        /* We only get here if we modified str. Make sure that's the
-           case: str will be equal to sub. */
-        if (str == sub) {
-            /* Don't check the error, because we've already set the
-               error state (that's why we're in 'error', after
-               all). */
-            PyUnicode_WriteChar(str, 0, '{');
-            PyUnicode_WriteChar(str, expr_end-expr_start+1, end_ch);
-        }
-    }
        return NULL;
+    return mod->v.Expression.body;
 }
 /* Return -1 on error.
@@ -4301,35 +4234,38 @@ error:
   doubled braces.
 */
 static int
-fstring_find_literal(PyObject *str, Py_ssize_t *ofs, PyObject **literal,
+fstring_find_literal(const char **str, const char *end, int raw,
-                     int recurse_lvl, struct compiling *c, const node *n)
+                     PyObject **literal, int recurse_lvl,
+                     struct compiling *c, const node *n)
 {
-    /* Get any literal string. It ends when we hit an un-doubled brace, or the
+    /* Get any literal string. It ends when we hit an un-doubled left
-       end of the string. */
+       brace (which isn't part of a unicode name escape such as
+       "\N{EULER CONSTANT}"), or the end of the string. */
-    Py_ssize_t literal_start, literal_end;
+    const char *literal_start = *str;
+    const char *literal_end;
+    int in_named_escape = 0;
    int result = 0;
-    enum PyUnicode_Kind kind = PyUnicode_KIND(str);
-    void *data = PyUnicode_DATA(str);
    assert(*literal == NULL);
+    for (; *str < end; (*str)++) {
-    literal_start = *ofs;
+        char ch = **str;
-    for (; *ofs < PyUnicode_GET_LENGTH(str); *ofs += 1) {
+        if (!in_named_escape && ch == '{' && (*str)-literal_start >= 2 &&
-        Py_UCS4 ch = PyUnicode_READ(kind, data, *ofs);
+            *(*str-2) == '\\' && *(*str-1) == 'N') {
-        if (ch == '{' || ch == '}') {
+            in_named_escape = 1;
+        } else if (in_named_escape && ch == '}') {
+            in_named_escape = 0;
+        } else if (ch == '{' || ch == '}') {
            /* Check for doubled braces, but only at the top level. If
               we checked at every level, then f'{0:{3}}' would fail
               with the two closing braces. */
            if (recurse_lvl == 0) {
-                if (*ofs + 1 < PyUnicode_GET_LENGTH(str) &&
+                if (*str+1 < end && *(*str+1) == ch) {
-                    PyUnicode_READ(kind, data, *ofs + 1) == ch) {
                    /* We're going to tell the caller that the literal ends
                       here, but that they should continue scanning. But also
                       skip over the second brace when we resume scanning. */
-                    literal_end = *ofs + 1;
+                    literal_end = *str+1;
-                    *ofs += 2;
+                    *str += 2;
                    result = 1;
                    goto done;
                }
@@ -4341,34 +4277,36 @@ fstring_find_literal(PyObject *str, Py_ssize_t *ofs, PyObject **literal,
                    return -1;
                }
            }
            /* We're either at a '{', which means we're starting another
               expression; or a '}', which means we're at the end of this
               f-string (for a nested format_spec). */
            break;
        }
    }
-    literal_end = *ofs;
+    literal_end = *str;
+    assert(*str <= end);
-    assert(*ofs == PyUnicode_GET_LENGTH(str) ||
+    assert(*str == end || **str == '{' || **str == '}');
-           PyUnicode_READ(kind, data, *ofs) == '{' ||
-           PyUnicode_READ(kind, data, *ofs) == '}');
 done:
    if (literal_start != literal_end) {
-        *literal = PyUnicode_Substring(str, literal_start, literal_end);
+        if (raw)
+            *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
+                                                    literal_end-literal_start,
+                                                    NULL, NULL);
+        else
+            *literal = decode_unicode_with_escapes(c, literal_start,
+                                                   literal_end-literal_start);
        if (!*literal)
            return -1;
    }
    return result;
 }
 /* Forward declaration because parsing is recursive. */
 static expr_ty
-fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
+fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
              struct compiling *c, const node *n);
-/* Parse the f-string str, starting at ofs. We know *ofs starts an
+/* Parse the f-string at *str, ending at end.  We know *str starts an
   expression (so it must be a '{'). Returns the FormattedValue node,
   which includes the expression, conversion character, and
   format_spec expression.
@@ -4379,23 +4317,20 @@ fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
   find the end of all valid ones. Any errors inside the expression
   will be caught when we parse it later. */
 static int
-fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
+fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
                  expr_ty *expression, struct compiling *c, const node *n)
 {
    /* Return -1 on error, else 0. */
-    Py_ssize_t expr_start;
+    const char *expr_start;
-    Py_ssize_t expr_end;
+    const char *expr_end;
    expr_ty simple_expression;
    expr_ty format_spec = NULL; /* Optional format specifier. */
-    Py_UCS4 conversion = -1; /* The conversion char. -1 if not specified. */
+    char conversion = -1; /* The conversion char. -1 if not specified. */
-    enum PyUnicode_Kind kind = PyUnicode_KIND(str);
-    void *data = PyUnicode_DATA(str);
    /* 0 if we're not in a string, else the quote char we're trying to
       match (single or double quote). */
-    Py_UCS4 quote_char = 0;
+    char quote_char = 0;
    /* If we're inside a string, 1=normal, 3=triple-quoted. */
    int string_type = 0;
@@ -4412,22 +4347,30 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
    /* The first char must be a left brace, or we wouldn't have gotten
       here. Skip over it. */
-    assert(PyUnicode_READ(kind, data, *ofs) == '{');
+    assert(**str == '{');
-    *ofs += 1;
+    *str += 1;
-    expr_start = *ofs;
+    expr_start = *str;
-    for (; *ofs < PyUnicode_GET_LENGTH(str); *ofs += 1) {
+    for (; *str < end; (*str)++) {
-        Py_UCS4 ch;
+        char ch;
        /* Loop invariants. */
        assert(nested_depth >= 0);
-        assert(*ofs >= expr_start);
+        assert(*str >= expr_start && *str < end);
        if (quote_char)
            assert(string_type == 1 || string_type == 3);
        else
            assert(string_type == 0);
-        ch = PyUnicode_READ(kind, data, *ofs);
+        ch = **str;
+        /* Nowhere inside an expression is a backslash allowed. */
+        if (ch == '\\') {
+            /* Error: can't include a backslash character, whether or
+               not it is inside parens or a string. */
+            ast_error(c, n, "f-string expression part "
+                            "cannot include a backslash");
+            return -1;
+        }
        if (quote_char) {
            /* We're inside a string. See if we're at the end. */
            /* This code needs to implement the same non-error logic
@@ -4443,11 +4386,9 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
                /* Does this match the string_type (single or triple
                   quoted)? */
                if (string_type == 3) {
-                    if (*ofs+2 < PyUnicode_GET_LENGTH(str) &&
+                    if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
-                        PyUnicode_READ(kind, data, *ofs+1) == ch &&
-                        PyUnicode_READ(kind, data, *ofs+2) == ch) {
                        /* We're at the end of a triple quoted string. */
-                        *ofs += 2;
+                        *str += 2;
                        string_type = 0;
                        quote_char = 0;
                        continue;
@@ -4459,21 +4400,11 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
                    continue;
                }
            }
-            /* We're inside a string, and not finished with the
-               string. If this is a backslash, skip the next char (it
-               might be an end quote that needs skipping). Otherwise,
-               just consume this character normally. */
-            if (ch == '\\' && *ofs+1 < PyUnicode_GET_LENGTH(str)) {
-                /* Just skip the next char, whatever it is. */
-                *ofs += 1;
-            }
        } else if (ch == '\'' || ch == '"') {
            /* Is this a triple quoted string? */
-            if (*ofs+2 < PyUnicode_GET_LENGTH(str) &&
+            if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
-                PyUnicode_READ(kind, data, *ofs+1) == ch &&
-                PyUnicode_READ(kind, data, *ofs+2) == ch) {
                string_type = 3;
-                *ofs += 2;
+                *str += 2;
            } else {
                /* Start of a normal string. */
                string_type = 1;
@@ -4495,18 +4426,17 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
            /* First, test for the special case of "!=". Since '=' is
               not an allowed conversion character, nothing is lost in
               this test. */
-            if (ch == '!' && *ofs+1 < PyUnicode_GET_LENGTH(str) &&
+            if (ch == '!' && *str+1 < end && *(*str+1) == '=') {
-                  PyUnicode_READ(kind, data, *ofs+1) == '=')
                /* This isn't a conversion character, just continue. */
                continue;
+            }
            /* Normal way out of this loop. */
            break;
        } else {
            /* Just consume this char and loop around. */
        }
    }
-    expr_end = *ofs;
+    expr_end = *str;
    /* If we leave this loop in a string or with mismatched parens, we
       don't care. We'll get a syntax error when compiling the
       expression. But, we can produce a better error message, so
@@ -4520,24 +4450,24 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
        return -1;
    }
-    if (*ofs >= PyUnicode_GET_LENGTH(str))
+    if (*str >= end)
        goto unexpected_end_of_string;
    /* Compile the expression as soon as possible, so we show errors
       related to the expression before errors related to the
       conversion or format_spec. */
-    simple_expression = fstring_compile_expr(str, expr_start, expr_end, c, n);
+    simple_expression = fstring_compile_expr(expr_start, expr_end, c, n);
    if (!simple_expression)
        return -1;
    /* Check for a conversion char, if present. */
-    if (PyUnicode_READ(kind, data, *ofs) == '!') {
+    if (**str == '!') {
-        *ofs += 1;
+        *str += 1;
-        if (*ofs >= PyUnicode_GET_LENGTH(str))
+        if (*str >= end)
            goto unexpected_end_of_string;
-        conversion = PyUnicode_READ(kind, data, *ofs);
+        conversion = **str;
-        *ofs += 1;
+        *str += 1;
        /* Validate the conversion. */
        if (!(conversion == 's' || conversion == 'r'
@@ -4549,30 +4479,29 @@ fstring_find_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
    }
    /* Check for the format spec, if present. */
-    if (*ofs >= PyUnicode_GET_LENGTH(str))
+    if (*str >= end)
        goto unexpected_end_of_string;
-    if (PyUnicode_READ(kind, data, *ofs) == ':') {
+    if (**str == ':') {
-        *ofs += 1;
+        *str += 1;
-        if (*ofs >= PyUnicode_GET_LENGTH(str))
+        if (*str >= end)
            goto unexpected_end_of_string;
        /* Parse the format spec. */
-        format_spec = fstring_parse(str, ofs, recurse_lvl+1, c, n);
+        format_spec = fstring_parse(str, end, raw, recurse_lvl+1, c, n);
        if (!format_spec)
            return -1;
    }
-    if (*ofs >= PyUnicode_GET_LENGTH(str) ||
+    if (*str >= end || **str != '}')
-          PyUnicode_READ(kind, data, *ofs) != '}')
        goto unexpected_end_of_string;
    /* We're at a right brace. Consume it. */
-    assert(*ofs < PyUnicode_GET_LENGTH(str));
+    assert(*str < end);
-    assert(PyUnicode_READ(kind, data, *ofs) == '}');
+    assert(**str == '}');
-    *ofs += 1;
+    *str += 1;
-    /* And now create the FormattedValue node that represents this entire
+    /* And now create the FormattedValue node that represents this
-       expression with the conversion and format spec. */
+       entire expression with the conversion and format spec. */
    *expression = FormattedValue(simple_expression, (int)conversion,
                                 format_spec, LINENO(n), n->n_col_offset,
                                 c->c_arena);
@@ -4610,8 +4539,9 @@ unexpected_end_of_string:
      we're finished.
 */
 static int
-fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
+fstring_find_literal_and_expr(const char **str, const char *end, int raw,
-                              PyObject **literal, expr_ty *expression,
+                              int recurse_lvl, PyObject **literal,
+                              expr_ty *expression,
                              struct compiling *c, const node *n)
 {
    int result;
@@ -4619,7 +4549,7 @@ fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
    assert(*literal == NULL && *expression == NULL);
    /* Get any literal string. */
-    result = fstring_find_literal(str, ofs, literal, recurse_lvl, c, n);
+    result = fstring_find_literal(str, end, raw, literal, recurse_lvl, c, n);
    if (result < 0)
        goto error;
@@ -4629,10 +4559,7 @@ fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
        /* We have a literal, but don't look at the expression. */
        return 1;
-    assert(*ofs <= PyUnicode_GET_LENGTH(str));
+    if (*str >= end || **str == '}')
-    if (*ofs >= PyUnicode_GET_LENGTH(str) ||
-        PyUnicode_READ_CHAR(str, *ofs) == '}')
        /* We're at the end of the string or the end of a nested
           f-string: no expression. The top-level error case where we
           expect to be at the end of the string but we're at a '}' is
@@ -4640,10 +4567,9 @@ fstring_find_literal_and_expr(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
        return 0;
    /* We must now be at the start of an expression, on a '{'. */
-    assert(*ofs < PyUnicode_GET_LENGTH(str) &&
+    assert(**str == '{');
-           PyUnicode_READ_CHAR(str, *ofs) == '{');
-    if (fstring_find_expr(str, ofs, recurse_lvl, expression, c, n) < 0)
+    if (fstring_find_expr(str, end, raw, recurse_lvl, expression, c, n) < 0)
        goto error;
    return 0;
@@ -4852,13 +4778,11 @@ FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
    return 0;
 }
-/* Parse an f-string. The f-string is in str, starting at ofs, with no 'f'
+/* Parse an f-string. The f-string is in *str to end, with no
-   or quotes. str is not decref'd, since we don't know if it's used elsewhere.
+   'f' or quotes. */
-   And if we're only looking at a part of a string, then decref'ing is
-   definitely not the right thing to do! */
 static int
-FstringParser_ConcatFstring(FstringParser *state, PyObject *str,
+FstringParser_ConcatFstring(FstringParser *state, const char **str,
-                            Py_ssize_t *ofs, int recurse_lvl,
+                            const char *end, int raw, int recurse_lvl,
                            struct compiling *c, const node *n)
 {
    FstringParser_check_invariants(state);
@@ -4872,7 +4796,7 @@ FstringParser_ConcatFstring(FstringParser *state, PyObject *str,
           expression, literal will be NULL. If we're at the end of
           the f-string, expression will be NULL (unless result == 1,
           see below). */
-        int result = fstring_find_literal_and_expr(str, ofs, recurse_lvl,
+        int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl,
                                                   &literal, &expression,
                                                   c, n);
        if (result < 0)
@@ -4925,16 +4849,14 @@ FstringParser_ConcatFstring(FstringParser *state, PyObject *str,
            return -1;
    }
-    assert(*ofs <= PyUnicode_GET_LENGTH(str));
    /* If recurse_lvl is zero, then we must be at the end of the
       string. Otherwise, we must be at a right brace. */
-    if (recurse_lvl == 0 && *ofs < PyUnicode_GET_LENGTH(str)) {
+    if (recurse_lvl == 0 && *str < end-1) {
        ast_error(c, n, "f-string: unexpected end of string");
        return -1;
    }
-    if (recurse_lvl != 0 && PyUnicode_READ_CHAR(str, *ofs) != '}') {
+    if (recurse_lvl != 0 && **str != '}') {
        ast_error(c, n, "f-string: expecting '}'");
        return -1;
    }
@@ -4991,17 +4913,17 @@ error:
    return NULL;
 }
-/* Given an f-string (with no 'f' or quotes) that's in str starting at
+/* Given an f-string (with no 'f' or quotes) that's in *str and ends
-   ofs, parse it into an expr_ty. Return NULL on error. Does not
+   at end, parse it into an expr_ty.  Return NULL on error.  Adjust
-   decref str. */
+   str to point past the parsed portion. */
 static expr_ty
-fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
+fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
              struct compiling *c, const node *n)
 {
    FstringParser state;
    FstringParser_Init(&state);
-    if (FstringParser_ConcatFstring(&state, str, ofs, recurse_lvl,
+    if (FstringParser_ConcatFstring(&state, str, end, raw, recurse_lvl,
                                    c, n) < 0) {
        FstringParser_Dealloc(&state);
        return NULL;
@@ -5012,19 +4934,25 @@ fstring_parse(PyObject *str, Py_ssize_t *ofs, int recurse_lvl,
 /* n is a Python string literal, including the bracketing quote
   characters, and r, b, u, &/or f prefixes (if any), and embedded
-   escape sequences (if any). parsestr parses it, and returns the
+   escape sequences (if any). parsestr parses it, and sets *result to the
   decoded Python string object.  If the string is an f-string, set
-   *fmode and return the unparsed string object.
+   *fstr and *fstrlen to the unparsed string bytes and their length.
+   Return 0 if no errors occurred, -1 otherwise.
 */
-static PyObject *
+static int
-parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
+parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode,
+         PyObject **result, const char **fstr, Py_ssize_t *fstrlen)
 {
    size_t len;
    const char *s = STR(n);
    int quote = Py_CHARMASK(*s);
-    int rawmode = 0;
+    int fmode = 0;
+    *bytesmode = 0;
+    *rawmode = 0;
+    *result = NULL;
+    *fstr = NULL;
    if (Py_ISALPHA(quote)) {
-        while (!*bytesmode || !rawmode) {
+        while (!*bytesmode || !*rawmode) {
            if (quote == 'b' || quote == 'B') {
                quote = *++s;
                *bytesmode = 1;
@@ -5034,24 +4962,24 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
            }
            else if (quote == 'r' || quote == 'R') {
                quote = *++s;
-                rawmode = 1;
+                *rawmode = 1;
            }
            else if (quote == 'f' || quote == 'F') {
                quote = *++s;
-                *fmode = 1;
+                fmode = 1;
            }
            else {
                break;
            }
        }
    }
-    if (*fmode && *bytesmode) {
+    if (fmode && *bytesmode) {
        PyErr_BadInternalCall();
-        return NULL;
+        return -1;
    }
    if (quote != '\'' && quote != '\"') {
        PyErr_BadInternalCall();
-        return NULL;
+        return -1;
    }
    /* Skip the leading quote char. */
    s++;
@@ -5059,12 +4987,12 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
    if (len > INT_MAX) {
        PyErr_SetString(PyExc_OverflowError,
                        "string to parse is too long");
-        return NULL;
+        return -1;
    }
    if (s[--len] != quote) {
        /* Last quote char must match the first. */
        PyErr_BadInternalCall();
-        return NULL;
+        return -1;
    }
    if (len >= 4 && s[0] == quote && s[1] == quote) {
        /* A triple quoted string. We've already skipped one quote at
@@ -5075,21 +5003,21 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
        /* And check that the last two match. */
        if (s[--len] != quote || s[--len] != quote) {
            PyErr_BadInternalCall();
-            return NULL;
+            return -1;
        }
    }
-    /* Temporary hack: if this is an f-string, no backslashes are allowed. */
+    if (fmode) {
-    /* See issue 27921. */
+        /* Just return the bytes. The caller will parse the resulting
-    if (*fmode && strchr(s, '\\') != NULL) {
+           string. */
-        /* Syntax error. At a later date fix this so it only checks for
+        *fstr = s;
-           backslashes within the braces. */
+        *fstrlen = len;
-        ast_error(c, n, "backslashes not allowed in f-strings");
+        return 0;
-        return NULL;
    }
+    /* Not an f-string. */
    /* Avoid invoking escape decoding routines if possible. */
-    rawmode = rawmode || strchr(s, '\\') == NULL;
+    *rawmode = *rawmode || strchr(s, '\\') == NULL;
    if (*bytesmode) {
        /* Disallow non-ASCII characters. */
        const char *ch;
@@ -5097,19 +5025,20 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
            if (Py_CHARMASK(*ch) >= 0x80) {
                ast_error(c, n, "bytes can only contain ASCII "
                          "literal characters.");
-                return NULL;
+                return -1;
            }
        }
-        if (rawmode)
+        if (*rawmode)
-            return PyBytes_FromStringAndSize(s, len);
+            *result = PyBytes_FromStringAndSize(s, len);
        else
-            return PyBytes_DecodeEscape(s, len, NULL, /* ignored */ 0, NULL);
+            *result = PyBytes_DecodeEscape(s, len, NULL, /* ignored */ 0, NULL);
    } else {
-        if (rawmode)
+        if (*rawmode)
-            return PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
+            *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
        else
-            return decode_unicode_with_escapes(c, s, len);
+            *result = decode_unicode_with_escapes(c, s, len);
    }
+    return *result == NULL ? -1 : 0;
 }
 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through
@@ -5131,13 +5060,15 @@ parsestrplus(struct compiling *c, const node *n)
    FstringParser_Init(&state);
    for (i = 0; i < NCH(n); i++) {
-        int this_bytesmode = 0;
+        int this_bytesmode;
-        int this_fmode = 0;
+        int this_rawmode;
        PyObject *s;
+        const char *fstr;
+        Py_ssize_t fstrlen = -1;  /* Silence a compiler warning. */
        REQ(CHILD(n, i), STRING);
-        s = parsestr(c, CHILD(n, i), &this_bytesmode, &this_fmode);
+        if (parsestr(c, CHILD(n, i), &this_bytesmode, &this_rawmode, &s,
-        if (!s)
+                     &fstr, &fstrlen) != 0)
            goto error;
        /* Check that we're not mixing bytes with unicode. */
@@ -5148,8 +5079,20 @@ parsestrplus(struct compiling *c, const node *n)
        }
        bytesmode = this_bytesmode;
-        assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s));
+        if (fstr != NULL) {
+            int result;
+            assert(s == NULL && !bytesmode);
+            /* This is an f-string. Parse and concatenate it. */
+            result = FstringParser_ConcatFstring(&state, &fstr, fstr+fstrlen,
+                                                 this_rawmode, 0, c, n);
+            if (result < 0)
+                goto error;
+        } else {
+            assert(bytesmode ? PyBytes_CheckExact(s) :
+                   PyUnicode_CheckExact(s));
+            /* A string or byte string. */
+            assert(s != NULL && fstr == NULL);
            if (bytesmode) {
                /* For bytes, concat as we go. */
                if (i == 0) {
@@ -5160,19 +5103,14 @@ parsestrplus(struct compiling *c, const node *n)
                    if (!bytes_str)
                        goto error;
                }
-        } else if (this_fmode) {
-            /* This is an f-string. Concatenate and decref it. */
-            Py_ssize_t ofs = 0;
-            int result = FstringParser_ConcatFstring(&state, s, &ofs, 0, c, n);
-            Py_DECREF(s);
-            if (result < 0)
-                goto error;
            } else {
+                assert(s != NULL && fstr == NULL);
                /* This is a regular string. Concatenate it. */
                if (FstringParser_ConcatAndDel(&state, s) < 0)
                    goto error;
            }
        }
+    }
    if (bytesmode) {
        /* Just return the bytes object and we're done. */
        if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0)