bpo-29104: Fixed parsing backslashes in f-strings. (#490)

0cd7a3f1 · Serhiy Storchaka · GitHub · d1c3c13f · 0cd7a3f1 · 0cd7a3f1
Commit 0cd7a3f1, authored May 25, 2017 by Serhiy Storchaka; committed by GitHub May 25, 2017
Hide whitespace changes
Inline Side-by-side

Showing 3 changed files with 48 additions and 21 deletions

Lib/test/test_fstring.py +14 -0

Misc/NEWS +2 -0

Python/ast.c +32 -21

No files found.
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -361,6 +361,20 @@ f'{a * x()}'"""
        self.assertEqual(f'2\x203', '2 3')
        self.assertEqual(f'\x203', ' 3')
+        with self.assertWarns(DeprecationWarning):  # invalid escape sequence
+            value = eval(r"f'\{6*7}'")
+        self.assertEqual(value, '\\42')
+        self.assertEqual(f'\\{6*7}', '\\42')
+        self.assertEqual(fr'\{6*7}', '\\42')
+        AMPERSAND = 'spam'
+        # Get the right unicode character (&), or pick up local variable
+        # depending on the number of backslashes.
+        self.assertEqual(f'\N{AMPERSAND}', '&')
+        self.assertEqual(f'\\N{AMPERSAND}', '\\Nspam')
+        self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam')
+        self.assertEqual(f'\\\N{AMPERSAND}', '\\&')
    def test_misformed_unicode_character_name(self):
        # These test are needed because unicode names are parsed
        # differently inside f-strings.

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@ What's New in Python 3.7.0 alpha 1?
 Core and Builtins
 -----------------
+- bpo-29104: Fixed parsing backslashes in f-strings.
 - bpo-27945: Fixed various segfaults with dict when input collections are
  mutated during searching, inserting or comparing.  Based on patches by
  Duane Griffin and Tim Mitchell.

--- a/Python/ast.c
+++ b/Python/ast.c
@@ -4197,9 +4197,11 @@ decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
    while (s < end) {
        if (*s == '\\') {
            *p++ = *s++;
-            if (*s & 0x80) {
+            if (s >= end || *s & 0x80) {
                strcpy(p, "u005c");
                p += 5;
+                if (s >= end)
+                    break;
            }
        }
        if (*s & 0x80) { /* XXX inefficient */
@@ -4352,30 +4354,37 @@ fstring_find_literal(const char **str, const char *end, int raw,
       brace (which isn't part of a unicode name escape such as
       "\N{EULER CONSTANT}"), or the end of the string. */
-    const char *literal_start = *str;
+    const char *s = *str;
-    const char *literal_end;
+    const char *literal_start = s;
-    int in_named_escape = 0;
    int result = 0;
    assert(*literal == NULL);
-    for (; *str < end; (*str)++) {
+    while (s < end) {
-        char ch = **str;
+        char ch = *s++;
-        if (!in_named_escape && ch == '{' && (*str)-literal_start >= 2 &&
+        if (!raw && ch == '\\' && s < end) {
-            *(*str-2) == '\\' && *(*str-1) == 'N') {
+            ch = *s++;
-            in_named_escape = 1;
+            if (ch == 'N') {
-        } else if (in_named_escape && ch == '}') {
+                if (s < end && *s++ == '{') {
-            in_named_escape = 0;
+                    while (s < end && *s++ != '}') {
-        } else if (ch == '{' || ch == '}') {
+                    }
+                    continue;
+                }
+                break;
+            }
+            if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
+                return -1;
+            }
+        }
+        if (ch == '{' || ch == '}') {
            /* Check for doubled braces, but only at the top level. If
               we checked at every level, then f'{0:{3}}' would fail
               with the two closing braces. */
            if (recurse_lvl == 0) {
-                if (*str+1 < end && *(*str+1) == ch) {
+                if (s < end && *s == ch) {
                    /* We're going to tell the caller that the literal ends
                       here, but that they should continue scanning. But also
                       skip over the second brace when we resume scanning. */
-                    literal_end = *str+1;
+                    *str = s + 1;
-                    *str += 2;
                    result = 1;
                    goto done;
                }
@@ -4383,6 +4392,7 @@ fstring_find_literal(const char **str, const char *end, int raw,
                /* Where a single '{' is the start of a new expression, a
                   single '}' is not allowed. */
                if (ch == '}') {
+                    *str = s - 1;
                    ast_error(c, n, "f-string: single '}' is not allowed");
                    return -1;
                }
@@ -4390,21 +4400,22 @@ fstring_find_literal(const char **str, const char *end, int raw,
            /* We're either at a '{', which means we're starting another
               expression; or a '}', which means we're at the end of this
               f-string (for a nested format_spec). */
+            s--;
            break;
        }
    }
-    literal_end = *str;
+    *str = s;
-    assert(*str <= end);
+    assert(s <= end);
-    assert(*str == end || **str == '{' || **str == '}');
+    assert(s == end || *s == '{' || *s == '}');
 done:
-    if (literal_start != literal_end) {
+    if (literal_start != s) {
        if (raw)
            *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
-                                                    literal_end-literal_start,
+                                                    s - literal_start,
                                                    NULL, NULL);
        else
            *literal = decode_unicode_with_escapes(c, n, literal_start,
-                                                   literal_end-literal_start);
+                                                   s - literal_start);
        if (!*literal)
            return -1;
    }