Commit 0cd7a3f1, authored by Serhiy Storchaka, committed by GitHub

bpo-29104: Fixed parsing backslashes in f-strings. (#490)

parent d1c3c13f
...@@ -361,6 +361,20 @@ f'{a * x()}'""" ...@@ -361,6 +361,20 @@ f'{a * x()}'"""
self.assertEqual(f'2\x203', '2 3') self.assertEqual(f'2\x203', '2 3')
self.assertEqual(f'\x203', ' 3') self.assertEqual(f'\x203', ' 3')
with self.assertWarns(DeprecationWarning): # invalid escape sequence
value = eval(r"f'\{6*7}'")
self.assertEqual(value, '\\42')
self.assertEqual(f'\\{6*7}', '\\42')
self.assertEqual(fr'\{6*7}', '\\42')
AMPERSAND = 'spam'
# Get the right unicode character (&), or pick up local variable
# depending on the number of backslashes.
self.assertEqual(f'\N{AMPERSAND}', '&')
self.assertEqual(f'\\N{AMPERSAND}', '\\Nspam')
self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam')
self.assertEqual(f'\\\N{AMPERSAND}', '\\&')
def test_misformed_unicode_character_name(self): def test_misformed_unicode_character_name(self):
# These test are needed because unicode names are parsed # These test are needed because unicode names are parsed
# differently inside f-strings. # differently inside f-strings.
......
...@@ -10,6 +10,8 @@ What's New in Python 3.7.0 alpha 1? ...@@ -10,6 +10,8 @@ What's New in Python 3.7.0 alpha 1?
Core and Builtins Core and Builtins
----------------- -----------------
- bpo-29104: Fixed parsing backslashes in f-strings.
- bpo-27945: Fixed various segfaults with dict when input collections are - bpo-27945: Fixed various segfaults with dict when input collections are
mutated during searching, inserting or comparing. Based on patches by mutated during searching, inserting or comparing. Based on patches by
Duane Griffin and Tim Mitchell. Duane Griffin and Tim Mitchell.
......
...@@ -4197,9 +4197,11 @@ decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s, ...@@ -4197,9 +4197,11 @@ decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
while (s < end) { while (s < end) {
if (*s == '\\') { if (*s == '\\') {
*p++ = *s++; *p++ = *s++;
if (*s & 0x80) { if (s >= end || *s & 0x80) {
strcpy(p, "u005c"); strcpy(p, "u005c");
p += 5; p += 5;
if (s >= end)
break;
} }
} }
if (*s & 0x80) { /* XXX inefficient */ if (*s & 0x80) { /* XXX inefficient */
...@@ -4352,30 +4354,37 @@ fstring_find_literal(const char **str, const char *end, int raw, ...@@ -4352,30 +4354,37 @@ fstring_find_literal(const char **str, const char *end, int raw,
brace (which isn't part of a unicode name escape such as brace (which isn't part of a unicode name escape such as
"\N{EULER CONSTANT}"), or the end of the string. */ "\N{EULER CONSTANT}"), or the end of the string. */
const char *literal_start = *str; const char *s = *str;
const char *literal_end; const char *literal_start = s;
int in_named_escape = 0;
int result = 0; int result = 0;
assert(*literal == NULL); assert(*literal == NULL);
for (; *str < end; (*str)++) { while (s < end) {
char ch = **str; char ch = *s++;
if (!in_named_escape && ch == '{' && (*str)-literal_start >= 2 && if (!raw && ch == '\\' && s < end) {
*(*str-2) == '\\' && *(*str-1) == 'N') { ch = *s++;
in_named_escape = 1; if (ch == 'N') {
} else if (in_named_escape && ch == '}') { if (s < end && *s++ == '{') {
in_named_escape = 0; while (s < end && *s++ != '}') {
} else if (ch == '{' || ch == '}') { }
continue;
}
break;
}
if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
return -1;
}
}
if (ch == '{' || ch == '}') {
/* Check for doubled braces, but only at the top level. If /* Check for doubled braces, but only at the top level. If
we checked at every level, then f'{0:{3}}' would fail we checked at every level, then f'{0:{3}}' would fail
with the two closing braces. */ with the two closing braces. */
if (recurse_lvl == 0) { if (recurse_lvl == 0) {
if (*str+1 < end && *(*str+1) == ch) { if (s < end && *s == ch) {
/* We're going to tell the caller that the literal ends /* We're going to tell the caller that the literal ends
here, but that they should continue scanning. But also here, but that they should continue scanning. But also
skip over the second brace when we resume scanning. */ skip over the second brace when we resume scanning. */
literal_end = *str+1; *str = s + 1;
*str += 2;
result = 1; result = 1;
goto done; goto done;
} }
...@@ -4383,6 +4392,7 @@ fstring_find_literal(const char **str, const char *end, int raw, ...@@ -4383,6 +4392,7 @@ fstring_find_literal(const char **str, const char *end, int raw,
/* Where a single '{' is the start of a new expression, a /* Where a single '{' is the start of a new expression, a
single '}' is not allowed. */ single '}' is not allowed. */
if (ch == '}') { if (ch == '}') {
*str = s - 1;
ast_error(c, n, "f-string: single '}' is not allowed"); ast_error(c, n, "f-string: single '}' is not allowed");
return -1; return -1;
} }
...@@ -4390,21 +4400,22 @@ fstring_find_literal(const char **str, const char *end, int raw, ...@@ -4390,21 +4400,22 @@ fstring_find_literal(const char **str, const char *end, int raw,
/* We're either at a '{', which means we're starting another /* We're either at a '{', which means we're starting another
expression; or a '}', which means we're at the end of this expression; or a '}', which means we're at the end of this
f-string (for a nested format_spec). */ f-string (for a nested format_spec). */
s--;
break; break;
} }
} }
literal_end = *str; *str = s;
assert(*str <= end); assert(s <= end);
assert(*str == end || **str == '{' || **str == '}'); assert(s == end || *s == '{' || *s == '}');
done: done:
if (literal_start != literal_end) { if (literal_start != s) {
if (raw) if (raw)
*literal = PyUnicode_DecodeUTF8Stateful(literal_start, *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
literal_end-literal_start, s - literal_start,
NULL, NULL); NULL, NULL);
else else
*literal = decode_unicode_with_escapes(c, n, literal_start, *literal = decode_unicode_with_escapes(c, n, literal_start,
literal_end-literal_start); s - literal_start);
if (!*literal) if (!*literal)
return -1; return -1;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment