Commit 5703194d authored by Stefan Behnel

Prevent character escape sequences from being resolved in raw f-strings...

Prevent character escape sequences from being resolved in raw f-strings (fr"..."). Also fix some error reporting issues along the way.
Update test_fstring.py test file from Py3.7.
parent e77528ab
...@@ -80,6 +80,9 @@ Bugs fixed ...@@ -80,6 +80,9 @@ Bugs fixed
* Compile time evaluations of (partially) constant f-strings could show incorrect * Compile time evaluations of (partially) constant f-strings could show incorrect
results. results.
* Escape sequences in raw f-strings (``fr'...'``) were resolved instead of passing
them through as expected.
* Some ref-counting issues in buffer error handling have been resolved. * Some ref-counting issues in buffer error handling have been resolved.
Other changes Other changes
......
...@@ -11,8 +11,8 @@ cython.declare(Nodes=object, ExprNodes=object, EncodedString=object, ...@@ -11,8 +11,8 @@ cython.declare(Nodes=object, ExprNodes=object, EncodedString=object,
bytes_literal=object, StringEncoding=object, bytes_literal=object, StringEncoding=object,
FileSourceDescriptor=object, lookup_unicodechar=object, unicode_category=object, FileSourceDescriptor=object, lookup_unicodechar=object, unicode_category=object,
Future=object, Options=object, error=object, warning=object, Future=object, Options=object, error=object, warning=object,
Builtin=object, ModuleNode=object, Utils=object, Builtin=object, ModuleNode=object, Utils=object, _unicode=object, _bytes=object,
re=object, sys=object, _parse_escape_sequences=object, _unicode=object, _bytes=object, re=object, sys=object, _parse_escape_sequences=object, _parse_escape_sequences_raw=object,
partial=object, reduce=object, _IS_PY3=cython.bint, _IS_2BYTE_UNICODE=cython.bint) partial=object, reduce=object, _IS_PY3=cython.bint, _IS_2BYTE_UNICODE=cython.bint)
from io import StringIO from io import StringIO
...@@ -1013,22 +1013,25 @@ def _append_escape_sequence(kind, builder, escape_sequence, s): ...@@ -1013,22 +1013,25 @@ def _append_escape_sequence(kind, builder, escape_sequence, s):
builder.append(escape_sequence) builder.append(escape_sequence)
_parse_escape_sequences = re.compile( _parse_escape_sequences_raw, _parse_escape_sequences = [re.compile((
# escape sequences: # escape sequences:
br'(\\(?:' br'(\\(?:' +
br'[\\abfnrtv"\'{]|' (br'\\?' if is_raw else (
br'[0-7]{2,3}|' br'[\\abfnrtv"\'{]|'
br'N\{[^}]*\}|' br'[0-7]{2,3}|'
br'x[0-9a-fA-F]{2}|' br'N\{[^}]*\}|'
br'u[0-9a-fA-F]{4}|' br'x[0-9a-fA-F]{2}|'
br'U[0-9a-fA-F]{8}|' br'u[0-9a-fA-F]{4}|'
br'[NuU]|' # detect invalid escape sequences that do not match above br'U[0-9a-fA-F]{8}|'
br'[NxuU]|' # detect invalid escape sequences that do not match above
)) +
br')?|' br')?|'
# non-escape sequences: # non-escape sequences:
br'\{\{?|' br'\{\{?|'
br'\}\}?|' br'\}\}?|'
br'[^\\{}]+)'.decode('us-ascii') br'[^\\{}]+)'
).match ).decode('us-ascii')).match
for is_raw in (True, False)]
def p_f_string(s, unicode_value, pos, is_raw): def p_f_string(s, unicode_value, pos, is_raw):
...@@ -1038,13 +1041,15 @@ def p_f_string(s, unicode_value, pos, is_raw): ...@@ -1038,13 +1041,15 @@ def p_f_string(s, unicode_value, pos, is_raw):
next_start = 0 next_start = 0
size = len(unicode_value) size = len(unicode_value)
builder = StringEncoding.UnicodeLiteralBuilder() builder = StringEncoding.UnicodeLiteralBuilder()
error_pos = list(pos) # [src, line, column]
_parse_seq = _parse_escape_sequences_raw if is_raw else _parse_escape_sequences
while next_start < size: while next_start < size:
end = next_start end = next_start
match = _parse_escape_sequences(unicode_value, next_start) error_pos[2] = pos[2] + end # FIXME: handle newlines in string
match = _parse_seq(unicode_value, next_start)
if match is None: if match is None:
error_pos = (pos[0], pos[1] + end, pos[2]) # FIXME: handle newlines in string error(tuple(error_pos), "Invalid escape sequence")
error(error_pos, "Invalid escape sequence")
next_start = match.end() next_start = match.end()
part = match.group() part = match.group()
...@@ -1068,8 +1073,7 @@ def p_f_string(s, unicode_value, pos, is_raw): ...@@ -1068,8 +1073,7 @@ def p_f_string(s, unicode_value, pos, is_raw):
if part == '}}': if part == '}}':
builder.append('}') builder.append('}')
else: else:
error_pos = (pos[0], pos[1] + end, pos[2]) # FIXME: handle newlines in string s.error("f-string: single '}' is not allowed", pos=tuple(error_pos))
s.error("f-string: single '}' is not allowed", pos=error_pos)
else: else:
builder.append(part) builder.append(part)
...@@ -1134,12 +1138,12 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw): ...@@ -1134,12 +1138,12 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
expr_pos = (pos[0], pos[1], pos[2] + starting_index + 2) # TODO: find exact code position (concat, multi-line, ...) expr_pos = (pos[0], pos[1], pos[2] + starting_index + 2) # TODO: find exact code position (concat, multi-line, ...)
if not expr_str.strip(): if not expr_str.strip():
error(pos, "empty expression not allowed in f-string") error(expr_pos, "empty expression not allowed in f-string")
if terminal_char == '!': if terminal_char == '!':
i += 1 i += 1
if i + 2 > size: if i + 2 > size:
error(pos, "invalid conversion char at end of string") error(expr_pos, "invalid conversion char at end of string")
else: else:
conversion_char = unicode_value[i] conversion_char = unicode_value[i]
i += 1 i += 1
...@@ -1152,7 +1156,7 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw): ...@@ -1152,7 +1156,7 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
start_format_spec = i + 1 start_format_spec = i + 1
while True: while True:
if i >= size: if i >= size:
s.error("missing '}' in format specifier") s.error("missing '}' in format specifier", pos=expr_pos)
c = unicode_value[i] c = unicode_value[i]
if not in_triple_quotes and not in_string: if not in_triple_quotes and not in_string:
if c == '{': if c == '{':
......
...@@ -35,9 +35,10 @@ class TestCase(CythonTest): ...@@ -35,9 +35,10 @@ class TestCase(CythonTest):
if exception_type is SyntaxError: if exception_type is SyntaxError:
try: try:
self.fragment(str) self.fragment(str)
assert held_errors(), "Invalid Cython code failed to raise SyntaxError: %s" % str
except CompileError: except CompileError:
assert True assert True
else:
assert held_errors(), "Invalid Cython code failed to raise SyntaxError: %r" % str
finally: finally:
release_errors(ignore=True) release_errors(ignore=True)
else: else:
...@@ -46,7 +47,7 @@ class TestCase(CythonTest): ...@@ -46,7 +47,7 @@ class TestCase(CythonTest):
except exception_type: except exception_type:
assert True assert True
else: else:
assert False, "Invalid Cython code failed to raise %s: %s" % (exception_type, str) assert False, "Invalid Cython code failed to raise %s: %r" % (exception_type, str)
finally: finally:
if error_stack: if error_stack:
release_errors(ignore=True) release_errors(ignore=True)
...@@ -141,18 +142,9 @@ f'{a * x()}'""" ...@@ -141,18 +142,9 @@ f'{a * x()}'"""
self.assertTrue(g.__doc__ is None) self.assertTrue(g.__doc__ is None)
def __test_literal_eval(self): def __test_literal_eval(self):
# With no expressions, an f-string is okay.
self.assertEqual(ast.literal_eval("f'x'"), 'x')
self.assertEqual(ast.literal_eval("f'x' 'y'"), 'xy')
# But this should raise an error.
with self.assertRaisesRegex(ValueError, 'malformed node or string'): with self.assertRaisesRegex(ValueError, 'malformed node or string'):
ast.literal_eval("f'x'") ast.literal_eval("f'x'")
# As should this, which uses a different ast node
with self.assertRaisesRegex(ValueError, 'malformed node or string'):
ast.literal_eval("f'{3}'")
def __test_ast_compile_time_concat(self): def __test_ast_compile_time_concat(self):
x = [''] x = ['']
...@@ -354,6 +346,10 @@ f'{a * x()}'""" ...@@ -354,6 +346,10 @@ f'{a * x()}'"""
"f'{10:{ }}'", "f'{10:{ }}'",
"f' { } '", "f' { } '",
# The Python parser ignores also the following
# whitespace characters in addition to a space.
"f'''{\t\f\r\n}'''",
# Catch the empty expression before the # Catch the empty expression before the
# invalid conversion. # invalid conversion.
"f'{!x}'", "f'{!x}'",
...@@ -374,6 +370,12 @@ f'{a * x()}'""" ...@@ -374,6 +370,12 @@ f'{a * x()}'"""
"f'{:x'", "f'{:x'",
]) ])
# Different error message is raised for other whitespace characters.
self.assertAllRaise(SyntaxError, 'invalid character in identifier',
["f'''{\xa0}'''",
#"\xa0",
])
def test_parens_in_expressions(self): def test_parens_in_expressions(self):
self.assertEqual(f'{3,}', '(3,)') self.assertEqual(f'{3,}', '(3,)')
...@@ -435,6 +437,20 @@ f'{a * x()}'""" ...@@ -435,6 +437,20 @@ f'{a * x()}'"""
self.assertEqual(f'2\x203', '2 3') self.assertEqual(f'2\x203', '2 3')
self.assertEqual(f'\x203', ' 3') self.assertEqual(f'\x203', ' 3')
#with self.assertWarns(DeprecationWarning): # invalid escape sequence
# value = cy_eval(r"f'\{6*7}'")
#self.assertEqual(value, '\\42')
self.assertEqual(f'\\{6*7}', '\\42')
self.assertEqual(fr'\{6*7}', '\\42')
AMPERSAND = 'spam'
# Get the right unicode character (&), or pick up local variable
# depending on the number of backslashes.
self.assertEqual(f'\N{AMPERSAND}', '&')
self.assertEqual(f'\\N{AMPERSAND}', '\\Nspam')
self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam')
self.assertEqual(f'\\\N{AMPERSAND}', '\\&')
def test_misformed_unicode_character_name(self): def test_misformed_unicode_character_name(self):
# These tests are needed because unicode names are parsed # These tests are needed because unicode names are parsed
# differently inside f-strings. # differently inside f-strings.
...@@ -808,7 +824,8 @@ f'{a * x()}'""" ...@@ -808,7 +824,8 @@ f'{a * x()}'"""
def test_errors(self): def test_errors(self):
# see issue 26287 # see issue 26287
self.assertAllRaise((TypeError, ValueError), 'non-empty', # TypeError in Py3.4+ exc = ValueError if sys.version_info < (3, 4) else TypeError
self.assertAllRaise(exc, 'unsupported',
[r"f'{(lambda: 0):x}'", [r"f'{(lambda: 0):x}'",
r"f'{(0,):x}'", r"f'{(0,):x}'",
]) ])
...@@ -832,6 +849,11 @@ f'{a * x()}'""" ...@@ -832,6 +849,11 @@ f'{a * x()}'"""
self.assertEqual(f'{d["foo"]}', 'bar') self.assertEqual(f'{d["foo"]}', 'bar')
self.assertEqual(f"{d['foo']}", 'bar') self.assertEqual(f"{d['foo']}", 'bar')
def __test_backslash_char(self):
# Check eval of a backslash followed by a control char.
# See bpo-30682: this used to raise an assert in pydebug mode.
self.assertEqual(cy_eval('f"\\\n"'), '')
self.assertEqual(cy_eval('f"\\\r"'), '')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment