Commit 5703194d authored by Stefan Behnel

Prevent character escape sequences from being resolved in raw f-strings...

Prevent character escape sequences from being resolved in raw f-strings (fr"..."). Also fix some error reporting issues along the way.
Update test_fstring.py test file from Py3.7.
parent e77528ab
......@@ -80,6 +80,9 @@ Bugs fixed
* Compile time evaluations of (partially) constant f-strings could show incorrect
results.
* Escape sequences in raw f-strings (``fr'...'``) were resolved instead of passing
them through as expected.
* Some ref-counting issues in buffer error handling have been resolved.
Other changes
......
......@@ -11,8 +11,8 @@ cython.declare(Nodes=object, ExprNodes=object, EncodedString=object,
bytes_literal=object, StringEncoding=object,
FileSourceDescriptor=object, lookup_unicodechar=object, unicode_category=object,
Future=object, Options=object, error=object, warning=object,
Builtin=object, ModuleNode=object, Utils=object,
re=object, sys=object, _parse_escape_sequences=object, _unicode=object, _bytes=object,
Builtin=object, ModuleNode=object, Utils=object, _unicode=object, _bytes=object,
re=object, sys=object, _parse_escape_sequences=object, _parse_escape_sequences_raw=object,
partial=object, reduce=object, _IS_PY3=cython.bint, _IS_2BYTE_UNICODE=cython.bint)
from io import StringIO
......@@ -1013,22 +1013,25 @@ def _append_escape_sequence(kind, builder, escape_sequence, s):
builder.append(escape_sequence)
_parse_escape_sequences = re.compile(
_parse_escape_sequences_raw, _parse_escape_sequences = [re.compile((
# escape sequences:
br'(\\(?:'
br'[\\abfnrtv"\'{]|'
br'[0-7]{2,3}|'
br'N\{[^}]*\}|'
br'x[0-9a-fA-F]{2}|'
br'u[0-9a-fA-F]{4}|'
br'U[0-9a-fA-F]{8}|'
br'[NuU]|' # detect invalid escape sequences that do not match above
br'(\\(?:' +
(br'\\?' if is_raw else (
br'[\\abfnrtv"\'{]|'
br'[0-7]{2,3}|'
br'N\{[^}]*\}|'
br'x[0-9a-fA-F]{2}|'
br'u[0-9a-fA-F]{4}|'
br'U[0-9a-fA-F]{8}|'
br'[NxuU]|' # detect invalid escape sequences that do not match above
)) +
br')?|'
# non-escape sequences:
br'\{\{?|'
br'\}\}?|'
br'[^\\{}]+)'.decode('us-ascii')
).match
br'[^\\{}]+)'
).decode('us-ascii')).match
for is_raw in (True, False)]
def p_f_string(s, unicode_value, pos, is_raw):
......@@ -1038,13 +1041,15 @@ def p_f_string(s, unicode_value, pos, is_raw):
next_start = 0
size = len(unicode_value)
builder = StringEncoding.UnicodeLiteralBuilder()
error_pos = list(pos) # [src, line, column]
_parse_seq = _parse_escape_sequences_raw if is_raw else _parse_escape_sequences
while next_start < size:
end = next_start
match = _parse_escape_sequences(unicode_value, next_start)
error_pos[2] = pos[2] + end # FIXME: handle newlines in string
match = _parse_seq(unicode_value, next_start)
if match is None:
error_pos = (pos[0], pos[1] + end, pos[2]) # FIXME: handle newlines in string
error(error_pos, "Invalid escape sequence")
error(tuple(error_pos), "Invalid escape sequence")
next_start = match.end()
part = match.group()
......@@ -1068,8 +1073,7 @@ def p_f_string(s, unicode_value, pos, is_raw):
if part == '}}':
builder.append('}')
else:
error_pos = (pos[0], pos[1] + end, pos[2]) # FIXME: handle newlines in string
s.error("f-string: single '}' is not allowed", pos=error_pos)
s.error("f-string: single '}' is not allowed", pos=tuple(error_pos))
else:
builder.append(part)
......@@ -1134,12 +1138,12 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
expr_pos = (pos[0], pos[1], pos[2] + starting_index + 2) # TODO: find exact code position (concat, multi-line, ...)
if not expr_str.strip():
error(pos, "empty expression not allowed in f-string")
error(expr_pos, "empty expression not allowed in f-string")
if terminal_char == '!':
i += 1
if i + 2 > size:
error(pos, "invalid conversion char at end of string")
error(expr_pos, "invalid conversion char at end of string")
else:
conversion_char = unicode_value[i]
i += 1
......@@ -1152,7 +1156,7 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
start_format_spec = i + 1
while True:
if i >= size:
s.error("missing '}' in format specifier")
s.error("missing '}' in format specifier", pos=expr_pos)
c = unicode_value[i]
if not in_triple_quotes and not in_string:
if c == '{':
......
......@@ -35,9 +35,10 @@ class TestCase(CythonTest):
if exception_type is SyntaxError:
try:
self.fragment(str)
assert held_errors(), "Invalid Cython code failed to raise SyntaxError: %s" % str
except CompileError:
assert True
else:
assert held_errors(), "Invalid Cython code failed to raise SyntaxError: %r" % str
finally:
release_errors(ignore=True)
else:
......@@ -46,7 +47,7 @@ class TestCase(CythonTest):
except exception_type:
assert True
else:
assert False, "Invalid Cython code failed to raise %s: %s" % (exception_type, str)
assert False, "Invalid Cython code failed to raise %s: %r" % (exception_type, str)
finally:
if error_stack:
release_errors(ignore=True)
......@@ -141,18 +142,9 @@ f'{a * x()}'"""
self.assertTrue(g.__doc__ is None)
def __test_literal_eval(self):
# With no expressions, an f-string is okay.
self.assertEqual(ast.literal_eval("f'x'"), 'x')
self.assertEqual(ast.literal_eval("f'x' 'y'"), 'xy')
# But this should raise an error.
with self.assertRaisesRegex(ValueError, 'malformed node or string'):
ast.literal_eval("f'x'")
# As should this, which uses a different ast node
with self.assertRaisesRegex(ValueError, 'malformed node or string'):
ast.literal_eval("f'{3}'")
def __test_ast_compile_time_concat(self):
x = ['']
......@@ -354,6 +346,10 @@ f'{a * x()}'"""
"f'{10:{ }}'",
"f' { } '",
# The Python parser also ignores the following
# whitespace characters in addition to a space.
"f'''{\t\f\r\n}'''",
# Catch the empty expression before the
# invalid conversion.
"f'{!x}'",
......@@ -374,6 +370,12 @@ f'{a * x()}'"""
"f'{:x'",
])
# Different error message is raised for other whitespace characters.
self.assertAllRaise(SyntaxError, 'invalid character in identifier',
["f'''{\xa0}'''",
#"\xa0",
])
def test_parens_in_expressions(self):
self.assertEqual(f'{3,}', '(3,)')
......@@ -435,6 +437,20 @@ f'{a * x()}'"""
self.assertEqual(f'2\x203', '2 3')
self.assertEqual(f'\x203', ' 3')
#with self.assertWarns(DeprecationWarning): # invalid escape sequence
# value = cy_eval(r"f'\{6*7}'")
#self.assertEqual(value, '\\42')
self.assertEqual(f'\\{6*7}', '\\42')
self.assertEqual(fr'\{6*7}', '\\42')
AMPERSAND = 'spam'
# Get the right unicode character (&), or pick up local variable
# depending on the number of backslashes.
self.assertEqual(f'\N{AMPERSAND}', '&')
self.assertEqual(f'\\N{AMPERSAND}', '\\Nspam')
self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam')
self.assertEqual(f'\\\N{AMPERSAND}', '\\&')
def test_misformed_unicode_character_name(self):
# These tests are needed because unicode names are parsed
# differently inside f-strings.
......@@ -808,7 +824,8 @@ f'{a * x()}'"""
def test_errors(self):
# see issue 26287
self.assertAllRaise((TypeError, ValueError), 'non-empty', # TypeError in Py3.4+
exc = ValueError if sys.version_info < (3, 4) else TypeError
self.assertAllRaise(exc, 'unsupported',
[r"f'{(lambda: 0):x}'",
r"f'{(0,):x}'",
])
......@@ -832,6 +849,11 @@ f'{a * x()}'"""
self.assertEqual(f'{d["foo"]}', 'bar')
self.assertEqual(f"{d['foo']}", 'bar')
def __test_backslash_char(self):
# Check eval of a backslash followed by a control char.
# See bpo-30682: this used to raise an assert in pydebug mode.
self.assertEqual(cy_eval('f"\\\n"'), '')
self.assertEqual(cy_eval('f"\\\r"'), '')
if __name__ == '__main__':
unittest.main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment