Commit 743365a5 authored by Stefan Behnel

Correct the positions reported for errors in f-strings.

Closes https://github.com/cython/cython/issues/3674
parent 4728b8a0
......@@ -33,6 +33,9 @@ Bugs fixed
* The C++ ``typeid()`` function was allowed in C mode.
Patch by Celelibi. (Github issue #3637)
* The error position reported for errors found in f-strings was misleading.
(Github issue #3674)
* The new ``c_api_binop_methods`` directive was added for forward compatibility, but can
only be set to True (the current default value). It can be disabled in Cython 3.0.
......
......@@ -69,7 +69,8 @@ cdef bint check_for_non_ascii_characters(unicode string)
@cython.locals(systr=unicode, is_python3_source=bint, is_raw=bint)
cdef p_string_literal(PyrexScanner s, kind_override=*)
cdef _append_escape_sequence(kind, builder, unicode escape_sequence, PyrexScanner s)
@cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4)
cdef tuple _f_string_error_pos(pos, string, Py_ssize_t i)
@cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4, next_start=Py_ssize_t)
cdef list p_f_string(PyrexScanner s, unicode_value, pos, bint is_raw)
@cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4, quote_char=Py_UCS4, NO_CHAR=Py_UCS4)
cdef tuple p_f_string_expr(PyrexScanner s, unicode_value, pos, Py_ssize_t starting_index, bint is_raw)
......
......@@ -882,6 +882,7 @@ def p_string_literal(s, kind_override=None):
pos = s.position()
is_python3_source = s.context.language_level >= 3
has_non_ascii_literal_characters = False
string_start_pos = (pos[0], pos[1], pos[2] + len(s.systring))
kind_string = s.systring.rstrip('"\'').lower()
if len(kind_string) > 1:
if len(set(kind_string)) != len(kind_string):
......@@ -965,7 +966,7 @@ def p_string_literal(s, kind_override=None):
s.error("bytes can only contain ASCII literal characters.", pos=pos)
bytes_value = None
if kind == 'f':
unicode_value = p_f_string(s, unicode_value, pos, is_raw='r' in kind_string)
unicode_value = p_f_string(s, unicode_value, string_start_pos, is_raw='r' in kind_string)
s.next()
return (kind, bytes_value, unicode_value)
......@@ -1037,6 +1038,10 @@ _parse_escape_sequences_raw, _parse_escape_sequences = [re.compile((
for is_raw in (True, False)]
def _f_string_error_pos(pos, string, i):
return (pos[0], pos[1], pos[2] + i + 1) # FIXME: handle newlines in string
def p_f_string(s, unicode_value, pos, is_raw):
# Parses a PEP 498 f-string literal into a list of nodes. Nodes are either UnicodeNodes
# or FormattedValueNodes.
......@@ -1044,15 +1049,13 @@ def p_f_string(s, unicode_value, pos, is_raw):
next_start = 0
size = len(unicode_value)
builder = StringEncoding.UnicodeLiteralBuilder()
error_pos = list(pos) # [src, line, column]
_parse_seq = _parse_escape_sequences_raw if is_raw else _parse_escape_sequences
while next_start < size:
end = next_start
error_pos[2] = pos[2] + end # FIXME: handle newlines in string
match = _parse_seq(unicode_value, next_start)
if match is None:
error(tuple(error_pos), "Invalid escape sequence")
error(_f_string_error_pos(pos, unicode_value, next_start), "Invalid escape sequence")
next_start = match.end()
part = match.group()
......@@ -1076,7 +1079,8 @@ def p_f_string(s, unicode_value, pos, is_raw):
if part == '}}':
builder.append('}')
else:
s.error("f-string: single '}' is not allowed", pos=tuple(error_pos))
error(_f_string_error_pos(pos, unicode_value, end),
"f-string: single '}' is not allowed")
else:
builder.append(part)
......@@ -1097,16 +1101,20 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
nested_depth = 0
quote_char = NO_CHAR
in_triple_quotes = False
backslash_reported = False
while True:
if i >= size:
s.error("missing '}' in format string expression")
break # error will be reported below
c = unicode_value[i]
if quote_char != NO_CHAR:
if c == '\\':
error_pos = (pos[0], pos[1] + i, pos[2]) # FIXME: handle newlines in string
error(error_pos, "backslashes not allowed in f-strings")
# avoid redundant error reports along '\' sequences
if not backslash_reported:
error(_f_string_error_pos(pos, unicode_value, i),
"backslashes not allowed in f-strings")
backslash_reported = True
elif c == quote_char:
if in_triple_quotes:
if i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
......@@ -1125,7 +1133,8 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
elif nested_depth != 0 and c in '}])':
nested_depth -= 1
elif c == '#':
s.error("format string cannot include #")
error(_f_string_error_pos(pos, unicode_value, i),
"format string cannot include #")
elif nested_depth == 0 and c in '!:}':
# allow != as a special case
if c == '!' and i + 1 < size and unicode_value[i + 1] == '=':
......@@ -1141,12 +1150,13 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
expr_pos = (pos[0], pos[1], pos[2] + starting_index + 2) # TODO: find exact code position (concat, multi-line, ...)
if not expr_str.strip():
error(expr_pos, "empty expression not allowed in f-string")
error(_f_string_error_pos(pos, unicode_value, starting_index),
"empty expression not allowed in f-string")
if terminal_char == '!':
i += 1
if i + 2 > size:
error(expr_pos, "invalid conversion char at end of string")
pass # error will be reported below
else:
conversion_char = unicode_value[i]
i += 1
......@@ -1159,7 +1169,7 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
start_format_spec = i + 1
while True:
if i >= size:
s.error("missing '}' in format specifier", pos=expr_pos)
break # error will be reported below
c = unicode_value[i]
if not in_triple_quotes and not in_string:
if c == '{':
......@@ -1181,7 +1191,9 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
format_spec_str = unicode_value[start_format_spec:i]
if terminal_char != '}':
s.error("missing '}' in format string expression', found '%s'" % terminal_char)
error(_f_string_error_pos(pos, unicode_value, i),
"missing '}' in format string expression" + (
", found '%s'" % terminal_char if terminal_char else ""))
# parse the expression as if it was surrounded by parentheses
buf = StringIO('(%s)' % expr_str)
......@@ -1190,7 +1202,7 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
# validate the conversion char
if conversion_char is not None and not ExprNodes.FormattedValueNode.find_conversion_func(conversion_char):
error(pos, "invalid conversion character '%s'" % conversion_char)
error(expr_pos, "invalid conversion character '%s'" % conversion_char)
# the format spec is itself treated like an f-string
if format_spec_str:
......
# mode: error
# tag: fstring
def incorrect_fstrings(x):  # error-test fixture: keep every line in place, _ERRORS refers to line:column
return [
f"{x}{'\\'}'{x+1}",  # backslash inside a quoted part of an expression
f"""{}""",  # empty expression in a triple-quoted f-string
f"{}",  # empty expression
f"{x!}",  # '!' without a closing '}': reported as missing '}', found '!'
f"{",  # unterminated expression: reported as empty and as missing '}'
f"{{}}}",  # single '}' left over after the escaped '{{' and '}}'
]
_ERRORS = """
6:16: backslashes not allowed in f-strings
7:14: empty expression not allowed in f-string
8:12: empty expression not allowed in f-string
9:14: missing '}' in format string expression, found '!'
10:12: empty expression not allowed in f-string
10:12: missing '}' in format string expression
11:15: f-string: single '}' is not allowed
"""
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment