Commit fe3a65f7 authored by Robert Bradshaw's avatar Robert Bradshaw

F strings (PEP 498)

parents 37534635 b9c66f41
...@@ -2954,6 +2954,111 @@ class RawCNameExprNode(ExprNode): ...@@ -2954,6 +2954,111 @@ class RawCNameExprNode(ExprNode):
pass pass
#-------------------------------------------------------------------
#
# F-strings
#
#-------------------------------------------------------------------
class JoinedStrNode(ExprNode):
# F-strings
#
# values [UnicodeNode|FormattedValueNode] Substrings of the f-string
#
type = unicode_type
is_temp = True
subexprs = ['values']
def analyse_types(self, env):
self.values = [v.analyse_types(env).coerce_to_pyobject(env) for v in self.values]
return self
def generate_evaluation_code(self, code):
code.mark_pos(self.pos)
num_items = len(self.values)
list_var = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
code.putln('%s = PyList_New(%s); %s' % (
list_var,
num_items,
code.error_goto_if_null(list_var, self.pos)))
code.put_gotref(list_var)
for i, node in enumerate(self.values):
node.generate_evaluation_code(code)
node.make_owned_reference(code)
code.put_giveref(node.py_result())
code.putln('PyList_SET_ITEM(%s, %s, %s);' % (list_var, i, node.py_result()))
node.generate_post_assignment_code(code)
node.free_temps(code)
code.mark_pos(self.pos)
self.allocate_temp_result(code)
code.putln('%s = PyUnicode_Join(%s, %s); %s' % (
self.result(),
Naming.empty_unicode,
list_var,
code.error_goto_if_null(self.py_result(), self.pos)))
code.put_gotref(self.py_result())
code.put_decref_clear(list_var, py_object_type)
code.funcstate.release_temp(list_var)
class FormattedValueNode(ExprNode):
# {}-delimited portions of an f-string
#
# value ExprNode The expression itself
# conversion_char str or None Type conversion (!s, !r, !a, or none)
# format_spec JoinedStrNode or None Format string passed to __format__
subexprs = ['value', 'format_spec']
type = py_object_type
is_temp = True
find_conversion_func = {
's': 'PyObject_Str',
'r': 'PyObject_Repr',
'a': 'PyObject_ASCII', # NOTE: Py3-only!
}.get
def analyse_types(self, env):
self.value = self.value.analyse_types(env).coerce_to_pyobject(env)
if self.format_spec:
self.format_spec = self.format_spec.analyse_types(env).coerce_to_pyobject(env)
return self
def generate_result_code(self, code):
value_result = self.value.py_result()
if self.format_spec:
format_func = '__Pyx_PyObject_Format'
format_spec = self.format_spec.py_result()
else:
# common case: expect simple Unicode pass-through if no format spec
format_func = '__Pyx_PyObject_FormatSimple'
format_spec = Naming.empty_unicode
if self.conversion_char:
fn = self.find_conversion_func(self.conversion_char)
assert fn is not None, "invalid conversion character found: '%s'" % self.conversion_char
value_result = '%s(%s)' % (fn, value_result)
code.globalstate.use_utility_code(UtilityCode.load_cached("PyObjectFormatAndDecref", "StringTools.c"))
format_func += 'AndDecref'
elif not self.format_spec:
code.globalstate.use_utility_code(UtilityCode.load_cached("PyObjectFormatSimple", "StringTools.c"))
else:
format_func = 'PyObject_Format'
code.putln("%s = %s(%s, %s); %s" % (
self.result(),
format_func,
value_result,
format_spec,
code.error_goto_if_null(self.result(), self.pos)))
code.put_gotref(self.py_result())
#------------------------------------------------------------------- #-------------------------------------------------------------------
# #
# Parallel nodes (cython.parallel.thread(savailable|id)) # Parallel nodes (cython.parallel.thread(savailable|id))
......
...@@ -7,7 +7,7 @@ from __future__ import absolute_import ...@@ -7,7 +7,7 @@ from __future__ import absolute_import
raw_prefixes = "rR" raw_prefixes = "rR"
bytes_prefixes = "bB" bytes_prefixes = "bB"
string_prefixes = "uU" + bytes_prefixes string_prefixes = "fFuU" + bytes_prefixes
char_prefixes = "cC" char_prefixes = "cC"
any_string_prefix = raw_prefixes + string_prefixes + char_prefixes any_string_prefix = raw_prefixes + string_prefixes + char_prefixes
IDENT = 'IDENT' IDENT = 'IDENT'
...@@ -40,8 +40,8 @@ def make_lexicon(): ...@@ -40,8 +40,8 @@ def make_lexicon():
fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent) fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent)
imagconst = (intconst | fltconst) + Any("jJ") imagconst = (intconst | fltconst) + Any("jJ")
beginstring = Opt(Any(string_prefixes) + Opt(Any(raw_prefixes)) | # invalid combinations of prefixes are caught in p_string_literal
Any(raw_prefixes) + Opt(Any(bytes_prefixes)) | beginstring = Opt(Rep(Any(string_prefixes + raw_prefixes)) |
Any(char_prefixes) Any(char_prefixes)
) + (Str("'") | Str('"') | Str("'''") | Str('"""')) ) + (Str("'") | Str('"') | Str("'''") | Str('"""'))
two_oct = octdigit + octdigit two_oct = octdigit + octdigit
......
...@@ -695,6 +695,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -695,6 +695,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln('static PyObject *%s;' % Naming.builtins_cname) code.putln('static PyObject *%s;' % Naming.builtins_cname)
code.putln('static PyObject *%s;' % Naming.empty_tuple) code.putln('static PyObject *%s;' % Naming.empty_tuple)
code.putln('static PyObject *%s;' % Naming.empty_bytes) code.putln('static PyObject *%s;' % Naming.empty_bytes)
code.putln('static PyObject *%s;' % Naming.empty_unicode)
if Options.pre_import is not None: if Options.pre_import is not None:
code.putln('static PyObject *%s;' % Naming.preimport_cname) code.putln('static PyObject *%s;' % Naming.preimport_cname)
code.putln('static int %s;' % Naming.lineno_cname) code.putln('static int %s;' % Naming.lineno_cname)
...@@ -2117,6 +2118,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -2117,6 +2118,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
Naming.empty_tuple, code.error_goto_if_null(Naming.empty_tuple, self.pos))) Naming.empty_tuple, code.error_goto_if_null(Naming.empty_tuple, self.pos)))
code.putln("%s = PyBytes_FromStringAndSize(\"\", 0); %s" % ( code.putln("%s = PyBytes_FromStringAndSize(\"\", 0); %s" % (
Naming.empty_bytes, code.error_goto_if_null(Naming.empty_bytes, self.pos))) Naming.empty_bytes, code.error_goto_if_null(Naming.empty_bytes, self.pos)))
code.putln("%s = PyUnicode_FromStringAndSize(\"\", 0); %s" % (
Naming.empty_unicode, code.error_goto_if_null(Naming.empty_unicode, self.pos)))
for ext_type in ('CyFunction', 'FusedFunction', 'Coroutine', 'Generator', 'StopAsyncIteration'): for ext_type in ('CyFunction', 'FusedFunction', 'Coroutine', 'Generator', 'StopAsyncIteration'):
code.putln("#ifdef __Pyx_%s_USED" % ext_type) code.putln("#ifdef __Pyx_%s_USED" % ext_type)
......
...@@ -96,6 +96,7 @@ gilstate_cname = pyrex_prefix + "state" ...@@ -96,6 +96,7 @@ gilstate_cname = pyrex_prefix + "state"
skip_dispatch_cname = pyrex_prefix + "skip_dispatch" skip_dispatch_cname = pyrex_prefix + "skip_dispatch"
empty_tuple = pyrex_prefix + "empty_tuple" empty_tuple = pyrex_prefix + "empty_tuple"
empty_bytes = pyrex_prefix + "empty_bytes" empty_bytes = pyrex_prefix + "empty_bytes"
empty_unicode = pyrex_prefix + "empty_unicode"
print_function = pyrex_prefix + "print" print_function = pyrex_prefix + "print"
print_function_kwargs = pyrex_prefix + "print_kwargs" print_function_kwargs = pyrex_prefix + "print_kwargs"
cleanup_cname = pyrex_prefix + "module_cleanup" cleanup_cname = pyrex_prefix + "module_cleanup"
......
...@@ -3,6 +3,7 @@ from __future__ import absolute_import ...@@ -3,6 +3,7 @@ from __future__ import absolute_import
import sys import sys
import copy import copy
import codecs import codecs
import itertools
from . import TypeSlots from . import TypeSlots
from .ExprNodes import not_a_constant from .ExprNodes import not_a_constant
...@@ -3934,6 +3935,44 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations): ...@@ -3934,6 +3935,44 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations):
sequence_node.mult_factor = factor sequence_node.mult_factor = factor
return sequence_node return sequence_node
def visit_FormattedValueNode(self, node):
self.visitchildren(node)
if isinstance(node.format_spec, ExprNodes.UnicodeNode) and not node.format_spec.value:
node.format_spec = None
if node.format_spec is None and node.conversion_char is None and isinstance(node.value, ExprNodes.UnicodeNode):
return node.value
return node
def visit_JoinedStrNode(self, node):
"""
Clean up after the parser by discarding empty Unicode strings and merging
substring sequences. Empty or single-value join lists are not uncommon
because f-string format specs are always parsed into JoinedStrNodes.
"""
self.visitchildren(node)
unicode_node = ExprNodes.UnicodeNode
values = []
for is_unode_group, substrings in itertools.groupby(node.values, lambda v: isinstance(v, unicode_node)):
if is_unode_group:
substrings = list(substrings)
unode = substrings[0]
if len(substrings) > 1:
unode.value = EncodedString(u''.join(value.value for value in substrings))
# ignore empty Unicode strings
if unode.value:
values.append(unode)
else:
values.extend(substrings)
if not values:
node = ExprNodes.UnicodeNode(node.pos, value=EncodedString(''))
elif len(values) == 1:
node = values[0]
else:
node.values = values
return node
def visit_MergedDictNode(self, node): def visit_MergedDictNode(self, node):
"""Unpack **args in place if we can.""" """Unpack **args in place if we can."""
self.visitchildren(node) self.visitchildren(node)
......
...@@ -68,6 +68,10 @@ cdef p_opt_string_literal(PyrexScanner s, required_type=*) ...@@ -68,6 +68,10 @@ cdef p_opt_string_literal(PyrexScanner s, required_type=*)
cdef bint check_for_non_ascii_characters(unicode string) cdef bint check_for_non_ascii_characters(unicode string)
@cython.locals(systr=unicode, is_python3_source=bint, is_raw=bint) @cython.locals(systr=unicode, is_python3_source=bint, is_raw=bint)
cdef p_string_literal(PyrexScanner s, kind_override=*) cdef p_string_literal(PyrexScanner s, kind_override=*)
@cython.locals(i=Py_ssize_t, size=Py_ssize_t)
cdef list p_f_string(PyrexScanner s, unicode_value, pos)
@cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4, quote_char=Py_UCS4)
cdef tuple p_f_string_expr(PyrexScanner s, unicode_value, pos, Py_ssize_t starting_index)
cdef p_list_maker(PyrexScanner s) cdef p_list_maker(PyrexScanner s)
cdef p_comp_iter(PyrexScanner s, body) cdef p_comp_iter(PyrexScanner s, body)
cdef p_comp_for(PyrexScanner s, body) cdef p_comp_for(PyrexScanner s, body)
......
...@@ -15,12 +15,13 @@ cython.declare(Nodes=object, ExprNodes=object, EncodedString=object, ...@@ -15,12 +15,13 @@ cython.declare(Nodes=object, ExprNodes=object, EncodedString=object,
re=object, _unicode=object, _bytes=object, re=object, _unicode=object, _bytes=object,
partial=object, reduce=object, _IS_PY3=cython.bint) partial=object, reduce=object, _IS_PY3=cython.bint)
from io import StringIO
import re import re
import sys import sys
from unicodedata import lookup as lookup_unicodechar from unicodedata import lookup as lookup_unicodechar
from functools import partial, reduce from functools import partial, reduce
from .Scanning import PyrexScanner, FileSourceDescriptor from .Scanning import PyrexScanner, FileSourceDescriptor, StringSourceDescriptor
from . import Nodes from . import Nodes
from . import ExprNodes from . import ExprNodes
from . import Builtin from . import Builtin
...@@ -693,8 +694,12 @@ def p_atom(s): ...@@ -693,8 +694,12 @@ def p_atom(s):
return ExprNodes.UnicodeNode(pos, value = unicode_value, bytes_value = bytes_value) return ExprNodes.UnicodeNode(pos, value = unicode_value, bytes_value = bytes_value)
elif kind == 'b': elif kind == 'b':
return ExprNodes.BytesNode(pos, value = bytes_value) return ExprNodes.BytesNode(pos, value = bytes_value)
else: elif kind == 'f':
return ExprNodes.JoinedStrNode(pos, values = unicode_value)
elif kind == '':
return ExprNodes.StringNode(pos, value = bytes_value, unicode_value = unicode_value) return ExprNodes.StringNode(pos, value = bytes_value, unicode_value = unicode_value)
else:
s.error("invalid string kind '%s'" % kind)
elif sy == 'IDENT': elif sy == 'IDENT':
name = s.systring name = s.systring
s.next() s.next()
...@@ -788,42 +793,61 @@ def wrap_compile_time_constant(pos, value): ...@@ -788,42 +793,61 @@ def wrap_compile_time_constant(pos, value):
def p_cat_string_literal(s): def p_cat_string_literal(s):
# A sequence of one or more adjacent string literals. # A sequence of one or more adjacent string literals.
# Returns (kind, bytes_value, unicode_value) # Returns (kind, bytes_value, unicode_value)
# where kind in ('b', 'c', 'u', '') # where kind in ('b', 'c', 'u', 'f', '')
pos = s.position()
kind, bytes_value, unicode_value = p_string_literal(s) kind, bytes_value, unicode_value = p_string_literal(s)
if kind == 'c' or s.sy != 'BEGIN_STRING': if kind == 'c' or s.sy != 'BEGIN_STRING':
return kind, bytes_value, unicode_value return kind, bytes_value, unicode_value
bstrings, ustrings = [bytes_value], [unicode_value] bstrings, ustrings, positions = [bytes_value], [unicode_value], [pos]
bytes_value = unicode_value = None bytes_value = unicode_value = None
while s.sy == 'BEGIN_STRING': while s.sy == 'BEGIN_STRING':
pos = s.position() pos = s.position()
next_kind, next_bytes_value, next_unicode_value = p_string_literal(s) next_kind, next_bytes_value, next_unicode_value = p_string_literal(s)
if next_kind == 'c': if next_kind == 'c':
error(pos, "Cannot concatenate char literal with another string or char literal") error(pos, "Cannot concatenate char literal with another string or char literal")
continue
elif next_kind != kind: elif next_kind != kind:
error(pos, "Cannot mix string literals of different types, expected %s'', got %s''" % # concatenating f strings and normal strings is allowed and leads to an f string
(kind, next_kind)) if set([kind, next_kind]) in (set(['f', 'u']), set(['f', ''])):
else: kind = 'f'
bstrings.append(next_bytes_value) else:
ustrings.append(next_unicode_value) error(pos, "Cannot mix string literals of different types, expected %s'', got %s''" %
(kind, next_kind))
continue
bstrings.append(next_bytes_value)
ustrings.append(next_unicode_value)
positions.append(pos)
# join and rewrap the partial literals # join and rewrap the partial literals
if kind in ('b', 'c', '') or kind == 'u' and None not in bstrings: if kind in ('b', 'c', '') or kind == 'u' and None not in bstrings:
# Py3 enforced unicode literals are parsed as bytes/unicode combination # Py3 enforced unicode literals are parsed as bytes/unicode combination
bytes_value = bytes_literal(StringEncoding.join_bytes(bstrings), s.source_encoding) bytes_value = bytes_literal(StringEncoding.join_bytes(bstrings), s.source_encoding)
if kind in ('u', ''): if kind in ('u', ''):
unicode_value = EncodedString( u''.join([ u for u in ustrings if u is not None ]) ) unicode_value = EncodedString(u''.join([u for u in ustrings if u is not None]))
if kind == 'f':
unicode_value = []
for u, pos in zip(ustrings, positions):
if isinstance(u, list):
unicode_value += u
else:
# non-f-string concatenated into the f-string
unicode_value.append(ExprNodes.UnicodeNode(pos, value=EncodedString(u)))
return kind, bytes_value, unicode_value return kind, bytes_value, unicode_value
def p_opt_string_literal(s, required_type='u'): def p_opt_string_literal(s, required_type='u'):
if s.sy == 'BEGIN_STRING': if s.sy != 'BEGIN_STRING':
kind, bytes_value, unicode_value = p_string_literal(s, required_type)
if required_type == 'u':
return unicode_value
elif required_type == 'b':
return bytes_value
else:
s.error("internal parser configuration error")
else:
return None return None
pos = s.position()
kind, bytes_value, unicode_value = p_string_literal(s, required_type)
if required_type == 'u':
if kind == 'f':
s.error("f-string not allowed here", pos)
return unicode_value
elif required_type == 'b':
return bytes_value
else:
s.error("internal parser configuration error")
def check_for_non_ascii_characters(string): def check_for_non_ascii_characters(string):
for c in string: for c in string:
...@@ -831,38 +855,55 @@ def check_for_non_ascii_characters(string): ...@@ -831,38 +855,55 @@ def check_for_non_ascii_characters(string):
return True return True
return False return False
def p_string_literal(s, kind_override=None): def p_string_literal(s, kind_override=None):
# A single string or char literal. Returns (kind, bvalue, uvalue) # A single string or char literal. Returns (kind, bvalue, uvalue)
# where kind in ('b', 'c', 'u', ''). The 'bvalue' is the source # where kind in ('b', 'c', 'u', 'f', ''). The 'bvalue' is the source
# code byte sequence of the string literal, 'uvalue' is the # code byte sequence of the string literal, 'uvalue' is the
# decoded Unicode string. Either of the two may be None depending # decoded Unicode string. Either of the two may be None depending
# on the 'kind' of string, only unprefixed strings have both # on the 'kind' of string, only unprefixed strings have both
# representations. # representations. In f-strings, the uvalue is a list of the Unicode
# strings and f-string expressions that make up the f-string.
# s.sy == 'BEGIN_STRING' # s.sy == 'BEGIN_STRING'
pos = s.position() pos = s.position()
is_raw = False is_raw = False
is_python3_source = s.context.language_level >= 3 is_python3_source = s.context.language_level >= 3
has_non_ascii_literal_characters = False has_non_ascii_literal_characters = False
kind = s.systring[:1].lower() kind_string = s.systring.rstrip('"\'').lower()
if kind == 'r': if len(set(kind_string)) != len(kind_string):
# Py3 allows both 'br' and 'rb' as prefix s.error('Duplicate string prefix character')
if s.systring[1:2].lower() == 'b': if 'b' in kind_string and 'u' in kind_string:
kind = 'b' s.error('String prefixes b and u cannot be combined')
else: if 'b' in kind_string and 'f' in kind_string:
kind = '' s.error('String prefixes b and f cannot be combined')
is_raw = True if 'u' in kind_string and 'f' in kind_string:
elif kind in 'ub': s.error('String prefixes u and f cannot be combined')
is_raw = s.systring[1:2].lower() == 'r'
elif kind != 'c': is_raw = 'r' in kind_string
if 'c' in kind_string:
# this should never happen, since the lexer does not allow combining c
# with other prefix characters
if len(kind_string) != 1:
s.error('Invalid string prefix for character literal')
kind = 'c'
elif 'f' in kind_string:
kind = 'f' # u is ignored
elif 'b' in kind_string:
kind = 'b'
elif 'u' in kind_string:
kind = 'u'
else:
kind = '' kind = ''
if kind == '' and kind_override is None and Future.unicode_literals in s.context.future_directives: if kind == '' and kind_override is None and Future.unicode_literals in s.context.future_directives:
chars = StringEncoding.StrLiteralBuilder(s.source_encoding) chars = StringEncoding.StrLiteralBuilder(s.source_encoding)
kind = 'u' kind = 'u'
else: else:
if kind_override is not None and kind_override in 'ub': if kind_override is not None and kind_override in 'ub':
kind = kind_override kind = kind_override
if kind == 'u': if kind in ('u', 'f'): # f-strings are scanned exactly like Unicode literals, but are parsed further later
chars = StringEncoding.UnicodeLiteralBuilder() chars = StringEncoding.UnicodeLiteralBuilder()
elif kind == '': elif kind == '':
chars = StringEncoding.StrLiteralBuilder(s.source_encoding) chars = StringEncoding.StrLiteralBuilder(s.source_encoding)
...@@ -873,7 +914,7 @@ def p_string_literal(s, kind_override=None): ...@@ -873,7 +914,7 @@ def p_string_literal(s, kind_override=None):
s.next() s.next()
sy = s.sy sy = s.sy
systr = s.systring systr = s.systring
#print "p_string_literal: sy =", sy, repr(s.systring) ### # print "p_string_literal: sy =", sy, repr(s.systring) ###
if sy == 'CHARS': if sy == 'CHARS':
chars.append(systr) chars.append(systr)
if is_python3_source and not has_non_ascii_literal_characters and check_for_non_ascii_characters(systr): if is_python3_source and not has_non_ascii_literal_characters and check_for_non_ascii_characters(systr):
...@@ -901,7 +942,7 @@ def p_string_literal(s, kind_override=None): ...@@ -901,7 +942,7 @@ def p_string_literal(s, kind_override=None):
else: else:
s.error("Invalid hex escape '%s'" % systr, s.error("Invalid hex escape '%s'" % systr,
fatal=False) fatal=False)
elif c in u'NUu' and kind in ('u', ''): # \uxxxx, \Uxxxxxxxx, \N{...} elif c in u'NUu' and kind in ('u', 'f', ''): # \uxxxx, \Uxxxxxxxx, \N{...}
chrval = -1 chrval = -1
if c == u'N': if c == u'N':
try: try:
...@@ -943,14 +984,162 @@ def p_string_literal(s, kind_override=None): ...@@ -943,14 +984,162 @@ def p_string_literal(s, kind_override=None):
bytes_value, unicode_value = chars.getstrings() bytes_value, unicode_value = chars.getstrings()
if is_python3_source and has_non_ascii_literal_characters: if is_python3_source and has_non_ascii_literal_characters:
# Python 3 forbids literal non-ASCII characters in byte strings # Python 3 forbids literal non-ASCII characters in byte strings
if kind != 'u': if kind not in ('u', 'f'):
s.error("bytes can only contain ASCII literal characters.", s.error("bytes can only contain ASCII literal characters.",
pos=pos, fatal=False) pos=pos, fatal=False)
bytes_value = None bytes_value = None
if kind == 'f':
unicode_value = p_f_string(s, unicode_value, pos)
s.next() s.next()
return (kind, bytes_value, unicode_value) return (kind, bytes_value, unicode_value)
def p_f_string(s, unicode_value, pos):
# Parses a PEP 498 f-string literal into a list of nodes. Nodes are either UnicodeNodes
# or FormattedValueNodes.
values = []
i = 0
size = len(unicode_value)
current_literal_start = 0
while i < size:
c = unicode_value[i]
if c in '{}':
if i + 1 < size and unicode_value[i + 1] == c:
encoded_str = EncodedString(unicode_value[current_literal_start:i + 1])
values.append(ExprNodes.UnicodeNode(pos, value=encoded_str))
i += 2
current_literal_start = i
elif c == '}':
s.error("single '}' encountered in format string")
else:
encoded_str = EncodedString(unicode_value[current_literal_start:i])
values.append(ExprNodes.UnicodeNode(pos, value=encoded_str))
i, expr_node = p_f_string_expr(s, unicode_value, pos, i + 1)
current_literal_start = i
values.append(expr_node)
else:
i += 1
encoded_str = EncodedString(unicode_value[current_literal_start:])
values.append(ExprNodes.UnicodeNode(pos, value=encoded_str))
return values
def p_f_string_expr(s, unicode_value, pos, starting_index):
# Parses a {}-delimited expression inside an f-string. Returns a FormattedValueNode
# and the index in the string that follows the expression.
i = starting_index
size = len(unicode_value)
conversion_char = terminal_char = format_spec = None
format_spec_str = None
NO_CHAR = 2**30
nested_depth = 0
quote_char = NO_CHAR
in_triple_quotes = False
while True:
if i >= size:
s.error("missing '}' in format string expression")
c = unicode_value[i]
if quote_char != NO_CHAR:
if c == '\\':
i += 1
elif c == quote_char:
if in_triple_quotes:
if i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
in_triple_quotes = False
quote_char = NO_CHAR
i += 2
else:
quote_char = NO_CHAR
elif c in '\'"':
quote_char = c
if i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
in_triple_quotes = True
i += 2
elif c in '{[(':
nested_depth += 1
elif nested_depth != 0 and c in '}])':
nested_depth -= 1
elif c == '#':
s.error("format string cannot include #")
elif nested_depth == 0 and c in '!:}':
# allow != as a special case
if c == '!' and i + 1 < size and unicode_value[i + 1] == '=':
i += 1
continue
terminal_char = c
break
i += 1
# normalise line endings as the parser expects that
expr_str = unicode_value[starting_index:i].replace('\r\n', '\n').replace('\r', '\n')
expr_pos = (pos[0], pos[1], pos[2] + starting_index + 2) # TODO: find exact code position (concat, multi-line, ...)
if not expr_str.strip():
s.error("empty expression not allowed in f-string")
if terminal_char == '!':
i += 1
if i + 2 > size:
s.error("invalid conversion char at end of string")
conversion_char = unicode_value[i]
i += 1
terminal_char = unicode_value[i]
if terminal_char == ':':
in_triple_quotes = False
in_string = False
nested_depth = 0
start_format_spec = i + 1
while True:
if i >= size:
s.error("missing '}' in format specifier")
c = unicode_value[i]
if not in_triple_quotes and not in_string:
if c == '{':
if nested_depth >= 1:
s.error("nesting of '{' in format specifier is not allowed")
nested_depth += 1
elif c == '}':
if nested_depth > 0:
nested_depth -= 1
else:
terminal_char = c
break
if c in '\'"':
if not in_string and i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
in_triple_quotes = not in_triple_quotes
i += 2
elif not in_triple_quotes:
in_string = not in_string
i += 1
format_spec_str = unicode_value[start_format_spec:i]
if terminal_char != '}':
s.error("missing '}' in format string expression', found '%s'" % terminal_char)
# parse the expression as if it was surrounded by parentheses
buf = StringIO('(%s)' % expr_str)
scanner = PyrexScanner(buf, expr_pos[0], parent_scanner=s, source_encoding=s.source_encoding, initial_pos=expr_pos)
expr = p_testlist(scanner) # TODO is testlist right here?
# validate the conversion char
if conversion_char is not None and not ExprNodes.FormattedValueNode.find_conversion_func(conversion_char):
s.error("invalid conversion character '%s'" % conversion_char)
# the format spec is itself treated like an f-string
if format_spec_str:
format_spec = ExprNodes.JoinedStrNode(pos, values=p_f_string(s, format_spec_str, pos))
return i + 1, ExprNodes.FormattedValueNode(
s.position(), value=expr, conversion_char=conversion_char, format_spec=format_spec)
# since PEP 448: # since PEP 448:
# list_display ::= "[" [listmaker] "]" # list_display ::= "[" [listmaker] "]"
# listmaker ::= (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) # listmaker ::= (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
......
...@@ -134,6 +134,10 @@ ...@@ -134,6 +134,10 @@
#define __Pyx_PyString_Format(a, b) PyString_Format(a, b) #define __Pyx_PyString_Format(a, b) PyString_Format(a, b)
#endif #endif
#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII)
#define PyObject_ASCII(o) PyObject_Repr(o)
#endif
#if PY_MAJOR_VERSION >= 3 #if PY_MAJOR_VERSION >= 3
#define PyBaseString_Type PyUnicode_Type #define PyBaseString_Type PyUnicode_Type
#define PyStringObject PyUnicodeObject #define PyStringObject PyUnicodeObject
......
...@@ -811,3 +811,23 @@ static CYTHON_INLINE int __Pyx_PyByteArray_Append(PyObject* bytearray, int value ...@@ -811,3 +811,23 @@ static CYTHON_INLINE int __Pyx_PyByteArray_Append(PyObject* bytearray, int value
Py_DECREF(retval); Py_DECREF(retval);
return 0; return 0;
} }
//////////////////// PyObjectFormatSimple.proto ////////////////////
#define __Pyx_PyObject_FormatSimple(s, f) (likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) : PyObject_Format(s, f))
//////////////////// PyObjectFormatAndDecref.proto ////////////////////
#define __Pyx_PyObject_FormatSimpleAndDecref(s, f) \
((unlikely(!s) || likely(PyUnicode_CheckExact(s))) ? s : __Pyx_PyObject_FormatAndDecref(s, f))
static CYTHON_INLINE PyObject* __Pyx_PyObject_FormatAndDecref(PyObject* s, PyObject* f);
//////////////////// PyObjectFormatAndDecref ////////////////////
static CYTHON_INLINE PyObject* __Pyx_PyObject_FormatAndDecref(PyObject* s, PyObject* f) {
PyObject *result = PyObject_Format(s, f);
Py_DECREF(s);
return result;
}
# cython: language_level=3
# mode: run
# tag: allow_unknown_names, f_strings, pep498
import ast
import types
import decimal
import unittest
import contextlib
import sys
IS_PY2 = sys.version_info[0] < 3
IS_PY26 = sys.version_info[:2] < (2, 7)
from Cython.Build.Inline import cython_inline
from Cython.TestUtils import CythonTest
from Cython.Compiler.Errors import CompileError, hold_errors, release_errors, error_stack
def cy_eval(s, **kwargs):
return cython_inline('return ' + s, force=True, **kwargs)
a_global = 'global variable'
# You could argue that I'm too strict in looking for specific error
# values with assertRaisesRegex, but without it it's way too easy to
# make a syntax error in the test strings. Especially with all of the
# triple quotes, raw strings, backslashes, etc. I think it's a
# worthwhile tradeoff. When I switched to this method, I found many
# examples where I wasn't testing what I thought I was.
class TestCase(CythonTest):
def assertAllRaise(self, exception_type, regex, error_strings):
for str in error_strings:
if exception_type is SyntaxError:
try:
self.fragment(str)
except CompileError:
assert True
else:
assert False, "Invalid Cython code failed to raise SyntaxError: %s" % str
else:
hold_errors()
try:
cython_inline(str, quiet=True)
except exception_type:
assert True
else:
assert False, "Invalid Cython code failed to raise %s: %s" % (exception_type, str)
finally:
if error_stack:
release_errors(ignore=True)
if IS_PY2:
def assertEqual(self, first, second, msg=None):
# strip u'' string prefixes in Py2
if first != second and isinstance(first, unicode):
stripped_first = first.replace("u'", "'").replace('u"', '"')
if stripped_first == second:
first = stripped_first
elif stripped_first.decode('unicode_escape') == second:
first = stripped_first.decode('unicode_escape')
super(TestCase, self).assertEqual(first, second, msg)
if IS_PY26:
@contextlib.contextmanager
def assertRaises(self, exc):
try:
yield
except exc:
pass
else:
assert False, "exception '%s' not raised" % exc
def assertIn(self, value, collection):
self.assertTrue(value in collection)
def test__format__lookup(self):
if IS_PY26:
return
elif IS_PY2:
raise unittest.SkipTest("Py3-only")
# Make sure __format__ is looked up on the type, not the instance.
class X:
def __format__(self, spec):
return 'class'
x = X()
# Add a bound __format__ method to the 'y' instance, but not
# the 'x' instance.
y = X()
y.__format__ = types.MethodType(lambda self, spec: 'instance', y)
self.assertEqual(f'{y}', format(y))
self.assertEqual(f'{y}', 'class')
self.assertEqual(format(x), format(y))
# __format__ is not called this way, but still make sure it
# returns what we expect (so we can make sure we're bypassing
# it).
self.assertEqual(x.__format__(''), 'class')
self.assertEqual(y.__format__(''), 'instance')
# This is how __format__ is actually called.
self.assertEqual(type(x).__format__(x, ''), 'class')
self.assertEqual(type(y).__format__(y, ''), 'class')
def __test_ast(self):
# Inspired by http://bugs.python.org/issue24975
class X:
def __init__(self):
self.called = False
def __call__(self):
self.called = True
return 4
x = X()
expr = """
a = 10
f'{a * x()}'"""
t = ast.parse(expr)
c = compile(t, '', 'exec')
# Make sure x was not called.
self.assertFalse(x.called)
# Actually run the code.
exec(c)
# Make sure x was called.
self.assertTrue(x.called)
def __test_literal_eval(self):
# With no expressions, an f-string is okay.
self.assertEqual(ast.literal_eval("f'x'"), 'x')
self.assertEqual(ast.literal_eval("f'x' 'y'"), 'xy')
# But this should raise an error.
with self.assertRaisesRegex(ValueError, 'malformed node or string'):
ast.literal_eval("f'x{3}'")
# As should this, which uses a different ast node
with self.assertRaisesRegex(ValueError, 'malformed node or string'):
ast.literal_eval("f'{3}'")
def __test_ast_compile_time_concat(self):
x = ['']
expr = """x[0] = 'foo' f'{3}'"""
t = ast.parse(expr)
c = compile(t, '', 'exec')
exec(c)
self.assertEqual(x[0], 'foo3')
def test_literal(self):
self.assertEqual(f'', '')
self.assertEqual(f'a', 'a')
self.assertEqual(f' ', ' ')
self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}',
'\N{GREEK CAPITAL LETTER DELTA}')
self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}',
'\u0394')
self.assertEqual(f'\N{True}', '\u22a8')
self.assertEqual(rf'\N{True}', r'\NTrue')
def test_escape_order(self):
# note that hex(ord('{')) == 0x7b, so this
# string becomes f'a{4*10}b'
self.assertEqual(f'a\u007b4*10}b', 'a40b')
self.assertEqual(f'a\x7b4*10}b', 'a40b')
self.assertEqual(f'a\x7b4*10\N{RIGHT CURLY BRACKET}b', 'a40b')
self.assertEqual(f'{"a"!\N{LATIN SMALL LETTER R}}', "'a'")
self.assertEqual(f'{10\x3a02X}', '0A')
self.assertEqual(f'{10:02\N{LATIN CAPITAL LETTER X}}', '0A')
self.assertAllRaise(SyntaxError, "f-string: single '}' is not allowed",
[r"""f'a{\u007b4*10}b'""", # mis-matched brackets
])
self.assertAllRaise(SyntaxError, 'unexpected character after line continuation character',
[r"""f'{"a"\!r}'""",
r"""f'{a\!r}'""",
])
def test_unterminated_string(self):
self.assertAllRaise(SyntaxError, 'f-string: unterminated string',
[r"""f'{"x'""",
r"""f'{"x}'""",
r"""f'{("x'""",
r"""f'{("x}'""",
])
def test_mismatched_parens(self):
self.assertAllRaise(SyntaxError, 'f-string: mismatched',
["f'{((}'",
])
def test_double_braces(self):
self.assertEqual(f'{{', '{')
self.assertEqual(f'a{{', 'a{')
self.assertEqual(f'{{b', '{b')
self.assertEqual(f'a{{b', 'a{b')
self.assertEqual(f'}}', '}')
self.assertEqual(f'a}}', 'a}')
self.assertEqual(f'}}b', '}b')
self.assertEqual(f'a}}b', 'a}b')
self.assertEqual(f'{{{10}', '{10')
self.assertEqual(f'}}{10}', '}10')
self.assertEqual(f'}}{{{10}', '}{10')
self.assertEqual(f'}}a{{{10}', '}a{10')
self.assertEqual(f'{10}{{', '10{')
self.assertEqual(f'{10}}}', '10}')
self.assertEqual(f'{10}}}{{', '10}{')
self.assertEqual(f'{10}}}a{{' '}', '10}a{}')
# Inside of strings, don't interpret doubled brackets.
self.assertEqual(f'{"{{}}"}', '{{}}')
self.assertAllRaise(TypeError, 'unhashable type',
["f'{ {{}} }'", # dict in a set
])
def test_compile_time_concat(self):
x = 'def'
self.assertEqual('abc' f'## {x}ghi', 'abc## defghi')
self.assertEqual('abc' f'{x}' 'ghi', 'abcdefghi')
self.assertEqual('abc' f'{x}' 'gh' f'i{x:4}', 'abcdefghidef ')
self.assertEqual('{x}' f'{x}', '{x}def')
self.assertEqual('{x' f'{x}', '{xdef')
self.assertEqual('{x}' f'{x}', '{x}def')
self.assertEqual('{{x}}' f'{x}', '{{x}}def')
self.assertEqual('{{x' f'{x}', '{{xdef')
self.assertEqual('x}}' f'{x}', 'x}}def')
self.assertEqual(f'{x}' 'x}}', 'defx}}')
self.assertEqual(f'{x}' '', 'def')
self.assertEqual('' f'{x}' '', 'def')
self.assertEqual('' f'{x}', 'def')
self.assertEqual(f'{x}' '2', 'def2')
self.assertEqual('1' f'{x}' '2', '1def2')
self.assertEqual('1' f'{x}', '1def')
self.assertEqual(f'{x}' f'-{x}', 'def-def')
self.assertEqual('' f'', '')
self.assertEqual('' f'' '', '')
self.assertEqual('' f'' '' f'', '')
self.assertEqual(f'', '')
self.assertEqual(f'' '', '')
self.assertEqual(f'' '' f'', '')
self.assertEqual(f'' '' f'' '', '')
self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
["f'{3' f'}'", # can't concat to get a valid f-string
])
def test_comments(self):
# These aren't comments, since they're in strings.
d = {'#': 'hash'}
self.assertEqual(f'{"#"}', '#')
self.assertEqual(f'{d["#"]}', 'hash')
self.assertAllRaise(SyntaxError, "f-string cannot include '#'",
["f'{1#}'", # error because the expression becomes "(1#)"
"f'{3(#)}'",
])
def test_many_expressions(self):
# Create a string with many expressions in it. Note that
# because we have a space in here as a literal, we're actually
# going to use twice as many ast nodes: one for each literal
# plus one for each expression.
def build_fstr(n, extra=''):
return "f'" + ('{x} ' * n) + extra + "'"
x = 'X'
width = 1
# Test around 256.
for i in range(250, 260):
self.assertEqual(cy_eval(build_fstr(i), x=x, width=width), (x+' ')*i)
# Test concatenating 2 largs fstrings.
self.assertEqual(cy_eval(build_fstr(255)*3, x=x, width=width), (x+' ')*(255*3)) # CPython uses 255*256
s = build_fstr(253, '{x:{width}} ')
self.assertEqual(cy_eval(s, x=x, width=width), (x+' ')*254)
# Test lots of expressions and constants, concatenated.
s = "f'{1}' 'x' 'y'" * 1024
self.assertEqual(cy_eval(s, x=x, width=width), '1xy' * 1024)
def test_format_specifier_expressions(self):
width = 10
precision = 4
value = decimal.Decimal('12.34567')
if not IS_PY26:
self.assertEqual(f'result: {value:{width}.{precision}}', 'result: 12.35')
self.assertEqual(f'result: {value:{width!r}.{precision}}', 'result: 12.35')
self.assertEqual(f'result: {value:{width:0}.{precision:1}}', 'result: 12.35')
self.assertEqual(f'result: {value:{1}{0:0}.{precision:1}}', 'result: 12.35')
self.assertEqual(f'result: {value:{ 1}{ 0:0}.{ precision:1}}', 'result: 12.35')
self.assertEqual(f'{10:#{1}0x}', ' 0xa')
self.assertEqual(f'{10:{"#"}1{0}{"x"}}', ' 0xa')
self.assertEqual(f'{-10:-{"#"}1{0}x}', ' -0xa')
self.assertEqual(f'{-10:{"-"}#{1}0{"x"}}', ' -0xa')
# self.assertEqual(f'{10:#{3 != {4:5} and width}x}', ' 0xa')
self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
["""f'{"s"!r{":10"}}'""",
# This looks like a nested format spec.
])
self.assertAllRaise(SyntaxError, "invalid syntax",
[# Invalid sytax inside a nested spec.
"f'{4:{/5}}'",
])
self.assertAllRaise(SyntaxError, "f-string: expressions nested too deeply",
[# Can't nest format specifiers.
"f'result: {value:{width:{0}}.{precision:1}}'",
])
self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character',
[# No expansion inside conversion or for
# the : or ! itself.
"""f'{"s"!{"r"}}'""",
])
def test_side_effect_order(self):
class X:
def __init__(self):
self.i = 0
def __format__(self, spec):
self.i += 1
return str(self.i)
x = X()
self.assertEqual(f'{x} {x}', '1 2')
def test_missing_expression(self):
self.assertAllRaise(SyntaxError, 'f-string: empty expression not allowed',
["f'{}'",
"f'{ }'"
"f' {} '",
"f'{!r}'",
"f'{ !r}'",
"f'{10:{ }}'",
"f' { } '",
r"f'{\n}'",
r"f'{\n \n}'",
# Catch the empty expression before the
# invalid conversion.
"f'{!x}'",
"f'{ !xr}'",
"f'{!x:}'",
"f'{!x:a}'",
"f'{ !xr:}'",
"f'{ !xr:a}'",
"f'{!}'",
"f'{:}'",
# We find the empty expression before the
# missing closing brace.
"f'{!'",
"f'{!s:'",
"f'{:'",
"f'{:x'",
])
def test_parens_in_expressions(self):
self.assertEqual(f'{3,}', '(3,)')
# Add these because when an expression is evaluated, parens
# are added around it. But we shouldn't go from an invalid
# expression to a valid one. The added parens are just
# supposed to allow whitespace (including newlines).
self.assertAllRaise(SyntaxError, 'invalid syntax',
["f'{,}'",
"f'{,}'", # this is (,), which is an error
])
self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
["f'{3)+(4}'",
])
self.assertAllRaise(SyntaxError, 'EOL while scanning string literal',
["f'{\n}'",
])
def test_newlines_in_expressions(self):
self.assertEqual(f'{0}', '0')
self.assertEqual(f'{0\n}', '0')
self.assertEqual(f'{0\r}', '0')
self.assertEqual(f'{\n0\n}', '0')
self.assertEqual(f'{\r0\r}', '0')
self.assertEqual(f'{\n0\r}', '0')
self.assertEqual(f'{\n0}', '0')
self.assertEqual(f'{3+\n4}', '7')
self.assertEqual(f'{3+\\\n4}', '7')
self.assertEqual(rf'''{3+
4}''', '7')
self.assertEqual(f'''{3+\
4}''', '7')
self.assertAllRaise(SyntaxError, 'f-string: empty expression not allowed',
[r"f'{\n}'",
])
def test_lambda(self):
x = 5
self.assertEqual(f'{(lambda y:x*y)("8")!r}', "'88888'")
if not IS_PY2:
self.assertEqual(f'{(lambda y:x*y)("8")!r:10}', "'88888' ")
self.assertEqual(f'{(lambda y:x*y)("8"):10}', "88888 ")
# lambda doesn't work without parens, because the colon
# makes the parser think it's a format_spec
self.assertAllRaise(SyntaxError, 'unexpected EOF while parsing',
["f'{lambda x:x}'",
])
def test_yield(self):
# Not terribly useful, but make sure the yield turns
# a function into a generator
def fn(y):
f'y:{yield y*2}'
g = fn(4)
self.assertEqual(next(g), 8)
def test_yield_send(self):
def fn(x):
yield f'x:{yield (lambda i: x * i)}'
g = fn(10)
the_lambda = next(g)
self.assertEqual(the_lambda(4), 40)
self.assertEqual(g.send('string'), 'x:string')
def test_expressions_with_triple_quoted_strings(self):
self.assertEqual(f"{'''x'''}", 'x')
self.assertEqual(f"{'''eric's'''}", "eric's")
self.assertEqual(f'{"""eric\'s"""}', "eric's")
self.assertEqual(f"{'''eric\"s'''}", 'eric"s')
self.assertEqual(f'{"""eric"s"""}', 'eric"s')
# Test concatenation within an expression
self.assertEqual(f'{"x" """eric"s""" "y"}', 'xeric"sy')
self.assertEqual(f'{"x" """eric"s"""}', 'xeric"s')
self.assertEqual(f'{"""eric"s""" "y"}', 'eric"sy')
self.assertEqual(f'{"""x""" """eric"s""" "y"}', 'xeric"sy')
self.assertEqual(f'{"""x""" """eric"s""" """y"""}', 'xeric"sy')
self.assertEqual(f'{r"""x""" """eric"s""" """y"""}', 'xeric"sy')
def test_multiple_vars(self):
x = 98
y = 'abc'
self.assertEqual(f'{x}{y}', '98abc')
self.assertEqual(f'X{x}{y}', 'X98abc')
self.assertEqual(f'{x}X{y}', '98Xabc')
self.assertEqual(f'{x}{y}X', '98abcX')
self.assertEqual(f'X{x}Y{y}', 'X98Yabc')
self.assertEqual(f'X{x}{y}Y', 'X98abcY')
self.assertEqual(f'{x}X{y}Y', '98XabcY')
self.assertEqual(f'X{x}Y{y}Z', 'X98YabcZ')
def test_closure(self):
def outer(x):
def inner():
return f'x:{x}'
return inner
self.assertEqual(outer('987')(), 'x:987')
self.assertEqual(outer(7)(), 'x:7')
def test_arguments(self):
y = 2
def f(x, width):
return f'x={x*y:{width}}'
self.assertEqual(f('foo', 10), 'x=foofoo ')
x = 'bar'
self.assertEqual(f(10, 10), 'x= 20')
def test_locals(self):
value = 123
self.assertEqual(f'v:{value}', 'v:123')
def test_missing_variable(self):
with self.assertRaises(NameError):
f'v:{value}'
def test_missing_format_spec(self):
class O:
def __format__(self, spec):
if not spec:
return '*'
return spec
self.assertEqual(f'{O():x}', 'x')
self.assertEqual(f'{O()}', '*')
self.assertEqual(f'{O():}', '*')
self.assertEqual(f'{3:}', '3')
self.assertEqual(f'{3!s:}', '3')
def test_global(self):
self.assertEqual(f'g:{a_global}', 'g:global variable')
self.assertEqual(f'g:{a_global!r}', "g:'global variable'")
a_local = 'local variable'
self.assertEqual(f'g:{a_global} l:{a_local}',
'g:global variable l:local variable')
self.assertEqual(f'g:{a_global!r}',
"g:'global variable'")
self.assertEqual(f'g:{a_global} l:{a_local!r}',
"g:global variable l:'local variable'")
self.assertIn("module 'unittest' from", f'{unittest}')
def test_shadowed_global(self):
a_global = 'really a local'
self.assertEqual(f'g:{a_global}', 'g:really a local')
self.assertEqual(f'g:{a_global!r}', "g:'really a local'")
a_local = 'local variable'
self.assertEqual(f'g:{a_global} l:{a_local}',
'g:really a local l:local variable')
self.assertEqual(f'g:{a_global!r}',
"g:'really a local'")
self.assertEqual(f'g:{a_global} l:{a_local!r}',
"g:really a local l:'local variable'")
def test_call(self):
def foo(x):
return 'x=' + str(x)
self.assertEqual(f'{foo(10)}', 'x=10')
def test_nested_fstrings(self):
y = 5
self.assertEqual(f'{f"{0}"*3}', '000')
self.assertEqual(f'{f"{y}"*3}', '555')
self.assertEqual(f'{f"{\'x\'}"*3}', 'xxx')
self.assertEqual(f"{r'x' f'{\"s\"}'}", 'xs')
self.assertEqual(f"{r'x'rf'{\"s\"}'}", 'xs')
def test_invalid_string_prefixes(self):
self.assertAllRaise(SyntaxError, 'unexpected EOF while parsing',
["fu''",
"uf''",
"Fu''",
"fU''",
"Uf''",
"uF''",
"ufr''",
"urf''",
"fur''",
"fru''",
"rfu''",
"ruf''",
"FUR''",
"Fur''",
])
def test_leading_trailing_spaces(self):
self.assertEqual(f'{ 3}', '3')
self.assertEqual(f'{ 3}', '3')
self.assertEqual(f'{\t3}', '3')
self.assertEqual(f'{\t\t3}', '3')
self.assertEqual(f'{3 }', '3')
self.assertEqual(f'{3 }', '3')
self.assertEqual(f'{3\t}', '3')
self.assertEqual(f'{3\t\t}', '3')
self.assertEqual(f'expr={ {x: y for x, y in [(1, 2), ]}}',
'expr={1: 2}')
self.assertEqual(f'expr={ {x: y for x, y in [(1, 2), ]} }',
'expr={1: 2}')
def test_character_name(self):
self.assertEqual(f'{4}\N{GREEK CAPITAL LETTER DELTA}{3}',
'4\N{GREEK CAPITAL LETTER DELTA}3')
self.assertEqual(f'{{}}\N{GREEK CAPITAL LETTER DELTA}{3}',
'{}\N{GREEK CAPITAL LETTER DELTA}3')
def test_not_equal(self):
# There's a special test for this because there's a special
# case in the f-string parser to look for != as not ending an
# expression. Normally it would, while looking for !s or !r.
self.assertEqual(f'{3!=4}', 'True')
self.assertEqual(f'{3!=4:}', 'True')
self.assertEqual(f'{3!=4!s}', 'True')
self.assertEqual(f'{3!=4!s:.3}', 'Tru')
def test_conversions(self):
self.assertEqual(f'{3.14:10.10}', ' 3.14')
if not IS_PY26:
self.assertEqual(f'{3.14!s:10.10}', '3.14 ')
self.assertEqual(f'{3.14!r:10.10}', '3.14 ')
self.assertEqual(f'{3.14!a:10.10}', '3.14 ')
self.assertEqual(f'{"a"}', 'a')
self.assertEqual(f'{"a"!r}', "'a'")
self.assertEqual(f'{"a"!a}', "'a'")
# Not a conversion.
self.assertEqual(f'{"a!r"}', "a!r")
# Not a conversion, but show that ! is allowed in a format spec.
self.assertEqual(f'{3.14:!<10.10}', '3.14!!!!!!')
self.assertEqual(f'{"\N{GREEK CAPITAL LETTER DELTA}"}', '\u0394')
self.assertEqual(f'{"\N{GREEK CAPITAL LETTER DELTA}"!r}', "'\u0394'")
self.assertEqual(f'{"\N{GREEK CAPITAL LETTER DELTA}"!a}', "'\\u0394'")
self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character',
["f'{3!g}'",
"f'{3!A}'",
"f'{3!A}'",
"f'{3!A}'",
"f'{3!!}'",
"f'{3!:}'",
"f'{3!\N{GREEK CAPITAL LETTER DELTA}}'",
"f'{3! s}'", # no space before conversion char
"f'{x!\\x00:.<10}'",
])
self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
["f'{x!s{y}}'",
"f'{3!ss}'",
"f'{3!ss:}'",
"f'{3!ss:s}'",
])
def test_assignment(self):
self.assertAllRaise(SyntaxError, 'invalid syntax',
["f'' = 3",
"f'{0}' = x",
"f'{x}' = x",
])
def test_del(self):
self.assertAllRaise(CompileError, 'invalid syntax', # CPython raises SyntaxError
["del f''",
"del '' f''",
])
def test_mismatched_braces(self):
self.assertAllRaise(SyntaxError, "f-string: single '}' is not allowed",
["f'{{}'",
"f'{{}}}'",
"f'}'",
"f'x}'",
"f'x}x'",
# Can't have { or } in a format spec.
"f'{3:}>10}'",
r"f'{3:\\}>10}'",
"f'{3:}}>10}'",
])
self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
["f'{3:{{>10}'",
"f'{3'",
"f'{3!'",
"f'{3:'",
"f'{3!s'",
"f'{3!s:'",
"f'{3!s:3'",
"f'x{'",
"f'x{x'",
"f'{3:s'",
"f'{{{'",
"f'{{}}{'",
"f'{'",
])
self.assertAllRaise(SyntaxError, 'invalid syntax',
[r"f'{3:\\{>10}'",
])
# But these are just normal strings.
self.assertEqual(f'{"{"}', '{')
self.assertEqual(f'{"}"}', '}')
self.assertEqual(f'{3:{"}"}>10}', '}}}}}}}}}3')
self.assertEqual(f'{2:{"{"}>10}', '{{{{{{{{{2')
def test_if_conditional(self):
# There's special logic in compile.c to test if the
# conditional for an if (and while) are constants. Exercise
# that code.
def test_fstring(x, expected):
flag = 0
if f'{x}':
flag = 1
else:
flag = 2
self.assertEqual(flag, expected)
def test_concat_empty(x, expected):
flag = 0
if '' f'{x}':
flag = 1
else:
flag = 2
self.assertEqual(flag, expected)
def test_concat_non_empty(x, expected):
flag = 0
if ' ' f'{x}':
flag = 1
else:
flag = 2
self.assertEqual(flag, expected)
test_fstring('', 2)
test_fstring(' ', 1)
test_concat_empty('', 2)
test_concat_empty(' ', 1)
test_concat_non_empty('', 1)
test_concat_non_empty(' ', 1)
def test_empty_format_specifier(self):
x = 'test'
self.assertEqual(f'{x}', 'test')
self.assertEqual(f'{x:}', 'test')
self.assertEqual(f'{x!s:}', 'test')
self.assertEqual(f'{x!r:}', "'test'")
def test_str_format_differences(self):
d = {'a': 'string',
0: 'integer',
}
a = 0
self.assertEqual(f'{d[0]}', 'integer')
self.assertEqual(f'{d["a"]}', 'string')
self.assertEqual(f'{d[a]}', 'integer')
self.assertEqual('{d[a]}'.format(d=d), 'string')
self.assertEqual('{d[0]}'.format(d=d), 'integer')
def test_invalid_expressions(self):
self.assertAllRaise(SyntaxError, 'invalid syntax',
[r"f'{a[4)}'",
r"f'{a(4]}'",
])
def test_errors(self):
# see issue 26287
self.assertAllRaise((TypeError, ValueError), 'non-empty', # TypeError in Py3.4+
[r"f'{(lambda: 0):x}'",
r"f'{(0,):x}'",
])
self.assertAllRaise(ValueError, 'Unknown format code',
[r"f'{1000:j}'",
r"f'{1000:j}'",
])
def test_loop(self):
for i in range(1000):
self.assertEqual(f'i:{i}', 'i:' + str(i))
def test_dict(self):
d = {'"': 'dquote',
"'": 'squote',
'foo': 'bar',
}
self.assertEqual(f'{d["\'"]}', 'squote')
self.assertEqual(f"{d['\"']}", 'dquote')
self.assertEqual(f'''{d["'"]}''', 'squote')
self.assertEqual(f"""{d['"']}""", 'dquote')
self.assertEqual(f'{d["foo"]}', 'bar')
self.assertEqual(f"{d['foo']}", 'bar')
self.assertEqual(f'{d[\'foo\']}', 'bar')
self.assertEqual(f"{d[\"foo\"]}", 'bar')
def test_escaped_quotes(self):
d = {'"': 'a',
"'": 'b'}
self.assertEqual(fr"{d['\"']}", 'a')
self.assertEqual(fr'{d["\'"]}', 'b')
self.assertEqual(fr"{'\"'}", '"')
self.assertEqual(fr'{"\'"}', "'")
self.assertEqual(f'{"\\"3"}', '"3')
self.assertAllRaise(SyntaxError, 'f-string: unterminated string',
[r'''f'{"""\\}' ''', # Backslash at end of expression
])
self.assertAllRaise(SyntaxError, 'unexpected character after line continuation',
[r"rf'{3\}'",
])
if __name__ == '__main__':
unittest.main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment