nexedi / cython, commit fe3a65f7, authored Mar 24, 2016 by Robert Bradshaw
F strings (PEP 498)
parents 37534635 b9c66f41
Showing 10 changed files with 1212 additions and 39 deletions (+1212, -39)
Cython/Compiler/ExprNodes.py          +105   -0
Cython/Compiler/Lexicon.py              +3   -3
Cython/Compiler/ModuleNode.py           +3   -0
Cython/Compiler/Naming.py               +1   -0
Cython/Compiler/Optimize.py            +39   -0
Cython/Compiler/Parsing.pxd             +4   -0
Cython/Compiler/Parsing.py            +225  -36
Cython/Utility/ModuleSetupCode.c        +4   -0
Cython/Utility/StringTools.c           +20   -0
tests/run/test_fstring.pyx            +808   -0
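For orientation, PEP 498 f-strings embed expressions directly in string literals; this commit teaches Cython's lexer, parser, optimizer and code generator to accept and compile them. A minimal, assumed usage example of the syntax involved (for context only, not part of the diff):

# Hypothetical illustration of the PEP 498 syntax this commit adds support for.
name = "world"
value = 4 * 10
print(f"hello {name}")        # plain expression substitution
print(f"{value!r:>8}")        # conversion char (!r) plus a format spec
print(f"{{literal braces}}")  # doubled braces produce literal '{' and '}'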
Cython/Compiler/ExprNodes.py
...
...
@@ -2954,6 +2954,111 @@ class RawCNameExprNode(ExprNode):
    pass


#-------------------------------------------------------------------
#
#  F-strings
#
#-------------------------------------------------------------------


class JoinedStrNode(ExprNode):
    # F-strings
    #
    # values   [UnicodeNode|FormattedValueNode]   Substrings of the f-string
    #
    type = unicode_type
    is_temp = True

    subexprs = ['values']

    def analyse_types(self, env):
        self.values = [v.analyse_types(env).coerce_to_pyobject(env) for v in self.values]
        return self

    def generate_evaluation_code(self, code):
        code.mark_pos(self.pos)
        num_items = len(self.values)
        list_var = code.funcstate.allocate_temp(py_object_type, manage_ref=True)

        code.putln('%s = PyList_New(%s); %s' % (
            list_var,
            num_items,
            code.error_goto_if_null(list_var, self.pos)))
        code.put_gotref(list_var)

        for i, node in enumerate(self.values):
            node.generate_evaluation_code(code)
            node.make_owned_reference(code)

            code.put_giveref(node.py_result())
            code.putln('PyList_SET_ITEM(%s, %s, %s);' % (list_var, i, node.py_result()))
            node.generate_post_assignment_code(code)
            node.free_temps(code)

        code.mark_pos(self.pos)
        self.allocate_temp_result(code)
        code.putln('%s = PyUnicode_Join(%s, %s); %s' % (
            self.result(),
            Naming.empty_unicode,
            list_var,
            code.error_goto_if_null(self.py_result(), self.pos)))
        code.put_gotref(self.py_result())

        code.put_decref_clear(list_var, py_object_type)
        code.funcstate.release_temp(list_var)


class FormattedValueNode(ExprNode):
    # {}-delimited portions of an f-string
    #
    # value            ExprNode                The expression itself
    # conversion_char  str or None             Type conversion (!s, !r, !a, or none)
    # format_spec      JoinedStrNode or None   Format string passed to __format__

    subexprs = ['value', 'format_spec']

    type = py_object_type
    is_temp = True

    find_conversion_func = {
        's': 'PyObject_Str',
        'r': 'PyObject_Repr',
        'a': 'PyObject_ASCII',  # NOTE: Py3-only!
    }.get

    def analyse_types(self, env):
        self.value = self.value.analyse_types(env).coerce_to_pyobject(env)
        if self.format_spec:
            self.format_spec = self.format_spec.analyse_types(env).coerce_to_pyobject(env)
        return self

    def generate_result_code(self, code):
        value_result = self.value.py_result()
        if self.format_spec:
            format_func = '__Pyx_PyObject_Format'
            format_spec = self.format_spec.py_result()
        else:
            # common case: expect simple Unicode pass-through if no format spec
            format_func = '__Pyx_PyObject_FormatSimple'
            format_spec = Naming.empty_unicode

        if self.conversion_char:
            fn = self.find_conversion_func(self.conversion_char)
            assert fn is not None, "invalid conversion character found: '%s'" % self.conversion_char
            value_result = '%s(%s)' % (fn, value_result)
            code.globalstate.use_utility_code(
                UtilityCode.load_cached("PyObjectFormatAndDecref", "StringTools.c"))
            format_func += 'AndDecref'
        elif not self.format_spec:
            code.globalstate.use_utility_code(
                UtilityCode.load_cached("PyObjectFormatSimple", "StringTools.c"))
        else:
            format_func = 'PyObject_Format'

        code.putln("%s = %s(%s, %s); %s" % (
            self.result(),
            format_func,
            value_result,
            format_spec,
            code.error_goto_if_null(self.result(), self.pos)))
        code.put_gotref(self.py_result())


#-------------------------------------------------------------------
#
#  Parallel nodes (cython.parallel.thread(savailable|id))
...
...
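The C code generated for a JoinedStrNode collects each evaluated part into a fresh Python list and joins it with PyUnicode_Join against the shared empty unicode object. A rough Python-level sketch of that runtime behaviour, illustrative only and not the emitted C:

def join_fstring_parts(parts):
    # parts are already unicode objects (UnicodeNode / FormattedValueNode results)
    values = []
    for part in parts:
        values.append(part)    # mirrors PyList_SET_ITEM(list_var, i, ...)
    return u''.join(values)    # mirrors PyUnicode_Join(empty_unicode, list_var)

assert join_fstring_parts([u'x=', u'42']) == u'x=42'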
Cython/Compiler/Lexicon.py
...
...
@@ -7,7 +7,7 @@ from __future__ import absolute_import
raw_prefixes = "rR"
bytes_prefixes = "bB"
-string_prefixes = "uU" + bytes_prefixes
+string_prefixes = "fFuU" + bytes_prefixes
char_prefixes = "cC"
any_string_prefix = raw_prefixes + string_prefixes + char_prefixes
IDENT = 'IDENT'
...
...
@@ -40,8 +40,8 @@ def make_lexicon():
    fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent)
    imagconst = (intconst | fltconst) + Any("jJ")

-    beginstring = Opt(Any(string_prefixes) + Opt(Any(raw_prefixes)) |
-                      Any(raw_prefixes) + Opt(Any(bytes_prefixes)) |
+    # invalid combinations of prefixes are caught in p_string_literal
+    beginstring = Opt(Rep(Any(string_prefixes + raw_prefixes)) |
                      Any(char_prefixes)
                      ) + (Str("'") | Str('"') | Str("'''") | Str('"""'))

    two_oct = octdigit + octdigit
...
...
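With the relaxed beginstring rule, the lexer now accepts any repetition of string and raw prefixes and defers validation of the combination to p_string_literal (see the Parsing.py changes below). A small sketch of that later validation, reusing the error messages introduced in this commit; the helper name is made up for illustration:

def check_prefix(kind_string):
    # Illustrative stand-in for the prefix checks p_string_literal performs.
    kind_string = kind_string.lower()
    if len(set(kind_string)) != len(kind_string):
        return 'Duplicate string prefix character'
    for a, b in (('b', 'u'), ('b', 'f'), ('u', 'f')):
        if a in kind_string and b in kind_string:
            return 'String prefixes %s and %s cannot be combined' % (a, b)
    return 'ok'

assert check_prefix('rf') == 'ok'
assert check_prefix('bf') == 'String prefixes b and f cannot be combined'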
Cython/Compiler/ModuleNode.py
...
...
@@ -695,6 +695,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
        code.putln('static PyObject *%s;' % Naming.builtins_cname)
        code.putln('static PyObject *%s;' % Naming.empty_tuple)
        code.putln('static PyObject *%s;' % Naming.empty_bytes)
+        code.putln('static PyObject *%s;' % Naming.empty_unicode)
        if Options.pre_import is not None:
            code.putln('static PyObject *%s;' % Naming.preimport_cname)
        code.putln('static int %s;' % Naming.lineno_cname)
...
...
@@ -2117,6 +2118,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
            Naming.empty_tuple, code.error_goto_if_null(Naming.empty_tuple, self.pos)))
        code.putln("%s = PyBytes_FromStringAndSize(\"\", 0); %s" % (
            Naming.empty_bytes, code.error_goto_if_null(Naming.empty_bytes, self.pos)))
+        code.putln("%s = PyUnicode_FromStringAndSize(\"\", 0); %s" % (
+            Naming.empty_unicode, code.error_goto_if_null(Naming.empty_unicode, self.pos)))
        for ext_type in ('CyFunction', 'FusedFunction', 'Coroutine', 'Generator', 'StopAsyncIteration'):
            code.putln("#ifdef __Pyx_%s_USED" % ext_type)
...
...
Cython/Compiler/Naming.py
...
...
@@ -96,6 +96,7 @@ gilstate_cname = pyrex_prefix + "state"
skip_dispatch_cname = pyrex_prefix + "skip_dispatch"
empty_tuple = pyrex_prefix + "empty_tuple"
empty_bytes = pyrex_prefix + "empty_bytes"
+empty_unicode = pyrex_prefix + "empty_unicode"
print_function = pyrex_prefix + "print"
print_function_kwargs = pyrex_prefix + "print_kwargs"
cleanup_cname = pyrex_prefix + "module_cleanup"
...
...
Cython/Compiler/Optimize.py
...
...
@@ -3,6 +3,7 @@ from __future__ import absolute_import
import sys
import copy
import codecs
+import itertools

from . import TypeSlots
from .ExprNodes import not_a_constant
...
...
@@ -3934,6 +3935,44 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations):
            sequence_node.mult_factor = factor
        return sequence_node

    def visit_FormattedValueNode(self, node):
        self.visitchildren(node)
        if isinstance(node.format_spec, ExprNodes.UnicodeNode) and not node.format_spec.value:
            node.format_spec = None
        if node.format_spec is None and node.conversion_char is None and isinstance(node.value, ExprNodes.UnicodeNode):
            return node.value
        return node

    def visit_JoinedStrNode(self, node):
        """
        Clean up after the parser by discarding empty Unicode strings and merging
        substring sequences.  Empty or single-value join lists are not uncommon
        because f-string format specs are always parsed into JoinedStrNodes.
        """
        self.visitchildren(node)
        unicode_node = ExprNodes.UnicodeNode

        values = []
        for is_unode_group, substrings in itertools.groupby(
                node.values, lambda v: isinstance(v, unicode_node)):
            if is_unode_group:
                substrings = list(substrings)
                unode = substrings[0]
                if len(substrings) > 1:
                    unode.value = EncodedString(u''.join(value.value for value in substrings))
                # ignore empty Unicode strings
                if unode.value:
                    values.append(unode)
            else:
                values.extend(substrings)

        if not values:
            node = ExprNodes.UnicodeNode(node.pos, value=EncodedString(''))
        elif len(values) == 1:
            node = values[0]
        else:
            node.values = values
        return node

    def visit_MergedDictNode(self, node):
        """Unpack **args in place if we can."""
        self.visitchildren(node)
...
...
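The new visit_JoinedStrNode pass merges adjacent literal substrings and drops empty ones, so an f-string without expressions collapses back into a single unicode constant. An illustrative sketch of the merge step, with plain str standing in for UnicodeNode and a list standing in for any other node:

import itertools

def fold_values(values):
    out = []
    for is_literal, group in itertools.groupby(values, lambda v: isinstance(v, str)):
        if is_literal:
            merged = ''.join(group)
            if merged:              # ignore empty literal strings
                out.append(merged)
        else:
            out.extend(group)
    return out

# ['EXPR'] stands in for a FormattedValueNode; adjacent literals are fused.
assert fold_values(['a', '', 'b', ['EXPR'], 'c']) == ['ab', ['EXPR'], 'c']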
Cython/Compiler/Parsing.pxd
...
...
@@ -68,6 +68,10 @@ cdef p_opt_string_literal(PyrexScanner s, required_type=*)
cdef bint check_for_non_ascii_characters(unicode string)
@cython.locals(systr=unicode, is_python3_source=bint, is_raw=bint)
cdef p_string_literal(PyrexScanner s, kind_override=*)
+@cython.locals(i=Py_ssize_t, size=Py_ssize_t)
+cdef list p_f_string(PyrexScanner s, unicode_value, pos)
+@cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4, quote_char=Py_UCS4)
+cdef tuple p_f_string_expr(PyrexScanner s, unicode_value, pos, Py_ssize_t starting_index)
cdef p_list_maker(PyrexScanner s)
cdef p_comp_iter(PyrexScanner s, body)
cdef p_comp_for(PyrexScanner s, body)
...
...
Cython/Compiler/Parsing.py
...
...
@@ -15,12 +15,13 @@ cython.declare(Nodes=object, ExprNodes=object, EncodedString=object,
               re=object, _unicode=object, _bytes=object,
               partial=object, reduce=object, _IS_PY3=cython.bint)

+from io import StringIO
import re
import sys
from unicodedata import lookup as lookup_unicodechar
from functools import partial, reduce

-from .Scanning import PyrexScanner, FileSourceDescriptor
+from .Scanning import PyrexScanner, FileSourceDescriptor, StringSourceDescriptor
from . import Nodes
from . import ExprNodes
from . import Builtin
...
...
@@ -693,8 +694,12 @@ def p_atom(s):
            return ExprNodes.UnicodeNode(pos, value=unicode_value, bytes_value=bytes_value)
        elif kind == 'b':
            return ExprNodes.BytesNode(pos, value=bytes_value)
-        else:
+        elif kind == 'f':
+            return ExprNodes.JoinedStrNode(pos, values=unicode_value)
+        elif kind == '':
            return ExprNodes.StringNode(pos, value=bytes_value, unicode_value=unicode_value)
+        else:
+            s.error("invalid string kind '%s'" % kind)
    elif sy == 'IDENT':
        name = s.systring
        s.next()
...
...
@@ -788,42 +793,61 @@ def wrap_compile_time_constant(pos, value):
def p_cat_string_literal(s):
    # A sequence of one or more adjacent string literals.
    # Returns (kind, bytes_value, unicode_value)
-    # where kind in ('b', 'c', 'u', '')
+    # where kind in ('b', 'c', 'u', 'f', '')
    pos = s.position()
    kind, bytes_value, unicode_value = p_string_literal(s)
    if kind == 'c' or s.sy != 'BEGIN_STRING':
        return kind, bytes_value, unicode_value
-    bstrings, ustrings = [bytes_value], [unicode_value]
+    bstrings, ustrings, positions = [bytes_value], [unicode_value], [pos]
    bytes_value = unicode_value = None
    while s.sy == 'BEGIN_STRING':
        pos = s.position()
        next_kind, next_bytes_value, next_unicode_value = p_string_literal(s)
        if next_kind == 'c':
            error(pos, "Cannot concatenate char literal with another string or char literal")
+            continue
        elif next_kind != kind:
+            # concatenating f strings and normal strings is allowed and leads to an f string
+            if set([kind, next_kind]) in (set(['f', 'u']), set(['f', ''])):
+                kind = 'f'
+            else:
                error(pos, "Cannot mix string literals of different types, expected %s'', got %s''" % (
                    kind, next_kind))
-        else:
+                continue
        bstrings.append(next_bytes_value)
        ustrings.append(next_unicode_value)
+        positions.append(pos)
    # join and rewrap the partial literals
    if kind in ('b', 'c', '') or kind == 'u' and None not in bstrings:
        # Py3 enforced unicode literals are parsed as bytes/unicode combination
        bytes_value = bytes_literal(StringEncoding.join_bytes(bstrings), s.source_encoding)
    if kind in ('u', ''):
-        unicode_value = EncodedString(u''.join([u for u in ustrings if u is not None])
-        )
+        unicode_value = EncodedString(u''.join([u for u in ustrings if u is not None]))
+    if kind == 'f':
+        unicode_value = []
+        for u, pos in zip(ustrings, positions):
+            if isinstance(u, list):
+                unicode_value += u
+            else:
+                # non-f-string concatenated into the f-string
+                unicode_value.append(ExprNodes.UnicodeNode(pos, value=EncodedString(u)))
    return kind, bytes_value, unicode_value


def p_opt_string_literal(s, required_type='u'):
-    if s.sy == 'BEGIN_STRING':
+    if s.sy != 'BEGIN_STRING':
+        return None
+    pos = s.position()
    kind, bytes_value, unicode_value = p_string_literal(s, required_type)
    if required_type == 'u':
+        if kind == 'f':
+            s.error("f-string not allowed here", pos)
        return unicode_value
    elif required_type == 'b':
        return bytes_value
    else:
        s.error("internal parser configuration error")
-    else:
-        return None


def check_for_non_ascii_characters(string):
    for c in string:
...
...
@@ -831,38 +855,55 @@ def check_for_non_ascii_characters(string):
            return True
    return False


def p_string_literal(s, kind_override=None):
    # A single string or char literal.  Returns (kind, bvalue, uvalue)
-    # where kind in ('b', 'c', 'u', '').  The 'bvalue' is the source
+    # where kind in ('b', 'c', 'u', 'f', '').  The 'bvalue' is the source
    # code byte sequence of the string literal, 'uvalue' is the
    # decoded Unicode string.  Either of the two may be None depending
    # on the 'kind' of string, only unprefixed strings have both
-    # representations.
+    # representations. In f-strings, the uvalue is a list of the Unicode
+    # strings and f-string expressions that make up the f-string.

    # s.sy == 'BEGIN_STRING'
    pos = s.position()
    is_raw = False
    is_python3_source = s.context.language_level >= 3
    has_non_ascii_literal_characters = False
-    kind = s.systring[:1].lower()
-    if kind == 'r':
-        # Py3 allows both 'br' and 'rb' as prefix
-        if s.systring[1:2].lower() == 'b':
+    kind_string = s.systring.rstrip('"\'').lower()
+    if len(set(kind_string)) != len(kind_string):
+        s.error('Duplicate string prefix character')
+    if 'b' in kind_string and 'u' in kind_string:
+        s.error('String prefixes b and u cannot be combined')
+    if 'b' in kind_string and 'f' in kind_string:
+        s.error('String prefixes b and f cannot be combined')
+    if 'u' in kind_string and 'f' in kind_string:
+        s.error('String prefixes u and f cannot be combined')
+    is_raw = 'r' in kind_string
+    if 'c' in kind_string:
+        # this should never happen, since the lexer does not allow combining c
+        # with other prefix characters
+        if len(kind_string) != 1:
+            s.error('Invalid string prefix for character literal')
+        kind = 'c'
+    elif 'f' in kind_string:
+        kind = 'f'  # u is ignored
+    elif 'b' in kind_string:
+        kind = 'b'
+    elif 'u' in kind_string:
+        kind = 'u'
+    else:
+        kind = ''
-            is_raw = True
-    elif kind in 'ub':
-        is_raw = s.systring[1:2].lower() == 'r'
-    elif kind != 'c':
-        kind = ''
    if kind == '' and kind_override is None and Future.unicode_literals in s.context.future_directives:
        chars = StringEncoding.StrLiteralBuilder(s.source_encoding)
        kind = 'u'
    else:
        if kind_override is not None and kind_override in 'ub':
            kind = kind_override
-        if kind == 'u':
+        if kind in ('u', 'f'):
+            # f-strings are scanned exactly like Unicode literals, but are parsed further later
            chars = StringEncoding.UnicodeLiteralBuilder()
        elif kind == '':
            chars = StringEncoding.StrLiteralBuilder(s.source_encoding)
...
...
@@ -873,7 +914,7 @@ def p_string_literal(s, kind_override=None):
        s.next()
        sy = s.sy
        systr = s.systring
-        #print "p_string_literal: sy =", sy, repr(s.systring) ###
+        # print "p_string_literal: sy =", sy, repr(s.systring) ###
        if sy == 'CHARS':
            chars.append(systr)
            if is_python3_source and not has_non_ascii_literal_characters and check_for_non_ascii_characters(systr):
...
...
@@ -901,7 +942,7 @@ def p_string_literal(s, kind_override=None):
            else:
                s.error("Invalid hex escape '%s'" % systr, fatal=False)
-        elif c in u'NUu' and kind in ('u', ''):  # \uxxxx, \Uxxxxxxxx, \N{...}
+        elif c in u'NUu' and kind in ('u', 'f', ''):  # \uxxxx, \Uxxxxxxxx, \N{...}
            chrval = -1
            if c == u'N':
                try:
...
...
@@ -943,14 +984,162 @@ def p_string_literal(s, kind_override=None):
    bytes_value, unicode_value = chars.getstrings()
    if is_python3_source and has_non_ascii_literal_characters:
        # Python 3 forbids literal non-ASCII characters in byte strings
-        if kind != 'u':
+        if kind not in ('u', 'f'):
            s.error("bytes can only contain ASCII literal characters.", pos=pos, fatal=False)
        bytes_value = None
+    if kind == 'f':
+        unicode_value = p_f_string(s, unicode_value, pos)
    s.next()
    return (kind, bytes_value, unicode_value)


def p_f_string(s, unicode_value, pos):
    # Parses a PEP 498 f-string literal into a list of nodes. Nodes are either UnicodeNodes
    # or FormattedValueNodes.
    values = []
    i = 0
    size = len(unicode_value)
    current_literal_start = 0

    while i < size:
        c = unicode_value[i]
        if c in '{}':
            if i + 1 < size and unicode_value[i + 1] == c:
                encoded_str = EncodedString(unicode_value[current_literal_start:i + 1])
                values.append(ExprNodes.UnicodeNode(pos, value=encoded_str))
                i += 2
                current_literal_start = i
            elif c == '}':
                s.error("single '}' encountered in format string")
            else:
                encoded_str = EncodedString(unicode_value[current_literal_start:i])
                values.append(ExprNodes.UnicodeNode(pos, value=encoded_str))
                i, expr_node = p_f_string_expr(s, unicode_value, pos, i + 1)
                current_literal_start = i
                values.append(expr_node)
        else:
            i += 1

    encoded_str = EncodedString(unicode_value[current_literal_start:])
    values.append(ExprNodes.UnicodeNode(pos, value=encoded_str))

    return values


def p_f_string_expr(s, unicode_value, pos, starting_index):
    # Parses a {}-delimited expression inside an f-string. Returns a FormattedValueNode
    # and the index in the string that follows the expression.
    i = starting_index
    size = len(unicode_value)
    conversion_char = terminal_char = format_spec = None
    format_spec_str = None
    NO_CHAR = 2**30

    nested_depth = 0
    quote_char = NO_CHAR
    in_triple_quotes = False

    while True:
        if i >= size:
            s.error("missing '}' in format string expression")

        c = unicode_value[i]

        if quote_char != NO_CHAR:
            if c == '\\':
                i += 1
            elif c == quote_char:
                if in_triple_quotes:
                    if i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
                        in_triple_quotes = False
                        quote_char = NO_CHAR
                        i += 2
                else:
                    quote_char = NO_CHAR
        elif c in '\'"':
            quote_char = c
            if i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
                in_triple_quotes = True
                i += 2
        elif c in '{[(':
            nested_depth += 1
        elif nested_depth != 0 and c in '}])':
            nested_depth -= 1
        elif c == '#':
            s.error("format string cannot include #")
        elif nested_depth == 0 and c in '!:}':
            # allow != as a special case
            if c == '!' and i + 1 < size and unicode_value[i + 1] == '=':
                i += 1
                continue

            terminal_char = c
            break
        i += 1

    # normalise line endings as the parser expects that
    expr_str = unicode_value[starting_index:i].replace('\r\n', '\n').replace('\r', '\n')
    expr_pos = (pos[0], pos[1], pos[2] + starting_index + 2)  # TODO: find exact code position (concat, multi-line, ...)

    if not expr_str.strip():
        s.error("empty expression not allowed in f-string")

    if terminal_char == '!':
        i += 1
        if i + 2 > size:
            s.error("invalid conversion char at end of string")
        conversion_char = unicode_value[i]
        i += 1
        terminal_char = unicode_value[i]

    if terminal_char == ':':
        in_triple_quotes = False
        in_string = False
        nested_depth = 0
        start_format_spec = i + 1
        while True:
            if i >= size:
                s.error("missing '}' in format specifier")
            c = unicode_value[i]
            if not in_triple_quotes and not in_string:
                if c == '{':
                    if nested_depth >= 1:
                        s.error("nesting of '{' in format specifier is not allowed")
                    nested_depth += 1
                elif c == '}':
                    if nested_depth > 0:
                        nested_depth -= 1
                    else:
                        terminal_char = c
                        break
            if c in '\'"':
                if not in_string and i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
                    in_triple_quotes = not in_triple_quotes
                    i += 2
                elif not in_triple_quotes:
                    in_string = not in_string
            i += 1

        format_spec_str = unicode_value[start_format_spec:i]

    if terminal_char != '}':
        s.error("missing '}' in format string expression', found '%s'" % terminal_char)

    # parse the expression as if it was surrounded by parentheses
    buf = StringIO('(%s)' % expr_str)
    scanner = PyrexScanner(buf, expr_pos[0], parent_scanner=s, source_encoding=s.source_encoding, initial_pos=expr_pos)
    expr = p_testlist(scanner)  # TODO is testlist right here?

    # validate the conversion char
    if conversion_char is not None and not ExprNodes.FormattedValueNode.find_conversion_func(conversion_char):
        s.error("invalid conversion character '%s'" % conversion_char)

    # the format spec is itself treated like an f-string
    if format_spec_str:
        format_spec = ExprNodes.JoinedStrNode(pos, values=p_f_string(s, format_spec_str, pos))

    return i + 1, ExprNodes.FormattedValueNode(
        s.position(), value=expr, conversion_char=conversion_char, format_spec=format_spec)
# since PEP 448:
# list_display ::= "[" [listmaker] "]"
# listmaker ::= (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
...
...
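p_f_string walks the already-decoded literal text and splits it into plain substrings and {}-delimited expressions, which p_f_string_expr then re-parses through a nested PyrexScanner. A simplified sketch of the splitting step (no nesting, quotes or format specs handled; this is an illustration, not the parser code itself):

def split_fstring(body):
    # Returns ('literal', text) and ('expr', text) tuples in source order.
    parts, i, start = [], 0, 0
    while i < len(body):
        c = body[i]
        if c in '{}':
            if body[i:i + 2] in ('{{', '}}'):      # doubled brace -> literal brace
                parts.append(('literal', body[start:i + 1]))
                i += 2
                start = i
                continue
            if c == '}':
                raise SyntaxError("single '}' encountered in format string")
            parts.append(('literal', body[start:i]))
            end = body.index('}', i)               # no nesting handled in this sketch
            parts.append(('expr', body[i + 1:end]))
            i = end + 1
            start = i
        else:
            i += 1
    parts.append(('literal', body[start:]))
    return [p for p in parts if p[1]]

assert split_fstring('x={x!r} {{literal}}') == [
    ('literal', 'x='), ('expr', 'x!r'), ('literal', ' {'), ('literal', 'literal}')]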
Cython/Utility/ModuleSetupCode.c
...
...
@@ -134,6 +134,10 @@
#define __Pyx_PyString_Format(a, b) PyString_Format(a, b)
#endif
#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII)
#define PyObject_ASCII(o) PyObject_Repr(o)
#endif
#if PY_MAJOR_VERSION >= 3
#define PyBaseString_Type PyUnicode_Type
#define PyStringObject PyUnicodeObject
...
...
Cython/Utility/StringTools.c
...
...
@@ -811,3 +811,23 @@ static CYTHON_INLINE int __Pyx_PyByteArray_Append(PyObject* bytearray, int value
    Py_DECREF(retval);
    return 0;
}


//////////////////// PyObjectFormatSimple.proto ////////////////////

#define __Pyx_PyObject_FormatSimple(s, f) (likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) : PyObject_Format(s, f))


//////////////////// PyObjectFormatAndDecref.proto ////////////////////

#define __Pyx_PyObject_FormatSimpleAndDecref(s, f) \
    ((unlikely(!s) || likely(PyUnicode_CheckExact(s))) ? s : __Pyx_PyObject_FormatAndDecref(s, f))
static CYTHON_INLINE PyObject* __Pyx_PyObject_FormatAndDecref(PyObject* s, PyObject* f);


//////////////////// PyObjectFormatAndDecref ////////////////////

static CYTHON_INLINE PyObject* __Pyx_PyObject_FormatAndDecref(PyObject* s, PyObject* f) {
    PyObject *result = PyObject_Format(s, f);
    Py_DECREF(s);
    return result;
}
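__Pyx_PyObject_FormatSimple is the fast path used when a formatted value has no conversion char and no format spec: exact unicode objects pass straight through, everything else falls back to PyObject_Format. The AndDecref variant additionally releases the temporary produced by a !s/!r/!a conversion. A Python-level sketch of the fast-path logic (Python 3 semantics assumed; reference counting omitted):

def format_simple(value, spec=u''):
    if type(value) is str:       # likely(PyUnicode_CheckExact(s))
        return value             # pass through unchanged
    return format(value, spec)   # PyObject_Format(s, f)

assert format_simple(u'abc') == u'abc'
assert format_simple(42) == u'42'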
tests/run/test_fstring.pyx (new file, 0 → 100644)
# cython: language_level=3
# mode: run
# tag: allow_unknown_names, f_strings, pep498
import ast
import types
import decimal
import unittest
import contextlib
import sys

IS_PY2 = sys.version_info[0] < 3
IS_PY26 = sys.version_info[:2] < (2, 7)

from Cython.Build.Inline import cython_inline
from Cython.TestUtils import CythonTest
from Cython.Compiler.Errors import CompileError, hold_errors, release_errors, error_stack


def cy_eval(s, **kwargs):
    return cython_inline('return ' + s, force=True, **kwargs)


a_global = 'global variable'
# You could argue that I'm too strict in looking for specific error
# values with assertRaisesRegex, but without it it's way too easy to
# make a syntax error in the test strings. Especially with all of the
# triple quotes, raw strings, backslashes, etc. I think it's a
# worthwhile tradeoff. When I switched to this method, I found many
# examples where I wasn't testing what I thought I was.
class TestCase(CythonTest):
    def assertAllRaise(self, exception_type, regex, error_strings):
        for str in error_strings:
            if exception_type is SyntaxError:
                try:
                    self.fragment(str)
                except CompileError:
                    assert True
                else:
                    assert False, "Invalid Cython code failed to raise SyntaxError: %s" % str
            else:
                hold_errors()
                try:
                    cython_inline(str, quiet=True)
                except exception_type:
                    assert True
                else:
                    assert False, "Invalid Cython code failed to raise %s: %s" % (exception_type, str)
                finally:
                    if error_stack:
                        release_errors(ignore=True)

    if IS_PY2:
        def assertEqual(self, first, second, msg=None):
            # strip u'' string prefixes in Py2
            if first != second and isinstance(first, unicode):
                stripped_first = first.replace("u'", "'").replace('u"', '"')
                if stripped_first == second:
                    first = stripped_first
                elif stripped_first.decode('unicode_escape') == second:
                    first = stripped_first.decode('unicode_escape')
            super(TestCase, self).assertEqual(first, second, msg)

    if IS_PY26:
        @contextlib.contextmanager
        def assertRaises(self, exc):
            try:
                yield
            except exc:
                pass
            else:
                assert False, "exception '%s' not raised" % exc

        def assertIn(self, value, collection):
            self.assertTrue(value in collection)
    def test__format__lookup(self):
        if IS_PY26:
            return
        elif IS_PY2:
            raise unittest.SkipTest("Py3-only")

        # Make sure __format__ is looked up on the type, not the instance.
        class X:
            def __format__(self, spec):
                return 'class'

        x = X()

        # Add a bound __format__ method to the 'y' instance, but not
        # the 'x' instance.
        y = X()
        y.__format__ = types.MethodType(lambda self, spec: 'instance', y)

        self.assertEqual(f'{y}', format(y))
        self.assertEqual(f'{y}', 'class')
        self.assertEqual(format(x), format(y))

        # __format__ is not called this way, but still make sure it
        # returns what we expect (so we can make sure we're bypassing
        # it).
        self.assertEqual(x.__format__(''), 'class')
        self.assertEqual(y.__format__(''), 'instance')

        # This is how __format__ is actually called.
        self.assertEqual(type(x).__format__(x, ''), 'class')
        self.assertEqual(type(y).__format__(y, ''), 'class')
    def __test_ast(self):
        # Inspired by http://bugs.python.org/issue24975
        class X:
            def __init__(self):
                self.called = False
            def __call__(self):
                self.called = True
                return 4
        x = X()
        expr = """
a = 10
f'{a * x()}'"""
        t = ast.parse(expr)
        c = compile(t, '', 'exec')

        # Make sure x was not called.
        self.assertFalse(x.called)

        # Actually run the code.
        exec(c)

        # Make sure x was called.
        self.assertTrue(x.called)

    def __test_literal_eval(self):
        # With no expressions, an f-string is okay.
        self.assertEqual(ast.literal_eval("f'x'"), 'x')
        self.assertEqual(ast.literal_eval("f'x' 'y'"), 'xy')

        # But this should raise an error.
        with self.assertRaisesRegex(ValueError, 'malformed node or string'):
            ast.literal_eval("f'x{3}'")

        # As should this, which uses a different ast node
        with self.assertRaisesRegex(ValueError, 'malformed node or string'):
            ast.literal_eval("f'{3}'")

    def __test_ast_compile_time_concat(self):
        x = ['']
        expr = """x[0] = 'foo' f'{3}'"""
        t = ast.parse(expr)
        c = compile(t, '', 'exec')
        exec(c)
        self.assertEqual(x[0], 'foo3')
    def test_literal(self):
        self.assertEqual(f'', '')
        self.assertEqual(f'a', 'a')
        self.assertEqual(f' ', ' ')
        self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}', '\N{GREEK CAPITAL LETTER DELTA}')
        self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}', '\u0394')
        self.assertEqual(f'\N{True}', '\u22a8')
        self.assertEqual(rf'\N{True}', r'\NTrue')

    def test_escape_order(self):
        # note that hex(ord('{')) == 0x7b, so this
        # string becomes f'a{4*10}b'
        self.assertEqual(f'a\u007b4*10}b', 'a40b')
        self.assertEqual(f'a\x7b4*10}b', 'a40b')
        self.assertEqual(f'a\x7b4*10\N{RIGHT CURLY BRACKET}b', 'a40b')
        self.assertEqual(f'{"a"!\N{LATIN SMALL LETTER R}}', "'a'")
        self.assertEqual(f'{10\x3a02X}', '0A')
        self.assertEqual(f'{10:02\N{LATIN CAPITAL LETTER X}}', '0A')

        self.assertAllRaise(SyntaxError, "f-string: single '}' is not allowed",
                            [r"""f'a{\u007b4*10}b'""",  # mis-matched brackets
                             ])
        self.assertAllRaise(SyntaxError, 'unexpected character after line continuation character',
                            [r"""f'{"a"\!r}'""",
                             r"""f'{a\!r}'""",
                             ])

    def test_unterminated_string(self):
        self.assertAllRaise(SyntaxError, 'f-string: unterminated string',
                            [r"""f'{"x'""",
                             r"""f'{"x}'""",
                             r"""f'{("x'""",
                             r"""f'{("x}'""",
                             ])

    def test_mismatched_parens(self):
        self.assertAllRaise(SyntaxError, 'f-string: mismatched',
                            ["f'{((}'",
                             ])
    def test_double_braces(self):
        self.assertEqual(f'{{', '{')
        self.assertEqual(f'a{{', 'a{')
        self.assertEqual(f'{{b', '{b')
        self.assertEqual(f'a{{b', 'a{b')
        self.assertEqual(f'}}', '}')
        self.assertEqual(f'a}}', 'a}')
        self.assertEqual(f'}}b', '}b')
        self.assertEqual(f'a}}b', 'a}b')

        self.assertEqual(f'{{{10}', '{10')
        self.assertEqual(f'}}{10}', '}10')
        self.assertEqual(f'}}{{{10}', '}{10')
        self.assertEqual(f'}}a{{{10}', '}a{10')

        self.assertEqual(f'{10}{{', '10{')
        self.assertEqual(f'{10}}}', '10}')
        self.assertEqual(f'{10}}}{{', '10}{')
        self.assertEqual(f'{10}}}a{{' '}', '10}a{}')

        # Inside of strings, don't interpret doubled brackets.
        self.assertEqual(f'{"{{}}"}', '{{}}')

        self.assertAllRaise(TypeError, 'unhashable type',
                            ["f'{ {{}} }'",  # dict in a set
                             ])

    def test_compile_time_concat(self):
        x = 'def'
        self.assertEqual('abc' f'## {x}ghi', 'abc## defghi')
        self.assertEqual('abc' f'{x}' 'ghi', 'abcdefghi')
        self.assertEqual('abc' f'{x}' 'gh' f'i{x:4}', 'abcdefghidef ')
        self.assertEqual('{x}' f'{x}', '{x}def')
        self.assertEqual('{x' f'{x}', '{xdef')
        self.assertEqual('{x}' f'{x}', '{x}def')
        self.assertEqual('{{x}}' f'{x}', '{{x}}def')
        self.assertEqual('{{x' f'{x}', '{{xdef')
        self.assertEqual('x}}' f'{x}', 'x}}def')
        self.assertEqual(f'{x}' 'x}}', 'defx}}')
        self.assertEqual(f'{x}' '', 'def')
        self.assertEqual('' f'{x}' '', 'def')
        self.assertEqual('' f'{x}', 'def')
        self.assertEqual(f'{x}' '2', 'def2')
        self.assertEqual('1' f'{x}' '2', '1def2')
        self.assertEqual('1' f'{x}', '1def')
        self.assertEqual(f'{x}' f'-{x}', 'def-def')
        self.assertEqual('' f'', '')
        self.assertEqual('' f'' '', '')
        self.assertEqual('' f'' '' f'', '')
        self.assertEqual(f'', '')
        self.assertEqual(f'' '', '')
        self.assertEqual(f'' '' f'', '')
        self.assertEqual(f'' '' f'' '', '')

        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
                            ["f'{3' f'}'",  # can't concat to get a valid f-string
                             ])
    def test_comments(self):
        # These aren't comments, since they're in strings.
        d = {'#': 'hash'}
        self.assertEqual(f'{"#"}', '#')
        self.assertEqual(f'{d["#"]}', 'hash')

        self.assertAllRaise(SyntaxError, "f-string cannot include '#'",
                            ["f'{1#}'",  # error because the expression becomes "(1#)"
                             "f'{3(#)}'",
                             ])

    def test_many_expressions(self):
        # Create a string with many expressions in it. Note that
        # because we have a space in here as a literal, we're actually
        # going to use twice as many ast nodes: one for each literal
        # plus one for each expression.
        def build_fstr(n, extra=''):
            return "f'" + ('{x} ' * n) + extra + "'"

        x = 'X'
        width = 1

        # Test around 256.
        for i in range(250, 260):
            self.assertEqual(cy_eval(build_fstr(i), x=x, width=width), (x+' ')*i)

        # Test concatenating 2 largs fstrings.
        self.assertEqual(cy_eval(build_fstr(255)*3, x=x, width=width), (x+' ')*(255*3))  # CPython uses 255*256

        s = build_fstr(253, '{x:{width}} ')
        self.assertEqual(cy_eval(s, x=x, width=width), (x+' ')*254)

        # Test lots of expressions and constants, concatenated.
        s = "f'{1}' 'x' 'y'" * 1024
        self.assertEqual(cy_eval(s, x=x, width=width), '1xy' * 1024)
    def test_format_specifier_expressions(self):
        width = 10
        precision = 4
        value = decimal.Decimal('12.34567')
        if not IS_PY26:
            self.assertEqual(f'result: {value:{width}.{precision}}', 'result:      12.35')
            self.assertEqual(f'result: {value:{width!r}.{precision}}', 'result:      12.35')
            self.assertEqual(f'result: {value:{width:0}.{precision:1}}', 'result:      12.35')
            self.assertEqual(f'result: {value:{1}{0:0}.{precision:1}}', 'result:      12.35')
            self.assertEqual(f'result: {value:{1}{0:0}.{precision:1}}', 'result:      12.35')
        self.assertEqual(f'{10:#{1}0x}', '       0xa')
        self.assertEqual(f'{10:{"#"}1{0}{"x"}}', '       0xa')
        self.assertEqual(f'{-10:-{"#"}1{0}x}', '      -0xa')
        self.assertEqual(f'{-10:{"-"}#{1}0{"x"}}', '      -0xa')
        # self.assertEqual(f'{10:#{3 != {4:5} and width}x}', '       0xa')

        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
                            ["""f'{"s"!r{":10"}}'""",
                             # This looks like a nested format spec.
                             ])

        self.assertAllRaise(SyntaxError, "invalid syntax",
                            [# Invalid sytax inside a nested spec.
                             "f'{4:{/5}}'",
                             ])

        self.assertAllRaise(SyntaxError, "f-string: expressions nested too deeply",
                            [# Can't nest format specifiers.
                             "f'result: {value:{width:{0}}.{precision:1}}'",
                             ])

        self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character',
                            [# No expansion inside conversion or for
                             #  the : or ! itself.
                             """f'{"s"!{"r"}}'""",
                             ])
    def test_side_effect_order(self):
        class X:
            def __init__(self):
                self.i = 0
            def __format__(self, spec):
                self.i += 1
                return str(self.i)

        x = X()
        self.assertEqual(f'{x} {x}', '1 2')

    def test_missing_expression(self):
        self.assertAllRaise(SyntaxError, 'f-string: empty expression not allowed',
                            ["f'{}'",
                             "f'{ }'"
                             "f' {} '",
                             "f'{!r}'",
                             "f'{ !r}'",
                             "f'{10:{ }}'",
                             "f' { } '",
                             r"f'{\n}'",
                             r"f'{\n \n}'",

                             # Catch the empty expression before the
                             #  invalid conversion.
                             "f'{!x}'",
                             "f'{ !xr}'",
                             "f'{!x:}'",
                             "f'{!x:a}'",
                             "f'{ !xr:}'",
                             "f'{ !xr:a}'",

                             "f'{!}'",
                             "f'{:}'",

                             # We find the empty expression before the
                             #  missing closing brace.
                             "f'{!'",
                             "f'{!s:'",
                             "f'{:'",
                             "f'{:x'",
                             ])

    def test_parens_in_expressions(self):
        self.assertEqual(f'{3,}', '(3,)')

        # Add these because when an expression is evaluated, parens
        # are added around it. But we shouldn't go from an invalid
        # expression to a valid one. The added parens are just
        # supposed to allow whitespace (including newlines).
        self.assertAllRaise(SyntaxError, 'invalid syntax',
                            ["f'{,}'",
                             "f'{,}'",  # this is (,), which is an error
                             ])

        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
                            ["f'{3)+(4}'",
                             ])

        self.assertAllRaise(SyntaxError, 'EOL while scanning string literal',
                            ["f'{\n}'",
                             ])
    def test_newlines_in_expressions(self):
        self.assertEqual(f'{0}', '0')
        self.assertEqual(f'{0\n}', '0')
        self.assertEqual(f'{0\r}', '0')
        self.assertEqual(f'{\n0\n}', '0')
        self.assertEqual(f'{\r0\r}', '0')
        self.assertEqual(f'{\n0\r}', '0')
        self.assertEqual(f'{\n0}', '0')
        self.assertEqual(f'{3+\n4}', '7')
        self.assertEqual(f'{3+\\\n4}', '7')
        self.assertEqual(rf'''{3+
4}''', '7')
        self.assertEqual(f'''{3+\
4}''', '7')

        self.assertAllRaise(SyntaxError, 'f-string: empty expression not allowed',
                            [r"f'{\n}'",
                             ])

    def test_lambda(self):
        x = 5
        self.assertEqual(f'{(lambda y:x*y)("8")!r}', "'88888'")
        if not IS_PY2:
            self.assertEqual(f'{(lambda y:x*y)("8")!r:10}', "'88888'   ")
            self.assertEqual(f'{(lambda y:x*y)("8"):10}', "88888     ")

        # lambda doesn't work without parens, because the colon
        # makes the parser think it's a format_spec
        self.assertAllRaise(SyntaxError, 'unexpected EOF while parsing',
                            ["f'{lambda x:x}'",
                             ])

    def test_yield(self):
        # Not terribly useful, but make sure the yield turns
        #  a function into a generator
        def fn(y):
            f'y:{yield y*2}'

        g = fn(4)
        self.assertEqual(next(g), 8)
    def test_yield_send(self):
        def fn(x):
            yield f'x:{yield (lambda i: x * i)}'

        g = fn(10)
        the_lambda = next(g)
        self.assertEqual(the_lambda(4), 40)
        self.assertEqual(g.send('string'), 'x:string')

    def test_expressions_with_triple_quoted_strings(self):
        self.assertEqual(f"{'''x'''}", 'x')
        self.assertEqual(f"{'''eric's'''}", "eric's")
        self.assertEqual(f'{"""eric\'s"""}', "eric's")
        self.assertEqual(f"{'''eric\"s'''}", 'eric"s')
        self.assertEqual(f'{"""eric"s"""}', 'eric"s')

        # Test concatenation within an expression
        self.assertEqual(f'{"x" """eric"s""" "y"}', 'xeric"sy')
        self.assertEqual(f'{"x" """eric"s"""}', 'xeric"s')
        self.assertEqual(f'{"""eric"s""" "y"}', 'eric"sy')
        self.assertEqual(f'{"""x""" """eric"s""" "y"}', 'xeric"sy')
        self.assertEqual(f'{"""x""" """eric"s""" """y"""}', 'xeric"sy')
        self.assertEqual(f'{r"""x""" """eric"s""" """y"""}', 'xeric"sy')
    def test_multiple_vars(self):
        x = 98
        y = 'abc'
        self.assertEqual(f'{x}{y}', '98abc')

        self.assertEqual(f'X{x}{y}', 'X98abc')
        self.assertEqual(f'{x}X{y}', '98Xabc')
        self.assertEqual(f'{x}{y}X', '98abcX')

        self.assertEqual(f'X{x}Y{y}', 'X98Yabc')
        self.assertEqual(f'X{x}{y}Y', 'X98abcY')
        self.assertEqual(f'{x}X{y}Y', '98XabcY')

        self.assertEqual(f'X{x}Y{y}Z', 'X98YabcZ')

    def test_closure(self):
        def outer(x):
            def inner():
                return f'x:{x}'
            return inner

        self.assertEqual(outer('987')(), 'x:987')
        self.assertEqual(outer(7)(), 'x:7')

    def test_arguments(self):
        y = 2
        def f(x, width):
            return f'x={x*y:{width}}'

        self.assertEqual(f('foo', 10), 'x=foofoo    ')
        x = 'bar'
        self.assertEqual(f(10, 10), 'x=        20')

    def test_locals(self):
        value = 123
        self.assertEqual(f'v:{value}', 'v:123')

    def test_missing_variable(self):
        with self.assertRaises(NameError):
            f'v:{value}'

    def test_missing_format_spec(self):
        class O:
            def __format__(self, spec):
                if not spec:
                    return '*'
                return spec

        self.assertEqual(f'{O():x}', 'x')
        self.assertEqual(f'{O()}', '*')
        self.assertEqual(f'{O():}', '*')

        self.assertEqual(f'{3:}', '3')
        self.assertEqual(f'{3!s:}', '3')

    def test_global(self):
        self.assertEqual(f'g:{a_global}', 'g:global variable')
        self.assertEqual(f'g:{a_global!r}', "g:'global variable'")

        a_local = 'local variable'
        self.assertEqual(f'g:{a_global} l:{a_local}',
                         'g:global variable l:local variable')
        self.assertEqual(f'g:{a_global!r}',
                         "g:'global variable'")
        self.assertEqual(f'g:{a_global} l:{a_local!r}',
                         "g:global variable l:'local variable'")

        self.assertIn("module 'unittest' from", f'{unittest}')
    def test_shadowed_global(self):
        a_global = 'really a local'
        self.assertEqual(f'g:{a_global}', 'g:really a local')
        self.assertEqual(f'g:{a_global!r}', "g:'really a local'")

        a_local = 'local variable'
        self.assertEqual(f'g:{a_global} l:{a_local}',
                         'g:really a local l:local variable')
        self.assertEqual(f'g:{a_global!r}',
                         "g:'really a local'")
        self.assertEqual(f'g:{a_global} l:{a_local!r}',
                         "g:really a local l:'local variable'")

    def test_call(self):
        def foo(x):
            return 'x=' + str(x)

        self.assertEqual(f'{foo(10)}', 'x=10')

    def test_nested_fstrings(self):
        y = 5
        self.assertEqual(f'{f"{0}"*3}', '000')
        self.assertEqual(f'{f"{y}"*3}', '555')
        self.assertEqual(f'{f"{\'x\'}"*3}', 'xxx')

        self.assertEqual(f"{r'x' f'{\"s\"}'}", 'xs')
        self.assertEqual(f"{r'x'rf'{\"s\"}'}", 'xs')

    def test_invalid_string_prefixes(self):
        self.assertAllRaise(SyntaxError, 'unexpected EOF while parsing',
                            ["fu''",
                             "uf''",
                             "Fu''",
                             "fU''",
                             "Uf''",
                             "uF''",
                             "ufr''",
                             "urf''",
                             "fur''",
                             "fru''",
                             "rfu''",
                             "ruf''",
                             "FUR''",
                             "Fur''",
                             ])
    def test_leading_trailing_spaces(self):
        self.assertEqual(f'{ 3}', '3')
        self.assertEqual(f'{  3}', '3')
        self.assertEqual(f'{\t3}', '3')
        self.assertEqual(f'{\t\t3}', '3')
        self.assertEqual(f'{3 }', '3')
        self.assertEqual(f'{3  }', '3')
        self.assertEqual(f'{3\t}', '3')
        self.assertEqual(f'{3\t\t}', '3')

        self.assertEqual(f'expr={ {x: y for x, y in [(1, 2), ]}}',
                         'expr={1: 2}')
        self.assertEqual(f'expr={ {x: y for x, y in [(1, 2), ]} }',
                         'expr={1: 2}')

    def test_character_name(self):
        self.assertEqual(f'{4}\N{GREEK CAPITAL LETTER DELTA}{3}',
                         '4\N{GREEK CAPITAL LETTER DELTA}3')
        self.assertEqual(f'{{}}\N{GREEK CAPITAL LETTER DELTA}{3}',
                         '{}\N{GREEK CAPITAL LETTER DELTA}3')

    def test_not_equal(self):
        # There's a special test for this because there's a special
        #  case in the f-string parser to look for != as not ending an
        #  expression. Normally it would, while looking for !s or !r.
        self.assertEqual(f'{3!=4}', 'True')
        self.assertEqual(f'{3!=4:}', 'True')
        self.assertEqual(f'{3!=4!s}', 'True')
        self.assertEqual(f'{3!=4!s:.3}', 'Tru')
    def test_conversions(self):
        self.assertEqual(f'{3.14:10.10}', '      3.14')
        if not IS_PY26:
            self.assertEqual(f'{3.14!s:10.10}', '3.14      ')
            self.assertEqual(f'{3.14!r:10.10}', '3.14      ')
            self.assertEqual(f'{3.14!a:10.10}', '3.14      ')

        self.assertEqual(f'{"a"}', 'a')
        self.assertEqual(f'{"a"!r}', "'a'")
        self.assertEqual(f'{"a"!a}', "'a'")

        # Not a conversion.
        self.assertEqual(f'{"a!r"}', "a!r")

        # Not a conversion, but show that ! is allowed in a format spec.
        self.assertEqual(f'{3.14:!<10.10}', '3.14!!!!!!')

        self.assertEqual(f'{"\N{GREEK CAPITAL LETTER DELTA}"}', '\u0394')
        self.assertEqual(f'{"\N{GREEK CAPITAL LETTER DELTA}"!r}', "'\u0394'")
        self.assertEqual(f'{"\N{GREEK CAPITAL LETTER DELTA}"!a}', "'\\u0394'")

        self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character',
                            ["f'{3!g}'",
                             "f'{3!A}'",
                             "f'{3!A}'",
                             "f'{3!A}'",
                             "f'{3!!}'",
                             "f'{3!:}'",
                             "f'{3!\N{GREEK CAPITAL LETTER DELTA}}'",
                             "f'{3! s}'",  # no space before conversion char
                             "f'{x!\\x00:.<10}'",
                             ])

        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
                            ["f'{x!s{y}}'",
                             "f'{3!ss}'",
                             "f'{3!ss:}'",
                             "f'{3!ss:s}'",
                             ])

    def test_assignment(self):
        self.assertAllRaise(SyntaxError, 'invalid syntax',
                            ["f'' = 3",
                             "f'{0}' = x",
                             "f'{x}' = x",
                             ])

    def test_del(self):
        self.assertAllRaise(CompileError, 'invalid syntax',  # CPython raises SyntaxError
                            ["del f''",
                             "del '' f''",
                             ])
    def test_mismatched_braces(self):
        self.assertAllRaise(SyntaxError, "f-string: single '}' is not allowed",
                            ["f'{{}'",
                             "f'{{}}}'",
                             "f'}'",
                             "f'x}'",
                             "f'x}x'",

                             # Can't have { or } in a format spec.
                             "f'{3:}>10}'",
                             r"f'{3:\\}>10}'",
                             "f'{3:}}>10}'",
                             ])

        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
                            ["f'{3:{{>10}'",
                             "f'{3'",
                             "f'{3!'",
                             "f'{3:'",
                             "f'{3!s'",
                             "f'{3!s:'",
                             "f'{3!s:3'",
                             "f'x{'",
                             "f'x{x'",
                             "f'{3:s'",
                             "f'{{{'",
                             "f'{{}}{'",
                             "f'{'",
                             ])

        self.assertAllRaise(SyntaxError, 'invalid syntax',
                            [r"f'{3:\\{>10}'",
                             ])

        # But these are just normal strings.
        self.assertEqual(f'{"{"}', '{')
        self.assertEqual(f'{"}"}', '}')
        self.assertEqual(f'{3:{"}"}>10}', '}}}}}}}}}3')
        self.assertEqual(f'{2:{"{"}>10}', '{{{{{{{{{2')

    def test_if_conditional(self):
        # There's special logic in compile.c to test if the
        #  conditional for an if (and while) are constants. Exercise
        #  that code.

        def test_fstring(x, expected):
            flag = 0
            if f'{x}':
                flag = 1
            else:
                flag = 2
            self.assertEqual(flag, expected)

        def test_concat_empty(x, expected):
            flag = 0
            if '' f'{x}':
                flag = 1
            else:
                flag = 2
            self.assertEqual(flag, expected)

        def test_concat_non_empty(x, expected):
            flag = 0
            if ' ' f'{x}':
                flag = 1
            else:
                flag = 2
            self.assertEqual(flag, expected)

        test_fstring('', 2)
        test_fstring(' ', 1)

        test_concat_empty('', 2)
        test_concat_empty(' ', 1)

        test_concat_non_empty('', 1)
        test_concat_non_empty(' ', 1)
    def test_empty_format_specifier(self):
        x = 'test'
        self.assertEqual(f'{x}', 'test')
        self.assertEqual(f'{x:}', 'test')
        self.assertEqual(f'{x!s:}', 'test')
        self.assertEqual(f'{x!r:}', "'test'")

    def test_str_format_differences(self):
        d = {'a': 'string',
             0: 'integer',
             }
        a = 0
        self.assertEqual(f'{d[0]}', 'integer')
        self.assertEqual(f'{d["a"]}', 'string')
        self.assertEqual(f'{d[a]}', 'integer')
        self.assertEqual('{d[a]}'.format(d=d), 'string')
        self.assertEqual('{d[0]}'.format(d=d), 'integer')

    def test_invalid_expressions(self):
        self.assertAllRaise(SyntaxError, 'invalid syntax',
                            [r"f'{a[4)}'",
                             r"f'{a(4]}'",
                             ])

    def test_errors(self):
        # see issue 26287
        self.assertAllRaise((TypeError, ValueError), 'non-empty',  # TypeError in Py3.4+
                            [r"f'{(lambda: 0):x}'",
                             r"f'{(0,):x}'",
                             ])
        self.assertAllRaise(ValueError, 'Unknown format code',
                            [r"f'{1000:j}'",
                             r"f'{1000:j}'",
                             ])

    def test_loop(self):
        for i in range(1000):
            self.assertEqual(f'i:{i}', 'i:' + str(i))
    def test_dict(self):
        d = {'"': 'dquote',
             "'": 'squote',
             'foo': 'bar',
             }
        self.assertEqual(f'{d["\'"]}', 'squote')
        self.assertEqual(f"{d['\"']}", 'dquote')

        self.assertEqual(f'''{d["'"]}''', 'squote')
        self.assertEqual(f"""{d['"']}""", 'dquote')

        self.assertEqual(f'{d["foo"]}', 'bar')
        self.assertEqual(f"{d['foo']}", 'bar')
        self.assertEqual(f'{d[\'foo\']}', 'bar')
        self.assertEqual(f"{d[\"foo\"]}", 'bar')

    def test_escaped_quotes(self):
        d = {'"': 'a',
             "'": 'b'}

        self.assertEqual(fr"{d['\"']}", 'a')
        self.assertEqual(fr'{d["\'"]}', 'b')
        self.assertEqual(fr"{'\"'}", '"')
        self.assertEqual(fr'{"\'"}', "'")
        self.assertEqual(f'{"\\"3"}', '"3')

        self.assertAllRaise(SyntaxError, 'f-string: unterminated string',
                            [r'''f'{"""\\}' ''',  # Backslash at end of expression
                             ])
        self.assertAllRaise(SyntaxError, 'unexpected character after line continuation',
                            [r"rf'{3\}'",
                             ])


if __name__ == '__main__':
    unittest.main()