cython · Commits · fe3a65f7

Commit fe3a65f7 authored Mar 24, 2016 by Robert Bradshaw

    F strings (PEP 498)

Parents: 37534635, b9c66f41

Showing 10 changed files with 1212 additions and 39 deletions:
    Cython/Compiler/ExprNodes.py        +105    -0
    Cython/Compiler/Lexicon.py            +3    -3
    Cython/Compiler/ModuleNode.py         +3    -0
    Cython/Compiler/Naming.py             +1    -0
    Cython/Compiler/Optimize.py          +39    -0
    Cython/Compiler/Parsing.pxd           +4    -0
    Cython/Compiler/Parsing.py          +225   -36
    Cython/Utility/ModuleSetupCode.c      +4    -0
    Cython/Utility/StringTools.c         +20    -0
    tests/run/test_fstring.pyx          +808    -0
Cython/Compiler/ExprNodes.py
View file @ fe3a65f7

@@ -2954,6 +2954,111 @@ class RawCNameExprNode(ExprNode):
    pass


#-------------------------------------------------------------------
#
#  F-strings
#
#-------------------------------------------------------------------

class JoinedStrNode(ExprNode):
    # F-strings
    #
    # values   [UnicodeNode|FormattedValueNode]   Substrings of the f-string
    #
    type = unicode_type
    is_temp = True

    subexprs = ['values']

    def analyse_types(self, env):
        self.values = [v.analyse_types(env).coerce_to_pyobject(env) for v in self.values]
        return self

    def generate_evaluation_code(self, code):
        code.mark_pos(self.pos)
        num_items = len(self.values)
        list_var = code.funcstate.allocate_temp(py_object_type, manage_ref=True)

        code.putln('%s = PyList_New(%s); %s' % (
            list_var, num_items, code.error_goto_if_null(list_var, self.pos)))
        code.put_gotref(list_var)

        for i, node in enumerate(self.values):
            node.generate_evaluation_code(code)
            node.make_owned_reference(code)

            code.put_giveref(node.py_result())
            code.putln('PyList_SET_ITEM(%s, %s, %s);' % (list_var, i, node.py_result()))

            node.generate_post_assignment_code(code)
            node.free_temps(code)

        code.mark_pos(self.pos)
        self.allocate_temp_result(code)
        code.putln('%s = PyUnicode_Join(%s, %s); %s' % (
            self.result(),
            Naming.empty_unicode,
            list_var,
            code.error_goto_if_null(self.py_result(), self.pos)))
        code.put_gotref(self.py_result())

        code.put_decref_clear(list_var, py_object_type)
        code.funcstate.release_temp(list_var)


class FormattedValueNode(ExprNode):
    # {}-delimited portions of an f-string
    #
    # value            ExprNode                The expression itself
    # conversion_char  str or None             Type conversion (!s, !r, !a, or none)
    # format_spec      JoinedStrNode or None   Format string passed to __format__

    subexprs = ['value', 'format_spec']

    type = py_object_type
    is_temp = True

    find_conversion_func = {
        's': 'PyObject_Str',
        'r': 'PyObject_Repr',
        'a': 'PyObject_ASCII',  # NOTE: Py3-only!
    }.get

    def analyse_types(self, env):
        self.value = self.value.analyse_types(env).coerce_to_pyobject(env)
        if self.format_spec:
            self.format_spec = self.format_spec.analyse_types(env).coerce_to_pyobject(env)
        return self

    def generate_result_code(self, code):
        value_result = self.value.py_result()
        if self.format_spec:
            format_func = '__Pyx_PyObject_Format'
            format_spec = self.format_spec.py_result()
        else:
            # common case: expect simple Unicode pass-through if no format spec
            format_func = '__Pyx_PyObject_FormatSimple'
            format_spec = Naming.empty_unicode

        if self.conversion_char:
            fn = self.find_conversion_func(self.conversion_char)
            assert fn is not None, "invalid conversion character found: '%s'" % self.conversion_char
            value_result = '%s(%s)' % (fn, value_result)
            code.globalstate.use_utility_code(
                UtilityCode.load_cached("PyObjectFormatAndDecref", "StringTools.c"))
            format_func += 'AndDecref'
        elif not self.format_spec:
            code.globalstate.use_utility_code(
                UtilityCode.load_cached("PyObjectFormatSimple", "StringTools.c"))
        else:
            format_func = 'PyObject_Format'

        code.putln("%s = %s(%s, %s); %s" % (
            self.result(),
            format_func,
            value_result,
            format_spec,
            code.error_goto_if_null(self.result(), self.pos)))
        code.put_gotref(self.py_result())


#-------------------------------------------------------------------
#
#  Parallel nodes (cython.parallel.thread(savailable|id))
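At run time a JoinedStrNode evaluates each of its substrings to a Python object, collects the results in a temporary list and joins them with PyUnicode_Join on an empty separator; FormattedValueNode applies the optional !s/!r/!a conversion and the format spec before handing the piece over. The following is a rough Python-level sketch of that generated behaviour, not part of the node API (the helper name is illustrative only):

# Rough Python-level model of what JoinedStrNode's generated C code does
# for an f-string such as f"x = {x:>4}!": evaluate each piece, then join.
def join_fstring_parts(parts):
    # 'parts' stands in for the evaluated 'values' children (all unicode objects)
    buf = []                     # PyList_New + PyList_SET_ITEM
    for part in parts:
        buf.append(part)
    return ''.join(buf)          # PyUnicode_Join(empty_unicode, list)

x = 7
assert join_fstring_parts(['x = ', format(x, '>4'), '!']) == 'x =    7!'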
Cython/Compiler/Lexicon.py
View file @ fe3a65f7

@@ -7,7 +7,7 @@ from __future__ import absolute_import

 raw_prefixes = "rR"
 bytes_prefixes = "bB"
-string_prefixes = "uU" + bytes_prefixes
+string_prefixes = "fFuU" + bytes_prefixes
 char_prefixes = "cC"
 any_string_prefix = raw_prefixes + string_prefixes + char_prefixes
 IDENT = 'IDENT'

@@ -40,8 +40,8 @@ def make_lexicon():
     fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent)
     imagconst = (intconst | fltconst) + Any("jJ")

-    beginstring = Opt(Any(string_prefixes) + Opt(Any(raw_prefixes)) |
-                      Any(raw_prefixes) + Opt(Any(bytes_prefixes)) |
+    # invalid combinations of prefixes are caught in p_string_literal
+    beginstring = Opt(Rep(Any(string_prefixes + raw_prefixes)) |
                       Any(char_prefixes)
                       ) + (Str("'") | Str('"') | Str("'''") | Str('"""'))
     two_oct = octdigit + octdigit
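With this change the lexer accepts any repetition of string/raw prefixes and leaves validation to p_string_literal in Parsing.py (shown further down). A small illustrative re-statement of the rules the parser then enforces; the helper is hypothetical and not part of the Cython code base:

# Hypothetical re-statement of the prefix rules p_string_literal enforces.
def check_prefix(kind_string):
    kind_string = kind_string.lower()
    if len(set(kind_string)) != len(kind_string):
        return 'Duplicate string prefix character'
    for a, b in (('b', 'u'), ('b', 'f'), ('u', 'f')):
        if a in kind_string and b in kind_string:
            return 'String prefixes %s and %s cannot be combined' % (a, b)
    return 'ok'

assert check_prefix('rf') == 'ok'
assert check_prefix('bf') == 'String prefixes b and f cannot be combined'
assert check_prefix('rr') == 'Duplicate string prefix character'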
Cython/Compiler/ModuleNode.py
View file @ fe3a65f7

@@ -695,6 +695,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
        code.putln('static PyObject *%s;' % Naming.builtins_cname)
        code.putln('static PyObject *%s;' % Naming.empty_tuple)
        code.putln('static PyObject *%s;' % Naming.empty_bytes)
        code.putln('static PyObject *%s;' % Naming.empty_unicode)
        if Options.pre_import is not None:
            code.putln('static PyObject *%s;' % Naming.preimport_cname)
        code.putln('static int %s;' % Naming.lineno_cname)

@@ -2117,6 +2118,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
            Naming.empty_tuple, code.error_goto_if_null(Naming.empty_tuple, self.pos)))
        code.putln("%s = PyBytes_FromStringAndSize(\"\", 0); %s" % (
            Naming.empty_bytes, code.error_goto_if_null(Naming.empty_bytes, self.pos)))
        code.putln("%s = PyUnicode_FromStringAndSize(\"\", 0); %s" % (
            Naming.empty_unicode, code.error_goto_if_null(Naming.empty_unicode, self.pos)))
        for ext_type in ('CyFunction', 'FusedFunction', 'Coroutine', 'Generator', 'StopAsyncIteration'):
            code.putln("#ifdef __Pyx_%s_USED" % ext_type)
Cython/Compiler/Naming.py
View file @ fe3a65f7

@@ -96,6 +96,7 @@ gilstate_cname = pyrex_prefix + "state"
skip_dispatch_cname = pyrex_prefix + "skip_dispatch"
empty_tuple = pyrex_prefix + "empty_tuple"
empty_bytes = pyrex_prefix + "empty_bytes"
empty_unicode = pyrex_prefix + "empty_unicode"
print_function = pyrex_prefix + "print"
print_function_kwargs = pyrex_prefix + "print_kwargs"
cleanup_cname = pyrex_prefix + "module_cleanup"
Cython/Compiler/Optimize.py
View file @ fe3a65f7

@@ -3,6 +3,7 @@ from __future__ import absolute_import
import sys
import copy
import codecs
import itertools

from . import TypeSlots
from .ExprNodes import not_a_constant

@@ -3934,6 +3935,44 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations):
            sequence_node.mult_factor = factor
        return sequence_node

    def visit_FormattedValueNode(self, node):
        self.visitchildren(node)
        if isinstance(node.format_spec, ExprNodes.UnicodeNode) and not node.format_spec.value:
            node.format_spec = None
        if node.format_spec is None and node.conversion_char is None and isinstance(node.value, ExprNodes.UnicodeNode):
            return node.value
        return node

    def visit_JoinedStrNode(self, node):
        """
        Clean up after the parser by discarding empty Unicode strings and merging
        substring sequences.  Empty or single-value join lists are not uncommon
        because f-string format specs are always parsed into JoinedStrNodes.
        """
        self.visitchildren(node)
        unicode_node = ExprNodes.UnicodeNode

        values = []
        for is_unode_group, substrings in itertools.groupby(
                node.values, lambda v: isinstance(v, unicode_node)):
            if is_unode_group:
                substrings = list(substrings)
                unode = substrings[0]
                if len(substrings) > 1:
                    unode.value = EncodedString(u''.join(value.value for value in substrings))
                # ignore empty Unicode strings
                if unode.value:
                    values.append(unode)
            else:
                values.extend(substrings)

        if not values:
            node = ExprNodes.UnicodeNode(node.pos, value=EncodedString(''))
        elif len(values) == 1:
            node = values[0]
        else:
            node.values = values
        return node

    def visit_MergedDictNode(self, node):
        """Unpack **args in place if we can."""
        self.visitchildren(node)
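visit_JoinedStrNode merges runs of adjacent literal substrings and drops empty ones, so the join lists produced by the parser (in particular for format specs) stay small. A standalone sketch of the same merging idea on plain strings, purely for illustration:

import itertools

# Merge adjacent plain strings; keep other parts (stand-ins for
# FormattedValueNode children) as they are, in order.
def merge_literals(values):
    merged = []
    for is_str, group in itertools.groupby(values, key=lambda v: isinstance(v, str)):
        if is_str:
            joined = ''.join(group)
            if joined:              # drop runs that are entirely empty
                merged.append(joined)
        else:
            merged.extend(group)
    return merged

assert merge_literals(['a', '', 'b', 42, 'c']) == ['ab', 42, 'c']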
Cython/Compiler/Parsing.pxd
View file @ fe3a65f7

@@ -68,6 +68,10 @@ cdef p_opt_string_literal(PyrexScanner s, required_type=*)
cdef bint check_for_non_ascii_characters(unicode string)
@cython.locals(systr=unicode, is_python3_source=bint, is_raw=bint)
cdef p_string_literal(PyrexScanner s, kind_override=*)
@cython.locals(i=Py_ssize_t, size=Py_ssize_t)
cdef list p_f_string(PyrexScanner s, unicode_value, pos)
@cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4, quote_char=Py_UCS4)
cdef tuple p_f_string_expr(PyrexScanner s, unicode_value, pos, Py_ssize_t starting_index)
cdef p_list_maker(PyrexScanner s)
cdef p_comp_iter(PyrexScanner s, body)
cdef p_comp_for(PyrexScanner s, body)
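The .pxd declarations give the new parser helpers C signatures for compiled builds, and @cython.locals assigns C types to their local variables. A minimal, generic illustration of the decorator in pure-Python mode (an unrelated toy function, only to show the mechanism; the real declarations above use .pxd syntax instead):

import cython

@cython.locals(i=cython.Py_ssize_t, size=cython.Py_ssize_t)
def count_braces(unicode_value):
    # When compiled, i and size become C Py_ssize_t variables; in plain
    # CPython the decorator is a no-op, so this also runs uncompiled.
    size = len(unicode_value)
    n = 0
    for i in range(size):
        if unicode_value[i] in u'{}':
            n += 1
    return n

assert count_braces(u'{x}') == 2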
Cython/Compiler/Parsing.py
View file @ fe3a65f7

@@ -15,12 +15,13 @@ cython.declare(Nodes=object, ExprNodes=object, EncodedString=object,
                re=object, _unicode=object, _bytes=object, partial=object, reduce=object,
                _IS_PY3=cython.bint)

+from io import StringIO
 import re
 import sys
 from unicodedata import lookup as lookup_unicodechar
 from functools import partial, reduce

-from .Scanning import PyrexScanner, FileSourceDescriptor
+from .Scanning import PyrexScanner, FileSourceDescriptor, StringSourceDescriptor
 from . import Nodes
 from . import ExprNodes
 from . import Builtin

@@ -693,8 +694,12 @@ def p_atom(s):
             return ExprNodes.UnicodeNode(pos, value=unicode_value, bytes_value=bytes_value)
         elif kind == 'b':
             return ExprNodes.BytesNode(pos, value=bytes_value)
-        else:
+        elif kind == 'f':
+            return ExprNodes.JoinedStrNode(pos, values=unicode_value)
+        elif kind == '':
             return ExprNodes.StringNode(pos, value=bytes_value, unicode_value=unicode_value)
+        else:
+            s.error("invalid string kind '%s'" % kind)
     elif sy == 'IDENT':
         name = s.systring
         s.next()
@@ -788,42 +793,61 @@ def wrap_compile_time_constant(pos, value):

def p_cat_string_literal(s):
    # A sequence of one or more adjacent string literals.
    # Returns (kind, bytes_value, unicode_value)
    # where kind in ('b', 'c', 'u', 'f', '')
    pos = s.position()
    kind, bytes_value, unicode_value = p_string_literal(s)
    if kind == 'c' or s.sy != 'BEGIN_STRING':
        return kind, bytes_value, unicode_value
    bstrings, ustrings, positions = [bytes_value], [unicode_value], [pos]
    bytes_value = unicode_value = None
    while s.sy == 'BEGIN_STRING':
        pos = s.position()
        next_kind, next_bytes_value, next_unicode_value = p_string_literal(s)
        if next_kind == 'c':
            error(pos, "Cannot concatenate char literal with another string or char literal")
            continue
        elif next_kind != kind:
            # concatenating f strings and normal strings is allowed and leads to an f string
            if set([kind, next_kind]) in (set(['f', 'u']), set(['f', ''])):
                kind = 'f'
            else:
                error(pos, "Cannot mix string literals of different types, expected %s'', got %s''" % (
                    kind, next_kind))
                continue
        bstrings.append(next_bytes_value)
        ustrings.append(next_unicode_value)
        positions.append(pos)
    # join and rewrap the partial literals
    if kind in ('b', 'c', '') or kind == 'u' and None not in bstrings:
        # Py3 enforced unicode literals are parsed as bytes/unicode combination
        bytes_value = bytes_literal(StringEncoding.join_bytes(bstrings), s.source_encoding)
    if kind in ('u', ''):
        unicode_value = EncodedString(u''.join([u for u in ustrings if u is not None]))
    if kind == 'f':
        unicode_value = []
        for u, pos in zip(ustrings, positions):
            if isinstance(u, list):
                unicode_value += u
            else:
                # non-f-string concatenated into the f-string
                unicode_value.append(ExprNodes.UnicodeNode(pos, value=EncodedString(u)))
    return kind, bytes_value, unicode_value


def p_opt_string_literal(s, required_type='u'):
    if s.sy != 'BEGIN_STRING':
        return None
    pos = s.position()
    kind, bytes_value, unicode_value = p_string_literal(s, required_type)
    if required_type == 'u':
        if kind == 'f':
            s.error("f-string not allowed here", pos)
        return unicode_value
    elif required_type == 'b':
        return bytes_value
    else:
        s.error("internal parser configuration error")


def check_for_non_ascii_characters(string):
    for c in string:
@@ -831,38 +855,55 @@ def check_for_non_ascii_characters(string):
            return True
    return False


def p_string_literal(s, kind_override=None):
    # A single string or char literal.  Returns (kind, bvalue, uvalue)
    # where kind in ('b', 'c', 'u', 'f', '').  The 'bvalue' is the source
    # code byte sequence of the string literal, 'uvalue' is the
    # decoded Unicode string.  Either of the two may be None depending
    # on the 'kind' of string, only unprefixed strings have both
    # representations. In f-strings, the uvalue is a list of the Unicode
    # strings and f-string expressions that make up the f-string.

    # s.sy == 'BEGIN_STRING'
    pos = s.position()
    is_raw = False
    is_python3_source = s.context.language_level >= 3
    has_non_ascii_literal_characters = False
    kind_string = s.systring.rstrip('"\'').lower()
    if len(set(kind_string)) != len(kind_string):
        s.error('Duplicate string prefix character')
    if 'b' in kind_string and 'u' in kind_string:
        s.error('String prefixes b and u cannot be combined')
    if 'b' in kind_string and 'f' in kind_string:
        s.error('String prefixes b and f cannot be combined')
    if 'u' in kind_string and 'f' in kind_string:
        s.error('String prefixes u and f cannot be combined')

    is_raw = 'r' in kind_string

    if 'c' in kind_string:
        # this should never happen, since the lexer does not allow combining c
        # with other prefix characters
        if len(kind_string) != 1:
            s.error('Invalid string prefix for character literal')
        kind = 'c'
    elif 'f' in kind_string:
        kind = 'f'  # u is ignored
    elif 'b' in kind_string:
        kind = 'b'
    elif 'u' in kind_string:
        kind = 'u'
    else:
        kind = ''

    if kind == '' and kind_override is None and Future.unicode_literals in s.context.future_directives:
        chars = StringEncoding.StrLiteralBuilder(s.source_encoding)
        kind = 'u'
    else:
        if kind_override is not None and kind_override in 'ub':
            kind = kind_override
        if kind in ('u', 'f'):
            # f-strings are scanned exactly like Unicode literals, but are parsed further later
            chars = StringEncoding.UnicodeLiteralBuilder()
        elif kind == '':
            chars = StringEncoding.StrLiteralBuilder(s.source_encoding)
@@ -873,7 +914,7 @@ def p_string_literal(s, kind_override=None):
         s.next()
         sy = s.sy
         systr = s.systring
-        #print "p_string_literal: sy =", sy, repr(s.systring) ###
+        # print "p_string_literal: sy =", sy, repr(s.systring) ###
         if sy == 'CHARS':
             chars.append(systr)
             if is_python3_source and not has_non_ascii_literal_characters and check_for_non_ascii_characters(systr):
@@ -901,7 +942,7 @@ def p_string_literal(s, kind_override=None):
                 else:
                     s.error("Invalid hex escape '%s'" % systr, fatal=False)
-            elif c in u'NUu' and kind in ('u', ''):  # \uxxxx, \Uxxxxxxxx, \N{...}
+            elif c in u'NUu' and kind in ('u', 'f', ''):  # \uxxxx, \Uxxxxxxxx, \N{...}
                 chrval = -1
                 if c == u'N':
                     try:
@@ -943,14 +984,162 @@ def p_string_literal(s, kind_override=None):
    bytes_value, unicode_value = chars.getstrings()
    if is_python3_source and has_non_ascii_literal_characters:
        # Python 3 forbids literal non-ASCII characters in byte strings
        if kind not in ('u', 'f'):
            s.error("bytes can only contain ASCII literal characters.", pos=pos, fatal=False)
        bytes_value = None
    if kind == 'f':
        unicode_value = p_f_string(s, unicode_value, pos)
    s.next()
    return (kind, bytes_value, unicode_value)


def p_f_string(s, unicode_value, pos):
    # Parses a PEP 498 f-string literal into a list of nodes. Nodes are either UnicodeNodes
    # or FormattedValueNodes.
    values = []
    i = 0
    size = len(unicode_value)
    current_literal_start = 0
    while i < size:
        c = unicode_value[i]
        if c in '{}':
            if i + 1 < size and unicode_value[i + 1] == c:
                encoded_str = EncodedString(unicode_value[current_literal_start:i + 1])
                values.append(ExprNodes.UnicodeNode(pos, value=encoded_str))
                i += 2
                current_literal_start = i
            elif c == '}':
                s.error("single '}' encountered in format string")
            else:
                encoded_str = EncodedString(unicode_value[current_literal_start:i])
                values.append(ExprNodes.UnicodeNode(pos, value=encoded_str))
                i, expr_node = p_f_string_expr(s, unicode_value, pos, i + 1)
                current_literal_start = i
                values.append(expr_node)
        else:
            i += 1

    encoded_str = EncodedString(unicode_value[current_literal_start:])
    values.append(ExprNodes.UnicodeNode(pos, value=encoded_str))

    return values


def p_f_string_expr(s, unicode_value, pos, starting_index):
    # Parses a {}-delimited expression inside an f-string. Returns a FormattedValueNode
    # and the index in the string that follows the expression.
    i = starting_index
    size = len(unicode_value)
    conversion_char = terminal_char = format_spec = None
    format_spec_str = None
    NO_CHAR = 2**30

    nested_depth = 0
    quote_char = NO_CHAR
    in_triple_quotes = False

    while True:
        if i >= size:
            s.error("missing '}' in format string expression")

        c = unicode_value[i]

        if quote_char != NO_CHAR:
            if c == '\\':
                i += 1
            elif c == quote_char:
                if in_triple_quotes:
                    if i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
                        in_triple_quotes = False
                        quote_char = NO_CHAR
                        i += 2
                else:
                    quote_char = NO_CHAR
        elif c in '\'"':
            quote_char = c
            if i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
                in_triple_quotes = True
                i += 2
        elif c in '{[(':
            nested_depth += 1
        elif nested_depth != 0 and c in '}])':
            nested_depth -= 1
        elif c == '#':
            s.error("format string cannot include #")
        elif nested_depth == 0 and c in '!:}':
            # allow != as a special case
            if c == '!' and i + 1 < size and unicode_value[i + 1] == '=':
                i += 1
                continue

            terminal_char = c
            break
        i += 1

    # normalise line endings as the parser expects that
    expr_str = unicode_value[starting_index:i].replace('\r\n', '\n').replace('\r', '\n')
    expr_pos = (pos[0], pos[1], pos[2] + starting_index + 2)  # TODO: find exact code position (concat, multi-line, ...)

    if not expr_str.strip():
        s.error("empty expression not allowed in f-string")

    if terminal_char == '!':
        i += 1
        if i + 2 > size:
            s.error("invalid conversion char at end of string")
        conversion_char = unicode_value[i]
        i += 1
        terminal_char = unicode_value[i]

    if terminal_char == ':':
        in_triple_quotes = False
        in_string = False
        nested_depth = 0
        start_format_spec = i + 1
        while True:
            if i >= size:
                s.error("missing '}' in format specifier")
            c = unicode_value[i]
            if not in_triple_quotes and not in_string:
                if c == '{':
                    if nested_depth >= 1:
                        s.error("nesting of '{' in format specifier is not allowed")
                    nested_depth += 1
                elif c == '}':
                    if nested_depth > 0:
                        nested_depth -= 1
                    else:
                        terminal_char = c
                        break
            if c in '\'"':
                if not in_string and i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
                    in_triple_quotes = not in_triple_quotes
                    i += 2
                elif not in_triple_quotes:
                    in_string = not in_string
            i += 1

        format_spec_str = unicode_value[start_format_spec:i]

    if terminal_char != '}':
        s.error("missing '}' in format string expression', found '%s'" % terminal_char)

    # parse the expression as if it was surrounded by parentheses
    buf = StringIO('(%s)' % expr_str)
    scanner = PyrexScanner(buf, expr_pos[0], parent_scanner=s, source_encoding=s.source_encoding, initial_pos=expr_pos)
    expr = p_testlist(scanner)  # TODO is testlist right here?

    # validate the conversion char
    if conversion_char is not None and not ExprNodes.FormattedValueNode.find_conversion_func(conversion_char):
        s.error("invalid conversion character '%s'" % conversion_char)

    # the format spec is itself treated like an f-string
    if format_spec_str:
        format_spec = ExprNodes.JoinedStrNode(pos, values=p_f_string(s, format_spec_str, pos))

    return i + 1, ExprNodes.FormattedValueNode(
        s.position(), value=expr, conversion_char=conversion_char, format_spec=format_spec)


# since PEP 448:
# list_display  ::=     "[" [listmaker] "]"
# listmaker     ::=     (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
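p_f_string walks the already-decoded unicode value of the literal, turning doubled braces into literal braces and handing each {...} off to p_f_string_expr, which parses the expression text through a nested PyrexScanner. A simplified, standalone sketch of just the splitting step (no quote/paren tracking, no conversions or format specs; the helper is illustrative only):

# Simplified model of p_f_string's literal/expression split.
def split_f_string(text):
    parts, literal, i = [], [], 0
    while i < len(text):
        c = text[i]
        if c in '{}':
            if i + 1 < len(text) and text[i + 1] == c:
                literal.append(c)          # '{{' or '}}' -> single brace
                i += 2
                continue
            if c == '}':
                raise SyntaxError("single '}' encountered in format string")
            end = text.index('}', i)       # the real parser scans quotes/nesting here
            parts.append(('literal', ''.join(literal)))
            literal = []
            parts.append(('expr', text[i + 1:end]))
            i = end + 1
        else:
            literal.append(c)
            i += 1
    parts.append(('literal', ''.join(literal)))
    return parts

assert split_f_string('a{{b}}{x + 1}!') == [
    ('literal', 'a{b}'), ('expr', 'x + 1'), ('literal', '!')]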
Cython/Utility/ModuleSetupCode.c
View file @ fe3a65f7

@@ -134,6 +134,10 @@
  #define __Pyx_PyString_Format(a, b)  PyString_Format(a, b)
#endif

#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII)
  #define PyObject_ASCII(o)            PyObject_Repr(o)
#endif

#if PY_MAJOR_VERSION >= 3
  #define PyBaseString_Type            PyUnicode_Type
  #define PyStringObject               PyUnicodeObject
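Python 2 has no PyObject_ASCII(), so the !a conversion falls back to PyObject_Repr() there. The Python 3 behaviour this targets is simply ascii() versus repr() (a small sketch using the standard builtins):

# Py3 semantics of the '!a' conversion: repr() with non-ASCII characters escaped.
value = u'\u0394'          # GREEK CAPITAL LETTER DELTA
assert repr(value) == "'\u0394'"
assert ascii(value) == "'\\u0394'"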
Cython/Utility/StringTools.c
View file @ fe3a65f7

@@ -811,3 +811,23 @@ static CYTHON_INLINE int __Pyx_PyByteArray_Append(PyObject* bytearray, int value
    Py_DECREF(retval);
    return 0;
}


//////////////////// PyObjectFormatSimple.proto ////////////////////

#define __Pyx_PyObject_FormatSimple(s, f) (likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) : PyObject_Format(s, f))


//////////////////// PyObjectFormatAndDecref.proto ////////////////////

#define __Pyx_PyObject_FormatSimpleAndDecref(s, f) \
    ((unlikely(!s) || likely(PyUnicode_CheckExact(s))) ? s : __Pyx_PyObject_FormatAndDecref(s, f))

static CYTHON_INLINE PyObject* __Pyx_PyObject_FormatAndDecref(PyObject* s, PyObject* f);


//////////////////// PyObjectFormatAndDecref ////////////////////

static CYTHON_INLINE PyObject* __Pyx_PyObject_FormatAndDecref(PyObject* s, PyObject* f) {
    PyObject *result = PyObject_Format(s, f);
    Py_DECREF(s);
    return result;
}
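__Pyx_PyObject_FormatSimple short-circuits the common case of formatting an exact unicode object with an empty format spec, and the ...AndDecref variants consume the reference produced by a preceding !s/!r/!a conversion. The fast-path logic, restated in Python as a sketch:

# Python restatement of the __Pyx_PyObject_FormatSimple fast path: an exact
# str with an empty spec passes through untouched, everything else goes
# through format() / __format__.
def format_simple(value, spec=''):
    if type(value) is str:       # PyUnicode_CheckExact
        return value
    return format(value, spec)   # PyObject_Format

assert format_simple('abc') == 'abc'
assert format_simple(42) == '42'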
tests/run/test_fstring.pyx
0 → 100644
View file @ fe3a65f7

# cython: language_level=3
# mode: run
# tag: allow_unknown_names, f_strings, pep498

import ast
import types
import decimal
import unittest
import contextlib
import sys

IS_PY2 = sys.version_info[0] < 3
IS_PY26 = sys.version_info[:2] < (2, 7)

from Cython.Build.Inline import cython_inline
from Cython.TestUtils import CythonTest
from Cython.Compiler.Errors import CompileError, hold_errors, release_errors, error_stack


def cy_eval(s, **kwargs):
    return cython_inline('return ' + s, force=True, **kwargs)


a_global = 'global variable'

# You could argue that I'm too strict in looking for specific error
# values with assertRaisesRegex, but without it it's way too easy to
# make a syntax error in the test strings. Especially with all of the
# triple quotes, raw strings, backslashes, etc. I think it's a
# worthwhile tradeoff. When I switched to this method, I found many
# examples where I wasn't testing what I thought I was.

class TestCase(CythonTest):
    def assertAllRaise(self, exception_type, regex, error_strings):
        for str in error_strings:
            if exception_type is SyntaxError:
                try:
                    self.fragment(str)
                except CompileError:
                    assert True
                else:
                    assert False, "Invalid Cython code failed to raise SyntaxError: %s" % str
            else:
                hold_errors()
                try:
                    cython_inline(str, quiet=True)
                except exception_type:
                    assert True
                else:
                    assert False, "Invalid Cython code failed to raise %s: %s" % (exception_type, str)
                finally:
                    if error_stack:
                        release_errors(ignore=True)

    if IS_PY2:
        def assertEqual(self, first, second, msg=None):
            # strip u'' string prefixes in Py2
            if first != second and isinstance(first, unicode):
                stripped_first = first.replace("u'", "'").replace('u"', '"')
                if stripped_first == second:
                    first = stripped_first
                elif stripped_first.decode('unicode_escape') == second:
                    first = stripped_first.decode('unicode_escape')
            super(TestCase, self).assertEqual(first, second, msg)

    if IS_PY26:
        @contextlib.contextmanager
        def assertRaises(self, exc):
            try:
                yield
            except exc:
                pass
            else:
                assert False, "exception '%s' not raised" % exc

        def assertIn(self, value, collection):
            self.assertTrue(value in collection)
    def test__format__lookup(self):
        if IS_PY26:
            return
        elif IS_PY2:
            raise unittest.SkipTest("Py3-only")

        # Make sure __format__ is looked up on the type, not the instance.
        class X:
            def __format__(self, spec):
                return 'class'

        x = X()

        # Add a bound __format__ method to the 'y' instance, but not
        # the 'x' instance.
        y = X()
        y.__format__ = types.MethodType(lambda self, spec: 'instance', y)

        self.assertEqual(f'{y}', format(y))
        self.assertEqual(f'{y}', 'class')
        self.assertEqual(format(x), format(y))

        # __format__ is not called this way, but still make sure it
        # returns what we expect (so we can make sure we're bypassing
        # it).
        self.assertEqual(x.__format__(''), 'class')
        self.assertEqual(y.__format__(''), 'instance')

        # This is how __format__ is actually called.
        self.assertEqual(type(x).__format__(x, ''), 'class')
        self.assertEqual(type(y).__format__(y, ''), 'class')

    def __test_ast(self):
        # Inspired by http://bugs.python.org/issue24975
        class X:
            def __init__(self):
                self.called = False
            def __call__(self):
                self.called = True
                return 4
        x = X()
        expr = """
a = 10
f'{a * x()}'"""
        t = ast.parse(expr)
        c = compile(t, '', 'exec')

        # Make sure x was not called.
        self.assertFalse(x.called)

        # Actually run the code.
        exec(c)

        # Make sure x was called.
        self.assertTrue(x.called)

    def __test_literal_eval(self):
        # With no expressions, an f-string is okay.
        self.assertEqual(ast.literal_eval("f'x'"), 'x')
        self.assertEqual(ast.literal_eval("f'x' 'y'"), 'xy')

        # But this should raise an error.
        with self.assertRaisesRegex(ValueError, 'malformed node or string'):
            ast.literal_eval("f'x{3}'")

        # As should this, which uses a different ast node
        with self.assertRaisesRegex(ValueError, 'malformed node or string'):
            ast.literal_eval("f'{3}'")

    def __test_ast_compile_time_concat(self):
        x = ['']
        expr = """x[0] = 'foo' f'{3}'"""
        t = ast.parse(expr)
        c = compile(t, '', 'exec')
        exec(c)
        self.assertEqual(x[0], 'foo3')

    def test_literal(self):
        self.assertEqual(f'', '')
        self.assertEqual(f'a', 'a')
        self.assertEqual(f' ', ' ')
        self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}', '\N{GREEK CAPITAL LETTER DELTA}')
        self.assertEqual(f'\N{GREEK CAPITAL LETTER DELTA}', '\u0394')
        self.assertEqual(f'\N{True}', '\u22a8')
        self.assertEqual(rf'\N{True}', r'\NTrue')

    def test_escape_order(self):
        # note that hex(ord('{')) == 0x7b, so this
        # string becomes f'a{4*10}b'
        self.assertEqual(f'a\u007b4*10}b', 'a40b')
        self.assertEqual(f'a\x7b4*10}b', 'a40b')
        self.assertEqual(f'a\x7b4*10\N{RIGHT CURLY BRACKET}b', 'a40b')
        self.assertEqual(f'{"a"!\N{LATIN SMALL LETTER R}}', "'a'")
        self.assertEqual(f'{10\x3a02X}', '0A')
        self.assertEqual(f'{10:02\N{LATIN CAPITAL LETTER X}}', '0A')

        self.assertAllRaise(SyntaxError, "f-string: single '}' is not allowed",
                            [r"""f'a{\u007b4*10}b'""",  # mis-matched brackets
                             ])
        self.assertAllRaise(SyntaxError, 'unexpected character after line continuation character',
                            [r"""f'{"a"\!r}'""",
                             r"""f'{a\!r}'""",
                             ])

    def test_unterminated_string(self):
        self.assertAllRaise(SyntaxError, 'f-string: unterminated string',
                            [r"""f'{"x'""",
                             r"""f'{"x}'""",
                             r"""f'{("x'""",
                             r"""f'{("x}'""",
                             ])

    def test_mismatched_parens(self):
        self.assertAllRaise(SyntaxError, 'f-string: mismatched',
                            ["f'{((}'",
                             ])
    def test_double_braces(self):
        self.assertEqual(f'{{', '{')
        self.assertEqual(f'a{{', 'a{')
        self.assertEqual(f'{{b', '{b')
        self.assertEqual(f'a{{b', 'a{b')
        self.assertEqual(f'}}', '}')
        self.assertEqual(f'a}}', 'a}')
        self.assertEqual(f'}}b', '}b')
        self.assertEqual(f'a}}b', 'a}b')

        self.assertEqual(f'{{{10}', '{10')
        self.assertEqual(f'}}{10}', '}10')
        self.assertEqual(f'}}{{{10}', '}{10')
        self.assertEqual(f'}}a{{{10}', '}a{10')

        self.assertEqual(f'{10}{{', '10{')
        self.assertEqual(f'{10}}}', '10}')
        self.assertEqual(f'{10}}}{{', '10}{')
        self.assertEqual(f'{10}}}a{{' '}', '10}a{}')

        # Inside of strings, don't interpret doubled brackets.
        self.assertEqual(f'{"{{}}"}', '{{}}')

        self.assertAllRaise(TypeError, 'unhashable type',
                            ["f'{ {{}} }'",  # dict in a set
                             ])

    def test_compile_time_concat(self):
        x = 'def'
        self.assertEqual('abc' f'## {x}ghi', 'abc## defghi')
        self.assertEqual('abc' f'{x}' 'ghi', 'abcdefghi')
        self.assertEqual('abc' f'{x}' 'gh' f'i{x:4}', 'abcdefghidef ')
        self.assertEqual('{x}' f'{x}', '{x}def')
        self.assertEqual('{x' f'{x}', '{xdef')
        self.assertEqual('{x}' f'{x}', '{x}def')
        self.assertEqual('{{x}}' f'{x}', '{{x}}def')
        self.assertEqual('{{x' f'{x}', '{{xdef')
        self.assertEqual('x}}' f'{x}', 'x}}def')
        self.assertEqual(f'{x}' 'x}}', 'defx}}')
        self.assertEqual(f'{x}' '', 'def')
        self.assertEqual('' f'{x}' '', 'def')
        self.assertEqual('' f'{x}', 'def')
        self.assertEqual(f'{x}' '2', 'def2')
        self.assertEqual('1' f'{x}' '2', '1def2')
        self.assertEqual('1' f'{x}', '1def')
        self.assertEqual(f'{x}' f'-{x}', 'def-def')
        self.assertEqual('' f'', '')
        self.assertEqual('' f'' '', '')
        self.assertEqual('' f'' '' f'', '')
        self.assertEqual(f'', '')
        self.assertEqual(f'' '', '')
        self.assertEqual(f'' '' f'', '')
        self.assertEqual(f'' '' f'' '', '')

        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
                            ["f'{3' f'}'",  # can't concat to get a valid f-string
                             ])

    def test_comments(self):
        # These aren't comments, since they're in strings.
        d = {'#': 'hash'}
        self.assertEqual(f'{"#"}', '#')
        self.assertEqual(f'{d["#"]}', 'hash')

        self.assertAllRaise(SyntaxError, "f-string cannot include '#'",
                            ["f'{1#}'",   # error because the expression becomes "(1#)"
                             "f'{3(#)}'",
                             ])

    def test_many_expressions(self):
        # Create a string with many expressions in it. Note that
        # because we have a space in here as a literal, we're actually
        # going to use twice as many ast nodes: one for each literal
        # plus one for each expression.
        def build_fstr(n, extra=''):
            return "f'" + ('{x} ' * n) + extra + "'"

        x = 'X'
        width = 1

        # Test around 256.
        for i in range(250, 260):
            self.assertEqual(cy_eval(build_fstr(i), x=x, width=width), (x+' ')*i)

        # Test concatenating 2 largs fstrings.
        self.assertEqual(cy_eval(build_fstr(255)*3, x=x, width=width), (x+' ')*(255*3))  # CPython uses 255*256

        s = build_fstr(253, '{x:{width}} ')
        self.assertEqual(cy_eval(s, x=x, width=width), (x+' ')*254)

        # Test lots of expressions and constants, concatenated.
        s = "f'{1}' 'x' 'y'" * 1024
        self.assertEqual(cy_eval(s, x=x, width=width), '1xy' * 1024)
    def test_format_specifier_expressions(self):
        width = 10
        precision = 4
        value = decimal.Decimal('12.34567')
        if not IS_PY26:
            self.assertEqual(f'result: {value:{width}.{precision}}', 'result:      12.35')
            self.assertEqual(f'result: {value:{width!r}.{precision}}', 'result:      12.35')
            self.assertEqual(f'result: {value:{width:0}.{precision:1}}', 'result:      12.35')
            self.assertEqual(f'result: {value:{1}{0:0}.{precision:1}}', 'result:      12.35')
            self.assertEqual(f'result: {value:{ 1}{ 0:0}.{ precision:1}}', 'result:      12.35')
            self.assertEqual(f'{10:#{1}0x}', '       0xa')
            self.assertEqual(f'{10:{"#"}1{0}{"x"}}', '       0xa')
            self.assertEqual(f'{-10:-{"#"}1{0}x}', '      -0xa')
            self.assertEqual(f'{-10:{"-"}#{1}0{"x"}}', '      -0xa')
            # self.assertEqual(f'{10:#{3 != {4:5} and width}x}', '       0xa')

        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
                            ["""f'{"s"!r{":10"}}'""",
                             # This looks like a nested format spec.
                             ])

        self.assertAllRaise(SyntaxError, "invalid syntax",
                            [# Invalid sytax inside a nested spec.
                             "f'{4:{/5}}'",
                             ])

        self.assertAllRaise(SyntaxError, "f-string: expressions nested too deeply",
                            [# Can't nest format specifiers.
                             "f'result: {value:{width:{0}}.{precision:1}}'",
                             ])

        self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character',
                            [# No expansion inside conversion or for
                             # the : or ! itself.
                             """f'{"s"!{"r"}}'""",
                             ])

    def test_side_effect_order(self):
        class X:
            def __init__(self):
                self.i = 0
            def __format__(self, spec):
                self.i += 1
                return str(self.i)

        x = X()
        self.assertEqual(f'{x} {x}', '1 2')

    def test_missing_expression(self):
        self.assertAllRaise(SyntaxError, 'f-string: empty expression not allowed',
                            ["f'{}'",
                             "f'{ }'"
                             "f' {} '",
                             "f'{!r}'",
                             "f'{ !r}'",
                             "f'{10:{ }}'",
                             "f' { } '",
                             r"f'{\n}'",
                             r"f'{\n \n}'",

                             # Catch the empty expression before the
                             # invalid conversion.
                             "f'{!x}'",
                             "f'{ !xr}'",
                             "f'{!x:}'",
                             "f'{!x:a}'",
                             "f'{ !xr:}'",
                             "f'{ !xr:a}'",

                             "f'{!}'",
                             "f'{:}'",

                             # We find the empty expression before the
                             # missing closing brace.
                             "f'{!'",
                             "f'{!s:'",
                             "f'{:'",
                             "f'{:x'",
                             ])

    def test_parens_in_expressions(self):
        self.assertEqual(f'{3,}', '(3,)')

        # Add these because when an expression is evaluated, parens
        # are added around it. But we shouldn't go from an invalid
        # expression to a valid one. The added parens are just
        # supposed to allow whitespace (including newlines).
        self.assertAllRaise(SyntaxError, 'invalid syntax',
                            ["f'{,}'",
                             "f'{,}'",  # this is (,), which is an error
                             ])

        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
                            ["f'{3)+(4}'",
                             ])

        self.assertAllRaise(SyntaxError, 'EOL while scanning string literal',
                            ["f'{\n}'",
                             ])

    def test_newlines_in_expressions(self):
        self.assertEqual(f'{0}', '0')
        self.assertEqual(f'{0\n}', '0')
        self.assertEqual(f'{0\r}', '0')
        self.assertEqual(f'{\n0\n}', '0')
        self.assertEqual(f'{\r0\r}', '0')
        self.assertEqual(f'{\n0\r}', '0')
        self.assertEqual(f'{\n0}', '0')
        self.assertEqual(f'{3+\n4}', '7')
        self.assertEqual(f'{3+\\\n4}', '7')
        self.assertEqual(rf'''{3+
4}''', '7')
        self.assertEqual(f'''{3+\
4}''', '7')

        self.assertAllRaise(SyntaxError, 'f-string: empty expression not allowed',
                            [r"f'{\n}'",
                             ])

    def test_lambda(self):
        x = 5
        self.assertEqual(f'{(lambda y:x*y)("8")!r}', "'88888'")
        if not IS_PY2:
            self.assertEqual(f'{(lambda y:x*y)("8")!r:10}', "'88888'   ")
            self.assertEqual(f'{(lambda y:x*y)("8"):10}', "88888     ")

        # lambda doesn't work without parens, because the colon
        # makes the parser think it's a format_spec
        self.assertAllRaise(SyntaxError, 'unexpected EOF while parsing',
                            ["f'{lambda x:x}'",
                             ])

    def test_yield(self):
        # Not terribly useful, but make sure the yield turns
        # a function into a generator
        def fn(y):
            f'y:{yield y*2}'

        g = fn(4)
        self.assertEqual(next(g), 8)
    def test_yield_send(self):
        def fn(x):
            yield f'x:{yield (lambda i: x * i)}'

        g = fn(10)
        the_lambda = next(g)
        self.assertEqual(the_lambda(4), 40)
        self.assertEqual(g.send('string'), 'x:string')

    def test_expressions_with_triple_quoted_strings(self):
        self.assertEqual(f"{'''x'''}", 'x')
        self.assertEqual(f"{'''eric's'''}", "eric's")
        self.assertEqual(f'{"""eric\'s"""}', "eric's")
        self.assertEqual(f"{'''eric\"s'''}", 'eric"s')
        self.assertEqual(f'{"""eric"s"""}', 'eric"s')

        # Test concatenation within an expression
        self.assertEqual(f'{"x" """eric"s""" "y"}', 'xeric"sy')
        self.assertEqual(f'{"x" """eric"s"""}', 'xeric"s')
        self.assertEqual(f'{"""eric"s""" "y"}', 'eric"sy')
        self.assertEqual(f'{"""x""" """eric"s""" "y"}', 'xeric"sy')
        self.assertEqual(f'{"""x""" """eric"s""" """y"""}', 'xeric"sy')
        self.assertEqual(f'{r"""x""" """eric"s""" """y"""}', 'xeric"sy')

    def test_multiple_vars(self):
        x = 98
        y = 'abc'
        self.assertEqual(f'{x}{y}', '98abc')

        self.assertEqual(f'X{x}{y}', 'X98abc')
        self.assertEqual(f'{x}X{y}', '98Xabc')
        self.assertEqual(f'{x}{y}X', '98abcX')

        self.assertEqual(f'X{x}Y{y}', 'X98Yabc')
        self.assertEqual(f'X{x}{y}Y', 'X98abcY')
        self.assertEqual(f'{x}X{y}Y', '98XabcY')

        self.assertEqual(f'X{x}Y{y}Z', 'X98YabcZ')

    def test_closure(self):
        def outer(x):
            def inner():
                return f'x:{x}'
            return inner

        self.assertEqual(outer('987')(), 'x:987')
        self.assertEqual(outer(7)(), 'x:7')

    def test_arguments(self):
        y = 2
        def f(x, width):
            return f'x={x*y:{width}}'

        self.assertEqual(f('foo', 10), 'x=foofoo    ')
        x = 'bar'
        self.assertEqual(f(10, 10), 'x=        20')

    def test_locals(self):
        value = 123
        self.assertEqual(f'v:{value}', 'v:123')

    def test_missing_variable(self):
        with self.assertRaises(NameError):
            f'v:{value}'

    def test_missing_format_spec(self):
        class O:
            def __format__(self, spec):
                if not spec:
                    return '*'
                return spec

        self.assertEqual(f'{O():x}', 'x')
        self.assertEqual(f'{O()}', '*')
        self.assertEqual(f'{O():}', '*')

        self.assertEqual(f'{3:}', '3')
        self.assertEqual(f'{3!s:}', '3')

    def test_global(self):
        self.assertEqual(f'g:{a_global}', 'g:global variable')
        self.assertEqual(f'g:{a_global!r}', "g:'global variable'")

        a_local = 'local variable'
        self.assertEqual(f'g:{a_global} l:{a_local}', 'g:global variable l:local variable')
        self.assertEqual(f'g:{a_global!r}', "g:'global variable'")
        self.assertEqual(f'g:{a_global} l:{a_local!r}', "g:global variable l:'local variable'")

        self.assertIn("module 'unittest' from", f'{unittest}')

    def test_shadowed_global(self):
        a_global = 'really a local'
        self.assertEqual(f'g:{a_global}', 'g:really a local')
        self.assertEqual(f'g:{a_global!r}', "g:'really a local'")

        a_local = 'local variable'
        self.assertEqual(f'g:{a_global} l:{a_local}', 'g:really a local l:local variable')
        self.assertEqual(f'g:{a_global!r}', "g:'really a local'")
        self.assertEqual(f'g:{a_global} l:{a_local!r}', "g:really a local l:'local variable'")
    def test_call(self):
        def foo(x):
            return 'x=' + str(x)

        self.assertEqual(f'{foo(10)}', 'x=10')

    def test_nested_fstrings(self):
        y = 5
        self.assertEqual(f'{f"{0}"*3}', '000')
        self.assertEqual(f'{f"{y}"*3}', '555')
        self.assertEqual(f'{f"{\'x\'}"*3}', 'xxx')

        self.assertEqual(f"{r'x' f'{\"s\"}'}", 'xs')
        self.assertEqual(f"{r'x'rf'{\"s\"}'}", 'xs')

    def test_invalid_string_prefixes(self):
        self.assertAllRaise(SyntaxError, 'unexpected EOF while parsing',
                            ["fu''",
                             "uf''",
                             "Fu''",
                             "fU''",
                             "Uf''",
                             "uF''",
                             "ufr''",
                             "urf''",
                             "fur''",
                             "fru''",
                             "rfu''",
                             "ruf''",
                             "FUR''",
                             "Fur''",
                             ])

    def test_leading_trailing_spaces(self):
        self.assertEqual(f'{ 3}', '3')
        self.assertEqual(f'{  3}', '3')
        self.assertEqual(f'{\t3}', '3')
        self.assertEqual(f'{\t\t3}', '3')
        self.assertEqual(f'{3 }', '3')
        self.assertEqual(f'{3  }', '3')
        self.assertEqual(f'{3\t}', '3')
        self.assertEqual(f'{3\t\t}', '3')

        self.assertEqual(f'expr={ {x: y for x, y in [(1, 2), ]}}',
                         'expr={1: 2}')
        self.assertEqual(f'expr={ {x: y for x, y in [(1, 2), ]} }',
                         'expr={1: 2}')

    def test_character_name(self):
        self.assertEqual(f'{4}\N{GREEK CAPITAL LETTER DELTA}{3}',
                         '4\N{GREEK CAPITAL LETTER DELTA}3')
        self.assertEqual(f'{{}}\N{GREEK CAPITAL LETTER DELTA}{3}',
                         '{}\N{GREEK CAPITAL LETTER DELTA}3')

    def test_not_equal(self):
        # There's a special test for this because there's a special
        # case in the f-string parser to look for != as not ending an
        # expression. Normally it would, while looking for !s or !r.
        self.assertEqual(f'{3!=4}', 'True')
        self.assertEqual(f'{3!=4:}', 'True')
        self.assertEqual(f'{3!=4!s}', 'True')
        self.assertEqual(f'{3!=4!s:.3}', 'Tru')

    def test_conversions(self):
        self.assertEqual(f'{3.14:10.10}', '      3.14')
        if not IS_PY26:
            self.assertEqual(f'{3.14!s:10.10}', '3.14      ')
            self.assertEqual(f'{3.14!r:10.10}', '3.14      ')
            self.assertEqual(f'{3.14!a:10.10}', '3.14      ')

        self.assertEqual(f'{"a"}', 'a')
        self.assertEqual(f'{"a"!r}', "'a'")
        self.assertEqual(f'{"a"!a}', "'a'")

        # Not a conversion.
        self.assertEqual(f'{"a!r"}', "a!r")

        # Not a conversion, but show that ! is allowed in a format spec.
        self.assertEqual(f'{3.14:!<10.10}', '3.14!!!!!!')

        self.assertEqual(f'{"\N{GREEK CAPITAL LETTER DELTA}"}', '\u0394')
        self.assertEqual(f'{"\N{GREEK CAPITAL LETTER DELTA}"!r}', "'\u0394'")
        self.assertEqual(f'{"\N{GREEK CAPITAL LETTER DELTA}"!a}', "'\\u0394'")

        self.assertAllRaise(SyntaxError, 'f-string: invalid conversion character',
                            ["f'{3!g}'",
                             "f'{3!A}'",
                             "f'{3!A}'",
                             "f'{3!A}'",
                             "f'{3!!}'",
                             "f'{3!:}'",
                             "f'{3!\N{GREEK CAPITAL LETTER DELTA}}'",
                             "f'{3! s}'",  # no space before conversion char
                             "f'{x!\\x00:.<10}'",
                             ])
        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
                            ["f'{x!s{y}}'",
                             "f'{3!ss}'",
                             "f'{3!ss:}'",
                             "f'{3!ss:s}'",
                             ])
    def test_assignment(self):
        self.assertAllRaise(SyntaxError, 'invalid syntax',
                            ["f'' = 3",
                             "f'{0}' = x",
                             "f'{x}' = x",
                             ])

    def test_del(self):
        self.assertAllRaise(CompileError, 'invalid syntax',  # CPython raises SyntaxError
                            ["del f''",
                             "del '' f''",
                             ])

    def test_mismatched_braces(self):
        self.assertAllRaise(SyntaxError, "f-string: single '}' is not allowed",
                            ["f'{{}'",
                             "f'{{}}}'",
                             "f'}'",
                             "f'x}'",
                             "f'x}x'",

                             # Can't have { or } in a format spec.
                             "f'{3:}>10}'",
                             r"f'{3:\\}>10}'",
                             "f'{3:}}>10}'",
                             ])

        self.assertAllRaise(SyntaxError, "f-string: expecting '}'",
                            ["f'{3:{{>10}'",
                             "f'{3'",
                             "f'{3!'",
                             "f'{3:'",
                             "f'{3!s'",
                             "f'{3!s:'",
                             "f'{3!s:3'",
                             "f'x{'",
                             "f'x{x'",
                             "f'{3:s'",
                             "f'{{{'",
                             "f'{{}}{'",
                             "f'{'",
                             ])

        self.assertAllRaise(SyntaxError, 'invalid syntax',
                            [r"f'{3:\\{>10}'",
                             ])

        # But these are just normal strings.
        self.assertEqual(f'{"{"}', '{')
        self.assertEqual(f'{"}"}', '}')
        self.assertEqual(f'{3:{"}"}>10}', '}}}}}}}}}3')
        self.assertEqual(f'{2:{"{"}>10}', '{{{{{{{{{2')

    def test_if_conditional(self):
        # There's special logic in compile.c to test if the
        # conditional for an if (and while) are constants. Exercise
        # that code.

        def test_fstring(x, expected):
            flag = 0
            if f'{x}':
                flag = 1
            else:
                flag = 2
            self.assertEqual(flag, expected)

        def test_concat_empty(x, expected):
            flag = 0
            if '' f'{x}':
                flag = 1
            else:
                flag = 2
            self.assertEqual(flag, expected)

        def test_concat_non_empty(x, expected):
            flag = 0
            if ' ' f'{x}':
                flag = 1
            else:
                flag = 2
            self.assertEqual(flag, expected)

        test_fstring('', 2)
        test_fstring(' ', 1)

        test_concat_empty('', 2)
        test_concat_empty(' ', 1)

        test_concat_non_empty('', 1)
        test_concat_non_empty(' ', 1)

    def test_empty_format_specifier(self):
        x = 'test'
        self.assertEqual(f'{x}', 'test')
        self.assertEqual(f'{x:}', 'test')
        self.assertEqual(f'{x!s:}', 'test')
        self.assertEqual(f'{x!r:}', "'test'")

    def test_str_format_differences(self):
        d = {'a': 'string',
             0: 'integer',
             }
        a = 0
        self.assertEqual(f'{d[0]}', 'integer')
        self.assertEqual(f'{d["a"]}', 'string')
        self.assertEqual(f'{d[a]}', 'integer')
        self.assertEqual('{d[a]}'.format(d=d), 'string')
        self.assertEqual('{d[0]}'.format(d=d), 'integer')

    def test_invalid_expressions(self):
        self.assertAllRaise(SyntaxError, 'invalid syntax',
                            [r"f'{a[4)}'",
                             r"f'{a(4]}'",
                             ])

    def test_errors(self):
        # see issue 26287
        self.assertAllRaise((TypeError, ValueError), 'non-empty',  # TypeError in Py3.4+
                            [r"f'{(lambda: 0):x}'",
                             r"f'{(0,):x}'",
                             ])
        self.assertAllRaise(ValueError, 'Unknown format code',
                            [r"f'{1000:j}'",
                             r"f'{1000:j}'",
                             ])

    def test_loop(self):
        for i in range(1000):
            self.assertEqual(f'i:{i}', 'i:' + str(i))

    def test_dict(self):
        d = {'"': 'dquote',
             "'": 'squote',
             'foo': 'bar',
             }
        self.assertEqual(f'{d["\'"]}', 'squote')
        self.assertEqual(f"{d['\"']}", 'dquote')

        self.assertEqual(f'''{d["'"]}''', 'squote')
        self.assertEqual(f"""{d['"']}""", 'dquote')

        self.assertEqual(f'{d["foo"]}', 'bar')
        self.assertEqual(f"{d['foo']}", 'bar')
        self.assertEqual(f'{d[\'foo\']}', 'bar')
        self.assertEqual(f"{d[\"foo\"]}", 'bar')

    def test_escaped_quotes(self):
        d = {'"': 'a',
             "'": 'b'}

        self.assertEqual(fr"{d['\"']}", 'a')
        self.assertEqual(fr'{d["\'"]}', 'b')
        self.assertEqual(fr"{'\"'}", '"')
        self.assertEqual(fr'{"\'"}', "'")
        self.assertEqual(f'{"\\"3"}', '"3')

        self.assertAllRaise(SyntaxError, 'f-string: unterminated string',
                            [r'''f'{"""\\}' ''',  # Backslash at end of expression
                             ])
        self.assertAllRaise(SyntaxError, 'unexpected character after line continuation',
                            [r"rf'{3\}'",
                             ])


if __name__ == '__main__':
    unittest.main()
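The test helper cy_eval() compiles snippets on the fly through Cython.Build.Inline.cython_inline, which is also the easiest way to try the new f-string support interactively. A minimal sketch (requires Cython with this commit and a working C compiler; the variable names are illustrative):

from Cython.Build.Inline import cython_inline

# Compile and run a small f-string snippet, the same way the test's
# cy_eval() helper does: keyword arguments become local variables.
name = 'world'
print(cython_inline("return f'hello {name}!'", name=name))   # -> hello world!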