Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Gwenaël Samain
cython
Commits
6f6e1b10
Commit
6f6e1b10
authored
Mar 21, 2016
by
Stefan Behnel
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'f_string' into f_strings_2
parents
dec1d449
c6a07689
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
320 additions
and
29 deletions
+320
-29
Cython/Compiler/ExprNodes.py
Cython/Compiler/ExprNodes.py
+109
-0
Cython/Compiler/Lexicon.py
Cython/Compiler/Lexicon.py
+3
-3
Cython/Compiler/ModuleNode.py
Cython/Compiler/ModuleNode.py
+3
-0
Cython/Compiler/Naming.py
Cython/Compiler/Naming.py
+1
-0
Cython/Compiler/Parsing.py
Cython/Compiler/Parsing.py
+204
-26
No files found.
Cython/Compiler/ExprNodes.py
View file @
6f6e1b10
...
@@ -2954,6 +2954,115 @@ class RawCNameExprNode(ExprNode):
...
@@ -2954,6 +2954,115 @@ class RawCNameExprNode(ExprNode):
pass
pass
#-------------------------------------------------------------------
#
# F-strings
#
#-------------------------------------------------------------------
class JoinedStrNode(ExprNode):
    # F-strings
    #
    # values   [UnicodeNode|FormattedValueNode]   Substrings of the f-string
    #
    # The generated C code stores all parts in a temporary list and joins
    # them with the module-level empty unicode string as separator.
    type = py_object_type
    subexprs = ['values']

    def analyse_types(self, env):
        # Analyse all parts first, then coerce each of them to a Python
        # object so that it can be stored in the list that is built by
        # generate_result_code().
        self.values = [v.analyse_types(env) for v in self.values]
        self.values = [v.coerce_to_pyobject(env) for v in self.values]
        self.is_temp = 1
        if len(self.values) == 1:
            # this is not uncommon because f-string format specs are parsed into JoinedStrNodes
            return self.values[0]
        else:
            return self

    def generate_result_code(self, code):
        # Emits a C block that builds a list of all parts and joins it
        # into the result of this node.
        list_var = Naming.quick_temp_cname
        num_items = len(self.values)

        code.putln('{')
        code.putln('PyObject *%s = PyList_New(%s); %s' % (
            list_var,
            num_items,
            code.error_goto_if_null(list_var, self.pos)))
        code.put_gotref(list_var)

        for i, value in enumerate(self.values):
            # PyList_SET_ITEM() steals a reference, so hand over a new one
            # and leave the reference owned by the sub-expression untouched.
            code.put_incref(value.result(), value.ctype())
            code.put_giveref(value.py_result())
            code.putln('PyList_SET_ITEM(%s, %s, %s);' % (list_var, i, value.py_result()))

        # The list is released before the error check, so the check must look
        # at the join result: the list pointer is non-NULL even when
        # PyUnicode_Join() has failed.
        code.putln('%s = PyUnicode_Join(%s, %s); __Pyx_DECREF(%s); %s' % (
            self.result(),
            Naming.empty_unicode,
            list_var,
            list_var,
            code.error_goto_if_null(self.py_result(), self.pos)))
        code.put_gotref(self.py_result())
        code.putln('}')
class FormattedValueNode(ExprNode):
    # {}-delimited portions of an f-string
    #
    # value           ExprNode                The expression itself
    # conversion_char str or None             Type conversion (!s, !r, !a, or none)
    # format_spec     JoinedStrNode or None   Format string passed to __format__
    #
    # NOTE(review): both methods below dereference format_spec
    # unconditionally, so a node is effectively required here - confirm
    # whether None is meant to be supported.
    subexprs = ['value', 'format_spec']
    conversion_chars = 'sra'
    type = py_object_type

    # maps each supported conversion character to the C function applied to
    # the value before it is formatted
    _conversion_functions = {
        's': 'PyObject_Str',
        'r': 'PyObject_Repr',
        'a': 'PyObject_ASCII',
    }

    def analyse_types(self, env):
        # Analyse both sub-expressions and coerce them to Python objects,
        # since they are passed to PyObject_Format() in the generated code.
        value = self.value.analyse_types(env)
        format_spec = self.format_spec.analyse_types(env)
        self.value = value.coerce_to_pyobject(env)
        self.format_spec = format_spec.coerce_to_pyobject(env)
        self.is_temp = True
        return self

    def generate_result_code(self, code):
        # Emits a C block that optionally converts the value (!s, !r, !a)
        # and then formats it with the format spec.
        value_result = self.value.py_result()
        conversion_result = Naming.quick_temp_cname
        format_spec_result = self.format_spec.py_result()

        # None when there is no (known) conversion character
        fn = self._conversion_functions.get(self.conversion_char)

        code.putln('{')
        if fn is not None:
            code.putln('PyObject *%s = %s(%s); %s' % (
                conversion_result,
                fn,
                value_result,
                code.error_goto_if_null(conversion_result, self.pos)))
            # The conversion function returns a new reference that is owned by
            # this block: register it and release it once it has been formatted.
            code.put_gotref(conversion_result)
            decref_line = '__Pyx_DECREF(%s);' % conversion_result
        else:
            # No conversion: only alias the value, the reference stays owned
            # by the sub-expression and must not be released here.
            code.putln('PyObject *%s = %s;' % (conversion_result, value_result))
            decref_line = ''

        # The converted value is released before the error check so that it
        # does not leak when formatting fails.
        code.putln("%s = PyObject_Format(%s, %s); %s %s" % (
            self.result(),
            conversion_result,
            format_spec_result,
            decref_line,
            code.error_goto_if_null(self.result(), self.pos)))
        code.put_gotref(self.py_result())
        code.putln('}')
#-------------------------------------------------------------------
#-------------------------------------------------------------------
#
#
# Parallel nodes (cython.parallel.thread(savailable|id))
# Parallel nodes (cython.parallel.thread(savailable|id))
...
...
Cython/Compiler/Lexicon.py
View file @
6f6e1b10
...
@@ -7,7 +7,7 @@ from __future__ import absolute_import
...
@@ -7,7 +7,7 @@ from __future__ import absolute_import
raw_prefixes
=
"rR"
raw_prefixes
=
"rR"
bytes_prefixes
=
"bB"
bytes_prefixes
=
"bB"
string_prefixes
=
"uU"
+
bytes_prefixes
string_prefixes
=
"
fF
uU"
+
bytes_prefixes
char_prefixes
=
"cC"
char_prefixes
=
"cC"
any_string_prefix
=
raw_prefixes
+
string_prefixes
+
char_prefixes
any_string_prefix
=
raw_prefixes
+
string_prefixes
+
char_prefixes
IDENT
=
'IDENT'
IDENT
=
'IDENT'
...
@@ -40,8 +40,8 @@ def make_lexicon():
...
@@ -40,8 +40,8 @@ def make_lexicon():
fltconst
=
(
decimal_fract
+
Opt
(
exponent
))
|
(
decimal
+
exponent
)
fltconst
=
(
decimal_fract
+
Opt
(
exponent
))
|
(
decimal
+
exponent
)
imagconst
=
(
intconst
|
fltconst
)
+
Any
(
"jJ"
)
imagconst
=
(
intconst
|
fltconst
)
+
Any
(
"jJ"
)
beginstring
=
Opt
(
Any
(
string_prefixes
)
+
Opt
(
Any
(
raw_prefixes
))
|
# invalid combinations of prefixes are caught in p_string_literal
Any
(
raw_prefixes
)
+
Opt
(
Any
(
bytes
_prefixes
))
|
beginstring
=
Opt
(
Rep
(
Any
(
string_prefixes
+
raw
_prefixes
))
|
Any
(
char_prefixes
)
Any
(
char_prefixes
)
)
+
(
Str
(
"'"
)
|
Str
(
'"'
)
|
Str
(
"'''"
)
|
Str
(
'"""'
))
)
+
(
Str
(
"'"
)
|
Str
(
'"'
)
|
Str
(
"'''"
)
|
Str
(
'"""'
))
two_oct
=
octdigit
+
octdigit
two_oct
=
octdigit
+
octdigit
...
...
Cython/Compiler/ModuleNode.py
View file @
6f6e1b10
...
@@ -695,6 +695,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
...
@@ -695,6 +695,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code
.
putln
(
'static PyObject *%s;'
%
Naming
.
builtins_cname
)
code
.
putln
(
'static PyObject *%s;'
%
Naming
.
builtins_cname
)
code
.
putln
(
'static PyObject *%s;'
%
Naming
.
empty_tuple
)
code
.
putln
(
'static PyObject *%s;'
%
Naming
.
empty_tuple
)
code
.
putln
(
'static PyObject *%s;'
%
Naming
.
empty_bytes
)
code
.
putln
(
'static PyObject *%s;'
%
Naming
.
empty_bytes
)
code
.
putln
(
'static PyObject *%s;'
%
Naming
.
empty_unicode
)
if
Options
.
pre_import
is
not
None
:
if
Options
.
pre_import
is
not
None
:
code
.
putln
(
'static PyObject *%s;'
%
Naming
.
preimport_cname
)
code
.
putln
(
'static PyObject *%s;'
%
Naming
.
preimport_cname
)
code
.
putln
(
'static int %s;'
%
Naming
.
lineno_cname
)
code
.
putln
(
'static int %s;'
%
Naming
.
lineno_cname
)
...
@@ -2123,6 +2124,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
...
@@ -2123,6 +2124,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
Naming
.
empty_tuple
,
code
.
error_goto_if_null
(
Naming
.
empty_tuple
,
self
.
pos
)))
Naming
.
empty_tuple
,
code
.
error_goto_if_null
(
Naming
.
empty_tuple
,
self
.
pos
)))
code
.
putln
(
"%s = PyBytes_FromStringAndSize(
\
"
\
"
, 0); %s"
%
(
code
.
putln
(
"%s = PyBytes_FromStringAndSize(
\
"
\
"
, 0); %s"
%
(
Naming
.
empty_bytes
,
code
.
error_goto_if_null
(
Naming
.
empty_bytes
,
self
.
pos
)))
Naming
.
empty_bytes
,
code
.
error_goto_if_null
(
Naming
.
empty_bytes
,
self
.
pos
)))
code
.
putln
(
"%s = PyUnicode_FromStringAndSize(
\
"
\
"
, 0); %s"
%
(
Naming
.
empty_unicode
,
code
.
error_goto_if_null
(
Naming
.
empty_unicode
,
self
.
pos
)))
for
ext_type
in
(
'CyFunction'
,
'FusedFunction'
,
'Coroutine'
,
'Generator'
,
'StopAsyncIteration'
):
for
ext_type
in
(
'CyFunction'
,
'FusedFunction'
,
'Coroutine'
,
'Generator'
,
'StopAsyncIteration'
):
code
.
putln
(
"#ifdef __Pyx_%s_USED"
%
ext_type
)
code
.
putln
(
"#ifdef __Pyx_%s_USED"
%
ext_type
)
...
...
Cython/Compiler/Naming.py
View file @
6f6e1b10
...
@@ -96,6 +96,7 @@ gilstate_cname = pyrex_prefix + "state"
...
@@ -96,6 +96,7 @@ gilstate_cname = pyrex_prefix + "state"
skip_dispatch_cname
=
pyrex_prefix
+
"skip_dispatch"
skip_dispatch_cname
=
pyrex_prefix
+
"skip_dispatch"
empty_tuple
=
pyrex_prefix
+
"empty_tuple"
empty_tuple
=
pyrex_prefix
+
"empty_tuple"
empty_bytes
=
pyrex_prefix
+
"empty_bytes"
empty_bytes
=
pyrex_prefix
+
"empty_bytes"
empty_unicode
=
pyrex_prefix
+
"empty_unicode"
print_function
=
pyrex_prefix
+
"print"
print_function
=
pyrex_prefix
+
"print"
print_function_kwargs
=
pyrex_prefix
+
"print_kwargs"
print_function_kwargs
=
pyrex_prefix
+
"print_kwargs"
cleanup_cname
=
pyrex_prefix
+
"module_cleanup"
cleanup_cname
=
pyrex_prefix
+
"module_cleanup"
...
...
Cython/Compiler/Parsing.py
View file @
6f6e1b10
...
@@ -15,12 +15,13 @@ cython.declare(Nodes=object, ExprNodes=object, EncodedString=object,
...
@@ -15,12 +15,13 @@ cython.declare(Nodes=object, ExprNodes=object, EncodedString=object,
re
=
object
,
_unicode
=
object
,
_bytes
=
object
,
re
=
object
,
_unicode
=
object
,
_bytes
=
object
,
partial
=
object
,
reduce
=
object
,
_IS_PY3
=
cython
.
bint
)
partial
=
object
,
reduce
=
object
,
_IS_PY3
=
cython
.
bint
)
from
io
import
StringIO
import
re
import
re
import
sys
import
sys
from
unicodedata
import
lookup
as
lookup_unicodechar
from
unicodedata
import
lookup
as
lookup_unicodechar
from
functools
import
partial
,
reduce
from
functools
import
partial
,
reduce
from
.Scanning
import
PyrexScanner
,
FileSourceDescriptor
from
.Scanning
import
PyrexScanner
,
FileSourceDescriptor
,
StringSourceDescriptor
from
.
import
Nodes
from
.
import
Nodes
from
.
import
ExprNodes
from
.
import
ExprNodes
from
.
import
Builtin
from
.
import
Builtin
...
@@ -693,8 +694,12 @@ def p_atom(s):
...
@@ -693,8 +694,12 @@ def p_atom(s):
return
ExprNodes
.
UnicodeNode
(
pos
,
value
=
unicode_value
,
bytes_value
=
bytes_value
)
return
ExprNodes
.
UnicodeNode
(
pos
,
value
=
unicode_value
,
bytes_value
=
bytes_value
)
elif
kind
==
'b'
:
elif
kind
==
'b'
:
return
ExprNodes
.
BytesNode
(
pos
,
value
=
bytes_value
)
return
ExprNodes
.
BytesNode
(
pos
,
value
=
bytes_value
)
else
:
elif
kind
==
'f'
:
return
ExprNodes
.
JoinedStrNode
(
pos
,
values
=
unicode_value
)
elif
kind
==
''
:
return
ExprNodes
.
StringNode
(
pos
,
value
=
bytes_value
,
unicode_value
=
unicode_value
)
return
ExprNodes
.
StringNode
(
pos
,
value
=
bytes_value
,
unicode_value
=
unicode_value
)
else
:
s
.
error
(
"invalid string kind '%s'"
%
kind
)
elif
sy
==
'IDENT'
:
elif
sy
==
'IDENT'
:
name
=
s
.
systring
name
=
s
.
systring
s
.
next
()
s
.
next
()
...
@@ -788,29 +793,44 @@ def wrap_compile_time_constant(pos, value):
...
@@ -788,29 +793,44 @@ def wrap_compile_time_constant(pos, value):
def
p_cat_string_literal
(
s
):
def
p_cat_string_literal
(
s
):
# A sequence of one or more adjacent string literals.
# A sequence of one or more adjacent string literals.
# Returns (kind, bytes_value, unicode_value)
# Returns (kind, bytes_value, unicode_value)
# where kind in ('b', 'c', 'u', '')
# where kind in ('b', 'c', 'u', 'f', '')
pos
=
s
.
position
()
kind
,
bytes_value
,
unicode_value
=
p_string_literal
(
s
)
kind
,
bytes_value
,
unicode_value
=
p_string_literal
(
s
)
if
kind
==
'c'
or
s
.
sy
!=
'BEGIN_STRING'
:
if
kind
==
'c'
or
s
.
sy
!=
'BEGIN_STRING'
:
return
kind
,
bytes_value
,
unicode_value
return
kind
,
bytes_value
,
unicode_value
bstrings
,
ustrings
=
[
bytes_value
],
[
unicode_value
]
bstrings
,
ustrings
,
positions
=
[
bytes_value
],
[
unicode_value
],
[
pos
]
bytes_value
=
unicode_value
=
None
bytes_value
=
unicode_value
=
None
while
s
.
sy
==
'BEGIN_STRING'
:
while
s
.
sy
==
'BEGIN_STRING'
:
pos
=
s
.
position
()
pos
=
s
.
position
()
next_kind
,
next_bytes_value
,
next_unicode_value
=
p_string_literal
(
s
)
next_kind
,
next_bytes_value
,
next_unicode_value
=
p_string_literal
(
s
)
if
next_kind
==
'c'
:
if
next_kind
==
'c'
:
error
(
pos
,
"Cannot concatenate char literal with another string or char literal"
)
error
(
pos
,
"Cannot concatenate char literal with another string or char literal"
)
continue
elif
next_kind
!=
kind
:
elif
next_kind
!=
kind
:
error
(
pos
,
"Cannot mix string literals of different types, expected %s'', got %s''"
%
# concatenating f strings and normal strings is allowed and leads to an f string
(
kind
,
next_kind
))
if
{
kind
,
next_kind
}
==
{
'f'
,
'u'
}
or
{
kind
,
next_kind
}
==
{
'f'
,
''
}:
else
:
kind
=
'f'
bstrings
.
append
(
next_bytes_value
)
else
:
ustrings
.
append
(
next_unicode_value
)
error
(
pos
,
"Cannot mix string literals of different types, expected %s'', got %s''"
%
(
kind
,
next_kind
))
continue
bstrings
.
append
(
next_bytes_value
)
ustrings
.
append
(
next_unicode_value
)
positions
.
append
(
pos
)
# join and rewrap the partial literals
# join and rewrap the partial literals
if
kind
in
(
'b'
,
'c'
,
''
)
or
kind
==
'u'
and
None
not
in
bstrings
:
if
kind
in
(
'b'
,
'c'
,
''
)
or
kind
==
'u'
and
None
not
in
bstrings
:
# Py3 enforced unicode literals are parsed as bytes/unicode combination
# Py3 enforced unicode literals are parsed as bytes/unicode combination
bytes_value
=
bytes_literal
(
StringEncoding
.
join_bytes
(
bstrings
),
s
.
source_encoding
)
bytes_value
=
bytes_literal
(
StringEncoding
.
join_bytes
(
bstrings
),
s
.
source_encoding
)
if
kind
in
(
'u'
,
''
):
if
kind
in
(
'u'
,
''
):
unicode_value
=
EncodedString
(
u''
.
join
([
u
for
u
in
ustrings
if
u
is
not
None
])
)
unicode_value
=
EncodedString
(
u''
.
join
([
u
for
u
in
ustrings
if
u
is
not
None
])
)
if
kind
==
'f'
:
unicode_value
=
[]
for
u
,
pos
in
zip
(
ustrings
,
positions
):
if
isinstance
(
u
,
list
):
unicode_value
+=
u
else
:
# non-f-string concatenated into the f-string
unicode_value
.
append
(
ExprNodes
.
UnicodeNode
(
pos
,
value
=
EncodedString
(
u
)))
return
kind
,
bytes_value
,
unicode_value
return
kind
,
bytes_value
,
unicode_value
def
p_opt_string_literal
(
s
,
required_type
=
'u'
):
def
p_opt_string_literal
(
s
,
required_type
=
'u'
):
...
@@ -833,36 +853,52 @@ def check_for_non_ascii_characters(string):
...
@@ -833,36 +853,52 @@ def check_for_non_ascii_characters(string):
def
p_string_literal
(
s
,
kind_override
=
None
):
def
p_string_literal
(
s
,
kind_override
=
None
):
# A single string or char literal. Returns (kind, bvalue, uvalue)
# A single string or char literal. Returns (kind, bvalue, uvalue)
# where kind in ('b', 'c', 'u', ''). The 'bvalue' is the source
# where kind in ('b', 'c', 'u', '
f', '
'). The 'bvalue' is the source
# code byte sequence of the string literal, 'uvalue' is the
# code byte sequence of the string literal, 'uvalue' is the
# decoded Unicode string. Either of the two may be None depending
# decoded Unicode string. Either of the two may be None depending
# on the 'kind' of string, only unprefixed strings have both
# on the 'kind' of string, only unprefixed strings have both
# representations.
# representations. In f-strings, the uvalue is a list of the Unicode
# strings and f-string expressions that make up the f-string.
# s.sy == 'BEGIN_STRING'
# s.sy == 'BEGIN_STRING'
pos
=
s
.
position
()
pos
=
s
.
position
()
is_raw
=
False
is_raw
=
False
is_python3_source
=
s
.
context
.
language_level
>=
3
is_python3_source
=
s
.
context
.
language_level
>=
3
has_non_ascii_literal_characters
=
False
has_non_ascii_literal_characters
=
False
kind
=
s
.
systring
[:
1
].
lower
()
kind_string
=
s
.
systring
.
rstrip
(
'"
\
'
'
).
lower
()
if
kind
==
'r'
:
if
len
(
set
(
kind_string
))
!=
len
(
kind_string
):
# Py3 allows both 'br' and 'rb' as prefix
s
.
error
(
'Duplicate string prefix character'
)
if
s
.
systring
[
1
:
2
].
lower
()
==
'b'
:
if
'b'
in
kind_string
and
'u'
in
kind_string
:
kind
=
'b'
s
.
error
(
'String prefixes b and u cannot be combined'
)
else
:
if
'b'
in
kind_string
and
'f'
in
kind_string
:
kind
=
''
s
.
error
(
'String prefixes b and f cannot be combined'
)
is_raw
=
True
if
'u'
in
kind_string
and
'f'
in
kind_string
:
elif
kind
in
'ub'
:
s
.
error
(
'String prefixes u and f cannot be combined'
)
is_raw
=
s
.
systring
[
1
:
2
].
lower
()
==
'r'
elif
kind
!=
'c'
:
is_raw
=
'r'
in
kind_string
if
'c'
in
kind_string
:
# this should never happen, since the lexer does not allow combining c
# with other prefix characters
if
len
(
kind_string
)
!=
1
:
s
.
error
(
'Invalid string prefix for character literal'
)
kind
=
'c'
elif
'f'
in
kind_string
:
kind
=
'f'
# u is ignored
elif
'b'
in
kind_string
:
kind
=
'b'
elif
'u'
in
kind_string
:
kind
=
'u'
else
:
kind
=
''
kind
=
''
if
kind
==
''
and
kind_override
is
None
and
Future
.
unicode_literals
in
s
.
context
.
future_directives
:
if
kind
==
''
and
kind_override
is
None
and
Future
.
unicode_literals
in
s
.
context
.
future_directives
:
chars
=
StringEncoding
.
StrLiteralBuilder
(
s
.
source_encoding
)
chars
=
StringEncoding
.
StrLiteralBuilder
(
s
.
source_encoding
)
kind
=
'u'
kind
=
'u'
else
:
else
:
if
kind_override
is
not
None
and
kind_override
in
'ub'
:
if
kind_override
is
not
None
and
kind_override
in
'ub'
:
kind
=
kind_override
kind
=
kind_override
if
kind
==
'u'
:
if
kind
in
{
'u'
,
'f'
}:
# f-strings are scanned exactly like Unicode literals, but are parsed further later
chars
=
StringEncoding
.
UnicodeLiteralBuilder
()
chars
=
StringEncoding
.
UnicodeLiteralBuilder
()
elif
kind
==
''
:
elif
kind
==
''
:
chars
=
StringEncoding
.
StrLiteralBuilder
(
s
.
source_encoding
)
chars
=
StringEncoding
.
StrLiteralBuilder
(
s
.
source_encoding
)
...
@@ -873,7 +909,7 @@ def p_string_literal(s, kind_override=None):
...
@@ -873,7 +909,7 @@ def p_string_literal(s, kind_override=None):
s
.
next
()
s
.
next
()
sy
=
s
.
sy
sy
=
s
.
sy
systr
=
s
.
systring
systr
=
s
.
systring
#print "p_string_literal: sy =", sy, repr(s.systring) ###
#
print "p_string_literal: sy =", sy, repr(s.systring) ###
if
sy
==
'CHARS'
:
if
sy
==
'CHARS'
:
chars
.
append
(
systr
)
chars
.
append
(
systr
)
if
is_python3_source
and
not
has_non_ascii_literal_characters
and
check_for_non_ascii_characters
(
systr
):
if
is_python3_source
and
not
has_non_ascii_literal_characters
and
check_for_non_ascii_characters
(
systr
):
...
@@ -901,7 +937,7 @@ def p_string_literal(s, kind_override=None):
...
@@ -901,7 +937,7 @@ def p_string_literal(s, kind_override=None):
else
:
else
:
s
.
error
(
"Invalid hex escape '%s'"
%
systr
,
s
.
error
(
"Invalid hex escape '%s'"
%
systr
,
fatal
=
False
)
fatal
=
False
)
elif
c
in
u'NUu'
and
kind
in
(
'u'
,
''
):
# \uxxxx, \Uxxxxxxxx, \N{...}
elif
c
in
u'NUu'
and
kind
in
(
'u'
,
'
f'
,
'
'
):
# \uxxxx, \Uxxxxxxxx, \N{...}
chrval
=
-
1
chrval
=
-
1
if
c
==
u'N'
:
if
c
==
u'N'
:
try
:
try
:
...
@@ -943,14 +979,156 @@ def p_string_literal(s, kind_override=None):
...
@@ -943,14 +979,156 @@ def p_string_literal(s, kind_override=None):
bytes_value
,
unicode_value
=
chars
.
getstrings
()
bytes_value
,
unicode_value
=
chars
.
getstrings
()
if
is_python3_source
and
has_non_ascii_literal_characters
:
if
is_python3_source
and
has_non_ascii_literal_characters
:
# Python 3 forbids literal non-ASCII characters in byte strings
# Python 3 forbids literal non-ASCII characters in byte strings
if
kind
!=
'u'
:
if
kind
not
in
(
'u'
,
'f'
)
:
s
.
error
(
"bytes can only contain ASCII literal characters."
,
s
.
error
(
"bytes can only contain ASCII literal characters."
,
pos
=
pos
,
fatal
=
False
)
pos
=
pos
,
fatal
=
False
)
bytes_value
=
None
bytes_value
=
None
if
kind
==
'f'
:
unicode_value
=
p_f_string
(
s
,
unicode_value
,
pos
)
s
.
next
()
s
.
next
()
return
(
kind
,
bytes_value
,
unicode_value
)
return
(
kind
,
bytes_value
,
unicode_value
)
def p_f_string(s, unicode_value, pos):
    # Splits the decoded content of a PEP 498 f-string into a list of nodes:
    # UnicodeNodes for the literal text and FormattedValueNodes for the
    # {}-delimited expressions.  All nodes are created at position 'pos'.
    nodes = []
    end = len(unicode_value)
    literal_start = 0
    index = 0

    while index < end:
        char = unicode_value[index]
        if char != '{' and char != '}':
            # ordinary character, part of the current literal
            index += 1
            continue

        if index + 1 < end and unicode_value[index + 1] == char:
            # doubled brace: keep the first one as part of the literal
            # and skip the second one
            literal = EncodedString(unicode_value[literal_start:index + 1])
            nodes.append(ExprNodes.UnicodeNode(pos, value=literal))
            index += 2
            literal_start = index
            continue

        if char == '}':
            s.error("single '}' encountered in format string")
            continue

        # single '{': finish the current literal and parse the expression
        literal = EncodedString(unicode_value[literal_start:index])
        nodes.append(ExprNodes.UnicodeNode(pos, value=literal))
        index, expr_node = p_f_string_expr(s, unicode_value, pos, index + 1)
        literal_start = index
        nodes.append(expr_node)

    # whatever follows the last expression is the trailing literal
    literal = EncodedString(unicode_value[literal_start:])
    nodes.append(ExprNodes.UnicodeNode(pos, value=literal))
    return nodes
def p_f_string_expr(s, unicode_value, pos, starting_index):
    # Parses a {}-delimited expression inside an f-string.
    #
    # 'starting_index' is the index of the first character after the opening
    # '{' in 'unicode_value'.  Returns a tuple (next_index, node) where
    # 'next_index' is the index that follows the closing '}' and 'node' is
    # the FormattedValueNode for the expression.
    i = starting_index
    size = len(unicode_value)
    conversion_char = None
    format_spec_str = u''
    nested_depth = 0
    quote_char = None
    in_triple_quotes = False

    # Find the end of the expression: the first '!', ':' or '}' that is
    # neither inside a string literal nor inside nested brackets.
    while True:
        if i >= size:
            s.error("missing '}' in format string expression")
        c = unicode_value[i]

        if quote_char is not None:
            if c == '\\':
                # skip the escaped character
                i += 1
            elif c == quote_char:
                if in_triple_quotes:
                    if i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
                        in_triple_quotes = False
                        quote_char = None
                        i += 2
                else:
                    quote_char = None
        elif c in '\'"':
            quote_char = c
            if i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
                in_triple_quotes = True
                i += 2
        elif c in '{[(':
            nested_depth += 1
        elif nested_depth != 0 and c in '}])':
            nested_depth -= 1
        elif c == '#':
            s.error("format string cannot include #")
        elif nested_depth == 0 and c in '!:}':
            # allow != as a special case
            if c == '!' and i + 1 < size and unicode_value[i + 1] == '=':
                i += 1
                continue
            terminal_char = c
            break
        i += 1

    expr_text = unicode_value[starting_index:i]
    if not expr_text.strip():
        # PEP 498 rejects empty replacement fields; without this check the
        # parenthesised source below would be '()', i.e. an empty tuple.
        s.error("empty expression not allowed in f-string")

    # the expression is parsed as if it is surrounded by parentheses
    expr_str = u'(%s)' % expr_text

    if terminal_char == '!':
        i += 1
        if i >= size:
            s.error("invalid conversion char at end of string")
        conversion_char = unicode_value[i]
        i += 1
        if i >= size:
            s.error("invalid conversion char at end of string")
        terminal_char = unicode_value[i]

    if terminal_char == ':':
        # The format spec runs up to the matching '}' and may itself contain
        # one level of {}-delimited expressions.
        nested_depth = 0
        start_format_spec = i + 1
        while True:
            if i >= size:
                s.error("missing '}' in format specifier")
            c = unicode_value[i]
            if c == '{':
                if nested_depth >= 1:
                    s.error("nesting of '{' in format specifier is not allowed")
                nested_depth += 1
            elif c == '}' and nested_depth == 0:
                terminal_char = c
                break
            elif c == '}':
                nested_depth -= 1
            i += 1
        format_spec_str = unicode_value[start_format_spec:i]

    if terminal_char != '}':
        s.error("missing '}' in format string expression")

    # parse the expression
    name = 'format string expression'
    code_source = StringSourceDescriptor(name, expr_str)
    buf = StringIO(expr_str)
    scanner = PyrexScanner(buf, code_source, parent_scanner=s, source_encoding=s.source_encoding)
    expr = p_testlist(scanner)  # TODO is testlist right here?

    # validate the conversion char
    if conversion_char is not None and conversion_char not in ExprNodes.FormattedValueNode.conversion_chars:
        s.error("invalid conversion character '%s'" % conversion_char)

    # the format spec is itself treated like an f-string; it is the empty
    # string when the expression has no ':' part
    format_spec = ExprNodes.JoinedStrNode(pos, values=p_f_string(s, format_spec_str, pos))

    return i + 1, ExprNodes.FormattedValueNode(
        s.position(), value=expr, conversion_char=conversion_char,
        format_spec=format_spec)
# since PEP 448:
# since PEP 448:
# list_display ::= "[" [listmaker] "]"
# list_display ::= "[" [listmaker] "]"
# listmaker ::= (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
# listmaker ::= (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment