Commit c7a1e68b authored by Stefan Behnel's avatar Stefan Behnel

Optimise %-formatting of strings into f-strings when possible.

parent 7e653adb
...@@ -63,6 +63,8 @@ Features added ...@@ -63,6 +63,8 @@ Features added
* Formatting C enum values in f-strings is faster, as well as some other special cases. * Formatting C enum values in f-strings is faster, as well as some other special cases.
* String formatting with the '%' operator is optimised into f-strings in simple cases.
* Subscripting (item access) is faster in some cases. * Subscripting (item access) is faster in some cases.
* Some ``bytearray`` operations have been optimised similar to ``bytes``. * Some ``bytearray`` operations have been optimised similar to ``bytes``.
......
from __future__ import absolute_import from __future__ import absolute_import
import re
import sys import sys
import copy import copy
import codecs import codecs
...@@ -29,7 +30,7 @@ from . import Options ...@@ -29,7 +30,7 @@ from . import Options
from .Code import UtilityCode, TempitaUtilityCode from .Code import UtilityCode, TempitaUtilityCode
from .StringEncoding import EncodedString, bytes_literal, encoded_string from .StringEncoding import EncodedString, bytes_literal, encoded_string
from .Errors import error from .Errors import error, warning
from .ParseTreeTransforms import SkipDeclarations from .ParseTreeTransforms import SkipDeclarations
try: try:
...@@ -4250,6 +4251,78 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations): ...@@ -4250,6 +4251,78 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations):
sequence_node.mult_factor = factor sequence_node.mult_factor = factor
return sequence_node return sequence_node
def visit_ModNode(self, node):
    """Replace ``u"..." % (a, b, ...)`` by an equivalent f-string node where possible.

    The children are visited first.  The rewrite is only attempted when the left
    operand is a unicode literal and the right operand is a tuple literal without
    a multiplication factor.  If _build_fstring() cannot translate the template
    (it returns None), the original node is returned.
    """
    self.visitchildren(node)

    template, arguments = node.operand1, node.operand2
    if not isinstance(template, ExprNodes.UnicodeNode):
        return node
    if not isinstance(arguments, ExprNodes.TupleNode) or arguments.mult_factor:
        # Not a plain tuple literal: the argument list is not known here.
        return node

    replacement = self._build_fstring(template.pos, template.value, arguments.args)
    return node if replacement is None else replacement
# Pattern used by _build_fstring() to split a '%'-format template with re.split().
# The whole pattern is one capturing group, so re.split() returns the matched
# placeholders (e.g. u'%s', u'%04.2f', u'%%') interleaved with the literal text.
# Only an optional numeric width (or a single space) and an optional precision are
# recognised; any other character that follows '%' (such as a '-' flag) ends up as
# the "format type" character and is rejected by _build_fstring().
_parse_string_format_regex = (
    u'(%(?:' # %...
    u'(?:[0-9]+|[ ])?' # width (optional) or space prefix fill character (optional)
    u'(?:[.][0-9]+)?' # precision (optional)
    u')?.)' # format type (or something different for unsupported formats)
)
def _build_fstring(self, pos, ustring, format_args):
    """Try to translate a '%'-format template and its arguments into an f-string node.

    pos:         source position used for the generated string nodes and for warnings.
    ustring:     the unicode format template (left operand of the '%' operator).
    format_args: the argument expression nodes (items of the right operand tuple).

    Returns the result of visit_JoinedStrNode() for the generated JoinedStrNode,
    or None if the template cannot (or must not) be rewritten.  In that case the
    caller keeps the original '%' operation.  Format problems that are detected
    along the way are reported as level 1 warnings.
    """
    # Issues formatting warnings instead of errors since we really only catch a few errors by accident.
    format_args = list(format_args)
    # A starred item ("%s %s" % (*a,)) expands to an unknown number of values, so
    # placeholders cannot be matched with arguments and counting them would only
    # produce bogus "too few"/"too many" warnings.
    arg_count_is_known = not any(getattr(arg, 'is_starred', False) for arg in format_args)
    args = iter(format_args)
    substrings = []
    can_be_optimised = arg_count_is_known

    for s in re.split(self._parse_string_format_regex, ustring):
        if not s:
            continue
        if s == u'%%':
            substrings.append(ExprNodes.UnicodeNode(pos, value=EncodedString(u'%'), constant_result=u'%'))
            continue
        if s[0] != u'%':
            # Literal text.  A trailing '%' was not matched by the regex, i.e. it has no format type.
            if s[-1] == u'%':
                warning(pos, "Incomplete format: '...%s'" % s[-3:], level=1)
                can_be_optimised = False
            substrings.append(ExprNodes.UnicodeNode(pos, value=EncodedString(s), constant_result=s))
            continue

        if not arg_count_is_known:
            # Keep scanning the literal text for warnings, but do not consume arguments.
            continue

        format_type = s[-1]
        try:
            arg = next(args)
        except StopIteration:
            warning(pos, "Too few arguments for format placeholders", level=1)
            can_be_optimised = False
            break

        if format_type in u'srfdoxX':
            format_spec = s[1:]
            conversion_char = None
            if format_type in u'doxX' and u'.' in format_spec:
                # Precision is not allowed for integers in format(), but ok in %-formatting.
                can_be_optimised = False
            elif format_type in u'rs':
                conversion_char = format_type
                # Strip the type character, it is expressed as '!r' / '!s' conversion instead.
                width, dot, precision = format_spec[:-1].partition(u'.')
                # '%5s' right-aligns and '%05s' / '% s' ignore the '0' / ' ' flag for strings,
                # whereas format() left-aligns strings by default, zero-pads on '0' and rejects
                # a sign flag.  Spell the width as an explicit right-alignment instead.
                width = width.lstrip(u' 0')
                if width:
                    width = u'>' + width
                format_spec = width + dot + precision
            # NOTE(review): '%d' also accepts non-integer numbers (e.g. floats), which
            # format(..., 'd') rejects.  The argument type is not checked here.
            substrings.append(ExprNodes.FormattedValueNode(
                arg.pos, value=arg,
                conversion_char=conversion_char,
                format_spec=ExprNodes.UnicodeNode(
                    pos, value=EncodedString(format_spec), constant_result=format_spec)
                if format_spec else None,
            ))
        else:
            # keep it simple for now ...
            can_be_optimised = False

    if not can_be_optimised:
        # Print all warnings we can find before finally giving up here.
        return None

    try:
        next(args)
    except StopIteration:
        pass
    else:
        warning(pos, "Too many arguments for format placeholders", level=1)
        return None

    node = ExprNodes.JoinedStrNode(pos, values=substrings)
    return self.visit_JoinedStrNode(node)
def visit_FormattedValueNode(self, node): def visit_FormattedValueNode(self, node):
self.visitchildren(node) self.visitchildren(node)
conversion_char = node.conversion_char or 's' conversion_char = node.conversion_char or 's'
......
# mode: run # mode: run
# tag: f_strings, pep498 # tag: f_strings, pep498, werror
#### ####
# Cython specific PEP 498 tests in addition to test_fstring.pyx from CPython # Cython specific PEP 498 tests in addition to test_fstring.pyx from CPython
...@@ -444,3 +444,39 @@ def format_decoded_bytes(bytes value): ...@@ -444,3 +444,39 @@ def format_decoded_bytes(bytes value):
U-xyz U-xyz
""" """
return f"U-{value.decode('utf-8')}" return f"U-{value.decode('utf-8')}"
# Tree assertions for the '%' formatting optimisation: the compiled function must not
# contain an AddNode or ModNode any more ...
@cython.test_fail_if_path_exists(
    "//AddNode",
    "//ModNode",
)
# ... and must contain a JoinedStrNode and FormattedValueNode instead.
@cython.test_assert_path_exists(
    "//FormattedValueNode",
    "//JoinedStrNode",
)
def generated_fstring(int i, unicode u not None, o):
    """
    >>> i, u, o = 11, u'xyz', [1]
    >>> print(((
    ... u"(i) %s-%.3s-%r-%.3r-%d-%3d-%o-%04o-%x-%4x-%X-%03X-%.1f-%04.2f %% "
    ... u"(u) %s-%.2s-%r-%.7r %% "
    ... u"(o) %s-%.2s-%r-%.2r"
    ... ) % (
    ... i, i, i, i, i, i, i, i, i, i, i, i, i, i,
    ... u, u, u, u,
    ... o, o, o, o,
    ... )).replace("-u'xyz'", "-'xyz'"))
    (i) 11-11-11-11-11- 11-13-0013-b- b-B-00B-11.0-11.00 % (u) xyz-xy-'xyz'-'xyz' % (o) [1]-[1-[1]-[1
    >>> print(generated_fstring(i, u, o).replace("-u'xyz'", "-'xyz'"))
    (i) 11-11-11-11-11- 11-13-0013-b- b-B-00B-11.0-11.00 % (u) xyz-xy-'xyz'-'xyz' % (o) [1]-[1-[1]-[1
    """
    # Same template and arguments as the first doctest example, which is evaluated by
    # the Python interpreter, so that both outputs can be compared against each other.
    # The arguments cover a C int (i), a unicode string (u) and a generic object (o).
    return (
        u"(i) %s-%.3s-%r-%.3r-%d-%3d-%o-%04o-%x-%4x-%X-%03X-%.1f-%04.2f %% "
        u"(u) %s-%.2s-%r-%.7r %% "
        u"(o) %s-%.2s-%r-%.2r"
    ) % (
        i, i, i, i, i, i, i, i, i, i, i, i, i, i,
        u, u, u, u,
        o, o, o, o,
    )
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment