Commit e09468bd authored by Stefan Behnel

Infer bytes/str type for safe bytes/str %-formatting cases that never return...

Infer bytes/str type for safe bytes/str %-formatting cases that never return Unicode strings in Py2.
Closes #2153.
parent 54a5fb54
......@@ -22,6 +22,10 @@ Bugs fixed
fail if the base class constructor was declared without ``nogil``.
(Github issue #2157)
* Bytes %-formatting inferred ``basestring`` (bytes or unicode) as result type
in some cases where ``bytes`` would have been safe to infer.
(Github issue #2153)
0.28 (2018-03-13)
=================
......
......@@ -16,6 +16,7 @@ cython.declare(error=object, warning=object, warn_once=object, InternalError=obj
bytearray_type=object, slice_type=object, _py_int_types=object,
IS_PYTHON3=cython.bint)
import re
import sys
import copy
import os.path
......@@ -11484,6 +11485,20 @@ class DivNode(NumBinopNode):
self.operand2.result())
_find_formatting_types = re.compile(
br"%"
br"(?:%|" # %%
br"(?:\([^)]+\))?" # %(name)
br"[-+#,0-9 ]*([a-z])" # %.2f etc.
br")").findall
# These format conversion types can never trigger a Unicode string conversion in Py2.
_safe_bytes_formats = set([
# Excludes 's' and 'r', which can generate non-bytes strings.
b'd', b'i', b'o', b'u', b'x', b'X', b'e', b'E', b'f', b'F', b'g', b'G', b'c', b'b', b'a',
])
class ModNode(DivNode):
# '%' operator.
......@@ -11493,7 +11508,7 @@ class ModNode(DivNode):
or NumBinopNode.is_py_operation_types(self, type1, type2))
def infer_builtin_types_operation(self, type1, type2):
# b'%s' % xyz raises an exception in Py3, so it's safe to infer the type for Py2
# b'%s' % xyz raises an exception in Py3<3.5, so it's safe to infer the type for Py2 and later Py3's.
if type1 is unicode_type:
# None + xyz may be implemented by RHS
if type2.is_builtin_type or not self.operand1.may_be_none():
......@@ -11503,6 +11518,11 @@ class ModNode(DivNode):
return type2
elif type2.is_numeric:
return type1
elif self.operand1.is_string_literal:
if type1 is str_type or type1 is bytes_type:
if set(_find_formatting_types(self.operand1.value)) <= _safe_bytes_formats:
return type1
return basestring_type
elif type1 is bytes_type and not type2.is_builtin_type:
return None # RHS might implement '% operator differently in Py3
else:
......
......@@ -215,10 +215,18 @@ def def_to_cdef(source):
return '\n'.join(output)
def exclude_extension_in_pyver(*versions):
    """Build a callback that drops extensions on the given Python versions.

    *versions* are ``(major, minor)`` tuples.  The returned callable takes an
    extension and returns ``EXCLUDE_EXT`` when the running interpreter's
    ``sys.version_info[:2]`` is one of them; otherwise it returns the
    extension unchanged.
    """
    def check(ext):
        running_version = sys.version_info[:2]
        if running_version in versions:
            return EXCLUDE_EXT
        return ext

    return check
def update_linetrace_extension(ext):
    """Append the ``CYTHON_TRACE=1`` macro definition to *ext* and return it."""
    trace_macro = ('CYTHON_TRACE', 1)
    ext.define_macros.append(trace_macro)
    return ext
def update_numpy_extension(ext):
import numpy
from numpy.distutils.misc_util import get_info
......@@ -339,6 +347,7 @@ EXT_EXTRAS = {
'tag:openmp': update_openmp_extension,
'tag:cpp11': update_cpp11_extension,
'tag:trace' : update_linetrace_extension,
'tag:bytesformat': exclude_extension_in_pyver((3, 3), (3, 4)), # no %-bytes formatting
}
......
# mode: run
# tag: stringformat, bytesformat
import sys
# True when the interpreter's major version is below 3.
IS_PY2 = sys.version_info[0] < 3
if IS_PY2:
    # Module-level doctest that is only defined on Python 2: it passes a
    # unicode string (u'abc') to format_bytes_with_str().
    __doc__ = """
>>> print(format_bytes_with_str(u'abc'))
1 12170405abc6A
"""
def format_bytes():
    """
    >>> print(format_bytes())
    1 121704056A
    """
    # The format string uses no '%s' or '%r' conversion, and the result is
    # assigned directly to a variable declared as 'bytes'.
    cdef bytes result = b'%d%3i%x%02X%02.0f%g%c' % (
        1, 12, 23, 4, 5, 6, 65)
    # Check the exact runtime type as well, not just the declared one.
    assert type(result) is bytes
    # Decoded so that print() in the doctest shows plain text.
    return result.decode('ascii')
def format_bytes_with_str(s):
    """
    >>> print(format_bytes_with_str(b'abc'))
    1 12170405abc6A
    """
    # Same conversions as in format_bytes() plus a '%s' that is filled from
    # the argument; 'result' is deliberately left without a declared type.
    result = b'%d%3i%x%02X%02.0f%s%g%c' % (
        1, 12, 23, 4, 5, s, 6, 65)
    # On Python 2 the formatted value is returned as is; otherwise it is
    # decoded from ASCII so that print() in the doctest shows plain text.
    return result if IS_PY2 else result.decode('ascii')
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment