Commit 8dc18957 authored by Stefan Behnel's avatar Stefan Behnel

make C char* string literals "const" unless specified otherwise

prevent non-const char* string literals from being interned
parent 06b21b17
......@@ -1180,6 +1180,11 @@ class GlobalState(object):
def get_interned_identifier(self, text):
return self.get_py_string_const(text, identifier=True)
def as_c_string_literal(self, byte_string):
value = StringEncoding.split_string_literal(
StringEncoding.escape_byte_string(byte_string.byteencode()))
return '"%s"' % value
def new_string_const(self, text, byte_string):
cname = self.new_string_const_cname(byte_string)
c = StringConst(cname, text, byte_string)
......@@ -1313,7 +1318,7 @@ class GlobalState(object):
conditional = True
decls_writer.putln("#if PY_MAJOR_VERSION %s 3" % (
(2 in c.py_versions) and '<' or '>='))
decls_writer.putln('static char %s[] = "%s";' % (
decls_writer.putln('static const char %s[] = "%s";' % (
cname, StringEncoding.split_string_literal(c.escaped_value)))
if conditional:
decls_writer.putln("#endif")
......@@ -1639,6 +1644,9 @@ class CCodeWriter(object):
return self.globalstate.get_py_string_const(
text, identifier, is_str, unicode_value).cname
def as_c_string_literal(self, text):
return self.globalstate.as_c_string_literal(text)
def get_argument_default_const(self, type):
return self.globalstate.get_py_const(type).cname
......
......@@ -81,21 +81,33 @@ coercion_error_dict = {
" This is not portable and requires explicit encoding."),
(unicode_type, bytes_type): "Cannot convert Unicode string to 'bytes' implicitly, encoding required.",
(unicode_type, PyrexTypes.c_char_ptr_type): "Unicode objects only support coercion to Py_UNICODE*.",
(unicode_type, PyrexTypes.c_const_char_ptr_type): "Unicode objects only support coercion to Py_UNICODE*.",
(unicode_type, PyrexTypes.c_uchar_ptr_type): "Unicode objects only support coercion to Py_UNICODE*.",
(unicode_type, PyrexTypes.c_const_uchar_ptr_type): "Unicode objects only support coercion to Py_UNICODE*.",
(bytes_type, unicode_type): "Cannot convert 'bytes' object to unicode implicitly, decoding required",
(bytes_type, str_type): "Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.",
(bytes_type, basestring_type): ("Cannot convert 'bytes' object to basestring implicitly."
" This is not portable to Py3."),
(bytes_type, PyrexTypes.c_py_unicode_ptr_type): "Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'.",
(bytes_type, PyrexTypes.c_const_py_unicode_ptr_type): (
"Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'."),
(basestring_type, bytes_type): "Cannot convert 'basestring' object to bytes implicitly. This is not portable.",
(str_type, unicode_type): ("str objects do not support coercion to unicode,"
" use a unicode string literal instead (u'')"),
(str_type, bytes_type): "Cannot convert 'str' to 'bytes' implicitly. This is not portable.",
(str_type, PyrexTypes.c_char_ptr_type): "'str' objects do not support coercion to C types (use 'bytes'?).",
(str_type, PyrexTypes.c_const_char_ptr_type): "'str' objects do not support coercion to C types (use 'bytes'?).",
(str_type, PyrexTypes.c_uchar_ptr_type): "'str' objects do not support coercion to C types (use 'bytes'?).",
(str_type, PyrexTypes.c_const_uchar_ptr_type): "'str' objects do not support coercion to C types (use 'bytes'?).",
(str_type, PyrexTypes.c_py_unicode_ptr_type): "'str' objects do not support coercion to C types (use 'unicode'?).",
(str_type, PyrexTypes.c_const_py_unicode_ptr_type): (
"'str' objects do not support coercion to C types (use 'unicode'?)."),
(PyrexTypes.c_char_ptr_type, unicode_type): "Cannot convert 'char*' to unicode implicitly, decoding required",
(PyrexTypes.c_const_char_ptr_type, unicode_type): (
"Cannot convert 'char*' to unicode implicitly, decoding required"),
(PyrexTypes.c_uchar_ptr_type, unicode_type): "Cannot convert 'char*' to unicode implicitly, decoding required",
(PyrexTypes.c_const_uchar_ptr_type, unicode_type): (
"Cannot convert 'char*' to unicode implicitly, decoding required"),
}
......@@ -103,8 +115,9 @@ def find_coercion_error(type_tuple, default, env):
err = coercion_error_dict.get(type_tuple)
if err is None:
return default
elif ((PyrexTypes.c_char_ptr_type in type_tuple or PyrexTypes.c_uchar_ptr_type in type_tuple)
and env.directives['c_string_encoding']):
elif (env.directives['c_string_encoding'] and
any(t in type_tuple for t in (PyrexTypes.c_char_ptr_type, PyrexTypes.c_uchar_ptr_type,
PyrexTypes.c_const_char_ptr_type, PyrexTypes.c_const_uchar_ptr_type))):
if type_tuple[1].is_pyobject:
return default
elif env.directives['c_string_encoding'] in ('ascii', 'default'):
......@@ -1301,20 +1314,20 @@ class BytesNode(ConstNode):
return CharNode(self.pos, value=self.value,
constant_result=ord(self.value))
node = BytesNode(self.pos, value=self.value,
constant_result=self.constant_result)
node = BytesNode(self.pos, value=self.value, constant_result=self.constant_result)
if dst_type.is_pyobject:
if dst_type in (py_object_type, Builtin.bytes_type):
node.type = Builtin.bytes_type
else:
self.check_for_coercion_error(dst_type, env, fail=True)
return node
elif dst_type == PyrexTypes.c_char_ptr_type:
elif dst_type in (PyrexTypes.c_char_ptr_type, PyrexTypes.c_const_char_ptr_type):
node.type = dst_type
return node
elif dst_type == PyrexTypes.c_uchar_ptr_type:
node.type = PyrexTypes.c_char_ptr_type
return CastNode(node, PyrexTypes.c_uchar_ptr_type)
elif dst_type in (PyrexTypes.c_uchar_ptr_type, PyrexTypes.c_const_uchar_ptr_type):
node.type = (PyrexTypes.c_const_char_ptr_type if dst_type == PyrexTypes.c_const_uchar_ptr_type
else PyrexTypes.c_char_ptr_type)
return CastNode(node, dst_type)
elif dst_type.assignable_from(PyrexTypes.c_char_ptr_type):
node.type = dst_type
return node
......@@ -1326,9 +1339,15 @@ class BytesNode(ConstNode):
def generate_evaluation_code(self, code):
if self.type.is_pyobject:
self.result_code = code.get_py_string_const(self.value)
result = code.get_py_string_const(self.value)
elif self.type.is_const:
result = code.get_string_const(self.value)
else:
self.result_code = code.get_string_const(self.value)
# not const => use plain C string literal and cast to mutable type
literal = code.as_c_string_literal(self.value)
# C++ may require a cast
result = typecast(self.type, PyrexTypes.c_void_ptr_type, literal)
self.result_code = result
def get_constant_c_result_code(self):
return None # FIXME
......@@ -4146,7 +4165,7 @@ class SliceIndexNode(ExprNode):
stop_code = self.stop_code()
if self.base.type.is_string:
base_result = self.base.result()
if self.base.type != PyrexTypes.c_char_ptr_type:
if self.base.type not in (PyrexTypes.c_char_ptr_type, PyrexTypes.c_const_char_ptr_type):
base_result = '((const char*)%s)' % base_result
if self.type is bytearray_type:
type_name = 'ByteArray'
......
......@@ -351,8 +351,8 @@ class IterationTransform(Visitor.EnvTransform):
base=ExprNodes.BytesNode(
slice_node.pos, value=bytes_value,
constant_result=bytes_value,
type=PyrexTypes.c_char_ptr_type).coerce_to(
PyrexTypes.c_uchar_ptr_type, self.current_env()),
type=PyrexTypes.c_const_char_ptr_type).coerce_to(
PyrexTypes.c_const_uchar_ptr_type, self.current_env()),
start=None,
stop=ExprNodes.IntNode(
slice_node.pos, value=str(len(bytes_value)),
......@@ -2315,12 +2315,12 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
Pyx_strlen_func_type = PyrexTypes.CFuncType(
PyrexTypes.c_size_t_type, [
PyrexTypes.CFuncTypeArg("bytes", PyrexTypes.c_char_ptr_type, None)
PyrexTypes.CFuncTypeArg("bytes", PyrexTypes.c_const_char_ptr_type, None)
])
Pyx_Py_UNICODE_strlen_func_type = PyrexTypes.CFuncType(
PyrexTypes.c_size_t_type, [
PyrexTypes.CFuncTypeArg("unicode", PyrexTypes.c_py_unicode_ptr_type, None)
PyrexTypes.CFuncTypeArg("unicode", PyrexTypes.c_const_py_unicode_ptr_type, None)
])
PyObject_Size_func_type = PyrexTypes.CFuncType(
......@@ -3253,8 +3253,8 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
PyUnicode_AsEncodedString_func_type = PyrexTypes.CFuncType(
Builtin.bytes_type, [
PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
])
PyUnicode_AsXyzString_func_type = PyrexTypes.CFuncType(
......@@ -3321,30 +3321,30 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
PyUnicode_DecodeXyz_func_ptr_type = PyrexTypes.CPtrType(PyrexTypes.CFuncType(
Builtin.unicode_type, [
PyrexTypes.CFuncTypeArg("string", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("string", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("size", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_char_ptr_type, None),
]))
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
]))
_decode_c_string_func_type = PyrexTypes.CFuncType(
Builtin.unicode_type, [
PyrexTypes.CFuncTypeArg("string", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("string", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("decode_func", PyUnicode_DecodeXyz_func_ptr_type, None),
])
])
_decode_bytes_func_type = PyrexTypes.CFuncType(
Builtin.unicode_type, [
PyrexTypes.CFuncTypeArg("string", PyrexTypes.py_object_type, None),
PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("decode_func", PyUnicode_DecodeXyz_func_ptr_type, None),
])
])
_decode_cpp_string_func_type = None # lazy init
......@@ -3436,8 +3436,8 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
PyrexTypes.CFuncTypeArg("string", string_type, None),
PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("decode_func", self.PyUnicode_DecodeXyz_func_ptr_type, None),
])
helper_func_type = self._decode_cpp_string_func_type
......@@ -3509,14 +3509,14 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
encoding = node.value
node = ExprNodes.BytesNode(
node.pos, value=BytesLiteral(encoding.utf8encode()),
type=PyrexTypes.c_char_ptr_type)
type=PyrexTypes.c_const_char_ptr_type)
elif isinstance(node, (ExprNodes.StringNode, ExprNodes.BytesNode)):
encoding = node.value.decode('ISO-8859-1')
node = ExprNodes.BytesNode(
node.pos, value=node.value, type=PyrexTypes.c_char_ptr_type)
node.pos, value=node.value, type=PyrexTypes.c_const_char_ptr_type)
elif node.type is Builtin.bytes_type:
encoding = None
node = node.coerce_to(PyrexTypes.c_char_ptr_type, self.current_env())
node = node.coerce_to(PyrexTypes.c_const_char_ptr_type, self.current_env())
elif node.type.is_string:
encoding = None
else:
......
......@@ -2186,6 +2186,8 @@ class CPointerBaseType(CType):
def __init__(self, base_type):
self.base_type = base_type
if base_type.is_const:
base_type = base_type.const_base_type
for char_type in (c_char_type, c_uchar_type, c_schar_type):
if base_type.same_as(char_type):
self.is_string = 1
......@@ -3860,10 +3862,13 @@ c_null_ptr_type = CNullPtrType(c_void_type)
c_void_ptr_type = CPtrType(c_void_type)
c_void_ptr_ptr_type = CPtrType(c_void_ptr_type)
c_char_ptr_type = CPtrType(c_char_type)
c_const_char_ptr_type = CPtrType(CConstType(c_char_type))
c_uchar_ptr_type = CPtrType(c_uchar_type)
c_const_uchar_ptr_type = CPtrType(CConstType(c_uchar_type))
c_char_ptr_ptr_type = CPtrType(c_char_ptr_type)
c_int_ptr_type = CPtrType(c_int_type)
c_py_unicode_ptr_type = CPtrType(c_py_unicode_type)
c_const_py_unicode_ptr_type = CPtrType(CConstType(c_py_unicode_type))
c_py_ssize_t_ptr_type = CPtrType(c_py_ssize_t_type)
c_ssize_t_ptr_type = CPtrType(c_ssize_t_type)
c_size_t_ptr_type = CPtrType(c_size_t_type)
......@@ -4064,14 +4069,13 @@ def best_match(args, functions, pos=None, env=None):
assignable = dst_type.assignable_from(src_type)
# Now take care of normal string literals. So when you call a cdef
# Now take care of unprefixed string literals. So when you call a cdef
# function that takes a char *, the coercion will mean that the
# type will simply become bytes. We need to do this coercion
# manually for overloaded and fused functions
if not assignable and src_type.is_pyobject:
if (src_type.is_builtin_type and src_type.name == 'str' and
dst_type.resolve() is c_char_ptr_type):
c_src_type = c_char_ptr_type
if src_type.is_builtin_type and src_type.name == 'str' and dst_type.resolve().is_string:
c_src_type = dst_type.resolve()
else:
c_src_type = src_type.default_coerced_ctype()
......
......@@ -310,7 +310,7 @@ static CYTHON_INLINE float __PYX_NAN() {
# endif
#endif
typedef struct {PyObject **p; char *s; const Py_ssize_t n; const char* encoding;
typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding;
const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; /*proto*/
/////////////// ForceInitThreads.proto ///////////////
......
......@@ -4,7 +4,7 @@ cimport cython
############################################################
# tests for char* slicing
cdef char* cstring = "abcABCqtp"
cdef const char* cstring = "abcABCqtp"
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
......@@ -37,8 +37,8 @@ def slice_charptr_decode_unknown_encoding():
>>> print(str(slice_charptr_decode_unknown_encoding()).replace("u'", "'"))
('abcABCqtp', 'abcABCqtp', 'abc', 'abcABCqt')
"""
cdef char* enc = 'UTF-8'
cdef char* error_handling = 'strict'
cdef const char* enc = 'UTF-8'
cdef const char* error_handling = 'strict'
return (cstring.decode(enc),
cstring.decode(enc, error_handling),
cstring[:3].decode(enc),
......
cimport cython
cdef char* s = b"abcdefg"
cdef const char* cs = b"abcdefg"
cdef unsigned char* us = b"abcdefg"
cdef const unsigned char* cus = b"abcdefg"
cdef bytes pystr = b"abcdefg"
......@@ -16,6 +18,17 @@ def lentest_char():
return len(s)
@cython.test_assert_path_exists(
"//PythonCapiCallNode",
)
def lentest_const_char():
"""
>>> lentest_const_char()
7
"""
return len(cs)
@cython.test_assert_path_exists(
"//PythonCapiCallNode",
)
......@@ -63,6 +76,17 @@ def lentest_uchar():
return len(us)
@cython.test_assert_path_exists(
"//PythonCapiCallNode",
)
def lentest_const_uchar():
"""
>>> lentest_const_uchar()
7
"""
return len(cus)
@cython.test_assert_path_exists(
"//PythonCapiCallNode",
)
......
......@@ -3,18 +3,23 @@ from libc.stdio cimport *
from posix.unistd cimport *
from posix.fcntl cimport *
cdef int noisy_function() except -1:
cdef int ret = 0
ret = printf(b"0123456789\n", 0)
assert ret == 11
ret = printf(b"012%s6789\n", "345")
assert ret == 11 # printf()
ret = printf(b"012%d6789\n", 345)
assert ret == 11 # printf()
ret = printf(b"0123456789\n")
assert ret == 11 # printf()
ret = fflush(stdout)
assert ret == 0
ret = fprintf(stdout, b"0123456789\n", 0)
assert ret == 11
assert ret == 0 # fflush()
ret = fprintf(stdout, b"012%d6789\n", 345)
assert ret == 11 # fprintf()
ret = fflush(stdout)
assert ret == 0
assert ret == 0 # fflush()
ret = write(STDOUT_FILENO, b"0123456789\n", 11)
assert ret == 11
assert ret == 11 # write()
return 0
......@@ -40,8 +45,8 @@ def test_silent_stdout():
ret = close(stdout_save)
assert ret == 0
cdef class silent_fd:
cdef class silent_fd:
cdef int fd_save, fd
def __cinit__(self, int fd=-1):
......@@ -77,6 +82,7 @@ cdef class silent_fd:
self.fd_save = -1
return None
def test_silent_stdout_ctxmanager():
"""
>> test_silent_stdout_ctxmanager()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment