Commit 20c7084a authored by scoder's avatar scoder

Merge pull request #428 from scoder/_const_char_literals

make C char* string literals "const" unless specified otherwise
parents 6325889a 36283e52
......@@ -1313,7 +1313,7 @@ class GlobalState(object):
conditional = True
decls_writer.putln("#if PY_MAJOR_VERSION %s 3" % (
(2 in c.py_versions) and '<' or '>='))
decls_writer.putln('static char %s[] = "%s";' % (
decls_writer.putln('static const char %s[] = "%s";' % (
cname, StringEncoding.split_string_literal(c.escaped_value)))
if conditional:
decls_writer.putln("#endif")
......
......@@ -81,21 +81,33 @@ coercion_error_dict = {
" This is not portable and requires explicit encoding."),
(unicode_type, bytes_type): "Cannot convert Unicode string to 'bytes' implicitly, encoding required.",
(unicode_type, PyrexTypes.c_char_ptr_type): "Unicode objects only support coercion to Py_UNICODE*.",
(unicode_type, PyrexTypes.c_const_char_ptr_type): "Unicode objects only support coercion to Py_UNICODE*.",
(unicode_type, PyrexTypes.c_uchar_ptr_type): "Unicode objects only support coercion to Py_UNICODE*.",
(unicode_type, PyrexTypes.c_const_uchar_ptr_type): "Unicode objects only support coercion to Py_UNICODE*.",
(bytes_type, unicode_type): "Cannot convert 'bytes' object to unicode implicitly, decoding required",
(bytes_type, str_type): "Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.",
(bytes_type, basestring_type): ("Cannot convert 'bytes' object to basestring implicitly."
" This is not portable to Py3."),
(bytes_type, PyrexTypes.c_py_unicode_ptr_type): "Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'.",
(bytes_type, PyrexTypes.c_const_py_unicode_ptr_type): (
"Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'."),
(basestring_type, bytes_type): "Cannot convert 'basestring' object to bytes implicitly. This is not portable.",
(str_type, unicode_type): ("str objects do not support coercion to unicode,"
" use a unicode string literal instead (u'')"),
(str_type, bytes_type): "Cannot convert 'str' to 'bytes' implicitly. This is not portable.",
(str_type, PyrexTypes.c_char_ptr_type): "'str' objects do not support coercion to C types (use 'bytes'?).",
(str_type, PyrexTypes.c_const_char_ptr_type): "'str' objects do not support coercion to C types (use 'bytes'?).",
(str_type, PyrexTypes.c_uchar_ptr_type): "'str' objects do not support coercion to C types (use 'bytes'?).",
(str_type, PyrexTypes.c_const_uchar_ptr_type): "'str' objects do not support coercion to C types (use 'bytes'?).",
(str_type, PyrexTypes.c_py_unicode_ptr_type): "'str' objects do not support coercion to C types (use 'unicode'?).",
(str_type, PyrexTypes.c_const_py_unicode_ptr_type): (
"'str' objects do not support coercion to C types (use 'unicode'?)."),
(PyrexTypes.c_char_ptr_type, unicode_type): "Cannot convert 'char*' to unicode implicitly, decoding required",
(PyrexTypes.c_const_char_ptr_type, unicode_type): (
"Cannot convert 'char*' to unicode implicitly, decoding required"),
(PyrexTypes.c_uchar_ptr_type, unicode_type): "Cannot convert 'char*' to unicode implicitly, decoding required",
(PyrexTypes.c_const_uchar_ptr_type, unicode_type): (
"Cannot convert 'char*' to unicode implicitly, decoding required"),
}
......@@ -103,8 +115,9 @@ def find_coercion_error(type_tuple, default, env):
err = coercion_error_dict.get(type_tuple)
if err is None:
return default
elif ((PyrexTypes.c_char_ptr_type in type_tuple or PyrexTypes.c_uchar_ptr_type in type_tuple)
and env.directives['c_string_encoding']):
elif (env.directives['c_string_encoding'] and
any(t in type_tuple for t in (PyrexTypes.c_char_ptr_type, PyrexTypes.c_uchar_ptr_type,
PyrexTypes.c_const_char_ptr_type, PyrexTypes.c_const_uchar_ptr_type))):
if type_tuple[1].is_pyobject:
return default
elif env.directives['c_string_encoding'] in ('ascii', 'default'):
......@@ -1301,20 +1314,20 @@ class BytesNode(ConstNode):
return CharNode(self.pos, value=self.value,
constant_result=ord(self.value))
node = BytesNode(self.pos, value=self.value,
constant_result=self.constant_result)
node = BytesNode(self.pos, value=self.value, constant_result=self.constant_result)
if dst_type.is_pyobject:
if dst_type in (py_object_type, Builtin.bytes_type):
node.type = Builtin.bytes_type
else:
self.check_for_coercion_error(dst_type, env, fail=True)
return node
elif dst_type == PyrexTypes.c_char_ptr_type:
elif dst_type in (PyrexTypes.c_char_ptr_type, PyrexTypes.c_const_char_ptr_type):
node.type = dst_type
return node
elif dst_type == PyrexTypes.c_uchar_ptr_type:
node.type = PyrexTypes.c_char_ptr_type
return CastNode(node, PyrexTypes.c_uchar_ptr_type)
elif dst_type in (PyrexTypes.c_uchar_ptr_type, PyrexTypes.c_const_uchar_ptr_type, PyrexTypes.c_void_ptr_type):
node.type = (PyrexTypes.c_const_char_ptr_type if dst_type == PyrexTypes.c_const_uchar_ptr_type
else PyrexTypes.c_char_ptr_type)
return CastNode(node, dst_type)
elif dst_type.assignable_from(PyrexTypes.c_char_ptr_type):
node.type = dst_type
return node
......@@ -1326,9 +1339,15 @@ class BytesNode(ConstNode):
def generate_evaluation_code(self, code):
if self.type.is_pyobject:
self.result_code = code.get_py_string_const(self.value)
result = code.get_py_string_const(self.value)
elif self.type.is_const:
result = code.get_string_const(self.value)
else:
self.result_code = code.get_string_const(self.value)
# not const => use plain C string literal and cast to mutable type
literal = self.value.as_c_string_literal()
# C++ may require a cast
result = typecast(self.type, PyrexTypes.c_void_ptr_type, literal)
self.result_code = result
def get_constant_c_result_code(self):
return None # FIXME
......@@ -4146,7 +4165,7 @@ class SliceIndexNode(ExprNode):
stop_code = self.stop_code()
if self.base.type.is_string:
base_result = self.base.result()
if self.base.type != PyrexTypes.c_char_ptr_type:
if self.base.type not in (PyrexTypes.c_char_ptr_type, PyrexTypes.c_const_char_ptr_type):
base_result = '((const char*)%s)' % base_result
if self.type is bytearray_type:
type_name = 'ByteArray'
......
......@@ -1956,12 +1956,15 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
"static struct PyGetSetDef %s[] = {" %
env.getset_table_cname)
for entry in env.property_entries:
if entry.doc:
doc_code = "%s" % code.get_string_const(entry.doc)
doc = entry.doc
if doc:
if doc.is_unicode:
doc = doc.as_utf8_string()
doc_code = doc.as_c_string_literal()
else:
doc_code = "0"
code.putln(
'{(char *)"%s", %s, %s, %s, 0},' % (
'{(char *)"%s", %s, %s, (char *)%s, 0},' % (
entry.name,
entry.getter_cname or "0",
entry.setter_cname or "0",
......
......@@ -29,7 +29,7 @@ from .PyrexTypes import py_object_type, error_type
from .Symtab import (ModuleScope, LocalScope, ClosureScope,
StructOrUnionScope, PyClassScope, CppClassScope, TemplateScope)
from .Code import UtilityCode
from .StringEncoding import EncodedString, escape_byte_string, split_string_literal
from .StringEncoding import EncodedString
from . import Future
from . import Options
from . import DebugFlags
......@@ -3318,12 +3318,12 @@ class DefNodeWrapper(FuncDefNode):
docstr = entry.doc
if docstr.is_unicode:
docstr = docstr.utf8encode()
docstr = docstr.as_utf8_string()
code.putln(
'static char %s[] = "%s";' % (
'static char %s[] = %s;' % (
entry.doc_cname,
split_string_literal(escape_byte_string(docstr))))
docstr.as_c_string_literal()))
if entry.is_special:
code.putln('#if CYTHON_COMPILING_IN_CPYTHON')
......
......@@ -351,8 +351,8 @@ class IterationTransform(Visitor.EnvTransform):
base=ExprNodes.BytesNode(
slice_node.pos, value=bytes_value,
constant_result=bytes_value,
type=PyrexTypes.c_char_ptr_type).coerce_to(
PyrexTypes.c_uchar_ptr_type, self.current_env()),
type=PyrexTypes.c_const_char_ptr_type).coerce_to(
PyrexTypes.c_const_uchar_ptr_type, self.current_env()),
start=None,
stop=ExprNodes.IntNode(
slice_node.pos, value=str(len(bytes_value)),
......@@ -2315,12 +2315,12 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
Pyx_strlen_func_type = PyrexTypes.CFuncType(
PyrexTypes.c_size_t_type, [
PyrexTypes.CFuncTypeArg("bytes", PyrexTypes.c_char_ptr_type, None)
PyrexTypes.CFuncTypeArg("bytes", PyrexTypes.c_const_char_ptr_type, None)
])
Pyx_Py_UNICODE_strlen_func_type = PyrexTypes.CFuncType(
PyrexTypes.c_size_t_type, [
PyrexTypes.CFuncTypeArg("unicode", PyrexTypes.c_py_unicode_ptr_type, None)
PyrexTypes.CFuncTypeArg("unicode", PyrexTypes.c_const_py_unicode_ptr_type, None)
])
PyObject_Size_func_type = PyrexTypes.CFuncType(
......@@ -3253,8 +3253,8 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
PyUnicode_AsEncodedString_func_type = PyrexTypes.CFuncType(
Builtin.bytes_type, [
PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
])
PyUnicode_AsXyzString_func_type = PyrexTypes.CFuncType(
......@@ -3321,30 +3321,30 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
PyUnicode_DecodeXyz_func_ptr_type = PyrexTypes.CPtrType(PyrexTypes.CFuncType(
Builtin.unicode_type, [
PyrexTypes.CFuncTypeArg("string", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("string", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("size", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_char_ptr_type, None),
]))
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
]))
_decode_c_string_func_type = PyrexTypes.CFuncType(
Builtin.unicode_type, [
PyrexTypes.CFuncTypeArg("string", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("string", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("decode_func", PyUnicode_DecodeXyz_func_ptr_type, None),
])
])
_decode_bytes_func_type = PyrexTypes.CFuncType(
Builtin.unicode_type, [
PyrexTypes.CFuncTypeArg("string", PyrexTypes.py_object_type, None),
PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("decode_func", PyUnicode_DecodeXyz_func_ptr_type, None),
])
])
_decode_cpp_string_func_type = None # lazy init
......@@ -3436,8 +3436,8 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
PyrexTypes.CFuncTypeArg("string", string_type, None),
PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("decode_func", self.PyUnicode_DecodeXyz_func_ptr_type, None),
])
helper_func_type = self._decode_cpp_string_func_type
......@@ -3509,14 +3509,14 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
encoding = node.value
node = ExprNodes.BytesNode(
node.pos, value=BytesLiteral(encoding.utf8encode()),
type=PyrexTypes.c_char_ptr_type)
type=PyrexTypes.c_const_char_ptr_type)
elif isinstance(node, (ExprNodes.StringNode, ExprNodes.BytesNode)):
encoding = node.value.decode('ISO-8859-1')
node = ExprNodes.BytesNode(
node.pos, value=node.value, type=PyrexTypes.c_char_ptr_type)
node.pos, value=node.value, type=PyrexTypes.c_const_char_ptr_type)
elif node.type is Builtin.bytes_type:
encoding = None
node = node.coerce_to(PyrexTypes.c_char_ptr_type, self.current_env())
node = node.coerce_to(PyrexTypes.c_const_char_ptr_type, self.current_env())
elif node.type.is_string:
encoding = None
else:
......
......@@ -2186,6 +2186,8 @@ class CPointerBaseType(CType):
def __init__(self, base_type):
self.base_type = base_type
if base_type.is_const:
base_type = base_type.const_base_type
for char_type in (c_char_type, c_uchar_type, c_schar_type):
if base_type.same_as(char_type):
self.is_string = 1
......@@ -3860,10 +3862,13 @@ c_null_ptr_type = CNullPtrType(c_void_type)
c_void_ptr_type = CPtrType(c_void_type)
c_void_ptr_ptr_type = CPtrType(c_void_ptr_type)
c_char_ptr_type = CPtrType(c_char_type)
c_const_char_ptr_type = CPtrType(CConstType(c_char_type))
c_uchar_ptr_type = CPtrType(c_uchar_type)
c_const_uchar_ptr_type = CPtrType(CConstType(c_uchar_type))
c_char_ptr_ptr_type = CPtrType(c_char_ptr_type)
c_int_ptr_type = CPtrType(c_int_type)
c_py_unicode_ptr_type = CPtrType(c_py_unicode_type)
c_const_py_unicode_ptr_type = CPtrType(CConstType(c_py_unicode_type))
c_py_ssize_t_ptr_type = CPtrType(c_py_ssize_t_type)
c_ssize_t_ptr_type = CPtrType(c_ssize_t_type)
c_size_t_ptr_type = CPtrType(c_size_t_type)
......@@ -4064,14 +4069,13 @@ def best_match(args, functions, pos=None, env=None):
assignable = dst_type.assignable_from(src_type)
# Now take care of normal string literals. So when you call a cdef
# Now take care of unprefixed string literals. So when you call a cdef
# function that takes a char *, the coercion will mean that the
# type will simply become bytes. We need to do this coercion
# manually for overloaded and fused functions
if not assignable and src_type.is_pyobject:
if (src_type.is_builtin_type and src_type.name == 'str' and
dst_type.resolve() is c_char_ptr_type):
c_src_type = c_char_ptr_type
if src_type.is_builtin_type and src_type.name == 'str' and dst_type.resolve().is_string:
c_src_type = dst_type.resolve()
else:
c_src_type = src_type.default_coerced_ctype()
......
......@@ -136,6 +136,9 @@ class EncodedString(_unicode):
def contains_surrogates(self):
return string_contains_surrogates(self)
def as_utf8_string(self):
return BytesLiteral(self.utf8encode())
def string_contains_surrogates(ustring):
"""
......@@ -177,6 +180,10 @@ class BytesLiteral(_bytes):
is_unicode = False
def as_c_string_literal(self):
value = split_string_literal(escape_byte_string(self))
return '"%s"' % value
char_from_escape_sequence = {
r'\a' : u'\a',
......
......@@ -416,14 +416,12 @@ class DocStringSlot(SlotDescriptor):
# Descriptor for the docstring slot.
def slot_code(self, scope):
if scope.doc is not None:
if scope.doc.is_unicode:
doc = scope.doc.utf8encode()
else:
doc = scope.doc.byteencode()
return '"%s"' % StringEncoding.escape_byte_string(doc)
else:
doc = scope.doc
if doc is None:
return "0"
if doc.is_unicode:
doc = doc.as_utf8_string()
return doc.as_c_string_literal()
class SuiteSlot(SlotDescriptor):
......
......@@ -310,7 +310,7 @@ static CYTHON_INLINE float __PYX_NAN() {
# endif
#endif
typedef struct {PyObject **p; char *s; const Py_ssize_t n; const char* encoding;
typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding;
const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; /*proto*/
/////////////// ForceInitThreads.proto ///////////////
......
......@@ -4,7 +4,7 @@ cimport cython
############################################################
# tests for char* slicing
cdef char* cstring = "abcABCqtp"
cdef const char* cstring = "abcABCqtp"
@cython.test_assert_path_exists("//PythonCapiCallNode")
@cython.test_fail_if_path_exists("//AttributeNode")
......@@ -37,8 +37,8 @@ def slice_charptr_decode_unknown_encoding():
>>> print(str(slice_charptr_decode_unknown_encoding()).replace("u'", "'"))
('abcABCqtp', 'abcABCqtp', 'abc', 'abcABCqt')
"""
cdef char* enc = 'UTF-8'
cdef char* error_handling = 'strict'
cdef const char* enc = 'UTF-8'
cdef const char* error_handling = 'strict'
return (cstring.decode(enc),
cstring.decode(enc, error_handling),
cstring[:3].decode(enc),
......
cimport cython
cdef char* s = b"abcdefg"
cdef const char* cs = b"abcdefg"
cdef unsigned char* us = b"abcdefg"
cdef const unsigned char* cus = b"abcdefg"
cdef bytes pystr = b"abcdefg"
......@@ -16,6 +18,17 @@ def lentest_char():
return len(s)
@cython.test_assert_path_exists(
"//PythonCapiCallNode",
)
def lentest_const_char():
"""
>>> lentest_const_char()
7
"""
return len(cs)
@cython.test_assert_path_exists(
"//PythonCapiCallNode",
)
......@@ -63,6 +76,17 @@ def lentest_uchar():
return len(us)
@cython.test_assert_path_exists(
"//PythonCapiCallNode",
)
def lentest_const_uchar():
"""
>>> lentest_const_uchar()
7
"""
return len(cus)
@cython.test_assert_path_exists(
"//PythonCapiCallNode",
)
......
......@@ -3,18 +3,23 @@ from libc.stdio cimport *
from posix.unistd cimport *
from posix.fcntl cimport *
cdef int noisy_function() except -1:
cdef int ret = 0
ret = printf(b"0123456789\n", 0)
assert ret == 11
ret = printf(b"012%s6789\n", "345")
assert ret == 11 # printf()
ret = printf(b"012%d6789\n", 345)
assert ret == 11 # printf()
ret = printf(b"0123456789\n")
assert ret == 11 # printf()
ret = fflush(stdout)
assert ret == 0
ret = fprintf(stdout, b"0123456789\n", 0)
assert ret == 11
assert ret == 0 # fflush()
ret = fprintf(stdout, b"012%d6789\n", 345)
assert ret == 11 # fprintf()
ret = fflush(stdout)
assert ret == 0
assert ret == 0 # fflush()
ret = write(STDOUT_FILENO, b"0123456789\n", 11)
assert ret == 11
assert ret == 11 # write()
return 0
......@@ -40,8 +45,8 @@ def test_silent_stdout():
ret = close(stdout_save)
assert ret == 0
cdef class silent_fd:
cdef class silent_fd:
cdef int fd_save, fd
def __cinit__(self, int fd=-1):
......@@ -77,6 +82,7 @@ cdef class silent_fd:
self.fd_save = -1
return None
def test_silent_stdout_ctxmanager():
"""
>> test_silent_stdout_ctxmanager()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment