Commit 37e4a206 authored by Stefan Behnel's avatar Stefan Behnel

generally postpone the cleanup for char* temps to improve safety and move the...

generally postpone the cleanup for char* temps to improve safety and move the 'char* from temp' compile error to assignments to allow non-trivial char* expressions

--HG--
rename : tests/run/charptr_cfunc_args.pyx => tests/run/charptr_from_temp.pyx
parent e36ec6c2
...@@ -9,6 +9,12 @@ Latest ...@@ -9,6 +9,12 @@ Latest
Features added Features added
-------------- --------------
* Taking a ``char*`` from a temporary Python string object is safer
in more cases and can be done inside of non-trivial expressions,
including arguments of a function call. A compile time error
is raised only when such a pointer is assigned to a variable and
would thus exceed the lifetime of the string itself.
* Cascaded assignments (a = b = ...) try to minimise the number of * Cascaded assignments (a = b = ...) try to minimise the number of
type coercions. type coercions.
...@@ -40,6 +46,9 @@ Features added ...@@ -40,6 +46,9 @@ Features added
Bugs fixed Bugs fixed
---------- ----------
* Taking a ``char*`` from an indexed Python string generated unsafe
reference counting code.
* Set literals now create all of their items before trying to add them * Set literals now create all of their items before trying to add them
to the set, following the behaviour in CPython. This makes a to the set, following the behaviour in CPython. This makes a
difference in the rare case that the item creation has side effects difference in the rare case that the item creation has side effects
......
...@@ -596,7 +596,7 @@ class ExprNode(Node): ...@@ -596,7 +596,7 @@ class ExprNode(Node):
self.allocate_temp_result(code) self.allocate_temp_result(code)
self.generate_result_code(code) self.generate_result_code(code)
if self.is_temp: if self.is_temp and not (self.type.is_string or self.type.is_pyunicode_ptr):
# If we are temp we do not need to wait until this node is disposed # If we are temp we do not need to wait until this node is disposed
# before disposing children. # before disposing children.
self.generate_subexpr_disposal_code(code) self.generate_subexpr_disposal_code(code)
...@@ -611,6 +611,10 @@ class ExprNode(Node): ...@@ -611,6 +611,10 @@ class ExprNode(Node):
def generate_disposal_code(self, code): def generate_disposal_code(self, code):
if self.is_temp: if self.is_temp:
if self.type.is_string or self.type.is_pyunicode_ptr:
# postponed from self.generate_evaluation_code()
self.generate_subexpr_disposal_code(code)
self.free_subexpr_temps(code)
if self.result(): if self.result():
if self.type.is_pyobject: if self.type.is_pyobject:
code.put_decref_clear(self.result(), self.ctype()) code.put_decref_clear(self.result(), self.ctype())
...@@ -629,7 +633,11 @@ class ExprNode(Node): ...@@ -629,7 +633,11 @@ class ExprNode(Node):
def generate_post_assignment_code(self, code): def generate_post_assignment_code(self, code):
if self.is_temp: if self.is_temp:
if self.type.is_pyobject: if self.type.is_string or self.type.is_pyunicode_ptr:
# postponed from self.generate_evaluation_code()
self.generate_subexpr_disposal_code(code)
self.free_subexpr_temps(code)
elif self.type.is_pyobject:
code.putln("%s = 0;" % self.result()) code.putln("%s = 0;" % self.result())
elif self.type.is_memoryviewslice: elif self.type.is_memoryviewslice:
code.putln("%s.memview = NULL;" % self.result()) code.putln("%s.memview = NULL;" % self.result())
...@@ -8449,6 +8457,10 @@ class TypecastNode(ExprNode): ...@@ -8449,6 +8457,10 @@ class TypecastNode(ExprNode):
# either temp or a C cast => no side effects other than the operand's # either temp or a C cast => no side effects other than the operand's
return self.operand.is_simple() return self.operand.is_simple()
def is_ephemeral(self):
# either temp or a C cast => no side effects other than the operand's
return self.operand.is_ephemeral()
def nonlocally_immutable(self): def nonlocally_immutable(self):
return self.is_temp or self.operand.nonlocally_immutable() return self.is_temp or self.operand.nonlocally_immutable()
...@@ -9008,6 +9020,10 @@ class BinopNode(ExprNode): ...@@ -9008,6 +9020,10 @@ class BinopNode(ExprNode):
def check_const(self): def check_const(self):
return self.operand1.check_const() and self.operand2.check_const() return self.operand1.check_const() and self.operand2.check_const()
def is_ephemeral(self):
return (super(BinopNode, self).is_ephemeral() or
self.operand1.is_ephemeral() or self.operand2.is_ephemeral())
def generate_result_code(self, code): def generate_result_code(self, code):
#print "BinopNode.generate_result_code:", self.operand1, self.operand2 ### #print "BinopNode.generate_result_code:", self.operand1, self.operand2 ###
if self.operand1.type.is_pyobject: if self.operand1.type.is_pyobject:
...@@ -10865,15 +10881,7 @@ class CoerceFromPyTypeNode(CoercionNode): ...@@ -10865,15 +10881,7 @@ class CoerceFromPyTypeNode(CoercionNode):
error(arg.pos, error(arg.pos,
"Cannot convert Python object to '%s'" % result_type) "Cannot convert Python object to '%s'" % result_type)
if self.type.is_string or self.type.is_pyunicode_ptr: if self.type.is_string or self.type.is_pyunicode_ptr:
if self.arg.is_ephemeral(): if self.arg.is_name and self.arg.entry and self.arg.entry.is_pyglobal:
# FIXME: instead of always raising an error here, we should trace what happens
# with the result (by passing on the "ephemeral" state) and only raise the
# error when we notice illegal usage. Something like "(<char*>pystr)[0] + 1"
# can be perfectly legal, whereas "cdef char* s = pystr1 + pystr2" should
# fail on the assignment.
error(arg.pos,
"Obtaining '%s' from temporary Python value" % result_type)
elif self.arg.is_name and self.arg.entry and self.arg.entry.is_pyglobal:
warning(arg.pos, warning(arg.pos,
"Obtaining '%s' from externally modifiable global Python value" % result_type, "Obtaining '%s' from externally modifiable global Python value" % result_type,
level=1) level=1)
...@@ -10882,31 +10890,8 @@ class CoerceFromPyTypeNode(CoercionNode): ...@@ -10882,31 +10890,8 @@ class CoerceFromPyTypeNode(CoercionNode):
# The arg is always already analysed # The arg is always already analysed
return self return self
def generate_evaluation_code(self, code): def is_ephemeral(self):
if self.type.is_string: return self.type.is_ptr and self.arg.is_temp and not self.arg.is_name
# when coercing Python strings to C, we may have to keep the Python
# object alive a little longer, e.g. during a function call, so we do
# not dispose of subexpression temps here and do it later during cleanup
self.generate_subexpr_evaluation_code(code)
code.mark_pos(self.pos)
self.allocate_temp_result(code)
self.generate_result_code(code)
else:
super(CoerceFromPyTypeNode, self).generate_evaluation_code(code)
def generate_disposal_code(self, code):
if self.type.is_string:
# postponed from self.generate_evaluation_code()
self.generate_subexpr_disposal_code(code)
self.free_subexpr_temps(code)
super(CoerceFromPyTypeNode, self).generate_disposal_code(code)
def generate_post_assignment_code(self, code):
if self.type.is_string:
# postponed from self.generate_evaluation_code()
self.generate_subexpr_disposal_code(code)
self.free_subexpr_temps(code)
super(CoerceFromPyTypeNode, self).generate_post_assignment_code(code)
def generate_result_code(self, code): def generate_result_code(self, code):
function = self.type.from_py_function function = self.type.from_py_function
......
...@@ -4557,7 +4557,11 @@ class AssignmentNode(StatNode): ...@@ -4557,7 +4557,11 @@ class AssignmentNode(StatNode):
# to any of the left hand sides. # to any of the left hand sides.
def analyse_expressions(self, env): def analyse_expressions(self, env):
return self.analyse_types(env) node = self.analyse_types(env)
if isinstance(node, AssignmentNode):
if not node.rhs.type.is_pyobject and node.rhs.is_ephemeral():
error(self.pos, "Storing unsafe C derivative of temporary Python reference")
return node
# def analyse_expressions(self, env): # def analyse_expressions(self, env):
# self.analyse_expressions_1(env) # self.analyse_expressions_1(env)
......
...@@ -18,6 +18,13 @@ cptr = s ...@@ -18,6 +18,13 @@ cptr = s
# temp => error # temp => error
cptr = s + b"cba" cptr = s + b"cba"
# indexing => error
cptr = s[0]
cdef char* x = <char*>s[0]
# slicing => error
cptr = s[:2]
cdef unicode c_u = u"abc" cdef unicode c_u = u"abc"
u = u"abc" u = u"abc"
...@@ -39,7 +46,10 @@ cuptr = u + u"cba" ...@@ -39,7 +46,10 @@ cuptr = u + u"cba"
_ERRORS = """ _ERRORS = """
16:8: Obtaining 'char *' from externally modifiable global Python value 16:8: Obtaining 'char *' from externally modifiable global Python value
19:9: Obtaining 'char *' from temporary Python value 19:9: Storing unsafe C derivative of temporary Python reference
34:9: Obtaining 'Py_UNICODE *' from externally modifiable global Python value 22:8: Storing unsafe C derivative of temporary Python reference
37:10: Obtaining 'Py_UNICODE *' from temporary Python value 23:5: Storing unsafe C derivative of temporary Python reference
26:8: Storing unsafe C derivative of temporary Python reference
41:9: Obtaining 'Py_UNICODE *' from externally modifiable global Python value
44:10: Storing unsafe C derivative of temporary Python reference
""" """
...@@ -9,4 +9,5 @@ def foo(obj): ...@@ -9,4 +9,5 @@ def foo(obj):
_ERRORS = u""" _ERRORS = u"""
8:5: Casting temporary Python object to non-numeric non-Python type 8:5: Casting temporary Python object to non-numeric non-Python type
8:5: Storing unsafe C derivative of temporary Python reference
""" """
...@@ -2,36 +2,59 @@ ...@@ -2,36 +2,59 @@
from cpython.version cimport PY_MAJOR_VERSION from cpython.version cimport PY_MAJOR_VERSION
cdef cfunc(char* s): cdef cfunc1(char* s):
if PY_MAJOR_VERSION == 2: if PY_MAJOR_VERSION == 2:
return s return s
else: else:
return s.decode('ASCII') return s.decode('ASCII')
cdef cfunc3(int x, char* s, object y):
return cfunc1(s)
def test_one_arg_indexing(s): def test_one_arg_indexing(s):
""" """
>>> test_one_arg_indexing(b'xyz') >>> test_one_arg_indexing(b'xyz')
'y' 'y'
""" """
cfunc(s[0]) cfunc1(s[0])
z = cfunc(s[2]) z = cfunc1(s[2])
assert z == 'z', repr(z) assert z == 'z', repr(z)
return cfunc(s[1]) return cfunc1(s[1])
def test_more_args_indexing(s):
"""
>>> test_more_args_indexing(b'xyz')
'y'
"""
cfunc3(1, s[0], 6.5)
z = cfunc3(2, s[2], 'abc' * 2)
assert z == 'z', repr(z)
return cfunc3(3, s[1], 1)
'''
# FIXME: should these be allowed?
def test_one_arg_slicing(s): def test_one_arg_slicing(s):
""" """
>>> test_one_arg_indexing(b'xyz') >>> test_one_arg_slicing(b'xyz')
'y' 'y'
""" """
cfunc(s[:2]) cfunc1(s[:2])
z = cfunc(s[2:]) z = cfunc1(s[2:])
assert z == 'z', repr(z) assert z == 'z', repr(z)
return cfunc(s[1:2]) return cfunc1(s[1:2])
def test_more_args_slicing(s):
"""
>>> test_more_args_slicing(b'xyz')
'y'
"""
cfunc3(1, s[:2], 'abc')
z = cfunc3(123, s[2:], 5)
assert z == 'z', repr(z)
return cfunc3(2, s[1:2], 1.4)
def test_one_arg_adding(s): def test_one_arg_adding(s):
...@@ -39,5 +62,12 @@ def test_one_arg_adding(s): ...@@ -39,5 +62,12 @@ def test_one_arg_adding(s):
>>> test_one_arg_adding(b'xyz') >>> test_one_arg_adding(b'xyz')
'abxyzqr' 'abxyzqr'
""" """
return cfunc(b"a" + b"b" + s + b"q" + b"r") return cfunc1(b"a" + b"b" + s + b"q" + b"r")
'''
def test_more_args_adding(s):
"""
>>> test_more_args_adding(b'xyz')
'abxyzqr'
"""
return cfunc3(1, b"a" + b"b" + s + b"q" + b"r", 'xyz%d' % 3)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment